/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2021 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <dl-hwcap.h>
#include <libc-pointer-arith.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>

#if HAVE_TUNABLES
extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;

# if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;
# endif
#endif

#if CET_ENABLED
# include <dl-cet.h>
#endif

static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);

  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

#if CET_ENABLED
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif

  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
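      /* XGETBV with ECX == 0 reads XCR0, the extended control register in
	 which the OS enables the state components it is willing to save
	 and restore.  A feature's registers are only usable from user
	 space if the corresponding XCR0 bits are set.  */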
      /* Is YMM and XMM state usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
	  == (bit_YMM_state | bit_XMM_state))
	{
	  /* Determine if AVX is usable.  */
	  if (CPU_FEATURES_CPU_P (cpu_features, AVX))
	    {
	      CPU_FEATURE_SET (cpu_features, AVX);
	      /* The following features depend on AVX being usable.  */
	      /* Determine if AVX2 is usable.  */
	      if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
		{
		  CPU_FEATURE_SET (cpu_features, AVX2);

		  /* Unaligned loads with 256-bit AVX registers are faster
		     on Intel/AMD processors with AVX2.  */
		  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		    |= bit_arch_AVX_Fast_Unaligned_Load;
		}
	      /* Determine if AVX-VNNI is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
	      /* Determine if FMA is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
	      /* Determine if VAES is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
	      /* Determine if VPCLMULQDQ is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
	      /* Determine if XOP is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
	      /* Determine if F16C is usable.  */
	      CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
	    }

	  /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
	     ZMM16-ZMM31 state are enabled.  */
	  if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
			 | bit_ZMM16_31_state))
	      == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
	    {
	      /* Determine if AVX512F is usable.  */
	      if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
		{
		  CPU_FEATURE_SET (cpu_features, AVX512F);
		  /* Determine if AVX512CD is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
		  /* Determine if AVX512ER is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
		  /* Determine if AVX512PF is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
		  /* Determine if AVX512VL is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
		  /* Determine if AVX512DQ is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
		  /* Determine if AVX512BW is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
		  /* Determine if AVX512_4FMAPS is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
		  /* Determine if AVX512_4VNNIW is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
		  /* Determine if AVX512_BITALG is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
		  /* Determine if AVX512_IFMA is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
		  /* Determine if AVX512_VBMI is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
		  /* Determine if AVX512_VBMI2 is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
		  /* Determine if AVX512_VNNI is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
		  /* Determine if AVX512_VPOPCNTDQ is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features,
					  AVX512_VPOPCNTDQ);
		  /* Determine if AVX512_VP2INTERSECT is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features,
					  AVX512_VP2INTERSECT);
		  /* Determine if AVX512_BF16 is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
		  /* Determine if AVX512_FP16 is usable.  */
		  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
		}
	    }
	}

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
	  == (bit_XTILECFG_state | bit_XTILEDATA_state))
	{
	  /* Determine if AMX_BF16 is usable.  */
	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
	  /* Determine if AMX_TILE is usable.  */
	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
	  /* Determine if AMX_INT8 is usable.  */
	  CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
	}

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
	 size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
	{
	  unsigned int eax, ebx, ecx, edx;

	  __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
	  if (ebx != 0)
	    {
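	      /* CPUID leaf 0xd, sub-leaf 0: EBX is the XSAVE area size
		 required for the state components currently enabled in
		 XCR0; STATE_SAVE_OFFSET accounts for the integer registers
		 that _dl_runtime_resolve saves in front of that area.  */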
	      unsigned int xsave_state_full_size
		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

	      cpu_features->xsave_state_size
		= xsave_state_full_size;
	      cpu_features->xsave_state_full_size
		= xsave_state_full_size;

	      /* Check if XSAVEC is available.  */
	      if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
		{
		  unsigned int xstate_comp_offsets[32];
		  unsigned int xstate_comp_sizes[32];
		  unsigned int i;

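		  /* Components 0 (x87) and 1 (SSE) occupy the 512-byte
		     legacy XSAVE region; the 64-byte XSAVE header follows
		     it, so in the compacted format the first extended
		     component starts at byte 576.  */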
		  xstate_comp_offsets[0] = 0;
		  xstate_comp_offsets[1] = 160;
		  xstate_comp_offsets[2] = 576;
		  xstate_comp_sizes[0] = 160;
		  xstate_comp_sizes[1] = 256;

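		  /* In the compacted format used by XSAVEC, each enabled
		     component is stored right after the previous one,
		     rounded up to a 64-byte boundary when CPUID leaf 0xd,
		     sub-leaf i, sets ECX bit 1 for that component.  */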
		  for (i = 2; i < 32; i++)
		    {
		      if ((STATE_SAVE_MASK & (1 << i)) != 0)
			{
			  __cpuid_count (0xd, i, eax, ebx, ecx, edx);
			  xstate_comp_sizes[i] = eax;
			}
		      else
			{
			  ecx = 0;
			  xstate_comp_sizes[i] = 0;
			}

		      if (i > 2)
			{
			  xstate_comp_offsets[i]
			    = (xstate_comp_offsets[i - 1]
			       + xstate_comp_sizes[i - 1]);
			  if ((ecx & (1 << 1)) != 0)
			    xstate_comp_offsets[i]
			      = ALIGN_UP (xstate_comp_offsets[i], 64);
			}
		    }

		  /* Use XSAVEC.  */
		  unsigned int size
		    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
		  if (size)
		    {
		      cpu_features->xsave_state_size
			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
		      CPU_FEATURE_SET (cpu_features, XSAVEC);
		    }
		}
	    }
	}
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

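  /* Record the highest x86 ISA level (x86-64 baseline, v2, v3 or v4)
     whose features are all usable; the dynamic loader checks it against
     the ISA-level markers of loaded objects and when selecting
     glibc-hwcaps library directories.  */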
  cpu_features->isa_1 = get_isa_level (cpu_features);
}

static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
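  /* Leaf 0x80000000 returns the highest supported extended CPUID leaf in
     EAX; only query the extended leaves the CPU actually reports.  */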
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
	     cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
	     cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
	     cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
	     cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
	     cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
	     cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
	     cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
	     cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
	     cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
	     cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
	     cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
	     cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}

static void
get_common_indices (struct cpu_features *cpu_features,
		    unsigned int *family, unsigned int *model,
		    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
	       cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
	       cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
	       cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
      if (*family == 0x0f)
	{
	  *family += (eax >> 20) & 0xff;
	  *model += *extended_model;
	}
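      /* For example, a CPUID.1 EAX of 0x000506e3 (a Skylake desktop CPU)
	 decodes to family 0x6, model 0xe, extended model 0x50 and
	 stepping 3; the vendor-specific code in init_cpu_features folds
	 the extended model in for family 0x6, giving model 0x5e.  */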
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
		     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
		     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
		     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
		     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
		     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
		   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
		   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
		   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
		   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
		   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
		   cpu_features->features[CPUID_INDEX_19].cpuid.edx);

  dl_check_minsigstacksize (cpu_features);
}

_Static_assert (((index_arch_Fast_Unaligned_Load
		  == index_arch_Fast_Unaligned_Copy)
		 && (index_arch_Fast_Unaligned_Load
		     == index_arch_Prefer_PMINUB_for_stringop)
		 && (index_arch_Fast_Unaligned_Load
		     == index_arch_Slow_SSE4_2)
		 && (index_arch_Fast_Unaligned_Load
		     == index_arch_Fast_Rep_String)
		 && (index_arch_Fast_Unaligned_Load
		     == index_arch_Fast_Copy_Backward)),
		"Incorrect index_arch_Fast_Unaligned_Load");

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
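  /* Leaf 0 returns the maximum basic leaf in EAX and the 12-byte vendor
     string packed into EBX, EDX and ECX (in that order), four
     little-endian bytes per register.  */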

  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
			  &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      if (family == 0x06)
	{
	  model += extended_model;
	  switch (model)
	    {
	    case 0x1c:
	    case 0x26:
	      /* BSF is slow on Atom.  */
	      cpu_features->preferred[index_arch_Slow_BSF]
		|= bit_arch_Slow_BSF;
	      break;

	    case 0x57:
	      /* Knights Landing.  Enable Silvermont optimizations.  */

	    case 0x7a:
	      /* Unaligned load versions are faster than SSSE3
		 on Goldmont Plus.  */

	    case 0x5c:
	    case 0x5f:
	      /* Unaligned load versions are faster than SSSE3
		 on Goldmont.  */

	    case 0x4c:
	    case 0x5a:
	    case 0x75:
	      /* Airmont is a die shrink of Silvermont.  */

	    case 0x37:
	    case 0x4a:
	    case 0x4d:
	    case 0x5d:
	      /* Unaligned load versions are faster than SSSE3
		 on Silvermont.  */
	      cpu_features->preferred[index_arch_Fast_Unaligned_Load]
		|= (bit_arch_Fast_Unaligned_Load
		    | bit_arch_Fast_Unaligned_Copy
		    | bit_arch_Prefer_PMINUB_for_stringop
		    | bit_arch_Slow_SSE4_2);
	      break;

	    case 0x86:
	    case 0x96:
	    case 0x9c:
	      /* Enable rep string instructions, unaligned load, unaligned
		 copy, pminub and avoid SSE 4.2 on Tremont.  */
	      cpu_features->preferred[index_arch_Fast_Rep_String]
		|= (bit_arch_Fast_Rep_String
		    | bit_arch_Fast_Unaligned_Load
		    | bit_arch_Fast_Unaligned_Copy
		    | bit_arch_Prefer_PMINUB_for_stringop
		    | bit_arch_Slow_SSE4_2);
	      break;

	    default:
	      /* Unknown family 0x06 processors.  Assume this is one of
		 the Core i3/i5/i7 processors if AVX is available.  */
	      if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
		break;
	      /* Fall through.  */

	    case 0x1a:
	    case 0x1e:
	    case 0x1f:
	    case 0x25:
	    case 0x2c:
	    case 0x2e:
	    case 0x2f:
	      /* Rep string instructions, unaligned load, unaligned copy,
		 and pminub are fast on Intel Core i3, i5 and i7.  */
	      cpu_features->preferred[index_arch_Fast_Rep_String]
		|= (bit_arch_Fast_Rep_String
		    | bit_arch_Fast_Unaligned_Load
		    | bit_arch_Fast_Unaligned_Copy
		    | bit_arch_Prefer_PMINUB_for_stringop);
	      break;
	    }

	 /* Disable TSX on some Haswell processors to avoid TSX on kernels
	    that weren't updated with the latest microcode package (which
	    disables the broken feature by default).  */
	 switch (model)
	    {
	    case 0x3f:
	      /* Xeon E7 v3 with stepping >= 4 has working TSX.  */
	      if (stepping >= 4)
		break;
	      /* Fall through.  */
	    case 0x3c:
	    case 0x45:
	    case 0x46:
	      /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
		 with stepping >= 4) to avoid TSX on kernels that weren't
		 updated with the latest microcode package (which disables
		 the broken feature by default).  */
	      CPU_FEATURE_UNSET (cpu_features, RTM);
	      break;
	    }
	}


      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
	 if AVX512ER is available.  If AVX512ER isn't available, prefer
	 not to use AVX512 so that the CPU frequency isn't lowered.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
	cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
	  |= bit_arch_Prefer_No_VZEROUPPER;
      else
	{
	  /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
	     when ZMM load and store instructions are used.  */
	  if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
	    cpu_features->preferred[index_arch_Prefer_No_AVX512]
	      |= bit_arch_Prefer_No_AVX512;

	  /* Avoid RTM abort triggered by VZEROUPPER inside a
	     transactionally executing RTM region.  */
	  if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
	    cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
	      |= bit_arch_Prefer_No_VZEROUPPER;
	}

      /* Avoid short-distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
	cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
	  |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
	   || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
			  &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
	{
	  /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
	     FMA4 requires AVX, determine if FMA4 is usable here.  */
	  CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
	}

      if (family == 0x15)
	{
	  /* "Excavator"   */
	  if (model >= 0x60 && model <= 0x7f)
	  {
	    cpu_features->preferred[index_arch_Fast_Unaligned_Load]
	      |= (bit_arch_Fast_Unaligned_Load
		  | bit_arch_Fast_Copy_Backward);

	    /* Unaligned AVX loads are slower.  */
	    cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
	      &= ~bit_arch_AVX_Fast_Unaligned_Load;
	  }
	}
    }
  /* This spells out "CentaurHauls" or " Shanghai ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
	   || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
			  &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;
      if (family == 0x6)
	{
	  if (model == 0xf || model == 0x19)
	    {
	      CPU_FEATURE_UNSET (cpu_features, AVX);
	      CPU_FEATURE_UNSET (cpu_features, AVX2);

	      cpu_features->preferred[index_arch_Slow_SSE4_2]
		|= bit_arch_Slow_SSE4_2;

	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		&= ~bit_arch_AVX_Fast_Unaligned_Load;
	    }
	}
      else if (family == 0x7)
	{
	  if (model == 0x1b)
	    {
	      CPU_FEATURE_UNSET (cpu_features, AVX);
	      CPU_FEATURE_UNSET (cpu_features, AVX2);

	      cpu_features->preferred[index_arch_Slow_SSE4_2]
		|= bit_arch_Slow_SSE4_2;

	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		&= ~bit_arch_AVX_Fast_Unaligned_Load;
	    }
	  else if (model == 0x3b)
	    {
	      CPU_FEATURE_UNSET (cpu_features, AVX);
	      CPU_FEATURE_UNSET (cpu_features, AVX2);

	      cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
		&= ~bit_arch_AVX_Fast_Unaligned_Load;
	    }
	}
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);

#if HAVE_TUNABLES
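  /* The glibc.cpu.hwcaps tunable lets users override the detected
     features; names prefixed with '-' disable the corresponding feature,
     e.g. GLIBC_TUNABLES=glibc.cpu.hwcaps=-AVX2,-ERMS.  */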
  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));

  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }

  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
	  && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable.  */
      cpu_features->xsave_state_size = 0;

      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }

#elif defined SHARED
  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  The
     glibc.cpu.hwcap_mask tunable is initialized already, so no
     need to do this.  */
  GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;
#endif

#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
	{
	  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
	    {
	      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
		platform = "xeon_phi";
	    }
	  else
	    {
	      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
		  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
		  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
		GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
	    }
	}

      if (platform == NULL
	  && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
	  && CPU_FEATURE_USABLE_P (cpu_features, FMA)
	  && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
	  && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
	  && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
	  && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
	platform = "haswell";

      if (platform != NULL)
	GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif

#if CET_ENABLED
# if HAVE_TUNABLES
  TUNABLE_GET (x86_ibt, tunable_val_t *,
	       TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
	       TUNABLE_CALLBACK (set_x86_shstk));
# endif

  /* Check CET status.  */
  unsigned int cet_status = get_cet_status ();
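  /* get_cet_status reports which CET features (IBT, SHSTK) the kernel has
     enabled for this process, as GNU_PROPERTY_X86_FEATURE_1_* bits.  */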

  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
    CPU_FEATURE_UNSET (cpu_features, IBT);
  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
    CPU_FEATURE_UNSET (cpu_features, SHSTK);

  if (cet_status)
    {
      GL(dl_x86_feature_1) = cet_status;

# ifndef SHARED
      /* Check if IBT and SHSTK are enabled by kernel.  */
      if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
	  || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
	{
	  /* Disable IBT and/or SHSTK if they are enabled by kernel, but
	     disabled by environment variable:

	     GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
	   */
	  unsigned int cet_feature = 0;
	  if (!CPU_FEATURE_USABLE (IBT))
	    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
	  if (!CPU_FEATURE_USABLE (SHSTK))
	    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;

	  if (cet_feature)
	    {
	      int res = dl_cet_disable_cet (cet_feature);

	      /* Clear the disabled bits in dl_x86_feature_1.  */
	      if (res == 0)
		GL(dl_x86_feature_1) &= ~cet_feature;
	    }

	  /* Lock CET if IBT or SHSTK is enabled in executable.  Don't
	     lock CET if IBT or SHSTK is enabled permissively.  */
	  if (GL(dl_x86_feature_control).ibt != cet_permissive
	      && GL(dl_x86_feature_control).shstk != cet_permissive)
	    dl_cet_lock_cet ();
	}
# endif
    }
#endif

#ifndef SHARED
  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
#endif
}