Checking patch sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S... error: while searching for: * */ #include /* tanhf data tables for avx2 and sse4 implementatins defined here. */ #define ONLY_DECL_OFFSET #include "svml_s_tanhf_rodata.S" .section .text.sse4, "ax", @progbits ENTRY(_ZGVbN4v_tanhf_sse4) /* Save copy of input in xmm12. */ movaps %xmm0, %xmm12 /* Here huge arguments, INF and NaNs are filtered out to callout. */ movdqu TANHF_DATA(_iExpMantMask)(%rip), %xmm3 pand %xmm0, %xmm3 error: patch failed: sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S:70 error: sysdeps/x86_64/fpu/multiarch/svml_s_tanhf4_core_sse4.S: patch does not apply Checking patch sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S... error: while searching for: * */ #include /* tanhf data tables for avx2 and sse4 implementatins defined here. */ #include "svml_s_tanhf_rodata.S" .section .text.avx2, "ax", @progbits ENTRY(_ZGVdN8v_tanhf_avx2) /* Here huge arguments, INF and NaNs are filtered out to callout. */ vpand TANHF_DATA(_iExpMantMask)(%rip), %ymm0, %ymm4 vpsubd TANHF_DATA(_iMinIdxOfsMask)(%rip), %ymm4, %ymm2 /* Selection of arguments between [0, 0x04280000] into ymm2. */ vpxor %ymm3, %ymm3, %ymm3 vpmaxsd %ymm3, %ymm2, %ymm2 vpminsd TANHF_DATA(_iMaxIdxMask)(%rip), %ymm2, %ymm2 /* * small table specific variables * * Constant loading */ vpsrld $14, %ymm2, %ymm1 /* We are splitting xmm1 into 8 GPRs. This may be faster to do with store/load as we can take advantage of store-forwarding. */ vmovq %xmm1, %r8 /* We have eliminated all negative values for ymm1 so no need to sign extend. */ movl %r8d, %r9d shrq $32, %r8 /* Store base of lookup table in rax. */ leaq TANHF_DATA(_lookupTable)(%rip), %rax /* Instead of using cross-lane permutes on ymm vectors, use vpinsertf128 with memory operand. This helps alleviate bottleneck on p5. */ vmovupd 16(%r9, %rax), %xmm5 vpextrq $1, %xmm1, %rsi error: patch failed: sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S:70 error: sysdeps/x86_64/fpu/multiarch/svml_s_tanhf8_core_avx2.S: patch does not apply Checking patch sysdeps/x86_64/fpu/multiarch/svml_s_tanhf_rodata.S... error: while searching for: /* Datatables for tanhf AVX2 and tanhf SSE4. Copyright (C) 2021-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see https://www.gnu.org/licenses/. */ /* Offsets are ordered by use in the function. On cold-starts this might help the prefetcher. If the streaming prefetchers kick in it will prefetch into the lookup table. */ #define _iExpMantMask 0 #define _iMinIdxOfsMask 32 #define _iMaxIdxMask 64 #define _sAbsMask 96 #define _iExpMask 128 #define _lookupTable 160 #define TANHF_DATA(offset) ((offset)+__svml_stanh_data_internal_avx2) #ifndef ONLY_DECL_OFFSET .section .rodata, "a" .align 32 # ifdef __svml_stanh_data_internal_typedef typedef unsigned int VUINT32; typedef struct { __declspec(align(32)) VUINT32 _iExpMantMask[8][1]; __declspec(align(32)) VUINT32 _iMinIdxOfsMask[8][1]; __declspec(align(32)) VUINT32 _iMaxIdxMask[8][1]; __declspec(align(32)) VUINT32 _sAbsMask[8][1]; __declspec(align(32)) VUINT32 _iExpMask[8][1]; __declspec(align(32)) VUINT32 _lookupTable[(134*4)][2]; } __svml_stanh_data_internal; # endif __svml_stanh_data_internal: .globl __svml_stanh_data_internal_avx2 __svml_stanh_data_internal_avx2: .align 32 /* _iExpMantMask. */ .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 .long 0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000 .align 32 /* _iMinIdxOfsMask. */ .long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 .long 0x3cf80000, 0x3cf80000, 0x3cf80000, 0x3cf80000 .align 32 /* _iMaxIdxMask. */ .long 0x04280000, 0x04280000, 0x04280000, 0x04280000 .long 0x04280000, 0x04280000, 0x04280000, 0x04280000 .align 32 /* _sAbsMask. */ .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff .align 32 /* _iExpMask. */ .long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 .long 0x7f000000, 0x7f000000, 0x7f000000, 0x7f000000 .align 32 /* _lookupTable. */ /* Pol_000: err=7.93e-09, x in [0.0000000; 0.0312500]. */ .quad 0x0000000000000000 /* A00 = +0.000000000000000000000e-01. */ .quad 0x3FF00000022C70EB /* A01 = +1.000000008097283510367e+00. */ .quad 0xBED00E878CFFA194 /* A02 = -3.828228912518614443549e-06. */ .quad 0xBFD551766D0607A9 /* A03 = -3.330970825846813476723e-01. */ .quad 0xBE53D60CE3E4C297 /* A00 = -1.847383956330407336230e-08. */ .quad 0x3FF000024177CF5C /* A01 = +1.000002151235967140508e+00. */ .quad 0xBF1758BC94A51A25 /* A02 = -8.906031613262943753568e-05. */ .quad 0xBFD53EAE67E0D4F0 /* A03 = -3.319507612644221339337e-01. */ .quad 0xBE5A9E47EF32D6FE /* A00 = -2.479020984039698285657e-08. */ .quad 0x3FF00002DA983057 /* A01 = +1.000002721676556793895e+00. */ .quad 0xBF1BD953509E94AA /* A02 = -1.062352277175377670507e-04. */ .quad 0xBFD53BDB562EEDD5 /* A03 = -3.317783681520414806876e-01. */ .quad 0xBE6191BBE496D294 /* A00 = -3.272532162914017685901e-08. */ .quad 0x3FF0000390492017 /* A01 = +1.000003398528866105366e+00. */ .quad 0xBF20727E814A57CE /* A02 = -1.254825043772153972919e-04. */ .quad 0xBFD538DE060A6F22 /* A03 = -3.315959033004550748913e-01. */ .quad 0xBE66DAFA2A893A25 /* A00 = -4.257146219278012568149e-08. */ .quad 0x3FF0000465E08CD1 /* A01 = +1.000004194219219266770e+00. */ .quad 0xBF2341C765EF91B6 /* A02 = -1.469188600530365522261e-04. */ .quad 0xBFD535B6841FAF9E /* A03 = -3.314033785124993469751e-01. */ .quad 0xBE6D5794E361E964 /* A00 = -5.465 error: patch failed: sysdeps/x86_64/fpu/multiarch/svml_s_tanhf_rodata.S:1 error: sysdeps/x86_64/fpu/multiarch/svml_s_tanhf_rodata.S: patch does not apply Checking patch sysdeps/x86_64/fpu/multiarch/svml_s_tanhf_rodata.h.S...