/* Vector Optimized 32/64 bit S/390 version of wmemset.
   Copyright (C) 2015-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <ifunc-wmemset.h>
#if HAVE_WMEMSET_Z13

# include "sysdep.h"
# include "asm-syntax.h"

	.text

/* wchar_t *wmemset (wchar_t *dest, wchar_t wc, size_t n)

   Store the wide character wc into each of the n wide characters
   starting at dest and return dest.  A wide character is handled as a
   4-byte word throughout (word vector elements, count shifted by 2).

   Outline:
     1. n == 0 returns immediately.
     2. n is converted to a byte count, clamped to the largest
	multiple of 4 that fits the register width.
     3. If dest is not 16-byte aligned but is 4-byte aligned, a partial
	vector store brings it up to the next 16-byte boundary.  If dest
	is not even 4-byte aligned, the work is handed to WMEMSET_C.
     4. The aligned area is filled in blocks of 256, 32 and 16 bytes.
     5. A final store-with-length writes the last 1..16 bytes.

   Register usage:
   -r0=dest & 15 (misalignment within a 16-byte line)
   -r1=size of the leading partial store, afterwards block counter
   -r2=dest, afterwards current store pointer
   -r3=wc
   -r4=n, afterwards number of bytes still to store
   -r5=maximum byte count, afterwards highest index for the first vstl
   -v16=wc replicated into all four word elements
   -v17,v18,v19=copies of v16 so that vstm can write 64 bytes
   -v31=original dest, kept for the return value
*/
ENTRY(WMEMSET_Z13)
	.machine "z13"
	.machinemode "zarch_nohighgprs"

# if !defined __s390x__
	llgfr	%r4,%r4		/* Zero-extend the 32-bit n to 64 bit.  */
# endif /* !defined __s390x__ */

	vlvgg	%v31,%r2,0	/* Remember dest; it is the return value.  */
	clgije	%r4,0,.Ldone	/* Nothing to store for n == 0.  */

	vlvgf	%v16,%r3,0	/* Put wc into word element 0 ...  */
	vrepf	%v16,%v16,0	/* ... and replicate it to all elements.  */

	/* Convert n to a byte count.  If one of the two most significant
	   bits of n is set, n * 4 does not fit, so the largest multiple
	   of 4 is used instead.  The instructions between the test and
	   locgrne leave the condition code untouched.  */
# ifdef __s390x__
	tmhh	%r4,0xc000	/* Are the two topmost bits of n zero?  */
	lghi	%r5,-4		/* Max byte-count is 18446744073709551612.  */
# else
	tmlh	%r4,0xc000	/* Are the two topmost bits of n zero?  */
	llilf	%r5,0xfffffffc	/* Max byte-count is 4294967292.  */
# endif /* !__s390x__ */
	sllg	%r4,%r4,2	/* Byte count = character count * 4.  */
	locgrne	%r4,%r5		/* Clamp if a topmost bit was set.  */

	/* Bring the store pointer up to a 16-byte boundary.  */
	risbg	%r0,%r2,60,128+63,0 /* %r0 = dest & 15; the condition code
				       tells whether the result is zero.  */
	je	.Laligned	/* dest is already 16-byte aligned.  */
	tmll	%r2,3		/* Is dest at least 4-byte aligned?  */
	jne	.Lunaligned	/* No: let the common-code variant do it.  */
	lghi	%r1,16
	slr	%r1,%r0		/* Bytes up to the next boundary (16-x).  */
	clgr	%r1,%r4
	locgrh	%r1,%r4		/* %r1 = min (bytes to boundary, bytes left).  */
	aghik	%r5,%r1,-1	/* vstl takes the highest byte index.  */
	vstl	%v16,%r5,0(%r2)	/* Store the leading %r1 bytes.  */
	clgrje	%r1,%r4,.Ldone	/* Finished if that were all bytes.  */
	slgr	%r4,%r1		/* Bytes still to store.  */
	la	%r2,0(%r1,%r2)	/* Step over the bytes just written.  */

.Laligned:
	/* The store pointer is 16-byte aligned from here on.  */
	clgijl	%r4,17,.Ltail	/* At most 16 bytes: one vstl suffices.  */
	srlg	%r1,%r4,8	/* Number of complete 256-byte blocks.  */
	clgije	%r1,0,.Lcheck32	/* None: continue with smaller blocks.  */
	vlr	%v17,%v16	/* vstm %v16..%v19 writes 64 bytes, so the  */
	vlr	%v18,%v16	/* three following registers have to hold  */
	vlr	%v19,%v16	/* the same pattern.  */

.Lblock256:
	vstm	%v16,%v19,0(%r2)
	vstm	%v16,%v19,64(%r2)
	vstm	%v16,%v19,128(%r2)
	vstm	%v16,%v19,192(%r2)
	la	%r2,256(%r2)
	brctg	%r1,.Lblock256	/* Next block while the counter is not 0.  */

	llgfr	%r4,%r4		/* nilf only changes the low word, so the
				   high word is cleared first.  */
	nilf	%r4,255		/* Bytes left after the 256-byte blocks.  */
	je	.Ldone		/* Nothing left.  */

.Lcheck32:
	clgijl	%r4,17,.Ltail	/* At most 16 bytes left.  */
	clgijl	%r4,33,.Lcheck16 /* At most 32 bytes left.  */
	srlg	%r1,%r4,5	/* Number of complete 32-byte blocks.  */

.Lblock32:
	vst	%v16,0(%r2)
	vst	%v16,16(%r2)
	la	%r2,32(%r2)
	brctg	%r1,.Lblock32	/* Next block while the counter is not 0.  */

	llgfr	%r4,%r4
	nilf	%r4,31		/* Bytes left after the 32-byte blocks.  */
	je	.Ldone		/* Nothing left.  */

.Lcheck16:
	clgijl	%r4,17,.Ltail	/* At most 16 bytes left.  */
	srlg	%r1,%r4,4	/* Number of complete 16-byte blocks.  */

.Lblock16:
	vst	%v16,0(%r2)
	la	%r2,16(%r2)
	brctg	%r1,.Lblock16	/* Next block while the counter is not 0.  */

	llgfr	%r4,%r4
	nilf	%r4,15		/* Bytes left after the 16-byte blocks.  */
	je	.Ldone		/* Nothing left.  */

.Ltail:
	/* Between 1 and 16 bytes are left at this point.  */
	aghi	%r4,-1		/* vstl takes the highest byte index.  */
	vstl	%v16,%r4,0(%r2)

.Ldone:
	vlgvg	%r2,%v31,0	/* Return the original dest.  */
	br	%r14
.Lunaligned:
	/* %r2 and %r3 still hold dest and wc; only the count has to be
	   turned back into characters before the tail call.  */
	srlg	%r4,%r4,2	/* Byte count back to character count.  */
	jg	WMEMSET_C
END(WMEMSET_Z13)

# if ! HAVE_WMEMSET_IFUNC
strong_alias (WMEMSET_Z13, __wmemset)
weak_alias (__wmemset, wmemset)
# endif

# if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT \
	&& defined SHARED && IS_IN (libc)
strong_alias (WMEMSET_Z13, __GI___wmemset)
weak_alias (WMEMSET_Z13, __GI_wmemset)
# endif
#endif