/* Vector optimized 32/64 bit S/390 version of wcspbrk.
   Copyright (C) 2015-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <ifunc-wcspbrk.h>
#if HAVE_WCSPBRK_Z13

# include "sysdep.h"
# include "asm-syntax.h"

	.text

/* wchar_t *wcspbrk (const wchar_t *s, const wchar_t * accept)
   The wcspbrk() function locates the first occurrence in the string s
   of any of the characters in the string accept and returns a pointer
   to that character or NULL if not found.

   In:  r2 = s, r3 = accept.
   Out: r2 = pointer to the first matching character in s, or 0 (NULL).

   If s is not 4-byte aligned, the function tail-calls WCSPBRK_C instead
   of using the vector code below.

   This method checks the length of accept string.  If it fits entirely
   in one vector register, a fast algorithm is used, which does not need
   to check multiple parts of accept-string.  Otherwise a slower full
   check of accept-string is used.

   The slow path uses r6, r8 and r9 as work registers; their incoming
   values are parked in v30/v31 on entry to the slow path and copied back
   before returning.

   register overview:
   r3:  pointer to start of accept-string
   r2:  pointer to start of search-string
   r0:  loaded byte count of vlbb search-string (32bit unsigned)
   r4:  found byte index (32bit unsigned)
   r1:  current return len (64bit unsigned)
   v16: search-string
   v17: accept-string
   v18: temp-vreg

   ONLY FOR SLOW:
   v19: first accept-string
   v20: zero for preparing acc-vector
   v21: global mask; 1 indicates a match between
	search-string-vreg and any accept-character
   v22: current mask; 1 indicates a match between
	search-string-vreg and any accept-character in current acc-vreg
   v24: one for result-checking of former string-part
   v30, v31: for re-/storing registers r6, r8, r9
   r5:  current len of accept-string
   r6:  zero-index in search-string or 16 if no zero
	or min(zero-index, loaded byte count)
   r8:  >0, if former accept-string-part contains a zero,
	otherwise =0;
   r9:  loaded byte count of vlbb accept-string
*/
ENTRY(WCSPBRK_Z13)
	.machine "z13"
	.machinemode "zarch_nohighgprs"

	tmll	%r2,3		/* Test if s is 4-byte aligned?  */
	jne	.Lfallback	/* And use common-code variant if not.  */

	/*
	  Check if accept-string fits in one vreg:
	  ----------------------------------------
	*/
	vlbb	%v17,0(%r3),6	/* Load accept.  */
	lcbb	%r0,0(%r3),6	/* Get loaded byte count of accept.  */
	jo	.Lcheck_onbb	/* Special case if accept lies
				   on block-boundary.  */

.Lcheck_notonbb:
	lghi	%r1,0		/* Zero out current len.  */
	vlgvf	%r0,%v17,0	/* Get first element.  */
	clije	%r0,0,.Lfast_end_null /* Return null if accept is empty.  */

	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
	je	.Lfast		/* Zero found -> accept fits in one vreg.  */
	j	.Lslow		/* No zero -> accept exceeds one vreg.  */


.Lcheck_onbb:
	/* Accept lies on block-boundary.  */
	nill	%r0,65532	/* Recognize only fully loaded characters.  */
	je	.Lcheck_onbb2	/* Reload vr, if we loaded no full wchar_t.  */
	vfenezf	%v18,%v17,%v17	/* Search zero in loaded accept bytes.  */
	vlgvb	%r4,%v18,7	/* Get index of zero or 16 if not found.  */
	clrjl	%r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count ->
					   accept fits in one vreg;
					   Fill with zeros and proceed
					   with FAST.  */
.Lcheck_onbb2:
	vl	%v17,0(%r3)	/* Load accept, which exceeds loaded bytes.  */
	j	.Lcheck_notonbb	/* Check if accept fits in one vreg.  */


	/*
	  Search s for accept in one vreg
	  -------------------------------
	*/
.Lfast:
	/* Complete accept-string in v17 and remaining bytes are zero.  */

	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
	lcbb	%r0,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */

	vfaezfs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
				    in v17 or first zero element.  */
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
	/* If found index is within loaded bytes, evaluate the found
	   element directly (current len in r1 is still zero here).  */
	clrjl	%r4,%r0,.Lfast_loop_found2

	/* Align s to 16 byte.  */
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15.  */
	lghi	%r1,16		/* current_len = 16.  */
	slr	%r1,%r4		/* Compute bytes to 16bytes boundary.  */

	/* Loop over s in 16-byte steps, unrolled four times.  r1 is only
	   advanced once per 64 bytes; the found16/32/48 exits below add
	   the missing multiples of 16.  */
.Lfast_loop:
	vl	%v16,0(%r1,%r2)	/* Load search-string.  */
	vfaezfs	%v18,%v16,%v17,0 /* Find first element in v16 equal to any
				    in v17 or first zero element.  */
	jno	.Lfast_loop_found

	vl	%v16,16(%r1,%r2)
	vfaezfs	%v18,%v16,%v17,0
	jno	.Lfast_loop_found16

	vl	%v16,32(%r1,%r2)
	vfaezfs	%v18,%v16,%v17,0
	jno	.Lfast_loop_found32

	vl	%v16,48(%r1,%r2)
	vfaezfs	%v18,%v16,%v17,0
	jno	.Lfast_loop_found48

	aghi	%r1,64
	j	.Lfast_loop	/* Loop if no element was equal to accept
				   and none was zero.  */

	/* Found equal or zero element.  */
.Lfast_loop_found48:
	aghi	%r1,16
.Lfast_loop_found32:
	aghi	%r1,16
.Lfast_loop_found16:
	aghi	%r1,16
.Lfast_loop_found:
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
.Lfast_loop_found2:
	srlg	%r5,%r4,2	/* Convert byte-index to character-index.  */
	vlgvf	%r0,%v16,0(%r5)	/* Get found element.  */
	clije	%r0,0,.Lfast_end_null /* Found element is the terminating
					 zero -> no accept-char found,
					 return null.  */
	algfr	%r1,%r4		/* Add found index of char to current len.  */
	la	%r2,0(%r1,%r2)	/* And return pointer to first equal char.  */
	br	%r14

.Lfast_end_null:
	lghi	%r2,0		/* Return null if no character is equal.  */
	br	%r14




	/*
	  Search s for accept in multiple vregs
	  -------------------------------------
	*/
.Lslow:
	/* Save registers r6, r8, r9 in vector registers.  */
	vlvgg	%v30,%r6,0
	vlvgp	%v31,%r8,%r9

	/* Accept in v17 without zero.  */
	vlr	%v19,%v17	/* Save first acc-part for a fast reload.  */
	vzero	%v20		/* Zero for preparing acc-vector.  */
	vone	%v24		/* One for checking result of former string.  */

	/* Align s to 16 byte.  */
	risbg	%r4,%r2,60,128+63,0 /* Test if s is aligned and
				       %r4 = bits 60-63 'and' 15.  */
	je	.Lslow_loop_str	/* If s is aligned, loop aligned.  */
	lghi	%r0,15
	slr	%r0,%r4		/* Compute highest index to load (15-x).  */
	vll	%v16,%r0,0(%r2)	/* Load up to 16byte boundary;
				   needs highest index, left bytes are 0.  */
	ahi	%r0,1		/* Work with loaded byte count.  */
	vzero	%v21		/* Zero out global mask.  */
	lghi	%r5,0		/* Set current len of accept-string to zero.  */
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
	lghi	%r8,0		/* There is no zero in first accept-part.  */
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
	clije	%r6,0,.Lslow_end_null /* If first element is zero
					 (end of string) -> return null  */
	clr	%r0,%r6		/* cc==1 if loaded byte count < zero-index.  */
	locrl	%r6,%r0		/* Load on cc==1; zero-index = lbc.  */
	j	.Lslow_loop_acc


	/* Process s in 16byte aligned loop.  */
.Lslow_next_str:
	/* Check results of former processed str-part.  */
	vfeef	%v18,%v21,%v24	/* Find first equal match in global mask
				   (ones in element).  */
	vlgvb	%r4,%v18,7	/* Get index of first one (=equal)
				   or 16 if no match.  */
	/* Equal-index < min(zero-index, loaded byte count)
	   -> return pointer to equal element.  */
	clrjl	%r4,%r6,.Lslow_index_found
	/* Zero-index < loaded byte count
	   -> former str-part was last str-part
	   -> return null  */
	clrjl	%r6,%r0,.Lslow_end_null
	/* All elements are zero (=no match) -> proceed with next str-part.  */

	vlr	%v17,%v19	/* Load first part of accept (no zero).  */
	algfr	%r1,%r0		/* Add loaded byte count to current len.  */

.Lslow_loop_str:
	vl	%v16,0(%r1,%r2)	/* Load search-string  */
	lghi	%r0,16		/* Loaded byte count is 16.  */
	vzero	%v21		/* Zero out global mask.  */
	lghi	%r5,0		/* Set current len of accept to zero.  */
	vfenezf	%v18,%v16,%v16	/* Find zero in current string-part.  */
	lghi	%r8,0		/* There is no zero in first accept-part.  */
	vlgvb	%r6,%v18,7	/* Load byte index of zero or 16 if no zero.  */
	clije	%r6,0,.Lslow_end_null /* If first element is zero
					 (end of string) -> return null.  */

.Lslow_loop_acc:
	vfaef	%v22,%v16,%v17,4 /* Create matching-mask (1 in mask ->
				    Character matches any accepted character in
				    this accept-string-part) IN=0, RT=1.  */
	vlgvf	%r4,%v22,0	/* Get result of first element.  */
	/* First element is equal to any accepted characters
	   (all other parts of accept cannot lead to a match before this one)
	   -> current len is pointing to first element
	   -> return found  */
	clijh	%r4,0,.Lslow_end_found
	vo	%v21,%v21,%v22	/* Global-mask = global-|matching-mask.  */
	/* Proceed with next acc until end of acc is reached.  */


.Lslow_next_acc:
	clijh	%r8,0,.Lslow_next_str /* There was a zero in the last acc-part
					 -> whole accept-string is processed;
					 evaluate the global mask for this
					 string-part.  */
	vlbb	%v17,16(%r5,%r3),6 /* Load next accept part.  */
	aghi	%r5,16		/* Increment current len of accept-string.  */
	lcbb	%r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string.  */
	jo	.Lslow_next_acc_onbb /* Jump away if accept-string is
					on block-boundary.  */
.Lslow_next_acc_notonbb:
	vistrfs	%v17,%v17	/* Fill with zeros after first zero.  */
	jo	.Lslow_loop_acc	/* No zero found -> no preparation needed.  */

.Lslow_next_acc_prepare_zero:
	/* Zero in accept-part: fill zeros with first-accept-character.  */
	vlgvf	%r8,%v17,0	/* Load first element of acc-part.  */
	clije	%r8,0,.Lslow_next_str /* Proceed with next string-part,
					 If first char in this part of accept
					 is a zero.  */
	/* r8>0 -> zero found in this acc-part.  */
	vrepf	%v18,%v17,0	/* Replicate first char across all chars.  */
	vceqf	%v22,%v20,%v17	/* Create a mask (v22) of null chars
				   by comparing with 0 (v20).  */
	vsel	%v17,%v18,%v17,%v22 /* Replace null chars with first char.  */
	j	.Lslow_loop_acc	/* Accept part is prepared -> process.  */

.Lslow_next_acc_onbb:
	nill	%r9,65532	/* Recognize only fully loaded characters.  */
	je	.Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t.  */
	vfenezf	%v18,%v17,%v17	/* Find zero in loaded bytes of accept part.  */
	vlgvb	%r8,%v18,7	/* Load byte index of zero.  */
	clrjl	%r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes
						   -> Prepare vreg.  */
.Lslow_next_acc_onbb2:
	vl	%v17,0(%r5,%r3)	/* Load over boundary ...  */
	lghi	%r8,0		/* r8=0 -> no zero in this part of acc,
				   check for zero is in jump-target.  */
	j	.Lslow_next_acc_notonbb /* ... and search for zero in
					   fully loaded vreg again.  */

.Lslow_end_null:
	lghi	%r1,0		/* Return null if no character is equal.  */
	j	.Lslow_end

	/* NOTE(review): no branch in this file targets .Lslow_loop_found and
	   the instruction before it is an unconditional jump, so the four
	   instructions below look unreachable.  Kept unchanged.  */
.Lslow_loop_found:
	vlgvb	%r4,%v18,7	/* Load byte index of found element.  */
	srlg	%r5,%r4,2	/* Convert byte-index to character-index.  */
	vlgvf	%r0,%v16,0(%r5)	/* Get found element.  */
	clije	%r0,0,.Lslow_end_null /* Return null if no acc-char found.  */

.Lslow_index_found:
	algfr	%r1,%r4		/* Add found index of char to current len.  */
.Lslow_end_found:
	la	%r1,0(%r1,%r2)	/* And return pointer to first equal char.  */

.Lslow_end:
	/* Restore registers r6, r8, r9 saved at .Lslow.  */
	vlgvg	%r6,%v30,0
	vlgvg	%r8,%v31,0
	vlgvg	%r9,%v31,1
	lgr	%r2,%r1		/* Move result (pointer or null) to r2.  */
	br	%r14
.Lfallback:
	jg	WCSPBRK_C	/* Tail-call the common-code variant.  */
END(WCSPBRK_Z13)

# if ! HAVE_WCSPBRK_IFUNC
strong_alias (WCSPBRK_Z13, wcspbrk)
# endif

# if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT \
	&& defined SHARED && IS_IN (libc)
strong_alias (WCSPBRK_Z13, __GI_wcspbrk)
# endif
#endif