/* Vector optimized 32/64 bit S/390 version of memmem.
   Copyright (C) 2019-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <ifunc-memmem.h>
#if HAVE_MEMMEM_ARCH13
# include "sysdep.h"
# include "asm-syntax.h"
	.text

/* void *memmem(const void *haystack=r2, size_t haystacklen=r3,
		const void *needle=r4, size_t needlelen=r5);
   Locate a substring.

   Returns in r2:
   - haystack (r2 unchanged) if needlelen == 0,
   - the address of the first full match inside the haystack,
   - 0 if there is no match or haystacklen < needlelen.
   Needles longer than 9 bytes are handed over to MEMMEM_Z13.

   Register usage:
   r0   scratch: highest index for vll; in the >16 byte path the
	boundary up to which four consecutive vl are allowed (or 0).
   r1   short path: match index k; >16 byte path: 17 - needlelen,
	the distance by which the load address is advanced after a
	partial match.
   r2   current haystack position / return value.
   r3   short path: remaining length, then largest valid match index;
	>16 byte path: end of haystack (haystack + haystacklen).
   r4   needle pointer; in the >16 byte path the boundary up to which
	a single 16 byte vl is allowed (end of haystack - 16).
   r5   needlelen.
   r14  return address.
   v16  current haystack chunk.
   v18  needle.
   v19  byte 7 holds needlelen.
   v20  result of vstrs; byte 7 holds the index k.  */
ENTRY(MEMMEM_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
# if ! defined __s390x__
	/* 31 bit ABI: zero-extend all arguments to 64 bit.  */
	llgfr	%r3,%r3
	llgfr	%r5,%r5
	llgfr	%r4,%r4
	llgfr	%r2,%r2
# endif /* ! defined __s390x__ */
	clgrjl	%r3,%r5,.Lend_no_match	/* Haystack < needle?  */

	/* Jump to fallback if needle > 9.  See also strstr-arch13.S.  */
# if ! HAVE_MEMMEM_Z13
#  error The arch13 variant of memmem needs the z13 variant of memmem!
# endif
	clgfi	%r5,9
	jgh	MEMMEM_Z13

	aghik	%r0,%r5,-1		/* vll needs highest index.  */
	bc	4,0(%r14)		/* cc==1: return if needle-len == 0.  */
	vll	%v18,%r0,0(%r4)		/* Load needle.  */
	vlvgb	%v19,%r5,7		/* v19[7] contains length of needle.  */

	clgijh	%r3,16,.Lhaystack_larger_16
	/* Haystack has at most 16 bytes.  This label is also the target
	   of .Lhaystack_too_small with r2 = current position and
	   r3 = number of remaining bytes.  */
.Lhaystack_smaller_16_on_bb:
	aghik	%r0,%r3,-1		/* vll needs highest index.  */
	vll	%v16,%r0,0(%r2)		/* Load haystack.  */
	/* Entered with v16 = haystack chunk at r2 and r3 = number of
	   valid haystack bytes starting at r2.  */
.Lhaystack_smaller_16:
	sgr	%r3,%r5			/* r3 = largest valid match-index.  */
	jl	.Lend_no_match		/* Haystack-len < needle-len?  */
	vstrs	%v20,%v16,%v18,%v19,0,0
	/* Vector string search without zero search where v20 will contain
	   the index of a partial/full match or 16 (index is named k).
	   cc=0 (no match; k=16): .Lend_no_match
	   cc=1 (only available with zero-search): Ignore
	   cc=2 (full match; k<16): Needle found, but could be beyond haystack!
	   cc=3 (partial match; k<16): Always at end of v16 and thus beyond!  */
	brc	9,.Lend_no_match	/* Jump away if cc == 0 || cc == 3.  */
	vlgvb	%r1,%v20,7		/* r1 = k.  */
	/* Verify that the full-match (cc=2) is valid!  */
	clgrjh	%r1,%r3,.Lend_no_match	/* Jump away if match is beyond.  */
	la	%r2,0(%r1,%r2)		/* Return current position + k.  */
	br	%r14
.Lend_no_match:
	lghi	%r2,0			/* Return NULL.  */
	br	%r14

	/* Haystack has more than 16 bytes: set up the boundaries and
	   pick the 16 byte or the 64 byte loop.  */
.Lhaystack_larger_16:
	vl	%v16,0(%r2)		/* First 16 bytes of haystack.  */
	lghi	%r1,17
	lay	%r4,-16(%r3,%r2)	/* Boundary for loading with vl.  */
	lay	%r0,-64(%r3,%r2)	/* Boundary for loading with 4xvl.  */
	/* See also strstr-arch13.S:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r1,%r5			/* r1 = 17 - needle-len.  */
	/* The condition code of the following compare is consumed by
	   both locghil and jh below.  */
	clgfi	%r3,64			/* Set Boundary to zero ...  */
	la	%r3,0(%r3,%r2)		/* r3 = end of haystack.  */
	locghil	%r0,0			/* ... if haystack < 64bytes.  */
	jh	.Lloop64		/* Haystack > 64 bytes.  */
	/* Search one 16 byte chunk per iteration.  .Lloop is entered
	   with the chunk at r2 already loaded into v16.  */
.Lloop:
	la	%r2,16(%r2)
	/* Vector string search without zero search.  cc=0 => no match.  */
	vstrs	%v20,%v16,%v18,%v19,0,0
	jne	.Lloop_vstrs_nonzero_cc
	clgrjh	%r2,%r4,.Lhaystack_too_small
.Lloop16:
	vl	%v16,0(%r2)
	la	%r2,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,0
	jne	.Lloop_vstrs_nonzero_cc
	clgrjle	%r2,%r4,.Lloop16
	/* Fewer than 16 haystack bytes remain at r2.  */
.Lhaystack_too_small:
	sgr	%r3,%r2		/* r3 = (haystack + len) - curr_pos  */
	je	.Lend_no_match	/* Remaining haystack is empty.  */
	/* Check the distance from r2 to the next block boundary (4K for
	   m3=6).  cc=3 means fewer than 16 bytes, so a full vl would
	   cross the boundary: load only the remaining bytes via vll.  */
	lcbb	%r0,0(%r2),6
	jo	.Lhaystack_smaller_16_on_bb
	/* A full 16 byte load stays within the block.  Bytes beyond the
	   haystack end are rejected by the match-index check in
	   .Lhaystack_smaller_16.  */
	vl	%v16,0(%r2)	/* Load haystack.  */
	j	.Lhaystack_smaller_16

	/* Full match.  r2 = chunk address + r1, so subtract r1 again and
	   add the match index k.  */
.Lend_match_found:
	vlgvb	%r4,%v20,7		/* r4 = k.  */
	sgr	%r2,%r1
	la	%r2,0(%r4,%r2)
	br	%r14

	/* Entry points for the unrolled 64 byte loop: advance r2 so that
	   it points 16 bytes behind the chunk which was just searched.  */
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc0:
	la	%r2,16(%r2)
	/* vstrs reported a full (cc=2) or partial (cc=3) match for the
	   chunk ending at r2.  The condition code of vstrs is still
	   tested after the lay below.  */
.Lloop_vstrs_nonzero_cc:
	lay	%r2,-16(%r1,%r2)	/* Compute next load address.  */
	jh	.Lend_match_found	/* cc == 2 (full match)  */
	/* Partial match: search again at chunk address + r1.  */
	clgrjh	%r2,%r4,.Lhaystack_too_small
	vl	%v16,0(%r2)
.Lloop_vstrs_nonzero_cc_loop:
	la	%r2,0(%r1,%r2)
	vstrs	%v20,%v16,%v18,%v19,0,0
	jh	.Lend_match_found
	clgrjh	%r2,%r4,.Lhaystack_too_small
	vl	%v16,0(%r2)		/* Next part of haystack.  */
	/* The jo below still tests the condition code of the vstrs
	   above: cc == 3 is another partial match.  */
	jo	.Lloop_vstrs_nonzero_cc_loop
	/* Case: no-match.  */
	clgrjh	%r2,%r0,.Lloop	/* Jump away if haystack has less than 64b.  */
	/* Search four 16 byte chunks per iteration.  Entered with the
	   chunk at r2 already loaded into v16.  */
.Lloop64:
	vstrs	%v20,%v16,%v18,%v19,0,0
	jne	.Lloop_vstrs_nonzero_cc0
	vl	%v16,16(%r2)		/* Next part of haystack.  */
	vstrs	%v20,%v16,%v18,%v19,0,0
	jne	.Lloop_vstrs_nonzero_cc16
	vl	%v16,32(%r2)		/* Next part of haystack.  */
	vstrs	%v20,%v16,%v18,%v19,0,0
	jne	.Lloop_vstrs_nonzero_cc32
	vl	%v16,48(%r2)		/* Next part of haystack.  */
	la	%r2,64(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,0
	jne	.Lloop_vstrs_nonzero_cc
	clgrjh	%r2,%r4,.Lhaystack_too_small
	vl	%v16,0(%r2)		/* Next part of haystack.  */
	clgrjle	%r2,%r0,.Lloop64	/* Still >= 64 bytes left?  */
	j	.Lloop
END(MEMMEM_ARCH13)

# if ! HAVE_MEMMEM_IFUNC
strong_alias (MEMMEM_ARCH13, __memmem)
weak_alias (__memmem, memmem)
# endif

# if MEMMEM_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
weak_alias (MEMMEM_ARCH13, __GI_memmem)
strong_alias (MEMMEM_ARCH13, __GI___memmem)
# endif
#endif