1/* Vector optimized 32/64 bit S/390 version of strrchr.
2   Copyright (C) 2015-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <ifunc-strrchr.h>
20
21#if HAVE_STRRCHR_Z13
22
23# include "sysdep.h"
24# include "asm-syntax.h"
25
26	.text
27
/* char *strrchr (const char *s, int c)
   Locate the last character c in string.

   Only the low byte of c is significant: it is replicated into v18 for
   the search and it alone decides whether c is the terminating zero.

   The string is scanned front to back.  Whenever a 16-byte part of s
   contains c, that part is remembered in v19 and its offset in r1.  Once
   the terminating zero is reached, the remembered part is masked after
   its last valid byte, byte-reversed and searched once more in order to
   get the index of the last c.

   Register usage:
   -r0=number of bytes loaded in first part of s.
   -r1=offset (relative to s) of the part of s saved in v19,
       or -1 if c was not found so far.
   -r2=s
   -r3=c
   -r4=tmp
   -r5=current_len
   -v16=part of s
   -v17=index of found element
   -v18=replicated c
   -v19=part of s with last occurrence of c.
   -v20=permute pattern
*/
ENTRY(STRRCHR_Z13)
	.machine "z13"
	.machinemode "zarch_nohighgprs"

	vlbb	%v16,0(%r2),6	/* Load s until next 4k-byte boundary.  */
	lcbb	%r0,0(%r2),6	/* Get bytes to 4k-byte boundary or 16.  */

	vlvgb	%v18,%r3,0	/* Generate vector which elements are all c.
				   if c > 255, c will be truncated.  */
	vrepb	%v18,%v18,0

	lghi	%r1,-1		/* Currently no c found.  */
	lghi	%r5,0		/* current_len = 0.  */

	vfeezbs	%v17,%v16,%v18	/* Find element equal or zero.  */
	vlgvb	%r4,%v17,7	/* Load byte index of c/zero or 16.  */
	clrjl	%r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes.  */
.Lalign:
	/* Align s to 16 byte.  */
	risbgn	%r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15.  */
	lghi	%r5,16		/* current_len = 16.  */
	slr	%r5,%r4		/* Compute bytes to 16bytes boundary.
				   Result is in the range 1..16, thus
				   current_len is never 0 within the loop.  */

.Lloop:
	/* Main loop, unrolled four times.  r5 + r2 is 16-byte aligned.  */
	vl	%v16,0(%r5,%r2) /* Load s.  */
	vfeezbs	%v17,%v16,%v18	/* Find element equal with zero search.  */
	jno	.Lfound		/* Found c/zero (cc=0|1|2).  */
	vl	%v16,16(%r5,%r2)
	vfeezbs	%v17,%v16,%v18
	jno	.Lfound16
	vl	%v16,32(%r5,%r2)
	vfeezbs	%v17,%v16,%v18
	jno	.Lfound32
	vl	%v16,48(%r5,%r2)
	vfeezbs	%v17,%v16,%v18
	jno	.Lfound48

	aghi	%r5,64
	j	.Lloop		/* No character and no zero -> loop.  */

	/* Fall-through chain: add 16 per skipped part so that r5 is the
	   offset of the part currently held in v16.  */
.Lfound48:
	la	%r5,16(%r5)	/* Use la since aghi would clobber cc.  */
.Lfound32:
	la	%r5,16(%r5)
.Lfound16:
	la	%r5,16(%r5)
.Lfound:
	je	.Lzero		/* Found zero, but no c before that zero.  */
	/* Save this part of s to check for further matches after reaching
	   the end of the complete string.  Neither vlr nor lgr changes the
	   condition code which is still needed below.  */
	vlr	%v19,%v16
	lgr	%r1,%r5

	jh	.Lzero		/* Found a zero after the found c.  */
	aghi	%r5,16		/* Start search of next part of s.  */
	j	.Lloop

.Lfound_first_part:
	/* This code is only executed if the found c/zero is within loaded
	   bytes. If no c/zero was found (cc==3) the found index = 16, thus
	   this code is not called.
	   Resulting condition code of vector find element equal:
	   cc==0: no c, found zero
	   cc==1: c found, no zero
	   cc==2: c found, found zero after c
	   cc==3: no c, no zero (this case can be ignored).  */
	je	.Lzero		/* Found zero, but no c before that zero.  */

	locgrne	%r1,%r5		/* Mark c as found in first part of s
				   (r5 is 0 here).  */
	vlr	%v19,%v16

	jl	.Lalign		/* No zero (e.g. if vr was fully loaded)
				   -> Align and loop afterwards.  */

	/* Found a zero in vr. If vr was not fully loaded due to block
	   boundary, the remaining bytes are filled with zero and we can't
	   rely on zero indication of condition code here!  */

	vfenezb	%v17,%v16,%v16	/* Find zero.  */
	vlgvb	%r4,%v17,7	/* Load byte index of zero or 16.  */
	clrjl	%r4,%r0,.Lzero	/* Zero within loaded bytes -> end.  */
	j	.Lalign		/* Align and loop afterwards.  */

.Lend_searched_zero:
	/* c is zero: the searched character is the terminator itself.
	   v17 holds its index within the current part, r5 the offset of
	   that part.  */
	vlgvb	%r4,%v17,7	/* Load byte index of zero.  */
	algr	%r5,%r4
	la	%r2,0(%r5,%r2)	/* Return pointer to zero.  */
	br	%r14

.Lzero:
	/* Reached end of string. Check if one c was found before.  */
	tmll	%r3,255		/* Test only the low byte of c as only this
				   byte was replicated into v18.  Thus e.g.
				   c == 256 is handled exactly like c == 0.  */
	jz	.Lend_searched_zero /* Found zero and c is zero.  */

	cgfi	%r1,-1		/* No c found -> return NULL.  */
	locghie	%r2,0
	ber	%r14

	larl	%r3,.Lpermute_mask /* Load permute mask.  */
	vl	%v20,0(%r3)

	/* c was found and is part of v19.  */
	vfenezb	%v17,%v19,%v19	/* Find zero.  */
	vlgvb	%r4,%v17,7	/* Load byte index of zero or 16.  */

	/* The part in v19 was loaded at offset r1.  Offsets used by the
	   aligned loop are always >= 1, thus r1 == 0 means that v19 is the
	   first part of s, of which only r0 bytes were loaded.  Checking
	   r1 instead of the current offset r5 keeps the not loaded bytes
	   out of the search even if the zero was found in a later part.  */
	clgfi	%r1,0		/* Loaded byte count in v19 is 16, ...  */
	lochine	%r0,16		/* ... if v19 is not the first part of s.  */
	ahi	%r0,-1		/* Convert byte count to highest index.  */

	clr	%r0,%r4
	locrl	%r4,%r0		/* r4 = min (zero-index, highest-index).  */

	/* Right-shift of v19 to mask bytes after zero.  */
	clije	%r4,15,.Lzero_permute /* No shift is needed if highest index
					 in vr is 15.  */
	lhi	%r0,15
	slr	%r0,%r4		/* Compute byte count for vector shift right.  */
	sll	%r0,3		/* Convert to bit count.  */
	vlvgb	%v17,%r0,7
	vsrlb	%v19,%v19,%v17	/* Vector shift right by byte by number of bytes
				   specified in bits 1-4 of byte 7 in v17.   */

	/* Reverse bytes in v19.  */
.Lzero_permute:
	vperm	%v19,%v19,%v19,%v20 /* Permute v19 to reversed order.  */

	/* Find c in reversed v19.  The first match in the reversed vector
	   is the last match in s.  */
	vfeeb	%v19,%v19,%v18	/* Find c.  */
	la	%r2,0(%r1,%r2)	/* r2 = address of the part saved in v19.  */
	vlgvb	%r3,%v19,7	/* Load byte index of c.  */

	/* Compute index in real s and return.  */
	slgr	%r4,%r3		/* Index of last c = r4 - reversed index.  */
	la	%r2,0(%r4,%r2)	/* Return pointer to last c.  */
	br	%r14
	/* Byte indices 15..0: vperm with this pattern reverses a vector.  */
.Lpermute_mask:
	.byte	0x0F,0x0E,0x0D,0x0C,0x0B,0x0A,0x09,0x08
	.byte	0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00
END(STRRCHR_Z13)
182
/* If HAVE_STRRCHR_IFUNC is not set, export this implementation directly
   under the public name strrchr and make rindex a weak alias of it.  */
# if ! HAVE_STRRCHR_IFUNC
strong_alias (STRRCHR_Z13, strrchr)
weak_alias (strrchr, rindex)
# endif

/* Inside the shared libc, and only if HAVE_STRRCHR_C is not set, also
   provide __GI_strrchr as an alias of this implementation.
   NOTE(review): __GI_strrchr is presumably the libc-internal (hidden)
   name of strrchr - confirm against the symbol-hiding macros.  */
# if ! HAVE_STRRCHR_C && defined SHARED && IS_IN (libc)
strong_alias (STRRCHR_Z13, __GI_strrchr)
# endif
191
192#endif /* HAVE_STRRCHR_Z13  */
193