1/* strcmp.S
2 * Copyright (C) 2003-2007 Analog Devices Inc., All Rights Reserved.
3 *
4 * This file is subject to the terms and conditions of the GNU Library General
5 * Public License. See the file "COPYING.LIB" in the main directory of this
6 * archive for more details.
7 *
8 * Non-LGPL License also available as part of VisualDSP++
9 * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
10 */
11
12#include <sysdep.h>
13
14/* Fast strcmp() for Blackfin.
15 * When both strings are 4-byte aligned, this processes four characters
16 * at a time. Uses a hw loop with a "very big" count to loop "forever",
17 * until a difference or a terminating zero is found.
18 * Once the end-case word has been identified, breaks out of the
19 * loop to check more carefully (same as the unaligned case).
20 */
21
22.text
23
24.align 2
25
/*
 * _strcmp -- compare two zero-terminated byte strings.
 * (Presumably the standard C strcmp(s1, s2); the C prototype is not
 * visible in this file.)
 *
 * In:   r0 = address of the first string, r1 = address of the second
 *       (copied to p1 and p2 on entry).
 * Out:  r0 = (byte of string 1) - (byte of string 2) at the first
 *       position where the bytes differ or a terminator is found;
 *       bytes are zero-extended, so the result is 0 for equal strings.
 * Uses: r1-r3, r6, r7, p0-p2, hardware loop 0 (lc0), cc and the
 *       arithmetic flags.  r7:4 are pushed on entry and popped before
 *       return (only r6 and r7 of that group are written here).
 *
 * Two phases:
 *   1. If both pointers are 4-byte aligned, compare a word at a time
 *      until the words differ or a word contains a zero byte.
 *   2. Compare a byte at a time.  Used directly for unaligned inputs,
 *      and also to pin down the exact byte after phase 1 stops.
 *
 * NOTE(review): phase 1 has already loaded the following word of each
 * string by the time it notices a terminator, so it reads one aligned
 * word past the word holding the terminating zero -- confirm that this
 * is acceptable on the target memory map.
 */
26.weak _strcmp
27ENTRY(_strcmp)
28	[--sp] = (R7:4);	/* save r7..r4; restored just before rts */
29	p1 = r0;	/* p1 walks string 1 */
30	p2 = r1;	/* p2 walks string 2 */
31
32	p0 = -1;	/* loop count for both LSETUPs: "very big", i.e. loop until we jump out */
33
34	  /* check whether both pointers are 4-byte (word) aligned */
35	r0 = r0 | r1;	/* check both pointers at same time */
36	r0 <<= 30;	/* keep only the low 2 address bits (shifted to the top) */
37	cc = az;	/* result zero => both pointers word aligned */
38	if !cc jump .Lunaligned;	/* no; use unaligned code. */
39			/* fall-thru for aligned case.. */
40
41	  /* note that r0 is zero here (the shift result above was zero); */
42	  /* it serves as the zero addend in the loop.  p0 is still -1.    */
43
44	LSETUP (.Lbeginloop, .Lendloop) lc0=p0;
45	  /* pick up first words */
46	r1 = [p1++];
47	r2 = [p2++];
48	  /* make up mask 0x00FF00FF (selects chars 1 and 3 of a word) */
49	r7 = 0xFF;
50	r7.h = 0xFF;
51		/* loop : 9 cycles to check 4 characters */
52	cc = r1 == r2;	/* prime cc for the first pass; later passes get it from .Lendloop */
53.Lbeginloop:
54	if !cc jump .Lnotequal4;	/* compare failure, exit loop */
55
56	  /* starting with   44332211 */
57	  /* see if char 3 or char 1 is 0 */
58	r3 = r1 & r7;		/* form 00330011 */
59	  /* add to zero only to set the flags (r6 is a discard target), */
60	r6 = r3 +|+ r0 || r2 = [p2++] || nop;	/* and (r2 is free, reload) fetch next word of string 2 */
61	cc = az;	/* true if either is zero */
62	r3 = r1 ^ r3;	        /* form 44002200 (4321^0301 => 4020) */
63				/* (trick, saves having another mask) */
64	/* add to zero,  and  (r1 is free, reload) */
65	r6 = r3 +|+ r0 || r1 = [p1++] || nop;
66	cc |= az;	/* true if any of the four chars is zero */
67	if cc jump .Lzero4;	/* leave if a zero somewhere */
68.Lendloop:
69	cc = r1 == r2;	/* last insn of hw loop: compare the freshly loaded words */
70
71 /* loop exits */
72.Lnotequal4:		/* compare failure on 4-char compare */
73			/* address pointers are one word ahead; */
74			/* faster to use zero4 exit code */
75	p1 += 4;	/* pre-compensate so the -8 below nets out to -4 */
76	p2 += 4;
77
78.Lzero4:			/* one of the bytes in word 1 is zero */
79			/* but we've already fetched the next word; so */
80			/* backup two to look at failing word again */
81	p1 += -8;
82	p2 += -8;
83
84
85
86		/* here when pointers are unaligned: checks one */
87		/* character at a time.  Also use at the end of */
88		/* the word-check algorithm to figure out what happened */
89.Lunaligned:
90	  /*	r0 is non-zero when we jumped here from the alignment check; */
91	  /*	it is cleared below to act as a zero constant.  p0 is still -1. */
92
93	r0 = 0 (Z);
94	r1 = B[p1++] (Z);	/* first byte of each string, zero-extended */
95	r2 = B[p2++] (Z);
96	LSETUP (.Lbeginloop1, .Lendloop1) lc0=p0;
97
98.Lbeginloop1:
99	cc = r1;	/* first char must be non-zero */
100	/* chars must be the same; in parallel fetch the next byte of string 1 */
101	r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop;
102	cc &= az;
103	r3 = r0 - r2;	/* second char must be non-zero (0 - r2 is negative iff r2 != 0) */
104	cc &= an;
105	if !cc jump .Lexitloop1;	/* stop on terminator or mismatch */
106.Lendloop1:
107	r2 = B[p2++] (Z);	/* last insn of hw loop: next byte of string 2 */
108
109.Lexitloop1: /* here means we found a zero or a difference. */
110	   /* we have r2(N), p2(N), r1(N+1), p1(N+2) */
111	r1=B[p1+ -2] (Z);	/* r1 was already overwritten with byte N+1; re-read byte N */
112	r0 = r1 - r2;	/* return value: difference of the deciding bytes */
113	(r7:4) = [sp++];	/* restore saved registers */
114	rts;
115.size _strcmp,.-_strcmp
116
/* libc_hidden_def and weak_alias are libc macros not defined in this file
 * (presumably supplied via <sysdep.h>); see their definitions for the
 * exact symbols they emit. */
117libc_hidden_def (strcmp)
118
/* Without locale support, strcoll is provided as a weak alias of strcmp
 * (presumably because collation then reduces to plain byte comparison). */
119#ifndef __UCLIBC_HAS_LOCALE__
120weak_alias (strcmp,strcoll)
121libc_hidden_def (strcoll)
122#endif
123