/* strcat(dest, src) -- Append SRC on the end of DEST.
   Optimized for x86-64.
   Copyright (C) 2002-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Will be removed when new strcpy implementation gets merged.  */

/* Syntax: AT&T (GAS), preprocessed by cpp.

   strcat (dest, src)
     In:   %rdi = DEST, %rsi = SRC.
     Out:  %rax = DEST (the value %rdi had on entry).
     Scratch: %rcx, %rdx, %rsi, %r8, %r9 and the flags.

   Register roles:
     %r8   0xfefefefefefefeff, the NUL-detection constant (both phases).
     %rax  phase 1: cursor walking DEST;  phase 2: data being copied.
     %rcx  alignment byte counter, then the DEST quadword under test.
     %rdx  phase 1: scratch for the NUL test;  phase 2: write cursor.
     %rsi  phase 2: read cursor in SRC.
     %r9   phase 2: scratch for the NUL test.

   NUL detection: adding the constant to a quadword makes every nonzero
   byte pass a carry into the byte above it.  If no carry leaves the top
   byte, the quadword contains a NUL.  Otherwise (sum ^ word) | constant
   is all ones exactly when each of bytes 1..7 received a carry from
   the byte below, so incrementing it yields zero only when the
   quadword holds no NUL byte at all.  */

/* Probe the aligned quadword at (%rax) for a NUL byte.
   Loads it into %rcx and advances %rax by 8.  Jumps to
   .Ldest_nul_in_word when the add produces no carry.  Otherwise
   falls through with ZF set iff the quadword has no NUL byte; the
   caller branches on ZF.  Uses %rdx as scratch.  */
	.macro	strcat_probe_dest
	movq	(%rax), %rcx
	addq	$8, %rax
	movq	%r8, %rdx
	addq	%rcx, %rdx
	jnc	.Ldest_nul_in_word
	xorq	%rcx, %rdx
	orq	%r8, %rdx
	incq	%rdx
	.endm

/* Move one aligned quadword from (%rsi) to (%rdx).
   Loads it into %rax and advances %rsi by 8.  If the quadword
   contains a NUL byte, jumps to .Lcopy_tail with the data still in
   %rax and nothing stored.  Otherwise stores all 8 bytes and
   advances %rdx by 8.  Uses %r9 as scratch.  */
	.macro	strcat_copy_qword
	movq	(%rsi), %rax
	addq	$8, %rsi
	movq	%rax, %r9
	addq	%r8, %r9
	jnc	.Lcopy_tail
	xorq	%rax, %r9
	orq	%r8, %r9
	incq	%r9
	jnz	.Lcopy_tail
	movq	%rax, (%rdx)
	addq	$8, %rdx
	.endm

	.text
ENTRY (strcat)
	movq	%rdi, %rcx
	andl	$7, %ecx		/* %ecx = DEST & 7; ZF set when DEST
					   is already 8-byte aligned.  */
	movq	%rdi, %rax		/* %rax walks DEST, %rdi stays intact
					   for the return value.  */
	movq	$0xfefefefefefefeff, %r8

	/* Phase 1: find the terminating NUL of DEST.
	   The two moves above do not touch the flags, so this branch
	   still sees the result of the AND.  */
	jz	.Lscan_words

	negl	%ecx
	addl	$8, %ecx		/* %ecx = 8 - (DEST & 7): bytes left
					   before the next 8-byte boundary.  */

	/* Check the unaligned leading bytes one at a time.  */
.Lscan_head:
	cmpb	$0, (%rax)
	je	.Lappend		/* %rax points at DEST's NUL.  */
	incq	%rax
	decl	%ecx
	jnz	.Lscan_head

	/* %rax is now 8-byte aligned.  Probe four quadwords per trip.  */
	.p2align 4
.Lscan_words:
	strcat_probe_dest
	jnz	.Ldest_nul_in_word

	strcat_probe_dest
	jnz	.Ldest_nul_in_word

	strcat_probe_dest
	jnz	.Ldest_nul_in_word

	strcat_probe_dest
	jz	.Lscan_words		/* Still no NUL: go round again.  */

	/* The quadword in %rcx contains a NUL.  Bytes 0..6 are tested
	   in order; if none of them is NUL it has to be byte 7, and
	   %rax ends up addressing that byte.  */
	.p2align 4
.Ldest_nul_in_word:
	subq	$8, %rax		/* Undo the probe's advance: back to
					   byte 0 of the quadword.  */

	testb	%cl, %cl		/* Byte 0.  */
	jz	.Lappend
	incq	%rax

	testb	%ch, %ch		/* Byte 1.  */
	jz	.Lappend
	incq	%rax

	testl	$0x00ff0000, %ecx	/* Byte 2.  */
	jz	.Lappend
	incq	%rax

	testl	$0xff000000, %ecx	/* Byte 3.  */
	jz	.Lappend
	incq	%rax

	shrq	$32, %rcx		/* Bring bytes 4..7 into %ecx.  */

	testb	%cl, %cl		/* Byte 4.  */
	jz	.Lappend
	incq	%rax

	testb	%ch, %ch		/* Byte 5.  */
	jz	.Lappend
	incq	%rax

	testl	$0x00ff0000, %ecx	/* Byte 6.  */
	jz	.Lappend
	incq	%rax

.Lappend:
	/* Phase 2: copy SRC, terminator included, to where DEST's NUL
	   was found (%rax).  */

	movq	%rsi, %rcx
	andl	$7, %ecx		/* %ecx = SRC & 7; ZF set when SRC is
					   already 8-byte aligned.  */
	movq	%rax, %rdx		/* %rdx = write cursor; the move keeps
					   the flags from the AND.  */
	jz	.Lcopy_words

	negl	%ecx
	addl	$8, %ecx		/* %ecx = 8 - (SRC & 7).  */

	/* Copy single bytes until SRC reaches an 8-byte boundary.  */
.Lcopy_head:
	movb	(%rsi), %al
	testb	%al, %al
	movb	%al, (%rdx)		/* The terminator is stored as well;
					   the store leaves ZF from the test
					   in place.  */
	jz	.Ldone
	incq	%rsi
	incq	%rdx
	decl	%ecx
	jnz	.Lcopy_head

	/* SRC is aligned from here on.  Source and destination cannot
	   both be forced onto an 8-byte boundary, so the alignment of
	   the destination is ignored.  Four quadwords per trip.  */
	.p2align 4
.Lcopy_words:
	strcat_copy_qword
	strcat_copy_qword
	strcat_copy_qword
	strcat_copy_qword
	jmp	.Lcopy_words

	/* Final quadword: %rax holds data that contains a NUL.  Store
	   it bytewise, two bytes per pass, stopping right after the
	   NUL has been written.  */
	.p2align 4
.Lcopy_tail:
	movb	%al, (%rdx)
	testb	%al, %al
	jz	.Ldone
	incq	%rdx
	movb	%ah, (%rdx)
	testb	%ah, %ah
	jz	.Ldone
	incq	%rdx
	shrq	$16, %rax		/* Expose the next two bytes.  */
	jmp	.Lcopy_tail


.Ldone:
	movq	%rdi, %rax		/* Return the original DEST pointer.  */
	ret
END (strcat)
libc_hidden_builtin_def (strcat)