1 /* Mapping tables from GBK to GB2312 and vice versa.
2    Copyright (C) 1999-2021 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <dlfcn.h>
20 #include <gconv.h>
21 #include <stdint.h>
22 
23 
/* Parameters consumed by <iconv/skeleton.c>.  The module converts in
   both directions between GBK and GB2312.  */
#define CHARSET_NAME		"GBK//"
#define ONE_DIRECTION		0

/* Names of the two conversion loops defined further down.  */
#define FROM_LOOP		from_gbk_to_gb
#define TO_LOOP			from_gb_to_gbk

/* On either side a character occupies one or two bytes.  */
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		2
#define MIN_NEEDED_TO		1
#define MAX_NEEDED_TO		2

/* Ask the skeleton for its default init and fini functions.  */
#define DEFINE_INIT		1
#define DEFINE_FINI		1
35 
36 
/* First define the conversion function from GBK to GB2312.  Single bytes
   up to 0x7f are copied.  A two-byte sequence is stored only if it
   denotes a position that passes the GB2312 range tests below; everything
   else is handed to STANDARD_TO_LOOP_ERR_HANDLER.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  {									      \
    uint32_t ch = *inptr;						      \
									      \
    if (ch <= 0x7f)							      \
      *outptr++ = *inptr++;						      \
    else								      \
      {									      \
	/* It's a two-byte sequence.  We have to mask out all the sequences   \
	   which are not in GB2312.  Besides all of them in the range	      \
	   0x8140 to 0xA0FE this also includes in the remaining range the     \
	   sequences with the second byte being in the range from 0x40 to     \
	   0xA0 and the following exceptions:				      \
									      \
	     0xA2A1 to 0xA2A9,						      \
	     0xA2AA,							      \
	     0xA6E0 to 0xA6EB,						      \
	     0xA6EE to 0xA6F2,						      \
	     0xA6F4, 0xA6F5,						      \
	     0xA8BB to 0xA8C0						      \
									      \
	   All these characters are not defined in GB2312.  Besides this      \
	   there is an incompatibility in the mapping.  The Unicode tables    \
	   say that 0xA1A4 maps in GB2312 to U30FB while in GBK it maps to    \
	   U00B7.  Similarly, 0xA1AA maps in GB2312 to U2015 while in GBK     \
	   it maps to U2014.  Since we are free to do whatever we want if     \
	   a mapping is not available we will not flag this as an error	      \
	   but instead map the two positions.  But this means that the	      \
	   mapping							      \
									      \
		UCS4 -> GB2312 -> GBK -> UCS4				      \
									      \
	   might not produce identical text.  */			      \
	if (__glibc_unlikely (inptr + 1 >= inend))			      \
	  {								      \
	    /* The second character is not available.  Store		      \
	       the intermediate result.  */				      \
	    result = __GCONV_INCOMPLETE_INPUT;				      \
	    break;							      \
	  }								      \
									      \
	if (__glibc_unlikely (outend - outptr < 2))			      \
	  {								      \
	    /* We ran out of space.  */					      \
	    result = __GCONV_FULL_OUTPUT;				      \
	    break;							      \
	  }								      \
									      \
	ch = (ch << 8) | inptr[1];					      \
									      \
	/* Map 0xA844 (U2015 in GBK) to 0xA1AA (U2015 in GB2312).  */	      \
	if (__glibc_unlikely (ch == 0xa844))				      \
	  ch = 0xa1aa;							      \
									      \
	/* Now determine whether the character is valid.  All tests look at   \
	   CH instead of the input bytes so that the 0xA844 replacement	      \
	   above is judged by its GB2312 code.  A second byte of 0xFF is      \
	   valid in neither character set.  */				      \
	if (__glibc_unlikely (ch < 0xa1a1 || ch > 0xf7fe		      \
			      || (ch & 0xff) < 0xa1			      \
			      || (ch & 0xff) > 0xfe			      \
			      /* Now test the exceptions.  */		      \
			      || (ch >= 0xa2a1 && ch <= 0xa2aa)		      \
			      || (ch >= 0xa6e0 && ch <= 0xa6f5)		      \
			      || (ch >= 0xa8bb && ch <= 0xa8c0)))	      \
	  {								      \
	    /* One of the characters we cannot map.  */			      \
	    STANDARD_TO_LOOP_ERR_HANDLER (2);				      \
	  }								      \
									      \
	/* Store CH rather than copying the input so that the replacement     \
	   of 0xA844 takes effect; for all other characters the bytes are     \
	   the same.  */						      \
	*outptr++ = ch >> 8;						      \
	*outptr++ = ch & 0xff;						      \
	inptr += 2;							      \
      }									      \
  }
#define LOOP_NEED_FLAGS
119 #include <iconv/loop.c>
120 
121 
/* Next, define the other direction, GB2312 to GBK.  Every character is
   copied unchanged; the only work is to make sure that a two-byte
   sequence is complete and fits into the output buffer.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MAX_NEEDED_INPUT	MAX_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
#define BODY \
  {									      \
    /* Copy one character.  Two-byte GB2312 sequences are passed	      \
       through unchanged; the 0xA1A4 and 0xA1AA differences explained	      \
       for the other direction are deliberately ignored here.		      \
									      \
       Look at the lead byte without consuming it: when we `break'	      \
       below, INPTR must still point to the start of the character so	      \
       that the conversion can be resumed there later.  */		      \
    unsigned char ch = *inptr;						      \
									      \
    if (ch <= 0x7f)							      \
      *outptr++ = *inptr++;						      \
    else								      \
      {									      \
	if (__glibc_unlikely (inptr + 1 >= inend))			      \
	  {								      \
	    /* The second byte is not available yet.  */		      \
	    result = __GCONV_INCOMPLETE_INPUT;				      \
	    break;							      \
	  }								      \
									      \
	if (__glibc_unlikely (outend - outptr < 2))			      \
	  {								      \
	    /* We ran out of space.  */					      \
	    result = __GCONV_FULL_OUTPUT;				      \
	    break;							      \
	  }								      \
									      \
	/* Both bytes are available and fit: copy them.  */		      \
	*outptr++ = *inptr++;						      \
	*outptr++ = *inptr++;						      \
      }									      \
  }
157 #include <iconv/loop.c>
158 
159 
160 /* Now define the toplevel functions.  */
161 #include <iconv/skeleton.c>
162