1 /* Copyright (C) 1996-2021 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published
6    by the Free Software Foundation; version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16 
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <libintl.h>
25 #include <stdarg.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdint.h>
29 
30 #include "localedef.h"
31 #include "charmap.h"
32 #include "error.h"
33 #include "linereader.h"
34 #include "locfile.h"
35 
36 /* Prototypes for local functions.  */
37 static struct token *get_toplvl_escape (struct linereader *lr);
38 static struct token *get_symname (struct linereader *lr);
39 static struct token *get_ident (struct linereader *lr);
40 static struct token *get_string (struct linereader *lr,
41 				 const struct charmap_t *charmap,
42 				 struct localedef_t *locale,
43 				 const struct repertoire_t *repertoire,
44 				 int verbose);
45 
46 
47 struct linereader *
lr_open(const char * fname,kw_hash_fct_t hf)48 lr_open (const char *fname, kw_hash_fct_t hf)
49 {
50   FILE *fp;
51 
52   if (fname == NULL || strcmp (fname, "-") == 0
53       || strcmp (fname, "/dev/stdin") == 0)
54     return lr_create (stdin, "<stdin>", hf);
55   else
56     {
57       fp = fopen (fname, "rm");
58       if (fp == NULL)
59 	return NULL;
60       return lr_create (fp, fname, hf);
61     }
62 }
63 
64 struct linereader *
lr_create(FILE * fp,const char * fname,kw_hash_fct_t hf)65 lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
66 {
67   struct linereader *result;
68   int n;
69 
70   result = (struct linereader *) xmalloc (sizeof (*result));
71 
72   result->fp = fp;
73   result->fname = xstrdup (fname);
74   result->buf = NULL;
75   result->bufsize = 0;
76   result->lineno = 1;
77   result->idx = 0;
78   result->comment_char = '#';
79   result->escape_char = '\\';
80   result->translate_strings = 1;
81   result->return_widestr = 0;
82 
83   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
84   if (n < 0)
85     {
86       int save = errno;
87       fclose (result->fp);
88       free ((char *) result->fname);
89       free (result);
90       errno = save;
91       return NULL;
92     }
93 
94   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
95     n -= 2;
96 
97   result->buf[n] = '\0';
98   result->bufact = n;
99   result->hash_fct = hf;
100 
101   return result;
102 }
103 
104 
105 int
lr_eof(struct linereader * lr)106 lr_eof (struct linereader *lr)
107 {
108   return lr->bufact = 0;
109 }
110 
111 
112 void
lr_ignore_rest(struct linereader * lr,int verbose)113 lr_ignore_rest (struct linereader *lr, int verbose)
114 {
115   if (verbose)
116     {
117       while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
118 	     && lr->buf[lr->idx] != lr->comment_char)
119 	if (lr->buf[lr->idx] == '\0')
120 	  {
121 	    if (lr_next (lr) < 0)
122 	      return;
123 	  }
124 	else
125 	  ++lr->idx;
126 
127       if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
128 	  && lr->buf[lr->idx] != lr->comment_char)
129 	lr_error (lr, _("trailing garbage at end of line"));
130     }
131 
132   /* Ignore continued line.  */
133   while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
134     if (lr_next (lr) < 0)
135       break;
136 
137   lr->idx = lr->bufact;
138 }
139 
140 
141 void
lr_close(struct linereader * lr)142 lr_close (struct linereader *lr)
143 {
144   fclose (lr->fp);
145   free (lr->buf);
146   free (lr);
147 }
148 
149 
150 int
lr_next(struct linereader * lr)151 lr_next (struct linereader *lr)
152 {
153   int n;
154 
155   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
156   if (n < 0)
157     return -1;
158 
159   ++lr->lineno;
160 
161   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
162     {
163 #if 0
164       /* XXX Is this correct?  */
165       /* An escaped newline character is substituted with a single <SP>.  */
166       --n;
167       lr->buf[n - 1] = ' ';
168 #else
169       n -= 2;
170 #endif
171     }
172 
173   lr->buf[n] = '\0';
174   lr->bufact = n;
175   lr->idx = 0;
176 
177   return 0;
178 }
179 
180 
181 /* Defined in error.c.  */
182 /* This variable is incremented each time `error' is called.  */
183 extern unsigned int error_message_count;
184 
185 /* The calling program should define program_name and set it to the
186    name of the executing program.  */
187 extern char *program_name;
188 
189 
190 struct token *
lr_token(struct linereader * lr,const struct charmap_t * charmap,struct localedef_t * locale,const struct repertoire_t * repertoire,int verbose)191 lr_token (struct linereader *lr, const struct charmap_t *charmap,
192 	  struct localedef_t *locale, const struct repertoire_t *repertoire,
193 	  int verbose)
194 {
195   int ch;
196 
197   while (1)
198     {
199       do
200 	{
201 	  ch = lr_getc (lr);
202 
203 	  if (ch == EOF)
204 	    {
205 	      lr->token.tok = tok_eof;
206 	      return &lr->token;
207 	    };
208 
209 	  if (ch == '\n')
210 	    {
211 	      lr->token.tok = tok_eol;
212 	      return &lr->token;
213 	    }
214 	}
215       while (isspace (ch));
216 
217       if (ch != lr->comment_char)
218 	break;
219 
220       /* Is there an newline at the end of the buffer?  */
221       if (lr->buf[lr->bufact - 1] != '\n')
222 	{
223 	  /* No.  Some people want this to mean that only the line in
224 	     the file not the logical, concatenated line is ignored.
225 	     Let's try this.  */
226 	  lr->idx = lr->bufact;
227 	  continue;
228 	}
229 
230       /* Ignore rest of line.  */
231       lr_ignore_rest (lr, 0);
232       lr->token.tok = tok_eol;
233       return &lr->token;
234     }
235 
236   /* Match escape sequences.  */
237   if (ch == lr->escape_char)
238     return get_toplvl_escape (lr);
239 
240   /* Match ellipsis.  */
241   if (ch == '.')
242     {
243       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
244 	{
245 	  int cnt;
246 	  for (cnt = 0; cnt < 10; ++cnt)
247 	    lr_getc (lr);
248 	  lr->token.tok = tok_ellipsis4_2;
249 	  return &lr->token;
250 	}
251       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
252 	{
253 	  lr_getc (lr);
254 	  lr_getc (lr);
255 	  lr_getc (lr);
256 	  lr->token.tok = tok_ellipsis4;
257 	  return &lr->token;
258 	}
259       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
260 	{
261 	  lr_getc (lr);
262 	  lr_getc (lr);
263 	  lr->token.tok = tok_ellipsis3;
264 	  return &lr->token;
265 	}
266       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
267 	{
268 	  int cnt;
269 	  for (cnt = 0; cnt < 6; ++cnt)
270 	    lr_getc (lr);
271 	  lr->token.tok = tok_ellipsis2_2;
272 	  return &lr->token;
273 	}
274       if (lr->buf[lr->idx] == '.')
275 	{
276 	  lr_getc (lr);
277 	  lr->token.tok = tok_ellipsis2;
278 	  return &lr->token;
279 	}
280     }
281 
282   switch (ch)
283     {
284     case '<':
285       return get_symname (lr);
286 
287     case '0' ... '9':
288       lr->token.tok = tok_number;
289       lr->token.val.num = ch - '0';
290 
291       while (isdigit (ch = lr_getc (lr)))
292 	{
293 	  lr->token.val.num *= 10;
294 	  lr->token.val.num += ch - '0';
295 	}
296       if (isalpha (ch))
297 	lr_error (lr, _("garbage at end of number"));
298       lr_ungetn (lr, 1);
299 
300       return &lr->token;
301 
302     case ';':
303       lr->token.tok = tok_semicolon;
304       return &lr->token;
305 
306     case ',':
307       lr->token.tok = tok_comma;
308       return &lr->token;
309 
310     case '(':
311       lr->token.tok = tok_open_brace;
312       return &lr->token;
313 
314     case ')':
315       lr->token.tok = tok_close_brace;
316       return &lr->token;
317 
318     case '"':
319       return get_string (lr, charmap, locale, repertoire, verbose);
320 
321     case '-':
322       ch = lr_getc (lr);
323       if (ch == '1')
324 	{
325 	  lr->token.tok = tok_minus1;
326 	  return &lr->token;
327 	}
328       lr_ungetn (lr, 2);
329       break;
330     }
331 
332   return get_ident (lr);
333 }
334 
335 
336 static struct token *
get_toplvl_escape(struct linereader * lr)337 get_toplvl_escape (struct linereader *lr)
338 {
339   /* This is supposed to be a numeric value.  We return the
340      numerical value and the number of bytes.  */
341   size_t start_idx = lr->idx - 1;
342   unsigned char *bytes = lr->token.val.charcode.bytes;
343   size_t nbytes = 0;
344   int ch;
345 
346   do
347     {
348       unsigned int byte = 0;
349       unsigned int base = 8;
350 
351       ch = lr_getc (lr);
352 
353       if (ch == 'd')
354 	{
355 	  base = 10;
356 	  ch = lr_getc (lr);
357 	}
358       else if (ch == 'x')
359 	{
360 	  base = 16;
361 	  ch = lr_getc (lr);
362 	}
363 
364       if ((base == 16 && !isxdigit (ch))
365 	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
366 	{
367 	esc_error:
368 	  lr->token.val.str.startmb = &lr->buf[start_idx];
369 
370 	  while (ch != EOF && !isspace (ch))
371 	    ch = lr_getc (lr);
372 	  lr->token.val.str.lenmb = lr->idx - start_idx;
373 
374 	  lr->token.tok = tok_error;
375 	  return &lr->token;
376 	}
377 
378       if (isdigit (ch))
379 	byte = ch - '0';
380       else
381 	byte = tolower (ch) - 'a' + 10;
382 
383       ch = lr_getc (lr);
384       if ((base == 16 && !isxdigit (ch))
385 	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
386 	goto esc_error;
387 
388       byte *= base;
389       if (isdigit (ch))
390 	byte += ch - '0';
391       else
392 	byte += tolower (ch) - 'a' + 10;
393 
394       ch = lr_getc (lr);
395       if (base != 16 && isdigit (ch))
396 	{
397 	  byte *= base;
398 	  byte += ch - '0';
399 
400 	  ch = lr_getc (lr);
401 	}
402 
403       bytes[nbytes++] = byte;
404     }
405   while (ch == lr->escape_char
406 	 && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
407 
408   if (!isspace (ch))
409     lr_error (lr, _("garbage at end of character code specification"));
410 
411   lr_ungetn (lr, 1);
412 
413   lr->token.tok = tok_charcode;
414   lr->token.val.charcode.nbytes = nbytes;
415 
416   return &lr->token;
417 }
418 
419 
/* Append the single byte CH to the byte buffer of the enclosing
   function.  The variables `buf' (the buffer), `bufact' (bytes in use)
   and `bufmax' (bytes allocated) must be in scope.  A full buffer is
   doubled with xrealloc before the byte is stored.  */
#define ADDC(ch) \
  do \
    { \
      if (bufmax == bufact) \
	{ \
	  bufmax += bufmax; \
	  buf = xrealloc (buf, bufmax); \
	} \
      buf[bufact] = (ch); \
      ++bufact; \
    } \
  while (0)
431 
432 
/* Append the L bytes starting at S to the byte buffer of the enclosing
   function.  The variables `buf' (the buffer), `bufact' (bytes in use)
   and `bufmax' (bytes allocated) must be in scope.

   If the data does not fit, the capacity becomes twice the larger of
   the old capacity and L.  Since bufact never exceeds the old capacity
   this always leaves room for all L bytes.  Only the capacity is
   changed when growing: bufact is the write position and must not be
   touched before the copy.  */
#define ADDS(s, l) \
  do \
    { \
      size_t _l = (l); \
      if (bufact + _l > bufmax) \
	{ \
	  if (bufmax < _l) \
	    bufmax = _l; \
	  bufmax *= 2; \
	  buf = xrealloc (buf, bufmax); \
	} \
      memcpy (&buf[bufact], s, _l); \
      bufact += _l; \
    } \
  while (0)
448 
449 
/* Append the wide character CH to the wide character buffer of the
   enclosing function.  The variables `buf2' (the buffer), `buf2act'
   (elements in use) and `buf2max' (elements allocated) must be in
   scope.  Both counters are in elements, not bytes; the byte size for
   xrealloc is derived from the element type of buf2.  A full buffer is
   doubled before the character is stored.  */
#define ADDWC(ch) \
  do \
    { \
      if (buf2act == buf2max) \
	{ \
	  buf2max *= 2; \
	  buf2 = xrealloc (buf2, buf2max * sizeof (buf2[0])); \
	} \
      buf2[buf2act++] = (ch); \
    } \
  while (0)
461 
462 
463 static struct token *
get_symname(struct linereader * lr)464 get_symname (struct linereader *lr)
465 {
466   /* Symbol in brackets.  We must distinguish three kinds:
467      1. reserved words
468      2. ISO 10646 position values
469      3. all other.  */
470   char *buf;
471   size_t bufact = 0;
472   size_t bufmax = 56;
473   const struct keyword_t *kw;
474   int ch;
475 
476   buf = (char *) xmalloc (bufmax);
477 
478   do
479     {
480       ch = lr_getc (lr);
481       if (ch == lr->escape_char)
482 	{
483 	  int c2 = lr_getc (lr);
484 	  ADDC (c2);
485 
486 	  if (c2 == '\n')
487 	    ch = '\n';
488 	}
489       else
490 	ADDC (ch);
491     }
492   while (ch != '>' && ch != '\n');
493 
494   if (ch == '\n')
495     lr_error (lr, _("unterminated symbolic name"));
496 
497   /* Test for ISO 10646 position value.  */
498   if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
499     {
500       char *cp = buf + 1;
501       while (cp < &buf[bufact - 1] && isxdigit (*cp))
502 	++cp;
503 
504       if (cp == &buf[bufact - 1])
505 	{
506 	  /* Yes, it is.  */
507 	  lr->token.tok = tok_ucs4;
508 	  lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
509 
510 	  return &lr->token;
511 	}
512     }
513 
514   /* It is a symbolic name.  Test for reserved words.  */
515   kw = lr->hash_fct (buf, bufact - 1);
516 
517   if (kw != NULL && kw->symname_or_ident == 1)
518     {
519       lr->token.tok = kw->token;
520       free (buf);
521     }
522   else
523     {
524       lr->token.tok = tok_bsymbol;
525 
526       buf = xrealloc (buf, bufact + 1);
527       buf[bufact] = '\0';
528 
529       lr->token.val.str.startmb = buf;
530       lr->token.val.str.lenmb = bufact - 1;
531     }
532 
533   return &lr->token;
534 }
535 
536 
537 static struct token *
get_ident(struct linereader * lr)538 get_ident (struct linereader *lr)
539 {
540   char *buf;
541   size_t bufact;
542   size_t bufmax = 56;
543   const struct keyword_t *kw;
544   int ch;
545 
546   buf = xmalloc (bufmax);
547   bufact = 0;
548 
549   ADDC (lr->buf[lr->idx - 1]);
550 
551   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
552 	 && ch != '<' && ch != ',' && ch != EOF)
553     {
554       if (ch == lr->escape_char)
555 	{
556 	  ch = lr_getc (lr);
557 	  if (ch == '\n' || ch == EOF)
558 	    {
559 	      lr_error (lr, _("invalid escape sequence"));
560 	      break;
561 	    }
562 	}
563       ADDC (ch);
564     }
565 
566   lr_ungetc (lr, ch);
567 
568   kw = lr->hash_fct (buf, bufact);
569 
570   if (kw != NULL && kw->symname_or_ident == 0)
571     {
572       lr->token.tok = kw->token;
573       free (buf);
574     }
575   else
576     {
577       lr->token.tok = tok_ident;
578 
579       buf = xrealloc (buf, bufact + 1);
580       buf[bufact] = '\0';
581 
582       lr->token.val.str.startmb = buf;
583       lr->token.val.str.lenmb = bufact;
584     }
585 
586   return &lr->token;
587 }
588 
589 
590 static struct token *
get_string(struct linereader * lr,const struct charmap_t * charmap,struct localedef_t * locale,const struct repertoire_t * repertoire,int verbose)591 get_string (struct linereader *lr, const struct charmap_t *charmap,
592 	    struct localedef_t *locale, const struct repertoire_t *repertoire,
593 	    int verbose)
594 {
595   int return_widestr = lr->return_widestr;
596   char *buf;
597   wchar_t *buf2 = NULL;
598   size_t bufact;
599   size_t bufmax = 56;
600 
601   /* We must return two different strings.  */
602   buf = xmalloc (bufmax);
603   bufact = 0;
604 
605   /* We know it'll be a string.  */
606   lr->token.tok = tok_string;
607 
608   /* If we need not translate the strings (i.e., expand <...> parts)
609      we can run a simple loop.  */
610   if (!lr->translate_strings)
611     {
612       int ch;
613 
614       buf2 = NULL;
615       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
616 	ADDC (ch);
617 
618       /* Catch errors with trailing escape character.  */
619       if (bufact > 0 && buf[bufact - 1] == lr->escape_char
620 	  && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
621 	{
622 	  lr_error (lr, _("illegal escape sequence at end of string"));
623 	  --bufact;
624 	}
625       else if (ch == '\n' || ch == EOF)
626 	lr_error (lr, _("unterminated string"));
627 
628       ADDC ('\0');
629     }
630   else
631     {
632       int illegal_string = 0;
633       size_t buf2act = 0;
634       size_t buf2max = 56 * sizeof (uint32_t);
635       int ch;
636 
637       /* We have to provide the wide character result as well.  */
638       if (return_widestr)
639 	buf2 = xmalloc (buf2max);
640 
641       /* Read until the end of the string (or end of the line or file).  */
642       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
643 	{
644 	  size_t startidx;
645 	  uint32_t wch;
646 	  struct charseq *seq;
647 
648 	  if (ch != '<')
649 	    {
650 	      /* The standards leave it up to the implementation to decide
651 		 what to do with character which stand for themself.  We
652 		 could jump through hoops to find out the value relative to
653 		 the charmap and the repertoire map, but instead we leave
654 		 it up to the locale definition author to write a better
655 		 definition.  We assume here that every character which
656 		 stands for itself is encoded using ISO 8859-1.  Using the
657 		 escape character is allowed.  */
658 	      if (ch == lr->escape_char)
659 		{
660 		  ch = lr_getc (lr);
661 		  if (ch == '\n' || ch == EOF)
662 		    break;
663 		}
664 
665 	      ADDC (ch);
666 	      if (return_widestr)
667 		ADDWC ((uint32_t) ch);
668 
669 	      continue;
670 	    }
671 
672 	  /* Now we have to search for the end of the symbolic name, i.e.,
673 	     the closing '>'.  */
674 	  startidx = bufact;
675 	  while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
676 	    {
677 	      if (ch == lr->escape_char)
678 		{
679 		  ch = lr_getc (lr);
680 		  if (ch == '\n' || ch == EOF)
681 		    break;
682 		}
683 	      ADDC (ch);
684 	    }
685 	  if (ch == '\n' || ch == EOF)
686 	    /* Not a correct string.  */
687 	    break;
688 	  if (bufact == startidx)
689 	    {
690 	      /* <> is no correct name.  Ignore it and also signal an
691 		 error.  */
692 	      illegal_string = 1;
693 	      continue;
694 	    }
695 
696 	  /* It might be a Uxxxx symbol.  */
697 	  if (buf[startidx] == 'U'
698 	      && (bufact - startidx == 5 || bufact - startidx == 9))
699 	    {
700 	      char *cp = buf + startidx + 1;
701 	      while (cp < &buf[bufact] && isxdigit (*cp))
702 		++cp;
703 
704 	      if (cp == &buf[bufact])
705 		{
706 		  char utmp[10];
707 
708 		  /* Yes, it is.  */
709 		  ADDC ('\0');
710 		  wch = strtoul (buf + startidx + 1, NULL, 16);
711 
712 		  /* Now forget about the name we just added.  */
713 		  bufact = startidx;
714 
715 		  if (return_widestr)
716 		    ADDWC (wch);
717 
718 		  /* See whether the charmap contains the Uxxxxxxxx names.  */
719 		  snprintf (utmp, sizeof (utmp), "U%08X", wch);
720 		  seq = charmap_find_value (charmap, utmp, 9);
721 
722 		  if (seq == NULL)
723 		    {
724 		     /* No, this isn't the case.  Now determine from
725 			the repertoire the name of the character and
726 			find it in the charmap.  */
727 		      if (repertoire != NULL)
728 			{
729 			  const char *symbol;
730 
731 			  symbol = repertoire_find_symbol (repertoire, wch);
732 
733 			  if (symbol != NULL)
734 			    seq = charmap_find_value (charmap, symbol,
735 						      strlen (symbol));
736 			}
737 
738 		      if (seq == NULL)
739 			{
740 #ifndef NO_TRANSLITERATION
741 			  /* Transliterate if possible.  */
742 			  if (locale != NULL)
743 			    {
744 			      uint32_t *translit;
745 
746 			      if ((locale->avail & CTYPE_LOCALE) == 0)
747 				{
748 				  /* Load the CTYPE data now.  */
749 				  int old_needed = locale->needed;
750 
751 				  locale->needed = 0;
752 				  locale = load_locale (LC_CTYPE,
753 							locale->name,
754 							locale->repertoire_name,
755 							charmap, locale);
756 				  locale->needed = old_needed;
757 				}
758 
759 			      if ((locale->avail & CTYPE_LOCALE) != 0
760 				  && ((translit = find_translit (locale,
761 								 charmap, wch))
762 				      != NULL))
763 				/* The CTYPE data contains a matching
764 				   transliteration.  */
765 				{
766 				  int i;
767 
768 				  for (i = 0; translit[i] != 0; ++i)
769 				    {
770 				      char utmp[10];
771 
772 				      snprintf (utmp, sizeof (utmp), "U%08X",
773 						translit[i]);
774 				      seq = charmap_find_value (charmap, utmp,
775 								9);
776 				      assert (seq != NULL);
777 				      ADDS (seq->bytes, seq->nbytes);
778 				    }
779 
780 				  continue;
781 				}
782 			    }
783 #endif	/* NO_TRANSLITERATION */
784 
785 			  /* Not a known name.  */
786 			  illegal_string = 1;
787 			}
788 		    }
789 
790 		  if (seq != NULL)
791 		    ADDS (seq->bytes, seq->nbytes);
792 
793 		  continue;
794 		}
795 	    }
796 
797 	  /* We now have the symbolic name in buf[startidx] to
798 	     buf[bufact-1].  Now find out the value for this character
799 	     in the charmap as well as in the repertoire map (in this
800 	     order).  */
801 	  seq = charmap_find_value (charmap, &buf[startidx],
802 				    bufact - startidx);
803 
804 	  if (seq == NULL)
805 	    {
806 	      /* This name is not in the charmap.  */
807 	      lr_error (lr, _("symbol `%.*s' not in charmap"),
808 			(int) (bufact - startidx), &buf[startidx]);
809 	      illegal_string = 1;
810 	    }
811 
812 	  if (return_widestr)
813 	    {
814 	      /* Now the same for the multibyte representation.  */
815 	      if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
816 		wch = seq->ucs4;
817 	      else
818 		{
819 		  wch = repertoire_find_value (repertoire, &buf[startidx],
820 					       bufact - startidx);
821 		  if (seq != NULL)
822 		    seq->ucs4 = wch;
823 		}
824 
825 	      if (wch == ILLEGAL_CHAR_VALUE)
826 		{
827 		  /* This name is not in the repertoire map.  */
828 		  lr_error (lr, _("symbol `%.*s' not in repertoire map"),
829 			    (int) (bufact - startidx), &buf[startidx]);
830 		  illegal_string = 1;
831 		}
832 	      else
833 		ADDWC (wch);
834 	    }
835 
836 	  /* Now forget about the name we just added.  */
837 	  bufact = startidx;
838 
839 	  /* And copy the bytes.  */
840 	  if (seq != NULL)
841 	    ADDS (seq->bytes, seq->nbytes);
842 	}
843 
844       if (ch == '\n' || ch == EOF)
845 	{
846 	  lr_error (lr, _("unterminated string"));
847 	  illegal_string = 1;
848 	}
849 
850       if (illegal_string)
851 	{
852 	  free (buf);
853 	  free (buf2);
854 	  lr->token.val.str.startmb = NULL;
855 	  lr->token.val.str.lenmb = 0;
856 	  lr->token.val.str.startwc = NULL;
857 	  lr->token.val.str.lenwc = 0;
858 
859 	  return &lr->token;
860 	}
861 
862       ADDC ('\0');
863 
864       if (return_widestr)
865 	{
866 	  ADDWC (0);
867 	  lr->token.val.str.startwc = xrealloc (buf2,
868 						buf2act * sizeof (uint32_t));
869 	  lr->token.val.str.lenwc = buf2act;
870 	}
871     }
872 
873   lr->token.val.str.startmb = xrealloc (buf, bufact);
874   lr->token.val.str.lenmb = bufact;
875 
876   return &lr->token;
877 }
878