regcomp.c

Last change on this file was 599, checked in by bird, 18 years ago
GNU sed 4.1.5.
File size: 107.5 KB

Line
1	/* Extended regular expression matching and search library.
2	Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, write to the Free
18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19	02111-1307 USA. */
20
21	static reg_errcode_t re_compile_internal (regex_t preg, const char pattern,
22	size_t length, reg_syntax_t syntax);
23	static void re_compile_fastmap_iter (regex_t *bufp,
24	const re_dfastate_t *init_state,
25	char *fastmap);
26	static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
27	#ifdef RE_ENABLE_I18N
28	static void free_charset (re_charset_t *cset);
29	#endif /* RE_ENABLE_I18N */
30	static void free_workarea_compile (regex_t *preg);
31	static reg_errcode_t create_initial_state (re_dfa_t *dfa);
32	#ifdef RE_ENABLE_I18N
33	static void optimize_utf8 (re_dfa_t *dfa);
34	#endif
35	static reg_errcode_t analyze (regex_t *preg);
36	static reg_errcode_t preorder (bin_tree_t *root,
37	reg_errcode_t (fn (void , bin_tree_t )),
38	void *extra);
39	static reg_errcode_t postorder (bin_tree_t *root,
40	reg_errcode_t (fn (void , bin_tree_t )),
41	void *extra);
42	static reg_errcode_t optimize_subexps (void extra, bin_tree_t node);
43	static reg_errcode_t lower_subexps (void extra, bin_tree_t node);
44	static bin_tree_t lower_subexp (reg_errcode_t err, regex_t *preg,
45	bin_tree_t *node);
46	static reg_errcode_t calc_first (void extra, bin_tree_t node);
47	static reg_errcode_t calc_next (void extra, bin_tree_t node);
48	static reg_errcode_t link_nfa_nodes (void extra, bin_tree_t node);
49	static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
50	static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
51	unsigned int constraint);
52	static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
53	static reg_errcode_t calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa,
54	int node, int root);
55	static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
56	static int fetch_number (re_string_t input, re_token_t token,
57	reg_syntax_t syntax);
58	static int peek_token (re_token_t token, re_string_t input,
59	reg_syntax_t syntax) internal_function;
60	static bin_tree_t parse (re_string_t regexp, regex_t *preg,
61	reg_syntax_t syntax, reg_errcode_t *err);
62	static bin_tree_t parse_reg_exp (re_string_t regexp, regex_t *preg,
63	re_token_t *token, reg_syntax_t syntax,
64	int nest, reg_errcode_t *err);
65	static bin_tree_t parse_branch (re_string_t regexp, regex_t *preg,
66	re_token_t *token, reg_syntax_t syntax,
67	int nest, reg_errcode_t *err);
68	static bin_tree_t parse_expression (re_string_t regexp, regex_t *preg,
69	re_token_t *token, reg_syntax_t syntax,
70	int nest, reg_errcode_t *err);
71	static bin_tree_t parse_sub_exp (re_string_t regexp, regex_t *preg,
72	re_token_t *token, reg_syntax_t syntax,
73	int nest, reg_errcode_t *err);
74	static bin_tree_t parse_dup_op (bin_tree_t dup_elem, re_string_t *regexp,
75	re_dfa_t dfa, re_token_t token,
76	reg_syntax_t syntax, reg_errcode_t *err);
77	static bin_tree_t parse_bracket_exp (re_string_t regexp, re_dfa_t *dfa,
78	re_token_t *token, reg_syntax_t syntax,
79	reg_errcode_t *err);
80	static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
81	re_string_t *regexp,
82	re_token_t *token, int token_len,
83	re_dfa_t *dfa,
84	reg_syntax_t syntax,
85	int accept_hyphen);
86	static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
87	re_string_t *regexp,
88	re_token_t *token);
89	#ifdef RE_ENABLE_I18N
90	static reg_errcode_t build_equiv_class (bitset_t sbcset,
91	re_charset_t *mbcset,
92	int *equiv_class_alloc,
93	const unsigned char *name);
94	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
95	bitset_t sbcset,
96	re_charset_t *mbcset,
97	int *char_class_alloc,
98	const unsigned char *class_name,
99	reg_syntax_t syntax);
100	#else /* not RE_ENABLE_I18N */
101	static reg_errcode_t build_equiv_class (bitset_t sbcset,
102	const unsigned char *name);
103	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
104	bitset_t sbcset,
105	const unsigned char *class_name,
106	reg_syntax_t syntax);
107	#endif /* not RE_ENABLE_I18N */
108	static bin_tree_t build_charclass_op (re_dfa_t dfa,
109	RE_TRANSLATE_TYPE trans,
110	const unsigned char *class_name,
111	const unsigned char *extra,
112	int non_match, reg_errcode_t *err);
113	static bin_tree_t create_tree (re_dfa_t dfa,
114	bin_tree_t left, bin_tree_t right,
115	re_token_type_t type);
116	static bin_tree_t create_token_tree (re_dfa_t dfa,
117	bin_tree_t left, bin_tree_t right,
118	const re_token_t *token);
119	static bin_tree_t duplicate_tree (const bin_tree_t src, re_dfa_t *dfa);
120	static void free_token (re_token_t *node);
121	static reg_errcode_t free_tree (void extra, bin_tree_t node);
122	static reg_errcode_t mark_opt_subexp (void extra, bin_tree_t node);
123
124
125	/* This table gives an error message for each of the error codes listed
126	in regex.h. Obviously the order here has to be same as there.
127	POSIX doesn't require that we do anything for REG_NOERROR,
128	but why not be nice? */
129
130	const char __re_error_msgid[] attribute_hidden =
131	{
132	#define REG_NOERROR_IDX 0
133	gettext_noop ("Success") /* REG_NOERROR */
134	"\0"
135	#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
136	gettext_noop ("No match") /* REG_NOMATCH */
137	"\0"
138	#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
139	gettext_noop ("Invalid regular expression") /* REG_BADPAT */
140	"\0"
141	#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
142	gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
143	"\0"
144	#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
145	gettext_noop ("Invalid character class name") /* REG_ECTYPE */
146	"\0"
147	#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
148	gettext_noop ("Trailing backslash") /* REG_EESCAPE */
149	"\0"
150	#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
151	gettext_noop ("Invalid back reference") /* REG_ESUBREG */
152	"\0"
153	#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
154	gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
155	"\0"
156	#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
157	gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
158	"\0"
159	#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
160	gettext_noop ("Unmatched \\{") /* REG_EBRACE */
161	"\0"
162	#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
163	gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
164	"\0"
165	#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
166	gettext_noop ("Invalid range end") /* REG_ERANGE */
167	"\0"
168	#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
169	gettext_noop ("Memory exhausted") /* REG_ESPACE */
170	"\0"
171	#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
172	gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
173	"\0"
174	#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
175	gettext_noop ("Premature end of regular expression") /* REG_EEND */
176	"\0"
177	#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
178	gettext_noop ("Regular expression too big") /* REG_ESIZE */
179	"\0"
180	#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
181	gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
182	};
183
184	const size_t __re_error_msgid_idx[] attribute_hidden =
185	{
186	REG_NOERROR_IDX,
187	REG_NOMATCH_IDX,
188	REG_BADPAT_IDX,
189	REG_ECOLLATE_IDX,
190	REG_ECTYPE_IDX,
191	REG_EESCAPE_IDX,
192	REG_ESUBREG_IDX,
193	REG_EBRACK_IDX,
194	REG_EPAREN_IDX,
195	REG_EBRACE_IDX,
196	REG_BADBR_IDX,
197	REG_ERANGE_IDX,
198	REG_ESPACE_IDX,
199	REG_BADRPT_IDX,
200	REG_EEND_IDX,
201	REG_ESIZE_IDX,
202	REG_ERPAREN_IDX
203	};
204
205
206	/* Entry points for GNU code. */
207
208	/* re_compile_pattern is the GNU regular expression compiler: it
209	compiles PATTERN (of length LENGTH) and puts the result in BUFP.
210	Returns 0 if the pattern was valid, otherwise an error string.
211
212	Assumes the `allocated' (and perhaps `buffer') and `translate' fields
213	are set in BUFP on entry. */
214
215	const char *
216	re_compile_pattern (pattern, length, bufp)
217	const char *pattern;
218	size_t length;
219	struct re_pattern_buffer *bufp;
220	{
221	reg_errcode_t ret;
222
223	/* And GNU code determines whether or not to get register information
224	by passing null for the REGS argument to re_match, etc., not by
225	setting no_sub, unless RE_NO_SUB is set. */
226	bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
227
228	/* Match anchors at newline. */
229	bufp->newline_anchor = 1;
230
231	ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
232
233	if (!ret)
234	return NULL;
235	return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
236	}
237	#ifdef _LIBC
238	weak_alias (__re_compile_pattern, re_compile_pattern)
239	#endif
240
241	/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
242	also be assigned to arbitrarily: each pattern buffer stores its own
243	syntax, so it can be changed between regex compilations. */
244	/* This has no initializer because initialized variables in Emacs
245	become read-only after dumping. */
246	reg_syntax_t re_syntax_options;
247
248
249	/* Specify the precise syntax of regexps for compilation. This provides
250	for compatibility for various utilities which historically have
251	different, incompatible syntaxes.
252
253	The argument SYNTAX is a bit mask comprised of the various bits
254	defined in regex.h. We return the old syntax. */
255
256	reg_syntax_t
257	re_set_syntax (syntax)
258	reg_syntax_t syntax;
259	{
260	reg_syntax_t ret = re_syntax_options;
261
262	re_syntax_options = syntax;
263	return ret;
264	}
265	#ifdef _LIBC
266	weak_alias (__re_set_syntax, re_set_syntax)
267	#endif
268
269	int
270	re_compile_fastmap (bufp)
271	struct re_pattern_buffer *bufp;
272	{
273	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
274	char *fastmap = bufp->fastmap;
275
276	memset (fastmap, '\0', sizeof (char) * SBC_MAX);
277	re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
278	if (dfa->init_state != dfa->init_state_word)
279	re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
280	if (dfa->init_state != dfa->init_state_nl)
281	re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
282	if (dfa->init_state != dfa->init_state_begbuf)
283	re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
284	bufp->fastmap_accurate = 1;
285	return 0;
286	}
287	#ifdef _LIBC
288	weak_alias (__re_compile_fastmap, re_compile_fastmap)
289	#endif
290
291	static inline void
292	__attribute ((always_inline))
293	re_set_fastmap (char *fastmap, int icase, int ch)
294	{
295	fastmap[ch] = 1;
296	if (icase)
297	fastmap[tolower (ch)] = 1;
298	}
299
300	/* Helper function for re_compile_fastmap.
301	Compile fastmap for the initial_state INIT_STATE. */
302
303	static void
304	re_compile_fastmap_iter (regex_t bufp, const re_dfastate_t init_state,
305	char *fastmap)
306	{
307	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
308	int node_cnt;
309	int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
310	for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
311	{
312	int node = init_state->nodes.elems[node_cnt];
313	re_token_type_t type = dfa->nodes[node].type;
314
315	if (type == CHARACTER)
316	{
317	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
318	#ifdef RE_ENABLE_I18N
319	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
320	{
321	unsigned char buf = alloca (dfa->mb_cur_max), p;
322	wchar_t wc;
323	mbstate_t state;
324
325	p = buf;
326	*p++ = dfa->nodes[node].opr.c;
327	while (++node < dfa->nodes_len
328	&& dfa->nodes[node].type == CHARACTER
329	&& dfa->nodes[node].mb_partial)
330	*p++ = dfa->nodes[node].opr.c;
331	memset (&state, '\0', sizeof (state));
332	if (mbrtowc (&wc, (const char *) buf, p - buf,
333	&state) == p - buf
334	&& (__wcrtomb ((char *) buf, towlower (wc), &state)
335	!= (size_t) -1))
336	re_set_fastmap (fastmap, 0, buf[0]);
337	}
338	#endif
339	}
340	else if (type == SIMPLE_BRACKET)
341	{
342	int i, ch;
343	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
344	{
345	int j;
346	bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
347	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
348	if (w & ((bitset_word_t) 1 << j))
349	re_set_fastmap (fastmap, icase, ch);
350	}
351	}
352	#ifdef RE_ENABLE_I18N
353	else if (type == COMPLEX_BRACKET)
354	{
355	int i;
356	re_charset_t *cset = dfa->nodes[node].opr.mbcset;
357	if (cset->non_match \|\| cset->ncoll_syms \|\| cset->nequiv_classes
358	\|\| cset->nranges \|\| cset->nchar_classes)
359	{
360	# ifdef _LIBC
361	if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
362	{
363	/* In this case we want to catch the bytes which are
364	the first byte of any collation elements.
365	e.g. In da_DK, we want to catch 'a' since "aa"
366	is a valid collation element, and don't catch
367	'b' since 'b' is the only collation element
368	which starts from 'b'. */
369	const int32_t table = (const int32_t )
370	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371	for (i = 0; i < SBC_MAX; ++i)
372	if (table[i] < 0)
373	re_set_fastmap (fastmap, icase, i);
374	}
375	# else
376	if (dfa->mb_cur_max > 1)
377	for (i = 0; i < SBC_MAX; ++i)
378	if (__btowc (i) == WEOF)
379	re_set_fastmap (fastmap, icase, i);
380	# endif /* not _LIBC */
381	}
382	for (i = 0; i < cset->nmbchars; ++i)
383	{
384	char buf[256];
385	mbstate_t state;
386	memset (&state, '\0', sizeof (state));
387	if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
388	re_set_fastmap (fastmap, icase, (unsigned char ) buf);
389	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
390	{
391	if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
392	!= (size_t) -1)
393	re_set_fastmap (fastmap, 0, (unsigned char ) buf);
394	}
395	}
396	}
397	#endif /* RE_ENABLE_I18N */
398	else if (type == OP_PERIOD
399	#ifdef RE_ENABLE_I18N
400	\|\| type == OP_UTF8_PERIOD
401	#endif /* RE_ENABLE_I18N */
402	\|\| type == END_OF_RE)
403	{
404	memset (fastmap, '\1', sizeof (char) * SBC_MAX);
405	if (type == END_OF_RE)
406	bufp->can_be_null = 1;
407	return;
408	}
409	}
410	}
411
412
413	/* Entry point for POSIX code. */
414	/* regcomp takes a regular expression as a string and compiles it.
415
416	PREG is a regex_t *. We do not expect any fields to be initialized,
417	since POSIX says we shouldn't. Thus, we set
418
419	`buffer' to the compiled pattern;
420	`used' to the length of the compiled pattern;
421	`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
422	REG_EXTENDED bit in CFLAGS is set; otherwise, to
423	RE_SYNTAX_POSIX_BASIC;
424	`newline_anchor' to REG_NEWLINE being set in CFLAGS;
425	`fastmap' to an allocated space for the fastmap;
426	`fastmap_accurate' to zero;
427	`re_nsub' to the number of subexpressions in PATTERN.
428
429	PATTERN is the address of the pattern string.
430
431	CFLAGS is a series of bits which affect compilation.
432
433	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
434	use POSIX basic syntax.
435
436	If REG_NEWLINE is set, then . and [^...] don't match newline.
437	Also, regexec will try a match beginning after every newline.
438
439	If REG_ICASE is set, then we considers upper- and lowercase
440	versions of letters to be equivalent when matching.
441
442	If REG_NOSUB is set, then when PREG is passed to regexec, that
443	routine will report only success or failure, and nothing about the
444	registers.
445
446	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
447	the return codes and their meanings.) */
448
449	int
450	regcomp (preg, pattern, cflags)
451	regex_t *__restrict preg;
452	const char *__restrict pattern;
453	int cflags;
454	{
455	reg_errcode_t ret;
456	reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
457	: RE_SYNTAX_POSIX_BASIC);
458
459	preg->buffer = NULL;
460	preg->allocated = 0;
461	preg->used = 0;
462
463	/* Try to allocate space for the fastmap. */
464	preg->fastmap = re_malloc (char, SBC_MAX);
465	if (BE (preg->fastmap == NULL, 0))
466	return REG_ESPACE;
467
468	syntax \|= (cflags & REG_ICASE) ? RE_ICASE : 0;
469
470	/* If REG_NEWLINE is set, newlines are treated differently. */
471	if (cflags & REG_NEWLINE)
472	{ /* REG_NEWLINE implies neither . nor [^...] match newline. */
473	syntax &= ~RE_DOT_NEWLINE;
474	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
475	/* It also changes the matching behavior. */
476	preg->newline_anchor = 1;
477	}
478	else
479	preg->newline_anchor = 0;
480	preg->no_sub = !!(cflags & REG_NOSUB);
481	preg->translate = NULL;
482
483	ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
484
485	/* POSIX doesn't distinguish between an unmatched open-group and an
486	unmatched close-group: both are REG_EPAREN. */
487	if (ret == REG_ERPAREN)
488	ret = REG_EPAREN;
489
490	/* We have already checked preg->fastmap != NULL. */
491	if (BE (ret == REG_NOERROR, 1))
492	/* Compute the fastmap now, since regexec cannot modify the pattern
493	buffer. This function never fails in this implementation. */
494	(void) re_compile_fastmap (preg);
495	else
496	{
497	/* Some error occurred while compiling the expression. */
498	re_free (preg->fastmap);
499	preg->fastmap = NULL;
500	}
501
502	return (int) ret;
503	}
504	#ifdef _LIBC
505	weak_alias (__regcomp, regcomp)
506	#endif
507
508	/* Returns a message corresponding to an error code, ERRCODE, returned
509	from either regcomp or regexec. We don't use PREG here. */
510
511	size_t
512	regerror (errcode, preg, errbuf, errbuf_size)
513	int errcode;
514	const regex_t *__restrict preg;
515	char *__restrict errbuf;
516	size_t errbuf_size;
517	{
518	const char *msg;
519	size_t msg_size;
520
521	if (BE (errcode < 0
522	\|\| errcode >= (int) (sizeof (__re_error_msgid_idx)
523	/ sizeof (__re_error_msgid_idx[0])), 0))
524	/* Only error codes returned by the rest of the code should be passed
525	to this routine. If we are given anything else, or if other regex
526	code generates an invalid error code, then the program has a bug.
527	Dump core so we can fix it. */
528	abort ();
529
530	msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
531
532	msg_size = strlen (msg) + 1; /* Includes the null. */
533
534	if (BE (errbuf_size != 0, 1))
535	{
536	if (BE (msg_size > errbuf_size, 0))
537	{
538	#if defined HAVE_MEMPCPY \|\| defined _LIBC
539	((char ) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
540	#else
541	memcpy (errbuf, msg, errbuf_size - 1);
542	errbuf[errbuf_size - 1] = 0;
543	#endif
544	}
545	else
546	memcpy (errbuf, msg, msg_size);
547	}
548
549	return msg_size;
550	}
551	#ifdef _LIBC
552	weak_alias (__regerror, regerror)
553	#endif
554
555
556	#ifdef RE_ENABLE_I18N
557	/* This static array is used for the map to single-byte characters when
558	UTF-8 is used. Otherwise we would allocate memory just to initialize
559	it the same all the time. UTF-8 is the preferred encoding so this is
560	a worthwhile optimization. */
561	static const bitset_t utf8_sb_map =
562	{
563	/* Set the first 128 bits. */
564	[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
565	};
566	#endif
567
568
569	static void
570	free_dfa_content (re_dfa_t *dfa)
571	{
572	int i, j;
573
574	if (dfa->nodes)
575	for (i = 0; i < dfa->nodes_len; ++i)
576	free_token (dfa->nodes + i);
577	re_free (dfa->nexts);
578	for (i = 0; i < dfa->nodes_len; ++i)
579	{
580	if (dfa->eclosures != NULL)
581	re_node_set_free (dfa->eclosures + i);
582	if (dfa->inveclosures != NULL)
583	re_node_set_free (dfa->inveclosures + i);
584	if (dfa->edests != NULL)
585	re_node_set_free (dfa->edests + i);
586	}
587	re_free (dfa->edests);
588	re_free (dfa->eclosures);
589	re_free (dfa->inveclosures);
590	re_free (dfa->nodes);
591
592	if (dfa->state_table)
593	for (i = 0; i <= dfa->state_hash_mask; ++i)
594	{
595	struct re_state_table_entry *entry = dfa->state_table + i;
596	for (j = 0; j < entry->num; ++j)
597	{
598	re_dfastate_t *state = entry->array[j];
599	free_state (state);
600	}
601	re_free (entry->array);
602	}
603	re_free (dfa->state_table);
604	#ifdef RE_ENABLE_I18N
605	if (dfa->sb_char != utf8_sb_map)
606	re_free (dfa->sb_char);
607	#endif
608	re_free (dfa->subexp_map);
609	#ifdef DEBUG
610	re_free (dfa->re_str);
611	#endif
612
613	re_free (dfa);
614	}
615
616
617	/* Free dynamically allocated space used by PREG. */
618
619	void
620	regfree (preg)
621	regex_t *preg;
622	{
623	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
624	if (BE (dfa != NULL, 1))
625	free_dfa_content (dfa);
626	preg->buffer = NULL;
627	preg->allocated = 0;
628
629	re_free (preg->fastmap);
630	preg->fastmap = NULL;
631
632	re_free (preg->translate);
633	preg->translate = NULL;
634	}
635	#ifdef _LIBC
636	weak_alias (__regfree, regfree)
637	#endif
638
639
640	/* Entry points compatible with 4.2 BSD regex library. We don't define
641	them unless specifically requested. */
642
643	#if defined _REGEX_RE_COMP \|\| defined _LIBC
644
645	/* BSD has one and only one pattern buffer. */
646	static struct re_pattern_buffer re_comp_buf;
647
648	char *
649	# ifdef _LIBC
650	/* Make these definitions weak in libc, so POSIX programs can redefine
651	these names if they don't use our functions, and still use
652	regcomp/regexec above without link errors. */
653	weak_function
654	# endif
655	re_comp (s)
656	const char *s;
657	{
658	reg_errcode_t ret;
659	char *fastmap;
660
661	if (!s)
662	{
663	if (!re_comp_buf.buffer)
664	return gettext ("No previous regular expression");
665	return 0;
666	}
667
668	if (re_comp_buf.buffer)
669	{
670	fastmap = re_comp_buf.fastmap;
671	re_comp_buf.fastmap = NULL;
672	__regfree (&re_comp_buf);
673	memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
674	re_comp_buf.fastmap = fastmap;
675	}
676
677	if (re_comp_buf.fastmap == NULL)
678	{
679	re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
680	if (re_comp_buf.fastmap == NULL)
681	return (char *) gettext (__re_error_msgid
682	+ __re_error_msgid_idx[(int) REG_ESPACE]);
683	}
684
685	/* Since `re_exec' always passes NULL for the `regs' argument, we
686	don't need to initialize the pattern buffer fields which affect it. */
687
688	/* Match anchors at newlines. */
689	re_comp_buf.newline_anchor = 1;
690
691	ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
692
693	if (!ret)
694	return NULL;
695
696	/* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
697	return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
698	}
699
700	#ifdef _LIBC
701	libc_freeres_fn (free_mem)
702	{
703	__regfree (&re_comp_buf);
704	}
705	#endif
706
707	#endif /* _REGEX_RE_COMP */
708
709
710	/* Internal entry point.
711	Compile the regular expression PATTERN, whose length is LENGTH.
712	SYNTAX indicate regular expression's syntax. */
713
714	static reg_errcode_t
715	re_compile_internal (regex_t preg, const char pattern, size_t length,
716	reg_syntax_t syntax)
717	{
718	reg_errcode_t err = REG_NOERROR;
719	re_dfa_t *dfa;
720	re_string_t regexp;
721
722	/* Initialize the pattern buffer. */
723	preg->fastmap_accurate = 0;
724	preg->syntax = syntax;
725	preg->not_bol = preg->not_eol = 0;
726	preg->used = 0;
727	preg->re_nsub = 0;
728	preg->can_be_null = 0;
729	preg->regs_allocated = REGS_UNALLOCATED;
730
731	/* Initialize the dfa. */
732	dfa = (re_dfa_t *) preg->buffer;
733	if (BE (preg->allocated < sizeof (re_dfa_t), 0))
734	{
735	/* If zero allocated, but buffer is non-null, try to realloc
736	enough space. This loses if buffer's address is bogus, but
737	that is the user's responsibility. If ->buffer is NULL this
738	is a simple allocation. */
739	dfa = re_realloc (preg->buffer, re_dfa_t, 1);
740	if (dfa == NULL)
741	return REG_ESPACE;
742	preg->allocated = sizeof (re_dfa_t);
743	preg->buffer = (unsigned char *) dfa;
744	}
745	preg->used = sizeof (re_dfa_t);
746
747	err = init_dfa (dfa, length);
748	if (BE (err != REG_NOERROR, 0))
749	{
750	free_dfa_content (dfa);
751	preg->buffer = NULL;
752	preg->allocated = 0;
753	return err;
754	}
755	#ifdef DEBUG
756	/* Note: length+1 will not overflow since it is checked in init_dfa. */
757	dfa->re_str = re_malloc (char, length + 1);
758	strncpy (dfa->re_str, pattern, length + 1);
759	#endif
760
761	__libc_lock_init (dfa->lock);
762
763	err = re_string_construct (&regexp, pattern, length, preg->translate,
764	syntax & RE_ICASE, dfa);
765	if (BE (err != REG_NOERROR, 0))
766	{
767	re_compile_internal_free_return:
768	free_workarea_compile (preg);
769	re_string_destruct (&regexp);
770	free_dfa_content (dfa);
771	preg->buffer = NULL;
772	preg->allocated = 0;
773	return err;
774	}
775
776	/* Parse the regular expression, and build a structure tree. */
777	preg->re_nsub = 0;
778	dfa->str_tree = parse (&regexp, preg, syntax, &err);
779	if (BE (dfa->str_tree == NULL, 0))
780	goto re_compile_internal_free_return;
781
782	/* Analyze the tree and create the nfa. */
783	err = analyze (preg);
784	if (BE (err != REG_NOERROR, 0))
785	goto re_compile_internal_free_return;
786
787	#ifdef RE_ENABLE_I18N
788	/* If possible, do searching in single byte encoding to speed things up. */
789	if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
790	optimize_utf8 (dfa);
791	#endif
792
793	/* Then create the initial state of the dfa. */
794	err = create_initial_state (dfa);
795
796	/* Release work areas. */
797	free_workarea_compile (preg);
798	re_string_destruct (&regexp);
799
800	if (BE (err != REG_NOERROR, 0))
801	{
802	free_dfa_content (dfa);
803	preg->buffer = NULL;
804	preg->allocated = 0;
805	}
806
807	return err;
808	}
809
810	/* Initialize DFA. We use the length of the regular expression PAT_LEN
811	as the initial length of some arrays. */
812
813	static reg_errcode_t
814	init_dfa (re_dfa_t *dfa, size_t pat_len)
815	{
816	unsigned int table_size;
817	#ifndef _LIBC
818	char *codeset_name;
819	#endif
820
821	memset (dfa, '\0', sizeof (re_dfa_t));
822
823	/* Force allocation of str_tree_storage the first time. */
824	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
825
826	/* Avoid overflows. */
827	if (pat_len == SIZE_MAX)
828	return REG_ESPACE;
829
830	dfa->nodes_alloc = pat_len + 1;
831	dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
832
833	/* table_size = 2 ^ ceil(log pat_len) */
834	for (table_size = 1; ; table_size <<= 1)
835	if (table_size > pat_len)
836	break;
837
838	dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
839	dfa->state_hash_mask = table_size - 1;
840
841	dfa->mb_cur_max = MB_CUR_MAX;
842	#ifdef _LIBC
843	if (dfa->mb_cur_max == 6
844	&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
845	dfa->is_utf8 = 1;
846	dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
847	!= 0);
848	#else
849	# ifdef HAVE_LANGINFO_CODESET
850	codeset_name = nl_langinfo (CODESET);
851	# else
852	codeset_name = getenv ("LC_ALL");
853	if (codeset_name == NULL \|\| codeset_name[0] == '\0')
854	codeset_name = getenv ("LC_CTYPE");
855	if (codeset_name == NULL \|\| codeset_name[0] == '\0')
856	codeset_name = getenv ("LANG");
857	if (codeset_name == NULL)
858	codeset_name = "";
859	else if (strchr (codeset_name, '.') != NULL)
860	codeset_name = strchr (codeset_name, '.') + 1;
861	# endif
862
863	if (strcasecmp (codeset_name, "UTF-8") == 0
864	\|\| strcasecmp (codeset_name, "UTF8") == 0)
865	dfa->is_utf8 = 1;
866
867	/* We check exhaustively in the loop below if this charset is a
868	superset of ASCII. */
869	dfa->map_notascii = 0;
870	#endif
871
872	#ifdef RE_ENABLE_I18N
873	if (dfa->mb_cur_max > 1)
874	{
875	if (dfa->is_utf8)
876	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
877	else
878	{
879	int i, j, ch;
880
881	dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
882	if (BE (dfa->sb_char == NULL, 0))
883	return REG_ESPACE;
884
885	/* Set the bits corresponding to single byte chars. */
886	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
887	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
888	{
889	wint_t wch = __btowc (ch);
890	if (wch != WEOF)
891	dfa->sb_char[i] \|= (bitset_word_t) 1 << j;
892	# ifndef _LIBC
893	if (isascii (ch) && wch != ch)
894	dfa->map_notascii = 1;
895	# endif
896	}
897	}
898	}
899	#endif
900
901	if (BE (dfa->nodes == NULL \|\| dfa->state_table == NULL, 0))
902	return REG_ESPACE;
903	return REG_NOERROR;
904	}
905
906	/* Initialize WORD_CHAR table, which indicate which character is
907	"word". In this case "word" means that it is the word construction
908	character used by some operators like "\<", "\>", etc. */
909
910	static void
911	internal_function
912	init_word_char (re_dfa_t *dfa)
913	{
914	int i, j, ch;
915	dfa->word_ops_used = 1;
916	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
917	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
918	if (isalnum (ch) \|\| ch == '_')
919	dfa->word_char[i] \|= (bitset_word_t) 1 << j;
920	}
921
922	/* Free the work area which are only used while compiling. */
923
924	static void
925	free_workarea_compile (regex_t *preg)
926	{
927	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
928	bin_tree_storage_t storage, next;
929	for (storage = dfa->str_tree_storage; storage; storage = next)
930	{
931	next = storage->next;
932	re_free (storage);
933	}
934	dfa->str_tree_storage = NULL;
935	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
936	dfa->str_tree = NULL;
937	re_free (dfa->org_indices);
938	dfa->org_indices = NULL;
939	}
940
941	/* Create initial states for all contexts. */
942
943	static reg_errcode_t
944	create_initial_state (re_dfa_t *dfa)
945	{
946	int first, i;
947	reg_errcode_t err;
948	re_node_set init_nodes;
949
950	/* Initial states have the epsilon closure of the node which is
951	the first node of the regular expression. */
952	first = dfa->str_tree->first->node_idx;
953	dfa->init_node = first;
954	err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
955	if (BE (err != REG_NOERROR, 0))
956	return err;
957
958	/* The back-references which are in initial states can epsilon transit,
959	since in this case all of the subexpressions can be null.
960	Then we add epsilon closures of the nodes which are the next nodes of
961	the back-references. */
962	if (dfa->nbackref > 0)
963	for (i = 0; i < init_nodes.nelem; ++i)
964	{
965	int node_idx = init_nodes.elems[i];
966	re_token_type_t type = dfa->nodes[node_idx].type;
967
968	int clexp_idx;
969	if (type != OP_BACK_REF)
970	continue;
971	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
972	{
973	re_token_t *clexp_node;
974	clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
975	if (clexp_node->type == OP_CLOSE_SUBEXP
976	&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
977	break;
978	}
979	if (clexp_idx == init_nodes.nelem)
980	continue;
981
982	if (type == OP_BACK_REF)
983	{
984	int dest_idx = dfa->edests[node_idx].elems[0];
985	if (!re_node_set_contains (&init_nodes, dest_idx))
986	{
987	re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
988	i = 0;
989	}
990	}
991	}
992
993	/* It must be the first time to invoke acquire_state. */
994	dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
995	/* We don't check ERR here, since the initial state must not be NULL. */
996	if (BE (dfa->init_state == NULL, 0))
997	return err;
998	if (dfa->init_state->has_constraint)
999	{
1000	dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1001	CONTEXT_WORD);
1002	dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1003	CONTEXT_NEWLINE);
1004	dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1005	&init_nodes,
1006	CONTEXT_NEWLINE
1007	\| CONTEXT_BEGBUF);
1008	if (BE (dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
1009	\|\| dfa->init_state_begbuf == NULL, 0))
1010	return err;
1011	}
1012	else
1013	dfa->init_state_word = dfa->init_state_nl
1014	= dfa->init_state_begbuf = dfa->init_state;
1015
1016	re_node_set_free (&init_nodes);
1017	return REG_NOERROR;
1018	}
1019
1020
1021	#ifdef RE_ENABLE_I18N
1022	/* If it is possible to do searching in single byte encoding instead of UTF-8
1023	to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1024	DFA nodes where needed. */
1025
1026	static void
1027	optimize_utf8 (re_dfa_t *dfa)
1028	{
1029	int node, i, mb_chars = 0, has_period = 0;
1030
1031	for (node = 0; node < dfa->nodes_len; ++node)
1032	switch (dfa->nodes[node].type)
1033	{
1034	case CHARACTER:
1035	if (dfa->nodes[node].opr.c >= 0x80)
1036	mb_chars = 1;
1037	break;
1038	case ANCHOR:
1039	switch (dfa->nodes[node].opr.idx)
1040	{
1041	case LINE_FIRST:
1042	case LINE_LAST:
1043	case BUF_FIRST:
1044	case BUF_LAST:
1045	break;
1046	default:
1047	/* Word anchors etc. cannot be handled. */
1048	return;
1049	}
1050	break;
1051	case OP_PERIOD:
1052	has_period = 1;
1053	break;
1054	case OP_BACK_REF:
1055	case OP_ALT:
1056	case END_OF_RE:
1057	case OP_DUP_ASTERISK:
1058	case OP_OPEN_SUBEXP:
1059	case OP_CLOSE_SUBEXP:
1060	break;
1061	case COMPLEX_BRACKET:
1062	return;
1063	case SIMPLE_BRACKET:
1064	/* Just double check. The non-ASCII range starts at 0x80. */
1065	assert (0x80 % BITSET_WORD_BITS == 0);
1066	for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1067	if (dfa->nodes[node].opr.sbcset[i])
1068	return;
1069	break;
1070	default:
1071	abort ();
1072	}
1073
1074	if (mb_chars \|\| has_period)
1075	for (node = 0; node < dfa->nodes_len; ++node)
1076	{
1077	if (dfa->nodes[node].type == CHARACTER
1078	&& dfa->nodes[node].opr.c >= 0x80)
1079	dfa->nodes[node].mb_partial = 0;
1080	else if (dfa->nodes[node].type == OP_PERIOD)
1081	dfa->nodes[node].type = OP_UTF8_PERIOD;
1082	}
1083
1084	/* The search can be in single byte locale. */
1085	dfa->mb_cur_max = 1;
1086	dfa->is_utf8 = 0;
1087	dfa->has_mb_node = dfa->nbackref > 0 \|\| has_period;
1088	}
1089	#endif
1090
1091
1092	/* Analyze the structure tree, and calculate "first", "next", "edest",
1093	"eclosure", and "inveclosure". */
1094
1095	static reg_errcode_t
1096	analyze (regex_t *preg)
1097	{
1098	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1099	reg_errcode_t ret;
1100
1101	/* Allocate arrays. */
1102	dfa->nexts = re_malloc (int, dfa->nodes_alloc);
1103	dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
1104	dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1105	dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1106	if (BE (dfa->nexts == NULL \|\| dfa->org_indices == NULL \|\| dfa->edests == NULL
1107	\|\| dfa->eclosures == NULL, 0))
1108	return REG_ESPACE;
1109
1110	dfa->subexp_map = re_malloc (int, preg->re_nsub);
1111	if (dfa->subexp_map != NULL)
1112	{
1113	int i;
1114	for (i = 0; i < preg->re_nsub; i++)
1115	dfa->subexp_map[i] = i;
1116	preorder (dfa->str_tree, optimize_subexps, dfa);
1117	for (i = 0; i < preg->re_nsub; i++)
1118	if (dfa->subexp_map[i] != i)
1119	break;
1120	if (i == preg->re_nsub)
1121	{
1122	free (dfa->subexp_map);
1123	dfa->subexp_map = NULL;
1124	}
1125	}
1126
1127	ret = postorder (dfa->str_tree, lower_subexps, preg);
1128	if (BE (ret != REG_NOERROR, 0))
1129	return ret;
1130	ret = postorder (dfa->str_tree, calc_first, dfa);
1131	if (BE (ret != REG_NOERROR, 0))
1132	return ret;
1133	preorder (dfa->str_tree, calc_next, dfa);
1134	ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1135	if (BE (ret != REG_NOERROR, 0))
1136	return ret;
1137	ret = calc_eclosure (dfa);
1138	if (BE (ret != REG_NOERROR, 0))
1139	return ret;
1140
1141	/* We only need this during the prune_impossible_nodes pass in regexec.c;
1142	skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
1143	if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1144	\|\| dfa->nbackref)
1145	{
1146	dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1147	if (BE (dfa->inveclosures == NULL, 0))
1148	return REG_ESPACE;
1149	ret = calc_inveclosure (dfa);
1150	}
1151
1152	return ret;
1153	}
1154
1155	/* Our parse trees are very unbalanced, so we cannot use a stack to
1156	implement parse tree visits. Instead, we use parent pointers and
1157	some hairy code in these two functions. */
1158	static reg_errcode_t
1159	postorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
1160	void *extra)
1161	{
1162	bin_tree_t node, prev;
1163
1164	for (node = root; ; )
1165	{
1166	/* Descend down the tree, preferably to the left (or to the right
1167	if that's the only child). */
1168	while (node->left \|\| node->right)
1169	if (node->left)
1170	node = node->left;
1171	else
1172	node = node->right;
1173
1174	do
1175	{
1176	reg_errcode_t err = fn (extra, node);
1177	if (BE (err != REG_NOERROR, 0))
1178	return err;
1179	if (node->parent == NULL)
1180	return REG_NOERROR;
1181	prev = node;
1182	node = node->parent;
1183	}
1184	/* Go up while we have a node that is reached from the right. */
1185	while (node->right == prev \|\| node->right == NULL);
1186	node = node->right;
1187	}
1188	}
1189
1190	static reg_errcode_t
1191	preorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
1192	void *extra)
1193	{
1194	bin_tree_t *node;
1195
1196	for (node = root; ; )
1197	{
1198	reg_errcode_t err = fn (extra, node);
1199	if (BE (err != REG_NOERROR, 0))
1200	return err;
1201
1202	/* Go to the left node, or up and to the right. */
1203	if (node->left)
1204	node = node->left;
1205	else
1206	{
1207	bin_tree_t *prev = NULL;
1208	while (node->right == prev \|\| node->right == NULL)
1209	{
1210	prev = node;
1211	node = node->parent;
1212	if (!node)
1213	return REG_NOERROR;
1214	}
1215	node = node->right;
1216	}
1217	}
1218	}
1219
1220	/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
1221	re_search_internal to map the inner one's opr.idx to this one's. Adjust
1222	backreferences as well. Requires a preorder visit. */
1223	static reg_errcode_t
1224	optimize_subexps (void extra, bin_tree_t node)
1225	{
1226	re_dfa_t dfa = (re_dfa_t ) extra;
1227
1228	if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1229	{
1230	int idx = node->token.opr.idx;
1231	node->token.opr.idx = dfa->subexp_map[idx];
1232	dfa->used_bkref_map \|= 1 << node->token.opr.idx;
1233	}
1234
1235	else if (node->token.type == SUBEXP
1236	&& node->left && node->left->token.type == SUBEXP)
1237	{
1238	int other_idx = node->left->token.opr.idx;
1239
1240	node->left = node->left->left;
1241	if (node->left)
1242	node->left->parent = node;
1243
1244	dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1245	if (other_idx < BITSET_WORD_BITS)
1246	dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1247	}
1248
1249	return REG_NOERROR;
1250	}
1251
1252	/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
1253	of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
1254	static reg_errcode_t
1255	lower_subexps (void extra, bin_tree_t node)
1256	{
1257	regex_t preg = (regex_t ) extra;
1258	reg_errcode_t err = REG_NOERROR;
1259
1260	if (node->left && node->left->token.type == SUBEXP)
1261	{
1262	node->left = lower_subexp (&err, preg, node->left);
1263	if (node->left)
1264	node->left->parent = node;
1265	}
1266	if (node->right && node->right->token.type == SUBEXP)
1267	{
1268	node->right = lower_subexp (&err, preg, node->right);
1269	if (node->right)
1270	node->right->parent = node;
1271	}
1272
1273	return err;
1274	}
1275
1276	static bin_tree_t *
1277	lower_subexp (reg_errcode_t err, regex_t preg, bin_tree_t *node)
1278	{
1279	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1280	bin_tree_t *body = node->left;
1281	bin_tree_t op, cls, tree1, tree;
1282
1283	if (preg->no_sub
1284	/* We do not optimize empty subexpressions, because otherwise we may
1285	have bad CONCAT nodes with NULL children. This is obviously not
1286	very common, so we do not lose much. An example that triggers
1287	this case is the sed "script" //x. */
1288	&& node->left != NULL
1289	&& (node->token.opr.idx >= BITSET_WORD_BITS
1290	\|\| !(dfa->used_bkref_map
1291	& ((bitset_word_t) 1 << node->token.opr.idx))))
1292	return node->left;
1293
1294	/* Convert the SUBEXP node to the concatenation of an
1295	OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
1296	op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1297	cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1298	tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1299	tree = create_tree (dfa, op, tree1, CONCAT);
1300	if (BE (tree == NULL \|\| tree1 == NULL \|\| op == NULL \|\| cls == NULL, 0))
1301	{
1302	*err = REG_ESPACE;
1303	return NULL;
1304	}
1305
1306	op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1307	op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1308	return tree;
1309	}
1310
1311	/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
1312	nodes. Requires a postorder visit. */
1313	static reg_errcode_t
1314	calc_first (void extra, bin_tree_t node)
1315	{
1316	re_dfa_t dfa = (re_dfa_t ) extra;
1317	if (node->token.type == CONCAT)
1318	{
1319	node->first = node->left->first;
1320	node->node_idx = node->left->node_idx;
1321	}
1322	else
1323	{
1324	node->first = node;
1325	node->node_idx = re_dfa_add_node (dfa, node->token);
1326	if (BE (node->node_idx == -1, 0))
1327	return REG_ESPACE;
1328	}
1329	return REG_NOERROR;
1330	}
1331
1332	/* Pass 2: compute NEXT on the tree. Preorder visit. */
1333	static reg_errcode_t
1334	calc_next (void extra, bin_tree_t node)
1335	{
1336	switch (node->token.type)
1337	{
1338	case OP_DUP_ASTERISK:
1339	node->left->next = node;
1340	break;
1341	case CONCAT:
1342	node->left->next = node->right->first;
1343	node->right->next = node->next;
1344	break;
1345	default:
1346	if (node->left)
1347	node->left->next = node->next;
1348	if (node->right)
1349	node->right->next = node->next;
1350	break;
1351	}
1352	return REG_NOERROR;
1353	}
1354
1355	/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
1356	static reg_errcode_t
1357	link_nfa_nodes (void extra, bin_tree_t node)
1358	{
1359	re_dfa_t dfa = (re_dfa_t ) extra;
1360	int idx = node->node_idx;
1361	reg_errcode_t err = REG_NOERROR;
1362
1363	switch (node->token.type)
1364	{
1365	case CONCAT:
1366	break;
1367
1368	case END_OF_RE:
1369	assert (node->next == NULL);
1370	break;
1371
1372	case OP_DUP_ASTERISK:
1373	case OP_ALT:
1374	{
1375	int left, right;
1376	dfa->has_plural_match = 1;
1377	if (node->left != NULL)
1378	left = node->left->first->node_idx;
1379	else
1380	left = node->next->node_idx;
1381	if (node->right != NULL)
1382	right = node->right->first->node_idx;
1383	else
1384	right = node->next->node_idx;
1385	assert (left > -1);
1386	assert (right > -1);
1387	err = re_node_set_init_2 (dfa->edests + idx, left, right);
1388	}
1389	break;
1390
1391	case ANCHOR:
1392	case OP_OPEN_SUBEXP:
1393	case OP_CLOSE_SUBEXP:
1394	err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1395	break;
1396
1397	case OP_BACK_REF:
1398	dfa->nexts[idx] = node->next->node_idx;
1399	if (node->token.type == OP_BACK_REF)
1400	re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1401	break;
1402
1403	default:
1404	assert (!IS_EPSILON_NODE (node->token.type));
1405	dfa->nexts[idx] = node->next->node_idx;
1406	break;
1407	}
1408
1409	return err;
1410	}
1411
1412	/* Duplicate the epsilon closure of the node ROOT_NODE.
1413	Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1414	to their own constraint. */
1415
1416	static reg_errcode_t
1417	internal_function
1418	duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node,
1419	int root_node, unsigned int init_constraint)
1420	{
1421	int org_node, clone_node, ret;
1422	unsigned int constraint = init_constraint;
1423	for (org_node = top_org_node, clone_node = top_clone_node;;)
1424	{
1425	int org_dest, clone_dest;
1426	if (dfa->nodes[org_node].type == OP_BACK_REF)
1427	{
1428	/* If the back reference epsilon-transit, its destination must
1429	also have the constraint. Then duplicate the epsilon closure
1430	of the destination of the back reference, and store it in
1431	edests of the back reference. */
1432	org_dest = dfa->nexts[org_node];
1433	re_node_set_empty (dfa->edests + clone_node);
1434	clone_dest = duplicate_node (dfa, org_dest, constraint);
1435	if (BE (clone_dest == -1, 0))
1436	return REG_ESPACE;
1437	dfa->nexts[clone_node] = dfa->nexts[org_node];
1438	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1439	if (BE (ret < 0, 0))
1440	return REG_ESPACE;
1441	}
1442	else if (dfa->edests[org_node].nelem == 0)
1443	{
1444	/* In case of the node can't epsilon-transit, don't duplicate the
1445	destination and store the original destination as the
1446	destination of the node. */
1447	dfa->nexts[clone_node] = dfa->nexts[org_node];
1448	break;
1449	}
1450	else if (dfa->edests[org_node].nelem == 1)
1451	{
1452	/* In case of the node can epsilon-transit, and it has only one
1453	destination. */
1454	org_dest = dfa->edests[org_node].elems[0];
1455	re_node_set_empty (dfa->edests + clone_node);
1456	if (dfa->nodes[org_node].type == ANCHOR)
1457	{
1458	/* In case of the node has another constraint, append it. */
1459	if (org_node == root_node && clone_node != org_node)
1460	{
1461	/* ...but if the node is root_node itself, it means the
1462	epsilon closure have a loop, then tie it to the
1463	destination of the root_node. */
1464	ret = re_node_set_insert (dfa->edests + clone_node,
1465	org_dest);
1466	if (BE (ret < 0, 0))
1467	return REG_ESPACE;
1468	break;
1469	}
1470	constraint \|= dfa->nodes[org_node].opr.ctx_type;
1471	}
1472	clone_dest = duplicate_node (dfa, org_dest, constraint);
1473	if (BE (clone_dest == -1, 0))
1474	return REG_ESPACE;
1475	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1476	if (BE (ret < 0, 0))
1477	return REG_ESPACE;
1478	}
1479	else /* dfa->edests[org_node].nelem == 2 */
1480	{
1481	/* In case of the node can epsilon-transit, and it has two
1482	destinations. In the bin_tree_t and DFA, that's '\|' and ''. /
1483	org_dest = dfa->edests[org_node].elems[0];
1484	re_node_set_empty (dfa->edests + clone_node);
1485	/* Search for a duplicated node which satisfies the constraint. */
1486	clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1487	if (clone_dest == -1)
1488	{
1489	/* There are no such a duplicated node, create a new one. */
1490	reg_errcode_t err;
1491	clone_dest = duplicate_node (dfa, org_dest, constraint);
1492	if (BE (clone_dest == -1, 0))
1493	return REG_ESPACE;
1494	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1495	if (BE (ret < 0, 0))
1496	return REG_ESPACE;
1497	err = duplicate_node_closure (dfa, org_dest, clone_dest,
1498	root_node, constraint);
1499	if (BE (err != REG_NOERROR, 0))
1500	return err;
1501	}
1502	else
1503	{
1504	/* There are a duplicated node which satisfy the constraint,
1505	use it to avoid infinite loop. */
1506	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1507	if (BE (ret < 0, 0))
1508	return REG_ESPACE;
1509	}
1510
1511	org_dest = dfa->edests[org_node].elems[1];
1512	clone_dest = duplicate_node (dfa, org_dest, constraint);
1513	if (BE (clone_dest == -1, 0))
1514	return REG_ESPACE;
1515	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1516	if (BE (ret < 0, 0))
1517	return REG_ESPACE;
1518	}
1519	org_node = org_dest;
1520	clone_node = clone_dest;
1521	}
1522	return REG_NOERROR;
1523	}
1524
1525	/* Search for a node which is duplicated from the node ORG_NODE, and
1526	satisfies the constraint CONSTRAINT. */
1527
1528	static int
1529	search_duplicated_node (const re_dfa_t *dfa, int org_node,
1530	unsigned int constraint)
1531	{
1532	int idx;
1533	for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1534	{
1535	if (org_node == dfa->org_indices[idx]
1536	&& constraint == dfa->nodes[idx].constraint)
1537	return idx; /* Found. */
1538	}
1539	return -1; /* Not found. */
1540	}
1541
1542	/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1543	Return the index of the new node, or -1 if insufficient storage is
1544	available. */
1545
1546	static int
1547	duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint)
1548	{
1549	int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1550	if (BE (dup_idx != -1, 1))
1551	{
1552	dfa->nodes[dup_idx].constraint = constraint;
1553	if (dfa->nodes[org_idx].type == ANCHOR)
1554	dfa->nodes[dup_idx].constraint \|= dfa->nodes[org_idx].opr.ctx_type;
1555	dfa->nodes[dup_idx].duplicated = 1;
1556
1557	/* Store the index of the original node. */
1558	dfa->org_indices[dup_idx] = org_idx;
1559	}
1560	return dup_idx;
1561	}
1562
1563	static reg_errcode_t
1564	calc_inveclosure (re_dfa_t *dfa)
1565	{
1566	int src, idx, ret;
1567	for (idx = 0; idx < dfa->nodes_len; ++idx)
1568	re_node_set_init_empty (dfa->inveclosures + idx);
1569
1570	for (src = 0; src < dfa->nodes_len; ++src)
1571	{
1572	int *elems = dfa->eclosures[src].elems;
1573	for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1574	{
1575	ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1576	if (BE (ret == -1, 0))
1577	return REG_ESPACE;
1578	}
1579	}
1580
1581	return REG_NOERROR;
1582	}
1583
1584	/* Calculate "eclosure" for all the node in DFA. */
1585
1586	static reg_errcode_t
1587	calc_eclosure (re_dfa_t *dfa)
1588	{
1589	int node_idx, incomplete;
1590	#ifdef DEBUG
1591	assert (dfa->nodes_len > 0);
1592	#endif
1593	incomplete = 0;
1594	/* For each nodes, calculate epsilon closure. */
1595	for (node_idx = 0; ; ++node_idx)
1596	{
1597	reg_errcode_t err;
1598	re_node_set eclosure_elem;
1599	if (node_idx == dfa->nodes_len)
1600	{
1601	if (!incomplete)
1602	break;
1603	incomplete = 0;
1604	node_idx = 0;
1605	}
1606
1607	#ifdef DEBUG
1608	assert (dfa->eclosures[node_idx].nelem != -1);
1609	#endif
1610
1611	/* If we have already calculated, skip it. */
1612	if (dfa->eclosures[node_idx].nelem != 0)
1613	continue;
1614	/* Calculate epsilon closure of `node_idx'. */
1615	err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
1616	if (BE (err != REG_NOERROR, 0))
1617	return err;
1618
1619	if (dfa->eclosures[node_idx].nelem == 0)
1620	{
1621	incomplete = 1;
1622	re_node_set_free (&eclosure_elem);
1623	}
1624	}
1625	return REG_NOERROR;
1626	}
1627
1628	/* Calculate epsilon closure of NODE. */
1629
1630	static reg_errcode_t
1631	calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa, int node, int root)
1632	{
1633	reg_errcode_t err;
1634	unsigned int constraint;
1635	int i, incomplete;
1636	re_node_set eclosure;
1637	incomplete = 0;
1638	err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1639	if (BE (err != REG_NOERROR, 0))
1640	return err;
1641
1642	/* This indicates that we are calculating this node now.
1643	We reference this value to avoid infinite loop. */
1644	dfa->eclosures[node].nelem = -1;
1645
1646	constraint = ((dfa->nodes[node].type == ANCHOR)
1647	? dfa->nodes[node].opr.ctx_type : 0);
1648	/* If the current node has constraints, duplicate all nodes.
1649	Since they must inherit the constraints. */
1650	if (constraint
1651	&& dfa->edests[node].nelem
1652	&& !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1653	{
1654	int org_node, cur_node;
1655	org_node = cur_node = node;
1656	err = duplicate_node_closure (dfa, node, node, node, constraint);
1657	if (BE (err != REG_NOERROR, 0))
1658	return err;
1659	}
1660
1661	/* Expand each epsilon destination nodes. */
1662	if (IS_EPSILON_NODE(dfa->nodes[node].type))
1663	for (i = 0; i < dfa->edests[node].nelem; ++i)
1664	{
1665	re_node_set eclosure_elem;
1666	int edest = dfa->edests[node].elems[i];
1667	/* If calculating the epsilon closure of `edest' is in progress,
1668	return intermediate result. */
1669	if (dfa->eclosures[edest].nelem == -1)
1670	{
1671	incomplete = 1;
1672	continue;
1673	}
1674	/* If we haven't calculated the epsilon closure of `edest' yet,
1675	calculate now. Otherwise use calculated epsilon closure. */
1676	if (dfa->eclosures[edest].nelem == 0)
1677	{
1678	err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
1679	if (BE (err != REG_NOERROR, 0))
1680	return err;
1681	}
1682	else
1683	eclosure_elem = dfa->eclosures[edest];
1684	/* Merge the epsilon closure of `edest'. */
1685	re_node_set_merge (&eclosure, &eclosure_elem);
1686	/* If the epsilon closure of `edest' is incomplete,
1687	the epsilon closure of this node is also incomplete. */
1688	if (dfa->eclosures[edest].nelem == 0)
1689	{
1690	incomplete = 1;
1691	re_node_set_free (&eclosure_elem);
1692	}
1693	}
1694
1695	/* Epsilon closures include itself. */
1696	re_node_set_insert (&eclosure, node);
1697	if (incomplete && !root)
1698	dfa->eclosures[node].nelem = 0;
1699	else
1700	dfa->eclosures[node] = eclosure;
1701	*new_set = eclosure;
1702	return REG_NOERROR;
1703	}
1704
1705
1706	/* Functions for token which are used in the parser. */
1707
1708	/* Fetch a token from INPUT.
1709	We must not use this function inside bracket expressions. */
1710
1711	static void
1712	internal_function
1713	fetch_token (re_token_t result, re_string_t input, reg_syntax_t syntax)
1714	{
1715	re_string_skip_bytes (input, peek_token (result, input, syntax));
1716	}
1717
1718	/* Peek a token from INPUT, and return the length of the token.
1719	We must not use this function inside bracket expressions. */
1720
1721	static int
1722	internal_function
1723	peek_token (re_token_t token, re_string_t input, reg_syntax_t syntax)
1724	{
1725	unsigned char c;
1726
1727	if (re_string_eoi (input))
1728	{
1729	token->type = END_OF_RE;
1730	return 0;
1731	}
1732
1733	c = re_string_peek_byte (input, 0);
1734	token->opr.c = c;
1735
1736	token->word_char = 0;
1737	#ifdef RE_ENABLE_I18N
1738	token->mb_partial = 0;
1739	if (input->mb_cur_max > 1 &&
1740	!re_string_first_byte (input, re_string_cur_idx (input)))
1741	{
1742	token->type = CHARACTER;
1743	token->mb_partial = 1;
1744	return 1;
1745	}
1746	#endif
1747	if (c == '\\')
1748	{
1749	unsigned char c2;
1750	if (re_string_cur_idx (input) + 1 >= re_string_length (input))
1751	{
1752	token->type = BACK_SLASH;
1753	return 1;
1754	}
1755
1756	c2 = re_string_peek_byte_case (input, 1);
1757	token->opr.c = c2;
1758	token->type = CHARACTER;
1759	#ifdef RE_ENABLE_I18N
1760	if (input->mb_cur_max > 1)
1761	{
1762	wint_t wc = re_string_wchar_at (input,
1763	re_string_cur_idx (input) + 1);
1764	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1765	}
1766	else
1767	#endif
1768	token->word_char = IS_WORD_CHAR (c2) != 0;
1769
1770	switch (c2)
1771	{
1772	case '\|':
1773	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1774	token->type = OP_ALT;
1775	break;
1776	case '1': case '2': case '3': case '4': case '5':
1777	case '6': case '7': case '8': case '9':
1778	if (!(syntax & RE_NO_BK_REFS))
1779	{
1780	token->type = OP_BACK_REF;
1781	token->opr.idx = c2 - '1';
1782	}
1783	break;
1784	case '<':
1785	if (!(syntax & RE_NO_GNU_OPS))
1786	{
1787	token->type = ANCHOR;
1788	token->opr.ctx_type = WORD_FIRST;
1789	}
1790	break;
1791	case '>':
1792	if (!(syntax & RE_NO_GNU_OPS))
1793	{
1794	token->type = ANCHOR;
1795	token->opr.ctx_type = WORD_LAST;
1796	}
1797	break;
1798	case 'b':
1799	if (!(syntax & RE_NO_GNU_OPS))
1800	{
1801	token->type = ANCHOR;
1802	token->opr.ctx_type = WORD_DELIM;
1803	}
1804	break;
1805	case 'B':
1806	if (!(syntax & RE_NO_GNU_OPS))
1807	{
1808	token->type = ANCHOR;
1809	token->opr.ctx_type = NOT_WORD_DELIM;
1810	}
1811	break;
1812	case 'w':
1813	if (!(syntax & RE_NO_GNU_OPS))
1814	token->type = OP_WORD;
1815	break;
1816	case 'W':
1817	if (!(syntax & RE_NO_GNU_OPS))
1818	token->type = OP_NOTWORD;
1819	break;
1820	case 's':
1821	if (!(syntax & RE_NO_GNU_OPS))
1822	token->type = OP_SPACE;
1823	break;
1824	case 'S':
1825	if (!(syntax & RE_NO_GNU_OPS))
1826	token->type = OP_NOTSPACE;
1827	break;
1828	case '`':
1829	if (!(syntax & RE_NO_GNU_OPS))
1830	{
1831	token->type = ANCHOR;
1832	token->opr.ctx_type = BUF_FIRST;
1833	}
1834	break;
1835	case '\'':
1836	if (!(syntax & RE_NO_GNU_OPS))
1837	{
1838	token->type = ANCHOR;
1839	token->opr.ctx_type = BUF_LAST;
1840	}
1841	break;
1842	case '(':
1843	if (!(syntax & RE_NO_BK_PARENS))
1844	token->type = OP_OPEN_SUBEXP;
1845	break;
1846	case ')':
1847	if (!(syntax & RE_NO_BK_PARENS))
1848	token->type = OP_CLOSE_SUBEXP;
1849	break;
1850	case '+':
1851	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1852	token->type = OP_DUP_PLUS;
1853	break;
1854	case '?':
1855	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1856	token->type = OP_DUP_QUESTION;
1857	break;
1858	case '{':
1859	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1860	token->type = OP_OPEN_DUP_NUM;
1861	break;
1862	case '}':
1863	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1864	token->type = OP_CLOSE_DUP_NUM;
1865	break;
1866	default:
1867	break;
1868	}
1869	return 2;
1870	}
1871
1872	token->type = CHARACTER;
1873	#ifdef RE_ENABLE_I18N
1874	if (input->mb_cur_max > 1)
1875	{
1876	wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1877	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1878	}
1879	else
1880	#endif
1881	token->word_char = IS_WORD_CHAR (token->opr.c);
1882
1883	switch (c)
1884	{
1885	case '\n':
1886	if (syntax & RE_NEWLINE_ALT)
1887	token->type = OP_ALT;
1888	break;
1889	case '\|':
1890	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1891	token->type = OP_ALT;
1892	break;
1893	case '*':
1894	token->type = OP_DUP_ASTERISK;
1895	break;
1896	case '+':
1897	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1898	token->type = OP_DUP_PLUS;
1899	break;
1900	case '?':
1901	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1902	token->type = OP_DUP_QUESTION;
1903	break;
1904	case '{':
1905	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1906	token->type = OP_OPEN_DUP_NUM;
1907	break;
1908	case '}':
1909	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1910	token->type = OP_CLOSE_DUP_NUM;
1911	break;
1912	case '(':
1913	if (syntax & RE_NO_BK_PARENS)
1914	token->type = OP_OPEN_SUBEXP;
1915	break;
1916	case ')':
1917	if (syntax & RE_NO_BK_PARENS)
1918	token->type = OP_CLOSE_SUBEXP;
1919	break;
1920	case '[':
1921	token->type = OP_OPEN_BRACKET;
1922	break;
1923	case '.':
1924	token->type = OP_PERIOD;
1925	break;
1926	case '^':
1927	if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS \| RE_CARET_ANCHORS_HERE)) &&
1928	re_string_cur_idx (input) != 0)
1929	{
1930	char prev = re_string_peek_byte (input, -1);
1931	if (!(syntax & RE_NEWLINE_ALT) \|\| prev != '\n')
1932	break;
1933	}
1934	token->type = ANCHOR;
1935	token->opr.ctx_type = LINE_FIRST;
1936	break;
1937	case '$':
1938	if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1939	re_string_cur_idx (input) + 1 != re_string_length (input))
1940	{
1941	re_token_t next;
1942	re_string_skip_bytes (input, 1);
1943	peek_token (&next, input, syntax);
1944	re_string_skip_bytes (input, -1);
1945	if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1946	break;
1947	}
1948	token->type = ANCHOR;
1949	token->opr.ctx_type = LINE_LAST;
1950	break;
1951	default:
1952	break;
1953	}
1954	return 1;
1955	}
1956
1957	/* Peek a token from INPUT, and return the length of the token.
1958	We must not use this function out of bracket expressions. */
1959
1960	static int
1961	internal_function
1962	peek_token_bracket (re_token_t token, re_string_t input, reg_syntax_t syntax)
1963	{
1964	unsigned char c;
1965	if (re_string_eoi (input))
1966	{
1967	token->type = END_OF_RE;
1968	return 0;
1969	}
1970	c = re_string_peek_byte (input, 0);
1971	token->opr.c = c;
1972
1973	#ifdef RE_ENABLE_I18N
1974	if (input->mb_cur_max > 1 &&
1975	!re_string_first_byte (input, re_string_cur_idx (input)))
1976	{
1977	token->type = CHARACTER;
1978	return 1;
1979	}
1980	#endif /* RE_ENABLE_I18N */
1981
1982	if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
1983	&& re_string_cur_idx (input) + 1 < re_string_length (input))
1984	{
1985	/* In this case, '\' escape a character. */
1986	unsigned char c2;
1987	re_string_skip_bytes (input, 1);
1988	c2 = re_string_peek_byte (input, 0);
1989	token->opr.c = c2;
1990	token->type = CHARACTER;
1991	return 1;
1992	}
1993	if (c == '[') /* '[' is a special char in a bracket exps. */
1994	{
1995	unsigned char c2;
1996	int token_len;
1997	if (re_string_cur_idx (input) + 1 < re_string_length (input))
1998	c2 = re_string_peek_byte (input, 1);
1999	else
2000	c2 = 0;
2001	token->opr.c = c2;
2002	token_len = 2;
2003	switch (c2)
2004	{
2005	case '.':
2006	token->type = OP_OPEN_COLL_ELEM;
2007	break;
2008	case '=':
2009	token->type = OP_OPEN_EQUIV_CLASS;
2010	break;
2011	case ':':
2012	if (syntax & RE_CHAR_CLASSES)
2013	{
2014	token->type = OP_OPEN_CHAR_CLASS;
2015	break;
2016	}
2017	/* else fall through. */
2018	default:
2019	token->type = CHARACTER;
2020	token->opr.c = c;
2021	token_len = 1;
2022	break;
2023	}
2024	return token_len;
2025	}
2026	switch (c)
2027	{
2028	case '-':
2029	token->type = OP_CHARSET_RANGE;
2030	break;
2031	case ']':
2032	token->type = OP_CLOSE_BRACKET;
2033	break;
2034	case '^':
2035	token->type = OP_NON_MATCH_LIST;
2036	break;
2037	default:
2038	token->type = CHARACTER;
2039	}
2040	return 1;
2041	}
2042
2043
2044	/* Functions for parser. */
2045
2046	/* Entry point of the parser.
2047	Parse the regular expression REGEXP and return the structure tree.
2048	If an error is occured, ERR is set by error code, and return NULL.
2049	This function build the following tree, from regular expression <reg_exp>:
2050	CAT
2051	/ \
2052	/ \
2053	<reg_exp> EOR
2054
2055	CAT means concatenation.
2056	EOR means end of regular expression. */
2057
2058	static bin_tree_t *
2059	parse (re_string_t regexp, regex_t preg, reg_syntax_t syntax,
2060	reg_errcode_t *err)
2061	{
2062	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2063	bin_tree_t tree, eor, *root;
2064	re_token_t current_token;
2065	dfa->syntax = syntax;
2066	fetch_token (&current_token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2067	tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
2068	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2069	return NULL;
2070	eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2071	if (tree != NULL)
2072	root = create_tree (dfa, tree, eor, CONCAT);
2073	else
2074	root = eor;
2075	if (BE (eor == NULL \|\| root == NULL, 0))
2076	{
2077	*err = REG_ESPACE;
2078	return NULL;
2079	}
2080	return root;
2081	}
2082
2083	/* This function build the following tree, from regular expression
2084	<branch1>\|<branch2>:
2085	ALT
2086	/ \
2087	/ \
2088	<branch1> <branch2>
2089
2090	ALT means alternative, which represents the operator `\|'. */
2091
2092	static bin_tree_t *
2093	parse_reg_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2094	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2095	{
2096	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2097	bin_tree_t tree, branch = NULL;
2098	tree = parse_branch (regexp, preg, token, syntax, nest, err);
2099	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2100	return NULL;
2101
2102	while (token->type == OP_ALT)
2103	{
2104	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2105	if (token->type != OP_ALT && token->type != END_OF_RE
2106	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
2107	{
2108	branch = parse_branch (regexp, preg, token, syntax, nest, err);
2109	if (BE (*err != REG_NOERROR && branch == NULL, 0))
2110	return NULL;
2111	}
2112	else
2113	branch = NULL;
2114	tree = create_tree (dfa, tree, branch, OP_ALT);
2115	if (BE (tree == NULL, 0))
2116	{
2117	*err = REG_ESPACE;
2118	return NULL;
2119	}
2120	}
2121	return tree;
2122	}
2123
2124	/* This function build the following tree, from regular expression
2125	<exp1><exp2>:
2126	CAT
2127	/ \
2128	/ \
2129	<exp1> <exp2>
2130
2131	CAT means concatenation. */
2132
2133	static bin_tree_t *
2134	parse_branch (re_string_t regexp, regex_t preg, re_token_t *token,
2135	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2136	{
2137	bin_tree_t tree, exp;
2138	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2139	tree = parse_expression (regexp, preg, token, syntax, nest, err);
2140	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2141	return NULL;
2142
2143	while (token->type != OP_ALT && token->type != END_OF_RE
2144	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
2145	{
2146	exp = parse_expression (regexp, preg, token, syntax, nest, err);
2147	if (BE (*err != REG_NOERROR && exp == NULL, 0))
2148	{
2149	return NULL;
2150	}
2151	if (tree != NULL && exp != NULL)
2152	{
2153	tree = create_tree (dfa, tree, exp, CONCAT);
2154	if (tree == NULL)
2155	{
2156	*err = REG_ESPACE;
2157	return NULL;
2158	}
2159	}
2160	else if (tree == NULL)
2161	tree = exp;
2162	/* Otherwise exp == NULL, we don't need to create new tree. */
2163	}
2164	return tree;
2165	}
2166
2167	/* This function build the following tree, from regular expression a*:
2168	*
2169	\|
2170	a
2171	*/
2172
2173	static bin_tree_t *
2174	parse_expression (re_string_t regexp, regex_t preg, re_token_t *token,
2175	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2176	{
2177	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2178	bin_tree_t *tree;
2179	switch (token->type)
2180	{
2181	case CHARACTER:
2182	tree = create_token_tree (dfa, NULL, NULL, token);
2183	if (BE (tree == NULL, 0))
2184	{
2185	*err = REG_ESPACE;
2186	return NULL;
2187	}
2188	#ifdef RE_ENABLE_I18N
2189	if (dfa->mb_cur_max > 1)
2190	{
2191	while (!re_string_eoi (regexp)
2192	&& !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2193	{
2194	bin_tree_t *mbc_remain;
2195	fetch_token (token, regexp, syntax);
2196	mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2197	tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2198	if (BE (mbc_remain == NULL \|\| tree == NULL, 0))
2199	{
2200	*err = REG_ESPACE;
2201	return NULL;
2202	}
2203	}
2204	}
2205	#endif
2206	break;
2207	case OP_OPEN_SUBEXP:
2208	tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
2209	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2210	return NULL;
2211	break;
2212	case OP_OPEN_BRACKET:
2213	tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2214	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2215	return NULL;
2216	break;
2217	case OP_BACK_REF:
2218	if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
2219	{
2220	*err = REG_ESUBREG;
2221	return NULL;
2222	}
2223	dfa->used_bkref_map \|= 1 << token->opr.idx;
2224	tree = create_token_tree (dfa, NULL, NULL, token);
2225	if (BE (tree == NULL, 0))
2226	{
2227	*err = REG_ESPACE;
2228	return NULL;
2229	}
2230	++dfa->nbackref;
2231	dfa->has_mb_node = 1;
2232	break;
2233	case OP_OPEN_DUP_NUM:
2234	if (syntax & RE_CONTEXT_INVALID_DUP)
2235	{
2236	*err = REG_BADRPT;
2237	return NULL;
2238	}
2239	/* FALLTHROUGH */
2240	case OP_DUP_ASTERISK:
2241	case OP_DUP_PLUS:
2242	case OP_DUP_QUESTION:
2243	if (syntax & RE_CONTEXT_INVALID_OPS)
2244	{
2245	*err = REG_BADRPT;
2246	return NULL;
2247	}
2248	else if (syntax & RE_CONTEXT_INDEP_OPS)
2249	{
2250	fetch_token (token, regexp, syntax);
2251	return parse_expression (regexp, preg, token, syntax, nest, err);
2252	}
2253	/* else fall through */
2254	case OP_CLOSE_SUBEXP:
2255	if ((token->type == OP_CLOSE_SUBEXP) &&
2256	!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2257	{
2258	*err = REG_ERPAREN;
2259	return NULL;
2260	}
2261	/* else fall through */
2262	case OP_CLOSE_DUP_NUM:
2263	/* We treat it as a normal character. */
2264
2265	/* Then we can these characters as normal characters. */
2266	token->type = CHARACTER;
2267	/* mb_partial and word_char bits should be initialized already
2268	by peek_token. */
2269	tree = create_token_tree (dfa, NULL, NULL, token);
2270	if (BE (tree == NULL, 0))
2271	{
2272	*err = REG_ESPACE;
2273	return NULL;
2274	}
2275	break;
2276	case ANCHOR:
2277	if ((token->opr.ctx_type
2278	& (WORD_DELIM \| NOT_WORD_DELIM \| WORD_FIRST \| WORD_LAST))
2279	&& dfa->word_ops_used == 0)
2280	init_word_char (dfa);
2281	if (token->opr.ctx_type == WORD_DELIM
2282	\|\| token->opr.ctx_type == NOT_WORD_DELIM)
2283	{
2284	bin_tree_t tree_first, tree_last;
2285	if (token->opr.ctx_type == WORD_DELIM)
2286	{
2287	token->opr.ctx_type = WORD_FIRST;
2288	tree_first = create_token_tree (dfa, NULL, NULL, token);
2289	token->opr.ctx_type = WORD_LAST;
2290	}
2291	else
2292	{
2293	token->opr.ctx_type = INSIDE_WORD;
2294	tree_first = create_token_tree (dfa, NULL, NULL, token);
2295	token->opr.ctx_type = INSIDE_NOTWORD;
2296	}
2297	tree_last = create_token_tree (dfa, NULL, NULL, token);
2298	tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2299	if (BE (tree_first == NULL \|\| tree_last == NULL \|\| tree == NULL, 0))
2300	{
2301	*err = REG_ESPACE;
2302	return NULL;
2303	}
2304	}
2305	else
2306	{
2307	tree = create_token_tree (dfa, NULL, NULL, token);
2308	if (BE (tree == NULL, 0))
2309	{
2310	*err = REG_ESPACE;
2311	return NULL;
2312	}
2313	}
2314	/* We must return here, since ANCHORs can't be followed
2315	by repetition operators.
2316	eg. RE"^" is invalid or "<ANCHOR(^)><CHAR()>",
2317	it must not be "<ANCHOR(^)><REPEAT()>". /
2318	fetch_token (token, regexp, syntax);
2319	return tree;
2320	case OP_PERIOD:
2321	tree = create_token_tree (dfa, NULL, NULL, token);
2322	if (BE (tree == NULL, 0))
2323	{
2324	*err = REG_ESPACE;
2325	return NULL;
2326	}
2327	if (dfa->mb_cur_max > 1)
2328	dfa->has_mb_node = 1;
2329	break;
2330	case OP_WORD:
2331	case OP_NOTWORD:
2332	tree = build_charclass_op (dfa, regexp->trans,
2333	(const unsigned char *) "alnum",
2334	(const unsigned char *) "_",
2335	token->type == OP_NOTWORD, err);
2336	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2337	return NULL;
2338	break;
2339	case OP_SPACE:
2340	case OP_NOTSPACE:
2341	tree = build_charclass_op (dfa, regexp->trans,
2342	(const unsigned char *) "space",
2343	(const unsigned char *) "",
2344	token->type == OP_NOTSPACE, err);
2345	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2346	return NULL;
2347	break;
2348	case OP_ALT:
2349	case END_OF_RE:
2350	return NULL;
2351	case BACK_SLASH:
2352	*err = REG_EESCAPE;
2353	return NULL;
2354	default:
2355	/* Must not happen? */
2356	#ifdef DEBUG
2357	assert (0);
2358	#endif
2359	return NULL;
2360	}
2361	fetch_token (token, regexp, syntax);
2362
2363	while (token->type == OP_DUP_ASTERISK \|\| token->type == OP_DUP_PLUS
2364	\|\| token->type == OP_DUP_QUESTION \|\| token->type == OP_OPEN_DUP_NUM)
2365	{
2366	tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2367	if (BE (*err != REG_NOERROR && tree == NULL, 0))
2368	return NULL;
2369	/* In BRE consecutive duplications are not allowed. */
2370	if ((syntax & RE_CONTEXT_INVALID_DUP)
2371	&& (token->type == OP_DUP_ASTERISK
2372	\|\| token->type == OP_OPEN_DUP_NUM))
2373	{
2374	*err = REG_BADRPT;
2375	return NULL;
2376	}
2377	}
2378
2379	return tree;
2380	}
2381
2382	/* This function build the following tree, from regular expression
2383	(<reg_exp>):
2384	SUBEXP
2385	\|
2386	<reg_exp>
2387	*/
2388
2389	static bin_tree_t *
2390	parse_sub_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2391	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2392	{
2393	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2394	bin_tree_t *tree;
2395	size_t cur_nsub;
2396	cur_nsub = preg->re_nsub++;
2397
2398	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2399
2400	/* The subexpression may be a null string. */
2401	if (token->type == OP_CLOSE_SUBEXP)
2402	tree = NULL;
2403	else
2404	{
2405	tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2406	if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2407	*err = REG_EPAREN;
2408	if (BE (*err != REG_NOERROR, 0))
2409	return NULL;
2410	}
2411
2412	if (cur_nsub <= '9' - '1')
2413	dfa->completed_bkref_map \|= 1 << cur_nsub;
2414
2415	tree = create_tree (dfa, tree, NULL, SUBEXP);
2416	if (BE (tree == NULL, 0))
2417	{
2418	*err = REG_ESPACE;
2419	return NULL;
2420	}
2421	tree->token.opr.idx = cur_nsub;
2422	return tree;
2423	}
2424
2425	/* This function parse repetition operators like "", "+", "{1,3}" etc. /
2426
2427	static bin_tree_t *
2428	parse_dup_op (bin_tree_t elem, re_string_t regexp, re_dfa_t *dfa,
2429	re_token_t token, reg_syntax_t syntax, reg_errcode_t err)
2430	{
2431	bin_tree_t tree = NULL, old_tree = NULL;
2432	int i, start, end, start_idx = re_string_cur_idx (regexp);
2433	re_token_t start_token = *token;
2434
2435	if (token->type == OP_OPEN_DUP_NUM)
2436	{
2437	end = 0;
2438	start = fetch_number (regexp, token, syntax);
2439	if (start == -1)
2440	{
2441	if (token->type == CHARACTER && token->opr.c == ',')
2442	start = 0; /* We treat "{,m}" as "{0,m}". */
2443	else
2444	{
2445	err = REG_BADBR; / <re>{} is invalid. */
2446	return NULL;
2447	}
2448	}
2449	if (BE (start != -2, 1))
2450	{
2451	/* We treat "{n}" as "{n,n}". */
2452	end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2453	: ((token->type == CHARACTER && token->opr.c == ',')
2454	? fetch_number (regexp, token, syntax) : -2));
2455	}
2456	if (BE (start == -2 \|\| end == -2, 0))
2457	{
2458	/* Invalid sequence. */
2459	if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
2460	{
2461	if (token->type == END_OF_RE)
2462	*err = REG_EBRACE;
2463	else
2464	*err = REG_BADBR;
2465
2466	return NULL;
2467	}
2468
2469	/* If the syntax bit is set, rollback. */
2470	re_string_set_index (regexp, start_idx);
2471	*token = start_token;
2472	token->type = CHARACTER;
2473	/* mb_partial and word_char bits should be already initialized by
2474	peek_token. */
2475	return elem;
2476	}
2477
2478	if (BE (end != -1 && start > end, 0))
2479	{
2480	/* First number greater than second. */
2481	*err = REG_BADBR;
2482	return NULL;
2483	}
2484	}
2485	else
2486	{
2487	start = (token->type == OP_DUP_PLUS) ? 1 : 0;
2488	end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
2489	}
2490
2491	fetch_token (token, regexp, syntax);
2492
2493	if (BE (elem == NULL, 0))
2494	return NULL;
2495	if (BE (start == 0 && end == 0, 0))
2496	{
2497	postorder (elem, free_tree, NULL);
2498	return NULL;
2499	}
2500
2501	/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2502	if (BE (start > 0, 0))
2503	{
2504	tree = elem;
2505	for (i = 2; i <= start; ++i)
2506	{
2507	elem = duplicate_tree (elem, dfa);
2508	tree = create_tree (dfa, tree, elem, CONCAT);
2509	if (BE (elem == NULL \|\| tree == NULL, 0))
2510	goto parse_dup_op_espace;
2511	}
2512
2513	if (start == end)
2514	return tree;
2515
2516	/* Duplicate ELEM before it is marked optional. */
2517	elem = duplicate_tree (elem, dfa);
2518	old_tree = tree;
2519	}
2520	else
2521	old_tree = NULL;
2522
2523	if (elem->token.type == SUBEXP)
2524	postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
2525
2526	tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
2527	if (BE (tree == NULL, 0))
2528	goto parse_dup_op_espace;
2529
2530	/* This loop is actually executed only when end != -1,
2531	to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2532	already created the start+1-th copy. */
2533	for (i = start + 2; i <= end; ++i)
2534	{
2535	elem = duplicate_tree (elem, dfa);
2536	tree = create_tree (dfa, tree, elem, CONCAT);
2537	if (BE (elem == NULL \|\| tree == NULL, 0))
2538	goto parse_dup_op_espace;
2539
2540	tree = create_tree (dfa, tree, NULL, OP_ALT);
2541	if (BE (tree == NULL, 0))
2542	goto parse_dup_op_espace;
2543	}
2544
2545	if (old_tree)
2546	tree = create_tree (dfa, old_tree, tree, CONCAT);
2547
2548	return tree;
2549
2550	parse_dup_op_espace:
2551	*err = REG_ESPACE;
2552	return NULL;
2553	}
2554
2555	/* Size of the names for collating symbol/equivalence_class/character_class.
2556	I'm not sure, but maybe enough. */
2557	#define BRACKET_NAME_BUF_SIZE 32
2558
2559	#ifndef _LIBC
2560	/* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2561	Build the range expression which starts from START_ELEM, and ends
2562	at END_ELEM. The result are written to MBCSET and SBCSET.
2563	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2564	mbcset->range_ends, is a pointer argument sinse we may
2565	update it. */
2566
2567	static reg_errcode_t
2568	internal_function
2569	# ifdef RE_ENABLE_I18N
2570	build_range_exp (bitset_t sbcset, re_charset_t mbcset, int range_alloc,
2571	bracket_elem_t start_elem, bracket_elem_t end_elem)
2572	# else /* not RE_ENABLE_I18N */
2573	build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2574	bracket_elem_t *end_elem)
2575	# endif /* not RE_ENABLE_I18N */
2576	{
2577	unsigned int start_ch, end_ch;
2578	/* Equivalence Classes and Character Classes can't be a range start/end. */
2579	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2580	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2581	0))
2582	return REG_ERANGE;
2583
2584	/* We can handle no multi character collating elements without libc
2585	support. */
2586	if (BE ((start_elem->type == COLL_SYM
2587	&& strlen ((char *) start_elem->opr.name) > 1)
2588	\|\| (end_elem->type == COLL_SYM
2589	&& strlen ((char *) end_elem->opr.name) > 1), 0))
2590	return REG_ECOLLATE;
2591
2592	# ifdef RE_ENABLE_I18N
2593	{
2594	wchar_t wc;
2595	wint_t start_wc;
2596	wint_t end_wc;
2597	wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2598
2599	start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2600	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2601	: 0));
2602	end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2603	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2604	: 0));
2605	start_wc = ((start_elem->type == SB_CHAR \|\| start_elem->type == COLL_SYM)
2606	? __btowc (start_ch) : start_elem->opr.wch);
2607	end_wc = ((end_elem->type == SB_CHAR \|\| end_elem->type == COLL_SYM)
2608	? __btowc (end_ch) : end_elem->opr.wch);
2609	if (start_wc == WEOF \|\| end_wc == WEOF)
2610	return REG_ECOLLATE;
2611	cmp_buf[0] = start_wc;
2612	cmp_buf[4] = end_wc;
2613	if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
2614	return REG_ERANGE;
2615
2616	/* Got valid collation sequence values, add them as a new entry.
2617	However, for !_LIBC we have no collation elements: if the
2618	character set is single byte, the single byte character set
2619	that we build below suffices. parse_bracket_exp passes
2620	no MBCSET if dfa->mb_cur_max == 1. */
2621	if (mbcset)
2622	{
2623	/* Check the space of the arrays. */
2624	if (BE (*range_alloc == mbcset->nranges, 0))
2625	{
2626	/* There is not enough space, need realloc. */
2627	wchar_t new_array_start, new_array_end;
2628	int new_nranges;
2629
2630	/* +1 in case of mbcset->nranges is 0. */
2631	new_nranges = 2 * mbcset->nranges + 1;
2632	/* Use realloc since mbcset->range_starts and mbcset->range_ends
2633	are NULL if range_alloc == 0. /
2634	new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2635	new_nranges);
2636	new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2637	new_nranges);
2638
2639	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
2640	return REG_ESPACE;
2641
2642	mbcset->range_starts = new_array_start;
2643	mbcset->range_ends = new_array_end;
2644	*range_alloc = new_nranges;
2645	}
2646
2647	mbcset->range_starts[mbcset->nranges] = start_wc;
2648	mbcset->range_ends[mbcset->nranges++] = end_wc;
2649	}
2650
2651	/* Build the table for single byte characters. */
2652	for (wc = 0; wc < SBC_MAX; ++wc)
2653	{
2654	cmp_buf[2] = wc;
2655	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2656	&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2657	bitset_set (sbcset, wc);
2658	}
2659	}
2660	# else /* not RE_ENABLE_I18N */
2661	{
2662	unsigned int ch;
2663	start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2664	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2665	: 0));
2666	end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2667	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
2668	: 0));
2669	if (start_ch > end_ch)
2670	return REG_ERANGE;
2671	/* Build the table for single byte characters. */
2672	for (ch = 0; ch < SBC_MAX; ++ch)
2673	if (start_ch <= ch && ch <= end_ch)
2674	bitset_set (sbcset, ch);
2675	}
2676	# endif /* not RE_ENABLE_I18N */
2677	return REG_NOERROR;
2678	}
2679	#endif /* not _LIBC */
2680
2681	#ifndef _LIBC
2682	/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2683	Build the collating element which is represented by NAME.
2684	The result are written to MBCSET and SBCSET.
2685	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2686	pointer argument since we may update it. */
2687
2688	static reg_errcode_t
2689	internal_function
2690	# ifdef RE_ENABLE_I18N
2691	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2692	int coll_sym_alloc, const unsigned char name)
2693	# else /* not RE_ENABLE_I18N */
2694	build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2695	# endif /* not RE_ENABLE_I18N */
2696	{
2697	size_t name_len = strlen ((const char *) name);
2698	if (BE (name_len != 1, 0))
2699	return REG_ECOLLATE;
2700	else
2701	{
2702	bitset_set (sbcset, name[0]);
2703	return REG_NOERROR;
2704	}
2705	}
2706	#endif /* not _LIBC */
2707
2708	/* This function parse bracket expression like "[abc]", "[a-c]",
2709	"[[.a-a.]]" etc. */
2710
2711	static bin_tree_t *
2712	parse_bracket_exp (re_string_t regexp, re_dfa_t dfa, re_token_t *token,
2713	reg_syntax_t syntax, reg_errcode_t *err)
2714	{
2715	#ifdef _LIBC
2716	const unsigned char *collseqmb;
2717	const char *collseqwc;
2718	uint32_t nrules;
2719	int32_t table_size;
2720	const int32_t *symb_table;
2721	const unsigned char *extra;
2722
2723	/* Local function for parse_bracket_exp used in _LIBC environement.
2724	Seek the collating symbol entry correspondings to NAME.
2725	Return the index of the symbol in the SYMB_TABLE. */
2726
2727	auto inline int32_t
2728	__attribute ((always_inline))
2729	seek_collating_symbol_entry (name, name_len)
2730	const unsigned char *name;
2731	size_t name_len;
2732	{
2733	int32_t hash = elem_hash ((const char *) name, name_len);
2734	int32_t elem = hash % table_size;
2735	if (symb_table[2 * elem] != 0)
2736	{
2737	int32_t second = hash % (table_size - 2) + 1;
2738
2739	do
2740	{
2741	/* First compare the hashing value. */
2742	if (symb_table[2 * elem] == hash
2743	/* Compare the length of the name. */
2744	&& name_len == extra[symb_table[2 * elem + 1]]
2745	/* Compare the name. */
2746	&& memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2747	name_len) == 0)
2748	{
2749	/* Yep, this is the entry. */
2750	break;
2751	}
2752
2753	/* Next entry. */
2754	elem += second;
2755	}
2756	while (symb_table[2 * elem] != 0);
2757	}
2758	return elem;
2759	}
2760
2761	/* Local function for parse_bracket_exp used in _LIBC environement.
2762	Look up the collation sequence value of BR_ELEM.
2763	Return the value if succeeded, UINT_MAX otherwise. */
2764
2765	auto inline unsigned int
2766	__attribute ((always_inline))
2767	lookup_collation_sequence_value (br_elem)
2768	bracket_elem_t *br_elem;
2769	{
2770	if (br_elem->type == SB_CHAR)
2771	{
2772	/*
2773	if (MB_CUR_MAX == 1)
2774	*/
2775	if (nrules == 0)
2776	return collseqmb[br_elem->opr.ch];
2777	else
2778	{
2779	wint_t wc = __btowc (br_elem->opr.ch);
2780	return __collseq_table_lookup (collseqwc, wc);
2781	}
2782	}
2783	else if (br_elem->type == MB_CHAR)
2784	{
2785	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2786	}
2787	else if (br_elem->type == COLL_SYM)
2788	{
2789	size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2790	if (nrules != 0)
2791	{
2792	int32_t elem, idx;
2793	elem = seek_collating_symbol_entry (br_elem->opr.name,
2794	sym_name_len);
2795	if (symb_table[2 * elem] != 0)
2796	{
2797	/* We found the entry. */
2798	idx = symb_table[2 * elem + 1];
2799	/* Skip the name of collating element name. */
2800	idx += 1 + extra[idx];
2801	/* Skip the byte sequence of the collating element. */
2802	idx += 1 + extra[idx];
2803	/* Adjust for the alignment. */
2804	idx = (idx + 3) & ~3;
2805	/* Skip the multibyte collation sequence value. */
2806	idx += sizeof (unsigned int);
2807	/* Skip the wide char sequence of the collating element. */
2808	idx += sizeof (unsigned int) *
2809	(1 + (unsigned int ) (extra + idx));
2810	/* Return the collation sequence value. */
2811	return (unsigned int ) (extra + idx);
2812	}
2813	else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2814	{
2815	/* No valid character. Match it as a single byte
2816	character. */
2817	return collseqmb[br_elem->opr.name[0]];
2818	}
2819	}
2820	else if (sym_name_len == 1)
2821	return collseqmb[br_elem->opr.name[0]];
2822	}
2823	return UINT_MAX;
2824	}
2825
2826	/* Local function for parse_bracket_exp used in _LIBC environement.
2827	Build the range expression which starts from START_ELEM, and ends
2828	at END_ELEM. The result are written to MBCSET and SBCSET.
2829	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2830	mbcset->range_ends, is a pointer argument sinse we may
2831	update it. */
2832
2833	auto inline reg_errcode_t
2834	__attribute ((always_inline))
2835	build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2836	re_charset_t *mbcset;
2837	int *range_alloc;
2838	bitset_t sbcset;
2839	bracket_elem_t start_elem, end_elem;
2840	{
2841	unsigned int ch;
2842	uint32_t start_collseq;
2843	uint32_t end_collseq;
2844
2845	/* Equivalence Classes and Character Classes can't be a range
2846	start/end. */
2847	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2848	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2849	0))
2850	return REG_ERANGE;
2851
2852	start_collseq = lookup_collation_sequence_value (start_elem);
2853	end_collseq = lookup_collation_sequence_value (end_elem);
2854	/* Check start/end collation sequence values. */
2855	if (BE (start_collseq == UINT_MAX \|\| end_collseq == UINT_MAX, 0))
2856	return REG_ECOLLATE;
2857	if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
2858	return REG_ERANGE;
2859
2860	/* Got valid collation sequence values, add them as a new entry.
2861	However, if we have no collation elements, and the character set
2862	is single byte, the single byte character set that we
2863	build below suffices. */
2864	if (nrules > 0 \|\| dfa->mb_cur_max > 1)
2865	{
2866	/* Check the space of the arrays. */
2867	if (BE (*range_alloc == mbcset->nranges, 0))
2868	{
2869	/* There is not enough space, need realloc. */
2870	uint32_t *new_array_start;
2871	uint32_t *new_array_end;
2872	int new_nranges;
2873
2874	/* +1 in case of mbcset->nranges is 0. */
2875	new_nranges = 2 * mbcset->nranges + 1;
2876	new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2877	new_nranges);
2878	new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2879	new_nranges);
2880
2881	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
2882	return REG_ESPACE;
2883
2884	mbcset->range_starts = new_array_start;
2885	mbcset->range_ends = new_array_end;
2886	*range_alloc = new_nranges;
2887	}
2888
2889	mbcset->range_starts[mbcset->nranges] = start_collseq;
2890	mbcset->range_ends[mbcset->nranges++] = end_collseq;
2891	}
2892
2893	/* Build the table for single byte characters. */
2894	for (ch = 0; ch < SBC_MAX; ch++)
2895	{
2896	uint32_t ch_collseq;
2897	/*
2898	if (MB_CUR_MAX == 1)
2899	*/
2900	if (nrules == 0)
2901	ch_collseq = collseqmb[ch];
2902	else
2903	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2904	if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2905	bitset_set (sbcset, ch);
2906	}
2907	return REG_NOERROR;
2908	}
2909
2910	/* Local function for parse_bracket_exp used in _LIBC environement.
2911	Build the collating element which is represented by NAME.
2912	The result are written to MBCSET and SBCSET.
2913	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2914	pointer argument sinse we may update it. */
2915
2916	auto inline reg_errcode_t
2917	__attribute ((always_inline))
2918	build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2919	re_charset_t *mbcset;
2920	int *coll_sym_alloc;
2921	bitset_t sbcset;
2922	const unsigned char *name;
2923	{
2924	int32_t elem, idx;
2925	size_t name_len = strlen ((const char *) name);
2926	if (nrules != 0)
2927	{
2928	elem = seek_collating_symbol_entry (name, name_len);
2929	if (symb_table[2 * elem] != 0)
2930	{
2931	/* We found the entry. */
2932	idx = symb_table[2 * elem + 1];
2933	/* Skip the name of collating element name. */
2934	idx += 1 + extra[idx];
2935	}
2936	else if (symb_table[2 * elem] == 0 && name_len == 1)
2937	{
2938	/* No valid character, treat it as a normal
2939	character. */
2940	bitset_set (sbcset, name[0]);
2941	return REG_NOERROR;
2942	}
2943	else
2944	return REG_ECOLLATE;
2945
2946	/* Got valid collation sequence, add it as a new entry. */
2947	/* Check the space of the arrays. */
2948	if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
2949	{
2950	/* Not enough, realloc it. */
2951	/* +1 in case of mbcset->ncoll_syms is 0. */
2952	int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
2953	/* Use realloc since mbcset->coll_syms is NULL
2954	if alloc == 0. /
2955	int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
2956	new_coll_sym_alloc);
2957	if (BE (new_coll_syms == NULL, 0))
2958	return REG_ESPACE;
2959	mbcset->coll_syms = new_coll_syms;
2960	*coll_sym_alloc = new_coll_sym_alloc;
2961	}
2962	mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
2963	return REG_NOERROR;
2964	}
2965	else
2966	{
2967	if (BE (name_len != 1, 0))
2968	return REG_ECOLLATE;
2969	else
2970	{
2971	bitset_set (sbcset, name[0]);
2972	return REG_NOERROR;
2973	}
2974	}
2975	}
2976	#endif
2977
2978	re_token_t br_token;
2979	re_bitset_ptr_t sbcset;
2980	#ifdef RE_ENABLE_I18N
2981	re_charset_t *mbcset;
2982	int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
2983	int equiv_class_alloc = 0, char_class_alloc = 0;
2984	#endif /* not RE_ENABLE_I18N */
2985	int non_match = 0;
2986	bin_tree_t *work_tree;
2987	int token_len;
2988	int first_round = 1;
2989	#ifdef _LIBC
2990	collseqmb = (const unsigned char *)
2991	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
2992	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2993	if (nrules)
2994	{
2995	/*
2996	if (MB_CUR_MAX > 1)
2997	*/
2998	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
2999	table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3000	symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3001	_NL_COLLATE_SYMB_TABLEMB);
3002	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3003	_NL_COLLATE_SYMB_EXTRAMB);
3004	}
3005	#endif
3006	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3007	#ifdef RE_ENABLE_I18N
3008	mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3009	#endif /* RE_ENABLE_I18N */
3010	#ifdef RE_ENABLE_I18N
3011	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
3012	#else
3013	if (BE (sbcset == NULL, 0))
3014	#endif /* RE_ENABLE_I18N */
3015	{
3016	*err = REG_ESPACE;
3017	return NULL;
3018	}
3019
3020	token_len = peek_token_bracket (token, regexp, syntax);
3021	if (BE (token->type == END_OF_RE, 0))
3022	{
3023	*err = REG_BADPAT;
3024	goto parse_bracket_exp_free_return;
3025	}
3026	if (token->type == OP_NON_MATCH_LIST)
3027	{
3028	#ifdef RE_ENABLE_I18N
3029	mbcset->non_match = 1;
3030	#endif /* not RE_ENABLE_I18N */
3031	non_match = 1;
3032	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3033	bitset_set (sbcset, '\0');
3034	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3035	token_len = peek_token_bracket (token, regexp, syntax);
3036	if (BE (token->type == END_OF_RE, 0))
3037	{
3038	*err = REG_BADPAT;
3039	goto parse_bracket_exp_free_return;
3040	}
3041	}
3042
3043	/* We treat the first ']' as a normal character. */
3044	if (token->type == OP_CLOSE_BRACKET)
3045	token->type = CHARACTER;
3046
3047	while (1)
3048	{
3049	bracket_elem_t start_elem, end_elem;
3050	unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3051	unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3052	reg_errcode_t ret;
3053	int token_len2 = 0, is_range_exp = 0;
3054	re_token_t token2;
3055
3056	start_elem.opr.name = start_name_buf;
3057	ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3058	syntax, first_round);
3059	if (BE (ret != REG_NOERROR, 0))
3060	{
3061	*err = ret;
3062	goto parse_bracket_exp_free_return;
3063	}
3064	first_round = 0;
3065
3066	/* Get information about the next token. We need it in any case. */
3067	token_len = peek_token_bracket (token, regexp, syntax);
3068
3069	/* Do not check for ranges if we know they are not allowed. */
3070	if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3071	{
3072	if (BE (token->type == END_OF_RE, 0))
3073	{
3074	*err = REG_EBRACK;
3075	goto parse_bracket_exp_free_return;
3076	}
3077	if (token->type == OP_CHARSET_RANGE)
3078	{
3079	re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3080	token_len2 = peek_token_bracket (&token2, regexp, syntax);
3081	if (BE (token2.type == END_OF_RE, 0))
3082	{
3083	*err = REG_EBRACK;
3084	goto parse_bracket_exp_free_return;
3085	}
3086	if (token2.type == OP_CLOSE_BRACKET)
3087	{
3088	/* We treat the last '-' as a normal character. */
3089	re_string_skip_bytes (regexp, -token_len);
3090	token->type = CHARACTER;
3091	}
3092	else
3093	is_range_exp = 1;
3094	}
3095	}
3096
3097	if (is_range_exp == 1)
3098	{
3099	end_elem.opr.name = end_name_buf;
3100	ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3101	dfa, syntax, 1);
3102	if (BE (ret != REG_NOERROR, 0))
3103	{
3104	*err = ret;
3105	goto parse_bracket_exp_free_return;
3106	}
3107
3108	token_len = peek_token_bracket (token, regexp, syntax);
3109
3110	#ifdef _LIBC
3111	*err = build_range_exp (sbcset, mbcset, &range_alloc,
3112	&start_elem, &end_elem);
3113	#else
3114	# ifdef RE_ENABLE_I18N
3115	*err = build_range_exp (sbcset,
3116	dfa->mb_cur_max > 1 ? mbcset : NULL,
3117	&range_alloc, &start_elem, &end_elem);
3118	# else
3119	*err = build_range_exp (sbcset, &start_elem, &end_elem);
3120	# endif
3121	#endif /* RE_ENABLE_I18N */
3122	if (BE (*err != REG_NOERROR, 0))
3123	goto parse_bracket_exp_free_return;
3124	}
3125	else
3126	{
3127	switch (start_elem.type)
3128	{
3129	case SB_CHAR:
3130	bitset_set (sbcset, start_elem.opr.ch);
3131	break;
3132	#ifdef RE_ENABLE_I18N
3133	case MB_CHAR:
3134	/* Check whether the array has enough space. */
3135	if (BE (mbchar_alloc == mbcset->nmbchars, 0))
3136	{
3137	wchar_t *new_mbchars;
3138	/* Not enough, realloc it. */
3139	/* +1 in case of mbcset->nmbchars is 0. */
3140	mbchar_alloc = 2 * mbcset->nmbchars + 1;
3141	/* Use realloc since array is NULL if alloc == 0. /
3142	new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3143	mbchar_alloc);
3144	if (BE (new_mbchars == NULL, 0))
3145	goto parse_bracket_exp_espace;
3146	mbcset->mbchars = new_mbchars;
3147	}
3148	mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3149	break;
3150	#endif /* RE_ENABLE_I18N */
3151	case EQUIV_CLASS:
3152	*err = build_equiv_class (sbcset,
3153	#ifdef RE_ENABLE_I18N
3154	mbcset, &equiv_class_alloc,
3155	#endif /* RE_ENABLE_I18N */
3156	start_elem.opr.name);
3157	if (BE (*err != REG_NOERROR, 0))
3158	goto parse_bracket_exp_free_return;
3159	break;
3160	case COLL_SYM:
3161	*err = build_collating_symbol (sbcset,
3162	#ifdef RE_ENABLE_I18N
3163	mbcset, &coll_sym_alloc,
3164	#endif /* RE_ENABLE_I18N */
3165	start_elem.opr.name);
3166	if (BE (*err != REG_NOERROR, 0))
3167	goto parse_bracket_exp_free_return;
3168	break;
3169	case CHAR_CLASS:
3170	*err = build_charclass (regexp->trans, sbcset,
3171	#ifdef RE_ENABLE_I18N
3172	mbcset, &char_class_alloc,
3173	#endif /* RE_ENABLE_I18N */
3174	start_elem.opr.name, syntax);
3175	if (BE (*err != REG_NOERROR, 0))
3176	goto parse_bracket_exp_free_return;
3177	break;
3178	default:
3179	assert (0);
3180	break;
3181	}
3182	}
3183	if (BE (token->type == END_OF_RE, 0))
3184	{
3185	*err = REG_EBRACK;
3186	goto parse_bracket_exp_free_return;
3187	}
3188	if (token->type == OP_CLOSE_BRACKET)
3189	break;
3190	}
3191
3192	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3193
3194	/* If it is non-matching list. */
3195	if (non_match)
3196	bitset_not (sbcset);
3197
3198	#ifdef RE_ENABLE_I18N
3199	/* Ensure only single byte characters are set. */
3200	if (dfa->mb_cur_max > 1)
3201	bitset_mask (sbcset, dfa->sb_char);
3202
3203	if (mbcset->nmbchars \|\| mbcset->ncoll_syms \|\| mbcset->nequiv_classes
3204	\|\| mbcset->nranges \|\| (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3205	\|\| mbcset->non_match)))
3206	{
3207	bin_tree_t *mbc_tree;
3208	int sbc_idx;
3209	/* Build a tree for complex bracket. */
3210	dfa->has_mb_node = 1;
3211	br_token.type = COMPLEX_BRACKET;
3212	br_token.opr.mbcset = mbcset;
3213	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3214	if (BE (mbc_tree == NULL, 0))
3215	goto parse_bracket_exp_espace;
3216	for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
3217	if (sbcset[sbc_idx])
3218	break;
3219	/* If there are no bits set in sbcset, there is no point
3220	of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3221	if (sbc_idx < BITSET_WORDS)
3222	{
3223	/* Build a tree for simple bracket. */
3224	br_token.type = SIMPLE_BRACKET;
3225	br_token.opr.sbcset = sbcset;
3226	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3227	if (BE (work_tree == NULL, 0))
3228	goto parse_bracket_exp_espace;
3229
3230	/* Then join them by ALT node. */
3231	work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3232	if (BE (work_tree == NULL, 0))
3233	goto parse_bracket_exp_espace;
3234	}
3235	else
3236	{
3237	re_free (sbcset);
3238	work_tree = mbc_tree;
3239	}
3240	}
3241	else
3242	#endif /* not RE_ENABLE_I18N */
3243	{
3244	#ifdef RE_ENABLE_I18N
3245	free_charset (mbcset);
3246	#endif
3247	/* Build a tree for simple bracket. */
3248	br_token.type = SIMPLE_BRACKET;
3249	br_token.opr.sbcset = sbcset;
3250	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3251	if (BE (work_tree == NULL, 0))
3252	goto parse_bracket_exp_espace;
3253	}
3254	return work_tree;
3255
3256	parse_bracket_exp_espace:
3257	*err = REG_ESPACE;
3258	parse_bracket_exp_free_return:
3259	re_free (sbcset);
3260	#ifdef RE_ENABLE_I18N
3261	free_charset (mbcset);
3262	#endif /* RE_ENABLE_I18N */
3263	return NULL;
3264	}
3265
3266	/* Parse an element in the bracket expression. */
3267
3268	static reg_errcode_t
3269	parse_bracket_element (bracket_elem_t elem, re_string_t regexp,
3270	re_token_t token, int token_len, re_dfa_t dfa,
3271	reg_syntax_t syntax, int accept_hyphen)
3272	{
3273	#ifdef RE_ENABLE_I18N
3274	int cur_char_size;
3275	cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3276	if (cur_char_size > 1)
3277	{
3278	elem->type = MB_CHAR;
3279	elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3280	re_string_skip_bytes (regexp, cur_char_size);
3281	return REG_NOERROR;
3282	}
3283	#endif /* RE_ENABLE_I18N */
3284	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
3285	if (token->type == OP_OPEN_COLL_ELEM \|\| token->type == OP_OPEN_CHAR_CLASS
3286	\|\| token->type == OP_OPEN_EQUIV_CLASS)
3287	return parse_bracket_symbol (elem, regexp, token);
3288	if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
3289	{
3290	/* A '-' must only appear as anything but a range indicator before
3291	the closing bracket. Everything else is an error. */
3292	re_token_t token2;
3293	(void) peek_token_bracket (&token2, regexp, syntax);
3294	if (token2.type != OP_CLOSE_BRACKET)
3295	/* The actual error value is not standardized since this whole
3296	case is undefined. But ERANGE makes good sense. */
3297	return REG_ERANGE;
3298	}
3299	elem->type = SB_CHAR;
3300	elem->opr.ch = token->opr.c;
3301	return REG_NOERROR;
3302	}
3303
3304	/* Parse a bracket symbol in the bracket expression. Bracket symbols are
3305	such as [:<character_class>:], [.<collating_element>.], and
3306	[=<equivalent_class>=]. */
3307
3308	static reg_errcode_t
3309	parse_bracket_symbol (bracket_elem_t elem, re_string_t regexp,
3310	re_token_t *token)
3311	{
3312	unsigned char ch, delim = token->opr.c;
3313	int i = 0;
3314	if (re_string_eoi(regexp))
3315	return REG_EBRACK;
3316	for (;; ++i)
3317	{
3318	if (i >= BRACKET_NAME_BUF_SIZE)
3319	return REG_EBRACK;
3320	if (token->type == OP_OPEN_CHAR_CLASS)
3321	ch = re_string_fetch_byte_case (regexp);
3322	else
3323	ch = re_string_fetch_byte (regexp);
3324	if (re_string_eoi(regexp))
3325	return REG_EBRACK;
3326	if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3327	break;
3328	elem->opr.name[i] = ch;
3329	}
3330	re_string_skip_bytes (regexp, 1);
3331	elem->opr.name[i] = '\0';
3332	switch (token->type)
3333	{
3334	case OP_OPEN_COLL_ELEM:
3335	elem->type = COLL_SYM;
3336	break;
3337	case OP_OPEN_EQUIV_CLASS:
3338	elem->type = EQUIV_CLASS;
3339	break;
3340	case OP_OPEN_CHAR_CLASS:
3341	elem->type = CHAR_CLASS;
3342	break;
3343	default:
3344	break;
3345	}
3346	return REG_NOERROR;
3347	}
3348
3349	/* Helper function for parse_bracket_exp.
3350	Build the equivalence class which is represented by NAME.
3351	The result are written to MBCSET and SBCSET.
3352	EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3353	is a pointer argument sinse we may update it. */
3354
3355	static reg_errcode_t
3356	#ifdef RE_ENABLE_I18N
3357	build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3358	int equiv_class_alloc, const unsigned char name)
3359	#else /* not RE_ENABLE_I18N */
3360	build_equiv_class (bitset_t sbcset, const unsigned char *name)
3361	#endif /* not RE_ENABLE_I18N */
3362	{
3363	#ifdef _LIBC
3364	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3365	if (nrules != 0)
3366	{
3367	const int32_t table, indirect;
3368	const unsigned char weights, extra, *cp;
3369	unsigned char char_buf[2];
3370	int32_t idx1, idx2;
3371	unsigned int ch;
3372	size_t len;
3373	/* This #include defines a local function! */
3374	# include <locale/weight.h>
3375	/* Calculate the index for equivalence class. */
3376	cp = name;
3377	table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3378	weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3379	_NL_COLLATE_WEIGHTMB);
3380	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3381	_NL_COLLATE_EXTRAMB);
3382	indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3383	_NL_COLLATE_INDIRECTMB);
3384	idx1 = findidx (&cp);
3385	if (BE (idx1 == 0 \|\| cp < name + strlen ((const char *) name), 0))
3386	/* This isn't a valid character. */
3387	return REG_ECOLLATE;
3388
3389	/* Build single byte matcing table for this equivalence class. */
3390	char_buf[1] = (unsigned char) '\0';
3391	len = weights[idx1];
3392	for (ch = 0; ch < SBC_MAX; ++ch)
3393	{
3394	char_buf[0] = ch;
3395	cp = char_buf;
3396	idx2 = findidx (&cp);
3397	/*
3398	idx2 = table[ch];
3399	*/
3400	if (idx2 == 0)
3401	/* This isn't a valid character. */
3402	continue;
3403	if (len == weights[idx2])
3404	{
3405	int cnt = 0;
3406	while (cnt <= len &&
3407	weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
3408	++cnt;
3409
3410	if (cnt > len)
3411	bitset_set (sbcset, ch);
3412	}
3413	}
3414	/* Check whether the array has enough space. */
3415	if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
3416	{
3417	/* Not enough, realloc it. */
3418	/* +1 in case of mbcset->nequiv_classes is 0. */
3419	int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
3420	/* Use realloc since the array is NULL if alloc == 0. /
3421	int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3422	int32_t,
3423	new_equiv_class_alloc);
3424	if (BE (new_equiv_classes == NULL, 0))
3425	return REG_ESPACE;
3426	mbcset->equiv_classes = new_equiv_classes;
3427	*equiv_class_alloc = new_equiv_class_alloc;
3428	}
3429	mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3430	}
3431	else
3432	#endif /* _LIBC */
3433	{
3434	if (BE (strlen ((const char *) name) != 1, 0))
3435	return REG_ECOLLATE;
3436	bitset_set (sbcset, *name);
3437	}
3438	return REG_NOERROR;
3439	}
3440
3441	/* Helper function for parse_bracket_exp.
3442	Build the character class which is represented by NAME.
3443	The result are written to MBCSET and SBCSET.
3444	CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3445	is a pointer argument sinse we may update it. */
3446
3447	static reg_errcode_t
3448	#ifdef RE_ENABLE_I18N
3449	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3450	re_charset_t mbcset, int char_class_alloc,
3451	const unsigned char *class_name, reg_syntax_t syntax)
3452	#else /* not RE_ENABLE_I18N */
3453	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3454	const unsigned char *class_name, reg_syntax_t syntax)
3455	#endif /* not RE_ENABLE_I18N */
3456	{
3457	int i;
3458	const char name = (const char ) class_name;
3459
3460	/* In case of REG_ICASE "upper" and "lower" match the both of
3461	upper and lower cases. */
3462	if ((syntax & RE_ICASE)
3463	&& (strcmp (name, "upper") == 0 \|\| strcmp (name, "lower") == 0))
3464	name = "alpha";
3465
3466	#ifdef RE_ENABLE_I18N
3467	/* Check the space of the arrays. */
3468	if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
3469	{
3470	/* Not enough, realloc it. */
3471	/* +1 in case of mbcset->nchar_classes is 0. */
3472	int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
3473	/* Use realloc since array is NULL if alloc == 0. /
3474	wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3475	new_char_class_alloc);
3476	if (BE (new_char_classes == NULL, 0))
3477	return REG_ESPACE;
3478	mbcset->char_classes = new_char_classes;
3479	*char_class_alloc = new_char_class_alloc;
3480	}
3481	mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3482	#endif /* RE_ENABLE_I18N */
3483
3484	#define BUILD_CHARCLASS_LOOP(ctype_func) \
3485	do { \
3486	if (BE (trans != NULL, 0)) \
3487	{ \
3488	for (i = 0; i < SBC_MAX; ++i) \
3489	if (ctype_func (i)) \
3490	bitset_set (sbcset, trans[i]); \
3491	} \
3492	else \
3493	{ \
3494	for (i = 0; i < SBC_MAX; ++i) \
3495	if (ctype_func (i)) \
3496	bitset_set (sbcset, i); \
3497	} \
3498	} while (0)
3499
3500	if (strcmp (name, "alnum") == 0)
3501	BUILD_CHARCLASS_LOOP (isalnum);
3502	else if (strcmp (name, "cntrl") == 0)
3503	BUILD_CHARCLASS_LOOP (iscntrl);
3504	else if (strcmp (name, "lower") == 0)
3505	BUILD_CHARCLASS_LOOP (islower);
3506	else if (strcmp (name, "space") == 0)
3507	BUILD_CHARCLASS_LOOP (isspace);
3508	else if (strcmp (name, "alpha") == 0)
3509	BUILD_CHARCLASS_LOOP (isalpha);
3510	else if (strcmp (name, "digit") == 0)
3511	BUILD_CHARCLASS_LOOP (isdigit);
3512	else if (strcmp (name, "print") == 0)
3513	BUILD_CHARCLASS_LOOP (isprint);
3514	else if (strcmp (name, "upper") == 0)
3515	BUILD_CHARCLASS_LOOP (isupper);
3516	else if (strcmp (name, "blank") == 0)
3517	BUILD_CHARCLASS_LOOP (isblank);
3518	else if (strcmp (name, "graph") == 0)
3519	BUILD_CHARCLASS_LOOP (isgraph);
3520	else if (strcmp (name, "punct") == 0)
3521	BUILD_CHARCLASS_LOOP (ispunct);
3522	else if (strcmp (name, "xdigit") == 0)
3523	BUILD_CHARCLASS_LOOP (isxdigit);
3524	else
3525	return REG_ECTYPE;
3526
3527	return REG_NOERROR;
3528	}
3529
3530	static bin_tree_t *
3531	build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3532	const unsigned char *class_name,
3533	const unsigned char *extra, int non_match,
3534	reg_errcode_t *err)
3535	{
3536	re_bitset_ptr_t sbcset;
3537	#ifdef RE_ENABLE_I18N
3538	re_charset_t *mbcset;
3539	int alloc = 0;
3540	#endif /* not RE_ENABLE_I18N */
3541	reg_errcode_t ret;
3542	re_token_t br_token;
3543	bin_tree_t *tree;
3544
3545	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3546	#ifdef RE_ENABLE_I18N
3547	mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3548	#endif /* RE_ENABLE_I18N */
3549
3550	#ifdef RE_ENABLE_I18N
3551	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
3552	#else /* not RE_ENABLE_I18N */
3553	if (BE (sbcset == NULL, 0))
3554	#endif /* not RE_ENABLE_I18N */
3555	{
3556	*err = REG_ESPACE;
3557	return NULL;
3558	}
3559
3560	if (non_match)
3561	{
3562	#ifdef RE_ENABLE_I18N
3563	/*
3564	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3565	bitset_set(cset->sbcset, '\0');
3566	*/
3567	mbcset->non_match = 1;
3568	#endif /* not RE_ENABLE_I18N */
3569	}
3570
3571	/* We don't care the syntax in this case. */
3572	ret = build_charclass (trans, sbcset,
3573	#ifdef RE_ENABLE_I18N
3574	mbcset, &alloc,
3575	#endif /* RE_ENABLE_I18N */
3576	class_name, 0);
3577
3578	if (BE (ret != REG_NOERROR, 0))
3579	{
3580	re_free (sbcset);
3581	#ifdef RE_ENABLE_I18N
3582	free_charset (mbcset);
3583	#endif /* RE_ENABLE_I18N */
3584	*err = ret;
3585	return NULL;
3586	}
3587	/* \w match '_' also. */
3588	for (; *extra; extra++)
3589	bitset_set (sbcset, *extra);
3590
3591	/* If it is non-matching list. */
3592	if (non_match)
3593	bitset_not (sbcset);
3594
3595	#ifdef RE_ENABLE_I18N
3596	/* Ensure only single byte characters are set. */
3597	if (dfa->mb_cur_max > 1)
3598	bitset_mask (sbcset, dfa->sb_char);
3599	#endif
3600
3601	/* Build a tree for simple bracket. */
3602	br_token.type = SIMPLE_BRACKET;
3603	br_token.opr.sbcset = sbcset;
3604	tree = create_token_tree (dfa, NULL, NULL, &br_token);
3605	if (BE (tree == NULL, 0))
3606	goto build_word_op_espace;
3607
3608	#ifdef RE_ENABLE_I18N
3609	if (dfa->mb_cur_max > 1)
3610	{
3611	bin_tree_t *mbc_tree;
3612	/* Build a tree for complex bracket. */
3613	br_token.type = COMPLEX_BRACKET;
3614	br_token.opr.mbcset = mbcset;
3615	dfa->has_mb_node = 1;
3616	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3617	if (BE (mbc_tree == NULL, 0))
3618	goto build_word_op_espace;
3619	/* Then join them by ALT node. */
3620	tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3621	if (BE (mbc_tree != NULL, 1))
3622	return tree;
3623	}
3624	else
3625	{
3626	free_charset (mbcset);
3627	return tree;
3628	}
3629	#else /* not RE_ENABLE_I18N */
3630	return tree;
3631	#endif /* not RE_ENABLE_I18N */
3632
3633	build_word_op_espace:
3634	re_free (sbcset);
3635	#ifdef RE_ENABLE_I18N
3636	free_charset (mbcset);
3637	#endif /* RE_ENABLE_I18N */
3638	*err = REG_ESPACE;
3639	return NULL;
3640	}
3641
3642	/* This is intended for the expressions like "a{1,3}".
3643	Fetch a number from `input', and return the number.
3644	Return -1, if the number field is empty like "{,1}".
3645	Return -2, If an error is occured. */
3646
3647	static int
3648	fetch_number (re_string_t input, re_token_t token, reg_syntax_t syntax)
3649	{
3650	int num = -1;
3651	unsigned char c;
3652	while (1)
3653	{
3654	fetch_token (token, input, syntax);
3655	c = token->opr.c;
3656	if (BE (token->type == END_OF_RE, 0))
3657	return -2;
3658	if (token->type == OP_CLOSE_DUP_NUM \|\| c == ',')
3659	break;
3660	num = ((token->type != CHARACTER \|\| c < '0' \|\| '9' < c \|\| num == -2)
3661	? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
3662	num = (num > RE_DUP_MAX) ? -2 : num;
3663	}
3664	return num;
3665	}
3666
3667
3668	#ifdef RE_ENABLE_I18N
3669	static void
3670	free_charset (re_charset_t *cset)
3671	{
3672	re_free (cset->mbchars);
3673	# ifdef _LIBC
3674	re_free (cset->coll_syms);
3675	re_free (cset->equiv_classes);
3676	re_free (cset->range_starts);
3677	re_free (cset->range_ends);
3678	# endif
3679	re_free (cset->char_classes);
3680	re_free (cset);
3681	}
3682	#endif /* RE_ENABLE_I18N */
3683
3684
3685	/* Functions for binary tree operation. */
3686
3687	/* Create a tree node. */
3688
3689	static bin_tree_t *
3690	create_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3691	re_token_type_t type)
3692	{
3693	re_token_t t;
3694	t.type = type;
3695	return create_token_tree (dfa, left, right, &t);
3696	}
3697
3698	static bin_tree_t *
3699	create_token_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3700	const re_token_t *token)
3701	{
3702	bin_tree_t *tree;
3703	if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3704	{
3705	bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
3706
3707	if (storage == NULL)
3708	return NULL;
3709	storage->next = dfa->str_tree_storage;
3710	dfa->str_tree_storage = storage;
3711	dfa->str_tree_storage_idx = 0;
3712	}
3713	tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3714
3715	tree->parent = NULL;
3716	tree->left = left;
3717	tree->right = right;
3718	tree->token = *token;
3719	tree->token.duplicated = 0;
3720	tree->token.opt_subexp = 0;
3721	tree->first = NULL;
3722	tree->next = NULL;
3723	tree->node_idx = -1;
3724
3725	if (left != NULL)
3726	left->parent = tree;
3727	if (right != NULL)
3728	right->parent = tree;
3729	return tree;
3730	}
3731
3732	/* Mark the tree SRC as an optional subexpression.
3733	To be called from preorder or postorder. */
3734
3735	static reg_errcode_t
3736	mark_opt_subexp (void extra, bin_tree_t node)
3737	{
3738	int idx = (int) (long) extra;
3739	if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3740	node->token.opt_subexp = 1;
3741
3742	return REG_NOERROR;
3743	}
3744
3745	/* Free the allocated memory inside NODE. */
3746
3747	static void
3748	free_token (re_token_t *node)
3749	{
3750	#ifdef RE_ENABLE_I18N
3751	if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
3752	free_charset (node->opr.mbcset);
3753	else
3754	#endif /* RE_ENABLE_I18N */
3755	if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
3756	re_free (node->opr.sbcset);
3757	}
3758
3759	/* Worker function for tree walking. Free the allocated memory inside NODE
3760	and its children. */
3761
3762	static reg_errcode_t
3763	free_tree (void extra, bin_tree_t node)
3764	{
3765	free_token (&node->token);
3766	return REG_NOERROR;
3767	}
3768
3769
3770	/* Duplicate the node SRC, and return new node. This is a preorder
3771	visit similar to the one implemented by the generic visitor, but
3772	we need more infrastructure to maintain two parallel trees --- so,
3773	it's easier to duplicate. */
3774
3775	static bin_tree_t *
3776	duplicate_tree (const bin_tree_t root, re_dfa_t dfa)
3777	{
3778	const bin_tree_t *node;
3779	bin_tree_t *dup_root;
3780	bin_tree_t *p_new = &dup_root, dup_node = root->parent;
3781
3782	for (node = root; ; )
3783	{
3784	/* Create a new tree and link it back to the current parent. */
3785	*p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3786	if (*p_new == NULL)
3787	return NULL;
3788	(*p_new)->parent = dup_node;
3789	(*p_new)->token.duplicated = 1;
3790	dup_node = *p_new;
3791
3792	/* Go to the left node, or up and to the right. */
3793	if (node->left)
3794	{
3795	node = node->left;
3796	p_new = &dup_node->left;
3797	}
3798	else
3799	{
3800	const bin_tree_t *prev = NULL;
3801	while (node->right == prev \|\| node->right == NULL)
3802	{
3803	prev = node;
3804	node = node->parent;
3805	dup_node = dup_node->parent;
3806	if (!node)
3807	return dup_root;
3808	}
3809	node = node->right;
3810	p_new = &dup_node->right;
3811	}
3812	}
3813	}

Note: See TracBrowser for help on using the repository browser.

source: kBuild/vendor/sed/current/lib/regcomp.c

Download in other formats: