regexec.c

Last change on this file was 599, checked in by bird, 18 years ago
GNU sed 4.1.5.
File size: 125.2 KB

Line
1	/* Extended regular expression matching and search library.
2	Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, write to the Free
18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19	02111-1307 USA. */
20
21	static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22	int n) internal_function;
23	static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24	static void match_ctx_free (re_match_context_t *cache) internal_function;
25	static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26	int str_idx, int from, int to)
27	internal_function;
28	static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
29	internal_function;
30	static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31	int str_idx) internal_function;
32	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33	int node, int str_idx)
34	internal_function;
35	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
36	re_dfastate_t **limited_sts, int last_node,
37	int last_str_idx)
38	internal_function;
39	static reg_errcode_t re_search_internal (const regex_t *preg,
40	const char *string, int length,
41	int start, int range, int stop,
42	size_t nmatch, regmatch_t pmatch[],
43	int eflags) internal_function;
44	static int re_search_2_stub (struct re_pattern_buffer *bufp,
45	const char *string1, int length1,
46	const char *string2, int length2,
47	int start, int range, struct re_registers *regs,
48	int stop, int ret_len) internal_function;
49	static int re_search_stub (struct re_pattern_buffer *bufp,
50	const char *string, int length, int start,
51	int range, int stop, struct re_registers *regs,
52	int ret_len) internal_function;
53	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
54	int nregs, int regs_allocated) internal_function;
55	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
56	internal_function;
57	static int check_matching (re_match_context_t *mctx, int fl_longest_match,
58	int *p_match_first) internal_function;
59	static int check_halt_state_context (const re_match_context_t *mctx,
60	const re_dfastate_t *state, int idx)
61	internal_function;
62	static void update_regs (const re_dfa_t dfa, regmatch_t pmatch,
63	regmatch_t *prev_idx_match, int cur_node,
64	int cur_idx, int nmatch) internal_function;
65	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
66	int str_idx, int dest_node, int nregs,
67	regmatch_t *regs,
68	re_node_set *eps_via_nodes)
69	internal_function;
70	static reg_errcode_t set_regs (const regex_t *preg,
71	const re_match_context_t *mctx,
72	size_t nmatch, regmatch_t *pmatch,
73	int fl_backtrack) internal_function;
74	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
75	internal_function;
76
77	#ifdef RE_ENABLE_I18N
78	static int sift_states_iter_mb (const re_match_context_t *mctx,
79	re_sift_context_t *sctx,
80	int node_idx, int str_idx, int max_str_idx)
81	internal_function;
82	#endif /* RE_ENABLE_I18N */
83	static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
84	re_sift_context_t *sctx)
85	internal_function;
86	static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
87	re_sift_context_t *sctx, int str_idx,
88	re_node_set *cur_dest)
89	internal_function;
90	static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
91	re_sift_context_t *sctx,
92	int str_idx,
93	re_node_set *dest_nodes)
94	internal_function;
95	static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
96	re_node_set *dest_nodes,
97	const re_node_set *candidates)
98	internal_function;
99	static int check_dst_limits (const re_match_context_t *mctx,
100	re_node_set *limits,
101	int dst_node, int dst_idx, int src_node,
102	int src_idx) internal_function;
103	static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
104	int boundaries, int subexp_idx,
105	int from_node, int bkref_idx)
106	internal_function;
107	static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
108	int limit, int subexp_idx,
109	int node, int str_idx,
110	int bkref_idx) internal_function;
111	static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
112	re_node_set *dest_nodes,
113	const re_node_set *candidates,
114	re_node_set *limits,
115	struct re_backref_cache_entry *bkref_ents,
116	int str_idx) internal_function;
117	static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
118	re_sift_context_t *sctx,
119	int str_idx, const re_node_set *candidates)
120	internal_function;
121	static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
122	re_dfastate_t **dst,
123	re_dfastate_t **src, int num)
124	internal_function;
125	static re_dfastate_t find_recover_state (reg_errcode_t err,
126	re_match_context_t *mctx) internal_function;
127	static re_dfastate_t transit_state (reg_errcode_t err,
128	re_match_context_t *mctx,
129	re_dfastate_t *state) internal_function;
130	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
131	re_match_context_t *mctx,
132	re_dfastate_t *next_state)
133	internal_function;
134	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
135	re_node_set *cur_nodes,
136	int str_idx) internal_function;
137	#if 0
138	static re_dfastate_t transit_state_sb (reg_errcode_t err,
139	re_match_context_t *mctx,
140	re_dfastate_t *pstate)
141	internal_function;
142	#endif
143	#ifdef RE_ENABLE_I18N
144	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
145	re_dfastate_t *pstate)
146	internal_function;
147	#endif /* RE_ENABLE_I18N */
148	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
149	const re_node_set *nodes)
150	internal_function;
151	static reg_errcode_t get_subexp (re_match_context_t *mctx,
152	int bkref_node, int bkref_str_idx)
153	internal_function;
154	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
155	const re_sub_match_top_t *sub_top,
156	re_sub_match_last_t *sub_last,
157	int bkref_node, int bkref_str)
158	internal_function;
159	static int find_subexp_node (const re_dfa_t dfa, const re_node_set nodes,
160	int subexp_idx, int type) internal_function;
161	static reg_errcode_t check_arrival (re_match_context_t *mctx,
162	state_array_t *path, int top_node,
163	int top_str, int last_node, int last_str,
164	int type) internal_function;
165	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
166	int str_idx,
167	re_node_set *cur_nodes,
168	re_node_set *next_nodes)
169	internal_function;
170	static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
171	re_node_set *cur_nodes,
172	int ex_subexp, int type)
173	internal_function;
174	static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
175	re_node_set *dst_nodes,
176	int target, int ex_subexp,
177	int type) internal_function;
178	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
179	re_node_set *cur_nodes, int cur_str,
180	int subexp_num, int type)
181	internal_function;
182	static int build_trtable (const re_dfa_t *dfa,
183	re_dfastate_t *state) internal_function;
184	#ifdef RE_ENABLE_I18N
185	static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
186	const re_string_t *input, int idx)
187	internal_function;
188	# ifdef _LIBC
189	static unsigned int find_collation_sequence_value (const unsigned char *mbs,
190	size_t name_len)
191	internal_function;
192	# endif /* _LIBC */
193	#endif /* RE_ENABLE_I18N */
194	static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
195	const re_dfastate_t *state,
196	re_node_set *states_node,
197	bitset_t *states_ch) internal_function;
198	static int check_node_accept (const re_match_context_t *mctx,
199	const re_token_t *node, int idx)
200	internal_function;
201	static reg_errcode_t extend_buffers (re_match_context_t *mctx)
202	internal_function;
203
204
205	/* Entry point for POSIX code. */
206
207	/* regexec searches for a given pattern, specified by PREG, in the
208	string STRING.
209
210	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
211	`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
212	least NMATCH elements, and we set them to the offsets of the
213	corresponding matched substrings.
214
215	EFLAGS specifies `execution flags' which affect matching: if
216	REG_NOTBOL is set, then ^ does not match at the beginning of the
217	string; if REG_NOTEOL is set, then $ does not match at the end.
218
219	We return 0 if we find a match and REG_NOMATCH if not. */
220
221	int
222	regexec (preg, string, nmatch, pmatch, eflags)
223	const regex_t *__restrict preg;
224	const char *__restrict string;
225	size_t nmatch;
226	regmatch_t pmatch[];
227	int eflags;
228	{
229	reg_errcode_t err;
230	int start, length;
231	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
232
233	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
234	return REG_BADPAT;
235
236	if (eflags & REG_STARTEND)
237	{
238	start = pmatch[0].rm_so;
239	length = pmatch[0].rm_eo;
240	}
241	else
242	{
243	start = 0;
244	length = strlen (string);
245	}
246
247	__libc_lock_lock (dfa->lock);
248	if (preg->no_sub)
249	err = re_search_internal (preg, string, length, start, length - start,
250	length, 0, NULL, eflags);
251	else
252	err = re_search_internal (preg, string, length, start, length - start,
253	length, nmatch, pmatch, eflags);
254	__libc_lock_unlock (dfa->lock);
255	return err != REG_NOERROR;
256	}
257
258	#ifdef _LIBC
259	# include <shlib-compat.h>
260	versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
261
262	# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
263	__typeof__ (__regexec) __compat_regexec;
264
265	int
266	attribute_compat_text_section
267	__compat_regexec (const regex_t *__restrict preg,
268	const char *__restrict string, size_t nmatch,
269	regmatch_t pmatch[], int eflags)
270	{
271	return regexec (preg, string, nmatch, pmatch,
272	eflags & (REG_NOTBOL \| REG_NOTEOL));
273	}
274	compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
275	# endif
276	#endif
277
278	/* Entry points for GNU code. */
279
280	/* re_match, re_search, re_match_2, re_search_2
281
282	The former two functions operate on STRING with length LENGTH,
283	while the later two operate on concatenation of STRING1 and STRING2
284	with lengths LENGTH1 and LENGTH2, respectively.
285
286	re_match() matches the compiled pattern in BUFP against the string,
287	starting at index START.
288
289	re_search() first tries matching at index START, then it tries to match
290	starting from index START + 1, and so on. The last start position tried
291	is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
292	way as re_match().)
293
294	The parameter STOP of re_{match,search}_2 specifies that no match exceeding
295	the first STOP characters of the concatenation of the strings should be
296	concerned.
297
298	If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
299	and all groups is stroed in REGS. (For the "_2" variants, the offsets are
300	computed relative to the concatenation, not relative to the individual
301	strings.)
302
303	On success, re_match* functions return the length of the match, re_search*
304	return the position of the start of the match. Return value -1 means no
305	match was found and -2 indicates an internal error. */
306
307	int
308	re_match (bufp, string, length, start, regs)
309	struct re_pattern_buffer *bufp;
310	const char *string;
311	int length, start;
312	struct re_registers *regs;
313	{
314	return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
315	}
316	#ifdef _LIBC
317	weak_alias (__re_match, re_match)
318	#endif
319
320	int
321	re_search (bufp, string, length, start, range, regs)
322	struct re_pattern_buffer *bufp;
323	const char *string;
324	int length, start, range;
325	struct re_registers *regs;
326	{
327	return re_search_stub (bufp, string, length, start, range, length, regs, 0);
328	}
329	#ifdef _LIBC
330	weak_alias (__re_search, re_search)
331	#endif
332
333	int
334	re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
335	struct re_pattern_buffer *bufp;
336	const char string1, string2;
337	int length1, length2, start, stop;
338	struct re_registers *regs;
339	{
340	return re_search_2_stub (bufp, string1, length1, string2, length2,
341	start, 0, regs, stop, 1);
342	}
343	#ifdef _LIBC
344	weak_alias (__re_match_2, re_match_2)
345	#endif
346
347	int
348	re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
349	struct re_pattern_buffer *bufp;
350	const char string1, string2;
351	int length1, length2, start, range, stop;
352	struct re_registers *regs;
353	{
354	return re_search_2_stub (bufp, string1, length1, string2, length2,
355	start, range, regs, stop, 0);
356	}
357	#ifdef _LIBC
358	weak_alias (__re_search_2, re_search_2)
359	#endif
360
361	static int
362	re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
363	stop, ret_len)
364	struct re_pattern_buffer *bufp;
365	const char string1, string2;
366	int length1, length2, start, range, stop, ret_len;
367	struct re_registers *regs;
368	{
369	const char *str;
370	int rval;
371	int len = length1 + length2;
372	int free_str = 0;
373
374	if (BE (length1 < 0 \|\| length2 < 0 \|\| stop < 0, 0))
375	return -2;
376
377	/* Concatenate the strings. */
378	if (length2 > 0)
379	if (length1 > 0)
380	{
381	char *s = re_malloc (char, len);
382
383	if (BE (s == NULL, 0))
384	return -2;
385	#ifdef _LIBC
386	memcpy (__mempcpy (s, string1, length1), string2, length2);
387	#else
388	memcpy (s, string1, length1);
389	memcpy (s + length1, string2, length2);
390	#endif
391	str = s;
392	free_str = 1;
393	}
394	else
395	str = string2;
396	else
397	str = string1;
398
399	rval = re_search_stub (bufp, str, len, start, range, stop, regs,
400	ret_len);
401	if (free_str)
402	re_free ((char *) str);
403	return rval;
404	}
405
406	/* The parameters have the same meaning as those of re_search.
407	Additional parameters:
408	If RET_LEN is nonzero the length of the match is returned (re_match style);
409	otherwise the position of the match is returned. */
410
411	static int
412	re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
413	struct re_pattern_buffer *bufp;
414	const char *string;
415	int length, start, range, stop, ret_len;
416	struct re_registers *regs;
417	{
418	reg_errcode_t result;
419	regmatch_t *pmatch;
420	int nregs, rval;
421	int eflags = 0;
422	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
423
424	/* Check for out-of-range. */
425	if (BE (start < 0 \|\| start > length, 0))
426	return -1;
427	if (BE (start + range > length, 0))
428	range = length - start;
429	else if (BE (start + range < 0, 0))
430	range = -start;
431
432	__libc_lock_lock (dfa->lock);
433
434	eflags \|= (bufp->not_bol) ? REG_NOTBOL : 0;
435	eflags \|= (bufp->not_eol) ? REG_NOTEOL : 0;
436
437	/* Compile fastmap if we haven't yet. */
438	if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
439	re_compile_fastmap (bufp);
440
441	if (BE (bufp->no_sub, 0))
442	regs = NULL;
443
444	/* We need at least 1 register. */
445	if (regs == NULL)
446	nregs = 1;
447	else if (BE (bufp->regs_allocated == REGS_FIXED &&
448	regs->num_regs < bufp->re_nsub + 1, 0))
449	{
450	nregs = regs->num_regs;
451	if (BE (nregs < 1, 0))
452	{
453	/* Nothing can be copied to regs. */
454	regs = NULL;
455	nregs = 1;
456	}
457	}
458	else
459	nregs = bufp->re_nsub + 1;
460	pmatch = re_malloc (regmatch_t, nregs);
461	if (BE (pmatch == NULL, 0))
462	{
463	rval = -2;
464	goto out;
465	}
466
467	result = re_search_internal (bufp, string, length, start, range, stop,
468	nregs, pmatch, eflags);
469
470	rval = 0;
471
472	/* I hope we needn't fill ther regs with -1's when no match was found. */
473	if (result != REG_NOERROR)
474	rval = -1;
475	else if (regs != NULL)
476	{
477	/* If caller wants register contents data back, copy them. */
478	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
479	bufp->regs_allocated);
480	if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
481	rval = -2;
482	}
483
484	if (BE (rval == 0, 1))
485	{
486	if (ret_len)
487	{
488	assert (pmatch[0].rm_so == start);
489	rval = pmatch[0].rm_eo - start;
490	}
491	else
492	rval = pmatch[0].rm_so;
493	}
494	re_free (pmatch);
495	out:
496	__libc_lock_unlock (dfa->lock);
497	return rval;
498	}
499
500	static unsigned
501	re_copy_regs (regs, pmatch, nregs, regs_allocated)
502	struct re_registers *regs;
503	regmatch_t *pmatch;
504	int nregs, regs_allocated;
505	{
506	int rval = REGS_REALLOCATE;
507	int i;
508	int need_regs = nregs + 1;
509	/* We need one extra element beyond `num_regs' for the `-1' marker GNU code
510	uses. */
511
512	/* Have the register data arrays been allocated? */
513	if (regs_allocated == REGS_UNALLOCATED)
514	{ /* No. So allocate them with malloc. */
515	regs->start = re_malloc (regoff_t, need_regs);
516	regs->end = re_malloc (regoff_t, need_regs);
517	if (BE (regs->start == NULL, 0) \|\| BE (regs->end == NULL, 0))
518	return REGS_UNALLOCATED;
519	regs->num_regs = need_regs;
520	}
521	else if (regs_allocated == REGS_REALLOCATE)
522	{ /* Yes. If we need more elements than were already
523	allocated, reallocate them. If we need fewer, just
524	leave it alone. */
525	if (BE (need_regs > regs->num_regs, 0))
526	{
527	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
528	regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
529	if (BE (new_start == NULL, 0) \|\| BE (new_end == NULL, 0))
530	return REGS_UNALLOCATED;
531	regs->start = new_start;
532	regs->end = new_end;
533	regs->num_regs = need_regs;
534	}
535	}
536	else
537	{
538	assert (regs_allocated == REGS_FIXED);
539	/* This function may not be called with REGS_FIXED and nregs too big. */
540	assert (regs->num_regs >= nregs);
541	rval = REGS_FIXED;
542	}
543
544	/* Copy the regs. */
545	for (i = 0; i < nregs; ++i)
546	{
547	regs->start[i] = pmatch[i].rm_so;
548	regs->end[i] = pmatch[i].rm_eo;
549	}
550	for ( ; i < regs->num_regs; ++i)
551	regs->start[i] = regs->end[i] = -1;
552
553	return rval;
554	}
555
556	/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
557	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
558	this memory for recording register information. STARTS and ENDS
559	must be allocated using the malloc library routine, and must each
560	be at least NUM_REGS * sizeof (regoff_t) bytes long.
561
562	If NUM_REGS == 0, then subsequent matches should allocate their own
563	register data.
564
565	Unless this function is called, the first search or match using
566	PATTERN_BUFFER will allocate its own register data, without
567	freeing the old data. */
568
569	void
570	re_set_registers (bufp, regs, num_regs, starts, ends)
571	struct re_pattern_buffer *bufp;
572	struct re_registers *regs;
573	unsigned num_regs;
574	regoff_t starts, ends;
575	{
576	if (num_regs)
577	{
578	bufp->regs_allocated = REGS_REALLOCATE;
579	regs->num_regs = num_regs;
580	regs->start = starts;
581	regs->end = ends;
582	}
583	else
584	{
585	bufp->regs_allocated = REGS_UNALLOCATED;
586	regs->num_regs = 0;
587	regs->start = regs->end = (regoff_t *) 0;
588	}
589	}
590	#ifdef _LIBC
591	weak_alias (__re_set_registers, re_set_registers)
592	#endif
593
594
595	/* Entry points compatible with 4.2 BSD regex library. We don't define
596	them unless specifically requested. */
597
598	#if defined _REGEX_RE_COMP \|\| defined _LIBC
599	int
600	# ifdef _LIBC
601	weak_function
602	# endif
603	re_exec (s)
604	const char *s;
605	{
606	return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
607	}
608	#endif /* _REGEX_RE_COMP */
609
610
611	/* Internal entry point. */
612
613	/* Searches for a compiled pattern PREG in the string STRING, whose
614	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
615	mingings with regexec. START, and RANGE have the same meanings
616	with re_search.
617	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
618	otherwise return the error code.
619	Note: We assume front end functions already check ranges.
620	(START + RANGE >= 0 && START + RANGE <= LENGTH) */
621
622	static reg_errcode_t
623	re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
624	eflags)
625	const regex_t *preg;
626	const char *string;
627	int length, start, range, stop, eflags;
628	size_t nmatch;
629	regmatch_t pmatch[];
630	{
631	reg_errcode_t err;
632	const re_dfa_t dfa = (const re_dfa_t ) preg->buffer;
633	int left_lim, right_lim, incr;
634	int fl_longest_match, match_first, match_kind, match_last = -1;
635	int extra_nmatch;
636	int sb, ch;
637	#if defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
638	re_match_context_t mctx = { .dfa = dfa };
639	#else
640	re_match_context_t mctx;
641	#endif
642	char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
643	&& range && !preg->can_be_null) ? preg->fastmap : NULL;
644	RE_TRANSLATE_TYPE t = preg->translate;
645
646	#if !(defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
647	memset (&mctx, '\0', sizeof (re_match_context_t));
648	mctx.dfa = dfa;
649	#endif
650
651	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
652	nmatch -= extra_nmatch;
653
654	/* Check if the DFA haven't been compiled. */
655	if (BE (preg->used == 0 \|\| dfa->init_state == NULL
656	\|\| dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
657	\|\| dfa->init_state_begbuf == NULL, 0))
658	return REG_NOMATCH;
659
660	#ifdef DEBUG
661	/* We assume front-end functions already check them. */
662	assert (start + range >= 0 && start + range <= length);
663	#endif
664
665	/* If initial states with non-begbuf contexts have no elements,
666	the regex must be anchored. If preg->newline_anchor is set,
667	we'll never use init_state_nl, so do not check it. */
668	if (dfa->init_state->nodes.nelem == 0
669	&& dfa->init_state_word->nodes.nelem == 0
670	&& (dfa->init_state_nl->nodes.nelem == 0
671	\|\| !preg->newline_anchor))
672	{
673	if (start != 0 && start + range != 0)
674	return REG_NOMATCH;
675	start = range = 0;
676	}
677
678	/* We must check the longest matching, if nmatch > 0. */
679	fl_longest_match = (nmatch != 0 \|\| dfa->nbackref);
680
681	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
682	preg->translate, preg->syntax & RE_ICASE, dfa);
683	if (BE (err != REG_NOERROR, 0))
684	goto free_return;
685	mctx.input.stop = stop;
686	mctx.input.raw_stop = stop;
687	mctx.input.newline_anchor = preg->newline_anchor;
688
689	err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
690	if (BE (err != REG_NOERROR, 0))
691	goto free_return;
692
693	/* We will log all the DFA states through which the dfa pass,
694	if nmatch > 1, or this dfa has "multibyte node", which is a
695	back-reference or a node which can accept multibyte character or
696	multi character collating element. */
697	if (nmatch > 1 \|\| dfa->has_mb_node)
698	{
699	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
700	if (BE (mctx.state_log == NULL, 0))
701	{
702	err = REG_ESPACE;
703	goto free_return;
704	}
705	}
706	else
707	mctx.state_log = NULL;
708
709	match_first = start;
710	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
711	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
712
713	/* Check incrementally whether of not the input string match. */
714	incr = (range < 0) ? -1 : 1;
715	left_lim = (range < 0) ? start + range : start;
716	right_lim = (range < 0) ? start : start + range;
717	sb = dfa->mb_cur_max == 1;
718	match_kind =
719	(fastmap
720	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? 4 : 0)
721	\| (range >= 0 ? 2 : 0)
722	\| (t != NULL ? 1 : 0))
723	: 8);
724
725	for (;; match_first += incr)
726	{
727	err = REG_NOMATCH;
728	if (match_first < left_lim \|\| right_lim < match_first)
729	goto free_return;
730
731	/* Advance as rapidly as possible through the string, until we
732	find a plausible place to start matching. This may be done
733	with varying efficiency, so there are various possibilities:
734	only the most common of them are specialized, in order to
735	save on code size. We use a switch statement for speed. */
736	switch (match_kind)
737	{
738	case 8:
739	/* No fastmap. */
740	break;
741
742	case 7:
743	/* Fastmap with single-byte translation, match forward. */
744	while (BE (match_first < right_lim, 1)
745	&& !fastmap[t[(unsigned char) string[match_first]]])
746	++match_first;
747	goto forward_match_found_start_or_reached_end;
748
749	case 6:
750	/* Fastmap without translation, match forward. */
751	while (BE (match_first < right_lim, 1)
752	&& !fastmap[(unsigned char) string[match_first]])
753	++match_first;
754
755	forward_match_found_start_or_reached_end:
756	if (BE (match_first == right_lim, 0))
757	{
758	ch = match_first >= length
759	? 0 : (unsigned char) string[match_first];
760	if (!fastmap[t ? t[ch] : ch])
761	goto free_return;
762	}
763	break;
764
765	case 4:
766	case 5:
767	/* Fastmap without multi-byte translation, match backwards. */
768	while (match_first >= left_lim)
769	{
770	ch = match_first >= length
771	? 0 : (unsigned char) string[match_first];
772	if (fastmap[t ? t[ch] : ch])
773	break;
774	--match_first;
775	}
776	if (match_first < left_lim)
777	goto free_return;
778	break;
779
780	default:
781	/* In this case, we can't determine easily the current byte,
782	since it might be a component byte of a multibyte
783	character. Then we use the constructed buffer instead. */
784	for (;;)
785	{
786	/* If MATCH_FIRST is out of the valid range, reconstruct the
787	buffers. */
788	unsigned int offset = match_first - mctx.input.raw_mbs_idx;
789	if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
790	{
791	err = re_string_reconstruct (&mctx.input, match_first,
792	eflags);
793	if (BE (err != REG_NOERROR, 0))
794	goto free_return;
795
796	offset = match_first - mctx.input.raw_mbs_idx;
797	}
798	/* If MATCH_FIRST is out of the buffer, leave it as '\0'.
799	Note that MATCH_FIRST must not be smaller than 0. */
800	ch = (match_first >= length
801	? 0 : re_string_byte_at (&mctx.input, offset));
802	if (fastmap[ch])
803	break;
804	match_first += incr;
805	if (match_first < left_lim \|\| match_first > right_lim)
806	{
807	err = REG_NOMATCH;
808	goto free_return;
809	}
810	}
811	break;
812	}
813
814	/* Reconstruct the buffers so that the matcher can assume that
815	the matching starts from the beginning of the buffer. */
816	err = re_string_reconstruct (&mctx.input, match_first, eflags);
817	if (BE (err != REG_NOERROR, 0))
818	goto free_return;
819
820	#ifdef RE_ENABLE_I18N
821	/* Don't consider this char as a possible match start if it part,
822	yet isn't the head, of a multibyte character. */
823	if (!sb && !re_string_first_byte (&mctx.input, 0))
824	continue;
825	#endif
826
827	/* It seems to be appropriate one, then use the matcher. */
828	/* We assume that the matching starts from 0. */
829	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
830	match_last = check_matching (&mctx, fl_longest_match,
831	range >= 0 ? &match_first : NULL);
832	if (match_last != -1)
833	{
834	if (BE (match_last == -2, 0))
835	{
836	err = REG_ESPACE;
837	goto free_return;
838	}
839	else
840	{
841	mctx.match_last = match_last;
842	if ((!preg->no_sub && nmatch > 1) \|\| dfa->nbackref)
843	{
844	re_dfastate_t *pstate = mctx.state_log[match_last];
845	mctx.last_node = check_halt_state_context (&mctx, pstate,
846	match_last);
847	}
848	if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
849	\|\| dfa->nbackref)
850	{
851	err = prune_impossible_nodes (&mctx);
852	if (err == REG_NOERROR)
853	break;
854	if (BE (err != REG_NOMATCH, 0))
855	goto free_return;
856	match_last = -1;
857	}
858	else
859	break; /* We found a match. */
860	}
861	}
862
863	match_ctx_clean (&mctx);
864	}
865
866	#ifdef DEBUG
867	assert (match_last != -1);
868	assert (err == REG_NOERROR);
869	#endif
870
871	/* Set pmatch[] if we need. */
872	if (nmatch > 0)
873	{
874	int reg_idx;
875
876	/* Initialize registers. */
877	for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
878	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
879
880	/* Set the points where matching start/end. */
881	pmatch[0].rm_so = 0;
882	pmatch[0].rm_eo = mctx.match_last;
883
884	if (!preg->no_sub && nmatch > 1)
885	{
886	err = set_regs (preg, &mctx, nmatch, pmatch,
887	dfa->has_plural_match && dfa->nbackref > 0);
888	if (BE (err != REG_NOERROR, 0))
889	goto free_return;
890	}
891
892	/* At last, add the offset to the each registers, since we slided
893	the buffers so that we could assume that the matching starts
894	from 0. */
895	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
896	if (pmatch[reg_idx].rm_so != -1)
897	{
898	#ifdef RE_ENABLE_I18N
899	if (BE (mctx.input.offsets_needed != 0, 0))
900	{
901	pmatch[reg_idx].rm_so =
902	(pmatch[reg_idx].rm_so == mctx.input.valid_len
903	? mctx.input.valid_raw_len
904	: mctx.input.offsets[pmatch[reg_idx].rm_so]);
905	pmatch[reg_idx].rm_eo =
906	(pmatch[reg_idx].rm_eo == mctx.input.valid_len
907	? mctx.input.valid_raw_len
908	: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
909	}
910	#else
911	assert (mctx.input.offsets_needed == 0);
912	#endif
913	pmatch[reg_idx].rm_so += match_first;
914	pmatch[reg_idx].rm_eo += match_first;
915	}
916	for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
917	{
918	pmatch[nmatch + reg_idx].rm_so = -1;
919	pmatch[nmatch + reg_idx].rm_eo = -1;
920	}
921
922	if (dfa->subexp_map)
923	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
924	if (dfa->subexp_map[reg_idx] != reg_idx)
925	{
926	pmatch[reg_idx + 1].rm_so
927	= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
928	pmatch[reg_idx + 1].rm_eo
929	= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
930	}
931	}
932
933	free_return:
934	re_free (mctx.state_log);
935	if (dfa->nbackref)
936	match_ctx_free (&mctx);
937	re_string_destruct (&mctx.input);
938	return err;
939	}
940
941	static reg_errcode_t
942	prune_impossible_nodes (mctx)
943	re_match_context_t *mctx;
944	{
945	const re_dfa_t *const dfa = mctx->dfa;
946	int halt_node, match_last;
947	reg_errcode_t ret;
948	re_dfastate_t **sifted_states;
949	re_dfastate_t **lim_states = NULL;
950	re_sift_context_t sctx;
951	#ifdef DEBUG
952	assert (mctx->state_log != NULL);
953	#endif
954	match_last = mctx->match_last;
955	halt_node = mctx->last_node;
956	sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
957	if (BE (sifted_states == NULL, 0))
958	{
959	ret = REG_ESPACE;
960	goto free_return;
961	}
962	if (dfa->nbackref)
963	{
964	lim_states = re_malloc (re_dfastate_t *, match_last + 1);
965	if (BE (lim_states == NULL, 0))
966	{
967	ret = REG_ESPACE;
968	goto free_return;
969	}
970	while (1)
971	{
972	memset (lim_states, '\0',
973	sizeof (re_dfastate_t ) (match_last + 1));
974	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
975	match_last);
976	ret = sift_states_backward (mctx, &sctx);
977	re_node_set_free (&sctx.limits);
978	if (BE (ret != REG_NOERROR, 0))
979	goto free_return;
980	if (sifted_states[0] != NULL \|\| lim_states[0] != NULL)
981	break;
982	do
983	{
984	--match_last;
985	if (match_last < 0)
986	{
987	ret = REG_NOMATCH;
988	goto free_return;
989	}
990	} while (mctx->state_log[match_last] == NULL
991	\|\| !mctx->state_log[match_last]->halt);
992	halt_node = check_halt_state_context (mctx,
993	mctx->state_log[match_last],
994	match_last);
995	}
996	ret = merge_state_array (dfa, sifted_states, lim_states,
997	match_last + 1);
998	re_free (lim_states);
999	lim_states = NULL;
1000	if (BE (ret != REG_NOERROR, 0))
1001	goto free_return;
1002	}
1003	else
1004	{
1005	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1006	ret = sift_states_backward (mctx, &sctx);
1007	re_node_set_free (&sctx.limits);
1008	if (BE (ret != REG_NOERROR, 0))
1009	goto free_return;
1010	}
1011	re_free (mctx->state_log);
1012	mctx->state_log = sifted_states;
1013	sifted_states = NULL;
1014	mctx->last_node = halt_node;
1015	mctx->match_last = match_last;
1016	ret = REG_NOERROR;
1017	free_return:
1018	re_free (sifted_states);
1019	re_free (lim_states);
1020	return ret;
1021	}
1022
1023	/* Acquire an initial state and return it.
1024	We must select appropriate initial state depending on the context,
1025	since initial states may have constraints like "\<", "^", etc.. */
1026
1027	static inline re_dfastate_t *
1028	__attribute ((always_inline)) internal_function
1029	acquire_init_state_context (reg_errcode_t err, const re_match_context_t mctx,
1030	int idx)
1031	{
1032	const re_dfa_t *const dfa = mctx->dfa;
1033	if (dfa->init_state->has_constraint)
1034	{
1035	unsigned int context;
1036	context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1037	if (IS_WORD_CONTEXT (context))
1038	return dfa->init_state_word;
1039	else if (IS_ORDINARY_CONTEXT (context))
1040	return dfa->init_state;
1041	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1042	return dfa->init_state_begbuf;
1043	else if (IS_NEWLINE_CONTEXT (context))
1044	return dfa->init_state_nl;
1045	else if (IS_BEGBUF_CONTEXT (context))
1046	{
1047	/* It is relatively rare case, then calculate on demand. */
1048	return re_acquire_state_context (err, dfa,
1049	dfa->init_state->entrance_nodes,
1050	context);
1051	}
1052	else
1053	/* Must not happen? */
1054	return dfa->init_state;
1055	}
1056	else
1057	return dfa->init_state;
1058	}
1059
1060	/* Check whether the regular expression match input string INPUT or not,
1061	and return the index where the matching end, return -1 if not match,
1062	or return -2 in case of an error.
1063	FL_LONGEST_MATCH means we want the POSIX longest matching.
1064	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1065	next place where we may want to try matching.
1066	Note that the matcher assume that the maching starts from the current
1067	index of the buffer. */
1068
1069	static int
1070	internal_function
1071	check_matching (re_match_context_t *mctx, int fl_longest_match,
1072	int *p_match_first)
1073	{
1074	const re_dfa_t *const dfa = mctx->dfa;
1075	reg_errcode_t err;
1076	int match = 0;
1077	int match_last = -1;
1078	int cur_str_idx = re_string_cur_idx (&mctx->input);
1079	re_dfastate_t *cur_state;
1080	int at_init_state = p_match_first != NULL;
1081	int next_start_idx = cur_str_idx;
1082
1083	err = REG_NOERROR;
1084	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1085	/* An initial state must not be NULL (invalid). */
1086	if (BE (cur_state == NULL, 0))
1087	{
1088	assert (err == REG_ESPACE);
1089	return -2;
1090	}
1091
1092	if (mctx->state_log != NULL)
1093	{
1094	mctx->state_log[cur_str_idx] = cur_state;
1095
1096	/* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1097	later. E.g. Processing back references. */
1098	if (BE (dfa->nbackref, 0))
1099	{
1100	at_init_state = 0;
1101	err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1102	if (BE (err != REG_NOERROR, 0))
1103	return err;
1104
1105	if (cur_state->has_backref)
1106	{
1107	err = transit_state_bkref (mctx, &cur_state->nodes);
1108	if (BE (err != REG_NOERROR, 0))
1109	return err;
1110	}
1111	}
1112	}
1113
1114	/* If the RE accepts NULL string. */
1115	if (BE (cur_state->halt, 0))
1116	{
1117	if (!cur_state->has_constraint
1118	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
1119	{
1120	if (!fl_longest_match)
1121	return cur_str_idx;
1122	else
1123	{
1124	match_last = cur_str_idx;
1125	match = 1;
1126	}
1127	}
1128	}
1129
1130	while (!re_string_eoi (&mctx->input))
1131	{
1132	re_dfastate_t *old_state = cur_state;
1133	int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1134
1135	if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1136	\|\| (BE (next_char_idx >= mctx->input.valid_len, 0)
1137	&& mctx->input.valid_len < mctx->input.len))
1138	{
1139	err = extend_buffers (mctx);
1140	if (BE (err != REG_NOERROR, 0))
1141	{
1142	assert (err == REG_ESPACE);
1143	return -2;
1144	}
1145	}
1146
1147	cur_state = transit_state (&err, mctx, cur_state);
1148	if (mctx->state_log != NULL)
1149	cur_state = merge_state_with_log (&err, mctx, cur_state);
1150
1151	if (cur_state == NULL)
1152	{
1153	/* Reached the invalid state or an error. Try to recover a valid
1154	state using the state log, if available and if we have not
1155	already found a valid (even if not the longest) match. */
1156	if (BE (err != REG_NOERROR, 0))
1157	return -2;
1158
1159	if (mctx->state_log == NULL
1160	\|\| (match && !fl_longest_match)
1161	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
1162	break;
1163	}
1164
1165	if (BE (at_init_state, 0))
1166	{
1167	if (old_state == cur_state)
1168	next_start_idx = next_char_idx;
1169	else
1170	at_init_state = 0;
1171	}
1172
1173	if (cur_state->halt)
1174	{
1175	/* Reached a halt state.
1176	Check the halt state can satisfy the current context. */
1177	if (!cur_state->has_constraint
1178	\|\| check_halt_state_context (mctx, cur_state,
1179	re_string_cur_idx (&mctx->input)))
1180	{
1181	/* We found an appropriate halt state. */
1182	match_last = re_string_cur_idx (&mctx->input);
1183	match = 1;
1184
1185	/* We found a match, do not modify match_first below. */
1186	p_match_first = NULL;
1187	if (!fl_longest_match)
1188	break;
1189	}
1190	}
1191	}
1192
1193	if (p_match_first)
1194	*p_match_first += next_start_idx;
1195
1196	return match_last;
1197	}
1198
1199	/* Check NODE match the current context. */
1200
1201	static int
1202	internal_function
1203	check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1204	{
1205	re_token_type_t type = dfa->nodes[node].type;
1206	unsigned int constraint = dfa->nodes[node].constraint;
1207	if (type != END_OF_RE)
1208	return 0;
1209	if (!constraint)
1210	return 1;
1211	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1212	return 0;
1213	return 1;
1214	}
1215
1216	/* Check the halt state STATE match the current context.
1217	Return 0 if not match, if the node, STATE has, is a halt node and
1218	match the context, return the node. */
1219
1220	static int
1221	internal_function
1222	check_halt_state_context (const re_match_context_t *mctx,
1223	const re_dfastate_t *state, int idx)
1224	{
1225	int i;
1226	unsigned int context;
1227	#ifdef DEBUG
1228	assert (state->halt);
1229	#endif
1230	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1231	for (i = 0; i < state->nodes.nelem; ++i)
1232	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1233	return state->nodes.elems[i];
1234	return 0;
1235	}
1236
1237	/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1238	corresponding to the DFA).
1239	Return the destination node, and update EPS_VIA_NODES, return -1 in case
1240	of errors. */
1241
1242	static int
1243	internal_function
1244	proceed_next_node (const re_match_context_t mctx, int nregs, regmatch_t regs,
1245	int pidx, int node, re_node_set eps_via_nodes,
1246	struct re_fail_stack_t *fs)
1247	{
1248	const re_dfa_t *const dfa = mctx->dfa;
1249	int i, err;
1250	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1251	{
1252	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1253	re_node_set *edests = &dfa->edests[node];
1254	int dest_node;
1255	err = re_node_set_insert (eps_via_nodes, node);
1256	if (BE (err < 0, 0))
1257	return -2;
1258	/* Pick up a valid destination, or return -1 if none is found. */
1259	for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1260	{
1261	int candidate = edests->elems[i];
1262	if (!re_node_set_contains (cur_nodes, candidate))
1263	continue;
1264	if (dest_node == -1)
1265	dest_node = candidate;
1266
1267	else
1268	{
1269	/* In order to avoid infinite loop like "(a)", return the second
1270	epsilon-transition if the first was already considered. */
1271	if (re_node_set_contains (eps_via_nodes, dest_node))
1272	return candidate;
1273
1274	/* Otherwise, push the second epsilon-transition on the fail stack. */
1275	else if (fs != NULL
1276	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1277	eps_via_nodes))
1278	return -2;
1279
1280	/* We know we are going to exit. */
1281	break;
1282	}
1283	}
1284	return dest_node;
1285	}
1286	else
1287	{
1288	int naccepted = 0;
1289	re_token_type_t type = dfa->nodes[node].type;
1290
1291	#ifdef RE_ENABLE_I18N
1292	if (dfa->nodes[node].accept_mb)
1293	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1294	else
1295	#endif /* RE_ENABLE_I18N */
1296	if (type == OP_BACK_REF)
1297	{
1298	int subexp_idx = dfa->nodes[node].opr.idx + 1;
1299	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1300	if (fs != NULL)
1301	{
1302	if (regs[subexp_idx].rm_so == -1 \|\| regs[subexp_idx].rm_eo == -1)
1303	return -1;
1304	else if (naccepted)
1305	{
1306	char buf = (char ) re_string_get_buffer (&mctx->input);
1307	if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1308	naccepted) != 0)
1309	return -1;
1310	}
1311	}
1312
1313	if (naccepted == 0)
1314	{
1315	int dest_node;
1316	err = re_node_set_insert (eps_via_nodes, node);
1317	if (BE (err < 0, 0))
1318	return -2;
1319	dest_node = dfa->edests[node].elems[0];
1320	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1321	dest_node))
1322	return dest_node;
1323	}
1324	}
1325
1326	if (naccepted != 0
1327	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
1328	{
1329	int dest_node = dfa->nexts[node];
1330	pidx = (naccepted == 0) ? pidx + 1 : *pidx + naccepted;
1331	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
1332	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1333	dest_node)))
1334	return -1;
1335	re_node_set_empty (eps_via_nodes);
1336	return dest_node;
1337	}
1338	}
1339	return -1;
1340	}
1341
1342	static reg_errcode_t
1343	internal_function
1344	push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1345	int nregs, regmatch_t regs, re_node_set eps_via_nodes)
1346	{
1347	reg_errcode_t err;
1348	int num = fs->num++;
1349	if (fs->num == fs->alloc)
1350	{
1351	struct re_fail_stack_ent_t *new_array;
1352	new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1353	* fs->alloc * 2));
1354	if (new_array == NULL)
1355	return REG_ESPACE;
1356	fs->alloc *= 2;
1357	fs->stack = new_array;
1358	}
1359	fs->stack[num].idx = str_idx;
1360	fs->stack[num].node = dest_node;
1361	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1362	if (fs->stack[num].regs == NULL)
1363	return REG_ESPACE;
1364	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1365	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1366	return err;
1367	}
1368
1369	static int
1370	internal_function
1371	pop_fail_stack (struct re_fail_stack_t fs, int pidx, int nregs,
1372	regmatch_t regs, re_node_set eps_via_nodes)
1373	{
1374	int num = --fs->num;
1375	assert (num >= 0);
1376	*pidx = fs->stack[num].idx;
1377	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1378	re_node_set_free (eps_via_nodes);
1379	re_free (fs->stack[num].regs);
1380	*eps_via_nodes = fs->stack[num].eps_via_nodes;
1381	return fs->stack[num].node;
1382	}
1383
1384	/* Set the positions where the subexpressions are starts/ends to registers
1385	PMATCH.
1386	Note: We assume that pmatch[0] is already set, and
1387	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1388
1389	static reg_errcode_t
1390	internal_function
1391	set_regs (const regex_t preg, const re_match_context_t mctx, size_t nmatch,
1392	regmatch_t *pmatch, int fl_backtrack)
1393	{
1394	const re_dfa_t dfa = (const re_dfa_t ) preg->buffer;
1395	int idx, cur_node;
1396	re_node_set eps_via_nodes;
1397	struct re_fail_stack_t *fs;
1398	struct re_fail_stack_t fs_body = { 0, 2, NULL };
1399	regmatch_t *prev_idx_match;
1400	int prev_idx_match_malloced = 0;
1401
1402	#ifdef DEBUG
1403	assert (nmatch > 1);
1404	assert (mctx->state_log != NULL);
1405	#endif
1406	if (fl_backtrack)
1407	{
1408	fs = &fs_body;
1409	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1410	if (fs->stack == NULL)
1411	return REG_ESPACE;
1412	}
1413	else
1414	fs = NULL;
1415
1416	cur_node = dfa->init_node;
1417	re_node_set_init_empty (&eps_via_nodes);
1418
1419	if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1420	prev_idx_match = (regmatch_t ) alloca (nmatch sizeof (regmatch_t));
1421	else
1422	{
1423	prev_idx_match = re_malloc (regmatch_t, nmatch);
1424	if (prev_idx_match == NULL)
1425	{
1426	free_fail_stack_return (fs);
1427	return REG_ESPACE;
1428	}
1429	prev_idx_match_malloced = 1;
1430	}
1431	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1432
1433	for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1434	{
1435	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1436
1437	if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1438	{
1439	int reg_idx;
1440	if (fs)
1441	{
1442	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1443	if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1444	break;
1445	if (reg_idx == nmatch)
1446	{
1447	re_node_set_free (&eps_via_nodes);
1448	if (prev_idx_match_malloced)
1449	re_free (prev_idx_match);
1450	return free_fail_stack_return (fs);
1451	}
1452	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1453	&eps_via_nodes);
1454	}
1455	else
1456	{
1457	re_node_set_free (&eps_via_nodes);
1458	if (prev_idx_match_malloced)
1459	re_free (prev_idx_match);
1460	return REG_NOERROR;
1461	}
1462	}
1463
1464	/* Proceed to next node. */
1465	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1466	&eps_via_nodes, fs);
1467
1468	if (BE (cur_node < 0, 0))
1469	{
1470	if (BE (cur_node == -2, 0))
1471	{
1472	re_node_set_free (&eps_via_nodes);
1473	if (prev_idx_match_malloced)
1474	re_free (prev_idx_match);
1475	free_fail_stack_return (fs);
1476	return REG_ESPACE;
1477	}
1478	if (fs)
1479	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1480	&eps_via_nodes);
1481	else
1482	{
1483	re_node_set_free (&eps_via_nodes);
1484	if (prev_idx_match_malloced)
1485	re_free (prev_idx_match);
1486	return REG_NOMATCH;
1487	}
1488	}
1489	}
1490	re_node_set_free (&eps_via_nodes);
1491	if (prev_idx_match_malloced)
1492	re_free (prev_idx_match);
1493	return free_fail_stack_return (fs);
1494	}
1495
1496	static reg_errcode_t
1497	internal_function
1498	free_fail_stack_return (struct re_fail_stack_t *fs)
1499	{
1500	if (fs)
1501	{
1502	int fs_idx;
1503	for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1504	{
1505	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1506	re_free (fs->stack[fs_idx].regs);
1507	}
1508	re_free (fs->stack);
1509	}
1510	return REG_NOERROR;
1511	}
1512
1513	static void
1514	internal_function
1515	update_regs (const re_dfa_t dfa, regmatch_t pmatch,
1516	regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1517	{
1518	int type = dfa->nodes[cur_node].type;
1519	if (type == OP_OPEN_SUBEXP)
1520	{
1521	int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1522
1523	/* We are at the first node of this sub expression. */
1524	if (reg_num < nmatch)
1525	{
1526	pmatch[reg_num].rm_so = cur_idx;
1527	pmatch[reg_num].rm_eo = -1;
1528	}
1529	}
1530	else if (type == OP_CLOSE_SUBEXP)
1531	{
1532	int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1533	if (reg_num < nmatch)
1534	{
1535	/* We are at the last node of this sub expression. */
1536	if (pmatch[reg_num].rm_so < cur_idx)
1537	{
1538	pmatch[reg_num].rm_eo = cur_idx;
1539	/* This is a non-empty match or we are not inside an optional
1540	subexpression. Accept this right away. */
1541	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1542	}
1543	else
1544	{
1545	if (dfa->nodes[cur_node].opt_subexp
1546	&& prev_idx_match[reg_num].rm_so != -1)
1547	/* We transited through an empty match for an optional
1548	subexpression, like (a?)*, and this is not the subexp's
1549	first match. Copy back the old content of the registers
1550	so that matches of an inner subexpression are undone as
1551	well, like in ((a?)). /
1552	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1553	else
1554	/* We completed a subexpression, but it may be part of
1555	an optional one, so do not update PREV_IDX_MATCH. */
1556	pmatch[reg_num].rm_eo = cur_idx;
1557	}
1558	}
1559	}
1560	}
1561
1562	/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1563	and sift the nodes in each states according to the following rules.
1564	Updated state_log will be wrote to STATE_LOG.
1565
1566	Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1567	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1568	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1569	the LAST_NODE, we throw away the node `a'.
1570	2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1571	string `s' and transit to `b':
1572	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1573	away the node `a'.
1574	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1575	thrown away, we throw away the node `a'.
1576	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1577	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1578	node `a'.
1579	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1580	we throw away the node `a'. */
1581
1582	#define STATE_NODE_CONTAINS(state,node) \
1583	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1584
1585	static reg_errcode_t
1586	internal_function
1587	sift_states_backward (const re_match_context_t mctx, re_sift_context_t sctx)
1588	{
1589	reg_errcode_t err;
1590	int null_cnt = 0;
1591	int str_idx = sctx->last_str_idx;
1592	re_node_set cur_dest;
1593
1594	#ifdef DEBUG
1595	assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1596	#endif
1597
1598	/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1599	transit to the last_node and the last_node itself. */
1600	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1601	if (BE (err != REG_NOERROR, 0))
1602	return err;
1603	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1604	if (BE (err != REG_NOERROR, 0))
1605	goto free_return;
1606
1607	/* Then check each states in the state_log. */
1608	while (str_idx > 0)
1609	{
1610	/* Update counters. */
1611	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1612	if (null_cnt > mctx->max_mb_elem_len)
1613	{
1614	memset (sctx->sifted_states, '\0',
1615	sizeof (re_dfastate_t ) str_idx);
1616	re_node_set_free (&cur_dest);
1617	return REG_NOERROR;
1618	}
1619	re_node_set_empty (&cur_dest);
1620	--str_idx;
1621
1622	if (mctx->state_log[str_idx])
1623	{
1624	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1625	if (BE (err != REG_NOERROR, 0))
1626	goto free_return;
1627	}
1628
1629	/* Add all the nodes which satisfy the following conditions:
1630	- It can epsilon transit to a node in CUR_DEST.
1631	- It is in CUR_SRC.
1632	And update state_log. */
1633	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1634	if (BE (err != REG_NOERROR, 0))
1635	goto free_return;
1636	}
1637	err = REG_NOERROR;
1638	free_return:
1639	re_node_set_free (&cur_dest);
1640	return err;
1641	}
1642
1643	static reg_errcode_t
1644	internal_function
1645	build_sifted_states (const re_match_context_t mctx, re_sift_context_t sctx,
1646	int str_idx, re_node_set *cur_dest)
1647	{
1648	const re_dfa_t *const dfa = mctx->dfa;
1649	const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1650	int i;
1651
1652	/* Then build the next sifted state.
1653	We build the next sifted state on `cur_dest', and update
1654	`sifted_states[str_idx]' with `cur_dest'.
1655	Note:
1656	`cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1657	`cur_src' points the node_set of the old `state_log[str_idx]'
1658	(with the epsilon nodes pre-filtered out). */
1659	for (i = 0; i < cur_src->nelem; i++)
1660	{
1661	int prev_node = cur_src->elems[i];
1662	int naccepted = 0;
1663	int ret;
1664
1665	#ifdef DEBUG
1666	re_token_type_t type = dfa->nodes[prev_node].type;
1667	assert (!IS_EPSILON_NODE (type));
1668	#endif
1669	#ifdef RE_ENABLE_I18N
1670	/* If the node may accept `multi byte'. */
1671	if (dfa->nodes[prev_node].accept_mb)
1672	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1673	str_idx, sctx->last_str_idx);
1674	#endif /* RE_ENABLE_I18N */
1675
1676	/* We don't check backreferences here.
1677	See update_cur_sifted_state(). */
1678	if (!naccepted
1679	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1680	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1681	dfa->nexts[prev_node]))
1682	naccepted = 1;
1683
1684	if (naccepted == 0)
1685	continue;
1686
1687	if (sctx->limits.nelem)
1688	{
1689	int to_idx = str_idx + naccepted;
1690	if (check_dst_limits (mctx, &sctx->limits,
1691	dfa->nexts[prev_node], to_idx,
1692	prev_node, str_idx))
1693	continue;
1694	}
1695	ret = re_node_set_insert (cur_dest, prev_node);
1696	if (BE (ret == -1, 0))
1697	return REG_ESPACE;
1698	}
1699
1700	return REG_NOERROR;
1701	}
1702
1703	/* Helper functions. */
1704
1705	static reg_errcode_t
1706	internal_function
1707	clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1708	{
1709	int top = mctx->state_log_top;
1710
1711	if (next_state_log_idx >= mctx->input.bufs_len
1712	\|\| (next_state_log_idx >= mctx->input.valid_len
1713	&& mctx->input.valid_len < mctx->input.len))
1714	{
1715	reg_errcode_t err;
1716	err = extend_buffers (mctx);
1717	if (BE (err != REG_NOERROR, 0))
1718	return err;
1719	}
1720
1721	if (top < next_state_log_idx)
1722	{
1723	memset (mctx->state_log + top + 1, '\0',
1724	sizeof (re_dfastate_t ) (next_state_log_idx - top));
1725	mctx->state_log_top = next_state_log_idx;
1726	}
1727	return REG_NOERROR;
1728	}
1729
1730	static reg_errcode_t
1731	internal_function
1732	merge_state_array (const re_dfa_t dfa, re_dfastate_t *dst,
1733	re_dfastate_t **src, int num)
1734	{
1735	int st_idx;
1736	reg_errcode_t err;
1737	for (st_idx = 0; st_idx < num; ++st_idx)
1738	{
1739	if (dst[st_idx] == NULL)
1740	dst[st_idx] = src[st_idx];
1741	else if (src[st_idx] != NULL)
1742	{
1743	re_node_set merged_set;
1744	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1745	&src[st_idx]->nodes);
1746	if (BE (err != REG_NOERROR, 0))
1747	return err;
1748	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1749	re_node_set_free (&merged_set);
1750	if (BE (err != REG_NOERROR, 0))
1751	return err;
1752	}
1753	}
1754	return REG_NOERROR;
1755	}
1756
1757	static reg_errcode_t
1758	internal_function
1759	update_cur_sifted_state (const re_match_context_t *mctx,
1760	re_sift_context_t *sctx, int str_idx,
1761	re_node_set *dest_nodes)
1762	{
1763	const re_dfa_t *const dfa = mctx->dfa;
1764	reg_errcode_t err = REG_NOERROR;
1765	const re_node_set *candidates;
1766	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1767	: &mctx->state_log[str_idx]->nodes);
1768
1769	if (dest_nodes->nelem == 0)
1770	sctx->sifted_states[str_idx] = NULL;
1771	else
1772	{
1773	if (candidates)
1774	{
1775	/* At first, add the nodes which can epsilon transit to a node in
1776	DEST_NODE. */
1777	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1778	if (BE (err != REG_NOERROR, 0))
1779	return err;
1780
1781	/* Then, check the limitations in the current sift_context. */
1782	if (sctx->limits.nelem)
1783	{
1784	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1785	mctx->bkref_ents, str_idx);
1786	if (BE (err != REG_NOERROR, 0))
1787	return err;
1788	}
1789	}
1790
1791	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1792	if (BE (err != REG_NOERROR, 0))
1793	return err;
1794	}
1795
1796	if (candidates && mctx->state_log[str_idx]->has_backref)
1797	{
1798	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1799	if (BE (err != REG_NOERROR, 0))
1800	return err;
1801	}
1802	return REG_NOERROR;
1803	}
1804
1805	static reg_errcode_t
1806	internal_function
1807	add_epsilon_src_nodes (const re_dfa_t dfa, re_node_set dest_nodes,
1808	const re_node_set *candidates)
1809	{
1810	reg_errcode_t err = REG_NOERROR;
1811	int i;
1812
1813	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1814	if (BE (err != REG_NOERROR, 0))
1815	return err;
1816
1817	if (!state->inveclosure.alloc)
1818	{
1819	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1820	if (BE (err != REG_NOERROR, 0))
1821	return REG_ESPACE;
1822	for (i = 0; i < dest_nodes->nelem; i++)
1823	re_node_set_merge (&state->inveclosure,
1824	dfa->inveclosures + dest_nodes->elems[i]);
1825	}
1826	return re_node_set_add_intersect (dest_nodes, candidates,
1827	&state->inveclosure);
1828	}
1829
1830	static reg_errcode_t
1831	internal_function
1832	sub_epsilon_src_nodes (const re_dfa_t dfa, int node, re_node_set dest_nodes,
1833	const re_node_set *candidates)
1834	{
1835	int ecl_idx;
1836	reg_errcode_t err;
1837	re_node_set *inv_eclosure = dfa->inveclosures + node;
1838	re_node_set except_nodes;
1839	re_node_set_init_empty (&except_nodes);
1840	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1841	{
1842	int cur_node = inv_eclosure->elems[ecl_idx];
1843	if (cur_node == node)
1844	continue;
1845	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1846	{
1847	int edst1 = dfa->edests[cur_node].elems[0];
1848	int edst2 = ((dfa->edests[cur_node].nelem > 1)
1849	? dfa->edests[cur_node].elems[1] : -1);
1850	if ((!re_node_set_contains (inv_eclosure, edst1)
1851	&& re_node_set_contains (dest_nodes, edst1))
1852	\|\| (edst2 > 0
1853	&& !re_node_set_contains (inv_eclosure, edst2)
1854	&& re_node_set_contains (dest_nodes, edst2)))
1855	{
1856	err = re_node_set_add_intersect (&except_nodes, candidates,
1857	dfa->inveclosures + cur_node);
1858	if (BE (err != REG_NOERROR, 0))
1859	{
1860	re_node_set_free (&except_nodes);
1861	return err;
1862	}
1863	}
1864	}
1865	}
1866	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1867	{
1868	int cur_node = inv_eclosure->elems[ecl_idx];
1869	if (!re_node_set_contains (&except_nodes, cur_node))
1870	{
1871	int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1872	re_node_set_remove_at (dest_nodes, idx);
1873	}
1874	}
1875	re_node_set_free (&except_nodes);
1876	return REG_NOERROR;
1877	}
1878
1879	static int
1880	internal_function
1881	check_dst_limits (const re_match_context_t mctx, re_node_set limits,
1882	int dst_node, int dst_idx, int src_node, int src_idx)
1883	{
1884	const re_dfa_t *const dfa = mctx->dfa;
1885	int lim_idx, src_pos, dst_pos;
1886
1887	int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1888	int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1889	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1890	{
1891	int subexp_idx;
1892	struct re_backref_cache_entry *ent;
1893	ent = mctx->bkref_ents + limits->elems[lim_idx];
1894	subexp_idx = dfa->nodes[ent->node].opr.idx;
1895
1896	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1897	subexp_idx, dst_node, dst_idx,
1898	dst_bkref_idx);
1899	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1900	subexp_idx, src_node, src_idx,
1901	src_bkref_idx);
1902
1903	/* In case of:
1904	<src> <dst> ( <subexp> )
1905	( <subexp> ) <src> <dst>
1906	( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1907	if (src_pos == dst_pos)
1908	continue; /* This is unrelated limitation. */
1909	else
1910	return 1;
1911	}
1912	return 0;
1913	}
1914
1915	static int
1916	internal_function
1917	check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1918	int subexp_idx, int from_node, int bkref_idx)
1919	{
1920	const re_dfa_t *const dfa = mctx->dfa;
1921	const re_node_set *eclosures = dfa->eclosures + from_node;
1922	int node_idx;
1923
1924	/* Else, we are on the boundary: examine the nodes on the epsilon
1925	closure. */
1926	for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1927	{
1928	int node = eclosures->elems[node_idx];
1929	switch (dfa->nodes[node].type)
1930	{
1931	case OP_BACK_REF:
1932	if (bkref_idx != -1)
1933	{
1934	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1935	do
1936	{
1937	int dst, cpos;
1938
1939	if (ent->node != node)
1940	continue;
1941
1942	if (subexp_idx < BITSET_WORD_BITS
1943	&& !(ent->eps_reachable_subexps_map
1944	& ((bitset_word_t) 1 << subexp_idx)))
1945	continue;
1946
1947	/* Recurse trying to reach the OP_OPEN_SUBEXP and
1948	OP_CLOSE_SUBEXP cases below. But, if the
1949	destination node is the same node as the source
1950	node, don't recurse because it would cause an
1951	infinite loop: a regex that exhibits this behavior
1952	is ()\1\1 */
1953	dst = dfa->edests[node].elems[0];
1954	if (dst == from_node)
1955	{
1956	if (boundaries & 1)
1957	return -1;
1958	else /* if (boundaries & 2) */
1959	return 0;
1960	}
1961
1962	cpos =
1963	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1964	dst, bkref_idx);
1965	if (cpos == -1 /* && (boundaries & 1) */)
1966	return -1;
1967	if (cpos == 0 && (boundaries & 2))
1968	return 0;
1969
1970	if (subexp_idx < BITSET_WORD_BITS)
1971	ent->eps_reachable_subexps_map
1972	&= ~((bitset_word_t) 1 << subexp_idx);
1973	}
1974	while (ent++->more);
1975	}
1976	break;
1977
1978	case OP_OPEN_SUBEXP:
1979	if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1980	return -1;
1981	break;
1982
1983	case OP_CLOSE_SUBEXP:
1984	if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1985	return 0;
1986	break;
1987
1988	default:
1989	break;
1990	}
1991	}
1992
1993	return (boundaries & 2) ? 1 : 0;
1994	}
1995
1996	static int
1997	internal_function
1998	check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
1999	int subexp_idx, int from_node, int str_idx,
2000	int bkref_idx)
2001	{
2002	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2003	int boundaries;
2004
2005	/* If we are outside the range of the subexpression, return -1 or 1. */
2006	if (str_idx < lim->subexp_from)
2007	return -1;
2008
2009	if (lim->subexp_to < str_idx)
2010	return 1;
2011
2012	/* If we are within the subexpression, return 0. */
2013	boundaries = (str_idx == lim->subexp_from);
2014	boundaries \|= (str_idx == lim->subexp_to) << 1;
2015	if (boundaries == 0)
2016	return 0;
2017
2018	/* Else, examine epsilon closure. */
2019	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2020	from_node, bkref_idx);
2021	}
2022
2023	/* Check the limitations of sub expressions LIMITS, and remove the nodes
2024	which are against limitations from DEST_NODES. */
2025
2026	static reg_errcode_t
2027	internal_function
2028	check_subexp_limits (const re_dfa_t dfa, re_node_set dest_nodes,
2029	const re_node_set candidates, re_node_set limits,
2030	struct re_backref_cache_entry *bkref_ents, int str_idx)
2031	{
2032	reg_errcode_t err;
2033	int node_idx, lim_idx;
2034
2035	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2036	{
2037	int subexp_idx;
2038	struct re_backref_cache_entry *ent;
2039	ent = bkref_ents + limits->elems[lim_idx];
2040
2041	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
2042	continue; /* This is unrelated limitation. */
2043
2044	subexp_idx = dfa->nodes[ent->node].opr.idx;
2045	if (ent->subexp_to == str_idx)
2046	{
2047	int ops_node = -1;
2048	int cls_node = -1;
2049	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2050	{
2051	int node = dest_nodes->elems[node_idx];
2052	re_token_type_t type = dfa->nodes[node].type;
2053	if (type == OP_OPEN_SUBEXP
2054	&& subexp_idx == dfa->nodes[node].opr.idx)
2055	ops_node = node;
2056	else if (type == OP_CLOSE_SUBEXP
2057	&& subexp_idx == dfa->nodes[node].opr.idx)
2058	cls_node = node;
2059	}
2060
2061	/* Check the limitation of the open subexpression. */
2062	/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2063	if (ops_node >= 0)
2064	{
2065	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2066	candidates);
2067	if (BE (err != REG_NOERROR, 0))
2068	return err;
2069	}
2070
2071	/* Check the limitation of the close subexpression. */
2072	if (cls_node >= 0)
2073	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2074	{
2075	int node = dest_nodes->elems[node_idx];
2076	if (!re_node_set_contains (dfa->inveclosures + node,
2077	cls_node)
2078	&& !re_node_set_contains (dfa->eclosures + node,
2079	cls_node))
2080	{
2081	/* It is against this limitation.
2082	Remove it form the current sifted state. */
2083	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2084	candidates);
2085	if (BE (err != REG_NOERROR, 0))
2086	return err;
2087	--node_idx;
2088	}
2089	}
2090	}
2091	else /* (ent->subexp_to != str_idx) */
2092	{
2093	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2094	{
2095	int node = dest_nodes->elems[node_idx];
2096	re_token_type_t type = dfa->nodes[node].type;
2097	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
2098	{
2099	if (subexp_idx != dfa->nodes[node].opr.idx)
2100	continue;
2101	/* It is against this limitation.
2102	Remove it form the current sifted state. */
2103	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2104	candidates);
2105	if (BE (err != REG_NOERROR, 0))
2106	return err;
2107	}
2108	}
2109	}
2110	}
2111	return REG_NOERROR;
2112	}
2113
2114	static reg_errcode_t
2115	internal_function
2116	sift_states_bkref (const re_match_context_t mctx, re_sift_context_t sctx,
2117	int str_idx, const re_node_set *candidates)
2118	{
2119	const re_dfa_t *const dfa = mctx->dfa;
2120	reg_errcode_t err;
2121	int node_idx, node;
2122	re_sift_context_t local_sctx;
2123	int first_idx = search_cur_bkref_entry (mctx, str_idx);
2124
2125	if (first_idx == -1)
2126	return REG_NOERROR;
2127
2128	local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2129
2130	for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2131	{
2132	int enabled_idx;
2133	re_token_type_t type;
2134	struct re_backref_cache_entry *entry;
2135	node = candidates->elems[node_idx];
2136	type = dfa->nodes[node].type;
2137	/* Avoid infinite loop for the REs like "()\1+". */
2138	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2139	continue;
2140	if (type != OP_BACK_REF)
2141	continue;
2142
2143	entry = mctx->bkref_ents + first_idx;
2144	enabled_idx = first_idx;
2145	do
2146	{
2147	int subexp_len;
2148	int to_idx;
2149	int dst_node;
2150	int ret;
2151	re_dfastate_t *cur_state;
2152
2153	if (entry->node != node)
2154	continue;
2155	subexp_len = entry->subexp_to - entry->subexp_from;
2156	to_idx = str_idx + subexp_len;
2157	dst_node = (subexp_len ? dfa->nexts[node]
2158	: dfa->edests[node].elems[0]);
2159
2160	if (to_idx > sctx->last_str_idx
2161	\|\| sctx->sifted_states[to_idx] == NULL
2162	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2163	\|\| check_dst_limits (mctx, &sctx->limits, node,
2164	str_idx, dst_node, to_idx))
2165	continue;
2166
2167	if (local_sctx.sifted_states == NULL)
2168	{
2169	local_sctx = *sctx;
2170	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2171	if (BE (err != REG_NOERROR, 0))
2172	goto free_return;
2173	}
2174	local_sctx.last_node = node;
2175	local_sctx.last_str_idx = str_idx;
2176	ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2177	if (BE (ret < 0, 0))
2178	{
2179	err = REG_ESPACE;
2180	goto free_return;
2181	}
2182	cur_state = local_sctx.sifted_states[str_idx];
2183	err = sift_states_backward (mctx, &local_sctx);
2184	if (BE (err != REG_NOERROR, 0))
2185	goto free_return;
2186	if (sctx->limited_states != NULL)
2187	{
2188	err = merge_state_array (dfa, sctx->limited_states,
2189	local_sctx.sifted_states,
2190	str_idx + 1);
2191	if (BE (err != REG_NOERROR, 0))
2192	goto free_return;
2193	}
2194	local_sctx.sifted_states[str_idx] = cur_state;
2195	re_node_set_remove (&local_sctx.limits, enabled_idx);
2196
2197	/* mctx->bkref_ents may have changed, reload the pointer. */
2198	entry = mctx->bkref_ents + enabled_idx;
2199	}
2200	while (enabled_idx++, entry++->more);
2201	}
2202	err = REG_NOERROR;
2203	free_return:
2204	if (local_sctx.sifted_states != NULL)
2205	{
2206	re_node_set_free (&local_sctx.limits);
2207	}
2208
2209	return err;
2210	}
2211
2212
2213	#ifdef RE_ENABLE_I18N
2214	static int
2215	internal_function
2216	sift_states_iter_mb (const re_match_context_t mctx, re_sift_context_t sctx,
2217	int node_idx, int str_idx, int max_str_idx)
2218	{
2219	const re_dfa_t *const dfa = mctx->dfa;
2220	int naccepted;
2221	/* Check the node can accept `multi byte'. */
2222	naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2223	if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2224	!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2225	dfa->nexts[node_idx]))
2226	/* The node can't accept the `multi byte', or the
2227	destination was already thrown away, then the node
2228	could't accept the current input `multi byte'. */
2229	naccepted = 0;
2230	/* Otherwise, it is sure that the node could accept
2231	`naccepted' bytes input. */
2232	return naccepted;
2233	}
2234	#endif /* RE_ENABLE_I18N */
2235
2236
2237
2238	/* Functions for state transition. */
2239
2240	/* Return the next state to which the current state STATE will transit by
2241	accepting the current input byte, and update STATE_LOG if necessary.
2242	If STATE can accept a multibyte char/collating element/back reference
2243	update the destination of STATE_LOG. */
2244
2245	static re_dfastate_t *
2246	internal_function
2247	transit_state (reg_errcode_t err, re_match_context_t mctx,
2248	re_dfastate_t *state)
2249	{
2250	re_dfastate_t **trtable;
2251	unsigned char ch;
2252
2253	#ifdef RE_ENABLE_I18N
2254	/* If the current state can accept multibyte. */
2255	if (BE (state->accept_mb, 0))
2256	{
2257	*err = transit_state_mb (mctx, state);
2258	if (BE (*err != REG_NOERROR, 0))
2259	return NULL;
2260	}
2261	#endif /* RE_ENABLE_I18N */
2262
2263	/* Then decide the next state with the single byte. */
2264	#if 0
2265	if (0)
2266	/* don't use transition table */
2267	return transit_state_sb (err, mctx, state);
2268	#endif
2269
2270	/* Use transition table */
2271	ch = re_string_fetch_byte (&mctx->input);
2272	for (;;)
2273	{
2274	trtable = state->trtable;
2275	if (BE (trtable != NULL, 1))
2276	return trtable[ch];
2277
2278	trtable = state->word_trtable;
2279	if (BE (trtable != NULL, 1))
2280	{
2281	unsigned int context;
2282	context
2283	= re_string_context_at (&mctx->input,
2284	re_string_cur_idx (&mctx->input) - 1,
2285	mctx->eflags);
2286	if (IS_WORD_CONTEXT (context))
2287	return trtable[ch + SBC_MAX];
2288	else
2289	return trtable[ch];
2290	}
2291
2292	if (!build_trtable (mctx->dfa, state))
2293	{
2294	*err = REG_ESPACE;
2295	return NULL;
2296	}
2297
2298	/* Retry, we now have a transition table. */
2299	}
2300	}
2301
2302	/* Update the state_log if we need */
2303	re_dfastate_t *
2304	internal_function
2305	merge_state_with_log (reg_errcode_t err, re_match_context_t mctx,
2306	re_dfastate_t *next_state)
2307	{
2308	const re_dfa_t *const dfa = mctx->dfa;
2309	int cur_idx = re_string_cur_idx (&mctx->input);
2310
2311	if (cur_idx > mctx->state_log_top)
2312	{
2313	mctx->state_log[cur_idx] = next_state;
2314	mctx->state_log_top = cur_idx;
2315	}
2316	else if (mctx->state_log[cur_idx] == 0)
2317	{
2318	mctx->state_log[cur_idx] = next_state;
2319	}
2320	else
2321	{
2322	re_dfastate_t *pstate;
2323	unsigned int context;
2324	re_node_set next_nodes, log_nodes, table_nodes = NULL;
2325	/* If (state_log[cur_idx] != 0), it implies that cur_idx is
2326	the destination of a multibyte char/collating element/
2327	back reference. Then the next state is the union set of
2328	these destinations and the results of the transition table. */
2329	pstate = mctx->state_log[cur_idx];
2330	log_nodes = pstate->entrance_nodes;
2331	if (next_state != NULL)
2332	{
2333	table_nodes = next_state->entrance_nodes;
2334	*err = re_node_set_init_union (&next_nodes, table_nodes,
2335	log_nodes);
2336	if (BE (*err != REG_NOERROR, 0))
2337	return NULL;
2338	}
2339	else
2340	next_nodes = *log_nodes;
2341	/* Note: We already add the nodes of the initial state,
2342	then we don't need to add them here. */
2343
2344	context = re_string_context_at (&mctx->input,
2345	re_string_cur_idx (&mctx->input) - 1,
2346	mctx->eflags);
2347	next_state = mctx->state_log[cur_idx]
2348	= re_acquire_state_context (err, dfa, &next_nodes, context);
2349	/* We don't need to check errors here, since the return value of
2350	this function is next_state and ERR is already set. */
2351
2352	if (table_nodes != NULL)
2353	re_node_set_free (&next_nodes);
2354	}
2355
2356	if (BE (dfa->nbackref, 0) && next_state != NULL)
2357	{
2358	/* Check OP_OPEN_SUBEXP in the current state in case that we use them
2359	later. We must check them here, since the back references in the
2360	next state might use them. */
2361	*err = check_subexp_matching_top (mctx, &next_state->nodes,
2362	cur_idx);
2363	if (BE (*err != REG_NOERROR, 0))
2364	return NULL;
2365
2366	/* If the next state has back references. */
2367	if (next_state->has_backref)
2368	{
2369	*err = transit_state_bkref (mctx, &next_state->nodes);
2370	if (BE (*err != REG_NOERROR, 0))
2371	return NULL;
2372	next_state = mctx->state_log[cur_idx];
2373	}
2374	}
2375
2376	return next_state;
2377	}
2378
2379	/* Skip bytes in the input that correspond to part of a
2380	multi-byte match, then look in the log for a state
2381	from which to restart matching. */
2382	re_dfastate_t *
2383	internal_function
2384	find_recover_state (reg_errcode_t err, re_match_context_t mctx)
2385	{
2386	re_dfastate_t *cur_state;
2387	do
2388	{
2389	int max = mctx->state_log_top;
2390	int cur_str_idx = re_string_cur_idx (&mctx->input);
2391
2392	do
2393	{
2394	if (++cur_str_idx > max)
2395	return NULL;
2396	re_string_skip_bytes (&mctx->input, 1);
2397	}
2398	while (mctx->state_log[cur_str_idx] == NULL);
2399
2400	cur_state = merge_state_with_log (err, mctx, NULL);
2401	}
2402	while (*err == REG_NOERROR && cur_state == NULL);
2403	return cur_state;
2404	}
2405
2406	/* Helper functions for transit_state. */
2407
2408	/* From the node set CUR_NODES, pick up the nodes whose types are
2409	OP_OPEN_SUBEXP and which have corresponding back references in the regular
2410	expression. And register them to use them later for evaluating the
2411	correspoding back references. */
2412
2413	static reg_errcode_t
2414	internal_function
2415	check_subexp_matching_top (re_match_context_t mctx, re_node_set cur_nodes,
2416	int str_idx)
2417	{
2418	const re_dfa_t *const dfa = mctx->dfa;
2419	int node_idx;
2420	reg_errcode_t err;
2421
2422	/* TODO: This isn't efficient.
2423	Because there might be more than one nodes whose types are
2424	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2425	nodes.
2426	E.g. RE: (a){2} */
2427	for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2428	{
2429	int node = cur_nodes->elems[node_idx];
2430	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2431	&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2432	&& (dfa->used_bkref_map
2433	& ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2434	{
2435	err = match_ctx_add_subtop (mctx, node, str_idx);
2436	if (BE (err != REG_NOERROR, 0))
2437	return err;
2438	}
2439	}
2440	return REG_NOERROR;
2441	}
2442
2443	#if 0
2444	/* Return the next state to which the current state STATE will transit by
2445	accepting the current input byte. */
2446
2447	static re_dfastate_t *
2448	transit_state_sb (reg_errcode_t err, re_match_context_t mctx,
2449	re_dfastate_t *state)
2450	{
2451	const re_dfa_t *const dfa = mctx->dfa;
2452	re_node_set next_nodes;
2453	re_dfastate_t *next_state;
2454	int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2455	unsigned int context;
2456
2457	*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2458	if (BE (*err != REG_NOERROR, 0))
2459	return NULL;
2460	for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2461	{
2462	int cur_node = state->nodes.elems[node_cnt];
2463	if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2464	{
2465	*err = re_node_set_merge (&next_nodes,
2466	dfa->eclosures + dfa->nexts[cur_node]);
2467	if (BE (*err != REG_NOERROR, 0))
2468	{
2469	re_node_set_free (&next_nodes);
2470	return NULL;
2471	}
2472	}
2473	}
2474	context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2475	next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2476	/* We don't need to check errors here, since the return value of
2477	this function is next_state and ERR is already set. */
2478
2479	re_node_set_free (&next_nodes);
2480	re_string_skip_bytes (&mctx->input, 1);
2481	return next_state;
2482	}
2483	#endif
2484
2485	#ifdef RE_ENABLE_I18N
2486	static reg_errcode_t
2487	internal_function
2488	transit_state_mb (re_match_context_t mctx, re_dfastate_t pstate)
2489	{
2490	const re_dfa_t *const dfa = mctx->dfa;
2491	reg_errcode_t err;
2492	int i;
2493
2494	for (i = 0; i < pstate->nodes.nelem; ++i)
2495	{
2496	re_node_set dest_nodes, *new_nodes;
2497	int cur_node_idx = pstate->nodes.elems[i];
2498	int naccepted, dest_idx;
2499	unsigned int context;
2500	re_dfastate_t *dest_state;
2501
2502	if (!dfa->nodes[cur_node_idx].accept_mb)
2503	continue;
2504
2505	if (dfa->nodes[cur_node_idx].constraint)
2506	{
2507	context = re_string_context_at (&mctx->input,
2508	re_string_cur_idx (&mctx->input),
2509	mctx->eflags);
2510	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2511	context))
2512	continue;
2513	}
2514
2515	/* How many bytes the node can accept? */
2516	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2517	re_string_cur_idx (&mctx->input));
2518	if (naccepted == 0)
2519	continue;
2520
2521	/* The node can accepts `naccepted' bytes. */
2522	dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2523	mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2524	: mctx->max_mb_elem_len);
2525	err = clean_state_log_if_needed (mctx, dest_idx);
2526	if (BE (err != REG_NOERROR, 0))
2527	return err;
2528	#ifdef DEBUG
2529	assert (dfa->nexts[cur_node_idx] != -1);
2530	#endif
2531	new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2532
2533	dest_state = mctx->state_log[dest_idx];
2534	if (dest_state == NULL)
2535	dest_nodes = *new_nodes;
2536	else
2537	{
2538	err = re_node_set_init_union (&dest_nodes,
2539	dest_state->entrance_nodes, new_nodes);
2540	if (BE (err != REG_NOERROR, 0))
2541	return err;
2542	}
2543	context = re_string_context_at (&mctx->input, dest_idx - 1,
2544	mctx->eflags);
2545	mctx->state_log[dest_idx]
2546	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2547	if (dest_state != NULL)
2548	re_node_set_free (&dest_nodes);
2549	if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2550	return err;
2551	}
2552	return REG_NOERROR;
2553	}
2554	#endif /* RE_ENABLE_I18N */
2555
2556	static reg_errcode_t
2557	internal_function
2558	transit_state_bkref (re_match_context_t mctx, const re_node_set nodes)
2559	{
2560	const re_dfa_t *const dfa = mctx->dfa;
2561	reg_errcode_t err;
2562	int i;
2563	int cur_str_idx = re_string_cur_idx (&mctx->input);
2564
2565	for (i = 0; i < nodes->nelem; ++i)
2566	{
2567	int dest_str_idx, prev_nelem, bkc_idx;
2568	int node_idx = nodes->elems[i];
2569	unsigned int context;
2570	const re_token_t *node = dfa->nodes + node_idx;
2571	re_node_set *new_dest_nodes;
2572
2573	/* Check whether `node' is a backreference or not. */
2574	if (node->type != OP_BACK_REF)
2575	continue;
2576
2577	if (node->constraint)
2578	{
2579	context = re_string_context_at (&mctx->input, cur_str_idx,
2580	mctx->eflags);
2581	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2582	continue;
2583	}
2584
2585	/* `node' is a backreference.
2586	Check the substring which the substring matched. */
2587	bkc_idx = mctx->nbkref_ents;
2588	err = get_subexp (mctx, node_idx, cur_str_idx);
2589	if (BE (err != REG_NOERROR, 0))
2590	goto free_return;
2591
2592	/* And add the epsilon closures (which is `new_dest_nodes') of
2593	the backreference to appropriate state_log. */
2594	#ifdef DEBUG
2595	assert (dfa->nexts[node_idx] != -1);
2596	#endif
2597	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2598	{
2599	int subexp_len;
2600	re_dfastate_t *dest_state;
2601	struct re_backref_cache_entry *bkref_ent;
2602	bkref_ent = mctx->bkref_ents + bkc_idx;
2603	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
2604	continue;
2605	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2606	new_dest_nodes = (subexp_len == 0
2607	? dfa->eclosures + dfa->edests[node_idx].elems[0]
2608	: dfa->eclosures + dfa->nexts[node_idx]);
2609	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2610	- bkref_ent->subexp_from);
2611	context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2612	mctx->eflags);
2613	dest_state = mctx->state_log[dest_str_idx];
2614	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2615	: mctx->state_log[cur_str_idx]->nodes.nelem);
2616	/* Add `new_dest_node' to state_log. */
2617	if (dest_state == NULL)
2618	{
2619	mctx->state_log[dest_str_idx]
2620	= re_acquire_state_context (&err, dfa, new_dest_nodes,
2621	context);
2622	if (BE (mctx->state_log[dest_str_idx] == NULL
2623	&& err != REG_NOERROR, 0))
2624	goto free_return;
2625	}
2626	else
2627	{
2628	re_node_set dest_nodes;
2629	err = re_node_set_init_union (&dest_nodes,
2630	dest_state->entrance_nodes,
2631	new_dest_nodes);
2632	if (BE (err != REG_NOERROR, 0))
2633	{
2634	re_node_set_free (&dest_nodes);
2635	goto free_return;
2636	}
2637	mctx->state_log[dest_str_idx]
2638	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2639	re_node_set_free (&dest_nodes);
2640	if (BE (mctx->state_log[dest_str_idx] == NULL
2641	&& err != REG_NOERROR, 0))
2642	goto free_return;
2643	}
2644	/* We need to check recursively if the backreference can epsilon
2645	transit. */
2646	if (subexp_len == 0
2647	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2648	{
2649	err = check_subexp_matching_top (mctx, new_dest_nodes,
2650	cur_str_idx);
2651	if (BE (err != REG_NOERROR, 0))
2652	goto free_return;
2653	err = transit_state_bkref (mctx, new_dest_nodes);
2654	if (BE (err != REG_NOERROR, 0))
2655	goto free_return;
2656	}
2657	}
2658	}
2659	err = REG_NOERROR;
2660	free_return:
2661	return err;
2662	}
2663
2664	/* Enumerate all the candidates which the backreference BKREF_NODE can match
2665	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2666	Note that we might collect inappropriate candidates here.
2667	However, the cost of checking them strictly here is too high, then we
2668	delay these checking for prune_impossible_nodes(). */
2669
2670	static reg_errcode_t
2671	internal_function
2672	get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2673	{
2674	const re_dfa_t *const dfa = mctx->dfa;
2675	int subexp_num, sub_top_idx;
2676	const char buf = (const char ) re_string_get_buffer (&mctx->input);
2677	/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2678	int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2679	if (cache_idx != -1)
2680	{
2681	const struct re_backref_cache_entry *entry
2682	= mctx->bkref_ents + cache_idx;
2683	do
2684	if (entry->node == bkref_node)
2685	return REG_NOERROR; /* We already checked it. */
2686	while (entry++->more);
2687	}
2688
2689	subexp_num = dfa->nodes[bkref_node].opr.idx;
2690
2691	/* For each sub expression */
2692	for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2693	{
2694	reg_errcode_t err;
2695	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2696	re_sub_match_last_t *sub_last;
2697	int sub_last_idx, sl_str, bkref_str_off;
2698
2699	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2700	continue; /* It isn't related. */
2701
2702	sl_str = sub_top->str_idx;
2703	bkref_str_off = bkref_str_idx;
2704	/* At first, check the last node of sub expressions we already
2705	evaluated. */
2706	for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2707	{
2708	int sl_str_diff;
2709	sub_last = sub_top->lasts[sub_last_idx];
2710	sl_str_diff = sub_last->str_idx - sl_str;
2711	/* The matched string by the sub expression match with the substring
2712	at the back reference? */
2713	if (sl_str_diff > 0)
2714	{
2715	if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2716	{
2717	/* Not enough chars for a successful match. */
2718	if (bkref_str_off + sl_str_diff > mctx->input.len)
2719	break;
2720
2721	err = clean_state_log_if_needed (mctx,
2722	bkref_str_off
2723	+ sl_str_diff);
2724	if (BE (err != REG_NOERROR, 0))
2725	return err;
2726	buf = (const char *) re_string_get_buffer (&mctx->input);
2727	}
2728	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2729	/* We don't need to search this sub expression any more. */
2730	break;
2731	}
2732	bkref_str_off += sl_str_diff;
2733	sl_str += sl_str_diff;
2734	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2735	bkref_str_idx);
2736
2737	/* Reload buf, since the preceding call might have reallocated
2738	the buffer. */
2739	buf = (const char *) re_string_get_buffer (&mctx->input);
2740
2741	if (err == REG_NOMATCH)
2742	continue;
2743	if (BE (err != REG_NOERROR, 0))
2744	return err;
2745	}
2746
2747	if (sub_last_idx < sub_top->nlasts)
2748	continue;
2749	if (sub_last_idx > 0)
2750	++sl_str;
2751	/* Then, search for the other last nodes of the sub expression. */
2752	for (; sl_str <= bkref_str_idx; ++sl_str)
2753	{
2754	int cls_node, sl_str_off;
2755	const re_node_set *nodes;
2756	sl_str_off = sl_str - sub_top->str_idx;
2757	/* The matched string by the sub expression match with the substring
2758	at the back reference? */
2759	if (sl_str_off > 0)
2760	{
2761	if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2762	{
2763	/* If we are at the end of the input, we cannot match. */
2764	if (bkref_str_off >= mctx->input.len)
2765	break;
2766
2767	err = extend_buffers (mctx);
2768	if (BE (err != REG_NOERROR, 0))
2769	return err;
2770
2771	buf = (const char *) re_string_get_buffer (&mctx->input);
2772	}
2773	if (buf [bkref_str_off++] != buf[sl_str - 1])
2774	break; /* We don't need to search this sub expression
2775	any more. */
2776	}
2777	if (mctx->state_log[sl_str] == NULL)
2778	continue;
2779	/* Does this state have a ')' of the sub expression? */
2780	nodes = &mctx->state_log[sl_str]->nodes;
2781	cls_node = find_subexp_node (dfa, nodes, subexp_num,
2782	OP_CLOSE_SUBEXP);
2783	if (cls_node == -1)
2784	continue; /* No. */
2785	if (sub_top->path == NULL)
2786	{
2787	sub_top->path = calloc (sizeof (state_array_t),
2788	sl_str - sub_top->str_idx + 1);
2789	if (sub_top->path == NULL)
2790	return REG_ESPACE;
2791	}
2792	/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2793	in the current context? */
2794	err = check_arrival (mctx, sub_top->path, sub_top->node,
2795	sub_top->str_idx, cls_node, sl_str,
2796	OP_CLOSE_SUBEXP);
2797	if (err == REG_NOMATCH)
2798	continue;
2799	if (BE (err != REG_NOERROR, 0))
2800	return err;
2801	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2802	if (BE (sub_last == NULL, 0))
2803	return REG_ESPACE;
2804	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2805	bkref_str_idx);
2806	if (err == REG_NOMATCH)
2807	continue;
2808	}
2809	}
2810	return REG_NOERROR;
2811	}
2812
2813	/* Helper functions for get_subexp(). */
2814
2815	/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2816	If it can arrive, register the sub expression expressed with SUB_TOP
2817	and SUB_LAST. */
2818
2819	static reg_errcode_t
2820	internal_function
2821	get_subexp_sub (re_match_context_t mctx, const re_sub_match_top_t sub_top,
2822	re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2823	{
2824	reg_errcode_t err;
2825	int to_idx;
2826	/* Can the subexpression arrive the back reference? */
2827	err = check_arrival (mctx, &sub_last->path, sub_last->node,
2828	sub_last->str_idx, bkref_node, bkref_str,
2829	OP_OPEN_SUBEXP);
2830	if (err != REG_NOERROR)
2831	return err;
2832	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2833	sub_last->str_idx);
2834	if (BE (err != REG_NOERROR, 0))
2835	return err;
2836	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2837	return clean_state_log_if_needed (mctx, to_idx);
2838	}
2839
2840	/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2841	Search '(' if FL_OPEN, or search ')' otherwise.
2842	TODO: This function isn't efficient...
2843	Because there might be more than one nodes whose types are
2844	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2845	nodes.
2846	E.g. RE: (a){2} */
2847
2848	static int
2849	internal_function
2850	find_subexp_node (const re_dfa_t dfa, const re_node_set nodes,
2851	int subexp_idx, int type)
2852	{
2853	int cls_idx;
2854	for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2855	{
2856	int cls_node = nodes->elems[cls_idx];
2857	const re_token_t *node = dfa->nodes + cls_node;
2858	if (node->type == type
2859	&& node->opr.idx == subexp_idx)
2860	return cls_node;
2861	}
2862	return -1;
2863	}
2864
2865	/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2866	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2867	heavily reused.
2868	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2869
2870	static reg_errcode_t
2871	internal_function
2872	check_arrival (re_match_context_t mctx, state_array_t path, int top_node,
2873	int top_str, int last_node, int last_str, int type)
2874	{
2875	const re_dfa_t *const dfa = mctx->dfa;
2876	reg_errcode_t err = REG_NOERROR;
2877	int subexp_num, backup_cur_idx, str_idx, null_cnt;
2878	re_dfastate_t *cur_state = NULL;
2879	re_node_set *cur_nodes, next_nodes;
2880	re_dfastate_t **backup_state_log;
2881	unsigned int context;
2882
2883	subexp_num = dfa->nodes[top_node].opr.idx;
2884	/* Extend the buffer if we need. */
2885	if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2886	{
2887	re_dfastate_t **new_array;
2888	int old_alloc = path->alloc;
2889	path->alloc += last_str + mctx->max_mb_elem_len + 1;
2890	new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2891	if (BE (new_array == NULL, 0))
2892	{
2893	path->alloc = old_alloc;
2894	return REG_ESPACE;
2895	}
2896	path->array = new_array;
2897	memset (new_array + old_alloc, '\0',
2898	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2899	}
2900
2901	str_idx = path->next_idx ?: top_str;
2902
2903	/* Temporary modify MCTX. */
2904	backup_state_log = mctx->state_log;
2905	backup_cur_idx = mctx->input.cur_idx;
2906	mctx->state_log = path->array;
2907	mctx->input.cur_idx = str_idx;
2908
2909	/* Setup initial node set. */
2910	context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2911	if (str_idx == top_str)
2912	{
2913	err = re_node_set_init_1 (&next_nodes, top_node);
2914	if (BE (err != REG_NOERROR, 0))
2915	return err;
2916	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2917	if (BE (err != REG_NOERROR, 0))
2918	{
2919	re_node_set_free (&next_nodes);
2920	return err;
2921	}
2922	}
2923	else
2924	{
2925	cur_state = mctx->state_log[str_idx];
2926	if (cur_state && cur_state->has_backref)
2927	{
2928	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2929	if (BE (err != REG_NOERROR, 0))
2930	return err;
2931	}
2932	else
2933	re_node_set_init_empty (&next_nodes);
2934	}
2935	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
2936	{
2937	if (next_nodes.nelem)
2938	{
2939	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2940	subexp_num, type);
2941	if (BE (err != REG_NOERROR, 0))
2942	{
2943	re_node_set_free (&next_nodes);
2944	return err;
2945	}
2946	}
2947	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2948	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2949	{
2950	re_node_set_free (&next_nodes);
2951	return err;
2952	}
2953	mctx->state_log[str_idx] = cur_state;
2954	}
2955
2956	for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2957	{
2958	re_node_set_empty (&next_nodes);
2959	if (mctx->state_log[str_idx + 1])
2960	{
2961	err = re_node_set_merge (&next_nodes,
2962	&mctx->state_log[str_idx + 1]->nodes);
2963	if (BE (err != REG_NOERROR, 0))
2964	{
2965	re_node_set_free (&next_nodes);
2966	return err;
2967	}
2968	}
2969	if (cur_state)
2970	{
2971	err = check_arrival_add_next_nodes (mctx, str_idx,
2972	&cur_state->non_eps_nodes,
2973	&next_nodes);
2974	if (BE (err != REG_NOERROR, 0))
2975	{
2976	re_node_set_free (&next_nodes);
2977	return err;
2978	}
2979	}
2980	++str_idx;
2981	if (next_nodes.nelem)
2982	{
2983	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2984	if (BE (err != REG_NOERROR, 0))
2985	{
2986	re_node_set_free (&next_nodes);
2987	return err;
2988	}
2989	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2990	subexp_num, type);
2991	if (BE (err != REG_NOERROR, 0))
2992	{
2993	re_node_set_free (&next_nodes);
2994	return err;
2995	}
2996	}
2997	context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2998	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2999	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3000	{
3001	re_node_set_free (&next_nodes);
3002	return err;
3003	}
3004	mctx->state_log[str_idx] = cur_state;
3005	null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3006	}
3007	re_node_set_free (&next_nodes);
3008	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3009	: &mctx->state_log[last_str]->nodes);
3010	path->next_idx = str_idx;
3011
3012	/* Fix MCTX. */
3013	mctx->state_log = backup_state_log;
3014	mctx->input.cur_idx = backup_cur_idx;
3015
3016	/* Then check the current node set has the node LAST_NODE. */
3017	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3018	return REG_NOERROR;
3019
3020	return REG_NOMATCH;
3021	}
3022
3023	/* Helper functions for check_arrival. */
3024
3025	/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3026	to NEXT_NODES.
3027	TODO: This function is similar to the functions transit_state*(),
3028	however this function has many additional works.
3029	Can't we unify them? */
3030
3031	static reg_errcode_t
3032	internal_function
3033	check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3034	re_node_set cur_nodes, re_node_set next_nodes)
3035	{
3036	const re_dfa_t *const dfa = mctx->dfa;
3037	int result;
3038	int cur_idx;
3039	reg_errcode_t err = REG_NOERROR;
3040	re_node_set union_set;
3041	re_node_set_init_empty (&union_set);
3042	for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3043	{
3044	int naccepted = 0;
3045	int cur_node = cur_nodes->elems[cur_idx];
3046	#ifdef DEBUG
3047	re_token_type_t type = dfa->nodes[cur_node].type;
3048	assert (!IS_EPSILON_NODE (type));
3049	#endif
3050	#ifdef RE_ENABLE_I18N
3051	/* If the node may accept `multi byte'. */
3052	if (dfa->nodes[cur_node].accept_mb)
3053	{
3054	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3055	str_idx);
3056	if (naccepted > 1)
3057	{
3058	re_dfastate_t *dest_state;
3059	int next_node = dfa->nexts[cur_node];
3060	int next_idx = str_idx + naccepted;
3061	dest_state = mctx->state_log[next_idx];
3062	re_node_set_empty (&union_set);
3063	if (dest_state)
3064	{
3065	err = re_node_set_merge (&union_set, &dest_state->nodes);
3066	if (BE (err != REG_NOERROR, 0))
3067	{
3068	re_node_set_free (&union_set);
3069	return err;
3070	}
3071	}
3072	result = re_node_set_insert (&union_set, next_node);
3073	if (BE (result < 0, 0))
3074	{
3075	re_node_set_free (&union_set);
3076	return REG_ESPACE;
3077	}
3078	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3079	&union_set);
3080	if (BE (mctx->state_log[next_idx] == NULL
3081	&& err != REG_NOERROR, 0))
3082	{
3083	re_node_set_free (&union_set);
3084	return err;
3085	}
3086	}
3087	}
3088	#endif /* RE_ENABLE_I18N */
3089	if (naccepted
3090	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3091	{
3092	result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3093	if (BE (result < 0, 0))
3094	{
3095	re_node_set_free (&union_set);
3096	return REG_ESPACE;
3097	}
3098	}
3099	}
3100	re_node_set_free (&union_set);
3101	return REG_NOERROR;
3102	}
3103
3104	/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3105	CUR_NODES, however exclude the nodes which are:
3106	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3107	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3108	*/
3109
3110	static reg_errcode_t
3111	internal_function
3112	check_arrival_expand_ecl (const re_dfa_t dfa, re_node_set cur_nodes,
3113	int ex_subexp, int type)
3114	{
3115	reg_errcode_t err;
3116	int idx, outside_node;
3117	re_node_set new_nodes;
3118	#ifdef DEBUG
3119	assert (cur_nodes->nelem);
3120	#endif
3121	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3122	if (BE (err != REG_NOERROR, 0))
3123	return err;
3124	/* Create a new node set NEW_NODES with the nodes which are epsilon
3125	closures of the node in CUR_NODES. */
3126
3127	for (idx = 0; idx < cur_nodes->nelem; ++idx)
3128	{
3129	int cur_node = cur_nodes->elems[idx];
3130	const re_node_set *eclosure = dfa->eclosures + cur_node;
3131	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3132	if (outside_node == -1)
3133	{
3134	/* There are no problematic nodes, just merge them. */
3135	err = re_node_set_merge (&new_nodes, eclosure);
3136	if (BE (err != REG_NOERROR, 0))
3137	{
3138	re_node_set_free (&new_nodes);
3139	return err;
3140	}
3141	}
3142	else
3143	{
3144	/* There are problematic nodes, re-calculate incrementally. */
3145	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3146	ex_subexp, type);
3147	if (BE (err != REG_NOERROR, 0))
3148	{
3149	re_node_set_free (&new_nodes);
3150	return err;
3151	}
3152	}
3153	}
3154	re_node_set_free (cur_nodes);
3155	*cur_nodes = new_nodes;
3156	return REG_NOERROR;
3157	}
3158
3159	/* Helper function for check_arrival_expand_ecl.
3160	Check incrementally the epsilon closure of TARGET, and if it isn't
3161	problematic append it to DST_NODES. */
3162
3163	static reg_errcode_t
3164	internal_function
3165	check_arrival_expand_ecl_sub (const re_dfa_t dfa, re_node_set dst_nodes,
3166	int target, int ex_subexp, int type)
3167	{
3168	int cur_node;
3169	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3170	{
3171	int err;
3172
3173	if (dfa->nodes[cur_node].type == type
3174	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3175	{
3176	if (type == OP_CLOSE_SUBEXP)
3177	{
3178	err = re_node_set_insert (dst_nodes, cur_node);
3179	if (BE (err == -1, 0))
3180	return REG_ESPACE;
3181	}
3182	break;
3183	}
3184	err = re_node_set_insert (dst_nodes, cur_node);
3185	if (BE (err == -1, 0))
3186	return REG_ESPACE;
3187	if (dfa->edests[cur_node].nelem == 0)
3188	break;
3189	if (dfa->edests[cur_node].nelem == 2)
3190	{
3191	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3192	dfa->edests[cur_node].elems[1],
3193	ex_subexp, type);
3194	if (BE (err != REG_NOERROR, 0))
3195	return err;
3196	}
3197	cur_node = dfa->edests[cur_node].elems[0];
3198	}
3199	return REG_NOERROR;
3200	}
3201
3202
3203	/* For all the back references in the current state, calculate the
3204	destination of the back references by the appropriate entry
3205	in MCTX->BKREF_ENTS. */
3206
3207	static reg_errcode_t
3208	internal_function
3209	expand_bkref_cache (re_match_context_t mctx, re_node_set cur_nodes,
3210	int cur_str, int subexp_num, int type)
3211	{
3212	const re_dfa_t *const dfa = mctx->dfa;
3213	reg_errcode_t err;
3214	int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3215	struct re_backref_cache_entry *ent;
3216
3217	if (cache_idx_start == -1)
3218	return REG_NOERROR;
3219
3220	restart:
3221	ent = mctx->bkref_ents + cache_idx_start;
3222	do
3223	{
3224	int to_idx, next_node;
3225
3226	/* Is this entry ENT is appropriate? */
3227	if (!re_node_set_contains (cur_nodes, ent->node))
3228	continue; /* No. */
3229
3230	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3231	/* Calculate the destination of the back reference, and append it
3232	to MCTX->STATE_LOG. */
3233	if (to_idx == cur_str)
3234	{
3235	/* The backreference did epsilon transit, we must re-check all the
3236	node in the current state. */
3237	re_node_set new_dests;
3238	reg_errcode_t err2, err3;
3239	next_node = dfa->edests[ent->node].elems[0];
3240	if (re_node_set_contains (cur_nodes, next_node))
3241	continue;
3242	err = re_node_set_init_1 (&new_dests, next_node);
3243	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3244	err3 = re_node_set_merge (cur_nodes, &new_dests);
3245	re_node_set_free (&new_dests);
3246	if (BE (err != REG_NOERROR \|\| err2 != REG_NOERROR
3247	\|\| err3 != REG_NOERROR, 0))
3248	{
3249	err = (err != REG_NOERROR ? err
3250	: (err2 != REG_NOERROR ? err2 : err3));
3251	return err;
3252	}
3253	/* TODO: It is still inefficient... */
3254	goto restart;
3255	}
3256	else
3257	{
3258	re_node_set union_set;
3259	next_node = dfa->nexts[ent->node];
3260	if (mctx->state_log[to_idx])
3261	{
3262	int ret;
3263	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3264	next_node))
3265	continue;
3266	err = re_node_set_init_copy (&union_set,
3267	&mctx->state_log[to_idx]->nodes);
3268	ret = re_node_set_insert (&union_set, next_node);
3269	if (BE (err != REG_NOERROR \|\| ret < 0, 0))
3270	{
3271	re_node_set_free (&union_set);
3272	err = err != REG_NOERROR ? err : REG_ESPACE;
3273	return err;
3274	}
3275	}
3276	else
3277	{
3278	err = re_node_set_init_1 (&union_set, next_node);
3279	if (BE (err != REG_NOERROR, 0))
3280	return err;
3281	}
3282	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3283	re_node_set_free (&union_set);
3284	if (BE (mctx->state_log[to_idx] == NULL
3285	&& err != REG_NOERROR, 0))
3286	return err;
3287	}
3288	}
3289	while (ent++->more);
3290	return REG_NOERROR;
3291	}
3292
3293	/* Build transition table for the state.
3294	Return 1 if succeeded, otherwise return NULL. */
3295
3296	static int
3297	internal_function
3298	build_trtable (const re_dfa_t dfa, re_dfastate_t state)
3299	{
3300	reg_errcode_t err;
3301	int i, j, ch, need_word_trtable = 0;
3302	bitset_word_t elem, mask;
3303	bool dests_node_malloced = false;
3304	bool dest_states_malloced = false;
3305	int ndests; /* Number of the destination states from `state'. */
3306	re_dfastate_t **trtable;
3307	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3308	re_node_set follows, *dests_node;
3309	bitset_t *dests_ch;
3310	bitset_t acceptable;
3311
3312	struct dests_alloc
3313	{
3314	re_node_set dests_node[SBC_MAX];
3315	bitset_t dests_ch[SBC_MAX];
3316	} *dests_alloc;
3317
3318	/* We build DFA states which corresponds to the destination nodes
3319	from `state'. `dests_node[i]' represents the nodes which i-th
3320	destination state contains, and `dests_ch[i]' represents the
3321	characters which i-th destination state accepts. */
3322	if (__libc_use_alloca (sizeof (struct dests_alloc)))
3323	dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3324	else
3325	{
3326	dests_alloc = re_malloc (struct dests_alloc, 1);
3327	if (BE (dests_alloc == NULL, 0))
3328	return 0;
3329	dests_node_malloced = true;
3330	}
3331	dests_node = dests_alloc->dests_node;
3332	dests_ch = dests_alloc->dests_ch;
3333
3334	/* Initialize transiton table. */
3335	state->word_trtable = state->trtable = NULL;
3336
3337	/* At first, group all nodes belonging to `state' into several
3338	destinations. */
3339	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3340	if (BE (ndests <= 0, 0))
3341	{
3342	if (dests_node_malloced)
3343	free (dests_alloc);
3344	/* Return 0 in case of an error, 1 otherwise. */
3345	if (ndests == 0)
3346	{
3347	state->trtable = (re_dfastate_t **)
3348	calloc (sizeof (re_dfastate_t *), SBC_MAX);
3349	return 1;
3350	}
3351	return 0;
3352	}
3353
3354	err = re_node_set_alloc (&follows, ndests + 1);
3355	if (BE (err != REG_NOERROR, 0))
3356	goto out_free;
3357
3358	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3359	+ ndests * 3 * sizeof (re_dfastate_t *)))
3360	dest_states = (re_dfastate_t **)
3361	alloca (ndests * 3 * sizeof (re_dfastate_t *));
3362	else
3363	{
3364	dest_states = (re_dfastate_t **)
3365	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3366	if (BE (dest_states == NULL, 0))
3367	{
3368	out_free:
3369	if (dest_states_malloced)
3370	free (dest_states);
3371	re_node_set_free (&follows);
3372	for (i = 0; i < ndests; ++i)
3373	re_node_set_free (dests_node + i);
3374	if (dests_node_malloced)
3375	free (dests_alloc);
3376	return 0;
3377	}
3378	dest_states_malloced = true;
3379	}
3380	dest_states_word = dest_states + ndests;
3381	dest_states_nl = dest_states_word + ndests;
3382	bitset_empty (acceptable);
3383
3384	/* Then build the states for all destinations. */
3385	for (i = 0; i < ndests; ++i)
3386	{
3387	int next_node;
3388	re_node_set_empty (&follows);
3389	/* Merge the follows of this destination states. */
3390	for (j = 0; j < dests_node[i].nelem; ++j)
3391	{
3392	next_node = dfa->nexts[dests_node[i].elems[j]];
3393	if (next_node != -1)
3394	{
3395	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3396	if (BE (err != REG_NOERROR, 0))
3397	goto out_free;
3398	}
3399	}
3400	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3401	if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3402	goto out_free;
3403	/* If the new state has context constraint,
3404	build appropriate states for these contexts. */
3405	if (dest_states[i]->has_constraint)
3406	{
3407	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3408	CONTEXT_WORD);
3409	if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3410	goto out_free;
3411
3412	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3413	need_word_trtable = 1;
3414
3415	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3416	CONTEXT_NEWLINE);
3417	if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3418	goto out_free;
3419	}
3420	else
3421	{
3422	dest_states_word[i] = dest_states[i];
3423	dest_states_nl[i] = dest_states[i];
3424	}
3425	bitset_merge (acceptable, dests_ch[i]);
3426	}
3427
3428	if (!BE (need_word_trtable, 0))
3429	{
3430	/* We don't care about whether the following character is a word
3431	character, or we are in a single-byte character set so we can
3432	discern by looking at the character code: allocate a
3433	256-entry transition table. */
3434	trtable = state->trtable =
3435	(re_dfastate_t *) calloc (sizeof (re_dfastate_t ), SBC_MAX);
3436	if (BE (trtable == NULL, 0))
3437	goto out_free;
3438
3439	/* For all characters ch...: */
3440	for (i = 0; i < BITSET_WORDS; ++i)
3441	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3442	elem;
3443	mask <<= 1, elem >>= 1, ++ch)
3444	if (BE (elem & 1, 0))
3445	{
3446	/* There must be exactly one destination which accepts
3447	character ch. See group_nodes_into_DFAstates. */
3448	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3449	;
3450
3451	/* j-th destination accepts the word character ch. */
3452	if (dfa->word_char[i] & mask)
3453	trtable[ch] = dest_states_word[j];
3454	else
3455	trtable[ch] = dest_states[j];
3456	}
3457	}
3458	else
3459	{
3460	/* We care about whether the following character is a word
3461	character, and we are in a multi-byte character set: discern
3462	by looking at the character code: build two 256-entry
3463	transition tables, one starting at trtable[0] and one
3464	starting at trtable[SBC_MAX]. */
3465	trtable = state->word_trtable =
3466	(re_dfastate_t *) calloc (sizeof (re_dfastate_t ), 2 * SBC_MAX);
3467	if (BE (trtable == NULL, 0))
3468	goto out_free;
3469
3470	/* For all characters ch...: */
3471	for (i = 0; i < BITSET_WORDS; ++i)
3472	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3473	elem;
3474	mask <<= 1, elem >>= 1, ++ch)
3475	if (BE (elem & 1, 0))
3476	{
3477	/* There must be exactly one destination which accepts
3478	character ch. See group_nodes_into_DFAstates. */
3479	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3480	;
3481
3482	/* j-th destination accepts the word character ch. */
3483	trtable[ch] = dest_states[j];
3484	trtable[ch + SBC_MAX] = dest_states_word[j];
3485	}
3486	}
3487
3488	/* new line */
3489	if (bitset_contain (acceptable, NEWLINE_CHAR))
3490	{
3491	/* The current state accepts newline character. */
3492	for (j = 0; j < ndests; ++j)
3493	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3494	{
3495	/* k-th destination accepts newline character. */
3496	trtable[NEWLINE_CHAR] = dest_states_nl[j];
3497	if (need_word_trtable)
3498	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3499	/* There must be only one destination which accepts
3500	newline. See group_nodes_into_DFAstates. */
3501	break;
3502	}
3503	}
3504
3505	if (dest_states_malloced)
3506	free (dest_states);
3507
3508	re_node_set_free (&follows);
3509	for (i = 0; i < ndests; ++i)
3510	re_node_set_free (dests_node + i);
3511
3512	if (dests_node_malloced)
3513	free (dests_alloc);
3514
3515	return 1;
3516	}
3517
3518	/* Group all nodes belonging to STATE into several destinations.
3519	Then for all destinations, set the nodes belonging to the destination
3520	to DESTS_NODE[i] and set the characters accepted by the destination
3521	to DEST_CH[i]. This function return the number of destinations. */
3522
3523	static int
3524	internal_function
3525	group_nodes_into_DFAstates (const re_dfa_t dfa, const re_dfastate_t state,
3526	re_node_set dests_node, bitset_t dests_ch)
3527	{
3528	reg_errcode_t err;
3529	int result;
3530	int i, j, k;
3531	int ndests; /* Number of the destinations from `state'. */
3532	bitset_t accepts; /* Characters a node can accept. */
3533	const re_node_set *cur_nodes = &state->nodes;
3534	bitset_empty (accepts);
3535	ndests = 0;
3536
3537	/* For all the nodes belonging to `state', */
3538	for (i = 0; i < cur_nodes->nelem; ++i)
3539	{
3540	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3541	re_token_type_t type = node->type;
3542	unsigned int constraint = node->constraint;
3543
3544	/* Enumerate all single byte character this node can accept. */
3545	if (type == CHARACTER)
3546	bitset_set (accepts, node->opr.c);
3547	else if (type == SIMPLE_BRACKET)
3548	{
3549	bitset_merge (accepts, node->opr.sbcset);
3550	}
3551	else if (type == OP_PERIOD)
3552	{
3553	#ifdef RE_ENABLE_I18N
3554	if (dfa->mb_cur_max > 1)
3555	bitset_merge (accepts, dfa->sb_char);
3556	else
3557	#endif
3558	bitset_set_all (accepts);
3559	if (!(dfa->syntax & RE_DOT_NEWLINE))
3560	bitset_clear (accepts, '\n');
3561	if (dfa->syntax & RE_DOT_NOT_NULL)
3562	bitset_clear (accepts, '\0');
3563	}
3564	#ifdef RE_ENABLE_I18N
3565	else if (type == OP_UTF8_PERIOD)
3566	{
3567	memset (accepts, '\xff', sizeof (bitset_t) / 2);
3568	if (!(dfa->syntax & RE_DOT_NEWLINE))
3569	bitset_clear (accepts, '\n');
3570	if (dfa->syntax & RE_DOT_NOT_NULL)
3571	bitset_clear (accepts, '\0');
3572	}
3573	#endif
3574	else
3575	continue;
3576
3577	/* Check the `accepts' and sift the characters which are not
3578	match it the context. */
3579	if (constraint)
3580	{
3581	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3582	{
3583	bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3584	bitset_empty (accepts);
3585	if (accepts_newline)
3586	bitset_set (accepts, NEWLINE_CHAR);
3587	else
3588	continue;
3589	}
3590	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3591	{
3592	bitset_empty (accepts);
3593	continue;
3594	}
3595
3596	if (constraint & NEXT_WORD_CONSTRAINT)
3597	{
3598	bitset_word_t any_set = 0;
3599	if (type == CHARACTER && !node->word_char)
3600	{
3601	bitset_empty (accepts);
3602	continue;
3603	}
3604	#ifdef RE_ENABLE_I18N
3605	if (dfa->mb_cur_max > 1)
3606	for (j = 0; j < BITSET_WORDS; ++j)
3607	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3608	else
3609	#endif
3610	for (j = 0; j < BITSET_WORDS; ++j)
3611	any_set \|= (accepts[j] &= dfa->word_char[j]);
3612	if (!any_set)
3613	continue;
3614	}
3615	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3616	{
3617	bitset_word_t any_set = 0;
3618	if (type == CHARACTER && node->word_char)
3619	{
3620	bitset_empty (accepts);
3621	continue;
3622	}
3623	#ifdef RE_ENABLE_I18N
3624	if (dfa->mb_cur_max > 1)
3625	for (j = 0; j < BITSET_WORDS; ++j)
3626	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3627	else
3628	#endif
3629	for (j = 0; j < BITSET_WORDS; ++j)
3630	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3631	if (!any_set)
3632	continue;
3633	}
3634	}
3635
3636	/* Then divide `accepts' into DFA states, or create a new
3637	state. Above, we make sure that accepts is not empty. */
3638	for (j = 0; j < ndests; ++j)
3639	{
3640	bitset_t intersec; /* Intersection sets, see below. */
3641	bitset_t remains;
3642	/* Flags, see below. */
3643	bitset_word_t has_intersec, not_subset, not_consumed;
3644
3645	/* Optimization, skip if this state doesn't accept the character. */
3646	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3647	continue;
3648
3649	/* Enumerate the intersection set of this state and `accepts'. */
3650	has_intersec = 0;
3651	for (k = 0; k < BITSET_WORDS; ++k)
3652	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3653	/* And skip if the intersection set is empty. */
3654	if (!has_intersec)
3655	continue;
3656
3657	/* Then check if this state is a subset of `accepts'. */
3658	not_subset = not_consumed = 0;
3659	for (k = 0; k < BITSET_WORDS; ++k)
3660	{
3661	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3662	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3663	}
3664
3665	/* If this state isn't a subset of `accepts', create a
3666	new group state, which has the `remains'. */
3667	if (not_subset)
3668	{
3669	bitset_copy (dests_ch[ndests], remains);
3670	bitset_copy (dests_ch[j], intersec);
3671	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3672	if (BE (err != REG_NOERROR, 0))
3673	goto error_return;
3674	++ndests;
3675	}
3676
3677	/* Put the position in the current group. */
3678	result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3679	if (BE (result < 0, 0))
3680	goto error_return;
3681
3682	/* If all characters are consumed, go to next node. */
3683	if (!not_consumed)
3684	break;
3685	}
3686	/* Some characters remain, create a new group. */
3687	if (j == ndests)
3688	{
3689	bitset_copy (dests_ch[ndests], accepts);
3690	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3691	if (BE (err != REG_NOERROR, 0))
3692	goto error_return;
3693	++ndests;
3694	bitset_empty (accepts);
3695	}
3696	}
3697	return ndests;
3698	error_return:
3699	for (j = 0; j < ndests; ++j)
3700	re_node_set_free (dests_node + j);
3701	return -1;
3702	}
3703
3704	#ifdef RE_ENABLE_I18N
3705	/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3706	Return the number of the bytes the node accepts.
3707	STR_IDX is the current index of the input string.
3708
3709	This function handles the nodes which can accept one character, or
3710	one collating element like '.', '[a-z]', opposite to the other nodes
3711	can only accept one byte. */
3712
3713	static int
3714	internal_function
3715	check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3716	const re_string_t *input, int str_idx)
3717	{
3718	const re_token_t *node = dfa->nodes + node_idx;
3719	int char_len, elem_len;
3720	int i;
3721
3722	if (BE (node->type == OP_UTF8_PERIOD, 0))
3723	{
3724	unsigned char c = re_string_byte_at (input, str_idx), d;
3725	if (BE (c < 0xc2, 1))
3726	return 0;
3727
3728	if (str_idx + 2 > input->len)
3729	return 0;
3730
3731	d = re_string_byte_at (input, str_idx + 1);
3732	if (c < 0xe0)
3733	return (d < 0x80 \|\| d > 0xbf) ? 0 : 2;
3734	else if (c < 0xf0)
3735	{
3736	char_len = 3;
3737	if (c == 0xe0 && d < 0xa0)
3738	return 0;
3739	}
3740	else if (c < 0xf8)
3741	{
3742	char_len = 4;
3743	if (c == 0xf0 && d < 0x90)
3744	return 0;
3745	}
3746	else if (c < 0xfc)
3747	{
3748	char_len = 5;
3749	if (c == 0xf8 && d < 0x88)
3750	return 0;
3751	}
3752	else if (c < 0xfe)
3753	{
3754	char_len = 6;
3755	if (c == 0xfc && d < 0x84)
3756	return 0;
3757	}
3758	else
3759	return 0;
3760
3761	if (str_idx + char_len > input->len)
3762	return 0;
3763
3764	for (i = 1; i < char_len; ++i)
3765	{
3766	d = re_string_byte_at (input, str_idx + i);
3767	if (d < 0x80 \|\| d > 0xbf)
3768	return 0;
3769	}
3770	return char_len;
3771	}
3772
3773	char_len = re_string_char_size_at (input, str_idx);
3774	if (node->type == OP_PERIOD)
3775	{
3776	if (char_len <= 1)
3777	return 0;
3778	/* FIXME: I don't think this if is needed, as both '\n'
3779	and '\0' are char_len == 1. */
3780	/* '.' accepts any one character except the following two cases. */
3781	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3782	re_string_byte_at (input, str_idx) == '\n') \|\|
3783	((dfa->syntax & RE_DOT_NOT_NULL) &&
3784	re_string_byte_at (input, str_idx) == '\0'))
3785	return 0;
3786	return char_len;
3787	}
3788
3789	elem_len = re_string_elem_size_at (input, str_idx);
3790	if ((elem_len <= 1 && char_len <= 1) \|\| char_len == 0)
3791	return 0;
3792
3793	if (node->type == COMPLEX_BRACKET)
3794	{
3795	const re_charset_t *cset = node->opr.mbcset;
3796	# ifdef _LIBC
3797	const unsigned char *pin
3798	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3799	int j;
3800	uint32_t nrules;
3801	# endif /* _LIBC */
3802	int match_len = 0;
3803	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
3804	? re_string_wchar_at (input, str_idx) : 0);
3805
3806	/* match with multibyte character? */
3807	for (i = 0; i < cset->nmbchars; ++i)
3808	if (wc == cset->mbchars[i])
3809	{
3810	match_len = char_len;
3811	goto check_node_accept_bytes_match;
3812	}
3813	/* match with character_class? */
3814	for (i = 0; i < cset->nchar_classes; ++i)
3815	{
3816	wctype_t wt = cset->char_classes[i];
3817	if (__iswctype (wc, wt))
3818	{
3819	match_len = char_len;
3820	goto check_node_accept_bytes_match;
3821	}
3822	}
3823
3824	# ifdef _LIBC
3825	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3826	if (nrules != 0)
3827	{
3828	unsigned int in_collseq = 0;
3829	const int32_t table, indirect;
3830	const unsigned char weights, extra;
3831	const char *collseqwc;
3832	int32_t idx;
3833	/* This #include defines a local function! */
3834	# include <locale/weight.h>
3835
3836	/* match with collating_symbol? */
3837	if (cset->ncoll_syms)
3838	extra = (const unsigned char *)
3839	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3840	for (i = 0; i < cset->ncoll_syms; ++i)
3841	{
3842	const unsigned char *coll_sym = extra + cset->coll_syms[i];
3843	/* Compare the length of input collating element and
3844	the length of current collating element. */
3845	if (*coll_sym != elem_len)
3846	continue;
3847	/* Compare each bytes. */
3848	for (j = 0; j < *coll_sym; j++)
3849	if (pin[j] != coll_sym[1 + j])
3850	break;
3851	if (j == *coll_sym)
3852	{
3853	/* Match if every bytes is equal. */
3854	match_len = j;
3855	goto check_node_accept_bytes_match;
3856	}
3857	}
3858
3859	if (cset->nranges)
3860	{
3861	if (elem_len <= char_len)
3862	{
3863	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3864	in_collseq = __collseq_table_lookup (collseqwc, wc);
3865	}
3866	else
3867	in_collseq = find_collation_sequence_value (pin, elem_len);
3868	}
3869	/* match with range expression? */
3870	for (i = 0; i < cset->nranges; ++i)
3871	if (cset->range_starts[i] <= in_collseq
3872	&& in_collseq <= cset->range_ends[i])
3873	{
3874	match_len = elem_len;
3875	goto check_node_accept_bytes_match;
3876	}
3877
3878	/* match with equivalence_class? */
3879	if (cset->nequiv_classes)
3880	{
3881	const unsigned char *cp = pin;
3882	table = (const int32_t *)
3883	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3884	weights = (const unsigned char *)
3885	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3886	extra = (const unsigned char *)
3887	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3888	indirect = (const int32_t *)
3889	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3890	idx = findidx (&cp);
3891	if (idx > 0)
3892	for (i = 0; i < cset->nequiv_classes; ++i)
3893	{
3894	int32_t equiv_class_idx = cset->equiv_classes[i];
3895	size_t weight_len = weights[idx];
3896	if (weight_len == weights[equiv_class_idx])
3897	{
3898	int cnt = 0;
3899	while (cnt <= weight_len
3900	&& (weights[equiv_class_idx + 1 + cnt]
3901	== weights[idx + 1 + cnt]))
3902	++cnt;
3903	if (cnt > weight_len)
3904	{
3905	match_len = elem_len;
3906	goto check_node_accept_bytes_match;
3907	}
3908	}
3909	}
3910	}
3911	}
3912	else
3913	# endif /* _LIBC */
3914	{
3915	/* match with range expression? */
3916	#if __GNUC__ >= 2
3917	wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3918	#else
3919	wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3920	cmp_buf[2] = wc;
3921	#endif
3922	for (i = 0; i < cset->nranges; ++i)
3923	{
3924	cmp_buf[0] = cset->range_starts[i];
3925	cmp_buf[4] = cset->range_ends[i];
3926	if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3927	&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3928	{
3929	match_len = char_len;
3930	goto check_node_accept_bytes_match;
3931	}
3932	}
3933	}
3934	check_node_accept_bytes_match:
3935	if (!cset->non_match)
3936	return match_len;
3937	else
3938	{
3939	if (match_len > 0)
3940	return 0;
3941	else
3942	return (elem_len > char_len) ? elem_len : char_len;
3943	}
3944	}
3945	return 0;
3946	}
3947
3948	# ifdef _LIBC
3949	static unsigned int
3950	internal_function
3951	find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3952	{
3953	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3954	if (nrules == 0)
3955	{
3956	if (mbs_len == 1)
3957	{
3958	/* No valid character. Match it as a single byte character. */
3959	const unsigned char collseq = (const unsigned char )
3960	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3961	return collseq[mbs[0]];
3962	}
3963	return UINT_MAX;
3964	}
3965	else
3966	{
3967	int32_t idx;
3968	const unsigned char extra = (const unsigned char )
3969	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3970	int32_t extrasize = (const unsigned char *)
3971	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3972
3973	for (idx = 0; idx < extrasize;)
3974	{
3975	int mbs_cnt, found = 0;
3976	int32_t elem_mbs_len;
3977	/* Skip the name of collating element name. */
3978	idx = idx + extra[idx] + 1;
3979	elem_mbs_len = extra[idx++];
3980	if (mbs_len == elem_mbs_len)
3981	{
3982	for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3983	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3984	break;
3985	if (mbs_cnt == elem_mbs_len)
3986	/* Found the entry. */
3987	found = 1;
3988	}
3989	/* Skip the byte sequence of the collating element. */
3990	idx += elem_mbs_len;
3991	/* Adjust for the alignment. */
3992	idx = (idx + 3) & ~3;
3993	/* Skip the collation sequence value. */
3994	idx += sizeof (uint32_t);
3995	/* Skip the wide char sequence of the collating element. */
3996	idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3997	/* If we found the entry, return the sequence value. */
3998	if (found)
3999	return (uint32_t ) (extra + idx);
4000	/* Skip the collation sequence value. */
4001	idx += sizeof (uint32_t);
4002	}
4003	return UINT_MAX;
4004	}
4005	}
4006	# endif /* _LIBC */
4007	#endif /* RE_ENABLE_I18N */
4008
4009	/* Check whether the node accepts the byte which is IDX-th
4010	byte of the INPUT. */
4011
4012	static int
4013	internal_function
4014	check_node_accept (const re_match_context_t mctx, const re_token_t node,
4015	int idx)
4016	{
4017	unsigned char ch;
4018	ch = re_string_byte_at (&mctx->input, idx);
4019	switch (node->type)
4020	{
4021	case CHARACTER:
4022	if (node->opr.c != ch)
4023	return 0;
4024	break;
4025
4026	case SIMPLE_BRACKET:
4027	if (!bitset_contain (node->opr.sbcset, ch))
4028	return 0;
4029	break;
4030
4031	#ifdef RE_ENABLE_I18N
4032	case OP_UTF8_PERIOD:
4033	if (ch >= 0x80)
4034	return 0;
4035	/* FALLTHROUGH */
4036	#endif
4037	case OP_PERIOD:
4038	if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4039	\|\| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4040	return 0;
4041	break;
4042
4043	default:
4044	return 0;
4045	}
4046
4047	if (node->constraint)
4048	{
4049	/* The node has constraints. Check whether the current context
4050	satisfies the constraints. */
4051	unsigned int context = re_string_context_at (&mctx->input, idx,
4052	mctx->eflags);
4053	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4054	return 0;
4055	}
4056
4057	return 1;
4058	}
4059
4060	/* Extend the buffers, if the buffers have run out. */
4061
4062	static reg_errcode_t
4063	internal_function
4064	extend_buffers (re_match_context_t *mctx)
4065	{
4066	reg_errcode_t ret;
4067	re_string_t *pstr = &mctx->input;
4068
4069	/* Double the lengthes of the buffers. */
4070	ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4071	if (BE (ret != REG_NOERROR, 0))
4072	return ret;
4073
4074	if (mctx->state_log != NULL)
4075	{
4076	/* And double the length of state_log. */
4077	/* XXX We have no indication of the size of this buffer. If this
4078	allocation fail we have no indication that the state_log array
4079	does not have the right size. */
4080	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
4081	pstr->bufs_len + 1);
4082	if (BE (new_array == NULL, 0))
4083	return REG_ESPACE;
4084	mctx->state_log = new_array;
4085	}
4086
4087	/* Then reconstruct the buffers. */
4088	if (pstr->icase)
4089	{
4090	#ifdef RE_ENABLE_I18N
4091	if (pstr->mb_cur_max > 1)
4092	{
4093	ret = build_wcs_upper_buffer (pstr);
4094	if (BE (ret != REG_NOERROR, 0))
4095	return ret;
4096	}
4097	else
4098	#endif /* RE_ENABLE_I18N */
4099	build_upper_buffer (pstr);
4100	}
4101	else
4102	{
4103	#ifdef RE_ENABLE_I18N
4104	if (pstr->mb_cur_max > 1)
4105	build_wcs_buffer (pstr);
4106	else
4107	#endif /* RE_ENABLE_I18N */
4108	{
4109	if (pstr->trans != NULL)
4110	re_string_translate_buffer (pstr);
4111	}
4112	}
4113	return REG_NOERROR;
4114	}
4115
4116
4117
4118	/* Functions for matching context. */
4119
4120	/* Initialize MCTX. */
4121
4122	static reg_errcode_t
4123	internal_function
4124	match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4125	{
4126	mctx->eflags = eflags;
4127	mctx->match_last = -1;
4128	if (n > 0)
4129	{
4130	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4131	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4132	if (BE (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL, 0))
4133	return REG_ESPACE;
4134	}
4135	/* Already zero-ed by the caller.
4136	else
4137	mctx->bkref_ents = NULL;
4138	mctx->nbkref_ents = 0;
4139	mctx->nsub_tops = 0; */
4140	mctx->abkref_ents = n;
4141	mctx->max_mb_elem_len = 1;
4142	mctx->asub_tops = n;
4143	return REG_NOERROR;
4144	}
4145
4146	/* Clean the entries which depend on the current input in MCTX.
4147	This function must be invoked when the matcher changes the start index
4148	of the input, or changes the input string. */
4149
4150	static void
4151	internal_function
4152	match_ctx_clean (re_match_context_t *mctx)
4153	{
4154	int st_idx;
4155	for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4156	{
4157	int sl_idx;
4158	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4159	for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4160	{
4161	re_sub_match_last_t *last = top->lasts[sl_idx];
4162	re_free (last->path.array);
4163	re_free (last);
4164	}
4165	re_free (top->lasts);
4166	if (top->path)
4167	{
4168	re_free (top->path->array);
4169	re_free (top->path);
4170	}
4171	free (top);
4172	}
4173
4174	mctx->nsub_tops = 0;
4175	mctx->nbkref_ents = 0;
4176	}
4177
4178	/* Free all the memory associated with MCTX. */
4179
4180	static void
4181	internal_function
4182	match_ctx_free (re_match_context_t *mctx)
4183	{
4184	/* First, free all the memory associated with MCTX->SUB_TOPS. */
4185	match_ctx_clean (mctx);
4186	re_free (mctx->sub_tops);
4187	re_free (mctx->bkref_ents);
4188	}
4189
4190	/* Add a new backreference entry to MCTX.
4191	Note that we assume that caller never call this function with duplicate
4192	entry, and call with STR_IDX which isn't smaller than any existing entry.
4193	*/
4194
4195	static reg_errcode_t
4196	internal_function
4197	match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4198	int to)
4199	{
4200	if (mctx->nbkref_ents >= mctx->abkref_ents)
4201	{
4202	struct re_backref_cache_entry* new_entry;
4203	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4204	mctx->abkref_ents * 2);
4205	if (BE (new_entry == NULL, 0))
4206	{
4207	re_free (mctx->bkref_ents);
4208	return REG_ESPACE;
4209	}
4210	mctx->bkref_ents = new_entry;
4211	memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4212	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4213	mctx->abkref_ents *= 2;
4214	}
4215	if (mctx->nbkref_ents > 0
4216	&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4217	mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4218
4219	mctx->bkref_ents[mctx->nbkref_ents].node = node;
4220	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4221	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4222	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4223
4224	/* This is a cache that saves negative results of check_dst_limits_calc_pos.
4225	If bit N is clear, means that this entry won't epsilon-transition to
4226	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4227	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4228	such node.
4229
4230	A backreference does not epsilon-transition unless it is empty, so set
4231	to all zeros if FROM != TO. */
4232	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4233	= (from == to ? ~0 : 0);
4234
4235	mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4236	if (mctx->max_mb_elem_len < to - from)
4237	mctx->max_mb_elem_len = to - from;
4238	return REG_NOERROR;
4239	}
4240
4241	/* Search for the first entry which has the same str_idx, or -1 if none is
4242	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4243
4244	static int
4245	internal_function
4246	search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4247	{
4248	int left, right, mid, last;
4249	last = right = mctx->nbkref_ents;
4250	for (left = 0; left < right;)
4251	{
4252	mid = (left + right) / 2;
4253	if (mctx->bkref_ents[mid].str_idx < str_idx)
4254	left = mid + 1;
4255	else
4256	right = mid;
4257	}
4258	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4259	return left;
4260	else
4261	return -1;
4262	}
4263
4264	/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4265	at STR_IDX. */
4266
4267	static reg_errcode_t
4268	internal_function
4269	match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4270	{
4271	#ifdef DEBUG
4272	assert (mctx->sub_tops != NULL);
4273	assert (mctx->asub_tops > 0);
4274	#endif
4275	if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4276	{
4277	int new_asub_tops = mctx->asub_tops * 2;
4278	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4279	re_sub_match_top_t *,
4280	new_asub_tops);
4281	if (BE (new_array == NULL, 0))
4282	return REG_ESPACE;
4283	mctx->sub_tops = new_array;
4284	mctx->asub_tops = new_asub_tops;
4285	}
4286	mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4287	if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4288	return REG_ESPACE;
4289	mctx->sub_tops[mctx->nsub_tops]->node = node;
4290	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4291	return REG_NOERROR;
4292	}
4293
4294	/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4295	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4296
4297	static re_sub_match_last_t *
4298	internal_function
4299	match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4300	{
4301	re_sub_match_last_t *new_entry;
4302	if (BE (subtop->nlasts == subtop->alasts, 0))
4303	{
4304	int new_alasts = 2 * subtop->alasts + 1;
4305	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4306	re_sub_match_last_t *,
4307	new_alasts);
4308	if (BE (new_array == NULL, 0))
4309	return NULL;
4310	subtop->lasts = new_array;
4311	subtop->alasts = new_alasts;
4312	}
4313	new_entry = calloc (1, sizeof (re_sub_match_last_t));
4314	if (BE (new_entry != NULL, 1))
4315	{
4316	subtop->lasts[subtop->nlasts] = new_entry;
4317	new_entry->node = node;
4318	new_entry->str_idx = str_idx;
4319	++subtop->nlasts;
4320	}
4321	return new_entry;
4322	}
4323
4324	static void
4325	internal_function
4326	sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
4327	re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4328	{
4329	sctx->sifted_states = sifted_sts;
4330	sctx->limited_states = limited_sts;
4331	sctx->last_node = last_node;
4332	sctx->last_str_idx = last_str_idx;
4333	re_node_set_init_empty (&sctx->limits);
4334	}

Note: See TracBrowser for help on using the repository browser.

source: kBuild/vendor/sed/current/lib/regexec.c

Download in other formats: