1 |
|
---|
2 | #define DPRINTF(p) /*nothing */
|
---|
3 | #define DPRINTF(p) printf p
|
---|
4 | #define GETCHAR(c, eptr) c = *eptr;
|
---|
5 | #define GETCHARINC(c, eptr) c = *eptr++;
|
---|
6 | #define class pcre_class
|
---|
7 | #define match_condassert 0x01 /* Called to check a condition assertion */
|
---|
8 | #define match_isgroup 0x02 /* Set if start of bracketed group */
|
---|
9 | #else
|
---|
10 | #endif
|
---|
11 | #ifdef DEBUG /* Sigh. Some compilers never learn. */
|
---|
12 | #ifdef DEBUG
|
---|
13 | #ifdef __cplusplus
|
---|
14 | #include "internal.h"
|
---|
15 | && length - re->max_match_size > start_offset)
|
---|
16 | ((*ecode++ == OP_BEG_WORD) ? prev_is_word : cur_is_word))
|
---|
17 | ((md->ctypes[*eptr] & ctype_word) != 0);
|
---|
18 | ((md->ctypes[eptr[-1]] & ctype_word) != 0);
|
---|
19 | (eptr == md->end_subject - 1 && *eptr != '\n'))
|
---|
20 | (i.e. keep it out of the loop). Also we can test that there are at least
|
---|
21 | (md->ctypes[*eptr++] & ctype_digit) != 0)
|
---|
22 | (md->ctypes[*eptr++] & ctype_digit) == 0)
|
---|
23 | (md->ctypes[*eptr++] & ctype_space) != 0)
|
---|
24 | (md->ctypes[*eptr++] & ctype_space) == 0)
|
---|
25 | (md->ctypes[*eptr++] & ctype_word) != 0)
|
---|
26 | (md->ctypes[*eptr++] & ctype_word) == 0)
|
---|
27 | (offsetcount - 2) * sizeof (int));
|
---|
28 | (offsets == NULL && offsetcount > 0))
|
---|
29 | (pcre_free) (match_block.offset_vector);
|
---|
30 | (pcre_free) (save);
|
---|
31 | (re->tables + fcc_offset)[req_char] : req_char;
|
---|
32 | * Match a back-reference *
|
---|
33 | * Execute a Regular Expression *
|
---|
34 | * Match from current position *
|
---|
35 | * Debugging function to print chars *
|
---|
36 | * Perl-Compatible Regular Expressions *
|
---|
37 | * Macros and tables for character handling *
|
---|
38 | *************************************************/
|
---|
39 | */
|
---|
40 | *iptr = -1;
|
---|
41 | *iptr++ = -1;
|
---|
42 | *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
|
---|
43 | *prev == OP_ONCE)
|
---|
44 | -----------------------------------------------------------------------------
|
---|
45 | -1 => failed to match
|
---|
46 | /*
|
---|
47 | /* "Once" brackets are like assertion brackets except that after a match,
|
---|
48 | /* ... else fall through */
|
---|
49 | /* Advance to a possible match for an initial string after study */
|
---|
50 | /* Allow compilation as C++ source code, should anybody want to do that. */
|
---|
51 | /* Always fail if not enough characters left */
|
---|
52 | /* An alternation is the end of a branch; scan along to find the end of the
|
---|
53 | /* Assert before internal newline if multiline, or before a terminating
|
---|
54 | /* Assertion brackets. Check the alternative branches in turn - the
|
---|
55 | /* At the start of a bracketed group, add the current subject pointer to the
|
---|
56 | /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
|
---|
57 | /* Caseful comparisons */
|
---|
58 | /* Change option settings */
|
---|
59 | /* Common code for all repeated single character type matches */
|
---|
60 | /* Common code for all repeated single-character matches. We can give
|
---|
61 | /* Compute the minimum number of offsets that we need to reset each time. Doing
|
---|
62 | /* Conditional group: compilation checked that there are no more than
|
---|
63 | /* Continue as from after the assertion, updating the offsets high water
|
---|
64 | /* Continue from after the assertion, updating the offsets high water
|
---|
65 | /* Control never gets here */
|
---|
66 | /* Control never reaches here */
|
---|
67 | /* Copy the offset information from temporary store if necessary */
|
---|
68 | /* Do a single test if no case difference is set up */
|
---|
69 | /* Do not stick any code in here without much thought; it is assumed
|
---|
70 | /* End of a group, repeated or non-repeating. If we are at the end of
|
---|
71 | /* End of subject assertion (\z) */
|
---|
72 | /* End of subject or ending \n assertion (\Z) */
|
---|
73 | /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
|
---|
74 | /* First, ensure the minimum number of matches are present. */
|
---|
75 | /* First, ensure the minimum number of matches are present. Use inline
|
---|
76 | /* First, ensure the minimum number of matches are present. We get back
|
---|
77 | /* Flag bits for the match() function */
|
---|
78 | /* For a non-repeating ket, just continue at this level. This also
|
---|
79 | /* For anchored or unanchored matches, there may be a "last known required
|
---|
80 | /* For extended extraction brackets (large number), we have to fish out
|
---|
81 | /* For extended extraction brackets (large number), we have to fish out the
|
---|
82 | /* For matches anchored to the end of the pattern, we can often avoid
|
---|
83 | /* If a back reference hasn't been set, the length that is passed is greater
|
---|
84 | /* If checking an assertion for a condition, return TRUE. */
|
---|
85 | /* If hit the end of the group (which could be repeated), fail */
|
---|
86 | /* If max == min we can continue with the main loop without the
|
---|
87 | /* If maximizing it is worth using inline code for speed, doing the type
|
---|
88 | /* If maximizing, find the longest possible run, then work backwards. */
|
---|
89 | /* If maximizing, find the longest string and work backwards */
|
---|
90 | /* If min = max, continue at the same level without recursing */
|
---|
91 | /* If min = max, continue at the same level without recursion.
|
---|
92 | /* If minimizing, keep testing the rest of the expression and advancing
|
---|
93 | /* If minimizing, keep trying and advancing the pointer */
|
---|
94 | /* If minimizing, we have to test the rest of the pattern before each
|
---|
95 | /* If req_char is set, we know that that character must appear in the subject
|
---|
96 | /* If the expression has got more back references than the offsets supplied can
|
---|
97 | /* If the length of the reference is zero, just continue with the
|
---|
98 | /* If the reference is unset, set the length to be longer than the amount
|
---|
99 | /* If we can't find the required character, break the matching loop */
|
---|
100 | /* If we have found the required character, save the point where we
|
---|
101 | /* In all other cases except a conditional group we have to check the
|
---|
102 | /* In case the recursion has set more capturing values, save the final
|
---|
103 | /* Include the internals header, which itself includes Standard C headers plus
|
---|
104 | /* Insufficient room for saving captured contents */
|
---|
105 | /* Loop for handling unanchored repeated matching attempts; for anchored regexs
|
---|
106 | /* Match a back reference, possibly repeatedly. Look past the end of the
|
---|
107 | /* Match a character class, possibly repeatedly. Look past the end of the
|
---|
108 | /* Match a negated single character */
|
---|
109 | /* Match a negated single character repeatedly. This is almost a repeat of
|
---|
110 | /* Match a run of characters */
|
---|
111 | /* Match a single character repeatedly; different opcodes share code. */
|
---|
112 | /* Match a single character type repeatedly; several different opcodes
|
---|
113 | /* Match a single character type; inline for speed */
|
---|
114 | /* Min and max values for the common repeats; for the maxima, 0 => infinity */
|
---|
115 | /* Move the subject pointer back. This occurs only at the start of
|
---|
116 | /* Negative assertion: all branches must fail to match */
|
---|
117 | /* Now start processing the operations. */
|
---|
118 | /* OP_KETRMAX */
|
---|
119 | /* On entry ecode points to the first opcode, and eptr to the first character
|
---|
120 | /* Opening capturing bracket. If there is space in the offset vector, save
|
---|
121 | /* Or to a non-unique first char after study */
|
---|
122 | /* Or to a unique first char if possible */
|
---|
123 | /* Or to just after \n for a multiline match if possible */
|
---|
124 | /* Other types of node can be handled by a switch */
|
---|
125 | /* Otherwise test for either case */
|
---|
126 | /* Print a sequence of chars in printable format, stopping at the end of the
|
---|
127 | /* Recursion matches the current regex, nested. If there are any capturing
|
---|
128 | /* Reset the maximum number of extractions we might see. */
|
---|
129 | /* Reset the value of the ims flags, in case they got changed during
|
---|
130 | /* Reset the working variable associated with each extraction. These should
|
---|
131 | /* Separate the caselesss case for speed */
|
---|
132 | /* Set up for repetition, or handle the non-repeated case */
|
---|
133 | /* Set up the first character to match, if available. The first_char value is
|
---|
134 | /* Skip over conditional reference data or large extraction number data if
|
---|
135 | /* Start of subject assertion */
|
---|
136 | /* Start of subject unless notbol, or after internal newline if multiline */
|
---|
137 | /* Structure for building a chain of data that actually lives on the
|
---|
138 | /* The code is duplicated for the caseless and caseful cases, for speed,
|
---|
139 | /* The condition is an assertion. Call match() to evaluate it - setting
|
---|
140 | /* The ims options can vary during the matching as a result of the presence
|
---|
141 | /* The repeating kets try the rest of the pattern or restart from the
|
---|
142 | /* There's been some horrible disaster. */
|
---|
143 | /* This "while" is the end of the "do" above */
|
---|
144 | /* This function applies a compiled re to a subject string and picks out
|
---|
145 | /* Use a macro for debugging printing, 'cause that limits the use of #ifdef
|
---|
146 | /* We don't need to repeat the search if we haven't yet reached the
|
---|
147 | /* When a match occurs, substrings will be set for all internal extractions;
|
---|
148 | /* Word boundary assertions */
|
---|
149 | /*************************************************
|
---|
150 | 1. This software is distributed in the hope that it will be useful,
|
---|
151 | 2. The origin of this software must not be misrepresented, either by
|
---|
152 | 3. Altered versions must be plainly marked as such, and must not be
|
---|
153 | 4. If PCRE is embedded in any software that is released under the GNU
|
---|
154 | 5.005. If there is an options reset, it will get obeyed in the normal
|
---|
155 | 6 : 3 + (ecode[1] << 8) + ecode[2]),
|
---|
156 | < -1 => some kind of unexpected problem
|
---|
157 | = 0 => success, but offsets is not big enough
|
---|
158 | Arguments:
|
---|
159 | BOOL anchored;
|
---|
160 | BOOL cur_is_word = (eptr < md->end_subject) &&
|
---|
161 | BOOL is_subject;
|
---|
162 | BOOL minimize = FALSE;
|
---|
163 | BOOL prev_is_word = (eptr != md->start_subject) &&
|
---|
164 | BOOL rc;
|
---|
165 | BOOL startline;
|
---|
166 | BOOL using_temporary_offsets = FALSE;
|
---|
167 | Copyright (c) 1997-2000 University of Cambridge
|
---|
168 | DPRINTF ((">>>> returning %d\n", match_block.errorcode));
|
---|
169 | DPRINTF ((">>>> returning %d\n", rc));
|
---|
170 | DPRINTF (("Copied offsets from temporary memory\n"));
|
---|
171 | DPRINTF (("Freeing temporary memory\n"));
|
---|
172 | DPRINTF (("Got memory to hold back references\n"));
|
---|
173 | DPRINTF (("Unknown opcode %d\n", *ecode));
|
---|
174 | DPRINTF (("bracket %d failed\n", number));
|
---|
175 | DPRINTF (("bracket 0 failed\n"));
|
---|
176 | DPRINTF (("ims reset to %02lx\n", ims));
|
---|
177 | DPRINTF (("ims set to %02lx at group repeat\n", ims));
|
---|
178 | DPRINTF (("ims set to %02lx\n", ims));
|
---|
179 | DPRINTF (("matching %c{%d,%d} against subject %.*s\n", c, min, max,
|
---|
180 | DPRINTF (("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
|
---|
181 | DPRINTF (("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
|
---|
182 | DPRINTF (("start bracket 0\n"));
|
---|
183 | GETCHAR (c, eptr) /* Get character */
|
---|
184 | GETCHARINC (c, eptr) /* Get character; increment eptr */
|
---|
185 | General Purpose Licence (GPL), then the terms of that licence shall
|
---|
186 | However, if the referenced string is the empty string, always treat
|
---|
187 | If the bracket fails to match, we need to restore this value and also the
|
---|
188 | If there isn't enough space in the offset vector, treat this as if it were a
|
---|
189 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
---|
190 | Otherwise, we can use the vector supplied, rounding down its size to a multiple
|
---|
191 | Permission is granted to anyone to use this software for any purpose on any
|
---|
192 | REPEATCHAR:
|
---|
193 | REPEATNOTCHAR:
|
---|
194 | REPEATTYPE:
|
---|
195 | Returns: > 0 => success; value is the number of elements filled in
|
---|
196 | Returns: TRUE if matched
|
---|
197 | Returns: TRUE if matched
|
---|
198 | Returns: nothing
|
---|
199 | They are not both allowed to be zero. */
|
---|
200 | This is a library of functions to support regular expressions whose syntax
|
---|
201 | This is the forcible breaking of infinite loops as implemented in Perl
|
---|
202 | Writing separate code makes it go faster, as does using an autoincrement and
|
---|
203 | Written by: Philip Hazel <ph10@cam.ac.uk>
|
---|
204 | a move back into the brackets. Check the alternative branches in turn - the
|
---|
205 | address of eptr, so that eptr can be a register variable. */
|
---|
206 | an assertion "group", stop matching and return TRUE, but record the
|
---|
207 | an empty string - recursion will then try other alternatives, if any. */
|
---|
208 | an error. Save the top 15 values on the stack, and accept that the rest
|
---|
209 | an unanchored pattern, of course. If there's no first char and the pattern was
|
---|
210 | analyzing most of the pattern. length > re->max_match_size is
|
---|
211 | anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
|
---|
212 | and advance one byte in the pattern code. */
|
---|
213 | and reinstate them after the recursion. However, we don't know how many
|
---|
214 | and semantics are as close as possible to those of the Perl 5 language. See
|
---|
215 | and the required character in fact is caseful. */
|
---|
216 | at run time, so we have to test for anchoring. The first char may be unset for
|
---|
217 | avoid duplicate testing (which takes significant time). This covers the vast
|
---|
218 | backing off on a match. */
|
---|
219 | bmtable = extra->data.bmtable;
|
---|
220 | both cases of the character. Otherwise set the two values the same, which will
|
---|
221 | bracketed group and go to there. */
|
---|
222 | brackets - for testing for empty matches
|
---|
223 | brackets started but not finished, we have to save their starting points
|
---|
224 | break;
|
---|
225 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
226 | c != md->lcc[*eptr++])
|
---|
227 | c = *ecode++ - OP_CRSTAR;
|
---|
228 | c = *ecode++ - OP_NOTSTAR;
|
---|
229 | c = *ecode++ - OP_STAR;
|
---|
230 | c = *ecode++ - OP_TYPESTAR;
|
---|
231 | c = *ecode++;
|
---|
232 | c = *eptr++;
|
---|
233 | c = 15;
|
---|
234 | c = max - min;
|
---|
235 | c = md->end_subject - eptr;
|
---|
236 | c = md->lcc[c];
|
---|
237 | c = md->offset_max;
|
---|
238 | c == md->lcc[*eptr++])
|
---|
239 | can't just fail here, because of the possibility of quantifiers with zero
|
---|
240 | case OP_ALT:
|
---|
241 | case OP_ANY:
|
---|
242 | case OP_ASSERT:
|
---|
243 | case OP_ASSERTBACK:
|
---|
244 | case OP_ASSERTBACK_NOT:
|
---|
245 | case OP_ASSERT_NOT:
|
---|
246 | case OP_BEG_WORD:
|
---|
247 | case OP_BRA: /* Non-capturing bracket: optimized */
|
---|
248 | case OP_BRAMINZERO:
|
---|
249 | case OP_BRANUMBER:
|
---|
250 | case OP_BRAZERO:
|
---|
251 | case OP_CHARS:
|
---|
252 | case OP_CIRC:
|
---|
253 | case OP_CLASS:
|
---|
254 | case OP_COND:
|
---|
255 | case OP_CREF:
|
---|
256 | case OP_CRMINPLUS:
|
---|
257 | case OP_CRMINQUERY:
|
---|
258 | case OP_CRMINRANGE:
|
---|
259 | case OP_CRMINSTAR:
|
---|
260 | case OP_CRPLUS:
|
---|
261 | case OP_CRQUERY:
|
---|
262 | case OP_CRRANGE:
|
---|
263 | case OP_CRSTAR:
|
---|
264 | case OP_DIGIT:
|
---|
265 | case OP_DOLL:
|
---|
266 | case OP_END:
|
---|
267 | case OP_END_WORD:
|
---|
268 | case OP_EOD:
|
---|
269 | case OP_EODN:
|
---|
270 | case OP_EXACT:
|
---|
271 | case OP_KET:
|
---|
272 | case OP_KETRMAX:
|
---|
273 | case OP_KETRMIN:
|
---|
274 | case OP_MINPLUS:
|
---|
275 | case OP_MINQUERY:
|
---|
276 | case OP_MINSTAR:
|
---|
277 | case OP_MINUPTO:
|
---|
278 | case OP_NOT:
|
---|
279 | case OP_NOTEXACT:
|
---|
280 | case OP_NOTMINPLUS:
|
---|
281 | case OP_NOTMINQUERY:
|
---|
282 | case OP_NOTMINSTAR:
|
---|
283 | case OP_NOTMINUPTO:
|
---|
284 | case OP_NOTPLUS:
|
---|
285 | case OP_NOTQUERY:
|
---|
286 | case OP_NOTSTAR:
|
---|
287 | case OP_NOTUPTO:
|
---|
288 | case OP_NOT_DIGIT:
|
---|
289 | case OP_NOT_WHITESPACE:
|
---|
290 | case OP_NOT_WORDCHAR:
|
---|
291 | case OP_NOT_WORD_BOUNDARY:
|
---|
292 | case OP_ONCE:
|
---|
293 | case OP_OPT:
|
---|
294 | case OP_PLUS:
|
---|
295 | case OP_QUERY:
|
---|
296 | case OP_RECURSE:
|
---|
297 | case OP_REF:
|
---|
298 | case OP_REVERSE:
|
---|
299 | case OP_SOD:
|
---|
300 | case OP_STAR:
|
---|
301 | case OP_TYPEEXACT:
|
---|
302 | case OP_TYPEMINPLUS:
|
---|
303 | case OP_TYPEMINQUERY:
|
---|
304 | case OP_TYPEMINSTAR:
|
---|
305 | case OP_TYPEMINUPTO:
|
---|
306 | case OP_TYPEPLUS:
|
---|
307 | case OP_TYPEQUERY:
|
---|
308 | case OP_TYPESTAR:
|
---|
309 | case OP_TYPEUPTO:
|
---|
310 | case OP_UPTO:
|
---|
311 | case OP_WHITESPACE:
|
---|
312 | case OP_WORDCHAR:
|
---|
313 | case OP_WORD_BOUNDARY:
|
---|
314 | case matching may be when this character is hit, so test for it in both its
|
---|
315 | caselessly, or if there are any changes of this flag within the regex, set up
|
---|
316 | cases if necessary. However, the different cased versions will not be set up
|
---|
317 | character" set. If the PCRE_CASELESS is set, implying that the match starts
|
---|
318 | characters and work backwards. */
|
---|
319 | code for maximizing the speed, and do the type test once at the start
|
---|
320 | code to character type repeats - written out again for speed. */
|
---|
321 | commoning these up that doesn't require a test of the positive/negative
|
---|
322 | computer system, and to redistribute it freely, subject to the following
|
---|
323 | const char *subject;
|
---|
324 | const pcre *re;
|
---|
325 | const pcre_extra *extra;
|
---|
326 | const uschar *bmtable = NULL;
|
---|
327 | const uschar *data = ecode + 1; /* Save for matching */
|
---|
328 | const uschar *end_subject;
|
---|
329 | const uschar *next = ecode + 1;
|
---|
330 | const uschar *p = md->start_subject + md->offset_vector[offset];
|
---|
331 | const uschar *p;
|
---|
332 | const uschar *pp = eptr;
|
---|
333 | const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
|
---|
334 | const uschar *prev = ecode;
|
---|
335 | const uschar *req_char_ptr = start_match - 1;
|
---|
336 | const uschar *saved_eptr = eptr;
|
---|
337 | const uschar *saved_eptr = eptrb->saved_eptr;
|
---|
338 | const uschar *saved_eptr;
|
---|
339 | const uschar *start_bits = NULL;
|
---|
340 | const uschar *start_match = (const uschar *) subject + start_offset;
|
---|
341 | continue; /* With the main loop */
|
---|
342 | continue;
|
---|
343 | course of events. */
|
---|
344 | ctype = *ecode++; /* Code for the character type */
|
---|
345 | cur_is_word == prev_is_word : cur_is_word != prev_is_word)
|
---|
346 | current high water mark for use by positive assertions. Do this also
|
---|
347 | default: /* No repeat follows */
|
---|
348 | default:
|
---|
349 | do
|
---|
350 | each branch of a lookbehind assertion. If we are too close to the start to
|
---|
351 | each substring: the offsets to the start and end of the substring.
|
---|
352 | ecode position in code
|
---|
353 | ecode + ((offset < offset_top && md->offset_vector[offset] >= 0) ?
|
---|
354 | ecode += (ecode[1] << 8) + ecode[2];
|
---|
355 | ecode += 2;
|
---|
356 | ecode += 3 + (ecode[4] << 8) + ecode[5];
|
---|
357 | ecode += 33; /* Advance past the item */
|
---|
358 | ecode += 3; /* Advance past the item */
|
---|
359 | ecode += 3;
|
---|
360 | ecode += 5;
|
---|
361 | ecode = next + 3;
|
---|
362 | ecode++;
|
---|
363 | else
|
---|
364 | else if ((extra->options & PCRE_STUDY_BM) != 0)
|
---|
365 | else if (first_char >= 0)
|
---|
366 | else if (start_bits != NULL)
|
---|
367 | else if (startline)
|
---|
368 | encountered */
|
---|
369 | end_subject = match_block.end_subject;
|
---|
370 | eptr pointer in subject
|
---|
371 | eptr points into the subject
|
---|
372 | eptr += c;
|
---|
373 | eptr += length;
|
---|
374 | eptr += min;
|
---|
375 | eptr -= (ecode[1] << 8) + ecode[2];
|
---|
376 | eptr -= length;
|
---|
377 | eptr = md->end_match_ptr;
|
---|
378 | eptr++;
|
---|
379 | eptrb pointer to chain of blocks containing eptr at start of
|
---|
380 | eptrb = &newptrb;
|
---|
381 | eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */
|
---|
382 | eptrblock *eptrb;
|
---|
383 | eptrblock newptrb;
|
---|
384 | eptrblock;
|
---|
385 | exactly what going to the ket would do. */
|
---|
386 | explicit claim or by omission.
|
---|
387 | external_extra points to "hints" from pcre_study() or is NULL
|
---|
388 | external_re points to the compiled expression
|
---|
389 | extraction by setting the offsets and bumping the high water mark. */
|
---|
390 | first_char = match_block.lcc[first_char];
|
---|
391 | first_char = re->first_char;
|
---|
392 | flags can contain
|
---|
393 | for (;;)
|
---|
394 | for (i = 1; i <= c; i++)
|
---|
395 | for (i = 1; i <= min; i++)
|
---|
396 | for (i = min; i < max; i++)
|
---|
397 | for (i = min;; i++)
|
---|
398 | for the "once" (not-backup up) groups. */
|
---|
399 | for the match to succeed. If the first character is set, req_char must be
|
---|
400 | found it, so that we don't search again next time round the loop if
|
---|
401 | from a previous iteration of this group, and be referred to by a reference
|
---|
402 | goto REPEATCHAR;
|
---|
403 | goto REPEATNOTCHAR;
|
---|
404 | goto REPEATTYPE;
|
---|
405 | group number back at the start and if necessary complete handling an
|
---|
406 | happens for a repeating ket if no characters were matched in the group.
|
---|
407 | here; that is handled in the code for KET. */
|
---|
408 | hold, we get a temporary bit of working store to use during the matching.
|
---|
409 | i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
|
---|
410 | if (!anchored)
|
---|
411 | if (!match (start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
|
---|
412 | if (!match_ref (offset, eptr, length, md, ims))
|
---|
413 | if (!md->endonly)
|
---|
414 | if (!rc)
|
---|
415 | if (!startline && extra != NULL)
|
---|
416 | if ((*ecode++ == OP_WORD_BOUNDARY) ?
|
---|
417 | if ((data[c / 8] & (1 << (c & 7))) != 0)
|
---|
418 | if ((data[c / 8] & (1 << (c & 7))) == 0)
|
---|
419 | if ((extra->options & PCRE_STUDY_MAPPED) != 0)
|
---|
420 | if ((flags & match_condassert) != 0)
|
---|
421 | if ((flags & match_isgroup) != 0)
|
---|
422 | if ((ims & PCRE_CASELESS) != 0)
|
---|
423 | if ((ims & PCRE_DOTALL) == 0 && c == '\n')
|
---|
424 | if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
|
---|
425 | if ((ims & PCRE_DOTALL) == 0)
|
---|
426 | if ((ims & PCRE_MULTILINE) != 0)
|
---|
427 | if ((md->ctypes[*eptr++] & ctype_digit) != 0)
|
---|
428 | if ((md->ctypes[*eptr++] & ctype_digit) == 0)
|
---|
429 | if ((md->ctypes[*eptr++] & ctype_space) != 0)
|
---|
430 | if ((md->ctypes[*eptr++] & ctype_space) == 0)
|
---|
431 | if ((md->ctypes[*eptr++] & ctype_word) != 0)
|
---|
432 | if ((md->ctypes[*eptr++] & ctype_word) == 0)
|
---|
433 | if ((md->ctypes[c] & ctype_digit) != 0)
|
---|
434 | if ((md->ctypes[c] & ctype_digit) == 0)
|
---|
435 | if ((md->ctypes[c] & ctype_space) != 0)
|
---|
436 | if ((md->ctypes[c] & ctype_space) == 0)
|
---|
437 | if ((md->ctypes[c] & ctype_word) != 0)
|
---|
438 | if ((md->ctypes[c] & ctype_word) == 0)
|
---|
439 | if ((options & ~PUBLIC_EXEC_OPTIONS) != 0)
|
---|
440 | if ((re->options & PCRE_FIRSTSET) != 0)
|
---|
441 | if ((re->options & PCRE_REQCHSET) != 0)
|
---|
442 | if ((start_bits[c / 8] & (1 << (c & 7))) == 0)
|
---|
443 | if (*ecode != OP_ONCE && *ecode != OP_ALT)
|
---|
444 | if (*ecode == OP_KET || eptr == saved_eptr)
|
---|
445 | if (*ecode == OP_KET)
|
---|
446 | if (*ecode == OP_KETRMIN)
|
---|
447 | if (*ecode++ != *eptr++)
|
---|
448 | if (*ecode++ == *eptr++)
|
---|
449 | if (*eptr != '\n')
|
---|
450 | if (*eptr++ == '\n')
|
---|
451 | if (*p++ != *eptr++)
|
---|
452 | if (*p++ == req_char)
|
---|
453 | if (*prev != OP_COND)
|
---|
454 | if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
|
---|
455 | if (bmtable != NULL)
|
---|
456 | if (bmtable[*start_match])
|
---|
457 | if (c != *eptr++)
|
---|
458 | if (c != md->lcc[*eptr++])
|
---|
459 | if (c < 16)
|
---|
460 | if (c == *eptr++)
|
---|
461 | if (c == md->lcc[*eptr++])
|
---|
462 | if (c > md->end_subject - eptr)
|
---|
463 | if (cur_is_word == prev_is_word ||
|
---|
464 | if (ecode[3] == OP_CREF) /* Condition is extraction test */
|
---|
465 | if (ecode[3] == OP_OPT)
|
---|
466 | if (eptr != md->start_subject && eptr[-1] != '\n')
|
---|
467 | if (eptr != md->start_subject)
|
---|
468 | if (eptr < md->end_subject - 1 ||
|
---|
469 | if (eptr < md->end_subject)
|
---|
470 | if (eptr < md->start_subject)
|
---|
471 | if (eptr >= md->end_subject ||
|
---|
472 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
|
---|
473 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
|
---|
474 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
|
---|
475 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
|
---|
476 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
|
---|
477 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
|
---|
478 | if (eptr >= md->end_subject || *eptr == '\n')
|
---|
479 | if (eptr >= md->end_subject || c != *eptr)
|
---|
480 | if (eptr >= md->end_subject || c != md->lcc[*eptr])
|
---|
481 | if (eptr >= md->end_subject || c == *eptr)
|
---|
482 | if (eptr >= md->end_subject || c == md->lcc[*eptr])
|
---|
483 | if (eptr >= md->end_subject)
|
---|
484 | if (eptr++ >= md->end_subject)
|
---|
485 | if (i >= max || !match_ref (offset, eptr, length, md, ims))
|
---|
486 | if (i >= max || eptr >= md->end_subject ||
|
---|
487 | if (i >= max || eptr >= md->end_subject || c != *eptr++)
|
---|
488 | if (i >= max || eptr >= md->end_subject || c == *eptr++)
|
---|
489 | if (i >= max || eptr >= md->end_subject)
|
---|
490 | if (is_subject && length > md->end_subject - p)
|
---|
491 | if (isprint (c = *(p++)))
|
---|
492 | if (length == 0)
|
---|
493 | if (length > md->end_subject - eptr)
|
---|
494 | if (match (eptr, ecode + 3, offset_top, md, ims, NULL,
|
---|
495 | if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup))
|
---|
496 | if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) ||
|
---|
497 | if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
|
---|
498 | if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
|
---|
499 | if (match (eptr, next + 3, offset_top, md, ims, eptrb, match_isgroup))
|
---|
500 | if (match (eptr, next, offset_top, md, ims, eptrb, match_isgroup))
|
---|
501 | if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
|
---|
502 | if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
|
---|
503 | if (match_block.end_offset_top > offsetcount)
|
---|
504 | if (match_block.offset_vector != NULL)
|
---|
505 | if (match_block.offset_vector == NULL)
|
---|
506 | if (max == 0)
|
---|
507 | if (md->lcc[*ecode++] != md->lcc[*eptr++])
|
---|
508 | if (md->lcc[*ecode++] == md->lcc[*eptr++])
|
---|
509 | if (md->lcc[*p++] != md->lcc[*eptr++])
|
---|
510 | if (md->notbol && eptr == md->start_subject)
|
---|
511 | if (md->notempty && eptr == md->start_match)
|
---|
512 | if (md->noteol)
|
---|
513 | if (min == max)
|
---|
514 | if (min > 0)
|
---|
515 | if (min > md->end_subject - eptr)
|
---|
516 | if (minimize)
|
---|
517 | if (number > 0)
|
---|
518 | if (number > EXTRACT_BASIC_MAX)
|
---|
519 | if (offset < md->offset_max)
|
---|
520 | if (offset >= md->offset_max)
|
---|
521 | if (offset_top <= offset)
|
---|
522 | if (offsetcount < 2)
|
---|
523 | if (offsetcount >= 4)
|
---|
524 | if (op > OP_BRA)
|
---|
525 | if (p > req_char_ptr)
|
---|
526 | if (p >= end_subject)
|
---|
527 | if (pp == req_char || pp == req_char2)
|
---|
528 | if (re == NULL || subject == NULL ||
|
---|
529 | if (re->magic_number != MAGIC_NUMBER)
|
---|
530 | if (re->max_match_size >= 0
|
---|
531 | if (re->top_backref > 0 && re->top_backref >= ocount / 3)
|
---|
532 | if (req_char == req_char2)
|
---|
533 | if (req_char >= 0)
|
---|
534 | if (resetcount > offsetcount)
|
---|
535 | if (save != stacksave)
|
---|
536 | if (save == NULL)
|
---|
537 | if (skipped_chars)
|
---|
538 | if (start_match + bmtable[256] > end_subject)
|
---|
539 | if (start_match > match_block.start_subject + start_offset)
|
---|
540 | if (using_temporary_offsets)
|
---|
541 | if certain parts of the pattern were not used. */
|
---|
542 | if the malloc fails ... there is no way of returning to the top level with
|
---|
543 | implied in the second condition, because start_offset > 0. */
|
---|
544 | ims current /i, /m, and /s options
|
---|
545 | ims the ims flags
|
---|
546 | ims = (ims & ~PCRE_IMS) | ecode[4];
|
---|
547 | ims = ecode[1];
|
---|
548 | ims = original_ims;
|
---|
549 | ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL);
|
---|
550 | in the pattern. */
|
---|
551 | in the subject string, while eptrb holds the value of eptr at the start of the
|
---|
552 | initialize them to avoid reading uninitialized locations. */
|
---|
553 | inline, and there are *still* stupid compilers about that don't like indented
|
---|
554 | inside the group.
|
---|
555 | int
|
---|
556 | int *offsets;
|
---|
557 | int *save;
|
---|
558 | int c;
|
---|
559 | int first_char = -1;
|
---|
560 | int flags;
|
---|
561 | int length;
|
---|
562 | int min, max, ctype;
|
---|
563 | int number = *prev - OP_BRA;
|
---|
564 | int number = op - OP_BRA;
|
---|
565 | int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled reference number */
|
---|
566 | int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled reference number */
|
---|
567 | int offset;
|
---|
568 | int offset_top;
|
---|
569 | int offsetcount;
|
---|
570 | int op = (int) *ecode;
|
---|
571 | int options;
|
---|
572 | int rc;
|
---|
573 | int req_char = -1;
|
---|
574 | int req_char2 = -1;
|
---|
575 | int resetcount, ocount;
|
---|
576 | int save_offset1 = md->offset_vector[offset];
|
---|
577 | int save_offset2 = md->offset_vector[offset + 1];
|
---|
578 | int save_offset3 = md->offset_vector[md->offset_end - number];
|
---|
579 | int skipped_chars = 0;
|
---|
580 | int stacksave[15];
|
---|
581 | int start_offset;
|
---|
582 | is a bit large to put on the stack, but using malloc for small numbers
|
---|
583 | is_subject TRUE if printing from within md->start_subject
|
---|
584 | it as matched, any number of times (otherwise there could be infinite
|
---|
585 | item to see if there is repeat information following. The code is similar
|
---|
586 | item to see if there is repeat information following. Then obey similar
|
---|
587 | last bracketed group - used for breaking infinite loops matching zero-length
|
---|
588 | later in the subject; otherwise the test starts at the match point. This
|
---|
589 | length length of subject string (may contain binary zeros)
|
---|
590 | length length to be matched
|
---|
591 | length number to print
|
---|
592 | length = (offset >= offset_top || md->offset_vector[offset] < 0) ?
|
---|
593 | length = md->end_subject - p;
|
---|
594 | level without recursing. Otherwise, if minimizing, keep trying the rest of
|
---|
595 | loop. */
|
---|
596 | loops). */
|
---|
597 | main loop. */
|
---|
598 | majority of cases. It will be suboptimal when the case flag changes in a regex
|
---|
599 | mark, since extracts may have been taken during the assertion. */
|
---|
600 | mark, since extracts may have been taken. */
|
---|
601 | match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0))
|
---|
602 | match (eptr, ecode, offset_top, md, ims, eptrb, flags)
|
---|
603 | match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
|
---|
604 | match_block.ctypes = re->tables + ctypes_offset;
|
---|
605 | match_block.end_subject = match_block.start_subject + length;
|
---|
606 | match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
|
---|
607 | match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
|
---|
608 | match_block.errorcode == PCRE_ERROR_NOMATCH &&
|
---|
609 | match_block.lcc = re->tables + lcc_offset;
|
---|
610 | match_block.lcc[*start_match] != first_char)
|
---|
611 | match_block.notbol = (options & PCRE_NOTBOL) != 0;
|
---|
612 | match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
|
---|
613 | match_block.noteol = (options & PCRE_NOTEOL) != 0;
|
---|
614 | match_block.offset_end = ocount;
|
---|
615 | match_block.offset_max = (2 * ocount) / 3;
|
---|
616 | match_block.offset_overflow = FALSE;
|
---|
617 | match_block.offset_overflow = TRUE;
|
---|
618 | match_block.offset_vector = (int *) (pcre_malloc) (ocount * sizeof (int));
|
---|
619 | match_block.offset_vector = offsets;
|
---|
620 | match_block.start_match = start_match;
|
---|
621 | match_block.start_pattern = re->code;
|
---|
622 | match_block.start_subject = (const uschar *) subject;
|
---|
623 | match_condassert - this is an assertion condition
|
---|
624 | match_condassert | match_isgroup))
|
---|
625 | match_data *md;
|
---|
626 | match_data match_block;
|
---|
627 | match_isgroup - this is the start of a bracketed group
|
---|
628 | match_isgroup);
|
---|
629 | match_ref (offset, eptr, length, md, ims)
|
---|
630 | matches, we carry on as at the end of a normal bracket, leaving the subject
|
---|
631 | matching won't pass the KET for an assertion. If any one branch matches,
|
---|
632 | matching won't pass the KET for this kind of subpattern. If any one branch
|
---|
633 | max = (ecode[1] << 8) + ecode[2];
|
---|
634 | max = (ecode[3] << 8) + ecode[4];
|
---|
635 | max = INT_MAX;
|
---|
636 | max = rep_max[c]; /* zero for max => infinity */
|
---|
637 | max, eptr));
|
---|
638 | maximum. Alternatively, if maximizing, find the maximum number of
|
---|
639 | may be wrong. */
|
---|
640 | md pointer to "static" info for the match
|
---|
641 | md pointer to matching data block, if is_subject is TRUE
|
---|
642 | md points to match data block
|
---|
643 | md->end_match_ptr = eptr; /* For ONCE */
|
---|
644 | md->end_match_ptr = eptr; /* Record where we ended */
|
---|
645 | md->end_offset_top = offset_top; /* and how many extracts were taken */
|
---|
646 | md->end_offset_top = offset_top;
|
---|
647 | md->end_subject - eptr + 1 :
|
---|
648 | md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
|
---|
649 | md->offset_overflow = TRUE;
|
---|
650 | md->offset_vector[md->offset_end - i] = save[i];
|
---|
651 | md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
|
---|
652 | md->offset_vector[md->offset_end - number] = save_offset3;
|
---|
653 | md->offset_vector[md->offset_end - number];
|
---|
654 | md->offset_vector[offset + 1] - md->offset_vector[offset];
|
---|
655 | md->offset_vector[offset + 1] = eptr - md->start_subject;
|
---|
656 | md->offset_vector[offset + 1] = save_offset2;
|
---|
657 | md->offset_vector[offset] =
|
---|
658 | md->offset_vector[offset] = save_offset1;
|
---|
659 | memcpy (offsets + 2, match_block.offset_vector + 2,
|
---|
660 | min = (ecode[1] << 8) + ecode[2];
|
---|
661 | min = 0;
|
---|
662 | min = max = (ecode[1] << 8) + ecode[2];
|
---|
663 | min = max = 1;
|
---|
664 | min = rep_min[c]; /* Pick up values from tables; */
|
---|
665 | minima. */
|
---|
666 | minimize = (*ecode == OP_CRMINRANGE);
|
---|
667 | minimize = (c & 1) != 0;
|
---|
668 | minimize = *ecode == OP_MINUPTO;
|
---|
669 | minimize = *ecode == OP_NOTMINUPTO;
|
---|
670 | minimize = *ecode == OP_TYPEMINUPTO;
|
---|
671 | minimize = TRUE;
|
---|
672 | minimum number of matches are present. If min = max, continue at the same
|
---|
673 | misrepresented as being the original software.
|
---|
674 | move back, this match function fails. */
|
---|
675 | mustn't change the current values of the data slot, because they may be set
|
---|
676 | need to recurse. */
|
---|
677 | never be used unless previously set, but they get saved and restored, and so we
|
---|
678 | never set for an anchored regular expression, but the anchoring may be forced
|
---|
679 | newline unless endonly is set, else end of subject unless noteol is set. */
|
---|
680 | newptrb.prev = eptrb;
|
---|
681 | newptrb.saved_eptr = eptr;
|
---|
682 | next += (next[1] << 8) + next[2];
|
---|
683 | non-capturing bracket. Don't worry about setting the flag for the error case
|
---|
684 | number = (ecode[4] << 8) | ecode[5];
|
---|
685 | number = (prev[4] << 8) | prev[5];
|
---|
686 | number from a dummy opcode at the start. */
|
---|
687 | number, then move along the subject till after the recursive match,
|
---|
688 | ocount = offsetcount - (offsetcount % 3);
|
---|
689 | ocount = re->top_backref * 3 + 3;
|
---|
690 | of (?ims) items in the pattern. They are kept in a local variable so that
|
---|
691 | of 3. */
|
---|
692 | of subject left; this ensures that every attempt at a match fails. We
|
---|
693 | offset index into the offset vector
|
---|
694 | offset = number << 1;
|
---|
695 | offset_top current top pointer
|
---|
696 | offset_top = md->end_offset_top;
|
---|
697 | offset_top = offset + 2;
|
---|
698 | offset_top, md, ims, eptrb, match_isgroup);
|
---|
699 | offsetcount the number of elements in the vector
|
---|
700 | offsets points to a vector of ints to be filled in with offsets
|
---|
701 | offsets[0] = start_match - match_block.start_subject;
|
---|
702 | offsets[1] = match_block.end_match_ptr - match_block.start_subject;
|
---|
703 | op = OP_BRA;
|
---|
704 | opcode. */
|
---|
705 | optimization can save a huge amount of backtracking in patterns with nested
|
---|
706 | option for each character match. Maybe that wouldn't add very much to the
|
---|
707 | options option bits
|
---|
708 | p points to characters
|
---|
709 | p--;
|
---|
710 | past the end if there is only one branch, but that's OK because that is
|
---|
711 | pchars (ecode, length, FALSE, md);
|
---|
712 | pchars (eptr, 16, TRUE, md);
|
---|
713 | pchars (eptr, length, TRUE, md);
|
---|
714 | pchars (p, length, FALSE, md);
|
---|
715 | pchars (p, length, is_subject, md)
|
---|
716 | pchars (start_match, end_subject - start_match, TRUE, &match_block);
|
---|
717 | pcre_exec (re, extra, subject, length, start_offset, options, offsets, offsetcount)
|
---|
718 | place we found it at last time. */
|
---|
719 | pointer. */
|
---|
720 | portions of the string if it matches. Two elements in the vector are set for
|
---|
721 | pre-processor statements. I suppose it's only been 10 years... */
|
---|
722 | preceded by BRAZERO or BRAMINZERO. */
|
---|
723 | preceding bracket, in the appropriate order. */
|
---|
724 | preceding bracket, in the appropriate order. We need to reset any options
|
---|
725 | printf (" against backref ");
|
---|
726 | printf (" against pattern ");
|
---|
727 | printf ("%c", c);
|
---|
728 | printf (">>>> Match against: ");
|
---|
729 | printf (">>>>> Skipped %d chars to reach first character\n",
|
---|
730 | printf ("\\x%02x", c);
|
---|
731 | printf ("\n");
|
---|
732 | printf ("end bracket %d", number);
|
---|
733 | printf ("matching subject ");
|
---|
734 | printf ("matching subject <null> against pattern ");
|
---|
735 | printf ("matching subject <null>");
|
---|
736 | printf ("start bracket %d subject=", number);
|
---|
737 | rc = 0;
|
---|
738 | rc = match (eptr, md->start_pattern, offset_top, md, ims, eptrb,
|
---|
739 | rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
|
---|
740 | register const uschar *ecode;
|
---|
741 | register const uschar *eptr;
|
---|
742 | register const uschar *p = start_match + ((first_char >= 0) ? 1 : 0);
|
---|
743 | register int *iend = iptr + resetcount;
|
---|
744 | register int *iend = iptr - resetcount / 2 + 1;
|
---|
745 | register int *iptr = match_block.offset_vector + ocount;
|
---|
746 | register int *iptr = match_block.offset_vector;
|
---|
747 | register int c = *start_match;
|
---|
748 | register int c;
|
---|
749 | register int i;
|
---|
750 | register int length = ecode[1];
|
---|
751 | register int pp = *p++;
|
---|
752 | repeat it in the interests of efficiency. */
|
---|
753 | repeat limits are compiled as a number of copies, with the optional ones
|
---|
754 | req_char = re->req_char;
|
---|
755 | req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ?
|
---|
756 | req_char_ptr = p;
|
---|
757 | resetcount = 2 + re->top_bracket * 2;
|
---|
758 | resetcount = ocount;
|
---|
759 | restoring at the exit of a group is easy. */
|
---|
760 | restrictions:
|
---|
761 | return FALSE;
|
---|
762 | return PCRE_ERROR_BADMAGIC;
|
---|
763 | return PCRE_ERROR_BADOPTION;
|
---|
764 | return PCRE_ERROR_NOMATCH;
|
---|
765 | return PCRE_ERROR_NOMEMORY;
|
---|
766 | return PCRE_ERROR_NULL;
|
---|
767 | return TRUE;
|
---|
768 | return match (eptr,
|
---|
769 | return match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup);
|
---|
770 | return match_block.errorcode;
|
---|
771 | return rc;
|
---|
772 | save = (int *) (pcre_malloc) ((c + 1) * sizeof (int));
|
---|
773 | save = stacksave;
|
---|
774 | save[i] = md->offset_vector[md->offset_end - i];
|
---|
775 | seems expensive. As a compromise, the stack is used when there are fewer
|
---|
776 | share code. This is very similar to the code for single characters, but we
|
---|
777 | similar code to character type repeats - written out again for speed.
|
---|
778 | since matching characters is likely to be quite common. First, ensure the
|
---|
779 | skipped_chars += bmtable[*start_match],
|
---|
780 | skipped_chars += bmtable[256] - 1;
|
---|
781 | skipped_chars -= bmtable[256] - 1;
|
---|
782 | skipped_chars);
|
---|
783 | skipped_chars++,
|
---|
784 | stack of such pointers, to be re-instated at the end of the group when we hit
|
---|
785 | stack, for holding the values of the subject pointer at the start of each
|
---|
786 | start of each branch to move the current point backwards, so the code at
|
---|
787 | start_bits = extra->data.start_bits;
|
---|
788 | start_match += bmtable[*start_match];
|
---|
789 | start_match += bmtable[256] - 1;
|
---|
790 | start_match -= bmtable[256] - 1;
|
---|
791 | start_match = (const uschar *) subject + length - re->max_match_size;
|
---|
792 | start_match++ < end_subject);
|
---|
793 | start_match++;
|
---|
794 | start_offset where to start in the subject string
|
---|
795 | startline = (re->options & PCRE_STARTLINE) != 0;
|
---|
796 | static BOOL
|
---|
797 | static const char rep_max[] =
|
---|
798 | static const char rep_min[] =
|
---|
799 | static void
|
---|
800 | strings.
|
---|
801 | struct eptrblock *prev;
|
---|
802 | studied, there may be a bitmap of possible first characters. */
|
---|
803 | subject points to the subject string
|
---|
804 | subject if the requested.
|
---|
805 | subpattern - to break infinite loops. */
|
---|
806 | subpattern, so as to detect when an empty string has been matched by a
|
---|
807 | subsequent match. */
|
---|
808 | such there are (offset_top records the completed total) so we just have
|
---|
809 | supersede any condition above with which it is incompatible.
|
---|
810 | switch (*ecode)
|
---|
811 | switch (ctype)
|
---|
812 | switch (op)
|
---|
813 | test once at the start (i.e. keep it out of the loop). */
|
---|
814 | than 16 values to store; otherwise malloc is used. A problem is what to do
|
---|
815 | than the number of characters left in the string, so the match fails.
|
---|
816 | that "continue" in the code above comes out to here to repeat the main
|
---|
817 | that changed within the bracket before re-running it, so check the next
|
---|
818 | that it may occur zero times. It may repeat infinitely, or not at all -
|
---|
819 | the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
|
---|
820 | the closing ket. When match() is called in other circumstances, we don't add to
|
---|
821 | the code for a repeated single character, but I haven't found a nice way of
|
---|
822 | the current subject position in the working slot at the top of the vector. We
|
---|
823 | the expression and advancing one matching character if failing, up to the
|
---|
824 | the external pcre header. */
|
---|
825 | the file Tech.Notes for some information on the internals.
|
---|
826 | the final argument TRUE causes it to stop at the end of an assertion. */
|
---|
827 | the group. */
|
---|
828 | the length of the reference string explicitly rather than passing the
|
---|
829 | the loop runs just once. */
|
---|
830 | the minimum number of bytes before we start. */
|
---|
831 | the number from a dummy opcode at the start. */
|
---|
832 | the point in the subject string is not moved back. Thus there can never be
|
---|
833 | the pointer while it matches the class. */
|
---|
834 | the same bracket.
|
---|
835 | the stack. */
|
---|
836 | the start hasn't passed this character yet. */
|
---|
837 | the subject. */
|
---|
838 | there were too many extractions, set the return code to zero. In the case
|
---|
839 | this level is identical to the lookahead case. */
|
---|
840 | this makes a huge difference to execution time when there aren't many brackets
|
---|
841 | those back references that we can. In this case there need not be overflow
|
---|
842 | time taken, but character matching *is* what this is all about... */
|
---|
843 | to save all the potential data. There may be up to 99 such values, which
|
---|
844 | to that for character classes, but repeated for efficiency. Then obey
|
---|
845 | two branches. If the condition is false, skipping the first branch takes us
|
---|
846 | typedef struct eptrblock
|
---|
847 | unless PCRE_CASELESS was given or the casing state changes within the regex.
|
---|
848 | unlimited repeats that aren't going to match. We don't know what the state of
|
---|
849 | unsigned long int ims = 0;
|
---|
850 | unsigned long int ims;
|
---|
851 | unsigned long int original_ims = ims; /* Save for resetting on ')' */
|
---|
852 | up quickly if there are fewer than the minimum number of characters left in
|
---|
853 | using_temporary_offsets = TRUE;
|
---|
854 | values of the final offsets, in case they were set by a previous iteration of
|
---|
855 | we just need to set up the whole thing as substring 0 before returning. If
|
---|
856 | where we had to get some local store to hold offsets for backreferences, copy
|
---|
857 | while (!anchored &&
|
---|
858 | while (*ecode == OP_ALT)
|
---|
859 | while (*ecode == OP_ALT);
|
---|
860 | while (*next == OP_ALT);
|
---|
861 | while (--iptr >= iend)
|
---|
862 | while (eptr >= pp)
|
---|
863 | while (iptr < iend)
|
---|
864 | while (length-- > 0)
|
---|
865 | while (p < end_subject)
|
---|
866 | while (start_match < end_subject &&
|
---|
867 | while (start_match < end_subject && *start_match != first_char)
|
---|
868 | while (start_match < end_subject && start_match[-1] != '\n')
|
---|
869 | while (start_match < end_subject)
|
---|
870 | {
|
---|
871 | {0, 0, 0, 0, 1, 1};
|
---|
872 | {0, 0, 1, 1, 0, 0};
|
---|
873 | } /* End of main loop */
|
---|
874 | }
|
---|