VirtualBox

source: vbox/trunk/src/bldprogs/scmparser.cpp@ 96586

Last change on this file since 96586 was 96407, checked in by vboxsync, 2 years ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.4 KB
Line 
1/* $Id: scmparser.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */
2/** @file
3 * IPRT Testcase / Tool - Source Code Massager, Code Parsers.
4 */
5
6/*
7 * Copyright (C) 2010-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include <iprt/assert.h>
33#include <iprt/ctype.h>
34#include <iprt/dir.h>
35#include <iprt/env.h>
36#include <iprt/file.h>
37#include <iprt/errcore.h>
38#include <iprt/getopt.h>
39#include <iprt/initterm.h>
40#include <iprt/mem.h>
41#include <iprt/message.h>
42#include <iprt/param.h>
43#include <iprt/path.h>
44#include <iprt/process.h>
45#include <iprt/stream.h>
46#include <iprt/string.h>
47
48#include "scm.h"
49
50
51/*********************************************************************************************************************************
52* Structures and Typedefs *
53*********************************************************************************************************************************/
/**
 * Comment marker tester.
 *
 * @returns Length of the comment marker found at @a pchLine, 0 if there is
 *          none.
 * @param   pchLine     Where in the line to look (not zero terminated).
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     false when testing for the plain comment marker, true
 *                      when handleLineComment probes for the additional marker
 *                      it uses to classify documentation comments.
 */
typedef size_t FNISCOMMENT(const char *pchLine, size_t cchLine, bool fSecond);
/** Pointer to a comment marker tester. */
typedef FNISCOMMENT *PFNISCOMMENT;
55
56
/**
 * Comment tester for C++ style line comments.
 *
 * @returns 2 if @a pchLine starts with two slashes and @a fSecond is false,
 *          3 if it starts with three slashes and @a fSecond is true,
 *          otherwise 0.
 * @param   pchLine     The text to test (not zero terminated).
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the third slash.
 */
static size_t isCppLineComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    /* Anything not starting with two slashes is of no interest. */
    if (   cchLine < 2
        || pchLine[0] != '/'
        || pchLine[1] != '/')
        return 0;

    if (!fSecond)
        return 2;
    return cchLine >= 3 && pchLine[2] == '/' ? 3 : 0;
}
73
74
/**
 * Comment tester for hash ('#') line comments.
 *
 * @returns 1 if @a pchLine starts with a hash and @a fSecond is false,
 *          2 if it starts with two hashes and @a fSecond is true,
 *          otherwise 0.
 * @param   pchLine     The text to test (not zero terminated).
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the second hash.
 */
static size_t isHashComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (cchLine < 1 || pchLine[0] != '#')
        return 0;

    if (!fSecond)
        return 1;
    return cchLine >= 2 && pchLine[1] == '#' ? 2 : 0;
}
89
90
/**
 * Comment tester for semicolon (';') line comments.
 *
 * @returns 1 if @a pchLine starts with a semicolon and @a fSecond is false,
 *          2 if it starts with two semicolons and @a fSecond is true,
 *          otherwise 0.
 * @param   pchLine     The text to test (not zero terminated).
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the second semicolon.
 */
static size_t isSemicolonComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (cchLine < 1 || pchLine[0] != ';')
        return 0;

    if (!fSecond)
        return 1;
    return cchLine >= 2 && pchLine[1] == ';' ? 2 : 0;
}
105
106
/**
 * Tests whether an XML comment opener is found at offset @a a_off.
 *
 * The four opener characters must be followed by either the end of the line
 * or a whitespace character.  Note that the arguments are evaluated more than
 * once.
 *
 * @param   a_pch   The line.
 * @param   a_off   Offset into the line to test at.
 * @param   a_cch   The line length.
 */
#define IS_XML_COMMENT_START(a_pch, a_off, a_cch) \
    (   (a_cch) >= (a_off) + 4 \
     && (a_pch)[(a_off) + 0] == '<' \
     && (a_pch)[(a_off) + 1] == '!' \
     && (a_pch)[(a_off) + 2] == '-' \
     && (a_pch)[(a_off) + 3] == '-' \
     && (   (a_cch) == (a_off) + 4 \
         || RT_C_IS_SPACE((a_pch)[(a_off) + 4]) ) )
115
/**
 * Tests whether an XML comment terminator (two dashes and a greater-than
 * sign) is found at offset @a a_off.
 *
 * Note that the arguments are evaluated more than once.
 *
 * @param   a_pch   The line.
 * @param   a_off   Offset into the line to test at.
 * @param   a_cch   The line length.
 */
#define IS_XML_COMMENT_END(a_pch, a_off, a_cch) \
    (   (a_cch) >= (a_off) + 3 \
     && (a_pch)[(a_off) + 0] == '-' \
     && (a_pch)[(a_off) + 1] == '-' \
     && (a_pch)[(a_off) + 2] == '>' )
122
123
/**
 * Tests whether the batch file comment keyword 'rem' (any letter case) is
 * found at offset @a a_off.
 *
 * The keyword must be followed by either the end of the line or a whitespace
 * character.  Note that the arguments are evaluated more than once.
 *
 * @param   a_pch   The line.
 * @param   a_off   Offset into the line to test at.
 * @param   a_cch   The line length.
 */
#define IS_REM(a_pch, a_off, a_cch) \
    (   (a_cch) >= (a_off) + 3 \
     && ((a_pch)[(a_off) + 0] == 'r' || (a_pch)[(a_off) + 0] == 'R') \
     && ((a_pch)[(a_off) + 1] == 'e' || (a_pch)[(a_off) + 1] == 'E') \
     && ((a_pch)[(a_off) + 2] == 'm' || (a_pch)[(a_off) + 2] == 'M') \
     && (   (a_cch) == (a_off) + 3 \
         || RT_C_IS_SPACE((a_pch)[(a_off) + 3]) ) )
131
132
133/**
134 * Callback for checking if batch comment.
135 */
136static size_t isBatchComment(const char *pchLine, size_t cchLine, bool fSecond)
137{
138 if (!fSecond)
139 {
140 if (IS_REM(pchLine, 0, cchLine))
141 return 3;
142 }
143 else
144 {
145 /* Check for the 2nd in "rem rem" lines. */
146 if ( cchLine >= 4
147 && RT_C_IS_SPACE(*pchLine)
148 && IS_REM(pchLine, 1, cchLine))
149 return 4;
150 }
151 return 0;
152}
153
/**
 * Comment tester for SQL style line comments.
 *
 * @returns 2 if @a pchLine starts with two dashes and @a fSecond is false,
 *          3 if it starts with three dashes and @a fSecond is true,
 *          otherwise 0.
 * @param   pchLine     The text to test (not zero terminated).
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the third dash.
 */
static size_t isSqlComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (   cchLine < 2
        || pchLine[0] != '-'
        || pchLine[1] != '-')
        return 0;

    if (!fSecond)
        return 2;
    return cchLine >= 3 && pchLine[2] == '-' ? 3 : 0;
}
171
/**
 * Comment tester for tick (single quote) line comments.
 *
 * @returns 1 if @a pchLine starts with a tick and @a fSecond is false,
 *          2 if it starts with two ticks and @a fSecond is true,
 *          otherwise 0.
 * @param   pchLine     The text to test (not zero terminated).
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the second tick.
 */
static size_t isTickComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (cchLine < 1 || pchLine[0] != '\'')
        return 0;

    if (!fSecond)
        return 1;
    return cchLine >= 2 && pchLine[1] == '\'' ? 2 : 0;
}
186
187
188/**
189 * Common worker for enumeratePythonComments and enumerateSimpleLineComments.
190 *
191 * @returns IPRT status code.
192 * @param pIn The input stream.
193 * @param pfnIsComment Comment tester function.
194 * @param pfnCallback The callback.
195 * @param pvUser The user argument for the callback.
196 * @param ppchLine Pointer to the line variable.
197 * @param pcchLine Pointer to the line length variable.
198 * @param penmEol Pointer to the line ending type variable.
199 * @param piLine Pointer to the line number variable.
200 * @param poff Pointer to the line offset variable. On input this
201 * is positioned at the start of the comment.
202 */
203static int handleLineComment(PSCMSTREAM pIn, PFNISCOMMENT pfnIsComment,
204 PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser,
205 const char **ppchLine, size_t *pcchLine, PSCMEOL penmEol,
206 uint32_t *piLine, size_t *poff)
207{
208 /* Unpack input/output variables. */
209 uint32_t iLine = *piLine;
210 const char *pchLine = *ppchLine;
211 size_t cchLine = *pcchLine;
212 size_t off = *poff;
213 SCMEOL enmEol = *penmEol;
214
215 /*
216 * Take down the basic info about the comment.
217 */
218 SCMCOMMENTINFO Info;
219 Info.iLineStart = iLine;
220 Info.iLineEnd = iLine;
221 Info.offStart = (uint32_t)off;
222 Info.offEnd = (uint32_t)cchLine;
223
224 size_t cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false);
225 Assert(cchSkip > 0);
226 off += cchSkip;
227
228 /* Determine comment type. */
229 Info.enmType = kScmCommentType_Line;
230 char ch;
231 cchSkip = 1;
232 if ( off < cchLine
233 && ( (ch = pchLine[off]) == '!'
234 || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, true)) > 0) )
235 {
236 unsigned ch2;
237 if ( off + cchSkip == cchLine
238 || RT_C_IS_SPACE(ch2 = pchLine[off + cchSkip]) )
239 {
240 Info.enmType = ch != '!' ? kScmCommentType_Line_JavaDoc : kScmCommentType_Line_Qt;
241 off += cchSkip;
242 }
243 else if ( ch2 == '<'
244 && ( off + cchSkip + 1 == cchLine
245 || RT_C_IS_SPACE(pchLine[off + cchSkip + 1]) ))
246 {
247 Info.enmType = ch == '!' ? kScmCommentType_Line_JavaDoc_After : kScmCommentType_Line_Qt_After;
248 off += cchSkip + 1;
249 }
250 }
251
252 /*
253 * Copy body of the first line. Like for C, we ignore a single space in the first comment line.
254 */
255 if (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
256 off++;
257 size_t cchBody = cchLine;
258 while (cchBody > off && RT_C_IS_SPACE(pchLine[cchBody - 1]))
259 cchBody--;
260 cchBody -= off;
261 size_t cbBodyAlloc = RT_MAX(_1K, RT_ALIGN_Z(cchBody + 64, 128));
262 char *pszBody = (char *)RTMemAlloc(cbBodyAlloc);
263 if (!pszBody)
264 return VERR_NO_MEMORY;
265 memcpy(pszBody, &pchLine[off], cchBody);
266 pszBody[cchBody] = '\0';
267
268 Info.cBlankLinesBefore = cchBody == 0;
269
270 /*
271 * Look for more comment lines and append them to the body.
272 */
273 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
274 {
275 iLine++;
276
277 /* Skip leading spaces. */
278 off = 0;
279 while (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
280 off++;
281
282 /* Check if it's a comment. */
283 if ( off >= cchLine
284 || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false)) == 0)
285 break;
286 off += cchSkip;
287
288 /* Split on doxygen comment start (if not already in one). */
289 if ( Info.enmType == kScmCommentType_Line
290 && off + 1 < cchLine
291 && ( pfnIsComment(&pchLine[off], cchLine - off, true) > 0
292 || ( pchLine[off + 1] == '!'
293 && ( off + 2 == cchLine
294 || pchLine[off + 2] != '!') ) ) )
295 {
296 off -= cchSkip;
297 break;
298 }
299
300 /* Append the body w/o trailing spaces and some leading ones. */
301 if (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
302 off++;
303 while (off < cchLine && off < Info.offStart + 3 && RT_C_IS_SPACE(pchLine[off]))
304 off++;
305 size_t cchAppend = cchLine;
306 while (cchAppend > off && RT_C_IS_SPACE(pchLine[cchAppend - 1]))
307 cchAppend--;
308 cchAppend -= off;
309
310 size_t cchNewBody = cchBody + 1 + cchAppend;
311 if (cchNewBody >= cbBodyAlloc)
312 {
313 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
314 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
315 if (pvNew)
316 pszBody = (char *)pvNew;
317 else
318 {
319 RTMemFree(pszBody);
320 return VERR_NO_MEMORY;
321 }
322 }
323
324 if ( cchBody > 0
325 || cchAppend > 0)
326 {
327 if (cchBody > 0)
328 pszBody[cchBody++] = '\n';
329 memcpy(&pszBody[cchBody], &pchLine[off], cchAppend);
330 cchBody += cchAppend;
331 pszBody[cchBody] = '\0';
332 }
333 else
334 Info.cBlankLinesBefore++;
335
336 /* Advance. */
337 Info.offEnd = (uint32_t)cchLine;
338 Info.iLineEnd = iLine;
339 }
340
341 /*
342 * Strip trailing empty lines in the body.
343 */
344 Info.cBlankLinesAfter = 0;
345 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
346 {
347 Info.cBlankLinesAfter++;
348 pszBody[--cchBody] = '\0';
349 }
350
351 /*
352 * Do the callback and return.
353 */
354 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
355
356 RTMemFree(pszBody);
357
358 *piLine = iLine;
359 *ppchLine = pchLine;
360 *pcchLine = cchLine;
361 *poff = off;
362 *penmEol = enmEol;
363 return rc;
364}
365
366
367
368/**
369 * Common string literal handler.
370 *
371 * @returns new pchLine value.
372 * @param pIn The input string.
373 * @param chType The quotation type.
374 * @param pchLine The current line.
375 * @param ppchLine Pointer to the line variable.
376 * @param pcchLine Pointer to the line length variable.
377 * @param penmEol Pointer to the line ending type variable.
378 * @param piLine Pointer to the line number variable.
379 * @param poff Pointer to the line offset variable.
380 */
381static const char *handleStringLiteral(PSCMSTREAM pIn, char chType, const char *pchLine, size_t *pcchLine, PSCMEOL penmEol,
382 uint32_t *piLine, size_t *poff)
383{
384 size_t off = *poff;
385 for (;;)
386 {
387 bool fEnd = false;
388 bool fEscaped = false;
389 size_t const cchLine = *pcchLine;
390 while (off < cchLine)
391 {
392 char ch = pchLine[off++];
393 if (!fEscaped)
394 {
395 if (ch != chType)
396 {
397 if (ch != '\\')
398 { /* likely */ }
399 else
400 fEscaped = true;
401 }
402 else
403 {
404 fEnd = true;
405 break;
406 }
407 }
408 else
409 fEscaped = false;
410 }
411 if (fEnd)
412 break;
413
414 /* next line */
415 pchLine = ScmStreamGetLine(pIn, pcchLine, penmEol);
416 if (!pchLine)
417 break;
418 *piLine += 1;
419 off = 0;
420 }
421
422 *poff = off;
423 return pchLine;
424}
425
426
427/**
428 * Deals with comments in C and C++ code.
429 *
430 * @returns VBox status code / callback return code.
431 * @param pIn The stream to parse.
432 * @param pfnCallback The callback.
433 * @param pvUser The user parameter for the callback.
434 */
435static int enumerateCStyleComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
436{
437 AssertCompile('\'' < '/');
438 AssertCompile('"' < '/');
439
440 int rcRet = VINF_SUCCESS;
441 uint32_t iLine = 0;
442 SCMEOL enmEol;
443 size_t cchLine;
444 const char *pchLine;
445 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
446 {
447 size_t off = 0;
448 while (off < cchLine)
449 {
450 unsigned ch = pchLine[off++];
451 if (ch > (unsigned)'/')
452 { /* not interesting */ }
453 else if (ch == '/')
454 {
455 if (off < cchLine)
456 {
457 ch = pchLine[off++];
458 if (ch == '*')
459 {
460 /*
461 * Multiline comment. Find the end.
462 *
463 * Note! This is very similar to the python doc string handling further down.
464 */
465 SCMCOMMENTINFO Info;
466 Info.iLineStart = iLine;
467 Info.offStart = (uint32_t)off - 2;
468 Info.iLineEnd = UINT32_MAX;
469 Info.offEnd = UINT32_MAX;
470 Info.cBlankLinesBefore = 0;
471
472 /* Determine comment type (same as for line-comments). */
473 Info.enmType = kScmCommentType_MultiLine;
474 if ( off < cchLine
475 && ( (ch = pchLine[off]) == '*'
476 || ch == '!') )
477 {
478 unsigned ch2;
479 if ( off + 1 == cchLine
480 || RT_C_IS_SPACE(ch2 = pchLine[off + 1]) )
481 {
482 Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc : kScmCommentType_MultiLine_Qt;
483 off += 1;
484 }
485 else if ( ch2 == '<'
486 && ( off + 2 == cchLine
487 || RT_C_IS_SPACE(pchLine[off + 2]) ))
488 {
489 Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc_After
490 : kScmCommentType_MultiLine_Qt_After;
491 off += 2;
492 }
493 }
494
495 /*
496 * Copy the body and find the end of the multiline comment.
497 */
498 size_t cbBodyAlloc = 0;
499 size_t cchBody = 0;
500 char *pszBody = NULL;
501 for (;;)
502 {
503 /* Parse the line up to the end-of-comment or end-of-line. */
504 size_t offLineStart = off;
505 size_t offLastNonBlank = off;
506 size_t offFirstNonBlank = ~(size_t)0;
507 while (off < cchLine)
508 {
509 ch = pchLine[off++];
510 if (ch != '*' || off >= cchLine || pchLine[off] != '/')
511 {
512 if (RT_C_IS_BLANK(ch))
513 {/* kind of likely */}
514 else
515 {
516 offLastNonBlank = off - 1;
517 if (offFirstNonBlank != ~(size_t)0)
518 {/* likely */}
519 else if ( ch != '*' /* ignore continuation-asterisks */
520 || off > Info.offStart + 1 + 1
521 || off > cchLine
522 || ( off < cchLine
523 && !RT_C_IS_SPACE(pchLine[off]))
524 || pszBody == NULL)
525 offFirstNonBlank = off - 1;
526 }
527 }
528 else
529 {
530 Info.offEnd = (uint32_t)++off;
531 Info.iLineEnd = iLine;
532 break;
533 }
534 }
535
536 /* Append line content to the comment body string. */
537 size_t cchAppend;
538 if (offFirstNonBlank == ~(size_t)0)
539 cchAppend = 0; /* empty line */
540 else
541 {
542 if (pszBody)
543 offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank);
544 else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */
545 offLineStart++;
546 cchAppend = offLastNonBlank + 1 - offLineStart;
547 Assert(cchAppend <= cchLine);
548 }
549
550 size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
551 if (cchNewBody >= cbBodyAlloc)
552 {
553 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
554 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
555 if (pvNew)
556 pszBody = (char *)pvNew;
557 else
558 {
559 RTMemFree(pszBody);
560 return VERR_NO_MEMORY;
561 }
562 }
563
564 if (cchBody > 0) /* no leading blank lines */
565 pszBody[cchBody++] = '\n';
566 else if (cchAppend == 0)
567 Info.cBlankLinesBefore++;
568 memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
569 cchBody += cchAppend;
570 pszBody[cchBody] = '\0';
571
572 /* Advance to the next line, if we haven't yet seen the end of this comment. */
573 if (Info.iLineEnd != UINT32_MAX)
574 break;
575 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
576 if (!pchLine)
577 {
578 Info.offEnd = (uint32_t)cchLine;
579 Info.iLineEnd = iLine;
580 break;
581 }
582 iLine++;
583 off = 0;
584 }
585
586 /* Strip trailing empty lines in the body. */
587 Info.cBlankLinesAfter = 0;
588 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
589 {
590 Info.cBlankLinesAfter++;
591 pszBody[--cchBody] = '\0';
592 }
593
594 /* Do the callback. */
595 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
596 RTMemFree(pszBody);
597 if (RT_FAILURE(rc))
598 return rc;
599 if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
600 rcRet = rc;
601 }
602 else if (ch == '/')
603 {
604 /*
605 * Line comment. Join the other line comment guys.
606 */
607 off -= 2;
608 int rc = handleLineComment(pIn, isCppLineComment, pfnCallback, pvUser,
609 &pchLine, &cchLine, &enmEol, &iLine, &off);
610 if (RT_FAILURE(rc))
611 return rc;
612 if (rcRet == VINF_SUCCESS)
613 rcRet = rc;
614 }
615
616 if (!pchLine)
617 break;
618 }
619 }
620 else if (ch == '"')
621 {
622 /*
623 * String literal may include sequences that looks like comments. So,
624 * they needs special handling to avoid confusion.
625 */
626 pchLine = handleStringLiteral(pIn, '"', pchLine, &cchLine, &enmEol, &iLine, &off);
627 }
628 /* else: We don't have to deal with character literal as these shouldn't
629 include comment-like sequences. */
630 } /* for each character in the line */
631
632 iLine++;
633 } /* for each line in the stream */
634
635 int rcStream = ScmStreamGetStatus(pIn);
636 if (RT_SUCCESS(rcStream))
637 return rcRet;
638 return rcStream;
639}
640
641
642/**
643 * Deals with comments in Python code.
644 *
645 * @returns VBox status code / callback return code.
646 * @param pIn The stream to parse.
647 * @param pfnCallback The callback.
648 * @param pvUser The user parameter for the callback.
649 */
650static int enumeratePythonComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
651{
652 AssertCompile('#' < '\'');
653 AssertCompile('"' < '\'');
654
655 int rcRet = VINF_SUCCESS;
656 uint32_t iLine = 0;
657 SCMEOL enmEol;
658 size_t cchLine;
659 const char *pchLine;
660 SCMCOMMENTINFO Info;
661 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
662 {
663 size_t off = 0;
664 while (off < cchLine)
665 {
666 char ch = pchLine[off++];
667 if ((unsigned char)ch > (unsigned char)'\'')
668 { /* not interesting */ }
669 else if (ch == '#')
670 {
671 /*
672 * Line comment. Join paths with the others.
673 */
674 off -= 1;
675 int rc = handleLineComment(pIn, isHashComment, pfnCallback, pvUser,
676 &pchLine, &cchLine, &enmEol, &iLine, &off);
677 if (RT_FAILURE(rc))
678 return rc;
679 if (rcRet == VINF_SUCCESS)
680 rcRet = rc;
681
682 if (!pchLine)
683 break;
684 }
685 else if (ch == '"' || ch == '\'')
686 {
687 /*
688 * String literal may be doc strings and they may legally include hashes.
689 */
690 const char chType = ch;
691 if ( off + 1 >= cchLine
692 || pchLine[off] != chType
693 || pchLine[off + 1] != chType)
694 pchLine = handleStringLiteral(pIn, chType, pchLine, &cchLine, &enmEol, &iLine, &off);
695 else
696 {
697 /*
698 * Doc string (/ long string).
699 *
700 * Note! This is very similar to the multiline C comment handling above.
701 */
702 Info.iLineStart = iLine;
703 Info.offStart = (uint32_t)off - 1;
704 Info.iLineEnd = UINT32_MAX;
705 Info.offEnd = UINT32_MAX;
706 Info.cBlankLinesBefore = 0;
707 Info.enmType = kScmCommentType_DocString;
708
709 off += 2;
710
711 /* Copy the body and find the end of the doc string comment. */
712 size_t cbBodyAlloc = 0;
713 size_t cchBody = 0;
714 char *pszBody = NULL;
715 for (;;)
716 {
717 /* Parse the line up to the end-of-comment or end-of-line. */
718 size_t offLineStart = off;
719 size_t offLastNonBlank = off;
720 size_t offFirstNonBlank = ~(size_t)0;
721 bool fEscaped = false;
722 while (off < cchLine)
723 {
724 ch = pchLine[off++];
725 if (!fEscaped)
726 {
727 if ( off + 1 >= cchLine
728 || ch != chType
729 || pchLine[off] != chType
730 || pchLine[off + 1] != chType)
731 {
732 if (RT_C_IS_BLANK(ch))
733 {/* kind of likely */}
734 else
735 {
736 offLastNonBlank = off - 1;
737 if (offFirstNonBlank != ~(size_t)0)
738 {/* likely */}
739 else if ( ch != '*' /* ignore continuation-asterisks */
740 || off > Info.offStart + 1 + 1
741 || off > cchLine
742 || ( off < cchLine
743 && !RT_C_IS_SPACE(pchLine[off]))
744 || pszBody == NULL)
745 offFirstNonBlank = off - 1;
746
747 if (ch != '\\')
748 {/* likely */ }
749 else
750 fEscaped = true;
751 }
752 }
753 else
754 {
755 off += 2;
756 Info.offEnd = (uint32_t)off;
757 Info.iLineEnd = iLine;
758 break;
759 }
760 }
761 else
762 fEscaped = false;
763 }
764
765 /* Append line content to the comment body string. */
766 size_t cchAppend;
767 if (offFirstNonBlank == ~(size_t)0)
768 cchAppend = 0; /* empty line */
769 else
770 {
771 if (pszBody)
772 offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank);
773 else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */
774 offLineStart++;
775 cchAppend = offLastNonBlank + 1 - offLineStart;
776 Assert(cchAppend <= cchLine);
777 }
778
779 size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
780 if (cchNewBody >= cbBodyAlloc)
781 {
782 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
783 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
784 if (pvNew)
785 pszBody = (char *)pvNew;
786 else
787 {
788 RTMemFree(pszBody);
789 return VERR_NO_MEMORY;
790 }
791 }
792
793 if (cchBody > 0) /* no leading blank lines */
794 pszBody[cchBody++] = '\n';
795 else if (cchAppend == 0)
796 Info.cBlankLinesBefore++;
797 memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
798 cchBody += cchAppend;
799 pszBody[cchBody] = '\0';
800
801 /* Advance to the next line, if we haven't yet seen the end of this comment. */
802 if (Info.iLineEnd != UINT32_MAX)
803 break;
804 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
805 if (!pchLine)
806 {
807 Info.offEnd = (uint32_t)cchLine;
808 Info.iLineEnd = iLine;
809 break;
810 }
811 iLine++;
812 off = 0;
813 }
814
815 /* Strip trailing empty lines in the body. */
816 Info.cBlankLinesAfter = 0;
817 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
818 {
819 Info.cBlankLinesAfter++;
820 pszBody[--cchBody] = '\0';
821 }
822
823 /* Do the callback. */
824 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
825 RTMemFree(pszBody);
826 if (RT_FAILURE(rc))
827 return rc;
828 if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
829 rcRet = rc;
830 }
831
832 if (!pchLine)
833 break;
834 }
835 /* else: We don't have to deal with character literal as these shouldn't
836 include comment-like sequences. */
837 } /* for each character in the line */
838
839 iLine++;
840 } /* for each line in the stream */
841
842 int rcStream = ScmStreamGetStatus(pIn);
843 if (RT_SUCCESS(rcStream))
844 return rcRet;
845 return rcStream;
846}
847
848
849/**
850 * Deals with XML comments.
851 *
852 * @returns VBox status code / callback return code.
853 * @param pIn The stream to parse.
854 * @param pfnCallback The callback.
855 * @param pvUser The user parameter for the callback.
856 */
857static int enumerateXmlComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
858{
859 int rcRet = VINF_SUCCESS;
860 uint32_t iLine = 0;
861 SCMEOL enmEol;
862 size_t cchLine;
863 const char *pchLine;
864 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
865 {
866 size_t off = 0;
867 while (off < cchLine)
868 {
869 /*
870 * Skip leading blanks and check for start of XML comment.
871 */
872 while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
873 off++;
874 if (IS_XML_COMMENT_START(pchLine, off, cchLine))
875 {
876 /*
877 * XML comment. Find the end.
878 *
879 * Note! This is very similar to the python doc string handling above.
880 */
881 SCMCOMMENTINFO Info;
882 Info.iLineStart = iLine;
883 Info.offStart = (uint32_t)off;
884 Info.iLineEnd = UINT32_MAX;
885 Info.offEnd = UINT32_MAX;
886 Info.cBlankLinesBefore = 0;
887 Info.enmType = kScmCommentType_Xml;
888
889 off += 4;
890
891 /*
892 * Copy the body and find the end of the XML comment.
893 */
894 size_t cbBodyAlloc = 0;
895 size_t cchBody = 0;
896 char *pszBody = NULL;
897 for (;;)
898 {
899 /* Parse the line up to the end-of-comment or end-of-line. */
900 size_t offLineStart = off;
901 size_t offLastNonBlank = off;
902 size_t offFirstNonBlank = ~(size_t)0;
903 while (off < cchLine)
904 {
905 if (!IS_XML_COMMENT_END(pchLine, off, cchLine))
906 {
907 char ch = pchLine[off++];
908 if (RT_C_IS_BLANK(ch))
909 {/* kind of likely */}
910 else
911 {
912 offLastNonBlank = off - 1;
913 if (offFirstNonBlank != ~(size_t)0)
914 {/* likely */}
915 else if ( (ch != '*' && ch != '#') /* ignore continuation-asterisks */
916 || off > Info.offStart + 1 + 1
917 || off > cchLine
918 || ( off < cchLine
919 && !RT_C_IS_SPACE(pchLine[off]))
920 || pszBody == NULL)
921 offFirstNonBlank = off - 1;
922 }
923 }
924 else
925 {
926 off += 3;
927 Info.offEnd = (uint32_t)off;
928 Info.iLineEnd = iLine;
929 break;
930 }
931 }
932
933 /* Append line content to the comment body string. */
934 size_t cchAppend;
935 if (offFirstNonBlank == ~(size_t)0)
936 cchAppend = 0; /* empty line */
937 else
938 {
939 offLineStart = offFirstNonBlank;
940 cchAppend = offLastNonBlank + 1 - offLineStart;
941 Assert(cchAppend <= cchLine);
942 }
943
944 size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
945 if (cchNewBody >= cbBodyAlloc)
946 {
947 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
948 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
949 if (pvNew)
950 pszBody = (char *)pvNew;
951 else
952 {
953 RTMemFree(pszBody);
954 return VERR_NO_MEMORY;
955 }
956 }
957
958 if (cchBody > 0) /* no leading blank lines */
959 pszBody[cchBody++] = '\n';
960 else if (cchAppend == 0)
961 Info.cBlankLinesBefore++;
962 memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
963 cchBody += cchAppend;
964 pszBody[cchBody] = '\0';
965
966 /* Advance to the next line, if we haven't yet seen the end of this comment. */
967 if (Info.iLineEnd != UINT32_MAX)
968 break;
969 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
970 if (!pchLine)
971 {
972 Info.offEnd = (uint32_t)cchLine;
973 Info.iLineEnd = iLine;
974 break;
975 }
976 iLine++;
977 off = 0;
978 }
979
980 /* Strip trailing empty lines in the body. */
981 Info.cBlankLinesAfter = 0;
982 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
983 {
984 Info.cBlankLinesAfter++;
985 pszBody[--cchBody] = '\0';
986 }
987
988 /* Do the callback. */
989 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
990 RTMemFree(pszBody);
991 if (RT_FAILURE(rc))
992 return rc;
993 if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
994 rcRet = rc;
995 }
996 else
997 off++;
998 } /* for each character in the line */
999
1000 iLine++;
1001 } /* for each line in the stream */
1002
1003 int rcStream = ScmStreamGetStatus(pIn);
1004 if (RT_SUCCESS(rcStream))
1005 return rcRet;
1006 return rcStream;
1007}
1008
1009
1010/**
1011 * Deals with comments in DOS batch files.
1012 *
1013 * @returns VBox status code / callback return code.
1014 * @param pIn The stream to parse.
1015 * @param pfnCallback The callback.
1016 * @param pvUser The user parameter for the callback.
1017 */
1018static int enumerateBatchComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
1019{
1020 int rcRet = VINF_SUCCESS;
1021 uint32_t iLine = 0;
1022 SCMEOL enmEol;
1023 size_t cchLine;
1024 const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
1025 while (pchLine != NULL)
1026 {
1027 /*
1028 * Skip leading blanks and check for 'rem'.
1029 * At the moment we do not parse '::label-comments'.
1030 */
1031 size_t off = 0;
1032 while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
1033 off++;
1034 if (!IS_REM(pchLine, off, cchLine))
1035 {
1036 iLine++;
1037 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
1038 }
1039 else
1040 {
1041 int rc = handleLineComment(pIn, isBatchComment, pfnCallback, pvUser,
1042 &pchLine, &cchLine, &enmEol, &iLine, &off);
1043 if (RT_FAILURE(rc))
1044 return rc;
1045 if (rcRet == VINF_SUCCESS)
1046 rcRet = rc;
1047 }
1048 }
1049
1050 int rcStream = ScmStreamGetStatus(pIn);
1051 if (RT_SUCCESS(rcStream))
1052 return rcRet;
1053 return rcStream;
1054}
1055
1056
1057/**
1058 * Deals with comments in SQL files.
1059 *
1060 * @returns VBox status code / callback return code.
1061 * @param pIn The stream to parse.
1062 * @param pfnCallback The callback.
1063 * @param pvUser The user parameter for the callback.
1064 */
1065static int enumerateSqlComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
1066{
1067 int rcRet = VINF_SUCCESS;
1068 uint32_t iLine = 0;
1069 SCMEOL enmEol;
1070 size_t cchLine;
1071 const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
1072 while (pchLine != NULL)
1073 {
1074 /*
1075 * Skip leading blanks and check for '--'.
1076 */
1077 size_t off = 0;
1078 while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
1079 off++;
1080 if ( cchLine < 2
1081 || pchLine[0] != '-'
1082 || pchLine[1] != '-')
1083 {
1084 iLine++;
1085 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
1086 }
1087 else
1088 {
1089 int rc = handleLineComment(pIn, isSqlComment, pfnCallback, pvUser,
1090 &pchLine, &cchLine, &enmEol, &iLine, &off);
1091 if (RT_FAILURE(rc))
1092 return rc;
1093 if (rcRet == VINF_SUCCESS)
1094 rcRet = rc;
1095 }
1096 }
1097
1098 int rcStream = ScmStreamGetStatus(pIn);
1099 if (RT_SUCCESS(rcStream))
1100 return rcRet;
1101 return rcStream;
1102}
1103
1104
1105/**
1106 * Deals with simple line comments.
1107 *
1108 * @returns VBox status code / callback return code.
1109 * @param pIn The stream to parse.
1110 * @param chStart The start of comment character.
1111 * @param pfnIsComment Comment tester function.
1112 * @param pfnCallback The callback.
1113 * @param pvUser The user parameter for the callback.
1114 */
1115static int enumerateSimpleLineComments(PSCMSTREAM pIn, char chStart, PFNISCOMMENT pfnIsComment,
1116 PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
1117{
1118 int rcRet = VINF_SUCCESS;
1119 uint32_t iLine = 0;
1120 SCMEOL enmEol;
1121 size_t cchLine;
1122 const char *pchLine;
1123 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
1124 {
1125 size_t off = 0;
1126 while (off < cchLine)
1127 {
1128 char ch = pchLine[off++];
1129 if (ch != chStart)
1130 { /* not interesting */ }
1131 else
1132 {
1133 off -= 1;
1134 int rc = handleLineComment(pIn, pfnIsComment, pfnCallback, pvUser,
1135 &pchLine, &cchLine, &enmEol, &iLine, &off);
1136 if (RT_FAILURE(rc))
1137 return rc;
1138 if (rcRet == VINF_SUCCESS)
1139 rcRet = rc;
1140
1141 if (!pchLine)
1142 break;
1143 }
1144 } /* for each character in the line */
1145
1146 iLine++;
1147 } /* for each line in the stream */
1148
1149 int rcStream = ScmStreamGetStatus(pIn);
1150 if (RT_SUCCESS(rcStream))
1151 return rcRet;
1152 return rcStream;
1153}
1154
1155
1156/**
1157 * Enumerates the comments in the given stream, calling @a pfnCallback for each.
1158 *
1159 * @returns IPRT status code.
1160 * @param pIn The stream to parse.
1161 * @param enmCommentStyle The comment style of the source stream.
1162 * @param pfnCallback The function to call.
1163 * @param pvUser User argument to the callback.
1164 */
1165int ScmEnumerateComments(PSCMSTREAM pIn, SCMCOMMENTSTYLE enmCommentStyle, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
1166{
1167 switch (enmCommentStyle)
1168 {
1169 case kScmCommentStyle_C:
1170 return enumerateCStyleComments(pIn, pfnCallback, pvUser);
1171
1172 case kScmCommentStyle_Python:
1173 return enumeratePythonComments(pIn, pfnCallback, pvUser);
1174
1175 case kScmCommentStyle_Semicolon:
1176 return enumerateSimpleLineComments(pIn, ';', isSemicolonComment, pfnCallback, pvUser);
1177
1178 case kScmCommentStyle_Hash:
1179 return enumerateSimpleLineComments(pIn, '#', isHashComment, pfnCallback, pvUser);
1180
1181 case kScmCommentStyle_Rem_Upper:
1182 case kScmCommentStyle_Rem_Lower:
1183 case kScmCommentStyle_Rem_Camel:
1184 return enumerateBatchComments(pIn, pfnCallback, pvUser);
1185
1186 case kScmCommentStyle_Sql:
1187 return enumerateSqlComments(pIn, pfnCallback, pvUser);
1188
1189 case kScmCommentStyle_Tick:
1190 return enumerateSimpleLineComments(pIn, '\'', isTickComment, pfnCallback, pvUser);
1191
1192 case kScmCommentStyle_Xml:
1193 return enumerateXmlComments(pIn, pfnCallback, pvUser);
1194
1195 default:
1196 AssertFailedReturn(VERR_INVALID_PARAMETER);
1197 }
1198}
1199
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette