VirtualBox

source: vbox/trunk/src/bldprogs/scmparser.cpp@ 96401

最後變更 在這個檔案從96401是 96401,由 vboxsync 提交於 3 年 前

bldprogs/scm: Teach it to replace the old copyright and license notices with the updated ones (e.g. GPlv2 to GPLv3).

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 43.1 KB
 
1/* $Id: scmparser.cpp 96401 2022-08-22 15:06:19Z vboxsync $ */
2/** @file
3 * IPRT Testcase / Tool - Source Code Massager, Code Parsers.
4 */
5
6/*
7 * Copyright (C) 2010-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include <iprt/assert.h>
23#include <iprt/ctype.h>
24#include <iprt/dir.h>
25#include <iprt/env.h>
26#include <iprt/file.h>
27#include <iprt/errcore.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/mem.h>
31#include <iprt/message.h>
32#include <iprt/param.h>
33#include <iprt/path.h>
34#include <iprt/process.h>
35#include <iprt/stream.h>
36#include <iprt/string.h>
37
38#include "scm.h"
39
40
41/*********************************************************************************************************************************
42* Structures and Typedefs *
43*********************************************************************************************************************************/
/**
 * Comment marker tester callback.
 *
 * @returns Number of characters the matched marker occupies at the start of
 *          @a pchLine, 0 if there is no match.
 * @param   pchLine     The text to test (not necessarily zero terminated).
 * @param   cchLine     Number of valid characters at @a pchLine.
 * @param   fSecond     false when probing for the start of a comment, true
 *                      when handleLineComment probes for the extended marker
 *                      it uses to classify doxygen style comments.
 */
typedef size_t (*PFNISCOMMENT)(const char *pchLine, size_t cchLine, bool fSecond);
45
46
/**
 * Comment tester for C++ style line comments.
 *
 * @returns 2 if the text opens with "//" and @a fSecond is false, 3 if it
 *          opens with "///" and @a fSecond is true, otherwise 0.
 * @param   pchLine     The text to test.
 * @param   cchLine     Number of valid characters at @a pchLine.
 * @param   fSecond     Whether to require the three character marker.
 */
static size_t isCppLineComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    /* Nothing matches unless the text opens with two slashes. */
    bool const fTwoSlashes = cchLine >= 2 && pchLine[0] == '/' && pchLine[1] == '/';
    if (!fTwoSlashes)
        return 0;

    if (fSecond)
        return (cchLine >= 3 && pchLine[2] == '/') ? 3 : 0;
    return 2;
}
63
64
/**
 * Comment tester for hash ('#') line comments.
 *
 * @returns 1 if the text opens with '#' and @a fSecond is false, 2 if it
 *          opens with "##" and @a fSecond is true, otherwise 0.
 * @param   pchLine     The text to test.
 * @param   cchLine     Number of valid characters at @a pchLine.
 * @param   fSecond     Whether to require the doubled marker.
 */
static size_t isHashComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (cchLine == 0 || pchLine[0] != '#')
        return 0;
    if (!fSecond)
        return 1;
    return (cchLine >= 2 && pchLine[1] == '#') ? 2 : 0;
}
79
80
/**
 * Comment tester for semicolon (';') line comments.
 *
 * @returns 1 if the text opens with ';' and @a fSecond is false, 2 if it
 *          opens with ";;" and @a fSecond is true, otherwise 0.
 * @param   pchLine     The text to test.
 * @param   cchLine     Number of valid characters at @a pchLine.
 * @param   fSecond     Whether to require the doubled marker.
 */
static size_t isSemicolonComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    size_t cchMarker = 0;
    if (cchLine >= 1 && pchLine[0] == ';')
    {
        if (!fSecond)
            cchMarker = 1;
        else if (cchLine >= 2 && pchLine[1] == ';')
            cchMarker = 2;
    }
    return cchMarker;
}
95
96
/**
 * Macro for checking for a XML comment start.
 *
 * Evaluates to true when the four characters at offset @a a_off of @a a_pch
 * are "<!--" and are followed by either the end of the line or a space
 * character.  The length check comes first so that nothing beyond @a a_cch is
 * read.  Note that the arguments are evaluated several times.
 */
#define IS_XML_COMMENT_START(a_pch, a_off, a_cch) \
        (   (a_off) + 4 <= (a_cch) \
         && (a_pch)[(a_off)    ] == '<' \
         && (a_pch)[(a_off) + 1] == '!' \
         && (a_pch)[(a_off) + 2] == '-' \
         && (a_pch)[(a_off) + 3] == '-' \
         && ((a_off) + 4 == (a_cch) || RT_C_IS_SPACE((a_pch)[(a_off) + 4])) )
105
/**
 * Macro for checking for a XML comment end.
 *
 * Evaluates to true when the three characters at offset @a a_off of @a a_pch
 * are "-->".  The length check comes first so that nothing beyond @a a_cch is
 * read.  Note that the arguments are evaluated several times.
 */
#define IS_XML_COMMENT_END(a_pch, a_off, a_cch) \
        (   (a_off) + 3 <= (a_cch) \
         && (a_pch)[(a_off)    ] == '-' \
         && (a_pch)[(a_off) + 1] == '-' \
         && (a_pch)[(a_off) + 2] == '>')
112
113
/**
 * Macro for checking for a batch file comment prefix.
 *
 * Evaluates to true when the three characters at offset @a a_off of @a a_pch
 * spell "rem" in any mix of upper and lower case and are followed by either
 * the end of the line or a space character.  The length check comes first so
 * that nothing beyond @a a_cch is read.  Note that the arguments are evaluated
 * several times.
 */
#define IS_REM(a_pch, a_off, a_cch) \
        (   (a_off) + 3 <= (a_cch) \
         && ((a_pch)[(a_off)    ] == 'R' || (a_pch)[(a_off)    ] == 'r') \
         && ((a_pch)[(a_off) + 1] == 'E' || (a_pch)[(a_off) + 1] == 'e') \
         && ((a_pch)[(a_off) + 2] == 'M' || (a_pch)[(a_off) + 2] == 'm') \
         && ((a_off) + 3 == (a_cch) || RT_C_IS_SPACE((a_pch)[(a_off) + 3])) )
121
122
123/**
124 * Callback for checking if batch comment.
125 */
126static size_t isBatchComment(const char *pchLine, size_t cchLine, bool fSecond)
127{
128 if (!fSecond)
129 {
130 if (IS_REM(pchLine, 0, cchLine))
131 return 3;
132 }
133 else
134 {
135 /* Check for the 2nd in "rem rem" lines. */
136 if ( cchLine >= 4
137 && RT_C_IS_SPACE(*pchLine)
138 && IS_REM(pchLine, 1, cchLine))
139 return 4;
140 }
141 return 0;
142}
143
/**
 * Comment tester for SQL ('--') line comments.
 *
 * @returns 2 if the text opens with "--" and @a fSecond is false, 3 if it
 *          opens with "---" and @a fSecond is true, otherwise 0.
 * @param   pchLine     The text to test.
 * @param   cchLine     Number of valid characters at @a pchLine.
 * @param   fSecond     Whether to require the three character marker.
 */
static size_t isSqlComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (cchLine < 2 || pchLine[0] != '-' || pchLine[1] != '-')
        return 0;
    if (!fSecond)
        return 2;
    return (cchLine >= 3 && pchLine[2] == '-') ? 3 : 0;
}
161
/**
 * Comment tester for tick (single quote) line comments.
 *
 * @returns 1 if the text opens with a tick and @a fSecond is false, 2 if it
 *          opens with two ticks and @a fSecond is true, otherwise 0.
 * @param   pchLine     The text to test.
 * @param   cchLine     Number of valid characters at @a pchLine.
 * @param   fSecond     Whether to require the doubled marker.
 */
static size_t isTickComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    bool const fOneTick = cchLine >= 1 && pchLine[0] == '\'';
    if (!fOneTick)
        return 0;
    if (!fSecond)
        return 1;
    if (cchLine >= 2 && pchLine[1] == '\'')
        return 2;
    return 0;
}
176
177
178/**
179 * Common worker for enumeratePythonComments and enumerateSimpleLineComments.
180 *
181 * @returns IPRT status code.
182 * @param pIn The input stream.
183 * @param pfnIsComment Comment tester function.
184 * @param pfnCallback The callback.
185 * @param pvUser The user argument for the callback.
186 * @param ppchLine Pointer to the line variable.
187 * @param pcchLine Pointer to the line length variable.
188 * @param penmEol Pointer to the line ending type variable.
189 * @param piLine Pointer to the line number variable.
190 * @param poff Pointer to the line offset variable. On input this
191 * is positioned at the start of the comment.
192 */
193static int handleLineComment(PSCMSTREAM pIn, PFNISCOMMENT pfnIsComment,
194 PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser,
195 const char **ppchLine, size_t *pcchLine, PSCMEOL penmEol,
196 uint32_t *piLine, size_t *poff)
197{
198 /* Unpack input/output variables. */
199 uint32_t iLine = *piLine;
200 const char *pchLine = *ppchLine;
201 size_t cchLine = *pcchLine;
202 size_t off = *poff;
203 SCMEOL enmEol = *penmEol;
204
205 /*
206 * Take down the basic info about the comment.
207 */
208 SCMCOMMENTINFO Info;
209 Info.iLineStart = iLine;
210 Info.iLineEnd = iLine;
211 Info.offStart = (uint32_t)off;
212 Info.offEnd = (uint32_t)cchLine;
213
214 size_t cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false);
215 Assert(cchSkip > 0);
216 off += cchSkip;
217
218 /* Determine comment type. */
219 Info.enmType = kScmCommentType_Line;
220 char ch;
221 cchSkip = 1;
222 if ( off < cchLine
223 && ( (ch = pchLine[off]) == '!'
224 || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, true)) > 0) )
225 {
226 unsigned ch2;
227 if ( off + cchSkip == cchLine
228 || RT_C_IS_SPACE(ch2 = pchLine[off + cchSkip]) )
229 {
230 Info.enmType = ch != '!' ? kScmCommentType_Line_JavaDoc : kScmCommentType_Line_Qt;
231 off += cchSkip;
232 }
233 else if ( ch2 == '<'
234 && ( off + cchSkip + 1 == cchLine
235 || RT_C_IS_SPACE(pchLine[off + cchSkip + 1]) ))
236 {
237 Info.enmType = ch == '!' ? kScmCommentType_Line_JavaDoc_After : kScmCommentType_Line_Qt_After;
238 off += cchSkip + 1;
239 }
240 }
241
242 /*
243 * Copy body of the first line. Like for C, we ignore a single space in the first comment line.
244 */
245 if (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
246 off++;
247 size_t cchBody = cchLine;
248 while (cchBody > off && RT_C_IS_SPACE(pchLine[cchBody - 1]))
249 cchBody--;
250 cchBody -= off;
251 size_t cbBodyAlloc = RT_MAX(_1K, RT_ALIGN_Z(cchBody + 64, 128));
252 char *pszBody = (char *)RTMemAlloc(cbBodyAlloc);
253 if (!pszBody)
254 return VERR_NO_MEMORY;
255 memcpy(pszBody, &pchLine[off], cchBody);
256 pszBody[cchBody] = '\0';
257
258 Info.cBlankLinesBefore = cchBody == 0;
259
260 /*
261 * Look for more comment lines and append them to the body.
262 */
263 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
264 {
265 iLine++;
266
267 /* Skip leading spaces. */
268 off = 0;
269 while (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
270 off++;
271
272 /* Check if it's a comment. */
273 if ( off >= cchLine
274 || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false)) == 0)
275 break;
276 off += cchSkip;
277
278 /* Split on doxygen comment start (if not already in one). */
279 if ( Info.enmType == kScmCommentType_Line
280 && off + 1 < cchLine
281 && ( pfnIsComment(&pchLine[off], cchLine - off, true) > 0
282 || ( pchLine[off + 1] == '!'
283 && ( off + 2 == cchLine
284 || pchLine[off + 2] != '!') ) ) )
285 {
286 off -= cchSkip;
287 break;
288 }
289
290 /* Append the body w/o trailing spaces and some leading ones. */
291 if (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
292 off++;
293 while (off < cchLine && off < Info.offStart + 3 && RT_C_IS_SPACE(pchLine[off]))
294 off++;
295 size_t cchAppend = cchLine;
296 while (cchAppend > off && RT_C_IS_SPACE(pchLine[cchAppend - 1]))
297 cchAppend--;
298 cchAppend -= off;
299
300 size_t cchNewBody = cchBody + 1 + cchAppend;
301 if (cchNewBody >= cbBodyAlloc)
302 {
303 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
304 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
305 if (pvNew)
306 pszBody = (char *)pvNew;
307 else
308 {
309 RTMemFree(pszBody);
310 return VERR_NO_MEMORY;
311 }
312 }
313
314 if ( cchBody > 0
315 || cchAppend > 0)
316 {
317 if (cchBody > 0)
318 pszBody[cchBody++] = '\n';
319 memcpy(&pszBody[cchBody], &pchLine[off], cchAppend);
320 cchBody += cchAppend;
321 pszBody[cchBody] = '\0';
322 }
323 else
324 Info.cBlankLinesBefore++;
325
326 /* Advance. */
327 Info.offEnd = (uint32_t)cchLine;
328 Info.iLineEnd = iLine;
329 }
330
331 /*
332 * Strip trailing empty lines in the body.
333 */
334 Info.cBlankLinesAfter = 0;
335 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
336 {
337 Info.cBlankLinesAfter++;
338 pszBody[--cchBody] = '\0';
339 }
340
341 /*
342 * Do the callback and return.
343 */
344 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
345
346 RTMemFree(pszBody);
347
348 *piLine = iLine;
349 *ppchLine = pchLine;
350 *pcchLine = cchLine;
351 *poff = off;
352 *penmEol = enmEol;
353 return rc;
354}
355
356
357
358/**
359 * Common string literal handler.
360 *
361 * @returns new pchLine value.
362 * @param pIn The input string.
363 * @param chType The quotation type.
364 * @param pchLine The current line.
365 * @param ppchLine Pointer to the line variable.
366 * @param pcchLine Pointer to the line length variable.
367 * @param penmEol Pointer to the line ending type variable.
368 * @param piLine Pointer to the line number variable.
369 * @param poff Pointer to the line offset variable.
370 */
371static const char *handleStringLiteral(PSCMSTREAM pIn, char chType, const char *pchLine, size_t *pcchLine, PSCMEOL penmEol,
372 uint32_t *piLine, size_t *poff)
373{
374 size_t off = *poff;
375 for (;;)
376 {
377 bool fEnd = false;
378 bool fEscaped = false;
379 size_t const cchLine = *pcchLine;
380 while (off < cchLine)
381 {
382 char ch = pchLine[off++];
383 if (!fEscaped)
384 {
385 if (ch != chType)
386 {
387 if (ch != '\\')
388 { /* likely */ }
389 else
390 fEscaped = true;
391 }
392 else
393 {
394 fEnd = true;
395 break;
396 }
397 }
398 else
399 fEscaped = false;
400 }
401 if (fEnd)
402 break;
403
404 /* next line */
405 pchLine = ScmStreamGetLine(pIn, pcchLine, penmEol);
406 if (!pchLine)
407 break;
408 *piLine += 1;
409 off = 0;
410 }
411
412 *poff = off;
413 return pchLine;
414}
415
416
/**
 * Deals with comments in C and C++ code.
 *
 * Walks the stream line by line looking for multi-line comments and '//' line
 * comments, skipping over double quoted string literals, and invokes
 * @a pfnCallback once per comment with the extracted comment body.
 *
 * @returns VBox status code / callback return code.  A failure from the
 *          callback or the stream is returned as is, otherwise the first
 *          callback status other than VINF_SUCCESS is returned.
 * @param   pIn             The stream to parse.
 * @param   pfnCallback     The callback.
 * @param   pvUser          The user parameter for the callback.
 */
static int enumerateCStyleComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
{
    /* The scan loop below dismisses everything sorting after '/', so the quote
       characters must sort before it. */
    AssertCompile('\'' < '/');
    AssertCompile('"' < '/');

    int rcRet = VINF_SUCCESS;
    uint32_t iLine = 0;
    SCMEOL enmEol;
    size_t cchLine;
    const char *pchLine;
    while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
    {
        size_t off = 0;
        while (off < cchLine)
        {
            unsigned ch = pchLine[off++];
            if (ch > (unsigned)'/')
            { /* not interesting */ }
            else if (ch == '/')
            {
                if (off < cchLine)
                {
                    ch = pchLine[off++];
                    if (ch == '*')
                    {
                        /*
                         * Multiline comment. Find the end.
                         *
                         * Note! This is very similar to the python doc string handling further down.
                         */
                        SCMCOMMENTINFO Info;
                        Info.iLineStart = iLine;
                        Info.offStart = (uint32_t)off - 2;
                        Info.iLineEnd = UINT32_MAX; /* UINT32_MAX = end of comment not seen yet. */
                        Info.offEnd = UINT32_MAX;
                        Info.cBlankLinesBefore = 0;

                        /* Determine comment type (same as for line-comments). */
                        Info.enmType = kScmCommentType_MultiLine;
                        if (   off < cchLine
                            && (   (ch = pchLine[off]) == '*'
                                || ch == '!') )
                        {
                            unsigned ch2;
                            if (   off + 1 == cchLine
                                || RT_C_IS_SPACE(ch2 = pchLine[off + 1]) )
                            {
                                Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc : kScmCommentType_MultiLine_Qt;
                                off += 1;
                            }
                            else if (   ch2 == '<'
                                     && (   off + 2 == cchLine
                                         || RT_C_IS_SPACE(pchLine[off + 2]) ))
                            {
                                Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc_After
                                             : kScmCommentType_MultiLine_Qt_After;
                                off += 2;
                            }
                        }

                        /*
                         * Copy the body and find the end of the multiline comment.
                         */
                        size_t cbBodyAlloc = 0;
                        size_t cchBody = 0;
                        char *pszBody = NULL;
                        for (;;)
                        {
                            /* Parse the line up to the end-of-comment or end-of-line. */
                            size_t offLineStart = off;
                            size_t offLastNonBlank = off;
                            size_t offFirstNonBlank = ~(size_t)0; /* ~0 = no body text found on this line yet. */
                            while (off < cchLine)
                            {
                                ch = pchLine[off++];
                                if (ch != '*' || off >= cchLine || pchLine[off] != '/')
                                {
                                    if (RT_C_IS_BLANK(ch))
                                    {/* kind of likely */}
                                    else
                                    {
                                        offLastNonBlank = off - 1;
                                        if (offFirstNonBlank != ~(size_t)0)
                                        {/* likely */}
                                        else if (   ch != '*' /* ignore continuation-asterisks */
                                                 || off > Info.offStart + 1 + 1
                                                 || off > cchLine
                                                 || (   off < cchLine
                                                     && !RT_C_IS_SPACE(pchLine[off]))
                                                 || pszBody == NULL)
                                            offFirstNonBlank = off - 1;
                                    }
                                }
                                else
                                {
                                    /* Found the end-of-comment sequence; step over the slash too. */
                                    Info.offEnd = (uint32_t)++off;
                                    Info.iLineEnd = iLine;
                                    break;
                                }
                            }

                            /* Append line content to the comment body string. */
                            size_t cchAppend;
                            if (offFirstNonBlank == ~(size_t)0)
                                cchAppend = 0; /* empty line */
                            else
                            {
                                if (pszBody)
                                    offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank);
                                else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */
                                    offLineStart++;
                                cchAppend = offLastNonBlank + 1 - offLineStart;
                                Assert(cchAppend <= cchLine);
                            }

                            /* Grow the buffer when the new body plus terminator would not fit
                               (this also performs the initial allocation). */
                            size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
                            if (cchNewBody >= cbBodyAlloc)
                            {
                                cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
                                void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
                                if (pvNew)
                                    pszBody = (char *)pvNew;
                                else
                                {
                                    RTMemFree(pszBody);
                                    return VERR_NO_MEMORY;
                                }
                            }

                            if (cchBody > 0) /* no leading blank lines */
                                pszBody[cchBody++] = '\n';
                            else if (cchAppend == 0)
                                Info.cBlankLinesBefore++;
                            memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
                            cchBody += cchAppend;
                            pszBody[cchBody] = '\0';

                            /* Advance to the next line, if we haven't yet seen the end of this comment. */
                            if (Info.iLineEnd != UINT32_MAX)
                                break;
                            pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
                            if (!pchLine)
                            {
                                /* Unterminated comment: let it end with the last line that was read. */
                                Info.offEnd = (uint32_t)cchLine;
                                Info.iLineEnd = iLine;
                                break;
                            }
                            iLine++;
                            off = 0;
                        }

                        /* Strip trailing empty lines in the body. */
                        Info.cBlankLinesAfter = 0;
                        while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
                        {
                            Info.cBlankLinesAfter++;
                            pszBody[--cchBody] = '\0';
                        }

                        /* Do the callback. */
                        int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
                        RTMemFree(pszBody);
                        if (RT_FAILURE(rc))
                            return rc;
                        if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
                            rcRet = rc;
                    }
                    else if (ch == '/')
                    {
                        /*
                         * Line comment. Join the other line comment guys.
                         */
                        off -= 2; /* Back to the first slash, that is where the worker expects to start. */
                        int rc = handleLineComment(pIn, isCppLineComment, pfnCallback, pvUser,
                                                   &pchLine, &cchLine, &enmEol, &iLine, &off);
                        if (RT_FAILURE(rc))
                            return rc;
                        if (rcRet == VINF_SUCCESS)
                            rcRet = rc;
                    }

                    /* Both comment handlers may have run into the end of the stream. */
                    if (!pchLine)
                        break;
                }
            }
            else if (ch == '"')
            {
                /*
                 * String literal may include sequences that looks like comments. So,
                 * they needs special handling to avoid confusion.
                 */
                pchLine = handleStringLiteral(pIn, '"', pchLine, &cchLine, &enmEol, &iLine, &off);
            }
            /* else: We don't have to deal with character literal as these shouldn't
                     include comment-like sequences. */
        } /* for each character in the line */

        iLine++;
    } /* for each line in the stream */

    /* A stream error takes precedence over whatever the callbacks returned. */
    int rcStream = ScmStreamGetStatus(pIn);
    if (RT_SUCCESS(rcStream))
        return rcRet;
    return rcStream;
}
630
631
/**
 * Deals with comments in Python code.
 *
 * Walks the stream line by line looking for '#' line comments and triple
 * quoted strings (reported as doc strings), skipping over ordinary string
 * literals, and invokes @a pfnCallback once per comment / doc string with the
 * extracted body.
 *
 * @returns VBox status code / callback return code.  A failure from the
 *          callback or the stream is returned as is, otherwise the first
 *          callback status other than VINF_SUCCESS is returned.
 * @param   pIn             The stream to parse.
 * @param   pfnCallback     The callback.
 * @param   pvUser          The user parameter for the callback.
 */
static int enumeratePythonComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
{
    /* The scan loop below dismisses everything sorting after the single quote,
       so the other two interesting characters must sort before it. */
    AssertCompile('#' < '\'');
    AssertCompile('"' < '\'');

    int rcRet = VINF_SUCCESS;
    uint32_t iLine = 0;
    SCMEOL enmEol;
    size_t cchLine;
    const char *pchLine;
    SCMCOMMENTINFO Info;
    while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
    {
        size_t off = 0;
        while (off < cchLine)
        {
            char ch = pchLine[off++];
            if ((unsigned char)ch > (unsigned char)'\'')
            { /* not interesting */ }
            else if (ch == '#')
            {
                /*
                 * Line comment. Join paths with the others.
                 */
                off -= 1; /* Back to the hash, that is where the worker expects to start. */
                int rc = handleLineComment(pIn, isHashComment, pfnCallback, pvUser,
                                           &pchLine, &cchLine, &enmEol, &iLine, &off);
                if (RT_FAILURE(rc))
                    return rc;
                if (rcRet == VINF_SUCCESS)
                    rcRet = rc;

                if (!pchLine)
                    break;
            }
            else if (ch == '"' || ch == '\'')
            {
                /*
                 * String literal may be doc strings and they may legally include hashes.
                 */
                const char chType = ch;
                /* Anything but three quote characters in a row is an ordinary string literal. */
                if (   off + 1 >= cchLine
                    || pchLine[off] != chType
                    || pchLine[off + 1] != chType)
                    pchLine = handleStringLiteral(pIn, chType, pchLine, &cchLine, &enmEol, &iLine, &off);
                else
                {
                    /*
                     * Doc string (/ long string).
                     *
                     * Note! This is very similar to the multiline C comment handling above.
                     */
                    Info.iLineStart = iLine;
                    Info.offStart = (uint32_t)off - 1;
                    Info.iLineEnd = UINT32_MAX; /* UINT32_MAX = end of doc string not seen yet. */
                    Info.offEnd = UINT32_MAX;
                    Info.cBlankLinesBefore = 0;
                    Info.enmType = kScmCommentType_DocString;

                    off += 2; /* Skip the 2nd and 3rd quote character. */

                    /* Copy the body and find the end of the doc string comment. */
                    size_t cbBodyAlloc = 0;
                    size_t cchBody = 0;
                    char *pszBody = NULL;
                    for (;;)
                    {
                        /* Parse the line up to the end-of-comment or end-of-line. */
                        size_t offLineStart = off;
                        size_t offLastNonBlank = off;
                        size_t offFirstNonBlank = ~(size_t)0; /* ~0 = no body text found on this line yet. */
                        bool fEscaped = false;
                        while (off < cchLine)
                        {
                            ch = pchLine[off++];
                            if (!fEscaped)
                            {
                                if (   off + 1 >= cchLine
                                    || ch != chType
                                    || pchLine[off] != chType
                                    || pchLine[off + 1] != chType)
                                {
                                    if (RT_C_IS_BLANK(ch))
                                    {/* kind of likely */}
                                    else
                                    {
                                        offLastNonBlank = off - 1;
                                        if (offFirstNonBlank != ~(size_t)0)
                                        {/* likely */}
                                        else if (   ch != '*' /* ignore continuation-asterisks */
                                                 || off > Info.offStart + 1 + 1
                                                 || off > cchLine
                                                 || (   off < cchLine
                                                     && !RT_C_IS_SPACE(pchLine[off]))
                                                 || pszBody == NULL)
                                            offFirstNonBlank = off - 1;

                                        if (ch != '\\')
                                        {/* likely */ }
                                        else
                                            fEscaped = true;
                                    }
                                }
                                else
                                {
                                    /* Found the closing triple quote; step over the remaining two characters. */
                                    off += 2;
                                    Info.offEnd = (uint32_t)off;
                                    Info.iLineEnd = iLine;
                                    break;
                                }
                            }
                            else
                                fEscaped = false;
                        }

                        /* Append line content to the comment body string. */
                        size_t cchAppend;
                        if (offFirstNonBlank == ~(size_t)0)
                            cchAppend = 0; /* empty line */
                        else
                        {
                            if (pszBody)
                                offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank);
                            else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */
                                offLineStart++;
                            cchAppend = offLastNonBlank + 1 - offLineStart;
                            Assert(cchAppend <= cchLine);
                        }

                        /* Grow the buffer when the new body plus terminator would not fit
                           (this also performs the initial allocation). */
                        size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
                        if (cchNewBody >= cbBodyAlloc)
                        {
                            cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
                            void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
                            if (pvNew)
                                pszBody = (char *)pvNew;
                            else
                            {
                                RTMemFree(pszBody);
                                return VERR_NO_MEMORY;
                            }
                        }

                        if (cchBody > 0) /* no leading blank lines */
                            pszBody[cchBody++] = '\n';
                        else if (cchAppend == 0)
                            Info.cBlankLinesBefore++;
                        memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
                        cchBody += cchAppend;
                        pszBody[cchBody] = '\0';

                        /* Advance to the next line, if we haven't yet seen the end of this comment. */
                        if (Info.iLineEnd != UINT32_MAX)
                            break;
                        pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
                        if (!pchLine)
                        {
                            /* Unterminated doc string: let it end with the last line that was read. */
                            Info.offEnd = (uint32_t)cchLine;
                            Info.iLineEnd = iLine;
                            break;
                        }
                        iLine++;
                        off = 0;
                    }

                    /* Strip trailing empty lines in the body. */
                    Info.cBlankLinesAfter = 0;
                    while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
                    {
                        Info.cBlankLinesAfter++;
                        pszBody[--cchBody] = '\0';
                    }

                    /* Do the callback. */
                    int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
                    RTMemFree(pszBody);
                    if (RT_FAILURE(rc))
                        return rc;
                    if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
                        rcRet = rc;
                }

                /* Both string handlers may have run into the end of the stream. */
                if (!pchLine)
                    break;
            }
            /* else: We don't have to deal with character literal as these shouldn't
                     include comment-like sequences. */
        } /* for each character in the line */

        iLine++;
    } /* for each line in the stream */

    /* A stream error takes precedence over whatever the callbacks returned. */
    int rcStream = ScmStreamGetStatus(pIn);
    if (RT_SUCCESS(rcStream))
        return rcRet;
    return rcStream;
}
837
838
/**
 * Deals with XML comments.
 *
 * Walks the stream line by line looking for XML comment start markers (see
 * IS_XML_COMMENT_START) and invokes @a pfnCallback once per comment with the
 * extracted comment body.
 *
 * @returns VBox status code / callback return code.  A failure from the
 *          callback or the stream is returned as is, otherwise the first
 *          callback status above VINF_SUCCESS is returned.
 * @param   pIn             The stream to parse.
 * @param   pfnCallback     The callback.
 * @param   pvUser          The user parameter for the callback.
 */
static int enumerateXmlComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
{
    int rcRet = VINF_SUCCESS;
    uint32_t iLine = 0;
    SCMEOL enmEol;
    size_t cchLine;
    const char *pchLine;
    while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
    {
        size_t off = 0;
        while (off < cchLine)
        {
            /*
             * Skip leading blanks and check for start of XML comment.
             */
            while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
                off++;
            if (IS_XML_COMMENT_START(pchLine, off, cchLine))
            {
                /*
                 * XML comment. Find the end.
                 *
                 * Note! This is very similar to the python doc string handling above.
                 */
                SCMCOMMENTINFO Info;
                Info.iLineStart = iLine;
                Info.offStart = (uint32_t)off;
                Info.iLineEnd = UINT32_MAX; /* UINT32_MAX = end of comment not seen yet. */
                Info.offEnd = UINT32_MAX;
                Info.cBlankLinesBefore = 0;
                Info.enmType = kScmCommentType_Xml;

                off += 4; /* Skip the four character start marker. */

                /*
                 * Copy the body and find the end of the XML comment.
                 */
                size_t cbBodyAlloc = 0;
                size_t cchBody = 0;
                char *pszBody = NULL;
                for (;;)
                {
                    /* Parse the line up to the end-of-comment or end-of-line. */
                    size_t offLineStart = off;
                    size_t offLastNonBlank = off;
                    size_t offFirstNonBlank = ~(size_t)0; /* ~0 = no body text found on this line yet. */
                    while (off < cchLine)
                    {
                        if (!IS_XML_COMMENT_END(pchLine, off, cchLine))
                        {
                            char ch = pchLine[off++];
                            if (RT_C_IS_BLANK(ch))
                            {/* kind of likely */}
                            else
                            {
                                offLastNonBlank = off - 1;
                                if (offFirstNonBlank != ~(size_t)0)
                                {/* likely */}
                                else if (   (ch != '*' && ch != '#') /* ignore continuation-asterisks */
                                         || off > Info.offStart + 1 + 1
                                         || off > cchLine
                                         || (   off < cchLine
                                             && !RT_C_IS_SPACE(pchLine[off]))
                                         || pszBody == NULL)
                                    offFirstNonBlank = off - 1;
                            }
                        }
                        else
                        {
                            /* Found the end marker; step over its three characters. */
                            off += 3;
                            Info.offEnd = (uint32_t)off;
                            Info.iLineEnd = iLine;
                            break;
                        }
                    }

                    /* Append line content to the comment body string. */
                    size_t cchAppend;
                    if (offFirstNonBlank == ~(size_t)0)
                        cchAppend = 0; /* empty line */
                    else
                    {
                        /* Unlike the C and python variants, all leading blanks are dropped here. */
                        offLineStart = offFirstNonBlank;
                        cchAppend = offLastNonBlank + 1 - offLineStart;
                        Assert(cchAppend <= cchLine);
                    }

                    /* Grow the buffer when the new body plus terminator would not fit
                       (this also performs the initial allocation). */
                    size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
                    if (cchNewBody >= cbBodyAlloc)
                    {
                        cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
                        void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
                        if (pvNew)
                            pszBody = (char *)pvNew;
                        else
                        {
                            RTMemFree(pszBody);
                            return VERR_NO_MEMORY;
                        }
                    }

                    if (cchBody > 0) /* no leading blank lines */
                        pszBody[cchBody++] = '\n';
                    else if (cchAppend == 0)
                        Info.cBlankLinesBefore++;
                    memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
                    cchBody += cchAppend;
                    pszBody[cchBody] = '\0';

                    /* Advance to the next line, if we haven't yet seen the end of this comment. */
                    if (Info.iLineEnd != UINT32_MAX)
                        break;
                    pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
                    if (!pchLine)
                    {
                        /* Unterminated comment: let it end with the last line that was read. */
                        Info.offEnd = (uint32_t)cchLine;
                        Info.iLineEnd = iLine;
                        break;
                    }
                    iLine++;
                    off = 0;
                }

                /* Strip trailing empty lines in the body. */
                Info.cBlankLinesAfter = 0;
                while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
                {
                    Info.cBlankLinesAfter++;
                    pszBody[--cchBody] = '\0';
                }

                /* Do the callback. */
                int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
                RTMemFree(pszBody);
                if (RT_FAILURE(rc))
                    return rc;
                if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
                    rcRet = rc;
            }
            else
                off++;
        } /* for each character in the line */

        iLine++;
    } /* for each line in the stream */

    /* A stream error takes precedence over whatever the callbacks returned. */
    int rcStream = ScmStreamGetStatus(pIn);
    if (RT_SUCCESS(rcStream))
        return rcRet;
    return rcStream;
}
998
999
1000/**
1001 * Deals with comments in DOS batch files.
1002 *
1003 * @returns VBox status code / callback return code.
1004 * @param pIn The stream to parse.
1005 * @param pfnCallback The callback.
1006 * @param pvUser The user parameter for the callback.
1007 */
1008static int enumerateBatchComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
1009{
1010 int rcRet = VINF_SUCCESS;
1011 uint32_t iLine = 0;
1012 SCMEOL enmEol;
1013 size_t cchLine;
1014 const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
1015 while (pchLine != NULL)
1016 {
1017 /*
1018 * Skip leading blanks and check for 'rem'.
1019 * At the moment we do not parse '::label-comments'.
1020 */
1021 size_t off = 0;
1022 while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
1023 off++;
1024 if (!IS_REM(pchLine, off, cchLine))
1025 {
1026 iLine++;
1027 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
1028 }
1029 else
1030 {
1031 int rc = handleLineComment(pIn, isBatchComment, pfnCallback, pvUser,
1032 &pchLine, &cchLine, &enmEol, &iLine, &off);
1033 if (RT_FAILURE(rc))
1034 return rc;
1035 if (rcRet == VINF_SUCCESS)
1036 rcRet = rc;
1037 }
1038 }
1039
1040 int rcStream = ScmStreamGetStatus(pIn);
1041 if (RT_SUCCESS(rcStream))
1042 return rcRet;
1043 return rcStream;
1044}
1045
1046
1047/**
1048 * Deals with comments in SQL files.
1049 *
1050 * @returns VBox status code / callback return code.
1051 * @param pIn The stream to parse.
1052 * @param pfnCallback The callback.
1053 * @param pvUser The user parameter for the callback.
1054 */
1055static int enumerateSqlComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
1056{
1057 int rcRet = VINF_SUCCESS;
1058 uint32_t iLine = 0;
1059 SCMEOL enmEol;
1060 size_t cchLine;
1061 const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
1062 while (pchLine != NULL)
1063 {
1064 /*
1065 * Skip leading blanks and check for '--'.
1066 */
1067 size_t off = 0;
1068 while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
1069 off++;
1070 if ( cchLine < 2
1071 || pchLine[0] != '-'
1072 || pchLine[1] != '-')
1073 {
1074 iLine++;
1075 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
1076 }
1077 else
1078 {
1079 int rc = handleLineComment(pIn, isSqlComment, pfnCallback, pvUser,
1080 &pchLine, &cchLine, &enmEol, &iLine, &off);
1081 if (RT_FAILURE(rc))
1082 return rc;
1083 if (rcRet == VINF_SUCCESS)
1084 rcRet = rc;
1085 }
1086 }
1087
1088 int rcStream = ScmStreamGetStatus(pIn);
1089 if (RT_SUCCESS(rcStream))
1090 return rcRet;
1091 return rcStream;
1092}
1093
1094
1095/**
1096 * Deals with simple line comments.
1097 *
1098 * @returns VBox status code / callback return code.
1099 * @param pIn The stream to parse.
1100 * @param chStart The start of comment character.
1101 * @param pfnIsComment Comment tester function.
1102 * @param pfnCallback The callback.
1103 * @param pvUser The user parameter for the callback.
1104 */
1105static int enumerateSimpleLineComments(PSCMSTREAM pIn, char chStart, PFNISCOMMENT pfnIsComment,
1106 PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
1107{
1108 int rcRet = VINF_SUCCESS;
1109 uint32_t iLine = 0;
1110 SCMEOL enmEol;
1111 size_t cchLine;
1112 const char *pchLine;
1113 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
1114 {
1115 size_t off = 0;
1116 while (off < cchLine)
1117 {
1118 char ch = pchLine[off++];
1119 if (ch != chStart)
1120 { /* not interesting */ }
1121 else
1122 {
1123 off -= 1;
1124 int rc = handleLineComment(pIn, pfnIsComment, pfnCallback, pvUser,
1125 &pchLine, &cchLine, &enmEol, &iLine, &off);
1126 if (RT_FAILURE(rc))
1127 return rc;
1128 if (rcRet == VINF_SUCCESS)
1129 rcRet = rc;
1130
1131 if (!pchLine)
1132 break;
1133 }
1134 } /* for each character in the line */
1135
1136 iLine++;
1137 } /* for each line in the stream */
1138
1139 int rcStream = ScmStreamGetStatus(pIn);
1140 if (RT_SUCCESS(rcStream))
1141 return rcRet;
1142 return rcStream;
1143}
1144
1145
1146/**
1147 * Enumerates the comments in the given stream, calling @a pfnCallback for each.
1148 *
1149 * @returns IPRT status code.
1150 * @param pIn The stream to parse.
1151 * @param enmCommentStyle The comment style of the source stream.
1152 * @param pfnCallback The function to call.
1153 * @param pvUser User argument to the callback.
1154 */
1155int ScmEnumerateComments(PSCMSTREAM pIn, SCMCOMMENTSTYLE enmCommentStyle, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
1156{
1157 switch (enmCommentStyle)
1158 {
1159 case kScmCommentStyle_C:
1160 return enumerateCStyleComments(pIn, pfnCallback, pvUser);
1161
1162 case kScmCommentStyle_Python:
1163 return enumeratePythonComments(pIn, pfnCallback, pvUser);
1164
1165 case kScmCommentStyle_Semicolon:
1166 return enumerateSimpleLineComments(pIn, ';', isSemicolonComment, pfnCallback, pvUser);
1167
1168 case kScmCommentStyle_Hash:
1169 return enumerateSimpleLineComments(pIn, '#', isHashComment, pfnCallback, pvUser);
1170
1171 case kScmCommentStyle_Rem_Upper:
1172 case kScmCommentStyle_Rem_Lower:
1173 case kScmCommentStyle_Rem_Camel:
1174 return enumerateBatchComments(pIn, pfnCallback, pvUser);
1175
1176 case kScmCommentStyle_Sql:
1177 return enumerateSqlComments(pIn, pfnCallback, pvUser);
1178
1179 case kScmCommentStyle_Tick:
1180 return enumerateSimpleLineComments(pIn, '\'', isTickComment, pfnCallback, pvUser);
1181
1182 case kScmCommentStyle_Xml:
1183 return enumerateXmlComments(pIn, pfnCallback, pvUser);
1184
1185 default:
1186 AssertFailedReturn(VERR_INVALID_PARAMETER);
1187 }
1188}
1189
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette