VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/getoptargv.cpp@ 66861

最後變更 在這個檔案從66861是 66861,由 vboxsync 提交於 8 年 前

RTGetOptArgvFromString: Fixed incorrect handling of escape sequences inside double quotes in bourne shell mode. Only double quote, backslash, backtick, dollar and newline are valid characters to escape.

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 19.2 KB
 
1/* $Id: getoptargv.cpp 66861 2017-05-10 12:56:16Z vboxsync $ */
2/** @file
3 * IPRT - Command Line Parsing, Argument Vector.
4 */
5
6/*
7 * Copyright (C) 2010-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/getopt.h>
32#include "internal/iprt.h"
33
34#include <iprt/asm.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/mem.h>
38#include <iprt/string.h>
39
40
41/*********************************************************************************************************************************
42* Global Variables *
43*********************************************************************************************************************************/
44/**
45 * Array indexed by the quoting type and 7-bit ASCII character.
46 *
47 * We include some extra stuff here that the corresponding shell would normally
48 * require quoting of.
49 */
50static uint8_t
51#ifndef IPRT_REGENERATE_QUOTE_CHARS
52const
53#endif
54g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][16] =
55{
56 { 0xfe, 0xff, 0xff, 0xff, 0x65, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 },
57 { 0xfe, 0xff, 0xff, 0xff, 0xd7, 0x07, 0x00, 0xd8, 0x00, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00, 0x50 },
58};
59
60
61#ifdef IPRT_REGENERATE_QUOTE_CHARS /* To re-generate the bitmaps. */
62# include <stdio.h>
63int main()
64{
65 RT_ZERO(g_abmQuoteChars);
66
67# define SET_ALL(ch) \
68 do { \
69 for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
70 ASMBitSet(&g_abmQuoteChars[iType], (ch)); \
71 } while (0)
72# define SET(ConstSuffix, ch) \
73 do { \
74 ASMBitSet(&g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch)); \
75 printf(#ConstSuffix ": %#x %d %c\n", (ch), (ch), (ch)); \
76 } while (0)
77
78 /* just flag all the control chars as in need of quoting. */
79 for (char ch = 1; ch < 0x20; ch++)
80 SET_ALL(ch);
81
82 /* ... and space of course */
83 SET_ALL(' ');
84
85 /* MS CRT / CMD.EXE: */
86 SET(MS_CRT, '"');
87 SET(MS_CRT, '&');
88 SET(MS_CRT, '>');
89 SET(MS_CRT, '<');
90 SET(MS_CRT, '|');
91 SET(MS_CRT, '%');
92
93 /* Bourne shell: */
94 SET(BOURNE_SH, '!');
95 SET(BOURNE_SH, '"');
96 SET(BOURNE_SH, '$');
97 SET(BOURNE_SH, '&');
98 SET(BOURNE_SH, '(');
99 SET(BOURNE_SH, ')');
100 SET(BOURNE_SH, '*');
101 SET(BOURNE_SH, ';');
102 SET(BOURNE_SH, '<');
103 SET(BOURNE_SH, '>');
104 SET(BOURNE_SH, '?');
105 SET(BOURNE_SH, '[');
106 SET(BOURNE_SH, '\'');
107 SET(BOURNE_SH, '\\');
108 SET(BOURNE_SH, '`');
109 SET(BOURNE_SH, '|');
110 SET(BOURNE_SH, '~');
111
112 for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
113 {
114 printf(" {");
115 for (size_t iByte = 0; iByte < 16; iByte++)
116 printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", g_abmQuoteChars[iType][iByte]);
117 printf(" },\n");
118 }
119 return 0;
120}
121
122#else /* !IPRT_REGENERATE_QUOTE_CHARS */
123
124/**
125 * Look for an unicode code point in the separator string.
126 *
127 * @returns true if it's a separator, false if it isn't.
128 * @param Cp The code point.
129 * @param pszSeparators The separators.
130 */
131static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
132{
133 /* This could be done in a more optimal fashion. Probably worth a
134 separate RTStr function at some point. */
135 for (;;)
136 {
137 RTUNICP CpSep;
138 int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
139 AssertRCReturn(rc, false);
140 if (CpSep == Cp)
141 return true;
142 if (!CpSep)
143 return false;
144 }
145}
146
147
/**
 * Checks whether a 7-bit ASCII character is one of the separators.
 *
 * @returns true if @a ch is found among the first @a cchSeparators characters
 *          of @a pszSeparators, false if not.
 * @param   ch              The character.
 * @param   pszSeparators   The separators.
 * @param   cchSeparators   The number of separator chars.
 */
DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
{
    /* Larger sets (and the degenerate empty one) are left to memchr. */
    if (cchSeparators == 0 || cchSeparators > 8)
        return memchr(pszSeparators, ch, cchSeparators) != NULL;

    /* Small sets are compared directly, last separator first. */
    size_t idx = cchSeparators;
    while (idx-- > 0)
        if (pszSeparators[idx] == ch)
            return true;
    return false;
}
173
174
175/**
176 * Checks if the character is in the set of separators
177 *
178 * @returns true if it is, false if it isn't.
179 *
180 * @param Cp The code point.
181 * @param pszSeparators The separators.
182 * @param cchSeparators The length of @a pszSeparators.
183 */
184DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
185{
186 if (RT_LIKELY(Cp <= 127))
187 return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
188 return rtGetOptIsUniCpInString(Cp, pszSeparators);
189}
190
191
192/**
193 * Skips any delimiters at the start of the string that is pointed to.
194 *
195 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
196 * @param ppszSrc Where to get and return the string pointer.
197 * @param pszSeparators The separators.
198 * @param cchSeparators The length of @a pszSeparators.
199 */
200static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
201{
202 const char *pszSrc = *ppszSrc;
203 const char *pszRet;
204 for (;;)
205 {
206 pszRet = pszSrc;
207 RTUNICP Cp;
208 int rc = RTStrGetCpEx(&pszSrc, &Cp);
209 if (RT_FAILURE(rc))
210 {
211 *ppszSrc = pszRet;
212 return rc;
213 }
214 if ( !Cp
215 || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
216 break;
217 }
218
219 *ppszSrc = pszRet;
220 return VINF_SUCCESS;
221}
222
223
224RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine,
225 uint32_t fFlags, const char *pszSeparators)
226{
227 /*
228 * Some input validation.
229 */
230 AssertPtr(pszCmdLine);
231 AssertPtr(pcArgs);
232 AssertPtr(ppapszArgv);
233 AssertReturn( fFlags == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH
234 || fFlags == RTGETOPTARGV_CNV_QUOTE_MS_CRT, VERR_INVALID_FLAGS);
235 if (!pszSeparators)
236 pszSeparators = " \t\n\r";
237 else
238 AssertPtr(pszSeparators);
239 size_t const cchSeparators = strlen(pszSeparators);
240 AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
241
242 /*
243 * Parse the command line and chop off it into argv individual argv strings.
244 */
245 int rc = VINF_SUCCESS;
246 const char *pszSrc = pszCmdLine;
247 char *pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
248 char *pszDst = pszDup;
249 if (!pszDup)
250 return VERR_NO_STR_MEMORY;
251 char **papszArgs = NULL;
252 unsigned iArg = 0;
253 while (*pszSrc)
254 {
255 /* Skip stuff */
256 rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
257 if (RT_FAILURE(rc))
258 break;
259 if (!*pszSrc)
260 break;
261
262 /* Start a new entry. */
263 if ((iArg % 32) == 0)
264 {
265 void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
266 if (!pvNew)
267 {
268 rc = VERR_NO_MEMORY;
269 break;
270 }
271 papszArgs = (char **)pvNew;
272 }
273 papszArgs[iArg++] = pszDst;
274
275 /*
276 * Parse and copy the string over.
277 */
278 RTUNICP uc;
279 if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH)
280 {
281 /*
282 * Bourne shell style.
283 */
284 RTUNICP ucQuote = 0;
285 for (;;)
286 {
287 rc = RTStrGetCpEx(&pszSrc, &uc);
288 if (RT_FAILURE(rc) || !uc)
289 break;
290 if (!ucQuote)
291 {
292 if (uc == '"' || uc == '\'')
293 ucQuote = uc;
294 else if (rtGetOptIsCpInSet(uc, pszSeparators, cchSeparators))
295 break;
296 else if (uc != '\\')
297 pszDst = RTStrPutCp(pszDst, uc);
298 else
299 {
300 /* escaped char */
301 rc = RTStrGetCpEx(&pszSrc, &uc);
302 if (RT_FAILURE(rc) || !uc)
303 break;
304 pszDst = RTStrPutCp(pszDst, uc);
305 }
306 }
307 else if (ucQuote != uc)
308 {
309 if (uc != '\\' || ucQuote == '\'')
310 pszDst = RTStrPutCp(pszDst, uc);
311 else
312 {
313 /* escaped char */
314 rc = RTStrGetCpEx(&pszSrc, &uc);
315 if (RT_FAILURE(rc) || !uc)
316 break;
317 if ( uc != '"'
318 && uc != '\\'
319 && uc != '`'
320 && uc != '$'
321 && uc != '\n')
322 pszDst = RTStrPutCp(pszDst, ucQuote);
323 pszDst = RTStrPutCp(pszDst, uc);
324 }
325 }
326 else
327 ucQuote = 0;
328 }
329 }
330 else
331 {
332 /*
333 * Microsoft CRT style.
334 */
335 Assert((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT);
336 bool fInQuote = false;
337 for (;;)
338 {
339 rc = RTStrGetCpEx(&pszSrc, &uc);
340 if (RT_FAILURE(rc) || !uc)
341 break;
342 if (uc == '"')
343 {
344 /* Two double quotes insides a quoted string in an escape
345 sequence and we output one double quote char.
346 See http://www.daviddeley.com/autohotkey/parameters/parameters.htm */
347 if (!fInQuote)
348 fInQuote = true;
349 else if (*pszSrc != '"')
350 fInQuote = false;
351 else
352 {
353 pszDst = RTStrPutCp(pszDst, '"');
354 pszSrc++;
355 }
356 }
357 else if (!fInQuote && rtGetOptIsCpInSet(uc, pszSeparators, cchSeparators))
358 break;
359 else if (uc != '\\')
360 pszDst = RTStrPutCp(pszDst, uc);
361 else
362 {
363 /* A backslash sequence is only relevant if followed by
364 a double quote, then it will work like an escape char. */
365 size_t cSlashes = 1;
366 while (*pszSrc == '\\')
367 {
368 cSlashes++;
369 pszSrc++;
370 }
371 if (*pszSrc != '"')
372 /* Not an escape sequence. */
373 while (cSlashes-- > 0)
374 pszDst = RTStrPutCp(pszDst, '\\');
375 else
376 {
377 /* Escape sequence. Output half of the slashes. If odd
378 number, output the escaped double quote . */
379 while (cSlashes >= 2)
380 {
381 pszDst = RTStrPutCp(pszDst, '\\');
382 cSlashes -= 2;
383 }
384 if (cSlashes)
385 {
386 pszDst = RTStrPutCp(pszDst, '"');
387 pszSrc++;
388 }
389 }
390 }
391 }
392 }
393
394 *pszDst++ = '\0';
395 if (RT_FAILURE(rc) || !uc)
396 break;
397 }
398
399 if (RT_FAILURE(rc))
400 {
401 RTMemFree(pszDup);
402 RTMemFree(papszArgs);
403 return rc;
404 }
405
406 /*
407 * Terminate the array.
408 * Check for empty string to make sure we've got an array.
409 */
410 if (iArg == 0)
411 {
412 RTMemFree(pszDup);
413 papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
414 if (!papszArgs)
415 return VERR_NO_MEMORY;
416 }
417 papszArgs[iArg] = NULL;
418
419 *pcArgs = iArg;
420 *ppapszArgv = papszArgs;
421 return VINF_SUCCESS;
422}
423
424
425RTDECL(void) RTGetOptArgvFree(char **papszArgv)
426{
427 if (papszArgv)
428 {
429 /*
430 * We've really only _two_ allocations here. Check the code in
431 * RTGetOptArgvFromString for the particulars.
432 */
433 RTMemFree(papszArgv[0]);
434 RTMemFree(papszArgv);
435 }
436}
437
438
439/**
440 * Checks if the argument needs quoting or not.
441 *
442 * @returns true if it needs, false if it don't.
443 * @param pszArg The argument.
444 * @param fFlags Quoting style.
445 * @param pcch Where to store the argument length when quoting
446 * is not required. (optimization)
447 */
448DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
449{
450 if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) != RTGETOPTARGV_CNV_UNQUOTED)
451 {
452 char const *psz = pszArg;
453 unsigned char ch;
454 while ((ch = (unsigned char)*psz))
455 {
456 if ( ch < 128
457 && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
458 return true;
459 psz++;
460 }
461
462 *pcch = psz - pszArg;
463 }
464 else
465 *pcch = strlen(pszArg);
466 return false;
467}
468
469
470/**
471 * Grows the command line string buffer.
472 *
473 * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
474 * @param ppszCmdLine Pointer to the command line string pointer.
475 * @param pcbCmdLineAlloc Pointer to the allocation length variable.
476 * @param cchMin The minimum size to grow with, kind of.
477 */
478static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
479{
480 size_t cb = *pcbCmdLineAlloc;
481 while (cb < cchMin)
482 cb *= 2;
483 cb *= 2;
484 *pcbCmdLineAlloc = cb;
485 return RTStrRealloc(ppszCmdLine, cb);
486}
487
/**
 * Checks whether the string starts with a (possibly empty) run of DOS slashes
 * that is followed by either a double quote char or the end of the string.
 *
 * @returns true / false accordingly.
 * @param   psz     The string.
 */
DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
{
    /* Find the first character that isn't a backslash. */
    size_t off = 0;
    while (psz[off] == '\\')
        off++;

    switch (psz[off])
    {
        case '"':
        case '\0':
            return true;
        default:
            return false;
    }
}
500
501
502RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
503{
504 AssertReturn(fFlags <= RTGETOPTARGV_CNV_UNQUOTED, VERR_INVALID_PARAMETER);
505
506#define PUT_CH(ch) \
507 if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
508 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
509 if (RT_FAILURE(rc)) \
510 break; \
511 } \
512 pszCmdLine[off++] = (ch)
513
514#define PUT_PSZ(psz, cch) \
515 if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
516 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
517 if (RT_FAILURE(rc)) \
518 break; \
519 } \
520 memcpy(&pszCmdLine[off], (psz), (cch)); \
521 off += (cch);
522#define PUT_SZ(sz) PUT_PSZ(sz, sizeof(sz) - 1)
523
524 /*
525 * Take the realloc approach, it requires less code and is probably more
526 * efficient than figuring out the size first.
527 */
528 int rc = VINF_SUCCESS;
529 size_t off = 0;
530 size_t cbCmdLineAlloc = 256;
531 char *pszCmdLine = RTStrAlloc(256);
532 if (!pszCmdLine)
533 return VERR_NO_STR_MEMORY;
534
535 for (size_t i = 0; papszArgv[i]; i++)
536 {
537 if (i > 0)
538 {
539 PUT_CH(' ');
540 }
541
542 /* does it need quoting? */
543 const char *pszArg = papszArgv[i];
544 size_t cchArg;
545 if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
546 {
547 /* No quoting needed, just append the argument. */
548 PUT_PSZ(pszArg, cchArg);
549 }
550 else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
551 {
552 /*
553 * Microsoft CRT quoting. Quote the whole argument in double
554 * quotes to make it easier to read and code.
555 */
556 PUT_CH('"');
557 char ch;
558 while ((ch = *pszArg++))
559 {
560 if ( ch == '\\'
561 && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
562 {
563 PUT_SZ("\\\\");
564 }
565 else if (ch == '"')
566 {
567 PUT_SZ("\\\"");
568 }
569 else
570 {
571 PUT_CH(ch);
572 }
573 }
574 PUT_CH('"');
575 }
576 else
577 {
578 /*
579 * Bourne Shell quoting. Quote the whole thing in single quotes
580 * and use double quotes for any single quote chars.
581 */
582 PUT_CH('\'');
583 char ch;
584 while ((ch = *pszArg++))
585 {
586 if (ch == '\'')
587 {
588 PUT_SZ("'\"'\"'");
589 }
590 else
591 {
592 PUT_CH(ch);
593 }
594 }
595 PUT_CH('\'');
596 }
597 }
598
599 /* Set return value / cleanup. */
600 if (RT_SUCCESS(rc))
601 {
602 pszCmdLine[off] = '\0';
603 *ppszCmdLine = pszCmdLine;
604 }
605 else
606 RTStrFree(pszCmdLine);
607#undef PUT_SZ
608#undef PUT_PSZ
609#undef PUT_CH
610 return rc;
611}
612
613
614RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
615{
616 char *pszCmdLine;
617 int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
618 if (RT_SUCCESS(rc))
619 {
620 rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
621 RTStrFree(pszCmdLine);
622 }
623 return rc;
624}
625
626#endif /* !IPRT_REGENERATE_QUOTE_CHARS */
627
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette