VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/getoptargv.cpp@ 56002

Last change on this file since 56002 was 56002, checked in by vboxsync, 10 years ago

RTGetOptArgvFromString/MS_CRT: Implemented undocumented doubledouble quotes, current CRT style.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.5 KB
Line 
1/* $Id: getoptargv.cpp 56002 2015-05-21 12:19:28Z vboxsync $ */
2/** @file
3 * IPRT - Command Line Parsing, Argument Vector.
4 */
5
6/*
7 * Copyright (C) 2010-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#include <iprt/getopt.h>
31#include "internal/iprt.h"
32
33#include <iprt/asm.h>
34#include <iprt/assert.h>
35#include <iprt/err.h>
36#include <iprt/mem.h>
37#include <iprt/string.h>
38
39
40/*******************************************************************************
41* Global Variables *
42*******************************************************************************/
43/**
44 * Array indexed by the quoting type and 7-bit ASCII character.
45 *
46 * We include some extra stuff here that the corresponding shell would normally
47 * require quoting of.
48 */
49static uint8_t
50#ifndef IPRT_REGENERATE_QUOTE_CHARS
51const
52#endif
53g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][16] =
54{
55 { 0xfe, 0xff, 0xff, 0xff, 0x65, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 },
56 { 0xfe, 0xff, 0xff, 0xff, 0xd7, 0x07, 0x00, 0xd8, 0x00, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00, 0x50 },
57};
58
59
60#ifdef IPRT_REGENERATE_QUOTE_CHARS /* To re-generate the bitmaps. */
61# include <stdio.h>
62int main()
63{
64 RT_ZERO(g_abmQuoteChars);
65
66# define SET_ALL(ch) \
67 do { \
68 for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
69 ASMBitSet(&g_abmQuoteChars[iType], (ch)); \
70 } while (0)
71# define SET(ConstSuffix, ch) \
72 do { \
73 ASMBitSet(&g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch)); \
74 printf(#ConstSuffix ": %#x %d %c\n", (ch), (ch), (ch)); \
75 } while (0)
76
77 /* just flag all the control chars as in need of quoting. */
78 for (char ch = 1; ch < 0x20; ch++)
79 SET_ALL(ch);
80
81 /* ... and space of course */
82 SET_ALL(' ');
83
84 /* MS CRT / CMD.EXE: */
85 SET(MS_CRT, '"');
86 SET(MS_CRT, '&');
87 SET(MS_CRT, '>');
88 SET(MS_CRT, '<');
89 SET(MS_CRT, '|');
90 SET(MS_CRT, '%');
91
92 /* Bourne shell: */
93 SET(BOURNE_SH, '!');
94 SET(BOURNE_SH, '"');
95 SET(BOURNE_SH, '$');
96 SET(BOURNE_SH, '&');
97 SET(BOURNE_SH, '(');
98 SET(BOURNE_SH, ')');
99 SET(BOURNE_SH, '*');
100 SET(BOURNE_SH, ';');
101 SET(BOURNE_SH, '<');
102 SET(BOURNE_SH, '>');
103 SET(BOURNE_SH, '?');
104 SET(BOURNE_SH, '[');
105 SET(BOURNE_SH, '\'');
106 SET(BOURNE_SH, '\\');
107 SET(BOURNE_SH, '`');
108 SET(BOURNE_SH, '|');
109 SET(BOURNE_SH, '~');
110
111 for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
112 {
113 printf(" {");
114 for (size_t iByte = 0; iByte < 16; iByte++)
115 printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", g_abmQuoteChars[iType][iByte]);
116 printf(" },\n");
117 }
118 return 0;
119}
120
121#else /* !IPRT_REGENERATE_QUOTE_CHARS */
122
123/**
124 * Look for an unicode code point in the separator string.
125 *
126 * @returns true if it's a separator, false if it isn't.
127 * @param Cp The code point.
128 * @param pszSeparators The separators.
129 */
130static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
131{
132 /* This could be done in a more optimal fashion. Probably worth a
133 separate RTStr function at some point. */
134 for (;;)
135 {
136 RTUNICP CpSep;
137 int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
138 AssertRCReturn(rc, false);
139 if (CpSep == Cp)
140 return true;
141 if (!CpSep)
142 return false;
143 }
144}
145
146
147/**
148 * Look for an 7-bit ASCII character in the separator string.
149 *
150 * @returns true if it's a separator, false if it isn't.
151 * @param ch The character.
152 * @param pszSeparators The separators.
153 * @param cchSeparators The number of separators chars.
154 */
155DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
156{
157 switch (cchSeparators)
158 {
159 case 8: if (ch == pszSeparators[7]) return true;
160 case 7: if (ch == pszSeparators[6]) return true;
161 case 6: if (ch == pszSeparators[5]) return true;
162 case 5: if (ch == pszSeparators[4]) return true;
163 case 4: if (ch == pszSeparators[3]) return true;
164 case 3: if (ch == pszSeparators[2]) return true;
165 case 2: if (ch == pszSeparators[1]) return true;
166 case 1: if (ch == pszSeparators[0]) return true;
167 return false;
168 default:
169 return memchr(pszSeparators, ch, cchSeparators) != NULL;
170 }
171}
172
173
174/**
175 * Checks if the character is in the set of separators
176 *
177 * @returns true if it is, false if it isn't.
178 *
179 * @param Cp The code point.
180 * @param pszSeparators The separators.
181 * @param cchSeparators The length of @a pszSeparators.
182 */
183DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
184{
185 if (RT_LIKELY(Cp <= 127))
186 return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
187 return rtGetOptIsUniCpInString(Cp, pszSeparators);
188}
189
190
191/**
192 * Skips any delimiters at the start of the string that is pointed to.
193 *
194 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
195 * @param ppszSrc Where to get and return the string pointer.
196 * @param pszSeparators The separators.
197 * @param cchSeparators The length of @a pszSeparators.
198 */
199static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
200{
201 const char *pszSrc = *ppszSrc;
202 const char *pszRet;
203 for (;;)
204 {
205 pszRet = pszSrc;
206 RTUNICP Cp;
207 int rc = RTStrGetCpEx(&pszSrc, &Cp);
208 if (RT_FAILURE(rc))
209 {
210 *ppszSrc = pszRet;
211 return rc;
212 }
213 if ( !Cp
214 || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
215 break;
216 }
217
218 *ppszSrc = pszRet;
219 return VINF_SUCCESS;
220}
221
222
223RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine,
224 uint32_t fFlags, const char *pszSeparators)
225{
226 /*
227 * Some input validation.
228 */
229 AssertPtr(pszCmdLine);
230 AssertPtr(pcArgs);
231 AssertPtr(ppapszArgv);
232 AssertReturn( fFlags == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH
233 || fFlags == RTGETOPTARGV_CNV_QUOTE_MS_CRT, VERR_INVALID_FLAGS);
234 if (!pszSeparators)
235 pszSeparators = " \t\n\r";
236 else
237 AssertPtr(pszSeparators);
238 size_t const cchSeparators = strlen(pszSeparators);
239 AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
240
241 /*
242 * Parse the command line and chop off it into argv individual argv strings.
243 */
244 int rc = VINF_SUCCESS;
245 const char *pszSrc = pszCmdLine;
246 char *pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
247 char *pszDst = pszDup;
248 if (!pszDup)
249 return VERR_NO_STR_MEMORY;
250 char **papszArgs = NULL;
251 unsigned iArg = 0;
252 while (*pszSrc)
253 {
254 /* Skip stuff */
255 rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
256 if (RT_FAILURE(rc))
257 break;
258 if (!*pszSrc)
259 break;
260
261 /* Start a new entry. */
262 if ((iArg % 32) == 0)
263 {
264 void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
265 if (!pvNew)
266 {
267 rc = VERR_NO_MEMORY;
268 break;
269 }
270 papszArgs = (char **)pvNew;
271 }
272 papszArgs[iArg++] = pszDst;
273
274 /*
275 * Parse and copy the string over.
276 */
277 RTUNICP Cp;
278 if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH)
279 {
280 /*
281 * Bourne shell style.
282 */
283 RTUNICP CpQuote = 0;
284 for (;;)
285 {
286 rc = RTStrGetCpEx(&pszSrc, &Cp);
287 if (RT_FAILURE(rc) || !Cp)
288 break;
289 if (!CpQuote)
290 {
291 if (Cp == '"' || Cp == '\'')
292 CpQuote = Cp;
293 else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
294 break;
295 else if (Cp != '\\')
296 pszDst = RTStrPutCp(pszDst, Cp);
297 else
298 {
299 /* escaped char */
300 rc = RTStrGetCpEx(&pszSrc, &Cp);
301 if (RT_FAILURE(rc) || !Cp)
302 break;
303 pszDst = RTStrPutCp(pszDst, Cp);
304 }
305 }
306 else if (CpQuote != Cp)
307 {
308 if (Cp != '\\' || CpQuote == '\'')
309 pszDst = RTStrPutCp(pszDst, Cp);
310 else
311 {
312 /* escaped char */
313 rc = RTStrGetCpEx(&pszSrc, &Cp);
314 if (RT_FAILURE(rc) || !Cp)
315 break;
316 pszDst = RTStrPutCp(pszDst, Cp);
317 }
318 }
319 else
320 CpQuote = 0;
321 }
322 }
323 else
324 {
325 /*
326 * Microsoft CRT style.
327 */
328 Assert((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT);
329 bool fInQuote = false;
330 for (;;)
331 {
332 rc = RTStrGetCpEx(&pszSrc, &Cp);
333 if (RT_FAILURE(rc) || !Cp)
334 break;
335 if (Cp == '"')
336 {
337 /* Two double quotes insides a quoted string in an escape
338 sequence and we output one double quote char.
339 See http://www.daviddeley.com/autohotkey/parameters/parameters.htm */
340 if (!fInQuote)
341 fInQuote = true;
342 else if (*pszSrc != '"')
343 fInQuote = false;
344 else
345 {
346 pszDst = RTStrPutCp(pszDst, '"');
347 pszSrc++;
348 }
349 }
350 else if (!fInQuote && rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
351 break;
352 else if (Cp != '\\')
353 pszDst = RTStrPutCp(pszDst, Cp);
354 else
355 {
356 /* A backslash sequence is only relevant if followed by
357 a double quote, then it will work like an escape char. */
358 size_t cSlashes = 1;
359 while (*pszSrc == '\\')
360 {
361 cSlashes++;
362 pszSrc++;
363 }
364 if (*pszSrc != '"')
365 /* Not an escape sequence. */
366 while (cSlashes-- > 0)
367 pszDst = RTStrPutCp(pszDst, '\\');
368 else
369 {
370 /* Escape sequence. Output half of the slashes. If odd
371 number, output the escaped double quote . */
372 while (cSlashes >= 2)
373 {
374 pszDst = RTStrPutCp(pszDst, '\\');
375 cSlashes -= 2;
376 }
377 if (cSlashes)
378 {
379 pszDst = RTStrPutCp(pszDst, '"');
380 pszSrc++;
381 }
382 }
383 }
384 }
385 }
386
387 *pszDst++ = '\0';
388 if (RT_FAILURE(rc) || !Cp)
389 break;
390 }
391
392 if (RT_FAILURE(rc))
393 {
394 RTMemFree(pszDup);
395 RTMemFree(papszArgs);
396 return rc;
397 }
398
399 /*
400 * Terminate the array.
401 * Check for empty string to make sure we've got an array.
402 */
403 if (iArg == 0)
404 {
405 RTMemFree(pszDup);
406 papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
407 if (!papszArgs)
408 return VERR_NO_MEMORY;
409 }
410 papszArgs[iArg] = NULL;
411
412 *pcArgs = iArg;
413 *ppapszArgv = papszArgs;
414 return VINF_SUCCESS;
415}
416
417
418RTDECL(void) RTGetOptArgvFree(char **papszArgv)
419{
420 if (papszArgv)
421 {
422 /*
423 * We've really only _two_ allocations here. Check the code in
424 * RTGetOptArgvFromString for the particulars.
425 */
426 RTMemFree(papszArgv[0]);
427 RTMemFree(papszArgv);
428 }
429}
430
431
432/**
433 * Checks if the argument needs quoting or not.
434 *
435 * @returns true if it needs, false if it don't.
436 * @param pszArg The argument.
437 * @param fFlags Quoting style.
438 * @param pcch Where to store the argument length when quoting
439 * is not required. (optimization)
440 */
441DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
442{
443 if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) != RTGETOPTARGV_CNV_UNQUOTED)
444 {
445 char const *psz = pszArg;
446 unsigned char ch;
447 while ((ch = (unsigned char)*psz))
448 {
449 if ( ch < 128
450 && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
451 return true;
452 psz++;
453 }
454
455 *pcch = psz - pszArg;
456 }
457 else
458 *pcch = strlen(pszArg);
459 return false;
460}
461
462
463/**
464 * Grows the command line string buffer.
465 *
466 * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
467 * @param ppszCmdLine Pointer to the command line string pointer.
468 * @param pcbCmdLineAlloc Pointer to the allocation length variable.
469 * @param cchMin The minimum size to grow with, kind of.
470 */
471static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
472{
473 size_t cb = *pcbCmdLineAlloc;
474 while (cb < cchMin)
475 cb *= 2;
476 cb *= 2;
477 *pcbCmdLineAlloc = cb;
478 return RTStrRealloc(ppszCmdLine, cb);
479}
480
481/**
482 * Checks if we have a sequence of DOS slashes followed by a double quote char.
483 *
484 * @returns true / false accordingly.
485 * @param psz The string.
486 */
487DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
488{
489 while (*psz == '\\')
490 psz++;
491 return *psz == '"' || *psz == '\0';
492}
493
494
495RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
496{
497 AssertReturn(fFlags <= RTGETOPTARGV_CNV_UNQUOTED, VERR_INVALID_PARAMETER);
498
499#define PUT_CH(ch) \
500 if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
501 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
502 if (RT_FAILURE(rc)) \
503 break; \
504 } \
505 pszCmdLine[off++] = (ch)
506
507#define PUT_PSZ(psz, cch) \
508 if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
509 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
510 if (RT_FAILURE(rc)) \
511 break; \
512 } \
513 memcpy(&pszCmdLine[off], (psz), (cch)); \
514 off += (cch);
515#define PUT_SZ(sz) PUT_PSZ(sz, sizeof(sz) - 1)
516
517 /*
518 * Take the realloc approach, it requires less code and is probably more
519 * efficient than figuring out the size first.
520 */
521 int rc = VINF_SUCCESS;
522 size_t off = 0;
523 size_t cbCmdLineAlloc = 256;
524 char *pszCmdLine = RTStrAlloc(256);
525 if (!pszCmdLine)
526 return VERR_NO_STR_MEMORY;
527
528 for (size_t i = 0; papszArgv[i]; i++)
529 {
530 if (i > 0)
531 {
532 PUT_CH(' ');
533 }
534
535 /* does it need quoting? */
536 const char *pszArg = papszArgv[i];
537 size_t cchArg;
538 if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
539 {
540 /* No quoting needed, just append the argument. */
541 PUT_PSZ(pszArg, cchArg);
542 }
543 else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
544 {
545 /*
546 * Microsoft CRT quoting. Quote the whole argument in double
547 * quotes to make it easier to read and code.
548 */
549 PUT_CH('"');
550 char ch;
551 while ((ch = *pszArg++))
552 {
553 if ( ch == '\\'
554 && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
555 {
556 PUT_SZ("\\\\");
557 }
558 else if (ch == '"')
559 {
560 PUT_SZ("\\\"");
561 }
562 else
563 {
564 PUT_CH(ch);
565 }
566 }
567 PUT_CH('"');
568 }
569 else
570 {
571 /*
572 * Bourne Shell quoting. Quote the whole thing in single quotes
573 * and use double quotes for any single quote chars.
574 */
575 PUT_CH('\'');
576 char ch;
577 while ((ch = *pszArg++))
578 {
579 if (ch == '\'')
580 {
581 PUT_SZ("'\"'\"'");
582 }
583 else
584 {
585 PUT_CH(ch);
586 }
587 }
588 PUT_CH('\'');
589 }
590 }
591
592 /* Set return value / cleanup. */
593 if (RT_SUCCESS(rc))
594 {
595 pszCmdLine[off] = '\0';
596 *ppszCmdLine = pszCmdLine;
597 }
598 else
599 RTStrFree(pszCmdLine);
600#undef PUT_SZ
601#undef PUT_PSZ
602#undef PUT_CH
603 return rc;
604}
605
606
607RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
608{
609 char *pszCmdLine;
610 int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
611 if (RT_SUCCESS(rc))
612 {
613 rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
614 RTStrFree(pszCmdLine);
615 }
616 return rc;
617}
618
619#endif /* !IPRT_REGENERATE_QUOTE_CHARS */
620
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette