VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/getoptargv.cpp@ 37596

Last change on this file since 37596 was 33540, checked in by vboxsync, 14 years ago

*: spelling fixes, thanks Timeless!

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 14.3 KB
Line 
1/* $Id: getoptargv.cpp 33540 2010-10-28 09:27:05Z vboxsync $ */
2/** @file
3 * IPRT - Command Line Parsing, Argument Vector.
4 */
5
6/*
7 * Copyright (C) 2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#include <iprt/getopt.h>
31#include "internal/iprt.h"
32
33#include <iprt/asm.h>
34#include <iprt/assert.h>
35#include <iprt/err.h>
36#include <iprt/mem.h>
37#include <iprt/string.h>
38
39
40/*******************************************************************************
41* Global Variables *
42*******************************************************************************/
43/**
44 * Array indexed by the quoting type and 7-bit ASCII character.
45 *
46 * We include some extra stuff here that the corresponding shell would normally
47 * require quoting of.
48 */
49static uint8_t const g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][128/8] =
50{
51 { 0xfe, 0xff, 0x0f, 0x00, 0x65, 0x00, 0x00, 0x50 },
52 { 0xfe, 0xff, 0x0f, 0x00, 0xd7, 0x07, 0x00, 0xd8 },
53};
54
55
#if 0 /* To re-generate the bitmaps. */
#include <stdio.h>
/**
 * Bitmap generator: prints one initializer row per quoting type, covering all
 * 128 / 8 bytes of the row so that characters 64..127 are not dropped.
 */
int main()
{
    /* Build the bitmaps in a writable scratch array; the real table is const. */
    uint8_t abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][128/8];
    RT_ZERO(abmQuoteChars);

# define SET_ALL(ch) \
    do { \
        for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
            ASMBitSet(&abmQuoteChars[iType], (ch)); \
    } while (0)
# define SET(ConstSuffix, ch) \
    ASMBitSet(&abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch))

    /* just flag all the control chars (0x01..0x1f) as in need of quoting. */
    for (char ch = 1; ch < 0x20; ch++)
        SET_ALL(ch);

    /* ... and space of course */
    SET_ALL(' ');

    /* MS CRT / CMD.EXE: */
    SET(MS_CRT, '"');
    SET(MS_CRT, '&');
    SET(MS_CRT, '>');
    SET(MS_CRT, '<');
    SET(MS_CRT, '|');
    SET(MS_CRT, '%');

    /* Bourne shell: */
    SET(BOURNE_SH, '!');
    SET(BOURNE_SH, '"');
    SET(BOURNE_SH, '$');
    SET(BOURNE_SH, '&');
    SET(BOURNE_SH, '(');
    SET(BOURNE_SH, ')');
    SET(BOURNE_SH, '*');
    SET(BOURNE_SH, ';');
    SET(BOURNE_SH, '<');
    SET(BOURNE_SH, '>');
    SET(BOURNE_SH, '?');
    SET(BOURNE_SH, '[');
    SET(BOURNE_SH, '\'');
    SET(BOURNE_SH, '\\');
    SET(BOURNE_SH, '`');
    SET(BOURNE_SH, '|');
    SET(BOURNE_SH, '~');

    /* Dump every byte of each row, not just the first half. */
    for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
    {
        printf("    {");
        for (size_t iByte = 0; iByte < sizeof(abmQuoteChars[iType]); iByte++)
            printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", abmQuoteChars[iType][iByte]);
        printf(" },\n");
    }

# undef SET
# undef SET_ALL
    return 0;
}
#endif /* To re-generate the bitmaps. */
114
115
116/**
117 * Look for an unicode code point in the separator string.
118 *
119 * @returns true if it's a separator, false if it isn't.
120 * @param Cp The code point.
121 * @param pszSeparators The separators.
122 */
123static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
124{
125 /* This could be done in a more optimal fashion. Probably worth a
126 separate RTStr function at some point. */
127 for (;;)
128 {
129 RTUNICP CpSep;
130 int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
131 AssertRCReturn(rc, false);
132 if (CpSep == Cp)
133 return true;
134 if (!CpSep)
135 return false;
136 }
137}
138
139
140/**
141 * Look for an 7-bit ASCII character in the separator string.
142 *
143 * @returns true if it's a separator, false if it isn't.
144 * @param ch The character.
145 * @param pszSeparators The separators.
146 * @param cchSeparators The number of separators chars.
147 */
148DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
149{
150 switch (cchSeparators)
151 {
152 case 8: if (ch == pszSeparators[7]) return true;
153 case 7: if (ch == pszSeparators[6]) return true;
154 case 6: if (ch == pszSeparators[5]) return true;
155 case 5: if (ch == pszSeparators[4]) return true;
156 case 4: if (ch == pszSeparators[3]) return true;
157 case 3: if (ch == pszSeparators[2]) return true;
158 case 2: if (ch == pszSeparators[1]) return true;
159 case 1: if (ch == pszSeparators[0]) return true;
160 return false;
161 default:
162 return memchr(pszSeparators, ch, cchSeparators) != NULL;
163 }
164}
165
166
167/**
168 * Checks if the character is in the set of separators
169 *
170 * @returns true if it is, false if it isn't.
171 *
172 * @param Cp The code point.
173 * @param pszSeparators The separators.
174 * @param cchSeparators The length of @a pszSeparators.
175 */
176DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
177{
178 if (RT_LIKELY(Cp <= 127))
179 return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
180 return rtGetOptIsUniCpInString(Cp, pszSeparators);
181}
182
183
184/**
185 * Skips any delimiters at the start of the string that is pointed to.
186 *
187 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
188 * @param ppszSrc Where to get and return the string pointer.
189 * @param pszSeparators The separators.
190 * @param cchSeparators The length of @a pszSeparators.
191 */
192static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
193{
194 const char *pszSrc = *ppszSrc;
195 const char *pszRet;
196 for (;;)
197 {
198 pszRet = pszSrc;
199 RTUNICP Cp;
200 int rc = RTStrGetCpEx(&pszSrc, &Cp);
201 if (RT_FAILURE(rc))
202 {
203 *ppszSrc = pszRet;
204 return rc;
205 }
206 if ( !Cp
207 || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
208 break;
209 }
210
211 *ppszSrc = pszRet;
212 return VINF_SUCCESS;
213}
214
215
216RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine, const char *pszSeparators)
217{
218 /*
219 * Some input validation.
220 */
221 AssertPtr(pszCmdLine);
222 AssertPtr(pcArgs);
223 AssertPtr(ppapszArgv);
224 if (!pszSeparators)
225 pszSeparators = " \t\n\r";
226 else
227 AssertPtr(pszSeparators);
228 size_t const cchSeparators = strlen(pszSeparators);
229 AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
230
231 /*
232 * Parse the command line and chop off it into argv individual argv strings.
233 */
234 int rc = VINF_SUCCESS;
235 const char *pszSrc = pszCmdLine;
236 char *pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
237 char *pszDst = pszDup;
238 if (!pszDup)
239 return VERR_NO_STR_MEMORY;
240 char **papszArgs = NULL;
241 unsigned iArg = 0;
242 while (*pszSrc)
243 {
244 /* Skip stuff */
245 rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
246 if (RT_FAILURE(rc))
247 break;
248 if (!*pszSrc)
249 break;
250
251 /* Start a new entry. */
252 if ((iArg % 32) == 0)
253 {
254 void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
255 if (!pvNew)
256 {
257 rc = VERR_NO_MEMORY;
258 break;
259 }
260 papszArgs = (char **)pvNew;
261 }
262 papszArgs[iArg++] = pszDst;
263
264 /* Parse and copy the string over. */
265 RTUNICP CpQuote = 0;
266 RTUNICP Cp;
267 for (;;)
268 {
269 rc = RTStrGetCpEx(&pszSrc, &Cp);
270 if (RT_FAILURE(rc) || !Cp)
271 break;
272 if (!CpQuote)
273 {
274 if (Cp == '"' || Cp == '\'')
275 CpQuote = Cp;
276 else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
277 break;
278 else
279 pszDst = RTStrPutCp(pszDst, Cp);
280 }
281 else if (CpQuote != Cp)
282 pszDst = RTStrPutCp(pszDst, Cp);
283 else
284 CpQuote = 0;
285 }
286 *pszDst++ = '\0';
287 if (RT_FAILURE(rc) || !Cp)
288 break;
289 }
290
291 if (RT_FAILURE(rc))
292 {
293 RTMemFree(pszDup);
294 RTMemFree(papszArgs);
295 return rc;
296 }
297
298 /*
299 * Terminate the array.
300 * Check for empty string to make sure we've got an array.
301 */
302 if (iArg == 0)
303 {
304 RTMemFree(pszDup);
305 papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
306 if (!papszArgs)
307 return VERR_NO_MEMORY;
308 }
309 papszArgs[iArg] = NULL;
310
311 *pcArgs = iArg;
312 *ppapszArgv = papszArgs;
313 return VINF_SUCCESS;
314}
315
316
317RTDECL(void) RTGetOptArgvFree(char **papszArgv)
318{
319 if (papszArgv)
320 {
321 RTMemFree(papszArgv[0]);
322 RTMemFree(papszArgv);
323 }
324}
325
326
327/**
328 * Checks if the argument needs quoting or not.
329 *
330 * @returns true if it needs, false if it don't.
331 * @param pszArg The argument.
332 * @param fFlags Quoting style.
333 * @param pcch Where to store the argument length when quoting
334 * is not required. (optimization)
335 */
336DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
337{
338 char const *psz = pszArg;
339 unsigned char ch;
340 while ((ch = (unsigned char)*psz))
341 {
342 if ( ch < 128
343 && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
344 return true;
345 psz++;
346 }
347
348 *pcch = psz - pszArg;
349 return false;
350}
351
352
353/**
354 * Grows the command line string buffer.
355 *
356 * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
357 * @param ppszCmdLine Pointer to the command line string pointer.
358 * @param pcbCmdLineAlloc Pointer to the allocation length variable.
359 * @param cchMin The minimum size to grow with, kind of.
360 */
361static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
362{
363 size_t cb = *pcbCmdLineAlloc;
364 while (cb < cchMin)
365 cb *= 2;
366 cb *= 2;
367 *pcbCmdLineAlloc = cb;
368 return RTStrRealloc(ppszCmdLine, cb);
369}
370
371/**
372 * Checks if we have a sequence of DOS slashes followed by a double quote char.
373 *
374 * @returns true / false accordingly.
375 * @param psz The string.
376 */
377DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
378{
379 while (*psz == '\\')
380 psz++;
381 return *psz == '"' || *psz == '\0';
382}
383
384
385RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
386{
387 AssertReturn(!(fFlags & ~RTGETOPTARGV_CNV_QUOTE_MASK), VERR_INVALID_PARAMETER);
388
389#define PUT_CH(ch) \
390 if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
391 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
392 if (RT_FAILURE(rc)) \
393 break; \
394 } \
395 pszCmdLine[off++] = (ch)
396
397#define PUT_PSZ(psz, cch) \
398 if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
399 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
400 if (RT_FAILURE(rc)) \
401 break; \
402 } \
403 memcpy(&pszCmdLine[off], (psz), (cch)); \
404 off += (cch);
405#define PUT_SZ(sz) PUT_PSZ(sz, sizeof(sz) - 1)
406
407 /*
408 * Take the realloc approach, it requires less code and is probably more
409 * efficient than figuring out the size first.
410 */
411 int rc = VINF_SUCCESS;
412 size_t off = 0;
413 size_t cbCmdLineAlloc = 256;
414 char *pszCmdLine = RTStrAlloc(256);
415 if (!pszCmdLine)
416 return VERR_NO_STR_MEMORY;
417
418 for (size_t i = 0; papszArgv[i]; i++)
419 {
420 if (i > 0)
421 {
422 PUT_CH(' ');
423 }
424
425 /* does it need quoting? */
426 const char *pszArg = papszArgv[i];
427 size_t cchArg;
428 if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
429 {
430 /* No quoting needed, just append the argument. */
431 PUT_PSZ(pszArg, cchArg);
432 }
433 else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
434 {
435 /*
436 * Microsoft CRT quoting. Quote the whole argument in double
437 * quotes to make it easier to read and code.
438 */
439 PUT_CH('"');
440 char ch;
441 while ((ch = *pszArg++))
442 {
443 if ( ch == '\\'
444 && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
445 {
446 PUT_SZ("\\\\");
447 }
448 else if (ch == '"')
449 {
450 PUT_SZ("\\\"");
451 }
452 else
453 {
454 PUT_CH(ch);
455 }
456 }
457 PUT_CH('"');
458 }
459 else
460 {
461 /*
462 * Bourne Shell quoting. Quote the whole thing in single quotes
463 * and use double quotes for any single quote chars.
464 */
465 PUT_CH('\'');
466 char ch;
467 while ((ch = *pszArg++))
468 {
469 if (ch == '\'')
470 {
471 PUT_SZ("'\"'\"'");
472 }
473 else
474 {
475 PUT_CH(ch);
476 }
477 }
478 PUT_CH('\'');
479 }
480 }
481
482 /* Set return value / cleanup. */
483 if (RT_SUCCESS(rc))
484 {
485 pszCmdLine[off] = '\0';
486 *ppszCmdLine = pszCmdLine;
487 }
488 else
489 RTStrFree(pszCmdLine);
490#undef PUT_SZ
491#undef PUT_PSZ
492#undef PUT_CH
493 return rc;
494}
495
496
497RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
498{
499 char *pszCmdLine;
500 int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
501 if (RT_SUCCESS(rc))
502 {
503 rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
504 RTStrFree(pszCmdLine);
505 }
506 return rc;
507}
508
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette