VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/path/RTPathGlob.cpp@ 60067

Last change on this file since 60067 was 57978, checked in by vboxsync, 9 years ago

IPRT: Doxygen warning fixes (last ones, hopefully).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 83.2 KB
Line 
1/* $Id: RTPathGlob.cpp 57978 2015-09-30 19:39:30Z vboxsync $ */
2/** @file
3 * IPRT - RTPathGlob
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include "internal/iprt.h"
32#include <iprt/path.h>
33
34#include <iprt/asm.h>
35#include <iprt/assert.h>
36#include <iprt/buildconfig.h>
37#include <iprt/ctype.h>
38#include <iprt/dir.h>
39#include <iprt/env.h>
40#include <iprt/err.h>
41#include <iprt/mem.h>
42#include <iprt/string.h>
43#include <iprt/uni.h>
44
45#if defined(RT_OS_WINDOWS)
46# include <Windows.h>
47# include "../../r3/win/internal-r3-win.h"
48
49#elif defined(RT_OS_OS2)
50# define INCL_BASE
51# include <os2.h>
52# undef RT_MAX /* collision */
53
54#endif
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
/** Upper limit on the number of results a glob operation may produce. */
#define RTPATHGLOB_MAX_RESULTS          _32K
/** Upper limit on the number of zero-or-more wildcards in one pattern.
 * Keeps recursion depth, stack usage and execution time bounded. */
#define RTPATHMATCH_MAX_ZERO_OR_MORE    24
/** Upper limit on the number of value items queried for a single variable. */
#define RTPATHMATCH_MAX_VAR_ITEMS       _4K
67
68
69
70/*********************************************************************************************************************************
71* Structures and Typedefs *
72*********************************************************************************************************************************/
/**
 * Opcodes of the path matching program.
 *
 * Operand references below are to the members of RTPATHMATCHCORE.
 */
typedef enum RTPATHMATCHOP
{
    /** Invalid zero value. */
    RTPATHMATCHOP_INVALID = 0,
    /** End of pattern: match only if no input is left. */
    RTPATHMATCHOP_RETURN_MATCH_IF_AT_END,
    /** Trailing asterisk: unconditional match. */
    RTPATHMATCHOP_RETURN_MATCH,
    /** Lone asterisk: match anything but the '.' and '..' entries. */
    RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT,
    /** Literal text: case sensitive compare against pch/cch. */
    RTPATHMATCHOP_STRCMP,
    /** Literal text: case insensitive compare against pch/cch. */
    RTPATHMATCHOP_STRICMP,
    /** Single question mark: skip exactly one code point. */
    RTPATHMATCHOP_SKIP_ONE_CODEPOINT,
    /** Run of question marks: skip exactly RTPATHMATCHCORE::cch code points. */
    RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS,
    /** Char set: the next code point must be a member of the ASCII-7 set given
     *  by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7,
    /** Char set: the next code point must not be a member of the ASCII-7 set
     *  given by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7,
    /** Char set: the next code point must be a member of the extended set
     *  given by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED,
    /** Char set: the next code point must not be a member of the extended set
     *  given by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED,
    /** Variable: case sensitive compare with the variable value(s),
     *  RTPATHMATCHCORE::uOp2 holds the variable table index. */
    RTPATHMATCHOP_VARIABLE_VALUE_CMP,
    /** Variable: case insensitive compare with the variable value(s),
     *  RTPATHMATCHCORE::uOp2 holds the variable table index. */
    RTPATHMATCHOP_VARIABLE_VALUE_ICMP,
    /** Asterisk: match zero or more code points; at least
     *  RTPATHMATCHCORE::cch code points must follow it. */
    RTPATHMATCHOP_ZERO_OR_MORE,
    /** Asterisk: same as RTPATHMATCHOP_ZERO_OR_MORE, but never matches the
     *  '.' and '..' entries. */
    RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT,
    /** First value past the valid opcodes. */
    RTPATHMATCHOP_END
} RTPATHMATCHOP;
120
121/**
122 * Matching instruction.
123 */
124typedef struct RTPATHMATCHCORE
125{
126 /** The action to take. */
127 RTPATHMATCHOP enmOpCode;
128 /** Generic value operand. */
129 uint16_t uOp2;
130 /** Generic length operand. */
131 uint16_t cch;
132 /** Generic string pointer operand. */
133 const char *pch;
134} RTPATHMATCHCORE;
135/** Pointer to a matching instruction. */
136typedef RTPATHMATCHCORE *PRTPATHMATCHCORE;
137/** Pointer to a const matching instruction. */
138typedef RTPATHMATCHCORE const *PCRTPATHMATCHCORE;
139
140/**
141 * Path matching instruction allocator.
142 */
143typedef struct RTPATHMATCHALLOC
144{
145 /** Allocated array of instructions. */
146 PRTPATHMATCHCORE paInstructions;
147 /** Index of the next free entry in paScratch. */
148 uint32_t iNext;
149 /** Number of instructions allocated. */
150 uint32_t cAllocated;
151} RTPATHMATCHALLOC;
152/** Pointer to a matching instruction allocator. */
153typedef RTPATHMATCHALLOC *PRTPATHMATCHALLOC;
154
/**
 * Cache for the path matching code.
 *
 * Primarily meant for speeding up multi-value variables such as PATH; it is
 * currently only a placeholder.
 */
typedef struct RTPATHMATCHCACHE
{
    /** Placeholder member. @todo optimize later. */
    uint32_t            iNothingYet;
} RTPATHMATCHCACHE;
/** Pointer to a path matching cache. */
typedef RTPATHMATCHCACHE *PRTPATHMATCHCACHE;
165
166
167
/**
 * Parsed path entry, one per pattern component.
 *
 * Exactly how the component is processed is given by fNormal, fPlain,
 * fStarStar and fExpVariable.
 */
typedef struct RTPATHGLOBPPE
{
    /** fNormal: Index into RTPATHGLOB::MatchInstrAlloc.paInstructions of the
     *  matching program for this component. */
    uint32_t    iMatchProg : 16;
    /** Set for a regular component that is matched by running iMatchProg. */
    uint32_t    fNormal : 1;
    /** !fNormal: Plain name which can be handled without enumerating the whole
     *  directory - unless the file system is case sensitive and the globbing
     *  is not, something which must be determined per directory. */
    uint32_t    fPlain : 1;
    /** !fNormal: The component matches zero or more subdirectories. */
    uint32_t    fStarStar : 1;
    /** !fNormal: The entire component is a variable expansion. */
    uint32_t    fExpVariable : 1;

    /** Filter: Set if only directories can match. */
    uint32_t    fDir : 1;
    /** Set if this is the last component of the pattern. */
    uint32_t    fFinal : 1;

    /** Bits not currently in use. */
    uint32_t    fReserved : 2+8;
} RTPATHGLOBPPE;
193
194
195typedef struct RTPATHGLOB
196{
197 /** Path buffer. */
198 char szPath[RTPATH_MAX];
199 /** Temporary buffers. */
200 union
201 {
202 /** File system object info structure. */
203 RTFSOBJINFO ObjInfo;
204 /** Directory entry buffer. */
205 RTDIRENTRY DirEntry;
206 /** Padding the buffer to an unreasonably large size. */
207 uint8_t abPadding[RTPATH_MAX + sizeof(RTDIRENTRY)];
208 } u;
209
210
211 /** Where to insert the next one.*/
212 PRTPATHGLOBENTRY *ppNext;
213 /** The head pointer. */
214 PRTPATHGLOBENTRY pHead;
215 /** Result count. */
216 uint32_t cResults;
217 /** Counts path overflows. */
218 uint32_t cPathOverflows;
219 /** The input flags. */
220 uint32_t fFlags;
221 /** Matching instruction allocator. */
222 RTPATHMATCHALLOC MatchInstrAlloc;
223 /** Matching state. */
224 RTPATHMATCHCACHE MatchCache;
225
226 /** The pattern string. */
227 const char *pszPattern;
228 /** The parsed path. */
229 PRTPATHPARSED pParsed;
230 /** The component to start with. */
231 uint16_t iFirstComp;
232 /** The corresponding path offset (previous components already present). */
233 uint16_t offFirstPath;
234 /** Path component information we need. */
235 RTPATHGLOBPPE aComps[1];
236} RTPATHGLOB;
237typedef RTPATHGLOB *PRTPATHGLOB;
238
239
240/**
241 * Matching variable lookup table.
242 * Currently so small we don't bother sorting it and doing binary lookups.
243 */
244typedef struct RTPATHMATCHVAR
245{
246 /** The variable name. */
247 const char *pszName;
248 /** The variable name length. */
249 uint16_t cchName;
250 /** Only available as the verify first component. */
251 bool fFirstOnly;
252
253 /**
254 * Queries a given variable value.
255 *
256 * @returns IPRT status code.
257 * @retval VERR_BUFFER_OVERFLOW
258 * @retval VERR_TRY_AGAIN if the caller should skip this value item and try the
259 * next one instead (e.g. env var not present).
260 * @retval VINF_EOF when retrieving the last one, if possible.
261 * @retval VERR_EOF when @a iItem is past the item space.
262 *
263 * @param iItem The variable value item to retrieve. (A variable may
264 * have more than one value, e.g. 'BothProgramFile' on a
265 * 64-bit system or 'Path'.)
266 * @param pszBuf Where to return the value.
267 * @param cbBuf The buffer size.
268 * @param pcchValue Where to return the length of the return string.
269 * @param pCache Pointer to the path matching cache. May speed up
270 * enumerating PATH items and similar.
271 */
272 DECLCALLBACKMEMBER(int, pfnQuery)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, PRTPATHMATCHCACHE pCache);
273
274 /**
275 * Matching method, optional.
276 *
277 * @returns IPRT status code.
278 * @retval VINF_SUCCESS on match.
279 * @retval VERR_MISMATCH on mismatch.
280 *
281 * @param pszMatch String to match with (not terminated).
282 * @param cchMatch The length of what we match with.
283 * @param fIgnoreCase Whether to ignore case or not when comparing.
284 * @param pcchMatched Where to return the length of the match (value length).
285 */
286 DECLCALLBACKMEMBER(int, pfnMatch)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, size_t *pcchMatched);
287
288} RTPATHMATCHVAR;
289
290
291/*********************************************************************************************************************************
292* Internal Functions *
293*********************************************************************************************************************************/
294static int rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp, size_t offStarStarPath);
295static int rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
296static int rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
297static int rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
298
299
/**
 * Implements the two variable access functions for a simple one value variable.
 *
 * The generated rtPathVarQuery_<a_Name> function returns the value for item 0
 * together with VINF_EOF (it is the only and thus last item),
 * VERR_BUFFER_OVERFLOW if the value and its terminator do not fit into the
 * buffer, and VERR_EOF for all other items.
 *
 * The generated rtPathVarMatch_<a_Name> function checks whether the input
 * starts with the value.  The value must fit within the input
 * (cchValue <= cchMatch), otherwise comparing cchValue bytes would read beyond
 * the input; this is the same rule rtPathMatchExecVariableFallback applies.
 *
 * @param   a_Name          The variable name, used as function name suffix.
 * @param   a_GetStrExpr    Expression producing the zero terminated value.
 */
#define RTPATHMATCHVAR_SIMPLE(a_Name, a_GetStrExpr) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem == 0) \
        { \
            const char *pszValue = a_GetStrExpr; \
            size_t      cchValue = strlen(pszValue); \
            if (cchValue + 1 <= cbBuf) \
            { \
                memcpy(pszBuf, pszValue, cchValue + 1); \
                *pcchValue = cchValue; \
                return VINF_EOF; \
            } \
            return VERR_BUFFER_OVERFLOW; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        const char *pszValue = a_GetStrExpr; \
        size_t      cchValue = strlen(pszValue); \
        if (   cchValue <= cchMatch \
            && (  !fIgnoreCase \
                ? memcmp(pszValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(pszValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
338
/**
 * Implements mapping a glob variable to an environment variable.
 *
 * The generated rtPathVarQuery_<a_Name> function returns the environment
 * variable value for item 0 (VINF_EOF on success).  A missing environment
 * variable as well as any other item index results in VERR_EOF, other
 * RTEnvGetEx failures are passed up.
 *
 * The generated rtPathVarMatch_<a_Name> function checks whether the input
 * starts with the environment variable value.  The value must fit within the
 * input (cchValue <= cchMatch), otherwise comparing cchValue bytes would read
 * beyond the input.
 *
 * @param   a_Name          The variable name, used as function name suffix.
 * @param   a_pszEnvVar     The name of the environment variable.
 * @param   a_cbMaxValue    The max expected value size (stack buffer size used
 *                          by the match function).
 */
#define RTPATHMATCHVAR_SIMPLE_ENVVAR(a_Name, a_pszEnvVar, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem == 0) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return VINF_EOF; \
            if (rc != VERR_ENV_VAR_NOT_FOUND) \
                return rc; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        char   szValue[a_cbMaxValue]; \
        size_t cchValue; \
        int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, szValue, sizeof(szValue), &cchValue); \
        if (   RT_SUCCESS(rc) \
            && cchValue <= cchMatch \
            && (  !fIgnoreCase \
                ? memcmp(szValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
375
/**
 * Implements mapping a glob variable to multiple environment variable values.
 *
 * The generated rtPathVarQuery_<a_Name> function maps item N to the N'th
 * environment variable in @a a_apszVarNames.  It returns VINF_EOF for the last
 * one and VINF_SUCCESS for the others, VERR_TRY_AGAIN if the environment
 * variable does not exist (caller should try the next item), and VERR_EOF
 * when the item index is outside the table.
 *
 * The generated rtPathVarMatch_<a_Name> function checks whether the input
 * starts with any of the environment variable values.  A value must fit within
 * the input (cchValue <= cchMatch), otherwise comparing cchValue bytes would
 * read beyond the input.
 *
 * @param   a_Name          The variable name.
 * @param   a_apszVarNames  Assumes to be a global variable that RT_ELEMENTS
 *                          works correctly on.
 * @param   a_cbMaxValue    The max expected value size.
 */
#define RTPATHMATCHVAR_MULTIPLE_ENVVARS(a_Name, a_apszVarNames, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem < RT_ELEMENTS(a_apszVarNames)) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return iItem + 1 == RT_ELEMENTS(a_apszVarNames) ? VINF_EOF : VINF_SUCCESS; \
            if (rc == VERR_ENV_VAR_NOT_FOUND) \
                rc = VERR_TRY_AGAIN; \
            return rc; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        for (uint32_t iItem = 0; iItem < RT_ELEMENTS(a_apszVarNames); iItem++) \
        { \
            char   szValue[a_cbMaxValue]; \
            size_t cchValue; \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], szValue, sizeof(szValue), &cchValue);\
            if (   RT_SUCCESS(rc) \
                && cchValue <= cchMatch \
                && (  !fIgnoreCase \
                    ? memcmp(szValue, pchMatch, cchValue) == 0 \
                    : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
            { \
                *pcchMatched = cchValue; \
                return VINF_SUCCESS; \
            } \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
421
422
423RTPATHMATCHVAR_SIMPLE(Arch, RTBldCfgTargetArch());
424RTPATHMATCHVAR_SIMPLE(Bits, RT_XSTR(ARCH_BITS));
425#ifdef RT_OS_WINDOWS
426RTPATHMATCHVAR_SIMPLE_ENVVAR(WinAppData, "AppData", RTPATH_MAX);
427RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramData, "ProgramData", RTPATH_MAX);
428RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramFiles, "ProgramFiles", RTPATH_MAX);
429RTPATHMATCHVAR_SIMPLE_ENVVAR(WinCommonProgramFiles, "CommonProgramFiles", RTPATH_MAX);
430# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
431RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherProgramFiles, "ProgramFiles(x86)", RTPATH_MAX);
432RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherCommonProgramFiles, "CommonProgramFiles(x86)", RTPATH_MAX);
433# else
434# error "Port ME!"
435# endif
436static const char * const a_apszWinProgramFilesVars[] =
437{
438 "ProgramFiles",
439# ifdef RT_ARCH_AMD64
440 "ProgramFiles(x86)",
441# endif
442};
443RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllProgramFiles, a_apszWinProgramFilesVars, RTPATH_MAX);
444static const char * const a_apszWinCommonProgramFilesVars[] =
445{
446 "CommonProgramFiles",
447# ifdef RT_ARCH_AMD64
448 "CommonProgramFiles(x86)",
449# endif
450};
451RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllCommonProgramFiles, a_apszWinCommonProgramFilesVars, RTPATH_MAX);
452#endif
453
454
455/**
456 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery, Enumerates the PATH}
457 */
458static DECLCALLBACK(int) rtPathVarQuery_Path(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
459 PRTPATHMATCHCACHE pCache)
460{
461 /*
462 * Query the PATH value.
463 */
464/** @todo cache this in pCache with iItem and offset. */
465 char *pszPathFree = NULL;
466 char *pszPath = pszBuf;
467 size_t cchActual;
468 const char *pszVarNm = "PATH";
469 int rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPath, cbBuf, &cchActual);
470#ifdef RT_OS_WINDOWS
471 if (rc == VERR_ENV_VAR_NOT_FOUND)
472 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm = "Path", pszPath, cbBuf, &cchActual);
473#endif
474 if (rc == VERR_BUFFER_OVERFLOW)
475 {
476 for (uint32_t iTry = 0; iTry < 10; iTry++)
477 {
478 size_t cbPathBuf = RT_ALIGN_Z(cchActual + 1 + 64 * iTry, 64);
479 pszPathFree = (char *)RTMemTmpAlloc(cbPathBuf);
480 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPathFree, cbPathBuf, &cchActual);
481 if (RT_SUCCESS(rc))
482 break;
483 RTMemTmpFree(pszPathFree);
484 AssertReturn(cchActual >= cbPathBuf, VERR_INTERNAL_ERROR_3);
485 }
486 pszPath = pszPathFree;
487 }
488
489 /*
490 * Spool forward to the given PATH item.
491 */
492 rc = VERR_EOF;
493#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
494 const char chSep = ';';
495#else
496 const char chSep = ':';
497#endif
498 while (*pszPath != '\0')
499 {
500 char *pchSep = strchr(pszPath, chSep);
501
502 /* We ignore empty strings, which is probably not entirely correct,
503 but works better on DOS based system with many entries added
504 without checking whether there is a trailing separator or not.
505 Thus, the current directory is only searched if a '.' is present
506 in the PATH. */
507 if (pchSep == pszPath)
508 pszPath++;
509 else if (iItem > 0)
510 {
511 /* If we didn't find a separator, the item doesn't exists. Quit. */
512 if (!pchSep)
513 break;
514
515 pszPath = pchSep + 1;
516 iItem--;
517 }
518 else
519 {
520 /* We've reached the item we wanted. */
521 size_t cchComp = pchSep ? pchSep - pszPath : strlen(pszPath);
522 if (cchComp < cbBuf)
523 {
524 if (pszBuf != pszPath)
525 memmove(pszBuf, pszPath, cchComp);
526 pszBuf[cchComp] = '\0';
527 rc = pchSep ? VINF_SUCCESS : VINF_EOF;
528 }
529 else
530 rc = VERR_BUFFER_OVERFLOW;
531 *pcchValue = cchComp;
532 break;
533 }
534 }
535
536 if (pszPathFree)
537 RTMemTmpFree(pszPathFree);
538 return rc;
539}
540
541
#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system drive letter + colon.}.
 *
 * Only item 0 exists.  On success the two character string (drive letter and
 * colon) is returned together with VINF_EOF.  VERR_INTERNAL_ERROR_4 is
 * returned if the drive could not be determined.
 */
static DECLCALLBACK(int) rtPathVarQuery_DosSystemDrive(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                       PRTPATHMATCHCACHE pCache)
{
    NOREF(pCache);
    if (iItem == 0)
    {
        AssertReturn(cbBuf >= 3, VERR_BUFFER_OVERFLOW);

# ifdef RT_OS_WINDOWS
        /* Query into a properly typed and sized local buffer.  A return value
           equal to or larger than the buffer size means the buffer was too
           small and that its content cannot be trusted. */
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszTmp[MAX_PATH];
        UINT    cwc = g_pfnGetSystemWindowsDirectoryW(wszTmp, MAX_PATH);
        if (cwc >= 2 && cwc < MAX_PATH)
        {
            RTUTF16 wcDrive = wszTmp[0];
            if (   RT_C_IS_ALPHA(wcDrive)
                && wszTmp[1] == ':')
            {
                pszBuf[0] = (char)wcDrive;
                pszBuf[1] = ':';
                pszBuf[2] = '\0';
                *pcchValue = 2;
                return VINF_EOF;
            }
        }
# else
        ULONG  ulDrive = ~(ULONG)0;
        APIRET rc = DosQuerySysInfo(QSV_BOOT_DRIVE, QSV_BOOT_DRIVE, &ulDrive, sizeof(ulDrive));
        ulDrive--; /* 1 = 'A' */
        if (   rc == NO_ERROR
            && ulDrive <= (ULONG)('Z' - 'A')) /* zero based index, must map onto 'A'..'Z' */
        {
            pszBuf[0] = (char)ulDrive + 'A';
            pszBuf[1] = ':';
            pszBuf[2] = '\0';
            *pcchValue = 2;
            return VINF_EOF;
        }
# endif
        return VERR_INTERNAL_ERROR_4;
    }
    return VERR_EOF;
}
#endif
592
593
#ifdef RT_OS_WINDOWS
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system root directory (C:\Windows).}.
 *
 * Only item 0 exists.  The directory is converted to UTF-8 and returned in
 * @a pszBuf, the status is that of the conversion.  VERR_BUFFER_OVERFLOW is
 * returned if the directory does not fit into the local MAX_PATH sized query
 * buffer.
 */
static DECLCALLBACK(int) rtPathVarQuery_WinSystemRoot(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                      PRTPATHMATCHCACHE pCache)
{
    NOREF(pCache);
    if (iItem == 0)
    {
        Assert(pszBuf); Assert(cbBuf);
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszSystemRoot[MAX_PATH];
        UINT    cchSystemRoot = g_pfnGetSystemWindowsDirectoryW(wszSystemRoot, MAX_PATH);
        if (cchSystemRoot == 0)
            return RTErrConvertFromWin32(GetLastError());

        /* A value at or above the buffer size is the required size, not the
           length of what was stored, so the buffer content is not usable. */
        if (cchSystemRoot >= MAX_PATH)
            return VERR_BUFFER_OVERFLOW;
        return RTUtf16ToUtf8Ex(wszSystemRoot, cchSystemRoot, &pszBuf, cbBuf, pcchValue);
    }
    return VERR_EOF;
}
#endif
615
/* The variable implementation helper macros are not needed beyond this point. */
#undef RTPATHMATCHVAR_SIMPLE
#undef RTPATHMATCHVAR_SIMPLE_ENVVAR
#undef RTPATHMATCHVAR_MULTIPLE_ENVVARS
619
620/**
621 *
622 *
623 * @author bird (9/29/2015)
624 */
625static RTPATHMATCHVAR const g_aVariables[] =
626{
627 { RT_STR_TUPLE("Arch"), false, rtPathVarQuery_Arch, rtPathVarMatch_Arch },
628 { RT_STR_TUPLE("Bits"), false, rtPathVarQuery_Bits, rtPathVarMatch_Bits },
629 { RT_STR_TUPLE("Path"), true, rtPathVarQuery_Path, NULL },
630#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
631 { RT_STR_TUPLE("SystemDrive"), true, rtPathVarQuery_DosSystemDrive, NULL },
632#endif
633#ifdef RT_OS_WINDOWS
634 { RT_STR_TUPLE("SystemRoot"), true, rtPathVarQuery_WinSystemRoot, NULL },
635 { RT_STR_TUPLE("AppData"), true, rtPathVarQuery_WinAppData, NULL },
636 { RT_STR_TUPLE("ProgramData"), true, rtPathVarQuery_WinProgramData, NULL },
637 { RT_STR_TUPLE("ProgramFiles"), true, rtPathVarQuery_WinProgramFiles, NULL },
638 { RT_STR_TUPLE("OtherProgramFiles"), true, rtPathVarQuery_WinOtherProgramFiles, NULL },
639 { RT_STR_TUPLE("AllProgramFiles"), true, rtPathVarQuery_WinAllProgramFiles, NULL },
640 { RT_STR_TUPLE("CommonProgramFiles"), true, rtPathVarQuery_WinCommonProgramFiles, NULL },
641 { RT_STR_TUPLE("OtherCommonProgramFiles"), true, rtPathVarQuery_WinOtherCommonProgramFiles, NULL },
642 { RT_STR_TUPLE("AllCommonProgramFiles"), true, rtPathVarQuery_WinAllCommonProgramFiles, NULL },
643#endif
644};
645
646
647
648/**
649 * Handles a complicated set.
650 *
651 * A complicated set is either using ranges, character classes or code points
652 * outside the ASCII-7 range.
653 *
654 * @returns VINF_SUCCESS or VERR_MISMATCH. May also return UTF-8 decoding
655 * errors as well as VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED.
656 *
657 * @param ucInput The input code point to match with.
658 * @param pchSet The start of the set specification (after caret).
659 * @param cchSet The length of the set specification.
660 */
661static int rtPathMatchExecExtendedSet(RTUNICP ucInput, const char *pchSet, size_t cchSet)
662{
663 while (cchSet > 0)
664 {
665 RTUNICP ucSet;
666 int rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet);
667 AssertRCReturn(rc, rc);
668
669 /*
670 * Check for character class, collating symbol and equvalence class.
671 */
672 if (ucSet == '[' && cchSet > 0)
673 {
674 char chNext = *pchSet;
675 if (chNext == ':')
676 {
677#define CHECK_CHAR_CLASS(a_szClassNm, a_BoolTestExpr) \
678 if ( cchSet >= sizeof(a_szClassNm) \
679 && memcmp(pchSet, a_szClassNm "]", sizeof(a_szClassNm)) == 0) \
680 { \
681 if (a_BoolTestExpr) \
682 return VINF_SUCCESS; \
683 pchSet += sizeof(a_szClassNm); \
684 cchSet -= sizeof(a_szClassNm); \
685 continue; \
686 } do { } while (0)
687
688 CHECK_CHAR_CLASS(":alpha:", RTUniCpIsAlphabetic(ucInput));
689 CHECK_CHAR_CLASS(":alnum:", RTUniCpIsAlphabetic(ucInput) || RTUniCpIsDecDigit(ucInput)); /** @todo figure what's correct here and fix uni.h */
690 CHECK_CHAR_CLASS(":blank:", ucInput == ' ' || ucInput == '\t');
691 CHECK_CHAR_CLASS(":cntrl:", ucInput < 31 || ucInput == 127);
692 CHECK_CHAR_CLASS(":digit:", RTUniCpIsDecDigit(ucInput));
693 CHECK_CHAR_CLASS(":lower:", RTUniCpIsLower(ucInput));
694 CHECK_CHAR_CLASS(":print:", RTUniCpIsAlphabetic(ucInput) || (RT_C_IS_PRINT(ucInput) && ucInput < 127)); /** @todo fixme*/
695 CHECK_CHAR_CLASS(":punct:", RT_C_IS_PRINT(ucInput) && ucInput < 127); /** @todo fixme*/
696 CHECK_CHAR_CLASS(":space:", RTUniCpIsSpace(ucInput));
697 CHECK_CHAR_CLASS(":upper:", RTUniCpIsUpper(ucInput));
698 CHECK_CHAR_CLASS(":xdigit:", RTUniCpIsHexDigit(ucInput));
699 AssertMsgFailedReturn(("Unknown or malformed char class: '%.*s'\n", cchSet + 1, pchSet - 1),
700 VERR_PATH_GLOB_UNKNOWN_CHAR_CLASS);
701#undef CHECK_CHAR_CLASS
702 }
703 /** @todo implement collating symbol and equvalence class. */
704 else if (chNext == '=' || chNext == '.')
705 AssertFailedReturn(VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
706 }
707
708 /*
709 * Check for range (leading or final dash does not constitute a range).
710 */
711 if (cchSet > 1 && *pchSet == '-')
712 {
713 pchSet++; /* skip dash */
714 cchSet--;
715
716 RTUNICP ucSet2;
717 rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet2);
718 AssertRCReturn(rc, rc);
719 Assert(ucSet < ucSet2);
720 if (ucInput >= ucSet && ucInput <= ucSet2)
721 return VINF_SUCCESS;
722 }
723 /*
724 * Single char comparison.
725 */
726 else if (ucInput == ucSet)
727 return VINF_SUCCESS;
728 }
729 return VERR_MISMATCH;
730}
731
732
733/**
734 * Variable matching fallback using the query function.
735 *
736 * This must not be inlined as it consuming a lot of stack! Which is why it's
737 * placed a couple of functions away from the recursive rtPathExecMatch.
738 *
739 * @returns VINF_SUCCESS or VERR_MISMATCH.
740 * @param pchInput The current input position.
741 * @param cchInput The amount of input left..
742 * @param idxVar The variable table index.
743 * @param fIgnoreCase Whether to ignore case when comparing.
744 * @param pcchMatched Where to return how much we actually matched up.
745 * @param pCache Pointer to the path matching cache.
746 */
747DECL_NO_INLINE(static, int) rtPathMatchExecVariableFallback(const char *pchInput, size_t cchInput, uint16_t idxVar,
748 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
749{
750 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
751 {
752 char szValue[RTPATH_MAX];
753 size_t cchValue;
754 int rc = g_aVariables[idxVar].pfnQuery(iItem, szValue, sizeof(szValue), &cchValue, pCache);
755 if (RT_SUCCESS(rc))
756 {
757 if (cchValue <= cchInput)
758 {
759 if ( !fIgnoreCase
760 ? memcmp(pchInput, szValue, cchValue) == 0
761 : RTStrNICmp(pchInput, szValue, cchValue) == 0)
762 {
763 *pcchMatched = cchValue;
764 return VINF_SUCCESS;
765 }
766 }
767 if (rc == VINF_EOF)
768 return VERR_MISMATCH;
769 }
770 else if (rc == VERR_EOF)
771 return VERR_MISMATCH;
772 else
773 Assert(rc == VERR_BUFFER_OVERFLOW || rc == VERR_TRY_AGAIN);
774 }
775 AssertFailed();
776 return VERR_MISMATCH;
777}
778
779
780/**
781 * Variable matching worker.
782 *
783 * @returns VINF_SUCCESS or VERR_MISMATCH.
784 * @param pchInput The current input position.
785 * @param cchInput The amount of input left..
786 * @param idxVar The variable table index.
787 * @param fIgnoreCase Whether to ignore case when comparing.
788 * @param pcchMatched Where to return how much we actually matched up.
789 * @param pCache Pointer to the path matching cache.
790 */
791static int rtPathMatchExecVariable(const char *pchInput, size_t cchInput, uint16_t idxVar,
792 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
793{
794 Assert(idxVar < RT_ELEMENTS(g_aVariables));
795 if (g_aVariables[idxVar].pfnMatch)
796 return g_aVariables[idxVar].pfnMatch(pchInput, cchInput, fIgnoreCase, pcchMatched);
797 return rtPathMatchExecVariableFallback(pchInput, cchInput, idxVar, fIgnoreCase, pcchMatched, pCache);
798}
799
800
801/**
802 * Variable matching worker.
803 *
804 * @returns VINF_SUCCESS or VERR_MISMATCH.
805 * @param pchInput The current input position.
806 * @param cchInput The amount of input left..
807 * @param pProg The first matching program instruction.
808 * @param pCache Pointer to the path matching cache.
809 */
810static int rtPathMatchExec(const char *pchInput, size_t cchInput, PCRTPATHMATCHCORE pProg, PRTPATHMATCHCACHE pCache)
811{
812 for (;;)
813 {
814 switch (pProg->enmOpCode)
815 {
816 case RTPATHMATCHOP_RETURN_MATCH_IF_AT_END:
817 return cchInput == 0 ? VINF_SUCCESS : VERR_MISMATCH;
818
819 case RTPATHMATCHOP_RETURN_MATCH:
820 return VINF_SUCCESS;
821
822 case RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT:
823 if ( cchInput > 2
824 || cchInput < 1
825 || pchInput[0] != '.'
826 || (cchInput == 2 && pchInput[1] != '.') )
827 return VINF_SUCCESS;
828 return VERR_MISMATCH;
829
830 case RTPATHMATCHOP_STRCMP:
831 if (pProg->cch > cchInput)
832 return VERR_MISMATCH;
833 if (memcmp(pchInput, pProg->pch, pProg->cch) != 0)
834 return VERR_MISMATCH;
835 cchInput -= pProg->cch;
836 pchInput += pProg->cch;
837 break;
838
839 case RTPATHMATCHOP_STRICMP:
840 if (pProg->cch > cchInput)
841 return VERR_MISMATCH;
842 if (RTStrNICmp(pchInput, pProg->pch, pProg->cch) != 0)
843 return VERR_MISMATCH;
844 cchInput -= pProg->cch;
845 pchInput += pProg->cch;
846 break;
847
848 case RTPATHMATCHOP_SKIP_ONE_CODEPOINT:
849 {
850 if (cchInput == 0)
851 return VERR_MISMATCH;
852 RTUNICP ucInputIgnore;
853 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
854 AssertRCReturn(rc, rc);
855 break;
856 }
857
858 case RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS:
859 {
860 uint16_t cCpsLeft = pProg->cch;
861 Assert(cCpsLeft > 1);
862 if (cCpsLeft > cchInput)
863 return VERR_MISMATCH;
864 while (cCpsLeft-- > 0)
865 {
866 RTUNICP ucInputIgnore;
867 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
868 if (RT_FAILURE(rc))
869 return rc == VERR_END_OF_STRING ? VERR_MISMATCH : rc;
870 }
871 break;
872 }
873
874 case RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7:
875 {
876 if (cchInput == 0)
877 return VERR_MISMATCH;
878 RTUNICP ucInput;
879 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
880 AssertRCReturn(rc, rc);
881 if (ucInput >= 0x80)
882 return VERR_MISMATCH;
883 if (memchr(pProg->pch, (char)ucInput, pProg->cch) == NULL)
884 return VERR_MISMATCH;
885 break;
886 }
887
888 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7:
889 {
890 if (cchInput == 0)
891 return VERR_MISMATCH;
892 RTUNICP ucInput;
893 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
894 AssertRCReturn(rc, rc);
895 if (ucInput >= 0x80)
896 break;
897 if (memchr(pProg->pch, (char)ucInput, pProg->cch) != NULL)
898 return VERR_MISMATCH;
899 break;
900 }
901
902 case RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED:
903 {
904 if (cchInput == 0)
905 return VERR_MISMATCH;
906 RTUNICP ucInput;
907 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
908 AssertRCReturn(rc, rc);
909 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
910 if (rc == VINF_SUCCESS)
911 break;
912 return rc;
913 }
914
915 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED:
916 {
917 if (cchInput == 0)
918 return VERR_MISMATCH;
919 RTUNICP ucInput;
920 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
921 AssertRCReturn(rc, rc);
922 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
923 if (rc == VERR_MISMATCH)
924 break;
925 if (rc == VINF_SUCCESS)
926 rc = VERR_MISMATCH;
927 return rc;
928 }
929
930 case RTPATHMATCHOP_VARIABLE_VALUE_CMP:
931 case RTPATHMATCHOP_VARIABLE_VALUE_ICMP:
932 {
933 size_t cchMatched = 0;
934 int rc = rtPathMatchExecVariable(pchInput, cchInput, pProg->uOp2,
935 pProg->enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP, &cchMatched, pCache);
936 if (rc == VINF_SUCCESS)
937 {
938 pchInput += cchMatched;
939 cchInput -= cchMatched;
940 break;
941 }
942 return rc;
943 }
944
945 /*
946 * This is the expensive one. It always completes the program.
947 */
948 case RTPATHMATCHOP_ZERO_OR_MORE:
949 {
950 if (cchInput < pProg->cch)
951 return VERR_MISMATCH;
952 size_t cchMatched = cchInput - pProg->cch;
953 do
954 {
955 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
956 if (RT_SUCCESS(rc))
957 return rc;
958 } while (cchMatched-- > 0);
959 return VERR_MISMATCH;
960 }
961
962 /*
963 * Variant of the above that doesn't match '.' and '..' entries.
964 */
965 case RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT:
966 {
967 if (cchInput < pProg->cch)
968 return VERR_MISMATCH;
969 if ( cchInput <= 2
970 && cchInput > 0
971 && pchInput[0] == '.'
972 && (cchInput == 1 || pchInput[1] == '.') )
973 return VERR_MISMATCH;
974 size_t cchMatched = cchInput - pProg->cch;
975 do
976 {
977 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
978 if (RT_SUCCESS(rc))
979 return rc;
980 } while (cchMatched-- > 0);
981 return VERR_MISMATCH;
982 }
983
984 default:
985 AssertMsgFailedReturn(("enmOpCode=%d\n", pProg->enmOpCode), VERR_INTERNAL_ERROR_3);
986 }
987
988 pProg++;
989 }
990}
991
992
993
994
995/**
996 * Compiles a path matching program.
997 *
998 * @returns IPRT status code.
999 * @param pchPattern The pattern to compile.
1000 * @param cchPattern The length of the pattern.
1001 * @param fIgnoreCase Whether to ignore case or not when doing the
1002 * actual matching later on.
1003 * @param pAllocator Pointer to the instruction allocator & result
1004 * array. The compiled "program" starts at
1005 * PRTPATHMATCHALLOC::paInstructions[PRTPATHMATCHALLOC::iNext]
1006 * (input iNext value).
1007 *
1008 * @todo Expose this matching code and also use it for RTDirOpenFiltered
1009 */
1010static int rtPathMatchCompile(const char *pchPattern, size_t cchPattern, bool fIgnoreCase, PRTPATHMATCHALLOC pAllocator)
1011{
1012 /** @todo PORTME: big endian. */
1013 static const uint8_t s_bmMetaChars[256/8] =
1014 {
1015 0x00, 0x00, 0x00, 0x00, /* 0 thru 31 */
1016 0x10, 0x04, 0x00, 0x80, /* 32 thru 63 */
1017 0x00, 0x00, 0x00, 0x08, /* 64 thru 95 */
1018 0x00, 0x00, 0x00, 0x00, /* 96 thru 127 */
1019 /* UTF-8 multibyte: */
1020 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1021 };
1022 Assert(ASMBitTest(s_bmMetaChars, '$')); AssertCompile('$' == 0x24 /*36*/);
1023 Assert(ASMBitTest(s_bmMetaChars, '*')); AssertCompile('*' == 0x2a /*42*/);
1024 Assert(ASMBitTest(s_bmMetaChars, '?')); AssertCompile('?' == 0x3f /*63*/);
1025 Assert(ASMBitTest(s_bmMetaChars, '[')); AssertCompile('[' == 0x5b /*91*/);
1026
1027 /*
1028 * For checking for the first instruction.
1029 */
1030 uint16_t const iFirst = pAllocator->iNext;
1031
1032 /*
1033 * This is for tracking zero-or-more instructions and for calculating
1034 * the minimum amount of input required for it to be considered.
1035 */
1036 uint16_t aiZeroOrMore[RTPATHMATCH_MAX_ZERO_OR_MORE];
1037 uint8_t cZeroOrMore = 0;
1038 size_t offInput = 0;
1039
1040 /*
1041 * Loop thru the pattern and translate it into string matching instructions.
1042 */
1043 for (;;)
1044 {
1045 /*
1046 * Allocate the next instruction.
1047 */
1048 if (pAllocator->iNext >= pAllocator->cAllocated)
1049 {
1050 uint32_t cNew = pAllocator->cAllocated ? pAllocator->cAllocated * 2 : 2;
1051 void *pvNew = RTMemRealloc(pAllocator->paInstructions, cNew * sizeof(pAllocator->paInstructions[0]));
1052 AssertReturn(pvNew, VERR_NO_MEMORY);
1053 pAllocator->paInstructions = (PRTPATHMATCHCORE)pvNew;
1054 pAllocator->cAllocated = cNew;
1055 }
1056 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[pAllocator->iNext++];
1057 pInstr->pch = pchPattern;
1058 pInstr->cch = 0;
1059 pInstr->uOp2 = 0;
1060
1061 /*
1062 * Special case: End of pattern.
1063 */
1064 if (!cchPattern)
1065 {
1066 pInstr->enmOpCode = RTPATHMATCHOP_RETURN_MATCH_IF_AT_END;
1067 break;
1068 }
1069
1070 /*
1071 * Parse the next bit of the pattern.
1072 */
1073 char ch = *pchPattern;
1074 if (ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1075 {
1076 /*
1077 * Zero or more characters wildcard.
1078 */
1079 if (ch == '*')
1080 {
1081 /* Skip extra asterisks. */
1082 do
1083 {
1084 cchPattern--;
1085 pchPattern++;
1086 } while (cchPattern > 0 && *pchPattern == '*');
1087
1088 /* There is a special optimization for trailing '*'. */
1089 pInstr->cch = 1;
1090 if (cchPattern == 0)
1091 {
1092 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1093 ? RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_RETURN_MATCH;
1094 break;
1095 }
1096
1097 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1098 ? RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_ZERO_OR_MORE;
1099 pInstr->uOp2 = (uint16_t)offInput;
1100 AssertReturn(cZeroOrMore < RT_ELEMENTS(aiZeroOrMore), VERR_OUT_OF_RANGE);
1101 aiZeroOrMore[cZeroOrMore] = (uint16_t)(pInstr - pAllocator->paInstructions);
1102
1103 /* cchInput unchanged, zero-or-more matches. */
1104 continue;
1105 }
1106
1107 /*
1108 * Single character wildcard.
1109 */
1110 if (ch == '?')
1111 {
1112 /* Count them if more. */
1113 uint16_t cchQms = 1;
1114 while (cchQms < cchPattern && pchPattern[cchQms] == '?')
1115 cchQms++;
1116
1117 pInstr->cch = cchQms;
1118 pInstr->enmOpCode = cchQms == 1 ? RTPATHMATCHOP_SKIP_ONE_CODEPOINT : RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS;
1119
1120 cchPattern -= cchQms;
1121 pchPattern += cchQms;
1122 offInput += cchQms;
1123 continue;
1124 }
1125
1126 /*
1127 * Character in set.
1128 *
1129 * Note that we skip the first char in the set as that is the only place
1130 * ']' can be placed if one desires to explicitly include it in the set.
1131 * To make life a bit more interesting, [:class:] is allowed inside the
1132 * set, so we have to do the counting game to find the end.
1133 */
1134 if (ch == '[')
1135 {
1136 if ( cchPattern > 2
1137 && (const char *)memchr(pchPattern + 2, ']', cchPattern) != NULL)
1138 {
1139
1140 /* Check for not-in. */
1141 bool fInverted = false;
1142 size_t offStart = 1;
1143 if (pchPattern[offStart] == '^')
1144 {
1145 fInverted = true;
1146 offStart++;
1147 }
1148
1149 /* Special case for ']' as the first char, it doesn't indicate closing then. */
1150 size_t off = offStart;
1151 if (pchPattern[off] == ']')
1152 off++;
1153
1154 bool fExtended = false;
1155 while (off < cchPattern)
1156 {
1157 ch = pchPattern[off++];
1158 if (ch == '[')
1159 {
1160 if (off < cchPattern)
1161 {
1162 char chOpen = pchPattern[off];
1163 if ( chOpen == ':'
1164 || chOpen == '='
1165 || chOpen == '.')
1166 {
1167 off++;
1168 const char *pchFound = (const char *)memchr(&pchPattern[off], ']', cchPattern - off);
1169 if ( pchFound
1170 && pchFound[-1] == chOpen)
1171 {
1172 fExtended = true;
1173 off = pchFound - pchPattern + 1;
1174 }
1175 else
1176 AssertFailed();
1177 }
1178 }
1179 }
1180 /* Check for closing. */
1181 else if (ch == ']')
1182 break;
1183 /* Check for range expression, promote to extended if this happens. */
1184 else if ( ch == '-'
1185 && off != offStart + 1
1186 && off < cchPattern
1187 && pchPattern[off] != ']')
1188 fExtended = true;
1189 /* UTF-8 multibyte chars forces us to use the extended version too. */
1190 else if ((uint8_t)ch >= 0x80)
1191 fExtended = true;
1192 }
1193
1194 if (ch == ']')
1195 {
1196 pInstr->pch = &pchPattern[offStart];
1197 pInstr->cch = (uint16_t)(off - offStart - 1);
1198 if (!fExtended)
1199 pInstr->enmOpCode = !fInverted
1200 ? RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7;
1201 else
1202 pInstr->enmOpCode = !fInverted
1203 ? RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED
1204 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED;
1205 pchPattern += off;
1206 cchPattern -= off;
1207 offInput += 1;
1208 continue;
1209 }
1210
1211 /* else: invalid, treat it as */
1212 AssertFailed();
1213 }
1214 }
1215 /*
1216 * Variable matching.
1217 */
1218 else if (ch == '$')
1219 {
1220 const char *pchFound;
1221 if ( cchPattern > 3
1222 && pchPattern[1] == '{'
1223 && (pchFound = (const char *)memchr(pchPattern + 2, '}', cchPattern)) != NULL
1224 && pchFound != &pchPattern[2])
1225 {
1226 /* skip to the variable name. */
1227 pchPattern += 2;
1228 cchPattern -= 2;
1229 size_t cchVarNm = pchFound - pchPattern;
1230
1231 /* Look it up. */
1232 uint32_t iVar;
1233 for (iVar = 0; iVar < RT_ELEMENTS(g_aVariables); iVar++)
1234 if ( g_aVariables[iVar].cchName == cchVarNm
1235 && memcmp(g_aVariables[iVar].pszName, pchPattern, cchVarNm) == 0)
1236 break;
1237 if (iVar < RT_ELEMENTS(g_aVariables))
1238 {
1239 pInstr->uOp2 = (uint16_t)iVar;
1240 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_VARIABLE_VALUE_CMP : RTPATHMATCHOP_VARIABLE_VALUE_ICMP;
1241 pInstr->pch = pchPattern; /* not necessary */
1242 pInstr->cch = (uint16_t)cchPattern; /* ditto */
1243 pchPattern += cchVarNm + 1;
1244 cchPattern -= cchVarNm + 1;
1245 AssertMsgReturn(!g_aVariables[iVar].fFirstOnly || iFirst + 1U == pAllocator->iNext,
1246 ("Glob variable '%s' should be first\n", g_aVariables[iVar].pszName),
1247 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1248 /* cchInput unchanged, value can be empty. */
1249 continue;
1250 }
1251 AssertMsgFailedReturn(("Unknown path matching variable '%.*s'\n", cchVarNm, pchPattern),
1252 VERR_PATH_MATCH_UNKNOWN_VARIABLE);
1253 }
1254 }
1255 else
1256 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1257 }
1258
1259 /*
1260 * Plain text. Look for the next meta char.
1261 */
1262 uint32_t cchPlain = 1;
1263 while (cchPlain < cchPattern)
1264 {
1265 ch = pchPattern[cchPlain];
1266 if (!ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1267 { /* probable */ }
1268 else if ( ch == '?'
1269 || ch == '*')
1270 break;
1271 else if (ch == '$')
1272 {
1273 const char *pchFound;
1274 if ( cchPattern > cchPlain + 3
1275 && pchPattern[cchPlain + 1] == '{'
1276 && (pchFound = (const char *)memchr(&pchPattern[cchPlain + 2], '}', cchPattern - cchPlain - 2)) != NULL
1277 && pchFound != &pchPattern[cchPlain + 2])
1278 break;
1279 }
1280 else if (ch == '[')
1281 {
1282 /* We don't put a lot of effort into getting this 100% right here,
1283 no point it complicating things for malformed expressions. */
1284 if ( cchPattern > cchPlain + 2
1285 && memchr(&pchPattern[cchPlain + 2], ']', cchPattern - cchPlain - 1) != NULL)
1286 break;
1287 }
1288 else
1289 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1290 cchPlain++;
1291 }
1292 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_STRCMP : RTPATHMATCHOP_STRICMP;
1293 pInstr->cch = cchPlain;
1294 Assert(pInstr->pch == pchPattern);
1295 Assert(pInstr->uOp2 == 0);
1296 pchPattern += cchPlain;
1297 cchPattern -= cchPlain;
1298 offInput += cchPlain;
1299 }
1300
1301 /*
1302 * Optimize zero-or-more matching.
1303 */
1304 while (cZeroOrMore-- > 0)
1305 {
1306 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[aiZeroOrMore[cZeroOrMore]];
1307 pInstr->uOp2 = (uint16_t)(offInput - pInstr->uOp2);
1308 }
1309
1310 /** @todo It's possible to use offInput to inject a instruction for checking
1311 * minimum input length at the start of the program. Not sure it's
1312 * worth it though, unless it's long a complicated expression... */
1313 return VINF_SUCCESS;
1314}
1315
1316
1317/**
1318 * Parses the glob pattern.
1319 *
1320 * This compiles filename matching programs for each component and determins the
1321 * optimal search strategy for them.
1322 *
1323 * @returns IPRT status code.
1324 * @param pGlob The glob instance data.
1325 * @param pszPattern The pattern to parse.
1326 * @param pParsed The RTPathParse output for the pattern.
1327 * @param fFlags The glob flags (same as pGlob->fFlags).
1328 */
1329static int rtPathGlobParse(PRTPATHGLOB pGlob, const char *pszPattern, PRTPATHPARSED pParsed, uint32_t fFlags)
1330{
1331 AssertReturn(pParsed->cComps > 0, VERR_INVALID_PARAMETER); /* shouldn't happen */
1332 uint32_t iComp = 0;
1333
1334 /*
1335 * If we've got a rootspec, mark it as plain. On platforms with
1336 * drive letter and/or UNC we don't allow wildcards or such in
1337 * the drive letter spec or UNC server name. (At least not yet.)
1338 */
1339 if (RTPATH_PROP_HAS_ROOT_SPEC(pParsed->fProps))
1340 {
1341 AssertReturn(pParsed->aComps[0].cch < sizeof(pGlob->szPath) - 1, VERR_FILENAME_TOO_LONG);
1342 memcpy(pGlob->szPath, &pszPattern[pParsed->aComps[0].off], pParsed->aComps[0].cch);
1343 pGlob->offFirstPath = pParsed->aComps[0].cch;
1344 pGlob->iFirstComp = iComp = 1;
1345 }
1346 else
1347 {
1348 const char * const pszComp = &pszPattern[pParsed->aComps[0].off];
1349
1350 /*
1351 * The tilde is only applicable to the first component, expand it
1352 * immediately.
1353 */
1354 if ( *pszComp == '~'
1355 && !(fFlags & RTPATHGLOB_F_NO_TILDE))
1356 {
1357 if (pParsed->aComps[0].cch == 1)
1358 {
1359 int rc = RTPathUserHome(pGlob->szPath, sizeof(pGlob->szPath) - 1);
1360 AssertRCReturn(rc, rc);
1361 }
1362 else
1363 AssertMsgFailedReturn(("'%.*s' is not supported yet\n", pszComp, pParsed->aComps[0].cch),
1364 VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
1365 pGlob->offFirstPath = (uint32_t)RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1366 pGlob->iFirstComp = iComp = 1;
1367 }
1368 }
1369
1370 /*
1371 * Process the other components.
1372 */
1373 bool fStarStar = false;
1374 for (; iComp < pParsed->cComps; iComp++)
1375 {
1376 const char *pszComp = &pszPattern[pParsed->aComps[iComp].off];
1377 uint16_t cchComp = pParsed->aComps[iComp].cch;
1378 Assert(pGlob->aComps[iComp].fNormal == false);
1379
1380 pGlob->aComps[iComp].fDir = iComp + 1 < pParsed->cComps || (fFlags & RTPATHGLOB_F_ONLY_DIRS);
1381 if ( cchComp != 2
1382 || pszComp[0] != '*'
1383 || pszComp[1] != '*'
1384 || (fFlags & RTPATHGLOB_F_NO_STARSTAR) )
1385 {
1386 /* Compile the pattern. */
1387 uint16_t const iMatchProg = pGlob->MatchInstrAlloc.iNext;
1388 pGlob->aComps[iComp].iMatchProg = iMatchProg;
1389 int rc = rtPathMatchCompile(pszComp, cchComp, RT_BOOL(fFlags & RTPATHGLOB_F_IGNORE_CASE),
1390 &pGlob->MatchInstrAlloc);
1391 if (RT_FAILURE(rc))
1392 return rc;
1393
1394 /* Check for plain text as well as full variable matching (not applicable after '**'). */
1395 uint16_t const cInstructions = pGlob->MatchInstrAlloc.iNext - iMatchProg;
1396 if ( cInstructions == 2
1397 && !fStarStar
1398 && pGlob->MatchInstrAlloc.paInstructions[iMatchProg + 1].enmOpCode == RTPATHMATCHOP_RETURN_MATCH_IF_AT_END)
1399 {
1400 if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRCMP
1401 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRICMP)
1402 pGlob->aComps[iComp].fPlain = true;
1403 else if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1404 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP)
1405 {
1406 pGlob->aComps[iComp].fExpVariable = true;
1407 AssertMsgReturn( iComp == 0
1408 || !g_aVariables[pGlob->MatchInstrAlloc.paInstructions[iMatchProg].uOp2].fFirstOnly,
1409 ("Glob variable '%.*s' can only be used as the path component.\n", cchComp, pszComp),
1410 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1411 }
1412 else
1413 pGlob->aComps[iComp].fNormal = true;
1414 }
1415 else
1416 pGlob->aComps[iComp].fNormal = true;
1417 }
1418 else
1419 {
1420 /* Recursive "**" matching. */
1421 pGlob->aComps[iComp].fNormal = false;
1422 pGlob->aComps[iComp].fStarStar = true;
1423 AssertReturn(!fStarStar, VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED); /** @todo implement multiple '**' sequences in a pattern. */
1424 fStarStar = true;
1425 }
1426 }
1427 pGlob->aComps[pParsed->cComps - 1].fFinal = true;
1428
1429 return VINF_SUCCESS;
1430}
1431
1432
1433/**
1434 * This is for skipping overly long directories entries.
1435 *
1436 * Since our directory entry buffer can hold filenames of RTPATH_MAX bytes, we
1437 * can safely skip filenames that are longer. There are very few file systems
1438 * that can actually store filenames longer than 255 bytes at time of coding
1439 * (2015-09), and extremely few which can exceed 4096 (RTPATH_MAX) bytes.
1440 *
1441 * @returns IPRT status code.
1442 * @param hDir The directory handle.
1443 * @param cbNeeded The required entry size.
1444 */
1445DECL_NO_INLINE(static, int) rtPathGlobSkipDirEntry(PRTDIR hDir, size_t cbNeeded)
1446{
1447 int rc = VERR_BUFFER_OVERFLOW;
1448 cbNeeded = RT_ALIGN_Z(cbNeeded, 16);
1449 PRTDIRENTRY pDirEntry = (PRTDIRENTRY)RTMemTmpAlloc(cbNeeded);
1450 if (pDirEntry)
1451 {
1452 rc = RTDirRead(hDir, pDirEntry, &cbNeeded);
1453 RTMemTmpFree(pDirEntry);
1454 }
1455 return rc;
1456}
1457
1458
1459/**
1460 * Adds a result.
1461 *
1462 * @returns IPRT status code.
1463 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1464 *
1465 * @param pGlob The glob instance data.
1466 * @param cchPath The number of bytes to add from pGlob->szPath.
1467 * @param uType The RTDIRENTRYTYPE value.
1468 */
1469DECL_NO_INLINE(static, int) rtPathGlobAddResult(PRTPATHGLOB pGlob, size_t cchPath, uint8_t uType)
1470{
1471 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1472 {
1473 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + 1]));
1474 if (pEntry)
1475 {
1476 pEntry->uType = uType;
1477 pEntry->cchPath = (uint16_t)cchPath;
1478 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1479 pEntry->szPath[cchPath] = '\0';
1480
1481 pEntry->pNext = NULL;
1482 *pGlob->ppNext = pEntry;
1483 pGlob->ppNext = &pEntry->pNext;
1484 pGlob->cResults++;
1485
1486 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1487 return VINF_SUCCESS;
1488 return VINF_CALLBACK_RETURN;
1489 }
1490 return VERR_NO_MEMORY;
1491 }
1492 return VERR_TOO_MUCH_DATA;
1493}
1494
1495
1496/**
1497 * Adds a result, constructing the path from two string.
1498 *
1499 * @returns IPRT status code.
1500 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1501 *
1502 * @param pGlob The glob instance data.
1503 * @param cchPath The number of bytes to add from pGlob->szPath.
1504 * @param pchName The string (usual filename) to append to the szPath.
1505 * @param cchName The length of the string to append.
1506 * @param uType The RTDIRENTRYTYPE value.
1507 */
1508DECL_NO_INLINE(static, int) rtPathGlobAddResult2(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1509 uint8_t uType)
1510{
1511 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1512 {
1513 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1514 if (pEntry)
1515 {
1516 pEntry->uType = uType;
1517 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1518 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1519 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1520 pEntry->szPath[cchPath + cchName] = '\0';
1521
1522 pEntry->pNext = NULL;
1523 *pGlob->ppNext = pEntry;
1524 pGlob->ppNext = &pEntry->pNext;
1525 pGlob->cResults++;
1526
1527 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1528 return VINF_SUCCESS;
1529 return VINF_CALLBACK_RETURN;
1530 }
1531 return VERR_NO_MEMORY;
1532 }
1533 return VERR_TOO_MUCH_DATA;
1534}
1535
1536
1537/**
1538 * Prepares a result, constructing the path from two string.
1539 *
1540 * The caller must call either rtPathGlobCommitResult or
1541 * rtPathGlobRollbackResult to complete the operation.
1542 *
1543 * @returns IPRT status code.
1544 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1545 *
1546 * @param pGlob The glob instance data.
1547 * @param cchPath The number of bytes to add from pGlob->szPath.
1548 * @param pchName The string (usual filename) to append to the szPath.
1549 * @param cchName The length of the string to append.
1550 * @param uType The RTDIRENTRYTYPE value.
1551 */
1552DECL_NO_INLINE(static, int) rtPathGlobAlmostAddResult(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1553 uint8_t uType)
1554{
1555 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1556 {
1557 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1558 if (pEntry)
1559 {
1560 pEntry->uType = uType;
1561 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1562 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1563 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1564 pEntry->szPath[cchPath + cchName] = '\0';
1565
1566 pEntry->pNext = NULL;
1567 *pGlob->ppNext = pEntry;
1568 /* Note! We don't update ppNext here, that is done in rtPathGlobCommitResult. */
1569
1570 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1571 return VINF_SUCCESS;
1572 return VINF_CALLBACK_RETURN;
1573 }
1574 return VERR_NO_MEMORY;
1575 }
1576 return VERR_TOO_MUCH_DATA;
1577}
1578
1579
1580/**
1581 * Commits a pending result from rtPathGlobAlmostAddResult.
1582 *
1583 * @param pGlob The glob instance data.
1584 * @param uType The RTDIRENTRYTYPE value.
1585 */
1586static void rtPathGlobCommitResult(PRTPATHGLOB pGlob, uint8_t uType)
1587{
1588 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1589 AssertPtr(pEntry);
1590 pEntry->uType = uType;
1591 pGlob->ppNext = &pEntry->pNext;
1592 pGlob->cResults++;
1593}
1594
1595
1596/**
1597 * Rolls back a pending result from rtPathGlobAlmostAddResult.
1598 *
1599 * @param pGlob The glob instance data.
1600 */
1601static void rtPathGlobRollbackResult(PRTPATHGLOB pGlob)
1602{
1603 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1604 AssertPtr(pEntry);
1605 RTMemFree(pEntry);
1606 *pGlob->ppNext = NULL;
1607}
1608
1609
1610
1611/**
1612 * Whether to call rtPathGlobExecRecursiveVarExp for the next component.
1613 *
1614 * @returns true / false.
1615 * @param pGlob The glob instance data.
1616 * @param offPath The next path offset/length.
1617 * @param iComp The next component.
1618 */
1619DECLINLINE(bool) rtPathGlobExecIsExpVar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1620{
1621 return pGlob->aComps[iComp].fExpVariable
1622 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1623 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1624}
1625
1626/**
1627 * Whether to call rtPathGlobExecRecursivePlainText for the next component.
1628 *
1629 * @returns true / false.
1630 * @param pGlob The glob instance data.
1631 * @param offPath The next path offset/length.
1632 * @param iComp The next component.
1633 */
1634DECLINLINE(bool) rtPathGlobExecIsPlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1635{
1636 return pGlob->aComps[iComp].fPlain
1637 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1638 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1639}
1640
1641
1642/**
1643 * Helper for rtPathGlobExecRecursiveVarExp and rtPathGlobExecRecursivePlainText
1644 * that compares a file mode mask with dir/no-dir wishes of the caller.
1645 *
1646 * @returns true if match, false if not.
1647 * @param pGlob The glob instance data.
1648 * @param fMode The file mode (only the type is used).
1649 */
1650DECLINLINE(bool) rtPathGlobExecIsMatchFinalWithFileMode(PRTPATHGLOB pGlob, RTFMODE fMode)
1651{
1652 if (!(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)))
1653 return true;
1654 return RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS) == RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode);
1655}
1656
1657
1658/**
1659 * Recursive globbing - star-star mode.
1660 *
1661 * @returns IPRT status code.
1662 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1663 *
1664 * @param pGlob The glob instance data.
1665 * @param offPath The current path offset/length.
1666 * @param iStarStarComp The star-star component index.
1667 * @param offStarStarPath The offset of the star-star component in the
1668 * pattern path.
1669 */
1670DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp,
1671 size_t offStarStarPath)
1672{
1673 /** @todo implement multi subdir matching. */
1674 return VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED;
1675}
1676
1677
1678
1679/**
1680 * Recursive globbing - variable expansion optimization.
1681 *
1682 * @returns IPRT status code.
1683 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1684 *
1685 * @param pGlob The glob instance data.
1686 * @param offPath The current path offset/length.
1687 * @param iComp The current component.
1688 */
1689DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1690{
1691 Assert(iComp < pGlob->pParsed->cComps);
1692 Assert(pGlob->szPath[offPath] == '\0');
1693 Assert(pGlob->aComps[iComp].fExpVariable);
1694 Assert(!pGlob->aComps[iComp].fPlain);
1695 Assert(!pGlob->aComps[iComp].fStarStar);
1696 Assert(rtPathGlobExecIsExpVar(pGlob, offPath, iComp));
1697
1698 /*
1699 * Fish the variable index out of the first matching instruction.
1700 */
1701 Assert( pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1702 == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1703 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1704 == RTPATHMATCHOP_VARIABLE_VALUE_ICMP);
1705 uint16_t const iVar = pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].uOp2;
1706
1707 /*
1708 * Enumerate all the variable, giving them the plain text treatment.
1709 */
1710 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
1711 {
1712 size_t cch;
1713 int rcVar = g_aVariables[iVar].pfnQuery(iItem, &pGlob->szPath[offPath], sizeof(pGlob->szPath) - offPath, &cch,
1714 &pGlob->MatchCache);
1715 if (RT_SUCCESS(rcVar))
1716 {
1717 Assert(pGlob->szPath[offPath + cch] == '\0');
1718
1719 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1720 if (RT_SUCCESS(rc))
1721 {
1722 if (pGlob->aComps[iComp].fFinal)
1723 {
1724 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1725 {
1726 rc = rtPathGlobAddResult(pGlob, cch,
1727 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1728 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1729 if (rc != VINF_SUCCESS)
1730 return rc;
1731 }
1732 }
1733 else if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1734 {
1735 Assert(pGlob->aComps[iComp].fDir);
1736 cch = RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1737 if (cch > 0)
1738 {
1739 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1740 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1741 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1742 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1743 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1744 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1745 else
1746 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1747 if (rc != VINF_SUCCESS)
1748 return rc;
1749 }
1750 else
1751 pGlob->cPathOverflows++;
1752 }
1753 }
1754 /* else: file doesn't exist or something else is wrong, ignore this. */
1755 if (rcVar == VINF_EOF)
1756 return VINF_SUCCESS;
1757 }
1758 else if (rcVar == VERR_EOF)
1759 return VINF_SUCCESS;
1760 else if (rcVar != VERR_TRY_AGAIN)
1761 {
1762 Assert(rcVar == VERR_BUFFER_OVERFLOW);
1763 pGlob->cPathOverflows++;
1764 }
1765 }
1766 AssertFailedReturn(VINF_SUCCESS); /* Too many items returned, probably buggy query method. */
1767}
1768
1769
1770/**
1771 * Recursive globbing - plain text optimization.
1772 *
1773 * @returns IPRT status code.
1774 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1775 *
1776 * @param pGlob The glob instance data.
1777 * @param offPath The current path offset/length.
1778 * @param iComp The current component.
1779 */
1780DECL_NO_INLINE(static, int) rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1781{
1782 /*
1783 * Instead of recursing, we loop thru adjacent plain text components.
1784 */
1785 for (;;)
1786 {
1787 /*
1788 * Preconditions.
1789 */
1790 Assert(iComp < pGlob->pParsed->cComps);
1791 Assert(pGlob->szPath[offPath] == '\0');
1792 Assert(pGlob->aComps[iComp].fPlain);
1793 Assert(!pGlob->aComps[iComp].fExpVariable);
1794 Assert(!pGlob->aComps[iComp].fStarStar);
1795 Assert(rtPathGlobExecIsPlainText(pGlob, offPath, iComp));
1796 Assert(pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1797 == RTPATHMATCHOP_STRCMP
1798 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1799 == RTPATHMATCHOP_STRICMP);
1800
1801 /*
1802 * Add the plain text component to the path.
1803 */
1804 size_t const cch = pGlob->pParsed->aComps[iComp].cch;
1805 if (cch + pGlob->aComps[iComp].fDir < sizeof(pGlob->szPath) - offPath)
1806 {
1807 memcpy(&pGlob->szPath[offPath], &pGlob->pszPattern[pGlob->pParsed->aComps[iComp].off], cch);
1808 offPath += cch;
1809 pGlob->szPath[offPath] = '\0';
1810
1811 /*
1812 * Check if it exists.
1813 */
1814 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1815 if (RT_SUCCESS(rc))
1816 {
1817 if (pGlob->aComps[iComp].fFinal)
1818 {
1819 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1820 return rtPathGlobAddResult(pGlob, offPath,
1821 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1822 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1823 break;
1824 }
1825
1826 if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1827 {
1828 Assert(pGlob->aComps[iComp].fDir);
1829 pGlob->szPath[offPath++] = RTPATH_SLASH;
1830 pGlob->szPath[offPath] = '\0';
1831
1832 iComp++;
1833 if (rtPathGlobExecIsExpVar(pGlob, offPath, iComp))
1834 return rtPathGlobExecRecursiveVarExp(pGlob, offPath, iComp);
1835 if (!rtPathGlobExecIsPlainText(pGlob, offPath, iComp))
1836 return rtPathGlobExecRecursiveGeneric(pGlob, offPath, iComp);
1837 if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1838 return rtPathGlobExecRecursiveStarStar(pGlob, offPath, iComp, offPath);
1839
1840 /* Continue with the next plain text component. */
1841 continue;
1842 }
1843 }
1844 /* else: file doesn't exist or something else is wrong, ignore this. */
1845 }
1846 else
1847 pGlob->cPathOverflows++;
1848 break;
1849 }
1850 return VINF_SUCCESS;
1851}
1852
1853
1854/**
1855 * Recursive globbing - generic.
1856 *
1857 * @returns IPRT status code.
1858 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1859 *
1860 * @param pGlob The glob instance data.
1861 * @param offPath The current path offset/length.
1862 * @param iComp The current component.
1863 */
1864DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1865{
1866 /*
1867 * Enumerate entire directory and match each entry.
1868 */
1869 PRTDIR hDir;
1870 int rc = RTDirOpen(&hDir, offPath ? pGlob->szPath : ".");
1871 if (RT_SUCCESS(rc))
1872 {
1873 for (;;)
1874 {
1875 size_t cch = sizeof(pGlob->u);
1876 rc = RTDirRead(hDir, &pGlob->u.DirEntry, &cch);
1877 if (RT_SUCCESS(rc))
1878 {
1879 if (pGlob->aComps[iComp].fFinal)
1880 {
1881 /*
1882 * Final component: Check if it matches the current pattern.
1883 */
1884 if ( !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS))
1885 || RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1886 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY)
1887 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1888 {
1889 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1890 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1891 &pGlob->MatchCache);
1892 if (RT_SUCCESS(rc))
1893 {
1894 /* Construct the result. */
1895 if ( pGlob->u.DirEntry.enmType != RTDIRENTRYTYPE_UNKNOWN
1896 || !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) )
1897 rc = rtPathGlobAddResult2(pGlob, offPath, pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1898 (uint8_t)pGlob->u.DirEntry.enmType);
1899 else
1900 {
1901 rc = rtPathGlobAlmostAddResult(pGlob, offPath,
1902 pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1903 (uint8_t)RTDIRENTRYTYPE_UNKNOWN);
1904 if (RT_SUCCESS(rc))
1905 {
1906 RTDirQueryUnknownType((*pGlob->ppNext)->szPath, false /*fFollowSymlinks*/,
1907 &pGlob->u.DirEntry.enmType);
1908 if ( RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1909 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY))
1910 rtPathGlobCommitResult(pGlob, (uint8_t)pGlob->u.DirEntry.enmType);
1911 else
1912 rtPathGlobRollbackResult(pGlob);
1913 }
1914 }
1915 if (rc != VINF_SUCCESS)
1916 break;
1917 }
1918 else
1919 {
1920 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1921 rc = VINF_SUCCESS;
1922 }
1923 }
1924 }
1925 /*
1926 * Intermediate component: Directories only.
1927 */
1928 else if ( pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY
1929 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1930 {
1931 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1932 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1933 &pGlob->MatchCache);
1934 if (RT_SUCCESS(rc))
1935 {
1936 /* Recurse down into the alleged directory. */
1937 cch = offPath + pGlob->u.DirEntry.cbName;
1938 if (cch + 1 < sizeof(pGlob->szPath))
1939 {
1940 memcpy(&pGlob->szPath[offPath], pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName);
1941 pGlob->szPath[cch++] = RTPATH_SLASH;
1942 pGlob->szPath[cch] = '\0';
1943
1944 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1945 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1946 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1947 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1948 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1949 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1950 else
1951 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1952 if (rc != VINF_SUCCESS)
1953 return rc;
1954 }
1955 else
1956 pGlob->cPathOverflows++;
1957 }
1958 else
1959 {
1960 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1961 rc = VINF_SUCCESS;
1962 }
1963 }
1964 }
1965 /*
1966 * RTDirRead failure.
1967 */
1968 else
1969 {
1970 /* The end? */
1971 if (rc == VERR_NO_MORE_FILES)
1972 rc = VINF_SUCCESS;
1973 /* Try skip the entry if we end up with an overflow (szPath can't hold it either then). */
1974 else if (rc == VERR_BUFFER_OVERFLOW)
1975 {
1976 pGlob->cPathOverflows++;
1977 rc = rtPathGlobSkipDirEntry(hDir, cch);
1978 if (RT_SUCCESS(rc))
1979 continue;
1980 }
1981 /* else: Any other error is unexpected and should be reported. */
1982 break;
1983 }
1984 }
1985
1986 RTDirClose(hDir);
1987 }
1988 /* Directory doesn't exist or something else is wrong, ignore this. */
1989 else
1990 rc = VINF_SUCCESS;
1991 return rc;
1992}
1993
1994
1995/**
1996 * Executes a glob search.
1997 *
1998 * @returns IPRT status code.
1999 * @param pGlob The glob instance data.
2000 */
2001static int rtPathGlobExec(PRTPATHGLOB pGlob)
2002{
2003 Assert(pGlob->offFirstPath < sizeof(pGlob->szPath));
2004 Assert(pGlob->szPath[pGlob->offFirstPath] == '\0');
2005
2006 int rc;
2007 if (RT_LIKELY(pGlob->iFirstComp < pGlob->pParsed->cComps))
2008 {
2009 /*
2010 * Call the appropriate function.
2011 */
2012 if (rtPathGlobExecIsExpVar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2013 rc = rtPathGlobExecRecursiveVarExp(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2014 else if (rtPathGlobExecIsPlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2015 rc = rtPathGlobExecRecursivePlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2016 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
2017 rc = rtPathGlobExecRecursiveStarStar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp, pGlob->offFirstPath);
2018 else
2019 rc = rtPathGlobExecRecursiveGeneric(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2020 }
2021 else
2022 {
2023 /*
2024 * Special case where we only have a root component or tilde expansion.
2025 */
2026 Assert(pGlob->offFirstPath > 0);
2027 rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
2028 if ( RT_SUCCESS(rc)
2029 && rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
2030 rc = rtPathGlobAddResult(pGlob, pGlob->offFirstPath,
2031 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK) >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
2032 else
2033 rc = VINF_SUCCESS;
2034 }
2035
2036 /*
2037 * Adjust the status code. Check for results, hide RTPATHGLOB_F_FIRST_ONLY
2038 * status code, and add warning if necessary.
2039 */
2040 if (pGlob->cResults > 0)
2041 {
2042 if (rc == VINF_CALLBACK_RETURN)
2043 rc = VINF_SUCCESS;
2044 if (rc == VINF_SUCCESS)
2045 {
2046 if (pGlob->cPathOverflows > 0)
2047 rc = VINF_BUFFER_OVERFLOW;
2048 }
2049 }
2050 else
2051 rc = VERR_FILE_NOT_FOUND;
2052
2053 return rc;
2054}
2055
2056
2057RTDECL(int) RTPathGlob(const char *pszPattern, uint32_t fFlags, PPCRTPATHGLOBENTRY ppHead, uint32_t *pcResults)
2058{
2059 /*
2060 * Input validation.
2061 */
2062 AssertPtrReturn(ppHead, VERR_INVALID_POINTER);
2063 *ppHead = NULL;
2064 if (pcResults)
2065 {
2066 AssertPtrReturn(pcResults, VERR_INVALID_POINTER);
2067 *pcResults = 0;
2068 }
2069 AssertPtrReturn(pszPattern, VERR_INVALID_POINTER);
2070 AssertReturn(!(fFlags & ~RTPATHGLOB_F_MASK), VERR_INVALID_FLAGS);
2071 AssertReturn((fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) != (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS),
2072 VERR_INVALID_FLAGS);
2073
2074 /*
2075 * Parse the path.
2076 */
2077 size_t cbParsed = RT_OFFSETOF(RTPATHPARSED, aComps[1]); /** @todo 16 after testing */
2078 PRTPATHPARSED pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2079 AssertReturn(pParsed, VERR_NO_MEMORY);
2080 int rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2081 if (rc == VERR_BUFFER_OVERFLOW)
2082 {
2083 cbParsed = RT_OFFSETOF(RTPATHPARSED, aComps[pParsed->cComps + 1]);
2084 RTMemTmpFree(pParsed);
2085 pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2086 AssertReturn(pParsed, VERR_NO_MEMORY);
2087
2088 rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2089 }
2090 if (RT_SUCCESS(rc))
2091 {
2092 /*
2093 * Check dir slash vs. only/not dir flag.
2094 */
2095 if ( !(fFlags & RTPATHGLOB_F_NO_DIRS)
2096 || ( !(pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2097 && ( !(pParsed->fProps & (RTPATH_PROP_ROOT_SLASH | RTPATH_PROP_UNC))
2098 || pParsed->cComps > 1) ) )
2099 {
2100 if (pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2101 fFlags |= RTPATHGLOB_F_ONLY_DIRS;
2102
2103 /*
2104 * Allocate and initialize the glob state data structure.
2105 */
2106 size_t cbGlob = RT_OFFSETOF(RTPATHGLOB, aComps[pParsed->cComps + 1]);
2107 PRTPATHGLOB pGlob = (PRTPATHGLOB)RTMemTmpAllocZ(cbGlob);
2108 if (pGlob)
2109 {
2110 pGlob->pszPattern = pszPattern;
2111 pGlob->fFlags = fFlags;
2112 pGlob->pParsed = pParsed;
2113 pGlob->ppNext = &pGlob->pHead;
2114 rc = rtPathGlobParse(pGlob, pszPattern, pParsed, fFlags);
2115 if (RT_SUCCESS(rc))
2116 {
2117 /*
2118 * Execute the search.
2119 */
2120 rc = rtPathGlobExec(pGlob);
2121 if (RT_SUCCESS(rc))
2122 {
2123 *ppHead = pGlob->pHead;
2124 if (pcResults)
2125 *pcResults = pGlob->cResults;
2126 }
2127 else
2128 RTPathGlobFree(pGlob->pHead);
2129 }
2130
2131 RTMemTmpFree(pGlob->MatchInstrAlloc.paInstructions);
2132 RTMemTmpFree(pGlob);
2133 }
2134 else
2135 rc = VERR_NO_MEMORY;
2136 }
2137 else
2138 rc = VERR_NOT_FOUND;
2139 }
2140 RTMemTmpFree(pParsed);
2141 return rc;
2142
2143
2144}
2145
2146
2147RTDECL(void) RTPathGlobFree(PCRTPATHGLOBENTRY pHead)
2148{
2149 PRTPATHGLOBENTRY pCur = (PRTPATHGLOBENTRY)pHead;
2150 while (pCur)
2151 {
2152 PRTPATHGLOBENTRY pNext = pCur->pNext;
2153 pCur->pNext = NULL;
2154 RTMemFree(pCur);
2155 pCur = pNext;
2156 }
2157}
2158
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette