VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/path/RTPathGlob.cpp@ 76409

Last change on this file since 76409 was 76409, checked in by vboxsync, 6 years ago

iprt/string.h: Dropped including utf16.h and let those who need it include it themselves. bugref:9344

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 83.8 KB
Line 
1/* $Id: RTPathGlob.cpp 76409 2018-12-23 18:27:21Z vboxsync $ */
2/** @file
3 * IPRT - RTPathGlob
4 */
5
6/*
7 * Copyright (C) 2006-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include "internal/iprt.h"
32#include <iprt/path.h>
33
34#include <iprt/asm.h>
35#include <iprt/assert.h>
36#include <iprt/buildconfig.h>
37#include <iprt/ctype.h>
38#include <iprt/dir.h>
39#include <iprt/env.h>
40#include <iprt/err.h>
41#include <iprt/mem.h>
42#include <iprt/string.h>
43#include <iprt/uni.h>
44
45#if defined(RT_OS_WINDOWS)
46# include <iprt/utf16.h>
47# include <iprt/win/windows.h>
48# include "../../r3/win/internal-r3-win.h"
49
50#elif defined(RT_OS_OS2)
51# define INCL_BASE
52# include <os2.h>
53# undef RT_MAX /* collision */
54
55#endif
56
57
58/*********************************************************************************************************************************
59* Defined Constants And Macros *
60*********************************************************************************************************************************/
/** Upper limit on the number of results. */
#define RTPATHGLOB_MAX_RESULTS          _32K
/** Upper limit on the number of zero-or-more wildcards in a single pattern.
 * Keeps stack usage, recursion depth and execution time bounded. */
#define RTPATHMATCH_MAX_ZERO_OR_MORE    24
/** Upper limit on the number of variable items. */
#define RTPATHMATCH_MAX_VAR_ITEMS       _4K
68
69
70
71/*********************************************************************************************************************************
72* Structures and Typedefs *
73*********************************************************************************************************************************/
/**
 * Opcodes of the matching program.
 */
typedef enum RTPATHMATCHOP
{
    /** Zero is not a valid opcode. */
    RTPATHMATCHOP_INVALID = 0,

    /** EOS: Match if all the input has been consumed. */
    RTPATHMATCHOP_RETURN_MATCH_IF_AT_END,
    /** Asterisk (trailing): Unconditional match. */
    RTPATHMATCHOP_RETURN_MATCH,
    /** Asterisk (on its own): Match anything but '.' and '..'. */
    RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT,

    /** Plain text: Compare the string case sensitively. */
    RTPATHMATCHOP_STRCMP,
    /** Plain text: Compare the string case insensitively. */
    RTPATHMATCHOP_STRICMP,

    /** Question mark: Skip exactly one code point. */
    RTPATHMATCHOP_SKIP_ONE_CODEPOINT,
    /** Question marks: Skip exactly RTPATHMATCHCORE::cch code points. */
    RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS,

    /** Char set: The next code point must be a member of the ASCII-7 set given
     * by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7,
    /** Char set: The next code point must not be a member of the ASCII-7 set
     * given by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7,
    /** Char set: The next code point must be a member of the extended set given
     * by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED,
    /** Char set: The next code point must not be a member of the extended set
     * given by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED,

    /** Variable: Compare with the variable value case sensitively, the
     * variable table index is in RTPATHMATCHCORE::uOp2. */
    RTPATHMATCHOP_VARIABLE_VALUE_CMP,
    /** Variable: Compare with the variable value case insensitively, the
     * variable table index is in RTPATHMATCHCORE::uOp2. */
    RTPATHMATCHOP_VARIABLE_VALUE_ICMP,

    /** Asterisk: Match zero or more code points, leaving at least
     * RTPATHMATCHCORE::cch code points after it. */
    RTPATHMATCHOP_ZERO_OR_MORE,
    /** Asterisk: Same as RTPATHMATCHOP_ZERO_OR_MORE, except that '.' and '..'
     * are never matched. */
    RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT,

    /** End of valid operations. */
    RTPATHMATCHOP_END
} RTPATHMATCHOP;
121
122/**
123 * Matching instruction.
124 */
125typedef struct RTPATHMATCHCORE
126{
127 /** The action to take. */
128 RTPATHMATCHOP enmOpCode;
129 /** Generic value operand. */
130 uint16_t uOp2;
131 /** Generic length operand. */
132 uint16_t cch;
133 /** Generic string pointer operand. */
134 const char *pch;
135} RTPATHMATCHCORE;
136/** Pointer to a matching instruction. */
137typedef RTPATHMATCHCORE *PRTPATHMATCHCORE;
138/** Pointer to a const matching instruction. */
139typedef RTPATHMATCHCORE const *PCRTPATHMATCHCORE;
140
141/**
142 * Path matching instruction allocator.
143 */
144typedef struct RTPATHMATCHALLOC
145{
146 /** Allocated array of instructions. */
147 PRTPATHMATCHCORE paInstructions;
148 /** Index of the next free entry in paScratch. */
149 uint32_t iNext;
150 /** Number of instructions allocated. */
151 uint32_t cAllocated;
152} RTPATHMATCHALLOC;
153/** Pointer to a matching instruction allocator. */
154typedef RTPATHMATCHALLOC *PRTPATHMATCHALLOC;
155
/**
 * Cache used while matching paths, primarily meant for variables such as PATH.
 */
typedef struct RTPATHMATCHCACHE
{
    /** Placeholder member.
     * @todo optimize later. */
    uint32_t            iNothingYet;
} RTPATHMATCHCACHE;
/** Pointer to a path matching cache. */
typedef RTPATHMATCHCACHE *PRTPATHMATCHCACHE;
166
167
168
/** Per path component information derived from the parsed pattern. */
typedef struct RTPATHGLOBPPE
{
    /** Normal: Index into RTPATHGLOB::MatchInstrAlloc.paInstructions. */
    uint32_t            iMatchProg : 16;
    /** Set when this is a normal entry, i.e. one matched by way of iMatchProg. */
    uint32_t            fNormal : 1;
    /** !fNormal: Plain name which can be handled without enumerating the whole
     * directory - unless the file system is case sensitive while the globbing
     * isn't (something that must be figured out per directory). */
    uint32_t            fPlain : 1;
    /** !fNormal: Matches zero or more subdirectories. */
    uint32_t            fStarStar : 1;
    /** !fNormal: The entire component is a variable expansion. */
    uint32_t            fExpVariable : 1;

    /** Filter: Set when only directories are matched. */
    uint32_t            fDir : 1;
    /** Set for the final component. */
    uint32_t            fFinal : 1;

    /** Bits not currently in use. */
    uint32_t            fReserved : 2+8;
} RTPATHGLOBPPE;
194
195
196typedef struct RTPATHGLOB
197{
198 /** Path buffer. */
199 char szPath[RTPATH_MAX];
200 /** Temporary buffers. */
201 union
202 {
203 /** File system object info structure. */
204 RTFSOBJINFO ObjInfo;
205 /** Directory entry buffer. */
206 RTDIRENTRY DirEntry;
207 /** Padding the buffer to an unreasonably large size. */
208 uint8_t abPadding[RTPATH_MAX + sizeof(RTDIRENTRY)];
209 } u;
210
211
212 /** Where to insert the next one.*/
213 PRTPATHGLOBENTRY *ppNext;
214 /** The head pointer. */
215 PRTPATHGLOBENTRY pHead;
216 /** Result count. */
217 uint32_t cResults;
218 /** Counts path overflows. */
219 uint32_t cPathOverflows;
220 /** The input flags. */
221 uint32_t fFlags;
222 /** Matching instruction allocator. */
223 RTPATHMATCHALLOC MatchInstrAlloc;
224 /** Matching state. */
225 RTPATHMATCHCACHE MatchCache;
226
227 /** The pattern string. */
228 const char *pszPattern;
229 /** The parsed path. */
230 PRTPATHPARSED pParsed;
231 /** The component to start with. */
232 uint16_t iFirstComp;
233 /** The corresponding path offset (previous components already present). */
234 uint16_t offFirstPath;
235 /** Path component information we need. */
236 RTPATHGLOBPPE aComps[1];
237} RTPATHGLOB;
238typedef RTPATHGLOB *PRTPATHGLOB;
239
240
241/**
242 * Matching variable lookup table.
243 * Currently so small we don't bother sorting it and doing binary lookups.
244 */
245typedef struct RTPATHMATCHVAR
246{
247 /** The variable name. */
248 const char *pszName;
249 /** The variable name length. */
250 uint16_t cchName;
251 /** Only available as the verify first component. */
252 bool fFirstOnly;
253
254 /**
255 * Queries a given variable value.
256 *
257 * @returns IPRT status code.
258 * @retval VERR_BUFFER_OVERFLOW
259 * @retval VERR_TRY_AGAIN if the caller should skip this value item and try the
260 * next one instead (e.g. env var not present).
261 * @retval VINF_EOF when retrieving the last one, if possible.
262 * @retval VERR_EOF when @a iItem is past the item space.
263 *
264 * @param iItem The variable value item to retrieve. (A variable may
265 * have more than one value, e.g. 'BothProgramFile' on a
266 * 64-bit system or 'Path'.)
267 * @param pszBuf Where to return the value.
268 * @param cbBuf The buffer size.
269 * @param pcchValue Where to return the length of the return string.
270 * @param pCache Pointer to the path matching cache. May speed up
271 * enumerating PATH items and similar.
272 */
273 DECLCALLBACKMEMBER(int, pfnQuery)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, PRTPATHMATCHCACHE pCache);
274
275 /**
276 * Matching method, optional.
277 *
278 * @returns IPRT status code.
279 * @retval VINF_SUCCESS on match.
280 * @retval VERR_MISMATCH on mismatch.
281 *
282 * @param pszMatch String to match with (not terminated).
283 * @param cchMatch The length of what we match with.
284 * @param fIgnoreCase Whether to ignore case or not when comparing.
285 * @param pcchMatched Where to return the length of the match (value length).
286 */
287 DECLCALLBACKMEMBER(int, pfnMatch)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, size_t *pcchMatched);
288
289} RTPATHMATCHVAR;
290
291
292/*********************************************************************************************************************************
293* Internal Functions *
294*********************************************************************************************************************************/
295static int rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp, size_t offStarStarPath);
296static int rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
297static int rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
298static int rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
299
300
/**
 * Implements the two variable access functions for a simple one value variable.
 *
 * Expands to rtPathVarQuery_<a_Name> (RTPATHMATCHVAR::pfnQuery) and
 * rtPathVarMatch_<a_Name> (RTPATHMATCHVAR::pfnMatch).
 *
 * The query function only knows item 0: it copies the value (terminator
 * included) and returns VINF_EOF, or VERR_BUFFER_OVERFLOW if the buffer is too
 * small.  Any other item yields VERR_EOF.
 *
 * The match function returns VINF_SUCCESS and the value length when the input
 * starts with the value, VERR_MISMATCH otherwise.  The input is not terminated,
 * so the value must fit within @a cchMatch bytes before anything is compared.
 *
 * @param   a_Name          Used for forming the function names.
 * @param   a_GetStrExpr    Expression producing the zero terminated value.
 */
#define RTPATHMATCHVAR_SIMPLE(a_Name, a_GetStrExpr) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        NOREF(pCache); \
        if (iItem == 0) \
        { \
            const char *pszValue = a_GetStrExpr; \
            size_t      cchValue = strlen(pszValue); \
            if (cchValue + 1 <= cbBuf) \
            { \
                memcpy(pszBuf, pszValue, cchValue + 1); \
                *pcchValue = cchValue; \
                return VINF_EOF; \
            } \
            return VERR_BUFFER_OVERFLOW; \
        } \
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        const char *pszValue = a_GetStrExpr; \
        size_t      cchValue = strlen(pszValue); \
        /* The value is a prefix candidate: it must not exceed the input length. */ \
        if (   cchValue <= cchMatch \
            && (  !fIgnoreCase \
                ? memcmp(pszValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(pszValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
339
/**
 * Implements mapping a glob variable to an environment variable.
 *
 * Expands to rtPathVarQuery_<a_Name> (RTPATHMATCHVAR::pfnQuery) and
 * rtPathVarMatch_<a_Name> (RTPATHMATCHVAR::pfnMatch).
 *
 * The query function only knows item 0: it returns VINF_EOF when RTEnvGetEx
 * succeeds, VERR_EOF when the environment variable does not exist, and passes
 * any other RTEnvGetEx failure on to the caller.  Other items yield VERR_EOF.
 *
 * The match function returns VINF_SUCCESS and the value length when the input
 * starts with the environment variable value, VERR_MISMATCH otherwise.  The
 * input is not terminated, so the value must fit within @a cchMatch bytes
 * before anything is compared.
 *
 * @param   a_Name          Used for forming the function names.
 * @param   a_pszEnvVar     The name of the environment variable.
 * @param   a_cbMaxValue    Size of the stack buffer the match function uses
 *                          for fetching the value.
 */
#define RTPATHMATCHVAR_SIMPLE_ENVVAR(a_Name, a_pszEnvVar, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        NOREF(pCache); \
        if (iItem == 0) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return VINF_EOF; \
            if (rc != VERR_ENV_VAR_NOT_FOUND) \
                return rc; \
        } \
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        char   szValue[a_cbMaxValue]; \
        size_t cchValue; \
        int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, szValue, sizeof(szValue), &cchValue); \
        /* The value is a prefix candidate: it must not exceed the input length. */ \
        if (   RT_SUCCESS(rc) \
            && cchValue <= cchMatch \
            && (  !fIgnoreCase \
                ? memcmp(szValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
376
/**
 * Implements mapping a glob variable to multiple environment variable values.
 *
 * Expands to rtPathVarQuery_<a_Name> (RTPATHMATCHVAR::pfnQuery) and
 * rtPathVarMatch_<a_Name> (RTPATHMATCHVAR::pfnMatch).
 *
 * The query function maps item N to the N'th environment variable name.  It
 * returns VINF_EOF for the last name and VINF_SUCCESS for the others when
 * RTEnvGetEx succeeds, VERR_TRY_AGAIN when the environment variable does not
 * exist, other RTEnvGetEx failures as-is, and VERR_EOF when @a iItem is beyond
 * the name table.
 *
 * The match function tries the values in table order and returns VINF_SUCCESS
 * and the value length for the first one the input starts with, VERR_MISMATCH
 * if none does.  The input is not terminated, so a value must fit within
 * @a cchMatch bytes before anything is compared.
 *
 * @param   a_Name          The variable name.
 * @param   a_apszVarNames  Assumes to be a global variable that RT_ELEMENTS
 *                          works correctly on.
 * @param   a_cbMaxValue    The max expected value size.
 */
#define RTPATHMATCHVAR_MULTIPLE_ENVVARS(a_Name, a_apszVarNames, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        NOREF(pCache); \
        if (iItem < RT_ELEMENTS(a_apszVarNames)) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return iItem + 1 == RT_ELEMENTS(a_apszVarNames) ? VINF_EOF : VINF_SUCCESS; \
            if (rc == VERR_ENV_VAR_NOT_FOUND) \
                rc = VERR_TRY_AGAIN; \
            return rc; \
        } \
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        for (uint32_t iItem = 0; iItem < RT_ELEMENTS(a_apszVarNames); iItem++) \
        { \
            char   szValue[a_cbMaxValue]; \
            size_t cchValue; \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], szValue, sizeof(szValue), &cchValue);\
            /* The value is a prefix candidate: it must not exceed the input length. */ \
            if (   RT_SUCCESS(rc) \
                && cchValue <= cchMatch \
                && (  !fIgnoreCase \
                    ? memcmp(szValue, pchMatch, cchValue) == 0 \
                    : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
            { \
                *pcchMatched = cchValue; \
                return VINF_SUCCESS; \
            } \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
422
423
424RTPATHMATCHVAR_SIMPLE(Arch, RTBldCfgTargetArch());
425RTPATHMATCHVAR_SIMPLE(Bits, RT_XSTR(ARCH_BITS));
426#ifdef RT_OS_WINDOWS
427RTPATHMATCHVAR_SIMPLE_ENVVAR(WinAppData, "AppData", RTPATH_MAX);
428RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramData, "ProgramData", RTPATH_MAX);
429RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramFiles, "ProgramFiles", RTPATH_MAX);
430RTPATHMATCHVAR_SIMPLE_ENVVAR(WinCommonProgramFiles, "CommonProgramFiles", RTPATH_MAX);
431# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
432RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherProgramFiles, "ProgramFiles(x86)", RTPATH_MAX);
433RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherCommonProgramFiles, "CommonProgramFiles(x86)", RTPATH_MAX);
434# else
435# error "Port ME!"
436# endif
437static const char * const a_apszWinProgramFilesVars[] =
438{
439 "ProgramFiles",
440# ifdef RT_ARCH_AMD64
441 "ProgramFiles(x86)",
442# endif
443};
444RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllProgramFiles, a_apszWinProgramFilesVars, RTPATH_MAX);
445static const char * const a_apszWinCommonProgramFilesVars[] =
446{
447 "CommonProgramFiles",
448# ifdef RT_ARCH_AMD64
449 "CommonProgramFiles(x86)",
450# endif
451};
452RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllCommonProgramFiles, a_apszWinCommonProgramFilesVars, RTPATH_MAX);
453#endif
454
455
456/**
457 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery, Enumerates the PATH}
458 */
459static DECLCALLBACK(int) rtPathVarQuery_Path(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
460 PRTPATHMATCHCACHE pCache)
461{
462 RT_NOREF_PV(pCache);
463
464 /*
465 * Query the PATH value.
466 */
467/** @todo cache this in pCache with iItem and offset. */
468 char *pszPathFree = NULL;
469 char *pszPath = pszBuf;
470 size_t cchActual;
471 const char *pszVarNm = "PATH";
472 int rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPath, cbBuf, &cchActual);
473#ifdef RT_OS_WINDOWS
474 if (rc == VERR_ENV_VAR_NOT_FOUND)
475 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm = "Path", pszPath, cbBuf, &cchActual);
476#endif
477 if (rc == VERR_BUFFER_OVERFLOW)
478 {
479 for (uint32_t iTry = 0; iTry < 10; iTry++)
480 {
481 size_t cbPathBuf = RT_ALIGN_Z(cchActual + 1 + 64 * iTry, 64);
482 pszPathFree = (char *)RTMemTmpAlloc(cbPathBuf);
483 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPathFree, cbPathBuf, &cchActual);
484 if (RT_SUCCESS(rc))
485 break;
486 RTMemTmpFree(pszPathFree);
487 AssertReturn(cchActual >= cbPathBuf, VERR_INTERNAL_ERROR_3);
488 }
489 pszPath = pszPathFree;
490 }
491
492 /*
493 * Spool forward to the given PATH item.
494 */
495 rc = VERR_EOF;
496#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
497 const char chSep = ';';
498#else
499 const char chSep = ':';
500#endif
501 while (*pszPath != '\0')
502 {
503 char *pchSep = strchr(pszPath, chSep);
504
505 /* We ignore empty strings, which is probably not entirely correct,
506 but works better on DOS based system with many entries added
507 without checking whether there is a trailing separator or not.
508 Thus, the current directory is only searched if a '.' is present
509 in the PATH. */
510 if (pchSep == pszPath)
511 pszPath++;
512 else if (iItem > 0)
513 {
514 /* If we didn't find a separator, the item doesn't exists. Quit. */
515 if (!pchSep)
516 break;
517
518 pszPath = pchSep + 1;
519 iItem--;
520 }
521 else
522 {
523 /* We've reached the item we wanted. */
524 size_t cchComp = pchSep ? pchSep - pszPath : strlen(pszPath);
525 if (cchComp < cbBuf)
526 {
527 if (pszBuf != pszPath)
528 memmove(pszBuf, pszPath, cchComp);
529 pszBuf[cchComp] = '\0';
530 rc = pchSep ? VINF_SUCCESS : VINF_EOF;
531 }
532 else
533 rc = VERR_BUFFER_OVERFLOW;
534 *pcchValue = cchComp;
535 break;
536 }
537 }
538
539 if (pszPathFree)
540 RTMemTmpFree(pszPathFree);
541 return rc;
542}
543
544
#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system drive letter + colon.}.
 *
 * Only item 0 exists.  On Windows the drive is taken from the start of the
 * system Windows directory, on OS/2 from the boot drive system information.
 *
 * @returns VINF_EOF with a two character value ("X:") on success, VERR_EOF for
 *          items other than 0, VERR_BUFFER_OVERFLOW if @a cbBuf is less than 3,
 *          VERR_SYMBOL_NOT_FOUND if the Windows API isn't available, and
 *          VERR_INTERNAL_ERROR_4 if the drive couldn't be determined.
 */
static DECLCALLBACK(int) rtPathVarQuery_DosSystemDrive(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                       PRTPATHMATCHCACHE pCache)
{
    RT_NOREF_PV(pCache);

    if (iItem == 0)
    {
        AssertReturn(cbBuf >= 3, VERR_BUFFER_OVERFLOW);

# ifdef RT_OS_WINDOWS
        /* Since this is used at the start of a pattern, we assume
           we've got more than enough buffer space.  The output buffer
           doubles as temporary UTF-16 buffer. */
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        PRTUTF16   pwszTmp = (PRTUTF16)pszBuf;
        UINT const cwcBuf  = (UINT)(cbBuf / sizeof(WCHAR));
        UINT       cch     = g_pfnGetSystemWindowsDirectoryW(pwszTmp, cwcBuf);
        /* A return value of cwcBuf or more means the buffer was too small and
           nothing usable was stored in it. */
        if (cch >= 2 && cch < cwcBuf)
        {
            RTUTF16 wcDrive = pwszTmp[0];
            if (   RT_C_IS_ALPHA(wcDrive)
                && pwszTmp[1] == ':')
            {
                pszBuf[0] = wcDrive;
                pszBuf[1] = ':';
                pszBuf[2] = '\0';
                *pcchValue = 2;
                return VINF_EOF;
            }
        }
# else
        ULONG  ulDrive = ~(ULONG)0;
        APIRET rc = DosQuerySysInfo(QSV_BOOT_DRIVE, QSV_BOOT_DRIVE, &ulDrive, sizeof(ulDrive));
        ulDrive--; /* 1 = 'A' */
        /* ulDrive is a zero based letter index now, so the limit is 'Z' - 'A'. */
        if (   rc == NO_ERROR
            && ulDrive <= (ULONG)('Z' - 'A'))
        {
            pszBuf[0] = (char)ulDrive + 'A';
            pszBuf[1] = ':';
            pszBuf[2] = '\0';
            *pcchValue = 2;
            return VINF_EOF;
        }
# endif
        return VERR_INTERNAL_ERROR_4;
    }
    return VERR_EOF;
}
#endif
597
598
#ifdef RT_OS_WINDOWS
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system root directory (C:\Windows).}.
 *
 * Only item 0 exists.  The directory is queried as UTF-16 and converted to
 * UTF-8 into @a pszBuf.
 *
 * @returns The RTUtf16ToUtf8Ex status on a successful query, VERR_EOF for items
 *          other than 0, VERR_SYMBOL_NOT_FOUND if the Windows API isn't
 *          available, VERR_BUFFER_OVERFLOW if the directory doesn't fit in the
 *          MAX_PATH sized temporary buffer, or the converted last error when
 *          the API fails.
 */
static DECLCALLBACK(int) rtPathVarQuery_WinSystemRoot(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                      PRTPATHMATCHCACHE pCache)
{
    RT_NOREF_PV(pCache);

    if (iItem == 0)
    {
        Assert(pszBuf); Assert(cbBuf);
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszSystemRoot[MAX_PATH];
        UINT cchSystemRoot = g_pfnGetSystemWindowsDirectoryW(wszSystemRoot, MAX_PATH);
        /* When the buffer is too small the API returns the required size
           rather than a string length, so that value must not be used for
           reading wszSystemRoot. */
        if (cchSystemRoot >= RT_ELEMENTS(wszSystemRoot))
            return VERR_BUFFER_OVERFLOW;
        if (cchSystemRoot > 0)
            return RTUtf16ToUtf8Ex(wszSystemRoot, cchSystemRoot, &pszBuf, cbBuf, pcchValue);
        return RTErrConvertFromWin32(GetLastError());
    }
    return VERR_EOF;
}
#endif
622
/* The generator macros are only needed for the instantiations above. */
#undef RTPATHMATCHVAR_SIMPLE
#undef RTPATHMATCHVAR_SIMPLE_ENVVAR
#undef RTPATHMATCHVAR_MULTIPLE_ENVVARS
626
627/**
628 * Variables.
629 */
630static RTPATHMATCHVAR const g_aVariables[] =
631{
632 { RT_STR_TUPLE("Arch"), false, rtPathVarQuery_Arch, rtPathVarMatch_Arch },
633 { RT_STR_TUPLE("Bits"), false, rtPathVarQuery_Bits, rtPathVarMatch_Bits },
634 { RT_STR_TUPLE("Path"), true, rtPathVarQuery_Path, NULL },
635#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
636 { RT_STR_TUPLE("SystemDrive"), true, rtPathVarQuery_DosSystemDrive, NULL },
637#endif
638#ifdef RT_OS_WINDOWS
639 { RT_STR_TUPLE("SystemRoot"), true, rtPathVarQuery_WinSystemRoot, NULL },
640 { RT_STR_TUPLE("AppData"), true, rtPathVarQuery_WinAppData, rtPathVarMatch_WinAppData },
641 { RT_STR_TUPLE("ProgramData"), true, rtPathVarQuery_WinProgramData, rtPathVarMatch_WinProgramData },
642 { RT_STR_TUPLE("ProgramFiles"), true, rtPathVarQuery_WinProgramFiles, rtPathVarMatch_WinProgramFiles },
643 { RT_STR_TUPLE("OtherProgramFiles"), true, rtPathVarQuery_WinOtherProgramFiles, rtPathVarMatch_WinOtherProgramFiles },
644 { RT_STR_TUPLE("AllProgramFiles"), true, rtPathVarQuery_WinAllProgramFiles, rtPathVarMatch_WinAllProgramFiles },
645 { RT_STR_TUPLE("CommonProgramFiles"), true, rtPathVarQuery_WinCommonProgramFiles, rtPathVarMatch_WinCommonProgramFiles },
646 { RT_STR_TUPLE("OtherCommonProgramFiles"), true, rtPathVarQuery_WinOtherCommonProgramFiles, rtPathVarMatch_WinOtherCommonProgramFiles },
647 { RT_STR_TUPLE("AllCommonProgramFiles"), true, rtPathVarQuery_WinAllCommonProgramFiles, rtPathVarMatch_WinAllCommonProgramFiles },
648#endif
649};
650
651
652
653/**
654 * Handles a complicated set.
655 *
656 * A complicated set is either using ranges, character classes or code points
657 * outside the ASCII-7 range.
658 *
659 * @returns VINF_SUCCESS or VERR_MISMATCH. May also return UTF-8 decoding
660 * errors as well as VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED.
661 *
662 * @param ucInput The input code point to match with.
663 * @param pchSet The start of the set specification (after caret).
664 * @param cchSet The length of the set specification.
665 */
666static int rtPathMatchExecExtendedSet(RTUNICP ucInput, const char *pchSet, size_t cchSet)
667{
668 while (cchSet > 0)
669 {
670 RTUNICP ucSet;
671 int rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet);
672 AssertRCReturn(rc, rc);
673
674 /*
675 * Check for character class, collating symbol and equvalence class.
676 */
677 if (ucSet == '[' && cchSet > 0)
678 {
679 char chNext = *pchSet;
680 if (chNext == ':')
681 {
682#define CHECK_CHAR_CLASS(a_szClassNm, a_BoolTestExpr) \
683 if ( cchSet >= sizeof(a_szClassNm) \
684 && memcmp(pchSet, a_szClassNm "]", sizeof(a_szClassNm)) == 0) \
685 { \
686 if (a_BoolTestExpr) \
687 return VINF_SUCCESS; \
688 pchSet += sizeof(a_szClassNm); \
689 cchSet -= sizeof(a_szClassNm); \
690 continue; \
691 } do { } while (0)
692
693 CHECK_CHAR_CLASS(":alpha:", RTUniCpIsAlphabetic(ucInput));
694 CHECK_CHAR_CLASS(":alnum:", RTUniCpIsAlphabetic(ucInput) || RTUniCpIsDecDigit(ucInput)); /** @todo figure what's correct here and fix uni.h */
695 CHECK_CHAR_CLASS(":blank:", ucInput == ' ' || ucInput == '\t');
696 CHECK_CHAR_CLASS(":cntrl:", ucInput < 31 || ucInput == 127);
697 CHECK_CHAR_CLASS(":digit:", RTUniCpIsDecDigit(ucInput));
698 CHECK_CHAR_CLASS(":lower:", RTUniCpIsLower(ucInput));
699 CHECK_CHAR_CLASS(":print:", RTUniCpIsAlphabetic(ucInput) || (RT_C_IS_PRINT(ucInput) && ucInput < 127)); /** @todo fixme*/
700 CHECK_CHAR_CLASS(":punct:", RT_C_IS_PRINT(ucInput) && ucInput < 127); /** @todo fixme*/
701 CHECK_CHAR_CLASS(":space:", RTUniCpIsSpace(ucInput));
702 CHECK_CHAR_CLASS(":upper:", RTUniCpIsUpper(ucInput));
703 CHECK_CHAR_CLASS(":xdigit:", RTUniCpIsHexDigit(ucInput));
704 AssertMsgFailedReturn(("Unknown or malformed char class: '%.*s'\n", cchSet + 1, pchSet - 1),
705 VERR_PATH_GLOB_UNKNOWN_CHAR_CLASS);
706#undef CHECK_CHAR_CLASS
707 }
708 /** @todo implement collating symbol and equvalence class. */
709 else if (chNext == '=' || chNext == '.')
710 AssertFailedReturn(VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
711 }
712
713 /*
714 * Check for range (leading or final dash does not constitute a range).
715 */
716 if (cchSet > 1 && *pchSet == '-')
717 {
718 pchSet++; /* skip dash */
719 cchSet--;
720
721 RTUNICP ucSet2;
722 rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet2);
723 AssertRCReturn(rc, rc);
724 Assert(ucSet < ucSet2);
725 if (ucInput >= ucSet && ucInput <= ucSet2)
726 return VINF_SUCCESS;
727 }
728 /*
729 * Single char comparison.
730 */
731 else if (ucInput == ucSet)
732 return VINF_SUCCESS;
733 }
734 return VERR_MISMATCH;
735}
736
737
738/**
739 * Variable matching fallback using the query function.
740 *
741 * This must not be inlined as it consuming a lot of stack! Which is why it's
742 * placed a couple of functions away from the recursive rtPathExecMatch.
743 *
744 * @returns VINF_SUCCESS or VERR_MISMATCH.
745 * @param pchInput The current input position.
746 * @param cchInput The amount of input left..
747 * @param idxVar The variable table index.
748 * @param fIgnoreCase Whether to ignore case when comparing.
749 * @param pcchMatched Where to return how much we actually matched up.
750 * @param pCache Pointer to the path matching cache.
751 */
752DECL_NO_INLINE(static, int) rtPathMatchExecVariableFallback(const char *pchInput, size_t cchInput, uint16_t idxVar,
753 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
754{
755 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
756 {
757 char szValue[RTPATH_MAX];
758 size_t cchValue;
759 int rc = g_aVariables[idxVar].pfnQuery(iItem, szValue, sizeof(szValue), &cchValue, pCache);
760 if (RT_SUCCESS(rc))
761 {
762 if (cchValue <= cchInput)
763 {
764 if ( !fIgnoreCase
765 ? memcmp(pchInput, szValue, cchValue) == 0
766 : RTStrNICmp(pchInput, szValue, cchValue) == 0)
767 {
768 *pcchMatched = cchValue;
769 return VINF_SUCCESS;
770 }
771 }
772 if (rc == VINF_EOF)
773 return VERR_MISMATCH;
774 }
775 else if (rc == VERR_EOF)
776 return VERR_MISMATCH;
777 else
778 Assert(rc == VERR_BUFFER_OVERFLOW || rc == VERR_TRY_AGAIN);
779 }
780 AssertFailed();
781 return VERR_MISMATCH;
782}
783
784
785/**
786 * Variable matching worker.
787 *
788 * @returns VINF_SUCCESS or VERR_MISMATCH.
789 * @param pchInput The current input position.
790 * @param cchInput The amount of input left..
791 * @param idxVar The variable table index.
792 * @param fIgnoreCase Whether to ignore case when comparing.
793 * @param pcchMatched Where to return how much we actually matched up.
794 * @param pCache Pointer to the path matching cache.
795 */
796static int rtPathMatchExecVariable(const char *pchInput, size_t cchInput, uint16_t idxVar,
797 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
798{
799 Assert(idxVar < RT_ELEMENTS(g_aVariables));
800 if (g_aVariables[idxVar].pfnMatch)
801 return g_aVariables[idxVar].pfnMatch(pchInput, cchInput, fIgnoreCase, pcchMatched);
802 return rtPathMatchExecVariableFallback(pchInput, cchInput, idxVar, fIgnoreCase, pcchMatched, pCache);
803}
804
805
/**
 * Executes a matching program on the given input.
 *
 * @returns VINF_SUCCESS or VERR_MISMATCH.  Failures from decoding the input
 *          or from the set and variable workers may be passed up as well.
 * @param   pchInput            The current input position.
 * @param   cchInput            The amount of input left.
 * @param   pProg               The first matching program instruction.
 * @param   pCache              Pointer to the path matching cache.
 */
815static int rtPathMatchExec(const char *pchInput, size_t cchInput, PCRTPATHMATCHCORE pProg, PRTPATHMATCHCACHE pCache)
816{
817 for (;;)
818 {
819 switch (pProg->enmOpCode)
820 {
821 case RTPATHMATCHOP_RETURN_MATCH_IF_AT_END:
822 return cchInput == 0 ? VINF_SUCCESS : VERR_MISMATCH;
823
824 case RTPATHMATCHOP_RETURN_MATCH:
825 return VINF_SUCCESS;
826
827 case RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT:
828 if ( cchInput > 2
829 || cchInput < 1
830 || pchInput[0] != '.'
831 || (cchInput == 2 && pchInput[1] != '.') )
832 return VINF_SUCCESS;
833 return VERR_MISMATCH;
834
835 case RTPATHMATCHOP_STRCMP:
836 if (pProg->cch > cchInput)
837 return VERR_MISMATCH;
838 if (memcmp(pchInput, pProg->pch, pProg->cch) != 0)
839 return VERR_MISMATCH;
840 cchInput -= pProg->cch;
841 pchInput += pProg->cch;
842 break;
843
844 case RTPATHMATCHOP_STRICMP:
845 if (pProg->cch > cchInput)
846 return VERR_MISMATCH;
847 if (RTStrNICmp(pchInput, pProg->pch, pProg->cch) != 0)
848 return VERR_MISMATCH;
849 cchInput -= pProg->cch;
850 pchInput += pProg->cch;
851 break;
852
853 case RTPATHMATCHOP_SKIP_ONE_CODEPOINT:
854 {
855 if (cchInput == 0)
856 return VERR_MISMATCH;
857 RTUNICP ucInputIgnore;
858 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
859 AssertRCReturn(rc, rc);
860 break;
861 }
862
863 case RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS:
864 {
865 uint16_t cCpsLeft = pProg->cch;
866 Assert(cCpsLeft > 1);
867 if (cCpsLeft > cchInput)
868 return VERR_MISMATCH;
869 while (cCpsLeft-- > 0)
870 {
871 RTUNICP ucInputIgnore;
872 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
873 if (RT_FAILURE(rc))
874 return rc == VERR_END_OF_STRING ? VERR_MISMATCH : rc;
875 }
876 break;
877 }
878
879 case RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7:
880 {
881 if (cchInput == 0)
882 return VERR_MISMATCH;
883 RTUNICP ucInput;
884 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
885 AssertRCReturn(rc, rc);
886 if (ucInput >= 0x80)
887 return VERR_MISMATCH;
888 if (memchr(pProg->pch, (char)ucInput, pProg->cch) == NULL)
889 return VERR_MISMATCH;
890 break;
891 }
892
893 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7:
894 {
895 if (cchInput == 0)
896 return VERR_MISMATCH;
897 RTUNICP ucInput;
898 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
899 AssertRCReturn(rc, rc);
900 if (ucInput >= 0x80)
901 break;
902 if (memchr(pProg->pch, (char)ucInput, pProg->cch) != NULL)
903 return VERR_MISMATCH;
904 break;
905 }
906
907 case RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED:
908 {
909 if (cchInput == 0)
910 return VERR_MISMATCH;
911 RTUNICP ucInput;
912 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
913 AssertRCReturn(rc, rc);
914 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
915 if (rc == VINF_SUCCESS)
916 break;
917 return rc;
918 }
919
920 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED:
921 {
922 if (cchInput == 0)
923 return VERR_MISMATCH;
924 RTUNICP ucInput;
925 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
926 AssertRCReturn(rc, rc);
927 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
928 if (rc == VERR_MISMATCH)
929 break;
930 if (rc == VINF_SUCCESS)
931 rc = VERR_MISMATCH;
932 return rc;
933 }
934
935 case RTPATHMATCHOP_VARIABLE_VALUE_CMP:
936 case RTPATHMATCHOP_VARIABLE_VALUE_ICMP:
937 {
938 size_t cchMatched = 0;
939 int rc = rtPathMatchExecVariable(pchInput, cchInput, pProg->uOp2,
940 pProg->enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP, &cchMatched, pCache);
941 if (rc == VINF_SUCCESS)
942 {
943 pchInput += cchMatched;
944 cchInput -= cchMatched;
945 break;
946 }
947 return rc;
948 }
949
950 /*
951 * This is the expensive one. It always completes the program.
952 */
953 case RTPATHMATCHOP_ZERO_OR_MORE:
954 {
955 if (cchInput < pProg->cch)
956 return VERR_MISMATCH;
957 size_t cchMatched = cchInput - pProg->cch;
958 do
959 {
960 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
961 if (RT_SUCCESS(rc))
962 return rc;
963 } while (cchMatched-- > 0);
964 return VERR_MISMATCH;
965 }
966
967 /*
968 * Variant of the above that doesn't match '.' and '..' entries.
969 */
970 case RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT:
971 {
972 if (cchInput < pProg->cch)
973 return VERR_MISMATCH;
974 if ( cchInput <= 2
975 && cchInput > 0
976 && pchInput[0] == '.'
977 && (cchInput == 1 || pchInput[1] == '.') )
978 return VERR_MISMATCH;
979 size_t cchMatched = cchInput - pProg->cch;
980 do
981 {
982 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
983 if (RT_SUCCESS(rc))
984 return rc;
985 } while (cchMatched-- > 0);
986 return VERR_MISMATCH;
987 }
988
989 default:
990 AssertMsgFailedReturn(("enmOpCode=%d\n", pProg->enmOpCode), VERR_INTERNAL_ERROR_3);
991 }
992
993 pProg++;
994 }
995}
996
997
998
999
1000/**
1001 * Compiles a path matching program.
1002 *
1003 * @returns IPRT status code.
1004 * @param pchPattern The pattern to compile.
1005 * @param cchPattern The length of the pattern.
1006 * @param fIgnoreCase Whether to ignore case or not when doing the
1007 * actual matching later on.
1008 * @param pAllocator Pointer to the instruction allocator & result
1009 * array. The compiled "program" starts at
1010 * PRTPATHMATCHALLOC::paInstructions[PRTPATHMATCHALLOC::iNext]
1011 * (input iNext value).
1012 *
1013 * @todo Expose this matching code and also use it for RTDirOpenFiltered
1014 */
1015static int rtPathMatchCompile(const char *pchPattern, size_t cchPattern, bool fIgnoreCase, PRTPATHMATCHALLOC pAllocator)
1016{
1017 /** @todo PORTME: big endian. */
1018 static const uint8_t s_bmMetaChars[256/8] =
1019 {
1020 0x00, 0x00, 0x00, 0x00, /* 0 thru 31 */
1021 0x10, 0x04, 0x00, 0x80, /* 32 thru 63 */
1022 0x00, 0x00, 0x00, 0x08, /* 64 thru 95 */
1023 0x00, 0x00, 0x00, 0x00, /* 96 thru 127 */
1024 /* UTF-8 multibyte: */
1025 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1026 };
1027 Assert(ASMBitTest(s_bmMetaChars, '$')); AssertCompile('$' == 0x24 /*36*/);
1028 Assert(ASMBitTest(s_bmMetaChars, '*')); AssertCompile('*' == 0x2a /*42*/);
1029 Assert(ASMBitTest(s_bmMetaChars, '?')); AssertCompile('?' == 0x3f /*63*/);
1030 Assert(ASMBitTest(s_bmMetaChars, '[')); AssertCompile('[' == 0x5b /*91*/);
1031
1032 /*
1033 * For checking for the first instruction.
1034 */
1035 uint16_t const iFirst = pAllocator->iNext;
1036
1037 /*
1038 * This is for tracking zero-or-more instructions and for calculating
1039 * the minimum amount of input required for it to be considered.
1040 */
1041 uint16_t aiZeroOrMore[RTPATHMATCH_MAX_ZERO_OR_MORE];
1042 uint8_t cZeroOrMore = 0;
1043 size_t offInput = 0;
1044
1045 /*
1046 * Loop thru the pattern and translate it into string matching instructions.
1047 */
1048 for (;;)
1049 {
1050 /*
1051 * Allocate the next instruction.
1052 */
1053 if (pAllocator->iNext >= pAllocator->cAllocated)
1054 {
1055 uint32_t cNew = pAllocator->cAllocated ? pAllocator->cAllocated * 2 : 2;
1056 void *pvNew = RTMemRealloc(pAllocator->paInstructions, cNew * sizeof(pAllocator->paInstructions[0]));
1057 AssertReturn(pvNew, VERR_NO_MEMORY);
1058 pAllocator->paInstructions = (PRTPATHMATCHCORE)pvNew;
1059 pAllocator->cAllocated = cNew;
1060 }
1061 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[pAllocator->iNext++];
1062 pInstr->pch = pchPattern;
1063 pInstr->cch = 0;
1064 pInstr->uOp2 = 0;
1065
1066 /*
1067 * Special case: End of pattern.
1068 */
1069 if (!cchPattern)
1070 {
1071 pInstr->enmOpCode = RTPATHMATCHOP_RETURN_MATCH_IF_AT_END;
1072 break;
1073 }
1074
1075 /*
1076 * Parse the next bit of the pattern.
1077 */
1078 char ch = *pchPattern;
1079 if (ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1080 {
1081 /*
1082 * Zero or more characters wildcard.
1083 */
1084 if (ch == '*')
1085 {
1086 /* Skip extra asterisks. */
1087 do
1088 {
1089 cchPattern--;
1090 pchPattern++;
1091 } while (cchPattern > 0 && *pchPattern == '*');
1092
1093 /* There is a special optimization for trailing '*'. */
1094 pInstr->cch = 1;
1095 if (cchPattern == 0)
1096 {
1097 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1098 ? RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_RETURN_MATCH;
1099 break;
1100 }
1101
1102 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1103 ? RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_ZERO_OR_MORE;
1104 pInstr->uOp2 = (uint16_t)offInput;
1105 AssertReturn(cZeroOrMore < RT_ELEMENTS(aiZeroOrMore), VERR_OUT_OF_RANGE);
1106 aiZeroOrMore[cZeroOrMore] = (uint16_t)(pInstr - pAllocator->paInstructions);
1107
1108 /* cchInput unchanged, zero-or-more matches. */
1109 continue;
1110 }
1111
1112 /*
1113 * Single character wildcard.
1114 */
1115 if (ch == '?')
1116 {
1117 /* Count them if more. */
1118 uint16_t cchQms = 1;
1119 while (cchQms < cchPattern && pchPattern[cchQms] == '?')
1120 cchQms++;
1121
1122 pInstr->cch = cchQms;
1123 pInstr->enmOpCode = cchQms == 1 ? RTPATHMATCHOP_SKIP_ONE_CODEPOINT : RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS;
1124
1125 cchPattern -= cchQms;
1126 pchPattern += cchQms;
1127 offInput += cchQms;
1128 continue;
1129 }
1130
1131 /*
1132 * Character in set.
1133 *
1134 * Note that we skip the first char in the set as that is the only place
1135 * ']' can be placed if one desires to explicitly include it in the set.
1136 * To make life a bit more interesting, [:class:] is allowed inside the
1137 * set, so we have to do the counting game to find the end.
1138 */
1139 if (ch == '[')
1140 {
1141 if ( cchPattern > 2
1142 && (const char *)memchr(pchPattern + 2, ']', cchPattern) != NULL)
1143 {
1144
1145 /* Check for not-in. */
1146 bool fInverted = false;
1147 size_t offStart = 1;
1148 if (pchPattern[offStart] == '^')
1149 {
1150 fInverted = true;
1151 offStart++;
1152 }
1153
1154 /* Special case for ']' as the first char, it doesn't indicate closing then. */
1155 size_t off = offStart;
1156 if (pchPattern[off] == ']')
1157 off++;
1158
1159 bool fExtended = false;
1160 while (off < cchPattern)
1161 {
1162 ch = pchPattern[off++];
1163 if (ch == '[')
1164 {
1165 if (off < cchPattern)
1166 {
1167 char chOpen = pchPattern[off];
1168 if ( chOpen == ':'
1169 || chOpen == '='
1170 || chOpen == '.')
1171 {
1172 off++;
1173 const char *pchFound = (const char *)memchr(&pchPattern[off], ']', cchPattern - off);
1174 if ( pchFound
1175 && pchFound[-1] == chOpen)
1176 {
1177 fExtended = true;
1178 off = pchFound - pchPattern + 1;
1179 }
1180 else
1181 AssertFailed();
1182 }
1183 }
1184 }
1185 /* Check for closing. */
1186 else if (ch == ']')
1187 break;
1188 /* Check for range expression, promote to extended if this happens. */
1189 else if ( ch == '-'
1190 && off != offStart + 1
1191 && off < cchPattern
1192 && pchPattern[off] != ']')
1193 fExtended = true;
1194 /* UTF-8 multibyte chars forces us to use the extended version too. */
1195 else if ((uint8_t)ch >= 0x80)
1196 fExtended = true;
1197 }
1198
1199 if (ch == ']')
1200 {
1201 pInstr->pch = &pchPattern[offStart];
1202 pInstr->cch = (uint16_t)(off - offStart - 1);
1203 if (!fExtended)
1204 pInstr->enmOpCode = !fInverted
1205 ? RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7;
1206 else
1207 pInstr->enmOpCode = !fInverted
1208 ? RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED
1209 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED;
1210 pchPattern += off;
1211 cchPattern -= off;
1212 offInput += 1;
1213 continue;
1214 }
1215
1216 /* else: invalid, treat it as */
1217 AssertFailed();
1218 }
1219 }
1220 /*
1221 * Variable matching.
1222 */
1223 else if (ch == '$')
1224 {
1225 const char *pchFound;
1226 if ( cchPattern > 3
1227 && pchPattern[1] == '{'
1228 && (pchFound = (const char *)memchr(pchPattern + 2, '}', cchPattern)) != NULL
1229 && pchFound != &pchPattern[2])
1230 {
1231 /* skip to the variable name. */
1232 pchPattern += 2;
1233 cchPattern -= 2;
1234 size_t cchVarNm = pchFound - pchPattern;
1235
1236 /* Look it up. */
1237 uint32_t iVar;
1238 for (iVar = 0; iVar < RT_ELEMENTS(g_aVariables); iVar++)
1239 if ( g_aVariables[iVar].cchName == cchVarNm
1240 && memcmp(g_aVariables[iVar].pszName, pchPattern, cchVarNm) == 0)
1241 break;
1242 if (iVar < RT_ELEMENTS(g_aVariables))
1243 {
1244 pInstr->uOp2 = (uint16_t)iVar;
1245 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_VARIABLE_VALUE_CMP : RTPATHMATCHOP_VARIABLE_VALUE_ICMP;
1246 pInstr->pch = pchPattern; /* not necessary */
1247 pInstr->cch = (uint16_t)cchPattern; /* ditto */
1248 pchPattern += cchVarNm + 1;
1249 cchPattern -= cchVarNm + 1;
1250 AssertMsgReturn(!g_aVariables[iVar].fFirstOnly || iFirst + 1U == pAllocator->iNext,
1251 ("Glob variable '%s' should be first\n", g_aVariables[iVar].pszName),
1252 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1253 /* cchInput unchanged, value can be empty. */
1254 continue;
1255 }
1256 AssertMsgFailedReturn(("Unknown path matching variable '%.*s'\n", cchVarNm, pchPattern),
1257 VERR_PATH_MATCH_UNKNOWN_VARIABLE);
1258 }
1259 }
1260 else
1261 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1262 }
1263
1264 /*
1265 * Plain text. Look for the next meta char.
1266 */
1267 uint32_t cchPlain = 1;
1268 while (cchPlain < cchPattern)
1269 {
1270 ch = pchPattern[cchPlain];
1271 if (!ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1272 { /* probable */ }
1273 else if ( ch == '?'
1274 || ch == '*')
1275 break;
1276 else if (ch == '$')
1277 {
1278 const char *pchFound;
1279 if ( cchPattern > cchPlain + 3
1280 && pchPattern[cchPlain + 1] == '{'
1281 && (pchFound = (const char *)memchr(&pchPattern[cchPlain + 2], '}', cchPattern - cchPlain - 2)) != NULL
1282 && pchFound != &pchPattern[cchPlain + 2])
1283 break;
1284 }
1285 else if (ch == '[')
1286 {
1287 /* We don't put a lot of effort into getting this 100% right here,
1288 no point it complicating things for malformed expressions. */
1289 if ( cchPattern > cchPlain + 2
1290 && memchr(&pchPattern[cchPlain + 2], ']', cchPattern - cchPlain - 1) != NULL)
1291 break;
1292 }
1293 else
1294 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1295 cchPlain++;
1296 }
1297 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_STRCMP : RTPATHMATCHOP_STRICMP;
1298 pInstr->cch = cchPlain;
1299 Assert(pInstr->pch == pchPattern);
1300 Assert(pInstr->uOp2 == 0);
1301 pchPattern += cchPlain;
1302 cchPattern -= cchPlain;
1303 offInput += cchPlain;
1304 }
1305
1306 /*
1307 * Optimize zero-or-more matching.
1308 */
1309 while (cZeroOrMore-- > 0)
1310 {
1311 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[aiZeroOrMore[cZeroOrMore]];
1312 pInstr->uOp2 = (uint16_t)(offInput - pInstr->uOp2);
1313 }
1314
1315 /** @todo It's possible to use offInput to inject a instruction for checking
1316 * minimum input length at the start of the program. Not sure it's
1317 * worth it though, unless it's long a complicated expression... */
1318 return VINF_SUCCESS;
1319}
1320
1321
1322/**
1323 * Parses the glob pattern.
1324 *
1325 * This compiles filename matching programs for each component and determins the
1326 * optimal search strategy for them.
1327 *
1328 * @returns IPRT status code.
1329 * @param pGlob The glob instance data.
1330 * @param pszPattern The pattern to parse.
1331 * @param pParsed The RTPathParse output for the pattern.
1332 * @param fFlags The glob flags (same as pGlob->fFlags).
1333 */
1334static int rtPathGlobParse(PRTPATHGLOB pGlob, const char *pszPattern, PRTPATHPARSED pParsed, uint32_t fFlags)
1335{
1336 AssertReturn(pParsed->cComps > 0, VERR_INVALID_PARAMETER); /* shouldn't happen */
1337 uint32_t iComp = 0;
1338
1339 /*
1340 * If we've got a rootspec, mark it as plain. On platforms with
1341 * drive letter and/or UNC we don't allow wildcards or such in
1342 * the drive letter spec or UNC server name. (At least not yet.)
1343 */
1344 if (RTPATH_PROP_HAS_ROOT_SPEC(pParsed->fProps))
1345 {
1346 AssertReturn(pParsed->aComps[0].cch < sizeof(pGlob->szPath) - 1, VERR_FILENAME_TOO_LONG);
1347 memcpy(pGlob->szPath, &pszPattern[pParsed->aComps[0].off], pParsed->aComps[0].cch);
1348 pGlob->offFirstPath = pParsed->aComps[0].cch;
1349 pGlob->iFirstComp = iComp = 1;
1350 }
1351 else
1352 {
1353 const char * const pszComp = &pszPattern[pParsed->aComps[0].off];
1354
1355 /*
1356 * The tilde is only applicable to the first component, expand it
1357 * immediately.
1358 */
1359 if ( *pszComp == '~'
1360 && !(fFlags & RTPATHGLOB_F_NO_TILDE))
1361 {
1362 if (pParsed->aComps[0].cch == 1)
1363 {
1364 int rc = RTPathUserHome(pGlob->szPath, sizeof(pGlob->szPath) - 1);
1365 AssertRCReturn(rc, rc);
1366 }
1367 else
1368 AssertMsgFailedReturn(("'%.*s' is not supported yet\n", pszComp, pParsed->aComps[0].cch),
1369 VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
1370 pGlob->offFirstPath = (uint32_t)RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1371 pGlob->iFirstComp = iComp = 1;
1372 }
1373 }
1374
1375 /*
1376 * Process the other components.
1377 */
1378 bool fStarStar = false;
1379 for (; iComp < pParsed->cComps; iComp++)
1380 {
1381 const char *pszComp = &pszPattern[pParsed->aComps[iComp].off];
1382 uint16_t cchComp = pParsed->aComps[iComp].cch;
1383 Assert(pGlob->aComps[iComp].fNormal == false);
1384
1385 pGlob->aComps[iComp].fDir = iComp + 1 < pParsed->cComps || (fFlags & RTPATHGLOB_F_ONLY_DIRS);
1386 if ( cchComp != 2
1387 || pszComp[0] != '*'
1388 || pszComp[1] != '*'
1389 || (fFlags & RTPATHGLOB_F_NO_STARSTAR) )
1390 {
1391 /* Compile the pattern. */
1392 uint16_t const iMatchProg = pGlob->MatchInstrAlloc.iNext;
1393 pGlob->aComps[iComp].iMatchProg = iMatchProg;
1394 int rc = rtPathMatchCompile(pszComp, cchComp, RT_BOOL(fFlags & RTPATHGLOB_F_IGNORE_CASE),
1395 &pGlob->MatchInstrAlloc);
1396 if (RT_FAILURE(rc))
1397 return rc;
1398
1399 /* Check for plain text as well as full variable matching (not applicable after '**'). */
1400 uint16_t const cInstructions = pGlob->MatchInstrAlloc.iNext - iMatchProg;
1401 if ( cInstructions == 2
1402 && !fStarStar
1403 && pGlob->MatchInstrAlloc.paInstructions[iMatchProg + 1].enmOpCode == RTPATHMATCHOP_RETURN_MATCH_IF_AT_END)
1404 {
1405 if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRCMP
1406 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRICMP)
1407 pGlob->aComps[iComp].fPlain = true;
1408 else if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1409 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP)
1410 {
1411 pGlob->aComps[iComp].fExpVariable = true;
1412 AssertMsgReturn( iComp == 0
1413 || !g_aVariables[pGlob->MatchInstrAlloc.paInstructions[iMatchProg].uOp2].fFirstOnly,
1414 ("Glob variable '%.*s' can only be used as the path component.\n", cchComp, pszComp),
1415 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1416 }
1417 else
1418 pGlob->aComps[iComp].fNormal = true;
1419 }
1420 else
1421 pGlob->aComps[iComp].fNormal = true;
1422 }
1423 else
1424 {
1425 /* Recursive "**" matching. */
1426 pGlob->aComps[iComp].fNormal = false;
1427 pGlob->aComps[iComp].fStarStar = true;
1428 AssertReturn(!fStarStar, VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED); /** @todo implement multiple '**' sequences in a pattern. */
1429 fStarStar = true;
1430 }
1431 }
1432 pGlob->aComps[pParsed->cComps - 1].fFinal = true;
1433
1434 return VINF_SUCCESS;
1435}
1436
1437
1438/**
1439 * This is for skipping overly long directories entries.
1440 *
1441 * Since our directory entry buffer can hold filenames of RTPATH_MAX bytes, we
1442 * can safely skip filenames that are longer. There are very few file systems
1443 * that can actually store filenames longer than 255 bytes at time of coding
1444 * (2015-09), and extremely few which can exceed 4096 (RTPATH_MAX) bytes.
1445 *
1446 * @returns IPRT status code.
1447 * @param hDir The directory handle.
1448 * @param cbNeeded The required entry size.
1449 */
1450DECL_NO_INLINE(static, int) rtPathGlobSkipDirEntry(RTDIR hDir, size_t cbNeeded)
1451{
1452 int rc = VERR_BUFFER_OVERFLOW;
1453 cbNeeded = RT_ALIGN_Z(cbNeeded, 16);
1454 PRTDIRENTRY pDirEntry = (PRTDIRENTRY)RTMemTmpAlloc(cbNeeded);
1455 if (pDirEntry)
1456 {
1457 rc = RTDirRead(hDir, pDirEntry, &cbNeeded);
1458 RTMemTmpFree(pDirEntry);
1459 }
1460 return rc;
1461}
1462
1463
1464/**
1465 * Adds a result.
1466 *
1467 * @returns IPRT status code.
1468 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1469 *
1470 * @param pGlob The glob instance data.
1471 * @param cchPath The number of bytes to add from pGlob->szPath.
1472 * @param uType The RTDIRENTRYTYPE value.
1473 */
1474DECL_NO_INLINE(static, int) rtPathGlobAddResult(PRTPATHGLOB pGlob, size_t cchPath, uint8_t uType)
1475{
1476 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1477 {
1478 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_UOFFSETOF_DYN(RTPATHGLOBENTRY, szPath[cchPath + 1]));
1479 if (pEntry)
1480 {
1481 pEntry->uType = uType;
1482 pEntry->cchPath = (uint16_t)cchPath;
1483 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1484 pEntry->szPath[cchPath] = '\0';
1485
1486 pEntry->pNext = NULL;
1487 *pGlob->ppNext = pEntry;
1488 pGlob->ppNext = &pEntry->pNext;
1489 pGlob->cResults++;
1490
1491 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1492 return VINF_SUCCESS;
1493 return VINF_CALLBACK_RETURN;
1494 }
1495 return VERR_NO_MEMORY;
1496 }
1497 return VERR_TOO_MUCH_DATA;
1498}
1499
1500
1501/**
1502 * Adds a result, constructing the path from two string.
1503 *
1504 * @returns IPRT status code.
1505 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1506 *
1507 * @param pGlob The glob instance data.
1508 * @param cchPath The number of bytes to add from pGlob->szPath.
1509 * @param pchName The string (usual filename) to append to the szPath.
1510 * @param cchName The length of the string to append.
1511 * @param uType The RTDIRENTRYTYPE value.
1512 */
1513DECL_NO_INLINE(static, int) rtPathGlobAddResult2(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1514 uint8_t uType)
1515{
1516 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1517 {
1518 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_UOFFSETOF_DYN(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1519 if (pEntry)
1520 {
1521 pEntry->uType = uType;
1522 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1523 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1524 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1525 pEntry->szPath[cchPath + cchName] = '\0';
1526
1527 pEntry->pNext = NULL;
1528 *pGlob->ppNext = pEntry;
1529 pGlob->ppNext = &pEntry->pNext;
1530 pGlob->cResults++;
1531
1532 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1533 return VINF_SUCCESS;
1534 return VINF_CALLBACK_RETURN;
1535 }
1536 return VERR_NO_MEMORY;
1537 }
1538 return VERR_TOO_MUCH_DATA;
1539}
1540
1541
1542/**
1543 * Prepares a result, constructing the path from two string.
1544 *
1545 * The caller must call either rtPathGlobCommitResult or
1546 * rtPathGlobRollbackResult to complete the operation.
1547 *
1548 * @returns IPRT status code.
1549 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1550 *
1551 * @param pGlob The glob instance data.
1552 * @param cchPath The number of bytes to add from pGlob->szPath.
1553 * @param pchName The string (usual filename) to append to the szPath.
1554 * @param cchName The length of the string to append.
1555 * @param uType The RTDIRENTRYTYPE value.
1556 */
1557DECL_NO_INLINE(static, int) rtPathGlobAlmostAddResult(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1558 uint8_t uType)
1559{
1560 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1561 {
1562 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_UOFFSETOF_DYN(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1563 if (pEntry)
1564 {
1565 pEntry->uType = uType;
1566 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1567 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1568 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1569 pEntry->szPath[cchPath + cchName] = '\0';
1570
1571 pEntry->pNext = NULL;
1572 *pGlob->ppNext = pEntry;
1573 /* Note! We don't update ppNext here, that is done in rtPathGlobCommitResult. */
1574
1575 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1576 return VINF_SUCCESS;
1577 return VINF_CALLBACK_RETURN;
1578 }
1579 return VERR_NO_MEMORY;
1580 }
1581 return VERR_TOO_MUCH_DATA;
1582}
1583
1584
1585/**
1586 * Commits a pending result from rtPathGlobAlmostAddResult.
1587 *
1588 * @param pGlob The glob instance data.
1589 * @param uType The RTDIRENTRYTYPE value.
1590 */
1591static void rtPathGlobCommitResult(PRTPATHGLOB pGlob, uint8_t uType)
1592{
1593 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1594 AssertPtr(pEntry);
1595 pEntry->uType = uType;
1596 pGlob->ppNext = &pEntry->pNext;
1597 pGlob->cResults++;
1598}
1599
1600
1601/**
1602 * Rolls back a pending result from rtPathGlobAlmostAddResult.
1603 *
1604 * @param pGlob The glob instance data.
1605 */
1606static void rtPathGlobRollbackResult(PRTPATHGLOB pGlob)
1607{
1608 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1609 AssertPtr(pEntry);
1610 RTMemFree(pEntry);
1611 *pGlob->ppNext = NULL;
1612}
1613
1614
1615
1616/**
1617 * Whether to call rtPathGlobExecRecursiveVarExp for the next component.
1618 *
1619 * @returns true / false.
1620 * @param pGlob The glob instance data.
1621 * @param offPath The next path offset/length.
1622 * @param iComp The next component.
1623 */
1624DECLINLINE(bool) rtPathGlobExecIsExpVar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1625{
1626 return pGlob->aComps[iComp].fExpVariable
1627 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1628 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1629}
1630
1631/**
1632 * Whether to call rtPathGlobExecRecursivePlainText for the next component.
1633 *
1634 * @returns true / false.
1635 * @param pGlob The glob instance data.
1636 * @param offPath The next path offset/length.
1637 * @param iComp The next component.
1638 */
1639DECLINLINE(bool) rtPathGlobExecIsPlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1640{
1641 return pGlob->aComps[iComp].fPlain
1642 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1643 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1644}
1645
1646
1647/**
1648 * Helper for rtPathGlobExecRecursiveVarExp and rtPathGlobExecRecursivePlainText
1649 * that compares a file mode mask with dir/no-dir wishes of the caller.
1650 *
1651 * @returns true if match, false if not.
1652 * @param pGlob The glob instance data.
1653 * @param fMode The file mode (only the type is used).
1654 */
1655DECLINLINE(bool) rtPathGlobExecIsMatchFinalWithFileMode(PRTPATHGLOB pGlob, RTFMODE fMode)
1656{
1657 if (!(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)))
1658 return true;
1659 return RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS) == RTFS_IS_DIRECTORY(fMode);
1660}
1661
1662
1663/**
1664 * Recursive globbing - star-star mode.
1665 *
1666 * @returns IPRT status code.
1667 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1668 *
1669 * @param pGlob The glob instance data.
1670 * @param offPath The current path offset/length.
1671 * @param iStarStarComp The star-star component index.
1672 * @param offStarStarPath The offset of the star-star component in the
1673 * pattern path.
1674 */
1675DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp,
1676 size_t offStarStarPath)
1677{
1678 /** @todo implement multi subdir matching. */
1679 RT_NOREF_PV(pGlob);
1680 RT_NOREF_PV(offPath);
1681 RT_NOREF_PV(iStarStarComp);
1682 RT_NOREF_PV(offStarStarPath);
1683 return VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED;
1684}
1685
1686
1687
1688/**
1689 * Recursive globbing - variable expansion optimization.
1690 *
1691 * @returns IPRT status code.
1692 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1693 *
1694 * @param pGlob The glob instance data.
1695 * @param offPath The current path offset/length.
1696 * @param iComp The current component.
1697 */
1698DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1699{
1700 Assert(iComp < pGlob->pParsed->cComps);
1701 Assert(pGlob->szPath[offPath] == '\0');
1702 Assert(pGlob->aComps[iComp].fExpVariable);
1703 Assert(!pGlob->aComps[iComp].fPlain);
1704 Assert(!pGlob->aComps[iComp].fStarStar);
1705 Assert(rtPathGlobExecIsExpVar(pGlob, offPath, iComp));
1706
1707 /*
1708 * Fish the variable index out of the first matching instruction.
1709 */
1710 Assert( pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1711 == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1712 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1713 == RTPATHMATCHOP_VARIABLE_VALUE_ICMP);
1714 uint16_t const iVar = pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].uOp2;
1715
1716 /*
1717 * Enumerate all the variable, giving them the plain text treatment.
1718 */
1719 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
1720 {
1721 size_t cch;
1722 int rcVar = g_aVariables[iVar].pfnQuery(iItem, &pGlob->szPath[offPath], sizeof(pGlob->szPath) - offPath, &cch,
1723 &pGlob->MatchCache);
1724 if (RT_SUCCESS(rcVar))
1725 {
1726 Assert(pGlob->szPath[offPath + cch] == '\0');
1727
1728 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1729 if (RT_SUCCESS(rc))
1730 {
1731 if (pGlob->aComps[iComp].fFinal)
1732 {
1733 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1734 {
1735 rc = rtPathGlobAddResult(pGlob, cch,
1736 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1737 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1738 if (rc != VINF_SUCCESS)
1739 return rc;
1740 }
1741 }
1742 else if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1743 {
1744 Assert(pGlob->aComps[iComp].fDir);
1745 cch = RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1746 if (cch > 0)
1747 {
1748 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1749 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1750 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1751 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1752 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1753 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1754 else
1755 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1756 if (rc != VINF_SUCCESS)
1757 return rc;
1758 }
1759 else
1760 pGlob->cPathOverflows++;
1761 }
1762 }
1763 /* else: file doesn't exist or something else is wrong, ignore this. */
1764 if (rcVar == VINF_EOF)
1765 return VINF_SUCCESS;
1766 }
1767 else if (rcVar == VERR_EOF)
1768 return VINF_SUCCESS;
1769 else if (rcVar != VERR_TRY_AGAIN)
1770 {
1771 Assert(rcVar == VERR_BUFFER_OVERFLOW);
1772 pGlob->cPathOverflows++;
1773 }
1774 }
1775 AssertFailedReturn(VINF_SUCCESS); /* Too many items returned, probably buggy query method. */
1776}
1777
1778
1779/**
1780 * Recursive globbing - plain text optimization.
1781 *
1782 * @returns IPRT status code.
1783 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1784 *
1785 * @param pGlob The glob instance data.
1786 * @param offPath The current path offset/length.
1787 * @param iComp The current component.
1788 */
1789DECL_NO_INLINE(static, int) rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1790{
1791 /*
1792 * Instead of recursing, we loop thru adjacent plain text components.
1793 */
1794 for (;;)
1795 {
1796 /*
1797 * Preconditions.
1798 */
1799 Assert(iComp < pGlob->pParsed->cComps);
1800 Assert(pGlob->szPath[offPath] == '\0');
1801 Assert(pGlob->aComps[iComp].fPlain);
1802 Assert(!pGlob->aComps[iComp].fExpVariable);
1803 Assert(!pGlob->aComps[iComp].fStarStar);
1804 Assert(rtPathGlobExecIsPlainText(pGlob, offPath, iComp));
1805 Assert(pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1806 == RTPATHMATCHOP_STRCMP
1807 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1808 == RTPATHMATCHOP_STRICMP);
1809
1810 /*
1811 * Add the plain text component to the path.
1812 */
1813 size_t const cch = pGlob->pParsed->aComps[iComp].cch;
1814 if (cch + pGlob->aComps[iComp].fDir < sizeof(pGlob->szPath) - offPath)
1815 {
1816 memcpy(&pGlob->szPath[offPath], &pGlob->pszPattern[pGlob->pParsed->aComps[iComp].off], cch);
1817 offPath += cch;
1818 pGlob->szPath[offPath] = '\0';
1819
1820 /*
1821 * Check if it exists.
1822 */
1823 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1824 if (RT_SUCCESS(rc))
1825 {
1826 if (pGlob->aComps[iComp].fFinal)
1827 {
1828 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1829 return rtPathGlobAddResult(pGlob, offPath,
1830 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1831 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1832 break;
1833 }
1834
1835 if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1836 {
1837 Assert(pGlob->aComps[iComp].fDir);
1838 pGlob->szPath[offPath++] = RTPATH_SLASH;
1839 pGlob->szPath[offPath] = '\0';
1840
1841 iComp++;
1842 if (rtPathGlobExecIsExpVar(pGlob, offPath, iComp))
1843 return rtPathGlobExecRecursiveVarExp(pGlob, offPath, iComp);
1844 if (!rtPathGlobExecIsPlainText(pGlob, offPath, iComp))
1845 return rtPathGlobExecRecursiveGeneric(pGlob, offPath, iComp);
1846 if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1847 return rtPathGlobExecRecursiveStarStar(pGlob, offPath, iComp, offPath);
1848
1849 /* Continue with the next plain text component. */
1850 continue;
1851 }
1852 }
1853 /* else: file doesn't exist or something else is wrong, ignore this. */
1854 }
1855 else
1856 pGlob->cPathOverflows++;
1857 break;
1858 }
1859 return VINF_SUCCESS;
1860}
1861
1862
1863/**
1864 * Recursive globbing - generic.
1865 *
1866 * @returns IPRT status code.
1867 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1868 *
1869 * @param pGlob The glob instance data.
1870 * @param offPath The current path offset/length.
1871 * @param iComp The current component.
1872 */
1873DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1874{
1875 /*
1876 * Enumerate entire directory and match each entry.
1877 */
1878 RTDIR hDir;
1879 int rc = RTDirOpen(&hDir, offPath ? pGlob->szPath : ".");
1880 if (RT_SUCCESS(rc))
1881 {
1882 for (;;)
1883 {
1884 size_t cch = sizeof(pGlob->u);
1885 rc = RTDirRead(hDir, &pGlob->u.DirEntry, &cch);
1886 if (RT_SUCCESS(rc))
1887 {
1888 if (pGlob->aComps[iComp].fFinal)
1889 {
1890 /*
1891 * Final component: Check if it matches the current pattern.
1892 */
1893 if ( !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS))
1894 || RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1895 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY)
1896 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1897 {
1898 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1899 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1900 &pGlob->MatchCache);
1901 if (RT_SUCCESS(rc))
1902 {
1903 /* Construct the result. */
1904 if ( pGlob->u.DirEntry.enmType != RTDIRENTRYTYPE_UNKNOWN
1905 || !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) )
1906 rc = rtPathGlobAddResult2(pGlob, offPath, pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1907 (uint8_t)pGlob->u.DirEntry.enmType);
1908 else
1909 {
1910 rc = rtPathGlobAlmostAddResult(pGlob, offPath,
1911 pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1912 (uint8_t)RTDIRENTRYTYPE_UNKNOWN);
1913 if (RT_SUCCESS(rc))
1914 {
1915 RTDirQueryUnknownType((*pGlob->ppNext)->szPath, false /*fFollowSymlinks*/,
1916 &pGlob->u.DirEntry.enmType);
1917 if ( RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1918 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY))
1919 rtPathGlobCommitResult(pGlob, (uint8_t)pGlob->u.DirEntry.enmType);
1920 else
1921 rtPathGlobRollbackResult(pGlob);
1922 }
1923 }
1924 if (rc != VINF_SUCCESS)
1925 break;
1926 }
1927 else
1928 {
1929 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1930 rc = VINF_SUCCESS;
1931 }
1932 }
1933 }
1934 /*
1935 * Intermediate component: Directories only.
1936 */
1937 else if ( pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY
1938 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1939 {
1940 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1941 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1942 &pGlob->MatchCache);
1943 if (RT_SUCCESS(rc))
1944 {
1945 /* Recurse down into the alleged directory. */
1946 cch = offPath + pGlob->u.DirEntry.cbName;
1947 if (cch + 1 < sizeof(pGlob->szPath))
1948 {
1949 memcpy(&pGlob->szPath[offPath], pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName);
1950 pGlob->szPath[cch++] = RTPATH_SLASH;
1951 pGlob->szPath[cch] = '\0';
1952
1953 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1954 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1955 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1956 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1957 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1958 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1959 else
1960 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1961 if (rc != VINF_SUCCESS)
1962 return rc;
1963 }
1964 else
1965 pGlob->cPathOverflows++;
1966 }
1967 else
1968 {
1969 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1970 rc = VINF_SUCCESS;
1971 }
1972 }
1973 }
1974 /*
1975 * RTDirRead failure.
1976 */
1977 else
1978 {
1979 /* The end? */
1980 if (rc == VERR_NO_MORE_FILES)
1981 rc = VINF_SUCCESS;
1982 /* Try skip the entry if we end up with an overflow (szPath can't hold it either then). */
1983 else if (rc == VERR_BUFFER_OVERFLOW)
1984 {
1985 pGlob->cPathOverflows++;
1986 rc = rtPathGlobSkipDirEntry(hDir, cch);
1987 if (RT_SUCCESS(rc))
1988 continue;
1989 }
1990 /* else: Any other error is unexpected and should be reported. */
1991 break;
1992 }
1993 }
1994
1995 RTDirClose(hDir);
1996 }
1997 /* Directory doesn't exist or something else is wrong, ignore this. */
1998 else
1999 rc = VINF_SUCCESS;
2000 return rc;
2001}
2002
2003
2004/**
2005 * Executes a glob search.
2006 *
2007 * @returns IPRT status code.
2008 * @param pGlob The glob instance data.
2009 */
2010static int rtPathGlobExec(PRTPATHGLOB pGlob)
2011{
2012 Assert(pGlob->offFirstPath < sizeof(pGlob->szPath));
2013 Assert(pGlob->szPath[pGlob->offFirstPath] == '\0');
2014
2015 int rc;
2016 if (RT_LIKELY(pGlob->iFirstComp < pGlob->pParsed->cComps))
2017 {
2018 /*
2019 * Call the appropriate function.
2020 */
2021 if (rtPathGlobExecIsExpVar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2022 rc = rtPathGlobExecRecursiveVarExp(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2023 else if (rtPathGlobExecIsPlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2024 rc = rtPathGlobExecRecursivePlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2025 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
2026 rc = rtPathGlobExecRecursiveStarStar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp, pGlob->offFirstPath);
2027 else
2028 rc = rtPathGlobExecRecursiveGeneric(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2029 }
2030 else
2031 {
2032 /*
2033 * Special case where we only have a root component or tilde expansion.
2034 */
2035 Assert(pGlob->offFirstPath > 0);
2036 rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
2037 if ( RT_SUCCESS(rc)
2038 && rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
2039 rc = rtPathGlobAddResult(pGlob, pGlob->offFirstPath,
2040 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK) >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
2041 else
2042 rc = VINF_SUCCESS;
2043 }
2044
2045 /*
2046 * Adjust the status code. Check for results, hide RTPATHGLOB_F_FIRST_ONLY
2047 * status code, and add warning if necessary.
2048 */
2049 if (pGlob->cResults > 0)
2050 {
2051 if (rc == VINF_CALLBACK_RETURN)
2052 rc = VINF_SUCCESS;
2053 if (rc == VINF_SUCCESS)
2054 {
2055 if (pGlob->cPathOverflows > 0)
2056 rc = VINF_BUFFER_OVERFLOW;
2057 }
2058 }
2059 else
2060 rc = VERR_FILE_NOT_FOUND;
2061
2062 return rc;
2063}
2064
2065
2066RTDECL(int) RTPathGlob(const char *pszPattern, uint32_t fFlags, PPCRTPATHGLOBENTRY ppHead, uint32_t *pcResults)
2067{
2068 /*
2069 * Input validation.
2070 */
2071 AssertPtrReturn(ppHead, VERR_INVALID_POINTER);
2072 *ppHead = NULL;
2073 if (pcResults)
2074 {
2075 AssertPtrReturn(pcResults, VERR_INVALID_POINTER);
2076 *pcResults = 0;
2077 }
2078 AssertPtrReturn(pszPattern, VERR_INVALID_POINTER);
2079 AssertReturn(!(fFlags & ~RTPATHGLOB_F_MASK), VERR_INVALID_FLAGS);
2080 AssertReturn((fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) != (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS),
2081 VERR_INVALID_FLAGS);
2082
2083 /*
2084 * Parse the path.
2085 */
2086 size_t cbParsed = RT_UOFFSETOF(RTPATHPARSED, aComps[1]); /** @todo 16 after testing */
2087 PRTPATHPARSED pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2088 AssertReturn(pParsed, VERR_NO_MEMORY);
2089 int rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2090 if (rc == VERR_BUFFER_OVERFLOW)
2091 {
2092 cbParsed = RT_UOFFSETOF_DYN(RTPATHPARSED, aComps[pParsed->cComps + 1]);
2093 RTMemTmpFree(pParsed);
2094 pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2095 AssertReturn(pParsed, VERR_NO_MEMORY);
2096
2097 rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2098 }
2099 if (RT_SUCCESS(rc))
2100 {
2101 /*
2102 * Check dir slash vs. only/not dir flag.
2103 */
2104 if ( !(fFlags & RTPATHGLOB_F_NO_DIRS)
2105 || ( !(pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2106 && ( !(pParsed->fProps & (RTPATH_PROP_ROOT_SLASH | RTPATH_PROP_UNC))
2107 || pParsed->cComps > 1) ) )
2108 {
2109 if (pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2110 fFlags |= RTPATHGLOB_F_ONLY_DIRS;
2111
2112 /*
2113 * Allocate and initialize the glob state data structure.
2114 */
2115 size_t cbGlob = RT_UOFFSETOF_DYN(RTPATHGLOB, aComps[pParsed->cComps + 1]);
2116 PRTPATHGLOB pGlob = (PRTPATHGLOB)RTMemTmpAllocZ(cbGlob);
2117 if (pGlob)
2118 {
2119 pGlob->pszPattern = pszPattern;
2120 pGlob->fFlags = fFlags;
2121 pGlob->pParsed = pParsed;
2122 pGlob->ppNext = &pGlob->pHead;
2123 rc = rtPathGlobParse(pGlob, pszPattern, pParsed, fFlags);
2124 if (RT_SUCCESS(rc))
2125 {
2126 /*
2127 * Execute the search.
2128 */
2129 rc = rtPathGlobExec(pGlob);
2130 if (RT_SUCCESS(rc))
2131 {
2132 *ppHead = pGlob->pHead;
2133 if (pcResults)
2134 *pcResults = pGlob->cResults;
2135 }
2136 else
2137 RTPathGlobFree(pGlob->pHead);
2138 }
2139
2140 RTMemTmpFree(pGlob->MatchInstrAlloc.paInstructions);
2141 RTMemTmpFree(pGlob);
2142 }
2143 else
2144 rc = VERR_NO_MEMORY;
2145 }
2146 else
2147 rc = VERR_NOT_FOUND;
2148 }
2149 RTMemTmpFree(pParsed);
2150 return rc;
2151
2152
2153}
2154
2155
2156RTDECL(void) RTPathGlobFree(PCRTPATHGLOBENTRY pHead)
2157{
2158 PRTPATHGLOBENTRY pCur = (PRTPATHGLOBENTRY)pHead;
2159 while (pCur)
2160 {
2161 PRTPATHGLOBENTRY pNext = pCur->pNext;
2162 pCur->pNext = NULL;
2163 RTMemFree(pCur);
2164 pCur = pNext;
2165 }
2166}
2167
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette