VirtualBox

source: vbox/trunk/src/libs/libxml2-2.13.2/fuzz/genSeed.c@ 105420

Last change on this file since 105420 was 105420, checked in by vboxsync, 5 months ago

libxml2-2.12.6: Applied and adjusted our libxml2 changes to 2.12.6. bugref:10730

  • Property svn:eol-style set to native
File size: 12.4 KB
Line 
1/*
2 * genSeed.c: Generate the XML seed corpus for fuzzing.
3 *
4 * See Copyright for the status of this software.
5 */
6
7#include <stdio.h>
8#include <string.h>
9#include <glob.h>
10#include <libgen.h>
11#include <sys/stat.h>
12
13#ifdef _WIN32
14#include <direct.h>
15#else
16#include <unistd.h>
17#endif
18
19#include <libxml/parser.h>
20#include <libxml/parserInternals.h>
21#include <libxml/HTMLparser.h>
22#include <libxml/xinclude.h>
23#include <libxml/xmlschemas.h>
24#include "fuzz.h"
25
26#define PATH_SIZE 500 /* size of the buffers holding file paths and glob patterns */
27#define SEED_BUF_SIZE 16384 /* chunk size used when copying an HTML file into its seed */
28#define EXPR_SIZE 4500 /* size of the buffer holding one line of an expression file */
29
30#define FLAG_READER (1 << 0) /* set in globalData.flags for the "reader" fuzzer */
31#define FLAG_LINT (1 << 1) /* set in globalData.flags for the "lint" fuzzer */
32
33typedef int
34(*fileFunc)(const char *base, FILE *out); /* per-file callback: writes the seed for input file `base` to `out`; a non-zero result is treated as failure */
35
36typedef int
37(*mainFunc)(const char *arg); /* handler that main() invokes once for every argument following the fuzzer name */
38
39static struct { /* state shared by the seed generators in this file */
40 FILE *out; /* seed file the entity recorder appends to */
41 xmlHashTablePtr entities; /* URLs already written to the seed; the stored value is only a non-NULL marker */
42 xmlExternalEntityLoader oldLoader; /* loader saved by fuzzRecorderInit and restored by fuzzRecorderCleanup */
43 fileFunc processFile; /* per-file seed generator called by processPattern */
44 const char *fuzzer; /* fuzzer name taken from argv[1]; names the seed/ subdirectory */
45 int counter; /* running number appended to XPath seed file names */
46 char cwd[PATH_SIZE]; /* working directory at startup, restored after each chdir */
47 int flags; /* combination of FLAG_* bits */
48} globalData;
49
50#if defined(HAVE_SCHEMA_FUZZER) || \
51 defined(HAVE_XML_FUZZER)
52/*
53 * A custom entity loader that writes all external DTDs or entities to a
54 * single file in the format expected by xmlFuzzEntityLoader.
55 */
56static xmlParserInputPtr
57fuzzEntityRecorder(const char *URL, const char *ID,
58 xmlParserCtxtPtr ctxt) {
59 xmlParserInputPtr in;
60 static const int chunkSize = 16384;
61 int len;
62
63 in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
64 if (in == NULL)
65 return(NULL);
66
67 if (globalData.entities == NULL) {
68 globalData.entities = xmlHashCreate(4);
69 } else if (xmlHashLookup(globalData.entities,
70 (const xmlChar *) URL) != NULL) {
71 return(in);
72 }
73
74 do {
75 len = xmlParserInputBufferGrow(in->buf, chunkSize);
76 if (len < 0) {
77 fprintf(stderr, "Error reading %s\n", URL);
78 xmlFreeInputStream(in);
79 return(NULL);
80 }
81 } while (len > 0);
82
83 xmlFuzzWriteString(globalData.out, URL);
84 xmlFuzzWriteString(globalData.out,
85 (char *) xmlBufContent(in->buf->buffer));
86
87 xmlFreeInputStream(in);
88
89 xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
90 globalData.entities);
91
92 return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
93}
94
95static void
96fuzzRecorderInit(FILE *out) {
97 globalData.out = out;
98 globalData.entities = xmlHashCreate(8);
99 globalData.oldLoader = xmlGetExternalEntityLoader();
100 xmlSetExternalEntityLoader(fuzzEntityRecorder);
101}
102
103static void
104fuzzRecorderCleanup(void) {
105 xmlSetExternalEntityLoader(globalData.oldLoader);
106 xmlHashFree(globalData.entities, NULL);
107 globalData.out = NULL;
108 globalData.entities = NULL;
109 globalData.oldLoader = NULL;
110}
111#endif
112
113#ifdef HAVE_XML_FUZZER
114static int
115processXml(const char *docFile, FILE *out) {
116 int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
117 xmlDocPtr doc;
118
119 if (globalData.flags & FLAG_LINT) {
120 /* Switches */
121 xmlFuzzWriteInt(out, 0, 4);
122 xmlFuzzWriteInt(out, 0, 4);
123 /* maxmem */
124 xmlFuzzWriteInt(out, 0, 4);
125 /* max-ampl */
126 xmlFuzzWriteInt(out, 0, 1);
127 /* pretty */
128 xmlFuzzWriteInt(out, 0, 1);
129 /* encode */
130 xmlFuzzWriteString(out, "");
131 /* pattern */
132 xmlFuzzWriteString(out, "");
133 /* xpath */
134 xmlFuzzWriteString(out, "");
135 } else {
136 /* Parser options. */
137 xmlFuzzWriteInt(out, opts, 4);
138 /* Max allocations. */
139 xmlFuzzWriteInt(out, 0, 4);
140
141 if (globalData.flags & FLAG_READER) {
142 /* Initial reader program with a couple of OP_READs */
143 xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
144 }
145 }
146
147 fuzzRecorderInit(out);
148
149 doc = xmlReadFile(docFile, NULL, opts);
150#ifdef LIBXML_XINCLUDE_ENABLED
151 xmlXIncludeProcessFlags(doc, opts);
152#endif
153 xmlFreeDoc(doc);
154
155 fuzzRecorderCleanup();
156
157 return(0);
158}
159#endif
160
161#ifdef HAVE_HTML_FUZZER
162static int
163processHtml(const char *docFile, FILE *out) {
164 char buf[SEED_BUF_SIZE];
165 FILE *file;
166 size_t size;
167
168 /* Parser options. */
169 xmlFuzzWriteInt(out, 0, 4);
170 /* Max allocations. */
171 xmlFuzzWriteInt(out, 0, 4);
172
173 /* Copy file */
174 file = fopen(docFile, "rb");
175 if (file == NULL) {
176 fprintf(stderr, "couldn't open %s\n", docFile);
177 return(0);
178 }
179 do {
180 size = fread(buf, 1, SEED_BUF_SIZE, file);
181 if (size > 0)
182 fwrite(buf, 1, size, out);
183 } while (size == SEED_BUF_SIZE);
184 fclose(file);
185
186 return(0);
187}
188#endif
189
190#ifdef HAVE_SCHEMA_FUZZER
191static int
192processSchema(const char *docFile, FILE *out) {
193 xmlSchemaPtr schema;
194 xmlSchemaParserCtxtPtr pctxt;
195
196 /* Max allocations. */
197 xmlFuzzWriteInt(out, 0, 4);
198
199 fuzzRecorderInit(out);
200
201 pctxt = xmlSchemaNewParserCtxt(docFile);
202 xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
203 schema = xmlSchemaParse(pctxt);
204 xmlSchemaFreeParserCtxt(pctxt);
205 xmlSchemaFree(schema);
206
207 fuzzRecorderCleanup();
208
209 return(0);
210}
211#endif
212
213#if defined(HAVE_HTML_FUZZER) || \
214 defined(HAVE_SCHEMA_FUZZER) || \
215 defined(HAVE_XML_FUZZER)
216static int
217processPattern(const char *pattern) {
218 glob_t globbuf;
219 int ret = 0;
220 int res;
221 size_t i;
222
223 res = glob(pattern, 0, NULL, &globbuf);
224 if (res == GLOB_NOMATCH)
225 return(0);
226 if (res != 0) {
227 fprintf(stderr, "couldn't match pattern %s\n", pattern);
228 return(-1);
229 }
230
231 for (i = 0; i < globbuf.gl_pathc; i++) {
232 struct stat statbuf;
233 char outPath[PATH_SIZE];
234 char *dirBuf = NULL;
235 char *baseBuf = NULL;
236 const char *path, *dir, *base;
237 FILE *out = NULL;
238 int dirChanged = 0;
239 size_t size;
240
241 path = globbuf.gl_pathv[i];
242
243 if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
244 continue;
245
246 dirBuf = (char *) xmlCharStrdup(path);
247 baseBuf = (char *) xmlCharStrdup(path);
248 if ((dirBuf == NULL) || (baseBuf == NULL)) {
249 fprintf(stderr, "memory allocation failed\n");
250 ret = -1;
251 goto error;
252 }
253 dir = dirname(dirBuf);
254 base = basename(baseBuf);
255
256 size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
257 globalData.fuzzer, base);
258 if (size >= PATH_SIZE) {
259 fprintf(stderr, "creating path failed\n");
260 ret = -1;
261 goto error;
262 }
263 out = fopen(outPath, "wb");
264 if (out == NULL) {
265 fprintf(stderr, "couldn't open %s for writing\n", outPath);
266 ret = -1;
267 goto error;
268 }
269 if (chdir(dir) != 0) {
270 fprintf(stderr, "couldn't chdir to %s\n", dir);
271 ret = -1;
272 goto error;
273 }
274 dirChanged = 1;
275 if (globalData.processFile(base, out) != 0)
276 ret = -1;
277
278error:
279 if (out != NULL)
280 fclose(out);
281 xmlFree(dirBuf);
282 xmlFree(baseBuf);
283 if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
284 fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
285 ret = -1;
286 break;
287 }
288 }
289
290 globfree(&globbuf);
291 return(ret);
292}
293#endif
294
295#ifdef HAVE_XPATH_FUZZER
296static int
297processXPath(const char *testDir, const char *prefix, const char *name,
298 const char *data, const char *subdir, int xptr) {
299 char pattern[PATH_SIZE];
300 glob_t globbuf;
301 size_t i, size;
302 int ret = 0, res;
303
304 size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
305 testDir, subdir, prefix);
306 if (size >= PATH_SIZE)
307 return(-1);
308 res = glob(pattern, 0, NULL, &globbuf);
309 if (res == GLOB_NOMATCH)
310 return(0);
311 if (res != 0) {
312 fprintf(stderr, "couldn't match pattern %s\n", pattern);
313 return(-1);
314 }
315
316 for (i = 0; i < globbuf.gl_pathc; i++) {
317 char *path = globbuf.gl_pathv[i];
318 struct stat statbuf;
319 FILE *in;
320 char expr[EXPR_SIZE];
321
322 if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
323 continue;
324
325 in = fopen(path, "rb");
326 if (in == NULL) {
327 ret = -1;
328 continue;
329 }
330
331 while (fgets(expr, EXPR_SIZE, in) != NULL) {
332 char outPath[PATH_SIZE];
333 FILE *out;
334 int j;
335
336 for (j = 0; expr[j] != 0; j++)
337 if (expr[j] == '\r' || expr[j] == '\n')
338 break;
339 expr[j] = 0;
340
341 size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
342 name, globalData.counter);
343 if (size >= PATH_SIZE) {
344 ret = -1;
345 continue;
346 }
347 out = fopen(outPath, "wb");
348 if (out == NULL) {
349 ret = -1;
350 continue;
351 }
352
353 /* Max allocations. */
354 xmlFuzzWriteInt(out, 0, 4);
355
356 if (xptr) {
357 xmlFuzzWriteString(out, expr);
358 } else {
359 char xptrExpr[EXPR_SIZE+100];
360
361 /* Wrap XPath expressions as XPointer */
362 snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
363 xmlFuzzWriteString(out, xptrExpr);
364 }
365
366 xmlFuzzWriteString(out, data);
367
368 fclose(out);
369 globalData.counter++;
370 }
371
372 fclose(in);
373 }
374
375 globfree(&globbuf);
376
377 return(ret);
378}
379
380static int
381processXPathDir(const char *testDir) {
382 char pattern[PATH_SIZE];
383 glob_t globbuf;
384 size_t i, size;
385 int ret = 0;
386
387 globalData.counter = 1;
388 if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
389 ret = -1;
390
391 size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
392 if (size >= PATH_SIZE)
393 return(1);
394 if (glob(pattern, 0, NULL, &globbuf) != 0)
395 return(1);
396
397 for (i = 0; i < globbuf.gl_pathc; i++) {
398 char *path = globbuf.gl_pathv[i];
399 char *data;
400 const char *docFile;
401
402 data = xmlSlurpFile(path, NULL);
403 if (data == NULL) {
404 ret = -1;
405 continue;
406 }
407 docFile = basename(path);
408
409 globalData.counter = 1;
410 if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
411 ret = -1;
412 if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
413 ret = -1;
414 if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
415 ret = -1;
416
417 xmlFree(data);
418 }
419
420 globfree(&globbuf);
421
422 return(ret);
423}
424#endif
425
426int
427main(int argc, const char **argv) {
428 mainFunc processArg = NULL;
429 const char *fuzzer;
430 int ret = 0;
431 int i;
432
433 if (argc < 3) {
434 fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
435 return(1);
436 }
437
438 xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
439
440 fuzzer = argv[1];
441 if (strcmp(fuzzer, "html") == 0) {
442#ifdef HAVE_HTML_FUZZER
443 processArg = processPattern;
444 globalData.processFile = processHtml;
445#endif
446 } else if (strcmp(fuzzer, "lint") == 0) {
447#ifdef HAVE_LINT_FUZZER
448 processArg = processPattern;
449 globalData.flags |= FLAG_LINT;
450 globalData.processFile = processXml;
451#endif
452 } else if (strcmp(fuzzer, "reader") == 0) {
453#ifdef HAVE_READER_FUZZER
454 processArg = processPattern;
455 globalData.flags |= FLAG_READER;
456 globalData.processFile = processXml;
457#endif
458 } else if (strcmp(fuzzer, "schema") == 0) {
459#ifdef HAVE_SCHEMA_FUZZER
460 processArg = processPattern;
461 globalData.processFile = processSchema;
462#endif
463 } else if (strcmp(fuzzer, "valid") == 0) {
464#ifdef HAVE_VALID_FUZZER
465 processArg = processPattern;
466 globalData.processFile = processXml;
467#endif
468 } else if (strcmp(fuzzer, "xinclude") == 0) {
469#ifdef HAVE_XINCLUDE_FUZZER
470 processArg = processPattern;
471 globalData.processFile = processXml;
472#endif
473 } else if (strcmp(fuzzer, "xml") == 0) {
474#ifdef HAVE_XML_FUZZER
475 processArg = processPattern;
476 globalData.processFile = processXml;
477#endif
478 } else if (strcmp(fuzzer, "xpath") == 0) {
479#ifdef HAVE_XPATH_FUZZER
480 processArg = processXPathDir;
481#endif
482 } else {
483 fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
484 return(1);
485 }
486 globalData.fuzzer = fuzzer;
487
488 if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
489 fprintf(stderr, "couldn't get current directory\n");
490 return(1);
491 }
492
493 if (processArg != NULL)
494 for (i = 2; i < argc; i++)
495 processArg(argv[i]);
496
497 return(ret);
498}
499
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette