/*
 * xmlSeed.c: Generate the XML seed corpus for fuzzing.
 *
 * See Copyright for the status of this software.
 */
6 |
|
---|
7 | #include <stdio.h>
|
---|
8 | #include <string.h>
|
---|
9 | #include <glob.h>
|
---|
10 | #include <libgen.h>
|
---|
11 | #include <sys/stat.h>
|
---|
12 |
|
---|
13 | #ifdef _WIN32
|
---|
14 | #include <direct.h>
|
---|
15 | #else
|
---|
16 | #include <unistd.h>
|
---|
17 | #endif
|
---|
18 |
|
---|
19 | #include <libxml/parser.h>
|
---|
20 | #include <libxml/parserInternals.h>
|
---|
21 | #include <libxml/HTMLparser.h>
|
---|
22 | #include <libxml/xinclude.h>
|
---|
23 | #include <libxml/xmlschemas.h>
|
---|
24 | #include "fuzz.h"
|
---|
25 |
|
---|
26 | #define PATH_SIZE 500
|
---|
27 | #define SEED_BUF_SIZE 16384
|
---|
28 | #define EXPR_SIZE 4500
|
---|
29 |
|
---|
30 | #define FLAG_READER (1 << 0)
|
---|
31 | #define FLAG_LINT (1 << 1)
|
---|
32 |
|
---|
33 | typedef int
|
---|
34 | (*fileFunc)(const char *base, FILE *out);
|
---|
35 |
|
---|
36 | typedef int
|
---|
37 | (*mainFunc)(const char *arg);
|
---|
38 |
|
---|
39 | static struct {
|
---|
40 | FILE *out;
|
---|
41 | xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
|
---|
42 | xmlExternalEntityLoader oldLoader;
|
---|
43 | fileFunc processFile;
|
---|
44 | const char *fuzzer;
|
---|
45 | int counter;
|
---|
46 | char cwd[PATH_SIZE];
|
---|
47 | int flags;
|
---|
48 | } globalData;
|
---|
49 |
|
---|
50 | #if defined(HAVE_SCHEMA_FUZZER) || \
|
---|
51 | defined(HAVE_XML_FUZZER)
|
---|
52 | /*
|
---|
53 | * A custom entity loader that writes all external DTDs or entities to a
|
---|
54 | * single file in the format expected by xmlFuzzEntityLoader.
|
---|
55 | */
|
---|
56 | static xmlParserInputPtr
|
---|
57 | fuzzEntityRecorder(const char *URL, const char *ID,
|
---|
58 | xmlParserCtxtPtr ctxt) {
|
---|
59 | xmlParserInputPtr in;
|
---|
60 | static const int chunkSize = 16384;
|
---|
61 | int len;
|
---|
62 |
|
---|
63 | in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
|
---|
64 | if (in == NULL)
|
---|
65 | return(NULL);
|
---|
66 |
|
---|
67 | if (globalData.entities == NULL) {
|
---|
68 | globalData.entities = xmlHashCreate(4);
|
---|
69 | } else if (xmlHashLookup(globalData.entities,
|
---|
70 | (const xmlChar *) URL) != NULL) {
|
---|
71 | return(in);
|
---|
72 | }
|
---|
73 |
|
---|
74 | do {
|
---|
75 | len = xmlParserInputBufferGrow(in->buf, chunkSize);
|
---|
76 | if (len < 0) {
|
---|
77 | fprintf(stderr, "Error reading %s\n", URL);
|
---|
78 | xmlFreeInputStream(in);
|
---|
79 | return(NULL);
|
---|
80 | }
|
---|
81 | } while (len > 0);
|
---|
82 |
|
---|
83 | xmlFuzzWriteString(globalData.out, URL);
|
---|
84 | xmlFuzzWriteString(globalData.out,
|
---|
85 | (char *) xmlBufContent(in->buf->buffer));
|
---|
86 |
|
---|
87 | xmlFreeInputStream(in);
|
---|
88 |
|
---|
89 | xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
|
---|
90 | globalData.entities);
|
---|
91 |
|
---|
92 | return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
|
---|
93 | }
|
---|
94 |
|
---|
95 | static void
|
---|
96 | fuzzRecorderInit(FILE *out) {
|
---|
97 | globalData.out = out;
|
---|
98 | globalData.entities = xmlHashCreate(8);
|
---|
99 | globalData.oldLoader = xmlGetExternalEntityLoader();
|
---|
100 | xmlSetExternalEntityLoader(fuzzEntityRecorder);
|
---|
101 | }
|
---|
102 |
|
---|
103 | static void
|
---|
104 | fuzzRecorderCleanup(void) {
|
---|
105 | xmlSetExternalEntityLoader(globalData.oldLoader);
|
---|
106 | xmlHashFree(globalData.entities, NULL);
|
---|
107 | globalData.out = NULL;
|
---|
108 | globalData.entities = NULL;
|
---|
109 | globalData.oldLoader = NULL;
|
---|
110 | }
|
---|
111 | #endif
|
---|
112 |
|
---|
113 | #ifdef HAVE_XML_FUZZER
|
---|
114 | static int
|
---|
115 | processXml(const char *docFile, FILE *out) {
|
---|
116 | int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
|
---|
117 | xmlDocPtr doc;
|
---|
118 |
|
---|
119 | if (globalData.flags & FLAG_LINT) {
|
---|
120 | /* Switches */
|
---|
121 | xmlFuzzWriteInt(out, 0, 4);
|
---|
122 | xmlFuzzWriteInt(out, 0, 4);
|
---|
123 | /* maxmem */
|
---|
124 | xmlFuzzWriteInt(out, 0, 4);
|
---|
125 | /* max-ampl */
|
---|
126 | xmlFuzzWriteInt(out, 0, 1);
|
---|
127 | /* pretty */
|
---|
128 | xmlFuzzWriteInt(out, 0, 1);
|
---|
129 | /* encode */
|
---|
130 | xmlFuzzWriteString(out, "");
|
---|
131 | /* pattern */
|
---|
132 | xmlFuzzWriteString(out, "");
|
---|
133 | /* xpath */
|
---|
134 | xmlFuzzWriteString(out, "");
|
---|
135 | } else {
|
---|
136 | /* Parser options. */
|
---|
137 | xmlFuzzWriteInt(out, opts, 4);
|
---|
138 | /* Max allocations. */
|
---|
139 | xmlFuzzWriteInt(out, 0, 4);
|
---|
140 |
|
---|
141 | if (globalData.flags & FLAG_READER) {
|
---|
142 | /* Initial reader program with a couple of OP_READs */
|
---|
143 | xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
|
---|
144 | }
|
---|
145 | }
|
---|
146 |
|
---|
147 | fuzzRecorderInit(out);
|
---|
148 |
|
---|
149 | doc = xmlReadFile(docFile, NULL, opts);
|
---|
150 | #ifdef LIBXML_XINCLUDE_ENABLED
|
---|
151 | xmlXIncludeProcessFlags(doc, opts);
|
---|
152 | #endif
|
---|
153 | xmlFreeDoc(doc);
|
---|
154 |
|
---|
155 | fuzzRecorderCleanup();
|
---|
156 |
|
---|
157 | return(0);
|
---|
158 | }
|
---|
159 | #endif
|
---|
160 |
|
---|
161 | #ifdef HAVE_HTML_FUZZER
|
---|
162 | static int
|
---|
163 | processHtml(const char *docFile, FILE *out) {
|
---|
164 | char buf[SEED_BUF_SIZE];
|
---|
165 | FILE *file;
|
---|
166 | size_t size;
|
---|
167 |
|
---|
168 | /* Parser options. */
|
---|
169 | xmlFuzzWriteInt(out, 0, 4);
|
---|
170 | /* Max allocations. */
|
---|
171 | xmlFuzzWriteInt(out, 0, 4);
|
---|
172 |
|
---|
173 | /* Copy file */
|
---|
174 | file = fopen(docFile, "rb");
|
---|
175 | if (file == NULL) {
|
---|
176 | fprintf(stderr, "couldn't open %s\n", docFile);
|
---|
177 | return(0);
|
---|
178 | }
|
---|
179 | do {
|
---|
180 | size = fread(buf, 1, SEED_BUF_SIZE, file);
|
---|
181 | if (size > 0)
|
---|
182 | fwrite(buf, 1, size, out);
|
---|
183 | } while (size == SEED_BUF_SIZE);
|
---|
184 | fclose(file);
|
---|
185 |
|
---|
186 | return(0);
|
---|
187 | }
|
---|
188 | #endif
|
---|
189 |
|
---|
190 | #ifdef HAVE_SCHEMA_FUZZER
|
---|
191 | static int
|
---|
192 | processSchema(const char *docFile, FILE *out) {
|
---|
193 | xmlSchemaPtr schema;
|
---|
194 | xmlSchemaParserCtxtPtr pctxt;
|
---|
195 |
|
---|
196 | /* Max allocations. */
|
---|
197 | xmlFuzzWriteInt(out, 0, 4);
|
---|
198 |
|
---|
199 | fuzzRecorderInit(out);
|
---|
200 |
|
---|
201 | pctxt = xmlSchemaNewParserCtxt(docFile);
|
---|
202 | xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
|
---|
203 | schema = xmlSchemaParse(pctxt);
|
---|
204 | xmlSchemaFreeParserCtxt(pctxt);
|
---|
205 | xmlSchemaFree(schema);
|
---|
206 |
|
---|
207 | fuzzRecorderCleanup();
|
---|
208 |
|
---|
209 | return(0);
|
---|
210 | }
|
---|
211 | #endif
|
---|
212 |
|
---|
213 | #if defined(HAVE_HTML_FUZZER) || \
|
---|
214 | defined(HAVE_SCHEMA_FUZZER) || \
|
---|
215 | defined(HAVE_XML_FUZZER)
|
---|
216 | static int
|
---|
217 | processPattern(const char *pattern) {
|
---|
218 | glob_t globbuf;
|
---|
219 | int ret = 0;
|
---|
220 | int res;
|
---|
221 | size_t i;
|
---|
222 |
|
---|
223 | res = glob(pattern, 0, NULL, &globbuf);
|
---|
224 | if (res == GLOB_NOMATCH)
|
---|
225 | return(0);
|
---|
226 | if (res != 0) {
|
---|
227 | fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
---|
228 | return(-1);
|
---|
229 | }
|
---|
230 |
|
---|
231 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
232 | struct stat statbuf;
|
---|
233 | char outPath[PATH_SIZE];
|
---|
234 | char *dirBuf = NULL;
|
---|
235 | char *baseBuf = NULL;
|
---|
236 | const char *path, *dir, *base;
|
---|
237 | FILE *out = NULL;
|
---|
238 | int dirChanged = 0;
|
---|
239 | size_t size;
|
---|
240 |
|
---|
241 | path = globbuf.gl_pathv[i];
|
---|
242 |
|
---|
243 | if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
---|
244 | continue;
|
---|
245 |
|
---|
246 | dirBuf = (char *) xmlCharStrdup(path);
|
---|
247 | baseBuf = (char *) xmlCharStrdup(path);
|
---|
248 | if ((dirBuf == NULL) || (baseBuf == NULL)) {
|
---|
249 | fprintf(stderr, "memory allocation failed\n");
|
---|
250 | ret = -1;
|
---|
251 | goto error;
|
---|
252 | }
|
---|
253 | dir = dirname(dirBuf);
|
---|
254 | base = basename(baseBuf);
|
---|
255 |
|
---|
256 | size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
|
---|
257 | globalData.fuzzer, base);
|
---|
258 | if (size >= PATH_SIZE) {
|
---|
259 | fprintf(stderr, "creating path failed\n");
|
---|
260 | ret = -1;
|
---|
261 | goto error;
|
---|
262 | }
|
---|
263 | out = fopen(outPath, "wb");
|
---|
264 | if (out == NULL) {
|
---|
265 | fprintf(stderr, "couldn't open %s for writing\n", outPath);
|
---|
266 | ret = -1;
|
---|
267 | goto error;
|
---|
268 | }
|
---|
269 | if (chdir(dir) != 0) {
|
---|
270 | fprintf(stderr, "couldn't chdir to %s\n", dir);
|
---|
271 | ret = -1;
|
---|
272 | goto error;
|
---|
273 | }
|
---|
274 | dirChanged = 1;
|
---|
275 | if (globalData.processFile(base, out) != 0)
|
---|
276 | ret = -1;
|
---|
277 |
|
---|
278 | error:
|
---|
279 | if (out != NULL)
|
---|
280 | fclose(out);
|
---|
281 | xmlFree(dirBuf);
|
---|
282 | xmlFree(baseBuf);
|
---|
283 | if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
|
---|
284 | fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
|
---|
285 | ret = -1;
|
---|
286 | break;
|
---|
287 | }
|
---|
288 | }
|
---|
289 |
|
---|
290 | globfree(&globbuf);
|
---|
291 | return(ret);
|
---|
292 | }
|
---|
293 | #endif
|
---|
294 |
|
---|
295 | #ifdef HAVE_XPATH_FUZZER
|
---|
296 | static int
|
---|
297 | processXPath(const char *testDir, const char *prefix, const char *name,
|
---|
298 | const char *data, const char *subdir, int xptr) {
|
---|
299 | char pattern[PATH_SIZE];
|
---|
300 | glob_t globbuf;
|
---|
301 | size_t i, size;
|
---|
302 | int ret = 0, res;
|
---|
303 |
|
---|
304 | size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
|
---|
305 | testDir, subdir, prefix);
|
---|
306 | if (size >= PATH_SIZE)
|
---|
307 | return(-1);
|
---|
308 | res = glob(pattern, 0, NULL, &globbuf);
|
---|
309 | if (res == GLOB_NOMATCH)
|
---|
310 | return(0);
|
---|
311 | if (res != 0) {
|
---|
312 | fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
---|
313 | return(-1);
|
---|
314 | }
|
---|
315 |
|
---|
316 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
317 | char *path = globbuf.gl_pathv[i];
|
---|
318 | struct stat statbuf;
|
---|
319 | FILE *in;
|
---|
320 | char expr[EXPR_SIZE];
|
---|
321 |
|
---|
322 | if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
---|
323 | continue;
|
---|
324 |
|
---|
325 | in = fopen(path, "rb");
|
---|
326 | if (in == NULL) {
|
---|
327 | ret = -1;
|
---|
328 | continue;
|
---|
329 | }
|
---|
330 |
|
---|
331 | while (fgets(expr, EXPR_SIZE, in) != NULL) {
|
---|
332 | char outPath[PATH_SIZE];
|
---|
333 | FILE *out;
|
---|
334 | int j;
|
---|
335 |
|
---|
336 | for (j = 0; expr[j] != 0; j++)
|
---|
337 | if (expr[j] == '\r' || expr[j] == '\n')
|
---|
338 | break;
|
---|
339 | expr[j] = 0;
|
---|
340 |
|
---|
341 | size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
|
---|
342 | name, globalData.counter);
|
---|
343 | if (size >= PATH_SIZE) {
|
---|
344 | ret = -1;
|
---|
345 | continue;
|
---|
346 | }
|
---|
347 | out = fopen(outPath, "wb");
|
---|
348 | if (out == NULL) {
|
---|
349 | ret = -1;
|
---|
350 | continue;
|
---|
351 | }
|
---|
352 |
|
---|
353 | /* Max allocations. */
|
---|
354 | xmlFuzzWriteInt(out, 0, 4);
|
---|
355 |
|
---|
356 | if (xptr) {
|
---|
357 | xmlFuzzWriteString(out, expr);
|
---|
358 | } else {
|
---|
359 | char xptrExpr[EXPR_SIZE+100];
|
---|
360 |
|
---|
361 | /* Wrap XPath expressions as XPointer */
|
---|
362 | snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
|
---|
363 | xmlFuzzWriteString(out, xptrExpr);
|
---|
364 | }
|
---|
365 |
|
---|
366 | xmlFuzzWriteString(out, data);
|
---|
367 |
|
---|
368 | fclose(out);
|
---|
369 | globalData.counter++;
|
---|
370 | }
|
---|
371 |
|
---|
372 | fclose(in);
|
---|
373 | }
|
---|
374 |
|
---|
375 | globfree(&globbuf);
|
---|
376 |
|
---|
377 | return(ret);
|
---|
378 | }
|
---|
379 |
|
---|
380 | static int
|
---|
381 | processXPathDir(const char *testDir) {
|
---|
382 | char pattern[PATH_SIZE];
|
---|
383 | glob_t globbuf;
|
---|
384 | size_t i, size;
|
---|
385 | int ret = 0;
|
---|
386 |
|
---|
387 | globalData.counter = 1;
|
---|
388 | if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
|
---|
389 | ret = -1;
|
---|
390 |
|
---|
391 | size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
|
---|
392 | if (size >= PATH_SIZE)
|
---|
393 | return(1);
|
---|
394 | if (glob(pattern, 0, NULL, &globbuf) != 0)
|
---|
395 | return(1);
|
---|
396 |
|
---|
397 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
398 | char *path = globbuf.gl_pathv[i];
|
---|
399 | char *data;
|
---|
400 | const char *docFile;
|
---|
401 |
|
---|
402 | data = xmlSlurpFile(path, NULL);
|
---|
403 | if (data == NULL) {
|
---|
404 | ret = -1;
|
---|
405 | continue;
|
---|
406 | }
|
---|
407 | docFile = basename(path);
|
---|
408 |
|
---|
409 | globalData.counter = 1;
|
---|
410 | if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
|
---|
411 | ret = -1;
|
---|
412 | if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
|
---|
413 | ret = -1;
|
---|
414 | if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
|
---|
415 | ret = -1;
|
---|
416 |
|
---|
417 | xmlFree(data);
|
---|
418 | }
|
---|
419 |
|
---|
420 | globfree(&globbuf);
|
---|
421 |
|
---|
422 | return(ret);
|
---|
423 | }
|
---|
424 | #endif
|
---|
425 |
|
---|
426 | int
|
---|
427 | main(int argc, const char **argv) {
|
---|
428 | mainFunc processArg = NULL;
|
---|
429 | const char *fuzzer;
|
---|
430 | int ret = 0;
|
---|
431 | int i;
|
---|
432 |
|
---|
433 | if (argc < 3) {
|
---|
434 | fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
|
---|
435 | return(1);
|
---|
436 | }
|
---|
437 |
|
---|
438 | xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
|
---|
439 |
|
---|
440 | fuzzer = argv[1];
|
---|
441 | if (strcmp(fuzzer, "html") == 0) {
|
---|
442 | #ifdef HAVE_HTML_FUZZER
|
---|
443 | processArg = processPattern;
|
---|
444 | globalData.processFile = processHtml;
|
---|
445 | #endif
|
---|
446 | } else if (strcmp(fuzzer, "lint") == 0) {
|
---|
447 | #ifdef HAVE_LINT_FUZZER
|
---|
448 | processArg = processPattern;
|
---|
449 | globalData.flags |= FLAG_LINT;
|
---|
450 | globalData.processFile = processXml;
|
---|
451 | #endif
|
---|
452 | } else if (strcmp(fuzzer, "reader") == 0) {
|
---|
453 | #ifdef HAVE_READER_FUZZER
|
---|
454 | processArg = processPattern;
|
---|
455 | globalData.flags |= FLAG_READER;
|
---|
456 | globalData.processFile = processXml;
|
---|
457 | #endif
|
---|
458 | } else if (strcmp(fuzzer, "schema") == 0) {
|
---|
459 | #ifdef HAVE_SCHEMA_FUZZER
|
---|
460 | processArg = processPattern;
|
---|
461 | globalData.processFile = processSchema;
|
---|
462 | #endif
|
---|
463 | } else if (strcmp(fuzzer, "valid") == 0) {
|
---|
464 | #ifdef HAVE_VALID_FUZZER
|
---|
465 | processArg = processPattern;
|
---|
466 | globalData.processFile = processXml;
|
---|
467 | #endif
|
---|
468 | } else if (strcmp(fuzzer, "xinclude") == 0) {
|
---|
469 | #ifdef HAVE_XINCLUDE_FUZZER
|
---|
470 | processArg = processPattern;
|
---|
471 | globalData.processFile = processXml;
|
---|
472 | #endif
|
---|
473 | } else if (strcmp(fuzzer, "xml") == 0) {
|
---|
474 | #ifdef HAVE_XML_FUZZER
|
---|
475 | processArg = processPattern;
|
---|
476 | globalData.processFile = processXml;
|
---|
477 | #endif
|
---|
478 | } else if (strcmp(fuzzer, "xpath") == 0) {
|
---|
479 | #ifdef HAVE_XPATH_FUZZER
|
---|
480 | processArg = processXPathDir;
|
---|
481 | #endif
|
---|
482 | } else {
|
---|
483 | fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
|
---|
484 | return(1);
|
---|
485 | }
|
---|
486 | globalData.fuzzer = fuzzer;
|
---|
487 |
|
---|
488 | if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
|
---|
489 | fprintf(stderr, "couldn't get current directory\n");
|
---|
490 | return(1);
|
---|
491 | }
|
---|
492 |
|
---|
493 | if (processArg != NULL)
|
---|
494 | for (i = 2; i < argc; i++)
|
---|
495 | processArg(argv[i]);
|
---|
496 |
|
---|
497 | return(ret);
|
---|
498 | }
|
---|
499 |
|
---|