/*
 * html.c: a libFuzzer target to test several HTML parser interfaces.
 *
 * See Copyright for the status of this software.
 */
6 |
|
---|
7 | #include <libxml/HTMLparser.h>
|
---|
8 | #include <libxml/HTMLtree.h>
|
---|
9 | #include <libxml/catalog.h>
|
---|
10 | #include "fuzz.h"
|
---|
11 |
|
---|
12 | int
|
---|
13 | LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
|
---|
14 | char ***argv ATTRIBUTE_UNUSED) {
|
---|
15 | xmlFuzzMemSetup();
|
---|
16 | xmlInitParser();
|
---|
17 | #ifdef LIBXML_CATALOG_ENABLED
|
---|
18 | xmlInitializeCatalog();
|
---|
19 | xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
|
---|
20 | #endif
|
---|
21 | xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
|
---|
22 |
|
---|
23 | return 0;
|
---|
24 | }
|
---|
25 |
|
---|
26 | int
|
---|
27 | LLVMFuzzerTestOneInput(const char *data, size_t size) {
|
---|
28 | xmlParserCtxtPtr ctxt;
|
---|
29 | htmlDocPtr doc;
|
---|
30 | const char *docBuffer;
|
---|
31 | size_t maxAlloc, docSize;
|
---|
32 | int opts;
|
---|
33 |
|
---|
34 | xmlFuzzDataInit(data, size);
|
---|
35 | opts = (int) xmlFuzzReadInt(4);
|
---|
36 | maxAlloc = xmlFuzzReadInt(4) % (size + 100);
|
---|
37 |
|
---|
38 | docBuffer = xmlFuzzReadRemaining(&docSize);
|
---|
39 | if (docBuffer == NULL) {
|
---|
40 | xmlFuzzDataCleanup();
|
---|
41 | return(0);
|
---|
42 | }
|
---|
43 |
|
---|
44 | /* Pull parser */
|
---|
45 |
|
---|
46 | xmlFuzzMemSetLimit(maxAlloc);
|
---|
47 | ctxt = htmlNewParserCtxt();
|
---|
48 | if (ctxt != NULL) {
|
---|
49 | doc = htmlCtxtReadMemory(ctxt, docBuffer, docSize, NULL, NULL, opts);
|
---|
50 | xmlFuzzCheckMallocFailure("htmlCtxtReadMemory",
|
---|
51 | ctxt->errNo == XML_ERR_NO_MEMORY);
|
---|
52 |
|
---|
53 | if (doc != NULL) {
|
---|
54 | xmlDocPtr copy;
|
---|
55 |
|
---|
56 | #ifdef LIBXML_OUTPUT_ENABLED
|
---|
57 | xmlOutputBufferPtr out;
|
---|
58 | const xmlChar *content;
|
---|
59 |
|
---|
60 | /*
|
---|
61 | * Also test the serializer. Call htmlDocContentDumpOutput with our
|
---|
62 | * own buffer to avoid encoding the output. The HTML encoding is
|
---|
63 | * excruciatingly slow (see htmlEntityValueLookup).
|
---|
64 | */
|
---|
65 | out = xmlAllocOutputBuffer(NULL);
|
---|
66 | htmlDocContentDumpOutput(out, doc, NULL);
|
---|
67 | content = xmlOutputBufferGetContent(out);
|
---|
68 | xmlOutputBufferClose(out);
|
---|
69 | xmlFuzzCheckMallocFailure("htmlDocContentDumpOutput",
|
---|
70 | content == NULL);
|
---|
71 | #endif
|
---|
72 |
|
---|
73 | copy = xmlCopyDoc(doc, 1);
|
---|
74 | xmlFuzzCheckMallocFailure("xmlCopyNode", copy == NULL);
|
---|
75 | xmlFreeDoc(copy);
|
---|
76 |
|
---|
77 | xmlFreeDoc(doc);
|
---|
78 | }
|
---|
79 |
|
---|
80 | htmlFreeParserCtxt(ctxt);
|
---|
81 | }
|
---|
82 |
|
---|
83 |
|
---|
84 | /* Push parser */
|
---|
85 |
|
---|
86 | #ifdef LIBXML_PUSH_ENABLED
|
---|
87 | {
|
---|
88 | static const size_t maxChunkSize = 128;
|
---|
89 | size_t consumed, chunkSize;
|
---|
90 |
|
---|
91 | xmlFuzzMemSetLimit(maxAlloc);
|
---|
92 | ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
|
---|
93 | XML_CHAR_ENCODING_NONE);
|
---|
94 |
|
---|
95 | if (ctxt != NULL) {
|
---|
96 | htmlCtxtUseOptions(ctxt, opts);
|
---|
97 |
|
---|
98 | for (consumed = 0; consumed < docSize; consumed += chunkSize) {
|
---|
99 | chunkSize = docSize - consumed;
|
---|
100 | if (chunkSize > maxChunkSize)
|
---|
101 | chunkSize = maxChunkSize;
|
---|
102 | htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0);
|
---|
103 | }
|
---|
104 |
|
---|
105 | htmlParseChunk(ctxt, NULL, 0, 1);
|
---|
106 | xmlFuzzCheckMallocFailure("htmlParseChunk",
|
---|
107 | ctxt->errNo == XML_ERR_NO_MEMORY);
|
---|
108 | xmlFreeDoc(ctxt->myDoc);
|
---|
109 | htmlFreeParserCtxt(ctxt);
|
---|
110 | }
|
---|
111 | }
|
---|
112 | #endif
|
---|
113 |
|
---|
114 | /* Cleanup */
|
---|
115 |
|
---|
116 | xmlFuzzMemSetLimit(0);
|
---|
117 | xmlFuzzDataCleanup();
|
---|
118 | xmlResetLastError();
|
---|
119 |
|
---|
120 | return(0);
|
---|
121 | }
|
---|
122 |
|
---|