XML C parser available under the MIT license. http://xmlsoft.org/

Dependents:   libiio

Committer:
pcercuei
Date:
Thu Aug 25 10:07:34 2016 +0000
Revision:
1:26f20484cbdc
Parent:
0:03b5121a232e
Add config.h and dummy.c containing empty functions

Who changed what in which revision?

User | Revision | Line number | New contents of line
pcercuei 0:03b5121a232e 1 /*
pcercuei 0:03b5121a232e 2 * HTMLparser.c : an HTML 4.0 non-verifying parser
pcercuei 0:03b5121a232e 3 *
pcercuei 0:03b5121a232e 4 * See Copyright for the status of this software.
pcercuei 0:03b5121a232e 5 *
pcercuei 0:03b5121a232e 6 * daniel@veillard.com
pcercuei 0:03b5121a232e 7 */
pcercuei 0:03b5121a232e 8
pcercuei 0:03b5121a232e 9 #define IN_LIBXML
pcercuei 0:03b5121a232e 10 #include "libxml.h"
pcercuei 0:03b5121a232e 11 #ifdef LIBXML_HTML_ENABLED
pcercuei 0:03b5121a232e 12
pcercuei 0:03b5121a232e 13 #include <string.h>
pcercuei 0:03b5121a232e 14 #ifdef HAVE_CTYPE_H
pcercuei 0:03b5121a232e 15 #include <ctype.h>
pcercuei 0:03b5121a232e 16 #endif
pcercuei 0:03b5121a232e 17 #ifdef HAVE_STDLIB_H
pcercuei 0:03b5121a232e 18 #include <stdlib.h>
pcercuei 0:03b5121a232e 19 #endif
pcercuei 0:03b5121a232e 20 #ifdef HAVE_SYS_STAT_H
pcercuei 0:03b5121a232e 21 #include <sys/stat.h>
pcercuei 0:03b5121a232e 22 #endif
pcercuei 0:03b5121a232e 23 #ifdef HAVE_FCNTL_H
pcercuei 0:03b5121a232e 24 #include <fcntl.h>
pcercuei 0:03b5121a232e 25 #endif
pcercuei 0:03b5121a232e 26 #ifdef HAVE_UNISTD_H
pcercuei 0:03b5121a232e 27 #include <unistd.h>
pcercuei 0:03b5121a232e 28 #endif
pcercuei 0:03b5121a232e 29 #ifdef HAVE_ZLIB_H
pcercuei 0:03b5121a232e 30 #include <zlib.h>
pcercuei 0:03b5121a232e 31 #endif
pcercuei 0:03b5121a232e 32
pcercuei 0:03b5121a232e 33 #include <libxml/xmlmemory.h>
pcercuei 0:03b5121a232e 34 #include <libxml/tree.h>
pcercuei 0:03b5121a232e 35 #include <libxml/parser.h>
pcercuei 0:03b5121a232e 36 #include <libxml/parserInternals.h>
pcercuei 0:03b5121a232e 37 #include <libxml/xmlerror.h>
pcercuei 0:03b5121a232e 38 #include <libxml/HTMLparser.h>
pcercuei 0:03b5121a232e 39 #include <libxml/HTMLtree.h>
pcercuei 0:03b5121a232e 40 #include <libxml/entities.h>
pcercuei 0:03b5121a232e 41 #include <libxml/encoding.h>
pcercuei 0:03b5121a232e 42 #include <libxml/valid.h>
pcercuei 0:03b5121a232e 43 #include <libxml/xmlIO.h>
pcercuei 0:03b5121a232e 44 #include <libxml/globals.h>
pcercuei 0:03b5121a232e 45 #include <libxml/uri.h>
pcercuei 0:03b5121a232e 46
pcercuei 0:03b5121a232e 47 #include "buf.h"
pcercuei 0:03b5121a232e 48 #include "enc.h"
pcercuei 0:03b5121a232e 49
pcercuei 0:03b5121a232e 50 #define HTML_MAX_NAMELEN 1000
pcercuei 0:03b5121a232e 51 #define HTML_PARSER_BIG_BUFFER_SIZE 1000
pcercuei 0:03b5121a232e 52 #define HTML_PARSER_BUFFER_SIZE 100
pcercuei 0:03b5121a232e 53
pcercuei 0:03b5121a232e 54 /* #define DEBUG */
pcercuei 0:03b5121a232e 55 /* #define DEBUG_PUSH */
pcercuei 0:03b5121a232e 56
pcercuei 0:03b5121a232e 57 static int htmlOmittedDefaultValue = 1;
pcercuei 0:03b5121a232e 58
pcercuei 0:03b5121a232e 59 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
pcercuei 0:03b5121a232e 60 xmlChar end, xmlChar end2, xmlChar end3);
pcercuei 0:03b5121a232e 61 static void htmlParseComment(htmlParserCtxtPtr ctxt);
pcercuei 0:03b5121a232e 62
pcercuei 0:03b5121a232e 63 /************************************************************************
pcercuei 0:03b5121a232e 64 * *
pcercuei 0:03b5121a232e 65 * Some factorized error routines *
pcercuei 0:03b5121a232e 66 * *
pcercuei 0:03b5121a232e 67 ************************************************************************/
pcercuei 0:03b5121a232e 68
pcercuei 0:03b5121a232e 69 /**
pcercuei 0:03b5121a232e 70 * htmlErrMemory:
pcercuei 0:03b5121a232e 71 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 72 * @extra: extra informations
pcercuei 0:03b5121a232e 73 *
pcercuei 0:03b5121a232e 74 * Handle a redefinition of attribute error
pcercuei 0:03b5121a232e 75 */
pcercuei 0:03b5121a232e 76 static void
pcercuei 0:03b5121a232e 77 htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
pcercuei 0:03b5121a232e 78 {
pcercuei 0:03b5121a232e 79 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
pcercuei 0:03b5121a232e 80 (ctxt->instate == XML_PARSER_EOF))
pcercuei 0:03b5121a232e 81 return;
pcercuei 0:03b5121a232e 82 if (ctxt != NULL) {
pcercuei 0:03b5121a232e 83 ctxt->errNo = XML_ERR_NO_MEMORY;
pcercuei 0:03b5121a232e 84 ctxt->instate = XML_PARSER_EOF;
pcercuei 0:03b5121a232e 85 ctxt->disableSAX = 1;
pcercuei 0:03b5121a232e 86 }
pcercuei 0:03b5121a232e 87 if (extra)
pcercuei 0:03b5121a232e 88 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
pcercuei 0:03b5121a232e 89 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
pcercuei 0:03b5121a232e 90 NULL, NULL, 0, 0,
pcercuei 0:03b5121a232e 91 "Memory allocation failed : %s\n", extra);
pcercuei 0:03b5121a232e 92 else
pcercuei 0:03b5121a232e 93 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
pcercuei 0:03b5121a232e 94 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
pcercuei 0:03b5121a232e 95 NULL, NULL, 0, 0, "Memory allocation failed\n");
pcercuei 0:03b5121a232e 96 }
pcercuei 0:03b5121a232e 97
pcercuei 0:03b5121a232e 98 /**
pcercuei 0:03b5121a232e 99 * htmlParseErr:
pcercuei 0:03b5121a232e 100 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 101 * @error: the error number
pcercuei 0:03b5121a232e 102 * @msg: the error message
pcercuei 0:03b5121a232e 103 * @str1: string infor
pcercuei 0:03b5121a232e 104 * @str2: string infor
pcercuei 0:03b5121a232e 105 *
pcercuei 0:03b5121a232e 106 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
pcercuei 0:03b5121a232e 107 */
pcercuei 0:03b5121a232e 108 static void
pcercuei 0:03b5121a232e 109 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
pcercuei 0:03b5121a232e 110 const char *msg, const xmlChar *str1, const xmlChar *str2)
pcercuei 0:03b5121a232e 111 {
pcercuei 0:03b5121a232e 112 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
pcercuei 0:03b5121a232e 113 (ctxt->instate == XML_PARSER_EOF))
pcercuei 0:03b5121a232e 114 return;
pcercuei 0:03b5121a232e 115 if (ctxt != NULL)
pcercuei 0:03b5121a232e 116 ctxt->errNo = error;
pcercuei 0:03b5121a232e 117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
pcercuei 0:03b5121a232e 118 XML_ERR_ERROR, NULL, 0,
pcercuei 0:03b5121a232e 119 (const char *) str1, (const char *) str2,
pcercuei 0:03b5121a232e 120 NULL, 0, 0,
pcercuei 0:03b5121a232e 121 msg, str1, str2);
pcercuei 0:03b5121a232e 122 if (ctxt != NULL)
pcercuei 0:03b5121a232e 123 ctxt->wellFormed = 0;
pcercuei 0:03b5121a232e 124 }
pcercuei 0:03b5121a232e 125
pcercuei 0:03b5121a232e 126 /**
pcercuei 0:03b5121a232e 127 * htmlParseErrInt:
pcercuei 0:03b5121a232e 128 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 129 * @error: the error number
pcercuei 0:03b5121a232e 130 * @msg: the error message
pcercuei 0:03b5121a232e 131 * @val: integer info
pcercuei 0:03b5121a232e 132 *
pcercuei 0:03b5121a232e 133 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
pcercuei 0:03b5121a232e 134 */
pcercuei 0:03b5121a232e 135 static void
pcercuei 0:03b5121a232e 136 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
pcercuei 0:03b5121a232e 137 const char *msg, int val)
pcercuei 0:03b5121a232e 138 {
pcercuei 0:03b5121a232e 139 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
pcercuei 0:03b5121a232e 140 (ctxt->instate == XML_PARSER_EOF))
pcercuei 0:03b5121a232e 141 return;
pcercuei 0:03b5121a232e 142 if (ctxt != NULL)
pcercuei 0:03b5121a232e 143 ctxt->errNo = error;
pcercuei 0:03b5121a232e 144 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
pcercuei 0:03b5121a232e 145 XML_ERR_ERROR, NULL, 0, NULL, NULL,
pcercuei 0:03b5121a232e 146 NULL, val, 0, msg, val);
pcercuei 0:03b5121a232e 147 if (ctxt != NULL)
pcercuei 0:03b5121a232e 148 ctxt->wellFormed = 0;
pcercuei 0:03b5121a232e 149 }
pcercuei 0:03b5121a232e 150
pcercuei 0:03b5121a232e 151 /************************************************************************
pcercuei 0:03b5121a232e 152 * *
pcercuei 0:03b5121a232e 153 * Parser stacks related functions and macros *
pcercuei 0:03b5121a232e 154 * *
pcercuei 0:03b5121a232e 155 ************************************************************************/
pcercuei 0:03b5121a232e 156
pcercuei 0:03b5121a232e 157 /**
pcercuei 0:03b5121a232e 158 * htmlnamePush:
pcercuei 0:03b5121a232e 159 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 160 * @value: the element name
pcercuei 0:03b5121a232e 161 *
pcercuei 0:03b5121a232e 162 * Pushes a new element name on top of the name stack
pcercuei 0:03b5121a232e 163 *
pcercuei 0:03b5121a232e 164 * Returns 0 in case of error, the index in the stack otherwise
pcercuei 0:03b5121a232e 165 */
pcercuei 0:03b5121a232e 166 static int
pcercuei 0:03b5121a232e 167 htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
pcercuei 0:03b5121a232e 168 {
pcercuei 0:03b5121a232e 169 if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head")))
pcercuei 0:03b5121a232e 170 ctxt->html = 3;
pcercuei 0:03b5121a232e 171 if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body")))
pcercuei 0:03b5121a232e 172 ctxt->html = 10;
pcercuei 0:03b5121a232e 173 if (ctxt->nameNr >= ctxt->nameMax) {
pcercuei 0:03b5121a232e 174 ctxt->nameMax *= 2;
pcercuei 0:03b5121a232e 175 ctxt->nameTab = (const xmlChar * *)
pcercuei 0:03b5121a232e 176 xmlRealloc((xmlChar * *)ctxt->nameTab,
pcercuei 0:03b5121a232e 177 ctxt->nameMax *
pcercuei 0:03b5121a232e 178 sizeof(ctxt->nameTab[0]));
pcercuei 0:03b5121a232e 179 if (ctxt->nameTab == NULL) {
pcercuei 0:03b5121a232e 180 htmlErrMemory(ctxt, NULL);
pcercuei 0:03b5121a232e 181 return (0);
pcercuei 0:03b5121a232e 182 }
pcercuei 0:03b5121a232e 183 }
pcercuei 0:03b5121a232e 184 ctxt->nameTab[ctxt->nameNr] = value;
pcercuei 0:03b5121a232e 185 ctxt->name = value;
pcercuei 0:03b5121a232e 186 return (ctxt->nameNr++);
pcercuei 0:03b5121a232e 187 }
pcercuei 0:03b5121a232e 188 /**
pcercuei 0:03b5121a232e 189 * htmlnamePop:
pcercuei 0:03b5121a232e 190 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 191 *
pcercuei 0:03b5121a232e 192 * Pops the top element name from the name stack
pcercuei 0:03b5121a232e 193 *
pcercuei 0:03b5121a232e 194 * Returns the name just removed
pcercuei 0:03b5121a232e 195 */
pcercuei 0:03b5121a232e 196 static const xmlChar *
pcercuei 0:03b5121a232e 197 htmlnamePop(htmlParserCtxtPtr ctxt)
pcercuei 0:03b5121a232e 198 {
pcercuei 0:03b5121a232e 199 const xmlChar *ret;
pcercuei 0:03b5121a232e 200
pcercuei 0:03b5121a232e 201 if (ctxt->nameNr <= 0)
pcercuei 0:03b5121a232e 202 return (NULL);
pcercuei 0:03b5121a232e 203 ctxt->nameNr--;
pcercuei 0:03b5121a232e 204 if (ctxt->nameNr < 0)
pcercuei 0:03b5121a232e 205 return (NULL);
pcercuei 0:03b5121a232e 206 if (ctxt->nameNr > 0)
pcercuei 0:03b5121a232e 207 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
pcercuei 0:03b5121a232e 208 else
pcercuei 0:03b5121a232e 209 ctxt->name = NULL;
pcercuei 0:03b5121a232e 210 ret = ctxt->nameTab[ctxt->nameNr];
pcercuei 0:03b5121a232e 211 ctxt->nameTab[ctxt->nameNr] = NULL;
pcercuei 0:03b5121a232e 212 return (ret);
pcercuei 0:03b5121a232e 213 }
pcercuei 0:03b5121a232e 214
pcercuei 0:03b5121a232e 215 /**
pcercuei 0:03b5121a232e 216 * htmlNodeInfoPush:
pcercuei 0:03b5121a232e 217 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 218 * @value: the node info
pcercuei 0:03b5121a232e 219 *
pcercuei 0:03b5121a232e 220 * Pushes a new element name on top of the node info stack
pcercuei 0:03b5121a232e 221 *
pcercuei 0:03b5121a232e 222 * Returns 0 in case of error, the index in the stack otherwise
pcercuei 0:03b5121a232e 223 */
pcercuei 0:03b5121a232e 224 static int
pcercuei 0:03b5121a232e 225 htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value)
pcercuei 0:03b5121a232e 226 {
pcercuei 0:03b5121a232e 227 if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
pcercuei 0:03b5121a232e 228 if (ctxt->nodeInfoMax == 0)
pcercuei 0:03b5121a232e 229 ctxt->nodeInfoMax = 5;
pcercuei 0:03b5121a232e 230 ctxt->nodeInfoMax *= 2;
pcercuei 0:03b5121a232e 231 ctxt->nodeInfoTab = (htmlParserNodeInfo *)
pcercuei 0:03b5121a232e 232 xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
pcercuei 0:03b5121a232e 233 ctxt->nodeInfoMax *
pcercuei 0:03b5121a232e 234 sizeof(ctxt->nodeInfoTab[0]));
pcercuei 0:03b5121a232e 235 if (ctxt->nodeInfoTab == NULL) {
pcercuei 0:03b5121a232e 236 htmlErrMemory(ctxt, NULL);
pcercuei 0:03b5121a232e 237 return (0);
pcercuei 0:03b5121a232e 238 }
pcercuei 0:03b5121a232e 239 }
pcercuei 0:03b5121a232e 240 ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value;
pcercuei 0:03b5121a232e 241 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
pcercuei 0:03b5121a232e 242 return (ctxt->nodeInfoNr++);
pcercuei 0:03b5121a232e 243 }
pcercuei 0:03b5121a232e 244
pcercuei 0:03b5121a232e 245 /**
pcercuei 0:03b5121a232e 246 * htmlNodeInfoPop:
pcercuei 0:03b5121a232e 247 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 248 *
pcercuei 0:03b5121a232e 249 * Pops the top element name from the node info stack
pcercuei 0:03b5121a232e 250 *
pcercuei 0:03b5121a232e 251 * Returns 0 in case of error, the pointer to NodeInfo otherwise
pcercuei 0:03b5121a232e 252 */
pcercuei 0:03b5121a232e 253 static htmlParserNodeInfo *
pcercuei 0:03b5121a232e 254 htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
pcercuei 0:03b5121a232e 255 {
pcercuei 0:03b5121a232e 256 if (ctxt->nodeInfoNr <= 0)
pcercuei 0:03b5121a232e 257 return (NULL);
pcercuei 0:03b5121a232e 258 ctxt->nodeInfoNr--;
pcercuei 0:03b5121a232e 259 if (ctxt->nodeInfoNr < 0)
pcercuei 0:03b5121a232e 260 return (NULL);
pcercuei 0:03b5121a232e 261 if (ctxt->nodeInfoNr > 0)
pcercuei 0:03b5121a232e 262 ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];
pcercuei 0:03b5121a232e 263 else
pcercuei 0:03b5121a232e 264 ctxt->nodeInfo = NULL;
pcercuei 0:03b5121a232e 265 return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
pcercuei 0:03b5121a232e 266 }
pcercuei 0:03b5121a232e 267
/*
 * Macros for accessing the content being parsed. They are meant for the
 * parser only, are not exported, and all expect a variable named `ctxt`
 * to be in scope.
 *
 * Byte-oriented macros. They look at raw xmlChar values, so their results
 * should only be compared against ASCII values:
 *
 *   CUR_PTR    the current pointer into the input.
 *   CUR        the xmlChar at the current position, as an int.
 *   CURRENT    same value as CUR.
 *   RAW        -1 while ctxt->token is set, the current xmlChar otherwise.
 *   NXT(n)     the xmlChar n positions after the current one.
 *   UPPER      the current xmlChar passed through toupper().
 *   UPP(n)     NXT(n) passed through toupper().
 *   SKIP(n)    advance the input pointer, the column and ctxt->nbChars
 *              by n. The line counter is not touched, so it must only be
 *              used to skip text that contains no newline.
 *
 * Input buffer management:
 *
 *   SHRINK     call xmlParserInputShrink() once more than 2 * INPUT_CHUNK
 *              has been consumed and less than 2 * INPUT_CHUNK remains.
 *   GROW       outside of progressive mode, call xmlParserInputGrow() when
 *              less than INPUT_CHUNK remains.
 *
 * Character-oriented macros:
 *
 *   NEXT           xmlNextChar(ctxt).
 *   NEXTL(l)       advance the input pointer by l xmlChars, counting it as
 *                  one character for line/column/nbChars bookkeeping, and
 *                  clear ctxt->token.
 *   CUR_CHAR(l)    htmlCurrentChar(ctxt, &l).
 *   CUR_SCHAR(s,l) xmlStringCurrentChar(ctxt, s, &l).
 *   COPY_BUF(l,b,i,v)
 *                  store v at b[i]: as a single xmlChar when l == 1,
 *                  through xmlCopyChar() otherwise; i is advanced
 *                  accordingly. It expands to a bare if/else, so it must
 *                  be used as a full statement.
 *   SKIP_BLANKS    htmlSkipBlankChars(ctxt).
 */

#define CUR_PTR ctxt->input->cur

#define CURRENT ((int) (*ctxt->input->cur))

#define NXT(val) ctxt->input->cur[(val)]

#define UPPER (toupper(*ctxt->input->cur))

#define UPP(val) (toupper(ctxt->input->cur[(val)]))

#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)

#define SHRINK \
    if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserInputShrink(ctxt->input)

#define GROW \
    if ((ctxt->progressive == 0) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

#define SKIP_BLANKS htmlSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT xmlNextChar(ctxt)

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))

#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
  } while (0)

/************
    Disabled remainder of NEXTL:
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

#define COPY_BUF(l,b,i,v) \
    if (l == 1) b[i++] = (xmlChar) v; \
    else i += xmlCopyChar(l,&b[i],v)
pcercuei 0:03b5121a232e 347
pcercuei 0:03b5121a232e 348 /**
pcercuei 0:03b5121a232e 349 * htmlFindEncoding:
pcercuei 0:03b5121a232e 350 * @the HTML parser context
pcercuei 0:03b5121a232e 351 *
pcercuei 0:03b5121a232e 352 * Ty to find and encoding in the current data available in the input
pcercuei 0:03b5121a232e 353 * buffer this is needed to try to switch to the proper encoding when
pcercuei 0:03b5121a232e 354 * one face a character error.
pcercuei 0:03b5121a232e 355 * That's an heuristic, since it's operating outside of parsing it could
pcercuei 0:03b5121a232e 356 * try to use a meta which had been commented out, that's the reason it
pcercuei 0:03b5121a232e 357 * should only be used in case of error, not as a default.
pcercuei 0:03b5121a232e 358 *
pcercuei 0:03b5121a232e 359 * Returns an encoding string or NULL if not found, the string need to
pcercuei 0:03b5121a232e 360 * be freed
pcercuei 0:03b5121a232e 361 */
pcercuei 0:03b5121a232e 362 static xmlChar *
pcercuei 0:03b5121a232e 363 htmlFindEncoding(xmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 364 const xmlChar *start, *cur, *end;
pcercuei 0:03b5121a232e 365
pcercuei 0:03b5121a232e 366 if ((ctxt == NULL) || (ctxt->input == NULL) ||
pcercuei 0:03b5121a232e 367 (ctxt->input->encoding != NULL) || (ctxt->input->buf == NULL) ||
pcercuei 0:03b5121a232e 368 (ctxt->input->buf->encoder != NULL))
pcercuei 0:03b5121a232e 369 return(NULL);
pcercuei 0:03b5121a232e 370 if ((ctxt->input->cur == NULL) || (ctxt->input->end == NULL))
pcercuei 0:03b5121a232e 371 return(NULL);
pcercuei 0:03b5121a232e 372
pcercuei 0:03b5121a232e 373 start = ctxt->input->cur;
pcercuei 0:03b5121a232e 374 end = ctxt->input->end;
pcercuei 0:03b5121a232e 375 /* we also expect the input buffer to be zero terminated */
pcercuei 0:03b5121a232e 376 if (*end != 0)
pcercuei 0:03b5121a232e 377 return(NULL);
pcercuei 0:03b5121a232e 378
pcercuei 0:03b5121a232e 379 cur = xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV");
pcercuei 0:03b5121a232e 380 if (cur == NULL)
pcercuei 0:03b5121a232e 381 return(NULL);
pcercuei 0:03b5121a232e 382 cur = xmlStrcasestr(cur, BAD_CAST "CONTENT");
pcercuei 0:03b5121a232e 383 if (cur == NULL)
pcercuei 0:03b5121a232e 384 return(NULL);
pcercuei 0:03b5121a232e 385 cur = xmlStrcasestr(cur, BAD_CAST "CHARSET=");
pcercuei 0:03b5121a232e 386 if (cur == NULL)
pcercuei 0:03b5121a232e 387 return(NULL);
pcercuei 0:03b5121a232e 388 cur += 8;
pcercuei 0:03b5121a232e 389 start = cur;
pcercuei 0:03b5121a232e 390 while (((*cur >= 'A') && (*cur <= 'Z')) ||
pcercuei 0:03b5121a232e 391 ((*cur >= 'a') && (*cur <= 'z')) ||
pcercuei 0:03b5121a232e 392 ((*cur >= '0') && (*cur <= '9')) ||
pcercuei 0:03b5121a232e 393 (*cur == '-') || (*cur == '_') || (*cur == ':') || (*cur == '/'))
pcercuei 0:03b5121a232e 394 cur++;
pcercuei 0:03b5121a232e 395 if (cur == start)
pcercuei 0:03b5121a232e 396 return(NULL);
pcercuei 0:03b5121a232e 397 return(xmlStrndup(start, cur - start));
pcercuei 0:03b5121a232e 398 }
pcercuei 0:03b5121a232e 399
pcercuei 0:03b5121a232e 400 /**
pcercuei 0:03b5121a232e 401 * htmlCurrentChar:
pcercuei 0:03b5121a232e 402 * @ctxt: the HTML parser context
pcercuei 0:03b5121a232e 403 * @len: pointer to the length of the char read
pcercuei 0:03b5121a232e 404 *
pcercuei 0:03b5121a232e 405 * The current char value, if using UTF-8 this may actually span multiple
pcercuei 0:03b5121a232e 406 * bytes in the input buffer. Implement the end of line normalization:
pcercuei 0:03b5121a232e 407 * 2.11 End-of-Line Handling
pcercuei 0:03b5121a232e 408 * If the encoding is unspecified, in the case we find an ISO-Latin-1
pcercuei 0:03b5121a232e 409 * char, then the encoding converter is plugged in automatically.
pcercuei 0:03b5121a232e 410 *
pcercuei 0:03b5121a232e 411 * Returns the current char value and its length
pcercuei 0:03b5121a232e 412 */
pcercuei 0:03b5121a232e 413
pcercuei 0:03b5121a232e 414 static int
pcercuei 0:03b5121a232e 415 htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
pcercuei 0:03b5121a232e 416 if (ctxt->instate == XML_PARSER_EOF)
pcercuei 0:03b5121a232e 417 return(0);
pcercuei 0:03b5121a232e 418
pcercuei 0:03b5121a232e 419 if (ctxt->token != 0) {
pcercuei 0:03b5121a232e 420 *len = 0;
pcercuei 0:03b5121a232e 421 return(ctxt->token);
pcercuei 0:03b5121a232e 422 }
pcercuei 0:03b5121a232e 423 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
pcercuei 0:03b5121a232e 424 /*
pcercuei 0:03b5121a232e 425 * We are supposed to handle UTF8, check it's valid
pcercuei 0:03b5121a232e 426 * From rfc2044: encoding of the Unicode values on UTF-8:
pcercuei 0:03b5121a232e 427 *
pcercuei 0:03b5121a232e 428 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
pcercuei 0:03b5121a232e 429 * 0000 0000-0000 007F 0xxxxxxx
pcercuei 0:03b5121a232e 430 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
pcercuei 0:03b5121a232e 431 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
pcercuei 0:03b5121a232e 432 *
pcercuei 0:03b5121a232e 433 * Check for the 0x110000 limit too
pcercuei 0:03b5121a232e 434 */
pcercuei 0:03b5121a232e 435 const unsigned char *cur = ctxt->input->cur;
pcercuei 0:03b5121a232e 436 unsigned char c;
pcercuei 0:03b5121a232e 437 unsigned int val;
pcercuei 0:03b5121a232e 438
pcercuei 0:03b5121a232e 439 c = *cur;
pcercuei 0:03b5121a232e 440 if (c & 0x80) {
pcercuei 0:03b5121a232e 441 if (cur[1] == 0) {
pcercuei 0:03b5121a232e 442 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
pcercuei 0:03b5121a232e 443 cur = ctxt->input->cur;
pcercuei 0:03b5121a232e 444 }
pcercuei 0:03b5121a232e 445 if ((cur[1] & 0xc0) != 0x80)
pcercuei 0:03b5121a232e 446 goto encoding_error;
pcercuei 0:03b5121a232e 447 if ((c & 0xe0) == 0xe0) {
pcercuei 0:03b5121a232e 448
pcercuei 0:03b5121a232e 449 if (cur[2] == 0) {
pcercuei 0:03b5121a232e 450 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
pcercuei 0:03b5121a232e 451 cur = ctxt->input->cur;
pcercuei 0:03b5121a232e 452 }
pcercuei 0:03b5121a232e 453 if ((cur[2] & 0xc0) != 0x80)
pcercuei 0:03b5121a232e 454 goto encoding_error;
pcercuei 0:03b5121a232e 455 if ((c & 0xf0) == 0xf0) {
pcercuei 0:03b5121a232e 456 if (cur[3] == 0) {
pcercuei 0:03b5121a232e 457 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
pcercuei 0:03b5121a232e 458 cur = ctxt->input->cur;
pcercuei 0:03b5121a232e 459 }
pcercuei 0:03b5121a232e 460 if (((c & 0xf8) != 0xf0) ||
pcercuei 0:03b5121a232e 461 ((cur[3] & 0xc0) != 0x80))
pcercuei 0:03b5121a232e 462 goto encoding_error;
pcercuei 0:03b5121a232e 463 /* 4-byte code */
pcercuei 0:03b5121a232e 464 *len = 4;
pcercuei 0:03b5121a232e 465 val = (cur[0] & 0x7) << 18;
pcercuei 0:03b5121a232e 466 val |= (cur[1] & 0x3f) << 12;
pcercuei 0:03b5121a232e 467 val |= (cur[2] & 0x3f) << 6;
pcercuei 0:03b5121a232e 468 val |= cur[3] & 0x3f;
pcercuei 0:03b5121a232e 469 } else {
pcercuei 0:03b5121a232e 470 /* 3-byte code */
pcercuei 0:03b5121a232e 471 *len = 3;
pcercuei 0:03b5121a232e 472 val = (cur[0] & 0xf) << 12;
pcercuei 0:03b5121a232e 473 val |= (cur[1] & 0x3f) << 6;
pcercuei 0:03b5121a232e 474 val |= cur[2] & 0x3f;
pcercuei 0:03b5121a232e 475 }
pcercuei 0:03b5121a232e 476 } else {
pcercuei 0:03b5121a232e 477 /* 2-byte code */
pcercuei 0:03b5121a232e 478 *len = 2;
pcercuei 0:03b5121a232e 479 val = (cur[0] & 0x1f) << 6;
pcercuei 0:03b5121a232e 480 val |= cur[1] & 0x3f;
pcercuei 0:03b5121a232e 481 }
pcercuei 0:03b5121a232e 482 if (!IS_CHAR(val)) {
pcercuei 0:03b5121a232e 483 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
pcercuei 0:03b5121a232e 484 "Char 0x%X out of allowed range\n", val);
pcercuei 0:03b5121a232e 485 }
pcercuei 0:03b5121a232e 486 return(val);
pcercuei 0:03b5121a232e 487 } else {
pcercuei 0:03b5121a232e 488 if ((*ctxt->input->cur == 0) &&
pcercuei 0:03b5121a232e 489 (ctxt->input->cur < ctxt->input->end)) {
pcercuei 0:03b5121a232e 490 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
pcercuei 0:03b5121a232e 491 "Char 0x%X out of allowed range\n", 0);
pcercuei 0:03b5121a232e 492 *len = 1;
pcercuei 0:03b5121a232e 493 return(' ');
pcercuei 0:03b5121a232e 494 }
pcercuei 0:03b5121a232e 495 /* 1-byte code */
pcercuei 0:03b5121a232e 496 *len = 1;
pcercuei 0:03b5121a232e 497 return((int) *ctxt->input->cur);
pcercuei 0:03b5121a232e 498 }
pcercuei 0:03b5121a232e 499 }
pcercuei 0:03b5121a232e 500 /*
pcercuei 0:03b5121a232e 501 * Assume it's a fixed length encoding (1) with
pcercuei 0:03b5121a232e 502 * a compatible encoding for the ASCII set, since
pcercuei 0:03b5121a232e 503 * XML constructs only use < 128 chars
pcercuei 0:03b5121a232e 504 */
pcercuei 0:03b5121a232e 505 *len = 1;
pcercuei 0:03b5121a232e 506 if ((int) *ctxt->input->cur < 0x80)
pcercuei 0:03b5121a232e 507 return((int) *ctxt->input->cur);
pcercuei 0:03b5121a232e 508
pcercuei 0:03b5121a232e 509 /*
pcercuei 0:03b5121a232e 510 * Humm this is bad, do an automatic flow conversion
pcercuei 0:03b5121a232e 511 */
pcercuei 0:03b5121a232e 512 {
pcercuei 0:03b5121a232e 513 xmlChar * guess;
pcercuei 0:03b5121a232e 514 xmlCharEncodingHandlerPtr handler;
pcercuei 0:03b5121a232e 515
pcercuei 0:03b5121a232e 516 guess = htmlFindEncoding(ctxt);
pcercuei 0:03b5121a232e 517 if (guess == NULL) {
pcercuei 0:03b5121a232e 518 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
pcercuei 0:03b5121a232e 519 } else {
pcercuei 0:03b5121a232e 520 if (ctxt->input->encoding != NULL)
pcercuei 0:03b5121a232e 521 xmlFree((xmlChar *) ctxt->input->encoding);
pcercuei 0:03b5121a232e 522 ctxt->input->encoding = guess;
pcercuei 0:03b5121a232e 523 handler = xmlFindCharEncodingHandler((const char *) guess);
pcercuei 0:03b5121a232e 524 if (handler != NULL) {
pcercuei 0:03b5121a232e 525 xmlSwitchToEncoding(ctxt, handler);
pcercuei 0:03b5121a232e 526 } else {
pcercuei 0:03b5121a232e 527 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
pcercuei 0:03b5121a232e 528 "Unsupported encoding %s", guess, NULL);
pcercuei 0:03b5121a232e 529 }
pcercuei 0:03b5121a232e 530 }
pcercuei 0:03b5121a232e 531 ctxt->charset = XML_CHAR_ENCODING_UTF8;
pcercuei 0:03b5121a232e 532 }
pcercuei 0:03b5121a232e 533
pcercuei 0:03b5121a232e 534 return(xmlCurrentChar(ctxt, len));
pcercuei 0:03b5121a232e 535
pcercuei 0:03b5121a232e 536 encoding_error:
pcercuei 0:03b5121a232e 537 /*
pcercuei 0:03b5121a232e 538 * If we detect an UTF8 error that probably mean that the
pcercuei 0:03b5121a232e 539 * input encoding didn't get properly advertized in the
pcercuei 0:03b5121a232e 540 * declaration header. Report the error and switch the encoding
pcercuei 0:03b5121a232e 541 * to ISO-Latin-1 (if you don't like this policy, just declare the
pcercuei 0:03b5121a232e 542 * encoding !)
pcercuei 0:03b5121a232e 543 */
pcercuei 0:03b5121a232e 544 {
pcercuei 0:03b5121a232e 545 char buffer[150];
pcercuei 0:03b5121a232e 546
pcercuei 0:03b5121a232e 547 if (ctxt->input->end - ctxt->input->cur >= 4) {
pcercuei 0:03b5121a232e 548 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
pcercuei 0:03b5121a232e 549 ctxt->input->cur[0], ctxt->input->cur[1],
pcercuei 0:03b5121a232e 550 ctxt->input->cur[2], ctxt->input->cur[3]);
pcercuei 0:03b5121a232e 551 } else {
pcercuei 0:03b5121a232e 552 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
pcercuei 0:03b5121a232e 553 }
pcercuei 0:03b5121a232e 554 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
pcercuei 0:03b5121a232e 555 "Input is not proper UTF-8, indicate encoding !\n",
pcercuei 0:03b5121a232e 556 BAD_CAST buffer, NULL);
pcercuei 0:03b5121a232e 557 }
pcercuei 0:03b5121a232e 558
pcercuei 0:03b5121a232e 559 ctxt->charset = XML_CHAR_ENCODING_8859_1;
pcercuei 0:03b5121a232e 560 *len = 1;
pcercuei 0:03b5121a232e 561 return((int) *ctxt->input->cur);
pcercuei 0:03b5121a232e 562 }
pcercuei 0:03b5121a232e 563
pcercuei 0:03b5121a232e 564 /**
pcercuei 0:03b5121a232e 565 * htmlSkipBlankChars:
pcercuei 0:03b5121a232e 566 * @ctxt: the HTML parser context
pcercuei 0:03b5121a232e 567 *
pcercuei 0:03b5121a232e 568 * skip all blanks character found at that point in the input streams.
pcercuei 0:03b5121a232e 569 *
pcercuei 0:03b5121a232e 570 * Returns the number of space chars skipped
pcercuei 0:03b5121a232e 571 */
pcercuei 0:03b5121a232e 572
pcercuei 0:03b5121a232e 573 static int
pcercuei 0:03b5121a232e 574 htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 575 int res = 0;
pcercuei 0:03b5121a232e 576
pcercuei 0:03b5121a232e 577 while (IS_BLANK_CH(*(ctxt->input->cur))) {
pcercuei 0:03b5121a232e 578 if ((*ctxt->input->cur == 0) &&
pcercuei 0:03b5121a232e 579 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
pcercuei 0:03b5121a232e 580 xmlPopInput(ctxt);
pcercuei 0:03b5121a232e 581 } else {
pcercuei 0:03b5121a232e 582 if (*(ctxt->input->cur) == '\n') {
pcercuei 0:03b5121a232e 583 ctxt->input->line++; ctxt->input->col = 1;
pcercuei 0:03b5121a232e 584 } else ctxt->input->col++;
pcercuei 0:03b5121a232e 585 ctxt->input->cur++;
pcercuei 0:03b5121a232e 586 ctxt->nbChars++;
pcercuei 0:03b5121a232e 587 if (*ctxt->input->cur == 0)
pcercuei 0:03b5121a232e 588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
pcercuei 0:03b5121a232e 589 }
pcercuei 0:03b5121a232e 590 res++;
pcercuei 0:03b5121a232e 591 }
pcercuei 0:03b5121a232e 592 return(res);
pcercuei 0:03b5121a232e 593 }
pcercuei 0:03b5121a232e 594
pcercuei 0:03b5121a232e 595
pcercuei 0:03b5121a232e 596
pcercuei 0:03b5121a232e 597 /************************************************************************
pcercuei 0:03b5121a232e 598 * *
pcercuei 0:03b5121a232e 599 * The list of HTML elements and their properties *
pcercuei 0:03b5121a232e 600 * *
pcercuei 0:03b5121a232e 601 ************************************************************************/
pcercuei 0:03b5121a232e 602
pcercuei 0:03b5121a232e 603 /*
pcercuei 0:03b5121a232e 604 * Start Tag: 1 means the start tag can be omitted
pcercuei 0:03b5121a232e 605 * End Tag: 1 means the end tag can be omitted
pcercuei 0:03b5121a232e 606 * 2 means it's forbidden (empty elements)
pcercuei 0:03b5121a232e 607 * 3 means the tag is stylistic and should be closed easily
pcercuei 0:03b5121a232e 608 * Depr: this element is deprecated
pcercuei 0:03b5121a232e 609 * DTD: 1 means that this element is valid only in the Loose DTD
pcercuei 0:03b5121a232e 610 * 2 means that this element is valid only in the Frameset DTD
pcercuei 0:03b5121a232e 611 *
pcercuei 0:03b5121a232e 612 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description
pcercuei 0:03b5121a232e 613 , subElements , impliedsubelt , Attributes, userdata
pcercuei 0:03b5121a232e 614 */
pcercuei 0:03b5121a232e 615
pcercuei 0:03b5121a232e 616 /* Definitions and a couple of vars for HTML Elements */
pcercuei 0:03b5121a232e 617
pcercuei 0:03b5121a232e 618 #define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small" /* comma-separated name list, spliced into array initializers */
pcercuei 0:03b5121a232e 619 #define NB_FONTSTYLE 8 /* number of names in FONTSTYLE */
pcercuei 0:03b5121a232e 620 #define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
pcercuei 0:03b5121a232e 621 #define NB_PHRASE 10 /* number of names in PHRASE */
pcercuei 0:03b5121a232e 622 #define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
pcercuei 0:03b5121a232e 623 #define NB_SPECIAL 16 /* number of names in SPECIAL */
pcercuei 0:03b5121a232e 624 #define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL /* FORMCTRL is defined further down; macros are expanded at the point of use */
pcercuei 0:03b5121a232e 625 #define NB_INLINE NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL /* unparenthesized sum: wrap in () when used inside a larger expression */
pcercuei 0:03b5121a232e 626 #define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
pcercuei 0:03b5121a232e 627 #define NB_BLOCK NB_HEADING + NB_LIST + 14 /* 14 = names spelled out literally in BLOCK; unparenthesized sum */
pcercuei 0:03b5121a232e 628 #define FORMCTRL "input", "select", "textarea", "label", "button"
pcercuei 0:03b5121a232e 629 #define NB_FORMCTRL 5 /* number of names in FORMCTRL */
pcercuei 0:03b5121a232e 630 #define PCDATA /* expands to nothing */
pcercuei 0:03b5121a232e 631 #define NB_PCDATA 0
pcercuei 0:03b5121a232e 632 #define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
pcercuei 0:03b5121a232e 633 #define NB_HEADING 6 /* number of names in HEADING */
pcercuei 0:03b5121a232e 634 #define LIST "ul", "ol", "dir", "menu"
pcercuei 0:03b5121a232e 635 #define NB_LIST 4 /* number of names in LIST */
pcercuei 0:03b5121a232e 636 #define MODIFIER /* expands to nothing; kept as a marker where it is written after a list name */
pcercuei 0:03b5121a232e 637 #define NB_MODIFIER 0
pcercuei 0:03b5121a232e 638 #define FLOW BLOCK,INLINE /* every BLOCK name followed by every INLINE name */
pcercuei 0:03b5121a232e 639 #define NB_FLOW NB_BLOCK + NB_INLINE /* unparenthesized sum: wrap in () when used inside a larger expression */
pcercuei 0:03b5121a232e 640 #define EMPTY NULL /* written in html40ElementTable where an entry has no sub-element list */
pcercuei 0:03b5121a232e 641
pcercuei 0:03b5121a232e 642
pcercuei 0:03b5121a232e 643 static const char* const html_flow[] = { FLOW, NULL } ; /* NULL-terminated list of all FLOW (block + inline) element names */
pcercuei 0:03b5121a232e 644 static const char* const html_inline[] = { INLINE, NULL } ; /* NULL-terminated list of all INLINE element names */
pcercuei 0:03b5121a232e 645
pcercuei 0:03b5121a232e 646 /* placeholders: elements with content but no sub-elements */
pcercuei 0:03b5121a232e 647 static const char* const html_pcdata[] = { NULL } ; /* empty list: only the NULL terminator */
pcercuei 0:03b5121a232e 648 #define html_cdata html_pcdata /* alias: both names designate the same empty list */
pcercuei 0:03b5121a232e 649
pcercuei 0:03b5121a232e 650
pcercuei 0:03b5121a232e 651 /* ... and for HTML Attributes */
pcercuei 0:03b5121a232e 652
pcercuei 0:03b5121a232e 653 #define COREATTRS "id", "class", "style", "title"
pcercuei 0:03b5121a232e 654 #define NB_COREATTRS 4
pcercuei 0:03b5121a232e 655 #define I18N "lang", "dir"
pcercuei 0:03b5121a232e 656 #define NB_I18N 2
pcercuei 0:03b5121a232e 657 #define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
pcercuei 0:03b5121a232e 658 #define NB_EVENTS 9
pcercuei 0:03b5121a232e 659 #define ATTRS COREATTRS,I18N,EVENTS
pcercuei 0:03b5121a232e 660 #define NB_ATTRS NB_NB_COREATTRS + NB_I18N + NB_EVENTS
pcercuei 0:03b5121a232e 661 #define CELLHALIGN "align", "char", "charoff"
pcercuei 0:03b5121a232e 662 #define NB_CELLHALIGN 3
pcercuei 0:03b5121a232e 663 #define CELLVALIGN "valign"
pcercuei 0:03b5121a232e 664 #define NB_CELLVALIGN 1
pcercuei 0:03b5121a232e 665
pcercuei 0:03b5121a232e 666 static const char* const html_attrs[] = { ATTRS, NULL } ; /* core + i18n + event attribute names, NULL-terminated */
pcercuei 0:03b5121a232e 667 static const char* const core_i18n_attrs[] = { COREATTRS, I18N, NULL } ; /* ATTRS without the event handlers */
pcercuei 0:03b5121a232e 668 static const char* const core_attrs[] = { COREATTRS, NULL } ; /* core attribute names only */
pcercuei 0:03b5121a232e 669 static const char* const i18n_attrs[] = { I18N, NULL } ; /* "lang" and "dir" only */
pcercuei 0:03b5121a232e 670
pcercuei 0:03b5121a232e 671
pcercuei 0:03b5121a232e 672 /* Other declarations that should go inline ... */
pcercuei 0:03b5121a232e 673 static const char* const a_attrs[] = { ATTRS, "charset", "type", "name",
pcercuei 0:03b5121a232e 674 "href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",
pcercuei 0:03b5121a232e 675 "tabindex", "onfocus", "onblur", NULL } ; /* referenced by the "a" entry of html40ElementTable */
pcercuei 0:03b5121a232e 676 static const char* const target_attr[] = { "target", NULL } ; /* referenced by the "a", "area", "base", "form" and "link" entries */
pcercuei 0:03b5121a232e 677 static const char* const rows_cols_attr[] = { "rows", "cols", NULL } ; /* referenced by the "textarea" entry */
pcercuei 0:03b5121a232e 678 static const char* const alt_attr[] = { "alt", NULL } ; /* referenced by the "area" entry */
pcercuei 0:03b5121a232e 679 static const char* const src_alt_attrs[] = { "src", "alt", NULL } ; /* referenced by the "img" entry */
pcercuei 0:03b5121a232e 680 static const char* const href_attrs[] = { "href", NULL } ; /* referenced by the "base" entry */
pcercuei 0:03b5121a232e 681 static const char* const clear_attrs[] = { "clear", NULL } ; /* referenced by the "br" entry */
pcercuei 0:03b5121a232e 682 static const char* const inline_p[] = { INLINE, "p", NULL } ; /* INLINE names plus "p"; sub-element list of the "address" entry */
pcercuei 0:03b5121a232e 683
pcercuei 0:03b5121a232e 684 static const char* const flow_param[] = { FLOW, "param", NULL } ; /* FLOW names plus "param"; sub-element list of the "applet" entry */
pcercuei 0:03b5121a232e 685 static const char* const applet_attrs[] = { COREATTRS , "codebase",
pcercuei 0:03b5121a232e 686 "archive", "alt", "name", "height", "width", "align",
pcercuei 0:03b5121a232e 687 "hspace", "vspace", NULL } ; /* referenced by the "applet" entry */
pcercuei 0:03b5121a232e 688 static const char* const area_attrs[] = { "shape", "coords", "href", "nohref",
pcercuei 0:03b5121a232e 689 "tabindex", "accesskey", "onfocus", "onblur", NULL } ; /* referenced by the "area" entry */
pcercuei 0:03b5121a232e 690 static const char* const basefont_attrs[] =
pcercuei 0:03b5121a232e 691 { "id", "size", "color", "face", NULL } ; /* referenced by the "basefont" entry */
pcercuei 0:03b5121a232e 692 static const char* const quote_attrs[] = { ATTRS, "cite", NULL } ; /* referenced by the "blockquote" and "q" entries */
pcercuei 0:03b5121a232e 693 static const char* const body_contents[] = { FLOW, "ins", "del", NULL } ; /* sub-element list of the "body" entry */
pcercuei 0:03b5121a232e 694 static const char* const body_attrs[] = { ATTRS, "onload", "onunload", NULL } ; /* referenced by the "body" entry */
pcercuei 0:03b5121a232e 695 static const char* const body_depr[] = { "background", "bgcolor", "text",
pcercuei 0:03b5121a232e 696 "link", "vlink", "alink", NULL } ; /* referenced by the "body" entry */
pcercuei 0:03b5121a232e 697 static const char* const button_attrs[] = { ATTRS, "name", "value", "type",
pcercuei 0:03b5121a232e 698 "disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL } ; /* referenced by the "button" entry */
pcercuei 0:03b5121a232e 699
pcercuei 0:03b5121a232e 700
pcercuei 0:03b5121a232e 701 static const char* const col_attrs[] = { ATTRS, "span", "width", CELLHALIGN, CELLVALIGN, NULL } ; /* referenced by the "col" and "colgroup" entries */
pcercuei 0:03b5121a232e 702 static const char* const col_elt[] = { "col", NULL } ; /* sub-element list of the "colgroup" entry */
pcercuei 0:03b5121a232e 703 static const char* const edit_attrs[] = { ATTRS, "datetime", "cite", NULL } ; /* referenced by the "del" and "ins" entries */
pcercuei 0:03b5121a232e 704 static const char* const compact_attrs[] = { ATTRS, "compact", NULL } ; /* ATTRS plus "compact"; referenced by the "dir" and "menu" entries */
pcercuei 0:03b5121a232e 705 static const char* const dl_contents[] = { "dt", "dd", NULL } ; /* sub-element list of the "dl" entry */
pcercuei 0:03b5121a232e 706 static const char* const compact_attr[] = { "compact", NULL } ; /* "compact" alone (not to be confused with compact_attrs); referenced by the "dl" entry */
pcercuei 0:03b5121a232e 707 static const char* const label_attr[] = { "label", NULL } ; /* referenced by the "optgroup" entry */
pcercuei 0:03b5121a232e 708 static const char* const fieldset_contents[] = { FLOW, "legend", NULL } ; /* sub-element list of the "fieldset" entry; NULL terminator added so it ends like every other list in this section */
pcercuei 0:03b5121a232e 709 static const char* const font_attrs[] = { COREATTRS, I18N, "size", "color", "face" , NULL } ; /* referenced by the "font" entry of html40ElementTable */
pcercuei 0:03b5121a232e 710 static const char* const form_contents[] = { HEADING, LIST, INLINE, "pre", "p", "div", "center", "noscript", "noframes", "blockquote", "isindex", "hr", "table", "fieldset", "address", NULL } ; /* INLINE plus the BLOCK names except "dl" and "form" */
pcercuei 0:03b5121a232e 711 static const char* const form_attrs[] = { ATTRS, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL } ;
pcercuei 0:03b5121a232e 712 static const char* const frame_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL } ;
pcercuei 0:03b5121a232e 713 static const char* const frameset_attrs[] = { COREATTRS, "rows", "cols", "onload", "onunload", NULL } ;
pcercuei 0:03b5121a232e 714 static const char* const frameset_contents[] = { "frameset", "frame", "noframes", NULL } ; /* sub-element list of the "frameset" entry */
pcercuei 0:03b5121a232e 715 static const char* const head_attrs[] = { I18N, "profile", NULL } ;
pcercuei 0:03b5121a232e 716 static const char* const head_contents[] = { "title", "isindex", "base", "script", "style", "meta", "link", "object", NULL } ; /* sub-element list of the "head" entry */
pcercuei 0:03b5121a232e 717 static const char* const hr_depr[] = { "align", "noshade", "size", "width", NULL } ;
pcercuei 0:03b5121a232e 718 static const char* const version_attr[] = { "version", NULL } ; /* referenced by the "html" entry */
pcercuei 0:03b5121a232e 719 static const char* const html_content[] = { "head", "body", "frameset", NULL } ; /* sub-element list of the "html" entry */
pcercuei 0:03b5121a232e 720 static const char* const iframe_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL } ;
pcercuei 0:03b5121a232e 721 static const char* const img_attrs[] = { ATTRS, "longdesc", "name", "height", "width", "usemap", "ismap", NULL } ;
pcercuei 0:03b5121a232e 722 static const char* const embed_attrs[] = { COREATTRS, "align", "alt", "border", "code", "codebase", "frameborder", "height", "hidden", "hspace", "name", "palette", "pluginspace", "pluginurl", "src", "type", "units", "vspace", "width", NULL } ;
pcercuei 0:03b5121a232e 723 static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ;
pcercuei 0:03b5121a232e 724 static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ; /* referenced by the "isindex" entry */
pcercuei 0:03b5121a232e 725 static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ; /* referenced by the "label" entry (label_attr, singular, is the list used by "optgroup") */
pcercuei 0:03b5121a232e 726 static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ;
pcercuei 0:03b5121a232e 727 static const char* const align_attr[] = { "align", NULL } ; /* shared by several entries (div, h1-h6, img, input, legend, p) */
pcercuei 0:03b5121a232e 728 static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ;
pcercuei 0:03b5121a232e 729 static const char* const map_contents[] = { BLOCK, "area", NULL } ; /* sub-element list of the "map" entry */
pcercuei 0:03b5121a232e 730 static const char* const name_attr[] = { "name", NULL } ; /* referenced by the "map" and "param" entries */
pcercuei 0:03b5121a232e 731 static const char* const action_attr[] = { "action", NULL } ; /* referenced by the "form" entry */
pcercuei 0:03b5121a232e 732 static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; /* sub-element list of the "dir" and "menu" entries */
pcercuei 0:03b5121a232e 733 static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ;
pcercuei 0:03b5121a232e 734 static const char* const content_attr[] = { "content", NULL } ; /* referenced by the "meta" entry */
pcercuei 0:03b5121a232e 735 static const char* const type_attr[] = { "type", NULL } ; /* referenced by the "script" and "style" entries */
pcercuei 0:03b5121a232e 736 static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; /* MODIFIER expands to nothing, so this is "body" followed by the FLOW names */
pcercuei 0:03b5121a232e 737 static const char* const object_contents[] = { FLOW, "param", NULL } ; /* same initializer as flow_param */
pcercuei 0:03b5121a232e 738 static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ;
pcercuei 0:03b5121a232e 739 static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ;
pcercuei 0:03b5121a232e 740 static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ; /* referenced by the "ol" entry */
pcercuei 0:03b5121a232e 741 static const char* const option_elt[] = { "option", NULL } ; /* sub-element list of the "optgroup" entry */
pcercuei 0:03b5121a232e 742 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;
pcercuei 0:03b5121a232e 743 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;
pcercuei 0:03b5121a232e 744 static const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;
pcercuei 0:03b5121a232e 745 static const char* const width_attr[] = { "width", NULL } ; /* referenced by the "pre" entry */
pcercuei 0:03b5121a232e 746 static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ; /* sub-element list of the "pre" entry: a hand-picked subset of the inline names */
pcercuei 0:03b5121a232e 747 static const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;
pcercuei 0:03b5121a232e 748 static const char* const language_attr[] = { "language", NULL } ; /* referenced by the "script" entry */
pcercuei 0:03b5121a232e 749 static const char* const select_content[] = { "optgroup", "option", NULL } ; /* sub-element list of the "select" entry */
pcercuei 0:03b5121a232e 750 static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;
pcercuei 0:03b5121a232e 751 static const char* const style_attrs[] = { I18N, "media", "title", NULL } ;
pcercuei 0:03b5121a232e 752 static const char* const table_attrs[] = { ATTRS, "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;
pcercuei 0:03b5121a232e 753 static const char* const table_depr[] = { "align", "bgcolor", NULL } ;
pcercuei 0:03b5121a232e 754 static const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ; /* sub-element list of the "table" entry */
pcercuei 0:03b5121a232e 755 static const char* const tr_elt[] = { "tr", NULL } ; /* sub-element list of the "tbody", "tfoot" and "thead" entries */
pcercuei 0:03b5121a232e 756 static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ; /* referenced by the "tbody", "tfoot", "thead" and "tr" entries */
pcercuei 0:03b5121a232e 757 static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;
pcercuei 0:03b5121a232e 758 static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ; /* referenced by the "td" and "th" entries */
pcercuei 0:03b5121a232e 759 static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;
pcercuei 0:03b5121a232e 760 static const char* const tr_contents[] = { "th", "td", NULL } ; /* sub-element list of the "tr" entry */
pcercuei 0:03b5121a232e 761 static const char* const bgcolor_attr[] = { "bgcolor", NULL } ; /* referenced by the "tr" entry */
pcercuei 0:03b5121a232e 762 static const char* const li_elt[] = { "li", NULL } ; /* sub-element list of the "ol" and "ul" entries */
pcercuei 0:03b5121a232e 763 static const char* const ul_depr[] = { "type", "compact", NULL} ; /* referenced by the "ul" entry */
pcercuei 0:03b5121a232e 764 static const char* const dir_attr[] = { "dir", NULL} ; /* referenced by the "bdo" entry */
pcercuei 0:03b5121a232e 765
pcercuei 0:03b5121a232e 766 #define DECL (const char**) /* cast applied to each const list below; presumably needed because htmlElemDesc declares these fields as const char ** - confirm against HTMLparser.h */
pcercuei 0:03b5121a232e 767
pcercuei 0:03b5121a232e 768 static const htmlElemDesc
pcercuei 0:03b5121a232e 769 html40ElementTable[] = { /* one entry per element, names in alphabetical order. Columns: name, start tag, end tag, save end, empty, deprecated, DTD, inline, description, then five pointers: sub-element list, implied sub-element name, and three attribute lists (NOTE(review): presumably optional / deprecated / required - confirm against htmlElemDesc) */
pcercuei 0:03b5121a232e 770 { "a", 0, 0, 0, 0, 0, 0, 1, "anchor ",
pcercuei 0:03b5121a232e 771 DECL html_inline , NULL , DECL a_attrs , DECL target_attr, NULL
pcercuei 0:03b5121a232e 772 },
pcercuei 0:03b5121a232e 773 { "abbr", 0, 0, 0, 0, 0, 0, 1, "abbreviated form",
pcercuei 0:03b5121a232e 774 DECL html_inline , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 775 },
pcercuei 0:03b5121a232e 776 { "acronym", 0, 0, 0, 0, 0, 0, 1, "", /* description left empty */
pcercuei 0:03b5121a232e 777 DECL html_inline , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 778 },
pcercuei 0:03b5121a232e 779 { "address", 0, 0, 0, 0, 0, 0, 0, "information on author ",
pcercuei 0:03b5121a232e 780 DECL inline_p , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 781 },
pcercuei 0:03b5121a232e 782 { "applet", 0, 0, 0, 0, 1, 1, 2, "java applet ",
pcercuei 0:03b5121a232e 783 DECL flow_param , NULL , NULL , DECL applet_attrs, NULL
pcercuei 0:03b5121a232e 784 },
pcercuei 0:03b5121a232e 785 { "area", 0, 2, 2, 1, 0, 0, 0, "client-side image map area ",
pcercuei 0:03b5121a232e 786 EMPTY , NULL , DECL area_attrs , DECL target_attr, DECL alt_attr
pcercuei 0:03b5121a232e 787 },
pcercuei 0:03b5121a232e 788 { "b", 0, 3, 0, 0, 0, 0, 1, "bold text style",
pcercuei 0:03b5121a232e 789 DECL html_inline , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 790 },
pcercuei 0:03b5121a232e 791 { "base", 0, 2, 2, 1, 0, 0, 0, "document base uri ",
pcercuei 0:03b5121a232e 792 EMPTY , NULL , NULL , DECL target_attr, DECL href_attrs
pcercuei 0:03b5121a232e 793 },
pcercuei 0:03b5121a232e 794 { "basefont", 0, 2, 2, 1, 1, 1, 1, "base font size " ,
pcercuei 0:03b5121a232e 795 EMPTY , NULL , NULL, DECL basefont_attrs, NULL
pcercuei 0:03b5121a232e 796 },
pcercuei 0:03b5121a232e 797 { "bdo", 0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ",
pcercuei 0:03b5121a232e 798 DECL html_inline , NULL , DECL core_i18n_attrs, NULL, DECL dir_attr
pcercuei 0:03b5121a232e 799 },
pcercuei 0:03b5121a232e 800 { "big", 0, 3, 0, 0, 0, 0, 1, "large text style",
pcercuei 0:03b5121a232e 801 DECL html_inline , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 802 },
pcercuei 0:03b5121a232e 803 { "blockquote", 0, 0, 0, 0, 0, 0, 0, "long quotation ",
pcercuei 0:03b5121a232e 804 DECL html_flow , NULL , DECL quote_attrs , NULL, NULL
pcercuei 0:03b5121a232e 805 },
pcercuei 0:03b5121a232e 806 { "body", 1, 1, 0, 0, 0, 0, 0, "document body ",
pcercuei 0:03b5121a232e 807 DECL body_contents , "div" , DECL body_attrs, DECL body_depr, NULL
pcercuei 0:03b5121a232e 808 },
pcercuei 0:03b5121a232e 809 { "br", 0, 2, 2, 1, 0, 0, 1, "forced line break ",
pcercuei 0:03b5121a232e 810 EMPTY , NULL , DECL core_attrs, DECL clear_attrs , NULL
pcercuei 0:03b5121a232e 811 },
pcercuei 0:03b5121a232e 812 { "button", 0, 0, 0, 0, 0, 0, 2, "push button ",
pcercuei 0:03b5121a232e 813 DECL html_flow MODIFIER , NULL , DECL button_attrs, NULL, NULL /* MODIFIER expands to nothing */
pcercuei 0:03b5121a232e 814 },
pcercuei 0:03b5121a232e 815 { "caption", 0, 0, 0, 0, 0, 0, 0, "table caption ",
pcercuei 0:03b5121a232e 816 DECL html_inline , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 817 },
pcercuei 0:03b5121a232e 818 { "center", 0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ",
pcercuei 0:03b5121a232e 819 DECL html_flow , NULL , NULL, DECL html_attrs, NULL
pcercuei 0:03b5121a232e 820 },
pcercuei 0:03b5121a232e 821 { "cite", 0, 0, 0, 0, 0, 0, 1, "citation",
pcercuei 0:03b5121a232e 822 DECL html_inline , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 823 },
pcercuei 0:03b5121a232e 824 { "code", 0, 0, 0, 0, 0, 0, 1, "computer code fragment",
pcercuei 0:03b5121a232e 825 DECL html_inline , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 826 },
pcercuei 0:03b5121a232e 827 { "col", 0, 2, 2, 1, 0, 0, 0, "table column ",
pcercuei 0:03b5121a232e 828 EMPTY , NULL , DECL col_attrs , NULL, NULL
pcercuei 0:03b5121a232e 829 },
pcercuei 0:03b5121a232e 830 { "colgroup", 0, 1, 0, 0, 0, 0, 0, "table column group ",
pcercuei 0:03b5121a232e 831 DECL col_elt , "col" , DECL col_attrs , NULL, NULL
pcercuei 0:03b5121a232e 832 },
pcercuei 0:03b5121a232e 833 { "dd", 0, 1, 0, 0, 0, 0, 0, "definition description ",
pcercuei 0:03b5121a232e 834 DECL html_flow , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 835 },
pcercuei 0:03b5121a232e 836 { "del", 0, 0, 0, 0, 0, 0, 2, "deleted text ",
pcercuei 0:03b5121a232e 837 DECL html_flow , NULL , DECL edit_attrs , NULL, NULL
pcercuei 0:03b5121a232e 838 },
pcercuei 0:03b5121a232e 839 { "dfn", 0, 0, 0, 0, 0, 0, 1, "instance definition",
pcercuei 0:03b5121a232e 840 DECL html_inline , NULL , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 841 },
pcercuei 0:03b5121a232e 842 { "dir", 0, 0, 0, 0, 1, 1, 0, "directory list",
pcercuei 0:03b5121a232e 843 DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL
pcercuei 0:03b5121a232e 844 },
pcercuei 0:03b5121a232e 845 { "div", 0, 0, 0, 0, 0, 0, 0, "generic language/style container",
pcercuei 0:03b5121a232e 846 DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL
pcercuei 0:03b5121a232e 847 },
pcercuei 0:03b5121a232e 848 { "dl", 0, 0, 0, 0, 0, 0, 0, "definition list ",
pcercuei 0:03b5121a232e 849 DECL dl_contents , "dd" , DECL html_attrs, DECL compact_attr, NULL
pcercuei 0:03b5121a232e 850 },
pcercuei 0:03b5121a232e 851 { "dt", 0, 1, 0, 0, 0, 0, 0, "definition term ",
pcercuei 0:03b5121a232e 852 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 853 },
pcercuei 0:03b5121a232e 854 { "em", 0, 3, 0, 0, 0, 0, 1, "emphasis",
pcercuei 0:03b5121a232e 855 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 856 },
pcercuei 0:03b5121a232e 857 { "embed", 0, 1, 0, 0, 1, 1, 1, "generic embedded object ", /* NOTE(review): sub-element list is EMPTY but the empty column is 0 - confirm intended */
pcercuei 0:03b5121a232e 858 EMPTY, NULL, DECL embed_attrs, NULL, NULL
pcercuei 0:03b5121a232e 859 },
pcercuei 0:03b5121a232e 860 { "fieldset", 0, 0, 0, 0, 0, 0, 0, "form control group ",
pcercuei 0:03b5121a232e 861 DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 862 },
pcercuei 0:03b5121a232e 863 { "font", 0, 3, 0, 0, 1, 1, 1, "local change to font ",
pcercuei 0:03b5121a232e 864 DECL html_inline, NULL, NULL, DECL font_attrs, NULL
pcercuei 0:03b5121a232e 865 },
pcercuei 0:03b5121a232e 866 { "form", 0, 0, 0, 0, 0, 0, 0, "interactive form ",
pcercuei 0:03b5121a232e 867 DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr
pcercuei 0:03b5121a232e 868 },
pcercuei 0:03b5121a232e 869 { "frame", 0, 2, 2, 1, 0, 2, 0, "subwindow " ,
pcercuei 0:03b5121a232e 870 EMPTY, NULL, NULL, DECL frame_attrs, NULL
pcercuei 0:03b5121a232e 871 },
pcercuei 0:03b5121a232e 872 { "frameset", 0, 0, 0, 0, 0, 2, 0, "window subdivision" ,
pcercuei 0:03b5121a232e 873 DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL
pcercuei 0:03b5121a232e 874 },
pcercuei 0:03b5121a232e 875 { "h1", 0, 0, 0, 0, 0, 0, 0, "heading ",
pcercuei 0:03b5121a232e 876 DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
pcercuei 0:03b5121a232e 877 },
pcercuei 0:03b5121a232e 878 { "h2", 0, 0, 0, 0, 0, 0, 0, "heading ",
pcercuei 0:03b5121a232e 879 DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
pcercuei 0:03b5121a232e 880 },
pcercuei 0:03b5121a232e 881 { "h3", 0, 0, 0, 0, 0, 0, 0, "heading ",
pcercuei 0:03b5121a232e 882 DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
pcercuei 0:03b5121a232e 883 },
pcercuei 0:03b5121a232e 884 { "h4", 0, 0, 0, 0, 0, 0, 0, "heading ",
pcercuei 0:03b5121a232e 885 DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
pcercuei 0:03b5121a232e 886 },
pcercuei 0:03b5121a232e 887 { "h5", 0, 0, 0, 0, 0, 0, 0, "heading ",
pcercuei 0:03b5121a232e 888 DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
pcercuei 0:03b5121a232e 889 },
pcercuei 0:03b5121a232e 890 { "h6", 0, 0, 0, 0, 0, 0, 0, "heading ",
pcercuei 0:03b5121a232e 891 DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
pcercuei 0:03b5121a232e 892 },
pcercuei 0:03b5121a232e 893 { "head", 1, 1, 0, 0, 0, 0, 0, "document head ",
pcercuei 0:03b5121a232e 894 DECL head_contents, NULL, DECL head_attrs, NULL, NULL
pcercuei 0:03b5121a232e 895 },
pcercuei 0:03b5121a232e 896 { "hr", 0, 2, 2, 1, 0, 0, 0, "horizontal rule " ,
pcercuei 0:03b5121a232e 897 EMPTY, NULL, DECL html_attrs, DECL hr_depr, NULL
pcercuei 0:03b5121a232e 898 },
pcercuei 0:03b5121a232e 899 { "html", 1, 1, 0, 0, 0, 0, 0, "document root element ",
pcercuei 0:03b5121a232e 900 DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL
pcercuei 0:03b5121a232e 901 },
pcercuei 0:03b5121a232e 902 { "i", 0, 3, 0, 0, 0, 0, 1, "italic text style",
pcercuei 0:03b5121a232e 903 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 904 },
pcercuei 0:03b5121a232e 905 { "iframe", 0, 0, 0, 0, 0, 1, 2, "inline subwindow ",
pcercuei 0:03b5121a232e 906 DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL
pcercuei 0:03b5121a232e 907 },
pcercuei 0:03b5121a232e 908 { "img", 0, 2, 2, 1, 0, 0, 1, "embedded image ",
pcercuei 0:03b5121a232e 909 EMPTY, NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs
pcercuei 0:03b5121a232e 910 },
pcercuei 0:03b5121a232e 911 { "input", 0, 2, 2, 1, 0, 0, 1, "form control ",
pcercuei 0:03b5121a232e 912 EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL
pcercuei 0:03b5121a232e 913 },
pcercuei 0:03b5121a232e 914 { "ins", 0, 0, 0, 0, 0, 0, 2, "inserted text",
pcercuei 0:03b5121a232e 915 DECL html_flow, NULL, DECL edit_attrs, NULL, NULL
pcercuei 0:03b5121a232e 916 },
pcercuei 0:03b5121a232e 917 { "isindex", 0, 2, 2, 1, 1, 1, 0, "single line prompt ",
pcercuei 0:03b5121a232e 918 EMPTY, NULL, NULL, DECL prompt_attrs, NULL
pcercuei 0:03b5121a232e 919 },
pcercuei 0:03b5121a232e 920 { "kbd", 0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",
pcercuei 0:03b5121a232e 921 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 922 },
pcercuei 0:03b5121a232e 923 { "label", 0, 0, 0, 0, 0, 0, 1, "form field label text ",
pcercuei 0:03b5121a232e 924 DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, NULL /* MODIFIER expands to nothing */
pcercuei 0:03b5121a232e 925 },
pcercuei 0:03b5121a232e 926 { "legend", 0, 0, 0, 0, 0, 0, 0, "fieldset legend ",
pcercuei 0:03b5121a232e 927 DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL
pcercuei 0:03b5121a232e 928 },
pcercuei 0:03b5121a232e 929 { "li", 0, 1, 1, 0, 0, 0, 0, "list item ",
pcercuei 0:03b5121a232e 930 DECL html_flow, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 931 },
pcercuei 0:03b5121a232e 932 { "link", 0, 2, 2, 1, 0, 0, 0, "a media-independent link ",
pcercuei 0:03b5121a232e 933 EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL
pcercuei 0:03b5121a232e 934 },
pcercuei 0:03b5121a232e 935 { "map", 0, 0, 0, 0, 0, 0, 2, "client-side image map ",
pcercuei 0:03b5121a232e 936 DECL map_contents , NULL, DECL html_attrs , NULL, DECL name_attr
pcercuei 0:03b5121a232e 937 },
pcercuei 0:03b5121a232e 938 { "menu", 0, 0, 0, 0, 1, 1, 0, "menu list ",
pcercuei 0:03b5121a232e 939 DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL
pcercuei 0:03b5121a232e 940 },
pcercuei 0:03b5121a232e 941 { "meta", 0, 2, 2, 1, 0, 0, 0, "generic metainformation ",
pcercuei 0:03b5121a232e 942 EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr
pcercuei 0:03b5121a232e 943 },
pcercuei 0:03b5121a232e 944 { "noframes", 0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",
pcercuei 0:03b5121a232e 945 DECL noframes_content, "body" , DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 946 },
pcercuei 0:03b5121a232e 947 { "noscript", 0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",
pcercuei 0:03b5121a232e 948 DECL html_flow, "div", DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 949 },
pcercuei 0:03b5121a232e 950 { "object", 0, 0, 0, 0, 0, 0, 2, "generic embedded object ",
pcercuei 0:03b5121a232e 951 DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL
pcercuei 0:03b5121a232e 952 },
pcercuei 0:03b5121a232e 953 { "ol", 0, 0, 0, 0, 0, 0, 0, "ordered list ",
pcercuei 0:03b5121a232e 954 DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL
pcercuei 0:03b5121a232e 955 },
pcercuei 0:03b5121a232e 956 { "optgroup", 0, 0, 0, 0, 0, 0, 0, "option group ",
pcercuei 0:03b5121a232e 957 DECL option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr
pcercuei 0:03b5121a232e 958 },
pcercuei 0:03b5121a232e 959 { "option", 0, 1, 0, 0, 0, 0, 0, "selectable choice " ,
pcercuei 0:03b5121a232e 960 DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL
pcercuei 0:03b5121a232e 961 },
pcercuei 0:03b5121a232e 962 { "p", 0, 1, 0, 0, 0, 0, 0, "paragraph ",
pcercuei 0:03b5121a232e 963 DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
pcercuei 0:03b5121a232e 964 },
pcercuei 0:03b5121a232e 965 { "param", 0, 2, 2, 1, 0, 0, 0, "named property value ",
pcercuei 0:03b5121a232e 966 EMPTY, NULL, DECL param_attrs, NULL, DECL name_attr
pcercuei 0:03b5121a232e 967 },
pcercuei 0:03b5121a232e 968 { "pre", 0, 0, 0, 0, 0, 0, 0, "preformatted text ",
pcercuei 0:03b5121a232e 969 DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL
pcercuei 0:03b5121a232e 970 },
pcercuei 0:03b5121a232e 971 { "q", 0, 0, 0, 0, 0, 0, 1, "short inline quotation ",
pcercuei 0:03b5121a232e 972 DECL html_inline, NULL, DECL quote_attrs, NULL, NULL
pcercuei 0:03b5121a232e 973 },
pcercuei 0:03b5121a232e 974 { "s", 0, 3, 0, 0, 1, 1, 1, "strike-through text style",
pcercuei 0:03b5121a232e 975 DECL html_inline, NULL, NULL, DECL html_attrs, NULL
pcercuei 0:03b5121a232e 976 },
pcercuei 0:03b5121a232e 977 { "samp", 0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",
pcercuei 0:03b5121a232e 978 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 979 },
pcercuei 0:03b5121a232e 980 { "script", 0, 0, 0, 0, 0, 0, 2, "script statements ",
pcercuei 0:03b5121a232e 981 DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr
pcercuei 0:03b5121a232e 982 },
pcercuei 0:03b5121a232e 983 { "select", 0, 0, 0, 0, 0, 0, 1, "option selector ",
pcercuei 0:03b5121a232e 984 DECL select_content, NULL, DECL select_attrs, NULL, NULL
pcercuei 0:03b5121a232e 985 },
pcercuei 0:03b5121a232e 986 { "small", 0, 3, 0, 0, 0, 0, 1, "small text style",
pcercuei 0:03b5121a232e 987 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 988 },
pcercuei 0:03b5121a232e 989 { "span", 0, 0, 0, 0, 0, 0, 1, "generic language/style container ",
pcercuei 0:03b5121a232e 990 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 991 },
pcercuei 0:03b5121a232e 992 { "strike", 0, 3, 0, 0, 1, 1, 1, "strike-through text",
pcercuei 0:03b5121a232e 993 DECL html_inline, NULL, NULL, DECL html_attrs, NULL
pcercuei 0:03b5121a232e 994 },
pcercuei 0:03b5121a232e 995 { "strong", 0, 3, 0, 0, 0, 0, 1, "strong emphasis",
pcercuei 0:03b5121a232e 996 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 997 },
pcercuei 0:03b5121a232e 998 { "style", 0, 0, 0, 0, 0, 0, 0, "style info ",
pcercuei 0:03b5121a232e 999 DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr
pcercuei 0:03b5121a232e 1000 },
pcercuei 0:03b5121a232e 1001 { "sub", 0, 3, 0, 0, 0, 0, 1, "subscript",
pcercuei 0:03b5121a232e 1002 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 1003 },
pcercuei 0:03b5121a232e 1004 { "sup", 0, 3, 0, 0, 0, 0, 1, "superscript ",
pcercuei 0:03b5121a232e 1005 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 1006 },
pcercuei 0:03b5121a232e 1007 { "table", 0, 0, 0, 0, 0, 0, 0, "", /* description left empty */
pcercuei 0:03b5121a232e 1008 DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL
pcercuei 0:03b5121a232e 1009 },
pcercuei 0:03b5121a232e 1010 { "tbody", 1, 0, 0, 0, 0, 0, 0, "table body ",
pcercuei 0:03b5121a232e 1011 DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
pcercuei 0:03b5121a232e 1012 },
pcercuei 0:03b5121a232e 1013 { "td", 0, 0, 0, 0, 0, 0, 0, "table data cell", /* NOTE(review): end tag column is 0 here but 1 for "th" - confirm intended */
pcercuei 0:03b5121a232e 1014 DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
pcercuei 0:03b5121a232e 1015 },
pcercuei 0:03b5121a232e 1016 { "textarea", 0, 0, 0, 0, 0, 0, 1, "multi-line text field ",
pcercuei 0:03b5121a232e 1017 DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr
pcercuei 0:03b5121a232e 1018 },
pcercuei 0:03b5121a232e 1019 { "tfoot", 0, 1, 0, 0, 0, 0, 0, "table footer ",
pcercuei 0:03b5121a232e 1020 DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
pcercuei 0:03b5121a232e 1021 },
pcercuei 0:03b5121a232e 1022 { "th", 0, 1, 0, 0, 0, 0, 0, "table header cell",
pcercuei 0:03b5121a232e 1023 DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
pcercuei 0:03b5121a232e 1024 },
pcercuei 0:03b5121a232e 1025 { "thead", 0, 1, 0, 0, 0, 0, 0, "table header ",
pcercuei 0:03b5121a232e 1026 DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
pcercuei 0:03b5121a232e 1027 },
pcercuei 0:03b5121a232e 1028 { "title", 0, 0, 0, 0, 0, 0, 0, "document title ",
pcercuei 0:03b5121a232e 1029 DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL
pcercuei 0:03b5121a232e 1030 },
pcercuei 0:03b5121a232e 1031 { "tr", 0, 0, 0, 0, 0, 0, 0, "table row ",
pcercuei 0:03b5121a232e 1032 DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL
pcercuei 0:03b5121a232e 1033 },
pcercuei 0:03b5121a232e 1034 { "tt", 0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",
pcercuei 0:03b5121a232e 1035 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 1036 },
pcercuei 0:03b5121a232e 1037 { "u", 0, 3, 0, 0, 1, 1, 1, "underlined text style",
pcercuei 0:03b5121a232e 1038 DECL html_inline, NULL, NULL, DECL html_attrs, NULL
pcercuei 0:03b5121a232e 1039 },
pcercuei 0:03b5121a232e 1040 { "ul", 0, 0, 0, 0, 0, 0, 0, "unordered list ",
pcercuei 0:03b5121a232e 1041 DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, NULL
pcercuei 0:03b5121a232e 1042 },
pcercuei 0:03b5121a232e 1043 { "var", 0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",
pcercuei 0:03b5121a232e 1044 DECL html_inline, NULL, DECL html_attrs, NULL, NULL
pcercuei 0:03b5121a232e 1045 }
pcercuei 0:03b5121a232e 1046 };
pcercuei 0:03b5121a232e 1047
pcercuei 0:03b5121a232e 1048 /*
pcercuei 0:03b5121a232e 1049 * start tags that imply the end of current element
pcercuei 0:03b5121a232e 1050 */
pcercuei 0:03b5121a232e 1051 static const char * const htmlStartClose[] = {
pcercuei 0:03b5121a232e 1052 "form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
pcercuei 0:03b5121a232e 1053 "dl", "ul", "ol", "menu", "dir", "address", "pre",
pcercuei 0:03b5121a232e 1054 "listing", "xmp", "head", NULL,
pcercuei 0:03b5121a232e 1055 "head", "p", NULL,
pcercuei 0:03b5121a232e 1056 "title", "p", NULL,
pcercuei 0:03b5121a232e 1057 "body", "head", "style", "link", "title", "p", NULL,
pcercuei 0:03b5121a232e 1058 "frameset", "head", "style", "link", "title", "p", NULL,
pcercuei 0:03b5121a232e 1059 "li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
pcercuei 0:03b5121a232e 1060 "pre", "listing", "xmp", "head", "li", NULL,
pcercuei 0:03b5121a232e 1061 "hr", "p", "head", NULL,
pcercuei 0:03b5121a232e 1062 "h1", "p", "head", NULL,
pcercuei 0:03b5121a232e 1063 "h2", "p", "head", NULL,
pcercuei 0:03b5121a232e 1064 "h3", "p", "head", NULL,
pcercuei 0:03b5121a232e 1065 "h4", "p", "head", NULL,
pcercuei 0:03b5121a232e 1066 "h5", "p", "head", NULL,
pcercuei 0:03b5121a232e 1067 "h6", "p", "head", NULL,
pcercuei 0:03b5121a232e 1068 "dir", "p", "head", NULL,
pcercuei 0:03b5121a232e 1069 "address", "p", "head", "ul", NULL,
pcercuei 0:03b5121a232e 1070 "pre", "p", "head", "ul", NULL,
pcercuei 0:03b5121a232e 1071 "listing", "p", "head", NULL,
pcercuei 0:03b5121a232e 1072 "xmp", "p", "head", NULL,
pcercuei 0:03b5121a232e 1073 "blockquote", "p", "head", NULL,
pcercuei 0:03b5121a232e 1074 "dl", "p", "dt", "menu", "dir", "address", "pre", "listing",
pcercuei 0:03b5121a232e 1075 "xmp", "head", NULL,
pcercuei 0:03b5121a232e 1076 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp",
pcercuei 0:03b5121a232e 1077 "head", "dd", NULL,
pcercuei 0:03b5121a232e 1078 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp",
pcercuei 0:03b5121a232e 1079 "head", "dt", NULL,
pcercuei 0:03b5121a232e 1080 "ul", "p", "head", "ol", "menu", "dir", "address", "pre",
pcercuei 0:03b5121a232e 1081 "listing", "xmp", NULL,
pcercuei 0:03b5121a232e 1082 "ol", "p", "head", "ul", NULL,
pcercuei 0:03b5121a232e 1083 "menu", "p", "head", "ul", NULL,
pcercuei 0:03b5121a232e 1084 "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL,
pcercuei 0:03b5121a232e 1085 "div", "p", "head", NULL,
pcercuei 0:03b5121a232e 1086 "noscript", "p", NULL,
pcercuei 0:03b5121a232e 1087 "center", "font", "b", "i", "p", "head", NULL,
pcercuei 0:03b5121a232e 1088 "a", "a", "head", NULL,
pcercuei 0:03b5121a232e 1089 "caption", "p", NULL,
pcercuei 0:03b5121a232e 1090 "colgroup", "caption", "colgroup", "col", "p", NULL,
pcercuei 0:03b5121a232e 1091 "col", "caption", "col", "p", NULL,
pcercuei 0:03b5121a232e 1092 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
pcercuei 0:03b5121a232e 1093 "listing", "xmp", "a", NULL,
pcercuei 0:03b5121a232e 1094 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
pcercuei 0:03b5121a232e 1095 "td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
pcercuei 0:03b5121a232e 1096 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
pcercuei 0:03b5121a232e 1097 "thead", "caption", "col", "colgroup", NULL,
pcercuei 0:03b5121a232e 1098 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead",
pcercuei 0:03b5121a232e 1099 "tbody", "p", NULL,
pcercuei 0:03b5121a232e 1100 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead",
pcercuei 0:03b5121a232e 1101 "tfoot", "tbody", "p", NULL,
pcercuei 0:03b5121a232e 1102 "optgroup", "option", NULL,
pcercuei 0:03b5121a232e 1103 "option", "option", NULL,
pcercuei 0:03b5121a232e 1104 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
pcercuei 0:03b5121a232e 1105 "pre", "listing", "xmp", "a", NULL,
pcercuei 0:03b5121a232e 1106 /* most tags in in FONTSTYLE, PHRASE and SPECIAL should close <head> */
pcercuei 0:03b5121a232e 1107 "tt", "head", NULL,
pcercuei 0:03b5121a232e 1108 "i", "head", NULL,
pcercuei 0:03b5121a232e 1109 "b", "head", NULL,
pcercuei 0:03b5121a232e 1110 "u", "head", NULL,
pcercuei 0:03b5121a232e 1111 "s", "head", NULL,
pcercuei 0:03b5121a232e 1112 "strike", "head", NULL,
pcercuei 0:03b5121a232e 1113 "big", "head", NULL,
pcercuei 0:03b5121a232e 1114 "small", "head", NULL,
pcercuei 0:03b5121a232e 1115
pcercuei 0:03b5121a232e 1116 "em", "head", NULL,
pcercuei 0:03b5121a232e 1117 "strong", "head", NULL,
pcercuei 0:03b5121a232e 1118 "dfn", "head", NULL,
pcercuei 0:03b5121a232e 1119 "code", "head", NULL,
pcercuei 0:03b5121a232e 1120 "samp", "head", NULL,
pcercuei 0:03b5121a232e 1121 "kbd", "head", NULL,
pcercuei 0:03b5121a232e 1122 "var", "head", NULL,
pcercuei 0:03b5121a232e 1123 "cite", "head", NULL,
pcercuei 0:03b5121a232e 1124 "abbr", "head", NULL,
pcercuei 0:03b5121a232e 1125 "acronym", "head", NULL,
pcercuei 0:03b5121a232e 1126
pcercuei 0:03b5121a232e 1127 /* "a" */
pcercuei 0:03b5121a232e 1128 "img", "head", NULL,
pcercuei 0:03b5121a232e 1129 /* "applet" */
pcercuei 0:03b5121a232e 1130 /* "embed" */
pcercuei 0:03b5121a232e 1131 /* "object" */
pcercuei 0:03b5121a232e 1132 "font", "head", NULL,
pcercuei 0:03b5121a232e 1133 /* "basefont" */
pcercuei 0:03b5121a232e 1134 "br", "head", NULL,
pcercuei 0:03b5121a232e 1135 /* "script" */
pcercuei 0:03b5121a232e 1136 "map", "head", NULL,
pcercuei 0:03b5121a232e 1137 "q", "head", NULL,
pcercuei 0:03b5121a232e 1138 "sub", "head", NULL,
pcercuei 0:03b5121a232e 1139 "sup", "head", NULL,
pcercuei 0:03b5121a232e 1140 "span", "head", NULL,
pcercuei 0:03b5121a232e 1141 "bdo", "head", NULL,
pcercuei 0:03b5121a232e 1142 "iframe", "head", NULL,
pcercuei 0:03b5121a232e 1143 NULL
pcercuei 0:03b5121a232e 1144 };
pcercuei 0:03b5121a232e 1145
/*
 * NULL-terminated list of HTML elements which are not supposed to hold
 * character data directly: when text is about to be inserted while one
 * of them is the current element, htmlCheckParagraph() opens an
 * implied <p> first.
 *
 * TODO: extend that list by reading the HTML SGML DTD on
 *       implied paragraph
 */
static const char *const htmlNoContentElements[] = { "html", "head", NULL };
pcercuei 0:03b5121a232e 1158
/*
 * The HTML attributes whose content is of type %Script;.
 *
 * The array has no NULL terminator; users size it with sizeof.
 *
 * NOTE: htmlIsScriptAttribute() rejects any name that does not start
 *       with "on" before looking at this list, so revisit that
 *       shortcut when adding an entry with a different prefix.
 */
static const char *const htmlScriptAttributes[] = {
    /* mouse */
    "onclick", "ondblclick", "onmousedown", "onmouseup",
    "onmouseover", "onmousemove", "onmouseout",
    /* keyboard */
    "onkeypress", "onkeydown", "onkeyup",
    /* document and focus */
    "onload", "onunload", "onfocus", "onblur",
    /* forms */
    "onsubmit", "onreset", "onchange", "onselect"
};
pcercuei 0:03b5121a232e 1184
/*
 * End tag priorities, used by the HTML parser to cope with broken
 * pages.  Giving elements different priorities lets the parser decide
 * what to do with a stray end tag: an end tag may only close elements
 * whose priority is lower than or equal to its own.
 */

/* One (element name, end tag priority) association. */
typedef struct {
    const char *name;   /* element name, NULL in the final default entry */
    int priority;       /* the higher, the harder the element is to close */
} elementPriority;

/* The list ends with a NULL-named entry carrying the default priority. */
static const elementPriority htmlEndPriority[] = {
    { "div",   150 },
    { "td",    160 },
    { "th",    160 },
    { "tr",    170 },
    { "thead", 180 },
    { "tbody", 180 },
    { "tfoot", 180 },
    { "table", 190 },
    { "head",  200 },
    { "body",  200 },
    { "html",  220 },
    { NULL,    100 }    /* Default priority */
};
pcercuei 0:03b5121a232e 1212
/*
 * Index over htmlStartClose: each used slot points at the first string
 * of one group, unused slots are NULL.  htmlInitAutoClose() fills at
 * most 99 slots so that the last one always stays NULL.
 */
static const char **htmlStartCloseIndex[100];

/* Set to 1 once htmlInitAutoClose() has filled the index. */
static int htmlStartCloseIndexinitialized = 0;
pcercuei 0:03b5121a232e 1215
pcercuei 0:03b5121a232e 1216 /************************************************************************
pcercuei 0:03b5121a232e 1217 * *
pcercuei 0:03b5121a232e 1218 * functions to handle HTML specific data *
pcercuei 0:03b5121a232e 1219 * *
pcercuei 0:03b5121a232e 1220 ************************************************************************/
pcercuei 0:03b5121a232e 1221
pcercuei 0:03b5121a232e 1222 /**
pcercuei 0:03b5121a232e 1223 * htmlInitAutoClose:
pcercuei 0:03b5121a232e 1224 *
pcercuei 0:03b5121a232e 1225 * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
pcercuei 0:03b5121a232e 1226 * This is not reentrant. Call xmlInitParser() once before processing in
pcercuei 0:03b5121a232e 1227 * case of use in multithreaded programs.
pcercuei 0:03b5121a232e 1228 */
pcercuei 0:03b5121a232e 1229 void
pcercuei 0:03b5121a232e 1230 htmlInitAutoClose(void) {
pcercuei 0:03b5121a232e 1231 int indx, i = 0;
pcercuei 0:03b5121a232e 1232
pcercuei 0:03b5121a232e 1233 if (htmlStartCloseIndexinitialized) return;
pcercuei 0:03b5121a232e 1234
pcercuei 0:03b5121a232e 1235 for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL;
pcercuei 0:03b5121a232e 1236 indx = 0;
pcercuei 0:03b5121a232e 1237 while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) {
pcercuei 0:03b5121a232e 1238 htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i];
pcercuei 0:03b5121a232e 1239 while (htmlStartClose[i] != NULL) i++;
pcercuei 0:03b5121a232e 1240 i++;
pcercuei 0:03b5121a232e 1241 }
pcercuei 0:03b5121a232e 1242 htmlStartCloseIndexinitialized = 1;
pcercuei 0:03b5121a232e 1243 }
pcercuei 0:03b5121a232e 1244
pcercuei 0:03b5121a232e 1245 /**
pcercuei 0:03b5121a232e 1246 * htmlTagLookup:
pcercuei 0:03b5121a232e 1247 * @tag: The tag name in lowercase
pcercuei 0:03b5121a232e 1248 *
pcercuei 0:03b5121a232e 1249 * Lookup the HTML tag in the ElementTable
pcercuei 0:03b5121a232e 1250 *
pcercuei 0:03b5121a232e 1251 * Returns the related htmlElemDescPtr or NULL if not found.
pcercuei 0:03b5121a232e 1252 */
pcercuei 0:03b5121a232e 1253 const htmlElemDesc *
pcercuei 0:03b5121a232e 1254 htmlTagLookup(const xmlChar *tag) {
pcercuei 0:03b5121a232e 1255 unsigned int i;
pcercuei 0:03b5121a232e 1256
pcercuei 0:03b5121a232e 1257 for (i = 0; i < (sizeof(html40ElementTable) /
pcercuei 0:03b5121a232e 1258 sizeof(html40ElementTable[0]));i++) {
pcercuei 0:03b5121a232e 1259 if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name))
pcercuei 0:03b5121a232e 1260 return((htmlElemDescPtr) &html40ElementTable[i]);
pcercuei 0:03b5121a232e 1261 }
pcercuei 0:03b5121a232e 1262 return(NULL);
pcercuei 0:03b5121a232e 1263 }
pcercuei 0:03b5121a232e 1264
pcercuei 0:03b5121a232e 1265 /**
pcercuei 0:03b5121a232e 1266 * htmlGetEndPriority:
pcercuei 0:03b5121a232e 1267 * @name: The name of the element to look up the priority for.
pcercuei 0:03b5121a232e 1268 *
pcercuei 0:03b5121a232e 1269 * Return value: The "endtag" priority.
pcercuei 0:03b5121a232e 1270 **/
pcercuei 0:03b5121a232e 1271 static int
pcercuei 0:03b5121a232e 1272 htmlGetEndPriority (const xmlChar *name) {
pcercuei 0:03b5121a232e 1273 int i = 0;
pcercuei 0:03b5121a232e 1274
pcercuei 0:03b5121a232e 1275 while ((htmlEndPriority[i].name != NULL) &&
pcercuei 0:03b5121a232e 1276 (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
pcercuei 0:03b5121a232e 1277 i++;
pcercuei 0:03b5121a232e 1278
pcercuei 0:03b5121a232e 1279 return(htmlEndPriority[i].priority);
pcercuei 0:03b5121a232e 1280 }
pcercuei 0:03b5121a232e 1281
pcercuei 0:03b5121a232e 1282
pcercuei 0:03b5121a232e 1283 /**
pcercuei 0:03b5121a232e 1284 * htmlCheckAutoClose:
pcercuei 0:03b5121a232e 1285 * @newtag: The new tag name
pcercuei 0:03b5121a232e 1286 * @oldtag: The old tag name
pcercuei 0:03b5121a232e 1287 *
pcercuei 0:03b5121a232e 1288 * Checks whether the new tag is one of the registered valid tags for
pcercuei 0:03b5121a232e 1289 * closing old.
pcercuei 0:03b5121a232e 1290 * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
pcercuei 0:03b5121a232e 1291 *
pcercuei 0:03b5121a232e 1292 * Returns 0 if no, 1 if yes.
pcercuei 0:03b5121a232e 1293 */
pcercuei 0:03b5121a232e 1294 static int
pcercuei 0:03b5121a232e 1295 htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
pcercuei 0:03b5121a232e 1296 {
pcercuei 0:03b5121a232e 1297 int i, indx;
pcercuei 0:03b5121a232e 1298 const char **closed = NULL;
pcercuei 0:03b5121a232e 1299
pcercuei 0:03b5121a232e 1300 if (htmlStartCloseIndexinitialized == 0)
pcercuei 0:03b5121a232e 1301 htmlInitAutoClose();
pcercuei 0:03b5121a232e 1302
pcercuei 0:03b5121a232e 1303 /* inefficient, but not a big deal */
pcercuei 0:03b5121a232e 1304 for (indx = 0; indx < 100; indx++) {
pcercuei 0:03b5121a232e 1305 closed = htmlStartCloseIndex[indx];
pcercuei 0:03b5121a232e 1306 if (closed == NULL)
pcercuei 0:03b5121a232e 1307 return (0);
pcercuei 0:03b5121a232e 1308 if (xmlStrEqual(BAD_CAST * closed, newtag))
pcercuei 0:03b5121a232e 1309 break;
pcercuei 0:03b5121a232e 1310 }
pcercuei 0:03b5121a232e 1311
pcercuei 0:03b5121a232e 1312 i = closed - htmlStartClose;
pcercuei 0:03b5121a232e 1313 i++;
pcercuei 0:03b5121a232e 1314 while (htmlStartClose[i] != NULL) {
pcercuei 0:03b5121a232e 1315 if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) {
pcercuei 0:03b5121a232e 1316 return (1);
pcercuei 0:03b5121a232e 1317 }
pcercuei 0:03b5121a232e 1318 i++;
pcercuei 0:03b5121a232e 1319 }
pcercuei 0:03b5121a232e 1320 return (0);
pcercuei 0:03b5121a232e 1321 }
pcercuei 0:03b5121a232e 1322
pcercuei 0:03b5121a232e 1323 /**
pcercuei 0:03b5121a232e 1324 * htmlAutoCloseOnClose:
pcercuei 0:03b5121a232e 1325 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 1326 * @newtag: The new tag name
pcercuei 0:03b5121a232e 1327 * @force: force the tag closure
pcercuei 0:03b5121a232e 1328 *
pcercuei 0:03b5121a232e 1329 * The HTML DTD allows an ending tag to implicitly close other tags.
pcercuei 0:03b5121a232e 1330 */
pcercuei 0:03b5121a232e 1331 static void
pcercuei 0:03b5121a232e 1332 htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
pcercuei 0:03b5121a232e 1333 {
pcercuei 0:03b5121a232e 1334 const htmlElemDesc *info;
pcercuei 0:03b5121a232e 1335 int i, priority;
pcercuei 0:03b5121a232e 1336
pcercuei 0:03b5121a232e 1337 priority = htmlGetEndPriority(newtag);
pcercuei 0:03b5121a232e 1338
pcercuei 0:03b5121a232e 1339 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
pcercuei 0:03b5121a232e 1340
pcercuei 0:03b5121a232e 1341 if (xmlStrEqual(newtag, ctxt->nameTab[i]))
pcercuei 0:03b5121a232e 1342 break;
pcercuei 0:03b5121a232e 1343 /*
pcercuei 0:03b5121a232e 1344 * A missplaced endtag can only close elements with lower
pcercuei 0:03b5121a232e 1345 * or equal priority, so if we find an element with higher
pcercuei 0:03b5121a232e 1346 * priority before we find an element with
pcercuei 0:03b5121a232e 1347 * matching name, we just ignore this endtag
pcercuei 0:03b5121a232e 1348 */
pcercuei 0:03b5121a232e 1349 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
pcercuei 0:03b5121a232e 1350 return;
pcercuei 0:03b5121a232e 1351 }
pcercuei 0:03b5121a232e 1352 if (i < 0)
pcercuei 0:03b5121a232e 1353 return;
pcercuei 0:03b5121a232e 1354
pcercuei 0:03b5121a232e 1355 while (!xmlStrEqual(newtag, ctxt->name)) {
pcercuei 0:03b5121a232e 1356 info = htmlTagLookup(ctxt->name);
pcercuei 0:03b5121a232e 1357 if ((info != NULL) && (info->endTag == 3)) {
pcercuei 0:03b5121a232e 1358 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
pcercuei 0:03b5121a232e 1359 "Opening and ending tag mismatch: %s and %s\n",
pcercuei 0:03b5121a232e 1360 newtag, ctxt->name);
pcercuei 0:03b5121a232e 1361 }
pcercuei 0:03b5121a232e 1362 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 1363 ctxt->sax->endElement(ctxt->userData, ctxt->name);
pcercuei 0:03b5121a232e 1364 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 1365 }
pcercuei 0:03b5121a232e 1366 }
pcercuei 0:03b5121a232e 1367
pcercuei 0:03b5121a232e 1368 /**
pcercuei 0:03b5121a232e 1369 * htmlAutoCloseOnEnd:
pcercuei 0:03b5121a232e 1370 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 1371 *
pcercuei 0:03b5121a232e 1372 * Close all remaining tags at the end of the stream
pcercuei 0:03b5121a232e 1373 */
pcercuei 0:03b5121a232e 1374 static void
pcercuei 0:03b5121a232e 1375 htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
pcercuei 0:03b5121a232e 1376 {
pcercuei 0:03b5121a232e 1377 int i;
pcercuei 0:03b5121a232e 1378
pcercuei 0:03b5121a232e 1379 if (ctxt->nameNr == 0)
pcercuei 0:03b5121a232e 1380 return;
pcercuei 0:03b5121a232e 1381 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
pcercuei 0:03b5121a232e 1382 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 1383 ctxt->sax->endElement(ctxt->userData, ctxt->name);
pcercuei 0:03b5121a232e 1384 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 1385 }
pcercuei 0:03b5121a232e 1386 }
pcercuei 0:03b5121a232e 1387
pcercuei 0:03b5121a232e 1388 /**
pcercuei 0:03b5121a232e 1389 * htmlAutoClose:
pcercuei 0:03b5121a232e 1390 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 1391 * @newtag: The new tag name or NULL
pcercuei 0:03b5121a232e 1392 *
pcercuei 0:03b5121a232e 1393 * The HTML DTD allows a tag to implicitly close other tags.
pcercuei 0:03b5121a232e 1394 * The list is kept in htmlStartClose array. This function is
pcercuei 0:03b5121a232e 1395 * called when a new tag has been detected and generates the
pcercuei 0:03b5121a232e 1396 * appropriates closes if possible/needed.
pcercuei 0:03b5121a232e 1397 * If newtag is NULL this mean we are at the end of the resource
pcercuei 0:03b5121a232e 1398 * and we should check
pcercuei 0:03b5121a232e 1399 */
pcercuei 0:03b5121a232e 1400 static void
pcercuei 0:03b5121a232e 1401 htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
pcercuei 0:03b5121a232e 1402 {
pcercuei 0:03b5121a232e 1403 while ((newtag != NULL) && (ctxt->name != NULL) &&
pcercuei 0:03b5121a232e 1404 (htmlCheckAutoClose(newtag, ctxt->name))) {
pcercuei 0:03b5121a232e 1405 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 1406 ctxt->sax->endElement(ctxt->userData, ctxt->name);
pcercuei 0:03b5121a232e 1407 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 1408 }
pcercuei 0:03b5121a232e 1409 if (newtag == NULL) {
pcercuei 0:03b5121a232e 1410 htmlAutoCloseOnEnd(ctxt);
pcercuei 0:03b5121a232e 1411 return;
pcercuei 0:03b5121a232e 1412 }
pcercuei 0:03b5121a232e 1413 while ((newtag == NULL) && (ctxt->name != NULL) &&
pcercuei 0:03b5121a232e 1414 ((xmlStrEqual(ctxt->name, BAD_CAST "head")) ||
pcercuei 0:03b5121a232e 1415 (xmlStrEqual(ctxt->name, BAD_CAST "body")) ||
pcercuei 0:03b5121a232e 1416 (xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
pcercuei 0:03b5121a232e 1417 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 1418 ctxt->sax->endElement(ctxt->userData, ctxt->name);
pcercuei 0:03b5121a232e 1419 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 1420 }
pcercuei 0:03b5121a232e 1421 }
pcercuei 0:03b5121a232e 1422
pcercuei 0:03b5121a232e 1423 /**
pcercuei 0:03b5121a232e 1424 * htmlAutoCloseTag:
pcercuei 0:03b5121a232e 1425 * @doc: the HTML document
pcercuei 0:03b5121a232e 1426 * @name: The tag name
pcercuei 0:03b5121a232e 1427 * @elem: the HTML element
pcercuei 0:03b5121a232e 1428 *
pcercuei 0:03b5121a232e 1429 * The HTML DTD allows a tag to implicitly close other tags.
pcercuei 0:03b5121a232e 1430 * The list is kept in htmlStartClose array. This function checks
pcercuei 0:03b5121a232e 1431 * if the element or one of it's children would autoclose the
pcercuei 0:03b5121a232e 1432 * given tag.
pcercuei 0:03b5121a232e 1433 *
pcercuei 0:03b5121a232e 1434 * Returns 1 if autoclose, 0 otherwise
pcercuei 0:03b5121a232e 1435 */
pcercuei 0:03b5121a232e 1436 int
pcercuei 0:03b5121a232e 1437 htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
pcercuei 0:03b5121a232e 1438 htmlNodePtr child;
pcercuei 0:03b5121a232e 1439
pcercuei 0:03b5121a232e 1440 if (elem == NULL) return(1);
pcercuei 0:03b5121a232e 1441 if (xmlStrEqual(name, elem->name)) return(0);
pcercuei 0:03b5121a232e 1442 if (htmlCheckAutoClose(elem->name, name)) return(1);
pcercuei 0:03b5121a232e 1443 child = elem->children;
pcercuei 0:03b5121a232e 1444 while (child != NULL) {
pcercuei 0:03b5121a232e 1445 if (htmlAutoCloseTag(doc, name, child)) return(1);
pcercuei 0:03b5121a232e 1446 child = child->next;
pcercuei 0:03b5121a232e 1447 }
pcercuei 0:03b5121a232e 1448 return(0);
pcercuei 0:03b5121a232e 1449 }
pcercuei 0:03b5121a232e 1450
pcercuei 0:03b5121a232e 1451 /**
pcercuei 0:03b5121a232e 1452 * htmlIsAutoClosed:
pcercuei 0:03b5121a232e 1453 * @doc: the HTML document
pcercuei 0:03b5121a232e 1454 * @elem: the HTML element
pcercuei 0:03b5121a232e 1455 *
pcercuei 0:03b5121a232e 1456 * The HTML DTD allows a tag to implicitly close other tags.
pcercuei 0:03b5121a232e 1457 * The list is kept in htmlStartClose array. This function checks
pcercuei 0:03b5121a232e 1458 * if a tag is autoclosed by one of it's child
pcercuei 0:03b5121a232e 1459 *
pcercuei 0:03b5121a232e 1460 * Returns 1 if autoclosed, 0 otherwise
pcercuei 0:03b5121a232e 1461 */
pcercuei 0:03b5121a232e 1462 int
pcercuei 0:03b5121a232e 1463 htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
pcercuei 0:03b5121a232e 1464 htmlNodePtr child;
pcercuei 0:03b5121a232e 1465
pcercuei 0:03b5121a232e 1466 if (elem == NULL) return(1);
pcercuei 0:03b5121a232e 1467 child = elem->children;
pcercuei 0:03b5121a232e 1468 while (child != NULL) {
pcercuei 0:03b5121a232e 1469 if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
pcercuei 0:03b5121a232e 1470 child = child->next;
pcercuei 0:03b5121a232e 1471 }
pcercuei 0:03b5121a232e 1472 return(0);
pcercuei 0:03b5121a232e 1473 }
pcercuei 0:03b5121a232e 1474
pcercuei 0:03b5121a232e 1475 /**
pcercuei 0:03b5121a232e 1476 * htmlCheckImplied:
pcercuei 0:03b5121a232e 1477 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 1478 * @newtag: The new tag name
pcercuei 0:03b5121a232e 1479 *
pcercuei 0:03b5121a232e 1480 * The HTML DTD allows a tag to exists only implicitly
pcercuei 0:03b5121a232e 1481 * called when a new tag has been detected and generates the
pcercuei 0:03b5121a232e 1482 * appropriates implicit tags if missing
pcercuei 0:03b5121a232e 1483 */
pcercuei 0:03b5121a232e 1484 static void
pcercuei 0:03b5121a232e 1485 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
pcercuei 0:03b5121a232e 1486 int i;
pcercuei 0:03b5121a232e 1487
pcercuei 0:03b5121a232e 1488 if (ctxt->options & HTML_PARSE_NOIMPLIED)
pcercuei 0:03b5121a232e 1489 return;
pcercuei 0:03b5121a232e 1490 if (!htmlOmittedDefaultValue)
pcercuei 0:03b5121a232e 1491 return;
pcercuei 0:03b5121a232e 1492 if (xmlStrEqual(newtag, BAD_CAST"html"))
pcercuei 0:03b5121a232e 1493 return;
pcercuei 0:03b5121a232e 1494 if (ctxt->nameNr <= 0) {
pcercuei 0:03b5121a232e 1495 htmlnamePush(ctxt, BAD_CAST"html");
pcercuei 0:03b5121a232e 1496 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
pcercuei 0:03b5121a232e 1497 ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
pcercuei 0:03b5121a232e 1498 }
pcercuei 0:03b5121a232e 1499 if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head")))
pcercuei 0:03b5121a232e 1500 return;
pcercuei 0:03b5121a232e 1501 if ((ctxt->nameNr <= 1) &&
pcercuei 0:03b5121a232e 1502 ((xmlStrEqual(newtag, BAD_CAST"script")) ||
pcercuei 0:03b5121a232e 1503 (xmlStrEqual(newtag, BAD_CAST"style")) ||
pcercuei 0:03b5121a232e 1504 (xmlStrEqual(newtag, BAD_CAST"meta")) ||
pcercuei 0:03b5121a232e 1505 (xmlStrEqual(newtag, BAD_CAST"link")) ||
pcercuei 0:03b5121a232e 1506 (xmlStrEqual(newtag, BAD_CAST"title")) ||
pcercuei 0:03b5121a232e 1507 (xmlStrEqual(newtag, BAD_CAST"base")))) {
pcercuei 0:03b5121a232e 1508 if (ctxt->html >= 3) {
pcercuei 0:03b5121a232e 1509 /* we already saw or generated an <head> before */
pcercuei 0:03b5121a232e 1510 return;
pcercuei 0:03b5121a232e 1511 }
pcercuei 0:03b5121a232e 1512 /*
pcercuei 0:03b5121a232e 1513 * dropped OBJECT ... i you put it first BODY will be
pcercuei 0:03b5121a232e 1514 * assumed !
pcercuei 0:03b5121a232e 1515 */
pcercuei 0:03b5121a232e 1516 htmlnamePush(ctxt, BAD_CAST"head");
pcercuei 0:03b5121a232e 1517 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
pcercuei 0:03b5121a232e 1518 ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
pcercuei 0:03b5121a232e 1519 } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&
pcercuei 0:03b5121a232e 1520 (!xmlStrEqual(newtag, BAD_CAST"frame")) &&
pcercuei 0:03b5121a232e 1521 (!xmlStrEqual(newtag, BAD_CAST"frameset"))) {
pcercuei 0:03b5121a232e 1522 if (ctxt->html >= 10) {
pcercuei 0:03b5121a232e 1523 /* we already saw or generated a <body> before */
pcercuei 0:03b5121a232e 1524 return;
pcercuei 0:03b5121a232e 1525 }
pcercuei 0:03b5121a232e 1526 for (i = 0;i < ctxt->nameNr;i++) {
pcercuei 0:03b5121a232e 1527 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
pcercuei 0:03b5121a232e 1528 return;
pcercuei 0:03b5121a232e 1529 }
pcercuei 0:03b5121a232e 1530 if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
pcercuei 0:03b5121a232e 1531 return;
pcercuei 0:03b5121a232e 1532 }
pcercuei 0:03b5121a232e 1533 }
pcercuei 0:03b5121a232e 1534
pcercuei 0:03b5121a232e 1535 htmlnamePush(ctxt, BAD_CAST"body");
pcercuei 0:03b5121a232e 1536 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
pcercuei 0:03b5121a232e 1537 ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
pcercuei 0:03b5121a232e 1538 }
pcercuei 0:03b5121a232e 1539 }
pcercuei 0:03b5121a232e 1540
pcercuei 0:03b5121a232e 1541 /**
pcercuei 0:03b5121a232e 1542 * htmlCheckParagraph
pcercuei 0:03b5121a232e 1543 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 1544 *
pcercuei 0:03b5121a232e 1545 * Check whether a p element need to be implied before inserting
pcercuei 0:03b5121a232e 1546 * characters in the current element.
pcercuei 0:03b5121a232e 1547 *
pcercuei 0:03b5121a232e 1548 * Returns 1 if a paragraph has been inserted, 0 if not and -1
pcercuei 0:03b5121a232e 1549 * in case of error.
pcercuei 0:03b5121a232e 1550 */
pcercuei 0:03b5121a232e 1551
pcercuei 0:03b5121a232e 1552 static int
pcercuei 0:03b5121a232e 1553 htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 1554 const xmlChar *tag;
pcercuei 0:03b5121a232e 1555 int i;
pcercuei 0:03b5121a232e 1556
pcercuei 0:03b5121a232e 1557 if (ctxt == NULL)
pcercuei 0:03b5121a232e 1558 return(-1);
pcercuei 0:03b5121a232e 1559 tag = ctxt->name;
pcercuei 0:03b5121a232e 1560 if (tag == NULL) {
pcercuei 0:03b5121a232e 1561 htmlAutoClose(ctxt, BAD_CAST"p");
pcercuei 0:03b5121a232e 1562 htmlCheckImplied(ctxt, BAD_CAST"p");
pcercuei 0:03b5121a232e 1563 htmlnamePush(ctxt, BAD_CAST"p");
pcercuei 0:03b5121a232e 1564 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
pcercuei 0:03b5121a232e 1565 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
pcercuei 0:03b5121a232e 1566 return(1);
pcercuei 0:03b5121a232e 1567 }
pcercuei 0:03b5121a232e 1568 if (!htmlOmittedDefaultValue)
pcercuei 0:03b5121a232e 1569 return(0);
pcercuei 0:03b5121a232e 1570 for (i = 0; htmlNoContentElements[i] != NULL; i++) {
pcercuei 0:03b5121a232e 1571 if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
pcercuei 0:03b5121a232e 1572 htmlAutoClose(ctxt, BAD_CAST"p");
pcercuei 0:03b5121a232e 1573 htmlCheckImplied(ctxt, BAD_CAST"p");
pcercuei 0:03b5121a232e 1574 htmlnamePush(ctxt, BAD_CAST"p");
pcercuei 0:03b5121a232e 1575 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
pcercuei 0:03b5121a232e 1576 ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
pcercuei 0:03b5121a232e 1577 return(1);
pcercuei 0:03b5121a232e 1578 }
pcercuei 0:03b5121a232e 1579 }
pcercuei 0:03b5121a232e 1580 return(0);
pcercuei 0:03b5121a232e 1581 }
pcercuei 0:03b5121a232e 1582
pcercuei 0:03b5121a232e 1583 /**
pcercuei 0:03b5121a232e 1584 * htmlIsScriptAttribute:
pcercuei 0:03b5121a232e 1585 * @name: an attribute name
pcercuei 0:03b5121a232e 1586 *
pcercuei 0:03b5121a232e 1587 * Check if an attribute is of content type Script
pcercuei 0:03b5121a232e 1588 *
pcercuei 0:03b5121a232e 1589 * Returns 1 is the attribute is a script 0 otherwise
pcercuei 0:03b5121a232e 1590 */
pcercuei 0:03b5121a232e 1591 int
pcercuei 0:03b5121a232e 1592 htmlIsScriptAttribute(const xmlChar *name) {
pcercuei 0:03b5121a232e 1593 unsigned int i;
pcercuei 0:03b5121a232e 1594
pcercuei 0:03b5121a232e 1595 if (name == NULL)
pcercuei 0:03b5121a232e 1596 return(0);
pcercuei 0:03b5121a232e 1597 /*
pcercuei 0:03b5121a232e 1598 * all script attributes start with 'on'
pcercuei 0:03b5121a232e 1599 */
pcercuei 0:03b5121a232e 1600 if ((name[0] != 'o') || (name[1] != 'n'))
pcercuei 0:03b5121a232e 1601 return(0);
pcercuei 0:03b5121a232e 1602 for (i = 0;
pcercuei 0:03b5121a232e 1603 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);
pcercuei 0:03b5121a232e 1604 i++) {
pcercuei 0:03b5121a232e 1605 if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))
pcercuei 0:03b5121a232e 1606 return(1);
pcercuei 0:03b5121a232e 1607 }
pcercuei 0:03b5121a232e 1608 return(0);
pcercuei 0:03b5121a232e 1609 }
pcercuei 0:03b5121a232e 1610
pcercuei 0:03b5121a232e 1611 /************************************************************************
pcercuei 0:03b5121a232e 1612 * *
pcercuei 0:03b5121a232e 1613 * The list of HTML predefined entities *
pcercuei 0:03b5121a232e 1614 * *
pcercuei 0:03b5121a232e 1615 ************************************************************************/
pcercuei 0:03b5121a232e 1616
pcercuei 0:03b5121a232e 1617
/*
 * Each entry is { code point value, entity name, description }.
 *
 * The entries must stay sorted by ascending value: htmlEntityValueLookup()
 * stops scanning at the first entry whose value is greater than the one it
 * is looking for, so an out-of-order entry would become unreachable by value.
 * htmlEntityLookup() compares names one by one and does not depend on order.
 */
static const htmlEntityDesc html40EntitiesTable[] = {
/*
 * the 4 absolute ones, plus apostrophe.
 */
{ 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" },
{ 38, "amp", "ampersand, U+0026 ISOnum" },
{ 39, "apos", "single quote" },
{ 60, "lt", "less-than sign, U+003C ISOnum" },
{ 62, "gt", "greater-than sign, U+003E ISOnum" },

/*
 * A bunch still in the 128-255 range.
 * Replacing them really depends on the charset used.
 */
{ 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" },
{ 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" },
{ 162, "cent", "cent sign, U+00A2 ISOnum" },
{ 163, "pound","pound sign, U+00A3 ISOnum" },
{ 164, "curren","currency sign, U+00A4 ISOnum" },
{ 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" },
{ 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },
{ 167, "sect", "section sign, U+00A7 ISOnum" },
{ 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" },
{ 169, "copy", "copyright sign, U+00A9 ISOnum" },
{ 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" },
{ 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
{ 172, "not", "not sign, U+00AC ISOnum" },
{ 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" },
{ 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" },
{ 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
{ 176, "deg", "degree sign, U+00B0 ISOnum" },
{ 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
{ 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" },
{ 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
{ 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" },
{ 181, "micro","micro sign, U+00B5 ISOnum" },
{ 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" },
{ 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
{ 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },
{ 185, "sup1", "superscript one = superscript digit one, U+00B9 ISOnum" },
{ 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" },
{ 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
{ 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
{ 189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },
{ 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
{ 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" },
{ 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
{ 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },
{ 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },
{ 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },
{ 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" },
{ 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
{ 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
{ 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },
{ 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },
{ 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },
{ 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },
{ 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" },
{ 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" },
{ 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" },
{ 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },
{ 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" },
{ 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" },
{ 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },
{ 210, "Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },
{ 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },
{ 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },
{ 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },
{ 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" },
{ 215, "times","multiplication sign, U+00D7 ISOnum" },
{ 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
{ 217, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },
{ 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" },
{ 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },
{ 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" },
{ 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },
{ 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" },
{ 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
{ 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
{ 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" },
{ 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },
{ 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" },
{ 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" },
{ 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
{ 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
{ 231, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },
{ 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" },
{ 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" },
{ 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },
{ 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" },
{ 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" },
{ 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" },
{ 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" },
{ 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" },
{ 240, "eth", "latin small letter eth, U+00F0 ISOlat1" },
{ 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },
{ 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" },
{ 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" },
{ 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },
{ 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" },
{ 246, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" },
{ 247, "divide","division sign, U+00F7 ISOnum" },
{ 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
{ 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" },
{ 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" },
{ 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },
{ 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" },
{ 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" },
{ 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" },
{ 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" },

{ 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" },
{ 339, "oelig","latin small ligature oe, U+0153 ISOlat2" },
{ 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" },
{ 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" },
{ 376, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" },

/*
 * Anything below should really be kept as entity references
 */
{ 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" },

{ 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" },
{ 732, "tilde","small tilde, U+02DC ISOdia" },

{ 913, "Alpha","greek capital letter alpha, U+0391" },
{ 914, "Beta", "greek capital letter beta, U+0392" },
{ 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" },
{ 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" },
{ 917, "Epsilon","greek capital letter epsilon, U+0395" },
{ 918, "Zeta", "greek capital letter zeta, U+0396" },
{ 919, "Eta", "greek capital letter eta, U+0397" },
{ 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" },
{ 921, "Iota", "greek capital letter iota, U+0399" },
{ 922, "Kappa","greek capital letter kappa, U+039A" },
{ 923, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" },
{ 924, "Mu", "greek capital letter mu, U+039C" },
{ 925, "Nu", "greek capital letter nu, U+039D" },
{ 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" },
{ 927, "Omicron","greek capital letter omicron, U+039F" },
{ 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" },
{ 929, "Rho", "greek capital letter rho, U+03A1" },
{ 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },
{ 932, "Tau", "greek capital letter tau, U+03A4" },
{ 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },
{ 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" },
{ 935, "Chi", "greek capital letter chi, U+03A7" },
{ 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" },
{ 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" },

{ 945, "alpha","greek small letter alpha, U+03B1 ISOgrk3" },
{ 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" },
{ 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" },
{ 948, "delta","greek small letter delta, U+03B4 ISOgrk3" },
{ 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },
{ 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" },
{ 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" },
{ 952, "theta","greek small letter theta, U+03B8 ISOgrk3" },
{ 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" },
{ 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" },
{ 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" },
{ 956, "mu", "greek small letter mu, U+03BC ISOgrk3" },
{ 957, "nu", "greek small letter nu, U+03BD ISOgrk3" },
{ 958, "xi", "greek small letter xi, U+03BE ISOgrk3" },
{ 959, "omicron","greek small letter omicron, U+03BF NEW" },
{ 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" },
{ 961, "rho", "greek small letter rho, U+03C1 ISOgrk3" },
{ 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },
{ 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" },
{ 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" },
{ 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },
{ 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" },
{ 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" },
{ 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" },
{ 969, "omega","greek small letter omega, U+03C9 ISOgrk3" },
{ 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" },
{ 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" },
{ 982, "piv", "greek pi symbol, U+03D6 ISOgrk3" },

{ 8194, "ensp", "en space, U+2002 ISOpub" },
{ 8195, "emsp", "em space, U+2003 ISOpub" },
{ 8201, "thinsp","thin space, U+2009 ISOpub" },
{ 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" },
{ 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" },
{ 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" },
{ 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" },
{ 8211, "ndash","en dash, U+2013 ISOpub" },
{ 8212, "mdash","em dash, U+2014 ISOpub" },
{ 8216, "lsquo","left single quotation mark, U+2018 ISOnum" },
{ 8217, "rsquo","right single quotation mark, U+2019 ISOnum" },
{ 8218, "sbquo","single low-9 quotation mark, U+201A NEW" },
{ 8220, "ldquo","left double quotation mark, U+201C ISOnum" },
{ 8221, "rdquo","right double quotation mark, U+201D ISOnum" },
{ 8222, "bdquo","double low-9 quotation mark, U+201E NEW" },
{ 8224, "dagger","dagger, U+2020 ISOpub" },
{ 8225, "Dagger","double dagger, U+2021 ISOpub" },

{ 8226, "bull", "bullet = black small circle, U+2022 ISOpub" },
{ 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },

{ 8240, "permil","per mille sign, U+2030 ISOtech" },

{ 8242, "prime","prime = minutes = feet, U+2032 ISOtech" },
{ 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" },

{ 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" },
{ 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" },

{ 8254, "oline","overline = spacing overscore, U+203E NEW" },
{ 8260, "frasl","fraction slash, U+2044 NEW" },

{ 8364, "euro", "euro sign, U+20AC NEW" },

{ 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" },
{ 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },
{ 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" },
{ 8482, "trade","trade mark sign, U+2122 ISOnum" },
{ 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },
{ 8592, "larr", "leftwards arrow, U+2190 ISOnum" },
{ 8593, "uarr", "upwards arrow, U+2191 ISOnum" },
{ 8594, "rarr", "rightwards arrow, U+2192 ISOnum" },
{ 8595, "darr", "downwards arrow, U+2193 ISOnum" },
{ 8596, "harr", "left right arrow, U+2194 ISOamsa" },
{ 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
{ 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" },
{ 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" },
{ 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" },
{ 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" },
{ 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" },

{ 8704, "forall","for all, U+2200 ISOtech" },
{ 8706, "part", "partial differential, U+2202 ISOtech" },
{ 8707, "exist","there exists, U+2203 ISOtech" },
{ 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" },
{ 8711, "nabla","nabla = backward difference, U+2207 ISOtech" },
{ 8712, "isin", "element of, U+2208 ISOtech" },
{ 8713, "notin","not an element of, U+2209 ISOtech" },
{ 8715, "ni", "contains as member, U+220B ISOtech" },
{ 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" },
{ 8721, "sum", "n-ary summation, U+2211 ISOamsb" },
{ 8722, "minus","minus sign, U+2212 ISOtech" },
{ 8727, "lowast","asterisk operator, U+2217 ISOtech" },
{ 8730, "radic","square root = radical sign, U+221A ISOtech" },
{ 8733, "prop", "proportional to, U+221D ISOtech" },
{ 8734, "infin","infinity, U+221E ISOtech" },
{ 8736, "ang", "angle, U+2220 ISOamso" },
{ 8743, "and", "logical and = wedge, U+2227 ISOtech" },
{ 8744, "or", "logical or = vee, U+2228 ISOtech" },
{ 8745, "cap", "intersection = cap, U+2229 ISOtech" },
{ 8746, "cup", "union = cup, U+222A ISOtech" },
{ 8747, "int", "integral, U+222B ISOtech" },
{ 8756, "there4","therefore, U+2234 ISOtech" },
{ 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" },
{ 8773, "cong", "approximately equal to, U+2245 ISOtech" },
{ 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" },
{ 8800, "ne", "not equal to, U+2260 ISOtech" },
{ 8801, "equiv","identical to, U+2261 ISOtech" },
{ 8804, "le", "less-than or equal to, U+2264 ISOtech" },
{ 8805, "ge", "greater-than or equal to, U+2265 ISOtech" },
{ 8834, "sub", "subset of, U+2282 ISOtech" },
{ 8835, "sup", "superset of, U+2283 ISOtech" },
{ 8836, "nsub", "not a subset of, U+2284 ISOamsn" },
{ 8838, "sube", "subset of or equal to, U+2286 ISOtech" },
{ 8839, "supe", "superset of or equal to, U+2287 ISOtech" },
{ 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" },
{ 8855, "otimes","circled times = vector product, U+2297 ISOamsb" },
{ 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
{ 8901, "sdot", "dot operator, U+22C5 ISOamsb" },
{ 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" },
{ 8969, "rceil","right ceiling, U+2309 ISOamsc" },
{ 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" },
{ 8971, "rfloor","right floor, U+230B ISOamsc" },
{ 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" },
{ 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" },
{ 9674, "loz", "lozenge, U+25CA ISOpub" },

{ 9824, "spades","black spade suit, U+2660 ISOpub" },
{ 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" },
{ 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" },
{ 9830, "diams","black diamond suit, U+2666 ISOpub" },

};
pcercuei 0:03b5121a232e 1900
pcercuei 0:03b5121a232e 1901 /************************************************************************
pcercuei 0:03b5121a232e 1902 * *
pcercuei 0:03b5121a232e 1903 * Commodity functions to handle entities *
pcercuei 0:03b5121a232e 1904 * *
pcercuei 0:03b5121a232e 1905 ************************************************************************/
pcercuei 0:03b5121a232e 1906
/*
 * Macro used to grow the current buffer: doubles <buffer>_size and
 * reallocates @buffer to that many xmlChar.
 *
 * The expansion depends on names taken from the calling scope:
 *   - a variable named <buffer>_size (built by token pasting) holding the
 *     current capacity of @buffer;
 *   - `ctxt`, which is handed to htmlErrMemory() on failure.
 *
 * On allocation failure the old buffer is freed and the macro executes
 * `return(NULL)` in the ENCLOSING function, so code after the macro only
 * runs when the reallocation succeeded.
 *
 * NOTE(review): the expansion is a bare { } block, not do { } while (0),
 * so `growBuffer(x);` directly in front of an `else` will not parse.
 * NOTE(review): the doubling of <buffer>_size is not checked for overflow.
 */
#define growBuffer(buffer) { \
    xmlChar *tmp; \
    buffer##_size *= 2; \
    tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (tmp == NULL) { \
        htmlErrMemory(ctxt, "growing buffer\n"); \
        xmlFree(buffer); \
        return(NULL); \
    } \
    buffer = tmp; \
}
pcercuei 0:03b5121a232e 1921
pcercuei 0:03b5121a232e 1922 /**
pcercuei 0:03b5121a232e 1923 * htmlEntityLookup:
pcercuei 0:03b5121a232e 1924 * @name: the entity name
pcercuei 0:03b5121a232e 1925 *
pcercuei 0:03b5121a232e 1926 * Lookup the given entity in EntitiesTable
pcercuei 0:03b5121a232e 1927 *
pcercuei 0:03b5121a232e 1928 * TODO: the linear scan is really ugly, an hash table is really needed.
pcercuei 0:03b5121a232e 1929 *
pcercuei 0:03b5121a232e 1930 * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
pcercuei 0:03b5121a232e 1931 */
pcercuei 0:03b5121a232e 1932 const htmlEntityDesc *
pcercuei 0:03b5121a232e 1933 htmlEntityLookup(const xmlChar *name) {
pcercuei 0:03b5121a232e 1934 unsigned int i;
pcercuei 0:03b5121a232e 1935
pcercuei 0:03b5121a232e 1936 for (i = 0;i < (sizeof(html40EntitiesTable)/
pcercuei 0:03b5121a232e 1937 sizeof(html40EntitiesTable[0]));i++) {
pcercuei 0:03b5121a232e 1938 if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) {
pcercuei 0:03b5121a232e 1939 return((htmlEntityDescPtr) &html40EntitiesTable[i]);
pcercuei 0:03b5121a232e 1940 }
pcercuei 0:03b5121a232e 1941 }
pcercuei 0:03b5121a232e 1942 return(NULL);
pcercuei 0:03b5121a232e 1943 }
pcercuei 0:03b5121a232e 1944
pcercuei 0:03b5121a232e 1945 /**
pcercuei 0:03b5121a232e 1946 * htmlEntityValueLookup:
pcercuei 0:03b5121a232e 1947 * @value: the entity's unicode value
pcercuei 0:03b5121a232e 1948 *
pcercuei 0:03b5121a232e 1949 * Lookup the given entity in EntitiesTable
pcercuei 0:03b5121a232e 1950 *
pcercuei 0:03b5121a232e 1951 * TODO: the linear scan is really ugly, an hash table is really needed.
pcercuei 0:03b5121a232e 1952 *
pcercuei 0:03b5121a232e 1953 * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
pcercuei 0:03b5121a232e 1954 */
pcercuei 0:03b5121a232e 1955 const htmlEntityDesc *
pcercuei 0:03b5121a232e 1956 htmlEntityValueLookup(unsigned int value) {
pcercuei 0:03b5121a232e 1957 unsigned int i;
pcercuei 0:03b5121a232e 1958
pcercuei 0:03b5121a232e 1959 for (i = 0;i < (sizeof(html40EntitiesTable)/
pcercuei 0:03b5121a232e 1960 sizeof(html40EntitiesTable[0]));i++) {
pcercuei 0:03b5121a232e 1961 if (html40EntitiesTable[i].value >= value) {
pcercuei 0:03b5121a232e 1962 if (html40EntitiesTable[i].value > value)
pcercuei 0:03b5121a232e 1963 break;
pcercuei 0:03b5121a232e 1964 return((htmlEntityDescPtr) &html40EntitiesTable[i]);
pcercuei 0:03b5121a232e 1965 }
pcercuei 0:03b5121a232e 1966 }
pcercuei 0:03b5121a232e 1967 return(NULL);
pcercuei 0:03b5121a232e 1968 }
pcercuei 0:03b5121a232e 1969
pcercuei 0:03b5121a232e 1970 /**
pcercuei 0:03b5121a232e 1971 * UTF8ToHtml:
pcercuei 0:03b5121a232e 1972 * @out: a pointer to an array of bytes to store the result
pcercuei 0:03b5121a232e 1973 * @outlen: the length of @out
pcercuei 0:03b5121a232e 1974 * @in: a pointer to an array of UTF-8 chars
pcercuei 0:03b5121a232e 1975 * @inlen: the length of @in
pcercuei 0:03b5121a232e 1976 *
pcercuei 0:03b5121a232e 1977 * Take a block of UTF-8 chars in and try to convert it to an ASCII
pcercuei 0:03b5121a232e 1978 * plus HTML entities block of chars out.
pcercuei 0:03b5121a232e 1979 *
pcercuei 0:03b5121a232e 1980 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
pcercuei 0:03b5121a232e 1981 * The value of @inlen after return is the number of octets consumed
pcercuei 0:03b5121a232e 1982 * as the return value is positive, else unpredictable.
pcercuei 0:03b5121a232e 1983 * The value of @outlen after return is the number of octets consumed.
pcercuei 0:03b5121a232e 1984 */
pcercuei 0:03b5121a232e 1985 int
pcercuei 0:03b5121a232e 1986 UTF8ToHtml(unsigned char* out, int *outlen,
pcercuei 0:03b5121a232e 1987 const unsigned char* in, int *inlen) {
pcercuei 0:03b5121a232e 1988 const unsigned char* processed = in;
pcercuei 0:03b5121a232e 1989 const unsigned char* outend;
pcercuei 0:03b5121a232e 1990 const unsigned char* outstart = out;
pcercuei 0:03b5121a232e 1991 const unsigned char* instart = in;
pcercuei 0:03b5121a232e 1992 const unsigned char* inend;
pcercuei 0:03b5121a232e 1993 unsigned int c, d;
pcercuei 0:03b5121a232e 1994 int trailing;
pcercuei 0:03b5121a232e 1995
pcercuei 0:03b5121a232e 1996 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
pcercuei 0:03b5121a232e 1997 if (in == NULL) {
pcercuei 0:03b5121a232e 1998 /*
pcercuei 0:03b5121a232e 1999 * initialization nothing to do
pcercuei 0:03b5121a232e 2000 */
pcercuei 0:03b5121a232e 2001 *outlen = 0;
pcercuei 0:03b5121a232e 2002 *inlen = 0;
pcercuei 0:03b5121a232e 2003 return(0);
pcercuei 0:03b5121a232e 2004 }
pcercuei 0:03b5121a232e 2005 inend = in + (*inlen);
pcercuei 0:03b5121a232e 2006 outend = out + (*outlen);
pcercuei 0:03b5121a232e 2007 while (in < inend) {
pcercuei 0:03b5121a232e 2008 d = *in++;
pcercuei 0:03b5121a232e 2009 if (d < 0x80) { c= d; trailing= 0; }
pcercuei 0:03b5121a232e 2010 else if (d < 0xC0) {
pcercuei 0:03b5121a232e 2011 /* trailing byte in leading position */
pcercuei 0:03b5121a232e 2012 *outlen = out - outstart;
pcercuei 0:03b5121a232e 2013 *inlen = processed - instart;
pcercuei 0:03b5121a232e 2014 return(-2);
pcercuei 0:03b5121a232e 2015 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
pcercuei 0:03b5121a232e 2016 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
pcercuei 0:03b5121a232e 2017 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
pcercuei 0:03b5121a232e 2018 else {
pcercuei 0:03b5121a232e 2019 /* no chance for this in Ascii */
pcercuei 0:03b5121a232e 2020 *outlen = out - outstart;
pcercuei 0:03b5121a232e 2021 *inlen = processed - instart;
pcercuei 0:03b5121a232e 2022 return(-2);
pcercuei 0:03b5121a232e 2023 }
pcercuei 0:03b5121a232e 2024
pcercuei 0:03b5121a232e 2025 if (inend - in < trailing) {
pcercuei 0:03b5121a232e 2026 break;
pcercuei 0:03b5121a232e 2027 }
pcercuei 0:03b5121a232e 2028
pcercuei 0:03b5121a232e 2029 for ( ; trailing; trailing--) {
pcercuei 0:03b5121a232e 2030 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
pcercuei 0:03b5121a232e 2031 break;
pcercuei 0:03b5121a232e 2032 c <<= 6;
pcercuei 0:03b5121a232e 2033 c |= d & 0x3F;
pcercuei 0:03b5121a232e 2034 }
pcercuei 0:03b5121a232e 2035
pcercuei 0:03b5121a232e 2036 /* assertion: c is a single UTF-4 value */
pcercuei 0:03b5121a232e 2037 if (c < 0x80) {
pcercuei 0:03b5121a232e 2038 if (out + 1 >= outend)
pcercuei 0:03b5121a232e 2039 break;
pcercuei 0:03b5121a232e 2040 *out++ = c;
pcercuei 0:03b5121a232e 2041 } else {
pcercuei 0:03b5121a232e 2042 int len;
pcercuei 0:03b5121a232e 2043 const htmlEntityDesc * ent;
pcercuei 0:03b5121a232e 2044 const char *cp;
pcercuei 0:03b5121a232e 2045 char nbuf[16];
pcercuei 0:03b5121a232e 2046
pcercuei 0:03b5121a232e 2047 /*
pcercuei 0:03b5121a232e 2048 * Try to lookup a predefined HTML entity for it
pcercuei 0:03b5121a232e 2049 */
pcercuei 0:03b5121a232e 2050
pcercuei 0:03b5121a232e 2051 ent = htmlEntityValueLookup(c);
pcercuei 0:03b5121a232e 2052 if (ent == NULL) {
pcercuei 0:03b5121a232e 2053 snprintf(nbuf, sizeof(nbuf), "#%u", c);
pcercuei 0:03b5121a232e 2054 cp = nbuf;
pcercuei 0:03b5121a232e 2055 }
pcercuei 0:03b5121a232e 2056 else
pcercuei 0:03b5121a232e 2057 cp = ent->name;
pcercuei 0:03b5121a232e 2058 len = strlen(cp);
pcercuei 0:03b5121a232e 2059 if (out + 2 + len >= outend)
pcercuei 0:03b5121a232e 2060 break;
pcercuei 0:03b5121a232e 2061 *out++ = '&';
pcercuei 0:03b5121a232e 2062 memcpy(out, cp, len);
pcercuei 0:03b5121a232e 2063 out += len;
pcercuei 0:03b5121a232e 2064 *out++ = ';';
pcercuei 0:03b5121a232e 2065 }
pcercuei 0:03b5121a232e 2066 processed = in;
pcercuei 0:03b5121a232e 2067 }
pcercuei 0:03b5121a232e 2068 *outlen = out - outstart;
pcercuei 0:03b5121a232e 2069 *inlen = processed - instart;
pcercuei 0:03b5121a232e 2070 return(0);
pcercuei 0:03b5121a232e 2071 }
pcercuei 0:03b5121a232e 2072
pcercuei 0:03b5121a232e 2073 /**
pcercuei 0:03b5121a232e 2074 * htmlEncodeEntities:
pcercuei 0:03b5121a232e 2075 * @out: a pointer to an array of bytes to store the result
pcercuei 0:03b5121a232e 2076 * @outlen: the length of @out
pcercuei 0:03b5121a232e 2077 * @in: a pointer to an array of UTF-8 chars
pcercuei 0:03b5121a232e 2078 * @inlen: the length of @in
pcercuei 0:03b5121a232e 2079 * @quoteChar: the quote character to escape (' or ") or zero.
pcercuei 0:03b5121a232e 2080 *
pcercuei 0:03b5121a232e 2081 * Take a block of UTF-8 chars in and try to convert it to an ASCII
pcercuei 0:03b5121a232e 2082 * plus HTML entities block of chars out.
pcercuei 0:03b5121a232e 2083 *
pcercuei 0:03b5121a232e 2084 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
pcercuei 0:03b5121a232e 2085 * The value of @inlen after return is the number of octets consumed
pcercuei 0:03b5121a232e 2086 * as the return value is positive, else unpredictable.
pcercuei 0:03b5121a232e 2087 * The value of @outlen after return is the number of octets consumed.
pcercuei 0:03b5121a232e 2088 */
pcercuei 0:03b5121a232e 2089 int
pcercuei 0:03b5121a232e 2090 htmlEncodeEntities(unsigned char* out, int *outlen,
pcercuei 0:03b5121a232e 2091 const unsigned char* in, int *inlen, int quoteChar) {
pcercuei 0:03b5121a232e 2092 const unsigned char* processed = in;
pcercuei 0:03b5121a232e 2093 const unsigned char* outend;
pcercuei 0:03b5121a232e 2094 const unsigned char* outstart = out;
pcercuei 0:03b5121a232e 2095 const unsigned char* instart = in;
pcercuei 0:03b5121a232e 2096 const unsigned char* inend;
pcercuei 0:03b5121a232e 2097 unsigned int c, d;
pcercuei 0:03b5121a232e 2098 int trailing;
pcercuei 0:03b5121a232e 2099
pcercuei 0:03b5121a232e 2100 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL))
pcercuei 0:03b5121a232e 2101 return(-1);
pcercuei 0:03b5121a232e 2102 outend = out + (*outlen);
pcercuei 0:03b5121a232e 2103 inend = in + (*inlen);
pcercuei 0:03b5121a232e 2104 while (in < inend) {
pcercuei 0:03b5121a232e 2105 d = *in++;
pcercuei 0:03b5121a232e 2106 if (d < 0x80) { c= d; trailing= 0; }
pcercuei 0:03b5121a232e 2107 else if (d < 0xC0) {
pcercuei 0:03b5121a232e 2108 /* trailing byte in leading position */
pcercuei 0:03b5121a232e 2109 *outlen = out - outstart;
pcercuei 0:03b5121a232e 2110 *inlen = processed - instart;
pcercuei 0:03b5121a232e 2111 return(-2);
pcercuei 0:03b5121a232e 2112 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
pcercuei 0:03b5121a232e 2113 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
pcercuei 0:03b5121a232e 2114 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
pcercuei 0:03b5121a232e 2115 else {
pcercuei 0:03b5121a232e 2116 /* no chance for this in Ascii */
pcercuei 0:03b5121a232e 2117 *outlen = out - outstart;
pcercuei 0:03b5121a232e 2118 *inlen = processed - instart;
pcercuei 0:03b5121a232e 2119 return(-2);
pcercuei 0:03b5121a232e 2120 }
pcercuei 0:03b5121a232e 2121
pcercuei 0:03b5121a232e 2122 if (inend - in < trailing)
pcercuei 0:03b5121a232e 2123 break;
pcercuei 0:03b5121a232e 2124
pcercuei 0:03b5121a232e 2125 while (trailing--) {
pcercuei 0:03b5121a232e 2126 if (((d= *in++) & 0xC0) != 0x80) {
pcercuei 0:03b5121a232e 2127 *outlen = out - outstart;
pcercuei 0:03b5121a232e 2128 *inlen = processed - instart;
pcercuei 0:03b5121a232e 2129 return(-2);
pcercuei 0:03b5121a232e 2130 }
pcercuei 0:03b5121a232e 2131 c <<= 6;
pcercuei 0:03b5121a232e 2132 c |= d & 0x3F;
pcercuei 0:03b5121a232e 2133 }
pcercuei 0:03b5121a232e 2134
pcercuei 0:03b5121a232e 2135 /* assertion: c is a single UTF-4 value */
pcercuei 0:03b5121a232e 2136 if ((c < 0x80) && (c != (unsigned int) quoteChar) &&
pcercuei 0:03b5121a232e 2137 (c != '&') && (c != '<') && (c != '>')) {
pcercuei 0:03b5121a232e 2138 if (out >= outend)
pcercuei 0:03b5121a232e 2139 break;
pcercuei 0:03b5121a232e 2140 *out++ = c;
pcercuei 0:03b5121a232e 2141 } else {
pcercuei 0:03b5121a232e 2142 const htmlEntityDesc * ent;
pcercuei 0:03b5121a232e 2143 const char *cp;
pcercuei 0:03b5121a232e 2144 char nbuf[16];
pcercuei 0:03b5121a232e 2145 int len;
pcercuei 0:03b5121a232e 2146
pcercuei 0:03b5121a232e 2147 /*
pcercuei 0:03b5121a232e 2148 * Try to lookup a predefined HTML entity for it
pcercuei 0:03b5121a232e 2149 */
pcercuei 0:03b5121a232e 2150 ent = htmlEntityValueLookup(c);
pcercuei 0:03b5121a232e 2151 if (ent == NULL) {
pcercuei 0:03b5121a232e 2152 snprintf(nbuf, sizeof(nbuf), "#%u", c);
pcercuei 0:03b5121a232e 2153 cp = nbuf;
pcercuei 0:03b5121a232e 2154 }
pcercuei 0:03b5121a232e 2155 else
pcercuei 0:03b5121a232e 2156 cp = ent->name;
pcercuei 0:03b5121a232e 2157 len = strlen(cp);
pcercuei 0:03b5121a232e 2158 if (out + 2 + len > outend)
pcercuei 0:03b5121a232e 2159 break;
pcercuei 0:03b5121a232e 2160 *out++ = '&';
pcercuei 0:03b5121a232e 2161 memcpy(out, cp, len);
pcercuei 0:03b5121a232e 2162 out += len;
pcercuei 0:03b5121a232e 2163 *out++ = ';';
pcercuei 0:03b5121a232e 2164 }
pcercuei 0:03b5121a232e 2165 processed = in;
pcercuei 0:03b5121a232e 2166 }
pcercuei 0:03b5121a232e 2167 *outlen = out - outstart;
pcercuei 0:03b5121a232e 2168 *inlen = processed - instart;
pcercuei 0:03b5121a232e 2169 return(0);
pcercuei 0:03b5121a232e 2170 }
pcercuei 0:03b5121a232e 2171
pcercuei 0:03b5121a232e 2172 /************************************************************************
pcercuei 0:03b5121a232e 2173 * *
pcercuei 0:03b5121a232e 2174 * Commodity functions to handle streams *
pcercuei 0:03b5121a232e 2175 * *
pcercuei 0:03b5121a232e 2176 ************************************************************************/
pcercuei 0:03b5121a232e 2177
pcercuei 0:03b5121a232e 2178 /**
pcercuei 0:03b5121a232e 2179 * htmlNewInputStream:
pcercuei 0:03b5121a232e 2180 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2181 *
pcercuei 0:03b5121a232e 2182 * Create a new input stream structure
pcercuei 0:03b5121a232e 2183 * Returns the new input stream or NULL
pcercuei 0:03b5121a232e 2184 */
pcercuei 0:03b5121a232e 2185 static htmlParserInputPtr
pcercuei 0:03b5121a232e 2186 htmlNewInputStream(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2187 htmlParserInputPtr input;
pcercuei 0:03b5121a232e 2188
pcercuei 0:03b5121a232e 2189 input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));
pcercuei 0:03b5121a232e 2190 if (input == NULL) {
pcercuei 0:03b5121a232e 2191 htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
pcercuei 0:03b5121a232e 2192 return(NULL);
pcercuei 0:03b5121a232e 2193 }
pcercuei 0:03b5121a232e 2194 memset(input, 0, sizeof(htmlParserInput));
pcercuei 0:03b5121a232e 2195 input->filename = NULL;
pcercuei 0:03b5121a232e 2196 input->directory = NULL;
pcercuei 0:03b5121a232e 2197 input->base = NULL;
pcercuei 0:03b5121a232e 2198 input->cur = NULL;
pcercuei 0:03b5121a232e 2199 input->buf = NULL;
pcercuei 0:03b5121a232e 2200 input->line = 1;
pcercuei 0:03b5121a232e 2201 input->col = 1;
pcercuei 0:03b5121a232e 2202 input->buf = NULL;
pcercuei 0:03b5121a232e 2203 input->free = NULL;
pcercuei 0:03b5121a232e 2204 input->version = NULL;
pcercuei 0:03b5121a232e 2205 input->consumed = 0;
pcercuei 0:03b5121a232e 2206 input->length = 0;
pcercuei 0:03b5121a232e 2207 return(input);
pcercuei 0:03b5121a232e 2208 }
pcercuei 0:03b5121a232e 2209
pcercuei 0:03b5121a232e 2210
pcercuei 0:03b5121a232e 2211 /************************************************************************
pcercuei 0:03b5121a232e 2212 * *
pcercuei 0:03b5121a232e 2213 * Commodity functions, cleanup needed ? *
pcercuei 0:03b5121a232e 2214 * *
pcercuei 0:03b5121a232e 2215 ************************************************************************/
/*
 * Names of all the elements accepting PCDATA in the HTML 4.01 loose DTD,
 * one row per initial letter.
 * NOTE: it might be more appropriate to integrate this information
 *       into the html40ElementTable array but I don't want to risk any
 *       binary incompatibility
 */
static const char *allowPCData[] = {
    "a", "abbr", "acronym", "address", "applet",
    "b", "bdo", "big", "blockquote", "body", "button",
    "caption", "center", "cite", "code",
    "dd", "del", "dfn", "div", "dt",
    "em",
    "font", "form",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "i", "iframe", "ins",
    "kbd",
    "label", "legend", "li",
    "noframes", "noscript",
    "object",
    "p", "pre",
    "q",
    "s", "samp", "small", "span", "strike", "strong",
    "td", "th", "tt",
    "u",
    "var"
};
pcercuei 0:03b5121a232e 2230
pcercuei 0:03b5121a232e 2231 /**
pcercuei 0:03b5121a232e 2232 * areBlanks:
pcercuei 0:03b5121a232e 2233 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2234 * @str: a xmlChar *
pcercuei 0:03b5121a232e 2235 * @len: the size of @str
pcercuei 0:03b5121a232e 2236 *
pcercuei 0:03b5121a232e 2237 * Is this a sequence of blank chars that one can ignore ?
pcercuei 0:03b5121a232e 2238 *
pcercuei 0:03b5121a232e 2239 * Returns 1 if ignorable 0 otherwise.
pcercuei 0:03b5121a232e 2240 */
pcercuei 0:03b5121a232e 2241
pcercuei 0:03b5121a232e 2242 static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
pcercuei 0:03b5121a232e 2243 unsigned int i;
pcercuei 0:03b5121a232e 2244 int j;
pcercuei 0:03b5121a232e 2245 xmlNodePtr lastChild;
pcercuei 0:03b5121a232e 2246 xmlDtdPtr dtd;
pcercuei 0:03b5121a232e 2247
pcercuei 0:03b5121a232e 2248 for (j = 0;j < len;j++)
pcercuei 0:03b5121a232e 2249 if (!(IS_BLANK_CH(str[j]))) return(0);
pcercuei 0:03b5121a232e 2250
pcercuei 0:03b5121a232e 2251 if (CUR == 0) return(1);
pcercuei 0:03b5121a232e 2252 if (CUR != '<') return(0);
pcercuei 0:03b5121a232e 2253 if (ctxt->name == NULL)
pcercuei 0:03b5121a232e 2254 return(1);
pcercuei 0:03b5121a232e 2255 if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
pcercuei 0:03b5121a232e 2256 return(1);
pcercuei 0:03b5121a232e 2257 if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
pcercuei 0:03b5121a232e 2258 return(1);
pcercuei 0:03b5121a232e 2259
pcercuei 0:03b5121a232e 2260 /* Only strip CDATA children of the body tag for strict HTML DTDs */
pcercuei 0:03b5121a232e 2261 if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
pcercuei 0:03b5121a232e 2262 dtd = xmlGetIntSubset(ctxt->myDoc);
pcercuei 0:03b5121a232e 2263 if (dtd != NULL && dtd->ExternalID != NULL) {
pcercuei 0:03b5121a232e 2264 if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") ||
pcercuei 0:03b5121a232e 2265 !xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN"))
pcercuei 0:03b5121a232e 2266 return(1);
pcercuei 0:03b5121a232e 2267 }
pcercuei 0:03b5121a232e 2268 }
pcercuei 0:03b5121a232e 2269
pcercuei 0:03b5121a232e 2270 if (ctxt->node == NULL) return(0);
pcercuei 0:03b5121a232e 2271 lastChild = xmlGetLastChild(ctxt->node);
pcercuei 0:03b5121a232e 2272 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
pcercuei 0:03b5121a232e 2273 lastChild = lastChild->prev;
pcercuei 0:03b5121a232e 2274 if (lastChild == NULL) {
pcercuei 0:03b5121a232e 2275 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
pcercuei 0:03b5121a232e 2276 (ctxt->node->content != NULL)) return(0);
pcercuei 0:03b5121a232e 2277 /* keep ws in constructs like ...<b> </b>...
pcercuei 0:03b5121a232e 2278 for all tags "b" allowing PCDATA */
pcercuei 0:03b5121a232e 2279 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
pcercuei 0:03b5121a232e 2280 if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
pcercuei 0:03b5121a232e 2281 return(0);
pcercuei 0:03b5121a232e 2282 }
pcercuei 0:03b5121a232e 2283 }
pcercuei 0:03b5121a232e 2284 } else if (xmlNodeIsText(lastChild)) {
pcercuei 0:03b5121a232e 2285 return(0);
pcercuei 0:03b5121a232e 2286 } else {
pcercuei 0:03b5121a232e 2287 /* keep ws in constructs like <p><b>xy</b> <i>z</i><p>
pcercuei 0:03b5121a232e 2288 for all tags "p" allowing PCDATA */
pcercuei 0:03b5121a232e 2289 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
pcercuei 0:03b5121a232e 2290 if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {
pcercuei 0:03b5121a232e 2291 return(0);
pcercuei 0:03b5121a232e 2292 }
pcercuei 0:03b5121a232e 2293 }
pcercuei 0:03b5121a232e 2294 }
pcercuei 0:03b5121a232e 2295 return(1);
pcercuei 0:03b5121a232e 2296 }
pcercuei 0:03b5121a232e 2297
pcercuei 0:03b5121a232e 2298 /**
pcercuei 0:03b5121a232e 2299 * htmlNewDocNoDtD:
pcercuei 0:03b5121a232e 2300 * @URI: URI for the dtd, or NULL
pcercuei 0:03b5121a232e 2301 * @ExternalID: the external ID of the DTD, or NULL
pcercuei 0:03b5121a232e 2302 *
pcercuei 0:03b5121a232e 2303 * Creates a new HTML document without a DTD node if @URI and @ExternalID
pcercuei 0:03b5121a232e 2304 * are NULL
pcercuei 0:03b5121a232e 2305 *
pcercuei 0:03b5121a232e 2306 * Returns a new document, do not initialize the DTD if not provided
pcercuei 0:03b5121a232e 2307 */
pcercuei 0:03b5121a232e 2308 htmlDocPtr
pcercuei 0:03b5121a232e 2309 htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
pcercuei 0:03b5121a232e 2310 xmlDocPtr cur;
pcercuei 0:03b5121a232e 2311
pcercuei 0:03b5121a232e 2312 /*
pcercuei 0:03b5121a232e 2313 * Allocate a new document and fill the fields.
pcercuei 0:03b5121a232e 2314 */
pcercuei 0:03b5121a232e 2315 cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
pcercuei 0:03b5121a232e 2316 if (cur == NULL) {
pcercuei 0:03b5121a232e 2317 htmlErrMemory(NULL, "HTML document creation failed\n");
pcercuei 0:03b5121a232e 2318 return(NULL);
pcercuei 0:03b5121a232e 2319 }
pcercuei 0:03b5121a232e 2320 memset(cur, 0, sizeof(xmlDoc));
pcercuei 0:03b5121a232e 2321
pcercuei 0:03b5121a232e 2322 cur->type = XML_HTML_DOCUMENT_NODE;
pcercuei 0:03b5121a232e 2323 cur->version = NULL;
pcercuei 0:03b5121a232e 2324 cur->intSubset = NULL;
pcercuei 0:03b5121a232e 2325 cur->doc = cur;
pcercuei 0:03b5121a232e 2326 cur->name = NULL;
pcercuei 0:03b5121a232e 2327 cur->children = NULL;
pcercuei 0:03b5121a232e 2328 cur->extSubset = NULL;
pcercuei 0:03b5121a232e 2329 cur->oldNs = NULL;
pcercuei 0:03b5121a232e 2330 cur->encoding = NULL;
pcercuei 0:03b5121a232e 2331 cur->standalone = 1;
pcercuei 0:03b5121a232e 2332 cur->compression = 0;
pcercuei 0:03b5121a232e 2333 cur->ids = NULL;
pcercuei 0:03b5121a232e 2334 cur->refs = NULL;
pcercuei 0:03b5121a232e 2335 cur->_private = NULL;
pcercuei 0:03b5121a232e 2336 cur->charset = XML_CHAR_ENCODING_UTF8;
pcercuei 0:03b5121a232e 2337 cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT;
pcercuei 0:03b5121a232e 2338 if ((ExternalID != NULL) ||
pcercuei 0:03b5121a232e 2339 (URI != NULL))
pcercuei 0:03b5121a232e 2340 xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);
pcercuei 0:03b5121a232e 2341 return(cur);
pcercuei 0:03b5121a232e 2342 }
pcercuei 0:03b5121a232e 2343
pcercuei 0:03b5121a232e 2344 /**
pcercuei 0:03b5121a232e 2345 * htmlNewDoc:
pcercuei 0:03b5121a232e 2346 * @URI: URI for the dtd, or NULL
pcercuei 0:03b5121a232e 2347 * @ExternalID: the external ID of the DTD, or NULL
pcercuei 0:03b5121a232e 2348 *
pcercuei 0:03b5121a232e 2349 * Creates a new HTML document
pcercuei 0:03b5121a232e 2350 *
pcercuei 0:03b5121a232e 2351 * Returns a new document
pcercuei 0:03b5121a232e 2352 */
pcercuei 0:03b5121a232e 2353 htmlDocPtr
pcercuei 0:03b5121a232e 2354 htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
pcercuei 0:03b5121a232e 2355 if ((URI == NULL) && (ExternalID == NULL))
pcercuei 0:03b5121a232e 2356 return(htmlNewDocNoDtD(
pcercuei 0:03b5121a232e 2357 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
pcercuei 0:03b5121a232e 2358 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
pcercuei 0:03b5121a232e 2359
pcercuei 0:03b5121a232e 2360 return(htmlNewDocNoDtD(URI, ExternalID));
pcercuei 0:03b5121a232e 2361 }
pcercuei 0:03b5121a232e 2362
pcercuei 0:03b5121a232e 2363
pcercuei 0:03b5121a232e 2364 /************************************************************************
pcercuei 0:03b5121a232e 2365 * *
pcercuei 0:03b5121a232e 2366 * The parser itself *
pcercuei 0:03b5121a232e 2367 * Relates to http://www.w3.org/TR/html40 *
pcercuei 0:03b5121a232e 2368 * *
pcercuei 0:03b5121a232e 2369 ************************************************************************/
pcercuei 0:03b5121a232e 2370
pcercuei 0:03b5121a232e 2371 /************************************************************************
pcercuei 0:03b5121a232e 2372 * *
pcercuei 0:03b5121a232e 2373 * The parser itself *
pcercuei 0:03b5121a232e 2374 * *
pcercuei 0:03b5121a232e 2375 ************************************************************************/
pcercuei 0:03b5121a232e 2376
pcercuei 0:03b5121a232e 2377 static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
pcercuei 0:03b5121a232e 2378
pcercuei 0:03b5121a232e 2379 /**
pcercuei 0:03b5121a232e 2380 * htmlParseHTMLName:
pcercuei 0:03b5121a232e 2381 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2382 *
pcercuei 0:03b5121a232e 2383 * parse an HTML tag or attribute name, note that we convert it to lowercase
pcercuei 0:03b5121a232e 2384 * since HTML names are not case-sensitive.
pcercuei 0:03b5121a232e 2385 *
pcercuei 0:03b5121a232e 2386 * Returns the Tag Name parsed or NULL
pcercuei 0:03b5121a232e 2387 */
pcercuei 0:03b5121a232e 2388
pcercuei 0:03b5121a232e 2389 static const xmlChar *
pcercuei 0:03b5121a232e 2390 htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2391 int i = 0;
pcercuei 0:03b5121a232e 2392 xmlChar loc[HTML_PARSER_BUFFER_SIZE];
pcercuei 0:03b5121a232e 2393
pcercuei 0:03b5121a232e 2394 if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
pcercuei 0:03b5121a232e 2395 (CUR != ':') && (CUR != '.')) return(NULL);
pcercuei 0:03b5121a232e 2396
pcercuei 0:03b5121a232e 2397 while ((i < HTML_PARSER_BUFFER_SIZE) &&
pcercuei 0:03b5121a232e 2398 ((IS_ASCII_LETTER(CUR)) || (IS_ASCII_DIGIT(CUR)) ||
pcercuei 0:03b5121a232e 2399 (CUR == ':') || (CUR == '-') || (CUR == '_') ||
pcercuei 0:03b5121a232e 2400 (CUR == '.'))) {
pcercuei 0:03b5121a232e 2401 if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
pcercuei 0:03b5121a232e 2402 else loc[i] = CUR;
pcercuei 0:03b5121a232e 2403 i++;
pcercuei 0:03b5121a232e 2404
pcercuei 0:03b5121a232e 2405 NEXT;
pcercuei 0:03b5121a232e 2406 }
pcercuei 0:03b5121a232e 2407
pcercuei 0:03b5121a232e 2408 return(xmlDictLookup(ctxt->dict, loc, i));
pcercuei 0:03b5121a232e 2409 }
pcercuei 0:03b5121a232e 2410
pcercuei 0:03b5121a232e 2411
pcercuei 0:03b5121a232e 2412 /**
pcercuei 0:03b5121a232e 2413 * htmlParseHTMLName_nonInvasive:
pcercuei 0:03b5121a232e 2414 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2415 *
pcercuei 0:03b5121a232e 2416 * parse an HTML tag or attribute name, note that we convert it to lowercase
pcercuei 0:03b5121a232e 2417 * since HTML names are not case-sensitive, this doesn't consume the data
pcercuei 0:03b5121a232e 2418 * from the stream, it's a look-ahead
pcercuei 0:03b5121a232e 2419 *
pcercuei 0:03b5121a232e 2420 * Returns the Tag Name parsed or NULL
pcercuei 0:03b5121a232e 2421 */
pcercuei 0:03b5121a232e 2422
pcercuei 0:03b5121a232e 2423 static const xmlChar *
pcercuei 0:03b5121a232e 2424 htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2425 int i = 0;
pcercuei 0:03b5121a232e 2426 xmlChar loc[HTML_PARSER_BUFFER_SIZE];
pcercuei 0:03b5121a232e 2427
pcercuei 0:03b5121a232e 2428 if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') &&
pcercuei 0:03b5121a232e 2429 (NXT(1) != ':')) return(NULL);
pcercuei 0:03b5121a232e 2430
pcercuei 0:03b5121a232e 2431 while ((i < HTML_PARSER_BUFFER_SIZE) &&
pcercuei 0:03b5121a232e 2432 ((IS_ASCII_LETTER(NXT(1+i))) || (IS_ASCII_DIGIT(NXT(1+i))) ||
pcercuei 0:03b5121a232e 2433 (NXT(1+i) == ':') || (NXT(1+i) == '-') || (NXT(1+i) == '_'))) {
pcercuei 0:03b5121a232e 2434 if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20;
pcercuei 0:03b5121a232e 2435 else loc[i] = NXT(1+i);
pcercuei 0:03b5121a232e 2436 i++;
pcercuei 0:03b5121a232e 2437 }
pcercuei 0:03b5121a232e 2438
pcercuei 0:03b5121a232e 2439 return(xmlDictLookup(ctxt->dict, loc, i));
pcercuei 0:03b5121a232e 2440 }
pcercuei 0:03b5121a232e 2441
pcercuei 0:03b5121a232e 2442
pcercuei 0:03b5121a232e 2443 /**
pcercuei 0:03b5121a232e 2444 * htmlParseName:
pcercuei 0:03b5121a232e 2445 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2446 *
pcercuei 0:03b5121a232e 2447 * parse an HTML name, this routine is case sensitive.
pcercuei 0:03b5121a232e 2448 *
pcercuei 0:03b5121a232e 2449 * Returns the Name parsed or NULL
pcercuei 0:03b5121a232e 2450 */
pcercuei 0:03b5121a232e 2451
pcercuei 0:03b5121a232e 2452 static const xmlChar *
pcercuei 0:03b5121a232e 2453 htmlParseName(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2454 const xmlChar *in;
pcercuei 0:03b5121a232e 2455 const xmlChar *ret;
pcercuei 0:03b5121a232e 2456 int count = 0;
pcercuei 0:03b5121a232e 2457
pcercuei 0:03b5121a232e 2458 GROW;
pcercuei 0:03b5121a232e 2459
pcercuei 0:03b5121a232e 2460 /*
pcercuei 0:03b5121a232e 2461 * Accelerator for simple ASCII names
pcercuei 0:03b5121a232e 2462 */
pcercuei 0:03b5121a232e 2463 in = ctxt->input->cur;
pcercuei 0:03b5121a232e 2464 if (((*in >= 0x61) && (*in <= 0x7A)) ||
pcercuei 0:03b5121a232e 2465 ((*in >= 0x41) && (*in <= 0x5A)) ||
pcercuei 0:03b5121a232e 2466 (*in == '_') || (*in == ':')) {
pcercuei 0:03b5121a232e 2467 in++;
pcercuei 0:03b5121a232e 2468 while (((*in >= 0x61) && (*in <= 0x7A)) ||
pcercuei 0:03b5121a232e 2469 ((*in >= 0x41) && (*in <= 0x5A)) ||
pcercuei 0:03b5121a232e 2470 ((*in >= 0x30) && (*in <= 0x39)) ||
pcercuei 0:03b5121a232e 2471 (*in == '_') || (*in == '-') ||
pcercuei 0:03b5121a232e 2472 (*in == ':') || (*in == '.'))
pcercuei 0:03b5121a232e 2473 in++;
pcercuei 0:03b5121a232e 2474 if ((*in > 0) && (*in < 0x80)) {
pcercuei 0:03b5121a232e 2475 count = in - ctxt->input->cur;
pcercuei 0:03b5121a232e 2476 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
pcercuei 0:03b5121a232e 2477 ctxt->input->cur = in;
pcercuei 0:03b5121a232e 2478 ctxt->nbChars += count;
pcercuei 0:03b5121a232e 2479 ctxt->input->col += count;
pcercuei 0:03b5121a232e 2480 return(ret);
pcercuei 0:03b5121a232e 2481 }
pcercuei 0:03b5121a232e 2482 }
pcercuei 0:03b5121a232e 2483 return(htmlParseNameComplex(ctxt));
pcercuei 0:03b5121a232e 2484 }
pcercuei 0:03b5121a232e 2485
pcercuei 0:03b5121a232e 2486 static const xmlChar *
pcercuei 0:03b5121a232e 2487 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2488 int len = 0, l;
pcercuei 0:03b5121a232e 2489 int c;
pcercuei 0:03b5121a232e 2490 int count = 0;
pcercuei 0:03b5121a232e 2491
pcercuei 0:03b5121a232e 2492 /*
pcercuei 0:03b5121a232e 2493 * Handler for more complex cases
pcercuei 0:03b5121a232e 2494 */
pcercuei 0:03b5121a232e 2495 GROW;
pcercuei 0:03b5121a232e 2496 c = CUR_CHAR(l);
pcercuei 0:03b5121a232e 2497 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
pcercuei 0:03b5121a232e 2498 (!IS_LETTER(c) && (c != '_') &&
pcercuei 0:03b5121a232e 2499 (c != ':'))) {
pcercuei 0:03b5121a232e 2500 return(NULL);
pcercuei 0:03b5121a232e 2501 }
pcercuei 0:03b5121a232e 2502
pcercuei 0:03b5121a232e 2503 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
pcercuei 0:03b5121a232e 2504 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
pcercuei 0:03b5121a232e 2505 (c == '.') || (c == '-') ||
pcercuei 0:03b5121a232e 2506 (c == '_') || (c == ':') ||
pcercuei 0:03b5121a232e 2507 (IS_COMBINING(c)) ||
pcercuei 0:03b5121a232e 2508 (IS_EXTENDER(c)))) {
pcercuei 0:03b5121a232e 2509 if (count++ > 100) {
pcercuei 0:03b5121a232e 2510 count = 0;
pcercuei 0:03b5121a232e 2511 GROW;
pcercuei 0:03b5121a232e 2512 }
pcercuei 0:03b5121a232e 2513 len += l;
pcercuei 0:03b5121a232e 2514 NEXTL(l);
pcercuei 0:03b5121a232e 2515 c = CUR_CHAR(l);
pcercuei 0:03b5121a232e 2516 }
pcercuei 0:03b5121a232e 2517 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
pcercuei 0:03b5121a232e 2518 }
pcercuei 0:03b5121a232e 2519
pcercuei 0:03b5121a232e 2520
pcercuei 0:03b5121a232e 2521 /**
pcercuei 0:03b5121a232e 2522 * htmlParseHTMLAttribute:
pcercuei 0:03b5121a232e 2523 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2524 * @stop: a char stop value
pcercuei 0:03b5121a232e 2525 *
pcercuei 0:03b5121a232e 2526 * parse an HTML attribute value till the stop (quote), if
pcercuei 0:03b5121a232e 2527 * stop is 0 then it stops at the first space
pcercuei 0:03b5121a232e 2528 *
pcercuei 0:03b5121a232e 2529 * Returns the attribute parsed or NULL
pcercuei 0:03b5121a232e 2530 */
pcercuei 0:03b5121a232e 2531
pcercuei 0:03b5121a232e 2532 static xmlChar *
pcercuei 0:03b5121a232e 2533 htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
pcercuei 0:03b5121a232e 2534 xmlChar *buffer = NULL;
pcercuei 0:03b5121a232e 2535 int buffer_size = 0;
pcercuei 0:03b5121a232e 2536 xmlChar *out = NULL;
pcercuei 0:03b5121a232e 2537 const xmlChar *name = NULL;
pcercuei 0:03b5121a232e 2538 const xmlChar *cur = NULL;
pcercuei 0:03b5121a232e 2539 const htmlEntityDesc * ent;
pcercuei 0:03b5121a232e 2540
pcercuei 0:03b5121a232e 2541 /*
pcercuei 0:03b5121a232e 2542 * allocate a translation buffer.
pcercuei 0:03b5121a232e 2543 */
pcercuei 0:03b5121a232e 2544 buffer_size = HTML_PARSER_BUFFER_SIZE;
pcercuei 0:03b5121a232e 2545 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
pcercuei 0:03b5121a232e 2546 if (buffer == NULL) {
pcercuei 0:03b5121a232e 2547 htmlErrMemory(ctxt, "buffer allocation failed\n");
pcercuei 0:03b5121a232e 2548 return(NULL);
pcercuei 0:03b5121a232e 2549 }
pcercuei 0:03b5121a232e 2550 out = buffer;
pcercuei 0:03b5121a232e 2551
pcercuei 0:03b5121a232e 2552 /*
pcercuei 0:03b5121a232e 2553 * Ok loop until we reach one of the ending chars
pcercuei 0:03b5121a232e 2554 */
pcercuei 0:03b5121a232e 2555 while ((CUR != 0) && (CUR != stop)) {
pcercuei 0:03b5121a232e 2556 if ((stop == 0) && (CUR == '>')) break;
pcercuei 0:03b5121a232e 2557 if ((stop == 0) && (IS_BLANK_CH(CUR))) break;
pcercuei 0:03b5121a232e 2558 if (CUR == '&') {
pcercuei 0:03b5121a232e 2559 if (NXT(1) == '#') {
pcercuei 0:03b5121a232e 2560 unsigned int c;
pcercuei 0:03b5121a232e 2561 int bits;
pcercuei 0:03b5121a232e 2562
pcercuei 0:03b5121a232e 2563 c = htmlParseCharRef(ctxt);
pcercuei 0:03b5121a232e 2564 if (c < 0x80)
pcercuei 0:03b5121a232e 2565 { *out++ = c; bits= -6; }
pcercuei 0:03b5121a232e 2566 else if (c < 0x800)
pcercuei 0:03b5121a232e 2567 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
pcercuei 0:03b5121a232e 2568 else if (c < 0x10000)
pcercuei 0:03b5121a232e 2569 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
pcercuei 0:03b5121a232e 2570 else
pcercuei 0:03b5121a232e 2571 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
pcercuei 0:03b5121a232e 2572
pcercuei 0:03b5121a232e 2573 for ( ; bits >= 0; bits-= 6) {
pcercuei 0:03b5121a232e 2574 *out++ = ((c >> bits) & 0x3F) | 0x80;
pcercuei 0:03b5121a232e 2575 }
pcercuei 0:03b5121a232e 2576
pcercuei 0:03b5121a232e 2577 if (out - buffer > buffer_size - 100) {
pcercuei 0:03b5121a232e 2578 int indx = out - buffer;
pcercuei 0:03b5121a232e 2579
pcercuei 0:03b5121a232e 2580 growBuffer(buffer);
pcercuei 0:03b5121a232e 2581 out = &buffer[indx];
pcercuei 0:03b5121a232e 2582 }
pcercuei 0:03b5121a232e 2583 } else {
pcercuei 0:03b5121a232e 2584 ent = htmlParseEntityRef(ctxt, &name);
pcercuei 0:03b5121a232e 2585 if (name == NULL) {
pcercuei 0:03b5121a232e 2586 *out++ = '&';
pcercuei 0:03b5121a232e 2587 if (out - buffer > buffer_size - 100) {
pcercuei 0:03b5121a232e 2588 int indx = out - buffer;
pcercuei 0:03b5121a232e 2589
pcercuei 0:03b5121a232e 2590 growBuffer(buffer);
pcercuei 0:03b5121a232e 2591 out = &buffer[indx];
pcercuei 0:03b5121a232e 2592 }
pcercuei 0:03b5121a232e 2593 } else if (ent == NULL) {
pcercuei 0:03b5121a232e 2594 *out++ = '&';
pcercuei 0:03b5121a232e 2595 cur = name;
pcercuei 0:03b5121a232e 2596 while (*cur != 0) {
pcercuei 0:03b5121a232e 2597 if (out - buffer > buffer_size - 100) {
pcercuei 0:03b5121a232e 2598 int indx = out - buffer;
pcercuei 0:03b5121a232e 2599
pcercuei 0:03b5121a232e 2600 growBuffer(buffer);
pcercuei 0:03b5121a232e 2601 out = &buffer[indx];
pcercuei 0:03b5121a232e 2602 }
pcercuei 0:03b5121a232e 2603 *out++ = *cur++;
pcercuei 0:03b5121a232e 2604 }
pcercuei 0:03b5121a232e 2605 } else {
pcercuei 0:03b5121a232e 2606 unsigned int c;
pcercuei 0:03b5121a232e 2607 int bits;
pcercuei 0:03b5121a232e 2608
pcercuei 0:03b5121a232e 2609 if (out - buffer > buffer_size - 100) {
pcercuei 0:03b5121a232e 2610 int indx = out - buffer;
pcercuei 0:03b5121a232e 2611
pcercuei 0:03b5121a232e 2612 growBuffer(buffer);
pcercuei 0:03b5121a232e 2613 out = &buffer[indx];
pcercuei 0:03b5121a232e 2614 }
pcercuei 0:03b5121a232e 2615 c = ent->value;
pcercuei 0:03b5121a232e 2616 if (c < 0x80)
pcercuei 0:03b5121a232e 2617 { *out++ = c; bits= -6; }
pcercuei 0:03b5121a232e 2618 else if (c < 0x800)
pcercuei 0:03b5121a232e 2619 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
pcercuei 0:03b5121a232e 2620 else if (c < 0x10000)
pcercuei 0:03b5121a232e 2621 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
pcercuei 0:03b5121a232e 2622 else
pcercuei 0:03b5121a232e 2623 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
pcercuei 0:03b5121a232e 2624
pcercuei 0:03b5121a232e 2625 for ( ; bits >= 0; bits-= 6) {
pcercuei 0:03b5121a232e 2626 *out++ = ((c >> bits) & 0x3F) | 0x80;
pcercuei 0:03b5121a232e 2627 }
pcercuei 0:03b5121a232e 2628 }
pcercuei 0:03b5121a232e 2629 }
pcercuei 0:03b5121a232e 2630 } else {
pcercuei 0:03b5121a232e 2631 unsigned int c;
pcercuei 0:03b5121a232e 2632 int bits, l;
pcercuei 0:03b5121a232e 2633
pcercuei 0:03b5121a232e 2634 if (out - buffer > buffer_size - 100) {
pcercuei 0:03b5121a232e 2635 int indx = out - buffer;
pcercuei 0:03b5121a232e 2636
pcercuei 0:03b5121a232e 2637 growBuffer(buffer);
pcercuei 0:03b5121a232e 2638 out = &buffer[indx];
pcercuei 0:03b5121a232e 2639 }
pcercuei 0:03b5121a232e 2640 c = CUR_CHAR(l);
pcercuei 0:03b5121a232e 2641 if (c < 0x80)
pcercuei 0:03b5121a232e 2642 { *out++ = c; bits= -6; }
pcercuei 0:03b5121a232e 2643 else if (c < 0x800)
pcercuei 0:03b5121a232e 2644 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
pcercuei 0:03b5121a232e 2645 else if (c < 0x10000)
pcercuei 0:03b5121a232e 2646 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
pcercuei 0:03b5121a232e 2647 else
pcercuei 0:03b5121a232e 2648 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
pcercuei 0:03b5121a232e 2649
pcercuei 0:03b5121a232e 2650 for ( ; bits >= 0; bits-= 6) {
pcercuei 0:03b5121a232e 2651 *out++ = ((c >> bits) & 0x3F) | 0x80;
pcercuei 0:03b5121a232e 2652 }
pcercuei 0:03b5121a232e 2653 NEXT;
pcercuei 0:03b5121a232e 2654 }
pcercuei 0:03b5121a232e 2655 }
pcercuei 0:03b5121a232e 2656 *out = 0;
pcercuei 0:03b5121a232e 2657 return(buffer);
pcercuei 0:03b5121a232e 2658 }
pcercuei 0:03b5121a232e 2659
pcercuei 0:03b5121a232e 2660 /**
pcercuei 0:03b5121a232e 2661 * htmlParseEntityRef:
pcercuei 0:03b5121a232e 2662 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2663 * @str: location to store the entity name
pcercuei 0:03b5121a232e 2664 *
pcercuei 0:03b5121a232e 2665 * parse an HTML ENTITY references
pcercuei 0:03b5121a232e 2666 *
pcercuei 0:03b5121a232e 2667 * [68] EntityRef ::= '&' Name ';'
pcercuei 0:03b5121a232e 2668 *
pcercuei 0:03b5121a232e 2669 * Returns the associated htmlEntityDescPtr if found, or NULL otherwise,
pcercuei 0:03b5121a232e 2670 * if non-NULL *str will have to be freed by the caller.
pcercuei 0:03b5121a232e 2671 */
pcercuei 0:03b5121a232e 2672 const htmlEntityDesc *
pcercuei 0:03b5121a232e 2673 htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
pcercuei 0:03b5121a232e 2674 const xmlChar *name;
pcercuei 0:03b5121a232e 2675 const htmlEntityDesc * ent = NULL;
pcercuei 0:03b5121a232e 2676
pcercuei 0:03b5121a232e 2677 if (str != NULL) *str = NULL;
pcercuei 0:03b5121a232e 2678 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
pcercuei 0:03b5121a232e 2679
pcercuei 0:03b5121a232e 2680 if (CUR == '&') {
pcercuei 0:03b5121a232e 2681 NEXT;
pcercuei 0:03b5121a232e 2682 name = htmlParseName(ctxt);
pcercuei 0:03b5121a232e 2683 if (name == NULL) {
pcercuei 0:03b5121a232e 2684 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
pcercuei 0:03b5121a232e 2685 "htmlParseEntityRef: no name\n", NULL, NULL);
pcercuei 0:03b5121a232e 2686 } else {
pcercuei 0:03b5121a232e 2687 GROW;
pcercuei 0:03b5121a232e 2688 if (CUR == ';') {
pcercuei 0:03b5121a232e 2689 if (str != NULL)
pcercuei 0:03b5121a232e 2690 *str = name;
pcercuei 0:03b5121a232e 2691
pcercuei 0:03b5121a232e 2692 /*
pcercuei 0:03b5121a232e 2693 * Lookup the entity in the table.
pcercuei 0:03b5121a232e 2694 */
pcercuei 0:03b5121a232e 2695 ent = htmlEntityLookup(name);
pcercuei 0:03b5121a232e 2696 if (ent != NULL) /* OK that's ugly !!! */
pcercuei 0:03b5121a232e 2697 NEXT;
pcercuei 0:03b5121a232e 2698 } else {
pcercuei 0:03b5121a232e 2699 htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
pcercuei 0:03b5121a232e 2700 "htmlParseEntityRef: expecting ';'\n",
pcercuei 0:03b5121a232e 2701 NULL, NULL);
pcercuei 0:03b5121a232e 2702 if (str != NULL)
pcercuei 0:03b5121a232e 2703 *str = name;
pcercuei 0:03b5121a232e 2704 }
pcercuei 0:03b5121a232e 2705 }
pcercuei 0:03b5121a232e 2706 }
pcercuei 0:03b5121a232e 2707 return(ent);
pcercuei 0:03b5121a232e 2708 }
pcercuei 0:03b5121a232e 2709
pcercuei 0:03b5121a232e 2710 /**
pcercuei 0:03b5121a232e 2711 * htmlParseAttValue:
pcercuei 0:03b5121a232e 2712 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2713 *
pcercuei 0:03b5121a232e 2714 * parse a value for an attribute
pcercuei 0:03b5121a232e 2715 * Note: the parser won't do substitution of entities here, this
pcercuei 0:03b5121a232e 2716 * will be handled later in xmlStringGetNodeList, unless it was
pcercuei 0:03b5121a232e 2717 * asked for ctxt->replaceEntities != 0
pcercuei 0:03b5121a232e 2718 *
pcercuei 0:03b5121a232e 2719 * Returns the AttValue parsed or NULL.
pcercuei 0:03b5121a232e 2720 */
pcercuei 0:03b5121a232e 2721
pcercuei 0:03b5121a232e 2722 static xmlChar *
pcercuei 0:03b5121a232e 2723 htmlParseAttValue(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2724 xmlChar *ret = NULL;
pcercuei 0:03b5121a232e 2725
pcercuei 0:03b5121a232e 2726 if (CUR == '"') {
pcercuei 0:03b5121a232e 2727 NEXT;
pcercuei 0:03b5121a232e 2728 ret = htmlParseHTMLAttribute(ctxt, '"');
pcercuei 0:03b5121a232e 2729 if (CUR != '"') {
pcercuei 0:03b5121a232e 2730 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
pcercuei 0:03b5121a232e 2731 "AttValue: \" expected\n", NULL, NULL);
pcercuei 0:03b5121a232e 2732 } else
pcercuei 0:03b5121a232e 2733 NEXT;
pcercuei 0:03b5121a232e 2734 } else if (CUR == '\'') {
pcercuei 0:03b5121a232e 2735 NEXT;
pcercuei 0:03b5121a232e 2736 ret = htmlParseHTMLAttribute(ctxt, '\'');
pcercuei 0:03b5121a232e 2737 if (CUR != '\'') {
pcercuei 0:03b5121a232e 2738 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
pcercuei 0:03b5121a232e 2739 "AttValue: ' expected\n", NULL, NULL);
pcercuei 0:03b5121a232e 2740 } else
pcercuei 0:03b5121a232e 2741 NEXT;
pcercuei 0:03b5121a232e 2742 } else {
pcercuei 0:03b5121a232e 2743 /*
pcercuei 0:03b5121a232e 2744 * That's an HTMLism, the attribute value may not be quoted
pcercuei 0:03b5121a232e 2745 */
pcercuei 0:03b5121a232e 2746 ret = htmlParseHTMLAttribute(ctxt, 0);
pcercuei 0:03b5121a232e 2747 if (ret == NULL) {
pcercuei 0:03b5121a232e 2748 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
pcercuei 0:03b5121a232e 2749 "AttValue: no value found\n", NULL, NULL);
pcercuei 0:03b5121a232e 2750 }
pcercuei 0:03b5121a232e 2751 }
pcercuei 0:03b5121a232e 2752 return(ret);
pcercuei 0:03b5121a232e 2753 }
pcercuei 0:03b5121a232e 2754
pcercuei 0:03b5121a232e 2755 /**
pcercuei 0:03b5121a232e 2756 * htmlParseSystemLiteral:
pcercuei 0:03b5121a232e 2757 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2758 *
pcercuei 0:03b5121a232e 2759 * parse an HTML Literal
pcercuei 0:03b5121a232e 2760 *
pcercuei 0:03b5121a232e 2761 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
pcercuei 0:03b5121a232e 2762 *
pcercuei 0:03b5121a232e 2763 * Returns the SystemLiteral parsed or NULL
pcercuei 0:03b5121a232e 2764 */
pcercuei 0:03b5121a232e 2765
pcercuei 0:03b5121a232e 2766 static xmlChar *
pcercuei 0:03b5121a232e 2767 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2768 const xmlChar *q;
pcercuei 0:03b5121a232e 2769 xmlChar *ret = NULL;
pcercuei 0:03b5121a232e 2770
pcercuei 0:03b5121a232e 2771 if (CUR == '"') {
pcercuei 0:03b5121a232e 2772 NEXT;
pcercuei 0:03b5121a232e 2773 q = CUR_PTR;
pcercuei 0:03b5121a232e 2774 while ((IS_CHAR_CH(CUR)) && (CUR != '"'))
pcercuei 0:03b5121a232e 2775 NEXT;
pcercuei 0:03b5121a232e 2776 if (!IS_CHAR_CH(CUR)) {
pcercuei 0:03b5121a232e 2777 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
pcercuei 0:03b5121a232e 2778 "Unfinished SystemLiteral\n", NULL, NULL);
pcercuei 0:03b5121a232e 2779 } else {
pcercuei 0:03b5121a232e 2780 ret = xmlStrndup(q, CUR_PTR - q);
pcercuei 0:03b5121a232e 2781 NEXT;
pcercuei 0:03b5121a232e 2782 }
pcercuei 0:03b5121a232e 2783 } else if (CUR == '\'') {
pcercuei 0:03b5121a232e 2784 NEXT;
pcercuei 0:03b5121a232e 2785 q = CUR_PTR;
pcercuei 0:03b5121a232e 2786 while ((IS_CHAR_CH(CUR)) && (CUR != '\''))
pcercuei 0:03b5121a232e 2787 NEXT;
pcercuei 0:03b5121a232e 2788 if (!IS_CHAR_CH(CUR)) {
pcercuei 0:03b5121a232e 2789 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
pcercuei 0:03b5121a232e 2790 "Unfinished SystemLiteral\n", NULL, NULL);
pcercuei 0:03b5121a232e 2791 } else {
pcercuei 0:03b5121a232e 2792 ret = xmlStrndup(q, CUR_PTR - q);
pcercuei 0:03b5121a232e 2793 NEXT;
pcercuei 0:03b5121a232e 2794 }
pcercuei 0:03b5121a232e 2795 } else {
pcercuei 0:03b5121a232e 2796 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
pcercuei 0:03b5121a232e 2797 " or ' expected\n", NULL, NULL);
pcercuei 0:03b5121a232e 2798 }
pcercuei 0:03b5121a232e 2799
pcercuei 0:03b5121a232e 2800 return(ret);
pcercuei 0:03b5121a232e 2801 }
pcercuei 0:03b5121a232e 2802
pcercuei 0:03b5121a232e 2803 /**
pcercuei 0:03b5121a232e 2804 * htmlParsePubidLiteral:
pcercuei 0:03b5121a232e 2805 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2806 *
pcercuei 0:03b5121a232e 2807 * parse an HTML public literal
pcercuei 0:03b5121a232e 2808 *
pcercuei 0:03b5121a232e 2809 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
pcercuei 0:03b5121a232e 2810 *
pcercuei 0:03b5121a232e 2811 * Returns the PubidLiteral parsed or NULL.
pcercuei 0:03b5121a232e 2812 */
pcercuei 0:03b5121a232e 2813
pcercuei 0:03b5121a232e 2814 static xmlChar *
pcercuei 0:03b5121a232e 2815 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2816 const xmlChar *q;
pcercuei 0:03b5121a232e 2817 xmlChar *ret = NULL;
pcercuei 0:03b5121a232e 2818 /*
pcercuei 0:03b5121a232e 2819 * Name ::= (Letter | '_') (NameChar)*
pcercuei 0:03b5121a232e 2820 */
pcercuei 0:03b5121a232e 2821 if (CUR == '"') {
pcercuei 0:03b5121a232e 2822 NEXT;
pcercuei 0:03b5121a232e 2823 q = CUR_PTR;
pcercuei 0:03b5121a232e 2824 while (IS_PUBIDCHAR_CH(CUR)) NEXT;
pcercuei 0:03b5121a232e 2825 if (CUR != '"') {
pcercuei 0:03b5121a232e 2826 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
pcercuei 0:03b5121a232e 2827 "Unfinished PubidLiteral\n", NULL, NULL);
pcercuei 0:03b5121a232e 2828 } else {
pcercuei 0:03b5121a232e 2829 ret = xmlStrndup(q, CUR_PTR - q);
pcercuei 0:03b5121a232e 2830 NEXT;
pcercuei 0:03b5121a232e 2831 }
pcercuei 0:03b5121a232e 2832 } else if (CUR == '\'') {
pcercuei 0:03b5121a232e 2833 NEXT;
pcercuei 0:03b5121a232e 2834 q = CUR_PTR;
pcercuei 0:03b5121a232e 2835 while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\''))
pcercuei 0:03b5121a232e 2836 NEXT;
pcercuei 0:03b5121a232e 2837 if (CUR != '\'') {
pcercuei 0:03b5121a232e 2838 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
pcercuei 0:03b5121a232e 2839 "Unfinished PubidLiteral\n", NULL, NULL);
pcercuei 0:03b5121a232e 2840 } else {
pcercuei 0:03b5121a232e 2841 ret = xmlStrndup(q, CUR_PTR - q);
pcercuei 0:03b5121a232e 2842 NEXT;
pcercuei 0:03b5121a232e 2843 }
pcercuei 0:03b5121a232e 2844 } else {
pcercuei 0:03b5121a232e 2845 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
pcercuei 0:03b5121a232e 2846 "PubidLiteral \" or ' expected\n", NULL, NULL);
pcercuei 0:03b5121a232e 2847 }
pcercuei 0:03b5121a232e 2848
pcercuei 0:03b5121a232e 2849 return(ret);
pcercuei 0:03b5121a232e 2850 }
pcercuei 0:03b5121a232e 2851
pcercuei 0:03b5121a232e 2852 /**
pcercuei 0:03b5121a232e 2853 * htmlParseScript:
pcercuei 0:03b5121a232e 2854 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2855 *
pcercuei 0:03b5121a232e 2856 * parse the content of an HTML SCRIPT or STYLE element
pcercuei 0:03b5121a232e 2857 * http://www.w3.org/TR/html4/sgml/dtd.html#Script
pcercuei 0:03b5121a232e 2858 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
pcercuei 0:03b5121a232e 2859 * http://www.w3.org/TR/html4/types.html#type-script
pcercuei 0:03b5121a232e 2860 * http://www.w3.org/TR/html4/types.html#h-6.15
pcercuei 0:03b5121a232e 2861 * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
pcercuei 0:03b5121a232e 2862 *
pcercuei 0:03b5121a232e 2863 * Script data ( %Script; in the DTD) can be the content of the SCRIPT
pcercuei 0:03b5121a232e 2864 * element and the value of intrinsic event attributes. User agents must
pcercuei 0:03b5121a232e 2865 * not evaluate script data as HTML markup but instead must pass it on as
pcercuei 0:03b5121a232e 2866 * data to a script engine.
pcercuei 0:03b5121a232e 2867 * NOTES:
pcercuei 0:03b5121a232e 2868 * - The content is passed like CDATA
pcercuei 0:03b5121a232e 2869 * - the attributes for style and scripting "onXXX" are also described
pcercuei 0:03b5121a232e 2870 * as CDATA but SGML allows entities references in attributes so their
pcercuei 0:03b5121a232e 2871 * processing is identical as other attributes
pcercuei 0:03b5121a232e 2872 */
pcercuei 0:03b5121a232e 2873 static void
pcercuei 0:03b5121a232e 2874 htmlParseScript(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 2875 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
pcercuei 0:03b5121a232e 2876 int nbchar = 0;
pcercuei 0:03b5121a232e 2877 int cur,l;
pcercuei 0:03b5121a232e 2878
pcercuei 0:03b5121a232e 2879 SHRINK;
pcercuei 0:03b5121a232e 2880 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 2881 while (IS_CHAR_CH(cur)) {
pcercuei 0:03b5121a232e 2882 if ((cur == '<') && (NXT(1) == '/')) {
pcercuei 0:03b5121a232e 2883 /*
pcercuei 0:03b5121a232e 2884 * One should break here, the specification is clear:
pcercuei 0:03b5121a232e 2885 * Authors should therefore escape "</" within the content.
pcercuei 0:03b5121a232e 2886 * Escape mechanisms are specific to each scripting or
pcercuei 0:03b5121a232e 2887 * style sheet language.
pcercuei 0:03b5121a232e 2888 *
pcercuei 0:03b5121a232e 2889 * In recovery mode, only break if end tag match the
pcercuei 0:03b5121a232e 2890 * current tag, effectively ignoring all tags inside the
pcercuei 0:03b5121a232e 2891 * script/style block and treating the entire block as
pcercuei 0:03b5121a232e 2892 * CDATA.
pcercuei 0:03b5121a232e 2893 */
pcercuei 0:03b5121a232e 2894 if (ctxt->recovery) {
pcercuei 0:03b5121a232e 2895 if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
pcercuei 0:03b5121a232e 2896 xmlStrlen(ctxt->name)) == 0)
pcercuei 0:03b5121a232e 2897 {
pcercuei 0:03b5121a232e 2898 break; /* while */
pcercuei 0:03b5121a232e 2899 } else {
pcercuei 0:03b5121a232e 2900 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
pcercuei 0:03b5121a232e 2901 "Element %s embeds close tag\n",
pcercuei 0:03b5121a232e 2902 ctxt->name, NULL);
pcercuei 0:03b5121a232e 2903 }
pcercuei 0:03b5121a232e 2904 } else {
pcercuei 0:03b5121a232e 2905 if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) ||
pcercuei 0:03b5121a232e 2906 ((NXT(2) >= 'a') && (NXT(2) <= 'z')))
pcercuei 0:03b5121a232e 2907 {
pcercuei 0:03b5121a232e 2908 break; /* while */
pcercuei 0:03b5121a232e 2909 }
pcercuei 0:03b5121a232e 2910 }
pcercuei 0:03b5121a232e 2911 }
pcercuei 0:03b5121a232e 2912 COPY_BUF(l,buf,nbchar,cur);
pcercuei 0:03b5121a232e 2913 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
pcercuei 0:03b5121a232e 2914 if (ctxt->sax->cdataBlock!= NULL) {
pcercuei 0:03b5121a232e 2915 /*
pcercuei 0:03b5121a232e 2916 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
pcercuei 0:03b5121a232e 2917 */
pcercuei 0:03b5121a232e 2918 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
pcercuei 0:03b5121a232e 2919 } else if (ctxt->sax->characters != NULL) {
pcercuei 0:03b5121a232e 2920 ctxt->sax->characters(ctxt->userData, buf, nbchar);
pcercuei 0:03b5121a232e 2921 }
pcercuei 0:03b5121a232e 2922 nbchar = 0;
pcercuei 0:03b5121a232e 2923 }
pcercuei 0:03b5121a232e 2924 GROW;
pcercuei 0:03b5121a232e 2925 NEXTL(l);
pcercuei 0:03b5121a232e 2926 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 2927 }
pcercuei 0:03b5121a232e 2928
pcercuei 0:03b5121a232e 2929 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
pcercuei 0:03b5121a232e 2930 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
pcercuei 0:03b5121a232e 2931 "Invalid char in CDATA 0x%X\n", cur);
pcercuei 0:03b5121a232e 2932 if (ctxt->input->cur < ctxt->input->end) {
pcercuei 0:03b5121a232e 2933 NEXT;
pcercuei 0:03b5121a232e 2934 }
pcercuei 0:03b5121a232e 2935 }
pcercuei 0:03b5121a232e 2936
pcercuei 0:03b5121a232e 2937 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
pcercuei 0:03b5121a232e 2938 if (ctxt->sax->cdataBlock!= NULL) {
pcercuei 0:03b5121a232e 2939 /*
pcercuei 0:03b5121a232e 2940 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
pcercuei 0:03b5121a232e 2941 */
pcercuei 0:03b5121a232e 2942 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
pcercuei 0:03b5121a232e 2943 } else if (ctxt->sax->characters != NULL) {
pcercuei 0:03b5121a232e 2944 ctxt->sax->characters(ctxt->userData, buf, nbchar);
pcercuei 0:03b5121a232e 2945 }
pcercuei 0:03b5121a232e 2946 }
pcercuei 0:03b5121a232e 2947 }
pcercuei 0:03b5121a232e 2948
pcercuei 0:03b5121a232e 2949
pcercuei 0:03b5121a232e 2950 /**
pcercuei 0:03b5121a232e 2951 * htmlParseCharDataInternal:
pcercuei 0:03b5121a232e 2952 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 2953 * @readahead: optional read ahead character in ascii range
pcercuei 0:03b5121a232e 2954 *
pcercuei 0:03b5121a232e 2955 * parse a CharData section.
pcercuei 0:03b5121a232e 2956 * if we are within a CDATA section ']]>' marks an end of section.
pcercuei 0:03b5121a232e 2957 *
pcercuei 0:03b5121a232e 2958 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
pcercuei 0:03b5121a232e 2959 */
pcercuei 0:03b5121a232e 2960
pcercuei 0:03b5121a232e 2961 static void
pcercuei 0:03b5121a232e 2962 htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) {
pcercuei 0:03b5121a232e 2963 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6];
pcercuei 0:03b5121a232e 2964 int nbchar = 0;
pcercuei 0:03b5121a232e 2965 int cur, l;
pcercuei 0:03b5121a232e 2966 int chunk = 0;
pcercuei 0:03b5121a232e 2967
pcercuei 0:03b5121a232e 2968 if (readahead)
pcercuei 0:03b5121a232e 2969 buf[nbchar++] = readahead;
pcercuei 0:03b5121a232e 2970
pcercuei 0:03b5121a232e 2971 SHRINK;
pcercuei 0:03b5121a232e 2972 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 2973 while (((cur != '<') || (ctxt->token == '<')) &&
pcercuei 0:03b5121a232e 2974 ((cur != '&') || (ctxt->token == '&')) &&
pcercuei 0:03b5121a232e 2975 (cur != 0)) {
pcercuei 0:03b5121a232e 2976 if (!(IS_CHAR(cur))) {
pcercuei 0:03b5121a232e 2977 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
pcercuei 0:03b5121a232e 2978 "Invalid char in CDATA 0x%X\n", cur);
pcercuei 0:03b5121a232e 2979 } else {
pcercuei 0:03b5121a232e 2980 COPY_BUF(l,buf,nbchar,cur);
pcercuei 0:03b5121a232e 2981 }
pcercuei 0:03b5121a232e 2982 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
pcercuei 0:03b5121a232e 2983 /*
pcercuei 0:03b5121a232e 2984 * Ok the segment is to be consumed as chars.
pcercuei 0:03b5121a232e 2985 */
pcercuei 0:03b5121a232e 2986 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
pcercuei 0:03b5121a232e 2987 if (areBlanks(ctxt, buf, nbchar)) {
pcercuei 0:03b5121a232e 2988 if (ctxt->keepBlanks) {
pcercuei 0:03b5121a232e 2989 if (ctxt->sax->characters != NULL)
pcercuei 0:03b5121a232e 2990 ctxt->sax->characters(ctxt->userData, buf, nbchar);
pcercuei 0:03b5121a232e 2991 } else {
pcercuei 0:03b5121a232e 2992 if (ctxt->sax->ignorableWhitespace != NULL)
pcercuei 0:03b5121a232e 2993 ctxt->sax->ignorableWhitespace(ctxt->userData,
pcercuei 0:03b5121a232e 2994 buf, nbchar);
pcercuei 0:03b5121a232e 2995 }
pcercuei 0:03b5121a232e 2996 } else {
pcercuei 0:03b5121a232e 2997 htmlCheckParagraph(ctxt);
pcercuei 0:03b5121a232e 2998 if (ctxt->sax->characters != NULL)
pcercuei 0:03b5121a232e 2999 ctxt->sax->characters(ctxt->userData, buf, nbchar);
pcercuei 0:03b5121a232e 3000 }
pcercuei 0:03b5121a232e 3001 }
pcercuei 0:03b5121a232e 3002 nbchar = 0;
pcercuei 0:03b5121a232e 3003 }
pcercuei 0:03b5121a232e 3004 NEXTL(l);
pcercuei 0:03b5121a232e 3005 chunk++;
pcercuei 0:03b5121a232e 3006 if (chunk > HTML_PARSER_BUFFER_SIZE) {
pcercuei 0:03b5121a232e 3007 chunk = 0;
pcercuei 0:03b5121a232e 3008 SHRINK;
pcercuei 0:03b5121a232e 3009 GROW;
pcercuei 0:03b5121a232e 3010 }
pcercuei 0:03b5121a232e 3011 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 3012 if (cur == 0) {
pcercuei 0:03b5121a232e 3013 SHRINK;
pcercuei 0:03b5121a232e 3014 GROW;
pcercuei 0:03b5121a232e 3015 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 3016 }
pcercuei 0:03b5121a232e 3017 }
pcercuei 0:03b5121a232e 3018 if (nbchar != 0) {
pcercuei 0:03b5121a232e 3019 buf[nbchar] = 0;
pcercuei 0:03b5121a232e 3020
pcercuei 0:03b5121a232e 3021 /*
pcercuei 0:03b5121a232e 3022 * Ok the segment is to be consumed as chars.
pcercuei 0:03b5121a232e 3023 */
pcercuei 0:03b5121a232e 3024 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
pcercuei 0:03b5121a232e 3025 if (areBlanks(ctxt, buf, nbchar)) {
pcercuei 0:03b5121a232e 3026 if (ctxt->keepBlanks) {
pcercuei 0:03b5121a232e 3027 if (ctxt->sax->characters != NULL)
pcercuei 0:03b5121a232e 3028 ctxt->sax->characters(ctxt->userData, buf, nbchar);
pcercuei 0:03b5121a232e 3029 } else {
pcercuei 0:03b5121a232e 3030 if (ctxt->sax->ignorableWhitespace != NULL)
pcercuei 0:03b5121a232e 3031 ctxt->sax->ignorableWhitespace(ctxt->userData,
pcercuei 0:03b5121a232e 3032 buf, nbchar);
pcercuei 0:03b5121a232e 3033 }
pcercuei 0:03b5121a232e 3034 } else {
pcercuei 0:03b5121a232e 3035 htmlCheckParagraph(ctxt);
pcercuei 0:03b5121a232e 3036 if (ctxt->sax->characters != NULL)
pcercuei 0:03b5121a232e 3037 ctxt->sax->characters(ctxt->userData, buf, nbchar);
pcercuei 0:03b5121a232e 3038 }
pcercuei 0:03b5121a232e 3039 }
pcercuei 0:03b5121a232e 3040 } else {
pcercuei 0:03b5121a232e 3041 /*
pcercuei 0:03b5121a232e 3042 * Loop detection
pcercuei 0:03b5121a232e 3043 */
pcercuei 0:03b5121a232e 3044 if (cur == 0)
pcercuei 0:03b5121a232e 3045 ctxt->instate = XML_PARSER_EOF;
pcercuei 0:03b5121a232e 3046 }
pcercuei 0:03b5121a232e 3047 }
pcercuei 0:03b5121a232e 3048
pcercuei 0:03b5121a232e 3049 /**
pcercuei 0:03b5121a232e 3050 * htmlParseCharData:
pcercuei 0:03b5121a232e 3051 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3052 *
pcercuei 0:03b5121a232e 3053 * parse a CharData section.
pcercuei 0:03b5121a232e 3054 * if we are within a CDATA section ']]>' marks an end of section.
pcercuei 0:03b5121a232e 3055 *
pcercuei 0:03b5121a232e 3056 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
pcercuei 0:03b5121a232e 3057 */
pcercuei 0:03b5121a232e 3058
pcercuei 0:03b5121a232e 3059 static void
pcercuei 0:03b5121a232e 3060 htmlParseCharData(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 3061 htmlParseCharDataInternal(ctxt, 0);
pcercuei 0:03b5121a232e 3062 }
pcercuei 0:03b5121a232e 3063
pcercuei 0:03b5121a232e 3064 /**
pcercuei 0:03b5121a232e 3065 * htmlParseExternalID:
pcercuei 0:03b5121a232e 3066 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3067 * @publicID: a xmlChar** receiving PubidLiteral
pcercuei 0:03b5121a232e 3068 *
pcercuei 0:03b5121a232e 3069 * Parse an External ID or a Public ID
pcercuei 0:03b5121a232e 3070 *
pcercuei 0:03b5121a232e 3071 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
pcercuei 0:03b5121a232e 3072 * | 'PUBLIC' S PubidLiteral S SystemLiteral
pcercuei 0:03b5121a232e 3073 *
pcercuei 0:03b5121a232e 3074 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
pcercuei 0:03b5121a232e 3075 *
pcercuei 0:03b5121a232e 3076 * Returns the function returns SystemLiteral and in the second
pcercuei 0:03b5121a232e 3077 * case publicID receives PubidLiteral, is strict is off
pcercuei 0:03b5121a232e 3078 * it is possible to return NULL and have publicID set.
pcercuei 0:03b5121a232e 3079 */
pcercuei 0:03b5121a232e 3080
pcercuei 0:03b5121a232e 3081 static xmlChar *
pcercuei 0:03b5121a232e 3082 htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
pcercuei 0:03b5121a232e 3083 xmlChar *URI = NULL;
pcercuei 0:03b5121a232e 3084
pcercuei 0:03b5121a232e 3085 if ((UPPER == 'S') && (UPP(1) == 'Y') &&
pcercuei 0:03b5121a232e 3086 (UPP(2) == 'S') && (UPP(3) == 'T') &&
pcercuei 0:03b5121a232e 3087 (UPP(4) == 'E') && (UPP(5) == 'M')) {
pcercuei 0:03b5121a232e 3088 SKIP(6);
pcercuei 0:03b5121a232e 3089 if (!IS_BLANK_CH(CUR)) {
pcercuei 0:03b5121a232e 3090 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
pcercuei 0:03b5121a232e 3091 "Space required after 'SYSTEM'\n", NULL, NULL);
pcercuei 0:03b5121a232e 3092 }
pcercuei 0:03b5121a232e 3093 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3094 URI = htmlParseSystemLiteral(ctxt);
pcercuei 0:03b5121a232e 3095 if (URI == NULL) {
pcercuei 0:03b5121a232e 3096 htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
pcercuei 0:03b5121a232e 3097 "htmlParseExternalID: SYSTEM, no URI\n", NULL, NULL);
pcercuei 0:03b5121a232e 3098 }
pcercuei 0:03b5121a232e 3099 } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
pcercuei 0:03b5121a232e 3100 (UPP(2) == 'B') && (UPP(3) == 'L') &&
pcercuei 0:03b5121a232e 3101 (UPP(4) == 'I') && (UPP(5) == 'C')) {
pcercuei 0:03b5121a232e 3102 SKIP(6);
pcercuei 0:03b5121a232e 3103 if (!IS_BLANK_CH(CUR)) {
pcercuei 0:03b5121a232e 3104 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
pcercuei 0:03b5121a232e 3105 "Space required after 'PUBLIC'\n", NULL, NULL);
pcercuei 0:03b5121a232e 3106 }
pcercuei 0:03b5121a232e 3107 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3108 *publicID = htmlParsePubidLiteral(ctxt);
pcercuei 0:03b5121a232e 3109 if (*publicID == NULL) {
pcercuei 0:03b5121a232e 3110 htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
pcercuei 0:03b5121a232e 3111 "htmlParseExternalID: PUBLIC, no Public Identifier\n",
pcercuei 0:03b5121a232e 3112 NULL, NULL);
pcercuei 0:03b5121a232e 3113 }
pcercuei 0:03b5121a232e 3114 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3115 if ((CUR == '"') || (CUR == '\'')) {
pcercuei 0:03b5121a232e 3116 URI = htmlParseSystemLiteral(ctxt);
pcercuei 0:03b5121a232e 3117 }
pcercuei 0:03b5121a232e 3118 }
pcercuei 0:03b5121a232e 3119 return(URI);
pcercuei 0:03b5121a232e 3120 }
pcercuei 0:03b5121a232e 3121
pcercuei 0:03b5121a232e 3122 /**
pcercuei 0:03b5121a232e 3123 * xmlParsePI:
pcercuei 0:03b5121a232e 3124 * @ctxt: an XML parser context
pcercuei 0:03b5121a232e 3125 *
pcercuei 0:03b5121a232e 3126 * parse an XML Processing Instruction.
pcercuei 0:03b5121a232e 3127 *
pcercuei 0:03b5121a232e 3128 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
pcercuei 0:03b5121a232e 3129 */
pcercuei 0:03b5121a232e 3130 static void
pcercuei 0:03b5121a232e 3131 htmlParsePI(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 3132 xmlChar *buf = NULL;
pcercuei 0:03b5121a232e 3133 int len = 0;
pcercuei 0:03b5121a232e 3134 int size = HTML_PARSER_BUFFER_SIZE;
pcercuei 0:03b5121a232e 3135 int cur, l;
pcercuei 0:03b5121a232e 3136 const xmlChar *target;
pcercuei 0:03b5121a232e 3137 xmlParserInputState state;
pcercuei 0:03b5121a232e 3138 int count = 0;
pcercuei 0:03b5121a232e 3139
pcercuei 0:03b5121a232e 3140 if ((RAW == '<') && (NXT(1) == '?')) {
pcercuei 0:03b5121a232e 3141 state = ctxt->instate;
pcercuei 0:03b5121a232e 3142 ctxt->instate = XML_PARSER_PI;
pcercuei 0:03b5121a232e 3143 /*
pcercuei 0:03b5121a232e 3144 * this is a Processing Instruction.
pcercuei 0:03b5121a232e 3145 */
pcercuei 0:03b5121a232e 3146 SKIP(2);
pcercuei 0:03b5121a232e 3147 SHRINK;
pcercuei 0:03b5121a232e 3148
pcercuei 0:03b5121a232e 3149 /*
pcercuei 0:03b5121a232e 3150 * Parse the target name and check for special support like
pcercuei 0:03b5121a232e 3151 * namespace.
pcercuei 0:03b5121a232e 3152 */
pcercuei 0:03b5121a232e 3153 target = htmlParseName(ctxt);
pcercuei 0:03b5121a232e 3154 if (target != NULL) {
pcercuei 0:03b5121a232e 3155 if (RAW == '>') {
pcercuei 0:03b5121a232e 3156 SKIP(1);
pcercuei 0:03b5121a232e 3157
pcercuei 0:03b5121a232e 3158 /*
pcercuei 0:03b5121a232e 3159 * SAX: PI detected.
pcercuei 0:03b5121a232e 3160 */
pcercuei 0:03b5121a232e 3161 if ((ctxt->sax) && (!ctxt->disableSAX) &&
pcercuei 0:03b5121a232e 3162 (ctxt->sax->processingInstruction != NULL))
pcercuei 0:03b5121a232e 3163 ctxt->sax->processingInstruction(ctxt->userData,
pcercuei 0:03b5121a232e 3164 target, NULL);
pcercuei 0:03b5121a232e 3165 ctxt->instate = state;
pcercuei 0:03b5121a232e 3166 return;
pcercuei 0:03b5121a232e 3167 }
pcercuei 0:03b5121a232e 3168 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
pcercuei 0:03b5121a232e 3169 if (buf == NULL) {
pcercuei 0:03b5121a232e 3170 htmlErrMemory(ctxt, NULL);
pcercuei 0:03b5121a232e 3171 ctxt->instate = state;
pcercuei 0:03b5121a232e 3172 return;
pcercuei 0:03b5121a232e 3173 }
pcercuei 0:03b5121a232e 3174 cur = CUR;
pcercuei 0:03b5121a232e 3175 if (!IS_BLANK(cur)) {
pcercuei 0:03b5121a232e 3176 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
pcercuei 0:03b5121a232e 3177 "ParsePI: PI %s space expected\n", target, NULL);
pcercuei 0:03b5121a232e 3178 }
pcercuei 0:03b5121a232e 3179 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3180 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 3181 while (IS_CHAR(cur) && (cur != '>')) {
pcercuei 0:03b5121a232e 3182 if (len + 5 >= size) {
pcercuei 0:03b5121a232e 3183 xmlChar *tmp;
pcercuei 0:03b5121a232e 3184
pcercuei 0:03b5121a232e 3185 size *= 2;
pcercuei 0:03b5121a232e 3186 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
pcercuei 0:03b5121a232e 3187 if (tmp == NULL) {
pcercuei 0:03b5121a232e 3188 htmlErrMemory(ctxt, NULL);
pcercuei 0:03b5121a232e 3189 xmlFree(buf);
pcercuei 0:03b5121a232e 3190 ctxt->instate = state;
pcercuei 0:03b5121a232e 3191 return;
pcercuei 0:03b5121a232e 3192 }
pcercuei 0:03b5121a232e 3193 buf = tmp;
pcercuei 0:03b5121a232e 3194 }
pcercuei 0:03b5121a232e 3195 count++;
pcercuei 0:03b5121a232e 3196 if (count > 50) {
pcercuei 0:03b5121a232e 3197 GROW;
pcercuei 0:03b5121a232e 3198 count = 0;
pcercuei 0:03b5121a232e 3199 }
pcercuei 0:03b5121a232e 3200 COPY_BUF(l,buf,len,cur);
pcercuei 0:03b5121a232e 3201 NEXTL(l);
pcercuei 0:03b5121a232e 3202 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 3203 if (cur == 0) {
pcercuei 0:03b5121a232e 3204 SHRINK;
pcercuei 0:03b5121a232e 3205 GROW;
pcercuei 0:03b5121a232e 3206 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 3207 }
pcercuei 0:03b5121a232e 3208 }
pcercuei 0:03b5121a232e 3209 buf[len] = 0;
pcercuei 0:03b5121a232e 3210 if (cur != '>') {
pcercuei 0:03b5121a232e 3211 htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
pcercuei 0:03b5121a232e 3212 "ParsePI: PI %s never end ...\n", target, NULL);
pcercuei 0:03b5121a232e 3213 } else {
pcercuei 0:03b5121a232e 3214 SKIP(1);
pcercuei 0:03b5121a232e 3215
pcercuei 0:03b5121a232e 3216 /*
pcercuei 0:03b5121a232e 3217 * SAX: PI detected.
pcercuei 0:03b5121a232e 3218 */
pcercuei 0:03b5121a232e 3219 if ((ctxt->sax) && (!ctxt->disableSAX) &&
pcercuei 0:03b5121a232e 3220 (ctxt->sax->processingInstruction != NULL))
pcercuei 0:03b5121a232e 3221 ctxt->sax->processingInstruction(ctxt->userData,
pcercuei 0:03b5121a232e 3222 target, buf);
pcercuei 0:03b5121a232e 3223 }
pcercuei 0:03b5121a232e 3224 xmlFree(buf);
pcercuei 0:03b5121a232e 3225 } else {
pcercuei 0:03b5121a232e 3226 htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
pcercuei 0:03b5121a232e 3227 "PI is not started correctly", NULL, NULL);
pcercuei 0:03b5121a232e 3228 }
pcercuei 0:03b5121a232e 3229 ctxt->instate = state;
pcercuei 0:03b5121a232e 3230 }
pcercuei 0:03b5121a232e 3231 }
pcercuei 0:03b5121a232e 3232
pcercuei 0:03b5121a232e 3233 /**
pcercuei 0:03b5121a232e 3234 * htmlParseComment:
pcercuei 0:03b5121a232e 3235 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3236 *
pcercuei 0:03b5121a232e 3237 * Parse an XML (SGML) comment <!-- .... -->
pcercuei 0:03b5121a232e 3238 *
pcercuei 0:03b5121a232e 3239 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
pcercuei 0:03b5121a232e 3240 */
pcercuei 0:03b5121a232e 3241 static void
pcercuei 0:03b5121a232e 3242 htmlParseComment(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 3243 xmlChar *buf = NULL;
pcercuei 0:03b5121a232e 3244 int len;
pcercuei 0:03b5121a232e 3245 int size = HTML_PARSER_BUFFER_SIZE;
pcercuei 0:03b5121a232e 3246 int q, ql;
pcercuei 0:03b5121a232e 3247 int r, rl;
pcercuei 0:03b5121a232e 3248 int cur, l;
pcercuei 0:03b5121a232e 3249 xmlParserInputState state;
pcercuei 0:03b5121a232e 3250
pcercuei 0:03b5121a232e 3251 /*
pcercuei 0:03b5121a232e 3252 * Check that there is a comment right here.
pcercuei 0:03b5121a232e 3253 */
pcercuei 0:03b5121a232e 3254 if ((RAW != '<') || (NXT(1) != '!') ||
pcercuei 0:03b5121a232e 3255 (NXT(2) != '-') || (NXT(3) != '-')) return;
pcercuei 0:03b5121a232e 3256
pcercuei 0:03b5121a232e 3257 state = ctxt->instate;
pcercuei 0:03b5121a232e 3258 ctxt->instate = XML_PARSER_COMMENT;
pcercuei 0:03b5121a232e 3259 SHRINK;
pcercuei 0:03b5121a232e 3260 SKIP(4);
pcercuei 0:03b5121a232e 3261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
pcercuei 0:03b5121a232e 3262 if (buf == NULL) {
pcercuei 0:03b5121a232e 3263 htmlErrMemory(ctxt, "buffer allocation failed\n");
pcercuei 0:03b5121a232e 3264 ctxt->instate = state;
pcercuei 0:03b5121a232e 3265 return;
pcercuei 0:03b5121a232e 3266 }
pcercuei 0:03b5121a232e 3267 len = 0;
pcercuei 0:03b5121a232e 3268 buf[len] = 0;
pcercuei 0:03b5121a232e 3269 q = CUR_CHAR(ql);
pcercuei 0:03b5121a232e 3270 if (!IS_CHAR(q))
pcercuei 0:03b5121a232e 3271 goto unfinished;
pcercuei 0:03b5121a232e 3272 NEXTL(ql);
pcercuei 0:03b5121a232e 3273 r = CUR_CHAR(rl);
pcercuei 0:03b5121a232e 3274 if (!IS_CHAR(r))
pcercuei 0:03b5121a232e 3275 goto unfinished;
pcercuei 0:03b5121a232e 3276 NEXTL(rl);
pcercuei 0:03b5121a232e 3277 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 3278 while (IS_CHAR(cur) &&
pcercuei 0:03b5121a232e 3279 ((cur != '>') ||
pcercuei 0:03b5121a232e 3280 (r != '-') || (q != '-'))) {
pcercuei 0:03b5121a232e 3281 if (len + 5 >= size) {
pcercuei 0:03b5121a232e 3282 xmlChar *tmp;
pcercuei 0:03b5121a232e 3283
pcercuei 0:03b5121a232e 3284 size *= 2;
pcercuei 0:03b5121a232e 3285 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
pcercuei 0:03b5121a232e 3286 if (tmp == NULL) {
pcercuei 0:03b5121a232e 3287 xmlFree(buf);
pcercuei 0:03b5121a232e 3288 htmlErrMemory(ctxt, "growing buffer failed\n");
pcercuei 0:03b5121a232e 3289 ctxt->instate = state;
pcercuei 0:03b5121a232e 3290 return;
pcercuei 0:03b5121a232e 3291 }
pcercuei 0:03b5121a232e 3292 buf = tmp;
pcercuei 0:03b5121a232e 3293 }
pcercuei 0:03b5121a232e 3294 COPY_BUF(ql,buf,len,q);
pcercuei 0:03b5121a232e 3295 q = r;
pcercuei 0:03b5121a232e 3296 ql = rl;
pcercuei 0:03b5121a232e 3297 r = cur;
pcercuei 0:03b5121a232e 3298 rl = l;
pcercuei 0:03b5121a232e 3299 NEXTL(l);
pcercuei 0:03b5121a232e 3300 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 3301 if (cur == 0) {
pcercuei 0:03b5121a232e 3302 SHRINK;
pcercuei 0:03b5121a232e 3303 GROW;
pcercuei 0:03b5121a232e 3304 cur = CUR_CHAR(l);
pcercuei 0:03b5121a232e 3305 }
pcercuei 0:03b5121a232e 3306 }
pcercuei 0:03b5121a232e 3307 buf[len] = 0;
pcercuei 0:03b5121a232e 3308 if (IS_CHAR(cur)) {
pcercuei 0:03b5121a232e 3309 NEXT;
pcercuei 0:03b5121a232e 3310 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
pcercuei 0:03b5121a232e 3311 (!ctxt->disableSAX))
pcercuei 0:03b5121a232e 3312 ctxt->sax->comment(ctxt->userData, buf);
pcercuei 0:03b5121a232e 3313 xmlFree(buf);
pcercuei 0:03b5121a232e 3314 ctxt->instate = state;
pcercuei 0:03b5121a232e 3315 return;
pcercuei 0:03b5121a232e 3316 }
pcercuei 0:03b5121a232e 3317
pcercuei 0:03b5121a232e 3318 unfinished:
pcercuei 0:03b5121a232e 3319 htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
pcercuei 0:03b5121a232e 3320 "Comment not terminated \n<!--%.50s\n", buf, NULL);
pcercuei 0:03b5121a232e 3321 xmlFree(buf);
pcercuei 0:03b5121a232e 3322 }
pcercuei 0:03b5121a232e 3323
pcercuei 0:03b5121a232e 3324 /**
pcercuei 0:03b5121a232e 3325 * htmlParseCharRef:
pcercuei 0:03b5121a232e 3326 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3327 *
pcercuei 0:03b5121a232e 3328 * parse Reference declarations
pcercuei 0:03b5121a232e 3329 *
pcercuei 0:03b5121a232e 3330 * [66] CharRef ::= '&#' [0-9]+ ';' |
pcercuei 0:03b5121a232e 3331 * '&#x' [0-9a-fA-F]+ ';'
pcercuei 0:03b5121a232e 3332 *
pcercuei 0:03b5121a232e 3333 * Returns the value parsed (as an int)
pcercuei 0:03b5121a232e 3334 */
pcercuei 0:03b5121a232e 3335 int
pcercuei 0:03b5121a232e 3336 htmlParseCharRef(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 3337 int val = 0;
pcercuei 0:03b5121a232e 3338
pcercuei 0:03b5121a232e 3339 if ((ctxt == NULL) || (ctxt->input == NULL)) {
pcercuei 0:03b5121a232e 3340 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 3341 "htmlParseCharRef: context error\n",
pcercuei 0:03b5121a232e 3342 NULL, NULL);
pcercuei 0:03b5121a232e 3343 return(0);
pcercuei 0:03b5121a232e 3344 }
pcercuei 0:03b5121a232e 3345 if ((CUR == '&') && (NXT(1) == '#') &&
pcercuei 0:03b5121a232e 3346 ((NXT(2) == 'x') || NXT(2) == 'X')) {
pcercuei 0:03b5121a232e 3347 SKIP(3);
pcercuei 0:03b5121a232e 3348 while (CUR != ';') {
pcercuei 0:03b5121a232e 3349 if ((CUR >= '0') && (CUR <= '9'))
pcercuei 0:03b5121a232e 3350 val = val * 16 + (CUR - '0');
pcercuei 0:03b5121a232e 3351 else if ((CUR >= 'a') && (CUR <= 'f'))
pcercuei 0:03b5121a232e 3352 val = val * 16 + (CUR - 'a') + 10;
pcercuei 0:03b5121a232e 3353 else if ((CUR >= 'A') && (CUR <= 'F'))
pcercuei 0:03b5121a232e 3354 val = val * 16 + (CUR - 'A') + 10;
pcercuei 0:03b5121a232e 3355 else {
pcercuei 0:03b5121a232e 3356 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
pcercuei 0:03b5121a232e 3357 "htmlParseCharRef: missing semicolon\n",
pcercuei 0:03b5121a232e 3358 NULL, NULL);
pcercuei 0:03b5121a232e 3359 break;
pcercuei 0:03b5121a232e 3360 }
pcercuei 0:03b5121a232e 3361 NEXT;
pcercuei 0:03b5121a232e 3362 }
pcercuei 0:03b5121a232e 3363 if (CUR == ';')
pcercuei 0:03b5121a232e 3364 NEXT;
pcercuei 0:03b5121a232e 3365 } else if ((CUR == '&') && (NXT(1) == '#')) {
pcercuei 0:03b5121a232e 3366 SKIP(2);
pcercuei 0:03b5121a232e 3367 while (CUR != ';') {
pcercuei 0:03b5121a232e 3368 if ((CUR >= '0') && (CUR <= '9'))
pcercuei 0:03b5121a232e 3369 val = val * 10 + (CUR - '0');
pcercuei 0:03b5121a232e 3370 else {
pcercuei 0:03b5121a232e 3371 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
pcercuei 0:03b5121a232e 3372 "htmlParseCharRef: missing semicolon\n",
pcercuei 0:03b5121a232e 3373 NULL, NULL);
pcercuei 0:03b5121a232e 3374 break;
pcercuei 0:03b5121a232e 3375 }
pcercuei 0:03b5121a232e 3376 NEXT;
pcercuei 0:03b5121a232e 3377 }
pcercuei 0:03b5121a232e 3378 if (CUR == ';')
pcercuei 0:03b5121a232e 3379 NEXT;
pcercuei 0:03b5121a232e 3380 } else {
pcercuei 0:03b5121a232e 3381 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
pcercuei 0:03b5121a232e 3382 "htmlParseCharRef: invalid value\n", NULL, NULL);
pcercuei 0:03b5121a232e 3383 }
pcercuei 0:03b5121a232e 3384 /*
pcercuei 0:03b5121a232e 3385 * Check the value IS_CHAR ...
pcercuei 0:03b5121a232e 3386 */
pcercuei 0:03b5121a232e 3387 if (IS_CHAR(val)) {
pcercuei 0:03b5121a232e 3388 return(val);
pcercuei 0:03b5121a232e 3389 } else {
pcercuei 0:03b5121a232e 3390 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
pcercuei 0:03b5121a232e 3391 "htmlParseCharRef: invalid xmlChar value %d\n",
pcercuei 0:03b5121a232e 3392 val);
pcercuei 0:03b5121a232e 3393 }
pcercuei 0:03b5121a232e 3394 return(0);
pcercuei 0:03b5121a232e 3395 }
pcercuei 0:03b5121a232e 3396
pcercuei 0:03b5121a232e 3397
pcercuei 0:03b5121a232e 3398 /**
pcercuei 0:03b5121a232e 3399 * htmlParseDocTypeDecl:
pcercuei 0:03b5121a232e 3400 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3401 *
pcercuei 0:03b5121a232e 3402 * parse a DOCTYPE declaration
pcercuei 0:03b5121a232e 3403 *
pcercuei 0:03b5121a232e 3404 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
pcercuei 0:03b5121a232e 3405 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
pcercuei 0:03b5121a232e 3406 */
pcercuei 0:03b5121a232e 3407
pcercuei 0:03b5121a232e 3408 static void
pcercuei 0:03b5121a232e 3409 htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 3410 const xmlChar *name;
pcercuei 0:03b5121a232e 3411 xmlChar *ExternalID = NULL;
pcercuei 0:03b5121a232e 3412 xmlChar *URI = NULL;
pcercuei 0:03b5121a232e 3413
pcercuei 0:03b5121a232e 3414 /*
pcercuei 0:03b5121a232e 3415 * We know that '<!DOCTYPE' has been detected.
pcercuei 0:03b5121a232e 3416 */
pcercuei 0:03b5121a232e 3417 SKIP(9);
pcercuei 0:03b5121a232e 3418
pcercuei 0:03b5121a232e 3419 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3420
pcercuei 0:03b5121a232e 3421 /*
pcercuei 0:03b5121a232e 3422 * Parse the DOCTYPE name.
pcercuei 0:03b5121a232e 3423 */
pcercuei 0:03b5121a232e 3424 name = htmlParseName(ctxt);
pcercuei 0:03b5121a232e 3425 if (name == NULL) {
pcercuei 0:03b5121a232e 3426 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
pcercuei 0:03b5121a232e 3427 "htmlParseDocTypeDecl : no DOCTYPE name !\n",
pcercuei 0:03b5121a232e 3428 NULL, NULL);
pcercuei 0:03b5121a232e 3429 }
pcercuei 0:03b5121a232e 3430 /*
pcercuei 0:03b5121a232e 3431 * Check that upper(name) == "HTML" !!!!!!!!!!!!!
pcercuei 0:03b5121a232e 3432 */
pcercuei 0:03b5121a232e 3433
pcercuei 0:03b5121a232e 3434 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3435
pcercuei 0:03b5121a232e 3436 /*
pcercuei 0:03b5121a232e 3437 * Check for SystemID and ExternalID
pcercuei 0:03b5121a232e 3438 */
pcercuei 0:03b5121a232e 3439 URI = htmlParseExternalID(ctxt, &ExternalID);
pcercuei 0:03b5121a232e 3440 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3441
pcercuei 0:03b5121a232e 3442 /*
pcercuei 0:03b5121a232e 3443 * We should be at the end of the DOCTYPE declaration.
pcercuei 0:03b5121a232e 3444 */
pcercuei 0:03b5121a232e 3445 if (CUR != '>') {
pcercuei 0:03b5121a232e 3446 htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
pcercuei 0:03b5121a232e 3447 "DOCTYPE improperly terminated\n", NULL, NULL);
pcercuei 0:03b5121a232e 3448 /* We shouldn't try to resynchronize ... */
pcercuei 0:03b5121a232e 3449 }
pcercuei 0:03b5121a232e 3450 NEXT;
pcercuei 0:03b5121a232e 3451
pcercuei 0:03b5121a232e 3452 /*
pcercuei 0:03b5121a232e 3453 * Create or update the document accordingly to the DOCTYPE
pcercuei 0:03b5121a232e 3454 */
pcercuei 0:03b5121a232e 3455 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
pcercuei 0:03b5121a232e 3456 (!ctxt->disableSAX))
pcercuei 0:03b5121a232e 3457 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
pcercuei 0:03b5121a232e 3458
pcercuei 0:03b5121a232e 3459 /*
pcercuei 0:03b5121a232e 3460 * Cleanup, since we don't use all those identifiers
pcercuei 0:03b5121a232e 3461 */
pcercuei 0:03b5121a232e 3462 if (URI != NULL) xmlFree(URI);
pcercuei 0:03b5121a232e 3463 if (ExternalID != NULL) xmlFree(ExternalID);
pcercuei 0:03b5121a232e 3464 }
pcercuei 0:03b5121a232e 3465
pcercuei 0:03b5121a232e 3466 /**
pcercuei 0:03b5121a232e 3467 * htmlParseAttribute:
pcercuei 0:03b5121a232e 3468 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3469 * @value: a xmlChar ** used to store the value of the attribute
pcercuei 0:03b5121a232e 3470 *
pcercuei 0:03b5121a232e 3471 * parse an attribute
pcercuei 0:03b5121a232e 3472 *
pcercuei 0:03b5121a232e 3473 * [41] Attribute ::= Name Eq AttValue
pcercuei 0:03b5121a232e 3474 *
pcercuei 0:03b5121a232e 3475 * [25] Eq ::= S? '=' S?
pcercuei 0:03b5121a232e 3476 *
pcercuei 0:03b5121a232e 3477 * With namespace:
pcercuei 0:03b5121a232e 3478 *
pcercuei 0:03b5121a232e 3479 * [NS 11] Attribute ::= QName Eq AttValue
pcercuei 0:03b5121a232e 3480 *
pcercuei 0:03b5121a232e 3481 * Also the case QName == xmlns:??? is handled independently as a namespace
pcercuei 0:03b5121a232e 3482 * definition.
pcercuei 0:03b5121a232e 3483 *
pcercuei 0:03b5121a232e 3484 * Returns the attribute name, and the value in *value.
pcercuei 0:03b5121a232e 3485 */
pcercuei 0:03b5121a232e 3486
pcercuei 0:03b5121a232e 3487 static const xmlChar *
pcercuei 0:03b5121a232e 3488 htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
pcercuei 0:03b5121a232e 3489 const xmlChar *name;
pcercuei 0:03b5121a232e 3490 xmlChar *val = NULL;
pcercuei 0:03b5121a232e 3491
pcercuei 0:03b5121a232e 3492 *value = NULL;
pcercuei 0:03b5121a232e 3493 name = htmlParseHTMLName(ctxt);
pcercuei 0:03b5121a232e 3494 if (name == NULL) {
pcercuei 0:03b5121a232e 3495 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
pcercuei 0:03b5121a232e 3496 "error parsing attribute name\n", NULL, NULL);
pcercuei 0:03b5121a232e 3497 return(NULL);
pcercuei 0:03b5121a232e 3498 }
pcercuei 0:03b5121a232e 3499
pcercuei 0:03b5121a232e 3500 /*
pcercuei 0:03b5121a232e 3501 * read the value
pcercuei 0:03b5121a232e 3502 */
pcercuei 0:03b5121a232e 3503 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3504 if (CUR == '=') {
pcercuei 0:03b5121a232e 3505 NEXT;
pcercuei 0:03b5121a232e 3506 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3507 val = htmlParseAttValue(ctxt);
pcercuei 0:03b5121a232e 3508 }
pcercuei 0:03b5121a232e 3509
pcercuei 0:03b5121a232e 3510 *value = val;
pcercuei 0:03b5121a232e 3511 return(name);
pcercuei 0:03b5121a232e 3512 }
pcercuei 0:03b5121a232e 3513
pcercuei 0:03b5121a232e 3514 /**
pcercuei 0:03b5121a232e 3515 * htmlCheckEncodingDirect:
pcercuei 0:03b5121a232e 3516 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3517 * @attvalue: the attribute value
pcercuei 0:03b5121a232e 3518 *
pcercuei 0:03b5121a232e 3519 * Checks an attribute value to detect
pcercuei 0:03b5121a232e 3520 * the encoding
pcercuei 0:03b5121a232e 3521 * If a new encoding is detected the parser is switched to decode
pcercuei 0:03b5121a232e 3522 * it and pass UTF8
pcercuei 0:03b5121a232e 3523 */
pcercuei 0:03b5121a232e 3524 static void
pcercuei 0:03b5121a232e 3525 htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
pcercuei 0:03b5121a232e 3526
pcercuei 0:03b5121a232e 3527 if ((ctxt == NULL) || (encoding == NULL) ||
pcercuei 0:03b5121a232e 3528 (ctxt->options & HTML_PARSE_IGNORE_ENC))
pcercuei 0:03b5121a232e 3529 return;
pcercuei 0:03b5121a232e 3530
pcercuei 0:03b5121a232e 3531 /* do not change encoding */
pcercuei 0:03b5121a232e 3532 if (ctxt->input->encoding != NULL)
pcercuei 0:03b5121a232e 3533 return;
pcercuei 0:03b5121a232e 3534
pcercuei 0:03b5121a232e 3535 if (encoding != NULL) {
pcercuei 0:03b5121a232e 3536 xmlCharEncoding enc;
pcercuei 0:03b5121a232e 3537 xmlCharEncodingHandlerPtr handler;
pcercuei 0:03b5121a232e 3538
pcercuei 0:03b5121a232e 3539 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
pcercuei 0:03b5121a232e 3540
pcercuei 0:03b5121a232e 3541 if (ctxt->input->encoding != NULL)
pcercuei 0:03b5121a232e 3542 xmlFree((xmlChar *) ctxt->input->encoding);
pcercuei 0:03b5121a232e 3543 ctxt->input->encoding = xmlStrdup(encoding);
pcercuei 0:03b5121a232e 3544
pcercuei 0:03b5121a232e 3545 enc = xmlParseCharEncoding((const char *) encoding);
pcercuei 0:03b5121a232e 3546 /*
pcercuei 0:03b5121a232e 3547 * registered set of known encodings
pcercuei 0:03b5121a232e 3548 */
pcercuei 0:03b5121a232e 3549 if (enc != XML_CHAR_ENCODING_ERROR) {
pcercuei 0:03b5121a232e 3550 if (((enc == XML_CHAR_ENCODING_UTF16LE) ||
pcercuei 0:03b5121a232e 3551 (enc == XML_CHAR_ENCODING_UTF16BE) ||
pcercuei 0:03b5121a232e 3552 (enc == XML_CHAR_ENCODING_UCS4LE) ||
pcercuei 0:03b5121a232e 3553 (enc == XML_CHAR_ENCODING_UCS4BE)) &&
pcercuei 0:03b5121a232e 3554 (ctxt->input->buf != NULL) &&
pcercuei 0:03b5121a232e 3555 (ctxt->input->buf->encoder == NULL)) {
pcercuei 0:03b5121a232e 3556 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
pcercuei 0:03b5121a232e 3557 "htmlCheckEncoding: wrong encoding meta\n",
pcercuei 0:03b5121a232e 3558 NULL, NULL);
pcercuei 0:03b5121a232e 3559 } else {
pcercuei 0:03b5121a232e 3560 xmlSwitchEncoding(ctxt, enc);
pcercuei 0:03b5121a232e 3561 }
pcercuei 0:03b5121a232e 3562 ctxt->charset = XML_CHAR_ENCODING_UTF8;
pcercuei 0:03b5121a232e 3563 } else {
pcercuei 0:03b5121a232e 3564 /*
pcercuei 0:03b5121a232e 3565 * fallback for unknown encodings
pcercuei 0:03b5121a232e 3566 */
pcercuei 0:03b5121a232e 3567 handler = xmlFindCharEncodingHandler((const char *) encoding);
pcercuei 0:03b5121a232e 3568 if (handler != NULL) {
pcercuei 0:03b5121a232e 3569 xmlSwitchToEncoding(ctxt, handler);
pcercuei 0:03b5121a232e 3570 ctxt->charset = XML_CHAR_ENCODING_UTF8;
pcercuei 0:03b5121a232e 3571 } else {
pcercuei 0:03b5121a232e 3572 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
pcercuei 0:03b5121a232e 3573 "htmlCheckEncoding: unknown encoding %s\n",
pcercuei 0:03b5121a232e 3574 encoding, NULL);
pcercuei 0:03b5121a232e 3575 }
pcercuei 0:03b5121a232e 3576 }
pcercuei 0:03b5121a232e 3577
pcercuei 0:03b5121a232e 3578 if ((ctxt->input->buf != NULL) &&
pcercuei 0:03b5121a232e 3579 (ctxt->input->buf->encoder != NULL) &&
pcercuei 0:03b5121a232e 3580 (ctxt->input->buf->raw != NULL) &&
pcercuei 0:03b5121a232e 3581 (ctxt->input->buf->buffer != NULL)) {
pcercuei 0:03b5121a232e 3582 int nbchars;
pcercuei 0:03b5121a232e 3583 int processed;
pcercuei 0:03b5121a232e 3584
pcercuei 0:03b5121a232e 3585 /*
pcercuei 0:03b5121a232e 3586 * convert as much as possible to the parser reading buffer.
pcercuei 0:03b5121a232e 3587 */
pcercuei 0:03b5121a232e 3588 processed = ctxt->input->cur - ctxt->input->base;
pcercuei 0:03b5121a232e 3589 xmlBufShrink(ctxt->input->buf->buffer, processed);
pcercuei 0:03b5121a232e 3590 nbchars = xmlCharEncInput(ctxt->input->buf, 1);
pcercuei 0:03b5121a232e 3591 if (nbchars < 0) {
pcercuei 0:03b5121a232e 3592 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
pcercuei 0:03b5121a232e 3593 "htmlCheckEncoding: encoder error\n",
pcercuei 0:03b5121a232e 3594 NULL, NULL);
pcercuei 0:03b5121a232e 3595 }
pcercuei 0:03b5121a232e 3596 xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
pcercuei 0:03b5121a232e 3597 }
pcercuei 0:03b5121a232e 3598 }
pcercuei 0:03b5121a232e 3599 }
pcercuei 0:03b5121a232e 3600
pcercuei 0:03b5121a232e 3601 /**
pcercuei 0:03b5121a232e 3602 * htmlCheckEncoding:
pcercuei 0:03b5121a232e 3603 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3604 * @attvalue: the attribute value
pcercuei 0:03b5121a232e 3605 *
pcercuei 0:03b5121a232e 3606 * Checks an http-equiv attribute from a Meta tag to detect
pcercuei 0:03b5121a232e 3607 * the encoding
pcercuei 0:03b5121a232e 3608 * If a new encoding is detected the parser is switched to decode
pcercuei 0:03b5121a232e 3609 * it and pass UTF8
pcercuei 0:03b5121a232e 3610 */
pcercuei 0:03b5121a232e 3611 static void
pcercuei 0:03b5121a232e 3612 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
pcercuei 0:03b5121a232e 3613 const xmlChar *encoding;
pcercuei 0:03b5121a232e 3614
pcercuei 0:03b5121a232e 3615 if (!attvalue)
pcercuei 0:03b5121a232e 3616 return;
pcercuei 0:03b5121a232e 3617
pcercuei 0:03b5121a232e 3618 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");
pcercuei 0:03b5121a232e 3619 if (encoding != NULL) {
pcercuei 0:03b5121a232e 3620 encoding += 7;
pcercuei 0:03b5121a232e 3621 }
pcercuei 0:03b5121a232e 3622 /*
pcercuei 0:03b5121a232e 3623 * skip blank
pcercuei 0:03b5121a232e 3624 */
pcercuei 0:03b5121a232e 3625 if (encoding && IS_BLANK_CH(*encoding))
pcercuei 0:03b5121a232e 3626 encoding = xmlStrcasestr(attvalue, BAD_CAST"=");
pcercuei 0:03b5121a232e 3627 if (encoding && *encoding == '=') {
pcercuei 0:03b5121a232e 3628 encoding ++;
pcercuei 0:03b5121a232e 3629 htmlCheckEncodingDirect(ctxt, encoding);
pcercuei 0:03b5121a232e 3630 }
pcercuei 0:03b5121a232e 3631 }
pcercuei 0:03b5121a232e 3632
pcercuei 0:03b5121a232e 3633 /**
pcercuei 0:03b5121a232e 3634 * htmlCheckMeta:
pcercuei 0:03b5121a232e 3635 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3636 * @atts: the attributes values
pcercuei 0:03b5121a232e 3637 *
pcercuei 0:03b5121a232e 3638 * Checks an attributes from a Meta tag
pcercuei 0:03b5121a232e 3639 */
pcercuei 0:03b5121a232e 3640 static void
pcercuei 0:03b5121a232e 3641 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
pcercuei 0:03b5121a232e 3642 int i;
pcercuei 0:03b5121a232e 3643 const xmlChar *att, *value;
pcercuei 0:03b5121a232e 3644 int http = 0;
pcercuei 0:03b5121a232e 3645 const xmlChar *content = NULL;
pcercuei 0:03b5121a232e 3646
pcercuei 0:03b5121a232e 3647 if ((ctxt == NULL) || (atts == NULL))
pcercuei 0:03b5121a232e 3648 return;
pcercuei 0:03b5121a232e 3649
pcercuei 0:03b5121a232e 3650 i = 0;
pcercuei 0:03b5121a232e 3651 att = atts[i++];
pcercuei 0:03b5121a232e 3652 while (att != NULL) {
pcercuei 0:03b5121a232e 3653 value = atts[i++];
pcercuei 0:03b5121a232e 3654 if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
pcercuei 0:03b5121a232e 3655 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
pcercuei 0:03b5121a232e 3656 http = 1;
pcercuei 0:03b5121a232e 3657 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset")))
pcercuei 0:03b5121a232e 3658 htmlCheckEncodingDirect(ctxt, value);
pcercuei 0:03b5121a232e 3659 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
pcercuei 0:03b5121a232e 3660 content = value;
pcercuei 0:03b5121a232e 3661 att = atts[i++];
pcercuei 0:03b5121a232e 3662 }
pcercuei 0:03b5121a232e 3663 if ((http) && (content != NULL))
pcercuei 0:03b5121a232e 3664 htmlCheckEncoding(ctxt, content);
pcercuei 0:03b5121a232e 3665
pcercuei 0:03b5121a232e 3666 }
pcercuei 0:03b5121a232e 3667
pcercuei 0:03b5121a232e 3668 /**
pcercuei 0:03b5121a232e 3669 * htmlParseStartTag:
pcercuei 0:03b5121a232e 3670 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3671 *
pcercuei 0:03b5121a232e 3672 * parse a start of tag either for rule element or
pcercuei 0:03b5121a232e 3673 * EmptyElement. In both case we don't parse the tag closing chars.
pcercuei 0:03b5121a232e 3674 *
pcercuei 0:03b5121a232e 3675 * [40] STag ::= '<' Name (S Attribute)* S? '>'
pcercuei 0:03b5121a232e 3676 *
pcercuei 0:03b5121a232e 3677 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
pcercuei 0:03b5121a232e 3678 *
pcercuei 0:03b5121a232e 3679 * With namespace:
pcercuei 0:03b5121a232e 3680 *
pcercuei 0:03b5121a232e 3681 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
pcercuei 0:03b5121a232e 3682 *
pcercuei 0:03b5121a232e 3683 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
pcercuei 0:03b5121a232e 3684 *
pcercuei 0:03b5121a232e 3685 * Returns 0 in case of success, -1 in case of error and 1 if discarded
pcercuei 0:03b5121a232e 3686 */
pcercuei 0:03b5121a232e 3687
pcercuei 0:03b5121a232e 3688 static int
pcercuei 0:03b5121a232e 3689 htmlParseStartTag(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 3690 const xmlChar *name;
pcercuei 0:03b5121a232e 3691 const xmlChar *attname;
pcercuei 0:03b5121a232e 3692 xmlChar *attvalue;
pcercuei 0:03b5121a232e 3693 const xmlChar **atts;
pcercuei 0:03b5121a232e 3694 int nbatts = 0;
pcercuei 0:03b5121a232e 3695 int maxatts;
pcercuei 0:03b5121a232e 3696 int meta = 0;
pcercuei 0:03b5121a232e 3697 int i;
pcercuei 0:03b5121a232e 3698 int discardtag = 0;
pcercuei 0:03b5121a232e 3699
pcercuei 0:03b5121a232e 3700 if ((ctxt == NULL) || (ctxt->input == NULL)) {
pcercuei 0:03b5121a232e 3701 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 3702 "htmlParseStartTag: context error\n", NULL, NULL);
pcercuei 0:03b5121a232e 3703 return -1;
pcercuei 0:03b5121a232e 3704 }
pcercuei 0:03b5121a232e 3705 if (ctxt->instate == XML_PARSER_EOF)
pcercuei 0:03b5121a232e 3706 return(-1);
pcercuei 0:03b5121a232e 3707 if (CUR != '<') return -1;
pcercuei 0:03b5121a232e 3708 NEXT;
pcercuei 0:03b5121a232e 3709
pcercuei 0:03b5121a232e 3710 atts = ctxt->atts;
pcercuei 0:03b5121a232e 3711 maxatts = ctxt->maxatts;
pcercuei 0:03b5121a232e 3712
pcercuei 0:03b5121a232e 3713 GROW;
pcercuei 0:03b5121a232e 3714 name = htmlParseHTMLName(ctxt);
pcercuei 0:03b5121a232e 3715 if (name == NULL) {
pcercuei 0:03b5121a232e 3716 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
pcercuei 0:03b5121a232e 3717 "htmlParseStartTag: invalid element name\n",
pcercuei 0:03b5121a232e 3718 NULL, NULL);
pcercuei 0:03b5121a232e 3719 /* if recover preserve text on classic misconstructs */
pcercuei 0:03b5121a232e 3720 if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||
pcercuei 0:03b5121a232e 3721 (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {
pcercuei 0:03b5121a232e 3722 htmlParseCharDataInternal(ctxt, '<');
pcercuei 0:03b5121a232e 3723 return(-1);
pcercuei 0:03b5121a232e 3724 }
pcercuei 0:03b5121a232e 3725
pcercuei 0:03b5121a232e 3726
pcercuei 0:03b5121a232e 3727 /* Dump the bogus tag like browsers do */
pcercuei 0:03b5121a232e 3728 while ((IS_CHAR_CH(CUR)) && (CUR != '>') &&
pcercuei 0:03b5121a232e 3729 (ctxt->instate != XML_PARSER_EOF))
pcercuei 0:03b5121a232e 3730 NEXT;
pcercuei 0:03b5121a232e 3731 return -1;
pcercuei 0:03b5121a232e 3732 }
pcercuei 0:03b5121a232e 3733 if (xmlStrEqual(name, BAD_CAST"meta"))
pcercuei 0:03b5121a232e 3734 meta = 1;
pcercuei 0:03b5121a232e 3735
pcercuei 0:03b5121a232e 3736 /*
pcercuei 0:03b5121a232e 3737 * Check for auto-closure of HTML elements.
pcercuei 0:03b5121a232e 3738 */
pcercuei 0:03b5121a232e 3739 htmlAutoClose(ctxt, name);
pcercuei 0:03b5121a232e 3740
pcercuei 0:03b5121a232e 3741 /*
pcercuei 0:03b5121a232e 3742 * Check for implied HTML elements.
pcercuei 0:03b5121a232e 3743 */
pcercuei 0:03b5121a232e 3744 htmlCheckImplied(ctxt, name);
pcercuei 0:03b5121a232e 3745
pcercuei 0:03b5121a232e 3746 /*
pcercuei 0:03b5121a232e 3747 * Avoid html at any level > 0, head at any level != 1
pcercuei 0:03b5121a232e 3748 * or any attempt to recurse body
pcercuei 0:03b5121a232e 3749 */
pcercuei 0:03b5121a232e 3750 if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
pcercuei 0:03b5121a232e 3751 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
pcercuei 0:03b5121a232e 3752 "htmlParseStartTag: misplaced <html> tag\n",
pcercuei 0:03b5121a232e 3753 name, NULL);
pcercuei 0:03b5121a232e 3754 discardtag = 1;
pcercuei 0:03b5121a232e 3755 ctxt->depth++;
pcercuei 0:03b5121a232e 3756 }
pcercuei 0:03b5121a232e 3757 if ((ctxt->nameNr != 1) &&
pcercuei 0:03b5121a232e 3758 (xmlStrEqual(name, BAD_CAST"head"))) {
pcercuei 0:03b5121a232e 3759 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
pcercuei 0:03b5121a232e 3760 "htmlParseStartTag: misplaced <head> tag\n",
pcercuei 0:03b5121a232e 3761 name, NULL);
pcercuei 0:03b5121a232e 3762 discardtag = 1;
pcercuei 0:03b5121a232e 3763 ctxt->depth++;
pcercuei 0:03b5121a232e 3764 }
pcercuei 0:03b5121a232e 3765 if (xmlStrEqual(name, BAD_CAST"body")) {
pcercuei 0:03b5121a232e 3766 int indx;
pcercuei 0:03b5121a232e 3767 for (indx = 0;indx < ctxt->nameNr;indx++) {
pcercuei 0:03b5121a232e 3768 if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
pcercuei 0:03b5121a232e 3769 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
pcercuei 0:03b5121a232e 3770 "htmlParseStartTag: misplaced <body> tag\n",
pcercuei 0:03b5121a232e 3771 name, NULL);
pcercuei 0:03b5121a232e 3772 discardtag = 1;
pcercuei 0:03b5121a232e 3773 ctxt->depth++;
pcercuei 0:03b5121a232e 3774 }
pcercuei 0:03b5121a232e 3775 }
pcercuei 0:03b5121a232e 3776 }
pcercuei 0:03b5121a232e 3777
pcercuei 0:03b5121a232e 3778 /*
pcercuei 0:03b5121a232e 3779 * Now parse the attributes, it ends up with the ending
pcercuei 0:03b5121a232e 3780 *
pcercuei 0:03b5121a232e 3781 * (S Attribute)* S?
pcercuei 0:03b5121a232e 3782 */
pcercuei 0:03b5121a232e 3783 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3784 while ((IS_CHAR_CH(CUR)) &&
pcercuei 0:03b5121a232e 3785 (CUR != '>') &&
pcercuei 0:03b5121a232e 3786 ((CUR != '/') || (NXT(1) != '>'))) {
pcercuei 0:03b5121a232e 3787 long cons = ctxt->nbChars;
pcercuei 0:03b5121a232e 3788
pcercuei 0:03b5121a232e 3789 GROW;
pcercuei 0:03b5121a232e 3790 attname = htmlParseAttribute(ctxt, &attvalue);
pcercuei 0:03b5121a232e 3791 if (attname != NULL) {
pcercuei 0:03b5121a232e 3792
pcercuei 0:03b5121a232e 3793 /*
pcercuei 0:03b5121a232e 3794 * Well formedness requires at most one declaration of an attribute
pcercuei 0:03b5121a232e 3795 */
pcercuei 0:03b5121a232e 3796 for (i = 0; i < nbatts;i += 2) {
pcercuei 0:03b5121a232e 3797 if (xmlStrEqual(atts[i], attname)) {
pcercuei 0:03b5121a232e 3798 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
pcercuei 0:03b5121a232e 3799 "Attribute %s redefined\n", attname, NULL);
pcercuei 0:03b5121a232e 3800 if (attvalue != NULL)
pcercuei 0:03b5121a232e 3801 xmlFree(attvalue);
pcercuei 0:03b5121a232e 3802 goto failed;
pcercuei 0:03b5121a232e 3803 }
pcercuei 0:03b5121a232e 3804 }
pcercuei 0:03b5121a232e 3805
pcercuei 0:03b5121a232e 3806 /*
pcercuei 0:03b5121a232e 3807 * Add the pair to atts
pcercuei 0:03b5121a232e 3808 */
pcercuei 0:03b5121a232e 3809 if (atts == NULL) {
pcercuei 0:03b5121a232e 3810 maxatts = 22; /* allow for 10 attrs by default */
pcercuei 0:03b5121a232e 3811 atts = (const xmlChar **)
pcercuei 0:03b5121a232e 3812 xmlMalloc(maxatts * sizeof(xmlChar *));
pcercuei 0:03b5121a232e 3813 if (atts == NULL) {
pcercuei 0:03b5121a232e 3814 htmlErrMemory(ctxt, NULL);
pcercuei 0:03b5121a232e 3815 if (attvalue != NULL)
pcercuei 0:03b5121a232e 3816 xmlFree(attvalue);
pcercuei 0:03b5121a232e 3817 goto failed;
pcercuei 0:03b5121a232e 3818 }
pcercuei 0:03b5121a232e 3819 ctxt->atts = atts;
pcercuei 0:03b5121a232e 3820 ctxt->maxatts = maxatts;
pcercuei 0:03b5121a232e 3821 } else if (nbatts + 4 > maxatts) {
pcercuei 0:03b5121a232e 3822 const xmlChar **n;
pcercuei 0:03b5121a232e 3823
pcercuei 0:03b5121a232e 3824 maxatts *= 2;
pcercuei 0:03b5121a232e 3825 n = (const xmlChar **) xmlRealloc((void *) atts,
pcercuei 0:03b5121a232e 3826 maxatts * sizeof(const xmlChar *));
pcercuei 0:03b5121a232e 3827 if (n == NULL) {
pcercuei 0:03b5121a232e 3828 htmlErrMemory(ctxt, NULL);
pcercuei 0:03b5121a232e 3829 if (attvalue != NULL)
pcercuei 0:03b5121a232e 3830 xmlFree(attvalue);
pcercuei 0:03b5121a232e 3831 goto failed;
pcercuei 0:03b5121a232e 3832 }
pcercuei 0:03b5121a232e 3833 atts = n;
pcercuei 0:03b5121a232e 3834 ctxt->atts = atts;
pcercuei 0:03b5121a232e 3835 ctxt->maxatts = maxatts;
pcercuei 0:03b5121a232e 3836 }
pcercuei 0:03b5121a232e 3837 atts[nbatts++] = attname;
pcercuei 0:03b5121a232e 3838 atts[nbatts++] = attvalue;
pcercuei 0:03b5121a232e 3839 atts[nbatts] = NULL;
pcercuei 0:03b5121a232e 3840 atts[nbatts + 1] = NULL;
pcercuei 0:03b5121a232e 3841 }
pcercuei 0:03b5121a232e 3842 else {
pcercuei 0:03b5121a232e 3843 if (attvalue != NULL)
pcercuei 0:03b5121a232e 3844 xmlFree(attvalue);
pcercuei 0:03b5121a232e 3845 /* Dump the bogus attribute string up to the next blank or
pcercuei 0:03b5121a232e 3846 * the end of the tag. */
pcercuei 0:03b5121a232e 3847 while ((IS_CHAR_CH(CUR)) &&
pcercuei 0:03b5121a232e 3848 !(IS_BLANK_CH(CUR)) && (CUR != '>') &&
pcercuei 0:03b5121a232e 3849 ((CUR != '/') || (NXT(1) != '>')))
pcercuei 0:03b5121a232e 3850 NEXT;
pcercuei 0:03b5121a232e 3851 }
pcercuei 0:03b5121a232e 3852
pcercuei 0:03b5121a232e 3853 failed:
pcercuei 0:03b5121a232e 3854 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3855 if (cons == ctxt->nbChars) {
pcercuei 0:03b5121a232e 3856 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 3857 "htmlParseStartTag: problem parsing attributes\n",
pcercuei 0:03b5121a232e 3858 NULL, NULL);
pcercuei 0:03b5121a232e 3859 break;
pcercuei 0:03b5121a232e 3860 }
pcercuei 0:03b5121a232e 3861 }
pcercuei 0:03b5121a232e 3862
pcercuei 0:03b5121a232e 3863 /*
pcercuei 0:03b5121a232e 3864 * Handle specific association to the META tag
pcercuei 0:03b5121a232e 3865 */
pcercuei 0:03b5121a232e 3866 if (meta && (nbatts != 0))
pcercuei 0:03b5121a232e 3867 htmlCheckMeta(ctxt, atts);
pcercuei 0:03b5121a232e 3868
pcercuei 0:03b5121a232e 3869 /*
pcercuei 0:03b5121a232e 3870 * SAX: Start of Element !
pcercuei 0:03b5121a232e 3871 */
pcercuei 0:03b5121a232e 3872 if (!discardtag) {
pcercuei 0:03b5121a232e 3873 htmlnamePush(ctxt, name);
pcercuei 0:03b5121a232e 3874 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
pcercuei 0:03b5121a232e 3875 if (nbatts != 0)
pcercuei 0:03b5121a232e 3876 ctxt->sax->startElement(ctxt->userData, name, atts);
pcercuei 0:03b5121a232e 3877 else
pcercuei 0:03b5121a232e 3878 ctxt->sax->startElement(ctxt->userData, name, NULL);
pcercuei 0:03b5121a232e 3879 }
pcercuei 0:03b5121a232e 3880 }
pcercuei 0:03b5121a232e 3881
pcercuei 0:03b5121a232e 3882 if (atts != NULL) {
pcercuei 0:03b5121a232e 3883 for (i = 1;i < nbatts;i += 2) {
pcercuei 0:03b5121a232e 3884 if (atts[i] != NULL)
pcercuei 0:03b5121a232e 3885 xmlFree((xmlChar *) atts[i]);
pcercuei 0:03b5121a232e 3886 }
pcercuei 0:03b5121a232e 3887 }
pcercuei 0:03b5121a232e 3888
pcercuei 0:03b5121a232e 3889 return(discardtag);
pcercuei 0:03b5121a232e 3890 }
pcercuei 0:03b5121a232e 3891
pcercuei 0:03b5121a232e 3892 /**
pcercuei 0:03b5121a232e 3893 * htmlParseEndTag:
pcercuei 0:03b5121a232e 3894 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 3895 *
pcercuei 0:03b5121a232e 3896 * parse an end of tag
pcercuei 0:03b5121a232e 3897 *
pcercuei 0:03b5121a232e 3898 * [42] ETag ::= '</' Name S? '>'
pcercuei 0:03b5121a232e 3899 *
pcercuei 0:03b5121a232e 3900 * With namespace
pcercuei 0:03b5121a232e 3901 *
pcercuei 0:03b5121a232e 3902 * [NS 9] ETag ::= '</' QName S? '>'
pcercuei 0:03b5121a232e 3903 *
pcercuei 0:03b5121a232e 3904 * Returns 1 if the current level should be closed.
pcercuei 0:03b5121a232e 3905 */
pcercuei 0:03b5121a232e 3906
pcercuei 0:03b5121a232e 3907 static int
pcercuei 0:03b5121a232e 3908 htmlParseEndTag(htmlParserCtxtPtr ctxt)
pcercuei 0:03b5121a232e 3909 {
pcercuei 0:03b5121a232e 3910 const xmlChar *name;
pcercuei 0:03b5121a232e 3911 const xmlChar *oldname;
pcercuei 0:03b5121a232e 3912 int i, ret;
pcercuei 0:03b5121a232e 3913
pcercuei 0:03b5121a232e 3914 if ((CUR != '<') || (NXT(1) != '/')) {
pcercuei 0:03b5121a232e 3915 htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
pcercuei 0:03b5121a232e 3916 "htmlParseEndTag: '</' not found\n", NULL, NULL);
pcercuei 0:03b5121a232e 3917 return (0);
pcercuei 0:03b5121a232e 3918 }
pcercuei 0:03b5121a232e 3919 SKIP(2);
pcercuei 0:03b5121a232e 3920
pcercuei 0:03b5121a232e 3921 name = htmlParseHTMLName(ctxt);
pcercuei 0:03b5121a232e 3922 if (name == NULL)
pcercuei 0:03b5121a232e 3923 return (0);
pcercuei 0:03b5121a232e 3924 /*
pcercuei 0:03b5121a232e 3925 * We should definitely be at the ending "S? '>'" part
pcercuei 0:03b5121a232e 3926 */
pcercuei 0:03b5121a232e 3927 SKIP_BLANKS;
pcercuei 0:03b5121a232e 3928 if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) {
pcercuei 0:03b5121a232e 3929 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
pcercuei 0:03b5121a232e 3930 "End tag : expected '>'\n", NULL, NULL);
pcercuei 0:03b5121a232e 3931 if (ctxt->recovery) {
pcercuei 0:03b5121a232e 3932 /*
pcercuei 0:03b5121a232e 3933 * We're not at the ending > !!
pcercuei 0:03b5121a232e 3934 * Error, unless in recover mode where we search forwards
pcercuei 0:03b5121a232e 3935 * until we find a >
pcercuei 0:03b5121a232e 3936 */
pcercuei 0:03b5121a232e 3937 while (CUR != '\0' && CUR != '>') NEXT;
pcercuei 0:03b5121a232e 3938 NEXT;
pcercuei 0:03b5121a232e 3939 }
pcercuei 0:03b5121a232e 3940 } else
pcercuei 0:03b5121a232e 3941 NEXT;
pcercuei 0:03b5121a232e 3942
pcercuei 0:03b5121a232e 3943 /*
pcercuei 0:03b5121a232e 3944 * if we ignored misplaced tags in htmlParseStartTag don't pop them
pcercuei 0:03b5121a232e 3945 * out now.
pcercuei 0:03b5121a232e 3946 */
pcercuei 0:03b5121a232e 3947 if ((ctxt->depth > 0) &&
pcercuei 0:03b5121a232e 3948 (xmlStrEqual(name, BAD_CAST "html") ||
pcercuei 0:03b5121a232e 3949 xmlStrEqual(name, BAD_CAST "body") ||
pcercuei 0:03b5121a232e 3950 xmlStrEqual(name, BAD_CAST "head"))) {
pcercuei 0:03b5121a232e 3951 ctxt->depth--;
pcercuei 0:03b5121a232e 3952 return (0);
pcercuei 0:03b5121a232e 3953 }
pcercuei 0:03b5121a232e 3954
pcercuei 0:03b5121a232e 3955 /*
pcercuei 0:03b5121a232e 3956 * If the name read is not one of the element in the parsing stack
pcercuei 0:03b5121a232e 3957 * then return, it's just an error.
pcercuei 0:03b5121a232e 3958 */
pcercuei 0:03b5121a232e 3959 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
pcercuei 0:03b5121a232e 3960 if (xmlStrEqual(name, ctxt->nameTab[i]))
pcercuei 0:03b5121a232e 3961 break;
pcercuei 0:03b5121a232e 3962 }
pcercuei 0:03b5121a232e 3963 if (i < 0) {
pcercuei 0:03b5121a232e 3964 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
pcercuei 0:03b5121a232e 3965 "Unexpected end tag : %s\n", name, NULL);
pcercuei 0:03b5121a232e 3966 return (0);
pcercuei 0:03b5121a232e 3967 }
pcercuei 0:03b5121a232e 3968
pcercuei 0:03b5121a232e 3969
pcercuei 0:03b5121a232e 3970 /*
pcercuei 0:03b5121a232e 3971 * Check for auto-closure of HTML elements.
pcercuei 0:03b5121a232e 3972 */
pcercuei 0:03b5121a232e 3973
pcercuei 0:03b5121a232e 3974 htmlAutoCloseOnClose(ctxt, name);
pcercuei 0:03b5121a232e 3975
pcercuei 0:03b5121a232e 3976 /*
pcercuei 0:03b5121a232e 3977 * Well formedness constraints, opening and closing must match.
pcercuei 0:03b5121a232e 3978 * With the exception that the autoclose may have popped stuff out
pcercuei 0:03b5121a232e 3979 * of the stack.
pcercuei 0:03b5121a232e 3980 */
pcercuei 0:03b5121a232e 3981 if (!xmlStrEqual(name, ctxt->name)) {
pcercuei 0:03b5121a232e 3982 if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
pcercuei 0:03b5121a232e 3983 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
pcercuei 0:03b5121a232e 3984 "Opening and ending tag mismatch: %s and %s\n",
pcercuei 0:03b5121a232e 3985 name, ctxt->name);
pcercuei 0:03b5121a232e 3986 }
pcercuei 0:03b5121a232e 3987 }
pcercuei 0:03b5121a232e 3988
pcercuei 0:03b5121a232e 3989 /*
pcercuei 0:03b5121a232e 3990 * SAX: End of Tag
pcercuei 0:03b5121a232e 3991 */
pcercuei 0:03b5121a232e 3992 oldname = ctxt->name;
pcercuei 0:03b5121a232e 3993 if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
pcercuei 0:03b5121a232e 3994 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 3995 ctxt->sax->endElement(ctxt->userData, name);
pcercuei 0:03b5121a232e 3996 htmlNodeInfoPop(ctxt);
pcercuei 0:03b5121a232e 3997 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 3998 ret = 1;
pcercuei 0:03b5121a232e 3999 } else {
pcercuei 0:03b5121a232e 4000 ret = 0;
pcercuei 0:03b5121a232e 4001 }
pcercuei 0:03b5121a232e 4002
pcercuei 0:03b5121a232e 4003 return (ret);
pcercuei 0:03b5121a232e 4004 }
pcercuei 0:03b5121a232e 4005
pcercuei 0:03b5121a232e 4006
pcercuei 0:03b5121a232e 4007 /**
pcercuei 0:03b5121a232e 4008 * htmlParseReference:
pcercuei 0:03b5121a232e 4009 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4010 *
pcercuei 0:03b5121a232e 4011 * parse and handle entity references in content,
pcercuei 0:03b5121a232e 4012 * this will end-up in a call to character() since this is either a
pcercuei 0:03b5121a232e 4013 * CharRef, or a predefined entity.
pcercuei 0:03b5121a232e 4014 */
pcercuei 0:03b5121a232e 4015 static void
pcercuei 0:03b5121a232e 4016 htmlParseReference(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 4017 const htmlEntityDesc * ent;
pcercuei 0:03b5121a232e 4018 xmlChar out[6];
pcercuei 0:03b5121a232e 4019 const xmlChar *name;
pcercuei 0:03b5121a232e 4020 if (CUR != '&') return;
pcercuei 0:03b5121a232e 4021
pcercuei 0:03b5121a232e 4022 if (NXT(1) == '#') {
pcercuei 0:03b5121a232e 4023 unsigned int c;
pcercuei 0:03b5121a232e 4024 int bits, i = 0;
pcercuei 0:03b5121a232e 4025
pcercuei 0:03b5121a232e 4026 c = htmlParseCharRef(ctxt);
pcercuei 0:03b5121a232e 4027 if (c == 0)
pcercuei 0:03b5121a232e 4028 return;
pcercuei 0:03b5121a232e 4029
pcercuei 0:03b5121a232e 4030 if (c < 0x80) { out[i++]= c; bits= -6; }
pcercuei 0:03b5121a232e 4031 else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
pcercuei 0:03b5121a232e 4032 else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
pcercuei 0:03b5121a232e 4033 else { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
pcercuei 0:03b5121a232e 4034
pcercuei 0:03b5121a232e 4035 for ( ; bits >= 0; bits-= 6) {
pcercuei 0:03b5121a232e 4036 out[i++]= ((c >> bits) & 0x3F) | 0x80;
pcercuei 0:03b5121a232e 4037 }
pcercuei 0:03b5121a232e 4038 out[i] = 0;
pcercuei 0:03b5121a232e 4039
pcercuei 0:03b5121a232e 4040 htmlCheckParagraph(ctxt);
pcercuei 0:03b5121a232e 4041 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
pcercuei 0:03b5121a232e 4042 ctxt->sax->characters(ctxt->userData, out, i);
pcercuei 0:03b5121a232e 4043 } else {
pcercuei 0:03b5121a232e 4044 ent = htmlParseEntityRef(ctxt, &name);
pcercuei 0:03b5121a232e 4045 if (name == NULL) {
pcercuei 0:03b5121a232e 4046 htmlCheckParagraph(ctxt);
pcercuei 0:03b5121a232e 4047 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
pcercuei 0:03b5121a232e 4048 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
pcercuei 0:03b5121a232e 4049 return;
pcercuei 0:03b5121a232e 4050 }
pcercuei 0:03b5121a232e 4051 if ((ent == NULL) || !(ent->value > 0)) {
pcercuei 0:03b5121a232e 4052 htmlCheckParagraph(ctxt);
pcercuei 0:03b5121a232e 4053 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
pcercuei 0:03b5121a232e 4054 ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
pcercuei 0:03b5121a232e 4055 ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
pcercuei 0:03b5121a232e 4056 /* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
pcercuei 0:03b5121a232e 4057 }
pcercuei 0:03b5121a232e 4058 } else {
pcercuei 0:03b5121a232e 4059 unsigned int c;
pcercuei 0:03b5121a232e 4060 int bits, i = 0;
pcercuei 0:03b5121a232e 4061
pcercuei 0:03b5121a232e 4062 c = ent->value;
pcercuei 0:03b5121a232e 4063 if (c < 0x80)
pcercuei 0:03b5121a232e 4064 { out[i++]= c; bits= -6; }
pcercuei 0:03b5121a232e 4065 else if (c < 0x800)
pcercuei 0:03b5121a232e 4066 { out[i++]=((c >> 6) & 0x1F) | 0xC0; bits= 0; }
pcercuei 0:03b5121a232e 4067 else if (c < 0x10000)
pcercuei 0:03b5121a232e 4068 { out[i++]=((c >> 12) & 0x0F) | 0xE0; bits= 6; }
pcercuei 0:03b5121a232e 4069 else
pcercuei 0:03b5121a232e 4070 { out[i++]=((c >> 18) & 0x07) | 0xF0; bits= 12; }
pcercuei 0:03b5121a232e 4071
pcercuei 0:03b5121a232e 4072 for ( ; bits >= 0; bits-= 6) {
pcercuei 0:03b5121a232e 4073 out[i++]= ((c >> bits) & 0x3F) | 0x80;
pcercuei 0:03b5121a232e 4074 }
pcercuei 0:03b5121a232e 4075 out[i] = 0;
pcercuei 0:03b5121a232e 4076
pcercuei 0:03b5121a232e 4077 htmlCheckParagraph(ctxt);
pcercuei 0:03b5121a232e 4078 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
pcercuei 0:03b5121a232e 4079 ctxt->sax->characters(ctxt->userData, out, i);
pcercuei 0:03b5121a232e 4080 }
pcercuei 0:03b5121a232e 4081 }
pcercuei 0:03b5121a232e 4082 }
pcercuei 0:03b5121a232e 4083
pcercuei 0:03b5121a232e 4084 /**
pcercuei 0:03b5121a232e 4085 * htmlParseContent:
pcercuei 0:03b5121a232e 4086 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4087 *
pcercuei 0:03b5121a232e 4088 * Parse a content: comment, sub-element, reference or text.
pcercuei 0:03b5121a232e 4089 * Kept for compatibility with old code
pcercuei 0:03b5121a232e 4090 */
pcercuei 0:03b5121a232e 4091
pcercuei 0:03b5121a232e 4092 static void
pcercuei 0:03b5121a232e 4093 htmlParseContent(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 4094 xmlChar *currentNode;
pcercuei 0:03b5121a232e 4095 int depth;
pcercuei 0:03b5121a232e 4096 const xmlChar *name;
pcercuei 0:03b5121a232e 4097
pcercuei 0:03b5121a232e 4098 currentNode = xmlStrdup(ctxt->name);
pcercuei 0:03b5121a232e 4099 depth = ctxt->nameNr;
pcercuei 0:03b5121a232e 4100 while (1) {
pcercuei 0:03b5121a232e 4101 long cons = ctxt->nbChars;
pcercuei 0:03b5121a232e 4102
pcercuei 0:03b5121a232e 4103 GROW;
pcercuei 0:03b5121a232e 4104
pcercuei 0:03b5121a232e 4105 if (ctxt->instate == XML_PARSER_EOF)
pcercuei 0:03b5121a232e 4106 break;
pcercuei 0:03b5121a232e 4107
pcercuei 0:03b5121a232e 4108 /*
pcercuei 0:03b5121a232e 4109 * Our tag or one of it's parent or children is ending.
pcercuei 0:03b5121a232e 4110 */
pcercuei 0:03b5121a232e 4111 if ((CUR == '<') && (NXT(1) == '/')) {
pcercuei 0:03b5121a232e 4112 if (htmlParseEndTag(ctxt) &&
pcercuei 0:03b5121a232e 4113 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
pcercuei 0:03b5121a232e 4114 if (currentNode != NULL)
pcercuei 0:03b5121a232e 4115 xmlFree(currentNode);
pcercuei 0:03b5121a232e 4116 return;
pcercuei 0:03b5121a232e 4117 }
pcercuei 0:03b5121a232e 4118 continue; /* while */
pcercuei 0:03b5121a232e 4119 }
pcercuei 0:03b5121a232e 4120
pcercuei 0:03b5121a232e 4121 else if ((CUR == '<') &&
pcercuei 0:03b5121a232e 4122 ((IS_ASCII_LETTER(NXT(1))) ||
pcercuei 0:03b5121a232e 4123 (NXT(1) == '_') || (NXT(1) == ':'))) {
pcercuei 0:03b5121a232e 4124 name = htmlParseHTMLName_nonInvasive(ctxt);
pcercuei 0:03b5121a232e 4125 if (name == NULL) {
pcercuei 0:03b5121a232e 4126 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
pcercuei 0:03b5121a232e 4127 "htmlParseStartTag: invalid element name\n",
pcercuei 0:03b5121a232e 4128 NULL, NULL);
pcercuei 0:03b5121a232e 4129 /* Dump the bogus tag like browsers do */
pcercuei 0:03b5121a232e 4130 while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
pcercuei 0:03b5121a232e 4131 NEXT;
pcercuei 0:03b5121a232e 4132
pcercuei 0:03b5121a232e 4133 if (currentNode != NULL)
pcercuei 0:03b5121a232e 4134 xmlFree(currentNode);
pcercuei 0:03b5121a232e 4135 return;
pcercuei 0:03b5121a232e 4136 }
pcercuei 0:03b5121a232e 4137
pcercuei 0:03b5121a232e 4138 if (ctxt->name != NULL) {
pcercuei 0:03b5121a232e 4139 if (htmlCheckAutoClose(name, ctxt->name) == 1) {
pcercuei 0:03b5121a232e 4140 htmlAutoClose(ctxt, name);
pcercuei 0:03b5121a232e 4141 continue;
pcercuei 0:03b5121a232e 4142 }
pcercuei 0:03b5121a232e 4143 }
pcercuei 0:03b5121a232e 4144 }
pcercuei 0:03b5121a232e 4145
pcercuei 0:03b5121a232e 4146 /*
pcercuei 0:03b5121a232e 4147 * Has this node been popped out during parsing of
pcercuei 0:03b5121a232e 4148 * the next element
pcercuei 0:03b5121a232e 4149 */
pcercuei 0:03b5121a232e 4150 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
pcercuei 0:03b5121a232e 4151 (!xmlStrEqual(currentNode, ctxt->name)))
pcercuei 0:03b5121a232e 4152 {
pcercuei 0:03b5121a232e 4153 if (currentNode != NULL) xmlFree(currentNode);
pcercuei 0:03b5121a232e 4154 return;
pcercuei 0:03b5121a232e 4155 }
pcercuei 0:03b5121a232e 4156
pcercuei 0:03b5121a232e 4157 if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) ||
pcercuei 0:03b5121a232e 4158 (xmlStrEqual(currentNode, BAD_CAST"style")))) {
pcercuei 0:03b5121a232e 4159 /*
pcercuei 0:03b5121a232e 4160 * Handle SCRIPT/STYLE separately
pcercuei 0:03b5121a232e 4161 */
pcercuei 0:03b5121a232e 4162 htmlParseScript(ctxt);
pcercuei 0:03b5121a232e 4163 } else {
pcercuei 0:03b5121a232e 4164 /*
pcercuei 0:03b5121a232e 4165 * Sometimes DOCTYPE arrives in the middle of the document
pcercuei 0:03b5121a232e 4166 */
pcercuei 0:03b5121a232e 4167 if ((CUR == '<') && (NXT(1) == '!') &&
pcercuei 0:03b5121a232e 4168 (UPP(2) == 'D') && (UPP(3) == 'O') &&
pcercuei 0:03b5121a232e 4169 (UPP(4) == 'C') && (UPP(5) == 'T') &&
pcercuei 0:03b5121a232e 4170 (UPP(6) == 'Y') && (UPP(7) == 'P') &&
pcercuei 0:03b5121a232e 4171 (UPP(8) == 'E')) {
pcercuei 0:03b5121a232e 4172 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
pcercuei 0:03b5121a232e 4173 "Misplaced DOCTYPE declaration\n",
pcercuei 0:03b5121a232e 4174 BAD_CAST "DOCTYPE" , NULL);
pcercuei 0:03b5121a232e 4175 htmlParseDocTypeDecl(ctxt);
pcercuei 0:03b5121a232e 4176 }
pcercuei 0:03b5121a232e 4177
pcercuei 0:03b5121a232e 4178 /*
pcercuei 0:03b5121a232e 4179 * First case : a comment
pcercuei 0:03b5121a232e 4180 */
pcercuei 0:03b5121a232e 4181 if ((CUR == '<') && (NXT(1) == '!') &&
pcercuei 0:03b5121a232e 4182 (NXT(2) == '-') && (NXT(3) == '-')) {
pcercuei 0:03b5121a232e 4183 htmlParseComment(ctxt);
pcercuei 0:03b5121a232e 4184 }
pcercuei 0:03b5121a232e 4185
pcercuei 0:03b5121a232e 4186 /*
pcercuei 0:03b5121a232e 4187 * Second case : a Processing Instruction.
pcercuei 0:03b5121a232e 4188 */
pcercuei 0:03b5121a232e 4189 else if ((CUR == '<') && (NXT(1) == '?')) {
pcercuei 0:03b5121a232e 4190 htmlParsePI(ctxt);
pcercuei 0:03b5121a232e 4191 }
pcercuei 0:03b5121a232e 4192
pcercuei 0:03b5121a232e 4193 /*
pcercuei 0:03b5121a232e 4194 * Third case : a sub-element.
pcercuei 0:03b5121a232e 4195 */
pcercuei 0:03b5121a232e 4196 else if (CUR == '<') {
pcercuei 0:03b5121a232e 4197 htmlParseElement(ctxt);
pcercuei 0:03b5121a232e 4198 }
pcercuei 0:03b5121a232e 4199
pcercuei 0:03b5121a232e 4200 /*
pcercuei 0:03b5121a232e 4201 * Fourth case : a reference. If if has not been resolved,
pcercuei 0:03b5121a232e 4202 * parsing returns it's Name, create the node
pcercuei 0:03b5121a232e 4203 */
pcercuei 0:03b5121a232e 4204 else if (CUR == '&') {
pcercuei 0:03b5121a232e 4205 htmlParseReference(ctxt);
pcercuei 0:03b5121a232e 4206 }
pcercuei 0:03b5121a232e 4207
pcercuei 0:03b5121a232e 4208 /*
pcercuei 0:03b5121a232e 4209 * Fifth case : end of the resource
pcercuei 0:03b5121a232e 4210 */
pcercuei 0:03b5121a232e 4211 else if (CUR == 0) {
pcercuei 0:03b5121a232e 4212 htmlAutoCloseOnEnd(ctxt);
pcercuei 0:03b5121a232e 4213 break;
pcercuei 0:03b5121a232e 4214 }
pcercuei 0:03b5121a232e 4215
pcercuei 0:03b5121a232e 4216 /*
pcercuei 0:03b5121a232e 4217 * Last case, text. Note that References are handled directly.
pcercuei 0:03b5121a232e 4218 */
pcercuei 0:03b5121a232e 4219 else {
pcercuei 0:03b5121a232e 4220 htmlParseCharData(ctxt);
pcercuei 0:03b5121a232e 4221 }
pcercuei 0:03b5121a232e 4222
pcercuei 0:03b5121a232e 4223 if (cons == ctxt->nbChars) {
pcercuei 0:03b5121a232e 4224 if (ctxt->node != NULL) {
pcercuei 0:03b5121a232e 4225 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 4226 "detected an error in element content\n",
pcercuei 0:03b5121a232e 4227 NULL, NULL);
pcercuei 0:03b5121a232e 4228 }
pcercuei 0:03b5121a232e 4229 break;
pcercuei 0:03b5121a232e 4230 }
pcercuei 0:03b5121a232e 4231 }
pcercuei 0:03b5121a232e 4232 GROW;
pcercuei 0:03b5121a232e 4233 }
pcercuei 0:03b5121a232e 4234 if (currentNode != NULL) xmlFree(currentNode);
pcercuei 0:03b5121a232e 4235 }
pcercuei 0:03b5121a232e 4236
pcercuei 0:03b5121a232e 4237 /**
pcercuei 0:03b5121a232e 4238 * htmlParseElement:
pcercuei 0:03b5121a232e 4239 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4240 *
pcercuei 0:03b5121a232e 4241 * parse an HTML element, this is highly recursive
pcercuei 0:03b5121a232e 4242 * this is kept for compatibility with previous code versions
pcercuei 0:03b5121a232e 4243 *
pcercuei 0:03b5121a232e 4244 * [39] element ::= EmptyElemTag | STag content ETag
pcercuei 0:03b5121a232e 4245 *
pcercuei 0:03b5121a232e 4246 * [41] Attribute ::= Name Eq AttValue
pcercuei 0:03b5121a232e 4247 */
pcercuei 0:03b5121a232e 4248
pcercuei 0:03b5121a232e 4249 void
pcercuei 0:03b5121a232e 4250 htmlParseElement(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 4251 const xmlChar *name;
pcercuei 0:03b5121a232e 4252 xmlChar *currentNode = NULL;
pcercuei 0:03b5121a232e 4253 const htmlElemDesc * info;
pcercuei 0:03b5121a232e 4254 htmlParserNodeInfo node_info;
pcercuei 0:03b5121a232e 4255 int failed;
pcercuei 0:03b5121a232e 4256 int depth;
pcercuei 0:03b5121a232e 4257 const xmlChar *oldptr;
pcercuei 0:03b5121a232e 4258
pcercuei 0:03b5121a232e 4259 if ((ctxt == NULL) || (ctxt->input == NULL)) {
pcercuei 0:03b5121a232e 4260 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 4261 "htmlParseElement: context error\n", NULL, NULL);
pcercuei 0:03b5121a232e 4262 return;
pcercuei 0:03b5121a232e 4263 }
pcercuei 0:03b5121a232e 4264
pcercuei 0:03b5121a232e 4265 if (ctxt->instate == XML_PARSER_EOF)
pcercuei 0:03b5121a232e 4266 return;
pcercuei 0:03b5121a232e 4267
pcercuei 0:03b5121a232e 4268 /* Capture start position */
pcercuei 0:03b5121a232e 4269 if (ctxt->record_info) {
pcercuei 0:03b5121a232e 4270 node_info.begin_pos = ctxt->input->consumed +
pcercuei 0:03b5121a232e 4271 (CUR_PTR - ctxt->input->base);
pcercuei 0:03b5121a232e 4272 node_info.begin_line = ctxt->input->line;
pcercuei 0:03b5121a232e 4273 }
pcercuei 0:03b5121a232e 4274
pcercuei 0:03b5121a232e 4275 failed = htmlParseStartTag(ctxt);
pcercuei 0:03b5121a232e 4276 name = ctxt->name;
pcercuei 0:03b5121a232e 4277 if ((failed == -1) || (name == NULL)) {
pcercuei 0:03b5121a232e 4278 if (CUR == '>')
pcercuei 0:03b5121a232e 4279 NEXT;
pcercuei 0:03b5121a232e 4280 return;
pcercuei 0:03b5121a232e 4281 }
pcercuei 0:03b5121a232e 4282
pcercuei 0:03b5121a232e 4283 /*
pcercuei 0:03b5121a232e 4284 * Lookup the info for that element.
pcercuei 0:03b5121a232e 4285 */
pcercuei 0:03b5121a232e 4286 info = htmlTagLookup(name);
pcercuei 0:03b5121a232e 4287 if (info == NULL) {
pcercuei 0:03b5121a232e 4288 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
pcercuei 0:03b5121a232e 4289 "Tag %s invalid\n", name, NULL);
pcercuei 0:03b5121a232e 4290 }
pcercuei 0:03b5121a232e 4291
pcercuei 0:03b5121a232e 4292 /*
pcercuei 0:03b5121a232e 4293 * Check for an Empty Element labeled the XML/SGML way
pcercuei 0:03b5121a232e 4294 */
pcercuei 0:03b5121a232e 4295 if ((CUR == '/') && (NXT(1) == '>')) {
pcercuei 0:03b5121a232e 4296 SKIP(2);
pcercuei 0:03b5121a232e 4297 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 4298 ctxt->sax->endElement(ctxt->userData, name);
pcercuei 0:03b5121a232e 4299 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 4300 return;
pcercuei 0:03b5121a232e 4301 }
pcercuei 0:03b5121a232e 4302
pcercuei 0:03b5121a232e 4303 if (CUR == '>') {
pcercuei 0:03b5121a232e 4304 NEXT;
pcercuei 0:03b5121a232e 4305 } else {
pcercuei 0:03b5121a232e 4306 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
pcercuei 0:03b5121a232e 4307 "Couldn't find end of Start Tag %s\n", name, NULL);
pcercuei 0:03b5121a232e 4308
pcercuei 0:03b5121a232e 4309 /*
pcercuei 0:03b5121a232e 4310 * end of parsing of this node.
pcercuei 0:03b5121a232e 4311 */
pcercuei 0:03b5121a232e 4312 if (xmlStrEqual(name, ctxt->name)) {
pcercuei 0:03b5121a232e 4313 nodePop(ctxt);
pcercuei 0:03b5121a232e 4314 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 4315 }
pcercuei 0:03b5121a232e 4316
pcercuei 0:03b5121a232e 4317 /*
pcercuei 0:03b5121a232e 4318 * Capture end position and add node
pcercuei 0:03b5121a232e 4319 */
pcercuei 0:03b5121a232e 4320 if (ctxt->record_info) {
pcercuei 0:03b5121a232e 4321 node_info.end_pos = ctxt->input->consumed +
pcercuei 0:03b5121a232e 4322 (CUR_PTR - ctxt->input->base);
pcercuei 0:03b5121a232e 4323 node_info.end_line = ctxt->input->line;
pcercuei 0:03b5121a232e 4324 node_info.node = ctxt->node;
pcercuei 0:03b5121a232e 4325 xmlParserAddNodeInfo(ctxt, &node_info);
pcercuei 0:03b5121a232e 4326 }
pcercuei 0:03b5121a232e 4327 return;
pcercuei 0:03b5121a232e 4328 }
pcercuei 0:03b5121a232e 4329
pcercuei 0:03b5121a232e 4330 /*
pcercuei 0:03b5121a232e 4331 * Check for an Empty Element from DTD definition
pcercuei 0:03b5121a232e 4332 */
pcercuei 0:03b5121a232e 4333 if ((info != NULL) && (info->empty)) {
pcercuei 0:03b5121a232e 4334 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 4335 ctxt->sax->endElement(ctxt->userData, name);
pcercuei 0:03b5121a232e 4336 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 4337 return;
pcercuei 0:03b5121a232e 4338 }
pcercuei 0:03b5121a232e 4339
pcercuei 0:03b5121a232e 4340 /*
pcercuei 0:03b5121a232e 4341 * Parse the content of the element:
pcercuei 0:03b5121a232e 4342 */
pcercuei 0:03b5121a232e 4343 currentNode = xmlStrdup(ctxt->name);
pcercuei 0:03b5121a232e 4344 depth = ctxt->nameNr;
pcercuei 0:03b5121a232e 4345 while (IS_CHAR_CH(CUR)) {
pcercuei 0:03b5121a232e 4346 oldptr = ctxt->input->cur;
pcercuei 0:03b5121a232e 4347 htmlParseContent(ctxt);
pcercuei 0:03b5121a232e 4348 if (oldptr==ctxt->input->cur) break;
pcercuei 0:03b5121a232e 4349 if (ctxt->nameNr < depth) break;
pcercuei 0:03b5121a232e 4350 }
pcercuei 0:03b5121a232e 4351
pcercuei 0:03b5121a232e 4352 /*
pcercuei 0:03b5121a232e 4353 * Capture end position and add node
pcercuei 0:03b5121a232e 4354 */
pcercuei 0:03b5121a232e 4355 if ( currentNode != NULL && ctxt->record_info ) {
pcercuei 0:03b5121a232e 4356 node_info.end_pos = ctxt->input->consumed +
pcercuei 0:03b5121a232e 4357 (CUR_PTR - ctxt->input->base);
pcercuei 0:03b5121a232e 4358 node_info.end_line = ctxt->input->line;
pcercuei 0:03b5121a232e 4359 node_info.node = ctxt->node;
pcercuei 0:03b5121a232e 4360 xmlParserAddNodeInfo(ctxt, &node_info);
pcercuei 0:03b5121a232e 4361 }
pcercuei 0:03b5121a232e 4362 if (!IS_CHAR_CH(CUR)) {
pcercuei 0:03b5121a232e 4363 htmlAutoCloseOnEnd(ctxt);
pcercuei 0:03b5121a232e 4364 }
pcercuei 0:03b5121a232e 4365
pcercuei 0:03b5121a232e 4366 if (currentNode != NULL)
pcercuei 0:03b5121a232e 4367 xmlFree(currentNode);
pcercuei 0:03b5121a232e 4368 }
pcercuei 0:03b5121a232e 4369
pcercuei 0:03b5121a232e 4370 static void
pcercuei 0:03b5121a232e 4371 htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 4372 /*
pcercuei 0:03b5121a232e 4373 * Capture end position and add node
pcercuei 0:03b5121a232e 4374 */
pcercuei 0:03b5121a232e 4375 if ( ctxt->node != NULL && ctxt->record_info ) {
pcercuei 0:03b5121a232e 4376 ctxt->nodeInfo->end_pos = ctxt->input->consumed +
pcercuei 0:03b5121a232e 4377 (CUR_PTR - ctxt->input->base);
pcercuei 0:03b5121a232e 4378 ctxt->nodeInfo->end_line = ctxt->input->line;
pcercuei 0:03b5121a232e 4379 ctxt->nodeInfo->node = ctxt->node;
pcercuei 0:03b5121a232e 4380 xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
pcercuei 0:03b5121a232e 4381 htmlNodeInfoPop(ctxt);
pcercuei 0:03b5121a232e 4382 }
pcercuei 0:03b5121a232e 4383 if (!IS_CHAR_CH(CUR)) {
pcercuei 0:03b5121a232e 4384 htmlAutoCloseOnEnd(ctxt);
pcercuei 0:03b5121a232e 4385 }
pcercuei 0:03b5121a232e 4386 }
pcercuei 0:03b5121a232e 4387
pcercuei 0:03b5121a232e 4388 /**
pcercuei 0:03b5121a232e 4389 * htmlParseElementInternal:
pcercuei 0:03b5121a232e 4390 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4391 *
pcercuei 0:03b5121a232e 4392 * parse an HTML element, new version, non recursive
pcercuei 0:03b5121a232e 4393 *
pcercuei 0:03b5121a232e 4394 * [39] element ::= EmptyElemTag | STag content ETag
pcercuei 0:03b5121a232e 4395 *
pcercuei 0:03b5121a232e 4396 * [41] Attribute ::= Name Eq AttValue
pcercuei 0:03b5121a232e 4397 */
pcercuei 0:03b5121a232e 4398
pcercuei 0:03b5121a232e 4399 static void
pcercuei 0:03b5121a232e 4400 htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 4401 const xmlChar *name;
pcercuei 0:03b5121a232e 4402 const htmlElemDesc * info;
pcercuei 0:03b5121a232e 4403 htmlParserNodeInfo node_info = { 0, };
pcercuei 0:03b5121a232e 4404 int failed;
pcercuei 0:03b5121a232e 4405
pcercuei 0:03b5121a232e 4406 if ((ctxt == NULL) || (ctxt->input == NULL)) {
pcercuei 0:03b5121a232e 4407 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 4408 "htmlParseElementInternal: context error\n", NULL, NULL);
pcercuei 0:03b5121a232e 4409 return;
pcercuei 0:03b5121a232e 4410 }
pcercuei 0:03b5121a232e 4411
pcercuei 0:03b5121a232e 4412 if (ctxt->instate == XML_PARSER_EOF)
pcercuei 0:03b5121a232e 4413 return;
pcercuei 0:03b5121a232e 4414
pcercuei 0:03b5121a232e 4415 /* Capture start position */
pcercuei 0:03b5121a232e 4416 if (ctxt->record_info) {
pcercuei 0:03b5121a232e 4417 node_info.begin_pos = ctxt->input->consumed +
pcercuei 0:03b5121a232e 4418 (CUR_PTR - ctxt->input->base);
pcercuei 0:03b5121a232e 4419 node_info.begin_line = ctxt->input->line;
pcercuei 0:03b5121a232e 4420 }
pcercuei 0:03b5121a232e 4421
pcercuei 0:03b5121a232e 4422 failed = htmlParseStartTag(ctxt);
pcercuei 0:03b5121a232e 4423 name = ctxt->name;
pcercuei 0:03b5121a232e 4424 if ((failed == -1) || (name == NULL)) {
pcercuei 0:03b5121a232e 4425 if (CUR == '>')
pcercuei 0:03b5121a232e 4426 NEXT;
pcercuei 0:03b5121a232e 4427 return;
pcercuei 0:03b5121a232e 4428 }
pcercuei 0:03b5121a232e 4429
pcercuei 0:03b5121a232e 4430 /*
pcercuei 0:03b5121a232e 4431 * Lookup the info for that element.
pcercuei 0:03b5121a232e 4432 */
pcercuei 0:03b5121a232e 4433 info = htmlTagLookup(name);
pcercuei 0:03b5121a232e 4434 if (info == NULL) {
pcercuei 0:03b5121a232e 4435 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
pcercuei 0:03b5121a232e 4436 "Tag %s invalid\n", name, NULL);
pcercuei 0:03b5121a232e 4437 }
pcercuei 0:03b5121a232e 4438
pcercuei 0:03b5121a232e 4439 /*
pcercuei 0:03b5121a232e 4440 * Check for an Empty Element labeled the XML/SGML way
pcercuei 0:03b5121a232e 4441 */
pcercuei 0:03b5121a232e 4442 if ((CUR == '/') && (NXT(1) == '>')) {
pcercuei 0:03b5121a232e 4443 SKIP(2);
pcercuei 0:03b5121a232e 4444 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 4445 ctxt->sax->endElement(ctxt->userData, name);
pcercuei 0:03b5121a232e 4446 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 4447 return;
pcercuei 0:03b5121a232e 4448 }
pcercuei 0:03b5121a232e 4449
pcercuei 0:03b5121a232e 4450 if (CUR == '>') {
pcercuei 0:03b5121a232e 4451 NEXT;
pcercuei 0:03b5121a232e 4452 } else {
pcercuei 0:03b5121a232e 4453 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
pcercuei 0:03b5121a232e 4454 "Couldn't find end of Start Tag %s\n", name, NULL);
pcercuei 0:03b5121a232e 4455
pcercuei 0:03b5121a232e 4456 /*
pcercuei 0:03b5121a232e 4457 * end of parsing of this node.
pcercuei 0:03b5121a232e 4458 */
pcercuei 0:03b5121a232e 4459 if (xmlStrEqual(name, ctxt->name)) {
pcercuei 0:03b5121a232e 4460 nodePop(ctxt);
pcercuei 0:03b5121a232e 4461 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 4462 }
pcercuei 0:03b5121a232e 4463
pcercuei 0:03b5121a232e 4464 if (ctxt->record_info)
pcercuei 0:03b5121a232e 4465 htmlNodeInfoPush(ctxt, &node_info);
pcercuei 0:03b5121a232e 4466 htmlParserFinishElementParsing(ctxt);
pcercuei 0:03b5121a232e 4467 return;
pcercuei 0:03b5121a232e 4468 }
pcercuei 0:03b5121a232e 4469
pcercuei 0:03b5121a232e 4470 /*
pcercuei 0:03b5121a232e 4471 * Check for an Empty Element from DTD definition
pcercuei 0:03b5121a232e 4472 */
pcercuei 0:03b5121a232e 4473 if ((info != NULL) && (info->empty)) {
pcercuei 0:03b5121a232e 4474 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 4475 ctxt->sax->endElement(ctxt->userData, name);
pcercuei 0:03b5121a232e 4476 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 4477 return;
pcercuei 0:03b5121a232e 4478 }
pcercuei 0:03b5121a232e 4479
pcercuei 0:03b5121a232e 4480 if (ctxt->record_info)
pcercuei 0:03b5121a232e 4481 htmlNodeInfoPush(ctxt, &node_info);
pcercuei 0:03b5121a232e 4482 }
pcercuei 0:03b5121a232e 4483
pcercuei 0:03b5121a232e 4484 /**
pcercuei 0:03b5121a232e 4485 * htmlParseContentInternal:
pcercuei 0:03b5121a232e 4486 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4487 *
pcercuei 0:03b5121a232e 4488 * Parse a content: comment, sub-element, reference or text.
pcercuei 0:03b5121a232e 4489 * New version for non recursive htmlParseElementInternal
pcercuei 0:03b5121a232e 4490 */
pcercuei 0:03b5121a232e 4491
pcercuei 0:03b5121a232e 4492 static void
pcercuei 0:03b5121a232e 4493 htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 4494 xmlChar *currentNode;
pcercuei 0:03b5121a232e 4495 int depth;
pcercuei 0:03b5121a232e 4496 const xmlChar *name;
pcercuei 0:03b5121a232e 4497
pcercuei 0:03b5121a232e 4498 currentNode = xmlStrdup(ctxt->name);
pcercuei 0:03b5121a232e 4499 depth = ctxt->nameNr;
pcercuei 0:03b5121a232e 4500 while (1) {
pcercuei 0:03b5121a232e 4501 long cons = ctxt->nbChars;
pcercuei 0:03b5121a232e 4502
pcercuei 0:03b5121a232e 4503 GROW;
pcercuei 0:03b5121a232e 4504
pcercuei 0:03b5121a232e 4505 if (ctxt->instate == XML_PARSER_EOF)
pcercuei 0:03b5121a232e 4506 break;
pcercuei 0:03b5121a232e 4507
pcercuei 0:03b5121a232e 4508 /*
pcercuei 0:03b5121a232e 4509 * Our tag or one of it's parent or children is ending.
pcercuei 0:03b5121a232e 4510 */
pcercuei 0:03b5121a232e 4511 if ((CUR == '<') && (NXT(1) == '/')) {
pcercuei 0:03b5121a232e 4512 if (htmlParseEndTag(ctxt) &&
pcercuei 0:03b5121a232e 4513 ((currentNode != NULL) || (ctxt->nameNr == 0))) {
pcercuei 0:03b5121a232e 4514 if (currentNode != NULL)
pcercuei 0:03b5121a232e 4515 xmlFree(currentNode);
pcercuei 0:03b5121a232e 4516
pcercuei 0:03b5121a232e 4517 currentNode = xmlStrdup(ctxt->name);
pcercuei 0:03b5121a232e 4518 depth = ctxt->nameNr;
pcercuei 0:03b5121a232e 4519 }
pcercuei 0:03b5121a232e 4520 continue; /* while */
pcercuei 0:03b5121a232e 4521 }
pcercuei 0:03b5121a232e 4522
pcercuei 0:03b5121a232e 4523 else if ((CUR == '<') &&
pcercuei 0:03b5121a232e 4524 ((IS_ASCII_LETTER(NXT(1))) ||
pcercuei 0:03b5121a232e 4525 (NXT(1) == '_') || (NXT(1) == ':'))) {
pcercuei 0:03b5121a232e 4526 name = htmlParseHTMLName_nonInvasive(ctxt);
pcercuei 0:03b5121a232e 4527 if (name == NULL) {
pcercuei 0:03b5121a232e 4528 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
pcercuei 0:03b5121a232e 4529 "htmlParseStartTag: invalid element name\n",
pcercuei 0:03b5121a232e 4530 NULL, NULL);
pcercuei 0:03b5121a232e 4531 /* Dump the bogus tag like browsers do */
pcercuei 0:03b5121a232e 4532 while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
pcercuei 0:03b5121a232e 4533 NEXT;
pcercuei 0:03b5121a232e 4534
pcercuei 0:03b5121a232e 4535 htmlParserFinishElementParsing(ctxt);
pcercuei 0:03b5121a232e 4536 if (currentNode != NULL)
pcercuei 0:03b5121a232e 4537 xmlFree(currentNode);
pcercuei 0:03b5121a232e 4538
pcercuei 0:03b5121a232e 4539 currentNode = xmlStrdup(ctxt->name);
pcercuei 0:03b5121a232e 4540 depth = ctxt->nameNr;
pcercuei 0:03b5121a232e 4541 continue;
pcercuei 0:03b5121a232e 4542 }
pcercuei 0:03b5121a232e 4543
pcercuei 0:03b5121a232e 4544 if (ctxt->name != NULL) {
pcercuei 0:03b5121a232e 4545 if (htmlCheckAutoClose(name, ctxt->name) == 1) {
pcercuei 0:03b5121a232e 4546 htmlAutoClose(ctxt, name);
pcercuei 0:03b5121a232e 4547 continue;
pcercuei 0:03b5121a232e 4548 }
pcercuei 0:03b5121a232e 4549 }
pcercuei 0:03b5121a232e 4550 }
pcercuei 0:03b5121a232e 4551
pcercuei 0:03b5121a232e 4552 /*
pcercuei 0:03b5121a232e 4553 * Has this node been popped out during parsing of
pcercuei 0:03b5121a232e 4554 * the next element
pcercuei 0:03b5121a232e 4555 */
pcercuei 0:03b5121a232e 4556 if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
pcercuei 0:03b5121a232e 4557 (!xmlStrEqual(currentNode, ctxt->name)))
pcercuei 0:03b5121a232e 4558 {
pcercuei 0:03b5121a232e 4559 htmlParserFinishElementParsing(ctxt);
pcercuei 0:03b5121a232e 4560 if (currentNode != NULL) xmlFree(currentNode);
pcercuei 0:03b5121a232e 4561
pcercuei 0:03b5121a232e 4562 currentNode = xmlStrdup(ctxt->name);
pcercuei 0:03b5121a232e 4563 depth = ctxt->nameNr;
pcercuei 0:03b5121a232e 4564 continue;
pcercuei 0:03b5121a232e 4565 }
pcercuei 0:03b5121a232e 4566
pcercuei 0:03b5121a232e 4567 if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) ||
pcercuei 0:03b5121a232e 4568 (xmlStrEqual(currentNode, BAD_CAST"style")))) {
pcercuei 0:03b5121a232e 4569 /*
pcercuei 0:03b5121a232e 4570 * Handle SCRIPT/STYLE separately
pcercuei 0:03b5121a232e 4571 */
pcercuei 0:03b5121a232e 4572 htmlParseScript(ctxt);
pcercuei 0:03b5121a232e 4573 } else {
pcercuei 0:03b5121a232e 4574 /*
pcercuei 0:03b5121a232e 4575 * Sometimes DOCTYPE arrives in the middle of the document
pcercuei 0:03b5121a232e 4576 */
pcercuei 0:03b5121a232e 4577 if ((CUR == '<') && (NXT(1) == '!') &&
pcercuei 0:03b5121a232e 4578 (UPP(2) == 'D') && (UPP(3) == 'O') &&
pcercuei 0:03b5121a232e 4579 (UPP(4) == 'C') && (UPP(5) == 'T') &&
pcercuei 0:03b5121a232e 4580 (UPP(6) == 'Y') && (UPP(7) == 'P') &&
pcercuei 0:03b5121a232e 4581 (UPP(8) == 'E')) {
pcercuei 0:03b5121a232e 4582 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
pcercuei 0:03b5121a232e 4583 "Misplaced DOCTYPE declaration\n",
pcercuei 0:03b5121a232e 4584 BAD_CAST "DOCTYPE" , NULL);
pcercuei 0:03b5121a232e 4585 htmlParseDocTypeDecl(ctxt);
pcercuei 0:03b5121a232e 4586 }
pcercuei 0:03b5121a232e 4587
pcercuei 0:03b5121a232e 4588 /*
pcercuei 0:03b5121a232e 4589 * First case : a comment
pcercuei 0:03b5121a232e 4590 */
pcercuei 0:03b5121a232e 4591 if ((CUR == '<') && (NXT(1) == '!') &&
pcercuei 0:03b5121a232e 4592 (NXT(2) == '-') && (NXT(3) == '-')) {
pcercuei 0:03b5121a232e 4593 htmlParseComment(ctxt);
pcercuei 0:03b5121a232e 4594 }
pcercuei 0:03b5121a232e 4595
pcercuei 0:03b5121a232e 4596 /*
pcercuei 0:03b5121a232e 4597 * Second case : a Processing Instruction.
pcercuei 0:03b5121a232e 4598 */
pcercuei 0:03b5121a232e 4599 else if ((CUR == '<') && (NXT(1) == '?')) {
pcercuei 0:03b5121a232e 4600 htmlParsePI(ctxt);
pcercuei 0:03b5121a232e 4601 }
pcercuei 0:03b5121a232e 4602
pcercuei 0:03b5121a232e 4603 /*
pcercuei 0:03b5121a232e 4604 * Third case : a sub-element.
pcercuei 0:03b5121a232e 4605 */
pcercuei 0:03b5121a232e 4606 else if (CUR == '<') {
pcercuei 0:03b5121a232e 4607 htmlParseElementInternal(ctxt);
pcercuei 0:03b5121a232e 4608 if (currentNode != NULL) xmlFree(currentNode);
pcercuei 0:03b5121a232e 4609
pcercuei 0:03b5121a232e 4610 currentNode = xmlStrdup(ctxt->name);
pcercuei 0:03b5121a232e 4611 depth = ctxt->nameNr;
pcercuei 0:03b5121a232e 4612 }
pcercuei 0:03b5121a232e 4613
pcercuei 0:03b5121a232e 4614 /*
pcercuei 0:03b5121a232e 4615 * Fourth case : a reference. If if has not been resolved,
pcercuei 0:03b5121a232e 4616 * parsing returns it's Name, create the node
pcercuei 0:03b5121a232e 4617 */
pcercuei 0:03b5121a232e 4618 else if (CUR == '&') {
pcercuei 0:03b5121a232e 4619 htmlParseReference(ctxt);
pcercuei 0:03b5121a232e 4620 }
pcercuei 0:03b5121a232e 4621
pcercuei 0:03b5121a232e 4622 /*
pcercuei 0:03b5121a232e 4623 * Fifth case : end of the resource
pcercuei 0:03b5121a232e 4624 */
pcercuei 0:03b5121a232e 4625 else if (CUR == 0) {
pcercuei 0:03b5121a232e 4626 htmlAutoCloseOnEnd(ctxt);
pcercuei 0:03b5121a232e 4627 break;
pcercuei 0:03b5121a232e 4628 }
pcercuei 0:03b5121a232e 4629
pcercuei 0:03b5121a232e 4630 /*
pcercuei 0:03b5121a232e 4631 * Last case, text. Note that References are handled directly.
pcercuei 0:03b5121a232e 4632 */
pcercuei 0:03b5121a232e 4633 else {
pcercuei 0:03b5121a232e 4634 htmlParseCharData(ctxt);
pcercuei 0:03b5121a232e 4635 }
pcercuei 0:03b5121a232e 4636
pcercuei 0:03b5121a232e 4637 if (cons == ctxt->nbChars) {
pcercuei 0:03b5121a232e 4638 if (ctxt->node != NULL) {
pcercuei 0:03b5121a232e 4639 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 4640 "detected an error in element content\n",
pcercuei 0:03b5121a232e 4641 NULL, NULL);
pcercuei 0:03b5121a232e 4642 }
pcercuei 0:03b5121a232e 4643 break;
pcercuei 0:03b5121a232e 4644 }
pcercuei 0:03b5121a232e 4645 }
pcercuei 0:03b5121a232e 4646 GROW;
pcercuei 0:03b5121a232e 4647 }
pcercuei 0:03b5121a232e 4648 if (currentNode != NULL) xmlFree(currentNode);
pcercuei 0:03b5121a232e 4649 }
pcercuei 0:03b5121a232e 4650
pcercuei 0:03b5121a232e 4651 /**
pcercuei 0:03b5121a232e 4652 * htmlParseContent:
pcercuei 0:03b5121a232e 4653 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4654 *
pcercuei 0:03b5121a232e 4655 * Parse a content: comment, sub-element, reference or text.
pcercuei 0:03b5121a232e 4656 * This is the entry point when called from parser.c
pcercuei 0:03b5121a232e 4657 */
pcercuei 0:03b5121a232e 4658
pcercuei 0:03b5121a232e 4659 void
pcercuei 0:03b5121a232e 4660 __htmlParseContent(void *ctxt) {
pcercuei 0:03b5121a232e 4661 if (ctxt != NULL)
pcercuei 0:03b5121a232e 4662 htmlParseContentInternal((htmlParserCtxtPtr) ctxt);
pcercuei 0:03b5121a232e 4663 }
pcercuei 0:03b5121a232e 4664
pcercuei 0:03b5121a232e 4665 /**
pcercuei 0:03b5121a232e 4666 * htmlParseDocument:
pcercuei 0:03b5121a232e 4667 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4668 *
pcercuei 0:03b5121a232e 4669 * parse an HTML document (and build a tree if using the standard SAX
pcercuei 0:03b5121a232e 4670 * interface).
pcercuei 0:03b5121a232e 4671 *
pcercuei 0:03b5121a232e 4672 * Returns 0, -1 in case of error. the parser context is augmented
pcercuei 0:03b5121a232e 4673 * as a result of the parsing.
pcercuei 0:03b5121a232e 4674 */
pcercuei 0:03b5121a232e 4675
pcercuei 0:03b5121a232e 4676 int
pcercuei 0:03b5121a232e 4677 htmlParseDocument(htmlParserCtxtPtr ctxt) {
pcercuei 0:03b5121a232e 4678 xmlChar start[4];
pcercuei 0:03b5121a232e 4679 xmlCharEncoding enc;
pcercuei 0:03b5121a232e 4680 xmlDtdPtr dtd;
pcercuei 0:03b5121a232e 4681
pcercuei 0:03b5121a232e 4682 xmlInitParser();
pcercuei 0:03b5121a232e 4683
pcercuei 0:03b5121a232e 4684 htmlDefaultSAXHandlerInit();
pcercuei 0:03b5121a232e 4685
pcercuei 0:03b5121a232e 4686 if ((ctxt == NULL) || (ctxt->input == NULL)) {
pcercuei 0:03b5121a232e 4687 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 4688 "htmlParseDocument: context error\n", NULL, NULL);
pcercuei 0:03b5121a232e 4689 return(XML_ERR_INTERNAL_ERROR);
pcercuei 0:03b5121a232e 4690 }
pcercuei 0:03b5121a232e 4691 ctxt->html = 1;
pcercuei 0:03b5121a232e 4692 ctxt->linenumbers = 1;
pcercuei 0:03b5121a232e 4693 GROW;
pcercuei 0:03b5121a232e 4694 /*
pcercuei 0:03b5121a232e 4695 * SAX: beginning of the document processing.
pcercuei 0:03b5121a232e 4696 */
pcercuei 0:03b5121a232e 4697 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
pcercuei 0:03b5121a232e 4698 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
pcercuei 0:03b5121a232e 4699
pcercuei 0:03b5121a232e 4700 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
pcercuei 0:03b5121a232e 4701 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
pcercuei 0:03b5121a232e 4702 /*
pcercuei 0:03b5121a232e 4703 * Get the 4 first bytes and decode the charset
pcercuei 0:03b5121a232e 4704 * if enc != XML_CHAR_ENCODING_NONE
pcercuei 0:03b5121a232e 4705 * plug some encoding conversion routines.
pcercuei 0:03b5121a232e 4706 */
pcercuei 0:03b5121a232e 4707 start[0] = RAW;
pcercuei 0:03b5121a232e 4708 start[1] = NXT(1);
pcercuei 0:03b5121a232e 4709 start[2] = NXT(2);
pcercuei 0:03b5121a232e 4710 start[3] = NXT(3);
pcercuei 0:03b5121a232e 4711 enc = xmlDetectCharEncoding(&start[0], 4);
pcercuei 0:03b5121a232e 4712 if (enc != XML_CHAR_ENCODING_NONE) {
pcercuei 0:03b5121a232e 4713 xmlSwitchEncoding(ctxt, enc);
pcercuei 0:03b5121a232e 4714 }
pcercuei 0:03b5121a232e 4715 }
pcercuei 0:03b5121a232e 4716
pcercuei 0:03b5121a232e 4717 /*
pcercuei 0:03b5121a232e 4718 * Wipe out everything which is before the first '<'
pcercuei 0:03b5121a232e 4719 */
pcercuei 0:03b5121a232e 4720 SKIP_BLANKS;
pcercuei 0:03b5121a232e 4721 if (CUR == 0) {
pcercuei 0:03b5121a232e 4722 htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
pcercuei 0:03b5121a232e 4723 "Document is empty\n", NULL, NULL);
pcercuei 0:03b5121a232e 4724 }
pcercuei 0:03b5121a232e 4725
pcercuei 0:03b5121a232e 4726 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
pcercuei 0:03b5121a232e 4727 ctxt->sax->startDocument(ctxt->userData);
pcercuei 0:03b5121a232e 4728
pcercuei 0:03b5121a232e 4729
pcercuei 0:03b5121a232e 4730 /*
pcercuei 0:03b5121a232e 4731 * Parse possible comments and PIs before any content
pcercuei 0:03b5121a232e 4732 */
pcercuei 0:03b5121a232e 4733 while (((CUR == '<') && (NXT(1) == '!') &&
pcercuei 0:03b5121a232e 4734 (NXT(2) == '-') && (NXT(3) == '-')) ||
pcercuei 0:03b5121a232e 4735 ((CUR == '<') && (NXT(1) == '?'))) {
pcercuei 0:03b5121a232e 4736 htmlParseComment(ctxt);
pcercuei 0:03b5121a232e 4737 htmlParsePI(ctxt);
pcercuei 0:03b5121a232e 4738 SKIP_BLANKS;
pcercuei 0:03b5121a232e 4739 }
pcercuei 0:03b5121a232e 4740
pcercuei 0:03b5121a232e 4741
pcercuei 0:03b5121a232e 4742 /*
pcercuei 0:03b5121a232e 4743 * Then possibly doc type declaration(s) and more Misc
pcercuei 0:03b5121a232e 4744 * (doctypedecl Misc*)?
pcercuei 0:03b5121a232e 4745 */
pcercuei 0:03b5121a232e 4746 if ((CUR == '<') && (NXT(1) == '!') &&
pcercuei 0:03b5121a232e 4747 (UPP(2) == 'D') && (UPP(3) == 'O') &&
pcercuei 0:03b5121a232e 4748 (UPP(4) == 'C') && (UPP(5) == 'T') &&
pcercuei 0:03b5121a232e 4749 (UPP(6) == 'Y') && (UPP(7) == 'P') &&
pcercuei 0:03b5121a232e 4750 (UPP(8) == 'E')) {
pcercuei 0:03b5121a232e 4751 htmlParseDocTypeDecl(ctxt);
pcercuei 0:03b5121a232e 4752 }
pcercuei 0:03b5121a232e 4753 SKIP_BLANKS;
pcercuei 0:03b5121a232e 4754
pcercuei 0:03b5121a232e 4755 /*
pcercuei 0:03b5121a232e 4756 * Parse possible comments and PIs before any content
pcercuei 0:03b5121a232e 4757 */
pcercuei 0:03b5121a232e 4758 while (((CUR == '<') && (NXT(1) == '!') &&
pcercuei 0:03b5121a232e 4759 (NXT(2) == '-') && (NXT(3) == '-')) ||
pcercuei 0:03b5121a232e 4760 ((CUR == '<') && (NXT(1) == '?'))) {
pcercuei 0:03b5121a232e 4761 htmlParseComment(ctxt);
pcercuei 0:03b5121a232e 4762 htmlParsePI(ctxt);
pcercuei 0:03b5121a232e 4763 SKIP_BLANKS;
pcercuei 0:03b5121a232e 4764 }
pcercuei 0:03b5121a232e 4765
pcercuei 0:03b5121a232e 4766 /*
pcercuei 0:03b5121a232e 4767 * Time to start parsing the tree itself
pcercuei 0:03b5121a232e 4768 */
pcercuei 0:03b5121a232e 4769 htmlParseContentInternal(ctxt);
pcercuei 0:03b5121a232e 4770
pcercuei 0:03b5121a232e 4771 /*
pcercuei 0:03b5121a232e 4772 * autoclose
pcercuei 0:03b5121a232e 4773 */
pcercuei 0:03b5121a232e 4774 if (CUR == 0)
pcercuei 0:03b5121a232e 4775 htmlAutoCloseOnEnd(ctxt);
pcercuei 0:03b5121a232e 4776
pcercuei 0:03b5121a232e 4777
pcercuei 0:03b5121a232e 4778 /*
pcercuei 0:03b5121a232e 4779 * SAX: end of the document processing.
pcercuei 0:03b5121a232e 4780 */
pcercuei 0:03b5121a232e 4781 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
pcercuei 0:03b5121a232e 4782 ctxt->sax->endDocument(ctxt->userData);
pcercuei 0:03b5121a232e 4783
pcercuei 0:03b5121a232e 4784 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
pcercuei 0:03b5121a232e 4785 dtd = xmlGetIntSubset(ctxt->myDoc);
pcercuei 0:03b5121a232e 4786 if (dtd == NULL)
pcercuei 0:03b5121a232e 4787 ctxt->myDoc->intSubset =
pcercuei 0:03b5121a232e 4788 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
pcercuei 0:03b5121a232e 4789 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
pcercuei 0:03b5121a232e 4790 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
pcercuei 0:03b5121a232e 4791 }
pcercuei 0:03b5121a232e 4792 if (! ctxt->wellFormed) return(-1);
pcercuei 0:03b5121a232e 4793 return(0);
pcercuei 0:03b5121a232e 4794 }
pcercuei 0:03b5121a232e 4795
pcercuei 0:03b5121a232e 4796
pcercuei 0:03b5121a232e 4797 /************************************************************************
pcercuei 0:03b5121a232e 4798 * *
pcercuei 0:03b5121a232e 4799 * Parser contexts handling *
pcercuei 0:03b5121a232e 4800 * *
pcercuei 0:03b5121a232e 4801 ************************************************************************/
pcercuei 0:03b5121a232e 4802
pcercuei 0:03b5121a232e 4803 /**
pcercuei 0:03b5121a232e 4804 * htmlInitParserCtxt:
pcercuei 0:03b5121a232e 4805 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4806 *
pcercuei 0:03b5121a232e 4807 * Initialize a parser context
pcercuei 0:03b5121a232e 4808 *
pcercuei 0:03b5121a232e 4809 * Returns 0 in case of success and -1 in case of error
pcercuei 0:03b5121a232e 4810 */
pcercuei 0:03b5121a232e 4811
pcercuei 0:03b5121a232e 4812 static int
pcercuei 0:03b5121a232e 4813 htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
pcercuei 0:03b5121a232e 4814 {
pcercuei 0:03b5121a232e 4815 htmlSAXHandler *sax;
pcercuei 0:03b5121a232e 4816
pcercuei 0:03b5121a232e 4817 if (ctxt == NULL) return(-1);
pcercuei 0:03b5121a232e 4818 memset(ctxt, 0, sizeof(htmlParserCtxt));
pcercuei 0:03b5121a232e 4819
pcercuei 0:03b5121a232e 4820 ctxt->dict = xmlDictCreate();
pcercuei 0:03b5121a232e 4821 if (ctxt->dict == NULL) {
pcercuei 0:03b5121a232e 4822 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
pcercuei 0:03b5121a232e 4823 return(-1);
pcercuei 0:03b5121a232e 4824 }
pcercuei 0:03b5121a232e 4825 sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
pcercuei 0:03b5121a232e 4826 if (sax == NULL) {
pcercuei 0:03b5121a232e 4827 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
pcercuei 0:03b5121a232e 4828 return(-1);
pcercuei 0:03b5121a232e 4829 }
pcercuei 0:03b5121a232e 4830 else
pcercuei 0:03b5121a232e 4831 memset(sax, 0, sizeof(htmlSAXHandler));
pcercuei 0:03b5121a232e 4832
pcercuei 0:03b5121a232e 4833 /* Allocate the Input stack */
pcercuei 0:03b5121a232e 4834 ctxt->inputTab = (htmlParserInputPtr *)
pcercuei 0:03b5121a232e 4835 xmlMalloc(5 * sizeof(htmlParserInputPtr));
pcercuei 0:03b5121a232e 4836 if (ctxt->inputTab == NULL) {
pcercuei 0:03b5121a232e 4837 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
pcercuei 0:03b5121a232e 4838 ctxt->inputNr = 0;
pcercuei 0:03b5121a232e 4839 ctxt->inputMax = 0;
pcercuei 0:03b5121a232e 4840 ctxt->input = NULL;
pcercuei 0:03b5121a232e 4841 return(-1);
pcercuei 0:03b5121a232e 4842 }
pcercuei 0:03b5121a232e 4843 ctxt->inputNr = 0;
pcercuei 0:03b5121a232e 4844 ctxt->inputMax = 5;
pcercuei 0:03b5121a232e 4845 ctxt->input = NULL;
pcercuei 0:03b5121a232e 4846 ctxt->version = NULL;
pcercuei 0:03b5121a232e 4847 ctxt->encoding = NULL;
pcercuei 0:03b5121a232e 4848 ctxt->standalone = -1;
pcercuei 0:03b5121a232e 4849 ctxt->instate = XML_PARSER_START;
pcercuei 0:03b5121a232e 4850
pcercuei 0:03b5121a232e 4851 /* Allocate the Node stack */
pcercuei 0:03b5121a232e 4852 ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
pcercuei 0:03b5121a232e 4853 if (ctxt->nodeTab == NULL) {
pcercuei 0:03b5121a232e 4854 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
pcercuei 0:03b5121a232e 4855 ctxt->nodeNr = 0;
pcercuei 0:03b5121a232e 4856 ctxt->nodeMax = 0;
pcercuei 0:03b5121a232e 4857 ctxt->node = NULL;
pcercuei 0:03b5121a232e 4858 ctxt->inputNr = 0;
pcercuei 0:03b5121a232e 4859 ctxt->inputMax = 0;
pcercuei 0:03b5121a232e 4860 ctxt->input = NULL;
pcercuei 0:03b5121a232e 4861 return(-1);
pcercuei 0:03b5121a232e 4862 }
pcercuei 0:03b5121a232e 4863 ctxt->nodeNr = 0;
pcercuei 0:03b5121a232e 4864 ctxt->nodeMax = 10;
pcercuei 0:03b5121a232e 4865 ctxt->node = NULL;
pcercuei 0:03b5121a232e 4866
pcercuei 0:03b5121a232e 4867 /* Allocate the Name stack */
pcercuei 0:03b5121a232e 4868 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
pcercuei 0:03b5121a232e 4869 if (ctxt->nameTab == NULL) {
pcercuei 0:03b5121a232e 4870 htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
pcercuei 0:03b5121a232e 4871 ctxt->nameNr = 0;
pcercuei 0:03b5121a232e 4872 ctxt->nameMax = 0;
pcercuei 0:03b5121a232e 4873 ctxt->name = NULL;
pcercuei 0:03b5121a232e 4874 ctxt->nodeNr = 0;
pcercuei 0:03b5121a232e 4875 ctxt->nodeMax = 0;
pcercuei 0:03b5121a232e 4876 ctxt->node = NULL;
pcercuei 0:03b5121a232e 4877 ctxt->inputNr = 0;
pcercuei 0:03b5121a232e 4878 ctxt->inputMax = 0;
pcercuei 0:03b5121a232e 4879 ctxt->input = NULL;
pcercuei 0:03b5121a232e 4880 return(-1);
pcercuei 0:03b5121a232e 4881 }
pcercuei 0:03b5121a232e 4882 ctxt->nameNr = 0;
pcercuei 0:03b5121a232e 4883 ctxt->nameMax = 10;
pcercuei 0:03b5121a232e 4884 ctxt->name = NULL;
pcercuei 0:03b5121a232e 4885
pcercuei 0:03b5121a232e 4886 ctxt->nodeInfoTab = NULL;
pcercuei 0:03b5121a232e 4887 ctxt->nodeInfoNr = 0;
pcercuei 0:03b5121a232e 4888 ctxt->nodeInfoMax = 0;
pcercuei 0:03b5121a232e 4889
pcercuei 0:03b5121a232e 4890 if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
pcercuei 0:03b5121a232e 4891 else {
pcercuei 0:03b5121a232e 4892 ctxt->sax = sax;
pcercuei 0:03b5121a232e 4893 memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
pcercuei 0:03b5121a232e 4894 }
pcercuei 0:03b5121a232e 4895 ctxt->userData = ctxt;
pcercuei 0:03b5121a232e 4896 ctxt->myDoc = NULL;
pcercuei 0:03b5121a232e 4897 ctxt->wellFormed = 1;
pcercuei 0:03b5121a232e 4898 ctxt->replaceEntities = 0;
pcercuei 0:03b5121a232e 4899 ctxt->linenumbers = xmlLineNumbersDefaultValue;
pcercuei 0:03b5121a232e 4900 ctxt->html = 1;
pcercuei 0:03b5121a232e 4901 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
pcercuei 0:03b5121a232e 4902 ctxt->vctxt.userData = ctxt;
pcercuei 0:03b5121a232e 4903 ctxt->vctxt.error = xmlParserValidityError;
pcercuei 0:03b5121a232e 4904 ctxt->vctxt.warning = xmlParserValidityWarning;
pcercuei 0:03b5121a232e 4905 ctxt->record_info = 0;
pcercuei 0:03b5121a232e 4906 ctxt->validate = 0;
pcercuei 0:03b5121a232e 4907 ctxt->nbChars = 0;
pcercuei 0:03b5121a232e 4908 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 4909 ctxt->catalogs = NULL;
pcercuei 0:03b5121a232e 4910 xmlInitNodeInfoSeq(&ctxt->node_seq);
pcercuei 0:03b5121a232e 4911 return(0);
pcercuei 0:03b5121a232e 4912 }
pcercuei 0:03b5121a232e 4913
pcercuei 0:03b5121a232e 4914 /**
pcercuei 0:03b5121a232e 4915 * htmlFreeParserCtxt:
pcercuei 0:03b5121a232e 4916 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 4917 *
pcercuei 0:03b5121a232e 4918 * Free all the memory used by a parser context. However the parsed
pcercuei 0:03b5121a232e 4919 * document in ctxt->myDoc is not freed.
pcercuei 0:03b5121a232e 4920 */
pcercuei 0:03b5121a232e 4921
pcercuei 0:03b5121a232e 4922 void
pcercuei 0:03b5121a232e 4923 htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
pcercuei 0:03b5121a232e 4924 {
pcercuei 0:03b5121a232e 4925 xmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 4926 }
pcercuei 0:03b5121a232e 4927
pcercuei 0:03b5121a232e 4928 /**
pcercuei 0:03b5121a232e 4929 * htmlNewParserCtxt:
pcercuei 0:03b5121a232e 4930 *
pcercuei 0:03b5121a232e 4931 * Allocate and initialize a new parser context.
pcercuei 0:03b5121a232e 4932 *
pcercuei 0:03b5121a232e 4933 * Returns the htmlParserCtxtPtr or NULL in case of allocation error
pcercuei 0:03b5121a232e 4934 */
pcercuei 0:03b5121a232e 4935
pcercuei 0:03b5121a232e 4936 htmlParserCtxtPtr
pcercuei 0:03b5121a232e 4937 htmlNewParserCtxt(void)
pcercuei 0:03b5121a232e 4938 {
pcercuei 0:03b5121a232e 4939 xmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 4940
pcercuei 0:03b5121a232e 4941 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
pcercuei 0:03b5121a232e 4942 if (ctxt == NULL) {
pcercuei 0:03b5121a232e 4943 htmlErrMemory(NULL, "NewParserCtxt: out of memory\n");
pcercuei 0:03b5121a232e 4944 return(NULL);
pcercuei 0:03b5121a232e 4945 }
pcercuei 0:03b5121a232e 4946 memset(ctxt, 0, sizeof(xmlParserCtxt));
pcercuei 0:03b5121a232e 4947 if (htmlInitParserCtxt(ctxt) < 0) {
pcercuei 0:03b5121a232e 4948 htmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 4949 return(NULL);
pcercuei 0:03b5121a232e 4950 }
pcercuei 0:03b5121a232e 4951 return(ctxt);
pcercuei 0:03b5121a232e 4952 }
pcercuei 0:03b5121a232e 4953
pcercuei 0:03b5121a232e 4954 /**
pcercuei 0:03b5121a232e 4955 * htmlCreateMemoryParserCtxt:
pcercuei 0:03b5121a232e 4956 * @buffer: a pointer to a char array
pcercuei 0:03b5121a232e 4957 * @size: the size of the array
pcercuei 0:03b5121a232e 4958 *
pcercuei 0:03b5121a232e 4959 * Create a parser context for an HTML in-memory document.
pcercuei 0:03b5121a232e 4960 *
pcercuei 0:03b5121a232e 4961 * Returns the new parser context or NULL
pcercuei 0:03b5121a232e 4962 */
pcercuei 0:03b5121a232e 4963 htmlParserCtxtPtr
pcercuei 0:03b5121a232e 4964 htmlCreateMemoryParserCtxt(const char *buffer, int size) {
pcercuei 0:03b5121a232e 4965 xmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 4966 xmlParserInputPtr input;
pcercuei 0:03b5121a232e 4967 xmlParserInputBufferPtr buf;
pcercuei 0:03b5121a232e 4968
pcercuei 0:03b5121a232e 4969 if (buffer == NULL)
pcercuei 0:03b5121a232e 4970 return(NULL);
pcercuei 0:03b5121a232e 4971 if (size <= 0)
pcercuei 0:03b5121a232e 4972 return(NULL);
pcercuei 0:03b5121a232e 4973
pcercuei 0:03b5121a232e 4974 ctxt = htmlNewParserCtxt();
pcercuei 0:03b5121a232e 4975 if (ctxt == NULL)
pcercuei 0:03b5121a232e 4976 return(NULL);
pcercuei 0:03b5121a232e 4977
pcercuei 0:03b5121a232e 4978 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 4979 if (buf == NULL) return(NULL);
pcercuei 0:03b5121a232e 4980
pcercuei 0:03b5121a232e 4981 input = xmlNewInputStream(ctxt);
pcercuei 0:03b5121a232e 4982 if (input == NULL) {
pcercuei 0:03b5121a232e 4983 xmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 4984 return(NULL);
pcercuei 0:03b5121a232e 4985 }
pcercuei 0:03b5121a232e 4986
pcercuei 0:03b5121a232e 4987 input->filename = NULL;
pcercuei 0:03b5121a232e 4988 input->buf = buf;
pcercuei 0:03b5121a232e 4989 xmlBufResetInput(buf->buffer, input);
pcercuei 0:03b5121a232e 4990
pcercuei 0:03b5121a232e 4991 inputPush(ctxt, input);
pcercuei 0:03b5121a232e 4992 return(ctxt);
pcercuei 0:03b5121a232e 4993 }
pcercuei 0:03b5121a232e 4994
pcercuei 0:03b5121a232e 4995 /**
pcercuei 0:03b5121a232e 4996 * htmlCreateDocParserCtxt:
pcercuei 0:03b5121a232e 4997 * @cur: a pointer to an array of xmlChar
pcercuei 0:03b5121a232e 4998 * @encoding: a free form C string describing the HTML document encoding, or NULL
pcercuei 0:03b5121a232e 4999 *
pcercuei 0:03b5121a232e 5000 * Create a parser context for an HTML document.
pcercuei 0:03b5121a232e 5001 *
pcercuei 0:03b5121a232e 5002 * TODO: check the need to add encoding handling there
pcercuei 0:03b5121a232e 5003 *
pcercuei 0:03b5121a232e 5004 * Returns the new parser context or NULL
pcercuei 0:03b5121a232e 5005 */
pcercuei 0:03b5121a232e 5006 static htmlParserCtxtPtr
pcercuei 0:03b5121a232e 5007 htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) {
pcercuei 0:03b5121a232e 5008 int len;
pcercuei 0:03b5121a232e 5009 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 5010
pcercuei 0:03b5121a232e 5011 if (cur == NULL)
pcercuei 0:03b5121a232e 5012 return(NULL);
pcercuei 0:03b5121a232e 5013 len = xmlStrlen(cur);
pcercuei 0:03b5121a232e 5014 ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
pcercuei 0:03b5121a232e 5015 if (ctxt == NULL)
pcercuei 0:03b5121a232e 5016 return(NULL);
pcercuei 0:03b5121a232e 5017
pcercuei 0:03b5121a232e 5018 if (encoding != NULL) {
pcercuei 0:03b5121a232e 5019 xmlCharEncoding enc;
pcercuei 0:03b5121a232e 5020 xmlCharEncodingHandlerPtr handler;
pcercuei 0:03b5121a232e 5021
pcercuei 0:03b5121a232e 5022 if (ctxt->input->encoding != NULL)
pcercuei 0:03b5121a232e 5023 xmlFree((xmlChar *) ctxt->input->encoding);
pcercuei 0:03b5121a232e 5024 ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
pcercuei 0:03b5121a232e 5025
pcercuei 0:03b5121a232e 5026 enc = xmlParseCharEncoding(encoding);
pcercuei 0:03b5121a232e 5027 /*
pcercuei 0:03b5121a232e 5028 * registered set of known encodings
pcercuei 0:03b5121a232e 5029 */
pcercuei 0:03b5121a232e 5030 if (enc != XML_CHAR_ENCODING_ERROR) {
pcercuei 0:03b5121a232e 5031 xmlSwitchEncoding(ctxt, enc);
pcercuei 0:03b5121a232e 5032 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
pcercuei 0:03b5121a232e 5033 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
pcercuei 0:03b5121a232e 5034 "Unsupported encoding %s\n",
pcercuei 0:03b5121a232e 5035 (const xmlChar *) encoding, NULL);
pcercuei 0:03b5121a232e 5036 }
pcercuei 0:03b5121a232e 5037 } else {
pcercuei 0:03b5121a232e 5038 /*
pcercuei 0:03b5121a232e 5039 * fallback for unknown encodings
pcercuei 0:03b5121a232e 5040 */
pcercuei 0:03b5121a232e 5041 handler = xmlFindCharEncodingHandler((const char *) encoding);
pcercuei 0:03b5121a232e 5042 if (handler != NULL) {
pcercuei 0:03b5121a232e 5043 xmlSwitchToEncoding(ctxt, handler);
pcercuei 0:03b5121a232e 5044 } else {
pcercuei 0:03b5121a232e 5045 htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
pcercuei 0:03b5121a232e 5046 "Unsupported encoding %s\n",
pcercuei 0:03b5121a232e 5047 (const xmlChar *) encoding, NULL);
pcercuei 0:03b5121a232e 5048 }
pcercuei 0:03b5121a232e 5049 }
pcercuei 0:03b5121a232e 5050 }
pcercuei 0:03b5121a232e 5051 return(ctxt);
pcercuei 0:03b5121a232e 5052 }
pcercuei 0:03b5121a232e 5053
pcercuei 0:03b5121a232e 5054 #ifdef LIBXML_PUSH_ENABLED
pcercuei 0:03b5121a232e 5055 /************************************************************************
pcercuei 0:03b5121a232e 5056 * *
pcercuei 0:03b5121a232e 5057 * Progressive parsing interfaces *
pcercuei 0:03b5121a232e 5058 * *
pcercuei 0:03b5121a232e 5059 ************************************************************************/
pcercuei 0:03b5121a232e 5060
pcercuei 0:03b5121a232e 5061 /**
pcercuei 0:03b5121a232e 5062 * htmlParseLookupSequence:
pcercuei 0:03b5121a232e 5063 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 5064 * @first: the first char to lookup
pcercuei 0:03b5121a232e 5065 * @next: the next char to lookup or zero
pcercuei 0:03b5121a232e 5066 * @third: the next char to lookup or zero
pcercuei 0:03b5121a232e 5067 * @comment: flag to force checking inside comments
pcercuei 0:03b5121a232e 5068 *
pcercuei 0:03b5121a232e 5069 * Try to find if a sequence (first, next, third) or just (first next) or
pcercuei 0:03b5121a232e 5070 * (first) is available in the input stream.
pcercuei 0:03b5121a232e 5071 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
pcercuei 0:03b5121a232e 5072 * to avoid rescanning sequences of bytes, it DOES change the state of the
pcercuei 0:03b5121a232e 5073 * parser, do not use liberally.
pcercuei 0:03b5121a232e 5074 * This is basically similar to xmlParseLookupSequence()
pcercuei 0:03b5121a232e 5075 *
pcercuei 0:03b5121a232e 5076 * Returns the index to the current parsing point if the full sequence
pcercuei 0:03b5121a232e 5077 * is available, -1 otherwise.
pcercuei 0:03b5121a232e 5078 */
pcercuei 0:03b5121a232e 5079 static int
pcercuei 0:03b5121a232e 5080 htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
pcercuei 0:03b5121a232e 5081 xmlChar next, xmlChar third, int iscomment,
pcercuei 0:03b5121a232e 5082 int ignoreattrval)
pcercuei 0:03b5121a232e 5083 {
pcercuei 0:03b5121a232e 5084 int base, len;
pcercuei 0:03b5121a232e 5085 htmlParserInputPtr in;
pcercuei 0:03b5121a232e 5086 const xmlChar *buf;
pcercuei 0:03b5121a232e 5087 int incomment = 0;
pcercuei 0:03b5121a232e 5088 int invalue = 0;
pcercuei 0:03b5121a232e 5089 char valdellim = 0x0;
pcercuei 0:03b5121a232e 5090
pcercuei 0:03b5121a232e 5091 in = ctxt->input;
pcercuei 0:03b5121a232e 5092 if (in == NULL)
pcercuei 0:03b5121a232e 5093 return (-1);
pcercuei 0:03b5121a232e 5094
pcercuei 0:03b5121a232e 5095 base = in->cur - in->base;
pcercuei 0:03b5121a232e 5096 if (base < 0)
pcercuei 0:03b5121a232e 5097 return (-1);
pcercuei 0:03b5121a232e 5098
pcercuei 0:03b5121a232e 5099 if (ctxt->checkIndex > base)
pcercuei 0:03b5121a232e 5100 base = ctxt->checkIndex;
pcercuei 0:03b5121a232e 5101
pcercuei 0:03b5121a232e 5102 if (in->buf == NULL) {
pcercuei 0:03b5121a232e 5103 buf = in->base;
pcercuei 0:03b5121a232e 5104 len = in->length;
pcercuei 0:03b5121a232e 5105 } else {
pcercuei 0:03b5121a232e 5106 buf = xmlBufContent(in->buf->buffer);
pcercuei 0:03b5121a232e 5107 len = xmlBufUse(in->buf->buffer);
pcercuei 0:03b5121a232e 5108 }
pcercuei 0:03b5121a232e 5109
pcercuei 0:03b5121a232e 5110 /* take into account the sequence length */
pcercuei 0:03b5121a232e 5111 if (third)
pcercuei 0:03b5121a232e 5112 len -= 2;
pcercuei 0:03b5121a232e 5113 else if (next)
pcercuei 0:03b5121a232e 5114 len--;
pcercuei 0:03b5121a232e 5115 for (; base < len; base++) {
pcercuei 0:03b5121a232e 5116 if ((!incomment) && (base + 4 < len) && (!iscomment)) {
pcercuei 0:03b5121a232e 5117 if ((buf[base] == '<') && (buf[base + 1] == '!') &&
pcercuei 0:03b5121a232e 5118 (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
pcercuei 0:03b5121a232e 5119 incomment = 1;
pcercuei 0:03b5121a232e 5120 /* do not increment past <! - some people use <!--> */
pcercuei 0:03b5121a232e 5121 base += 2;
pcercuei 0:03b5121a232e 5122 }
pcercuei 0:03b5121a232e 5123 }
pcercuei 0:03b5121a232e 5124 if (ignoreattrval) {
pcercuei 0:03b5121a232e 5125 if (buf[base] == '"' || buf[base] == '\'') {
pcercuei 0:03b5121a232e 5126 if (invalue) {
pcercuei 0:03b5121a232e 5127 if (buf[base] == valdellim) {
pcercuei 0:03b5121a232e 5128 invalue = 0;
pcercuei 0:03b5121a232e 5129 continue;
pcercuei 0:03b5121a232e 5130 }
pcercuei 0:03b5121a232e 5131 } else {
pcercuei 0:03b5121a232e 5132 valdellim = buf[base];
pcercuei 0:03b5121a232e 5133 invalue = 1;
pcercuei 0:03b5121a232e 5134 continue;
pcercuei 0:03b5121a232e 5135 }
pcercuei 0:03b5121a232e 5136 } else if (invalue) {
pcercuei 0:03b5121a232e 5137 continue;
pcercuei 0:03b5121a232e 5138 }
pcercuei 0:03b5121a232e 5139 }
pcercuei 0:03b5121a232e 5140 if (incomment) {
pcercuei 0:03b5121a232e 5141 if (base + 3 > len)
pcercuei 0:03b5121a232e 5142 return (-1);
pcercuei 0:03b5121a232e 5143 if ((buf[base] == '-') && (buf[base + 1] == '-') &&
pcercuei 0:03b5121a232e 5144 (buf[base + 2] == '>')) {
pcercuei 0:03b5121a232e 5145 incomment = 0;
pcercuei 0:03b5121a232e 5146 base += 2;
pcercuei 0:03b5121a232e 5147 }
pcercuei 0:03b5121a232e 5148 continue;
pcercuei 0:03b5121a232e 5149 }
pcercuei 0:03b5121a232e 5150 if (buf[base] == first) {
pcercuei 0:03b5121a232e 5151 if (third != 0) {
pcercuei 0:03b5121a232e 5152 if ((buf[base + 1] != next) || (buf[base + 2] != third))
pcercuei 0:03b5121a232e 5153 continue;
pcercuei 0:03b5121a232e 5154 } else if (next != 0) {
pcercuei 0:03b5121a232e 5155 if (buf[base + 1] != next)
pcercuei 0:03b5121a232e 5156 continue;
pcercuei 0:03b5121a232e 5157 }
pcercuei 0:03b5121a232e 5158 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5159 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5160 if (next == 0)
pcercuei 0:03b5121a232e 5161 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5162 "HPP: lookup '%c' found at %d\n",
pcercuei 0:03b5121a232e 5163 first, base);
pcercuei 0:03b5121a232e 5164 else if (third == 0)
pcercuei 0:03b5121a232e 5165 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5166 "HPP: lookup '%c%c' found at %d\n",
pcercuei 0:03b5121a232e 5167 first, next, base);
pcercuei 0:03b5121a232e 5168 else
pcercuei 0:03b5121a232e 5169 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5170 "HPP: lookup '%c%c%c' found at %d\n",
pcercuei 0:03b5121a232e 5171 first, next, third, base);
pcercuei 0:03b5121a232e 5172 #endif
pcercuei 0:03b5121a232e 5173 return (base - (in->cur - in->base));
pcercuei 0:03b5121a232e 5174 }
pcercuei 0:03b5121a232e 5175 }
pcercuei 0:03b5121a232e 5176 if ((!incomment) && (!invalue))
pcercuei 0:03b5121a232e 5177 ctxt->checkIndex = base;
pcercuei 0:03b5121a232e 5178 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5179 if (next == 0)
pcercuei 0:03b5121a232e 5180 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5181 "HPP: lookup '%c' failed\n", first);
pcercuei 0:03b5121a232e 5182 else if (third == 0)
pcercuei 0:03b5121a232e 5183 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5184 "HPP: lookup '%c%c' failed\n", first, next);
pcercuei 0:03b5121a232e 5185 else
pcercuei 0:03b5121a232e 5186 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5187 "HPP: lookup '%c%c%c' failed\n", first, next,
pcercuei 0:03b5121a232e 5188 third);
pcercuei 0:03b5121a232e 5189 #endif
pcercuei 0:03b5121a232e 5190 return (-1);
pcercuei 0:03b5121a232e 5191 }
pcercuei 0:03b5121a232e 5192
pcercuei 0:03b5121a232e 5193 /**
pcercuei 0:03b5121a232e 5194 * htmlParseLookupChars:
pcercuei 0:03b5121a232e 5195 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 5196 * @stop: Array of chars, which stop the lookup.
pcercuei 0:03b5121a232e 5197 * @stopLen: Length of stop-Array
pcercuei 0:03b5121a232e 5198 *
pcercuei 0:03b5121a232e 5199 * Try to find if any char of the stop-Array is available in the input
pcercuei 0:03b5121a232e 5200 * stream.
pcercuei 0:03b5121a232e 5201 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
pcercuei 0:03b5121a232e 5202 * to avoid rescanning sequences of bytes, it DOES change the state of the
pcercuei 0:03b5121a232e 5203 * parser, do not use liberally.
pcercuei 0:03b5121a232e 5204 *
pcercuei 0:03b5121a232e 5205 * Returns the index to the current parsing point if a stopChar
pcercuei 0:03b5121a232e 5206 * is available, -1 otherwise.
pcercuei 0:03b5121a232e 5207 */
pcercuei 0:03b5121a232e 5208 static int
pcercuei 0:03b5121a232e 5209 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
pcercuei 0:03b5121a232e 5210 int stopLen)
pcercuei 0:03b5121a232e 5211 {
pcercuei 0:03b5121a232e 5212 int base, len;
pcercuei 0:03b5121a232e 5213 htmlParserInputPtr in;
pcercuei 0:03b5121a232e 5214 const xmlChar *buf;
pcercuei 0:03b5121a232e 5215 int incomment = 0;
pcercuei 0:03b5121a232e 5216 int i;
pcercuei 0:03b5121a232e 5217
pcercuei 0:03b5121a232e 5218 in = ctxt->input;
pcercuei 0:03b5121a232e 5219 if (in == NULL)
pcercuei 0:03b5121a232e 5220 return (-1);
pcercuei 0:03b5121a232e 5221
pcercuei 0:03b5121a232e 5222 base = in->cur - in->base;
pcercuei 0:03b5121a232e 5223 if (base < 0)
pcercuei 0:03b5121a232e 5224 return (-1);
pcercuei 0:03b5121a232e 5225
pcercuei 0:03b5121a232e 5226 if (ctxt->checkIndex > base)
pcercuei 0:03b5121a232e 5227 base = ctxt->checkIndex;
pcercuei 0:03b5121a232e 5228
pcercuei 0:03b5121a232e 5229 if (in->buf == NULL) {
pcercuei 0:03b5121a232e 5230 buf = in->base;
pcercuei 0:03b5121a232e 5231 len = in->length;
pcercuei 0:03b5121a232e 5232 } else {
pcercuei 0:03b5121a232e 5233 buf = xmlBufContent(in->buf->buffer);
pcercuei 0:03b5121a232e 5234 len = xmlBufUse(in->buf->buffer);
pcercuei 0:03b5121a232e 5235 }
pcercuei 0:03b5121a232e 5236
pcercuei 0:03b5121a232e 5237 for (; base < len; base++) {
pcercuei 0:03b5121a232e 5238 if (!incomment && (base + 4 < len)) {
pcercuei 0:03b5121a232e 5239 if ((buf[base] == '<') && (buf[base + 1] == '!') &&
pcercuei 0:03b5121a232e 5240 (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
pcercuei 0:03b5121a232e 5241 incomment = 1;
pcercuei 0:03b5121a232e 5242 /* do not increment past <! - some people use <!--> */
pcercuei 0:03b5121a232e 5243 base += 2;
pcercuei 0:03b5121a232e 5244 }
pcercuei 0:03b5121a232e 5245 }
pcercuei 0:03b5121a232e 5246 if (incomment) {
pcercuei 0:03b5121a232e 5247 if (base + 3 > len)
pcercuei 0:03b5121a232e 5248 return (-1);
pcercuei 0:03b5121a232e 5249 if ((buf[base] == '-') && (buf[base + 1] == '-') &&
pcercuei 0:03b5121a232e 5250 (buf[base + 2] == '>')) {
pcercuei 0:03b5121a232e 5251 incomment = 0;
pcercuei 0:03b5121a232e 5252 base += 2;
pcercuei 0:03b5121a232e 5253 }
pcercuei 0:03b5121a232e 5254 continue;
pcercuei 0:03b5121a232e 5255 }
pcercuei 0:03b5121a232e 5256 for (i = 0; i < stopLen; ++i) {
pcercuei 0:03b5121a232e 5257 if (buf[base] == stop[i]) {
pcercuei 0:03b5121a232e 5258 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5259 return (base - (in->cur - in->base));
pcercuei 0:03b5121a232e 5260 }
pcercuei 0:03b5121a232e 5261 }
pcercuei 0:03b5121a232e 5262 }
pcercuei 0:03b5121a232e 5263 ctxt->checkIndex = base;
pcercuei 0:03b5121a232e 5264 return (-1);
pcercuei 0:03b5121a232e 5265 }
pcercuei 0:03b5121a232e 5266
pcercuei 0:03b5121a232e 5267 /**
pcercuei 0:03b5121a232e 5268 * htmlParseTryOrFinish:
pcercuei 0:03b5121a232e 5269 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 5270 * @terminate: last chunk indicator
pcercuei 0:03b5121a232e 5271 *
pcercuei 0:03b5121a232e 5272 * Try to progress on parsing
pcercuei 0:03b5121a232e 5273 *
pcercuei 0:03b5121a232e 5274 * Returns zero if no parsing was possible
pcercuei 0:03b5121a232e 5275 */
pcercuei 0:03b5121a232e 5276 static int
pcercuei 0:03b5121a232e 5277 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
pcercuei 0:03b5121a232e 5278 int ret = 0;
pcercuei 0:03b5121a232e 5279 htmlParserInputPtr in;
pcercuei 0:03b5121a232e 5280 int avail = 0;
pcercuei 0:03b5121a232e 5281 xmlChar cur, next;
pcercuei 0:03b5121a232e 5282
pcercuei 0:03b5121a232e 5283 htmlParserNodeInfo node_info;
pcercuei 0:03b5121a232e 5284
pcercuei 0:03b5121a232e 5285 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5286 switch (ctxt->instate) {
pcercuei 0:03b5121a232e 5287 case XML_PARSER_EOF:
pcercuei 0:03b5121a232e 5288 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5289 "HPP: try EOF\n"); break;
pcercuei 0:03b5121a232e 5290 case XML_PARSER_START:
pcercuei 0:03b5121a232e 5291 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5292 "HPP: try START\n"); break;
pcercuei 0:03b5121a232e 5293 case XML_PARSER_MISC:
pcercuei 0:03b5121a232e 5294 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5295 "HPP: try MISC\n");break;
pcercuei 0:03b5121a232e 5296 case XML_PARSER_COMMENT:
pcercuei 0:03b5121a232e 5297 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5298 "HPP: try COMMENT\n");break;
pcercuei 0:03b5121a232e 5299 case XML_PARSER_PROLOG:
pcercuei 0:03b5121a232e 5300 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5301 "HPP: try PROLOG\n");break;
pcercuei 0:03b5121a232e 5302 case XML_PARSER_START_TAG:
pcercuei 0:03b5121a232e 5303 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5304 "HPP: try START_TAG\n");break;
pcercuei 0:03b5121a232e 5305 case XML_PARSER_CONTENT:
pcercuei 0:03b5121a232e 5306 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5307 "HPP: try CONTENT\n");break;
pcercuei 0:03b5121a232e 5308 case XML_PARSER_CDATA_SECTION:
pcercuei 0:03b5121a232e 5309 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5310 "HPP: try CDATA_SECTION\n");break;
pcercuei 0:03b5121a232e 5311 case XML_PARSER_END_TAG:
pcercuei 0:03b5121a232e 5312 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5313 "HPP: try END_TAG\n");break;
pcercuei 0:03b5121a232e 5314 case XML_PARSER_ENTITY_DECL:
pcercuei 0:03b5121a232e 5315 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5316 "HPP: try ENTITY_DECL\n");break;
pcercuei 0:03b5121a232e 5317 case XML_PARSER_ENTITY_VALUE:
pcercuei 0:03b5121a232e 5318 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5319 "HPP: try ENTITY_VALUE\n");break;
pcercuei 0:03b5121a232e 5320 case XML_PARSER_ATTRIBUTE_VALUE:
pcercuei 0:03b5121a232e 5321 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5322 "HPP: try ATTRIBUTE_VALUE\n");break;
pcercuei 0:03b5121a232e 5323 case XML_PARSER_DTD:
pcercuei 0:03b5121a232e 5324 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5325 "HPP: try DTD\n");break;
pcercuei 0:03b5121a232e 5326 case XML_PARSER_EPILOG:
pcercuei 0:03b5121a232e 5327 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5328 "HPP: try EPILOG\n");break;
pcercuei 0:03b5121a232e 5329 case XML_PARSER_PI:
pcercuei 0:03b5121a232e 5330 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5331 "HPP: try PI\n");break;
pcercuei 0:03b5121a232e 5332 case XML_PARSER_SYSTEM_LITERAL:
pcercuei 0:03b5121a232e 5333 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5334 "HPP: try SYSTEM_LITERAL\n");break;
pcercuei 0:03b5121a232e 5335 }
pcercuei 0:03b5121a232e 5336 #endif
pcercuei 0:03b5121a232e 5337
pcercuei 0:03b5121a232e 5338 while (1) {
pcercuei 0:03b5121a232e 5339
pcercuei 0:03b5121a232e 5340 in = ctxt->input;
pcercuei 0:03b5121a232e 5341 if (in == NULL) break;
pcercuei 0:03b5121a232e 5342 if (in->buf == NULL)
pcercuei 0:03b5121a232e 5343 avail = in->length - (in->cur - in->base);
pcercuei 0:03b5121a232e 5344 else
pcercuei 0:03b5121a232e 5345 avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
pcercuei 0:03b5121a232e 5346 if ((avail == 0) && (terminate)) {
pcercuei 0:03b5121a232e 5347 htmlAutoCloseOnEnd(ctxt);
pcercuei 0:03b5121a232e 5348 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
pcercuei 0:03b5121a232e 5349 /*
pcercuei 0:03b5121a232e 5350 * SAX: end of the document processing.
pcercuei 0:03b5121a232e 5351 */
pcercuei 0:03b5121a232e 5352 ctxt->instate = XML_PARSER_EOF;
pcercuei 0:03b5121a232e 5353 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
pcercuei 0:03b5121a232e 5354 ctxt->sax->endDocument(ctxt->userData);
pcercuei 0:03b5121a232e 5355 }
pcercuei 0:03b5121a232e 5356 }
pcercuei 0:03b5121a232e 5357 if (avail < 1)
pcercuei 0:03b5121a232e 5358 goto done;
pcercuei 0:03b5121a232e 5359 cur = in->cur[0];
pcercuei 0:03b5121a232e 5360 if (cur == 0) {
pcercuei 0:03b5121a232e 5361 SKIP(1);
pcercuei 0:03b5121a232e 5362 continue;
pcercuei 0:03b5121a232e 5363 }
pcercuei 0:03b5121a232e 5364
pcercuei 0:03b5121a232e 5365 switch (ctxt->instate) {
pcercuei 0:03b5121a232e 5366 case XML_PARSER_EOF:
pcercuei 0:03b5121a232e 5367 /*
pcercuei 0:03b5121a232e 5368 * Document parsing is done !
pcercuei 0:03b5121a232e 5369 */
pcercuei 0:03b5121a232e 5370 goto done;
pcercuei 0:03b5121a232e 5371 case XML_PARSER_START:
pcercuei 0:03b5121a232e 5372 /*
pcercuei 0:03b5121a232e 5373 * Very first chars read from the document flow.
pcercuei 0:03b5121a232e 5374 */
pcercuei 0:03b5121a232e 5375 cur = in->cur[0];
pcercuei 0:03b5121a232e 5376 if (IS_BLANK_CH(cur)) {
pcercuei 0:03b5121a232e 5377 SKIP_BLANKS;
pcercuei 0:03b5121a232e 5378 if (in->buf == NULL)
pcercuei 0:03b5121a232e 5379 avail = in->length - (in->cur - in->base);
pcercuei 0:03b5121a232e 5380 else
pcercuei 0:03b5121a232e 5381 avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
pcercuei 0:03b5121a232e 5382 }
pcercuei 0:03b5121a232e 5383 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
pcercuei 0:03b5121a232e 5384 ctxt->sax->setDocumentLocator(ctxt->userData,
pcercuei 0:03b5121a232e 5385 &xmlDefaultSAXLocator);
pcercuei 0:03b5121a232e 5386 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
pcercuei 0:03b5121a232e 5387 (!ctxt->disableSAX))
pcercuei 0:03b5121a232e 5388 ctxt->sax->startDocument(ctxt->userData);
pcercuei 0:03b5121a232e 5389
pcercuei 0:03b5121a232e 5390 cur = in->cur[0];
pcercuei 0:03b5121a232e 5391 next = in->cur[1];
pcercuei 0:03b5121a232e 5392 if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5393 (UPP(2) == 'D') && (UPP(3) == 'O') &&
pcercuei 0:03b5121a232e 5394 (UPP(4) == 'C') && (UPP(5) == 'T') &&
pcercuei 0:03b5121a232e 5395 (UPP(6) == 'Y') && (UPP(7) == 'P') &&
pcercuei 0:03b5121a232e 5396 (UPP(8) == 'E')) {
pcercuei 0:03b5121a232e 5397 if ((!terminate) &&
pcercuei 0:03b5121a232e 5398 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5399 goto done;
pcercuei 0:03b5121a232e 5400 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5401 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5402 "HPP: Parsing internal subset\n");
pcercuei 0:03b5121a232e 5403 #endif
pcercuei 0:03b5121a232e 5404 htmlParseDocTypeDecl(ctxt);
pcercuei 0:03b5121a232e 5405 ctxt->instate = XML_PARSER_PROLOG;
pcercuei 0:03b5121a232e 5406 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5407 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5408 "HPP: entering PROLOG\n");
pcercuei 0:03b5121a232e 5409 #endif
pcercuei 0:03b5121a232e 5410 } else {
pcercuei 0:03b5121a232e 5411 ctxt->instate = XML_PARSER_MISC;
pcercuei 0:03b5121a232e 5412 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5413 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5414 "HPP: entering MISC\n");
pcercuei 0:03b5121a232e 5415 #endif
pcercuei 0:03b5121a232e 5416 }
pcercuei 0:03b5121a232e 5417 break;
pcercuei 0:03b5121a232e 5418 case XML_PARSER_MISC:
pcercuei 0:03b5121a232e 5419 SKIP_BLANKS;
pcercuei 0:03b5121a232e 5420 if (in->buf == NULL)
pcercuei 0:03b5121a232e 5421 avail = in->length - (in->cur - in->base);
pcercuei 0:03b5121a232e 5422 else
pcercuei 0:03b5121a232e 5423 avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
pcercuei 0:03b5121a232e 5424 /*
pcercuei 0:03b5121a232e 5425 * no chars in buffer
pcercuei 0:03b5121a232e 5426 */
pcercuei 0:03b5121a232e 5427 if (avail < 1)
pcercuei 0:03b5121a232e 5428 goto done;
pcercuei 0:03b5121a232e 5429 /*
pcercuei 0:03b5121a232e 5430 * not enouth chars in buffer
pcercuei 0:03b5121a232e 5431 */
pcercuei 0:03b5121a232e 5432 if (avail < 2) {
pcercuei 0:03b5121a232e 5433 if (!terminate)
pcercuei 0:03b5121a232e 5434 goto done;
pcercuei 0:03b5121a232e 5435 else
pcercuei 0:03b5121a232e 5436 next = ' ';
pcercuei 0:03b5121a232e 5437 } else {
pcercuei 0:03b5121a232e 5438 next = in->cur[1];
pcercuei 0:03b5121a232e 5439 }
pcercuei 0:03b5121a232e 5440 cur = in->cur[0];
pcercuei 0:03b5121a232e 5441 if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5442 (in->cur[2] == '-') && (in->cur[3] == '-')) {
pcercuei 0:03b5121a232e 5443 if ((!terminate) &&
pcercuei 0:03b5121a232e 5444 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
pcercuei 0:03b5121a232e 5445 goto done;
pcercuei 0:03b5121a232e 5446 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5447 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5448 "HPP: Parsing Comment\n");
pcercuei 0:03b5121a232e 5449 #endif
pcercuei 0:03b5121a232e 5450 htmlParseComment(ctxt);
pcercuei 0:03b5121a232e 5451 ctxt->instate = XML_PARSER_MISC;
pcercuei 0:03b5121a232e 5452 } else if ((cur == '<') && (next == '?')) {
pcercuei 0:03b5121a232e 5453 if ((!terminate) &&
pcercuei 0:03b5121a232e 5454 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5455 goto done;
pcercuei 0:03b5121a232e 5456 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5457 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5458 "HPP: Parsing PI\n");
pcercuei 0:03b5121a232e 5459 #endif
pcercuei 0:03b5121a232e 5460 htmlParsePI(ctxt);
pcercuei 0:03b5121a232e 5461 ctxt->instate = XML_PARSER_MISC;
pcercuei 0:03b5121a232e 5462 } else if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5463 (UPP(2) == 'D') && (UPP(3) == 'O') &&
pcercuei 0:03b5121a232e 5464 (UPP(4) == 'C') && (UPP(5) == 'T') &&
pcercuei 0:03b5121a232e 5465 (UPP(6) == 'Y') && (UPP(7) == 'P') &&
pcercuei 0:03b5121a232e 5466 (UPP(8) == 'E')) {
pcercuei 0:03b5121a232e 5467 if ((!terminate) &&
pcercuei 0:03b5121a232e 5468 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5469 goto done;
pcercuei 0:03b5121a232e 5470 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5471 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5472 "HPP: Parsing internal subset\n");
pcercuei 0:03b5121a232e 5473 #endif
pcercuei 0:03b5121a232e 5474 htmlParseDocTypeDecl(ctxt);
pcercuei 0:03b5121a232e 5475 ctxt->instate = XML_PARSER_PROLOG;
pcercuei 0:03b5121a232e 5476 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5477 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5478 "HPP: entering PROLOG\n");
pcercuei 0:03b5121a232e 5479 #endif
pcercuei 0:03b5121a232e 5480 } else if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5481 (avail < 9)) {
pcercuei 0:03b5121a232e 5482 goto done;
pcercuei 0:03b5121a232e 5483 } else {
pcercuei 0:03b5121a232e 5484 ctxt->instate = XML_PARSER_START_TAG;
pcercuei 0:03b5121a232e 5485 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5486 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5487 "HPP: entering START_TAG\n");
pcercuei 0:03b5121a232e 5488 #endif
pcercuei 0:03b5121a232e 5489 }
pcercuei 0:03b5121a232e 5490 break;
pcercuei 0:03b5121a232e 5491 case XML_PARSER_PROLOG:
pcercuei 0:03b5121a232e 5492 SKIP_BLANKS;
pcercuei 0:03b5121a232e 5493 if (in->buf == NULL)
pcercuei 0:03b5121a232e 5494 avail = in->length - (in->cur - in->base);
pcercuei 0:03b5121a232e 5495 else
pcercuei 0:03b5121a232e 5496 avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
pcercuei 0:03b5121a232e 5497 if (avail < 2)
pcercuei 0:03b5121a232e 5498 goto done;
pcercuei 0:03b5121a232e 5499 cur = in->cur[0];
pcercuei 0:03b5121a232e 5500 next = in->cur[1];
pcercuei 0:03b5121a232e 5501 if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5502 (in->cur[2] == '-') && (in->cur[3] == '-')) {
pcercuei 0:03b5121a232e 5503 if ((!terminate) &&
pcercuei 0:03b5121a232e 5504 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
pcercuei 0:03b5121a232e 5505 goto done;
pcercuei 0:03b5121a232e 5506 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5507 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5508 "HPP: Parsing Comment\n");
pcercuei 0:03b5121a232e 5509 #endif
pcercuei 0:03b5121a232e 5510 htmlParseComment(ctxt);
pcercuei 0:03b5121a232e 5511 ctxt->instate = XML_PARSER_PROLOG;
pcercuei 0:03b5121a232e 5512 } else if ((cur == '<') && (next == '?')) {
pcercuei 0:03b5121a232e 5513 if ((!terminate) &&
pcercuei 0:03b5121a232e 5514 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5515 goto done;
pcercuei 0:03b5121a232e 5516 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5517 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5518 "HPP: Parsing PI\n");
pcercuei 0:03b5121a232e 5519 #endif
pcercuei 0:03b5121a232e 5520 htmlParsePI(ctxt);
pcercuei 0:03b5121a232e 5521 ctxt->instate = XML_PARSER_PROLOG;
pcercuei 0:03b5121a232e 5522 } else if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5523 (avail < 4)) {
pcercuei 0:03b5121a232e 5524 goto done;
pcercuei 0:03b5121a232e 5525 } else {
pcercuei 0:03b5121a232e 5526 ctxt->instate = XML_PARSER_START_TAG;
pcercuei 0:03b5121a232e 5527 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5528 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5529 "HPP: entering START_TAG\n");
pcercuei 0:03b5121a232e 5530 #endif
pcercuei 0:03b5121a232e 5531 }
pcercuei 0:03b5121a232e 5532 break;
pcercuei 0:03b5121a232e 5533 case XML_PARSER_EPILOG:
pcercuei 0:03b5121a232e 5534 if (in->buf == NULL)
pcercuei 0:03b5121a232e 5535 avail = in->length - (in->cur - in->base);
pcercuei 0:03b5121a232e 5536 else
pcercuei 0:03b5121a232e 5537 avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
pcercuei 0:03b5121a232e 5538 if (avail < 1)
pcercuei 0:03b5121a232e 5539 goto done;
pcercuei 0:03b5121a232e 5540 cur = in->cur[0];
pcercuei 0:03b5121a232e 5541 if (IS_BLANK_CH(cur)) {
pcercuei 0:03b5121a232e 5542 htmlParseCharData(ctxt);
pcercuei 0:03b5121a232e 5543 goto done;
pcercuei 0:03b5121a232e 5544 }
pcercuei 0:03b5121a232e 5545 if (avail < 2)
pcercuei 0:03b5121a232e 5546 goto done;
pcercuei 0:03b5121a232e 5547 next = in->cur[1];
pcercuei 0:03b5121a232e 5548 if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5549 (in->cur[2] == '-') && (in->cur[3] == '-')) {
pcercuei 0:03b5121a232e 5550 if ((!terminate) &&
pcercuei 0:03b5121a232e 5551 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
pcercuei 0:03b5121a232e 5552 goto done;
pcercuei 0:03b5121a232e 5553 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5554 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5555 "HPP: Parsing Comment\n");
pcercuei 0:03b5121a232e 5556 #endif
pcercuei 0:03b5121a232e 5557 htmlParseComment(ctxt);
pcercuei 0:03b5121a232e 5558 ctxt->instate = XML_PARSER_EPILOG;
pcercuei 0:03b5121a232e 5559 } else if ((cur == '<') && (next == '?')) {
pcercuei 0:03b5121a232e 5560 if ((!terminate) &&
pcercuei 0:03b5121a232e 5561 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5562 goto done;
pcercuei 0:03b5121a232e 5563 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5564 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5565 "HPP: Parsing PI\n");
pcercuei 0:03b5121a232e 5566 #endif
pcercuei 0:03b5121a232e 5567 htmlParsePI(ctxt);
pcercuei 0:03b5121a232e 5568 ctxt->instate = XML_PARSER_EPILOG;
pcercuei 0:03b5121a232e 5569 } else if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5570 (avail < 4)) {
pcercuei 0:03b5121a232e 5571 goto done;
pcercuei 0:03b5121a232e 5572 } else {
pcercuei 0:03b5121a232e 5573 ctxt->errNo = XML_ERR_DOCUMENT_END;
pcercuei 0:03b5121a232e 5574 ctxt->wellFormed = 0;
pcercuei 0:03b5121a232e 5575 ctxt->instate = XML_PARSER_EOF;
pcercuei 0:03b5121a232e 5576 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5577 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5578 "HPP: entering EOF\n");
pcercuei 0:03b5121a232e 5579 #endif
pcercuei 0:03b5121a232e 5580 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
pcercuei 0:03b5121a232e 5581 ctxt->sax->endDocument(ctxt->userData);
pcercuei 0:03b5121a232e 5582 goto done;
pcercuei 0:03b5121a232e 5583 }
pcercuei 0:03b5121a232e 5584 break;
pcercuei 0:03b5121a232e 5585 case XML_PARSER_START_TAG: {
pcercuei 0:03b5121a232e 5586 const xmlChar *name;
pcercuei 0:03b5121a232e 5587 int failed;
pcercuei 0:03b5121a232e 5588 const htmlElemDesc * info;
pcercuei 0:03b5121a232e 5589
pcercuei 0:03b5121a232e 5590 /*
pcercuei 0:03b5121a232e 5591 * no chars in buffer
pcercuei 0:03b5121a232e 5592 */
pcercuei 0:03b5121a232e 5593 if (avail < 1)
pcercuei 0:03b5121a232e 5594 goto done;
pcercuei 0:03b5121a232e 5595 /*
pcercuei 0:03b5121a232e 5596 * not enouth chars in buffer
pcercuei 0:03b5121a232e 5597 */
pcercuei 0:03b5121a232e 5598 if (avail < 2) {
pcercuei 0:03b5121a232e 5599 if (!terminate)
pcercuei 0:03b5121a232e 5600 goto done;
pcercuei 0:03b5121a232e 5601 else
pcercuei 0:03b5121a232e 5602 next = ' ';
pcercuei 0:03b5121a232e 5603 } else {
pcercuei 0:03b5121a232e 5604 next = in->cur[1];
pcercuei 0:03b5121a232e 5605 }
pcercuei 0:03b5121a232e 5606 cur = in->cur[0];
pcercuei 0:03b5121a232e 5607 if (cur != '<') {
pcercuei 0:03b5121a232e 5608 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5609 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5610 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5611 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5612 #endif
pcercuei 0:03b5121a232e 5613 break;
pcercuei 0:03b5121a232e 5614 }
pcercuei 0:03b5121a232e 5615 if (next == '/') {
pcercuei 0:03b5121a232e 5616 ctxt->instate = XML_PARSER_END_TAG;
pcercuei 0:03b5121a232e 5617 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5618 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5619 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5620 "HPP: entering END_TAG\n");
pcercuei 0:03b5121a232e 5621 #endif
pcercuei 0:03b5121a232e 5622 break;
pcercuei 0:03b5121a232e 5623 }
pcercuei 0:03b5121a232e 5624 if ((!terminate) &&
pcercuei 0:03b5121a232e 5625 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5626 goto done;
pcercuei 0:03b5121a232e 5627
pcercuei 0:03b5121a232e 5628 /* Capture start position */
pcercuei 0:03b5121a232e 5629 if (ctxt->record_info) {
pcercuei 0:03b5121a232e 5630 node_info.begin_pos = ctxt->input->consumed +
pcercuei 0:03b5121a232e 5631 (CUR_PTR - ctxt->input->base);
pcercuei 0:03b5121a232e 5632 node_info.begin_line = ctxt->input->line;
pcercuei 0:03b5121a232e 5633 }
pcercuei 0:03b5121a232e 5634
pcercuei 0:03b5121a232e 5635
pcercuei 0:03b5121a232e 5636 failed = htmlParseStartTag(ctxt);
pcercuei 0:03b5121a232e 5637 name = ctxt->name;
pcercuei 0:03b5121a232e 5638 if ((failed == -1) ||
pcercuei 0:03b5121a232e 5639 (name == NULL)) {
pcercuei 0:03b5121a232e 5640 if (CUR == '>')
pcercuei 0:03b5121a232e 5641 NEXT;
pcercuei 0:03b5121a232e 5642 break;
pcercuei 0:03b5121a232e 5643 }
pcercuei 0:03b5121a232e 5644
pcercuei 0:03b5121a232e 5645 /*
pcercuei 0:03b5121a232e 5646 * Lookup the info for that element.
pcercuei 0:03b5121a232e 5647 */
pcercuei 0:03b5121a232e 5648 info = htmlTagLookup(name);
pcercuei 0:03b5121a232e 5649 if (info == NULL) {
pcercuei 0:03b5121a232e 5650 htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
pcercuei 0:03b5121a232e 5651 "Tag %s invalid\n", name, NULL);
pcercuei 0:03b5121a232e 5652 }
pcercuei 0:03b5121a232e 5653
pcercuei 0:03b5121a232e 5654 /*
pcercuei 0:03b5121a232e 5655 * Check for an Empty Element labeled the XML/SGML way
pcercuei 0:03b5121a232e 5656 */
pcercuei 0:03b5121a232e 5657 if ((CUR == '/') && (NXT(1) == '>')) {
pcercuei 0:03b5121a232e 5658 SKIP(2);
pcercuei 0:03b5121a232e 5659 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 5660 ctxt->sax->endElement(ctxt->userData, name);
pcercuei 0:03b5121a232e 5661 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 5662 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5663 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5664 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5665 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5666 #endif
pcercuei 0:03b5121a232e 5667 break;
pcercuei 0:03b5121a232e 5668 }
pcercuei 0:03b5121a232e 5669
pcercuei 0:03b5121a232e 5670 if (CUR == '>') {
pcercuei 0:03b5121a232e 5671 NEXT;
pcercuei 0:03b5121a232e 5672 } else {
pcercuei 0:03b5121a232e 5673 htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
pcercuei 0:03b5121a232e 5674 "Couldn't find end of Start Tag %s\n",
pcercuei 0:03b5121a232e 5675 name, NULL);
pcercuei 0:03b5121a232e 5676
pcercuei 0:03b5121a232e 5677 /*
pcercuei 0:03b5121a232e 5678 * end of parsing of this node.
pcercuei 0:03b5121a232e 5679 */
pcercuei 0:03b5121a232e 5680 if (xmlStrEqual(name, ctxt->name)) {
pcercuei 0:03b5121a232e 5681 nodePop(ctxt);
pcercuei 0:03b5121a232e 5682 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 5683 }
pcercuei 0:03b5121a232e 5684
pcercuei 0:03b5121a232e 5685 if (ctxt->record_info)
pcercuei 0:03b5121a232e 5686 htmlNodeInfoPush(ctxt, &node_info);
pcercuei 0:03b5121a232e 5687
pcercuei 0:03b5121a232e 5688 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5689 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5690 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5691 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5692 #endif
pcercuei 0:03b5121a232e 5693 break;
pcercuei 0:03b5121a232e 5694 }
pcercuei 0:03b5121a232e 5695
pcercuei 0:03b5121a232e 5696 /*
pcercuei 0:03b5121a232e 5697 * Check for an Empty Element from DTD definition
pcercuei 0:03b5121a232e 5698 */
pcercuei 0:03b5121a232e 5699 if ((info != NULL) && (info->empty)) {
pcercuei 0:03b5121a232e 5700 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
pcercuei 0:03b5121a232e 5701 ctxt->sax->endElement(ctxt->userData, name);
pcercuei 0:03b5121a232e 5702 htmlnamePop(ctxt);
pcercuei 0:03b5121a232e 5703 }
pcercuei 0:03b5121a232e 5704
pcercuei 0:03b5121a232e 5705 if (ctxt->record_info)
pcercuei 0:03b5121a232e 5706 htmlNodeInfoPush(ctxt, &node_info);
pcercuei 0:03b5121a232e 5707
pcercuei 0:03b5121a232e 5708 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5709 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5710 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5711 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5712 #endif
pcercuei 0:03b5121a232e 5713 break;
pcercuei 0:03b5121a232e 5714 }
pcercuei 0:03b5121a232e 5715 case XML_PARSER_CONTENT: {
pcercuei 0:03b5121a232e 5716 long cons;
pcercuei 0:03b5121a232e 5717 /*
pcercuei 0:03b5121a232e 5718 * Handle preparsed entities and charRef
pcercuei 0:03b5121a232e 5719 */
pcercuei 0:03b5121a232e 5720 if (ctxt->token != 0) {
pcercuei 0:03b5121a232e 5721 xmlChar chr[2] = { 0 , 0 } ;
pcercuei 0:03b5121a232e 5722
pcercuei 0:03b5121a232e 5723 chr[0] = (xmlChar) ctxt->token;
pcercuei 0:03b5121a232e 5724 htmlCheckParagraph(ctxt);
pcercuei 0:03b5121a232e 5725 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
pcercuei 0:03b5121a232e 5726 ctxt->sax->characters(ctxt->userData, chr, 1);
pcercuei 0:03b5121a232e 5727 ctxt->token = 0;
pcercuei 0:03b5121a232e 5728 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5729 }
pcercuei 0:03b5121a232e 5730 if ((avail == 1) && (terminate)) {
pcercuei 0:03b5121a232e 5731 cur = in->cur[0];
pcercuei 0:03b5121a232e 5732 if ((cur != '<') && (cur != '&')) {
pcercuei 0:03b5121a232e 5733 if (ctxt->sax != NULL) {
pcercuei 0:03b5121a232e 5734 if (IS_BLANK_CH(cur)) {
pcercuei 0:03b5121a232e 5735 if (ctxt->keepBlanks) {
pcercuei 0:03b5121a232e 5736 if (ctxt->sax->characters != NULL)
pcercuei 0:03b5121a232e 5737 ctxt->sax->characters(
pcercuei 0:03b5121a232e 5738 ctxt->userData, &in->cur[0], 1);
pcercuei 0:03b5121a232e 5739 } else {
pcercuei 0:03b5121a232e 5740 if (ctxt->sax->ignorableWhitespace != NULL)
pcercuei 0:03b5121a232e 5741 ctxt->sax->ignorableWhitespace(
pcercuei 0:03b5121a232e 5742 ctxt->userData, &in->cur[0], 1);
pcercuei 0:03b5121a232e 5743 }
pcercuei 0:03b5121a232e 5744 } else {
pcercuei 0:03b5121a232e 5745 htmlCheckParagraph(ctxt);
pcercuei 0:03b5121a232e 5746 if (ctxt->sax->characters != NULL)
pcercuei 0:03b5121a232e 5747 ctxt->sax->characters(
pcercuei 0:03b5121a232e 5748 ctxt->userData, &in->cur[0], 1);
pcercuei 0:03b5121a232e 5749 }
pcercuei 0:03b5121a232e 5750 }
pcercuei 0:03b5121a232e 5751 ctxt->token = 0;
pcercuei 0:03b5121a232e 5752 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5753 in->cur++;
pcercuei 0:03b5121a232e 5754 break;
pcercuei 0:03b5121a232e 5755 }
pcercuei 0:03b5121a232e 5756 }
pcercuei 0:03b5121a232e 5757 if (avail < 2)
pcercuei 0:03b5121a232e 5758 goto done;
pcercuei 0:03b5121a232e 5759 cur = in->cur[0];
pcercuei 0:03b5121a232e 5760 next = in->cur[1];
pcercuei 0:03b5121a232e 5761 cons = ctxt->nbChars;
pcercuei 0:03b5121a232e 5762 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
pcercuei 0:03b5121a232e 5763 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
pcercuei 0:03b5121a232e 5764 /*
pcercuei 0:03b5121a232e 5765 * Handle SCRIPT/STYLE separately
pcercuei 0:03b5121a232e 5766 */
pcercuei 0:03b5121a232e 5767 if (!terminate) {
pcercuei 0:03b5121a232e 5768 int idx;
pcercuei 0:03b5121a232e 5769 xmlChar val;
pcercuei 0:03b5121a232e 5770
pcercuei 0:03b5121a232e 5771 idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0);
pcercuei 0:03b5121a232e 5772 if (idx < 0)
pcercuei 0:03b5121a232e 5773 goto done;
pcercuei 0:03b5121a232e 5774 val = in->cur[idx + 2];
pcercuei 0:03b5121a232e 5775 if (val == 0) /* bad cut of input */
pcercuei 0:03b5121a232e 5776 goto done;
pcercuei 0:03b5121a232e 5777 }
pcercuei 0:03b5121a232e 5778 htmlParseScript(ctxt);
pcercuei 0:03b5121a232e 5779 if ((cur == '<') && (next == '/')) {
pcercuei 0:03b5121a232e 5780 ctxt->instate = XML_PARSER_END_TAG;
pcercuei 0:03b5121a232e 5781 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5782 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5783 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5784 "HPP: entering END_TAG\n");
pcercuei 0:03b5121a232e 5785 #endif
pcercuei 0:03b5121a232e 5786 break;
pcercuei 0:03b5121a232e 5787 }
pcercuei 0:03b5121a232e 5788 } else {
pcercuei 0:03b5121a232e 5789 /*
pcercuei 0:03b5121a232e 5790 * Sometimes DOCTYPE arrives in the middle of the document
pcercuei 0:03b5121a232e 5791 */
pcercuei 0:03b5121a232e 5792 if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5793 (UPP(2) == 'D') && (UPP(3) == 'O') &&
pcercuei 0:03b5121a232e 5794 (UPP(4) == 'C') && (UPP(5) == 'T') &&
pcercuei 0:03b5121a232e 5795 (UPP(6) == 'Y') && (UPP(7) == 'P') &&
pcercuei 0:03b5121a232e 5796 (UPP(8) == 'E')) {
pcercuei 0:03b5121a232e 5797 if ((!terminate) &&
pcercuei 0:03b5121a232e 5798 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5799 goto done;
pcercuei 0:03b5121a232e 5800 htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
pcercuei 0:03b5121a232e 5801 "Misplaced DOCTYPE declaration\n",
pcercuei 0:03b5121a232e 5802 BAD_CAST "DOCTYPE" , NULL);
pcercuei 0:03b5121a232e 5803 htmlParseDocTypeDecl(ctxt);
pcercuei 0:03b5121a232e 5804 } else if ((cur == '<') && (next == '!') &&
pcercuei 0:03b5121a232e 5805 (in->cur[2] == '-') && (in->cur[3] == '-')) {
pcercuei 0:03b5121a232e 5806 if ((!terminate) &&
pcercuei 0:03b5121a232e 5807 (htmlParseLookupSequence(
pcercuei 0:03b5121a232e 5808 ctxt, '-', '-', '>', 1, 1) < 0))
pcercuei 0:03b5121a232e 5809 goto done;
pcercuei 0:03b5121a232e 5810 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5811 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5812 "HPP: Parsing Comment\n");
pcercuei 0:03b5121a232e 5813 #endif
pcercuei 0:03b5121a232e 5814 htmlParseComment(ctxt);
pcercuei 0:03b5121a232e 5815 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5816 } else if ((cur == '<') && (next == '?')) {
pcercuei 0:03b5121a232e 5817 if ((!terminate) &&
pcercuei 0:03b5121a232e 5818 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5819 goto done;
pcercuei 0:03b5121a232e 5820 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5821 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5822 "HPP: Parsing PI\n");
pcercuei 0:03b5121a232e 5823 #endif
pcercuei 0:03b5121a232e 5824 htmlParsePI(ctxt);
pcercuei 0:03b5121a232e 5825 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5826 } else if ((cur == '<') && (next == '!') && (avail < 4)) {
pcercuei 0:03b5121a232e 5827 goto done;
pcercuei 0:03b5121a232e 5828 } else if ((cur == '<') && (next == '/')) {
pcercuei 0:03b5121a232e 5829 ctxt->instate = XML_PARSER_END_TAG;
pcercuei 0:03b5121a232e 5830 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5831 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5832 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5833 "HPP: entering END_TAG\n");
pcercuei 0:03b5121a232e 5834 #endif
pcercuei 0:03b5121a232e 5835 break;
pcercuei 0:03b5121a232e 5836 } else if (cur == '<') {
pcercuei 0:03b5121a232e 5837 ctxt->instate = XML_PARSER_START_TAG;
pcercuei 0:03b5121a232e 5838 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5839 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5840 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5841 "HPP: entering START_TAG\n");
pcercuei 0:03b5121a232e 5842 #endif
pcercuei 0:03b5121a232e 5843 break;
pcercuei 0:03b5121a232e 5844 } else if (cur == '&') {
pcercuei 0:03b5121a232e 5845 if ((!terminate) &&
pcercuei 0:03b5121a232e 5846 (htmlParseLookupChars(ctxt,
pcercuei 0:03b5121a232e 5847 BAD_CAST "; >/", 4) < 0))
pcercuei 0:03b5121a232e 5848 goto done;
pcercuei 0:03b5121a232e 5849 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5850 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5851 "HPP: Parsing Reference\n");
pcercuei 0:03b5121a232e 5852 #endif
pcercuei 0:03b5121a232e 5853 /* TODO: check generation of subtrees if noent !!! */
pcercuei 0:03b5121a232e 5854 htmlParseReference(ctxt);
pcercuei 0:03b5121a232e 5855 } else {
pcercuei 0:03b5121a232e 5856 /*
pcercuei 0:03b5121a232e 5857 * check that the text sequence is complete
pcercuei 0:03b5121a232e 5858 * before handing out the data to the parser
pcercuei 0:03b5121a232e 5859 * to avoid problems with erroneous end of
pcercuei 0:03b5121a232e 5860 * data detection.
pcercuei 0:03b5121a232e 5861 */
pcercuei 0:03b5121a232e 5862 if ((!terminate) &&
pcercuei 0:03b5121a232e 5863 (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0))
pcercuei 0:03b5121a232e 5864 goto done;
pcercuei 0:03b5121a232e 5865 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5866 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5867 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5868 "HPP: Parsing char data\n");
pcercuei 0:03b5121a232e 5869 #endif
pcercuei 0:03b5121a232e 5870 htmlParseCharData(ctxt);
pcercuei 0:03b5121a232e 5871 }
pcercuei 0:03b5121a232e 5872 }
pcercuei 0:03b5121a232e 5873 if (cons == ctxt->nbChars) {
pcercuei 0:03b5121a232e 5874 if (ctxt->node != NULL) {
pcercuei 0:03b5121a232e 5875 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5876 "detected an error in element content\n",
pcercuei 0:03b5121a232e 5877 NULL, NULL);
pcercuei 0:03b5121a232e 5878 }
pcercuei 0:03b5121a232e 5879 NEXT;
pcercuei 0:03b5121a232e 5880 break;
pcercuei 0:03b5121a232e 5881 }
pcercuei 0:03b5121a232e 5882
pcercuei 0:03b5121a232e 5883 break;
pcercuei 0:03b5121a232e 5884 }
pcercuei 0:03b5121a232e 5885 case XML_PARSER_END_TAG:
pcercuei 0:03b5121a232e 5886 if (avail < 2)
pcercuei 0:03b5121a232e 5887 goto done;
pcercuei 0:03b5121a232e 5888 if ((!terminate) &&
pcercuei 0:03b5121a232e 5889 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
pcercuei 0:03b5121a232e 5890 goto done;
pcercuei 0:03b5121a232e 5891 htmlParseEndTag(ctxt);
pcercuei 0:03b5121a232e 5892 if (ctxt->nameNr == 0) {
pcercuei 0:03b5121a232e 5893 ctxt->instate = XML_PARSER_EPILOG;
pcercuei 0:03b5121a232e 5894 } else {
pcercuei 0:03b5121a232e 5895 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5896 }
pcercuei 0:03b5121a232e 5897 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5898 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5899 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5900 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5901 #endif
pcercuei 0:03b5121a232e 5902 break;
pcercuei 0:03b5121a232e 5903 case XML_PARSER_CDATA_SECTION:
pcercuei 0:03b5121a232e 5904 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5905 "HPP: internal error, state == CDATA\n",
pcercuei 0:03b5121a232e 5906 NULL, NULL);
pcercuei 0:03b5121a232e 5907 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5908 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5909 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5910 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5911 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5912 #endif
pcercuei 0:03b5121a232e 5913 break;
pcercuei 0:03b5121a232e 5914 case XML_PARSER_DTD:
pcercuei 0:03b5121a232e 5915 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5916 "HPP: internal error, state == DTD\n",
pcercuei 0:03b5121a232e 5917 NULL, NULL);
pcercuei 0:03b5121a232e 5918 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5919 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5920 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5921 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5922 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5923 #endif
pcercuei 0:03b5121a232e 5924 break;
pcercuei 0:03b5121a232e 5925 case XML_PARSER_COMMENT:
pcercuei 0:03b5121a232e 5926 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5927 "HPP: internal error, state == COMMENT\n",
pcercuei 0:03b5121a232e 5928 NULL, NULL);
pcercuei 0:03b5121a232e 5929 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5930 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5931 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5932 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5933 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5934 #endif
pcercuei 0:03b5121a232e 5935 break;
pcercuei 0:03b5121a232e 5936 case XML_PARSER_PI:
pcercuei 0:03b5121a232e 5937 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5938 "HPP: internal error, state == PI\n",
pcercuei 0:03b5121a232e 5939 NULL, NULL);
pcercuei 0:03b5121a232e 5940 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5941 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5942 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5943 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5944 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5945 #endif
pcercuei 0:03b5121a232e 5946 break;
pcercuei 0:03b5121a232e 5947 case XML_PARSER_ENTITY_DECL:
pcercuei 0:03b5121a232e 5948 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5949 "HPP: internal error, state == ENTITY_DECL\n",
pcercuei 0:03b5121a232e 5950 NULL, NULL);
pcercuei 0:03b5121a232e 5951 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5952 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5953 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5954 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5955 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5956 #endif
pcercuei 0:03b5121a232e 5957 break;
pcercuei 0:03b5121a232e 5958 case XML_PARSER_ENTITY_VALUE:
pcercuei 0:03b5121a232e 5959 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5960 "HPP: internal error, state == ENTITY_VALUE\n",
pcercuei 0:03b5121a232e 5961 NULL, NULL);
pcercuei 0:03b5121a232e 5962 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5963 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5964 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5965 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5966 "HPP: entering DTD\n");
pcercuei 0:03b5121a232e 5967 #endif
pcercuei 0:03b5121a232e 5968 break;
pcercuei 0:03b5121a232e 5969 case XML_PARSER_ATTRIBUTE_VALUE:
pcercuei 0:03b5121a232e 5970 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5971 "HPP: internal error, state == ATTRIBUTE_VALUE\n",
pcercuei 0:03b5121a232e 5972 NULL, NULL);
pcercuei 0:03b5121a232e 5973 ctxt->instate = XML_PARSER_START_TAG;
pcercuei 0:03b5121a232e 5974 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5975 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5976 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5977 "HPP: entering START_TAG\n");
pcercuei 0:03b5121a232e 5978 #endif
pcercuei 0:03b5121a232e 5979 break;
pcercuei 0:03b5121a232e 5980 case XML_PARSER_SYSTEM_LITERAL:
pcercuei 0:03b5121a232e 5981 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5982 "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n",
pcercuei 0:03b5121a232e 5983 NULL, NULL);
pcercuei 0:03b5121a232e 5984 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5985 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5986 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5987 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5988 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 5989 #endif
pcercuei 0:03b5121a232e 5990 break;
pcercuei 0:03b5121a232e 5991 case XML_PARSER_IGNORE:
pcercuei 0:03b5121a232e 5992 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 5993 "HPP: internal error, state == XML_PARSER_IGNORE\n",
pcercuei 0:03b5121a232e 5994 NULL, NULL);
pcercuei 0:03b5121a232e 5995 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 5996 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 5997 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 5998 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 5999 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 6000 #endif
pcercuei 0:03b5121a232e 6001 break;
pcercuei 0:03b5121a232e 6002 case XML_PARSER_PUBLIC_LITERAL:
pcercuei 0:03b5121a232e 6003 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 6004 "HPP: internal error, state == XML_PARSER_LITERAL\n",
pcercuei 0:03b5121a232e 6005 NULL, NULL);
pcercuei 0:03b5121a232e 6006 ctxt->instate = XML_PARSER_CONTENT;
pcercuei 0:03b5121a232e 6007 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 6008 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 6009 xmlGenericError(xmlGenericErrorContext,
pcercuei 0:03b5121a232e 6010 "HPP: entering CONTENT\n");
pcercuei 0:03b5121a232e 6011 #endif
pcercuei 0:03b5121a232e 6012 break;
pcercuei 0:03b5121a232e 6013
pcercuei 0:03b5121a232e 6014 }
pcercuei 0:03b5121a232e 6015 }
pcercuei 0:03b5121a232e 6016 done:
pcercuei 0:03b5121a232e 6017 if ((avail == 0) && (terminate)) {
pcercuei 0:03b5121a232e 6018 htmlAutoCloseOnEnd(ctxt);
pcercuei 0:03b5121a232e 6019 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
pcercuei 0:03b5121a232e 6020 /*
pcercuei 0:03b5121a232e 6021 * SAX: end of the document processing.
pcercuei 0:03b5121a232e 6022 */
pcercuei 0:03b5121a232e 6023 ctxt->instate = XML_PARSER_EOF;
pcercuei 0:03b5121a232e 6024 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
pcercuei 0:03b5121a232e 6025 ctxt->sax->endDocument(ctxt->userData);
pcercuei 0:03b5121a232e 6026 }
pcercuei 0:03b5121a232e 6027 }
pcercuei 0:03b5121a232e 6028 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) &&
pcercuei 0:03b5121a232e 6029 ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
pcercuei 0:03b5121a232e 6030 (ctxt->instate == XML_PARSER_EPILOG))) {
pcercuei 0:03b5121a232e 6031 xmlDtdPtr dtd;
pcercuei 0:03b5121a232e 6032 dtd = xmlGetIntSubset(ctxt->myDoc);
pcercuei 0:03b5121a232e 6033 if (dtd == NULL)
pcercuei 0:03b5121a232e 6034 ctxt->myDoc->intSubset =
pcercuei 0:03b5121a232e 6035 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
pcercuei 0:03b5121a232e 6036 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
pcercuei 0:03b5121a232e 6037 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
pcercuei 0:03b5121a232e 6038 }
pcercuei 0:03b5121a232e 6039 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 6040 xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
pcercuei 0:03b5121a232e 6041 #endif
pcercuei 0:03b5121a232e 6042 return(ret);
pcercuei 0:03b5121a232e 6043 }
pcercuei 0:03b5121a232e 6044
pcercuei 0:03b5121a232e 6045 /**
pcercuei 0:03b5121a232e 6046 * htmlParseChunk:
pcercuei 0:03b5121a232e 6047 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 6048 * @chunk: an char array
pcercuei 0:03b5121a232e 6049 * @size: the size in byte of the chunk
pcercuei 0:03b5121a232e 6050 * @terminate: last chunk indicator
pcercuei 0:03b5121a232e 6051 *
pcercuei 0:03b5121a232e 6052 * Parse a Chunk of memory
pcercuei 0:03b5121a232e 6053 *
pcercuei 0:03b5121a232e 6054 * Returns zero if no error, the xmlParserErrors otherwise.
pcercuei 0:03b5121a232e 6055 */
pcercuei 0:03b5121a232e 6056 int
pcercuei 0:03b5121a232e 6057 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
pcercuei 0:03b5121a232e 6058 int terminate) {
pcercuei 0:03b5121a232e 6059 if ((ctxt == NULL) || (ctxt->input == NULL)) {
pcercuei 0:03b5121a232e 6060 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
pcercuei 0:03b5121a232e 6061 "htmlParseChunk: context error\n", NULL, NULL);
pcercuei 0:03b5121a232e 6062 return(XML_ERR_INTERNAL_ERROR);
pcercuei 0:03b5121a232e 6063 }
pcercuei 0:03b5121a232e 6064 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
pcercuei 0:03b5121a232e 6065 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
pcercuei 0:03b5121a232e 6066 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
pcercuei 0:03b5121a232e 6067 size_t cur = ctxt->input->cur - ctxt->input->base;
pcercuei 0:03b5121a232e 6068 int res;
pcercuei 0:03b5121a232e 6069
pcercuei 0:03b5121a232e 6070 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
pcercuei 0:03b5121a232e 6071 if (res < 0) {
pcercuei 0:03b5121a232e 6072 ctxt->errNo = XML_PARSER_EOF;
pcercuei 0:03b5121a232e 6073 ctxt->disableSAX = 1;
pcercuei 0:03b5121a232e 6074 return (XML_PARSER_EOF);
pcercuei 0:03b5121a232e 6075 }
pcercuei 0:03b5121a232e 6076 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
pcercuei 0:03b5121a232e 6077 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 6078 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
pcercuei 0:03b5121a232e 6079 #endif
pcercuei 0:03b5121a232e 6080
pcercuei 0:03b5121a232e 6081 #if 0
pcercuei 0:03b5121a232e 6082 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
pcercuei 0:03b5121a232e 6083 htmlParseTryOrFinish(ctxt, terminate);
pcercuei 0:03b5121a232e 6084 #endif
pcercuei 0:03b5121a232e 6085 } else if (ctxt->instate != XML_PARSER_EOF) {
pcercuei 0:03b5121a232e 6086 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
pcercuei 0:03b5121a232e 6087 xmlParserInputBufferPtr in = ctxt->input->buf;
pcercuei 0:03b5121a232e 6088 if ((in->encoder != NULL) && (in->buffer != NULL) &&
pcercuei 0:03b5121a232e 6089 (in->raw != NULL)) {
pcercuei 0:03b5121a232e 6090 int nbchars;
pcercuei 0:03b5121a232e 6091 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
pcercuei 0:03b5121a232e 6092 size_t current = ctxt->input->cur - ctxt->input->base;
pcercuei 0:03b5121a232e 6093
pcercuei 0:03b5121a232e 6094 nbchars = xmlCharEncInput(in, terminate);
pcercuei 0:03b5121a232e 6095 if (nbchars < 0) {
pcercuei 0:03b5121a232e 6096 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
pcercuei 0:03b5121a232e 6097 "encoder error\n", NULL, NULL);
pcercuei 0:03b5121a232e 6098 return(XML_ERR_INVALID_ENCODING);
pcercuei 0:03b5121a232e 6099 }
pcercuei 0:03b5121a232e 6100 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
pcercuei 0:03b5121a232e 6101 }
pcercuei 0:03b5121a232e 6102 }
pcercuei 0:03b5121a232e 6103 }
pcercuei 0:03b5121a232e 6104 htmlParseTryOrFinish(ctxt, terminate);
pcercuei 0:03b5121a232e 6105 if (terminate) {
pcercuei 0:03b5121a232e 6106 if ((ctxt->instate != XML_PARSER_EOF) &&
pcercuei 0:03b5121a232e 6107 (ctxt->instate != XML_PARSER_EPILOG) &&
pcercuei 0:03b5121a232e 6108 (ctxt->instate != XML_PARSER_MISC)) {
pcercuei 0:03b5121a232e 6109 ctxt->errNo = XML_ERR_DOCUMENT_END;
pcercuei 0:03b5121a232e 6110 ctxt->wellFormed = 0;
pcercuei 0:03b5121a232e 6111 }
pcercuei 0:03b5121a232e 6112 if (ctxt->instate != XML_PARSER_EOF) {
pcercuei 0:03b5121a232e 6113 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
pcercuei 0:03b5121a232e 6114 ctxt->sax->endDocument(ctxt->userData);
pcercuei 0:03b5121a232e 6115 }
pcercuei 0:03b5121a232e 6116 ctxt->instate = XML_PARSER_EOF;
pcercuei 0:03b5121a232e 6117 }
pcercuei 0:03b5121a232e 6118 return((xmlParserErrors) ctxt->errNo);
pcercuei 0:03b5121a232e 6119 }
pcercuei 0:03b5121a232e 6120
pcercuei 0:03b5121a232e 6121 /************************************************************************
pcercuei 0:03b5121a232e 6122 * *
pcercuei 0:03b5121a232e 6123 * User entry points *
pcercuei 0:03b5121a232e 6124 * *
pcercuei 0:03b5121a232e 6125 ************************************************************************/
pcercuei 0:03b5121a232e 6126
pcercuei 0:03b5121a232e 6127 /**
pcercuei 0:03b5121a232e 6128 * htmlCreatePushParserCtxt:
pcercuei 0:03b5121a232e 6129 * @sax: a SAX handler
pcercuei 0:03b5121a232e 6130 * @user_data: The user data returned on SAX callbacks
pcercuei 0:03b5121a232e 6131 * @chunk: a pointer to an array of chars
pcercuei 0:03b5121a232e 6132 * @size: number of chars in the array
pcercuei 0:03b5121a232e 6133 * @filename: an optional file name or URI
pcercuei 0:03b5121a232e 6134 * @enc: an optional encoding
pcercuei 0:03b5121a232e 6135 *
pcercuei 0:03b5121a232e 6136 * Create a parser context for using the HTML parser in push mode
pcercuei 0:03b5121a232e 6137 * The value of @filename is used for fetching external entities
pcercuei 0:03b5121a232e 6138 * and error/warning reports.
pcercuei 0:03b5121a232e 6139 *
pcercuei 0:03b5121a232e 6140 * Returns the new parser context or NULL
pcercuei 0:03b5121a232e 6141 */
pcercuei 0:03b5121a232e 6142 htmlParserCtxtPtr
pcercuei 0:03b5121a232e 6143 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
pcercuei 0:03b5121a232e 6144 const char *chunk, int size, const char *filename,
pcercuei 0:03b5121a232e 6145 xmlCharEncoding enc) {
pcercuei 0:03b5121a232e 6146 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6147 htmlParserInputPtr inputStream;
pcercuei 0:03b5121a232e 6148 xmlParserInputBufferPtr buf;
pcercuei 0:03b5121a232e 6149
pcercuei 0:03b5121a232e 6150 xmlInitParser();
pcercuei 0:03b5121a232e 6151
pcercuei 0:03b5121a232e 6152 buf = xmlAllocParserInputBuffer(enc);
pcercuei 0:03b5121a232e 6153 if (buf == NULL) return(NULL);
pcercuei 0:03b5121a232e 6154
pcercuei 0:03b5121a232e 6155 ctxt = htmlNewParserCtxt();
pcercuei 0:03b5121a232e 6156 if (ctxt == NULL) {
pcercuei 0:03b5121a232e 6157 xmlFreeParserInputBuffer(buf);
pcercuei 0:03b5121a232e 6158 return(NULL);
pcercuei 0:03b5121a232e 6159 }
pcercuei 0:03b5121a232e 6160 if(enc==XML_CHAR_ENCODING_UTF8 || buf->encoder)
pcercuei 0:03b5121a232e 6161 ctxt->charset=XML_CHAR_ENCODING_UTF8;
pcercuei 0:03b5121a232e 6162 if (sax != NULL) {
pcercuei 0:03b5121a232e 6163 if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
pcercuei 0:03b5121a232e 6164 xmlFree(ctxt->sax);
pcercuei 0:03b5121a232e 6165 ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
pcercuei 0:03b5121a232e 6166 if (ctxt->sax == NULL) {
pcercuei 0:03b5121a232e 6167 xmlFree(buf);
pcercuei 0:03b5121a232e 6168 xmlFree(ctxt);
pcercuei 0:03b5121a232e 6169 return(NULL);
pcercuei 0:03b5121a232e 6170 }
pcercuei 0:03b5121a232e 6171 memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
pcercuei 0:03b5121a232e 6172 if (user_data != NULL)
pcercuei 0:03b5121a232e 6173 ctxt->userData = user_data;
pcercuei 0:03b5121a232e 6174 }
pcercuei 0:03b5121a232e 6175 if (filename == NULL) {
pcercuei 0:03b5121a232e 6176 ctxt->directory = NULL;
pcercuei 0:03b5121a232e 6177 } else {
pcercuei 0:03b5121a232e 6178 ctxt->directory = xmlParserGetDirectory(filename);
pcercuei 0:03b5121a232e 6179 }
pcercuei 0:03b5121a232e 6180
pcercuei 0:03b5121a232e 6181 inputStream = htmlNewInputStream(ctxt);
pcercuei 0:03b5121a232e 6182 if (inputStream == NULL) {
pcercuei 0:03b5121a232e 6183 xmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 6184 xmlFree(buf);
pcercuei 0:03b5121a232e 6185 return(NULL);
pcercuei 0:03b5121a232e 6186 }
pcercuei 0:03b5121a232e 6187
pcercuei 0:03b5121a232e 6188 if (filename == NULL)
pcercuei 0:03b5121a232e 6189 inputStream->filename = NULL;
pcercuei 0:03b5121a232e 6190 else
pcercuei 0:03b5121a232e 6191 inputStream->filename = (char *)
pcercuei 0:03b5121a232e 6192 xmlCanonicPath((const xmlChar *) filename);
pcercuei 0:03b5121a232e 6193 inputStream->buf = buf;
pcercuei 0:03b5121a232e 6194 xmlBufResetInput(buf->buffer, inputStream);
pcercuei 0:03b5121a232e 6195
pcercuei 0:03b5121a232e 6196 inputPush(ctxt, inputStream);
pcercuei 0:03b5121a232e 6197
pcercuei 0:03b5121a232e 6198 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
pcercuei 0:03b5121a232e 6199 (ctxt->input->buf != NULL)) {
pcercuei 0:03b5121a232e 6200 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
pcercuei 0:03b5121a232e 6201 size_t cur = ctxt->input->cur - ctxt->input->base;
pcercuei 0:03b5121a232e 6202
pcercuei 0:03b5121a232e 6203 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
pcercuei 0:03b5121a232e 6204
pcercuei 0:03b5121a232e 6205 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
pcercuei 0:03b5121a232e 6206 #ifdef DEBUG_PUSH
pcercuei 0:03b5121a232e 6207 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
pcercuei 0:03b5121a232e 6208 #endif
pcercuei 0:03b5121a232e 6209 }
pcercuei 0:03b5121a232e 6210 ctxt->progressive = 1;
pcercuei 0:03b5121a232e 6211
pcercuei 0:03b5121a232e 6212 return(ctxt);
pcercuei 0:03b5121a232e 6213 }
pcercuei 0:03b5121a232e 6214 #endif /* LIBXML_PUSH_ENABLED */
pcercuei 0:03b5121a232e 6215
pcercuei 0:03b5121a232e 6216 /**
pcercuei 0:03b5121a232e 6217 * htmlSAXParseDoc:
pcercuei 0:03b5121a232e 6218 * @cur: a pointer to an array of xmlChar
pcercuei 0:03b5121a232e 6219 * @encoding: a free form C string describing the HTML document encoding, or NULL
pcercuei 0:03b5121a232e 6220 * @sax: the SAX handler block
pcercuei 0:03b5121a232e 6221 * @userData: if using SAX, this pointer will be provided on callbacks.
pcercuei 0:03b5121a232e 6222 *
pcercuei 0:03b5121a232e 6223 * Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
pcercuei 0:03b5121a232e 6224 * to handle parse events. If sax is NULL, fallback to the default DOM
pcercuei 0:03b5121a232e 6225 * behavior and return a tree.
pcercuei 0:03b5121a232e 6226 *
pcercuei 0:03b5121a232e 6227 * Returns the resulting document tree unless SAX is NULL or the document is
pcercuei 0:03b5121a232e 6228 * not well formed.
pcercuei 0:03b5121a232e 6229 */
pcercuei 0:03b5121a232e 6230
pcercuei 0:03b5121a232e 6231 htmlDocPtr
pcercuei 0:03b5121a232e 6232 htmlSAXParseDoc(xmlChar *cur, const char *encoding, htmlSAXHandlerPtr sax, void *userData) {
pcercuei 0:03b5121a232e 6233 htmlDocPtr ret;
pcercuei 0:03b5121a232e 6234 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6235
pcercuei 0:03b5121a232e 6236 xmlInitParser();
pcercuei 0:03b5121a232e 6237
pcercuei 0:03b5121a232e 6238 if (cur == NULL) return(NULL);
pcercuei 0:03b5121a232e 6239
pcercuei 0:03b5121a232e 6240
pcercuei 0:03b5121a232e 6241 ctxt = htmlCreateDocParserCtxt(cur, encoding);
pcercuei 0:03b5121a232e 6242 if (ctxt == NULL) return(NULL);
pcercuei 0:03b5121a232e 6243 if (sax != NULL) {
pcercuei 0:03b5121a232e 6244 if (ctxt->sax != NULL) xmlFree (ctxt->sax);
pcercuei 0:03b5121a232e 6245 ctxt->sax = sax;
pcercuei 0:03b5121a232e 6246 ctxt->userData = userData;
pcercuei 0:03b5121a232e 6247 }
pcercuei 0:03b5121a232e 6248
pcercuei 0:03b5121a232e 6249 htmlParseDocument(ctxt);
pcercuei 0:03b5121a232e 6250 ret = ctxt->myDoc;
pcercuei 0:03b5121a232e 6251 if (sax != NULL) {
pcercuei 0:03b5121a232e 6252 ctxt->sax = NULL;
pcercuei 0:03b5121a232e 6253 ctxt->userData = NULL;
pcercuei 0:03b5121a232e 6254 }
pcercuei 0:03b5121a232e 6255 htmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 6256
pcercuei 0:03b5121a232e 6257 return(ret);
pcercuei 0:03b5121a232e 6258 }
pcercuei 0:03b5121a232e 6259
pcercuei 0:03b5121a232e 6260 /**
pcercuei 0:03b5121a232e 6261 * htmlParseDoc:
pcercuei 0:03b5121a232e 6262 * @cur: a pointer to an array of xmlChar
pcercuei 0:03b5121a232e 6263 * @encoding: a free form C string describing the HTML document encoding, or NULL
pcercuei 0:03b5121a232e 6264 *
pcercuei 0:03b5121a232e 6265 * parse an HTML in-memory document and build a tree.
pcercuei 0:03b5121a232e 6266 *
pcercuei 0:03b5121a232e 6267 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6268 */
pcercuei 0:03b5121a232e 6269
pcercuei 0:03b5121a232e 6270 htmlDocPtr
pcercuei 0:03b5121a232e 6271 htmlParseDoc(xmlChar *cur, const char *encoding) {
pcercuei 0:03b5121a232e 6272 return(htmlSAXParseDoc(cur, encoding, NULL, NULL));
pcercuei 0:03b5121a232e 6273 }
pcercuei 0:03b5121a232e 6274
pcercuei 0:03b5121a232e 6275
pcercuei 0:03b5121a232e 6276 /**
pcercuei 0:03b5121a232e 6277 * htmlCreateFileParserCtxt:
pcercuei 0:03b5121a232e 6278 * @filename: the filename
pcercuei 0:03b5121a232e 6279 * @encoding: a free form C string describing the HTML document encoding, or NULL
pcercuei 0:03b5121a232e 6280 *
pcercuei 0:03b5121a232e 6281 * Create a parser context for a file content.
pcercuei 0:03b5121a232e 6282 * Automatic support for ZLIB/Compress compressed document is provided
pcercuei 0:03b5121a232e 6283 * by default if found at compile-time.
pcercuei 0:03b5121a232e 6284 *
pcercuei 0:03b5121a232e 6285 * Returns the new parser context or NULL
pcercuei 0:03b5121a232e 6286 */
pcercuei 0:03b5121a232e 6287 htmlParserCtxtPtr
pcercuei 0:03b5121a232e 6288 htmlCreateFileParserCtxt(const char *filename, const char *encoding)
pcercuei 0:03b5121a232e 6289 {
pcercuei 0:03b5121a232e 6290 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6291 htmlParserInputPtr inputStream;
pcercuei 0:03b5121a232e 6292 char *canonicFilename;
pcercuei 0:03b5121a232e 6293 /* htmlCharEncoding enc; */
pcercuei 0:03b5121a232e 6294 xmlChar *content, *content_line = (xmlChar *) "charset=";
pcercuei 0:03b5121a232e 6295
pcercuei 0:03b5121a232e 6296 if (filename == NULL)
pcercuei 0:03b5121a232e 6297 return(NULL);
pcercuei 0:03b5121a232e 6298
pcercuei 0:03b5121a232e 6299 ctxt = htmlNewParserCtxt();
pcercuei 0:03b5121a232e 6300 if (ctxt == NULL) {
pcercuei 0:03b5121a232e 6301 return(NULL);
pcercuei 0:03b5121a232e 6302 }
pcercuei 0:03b5121a232e 6303 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) filename);
pcercuei 0:03b5121a232e 6304 if (canonicFilename == NULL) {
pcercuei 0:03b5121a232e 6305 #ifdef LIBXML_SAX1_ENABLED
pcercuei 0:03b5121a232e 6306 if (xmlDefaultSAXHandler.error != NULL) {
pcercuei 0:03b5121a232e 6307 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
pcercuei 0:03b5121a232e 6308 }
pcercuei 0:03b5121a232e 6309 #endif
pcercuei 0:03b5121a232e 6310 xmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 6311 return(NULL);
pcercuei 0:03b5121a232e 6312 }
pcercuei 0:03b5121a232e 6313
pcercuei 0:03b5121a232e 6314 inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
pcercuei 0:03b5121a232e 6315 xmlFree(canonicFilename);
pcercuei 0:03b5121a232e 6316 if (inputStream == NULL) {
pcercuei 0:03b5121a232e 6317 xmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 6318 return(NULL);
pcercuei 0:03b5121a232e 6319 }
pcercuei 0:03b5121a232e 6320
pcercuei 0:03b5121a232e 6321 inputPush(ctxt, inputStream);
pcercuei 0:03b5121a232e 6322
pcercuei 0:03b5121a232e 6323 /* set encoding */
pcercuei 0:03b5121a232e 6324 if (encoding) {
pcercuei 0:03b5121a232e 6325 size_t l = strlen(encoding);
pcercuei 0:03b5121a232e 6326
pcercuei 0:03b5121a232e 6327 if (l < 1000) {
pcercuei 0:03b5121a232e 6328 content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1);
pcercuei 0:03b5121a232e 6329 if (content) {
pcercuei 0:03b5121a232e 6330 strcpy ((char *)content, (char *)content_line);
pcercuei 0:03b5121a232e 6331 strcat ((char *)content, (char *)encoding);
pcercuei 0:03b5121a232e 6332 htmlCheckEncoding (ctxt, content);
pcercuei 0:03b5121a232e 6333 xmlFree (content);
pcercuei 0:03b5121a232e 6334 }
pcercuei 0:03b5121a232e 6335 }
pcercuei 0:03b5121a232e 6336 }
pcercuei 0:03b5121a232e 6337
pcercuei 0:03b5121a232e 6338 return(ctxt);
pcercuei 0:03b5121a232e 6339 }
pcercuei 0:03b5121a232e 6340
pcercuei 0:03b5121a232e 6341 /**
pcercuei 0:03b5121a232e 6342 * htmlSAXParseFile:
pcercuei 0:03b5121a232e 6343 * @filename: the filename
pcercuei 0:03b5121a232e 6344 * @encoding: a free form C string describing the HTML document encoding, or NULL
pcercuei 0:03b5121a232e 6345 * @sax: the SAX handler block
pcercuei 0:03b5121a232e 6346 * @userData: if using SAX, this pointer will be provided on callbacks.
pcercuei 0:03b5121a232e 6347 *
pcercuei 0:03b5121a232e 6348 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
pcercuei 0:03b5121a232e 6349 * compressed document is provided by default if found at compile-time.
pcercuei 0:03b5121a232e 6350 * It use the given SAX function block to handle the parsing callback.
pcercuei 0:03b5121a232e 6351 * If sax is NULL, fallback to the default DOM tree building routines.
pcercuei 0:03b5121a232e 6352 *
pcercuei 0:03b5121a232e 6353 * Returns the resulting document tree unless SAX is NULL or the document is
pcercuei 0:03b5121a232e 6354 * not well formed.
pcercuei 0:03b5121a232e 6355 */
pcercuei 0:03b5121a232e 6356
pcercuei 0:03b5121a232e 6357 htmlDocPtr
pcercuei 0:03b5121a232e 6358 htmlSAXParseFile(const char *filename, const char *encoding, htmlSAXHandlerPtr sax,
pcercuei 0:03b5121a232e 6359 void *userData) {
pcercuei 0:03b5121a232e 6360 htmlDocPtr ret;
pcercuei 0:03b5121a232e 6361 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6362 htmlSAXHandlerPtr oldsax = NULL;
pcercuei 0:03b5121a232e 6363
pcercuei 0:03b5121a232e 6364 xmlInitParser();
pcercuei 0:03b5121a232e 6365
pcercuei 0:03b5121a232e 6366 ctxt = htmlCreateFileParserCtxt(filename, encoding);
pcercuei 0:03b5121a232e 6367 if (ctxt == NULL) return(NULL);
pcercuei 0:03b5121a232e 6368 if (sax != NULL) {
pcercuei 0:03b5121a232e 6369 oldsax = ctxt->sax;
pcercuei 0:03b5121a232e 6370 ctxt->sax = sax;
pcercuei 0:03b5121a232e 6371 ctxt->userData = userData;
pcercuei 0:03b5121a232e 6372 }
pcercuei 0:03b5121a232e 6373
pcercuei 0:03b5121a232e 6374 htmlParseDocument(ctxt);
pcercuei 0:03b5121a232e 6375
pcercuei 0:03b5121a232e 6376 ret = ctxt->myDoc;
pcercuei 0:03b5121a232e 6377 if (sax != NULL) {
pcercuei 0:03b5121a232e 6378 ctxt->sax = oldsax;
pcercuei 0:03b5121a232e 6379 ctxt->userData = NULL;
pcercuei 0:03b5121a232e 6380 }
pcercuei 0:03b5121a232e 6381 htmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 6382
pcercuei 0:03b5121a232e 6383 return(ret);
pcercuei 0:03b5121a232e 6384 }
pcercuei 0:03b5121a232e 6385
pcercuei 0:03b5121a232e 6386 /**
pcercuei 0:03b5121a232e 6387 * htmlParseFile:
pcercuei 0:03b5121a232e 6388 * @filename: the filename
pcercuei 0:03b5121a232e 6389 * @encoding: a free form C string describing the HTML document encoding, or NULL
pcercuei 0:03b5121a232e 6390 *
pcercuei 0:03b5121a232e 6391 * parse an HTML file and build a tree. Automatic support for ZLIB/Compress
pcercuei 0:03b5121a232e 6392 * compressed document is provided by default if found at compile-time.
pcercuei 0:03b5121a232e 6393 *
pcercuei 0:03b5121a232e 6394 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6395 */
pcercuei 0:03b5121a232e 6396
pcercuei 0:03b5121a232e 6397 htmlDocPtr
pcercuei 0:03b5121a232e 6398 htmlParseFile(const char *filename, const char *encoding) {
pcercuei 0:03b5121a232e 6399 return(htmlSAXParseFile(filename, encoding, NULL, NULL));
pcercuei 0:03b5121a232e 6400 }
pcercuei 0:03b5121a232e 6401
pcercuei 0:03b5121a232e 6402 /**
pcercuei 0:03b5121a232e 6403 * htmlHandleOmittedElem:
pcercuei 0:03b5121a232e 6404 * @val: int 0 or 1
pcercuei 0:03b5121a232e 6405 *
pcercuei 0:03b5121a232e 6406 * Set and return the previous value for handling HTML omitted tags.
pcercuei 0:03b5121a232e 6407 *
pcercuei 0:03b5121a232e 6408 * Returns the last value for 0 for no handling, 1 for auto insertion.
pcercuei 0:03b5121a232e 6409 */
pcercuei 0:03b5121a232e 6410
pcercuei 0:03b5121a232e 6411 int
pcercuei 0:03b5121a232e 6412 htmlHandleOmittedElem(int val) {
pcercuei 0:03b5121a232e 6413 int old = htmlOmittedDefaultValue;
pcercuei 0:03b5121a232e 6414
pcercuei 0:03b5121a232e 6415 htmlOmittedDefaultValue = val;
pcercuei 0:03b5121a232e 6416 return(old);
pcercuei 0:03b5121a232e 6417 }
pcercuei 0:03b5121a232e 6418
pcercuei 0:03b5121a232e 6419 /**
pcercuei 0:03b5121a232e 6420 * htmlElementAllowedHere:
pcercuei 0:03b5121a232e 6421 * @parent: HTML parent element
pcercuei 0:03b5121a232e 6422 * @elt: HTML element
pcercuei 0:03b5121a232e 6423 *
pcercuei 0:03b5121a232e 6424 * Checks whether an HTML element may be a direct child of a parent element.
pcercuei 0:03b5121a232e 6425 * Note - doesn't check for deprecated elements
pcercuei 0:03b5121a232e 6426 *
pcercuei 0:03b5121a232e 6427 * Returns 1 if allowed; 0 otherwise.
pcercuei 0:03b5121a232e 6428 */
pcercuei 0:03b5121a232e 6429 int
pcercuei 0:03b5121a232e 6430 htmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) {
pcercuei 0:03b5121a232e 6431 const char** p ;
pcercuei 0:03b5121a232e 6432
pcercuei 0:03b5121a232e 6433 if ( ! elt || ! parent || ! parent->subelts )
pcercuei 0:03b5121a232e 6434 return 0 ;
pcercuei 0:03b5121a232e 6435
pcercuei 0:03b5121a232e 6436 for ( p = parent->subelts; *p; ++p )
pcercuei 0:03b5121a232e 6437 if ( !xmlStrcmp((const xmlChar *)*p, elt) )
pcercuei 0:03b5121a232e 6438 return 1 ;
pcercuei 0:03b5121a232e 6439
pcercuei 0:03b5121a232e 6440 return 0 ;
pcercuei 0:03b5121a232e 6441 }
pcercuei 0:03b5121a232e 6442 /**
pcercuei 0:03b5121a232e 6443 * htmlElementStatusHere:
pcercuei 0:03b5121a232e 6444 * @parent: HTML parent element
pcercuei 0:03b5121a232e 6445 * @elt: HTML element
pcercuei 0:03b5121a232e 6446 *
pcercuei 0:03b5121a232e 6447 * Checks whether an HTML element may be a direct child of a parent element.
pcercuei 0:03b5121a232e 6448 * and if so whether it is valid or deprecated.
pcercuei 0:03b5121a232e 6449 *
pcercuei 0:03b5121a232e 6450 * Returns one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID
pcercuei 0:03b5121a232e 6451 */
pcercuei 0:03b5121a232e 6452 htmlStatus
pcercuei 0:03b5121a232e 6453 htmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) {
pcercuei 0:03b5121a232e 6454 if ( ! parent || ! elt )
pcercuei 0:03b5121a232e 6455 return HTML_INVALID ;
pcercuei 0:03b5121a232e 6456 if ( ! htmlElementAllowedHere(parent, (const xmlChar*) elt->name ) )
pcercuei 0:03b5121a232e 6457 return HTML_INVALID ;
pcercuei 0:03b5121a232e 6458
pcercuei 0:03b5121a232e 6459 return ( elt->dtd == 0 ) ? HTML_VALID : HTML_DEPRECATED ;
pcercuei 0:03b5121a232e 6460 }
pcercuei 0:03b5121a232e 6461 /**
pcercuei 0:03b5121a232e 6462 * htmlAttrAllowed:
pcercuei 0:03b5121a232e 6463 * @elt: HTML element
pcercuei 0:03b5121a232e 6464 * @attr: HTML attribute
pcercuei 0:03b5121a232e 6465 * @legacy: whether to allow deprecated attributes
pcercuei 0:03b5121a232e 6466 *
pcercuei 0:03b5121a232e 6467 * Checks whether an attribute is valid for an element
pcercuei 0:03b5121a232e 6468 * Has full knowledge of Required and Deprecated attributes
pcercuei 0:03b5121a232e 6469 *
pcercuei 0:03b5121a232e 6470 * Returns one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID
pcercuei 0:03b5121a232e 6471 */
pcercuei 0:03b5121a232e 6472 htmlStatus
pcercuei 0:03b5121a232e 6473 htmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) {
pcercuei 0:03b5121a232e 6474 const char** p ;
pcercuei 0:03b5121a232e 6475
pcercuei 0:03b5121a232e 6476 if ( !elt || ! attr )
pcercuei 0:03b5121a232e 6477 return HTML_INVALID ;
pcercuei 0:03b5121a232e 6478
pcercuei 0:03b5121a232e 6479 if ( elt->attrs_req )
pcercuei 0:03b5121a232e 6480 for ( p = elt->attrs_req; *p; ++p)
pcercuei 0:03b5121a232e 6481 if ( !xmlStrcmp((const xmlChar*)*p, attr) )
pcercuei 0:03b5121a232e 6482 return HTML_REQUIRED ;
pcercuei 0:03b5121a232e 6483
pcercuei 0:03b5121a232e 6484 if ( elt->attrs_opt )
pcercuei 0:03b5121a232e 6485 for ( p = elt->attrs_opt; *p; ++p)
pcercuei 0:03b5121a232e 6486 if ( !xmlStrcmp((const xmlChar*)*p, attr) )
pcercuei 0:03b5121a232e 6487 return HTML_VALID ;
pcercuei 0:03b5121a232e 6488
pcercuei 0:03b5121a232e 6489 if ( legacy && elt->attrs_depr )
pcercuei 0:03b5121a232e 6490 for ( p = elt->attrs_depr; *p; ++p)
pcercuei 0:03b5121a232e 6491 if ( !xmlStrcmp((const xmlChar*)*p, attr) )
pcercuei 0:03b5121a232e 6492 return HTML_DEPRECATED ;
pcercuei 0:03b5121a232e 6493
pcercuei 0:03b5121a232e 6494 return HTML_INVALID ;
pcercuei 0:03b5121a232e 6495 }
pcercuei 0:03b5121a232e 6496 /**
pcercuei 0:03b5121a232e 6497 * htmlNodeStatus:
pcercuei 0:03b5121a232e 6498 * @node: an htmlNodePtr in a tree
pcercuei 0:03b5121a232e 6499 * @legacy: whether to allow deprecated elements (YES is faster here
pcercuei 0:03b5121a232e 6500 * for Element nodes)
pcercuei 0:03b5121a232e 6501 *
pcercuei 0:03b5121a232e 6502 * Checks whether the tree node is valid. Experimental (the author
pcercuei 0:03b5121a232e 6503 * only uses the HTML enhancements in a SAX parser)
pcercuei 0:03b5121a232e 6504 *
pcercuei 0:03b5121a232e 6505 * Return: for Element nodes, a return from htmlElementAllowedHere (if
pcercuei 0:03b5121a232e 6506 * legacy allowed) or htmlElementStatusHere (otherwise).
pcercuei 0:03b5121a232e 6507 * for Attribute nodes, a return from htmlAttrAllowed
pcercuei 0:03b5121a232e 6508 * for other nodes, HTML_NA (no checks performed)
pcercuei 0:03b5121a232e 6509 */
pcercuei 0:03b5121a232e 6510 htmlStatus
pcercuei 0:03b5121a232e 6511 htmlNodeStatus(const htmlNodePtr node, int legacy) {
pcercuei 0:03b5121a232e 6512 if ( ! node )
pcercuei 0:03b5121a232e 6513 return HTML_INVALID ;
pcercuei 0:03b5121a232e 6514
pcercuei 0:03b5121a232e 6515 switch ( node->type ) {
pcercuei 0:03b5121a232e 6516 case XML_ELEMENT_NODE:
pcercuei 0:03b5121a232e 6517 return legacy
pcercuei 0:03b5121a232e 6518 ? ( htmlElementAllowedHere (
pcercuei 0:03b5121a232e 6519 htmlTagLookup(node->parent->name) , node->name
pcercuei 0:03b5121a232e 6520 ) ? HTML_VALID : HTML_INVALID )
pcercuei 0:03b5121a232e 6521 : htmlElementStatusHere(
pcercuei 0:03b5121a232e 6522 htmlTagLookup(node->parent->name) ,
pcercuei 0:03b5121a232e 6523 htmlTagLookup(node->name) )
pcercuei 0:03b5121a232e 6524 ;
pcercuei 0:03b5121a232e 6525 case XML_ATTRIBUTE_NODE:
pcercuei 0:03b5121a232e 6526 return htmlAttrAllowed(
pcercuei 0:03b5121a232e 6527 htmlTagLookup(node->parent->name) , node->name, legacy) ;
pcercuei 0:03b5121a232e 6528 default: return HTML_NA ;
pcercuei 0:03b5121a232e 6529 }
pcercuei 0:03b5121a232e 6530 }
pcercuei 0:03b5121a232e 6531 /************************************************************************
pcercuei 0:03b5121a232e 6532 * *
pcercuei 0:03b5121a232e 6533 * New set (2.6.0) of simpler and more flexible APIs *
pcercuei 0:03b5121a232e 6534 * *
pcercuei 0:03b5121a232e 6535 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; when it is NULL every non-NULL string is freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe as the body of an if/else) and the caller supplies
 * the terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
pcercuei 0:03b5121a232e 6547
pcercuei 0:03b5121a232e 6548 /**
pcercuei 0:03b5121a232e 6549 * htmlCtxtReset:
pcercuei 0:03b5121a232e 6550 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 6551 *
pcercuei 0:03b5121a232e 6552 * Reset a parser context
pcercuei 0:03b5121a232e 6553 */
pcercuei 0:03b5121a232e 6554 void
pcercuei 0:03b5121a232e 6555 htmlCtxtReset(htmlParserCtxtPtr ctxt)
pcercuei 0:03b5121a232e 6556 {
pcercuei 0:03b5121a232e 6557 xmlParserInputPtr input;
pcercuei 0:03b5121a232e 6558 xmlDictPtr dict;
pcercuei 0:03b5121a232e 6559
pcercuei 0:03b5121a232e 6560 if (ctxt == NULL)
pcercuei 0:03b5121a232e 6561 return;
pcercuei 0:03b5121a232e 6562
pcercuei 0:03b5121a232e 6563 xmlInitParser();
pcercuei 0:03b5121a232e 6564 dict = ctxt->dict;
pcercuei 0:03b5121a232e 6565
pcercuei 0:03b5121a232e 6566 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
pcercuei 0:03b5121a232e 6567 xmlFreeInputStream(input);
pcercuei 0:03b5121a232e 6568 }
pcercuei 0:03b5121a232e 6569 ctxt->inputNr = 0;
pcercuei 0:03b5121a232e 6570 ctxt->input = NULL;
pcercuei 0:03b5121a232e 6571
pcercuei 0:03b5121a232e 6572 ctxt->spaceNr = 0;
pcercuei 0:03b5121a232e 6573 if (ctxt->spaceTab != NULL) {
pcercuei 0:03b5121a232e 6574 ctxt->spaceTab[0] = -1;
pcercuei 0:03b5121a232e 6575 ctxt->space = &ctxt->spaceTab[0];
pcercuei 0:03b5121a232e 6576 } else {
pcercuei 0:03b5121a232e 6577 ctxt->space = NULL;
pcercuei 0:03b5121a232e 6578 }
pcercuei 0:03b5121a232e 6579
pcercuei 0:03b5121a232e 6580
pcercuei 0:03b5121a232e 6581 ctxt->nodeNr = 0;
pcercuei 0:03b5121a232e 6582 ctxt->node = NULL;
pcercuei 0:03b5121a232e 6583
pcercuei 0:03b5121a232e 6584 ctxt->nameNr = 0;
pcercuei 0:03b5121a232e 6585 ctxt->name = NULL;
pcercuei 0:03b5121a232e 6586
pcercuei 0:03b5121a232e 6587 DICT_FREE(ctxt->version);
pcercuei 0:03b5121a232e 6588 ctxt->version = NULL;
pcercuei 0:03b5121a232e 6589 DICT_FREE(ctxt->encoding);
pcercuei 0:03b5121a232e 6590 ctxt->encoding = NULL;
pcercuei 0:03b5121a232e 6591 DICT_FREE(ctxt->directory);
pcercuei 0:03b5121a232e 6592 ctxt->directory = NULL;
pcercuei 0:03b5121a232e 6593 DICT_FREE(ctxt->extSubURI);
pcercuei 0:03b5121a232e 6594 ctxt->extSubURI = NULL;
pcercuei 0:03b5121a232e 6595 DICT_FREE(ctxt->extSubSystem);
pcercuei 0:03b5121a232e 6596 ctxt->extSubSystem = NULL;
pcercuei 0:03b5121a232e 6597 if (ctxt->myDoc != NULL)
pcercuei 0:03b5121a232e 6598 xmlFreeDoc(ctxt->myDoc);
pcercuei 0:03b5121a232e 6599 ctxt->myDoc = NULL;
pcercuei 0:03b5121a232e 6600
pcercuei 0:03b5121a232e 6601 ctxt->standalone = -1;
pcercuei 0:03b5121a232e 6602 ctxt->hasExternalSubset = 0;
pcercuei 0:03b5121a232e 6603 ctxt->hasPErefs = 0;
pcercuei 0:03b5121a232e 6604 ctxt->html = 1;
pcercuei 0:03b5121a232e 6605 ctxt->external = 0;
pcercuei 0:03b5121a232e 6606 ctxt->instate = XML_PARSER_START;
pcercuei 0:03b5121a232e 6607 ctxt->token = 0;
pcercuei 0:03b5121a232e 6608
pcercuei 0:03b5121a232e 6609 ctxt->wellFormed = 1;
pcercuei 0:03b5121a232e 6610 ctxt->nsWellFormed = 1;
pcercuei 0:03b5121a232e 6611 ctxt->disableSAX = 0;
pcercuei 0:03b5121a232e 6612 ctxt->valid = 1;
pcercuei 0:03b5121a232e 6613 ctxt->vctxt.userData = ctxt;
pcercuei 0:03b5121a232e 6614 ctxt->vctxt.error = xmlParserValidityError;
pcercuei 0:03b5121a232e 6615 ctxt->vctxt.warning = xmlParserValidityWarning;
pcercuei 0:03b5121a232e 6616 ctxt->record_info = 0;
pcercuei 0:03b5121a232e 6617 ctxt->nbChars = 0;
pcercuei 0:03b5121a232e 6618 ctxt->checkIndex = 0;
pcercuei 0:03b5121a232e 6619 ctxt->inSubset = 0;
pcercuei 0:03b5121a232e 6620 ctxt->errNo = XML_ERR_OK;
pcercuei 0:03b5121a232e 6621 ctxt->depth = 0;
pcercuei 0:03b5121a232e 6622 ctxt->charset = XML_CHAR_ENCODING_NONE;
pcercuei 0:03b5121a232e 6623 ctxt->catalogs = NULL;
pcercuei 0:03b5121a232e 6624 xmlInitNodeInfoSeq(&ctxt->node_seq);
pcercuei 0:03b5121a232e 6625
pcercuei 0:03b5121a232e 6626 if (ctxt->attsDefault != NULL) {
pcercuei 0:03b5121a232e 6627 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
pcercuei 0:03b5121a232e 6628 ctxt->attsDefault = NULL;
pcercuei 0:03b5121a232e 6629 }
pcercuei 0:03b5121a232e 6630 if (ctxt->attsSpecial != NULL) {
pcercuei 0:03b5121a232e 6631 xmlHashFree(ctxt->attsSpecial, NULL);
pcercuei 0:03b5121a232e 6632 ctxt->attsSpecial = NULL;
pcercuei 0:03b5121a232e 6633 }
pcercuei 0:03b5121a232e 6634 }
pcercuei 0:03b5121a232e 6635
pcercuei 0:03b5121a232e 6636 /**
pcercuei 0:03b5121a232e 6637 * htmlCtxtUseOptions:
pcercuei 0:03b5121a232e 6638 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 6639 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6640 *
pcercuei 0:03b5121a232e 6641 * Applies the options to the parser context
pcercuei 0:03b5121a232e 6642 *
pcercuei 0:03b5121a232e 6643 * Returns 0 in case of success, the set of unknown or unimplemented options
pcercuei 0:03b5121a232e 6644 * in case of error.
pcercuei 0:03b5121a232e 6645 */
pcercuei 0:03b5121a232e 6646 int
pcercuei 0:03b5121a232e 6647 htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
pcercuei 0:03b5121a232e 6648 {
pcercuei 0:03b5121a232e 6649 if (ctxt == NULL)
pcercuei 0:03b5121a232e 6650 return(-1);
pcercuei 0:03b5121a232e 6651
pcercuei 0:03b5121a232e 6652 if (options & HTML_PARSE_NOWARNING) {
pcercuei 0:03b5121a232e 6653 ctxt->sax->warning = NULL;
pcercuei 0:03b5121a232e 6654 ctxt->vctxt.warning = NULL;
pcercuei 0:03b5121a232e 6655 options -= XML_PARSE_NOWARNING;
pcercuei 0:03b5121a232e 6656 ctxt->options |= XML_PARSE_NOWARNING;
pcercuei 0:03b5121a232e 6657 }
pcercuei 0:03b5121a232e 6658 if (options & HTML_PARSE_NOERROR) {
pcercuei 0:03b5121a232e 6659 ctxt->sax->error = NULL;
pcercuei 0:03b5121a232e 6660 ctxt->vctxt.error = NULL;
pcercuei 0:03b5121a232e 6661 ctxt->sax->fatalError = NULL;
pcercuei 0:03b5121a232e 6662 options -= XML_PARSE_NOERROR;
pcercuei 0:03b5121a232e 6663 ctxt->options |= XML_PARSE_NOERROR;
pcercuei 0:03b5121a232e 6664 }
pcercuei 0:03b5121a232e 6665 if (options & HTML_PARSE_PEDANTIC) {
pcercuei 0:03b5121a232e 6666 ctxt->pedantic = 1;
pcercuei 0:03b5121a232e 6667 options -= XML_PARSE_PEDANTIC;
pcercuei 0:03b5121a232e 6668 ctxt->options |= XML_PARSE_PEDANTIC;
pcercuei 0:03b5121a232e 6669 } else
pcercuei 0:03b5121a232e 6670 ctxt->pedantic = 0;
pcercuei 0:03b5121a232e 6671 if (options & XML_PARSE_NOBLANKS) {
pcercuei 0:03b5121a232e 6672 ctxt->keepBlanks = 0;
pcercuei 0:03b5121a232e 6673 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
pcercuei 0:03b5121a232e 6674 options -= XML_PARSE_NOBLANKS;
pcercuei 0:03b5121a232e 6675 ctxt->options |= XML_PARSE_NOBLANKS;
pcercuei 0:03b5121a232e 6676 } else
pcercuei 0:03b5121a232e 6677 ctxt->keepBlanks = 1;
pcercuei 0:03b5121a232e 6678 if (options & HTML_PARSE_RECOVER) {
pcercuei 0:03b5121a232e 6679 ctxt->recovery = 1;
pcercuei 0:03b5121a232e 6680 options -= HTML_PARSE_RECOVER;
pcercuei 0:03b5121a232e 6681 } else
pcercuei 0:03b5121a232e 6682 ctxt->recovery = 0;
pcercuei 0:03b5121a232e 6683 if (options & HTML_PARSE_COMPACT) {
pcercuei 0:03b5121a232e 6684 ctxt->options |= HTML_PARSE_COMPACT;
pcercuei 0:03b5121a232e 6685 options -= HTML_PARSE_COMPACT;
pcercuei 0:03b5121a232e 6686 }
pcercuei 0:03b5121a232e 6687 if (options & XML_PARSE_HUGE) {
pcercuei 0:03b5121a232e 6688 ctxt->options |= XML_PARSE_HUGE;
pcercuei 0:03b5121a232e 6689 options -= XML_PARSE_HUGE;
pcercuei 0:03b5121a232e 6690 }
pcercuei 0:03b5121a232e 6691 if (options & HTML_PARSE_NODEFDTD) {
pcercuei 0:03b5121a232e 6692 ctxt->options |= HTML_PARSE_NODEFDTD;
pcercuei 0:03b5121a232e 6693 options -= HTML_PARSE_NODEFDTD;
pcercuei 0:03b5121a232e 6694 }
pcercuei 0:03b5121a232e 6695 if (options & HTML_PARSE_IGNORE_ENC) {
pcercuei 0:03b5121a232e 6696 ctxt->options |= HTML_PARSE_IGNORE_ENC;
pcercuei 0:03b5121a232e 6697 options -= HTML_PARSE_IGNORE_ENC;
pcercuei 0:03b5121a232e 6698 }
pcercuei 0:03b5121a232e 6699 if (options & HTML_PARSE_NOIMPLIED) {
pcercuei 0:03b5121a232e 6700 ctxt->options |= HTML_PARSE_NOIMPLIED;
pcercuei 0:03b5121a232e 6701 options -= HTML_PARSE_NOIMPLIED;
pcercuei 0:03b5121a232e 6702 }
pcercuei 0:03b5121a232e 6703 ctxt->dictNames = 0;
pcercuei 0:03b5121a232e 6704 return (options);
pcercuei 0:03b5121a232e 6705 }
pcercuei 0:03b5121a232e 6706
pcercuei 0:03b5121a232e 6707 /**
pcercuei 0:03b5121a232e 6708 * htmlDoRead:
pcercuei 0:03b5121a232e 6709 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 6710 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 6711 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6712 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6713 * @reuse: keep the context for reuse
pcercuei 0:03b5121a232e 6714 *
pcercuei 0:03b5121a232e 6715 * Common front-end for the htmlRead functions
pcercuei 0:03b5121a232e 6716 *
pcercuei 0:03b5121a232e 6717 * Returns the resulting document tree or NULL
pcercuei 0:03b5121a232e 6718 */
pcercuei 0:03b5121a232e 6719 static htmlDocPtr
pcercuei 0:03b5121a232e 6720 htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
pcercuei 0:03b5121a232e 6721 int options, int reuse)
pcercuei 0:03b5121a232e 6722 {
pcercuei 0:03b5121a232e 6723 htmlDocPtr ret;
pcercuei 0:03b5121a232e 6724
pcercuei 0:03b5121a232e 6725 htmlCtxtUseOptions(ctxt, options);
pcercuei 0:03b5121a232e 6726 ctxt->html = 1;
pcercuei 0:03b5121a232e 6727 if (encoding != NULL) {
pcercuei 0:03b5121a232e 6728 xmlCharEncodingHandlerPtr hdlr;
pcercuei 0:03b5121a232e 6729
pcercuei 0:03b5121a232e 6730 hdlr = xmlFindCharEncodingHandler(encoding);
pcercuei 0:03b5121a232e 6731 if (hdlr != NULL) {
pcercuei 0:03b5121a232e 6732 xmlSwitchToEncoding(ctxt, hdlr);
pcercuei 0:03b5121a232e 6733 if (ctxt->input->encoding != NULL)
pcercuei 0:03b5121a232e 6734 xmlFree((xmlChar *) ctxt->input->encoding);
pcercuei 0:03b5121a232e 6735 ctxt->input->encoding = xmlStrdup((xmlChar *)encoding);
pcercuei 0:03b5121a232e 6736 }
pcercuei 0:03b5121a232e 6737 }
pcercuei 0:03b5121a232e 6738 if ((URL != NULL) && (ctxt->input != NULL) &&
pcercuei 0:03b5121a232e 6739 (ctxt->input->filename == NULL))
pcercuei 0:03b5121a232e 6740 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
pcercuei 0:03b5121a232e 6741 htmlParseDocument(ctxt);
pcercuei 0:03b5121a232e 6742 ret = ctxt->myDoc;
pcercuei 0:03b5121a232e 6743 ctxt->myDoc = NULL;
pcercuei 0:03b5121a232e 6744 if (!reuse) {
pcercuei 0:03b5121a232e 6745 if ((ctxt->dictNames) &&
pcercuei 0:03b5121a232e 6746 (ret != NULL) &&
pcercuei 0:03b5121a232e 6747 (ret->dict == ctxt->dict))
pcercuei 0:03b5121a232e 6748 ctxt->dict = NULL;
pcercuei 0:03b5121a232e 6749 xmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 6750 }
pcercuei 0:03b5121a232e 6751 return (ret);
pcercuei 0:03b5121a232e 6752 }
pcercuei 0:03b5121a232e 6753
pcercuei 0:03b5121a232e 6754 /**
pcercuei 0:03b5121a232e 6755 * htmlReadDoc:
pcercuei 0:03b5121a232e 6756 * @cur: a pointer to a zero terminated string
pcercuei 0:03b5121a232e 6757 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 6758 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6759 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6760 *
pcercuei 0:03b5121a232e 6761 * parse an XML in-memory document and build a tree.
pcercuei 0:03b5121a232e 6762 *
pcercuei 0:03b5121a232e 6763 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6764 */
pcercuei 0:03b5121a232e 6765 htmlDocPtr
pcercuei 0:03b5121a232e 6766 htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
pcercuei 0:03b5121a232e 6767 {
pcercuei 0:03b5121a232e 6768 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6769
pcercuei 0:03b5121a232e 6770 if (cur == NULL)
pcercuei 0:03b5121a232e 6771 return (NULL);
pcercuei 0:03b5121a232e 6772
pcercuei 0:03b5121a232e 6773 xmlInitParser();
pcercuei 0:03b5121a232e 6774 ctxt = htmlCreateDocParserCtxt(cur, NULL);
pcercuei 0:03b5121a232e 6775 if (ctxt == NULL)
pcercuei 0:03b5121a232e 6776 return (NULL);
pcercuei 0:03b5121a232e 6777 return (htmlDoRead(ctxt, URL, encoding, options, 0));
pcercuei 0:03b5121a232e 6778 }
pcercuei 0:03b5121a232e 6779
pcercuei 0:03b5121a232e 6780 /**
pcercuei 0:03b5121a232e 6781 * htmlReadFile:
pcercuei 0:03b5121a232e 6782 * @filename: a file or URL
pcercuei 0:03b5121a232e 6783 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6784 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6785 *
pcercuei 0:03b5121a232e 6786 * parse an XML file from the filesystem or the network.
pcercuei 0:03b5121a232e 6787 *
pcercuei 0:03b5121a232e 6788 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6789 */
pcercuei 0:03b5121a232e 6790 htmlDocPtr
pcercuei 0:03b5121a232e 6791 htmlReadFile(const char *filename, const char *encoding, int options)
pcercuei 0:03b5121a232e 6792 {
pcercuei 0:03b5121a232e 6793 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6794
pcercuei 0:03b5121a232e 6795 xmlInitParser();
pcercuei 0:03b5121a232e 6796 ctxt = htmlCreateFileParserCtxt(filename, encoding);
pcercuei 0:03b5121a232e 6797 if (ctxt == NULL)
pcercuei 0:03b5121a232e 6798 return (NULL);
pcercuei 0:03b5121a232e 6799 return (htmlDoRead(ctxt, NULL, NULL, options, 0));
pcercuei 0:03b5121a232e 6800 }
pcercuei 0:03b5121a232e 6801
pcercuei 0:03b5121a232e 6802 /**
pcercuei 0:03b5121a232e 6803 * htmlReadMemory:
pcercuei 0:03b5121a232e 6804 * @buffer: a pointer to a char array
pcercuei 0:03b5121a232e 6805 * @size: the size of the array
pcercuei 0:03b5121a232e 6806 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 6807 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6808 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6809 *
pcercuei 0:03b5121a232e 6810 * parse an XML in-memory document and build a tree.
pcercuei 0:03b5121a232e 6811 *
pcercuei 0:03b5121a232e 6812 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6813 */
pcercuei 0:03b5121a232e 6814 htmlDocPtr
pcercuei 0:03b5121a232e 6815 htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
pcercuei 0:03b5121a232e 6816 {
pcercuei 0:03b5121a232e 6817 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6818
pcercuei 0:03b5121a232e 6819 xmlInitParser();
pcercuei 0:03b5121a232e 6820 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
pcercuei 0:03b5121a232e 6821 if (ctxt == NULL)
pcercuei 0:03b5121a232e 6822 return (NULL);
pcercuei 0:03b5121a232e 6823 htmlDefaultSAXHandlerInit();
pcercuei 0:03b5121a232e 6824 if (ctxt->sax != NULL)
pcercuei 0:03b5121a232e 6825 memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
pcercuei 0:03b5121a232e 6826 return (htmlDoRead(ctxt, URL, encoding, options, 0));
pcercuei 0:03b5121a232e 6827 }
pcercuei 0:03b5121a232e 6828
pcercuei 0:03b5121a232e 6829 /**
pcercuei 0:03b5121a232e 6830 * htmlReadFd:
pcercuei 0:03b5121a232e 6831 * @fd: an open file descriptor
pcercuei 0:03b5121a232e 6832 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 6833 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6834 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6835 *
pcercuei 0:03b5121a232e 6836 * parse an XML from a file descriptor and build a tree.
pcercuei 0:03b5121a232e 6837 *
pcercuei 0:03b5121a232e 6838 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6839 */
pcercuei 0:03b5121a232e 6840 htmlDocPtr
pcercuei 0:03b5121a232e 6841 htmlReadFd(int fd, const char *URL, const char *encoding, int options)
pcercuei 0:03b5121a232e 6842 {
pcercuei 0:03b5121a232e 6843 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6844 xmlParserInputBufferPtr input;
pcercuei 0:03b5121a232e 6845 xmlParserInputPtr stream;
pcercuei 0:03b5121a232e 6846
pcercuei 0:03b5121a232e 6847 if (fd < 0)
pcercuei 0:03b5121a232e 6848 return (NULL);
pcercuei 0:03b5121a232e 6849 xmlInitParser();
pcercuei 0:03b5121a232e 6850
pcercuei 0:03b5121a232e 6851 xmlInitParser();
pcercuei 0:03b5121a232e 6852 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 6853 if (input == NULL)
pcercuei 0:03b5121a232e 6854 return (NULL);
pcercuei 0:03b5121a232e 6855 ctxt = xmlNewParserCtxt();
pcercuei 0:03b5121a232e 6856 if (ctxt == NULL) {
pcercuei 0:03b5121a232e 6857 xmlFreeParserInputBuffer(input);
pcercuei 0:03b5121a232e 6858 return (NULL);
pcercuei 0:03b5121a232e 6859 }
pcercuei 0:03b5121a232e 6860 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 6861 if (stream == NULL) {
pcercuei 0:03b5121a232e 6862 xmlFreeParserInputBuffer(input);
pcercuei 0:03b5121a232e 6863 xmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 6864 return (NULL);
pcercuei 0:03b5121a232e 6865 }
pcercuei 0:03b5121a232e 6866 inputPush(ctxt, stream);
pcercuei 0:03b5121a232e 6867 return (htmlDoRead(ctxt, URL, encoding, options, 0));
pcercuei 0:03b5121a232e 6868 }
pcercuei 0:03b5121a232e 6869
pcercuei 0:03b5121a232e 6870 /**
pcercuei 0:03b5121a232e 6871 * htmlReadIO:
pcercuei 0:03b5121a232e 6872 * @ioread: an I/O read function
pcercuei 0:03b5121a232e 6873 * @ioclose: an I/O close function
pcercuei 0:03b5121a232e 6874 * @ioctx: an I/O handler
pcercuei 0:03b5121a232e 6875 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 6876 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6877 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6878 *
pcercuei 0:03b5121a232e 6879 * parse an HTML document from I/O functions and source and build a tree.
pcercuei 0:03b5121a232e 6880 *
pcercuei 0:03b5121a232e 6881 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6882 */
pcercuei 0:03b5121a232e 6883 htmlDocPtr
pcercuei 0:03b5121a232e 6884 htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
pcercuei 0:03b5121a232e 6885 void *ioctx, const char *URL, const char *encoding, int options)
pcercuei 0:03b5121a232e 6886 {
pcercuei 0:03b5121a232e 6887 htmlParserCtxtPtr ctxt;
pcercuei 0:03b5121a232e 6888 xmlParserInputBufferPtr input;
pcercuei 0:03b5121a232e 6889 xmlParserInputPtr stream;
pcercuei 0:03b5121a232e 6890
pcercuei 0:03b5121a232e 6891 if (ioread == NULL)
pcercuei 0:03b5121a232e 6892 return (NULL);
pcercuei 0:03b5121a232e 6893 xmlInitParser();
pcercuei 0:03b5121a232e 6894
pcercuei 0:03b5121a232e 6895 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
pcercuei 0:03b5121a232e 6896 XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 6897 if (input == NULL) {
pcercuei 0:03b5121a232e 6898 if (ioclose != NULL)
pcercuei 0:03b5121a232e 6899 ioclose(ioctx);
pcercuei 0:03b5121a232e 6900 return (NULL);
pcercuei 0:03b5121a232e 6901 }
pcercuei 0:03b5121a232e 6902 ctxt = htmlNewParserCtxt();
pcercuei 0:03b5121a232e 6903 if (ctxt == NULL) {
pcercuei 0:03b5121a232e 6904 xmlFreeParserInputBuffer(input);
pcercuei 0:03b5121a232e 6905 return (NULL);
pcercuei 0:03b5121a232e 6906 }
pcercuei 0:03b5121a232e 6907 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 6908 if (stream == NULL) {
pcercuei 0:03b5121a232e 6909 xmlFreeParserInputBuffer(input);
pcercuei 0:03b5121a232e 6910 xmlFreeParserCtxt(ctxt);
pcercuei 0:03b5121a232e 6911 return (NULL);
pcercuei 0:03b5121a232e 6912 }
pcercuei 0:03b5121a232e 6913 inputPush(ctxt, stream);
pcercuei 0:03b5121a232e 6914 return (htmlDoRead(ctxt, URL, encoding, options, 0));
pcercuei 0:03b5121a232e 6915 }
pcercuei 0:03b5121a232e 6916
pcercuei 0:03b5121a232e 6917 /**
pcercuei 0:03b5121a232e 6918 * htmlCtxtReadDoc:
pcercuei 0:03b5121a232e 6919 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 6920 * @cur: a pointer to a zero terminated string
pcercuei 0:03b5121a232e 6921 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 6922 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6923 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6924 *
pcercuei 0:03b5121a232e 6925 * parse an XML in-memory document and build a tree.
pcercuei 0:03b5121a232e 6926 * This reuses the existing @ctxt parser context
pcercuei 0:03b5121a232e 6927 *
pcercuei 0:03b5121a232e 6928 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6929 */
pcercuei 0:03b5121a232e 6930 htmlDocPtr
pcercuei 0:03b5121a232e 6931 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
pcercuei 0:03b5121a232e 6932 const char *URL, const char *encoding, int options)
pcercuei 0:03b5121a232e 6933 {
pcercuei 0:03b5121a232e 6934 xmlParserInputPtr stream;
pcercuei 0:03b5121a232e 6935
pcercuei 0:03b5121a232e 6936 if (cur == NULL)
pcercuei 0:03b5121a232e 6937 return (NULL);
pcercuei 0:03b5121a232e 6938 if (ctxt == NULL)
pcercuei 0:03b5121a232e 6939 return (NULL);
pcercuei 0:03b5121a232e 6940 xmlInitParser();
pcercuei 0:03b5121a232e 6941
pcercuei 0:03b5121a232e 6942 htmlCtxtReset(ctxt);
pcercuei 0:03b5121a232e 6943
pcercuei 0:03b5121a232e 6944 stream = xmlNewStringInputStream(ctxt, cur);
pcercuei 0:03b5121a232e 6945 if (stream == NULL) {
pcercuei 0:03b5121a232e 6946 return (NULL);
pcercuei 0:03b5121a232e 6947 }
pcercuei 0:03b5121a232e 6948 inputPush(ctxt, stream);
pcercuei 0:03b5121a232e 6949 return (htmlDoRead(ctxt, URL, encoding, options, 1));
pcercuei 0:03b5121a232e 6950 }
pcercuei 0:03b5121a232e 6951
pcercuei 0:03b5121a232e 6952 /**
pcercuei 0:03b5121a232e 6953 * htmlCtxtReadFile:
pcercuei 0:03b5121a232e 6954 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 6955 * @filename: a file or URL
pcercuei 0:03b5121a232e 6956 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6957 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6958 *
pcercuei 0:03b5121a232e 6959 * parse an XML file from the filesystem or the network.
pcercuei 0:03b5121a232e 6960 * This reuses the existing @ctxt parser context
pcercuei 0:03b5121a232e 6961 *
pcercuei 0:03b5121a232e 6962 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6963 */
pcercuei 0:03b5121a232e 6964 htmlDocPtr
pcercuei 0:03b5121a232e 6965 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
pcercuei 0:03b5121a232e 6966 const char *encoding, int options)
pcercuei 0:03b5121a232e 6967 {
pcercuei 0:03b5121a232e 6968 xmlParserInputPtr stream;
pcercuei 0:03b5121a232e 6969
pcercuei 0:03b5121a232e 6970 if (filename == NULL)
pcercuei 0:03b5121a232e 6971 return (NULL);
pcercuei 0:03b5121a232e 6972 if (ctxt == NULL)
pcercuei 0:03b5121a232e 6973 return (NULL);
pcercuei 0:03b5121a232e 6974 xmlInitParser();
pcercuei 0:03b5121a232e 6975
pcercuei 0:03b5121a232e 6976 htmlCtxtReset(ctxt);
pcercuei 0:03b5121a232e 6977
pcercuei 0:03b5121a232e 6978 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
pcercuei 0:03b5121a232e 6979 if (stream == NULL) {
pcercuei 0:03b5121a232e 6980 return (NULL);
pcercuei 0:03b5121a232e 6981 }
pcercuei 0:03b5121a232e 6982 inputPush(ctxt, stream);
pcercuei 0:03b5121a232e 6983 return (htmlDoRead(ctxt, NULL, encoding, options, 1));
pcercuei 0:03b5121a232e 6984 }
pcercuei 0:03b5121a232e 6985
pcercuei 0:03b5121a232e 6986 /**
pcercuei 0:03b5121a232e 6987 * htmlCtxtReadMemory:
pcercuei 0:03b5121a232e 6988 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 6989 * @buffer: a pointer to a char array
pcercuei 0:03b5121a232e 6990 * @size: the size of the array
pcercuei 0:03b5121a232e 6991 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 6992 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 6993 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 6994 *
pcercuei 0:03b5121a232e 6995 * parse an XML in-memory document and build a tree.
pcercuei 0:03b5121a232e 6996 * This reuses the existing @ctxt parser context
pcercuei 0:03b5121a232e 6997 *
pcercuei 0:03b5121a232e 6998 * Returns the resulting document tree
pcercuei 0:03b5121a232e 6999 */
pcercuei 0:03b5121a232e 7000 htmlDocPtr
pcercuei 0:03b5121a232e 7001 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
pcercuei 0:03b5121a232e 7002 const char *URL, const char *encoding, int options)
pcercuei 0:03b5121a232e 7003 {
pcercuei 0:03b5121a232e 7004 xmlParserInputBufferPtr input;
pcercuei 0:03b5121a232e 7005 xmlParserInputPtr stream;
pcercuei 0:03b5121a232e 7006
pcercuei 0:03b5121a232e 7007 if (ctxt == NULL)
pcercuei 0:03b5121a232e 7008 return (NULL);
pcercuei 0:03b5121a232e 7009 if (buffer == NULL)
pcercuei 0:03b5121a232e 7010 return (NULL);
pcercuei 0:03b5121a232e 7011 xmlInitParser();
pcercuei 0:03b5121a232e 7012
pcercuei 0:03b5121a232e 7013 htmlCtxtReset(ctxt);
pcercuei 0:03b5121a232e 7014
pcercuei 0:03b5121a232e 7015 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 7016 if (input == NULL) {
pcercuei 0:03b5121a232e 7017 return(NULL);
pcercuei 0:03b5121a232e 7018 }
pcercuei 0:03b5121a232e 7019
pcercuei 0:03b5121a232e 7020 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 7021 if (stream == NULL) {
pcercuei 0:03b5121a232e 7022 xmlFreeParserInputBuffer(input);
pcercuei 0:03b5121a232e 7023 return(NULL);
pcercuei 0:03b5121a232e 7024 }
pcercuei 0:03b5121a232e 7025
pcercuei 0:03b5121a232e 7026 inputPush(ctxt, stream);
pcercuei 0:03b5121a232e 7027 return (htmlDoRead(ctxt, URL, encoding, options, 1));
pcercuei 0:03b5121a232e 7028 }
pcercuei 0:03b5121a232e 7029
pcercuei 0:03b5121a232e 7030 /**
pcercuei 0:03b5121a232e 7031 * htmlCtxtReadFd:
pcercuei 0:03b5121a232e 7032 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 7033 * @fd: an open file descriptor
pcercuei 0:03b5121a232e 7034 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 7035 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 7036 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 7037 *
pcercuei 0:03b5121a232e 7038 * parse an XML from a file descriptor and build a tree.
pcercuei 0:03b5121a232e 7039 * This reuses the existing @ctxt parser context
pcercuei 0:03b5121a232e 7040 *
pcercuei 0:03b5121a232e 7041 * Returns the resulting document tree
pcercuei 0:03b5121a232e 7042 */
pcercuei 0:03b5121a232e 7043 htmlDocPtr
pcercuei 0:03b5121a232e 7044 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
pcercuei 0:03b5121a232e 7045 const char *URL, const char *encoding, int options)
pcercuei 0:03b5121a232e 7046 {
pcercuei 0:03b5121a232e 7047 xmlParserInputBufferPtr input;
pcercuei 0:03b5121a232e 7048 xmlParserInputPtr stream;
pcercuei 0:03b5121a232e 7049
pcercuei 0:03b5121a232e 7050 if (fd < 0)
pcercuei 0:03b5121a232e 7051 return (NULL);
pcercuei 0:03b5121a232e 7052 if (ctxt == NULL)
pcercuei 0:03b5121a232e 7053 return (NULL);
pcercuei 0:03b5121a232e 7054 xmlInitParser();
pcercuei 0:03b5121a232e 7055
pcercuei 0:03b5121a232e 7056 htmlCtxtReset(ctxt);
pcercuei 0:03b5121a232e 7057
pcercuei 0:03b5121a232e 7058
pcercuei 0:03b5121a232e 7059 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 7060 if (input == NULL)
pcercuei 0:03b5121a232e 7061 return (NULL);
pcercuei 0:03b5121a232e 7062 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 7063 if (stream == NULL) {
pcercuei 0:03b5121a232e 7064 xmlFreeParserInputBuffer(input);
pcercuei 0:03b5121a232e 7065 return (NULL);
pcercuei 0:03b5121a232e 7066 }
pcercuei 0:03b5121a232e 7067 inputPush(ctxt, stream);
pcercuei 0:03b5121a232e 7068 return (htmlDoRead(ctxt, URL, encoding, options, 1));
pcercuei 0:03b5121a232e 7069 }
pcercuei 0:03b5121a232e 7070
pcercuei 0:03b5121a232e 7071 /**
pcercuei 0:03b5121a232e 7072 * htmlCtxtReadIO:
pcercuei 0:03b5121a232e 7073 * @ctxt: an HTML parser context
pcercuei 0:03b5121a232e 7074 * @ioread: an I/O read function
pcercuei 0:03b5121a232e 7075 * @ioclose: an I/O close function
pcercuei 0:03b5121a232e 7076 * @ioctx: an I/O handler
pcercuei 0:03b5121a232e 7077 * @URL: the base URL to use for the document
pcercuei 0:03b5121a232e 7078 * @encoding: the document encoding, or NULL
pcercuei 0:03b5121a232e 7079 * @options: a combination of htmlParserOption(s)
pcercuei 0:03b5121a232e 7080 *
pcercuei 0:03b5121a232e 7081 * parse an HTML document from I/O functions and source and build a tree.
pcercuei 0:03b5121a232e 7082 * This reuses the existing @ctxt parser context
pcercuei 0:03b5121a232e 7083 *
pcercuei 0:03b5121a232e 7084 * Returns the resulting document tree
pcercuei 0:03b5121a232e 7085 */
pcercuei 0:03b5121a232e 7086 htmlDocPtr
pcercuei 0:03b5121a232e 7087 htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
pcercuei 0:03b5121a232e 7088 xmlInputCloseCallback ioclose, void *ioctx,
pcercuei 0:03b5121a232e 7089 const char *URL,
pcercuei 0:03b5121a232e 7090 const char *encoding, int options)
pcercuei 0:03b5121a232e 7091 {
pcercuei 0:03b5121a232e 7092 xmlParserInputBufferPtr input;
pcercuei 0:03b5121a232e 7093 xmlParserInputPtr stream;
pcercuei 0:03b5121a232e 7094
pcercuei 0:03b5121a232e 7095 if (ioread == NULL)
pcercuei 0:03b5121a232e 7096 return (NULL);
pcercuei 0:03b5121a232e 7097 if (ctxt == NULL)
pcercuei 0:03b5121a232e 7098 return (NULL);
pcercuei 0:03b5121a232e 7099 xmlInitParser();
pcercuei 0:03b5121a232e 7100
pcercuei 0:03b5121a232e 7101 htmlCtxtReset(ctxt);
pcercuei 0:03b5121a232e 7102
pcercuei 0:03b5121a232e 7103 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
pcercuei 0:03b5121a232e 7104 XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 7105 if (input == NULL) {
pcercuei 0:03b5121a232e 7106 if (ioclose != NULL)
pcercuei 0:03b5121a232e 7107 ioclose(ioctx);
pcercuei 0:03b5121a232e 7108 return (NULL);
pcercuei 0:03b5121a232e 7109 }
pcercuei 0:03b5121a232e 7110 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
pcercuei 0:03b5121a232e 7111 if (stream == NULL) {
pcercuei 0:03b5121a232e 7112 xmlFreeParserInputBuffer(input);
pcercuei 0:03b5121a232e 7113 return (NULL);
pcercuei 0:03b5121a232e 7114 }
pcercuei 0:03b5121a232e 7115 inputPush(ctxt, stream);
pcercuei 0:03b5121a232e 7116 return (htmlDoRead(ctxt, URL, encoding, options, 1));
pcercuei 0:03b5121a232e 7117 }
pcercuei 0:03b5121a232e 7118
pcercuei 0:03b5121a232e 7119 #define bottom_HTMLparser
pcercuei 0:03b5121a232e 7120 #include "elfgcchack.h"
pcercuei 0:03b5121a232e 7121 #endif /* LIBXML_HTML_ENABLED */
pcercuei 0:03b5121a232e 7122