XML C parser available under the MIT license. http://xmlsoft.org/

Dependents:   libiio

Committer:
pcercuei
Date:
Thu Aug 25 10:07:34 2016 +0000
Revision:
1:26f20484cbdc
Parent:
0:03b5121a232e
Add config.h and dummy.c containing empty functions

Who changed what in which revision?

User | Revision | Line number | New contents of line
pcercuei 0:03b5121a232e 1 /*
pcercuei 0:03b5121a232e 2 * HTMLtree.c : implementation of access function for an HTML tree.
pcercuei 0:03b5121a232e 3 *
pcercuei 0:03b5121a232e 4 * See Copyright for the status of this software.
pcercuei 0:03b5121a232e 5 *
pcercuei 0:03b5121a232e 6 * daniel@veillard.com
pcercuei 0:03b5121a232e 7 */
pcercuei 0:03b5121a232e 8
pcercuei 0:03b5121a232e 9
pcercuei 0:03b5121a232e 10 #define IN_LIBXML
pcercuei 0:03b5121a232e 11 #include "libxml.h"
pcercuei 0:03b5121a232e 12 #ifdef LIBXML_HTML_ENABLED
pcercuei 0:03b5121a232e 13
pcercuei 0:03b5121a232e 14 #include <string.h> /* for memset() only ! */
pcercuei 0:03b5121a232e 15
pcercuei 0:03b5121a232e 16 #ifdef HAVE_CTYPE_H
pcercuei 0:03b5121a232e 17 #include <ctype.h>
pcercuei 0:03b5121a232e 18 #endif
pcercuei 0:03b5121a232e 19 #ifdef HAVE_STDLIB_H
pcercuei 0:03b5121a232e 20 #include <stdlib.h>
pcercuei 0:03b5121a232e 21 #endif
pcercuei 0:03b5121a232e 22
pcercuei 0:03b5121a232e 23 #include <libxml/xmlmemory.h>
pcercuei 0:03b5121a232e 24 #include <libxml/HTMLparser.h>
pcercuei 0:03b5121a232e 25 #include <libxml/HTMLtree.h>
pcercuei 0:03b5121a232e 26 #include <libxml/entities.h>
pcercuei 0:03b5121a232e 27 #include <libxml/valid.h>
pcercuei 0:03b5121a232e 28 #include <libxml/xmlerror.h>
pcercuei 0:03b5121a232e 29 #include <libxml/parserInternals.h>
pcercuei 0:03b5121a232e 30 #include <libxml/globals.h>
pcercuei 0:03b5121a232e 31 #include <libxml/uri.h>
pcercuei 0:03b5121a232e 32
pcercuei 0:03b5121a232e 33 #include "buf.h"
pcercuei 0:03b5121a232e 34
pcercuei 0:03b5121a232e 35 /************************************************************************
pcercuei 0:03b5121a232e 36 * *
pcercuei 0:03b5121a232e 37 * Getting/Setting encoding meta tags *
pcercuei 0:03b5121a232e 38 * *
pcercuei 0:03b5121a232e 39 ************************************************************************/
pcercuei 0:03b5121a232e 40
pcercuei 0:03b5121a232e 41 /**
pcercuei 0:03b5121a232e 42 * htmlGetMetaEncoding:
pcercuei 0:03b5121a232e 43 * @doc: the document
pcercuei 0:03b5121a232e 44 *
pcercuei 0:03b5121a232e 45 * Encoding definition lookup in the Meta tags
pcercuei 0:03b5121a232e 46 *
pcercuei 0:03b5121a232e 47 * Returns the current encoding as flagged in the HTML source
pcercuei 0:03b5121a232e 48 */
pcercuei 0:03b5121a232e 49 const xmlChar *
pcercuei 0:03b5121a232e 50 htmlGetMetaEncoding(htmlDocPtr doc) {
pcercuei 0:03b5121a232e 51 htmlNodePtr cur;
pcercuei 0:03b5121a232e 52 const xmlChar *content;
pcercuei 0:03b5121a232e 53 const xmlChar *encoding;
pcercuei 0:03b5121a232e 54
pcercuei 0:03b5121a232e 55 if (doc == NULL)
pcercuei 0:03b5121a232e 56 return(NULL);
pcercuei 0:03b5121a232e 57 cur = doc->children;
pcercuei 0:03b5121a232e 58
pcercuei 0:03b5121a232e 59 /*
pcercuei 0:03b5121a232e 60 * Search the html
pcercuei 0:03b5121a232e 61 */
pcercuei 0:03b5121a232e 62 while (cur != NULL) {
pcercuei 0:03b5121a232e 63 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
pcercuei 0:03b5121a232e 64 if (xmlStrEqual(cur->name, BAD_CAST"html"))
pcercuei 0:03b5121a232e 65 break;
pcercuei 0:03b5121a232e 66 if (xmlStrEqual(cur->name, BAD_CAST"head"))
pcercuei 0:03b5121a232e 67 goto found_head;
pcercuei 0:03b5121a232e 68 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
pcercuei 0:03b5121a232e 69 goto found_meta;
pcercuei 0:03b5121a232e 70 }
pcercuei 0:03b5121a232e 71 cur = cur->next;
pcercuei 0:03b5121a232e 72 }
pcercuei 0:03b5121a232e 73 if (cur == NULL)
pcercuei 0:03b5121a232e 74 return(NULL);
pcercuei 0:03b5121a232e 75 cur = cur->children;
pcercuei 0:03b5121a232e 76
pcercuei 0:03b5121a232e 77 /*
pcercuei 0:03b5121a232e 78 * Search the head
pcercuei 0:03b5121a232e 79 */
pcercuei 0:03b5121a232e 80 while (cur != NULL) {
pcercuei 0:03b5121a232e 81 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
pcercuei 0:03b5121a232e 82 if (xmlStrEqual(cur->name, BAD_CAST"head"))
pcercuei 0:03b5121a232e 83 break;
pcercuei 0:03b5121a232e 84 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
pcercuei 0:03b5121a232e 85 goto found_meta;
pcercuei 0:03b5121a232e 86 }
pcercuei 0:03b5121a232e 87 cur = cur->next;
pcercuei 0:03b5121a232e 88 }
pcercuei 0:03b5121a232e 89 if (cur == NULL)
pcercuei 0:03b5121a232e 90 return(NULL);
pcercuei 0:03b5121a232e 91 found_head:
pcercuei 0:03b5121a232e 92 cur = cur->children;
pcercuei 0:03b5121a232e 93
pcercuei 0:03b5121a232e 94 /*
pcercuei 0:03b5121a232e 95 * Search the meta elements
pcercuei 0:03b5121a232e 96 */
pcercuei 0:03b5121a232e 97 found_meta:
pcercuei 0:03b5121a232e 98 while (cur != NULL) {
pcercuei 0:03b5121a232e 99 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
pcercuei 0:03b5121a232e 100 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
pcercuei 0:03b5121a232e 101 xmlAttrPtr attr = cur->properties;
pcercuei 0:03b5121a232e 102 int http;
pcercuei 0:03b5121a232e 103 const xmlChar *value;
pcercuei 0:03b5121a232e 104
pcercuei 0:03b5121a232e 105 content = NULL;
pcercuei 0:03b5121a232e 106 http = 0;
pcercuei 0:03b5121a232e 107 while (attr != NULL) {
pcercuei 0:03b5121a232e 108 if ((attr->children != NULL) &&
pcercuei 0:03b5121a232e 109 (attr->children->type == XML_TEXT_NODE) &&
pcercuei 0:03b5121a232e 110 (attr->children->next == NULL)) {
pcercuei 0:03b5121a232e 111 value = attr->children->content;
pcercuei 0:03b5121a232e 112 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
pcercuei 0:03b5121a232e 113 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
pcercuei 0:03b5121a232e 114 http = 1;
pcercuei 0:03b5121a232e 115 else if ((value != NULL)
pcercuei 0:03b5121a232e 116 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
pcercuei 0:03b5121a232e 117 content = value;
pcercuei 0:03b5121a232e 118 if ((http != 0) && (content != NULL))
pcercuei 0:03b5121a232e 119 goto found_content;
pcercuei 0:03b5121a232e 120 }
pcercuei 0:03b5121a232e 121 attr = attr->next;
pcercuei 0:03b5121a232e 122 }
pcercuei 0:03b5121a232e 123 }
pcercuei 0:03b5121a232e 124 }
pcercuei 0:03b5121a232e 125 cur = cur->next;
pcercuei 0:03b5121a232e 126 }
pcercuei 0:03b5121a232e 127 return(NULL);
pcercuei 0:03b5121a232e 128
pcercuei 0:03b5121a232e 129 found_content:
pcercuei 0:03b5121a232e 130 encoding = xmlStrstr(content, BAD_CAST"charset=");
pcercuei 0:03b5121a232e 131 if (encoding == NULL)
pcercuei 0:03b5121a232e 132 encoding = xmlStrstr(content, BAD_CAST"Charset=");
pcercuei 0:03b5121a232e 133 if (encoding == NULL)
pcercuei 0:03b5121a232e 134 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
pcercuei 0:03b5121a232e 135 if (encoding != NULL) {
pcercuei 0:03b5121a232e 136 encoding += 8;
pcercuei 0:03b5121a232e 137 } else {
pcercuei 0:03b5121a232e 138 encoding = xmlStrstr(content, BAD_CAST"charset =");
pcercuei 0:03b5121a232e 139 if (encoding == NULL)
pcercuei 0:03b5121a232e 140 encoding = xmlStrstr(content, BAD_CAST"Charset =");
pcercuei 0:03b5121a232e 141 if (encoding == NULL)
pcercuei 0:03b5121a232e 142 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
pcercuei 0:03b5121a232e 143 if (encoding != NULL)
pcercuei 0:03b5121a232e 144 encoding += 9;
pcercuei 0:03b5121a232e 145 }
pcercuei 0:03b5121a232e 146 if (encoding != NULL) {
pcercuei 0:03b5121a232e 147 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
pcercuei 0:03b5121a232e 148 }
pcercuei 0:03b5121a232e 149 return(encoding);
pcercuei 0:03b5121a232e 150 }
pcercuei 0:03b5121a232e 151
pcercuei 0:03b5121a232e 152 /**
pcercuei 0:03b5121a232e 153 * htmlSetMetaEncoding:
pcercuei 0:03b5121a232e 154 * @doc: the document
pcercuei 0:03b5121a232e 155 * @encoding: the encoding string
pcercuei 0:03b5121a232e 156 *
pcercuei 0:03b5121a232e 157 * Sets the current encoding in the Meta tags
pcercuei 0:03b5121a232e 158 * NOTE: this will not change the document content encoding, just
pcercuei 0:03b5121a232e 159 * the META flag associated.
pcercuei 0:03b5121a232e 160 *
pcercuei 0:03b5121a232e 161 * Returns 0 in case of success and -1 in case of error
pcercuei 0:03b5121a232e 162 */
pcercuei 0:03b5121a232e 163 int
pcercuei 0:03b5121a232e 164 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
pcercuei 0:03b5121a232e 165 htmlNodePtr cur, meta = NULL, head = NULL;
pcercuei 0:03b5121a232e 166 const xmlChar *content = NULL;
pcercuei 0:03b5121a232e 167 char newcontent[100];
pcercuei 0:03b5121a232e 168
pcercuei 0:03b5121a232e 169 newcontent[0] = 0;
pcercuei 0:03b5121a232e 170
pcercuei 0:03b5121a232e 171 if (doc == NULL)
pcercuei 0:03b5121a232e 172 return(-1);
pcercuei 0:03b5121a232e 173
pcercuei 0:03b5121a232e 174 /* html isn't a real encoding it's just libxml2 way to get entities */
pcercuei 0:03b5121a232e 175 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
pcercuei 0:03b5121a232e 176 return(-1);
pcercuei 0:03b5121a232e 177
pcercuei 0:03b5121a232e 178 if (encoding != NULL) {
pcercuei 0:03b5121a232e 179 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
pcercuei 0:03b5121a232e 180 (char *)encoding);
pcercuei 0:03b5121a232e 181 newcontent[sizeof(newcontent) - 1] = 0;
pcercuei 0:03b5121a232e 182 }
pcercuei 0:03b5121a232e 183
pcercuei 0:03b5121a232e 184 cur = doc->children;
pcercuei 0:03b5121a232e 185
pcercuei 0:03b5121a232e 186 /*
pcercuei 0:03b5121a232e 187 * Search the html
pcercuei 0:03b5121a232e 188 */
pcercuei 0:03b5121a232e 189 while (cur != NULL) {
pcercuei 0:03b5121a232e 190 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
pcercuei 0:03b5121a232e 191 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
pcercuei 0:03b5121a232e 192 break;
pcercuei 0:03b5121a232e 193 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
pcercuei 0:03b5121a232e 194 goto found_head;
pcercuei 0:03b5121a232e 195 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
pcercuei 0:03b5121a232e 196 goto found_meta;
pcercuei 0:03b5121a232e 197 }
pcercuei 0:03b5121a232e 198 cur = cur->next;
pcercuei 0:03b5121a232e 199 }
pcercuei 0:03b5121a232e 200 if (cur == NULL)
pcercuei 0:03b5121a232e 201 return(-1);
pcercuei 0:03b5121a232e 202 cur = cur->children;
pcercuei 0:03b5121a232e 203
pcercuei 0:03b5121a232e 204 /*
pcercuei 0:03b5121a232e 205 * Search the head
pcercuei 0:03b5121a232e 206 */
pcercuei 0:03b5121a232e 207 while (cur != NULL) {
pcercuei 0:03b5121a232e 208 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
pcercuei 0:03b5121a232e 209 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
pcercuei 0:03b5121a232e 210 break;
pcercuei 0:03b5121a232e 211 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
pcercuei 0:03b5121a232e 212 head = cur->parent;
pcercuei 0:03b5121a232e 213 goto found_meta;
pcercuei 0:03b5121a232e 214 }
pcercuei 0:03b5121a232e 215 }
pcercuei 0:03b5121a232e 216 cur = cur->next;
pcercuei 0:03b5121a232e 217 }
pcercuei 0:03b5121a232e 218 if (cur == NULL)
pcercuei 0:03b5121a232e 219 return(-1);
pcercuei 0:03b5121a232e 220 found_head:
pcercuei 0:03b5121a232e 221 head = cur;
pcercuei 0:03b5121a232e 222 if (cur->children == NULL)
pcercuei 0:03b5121a232e 223 goto create;
pcercuei 0:03b5121a232e 224 cur = cur->children;
pcercuei 0:03b5121a232e 225
pcercuei 0:03b5121a232e 226 found_meta:
pcercuei 0:03b5121a232e 227 /*
pcercuei 0:03b5121a232e 228 * Search and update all the remaining the meta elements carrying
pcercuei 0:03b5121a232e 229 * encoding informations
pcercuei 0:03b5121a232e 230 */
pcercuei 0:03b5121a232e 231 while (cur != NULL) {
pcercuei 0:03b5121a232e 232 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
pcercuei 0:03b5121a232e 233 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
pcercuei 0:03b5121a232e 234 xmlAttrPtr attr = cur->properties;
pcercuei 0:03b5121a232e 235 int http;
pcercuei 0:03b5121a232e 236 const xmlChar *value;
pcercuei 0:03b5121a232e 237
pcercuei 0:03b5121a232e 238 content = NULL;
pcercuei 0:03b5121a232e 239 http = 0;
pcercuei 0:03b5121a232e 240 while (attr != NULL) {
pcercuei 0:03b5121a232e 241 if ((attr->children != NULL) &&
pcercuei 0:03b5121a232e 242 (attr->children->type == XML_TEXT_NODE) &&
pcercuei 0:03b5121a232e 243 (attr->children->next == NULL)) {
pcercuei 0:03b5121a232e 244 value = attr->children->content;
pcercuei 0:03b5121a232e 245 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
pcercuei 0:03b5121a232e 246 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
pcercuei 0:03b5121a232e 247 http = 1;
pcercuei 0:03b5121a232e 248 else
pcercuei 0:03b5121a232e 249 {
pcercuei 0:03b5121a232e 250 if ((value != NULL) &&
pcercuei 0:03b5121a232e 251 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
pcercuei 0:03b5121a232e 252 content = value;
pcercuei 0:03b5121a232e 253 }
pcercuei 0:03b5121a232e 254 if ((http != 0) && (content != NULL))
pcercuei 0:03b5121a232e 255 break;
pcercuei 0:03b5121a232e 256 }
pcercuei 0:03b5121a232e 257 attr = attr->next;
pcercuei 0:03b5121a232e 258 }
pcercuei 0:03b5121a232e 259 if ((http != 0) && (content != NULL)) {
pcercuei 0:03b5121a232e 260 meta = cur;
pcercuei 0:03b5121a232e 261 break;
pcercuei 0:03b5121a232e 262 }
pcercuei 0:03b5121a232e 263
pcercuei 0:03b5121a232e 264 }
pcercuei 0:03b5121a232e 265 }
pcercuei 0:03b5121a232e 266 cur = cur->next;
pcercuei 0:03b5121a232e 267 }
pcercuei 0:03b5121a232e 268 create:
pcercuei 0:03b5121a232e 269 if (meta == NULL) {
pcercuei 0:03b5121a232e 270 if ((encoding != NULL) && (head != NULL)) {
pcercuei 0:03b5121a232e 271 /*
pcercuei 0:03b5121a232e 272 * Create a new Meta element with the right attributes
pcercuei 0:03b5121a232e 273 */
pcercuei 0:03b5121a232e 274
pcercuei 0:03b5121a232e 275 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
pcercuei 0:03b5121a232e 276 if (head->children == NULL)
pcercuei 0:03b5121a232e 277 xmlAddChild(head, meta);
pcercuei 0:03b5121a232e 278 else
pcercuei 0:03b5121a232e 279 xmlAddPrevSibling(head->children, meta);
pcercuei 0:03b5121a232e 280 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
pcercuei 0:03b5121a232e 281 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
pcercuei 0:03b5121a232e 282 }
pcercuei 0:03b5121a232e 283 } else {
pcercuei 0:03b5121a232e 284 /* remove the meta tag if NULL is passed */
pcercuei 0:03b5121a232e 285 if (encoding == NULL) {
pcercuei 0:03b5121a232e 286 xmlUnlinkNode(meta);
pcercuei 0:03b5121a232e 287 xmlFreeNode(meta);
pcercuei 0:03b5121a232e 288 }
pcercuei 0:03b5121a232e 289 /* change the document only if there is a real encoding change */
pcercuei 0:03b5121a232e 290 else if (xmlStrcasestr(content, encoding) == NULL) {
pcercuei 0:03b5121a232e 291 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
pcercuei 0:03b5121a232e 292 }
pcercuei 0:03b5121a232e 293 }
pcercuei 0:03b5121a232e 294
pcercuei 0:03b5121a232e 295
pcercuei 0:03b5121a232e 296 return(0);
pcercuei 0:03b5121a232e 297 }
pcercuei 0:03b5121a232e 298
/**
 * htmlBooleanAttrs:
 *
 * These are the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 * The list is NULL terminated. Both the strings and the array itself
 * are constant, nothing modifies this table at run time.
 */
static const char *const htmlBooleanAttrs[] = {
    "checked", "compact", "declare", "defer", "disabled", "ismap",
    "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
    "selected", NULL
};
pcercuei 0:03b5121a232e 312
pcercuei 0:03b5121a232e 313
pcercuei 0:03b5121a232e 314 /**
pcercuei 0:03b5121a232e 315 * htmlIsBooleanAttr:
pcercuei 0:03b5121a232e 316 * @name: the name of the attribute to check
pcercuei 0:03b5121a232e 317 *
pcercuei 0:03b5121a232e 318 * Determine if a given attribute is a boolean attribute.
pcercuei 0:03b5121a232e 319 *
pcercuei 0:03b5121a232e 320 * returns: false if the attribute is not boolean, true otherwise.
pcercuei 0:03b5121a232e 321 */
pcercuei 0:03b5121a232e 322 int
pcercuei 0:03b5121a232e 323 htmlIsBooleanAttr(const xmlChar *name)
pcercuei 0:03b5121a232e 324 {
pcercuei 0:03b5121a232e 325 int i = 0;
pcercuei 0:03b5121a232e 326
pcercuei 0:03b5121a232e 327 while (htmlBooleanAttrs[i] != NULL) {
pcercuei 0:03b5121a232e 328 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
pcercuei 0:03b5121a232e 329 return 1;
pcercuei 0:03b5121a232e 330 i++;
pcercuei 0:03b5121a232e 331 }
pcercuei 0:03b5121a232e 332 return 0;
pcercuei 0:03b5121a232e 333 }
pcercuei 0:03b5121a232e 334
pcercuei 0:03b5121a232e 335 #ifdef LIBXML_OUTPUT_ENABLED
pcercuei 0:03b5121a232e 336 /*
pcercuei 0:03b5121a232e 337 * private routine exported from xmlIO.c
pcercuei 0:03b5121a232e 338 */
pcercuei 0:03b5121a232e 339 xmlOutputBufferPtr
pcercuei 0:03b5121a232e 340 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
pcercuei 0:03b5121a232e 341 /************************************************************************
pcercuei 0:03b5121a232e 342 * *
pcercuei 0:03b5121a232e 343 * Output error handlers *
pcercuei 0:03b5121a232e 344 * *
pcercuei 0:03b5121a232e 345 ************************************************************************/
pcercuei 0:03b5121a232e 346 /**
pcercuei 0:03b5121a232e 347 * htmlSaveErrMemory:
pcercuei 0:03b5121a232e 348 * @extra: extra informations
pcercuei 0:03b5121a232e 349 *
pcercuei 0:03b5121a232e 350 * Handle an out of memory condition
pcercuei 0:03b5121a232e 351 */
pcercuei 0:03b5121a232e 352 static void
pcercuei 0:03b5121a232e 353 htmlSaveErrMemory(const char *extra)
pcercuei 0:03b5121a232e 354 {
pcercuei 0:03b5121a232e 355 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
pcercuei 0:03b5121a232e 356 }
pcercuei 0:03b5121a232e 357
pcercuei 0:03b5121a232e 358 /**
pcercuei 0:03b5121a232e 359 * htmlSaveErr:
pcercuei 0:03b5121a232e 360 * @code: the error number
pcercuei 0:03b5121a232e 361 * @node: the location of the error.
pcercuei 0:03b5121a232e 362 * @extra: extra informations
pcercuei 0:03b5121a232e 363 *
pcercuei 0:03b5121a232e 364 * Handle an out of memory condition
pcercuei 0:03b5121a232e 365 */
pcercuei 0:03b5121a232e 366 static void
pcercuei 0:03b5121a232e 367 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
pcercuei 0:03b5121a232e 368 {
pcercuei 0:03b5121a232e 369 const char *msg = NULL;
pcercuei 0:03b5121a232e 370
pcercuei 0:03b5121a232e 371 switch(code) {
pcercuei 0:03b5121a232e 372 case XML_SAVE_NOT_UTF8:
pcercuei 0:03b5121a232e 373 msg = "string is not in UTF-8\n";
pcercuei 0:03b5121a232e 374 break;
pcercuei 0:03b5121a232e 375 case XML_SAVE_CHAR_INVALID:
pcercuei 0:03b5121a232e 376 msg = "invalid character value\n";
pcercuei 0:03b5121a232e 377 break;
pcercuei 0:03b5121a232e 378 case XML_SAVE_UNKNOWN_ENCODING:
pcercuei 0:03b5121a232e 379 msg = "unknown encoding %s\n";
pcercuei 0:03b5121a232e 380 break;
pcercuei 0:03b5121a232e 381 case XML_SAVE_NO_DOCTYPE:
pcercuei 0:03b5121a232e 382 msg = "HTML has no DOCTYPE\n";
pcercuei 0:03b5121a232e 383 break;
pcercuei 0:03b5121a232e 384 default:
pcercuei 0:03b5121a232e 385 msg = "unexpected error number\n";
pcercuei 0:03b5121a232e 386 }
pcercuei 0:03b5121a232e 387 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
pcercuei 0:03b5121a232e 388 }
pcercuei 0:03b5121a232e 389
pcercuei 0:03b5121a232e 390 /************************************************************************
pcercuei 0:03b5121a232e 391 * *
pcercuei 0:03b5121a232e 392 * Dumping HTML tree content to a simple buffer *
pcercuei 0:03b5121a232e 393 * *
pcercuei 0:03b5121a232e 394 ************************************************************************/
pcercuei 0:03b5121a232e 395
pcercuei 0:03b5121a232e 396 /**
pcercuei 0:03b5121a232e 397 * htmlBufNodeDumpFormat:
pcercuei 0:03b5121a232e 398 * @buf: the xmlBufPtr output
pcercuei 0:03b5121a232e 399 * @doc: the document
pcercuei 0:03b5121a232e 400 * @cur: the current node
pcercuei 0:03b5121a232e 401 * @format: should formatting spaces been added
pcercuei 0:03b5121a232e 402 *
pcercuei 0:03b5121a232e 403 * Dump an HTML node, recursive behaviour,children are printed too.
pcercuei 0:03b5121a232e 404 *
pcercuei 0:03b5121a232e 405 * Returns the number of byte written or -1 in case of error
pcercuei 0:03b5121a232e 406 */
pcercuei 0:03b5121a232e 407 static size_t
pcercuei 0:03b5121a232e 408 htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
pcercuei 0:03b5121a232e 409 int format) {
pcercuei 0:03b5121a232e 410 size_t use;
pcercuei 0:03b5121a232e 411 int ret;
pcercuei 0:03b5121a232e 412 xmlOutputBufferPtr outbuf;
pcercuei 0:03b5121a232e 413
pcercuei 0:03b5121a232e 414 if (cur == NULL) {
pcercuei 0:03b5121a232e 415 return (-1);
pcercuei 0:03b5121a232e 416 }
pcercuei 0:03b5121a232e 417 if (buf == NULL) {
pcercuei 0:03b5121a232e 418 return (-1);
pcercuei 0:03b5121a232e 419 }
pcercuei 0:03b5121a232e 420 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
pcercuei 0:03b5121a232e 421 if (outbuf == NULL) {
pcercuei 0:03b5121a232e 422 htmlSaveErrMemory("allocating HTML output buffer");
pcercuei 0:03b5121a232e 423 return (-1);
pcercuei 0:03b5121a232e 424 }
pcercuei 0:03b5121a232e 425 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
pcercuei 0:03b5121a232e 426 outbuf->buffer = buf;
pcercuei 0:03b5121a232e 427 outbuf->encoder = NULL;
pcercuei 0:03b5121a232e 428 outbuf->writecallback = NULL;
pcercuei 0:03b5121a232e 429 outbuf->closecallback = NULL;
pcercuei 0:03b5121a232e 430 outbuf->context = NULL;
pcercuei 0:03b5121a232e 431 outbuf->written = 0;
pcercuei 0:03b5121a232e 432
pcercuei 0:03b5121a232e 433 use = xmlBufUse(buf);
pcercuei 0:03b5121a232e 434 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
pcercuei 0:03b5121a232e 435 xmlFree(outbuf);
pcercuei 0:03b5121a232e 436 ret = xmlBufUse(buf) - use;
pcercuei 0:03b5121a232e 437 return (ret);
pcercuei 0:03b5121a232e 438 }
pcercuei 0:03b5121a232e 439
pcercuei 0:03b5121a232e 440 /**
pcercuei 0:03b5121a232e 441 * htmlNodeDump:
pcercuei 0:03b5121a232e 442 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 443 * @doc: the document
pcercuei 0:03b5121a232e 444 * @cur: the current node
pcercuei 0:03b5121a232e 445 *
pcercuei 0:03b5121a232e 446 * Dump an HTML node, recursive behaviour,children are printed too,
pcercuei 0:03b5121a232e 447 * and formatting returns are added.
pcercuei 0:03b5121a232e 448 *
pcercuei 0:03b5121a232e 449 * Returns the number of byte written or -1 in case of error
pcercuei 0:03b5121a232e 450 */
pcercuei 0:03b5121a232e 451 int
pcercuei 0:03b5121a232e 452 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
pcercuei 0:03b5121a232e 453 xmlBufPtr buffer;
pcercuei 0:03b5121a232e 454 size_t ret;
pcercuei 0:03b5121a232e 455
pcercuei 0:03b5121a232e 456 if ((buf == NULL) || (cur == NULL))
pcercuei 0:03b5121a232e 457 return(-1);
pcercuei 0:03b5121a232e 458
pcercuei 0:03b5121a232e 459 xmlInitParser();
pcercuei 0:03b5121a232e 460 buffer = xmlBufFromBuffer(buf);
pcercuei 0:03b5121a232e 461 if (buffer == NULL)
pcercuei 0:03b5121a232e 462 return(-1);
pcercuei 0:03b5121a232e 463
pcercuei 0:03b5121a232e 464 ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
pcercuei 0:03b5121a232e 465
pcercuei 0:03b5121a232e 466 xmlBufBackToBuffer(buffer);
pcercuei 0:03b5121a232e 467
pcercuei 0:03b5121a232e 468 if (ret > INT_MAX)
pcercuei 0:03b5121a232e 469 return(-1);
pcercuei 0:03b5121a232e 470 return((int) ret);
pcercuei 0:03b5121a232e 471 }
pcercuei 0:03b5121a232e 472
pcercuei 0:03b5121a232e 473 /**
pcercuei 0:03b5121a232e 474 * htmlNodeDumpFileFormat:
pcercuei 0:03b5121a232e 475 * @out: the FILE pointer
pcercuei 0:03b5121a232e 476 * @doc: the document
pcercuei 0:03b5121a232e 477 * @cur: the current node
pcercuei 0:03b5121a232e 478 * @encoding: the document encoding
pcercuei 0:03b5121a232e 479 * @format: should formatting spaces been added
pcercuei 0:03b5121a232e 480 *
pcercuei 0:03b5121a232e 481 * Dump an HTML node, recursive behaviour,children are printed too.
pcercuei 0:03b5121a232e 482 *
pcercuei 0:03b5121a232e 483 * TODO: if encoding == NULL try to save in the doc encoding
pcercuei 0:03b5121a232e 484 *
pcercuei 0:03b5121a232e 485 * returns: the number of byte written or -1 in case of failure.
pcercuei 0:03b5121a232e 486 */
pcercuei 0:03b5121a232e 487 int
pcercuei 0:03b5121a232e 488 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
pcercuei 0:03b5121a232e 489 xmlNodePtr cur, const char *encoding, int format) {
pcercuei 0:03b5121a232e 490 xmlOutputBufferPtr buf;
pcercuei 0:03b5121a232e 491 xmlCharEncodingHandlerPtr handler = NULL;
pcercuei 0:03b5121a232e 492 int ret;
pcercuei 0:03b5121a232e 493
pcercuei 0:03b5121a232e 494 xmlInitParser();
pcercuei 0:03b5121a232e 495
pcercuei 0:03b5121a232e 496 if (encoding != NULL) {
pcercuei 0:03b5121a232e 497 xmlCharEncoding enc;
pcercuei 0:03b5121a232e 498
pcercuei 0:03b5121a232e 499 enc = xmlParseCharEncoding(encoding);
pcercuei 0:03b5121a232e 500 if (enc != XML_CHAR_ENCODING_UTF8) {
pcercuei 0:03b5121a232e 501 handler = xmlFindCharEncodingHandler(encoding);
pcercuei 0:03b5121a232e 502 if (handler == NULL)
pcercuei 0:03b5121a232e 503 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
pcercuei 0:03b5121a232e 504 }
pcercuei 0:03b5121a232e 505 }
pcercuei 0:03b5121a232e 506
pcercuei 0:03b5121a232e 507 /*
pcercuei 0:03b5121a232e 508 * Fallback to HTML or ASCII when the encoding is unspecified
pcercuei 0:03b5121a232e 509 */
pcercuei 0:03b5121a232e 510 if (handler == NULL)
pcercuei 0:03b5121a232e 511 handler = xmlFindCharEncodingHandler("HTML");
pcercuei 0:03b5121a232e 512 if (handler == NULL)
pcercuei 0:03b5121a232e 513 handler = xmlFindCharEncodingHandler("ascii");
pcercuei 0:03b5121a232e 514
pcercuei 0:03b5121a232e 515 /*
pcercuei 0:03b5121a232e 516 * save the content to a temp buffer.
pcercuei 0:03b5121a232e 517 */
pcercuei 0:03b5121a232e 518 buf = xmlOutputBufferCreateFile(out, handler);
pcercuei 0:03b5121a232e 519 if (buf == NULL) return(0);
pcercuei 0:03b5121a232e 520
pcercuei 0:03b5121a232e 521 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
pcercuei 0:03b5121a232e 522
pcercuei 0:03b5121a232e 523 ret = xmlOutputBufferClose(buf);
pcercuei 0:03b5121a232e 524 return(ret);
pcercuei 0:03b5121a232e 525 }
pcercuei 0:03b5121a232e 526
pcercuei 0:03b5121a232e 527 /**
pcercuei 0:03b5121a232e 528 * htmlNodeDumpFile:
pcercuei 0:03b5121a232e 529 * @out: the FILE pointer
pcercuei 0:03b5121a232e 530 * @doc: the document
pcercuei 0:03b5121a232e 531 * @cur: the current node
pcercuei 0:03b5121a232e 532 *
pcercuei 0:03b5121a232e 533 * Dump an HTML node, recursive behaviour,children are printed too,
pcercuei 0:03b5121a232e 534 * and formatting returns are added.
pcercuei 0:03b5121a232e 535 */
pcercuei 0:03b5121a232e 536 void
pcercuei 0:03b5121a232e 537 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
pcercuei 0:03b5121a232e 538 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
pcercuei 0:03b5121a232e 539 }
pcercuei 0:03b5121a232e 540
pcercuei 0:03b5121a232e 541 /**
pcercuei 0:03b5121a232e 542 * htmlDocDumpMemoryFormat:
pcercuei 0:03b5121a232e 543 * @cur: the document
pcercuei 0:03b5121a232e 544 * @mem: OUT: the memory pointer
pcercuei 0:03b5121a232e 545 * @size: OUT: the memory length
pcercuei 0:03b5121a232e 546 * @format: should formatting spaces been added
pcercuei 0:03b5121a232e 547 *
pcercuei 0:03b5121a232e 548 * Dump an HTML document in memory and return the xmlChar * and it's size.
pcercuei 0:03b5121a232e 549 * It's up to the caller to free the memory.
pcercuei 0:03b5121a232e 550 */
pcercuei 0:03b5121a232e 551 void
pcercuei 0:03b5121a232e 552 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
pcercuei 0:03b5121a232e 553 xmlOutputBufferPtr buf;
pcercuei 0:03b5121a232e 554 xmlCharEncodingHandlerPtr handler = NULL;
pcercuei 0:03b5121a232e 555 const char *encoding;
pcercuei 0:03b5121a232e 556
pcercuei 0:03b5121a232e 557 xmlInitParser();
pcercuei 0:03b5121a232e 558
pcercuei 0:03b5121a232e 559 if ((mem == NULL) || (size == NULL))
pcercuei 0:03b5121a232e 560 return;
pcercuei 0:03b5121a232e 561 if (cur == NULL) {
pcercuei 0:03b5121a232e 562 *mem = NULL;
pcercuei 0:03b5121a232e 563 *size = 0;
pcercuei 0:03b5121a232e 564 return;
pcercuei 0:03b5121a232e 565 }
pcercuei 0:03b5121a232e 566
pcercuei 0:03b5121a232e 567 encoding = (const char *) htmlGetMetaEncoding(cur);
pcercuei 0:03b5121a232e 568
pcercuei 0:03b5121a232e 569 if (encoding != NULL) {
pcercuei 0:03b5121a232e 570 xmlCharEncoding enc;
pcercuei 0:03b5121a232e 571
pcercuei 0:03b5121a232e 572 enc = xmlParseCharEncoding(encoding);
pcercuei 0:03b5121a232e 573 if (enc != cur->charset) {
pcercuei 0:03b5121a232e 574 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
pcercuei 0:03b5121a232e 575 /*
pcercuei 0:03b5121a232e 576 * Not supported yet
pcercuei 0:03b5121a232e 577 */
pcercuei 0:03b5121a232e 578 *mem = NULL;
pcercuei 0:03b5121a232e 579 *size = 0;
pcercuei 0:03b5121a232e 580 return;
pcercuei 0:03b5121a232e 581 }
pcercuei 0:03b5121a232e 582
pcercuei 0:03b5121a232e 583 handler = xmlFindCharEncodingHandler(encoding);
pcercuei 0:03b5121a232e 584 if (handler == NULL)
pcercuei 0:03b5121a232e 585 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
pcercuei 0:03b5121a232e 586
pcercuei 0:03b5121a232e 587 } else {
pcercuei 0:03b5121a232e 588 handler = xmlFindCharEncodingHandler(encoding);
pcercuei 0:03b5121a232e 589 }
pcercuei 0:03b5121a232e 590 }
pcercuei 0:03b5121a232e 591
pcercuei 0:03b5121a232e 592 /*
pcercuei 0:03b5121a232e 593 * Fallback to HTML or ASCII when the encoding is unspecified
pcercuei 0:03b5121a232e 594 */
pcercuei 0:03b5121a232e 595 if (handler == NULL)
pcercuei 0:03b5121a232e 596 handler = xmlFindCharEncodingHandler("HTML");
pcercuei 0:03b5121a232e 597 if (handler == NULL)
pcercuei 0:03b5121a232e 598 handler = xmlFindCharEncodingHandler("ascii");
pcercuei 0:03b5121a232e 599
pcercuei 0:03b5121a232e 600 buf = xmlAllocOutputBufferInternal(handler);
pcercuei 0:03b5121a232e 601 if (buf == NULL) {
pcercuei 0:03b5121a232e 602 *mem = NULL;
pcercuei 0:03b5121a232e 603 *size = 0;
pcercuei 0:03b5121a232e 604 return;
pcercuei 0:03b5121a232e 605 }
pcercuei 0:03b5121a232e 606
pcercuei 0:03b5121a232e 607 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
pcercuei 0:03b5121a232e 608
pcercuei 0:03b5121a232e 609 xmlOutputBufferFlush(buf);
pcercuei 0:03b5121a232e 610 if (buf->conv != NULL) {
pcercuei 0:03b5121a232e 611 *size = xmlBufUse(buf->conv);
pcercuei 0:03b5121a232e 612 *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
pcercuei 0:03b5121a232e 613 } else {
pcercuei 0:03b5121a232e 614 *size = xmlBufUse(buf->buffer);
pcercuei 0:03b5121a232e 615 *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
pcercuei 0:03b5121a232e 616 }
pcercuei 0:03b5121a232e 617 (void)xmlOutputBufferClose(buf);
pcercuei 0:03b5121a232e 618 }
pcercuei 0:03b5121a232e 619
pcercuei 0:03b5121a232e 620 /**
pcercuei 0:03b5121a232e 621 * htmlDocDumpMemory:
pcercuei 0:03b5121a232e 622 * @cur: the document
pcercuei 0:03b5121a232e 623 * @mem: OUT: the memory pointer
pcercuei 0:03b5121a232e 624 * @size: OUT: the memory length
pcercuei 0:03b5121a232e 625 *
pcercuei 0:03b5121a232e 626 * Dump an HTML document in memory and return the xmlChar * and it's size.
pcercuei 0:03b5121a232e 627 * It's up to the caller to free the memory.
pcercuei 0:03b5121a232e 628 */
pcercuei 0:03b5121a232e 629 void
pcercuei 0:03b5121a232e 630 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
pcercuei 0:03b5121a232e 631 htmlDocDumpMemoryFormat(cur, mem, size, 1);
pcercuei 0:03b5121a232e 632 }
pcercuei 0:03b5121a232e 633
pcercuei 0:03b5121a232e 634
pcercuei 0:03b5121a232e 635 /************************************************************************
pcercuei 0:03b5121a232e 636 * *
pcercuei 0:03b5121a232e 637 * Dumping HTML tree content to an I/O output buffer *
pcercuei 0:03b5121a232e 638 * *
pcercuei 0:03b5121a232e 639 ************************************************************************/
pcercuei 0:03b5121a232e 640
pcercuei 0:03b5121a232e 641 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
pcercuei 0:03b5121a232e 642
pcercuei 0:03b5121a232e 643 /**
pcercuei 0:03b5121a232e 644 * htmlDtdDumpOutput:
pcercuei 0:03b5121a232e 645 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 646 * @doc: the document
pcercuei 0:03b5121a232e 647 * @encoding: the encoding string
pcercuei 0:03b5121a232e 648 *
pcercuei 0:03b5121a232e 649 * TODO: check whether encoding is needed
pcercuei 0:03b5121a232e 650 *
pcercuei 0:03b5121a232e 651 * Dump the HTML document DTD, if any.
pcercuei 0:03b5121a232e 652 */
pcercuei 0:03b5121a232e 653 static void
pcercuei 0:03b5121a232e 654 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
pcercuei 0:03b5121a232e 655 const char *encoding ATTRIBUTE_UNUSED) {
pcercuei 0:03b5121a232e 656 xmlDtdPtr cur = doc->intSubset;
pcercuei 0:03b5121a232e 657
pcercuei 0:03b5121a232e 658 if (cur == NULL) {
pcercuei 0:03b5121a232e 659 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
pcercuei 0:03b5121a232e 660 return;
pcercuei 0:03b5121a232e 661 }
pcercuei 0:03b5121a232e 662 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
pcercuei 0:03b5121a232e 663 xmlOutputBufferWriteString(buf, (const char *)cur->name);
pcercuei 0:03b5121a232e 664 if (cur->ExternalID != NULL) {
pcercuei 0:03b5121a232e 665 xmlOutputBufferWriteString(buf, " PUBLIC ");
pcercuei 0:03b5121a232e 666 xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
pcercuei 0:03b5121a232e 667 if (cur->SystemID != NULL) {
pcercuei 0:03b5121a232e 668 xmlOutputBufferWriteString(buf, " ");
pcercuei 0:03b5121a232e 669 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
pcercuei 0:03b5121a232e 670 }
pcercuei 0:03b5121a232e 671 } else if (cur->SystemID != NULL &&
pcercuei 0:03b5121a232e 672 xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
pcercuei 0:03b5121a232e 673 xmlOutputBufferWriteString(buf, " SYSTEM ");
pcercuei 0:03b5121a232e 674 xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
pcercuei 0:03b5121a232e 675 }
pcercuei 0:03b5121a232e 676 xmlOutputBufferWriteString(buf, ">\n");
pcercuei 0:03b5121a232e 677 }
pcercuei 0:03b5121a232e 678
pcercuei 0:03b5121a232e 679 /**
pcercuei 0:03b5121a232e 680 * htmlAttrDumpOutput:
pcercuei 0:03b5121a232e 681 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 682 * @doc: the document
pcercuei 0:03b5121a232e 683 * @cur: the attribute pointer
pcercuei 0:03b5121a232e 684 * @encoding: the encoding string
pcercuei 0:03b5121a232e 685 *
pcercuei 0:03b5121a232e 686 * Dump an HTML attribute
pcercuei 0:03b5121a232e 687 */
pcercuei 0:03b5121a232e 688 static void
pcercuei 0:03b5121a232e 689 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
pcercuei 0:03b5121a232e 690 const char *encoding ATTRIBUTE_UNUSED) {
pcercuei 0:03b5121a232e 691 xmlChar *value;
pcercuei 0:03b5121a232e 692
pcercuei 0:03b5121a232e 693 /*
pcercuei 0:03b5121a232e 694 * The html output method should not escape a & character
pcercuei 0:03b5121a232e 695 * occurring in an attribute value immediately followed by
pcercuei 0:03b5121a232e 696 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
pcercuei 0:03b5121a232e 697 * This is implemented in xmlEncodeEntitiesReentrant
pcercuei 0:03b5121a232e 698 */
pcercuei 0:03b5121a232e 699
pcercuei 0:03b5121a232e 700 if (cur == NULL) {
pcercuei 0:03b5121a232e 701 return;
pcercuei 0:03b5121a232e 702 }
pcercuei 0:03b5121a232e 703 xmlOutputBufferWriteString(buf, " ");
pcercuei 0:03b5121a232e 704 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
pcercuei 0:03b5121a232e 705 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
pcercuei 0:03b5121a232e 706 xmlOutputBufferWriteString(buf, ":");
pcercuei 0:03b5121a232e 707 }
pcercuei 0:03b5121a232e 708 xmlOutputBufferWriteString(buf, (const char *)cur->name);
pcercuei 0:03b5121a232e 709 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
pcercuei 0:03b5121a232e 710 value = xmlNodeListGetString(doc, cur->children, 0);
pcercuei 0:03b5121a232e 711 if (value) {
pcercuei 0:03b5121a232e 712 xmlOutputBufferWriteString(buf, "=");
pcercuei 0:03b5121a232e 713 if ((cur->ns == NULL) && (cur->parent != NULL) &&
pcercuei 0:03b5121a232e 714 (cur->parent->ns == NULL) &&
pcercuei 0:03b5121a232e 715 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
pcercuei 0:03b5121a232e 716 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
pcercuei 0:03b5121a232e 717 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
pcercuei 0:03b5121a232e 718 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
pcercuei 0:03b5121a232e 719 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
pcercuei 0:03b5121a232e 720 xmlChar *tmp = value;
pcercuei 0:03b5121a232e 721 /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
pcercuei 0:03b5121a232e 722 xmlBufCCat(buf->buffer, "\"");
pcercuei 0:03b5121a232e 723
pcercuei 0:03b5121a232e 724 while (IS_BLANK_CH(*tmp)) tmp++;
pcercuei 0:03b5121a232e 725
pcercuei 0:03b5121a232e 726 /* URI Escape everything, except server side includes. */
pcercuei 0:03b5121a232e 727 for ( ; ; ) {
pcercuei 0:03b5121a232e 728 xmlChar *escaped;
pcercuei 0:03b5121a232e 729 xmlChar endChar;
pcercuei 0:03b5121a232e 730 xmlChar *end = NULL;
pcercuei 0:03b5121a232e 731 xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
pcercuei 0:03b5121a232e 732 if (start != NULL) {
pcercuei 0:03b5121a232e 733 end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
pcercuei 0:03b5121a232e 734 if (end != NULL) {
pcercuei 0:03b5121a232e 735 *start = '\0';
pcercuei 0:03b5121a232e 736 }
pcercuei 0:03b5121a232e 737 }
pcercuei 0:03b5121a232e 738
pcercuei 0:03b5121a232e 739 /* Escape the whole string, or until start (set to '\0'). */
pcercuei 0:03b5121a232e 740 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
pcercuei 0:03b5121a232e 741 if (escaped != NULL) {
pcercuei 0:03b5121a232e 742 xmlBufCat(buf->buffer, escaped);
pcercuei 0:03b5121a232e 743 xmlFree(escaped);
pcercuei 0:03b5121a232e 744 } else {
pcercuei 0:03b5121a232e 745 xmlBufCat(buf->buffer, tmp);
pcercuei 0:03b5121a232e 746 }
pcercuei 0:03b5121a232e 747
pcercuei 0:03b5121a232e 748 if (end == NULL) { /* Everything has been written. */
pcercuei 0:03b5121a232e 749 break;
pcercuei 0:03b5121a232e 750 }
pcercuei 0:03b5121a232e 751
pcercuei 0:03b5121a232e 752 /* Do not escape anything within server side includes. */
pcercuei 0:03b5121a232e 753 *start = '<'; /* Restore the first character of "<!--". */
pcercuei 0:03b5121a232e 754 end += 3; /* strlen("-->") */
pcercuei 0:03b5121a232e 755 endChar = *end;
pcercuei 0:03b5121a232e 756 *end = '\0';
pcercuei 0:03b5121a232e 757 xmlBufCat(buf->buffer, start);
pcercuei 0:03b5121a232e 758 *end = endChar;
pcercuei 0:03b5121a232e 759 tmp = end;
pcercuei 0:03b5121a232e 760 }
pcercuei 0:03b5121a232e 761
pcercuei 0:03b5121a232e 762 xmlBufCCat(buf->buffer, "\"");
pcercuei 0:03b5121a232e 763 } else {
pcercuei 0:03b5121a232e 764 xmlBufWriteQuotedString(buf->buffer, value);
pcercuei 0:03b5121a232e 765 }
pcercuei 0:03b5121a232e 766 xmlFree(value);
pcercuei 0:03b5121a232e 767 } else {
pcercuei 0:03b5121a232e 768 xmlOutputBufferWriteString(buf, "=\"\"");
pcercuei 0:03b5121a232e 769 }
pcercuei 0:03b5121a232e 770 }
pcercuei 0:03b5121a232e 771 }
pcercuei 0:03b5121a232e 772
pcercuei 0:03b5121a232e 773 /**
pcercuei 0:03b5121a232e 774 * htmlAttrListDumpOutput:
pcercuei 0:03b5121a232e 775 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 776 * @doc: the document
pcercuei 0:03b5121a232e 777 * @cur: the first attribute pointer
pcercuei 0:03b5121a232e 778 * @encoding: the encoding string
pcercuei 0:03b5121a232e 779 *
pcercuei 0:03b5121a232e 780 * Dump a list of HTML attributes
pcercuei 0:03b5121a232e 781 */
pcercuei 0:03b5121a232e 782 static void
pcercuei 0:03b5121a232e 783 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
pcercuei 0:03b5121a232e 784 if (cur == NULL) {
pcercuei 0:03b5121a232e 785 return;
pcercuei 0:03b5121a232e 786 }
pcercuei 0:03b5121a232e 787 while (cur != NULL) {
pcercuei 0:03b5121a232e 788 htmlAttrDumpOutput(buf, doc, cur, encoding);
pcercuei 0:03b5121a232e 789 cur = cur->next;
pcercuei 0:03b5121a232e 790 }
pcercuei 0:03b5121a232e 791 }
pcercuei 0:03b5121a232e 792
pcercuei 0:03b5121a232e 793
pcercuei 0:03b5121a232e 794
pcercuei 0:03b5121a232e 795 /**
pcercuei 0:03b5121a232e 796 * htmlNodeListDumpOutput:
pcercuei 0:03b5121a232e 797 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 798 * @doc: the document
pcercuei 0:03b5121a232e 799 * @cur: the first node
pcercuei 0:03b5121a232e 800 * @encoding: the encoding string
pcercuei 0:03b5121a232e 801 * @format: should formatting spaces been added
pcercuei 0:03b5121a232e 802 *
pcercuei 0:03b5121a232e 803 * Dump an HTML node list, recursive behaviour,children are printed too.
pcercuei 0:03b5121a232e 804 */
pcercuei 0:03b5121a232e 805 static void
pcercuei 0:03b5121a232e 806 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
pcercuei 0:03b5121a232e 807 xmlNodePtr cur, const char *encoding, int format) {
pcercuei 0:03b5121a232e 808 if (cur == NULL) {
pcercuei 0:03b5121a232e 809 return;
pcercuei 0:03b5121a232e 810 }
pcercuei 0:03b5121a232e 811 while (cur != NULL) {
pcercuei 0:03b5121a232e 812 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
pcercuei 0:03b5121a232e 813 cur = cur->next;
pcercuei 0:03b5121a232e 814 }
pcercuei 0:03b5121a232e 815 }
pcercuei 0:03b5121a232e 816
pcercuei 0:03b5121a232e 817 /**
pcercuei 0:03b5121a232e 818 * htmlNodeDumpFormatOutput:
pcercuei 0:03b5121a232e 819 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 820 * @doc: the document
pcercuei 0:03b5121a232e 821 * @cur: the current node
pcercuei 0:03b5121a232e 822 * @encoding: the encoding string
pcercuei 0:03b5121a232e 823 * @format: should formatting spaces been added
pcercuei 0:03b5121a232e 824 *
pcercuei 0:03b5121a232e 825 * Dump an HTML node, recursive behaviour,children are printed too.
pcercuei 0:03b5121a232e 826 */
pcercuei 0:03b5121a232e 827 void
pcercuei 0:03b5121a232e 828 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
pcercuei 0:03b5121a232e 829 xmlNodePtr cur, const char *encoding, int format) {
pcercuei 0:03b5121a232e 830 const htmlElemDesc * info;
pcercuei 0:03b5121a232e 831
pcercuei 0:03b5121a232e 832 xmlInitParser();
pcercuei 0:03b5121a232e 833
pcercuei 0:03b5121a232e 834 if ((cur == NULL) || (buf == NULL)) {
pcercuei 0:03b5121a232e 835 return;
pcercuei 0:03b5121a232e 836 }
pcercuei 0:03b5121a232e 837 /*
pcercuei 0:03b5121a232e 838 * Special cases.
pcercuei 0:03b5121a232e 839 */
pcercuei 0:03b5121a232e 840 if (cur->type == XML_DTD_NODE)
pcercuei 0:03b5121a232e 841 return;
pcercuei 0:03b5121a232e 842 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
pcercuei 0:03b5121a232e 843 (cur->type == XML_DOCUMENT_NODE)){
pcercuei 0:03b5121a232e 844 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
pcercuei 0:03b5121a232e 845 return;
pcercuei 0:03b5121a232e 846 }
pcercuei 0:03b5121a232e 847 if (cur->type == XML_ATTRIBUTE_NODE) {
pcercuei 0:03b5121a232e 848 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
pcercuei 0:03b5121a232e 849 return;
pcercuei 0:03b5121a232e 850 }
pcercuei 0:03b5121a232e 851 if (cur->type == HTML_TEXT_NODE) {
pcercuei 0:03b5121a232e 852 if (cur->content != NULL) {
pcercuei 0:03b5121a232e 853 if (((cur->name == (const xmlChar *)xmlStringText) ||
pcercuei 0:03b5121a232e 854 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
pcercuei 0:03b5121a232e 855 ((cur->parent == NULL) ||
pcercuei 0:03b5121a232e 856 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
pcercuei 0:03b5121a232e 857 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
pcercuei 0:03b5121a232e 858 xmlChar *buffer;
pcercuei 0:03b5121a232e 859
pcercuei 0:03b5121a232e 860 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
pcercuei 0:03b5121a232e 861 if (buffer != NULL) {
pcercuei 0:03b5121a232e 862 xmlOutputBufferWriteString(buf, (const char *)buffer);
pcercuei 0:03b5121a232e 863 xmlFree(buffer);
pcercuei 0:03b5121a232e 864 }
pcercuei 0:03b5121a232e 865 } else {
pcercuei 0:03b5121a232e 866 xmlOutputBufferWriteString(buf, (const char *)cur->content);
pcercuei 0:03b5121a232e 867 }
pcercuei 0:03b5121a232e 868 }
pcercuei 0:03b5121a232e 869 return;
pcercuei 0:03b5121a232e 870 }
pcercuei 0:03b5121a232e 871 if (cur->type == HTML_COMMENT_NODE) {
pcercuei 0:03b5121a232e 872 if (cur->content != NULL) {
pcercuei 0:03b5121a232e 873 xmlOutputBufferWriteString(buf, "<!--");
pcercuei 0:03b5121a232e 874 xmlOutputBufferWriteString(buf, (const char *)cur->content);
pcercuei 0:03b5121a232e 875 xmlOutputBufferWriteString(buf, "-->");
pcercuei 0:03b5121a232e 876 }
pcercuei 0:03b5121a232e 877 return;
pcercuei 0:03b5121a232e 878 }
pcercuei 0:03b5121a232e 879 if (cur->type == HTML_PI_NODE) {
pcercuei 0:03b5121a232e 880 if (cur->name == NULL)
pcercuei 0:03b5121a232e 881 return;
pcercuei 0:03b5121a232e 882 xmlOutputBufferWriteString(buf, "<?");
pcercuei 0:03b5121a232e 883 xmlOutputBufferWriteString(buf, (const char *)cur->name);
pcercuei 0:03b5121a232e 884 if (cur->content != NULL) {
pcercuei 0:03b5121a232e 885 xmlOutputBufferWriteString(buf, " ");
pcercuei 0:03b5121a232e 886 xmlOutputBufferWriteString(buf, (const char *)cur->content);
pcercuei 0:03b5121a232e 887 }
pcercuei 0:03b5121a232e 888 xmlOutputBufferWriteString(buf, ">");
pcercuei 0:03b5121a232e 889 return;
pcercuei 0:03b5121a232e 890 }
pcercuei 0:03b5121a232e 891 if (cur->type == HTML_ENTITY_REF_NODE) {
pcercuei 0:03b5121a232e 892 xmlOutputBufferWriteString(buf, "&");
pcercuei 0:03b5121a232e 893 xmlOutputBufferWriteString(buf, (const char *)cur->name);
pcercuei 0:03b5121a232e 894 xmlOutputBufferWriteString(buf, ";");
pcercuei 0:03b5121a232e 895 return;
pcercuei 0:03b5121a232e 896 }
pcercuei 0:03b5121a232e 897 if (cur->type == HTML_PRESERVE_NODE) {
pcercuei 0:03b5121a232e 898 if (cur->content != NULL) {
pcercuei 0:03b5121a232e 899 xmlOutputBufferWriteString(buf, (const char *)cur->content);
pcercuei 0:03b5121a232e 900 }
pcercuei 0:03b5121a232e 901 return;
pcercuei 0:03b5121a232e 902 }
pcercuei 0:03b5121a232e 903
pcercuei 0:03b5121a232e 904 /*
pcercuei 0:03b5121a232e 905 * Get specific HTML info for that node.
pcercuei 0:03b5121a232e 906 */
pcercuei 0:03b5121a232e 907 if (cur->ns == NULL)
pcercuei 0:03b5121a232e 908 info = htmlTagLookup(cur->name);
pcercuei 0:03b5121a232e 909 else
pcercuei 0:03b5121a232e 910 info = NULL;
pcercuei 0:03b5121a232e 911
pcercuei 0:03b5121a232e 912 xmlOutputBufferWriteString(buf, "<");
pcercuei 0:03b5121a232e 913 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
pcercuei 0:03b5121a232e 914 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
pcercuei 0:03b5121a232e 915 xmlOutputBufferWriteString(buf, ":");
pcercuei 0:03b5121a232e 916 }
pcercuei 0:03b5121a232e 917 xmlOutputBufferWriteString(buf, (const char *)cur->name);
pcercuei 0:03b5121a232e 918 if (cur->nsDef)
pcercuei 0:03b5121a232e 919 xmlNsListDumpOutput(buf, cur->nsDef);
pcercuei 0:03b5121a232e 920 if (cur->properties != NULL)
pcercuei 0:03b5121a232e 921 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
pcercuei 0:03b5121a232e 922
pcercuei 0:03b5121a232e 923 if ((info != NULL) && (info->empty)) {
pcercuei 0:03b5121a232e 924 xmlOutputBufferWriteString(buf, ">");
pcercuei 0:03b5121a232e 925 if ((format) && (!info->isinline) && (cur->next != NULL)) {
pcercuei 0:03b5121a232e 926 if ((cur->next->type != HTML_TEXT_NODE) &&
pcercuei 0:03b5121a232e 927 (cur->next->type != HTML_ENTITY_REF_NODE) &&
pcercuei 0:03b5121a232e 928 (cur->parent != NULL) &&
pcercuei 0:03b5121a232e 929 (cur->parent->name != NULL) &&
pcercuei 0:03b5121a232e 930 (cur->parent->name[0] != 'p')) /* p, pre, param */
pcercuei 0:03b5121a232e 931 xmlOutputBufferWriteString(buf, "\n");
pcercuei 0:03b5121a232e 932 }
pcercuei 0:03b5121a232e 933 return;
pcercuei 0:03b5121a232e 934 }
pcercuei 0:03b5121a232e 935 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
pcercuei 0:03b5121a232e 936 (cur->children == NULL)) {
pcercuei 0:03b5121a232e 937 if ((info != NULL) && (info->saveEndTag != 0) &&
pcercuei 0:03b5121a232e 938 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
pcercuei 0:03b5121a232e 939 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
pcercuei 0:03b5121a232e 940 xmlOutputBufferWriteString(buf, ">");
pcercuei 0:03b5121a232e 941 } else {
pcercuei 0:03b5121a232e 942 xmlOutputBufferWriteString(buf, "></");
pcercuei 0:03b5121a232e 943 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
pcercuei 0:03b5121a232e 944 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
pcercuei 0:03b5121a232e 945 xmlOutputBufferWriteString(buf, ":");
pcercuei 0:03b5121a232e 946 }
pcercuei 0:03b5121a232e 947 xmlOutputBufferWriteString(buf, (const char *)cur->name);
pcercuei 0:03b5121a232e 948 xmlOutputBufferWriteString(buf, ">");
pcercuei 0:03b5121a232e 949 }
pcercuei 0:03b5121a232e 950 if ((format) && (cur->next != NULL) &&
pcercuei 0:03b5121a232e 951 (info != NULL) && (!info->isinline)) {
pcercuei 0:03b5121a232e 952 if ((cur->next->type != HTML_TEXT_NODE) &&
pcercuei 0:03b5121a232e 953 (cur->next->type != HTML_ENTITY_REF_NODE) &&
pcercuei 0:03b5121a232e 954 (cur->parent != NULL) &&
pcercuei 0:03b5121a232e 955 (cur->parent->name != NULL) &&
pcercuei 0:03b5121a232e 956 (cur->parent->name[0] != 'p')) /* p, pre, param */
pcercuei 0:03b5121a232e 957 xmlOutputBufferWriteString(buf, "\n");
pcercuei 0:03b5121a232e 958 }
pcercuei 0:03b5121a232e 959 return;
pcercuei 0:03b5121a232e 960 }
pcercuei 0:03b5121a232e 961 xmlOutputBufferWriteString(buf, ">");
pcercuei 0:03b5121a232e 962 if ((cur->type != XML_ELEMENT_NODE) &&
pcercuei 0:03b5121a232e 963 (cur->content != NULL)) {
pcercuei 0:03b5121a232e 964 /*
pcercuei 0:03b5121a232e 965 * Uses the OutputBuffer property to automatically convert
pcercuei 0:03b5121a232e 966 * invalids to charrefs
pcercuei 0:03b5121a232e 967 */
pcercuei 0:03b5121a232e 968
pcercuei 0:03b5121a232e 969 xmlOutputBufferWriteString(buf, (const char *) cur->content);
pcercuei 0:03b5121a232e 970 }
pcercuei 0:03b5121a232e 971 if (cur->children != NULL) {
pcercuei 0:03b5121a232e 972 if ((format) && (info != NULL) && (!info->isinline) &&
pcercuei 0:03b5121a232e 973 (cur->children->type != HTML_TEXT_NODE) &&
pcercuei 0:03b5121a232e 974 (cur->children->type != HTML_ENTITY_REF_NODE) &&
pcercuei 0:03b5121a232e 975 (cur->children != cur->last) &&
pcercuei 0:03b5121a232e 976 (cur->name != NULL) &&
pcercuei 0:03b5121a232e 977 (cur->name[0] != 'p')) /* p, pre, param */
pcercuei 0:03b5121a232e 978 xmlOutputBufferWriteString(buf, "\n");
pcercuei 0:03b5121a232e 979 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
pcercuei 0:03b5121a232e 980 if ((format) && (info != NULL) && (!info->isinline) &&
pcercuei 0:03b5121a232e 981 (cur->last->type != HTML_TEXT_NODE) &&
pcercuei 0:03b5121a232e 982 (cur->last->type != HTML_ENTITY_REF_NODE) &&
pcercuei 0:03b5121a232e 983 (cur->children != cur->last) &&
pcercuei 0:03b5121a232e 984 (cur->name != NULL) &&
pcercuei 0:03b5121a232e 985 (cur->name[0] != 'p')) /* p, pre, param */
pcercuei 0:03b5121a232e 986 xmlOutputBufferWriteString(buf, "\n");
pcercuei 0:03b5121a232e 987 }
pcercuei 0:03b5121a232e 988 xmlOutputBufferWriteString(buf, "</");
pcercuei 0:03b5121a232e 989 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
pcercuei 0:03b5121a232e 990 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
pcercuei 0:03b5121a232e 991 xmlOutputBufferWriteString(buf, ":");
pcercuei 0:03b5121a232e 992 }
pcercuei 0:03b5121a232e 993 xmlOutputBufferWriteString(buf, (const char *)cur->name);
pcercuei 0:03b5121a232e 994 xmlOutputBufferWriteString(buf, ">");
pcercuei 0:03b5121a232e 995 if ((format) && (info != NULL) && (!info->isinline) &&
pcercuei 0:03b5121a232e 996 (cur->next != NULL)) {
pcercuei 0:03b5121a232e 997 if ((cur->next->type != HTML_TEXT_NODE) &&
pcercuei 0:03b5121a232e 998 (cur->next->type != HTML_ENTITY_REF_NODE) &&
pcercuei 0:03b5121a232e 999 (cur->parent != NULL) &&
pcercuei 0:03b5121a232e 1000 (cur->parent->name != NULL) &&
pcercuei 0:03b5121a232e 1001 (cur->parent->name[0] != 'p')) /* p, pre, param */
pcercuei 0:03b5121a232e 1002 xmlOutputBufferWriteString(buf, "\n");
pcercuei 0:03b5121a232e 1003 }
pcercuei 0:03b5121a232e 1004 }
pcercuei 0:03b5121a232e 1005
pcercuei 0:03b5121a232e 1006 /**
pcercuei 0:03b5121a232e 1007 * htmlNodeDumpOutput:
pcercuei 0:03b5121a232e 1008 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 1009 * @doc: the document
pcercuei 0:03b5121a232e 1010 * @cur: the current node
pcercuei 0:03b5121a232e 1011 * @encoding: the encoding string
pcercuei 0:03b5121a232e 1012 *
pcercuei 0:03b5121a232e 1013 * Dump an HTML node, recursive behaviour,children are printed too,
pcercuei 0:03b5121a232e 1014 * and formatting returns/spaces are added.
pcercuei 0:03b5121a232e 1015 */
pcercuei 0:03b5121a232e 1016 void
pcercuei 0:03b5121a232e 1017 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
pcercuei 0:03b5121a232e 1018 xmlNodePtr cur, const char *encoding) {
pcercuei 0:03b5121a232e 1019 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
pcercuei 0:03b5121a232e 1020 }
pcercuei 0:03b5121a232e 1021
pcercuei 0:03b5121a232e 1022 /**
pcercuei 0:03b5121a232e 1023 * htmlDocContentDumpFormatOutput:
pcercuei 0:03b5121a232e 1024 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 1025 * @cur: the document
pcercuei 0:03b5121a232e 1026 * @encoding: the encoding string
pcercuei 0:03b5121a232e 1027 * @format: should formatting spaces been added
pcercuei 0:03b5121a232e 1028 *
pcercuei 0:03b5121a232e 1029 * Dump an HTML document.
pcercuei 0:03b5121a232e 1030 */
pcercuei 0:03b5121a232e 1031 void
pcercuei 0:03b5121a232e 1032 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
pcercuei 0:03b5121a232e 1033 const char *encoding, int format) {
pcercuei 0:03b5121a232e 1034 int type;
pcercuei 0:03b5121a232e 1035
pcercuei 0:03b5121a232e 1036 xmlInitParser();
pcercuei 0:03b5121a232e 1037
pcercuei 0:03b5121a232e 1038 if ((buf == NULL) || (cur == NULL))
pcercuei 0:03b5121a232e 1039 return;
pcercuei 0:03b5121a232e 1040
pcercuei 0:03b5121a232e 1041 /*
pcercuei 0:03b5121a232e 1042 * force to output the stuff as HTML, especially for entities
pcercuei 0:03b5121a232e 1043 */
pcercuei 0:03b5121a232e 1044 type = cur->type;
pcercuei 0:03b5121a232e 1045 cur->type = XML_HTML_DOCUMENT_NODE;
pcercuei 0:03b5121a232e 1046 if (cur->intSubset != NULL) {
pcercuei 0:03b5121a232e 1047 htmlDtdDumpOutput(buf, cur, NULL);
pcercuei 0:03b5121a232e 1048 }
pcercuei 0:03b5121a232e 1049 if (cur->children != NULL) {
pcercuei 0:03b5121a232e 1050 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
pcercuei 0:03b5121a232e 1051 }
pcercuei 0:03b5121a232e 1052 xmlOutputBufferWriteString(buf, "\n");
pcercuei 0:03b5121a232e 1053 cur->type = (xmlElementType) type;
pcercuei 0:03b5121a232e 1054 }
pcercuei 0:03b5121a232e 1055
pcercuei 0:03b5121a232e 1056 /**
pcercuei 0:03b5121a232e 1057 * htmlDocContentDumpOutput:
pcercuei 0:03b5121a232e 1058 * @buf: the HTML buffer output
pcercuei 0:03b5121a232e 1059 * @cur: the document
pcercuei 0:03b5121a232e 1060 * @encoding: the encoding string
pcercuei 0:03b5121a232e 1061 *
pcercuei 0:03b5121a232e 1062 * Dump an HTML document. Formating return/spaces are added.
pcercuei 0:03b5121a232e 1063 */
pcercuei 0:03b5121a232e 1064 void
pcercuei 0:03b5121a232e 1065 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
pcercuei 0:03b5121a232e 1066 const char *encoding) {
pcercuei 0:03b5121a232e 1067 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
pcercuei 0:03b5121a232e 1068 }
pcercuei 0:03b5121a232e 1069
pcercuei 0:03b5121a232e 1070 /************************************************************************
pcercuei 0:03b5121a232e 1071 * *
pcercuei 0:03b5121a232e 1072 * Saving functions front-ends *
pcercuei 0:03b5121a232e 1073 * *
pcercuei 0:03b5121a232e 1074 ************************************************************************/
pcercuei 0:03b5121a232e 1075
pcercuei 0:03b5121a232e 1076 /**
pcercuei 0:03b5121a232e 1077 * htmlDocDump:
pcercuei 0:03b5121a232e 1078 * @f: the FILE*
pcercuei 0:03b5121a232e 1079 * @cur: the document
pcercuei 0:03b5121a232e 1080 *
pcercuei 0:03b5121a232e 1081 * Dump an HTML document to an open FILE.
pcercuei 0:03b5121a232e 1082 *
pcercuei 0:03b5121a232e 1083 * returns: the number of byte written or -1 in case of failure.
pcercuei 0:03b5121a232e 1084 */
pcercuei 0:03b5121a232e 1085 int
pcercuei 0:03b5121a232e 1086 htmlDocDump(FILE *f, xmlDocPtr cur) {
pcercuei 0:03b5121a232e 1087 xmlOutputBufferPtr buf;
pcercuei 0:03b5121a232e 1088 xmlCharEncodingHandlerPtr handler = NULL;
pcercuei 0:03b5121a232e 1089 const char *encoding;
pcercuei 0:03b5121a232e 1090 int ret;
pcercuei 0:03b5121a232e 1091
pcercuei 0:03b5121a232e 1092 xmlInitParser();
pcercuei 0:03b5121a232e 1093
pcercuei 0:03b5121a232e 1094 if ((cur == NULL) || (f == NULL)) {
pcercuei 0:03b5121a232e 1095 return(-1);
pcercuei 0:03b5121a232e 1096 }
pcercuei 0:03b5121a232e 1097
pcercuei 0:03b5121a232e 1098 encoding = (const char *) htmlGetMetaEncoding(cur);
pcercuei 0:03b5121a232e 1099
pcercuei 0:03b5121a232e 1100 if (encoding != NULL) {
pcercuei 0:03b5121a232e 1101 xmlCharEncoding enc;
pcercuei 0:03b5121a232e 1102
pcercuei 0:03b5121a232e 1103 enc = xmlParseCharEncoding(encoding);
pcercuei 0:03b5121a232e 1104 if (enc != cur->charset) {
pcercuei 0:03b5121a232e 1105 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
pcercuei 0:03b5121a232e 1106 /*
pcercuei 0:03b5121a232e 1107 * Not supported yet
pcercuei 0:03b5121a232e 1108 */
pcercuei 0:03b5121a232e 1109 return(-1);
pcercuei 0:03b5121a232e 1110 }
pcercuei 0:03b5121a232e 1111
pcercuei 0:03b5121a232e 1112 handler = xmlFindCharEncodingHandler(encoding);
pcercuei 0:03b5121a232e 1113 if (handler == NULL)
pcercuei 0:03b5121a232e 1114 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
pcercuei 0:03b5121a232e 1115 } else {
pcercuei 0:03b5121a232e 1116 handler = xmlFindCharEncodingHandler(encoding);
pcercuei 0:03b5121a232e 1117 }
pcercuei 0:03b5121a232e 1118 }
pcercuei 0:03b5121a232e 1119
pcercuei 0:03b5121a232e 1120 /*
pcercuei 0:03b5121a232e 1121 * Fallback to HTML or ASCII when the encoding is unspecified
pcercuei 0:03b5121a232e 1122 */
pcercuei 0:03b5121a232e 1123 if (handler == NULL)
pcercuei 0:03b5121a232e 1124 handler = xmlFindCharEncodingHandler("HTML");
pcercuei 0:03b5121a232e 1125 if (handler == NULL)
pcercuei 0:03b5121a232e 1126 handler = xmlFindCharEncodingHandler("ascii");
pcercuei 0:03b5121a232e 1127
pcercuei 0:03b5121a232e 1128 buf = xmlOutputBufferCreateFile(f, handler);
pcercuei 0:03b5121a232e 1129 if (buf == NULL) return(-1);
pcercuei 0:03b5121a232e 1130 htmlDocContentDumpOutput(buf, cur, NULL);
pcercuei 0:03b5121a232e 1131
pcercuei 0:03b5121a232e 1132 ret = xmlOutputBufferClose(buf);
pcercuei 0:03b5121a232e 1133 return(ret);
pcercuei 0:03b5121a232e 1134 }
pcercuei 0:03b5121a232e 1135
pcercuei 0:03b5121a232e 1136 /**
pcercuei 0:03b5121a232e 1137 * htmlSaveFile:
pcercuei 0:03b5121a232e 1138 * @filename: the filename (or URL)
pcercuei 0:03b5121a232e 1139 * @cur: the document
pcercuei 0:03b5121a232e 1140 *
pcercuei 0:03b5121a232e 1141 * Dump an HTML document to a file. If @filename is "-" the stdout file is
pcercuei 0:03b5121a232e 1142 * used.
pcercuei 0:03b5121a232e 1143 * returns: the number of byte written or -1 in case of failure.
pcercuei 0:03b5121a232e 1144 */
pcercuei 0:03b5121a232e 1145 int
pcercuei 0:03b5121a232e 1146 htmlSaveFile(const char *filename, xmlDocPtr cur) {
pcercuei 0:03b5121a232e 1147 xmlOutputBufferPtr buf;
pcercuei 0:03b5121a232e 1148 xmlCharEncodingHandlerPtr handler = NULL;
pcercuei 0:03b5121a232e 1149 const char *encoding;
pcercuei 0:03b5121a232e 1150 int ret;
pcercuei 0:03b5121a232e 1151
pcercuei 0:03b5121a232e 1152 if ((cur == NULL) || (filename == NULL))
pcercuei 0:03b5121a232e 1153 return(-1);
pcercuei 0:03b5121a232e 1154
pcercuei 0:03b5121a232e 1155 xmlInitParser();
pcercuei 0:03b5121a232e 1156
pcercuei 0:03b5121a232e 1157 encoding = (const char *) htmlGetMetaEncoding(cur);
pcercuei 0:03b5121a232e 1158
pcercuei 0:03b5121a232e 1159 if (encoding != NULL) {
pcercuei 0:03b5121a232e 1160 xmlCharEncoding enc;
pcercuei 0:03b5121a232e 1161
pcercuei 0:03b5121a232e 1162 enc = xmlParseCharEncoding(encoding);
pcercuei 0:03b5121a232e 1163 if (enc != cur->charset) {
pcercuei 0:03b5121a232e 1164 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
pcercuei 0:03b5121a232e 1165 /*
pcercuei 0:03b5121a232e 1166 * Not supported yet
pcercuei 0:03b5121a232e 1167 */
pcercuei 0:03b5121a232e 1168 return(-1);
pcercuei 0:03b5121a232e 1169 }
pcercuei 0:03b5121a232e 1170
pcercuei 0:03b5121a232e 1171 handler = xmlFindCharEncodingHandler(encoding);
pcercuei 0:03b5121a232e 1172 if (handler == NULL)
pcercuei 0:03b5121a232e 1173 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
pcercuei 0:03b5121a232e 1174 }
pcercuei 0:03b5121a232e 1175 }
pcercuei 0:03b5121a232e 1176
pcercuei 0:03b5121a232e 1177 /*
pcercuei 0:03b5121a232e 1178 * Fallback to HTML or ASCII when the encoding is unspecified
pcercuei 0:03b5121a232e 1179 */
pcercuei 0:03b5121a232e 1180 if (handler == NULL)
pcercuei 0:03b5121a232e 1181 handler = xmlFindCharEncodingHandler("HTML");
pcercuei 0:03b5121a232e 1182 if (handler == NULL)
pcercuei 0:03b5121a232e 1183 handler = xmlFindCharEncodingHandler("ascii");
pcercuei 0:03b5121a232e 1184
pcercuei 0:03b5121a232e 1185 /*
pcercuei 0:03b5121a232e 1186 * save the content to a temp buffer.
pcercuei 0:03b5121a232e 1187 */
pcercuei 0:03b5121a232e 1188 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
pcercuei 0:03b5121a232e 1189 if (buf == NULL) return(0);
pcercuei 0:03b5121a232e 1190
pcercuei 0:03b5121a232e 1191 htmlDocContentDumpOutput(buf, cur, NULL);
pcercuei 0:03b5121a232e 1192
pcercuei 0:03b5121a232e 1193 ret = xmlOutputBufferClose(buf);
pcercuei 0:03b5121a232e 1194 return(ret);
pcercuei 0:03b5121a232e 1195 }
pcercuei 0:03b5121a232e 1196
pcercuei 0:03b5121a232e 1197 /**
pcercuei 0:03b5121a232e 1198 * htmlSaveFileFormat:
pcercuei 0:03b5121a232e 1199 * @filename: the filename
pcercuei 0:03b5121a232e 1200 * @cur: the document
pcercuei 0:03b5121a232e 1201 * @format: should formatting spaces been added
pcercuei 0:03b5121a232e 1202 * @encoding: the document encoding
pcercuei 0:03b5121a232e 1203 *
pcercuei 0:03b5121a232e 1204 * Dump an HTML document to a file using a given encoding.
pcercuei 0:03b5121a232e 1205 *
pcercuei 0:03b5121a232e 1206 * returns: the number of byte written or -1 in case of failure.
pcercuei 0:03b5121a232e 1207 */
pcercuei 0:03b5121a232e 1208 int
pcercuei 0:03b5121a232e 1209 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
pcercuei 0:03b5121a232e 1210 const char *encoding, int format) {
pcercuei 0:03b5121a232e 1211 xmlOutputBufferPtr buf;
pcercuei 0:03b5121a232e 1212 xmlCharEncodingHandlerPtr handler = NULL;
pcercuei 0:03b5121a232e 1213 int ret;
pcercuei 0:03b5121a232e 1214
pcercuei 0:03b5121a232e 1215 if ((cur == NULL) || (filename == NULL))
pcercuei 0:03b5121a232e 1216 return(-1);
pcercuei 0:03b5121a232e 1217
pcercuei 0:03b5121a232e 1218 xmlInitParser();
pcercuei 0:03b5121a232e 1219
pcercuei 0:03b5121a232e 1220 if (encoding != NULL) {
pcercuei 0:03b5121a232e 1221 xmlCharEncoding enc;
pcercuei 0:03b5121a232e 1222
pcercuei 0:03b5121a232e 1223 enc = xmlParseCharEncoding(encoding);
pcercuei 0:03b5121a232e 1224 if (enc != cur->charset) {
pcercuei 0:03b5121a232e 1225 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
pcercuei 0:03b5121a232e 1226 /*
pcercuei 0:03b5121a232e 1227 * Not supported yet
pcercuei 0:03b5121a232e 1228 */
pcercuei 0:03b5121a232e 1229 return(-1);
pcercuei 0:03b5121a232e 1230 }
pcercuei 0:03b5121a232e 1231
pcercuei 0:03b5121a232e 1232 handler = xmlFindCharEncodingHandler(encoding);
pcercuei 0:03b5121a232e 1233 if (handler == NULL)
pcercuei 0:03b5121a232e 1234 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
pcercuei 0:03b5121a232e 1235 }
pcercuei 0:03b5121a232e 1236 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
pcercuei 0:03b5121a232e 1237 } else {
pcercuei 0:03b5121a232e 1238 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
pcercuei 0:03b5121a232e 1239 }
pcercuei 0:03b5121a232e 1240
pcercuei 0:03b5121a232e 1241 /*
pcercuei 0:03b5121a232e 1242 * Fallback to HTML or ASCII when the encoding is unspecified
pcercuei 0:03b5121a232e 1243 */
pcercuei 0:03b5121a232e 1244 if (handler == NULL)
pcercuei 0:03b5121a232e 1245 handler = xmlFindCharEncodingHandler("HTML");
pcercuei 0:03b5121a232e 1246 if (handler == NULL)
pcercuei 0:03b5121a232e 1247 handler = xmlFindCharEncodingHandler("ascii");
pcercuei 0:03b5121a232e 1248
pcercuei 0:03b5121a232e 1249 /*
pcercuei 0:03b5121a232e 1250 * save the content to a temp buffer.
pcercuei 0:03b5121a232e 1251 */
pcercuei 0:03b5121a232e 1252 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
pcercuei 0:03b5121a232e 1253 if (buf == NULL) return(0);
pcercuei 0:03b5121a232e 1254
pcercuei 0:03b5121a232e 1255 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
pcercuei 0:03b5121a232e 1256
pcercuei 0:03b5121a232e 1257 ret = xmlOutputBufferClose(buf);
pcercuei 0:03b5121a232e 1258 return(ret);
pcercuei 0:03b5121a232e 1259 }
pcercuei 0:03b5121a232e 1260
pcercuei 0:03b5121a232e 1261 /**
pcercuei 0:03b5121a232e 1262 * htmlSaveFileEnc:
pcercuei 0:03b5121a232e 1263 * @filename: the filename
pcercuei 0:03b5121a232e 1264 * @cur: the document
pcercuei 0:03b5121a232e 1265 * @encoding: the document encoding
pcercuei 0:03b5121a232e 1266 *
pcercuei 0:03b5121a232e 1267 * Dump an HTML document to a file using a given encoding
pcercuei 0:03b5121a232e 1268 * and formatting returns/spaces are added.
pcercuei 0:03b5121a232e 1269 *
pcercuei 0:03b5121a232e 1270 * returns: the number of byte written or -1 in case of failure.
pcercuei 0:03b5121a232e 1271 */
pcercuei 0:03b5121a232e 1272 int
pcercuei 0:03b5121a232e 1273 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
pcercuei 0:03b5121a232e 1274 return(htmlSaveFileFormat(filename, cur, encoding, 1));
pcercuei 0:03b5121a232e 1275 }
pcercuei 0:03b5121a232e 1276
pcercuei 0:03b5121a232e 1277 #endif /* LIBXML_OUTPUT_ENABLED */
pcercuei 0:03b5121a232e 1278
pcercuei 0:03b5121a232e 1279 #define bottom_HTMLtree
pcercuei 0:03b5121a232e 1280 #include "elfgcchack.h"
pcercuei 0:03b5121a232e 1281 #endif /* LIBXML_HTML_ENABLED */
pcercuei 0:03b5121a232e 1282