HTMLValue.php
4.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
<?php
/**
* This class handles the converting of HTML fragments between a string and a DOMDocument based
* representation.
*
* It's designed to allow dependency injection to replace the standard HTML4 version with one that
* handles XHTML or HTML5 instead
*
* @package framework
* @subpackage integration
*/
abstract class SS_HTMLValue extends ViewableData {

    /**
     * The wrapped document: null until first requested via getDocument(), false after setInvalid().
     * @var DOMDocument|false|null
     */
    private $document = null;

    /**
     * False once setInvalid() has been called, until setDocument() is called again.
     * @var bool
     */
    private $valid = true;

    /**
     * @param string|null $fragment Optional HTML fragment to load immediately via setContent()
     */
    public function __construct($fragment = null) {
        // A plain truthiness test would silently drop the fragment "0", which is valid content.
        // Every other falsy value (null, '', false, ...) is still skipped, as before.
        if ($fragment || $fragment === '0') $this->setContent($fragment);
        parent::__construct();
    }

    /**
     * Load an HTML fragment into this value. Concrete subclasses decide how it is parsed.
     *
     * @param string $fragment
     */
    abstract public function setContent($fragment);

    /**
     * Serialise the contents of the document's body element back to an HTML string.
     *
     * @return string The inner HTML of <body>; an empty string if there is no body or this
     *                instance is in the invalid state
     */
    public function getContent() {
        $document = $this->getDocument();

        // getDocument() returns false in the invalid state; cloning a non-object is a fatal error
        if (!$document) return '';

        // Work on a copy, because attribute values are temporarily overwritten below
        $doc = clone $document;

        // If there's no body, the content is empty string
        if (!$doc->getElementsByTagName('body')->length) return '';

        $xp = new DOMXPath($doc);

        // saveHTML Percentage-encodes any URI-based attributes. We don't want this, since it interferes with
        // shortcodes. So first, save all the attribute values for later restoration.
        $attrs = array(); $i = 0;

        foreach ($xp->query('//body//@*') as $attr) {
            $key = "__HTMLVALUE_".($i++);
            $attrs[$key] = $attr->value;
            $attr->value = $key;
        }

        // Then, call saveHTML & extract out the content from the body tag
        $res = preg_replace(
            array(
                '/^(.*?)<body>/is',
                '/<\/body>(.*?)$/isD',
            ),
            '',
            $doc->saveHTML()
        );

        // Then replace the saved attributes with their original versions
        $res = preg_replace_callback('/__HTMLVALUE_(\d+)/', function($matches) use ($attrs) {
            // Text that merely looks like a placeholder but was never stored is left untouched,
            // instead of raising an undefined-index notice and being erased
            if (!isset($attrs[$matches[0]])) return $matches[0];
            return Convert::raw2att($attrs[$matches[0]]);
        }, $res);

        return $res;
    }

    /**
     * Returns the same string as getContent().
     * NOTE(review): presumably the hook the template layer calls to render this object - confirm.
     *
     * @see SS_HTMLValue::getContent()
     * @return string
     */
    public function forTemplate() {
        return $this->getContent();
    }

    /**
     * Get the DOMDocument for the passed content, creating an empty one on first use
     *
     * @return DOMDocument | false - Return false if HTML not valid, the DOMDocument instance otherwise
     */
    public function getDocument() {
        if (!$this->valid) {
            return false;
        }

        if (!$this->document) {
            $this->document = new DOMDocument('1.0', 'UTF-8');
            $this->document->strictErrorChecking = false;
            $this->document->formatOutput = false;
        }

        return $this->document;
    }

    /**
     * Is this HTMLValue in a usable state? Returns false once setInvalid() has been called,
     * until a document is set again.
     *
     * @return bool
     */
    public function isValid() {
        return $this->valid;
    }

    /**
     * Replace the wrapped document and mark this instance as valid again.
     * Passing null makes the next getDocument() call create a fresh, empty document.
     *
     * @param DOMDocument|null $document
     */
    public function setDocument($document) {
        $this->document = $document;
        $this->valid = true;
    }

    /**
     * Put this instance into the invalid state: the document is discarded and
     * getDocument() returns false until setDocument() is called.
     */
    public function setInvalid() {
        $this->valid = false;
        $this->document = false;
    }

    /**
     * Pass through any missed method calls to DOMDocument (if they exist)
     * so that HTMLValue can be treated mostly like an instance of DOMDocument
     *
     * @param string $method
     * @param array $arguments
     * @return mixed
     */
    public function __call($method, $arguments) {
        $doc = $this->getDocument();

        // $doc is false in the invalid state; method_exists() rejects a boolean argument on PHP 8
        if($doc && method_exists($doc, $method)) {
            return call_user_func_array(array($doc, $method), $arguments);
        }

        return parent::__call($method, $arguments);
    }

    /**
     * Get the body element, or false if there isn't one (we haven't loaded any content
     * or this instance is in an invalid state)
     *
     * @return DOMElement|false
     */
    public function getBody() {
        $doc = $this->getDocument();
        if (!$doc) return false;

        $body = $doc->getElementsByTagName('body');
        if (!$body->length) return false;

        return $body->item(0);
    }

    /**
     * Make an xpath query against this HTML
     *
     * @param $query string - The xpath query string
     * @return DOMNodeList|false - false if this instance is in an invalid state, otherwise
     *                             whatever DOMXPath::query() returns
     */
    public function query($query) {
        $doc = $this->getDocument();

        // DOMXPath cannot be constructed from false (invalid state), so bail out like getBody() does
        if (!$doc) return false;

        $xp = new DOMXPath($doc);
        return $xp->query($query);
    }
}
class SS_HTML4Value extends SS_HTMLValue {

    /**
     * Parse an HTML fragment into the underlying DOMDocument.
     *
     * The fragment is wrapped in a minimal html/head/body skeleton whose meta tag declares
     * UTF-8 and is then passed to DOMDocument::loadHTML(). libxml errors are collected
     * internally while parsing and discarded afterwards, and the previous libxml
     * error-handling mode is restored before returning.
     *
     * @param string $content HTML fragment to load
     * @return bool The result of DOMDocument::loadHTML()
     */
    public function setContent($content) {
        // Strip carriage returns up front, otherwise DOMDocument emits them as "&#13;" entities.
        // That is apparently what the XML spec asks for, but it messes up the HTML we hand back.
        $fragment = str_replace("\r", '', $content);

        // An instance flagged as invalid has no usable document; reset it so a fresh one is created
        if (!$this->isValid()) {
            $this->setDocument(null);
        }

        $markup = '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head>'
            . '<body>' . $fragment . '</body></html>';

        // Keep parser warnings out of PHP's error output while loading, then restore the old mode
        $previousMode = libxml_use_internal_errors(true);
        $loaded = $this->getDocument()->loadHTML($markup);
        libxml_clear_errors();
        libxml_use_internal_errors($previousMode);

        return $loaded;
    }
}