HTMLCleaner.php
2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
<?php
/**
* @package framework
* @subpackage misc
*/
/**
* Base class for HTML cleaning implementations.
*/
abstract class HTMLCleaner extends Object {
/**
* @var array
*/
protected $defaultConfig = array();
/**
* @var $config Array configuration variables for HTMLCleaners that support configuration (like Tidy)
*/
public $config;
/**
* @param Array The configuration for the cleaner, if necessary
*/
public function __construct($config = null) {
if ($config) $this->config = array_merge($this->defaultConfig, $config);
else $this->config = $this->defaultConfig;
}
/**
* @param Array
*/
public function setConfig($config) {
$this->config = $config;
}
/**
* @return Array
*/
public function getConfig() {
return $this->config;
}
/**
* Passed a string, return HTML that has been tidied.
*
* @param String HTML
* @return String HTML, tidied
*/
abstract public function cleanHTML($content);
/**
* Experimental inst class to create a default html cleaner class
*
* @return PurifierHTMLCleaner|TidyHTMLCleaner
*/
public static function inst() {
if (class_exists('HTMLPurifier')) return new PurifierHTMLCleaner();
elseif (class_exists('tidy')) return new TidyHTMLCleaner();
}
}
/**
* Cleans HTML using the HTMLPurifier package
* http://htmlpurifier.org/
*/
class PurifierHTMLCleaner extends HTMLCleaner {
public function cleanHTML($content) {
$html = new HTMLPurifier();
$doc = Injector::inst()->create('HTMLValue', $html->purify($content));
return $doc->getContent();
}
}
/**
* Cleans HTML using the Tidy package
* http://php.net/manual/en/book.tidy.php
*/
class TidyHTMLCleaner extends HTMLCleaner {
protected $defaultConfig = array(
'clean' => true,
'output-xhtml' => true,
'show-body-only' => true,
'wrap' => 0,
'doctype' => 'omit',
'input-encoding' => 'utf8',
'output-encoding' => 'utf8'
);
public function cleanHTML($content) {
$tidy = new tidy();
$output = $tidy->repairString($content, $this->config);
// Clean leading/trailing whitespace
return preg_replace('/(^\s+)|(\s+$)/', '', $output);
}
}