Skip to content
This repository has been archived by the owner on Apr 26, 2020. It is now read-only.

Load HTML as UTF-8 #9

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion phpQuery/phpQuery/DOMDocumentWrapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ protected function loadMarkupHTML($markup, $requestedCharset = null) {
// Document Encoding Conversion
// http://code.google.com/p/phpquery/issues/detail?id=86
if (function_exists('mb_detect_encoding')) {
$possibleCharsets = array($documentCharset, $requestedCharset, 'AUTO');
$possibleCharsets = array($requestedCharset, $documentCharset, 'AUTO');
$docEncoding = mb_detect_encoding($markup, implode(', ', $possibleCharsets));
if (! $docEncoding)
$docEncoding = $documentCharset; // ok trust the document
Expand Down Expand Up @@ -194,9 +194,25 @@ protected function loadMarkupHTML($markup, $requestedCharset = null) {
}
phpQuery::debug("Full markup load (HTML), documentCreate('$charset')");
$this->documentCreate($charset);
if ($charset === 'utf-8') {
// Hack to load HTML as UTF-8
$markup = '<?xml encoding="UTF-8">' . $markup;
}
$return = phpQuery::$debug === 2
? $this->document->loadHTML($markup)
: @$this->document->loadHTML($markup);
if ($charset === 'utf-8') {
// Remove hack
foreach ($this->document->childNodes as $item) {
if ($item->nodeType == XML_PI_NODE) {
// remove hack
$this->document->removeChild($item);
}
}
// insert proper
$this->document->encoding = 'UTF-8';
// End hack
}
if ($return)
$this->root = $this->document;
}
Expand Down
2 changes: 1 addition & 1 deletion phpQuery/phpQuery/Zend/Json/Decoder.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
*/

/**
* @see Zend_Json
* @see Zend_Json
*/
require_once 'Zend/Json.php';

Expand Down
8 changes: 4 additions & 4 deletions test-cases/run.php
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<?php
<?php
// TODO move them into directory
include 'test_1.php';
include 'test_2.php';
include 'test_1.php';
include 'test_2.php';
include 'test_3.php';
include 'test_4.php';
include 'test_5.php';
include 'test_wrap.php';
include 'test_replace.php';
include 'test_multidoc.php';
include 'test_multidoc.php';
?>
22 changes: 11 additions & 11 deletions test-cases/test_2.php
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
<?php
require_once('../phpQuery/phpQuery.php');
require_once('../phpQuery/phpQuery.php');
phpQuery::$debug = true;

$testName = 'Filter with pseudoclass';

$testName = 'Filter with pseudoclass';
$testResult = array(
'p.body',
);
$result = phpQuery::newDocumentFile('test.html');
$result = $result->find('p')
->filter('.body:gt(1)');
if ( $result->whois() == $testResult )
print "Test '{$testName}' PASSED :)";
else
);
$result = phpQuery::newDocumentFile('test.html');
$result = $result->find('p')
->filter('.body:gt(1)');
if ( $result->whois() == $testResult )
print "Test '{$testName}' PASSED :)";
else
print "Test '{$testName}' <strong>FAILED</strong> !!! ";
print_r($result->whois());
print_r($result->whois());
print "\n";


Expand Down
12 changes: 6 additions & 6 deletions test-cases/test_document.php
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
<?php
//error_reporting(E_ALL);
require_once('../phpQuery/phpQuery.php');
require_once('../phpQuery/phpQuery.php');
phpQuery::$debug = true;


$testName = 'HTML document load';
$doc = phpQuery::newDocumentFile('test.html');
print $doc->find('li:first')->html('foo <p>bar</p> foo <b><i>foo</i</b>')->html();
die();
$testResult = 10;
if ($doc->script('example', 'p')->length == $testResult)
print "Test '$testName' PASSED :)";
else {
$testResult = 10;
if ($doc->script('example', 'p')->length == $testResult)
print "Test '$testName' PASSED :)";
else {
print "Test '$testName' <strong>FAILED</strong> !!! ";
print "<pre>";
var_dump($doc->whois());
var_dump($doc->whois());
print "</pre>\n";
}
print "\n";
14 changes: 7 additions & 7 deletions test-cases/test_scripts.php
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
<?php
//error_reporting(E_ALL);
require_once('../phpQuery/phpQuery.php');
require_once('../phpQuery/phpQuery.php');
phpQuery::$debug = true;
phpQuery::plugin('Scripts');


$testName = 'Scripts/example';
$doc = phpQuery::newDocumentFile('test.html');
$testResult = 10;
if ($doc->script('example', 'p')->length == $testResult)
print "Test '$testName' PASSED :)";
else {
$testResult = 10;
if ($doc->script('example', 'p')->length == $testResult)
print "Test '$testName' PASSED :)";
else {
print "Test '$testName' <strong>FAILED</strong> !!! ";
print "<pre>";
var_dump($doc->whois());
var_dump($doc->whois());
print "</pre>\n";
}
print "\n";
Expand All @@ -34,5 +34,5 @@
var_dump($doc->whois());
print "</pre>\n";
}
print "\n";
print "\n";
?>
206 changes: 103 additions & 103 deletions test-cases/test_selectors.php
Original file line number Diff line number Diff line change
@@ -1,91 +1,91 @@
<?php
require_once('../phpQuery/phpQuery.php');
phpQuery::$debug = true;
$testName = 'Selectors';
$tests = array(
array(
'div:first',
array(
'div.articles',
)
),
array(
"p:contains('title')",
array(
'p.title',
'p.title',
'p.noTitle',
)
),
array(
"p:contains('title 2')",
array(
'p.title',
)
),
array(
'li:eq(1)',
array(
'li#testID',
)
),
array(
'li:eq(1) p:eq(1)',
array(
'p.title',
)
),
array(
'*[rel="test"]',
array(
'p',
'p'
)
),
array(
'#testID p:first',
array(
'p'
)
),
array(
"p:not('.title'):not('.body')",
array(
'p',
'p',
'p',
'p.noTitle',
'p.after',
)
),
array(
"[content*=html]",
array(
'meta'
)
),
array(
"li#testID, div.articles",
array(
'li#testID',
'div.articles'
)
),
array(
"script[src]:not([src^=<?php])",
array(
'script'
)
),
// array(
// 'li:not([ul/li])',
// array(
// 'li',
// 'li#testID',
// 'li',
// 'li.nested',
// 'li.second',
// )
require_once('../phpQuery/phpQuery.php');
phpQuery::$debug = true;
$testName = 'Selectors';
$tests = array(
array(
'div:first',
array(
'div.articles',
)
),
array(
"p:contains('title')",
array(
'p.title',
'p.title',
'p.noTitle',
)
),
array(
"p:contains('title 2')",
array(
'p.title',
)
),
array(
'li:eq(1)',
array(
'li#testID',
)
),
array(
'li:eq(1) p:eq(1)',
array(
'p.title',
)
),
array(
'*[rel="test"]',
array(
'p',
'p'
)
),
array(
'#testID p:first',
array(
'p'
)
),
array(
"p:not('.title'):not('.body')",
array(
'p',
'p',
'p',
'p.noTitle',
'p.after',
)
),
array(
"[content*=html]",
array(
'meta'
)
),
array(
"li#testID, div.articles",
array(
'li#testID',
'div.articles'
)
),
array(
"script[src]:not([src^=<?php])",
array(
'script'
)
),
// array(
// 'li:not([ul/li])',
// array(
// 'li',
// 'li#testID',
// 'li',
// 'li.nested',
// 'li.second',
// )
// ),
array(
'li:has(ul)',
Expand Down Expand Up @@ -270,21 +270,21 @@
// array(
// '',
// )
// ),
);
phpQuery::newDocumentFile('test.html');
foreach( $tests as $k => $test ) {
$tests[ $k ][2] = pq( $test[0] )->whois();
}
foreach( $tests as $test ) {
if ( $test[1] == $test[2] )
print "Test '{$test[0]}' PASSED :)";
else {
print "Test '{$test[0]}' <strong>FAILED</strong> !!!";
print_r($test[2]);
}
print "<br /><br />";
// ),
);

phpQuery::newDocumentFile('test.html');
foreach( $tests as $k => $test ) {
$tests[ $k ][2] = pq( $test[0] )->whois();
}
foreach( $tests as $test ) {
if ( $test[1] == $test[2] )
print "Test '{$test[0]}' PASSED :)";
else {
print "Test '{$test[0]}' <strong>FAILED</strong> !!!";
print_r($test[2]);
}
print "<br /><br />";
}

//
Expand All @@ -298,5 +298,5 @@
else
print "Test '{$testName}' <strong>FAILED</strong> !!! ";
$result->dump();
print "\n";
print "\n";
?>