Commit 5c0c7083 5c0c70836cd7debe0734beef2ad67567befa9e4e by Laurent Jouanneau

Fixed casper.getPageContent() for Gecko

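Like WebKit, Gecko wraps non-HTML content in a generated HTML page,
but the wrapper markup differs: Gecko adds a plaintext.css <link> to
<head> and emits a <pre> element without a style attribute.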
1 parent 4a8e640e
......@@ -915,17 +915,29 @@ Casper.prototype.getPageContent = function getPageContent() {
if (!utils.isString(contentType)) {
return this.page.frameContent;
}
// for some reason (qt)webkit will always enclose non text/html body contents within an html
// for some reason (qt)webkit/Gecko will always enclose non text/html body contents within an html
// structure like this:
// <html><head></head><body><pre style="(...)">content</pre></body></html>
// webkit: <html><head></head><body><pre style="(...)">content</pre></body></html>
// gecko: <html><head><link rel="alternate stylesheet" type="text/css" href="resource://gre-resources/plaintext.css" title="..."></head><body><pre>document.write('foo');\n</pre></body></html>
var sanitizedHtml = this.evaluate(function checkHtml() {
var head = __utils__.findOne('head'),
body = __utils__.findOne('body');
if (head && head.childNodes.length === 0 &&
body && body.childNodes.length === 1 &&
if (!head || !body) {
return null;
}
if (head.childNodes.length == 0 &&
body.childNodes.length === 1 &&
__utils__.findOne('body pre[style]')) {
return __utils__.findOne('body pre').textContent.trim();
}
if (head.childNodes.length === 1 &&
body.childNodes.length === 1 &&
head.childNodes[0].localName == 'link' &&
head.childNodes[0].getAttribute('href') == 'resource://gre-resources/plaintext.css' &&
body.childNodes[0].localName == 'pre' ) {
return body.childNodes[0].textContent.trim();
}
return null;
});
return sanitizedHtml ? sanitizedHtml : this.page.frameContent;
};
......