Fixed casper.getPageContent() for Gecko
Like WebKit, Gecko wraps non-HTML content in an HTML page, but the wrapper markup it generates is slightly different.
Showing
1 changed file
with
16 additions
and
4 deletions
... | @@ -915,17 +915,29 @@ Casper.prototype.getPageContent = function getPageContent() { | ... | @@ -915,17 +915,29 @@ Casper.prototype.getPageContent = function getPageContent() { |
915 | if (!utils.isString(contentType)) { | 915 | if (!utils.isString(contentType)) { |
916 | return this.page.frameContent; | 916 | return this.page.frameContent; |
917 | } | 917 | } |
918 | // for some reason (qt)webkit will always enclose non text/html body contents within an html | 918 | // for some reason (qt)webkit/Gecko will always enclose non text/html body contents within an html |
919 | // structure like this: | 919 | // structure like this: |
920 | // <html><head></head><body><pre style="(...)">content</pre></body></html> | 920 | // webkit: <html><head></head><body><pre style="(...)">content</pre></body></html> |
921 | // gecko: <html><head><link rel="alternate stylesheet" type="text/css" href="resource://gre-resources/plaintext.css" title="..."></head><body><pre>document.write('foo');\n</pre></body></html> | ||
921 | var sanitizedHtml = this.evaluate(function checkHtml() { | 922 | var sanitizedHtml = this.evaluate(function checkHtml() { |
922 | var head = __utils__.findOne('head'), | 923 | var head = __utils__.findOne('head'), |
923 | body = __utils__.findOne('body'); | 924 | body = __utils__.findOne('body'); |
924 | if (head && head.childNodes.length === 0 && | 925 | if (!head || !body) { |
925 | body && body.childNodes.length === 1 && | 926 | return null; |
927 | } | ||
928 | if (head.childNodes.length == 0 && | ||
929 | body.childNodes.length === 1 && | ||
926 | __utils__.findOne('body pre[style]')) { | 930 | __utils__.findOne('body pre[style]')) { |
927 | return __utils__.findOne('body pre').textContent.trim(); | 931 | return __utils__.findOne('body pre').textContent.trim(); |
928 | } | 932 | } |
933 | if (head.childNodes.length === 1 && | ||
934 | body.childNodes.length === 1 && | ||
935 | head.childNodes[0].localName == 'link' && | ||
936 | head.childNodes[0].getAttribute('href') == 'resource://gre-resources/plaintext.css' && | ||
937 | body.childNodes[0].localName == 'pre' ) { | ||
938 | return body.childNodes[0].textContent.trim(); | ||
939 | } | ||
940 | return null; | ||
929 | }); | 941 | }); |
930 | return sanitizedHtml ? sanitizedHtml : this.page.frameContent; | 942 | return sanitizedHtml ? sanitizedHtml : this.page.frameContent; |
931 | }; | 943 | }; | ... | ... |
-
Please register or sign in to post a comment