fixes #178 - added Casper.getPageContent()

Extracts and returns the raw body contents for the latest retrieved page. Sample script: ```javascript var casper = require('casper').create(); casper.start().then(function() { this.open('http://search.twitter.com/search.json?q=casperjs', { method: 'get', headers: { 'Accept': 'application/json' } }); }); casper.then(function() { require('utils').dump(JSON.parse(this.getPageContent())); }); casper.run(function() { this.exit(); }); ```

fixes #178 - added Casper.getPageContent()
Extracts and returns the raw body contents for the latest retrieved page. Sample script: ```javascript var casper = require('casper').create(); casper.start().then(function() { this.open('http://search.twitter.com/search.json?q=casperjs', { method: 'get', headers: { 'Accept': 'application/json' } }); }); casper.then(function() { require('utils').dump(JSON.parse(this.getPageContent())); }); casper.run(function() { this.exit(); }); ```
Nicolas Perriault
Commit 76e0b5c1 ... 76e0b5c18b247e92230cffe6ee1dfbc0307c9c96 authored 2012-07-11 19:02:36 +0200 by Nicolas Perriault
Showing 2 changed files with 25 additions and 1 deletions
CHANGELOG.md
modules/casper.js
--- a/CHANGELOG.md
View file @76e0b5c
+++ b/CHANGELOG.md
View file @76e0b5c
@@ -4,8 +4,9 @@ CasperJS Changelog
 XXXX-XX-XX, v1.0.0
 ------------------

+- fixed [#178](https://github.com/n1k0/casperjs/issues/178) - added `Casper.getPageContent()` to access raw page body contents on non-html received content-types.
 - fixed [#164](https://github.com/n1k0/casperjs/issues/164) - ability to force CLI parameters as strings (see [related documentation](http://casperjs.org/cli.html#raw)).
- fixed [#153](https://github.com/n1k0/casperjs/issues/153) - erroneous mouse event results when event.preventDefault() was used
+- fixed [#153](https://github.com/n1k0/casperjs/issues/153) - erroneous mouse event results when event.preventDefault() was used.

 2012-06-26, v1.0.0-RC1
 ----------------------
--- a/modules/casper.js
View file @76e0b5c
+++ b/modules/casper.js
View file @76e0b5c
@@ -695,6 +695,29 @@ Casper.prototype.getColorizer = function getColorizer() {
 };

 /**
+ * Retrieves the current page contents, unwrapping the <html><body><pre> envelope found around non-HTML content.
+ * Throws a CasperError if not started; returns page.content as-is when the content type is not a string or no envelope is matched.
+ * @return String
+ */
+Casper.prototype.getPageContent = function getPageContent() {
+    "use strict";
+    if (!this.started) {
+        throw new CasperError("Casper not started, can't getPageContent()");
+    }
+    var contentType = utils.getPropertyPath(this, 'currentResponse.contentType');
+    if (!utils.isString(contentType)) {
+        return this.page.content;
+    }
+    // for some reason webkit/qtwebkit will always enclose body contents within html tags; NOTE(review): '.' does not match newlines here, so a multi-line body would not match - confirm this is intended
+    var match = (new RegExp('^<html><head></head><body><pre.+?>(.*)</pre></body></html>$')).exec(this.page.content);
+    if (!match) {
+        // No <pre> envelope matched: hand back the page content untouched
+        return this.page.content;
+    }
+    return match[1];
+};
+
+/**
 * Retrieves current document url.
 *
 * @return String