CasperJS is a great tool. I have been able to understand certain basics. However, I am trying to tackle two issues. First, I split my code into several functions for closure reasons, and I am now getting a parse error. I have added the options verbose: true, logLevel: "debug"
to see whether it points out a line number, but I get no results. Second, I want to make an AJAX call, which is asynchronous. I am using waitFor
but I read THIS article about a more efficient way. How can I set up waitForResource()
to track AJAX requests? And how can I display the line of a parse error?
CODE
// URLs handed to stringifyResult at the bottom of the script.
var urls = ['/'];
// CasperJS instance created with verbose output and debug-level logging enabled.
var casper = require('casper').create({
verbose: true,
logLevel: "debug"
});
/**
 * Counts the elements that currently match ".listview .badge-grid-item".
 *
 * @param {Object} casper - Object exposing `getElementsInfo(selector)`.
 * @returns {number} Length of the list returned by `getElementsInfo`.
 */
function getNumberOfItems(casper) {
  var gridItems = casper.getElementsInfo(".listview .badge-grid-item");
  return gridItems.length;
}
/**
 * Scrolls the page down by 4000 and, while the "load more" marker is visible
 * and at most 60 items are present, waits for the item count to change before
 * scrolling again.
 *
 * @param {Object} casper - Object exposing `page.scrollPosition`,
 *   `getElementInfo`, `getElementsInfo`, `waitFor` and `echo`.
 */
function tryAndScroll(casper) {
  var page = casper.page;
  var nextTop = page.scrollPosition["top"] + 4000;
  page.scrollPosition = { top: nextTop, left: 0 };

  var loadMore = casper.getElementInfo('.badge-post-grid-load-more');
  if (!loadMore.visible) {
    casper.echo("no more items");
    return;
  }

  var itemsBefore = getNumberOfItems(casper);
  // Stop silently once more than 60 items are present.
  if (!(itemsBefore <= 60)) {
    return;
  }

  casper.waitFor(
    function check() {
      // Satisfied as soon as the item count differs from the snapshot.
      return itemsBefore != getNumberOfItems(casper);
    },
    function then() {
      tryAndScroll(this);
    },
    function onTimeout() {
      this.echo("Timout reached");
    },
    10000
  );
}
// Pairs x[i] with y[i] into objects of the form { page_link, video_link }
// and returns the resulting array.
// NOTE: this block does not parse as written. The `for` header has `,` after
// `var i = 0` where `;` is required, and `page_links` is not declared in this
// function (it is a local variable of linkScraper).
function bineArrays(x, y) {
var result = [];
for(var i = 0, i < page_links.length; i++) {
result[i] = {};
result[i].page_link = x[i];
result[i].video_link = y[i];
}
return result;
}
// For every URL in x: opens it, scrolls with tryAndScroll, pushes the href of
// each '.title' element into page_links, then opens each of those hrefs,
// clicks '.responsivewrapper' and pushes the src of each
// '.badge-youtube-player' element into youtube.
// Returns bineArrays(page_links, youtube).
// NOTE: the thenOpen/then callbacks are what fill page_links and youtube,
// while the return statement sits outside of them, directly after the loop.
function linkScraper(x){
var page_links = [];
var youtube = [];
for (var i = 0; i < x.length; i++)
{ // start for loop
casper.thenOpen(x[i], function() {
//Scroll down for elements
tryAndScroll(this);
casper.then(function() {
this.getElementsInfo('.title').forEach(function(element) {
// skip elements that don't have a href attribute...
if (!element.attributes.href) {
return;
}
page_links.push( element["attributes"]["href"] );
casper.thenOpen(element.attributes.href, function() {
this.click('.responsivewrapper');
}).then(function(){
casper.each(this.getElementsInfo('.badge-youtube-player'), function(casper, element, j) {
youtube.push( element["attributes"]["src"] );
});
// NOTE: the bare `localhost` token after `})` on the next line is not valid syntax.
})localhost;
});
});
});
}
return bineArrays(page_links,youtube);
}
// Runs linkScraper on the given pages and tries to POST the result as JSON
// to the PHP endpoint via jQuery's $.ajax inside this.evaluate().
// Expects to be called with the casper instance as `this`.
function stringifyResult(webpages){
//Pass link array to linkScraper
var linksArr = linkScraper.call(this, webpages);
//send results to php page
// NOTE: `server` (and `response` below) are assigned without `var`.
server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
// NOTE: the check function passed to waitFor has no return statement.
this.waitFor( function() {
response = this.evaluate(function() {
// NOTE: `server` and `linksArr` are used in this function but are not passed
// to evaluate() as arguments.
$.ajax({
type: "POST",
url: server,
data: JSON.stringify(linksArr),
//dataType: 'json',
contentType: "application/json",
success: function (data) {
// NOTE(review): `this` in these jQuery callbacks is presumably not the casper
// instance, so echo()/exit() would not be available — confirm.
this.echo("All done.");
return this.exit();
//return data.responseText;
},
error: function (xhr,status,error){
return this.echo(error);
}
});
});
});
}
// Start the run and add a first step that prints "Starting".
casper.start().then(function() {
this.echo("Starting");
});
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');
// stringifyResult is called right here, and its return value is what
// casper.run() receives. `this` is the top-level `this`, not `casper`.
casper.run(stringifyResult.call(this, urls));
CasperJS is a great tool. I have been able to understand certain basics. However, I am trying to tackle two issues. First, I split my code into several functions for closure reasons, and I am now getting a parse error. I have added the options verbose: true, logLevel: "debug"
to see whether it points out a line number, but I get no results. Second, I want to make an AJAX call, which is asynchronous. I am using waitFor
but I read THIS article about a more efficient way. How can I set up waitForResource()
to track AJAX requests? And how can I display the line of a parse error?
CODE
// URLs handed to stringifyResult at the bottom of the script.
var urls = ['http://9gag.tv/'];
// CasperJS instance created with verbose output and debug-level logging enabled.
var casper = require('casper').create({
verbose: true,
logLevel: "debug"
});
/**
 * Counts the elements that currently match ".listview .badge-grid-item".
 *
 * @param {Object} casper - Object exposing `getElementsInfo(selector)`.
 * @returns {number} Length of the list returned by `getElementsInfo`.
 */
function getNumberOfItems(casper) {
  var gridItems = casper.getElementsInfo(".listview .badge-grid-item");
  return gridItems.length;
}
/**
 * Scrolls the page down by 4000 and, while the "load more" marker is visible
 * and at most 60 items are present, waits for the item count to change before
 * scrolling again.
 *
 * @param {Object} casper - Object exposing `page.scrollPosition`,
 *   `getElementInfo`, `getElementsInfo`, `waitFor` and `echo`.
 */
function tryAndScroll(casper) {
  var page = casper.page;
  var nextTop = page.scrollPosition["top"] + 4000;
  page.scrollPosition = { top: nextTop, left: 0 };

  var loadMore = casper.getElementInfo('.badge-post-grid-load-more');
  if (!loadMore.visible) {
    casper.echo("no more items");
    return;
  }

  var itemsBefore = getNumberOfItems(casper);
  // Stop silently once more than 60 items are present.
  if (!(itemsBefore <= 60)) {
    return;
  }

  casper.waitFor(
    function check() {
      // Satisfied as soon as the item count differs from the snapshot.
      return itemsBefore != getNumberOfItems(casper);
    },
    function then() {
      tryAndScroll(this);
    },
    function onTimeout() {
      this.echo("Timout reached");
    },
    10000
  );
}
// Pairs x[i] with y[i] into objects of the form { page_link, video_link }
// and returns the resulting array.
// NOTE: this block does not parse as written. The `for` header has `,` after
// `var i = 0` where `;` is required, and `page_links` is not declared in this
// function (it is a local variable of linkScraper).
function bineArrays(x, y) {
var result = [];
for(var i = 0, i < page_links.length; i++) {
result[i] = {};
result[i].page_link = x[i];
result[i].video_link = y[i];
}
return result;
}
// For every URL in x: opens it, scrolls with tryAndScroll, pushes the href of
// each '.title' element into page_links, then opens each of those hrefs,
// clicks '.responsivewrapper' and pushes the src of each
// '.badge-youtube-player' element into youtube.
// Returns bineArrays(page_links, youtube).
// NOTE: the thenOpen/then callbacks are what fill page_links and youtube,
// while the return statement sits outside of them, directly after the loop.
function linkScraper(x){
var page_links = [];
var youtube = [];
for (var i = 0; i < x.length; i++)
{ // start for loop
casper.thenOpen(x[i], function() {
//Scroll down for elements
tryAndScroll(this);
casper.then(function() {
this.getElementsInfo('.title').forEach(function(element) {
// skip elements that don't have a href attribute...
if (!element.attributes.href) {
return;
}
page_links.push( element["attributes"]["href"] );
casper.thenOpen(element.attributes.href, function() {
this.click('.responsivewrapper');
}).then(function(){
casper.each(this.getElementsInfo('.badge-youtube-player'), function(casper, element, j) {
youtube.push( element["attributes"]["src"] );
});
// NOTE: the bare `localhost` token after `})` on the next line is not valid syntax.
})localhost;
});
});
});
}
return bineArrays(page_links,youtube);
}
// Runs linkScraper on the given pages and tries to POST the result as JSON
// to the PHP endpoint via jQuery's $.ajax inside this.evaluate().
// Expects to be called with the casper instance as `this`.
function stringifyResult(webpages){
//Pass link array to linkScraper
var linksArr = linkScraper.call(this, webpages);
//send results to php page
// NOTE: `server` (and `response` below) are assigned without `var`.
server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
// NOTE: the check function passed to waitFor has no return statement.
this.waitFor( function() {
response = this.evaluate(function() {
// NOTE: `server` and `linksArr` are used in this function but are not passed
// to evaluate() as arguments.
$.ajax({
type: "POST",
url: server,
data: JSON.stringify(linksArr),
//dataType: 'json',
contentType: "application/json",
success: function (data) {
// NOTE(review): `this` in these jQuery callbacks is presumably not the casper
// instance, so echo()/exit() would not be available — confirm.
this.echo("All done.");
return this.exit();
//return data.responseText;
},
error: function (xhr,status,error){
return this.echo(error);
}
});
});
});
}
// Start the run and add a first step that prints "Starting".
casper.start().then(function() {
this.echo("Starting");
});
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');
// stringifyResult is called right here, and its return value is what
// casper.run() receives. `this` is the top-level `this`, not `casper`.
casper.run(stringifyResult.call(this, urls));
Share
Improve this question
edited Mar 19, 2015 at 12:57
Artjom B.
62k26 gold badges135 silver badges230 bronze badges
asked Mar 18, 2015 at 23:01
MaryCodingMaryCoding
6642 gold badges9 silver badges36 bronze badges
0
1 Answer
Reset to default
7
There are many problems with your code.
1. Syntax error
CasperJS and PhantomJS itself won't show you where the syntax error is, but it is probably in line for(var i = 0, i < page_links.length; i++) {
. Change ,
to ;
. And page_links
is not defined in this line. You probably meant x
.
For the future: CasperJS scripts are plain JavaScript. You can use online tools such as JSLint to find such bugs (and other problems with your code).
There is also a misplaced localhost
.
2. this
this
has many meanings depending on where it is placed.
a) Global this
Take for example the very last line:
casper.run(stringifyResult.call(this, urls));
This this
is outside of all functions. So it refers to window
(yes there are two separate window
objects, one inside of the page context and one outside). It doesn't refer to casper
which you probably expected. Use:
casper.run(stringifyResult.call(casper, urls));
(^ non-final code: see 6.)
b) jQuery callback
The this
inside of:
success: function (data) {
this.echo("All done.");
return this.exit();
},
refers to the jqXHR
object of jQuery (btw, you also cannot return something from an asynchronous function inside a function). It has nothing to do with CasperJS. Furthermore, it is impossible to call CasperJS functions from the page context (inside of casper.evaluate()
), because the page context is sandboxed. It has no access to variables defined outside (this includes linksArr
and server
). See this for more information.
3. Unnecessary waitFor
Your waitFor
will never finish, but you probably designed it this way so that your AJAX request is sent. The problem is that the same broken request will be sent every 20 milliseconds.
4. Sending AJAX request
CasperJS provides a utility to send AJAX requests in the page context: __utils__.sendAJAX()
. It also blocks the execution by default, so there is no need to wait for the request in an out-of-band fashion.
/**
 * Runs linkScraper on the given pages and posts its result as a JSON string
 * to the PHP endpoint, using __utils__.sendAJAX inside the page context.
 * Must be called with the casper instance as `this`.
 *
 * @param {Array} webpages - URLs handed to linkScraper.
 */
function stringifyResult(webpages){
  var linksArr = linkScraper.call(this, webpages);
  //send results to php page
  // Declared with `var` so it stays local instead of becoming an implicit global.
  var server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
  // Both values are passed as explicit arguments to the evaluated function.
  this.evaluate(function(server, linksArr){
    __utils__.sendAJAX(server, "POST", JSON.stringify(linksArr));
  }, server, linksArr);
}
(^ non-final code: see 5.)
5. Returning from asynchronous function
All then*
and wait*
CasperJS functions are asynchronous. By calling them, you schedule the associated step to be executed at the end of the current step.
It means that you cannot return something from linkScraper
, because it contains asynchronous code. Or at least you cannot return the final results, because they are populated by the asynchronous code. You can however return the arrays which will eventually contain the result.
One possible fix would be to move the bineArrays call outside of linkScraper
:
// Skeleton of linkScraper: creates the two result arrays and returns both of
// them in one object ({pl, yt}). The asynchronous steps are elided; the
// comment below marks where they go.
function linkScraper(x){
var page_links = [];
var youtube = [];
// here are asynchronous calls
return {pl: page_links, yt: youtube};
}
/**
 * Runs linkScraper on the given pages, then adds a step that combines the
 * collected lists with bineArrays and posts them as a JSON string to the PHP
 * endpoint via __utils__.sendAJAX.
 * Must be called with the casper instance as `this`. Returns nothing.
 *
 * @param {Array} webpages - URLs handed to linkScraper.
 */
function stringifyResult(webpages){
  var linksObj = linkScraper.call(this, webpages);
  // here linksObj contains empty lists
  this.then(function(){
    // here linksObj contains populated lists
    var linksArr = bineArrays(linksObj.pl, linksObj.yt);
    // Declared with `var` so it stays local instead of becoming an implicit global.
    var server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
    // Both values are passed as explicit arguments to the evaluated function.
    this.evaluate(function(server, linksArr){
      __utils__.sendAJAX(server, "POST", JSON.stringify(linksArr));
    }, server, linksArr);
  });
}
6. exit
casper.exit()
is immediate. So when you call it, the execution stops. You have to be careful when to call it, because it may be the case that there are still steps scheduled.
There is a way to avoid calling casper.exit()
by not providing a callback to casper.run()
. If you provide such a callback, there will be no automatic exit when all steps are executed.
// NOTE(review): stringifyResult.call(casper, urls) runs right away, and the
// stringifyResult shown above has no return statement, so casper.then()
// receives undefined here — confirm that then() accepts a non-function argument.
casper.then(stringifyResult.call(casper, urls)).run();