最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - Casperjs ajax calls - waitForResource and parse errors - Stack Overflow

programmeradmin6浏览0评论

CasperJS is a great tool, and I have been able to understand certain basics. However, I am trying to tackle two issues. First, I split my code into several functions for closure reasons, and now I am getting a parse error. I added the options verbose: true and logLevel: "debug" to see whether CasperJS would point out a line number, but I get no results. Second, I want to make an AJAX call, which is asynchronous. I am using waitFor, but I read THIS article about a more efficient way. How can I set up waitForResource() to track AJAX requests, and how can I display the line on which a parse error occurs?

CODE

var urls = ['/'];
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug"
});

function getNumberOfItems(casper) {
    return casper.getElementsInfo(".listview .badge-grid-item").length;
}

function tryAndScroll(casper) {
  casper.page.scrollPosition = { top: casper.page.scrollPosition["top"] + 4000, left: 0 };
  var info = casper.getElementInfo('.badge-post-grid-load-more');
  if (info.visible) {
    var curItems = getNumberOfItems(casper);
    if( curItems <= 60 ) {
        casper.waitFor(function check(){
          return curItems != getNumberOfItems(casper);
        }, function then(){
          tryAndScroll(this);
        }, function onTimeout(){
          this.echo("Timout reached");
        }, 10000);
    }
  } else {
    casper.echo("no more items");
  }

}

function bineArrays(x, y) {
    var result = [];
    for(var i = 0, i < page_links.length; i++) {
        result[i] = {};
        result[i].page_link = x[i];
        result[i].video_link = y[i];
    }
    return result;
}

function linkScraper(x){
    var page_links = [];
    var youtube = [];
    for (var i = 0; i < x.length; i++)
    { // start for loop
        casper.thenOpen(x[i], function() {
            //Scroll down for elements
            tryAndScroll(this);
            casper.then(function() {
              this.getElementsInfo('.title').forEach(function(element) {
                // skip elements that don't have a href attribute...
                if (!element.attributes.href) {
                  return;
                }
                page_links.push( element["attributes"]["href"] );
                casper.thenOpen(element.attributes.href, function() {
                  this.click('.responsivewrapper');
                }).then(function(){
                  casper.each(this.getElementsInfo('.badge-youtube-player'), function(casper, element, j) {
                    youtube.push( element["attributes"]["src"] );
                  });
                })localhost;
              });
            });
        });
    }
    return bineArrays(page_links,youtube);
}

function stringifyResult(webpages){
    //Pass link array to linkScraper
    var linksArr = linkScraper.call(this, webpages);
    //send results to php page
     server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
     this.waitFor( function() {
         response = this.evaluate(function() {
            $.ajax({
                type: "POST",
                url: server,
                data: JSON.stringify(linksArr),
                //dataType: 'json',
                contentType: "application/json",
                success: function (data) {
                    this.echo("All done.");
                    return this.exit();
                    //return data.responseText;
                },
                error: function (xhr,status,error){
                    return this.echo(error);
                }
            });
         });
     });
}



casper.start().then(function() {
    this.echo("Starting");
});
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');
casper.run(stringifyResult.call(this, urls));

CasperJS is a great tool, and I have been able to understand certain basics. However, I am trying to tackle two issues. First, I split my code into several functions for closure reasons, and now I am getting a parse error. I added the options verbose: true and logLevel: "debug" to see whether CasperJS would point out a line number, but I get no results. Second, I want to make an AJAX call, which is asynchronous. I am using waitFor, but I read THIS article about a more efficient way. How can I set up waitForResource() to track AJAX requests, and how can I display the line on which a parse error occurs?

CODE

var urls = ['http://9gag.tv/'];
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug"
});

function getNumberOfItems(casper) {
    return casper.getElementsInfo(".listview .badge-grid-item").length;
}

function tryAndScroll(casper) {
  casper.page.scrollPosition = { top: casper.page.scrollPosition["top"] + 4000, left: 0 };
  var info = casper.getElementInfo('.badge-post-grid-load-more');
  if (info.visible) {
    var curItems = getNumberOfItems(casper);
    if( curItems <= 60 ) {
        casper.waitFor(function check(){
          return curItems != getNumberOfItems(casper);
        }, function then(){
          tryAndScroll(this);
        }, function onTimeout(){
          this.echo("Timout reached");
        }, 10000);
    }
  } else {
    casper.echo("no more items");
  }

}

function bineArrays(x, y) {
    var result = [];
    for(var i = 0, i < page_links.length; i++) {
        result[i] = {};
        result[i].page_link = x[i];
        result[i].video_link = y[i];
    }
    return result;
}

function linkScraper(x){
    var page_links = [];
    var youtube = [];
    for (var i = 0; i < x.length; i++)
    { // start for loop
        casper.thenOpen(x[i], function() {
            //Scroll down for elements
            tryAndScroll(this);
            casper.then(function() {
              this.getElementsInfo('.title').forEach(function(element) {
                // skip elements that don't have a href attribute...
                if (!element.attributes.href) {
                  return;
                }
                page_links.push( element["attributes"]["href"] );
                casper.thenOpen(element.attributes.href, function() {
                  this.click('.responsivewrapper');
                }).then(function(){
                  casper.each(this.getElementsInfo('.badge-youtube-player'), function(casper, element, j) {
                    youtube.push( element["attributes"]["src"] );
                  });
                })localhost;
              });
            });
        });
    }
    return bineArrays(page_links,youtube);
}

function stringifyResult(webpages){
    //Pass link array to linkScraper
    var linksArr = linkScraper.call(this, webpages);
    //send results to php page
     server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
     this.waitFor( function() {
         response = this.evaluate(function() {
            $.ajax({
                type: "POST",
                url: server,
                data: JSON.stringify(linksArr),
                //dataType: 'json',
                contentType: "application/json",
                success: function (data) {
                    this.echo("All done.");
                    return this.exit();
                    //return data.responseText;
                },
                error: function (xhr,status,error){
                    return this.echo(error);
                }
            });
         });
     });
}



casper.start().then(function() {
    this.echo("Starting");
});
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');
casper.run(stringifyResult.call(this, urls));
Share Improve this question edited Mar 19, 2015 at 12:57 Artjom B. 62k26 gold badges135 silver badges230 bronze badges asked Mar 18, 2015 at 23:01 MaryCodingMaryCoding 6642 gold badges9 silver badges36 bronze badges 0
Add a comment  | 

1 Answer 1

Reset to default 7

There are many problems with your code.

1. Syntax error

CasperJS and PhantomJS themselves won't show you where the syntax error is, but it is probably in the line for(var i = 0, i < page_links.length; i++) {. Change the , after var i = 0 to ;. Also, page_links is not defined in this function. You probably meant x.
For the future: CasperJS scripts are plain JavaScript. You can use online tools such as jslint.com to find such bugs (and other problems with your code).

There is also a misplaced localhost.

2. this

this has many meanings depending on where it is placed.

a) Global this

Take for example the very last line:

casper.run(stringifyResult.call(this, urls));

This this is outside of all functions. So it refers to window (yes there are two separate window objects, one inside of the page context and one outside). It doesn't refer to casper which you probably expected. Use:

casper.run(stringifyResult.call(casper, urls));

(^ non-final code: see 6.)

b) jQuery callback

The this inside of:

success: function (data) {
    this.echo("All done.");
    return this.exit();
},

refers to the jqXHR object of jQuery (btw, you also cannot return something from an asynchronous function inside a function). It has nothing to do with CasperJS. Furthermore, it is impossible to call CasperJS functions from the page context (inside of casper.evaluate()), because the page context is sandboxed. It has no access to variables defined outside (this includes linksArr and server). See this for more information.

3. Unnecessary waitFor

Your waitFor will never finish, but you probably designed it this way so that your AJAX request is sent. The problem is that the same broken request will be sent every 20 milliseconds.

4. Sending AJAX request

CasperJS provides a utility to send AJAX requests in the page context: __utils__.sendAJAX(). It also blocks the execution by default, so there is no need to wait for the request in an out-of-band fashion.

/**
 * Scrapes the given pages and POSTs the collected links to the PHP endpoint.
 *
 * Intermediate version: linkScraper only schedules asynchronous steps, so
 * linksArr is not yet populated when it is sent from here.
 *
 * Must be invoked with `this` bound to the casper instance
 * (e.g. stringifyResult.call(casper, urls)).
 *
 * @param {string[]} webpages - URLs handed to linkScraper.
 */
function stringifyResult(webpages){
    var linksArr = linkScraper.call(this, webpages);

    //send results to php page
    // NOTE: `server` is assigned without `var`, so it becomes an implicit global.
    server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
    // The page context is sandboxed and cannot see outer variables, so
    // server and linksArr are passed explicitly as evaluate() arguments.
    this.evaluate(function(server, linksArr){
        // sendAJAX blocks by default, so no waitFor is needed around it.
        __utils__.sendAJAX(server, "POST", JSON.stringify(linksArr));
    }, server, linksArr);
}

(^ non-final code: see 5.)

5. Returning from asynchronous function

All then* and wait* CasperJS functions are asynchronous. By calling them, you schedule the associated step to be executed at the end of the current step.

It means that you cannot return something from linkScraper, because it contains asynchronous code. Or at least you cannot return the final results, because they are populated by the asynchronous code. You can however return the arrays which will eventually contain the result.

One possible fix would be to move the combine call (bineArrays) outside of linkScraper:

/**
 * Skeleton of the scraper: schedules the asynchronous scraping steps and
 * returns the (still empty) arrays those steps will fill in later.
 *
 * @param {string[]} x - URLs of the pages to scrape.
 * @returns {{pl: Array, yt: Array}} pl holds the page links, yt the video
 *     links; both are populated only after the scheduled steps have run.
 */
function linkScraper(x){
    var page_links = [];
    var youtube = [];
    // here are asynchronous calls
    // Return the array references themselves so the caller can read them
    // once the asynchronous steps have populated them.
    return {pl: page_links, yt: youtube};
}
/**
 * Schedules the scraping of the given pages and, in a later step, combines
 * the results and POSTs them to the PHP endpoint.
 *
 * Must be invoked with `this` bound to the casper instance
 * (e.g. stringifyResult.call(casper, urls)).
 *
 * @param {string[]} webpages - URLs handed to linkScraper.
 */
function stringifyResult(webpages){
    var linksObj = linkScraper.call(this, webpages);
    // here linksObj contains empty lists
    // then() schedules this callback after the steps scheduled by linkScraper.
    this.then(function(){
        // here linksObj contains populated lists
        var linksArr = bineArrays(linksObj.pl, linksObj.yt);

        // NOTE: `server` is assigned without `var`, so it becomes an implicit global.
        server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
        // The page context is sandboxed, so the values it needs are passed
        // explicitly as evaluate() arguments.
        this.evaluate(function(server, linksArr){
            // sendAJAX blocks by default, so no extra waiting is required.
            __utils__.sendAJAX(server, "POST", JSON.stringify(linksArr));
        }, server, linksArr); 
    });
}

6. exit

casper.exit() is immediate. So when you call it, the execution stops. You have to be careful when to call it, because it may be the case that there are still steps scheduled.

There is a way to avoid calling casper.exit() by not providing a callback to casper.run(). If you provide such a callback, there will be no automatic exit when all steps are executed.

casper.then(stringifyResult.call(casper, urls)).run();
发布评论

评论列表(0)

  1. 暂无评论