comment_list.inc': $pre .= $default_pre .= 'comment_list.inc.htm'; break; case 'message': $pre .= $default_pre .= 'message.htm'; break; case 'tag_list': $pre .= $default_pre .= 'tag_list.htm'; break; case 'tag': $pre .= $default_pre .= 'tag.htm'; break; case 'flag': $pre .= $default_pre .= 'flag.htm'; break; case 'my': $pre .= $default_pre .= 'my.htm'; break; case 'my_password': $pre .= $default_pre .= 'my_password.htm'; break; case 'my_bind': $pre .= $default_pre .= 'my_bind.htm'; break; case 'my_avatar': $pre .= $default_pre .= 'my_avatar.htm'; break; case 'home_article': $pre .= $default_pre .= 'home_article.htm'; break; case 'home_comment': $pre .= $default_pre .= 'home_comment.htm'; break; case 'user': $pre .= $default_pre .= 'user.htm'; break; case 'user_login': $pre .= $default_pre .= 'user_login.htm'; break; case 'user_create': $pre .= $default_pre .= 'user_create.htm'; break; case 'user_resetpw': $pre .= $default_pre .= 'user_resetpw.htm'; break; case 'user_resetpw_complete': $pre .= $default_pre .= 'user_resetpw_complete.htm'; break; case 'user_comment': $pre .= $default_pre .= 'user_comment.htm'; break; case 'single_page': $pre .= $default_pre .= 'single_page.htm'; break; case 'search': $pre .= $default_pre .= 'search.htm'; break; case 'operate_sticky': $pre .= $default_pre .= 'operate_sticky.htm'; break; case 'operate_close': $pre .= $default_pre .= 'operate_close.htm'; break; case 'operate_delete': $pre .= $default_pre .= 'operate_delete.htm'; break; case 'operate_move': $pre .= $default_pre .= 'operate_move.htm'; break; case '404': $pre .= $default_pre .= '404.htm'; break; case 'read_404': $pre .= $default_pre .= 'read_404.htm'; break; case 'list_404': $pre .= $default_pre .= 'list_404.htm'; break; default: $pre .= $default_pre .= theme_mode_pre(); break; } if ($config['theme']) { $conffile = APP_PATH . 'view/template/' . $config['theme'] . '/conf.json'; $json = is_file($conffile) ? 
xn_json_decode(file_get_contents($conffile)) : array(); } !empty($json['installed']) and $path_file = APP_PATH . 'view/template/' . $config['theme'] . '/htm/' . ($id ? $id . '_' : '') . $pre; (empty($path_file) || !is_file($path_file)) and $path_file = APP_PATH . 'view/template/' . $config['theme'] . '/htm/' . $pre; if (!empty($config['theme_child']) && is_array($config['theme_child'])) { foreach ($config['theme_child'] as $theme) { if (empty($theme) || is_array($theme)) continue; $path_file = APP_PATH . 'view/template/' . $theme . '/htm/' . ($id ? $id . '_' : '') . $pre; !is_file($path_file) and $path_file = APP_PATH . 'view/template/' . $theme . '/htm/' . $pre; } } !is_file($path_file) and $path_file = APP_PATH . ($dir ? 'plugin/' . $dir . '/view/htm/' : 'view/htm/') . $default_pre; return $path_file; } function theme_mode_pre($type = 0) { global $config; $mode = $config['setting']['website_mode']; $pre = ''; if (1 == $mode) { $pre .= 2 == $type ? 'portal_category.htm' : 'portal.htm'; } elseif (2 == $mode) { $pre .= 2 == $type ? 'flat_category.htm' : 'flat.htm'; } else { $pre .= 2 == $type ? 'index_category.htm' : 'index.htm'; } return $pre; } ?>javascript - Using Puppeteer, how can I open a page, get the data, then go back to the previous page to get the next page on the
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - Using Puppeteer, how can I open a page, get the data, then go back to the previous page to get the next page on the

programmeradmin10浏览0评论

SITUATION:

Here is what I want to do:

1) I load page 0. Page 0 contains clickable links to different pages. I want to load the content of all those pages. So:

2) Click on the first link. Load page 1. Get Data. Go back to the previous page (Page 0)

3) Click on the second link which loads page 2. Etc.. ad infinitum until all links have been clicked.

With my current code, page 0 loads, then the first link is clicked and loads page 1, then there is a crash with the following error:

(node:2629) UnhandledPromiseRejectionWarning: Error: Protocol error (Runtime.callFunctionOn): Execution context was destroyed.

QUESTION:

What am I doing wrong and how can I make my script behave the way I intended ?


CODE:

const puppeteer = require('puppeteer');
const fs = require('fs');

// Launches a non-headless browser, opens the listing page, and attempts to visit
// every `.item` link from inside a single page.evaluate() call.
// Resolves with the value returned by the evaluate callback (the `data` array).
let getData = async () => {
    const browser = await puppeteer.launch({headless: false});
    const page = await browser.newPage();

    await page.goto('url', { waitUntil: 'networkidle2' });
    // `...` are placeholders left by the author; real dimensions are needed for this to parse.
    await page.setViewport({width: ..., height:...});

    // Everything inside this callback is handed to page.evaluate(), not run as ordinary script code.
    const result = await page.evaluate(async () => {
        let data = []; 
        // First `.items` container, then every `.item` inside it.
        let elements = document.querySelector('.items').querySelectorAll('.item'); 

        for (const element of elements) {

            // NOTE(review): the question reports the crash
            // ("Execution context was destroyed") right after this first click loads the next page.
            element.click();
            // Fixed 2-second pause, presumably to let the clicked page load.
            await new Promise((resolve) => setTimeout(resolve, 2000));

            // GETTING THE DATA THEN PUSHING IT INTO THE DATA ARRAY

            // NOTE(review): `page` is declared in the outer function; it is presumably
            // not available inside this evaluate callback -- confirm against the Puppeteer docs.
            await page.goBack();
        }

        return data; // Return our data array

    });

    // Not awaited: the function returns without waiting for the browser to finish closing.
    browser.close();
    return result; // Return the data
};

SITUATION:

Here is what I want to do:

1) I load page 0. Page 0 contains clickable links to different pages. I want to load the content of all those pages. So:

2) Click on the first link. Load page 1. Get Data. Go back to the previous page (Page 0)

3) Click on the second link which loads page 2. Etc.. ad infinitum until all links have been clicked.

With my current code, page 0 loads, then the first link is clicked and loads page 1, then there is a crash with the following error:

(node:2629) UnhandledPromiseRejectionWarning: Error: Protocol error (Runtime.callFunctionOn): Execution context was destroyed.

QUESTION:

What am I doing wrong and how can I make my script behave the way I intended ?


CODE:

const puppeteer = require('puppeteer');
const fs = require('fs');

// Launches a non-headless browser, opens the listing page, and attempts to visit
// every `.item` link from inside a single page.evaluate() call.
// Resolves with the value returned by the evaluate callback (the `data` array).
let getData = async () => {
    const browser = await puppeteer.launch({headless: false});
    const page = await browser.newPage();

    await page.goto('url', { waitUntil: 'networkidle2' });
    // `...` are placeholders left by the author; real dimensions are needed for this to parse.
    await page.setViewport({width: ..., height:...});

    // Everything inside this callback is handed to page.evaluate(), not run as ordinary script code.
    const result = await page.evaluate(async () => {
        let data = []; 
        // First `.items` container, then every `.item` inside it.
        let elements = document.querySelector('.items').querySelectorAll('.item'); 

        for (const element of elements) {

            // NOTE(review): the question reports the crash
            // ("Execution context was destroyed") right after this first click loads the next page.
            element.click();
            // Fixed 2-second pause, presumably to let the clicked page load.
            await new Promise((resolve) => setTimeout(resolve, 2000));

            // GETTING THE DATA THEN PUSHING IT INTO THE DATA ARRAY

            // NOTE(review): `page` is declared in the outer function; it is presumably
            // not available inside this evaluate callback -- confirm against the Puppeteer docs.
            await page.goBack();
        }

        return data; // Return our data array

    });

    // Not awaited: the function returns without waiting for the browser to finish closing.
    browser.close();
    return result; // Return the data
};
Share Improve this question asked Aug 6, 2018 at 11:23 TheProgrammer 1,489 4 gold badges 29 silver badges 54 bronze badges 3
  • If you add handlers for console and error, do you see any errors? – Aankhen Commented Aug 6, 2018 at 12:02
  • have you considered just making a scraper instead of actually loading the page in a window? – codeWonderland Commented Aug 6, 2018 at 12:09
  • @codeWonderland Yes. I need to do it this way. – TheProgrammer Commented Aug 6, 2018 at 12:16
Add a comment  | 

5 Answers 5

Reset to default 21

OK here's my take on this. Firstly, you're using the evaluate method incorrectly. Mainly because you don't actually need it but also because you're asking it to do something it can't do. Just to explain: the evaluate method operates in the context of your web page only. It pretty much only allows you to execute Javascript instructions directly on the current page in the remote browser. It has no concept of variables that you've declared externally to that function - so in this case, when you do this:

await page.goBack();

The evaluate method has no idea what page is nor how to use it. Now there are ways to inject page into the evaluate method but that won't resolve your problem either. Puppeteer API calls simply won't work inside an evaluate method (I've tried this myself and it always returns an exception).

So now let's get back to the problem you do have - what you're doing in the evaluate function is retrieving one UI element with class .items and then searching for every UI element within that UI element with class .item. You're then looping through all of the found UI elements, clicking on each one, grabbing some kind of data and then going back to click on the next one.

You can achieve all of this without ever using the evaluate method and, instead, using Puppeteer API calls as follows:

// Returns fresh handles for every '.item' inside the first '.items' container.
// Element handles belong to the document they were queried from, so this is
// called again after every navigation instead of reusing the old handles.
const findItems = async () => {
  const itemsList = await page.$('.items'); // '.$' is the Puppeteer equivalent of 'querySelector'
  return itemsList ? itemsList.$$('.item') : []; // '.$$' is the Puppeteer equivalent of 'querySelectorAll'
};

const data = [];
const itemCount = (await findItems()).length;

// A plain loop rather than forEach: forEach ignores the promise returned by an
// async callback, so every click/goBack would be started at once instead of
// one page being fully processed before the next.
for (let i = 0; i < itemCount; i++) {
  const elements = await findItems();
  if (i >= elements.length) break; // the list shrank between visits

  // Begin waiting for the navigation before clicking so it cannot be missed.
  // (Assumes each item triggers a full page navigation, as described in the question.)
  await Promise.all([
    page.waitForNavigation(),
    elements[i].click(),
  ]);

  // Get the data you want here and push it into the data array

  await page.goBack();
}

Hope this helps you out!

Instead of navigating back and forth to click the next link from the first page, it would make more sense to store the links from the first page in an array, and then open them one at a time with page.goto().

In other words, you can accomplish this task using the following example:

// Open the index page once and harvest every link target up front.
await page.goto('https://example.com/page-1');

const urls = await page.evaluate(() => {
  const anchors = document.querySelectorAll('.link');
  return [...anchors].map((anchor) => anchor.href);
});

// Visit each collected URL directly, so there is never a need to navigate back.
for (const url of urls) {
  await page.goto(url);

  // Get the data ...
}

@AJC24's answer did not work for me. The problem was that the page context was destroyed when clicking in and coming back to the original page.

What I ended up having to do was something similar to what Grant suggested. I collected all of the button identifiers in an array and upon going back to the original page I would click in again.

Using the loop from @Grant's answer, I still got this error:

Execution context was destroyed, most likely because of a navigation.

Then I made it open a new tab on each iteration, and that solved the problem!

// Visit every collected URL in its own tab and close the tab when done.
for (let i = 0, total_urls = urls.length; i < total_urls; i++) {
  // A fresh tab per URL, so each visit gets its own page object.
  const page = await browser.newPage();
  try {
    // The options object must be the second argument of goto();
    // `timeout: 0` disables the navigation timeout.
    await page.goto(urls[i], { waitUntil: 'networkidle0', timeout: 0 });

    // Get the data ...
  } finally {
    // Without this, every iteration leaves another tab open.
    await page.close();
  }
}

You should also be able to do it inside the evaluate function with window.history.go(-1). See the MDN documentation for history.go.

const puppeteer = require('puppeteer');
const fs = require('fs');

// Variant of the question's code that goes back with window.history.go(-1)
// from inside the evaluate callback instead of calling page.goBack().
// Resolves with the value returned by the evaluate callback (the `data` array).
let getData = async () => {
    const browser = await puppeteer.launch({headless: false});
    const page = await browser.newPage();

    await page.goto('url', { waitUntil: 'networkidle2' });
    // `...` are placeholders left by the author; real dimensions are needed for this to parse.
    await page.setViewport({width: ..., height:...});

    // Everything inside this callback is handed to page.evaluate(), not run as ordinary script code.
    const result = await page.evaluate(async () => {
        let data = []; 
        // First `.items` container, then every `.item` inside it.
        let elements = document.querySelector('.items').querySelectorAll('.item'); 

        for (const element of elements) {

            // NOTE(review): if this click navigates to another document, the running
            // evaluate call presumably still fails with "Execution context was destroyed",
            // exactly as in the question -- verify before relying on this approach.
            element.click();
            // Fixed 2-second pause, presumably to let the clicked page load.
            await new Promise((resolve) => setTimeout(resolve, 2000));

            // GETTING THE DATA THEN PUSHING IT INTO THE DATA ARRAY

            // history.go() does not return a promise, so this `await` does not wait for the navigation.
            await window.history.go(-1);
        }

        return data; // Return our data array

    });

    // Not awaited: the function returns without waiting for the browser to finish closing.
    browser.close();
    return result; // Return the data
};

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论