最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - How to print an HTML document using Puppeteer? - Stack Overflow

programmeradmin3浏览0评论

I recently started crawling the web with Puppeteer. Below is the code I use to extract product names from a shopping mall site.

const puppeteer = require('puppeteer');

/**
 * Pause for a fixed number of milliseconds.
 *
 * Used in place of the deprecated `page.waitFor(ms)` so the script does not
 * depend on a Puppeteer helper that newer releases no longer provide.
 *
 * @param {number} ms - how long to wait, in milliseconds
 * @returns {Promise<void>} resolves after the delay
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// Opens the shopping site, types a search term into the search box, and
// prints the text of the first 10 matching product links.
(async () => {

    const width = 1600;
    const height = 1040;

    // NOTE(review): Puppeteer documents `slowMo` as a delay in milliseconds;
    // `true` is kept from the original — confirm the intended value.
    const option = { headless: false, slowMo: true, args: [`--window-size=${width},${height}`] };

    const browser = await puppeteer.launch(option);

    try {
        const page = await browser.newPage();
        await page.setViewport({ width, height });

        // page.goto() resolves once the page has loaded, so a separate
        // page.waitForNavigation() promise is not needed here.
        await page.goto('https://shopping.naver.com/home/p/index.nhn');
        await sleep(2000);

        const textBoxId = 'co_srh_input';
        // The search term is Korean for "socks".
        await page.type('.' + textBoxId, '양말', {delay: 100});
        await page.keyboard.press('Enter');

        // Give the search results time to start loading before polling for them.
        await sleep(5000);
        await page.waitForSelector('div.info > a.tit');

        const stores = await page.evaluate(() => {
            const links = Array.from(document.querySelectorAll('div.info > a.tit'));
            return links.map(link => link.innerText).slice(0, 10);   // keep only the first 10 products
        });

        console.log(stores);
    } finally {
        // Always close the browser, even when a step above throws,
        // so no browser process is left running.
        await browser.close();
    }

})().catch((error) => {
    // Report the failure instead of leaving an unhandled rejection.
    console.error(error);
    process.exitCode = 1;
});

My question: how can I write the crawled results to an HTML document (without using a database)? Please explain with sample code.

I recently started crawling the web with Puppeteer. Below is the code I use to extract product names from a shopping mall site.

const puppeteer = require('puppeteer');

/**
 * Wait for the given number of milliseconds.
 *
 * Stands in for the deprecated `page.waitFor(ms)` helper, so the script
 * relies only on standard timers for fixed delays.
 *
 * @param {number} ms - delay in milliseconds
 * @returns {Promise<void>} resolves once the delay has elapsed
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// Searches the shopping site for a term and logs the text of the first
// 10 product links found on the results page.
(async () => {

    const width = 1600;
    const height = 1040;

    // NOTE(review): `slowMo` is documented by Puppeteer as a number of
    // milliseconds; the original `true` is preserved — confirm intent.
    const option = { headless: false, slowMo: true, args: [`--window-size=${width},${height}`] };

    const browser = await puppeteer.launch(option);

    try {
        const page = await browser.newPage();
        await page.setViewport({ width, height });

        // No extra waitForNavigation(): page.goto() already waits for the load.
        await page.goto('https://shopping.naver.com/home/p/index.nhn');
        await sleep(2000);

        const textBoxId = 'co_srh_input';
        // The typed term is Korean for "socks".
        await page.type('.' + textBoxId, '양말', {delay: 100});
        await page.keyboard.press('Enter');

        // Fixed pause so the results page can begin loading before polling.
        await sleep(5000);
        await page.waitForSelector('div.info > a.tit');

        const stores = await page.evaluate(() => {
            const links = Array.from(document.querySelectorAll('div.info > a.tit'));
            return links.map(link => link.innerText).slice(0, 10);   // keep only the first 10 products
        });

        console.log(stores);
    } finally {
        // Close the browser on both success and failure to avoid leaking it.
        await browser.close();
    }

})().catch((error) => {
    // Surface the error and mark the process as failed.
    console.error(error);
    process.exitCode = 1;
});

My question: how can I write the crawled results to an HTML document (without using a database)? Please explain with sample code.

Share Improve this question edited Nov 10, 2018 at 18:34 Grant Miller 29k 16 gold badges 155 silver badges 168 bronze badges asked Nov 10, 2018 at 16:51 Inkweon Kim 131 2 gold badges 2 silver badges 8 bronze badges
Add a comment  | 

2 Answers 2

Reset to default 8

I used the approach described on blog.kowalczyk.info:

const puppeteer = require("puppeteer");
const fs = require("fs");

/**
 * Open a page in a Puppeteer-launched browser and save its HTML to disk.
 *
 * Navigates to the target URL, waits until the network is mostly idle,
 * pauses briefly, then writes the page's serialized HTML to `index.html`
 * in the current working directory.
 *
 * @returns {Promise<void>} resolves once the file is written and the browser is closed
 * @throws propagates any error from Puppeteer or from the file write
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://www.google.com/", { waitUntil: "networkidle2" });
    // hacky defensive move but I don't know a better way:
    // wait a bit so that the browser finishes executing JavaScript.
    // A plain timer replaces the deprecated page.waitFor(ms) helper.
    await new Promise((resolve) => setTimeout(resolve, 1 * 1000));
    const html = await page.content();
    fs.writeFileSync("index.html", html);
  } finally {
    // Close the browser even if navigation or the write fails.
    await browser.close();
  }
}

run().catch((error) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});

fs.writeFile()

You can use the following write_file function that returns a Promise that resolves or rejects when fs.writeFile() succeeds or fails.

Then, you can await the Promise from within your anonymous, asynchronous function and check whether or not the data was written to the file:

'use strict';

const fs = require('fs');
const puppeteer = require('puppeteer');

/**
 * Promise wrapper around the callback-based fs.writeFile().
 *
 * Writes `data` to `file` as UTF-8 text.
 *
 * @param {string} file - path of the file to write
 * @param {string} data - text content to write
 * @returns {Promise<boolean>} resolves with `true` on success; on failure the
 *   error is logged with console.error and the promise rejects with `false`
 */
const write_file = (file, data) => {
  const executor = (resolve, reject) => {
    const onWritten = (error) => {
      if (!error) {
        resolve(true);
        return;
      }
      // Log the underlying error here, because callers only receive `false`.
      console.error(error);
      reject(false);
    };

    fs.writeFile(file, data, 'utf8', onWritten);
  };

  return new Promise(executor);
};

// Example usage: collect the product names from the page and save them to
// example.html. The elided parts ("// ...") stand for the browser/page setup
// and teardown that are not shown in this snippet.
(async () => {
  
  // ...
  
  const stores = await page.evaluate(() => {
    return Array.from(document.querySelectorAll('div.info > a.tit'), link => link.innerText).slice(0, 10); // keep only the first 10 products
  });
  
  // write_file() signals failure by rejecting, so the failure has to be
  // caught; comparing the awaited value with `false` would never match.
  try {
    await write_file('example.html', stores.toString());
  } catch (error) {
    console.error('Error: Unable to write stores to example.html.');
  }
  
  // ...
  
})(); // invoke the function; without the trailing () it is defined but never run
发布评论

评论列表(0)

  1. 暂无评论