I've already tried everything mentioned in "Error: Evaluation Failed: ReferenceError: util is not defined" and in "How to pass required module object to puppeteer page.evaluate". Specifically, I've tried converting url.js using browserify (I've also tried converting url.js and punycode.js together), and I've added the corresponding script (bundle.js) to the page environment.
I'm trying to use the url module inside page.evaluate() in puppeteer. Here's a very simple example to show the error:
const puppeteer = require('puppeteer');
// Minimal reproduction of the problem described in the surrounding text:
// load a page, inject the browserified bundle (bundle.js), then try to call
// url.format() from inside page.evaluate().
puppeteer.launch({dumpio: true}).then(async browser => {
  const page = await browser.newPage();
  // Page whose anchors are inspected; its final URL is used as the base for
  // resolving relative hrefs below.
  const response = await page.goto('https://www.google.com');
  await page.waitFor(5000);
  const pageUrl = page.url();
  // Inject the bundle into the page before evaluating.
  await page.addScriptTag({path: 'bundle.js'});
  // The callback below runs in the page, not in Node; pageUrl is passed in as
  // an argument because the callback cannot see Node-side variables.
  await page.evaluate(pageUrl => {
    const anchors = Array.from(document.querySelectorAll('a'));
    for (let anchor of anchors) {
      const href = anchor.getAttribute('href');
      let hrefUrl;
      try {
        hrefUrl = new URL(href);
      } catch (e) {
        hrefUrl = new URL(href, pageUrl);
      }
      console.log(url.format(hrefUrl, {fragment: false})); // the reported ReferenceError: `url` is not defined in the page
    }
  }, pageUrl);
  await page.close();
  await browser.close();
});
This example generates the following error:
(node:23667) UnhandledPromiseRejectionWarning: Error: Evaluation failed: ReferenceError: url is not defined at pageUrl (puppeteer_evaluation_script:11:19) at ExecutionContext.evaluateHandle (/home/webb/node_modules/puppeteer/lib/ExecutionContext.js:97:13) at <anonymous> at process._tickCallback (internal/process/next_tick.js:188:7)
What else do I need to do to get the url module recognized?
I've already tried everything mentioned in "Error: Evaluation Failed: ReferenceError: util is not defined" and in "How to pass required module object to puppeteer page.evaluate". Specifically, I've tried converting url.js using browserify (I've also tried converting url.js and punycode.js together), and I've added the corresponding script (bundle.js) to the page environment.
I'm trying to use the url module inside page.evaluate() in puppeteer. Here's a very simple example to show the error:
const puppeteer = require('puppeteer');
// Minimal reproduction of the problem described in the surrounding text:
// load a page, inject the browserified bundle (bundle.js), then try to call
// url.format() from inside page.evaluate().
puppeteer.launch({dumpio: true}).then(async browser => {
  const page = await browser.newPage();
  // Page whose anchors are inspected; its final URL is used as the base for
  // resolving relative hrefs below.
  const response = await page.goto('https://www.google.com');
  await page.waitFor(5000);
  const pageUrl = page.url();
  // Inject the bundle into the page before evaluating.
  await page.addScriptTag({path: 'bundle.js'});
  // The callback below runs in the page, not in Node; pageUrl is passed in as
  // an argument because the callback cannot see Node-side variables.
  await page.evaluate(pageUrl => {
    const anchors = Array.from(document.querySelectorAll('a'));
    for (let anchor of anchors) {
      const href = anchor.getAttribute('href');
      let hrefUrl;
      try {
        hrefUrl = new URL(href);
      } catch (e) {
        hrefUrl = new URL(href, pageUrl);
      }
      console.log(url.format(hrefUrl, {fragment: false})); // the reported ReferenceError: `url` is not defined in the page
    }
  }, pageUrl);
  await page.close();
  await browser.close();
});
This example generates the following error:
(node:23667) UnhandledPromiseRejectionWarning: Error: Evaluation failed: ReferenceError: url is not defined at pageUrl (puppeteer_evaluation_script:11:19) at ExecutionContext.evaluateHandle (/home/webb/node_modules/puppeteer/lib/ExecutionContext.js:97:13) at <anonymous> at process._tickCallback (internal/process/next_tick.js:188:7)
What else do I need to do to get the url module recognized?
Share Improve this question asked Feb 24, 2019 at 18:18 user4487338user4487338 781 silver badge6 bronze badges 3- 1 Why don't you just evaluate and return the list of `a` hrefs, and then do the URL parsing on the Node side of things? If you still want to do it on the client, look at the expose function method: github.com/GoogleChrome/puppeteer/blob/v1.12.2/docs/… – dotconnor Commented Feb 24, 2019 at 18:22
- Because the actual use case is much more complicated and deals with a lot of DOM manipulations. I made this simple example just to illustrate the error and my inability to use the url module inside evaluate. – user4487338 Commented Feb 24, 2019 at 18:27
- But to your point, I could break things up and pass data out of one evaluate, process that data with the url module, and then pass the processed data back into another evaluate. I would just prefer to be able to do it all at once inside one evaluate. – user4487338 Commented Feb 24, 2019 at 18:35
2 Answers
Reset to default 4 Variant with page.exposeFunction():
'use strict';
const url = require('url');
const puppeteer = require('puppeteer');
// Launch a browser, expose the Node-side formatURL helper to the page, and log
// the resolved URL (fragment removed) of every anchor from inside
// page.evaluate(). The url module stays in Node; only the helper is callable
// from the page.
puppeteer.launch({ dumpio: true }).then(async browser => {
  try {
    const page = await browser.newPage();
    // Register the helper under the name the evaluate() callback calls below.
    await page.exposeFunction('formatURL', formatURL);
    await page.goto('https://www.google.com');
    await page.waitFor(5000);
    const pageUrl = page.url();
    await page.evaluate(async (pageUrl) => {
      const anchors = Array.from(document.querySelectorAll('a'));
      for (const anchor of anchors) {
        const href = anchor.getAttribute('href');
        // The exposed helper is awaited: the call is answered by Node, not by
        // the page itself.
        const hrefUrl = await formatURL(href, pageUrl);
        console.log(hrefUrl);
      }
    }, pageUrl);
    await page.close();
  } finally {
    // Close the browser even when a step above rejects, so a failed run does
    // not leave the browser process behind.
    await browser.close();
  }
}).catch((err) => {
  // Report failures explicitly instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
/**
 * Resolve an anchor's href to an absolute URL string with the fragment removed.
 *
 * The href is first parsed on its own (absolute URL); if that fails it is
 * resolved against `base`.
 *
 * @param {?string} href - Raw value of the anchor's href attribute; null or
 *   undefined when the attribute is absent.
 * @param {string} base - URL used to resolve a relative href.
 * @returns {?string} The formatted URL without its fragment, or null when no
 *   href was supplied.
 * @throws {TypeError} If href is not absolute and cannot be resolved against base.
 */
function formatURL(href, base) {
  // Without this guard a missing href would be coerced to the string "null"
  // by the fallback below and resolved against base, yielding "<base>/null".
  if (href == null) {
    return null;
  }

  let parsed;
  try {
    parsed = new URL(href);
  } catch (e) {
    // Not an absolute URL on its own: treat it as relative to the page URL.
    parsed = new URL(href, base);
  }
  return url.format(parsed, { fragment: false });
}
Expose all the functions from the url package using page.exposeFunction.
Iterate over the module's exports and collect every function to expose:
var url = require('url');
// Collect every function exported by the url module as a { name, func } pair.
// Each name is the export's key prefixed with "url" (url.parse -> "urlparse"),
// giving every entry a distinct flat name to register under.
const functionsToExpose = Object.entries(url)
  .filter(([, exported]) => typeof exported === 'function')
  .map(([key, exported]) => ({ name: `url${key}`, func: exported }));
Expose them to the page
// Register the collected functions on the page one at a time, each under its
// prefixed name, waiting for every registration to finish before the next.
for (const { name, func } of functionsToExpose) {
  await page.exposeFunction(name, func);
}
Every function of the url package will be renamed: for example, url.parse is accessible using urlparse.