te')); return $arr; } /* 遍历用户所有主题 * @param $uid 用户ID * @param int $page 页数 * @param int $pagesize 每页记录条数 * @param bool $desc 排序方式 TRUE降序 FALSE升序 * @param string $key 返回的数组用那一列的值作为 key * @param array $col 查询哪些列 */ function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array()) { if (empty($uid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('uid' => $uid), array('tid' => $orderby), $page, $pagesize, $key, $col); return $arr; } // 遍历栏目下tid 支持数组 $fid = array(1,2,3) function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE) { if (empty($fid)) return array(); $orderby = TRUE == $desc ? -1 : 1; $arr = thread_tid__find($cond = array('fid' => $fid), array('tid' => $orderby), $page, $pagesize, 'tid', array('tid', 'verify_date')); return $arr; } function thread_tid_delete($tid) { if (empty($tid)) return FALSE; $r = thread_tid__delete(array('tid' => $tid)); return $r; } function thread_tid_count() { $n = thread_tid__count(); return $n; } // 统计用户主题数 大数量下严谨使用非主键统计 function thread_uid_count($uid) { $n = thread_tid__count(array('uid' => $uid)); return $n; } // 统计栏目主题数 大数量下严谨使用非主键统计 function thread_fid_count($fid) { $n = thread_tid__count(array('fid' => $fid)); return $n; } ?>javascript - Puppeteer returning empty object - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - Puppeteer returning empty object - Stack Overflow

programmeradmin4浏览0评论

When I run the following code in the console of the page I'm trying to scrape, I get the result shown in the picture.

document.querySelector('#sb-site > div.sticky_footer > div:nth-child(9)')

However, when I run this in my program and log the result to the console, it returns '{}':

const inputContent = await page.evaluate(() => {
return document.querySelector('#sb-site > div.sticky_footer > div:nth-child(9)'); });

When I run the following code in the console of the page I'm trying to scrape, I get the result shown in the picture.

document.querySelector('#sb-site > div.sticky_footer > div:nth-child(9)')

However, when I run this in my program and log the result to the console, it returns '{}':

const inputContent = await page.evaluate(() => {
return document.querySelector('#sb-site > div.sticky_footer > div:nth-child(9)'); });
Share Improve this question edited Nov 10, 2023 at 16:17 ggorlen 57.1k8 gold badges110 silver badges150 bronze badges asked Mar 6, 2019 at 6:39 Ryan SoderbergRyan Soderberg 7729 silver badges24 bronze badges 7
  • How are you loading the page? Are you loading with waitUntil: 'networkidle0'? Are you trying to console a HTML element on the nodejs console or just get the text/link? – Md. Abu Taher Commented Mar 6, 2019 at 6:48
  • I have added that code so now it fully loads, I also added .innerHTML after the selector. I am trying to grab that giant block of text from the image in the main post so I can pull content out of it – Ryan Soderberg Commented Mar 6, 2019 at 7:17
  • You are trying to pull text from image? :/ – Md. Abu Taher Commented Mar 6, 2019 at 7:23
  • tbh, it's hard to help if you don't provide more code or url, so that we can reproduce this problem. I dealt with lots of react/vue/angular site scraping, but still I needed more specific information. – Md. Abu Taher Commented Mar 6, 2019 at 7:25
  • 1 Instead of sending us pictures, please copy and paste just the code you want into your question. – Heretic Monkey Commented Mar 6, 2019 at 20:29
 |  Show 2 more comments

3 Answers 3

Reset to default 12

Puppeteer can transfer two types of data between Node.js and the browser context: serializable data (i.e. data that is supported by JSON.stringify()/JSON.parse()) and JavaScript object ids (including DOM elements) — JSHandle and ElementHandle. The latter have a somewhat more complicated API (see JSHandle and ElementHandle methods or methods that mention them).

page.evaluate() can only transfer serializable data, and instead of un-serializable data, it returns undefined or empty objects. DOM elements are non-serializable as they contain circular references and methods.

So if you just need some text or element attributes, try to do most of the processing in the browser context and return just serializable data.

Make sure the page loads completely before scraping.

page.goto(url, {waitUntil: 'networkidle0'})

Also, according to the docs, .evaluate returns a promise; it will not return a DOM element.

Logging the result will print either {} or the value the promise resolves to.

In your case you're trying to select a custom DOM object injected into the page, which leads to some strange behavior when using the nth-child() CSS selector. You should try to target the DOM node directly instead. Let's say you were trying to get a similar element at https://wefunder.com/chattanoogafc

You can do:

const inputContent = await page.evaluate(async () => {
  var elements =  document.querySelectorAll("#sb-site > div.sticky_footer > div")[3].querySelectorAll("*")[0];
  return elements.getAttribute("company-json");
});

console.log("test:" + inputContent);

And that should return the JSON that you want. You can then parse it using JSON.parse(inputContent)

发布评论

评论列表(0)

  1. 暂无评论