te')); return $arr; }

/**
 * List the threads posted by one user, ordered by tid.
 *
 * @param mixed  $uid      User ID; an empty value short-circuits to an empty array.
 * @param int    $page     Page number.
 * @param int    $pagesize Number of records per page.
 * @param bool   $desc     Sort direction: TRUE for descending, FALSE for ascending.
 * @param string $key      Column whose value is used as the key of the returned array.
 * @param array  $col      Columns to query.
 * @return array array() when $uid is empty, otherwise whatever thread_tid__find()
 *               returns (that helper is defined outside this excerpt).
 */
function thread_tid_find_by_uid($uid, $page = 1, $pagesize = 1000, $desc = TRUE, $key = 'tid', $col = array())
{
    if (empty($uid)) {
        return array();
    }

    // -1 / 1 is the direction value passed to thread_tid__find() for the tid column.
    $orderby = $desc ? -1 : 1;

    return thread_tid__find(array('uid' => $uid), array('tid' => $orderby), $page, $pagesize, $key, $col);
}

/**
 * List the tids under a forum, ordered by tid and keyed by tid.
 *
 * Only the 'tid' and 'verify_date' columns are requested.
 *
 * @param mixed $fid      Forum ID; per the original comment an array such as
 *                        array(1, 2, 3) is also accepted. An empty value
 *                        short-circuits to an empty array.
 * @param int   $page     Page number.
 * @param int   $pagesize Number of records per page.
 * @param bool  $desc     Sort direction: TRUE for descending, FALSE for ascending.
 * @return array array() when $fid is empty, otherwise the result of thread_tid__find().
 */
function thread_tid_find_by_fid($fid, $page = 1, $pagesize = 1000, $desc = TRUE)
{
    if (empty($fid)) {
        return array();
    }

    $orderby = $desc ? -1 : 1;

    return thread_tid__find(array('fid' => $fid), array('tid' => $orderby), $page, $pagesize, 'tid', array('tid', 'verify_date'));
}

/**
 * Delete the record(s) matching a tid.
 *
 * @param mixed $tid Thread ID; an empty value is rejected.
 * @return mixed FALSE when $tid is empty, otherwise the result of thread_tid__delete().
 */
function thread_tid_delete($tid)
{
    if (empty($tid)) {
        return FALSE;
    }

    return thread_tid__delete(array('tid' => $tid));
}

/**
 * Count all records via thread_tid__count() with no condition.
 *
 * @return mixed The value returned by thread_tid__count().
 */
function thread_tid_count()
{
    return thread_tid__count();
}

/**
 * Count the threads of one user.
 *
 * NOTE (translated from the original comment, whose wording is ambiguous):
 * with large data volumes, take care when counting on a non-primary-key column.
 *
 * @param mixed $uid User ID used as the 'uid' condition.
 * @return mixed The value returned by thread_tid__count().
 */
function thread_uid_count($uid)
{
    return thread_tid__count(array('uid' => $uid));
}

/**
 * Count the threads of one forum.
 *
 * NOTE (translated from the original comment, whose wording is ambiguous):
 * with large data volumes, take care when counting on a non-primary-key column.
 *
 * @param mixed $fid Forum ID used as the 'fid' condition.
 * @return mixed The value returned by thread_tid__count().
 */
function thread_fid_count($fid)
{
    return thread_tid__count(array('fid' => $fid));
}
?>javascript - Python Requests-HTML Render() - No Content - Stack Overflow
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

javascript - Python Requests-HTML Render() - No Content - Stack Overflow

programmeradmin3浏览0评论

I'd like to scrape a page, the content of which seems to be rendered by an app referenced in the html like:

<div id="app" class="app-mobile-pusher"></div>

I'm using the render() method from Requests-HTML python library like so:

with HTMLSession() as session:
    p = session.post(login_url, data=payload)
    r = session.get(content_url)
    r.html.render()
    print(r.text)

This code returns the HTML for the page without any errors, but also without any content (just HTML tags). Notes:

  • I've tried adding timeout arguments to session.get to give the page more time to render before accessing it, as well as other syntax variations of the above.

  • Also tried adding user agent information in headers based on this answer (in order to circumvent rejection of my automated scrape)

  • The chromium browser did download when I first ran render()

The lack of any error messages is stumping me and it is difficult to replicate the context of this request to test on another site.

Any specific suggestions for how to solve, or ideas for how to go about troubleshooting, appreciated. (Python 3.6, Mac OS)

I'd like to scrape a page, the content of which seems to be rendered by an app referenced in the html like:

<div id="app" class="app-mobile-pusher"></div>

I'm using the render() method from Requests-HTML python library like so:

with HTMLSession() as session:
    p = session.post(login_url, data=payload)
    r = session.get(content_url)
    r.html.render()
    print(r.text)

This code returns the HTML for the page without any errors, but also without any content (just HTML tags). Notes:

  • I've tried adding timeout arguments to session.get to give the page more time to render before accessing it, as well as other syntax variations of the above.

  • Also tried adding user agent information in headers based on this answer (in order to circumvent rejection of my automated scrape)

  • The chromium browser did download when I first ran render()

The lack of any error messages is stumping me and it is difficult to replicate the context of this request to test on another site.

Any specific suggestions for how to solve, or ideas for how to go about troubleshooting, appreciated. (Python 3.6, Mac OS)

Share | Improve this question | asked Nov 13, 2018 at 0:41 by Dyneken (reputation 111: 1 gold badge, 2 silver badges, 7 bronze badges)
Add a comment  | 

1 Answer 1

Reset to default 12

Have you tried print(r.html.html) instead? The newly rendered markup is stored under that attribute path.

发布评论

评论列表(0)

  1. 暂无评论