Description:
I'm experiencing an issue with nodriver when trying to intercept and retrieve network response bodies using the Chrome DevTools Protocol (CDP). In my script, I register a handler for the Network.ResponseReceived event and then call Network.getResponseBody with the corresponding request ID. Although the handler correctly detects that a response matching my criteria is received, the subsequent call to Network.getResponseBody never returns the expected data (or returns empty data), even though the response is known to contain valid JSON.
Steps to Reproduce:
Initialize the nodriver Browser:
- Start the browser using nodriver.
Enable Network Monitoring:
- Send the Network.enable command.
Register a Response Handler:
- Add a handler for the Network.ResponseReceived event.
- In the handler, check if the response URL contains the substring "operationName=Marketplace&variables".
- Use the provided request ID to call Network.getResponseBody (a condensed sketch of this wiring follows the list).
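Condensed, the wiring from the steps above looks roughly like this (a minimal sketch, not my full script; it assumes nodriver's generated cdp.network/cdp.page wrappers and uses a placeholder target URL):

    import asyncio
    import base64
    import nodriver


    async def repro():
        browser = await nodriver.start()
        page = await browser.get("about:blank", new_tab=True)

        # Step 2: enable network monitoring for this tab
        await page.send(nodriver.cdp.network.enable())

        # Step 3: filter responses by URL and fetch the body by request id
        async def on_response(evt: nodriver.cdp.network.ResponseReceived):
            if "operationName=Marketplace&variables" in evt.response.url:
                body, is_base64 = await page.send(
                    nodriver.cdp.network.get_response_body(request_id=evt.request_id)
                )
                if is_base64:
                    body = base64.b64decode(body).decode("utf-8")
                print("body length:", len(body) if body else 0)

        page.add_handler(nodriver.cdp.network.ResponseReceived, on_response)

        # Navigate only after the handler is registered ("<target url>" is a placeholder)
        await page.send(nodriver.cdp.page.navigate(url="<target url>"))
        await asyncio.sleep(30)  # give the page time to issue its API requests

    # if __name__ == "__main__": asyncio.run(repro())

The handler fires (the intercepted URL shows up in my logs); it is the get_response_body call that comes back empty.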
Expected Behavior:
- The call to Network.getResponseBody should return the full response body (decoded if necessary) so that the JSON data can be processed.
Actual Behavior:
- Although the response is detected (as seen in logs), Network.getResponseBody does not return the expected data.
What I've Tried:
Alternative Workarounds:
- I've attempted using the Fetch domain as an alternative to capture the response, but that approach did not produce reliable results for my use case (a rough sketch of the pattern I mean follows this item).
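For context, the Fetch-domain pattern I mean is roughly the following (a simplified sketch assuming nodriver's generated cdp.fetch wrappers, not my exact code):

    from nodriver import cdp


    async def try_fetch_domain(page, url_fragment, logger):
        """Sketch: pause matching responses at the Response stage and read their bodies."""
        await page.send(cdp.fetch.enable(patterns=[
            cdp.fetch.RequestPattern(
                url_pattern="*operationName=Marketplace*",
                request_stage=cdp.fetch.RequestStage.RESPONSE,
            )
        ]))

        async def on_paused(evt: cdp.fetch.RequestPaused):
            try:
                if url_fragment in evt.request.url:
                    body, is_base64 = await page.send(
                        cdp.fetch.get_response_body(request_id=evt.request_id)
                    )
                    logger.info(f"Fetch-domain body (base64={is_base64}): {body[:200]}")
            finally:
                # Always release the paused request so the page keeps loading
                await page.send(cdp.fetch.continue_request(request_id=evt.request_id))

        page.add_handler(cdp.fetch.RequestPaused, on_paused)

In my runs this was not reliable for this endpoint, which is part of why I'd prefer to stay with the Network domain.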
Enabling Interception Early:
- I ensured that Network.enable is called early and that the response handler is registered before the requests are sent (see the short sketch after this item).
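Concretely, by "early" I mean ordering things like this (a sketch; the helper name and arguments are placeholders):

    import asyncio
    import nodriver


    async def open_with_handler(browser, target_url, handle_response):
        # Open a blank tab first so nothing loads before the handler exists
        tab = await browser.get("about:blank", new_tab=True)
        await tab.send(nodriver.cdp.network.enable())
        tab.add_handler(nodriver.cdp.network.ResponseReceived, handle_response)
        # Navigate only once Network.enable and the handler are both in place
        await tab.send(nodriver.cdp.page.navigate(url=target_url))
        await asyncio.sleep(30)  # allow the page's API requests to fire
        return tab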
Delaying and Retrying:
- I've experimented with different delays and even reloading the page, but the issue persists.
Researching Similar Issues:
- I found several discussions on GitHub and Stack Overflow regarding this issue (e.g., undetected-chromedriver issue #1788 and a related Stack Overflow post). However, no definitive fix or workaround was provided for nodriver.
Environment:
- nodriver Version: most recent
- Python Version: (e.g., Python 3.9+)
- Browser: Chrome version 132
- OS: Windows 11
Reproducible Code Snippet:
import asyncio
import logging
import re
import os
import base64
from collections import defaultdict

import nodriver  # Ensure you have nodriver installed


class ESPNBetFetcher:
    def __init__(self, leagues, max_concurrent=20):
        self.logger = logging.getLogger("espnbet_fetcher")
        self.logger.setLevel(logging.INFO)
        log_path = os.path.join(os.path.dirname(__file__), 'espnbet_fetch.log')
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        handler = logging.FileHandler(log_path, encoding='utf-8')
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        self.logger.handlers.clear()
        self.logger.addHandler(handler)
        self.leagues = leagues
        self.max_concurrent = max_concurrent
        self.browser = None

    async def startup(self):
        self.logger.info("Starting nodriver browser...")
        self.browser = await nodriver.start()
        if not self.browser:
            raise RuntimeError("Failed to initialize browser")
        self.logger.info("Browser started successfully")

    async def cleanup(self):
        if self.browser:
            self.logger.info("Cleaning up browser...")
            try:
                self.browser.stop()  # nodriver's stop() is not async
            except Exception as e:
                self.logger.error(f"Error during cleanup: {e}")
            self.browser = None
            self.logger.info("Browser cleaned up successfully")

    async def _driver_worker(self):
        """Load the NBA page and extract game IDs."""
        game_ids = []
        for league in self.leagues:
            if league == "nba":
                url = ""  # NBA page URL omitted from the original snippet
                self.logger.info(f"Fetching games for NBA from {url}")
                page = await self.browser.get(url, new_tab=True)
                await asyncio.sleep(2)  # Allow page to load
                try:
                    # Wait for the marketplace shelf (select() takes a CSS selector; timeout in seconds)
                    await page.select("section[data-testid='marketplace-shelf-']", timeout=30)
                    body_el = await page.query_selector("body")
                    if not body_el:
                        self.logger.error("Could not find body element")
                        continue
                    js_function = "(elem) => { return elem.innerHTML; }"
                    content = await body_el.apply(js_function)
                    # Example regex; adjust if necessary based on page content.
                    matches = re.findall(r"default\|([a-f0-9-]+)\|([a-f0-9-]+)", content, re.IGNORECASE)
                    for game_id, _ in matches:
                        game_ids.append((league, game_id))
                    self.logger.info(f"Found {len(matches)} games for NBA")
                except Exception as e:
                    self.logger.error(f"Error extracting games from NBA page: {e}")
                finally:
                    await page.close()
        return game_ids

    async def _fetch_odds_for_game(self, league, game_id, worker_id):
        """For a given game, open the player props page and intercept the network response."""
        # Base URL omitted from the original snippet
        url = (
            f"/"
            f"event/{game_id}/section/player_props"
        )
        try:
            # Open new tab for the game page
            page = await self.browser.get(url, new_tab=True)
            self.logger.info(f"Worker {worker_id}: Opened page {url}")

            # Enable network monitoring
            await page.send(nodriver.cdp.network.enable())

            async def handle_response(evt):
                if "operationName=Marketplace&variables" in evt.response.url:
                    self.logger.info(f"Worker {worker_id}: Intercepted response: {evt.response.url}")
                    try:
                        # page.send() takes a cdp command object;
                        # get_response_body returns a (body, base64_encoded) tuple
                        body, is_base64 = await page.send(
                            nodriver.cdp.network.get_response_body(request_id=evt.request_id)
                        )
                        if is_base64:
                            body = base64.b64decode(body).decode("utf-8")
                        if body:
                            self.logger.info(f"Worker {worker_id}: Response body: {body}")
                        else:
                            self.logger.warning(f"Worker {worker_id}: Empty body for request {evt.request_id}")
                    except Exception as e:
                        self.logger.error(f"Worker {worker_id}: Error retrieving response body: {e}")

            # Register the response handler
            page.add_handler(nodriver.cdp.network.ResponseReceived, handle_response)

            # Wait long enough to allow the API request to occur and be intercepted
            await asyncio.sleep(60)
            await page.close()
        except Exception as e:
            self.logger.error(f"Worker {worker_id}: Error fetching odds for game {game_id}: {e}")

    async def scrape(self):
        await self.startup()
        try:
            game_ids = await self._driver_worker()
            if not game_ids:
                self.logger.error("No game IDs found")
                return
            tasks = []
            worker_id = 0
            for league, game_id in game_ids:
                tasks.append(asyncio.create_task(self._fetch_odds_for_game(league, game_id, worker_id)))
                worker_id += 1
            await asyncio.gather(*tasks)
        finally:
            await self.cleanup()


async def main():
    fetcher = ESPNBetFetcher(["nba"])
    await fetcher.scrape()


if __name__ == "__main__":
    asyncio.run(main())
Additional Comments:
- I am aware that some users recommend using the Fetch domain as an alternative, but I prefer using the original Network domain approach.
- Any insights into why Network.getResponseBody might be failing or returning empty data would be greatly appreciated.
- If this is a known limitation of nodriver, a workaround or further documentation would be very helpful.
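One more variant I have not been able to verify yet: deferring the getResponseBody call until Network.LoadingFinished fires for the same request ID, in case the body simply isn't buffered yet when ResponseReceived fires. A rough sketch (again assuming nodriver's generated cdp.network wrappers):

    import base64
    import nodriver


    async def capture_when_finished(page, url_fragment, logger):
        """Sketch: remember interesting request ids, fetch bodies only after loadingFinished."""
        pending = {}  # request_id -> url

        async def on_response(evt: nodriver.cdp.network.ResponseReceived):
            if url_fragment in evt.response.url:
                pending[evt.request_id] = evt.response.url

        async def on_finished(evt: nodriver.cdp.network.LoadingFinished):
            if evt.request_id in pending:
                try:
                    body, is_base64 = await page.send(
                        nodriver.cdp.network.get_response_body(request_id=evt.request_id)
                    )
                    if is_base64:
                        body = base64.b64decode(body).decode("utf-8")
                    logger.info(f"Body for {pending.pop(evt.request_id)}: {body[:200]}")
                except Exception as e:
                    logger.error(f"getResponseBody still failed: {e}")

        await page.send(nodriver.cdp.network.enable())
        page.add_handler(nodriver.cdp.network.ResponseReceived, on_response)
        page.add_handler(nodriver.cdp.network.LoadingFinished, on_finished)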
Thanks for your help!