I'm starting web scraping with Python and Playwright, but when I run the code below I get an error. How can I find the type of the response data (binary, text, ...) and handle the conversion so the data is saved as text in a file?
from playwright.sync_api import sync_playwright
import json

def handle_response(response):
    with open("copy.txt", "w", encoding="utf-8") as file:
        file.write(response.text())

def main():
    playwright = sync_playwright().start()
    browser = playwright.chromium.launch(headless=True)
    browser.new_context(no_viewport=True)
    page = browser.new_page()
    page.on('response', lambda response: handle_response(response))
    page.goto("https://www.booking/hotel/it/hotelnordroma.en-gb.html?aid=304142&checkin=2025-05-15&checkout=2025-05-16#map_opened-map_trigger_header_pin")
    page.wait_for_timeout(1000)
    browser.close()
    playwright.stop()

if __name__ == '__main__':
    main()
Error:

Exception has occurred: UnicodeDecodeError
  File "J:\SeSa\Playwright\sample.py", line 6, in handle_response
    file.write(response.text())
  File "J:\SeSa\Playwright\sample.py", line 15
    page.on('response', lambda response: handle_response(response))
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x89 in position 0: invalid start byte
1 Answer
I'm not sure what you're trying to achieve, but since many responses are binary files like images, use the "wb" option in your write, and .body() on the response (rather than .text()).

Also, choose different names for each file, otherwise copy.txt will simply contain only the last response received.
import os
from playwright.sync_api import sync_playwright  # 1.48.0

url = "<Your URL>"
output_directory = "site_content"

def handle_response(response):
    file_name = response.url.split("/")[-1][-100:]
    if response.ok and file_name:
        with open(os.path.join(output_directory, file_name), "wb") as file:
            file.write(response.body())

def main():
    os.makedirs(output_directory, exist_ok=True)
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch()
        page = browser.new_page()
        page.on("response", handle_response)
        page.goto(url, wait_until="networkidle")

if __name__ == "__main__":
    main()
In general, it's a bit unusual to want to capture all responses from a site like this. Most of the data that will be written is junk. Usually you're just after one JSON blob or something like that.
You might want to clarify your actual goal, because there's probably a more straightforward way to achieve it.
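For example, if you're after one JSON payload, you can wait for just that response rather than hooking every one. Here's a minimal sketch; the "/api/" fragment is a placeholder for whatever substring identifies the request you actually care about (look it up in your browser's network tab):

from playwright.sync_api import sync_playwright

url = "<Your URL>"

def main():
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch()
        page = browser.new_page()
        # Block until one response whose URL matches the predicate arrives,
        # instead of handling every response on the page.
        with page.expect_response(lambda r: "/api/" in r.url) as response_info:
            page.goto(url)
        print(response_info.value.json())

if __name__ == "__main__":
    main()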
Note that response.headers["content-type"] and response.request.resource_type can also be useful tools for taking different actions depending on the data and request type.
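As a rough sketch of that idea (my own addition, not part of the question's code; treating anything without a JSON or text/* content type as binary is an assumption you may need to adjust):

from playwright.sync_api import sync_playwright

url = "<Your URL>"

def handle_response(response):
    if not response.ok:  # skip redirects and failed requests, which have no usable body
        return
    # Playwright exposes headers as a dict with lower-cased names
    content_type = response.headers.get("content-type", "")
    if "application/json" in content_type:
        data = response.json()    # parsed JSON
    elif content_type.startswith("text/"):
        data = response.text()    # safe to decode as text
    else:
        data = response.body()    # raw bytes: images, fonts, etc.
    print(response.request.resource_type, content_type, type(data).__name__, response.url)

def main():
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch()
        page = browser.new_page()
        page.on("response", handle_response)
        page.goto(url, wait_until="networkidle")

if __name__ == "__main__":
    main()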