I want to write Python code that works like the attached picture. However, I don't know how to send the file, the picture, and the question together using Python. I would appreciate it if you could show me how. [screenshot omitted]
I found a method in the official documentation for uploading files to storage, but I couldn't find a way to reference the uploaded files from a query.
def fetch_html(url, timeout=15):
    """Download the HTML document at *url* and return it as text.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait for the server before giving up.
            Added because ``requests.get`` without a timeout can block
            forever on an unresponsive host; default keeps callers working.

    Returns:
        The response body as a string.

    Raises:
        Exception: If the server responds with a non-200 status code.
        requests.exceptions.RequestException: On network errors/timeouts.
    """
    # A browser-like User-Agent avoids trivial bot blocking on many sites.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    raise Exception(f"Failed to fetch URL {url}. Status Code: {response.status_code}")
def save_html_to_file(html_content, file_name="webpage.html"):
    """Persist *html_content* to *file_name* (UTF-8) and return the path used."""
    with open(file_name, mode="w", encoding="utf-8") as out_handle:
        out_handle.write(html_content)
    return file_name
def upload_file_to_openai(file_path):
    """Upload the file at *file_path* to OpenAI file storage and return its id.

    NOTE(review): ``openai.File.create`` is the pre-1.0 SDK surface; the
    >=1.0 ``openai`` package removed it — confirm the installed version
    matches before relying on this.
    """
    with open(file_path, "rb") as upload_handle:
        upload_result = openai.File.create(
            file=upload_handle,
            purpose="assistants",
        )
    return upload_result["id"]
def analyze_html_structure(file_id):
    """Ask the model what data could be extracted from the uploaded HTML.

    Returns the assistant's reply text.

    NOTE(review): interpolating *file_id* into the prompt text does NOT give
    the Chat Completions API access to the uploaded file's contents — the
    model only sees the id string. Presumably the intent was a file-aware
    mechanism (e.g. the Assistants API attachments); verify.
    """
    system_msg = {
        "role": "system",
        "content": "You are an expert in web data analysis. Analyze the given HTML and determine what data can be extracted.",
    }
    user_msg = {
        "role": "user",
        "content": f"Analyze the data that can be extracted from this HTML file ({file_id}). For example, let me know all possible data such as article titles, price information, and lists of links.",
    }
    completion = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[system_msg, user_msg],
        temperature=0.5,
    )
    return completion["choices"][0]["message"]["content"]
def generate_scraper_code(file_id, data_to_extract):
    """Ask the model to write BeautifulSoup scraping code for *data_to_extract*.

    Returns the assistant's reply text (expected to contain Python code).

    NOTE(review): as with ``analyze_html_structure``, the model only sees the
    file id as a literal string inside the prompt, not the file's contents —
    confirm this matches the intended file-attachment mechanism.
    """
    system_msg = {
        "role": "system",
        "content": "You are a web scraping expert. Analyze the HTML and write code using BeautifulSoup to extract specific data.",
    }
    user_msg = {
        "role": "user",
        "content": f"Generate a Python web scraping code using BeautifulSoup to extract '{data_to_extract}' data from this HTML file ({file_id}).",
    }
    completion = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[system_msg, user_msg],
        temperature=0.5,
    )
    return completion["choices"][0]["message"]["content"]
# Driver: fetch a page, upload it, ask the model what is extractable,
# then ask it to generate scraping code for the user's chosen data.
url = input("URL :")
try:
    page_html = fetch_html(url)
    saved_path = save_html_to_file(page_html)
    uploaded_id = upload_file_to_openai(saved_path)
    print(uploaded_id)
    extractable = analyze_html_structure(uploaded_id)
    print(extractable)
    chosen_data = input("\n data: ")
    generated_code = generate_scraper_code(uploaded_id, chosen_data)
    print(generated_code)
except Exception as e:
    # Top-level boundary: report any failure instead of crashing with a trace.
    print(f"⚠️: {e}")