I have an app that I use to fetch comments from a Facebook post, but I run into a problem when a post has more than 300 comments: some of the comment texts come back as what look like random numbers. At first I thought I might be making API requests too often, so I added a 10-second wait between requests, but that does not seem to help.
import requests
import json
import time
import pandas as pd
ACCESS_TOKEN = "xx"
POST_ID = "xx"
# Main function: fetch comments from a post, following cursor-based pagination
def get_all_comments(post_id, limit=1000):
    comments = []
    url = f"https://graph.facebook.com/v18.0/{post_id}/comments?fields=id,message,from&limit=50&access_token={ACCESS_TOKEN}"
    while url:
        response = requests.get(url)
        data = response.json()
        if "data" in data:
            for comment in data["data"]:
                comment_id = comment.get("id")
                commenter_name = comment.get("from", {}).get("name", "Unknown commenter")
                comment_message = str(comment.get("message", "No message"))
                comments.append({"User": commenter_name, "Comment": comment_message})
                # Stop once the requested number of comments has been collected
                if len(comments) >= limit:
                    return comments
        paging = data.get("paging", {})
        cursor = paging.get("cursors", {}).get("after", None)
        if cursor:
            time.sleep(10)  # wait between requests in case I am being rate limited
            url = f"https://graph.facebook.com/v18.0/{post_id}/comments?fields=id,message,from&limit=50&after={cursor}&access_token={ACCESS_TOKEN}"
        else:
            url = None
    return comments
comments = get_all_comments(POST_ID, limit=1000)
# Save the comments to JSON
with open("comments_from_page.json", "w", encoding="utf-8") as file:
json.dump(comments, file, ensure_ascii=False, indent=4)
print(f" Iš viso gauta {len(comments)} komentarų.")
# Save the comments to Excel
df = pd.DataFrame(comments)
df.to_excel("comments_from_page.xlsx", index=False, engine='openpyxl')
print("Komentarai sėkmingai perkelti į 'comments_from_page.xlsx'.")