I created a code with Jupyter Notebook to download all geotags from Flickr pictures with specific tags. The code works but when I open the shapefile in ArcGIS, there are like 400,000 points, which seems right, but when I delete the duplicates only 4,000 points are left. I think something is wrong with the code, and it saves the same points (not all, just some) multiple times.
Here is the code I use. If anyone has any recommendations or changes I can do, I would be very thankful. Thanks in advance!
import requests
import time
import geopandas as gpd
from shapely.geometry import Point
API_KEY = "de874d66a7e115fe8d805b9630a246ce"
TAGS = ["Alpen", "Alpi", "Alpes", "Alps", "Alpe", "Альпы", "阿尔卑斯山", "アルプス山脈", "جبال الألب", "आल्प्स ", "آلپس"]
PER_PAGE = 500
def fetch_flickr_geotags(api_key, tags, per_page):
url = "/"
all_photos = []
page = 1
tags_string = ",".join(tags)
params = {
"method": "flickr.photos.search",
"api_key": api_key,
"tags": tags_string,
"tag_mode": "any",
"has_geo": 1,
"format": "json",
"nojsoncallback": 1,
"per_page": per_page,
"page": page,
"extras": "geo"
}
response = requests.get(url, params=params)
data = response.json()
if data["stat"] != "ok":
raise Exception(f"Flickr API Fehler: {data['message']}")
total_pages = data["photos"]["pages"]
print(f"Gesamtzahl der Seiten: {total_pages}")
while page <= total_pages:
params["page"] = page
response = requests.get(url, params=params)
data = response.json()
if "stat" in data and data["stat"] != "ok":
print(f"API-Fehler: {data['message']}")
break
photos = data["photos"]["photo"]
all_photos.extend(photos)
print(f"Seite {page} von {total_pages} heruntergeladen.")
time.sleep(1)
page += 1
return all_photos
def save_photos_as_shapefile(photos, filename):
records = []
for photo in photos:
if "latitude" in photo and "longitude" in photo:
lat = float(photo["latitude"])
lon = float(photo["longitude"])
point = Point(lon, lat)
record = {
"id": photo["id"],
"title": photo["title"],
"geometry": point
}
records.append(record)
if not records:
print("Keine Geotags gefunden.")
return
gdf = gpd.GeoDataFrame(records, crs="EPSG:4326")
gdf.to_file(filename)
print(f"Shapefile gespeichert: {filename}")
photos = fetch_flickr_geotags(API_KEY, TAGS, PER_PAGE)
if photos:
save_photos_as_shapefile(photos, "flickr_tags_shapefile.shp")
else:
print("Keine Fotos mit Geotags gefunden.")