I am building a Transfermarkt scraper with Scrapy so that I can use the data for my own purposes. The code below returns all of the fields I want except Current Club: I cannot get the club name. I have tried every approach I could think of without success. I am writing this in Python and my knowledge of it is not very good, so I would appreciate any suggestions.
import scrapy
class PlayersSpider(scrapy.Spider):
    """Scrape labelled facts from Transfermarkt player profile pages.

    Every field is located the same way: find the ``<span>`` whose text
    contains the label (for example ``"Full name:"``) and read the value from
    the span that immediately follows it.
    """

    name = "players"
    allowed_domains = ["transfermarkt.co.uk"]
    start_urls = [
        # NOTE(review): the profile URL was lost when this snippet was pasted
        # (only a stray quote survived). Add the player profile URL(s) here.
    ]

    @staticmethod
    def _value_xpath(label, tail):
        """Return an XPath for ``tail`` inside the value cell next to ``label``.

        ``[1]`` limits the match to the span directly after the label. Without
        it, ``following-sibling::span`` matches every later sibling span, so a
        ``getall()`` query could collect nodes that belong to other rows.
        """
        return (
            "//span[contains(text(), '{}')]/following-sibling::span[1]/{}"
        ).format(label, tail)

    def parse(self, response):
        """Yield one dict of profile facts for the player page in ``response``.

        Fields that cannot be found are yielded as empty strings (or an empty
        list for ``"Citizenship"``) rather than raising.
        """
        value_xpath = self._value_xpath

        def extract_text(xpath):
            # First match, stripped; "" when nothing matches.
            return response.xpath(xpath).get(default="").strip()

        def extract_all_texts(xpath):
            return response.xpath(xpath).getall()

        citizenship_names = extract_all_texts(value_xpath("Citizenship:", "img/@title"))
        citizenship_flags = extract_all_texts(value_xpath("Citizenship:", "img/@src"))
        citizenship = [
            {"name": name, "flag_url": flag}
            for name, flag in zip(citizenship_names, citizenship_flags)
            # Skip the "verified" badge, which is an image but not a country.
            if name.lower() != "verified"
        ]

        # The club is looked up by its label like every other field, instead
        # of by an exact class match, which matched nothing. The link's title
        # is read first so that a link wrapping only the crest image still
        # yields the name; the link text is the fallback.
        # NOTE(review): assumes the label reads "Current club:" - confirm
        # against the live page.
        current_club_name = extract_text(
            value_xpath("Current club:", "a/@title")
        ) or extract_text(value_xpath("Current club:", "a/text()"))

        yield {
            "Full Name": extract_text(value_xpath("Full name:", "text()")),
            "Date of Birth": extract_text(value_xpath("Date of birth/Age:", "a/text()")),
            "Citizenship": citizenship,
            "Position": extract_text(value_xpath("Position:", "text()")),
            "Foot": extract_text(value_xpath("Foot:", "text()")),
            "Current Club": current_club_name,
            "Contract Expires": extract_text(value_xpath("Contract expires:", "text()")),
        }