I created a data scraping script using Python and it works fine as a single instance. The problem is that when I run multiple instances, it stops after about 25 minutes. How can I handle multiple instances of the Chromium browser and make sure they don't crash? Also, how should I handle slow responses from the site? I've noticed the failure usually happens when the site is responding very slowly. My code is below (note: indentation may be broken due to copy/paste).
# Open one Chrome instance and process each parcel ID in `dirs` (a list of
# parcel-ID strings). Returns "Ok" when the batch finishes, or 0 if the
# search page never loaded. The browser is ALWAYS quit on exit — leaking
# Chrome/chromedriver processes is the usual reason multi-instance runs
# die after ~25 minutes.
def executeTask(dirs):
    chrome_options = webdriver.ChromeOptions()
    # Uncomment to reduce automation detection / run headless:
    # chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    # chrome_options.add_argument('--headless')
    # chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    # chrome_options.add_experimental_option("useAutomationExtension", False)
    driver = webdriver.Chrome(options=chrome_options)
    print("Step success. if a new Chrome Browser Pop Up, you can proceed to next step")
    try:
        # NOTE(review): the original URL was garbled in the paste
        # (`driver.get(f";)`); restore the real search-page URL here.
        driver.get("")  # TODO: put the site search URL back
        try:
            # 30s instead of 10s: the site is known to respond slowly, and a
            # premature timeout here aborts the whole batch.
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.ID, 'PropertySearch'))
            )
            print("page loaded successfully")
        except TimeoutException:
            print("Page was not able to load.")
            return 0
        for index, parcel_id in enumerate(dirs):
            parcel_id = parcel_id.strip()
            if len(parcel_id) <= 8:
                # Short IDs get a trailing dot — presumably the site's
                # expected padding format; confirm against the site.
                parcel_id = parcel_id + "."
            if index == 0:
                # First iteration only: give the page a moment to settle,
                # then click twice at the current position (dismisses any
                # initial overlay/focus state).
                time.sleep(2)
                actions = ActionChains(driver)
                actions.move_by_offset(0, 0).click().perform()
                actions.move_by_offset(0, 0).click().perform()
            # Drive the page's own jQuery: set the parcel field and submit.
            driver.execute_script(formula)
            driver.execute_script(f"$('#xxxPARCEL_ID').val('{parcel_id}');")
            driver.execute_script("$('#btParcelSearches').click();")
            try:
                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located((By.ID, 'back2Top'))
                )
                soup = BeautifulSoup(driver.page_source, 'lxml')
                getPageDetails(soup, parcel_id)
            except TimeoutException:
                print("Loading took too much time! The body element was not found.")
                # Skip only this parcel instead of abandoning the rest of
                # the batch on one slow response.
                continue
        return "Ok"
    finally:
        # quit() (not close()) tears down the whole browser AND the
        # chromedriver process, on every exit path including exceptions.
        driver.quit()
Then this is the code for multithreading:
# Fan the scrape out across browser instances: split the parcel IDs into
# `instanceCount` chunks and run one executeTask (one Chrome) per chunk.
instanceCount = 3
parcelIds = getAllParcelToFind()
print(f"Parcel ID Count: {len(parcelIds)}")
arr = dataSplit(parcelIds, instanceCount)
# Pin the pool size to instanceCount so exactly that many Chrome instances
# run concurrently — the executor's default worker count is unrelated to
# the number of data splits and could over- or under-subscribe.
with concurrent.futures.ThreadPoolExecutor(max_workers=instanceCount) as executor:
    # Map executeTask over each chunk; map preserves chunk order.
    results = list(executor.map(executeTask, arr))
print("Results:", results)
Thanks in advance.
Ant