i have in my base file a ID = 123 region = CN and a year = 2019. Now i would like to try some checks if i found a match.
In the last step i will check it with a seperate file where i have add some fallbacks. In this case 2 fallbacks:
ID = 123 region = RoW region_ovverride = CN/not_assigned year = 2011
ID = 123 region = EU region_ovverride = DE year = 2018
What i try is that in the third loop they should find for the base file the mathc first the year what fits to region = CN - so year 2011 But it find always 2018 and i have no idea how i can say check region_ovverride and take year.
# This function resolves the next possible year fallback that we can use
def find_climatiq_year(climatiq_records: list, year: int):
filtered_list = filter_list(climatiq_records, lambda x: x['CF_year'] <= year)
if not filtered_list:
filtered_list = filter_list(climatiq_records, lambda x: x['CF_year'] >= year)
return list_first(sorted(filtered_list, key=lambda x: x['CF_year'], reverse=True))['CF_year']
else:
return list_first(sorted(filtered_list, key=lambda x: x['CF_year'], reverse=True))['CF_year']
def find_climatiq_year_for_region(climatiq_records: list, year: int, region: str):
# Filter records where CF_year is less than or equal to the given year and region matches (case-insensitive)
filtered_list = list(filter(lambda x: (x['CF_year'] <= year) & (x['CF_region'].upper() == region.upper()), climatiq_records))
if not filtered_list:
filtered_list = list(filter(lambda x: (x['CF_year'] >= year) & (x['CF_region'].upper() == region.upper()), climatiq_records))
if not filtered_list:
return year
return sorted(filtered_list, key=lambda x: x['CF_year'], reverse=True)[0]['CF_year']
# This method resolves the relevant regions from the region map table that we can use for looking up fallback regions for a specific region and year
region_map = df_climatiq_region_map.rdd.collect()
# Step 2 for Fallback
def find_climatiq_regions(row_num: int, activity_id: str, year: int, region: str):
print ("(" + str(row_num) + ") Trying to find region fallbacks for " + activity_id + " in year " + str(year) + " in region " + region)
upper_region = region.upper()
filtered_list = filter_list(region_map, lambda x: (x['CR_activity_id'].lower() == activity_id.lower()) & (x['CR_year'] == year)
& (any(y.upper() == region.upper() for y in x['CR_region_overrides_split'])))
if len(filtered_list) > 1:
print ("(" + str(row_num) + ") Found multiple matching region overrides for regions for " + activity_id + " in year " + str(year) + " in region " + region + ". Using the first entry.")
found_map = list_first(filtered_list)
if not found_map:
print ("(" + str(row_num) + ") There is no override region for " + activity_id + " in year " + str(year) + " in region " + region)
return region.upper()
print ("(" + str(row_num) + ") Found override regions for " + activity_id + " in year " + str(year) + " in region " + region + ". New region is: " + found_map["CR_region"])
overrides = [found_map["CR_region"]]
return overrides
# Tries to find the climatiq record for querying using region fallbacks
# If no fallback region matches, it will return all the climatiq records for the year for calculating the average later on
def find_climatiq_records_with_region_fallback(row_num: int, climatiq_records: list, source: str, activity_id: str, year: int, fallback_regions: list[str]):
fallback_regions = [*fallback_regions] ### dynamisch pflegbar
for region in fallback_regions:
fallback_year = find_climatiq_year_for_region(climatiq_records, year, region)
result = find_exact_climatiq_record(climatiq_records, source, activity_id, fallback_year, region)
if result:
return result
# Fallback that will take the average over all other regions in that year
# The fallback for average over all regions of the year is disabled to lead the entry into second stage fallback
#return filter_list(climatiq_records, lambda x: x['CF_year'] == year)
return []
def find_exact_climatiq_record(climatiq_records: list, soruce: str, activity_id: str, year: int, region: str):
return list_first(filter_list(climatiq_records, lambda x: (x['CF_activity_id'].lower() == activity_id.lower()) & (int(x['CF_year']) == int(year)) & (x['CF_region'].upper() == region.upper())))
# Step 1 :
# This function is used to resolve the climatiq record that we can use for querying the estimate endpoint
# When no exact match is found it will apply different fallback mechanism for year and region
def find_climatiq_records_for_query(row_num: int, climatiq_records: list, soruce: str, activity_id: str, year: int, region: str):
print("(" + str(row_num) + ") Trying to find climatiq records for " + activity_id + " in year " + str(year) + " in region " + region + "...")
result = find_exact_climatiq_record(climatiq_records, soruce, activity_id, year, region)
if (not result):
# If we did not have a direct match, we need to apply the region fallback
print("(" + str(row_num) + ") There is no direct climatiq record. Trying region fallback...")
climatiq_regions = find_climatiq_regions(row_num, activity_id, year, region)
result = find_climatiq_records_with_region_fallback(row_num, climatiq_records, soruce, activity_id, year, climatiq_regions)
# We still have no result by just using the region fallback, try again using the year fallback
if (not result) | (len(result) == 0):
print("(" + str(row_num) + ") There is no climatiq record with just region fallback. Trying year fallback...")
print(year)
print
climatiq_year = find_climatiq_year(climatiq_records, year)
print (climatiq_year)
result = find_exact_climatiq_record(climatiq_records, soruce, activity_id, climatiq_year, region)
# We still ahve no results using the year fallback, try again using the fallback year and region fallback
if not result:
print("(" + str(row_num) + ") There is no climatiq record with region and year fallbacks. Trying combined region and year fallback...")
climatiq_regions = find_climatiq_regions(row_num, activity_id, climatiq_year, region)
result = find_climatiq_records_with_region_fallback(row_num, climatiq_records, soruce, activity_id, climatiq_year, climatiq_regions)
return result if type(result) == list else [result]