
python - Function issue by find match based on one value not on first number - Stack Overflow


In my base file I have an entry with ID = 123, region = CN and year = 2019. I would now like to run a series of checks to find a match for it.

In the last step I check it against a separate file in which I have added some fallbacks. In this case there are two fallbacks:

ID = 123 region = RoW region_ovverride = CN/not_assigned year = 2011

ID = 123 region = EU region_ovverride = DE year = 2018

What I am trying to achieve is that the third fallback step first finds, for the base-file entry, the year that belongs to region = CN, i.e. year 2011. Instead it always finds 2018, and I have no idea how to tell it to check region_ovverride and take the year from the matching row.

# This function resolves the next possible year fallback that we can use 
def find_climatiq_year(climatiq_records: list, year: int):
    """Resolve the fallback year to use when ``year`` itself has no record.

    The most recent ``CF_year`` that is not later than ``year`` is preferred.
    Only when no such record exists are records dated ``year`` or later
    considered.

    Args:
        climatiq_records: Records that can be indexed with ``'CF_year'``.
        year: The year that was originally requested.

    Returns:
        The selected ``CF_year``, or ``year`` unchanged when
        ``climatiq_records`` offers no candidate at all.
    """
    earlier_years = [rec['CF_year'] for rec in climatiq_records if rec['CF_year'] <= year]
    if earlier_years:
        # This is the common path and must return a value: the caller feeds
        # the result straight into an exact-match lookup.
        return max(earlier_years)

    later_years = [rec['CF_year'] for rec in climatiq_records if rec['CF_year'] >= year]
    if later_years:
        # NOTE(review): the latest later year wins here, same ordering as the
        # earlier-year branch. Confirm whether the closest later year
        # (min) is what is actually wanted.
        return max(later_years)

    # Nothing to fall back to; hand back the requested year, as
    # find_climatiq_year_for_region does in the same situation.
    return year

def find_climatiq_year_for_region(climatiq_records: list, year: int, region: str):
    """Resolve the fallback year for ``year`` among the records of one region.

    Regions are compared case-insensitively. The most recent ``CF_year`` that
    is not later than ``year`` is preferred; if the region only has later
    records, the latest of those is used.

    Args:
        climatiq_records: Records that can be indexed with ``'CF_year'`` and
            ``'CF_region'``.
        year: The year that was originally requested.
        region: Region code to restrict the search to.

    Returns:
        The selected ``CF_year``, or ``year`` unchanged when no record of
        ``region`` exists.
    """
    wanted_region = region.upper()
    region_years = [
        rec['CF_year']
        for rec in climatiq_records
        if rec['CF_region'].upper() == wanted_region
    ]
    if not region_years:
        return year

    not_later = [candidate for candidate in region_years if candidate <= year]
    # When nothing is at or before ``year``, every year of this region is
    # later than ``year``, so the whole list is the ">= year" candidate set.
    return max(not_later) if not_later else max(region_years)

# Region map table materialised as a list of rows. find_climatiq_regions scans this list to look up the
# override region for a given activity id, year and region.
# NOTE(review): `.rdd.collect()` looks like the PySpark pattern that pulls every row into local memory -
# confirm the table is small enough for that.
region_map = df_climatiq_region_map.rdd.collect()

# Step 2 for Fallback

def find_climatiq_regions(row_num: int, activity_id: str, year: int, region: str) -> list[str]:
    """Look up the fallback region(s) to try for an activity, year and region.

    Scans the module-level ``region_map`` for an entry whose activity id
    matches (case-insensitive), whose ``CR_year`` equals ``year`` and whose
    ``CR_region_overrides_split`` contains ``region`` (case-insensitive).

    Args:
        row_num: Row number, used only to prefix the progress messages.
        activity_id: Activity id to match against ``CR_activity_id``.
        year: Year to match against ``CR_year``.
        region: Region that needs a fallback.

    Returns:
        A one-element list: the ``CR_region`` of the first matching map entry,
        or the upper-cased input region when no entry matches. A list is
        returned in both cases because the caller iterates over the result;
        a bare string would be iterated character by character.
    """
    print(f"({row_num}) Trying to find region fallbacks for {activity_id} in year {year} in region {region}")
    upper_region = region.upper()
    wanted_activity = activity_id.lower()
    filtered_list = filter_list(
        region_map,
        lambda x: (
            x['CR_activity_id'].lower() == wanted_activity
            and x['CR_year'] == year
            and any(y.upper() == upper_region for y in x['CR_region_overrides_split'])
        ),
    )

    if len(filtered_list) > 1:
        print(f"({row_num}) Found multiple matching region overrides for regions for {activity_id} in year {year} in region {region}. Using the first entry.")
    found_map = list_first(filtered_list)

    if not found_map:
        print(f"({row_num}) There is no override region for {activity_id} in year {year} in region {region}")
        return [upper_region]

    print(f"({row_num}) Found override regions for {activity_id} in year {year} in region {region}. New region is: {found_map['CR_region']}")
    return [found_map["CR_region"]]

# Tries to find the climatiq record for querying using region fallbacks.
# If no fallback region matches, an empty list is returned so the caller can move on to its next fallback stage.
def find_climatiq_records_with_region_fallback(row_num: int, climatiq_records: list, source: str, activity_id: str, year: int, fallback_regions: list[str]):
    """Return the first record that matches one of the fallback regions.

    For every region, the year is first resolved with
    ``find_climatiq_year_for_region`` and then an exact lookup is done with
    ``find_exact_climatiq_record``.

    Args:
        row_num: Row number of the entry being processed (not used here).
        climatiq_records: Records to search.
        source: Passed through to ``find_exact_climatiq_record``.
        activity_id: Activity id to match.
        year: Requested year; resolved per region before the lookup.
        fallback_regions: Regions to try, in order. A single region given as
            a plain string is accepted as well.

    Returns:
        The first truthy lookup result, or ``[]`` when no region matches.
    """
    # A bare string must be wrapped, otherwise iterating it would yield its
    # individual characters ("CN" -> "C", "N") instead of one region code.
    if isinstance(fallback_regions, str):
        candidate_regions = [fallback_regions]
    else:
        # Work on a copy so the list can be maintained dynamically without
        # touching the caller's list.
        candidate_regions = list(fallback_regions)

    for region in candidate_regions:
        fallback_year = find_climatiq_year_for_region(climatiq_records, year, region)
        result = find_exact_climatiq_record(climatiq_records, source, activity_id, fallback_year, region)
        if result:
            return result

    # The fallback that averaged over all other regions of the year is
    # disabled on purpose, to lead the entry into the second stage fallback:
    # return filter_list(climatiq_records, lambda x: x['CF_year'] == year)
    return []

def find_exact_climatiq_record(climatiq_records: list, soruce: str, activity_id: str, year: int, region: str):
    """Return the first record matching activity id, year and region exactly.

    Activity id and region are compared case-insensitively; years are
    compared after converting both sides with ``int``.

    Args:
        climatiq_records: Records that can be indexed with
            ``'CF_activity_id'``, ``'CF_year'`` and ``'CF_region'``.
        soruce: Not used for matching. The misspelled name is kept so that
            callers passing it by keyword keep working.
        activity_id: Activity id to match.
        year: Year to match.
        region: Region code to match.

    Returns:
        Whatever ``list_first`` yields for the list of matching records.
    """
    # Normalise the search values once instead of once per record.
    wanted_activity = activity_id.lower()
    wanted_year = int(year)
    wanted_region = region.upper()

    def is_exact_match(record) -> bool:
        # Logical ``and`` short-circuits, so later fields are only inspected
        # for records that already matched the earlier ones.
        return (
            record['CF_activity_id'].lower() == wanted_activity
            and int(record['CF_year']) == wanted_year
            and record['CF_region'].upper() == wanted_region
        )

    return list_first(filter_list(climatiq_records, is_exact_match))

# Step 1 :
# This function is used to resolve the climatiq record that we can use for querying the estimate endpoint
# When no exact match is found it will apply different fallback mechanism for year and region 

def find_climatiq_records_for_query(row_num: int, climatiq_records: list, soruce: str, activity_id: str, year: int, region: str):
    """Resolve the climatiq record(s) to use for querying the estimate endpoint.

    The lookup runs through up to four stages and stops at the first one
    that yields a result:

    1. exact match on activity id, year and region;
    2. region fallback for the requested year;
    3. year fallback for the requested region;
    4. region fallback combined with the fallback year.

    Args:
        row_num: Row number, used to prefix the progress messages.
        climatiq_records: Records to search.
        soruce: Passed through to the lookup helpers (name kept as-is for
            keyword callers).
        activity_id: Activity id to match.
        year: Requested year.
        region: Requested region.

    Returns:
        A list: the result itself if it already is a list, otherwise the
        single result wrapped in a list.
    """
    print(f"({row_num}) Trying to find climatiq records for {activity_id} in year {year} in region {region}...")
    result = find_exact_climatiq_record(climatiq_records, soruce, activity_id, year, region)

    if not result:
        # If we did not have a direct match, we need to apply the region fallback
        print(f"({row_num}) There is no direct climatiq record. Trying region fallback...")
        climatiq_regions = find_climatiq_regions(row_num, activity_id, year, region)
        result = find_climatiq_records_with_region_fallback(row_num, climatiq_records, soruce, activity_id, year, climatiq_regions)

    # Truthiness covers both "nothing returned" and "empty list", so no
    # separate length check is needed.
    if not result:
        # We still have no result by just using the region fallback, try again using the year fallback
        print(f"({row_num}) There is no climatiq record with just region fallback. Trying year fallback...")
        climatiq_year = find_climatiq_year(climatiq_records, year)
        print(climatiq_year)
        result = find_exact_climatiq_record(climatiq_records, soruce, activity_id, climatiq_year, region)

        if not result:
            # We still have no results using the year fallback, try again using the fallback year and region fallback
            print(f"({row_num}) There is no climatiq record with region and year fallbacks. Trying combined region and year fallback...")
            climatiq_regions = find_climatiq_regions(row_num, activity_id, climatiq_year, region)
            result = find_climatiq_records_with_region_fallback(row_num, climatiq_records, soruce, activity_id, climatiq_year, climatiq_regions)

    return result if isinstance(result, list) else [result]


  1. 暂无评论