最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

powerbi - Extract used and unused measures and columns from power bi project folders using python - Stack Overflow

programmeradmin4浏览0评论

I'm trying to build a python code that reads model.bim and report.json from power bi project to get used and unused measures and columns, for my thoughts

A column is marked as "used" if referenced in:

A chart (from report.json), relationship, parameter / calculated table, filter, or a measure And a measure is marked as "used" if referenced in: A chart (from report.json), parameter / calculated table or referenced in another measure

This is what I'm having and it's not marking the measures nor columns correct at all

import json
import pandas as pd
import os

def parse_model_bim(model_bim_path):
    with open(model_bim_path, "r", encoding="utf-8") as file:
        model_data = json.load(file)
    
    tables = {}
    used_columns_measures = set()
    
    for table in model_data["model"]["tables"]:
        table_name = table["name"]
        table_type = "Calculated" if table.get("type") == "calculated" else "Regular"
        columns = {col["name"]: col.get("isHidden", False) for col in table.get("columns", [])}
        measures = {m["name"]: (m["expression"], m.get("isHidden", False)) for m in table.get("measures", [])}
        
        tables[table_name] = {
            "type": table_type,
            "columns": columns,
            "measures": measures,
        }
        
        if table_type == "Calculated":
            used_columns_measures.update(columns.keys())
            used_columns_measures.update(measures.keys())
    
    return tables, used_columns_measures

def parse_report_json(report_json_path):
    with open(report_json_path, "r", encoding="utf-8") as file:
        report_data = json.load(file)
    
    used_columns_measures = set()
    
    for page in report_data.get("sections", []):
        for visual in page.get("visualContainers", []):
            if "query" in visual:
                for item in visual["query"].get("Select", []):
                    used_columns_measures.add(item.get("Name", ""))
            
            if "filters" in visual:
                for filter_item in visual["filters"]:
                    used_columns_measures.add(filter_item.get("column", ""))
    
    return used_columns_measures

def analyze_metadata(model_bim_path, report_json_path, drop_unused=False):
    tables, used_in_model = parse_model_bim(model_bim_path)
    used_in_report = parse_report_json(report_json_path)
    
    used_columns_measures = used_in_model | used_in_report
    
    table_data = []
    column_data = []
    measure_data = []
    
    for table_name, table_info in tables.items():
        total_columns = len(table_info["columns"])
        used_columns = sum(1 for col in table_info["columns"] if col in used_columns_measures)
        
        total_measures = len(table_info["measures"])
        used_measures = sum(1 for measure in table_info["measures"] if measure in used_columns_measures)
        
        is_unused = (used_columns + used_measures) == 0
        quality_score = (used_columns + used_measures) / max(1, (total_columns + total_measures))
        
        table_data.append([table_name, total_columns, used_columns, total_columns - used_columns,
                           total_measures, used_measures, total_measures - used_measures,
                           is_unused, table_info["type"], quality_score])
        
        for col_name, hidden in table_info["columns"].items():
            column_data.append([table_name, col_name, col_name in used_columns_measures, hidden])
        
        for measure_name, (dax_expr, hidden) in table_info["measures"].items():
            measure_data.append([table_name, measure_name, dax_expr, measure_name in used_columns_measures, hidden])
    
    output_file = "powerbi_metadata.xlsx"
    with pd.ExcelWriter(output_file) as writer:
        pd.DataFrame(table_data, columns=["table_name", "total_columns", "used_columns", "unused_columns", "total_measures", "used_measures", "unused_measures", "is_unused", "table_type", "quality_score"]).to_excel(writer, sheet_name="Tables", index=False)
        pd.DataFrame(column_data, columns=["table_name", "column_name", "Used", "Hidden"]).to_excel(writer, sheet_name="Columns", index=False)
        pd.DataFrame(measure_data, columns=["table_name", "measure_name", "dax_expression", "Used", "Hidden"]).to_excel(writer, sheet_name="Measures", index=False)
    
    print(f"Excel report generated: {output_file}")
    
    if drop_unused:
        with open(model_bim_path, "r", encoding="utf-8") as file:
            model_data = json.load(file)
        
        for table in model_data["model"]["tables"]:
            if "measures" in table:
                table["measures"] = [m for m in table["measures"] if m["name"] in used_columns_measures]
        
        with open(model_bim_path, "w", encoding="utf-8") as file:
            json.dump(model_data, file, indent=2)
        
        print("Unused measures removed from model.bim")

# Example usage
# analyze_metadata("path/to/model.bim", "path/to/report.json", drop_unused=True)

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论