I'm trying to build a python code that reads model.bim and report.json from power bi project to get used and unused measures and columns, for my thoughts
A column is marked as "used" if referenced in:
A chart (from report.json), relationship, parameter / calculated table, filter, or a measure And a measure is marked as "used" if referenced in: A chart (from report.json), parameter / calculated table or referenced in another measure
This is what I'm having and it's not marking the measures nor columns correct at all
import json
import pandas as pd
import os
def parse_model_bim(model_bim_path):
with open(model_bim_path, "r", encoding="utf-8") as file:
model_data = json.load(file)
tables = {}
used_columns_measures = set()
for table in model_data["model"]["tables"]:
table_name = table["name"]
table_type = "Calculated" if table.get("type") == "calculated" else "Regular"
columns = {col["name"]: col.get("isHidden", False) for col in table.get("columns", [])}
measures = {m["name"]: (m["expression"], m.get("isHidden", False)) for m in table.get("measures", [])}
tables[table_name] = {
"type": table_type,
"columns": columns,
"measures": measures,
}
if table_type == "Calculated":
used_columns_measures.update(columns.keys())
used_columns_measures.update(measures.keys())
return tables, used_columns_measures
def parse_report_json(report_json_path):
with open(report_json_path, "r", encoding="utf-8") as file:
report_data = json.load(file)
used_columns_measures = set()
for page in report_data.get("sections", []):
for visual in page.get("visualContainers", []):
if "query" in visual:
for item in visual["query"].get("Select", []):
used_columns_measures.add(item.get("Name", ""))
if "filters" in visual:
for filter_item in visual["filters"]:
used_columns_measures.add(filter_item.get("column", ""))
return used_columns_measures
def analyze_metadata(model_bim_path, report_json_path, drop_unused=False):
tables, used_in_model = parse_model_bim(model_bim_path)
used_in_report = parse_report_json(report_json_path)
used_columns_measures = used_in_model | used_in_report
table_data = []
column_data = []
measure_data = []
for table_name, table_info in tables.items():
total_columns = len(table_info["columns"])
used_columns = sum(1 for col in table_info["columns"] if col in used_columns_measures)
total_measures = len(table_info["measures"])
used_measures = sum(1 for measure in table_info["measures"] if measure in used_columns_measures)
is_unused = (used_columns + used_measures) == 0
quality_score = (used_columns + used_measures) / max(1, (total_columns + total_measures))
table_data.append([table_name, total_columns, used_columns, total_columns - used_columns,
total_measures, used_measures, total_measures - used_measures,
is_unused, table_info["type"], quality_score])
for col_name, hidden in table_info["columns"].items():
column_data.append([table_name, col_name, col_name in used_columns_measures, hidden])
for measure_name, (dax_expr, hidden) in table_info["measures"].items():
measure_data.append([table_name, measure_name, dax_expr, measure_name in used_columns_measures, hidden])
output_file = "powerbi_metadata.xlsx"
with pd.ExcelWriter(output_file) as writer:
pd.DataFrame(table_data, columns=["table_name", "total_columns", "used_columns", "unused_columns", "total_measures", "used_measures", "unused_measures", "is_unused", "table_type", "quality_score"]).to_excel(writer, sheet_name="Tables", index=False)
pd.DataFrame(column_data, columns=["table_name", "column_name", "Used", "Hidden"]).to_excel(writer, sheet_name="Columns", index=False)
pd.DataFrame(measure_data, columns=["table_name", "measure_name", "dax_expression", "Used", "Hidden"]).to_excel(writer, sheet_name="Measures", index=False)
print(f"Excel report generated: {output_file}")
if drop_unused:
with open(model_bim_path, "r", encoding="utf-8") as file:
model_data = json.load(file)
for table in model_data["model"]["tables"]:
if "measures" in table:
table["measures"] = [m for m in table["measures"] if m["name"] in used_columns_measures]
with open(model_bim_path, "w", encoding="utf-8") as file:
json.dump(model_data, file, indent=2)
print("Unused measures removed from model.bim")
# Example usage
# analyze_metadata("path/to/model.bim", "path/to/report.json", drop_unused=True)