I need to read data from an Excel file. The first cell contains the property name, and the second cell contains the property value. However, some of the property names in the first column are merged across two or more columns, and the corresponding values are in the next cell. For example, the property name "Ref" is in columns A and B, and its value is in column C.
I want to retrieve the value of the "Ref" property from column C in my Excel file.
Here is my excel image:
I am using python. Here is the output:
Approval Memo of : SHILPI AKTER
Name of the Applicant : SHILPI AKTER
Name of Territory : Comilla
Total Family Expenses : 30000
Ref : N/A
Amount : N/A
Total Amount : 3000
Ref and Amount Properties value not found. Here is my code:
import os
import openpyxl
from openpyxl.utils import column_index_from_string
file_path = r"D:\file\input\example.xlsx"
if os.path.exists(file_path):
print("File exists!")
else:
print("File not found! Check the path.")
exit()
target_sheet = "Output Approval Templete"
# Define the properties to extract
properties = [
"Approval Memo of",
"Name of the Applicant",
"Name of Territory",
"Total Family Expenses",
"Ref",
"Amount",
"Total Amount"
]
# Function to get the actual value from a merged cell
def get_merged_cell_value(sheet, row, col):
for merged_range in sheet.merged_cells.ranges:
min_row, min_col, max_row, max_col = merged_range.bounds # Extract merged cell bounds
if min_row <= row <= max_row and min_col <= col <= max_col:
return sheet.cell(min_row, min_col).value # Return the first cell's value of the merged range
return sheet.cell(row, col).value
# Function to format numeric values properly
def format_value(value):
if isinstance(value, float) and value > 1e10: # Large numbers like NID
return str(int(value)) # Convert to integer and string to avoid scientific notation
elif isinstance(value, (int, float)): # General number formatting
return str(value)
elif value is None:
return "N/A" # Handle missing values
return str(value).strip()
try:
# Load the workbook
wb = openpyxl.load_workbook(file_path, data_only=True)
if target_sheet not in wb.sheetnames:
print(f"Sheet '{target_sheet}' not found in the file.")
else:
ws = wb[target_sheet]
extracted_data = {}
# Iterate over rows to extract data
for row in ws.iter_rows():
for cell in row:
# Check if the cell value is a property we are looking for
if cell.value and isinstance(cell.value, str) and cell.value.strip() in properties:
prop_name = cell.value.strip()
col_idx = cell.column # Get column index (1-based)
next_col_idx = col_idx + 1 # Next column index
# Ensure next column exists within sheet bounds
if next_col_idx <= ws.max_column:
# Check if the cell is merged, and get its value
next_value = get_merged_cell_value(ws, cell.row, next_col_idx)
# Store the formatted value for the property
extracted_data[prop_name] = format_value(next_value) # Store extracted value
# Print extracted values
for key, value in extracted_data.items():
print(f"{key} : {value}")
except Exception as e:
print(f"Error loading workbook: {e}")
Please help me to find out merge cell properties value.
I need to read data from an Excel file. The first cell contains the property name, and the second cell contains the property value. However, some of the property names in the first column are merged across two or more columns, and the corresponding values are in the next cell. For example, the property name "Ref" is in columns A and B, and its value is in column C.
I want to retrieve the value of the "Ref" property from column C in my Excel file.
Here is my excel image:
I am using python. Here is the output:
Approval Memo of : SHILPI AKTER
Name of the Applicant : SHILPI AKTER
Name of Territory : Comilla
Total Family Expenses : 30000
Ref : N/A
Amount : N/A
Total Amount : 3000
Ref and Amount Properties value not found. Here is my code:
import os
import openpyxl
from openpyxl.utils import column_index_from_string
file_path = r"D:\file\input\example.xlsx"
if os.path.exists(file_path):
print("File exists!")
else:
print("File not found! Check the path.")
exit()
target_sheet = "Output Approval Templete"
# Define the properties to extract
properties = [
"Approval Memo of",
"Name of the Applicant",
"Name of Territory",
"Total Family Expenses",
"Ref",
"Amount",
"Total Amount"
]
# Function to get the actual value from a merged cell
def get_merged_cell_value(sheet, row, col):
for merged_range in sheet.merged_cells.ranges:
min_row, min_col, max_row, max_col = merged_range.bounds # Extract merged cell bounds
if min_row <= row <= max_row and min_col <= col <= max_col:
return sheet.cell(min_row, min_col).value # Return the first cell's value of the merged range
return sheet.cell(row, col).value
# Function to format numeric values properly
def format_value(value):
if isinstance(value, float) and value > 1e10: # Large numbers like NID
return str(int(value)) # Convert to integer and string to avoid scientific notation
elif isinstance(value, (int, float)): # General number formatting
return str(value)
elif value is None:
return "N/A" # Handle missing values
return str(value).strip()
try:
# Load the workbook
wb = openpyxl.load_workbook(file_path, data_only=True)
if target_sheet not in wb.sheetnames:
print(f"Sheet '{target_sheet}' not found in the file.")
else:
ws = wb[target_sheet]
extracted_data = {}
# Iterate over rows to extract data
for row in ws.iter_rows():
for cell in row:
# Check if the cell value is a property we are looking for
if cell.value and isinstance(cell.value, str) and cell.value.strip() in properties:
prop_name = cell.value.strip()
col_idx = cell.column # Get column index (1-based)
next_col_idx = col_idx + 1 # Next column index
# Ensure next column exists within sheet bounds
if next_col_idx <= ws.max_column:
# Check if the cell is merged, and get its value
next_value = get_merged_cell_value(ws, cell.row, next_col_idx)
# Store the formatted value for the property
extracted_data[prop_name] = format_value(next_value) # Store extracted value
# Print extracted values
for key, value in extracted_data.items():
print(f"{key} : {value}")
except Exception as e:
print(f"Error loading workbook: {e}")
Please help me to find out merge cell properties value.
Share Improve this question asked Mar 16 at 16:29 Enamul HaqueEnamul Haque 5,0852 gold badges42 silver badges64 bronze badges 2- It would help us a lot if you shared an excel example file, as well as what is your expected output. – PaulS Commented Mar 16 at 17:11
- 1 The best fix would be to avoid merged cells altogether. In this sample, the look and feel can be achieved with formatting alone and merging is not required. Merging cells should be avoided wherever possible. It creates more complications than it has benefits. – teylyn Commented Mar 16 at 23:46
1 Answer
Reset to default 1Just get the last cell in the range column number and add 1 as you have with the other fields.
This code assumes the merge cells are row only.
Also assumes the key name is cells is exactly the same as the name in the properties List
import os
import openpyxl
def get_next_col(lc): # lc = Left cell in the merge range
for merge in ws.merged_cells:
if lc in merge.coord:
print(f"Merge Range: {merge.coord}")
return merge.top[-1][1]+1 # Return 2nd value of last tuple incremented by 1
def format_value(value):
if isinstance(value, float) and value > 1e10: # Large numbers like NID
return str(int(value)) # Convert to integer and string to avoid scientific notation
elif isinstance(value, (int, float)): # General number formatting
return str(value)
elif value is None:
return "N/A" # Handle missing values
return str(value).strip()
# Define the properties to extract
properties = [
"Approval Memo of",
"Name of the Applicant",
"Name of Territory",
"Total Family Expenses",
"Ref",
"Amount",
"Total Amount"
]
# Init Dictionary
extracted_data = {}
# Set working sheet name
target_sheet = "Output Approval Templete"
# Load the workbook
file_path = r"D:\file\input\example.xlsx"
if os.path.exists(file_path):
print("File exists!\n")
else:
print("File not found! Check the path.")
exit()
wb = openpyxl.load_workbook(file_path, data_only=True)
ws = wb.active
# Check working sheet exists
if target_sheet not in wb.sheetnames:
print(f"Sheet '{target_sheet}' not found in the file.")
else:
ws = wb[target_sheet]
# Process rows
for row in ws.iter_rows():
for cell in row:
cv = cell.value
if isinstance(cv, str): # Strip if the cell value is a string
cv = cv.strip()
if cv in properties: # Process only cells with value in the 'properties' List
co = cell.coordinate
print(f"Processing '{cv}' in 'Properties' List at cell {co}")
if co in ws.merged_cells: # Check if the current cell is in a merge
print('This is also a merged cell:')
col = get_next_col(co) # If merged get the next col number after the merge range
else:
col = cell.col_idx + 1 # If not merged get the next col number after the cell
next_value = ws.cell(cell.row, col).value # Get next cell value as determined by value of 'col'
print(f"Inserting Key: '{cv}' with Value: {next_value}")
extracted_data[cv] = format_value(next_value) # Add key and value to the dictionary
print("-----------\n")
for key, val in extracted_data.items():
print(f"{key} : {val}")
Output
Extracted data from example Sheet.
Approval Memo of : SHILPI AKTER
Name of the Applicant : SHILPI AKTER
Name of Territory : Comilla
Total Family Expenses : 30000
Ref : 22000
Amount : 5000
Total Amount : 3000