I have the following Python code which tries to color rows of an Excel spreadsheet conditionally upon the values of the columns. Due to the number of rows, the run time is very slow at more than 30 mins
. I am wondering if there are ways to make this much faster? Thanks.
import openpyxl
from openpyxl.styles import PatternFill, Font
import time
import os
from concurrent.futures import ThreadPoolExecutor
# Create sample data
wb = openpyxl.Workbook()
ws = wb.active
# Add headers
headers = ["ID", "Type", "Value"]
for col, header in enumerate(headers, 1):
ws.cell(row=1, column=col, value=header)
# Add 100000 rows of sample data
for row in range(2, 100002):
ws.cell(row=row, column=1, value=row-1)
ws.cell(row=row, column=2, value="Type 1" if row % 3 == 0 else
"Type 2" if row % 3 == 1 else "Type 3")
ws.cell(row=row, column=3, value=f"Value {row-1}")
# Define fills
fills = {
"Type 1": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"),
"Type 2": PatternFill(start_color="DBEEF4", end_color="DBEEF4", fill_type="solid"),
"Type 3": PatternFill(start_color="FFC0CB", end_color="FFC0CB", fill_type="solid")
}
# Loop approach
start = time.perf_counter()
for row_idx in range(2, ws.max_row + 1):
category = ws.cell(row=row_idx, column=2).value
fill = fills.get(category)
if fill:
for cell in ws[row_idx]:
cell.fill = fill
cell.font = Font(bold=True)
print(f"Run time: {time.perf_counter() - start:.2f} seconds")
wb.save("output.xlsx")
I have the following Python code which tries to color rows of an Excel spreadsheet conditionally upon the values of the columns. Due to the number of rows, the run time is very slow at more than 30 mins
. I am wondering if there are ways to make this much faster? Thanks.
import openpyxl
from openpyxl.styles import PatternFill, Font
import time
import os
from concurrent.futures import ThreadPoolExecutor
# Create sample data
wb = openpyxl.Workbook()
ws = wb.active
# Add headers
headers = ["ID", "Type", "Value"]
for col, header in enumerate(headers, 1):
ws.cell(row=1, column=col, value=header)
# Add 100000 rows of sample data
for row in range(2, 100002):
ws.cell(row=row, column=1, value=row-1)
ws.cell(row=row, column=2, value="Type 1" if row % 3 == 0 else
"Type 2" if row % 3 == 1 else "Type 3")
ws.cell(row=row, column=3, value=f"Value {row-1}")
# Define fills
fills = {
"Type 1": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"),
"Type 2": PatternFill(start_color="DBEEF4", end_color="DBEEF4", fill_type="solid"),
"Type 3": PatternFill(start_color="FFC0CB", end_color="FFC0CB", fill_type="solid")
}
# Loop approach
start = time.perf_counter()
for row_idx in range(2, ws.max_row + 1):
category = ws.cell(row=row_idx, column=2).value
fill = fills.get(category)
if fill:
for cell in ws[row_idx]:
cell.fill = fill
cell.font = Font(bold=True)
print(f"Run time: {time.perf_counter() - start:.2f} seconds")
wb.save("output.xlsx")
Share
Improve this question
asked Feb 14 at 6:02
user321627user321627
2,5724 gold badges27 silver badges49 bronze badges
2 Answers
Reset to default 4Indexing in openpyxl is notoriously slow. Use iterators instead.
import openpyxl
from openpyxl.styles import PatternFill, Font
import time
# Create sample data
wb = openpyxl.Workbook()
ws = wb.active
# Add headers
headers = ["ID", "Type", "Value"]
for col, header in enumerate(headers, 1):
ws.cell(row=1, column=col, value=header)
# Add 100000 rows of sample data
for row in range(2, 100002):
ws.cell(row=row, column=1, value=row-1)
ws.cell(row=row, column=2, value="Type 1" if row % 3 == 0 else
"Type 2" if row % 3 == 1 else "Type 3")
ws.cell(row=row, column=3, value=f"Value {row-1}")
# Define fills
fills = {
"Type 1": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"),
"Type 2": PatternFill(start_color="DBEEF4", end_color="DBEEF4", fill_type="solid"),
"Type 3": PatternFill(start_color="FFC0CB", end_color="FFC0CB", fill_type="solid")
}
# Loop approach
bold = Font(bold=True)
start = time.perf_counter()
for row in ws.iter_rows():
if (fill := fills.get(row[1].value)) is not None:
for cell in row:
cell.fill = fill
cell.font = bold
print(f"Run time: {time.perf_counter() - start:.2f} seconds")
wb.save("output.xlsx")
In testing, this ran in ~2 seconds.
Platform
MacOS 15.3.1
Apple M2
Python 3.13.2
openpyxl 3.1.5
Here's an easier to read and maintain version that is faster.
import openpyxl
from openpyxl.styles import PatternFill, Font, NamedStyle
import time
wb = openpyxl.Workbook()
ws = wb.active
# Add headers
ws.append(["ID", "Type", "Value"])
fill1 = PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid")
fill2 = PatternFill(start_color="DBEEF4", end_color="DBEEF4", fill_type="solid")
fill3 = PatternFill(start_color="FFC0CB", end_color="FFC0CB", fill_type="solid")
styles = [
NamedStyle("Type 1", fill=fill1, font=Font(bold=True)),
NamedStyle("Type 2", fill=fill2, font=Font(bold=True)),
NamedStyle("Type 3", fill=fill3, font=Font(bold=True)),
]
for style in styles:
wb.add_named_style(style)
# Add 100000 rows of sample data
def data(size=100_000):
lookup = {0: "Type 1", 1: "Type 2", 2: "Type 3",}
for idx in range(2, size+2):
mod = idx % 3
typ = lookup[mod]
row = [idx, typ, f"Value {idx - 1}"]
yield row
start = time.perf_counter()
data = data()
for x, row in enumerate(data, 2):
style = row[1]
for y, value in enumerate(row, 1):
cell = ws.cell(row=x, column=y, value=value)
cell.style = style
print(f"Run time: {time.perf_counter() - start:.2f} seconds")
wb.save("output.xlsx")