How to speed up Python for highlighting cells for an Excel spreadsheet conditionally?

I have the following Python code which tries to color rows of an Excel spreadsheet conditionally upon the values of the columns. Due to the number of rows, the run time is very slow at more than 30 mins. I am wondering if there are ways to make this much faster? Thanks.

import openpyxl
from openpyxl.styles import PatternFill, Font
import time
import os
from concurrent.futures import ThreadPoolExecutor

# Create sample data
wb = openpyxl.Workbook()
ws = wb.active

# Add headers
headers = ["ID", "Type", "Value"]
for col, header in enumerate(headers, 1):
    ws.cell(row=1, column=col, value=header)

# Add 100000 rows of sample data
for row in range(2, 100002):
    ws.cell(row=row, column=1, value=row-1)
    ws.cell(row=row, column=2, value="Type 1" if row % 3 == 0 else 
            "Type 2" if row % 3 == 1 else "Type 3")
    ws.cell(row=row, column=3, value=f"Value {row-1}")

# Define fills
fills = {
    "Type 1": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"),
    "Type 2": PatternFill(start_color="DBEEF4", end_color="DBEEF4", fill_type="solid"),
    "Type 3": PatternFill(start_color="FFC0CB", end_color="FFC0CB", fill_type="solid")
}

# Loop approach
start = time.perf_counter()

for row_idx in range(2, ws.max_row + 1):
    category = ws.cell(row=row_idx, column=2).value
    fill = fills.get(category)
    
    if fill:
        for cell in ws[row_idx]:
            cell.fill = fill
            cell.font = Font(bold=True)

print(f"Run time: {time.perf_counter() - start:.2f} seconds")
wb.save("output.xlsx")

import openpyxl
from openpyxl.styles import PatternFill, Font
import time
import os
from concurrent.futures import ThreadPoolExecutor

# Create sample data
wb = openpyxl.Workbook()
ws = wb.active

# Add headers
headers = ["ID", "Type", "Value"]
for col, header in enumerate(headers, 1):
    ws.cell(row=1, column=col, value=header)

# Add 100000 rows of sample data
for row in range(2, 100002):
    ws.cell(row=row, column=1, value=row-1)
    ws.cell(row=row, column=2, value="Type 1" if row % 3 == 0 else 
            "Type 2" if row % 3 == 1 else "Type 3")
    ws.cell(row=row, column=3, value=f"Value {row-1}")

# Define fills
fills = {
    "Type 1": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"),
    "Type 2": PatternFill(start_color="DBEEF4", end_color="DBEEF4", fill_type="solid"),
    "Type 3": PatternFill(start_color="FFC0CB", end_color="FFC0CB", fill_type="solid")
}

# Loop approach
start = time.perf_counter()

for row_idx in range(2, ws.max_row + 1):
    category = ws.cell(row=row_idx, column=2).value
    fill = fills.get(category)
    
    if fill:
        for cell in ws[row_idx]:
            cell.fill = fill
            cell.font = Font(bold=True)

print(f"Run time: {time.perf_counter() - start:.2f} seconds")
wb.save("output.xlsx")

Share Improve this question asked Feb 14 at 6:02 user321627 2,5724 gold badges27 silver badges49 bronze badges

Add a comment |

2 Answers 2

Sorted by: Reset to default 4

Indexing in openpyxl is notoriously slow. Use iterators instead.

import openpyxl
from openpyxl.styles import PatternFill, Font
import time

# Create sample data
wb = openpyxl.Workbook()
ws = wb.active

# Add headers
headers = ["ID", "Type", "Value"]
for col, header in enumerate(headers, 1):
    ws.cell(row=1, column=col, value=header)

# Add 100000 rows of sample data

for row in range(2, 100002):
    ws.cell(row=row, column=1, value=row-1)
    ws.cell(row=row, column=2, value="Type 1" if row % 3 == 0 else 
            "Type 2" if row % 3 == 1 else "Type 3")
    ws.cell(row=row, column=3, value=f"Value {row-1}")


# Define fills
fills = {
    "Type 1": PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"),
    "Type 2": PatternFill(start_color="DBEEF4", end_color="DBEEF4", fill_type="solid"),
    "Type 3": PatternFill(start_color="FFC0CB", end_color="FFC0CB", fill_type="solid")
}

# Loop approach
bold = Font(bold=True)

start = time.perf_counter()

for row in ws.iter_rows():
    if (fill := fills.get(row[1].value)) is not None:
        for cell in row:
            cell.fill = fill
            cell.font = bold

print(f"Run time: {time.perf_counter() - start:.2f} seconds")
wb.save("output.xlsx")

In testing, this ran in ~2 seconds.

Platform

MacOS 15.3.1
Apple M2
Python 3.13.2
openpyxl 3.1.5

Here's an easier to read and maintain version that is faster.

import openpyxl
from openpyxl.styles import PatternFill, Font, NamedStyle
import time

wb = openpyxl.Workbook()
ws = wb.active

# Add headers
ws.append(["ID", "Type", "Value"])

fill1 = PatternFill(start_color="FFF2CC", end_color="FFF2CC", fill_type="solid")
fill2 = PatternFill(start_color="DBEEF4", end_color="DBEEF4", fill_type="solid")
fill3 = PatternFill(start_color="FFC0CB", end_color="FFC0CB", fill_type="solid")


styles = [
    NamedStyle("Type 1", fill=fill1, font=Font(bold=True)),
    NamedStyle("Type 2", fill=fill2, font=Font(bold=True)),
    NamedStyle("Type 3", fill=fill3, font=Font(bold=True)),
]

for style in styles:
    wb.add_named_style(style)

# Add 100000 rows of sample data
def data(size=100_000):
    lookup = {0: "Type 1", 1: "Type 2", 2: "Type 3",}
    for idx in range(2, size+2):
        mod = idx % 3
        typ = lookup[mod]
        row = [idx, typ, f"Value {idx - 1}"]
        yield row

start = time.perf_counter()
data = data()

for x, row in enumerate(data, 2):
    style = row[1]
    for y, value in enumerate(row, 1):
        cell = ws.cell(row=x, column=y, value=value)
        cell.style = style

print(f"Run time: {time.perf_counter() - start:.2f} seconds")
wb.save("output.xlsx")

科技改变生活-雨落星辰 - 所有的伟大,都源于一个勇敢的开始

How to speed up Python for highlighting cells for an Excel spreadsheet conditionally? - Stack Overflow

2 Answers 2

与本文相关的文章

评论列表(0)