When performing OCR on user-selected regions using PaddleOCR, certain regions fail to be detected. The issue occurs in a Streamlit-based application with the following implementation characteristics:
1. The camera feed is initialized via a daemon thread (`threading.Thread`):
threading.Thread(target=video_capture_thread, daemon=True).start()
Inside the `video_capture_thread` function, my code looks like this:
# Excerpt from the body of video_capture_thread (one capture iteration).
# NOTE(review): the indentation below was reconstructed from a flattened
# paste -- in particular, confirm whether the overlay loop belongs inside
# the `if detection_active:` branch.
ret, frame = cap.read()
# NOTE(review): `ret` is not checked here; presumably a failed grab should be
# skipped before `frame` is resized -- confirm against the enclosing loop.
frame = cv2.resize(frame, target_resolution)
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if detection_active:
    # OCR runs on the grayscale copy; the RGB copy is only used for display.
    process_frame = frame_gray
    detection_time = time.time()
    # NOTE(review): this call runs synchronously in the capture thread, so
    # the loop is blocked for the whole OCR pass.
    detection_result = check_detection_region(process_frame, current_detection_region,
                                              last_process_time, last_check_time)
    # print("*"*10, f"Detection Used Time: {time.time()- detection_time}", "*"*10, "\n")
    if detection_result:
        process_result(detection_result)
    # Draw each selected region and its label on the display frame.
    for i, region in enumerate(current_detection_region):
        # OpenCV drawing calls need integer pixel coordinates.
        cv2.rectangle(frame_rgb,
                      (int(region[0]), int(region[1])),
                      (int(region[2]), int(region[3])),
                      (0, 255, 0), 5)
        cv2.putText(frame_rgb,
                    text=f"Region-{i}",
                    # The keyword was missing here (bare "=(...)"), which is
                    # a SyntaxError; `org` is the text's bottom-left corner.
                    org=(int(region[0]), int(region[1] - 10)),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.2,
                    color=(0, 255, 0),
                    thickness=4)
frame_buffer.append(frame_rgb)
frame_ready.set()
time.sleep(0.01)
2. Interactive region selection through canvas drawing.
3. Asynchronous OCR processing on the selected regions:
# Module-level OCR engine, created once and passed to every region worker.
# NOTE(review): the model directories are named "ch_PP-OCRv4_*" while
# lang="en" -- confirm that the recognition dictionary matches the model.
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
    det_model_dir="./ch_PP-OCRv4_det_infer",
    rec_model_dir="./ch_PP-OCRv4_rec_infer",
    log_level='ERROR',
)
def preprocess_bbox(region, frame):
    """Return ``region`` as an ordered box clipped to the bounds of ``frame``.

    Args:
        region: Four values ``(x1, y1, x2, y2)``; each is converted with
            ``int()``. The two corners may be given in either order.
        frame: Object whose ``shape[:2]`` is ``(height, width)``.

    Returns:
        A tuple ``(x1, y1, x2, y2)`` with ``x1 <= x2`` and ``y1 <= y2``,
        suitable for ``frame[y1:y2, x1:x2]``. A region lying completely
        outside the frame collapses to a zero-area box (empty slice).
    """
    x1, y1, x2, y2 = (int(value) for value in region)
    # Accept corners in any order.
    x1, x2 = min(x1, x2), max(x1, x2)
    y1, y2 = min(y1, y2), max(y1, y2)

    height, width = frame.shape[:2]
    # The coordinates are used as slice bounds, whose upper end is exclusive.
    # Clamping to width/height (not width - 1/height - 1) keeps the last
    # pixel column/row when a region touches the frame edge.
    x1 = max(0, min(x1, width))
    x2 = max(0, min(x2, width))
    y1 = max(0, min(y1, height))
    y2 = max(0, min(y2, height))
    return x1, y1, x2, y2
def change_num(region, text):
    """Convert OCR text to a float, rescaling values too large for the region.

    Each group of region indices has a threshold and a divisor: a value
    strictly above the threshold is divided by the divisor.
    NOTE(review): presumably this restores a decimal point the OCR missed --
    confirm the thresholds against the displayed value ranges.

    Args:
        region: Index of the region the text was read from.
        text: Numeric text (anything accepted by ``float()``).

    Returns:
        The value as a ``float`` on every path, rescaled when it exceeds the
        threshold for ``region``. Unknown region indices are never rescaled.

    Raises:
        ValueError: If ``text`` cannot be parsed as a number.
    """
    value = float(text)  # parse once instead of once per comparison/division

    if region in (2, 3):
        threshold, divisor = 999, 100
    elif region in (1, 4, 5, 6, 7, 8, 9):
        threshold, divisor = 100, 10
    elif region in (0, 10, 11):
        threshold, divisor = 10, 100
    else:
        return value

    return value / divisor if value > threshold else value
def check_detection_region(frame, detection_region, last_process_time, last_check_time):
    """Run ``process_region`` on every region of ``frame`` and collect results.

    Fresh per-region state is built on each call, and the regions are handed
    to a two-worker thread pool together with the module-level ``ocr``
    engine and a lock shared by the workers of this call.

    Args:
        frame: Image the regions refer to; passed through to the workers.
        detection_region: Sequence of ``(x1, y1, x2, y2)`` regions.
        last_process_time: Passed through unchanged to ``process_region``.
        last_check_time: Passed through unchanged to ``process_region``.

    Returns:
        A list with every truthy worker result (in completion order), or
        ``None`` when there are no regions or no region produced a result.
    """
    if not detection_region:
        return None

    region_data = {}
    process_time_lock = threading.Lock()
    for region_idx in range(len(detection_region)):
        region_data[region_idx] = {
            'detection_cache': [],
            'last_cache_clear': time.time(),
            'datetime_info': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'last_process_time': 0.0,
        }

    # max_workers = min(len(detection_region), os.cpu_count())
    max_workers = 2
    alarm_result = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its region so failures can be attributed.
        futures = {
            executor.submit(process_region, region_idx, region,
                            frame, region_data,
                            last_process_time, last_check_time,
                            process_time_lock, ocr): region_idx
            for region_idx, region in enumerate(detection_region)
        }
        future_process_time = time.time()
        for future in as_completed(futures):
            try:
                result = future.result()
            except Exception as exc:
                # Report the failure instead of dropping it silently;
                # otherwise a failing region just disappears from the output.
                print(f"ERROR: region {futures[future]} failed: {exc!r}")
                continue
            if result:
                alarm_result.append(result)

    # Regions may be lists (unhashable), so normalise to tuples before
    # counting the distinct ones.
    unique_regions = len({tuple(region) for region in detection_region})
    print("*"*10, f"Process Time:{time.time()- future_process_time} Rgions: {len(detection_region)}-{unique_regions}", "*"*10)
    return alarm_result or None
def process_region(region_idx, region, frame, region_data, last_process_time,
                   last_check_time, process_time_lock, ocr=None):
    """OCR one region of ``frame`` and return its most confident numeric reading.

    Args:
        region_idx: Index of the region; also the key into ``region_data``.
        region: ``(x1, y1, x2, y2)`` box, normalised via ``preprocess_bbox``.
        frame: Image array the region is cropped from.
        region_data: Mapping of region index to a per-region state dict.
        last_process_time: Mutable mapping; receives this call's timestamp.
        last_check_time: Mutable mapping; receives this call's timestamp.
        process_time_lock: Lock held while the OCR engine is called, or
            ``None`` to call the engine unguarded.
        ocr: Engine exposing ``ocr(image, cls=True)``.

    Returns:
        A dict with ``time``, ``region_id``, ``date_time`` and ``value``
        (``num``, ``conf``, ``raw_text``) for the highest-confidence numeric
        detection, or ``None`` when the crop is empty, nothing was
        recognised, or no detection passed the numeric/confidence filter.

    Raises:
        ValueError: If no OCR engine is supplied.
    """
    if ocr is None:
        raise ValueError("process_region requires an OCR engine")

    current_time = time.time()
    # with process_time_lock:
    #     last_proc = last_process_time.get(region_idx, 0.0)
    #     last_check = last_check_time.get(region_idx, 0.0)
    #     if (current_time - last_proc < PROCESS_INTERVAL or
    #             current_time - last_check < CHECK_INTERVAL):
    #         return None
    # Store timestamps: the disabled throttle above compares these entries
    # with the current time, so an elapsed duration would be meaningless.
    last_check_time[region_idx] = current_time
    last_process_time[region_idx] = current_time

    region_key = region_idx
    # Create the state on demand instead of subscripting a possible None.
    rd = region_data.setdefault(region_key, {
        'detection_cache': [],
        'last_cache_clear': current_time,
        'datetime_info': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'last_process_time': 0.0,
    })
    rd['last_process_time'] = current_time

    x1, y1, x2, y2 = preprocess_bbox(region, frame)
    roi = frame[y1:y2, x1:x2]
    if roi.size == 0:
        print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        return None

    # NOTE(review): one engine instance is shared by all workers; it is
    # presumably not safe to call concurrently, so calls are serialised with
    # the lock the caller supplies -- confirm against the PaddleOCR docs.
    guard = process_time_lock if process_time_lock is not None else threading.Lock()
    with guard:
        ocr_time = time.time()  # taken inside the lock: excludes waiting time
        ocr_result = ocr.ocr(roi, cls=True)
    if not ocr_result:
        print(f"ERROR: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        return None

    # Process the recognition results.
    current_time = time.time()
    date_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    for line in ocr_result:
        if not line:
            # An image without text may be reported as a None/empty entry.
            continue
        for detection in line:
            text, conf = detection[1]
            print(f"{date_time}-{time.time()- ocr_time}-Region({region}):{region_idx} Text: {text}, Conf: {conf}")
            match = re.match(r"(\d+(\.\d+)?)", text)
            if conf > 0.7 and match:
                current_value = float(change_num(region=region_idx, text=match.group(1)))
                rd['detection_cache'].append({
                    'match_text': current_value,
                    'conf': conf,
                    'time': current_time,
                    'ocr_text': text
                })

    if not rd['detection_cache']:
        # max() on an empty cache would raise; report the miss explicitly.
        print(f"{date_time}-Region({region}):{region_idx} no numeric text with conf > 0.7")
        return None

    # if (current_time - rd['last_cache_clear'] >= 1) and rd['detection_cache']:
    best = max(rd['detection_cache'], key=lambda x: x['conf'])
    rd['detection_cache'][:] = []
    rd['last_cache_clear'] = current_time
    return {
        'time': current_time,
        'region_id': region_key,
        'date_time': date_time,
        'value': {
            'num': best['match_text'],
            'conf': round(best['conf'], 4),
            'raw_text': best['ocr_text']
        }
    }
Here are some results (note that not every region appears in each pass):
2025-03-31 17:15:22-0.3458406925201416-Region((748, 1100, 851, 1177)):0 Text: 31, Conf: 0.9988272190093994
2025-03-31 17:15:22-0.3458406925201416-Region((1014, 1158, 1100, 1238)):1 Text: 31, Conf: 0.9988272190093994
2025-03-31 17:15:22-0.3442258834838867-Region((1958, 1897, 2025, 1977)):2 Text: 06, Conf: 0.9809097051620483
2025-03-31 17:15:23-0.22141623497009277-Region((2332, 1916, 2416, 1987)):5 Text: 82, Conf: 0.9945308566093445
2025-03-31 17:15:23-0.30546092987060547-Region((886, 1555, 972, 1625)):7 Text: 195, Conf: 0.9964171051979065
2025-03-31 17:15:23-0.1500873565673828-Region((598, 1552, 665, 1612)):9 Text: 130, Conf: 0.9943342804908752
********** Process Time:1.477724552154541 Rgions: 10-10 **********
2025-03-31 17:15:24-0.38674497604370117-Region((1014, 1158, 1100, 1238)):1 Text: 31, Conf: 0.9986810088157654
2025-03-31 17:15:24-0.3962728977203369-Region((748, 1100, 851, 1177)):0 Text: 2025, Conf: 0.9948824644088745
2025-03-31 17:15:24-0.3871188163757324-Region((1958, 1897, 2025, 1977)):2 Text: 36, Conf: 0.9956271648406982
2025-03-31 17:15:24-0.3814249038696289-Region((2092, 1916, 2163, 1993)):3 Text: 36, Conf: 0.9956271648406982
2025-03-31 17:15:24-0.4036712646484375-Region((2332, 1916, 2416, 1987)):5 Text: 82, Conf: 0.995288610458374
2025-03-31 17:15:25-0.3978700637817383-Region((886, 1555, 972, 1625)):7 Text: 195, Conf: 0.9958855509757996
2025-03-31 17:15:25-0.7991609573364258-Region((739, 1529, 812, 1596)):6 Text: 133, Conf: 0.9976328015327454
2025-03-31 17:15:25-0.2980964183807373-Region((598, 1552, 665, 1612)):9 Text: 130, Conf: 0.9968142509460449
********** Process Time:2.189676523208618 Rgions: 10-10 **********
2025-03-31 17:15:26-0.4011368751525879-Region((748, 1100, 851, 1177)):0 Text: 2025, Conf: 0.9921661615371704
2025-03-31 17:15:26-0.43567562103271484-Region((1014, 1158, 1100, 1238)):1 Text: 31, Conf: 0.9990317821502686
2025-03-31 17:15:26-0.22664427757263184-Region((2092, 1916, 2163, 1993)):3 Text: 36, Conf: 0.9965022802352905
2025-03-31 17:15:26-0.40086793899536133-Region((1958, 1897, 2025, 1977)):2 Text: 06, Conf: 0.9625216722488403
2025-03-31 17:15:27-0.33641600608825684-Region((2198, 1926, 2275, 1984)):4 Text: 34, Conf: 0.842035174369812
2025-03-31 17:15:27-0.33869314193725586-Region((2332, 1916, 2416, 1987)):5 Text: 82, Conf: 0.9974746704101562
2025-03-31 17:15:27-0.41469645500183105-Region((739, 1529, 812, 1596)):6 Text: 133, Conf: 0.9956898093223572
2025-03-31 17:15:27-0.2816200256347656-Region((886, 1555, 972, 1625)):7 Text: 195, Conf: 0.9957630634307861
2025-03-31 17:15:27-0.3099501132965088-Region((598, 1552, 665, 1612)):9 Text: 130, Conf: 0.9934964179992676
********** Process Time:1.8019309043884277 Rgions: 10-10 **********
When performing OCR on user-selected regions using PaddleOCR, certain regions fail to be detected. The issue occurs in a Streamlit-based application with the following implementation characteristics:
1. The camera feed is initialized via a daemon thread (`threading.Thread`):
threading.Thread(target=video_capture_thread, daemon=True).start()
Inside the `video_capture_thread` function, my code looks like this:
# Excerpt from the body of video_capture_thread (one capture iteration).
# NOTE(review): the indentation below was reconstructed from a flattened
# paste -- in particular, confirm whether the overlay loop belongs inside
# the `if detection_active:` branch.
ret, frame = cap.read()
# NOTE(review): `ret` is not checked here; presumably a failed grab should be
# skipped before `frame` is resized -- confirm against the enclosing loop.
frame = cv2.resize(frame, target_resolution)
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if detection_active:
    # OCR runs on the grayscale copy; the RGB copy is only used for display.
    process_frame = frame_gray
    detection_time = time.time()
    # NOTE(review): this call runs synchronously in the capture thread, so
    # the loop is blocked for the whole OCR pass.
    detection_result = check_detection_region(process_frame, current_detection_region,
                                              last_process_time, last_check_time)
    # print("*"*10, f"Detection Used Time: {time.time()- detection_time}", "*"*10, "\n")
    if detection_result:
        process_result(detection_result)
    # Draw each selected region and its label on the display frame.
    for i, region in enumerate(current_detection_region):
        # OpenCV drawing calls need integer pixel coordinates.
        cv2.rectangle(frame_rgb,
                      (int(region[0]), int(region[1])),
                      (int(region[2]), int(region[3])),
                      (0, 255, 0), 5)
        cv2.putText(frame_rgb,
                    text=f"Region-{i}",
                    # The keyword was missing here (bare "=(...)"), which is
                    # a SyntaxError; `org` is the text's bottom-left corner.
                    org=(int(region[0]), int(region[1] - 10)),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.2,
                    color=(0, 255, 0),
                    thickness=4)
frame_buffer.append(frame_rgb)
frame_ready.set()
time.sleep(0.01)
2. Interactive region selection through canvas drawing.
3. Asynchronous OCR processing on the selected regions:
# Module-level OCR engine, created once and passed to every region worker.
# NOTE(review): the model directories are named "ch_PP-OCRv4_*" while
# lang="en" -- confirm that the recognition dictionary matches the model.
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
    det_model_dir="./ch_PP-OCRv4_det_infer",
    rec_model_dir="./ch_PP-OCRv4_rec_infer",
    log_level='ERROR',
)
def preprocess_bbox(region, frame):
    """Return ``region`` as an ordered box clipped to the bounds of ``frame``.

    Args:
        region: Four values ``(x1, y1, x2, y2)``; each is converted with
            ``int()``. The two corners may be given in either order.
        frame: Object whose ``shape[:2]`` is ``(height, width)``.

    Returns:
        A tuple ``(x1, y1, x2, y2)`` with ``x1 <= x2`` and ``y1 <= y2``,
        suitable for ``frame[y1:y2, x1:x2]``. A region lying completely
        outside the frame collapses to a zero-area box (empty slice).
    """
    x1, y1, x2, y2 = (int(value) for value in region)
    # Accept corners in any order.
    x1, x2 = min(x1, x2), max(x1, x2)
    y1, y2 = min(y1, y2), max(y1, y2)

    height, width = frame.shape[:2]
    # The coordinates are used as slice bounds, whose upper end is exclusive.
    # Clamping to width/height (not width - 1/height - 1) keeps the last
    # pixel column/row when a region touches the frame edge.
    x1 = max(0, min(x1, width))
    x2 = max(0, min(x2, width))
    y1 = max(0, min(y1, height))
    y2 = max(0, min(y2, height))
    return x1, y1, x2, y2
def change_num(region, text):
    """Convert OCR text to a float, rescaling values too large for the region.

    Each group of region indices has a threshold and a divisor: a value
    strictly above the threshold is divided by the divisor.
    NOTE(review): presumably this restores a decimal point the OCR missed --
    confirm the thresholds against the displayed value ranges.

    Args:
        region: Index of the region the text was read from.
        text: Numeric text (anything accepted by ``float()``).

    Returns:
        The value as a ``float`` on every path, rescaled when it exceeds the
        threshold for ``region``. Unknown region indices are never rescaled.

    Raises:
        ValueError: If ``text`` cannot be parsed as a number.
    """
    value = float(text)  # parse once instead of once per comparison/division

    if region in (2, 3):
        threshold, divisor = 999, 100
    elif region in (1, 4, 5, 6, 7, 8, 9):
        threshold, divisor = 100, 10
    elif region in (0, 10, 11):
        threshold, divisor = 10, 100
    else:
        return value

    return value / divisor if value > threshold else value
def check_detection_region(frame, detection_region, last_process_time, last_check_time):
    """Run ``process_region`` on every region of ``frame`` and collect results.

    Fresh per-region state is built on each call, and the regions are handed
    to a two-worker thread pool together with the module-level ``ocr``
    engine and a lock shared by the workers of this call.

    Args:
        frame: Image the regions refer to; passed through to the workers.
        detection_region: Sequence of ``(x1, y1, x2, y2)`` regions.
        last_process_time: Passed through unchanged to ``process_region``.
        last_check_time: Passed through unchanged to ``process_region``.

    Returns:
        A list with every truthy worker result (in completion order), or
        ``None`` when there are no regions or no region produced a result.
    """
    if not detection_region:
        return None

    region_data = {}
    process_time_lock = threading.Lock()
    for region_idx in range(len(detection_region)):
        region_data[region_idx] = {
            'detection_cache': [],
            'last_cache_clear': time.time(),
            'datetime_info': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'last_process_time': 0.0,
        }

    # max_workers = min(len(detection_region), os.cpu_count())
    max_workers = 2
    alarm_result = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its region so failures can be attributed.
        futures = {
            executor.submit(process_region, region_idx, region,
                            frame, region_data,
                            last_process_time, last_check_time,
                            process_time_lock, ocr): region_idx
            for region_idx, region in enumerate(detection_region)
        }
        future_process_time = time.time()
        for future in as_completed(futures):
            try:
                result = future.result()
            except Exception as exc:
                # Report the failure instead of dropping it silently;
                # otherwise a failing region just disappears from the output.
                print(f"ERROR: region {futures[future]} failed: {exc!r}")
                continue
            if result:
                alarm_result.append(result)

    # Regions may be lists (unhashable), so normalise to tuples before
    # counting the distinct ones.
    unique_regions = len({tuple(region) for region in detection_region})
    print("*"*10, f"Process Time:{time.time()- future_process_time} Rgions: {len(detection_region)}-{unique_regions}", "*"*10)
    return alarm_result or None
def process_region(region_idx, region, frame, region_data, last_process_time,
                   last_check_time, process_time_lock, ocr=None):
    """OCR one region of ``frame`` and return its most confident numeric reading.

    Args:
        region_idx: Index of the region; also the key into ``region_data``.
        region: ``(x1, y1, x2, y2)`` box, normalised via ``preprocess_bbox``.
        frame: Image array the region is cropped from.
        region_data: Mapping of region index to a per-region state dict.
        last_process_time: Mutable mapping; receives this call's timestamp.
        last_check_time: Mutable mapping; receives this call's timestamp.
        process_time_lock: Lock held while the OCR engine is called, or
            ``None`` to call the engine unguarded.
        ocr: Engine exposing ``ocr(image, cls=True)``.

    Returns:
        A dict with ``time``, ``region_id``, ``date_time`` and ``value``
        (``num``, ``conf``, ``raw_text``) for the highest-confidence numeric
        detection, or ``None`` when the crop is empty, nothing was
        recognised, or no detection passed the numeric/confidence filter.

    Raises:
        ValueError: If no OCR engine is supplied.
    """
    if ocr is None:
        raise ValueError("process_region requires an OCR engine")

    current_time = time.time()
    # with process_time_lock:
    #     last_proc = last_process_time.get(region_idx, 0.0)
    #     last_check = last_check_time.get(region_idx, 0.0)
    #     if (current_time - last_proc < PROCESS_INTERVAL or
    #             current_time - last_check < CHECK_INTERVAL):
    #         return None
    # Store timestamps: the disabled throttle above compares these entries
    # with the current time, so an elapsed duration would be meaningless.
    last_check_time[region_idx] = current_time
    last_process_time[region_idx] = current_time

    region_key = region_idx
    # Create the state on demand instead of subscripting a possible None.
    rd = region_data.setdefault(region_key, {
        'detection_cache': [],
        'last_cache_clear': current_time,
        'datetime_info': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'last_process_time': 0.0,
    })
    rd['last_process_time'] = current_time

    x1, y1, x2, y2 = preprocess_bbox(region, frame)
    roi = frame[y1:y2, x1:x2]
    if roi.size == 0:
        print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        return None

    # NOTE(review): one engine instance is shared by all workers; it is
    # presumably not safe to call concurrently, so calls are serialised with
    # the lock the caller supplies -- confirm against the PaddleOCR docs.
    guard = process_time_lock if process_time_lock is not None else threading.Lock()
    with guard:
        ocr_time = time.time()  # taken inside the lock: excludes waiting time
        ocr_result = ocr.ocr(roi, cls=True)
    if not ocr_result:
        print(f"ERROR: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        return None

    # Process the recognition results.
    current_time = time.time()
    date_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    for line in ocr_result:
        if not line:
            # An image without text may be reported as a None/empty entry.
            continue
        for detection in line:
            text, conf = detection[1]
            print(f"{date_time}-{time.time()- ocr_time}-Region({region}):{region_idx} Text: {text}, Conf: {conf}")
            match = re.match(r"(\d+(\.\d+)?)", text)
            if conf > 0.7 and match:
                current_value = float(change_num(region=region_idx, text=match.group(1)))
                rd['detection_cache'].append({
                    'match_text': current_value,
                    'conf': conf,
                    'time': current_time,
                    'ocr_text': text
                })

    if not rd['detection_cache']:
        # max() on an empty cache would raise; report the miss explicitly.
        print(f"{date_time}-Region({region}):{region_idx} no numeric text with conf > 0.7")
        return None

    # if (current_time - rd['last_cache_clear'] >= 1) and rd['detection_cache']:
    best = max(rd['detection_cache'], key=lambda x: x['conf'])
    rd['detection_cache'][:] = []
    rd['last_cache_clear'] = current_time
    return {
        'time': current_time,
        'region_id': region_key,
        'date_time': date_time,
        'value': {
            'num': best['match_text'],
            'conf': round(best['conf'], 4),
            'raw_text': best['ocr_text']
        }
    }
Here are some results (note that not every region appears in each pass):
2025-03-31 17:15:22-0.3458406925201416-Region((748, 1100, 851, 1177)):0 Text: 31, Conf: 0.9988272190093994
2025-03-31 17:15:22-0.3458406925201416-Region((1014, 1158, 1100, 1238)):1 Text: 31, Conf: 0.9988272190093994
2025-03-31 17:15:22-0.3442258834838867-Region((1958, 1897, 2025, 1977)):2 Text: 06, Conf: 0.9809097051620483
2025-03-31 17:15:23-0.22141623497009277-Region((2332, 1916, 2416, 1987)):5 Text: 82, Conf: 0.9945308566093445
2025-03-31 17:15:23-0.30546092987060547-Region((886, 1555, 972, 1625)):7 Text: 195, Conf: 0.9964171051979065
2025-03-31 17:15:23-0.1500873565673828-Region((598, 1552, 665, 1612)):9 Text: 130, Conf: 0.9943342804908752
********** Process Time:1.477724552154541 Rgions: 10-10 **********
2025-03-31 17:15:24-0.38674497604370117-Region((1014, 1158, 1100, 1238)):1 Text: 31, Conf: 0.9986810088157654
2025-03-31 17:15:24-0.3962728977203369-Region((748, 1100, 851, 1177)):0 Text: 2025, Conf: 0.9948824644088745
2025-03-31 17:15:24-0.3871188163757324-Region((1958, 1897, 2025, 1977)):2 Text: 36, Conf: 0.9956271648406982
2025-03-31 17:15:24-0.3814249038696289-Region((2092, 1916, 2163, 1993)):3 Text: 36, Conf: 0.9956271648406982
2025-03-31 17:15:24-0.4036712646484375-Region((2332, 1916, 2416, 1987)):5 Text: 82, Conf: 0.995288610458374
2025-03-31 17:15:25-0.3978700637817383-Region((886, 1555, 972, 1625)):7 Text: 195, Conf: 0.9958855509757996
2025-03-31 17:15:25-0.7991609573364258-Region((739, 1529, 812, 1596)):6 Text: 133, Conf: 0.9976328015327454
2025-03-31 17:15:25-0.2980964183807373-Region((598, 1552, 665, 1612)):9 Text: 130, Conf: 0.9968142509460449
********** Process Time:2.189676523208618 Rgions: 10-10 **********
2025-03-31 17:15:26-0.4011368751525879-Region((748, 1100, 851, 1177)):0 Text: 2025, Conf: 0.9921661615371704
2025-03-31 17:15:26-0.43567562103271484-Region((1014, 1158, 1100, 1238)):1 Text: 31, Conf: 0.9990317821502686
2025-03-31 17:15:26-0.22664427757263184-Region((2092, 1916, 2163, 1993)):3 Text: 36, Conf: 0.9965022802352905
2025-03-31 17:15:26-0.40086793899536133-Region((1958, 1897, 2025, 1977)):2 Text: 06, Conf: 0.9625216722488403
2025-03-31 17:15:27-0.33641600608825684-Region((2198, 1926, 2275, 1984)):4 Text: 34, Conf: 0.842035174369812
2025-03-31 17:15:27-0.33869314193725586-Region((2332, 1916, 2416, 1987)):5 Text: 82, Conf: 0.9974746704101562
2025-03-31 17:15:27-0.41469645500183105-Region((739, 1529, 812, 1596)):6 Text: 133, Conf: 0.9956898093223572
2025-03-31 17:15:27-0.2816200256347656-Region((886, 1555, 972, 1625)):7 Text: 195, Conf: 0.9957630634307861
2025-03-31 17:15:27-0.3099501132965088-Region((598, 1552, 665, 1612)):9 Text: 130, Conf: 0.9934964179992676
********** Process Time:1.8019309043884277 Rgions: 10-10 **********
Share
Improve this question
edited Mar 31 at 10:10
Mark Rotteveel
110k229 gold badges156 silver badges223 bronze badges
asked Mar 31 at 9:20
Big-Yellow-JBig-Yellow-J
116 bronze badges
1 Answer
Reset to default
0
I know the problem now. The issue arises from the threading setup: because the capture loop is started with `threading.Thread(target=video_capture_thread, daemon=True).start()`, the OCR function runs inside that same thread. The frame updates quickly, but the OCR process is slow. Each time the frame updates, it is sent to the OCR function (`check_detection_region`), which causes the OCR function to break. As a result, some regions are not detected.