[关注] 2025-12-15 圈点657
摘要:图片识别文字之图片中文字的分割思路
一张图片中有一大堆字符,如何识别?思路如下:1. 先把图片分割成单个字符;2. 再对每个字符独立识别;3. 组合已识别的单个文字;4. 根据“语义”修正最后的识别结果。
Python分割图片中文字的示例(原文:https://www.jianshu.com/p/64808391285e)
其思路:使用cv2读取图片文件,然后二值化;先按水平方向(逐行)对像素求和,根据像素阈值和最小宽度阈值确定每一行文字的范围,再在每一行内按垂直方向(逐列)做同样的处理,确定每个文字所在的方格;最后分别裁剪出每个已经分割的文字并保存为单独的图片。
代码如下:
import os
import cv2
import numpy as np
# Directory whose entries are listed and read as input images by the driver loop.
base_dir = "./origin/"
# Path prefix under which cutImage writes the cropped character images.
dst_dir = "./result/"
# Threshold passed to extract_peek: a row/column sum must exceed it to belong to a run.
min_val = 10
# Minimum run length (in rows or columns) passed to extract_peek for a run to be reported.
min_range = 30
# Running number of saved crops; cutImage increments it and uses it as the output file name.
count = 0
def extract_peek(array_vals, minimun_val, minimun_range):
    """Find index ranges where the values stay above a threshold.

    A run opens at the first index whose value is greater than
    ``minimun_val``.  It is closed at the first later index whose value is
    not above the threshold *and* which lies at least ``minimun_range``
    positions after the run's start.  A run that is still too short when a
    gap is met is kept open, so it can absorb the gap and the values after it.

    NOTE(review): the indentation of the original was lost; keeping short
    runs open (rather than discarding them) is the reading chosen here --
    confirm against the upstream article.

    Values exactly equal to ``minimun_val`` are treated as a gap.  The
    original compared with strict ``>`` and ``<`` only, so an equal value
    fell through to ``raise ValueError("cannot parse this case...")``.

    A run that is still open when the input ends is not reported.

    Args:
        array_vals: Iterable of numbers (e.g. per-row or per-column sums).
        minimun_val: Threshold a value must exceed to be part of a run.
        minimun_range: Minimum ``end - start`` for a run to be reported.

    Returns:
        List of ``(start, end)`` index pairs; ``end`` is the index of the
        gap value that closed the run (exclusive end).
    """
    start_i = None
    peek_ranges = []
    for i, val in enumerate(array_vals):
        if val > minimun_val:
            # Inside a run: open one if none is open yet.
            if start_i is None:
                start_i = i
            continue
        # From here on the value is a gap (at or below the threshold).
        if start_i is None:
            continue
        if i - start_i >= minimun_range:
            # Kept from the original: report the width of each accepted run.
            print(i - start_i)
            peek_ranges.append((start_i, i))
            start_i = None
    return peek_ranges
def cutImage(img, peek_range):
    """Crop every detected character cell out of ``img`` and save it as a PNG.

    The cells come from two module-level globals that the calling script
    fills in before each call: ``peek_ranges`` (one ``(start_row, end_row)``
    pair per text line) and ``vertical_peek_ranges2d`` (for each line, a
    list of ``(start_col, end_col)`` pairs).  Every crop is resized to
    150x150 and written to ``dst_dir + str(count) + ".png"``; the global
    ``count`` is incremented once per crop.

    Args:
        img: Image array, sliced as ``img[rows, cols]``.
        peek_range: Unused.  Kept so existing calls keep working; the row
            ranges are read from the global ``peek_ranges`` instead (the
            original shadowed this parameter with its loop variable).
    """
    global count
    # cv2.imwrite does not create missing directories, so make sure the
    # output directory exists before the first write.
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    for row_range, column_ranges in zip(peek_ranges, vertical_peek_ranges2d):
        top, bottom = row_range[0], row_range[1]
        for column_range in column_ranges:
            left, right = column_range[0], column_range[1]
            count += 1
            cell = cv2.resize(img[top:bottom, left:right], (150, 150))
            cv2.imwrite(dst_dir + str(count) + ".png", cell)
# Driver: segment every file in base_dir into single-character crops.
# peek_ranges and vertical_peek_ranges2d stay module-level names because
# cutImage reads them as globals.
for fileName in os.listdir(base_dir):
    img = cv2.imread(base_dir + fileName)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded (e.g. a non-image file in the directory).
        print("skip unreadable file: " + fileName)
        continue
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverted adaptive binarisation: pixels darker than their local
    # neighbourhood become 255, everything else 0.
    adaptive_threshold = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2)
    # Per-row sums -> row ranges (text lines).
    horizontal_sum = np.sum(adaptive_threshold, axis=1)
    peek_ranges = extract_peek(horizontal_sum, min_val, min_range)
    # Per-column sums inside each line -> column ranges (characters).
    vertical_peek_ranges2d = []
    for peek_range in peek_ranges:
        start_y = peek_range[0]
        end_y = peek_range[1]
        line_img = adaptive_threshold[start_y:end_y, :]
        vertical_sum = np.sum(line_img, axis=0)
        vertical_peek_ranges = extract_peek(vertical_sum, min_val, min_range)
        vertical_peek_ranges2d.append(vertical_peek_ranges)
    if not peek_ranges:
        # Nothing detected: there is no peek_range to pass and nothing to cut.
        continue
    # Same argument the original passed (the last line range); cutImage
    # itself works from the globals filled in above.
    cutImage(img, peek_ranges[-1])
# NOTE: the line below is blog navigation text ("previous post") that was
# fused onto the last code line when the page was extracted; it is not code.
# 上一篇[关注]Tensorflow
下一篇[关注]常用的几种口罩标准和说明