您当前的位置:首页 > IT编程 > 深度学习
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch |

自学教程:keras-文本图片文字识别

51自学网 2021-11-08 22:12:57
  深度学习
这篇教程keras-文本图片文字识别写得很实用,希望能帮到您。

keras-文本图片文字识别

 

1. Keras环境安装

## 参考:Keras 安装教程(原文此处为超链接“点击打开链接”,链接地址未随正文保留)

2. 文本图片素材-文字切割并保存切割图片

# -*- coding: UTF-8 -*-
import cv2
import numpy as np
import matplotlib.pyplot as plt

def median_split_ranges(peek_ranges):
    """Split ranges that look like several merged characters.

    The width of every range is compared with the median width of all
    ranges. A range whose width rounds to ``k > 1`` median widths is cut
    into ``k`` equally wide sub-ranges; every other range is passed through
    unchanged.

    Args:
        peek_ranges: Sequence of ``(start, end)`` index pairs.

    Returns:
        A new list of ``(start, end)`` pairs in the original order.
    """
    # Nothing to measure: avoid asking numpy for the median of an empty
    # array, which only produces NaN and a RuntimeWarning.
    if len(peek_ranges) == 0:
        return []

    widthes = np.asarray([r[1] - r[0] + 1 for r in peek_ranges])
    median_w = np.median(widthes)

    new_peek_ranges = []
    for peek_range, width in zip(peek_ranges, widthes):
        num_char = int(round(width / median_w, 0))
        if num_char > 1:
            char_w = float(width / num_char)
            # Use a dedicated index so the outer loop state is never reused.
            for k in range(num_char):
                start_point = peek_range[0] + int(k * char_w)
                end_point = peek_range[0] + int((k + 1) * char_w)
                new_peek_ranges.append((start_point, end_point))
        else:
            new_peek_ranges.append(peek_range)
    return new_peek_ranges

def extract_peek_ranges_from_array(array_vals, minimun_val=10, minimun_range=2):
    """Locate runs of consecutive values strictly above a threshold.

    Args:
        array_vals: Iterable of numbers (e.g. a projection profile).
        minimun_val: A value counts as part of a run only when it is
            strictly greater than this threshold.
        minimun_range: Minimum ``end - start`` length a run must have to be
            reported.

    Returns:
        List of ``(start, end)`` tuples where ``start`` is the index of the
        first value above the threshold and ``end`` is the index of the
        first value after the run (``end`` is exclusive). A run that is
        still open when the input ends is closed at ``len(array_vals)``.
    """
    peek_ranges = []
    start_i = None
    i = -1  # keeps ``i + 1`` equal to the input length even for empty input
    for i, val in enumerate(array_vals):
        if val > minimun_val:
            if start_i is None:
                start_i = i
        elif start_i is not None:
            # A value equal to the threshold ends the run like any other
            # non-peak value instead of raising.
            if i - start_i >= minimun_range:
                peek_ranges.append((start_i, i))
            start_i = None

    # Close a run that reaches the end of the data; otherwise content
    # touching the last row/column would be silently dropped.
    if start_i is not None and (i + 1) - start_i >= minimun_range:
        peek_ranges.append((start_i, i + 1))
    return peek_ranges

def get_font_face_peek_ranges(path_test_image):
    """Segment a text image into line ranges and per-line character ranges.

    The image is read, enlarged to twice its width and height, converted to
    grayscale and binarized with an inverted adaptive threshold. Row sums of
    the binary image give the line ranges; within each line, column sums
    give the character ranges, which are then split by median width.

    Args:
        path_test_image: Path of the image file to read.

    Returns:
        A tuple ``(peek_ranges, vertical_peek_ranges2d, image_color)``:
        ``peek_ranges`` is the list of ``(start_y, end_y)`` line ranges,
        ``vertical_peek_ranges2d[k]`` is the list of ``(start_x, end_x)``
        ranges found inside line ``k``, and ``image_color`` is the enlarged
        color image. All coordinates refer to the enlarged image.

    Raises:
        IOError: If the image cannot be read.
    """
    image_color = cv2.imread(path_test_image)
    if image_color is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("cannot read image: %s" % path_test_image)

    # cv2.resize expects (width, height), i.e. shape[1] before shape[0].
    new_shape = (image_color.shape[1] * 2, image_color.shape[0] * 2)
    image_color = cv2.resize(image_color, new_shape)
    image = cv2.cvtColor(image_color, cv2.COLOR_BGR2GRAY)

    adaptive_threshold = cv2.adaptiveThreshold(
        image,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2)

    # One total per image row.
    horizontal_sum = np.sum(adaptive_threshold, axis=1)

    # Debug plot of the row profile; it is drawn on the current matplotlib
    # figure but only becomes visible if the caller invokes plt.show().
    plt.plot(horizontal_sum, range(horizontal_sum.shape[0]))
    plt.gca().invert_yaxis()

    peek_ranges = extract_peek_ranges_from_array(horizontal_sum)

    vertical_peek_ranges2d = []
    for start_y, end_y in peek_ranges:
        line_img = adaptive_threshold[start_y:end_y, :]
        # One total per column of this text line.
        vertical_sum = np.sum(line_img, axis=0)
        vertical_peek_ranges = extract_peek_ranges_from_array(
            vertical_sum,
            minimun_val=40,
            minimun_range=1)
        vertical_peek_ranges = median_split_ranges(vertical_peek_ranges)
        vertical_peek_ranges2d.append(vertical_peek_ranges)
    return peek_ranges, vertical_peek_ranges2d, image_color

# Rectangle color passed to cv2.rectangle; (0, 0, 255) is red when the image
# is in BGR channel order.
color = (0, 0, 255)
path_test_image = "tmp/font.png"
peek_ranges, vertical_peek_ranges2d, image_color = get_font_face_peek_ranges(path_test_image)

# Outline every detected character cell on the color image. Each line range
# supplies the top/bottom edge, each range inside it the left/right edge.
for peek_range, vertical_ranges in zip(peek_ranges, vertical_peek_ranges2d):
    top, bottom = peek_range[0], peek_range[1]
    for vertical_range in vertical_ranges:
        left, right = vertical_range[0], vertical_range[1]
        cv2.rectangle(image_color, (left, top), (right, bottom), color)

# Show the annotated image and block until a key is pressed.
cv2.imshow('image', image_color)
cv2.waitKey(0)

3. 训练

FILE_PATH = "model.h5"   # where the trained model is saved to and loaded from
IMAGE_SIZE = 128
PATH = "fonts"

# NOTE(review): read_file, train_test_split, np_utils and the Keras classes
# used below are not imported or defined in the visible part of this
# tutorial - confirm they are provided before running this section.
imgs, labels, counter = read_file(PATH, IMAGE_SIZE)

X_train, X_test, y_train, y_test = train_test_split(
    imgs, labels, test_size=0.2, random_state=0)

# Reshape to (samples, 1, height, width) and scale by 1/255.
X_train = X_train.reshape(X_train.shape[0], 1, IMAGE_SIZE, IMAGE_SIZE) / 255.0
X_test = X_test.reshape(X_test.shape[0], 1, IMAGE_SIZE, IMAGE_SIZE) / 255.0

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# counter is used as the number of classes for the one-hot labels and for
# the size of the final Dense layer.
Y_train = np_utils.to_categorical(y_train, num_classes=counter)
Y_test = np_utils.to_categorical(y_test, num_classes=counter)

# The inputs carry the channel axis first, so every spatial layer states
# data_format='channels_first' explicitly. The original set the Keras 1
# argument dim_ordering='th' on the first layer only, leaving the remaining
# layers on whatever default layout the installation is configured with.
model = Sequential()
model.add(
    Convolution2D(
        filters=32,
        kernel_size=(5, 5),
        padding='same',
        data_format='channels_first',
        input_shape=X_train.shape[1:]
    )
)

model.add(Activation('relu'))
model.add(
    MaxPooling2D(
        pool_size=(2, 2),
        strides=(2, 2),
        padding='same',
        data_format='channels_first'
    )
)


model.add(
    Convolution2D(
        filters=64,
        kernel_size=(5, 5),
        padding='same',
        data_format='channels_first'
    )
)
model.add(Activation('relu'))
model.add(
    MaxPooling2D(
        pool_size=(2, 2),
        strides=(2, 2),
        padding='same',
        data_format='channels_first'
    )
)


model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))


model.add(Dense(counter))
model.add(Activation('softmax'))
model.summary()

# The original call read "optimizer='adam', optimizer," which is a
# SyntaxError (positional argument after a keyword argument).
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'])

model.fit(X_train, Y_train, epochs=32, batch_size=32)

loss, accuracy = model.evaluate(X_test, Y_test)

print('test loss;', loss)
print('test accuracy:', accuracy)

model.save(FILE_PATH)

4. 识别文字图片

a. 图片文字切割

b. 文字识别

# -*- coding: UTF-8 -*-
from keras.models import load_model
import cv2
import numpy as np
import utils
import os

def getLetter(model, img, name_list, IMAGE_SIZE):
    """Classify one cropped character image.

    Args:
        model: Model object whose ``predict`` method accepts a float32 array
            of shape ``(1, 1, IMAGE_SIZE, IMAGE_SIZE)``.
        img: Color image crop; it is converted with ``cv2.COLOR_BGR2GRAY``.
        name_list: Sequence mapping a class index to its label.
        IMAGE_SIZE: Side length the crop is resized to.

    Returns:
        A ``(label, probability)`` tuple for the highest scoring class.
    """
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Single-sample batch with the channel axis first, scaled by 1/255.
    img = img.reshape((1, 1, IMAGE_SIZE, IMAGE_SIZE))
    img = img.astype('float32')
    img = img / 255.0

    # predict() is used instead of predict_proba(), which newer Keras
    # releases no longer provide; verbose=0 keeps the call silent.
    result = model.predict(img, verbose=0)
    max_index = np.argmax(result)

    # np.argmax never yields -1, so the former "not found" branch (which
    # returned a bare string the caller could not unpack) was unreachable.
    return name_list[max_index], result[0][max_index]


FILE_PATH = "model.h5"
IMAGE_SIZE = 128
path_test_image = "tmp/font.png"
# NOTE(review): readName() is not defined in the visible code. The comment
# thread on this page suggests it returns the sub-directory names of
# "fonts"; the order must match the label indices used for training -
# confirm before relying on it.
name_list = readName()

model = load_model(FILE_PATH)
peek_ranges, vertical_peek_ranges2d, image_color = utils.get_font_face_peek_ranges(path_test_image)

letters = []
for peek_range, vertical_ranges in zip(peek_ranges, vertical_peek_ranges2d):
    # Crop with a 2-pixel margin. The start is clamped at 0 because a
    # negative slice start would wrap around and produce an empty crop for
    # characters touching the top or left edge.
    top = max(peek_range[0] - 2, 0)
    bottom = peek_range[1] + 2
    for vertical_range in vertical_ranges:
        left = max(vertical_range[0] - 2, 0)
        right = vertical_range[1] + 2
        image = image_color[top:bottom, left:right]
        letter, prob = getLetter(model, image, name_list, IMAGE_SIZE)
        letters.append(letter)

# Assemble the recognized text once instead of growing a string in the loop.
result = "".join(letters)

print(result)


最后一段代码里的 readName() 函数是自定义的吗?可以发一下它的实现吗?(2 年前 · 回复)
[点赞]

    u010379996
    zhuzihuaile回复:

        PATH_DIR = "fonts"
        name_list = []
        for child_dir in os.listdir(PATH_DIR):
            if os.path.isdir((PATH_DIR+"/"+child_dir)):
                name_list.append(child_dir)

c. 测试结果(还需优化)



 


python读Excel数据成numpy数组
用Keras实现手写汉字识别
51自学网,即我要自学网,自学EXCEL、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。
京ICP备13026421号-1