【全网唯一】Python 纯本地离线文字识别Windows版dll插件

最新推荐文章于 2026-03-30 09:45:04 发布

原创最新推荐文章于 2026-03-30 09:45:04 发布 · 579 阅读

6 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

标签

#python #开发语言

自动化专栏收录该内容

33 篇文章

订阅专栏

Python3.8

Python 是一种高级、解释型、通用的编程语言，以其简洁易读的语法而闻名，适用于广泛的应用，包括Web开发、数据分析、人工智能和自动化脚本

目的

Python 的语法、动态类型以及解释型语言的本质，使它成为多数平台上编写脚本和快速开发应用的常用语言。Python 解释器易于扩展，可以使用 C、C++ 或其他可通过 C 调用的语言来扩展新的功能和数据类型。这里我们通过 ctypes 加载 dll 的方式来实现文字识别功能。

准备工作

1、搭建Python开发环境

2、下载对应的TomatoOCR.dll依赖包：下载

目前插件支持中英文、繁体字、日语、韩语识别；
支持小图、区域图和单行文字识别，准确率高达99%；
支持多种返回格式，json\文本\数字\自定义；
支持二值化；
支持找字返回坐标并点击；
超高的稳定性，速度快；
支持多线程；

依赖包集成

依赖包放置

注意：由于 32 位应用存在内存限制，初始化的 OCR 线程数不要太多，建议不超过 5 个；64 位应用不受此限制。

下载依赖包后，改名为TomatoOCR.dll，将文件放置在项目中

import ctypes
import json

class TomatoOCR(ctypes.Structure):
    """Opaque handle type for the OCR engine.

    No ``_fields_`` are declared: the structure is only ever used through
    ``ctypes.POINTER(TomatoOCR)`` as the handle type passed to the DLL.
    """
    
def init(dll_path="", license_key="", remark="测试", thread_count=2):
    """Load TomatoOCR.dll, declare its C signatures, and set the license.

    Args:
        dll_path: Path to TomatoOCR.dll. The default empty string is only a
            placeholder; pass the real location of the DLL.
        license_key: License string handed to the DLL's ``setLicense``.
        remark: Free-form remark sent together with the license.
        thread_count: Value passed to the DLL's ``init`` (number of OCR
            threads). The accompanying article advises keeping this below 5
            for 32-bit applications because of memory limits.

    Returns:
        The loaded ctypes library object with ``argtypes``/``restype``
        configured for every exported function used by this script.
    """
    tmo_ocr = ctypes.windll.LoadLibrary(dll_path)

    handle = ctypes.POINTER(TomatoOCR)
    text = ctypes.c_char_p

    # name -> (argument types, return type) for each exported function.
    signatures = {
        "init": ([ctypes.c_int], None),
        "setLicense": ([text, text], text),
        "lock": ([], handle),
        "setRecType": ([handle, text], None),
        "setDetBoxType": ([handle, text], None),
        "setDetUnclipRatio": ([handle, ctypes.c_float], None),
        "setRecScoreThreshold": ([handle, ctypes.c_float], None),
        "setReturnType": ([handle, text], None),
        "setBinaryThresh": ([handle, ctypes.c_int], None),
        "setRunMode": ([handle, text], None),
        "setFilterColor": ([handle, text, text], None),
        "ocrFile": ([handle, text, ctypes.c_int], text),
        "ocrBase64": ([handle, text, ctypes.c_int], text),
        "findTapPoint": ([handle, text], text),
        "findTapPoints": ([handle, text], text),
        "unlock": ([handle], None),
    }
    for name, (argtypes, restype) in signatures.items():
        func = getattr(tmo_ocr, name)
        func.argtypes = argtypes
        func.restype = restype

    tmo_ocr.init(thread_count)

    raw_flag = tmo_ocr.setLicense(
        license_key.encode('utf-8'), remark.encode('utf-8')
    )
    # A c_char_p restype yields None for a NULL pointer; do not call
    # .decode() on it.
    flag = raw_flag.decode("utf-8") if raw_flag is not None else ""
    print(flag)

    return tmo_ocr


def _decode_result(raw):
    """Decode a ``c_char_p`` result to ``str``; a NULL/empty result becomes ""."""
    return raw.decode("utf-8") if raw else ""


def ocr_start(tmo_ocr, image_path="test.png", image_base64=None,
              keyword="百度", ocr_type=3):
    """Run the sample OCR calls against one locked engine handle.

    Args:
        tmo_ocr: Library object whose functions have been configured
            (as returned by ``init``).
        image_path: Image file passed to ``ocrFile``. It is also the source
            of the base64 payload when ``image_base64`` is not given.
        image_base64: Base64 text (``str`` or ``bytes``) of an image for
            ``ocrBase64``. When ``None`` the content of ``image_path`` is
            base64-encoded instead.
            NOTE(review): assumes the DLL expects plain base64 without a
            data-URI prefix - confirm against the plugin documentation.
        keyword: Text searched for by ``findTapPoint`` / ``findTapPoints``.
        ocr_type: OCR mode passed to the recognition calls:
            0 = detection only (text positions);
            1 = direction classification + recognition;
            2 = recognition only;
            3 = detection + recognition (full-image recognition).
            Use 1 or 2 for a cropped single line of text.

    Returns:
        None. Results are printed.
    """
    handle = tmo_ocr.lock()  # acquire a handle; must be paired with unlock
    if not handle:
        return

    try:
        # "ch", "ch-2.0" and "ch-3.0" are interchangeable Chinese/English
        # models (versions 1.0 / 2.0 / 3.0); switch between them if a given
        # scene is recognised poorly.
        # "cht": traditional Chinese, "japan": Japanese, "korean": Korean.
        tmo_ocr.setRecType(handle, "ch-3.0".encode('utf-8'))

        # Detection box type - default "rect" (screenshot text is
        # rectangular); "quad" detects slanted text accurately.
        tmo_ocr.setDetBoxType(handle, "rect".encode('utf-8'))
        # Detection unclip ratio - default 1.9, valid range 1.6-2.5.
        tmo_ocr.setDetUnclipRatio(handle, 1.9)
        # Recognition score filter - default 0.1, valid range 0.1-0.9.
        tmo_ocr.setRecScoreThreshold(handle, 0.3)
        # Return type - default "json": scores, coordinates and text;
        # "text": text only; "num": digits only; or a custom whitelist such
        # as ".￥1234567890" to return only those characters.
        tmo_ocr.setReturnType(handle, "json".encode('utf-8'))

        tmo_ocr.setBinaryThresh(handle, 0)  # binarisation threshold, optional
        # Default "slow"; "fast" speeds up small images at the cost of
        # accuracy, so the default is recommended.
        tmo_ocr.setRunMode(handle, "slow".encode('utf-8'))
        # Filter colour and background colour (black/white); the filter
        # colour is empty by default.
        tmo_ocr.setFilterColor(
            handle, "".encode('utf-8'), "black".encode('utf-8')
        )

        # Example 1: recognise an image file.
        result1 = tmo_ocr.ocrFile(handle, image_path.encode('utf-8'), ocr_type)
        print(_decode_result(result1))

        # Example 2: recognise a base64-encoded image.
        if image_base64 is None:
            import base64

            with open(image_path, "rb") as image_file:
                payload = base64.b64encode(image_file.read())
        elif isinstance(image_base64, str):
            payload = image_base64.encode('utf-8')
        else:
            payload = image_base64
        result2 = tmo_ocr.ocrBase64(handle, payload, ocr_type)
        print(_decode_result(result2))

        # Find the keyword and get the centre point of the match; an empty
        # string means the keyword was not found.
        point = _decode_result(
            tmo_ocr.findTapPoint(handle, keyword.encode('utf-8'))
        )
        if point != "":
            json_data_point = json.loads(point)
            print(json_data_point[0], json_data_point[1])

        points = _decode_result(
            tmo_ocr.findTapPoints(handle, keyword.encode('utf-8'))
        )
        if points != "":
            for json_data_point in json.loads(points):
                print(json_data_point["words"], json_data_point["point"])
    finally:
        # Always release the handle, even when a call above raised.
        tmo_ocr.unlock(handle)

# Demo driver: load and configure the DLL, then run the sample OCR calls
# with the returned library object.
tmo = init()
ocr_start(tmo)