部署方法
使用模型的onnx版本进行推理,模型在此处下载。
依赖安装:
pip install numpy opencv-python onnxruntime
Python推理
此类封装了推理和可视化:
# DocLayoutV3模型推理和可视化
import numpy as np
import cv2
import onnxruntime as ort
import json
class DocLayoutV3Detector:
    """Run a PP-DocLayoutV3 ONNX model and export or visualize its detections.

    Detections are handled as 2-D arrays whose rows are read as
    ``[label_idx, score, xmin, ymin, xmax, ymax, read_order]``.
    """

    # Class names, indexed by the label id stored in column 0 of a detection.
    LABEL_MAP = [
        "abstract", "algorithm", "aside_text", "chart", "content",
        "display_formula", "doc_title", "figure_title", "footer",
        "footer_image", "footnote", "formula_number", "header",
        "header_image", "image", "inline_formula", "number",
        "paragraph_title", "reference", "reference_content",
        "seal", "table", "text", "vertical_text", "vision_footnote"
    ]

    def __init__(self, model_path, input_size=(800, 800), conf_threshold=0.5):
        """Load the ONNX model and prepare the per-class colors.

        Args:
            model_path: Path to the ONNX model file.
            input_size: Model input size as ``(height, width)``.
            conf_threshold: Detections whose score is not strictly above this
                value are dropped by ``postprocess``.
        """
        self.model_path = model_path
        self.input_size = input_size
        self.conf_threshold = conf_threshold
        self.session = ort.InferenceSession(model_path)
        self.input_names = [i.name for i in self.session.get_inputs()]
        self.output_names = [o.name for o in self.session.get_outputs()]
        self.colors = self._generate_colors(len(self.LABEL_MAP))

    def _generate_colors(self, num_classes):
        """Return ``num_classes`` BGR color tuples with evenly spaced hues.

        Args:
            num_classes: Number of colors to generate.

        Returns:
            A list of ``(b, g, r)`` tuples of Python ints.
        """
        colors = []
        for i in range(num_classes):
            # OpenCV stores 8-bit hue in [0, 180); scaling by 255 would wrap
            # around and hand near-identical colors to different classes.
            hue = int(180 * i / num_classes)
            bgr = cv2.cvtColor(
                np.uint8([[[hue, 255, 255]]]),
                cv2.COLOR_HSV2BGR
            )[0][0]
            colors.append(tuple(int(c) for c in bgr))
        return colors

    def _label_name(self, label_idx):
        """Return the class name for ``label_idx``, or "unknown" if out of range."""
        if 0 <= label_idx < len(self.LABEL_MAP):
            return self.LABEL_MAP[label_idx]
        return "unknown"

    def _load_image(self, image):
        """Return ``image`` as an array, reading it from disk if it is a path.

        Raises:
            ValueError: If ``image`` is None or the file cannot be read.
        """
        if isinstance(image, str):
            path = image
            image = cv2.imread(path)
            # cv2.imread reports failure by returning None, not by raising.
            if image is None:
                raise ValueError(f"Failed to read image: {path}")
        elif image is None:
            raise ValueError("image must be a file path or an image array, got None")
        return image

    def preprocess(self, image):
        """Resize and normalize a BGR image into the model's input tensor.

        Args:
            image: Image array as returned by ``cv2.imread`` (BGR, HxWxC).

        Returns:
            ``(blob, scale_h, scale_w)`` where ``blob`` is a float32 array of
            shape ``(1, 3, target_h, target_w)`` and the scales are
            ``target / original`` for height and width.
        """
        orig_h, orig_w = image.shape[:2]
        target_h, target_w = self.input_size
        scale_h = target_h / orig_h
        scale_w = target_w / orig_w
        # cv2.resize takes the destination size as (width, height).
        resized = cv2.resize(
            image,
            (target_w, target_h),
            interpolation=cv2.INTER_LINEAR
        )
        img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        # Per-channel (RGB) mean / std normalization.
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        img = (img - mean) / std
        # HWC -> CHW, then add the batch axis.
        img = img.transpose(2, 0, 1)[np.newaxis, ...]
        return img, scale_h, scale_w

    def infer(self, image):
        """Run the model on one image and return its first output.

        Args:
            image: Image array accepted by ``preprocess``.

        Returns:
            The first output of the ONNX session.
        """
        input_blob, scale_h, scale_w = self.preprocess(image)
        preprocess_shape = [np.array(self.input_size, dtype=np.float32)]
        # NOTE(review): inputs are bound by position; this presumes the model
        # declares them as (input shape, image, scale factor) — confirm
        # against the exported model.
        input_feed = {
            self.input_names[0]: preprocess_shape,
            self.input_names[1]: input_blob,
            self.input_names[2]: [[scale_h, scale_w]]
        }
        outputs = self.session.run(self.output_names, input_feed)[0]
        return outputs

    def postprocess(self, outputs):
        """Drop low-confidence detections and sort the rest by reading order.

        Args:
            outputs: 2-D array of detection rows (see the class docstring).

        Returns:
            The rows whose score (column 1) exceeds ``conf_threshold``,
            ordered by column 6.
        """
        boxes = outputs[outputs[:, 1] > self.conf_threshold]
        # Stable sort keeps the model's original order for equal keys.
        boxes = boxes[np.argsort(boxes[:, 6], kind="stable")]
        return boxes

    def to_json(self, boxes):
        """Convert detection rows into a JSON-serializable dict.

        Args:
            boxes: Iterable of detection rows (see the class docstring).

        Returns:
            ``{"layout_results": [...]}`` where each entry holds the class
            name, the four corner points (clockwise from top-left) and the
            score rounded to two decimals.
        """
        layout_results = []
        for box in boxes:
            label_idx = int(box[0])
            score = float(box[1])
            xmin, ymin, xmax, ymax = box[2:6]
            points = [
                [float(xmin), float(ymin)],
                [float(xmax), float(ymin)],
                [float(xmax), float(ymax)],
                [float(xmin), float(ymax)]
            ]
            layout_results.append({
                "type": self._label_name(label_idx),
                "points": points,
                "confidence": round(score, 2)
            })
        return {"layout_results": layout_results}

    def visualize(self, image, boxes, output_path=None, alpha=0.35):
        """Draw translucent boxes and text labels on a copy of ``image``.

        Args:
            image: Image array to annotate; it is not modified.
            boxes: Iterable of detection rows (see the class docstring).
            output_path: If truthy, the annotated image is written there.
            alpha: Weight of the filled boxes when blended over the image.

        Returns:
            The annotated image.
        """
        vis_image = image.copy()
        overlay = vis_image.copy()
        h, w = vis_image.shape[:2]
        font_scale = max(0.5, min(h, w) / 1000)
        # First pass: filled rectangles go on the overlay only.
        for box in boxes:
            label_idx = int(box[0])
            xmin, ymin, xmax, ymax = map(int, box[2:6])
            color = self.colors[label_idx % len(self.colors)]
            cv2.rectangle(
                overlay,
                (xmin, ymin),
                (xmax, ymax),
                color,
                -1
            )
        # Blend the overlay onto the image to make the boxes translucent.
        vis_image = cv2.addWeighted(overlay, alpha, vis_image, 1 - alpha, 0)
        # Second pass: labels are drawn after blending so they stay opaque.
        for box in boxes:
            label_idx = int(box[0])
            score = float(box[1])
            xmin, ymin, xmax, ymax = map(int, box[2:6])
            read_order = int(box[6])
            label_name = self._label_name(label_idx)
            text = f"{label_name}|{score:.2f}#{read_order}"
            cv2.putText(
                vis_image,
                text,
                # Keep the baseline at least 20 px from the top edge.
                (xmin, max(ymin - 5, 20)),
                cv2.FONT_HERSHEY_SIMPLEX,
                font_scale,
                (255, 0, 0),
                1
            )
        if output_path:
            cv2.imwrite(output_path, vis_image)
        return vis_image

    # ---------------------------------------------------------
    # Public API
    def predict(self, image):
        """Detect layout elements in an image.

        Args:
            image: Path to an image file, or an already loaded image array.

        Returns:
            Filtered, reading-order-sorted detection rows.

        Raises:
            ValueError: If ``image`` is None or the file cannot be read.
        """
        image = self._load_image(image)
        outputs = self.infer(image)
        boxes = self.postprocess(outputs)
        return boxes

    def predict_json(self, image):
        """Detect layout elements and return them in ``to_json`` format."""
        boxes = self.predict(image)
        return self.to_json(boxes)

    def predict_and_visualize(self, image, save_path=None):
        """Detect layout elements and render them onto the image.

        Args:
            image: Path to an image file, or an already loaded image array.
            save_path: If truthy, the annotated image is written there.

        Returns:
            ``(boxes, vis)``: the detection rows and the annotated image.

        Raises:
            ValueError: If ``image`` is None or the file cannot be read.
        """
        image = self._load_image(image)
        boxes = self.predict(image)
        vis = self.visualize(image, boxes, save_path)
        return boxes, vis
if __name__ == '__main__':
    # Demo: run detection on a sample page, print the JSON result, then
    # write an annotated copy of the page to disk.
    sample_image = "./test_cases/complex_latex.png"
    detector = DocLayoutV3Detector("./PP-DocLayoutV3.onnx")

    layout = detector.predict_json(sample_image)
    print(json.dumps(layout, indent=2, ensure_ascii=False))

    boxes, vis = detector.predict_and_visualize(
        image=sample_image,
        save_path="./vis.jpg",
    )
win7 兼容
win7上可用的onnxruntime版本最高到1.14.1(见下文测试环境),其ONNX IR version 只支持到8,不支持上述模型。
故将上面的原始onnx模型进行转换,使其兼容老的IR版本:
# Requires: pip install onnx
import onnx

SOURCE_MODEL = "./PP-DocLayoutV3.onnx"
# Resolves to the same file as SOURCE_MODEL, so the original is overwritten.
TARGET_MODEL = "PP-DocLayoutV3.onnx"
TARGET_IR_VERSION = 8

# Load the model and report the IR version it currently declares.
model = onnx.load(SOURCE_MODEL)
print("Original IR version:", model.ir_version)

# Only the declared IR version field is rewritten; the graph is left as is.
model.ir_version = TARGET_IR_VERSION

# Write the patched model back to disk.
onnx.save(model, TARGET_MODEL)
转换后的模型在win7 SP1, python 3.7, onnxruntime 1.14.1环境下测试成功。

34

被折叠的 条评论
为什么被折叠?



