1. 之前研究ofd文件的一些总结:ofd文件由各种xml以及上下文组成,其中最主要的是ofd.xml这个文件,它记录了ofd的类型以及来源等信息
2. pom.xml如下:需要注意log4j和slf4j涉及到的包,可能会跟项目上的jar冲突
<!--ofd操作类-->
<dependency>
<groupId>org.ofdrw</groupId>
<artifactId>ofdrw-full</artifactId>
<version>2.0.5</version>
<exclusions><!--ofd 转换时需要去掉 否则会报jar冲突以及启动堆栈溢出-->
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
3. ofd的一些操作类
3.1 图片合成ofd
import org.ofdrw.graphics2d.OFDGraphicsDocument;
import org.ofdrw.graphics2d.OFDPageGraphics2D;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
/**
* @Description //TODO
* @Date 2023/06/01 15:38
* @Author kdc
**/
public class ImageToOfd {
/**
 * Builds an OFD document at {@code filepath} containing one page per input image.
 *
 * <p>Every page is created 210 units wide (presumably millimetres, i.e. A4 width —
 * confirm against the ofdrw documentation); the page height is scaled so that the
 * image keeps its aspect ratio.
 *
 * @param filepath destination path of the OFD file to create
 * @param imgUrl   image file paths; multiple paths are separated by ","
 * @return a JSON object carrying {@code code} and {@code msg}; on success it also
 *         carries {@code fileSize}, the length of the generated file
 **/
public static JSONObject imageToOfd(String filepath, String imgUrl){
    JSONObject returnResult = new JSONObject();
    Path dst = Paths.get(filepath);
    try {
        final int width = 210;
        // try-with-resources guarantees the document is closed even when a page fails,
        // so whatever the document holds open is released on the error path as well.
        try (OFDGraphicsDocument doc = new OFDGraphicsDocument(dst)) {
            for (String imgPath : imgUrl.split(",")) {
                BufferedImage image = ImageIO.read(new File(imgPath));
                if (image == null) {
                    // ImageIO.read returns null when no registered reader understands the file.
                    throw new IllegalArgumentException("Unsupported or unreadable image: " + imgPath);
                }
                // Scale factor that maps the image width onto the fixed page width.
                double scale = width * 1.0000 / image.getWidth();
                double height = scale * image.getHeight();
                OFDPageGraphics2D g = doc.newPage(width, height);
                g.drawImage(image, 0, 0, width, (int) height, null);
                g.dispose();
                // NOTE(review): kept from the original; drawImage may already register the
                // image as a resource, in which case this call is redundant — confirm.
                doc.addResImg(image);
            }
        }
        // The document is closed at this point, so the file size can be read reliably.
        System.out.println(">> " + dst.toAbsolutePath());
        returnResult.put("code", ResultCode.SUCCESS.getCode());
        returnResult.put("msg","ofd合成成功");
        returnResult.put("fileSize", new File(filepath).length());
    } catch (Exception e) {
        // Boundary catch: any failure is reported to the caller through the result object.
        returnResult.put("code", ResultCode.ERROR.getCode());
        returnResult.put("msg","OFD合成失败");
        e.printStackTrace();
    }
    return returnResult;
}
3.2 ofd按页数拆图
import org.ofdrw.converter.ImageMaker;
import org.ofdrw.reader.OFDReader;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Renders pages of an OFD file to images and creates a thumbnail for each rendered page.
 *
 * <p>Every public method resolves the source file through
 * {@code TecrunUtils.getFilePathFromBatchIdAndFileId}, renders the requested page(s),
 * stores each page image and its thumbnail under a newly created file id, and returns
 * JSON metadata describing what was written. Failures while reading or rendering are
 * printed and result in an empty (or partially filled) return value rather than an exception.
 *
 * @Author kdc
 **/
public class OfdToImage {

    /** Second argument passed to {@code ImageMaker}; presumably pixels per millimetre — confirm. */
    private static final int IMAGE_MAKER_RESOLUTION = 15;

    /** Bounding box edge, in pixels, handed to {@code Thumbnails.size}. */
    private static final int THUMBNAIL_SIZE = 200;

    /**
     * Renders every page of the OFD file.
     *
     * @param rootPath storage root passed to {@code TecrunUtils}
     * @param file_id  id of the source OFD file
     * @param batchId  batch the source file and the generated images belong to
     * @param filename original file name; its extension (if any) is stripped to build image names
     * @param type     suffix reported in {@code file_name} / {@code file_suffix}
     * @return one JSON object per rendered page; pages rendered before a failure are kept
     */
    public static JSONArray OfdToImageByPage(String rootPath, String file_id, String batchId, String filename, String type) {
        return OfdToImageByPageByIndex(rootPath, file_id, batchId, filename, type, 0);
    }

    /**
     * Renders only the first page (index 0) of the OFD file.
     *
     * @param rootPath storage root passed to {@code TecrunUtils}
     * @param file_id  id of the source OFD file
     * @param batchId  batch the source file and the generated image belong to
     * @param filename original file name; its extension (if any) is stripped to build the image name
     * @param type     suffix reported in {@code file_name} / {@code file_suffix}
     * @return metadata of the rendered page, or an empty JSON object when rendering failed
     */
    public static JSONObject OfdToImageByPageOne(String rootPath, String file_id, String batchId, String filename, String type) {
        return OfdToImageByPageNum(rootPath, file_id, batchId, filename, type, 0);
    }

    /**
     * Renders all pages starting at the zero-based page index {@code Index} up to the last page.
     *
     * @param rootPath storage root passed to {@code TecrunUtils}
     * @param file_id  id of the source OFD file
     * @param batchId  batch the source file and the generated images belong to
     * @param filename original file name; its extension (if any) is stripped to build image names
     * @param type     suffix reported in {@code file_name} / {@code file_suffix}
     * @param Index    zero-based index of the first page to render
     * @return one JSON object per rendered page; pages rendered before a failure are kept
     */
    public static JSONArray OfdToImageByPageByIndex(String rootPath, String file_id, String batchId, String filename, String type,int Index) {
        String baseName = stripExtension(filename);
        JSONArray returnObj = new JSONArray();
        String filepath = TecrunUtils.getFilePathFromBatchIdAndFileId(rootPath, batchId, file_id, 0);
        Path src = Paths.get(filepath);
        // The reader is closed automatically so its resources are released on every path.
        try (OFDReader reader = new OFDReader(src)) {
            ImageMaker imageMaker = new ImageMaker(reader, IMAGE_MAKER_RESOLUTION);
            for (int i = Index; i < imageMaker.pageSize(); i++) {
                returnObj.add(renderPage(imageMaker, i, rootPath, batchId, baseName, type));
            }
            System.out.println("ofdtoPic-ok");
        } catch (Exception e) {
            // Boundary catch: callers receive whatever was rendered before the failure.
            e.printStackTrace();
        }
        return returnObj;
    }

    /**
     * Renders the single page with the zero-based index {@code PageNum}.
     *
     * @param rootPath storage root passed to {@code TecrunUtils}
     * @param file_id  id of the source OFD file
     * @param batchId  batch the source file and the generated image belong to
     * @param filename original file name; its extension (if any) is stripped to build the image name
     * @param type     suffix reported in {@code file_name} / {@code file_suffix}
     * @param PageNum  zero-based index of the page to render
     * @return metadata of the rendered page, or an empty JSON object when rendering failed
     */
    public static JSONObject OfdToImageByPageNum(String rootPath, String file_id, String batchId, String filename, String type,int PageNum) {
        String baseName = stripExtension(filename);
        JSONObject ofdimage = new JSONObject();
        String filepath = TecrunUtils.getFilePathFromBatchIdAndFileId(rootPath, batchId, file_id, 0);
        Path src = Paths.get(filepath);
        try (OFDReader reader = new OFDReader(src)) {
            ImageMaker imageMaker = new ImageMaker(reader, IMAGE_MAKER_RESOLUTION);
            ofdimage = renderPage(imageMaker, PageNum, rootPath, batchId, baseName, type);
            System.out.println("ofdtoPic-ok");
        } catch (Exception e) {
            // Boundary catch: on failure the caller gets the (empty) JSON object.
            e.printStackTrace();
        }
        return ofdimage;
    }

    /**
     * Renders one page, writes the page image and its thumbnail, and describes the result.
     *
     * @param imageMaker renderer bound to the opened OFD file
     * @param pageIndex  zero-based index of the page to render
     * @param rootPath   storage root passed to {@code TecrunUtils}
     * @param batchId    batch the generated files are stored under
     * @param baseName   source file name without extension
     * @param type       suffix reported in the metadata
     * @return JSON metadata with file id, paths, name, size and suffix of the stored page image
     * @throws Exception if rendering or writing fails; declared broadly because the checked
     *                   exceptions of the project helpers are not visible from this class
     */
    private static JSONObject renderPage(ImageMaker imageMaker, int pageIndex, String rootPath,
                                         String batchId, String baseName, String type) throws Exception {
        BufferedImage image = imageMaker.makePage(pageIndex);
        String newFileId = TecrunUtils.createFileId();
        String imageName = baseName + "_" + pageIndex + "." + type;
        String imagePath = TecrunUtils.getFilePathFromBatchIdAndFileId(rootPath, batchId, newFileId, 0);
        String thumbImagePath = TecrunUtils.getFilePathFromBatchIdAndFileId(rootPath, batchId, newFileId, 1);

        // NOTE(review): the image is always encoded as PNG while file_name/file_suffix report
        // the caller-supplied type — confirm that consumers do not rely on the suffix.
        ImageIO.write(image, "png", Paths.get(imagePath).toFile());

        // Thumbnail of the stored page image.
        Thumbnails.of(imagePath).size(THUMBNAIL_SIZE, THUMBNAIL_SIZE)
                .outputFormat("jpg").toFile(thumbImagePath);

        JSONObject ofdimage = new JSONObject();
        ofdimage.put("file_id", newFileId);
        ofdimage.put("imagePath", imagePath);
        ofdimage.put("thumbImagePath", thumbImagePath);
        ofdimage.put("file_name", imageName);
        ofdimage.put("file_size", new File(imagePath).length());
        ofdimage.put("file_suffix", type);
        return ofdimage;
    }

    /**
     * Returns {@code filename} without its last extension; a name without any "." is
     * returned unchanged instead of failing with an index error.
     */
    private static String stripExtension(String filename) {
        int dot = filename.lastIndexOf('.');
        return dot < 0 ? filename : filename.substring(0, dot);
    }

    public static void main(String[] args) {
        OfdToImage.OfdToImageByPage("","","","","");
    }
}
3.3 ofd发票的识别解析:这个类是之前参照其他博主的代码修改而来,目前来看对比较标准的数电票、电票等解析效果比较好
import com.tecrun.common.utils.ConvertUpMoneyUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.*;
import org.dom4j.io.SAXReader;
import org.springframework.util.StreamUtils;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
/**
* 专用于处理电子发票识别的类
*
*
*/
@Slf4j
public class OfdInvoiceExtractor {
/*
解析ofd发票,但有局限,ofd文件解压后必须要有Doc_0/Attachs/original_invoice.xml这个
*/
public static Invoice extract(File file) throws IOException, DocumentException {
ZipFile zipFile = new ZipFile(file);
ZipEntry entry = zipFile.getEntry("Doc_0/Attachs/original_invoice.xml");
ZipEntry entry1 = zipFile.getEntry("Doc_0/Pages/Page_0/Content.xml");
InputStream input = zipFile.getInputStream(entry);
InputStream input1 = zipFile.getInputStream(entry1);
String body = StreamUtils.copyToString(input, Charset.forName("utf-8"));
String content = StreamUtils.copyToString(input1, Charset.forName("utf-8"));
zipFile.close();
Document document = DocumentHelper.parseText(body);
Element root = document.getRootElement();
Invoice invoice = new Invoice();
invoice.setMachineNumber(root.elementTextTrim("MachineNo"));
invoice.setCode(root.elementTextTrim("InvoiceCode"));
invoice.setNumber(root.elementTextTrim("InvoiceNo"));
invoice.setDate(root.elementTextTrim("IssueDate"));
invoice.setChecksum(root.elementTextTrim("InvoiceCheckCode"));
invoice.setAmount( root.elementTextTrim("TaxExclusiveTotalAmount"));
invoice.setTaxAmount(root.elementTextTrim("TaxTotalAmount"));
int ind = content.indexOf("圆整</ofd:TextCode>");
invoice.setTotalAmountString(content.substring(content.lastIndexOf(">", ind) + 1, ind + 2));
invoice.setTotalAmount(root.elementTextTrim("TaxInclusiveTotalAmount"));
invoice.setPayee(root.elementTextTrim("Payee"));
invoice.setReviewer(root.elementTextTrim("Checker"));
invoice.setDrawer(root.elementTextTrim("InvoiceClerk"));
int index = content.indexOf("</ofd:TextCode>");
invoice.setTitle(content.substring(content.lastIndexOf(">", index) + 1, index));
invoice.setType("普通发票");
if (invoice.getTitle().contains("专用发票")) {
invoice.setType("专用发票");
} else if (invoice.getTitle().contains("通行费")) {
invoice.setType("通行费");
}
invoice.setPassword(root.elementText("TaxControlCode"));
Element buyer = root.element("Buyer");
{
invoice.setBuyerName(buyer.elementTextTrim("BuyerName"));
invoice.setBuyerCode(buyer.elementTextTrim("BuyerTaxID"));
invoice.setBuyerAddress(buyer.elementTextTrim("BuyerAddrTel"));
invoice.setBuyerAccount(buyer.elementTextTrim("BuyerFinancialAccount"));
}
Element seller = root.element("Seller");
{
invoice.setSellerName(seller.elementTextTrim("SellerName"));
invoice.setSellerCode(seller.elementTextTrim("SellerTaxID"));
invoice.setSellerAddress(seller.elementTextTrim("SellerAddrTel"));
invoice.setSellerAccount(seller.elementTextTrim("SellerFinancialAccount"));
}
Element details = root.element("GoodsInfos");
{
List<Detail> detailList = new ArrayList<>();
List<Element> elements = details.elements();
for (


7832

被折叠的 条评论
为什么被折叠?



