ofd文件解析和操作小记

1. 这是之前研究 ofd 文件的一些总结:ofd 文件由各种 xml 文件及其上下文资源组成,其中最主要的是 ofd.xml 这个文件,它记录了 ofd 的类型以及来源等信息。

2. pom.xml 如下。需要注意 log4j 和 slf4j 涉及到的包,可能会跟项目中已有的 jar 冲突:

<!-- OFD manipulation library -->
       <dependency>
           <groupId>org.ofdrw</groupId>
           <artifactId>ofdrw-full</artifactId>
           <version>2.0.5</version>
           <exclusions><!-- Must be excluded when converting OFD; otherwise a jar conflict is reported and startup fails with a stack overflow -->
               <exclusion>
                   <groupId>org.apache.logging.log4j</groupId>
                   <artifactId>log4j-slf4j-impl</artifactId>
               </exclusion>
           </exclusions>
       </dependency>

3. ofd的一些操作类

3.1  图片合成ofd


import org.ofdrw.graphics2d.OFDGraphicsDocument;
import org.ofdrw.graphics2d.OFDPageGraphics2D;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;

/**
 * @Description //TODO
 * @Date 2023/06/01 15:38
 * @Author kdc
 **/
public class ImageToOfd {


    /**
     * Builds an OFD document in which every input image becomes one page.
     *
     * <p>Each page is created with a fixed width of 210 (the unit is whatever
     * {@code OFDGraphicsDocument.newPage} expects — presumably millimetres, i.e. A4 width;
     * confirm against the OFDRW docs). The page height is scaled so the image keeps its
     * aspect ratio.
     *
     * <p>Failures are not thrown: they are printed and reported through the returned object.
     *
     * @param filepath path of the OFD file to create
     * @param imgUrl   one or more image file paths, separated by {@code ","}
     * @return a JSON object holding {@code code} and {@code msg}; on success it also holds
     *         {@code fileSize}, the length in bytes of the file at {@code filepath}
     */
    public static JSONObject imageToOfd(String filepath, String imgUrl){
        JSONObject returnResult = new JSONObject();
        Path dst = Paths.get(filepath);
        try {
            int width = 210;
            // try-with-resources: the original only called close() on the success path, so a
            // failing image left the document open. It is now closed on every path.
            // NOTE(review): closing after a failure may still write the pages added so far to
            // filepath — confirm whether a partial file should be removed by the caller.
            try (OFDGraphicsDocument doc = new OFDGraphicsDocument(dst)) {
                for (String imgPath : imgUrl.split(",")) {
                    BufferedImage image = ImageIO.read(new File(imgPath));
                    if (image == null) {
                        // ImageIO.read returns null when no registered reader understands the file.
                        throw new IllegalArgumentException("Unsupported or unreadable image: " + imgPath);
                    }
                    // Scale the height by the same factor that maps the image width onto the page width.
                    double scale = width * 1.0000 / image.getWidth();
                    double pageHeight = scale * image.getHeight();
                    OFDPageGraphics2D g = doc.newPage(width, pageHeight);
                    g.drawImage(image, 0, 0, width, (int) pageHeight, null);
                    g.dispose();
                    // NOTE(review): kept from the original; looks redundant after drawImage — confirm.
                    doc.addResImg(image);
                }
            }
            // The file size is read only after the document has been closed.
            System.out.println(">> " + dst.toAbsolutePath());
            returnResult.put("code", ResultCode.SUCCESS.getCode());
            returnResult.put("msg","ofd合成成功");
            returnResult.put("fileSize",new File(filepath).length());
        } catch (Exception e) {
            returnResult.put("code",ResultCode.ERROR.getCode());
            returnResult.put("msg","OFD合成失败");
            e.printStackTrace();
        }
        return returnResult;
    }

3.2    ofd按页数拆图


import org.ofdrw.converter.ImageMaker;
import org.ofdrw.reader.OFDReader;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Converts pages of an OFD document into PNG images and creates a JPEG thumbnail for each.
 *
 * <p>All public entry points share one rendering routine and differ only in which pages they
 * render. Failures while reading or rendering are printed and reported to the caller as an
 * empty (or partially filled) result instead of an exception.
 *
 * @author kdc
 */
public class OfdToImage {

    /**
     * Second argument handed to the {@code ImageMaker} constructor; presumably pixels per
     * millimetre — confirm against the OFDRW docs.
     */
    private static final int RENDER_PPM = 15;

    /** Width and height passed to {@code Thumbnails.size}. */
    private static final int THUMBNAIL_SIZE = 200;

    /**
     * Renders every page of the OFD file to an image.
     *
     * @param rootPath storage root passed to {@code TecrunUtils.getFilePathFromBatchIdAndFileId}
     * @param file_id  id of the source OFD file
     * @param batchId  batch id used to resolve both the source and the output paths
     * @param filename original file name; its extension (if any) is stripped to name the images
     * @param type     suffix reported as {@code file_suffix} and used in {@code file_name}
     * @return one JSON object per rendered page; empty or partial if rendering failed
     */
    public static JSONArray OfdToImageByPage(String rootPath, String file_id, String batchId, String filename, String type) {
        return OfdToImageByPageByIndex(rootPath, file_id, batchId, filename, type, 0);
    }

    /**
     * Renders only the first page (index 0) of the OFD file to an image.
     *
     * @param rootPath storage root passed to {@code TecrunUtils.getFilePathFromBatchIdAndFileId}
     * @param file_id  id of the source OFD file
     * @param batchId  batch id used to resolve both the source and the output paths
     * @param filename original file name; its extension (if any) is stripped to name the image
     * @param type     suffix reported as {@code file_suffix} and used in {@code file_name}
     * @return the JSON description of the rendered page; empty if rendering failed
     */
    public static JSONObject OfdToImageByPageOne(String rootPath, String file_id, String batchId, String filename, String type) {
        return OfdToImageByPageNum(rootPath, file_id, batchId, filename, type, 0);
    }

    /**
     * Renders every page from {@code Index} (inclusive, zero-based) to the last page.
     *
     * @param rootPath storage root passed to {@code TecrunUtils.getFilePathFromBatchIdAndFileId}
     * @param file_id  id of the source OFD file
     * @param batchId  batch id used to resolve both the source and the output paths
     * @param filename original file name; its extension (if any) is stripped to name the images
     * @param type     suffix reported as {@code file_suffix} and used in {@code file_name}
     * @param Index    zero-based index of the first page to render
     * @return one JSON object per rendered page; pages rendered before a failure are kept
     */
    public static JSONArray OfdToImageByPageByIndex(String rootPath, String file_id, String batchId, String filename, String type,int Index) {
        String baseName = stripExtension(filename);
        JSONArray returnObj = new JSONArray();
        String filepath = TecrunUtils.getFilePathFromBatchIdAndFileId(rootPath, batchId, file_id, 0);
        Path src = Paths.get(filepath);
        // try-with-resources: the original never closed the reader.
        try (OFDReader reader = new OFDReader(src)) {
            ImageMaker imageMaker = new ImageMaker(reader, RENDER_PPM);
            int pageCount = imageMaker.pageSize();
            for (int i = Index; i < pageCount; i++) {
                returnObj.add(renderPage(imageMaker, i, rootPath, batchId, baseName, type));
            }
            System.out.println("ofdtoPic-ok");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return returnObj;
    }

    /**
     * Renders the single page at {@code PageNum} (zero-based) to an image.
     *
     * @param rootPath storage root passed to {@code TecrunUtils.getFilePathFromBatchIdAndFileId}
     * @param file_id  id of the source OFD file
     * @param batchId  batch id used to resolve both the source and the output paths
     * @param filename original file name; its extension (if any) is stripped to name the image
     * @param type     suffix reported as {@code file_suffix} and used in {@code file_name}
     * @param PageNum  zero-based index of the page to render
     * @return the JSON description of the rendered page; empty if rendering failed
     */
    public static JSONObject OfdToImageByPageNum(String rootPath, String file_id, String batchId, String filename, String type,int PageNum) {
        String baseName = stripExtension(filename);
        JSONObject ofdimage = new JSONObject();
        String filepath = TecrunUtils.getFilePathFromBatchIdAndFileId(rootPath, batchId, file_id, 0);
        Path src = Paths.get(filepath);
        // Replaces the original finally block, which only nulled local variables and
        // therefore released nothing.
        try (OFDReader reader = new OFDReader(src)) {
            ImageMaker imageMaker = new ImageMaker(reader, RENDER_PPM);
            ofdimage = renderPage(imageMaker, PageNum, rootPath, batchId, baseName, type);
            System.out.println("ofdtoPic-ok");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return ofdimage;
    }

    /**
     * Renders one page, stores it as PNG, creates its thumbnail and describes the result.
     *
     * @param imageMaker renderer bound to the open OFD document
     * @param pageIndex  zero-based page index
     * @param rootPath   storage root used to resolve the output paths
     * @param batchId    batch id used to resolve the output paths
     * @param baseName   source file name without extension
     * @param type       suffix reported in the result
     * @return JSON object with {@code file_id}, {@code imagePath}, {@code thumbImagePath},
     *         {@code file_name}, {@code file_size} and {@code file_suffix}
     * @throws Exception any failure from rendering, encoding or the project helpers; the
     *                   public entry points catch it
     */
    private static JSONObject renderPage(ImageMaker imageMaker, int pageIndex, String rootPath,
                                         String batchId, String baseName, String type) throws Exception {
        BufferedImage image = imageMaker.makePage(pageIndex);
        String newFileId = TecrunUtils.createFileId();
        String imageName = baseName + "_" + pageIndex + "." + type;
        String imagePath = TecrunUtils.getFilePathFromBatchIdAndFileId(rootPath, batchId, newFileId, 0);
        String thumbImagePath = TecrunUtils.getFilePathFromBatchIdAndFileId(rootPath, batchId, newFileId, 1);

        // NOTE(review): the image is always encoded as PNG, regardless of the "type" reported
        // as file_suffix — kept as in the original; confirm this is intended.
        ImageIO.write(image, "png", Paths.get(imagePath).toFile());
        // Thumbnail for this page.
        Thumbnails.of(imagePath).size(THUMBNAIL_SIZE, THUMBNAIL_SIZE)
                .outputFormat("jpg").toFile(thumbImagePath);

        JSONObject ofdimage = new JSONObject();
        ofdimage.put("file_id", newFileId);
        ofdimage.put("imagePath", imagePath);
        ofdimage.put("thumbImagePath", thumbImagePath);
        ofdimage.put("file_name", imageName);
        ofdimage.put("file_size", new File(imagePath).length());
        ofdimage.put("file_suffix", type);
        return ofdimage;
    }

    /**
     * Returns {@code filename} without its last extension.
     *
     * <p>A name without a dot is returned unchanged; the original {@code substring} call
     * threw {@code StringIndexOutOfBoundsException} in that case.
     */
    private static String stripExtension(String filename) {
        int dot = filename.lastIndexOf('.');
        return dot < 0 ? filename : filename.substring(0, dot);
    }

    public static void main(String[] args) {
       OfdToImage.OfdToImageByPage("","","","","");
    }

}

3.3 ofd 发票的识别解析。这个类是之前参照其他博主的代码修改而来的;目前来看,对比较标准的数电发票、电子发票等,解析效果比较好。


import com.tecrun.common.utils.ConvertUpMoneyUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.*;
import org.dom4j.io.SAXReader;
import org.springframework.util.StreamUtils;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;


/**
 * 专用于处理电子发票识别的类
 *
 *
 */

@Slf4j
public class OfdInvoiceExtractor {


    /*
    解析ofd发票,但有局限,ofd文件解压后必须要有Doc_0/Attachs/original_invoice.xml这个
     */
    public static Invoice extract(File file) throws IOException, DocumentException {
        ZipFile zipFile = new ZipFile(file);
        ZipEntry entry = zipFile.getEntry("Doc_0/Attachs/original_invoice.xml");
        ZipEntry entry1 = zipFile.getEntry("Doc_0/Pages/Page_0/Content.xml");
        InputStream input = zipFile.getInputStream(entry);
        InputStream input1 = zipFile.getInputStream(entry1);
        String body = StreamUtils.copyToString(input, Charset.forName("utf-8"));
        String content = StreamUtils.copyToString(input1, Charset.forName("utf-8"));
        zipFile.close();
        Document document = DocumentHelper.parseText(body);
        Element root = document.getRootElement();
        Invoice invoice = new Invoice();
        invoice.setMachineNumber(root.elementTextTrim("MachineNo"));
        invoice.setCode(root.elementTextTrim("InvoiceCode"));
        invoice.setNumber(root.elementTextTrim("InvoiceNo"));
        invoice.setDate(root.elementTextTrim("IssueDate"));
        invoice.setChecksum(root.elementTextTrim("InvoiceCheckCode"));
        invoice.setAmount( root.elementTextTrim("TaxExclusiveTotalAmount"));
        invoice.setTaxAmount(root.elementTextTrim("TaxTotalAmount"));
        int ind = content.indexOf("圆整</ofd:TextCode>");
        invoice.setTotalAmountString(content.substring(content.lastIndexOf(">", ind) + 1, ind + 2));
        invoice.setTotalAmount(root.elementTextTrim("TaxInclusiveTotalAmount"));
        invoice.setPayee(root.elementTextTrim("Payee"));
        invoice.setReviewer(root.elementTextTrim("Checker"));
        invoice.setDrawer(root.elementTextTrim("InvoiceClerk"));
        int index = content.indexOf("</ofd:TextCode>");
        invoice.setTitle(content.substring(content.lastIndexOf(">", index) + 1, index));
        invoice.setType("普通发票");
        if (invoice.getTitle().contains("专用发票")) {
            invoice.setType("专用发票");
        } else if (invoice.getTitle().contains("通行费")) {
            invoice.setType("通行费");
        }
        invoice.setPassword(root.elementText("TaxControlCode"));
        Element buyer = root.element("Buyer");
        {
            invoice.setBuyerName(buyer.elementTextTrim("BuyerName"));
            invoice.setBuyerCode(buyer.elementTextTrim("BuyerTaxID"));
            invoice.setBuyerAddress(buyer.elementTextTrim("BuyerAddrTel"));
            invoice.setBuyerAccount(buyer.elementTextTrim("BuyerFinancialAccount"));
        }
        Element seller = root.element("Seller");
        {
            invoice.setSellerName(seller.elementTextTrim("SellerName"));
            invoice.setSellerCode(seller.elementTextTrim("SellerTaxID"));
            invoice.setSellerAddress(seller.elementTextTrim("SellerAddrTel"));
            invoice.setSellerAccount(seller.elementTextTrim("SellerFinancialAccount"));
        }
        Element details = root.element("GoodsInfos");
        {
            List<Detail> detailList = new ArrayList<>();
            List<Element> elements = details.elements();
            for (
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值