数据库单个插入操作转为批量插入

原创已于 2022-05-15 18:30:31 修改 · 1.2k 阅读

0 ·

本内容遵循CC 4.0 BY-SA版权协议

标签

#数据库 #java

于 2020-03-20 17:09:35 首次发布

java 专栏收录该内容

14 篇文章

订阅专栏

在高并发插入场景下，逐行插入可能使数据库成为瓶颈。本文介绍如何通过 DbBatchInsertProcessor 将单个插入请求自动转化为批量插入，以减少与数据库的通信次数、减轻数据库压力、提高插入效率。该处理器先把请求放入队列，当队列中的请求数达到阈值，或等待超过指定时间后，再执行一次批量插入，思想类似于 Elasticsearch 的 Bulk API。

在业务中，我们常常会遇到很多单行插入的场景。当插入的并发量比较小时，并不会有什么问题；但是一旦插入速度大幅提高，就可能遇到数据库插入瓶颈。有人用多线程去并行插入，其实这样往往不仅没有解决问题，反而可能比单线程插入更慢，因为多个线程同时向同一张表插入时，会竞争数据库的自增锁。此时我们更应该考虑的是将单个插入操作改成批量插入操作，这样不仅减少了与数据库的通信次数，同时也减轻了数据库压力，可以更快地插入。

但是很多业务场景就是需要一条一条数据插入，此时我们可以写一个转换器，自动将单个插入操作，转换为批量插入操作。

基本思路

当单个插入请求发生时，不立即执行插入动作，而是先将请求放到一个队列中。当队列中的请求数量达到一定的阈值，或者等待超过一定的时间之后，再自动将这一批请求合并为一次批量插入执行。

这与 Elasticsearch 的 Bulk API 的思想类似。

代码实现

1.DbBatchInsertProcessor 数据库批量处理器，将单个插入操作，转换为批量操作执行。

import com.jd.purchase.regular.common.util.JsonUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

/**
 * @author bobo
 * @date 2020/1/20
 * 数据库批量处理器
 */
@Slf4j
public class DbBatchInsertProcessor<T> {

    /**
     * 一次批量提交多少个，默认100
     */
    private int bulkNum;

    /**
     * 最多隔多久进行一次提交,单位秒
     */
    private int flushInterval;

    /**
     * 保存数据的队列
     */
    private BlockingQueue<T> itemQueue;

    private volatile boolean closed = false;

    /**
     * 执行插入动作的执行器
     */
    private DoInsert<T> doInsert;

    private static final int DEFAULT_BULK_NUM = 100;
    private static final int DEFAULT_CAPACITY = 1024;
    private static final int DEFAULT_FLUSH_INTERVAL = 5;

    public DbBatchInsertProcessor(DoInsert<T> doInsert) {
        this(doInsert, DEFAULT_BULK_NUM, DEFAULT_FLUSH_INTERVAL, DEFAULT_CAPACITY);
    }

    public DbBatchInsertProcessor(DoInsert<T> doInsert, int bulkNum) {
        this(doInsert, bulkNum, DEFAULT_FLUSH_INTERVAL, DEFAULT_CAPACITY);
    }

    public DbBatchInsertProcessor(DoInsert<T> doInsert, int bulkNum, int flushInterval) {
        this(doInsert, bulkNum, flushInterval, DEFAULT_CAPACITY);
    }
    public DbBatchInsertProcessor(DoInsert<T> doInsert, int bulkNum, int flushInterval, int capacity) {
        if (bulkNum < 1) {
            //不合法时，使用默认值
            bulkNum = DEFAULT_BULK_NUM;
        }
        if (capacity < 1 || capacity < bulkNum) {
            //不合法时，使用默认值或批量提交数量的2倍
            capacity = Math.max(DEFAULT_CAPACITY, bulkNum * 2);
        }
        if (flushInterval < 1) {
            flushInterval = DEFAULT_FLUSH_INTERVAL;
        }

        this.bulkNum = bulkNum;
        this.doInsert = doInsert;
        this.flushInterval = flushInterval;
        itemQueue = new ArrayBlockingQueue<>(capacity);
        //开始flash任务
        this.startFlushTask();

    }

    /**
     * 阻塞添加到队列中
     *
     * @param item
     * @return 最后添加成功就返回true，添加失败就返回false
     */
    public boolean addItem(T item) {
        if (closed) {
            return false;
        }
        try {
            itemQueue.put(item);
            return true;
        } catch (InterruptedException e) {
            log.error("添加到队列时中断！item={}", JsonUtil.write2JsonStr(item));

        }
        return false;
    }

    /**
     * 将队列中的数据全部提交到数据库中
     */
    public void flushAllItem() {
        while (!itemQueue.isEmpty()) {
            List<T> list = new ArrayList<>(bulkNum);
            itemQueue.drainTo(list, bulkNum);
            if (!list.isEmpty()) {
                flushToDB(list);
            }

        }
        log.info("flushAllItem success!");
    }

    /**
     * 关闭批量插入处理器，并提交队列中所有的数据
     */
    public void close() {
        this.closed = true;
        flushAllItem();
        log.info("DbBatchInsertProcessor 成功关闭");
    }

    /**
     * 开始flush任务
     */
    private void startFlushTask() {
        Thread t = new Thread(() -> {
            int waitSecond = 0;
            while (true) {
                if (closed) {
                    break;
                }
                if (itemQueue.size() >= bulkNum || waitSecond >= flushInterval) {
                    //队列数量大于批量提交数或等待超过指定的时间时，进行提交
                    if (!itemQueue.isEmpty()) {
                        List<T> list = new ArrayList<>(bulkNum);
                        itemQueue.drainTo(list, bulkNum);
                        if (!list.isEmpty()) {
                            flushToDB(list);
                        }
                    }
                    waitSecond = 0;

                } else {
                    //还没到批量提交点，进行等待
                    try {
                        Thread.sleep(1000);
                        waitSecond++;
                    } catch (InterruptedException e) {
                        log.error("startFlushTask 异常中断！");
                    }
                }
            }
        });
        t.setName("DbBatchInsertProcessor thread" + this.hashCode());
        t.start();
    }

    private void flushToDB(List<T> list) {
        try {
            int insertRow = doInsert.batchInsert(list);
            log.info("{}表插入{}条记录", doInsert.tableName(), insertRow);
        } catch (Throwable e) {
            log.error("{}表批量插入时发生异常，list={}", doInsert.tableName(), JsonUtil.write2JsonStr(list), e);
        }


    }

    /**
     * 执行真正批量插入的接口
     *
     * @param <T>
     */
    interface DoInsert<T> {
        int batchInsert(List<T> list);
        String tableName();
    }

}

2.使用例子

/**
 * Usage example for {@code DbBatchInsertProcessor}.
 *
 * <p>NOTE(review): {@code Plan}, {@code planMapper}, {@code dataHandlerService}, {@code oldPlan}
 * and {@code planPeriodInfoList} are not defined in this snippet; they are assumed to be provided
 * by the surrounding application.
 */
public class Test {
    public static void main(String[] args) {
        // Create the batch processor up front. It is declared with its type argument
        // (not as a raw type) so that addItem is type-checked.
        DbBatchInsertProcessor<Plan> planDbBatchInsertProcessor = new DbBatchInsertProcessor<>(new DbBatchInsertProcessor.DoInsert<Plan>() {
            @Override
            public int batchInsert(List<Plan> list) {
                return planMapper.batchInsert(list);
            }

            @Override
            public String tableName() {
                return "plan";
            }
        }
        );

        try {
            // Hand single rows to the batch processor instead of inserting them one by one.
            Plan newPlan = dataHandlerService.convertToPlan(oldPlan, planPeriodInfoList);
            planDbBatchInsertProcessor.addItem(newPlan);
        } finally {
            // Remember to close the processor when the application shuts down; the finally
            // block makes sure queued rows are flushed even if the code above throws.
            planDbBatchInsertProcessor.close();
        }
    }
}