1、IK分词器源码下载
下载版本地址:https://github.com/medcl/elasticsearch-analysis-ik
下载发行版地址:https://github.com/medcl/elasticsearch-analysis-ik/releases
2. 导入依赖和修改es版本到对应的安装版本
1.本次下载的是7.x版本的源码,因为我用的是7.10.1版本的ES,所以这里需要改一下版本
<!-- Set this to the version of the Elasticsearch installation the plugin will be deployed to -->
<elasticsearch.version>7.10.1</elasticsearch.version>
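官方源码对应的是7.17.1;在7.10.1版本下,PathUtils类的import地址需要更换一下(7.17.1中为org.elasticsearch.core.PathUtils,7.10.1中为org.elasticsearch.common.io.PathUtils)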

2.引入MySQL驱动到项目中
<!-- MySQL JDBC driver (Connector/J); the driver class is loaded by name from the jdbc.driver property -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.18</version>
</dependency>
3. 创建数据库表,用于存放扩展词典和停用词典
-- Single table holding both extension-dictionary entries (lexicon_type = 0)
-- and stop-word entries (lexicon_type = 1).
-- The plugin polls incrementally with "update_time > ?", so update_time must
-- advance whenever a row is modified (e.g. when is_deleted or lexicon_text is
-- changed); ON UPDATE CURRENT_TIMESTAMP makes MySQL do that automatically.
CREATE TABLE `es_lexicon` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '词库id',
`lexicon_text` varchar(20) NOT NULL COMMENT '词条关键词',
`lexicon_type` int(1) NOT NULL DEFAULT 0 COMMENT '0扩展词库 1停用词库',
`lexicon_status` int(1) NOT NULL DEFAULT 0 COMMENT '词条状态 0正常 1暂停使用',
`is_deleted` int(1) NOT NULL DEFAULT 0 COMMENT '作废标志 0正常 1作废',
`create_time` datetime(0) NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`update_time` datetime(0) NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 1 COMMENT = 'ES远程扩展词库表' ROW_FORMAT = Dynamic;
4. 在config下新建jdbc.properties配置相关数据库属性
# Database connection URL (127.0.0.1 is the standard loopback address; replace
# host, port and schema name with those of your own MySQL server)
jdbc.url=jdbc:mysql://127.0.0.1:3306/es?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=Asia/Shanghai
jdbc.username=root
jdbc.password=root
jdbc.driver=com.mysql.cj.jdbc.Driver
# SQL that fetches extension-dictionary entries; the single ? is bound to the
# update_time of the last row seen by the previous poll
jdbc.update.dic.sql=select gel.lexicon_text as word,gel.is_deleted,gel.update_time from es_lexicon gel where gel.lexicon_type = 0 and gel.lexicon_status = 0 and gel.update_time > ? order by gel.update_time asc
# SQL that fetches stop-word entries; the ? is bound the same way
jdbc.update.stopword.sql=select gel.lexicon_text as word,gel.is_deleted,gel.update_time from es_lexicon gel where gel.lexicon_type = 1 and gel.lexicon_status = 0 and gel.update_time > ? order by gel.update_time asc
# Polling interval in seconds (600 = query the database once every 10 minutes)
jdbc.update.interval=600

5. 打包配置
修改src/main/assemblies/plugin.xml 将 MySQL 驱动的依赖写入,否则打成 zip 后会没有 MySQL 驱动的 jar 包

<!-- Bundle the MySQL JDBC driver jar into the plugin zip -->
<include>mysql:mysql-connector-java</include>
6.修改权限
在 src/main/resources/plugin-security.policy 中添加一行 permission java.lang.RuntimePermission "setContextClassLoader"; 否则会因为权限问题抛出异常。修改后的内容如下:

grant {
// needed because of the hot reload functionality
permission java.net.SocketPermission "*", "connect,resolve";
// added for the MySQL dictionary monitor; per the accompanying instructions,
// a permission exception is thrown without it
permission java.lang.RuntimePermission "setContextClassLoader";
};
3.开始修改源码
1.在org.wltea.analyzer.config包下创建DatabaseMonitor类
package org.wltea.analyzer.config;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.SpecialPermission;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.help.ESPluginLoggerFactory;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.sql.*;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
/**
* @author Qiangteng Ruan
* @version 1.0.0
* @description: 通过 mysql 更新词典 仿Monitor
* @date 2023/2/22 17:17
*/
/**
 * Refreshes the IK extension dictionary and stop-word dictionary from MySQL.
 * Modeled after the plugin's existing {@code Monitor}.
 *
 * <p>Each {@link #run()} opens a connection, fetches the rows whose
 * {@code update_time} is later than the last one seen, applies them to
 * {@link Dictionary}, and closes the connection again. All connection settings
 * and SQL statements are read from the properties held by {@link Dictionary}.
 *
 * @author Qiangteng Ruan
 * @version 1.0.0
 * @date 2023/2/22 17:17
 */
public class DatabaseMonitor implements Runnable {
    private static final Logger logger = ESPluginLoggerFactory.getLogger(DatabaseMonitor.class.getName());

    /** File name of the JDBC configuration file. */
    public static final String PATH_JDBC_PROPERTIES = "jdbc.properties";

    private static final String JDBC_URL = "jdbc.url";
    private static final String JDBC_USERNAME = "jdbc.username";
    private static final String JDBC_PASSWORD = "jdbc.password";
    private static final String JDBC_DRIVER = "jdbc.driver";
    private static final String SQL_UPDATE_DIC = "jdbc.update.dic.sql";
    private static final String SQL_UPDATE_STOPWORD = "jdbc.update.stopword.sql";

    /** Property key of the update interval. */
    public final static String JDBC_UPDATE_INTERVAL = "jdbc.update.interval";

    /** Lower bound used for the first query, before any row has been seen. */
    private static final Timestamp DEFAULT_LAST_UPDATE = Timestamp.valueOf(LocalDateTime.of(LocalDate.of(2023, 1, 1), LocalTime.MIN));

    /** {@code update_time} of the last extension-dictionary row processed; {@code null} before the first one. */
    private static Timestamp lastUpdateTimeOfMainDic = null;

    /** {@code update_time} of the last stop-word row processed; {@code null} before the first one. */
    private static Timestamp lastUpdateTimeOfStopword = null;

    /** @return the value of the {@code jdbc.url} property */
    public String getUrl() {
        return Dictionary.getSingleton().getProperty(JDBC_URL);
    }

    /** @return the value of the {@code jdbc.username} property */
    public String getUsername() {
        return Dictionary.getSingleton().getProperty(JDBC_USERNAME);
    }

    /** @return the value of the {@code jdbc.password} property */
    public String getPassword() {
        return Dictionary.getSingleton().getProperty(JDBC_PASSWORD);
    }

    /** @return the value of the {@code jdbc.driver} property */
    public String getDriver() {
        return Dictionary.getSingleton().getProperty(JDBC_DRIVER);
    }

    /** @return the value of the {@code jdbc.update.dic.sql} property */
    public String getUpdateMainDicSql() {
        return Dictionary.getSingleton().getProperty(SQL_UPDATE_DIC);
    }

    /** @return the value of the {@code jdbc.update.stopword.sql} property */
    public String getUpdateStopwordSql() {
        return Dictionary.getSingleton().getProperty(SQL_UPDATE_STOPWORD);
    }

    /**
     * Loads the JDBC driver class named by the {@code jdbc.driver} property.
     * A missing driver class is logged, not rethrown.
     */
    public DatabaseMonitor() {
        SpecialPermission.check();
        AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
            try {
                Class.forName(getDriver());
            } catch (ClassNotFoundException e) {
                logger.error("Mysql JDBC驱动程序未找到", e);
            }
            return null;
        });
    }

    /**
     * Performs one refresh cycle: connect, update the extension dictionary,
     * update the stop words, disconnect.
     *
     * <p>Runtime failures are logged instead of propagated. This task is meant
     * to be submitted to {@code scheduleAtFixedRate}, which cancels all further
     * executions once a run throws, so a single database outage must not
     * escape from here.
     */
    @Override
    public void run() {
        SpecialPermission.check();
        AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
            Connection conn = null;
            try {
                conn = getConnection();
                if (conn == null) {
                    // getConnection() has already logged the cause; retry on the next cycle
                    return null;
                }
                updateMainDic(conn);
                updateStopword(conn);
            } catch (RuntimeException e) {
                logger.error("数据库词库更新任务执行异常", e);
            } finally {
                // always release the connection, even when an update failed
                closeConnection(conn);
            }
            return null;
        });
    }

    /**
     * Opens a new connection using the configured URL, user name and password.
     *
     * @return the connection, or {@code null} if it could not be established
     *         (the failure is logged)
     */
    public Connection getConnection() {
        Connection connection = null;
        try {
            connection = DriverManager.getConnection(getUrl(), getUsername(), getPassword());
        } catch (SQLException e) {
            logger.error("mysql连接失败", e);
        }
        return connection;
    }

    /**
     * Applies extension-dictionary changes newer than the last seen
     * {@code update_time}: rows with {@code is_deleted} set are disabled, all
     * others are added. Blank words are skipped.
     *
     * @param conn open connection to query; when {@code null} the update is skipped
     */
    public synchronized void updateMainDic(Connection conn) {
        logger.info("开始添加扩展词库..");
        if (conn == null) {
            logger.error("数据库连接为空,跳过扩展词库更新");
            return;
        }
        int numberOfAddWords = 0;
        int numberOfDisableWords = 0;
        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            String sql = getUpdateMainDicSql();
            // resume from the last row seen by the previous poll
            Timestamp param = lastUpdateTimeOfMainDic == null ? DEFAULT_LAST_UPDATE : lastUpdateTimeOfMainDic;
            logger.info("param: {}", param);
            ps = conn.prepareStatement(sql);
            ps.setTimestamp(1, param);
            rs = ps.executeQuery();
            while (rs.next()) {
                String word = rs.getString("word");
                if (word == null) {
                    continue;
                }
                word = word.trim();
                if (word.isEmpty()) {
                    continue;
                }
                // remember how far we got so the next poll only fetches newer rows
                lastUpdateTimeOfMainDic = rs.getTimestamp("update_time");
                if (rs.getBoolean("is_deleted")) {
                    logger.info("[main dic] 扩展词库删除失效数据: {}", word);
                    Dictionary.disableWord(word);
                    numberOfDisableWords++;
                } else {
                    logger.info("[main dic] 扩展词库添加词: {}", word);
                    Dictionary.addWord(word);
                    numberOfAddWords++;
                }
            }
            logger.info("扩展词库添加数 -> 添加数: {}, 去除数: {}", numberOfAddWords, numberOfDisableWords);
        } catch (SQLException e) {
            logger.error("更新失败", e);
        } finally {
            // release on success as well as on failure
            closeRsAndPs(rs, ps);
        }
    }

    /**
     * Applies stop-word changes newer than the last seen {@code update_time}:
     * rows with {@code is_deleted} set are disabled, all others are added.
     * Blank words are skipped.
     *
     * @param conn open connection to query; when {@code null} the update is skipped
     */
    public synchronized void updateStopword(Connection conn) {
        logger.info("开始添加停用词库...");
        if (conn == null) {
            logger.error("数据库连接为空,跳过停用词库更新");
            return;
        }
        int numberOfAddWords = 0;
        int numberOfDisableWords = 0;
        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            String sql = getUpdateStopwordSql();
            // resume from the last row seen by the previous poll
            Timestamp param = lastUpdateTimeOfStopword == null ? DEFAULT_LAST_UPDATE : lastUpdateTimeOfStopword;
            logger.info("param: {}", param);
            ps = conn.prepareStatement(sql);
            ps.setTimestamp(1, param);
            rs = ps.executeQuery();
            while (rs.next()) {
                String word = rs.getString("word");
                if (word == null) {
                    continue;
                }
                word = word.trim();
                if (word.isEmpty()) {
                    continue;
                }
                // remember how far we got so the next poll only fetches newer rows
                lastUpdateTimeOfStopword = rs.getTimestamp("update_time");
                if (rs.getBoolean("is_deleted")) {
                    logger.info("[stopword] 停用词库删除失效数据: {}", word);
                    Dictionary.disableStopword(word);
                    numberOfDisableWords++;
                } else {
                    logger.info("[stopword] 停用词库添加词: {}", word);
                    Dictionary.addStopword(word);
                    numberOfAddWords++;
                }
            }
            logger.info("停用词库添加数 -> 添加数: {}, 去除数: {}", numberOfAddWords, numberOfDisableWords);
        } catch (SQLException e) {
            logger.error("更新失败!", e);
        } finally {
            closeRsAndPs(rs, ps);
        }
    }

    /**
     * Closes the connection, logging any failure.
     *
     * @param conn connection to close; {@code null} is ignored
     */
    public void closeConnection(Connection conn) {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                logger.error("关闭mysql连接失败", e);
            }
        }
    }

    /**
     * Closes the result set and then the statement, logging any failure.
     * A failure to close the result set does not prevent closing the statement.
     *
     * @param rs result set to close; {@code null} is ignored
     * @param ps statement to close; {@code null} is ignored
     */
    public void closeRsAndPs(ResultSet rs, PreparedStatement ps) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                logger.error("日志:关闭ResultSet失败", e);
            }
        }
        if (ps != null) {
            try {
                ps.close();
            } catch (SQLException e) {
                logger.error("日志:关闭Statement失败", e);
            }
        }
    }
}
2.修改Dictionary类
1、在当前类中添加几个方法,用于增删词条

/**
 * Loads a new entry into {@code _MainDict}.
 *
 * @param word entry to add; it is trimmed and lower-cased before insertion
 */
public static void addWord(String word) {
    char[] normalized = word.trim().toLowerCase().toCharArray();
    singleton._MainDict.fillSegment(normalized);
}
/**
 * Removes (masks) an entry in {@code _MainDict}.
 *
 * @param word entry to disable; it is trimmed and lower-cased before lookup
 */
public static void disableWord(String word) {
    char[] normalized = word.trim().toLowerCase().toCharArray();
    singleton._MainDict.disableSegment(normalized);
}
/**
 * Loads a new stop word into {@code _StopWords}.
 *
 * @param word stop word to add; it is trimmed and lower-cased before insertion
 */
public static void addStopword(String word) {
    char[] normalized = word.trim().toLowerCase().toCharArray();
    singleton._StopWords.fillSegment(normalized);
}
/**
 * Removes (masks) a stop word in {@code _StopWords}.
 *
 * @param word stop word to disable; it is trimmed and lower-cased before lookup
 */
public static void disableStopword(String word) {
    char[] normalized = word.trim().toLowerCase().toCharArray();
    singleton._StopWords.disableSegment(normalized);
}
/**
 * Loads the JDBC configuration file, located in the directory returned by
 * {@code getDictRoot()}, into {@code props}.
 *
 * <p>After loading, the key (not the value) of every entry in {@code props}
 * is logged. An {@link IOException} while reading the file is logged and
 * otherwise ignored.
 */
public void loadJdbcProperties() {
    Path file = PathUtils.get(getDictRoot(), DatabaseMonitor.PATH_JDBC_PROPERTIES);
    // try-with-resources so the file handle is released even if load() fails
    try (FileInputStream input = new FileInputStream(file.toFile())) {
        props.load(input);
        logger.info("====================================properties start====================================");
        for (Map.Entry<Object, Object> entry : props.entrySet()) {
            logger.info("获取到文件key:{}:", entry.getKey());
        }
        logger.info("====================================properties end====================================");
    } catch (IOException e) {
        logger.error("未找到: " + DatabaseMonitor.PATH_JDBC_PROPERTIES, e);
    }
}
2、然后在Dictionary的构造方法中调用加载 jdbc.properties 的方法 loadJdbcProperties()

3、如果getProperty()方法不是 public 就改为 public

4、在initial()方法中启动自己实现的数据库监控线程

// Start the database monitor task: first run after 10 seconds, then repeated
// at the interval (in seconds) read from the jdbc.update.interval property.
// NOTE(review): Long.parseLong throws NumberFormatException when given null or
// non-numeric text — presumably the case if the property is missing from
// jdbc.properties; confirm getProperty's behavior for absent keys.
pool.scheduleAtFixedRate(new DatabaseMonitor(), 10, Long.parseLong(getSingleton().getProperty(DatabaseMonitor.JDBC_UPDATE_INTERVAL)), TimeUnit.SECONDS);
1、打包

2.把这个压缩包上传到es插件目录解压即可

该文介绍了如何下载IK分词器源码并修改以适配Elasticsearch7.10.1版本,包括更换PathUtils类、引入MySQL驱动、创建数据库表结构、配置jdbc.properties文件以及设置数据库查询间隔。此外,文章还详细讲解了如何打包配置、修改权限,并提供了新建DatabaseMonitor类以实现实时更新扩展词库和停用词库的功能。

933

被折叠的 条评论
为什么被折叠?



