文章目录
0. GitHub项目链接:
1. 案例-离厂超时预警 实现思路
利用Flink CEP 的实现思路,暂时没考虑其他的干扰条件。假定只刷卡 出 进一次。
/**
解决思路:
利用Flink CEP进行事件流的模式匹配,并设定超时时间(90 minutes)。
具体如下(简单思路,只考虑员工 出 进一次,即door_status从2变为1):
按照某个员工的id聚合的某日的刷卡进出事件:
员工入厂,进,door_status=1
员工中途离厂,出,door_status=2
员工再次进厂,进,door_status=1
员工中途离厂,出,door_status=2,如果距离上次刷卡出厂的时间超过90minutes仍未检测到员工刷卡入厂,则进行超时预警。
*/
/**
* 从员工刷卡出门开始,与当前时间做比较,相差90分钟以上时,则检测此员工的刷卡进入状态,如果没有检测到,则进行离厂预警。
* *****
* 1.CEP 复杂事件处理
* *****
* 1.1 定义一个Pattern,匹配出Pattern里in door与out door间隔大于90分钟的事件
* 1.2 对这些进行输出预警,获取超时未匹配的流
* *****
1.1 定义一个刷卡事件类
- 1.IN: DataSource -> DataStream -> Transformations -> DataStream -> keyBy ->KeyedStream
- 2.Pattern:Pattern.begin.where.next.where…within(Time windowTime)
- 3.PatternStream:CEP.pattern(KeyedStream,Pattern)
- 4.OutputTag:new OutputTag(…)
- 5.SingleOutputStreamOperator: PatternStream.flatSelect(OutputTag,PatternFlatTimeoutFunction,PatternFlatSelectFunction)
- 6.DataStream:SingleOutputStreamOperator.getSideOutput(OutputTag)
- 7.OUT:DataStream -> Transformations -> DataStream -> DataSink
//AccessEvent,刷卡访问事件
package com.events;
import lombok.NoArgsConstructor;
import java.io.Serializable;
import java.util.Objects;
/**
* AccessEvent 刷卡访问事件的实体类对象
* */
//@Data
//@AllArgsConstructor
@NoArgsConstructor
public class AccessEvent implements Serializable {
public Integer id;
public Integer door_id;
public String door_status;
public Integer event_type;
public String employee_sys_no;
public String datetime;
public AccessEvent(AccessEvent indoor) {
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public int getDoor_id() {
return door_id;
}
public void setDoor_id(int door_id) {
this.door_id = door_id;
}
public String getDoor_status() {
return door_status;
}
public void setDoor_status(String door_status) {
this.door_status = door_status;
}
public int getEvent_type() {
return event_type;
}
public void setEvent_type(int event_type) {
this.event_type = event_type;
}
public String getEmployee_sys_no() {
return employee_sys_no;
}
public void setEmployee_sys_no(String employee_sys_no) {
this.employee_sys_no = employee_sys_no;
}
public String getDatetime() {
return datetime;
}
public void setDatetime(String datetime) {
this.datetime = datetime;
}
public AccessEvent(int id, int door_id, String door_status, int event_type, String employee_sys_no, String datetime) {
this.id = id;
this.door_id = door_id;
this.door_status = door_status;
this.event_type = event_type;
this.employee_sys_no = employee_sys_no;
this.datetime = datetime;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
AccessEvent that = (AccessEvent) o;
return id == that.id &&
door_id == that.door_id &&
door_status == that.door_status &&
event_type == that.event_type &&
employee_sys_no == that.employee_sys_no &&
Objects.equals(datetime, that.datetime);
}
@Override
public int hashCode() {
return Objects.hash(id, door_id, door_status, event_type, employee_sys_no, datetime);
}
@Override
public String toString() {
return "AccessEvent{" +
"id=" + id +
", door_id=" + door_id +
", door_status=" + door_status +
", event_type=" + event_type +
", employee_sys_no=" + employee_sys_no +
", datetime='" + datetime + '\'' +
'}';
}
}
1.2 定义一个事件模式(Pattern)
/**
 * Defines the event pattern: an "outdoor" event (door_status "2") followed via next() by an
 * "indoor" event (door_status "1"), constrained by a time window.
 */
Pattern<AccessEvent, AccessEvent> warningPattern = Pattern.<AccessEvent>begin("outdoor")
        .where(new SimpleCondition<AccessEvent>() {
            private static final long serialVersionUID = -6847788055093903603L;

            @Override
            public boolean filter(AccessEvent accessEvent) throws Exception {
                // Constant-first comparison: an event whose door_status is null is treated as
                // "no match" instead of throwing a NullPointerException inside the condition.
                return "2".equals(accessEvent.getDoor_status());
            }
        })
        .next("indoor")
        .where(new SimpleCondition<AccessEvent>() {
            @Override
            public boolean filter(AccessEvent accessEvent) throws Exception {
                // Same null-safe comparison for the re-entry event.
                return "1".equals(accessEvent.getDoor_status());
            }
        })
        // For ease of testing the window is set to 10 s here (the use case describes 90 minutes).
        .within(Time.seconds(10))
        .times(1);
/**
可以为Pattern设置量词(quantifier):匹配固定次数(times)、匹配一次或多次(oneOrMore)、匹配指定次数及以上(timesOrMore)
*/
1.3 Build pattern stream,模式匹配输出
// Apply the pattern to the keyed stream (matching per employee ID, per the original note).
PatternStream<AccessEvent> accessEventPatternStream =
        CEP.pattern(dataStreamKeyBy, warningPattern);
1.4 Use side output get timeout stream,获取超时输出流
/**
 * Creates the OutputTag used to obtain, via side output, the stream of timed-out
 * (unmatched) events.
 */
OutputTag<AccessEvent> outputTag = new OutputTag<AccessEvent>("timedout") {
    private static final long serialVersionUID = 773503794597666247L;
};

// Handlers are named up front: one for timed-out partial matches, one for completed matches.
AccessTimedOut timedOutHandler = new AccessTimedOut();
FlatSelect matchedHandler = new FlatSelect();
SingleOutputStreamOperator<AccessEvent> timeout =
        accessEventPatternStream.flatSelect(outputTag, timedOutHandler, matchedHandler);
/**
 * Collects the events of timed-out partial matches.
 */
public static class AccessTimedOut implements PatternFlatTimeoutFunction<AccessEvent, AccessEvent> {
    private static final long serialVersionUID = -4214641891396057732L;

    /**
     * Emits every event stored under the "outdoor" key of the timed-out partial match.
     *
     * @param pattern   events of the partial match, keyed by pattern stage name
     * @param timeStamp timestamp passed in by the caller (not used here)
     * @param out       collector receiving the "outdoor" events
     */
    @Override
    public void timeout(Map<String, List<AccessEvent>> pattern, long timeStamp, Collector<AccessEvent> out) throws Exception {
        final List<AccessEvent> exits = pattern.get("outdoor");
        if (exits != null) {
            exits.forEach(exit -> {
                System.out.println("timeout outdoor:" + exit.getEmployee_sys_no());
                out.collect(exit);
            });
        }
        // The match timed out before any "indoor" event arrived, so no "indoor" entry is
        // expected here (per the original author's note).
        System.out.println("timeout end" + pattern.get("indoor"));
    }
}
案例 Demo
package com;
import com.events.AccessEvent;
import com.utils.JsonFilter;
import com.utils.KafkaConfigUtil;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.cep.*;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.IngestionTimeExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple6;
import java.util.List;
import java.util.Map;
import java.util.Properties;
public class Test2 {
private static Logger log = LoggerFactory.getLogger(Test2.class);
public static void main(String[] args) throws Exception {

该博客通过实例展示了如何使用Flink CEP实现离厂超时预警,详细介绍了定义刷卡事件类、构建事件模式、获取超时输出流的步骤,并提供了订单超时统计、空气质量检测等其他CEP应用案例。

1328

被折叠的 条评论
为什么被折叠?



