从clickhouse中查询各维度(SQL里要通过group by 维度)指标,clickhouse通过JavaAPI来实现实时展示。
命令行开启clickhouse:
Eg: Select osname, channel, isNew, count(distinct deviced) from tb_user_event group by isNew, channel, osname;
优化:

引入RocksDB(一种StateBackend)的原因:如果把状态全都放入内存中,当state里的数据比较多、而且状态会实时变化时,内存容易不够用。每一个TaskManager里只有一个RocksDB,它将数据以二进制字节数组的形式存入本地的嵌入式数据库,单独的一个key或者一个value不能超过2G。作用:可以实现增量的checkpoint。
将数据写到hdfs的缺点就是一旦数据变化了要把新的全量的数据通过checkpoint写入到hdfs,如果只把变化的增量checkpoint进去会更好。

FsStateBackend是把运行中的状态数据保存在TaskManager的内存里面,再通过checkpoint把数据定期持久化到文件系统(例如HDFS)里面。
merge方法只有在SessionWindow时可能会调用,其他类型的Window不会调用。实时统计累计观众、实时在线观众:为了减少写入数据时对外部数据库的压力,我们使用窗口,将数据进行增量聚合,这样输出的数据就变少了,对数据库的压力也变小了。
1)
import com.doit.live.cn.demoText.POJO.DataBean;
import com.doit.live.cn.demoText.kafka.MyKafkaDeserializationSchema;
import com.doit.live.cn.demoText.udfs.JsonToBeanWithIdFunction;
import com.doit.live.cn.demoText.utils.FlinkUtil;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
//存在一个问题,如果来一条计算一次,并且还要输出一次,这样对外部的数据库压力比较大
//怎样改进
//划分滚动窗口,滚动窗口进行聚合,仅会累加当前窗口中的数据,必须要累加历史数据
//keyedStream.window(TumblingProcessingTimeWindows.of(Time.seconds(1))).reduce(...)
/**
 * Job entry point that counts live-room audience per anchor, emitting one result per event.
 *
 * <p>Pipeline: Kafka source -> JSON to {@code DataBean} -> keep only {@code liveEnter} /
 * {@code liveLeave} events -> key by the {@code anchor_id} property -> {@link AudienceCount}
 * -> print.
 */
public class liveaudiencecount {

    /** Returns {@code true} when the bean's event id is {@code liveEnter} or {@code liveLeave}. */
    private static boolean isEnterOrLeave(DataBean bean) {
        String eventId = bean.getEventId();
        return "liveEnter".equals(eventId) || "liveLeave".equals(eventId);
    }

    /**
     * Builds the streaming pipeline.
     *
     * @param args {@code args[0]} is handed to {@code FlinkUtil.createKafkaStreamWithId};
     *             presumably a configuration/properties path -- TODO confirm against FlinkUtil
     * @throws Exception propagated from pipeline construction
     */
    public static void main(String[] args) throws Exception {
        DataStream<Tuple2<String, String>> source =
                FlinkUtil.createKafkaStreamWithId(args[0], MyKafkaDeserializationSchema.class);

        SingleOutputStreamOperator<DataBean> beans = source.process(new JsonToBeanWithIdFunction());
        SingleOutputStreamOperator<DataBean> liveEvents = beans.filter(bean -> isEnterOrLeave(bean));

        // All events of one anchor go to the same keyed state.
        KeyedStream<DataBean, String> byAnchor =
                liveEvents.keyBy(bean -> bean.getProperties().get("anchor_id").toString());

        SingleOutputStreamOperator<Tuple4<String, String, Integer, Integer>> counts =
                byAnchor.process(new AudienceCount());
        counts.print();
        // NOTE(review): no execute() call is visible in this method; unless FlinkUtil triggers
        // execution elsewhere, the job graph is only built here -- confirm.
    }
}
import com.doit.live.cn.demoText.POJO.DataBean;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
/**
 * Keyed process function that maintains two counters per room in keyed map state and emits
 * them after every incoming event.
 *
 * <p>State layout (one map per stream key): {@code "<room_id>-total"} holds the cumulative
 * number of {@code liveEnter} events, {@code "<room_id>-online"} holds enters minus leaves.
 *
 * <p>Output: {@code (current key, room_id, total, online)}.
 */
public class AudienceCount extends KeyedProcessFunction<String, DataBean, Tuple4<String, String, Integer, Integer>> {

    private transient MapState<String, Integer> audiencestate;

    /** Registers the map state under the name {@code "audiencestate"}. */
    @Override
    public void open(Configuration parameters) throws Exception {
        MapStateDescriptor<String, Integer> descriptor =
                new MapStateDescriptor<>("audiencestate", String.class, Integer.class);
        audiencestate = getRuntimeContext().getMapState(descriptor);
    }

    /**
     * Updates the counters of the event's room and emits the new values.
     *
     * <p>Events whose id is neither {@code liveEnter} nor {@code liveLeave} leave the counters
     * unchanged but still write them back and still produce an output record.
     *
     * @param value incoming event; its {@code room_id} property must be present
     * @param ctx   supplies the current key for the output tuple
     * @param out   receives {@code (key, room_id, total, online)}
     */
    @Override
    public void processElement(DataBean value, Context ctx, Collector<Tuple4<String, String, Integer, Integer>> out) throws Exception {
        String roomId = value.getProperties().get("room_id").toString();
        String totalKey = roomId + "-total";
        String onlineKey = roomId + "-online";

        int total = countOrZero(totalKey);
        int online = countOrZero(onlineKey);

        String eventId = value.getEventId();
        if ("liveEnter".equals(eventId)) {
            total++;
            online++;
        } else if ("liveLeave".equals(eventId)) {
            online--;
        }

        audiencestate.put(totalKey, total);
        audiencestate.put(onlineKey, online);
        out.collect(Tuple4.of(ctx.getCurrentKey(), roomId, total, online));
    }

    /** Reads a counter from state, treating a missing entry as zero. */
    private int countOrZero(String stateKey) throws Exception {
        Integer stored = audiencestate.get(stateKey);
        return stored == null ? 0 : stored;
    }
}
2)优化:之所以使用aggregate,是因为reduce要求输入和输出类型一致(都必须是DataBean),不够灵活。
import com.doit.live.cn.demoText.POJO.DataBean;
import com.doit.live.cn.demoText.kafka.MyKafkaDeserializationSchema;
import com.doit.live.cn.demoText.udfs.JsonToBeanWithIdFunction;
import com.doit.live.cn.demoText.utils.FlinkUtil;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
/**
 * Windowed variant of the audience-count job: events are pre-aggregated in 5-second
 * tumbling processing-time windows so that one record per key and window is emitted instead
 * of one record per event.
 *
 * <p>Pipeline: Kafka source -> JSON to {@code DataBean} -> keep only {@code liveEnter} /
 * {@code liveLeave} events -> key by the {@code anchor_id} property -> 5 s tumbling window
 * -> {@link AudienceCount2} -> print.
 */
public class liveaudiencecount2 {

    /** Returns {@code true} when the bean's event id is {@code liveEnter} or {@code liveLeave}. */
    private static boolean isEnterOrLeave(DataBean bean) {
        String eventId = bean.getEventId();
        return "liveEnter".equals(eventId) || "liveLeave".equals(eventId);
    }

    /**
     * Builds the streaming pipeline.
     *
     * @param args {@code args[0]} is handed to {@code FlinkUtil.createKafkaStreamWithId};
     *             presumably a configuration/properties path -- TODO confirm against FlinkUtil
     * @throws Exception propagated from pipeline construction
     */
    public static void main(String[] args) throws Exception {
        DataStream<Tuple2<String, String>> source =
                FlinkUtil.createKafkaStreamWithId(args[0], MyKafkaDeserializationSchema.class);

        SingleOutputStreamOperator<DataBean> beans = source.process(new JsonToBeanWithIdFunction());
        SingleOutputStreamOperator<DataBean> liveEvents = beans.filter(bean -> isEnterOrLeave(bean));

        KeyedStream<DataBean, String> byAnchor =
                liveEvents.keyBy(bean -> bean.getProperties().get("anchor_id").toString());

        // Each window result only covers the events of that 5-second window.
        WindowedStream<DataBean, String, TimeWindow> fiveSecondWindows =
                byAnchor.window(TumblingProcessingTimeWindows.of(Time.seconds(5)));

        SingleOutputStreamOperator<Tuple3<String, Integer, Integer>> windowCounts =
                fiveSecondWindows.aggregate(new AudienceCount2());
        windowCounts.print();
        // NOTE(review): no execute() call is visible in this method; unless FlinkUtil triggers
        // execution elsewhere, the job graph is only built here -- confirm.
    }
}
import com.doit.live.cn.demoText.POJO.DataBean;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.tuple.Tuple3;
/**
 * Incremental window aggregate producing {@code (room_id, enterCount, onlineDelta)}.
 *
 * <ul>
 *   <li>{@code f0} - the {@code room_id} property of the most recently added event
 *       ({@code null} until the first event is added);</li>
 *   <li>{@code f1} - number of {@code liveEnter} events added;</li>
 *   <li>{@code f2} - {@code liveEnter} count minus {@code liveLeave} count.</li>
 * </ul>
 *
 * <p>The accumulator type is also the result type, so the values describe only the events
 * that were added to this accumulator (i.e. one window), not an all-time total.
 */
public class AudienceCount2 implements AggregateFunction<DataBean, Tuple3<String, Integer, Integer>, Tuple3<String, Integer, Integer>> {

    /** Starts with no room id and both counters at zero. */
    @Override
    public Tuple3<String, Integer, Integer> createAccumulator() {
        return Tuple3.of(null, 0, 0);
    }

    /**
     * Folds one event into the accumulator (mutating and returning the same instance).
     *
     * @param bean event to add; its {@code room_id} property must be present
     * @param acc  accumulator to update
     * @return the updated accumulator
     */
    @Override
    public Tuple3<String, Integer, Integer> add(DataBean bean, Tuple3<String, Integer, Integer> acc) {
        acc.f0 = bean.getProperties().get("room_id").toString();
        String eventId = bean.getEventId();
        if ("liveEnter".equals(eventId)) {
            acc.f1 += 1;
            acc.f2 += 1;
        } else if ("liveLeave".equals(eventId)) {
            acc.f2 -= 1;
        }
        return acc;
    }

    /** Returns the accumulator unchanged as the window result. */
    @Override
    public Tuple3<String, Integer, Integer> getResult(Tuple3<String, Integer, Integer> acc) {
        return acc;
    }

    /**
     * Combines two partial accumulators by summing their counters.
     *
     * <p>Previously this returned {@code null}, which silently discarded both partial results
     * whenever the function was used with a merging window assigner such as session windows.
     * The room id is taken from {@code b} when it has one, otherwise from {@code a}.
     *
     * @param a first partial accumulator
     * @param b second partial accumulator
     * @return a new accumulator holding the combined counts
     */
    @Override
    public Tuple3<String, Integer, Integer> merge(Tuple3<String, Integer, Integer> a, Tuple3<String, Integer, Integer> b) {
        String roomId = b.f0 != null ? b.f0 : a.f0;
        return Tuple3.of(roomId, a.f1 + b.f1, a.f2 + b.f2);
    }
}
&spm=1001.2101.3001.5002&articleId=118829917&d=1&t=3&u=c5d6bf2ad91041b498b464c73d404261)
1759

被折叠的 条评论
为什么被折叠?



