1 Star 0 Fork 355

licq / flink-streaming-platform-web

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
demo_6.md 8.87 KB
一键复制 编辑 原始数据 按行查看 历史
zhuhp 提交于 2021-01-23 18:53 . 代码重构、sql提示 sql语法校验

参考

https://github.com/ververica/flink-cdc-connectors/wiki/%E4%B8%AD%E6%96%87%E6%95%99%E7%A8%8B
[基于 Flink SQL CDC的实时数据同步方案](http://www.dreamwu.com/post-1594.html)

1. mysql配置

1.1. 主从同步配置、数据准备

1、修改配置
    [mysqld]
    # 前面还有其他配置
    # 添加的部分
    server-id = 12345
    log-bin = mysql-bin
    # 必须为ROW
    binlog_format = ROW
    # 必须为FULL,MySQL-5.7后才有该参数
    binlog_row_image  = FULL
    expire_logs_days  = 15

2、验证
SHOW VARIABLES LIKE '%binlog%';
	
3、设置权限
    -- 设置拥有同步权限的用户
    CREATE USER 'flinkuser' IDENTIFIED BY 'flinkpassword';
    -- 赋予同步相关权限
    GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'flinkuser';
    
    创建用户并赋予权限成功后,使用该用户登录MySQL,可以使用以下命令查看主从同步相关信息
    SHOW MASTER STATUS
    SHOW SLAVE STATUS
    SHOW BINARY LOGS

2. CDC Streaming ETL

模拟电商公司的订单表和物流表,需要对订单数据进行统计分析,对于不同的信息需要进行关联后续形成订单的大宽表后,交给下游的业务方使用 ES 做数据分析, 这个案例演示了如何只依赖 Flink 不依赖其他组件,借助 Flink 强大的计算能力实时把 Binlog 的数据流关联一次并同步至 ES

2.1 实时根据binlog将商品表、订单表、物流表合成宽表写到ES中

2.1.1. 数据库建表SQL

CREATE TABLE products (
  id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
  name VARCHAR(255) NOT NULL,
  description VARCHAR(512)
);
ALTER TABLE products AUTO_INCREMENT = 101;

INSERT INTO products
VALUES (default,"scooter","Small 2-wheel scooter"),
       (default,"car battery","12V car battery"),
       (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3"),
       (default,"hammer","12oz carpenter's hammer"),
       (default,"hammer","14oz carpenter's hammer"),
       (default,"hammer","16oz carpenter's hammer"),
       (default,"rocks","box of assorted rocks"),
       (default,"jacket","water resistent black wind breaker"),
       (default,"spare tire","24 inch spare tire");

CREATE TABLE orders (
  order_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
  order_date DATETIME NOT NULL,
  customer_name VARCHAR(255) NOT NULL,
  price DECIMAL(10, 5) NOT NULL,
  product_id INTEGER NOT NULL,
  order_status BOOLEAN NOT NULL -- 是否下单
) AUTO_INCREMENT = 10001;

INSERT INTO orders
VALUES (default, '2020-07-30 10:08:22', 'Jark', 50.50, 102, false),
       (default, '2020-07-30 10:11:09', 'Sally', 15.00, 105, false),
       (default, '2020-07-30 12:00:30', 'Edward', 25.25, 106, false);

CREATE TABLE shipments (
  shipment_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
  order_id INTEGER NOT NULL,
  origin VARCHAR(255) NOT NULL,
  destination VARCHAR(255) NOT NULL,
  is_arrived BOOLEAN NOT NULL
) AUTO_INCREMENT = 1001;;

INSERT INTO shipments
VALUES (default,10001,'Beijing','Shanghai',false),
       (default,10002,'Hangzhou','Shanghai',false),
       (default,10003,'Shanghai','Hangzhou',false);

2.1.2. flink配置

CREATE TABLE products (
  id INT,
  name STRING,
  description STRING
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = '172.25.21.29',
  'port' = '3306',
  'username' = 'flinkuser',
  'password' = 'flinkpassword',
  'database-name' = 'db_inventory_cdc',
  'table-name' = 'products'
);

CREATE TABLE orders (
  order_id INT,
  order_date TIMESTAMP(0),
  customer_name STRING,
  price DECIMAL(10, 5),
  product_id INT,
  order_status BOOLEAN
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = '172.25.21.29',
  'port' = '3306',
  'username' = 'flinkuser',
  'password' = 'flinkpassword',
  'database-name' = 'db_inventory_cdc',
  'table-name' = 'orders'
);

CREATE TABLE shipments (
  shipment_id INT,
  order_id INT,
  origin STRING,
  destination STRING,
  is_arrived BOOLEAN
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = '172.25.21.29',
  'port' = '3306',
  'username' = 'flinkuser',
  'password' = 'flinkpassword',
  'database-name' = 'db_inventory_cdc',
  'table-name' = 'shipments'
);

CREATE TABLE enriched_orders (
  order_id INT,
  order_date TIMESTAMP(0),
  customer_name STRING,
  price DECIMAL(10, 5),
  product_id INT,
  order_status BOOLEAN,
  product_name STRING,
  product_description STRING,
  shipment_id INT,
  origin STRING,
  destination STRING,
  is_arrived BOOLEAN,
  PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
    'connector' = 'elasticsearch-7',
    'hosts' = 'http://172.25.23.15:9401',
    'index' = 'enriched_orders'
);

INSERT INTO enriched_orders
SELECT o.*, p.name, p.description, s.shipment_id, s.origin, s.destination, s.is_arrived
FROM orders AS o
LEFT JOIN products AS p ON o.product_id = p.id
LEFT JOIN shipments AS s ON o.order_id = s.order_id;

2.1.3. 验证SQL

--增加记录
INSERT INTO orders VALUES (default, '2020-07-30 15:22:00', 'Jark', 29.71, 104, false);
INSERT INTO shipments VALUES (default,10004,'Shanghai','Beijing',false);
--更新记录
UPDATE orders SET order_status = true WHERE order_id = 10004;
UPDATE shipments SET is_arrived = true WHERE shipment_id = 4;
--删除记录
DELETE FROM orders WHERE order_id = 10004;

2.1.4. 手工启动脚本

注意:相应的jar包需要到https://maven.aliyun.com/mvn/search下载,特别是cdc相关的jar

/data1/flink/flink/bin/flink run -d -p 2 
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-connector-jdbc_2.11-1.12.0.jar  
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-sql-connector-kafka_2.11-1.12.0.jar  
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-sql-connector-elasticsearch7_2.11-1.12.0.jar 
-c  com.flink.streaming.core.JobApplication /data1/flink/flink-streaming-platform-web/lib/flink-streaming-core_flink_1.12.0-1.2.0.RELEASE.jar 
-sql /data1/flink/flink-streaming-platform-web/sql/job_sql_8.sql

2.2. 按天统计销售额

2.2.1. flink 配置

set table.exec.source.cdc-events-duplicate = true;
CREATE TABLE orders (
  order_id INT,
  order_date TIMESTAMP(0),
  customer_name STRING,
  price DECIMAL(10, 5),
  product_id INT,
  order_status BOOLEAN,
  PRIMARY KEY (day_str) NOT ENFORCED,
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = '172.25.21.29',
  'port' = '3306',
  'username' = 'flinkuser',
  'password' = 'flinkpassword',
  'database-name' = 'db_inventory_cdc',
  'table-name' = 'orders'
);

CREATE TABLE kafka_gmv (
  day_str STRING,
  gmv DECIMAL(10, 5),
  PRIMARY KEY (day_str) NOT ENFORCED
) WITH (
    'connector' = 'kafka',
    'topic' = 'flink_test_6',
    'scan.startup.mode' = 'earliest-offset',
    'properties.group.id' = 'flink_gp_test6',
    'properties.bootstrap.servers' = '172.25.20.76:9092',
    'format' = 'debezium-json',
    'debezium-json.ignore-parse-errors' = 'true',
    'debezium-json.timestamp-format.standard' = 'SQL',
    'debezium-json.map-null-key.mode' = 'DROP'
);

INSERT INTO kafka_gmv
SELECT DATE_FORMAT(order_date, 'yyyy-MM-dd') as day_str, SUM(price) as gmv
FROM orders
WHERE order_status = true
GROUP BY DATE_FORMAT(order_date, 'yyyy-MM-dd');

-- 读取 Kafka 的 changelog 数据,观察 materialize 后的结果
 CREATE TABLE print_table (
   day_str STRING,
   gmv DECIMAL(10, 5),
   PRIMARY KEY (day_str) NOT ENFORCED
 ) WITH (
  'connector' = 'print'
 );
  
insert into print_table SELECT * FROM kafka_gmv;

2.2.2. 验证SQL

UPDATE orders SET order_status = true WHERE order_id = 10001;
UPDATE orders SET order_status = true WHERE order_id = 10002;
UPDATE orders SET order_status = true WHERE order_id = 10003;

INSERT INTO orders
VALUES (default, '2020-07-30 17:33:00', 'Timo', 50.00, 104, true);

UPDATE orders SET price = 40.00 WHERE order_id = 10005;

DELETE FROM orders WHERE order_id = 10005;

2.2.2. 手工启动脚本

注意: 相应的jar包需要到https://maven.aliyun.com/mvn/search下载,特别是cdc相关的jar 不能将kafka 表中的format配置为changelog-json,否则会因为flink1.12.0版本中,JsonRowDataSerializationSchema中的方法 JsonRowDataSerializationSchema(RowType rowType, TimestampFormat timestampFormat)被修改了,所以报: java.lang.NoSuchMethodError: org.apache.flink.formats.json.JsonRowDataSerializationSchema.(Lorg/apache/flink/table/types/logical/RowType;Lorg/apache/flink/formats/json/TimestampFormat;)V 方法不存在

 /data1/flink/flink/bin/flink run -d -p 2 
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-connector-jdbc_2.11-1.12.0.jar 
-C file:///data1/flink/flink-streaming-platform-web/jars/flink-sql-connector-kafka_2.11-1.12.0.jar 
-c com.flink.streaming.core.JobApplication /data1/flink/flink-streaming-platform-web/lib/flink-streaming-core_flink_1.12.0-1.2.0.RELEASE.jar 
-sql /data1/flink/flink-streaming-platform-web/sql/job_sql_8.sql
Java
1
https://gitee.com/liccqq/flink-streaming-platform-web.git
git@gitee.com:liccqq/flink-streaming-platform-web.git
liccqq
flink-streaming-platform-web
flink-streaming-platform-web
master

搜索帮助