【YashanDB知识库】DataX迁移Hive到崖山分布式

概述

本文主要介绍如何通过DataX将Hive中的数据迁移到崖山分布式数据库(YashanDB)。

环境

源Hive版本:3.1.3

目标YashanDB版本:23.2.3.100

建表脚本

-- hive
-- Source table on the Hive side. It is stored as a delimited text file whose
-- field separator is '\001' (Ctrl-A); the DataX reader in the job config below
-- uses the same separator ("\u0001") and fileType "text".
CREATE TABLE IF NOT EXISTS product (
    product_no   char(5),
    product_name varchar(30),
    cost         double,
    -- fixed: was "duble", which is not a valid Hive type and fails to parse
    price        double
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS textfile;

-- yashandb
-- Target table on the YashanDB side; columns are declared in the same order
-- as the Hive product table above.
CREATE TABLE product (
    product_no   CHAR(5),
    product_name VARCHAR2(30),
    cost         NUMBER,
    price        NUMBER
);

Hive表和DataX数据类型映射

| DataX 内部类型 | Hive表 数据类型 |
| --- | --- |
| Long | TINYINT,SMALLINT,INT,BIGINT |
| Double | FLOAT,DOUBLE |
| String | STRING,CHAR,VARCHAR,STRUCT,MAP,ARRAY,UNION,BINARY |
| Boolean | BOOLEAN |
| Date | DATE,TIMESTAMP |

Hive同步到崖山的job配置

{

    "job": {

        "content": [

            {

                "reader": {

                    "name":"hdfsreader",

                    "parameter":{

                        "column":[

                            {

                                "index":0,

                                "type":"string"

                            },

                            {

                                "index":1,

                                "type":"string"

                            },

                            {

                                "index":2,

                                "type":"double"

                            },

                            {

                                "index":3,

                                "type":"double"

                            }

                        ],

                        "defaultFS":"hdfs://127.0.0.1:8020",

                        "encoding":"UTF-8",

                        "fieldDelimiter":"\u0001",

                        "fileType":"text",

                        "path":"/usr/hive/warehouse/sales.db/product"

                    }

                },

                "writer": {

                    "name": "yashandbwriter",

                    "parallel": {

                        "binder": 6

                    },

                    "parameter": {

                        "batchError": true,

                        "column":[

                            "PRODUCT_NO",

                            "PRODUCT_NAME",

                            "COST",

                            "PRICE"

                        ],

                        "connection": [

                            {

                                "jdbcUrl": "jdbc:yasdb://127.0.0.1:1688/yashandb",

                                "table": [

                                    "SALES.PRODUCT"

                                ]

                            }

                        ],

                        "batchSize": 4096,

                        "batchesPerTxn": 1000,

                        "password": "sales",

                        "preSql": ["truncate table SALES.PRODUCT"],

                        "session": [],

                        "username": "sales",

                        "writeMode": "bulkinsert"

                    }

                }

            }

        ],

        "setting": {

            "speed": {

                "channel": "1"

            }

        }

    }

}

执行同步

python bin/datax.py job/hive2yashandb.json

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值