Broker Load 语句
-- Broker Load job: reads every file matched by the HDFS glob below and loads
-- it into table ads_user through the broker named "hdfs_broker".
-- No PROPERTIES clause is given for this job.
-- NOTE(review): the Kerberos keytab content is embedded inline; presumably it
-- is a credential and should be kept out of shared notes -- confirm.
LOAD LABEL gaofeng_broker_load_HDD
(
    DATA INFILE("hdfs://eoop/user/coue_data/hive_db/couta_test/ader_lal_offline_0813_1/*")
    INTO TABLE ads_user
)
WITH BROKER "hdfs_broker"
(
    -- HDFS name service "eadhadoop" with namenodes nn1 and nn2
    "dfs.nameservices" = "eadhadoop",
    "dfs.ha.namenodes.eadhadoop" = "nn1,nn2",
    "dfs.namenode.rpc-address.eadhadoop.nn1" = "h4:8000",
    "dfs.namenode.rpc-address.eadhadoop.nn2" = "z7:8000",
    "dfs.client.failover.proxy.provider.eadhadoop" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
    -- Kerberos authentication settings
    "hadoop.security.authentication" = "kerberos",
    "kerberos_principal" = "ou3.CN",
    "kerberos_keytab_content" = "BQ8uMTYzLkNPTQALY291cnNlXgAAAAFfVyLbAQABAAgCtp0qmxxP8QAAAAE="
);
报错
任务Cancelled
type:ETL_QUALITY_UNSATISFIED; msg:quality not good enough to cancel
解决
这个报错通常只是表象,背后一般还有更具体的原因
可以先执行 SHOW LOAD,查看这个 Broker Load 任务的 URL 字段,然后执行:
SHOW LOAD WARNINGS ON '{URL}';
或者直接在浏览器中打开该 URL 查看错误详情
常见的错误是字段数量不一致,也可能是其他原因。究其根本,
是因为所要导入的文件中某些行的字段数与表的字段数不一致,或者某些行中某个字段的长度超出了对应表字段的大小上限,从而导致数据质量问题,需要对数据或表结构作相应调整
如果想忽略掉这些错误数据,
则在任务语句的 PROPERTIES 中设置参数 "max_filter_ratio" = "1"(取值为 1 表示允许过滤掉全部错误行,请谨慎使用)
-- Broker Load job (retry): same source glob, target table ads_user and broker
-- settings, with "max_filter_ratio" = "1" added in PROPERTIES so that rows
-- failing the data-quality check are filtered out instead of cancelling the job.
-- NOTE(review): the Kerberos keytab content is embedded inline; presumably it
-- is a credential and should be kept out of shared notes -- confirm.
LOAD LABEL gaofeng_broker_load_HDD
(
    DATA INFILE("hdfs://eoop/user/coue_data/hive_db/couta_test/ader_lal_offline_0813_1/*")
    INTO TABLE ads_user
)
WITH BROKER "hdfs_broker"
(
    -- HDFS name service "eadhadoop" with namenodes nn1 and nn2
    "dfs.nameservices" = "eadhadoop",
    "dfs.ha.namenodes.eadhadoop" = "nn1,nn2",
    "dfs.namenode.rpc-address.eadhadoop.nn1" = "h4:8000",
    "dfs.namenode.rpc-address.eadhadoop.nn2" = "z7:8000",
    "dfs.client.failover.proxy.provider.eadhadoop" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
    -- Kerberos authentication settings
    "hadoop.security.authentication" = "kerberos",
    "kerberos_principal" = "ou3.CN",
    "kerberos_keytab_content" = "BQ8uMTYzLkNPTQALY291cnNlXgAAAAFfVyLbAQABAAgCtp0qmxxP8QAAAAE="
)
PROPERTIES
(
    -- Tolerated ratio of filtered (bad) rows; "1" lets every bad row be dropped
    "max_filter_ratio" = "1"
);