当前位置: 代码迷 >> 综合 >> SparkSql 读mysql数据写入到Es
  详细解决方案

SparkSql 读mysql数据写入到Es

热度:43   发布时间:2024-01-22 02:39:17.0

话不多说，直接上代码。
1. Scala 版本（将 MySQL 的两个表 JOIN 后，把结果写入 ES）

/*
 * Maven dependencies (pom.xml) required by this example:
 *
 * <dependencies>
 *   <dependency>
 *     <groupId>mysql</groupId>
 *     <artifactId>mysql-connector-java</artifactId>
 *     <version>5.1.27</version>
 *   </dependency>
 *   <dependency>
 *     <groupId>org.apache.spark</groupId>
 *     <artifactId>spark-sql_2.11</artifactId>
 *     <version>2.2.1</version>
 *   </dependency>
 *   <dependency>
 *     <groupId>org.elasticsearch</groupId>
 *     <artifactId>elasticsearch-spark-20_2.11</artifactId>
 *     <version>6.8.2</version>
 *   </dependency>
 * </dependencies>
 */
import java.util.Properties

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.elasticsearch.spark.sql.EsSparkSQL

/**
 * Reads the result of a two-table MySQL join through Spark SQL's JDBC source
 * and writes the resulting rows into Elasticsearch.
 */
object SparkMain {

  /**
   * Builds the JDBC connection properties handed to `spark.read.jdbc`.
   *
   * @param user database user name
   * @param pass database password
   * @return properties containing the driver class, user and password
   */
  private def dbConnProperties(user: String, pass: String): Properties = {
    val props = new Properties()
    props.setProperty("driver", "com.mysql.jdbc.Driver")
    props.setProperty("user", user)
    props.setProperty("password", pass)
    props
  }

  def main(args: Array[String]): Unit = {
    val config = new SparkConf().setMaster("local[*]").setAppName("OCRMSparkSql")
    config.set("es.nodes.wan.only", "true")
    // NOTE(review): the node address uses an https:// scheme; confirm whether the
    // connector also needs its SSL option (es.net.ssl) enabled for this cluster.
    config.set("es.nodes", "https://地址:9200")
    // Alternatives that were tried and left disabled:
    // config.set("es.nodes", "<address>")
    // config.set("es.port", "9200")
    // println(InetAddress.getByName("<domain name>").getHostAddress)
    // config.set("es.node", InetAddress.getByName("<domain name>").getHostAddress)
    config.set("es.mapping.id", "app_id") // update-style writes need the id mapping column
    config.set("es.index.auto.create", "true")
    // config.set("es.write.operation", "index") // write as plain insert
    config.set("es.write.operation", "upsert") // write as update (upsert)
    config.set("es.net.http.auth.user", "user") // user name for accessing ES
    config.set("es.net.http.auth.pass", "password") // password for accessing ES

    val spark = SparkSession.builder.config(config).getOrCreate()
    try {
      val jdbcUrl =
        "jdbc:mysql://10.0.0.10:5457/db_ex_after_sales?zeroDateTimeBehavior=convertToNull"
      val dbUser = "user"
      val dbPass = "password"
      val readConnProperties = dbConnProperties(dbUser, dbPass)

      // The join is executed by MySQL itself: the query is passed to the JDBC
      // source as a derived table aliased `t`.
      // NOTE(review): `limit 1` restricts the result to a single row — presumably
      // a leftover from testing; confirm before using on real data.
      val sql = "select concat(t1.app_id,'tag_merchant') as app_id ,version_type ,sum_count from t_app_base_info t1 left join t_customer_deliver_records t2 on t1.app_id=t2.app_id limit 1"
      val df: DataFrame = spark.read.jdbc(jdbcUrl, s"(${sql}) t", readConnProperties)

      EsSparkSQL.saveToEs(df, "index1/type1")
    } finally {
      // Release the Spark session even when the read or the write fails.
      spark.stop()
    }
  }
}

2. Java 版本

import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.elasticsearch.spark.sql.EsSparkSQL;import java.util.Properties;public class SparkMain {
    public static void main(String[] args) {
    SparkConf config = new SparkConf().setAppName("OCRMSparkSql").setMaster("local[4]");config.set("es.nodes.wan.only","true");config.set("es.nodes","10.0.0.1:9200");config.set("es.mapping.id","app_id");config.set("es.index.auto.create", "true");//config.set("es.write.operation", "index") //更新方式为插入config.set("es.write.operation", "upsert") ; //更新方式为updateconfig.set("es.net.http.auth.user", "user") ; //访问es的用户名config.set("es.net.http.auth.pass", "password") ; //访问es的密码SparkSession spark = SparkSession.builder().config(config).getOrCreate();String jdbcUrl = "jdbc:mysql://10.0.0.10:5457/db_ex_after_sales?zeroDateTimeBehavior=convertToNull" ;
zeroDateTimeBehavior=convertToNull用这个是因为数据里的时间戳默认为0000-00-00 00:00:00 这个会报错  所以采用这种Properties properties = new Properties();properties.put("driver", "com.mysql.jdbc.Driver");properties.put("user", "user");properties.put("password", "password");String sql = "select concat(t1.app_name,'tag_merchant') as app_id,t1.merchant_id,t1.version_type,t1.authentic_state,t1.authentic_body,t1.sign_time ,t1.expire_time,t1.opened_time," +"t1.sell_stage,t1.sell_follower_id,t1.sum_count,t1.pay_count,t1.sum_income,t1.consume,t1.seven_active_days,t1.thirty_active_days,t1.sum_active_days,t1.industry_type,t1.active,t1.trans_time,t1.pay_time,t1.pay_money,t2.deliver_staff_id from from t_app_base_info t1 left join t_customer_deliver_records t2 on t1.app_id=t2.app_id " ;SQLContext sqlContext = spark.sqlContext();DataFrameReader reader = sqlContext.read().format("jdbc");Dataset<Row> app_base_info = reader.jdbc(jdbcUrl, "t_app_base_info", properties);Dataset<Row> t_customer_deliver_records = reader.jdbc(jdbcUrl, "t_customer_deliver_records", properties);app_base_info.createOrReplaceTempView("t_app_base_info");t_customer_deliver_records.createOrReplaceTempView("t_customer_deliver_records");Dataset<Row> data = spark.sql(sql);EsSparkSQL.saveToEs(data,"ocrm-tags-realtime/ocrm-tags-realtime-type" ) ;}
}
  相关解决方案