准备 orders.json 文件（放在 Utils.BASE_PATH 下的 join 目录中，每行一个 JSON 对象）：
{"id":"1", "userId":"1", "userName":"Join", "totalPrice":80.0,"qty":3.0}
{"id":"2", "userId":"1", "userName":"Join", "totalPrice":50.0,"qty":3.0}
{"id":"3", "userId":"2", "userName":"Jeffy", "totalPrice":200.0,"qty":3.0}
{"id":"4", "userId":"99999", "userName":"zombie", "totalPrice":222.0,"qty":3.0}
演示如下 API：
groupBy().count()
groupBy().avg()
groupBy().max()
groupBy().min()
groupBy().sum()
groupBy().mean()（与 avg() 等价，输出列名同为 avg(...)）
代码示例
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;public class test_28_1 {public static void main(String[] args) {SparkSession spark = SparkSession.builder().config("spark.driver.host", "localhost").appName("GroupApiTest").master("local").getOrCreate();spark.sparkContext().setLogLevel("ERROR");Dataset<Row> ordersDataSet = spark.read().json(Utils.BASE_PATH + "/join/orders.json");ordersDataSet.show();/*+---+---+----------+------+--------+| id|qty|totalPrice|userId|userName|+---+---+----------+------+--------+| 1|3.0| 80.0| 1| Join|| 2|3.0| 50.0| 1| Join|| 3|3.0| 200.0| 2| Jeffy|| 4|3.0| 222.0| 99999| zombie|+---+---+----------+------+--------+*///1: 基础的聚合函数ordersDataSet.groupBy("userId").count().show();/*+------+-----+|userId|count|+------+-----+| 1| 2|| 99999| 1|| 2| 1|+------+-----+*/ordersDataSet.groupBy("userId").avg("totalPrice", "qty").show();/*+------+---------------+--------+|userId|avg(totalPrice)|avg(qty)|+------+---------------+--------+| 1| 65.0| 3.0|| 99999| 222.0| 3.0|| 2| 200.0| 3.0|+------+---------------+--------+*/ordersDataSet.groupBy("userId").max("totalPrice", "qty").show();/*+------+---------------+--------+|userId|max(totalPrice)|max(qty)|+------+---------------+--------+| 1| 80.0| 3.0|| 99999| 222.0| 3.0|| 2| 200.0| 3.0|+------+---------------+--------+*/ordersDataSet.groupBy("userId").min("totalPrice", "qty").show();/*+------+---------------+--------+|userId|min(totalPrice)|min(qty)|+------+---------------+--------+| 1| 50.0| 3.0|| 99999| 222.0| 3.0|| 2| 200.0| 3.0|+------+---------------+--------+*/ordersDataSet.groupBy("userId").sum("totalPrice", "qty").show();/*+------+---------------+--------+|userId|sum(totalPrice)|sum(qty)|+------+---------------+--------+| 1| 130.0| 6.0|| 99999| 222.0| 3.0|| 2| 200.0| 3.0|+------+---------------+--------+*///均值ordersDataSet.groupBy("userId").mean("totalPrice", "qty").show();/*+------+---------------+--------+|userId|avg(totalPrice)|avg(qty)|+------+---------------+--------+| 1| 65.0| 3.0|| 99999| 222.0| 3.0|| 
2| 200.0| 3.0|+------+---------------+--------+*/}
}