当前位置: 代码迷 >> 综合 >> wordcount- scala
  详细解决方案

WordCount - Scala

热度:77   发布时间:2024-02-20 19:23:32.0

一、集合高级函数:reduce 与 fold

package com.atguigu.scala.chapter07/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter07* Version: 1.0** Created by wushengran on 2020/9/19 9:07*/
/**
 * Demonstrates the `reduce` and `fold` families on `List[Int]`.
 *
 * Every value is written to standard output; the trailing comments show the
 * expected result and how the expression associates.
 */
object Test13_HighLevelFuction_Reduce {

  def main(args: Array[String]): Unit = {
    val numbers = List(1, 2, 3, 4)

    // 1. reduce
    // Baseline via the built-in `sum`; computed for comparison, never printed.
    val total = numbers.sum

    // Four equivalent ways of summing the list with reduce.
    val sums = List(
      numbers.reduce((acc, n) => acc + n),
      numbers.reduce(_ + _),
      numbers.reduceLeft(_ + _),
      numbers.reduceRight(_ + _)
    )
    sums.foreach(println)

    println("==========================")

    // Subtraction is not associative, so the direction becomes visible.
    println(numbers.reduce(_ - _))      // -8, 1-2-3-4
    println(numbers.reduceRight(_ - _)) // -2, 1-(2-(3-4))

    val moreNumbers = List(3, 4, 5, 8, 10)
    println(moreNumbers.reduce(_ - _))      // -24, 3-4-5-8-10
    println(moreNumbers.reduceRight(_ - _)) // 6, 3-(4-(5-(8-10)))

    // 2. fold: like reduce, but starts from an explicit initial value.
    val foldedSum = numbers.fold(10)(_ + _)
    println(foldedSum)
    println(moreNumbers.foldLeft(10)(_ - _))  // -20, 10-3-4-5-8-10
    println(moreNumbers.foldRight(11)(_ - _)) // -5, 3-(4-(5-(8-(10-11))))
  }
}

二、合并map

package com.atguigu.scala.chapter07import scala.collection.mutable/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter07* Version: 1.0** Created by wushengran on 2020/9/19 10:09*/
/**
 * Merges two `String -> Int` maps by adding the values of keys present in both.
 *
 * Expected merge of the sample data:
 * ("a" -> 5, "b" -> 19, "c" -> 32, "d" -> 82, "e" -> 23)
 */
object Test14_MergeMap {

  // Imported locally so this object compiles regardless of the header imports.
  import scala.collection.mutable

  def main(args: Array[String]): Unit = {
    val map1 = mutable.Map("a" -> 1, "b" -> 12, "c" -> 32, "d" -> 25)
    val map2 = mutable.Map("a" -> 4, "b" -> 7, "d" -> 57, "e" -> 23)

    // Fold map1 into an accumulator seeded with the contents of map2.
    // The seed is a copy: folding into map2 itself would mutate the input map
    // as a side effect and make `result` an alias of it.
    val result: mutable.Map[String, Int] =
      map1.foldLeft(map2.clone()) { case (aggMap, (key, value)) =>
        // aggMap is the running state, (key, value) the current entry of map1.
        // Add to the existing value when the key is known, otherwise start from 0.
        aggMap(key) = aggMap.getOrElse(key, 0) + value
        aggMap
      }

    println(result)
  }
}

三、wordcount简单实现及复杂实现

package com.atguigu.scala.chapter07/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter07* Version: 1.0** Created by wushengran on 2020/9/19 10:33*/
/**
 * Word-count demo in three variants, each printing its intermediate steps and
 * the three most frequent words:
 *
 *  1. plain lines of text;
 *  2. (line, multiplicity) pairs, expanded into repeated text and counted as in 1;
 *  3. (line, multiplicity) pairs, counted directly without building big strings.
 */
object Test15_WordCount {

  def main(args: Array[String]): Unit = {
    // ---- Part one: simple version ----
    val textLines: List[String] = List(
      "hello",
      "hello world",
      "hello scala",
      "hello spark",
      "hello spark from scala",
      "hello flink spark from scala"
    )

    // 1. Tokenise. Equivalent to textLines.map(_.split(" ")).flatten
    val wordList: List[String] = textLines.flatMap(_.split(" "))
    println(wordList)

    // 2. Group by the word itself.
    val groupedWordMap: Map[String, List[String]] = wordList.groupBy(word => word)
    println(groupedWordMap)

    // 3. The size of each group is the word's count.
    val wordCountMap: Map[String, Int] =
      groupedWordMap.map { case (word, occurrences) => (word, occurrences.size) }
    println(wordCountMap)

    // 4. Sort by count, descending, and keep the top three.
    val topNWordCountList: List[(String, Int)] =
      wordCountMap.toList.sortBy(_._2)(Ordering[Int].reverse).take(3)
    println(topNWordCountList)

    println("=======================================")

    // ---- Part two: each line carries a multiplicity ----
    val lineCountTupleList: List[(String, Int)] = List(
      ("hello", 1),
      ("hello world", 2),
      ("hello scala", 3),
      ("hello spark from scala", 4)
    )

    // Expand every pair into one long string that repeats the line `count` times.
    val newTextLines: List[String] =
      lineCountTupleList.map { case (line, count) => (line.trim + " ") * count }
    println(newTextLines)

    // From here on the pipeline is the same as in part one.
    val resultWordCountList = newTextLines
      .flatMap(_.split(" "))
      .groupBy(word => word)
      .map { case (word, occurrences) => (word, occurrences.length) }
      .toList
      .sortWith(_._2 > _._2)
      .take(3)
    println(resultWordCountList)

    println("===============================")

    // ---- Improved version: no string expansion ----
    // 1. Turn every (line, count) pair into (word, count) pairs.
    val wordCountTupleList: List[(String, Int)] =
      lineCountTupleList.flatMap { case (line, count) =>
        val arr: Array[String] = line.split(" ")
        // Printing an Array directly shows only its JVM identity string,
        // so render the elements explicitly.
        println(arr.mkString("Array(", ", ", ")"))
        arr.map(str => (str, count))
      }
    println(wordCountTupleList)

    // 2. Group the pairs by word.
    val newWordCountMap: Map[String, List[(String, Int)]] = wordCountTupleList.groupBy(_._1)
    println(newWordCountMap)

    // 3. Sum the counts inside every group to get the total per word.
    val totalWordCountMap: Map[String, Int] =
      newWordCountMap.map { case (word, pairs) => (word, pairs.map(_._2).sum) }
    println(totalWordCountMap)

    // 4. Sort descending and keep the top three.
    val resultList = totalWordCountMap.toList.sortWith(_._2 > _._2).take(3)
    println(resultList)
  }
}

四、队列

package com.atguigu.scala.chapter07import scala.collection.immutable.Queue
import scala.collection.mutable
import scala.collection.parallel.immutable/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter07* Version: 1.0** Created by wushengran on 2020/9/19 11:30*/
/**
 * Demonstrates mutable and immutable queues, then compares a sequential `map`
 * with a parallel (`.par`) one by collecting the name of the executing thread.
 */
object Test16_Queue {

  // Local imports keep this object self-contained.
  import scala.collection.immutable.Queue
  import scala.collection.mutable

  def main(args: Array[String]): Unit = {
    // 1. Mutable queues: one created empty, one pre-filled.
    val emptyQueue = new mutable.Queue[String]()
    val filledQueue = mutable.Queue("abc", "def", "hello")

    println(emptyQueue)
    emptyQueue.enqueue("scala")
    println(emptyQueue)

    filledQueue.enqueue("spark")
    println(filledQueue)

    // Remove and print the three oldest elements, one at a time.
    (1 to 3).foreach(_ => println(filledQueue.dequeue()))
    println(filledQueue)

    // 2. Immutable queue: enqueue returns a new queue, the original is unchanged.
    val original = Queue("a", "b", "c")
    val extended = original.enqueue("d")
    println(original)
    println(extended)

    println("========================================")

    // Parallel collections: record which thread handles each element.
    val sequentialThreads = (1 to 100).map(_ => Thread.currentThread().getName)
    val parallelThreads = (1 to 100).par.map(_ => Thread.currentThread().getName)
    println(sequentialThreads)
    println(parallelThreads)
  }
}

五、模式匹配

package com.atguigu.scala.chapter08/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter08* Version: 1.0** Created by wushengran on 2020/9/19 11:53*/
/**
 * Basic pattern matching: matching on constants, a small operator dispatcher,
 * and pattern guards.
 */
object Test01_PatternMatchBase {

  def main(args: Array[String]): Unit = {
    // 1. Basic syntax: map a few known numbers to their names.
    val number: Int = 12
    val numberName: String = number match {
      case 1  => "One"
      case 2  => "Two"
      case 10 => "Ten"
      case _  => "Other"
    }
    println(numberName)

    // 2. A larger example: choose an arithmetic operation by its symbol.
    val a: Int = 25
    val b: Int = 17

    // Returns the Int result for a known operator, or an error text otherwise,
    // which is why the result type is Any.
    def matchOpAandB(op: Char): Any = op match {
      case '+' => a + b
      case '-' => a - b
      case '*' => a * b
      case '/' => a / b
      case _   => "运算符不合法"
    }

    List('+', '/', '&').foreach(symbol => println(matchOpAandB(symbol)))

    // 3. Pattern guard: absolute value.
    def abs(num: Int): Int = num match {
      case 0                      => 0
      case negative if negative < 0 => -negative
      case positive               => positive
    }

    List(8, -235, 0).foreach(value => println(abs(value)))
  }
}

六、匹配类型

package com.atguigu.scala.chapter08/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter08* Version: 1.0** Created by wushengran on 2020/9/19 14:26*/
/**
 * Pattern matching by kind of pattern: constants, runtime types, arrays,
 * lists (including the `::` form) and tuples. Every result is printed.
 */
object Test02_MatchTypes {

  def main(args: Array[String]): Unit = {
    // 1. Matching constant values.
    // The fallback returns a description instead of falling through to Unit,
    // which used to print "()" for unknown input.
    def describeConst(x: Any): String = x match {
      case 10      => "Int Ten"
      case "hello" => "String hello"
      case true    => "Boolean true"
      case '+'     => "Char +"
      case _       => "something else"
    }

    println(describeConst(10))
    println(describeConst("hello"))
    println(describeConst(true))
    println(describeConst("abc"))

    println("===========================")

    // 2. Matching on the runtime type.
    def describeType(x: Any): String = x match {
      case i: Int     => "Int " + i
      case s: String  => "String " + s
      case b: Boolean => "Boolean " + b
      // Type erasure: a List's element type is not available at run time, so
      // this case matches every List (the List(2,3,4,6) call below lands here).
      // The wildcard form states that honestly and avoids an unchecked warning.
      case l: List[_] => "List " + l
      // Arrays keep their element type at run time, so Array[String] does NOT match.
      case arr: Array[Int] => "Array[Int] " + arr.mkString(",")
      case _               => "something else"
    }

    println(describeType(10))
    println(describeType("hello"))
    println(describeType(true))
    println(describeType("abc"))
    println(describeType(List()))
    println(describeType(List("abc", "def")))
    println(describeType(List(2, 3, 4, 6)))
    println(describeType(Array(2, 5, 2, 65)))
    println(describeType(Array("hello", "world")))

    // 3. Matching arrays by shape and content.
    val arrList: List[Any] = List(
      Array(0),
      Array(0, 1),
      Array(1, 0),
      Array(0, 1, 0),
      Array(1, 1, 0),
      Array(35, 47, 96, 45),
      Array("hello", "world")
    )

    def describeArray(arr: Any): String = arr match {
      case Array(0)       => "Array(0)"
      case Array(x, y)    => "Array 2: " + x + " " + y
      case Array(0, _*)   => "Array with 0 start"
      case Array(_, 1, _) => "Array 3 with 1 in middle"
      case _              => "something else"
    }

    for (arr <- arrList) println(describeArray(arr))
    println(describeArray(Array('c', 'a')))

    println("===========================")

    // 4. Matching lists. The final Map element shows that a non-List value
    // falls through to the default case.
    for (list <- List(
           List(0),
           List(0, 1),
           List(1, 0),
           List(0, 1, 0),
           List(1, 1, 0),
           List(35, 47, 96, 45),
           List("hello"),
           List('a', 'b'),
           Map(('a', 1))
         )) {
      val description: String = list match {
        case List(0)     => "List(0)"
        case List(_, _)  => "List 2 elements"
        case List(0, _*) => "List with 0 start"
        case List(a)     => "List " + a
        case _           => "something else"
      }
      println(description)
    }

    println("======================================")

    // Alternative inputs for the `::` match below; only list4 is matched here.
    val list1: List[Int] = List(2, 45, 62, 17, 56, 91)
    val list2 = List(1, 2, 3)
    val list3 = List(1, 2)
    val list4 = List(1)

    // `first :: second :: rest` needs at least two elements.
    list4 match {
      case first :: second :: rest =>
        println(s"first: $first \t second: $second \t rest: $rest")
      case _ => println("something else")
    }

    println("=========================")

    // 5. Matching tuples of different arity.
    for (tuple <- List(
           (0, 0),
           (0, 1),
           (1, 0),
           (1, 1),
           (1, 0, 2),
           ("hello", 1, 0.5)
         )) {
      val result = tuple match {
        case (0, _)    => "0, _"
        case (y, 0)    => s"$y ,0"
        case (_, _)    => "二元组" // components are not used, so they are not bound
        case (_, 1, a) => "_, 1, " + a
        case _         => "其它"
      }
      println(result)
    }
  }
}

七、模式匹配扩展

package com.atguigu.scala.chapter08/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter08* Version: 1.0** Created by wushengran on 2020/9/19 15:34*/
/**
 * Pattern matching outside `match`: in value definitions and in the
 * generators of `for` loops.
 */
object Test03_MatchExtendCase {

  def main(args: Array[String]): Unit = {
    // 1. Pattern matching in value definitions.
    // Tuple patterns:
    val (x, y): (Int, Int) = (1, 2)
    val (id, name, age): (Int, String, Int) = (9527, "DongLiang", 18)
    // List pattern: bind the first two elements, ignore the rest.
    val List(first, second, _*) = List(2, 45, 62, 17, 56, 91)

    // 2. Pattern matching in for generators.
    val pairs: List[(String, Int)] = List(("a", 1), ("b", 2), ("c", 3))

    // Without patterns: access the tuple fields by position.
    for (entry <- pairs) println(s"${entry._1} ${entry._2}")
    for (entry <- pairs) {
      // Manual destructuring; the values are bound but not used further.
      val word = entry._1
      val count = entry._2
    }

    // With a tuple pattern the fields get names directly.
    for ((word, count) <- pairs) println(s"$word $count")

    // Positions that are not needed can be ignored.
    for ((word, _) <- pairs) println(word)

    // A literal in the pattern keeps only the entries whose field equals it.
    for (("b", count) <- pairs) println(s"b count: $count")
  }
}

八、匹配对象及样例类

package com.atguigu.scala.chapter08/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter08* Version: 1.0** Created by wushengran on 2020/9/19 15:46*/
/**
 * Matching on instances of an ordinary (non-case) class through a
 * hand-written extractor in the companion object.
 */
object Test04_MatchObject {

  def main(args: Array[String]): Unit = {
    val student1 = new Student("alice", 18)
    val student2 = new Student("alice", 19)
    val student3 = new Student("bob", 18)

    // The pattern calls Student.unapply and compares the extracted fields.
    val result = student3 match {
      case Student("alice", 18) => "Yes, this is alice 18"
      case _                    => "No"
    }
    println(result)
  }
}

/** A plain class with two read-only fields. */
class Student(val name: String, val age: Int)

/** Companion object supplying the factory and the extractor used by patterns. */
object Student {

  /** Creates a student without `new`. */
  def apply(name: String, age: Int): Student = new Student(name, age)

  /**
   * Extractor backing `case Student(name, age)` patterns.
   *
   * @param student the value being matched; may be null
   * @return `None` for a null reference, otherwise `Some((name, age))`
   */
  def unapply(student: Student): Option[(String, Int)] =
    // Option(...) turns a null reference into None, replacing the explicit null test.
    Option(student).map(s => (s.name, s.age))
}
package com.atguigu.scala.chapter08/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter08* Version: 1.0** Created by wushengran on 2020/9/19 15:59*/
/**
 * The same match as the hand-written extractor demo, but on a case class,
 * where the compiler generates `apply` and `unapply`.
 */
object Test05_MatchCaseClass {

  def main(args: Array[String]): Unit = {
    val alice18 = Student1("alice", 18)
    val alice19 = Student1("alice", 19)
    val bob18 = Student1("bob", 18)

    // Only a student with exactly this name and age matches the first case.
    val verdict = alice18 match {
      case Student1("alice", 18) => "Yes, this is alice 18"
      case _                     => "No"
    }
    println(verdict)
  }
}

/** Case class: factory, extractor, equality and toString come for free. */
case class Student1(name: String, age: Int)

九、异常

package com.atguigu.scala.chapter09plus/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chaper09plus* Version: 1.0** Created by wushengran on 2020/9/19 16:27*/
/**
 * Exception handling with try / catch / finally: a division by zero is
 * caught, the finally block runs, and execution continues after the try.
 */
object Test01_Exception {

  // Imported locally so this object is self-contained.
  import scala.util.control.NonFatal

  def main(args: Array[String]): Unit = {
    try {
      val n = 1 / 0
    } catch {
      // Cases are tried top to bottom, so the most specific type comes first.
      case _: ArithmeticException => println("算术异常")
      case _: RuntimeException    => println("其它异常")
      // Replaces the bare `case _ =>`, which silently swallowed every Throwable.
      // NonFatal lets fatal errors propagate and reports whatever else was caught.
      case NonFatal(other) => println("未预期的异常: " + other)
    } finally {
      // Runs whether or not an exception was thrown.
      println("异常处理结束")
    }

    println("异常处理外的逻辑")
  }
}

十、隐式转换

package com.atguigu.scala.chapter09plus/*** Copyright (c) 2018-2028 尚硅谷 All Rights Reserved ** Project: scala0621* Package: com.atguigu.scala.chapter09plus* Version: 1.0** Created by wushengran on 2020/9/19 16:35*/
/**
 * Implicits demo: an implicit class that adds `myMax` to `Int`, and an
 * implicit parameter that is resolved from an implicit value in scope.
 */
object Test02_Implicit {

  def main(args: Array[String]): Unit = {
    // 1. Implicit function (superseded by the implicit class below)
    // implicit def convert(num: Int): MyRichInt = new MyRichInt(num)
    val a: Int = 23
    // Int has no myMax; the call compiles through the MyRichInt wrapper.
    println(a.myMax(35))

    // 3. Implicit parameter
    implicit val str: String = "hello world"
    // A second implicit String in the same scope would make resolution ambiguous:
    // implicit val str2: String = "hello world 2"

    // The implicit value in scope takes precedence over the default argument.
    def sayHello(name: String)(implicit arg: String = "good bye"): Unit =
      println(s"$name $arg")

    sayHello("alice")
  }

  // 2. Implicit class: wraps an Int to give it an extra method.
  implicit class MyRichInt(val self: Int) {

    /** Returns the larger of the wrapped value and `n`. */
    def myMax(n: Int): Int = math.max(self, n)
  }
}

// A hand-written RichInt class (used together with the implicit function variant)
//class MyRichInt(val self: Int) {
//  // Define a custom method
//  def myMax(n: Int): Int = {
//    if(self > n) self else n
//  }
//}
  相关解决方案