pom.xml（Maven 项目构建配置文件）
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build file for the Spark word-count samples below. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.smj</groupId>
<artifactId>test</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Spark core. The "_2.11" artifact suffix means this binary is built
     for Scala 2.11, the Scala line that Spark 2.2.x is published for;
     any Scala code compiled against it should target 2.11 as well. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.2.2</version>
</dependency>
</dependencies>
</project>
源码 1：占位符写法（用 `_` 把整条流水线串成一行的紧凑版本）
package wordcount
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Word count over a comma-separated local text file, run with Spark in
 * local mode. Prints each word with its count as "word-count".
 *
 * Usage: an optional first command-line argument overrides the input
 * path; with no arguments the hard-coded default path is used, so
 * existing invocations behave exactly as before.
 */
object ScalaSparkWordCount {

  // Default input, overridable via args(0). Written as a portable
  // file:/// URI with forward slashes — the previous "file:\D:\..."
  // backslash form is not a well-formed URI.
  private val DefaultInput = "file:///D:/Workspace/test/src/main/resources/hello.txt"

  def main(args: Array[String]): Unit = {
    // Tame framework logging. NOTE(review): Level.INFO is still fairly
    // verbose; use Level.WARN if the intent is to silence Spark's chatter.
    Logger.getLogger("org.apache.spark").setLevel(Level.INFO)
    Logger.getLogger("org.apache.hadoop").setLevel(Level.INFO)
    Logger.getLogger("org.spark_project").setLevel(Level.INFO)

    // 1. Create the programming entry point: local mode, all cores.
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("SparkWordCount")
    val sc = new SparkContext(conf)

    try {
      // 2. Load the text data; args(0), when given, overrides the default.
      val input = args.headOption.getOrElse(DefaultInput)
      val linesRDD: RDD[String] = sc.textFile(input)

      // 3. Split lines on commas, pair each word with 1, sum per word.
      val ret = linesRDD.flatMap(_.split(",")).map((_, 1)).reduceByKey(_ + _)

      // Print "word-count" for every pair. foreach runs on the executors;
      // in local mode the output appears on this process's console.
      ret.foreach(t => println(t._1 + "-" + t._2))
    } finally {
      // 4. Always release the SparkContext, even if the job fails —
      // previously a thrown exception would leak it.
      sc.stop()
    }
  }
}
源码 2：具名参数写法（拆成多步、显式标注 RDD 类型的等价版本）
package wordcount
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Word count over a comma-separated local text file using Spark in local
 * mode — the same pipeline spelled out step by step with named lambdas
 * and explicit RDD type annotations. Prints each word as "word-count".
 */
object ScalaSparkWordCount {
  def main(args: Array[String]): Unit = {
    // NOTE(review): Level.INFO keeps Spark fairly verbose; switch to
    // Level.WARN if the goal here is to quiet the startup log.
    Logger.getLogger("org.apache.spark").setLevel(Level.INFO)
    Logger.getLogger("org.apache.hadoop").setLevel(Level.INFO)
    Logger.getLogger("org.spark_project").setLevel(Level.INFO)

    // 1. Programming entry point: local mode, one worker thread per core.
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("SparkWordCount")
    val sc = new SparkContext(conf)

    try {
      // 2. Load local file data. Fixed to a well-formed file:/// URI:
      // the previous "file:\D:\..." backslash form is not a valid URI.
      val linesRDD: RDD[String] =
        sc.textFile("file:///D:/Workspace/test/src/main/resources/hello.txt")

      // 3. Transform with operators: split, pair with 1, reduce by key.
      val wordsRDD: RDD[String] = linesRDD.flatMap(line => line.split(","))
      val pairsRDD: RDD[(String, Int)] = wordsRDD.map(word => (word, 1))
      val ret: RDD[(String, Int)] = pairsRDD.reduceByKey((v1, v2) => v1 + v2)

      // Print each (word, count) pair as "word-count".
      ret.foreach { case (word, count) => println(word + "-" + count) }
    } finally {
      // Release the SparkContext even when the job throws — previously
      // an exception anywhere above would leak it.
      sc.stop()
    }
  }
}