1.通过RDD+case class创建DataFrame
package com.doit.spark.day10
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{
DataFrame, SparkSession}
/**
 * Demo entry point: builds a DataFrame from an RDD of [[User]] case-class instances.
 *
 * Reads a comma-separated text file, maps every line to a `User(name, age, fv)`,
 * hands the resulting RDD to `SparkSession.createDataFrame`, and prints the
 * resulting schema and rows to stdout.
 */
object CaseClassCreateDataFrame {

  /**
   * Runs the demo against a local Spark master.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val sparkSession: SparkSession = SparkSession
      .builder()
      .appName(this.getClass().getSimpleName)
      .master("local[*]")
      .getOrCreate()

    try {
      val lineRDD: RDD[String] =
        sparkSession.sparkContext.textFile("C:\\Users\\WoBo\\Desktop\\user.txt")

      // Each line is expected to look like "name,age,fv". A line with fewer than
      // three fields or a non-numeric age/fv throws here and fails the job.
      val userRDD: RDD[User] = lineRDD.map { line =>
        val arr: Array[String] = line.split(",")
        val name: String = arr(0)
        val age: Int = arr(1).toInt
        val fv: Double = arr(2).toDouble
        User(name, age, fv)
      }

      val dataFrame: DataFrame = sparkSession.createDataFrame(userRDD)
      dataFrame.printSchema()
      dataFrame.show()
    } finally {
      // Always release the Spark context, even when reading or parsing fails.
      sparkSession.stop()
    }
  }
}
/**
 * One record of the user file; used as the element type of the RDD that
 * `CaseClassCreateDataFrame` converts into a DataFrame.
 *
 * @param name first comma-separated field of an input line
 * @param age  second field, parsed as an Int
 * @param fv   third field, parsed as a Double (its domain meaning is not shown in this file)
 */
case class User(name: String, age: Int, fv: Double)
2.通过RowRDD+StructType创建DataFrame
package com.doit.spark.day10
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{
DataTypes, StructField, StructType}
import org.apache.spark.sql.{
DataFrame, Row, SparkSession}
object DataStructTypeCreateDataFrame {
def main(args: Array[String]): Unit = {
val sparkSession: SparkSession = SparkSession.builder().appName(this.getClass().getSimpleName).master("local[*]").getOrCreate()
val lineRDD: RDD[String] = sparkSession.sparkContext.textFile("C:\\Users\\WoBo\\Desktop\\user.txt")
val rowRDD: RDD[Row] = lineRDD.map(x => {
val arr: Array[String] = x.split(",")
val name: String = arr(0)
val age: Int = arr<