scala> import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SparkSession
scala> val spark = SparkSession.builder().getOrCreate()
spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@2bdab835
// 使支援 RDDs 轉換為 DataFrames 及後續 SQL 操作
scala> import spark.implicits._
import spark.implicits._
scala> val df = spark.read.json("file:///usr/local/spark/examples/src/main/resources/people.json")
df: org.apache.spark.sql.DataFrame = [age: bigint, name: string]
scala> df.show()
+----+-------+
| age| name|
+----+-------+
|null|michael|
| 30| andy|
| 19| justin|
+----+-------+
// 列印模式資訊
scala> df.printSchema()
root
|-- age: long (nullable = true)
|-- name: string (nullable = true)
// 選擇多列
scala> df.select(df("name"),df("age")+1).show()
+-------+---------+
| name|(age + 1)|
+-------+---------+
|michael| null|
| andy| 31|
| justin| 20|
+-------+---------+
// 條件過濾
scala> df.filter(df("age") > 20 ).show()
+---+----+
|age|name|
+---+----+
| 30|andy|
+---+----+
// 分組聚合
scala> df.groupBy("age").count().show()
+----+-----+
| age|count|
+----+-----+
| 19| 1|
|null| 1|
| 30| 1|
+----+-----+
// 排序
scala> df.sort(df("age").desc).show()
+----+-------+
| age| name|
+----+-------+
| 30| andy|
| 19| justin|
|null|michael|
+----+-------+
//多列排序
scala> df.sort(df("age").desc, df("name").asc).show()
+----+-------+
| age| name|
+----+-------+
| 30| andy|
| 19| justin|
|null|michael|
+----+-------+
//對列進行重新命名
scala> df.select(df("name").as("username"),df("age")).show()
+--------+----+
|username| age|
+--------+----+
| michael|null|
| andy| 30|
| justin| 19|
+--------+----+
// 使用 Spark SQL 語句(先將 DataFrame 註冊為臨時視圖)
scala> df.createTempView("table1")
scala> spark.sql("select * from table1 limit 10").show()
以上是我們常用的 DataFrame 基礎操作。
具體見以下部落格:
Spark SQL 官網
pyspark dataframe的常用操作
1 列重新命名 train data train data.todf imei pkgname timestamp 2 刪除某一列 df df.drop col 3 選取某些列 train data train data.filter train data.date 2021 03 01 4 自定義...
Oracle data guard常用維護操作命令
data guard是oracle提供的一種高可用性解決方案,用於資料保護和容災,通過日誌同步來把資料及時傳送到備用節點,現總結一下data guard環境下常用的維護命令 1 在生產庫停止data guard操作 sql show parameter log archive dest sql al...
JavaWeb response物件常用操作
方式一response.setcontenttype contenttype 方式二response.setheader content type contenttype response.setcontenttype mime 的作用 讓伺服器告訴瀏覽器它傳送的資料屬於什麼檔案型別,使客戶端瀏覽器...