今天主要學習了實驗 7:Spark 機器學習庫 MLlib 程式設計實踐,
主要代碼:
import org.apache.spark.ml.feature.PCA
在繼續這個實驗時遇到一個問題,現在還沒解決,如圖:
import org.apache.spark.sql.Row
import org.apache.spark.sql.DataFrame
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.feature.{IndexToString, PCAModel, StringIndexer, VectorIndexer}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.classification.LogisticRegressionModel
import org.apache.spark.ml.classification._
import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}
import org.apache.spark.sql.functions;
import spark.implicits._
/** One record of the Adult data set in the shape Spark ML expects.
  *
  * The class name is kept in lower case because the rest of this script
  * constructs rows with `adult(...)`.
  *
  * @param features numeric feature vector for the record
  * @param label    class label of the record, as read from the input file
  */
case class adult(features: org.apache.spark.ml.linalg.Vector, label: String)
// ---------------------------------------------------------------------------
// Part 1: PCA (k = 3) followed by logistic regression, scored on the test file.
// Written for spark-shell, where `sc` and `spark` are predefined.
// ---------------------------------------------------------------------------

/** Reads one comma-separated Adult file into a DataFrame of `adult` rows.
  *
  * Columns 0, 2, 4, 10, 11 and 12 become the feature vector and column 14
  * becomes the label.
  *
  * Lines with fewer than 15 columns (blank lines, banner lines) are skipped
  * instead of aborting the job with an index or number-format error.
  *
  * The label is trimmed and a trailing '.' is removed so that the training and
  * test files yield the same label strings. A label that the fitted
  * StringIndexer has never seen makes `transform` fail with
  * "Failed to execute user defined function".
  * NOTE(review): this assumes the standard UCI layout, where adult.test ends
  * every label with '.' and starts with a banner line -- confirm against the
  * local copies of the files.
  *
  * @param path location of the file, resolved by `sc.textFile`
  * @return a DataFrame with the columns `features` and `label`
  */
def loadAdult(path: String): DataFrame =
  sc.textFile(path)
    .map(_.split(","))
    .filter(_.length > 14)
    .map { p =>
      adult(
        Vectors.dense(
          p(0).toDouble, p(2).toDouble, p(4).toDouble,
          p(10).toDouble, p(11).toDouble, p(12).toDouble
        ),
        p(14).trim.stripSuffix(".")
      )
    }
    .toDF()

val df = loadAdult("adult.data.txt")
val test = loadAdult("adult.test.txt")

// Fit PCA on the training data only, then project both data sets with it.
val pca = new PCA().setInputCol("features").setOutputCol("pcafeatures").setK(3).fit(df)
val result = pca.transform(df)
val testdata = pca.transform(test)
result.show(false)
testdata.show(false)

// Index the string labels and the PCA output so the classifier can consume them.
val labelindexer = new StringIndexer().setInputCol("label").setOutputCol("indexedlabel").fit(result)
labelindexer.labels.foreach(println)
val featureindexer = new VectorIndexer().setInputCol("pcafeatures").setOutputCol("indexedfeatures").fit(result)
println(featureindexer.numFeatures)

// Maps the numeric prediction back to the original label string.
val labelconverter = new IndexToString().setInputCol("prediction").setOutputCol("predictedlabel").setLabels(labelindexer.labels)
val lr = new LogisticRegression().setLabelCol("indexedlabel").setFeaturesCol("indexedfeatures").setMaxIter(100)
val lrpipeline = new Pipeline().setStages(Array(labelindexer, featureindexer, lr, labelconverter))
val lrpipelinemodel = lrpipeline.fit(result)

// Stage 2 of the pipeline is the logistic regression (0 = labels, 1 = features).
val lrmodel = lrpipelinemodel.stages(2).asInstanceOf[LogisticRegressionModel]
println("coefficients: " + lrmodel.coefficientMatrix+"intercept: "+lrmodel.interceptVector+"numclasses: "+lrmodel.numClasses+"numfeatures: "+lrmodel.numFeatures)

val lrpredictions = lrpipelinemodel.transform(testdata)
// The evaluator's default metric is "f1"; request accuracy explicitly so that
// 1.0 - lraccuracy really is the test error printed below.
val evaluator = new MulticlassClassificationEvaluator().setLabelCol("indexedlabel").setPredictionCol("prediction").setMetricName("accuracy")
val lraccuracy = evaluator.evaluate(lrpredictions)
println("test error = " + (1.0 - lraccuracy))
// ---------------------------------------------------------------------------
// Part 2: the same pipeline with PCA as a stage, tuned by 3-fold cross-validation.
// These vals reuse names defined earlier in the script, which only works when
// the lines are entered in spark-shell (the REPL allows redefinition).
// ---------------------------------------------------------------------------

// PCA is left unfitted here; its `k` is chosen by the parameter grid below.
val pca = new PCA().setInputCol("features").setOutputCol("pcafeatures")
val labelindexer = new StringIndexer().setInputCol("label").setOutputCol("indexedlabel").fit(df)
val featureindexer = new VectorIndexer().setInputCol("pcafeatures").setOutputCol("indexedfeatures")
val labelconverter = new IndexToString().setInputCol("prediction").setOutputCol("predictedlabel").setLabels(labelindexer.labels)
val lr = new LogisticRegression().setLabelCol("indexedlabel").setFeaturesCol("indexedfeatures").setMaxIter(100)
val lrpipeline = new Pipeline().setStages(Array(pca, labelindexer, featureindexer, lr, labelconverter))

// 6 values of k x 2 elastic-net mixes x 3 regularisation strengths = 36 candidates.
val paramgrid = new ParamGridBuilder().addGrid(pca.k, Array(1,2,3,4,5,6)).addGrid(lr.elasticNetParam, Array(0.2,0.8)).addGrid(lr.regParam, Array(0.01, 0.1, 0.5)).build()

// Model selection keeps the evaluator's default metric, exactly as before.
val cv = new CrossValidator().setEstimator(lrpipeline).setEvaluator(new MulticlassClassificationEvaluator().setLabelCol("indexedlabel").setPredictionCol("prediction")).setEstimatorParamMaps(paramgrid).setNumFolds(3)
val cvmodel = cv.fit(df)
val lrpredictions=cvmodel.transform(test)

// The value below is printed as accuracy, so ask for accuracy explicitly;
// the evaluator's default metric is "f1".
val evaluator = new MulticlassClassificationEvaluator().setLabelCol("indexedlabel").setPredictionCol("prediction").setMetricName("accuracy")
val lraccuracy = evaluator.evaluate(lrpredictions)
println("準確率為"+lraccuracy)

// Stage order follows lrpipeline above: 0 = PCA, 3 = logistic regression.
val bestmodel= cvmodel.bestModel.asInstanceOf[PipelineModel]
val lrmodel = bestmodel.stages(3).asInstanceOf[LogisticRegressionModel]
println("coefficients: " + lrmodel.coefficientMatrix + "intercept: "+lrmodel.interceptVector+ "numclasses: "+lrmodel.numClasses+"numfeatures: "+lrmodel.numFeatures)
val pcamodel = bestmodel.stages(0).asInstanceOf[PCAModel]
println("primary component: " + pcamodel.pc)
經過查詢,這個問題的原因是無法執行定義的函式,但是我完全按照教程中的代碼進行就會產生這個問題,網上沒有這個問題的解析,所以還未解決。
學習第十天
一 介面 jdk1.8及之後新增了2種可以定義存在方法體的方法 預設方法 default關鍵字修飾的方法 使用 通過實現類物件使用 靜態方法 使用 通過介面名去呼叫 二 單例模式 保證類只能存在一個例項 餓漢式 先建立物件,然後需要的人要這個物件,保證永遠使用的都是這個建立好的物件 執行緒安全的,效...
python學習第十天
class student count 0 def init self,name,age,address self.name name self.age age self.address address student.count 1 k print k w open a.txt w encodin...
菜鳥學習第十天
1.字串最大的特點 一旦初始化就不可以改變。不可改變的字串內容而不是指向字串的引用 2.string s abc 其中s是一個類型別變數,abc 是一個物件。3.string s1 abc 和string s2 new string abc s1 s2 和s1.equals s2 比較的是他們在記憶...