# 決策樹一般採用整合方法,具有隨機性,以不純度最優作為劃分標準
# (Decision trees are usually used in ensembles; they are randomized and split on optimal impurity.)
# Load the sklearn wine dataset and split it into train/test sets.
from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split  # used to split the data into train and test sets
wine=load_wine()
# 70% train / 30% test; split is random (no random_state fixed here)
x_train,x_test,y_train,y_test=train_test_split(wine.data,wine.target,test_size=0.3)
#help(train_test_split)
# 建模型 / build model
# Build and fit the classifier. NOTE: the sklearn class is CamelCase —
# the original `tree.decisiontreeclassifier` raises AttributeError.
clf = tree.DecisionTreeClassifier(criterion="entropy"
                                  , random_state=30)  # the tree is randomized; fixing the seed makes the model reproducible
# help(tree.DecisionTreeClassifier)
clf = clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)  # mean accuracy on the held-out test set
clf.predict(x_test)  # returns predicted class labels
#視覺化
# Visual sanity check: features and target side by side in one table.
# NOTE: pandas' constructor is `pd.DataFrame` — lowercase `pd.dataframe`
# in the original raises AttributeError.
import pandas as pd
pd.concat([pd.DataFrame(wine.data), pd.DataFrame(wine.target)], axis=1)
import graphviz

# Export the fitted tree as Graphviz DOT source and render it to a file.
# Fixes vs. original: True/None capitalization (lowercase forms are
# NameErrors) and `graphviz.Source` (lowercase `source` does not exist).
dot_data = tree.export_graphviz(clf
                                , feature_names=wine.feature_names
                                , class_names=wine.target_names
                                , filled=True    # color nodes by majority class
                                , rounded=True   # rounded node boxes
                                , out_file=None)  # return the DOT string instead of writing a file
# help(tree.export_graphviz)
graph = graphviz.Source(dot_data)
graph.render("wine")  # was "iris" in the original — this script uses the wine dataset

# Per-feature importances learned by the tree (sums to 1).
clf.feature_importances_
[*zip(wine.feature_names, clf.feature_importances_)]
# Refit with splitter='random': choose split thresholds randomly at each
# node, adding extra randomness to curb overfitting.
# (Class name fixed to CamelCase `DecisionTreeClassifier`.)
clf = tree.DecisionTreeClassifier(criterion="entropy"
                                  , random_state=30
                                  , splitter='random')
# 剪枝 / pruning: limit tree depth and plot test accuracy vs. max_depth
import matplotlib.pyplot as plt

# Learning curve over max_depth: fit a tree for depths 1..10 and plot the
# test-set accuracy of each, to pick a depth that avoids overfitting.
# Reconstructed from the garbled original: `test=` had no initializer, the
# loop body was unindented, `clf = clf.fit(...)` was fused onto the closing
# parenthesis, and the score was never appended to `test`, so the plot of
# range(1, 11) against `test` could not work.
test = []
for i in range(10):
    clf = tree.DecisionTreeClassifier(criterion="entropy"
                                      , random_state=30
                                      , splitter='random'
                                      , max_depth=i + 1
                                      # , min_samples_leaf=10
                                      # , min_impurity_decrease=3
                                      )
    clf = clf.fit(x_train, y_train)
    # score1 = clf.score(x_train, y_train)
    score = clf.score(x_test, y_test)
    test.append(score)
plt.plot(range(1, 11), test, color='red', label='max_depth')
plt.show()
# --- 以下為來源部落格頁面的「相關文章」殘留,非程式碼 (scraper residue, not code) ---
# sklearn學習筆記: 波士頓房價線性回歸模型 — from sklearn import datasets; from sklearn.linear_model import LinearRegression; boston = datasets.load_boston(); x = boston.data; y = boston.t...
# sklearn學習筆記: from sklearn.datasets import load_iris; from sklearn.model_selection import train_test_split; from sklearn.feature_extraction import DictVectorizer; ...
# sklearn 學習筆記八:
# 感知器通常用下面的圖形表示:x1 和 x2 是輸入單元,每個輸入單元分別代表一個特徵。感知器通常用另外一個輸入單元代表一個常用誤差項,但是這個輸入單元在圖形中通常被忽略了。中間的圓圈是一個計算單元,類似神經元的細胞核。連線輸入單元和計算單元的邊類似於樹突。每條邊是一個權重,或者是一個引數。引數容易解釋,如果...
感知器通常用下面的圖形表示 x1,和是輸入單元。每個輸入單元分別代表乙個特徵。感知器通常用另外乙個輸入單元代表乙個常用誤差項,但是這個輸入單元在圖形中通常被忽略了。中間的圓圈是乙個計算單元,類似神經元的細胞核。連線輸入單元和計算單元的邊類似於樹突。每條邊是乙個權重,或者是乙個引數。引數容易解釋,如果...