Python 3.4 之決策樹

2021-07-25 13:22:51 字數 2378 閱讀 4884

#!/usr/bin/env python

# coding=utf-8

import numpy as np
import pydot
from sklearn import tree
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn releases
    from sklearn.cross_validation import train_test_split

try:
    from sklearn.externals.six import StringIO
except ImportError:  # sklearn.externals.six is gone in current releases
    from io import StringIO

def loaddataset(path='d:python/fat.txt'):
    """Load numeric samples and their thin/fat labels from a text file.

    Every non-blank line holds whitespace-separated numeric features
    followed by the label as the last token, e.g. ``1.5 50 thin``.

    Args:
        path: Location of the data file. Defaults to the path the script
            originally hard-coded.

    Returns:
        A tuple ``(x, y)``. ``x`` is a float array with one row per sample;
        ``y`` is a float array holding 1 where the label is ``'fat'`` and
        0 for any other label.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature token is not a valid number.
    """
    data = []
    label = []
    with open(path) as handle:
        for line in handle:
            # split() with no argument tolerates tabs and repeated spaces.
            tokens = line.split()
            if not tokens:
                continue  # ignore blank lines (e.g. a trailing newline)
            data.append([float(tk) for tk in tokens[:-1]])
            label.append(tokens[-1])

    x = np.array(data)
    print('x:')
    print(x)

    # Force a string dtype so the comparison below is well defined even
    # when the file contained no samples.
    label = np.array(label, dtype=str)
    y = np.zeros(label.shape)
    y[label == 'fat'] = 1
    print('y:')
    print(y)
    return x, y

def decisiontreeclf():
    """Train an entropy-based decision tree on the data set and report on it.

    Loads the samples with ``loaddataset``, holds out 20% of them as a test
    split, fits the classifier on the remainder, writes the fitted tree to
    ``iris.dot`` and ``ex.pdf``, and prints the feature importances, the
    accuracy on the training split, the precision/recall values and a
    classification report over the complete data set.
    """
    x, y = loaddataset()

    # Split into a training set and a test set.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    for title, values in (
        ('x_train:', x_train),
        ('x_test:', x_test),
        ('y_train:', y_train),
        ('y_test:', y_test),
    ):
        print(title)
        print(values)

    # Use information entropy as the split criterion.
    clf = tree.DecisionTreeClassifier(criterion='entropy')
    print(clf)
    clf.fit(x_train, y_train)

    # Export the tree once as DOT text (out_file=None returns a string) and
    # reuse it for both output files.
    dot_source = tree.export_graphviz(clf, out_file=None)
    with open("iris.dot", 'w') as f:
        f.write(dot_source)
    graph = pydot.graph_from_dot_data(dot_source)
    graph[0].write_pdf("ex.pdf")

    # Print how much each feature contributes to the classification.
    print(clf.feature_importances_)

    # Print the predictions for the training split next to the truth.
    answer = clf.predict(x_train)
    print('x_train:')
    print(x_train)
    print('answer:')
    print(answer)
    print('y_train:')
    print(y_train)
    print('計算正確率:')
    print(np.mean(answer == y_train))

    # Precision and recall on the training split.
    precision, recall, _thresholds = precision_recall_curve(y_train, answer)
    print('precision:')
    print(precision)
    print('recall:')
    print(recall)

    # classification_report compares class labels, so feed it predicted
    # labels rather than predict_proba scores.
    predicted = clf.predict(x)
    print(classification_report(y, predicted, target_names=['thin', 'fat']))

# Run the demo only when executed as a script, so importing this module does
# not train a model or write files as a side effect.
if __name__ == '__main__':
    decisiontreeclf()

資料集fat.txt檔案內容如下:

1.5 50 thin

1.5 60 fat

1.6 40 thin

1.6 60 fat

1.7 60 thin

1.7 80 fat

1.8 60 thin

1.8 90 fat

1.9 70 thin

1.9 80 fat

決策樹之CART

本系列分享由三篇部落格組成,建議從前往後閱讀學習。決策樹之id3 決策樹之c4.5 決策樹之cart 前面我們講到了決策樹演算法id3,和c4.5。c4.5是在id3的基礎上發展而來的,c4.5還存在的缺陷有 1 c4.5不能解決回歸的問題。2 c4.5需要進行多個對數計算,效率比較低。3 對於離散...

SKlearn之決策樹

決策樹是一種非引數的監督學習方法。模組:sklearn.tree。sklearn 建模的步驟:1. 選擇並建立模型,例:clf = tree.DecisionTreeClassifier();2. 提供資料訓練模型,例:clf = clf.fit(x_train, y_train);3. 獲取需要的資訊,例:result ...

Python 3.4 安裝日記

環境 python3.4.0,win32 安裝過python2.7,然而謎之原因解除安裝了,其實2.7和3.4可以共存。在登錄檔和path內刪除了python2.7的記錄,然後安裝py3.4。尚未找到原因,估計是某個lib過於古老。解除安裝重新安裝python 3.4,可以使用pip 始終建議使用p...