機器學習：統計學習方法——決策樹

決策樹的定義：

分類決策樹模型是一種描述對例項進行分類的樹形結構。決策樹是由節點和有向邊組成的。節點有兩種型別，分別是內部節點和葉節點。內部節點表示一個特徵或屬性，葉節點表示一個類。

一般而言，決策樹學習包含特徵選擇、決策樹生成和剪枝三個過程。

特徵選擇的準則是資訊增益或資訊增益比。這兩種準則分別對應兩種不同的決策樹生成演算法：ID3（採用資訊增益）和 C4.5（採用資訊增益比）。

本文的重點是其 C++ 實現。

#include #include #include #include #include #include using namespace std;

#define maxlen 6// number of data fields in each input row

// Ways to implement a multi-way tree:

// 1. Generalized list

// 2. Parent-pointer representation; suits workloads that often look up a node's parent

// 3. Child-list representation; suits workloads that often look up a node's children

// 4. Left-child / right-sibling representation; comparatively awkward to implement

// 5. Each node stores all of its children in a vector

// Lesson learned: data-structure design matters a lot; option 5 suits this algorithm. Also,

// keep the remaining examples and remaining attributes up to date. While building the tree,
// the horizontal traversal is done by looping over the values of the attribute,

// and the vertical traversal is done by recursive calls.

// Global state shared by the decision-tree routines.
// NOTE(review): the template arguments on the vector/map declarations below appear
// to have been lost in this copy (e.g. `vector > state`, `map> ...`); they must be
// restored before this compiles.
vector > state;// instance set

vector item(maxlen);// one row of the instance set

vector attribute_row;// holds the first row, i.e. the attribute (header) row

string end("end");// marks the end of input

// Label value; passed to allthesamelabel(remain_state, yes) when building the tree.
string yes("yes");

// Label value; passed to allthesamelabel(remain_state, no) when building the tree.
string no("no");

string blank("");

map> map_attribute_values;// holds all the values seen for each attribute

// Counter incremented in freetree.
int tree_size = 0;

struct node

};node * root;

//根據資料例項計算屬性與值組成的map

void computemapfrom2dvector()

if(!exited)

exited = false;

} map_attribute_values[state[0][i]] = values;

values.erase(values.begin(), values.end());

} }//根據具體屬性和值來計算熵

// Computes an entropy value from two tallies, count[0] and count[1].
// NOTE(review): most of the body is missing from this copy -- the declarations of
// count and done_flag and the loop that fills the tallies are not visible, so how
// remain_state, attribute, value and ifparent are used cannot be confirmed here.
// Returns 0 when either tally is zero, otherwise the two-class entropy in bits.
double computeentropy(vector > remain_state, string attribute, string value,bool ifparent)

else count[1]++;}}

done_flag = true;

// Early return: all instances are positive or all are negative. This also avoids
// evaluating log(0) in the formula below.
} }if(count[0] == 0 || count[1] == 0 ) return 0;// all positive or all negative instances

// Entropy of the split [+count[0], -count[1]]; log base 2 is obtained from the
// natural logarithm via the change-of-base formula.

double sum = count[0] + count[1];

double entropy = -count[0]/sum*log(count[0]/sum)/log(2.0) - count[1]/sum*log(count[1]/sum)/log(2.0);

return entropy;}

//計算按照屬性attribute劃分當前剩餘例項的資訊增益

double computegain(vector > remain_state, string attribute)}}

} count_values.push_back(tempint); }

for(j = 0; j < values.size(); j++)

double temp_entropy;

for(j = 0; j < values.size(); j++)

return (parent_entropy - children_entropy);

}int findattrinumbyname(string attri)

cerr<<"can't find the numth of attribute"<> remain_state)

if(p >= n) return yes;

else return no;

}//判斷樣例是否正負性都為label

bool allthesamelabel(vector > remain_state, string label)

if(count == remain_state.size()-1) return true;

else return false;

}//計算資訊增益，dfs構建決策樹

//current_node為當前的節點

//remain_state為剩餘待分類的樣例

//remain_attribute為剩餘還沒有考慮的屬性

//返回根結點指標

node * buliddecisiontreedfs(node * p, vector > remain_state, vector remain_attribute)

if (p == null)

p = new node();

//先看搜尋到樹葉的情況

if (allthesamelabel(remain_state, yes))

if (allthesamelabel(remain_state, no))

if(remain_attribute.size() == 0)

double max_gain = 0, temp_gain;

vector ::iterator max_it = remain_attribute.begin();

vector ::iterator it1;

for(it1 = remain_attribute.begin(); it1 < remain_attribute.end(); it1++)

} //下面根據max_it指向的屬性來劃分當前樣例，更新樣例集和屬性集

vector new_attribute;

vector > new_state;

for(vector ::iterator it2 = remain_attribute.begin(); it2 < remain_attribute.end(); it2++)

//確定了最佳劃分屬性，注意儲存

p->attribute = *max_it;

vector values = map_attribute_values[*max_it];

int attribue_num = findattrinumbyname(*max_it);

new_state.push_back(attribute_row);

for(vector ::iterator it3 = values.begin(); it3 < values.end(); it3++)

} node * new_node = new node();

new_node->arrived_value = *it3;

if(new_state.size() == 0)

else

buliddecisiontreedfs(new_node, new_state, new_attribute);

//遞迴函式返回時即回溯時需要1 將新結點加入父節點孩子容器 2清除new_state容器

p->childs.push_back(new_node);

new_state.erase(new_state.begin()+1,new_state.end());//注意先清空new_state中的前乙個取值的樣例，準備遍歷下乙個取值樣例

} return p;

}void input()

state.push_back(item);//注意首行資訊也輸入進去，即屬性

} for(int j = 0; j < maxlen; j++)

}void printtree(node *p, int depth)

}void freetree(node *p)

delete p;

tree_size++;

}int main()

computemapfrom2dvector();

root = buliddecisiontreedfs(root,remain_state,remain_attribute);

cout<<"the decision tree is :"<

統計學習方法決策樹

決策樹是一種基本的分類與回歸方法。一決策樹模型決策樹可以轉換成一個if then規則的集合，也可以看作是定義在特徵空間劃分的類的條件概率分布特徵為變數，類為概率 cart與id3 c4.5的區別 cart假設決策樹是二叉樹，特徵取值為是或否二，決策樹的生成演算法 2.1 id3 id...

統計學習方法決策樹

決策樹學習的三個步驟特徵選擇決策樹的生成決策樹的修剪決策樹的結點內部結點表示一個特徵或屬性，葉節點表示一個分類決策樹的路徑或其對應的if then規則集合滿足性質互斥且完備決策樹學習本質上是從訓練資料集中歸納出一組分類規則與訓練集不相矛盾的決策樹可能有很多，我們需要的是一個與訓練資...

統計學習方法 5 決策樹

分類決策樹模型是一種描述對例項進行分類的樹形結構。決策樹由結點和有向邊組成。結點有兩種型別內部結點和葉結點。內部結點表示一個特徵或屬性，葉結點表示一個類。決策樹的路徑或其對應的if then規則集合具有一個重要的性質互斥並且完備。這就是說，每一個例項都被一條路徑或一條規則所覆蓋，而且只被一條路徑...

機器學習 統計學學習方法 決策樹

統計學習方法 決策樹

統計學習方法 決策樹

統計學習方法 5 決策樹

相關推薦

機器學習統計學學習方法決策樹

統計學習方法決策樹

統計學習方法決策樹