AdaBoost improves classifier performance by focusing on errors
from numpy import *

def stumpclassify(datamatrix, dimen, threshval, threshineq):  # classify data by thresholding a single feature
    retarray = ones((shape(datamatrix)[0], 1))
    if threshineq == 'lt':
        retarray[datamatrix[:, dimen] <= threshval] = -1.0
    else:
        retarray[datamatrix[:, dimen] > threshval] = -1.0
    return retarray
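A quick sanity check of stumpclassify (a minimal sketch; the two-sample matrix here is made up for illustration):

testmat = mat([[0.5], [1.5]])
print(stumpclassify(testmat, 0, 1.0, 'lt'))  # [[-1.], [1.]] -- samples with feature <= 1.0 get -1
print(stumpclassify(testmat, 0, 1.0, 'gt'))  # [[1.], [-1.]] -- samples with feature > 1.0 get -1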
def buildstump(dataarr, classlabels, d):
    datamatrix = mat(dataarr); labelmat = mat(classlabels).T
    m, n = shape(datamatrix)
    numsteps = 10.0; beststump = {}; bestclasest = mat(zeros((m, 1)))
    minerror = inf  # init min error to +infinity
    for i in range(n):  # loop over all dimensions
        rangemin = datamatrix[:, i].min(); rangemax = datamatrix[:, i].max()
        stepsize = (rangemax - rangemin) / numsteps  # step size between candidate thresholds
        for j in range(-1, int(numsteps) + 1):  # loop over all thresholds in current dimension
            for inequal in ['lt', 'gt']:  # go over less than and greater than
                threshval = rangemin + float(j) * stepsize
                predictedvals = stumpclassify(datamatrix, i, threshval, inequal)  # call stump classify with i, threshval, inequal
                errarr = mat(ones((m, 1)))
                errarr[predictedvals == labelmat] = 0
                weightederror = d.T * errarr  # weighted error: sum of the weights of misclassified samples
                #print("split: dim %d, thresh %.2f, thresh ineqal: %s, the weighted error is %.3f" % (i, threshval, inequal, weightederror))
                if weightederror < minerror:
                    minerror = weightederror
                    bestclasest = predictedvals.copy()
                    beststump['dim'] = i
                    beststump['thresh'] = threshval
                    beststump['ineq'] = inequal
    return beststump, minerror, bestclasest
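The driver below assumes datmat and classlabels already hold a small labeled dataset. A minimal sketch that matches the five-sample weight vector d used next (patterned after the book's loadSimpData; the exact points are an assumption):

def loadsimpdata():  # hypothetical toy set: five 2-D points with labels in {+1, -1}
    datmat = matrix([[1.0, 2.1],
                     [2.0, 1.1],
                     [1.3, 1.0],
                     [1.0, 1.0],
                     [2.0, 1.0]])
    classlabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return datmat, classlabels

datmat, classlabels = loadsimpdata()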
d = mat(ones((5, 1)) / 5)  # initial weights are all equal and sum to 1
beststump, minerror, bestclasest = buildstump(datmat, classlabels, d)
AdaBoost based on decision stumps
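The update rule the code below implements: given a stump with weighted error ε, its vote weight is

    alpha = 0.5 * ln((1 - ε) / ε)

and each sample weight is updated as

    d_i <- d_i * exp(-alpha * y_i * h(x_i)) / Z

where Z renormalizes d to sum to 1. Correctly classified samples (y_i * h(x_i) = +1) are scaled down by exp(-alpha) and misclassified ones scaled up by exp(alpha), so the next stump concentrates on the hard cases.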
def adaboosttrainds(dataarr, classlabels, numit=40):
    weakclassarr = []
    m = shape(dataarr)[0]
    d = mat(ones((m, 1)) / m)  # init d to all equal
    aggclassest = mat(zeros((m, 1)))
    for i in range(numit):
        beststump, error, classest = buildstump(dataarr, classlabels, d)  # build stump
        #print("d:", d.T)
        alpha = float(0.5 * log((1.0 - error) / max(error, 1e-16)))  # classifier weight; max() guards against division by zero
        beststump['alpha'] = alpha
        weakclassarr.append(beststump)  # store the stump and its vote weight
        #print("classest: ", classest.T)
        expon = multiply(-1 * alpha * mat(classlabels).T, classest)  # exponent for the d update
        d = multiply(d, exp(expon))  # calc new d for next iteration
        d = d / d.sum()
        # calc training error of all classifiers; if this is 0 quit the loop early
        aggclassest += alpha * classest
        #print("aggclassest: ", aggclassest.T)
        aggerrors = multiply(sign(aggclassest) != mat(classlabels).T, ones((m, 1)))
        errorrate = aggerrors.sum() / m
        print("total error: ", errorrate)
        if errorrate == 0.0: break
    return weakclassarr
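Training on the toy set above then looks like this (a sketch; the iteration cap of 9 is arbitrary, and on this easy dataset the total error should reach 0 after a few rounds and trigger the early break):

weakclassarr = adaboosttrainds(datmat, classlabels, 9)
print(weakclassarr)  # list of dicts, each with 'dim', 'thresh', 'ineq', 'alpha'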
Classification
def adaclassify(dattoclass, classifierarr):
    datamatrix = mat(dattoclass)  # do stuff similar to aggclassest in adaboosttrainds
    m = shape(datamatrix)[0]
    aggclassest = mat(zeros((m, 1)))
    for i in range(len(classifierarr)):
        classest = stumpclassify(datamatrix, classifierarr[i]['dim'],
                                 classifierarr[i]['thresh'],
                                 classifierarr[i]['ineq'])  # call stump classify
        aggclassest += classifierarr[i]['alpha'] * classest
        print(aggclassest)
    return sign(aggclassest)
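A usage sketch mirroring the book's demo, classifying new points with the ensemble trained on the toy data (the points chosen are illustrative):

print(adaclassify([0, 0], weakclassarr))            # deep in the negative region; expect [[-1.]]
print(adaclassify([[5, 5], [0, 0]], weakclassarr))  # classify two points at once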
def loaddataset(filename):  # general function to parse tab-delimited floats
    numfeat = len(open(filename).readline().split('\t'))  # get number of fields
    datamat = []; labelmat = []
    fr = open(filename)
    for line in fr.readlines():
        linearr = []
        curline = line.strip().split('\t')
        for i in range(numfeat - 1):
            linearr.append(float(curline[i]))
        datamat.append(linearr)
        labelmat.append(float(curline[-1]))  # last field is the class label
    return datamat, labelmat
filename = 'e:/ml/machinelearninginaction/ch07/horsecolictraining2.txt'
datamat, labelmat = loaddataset(filename)
weakclassarr = adaboosttrainds(datamat, labelmat, 10)  # train weak classifiers (10 iterations, as in the book's example)
testdatamat, testlabelmat = loaddataset('e:/ml/machinelearninginaction/ch07/horsecolictest2.txt')
predict = adaclassify(testdatamat, weakclassarr)
errar = mat(ones((shape(testdatamat)[0], 1)))
print(errar[predict != mat(testlabelmat).T].sum() / shape(testdatamat)[0])  # test error rate