in [183]:
def loaddataset():
    """Load the training set from 'testset.txt'.

    Each line of the file is expected to hold: x1 <ws> x2 <ws> label.
    A constant 1.0 is prepended to every feature row as the intercept
    term x0, so weights[0] acts as the bias.

    Returns:
        datamat: list of [1.0, x1, x2] float feature rows.
        labelmat: list of int class labels (0 or 1).
    """
    datamat = []
    labelmat = []
    # NOTE(review): the pasted original dropped the list initializations and
    # append lines; reconstructed from the parsing line that survived.
    with open('testset.txt') as fr:
        for line in fr:
            linearr = line.strip().split()
            datamat.append([1.0, float(linearr[0]), float(linearr[1])])
            labelmat.append(int(linearr[2]))
    return datamat, labelmat
in [184]:
def sigmoid(inx):
    """Logistic function sigma(z) = 1 / (1 + e^{-z}).

    Maps any real input (scalar or numpy array, elementwise) into (0, 1).
    """
    denom = 1.0 + exp(-inx)
    return 1.0 / denom
批量梯度上升（Batch Gradient Ascent）
in [185]:
def gradascent(datamatin, classlabels):
    """Batch gradient ascent for logistic-regression weights.

    Args:
        datamatin: 2-D list/array of feature rows (m samples x n features).
        classlabels: sequence of m class labels (0 or 1).

    Returns:
        (n, 1) numpy matrix of fitted weights.
    """
    data = mat(datamatin)                   # m x n design matrix
    labels = mat(classlabels).transpose()   # m x 1 column vector
    m, n = shape(data)
    alpha = 0.001        # fixed learning rate
    maxcycles = 500      # fixed number of full-batch iterations
    weights = ones((n, 1))
    for _ in range(maxcycles):
        h = sigmoid(data * weights)         # h is an m x 1 matrix of predictions
        error = labels - h
        # Ascend the log-likelihood gradient: w += alpha * X^T (y - h).
        weights = weights + alpha * data.transpose() * error
    return weights
隨機梯度上升（Stochastic Gradient Ascent）
in [186]:
def stocgradascent0(datamatrix, classlabels):
    """Stochastic gradient ascent: a single pass, updating per sample.

    Args:
        datamatrix: numpy array of shape (m, n) feature rows.
        classlabels: sequence of m class labels (0 or 1).

    Returns:
        1-D numpy array of n fitted weights.
    """
    m, n = shape(datamatrix)
    alpha = 0.01  # fixed learning rate
    weights = ones(n)
    for i in range(m):
        # h is a scalar: predicted probability for one sample.
        # (Removed leftover Python 2 debug `print` statements, which are
        # syntax errors under Python 3.)
        h = sigmoid(sum(datamatrix[i] * weights))
        error = classlabels[i] - h
        weights = weights + alpha * error * datamatrix[i]
    return weights
sum() 的引數是一個 list。下面是改進的隨機梯度上升演算法：
in [187]:
def stocgradascent1(datamatrix, classlabels, numiter=150):
    """Improved stochastic gradient ascent.

    Improvements over stocgradascent0:
      * the step size alpha decays with each update but never reaches 0
        thanks to the +0.01 floor, damping late-stage oscillation;
      * each pass visits the samples in random order without replacement.

    Args:
        datamatrix: numpy array of shape (m, n) feature rows.
        classlabels: sequence of m class labels (0 or 1).
        numiter: number of full passes over the data (default 150).

    Returns:
        1-D numpy array of n fitted weights.
    """
    m, n = shape(datamatrix)
    weights = ones(n)
    for j in range(numiter):
        # list(...) is required: a Python 3 range does not support `del`.
        dataindex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01  # decaying step size with floor
            randindex = int(random.uniform(0, len(dataindex)))
            # Bug fix: look the sample up THROUGH dataindex; indexing
            # datamatrix[randindex] directly made the del below pointless
            # and never actually sampled without replacement.
            sample = dataindex[randindex]
            h = sigmoid(sum(datamatrix[sample] * weights))  # h is a scalar
            error = classlabels[sample] - h
            weights = weights + alpha * error * datamatrix[sample]
            del dataindex[randindex]  # sample without replacement this pass
    return weights
in [188]:
#import logregres
in [189]:
# Load features (with x0 = 1.0 prepended) and labels from testset.txt.
dataarr,labelmat = loaddataset()
in [190]:
#weights=gradascent(dataarr,labelmat)
# Fit with the improved stochastic gradient ascent, 500 passes over the data.
weights=stocgradascent1(array(dataarr),labelmat,500)
in [191]:
def plotbestfit(wei):
    """Scatter-plot both classes and draw the fitted decision boundary.

    Args:
        wei: weight vector [w0, w1, w2] — intercept, x1 and x2 coefficients.

    Side effects: reads testset.txt via loaddataset() and shows a
    matplotlib window.
    """
    import matplotlib.pyplot as plt
    weights = wei
    datamat, labelmat = loaddataset()
    dataarr = array(datamat)
    n = shape(dataarr)[0]
    # NOTE(review): the pasted original dropped the `[]` literals and the
    # if/else append bodies; reconstructed from the scatter calls below.
    xcord1 = []; ycord1 = []   # class-1 points (red squares)
    xcord2 = []; ycord2 = []   # class-0 points (green dots)
    for i in range(n):
        if int(labelmat[i]) == 1:
            xcord1.append(dataarr[i, 1]); ycord1.append(dataarr[i, 2])
        else:
            xcord2.append(dataarr[i, 1]); ycord2.append(dataarr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    # Boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2.
    x = arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()
h = subplot(m,n,p)/subplot(mnp) 將 figure 劃分為 m×n 塊，在第 p 塊建立座標系，並返回它的控制代碼。當 m,n,p<10 時，可以簡化為 subplot(mnp) 或者 subplot mnp。（注：subplot(m,n,p) 或 subplot(mnp) 是將多個圖畫到一個平面上的工具。其中 m 表示圖排成 m 行，n 表示圖排成 n 列，也就是整個 figure 中每行有 n 個圖、共 m 行；如果第一個數字是 2 就表示 2 行圖。p 指現在要把曲線畫到 figure 中的哪個位置，p 為 1 表示從左到右的第一個位置。）
in [192]:
from numpy import *
#reload
# Fixed: `print weights` is Python 2 statement syntax, a SyntaxError on
# Python 3; print is a function there.
print(weights)
plotbestfit(weights)
Logistic 回歸演算法（梯度上升）
logistic 回歸演算法是一個最優化演算法，回歸就是擬合的過程。logistic 回歸的思想是利用現有資料對分類邊界建立線性回歸公式，這裡我們用這個演算法來解決二值分類問題。（節選程式碼：from numpy import …；def loaddataset(): datamat, labelmat; fr = open('testset.txt') …）
Logistic 回歸（隨機梯度上升）
由於梯度上升優化演算法在每次更新回歸係數時都需要遍歷整個資料集，計算複雜度較高，這裡有一個隨機梯度上升演算法也可以求得回歸係數——這種演算法一次只用一個樣本點來更新回歸係數。（節選程式碼：def stocgradascent0(datamatrix, classlabels): m,n = shape(datamatrix) …）
梯度上升法求解 Logistic 回歸
對率函式 $h = \frac{1}{1 + e^{-z}}$，其中 $z = \theta^{T} x$。設 $p = P(y=1 \mid x)$，則極大似然函式為 $L(\theta) = \prod_i p^{y_i} (1-p)^{1-y_i}$。令 $p = h(x_i)$，得 $L(\theta) = \prod_i h(x_i)^{y_i} \bigl(1 - h(x_i)\bigr)^{1-y_i}$。兩邊同時取對數，則 $\ln L(\theta) = \sum_i \bigl[ y_i \ln h(x_i) + (1-y_i) \ln\bigl(1-h(x_i)\bigr) \bigr]$ …