in [183]:
def loaddataset():
    """Load the training set from 'testset.txt'.

    Each line of the file is expected to hold: x1 <ws> x2 <ws> label.
    A constant 1.0 is prepended to every feature row as the intercept
    term x0, so weights[0] acts as the bias.

    Returns:
        datamat: list of [1.0, x1, x2] float feature rows.
        labelmat: list of int class labels (0 or 1).
    """
    datamat = []
    labelmat = []
    # NOTE(review): the pasted original dropped the list initializations and
    # append lines; reconstructed from the parsing line that survived.
    with open('testset.txt') as fr:
        for line in fr:
            linearr = line.strip().split()
            datamat.append([1.0, float(linearr[0]), float(linearr[1])])
            labelmat.append(int(linearr[2]))
    return datamat, labelmat
in [184]:
def sigmoid(inx):
    """Logistic function sigma(z) = 1 / (1 + e^{-z}).

    Maps any real input (scalar or numpy array, elementwise) into (0, 1).
    """
    denom = 1.0 + exp(-inx)
    return 1.0 / denom
批量梯度上升（Batch Gradient Ascent）
in [185]:
def gradascent(datamatin, classlabels):
    """Batch gradient ascent for logistic-regression weights.

    Args:
        datamatin: 2-D list/array of feature rows (m samples x n features).
        classlabels: sequence of m class labels (0 or 1).

    Returns:
        (n, 1) numpy matrix of fitted weights.
    """
    data = mat(datamatin)                   # m x n design matrix
    labels = mat(classlabels).transpose()   # m x 1 column vector
    m, n = shape(data)
    alpha = 0.001        # fixed learning rate
    maxcycles = 500      # fixed number of full-batch iterations
    weights = ones((n, 1))
    for _ in range(maxcycles):
        h = sigmoid(data * weights)         # h is an m x 1 matrix of predictions
        error = labels - h
        # Ascend the log-likelihood gradient: w += alpha * X^T (y - h).
        weights = weights + alpha * data.transpose() * error
    return weights
隨機梯度上升（Stochastic Gradient Ascent）
in [186]:
def stocgradascent0(datamatrix, classlabels):
    """Stochastic gradient ascent: a single pass, updating per sample.

    Args:
        datamatrix: numpy array of shape (m, n) feature rows.
        classlabels: sequence of m class labels (0 or 1).

    Returns:
        1-D numpy array of n fitted weights.
    """
    m, n = shape(datamatrix)
    alpha = 0.01  # fixed learning rate
    weights = ones(n)
    for i in range(m):
        # h is a scalar: predicted probability for one sample.
        # (Removed leftover Python 2 debug `print` statements, which are
        # syntax errors under Python 3.)
        h = sigmoid(sum(datamatrix[i] * weights))
        error = classlabels[i] - h
        weights = weights + alpha * error * datamatrix[i]
    return weights
sum() 的引數是一個 list。下面是改進的隨機梯度上升演算法：
in [187]:
def stocgradascent1(datamatrix, classlabels, numiter=150):
    """Improved stochastic gradient ascent.

    Improvements over stocgradascent0:
      * the step size alpha decays with each update but never reaches 0
        thanks to the +0.01 floor, damping late-stage oscillation;
      * each pass visits the samples in random order without replacement.

    Args:
        datamatrix: numpy array of shape (m, n) feature rows.
        classlabels: sequence of m class labels (0 or 1).
        numiter: number of full passes over the data (default 150).

    Returns:
        1-D numpy array of n fitted weights.
    """
    m, n = shape(datamatrix)
    weights = ones(n)
    for j in range(numiter):
        # list(...) is required: a Python 3 range does not support `del`.
        dataindex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01  # decaying step size with floor
            randindex = int(random.uniform(0, len(dataindex)))
            # Bug fix: look the sample up THROUGH dataindex; indexing
            # datamatrix[randindex] directly made the del below pointless
            # and never actually sampled without replacement.
            sample = dataindex[randindex]
            h = sigmoid(sum(datamatrix[sample] * weights))  # h is a scalar
            error = classlabels[sample] - h
            weights = weights + alpha * error * datamatrix[sample]
            del dataindex[randindex]  # sample without replacement this pass
    return weights
in [188]:
#import logregres
in [189]:
# Load features (with x0 = 1.0 prepended) and labels from testset.txt.
dataarr,labelmat = loaddataset()
in [190]:
#weights=gradascent(dataarr,labelmat)
# Fit with the improved stochastic gradient ascent, 500 passes over the data.
weights=stocgradascent1(array(dataarr),labelmat,500)
in [191]:
def plotbestfit(wei):
    """Scatter-plot both classes and draw the fitted decision boundary.

    Args:
        wei: weight vector [w0, w1, w2] — intercept, x1 and x2 coefficients.

    Side effects: reads testset.txt via loaddataset() and shows a
    matplotlib window.
    """
    import matplotlib.pyplot as plt
    weights = wei
    datamat, labelmat = loaddataset()
    dataarr = array(datamat)
    n = shape(dataarr)[0]
    # NOTE(review): the pasted original dropped the `[]` literals and the
    # if/else append bodies; reconstructed from the scatter calls below.
    xcord1 = []; ycord1 = []   # class-1 points (red squares)
    xcord2 = []; ycord2 = []   # class-0 points (green dots)
    for i in range(n):
        if int(labelmat[i]) == 1:
            xcord1.append(dataarr[i, 1]); ycord1.append(dataarr[i, 2])
        else:
            xcord2.append(dataarr[i, 1]); ycord2.append(dataarr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    # Boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2.
    x = arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()
h = subplot(m,n,p)/subplot(mnp) 將 figure 劃分為 m×n 塊，在第 p 塊建立座標系，並返回它的控制代碼。當 m,n,p<10 時，可以簡化為 subplot(mnp) 或者 subplot mnp。（注：subplot(m,n,p) 或 subplot(mnp) 是將多個圖畫到一個平面上的工具。其中 m 表示圖排成 m 行，n 表示圖排成 n 列，也就是整個 figure 中每行有 n 個圖、共 m 行；如果第一個數字是 2 就表示 2 行圖。p 指現在要把曲線畫到 figure 中的哪個位置，p 為 1 表示從左到右的第一個位置。）
in [192]:
from numpy import *
#reload
# Fixed: `print weights` is Python 2 statement syntax, a SyntaxError on
# Python 3; print is a function there.
print(weights)
plotbestfit(weights)
Logistic 回歸演算法（梯度上升）
logistic 回歸演算法是一個最優化演算法，回歸就是擬合的過程。logistic 回歸的思想是利用現有資料對分類邊界建立線性回歸公式，這裡我們用這個演算法來解決二值分類問題。（節選程式碼：from numpy import …；def loaddataset(): datamat, labelmat; fr = open('testset.txt') …）
Logistic 回歸（隨機梯度上升）
由於梯度上升優化演算法在每次更新回歸係數時都需要遍歷整個資料集，計算複雜度較高，這裡有一個隨機梯度上升演算法也可以求得回歸係數——這種演算法一次只用一個樣本點來更新回歸係數。（節選程式碼：def stocgradascent0(datamatrix, classlabels): m,n = shape(datamatrix) …）
梯度上升法求解 Logistic 回歸
對率函式 $h = \frac{1}{1 + e^{-z}}$，其中 $z = \theta^{T} x$。設 $p = P(y=1 \mid x)$，則極大似然函式為 $L(\theta) = \prod_i p^{y_i} (1-p)^{1-y_i}$。令 $p = h(x_i)$，得 $L(\theta) = \prod_i h(x_i)^{y_i} \bigl(1 - h(x_i)\bigr)^{1-y_i}$。兩邊同時取對數，則 $\ln L(\theta) = \sum_i \bigl[ y_i \ln h(x_i) + (1-y_i) \ln\bigl(1-h(x_i)\bigr) \bigr]$ …