《機器學習第五章 Logistic回歸實踐》

2021-09-12 11:25:32 字數 4288 閱讀 7229

import numpy as np

def loaddataset():
    """Load the 2-D test data set from 'testset.txt'.

    Each line of the file holds tab/space separated values: x1 x2 label.
    Returns:
        datamat: list of rows [1.0, x1, x2] (1.0 is the bias/intercept term)
        labelmat: list of int class labels (0 or 1)
    """
    datamat = []
    labelmat = []
    # 'with' guarantees the file handle is closed (original leaked it)
    with open('testset.txt') as fr:
        for line in fr.readlines():
            linearr = line.strip().split()
            datamat.append([1.0, float(linearr[0]), float(linearr[1])])
            labelmat.append(int(linearr[2]))
    return datamat, labelmat

# Load the data at import time so the experiments below can use it.
# NOTE(review): runs on import and requires 'testset.txt' in the CWD — confirm intended.
dataarr,labelmat = loaddataset()

##print(dataarr)

##print(labelmat)

def sigmoid(inx):
    """Logistic function 1 / (1 + e^-x); applies elementwise to numpy inputs."""
    exp_term = np.exp(-inx)
    return 1.0 / (1 + exp_term)

def gradascent(datamatin, classlabels):
    """Batch gradient ascent for logistic regression.

    Args:
        datamatin: 2-D data, each row is a sample [1.0, x1, x2, ...].
        classlabels: sequence of 0/1 class labels, one per sample.
    Returns:
        (n, 1) numpy matrix of fitted weights.
    """
    datamatrix = np.mat(datamatin)                    # convert to numpy matrix
    labelmat = np.mat(classlabels).transpose()        # column vector of labels
    m, n = np.shape(datamatrix)
    alpha = 0.001      # step size (learning rate) controlling each update
    maxcycles = 500    # maximum number of iterations
    weights = np.ones((n, 1))
    for k in range(maxcycles):
        # vectorized gradient-ascent step: h = sigmoid(X w) is the hypothesis
        h = sigmoid(datamatrix * weights)
        # difference between true labels and predictions
        error = labelmat - h
        # adjust weights in the direction of the error (gradient of log-likelihood)
        weights = weights + alpha * datamatrix.transpose() * error
    return weights

# Run batch gradient ascent on the loaded data (return value discarded here).
gradascent(dataarr,labelmat)

def plotbestfit(weights):
    """Scatter-plot the two classes and draw the fitted decision boundary.

    Args:
        weights: length-3 weight vector [w0, w1, w2]; the boundary is the
            line where w0 + w1*x1 + w2*x2 == 0.
    """
    import matplotlib.pyplot as plt
    datamat, labelmat = loaddataset()
    dataarr = np.array(datamat)          # rows are [1.0, x1, x2]
    n = np.shape(dataarr)[0]             # number of data points
    xcord1 = []; ycord1 = []             # class-1 coordinates
    xcord2 = []; ycord2 = []             # class-0 coordinates
    for i in range(n):
        if int(labelmat[i]) == 1:
            xcord1.append(dataarr[i, 1]); ycord1.append(dataarr[i, 2])
        else:
            xcord2.append(dataarr[i, 1]); ycord2.append(dataarr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # boundary: w0 + w1*x + w2*y = 0  =>  y = (-w0 - w1*x) / w2
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('x1'); plt.ylabel('x2')
    plt.show()

##from numpy import *

##weights = gradascent(dataarr,labelmat)

##plotbestfit(weights.geta())

##print(len(weights))

##print(len(weights.geta()))

def stocgradascent0(datamatrix, classlabels):
    """Stochastic gradient ascent: one sequential pass, one sample per update.

    Args:
        datamatrix: numpy array of samples, each row [1.0, x1, x2, ...].
        classlabels: sequence of 0/1 labels, one per sample.
    Returns:
        1-D numpy array of fitted weights.
    """
    # use np.* explicitly — the original relied on a commented-out
    # `from numpy import *`, so bare shape/ones raised NameError
    m, n = np.shape(datamatrix)
    alpha = 0.01
    weights = np.ones(n)  # initialize to all ones
    for i in range(m):
        h = sigmoid(sum(datamatrix[i] * weights))  # scalar hypothesis for sample i
        error = classlabels[i] - h
        weights = weights + alpha * error * datamatrix[i]
    return weights

##from numpy import *

##weights = stocgradascent0(array(dataarr),labelmat)

##plotbestfit(weights)

def stocgradascent1(datamatrix, classlabels, numiter=150):
    """Improved stochastic gradient ascent.

    Improvements over stocgradascent0: the step size alpha decays with each
    update, and each epoch visits every sample exactly once in random order.

    Args:
        datamatrix: numpy array of samples, each row [1.0, x1, x2, ...].
        classlabels: sequence of 0/1 labels, one per sample.
        numiter: number of passes (epochs) over the data.
    Returns:
        1-D numpy array of fitted weights.
    """
    m, n = np.shape(datamatrix)
    weights = np.ones(n)  # initialize to all ones
    for j in range(numiter):
        dataindex = list(range(m))  # indices not yet used this epoch
        for i in range(m):
            # alpha decreases with iteration but never reaches 0
            # thanks to the additive constant
            alpha = 4 / (1.0 + j + i) + 0.0001
            randindex = int(np.random.uniform(0, len(dataindex)))
            # fix of the classic erratum: sample through dataindex so each
            # point is used once per epoch (otherwise del below is pointless)
            sample = dataindex[randindex]
            h = sigmoid(sum(datamatrix[sample] * weights))
            error = classlabels[sample] - h
            weights = weights + alpha * error * datamatrix[sample]
            del dataindex[randindex]  # don't revisit this sample this epoch
    return weights

##from numpy import *

##weights = stocgradascent1(array(dataarr),labelmat)

##plotbestfit(weights)

def classifyvector(inx, weights):
    """Classify feature vector inx: 1.0 when sigmoid(w . x) exceeds 0.5, else 0.0."""
    probability = sigmoid(sum(inx * weights))
    return 1.0 if probability > 0.5 else 0.0

def colictest():
    """Train on the horse-colic training set and report the test error rate.

    Expects 'horsecolictraining.txt' and 'horsecolictest.txt' in the CWD,
    each line holding 21 tab-separated features followed by the label.
    Returns:
        float error rate on the test set.
    """
    trainingset = []
    traininglabels = []
    # 'with' guarantees the file handles are closed (original leaked them)
    with open('horsecolictraining.txt') as frtrain:
        for line in frtrain.readlines():
            currline = line.strip().split('\t')
            linearr = []
            for i in range(21):
                linearr.append(float(currline[i]))
            trainingset.append(linearr)
            traininglabels.append(float(currline[21]))
    trainweights = stocgradascent1(np.array(trainingset), traininglabels, 1000)
    errorcount = 0
    numtestvec = 0.0
    with open('horsecolictest.txt') as frtest:
        for line in frtest.readlines():
            numtestvec += 1.0
            currline = line.strip().split('\t')
            linearr = []
            for i in range(21):
                linearr.append(float(currline[i]))
            if int(classifyvector(np.array(linearr), trainweights)) != int(currline[21]):
                errorcount += 1
    errorrate = float(errorcount) / numtestvec
    print("the error rate of this test is: %f" % errorrate)
    return errorrate

3 機器學習實戰 第五章 logistic回歸

1.geta 方法 def plotbestfit weights datamat,labelmat loaddataset dataarr array datamat 將每個資料點的x,y座標存為矩陣的形式 n shape dataarr 0 取其行數,也即資料點的個數 畫資料點 xcord1 y...

機器學習第五章

今天看了 機器學習 的第五章神經網路部分。本章主要講述的是 1.神經元是神經網路模型的最基本的單元 2.m p神經元模型包括輸入層 隱層和輸出層。2.1輸入訊號進行加權式的連線 2.2輸入層僅接受輸入,不進行函式處理,隱層和輸出層包含神經功能元 3.比較經典的神經網路的演算法是bp演算法 標準bp演...

機器學習(第五章)

最普遍的定義是 神經網路是由具有適應性的簡單單元組成的廣泛並行互聯的網路,它的組織能夠模擬生物神經系統對真實世界物體做出的互動反應。神經網路的最基本成分是神經元模型。在生物網路中,神經元的活動是以0或1的狀態存在的,乙個神經元可以接受多個神經元的訊號輸入,當所有輸入超出閾值時,接受輸入的神經元就會發...