import numpy as np
def loaddataset():
    datamat = []; labelmat = []
    fr = open('testset.txt')
    for line in fr.readlines():
        linearr = line.strip().split()
        # each row of testset.txt: x1, x2, class label; prepend x0 = 1.0 for the bias term
        datamat.append([1.0, float(linearr[0]), float(linearr[1])])
        labelmat.append(int(linearr[2]))
    fr.close()
    return datamat, labelmat
dataarr,labelmat = loaddataset()
##print(dataarr)
##print(labelmat)
def sigmoid(inx):
    return 1.0/(1 + np.exp(-inx))
def gradascent(datamatin, classlabels):
    datamatrix = np.mat(datamatin)                 # convert the data to a NumPy matrix
    labelmat = np.mat(classlabels).transpose()     # convert the labels to a matrix and transpose to a column vector
    m, n = np.shape(datamatrix)
    alpha = 0.001      # step size (learning rate): controls how far each update moves
    maxcycles = 500    # maximum number of iterations
    weights = np.ones((n, 1))
    for k in range(maxcycles):
        h = sigmoid(datamatrix*weights)            # vectorized gradient ascent: h_theta(x) via the sigmoid
        error = (labelmat - h)                     # difference between the true labels and the predictions
        weights = weights + alpha * datamatrix.transpose() * error   # adjust the weights in the direction of the error
    return weights
gradascent(dataarr,labelmat)
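# A quick sanity check for gradascent on a tiny made-up dataset instead of
# testset.txt; the four points below are hypothetical, chosen only so the
# two classes are linearly separable.
toydata = [[1.0, 0.5, 2.1], [1.0, 0.9, 1.8], [1.0, 2.0, 0.3], [1.0, 2.5, 0.6]]
toylabels = [1, 1, 0, 0]
print(gradascent(toydata, toylabels))   # a 3x1 weight matrix; exact values depend on the data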
def plotbestfit(weights):
    import matplotlib.pyplot as plt
    datamat, labelmat = loaddataset()
    dataarr = np.array(datamat)      # store each data point's coordinates as an array
    n = np.shape(dataarr)[0]         # number of data points
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):               # split the points by class for plotting
        if int(labelmat[i]) == 1:
            xcord1.append(dataarr[i, 1]); ycord1.append(dataarr[i, 2])
        else:
            xcord2.append(dataarr[i, 1]); ycord2.append(dataarr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1]*x)/weights[2]   # decision boundary: w0 + w1*x1 + w2*x2 = 0
    ax.plot(x, y)
    plt.xlabel('x1'); plt.ylabel('x2')
    plt.show()
##weights = gradascent(dataarr, labelmat)
##plotbestfit(weights.getA())   # getA() turns the weight matrix into an ndarray for plotting
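# Why that line is the decision boundary: sigmoid(z) = 0.5 exactly when
# z = w0 + w1*x1 + w2*x2 = 0, so solving for x2 gives x2 = (-w0 - w1*x1)/w2,
# which is the y computed in plotbestfit; points on either side of the line
# score above or below 0.5.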
def stocgradascent0(datamatrix, classlabels):
    m, n = np.shape(datamatrix)
    alpha = 0.01
    weights = np.ones(n)                   # initialize to all ones
    for i in range(m):                     # one update per training sample, single pass
        h = sigmoid(sum(datamatrix[i]*weights))
        error = classlabels[i] - h
        weights = weights + alpha * error * datamatrix[i]
    return weights
##weights = stocgradascent0(np.array(dataarr), labelmat)
##plotbestfit(weights)
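# Unlike gradascent, which sweeps all m samples maxcycles times, stocgradascent0
# makes a single pass and updates on one sample at a time. Note it expects a
# NumPy array (elementwise multiply), not a matrix; the data below is made up.
toydata0 = np.array([[1.0, 0.5, 2.1], [1.0, 0.9, 1.8], [1.0, 2.0, 0.3], [1.0, 2.5, 0.6]])
toylabels0 = [1, 1, 0, 0]
print(stocgradascent0(toydata0, toylabels0))   # a length-3 ndarray after one pass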
def stocgradascent1(datamatrix, classlabels, numiter=150):
    m, n = np.shape(datamatrix)
    weights = np.ones(n)                   # initialize to all ones
    for j in range(numiter):
        dataindex = list(range(m))
        for i in range(m):
            # alpha decreases with each iteration but never reaches 0,
            # thanks to the 0.0001 constant
            alpha = 4/(1.0 + j + i) + 0.0001
            # choose from the not-yet-used samples so each pass visits every sample once
            randindex = int(np.random.uniform(0, len(dataindex)))
            h = sigmoid(sum(datamatrix[dataindex[randindex]]*weights))
            error = classlabels[dataindex[randindex]] - h
            weights = weights + alpha * error * datamatrix[dataindex[randindex]]
            del(dataindex[randindex])
    return weights
##weights = stocgradascent1(np.array(dataarr), labelmat)
##plotbestfit(weights)
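# The step size in stocgradascent1 decays as training progresses but never
# reaches zero; printing the first few values makes the schedule visible:
for j in range(2):            # outer iteration
    for i in range(3):        # sample counter within the pass
        print(j, i, round(4/(1.0 + j + i) + 0.0001, 4))
# prints 4.0001, 2.0001, 1.3334 for j=0, then 2.0001, 1.3334, 1.0001 for j=1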
def classifyvector(inx, weights):
    prob = sigmoid(sum(inx*weights))
    if prob > 0.5: return 1.0
    else: return 0.0
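# classifyvector with made-up weights, purely illustrative of the 0.5 threshold:
wdemo = np.array([4.0, 0.5, -0.6])
print(classifyvector(np.array([1.0, 2.0, 9.0]), wdemo))   # sum = -0.4, sigmoid < 0.5 -> 0.0
print(classifyvector(np.array([1.0, 2.0, 5.0]), wdemo))   # sum =  2.0, sigmoid > 0.5 -> 1.0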
def colictest():
    frtrain = open('horsecolictraining.txt'); frtest = open('horsecolictest.txt')
    trainingset = []; traininglabels = []
    for line in frtrain.readlines():
        currline = line.strip().split('\t')
        linearr = []
        for i in range(21):                # the first 21 columns are features
            linearr.append(float(currline[i]))
        trainingset.append(linearr)
        traininglabels.append(float(currline[21]))   # the last column is the label
    trainweights = stocgradascent1(np.array(trainingset), traininglabels, 1000)
    errorcount = 0; numtestvec = 0.0
    for line in frtest.readlines():
        numtestvec += 1.0
        currline = line.strip().split('\t')
        linearr = []
        for i in range(21):
            linearr.append(float(currline[i]))
        if int(classifyvector(np.array(linearr), trainweights)) != int(currline[21]):
            errorcount += 1
    frtrain.close(); frtest.close()
    errorrate = float(errorcount)/numtestvec
    print("the error rate of this test is: %f" % errorrate)
    return errorrate
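# Because stocgradascent1 samples randomly, colictest gives a different error
# rate on every run; a small averaging driver (a sketch, assuming
# horsecolictraining.txt and horsecolictest.txt are present):
def multitest(numtests=10):
    errorsum = 0.0
    for k in range(numtests):
        errorsum += colictest()
    print("after %d iterations the average error rate is: %f" % (numtests, errorsum/float(numtests)))
##multitest()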