上面這段用的是隨機梯度下降演算法。原來無法收斂的原因有二:1)1樓提到的 error1 沒有復位置零,以及下標問題;2)步長 alpha 和誤差 epsilon 的取值不合適。alpha 和 epsilon 的取值感覺比較關鍵:我用原先的輸入資料,怎麼調整這兩個引數也沒法收斂;換了一組數(也就是上面這段程式碼裡的訓練資料)很快就得到結果了。
import sys
# Stochastic gradient descent fit of y = theta0*x0 + theta1*x1 + theta2*x2.
# training data set
# each element in x represents (x0, x1, x2)
x = [(1, 0., 3), (1, 1., 3), (1, 2., 3), (1, 3., 2), (1, 4., 4)]
# y[i] is the output of y = theta0 * x[0] + theta1 * x[1] + theta2 * x[2]
y = [95.364, 97.217205, 75.195834, 60.105519, 49.342380]

# stop once the cost changes by less than this between two consecutive passes
epsilon = 0.0001
# learning rate
alpha = 0.01
diff = [0, 0]  # only slot 0 is used: residual of the sample being processed
max_itor = 1000  # not consulted by the loop below
error1 = 0  # cost after the current pass
error0 = 0  # cost after the previous pass
cnt = 0  # number of passes over the data so far
m = len(x)

# init the parameters to zero
theta0 = 0
theta1 = 0
theta2 = 0

while True:
    cnt = cnt + 1

    # calculate the parameters: one update per sample, in data order
    for sample, target in zip(x, y):
        # x0 is 1 in every sample, so theta0 enters the prediction unscaled
        diff[0] = target - (theta0 + theta1 * sample[1] + theta2 * sample[2])
        theta0 = theta0 + alpha * diff[0] * sample[0]
        theta1 = theta1 + alpha * diff[0] * sample[1]
        theta2 = theta2 + alpha * diff[0] * sample[2]

    # calculate the cost function; reset first so passes do not accumulate
    error1 = 0
    for sample, target in zip(x, y):
        # every sample contributes its own squared residual
        error1 += (
            target - (theta0 + theta1 * sample[1] + theta2 * sample[2])
        ) ** 2 / 2

    # converged: leave error0 at the previous cost and stop
    if abs(error1 - error0) < epsilon:
        break
    error0 = error1

    # parenthesised form prints the same text on Python 2 and Python 3
    print(' theta0 : %f, theta1 : %f, theta2 : %f, error1 : %f'%(theta0,theta1,theta2,error1))

print('done: theta0 : %f, theta1 : %f, theta2 : %f'%(theta0,theta1,theta2))
# training data set: (x, y) pairs
# NOTE(review): in this copy both list literals end without a closing
# bracket, so further points may have been lost. They are closed here after
# the last visible point so the module parses -- confirm against the
# original data before relying on the fit.
data1 = [
    (0.000000, 95.364693),
    (1.000000, 97.217205),
    (3.000000, 60.105519),
    (11.000000, -4.383926),
]
data2 = [
    (2104., 400.),
]
def create_hypothesis(theta1, theta0):
    """Return the line ``h(x) = theta1 * x + theta0`` as a callable.

    Note the argument order: slope (theta1) first, then intercept (theta0).
    """
    return lambda x: theta1 * x + theta0
def linear_regression(data, learning_rate=0.001, variance=0.00001):
    """Fit ``y = theta1 * x + theta0`` to *data* by batch gradient descent.

    Takes a set of data points in the form ``[(1, 1), (2, 2), ...]`` and
    returns ``(y0, slope)``, i.e. ``(theta0, theta1)`` -- intercept first.

    ``learning_rate`` scales every update step. Iteration stops once neither
    parameter moved by more than ``variance`` during the last step. An empty
    ``data`` raises ZeroDivisionError from the ``1. / m`` averaging factor.
    """
    # init the parameters
    theta0_guess = 1.
    theta1_guess = 1.
    # start 99 away from the guesses, so the loop is entered for any
    # variance below that
    theta0_last = 100.
    theta1_last = 100.
    m = len(data)
    while (abs(theta1_guess - theta1_last) > variance
           or abs(theta0_guess - theta0_last) > variance):
        theta1_last = theta1_guess
        theta0_last = theta0_guess
        # The hypothesis captures the pre-update parameters, so both updates
        # below are computed from the same line (simultaneous update).
        hypothesis = create_hypothesis(theta1_guess, theta0_guess)
        # prediction error per point, shared by both gradient components
        residuals = [hypothesis(point[0]) - point[1] for point in data]
        theta0_guess = theta0_guess - learning_rate * (1. / m) * sum(residuals)
        theta1_guess = theta1_guess - learning_rate * (1. / m) * sum(
            residual * point[0] for residual, point in zip(residuals, data))
    return (theta0_guess, theta1_guess)
# Fit a line to data1 and show the resulting (theta0, theta1) pair.
# px/py rather than x/y: under Python 2 a list comprehension's loop variables
# leak into the enclosing scope and would overwrite the x and y training
# lists defined earlier in this file.
points = [(float(px), float(py)) for (px, py) in data1]
res = linear_regression(points)
# parenthesised form prints the tuple identically on Python 2 and Python 3
print(res)
