# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
def init():
    """Train and evaluate a logistic-regression classifier on the breast-cancer CSV.

    Steps: read ``./breast-cancer.csv``, drop the unused columns, encode the
    ``diagnosis`` label, split the rows 70/30 into train and test sets, fit
    the model with ``model(...)``, plot the recorded costs, and print the
    accuracy on both splits.  Nothing is returned.
    """
    df = pd.read_csv("./breast-cancer.csv")

    # Drop the columns that are not used as features.  ``columns=`` is used
    # because pandas 2.x no longer accepts the axis positionally
    # (``df.drop("id", 1)``).
    # NOTE(review): the column name "unnamed: 32" is kept exactly as found in
    # the source; confirm its casing against the CSV header.
    df = df.drop(columns=["id", "unnamed: 32"])

    # Encode the label column as numbers so the accuracy arithmetic below
    # works.
    # NOTE(review): the mapping literal was missing in the source (``map()``
    # with no argument raises TypeError).  "M" -> 1 / "B" -> 0 is the presumed
    # encoding; confirm against the label values in the CSV.
    df["diagnosis"] = df["diagnosis"].map({"M": 1, "B": 0})

    # Split into a training set (70%) and a test set (30%).
    train, test = train_test_split(df, test_size=0.3, random_state=1)

    # Select features/labels and convert them to NumPy arrays.
    feature_cols = slice("radius_mean", "fractal_dimension_worst")
    train_x = np.asarray(train.loc[:, feature_cols])
    train_y = np.asarray(train.loc[:, ["diagnosis"]])
    test_x = np.asarray(test.loc[:, feature_cols])
    test_y = np.asarray(test.loc[:, ["diagnosis"]])

    # Fit the logistic-regression model on the training data.  The arrays are
    # transposed so that each column is one sample.
    d = model(train_x.T, train_y.T, num_of_iterations=10000, alpha=0.000001)
    costs = d["costs"]
    w = d["w"]
    b = d["b"]

    # Plot the recorded costs.
    plt.plot(costs)
    plt.title("損失-迭代次數")
    plt.xlabel("迭代次數(x100)")
    plt.ylabel("損失")

    # Accuracy = 100 - mean absolute error (in percent) between the 0/1
    # predictions and the labels.
    y_prediction_train = predict(train_x.T, w, b)
    y_prediction_test = predict(test_x.T, w, b)
    print("\n訓練資料測試精確度: {}%".format(
        100 - np.mean(np.abs(y_prediction_train - train_y.T)) * 100))
    print("\n測試資料測試精確度: {}%".format(
        100 - np.mean(np.abs(y_prediction_test - test_y.T)) * 100))

    plt.show()


# Initialize the weights
def initialize(m):
    """Return zero-initialised parameters for ``m`` features.

    Args:
        m: Number of features (length of the weight vector).

    Returns:
        ``(w, b)`` where ``w`` is an ``(m, 1)`` array of zeros and ``b`` is 0.
    """
    w = np.zeros((m, 1))
    b = 0
    return w, b
# sigmoid函式
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Works element-wise on NumPy arrays as well as on scalars.
    """
    return 1 / (1 + np.exp(-x))


# Forward and backward propagation
def propogate(x, y, w, b):
    """Run one forward and backward pass of logistic regression.

    Args:
        x: Feature matrix whose columns are samples (``x.shape[1]`` is used
            as the sample count).
        y: Labels, combined element-wise with the activations.
        w: Weight vector; ``w.T`` is multiplied with ``x``.
        b: Bias added to every sample.

    Returns:
        ``(grads, cost)``: ``grads`` is a dict with keys ``"dw"`` and
        ``"db"`` holding the gradients for ``w`` and ``b``; ``cost`` is the
        cross-entropy loss averaged over the samples.
    """
    # Number of samples.
    m = x.shape[1]

    # Forward pass: activations and loss.
    z = np.dot(w.T, x) + b
    a = sigmoid(z)
    cost = -(1 / m) * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))

    # Backward pass: gradients of the loss.
    error = a - y
    dw = (1 / m) * np.dot(x, error.T)
    db = (1 / m) * np.sum(error)

    grads = {"dw": dw, "db": db}
    return grads, cost
# 執行梯度下降
def optimize(x, y, w, b, num_of_iterations, alpha):
    """Fit ``w`` and ``b`` with plain gradient descent.

    Args:
        x: Feature matrix passed through to ``propogate``.
        y: Labels passed through to ``propogate``.
        w: Initial weights.
        b: Initial bias.
        num_of_iterations: Number of gradient-descent steps.
        alpha: Learning rate.

    Returns:
        ``(parameters, grads, costs)``: ``parameters`` holds the final
        ``"w"`` and ``"b"``, ``grads`` the last ``"dw"`` and ``"db"``
        (``None`` when no iteration ran), and ``costs`` the loss recorded
        every 100 iterations.
    """
    costs = []
    # Defined up front so a zero-iteration call does not hit unbound names.
    dw = db = None

    for i in range(num_of_iterations):
        grads, cost = propogate(x, y, w, b)
        dw = grads["dw"]
        db = grads["db"]

        w = w - alpha * dw
        b = b - alpha * db

        # Record and report the loss every 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            # The format string needs one placeholder per argument.
            print("%d次迭代後的損失度: %f" % (i, cost))

    # NOTE(review): these dict literals were missing in the source; the
    # "w"/"b" keys are presumed from how the fitted values are read back --
    # confirm against the caller.
    parameters = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return parameters, grads, costs
# Predict labels for a data set
def predict(x, w, b):
    """Predict 0/1 labels for every sample in ``x``.

    Args:
        x: Feature matrix whose columns are samples.
        w: Weights; reshaped to ``(x.shape[0], 1)`` before use.
        b: Bias.

    Returns:
        A float array of shape ``(1, x.shape[1])`` containing 0 where the
        sigmoid activation is below 0.5 and 1 otherwise.
    """
    w = w.reshape(x.shape[0], 1)
    a = sigmoid(np.dot(w.T, x) + b)

    # Vectorised threshold.  Written as ``a < 0.5 -> 0 else 1`` so that any
    # value failing the comparison (including NaN) maps to 1, exactly as the
    # element-wise if/else did.
    y_prediction = np.where(a < 0.5, 0.0, 1.0)
    return y_prediction
# 計算邏輯回歸模型
defmodel
(xtrain, ytrain, num_of_iterations, alpha)
:# 獲取特徵數量
機器學習 邏輯回歸 Python實現邏輯回歸
coding utf 8 author 蔚藍的天空tom import numpy as np import os import matplotlib.pyplot as plt from sklearn.datasets import make blobs global variable path...
邏輯回歸模型 SAS邏輯回歸模型訓練
邏輯回歸模型是金融信貸行業製作各類評分卡模型的核心,幾乎 80% 的機器學習、統計學習模型演算法都是邏輯回歸模型。按照邏輯美國金融公司總結的 SAS 建模過程,大致總結如下。一般通用模型訓練過程:(a) 按照指定需求和模型要求製作 driver 資料集,包含欄位有 user_id、dep;(b) 其中,空值賦預設值即;(c) ...
線性回歸與邏輯回歸
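損失函數(cost function):$J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2$;假設函數(hypothesis):$h_\theta(x) = \theta^T x$。梯度下降求解:為了最小化 $J(\theta)$,$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$;每一次迭代更新:$\theta_j := \theta_j - \alpha \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$。正規方程求解(最小二乘法):$\theta = (X^T X)^{-1} X^T y$...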