import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression  # logistic-regression classifier (Ridge lives in the same module)
from sklearn.metrics import classification_report, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler  # feature standardization

try:
    from sklearn.datasets import load_boston  # Boston housing loader; removed in scikit-learn 1.2
except ImportError:
    load_boston = None

try:
    import joblib
except ImportError:  # very old scikit-learn releases only ship the bundled copy
    from sklearn.externals import joblib
def charge_data():
    """Train and evaluate a logistic-regression classifier on the breast-cancer file.

    Reads ``./breast-cancer-wisconsin.data`` (no header row), drops every row
    that contains the ``"?"`` missing-value marker, splits the rows 75/25 into
    a training and a test set, standardizes the features and fits
    ``LogisticRegression``. It then prints the coefficients, the predictions
    for the training set, the accuracy on the test set and a classification
    report for the training set.

    Returns:
        None. All results are printed.
    """
    # Column labels for the header-less file. The first column is not used as
    # a feature; the last one is the class label.
    colums = [
        "colun1",
        "colum2",
        "colum3",
        "colum4",
        "colum5",
        "colum6",
        "colum7",
        "colum8",
        "colum9",
        "colum10",
        "target",
    ]

    # Load the data.
    data = pd.read_csv("./breast-cancer-wisconsin.data", names=colums)

    # Missing values are marked with "?"; turn them into NaN and drop the rows.
    data = data.replace(to_replace="?", value=np.nan)
    data = data.dropna()

    # Split into training and test sets (no random_state, so the split differs
    # between runs).
    x_train, x_test, y_train, y_test = train_test_split(
        data[colums[1:10]],
        data[colums[10]],
        test_size=0.25,
    )

    # Standardize the features. The scaler returns new arrays, so the results
    # must be assigned back; otherwise the model is fitted on unscaled data.
    std = StandardScaler()
    x_train = std.fit_transform(x_train)
    x_test = std.transform(x_test)

    # Fit the logistic-regression model (the regularization keyword is
    # upper-case ``C``).
    lg = LogisticRegression(C=1.0)
    lg.fit(x_train, y_train)
    print("回歸引數:", lg.coef_)

    # NOTE(review): predictions and the report below are computed on the
    # TRAINING split, while the accuracy line uses the test split. Kept as in
    # the original; confirm this mix is intended.
    # NOTE(review): the "**" in the label below looks like a redacted word in
    # the source text; confirm the intended wording.
    pre = lg.predict(x_train)
    print("**值", pre)
    print("準確率:", lg.score(x_test, y_test))
    print(
        "召回率:\n",
        classification_report(
            y_train,
            pre,
            labels=[2, 4],
            target_names=["良性", "惡性"],
        ),
    )
def get_train():
    """Fit the breast-cancer classifier and save it to ``text.pkl``.

    Reads ``./breast-cancer-wisconsin.data`` (no header row), drops every row
    that contains the ``"?"`` missing-value marker, takes a random 75 % of the
    rows as the training set and fits a ``StandardScaler`` +
    ``LogisticRegression`` pipeline on it. The fitted pipeline is written to
    ``text.pkl`` with ``joblib.dump``.

    The scaler is saved together with the classifier so that whoever loads the
    file can call ``predict`` / ``score`` on raw feature columns. The
    classifier itself is the last pipeline step (``model[-1]``), e.g. for
    reading ``coef_``.

    Returns:
        None. Prints a confirmation message after the file is written.
    """
    # Column labels for the header-less file. The first column is not used as
    # a feature; the last one is the class label.
    colums = [
        "colun1",
        "colum2",
        "colum3",
        "colum4",
        "colum5",
        "colum6",
        "colum7",
        "colum8",
        "colum9",
        "colum10",
        "target",
    ]

    # Load the data.
    data = pd.read_csv("./breast-cancer-wisconsin.data", names=colums)

    # Missing values are marked with "?"; turn them into NaN and drop the rows.
    data = data.replace(to_replace="?", value=np.nan)
    data = data.dropna()

    # Only the training part of the split is needed here (no random_state, so
    # the split differs between runs).
    x_train, _x_test, y_train, _y_test = train_test_split(
        data[colums[1:10]],
        data[colums[10]],
        test_size=0.25,
    )

    # Standardization and logistic regression are fitted as one pipeline so
    # the scaling is actually applied and is stored with the model.
    model = make_pipeline(StandardScaler(), LogisticRegression(C=1.0))
    model.fit(x_train, y_train)

    joblib.dump(model, filename="text.pkl")
    print("儲存成功")
if __name__ == '__main__':
    # Loads the model saved in text.pkl and evaluates it on the data file.
    # text.pkl must already exist; un-comment the call below to (re)create it.
    # get_train()

    # Column labels for the header-less file. The first column is not used as
    # a feature; the last one is the class label.
    colums = [
        "colun1",
        "colum2",
        "colum3",
        "colum4",
        "colum5",
        "colum6",
        "colum7",
        "colum8",
        "colum9",
        "colum10",
        "target",
    ]

    data = pd.read_csv("./breast-cancer-wisconsin.data", names=colums)

    # Missing values are marked with "?"; turn them into NaN and drop the rows.
    data = data.replace(to_replace="?", value=np.nan)
    data = data.dropna()

    # NOTE(review): this is a fresh random split (no random_state), so it is
    # unrelated to whatever rows the loaded model was fitted on; the numbers
    # printed below are not a held-out evaluation.
    x_train, x_text, y_train, y_text = train_test_split(
        data[colums[1:10]],
        data[colums[10]],
        test_size=0.25,
    )

    # NOTE: joblib.load unpickles the file; only load files you created yourself.
    lg = joblib.load("text.pkl")

    # NOTE(review): the "**" in the labels below looks like a redacted word in
    # the source text; confirm the intended wording.
    pre = lg.predict(x_text)
    print("**資料是", pre)
    print("**準確率", lg.score(x_train, y_train))

# Plan for tomorrow:
# 1. Save the trained model and use it to predict the test set; check whether
#    the accuracy is 100%.
# 2. Use the trained model to predict the training set and see whether that
#    is 100%.
# 3. Review the parts I did not understand before.
訓練模型儲存和載入(sklearn)
很多模型訓練完成之後,可以進行儲存,下次使用時直接呼叫即可,不需要再次訓練資料。接下來我將介紹 sklearn 中模型的儲存和載入。先匯入:`from sklearn.externals import joblib`;儲存訓練模型:`joblib.dump(lr, "tmp/test.pkl")`;匯入模型資料:`lr2 = jobl...`
儲存和載入模型
在訓練模型過程中,由於資料集較大、模型訓練迭代次數較多等原因,模型訓練較為耗時,因此將訓練好的模型儲存起來以便下次直接使用是很有必要的。下面介紹兩種模型的儲存和載入方法。1. 使用 pickle 模組:(1) 儲存模型:`with open("模型儲存的位置", "wb") as f: pickle.dump(model, f)`...
模型儲存和載入
當訓練或者計算好一個模型之後,如果別人需要我們提供結果,就需要儲存模型(主要是儲存演算法的引數)。使用線性模型進行預測(使用正規方程求解):`lr = LinearRegression()`。此時在幹什麼?`lr.fit(x_train, y_train)`。儲存訓練完結束的模型:`joblib.dump(lr, "test.p...`