網上有很多網格調參方法,筆者整理後,編寫了以下模型應用框架。不喜勿噴。
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

try:
    from sklearn.model_selection import GridSearchCV
except ImportError:  # very old scikit-learn only ships sklearn.grid_search
    from sklearn.grid_search import GridSearchCV
# Load the pre-extracted feature tables used for training and evaluation.
train_data = pd.read_csv('c://users//holy//desktop//train_feature.csv')
test_data = pd.read_csv('c://users//holy//desktop//test_feature.csv')

# Rebalance the training set: keep every positive row and draw up to twice
# as many negative rows.
ztrain_data = train_data[train_data.is_attributed == 1]  # positive samples
negatives = train_data[train_data.is_attributed == 0]
# DataFrame.sample draws without replacement and raises ValueError when asked
# for more rows than exist, so cap the request at the number available.
n = min(len(ztrain_data) * 2, len(negatives))
ftrain_data = negatives.sample(n=n)  # sampled negative rows
df_train = pd.concat([ztrain_data, ftrain_data])  # rebalanced training data
df_test = test_data
# Wrap the classifier in a Pipeline so that grid-search parameter names are
# addressed through the step name ('1-dtc__<param>').
pipeline = Pipeline([
    ('1-dtc', DecisionTreeClassifier()),
])
# Parameter space to search.
# NOTE(review): the original grid was missing from this file; the candidate
# values below are placeholders -- tune them for the actual data set.
parameters = {
    '1-dtc__max_depth': [3, 5, 10, None],
    '1-dtc__min_samples_split': [2, 10, 50],
}
# Search the parameter space exhaustively with cross-validation.
grid_search = GridSearchCV(pipeline, parameters)
clf = grid_search  # the model is the grid-searched decision tree
# Feature engineering


def _split_day(frame):
    """Return *frame* joined with 'year', 'month' and 'day0' columns.

    The new columns hold the string pieces obtained by splitting the
    'day' column on '-'.
    """
    parts = frame['day'].str.split('-', expand=True)
    parts.columns = ['year', 'month', 'day0']
    return frame.join(parts)


# 1. Expand the 'day' feature into three columns, for both tables.
df_train = _split_day(df_train)
df_test = _split_day(df_test)

# Model inputs as arrays: drop the label and the raw 'day' column (and, for
# the test table, 'click_id' as well).
x_train = np.array(df_train.drop(['is_attributed', 'day'], axis=1))
x_test = np.array(df_test.drop(['is_attributed', 'day', 'click_id'], axis=1))
y_train = np.array(df_train['is_attributed'])
y_test = np.array(df_test['is_attributed'])
#定義準確率函式
def pre_rate(res, y_true=None):
    """Print and return the fraction of predictions that match the labels.

    Args:
        res: Sequence of predicted labels.
        y_true: Sequence of true labels, compared pairwise with ``res``.
            Defaults to the module-level ``y_test``.

    Returns:
        The accuracy as a float; 0.0 when ``res`` is empty.
    """
    if y_true is None:
        y_true = y_test
    total = len(res)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        rate = 0.0
    else:
        hits = sum(1 for truth, pred in zip(y_true, res) if truth == pred)
        rate = float(hits) / total
    print(rate)
    return rate
# Fit the grid search on the training arrays.
clf.fit(x_train, y_train)
best_model = clf.best_estimator_
res = best_model.predict(x_test)
# Error analysis: report how many test predictions match the labels.
pre_rate(res)
# Show which parameter values the best classifier ended up using.
best_parameters = best_model.get_params()
for param_name in sorted(parameters):
    chosen = best_parameters[param_name]
    print("\t%s: %r" % (param_name, chosen))
GridSearchCV交叉驗證
實現 基於邏輯回歸演算法 1 coding utf 8 2 3created on sat sep 1 11 54 48 201845 author zhen67 交叉驗證 8 9import numpy as np 10from sklearn import datasets 11from skl...
網格搜尋GridSearchCV引數詳細解析
一、網格搜尋:尋求引數最優的一種方法。使用:`from sklearn.model_selection import GridSearchCV`;`class sklearn.model_selection.GridSearchCV(estimator, param_grid, scoring=None, fit_...`
轉 網格搜尋GridSearchCV引數詳細解析
一、網格搜尋:尋求引數最優的一種方法。首先為想要調參的引數設定一組候選值,然後網格搜尋會窮舉各種引數組合,根據設定的評分機制找到最好的那一組設定。使用:`from sklearn.model_selection import GridSearchCV`;`class sklearn.model_select...`