使用網格搜尋的方式:
# -*- coding: utf-8 -*-
# 信用卡違約率分析
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, learning_curve, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Load the data set.
# NOTE(review): the path is kept exactly as it appears in the source; on a
# case-sensitive filesystem confirm that it matches the real file name.
data = pd.read_csv('./uci_credit_card.csv')

# Explore the data.
print(data.shape)  # size of the data set
print(data.describe())  # summary statistics of the data set

# Count how many rows fall into each class of the next-month default label.
next_month = data['default.payment.next.month'].value_counts()
print(next_month)

# One row per label value so that seaborn can draw one bar per class.
# NOTE(review): the frame's contents were missing from the source; the two
# columns are reconstructed from the x/y names passed to sns.barplot below.
df = pd.DataFrame({
    'default.payment.next.month': next_month.index,
    'values': next_month.values,
})
plt.rcParams['font.sans-serif'] = ['simhei']  # lets the Chinese labels render
plt.figure(figsize=(6, 6))
plt.title('信用卡違約率客戶\n (違約:1,守約:0)')
sns.set_color_codes("pastel")
sns.barplot(x='default.payment.next.month', y="values", data=df)
locs, labels = plt.xticks()
plt.show()

# Feature selection: drop the id column (it carries no signal) and split off
# the label column.
# The column is matched case-insensitively because the casing of this script
# was evidently altered; if no such column exists, falling back to ['id']
# keeps the KeyError a plain data.drop(['id'], ...) would raise.
id_columns = [name for name in data.columns if str(name).lower() == 'id'] or ['id']
data.drop(id_columns, inplace=True, axis=1)
target = data['default.payment.next.month'].values
columns = data.columns.tolist()
columns.remove('default.payment.next.month')
features = data[columns].values

# Hold out 30% as the test set, stratified on the label; the rest is training data.
train_x, test_x, train_y, test_y = train_test_split(
    features, target, test_size=0.30, stratify=target, random_state=1)

# Candidate classifiers.
classifiers = [
    SVC(random_state=1, kernel='rbf'),
    DecisionTreeClassifier(random_state=1, criterion='gini'),
    RandomForestClassifier(random_state=1, criterion='gini'),
    KNeighborsClassifier(metric='minkowski'),
]

# Pipeline step name for each classifier, in the same order as `classifiers`.
classifier_names = [
    'svc',
    'decisiontreeclassifier',
    'randomforestclassifier',
    'kneighborsclassifier',
]

# Grid-search parameter grid for each classifier, in the same order.
# Keys use the "<step name>__<parameter>" form so that they address the
# classifier step inside the pipeline.
# NOTE(review): the grid contents were missing from the source (only the
# separating commas survived); the values below are reconstructed and must be
# confirmed before the results are relied on.
classifier_param_grid = [
    {'svc__C': [1], 'svc__gamma': [0.01]},
    {'decisiontreeclassifier__max_depth': [6, 9, 11]},
    {'randomforestclassifier__n_estimators': [3, 5, 6]},
    {'kneighborsclassifier__n_neighbors': [4, 6, 8]},
]
# Tune one classifier pipeline with GridSearchCV and score it on the test set.
def gridsearchcv_work(pipeline, train_x, train_y, test_x, test_y, param_grid, score='accuracy'):
    """Grid-search `pipeline` on the training data and evaluate it on the test data.

    Prints the best parameters and best score found by the search, and the
    accuracy of the fitted search object's predictions on the test set.

    Args:
        pipeline: Estimator passed to GridSearchCV as `estimator`.
        train_x: Training features used to fit the search.
        train_y: Training labels used to fit the search.
        test_x: Test features to predict on.
        test_y: True test labels compared against the predictions.
        param_grid: Parameter grid passed to GridSearchCV as `param_grid`.
        score: Scoring name passed to GridSearchCV as `scoring`.

    Returns:
        A dict with 'predict_y' (the predictions for `test_x`) and
        'accuracy_score' (accuracy of those predictions against `test_y`).
    """
    gridsearch = GridSearchCV(estimator=pipeline, param_grid=param_grid, scoring=score)
    # Find the best parameters and the best score.
    search = gridsearch.fit(train_x, train_y)
    print("gridsearch最優引數:", search.best_params_)
    print("gridsearch最優分數: %0.4lf" % search.best_score_)

    predict_y = gridsearch.predict(test_x)
    # Computed once and reused for both the report and the returned value.
    test_accuracy = accuracy_score(test_y, predict_y)
    print("準確率 %0.4lf" % test_accuracy)

    return {
        'predict_y': predict_y,
        'accuracy_score': test_accuracy,
    }
# Tune and evaluate every candidate classifier in turn.
for model, model_name, model_param_grid in zip(classifiers, classifier_names, classifier_param_grid):
    # Standardize the features, then fit the classifier; the step is named
    # model_name and model_param_grid is the grid paired with this classifier.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        (model_name, model),
    ])
    result = gridsearchcv_work(
        pipeline, train_x, train_y, test_x, test_y, model_param_grid, score='accuracy')
python信用卡違約 Python信用卡驗證
以下是 Luhn 演算法(mod 10 check)的步驟:1. 從右到左,每隔一位將數字翻倍;如果這個「翻倍」結果是兩位數,則將兩位數相加,得到一個一位數的數字。2. 現在將步驟1中的所有單個數字相加。3. 將信用卡號碼中從右到左位於奇數位的數字相加。4. 將步驟2和步驟3的結果相加。5. 如果步驟4的結果可被10整除,則卡號有效,否則無效。我的輸出應...
python信用卡管理 python信用卡操作
python python開發 python語言 python信用卡操作 import datetime user dict dict 用來儲存使用者的賬號 註冊系統 def registered try print welcome to register atm system registered...
python信用卡管理 python信用卡操作
import datetime user dict dict 用來儲存使用者的賬號 註冊系統 def registered try print welcome to register atm system registered user input 請輸入您的賬號 registered pwd in...