import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Load the training data. The CSV has no header row, so supply column names
# explicitly (one id column, six feature columns, one label column).
col_names = ["id","k1k2驅動訊號","電子鎖驅動訊號","急停訊號","門禁訊號","thdv-m","thdi-m","label"]
data = pd.read_csv("data_train.csv",names=col_names)
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
# supported replacement and returns the same ndarray.
dataset_x = data[["k1k2驅動訊號","電子鎖驅動訊號","急停訊號","門禁訊號","thdv-m","thdi-m"]].to_numpy()
# Selecting the single column as a Series yields a 1-D label array directly,
# replacing the old (n, 1)-matrix + reshape round trip.
dataset_y = data["label"].to_numpy()
# Hold out 20% of the rows for validation.
x_train, x_test, y_train, y_test = train_test_split(dataset_x, dataset_y, test_size=0.2)
# Wrap the train/validation splits in LightGBM's Dataset container
# (the class is `Dataset`, capitalized — `lgb.dataset` does not exist).
lgb_train = lgb.Dataset(x_train, y_train)
lgb_eval = lgb.Dataset(x_test, y_test, reference=lgb_train)

# Training parameters. `objective` must be set explicitly: LightGBM
# defaults to regression, while the labels here are binary and the
# metric is AUC. `is_unbalance=True` reweights the minority class.
param = {
    'objective': 'binary',
    'is_unbalance': True,
    'metric': 'auc',
}
print('start training...')
gbm = lgb.train(param,
                lgb_train,
                num_boost_round=500,
                valid_sets=lgb_eval,
                )
print('start predicting...')
# predict() returns the positive-class probability for each row.
y_predict_test = gbm.predict(x_test)
print(y_predict_test)
def read_data():
    """Load the unlabeled test set from data_test.csv.

    The CSV has no header row; missing feature values are filled with 0.

    Returns:
        tuple: (features DataFrame with the six signal columns, id Series).
    """
    col_names = ["id", "k1k2驅動訊號", "電子鎖驅動訊號", "急停訊號", "門禁訊號", "thdv-m", "thdi-m"]
    data = pd.read_csv("data_test.csv", names=col_names)
    # The model cannot handle NaNs coming from empty CSV cells.
    data = data.fillna(0)
    return data[["k1k2驅動訊號", "電子鎖驅動訊號", "急停訊號", "門禁訊號", "thdv-m", "thdi-m"]], data["id"]
# Score the test set and write the submission file.
# (renamed `id` -> `ids` so the builtin id() is not shadowed)
subdata, ids = read_data()
y_predict = gbm.predict(subdata)
print(y_predict)
# Binarize the predicted probabilities at the 0.5 threshold.
y_predict = [1 if p > 0.5 else 0 for p in y_predict]
print(y_predict)
# The submission pairs each row id with its predicted label.
# (pd.DataFrame is capitalized; the original `pd.dataframe()` raised,
# and the predictions were never actually placed in the frame.)
submission = pd.DataFrame({"id": ids, "label": y_predict})
# index/header take booleans, not `none` (which was a NameError).
submission.to_csv("lgb_test1234.csv", index=False, header=False)
使用KNN演算法實現水果分類器
還是水果分類原始資料,這次使用knn演算法實現水果分類器。k值選擇1、3、5、7,看預測結果。結果截選如下:k=1時,整體準確率(accuracy)是66.67%,預測值是0、真實值是0;預測值是3、真實值是3;預測值是2、真實值是2。k=3時,整體準確率(accuracy)是75.00% ...
python使用knn實現特徵向量分類
這是乙個使用knn把特徵向量進行分類的demo。knn演算法的思想簡單說就是 看輸入的sample點周圍的k個點都屬於哪個類,哪個類的點最多,就把sample歸為哪個類。也就是說,訓練集是一些已經被手動打好標籤的資料,knn會根據你打好的標籤來挖掘同類物件的相似點,從而推算sample的標籤。knn...
Matlab使用KNN分類
knn是通過測量不同特徵值之間的距離進行分類。它的思路是:如果乙個樣本在特徵空間中的k個最相似(即特徵空間中最鄰近)的樣本中的大多數屬於某乙個類別,則該樣本也屬於這個類別,其中k通常是不大於20的整數。如下圖,綠色圓要被決定賦予哪個類,是紅色三角形還是藍色四方形?如果k=3,由於紅色三角形所佔比例為...