資料讀取
import pandas as pd
import jieba
# Load the SMS dataset from a GBK-encoded CSV file. header=0 combined with
# names= discards the file's own header row and uses the column names below.
data = pd.read_csv(
    r"e:\資料\實驗data\messages.csv",
    encoding='gbk',
    header=0,
    names=['id', 'label', 'text'],
)
# print(data.head())  # uncomment to inspect the first rows
簡訊分詞
# Segment each message with jieba and join the tokens with single spaces so
# that a whitespace/word-boundary tokenizer can split the text into words.
# NOTE(review): this step was missing from the script although the
# 'cut_message' column is read below; confirm it matches the original.
data['cut_message'] = data['text'].apply(
    lambda message: ' '.join(jieba.cut(message))
)
# print(data.head())  # uncomment to inspect the segmented text
x = data['cut_message'].values  # segmented message text (model input)
y = data['label'].values        # class label of each message (target)
訓練集、測試集劃分
# train_test_split lives in sklearn.model_selection; the former
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
# Hold out 10% of the samples for testing (test : train = 1 : 9).
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.1)
模型訓練與預測
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sklearn.naive_bayes import GaussianNB, MultinomialNB

# Turn the training messages into a term-count matrix; the vocabulary is
# learned from the training split only.
vectorizer = CountVectorizer()
x_train_termcounts = vectorizer.fit_transform(train_x)

# Re-weight the raw counts with TF-IDF, fitted on the training counts.
tfidf_transformer = TfidfTransformer()
x_train_tfidf = tfidf_transformer.fit_transform(x_train_termcounts)

# Train a multinomial naive Bayes classifier on the TF-IDF features.
classifier = MultinomialNB().fit(x_train_tfidf, train_y)

# Apply the already-fitted vectorizer and transformer to the test messages
# (transform, not fit_transform, so the training vocabulary is reused).
x_input_termcounts = vectorizer.transform(test_x)
x_input_tfidf = tfidf_transformer.transform(x_input_termcounts)
predicted_categories = classifier.predict(x_input_tfidf)  # predicted class per test message
準確率、召回率
from sklearn.metrics import accuracy_score, recall_score

# Score the predictions against the held-out labels.
# NOTE(review): recall_score is called with its default averaging, which
# presumably expects binary labels; confirm against the dataset.
accuracy_s = accuracy_score(y_true=test_y, y_pred=predicted_categories)
recall_s = recall_score(y_true=test_y, y_pred=predicted_categories)
混淆矩陣
from sklearn.metrics import confusion_matrix

# Confusion matrix of true vs. predicted labels. The result is a bare
# expression, so it is only displayed in an interactive session/notebook.
confusion_matrix(y_true=test_y, y_pred=predicted_categories)
輸出一部分例項
# Human-readable names for the class labels.
# NOTE(review): the original mapping literal was lost; the 0/1 labels and
# their names below are an assumption -- confirm against the dataset.
category_map = {0: 'normal', 1: 'spam'}

# Print the first ten test messages with their predicted and true classes.
# Labels missing from category_map are printed as-is instead of raising.
for sentence, category, real in zip(test_x[:10], predicted_categories[:10], test_y[:10]):
    print('\nmessage_content:', sentence,
          '\npredicted_type:', category_map.get(category, category),
          'real_values:', category_map.get(real, real))
python sklearn庫實現簡單邏輯回歸
import xlrd import matplotlib.pyplot as plt import numpy as np from sklearn import model selection from sklearn.linear model import logisticregression...
Python sklearn 交叉驗證
from sklearn.datasets import load boston from sklearn.model selection import cross val score from sklearn.tree import decisiontreeregressor boston loa...
Python sklearn 中的SVM示例
coding utf 8 import pandas as pd from numpy.random import shuffle from sklearn import svm import joblib from sklearn import metrics inputfile data mom...