Python中scikit learn資料轉換

# coding: utf-8
import codecs
import sys

import numpy
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
# Alternative, slightly larger sample corpus (disabled in the original script).
# NOTE(review): its test_tags look swapped relative to test_words - confirm
# before re-enabling.
#
# train_words = [
#     '我們 我們 長城 故宮 。 就是 中國',
#     '我們 好孩子 長城 。 中國',
#     '我們 好孩子 泡菜 。 孤獨 南韓',
#     '我們 泡菜 認證 。 南韓',
# ]
# train_tags = ['中國', '中國', '南韓', '南韓']
# test_words = [
#     '我 泡菜 南韓 好吃',
#     '長城 好孩子 認證 。 中國',
# ]
# test_tags = ['中國', '南韓']

# Training documents: each entry is one document whose tokens are already
# separated by single spaces. train_tags[i] is the label of train_words[i].
train_words = [
    '長城 故宮 就是 中國',
    '我們 中國',
    '我們 好孩子 泡菜 。 孤獨 南韓',
]
train_tags = ['中國', '中國', '南韓']

# Held-out documents in the same format, with their expected labels.
test_words = [
    '我 泡菜 南韓 好吃',
]
test_tags = ['南韓']
def vectorize(train_words, test_words):
    """Convert both corpora to feature matrices with the hashing trick.

    Args:
        train_words: Iterable of training documents (space-separated tokens).
        test_words: Iterable of test documents in the same format.

    Returns:
        A ``(train_data, test_data)`` tuple of matrices produced by the same
        ``HashingVectorizer``, so both share one feature space.
    """
    # ``non_negative=True`` has been removed from HashingVectorizer;
    # ``alternate_sign=False`` is the supported way to keep hashed counts
    # from cancelling each other out with opposite signs.
    # A vocabulary-based alternative would be ``CountVectorizer(min_df=1)``.
    hasher = HashingVectorizer(alternate_sign=False)
    train_data = hasher.fit_transform(train_words)
    # Use transform (not fit_transform) on the test set so that nothing is
    # ever learned from held-out data.
    test_data = hasher.transform(test_words)
    return train_data, test_data
def vectorize1(train_words, test_words):
    """Build TF-IDF features with a single ``TfidfVectorizer``.

    The vectoriser is fitted on the training documents only; the test
    documents are projected with the vocabulary and IDF weights learned
    there. The learned vocabulary is printed (space-separated, in column
    order) as a debugging aid.

    Args:
        train_words: Iterable of training documents (space-separated tokens).
        test_words: Iterable of test documents in the same format.

    Returns:
        A ``(train_data, test_data)`` tuple of TF-IDF matrices that share the
        same columns.
    """
    tv = TfidfVectorizer(sublinear_tf=True)
    train_data = tv.fit_transform(train_words)
    # Re-fitting a second vectoriser on the test set would compute IDF weights
    # from the test documents and drop the sublinear_tf setting, making the
    # two matrices incomparable. Reuse the fitted vectoriser instead.
    test_data = tv.transform(test_words)
    # Order terms by their column index; this works on every scikit-learn
    # version, unlike get_feature_names()/get_feature_names_out().
    feature_names = sorted(tv.vocabulary_, key=tv.vocabulary_.get)
    print(' '.join(feature_names))
    return train_data, test_data
def vectorize2(train_words, test_words):
    """Build TF-IDF features in two steps: term counts, then TF-IDF weighting.

    Both the ``CountVectorizer`` and the ``TfidfTransformer`` are fitted on
    the training documents only and then applied unchanged to the test
    documents.

    Args:
        train_words: Iterable of training documents (space-separated tokens).
        test_words: Iterable of test documents in the same format.

    Returns:
        A ``(train_data, test_data)`` tuple of TF-IDF matrices that share the
        same columns.
    """
    counter = CountVectorizer(stop_words='english', max_df=0.5)
    counts_train = counter.fit_transform(train_words)
    # Same vocabulary as the training matrix, without building a second
    # vectoriser around ``counter.vocabulary_``.
    counts_test = counter.transform(test_words)

    # Named ``transformer`` on purpose: binding a local with the same name as
    # the class would shadow it and fail with UnboundLocalError.
    transformer = TfidfTransformer()
    train_data = transformer.fit_transform(counts_train)
    # IDF weights come from the training counts only; never re-fit on test.
    test_data = transformer.transform(counts_test)
    return train_data, test_data
def main():
    """Vectorise the sample corpus with ``vectorize1`` and print the result.

    Prints the type, dense contents and shape of the training matrix, then
    the dense contents and shape of the test matrix.
    """
    train_data, test_data = vectorize1(train_words, test_words)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(type(train_data))
    print(train_data.toarray())
    print(train_data.shape)
    print(test_data.toarray())
    print(test_data.shape)


if __name__ == '__main__':
    main()

Windows環境下安裝scikit learn

scikit-learn是python的一個機器學習庫，請按照以下步驟進行安裝。1. 首先確保你的機器安裝了python並且配置好了環境變數。2. 安裝pip。3. 安裝numpy+mkl，安裝命令：pip install numpy-1.12.0+mkl-cp35-cp35m-win_amd64.whl...

python中 python中的與

這一部分首先要理解python記憶體機制，python中萬物皆物件。對於不可變物件，改變了原來的值，其別名（變數名）繫結到了新值上面，id肯定會改變；對於可變物件，操作改變了值，id肯定不會變，而是本地操作，其值原地修改。對於 + 號操作，可變物件和不可變物件呼叫的都是 __add__ 操作；對於 += 號操作，可變...

python中否定for 在python中否定函式

有沒有一種方法可以否定一個函式，使它返回負數。在我的函式中，我有條件句，每個條件句都讓這個烏龜移動。有沒有一種方法可以否定這一點，所以烏龜的每一個動作都是否定的。我說的是狀況在def ttinterpret program interpret program as a tinyturtle ...

Python中scikit learn資料轉換

Windows環境下安裝scikit learn

python中 python中的 與

python中否定for 在python中否定函式

相關推薦

python中 python中的與