Loading the IMDB dataset
x_train[0] = [1, 14, 22, ..., 32], with length 228
x_train = sequence.pad_sequences(x_train, maxlen=500)
x_train[0] becomes [0, 0, 0, ..., 1, 14, 22, ..., 32], with length 500 (zeros are padded at the front)
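A minimal sketch of this padding behaviour (the toy sequences here are made up for illustration):

from keras.preprocessing import sequence

x = [[1, 14, 22, 32], [5, 9]]
padded = sequence.pad_sequences(x, maxlen=6)  # default padding='pre': zeros go in front
print(padded)
# [[ 0  0  1 14 22 32]
#  [ 0  0  0  0  5  9]]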
import numpy
from keras.datasets import imdb
from matplotlib import pyplot
from keras.preprocessing import sequence
(x_train,y_train),(x_test,y_test)=imdb.load_data()
print("train data:")
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
print(x_train[0])
print("first length:")
print(len(x_train[0]))
print("classes:")
print(numpy.unique(y_train))
print("number of words:")
print(len(numpy.unique(numpy.hstack(x_train))))
print("review length:")
result = list(map(len, x_train))  # materialize the lengths now, before x_train is padded below
# sequence.pad_sequences
# pad each review (e.g. the first one, 228 words) to length 500, filling zeros at the front
x_train=sequence.pad_sequences(x_train,maxlen=500)
print(x_train[0])
print(len(x_train[0]))
print("mean %.2f words(%f)"% (numpy.mean(result),numpy.std(result)))
pyplot.subplot(121)
pyplot.boxplot(result)
pyplot.subplot(122)
pyplot.hist(result)
pyplot.show()
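The integers in x_train are word indices, not words. As a hedged sketch (assuming the default load_data arguments, where indices are offset by index_from=3 so that 0, 1 and 2 are reserved for padding, the start marker and out-of-vocabulary words), a review can be decoded back into text:

word_index = imdb.get_word_index()                       # maps word -> index
index_word = {i + 3: w for w, i in word_index.items()}   # undo the default index_from=3 offset
index_word.update({0: '<pad>', 1: '<start>', 2: '<oov>'})
# x_train[0] is already padded here, so the decoded text starts with <pad> tokens
print(' '.join(index_word.get(i, '?') for i in x_train[0]))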
Word embeddings
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)
x_train = sequence.pad_sequences(x_train, maxlen=500)
x_test = sequence.pad_sequences(x_test, maxlen=500)
model.add(Embedding(5000, 32, input_length=500))
Vocabulary of 5,000 words; each review padded to length 500; each word represented by a 32-dimensional vector.
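A quick way to see what the Embedding layer produces, as a standalone sketch (Keras 2 names assumed):

import numpy
from keras.models import Sequential
from keras.layers.embeddings import Embedding

model = Sequential()
model.add(Embedding(5000, 32, input_length=500))
model.compile(optimizer='rmsprop', loss='mse')         # compiled only so predict() can run
batch = numpy.random.randint(0, 5000, size=(1, 500))   # one fake padded review
print(model.predict(batch).shape)                      # (1, 500, 32): one 32-dim vector per word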
A plain (fully connected) neural network
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
seed=7
numpy.random.seed(seed)
top_words=5000
test_split = 0.33  # unused below; load_data already returns a fixed train/test split
(x_train,y_train),(x_test,y_test)=imdb.load_data(num_words=top_words)
num_lizi = x_train.shape[0] // 10   # integer division: keep only 10% of the examples to speed up the demo
num_lizi2 = x_test.shape[0] // 10
x_train = x_train[0:num_lizi]
y_train = y_train[0:num_lizi]
x_test = x_test[0:num_lizi2]
y_test = y_test[0:num_lizi2]
max_words=500
x_train=sequence.pad_sequences(x_train,maxlen=max_words)
x_test=sequence.pad_sequences(x_test,maxlen=max_words)
model = Sequential()
model.add(Embedding(top_words, 32, input_length=max_words))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
print(model.summary())
model.fit(x_train,y_train,validation_data=(x_test,y_test),epochs=2,batch_size=128,verbose=1)
scores=model.evaluate(x_test, y_test,verbose=0)
print("accuracy: %.2f%%"%(scores[1]*100))
A 1D CNN for the IMDB problem
# cnn for the imdb problem
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
seed = 7
numpy.random.seed(seed)
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
test_split = 0.33  # unused below, as in the previous example
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=top_words)
# pad dataset to a maximum review length in words
max_words = 500
x_train = sequence.pad_sequences(x_train, maxlen=max_words)
x_test = sequence.pad_sequences(x_test, maxlen=max_words)
model = Sequential()
model.add(Embedding(top_words, 32, input_length=max_words))
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=2, batch_size=128, verbose=1)
scores = model.evaluate(x_test, y_test, verbose=0)
print("accuracy: %.2f%%" % (scores[1]*100))