IMDB sentiment analysis example with Keras


Loading the IMDB dataset

Before padding: x_train[0] = [1, 14, 22, ..., 32], length 228

x_train = sequence.pad_sequences(x_train, maxlen=500)

After padding: x_train[0] becomes [0, 0, 0, ..., 1, 14, 22, ..., 32], length 500
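A minimal sketch with a toy list (the numbers are just for illustration, not real review data) shows this pre-padding behaviour:

# Minimal sketch: pad_sequences pads on the left with zeros by default.
from keras.preprocessing import sequence

toy = [[1, 14, 22, 32], [5, 6]]
print(sequence.pad_sequences(toy, maxlen=6))
# [[ 0  0  1 14 22 32]
#  [ 0  0  0  0  5  6]]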

import numpy
from keras.datasets import imdb
from matplotlib import pyplot
from keras.preprocessing import sequence

(x_train, y_train), (x_test, y_test) = imdb.load_data()

print("train data:")
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
print(x_train[0])

print("first length:")
print(len(x_train[0]))

print("classes:")
print(numpy.unique(y_train))

print("number of words:")
print(len(numpy.unique(numpy.hstack(x_train))))

print("review length:")
# use a list (not a lazy map object) so numpy and pyplot can consume it below
result = [len(x) for x in x_train]

# sequence.pad_sequences:
# pad each review (e.g. one of length 228) to length 500 by prepending zeros
x_train = sequence.pad_sequences(x_train, maxlen=500)
print(x_train[0])
print(len(x_train[0]))

print("mean %.2f words (std %.2f)" % (numpy.mean(result), numpy.std(result)))

pyplot.subplot(121)
pyplot.boxplot(result)
pyplot.subplot(122)
pyplot.hist(result)
pyplot.show()
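The printed reviews are sequences of word indices rather than words. As a sanity check (a minimal sketch, assuming the default index offset of 3 that imdb.load_data reserves for padding/start/unknown tokens), the indices can be mapped back to words with imdb.get_word_index():

# Minimal sketch: decode an index-encoded review back to text.
# Assumes the default index_from=3 offset of imdb.load_data.
word_index = imdb.get_word_index()
index_to_word = {index + 3: word for word, index in word_index.items()}
decoded = " ".join(index_to_word.get(i, "?") for i in x_train[0] if i != 0)
print(decoded[:200])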

Word embeddings

imdb.load_data(nb_words=5000, test_split=0.33)
x_train = sequence.pad_sequences(x_train, maxlen=500)
x_test = sequence.pad_sequences(x_test, maxlen=500)
model.add(Embedding(5000, 32, input_length=500))

Vocabulary of 5000 words, each review padded to length 500, and each word represented by a 32-dimensional vector.
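As a quick check of that statement (a minimal standalone sketch, not part of the original scripts), the Embedding layer turns a batch of 500 integer indices into a batch of 500 × 32 vectors:

# Minimal sketch: inspect the output shape of an Embedding layer.
from keras.models import Sequential
from keras.layers.embeddings import Embedding

m = Sequential()
m.add(Embedding(5000, 32, input_length=500))  # 5000-word vocabulary, 32-dim vectors
print(m.output_shape)  # (None, 500, 32)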

Plain neural network (multilayer perceptron)

import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

seed = 7
numpy.random.seed(seed)

top_words = 5000
test_split = 0.33
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=top_words)

# keep only one tenth of the data to speed things up (integer division for slicing)
num_lizi = x_train.shape[0] // 10
num_lizi2 = x_test.shape[0] // 10
x_train = x_train[0:num_lizi]
y_train = y_train[0:num_lizi]
x_test = x_test[0:num_lizi2]
y_test = y_test[0:num_lizi2]

max_words = 500
x_train = sequence.pad_sequences(x_train, maxlen=max_words)
x_test = sequence.pad_sequences(x_test, maxlen=max_words)

model = Sequential()
model.add(Embedding(top_words, 32, input_length=max_words))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # sigmoid output to match binary_crossentropy
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=2, batch_size=128, verbose=1)
scores = model.evaluate(x_test, y_test, verbose=0)
print("accuracy: %.2f%%" % (scores[1] * 100))

1D CNN for the IMDB problem

# cnn for the imdb problem
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

seed = 7
numpy.random.seed(seed)

# load the dataset but only keep the top n words, zero the rest
top_words = 5000
test_split = 0.33
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=top_words)

# pad dataset to a maximum review length in words
max_words = 500
x_train = sequence.pad_sequences(x_train, maxlen=max_words)
x_test = sequence.pad_sequences(x_test, maxlen=max_words)

model = Sequential()
model.add(Embedding(top_words, 32, input_length=max_words))
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=2, batch_size=128, verbose=1)
scores = model.evaluate(x_test, y_test, verbose=0)
print("accuracy: %.2f%%" % (scores[1] * 100))
