lstm = rnn.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)

x_split =tf.split(xr, time_step_size, 0)

在任意時刻 $t$,LSTM cell 會產生兩個內部狀態 $c_t$ 和 $h_t$(關於 RNN 與 LSTM 的介紹可參考相關文章)。

當 state_is_tuple=True 時,狀態 $c_t$ 和 $h_t$ 就是分開記錄,放在一個二元 tuple 中返回;如果這個引數沒有設定或設定成 False,兩個狀態就按列連線起來返回。官方說這種形式馬上就要被 deprecated 了,所以我們在使用 LSTM 的時候要加上 state_is_tuple=True。

tf.nn.rnn_cell.MultiRNNCell([lstm] * num_layers, state_is_tuple=True)

# -*- coding: utf-8 -*-

import tensorflow as tf

from tensorflow.contrib import rnn

import numpy as np

import input_data

# configuration
#                      o * w + b -> 10 labels for each image, o[? 28], w[28 10], b[10]
#                      ^ (o: output 28 vec from 28 vec input)
#                      |
#      +-+  +-+       +--+
#      |1|->|2|-> ... |28| time_step_size = 28
#      +-+  +-+       +--+
#       ^    ^    ...  ^
#       |    |         |
# img1:[28] [28]  ... [28]
# img2:[28] [28]  ... [28]
# img3:[28] [28]  ... [28]
# ...
# img128 or img256 (batch_size or test_size 256)
#      each input size = input_vec_size=lstm_size=28

# configuration variables

# Dimension of each input vector; the LSTM is built with the same number of units.
lstm_size = 28
input_vec_size = lstm_size

# Length of the recurrent layer (number of time steps).
time_step_size = 28

# Number of samples per training batch and per evaluation batch.
batch_size = 128
test_size = 256

def init_weights(shape):
    """Create a trainable variable of the given shape.

    Values are drawn from a normal distribution with standard deviation 0.01.

    Args:
        shape: Shape of the variable to create, e.g. ``[28, 10]``.

    Returns:
        A ``tf.Variable`` initialised with small random values.
    """
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def model(x, w, b, lstm_size):
    """Build a single-layer LSTM classifier over row-wise image sequences.

    Reads the module-level ``time_step_size`` to split the input into
    per-step tensors.

    Args:
        x: Input tensor of shape (batch_size, time_step_size, input_vec_size).
        w: Output-layer weight matrix of shape (lstm_size, num_labels).
        b: Output-layer bias of shape (num_labels,).
        lstm_size: Number of LSTM units. The reshape below also uses it as the
            input vector width, so it must equal input_vec_size.

    Returns:
        A tuple ``(logits, state_size)``: the linear projection of the last
        time step's LSTM output, and the cell's ``state_size``.
    """
    # Swap the batch and time axes:
    # (batch_size, time_step_size, input_vec_size)
    #   -> (time_step_size, batch_size, input_vec_size)
    xt = tf.transpose(x, [1, 0, 2])

    # Flatten to (time_step_size * batch_size, input_vec_size) so that each row
    # is the input of one LSTM step (relies on lstm_size == input_vec_size).
    xr = tf.reshape(xt, [-1, lstm_size])

    # Split along axis 0 into time_step_size tensors, each of shape
    # (batch_size, input_vec_size).
    x_split = tf.split(xr, time_step_size, 0)

    # state_is_tuple=True keeps the cell state and hidden state as a 2-tuple
    # instead of concatenating them column-wise.
    lstm = rnn.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)

    # static_rnn returns one output per time step, each (batch_size, lstm_size).
    outputs, _states = rnn.static_rnn(lstm, x_split, dtype=tf.float32)

    # Linear activation on the last step's output only; the state size is
    # returned so the caller can initialise the state if needed.
    return tf.matmul(outputs[-1], w) + b, lstm.state_size

# Load the MNIST data set with one-hot encoded labels.
mnist = input_data.read_data_sets("mnist_data/", one_hot=True)

# mnist.train.images is a 55000 x 784 matrix and mnist.train.labels is a
# 55000 x 10 matrix. (`try` is a reserved word, hence the tr_/te_ names.)
tr_x, tr_y = mnist.train.images, mnist.train.labels
te_x, te_y = mnist.test.images, mnist.test.labels

# View every flattened image as a 28 x 28 matrix: (num_images, 28, 28).
tr_x = tr_x.reshape(-1, 28, 28)
te_x = te_x.reshape(-1, 28, 28)

x = tf.placeholder("float", [None, 28, 28])
y = tf.placeholder("float", [None, 10])

# Output layer: maps lstm_size features to the 10 labels.
w = init_weights([lstm_size, 10])  # output-layer weight matrix, 28 x 10
b = init_weights([10])  # output-layer bias

py_x, state_size = model(x, w, b, lstm_size)

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=y)
)
train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
predict_op = tf.argmax(py_x, 1)

# Let TensorFlow grow its GPU memory allocation on demand.
session_conf = tf.ConfigProto()
session_conf.gpu_options.allow_growth = True

# Launch the graph in a session.
with tf.Session(config=session_conf) as sess:
    # All variables must be initialised before the first run.
    tf.global_variables_initializer().run()

    for i in range(100):
        # Walk the training set in full batches; a trailing partial batch is
        # skipped because the second range stops at the last full batch end.
        batch_starts = range(0, len(tr_x), batch_size)
        batch_ends = range(batch_size, len(tr_x) + 1, batch_size)
        for start, end in zip(batch_starts, batch_ends):
            sess.run(
                train_op,
                feed_dict={x: tr_x[start:end], y: tr_y[start:end]},
            )

        # Evaluate on a random test batch of (at most) test_size samples.
        test_indices = np.random.permutation(len(te_x))[:test_size]
        predicted = sess.run(predict_op, feed_dict={x: te_x[test_indices]})
        expected = np.argmax(te_y[test_indices], axis=1)
        print(i, np.mean(expected == predicted))


