# TensorFlow学习

import tensorflow as tf

import numpy as np

1.1、数据的呈现（Variable（）：定义变量）：

# 1.1 Representing data: tf.Variable() defines a variable.
x = np.array([[1, 1, 1], [1, -8, 1], [1, 1, 1]])

w = tf.Variable(initial_value=x)

# NOTE: rebinding `w` discards the variable above; kept to mirror the demo.
w = tf.Variable(tf.zeros([3, 3]))

init = tf.global_variables_initializer()

# Fixed fused `withtf.Session()` and restored the lost indentation.
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(w))

# Feed scalar values through placeholders and multiply them.
a = tf.placeholder(tf.int16)
b = tf.placeholder(tf.int16)

mul = tf.multiply(a, b)

# Fixed fused `withtf.Session()` and the smart quotes around the string
# literal (curly quotes are a SyntaxError in Python).
with tf.Session() as sess:
    print("a*b=", sess.run(mul, feed_dict={a: 2, b: 3}))

1.3、矩阵相乘（matmul）运算：

# 1.3 Matrix multiplication (tf.matmul): (5*a) @ (4*b).
a = tf.Variable(tf.ones([3, 3]))
b = tf.Variable(tf.ones([3, 3]))

product = tf.matmul(tf.multiply(5.0, a), tf.multiply(4.0, b))

# tf.initialize_all_variables() is deprecated; use the modern initializer.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    print(sess.run(product))

1.4、argmax的练习：获取最大值的下标向量

# 1.4 argmax: index of the maximum value along an axis.
a = tf.get_variable(name='a', shape=[3, 4], dtype=tf.float32,
                    initializer=tf.random_uniform_initializer(minval=-1, maxval=1))

# Index of the max value down each column (axis=0).
b = tf.argmax(input=a, axis=0)

# `dimension=` is the deprecated alias of `axis=`; use axis=1 (per row).
c = tf.argmax(input=a, axis=1)

sess = tf.InteractiveSession()
# tf.initialize_all_variables() is deprecated; use the modern initializer.
sess.run(tf.global_variables_initializer())

print(sess.run(a))
print(sess.run(b))
print(sess.run(c))

1.5、创建全一/全零矩阵：

tf.ones(shape,type=tf.float32,name=None)

tf.ones([2, 3], int32) ==> [[1, 1, 1], [1, 1, 1]]

tf.zeros(shape,type=tf.float32,name=None)

tf.zeros([2, 3], int32) ==> [[0, 0, 0],[0, 0, 0]]

1.7、tf.ones_like(tensor,dtype=None,name=None)

新建一个与给定的tensor类型大小一致的tensor，其所有元素为1。

# ‘tensor’ is [[1, 2, 3], [4, 5, 6]]

tf.ones_like(tensor) ==> [[1, 1, 1], [1, 1, 1]]

1.8、tf.zeros_like(tensor,dtype=None,name=None)

新建一个与给定的tensor类型大小一致的tensor，其所有元素为0。

# ‘tensor’ is [[1, 2, 3], [4, 5, 6]]

tf.zeros_like(tensor) ==> [[0, 0, 0],[0, 0, 0]]

1.9、tf.fill(dim,value,name=None)

创建一个形状大小为dim的tensor，其初始值为value

# Output tensor has shape [2, 3].

fill([2, 3], 9) ==> [[9, 9, 9]

[9, 9, 9]]

1.10、tf.constant(value,dtype=None,shape=None,name=’Const’)

创建一个常量tensor，先给出value，可以设定其shape

# Constant 1-D Tensor populated with value list.

tensor = tf.constant([1, 2, 3, 4, 5, 6, 7]) => [1 2 3 4 5 6 7]

# Constant 2-D tensor populated with scalarvalue -1.

tensor = tf.constant(-1.0, shape=[2, 3]) => [[-1. -1. -1.] [-1. -1. -1.]]

1.11、tf.linspace(start,stop,num,name=None)

返回一个tensor，该tensor中的数值在start到stop区间之间取等差数列（包含start和stop），如果num>1则差值为(stop-start)/(num-1)，以保证最后一个元素的值为stop。

其中，start和stop必须为tf.float32或tf.float64。num的类型为int。

tf.linspace(10.0, 12.0, 3, name="linspace") => [10.0 11.0 12.0]

1.12、tf.range(start,limit=None,delta=1,name=’range’)

返回一个tensor等差数列，该tensor中的数值在start到limit之间，不包括limit，delta是等差数列的差值。

start，limit和delta都是int32类型。

# ‘start’ is 3

# ‘limit’ is 18

# ‘delta’ is 3

tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]

# ‘limit’ is 5 start is 0

tf.range(start, limit) ==> [0, 1, 2, 3, 4]

1.13、tf.random_normal(shape,mean=0.0,stddev=1.0,dtype=tf.float32,seed=None,name=None)

返回一个tensor其中的元素的值服从正态分布。

seed: A Python integer. Used to create a random seed for thedistribution.See set_random_seed forbehavior

1.14、tf.truncated_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32,seed=None, name=None)

返回一个tensor其中的元素服从截断正态分布（？概念不懂，留疑）

1.15、tf.random_uniform(shape,minval=0,maxval=None,dtype=tf.float32,seed=None,name=None)

返回一个形状为shape的tensor，其中的元素服从minval和maxval之间的均匀分布。

1.16、tf.random_shuffle(value,seed=None,name=None)

对value（是一个tensor）的第一维进行随机化。

[[1,2],               [[2,3],

[2,3],        ==>  [1,2],

[3,4]]                [3,4]]

1.17、tf.set_random_seed(seed)

设置产生随机数的种子。

## 二、常规神经网络（NN）：

import tensorflow as tf

import tensorflow.examples.tutorials.mnist.input_data as input_data

# val_data=mnist.validation.images

# val_label=mnist.validation.labels

#print(“______________________________”)

# print(mnist.train.images.shape)

# print(mnist.train.labels.shape)

# print(mnist.validation.images.shape)

# print(mnist.validation.labels.shape)

# print(mnist.test.images.shape)

# print(mnist.test.labels.shape)

# print(val_data)

# print(val_label)

# print(“==============================”)

# Softmax regression on MNIST.
# FIX: the original used `mnist` and `train_step` without ever defining them.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Placeholders: flattened 28*28 images and one-hot labels.
x = tf.placeholder(tf.float32, [None, 784])
y_actual = tf.placeholder(tf.float32, shape=[None, 10])

# Model parameters, initialised to zero.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

y_predict = tf.nn.softmax(tf.matmul(x, W) + b)

# Cross-entropy loss averaged over the batch.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_actual * tf.log(y_predict), reduction_indices=1))

train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y_actual, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# tf.initialize_all_variables() is deprecated; use the modern initializer.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_actual: batch_ys})
        if i % 100 == 0:
            print("accuracy:", sess.run(accuracy, feed_dict={
                x: mnist.test.images, y_actual: mnist.test.labels}))

## 三、线性网络模型：

import tensorflow as tf

import numpy as np

# numpy随机生成100个数：

# Generate 100 random 2-D points and their ground-truth plane values.
x_data = np.float32(np.random.rand(2, 100))
y_data = np.dot([0.100, 0.200], x_data) + 0.300

# Linear model: y = W x + b.
b = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0))
y = tf.matmul(W, x_data) + b

# Minimise the mean squared error.
loss = tf.reduce_mean(tf.square(y - y_data))
# FIX: the original used `optimizer` without defining it.
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

# tf.initialize_all_variables() is deprecated; use the modern initializer.
init = tf.global_variables_initializer()

# Launch the graph.
sess = tf.Session()
sess.run(init)

# Fit the plane.
for step in range(0, 201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(W), sess.run(b))

## 四、CNN卷积神经网络：

# -*- coding: utf-8 -*-

“””

Created on ThuSep  8 15:29:48 2016

@author: root

“””

import tensorflow as tf

import tensorflow.examples.tutorials.mnist.input_data asinput_data

# Load MNIST; fixed the smart quotes around the path string.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Placeholders for the actual inputs and labels.
# shape=[None, 784]: any batch size (None), 784 = 28*28 pixels per image;
# [None, 10]: 10-way one-hot labels.
x = tf.placeholder(tf.float32, [None, 784])
y_actual = tf.placeholder(tf.float32, shape=[None, 10])

def weight_variable(shape):
    """Create a weight Variable drawn from a truncated normal (stddev 0.1).

    Truncated normal: values outside [mean - 2*stddev, mean + 2*stddev] are
    re-drawn. Used to initialise all weights / convolution kernels.
    """
    # FIX: the original had the fused token `returntf.Variable(initial)`.
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    """Create a bias Variable of the given shape filled with 0.1."""
    # FIX: the original had the fused token `returntf.Variable(initial)`.
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    """2-D convolution with stride 1 in every dimension and SAME padding."""
    # FIX: fused `returntf.` and smart quotes around 'SAME'.
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool(x):
    """2x2 max pooling with stride 2 and SAME padding.

    ksize=[1, 2, 2, 1] follows [batch, height, width, channels]: one 2x2
    window per channel. strides=[1, 2, 2, 1]: move 2 in both spatial
    directions (strides[0] and strides[3] stay 1). Max pooling keeps the
    strongest feature in each window, halving each spatial dimension.
    """
    # FIX: fused `returntf.` and smart quotes around 'SAME'.
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')

# Reshape flat 784-vectors into 28x28x1 images; -1 infers the batch size.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolution: 5x5 kernel, 1 input channel -> 32 feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool(h_conv1)  # 28x28 -> 14x14

# Second convolution: 5x5 kernel, 32 -> 64 feature maps.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool(h_conv2)  # 14x14 -> 7x7

# Fully connected layer: flatten the 7*7*64 feature maps to 1024 units.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout against overfitting: each activation is kept with probability
# keep_prob; survivors are scaled by 1/keep_prob so the expectation is kept.
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer + softmax for the 10-class prediction.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_predict = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

# Cross-entropy between actual labels and predictions.
cross_entropy = -tf.reduce_sum(y_actual * tf.log(y_predict))
# FIX: the original used `train_step` below without ever defining it.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Model accuracy.
correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y_actual, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

sess = tf.InteractiveSession()
# tf.initialize_all_variables() is deprecated; use the modern initializer.
sess.run(tf.global_variables_initializer())

for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Evaluate accuracy on the current batch (no dropout: keep_prob=1.0).
        train_acc = accuracy.eval(feed_dict={
            x: batch[0], y_actual: batch[1], keep_prob: 1.0})
        print('step', i, 'training accuracy', train_acc)
    # Train with dropout enabled.
    train_step.run(feed_dict={x: batch[0], y_actual: batch[1], keep_prob: 0.5})

test_acc = accuracy.eval(feed_dict={
    x: mnist.test.images, y_actual: mnist.test.labels, keep_prob: 1.0})
print("test accuracy", test_acc)

Ø  解析：

1）卷积层运算：

# -*- coding: utf-8 -*-
import tensorflow as tf

# A 3x3 convolution kernel, named 'k'.
k = tf.constant([[1, 0, 1], [2, 1, 0], [0, 0, 1]], dtype=tf.float32, name='k')

# A 4x4 input image, named 'i'.
i = tf.constant([[4, 3, 1, 0], [2, 1, 0, 1], [1, 2, 4, 1], [3, 1, 0, 2]],
                dtype=tf.float32, name='i')

# Reshape k to [3, 3, 1, 1]: height 3, width 3, 1 channel, 1 filter.
kernel = tf.reshape(k, [3, 3, 1, 1], name='kernel')

# Reshape i to [1, 4, 4, 1]: 1 image, height 4, width 4, 1 channel.
image = tf.reshape(i, [1, 4, 4, 1], name='image')

# Convolve image with kernel. [1, 1, 1, 1] is the stride in each of the
# four dimensions; VALID means no zero padding.
sss = tf.nn.conv2d(image, kernel, [1, 1, 1, 1], "VALID")

# Squeeze out the size-1 dimensions to get a 2-D result.
res = tf.squeeze(sss)

with tf.Session() as sess:
    print(sess.run(k))
    print(sess.run(sss))
    print(sess.run(res))

tensorflow提供了LSTM实现的一个basic版本，不包含lstm的一些高级扩展，同时也提供了一个标准接口，其中包含了lstm的扩展。分别为：tf.nn.rnn_cell.BasicLSTMCell(), tf.nn.rnn_cell.LSTMCell()

LSTM的结构

tensorflow

#tf.nn.rnn_cell.BasicLSTMCell(num_units, forget_bias, input_size, state_is_tuple=False, activation=tanh)

cell = tf.nn.rnn_cell.BasicLSTMCell(num_units, forget_bias=1.0, input_size=None, state_is_tuple=False, activation=tanh)

#num_units:图一中ht的维数，如果num_units=10,那么ht就是10维行向量

#forget_bias：还不清楚这个是干嘛的

#input_size:[batch_size,max_time, size]。假设要输入一句话，这句话的长度是不固定的，max_time就代表最长的那句话是多长，size表示你打算用多长的向量代表一个word，即embedding_size（embedding_size和size的值不一定要一样）

#state_is_tuple:true的话，返回的状态是一个tuple:(c=array([[]]), h=array([[]]):其中c代表Ct的最后时间的输出，h代表Ht最后时间的输出，h是等于最后一个时间的output的

#图三向上指的ht称为output

#此函数返回一个lstm_cell，即图一中的一个A

lstm_cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=False)

#cells:一个cell列表，将列表中的cell一个个堆叠起来，如果使用cells=[cell]*4的话，就是四曾，每层cell输入输出结构相同

#如果state_is_tuple:则返回的是 n-tuple，其中n=len(cells): tuple:(c=[batch_size, num_units],h=[batch_size,num_units])

initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)

#返回[batch_size, 2*len(cells)],或者[batch_size, s]

#这个函数只是用来生成初始化值的

tf.nn.dynamic_rnn(cell, inputs,sequence_length=None, initial_state=None,dtype=None,time_major=False)

#此函数会通过，inputs中的max_time将网络按时间展开

#cell:将上面的lstm_cell传入就可以

#inputs:[batch_size,max_time, size]如果time_major=Flase. [max_time,batch_size, size]如果time_major=True

#sequence_length:是一个list，如果你要输入三句话，且三句话的长度分别是5,10,25,那么sequence_length=[5,10,25]

#返回：（outputs, states）:output，[batch_size, max_time, num_units]如果time_major=False。 [max_time,batch_size,num_units]如果time_major=True。states:[batch_size, 2*len(cells)]或[batch_size,s]

#outputs输出的是最上面一层的输出，states保存的是最后一个时间输出的states

# Manually unroll the LSTM over max_time steps.
outputs = []
# FIX: the original assigned `states = initial_states` but then used `state`;
# use the single name `state` consistently.
state = initial_state
with tf.variable_scope("RNN"):
    for time_step in range(max_time):
        # Share the LSTM parameters across time steps after the first one.
        if time_step > 0:
            tf.get_variable_scope().reuse_variables()
        (cell_out, state) = lstm_cell(inputs[:, time_step, :], state)
        outputs.append(cell_out)

GRU

GRU结构图

tenforflow

cell = tenforflow提供了tf.nn.rnn_cell.GRUCell(num_units, input_size=None, activation=tanh)

#参考lstm cell 使用

tf.unstack()

将给定的R维张量拆分成R-1维张量

将value根据axis分解成num个张量，返回的值是list类型，如果没有指定num则根据axis推断出！

DEMO

 1 2 3 4 5 6 7 8 9 10 11 12 13 import tensorflow as tf a = tf.constant([3,2,4,5,6]) b = tf.constant([1,6,7,8,0]) c = tf.stack([a,b],axis=0) d = tf.stack([a,b],axis=1) e = tf.unstack([a,b],axis=0) f = tf.unstack([a,b],axis=1)   with tf.Session() as sess:     print(sess.run(c))     print(sess.run(d))     print(sess.run(e))     print(sess.run(f))

[[3 2 4 5 6]
[1 6 7 8 0]]

——————–
[[3 1]
[2 6]
[4 7]
[5 8]
[6 0]]

———————-
[array([3, 2, 4, 5, 6]), array([1, 6, 7, 8, 0])]

———————-
[array([3, 1]), array([2, 6]), array([4, 7]), array([5, 8]), array([6, 0])]

tf.nn.softmax_cross_entropy_with_logits(logits,labels, name=None)

import tensorflow as tf

# Our NN's raw output (logits).
logits = tf.constant([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]])

# Step 1: softmax.
y = tf.nn.softmax(logits)

# True labels (one-hot).
y_ = tf.constant([[0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0]])

# Step 2: cross-entropy computed by hand.
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))

# Cross-entropy in one step -- don't forget tf.reduce_sum()!
# FIX: keyword arguments are required (positional logits/labels were removed
# in TF 1.5+), and the originals were positional.
cross_entropy2 = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))

with tf.Session() as sess:
    softmax = sess.run(y)
    c_e = sess.run(cross_entropy)
    c_e2 = sess.run(cross_entropy2)
    print("step1:softmax result=")
    print(softmax)
    print("step2:cross_entropy result=")
    print(c_e)
    print("Function(softmax_cross_entropy_with_logits)result=")
    print(c_e2)

step1:softmax result=

[[ 0.09003057  0.24472848 0.66524094]

[0.09003057  0.24472848  0.66524094]

[0.09003057  0.24472848  0.66524094]]

step2:cross_entropy result=

1.22282

Function(softmax_cross_entropy_with_logits)result=

1.2228

Ø  RNN案例（一）

# -*- coding: utf-8 -*-

import tensorflow as tf

importtensorflow.examples.tutorials.mnist.input_data as input_data

lr = 0.001

training_iters = 100000

batch_size = 128

n_inputs = 28

n_steps = 28

n_hidden_units = 128

n_classes = 10

# 生成两个占位符；

x = tf.placeholder(tf.float32, [None,n_steps, n_inputs])

y = tf.placeholder(tf.float32, [None,n_classes])

weights = {

# 随机生成一个符合正态图形的矩阵，作为in和out的初始值。

‘in’:tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),

‘out’:tf.Variable(tf.random_normal(n_hidden_units, n_classes)),

}

biases = {

‘in’:tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),

‘out’:tf.Variable(tf.constant(0.1, shape=[n_classes, ])),

}

def RNN(X, weights, biases):

# 第一步：输入的x为三维数据，因此需要进行相应的维度变换；转换成2维，然后与w、b进行交易，运算完成后，再将x转换成三维；

X=tf.reshape(X, [-1, n_inputs])

X_in = tf.matmul(X, weights[‘in’])+biases[‘in’]

X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

# 第二步：即构建cell的初始值，并进行建模运算；

#n_hidden_units:是ht的维数，表示128维行向量；state_is_tuple表示tuple形式，返回一个lstm的单元，即一个ht。

lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units,forget_bias=1.0, state_is_tuple=True)

# 将LSTM的状态初始化全为0数组，batch_size给出一个batch大小。

init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)

# 运算一个神经单元的输出值与状态，动态构建RNN模型，在这个模型中实现ht与x的结合。

outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in,initial_state=init_state, time_major=False)

# 第三步：将输出值进行格式转换，然后运算输出，即可。

# 矩阵的转置，[0,1,2]为正常顺序[高，长，列]，想要更换哪个就更换哪个的顺序即可,并实现矩阵解析。

outputs = tf.unstack(tf.transpose(outputs, [1,0,2]))

results = tf.matmul(outputs[-1], weights[‘out’]) + biases[‘out’]

return results

# 创建一个模型，然后进行测试。

pred = RNN(x, weights, biases)

# softmax_cross_entropy_with_logits：将神经网络最后一层的输出值pred与实际标签y作比较，然后计算全局平均值，即为损失。

cost =tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))

# 用梯度下降优化，下降速率为0.001

# 计算准确度。

correct_pred = tf.equal(tf.argmax(pred, 1),tf.argmax(y, 1))

accuracy =tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:

sess.run(init)

step = 0

while step*batch_size < training_iters:

batch_xs, batch_ys = mnist.train.next_batch(batch_size)

batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])

sess.run([train_op], feed_dict={

x:batch_xs,

y:batch_ys,

})

if step % 20 ==0:

print(sess.run(accuracy, feed_dict={

x:batch_xs,

y:batch_ys,

}))

step += 1

Ø  RNN案例（二）

# num_epochs = 100

# total_series_length = 50000

# truncated_backprop_length = 15

# state_size = 4

# num_classes = 2

# echo_step = 3

# batch_size = 5

# num_batches =total_series_length//batch_size//truncated_backprop_length

#

# def generateData():

#     x= np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))

#     y= np.roll(x, echo_step)

#    y[0:echo_step] = 0

#

#     x= x.reshape((batch_size, -1))  # Thefirst index changing slowest, subseries as rows

#     y= y.reshape((batch_size, -1))

#

#    return (x, y)

#

# batchX_placeholder =tf.placeholder(tf.float32, [batch_size, truncated_backprop_length])

# batchY_placeholder =tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])

#

# init_state = tf.placeholder(tf.float32,[batch_size, state_size])

#

# W =tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)

# b = tf.Variable(np.zeros((1,state_size)),dtype=tf.float32)

#

# W2 = tf.Variable(np.random.rand(state_size,num_classes),dtype=tf.float32)

# b2 = tf.Variable(np.zeros((1,num_classes)),dtype=tf.float32)

#

# # Unpack columns

# inputs_series =tf.unstack(batchX_placeholder, axis=1)

# labels_series =tf.unstack(batchY_placeholder, axis=1)

#

# # Forward pass

# current_state = init_state

# states_series = []

# for current_input in inputs_series:

#    current_input = tf.reshape(current_input, [batch_size, 1])

#    input_and_state_concatenated = tf.concat(1, [current_input,current_state])  # Increasing number ofcolumns

#

#    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) +b)  # Broadcasted addition

#    states_series.append(next_state)

#    current_state = next_state

#

# logits_series = [tf.matmul(state, W2) + b2for state in states_series] #Broadcasted addition

# predictions_series = [tf.nn.softmax(logits)for logits in logits_series]

#

# losses =[tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels) for logits,labels in zip(logits_series,labels_series)]

# total_loss = tf.reduce_mean(losses)

#

# def plot(loss_list, predictions_series,batchX, batchY):

#    plt.subplot(2, 3, 1)

#    plt.cla()

#    plt.plot(loss_list)

#

#    for batch_series_idx in range(5):

#        one_hot_output_series = np.array(predictions_series)[:, batch_series_idx,:]

#        single_output_series = np.array([(1 if out[0] < 0.5 else 0) for outin one_hot_output_series])

#

#        plt.subplot(2, 3, batch_series_idx + 2)

#        plt.cla()

#        plt.axis([0, truncated_backprop_length, 0, 2])

#        left_offset = range(truncated_backprop_length)

#        plt.bar(left_offset, batchX[batch_series_idx, :], width=1,color=”blue”)

#        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1,color=”red”)

#        plt.bar(left_offset, single_output_series * 0.3, width=1,color=”green”)

#

#    plt.draw()

#    plt.pause(0.0001)

#

# with tf.Session() as sess:

#    sess.run(tf.initialize_all_variables())

#    plt.ion()

#    plt.figure()

#    plt.show()

#    loss_list = []

#

# for epoch_idx in range(num_epochs):

#    x,y = generateData()

#    _current_state = np.zeros((batch_size, state_size))

#    print(“New data, epoch”, epoch_idx)

#

# for batch_idx in range(num_batches):

#    start_idx = batch_idx * truncated_backprop_length

#    end_idx = start_idx + truncated_backprop_length

#

#    batchX = x[:,start_idx:end_idx]

#    batchY = y[:,start_idx:end_idx]

#

#    _total_loss, _train_step, _current_state, _predictions_series =sess.run(

#    [total_loss, train_step, current_state, predictions_series],

#    feed_dict={

#    batchX_placeholder:batchX,

#    batchY_placeholder:batchY,

#     init_state:_current_state

#    })

#    loss_list.append(_total_loss)

#    if batch_idx%100 == 0:

#        print(“Step”,batch_idx, “Loss”, _total_loss)

#        plot(loss_list, _predictions_series, batchX, batchY)

#    plt.ioff()

#    plt.show()

Ø  LSTM_RNN案例（三）：

# -*-coding: utf-8 -*-

import numpy as np

import tensorflow as tf

import matplotlib.pyplot as plt

from asn1crypto._ffi import null

BATCH_START = 0    # running index used when generating batch data
TIME_STEPS = 20    # truncated backprop-through-time length
BATCH_SIZE = 50
INPUT_SIZE = 1     # sin input dimensionality
OUTPUT_SIZE = 1    # cos target dimensionality
CELL_SIZE = 10     # RNN hidden-unit size
LR = 0.006         # learning rate

def get_batch():
    """Generate one training batch of the sin -> cos mapping.

    Returns [seq, res, xs] where seq/res have shape
    (BATCH_SIZE, TIME_STEPS, 1) and xs has shape (BATCH_SIZE, TIME_STEPS).
    Advances BATCH_START so successive calls return successive windows.
    """
    global BATCH_START, TIME_STEPS
    # FIX: the paste split this statement across lines, leaving a bare
    # `.reshape(...)` line that is a SyntaxError; rejoined with a
    # line continuation.
    xs = np.arange(BATCH_START, BATCH_START + TIME_STEPS * BATCH_SIZE) \
        .reshape((BATCH_SIZE, TIME_STEPS)) / (10 * np.pi)
    seq = np.sin(xs)
    res = np.cos(xs)
    BATCH_START += TIME_STEPS
    # np.newaxis adds the trailing feature axis (indexing with None is
    # equivalent).
    return [seq[:, :, np.newaxis], res[:, :, np.newaxis], xs]

class LSTMRNN(object):
    """LSTM regression model mapping a sin sequence to a cos sequence.

    NOTE(review): the paste collapsed this class badly (the variable_scope
    blocks under __init__ were empty and the layer-building code was fused
    into them); reconstructed into the conventional
    add_input_layer / add_cell / add_output_layer methods that the
    scopes were clearly wrapping.
    """

    def __init__(self, n_steps, input_size, output_size, cell_size, batch_size):
        self.n_steps = n_steps
        self.input_size = input_size
        self.output_size = output_size
        self.cell_size = cell_size
        self.batch_size = batch_size
        # name_scope keeps these tensors grouped; an xs/ys in another scope
        # does not clash with these.
        with tf.name_scope('inputs'):
            # FIX: the original had name=xs / name=ys (missing quotes).
            self.xs = tf.placeholder(tf.float32, [None, n_steps, input_size],
                                     name='xs')
            self.ys = tf.placeholder(tf.float32, [None, n_steps, output_size],
                                     name='ys')
        # variable_scope + get_variable() enables variable sharing (the
        # same names point at the same storage).
        with tf.variable_scope('in_hidden'):
            self.add_input_layer()
        with tf.variable_scope('LSTM_cell'):
            self.add_cell()
        with tf.variable_scope('out_hidden'):
            self.add_output_layer()
        with tf.name_scope('cost'):
            self.compute_cost()
        with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(LR).minimize(self.cost)

    def add_input_layer(self):
        """Project the input from input_size up to cell_size."""
        l_in_x = tf.reshape(self.xs, [-1, self.input_size], name='2_2D')
        Ws_in = self._weight_variable([self.input_size, self.cell_size])
        bs_in = self._bias_variable([self.cell_size, ])
        with tf.name_scope('Wx_plus_b'):
            l_in_y = tf.matmul(l_in_x, Ws_in) + bs_in
        self.l_in_y = tf.reshape(l_in_y, [-1, self.n_steps, self.cell_size],
                                 name='2_3D')

    def add_cell(self):
        """Create the LSTM cell and unroll it over time."""
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.cell_size,
                                                 forget_bias=1.0,
                                                 state_is_tuple=True)
        with tf.name_scope('initial_state'):
            # All-zero initial state for one batch.
            self.cell_init_state = lstm_cell.zero_state(self.batch_size,
                                                        dtype=tf.float32)
        self.cell_outputs, self.cell_final_state = tf.nn.dynamic_rnn(
            lstm_cell, self.l_in_y, initial_state=self.cell_init_state,
            time_major=False)

    def add_output_layer(self):
        """Project the cell outputs down to output_size predictions."""
        l_out_x = tf.reshape(self.cell_outputs, [-1, self.cell_size],
                             name='2_2D')
        Ws_out = self._weight_variable([self.cell_size, self.output_size])
        bs_out = self._bias_variable([self.output_size, ])
        with tf.name_scope('Wx_plus_b'):
            self.pred = tf.matmul(l_out_x, Ws_out) + bs_out

    def compute_cost(self):
        """Average per-timestep squared error over the batch."""
        losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [tf.reshape(self.pred, [-1], name='reshape_pred')],
            [tf.reshape(self.ys, [-1], name='reshape_target')],
            [tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
            average_across_timesteps=True,
            softmax_loss_function=self.ms_error,
            name='losses'
        )
        with tf.name_scope('average_cost'):
            self.cost = tf.div(
                tf.reduce_sum(losses, name='losses_sum'),
                self.batch_size,
                name='average_cost',
            )
            tf.summary.scalar('cost', self.cost)

    def ms_error(self, labels, logits):
        """Squared error, used in place of softmax loss for regression."""
        return tf.square(tf.subtract(labels, logits))

    def _weight_variable(self, shape, name='weights'):
        initializer = tf.random_normal_initializer(mean=0, stddev=1.)
        return tf.get_variable(shape=shape, initializer=initializer, name=name)

    def _bias_variable(self, shape, name='biases'):
        initializer = tf.constant_initializer(0, 1)
        return tf.get_variable(name=name, shape=shape,
                               initializer=initializer)

if __name__ == '__main__':
    model = LSTMRNN(TIME_STEPS, INPUT_SIZE, OUTPUT_SIZE, CELL_SIZE, BATCH_SIZE)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # FIX: the original set `state = null` using a bogus
    # `from asn1crypto._ffi import null`; plain None is what is meant.
    state = None
    for i in range(200):
        seq, res, xs = get_batch()
        if i == 0:
            # First batch: let the cell start from its zero initial state.
            feed_dict = {
                model.xs: seq,
                model.ys: res,
            }
        else:
            # Later batches: carry the final LSTM state forward.
            feed_dict = {
                model.xs: seq,
                model.ys: res,
                model.cell_init_state: state,
            }
        _, cost, state, pred = sess.run(
            [model.train_op, model.cost, model.cell_final_state, model.pred],
            feed_dict=feed_dict)
        if i % 20 == 0:
            print('cost:', round(cost, 4))

import tensorflow as tf

# Define three command-line flags: name, default value, description.
tf.app.flags.DEFINE_string('str_name', 'def_v_1', "descrip1")
tf.app.flags.DEFINE_integer('int_name', 10, "descript2")
tf.app.flags.DEFINE_boolean('bool_name', False, "descript3")

FLAGS = tf.app.flags.FLAGS

# main must accept one argument, otherwise:
# 'TypeError: main() takes no arguments (1 given)'. The parameter name
# itself is arbitrary.
def main(_):
    # Put whatever the script needs to do here.
    print(FLAGS.str_name)
    print(FLAGS.int_name)
    print(FLAGS.bool_name)

if __name__ == '__main__':
    tf.app.run()  # parses the flags, then calls main()

FLAGS命令是指编写一个脚本文件，在执行这个脚本时添加相应的参数；

python tt.py --str_name test_str --int_name 99 --bool_name True

#对于2-D

# Tensor变换主要是对矩阵进行相应的运算工作，包涵的方法主要有：reduce_……（a, axis）系列；如果不加axis的话都是针对整个矩阵进行运算。

tf.reduce_sum(a, 1#axis1

tf.reduce_mean(a,0) #每列均值

NOTE:返回的都是行向量,（axis等于几，就是对那维操作,i.e.:沿着那维操作, 其它维度保留）

#关于concat，可以用来进行降维 3D->2D , 2D->1D

tf.concat(concat_dim, data)

#arr = np.zeros([2,3,4,5,6])

In [6]: arr2.shape

Out[6]: (2, 3, 4, 5)

In [7]: np.concatenate(arr2, 0).shape

Out[7]: (6, 4, 5)   # (2*3, 4, 5)

In [9]: np.concatenate(arr2, 1).shape

Out[9]: (3, 8, 5)   # (3, 2*4, 5)

#tf.concat()

t1 = [[1, 2, 3], [4, 5, 6]]

t2 = [[7, 8, 9], [10, 11, 12]]

# t1, t2进行concataxis0，等价于将shape=[2,2, 3]Tensor concat

#shape=[4, 3]tensor。在新生成的Tensortensor[:2,:]代表之前的t1

#tensor[2:,:]是之前的t2

tf.concat(0, [t1, t2]) ==> [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]

# t1, t2进行concataxis1，等价于将shape=[2,2, 3]Tensor concat

#shape=[2, 6]tensor。在新生成的Tensortensor[:,:3]代表之前的t1

#tensor[:,3:]是之前的t2

tf.concat(1, [t1, t2]) ==> [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]]

concat是将list中的向量给连接起来，axis表示将那维的数据连接起来，而其他维的结构保持不变

#squeeze 降维维度为1的降掉

tf.squeeze(arr, [])

arr = tf.Variable(tf.truncated_normal([3,4,1,6,1], stddev=0.1))

arr2 = tf.squeeze(arr, [2,4])

arr3 = tf.squeeze(arr) #降掉所以是1的维

# split(dimension, num_split, input)dimension的意思就是输入张量的哪一个维度，如果是0就表示对第0维度进行切割。num_split就是切割的数量，如果是2就表示输入张量被切成2份，每一份是一个列表。

tf.split(split_dim, num_split, value, name='split')

# ‘value’ is a tensor with shape [5, 30]

# Split ‘value’ into 3 tensors along dimension 1

split0, split1, split2 = tf.split(1, 3, value)

tf.shape(split0) ==> [5, 10]

#embedding: embedding_lookup是按照向量获取矩阵中的值，[0,2,3,1]是取第0,2,3,1个向量。

mat = np.array([1,2,3,4,5,6,7,8,9]).reshape((3,-1))

ids = [[1,2], [0,1]]

res = tf.nn.embedding_lookup(mat, ids)

res.eval()

array([[[4, 5, 6],

[7, 8, 9]],

[[1, 2, 3],

[4, 5, 6]]])

# expand_dims：扩展维度，如果想用广播特性的话，经常会用到这个函数

# ‘t’ is a tensor of shape [2]

#一次扩展一维

shape(tf.expand_dims(t, 0)) ==> [1, 2]

shape(tf.expand_dims(t, 1)) ==> [2, 1]

shape(tf.expand_dims(t, -1)) ==> [2, 1]

# ‘t2’ is a tensor of shape [2, 3, 5]

shape(tf.expand_dims(t2, 0)) ==> [1, 2, 3, 5]

shape(tf.expand_dims(t2, 2)) ==> [2, 3, 1, 5]

shape(tf.expand_dims(t2, 3)) ==> [2, 3, 5, 1]

tf.slice()

tf.slice(input_, begin, size, name=None)

o   切片的尺寸是size，切片的开始位置是begin。

o   切片的尺寸size表示输出tensor的数据维度，其中size[i]表示在第i维度上面的元素个数。

o   开始位置begin表示切片相对于输入数据input_的每一个偏移量

import tensorflow as tf

import numpy as np

sess = tf.Session()

input=tf.constant([[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]])

data = tf.slice(input, [1, 0, 0], [1, 1, 3])

print(sess.run(data))

“””[1,0,0]表示第一维偏移了1 则是从[[[3, 3, 3],[4, 4, 4]],[[5, 5, 5], [6, 6, 6]]]中选取数据然后选取第一维的第一个，第二维的第一个数据，第三维的三个数据“””

# [[[3 3 3]]]

#array([[ 6,  7],

#       [11, 12]])

tf.stack()

tf.stack(values, axis=0, name=’stack’)

tf.stack（）这是一个矩阵拼接的函数，tf.unstack（）则是一个矩阵分解的函数

Given a list of length N of tensors of shape (A, B, C);
if axis == 0 then the output tensor will have the shape (N, A, B, C)

if axis == 1 then the output tensor will have the shape (A, N, B, C).

Etc.

# ‘x’ is [1, 4]

# ‘y’ is [2, 5]

# ‘z’ is [3, 6]

stack([x, y, z]) => [[1, 4], [2, 5], [3, 6]]  # Pack along first dim.

stack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]]

tf.gather()：按照指定的下标集合从axis=0中抽取子集

tf.gather(params, indices, validate_indices=None,name=None)

·        tf.slice(input_,begin, size, name=None)：按照指定的下标范围抽取连续区域的子集

·        tf.gather(params,indices, validate_indices=None, name=None)：按照指定的下标集合从axis=0中抽取子集，适合抽取不连续区域的子集

``input = [[[1, 1, 1], [2, 2, 2]],``
``         [[3, 3, 3], [4, 4, 4]],``
``         [[5, 5, 5], [6, 6, 6]]]``
``tf.slice(input, [1, 0, 0], [1, 1, 3]) ==> [[[3, 3, 3]]]``
``tf.slice(input, [1, 0, 0], [1, 2, 3]) ==> [[[3, 3, 3],``
``                                            [4, 4, 4]]]``
``tf.slice(input, [1, 0, 0], [2, 1, 3]) ==> [[[3, 3, 3]],``
``                                           [[5, 5, 5]]]``
``tf.gather(input, [0, 2]) ==> [[[1, 1, 1], [2, 2, 2]],``
``                              [[5, 5, 5], [6, 6, 6]]]``

indices must be an integer tensor of any dimension(usually 0-D or 1-D).

Produces an output tensor with shape indices.shape +params.shape[1:]

# Scalar indices, 会降维

output[:, …, :] = params[indices, :, … :]

# Vector indices

output[i, :, …, :] = params[indices[i], :, … :]

# Higher rank indices，会升维

output[i, …, j, :, … :] = params[indices[i, …, j],:, …, :]

·        tensor: 任意shapetensor，维度 Dn

·        mode：CONSTANT表示填0，REFLECT表示反射填充，SYMMETRIC表示对称填充。

·        函数原型：

（输入数据，填充的模式，填充的内容，名称）

·        举个例子：

[[ 1, 2],
[1, 2]]

paddings = [[1, 1], [1, 1]] [[上，下],[左，右]]

Tensor=[[1,2],[1,2]]

init=tf.global_variables_initializer()

with tf.Session() as sess:

sess.run(init)

[[0, 0, 0, 0],
[0, 1, 2, 0],
[0, 1, 2, 0],
[0, 0, 0, 0]]

``tf.nn.softmax_cross_entropy_with_logits(logits, labels, name=None)``
``除去name参数用以指定该操作的name，与方法有关的一共两个参数：``
``第一个参数logits：就是神经网络最后一层的输出，如果有batch的话，它的大小就是[batchsize，num_classes]，单样本的话，大小就是num_classes``
``第二个参数labels：实际的标签，大小同上``
``具体的执行流程大概分为两步：``
``第一步是先对网络最后一层的输出做一个softmax，这一步通常是求取输出属于某一类的概率，对于单样本而言，输出就是一个num_classes大小的向量（[Y1，Y2,Y3...]其中Y1，Y2，Y3...分别代表了是属于该类的概率）。``
``第二步是softmax的输出向量[Y1，Y2,Y3...]和样本的实际标签做一个交叉熵。``
``第三步是求一个平均，得到我们想要的loss``
``最后上代码:``
``import tensorflow as tf  ``
``#our NN's output  ``
``logits=tf.constant([[1.0,2.0,3.0],[1.0,2.0,3.0],[1.0,2.0,3.0]])  ``
``#step1:do softmax  ``
``y=tf.nn.softmax(logits)  ``
``#true label  ``
``y_=tf.constant([[0.0,0.0,1.0],[0.0,0.0,1.0],[0.0,0.0,1.0]])  ``
``#step2:do cross_entropy  ``
``cross_entropy = -tf.reduce_sum(y_*tf.log(y))  ``
``#do cross_entropy just one step  ``
``cross_entropy2=tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits, y_))#dont forget tf.reduce_sum()!!  ``
``with tf.Session() as sess:  ``
``    softmax=sess.run(y)  ``
``    c_e = sess.run(cross_entropy)  ``
``    c_e2 = sess.run(cross_entropy2)  ``
``    print("step1:softmax result=")  ``
``    print(softmax)  ``
``    print("step2:cross_entropy result=")  ``
``    print(c_e)  ``
``    print("Function(softmax_cross_entropy_with_logits) result=")  ``
``    print(c_e2)  ``
``输出结果是：``
``step1:softmax result=  ``
``[[ 0.09003057  0.24472848  0.66524094]  ``
`` [ 0.09003057  0.24472848  0.66524094]  ``
`` [ 0.09003057  0.24472848  0.66524094]]  ``
``step2:cross_entropy result=  1.22282  ``
``Function(softmax_cross_entropy_with_logits) result=  1.2228  ``
``交叉熵可在神经网络(机器学习)中作为损失函数，p表示真实标记的分布，q则为训练后的模型的预测标记分布，交叉熵损失函数可以衡量p与q的相似性。交叉熵作为损失函数还有一个好处是使用sigmoid函数在梯度下降时能避免均方误差损失函数学习速率降低的问题，因为学习速率可以被输出的误差所控制。tensorflow中自带的函数可以轻松的实现交叉熵的计算。``
``tf.nn.softmax_cross_entropy_with_logits(_sentinel=None, labels=None, logits=None, dim=-1, name=None)``
``Computes softmax cross entropy between logits and labels.``
``注意：如果labels的每一行是one-hot表示，也就是只有一个地方为1，其他地方为0，可以使用tf.sparse_softmax_cross_entropy_with_logits()``
``警告：``
``1. 这个操作的输入logits是未经缩放的，该操作内部会对logits使用softmax操作``
``2. 参数labels,logits必须有相同的形状 [batch_size, num_classes] 和相同的类型(float16, float32, float64)中的一种``
``参数：``
``_sentinel: 一般不使用``
``labels: labels的每一行labels[i]必须为一个概率分布``
``logits: 未缩放的对数概率``
``dims: 类的维度，默认-1，也就是最后一维``
``name: 该操作的名称``
``返回值：长度为batch_size的一维Tensor``
``下面用个小例子来看看该函数的用法``
``import tensorflow as tf``
``labels = [[0.2,0.3,0.5],``
``          [0.1,0.6,0.3]]``
``logits = [[2,0.5,1],``
``          [0.1,1,3]]``
``logits_scaled = tf.nn.softmax(logits)``
``result1 = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)``
``result2 = -tf.reduce_sum(labels*tf.log(logits_scaled),1)``
``result3 = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits_scaled)``
``with tf.Session() as sess:``
``    print sess.run(result1)``
``    print sess.run(result2)``
``    print sess.run(result3)``
``>>>[ 1.41436887  1.66425455]``
``>>>[ 1.41436887  1.66425455]``
``>>>[ 1.17185783  1.17571414]``
``上述例子中，labels的每一行是一个概率分布，而logits未经缩放(每行加起来不为1)，我们用定义法计算得到交叉熵result2,和套用tf.nn.softmax_cross_entropy_with_logits()得到相同的结果, 但是将缩放后的logits_scaled输tf.nn.softmax_cross_entropy_with_logits(), 却得到错误的结果，所以一定要注意，这个操作的输入logits是未经缩放的``
``下面来看tf.nn.sparse_softmax_cross_entropy_with_logits(_sentinel=None, labels=None, logits=None, name=None)``
``这个函数与上一个函数十分类似，唯一的区别在于labels.``
``注意：``
``对于此操作，给定标签的概率被认为是排他的。labels的每一行为真实类别的索引``
``警告：``
``1. 这个操作的输入logits同样是是未经缩放的，该操作内部会对logits使用softmax操作``
``2. 参数logits的形状 [batch_size, num_classes] 和labels的形状[batch_size]``
``返回值：长度为batch_size的一维Tensor, 和label的形状相同，和logits的类型相同``
``import tensorflow as tf``
``labels = [0,2]``
``logits = [[2,0.5,1],``
``          [0.1,1,3]]``
``logits_scaled = tf.nn.softmax(logits)``
``result1 = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)``
``with tf.Session() as sess:``
``    print sess.run(result1)``
``>>>[ 0.46436879  0.17425454]``

tf.nn.sparse_softmax_cross_entropy_with_logits(logits,labels, name=None)

``def sparse_softmax_cross_entropy_with_logits(logits, labels, name=None):``
``#logits是最后一层的z（输入）``
``#A common use case is to have logits of shape `[batch_size, num_classes]` and``
``#labels of shape `[batch_size]`. But higher dimensions are supported.``
``#Each entry in `labels` must be an index in `[0, num_classes)```
``#输出：loss [batch_size]``

tf.nn.softmax_cross_entropy_with_logits(logits, targets, dim=-1, name=None)

``def softmax_cross_entropy_with_logits(logits, targets, dim=-1, name=None):``
``#`logits` and `labels` must have the same shape `[batch_size, num_classes]```
``#return loss:[batch_size], 里面保存是batch中每个样本的cross entropy``

tf.nn.sigmoid_cross_entropy_with_logits(logits,targets, name=None)

``def sigmoid_cross_entropy_with_logits(logits, targets, name=None):``
``#logits:[batch_size, num_classes],targets:[batch_size, size].logits作为用最后一层的输入就好，不需要进行sigmoid运算，函数内部进行了sigmoid操作。``
``#输出loss [batch_size, num_classes]。。。说的是logits，其实内部实现是relu``

tf.nn.nce_loss(nce_weights,nce_biases, embed, train_labels, num_sampled, vocabulary_size)

``def nce_loss(nce_weights, nce_biases, embed, train_labels, num_sampled, vocabulary_size):``
``#word2vec中用到了这个函数``
``#weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor```
``#        objects whose concatenation along dimension 0 has shape``
``#        [num_classes, dim].  The (possibly-partitioned) class embeddings.``
``#biases: A `Tensor` of shape `[num_classes]`.  The class biases.``
``#inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward``
``#        activations of the input network.``
``#labels: A `Tensor` of type `int64` and shape `[batch_size,``
``#    num_true]`. The target classes.``
``#num_sampled: An `int`.  The number of classes to randomly sample per batch.``
``#num_classes: An `int`. The number of possible classes.``
``#num_true: An `int`.  The number of target classes per training example.``

tf.nn.sequence_loss_by_example(logits,targets, weights, average_across_timesteps=True, softmax_loss_function=None,name=None):

``def sequence_loss_by_example(logits, targets, weights,``
``                             average_across_timesteps=True,``
``                             softmax_loss_function=None, name=None):``
``#logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].``
``#targets: List of 1D batch-sized int32 Tensors of the same length as logits.``
``#weights: List of 1D batch-sized float-Tensors of the same length as logits.``
``#return:log_pers 形状是 [batch_size].``
``   `for` logit, target, weight `in` zip(logits, targets, weights):``
``      `if` softmax_loss_function `is` `None`:``
``        `# TODO(irving,ebrevdo): This reshape is needed because`
``        `# sequence_loss_by_example is called with scalars sometimes, which`
``        `# violates our general scalar strictness policy.`
``        target = array_ops.reshape(target, [-`1`])``
``        crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(``
``            logit, target)``
``      `else`:``
``        crossent = softmax_loss_function(logit, target)``
``      log_perp_list.append(crossent * weight)``
``    log_perps = math_ops.add_n(log_perp_list)``
``    `if` average_across_timesteps:``
``      total_size = math_ops.add_n(weights) ``
``      total_size += `1e-12`  `# Just to avoid division by 0 for all-0 weights.`
``      log_perps /= total_size``
``  `return` log_perps``

Dropout 技术：Dropout是一个同正则化完全不同的技术，与L1L2范式正则化不同。dropout并不会修改代价函数而是修改深度网络本身。在我描述dropout的工作机制和dropout导致何种结果前，让我们假设我们正在训练如下一个网络。

我们前向传播输入项x通过修改后的网络，然后把得到的结果通过修改后的网络反向传播。对一个迷你批次的样本做完这样的处理后，我们更新相应的权重和偏置。这样重复迭代处理：首先恢复被dropout的神经元，然后选择一个新的随机隐层神经元的子集去删除，估计不同样本批次的梯度，最后更新网络的权重和偏置。

dropout处理看起来是奇怪并且没有规律的。为什么我们希望他对正则化有帮助呢。来解释dropout到底发生了什么。我们先不要思考dropout技术。而是想象我们用一个正常的方式训练一个神经网络。特别的。假设我们训练了几个完全不同的神经网络。用的是完全相同的训练数据。当然了。因为随机初始化参数或其他原因。训练得到的结果也许是不同的。当这种情况发生的时候，我们就可以平均这几种网络的结果，或者根据相应的规则决定使用哪一种神经网络输出的结果。例如。如果我们训练了五个网络。其中三个分类一个数字为3，最终的结果就是他是3的可能性更大一些。其他的两个网络也许有些错误。这种平均的架构被发现通常是十分有用的来减少过拟合。（当然这种训练多个网络的代价也是昂贵的。）出现这种结果的原因就是不同的网络是在不同的方式上过拟合。通过平均可以排除掉这种过拟合。

### 如何使用tensorflow内置的参数导出和导入方法：基本用法

`import` tensorflow `as` tf``
``"""``
``变量声明，运算声明 例：w = tf.get_variable(name="vari_name", shape=[], dtype=tf.float32)``
``初始化op声明``
``"""``
``#创建saver对象，它添加了一些op用来save和restore模型参数``
``saver = tf.train.Saver()``
`` ``
`with` tf.Session() `as` sess:``
``    sess.run(init_op)``
``    `#训练模型。。。`
``    `#使用saver提供的简便方法去调用 save op`
``    saver.save(sess, `"save_path/file_name.ckpt"`) `#file_name.ckpt如果不存在的话，会自动创建`
``#后缀可加可不加``

`key-value`来进行的(详见)

`import` tensorflow `as` tf``
``"""``
``变量声明，运算声明``
``初始化op声明``
``"""``
``#创建saver 对象``
``saver = tf.train.Saver()``
`with` tf.Session() `as` sess:``
``    sess.run(init_op)`#在这里，可以执行这个语句，也可以不执行，即使执行了，初始化的值也会被restore的值给override`
``    saver.restore(sess, `"save_path/file_name.ckpt-???"`) ``
``    `#会将已经保存的变量值resotre到变量中,自己看好要restore哪步的`

## 如何restore变量的子集,然后使用初始化op初始化其他变量

``#想要实现这个功能的话,必须从Saver的构造函数下手``
``saver=tf.train.Saver([sub_set])``
``init = tf.initialize_all_variables()``
`with` tf.Session() `as` sess:``
``  `#这样你就可以使用restore的变量替换掉初始化的变量的值,而其它初始化的值不受影响`
``  sess.run(init)``
``  `if` restor_from_checkpoint:``
``      saver.restore(sess,`"file.ckpt"`)``
``  `# train`
``  saver.save(sess,`"file.ckpt"`)``

## Saver

`tensorflow` 中的 `Saver` 对象是用于 参数保存和恢复的。如何使用呢？

``v1 = tf.Variable(..., name=`'v1'`)``
``v2 = tf.Variable(..., name=`'v2'`)``
``# Pass the variables as a dict:``
``saver = tf.train.Saver({`'v1'`: v1, `'v2'`: v2})``
``# Or pass them as a list.``
``saver = tf.train.Saver([v1, v2])``
``# Passing a list is equivalent to passing a dict with the variable op names``
``# as keys:``
``saver = tf.train.Saver({v.op.name: v `for` v `in` [v1, v2]})``
``#注意，如果不给Saver传var_list 参数的话， 他将已 所有可以保存的 variable作为其var_list的值。``

`import` tensorflow `as` tf``
``# Create some variables.``
``v1 = tf.Variable(`1.0`, name=`"v1"`)``
``v2 = tf.Variable(`2.0`, name=`"v2"`)``
``saver = tf.train.Saver({`"variable_1"`:v1, `"variable_2"`: v2})``
``# Use the saver object normally after that.``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    saver.save(sess, `'test-ckpt/model-2'`)``

`from` tensorflow.python.tools.inspect_checkpoint `import` print_tensors_in_checkpoint_file``
``print_tensors_in_checkpoint_file(`"test-ckpt/model-2"`, `None`, `True`)``
``# 输出:``
``#tensor_name:  variable_1``
``#1.0``
``#tensor_name:  variable_2``
``#2.0``

`import` tensorflow `as` tf``
``# Create some variables.``
``v1 = tf.Variable(`1.0`, name=`"v1"`)``
``v2 = tf.Variable(`2.0`, name=`"v2"`)``
``saver = tf.train.Saver([v1, v2])``
``# Use the saver object normally after that.``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    saver.save(sess, `'test-ckpt/model-2'`)``

``tensor_name:  v1``
``1.0``
``tensor_name:  v2``
``2.0``

`import` tensorflow `as` tf``
``# Create some variables.``
``v1 = tf.Variable(`1.0`, name=`"v1"`)``
``v2 = tf.Variable(`2.0`, name=`"v2"`)``
``saver = tf.train.Saver({`"variable_1"`:v1, `"variable_2"`: v2})``
``# Use the saver object normally after that.``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    saver.save(sess, `'test-ckpt/model-2'`)``

`save` 部分的代码如上所示，下面写 `restore` 的代码，和`save`代码有点不同。

`import` tensorflow `as` tf``
``# Create some variables.``
``v1 = tf.Variable(`1.0`, name=`"v1"`)``
``v2 = tf.Variable(`2.0`, name=`"v2"`)``
``#restore的时候，variable_1对应到v2，variable_2对应到v1，就可以实现目的了。``
``saver = tf.train.Saver({`"variable_1"`:v2, `"variable_2"`: v1})``
``# Use the saver object normally after that.``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    saver.restore(sess, `'test-ckpt/model-2'`)``
``    print(sess.run(v1), sess.run(v2))``
``# 输出的结果是 2.0 1.0，如我们所望``

·        save时，表示：`variable`的值应该保存到 `checkpoint`文件中的哪个 `key`

·        restore时，表示：`checkpoint`文件中`key`对应的值，应该`restore`到哪个`variable`

## 其它

``ckpt = tf.train.get_checkpoint_state(ckpt_dir)``
`if` ckpt `and` ckpt.model_checkpoint_path:``
``    saver.restore(sess, ckpt.model_checkpoint_path)``

Ø  VALID的方式是采用丢弃的方式，比如上述的input_width=13，只允许滑动2次，多余的元素全部丢掉；

Ø  SAME的方式是采用补全的方式，对于上述的情况，允许滑动3次，但是需要补3个元素，左奇右偶，在左边补一个0，右边补2个0；

1，tf.shape(a)和a.get_shape()的比较：

相同点：都可以得到tensor a的尺寸

可以为：tf.shape(x)[2],即取第三个值。

a.get_shape()中a的数据类型只能是tensor，且返回的是一个元组（tuple）；

2，例子：

[python] view plain copy

1.  import tensorflow as tf

2.  import numpy as np

3.

4.  x=tf.constant([[1,2,3],[4,5,6]])

5.  y=[[1,2,3],[4,5,6]]

6.  z=np.arange(24).reshape([2,3,4])

7.

8.  sess=tf.Session()

9.  # tf.shape()

10.    x_shape=tf.shape(x)                    #  x_shape 是一个tensor

11.    y_shape=tf.shape(y)                    #  <tf.Tensor ‘Shape_2:0’ shape=(2,) dtype=int32>

12.    z_shape=tf.shape(z)                    #  <tf.Tensor ‘Shape_5:0’ shape=(3,) dtype=int32>

13.    print sess.run(x_shape)              结果:[2 3]

14.    print sess.run(y_shape)              结果:[2 3]

15.    print sess.run(z_shape)              结果:[2 3 4]

16.

17.

18.    #a.get_shape()

19.    x_shape=x.get_shape()  返回的是TensorShape([Dimension(2), Dimension(3)]),不能使用 sess.run() 因为返回的不是tensor string,而是元组

20.    x_shape=x.get_shape().as_list()  可以使用 as_list()得到具体的尺寸，x_shape=[2 3]

21.    y_shape=y.get_shape()  # AttributeError: ‘list’ object has no attribute ‘get_shape’

22.    z_shape=z.get_shape()  # AttributeError: ‘numpy.ndarray’ object has no attribute ‘get_shape’

23.

Fetch的意思就是在一个会话（session）中可以同时运行多个op

24.    #coding:utf-8

25.    import tensorflow as tf

26.    #Fetch

27.    input1 = tf.constant(3.0)

28.    input2 = tf.constant(1.0)

29.    input3 = tf.constant(5.0)

30.    add = tf.add(input1,input2)

31.    mul = tf.multiply(input1,add)

32.    with tf.Session() as sess:

33.        result =sess.run([mul,add]) #同时运行两个op

34.        print (result)

35.    Total memory: 10.91GiB

36.    Free memory: 10.21GiB

37.    I tensorflow/core/common_runtime/gpu/gpu_device.cc:906] DMA: 0

38.    I tensorflow/core/common_runtime/gpu/gpu_device.cc:916] 0:   Y

39.    I tensorflow/core/common_runtime/gpu/gpu_device.cc:975] CreatingTensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080 Ti, pci busid: 0000:03:00.0)

40.    [12.0, 4.0]

Feed的字面意思是喂养，流入。在tensorflow里面就是说先声明一个或者几个tensor，先用占位符给他们留几个位置，等到后面run的时候，再以其他形式比如字典的形式把值传进去，相当于买了两个存钱罐，先不存钱，等我想存的时候我再把钱一张一张进去。

41.    #Feed

42.    #创建占位符

43.    input1 = tf.placeholder(tf.float32)

44.    input2 = tf.placeholder(tf.float32)

45.    output = tf.multiply(input1,input2)

46.

47.    with tf.Session() as sess:

48.        #feed的数据以字典的形式传入

49.        print(sess.run(output,feed_dict={input1:[7.], input2:[8.]}))

50.    I tensorflow/core/common_runtime/gpu/gpu_device.cc:975] Creating TensorFlowdevice (/gpu:0) -> (device: 0, name: GeForce GTX 1080 Ti, pci bus id:0000:03:00.0)

51.    [ 56.]

52.

## 执行sess.run()时，tensorflow是否计算了整个图。

53.    import tensorflow as tf

54.    state = tf.Variable(0.0,dtype=tf.float32)

55.    one = tf.constant(1.0,dtype=tf.float32)

56.    new_val = tf.add(state, one)

57.    update = tf.assign(state, new_val) #返回tensor值为new_val

58.    update2 = tf.assign(state, 10000) #没有fetch，便没有执行

59.    init = tf.initialize_all_variables()

60.    with tf.Session() as sess:

61.        sess.run(init)

62.        for _ in range(3):

63.            print sess.run(update)

## sess.run()中的feed_dict

64.    import tensorflow as tf

65.    y = tf.Variable(1)

66.    b = tf.identity(y)

67.    with tf.Session() as sess:

68.        tf.global_variables_initializer().run()

69.        print(sess.run(b,feed_dict={y:3})) #使用3 替换掉

70.        #tf.Variable(1)的输出结果，所以打印出来3

71.        #feed_dict{y.name:3} 和上面写法等价

72.

73.        print(sess.run(b))  #由于feed只在调用他的方法范围内有效，所以这个打印的结果是 1

74.

## 前言

Seq2seq-attention模型的原理：

## 实战代码：

1. 数据集

– source_data: 每一行是一个单词

– target_data: 每一行是经过字母排序后的“单词”，它的每一行与source_data中每一行一一对应

2. 数据预览

3. 数据预处理

·        < PAD>: 补全字符。

·        < EOS>: 解码器端的句子结束标识符。

·        < UNK>: 低频词或者一些未遇到过的词等。

·        < GO>: 解码器端的句子起始标识符。

4. 模型构建

Encoder

``tf.contrib.layers.embed_sequence(features,vocab_size=n_words, embed_dim=10)``

Decoder

·        target数据进行处理

·        构造Decoder

·        Embedding

·        构造Decoder

·        构造输出层，输出层会告诉我们每个时间序列的RNN输出结果

·        Training Decoder

·        Predicting Decoder

1. target数据处理

·        在训练过程中，我们需要将我们的target序列作为输入传给DecoderRNN的每个阶段，而不是使用前一阶段预测输出，这样会使得模型更加准确。（这就是为什么我们会构建TrainingPredicting两个Decoder的原因，下面还会有对这部分的解释）。

·        需要用target数据来计算模型的loss

2. 构造Decoder

·        target数据进行embedding

·        构造Decoder端的RNN单元。

·        构造输出层，从而得到每个时间序列上的预测结果。

·        构造training decoder

·        构造predicting decoder

decoder层的代码如下：

``# ``超参数``
``# Number of Epochs``
``epochs = 60``
``# Batch Size``
``batch_size = 128``
``# RNN Size``
``rnn_size = 50``
``# Number of Layers``
``num_layers = 2``
``# Embedding Size``
``encoding_embedding_size = 15``
``decoding_embedding_size = 15``
``# Learning Rate``
``learning_rate = 0.001``

``[['h', 'e', 'l', 'l', 'o'],``
`` ['w', 'h', 'a', 't']]``

``[['h', 'e', 'l', 'l', 'o'],``
`` ['w', 'h', 'a', 't', '<PAD>']]``

## Seq2seq的常用函数：

### Ø  tf.sampled_softmax_loss()：

``进行候选采样的时候，每次只评估所有类别的一个很小的子集，这样可以提高运算效率，在这样的方式下，计算函数的对应损失需要用到tf.sampled_softmax_loss()。``
``tf.sampled_softmax_loss()中调用了_compute_sampled_logits() ``
``#此函数和nce_loss是差不多的, 取样求loss``
``def sampled_softmax_loss(weights, #[num_classes, dim]``
``                           biases,  #[num_classes]``
``                           inputs,  #[batch_size, dim]``
``                           labels,  #[batch_size, num_true]``
``                           num_sampled,``
``                           num_classes,``
``                           num_true=1,``
``                         sampled_values=None,``
``                         remove_accidental_hits=True,``
``                         partition_strategy="mod",``
``                         name="sampled_softmax_loss"):``
``#return: [batch_size]``

### Ø  tf.nn.seq2seq.embedding_attention_seq2seq（）

``def embedding_attention_seq2seq(encoder_inputs, #[T， batch_size]``
``                                decoder_inputs, #[out_T， batch_size]``
``                                cell,``
``                                num_encoder_symbols,``
``                                num_decoder_symbols,``
``                                embedding_size,``
``                                num_heads=1, #只采用一个read head``
``                                output_projection=None,``
``                                feed_previous=False,``
``                                dtype=None,``
``                                scope=None,``
``                                initial_state_attention=False):``
``#output_projection: (W, B) W:[output_size, num_decoder_symbols]``
``#B: [num_decoder_symbols]                    ``

(1)这个函数创建了一个 inputs 的 embedding matrix.
(2)计算了encoder的 output，并保存起来，用于计算attention

``encoder_cell = rnn_cell.EmbeddingWrapper(``
``      cell, embedding_classes=num_encoder_symbols,``
``      embedding_size=embedding_size)`# 创建了inputs的 embedding matrix`
``encoder_outputs, encoder_state = rnn.rnn(``
``      encoder_cell, encoder_inputs, dtype=dtype) `#return [T ，batch_size，size]`

（3）生成attention states

``  top_states = [array_ops.reshape(e, [-`1`, `1`, cell.output_size])``
``                `for` e `in` encoder_outputs]  `# T * batch_size * 1 * size`
``  attention_states = array_ops.concat(`1`, top_states) `# batch_size*T*size`

（4）剩下的工作交给embedding_attention_decoder,embedding_attention_decoder中创建了decoder的embeddingmatrix

``# Decoder.``
``  output_size = `None`
``  `if` output_projection `is` `None`:``
``    cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)``
``    output_size = num_decoder_symbols``
`` ``
``  `if` isinstance(feed_previous, bool):``
``    `return` embedding_attention_decoder(``
``        decoder_inputs,``
``        encoder_state,``
``        attention_states,``
``        cell,``
``        num_decoder_symbols,``
``        embedding_size,``
``        num_heads=num_heads,``
``        output_size=output_size,``
``        output_projection=output_projection,``
``        feed_previous=feed_previous,``
``        initial_state_attention=initial_state_attention)``

tf.nn.rnn_cell.EmbeddingWrapper()

embedding_attention_seq2seq中调用了这个类

``class EmbeddingWrapper(RNNCell):``
``  `def __init__(self, cell, embedding_classes, embedding_size, initializer=None):`
``  `def __call__(self, inputs, state, scope=None):#生成embedding矩阵[embedding_classes,embedding_size]`
``  `#inputs: [batch_size, 1]`
``  `#return : (output, state)`

tf.nn.rnn_cell.OutputProgectionWrapper()

``class OutputProjectionWrapper(RNNCell):``
``  `def __init__(self, cell, output_size):` `# output_size:映射后的size`
``  `def __call__(self, inputs, state, scope=None):`
``#init 返回一个带output projection的 rnn_cell``

tf.nn.seq2seq.embedding_attention_decoder()

``#生成embedding matrix ：[num_symbols, embedding_size]``
``def embedding_attention_decoder(decoder_inputs, # T*batch_size``
``                                initial_state,``
``                                attention_states,``
``                                cell,``
``                                num_symbols,``
``                                embedding_size,``
``                                num_heads=1,``
``                                output_size=None,``
``                                output_projection=None,``
``                                feed_previous=False,``
``                                update_embedding_for_previous=True,``
``                                dtype=None,``
``                                scope=None,``
``                                initial_state_attention=False):``
``#核心代码``
``  embedding = variable_scope.get_variable(`"embedding"`,``
``                                          [num_symbols, embedding_size])  `#output embedding`
``  loop_function = _extract_argmax_and_embed(``
``      embedding, output_projection,``
``      update_embedding_for_previous) `if` feed_previous `else` `None`
``  emb_inp = [``
``      embedding_ops.embedding_lookup(embedding, i) `for` i `in` decoder_inputs]``
``  `return` attention_decoder(``
``      emb_inp,``
``      initial_state,``
``      attention_states,``
``      cell,``
``      output_size=output_size,``
``      num_heads=num_heads,``
``      loop_function=loop_function,``
``      initial_state_attention=initial_state_attention)``

emb_in是embedded input ：[T, batch_size, embedding_size]

tf.nn.attention_decoder()

``def attention_decoder(decoder_inputs, #[T, batch_size, input_size]``
``                      initial_state,  #[batch_size, cell.states]``
``                      attention_states, #[batch_size , attn_length , attn_size]``
``                      cell,``
``                      output_size=None,``
``                      num_heads=1,``
``                      loop_function=None,``
``                      dtype=None,``
``                      scope=None,``
``                      initial_state_attention=False):``

(1) $u_i^t = v^T \tanh(W_1 h_i + W_2 d_t)$

(2) $a_i^t = \mathrm{softmax}(u_i^t)$

(3) $d'_t = \sum_{i=1}^{T_A} a_i^t h_i$

``hidden = array_ops.reshape(``
``      attention_states, [-`1`, attn_length, `1`, attn_size]) `#[batch_size * T * 1 * input_size]`
``  hidden_features = []``
``  v = []``
``  attention_vec_size = attn_size  `# Size of query vectors for attention.`
``  `for` a `in` xrange(num_heads):``
``    k = variable_scope.get_variable(`"AttnW_%d"` % a,``
``                                    [`1`, `1`, attn_size, attention_vec_size])``
``    hidden_features.append(nn_ops.conv2d(hidden, k, [`1`, `1`, `1`, `1`], `"SAME"`))``
``    v.append(``
``        variable_scope.get_variable(`"AttnV_%d"` % a, [attention_vec_size])) `#attention_vec_size = attn_size`

``     y = linear(query, attention_vec_size, `True`)``
``     y = array_ops.reshape(y, [-`1`, `1`, `1`, attention_vec_size])``
``     `# Attention mask is a softmax of v^T * tanh(...).`
``     s = math_ops.reduce_sum(``
``         v[a] * math_ops.tanh(hidden_features[a] + y), [`2`, `3`]) `#[batch_size, attn_length, 1, attn_size]`
``     a = nn_ops.softmax(s) `#s" [batch_size * attn_len]`
``     `# Now calculate the attention-weighted vector d.`
``     d = math_ops.reduce_sum(``
``         array_ops.reshape(a, [-`1`, attn_length, `1`, `1`]) * hidden,``
``         [`1`, `2`])``
``     ds.append(array_ops.reshape(d, [-`1`, attn_size]))``

即代码中 $y = W_2 * d_t$，$d = d'$

def rnn()

`from` tensorflow.python.ops `import` rnn``
``rnn.rnn()``
``def rnn(cell, inputs, initial_state=None, dtype=None,``
``        sequence_length=None, scope=None):``
``#inputs: A length T list of inputs, each a `Tensor` of shape`[batch_size, input_size]```
``#sequence_length: [batch_size], 指定sample 序列的长度``
``#return : (outputs, states), outputs: T*batch_size*output_size. states:batch_size*state``

## seq2seqModel

·        创建映射参数 proj_w, proj_b

·        声明：sampled_loss，看了word2vec的就会理解

·        声明：seq2seq_f()，构建了inputsembeddingoutputsembedding，进行核心计算

·        使用model_with_buckets(),model_with_buckets中调用了seq2seq_f sampled_loss

### model_with_buckets()

``def model_with_buckets(encoder_inputs, decoder_inputs, targets, weights,``
``                       buckets, seq2seq, softmax_loss_function=None,``
``                       per_example_loss=False, name=None):``
``"""Create a sequence-to-sequence model with support for bucketing.``
``The seq2seq argument is a function that defines a sequence-to-sequence model,``
``e.g., seq2seq = lambda x, y: basic_rnn_seq2seq(x, y, rnn_cell.GRUCell(24))``
``Args:``
``encoder_inputs: A list of Tensors to feed the encoder; first seq2seq input.``
``decoder_inputs: A list of Tensors to feed the decoder; second seq2seq input.``
``targets: A list of 1D batch-sized int32 Tensors (desired output sequence).``
``weights: List of 1D batch-sized float-Tensors to weight the targets.``
``buckets: A list of pairs of (input size, output size) for each bucket.``
``seq2seq: A sequence-to-sequence model function; it takes 2 input that agree with encoder_inputs and decoder_inputs, and returns a pair consisting of outputs and states (as, e.g., basic_rnn_seq2seq).``
``softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch``
``to be used instead of the standard softmax (the default if this is None).``
``per_example_loss: Boolean. If set, the returned loss will be a batch-sized``
``tensor of losses for each sequence in the batch. If unset, it will be``
``a scalar with the averaged loss from all examples.``
``name: Optional name for this operation, defaults to "model_with_buckets".``
``Returns:``
``A tuple of the form (outputs, losses), where:``
``outputs: The outputs for each bucket. Its j'th element consists of a list``
``of 2D Tensors. The shape of output tensors can be either``
``[batch_size x output_size] or [batch_size x num_decoder_symbols]``
``depending on the seq2seq model used.``
``losses: List of scalar Tensors, representing losses for each bucket, or,``
``if per_example_loss is set, a list of 1D batch-sized float Tensors.``
``Raises:``
``ValueError: If length of encoder_inputsut, targets, or weights is smaller``
``than the largest (last) bucket.``
``"""``

One hot representation用来表示词向量非常简单，但是却有很多问题。最大的问题是我们的词汇表一般都非常大，比如达到百万级别，这样每个词都用百万维的向量来表示简直是内存的灾难。这样的向量其实除了一个位置是1，其余的位置全部都是0，表达的效率不高，能不能把词向量的维度变小呢？

Dristributed representation可以解决One hot representation的问题，它的思路是通过训练，将每个词都映射到一个较短的词向量上来。所有的这些词向量就构成了向量空间，进而可以用普通的统计学的方法来研究词与词之间的关系。这个较短的词向量维度是多大呢？这个一般需要我们在训练时自己来指定。

比如下图我们将词汇表里的词用“Royalty”,”Masculinity”,”Femininity”“Age”4个维度来表示，King这个词对应的词向量可能是(0.99,0.99,0.05,0.7)(0.99,0.99,0.05,0.7)。当然在实际情况中，我们并不能对词向量的每个维度做一个很好的解释。

Word2Vec 代码库中关于CBOW训练的代码，其实就是神经元网路的标准反向传播算法。

Word2Vec 代码库中关于Skip-gram训练的代码，其实就是神经元网路的标准反向传播算法。

Ø  用图标来表示word2vec模型：

CBOW模型

Skip-gram模型

·        计算所有 trainablevariables 梯度

·        apply them to variables

Clip：

2.    Process the gradients as you wish. 处理梯度

3.    Apply the processed gradients withapply_gradients(). apply处理后的梯度给variables

# Create an optimizer.optimizer必须和variable在一个设备上声明

# Compute the gradients for a list of variables.

# grads_and_vars is a list of tuples (gradient,variable).  Do whatever you

# need to the ‘gradient’ part, for example cap them, etc.

capped_grads_and_vars = [(MyCapper(gv[0]), gv[1]) for gv in grads_and_vars]

# Ask the optimizer to apply the capped gradients.

#return a list of trainable variable in you model

params = tf.trainable_variables()

#create an optimizer

#compute gradients for params

tf.get_variable与tf.Variable不同的一点是，前者拥有一个变量检查机制，会检测已经存在的变量是否设置为共享变量，如果已经存在的变量没有设置为共享变量，TensorFlow 运行到第二个拥有相同名字的变量的时候，就会报错。

def my_image_filter(input_images):

conv1_weights =tf.Variable(tf.random_normal([5, 5, 32, 32]),

name=”conv1_weights”)

conv1_biases =tf.Variable(tf.zeros([32]), name=”conv1_biases”)

conv1 =tf.nn.conv2d(input_images, conv1_weights,

strides=[1,1, 1, 1], padding=’SAME’)

return  tf.nn.relu(conv1 + conv1_biases)

# First call creates one set of 2 variables.

result1 = my_image_filter(image1)

# Another set of 2 variables is created in the secondcall.

result2 = my_image_filter(image2)

result1 = my_image_filter(image1)

result2 = my_image_filter(image2)

# Raises ValueError(… conv1/weights already exists …)

1

with tf.variable_scope(“image_filters”) asscope:

result1 =my_image_filter(image1)

scope.reuse_variables() # or

#tf.get_variable_scope().reuse_variables()

result2 =my_image_filter(image2)

2)

with tf.variable_scope(“image_filters1”) asscope1:

result1 =my_image_filter(image1)

with tf.variable_scope(scope1, reuse = True)

result2 =my_image_filter(image2)

with tf.variable_scope(“foo”):

withtf.name_scope(“bar”):

v = tf.get_variable(“v”, [1])

x = 1.0 + v

assert v.name == “foo/v:0”

assert x.op.name == “foo/bar/add”

`with` tf.name_scope(`"hello"`) `as` name_scope:``
``    arr1 = tf.get_variable(`"arr1"`, shape=[`2`,`10`],dtype=tf.float32)``
`` ``
``    `print` name_scope``
``    `print` arr1.name``
``    `print` `"scope_name:%s "` % tf.get_variable_scope().original_name_scope``

hello/
arr1:0
scope_name:

·        tf.name_scope() 返回的是一个string,”hello/”

·        name_scope使用 `get_variable()` 中定义的 variable name 并没有 “hello/”前缀

·        tf.get_variable_scope().original_name_scope 是空

`with` tf.variable_scope(`"hello"`) `as` variable_scope:``
``    arr1 = tf.get_variable(`"arr1"`, shape=[`2`, `10`], dtype=tf.float32)``
`` ``
``    `print` variable_scope``
``    `print` variable_scope.name `#打印出变量空间名字`
``    `print` arr1.name``
``    `print` tf.get_variable_scope().original_name_scope``
``    `#tf.get_variable_scope() 获取的就是variable_scope`
`` ``
``    `with` tf.variable_scope(`"xixi"`) `as` v_scope2:``
``        `print` tf.get_variable_scope().original_name_scope``
``        `#tf.get_variable_scope() 获取的就是v _scope2`

``<tensorflow`.python.ops.variable`_scope`.VariableScope` object at `0x7fbc09959210`>``
``hello``
``hello/arr1:`0`
``hello/``
``hello/xixi/``

·        tf.variable_scope() 返回的是一个 `VariableScope` 对象

·        variable_scope使用 `get_variable` 定义的variable name加上了”hello/”前缀

·        tf.get_variable_scope().original_name_scope 是嵌套后的 scope name

`with` tf.name_scope(`"name1"`):``
``    `with` tf.variable_scope(`"var1"`):``
``        w = tf.get_variable(`"w"`,shape=[`2`])``
``        res = tf.add(w,[`3`])``
`` ``
`print` w.name``
`print` res.name``
``# 输出``
``var1/w:`0`
``name1/var1/Add:`0`

·        `variable scope``name scope`都会给`op``name`加上前缀

·        这实际上是因为创建 `variable_scope` 时内部会创建一个同名的 `name_scope`

·        `name_scope` 返回的是 string,  `variable_scope` 返回的是对象. 这也可以感觉到`variable_scope` 能干的事情比 `name_scope` 要多.

·        name_scope get_variable()创建的变量的名字不会有任何影响,而创建的`op`会被加上前缀.

·        tf.get_variable_scope() 返回的只是 variable_scope,不管 name_scope. 所以以后我们在使用tf.get_variable_scope().reuse_variables()时可以无视name_scope

## 其它

`with` tf.name_scope(`"scope1"`) `as` scope1:``
``    `with` tf.name_scope(`"scope2"`) `as` scope2:``
``        `print` scope2``
``#输出：scope1/scope2/``
`import` tensorflow `as` tf``
`with` tf.variable_scope(`"scope1"`) `as` scope1:``
``    `with` tf.variable_scope(`"scope2"`) `as` scope2:``
``        `print` scope2.name``
``#输出:scope1/scope2``

## name_scope可以用来干什么

`import` tensorflow `as` tf``
`with` tf.name_scope(`'hidden'`) `as` scope:``
``  a = tf.constant(`5`, name=`'alpha'`)``
``  W = tf.Variable(tf.random_uniform([`1`, `2`], -`1.0`, `1.0`), name=`'weights'`)``
``  b = tf.Variable(tf.zeros([`1`]), name=`'biases'`)``
``  `print` a.name``
``  `print` W.name``
``  `print` b.name``

hidden/alpha
hidden/weights
hidden/biases
name_scope 是给op_name加前缀,variable_scope是给get_variable()创建的变量的名字加前缀。

tf.variable_scope有时也会处理命名冲突

`import` tensorflow `as` tf``
``def test(name=None):``
``    `with` tf.variable_scope(name, default_name=`"scope"`) `as` scope:``
``        w = tf.get_variable(`"w"`, shape=[`2`, `10`])``
``test()``
``test()``
``ws = tf.trainable_variables()``
`for` w `in` ws:``
``    print(w.name)``
``#scope/w:0``
``#scope_1/w:0``
``#可以看出，如果只是使用default_name这个属性来创建variable_scope``
``#的时候，会处理命名冲突``

## 其它

·        tf.name_scope(None) 有清除name scope的作用

`import` tensorflow `as` tf``
`with` tf.name_scope(`"hehe"`):``
``    w1 = tf.Variable(`1.0`)``
``    `with` tf.name_scope(`None`):``
``        w2 = tf.Variable(`2.0`)``
``print(w1.name)``
``print(w2.name)``
``#hehe/Variable:0``
``#Variable:0``

## 总结

1. 使用`tf.Variable()`的时候，`tf.name_scope()``tf.variable_scope()` 都会给 `Variable` 和 `op` 的 `name`属性加上前缀。
2. 使用`tf.get_variable()`的时候，`tf.name_scope()`就不会给 `tf.get_variable()`创建出来的`Variable`加前缀。但是 `tf.Variable()` 创建出来的就会受到 `name_scope` 的影响.

Ø  run_cell._linear()

def_linear(args,output_size, bias, bias_start=0.0, scope=None):

·        args: list of tensor [batch_size, size]. 注意,list中的每个tensorsize 并不需要一定相同,batch_size要保证一样.

·        output_size : 一个整数

·        bias: bool, True表示bias,False表示不加

·        return : [batch_size, output_size]

PS: _ref 后缀表示 reference-typed，是可变的（mutable）

Ø  rnn_cell.BasicLSTMCell()

classBasicLSTMCell(RNNCell):

def__init__(self,num_units, forget_bias=1.0,input_size=None,

state_is_tuple=True, activation=tanh):

“””

It does not allow cell clipping, a projection layer, anddoes not

use peep-hole connections: it is the basic baseline.

“””

·        num_units: lstm单元的output_size

·        input_size: 这个参数没必要输入, 官方说马上也要禁用了

·        state_is_tuple: True的话, (c_state,h_state)作为tuple返回

·        activation: 激活函数

rnn_cell.GRUCell()

classGRUCell(RNNCell):

def__init__(self,num_units, input_size=None, activation=tanh):

rnn_cell.LSTMCell()

classLSTMCell(RNNCell):

def__init__(self,num_units, input_size=None,

use_peepholes=False, cell_clip=None,

initializer=None, num_proj=None, proj_clip=None,

num_unit_shards=1, num_proj_shards=1,

forget_bias=1.0, state_is_tuple=True,

activation=tanh):

·        num_proj: Python integer，映射输出的 size，用了这个就不需要下面那个类了

rnn_cell.OutputProjectionWrapper()

classOutputProjectionWrapper(RNNCell):

def__init__(self,cell, output_size):

·        output_size: 要映射的 size

·        return: 返回一个带有 OutputProjection Layer cell(s)

rnn_cell.InputProjectionWrapper():

classInputProjectionWrapper(RNNCell):

def__init__(self,cell, num_proj, input_size=None):

·        和上面差不多,一个输出映射,一个输入映射

rnn_cell.DropoutWrapper()

classDropoutWrapper(RNNCell):

def__init__(self,cell, input_keep_prob=1.0,output_keep_prob=1.0,

seed=None):

·        dropout

rnn_cell.EmbeddingWrapper():

classEmbeddingWrapper(RNNCell):

def__init__(self,cell, embedding_classes, embedding_size, initializer=None):

·        返回一个带有 embedding cell

rnn_cell.MultiRNNCell():

classMultiRNNCell(RNNCell):

def__init__(self,cells, state_is_tuple=True):

·        用来增加 rnn 的层数

·        cells : list of cell

·        返回一个多层的 cell

tensorflow的可视化是使用summarytensorboard合作完成的.

with tf.Session() as sess:

writer =tf.summary.FileWriter(your_dir, sess.graph)

#ops

loss = …

tf.summary.scalar(“loss”, loss)

merged_summary = tf.summary.merge_all()

init = tf.global_variables_initializer()

with tf.Session() as sess:

writer= tf.summary.FileWriter(your_dir, sess.graph)

sess.run(init)

for i in xrange(100):

_,summary =sess.run([train_op,merged_summary], feed_dict)

·        tf.summary.merge_all: 将之前定义的所有summary op整合到一起

·        FileWriter: 创建一个filewriter用来向硬盘写summary数据,

·        tf.summary.scalar(summary_tags,Tensor/variable, collections=None): 用于标量的 summary

·        tf.summary.image(tag, tensor, max_images=3,collections=None, name=None):tensor,必须4维,形状[batch_size, height, width, channels],max_images(最多只能生成3张图片的summary),觉着这个用在卷积中的kernel可视化很好用.max_images确定了生成的图片是[-max_images: ,height, width, channels]，还有一点就是，TensorBord中看到的imagesummary永远是最后一个global step

·        tf.summary.histogram(tag, values,collections=None, name=None):values,任意形状的tensor,生成直方图summary

·        tf.summary.audio(tag, tensor, sample_rate,max_outputs=3, collections=None, name=None)

FileWriter

tf.summary.FileWriter.__init__(logdir, graph=None, max_queue=10,flush_secs=120, graph_def=None)

Creates a FileWriter and an eventfile.

# max_queue: 在向disk写数据之前，最大能够缓存event的个数

# flush_secs: 每多少秒像disk中写数据，并清空对象缓存

2.    只要是在计算图上的Summary op，都会被merge_all捕捉到， 不需要考虑变量生命周期问题！

3.    如果执行一次，disk上没有保存Summary数据的话，可以尝试下file_writer.flush()

tf.summary有诸多函数：

1tf.summary.scalar

tf.summary.scalar(tags, values, collections=None,name=None)

2tf.summary.histogram

tf.summary.histogram(tags, values, collections=None,name=None)

3tf.summary.distribution

4tf.summary.text

text =”””/a/b/c\\_d/f\\_g\\_h\\_2017″””

summary_op0 = tf.summary.text(‘text’,tf.convert_to_tensor(text))

5tf.summary.image

6tf.summary.audio

7tf.summary.merge_all

merge_all 可以将所有summary全部保存到磁盘，以便tensorboard显示。如果没有特殊要求，一般用这一句就可一显示训练时的各种信息了。

8tf.summary.FileWriter

Tensorflow Summary 用法示例:

tf.summary.scalar(‘accuracy’,acc)                   #生成准确率标量图

merge_summary = tf.summary.merge_all()

train_writer = tf.summary.FileWriter(dir,sess.graph)#定义一个写入summary的目标文件，dir为写入文件地址

……(交叉熵、优化器等定义

for step in xrange(training_step):                  #训练循环

train_summary =sess.run(merge_summary,feed_dict = {…})#调用sess.run运行图，生成一步的训练过程数据

tensorboard –logdir=/summary_dir

9tf.summary.merge

tf.summary.scalar(‘accuracy’,acc)                   #生成准确率标量图

merge_summary =tf.summary.merge([tf.get_collection(tf.GraphKeys.SUMMARIES,’accuracy’),…(其他要显示的信息)])

train_writer = tf.summary.FileWriter(dir,sess.graph)#定义一个写入summary的目标文件，dir为写入文件地址

……(交叉熵、优化器等定义

for step in xrange(training_step):                  #训练循环

train_summary =sess.run(merge_summary,feed_dict = {…})#调用sess.run运行图，生成一步的训练过程数据

tf.GraphKeys.SUMMARIES 是 summary collection 中的标志。

acc_summary = tf.summary.scalar(‘accuracy’,acc)                   #生成准确率标量图

merge_summary = tf.summary.merge([acc_summary ,…(其他要显示的信息)])  #这里的[]不可省

tf.ConfigProto一般用在创建session的时候。用来对session进行参数配置

with tf.Session(config = tf.ConfigProto(…),…)

#tf.ConfigProto()的参数

log_device_placement=True : 是否打印设备分配日志

allow_soft_placement=True如果你指定的设备不存在，允许TF自动分配设备

tf.ConfigProto(log_device_placement=True,allow_soft_placement=True)

Ø  控制GPU资源使用率

#allow growth

config = tf.ConfigProto()

config.gpu_options.allow_growth = True

session = tf.Session(config=config, …)

# 使用 allow_growth option，刚一开始分配少量的GPU容量，然后按需慢慢的增加，由于不会释放

#内存，所以会导致碎片

# per_process_gpu_memory_fraction

gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

config=tf.ConfigProto(gpu_options=gpu_options)

session = tf.Session(config=config, …)

#设置每个GPU应该拿出多少容量给进程使用，0.4代表 40%

Ø  控制使用哪块GPU

~/ CUDA_VISIBLE_DEVICES=0  python your.py#使用GPU0

~/ CUDA_VISIBLE_DEVICES=0,1 pythonyour.py#使用GPU0,1

#注意单词不要打错

#或者在程序开头

os.environ[‘CUDA_VISIBLE_DEVICES’] = ‘0’#使用 GPU 0

os.environ[‘CUDA_VISIBLE_DEVICES’] = ‘0,1’# 使用 GPU 01

batch_size: batch的大小
mini_batch: 将训练样本以batch_size分组
epoch_size: 样本分为几个mini_batch
num_epoch : 训练几轮

1.    如何处理数据

2.    如何构建计算图

3.    如何计算梯度

4.    如何Summary，如何save模型参数

5.    如何执行计算图

train_set, valid_set, test_set = split_set(data)

classDataManager(object):

#raw_data 是 train_set、valid_set 或 test_set

def__init__(self,raw_data, batch_size):

self.raw_data =raw_data

self.batch_size= batch_size

self.epoch_size= len(raw_data)/batch_size

self.counter = 0#监测batch index

defnext_batch(self):

…

self.counter +=1

return batched_x,batched_label, …

1. 因为如果我们在训练的时候加dropout的话，那么在测试的时候是需要把这个dropout层去掉的。这样的话，在写代码的时候，你就可以创建两个对象。这就相当于建了两个模型，然后让这两个模型参数共享，就可以达到训练测试一起运行的效果了。具体看下面代码。

classModel(object):

def__init__(self,is_training, config, scope,…):#scope可以使你正确的summary

self.is_training = is_training

self.config =config

#placeholder:用于feed数据

# 一个train op

self.graph(self.is_training) #构建图

self.merge_op =tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES,scope))

defgraph(self,is_training):

…

#定义计算图

self.predict =…

self.loss = …

batch_size: batch的大小
mini_batch: 将训练样本以batch_size分组
epoch_size: 样本分为几个mini_batch
num_epoch : 训练几轮

#eval_op是用来指定是否需要训练模型，需要的话，传入模型的eval_op

#raw_data用于接收train_data、valid_data和test_data

def run_epoch(raw_data, session, model, is_training_set, …):

data_manager =DataManager(raw_data, model.config.batch_size)

#通过is_training_set来决定fetch哪些Tensor

1.    分解原始数据为trainvalidtest

2.    设置默认图

3.    建图 trian, test 分别建图

4.    一个或多个Saver对象，用来保存模型参数

5.    创建session初始化变量

6.    一个summary.FileWriter对象，用来将summary写入到硬盘中

7.    run epoch

FileWriter 和 Saver对象，一个计算图只需要一个就够了，所以放在Model类的外面

classPTBInput(object):

“””Theinput data.”””

def__init__(self,config, data, name=None):

self.batch_size= batch_size = config.batch_size

self.num_steps= num_steps = config.num_steps

self.epoch_size= ((len(data) // batch_size) – 1) // num_steps

self.input_data, self.targets = reader.ptb_producer(

data,batch_size, num_steps, name=name)

classPTBModel(object):

“””ThePTB model.”””

def__init__(self,is_training, config, input_):

self._input =input_

batch_size =input_.batch_size

num_steps =input_.num_steps

size =config.hidden_size

vocab_size =config.vocab_size

# Slightlybetter results can be obtained with forget gate biases

#initialized to 1 but the hyperparameters of the model would need to be

# different than reported in thepaper.

lstm_cell =tf.contrib.rnn.BasicLSTMCell(

size,forget_bias=0.0, state_is_tuple=True)

if is_trainingand config.keep_prob < 1:

lstm_cell =tf.contrib.rnn.DropoutWrapper(

lstm_cell, output_keep_prob=config.keep_prob)

cell =tf.contrib.rnn.MultiRNNCell(

[lstm_cell]* config.num_layers, state_is_tuple=True)

self._initial_state = cell.zero_state(batch_size, data_type())

with tf.device(“/cpu:0”):

embedding =tf.get_variable(

“embedding”,[vocab_size, size], dtype=data_type())

inputs =tf.nn.embedding_lookup(embedding, input_.input_data)

if is_trainingand config.keep_prob < 1:

inputs =tf.nn.dropout(inputs, config.keep_prob)

#Simplified version of models/tutorials/rnn/rnn.py’s rnn().

# Thisbuilds an unrolled LSTM for tutorial purposes only.

# Ingeneral, use the rnn() or state_saving_rnn() from rnn.py.

#

# Thealternative version of the code below is:

#

# inputs =tf.unstack(inputs, num=num_steps, axis=1)

# outputs,state = tf.nn.rnn(cell, inputs,

#                           initial_state=self._initial_state)

outputs = []

state =self._initial_state

withtf.variable_scope(“RNN”):

for time_step inrange(num_steps):

if time_step> 0: tf.get_variable_scope().reuse_variables()

(cell_output, state) = cell(inputs[:, time_step, :], state)

outputs.append(cell_output)

output =tf.reshape(tf.concat_v2(outputs, 1), [-1, size])

softmax_w =tf.get_variable(

“softmax_w”, [size,vocab_size], dtype=data_type())

softmax_b =tf.get_variable(“softmax_b”, [vocab_size],dtype=data_type())

logits =tf.matmul(output, softmax_w) + softmax_b

loss =tf.contrib.legacy_seq2seq.sequence_loss_by_example(

[logits],

[tf.reshape(input_.targets, [-1])],

[tf.ones([batch_size * num_steps], dtype=data_type())])

self._cost =cost = tf.reduce_sum(loss) / batch_size

self._final_state = state

ifnotis_training:

return

self._lr =tf.Variable(0.0, trainable=False)

tvars =tf.trainable_variables()

global_step=tf.contrib.framework.get_or_create_global_step()

self._new_lr =tf.placeholder(

tf.float32,shape=[], name=“new_learning_rate”)

self._lr_update= tf.assign(self._lr, self._new_lr)

defassign_lr(self,session, lr_value):

session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

defrun_epoch(session,model, eval_op=None, verbose=False):

“””Runsthe model on the given data.”””

start_time =time.time()

costs = 0.0

iters = 0

state =session.run(model.initial_state)

fetches = {

“cost”:model.cost,

“final_state”:model.final_state,

}

if eval_op isnotNone:

fetches[“eval_op”] = eval_op

for step inrange(model.input.epoch_size):

feed_dict = {}

for i, (c, h) inenumerate(model.initial_state):

feed_dict[c]= state[i].c

feed_dict[h]= state[i].h

vals =session.run(fetches, feed_dict)

cost = vals[“cost”]

state = vals[“final_state”]

costs += cost

iters +=model.input.num_steps

if verbose and step %(model.input.epoch_size // 10) == 10:

print(“%.3fperplexity: %.3f speed: %.0f wps” %

(step *1.0 / model.input.epoch_size, np.exp(costs /iters),

iters* model.input.batch_size / (time.time() – start_time)))

returnnp.exp(costs / iters)

defmain(_):

ifnotFLAGS.data_path:

raise ValueError(“Mustset –data_path to PTB data directory”)

train_data,valid_data, test_data, _ = raw_data

config =get_config()

eval_config =get_config()

eval_config.batch_size = 1

eval_config.num_steps = 1

withtf.Graph().as_default():

initializer =tf.random_uniform_initializer(-config.init_scale,

config.init_scale)

withtf.name_scope(“Train”):

train_input =PTBInput(config=config, data=train_data, name=“TrainInput”)

withtf.variable_scope(“Model”, reuse=None,initializer=initializer):

m =PTBModel(is_training=True, config=config,input_=train_input)

tf.contrib.deprecated.scalar_summary(“TrainingLoss”, m.cost)

tf.contrib.deprecated.scalar_summary(“LearningRate”, m.lr)

withtf.name_scope(“Valid”):

valid_input =PTBInput(config=config, data=valid_data, name=“ValidInput”)

withtf.variable_scope(“Model”, reuse=True,initializer=initializer):

mvalid =PTBModel(is_training=False,config=config, input_=valid_input)

tf.contrib.deprecated.scalar_summary(“ValidationLoss”, mvalid.cost)

withtf.name_scope(“Test”):

test_input =PTBInput(config=eval_config, data=test_data, name=“TestInput”)

withtf.variable_scope(“Model”, reuse=True,initializer=initializer):

mtest =PTBModel(is_training=False,config=eval_config,

input_=test_input)

sv =tf.train.Supervisor(logdir=FLAGS.save_path)

withsv.managed_session() as session:

for i inrange(config.max_max_epoch):

lr_decay =config.lr_decay ** max(i + 1 – config.max_epoch, 0.0)

m.assign_lr(session, config.learning_rate * lr_decay)

print(“Epoch:%d Learning rate: %.3f” % (i + 1,session.run(m.lr)))

train_perplexity = run_epoch(session, m, eval_op=m.train_op,

verbose=True)

print(“Epoch:%d Train Perplexity: %.3f” % (i + 1,train_perplexity))

valid_perplexity = run_epoch(session, mvalid)

print(“Epoch:%d Valid Perplexity: %.3f” % (i + 1,valid_perplexity))

test_perplexity = run_epoch(session, mtest)

print(“TestPerplexity: %.3f” % test_perplexity)

ifFLAGS.save_path:

print(“Savingmodel to %s.” % FLAGS.save_path)

sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)

if __name__ == “__main__”:

tf.app.run()

tensorflowcollection提供一个全局的存储机制，不会受到变量名生存空间的影响。一处保存，到处可取。

#collection中存数据

#Stores value in the collection with the given name.

#Note that collections are not sets, so it is possible toadd a value to a collection

#several times.

#tf.get_collection(“haha”)获得的是 [[a,b]], 并不是[a,b]

#这个和上面函数功能上没有区别，区别是，这个函数是给默认图使用的

#collection中获取数据

tf.Graph.get_collection(name, scope=None)

1.   在训练深度神经网络的时候，我们经常会使用Dropout，然而在`test`的时候，需要把`dropout`撤掉.为了应对这种问题，我们通常要建立两个模型，让他们共享变量。详情.也可以通过设置 train_flag, 这里只讨论第一个方法可能会碰到的问题.

2.   为了使用`Tensorboard`来可视化我们的数据，我们会经常使用`Summary`，最终都会用一个简单的`merge_all`函数来管理我们的`Summary`

## 错误示例

`import` tensorflow `as` tf``
`import` numpy `as` np``
``class Model(object):``
``    `def __init__(self):`
``        self.graph()``
``        self.merged_summary = tf.summary.merge_all()`# 引起血案的地方`
``    `def graph(self):`
``        self.x = tf.placeholder(dtype=tf.float32,shape=[`None`,`1`])``
``        self.label = tf.placeholder(dtype=tf.float32, shape=[`None`,`1`])``
``        w = tf.get_variable(`"w"`,shape=[`1`,`1`])``
``        self.predict = tf.matmul(self.x,w)``
``        self.loss = tf.reduce_mean(tf.reduce_sum(tf.square(self.label-self.predict),axis=`1`))``
``        self.train_op = tf.train.GradientDescentOptimizer(`0.01`).minimize(self.loss)``
``        tf.summary.scalar(`"loss"`,self.loss)``
``def run_epoch(session, model):``
``    x = np.random.rand(`1000`).reshape(-`1`,`1`)``
``    label = x*`3`
``    feed_dic = {model.x.name:x, model.label:label}``
``    su = session.run([model.merged_summary], feed_dic)``
``def main():``
``    `with` tf.Graph().as_default():``
``        `with` tf.name_scope(`"train"`):``
``            `with` tf.variable_scope(`"var1"`,dtype=tf.float32):``
``                model1 = Model()``
``        `with` tf.name_scope(`"test"`):``
``            `with` tf.variable_scope(`"var1"`,reuse=`True`,dtype=tf.float32):``
``                model2 = Model()``
``        `with` tf.Session() `as` sess:``
``            tf.global_variables_initializer().run()``
``            run_epoch(sess,model1)``
``            run_epoch(sess,model2)``
`if` __name__ == `"__main__"`:``
``    main()``

## 错误原因

``class Model(object):``
``    `def __init__(self):`
``        self.graph()``
``        self.merged_summary = tf.summary.merge_all()`# 引起血案的地方`
``...``
`with` tf.name_scope(`"train"`):``
``    `with` tf.variable_scope(`"var1"`,dtype=tf.float32):``
``        model1 = Model() `# 这里的merge_all只是管理了自己的summary`
`with` tf.name_scope(`"test"`):``
``    `with` tf.variable_scope(`"var1"`,reuse=`True`,dtype=tf.float32):``
``        model2 = Model()`# 这里的merge_all管理了自己的summary和上边模型的Summary`

## 解决方法

``class Model(object):``
``    `def __init__(self，scope):`
``        self.graph()``
``        self.merged_summary = tf.summary.merge(``
``        tf.get_collection(tf.GraphKeys.SUMMARIES,scope)``
``        )``
``...``
`with` tf.Graph().as_default():``
``    `with` tf.name_scope(`"train"`) `as` train_scope:``
``        `with` tf.variable_scope(`"var1"`,dtype=tf.float32):``
``            model1 = Model(train_scope)``
``    `with` tf.name_scope(`"test"`) `as` test_scope:``
``        `with` tf.variable_scope(`"var1"`,reuse=`True`,dtype=tf.float32):``
``            model2 = Model(test_scope)``

## error

tensorflow.python.framework.errors_impl.InvalidArgumentError:You must feed a value for placeholder tensor ‘train/var1/Placeholder’ withdtype float
[Node:train/var1/Placeholder = Placeholder[dtype=DT_FLOAT, shape=[],_device=”/job:localhost/replica:0/task:0/gpu:0”]]

#wrong

import tensorflow as tf

w1 = tf.Variable([[1,2]])

w2 = tf.Variable([[3,4]])

res = tf.matmul(w1, [[2],[1]])

with tf.Session() as sess:

tf.global_variables_initializer().run()

print(re)

TypeError: Fetch argument None has invalid type

# right

import tensorflow as tf

w1 = tf.Variable([[1,2]])

w2 = tf.Variable([[3,4]])

res = tf.matmul(w1, [[2],[1]])

with tf.Session() as sess:

tf.global_variables_initializer().run()

print(sess.run(res))

#  [array([[2, 1]],dtype=int32)]

import tensorflow as tf

w1 = tf.get_variable(‘w1’, shape=[3])

w2 = tf.get_variable(‘w2’, shape=[3])

w3 = tf.get_variable(‘w3’, shape=[3])

w4 = tf.get_variable(‘w4’, shape=[3])

z1 = w1 + w2+ w3

z2 = w3 + w4

tf.convert_to_tensor([3.,2.,4.])])

with tf.Session() as sess:

tf.global_variables_initializer().run()

[array([ 2.2.3.],dtype=float32),

array([ 2.2.3.],dtype=float32),

array([ 5.4.7.],dtype=float32),

array([ 3.2.4.],dtype=float32)]

import tensorflow as tf

w1 = tf.Variable(2.0)

w2 = tf.Variable(2.0)

a = tf.multiply(w1, 3.0)

# b=w1*3.0*w2

a_stoped = tf.stop_gradient(a)

b = tf.multiply(a_stoped, w2)

#输出

a = tf.Variable(1.0)

b = tf.Variable(1.0)

c = tf.add(a, b)

d = tf.add(a, b)

c_stoped = tf.stop_gradient(c)

e = tf.add(c_stoped, d)

with tf.Session() as sess:

tf.global_variables_initializer().run()

#输出 [1.0, 1.0]

import tensorflow as tf

w1 = tf.Variable(2.0)

w2 = tf.Variable(2.0)

a = tf.multiply(w1, 3.0)

# b=w1*3.0*w2

a_stoped = tf.stop_gradient(a)

b = tf.multiply(a_stoped, w2)

#其它地方都会运行正常，无论是梯度的计算还是变量的更新。总觉着tensorflow这么设计有点不好，

#不如改成流过去的梯度为0

with tf.Session() as sess:

tf.global_variables_initializer().run()

print(sess.run(train_op))

print(sess.run([w1, w2]))

Ø  高阶导数

tensorflow 求 高阶导数可以使用 tf.gradients 来实现

import tensorflow as tf

with tf.device(‘/cpu:0’):

a =tf.constant(1.)

b = tf.pow(a, 2)

with tf.Session() as sess:

Note: 有些 op，tf 没有实现其高阶导的计算，例如 tf.add …, 如果计算了一个没有实现 高阶导的 op的高阶导， gradients 会返回 None。

## 结构

1.   先构建单GPU代码

2.   写个函数`multi_gpu_model(num_gpus)`来生成多`GPU`代码，并将对象保存在`collection`

3.   feed data

4.   run

## 如何实现`multi_gpu_model`函数

``def multi_gpu_model(num_gpus=1):``
``  grads = []``
``  `for` i `in` range(num_gpus):``
``    `with` tf.device(`"/gpu:%d"`%i):``
``      `with` tf.name_scope(`"tower_%d"`%i):``
``        model = Model(is_training, config, scope)``
``        `# 放到collection中，方便feed的时候取`
``        tf.add_to_collection(`"train_model"`, model)``
``        grads.append(model.grad) `#grad 是通过tf.gradients(loss, vars)求得`
``        `#以下这些add_to_collection可以直接在模型内部完成。`
``        `# 将loss放到 collection中， 方便以后操作`
``        tf.add_to_collection(`"loss"`,model.loss)``
``        `#将predict放到collection中，方便操作`
``        tf.add_to_collection(`"predict"`, model.predict)``
``        `#将 summary.merge op放到collection中，方便操作`
``        tf.add_to_collection(`"merge_summary"`, model.merge_summary)``
``        `# ...`
``  `with` tf.device(`"cpu:0"`):``
``    averaged_gradients = average_gradients(grads)`# average_gradients后面说明`
``    opt = tf.train.GradientDescentOptimizer(learning_rate)``
``    train_op=opt.apply_gradients(zip(average_gradients,tf.trainable_variables()))``
`` ``
``  `return` train_op``

## 如何`feed data`

``def generate_feed_dic(model, feed_dict, batch_generator):``
``  x, y = batch_generator.next_batch()``
``  feed_dict[model.x] = x``
``  feed_dict[model.y] = y``

## 如何实现run_epoch

``#这里的scope是用来区别 train 还是 test``
``def run_epoch(session, data_set, scope, train_op=None, is_training=True):``
``  batch_generator = BatchGenerator(data_set, batch_size)``
``  ...``
``  ...``
``  `if` is_training `and` train_op `is` `not` `None`:``
``    models = tf.get_collection(`"train_model"`)``
``    `# 生成 feed_dict`
``    feed_dic = {}``
``    `for` model `in` models:``
``      generate_feed_dic(model, feed_dic, batch_generator)``
``    `#生成fetch_dict`
``    losses = tf.get_collection(`"loss"`, scope)`#保证了在 test的时候，不会fetch train的loss`
``    ...``
``    ...``

## main函数

main函数干了以下几件事：
1. 数据处理
2. 建立多GPU训练模型
3. 建立单/多GPU测试模型
4. 创建`Saver`对象和`FileWriter`对象
5. 创建`session`
6. run_epoch

``data_process()``
`with` tf.name_scope(`"train"`) `as` train_scope:``
``  train_op = multi_gpu_model(..)``
`with` tf.name_scope(`"test"`) `as` test_scope:``
``  model = Model(...)``
``saver = tf.train.Saver()``
``# 建图完毕，开始执行运算``
`with` tf.Session() `as` sess:``
``  writer = tf.summary.FileWriter(...)``
``  ...``
``  run_epoch(...,train_scope)``
``  run_epoch(...,test_scope)``

``def average_gradients(grads):#grads:[[grad0, grad1,..], [grad0,grad1,..]..]``
``  averaged_grads = []``
``  `for` grads_per_var `in` zip(*grads):``
``    grads = []``
``    `for` grad `in` grads_per_var:``
``      expanded_grad = tf.expand_dims(grad,`0`)``
``      grads.append(expanded_grad)``
``    grads = tf.concat_v2(grads, `0`)``
``    grads = tf.reduce_mean(grads, `0`)``
``    averaged_grads.append(grads)``
`` ``
``  `return` averaged_grads``

``def average_gradients(grads):#grads:[[grad0, grad1,..], [grad0,grad1,..]..]``
``  averaged_grads = []``
``  `for` grads_per_var `in` zip(*grads):``
``    grads = tf.reduce_mean(grads_per_var, `0`)``
``    averaged_grads.append(grads)``
``  `return` averaged_grads``

deconv 解卷积，实际是叫做 conv_transpose。conv_transpose 实际是卷积的一个逆向过程，tf 中，编写 conv_transpose 代码的时候，心中想着一个正向的卷积过程会很有帮助。

input_shape = [1,5,5,3]
kernel_shape=[2,2,3,1]
strides=[1,2,2,1]

import tensorflow as tf

tf.set_random_seed(1)

x = tf.random_normal(shape=[1,3,3,1])

#正向卷积的kernel的模样

kernel = tf.random_normal(shape=[2,2,3,1])

y = tf.nn.conv2d_transpose(x,kernel,output_shape=[1,5,5,3],

# 在这里，output_shape=[1,6,6,3]也可以，考虑正向过程，[1,6,6,3]

# 通过kernel_shape:[2,2,3,1],strides:[1,2,2,1]也可以

# 获得x_shape:[1,3,3,1]

# output_shape 也可以是一个 tensor

sess = tf.Session()

tf.global_variables_initializer().run(session=sess)

print(y.eval(session=sess))

conv2d_transpose 中会计算 output_shape 能否通过给定的参数计算出 inputs的维度，如果不能，则报错

import tensorflow as tf

from tensorflow.contrib import slim

inputs = tf.random_normal(shape=[3, 97, 97, 10])

conv1 = slim.conv2d(inputs, num_outputs=20,kernel_size=3, stride=4)

de_weight = tf.get_variable(‘de_weight’, shape=[3, 3, 10, 20])

deconv1 = tf.nn.conv2d_transpose(conv1, filter=de_weight,output_shape=tf.shape(inputs),

strides=[1, 3, 3, 1], padding=‘SAME’)

# ValueError: Shapes (3, 33, 33, 20) and (3, 25, 25, 20)are not compatible

·        conv1 shape (3, 25, 25, 20)

·        但是 deconv1 对 conv1 求导的时候，得到的导数 shape 却是 [3, 33, 33, 20]，这个和 conv1 的 shape 不匹配，当然要报错咯。

import tensorflow as tf

from tensorflow.contrib import slim

import numpy as np

inputs = tf.placeholder(tf.float32, shape=[None, None, None, 3])

conv1 = slim.conv2d(inputs, num_outputs=20,kernel_size=3, stride=4)

de_weight = tf.get_variable(‘de_weight’, shape=[3, 3, 3, 20])

deconv1 = tf.nn.conv2d_transpose(conv1, filter=de_weight,output_shape=tf.shape(inputs),

strides=[1, 3, 3, 1], padding=‘SAME’)

loss = deconv1 - inputs

with tf.Session() as sess:

tf.global_variables_initializer().run()

for i in range(10):

data_in =np.random.normal(size=[3, 97, 97, 3])

_, los_ =sess.run([train_op, loss], feed_dict={inputs: data_in})

print(los_)

# InvalidArgumentError (see above for traceback):Conv2DSlowBackpropInput: Size of out_backprop doesn’t match computed: actual =25, computed = 33

·        conv1  shape 第二维或第三维的 shape 25

·        但是 deconv1 conv1 求导的时候，得到的倒数 shape 的第二位或第三维却是 33

deconv 求导就相当于 拿着 conv_transpose 中的参数对 deconv 输出的值的导数做卷积。

·        传入 tensor

# 可以用 placeholder

outputs_shape = tf.placeholder(dtype=tf.int32, shape=[4])

deconv1 = tf.nn.conv2d_transpose(conv1, filter=de_weight,output_shape=output_shape,

strides=[1, 3, 3, 1], padding=‘SAME’)

# 可以用 inputs shape，但是有点改变

inputs_shape = tf.shape(inputs)

outputs_shape = [inputs_shape[0],inputs_shape[1], inputs_shape[2],some_value]

deconv1 = tf.nn.conv2d_transpose(conv1, filter=de_weight,output_shape=outputs_shape,

strides=[1, 3, 3, 1], padding=‘SAME’)

# ExponentialMovingAverage

Sometraining algorithms, such as GradientDescent and Momentum often benefit frommaintaining a moving average of variables during optimization. Using the movingaverages for evaluations often improve results significantly.
`tensorflow` 官网上对于这个方法功能的介绍。`GradientDescent` 和 `Momentum` 方式的训练 都能够从 `ExponentialMovingAverage` 方法中获益。

假设有一串时间序列 $\{a_1, a_2, a_3, \dots, a_{t-1}, a_t, \dots\}$

,那么，这串时间序列的 `MovingAverage` 就是：

$$mv_t = \text{decay}\cdot mv_{t-1} + (1-\text{decay})\cdot a_t$$

将其逐项展开：

$$\begin{aligned}
mv_t &= (1-\text{decay})\,a_t + \text{decay}\cdot mv_{t-1}\\
mv_{t-1} &= (1-\text{decay})\,a_{t-1} + \text{decay}\cdot mv_{t-2}\\
mv_{t-2} &= (1-\text{decay})\,a_{t-2} + \text{decay}\cdot mv_{t-3}\\
&\dots
\end{aligned}$$

代入可得：

$$mv_t = \sum_{i=1}^{t} \text{decay}^{\,t-i}\,(1-\text{decay})\,a_i$$

当 $t-i > C$（$C$ 为某足够大的数）时

$$\text{decay}^{\,t-i}\,(1-\text{decay})\,a_i \approx 0$$

, 所以

$$mv_t \approx \sum_{i=t-C}^{t} \text{decay}^{\,t-i}\,(1-\text{decay})\,a_i$$

。即， $mv_t$ 的值只和 $\{a_{t-C},\dots,a_t\}$ 有关。

## tensorflow中的ExponentialMovingAverage

,就知道各代表什么意思了。
`shadowvariables are created with trainable=False`。用其来存放 ema 的值

import tensorflow as tf

w = tf.Variable(1.0)
ema = tf.train.ExponentialMovingAverage(0.9)
update = tf.assign_add(w, 1.0)

with tf.control_dependencies([update]):
    # apply() returns the op that refreshes the shadow (moving-average)
    # value; it must be created before calling ema.average(w).
    ema_op = ema.apply([w])
# Look up the shadow value using w as the key; average() takes a single
# variable, not a list.
ema_val = ema.average(w)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(3):
        sess.run(ema_op)
        print(sess.run(ema_val))
# w takes the values 2, 3, 4 in turn, so the output is:
# 1.1    = 0.9*1    + 0.1*2
# 1.29   = 0.9*1.1  + 0.1*3
# 1.561  = 0.9*1.29 + 0.1*4

``# Create variables.``
``var0 = tf.Variable(...)``
``var1 = tf.Variable(...)``
``# ... use the variables to build a training model...``
``...``
``# Create an op that applies the optimizer.  This is what we usually``
``# would use as a training op.``
``opt_op = opt.minimize(my_loss, [var0, var1])``
``# Create an ExponentialMovingAverage object``
``ema = tf.train.ExponentialMovingAverage(decay=`0.9999`)``
``# Create the shadow variables, and add ops to maintain moving averages``
``# of var0 and var1.``
``maintain_averages_op = ema.apply([var0, var1])``
``# Create an op that will update the moving averages after each training``
``# step.  This is what we will use in place of the usual training op.``
`with` tf.control_dependencies([opt_op]):``
``    training_op = tf.group(maintain_averages_op)``
``    `# run这个op获取当前时刻 ema_value`
``    get_var0_average_op = ema.average(var0)``

## 使用 ExponentialMovingAveraged parameters

``# Create a Saver that loads variables from their saved shadow values.``
``shadow_var0_name = ema.average_name(var0)``
``shadow_var1_name = ema.average_name(var1)``
``saver = tf.train.Saver({shadow_var0_name: var0, shadow_var1_name: var1})``
``saver.restore(...checkpoint filename...)``
``# var0 and var1 now hold the moving average values``

``#Returns a map of names to Variables to restore.``
``variables_to_restore = ema.variables_to_restore()``
``saver = tf.train.Saver(variables_to_restore)``
``...``
``saver.restore(...checkpoint filename...)``

# Saver

`tensorflow` 中的 `Saver` 对象是用于 参数保存和恢复的。如何使用呢？

``v1 = tf.Variable(..., name=`'v1'`)``
``v2 = tf.Variable(..., name=`'v2'`)``
``# Pass the variables as a dict:``
``saver = tf.train.Saver({`'v1'`: v1, `'v2'`: v2})``
``# Or pass them as a list.``
``saver = tf.train.Saver([v1, v2])``
``# Passing a list is equivalent to passing a dict with the variable op names``
``# as keys:``
``saver = tf.train.Saver({v.op.name: v `for` v `in` [v1, v2]})``
``#注意，如果不给Saver传var_list 参数的话， 他将已 所有可以保存的 variable作为其var_list的值。``

`import` tensorflow `as` tf``
``# Create some variables.``
``v1 = tf.Variable(`1.0`, name=`"v1"`)``
``v2 = tf.Variable(`2.0`, name=`"v2"`)``
``saver = tf.train.Saver({`"variable_1"`:v1, `"variable_2"`: v2})``
``# Use the saver object normally after that.``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    saver.save(sess, `'test-ckpt/model-2'`)``

`from` tensorflow.python.tools.inspect_checkpoint `import` print_tensors_in_checkpoint_file``
``print_tensors_in_checkpoint_file(`"test-ckpt/model-2"`, `None`, `True`)``
``# 输出:``
``#tensor_name:  variable_1``
``#1.0``
``#tensor_name:  variable_2``
``#2.0``

`import` tensorflow `as` tf``
``# Create some variables.``
``v1 = tf.Variable(`1.0`, name=`"v1"`)``
``v2 = tf.Variable(`2.0`, name=`"v2"`)``
``saver = tf.train.Saver([v1, v2])``
``# Use the saver object normally after that.``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    saver.save(sess, `'test-ckpt/model-2'`)``

``tensor_name:  v1``
``1.0``
``tensor_name:  v2``
``2.0``

`import` tensorflow `as` tf``
``# Create some variables.``
``v1 = tf.Variable(`1.0`, name=`"v1"`)``
``v2 = tf.Variable(`2.0`, name=`"v2"`)``
``saver = tf.train.Saver({`"variable_1"`:v1, `"variable_2"`: v2})``
``# Use the saver object normally after that.``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    saver.save(sess, `'test-ckpt/model-2'`)``

`save` 部分的代码如上所示，下面写 `restore` 的代码，和`save`代码有点不同。

`````python``
`import` tensorflow `as` tf``
``# Create some variables.``
``v1 = tf.Variable(`1.0`, name=`"v1"`)``
``v2 = tf.Variable(`2.0`, name=`"v2"`)``
``#restore的时候，variable_1对应到v2，variable_2对应到v1，就可以实现目的了。``
``saver = tf.train.Saver({`"variable_1"`:v2, `"variable_2"`: v1})``
``# Use the saver object normally after that.``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    saver.restore(sess, `'test-ckpt/model-2'`)``
``    print(sess.run(v1), sess.run(v2))``
``# 输出的结果是 2.0 1.0，如我们所望``

·        save时，表示：`variable`的值应该保存到 `checkpoint`文件中的哪个 `key`

·        restore时，表示：`checkpoint`文件中`key`对应的值，应该`restore`到哪个`variable`

## 其它

``ckpt = tf.train.get_checkpoint_state(ckpt_dir)``
`if` ckpt `and` ckpt.model_checkpoint_path:``
``    saver.restore(sess, ckpt.model_checkpoint_path)``

## tf.cond(pred, fn1, fn2,name=None)

``res = fn1() `if` pred `else` fn2()``

``z = tf.mul(a, b)``
``result = tf.cond(x < y, `lambda`: tf.add(x, z), `lambda`: tf.square(y))``

## tf.case(pred_fn_pairs,default, exclusive=False, name=’case’)

`pred_fn_pairs`:以下两种形式都是正确的
1. [(pred_1, fn_1), (pred_2, fn_2)]
2. {pred_1:fn_1, pred_2:fn_2}

`tf.case()`等价于:

`if` pred_1:``
``  `return` fn_1()``
`elif` pred_2:``
``  `return` fn_2()``
`else`:``
``  `return` default()``

·        exclusive: 如果为True，那么pred至多有一个为True，如果有多余一个，会报错。如果False，则不会检查所有条件。

`import` tensorflow `as` tf``
``x = tf.constant(`0`)``
``y = tf.constant(`1`)``
``z = tf.constant(`2`)``
`def f1():` `return` tf.constant(`17`)``
`def f2():` `return` tf.constant(`23`)``
`def f3():` `return` tf.constant(-`1`)``
``r = tf.case({tf.less(x, y): f2, tf.less(x, z): f1},``
``         default=f3, exclusive=`False`)``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    print(sess.run(r))``

## tf.group()与 tf.tuple()

``w = tf.Variable(`1`)``
``mul = tf.multiply(w, `2`)``
``add = tf.add(w, `2`)``
``group = tf.group(mul, add)``
``tuple = tf.tuple([mul, add])``
``# sess.run(group)和sess.run(tuple)都会求Tensor(add)``
``#Tensor(mul)的值。区别是，tf.group()返回的是`op```
``#tf.tuple()返回的是list of tensor。``
``#这样就会导致，sess.run(tuple)的时候，会返回 Tensor(mul),Tensor(add)的值.``
``#而 sess.run(group)不会``

## tf.identity()

http://stackoverflow.com/questions/34877523/in-tensorflow-what-is-tf-identity-used-for

## tf.while_loop()

`tf.while_loop(cond, body, loop_vars,shape_invariants=None, parallel_iterations=10, back_prop=True,swap_memory=False, name=None)`

`while_loop`可以这么理解

``loop_vars = [...]``
`while` cond(*loop_vars):``
``    loop_vars = body(*loop_vars)    ``

import tensorflow as tf

a = tf.get_variable("a", dtype=tf.int32, shape=[], initializer=tf.ones_initializer())
b = tf.constant(2)
f = tf.constant(6)

def cond(a, b, f):
    # Keep iterating while a < 3.
    return a < 3

def body(a, b, f):
    # One loop step: increment a, pass b and f through unchanged.
    return a + 1, b, f

# tf.while_loop returns the tensors holding a, b, f after the loop ends.
a, b, f = tf.while_loop(cond, body, [a, b, f])
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    print(sess.run([a, b, f]))

learning rate decay

2.然后通过迭代逐步减小学习率(目的：为使模型在训练后期更加稳定);

[html] view plain copy

1.  decayed_learning_rate=learining_rate*decay_rate^(global_step/decay_steps)

learning_rate为事先设定的初始学习率；

decay_rate为衰减系数；

decay_steps为衰减速度。

[html] view plain copy

1.  global_step = tf.Variable(0)

2.

3.  learning_rate = tf.train.exponential_decay(0.1, global_step, 100, 0.96, staircase=True)     #生成学习率

4.

5.  learning_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(….., global_step=global_step)  #使用指数衰减学习率
learning_rate：0.1；staircase=True;则每100轮训练后要乘以0.96.

tf.train.exponential_decay(learning_rate, global_step,decay_steps, decay_rate, staircase=False, name=None)

learning_rate : 初始的learning rate
global_step : 全局的step，与 decay_step 和 decay_rate一起决定了 learning rate的变化。
staircase : 如果为 True global_step/decay_step 向下取整

decayed_learning_rate = learning_rate *

decay_rate ^ (global_step / decay_steps)

import tensorflow as tf

global_step = tf.Variable(0, trainable=False)

initial_learning_rate = 0.1  # initial learning rate

learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                           global_step=global_step,
                                           decay_steps=10, decay_rate=0.9)

# Bug fix: `add_global` was used in the loop below but never defined.
# It advances global_step by one so that the decayed rate actually changes.
add_global = global_step.assign_add(1)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    print(sess.run(learning_rate))
    for i in range(10):
        # Run the step increment and fetch the current decayed rate together.
        _, rate = sess.run([add_global, learning_rate])
        print(rate)

import tensorflow as tf

global_step = tf.Variable(0, trainable=False)

initial_learning_rate = 0.1  # initial learning rate

learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                           global_step=global_step,
                                           decay_steps=10, decay_rate=0.9)

# Bug fixes: `opt` and `loss` were never defined and `minimise` is a typo
# for `minimize`. A trivial loss makes the example runnable; passing
# global_step makes each training step advance it so the rate decays.
w = tf.Variable(1.0)
loss = tf.square(w)
opt = tf.train.GradientDescentOptimizer(learning_rate)
train_op = opt.minimize(loss, global_step=global_step)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    print(sess.run(learning_rate))
    for i in range(10):
        # Bug fix: `rate` was printed but never assigned; fetch it each step.
        _, rate = sess.run([train_op, learning_rate])
        print(rate)

LSTM 需要 initial state。一般情况下，我们都会使用 lstm_cell.zero_state()来获取 initial state。但有些时候，我们想要给 lstm_cell 的 initial state 赋予我们想要的值，而不是简单的用 0 来初始化，那么，应该怎么做呢？

LSTMStateTuple(c ,h)

from tensorflow.contrib.rnn.python.ops.core_rnn_cell_impl import LSTMStateTuple

c_state = …

h_state = …

# c_state , h_state 都为Tensor，是需要初始化的值。

initial_state = LSTMStateTuple(c_state, h_state)

tensorflow Regularizers

tensorflow中对参数使用正则项分为两步:
1. 创建一个正则方法(函数/对象)
2. 将这个正则方法(函数/对象),应用到参数上

tf.contrib.layers.l1_regularizer(scale,scope=None)

·        scale: 正则项的系数.

·        scope: 可选的scope name

tf.contrib.layers.l2_regularizer(scale,scope=None)

tf.contrib.layers.sum_regularizer(regularizer_list,scope=None)

regularizer_list: regulizer的列表

tf.contrib.layers.apply_regularization(regularizer,weights_list=None)

·        regularizer:就是我们上一步创建的正则化方法

·        weights_list: 想要执行正则化方法的参数列表,如果为None的话,就取GraphKeys.WEIGHTS中的weights.

tensorflow中的Tensor是保存了计算这个值的路径(方法),当我们run的时候,tensorflow后端就通过路径计算出Tensor对应的值

import tensorflow as tf

from tensorflow.contrib import layers

regularizer = layers.l1_regularizer(0.1)

with tf.variable_scope(‘var’,initializer=tf.random_normal_initializer(),

regularizer=regularizer):

weight =tf.get_variable(‘weight’, shape=[8],initializer=tf.ones_initializer())

with tf.variable_scope(‘var2’,initializer=tf.random_normal_initializer(),

regularizer=regularizer):

weight2 =tf.get_variable(‘weight’, shape=[8],initializer=tf.ones_initializer())

regularization_loss =tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

#coding:utf-8

import tensorflow as tf

def get_weight(shape, regularization_rate):
    """Create a weight variable and register its L2 penalty.

    Bug fixes vs. the original: the parameter was named `lambda`, which is a
    reserved keyword in Python (SyntaxError), and the rate was silently
    ignored — the point of the example is to add the regularization term to
    the 'losses' collection.
    """
    var = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection(
        'losses', tf.contrib.layers.l2_regularizer(regularization_rate)(var))
    return var

x = tf.placeholder(tf.float32, shape=(None, 2))

y_ = tf.placeholder(tf.float32, shape=(None, 1))  # ground truth (was `none`)

batch_size = 8  # typo fixed: was `batcg_size`

layer_dimension = [2, 10, 10, 10, 1]  # units in each network layer

n_layers = len(layer_dimension)  # number of layers

cur_layer = x

in_dimension = layer_dimension[0]

for i in range(1, n_layers):
    out_dimension = layer_dimension[i]
    weight = get_weight([in_dimension, out_dimension], 0.001)
    # Bug fix: tf.constant takes shape as a keyword list, not a call.
    bias = tf.Variable(tf.constant(0.1, shape=[out_dimension]))
    # Bug fixes: unbalanced parenthesis, bias belongs inside relu, and each
    # layer must consume cur_layer, not the original input x.
    cur_layer = tf.nn.relu(tf.matmul(cur_layer, weight) + bias)
    in_dimension = layer_dimension[i]

# Loss between the final output and the ground truth (en-dash fixed to `-`).
ses_loss = tf.reduce_mean(tf.square(y_ - cur_layer))

#tf.get_collection返回一个列表,内容是这个集合的所有元素

## 单层双向rnn

`tensorflow`中已经提供了双向`rnn`的接口,它就是`tf.nn.bidirectional_dynamic_rnn()`. 我们先来看一下这个接口怎么用.

``bidirectional_dynamic_rnn(``
``    cell_fw, `#前向 rnn cell`
``    cell_bw, `#反向 rnn cell`
``    inputs, `#输入序列.`
``    sequence_length=`None`,`# 序列长度`
``    initial_state_fw=`None`,`#前向rnn_cell的初始状态`
``    initial_state_bw=`None`,`#反向rnn_cell的初始状态`
``    dtype=`None`,`#数据类型`
``    parallel_iterations=`None`,``
``    swap_memory=`False`,``
``    time_major=`False`,``
``    scope=`None`
``)``

bidirectional_dynamic_rnn 在使用上和 dynamic_rnn是非常相似的.

1.   定义前向和反向rnn_cell

2.   定义前向和反向rnn_cell的初始状态

3.   准备好序列

4.   调用`bidirectional_dynamic_rnn`

`import` tensorflow `as` tf``
`from` tensorflow.contrib `import` rnn``
``cell_fw = rnn.LSTMCell(`10`)``
``cell_bw = rnn.LSTMCell(`10`)``
``initial_state_fw = cell_fw.zero_state(batch_size)``
``initial_state_bw = cell_bw.zero_state(batch_size)``
``seq = ...``
``seq_length = ...``
``(outputs, states)=tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, seq,``
`` seq_length, initial_state_fw,initial_state_bw)``
``out = tf.concat(outputs, `2`)``
``# ....``

## 多层双向rnn

`with` vs.variable_scope(scope `or` `"bidirectional_rnn"`):``
``  `# Forward direction`
``  `with` vs.variable_scope(`"fw"`) `as` fw_scope:``
``    output_fw, output_state_fw = dynamic_rnn(``
``        cell=cell_fw, inputs=inputs, sequence_length=sequence_length,``
``        initial_state=initial_state_fw, dtype=dtype,``
``        parallel_iterations=parallel_iterations, swap_memory=swap_memory,``
``        time_major=time_major, scope=fw_scope)``

## bidirectional_dynamic_rnn源码一探

1. 第一次`reverse`:将输入序列进行`reverse`,然后送入`dynamic_rnn`做一次运算.
2. 第二次`reverse`:将上面`dynamic_rnn`返回的`outputs`进行`reverse`,保证正向和反向输出 对应位置的 输入是一致的 是对上的.

def _reverse(input_, seq_lengths, seq_dim, batch_dim):
    # Without per-example lengths, flip the whole time axis; otherwise
    # reverse only the first seq_lengths entries of each batch element.
    if seq_lengths is None:
        return array_ops.reverse(input_, axis=[seq_dim])
    return array_ops.reverse_sequence(
        input=input_, seq_lengths=seq_lengths,
        seq_dim=seq_dim, batch_dim=batch_dim)
`` ``
`with` vs.variable_scope(`"bw"`) `as` bw_scope:``
``  inputs_reverse = _reverse(``
``      inputs, seq_lengths=sequence_length,``
``      seq_dim=time_dim, batch_dim=batch_dim)``
``  tmp, output_state_bw = dynamic_rnn(``
``      cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length,``
``      initial_state=initial_state_bw, dtype=dtype,``
``      parallel_iterations=parallel_iterations, swap_memory=swap_memory,``
``      time_major=time_major, scope=bw_scope)``
``output_bw = _reverse(``
``  tmp, seq_lengths=sequence_length,``
``  seq_dim=time_dim, batch_dim=batch_dim)``
``outputs = (output_fw, output_bw)``
``output_states = (output_state_fw, output_state_bw)``
`return` (outputs, output_states)``

## tf.reverse_sequence

``reverse_sequence(``
``    input,`#输入序列,将被reverse的序列`
``    seq_lengths,`#1Dtensor,表示输入序列长度`
``    seq_axis=`None`,`# 哪维代表序列`
``    batch_axis=`None`, `#哪维代表 batch`
``    name=`None`,``
``    seq_dim=`None`,``
``    batch_dim=`None`
``)``

``# Given this:``
``batch_dim = `0`
``seq_dim = `1`
``input.dims = (`4`, `8`, ...)``
``seq_lengths = [`7`, `2`, `3`, `5`]``
``# then slices of input are reversed on seq_dim, but only up to seq_lengths:``
``output[`0`, `0`:`7`, :, ...] = input[`0`, `7`:`0`:-`1`, :, ...]``
``output[`1`, `0`:`2`, :, ...] = input[`1`, `2`:`0`:-`1`, :, ...]``
``output[`2`, `0`:`3`, :, ...] = input[`2`, `3`:`0`:-`1`, :, ...]``
``output[`3`, `0`:`5`, :, ...] = input[`3`, `5`:`0`:-`1`, :, ...]``
``# while entries past seq_lens are copied through:``
``output[`0`, `7`:, :, ...] = input[`0`, `7`:, :, ...]``
``output[`1`, `2`:, :, ...] = input[`1`, `2`:, :, ...]``
``output[`2`, `3`:, :, ...] = input[`2`, `3`:, :, ...]``
``output[`3`, `2`:, :, ...] = input[`3`, `2`:, :, ...]``

``# Given this:``
``batch_dim = `2`
``seq_dim = `0`
``input.dims = (`8`, ?, `4`, ...)``
``seq_lengths = [`7`, `2`, `3`, `5`]``
``# then slices of input are reversed on seq_dim, but only up to seq_lengths:``
``output[`0`:`7`, :, `0`, :, ...] = input[`7`:`0`:-`1`, :, `0`, :, ...]``
``output[`0`:`2`, :, `1`, :, ...] = input[`2`:`0`:-`1`, :, `1`, :, ...]``
``output[`0`:`3`, :, `2`, :, ...] = input[`3`:`0`:-`1`, :, `2`, :, ...]``
``output[`0`:`5`, :, `3`, :, ...] = input[`5`:`0`:-`1`, :, `3`, :, ...]``
``# while entries past seq_lens are copied through:``
``output[`7`:, :, `0`, :, ...] = input[`7`:, :, `0`, :, ...]``
``output[`2`:, :, `1`, :, ...] = input[`2`:, :, `1`, :, ...]``
``output[`3`:, :, `2`, :, ...] = input[`3`:, :, `2`, :, ...]``
``output[`2`:, :, `3`, :, ...] = input[`2`:, :, `3`, :, ...]``

`tf.control_dependencies()`设计是用来控制计算流图的，给图中的某些计算指定顺序。比如：我们想要获取参数更新后的值，那么我们可以这么组织我们的代码。

``opt = tf.train.Optimizer().minize(loss)``
`with` tf.control_dependencies([opt]):``
``  updated_weight = tf.identity(weight)``
`with` tf.Session() `as` sess:``
``  tf.global_variables_initializer().run()``
``  sess.run(updated_weight, feed_dict={...}) `# 这样每次得到的都是更新后的weight`

## 下面说明两种control_dependencies 不 work 的情况

`import` tensorflow `as` tf``
``w = tf.Variable(`1.0`)``
``ema = tf.train.ExponentialMovingAverage(`0.9`)``
``update = tf.assign_add(w, `1.0`)``
``ema_op = ema.apply([update])``
`with` tf.control_dependencies([ema_op]):``
``    ema_val = ema.average(update)``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    `for` i `in` range(`3`):``
``        print(sess.run([ema_val]))``

`import` tensorflow `as` tf``
``w = tf.Variable(`1.0`)``
``ema = tf.train.ExponentialMovingAverage(`0.9`)``
``update = tf.assign_add(w, `1.0`)``
`` ``
``ema_op = ema.apply([update])``
`with` tf.control_dependencies([ema_op]):``
``    ema_val = tf.identity(ema.average(update)) `#一个identity搞定`
`` ``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    `for` i `in` range(`3`):``
``        print(sess.run([ema_val]))``

`import` tensorflow `as` tf``
``w = tf.Variable(`1.0`)``
``ema = tf.train.ExponentialMovingAverage(`0.9`)``
``update = tf.assign_add(w, `1.0`)``
``ema_op = ema.apply([update])``
`with` tf.control_dependencies([ema_op]):``
``    w1 = tf.Variable(`2.0`)``
``    ema_val = ema.average(update)``
`with` tf.Session() `as` sess:``
``    tf.global_variables_initializer().run()``
``    `for` i `in` range(`3`):``
``        print(sess.run([ema_val, w1]))``

``#这段代码出现在Variable类定义文件中第287行，``
``# 在创建Varible时，tensorflow是移除了dependencies了的``
``#所以会出现 control 不住的情况``
`with` ops.control_dependencies(`None`):``
``    ...      ``

tensorflow 如何读取数据

tensorflow有三种把数据放入计算图中的方式:

·        通过feed_dict

·        通过文件名读取数据：一个输入流水线，在计算图的开始部分从文件中读取数据

·        把数据预加载到一个常量或者变量中

Queue

Queue,队列,用来存放数据(跟Variable似的),tensorflow中的Queue中已经实现了同步机制,所以我们可以放心的往里面添加数据还有读取数据.如果Queue中的数据满了,那么en_queue操作将会阻塞,如果Queue是空的,那么dequeue操作就会阻塞.在常用环境中,一般是有多个en_queue线程同时像Queue中放数据,有一个dequeue操作从Queue中取数据.一般来说enqueue线程就是准备数据的线程,dequeue线程就是训练数据的线程.
Coordinator(协调者)

Coordinator就是用来帮助多个线程同时停止.线程组需要一个Coordinator来协调它们之间的工作.

# Thread body: loop until the coordinator indicates astop was requested.

# If some condition becomes true, ask the coordinator tostop.

#coord传入到线程中,来帮助它们同时停止工作

def MyLoop(coord):
    while not coord.should_stop():
        ...do something...
        if ...some condition...:
            coord.request_stop()

# Main thread: create a coordinator.

coord = tf.train.Coordinator()

# Create 10 threads that run ‘MyLoop()’

# Start the threads and wait for all of them to stop.

for t in threads:

t.start()

QueueRunner

QueueRunner创建多个线程对Queue进行enqueue操作.它是一个op.这些线程可以通过上面所述的Coordinator来协调它们同时停止工作.

example = …ops to create one example…

# Create a queue, and an op that enqueues examples one ata time in the queue.

queue = tf.RandomShuffleQueue(…)

enqueue_op = queue.enqueue(example)

#enqueue_many中的数量多余`Queue`中剩余的数量时,会阻塞

#init = q.enqueue_many(([1.2,2.1,3.3],))

# Create a training graph that starts by dequeuing abatch of examples.

inputs = queue.dequeue_many(batch_size)

train_op = …use ‘inputs’ to buildthe training part of the graph…

# Create a queue runner that will run 4 threads inparallel to enqueue

# examples.

#定义了四个`enqueue`线程,但是还没有执行

qr = tf.train.QueueRunner(queue, [enqueue_op] * 4)

# Launch the graph.

sess = tf.Session()

# Create a coordinator, launch the queue runner threads.

coord = tf.train.Coordinator()

#执行 enqueue线程,queue中放数据

# Run the training loop, controlling termination with thecoordinator.

for step in xrange(1000000):

ifcoord.should_stop():

break

sess.run(train_op)

# When done, ask the threads to stop.

coord.request_stop()

# And wait for them to actually do it.

tensorflow 输入流水线

1.    准备文件名

3.    定义文件中数据的解码规则

4.    解析数据

import tensorflow as tf

#一个Queue,用来保存文件名字.对此Queue,只读取,dequeue

filename_queue = tf.train.string_input_producer([“file0.csv”, “file1.csv”])

# Default values, in case of empty columns. Alsospecifies the type of the

# decoded result.

record_defaults = [[1], [1], [1], [1], [1]]

col1, col2, col3, col4, col5 = tf.decode_csv(

value,record_defaults=record_defaults)

features = tf.stack([col1, col2, col3, col4])

with tf.Session() as sess:

# Startpopulating the filename queue.

coord =tf.train.Coordinator()

#在调用runeval执行读取之前，必须

#tf.train.start_queue_runners来填充队列

for i in range(10):

# Retrievea single instance:

example, label= sess.run([features, col5])

print(example,label)

coord.request_stop()

tf.train.string_input_producer([“file0.csv”, “file1.csv”])

q = data_flow_ops.FIFOQueue(capacity=capacity,

dtypes=[input_tensor.dtype.base_dtype],

shapes=[element_shape],

shared_name=shared_name, name=name)

enq = q.enqueue_many([input_tensor])

queue_runner.QueueRunner(

q, [enq],cancel_op=cancel_op))

if summary_name isnotNone:

summary.scalar(summary_name,

math_ops.cast(q.size(), dtypes.float32) * (1. /capacity))

return q

1.    创建一个Queue

2.    创建一个enqueue_op

3.    使用QueueRunner创建一个线程来执行enqueue_op,并把QueueRunner放入collection

4.    返回创建的Queue

record_defaults = [[1], [1], [1], [1], [1]]

col1, col2, col3, col4, col5 = tf.decode_csv(

value,record_defaults=record_defaults)

#定义数据的读取与解析规则

example, label =tf.some_decoder(record_string)

processed_example= some_processing(example)

returnprocessed_example, label

definput_pipeline(filenames,batch_size, num_epochs=None):

filename_queue =tf.train.string_input_producer(

filenames,num_epochs=num_epochs, shuffle=True)

#min_after_dequeue defines how big a buffer we will randomly sample

#   from — bigger means better shuffling butslower start up and more

#   memory used.

# capacitymust be larger than min_after_dequeue and the amount larger

#   determines the maximum we willprefetch.  Recommendation:

#   min_after_dequeue + (num_threads + a smallsafety margin) * batch_size

#dequeue后的所剩数据的最小值

min_after_dequeue= 10000

#queue的容量

capacity =min_after_dequeue + 3 * batch_size

example_batch,label_batch = tf.train.shuffle_batch(

[example,label], batch_size=batch_size, capacity=capacity,

min_after_dequeue=min_after_dequeue)

returnexample_batch, label_batch

1.    创建一个RandomShuffleQueue用来保存样本

2.    使用QueueRunner创建多个enqueue线程向Queue中放数据

3.    创建一个dequeue_many OP

4.    返回dequeue_many OP

tf.train.Feature(..)与tf.FixedLenFeature() 的对应关系

tfrecords 制作和解码时候,API接口是有一些对应关系的, 下面来看一下这些对应关系.

#制作时期

tf.train.Feature(int64_list=tf.train.Int64List(value=[1.0]))

#解码时期

tf.FixedLenFeature([],tf.int64)  # 返回 1.0

tf.FixedLenFeature([1],tf.int64) # 返回 [1.0]

#对于之前的制作代码,这两种解码策略都是可以的,只不过返回的不同.

#制作时期

tf.train.Feature(int64_list=tf.train.Int64List(value=[1.0, 2.0]))

#解码时期

tf.FixedLenFeature([2],tf.int64) # 返回[1.0, 2.0]

#对于bytes,制作时期

tf.train.Feature(bytes_list=tf.train.BytesList(value=[bytestring]))

#解码时期

tf.FixedLenFeature([],tf.string)

tf.FixedLenFeature([1],tf.string)

# 如果在制作过程中, value 的长度是变化的话,解码的时候是需要用tf.VarLenFeature(dtype)

# 上述只是说 value的长度变化, 而不是说bytestring 的大小变化,如果bytestring变化的话,是不需要担心的,

# 一个例子就是,如果制作tfrecords的图片大小是变化的,这时候改变的只是bytestring的大小,但是value的长度

# 还是1,这时候用FixedLenFeature解码是可以正确还原数据的.

tf.train.FloatList 保存的是 float32 还是 float64 ： 是 float32

``# 第一行： 引包``
`from` tensorflow.python `import` debug `as` tf_debug``
``sess = tf.Session()``
``# 初始化的 sess 没必要加上 debug wrapper``
``sess.run(tf.global_variables_initializer())``
``# 第二行，给 session 加个 wrapper``
``debug_sess = tf_debug.LocalCLIDebugWrapperSession(sess=sess)``
``debug_sess.run(train_op) `# 用 加了 wrapper 的 session，来代替之前的 session 做训练操作`

``python demo_debug.py``
``# 或者``
``python -m demo_debug``

Tips : debug 界面中下划线的东西都是可以用鼠标点一下，就会触发相应操作的

https://www.tensorflow.org/programmers_guide/debugger#debugging_model_training_with_tfdbg

·        run ：执行一次 debug_session.run() , 这次执行产生中间 tensor 的值都可以通过 debug 界面查看

·        exit 退出debug

## 注意事项

·        debug wrapper 要加在执行 `train_op`  session 上，因为要 debug 的是 `train` 过程。但是如果是想 debug input-pipeline 的话，感觉是可以将 wrapper 加在执行input-pipeline session 上的（没有测试过）。

·        如果代码中使用了 input-pipeline 的话， debug 非常慢（不知道原因是啥）

`import` tensorflow `as` tf``
``queue = tf.FIFOQueue(capacity=`100`, dtypes=[tf.string, tf.int64])``
``# enqueue_many 的写法，两个元素放在两个列表里。``
``en_m = queue.enqueue_many([[`'hello'`, `'world'`], [`1`, `2`]])``
``# enqueue 的写法``
``en = queue.enqueue([`'hello'`, `1`])``
``deq = queue.dequeue()``
`with` tf.Session() `as` sess:``
``    sess.run(en_m)``
``    print(sess.run(deq))``

·        global_variables_initializer 返回一个用来初始化计算图中所有globalvariable op
·        这个op 到底是啥，还不清楚。

·        函数中调用了 variable_initializer()  global_variables()

·        global_variables() 返回一个 Variable list ，里面保存的是 gloabalvariables

·        variable_initializer()  Variable list 中的所有 Variable 取出来，将其 variable.initializer 属性做成一个 op group

·        然后看 Variable 类的源码可以发现， variable.initializer 就是一个 assign op

def global_variables_initializer():
  """Returns an Op that initializes global variables.

  Returns:
    An Op that initializes global variables in the graph.
  """
  return variables_initializer(global_variables())

def global_variables():
  """Returns global variables.

  Returns:
    A list of `Variable` objects.
  """
  return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)

def variables_initializer(var_list, name="init"):
  """Returns an Op that initializes a list of variables.

  Args:
    var_list: List of `Variable` objects to initialize.
    name: Optional name for the returned operation.

  Returns:
    An Op that run the initializers of all the specified variables.
  """
  if var_list:
    return control_flow_ops.group(*[v.initializer for v in var_list], name=name)
  return control_flow_ops.no_op(name=name)

class Variable(object):

  def _init_from_args(self, ...):
    self._initializer_op = state_ops.assign(
        self._variable, self._initial_value,
        validate_shape=validate_shape).op

  @property
  def initializer(self):
    """The initializer operation for this variable."""
    return self._initializer_op