Here the program that originally lived in a single .py file is refactored. After refactoring, the code is split into three programs: the first is mnist_inference.py, which defines the forward-propagation process and the parameters of the neural network; the second is mnist_train.py, which defines the training process of the network; the third is mnist_eval.py, which defines the evaluation process.
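For orientation, the three files are assumed to sit side by side in one package folder; the imports further below use the folder name 神经网络最佳实践 ("neural network best practices"), but any package name works as long as the imports match:

神经网络最佳实践/
    mnist_inference.py   # network parameters and forward propagation
    mnist_train.py       # training loop, saves checkpoints
    mnist_eval.py        # periodically evaluates the latest checkpoint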
mnist_inference.py
# Summary
# This module defines the forward-propagation algorithm of the neural network.
# Both the training and the evaluation programs can simply call the inference()
# function without caring about the concrete network structure.
import tensorflow as tf

# Parameters describing the network structure.
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500

# Obtain variables through tf.get_variable. During training these variables are
# created; during evaluation their values are loaded from the saved model. Even
# more conveniently, because variables can be renamed when they are loaded, the
# same name can refer to the variable itself during training and to its moving
# average during evaluation. This function also adds the variable's
# regularization loss to a loss collection.
def get_weight_variable(shape, regularizer):
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # When a regularization-loss function is given, add the regularization loss
    # of the current variable to a collection named 'losses'. add_to_collection
    # adds a tensor to a collection; 'losses' is a custom collection, not one of
    # the collections managed automatically by TensorFlow.
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights

# Define the forward-propagation process of the neural network.
def inference(input_tensor, regularizer):
    # Declare the variables of the first layer and compute its forward
    # propagation, inside the 'layer1' variable scope.
    with tf.variable_scope('layer1'):
        # Here there is no essential difference between tf.get_variable and
        # tf.Variable, because this function is not called more than once in
        # the same program, whether training or testing. If it were called
        # several times in the same program, the reuse parameter would have to
        # be set to True after the first call.
        weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        biases = tf.get_variable(
            "biases", [LAYER1_NODE], initializer=tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)
    # Similarly, declare the variables of the second layer and compute its
    # forward propagation.
    with tf.variable_scope('layer2'):
        weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        biases = tf.get_variable(
            "biases", [OUTPUT_NODE], initializer=tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weights) + biases
    # Return the final forward-propagation result.
    return layer2
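As a side note to the reuse comment above: here is a minimal sketch (not part of the original programs) of what calling inference twice in one program would look like. The second call must re-enter the variable scope with reuse=True, so that tf.get_variable fetches the existing variables instead of raising an error:

import tensorflow as tf
import 神经网络最佳实践.mnist_inference as mnist_inference  # the module defined above

x1 = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE])
x2 = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE])
y1 = mnist_inference.inference(x1, None)        # first call creates the variables
with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    y2 = mnist_inference.inference(x2, None)    # second call reuses the same variables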
mnist_train.py
import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data

# Load the constants and the forward-propagation function defined in
# mnist_inference.py. My program lives in a folder named 神经网络最佳实践;
# adjust the import path to your own layout.
import 神经网络最佳实践.mnist_inference as mnistinfer

# Configure the parameters of the neural network.
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99
# Path and file name under which the model is saved.
MODEL_SAVE_PATH = "/path/to/model/"
MODEL_NAME = "model.ckpt"

def train(mnist):
    print("Start training!")
    # Define the input and output placeholders.
    x = tf.placeholder(tf.float32, [None, mnistinfer.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, mnistinfer.OUTPUT_NODE], name='y-input')
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # Directly use the forward-propagation process defined in mnist_inference.py.
    y = mnistinfer.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Define the loss function, learning rate, moving-average operation and
    # training step.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    # Cross entropy is used together with the softmax function.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    # Initialize the TensorFlow persistence class.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        print("Initializing variables!")
        tf.global_variables_initializer().run()
        # The model is no longer evaluated on the validation data during
        # training; validation and testing are handled by a separate program.
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run(
                [train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            # Save the model every 1000 steps.
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g."
                      % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)

def main(argv=None):
    print("Entering main!")
    mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
    print("Ready to train!")
    train(mnist)

if __name__ == '__main__':
    tf.app.run()
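To make the learning-rate schedule concrete: with staircase left at its default of False, tf.train.exponential_decay computes LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step / decay_steps). A quick back-of-the-envelope sketch (plain Python, not part of the program) with the values above and MNIST's 55000 training images:

decay_steps = 55000 / 100.0   # mnist.train.num_examples / BATCH_SIZE, i.e. steps per epoch
for step in (0, 550, 5500):
    print(step, 0.8 * 0.99 ** (step / decay_steps))
# 0 -> 0.8, 550 -> 0.792, 5500 -> ~0.7235

In other words, the learning rate shrinks by about 1% per pass over the training set.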
Running mnist_train produces output similar to the following:
After 1 training step(s), loss on training batch is 3.10138.
After 1001 training step(s), loss on training batch is 0.296304.
After 2001 training step(s), loss on training batch is 0.169404.
After 3001 training step(s), loss on training batch is 0.158908.
After 4001 training step(s), loss on training batch is 0.122651.
After 5001 training step(s), loss on training batch is 0.11749.
After 6001 training step(s), loss on training batch is 0.100439.
After 7001 training step(s), loss on training batch is 0.090375.
After 8001 training step(s), loss on training batch is 0.0828235.
After 9001 training step(s), loss on training batch is 0.0755924.
After 10001 training step(s), loss on training batch is 0.0712517.
After 11001 training step(s), loss on training batch is 0.0624911.
After 12001 training step(s), loss on training batch is 0.0627382.
After 13001 training step(s), loss on training batch is 0.0628153.
After 14001 training step(s), loss on training batch is 0.0533004.
After 15001 training step(s), loss on training batch is 0.0542117.
After 16001 training step(s), loss on training batch is 0.0456152.
After 17001 training step(s), loss on training batch is 0.0531916.
After 18001 training step(s), loss on training batch is 0.0448747.
After 19001 training step(s), loss on training batch is 0.0425106.
mnist_eval.py
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the constants and functions defined in mnist_inference.py and
# mnist_train.py.
import 神经网络最佳实践.mnist_inference as mn_inference
import 神经网络最佳实践.mnist_train as mn_train

# Load the latest model every 10 seconds and test its accuracy on the
# validation data.
EVAL_INTERVAL_SECS = 10

def evaluate(mnist):
    with tf.Graph().as_default() as g:
        # Define the input and output format.
        x = tf.placeholder(tf.float32, [None, mn_inference.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, mn_inference.OUTPUT_NODE], name='y-input')
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        # Compute the forward-propagation result directly through the shared
        # function. The regularization loss is irrelevant during evaluation,
        # so the regularization function is set to None here.
        y = mn_inference.inference(x, None)
        # Use the forward-propagation result to compute the accuracy. To
        # classify unlabeled examples, tf.argmax(y, 1) gives the predicted
        # class of each input example.
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        # Load the model through variable renaming, so the forward propagation
        # does not need to call the moving-average function to obtain the
        # averaged values. This way the forward-propagation process defined in
        # mnist_inference.py can be reused completely.
        variable_averages = tf.train.ExponentialMovingAverage(
            mn_train.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        # Run the accuracy computation every EVAL_INTERVAL_SECS seconds to
        # track how the accuracy changes during training.
        while True:
            with tf.Session() as sess:
                # tf.train.get_checkpoint_state finds the newest model file in
                # the directory via the checkpoint file.
                ckpt = tf.train.get_checkpoint_state(mn_train.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    # Load the model.
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Recover the number of training steps at save time from
                    # the file name.
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                    print("After %s training step(s), validation accuracy = %g"
                          % (global_step, accuracy_score))
                else:
                    print("No checkpoint file found")
                    return
            time.sleep(EVAL_INTERVAL_SECS)

def main(argv=None):
    mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
    evaluate(mnist)

if __name__ == '__main__':
    tf.app.run()
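The variable-renaming trick deserves a small illustration. variables_to_restore() builds a dictionary that maps each variable's shadow (moving-average) name in the checkpoint to the corresponding variable in the current graph, so a Saver built from it restores the averaged values into the ordinary variables. A minimal standalone sketch (not part of the original programs):

import tensorflow as tf

v = tf.Variable(0.0, name='v')
ema = tf.train.ExponentialMovingAverage(0.99)
print(ema.variables_to_restore())
# {'v/ExponentialMovingAverage': <tf.Variable 'v:0' shape=() dtype=float32_ref>}
# A Saver constructed from this dict loads the checkpoint entry
# 'v/ExponentialMovingAverage' into the graph variable v.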
Running mnist_eval while mnist_train is still running produces output similar to the following:
After 1 training step(s), validation accuracy = 0.1308
After 1001 training step(s), validation accuracy = 0.9774
After 3001 training step(s), validation accuracy = 0.9818
After 4001 training step(s), validation accuracy = 0.983
After 5001 training step(s), validation accuracy = 0.9824
After 6001 training step(s), validation accuracy = 0.9836
After 7001 training step(s), validation accuracy = 0.9838
After 8001 training step(s), validation accuracy = 0.9838
After 10001 training step(s), validation accuracy = 0.9832
After 11001 training step(s), validation accuracy = 0.9844
After 12001 training step(s), validation accuracy = 0.9836
After 13001 training step(s), validation accuracy = 0.9842
After 14001 training step(s), validation accuracy = 0.9842
After 15001 training step(s), validation accuracy = 0.9846
After 17001 training step(s), validation accuracy = 0.9838
After 18001 training step(s), validation accuracy = 0.9838
After 19001 training step(s), validation accuracy = 0.9844
After 21001 training step(s), validation accuracy = 0.9852
After 22001 training step(s), validation accuracy = 0.9852
After 23001 training step(s), validation accuracy = 0.9852