从训练样例中取1000个进行训练,再对1000个测试样例进行检测,出现过拟合情况,而且损失函数值和测试精度值波动很大。
# coding=utf-8
"""
mnist_loader
~~~~~~~~~~~~

A library to load the MNIST image data, plus a small TensorFlow 1.x
experiment that trains a 784-30-10 sigmoid/softmax network on the first
1000 training images and logs cost and accuracy for TensorBoard.

For details of the data structures that are returned, see the doc string
for ``load_data``.
"""

#### Libraries
# Standard library
import gzip
import os
import pickle
from random import randint

# Third-party libraries
import numpy as np

# Must be set before TensorFlow is imported (it is imported in ``main``).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'  # show only warnings and errors

logs_path = r'c:/temp/log_mnist_softmax_2layers'
logs_path2 = r'c:/temp/log_mnist_softmax_2layers_2'
batch_size = 10
learning_rate = 0.005  # the error becomes very large when this is > 0.05
training_epochs = 30


def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.
    """
    # NOTE: pickle must only be used on trusted files; this one is the
    # local MNIST archive shipped next to the code.
    with gzip.open('../data/mnist.pkl.gz', 'rb') as f:
        training_data, validation_data, test_data = pickle.load(
            f, encoding='bytes')
    return (training_data, validation_data, test_data)


def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network.
    """
    e = np.zeros(10)
    e[j] = 1.0
    return e


def main():
    """Train the two-layer network, log summaries, show one prediction
    and save the model.

    Training summaries go to ``logs_path`` and test summaries to
    ``logs_path2``; both use the global mini-batch index as the step so
    the two curves line up in TensorBoard.
    """
    # Imported here so that TF_CPP_MIN_LOG_LEVEL is already set, and so
    # the loader helpers above stay usable without TensorFlow installed.
    import tensorflow as tf
    import matplotlib.pyplot as plt

    training_data, _validation_data, test_data = load_data()
    trainData_in = training_data[0][:1000]
    trainData_out = [vectorized_result(j) for j in training_data[1][:1000]]
    testData_in = test_data[0]
    testData_out = [vectorized_result(j) for j in test_data[1]]

    x_input = tf.placeholder(tf.float32, [None, 784], name='x_input')
    y_desired = tf.placeholder(tf.float32, [None, 10])

    # Hidden layer.  The weights are drawn at random (scaled by
    # 1/sqrt(fan_in)): with all-zero weights every hidden unit receives
    # the same gradient, so the 30 units would never differentiate.
    w1 = tf.Variable(tf.truncated_normal([784, 30], stddev=784 ** -0.5))
    b1 = tf.Variable(tf.zeros([30]))
    y1 = tf.nn.sigmoid(tf.matmul(x_input, w1) + b1)

    # Output layer.
    w = tf.Variable(tf.truncated_normal([30, 10], stddev=30 ** -0.5))
    b = tf.Variable(tf.zeros([10]))
    y_output = tf.nn.softmax(tf.matmul(y1, w) + b, name='y_output')

    # Clip before the log so a softmax output of exactly 0 cannot turn
    # the loss into NaN.  The *1000 scaling of the original is kept.
    safe_output = tf.clip_by_value(y_output, 1e-10, 1.0)
    lossFun_crossEntropy = \
        -tf.reduce_mean(y_desired * tf.log(safe_output)) * 1000.0

    # argmax over axis 1: one predicted / desired class index per row.
    correct_prediction = tf.equal(tf.argmax(y_output, 1),
                                  tf.argmax(y_desired, 1))
    # Cast booleans to floats, then average to get the accuracy.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    train_step = tf.train.GradientDescentOptimizer(
        learning_rate).minimize(lossFun_crossEntropy)

    tf.summary.scalar('cost', lossFun_crossEntropy)
    tf.summary.scalar('accuracy', accuracy)
    summary_op = tf.summary.merge_all()

    train_feed = {x_input: trainData_in, y_desired: trainData_out}
    test_feed = {x_input: testData_in, y_desired: testData_out}

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        logs_writer = tf.summary.FileWriter(
            logs_path, graph=tf.get_default_graph())
        logs_writer2 = tf.summary.FileWriter(logs_path2)

        batch_count = len(trainData_in) // batch_size
        for epoch in range(training_epochs):
            for i in range(batch_count):
                step = epoch * batch_count + i
                lo, hi = batch_size * i, batch_size * (i + 1)

                # Training cost/accuracy -> logs_path.
                _, summary = sess.run(
                    [train_step, summary_op],
                    feed_dict={x_input: trainData_in[lo:hi],
                               y_desired: trainData_out[lo:hi]})
                logs_writer.add_summary(summary, step)

                # Test cost/accuracy -> logs_path2.  Only the summary is
                # evaluated here: running train_step on this feed would
                # take a gradient step on the test set, which both leaks
                # test data into training and makes the curves jump.
                summary = sess.run(summary_op, feed_dict=test_feed)
                logs_writer2.add_summary(summary, step)

            print('Epoch', epoch)
            print('Accuracy_train:', sess.run(accuracy, feed_dict=train_feed))
            print('Accuracy:', sess.run(accuracy, feed_dict=test_feed))
        print('Done')

        # Flush pending events so TensorBoard sees the complete run.
        logs_writer.close()
        logs_writer2.close()

        # Show the network's output for one random test image.
        # randint is inclusive on both ends, hence the "- 1".
        n = randint(0, len(testData_in) - 1)
        try_input = testData_in[n]
        try_desired = testData_out[n]
        print(try_desired)
        print(sess.run(y_output, feed_dict={x_input: [try_input]}))
        # reshape returns a new view and leaves the test array untouched.
        plt.imshow(try_input.reshape(28, 28), cmap='Greys_r')
        plt.show()

        saver = tf.train.Saver()
        checkpoint_path = r'c:/temp/saved_mnist_cnn/saved_mnist_cnn.ckp'
        # Saver.save does not create missing parent directories.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        save_path = saver.save(sess, checkpoint_path)
        print('Model saved to %s' % save_path)


if __name__ == "__main__":
    main()
运行tensorboard:
tensorboard --logdir=run1:"C:\temp\log_mnist_softmax_2layers",run2:"C:\temp\log_mnist_softmax_2layers_2"
为什么训练过程中波动这么大?TensorFlow 怎么计算梯度?怎么更新参数?
和理论(Michael Nielsen《Neural Networks and Deep Learning》第 3 章:http://neuralnetworksanddeeplearning.com/chap3.html)
有何不同?
下一步,打算构造一个最简单的网络,简单到可以手算,来进行深入研究。