现在已经有很多优秀的神经网络模型,这些模型大部分都是使用ImageNet数据集的1000类训练得到的。然而在实际应用中,1000类的模型并不完全适用于我们自己的数据集,而使用自己的数据集从头训练需要花费大量的计算时间,而且有可能因为数据不够而无法得到理想的模型。这时候我们可以保留训练好的模型的大部分参数,而仅仅对网络的最后几层全连接层进行Finetuning。这里我们将使用猫狗大战数据集中的1000张图对VGG16网络的最后三层全连接层进行训练,因为我们最后输出只有两类,所以将最后一层全连接层输出维度改为2。
猫狗大战数据集地址:
(1)下载并分类处理猫狗大战训练集和VGG16权重
猫狗大战数据集下载链接如下
https://pan.baidu.com/s/13hw4LK8ihR6-6-8mpjLKDA
密码:dmp4
VGG16权重链接如下
链接: https://pan.baidu.com/s/123VBgiXNikGTRkDv57lj5Q
密码: nih7
下载后得到数据的训练集和测试集
在这里我们只需要训练集,训练集中包括猫狗图像25000张,数据太多了,而且猫狗的图像都在一个文件夹中,只以图像的名字区分,很不利于我们制作训练集,因此我们写了一个简单的程序从训练集中取1000张图像,并将猫狗分别保存到不同的文件夹内。为了方便,大家可以单独新建一个文件夹用于保存分类后的图像,可以参考下面代码中的路径。
import os
import shutil

# Directory holding the downloaded training images.
image_path = '/home/cyy/python_code/tensorflow/kaggle/train'
# Directory under which the images are sorted into one folder per class.
train_path = '/home/cyy/python_code/tensorflow/dog_cat_classification/data'

image_list = os.listdir(image_path)
# Copy the first 1000 listed images; the class folder is named after the
# first three characters of each file name.
for image_name in image_list[0:1000]:
    class_name = image_name[0:3]
    save_path = os.path.join(train_path, class_name)
    # makedirs (unlike mkdir) also creates train_path itself when it is
    # missing, so the script works on a fresh checkout.
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    file_name = os.path.join(image_path, image_name)
    save_name = os.path.join(save_path, image_name)
    shutil.copyfile(file_name, save_name)
这样就将1000张猫狗图像分别保存到了两个文件夹中,如下图所示
(2)搭建VGG16神经网络
VGG16网络由ImageNet数据集训练1000类得到,所以最后一层全连接层输出为1000类,为了实现对猫狗的识别我们将最后一层输出改为2,并对最后的三层全连接层进行训练,所以在下面的VGG16_model.py文件中可以看到,只有最后三层全连接层我们选择了trainable=True。
import numpy as np
import tensorflow as tf


class vgg16:
    """VGG16 graph whose last fully connected layer has two outputs.

    The thirteen convolution layers are created with trainable=False; the
    three fully connected layers are trainable. Every weight/bias variable
    is appended to ``self.parameters`` in creation order, which is what
    ``load_weights`` indexes into.
    """

    def __init__(self, imgs):
        """Build the whole graph on top of the image tensor ``imgs``."""
        self.parameters = []
        self.imgs = imgs
        self.convlayers()
        self.fc_layers()
        # Despite the name, these are the raw fc8 outputs (logits); callers
        # apply softmax themselves.
        self.probs = self.fc8

    def saver(self):
        """Return a new tf.train.Saver for the current graph."""
        return tf.train.Saver()

    def maxpool(self, name, input_data, trainable):
        """Apply 2x2 max pooling with stride 2 and SAME padding.

        ``trainable`` is accepted for signature symmetry with the other
        layer builders and is not used.
        """
        out = tf.nn.max_pool(input_data, [1, 2, 2, 1], [1, 2, 2, 1],
                             padding="SAME", name=name)
        return out

    def conv(self, name, input_data, out_channel, trainable):
        """Add a 3x3, stride-1, SAME-padded convolution followed by ReLU.

        Args:
            name: variable scope for the layer's variables.
            input_data: input tensor; its last dimension is the channel count.
            out_channel: number of output channels.
            trainable: whether the kernel and biases are trainable.

        Returns:
            The activated output tensor.
        """
        in_channel = input_data.get_shape()[-1]
        with tf.variable_scope(name):
            # Honour the caller's flag instead of a hard-coded False; every
            # call in convlayers passes False, so those layers stay frozen.
            kernel = tf.get_variable("weights", [3, 3, in_channel, out_channel],
                                     dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable("biases", [out_channel],
                                     dtype=tf.float32, trainable=trainable)
            conv_res = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding="SAME")
            res = tf.nn.bias_add(conv_res, biases)
            out = tf.nn.relu(res, name=name)
        self.parameters += [kernel, biases]
        return out

    def fc(self, name, input_data, out_channel, trainable=True, relu=True):
        """Add a fully connected layer.

        Args:
            name: variable scope for the layer's variables.
            input_data: rank-4 or rank-2 tensor; rank-4 input is flattened.
            out_channel: number of output units.
            trainable: whether weights and biases are trainable.
            relu: when True (default) the output goes through ReLU; when
                False the affine output is returned unchanged.

        Returns:
            The layer output tensor of shape [batch, out_channel].
        """
        shape = input_data.get_shape().as_list()
        if len(shape) == 4:
            size = shape[-1] * shape[-2] * shape[-3]
        else:
            size = shape[1]
        input_data_flat = tf.reshape(input_data, [-1, size])
        with tf.variable_scope(name):
            weights = tf.get_variable(name="weights", shape=[size, out_channel],
                                      dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable(name="biases", shape=[out_channel],
                                     dtype=tf.float32, trainable=trainable)
            res = tf.nn.bias_add(tf.matmul(input_data_flat, weights), biases)
            out = tf.nn.relu(res) if relu else res
        self.parameters += [weights, biases]
        return out

    def convlayers(self):
        """Create the five convolution/pooling stages (all frozen).

        The scope-name strings become variable names in saved checkpoints,
        so they are kept exactly as they are. No mean subtraction happens
        here; the input tensor is used as given.
        """
        # conv1
        self.conv1_1 = self.conv("conv1re_1", self.imgs, 64, trainable=False)
        self.conv1_2 = self.conv("conv1_2", self.conv1_1, 64, trainable=False)
        self.pool1 = self.maxpool("poolre1", self.conv1_2, trainable=False)
        # conv2
        self.conv2_1 = self.conv("conv2_1", self.pool1, 128, trainable=False)
        self.conv2_2 = self.conv("convwe2_2", self.conv2_1, 128, trainable=False)
        self.pool2 = self.maxpool("pool2", self.conv2_2, trainable=False)
        # conv3
        self.conv3_1 = self.conv("conv3_1", self.pool2, 256, trainable=False)
        self.conv3_2 = self.conv("convrwe3_2", self.conv3_1, 256, trainable=False)
        self.conv3_3 = self.conv("convrew3_3", self.conv3_2, 256, trainable=False)
        self.pool3 = self.maxpool("poolre3", self.conv3_3, trainable=False)
        # conv4
        self.conv4_1 = self.conv("conv4_1", self.pool3, 512, trainable=False)
        self.conv4_2 = self.conv("convrwe4_2", self.conv4_1, 512, trainable=False)
        self.conv4_3 = self.conv("conv4rwe_3", self.conv4_2, 512, trainable=False)
        self.pool4 = self.maxpool("pool4", self.conv4_3, trainable=False)
        # conv5
        self.conv5_1 = self.conv("conv5_1", self.pool4, 512, trainable=False)
        self.conv5_2 = self.conv("convrwe5_2", self.conv5_1, 512, trainable=False)
        self.conv5_3 = self.conv("conv5_3", self.conv5_2, 512, trainable=False)
        self.pool5 = self.maxpool("poorwel5", self.conv5_3, trainable=False)

    def fc_layers(self):
        """Create the three trainable fully connected layers."""
        self.fc6 = self.fc("fc6", self.pool5, 4096)
        self.fc7 = self.fc("fc7", self.fc6, 4096)
        # fc8 produces the logits for softmax cross-entropy. A ReLU here
        # would clamp negative logits to zero and block their gradient.
        self.fc8 = self.fc("fc8", self.fc7, 2, relu=False)

    def load_weights(self, weight_file, sess):
        """Assign pretrained values to every variable except fc8's.

        Args:
            weight_file: path of an archive readable by ``np.load``. Its
                sorted keys are presumably in the same order as
                ``self.parameters`` -- verify against the weight file used.
            sess: session in which the assign ops are run.
        """
        # Positions 30 and 31 in self.parameters are the fc8 weights and
        # biases (13 conv layers, fc6, fc7 come first, two variables each).
        skipped = (30, 31)
        # The context manager closes the archive once everything is copied.
        with np.load(weight_file) as weights:
            keys = sorted(weights.keys())
            for i, k in enumerate(keys):
                if i not in skipped:
                    sess.run(self.parameters[i].assign(weights[k]))
        print("-----------all done---------------")
(3)读取训练数据
首先将训练数据分为两类读入list中,并分别给图像添加上标签。然后用tensorflow的队列函数将数据放入队列并打乱顺序,按照设置的batch分批次输出进行训练,最后将标签设置成onehot形式。
import tensorflow as tf
import numpy as np
import os
img_width = 224
img_height = 224


def get_file(file_dir):
    """Collect image paths and integer labels from per-class sub-folders.

    Every file below ``file_dir/<class>/`` becomes one sample. The label is
    0 when ``<class>`` is 'cat' and 1 for any other folder name. Files lying
    directly in ``file_dir`` are skipped because they belong to no class.

    Args:
        file_dir: directory containing one sub-folder per class.

    Returns:
        (image_list, label_list): two equally long lists, shuffled together
        with NumPy's global random state.
    """
    top = os.path.normpath(file_dir)
    samples = []
    for root, _sub_folders, files in os.walk(file_dir):
        current = os.path.normpath(root)
        if current == top:
            continue
        # Path and label are paired at creation, so they cannot drift apart.
        # The class is the first folder level below file_dir, which also
        # covers files in nested folders.
        class_name = os.path.relpath(current, top).split(os.sep)[0]
        label = 0 if class_name == 'cat' else 1
        for name in files:
            samples.append((os.path.join(root, name), label))

    order = np.random.permutation(len(samples))
    image_list = [samples[i][0] for i in order]
    label_list = [samples[i][1] for i in order]
    return image_list, label_list


def get_batch(image_list, label_list, img_width, img_height, batch_size, capacity):
    """Build a queue-based input pipeline that yields image/label batches.

    Args:
        image_list: image file paths.
        label_list: integer labels, parallel to ``image_list``.
        img_width: target image width in pixels.
        img_height: target image height in pixels.
        batch_size: number of samples per batch.
        capacity: capacity passed to ``tf.train.batch``.

    Returns:
        (image_batch, label_batch) tensors; ``label_batch`` has shape
        [batch_size].
    """
    image = tf.cast(image_list, tf.string)
    label = tf.cast(label_list, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # The TF signature is (image, target_height, target_width): height first.
    image = tf.image.resize_image_with_crop_or_pad(image, img_height, img_width)
    image = tf.image.per_image_standardization(image)  # standardize each image
    image_batch, label_batch = tf.train.batch(
        [image, label], batch_size=batch_size, num_threads=64, capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])
    return image_batch, label_batch


def onehot(labels, n_class=2):
    """Convert integer labels to a one-hot matrix.

    Args:
        labels: 1-D sequence or array of non-negative integer labels.
        n_class: minimum number of columns. The width grows automatically
            when a larger label is present.

    Returns:
        Float array of shape (len(labels), width) with a single 1 per row.
    """
    labels = np.asarray(labels, dtype=np.int64).reshape(-1)
    n_sample = labels.shape[0]
    # Deriving the width only from max(labels) gives one column for a batch
    # holding only label 0; n_class keeps the width stable across batches.
    if n_sample:
        n_class = max(n_class, int(labels.max()) + 1)
    onehot_labels = np.zeros((n_sample, n_class))
    onehot_labels[np.arange(n_sample), labels] = 1
    return onehot_labels
(4)主函数对模型进行训练,并保存模型
import os
import time

import numpy as np
import tensorflow as tf

import VGG16_model as model
import create_and_read_TFRecord2 as reader2

if __name__ == '__main__':
    # Directory with the training data.
    X_train, y_train = reader2.get_file('/home/cyy/python_code/tensorflow/dog_cat_classification/data/train')
    image_batch, label_batch = reader2.get_batch(X_train, y_train, 224, 224, 25, 256)

    x_imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y_imgs = tf.placeholder(tf.int32, [None, 2])

    vgg = model.vgg16(x_imgs)
    fc3_cat_and_dog = vgg.probs
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc3_cat_and_dog, labels=y_imgs))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(loss)

    pre = tf.nn.softmax(fc3_cat_and_dog)
    correct_pred = tf.equal(tf.argmax(pre, 1), tf.argmax(y_imgs, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Path of the pretrained VGG16 weights.
    vgg.load_weights('/home/cyy/python_code/tensorflow/dog_cat_classification/data/vgg16_weights.npz', sess)
    saver = vgg.saver()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    try:
        start_time = time.time()
        for i in range(2000):
            image, label = sess.run([image_batch, label_batch])
            labels = reader2.onehot(label)

            sess.run(optimizer, feed_dict={x_imgs: image, y_imgs: labels})

            # Report progress on the current batch every 10 steps.
            if i % 10 == 0:
                loss_record, acc_record = sess.run(
                    [loss, accuracy], feed_dict={x_imgs: image, y_imgs: labels})
                print("now the loss is %f " % loss_record)
                print(acc_record)
                end_time = time.time()
                print('time: ', (end_time - start_time))
                start_time = end_time
                print("----------epoch %d is finished---------------" % i)

        # Make sure the checkpoint directory exists before saving into it.
        if not os.path.isdir("model"):
            os.makedirs("model")
        saver.save(sess, "model/")  # checkpoint path
        print("Optimization Finished!")
    finally:
        # Stop and join the queue-runner threads, then release the session,
        # even when training is interrupted by an error.
        coord.request_stop()
        coord.join(threads)
        sess.close()
训练运行train_model.py如下所示:
(5)对训练好的模型测试
import os

import numpy as np
import tensorflow as tf
from scipy.misc import imread, imresize

import VGG16_model as model


def _standardize(image):
    """Scale one image to zero mean and unit variance.

    Follows the formula documented for tf.image.per_image_standardization:
    (x - mean) / max(stddev, 1 / sqrt(num_elements)). The lower bound on
    the divisor avoids division by zero on uniform images.
    """
    image = np.asarray(image, dtype=np.float32)
    adjusted_std = max(float(image.std()), 1.0 / np.sqrt(image.size))
    return (image - image.mean()) / adjusted_std


imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
sess = tf.Session()
vgg = model.vgg16(imgs)
fc3_cat_and_dog = vgg.probs
pred = tf.nn.softmax(fc3_cat_and_dog)
saver = vgg.saver()
saver.restore(sess, 'model/')  # load the saved model parameters

# Read test images of one class (cats here) and report the share of them
# predicted as class 0.
for root, sub_folders, files in os.walk('/home/cyy/python_code/tensorflow/dog_cat_classification/data/test/cat'):
    i = 0
    cat = 0
    dog = 0
    for name in files:
        filepath = os.path.join(root, name)
        try:
            img1 = imread(filepath, mode='RGB')
            img1 = imresize(img1, (224, 224))
        except (IOError, OSError, ValueError):
            # Unreadable file: report it and skip it, so the prediction
            # below never runs on a stale or undefined image.
            print("remove", filepath)
            continue
        i += 1

        # The training input pipeline standardizes every image, so the same
        # scaling is applied here before feeding the network.
        prob = sess.run(pred, feed_dict={vgg.imgs: [_standardize(img1)]})
        max_index = np.argmax(prob)
        if max_index == 0:
            cat += 1
        else:
            dog += 1

        # Print the running accuracy every 50 images.
        if i % 50 == 0:
            acc = (cat * 1.) / (dog + cat)
            print(acc)
            print("-----------img number is %d------------" % i)

sess.close()
运行test.py结果如下所示:
可以看到对VGG16微调训练2000次后可以得到较好的分类效果。