文章目录
- 文章说明
- 导入需要的库
- 设置参数
- 创建保存路径
- 构建模型
- 测试
- 完整代码
文章说明
本系列文章旨在对 Github 上 malin9402 提供的代码进行说明,在这篇文章中,我们会对 YOLOv3 项目中的 test.py 文件进行说明。
如果只是想运行 Github 上的代码,可以参考对 YOLOv3 代码的说明一文。
导入需要的库
import cv2
import os
import shutil
import numpy as np
import tensorflow as tf
import core.utils as utils
from core.config import cfg
from core.yolov3 import YOLOv3, decode
设置参数
# Side length, in pixels, of the square input the network is built for.
INPUT_SIZE = 416
# Class-name lookup loaded once from the class file named in the config.
CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
# Number of classes, derived from the lookup instead of re-reading the file.
NUM_CLASS = len(CLASSES)
创建保存路径
# Output directories for the mAP evaluation text files.
predicted_dir_path = './mAP/predicted'
ground_truth_dir_path = './mAP/ground-truth'
output_dirs = (
    predicted_dir_path,
    ground_truth_dir_path,
    cfg.TEST.DECTECTED_IMAGE_PATH,
)
# Remove results left over from a previous run.
for dir_path in output_dirs:
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)
# Recreate the directories; makedirs also creates a missing parent
# (e.g. './mAP'), which plain os.mkdir would fail on.
for dir_path in output_dirs:
    os.makedirs(dir_path)
构建模型
# Model input: a square, 3-channel image of side INPUT_SIZE.
input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
# Model output: pass every feature map returned by YOLOv3 through decode,
# together with its index, and collect the results as the model outputs.
feature_maps = YOLOv3(input_layer)
bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]
# Build the Keras model from the input layer and the decoded outputs.
model = tf.keras.Model(input_layer, bbox_tensors)
# Load the trained weights.
model.load_weights("./yolov3")
测试
1、首先要从 .txt 文件中导入每一张图片的真实框信息
with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
2、然后用 for 循环来遍历每一个图片中的信息
for num, line in enumerate(annotation_file):
假设此时是第 200 张图片,那么此时的 line 为:
line = 'E:\\Pycharm\\code\\Jupyter\\tensorflow2.0\\My_net\\YOLO_v3\\data\\dataset\\test\\000200.jpg 141,305,197,361,7 87,23,143,79,0 46,114,102,170,1 16,55,44,83,5 31,253,87,309,5 257,305,341,389,6 317,163,401,247,4 164,118,248,202,0\n'
3、使用 strip() 去除首尾的空白字符(包括行尾的换行符),再用不带参数的 split() 按空白字符将字符串 line 分割成一个列表中的不同元素
annotation = line.strip().split()
annotation = ['E:\\Pycharm\\code\\Jupyter\\tensorflow2.0\\My_net\\YOLO_v3\\data\\dataset\\test\\000200.jpg','141,305,197,361,7','87,23,143,79,0','46,114,102,170,1','16,55,44,83,5','31,253,87,309,5','257,305,341,389,6','317,163,401,247,4','164,118,248,202,0']
4、将图片路径单独提取出来
image_path = annotation[0]
image_path = 'E:\\Pycharm\\code\\Jupyter\\tensorflow2.0\\My_net\\YOLO_v3\\data\\dataset\\test\\000200.jpg'
5、用 split() 提取图片名称
image_name = image_path.split('\\')[-1]
这里可能有的同学是用 ‘/’ 分割,可以自己尝试一下。
image_name = '000200.jpg'
6、读取图片
image = cv2.imread(image_path) # loaded in BGR channel order (OpenCV default); 8-bit images have values in 0-255
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert BGR to RGB
image_size = image.shape[:2] # first two entries of the array shape (height, width for an OpenCV image)
7、用 map 函数将真实框信息从字符串类型变为数值类型,然后放到一个列表中
bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])
bbox_data_gt = array([[141, 305, 197, 361, 7],[ 87, 23, 143, 79, 0],[ 46, 114, 102, 170, 1],[ 16, 55, 44, 83, 5],[ 31, 253, 87, 309, 5],[257, 305, 341, 389, 6],[317, 163, 401, 247, 4],[164, 118, 248, 202, 0]])
8、将 bbox_data_gt 中的框坐标和分类分开
# Split the ground-truth array into box coordinates and class ids.
if len(bbox_data_gt) == 0:
    # No boxes are annotated for this image.
    bboxes_gt = []
    classes_gt = []
else:
    # Columns 0-3 hold the box coordinates, column 4 the class id.
    bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
bboxes_gt = array([[141, 305, 197, 361],[ 87, 23, 143, 79],[ 46, 114, 102, 170],[ 16, 55, 44, 83],[ 31, 253, 87, 309],[257, 305, 341, 389],[317, 163, 401, 247],[164, 118, 248, 202]])
classes_gt = array([7, 0, 1, 5, 5, 6, 4, 0])
9、生成200份存储真实框信息的文本路径
ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt') # os.path.join拼接路径名称
ground_truth_path = './mAP/ground-truth\\199.txt'
10、统计这张图片中真实框的数量
num_bbox_gt = len(bboxes_gt)
num_bbox_gt = 8
11、将图片上真实标注框信息写入刚得到的 .txt 文件中
# Write one line per ground-truth box: "<class_name> xmin ymin xmax ymax".
with open(ground_truth_path, 'w') as f:
    for class_id, box in zip(classes_gt, bboxes_gt):
        class_name = CLASSES[class_id]
        # Unpack the four coordinates as strings so they can be joined.
        xmin, ymin, xmax, ymax = list(map(str, box))
        # Space-separated fields, terminated by a newline.
        bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
        f.write(bbox_mess)
        # Echo the same line to the console, indented with a tab.
        print('\t' + str(bbox_mess).strip())
此时第 200 张图片对应的 199.txt 中的内容是:
7 141 305 197 361
0 87 23 143 79
1 46 114 102 170
5 16 55 44 83
5 31 253 87 309
6 257 305 341 389
4 317 163 401 247
0 164 118 248 202
12、生成200份存储预测框信息的文本路径
predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
predict_result_path = './mAP/predicted\\199.txt'
13、图像预处理
- 图像比例缩放
- 填充
- 归一化
image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
image_data = image_data[np.newaxis, ...].astype(np.float32)
14、模型预测输出
# Run the model on the single-image batch.
pred_bbox = model.predict(image_data)
pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] # flatten every model output to 2-D, keeping its last dimension; the result is a list with one tensor per output (presumably 3, one per feature map - confirm against YOLOv3)
pred_bbox = tf.concat(pred_bbox, axis=0) # stack the rows of all outputs into one 2-D tensor (requires the outputs to share the same last dimension)
15、确定预测框的信息
- 确定预测框在原始图片位置
- 确定超出边界的预测框索引
- 确定分数大于一定阈值的预测框索引
- 确定概率最大所对应类别索引
- 确定满足三个索引的预测框
# NOTE(review): per the original author's note, score = confidence * highest class probability - confirm in utils.postprocess_boxes
bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD) # per the original author's note, each row is [box position, score, class], i.e. shape [-1, 6] - confirm in utils.postprocess_boxes
16、非极大值抑制(NMS,预测框冗余处理)
- 找出拥有该类别最大分数的预测框
- 存储该预测框
- 计算该预测框与其他预测框的 IOU
- 根据 IOU 相关条件删除预测框
- 剩下的预测框继续执行上述四个步骤,直至没有预测框
bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')
17、绘制预测框
# Draw the predicted boxes and save the annotated image when an output
# directory is configured.
if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
    image = utils.draw_bbox(image, bboxes)
    # NOTE(review): `image` was converted to RGB after loading, while
    # cv2.imwrite expects BGR; unless utils.draw_bbox converts back, the
    # saved file has red and blue swapped - confirm.
    # os.path.join keeps the file inside the directory even when the
    # configured path has no trailing separator.
    cv2.imwrite(os.path.join(cfg.TEST.DECTECTED_IMAGE_PATH, image_name), image)
18、将预测结果写在存储预测框信息的文本中
# Write one line per predicted box:
# "<class_name> <score> xmin ymin xmax ymax".
with open(predict_result_path, 'w') as f:
    for bbox in bboxes:
        # The first four entries are the box coordinates, truncated to ints.
        coor = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_ind = int(bbox[5])
        class_name = CLASSES[class_ind]
        # Score is written with four decimal places.
        score = '%.4f' % score
        xmin, ymin, xmax, ymax = list(map(str, coor))
        bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
        f.write(bbox_mess)
        # Echo the same line to the console, indented with a tab.
        print('\t' + str(bbox_mess).strip())
完整代码
import os
import shutil

import cv2
import numpy as np
import tensorflow as tf

import core.utils as utils
from core.config import cfg
from core.yolov3 import YOLOv3, decode

# Side length, in pixels, of the square input the network is built for.
INPUT_SIZE = 416
# Class-name lookup, loaded once from the class file named in the config.
CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
NUM_CLASS = len(CLASSES)

# Output directories for the mAP evaluation text files.
predicted_dir_path = './mAP/predicted'
ground_truth_dir_path = './mAP/ground-truth'
output_dirs = (
    predicted_dir_path,
    ground_truth_dir_path,
    cfg.TEST.DECTECTED_IMAGE_PATH,
)
# Start from empty directories so results of an earlier run are not mixed in.
for dir_path in output_dirs:
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)
# makedirs also creates a missing parent directory such as './mAP'.
for dir_path in output_dirs:
    os.makedirs(dir_path)

# Build the model: every feature map returned by YOLOv3 is passed through
# decode, and the decoded tensors become the model outputs.
input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
feature_maps = YOLOv3(input_layer)
bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]
model = tf.keras.Model(input_layer, bbox_tensors)
model.load_weights("./yolov3")

with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
    # Each annotation line is "<image path> x1,y1,x2,y2,cls x1,y1,x2,y2,cls ...".
    for num, line in enumerate(annotation_file):
        annotation = line.strip().split()
        if not annotation:
            # Skip blank lines instead of failing on annotation[0].
            continue

        image_path = annotation[0]
        # Accept both '/' and '\\' separators so the bare file name is
        # extracted for Windows-style paths as well.
        image_name = image_path.replace('\\', '/').split('/')[-1]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None rather than raising on failure.
            raise FileNotFoundError('cannot read image: %s' % image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Parse the ground-truth boxes: four coordinates plus a class id each.
        bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])
        if len(bbox_data_gt) == 0:
            bboxes_gt = []
            classes_gt = []
        else:
            bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
        ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')

        print('=> ground truth of %s:' % image_name)
        # One line per box: "<class_name> xmin ymin xmax ymax".
        with open(ground_truth_path, 'w') as f:
            for class_id, box in zip(classes_gt, bboxes_gt):
                class_name = CLASSES[class_id]
                xmin, ymin, xmax, ymax = list(map(str, box))
                bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                f.write(bbox_mess)
                print('\t' + str(bbox_mess).strip())

        print('=> predict result of %s:' % image_name)
        predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')

        # Predict process: preprocess to the network input size, add a batch
        # dimension, run the model and merge all outputs into one 2-D tensor.
        image_size = image.shape[:2]
        image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        pred_bbox = model.predict(image_data)
        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)
        bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

        if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
            image = utils.draw_bbox(image, bboxes)
            # NOTE(review): `image` is RGB at this point while cv2.imwrite
            # expects BGR; unless utils.draw_bbox converts back, the saved
            # file has red and blue swapped - confirm.
            cv2.imwrite(os.path.join(cfg.TEST.DECTECTED_IMAGE_PATH, image_name), image)

        # One line per prediction: "<class_name> <score> xmin ymin xmax ymax".
        with open(predict_result_path, 'w') as f:
            for bbox in bboxes:
                coor = np.array(bbox[:4], dtype=np.int32)
                score = bbox[4]
                class_ind = int(bbox[5])
                class_name = CLASSES[class_ind]
                score = '%.4f' % score
                xmin, ymin, xmax, ymax = list(map(str, coor))
                bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                f.write(bbox_mess)
                print('\t' + str(bbox_mess).strip())