当前位置: 代码迷 >> 综合 >> 碎点篇—— labelme json 数据制作 mask 实例分割 数据
  详细解决方案

碎点篇—— labelme json 数据制作 mask 实例分割 数据

热度:26   发布时间:2023-12-15 08:23:57.0

返回主目录

 

一. labelme 标记数据

            

 生成的数据如下: 

               

将数据放入到对应文件夹中:

                

              

 

二. 数据转换代码:

   转换代码由 config.py 和 json_2_dataset.py 两个文件组成。

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time     : 2020/04/15 22:53
# @Author   : WanDaoYi
# @FileName : config.py
# ============================================
"""Shared configuration for the labelme JSON -> mask dataset conversion.

Every setting lives under ``cfg.COMMON``; paths are built relative to
``cfg.COMMON.RELATIVE_PATH``.
"""

import os

from easydict import EasyDict as edict

__C = edict()
# Consumers can get config by: from config import cfg
cfg = __C

# Common options shared by the conversion script.
__C.COMMON = edict()

# Base path that every other path below is joined onto (current directory).
__C.COMMON.RELATIVE_PATH = "./"

# Default class info for masks; the background is the first class.
__C.COMMON.DEFAULT_CLASS_INFO = [{"source": "", "id": 0, "name": "BG"}]

# Root directory of the data set.
__C.COMMON.DATA_SET_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "dataset")

# Directory holding the original images.
__C.COMMON.IMAGE_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "dataset/images")
# Directory holding the JSON annotation files produced by labelme.
__C.COMMON.JSON_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "dataset/ann_json")

# Output directory for the generated info.yaml files.
__C.COMMON.INFO_YAML_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "dataset/info_yaml")
# Output directory for the generated label.png images.
__C.COMMON.LABEL_IMAGE_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "dataset/label_png")
# Output directory for the generated label_name.txt files.
__C.COMMON.LABEL_NAME_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "dataset/label_name")
# Output directory for the generated label_viz.png images.
__C.COMMON.LABEL_VIZ_IMAGE_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "dataset/label_viz_png")

# Whether existing output directories are deleted first: True deletes them,
# False keeps them.
__C.COMMON.FILE_EXISTS_FLAG = True

# Data split ratios. The 0.7 entry was originally also written to
# TEST_PERCENT and then overwritten by 0.1; it is the training share.
__C.COMMON.TRAIN_PERCENT = 0.7
__C.COMMON.VAL_PERCENT = 0.2
__C.COMMON.TEST_PERCENT = 0.1

# File suffixes.
__C.COMMON.JSON_SUFFIX = ".json"
__C.COMMON.PNG_SUFFIX = ".png"
__C.COMMON.JPG_SUFFIX = ".jpg"
__C.COMMON.YAML_SUFFIX = ".yaml"
__C.COMMON.TXT_SUFFIX = ".txt"

# Files that receive the image names of each split.
__C.COMMON.TRAIN_DATA_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "infos/train_data.txt")
__C.COMMON.VAL_DATA_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "infos/val_data.txt")
__C.COMMON.TEST_DATA_PATH = os.path.join(__C.COMMON.RELATIVE_PATH, "infos/test_data.txt")
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time     : 2020/04/25 14:40
# @Author   : WanDaoYi
# @FileName : json_2_dataset.py
# ============================================
"""Convert labelme JSON annotations into label PNGs, name lists and YAML info."""

import base64
import io
import json
import math
import os
import random
import shutil
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import yaml
from PIL import Image
from PIL import ImageDraw

from config import cfg


class Json2Dataset(object):
    """Turn a directory of labelme JSON files into mask-style label data.

    All input/output locations, split ratios and file suffixes are read from
    ``cfg.COMMON``. Constructing an instance already prepares (and, depending
    on ``cfg.COMMON.FILE_EXISTS_FLAG``, wipes) the output directories.
    """

    def __init__(self):
        # Input directories.
        self.json_file = cfg.COMMON.JSON_PATH
        self.image_file = cfg.COMMON.IMAGE_PATH

        # Split ratios.
        self.test_percent = cfg.COMMON.TEST_PERCENT
        self.val_percent = cfg.COMMON.VAL_PERCENT

        # Files that receive the image names of each split.
        self.train_data_path = cfg.COMMON.TRAIN_DATA_PATH
        self.val_data_path = cfg.COMMON.VAL_DATA_PATH
        self.test_data_path = cfg.COMMON.TEST_DATA_PATH

        # Output directories.
        self.info_yaml_file = cfg.COMMON.INFO_YAML_PATH
        self.label_image_file = cfg.COMMON.LABEL_IMAGE_PATH
        self.label_name_file = cfg.COMMON.LABEL_NAME_PATH
        self.label_viz_image_file = cfg.COMMON.LABEL_VIZ_IMAGE_PATH

        # Create (or reset) the output directories.
        self.deal_data_file(file_exists_flag=cfg.COMMON.FILE_EXISTS_FLAG)

        # File suffixes.
        self.json_suffix = cfg.COMMON.JSON_SUFFIX
        self.png_suffix = cfg.COMMON.PNG_SUFFIX
        self.yaml_suffix = cfg.COMMON.YAML_SUFFIX
        self.txt_suffix = cfg.COMMON.TXT_SUFFIX

    def divide_data(self):
        """Randomly split the image names into train / val / test lists.

        The test and val counts are ``int(total * percent)``; the remainder
        goes to train. Each list is written, one name per line, to the
        corresponding ``*_data_path`` file (existing files are overwritten).

        :return: None
        """
        image_name_list = os.listdir(self.image_file)
        image_number = len(image_name_list)

        n_test = int(image_number * self.test_percent)
        n_val = int(image_number * self.val_percent)
        n_train = image_number - n_test - n_val

        # Same two draws as before; sets make the membership tests O(1).
        train_val_sample = random.sample(image_name_list, n_train + n_val)
        train_names = set(random.sample(train_val_sample, n_train))
        train_val_names = set(train_val_sample)

        split_paths = (self.train_data_path, self.val_data_path, self.test_data_path)
        for split_path in split_paths:
            parent_dir = os.path.dirname(split_path)
            if parent_dir:
                # The target directory may not exist on a fresh checkout.
                os.makedirs(parent_dir, exist_ok=True)

        # Mode "w" truncates, so stale files need no explicit removal.
        with open(self.train_data_path, "w") as train_file, \
                open(self.val_data_path, "w") as val_file, \
                open(self.test_data_path, "w") as test_file:
            for image_name in image_name_list:
                if image_name in train_names:
                    target = train_file
                elif image_name in train_val_names:
                    target = val_file
                else:
                    target = test_file
                target.write(image_name + "\n")

    def deal_data_file(self, file_exists_flag=True):
        """Prepare the four output directories.

        :param file_exists_flag: when True, an existing output directory is
            removed with all its content before being recreated; when False,
            existing directories are kept and only missing ones are created.
        :return: None
        """
        output_dirs = (
            self.info_yaml_file,
            self.label_image_file,
            self.label_name_file,
            self.label_viz_image_file,
        )
        for output_dir in output_dirs:
            if file_exists_flag and os.path.exists(output_dir):
                shutil.rmtree(output_dir)
            # makedirs also creates missing parent directories.
            os.makedirs(output_dir, exist_ok=True)

    def do_data(self):
        """Convert every JSON annotation file found in ``self.json_file``.

        For each file this writes a label PNG, a label visualisation PNG, a
        text file with one label name per line and a YAML file holding the
        label names. Entries that are not regular files or do not end with
        the JSON suffix are skipped.

        :return: None
        """
        for json_name in os.listdir(self.json_file):
            json_data_path = os.path.join(self.json_file, json_name)
            if not os.path.isfile(json_data_path):
                continue
            if not json_name.endswith(self.json_suffix):
                continue

            # File name without suffix, e.g. 000001.json -> 000001
            name_info = os.path.splitext(json_name)[0]

            info_yaml_path = os.path.join(self.info_yaml_file, name_info + self.yaml_suffix)
            label_png_path = os.path.join(self.label_image_file, name_info + self.png_suffix)
            label_name_path = os.path.join(self.label_name_file, name_info + self.txt_suffix)
            label_viz_png_path = os.path.join(self.label_viz_image_file,
                                              name_info + self.png_suffix)

            with open(json_data_path) as json_fp:
                data_info = json.load(json_fp)

            # Use the image embedded in the JSON; otherwise read the file
            # named by "imagePath", resolved relative to the JSON file.
            image_data = data_info.get("imageData")
            if not image_data:
                image_path = os.path.join(os.path.dirname(json_data_path),
                                          data_info["imagePath"])
                with open(image_path, "rb") as image_fp:
                    image_data = base64.b64encode(image_fp.read()).decode("utf-8")

            image_arr = self.img_b64_2_arr(image_data)

            # Background is always value 0; other labels are numbered in
            # order of first appearance.
            label_name_to_value = {"_background_": 0}
            for shape in data_info["shapes"]:
                label_name = shape["label"]
                if label_name not in label_name_to_value:
                    label_name_to_value[label_name] = len(label_name_to_value)

            # Label names ordered by their value.
            label_names = sorted(label_name_to_value, key=label_name_to_value.get)

            lbl = self.shapes_2_label(image_arr.shape, data_info["shapes"],
                                      label_name_to_value)
            self.lbl_save(label_png_path, lbl)

            captions = ["{}: {}".format(lv, ln) for ln, lv in label_name_to_value.items()]
            lbl_viz = self.draw_label(lbl, image_arr, captions)
            Image.fromarray(lbl_viz).save(label_viz_png_path)

            with open(label_name_path, "w") as name_fp:
                for lbl_name in label_names:
                    name_fp.write(lbl_name + "\n")

            info = dict(label_names=label_names)
            with open(info_yaml_path, "w") as yaml_fp:
                yaml.safe_dump(info, yaml_fp, default_flow_style=False)

        print("saved to {}".format(cfg.COMMON.DATA_SET_PATH))

    def img_b64_2_arr(self, img_b64):
        """Decode a base64-encoded image into a numpy array.

        :param img_b64: base64 string (or bytes) of an encoded image file.
        :return: ``np.array`` built from the decoded PIL image.
        """
        buffer = io.BytesIO(base64.b64decode(img_b64))
        return np.array(Image.open(buffer))

    def shapes_2_label(self, image_shape, shapes, label_name_to_value, type='class'):
        """Rasterise labelme shapes into a per-pixel label array.

        :param image_shape: shape of the image; only the first two entries
            (height, width) are used.
        :param shapes: list of shape dicts with "points", "label" and an
            optional "shape_type".
        :param label_name_to_value: mapping from class name to integer value.
        :param type: 'class' returns only the class map; 'instance' also
            returns an instance map, treating the part of a label before the
            first "-" as the class name.
        :return: int32 class array, or ``(class array, instance array)`` when
            ``type`` is 'instance'.
        """
        assert type in ['class', 'instance']
        want_instances = type == "instance"

        cls = np.zeros(image_shape[:2], dtype=np.int32)
        ins = np.zeros(image_shape[:2], dtype=np.int32) if want_instances else None
        instance_names = ["_background_"] if want_instances else None

        for shape in shapes:
            label = shape["label"]

            if want_instances:
                cls_name = label.split("-")[0]
                if label not in instance_names:
                    instance_names.append(label)
                ins_id = instance_names.index(label)
            else:
                cls_name = label
                ins_id = None

            mask = self.shape_2_mask(image_shape[:2], shape["points"],
                                     shape.get("shape_type", None))
            # Later shapes overwrite earlier ones where they overlap.
            cls[mask] = label_name_to_value[cls_name]
            if want_instances:
                ins[mask] = ins_id

        if want_instances:
            return cls, ins
        return cls

    def shape_2_mask(self, image_shape, points, shape_type=None,
                     line_width=10, point_size=5):
        """Draw a single shape as a boolean mask.

        :param image_shape: shape whose first two entries are (height, width).
        :param points: list of (x, y) points describing the shape.
        :param shape_type: "circle", "rectangle", "line", "linestrip" or
            "point"; anything else, including None, is drawn as a polygon.
        :param line_width: stroke width used for "line" and "linestrip".
        :param point_size: radius used for "point".
        :return: boolean array that is True inside the shape.
        """
        canvas = Image.fromarray(np.zeros(image_shape[:2], dtype=np.uint8))
        draw = ImageDraw.Draw(canvas)
        xy = [tuple(point) for point in points]

        # Plain == is required here: "circle".__eq__(None) evaluates to
        # NotImplemented, which is truthy, so shapes without a shape_type
        # used to be sent to the circle branch instead of the polygon one.
        if shape_type == "circle":
            assert len(xy) == 2, 'Shape of shape_type=circle must have 2 points'
            (cx, cy), (px, py) = xy
            d = math.sqrt((cx - px) ** 2 + (cy - py) ** 2)
            draw.ellipse([cx - d, cy - d, cx + d, cy + d], outline=1, fill=1)
        elif shape_type == "rectangle":
            assert len(xy) == 2, 'Shape of shape_type=rectangle must have 2 points'
            draw.rectangle(xy, outline=1, fill=1)
        elif shape_type == "line":
            assert len(xy) == 2, 'Shape of shape_type=line must have 2 points'
            draw.line(xy=xy, fill=1, width=line_width)
        elif shape_type == "linestrip":
            draw.line(xy=xy, fill=1, width=line_width)
        elif shape_type == "point":
            assert len(xy) == 1, 'Shape of shape_type=point must have 1 points'
            cx, cy = xy[0]
            r = point_size
            draw.ellipse([cx - r, cy - r, cx + r, cy + r], outline=1, fill=1)
        else:
            assert len(xy) > 2, 'Polygon must have points more than 2'
            draw.polygon(xy=xy, outline=1, fill=1)

        return np.array(canvas, dtype=bool)

    def draw_label(self, label, image=None, label_names=None,
                   color_map=None, **kwargs):
        """Render a colourised label map (optionally blended with the image).

        :param label: ndarray, (H, W), pixel-wise labels to colourise.
        :param image: ndarray, optional, image the colourised label is
            blended with.
        :param label_names: iterable of label names; only its length is used,
            as the number of labels. Defaults to one name per value in
            ``0..label.max()``.
        :param color_map: optional array of RGB rows in [0, 1]; generated
            when None.
        :param kwargs: accepted for compatibility; not used.
        :return: RGB uint8 ndarray with the same height and width as ``label``.
        """
        backend_org = plt.rcParams["backend"]
        plt.switch_backend("agg")

        # Remove all padding and ticks so only the image itself is saved.
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
        plt.margins(0, 0)
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())

        if label_names is None:
            label_names = [str(l) for l in range(label.max() + 1)]

        color_map = self.validate_color_map(color_map, len(label_names))
        label_viz = self.label_2_rgb(label, image, n_labels=len(label_names),
                                     color_map=color_map)
        plt.imshow(label_viz)
        plt.axis("off")

        # NOTE(review): legend handles/titles were built here but never
        # passed to plt.legend, so no legend was drawn. That dead code is
        # removed and the rendered output is unchanged - confirm whether a
        # legend is actually wanted.

        buffer = io.BytesIO()
        plt.savefig(buffer, bbox_inches="tight", pad_inches=0)
        plt.cla()
        plt.close()

        plt.switch_backend(backend_org)

        # Scale the saved figure back to the label's (width, height).
        out_size = (label_viz.shape[1], label_viz.shape[0])
        out = Image.open(buffer).resize(out_size, Image.BILINEAR).convert('RGB')
        return np.asarray(out)

    def validate_color_map(self, color_map, n_labels):
        """Return a usable colour map, generating one when none is given.

        :param color_map: None, or an (N, 3) array with values in [0, 1].
        :param n_labels: number of colours to generate when ``color_map`` is
            None.
        :return: the validated or generated colour map.
        """
        if color_map is None:
            return self.label_color_map(n_labels)

        assert color_map.shape == (color_map.shape[0], 3), \
            'color_map must be sequence of RGB values'
        assert 0 <= color_map.min() and color_map.max() <= 1, \
            'color_map must ranges 0 to 1'
        return color_map

    def label_color_map(self, num=256):
        """Build a colour table with one RGB row per label index.

        For each index, bits 0/1/2 of successive 3-bit groups are spread into
        the high-to-low bits of the red/green/blue channels.

        :param num: number of colours to generate.
        :return: float32 array of shape (num, 3) with values in [0, 1].
        """
        color_map = np.zeros((num, 3))
        for index in range(num):
            remaining = index
            r, g, b = 0, 0, 0
            for j in range(8):
                shift = 7 - j
                r |= int(self.bit_get(remaining, 0)) << shift
                g |= int(self.bit_get(remaining, 1)) << shift
                b |= int(self.bit_get(remaining, 2)) << shift
                remaining >>= 3
            color_map[index] = (r, g, b)

        return color_map.astype(np.float32) / 255

    def bit_get(self, byte_val, inx):
        """Return True when bit ``inx`` of ``byte_val`` is set."""
        return (byte_val & (1 << inx)) != 0

    def label_2_rgb(self, lbl, image=None, n_labels=None, alpha=0.5,
                    color_map=None):
        """Map a label array to RGB colours, optionally blended with an image.

        :param lbl: integer label array; pixels equal to -1 are drawn black.
        :param image: optional image array; it is converted to grey scale and
            mixed with the colours.
        :param n_labels: number of labels; defaults to the number of distinct
            values in ``lbl``.
        :param alpha: weight of the label colours in the blend.
        :param color_map: optional (N, 3) colour map with values in [0, 1].
        :return: uint8 RGB array.
        """
        if n_labels is None:
            n_labels = len(np.unique(lbl))

        color_map = self.validate_color_map(color_map, n_labels)
        color_map = (color_map * 255).astype(np.uint8)

        lbl_viz = color_map[lbl]
        # Unlabelled pixels.
        lbl_viz[lbl == -1] = (0, 0, 0)

        if image is not None:
            image_gray = Image.fromarray(image).convert('LA')
            image_gray = np.asarray(image_gray.convert("RGB"))
            lbl_viz = alpha * lbl_viz + (1 - alpha) * image_gray
            lbl_viz = lbl_viz.astype(np.uint8)

        return lbl_viz

    def lbl_save(self, file_name, lbl):
        """Save a label array as a palettised PNG.

        :param file_name: target path; ".png" is appended when missing.
        :param lbl: integer label array with values in [-1, 254].
        :return: None
        :raises ValueError: when the label values do not fit that range.
        """
        if os.path.splitext(file_name)[1] != ".png":
            file_name += ".png"

        if not (lbl.min() >= -1 and lbl.max() < 255):
            raise ValueError('[%s] Cannot save the pixel-wise class label as PNG. '
                             'Please consider using the .npy format.' % file_name)

        lbl_pil = Image.fromarray(lbl.astype(np.uint8), mode='P')
        color_map = self.label_color_map(255)
        lbl_pil.putpalette((color_map * 255).astype(np.uint8).flatten())
        lbl_pil.save(file_name)


if __name__ == "__main__":
    # Start time of the run.
    start_time = datetime.now()
    print("开始时间: {}".format(start_time))

    demo = Json2Dataset()
    demo.divide_data()
    demo.do_data()

    # End time of the run.
    end_time = datetime.now()
    print("结束时间: {}, 训练模型耗时: {}".format(end_time, end_time - start_time))

 

运行代码后,得到如下结果:

               

                  

 

在实例分割中,有时候会用到这样的数据。

 

 

            

返回主目录

  相关解决方案