PaddleOCR 检测标注可视化
推理缩放尺寸:
训练数据预处理增强
EastRandomCropData代码:
import json
import os

import cv2
# Visualise detection labels: for every image listed in Label.txt, draw the
# corner points of each annotated text box and show the images one by one.
dir_path = r'E:\project\icdar_c4_train_tmp/'
labelpath = dir_path + '/Label.txt'

# Colour used for each corner index (OpenCV colour tuples); any index not
# listed here (0, or 4 and above for polygons) falls back to DEFAULT_COLOR.
DEFAULT_COLOR = (255, 0, 0)
POINT_COLORS = {
    1: (255, 255, 0),
    2: (255, 0, 255),
    3: (0, 0, 255),
}

# Maps image path (relative to dir_path) -> list of annotation dicts.
labeldict = {}
with open(labelpath, 'r', encoding='utf-8') as f:
    for each in f:
        each = each.rstrip('\r\n')
        if not each:
            continue  # tolerate blank lines instead of failing to unpack
        # Split on the first tab only, so a stray tab inside the annotation
        # column cannot break the two-way unpacking.
        file, _, label = each.partition('\t')
        label = label.strip()
        # The previous version rewrote true/false to True/False and called
        # eval() on the result. That executes arbitrary file content and also
        # rewrites the words "true"/"false" inside transcriptions.
        # NOTE(review): the column is assumed to be JSON (the true/false
        # patching suggests so) -- confirm for label files from other tools.
        labeldict[file] = json.loads(label) if label else []

for k, vs in labeldict.items():
    img = cv2.imread(dir_path + k)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print(k, 'could not be read, skipped')
        continue
    for v in vs:
        if "transcription" not in v:
            continue
        for index, point in enumerate(v['points']):
            color = POINT_COLORS.get(index, DEFAULT_COLOR)
            # cv2.circle needs integer pixel coordinates; labels may store floats.
            center = (int(round(point[0])), int(round(point[1])))
            cv2.circle(img, center, 1, color, 2)
        print(k, v['points'])
    cv2.imshow("asdf", img)
    cv2.waitKey()
检测用的:resize_image_type1
DetResizeForTest 的 image_shape 参数顺序为:h, w。
transforms:
  # - DecodeImage: # load image
  #     img_mode: BGR
  #     channel_first: False
  - DetLabelEncode: # Class handling label
  - DetResizeForTest:
      image_shape: [128, 352]
缩放代码:
operators.py中:
class DetResizeForTest(object):
    """Resize an image before it is fed to the text detector.

    The resize strategy is selected from the keyword arguments:

    * ``image_shape=[h, w]``: type 1, output is exactly ``h x w``.
    * ``limit_side_len`` (optional ``limit_type``, default ``'min'``): type 0.
    * ``resize_long``: type 2.
    * none of the above: type 0 with ``limit_side_len=736``,
      ``limit_type='min'``.

    Extra keyword arguments that only affect type 1:

    * ``keep_ratio`` (default ``True``): scale uniformly and centre the
      result on a black ``h x w`` canvas. ``False`` stretches the image to
      ``h x w`` without preserving the aspect ratio.
    * ``debug_show`` (default ``False``): show the padded result in an
      OpenCV window named ``'resize'``.

    NOTE(review): ``resize_image_type0`` and ``resize_image_type2`` are called
    by ``__call__`` but are not part of this excerpt; they must be provided
    by the full ``operators.py``.
    """

    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
        self.keep_ratio = kwargs.get('keep_ratio', True)
        self.debug_show = kwargs.get('debug_show', False)
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
        elif 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'

    def __call__(self, data):
        """Resize ``data['image']`` in place and record the scaling.

        Args:
            data: dict holding an ``(h, w, c)`` array under ``'image'``.

        Returns:
            The same dict with ``'image'`` replaced by the resized image and
            ``'shape'`` set to ``np.array([src_h, src_w, ratio_h, ratio_w])``.
        """
        img = data['image']
        src_h, src_w, _ = img.shape
        if self.resize_type == 0:
            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    @staticmethod
    def _letterbox_geometry(src_h, src_w, dst_h, dst_w):
        """Compute the uniform scale and placement that fit src inside dst.

        Returns:
            ``(scale, new_h, new_w, top, left)``: the scale factor, the size
            of the scaled image and the offset of its top-left corner when
            centred on a ``dst_h x dst_w`` canvas.
        """
        # Integer cross-multiplication of src_h / src_w > dst_h / dst_w.
        if src_h * dst_w > dst_h * src_w:
            # Source is relatively taller: height is the limiting side.
            scale = dst_h / src_h
            new_h = dst_h
            new_w = min(dst_w, max(1, int(round(src_w * scale))))
        else:
            # Source is relatively wider (or equal): width is limiting.
            scale = dst_w / src_w
            new_w = dst_w
            new_h = min(dst_h, max(1, int(round(src_h * scale))))
        # max(1, ...) above keeps extremely thin inputs from rounding to a
        # zero-sized resize target.
        top = (dst_h - new_h) // 2
        left = (dst_w - new_w) // 2
        return scale, new_h, new_w, top, left

    def resize_image_type1(self, img):
        """Resize ``img`` to ``self.image_shape`` (``[h, w]``).

        Args:
            img: image array whose first two axes are height and width; the
                padded path writes into a 3-channel ``uint8`` canvas.

        Returns:
            ``(image, [ratio_h, ratio_w])``. With ``keep_ratio`` both ratios
            equal the single uniform scale factor.
        """
        dst_h, dst_w = (int(side) for side in self.image_shape)
        src_h, src_w = img.shape[:2]  # (h, w, c)

        if not self.keep_ratio:
            ratio_h = float(dst_h) / src_h
            ratio_w = float(dst_w) / src_w
            img = cv2.resize(img, (dst_w, dst_h))
            return img, [ratio_h, ratio_w]

        scale, new_h, new_w, top, left = self._letterbox_geometry(
            src_h, src_w, dst_h, dst_w)
        resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
        canvas = np.zeros((dst_h, dst_w, 3), dtype=np.uint8)
        canvas[top:top + new_h, left:left + new_w, :] = resized
        # NOTE(review): the padding offset (top/left) is not part of the
        # returned ratios; whatever maps detections back to the source image
        # has to account for the centred padding -- confirm in post-processing.
        if self.debug_show:
            cv2.imshow('resize', canvas)
        return canvas, [scale, scale]
配置文件ch_det_mv3_db_v2.0.yml参数:
EastRandomCropData 参数:h w
因为NormalizeImage的参数为hwc
去掉了 Fliplr 翻转增强,缩小了 Resize 的缩放范围:从 [0.5, 3] 改为 [0.8, 1.5]。
修改了EastRandomCropData 缩放宽高
transforms:
  # - DecodeImage: # load image
  #     img_mode: BGR
  #     channel_first: False
  - DetLabelEncode: # Class handling label
  - IaaAugment:
      augmenter_args:
        # - { 'type': Fliplr, 'args': { 'p': 0.5 } }
        - { 'type': Affine, 'args': { 'rotate': [-5, 5] } }
        - { 'type': Resize, 'args': { 'size': [0.8, 1.5] } }
  - EastRandomCropData:
      size: [128, 352] # h w (swapped to w h inside EastRandomCropData)
      max_tries: 50
      keep_ratio: true
  - MakeBorderMap:
      shrink_ratio: 0.4
      thresh_min: 0.3
      thresh_max: 0.7
  - MakeShrinkMap:
      shrink_ratio: 0.4
      min_text_size: 12
  - NormalizeImage:
      scale: 1./255.
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
      order: 'hwc'
  - ToCHWImage:
  - KeepKeys:
      keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
在 EastRandomCropData 类内部,size 在构造函数中交换后的顺序为 w, h:
class EastRandomCropData(object):
    """Crop a region of the image and resize it to a fixed output size.

    Args:
        size: output size given as ``(h, w)``. It is stored on the instance
            as ``[w, h]``, which is the order the rest of the class uses.
        max_tries: forwarded to ``crop_area``.
        min_crop_side_ratio: forwarded to ``crop_area``.
        keep_ratio: if true, scale the crop uniformly and pad the remainder
            with zeros (crop anchored at the top-left corner); otherwise
            stretch the crop to the full output size.

    NOTE(review): ``crop_area`` and ``is_poly_outside_rect`` are helpers that
    are not part of this excerpt; their exact contracts should be checked in
    the full module.
    """

    def __init__(self, size=(640, 640), max_tries=10, min_crop_side_ratio=0.3, keep_ratio=True, **kwargs):
        # Build a new [w, h] list instead of swapping in place: the in-place
        # swap raised TypeError for the default tuple and silently modified
        # the caller's config list (so constructing the operator twice from
        # the same list undid the swap).
        self.size = [size[1], size[0]]
        self.max_tries = max_tries
        self.min_crop_side_ratio = min_crop_side_ratio
        self.keep_ratio = keep_ratio

    def __call__(self, data):
        """Crop and resize the image, then transform the text polygons.

        Args:
            data: dict with ``'image'``, ``'polys'``, ``'ignore_tags'`` and
                ``'texts'``.

        Returns:
            The same dict with those four entries replaced by the cropped
            image and the annotations that remain inside the crop.
        """
        img = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']
        texts = data['texts']
        all_care_polys = [poly for poly, tag in zip(text_polys, ignore_tags) if not tag]

        # Pick the crop region.
        crop_x, crop_y, crop_w, crop_h = crop_area(
            img, all_care_polys, self.min_crop_side_ratio, self.max_tries)

        # Uniform scale that fits the crop inside the (w, h) output size.
        scale_w = self.size[0] / crop_w
        scale_h = self.size[1] / crop_h
        scale = min(scale_w, scale_h)
        h = int(crop_h * scale)
        w = int(crop_w * scale)

        cropped = img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]
        if self.keep_ratio:
            # Keep the aspect ratio: scaled crop in the top-left corner,
            # the rest of the canvas stays zero.
            padimg = np.zeros((self.size[1], self.size[0], img.shape[2]), img.dtype)
            padimg[:h, :w] = cv2.resize(cropped, (w, h))
            img = padimg
        else:
            img = cv2.resize(cropped, tuple(self.size))

        # Move the polygons into crop coordinates and drop those that fall
        # outside the scaled crop.
        text_polys_crop = []
        ignore_tags_crop = []
        texts_crop = []
        for poly, text, tag in zip(text_polys, texts, ignore_tags):
            poly = ((poly - (crop_x, crop_y)) * scale).tolist()
            if not is_poly_outside_rect(poly, 0, 0, w, h):
                text_polys_crop.append(poly)
                ignore_tags_crop.append(tag)
                texts_crop.append(text)

        data['image'] = img
        data['polys'] = np.array(text_polys_crop)
        data['ignore_tags'] = ignore_tags_crop
        data['texts'] = texts_crop
        return data
版权说明 : 本文为转载文章, 版权归原作者所有 版权申明
原文链接 : https://blog.csdn.net/jacke121/article/details/124535340
内容来源于网络,如有侵权,请联系作者删除!