Unverified 提交 11f85e7e authored 作者: Glenn Jocher's avatar Glenn Jocher 提交者: GitHub

Auto-fix corrupt JPEGs (#4548)

* Autofix corrupt JPEGs

  This PR automatically re-saves corrupt JPEGs and trains with the re-saved images. WARNING: this will overwrite the existing corrupt JPEGs in a dataset and replace them with correct JPEGs, though the file size may increase and the image contents may not be exactly the same due to lossy JPEG compression schemes. Results may vary by JPEG decoder and hardware. Current behavior (before this PR) is to exclude corrupt JPEGs from training with a warning to the user, but many users have been complaining about large parts of their dataset being excluded from training.

* Clarify re-save reason
上级 2da6444c
...@@ -314,7 +314,7 @@ class LoadStreams: # multiple IP or RTSP cameras ...@@ -314,7 +314,7 @@ class LoadStreams: # multiple IP or RTSP cameras
print('') # newline print('') # newline
# check for common shapes # check for common shapes
s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs], 0) # shapes s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs])
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect: if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.') print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
...@@ -861,7 +861,7 @@ def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annota ...@@ -861,7 +861,7 @@ def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annota
def verify_image_label(args): def verify_image_label(args):
# Verify one image-label pair # Verify one image-label pair
im_file, lb_file, prefix = args im_file, lb_file, prefix = args
nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, corrupt nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments
try: try:
# verify images # verify images
im = Image.open(im_file) im = Image.open(im_file)
...@@ -872,10 +872,11 @@ def verify_image_label(args): ...@@ -872,10 +872,11 @@ def verify_image_label(args):
if im.format.lower() in ('jpg', 'jpeg'): if im.format.lower() in ('jpg', 'jpeg'):
with open(im_file, 'rb') as f: with open(im_file, 'rb') as f:
f.seek(-2, 2) f.seek(-2, 2)
assert f.read() == b'\xff\xd9', 'corrupted JPEG' if f.read() != b'\xff\xd9': # corrupt JPEG
im.save(im_file, format='JPEG', subsampling=0, quality=100) # re-save image
msg = f'{prefix}WARNING: corrupt JPEG restored and saved {im_file}'
# verify labels # verify labels
segments = [] # instance segments
if os.path.isfile(lb_file): if os.path.isfile(lb_file):
nf = 1 # label found nf = 1 # label found
with open(lb_file, 'r') as f: with open(lb_file, 'r') as f:
...@@ -896,7 +897,7 @@ def verify_image_label(args): ...@@ -896,7 +897,7 @@ def verify_image_label(args):
else: else:
nm = 1 # label missing nm = 1 # label missing
l = np.zeros((0, 5), dtype=np.float32) l = np.zeros((0, 5), dtype=np.float32)
return im_file, l, shape, segments, nm, nf, ne, nc, '' return im_file, l, shape, segments, nm, nf, ne, nc, msg
except Exception as e: except Exception as e:
nc = 1 nc = 1
msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}' msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}'
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论