Unverified 提交 548a98a3 作者: Glenn Jocher 提交者: GitHub

Recursive directories dataset capability (#1408)

* Recursive directories dataset capability
* x.split('.')[-1]
* f += glob.glob(f"{p}{os.sep}**.*", recursive=True)
* f += glob.glob(str(p / '**.*'), recursive=True)
* remove . from image and vid formats
* .txt to txt
* str(p / '**' / '*.*')
上级 9c91aeae
...@@ -23,8 +23,8 @@ from utils.torch_utils import torch_distributed_zero_first ...@@ -23,8 +23,8 @@ from utils.torch_utils import torch_distributed_zero_first
# Parameters # Parameters
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng'] img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv'] vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
# Get orientation exif tag # Get orientation exif tag
for orientation in ExifTags.TAGS.keys(): for orientation in ExifTags.TAGS.keys():
...@@ -125,8 +125,8 @@ class LoadImages: # for inference ...@@ -125,8 +125,8 @@ class LoadImages: # for inference
else: else:
raise Exception('ERROR: %s does not exist' % p) raise Exception('ERROR: %s does not exist' % p)
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats] images = [x for x in files if x.split('.')[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
ni, nv = len(images), len(videos) ni, nv = len(images), len(videos)
self.img_size = img_size self.img_size = img_size
...@@ -337,24 +337,23 @@ class LoadImagesAndLabels(Dataset): # for training/testing ...@@ -337,24 +337,23 @@ class LoadImagesAndLabels(Dataset): # for training/testing
def img2label_paths(img_paths): def img2label_paths(img_paths):
# Define label paths as a function of image paths # Define label paths as a function of image paths
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
return [x.replace(sa, sb, 1).replace(os.path.splitext(x)[-1], '.txt') for x in img_paths] return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths]
try: try:
f = [] # image files f = [] # image files
for p in path if isinstance(path, list) else [path]: for p in path if isinstance(path, list) else [path]:
p = str(Path(p)) # os-agnostic p = Path(p) # os-agnostic
parent = str(Path(p).parent) + os.sep if p.is_dir(): # dir
if os.path.isfile(p): # file f += glob.glob(str(p / '**' / '*.*'), recursive=True)
elif p.is_file(): # file
with open(p, 'r') as t: with open(p, 'r') as t:
t = t.read().splitlines() t = t.read().splitlines()
parent = str(p.parent) + os.sep
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
elif os.path.isdir(p): # folder
f += glob.iglob(p + os.sep + '*.*')
else: else:
raise Exception('%s does not exist' % p) raise Exception('%s does not exist' % p)
self.img_files = sorted( self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
[x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats]) assert self.img_files, 'No images found'
assert len(self.img_files) > 0, 'No images found'
except Exception as e: except Exception as e:
raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
......
...@@ -79,9 +79,9 @@ def check_dataset(dict): ...@@ -79,9 +79,9 @@ def check_dataset(dict):
# Download dataset if not found locally # Download dataset if not found locally
val, s = dict.get('val'), dict.get('download') val, s = dict.get('val'), dict.get('download')
if val and len(val): if val and len(val):
val = [os.path.abspath(x) for x in (val if isinstance(val, list) else [val])] # val path val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
if not all(os.path.exists(x) for x in val): if not all(x.exists() for x in val):
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [*val]) print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
if s and len(s): # download script if s and len(s): # download script
print('Downloading %s ...' % s) print('Downloading %s ...' % s)
if s.startswith('http') and s.endswith('.zip'): # URL if s.startswith('http') and s.endswith('.zip'): # URL
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论