Unverified 提交 c3ae4e4a authored 作者: Glenn Jocher's avatar Glenn Jocher 提交者: GitHub

Multi-threaded VisDrone and VOC downloads (#7108)

* Multi-threaded VOC download * Update VOC.yaml * Update * Update general.py * Update general.py
上级 ecc2c7ba
......@@ -34,6 +34,7 @@ names: ['wheat_head'] # class names
download: |
from utils.general import download, Path
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
......
......@@ -65,6 +65,7 @@ download: |
from utils.general import Path, download, np, xyxy2xywhn
# Make Directories
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
......
......@@ -24,6 +24,7 @@ download: |
from tqdm import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh
# Download
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
......
......@@ -62,7 +62,7 @@ download: |
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
download(urls, dir=dir / 'images', delete=False)
download(urls, dir=dir / 'images', delete=False, threads=3)
# Convert
path = dir / f'images/VOCdevkit'
......
......@@ -54,7 +54,7 @@ download: |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir)
download(urls, dir=dir, threads=4)
# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
......
......@@ -30,6 +30,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
download: |
from utils.general import download, Path
# Download labels
segments = False # segment or box labels
dir = Path(yaml['path']) # dataset root dir
......
......@@ -449,8 +449,9 @@ def check_dataset(data, autodownload=True):
if val:
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
if not all(x.exists() for x in val):
LOGGER.info('\nDataset not found, missing paths: %s' % [str(x) for x in val if not x.exists()])
LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
if s and autodownload: # download script
t = time.time()
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename
......@@ -465,9 +466,11 @@ def check_dataset(data, autodownload=True):
r = os.system(s)
else: # python script
r = exec(s, {'yaml': data}) # return None
LOGGER.info(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n")
dt = f'({round(time.time() - t, 1)}s)'
s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
LOGGER.info(emojis(f"Dataset download {s}"))
else:
raise Exception('Dataset not found.')
raise Exception(emojis('Dataset not found ❌'))
return data # dictionary
......@@ -491,7 +494,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
if curl:
os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail
else:
torch.hub.download_url_to_file(url, f, progress=True) # torch download
torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download
if unzip and f.suffix in ('.zip', '.gz'):
LOGGER.info(f'Unzipping {f}...')
if f.suffix == '.zip':
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论