Unverified 提交 f79d7479 authored 作者: Glenn Jocher's avatar Glenn Jocher 提交者: GitHub

Add optional dataset.yaml `path` attribute (#3753)

* Add optional dataset.yaml `path` attribute @KalenMike * pass locals to python scripts * handle lists * update coco128.yaml * Capitalize first letter * add test key * finalize GlobalWheat2020.yaml * finalize objects365.yaml * finalize SKU-110K.yaml * finalize SKU-110K.yaml * finalize VisDrone.yaml * NoneType fix * update download comment * voc to VOC * update * update VOC.yaml * update VOC.yaml * remove dashes * delete get_voc.sh * force coco and coco128 to ../datasets * Capitalize Argoverse_HD.yaml * Capitalize Objects365.yaml * update Argoverse_HD.yaml * coco segments fix * VOC single-thread * update Argoverse_HD.yaml * update data_dict in test handling * create root
上级 417a2f42
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
# Train command: python train.py --data Argoverse_HD.yaml
# Default dataset location is next to YOLOv5:
# /parent
# /datasets/Argoverse
# /yolov5
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
# Classes
nc: 8 # number of classes
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
from tqdm import tqdm
from utils.general import download, Path
def argoverse2yolo(set):
    """Convert one Argoverse-HD annotation JSON file into per-image YOLO label files.

    Args:
        set: pathlib.Path of the annotation JSON (e.g. .../train.json). The name shadows the
            builtin ``set`` but is kept so existing callers are unaffected.

    Label files are written to set.parents[2] / 'Argoverse-1.1' / 'labels' / <sequence dir>,
    one '<image stem>.txt' per image, one 'cls x_center y_center width height' line per box.
    """
    with open(set, "rb") as f:  # context manager so the handle is closed after parsing
        a = json.load(f)

    labels = {}  # label file path (str) -> list of YOLO-format lines
    for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
        img = a['images'][annot['image_id']]
        img_label_name = img['name'][:-3] + "txt"  # replace the 3-character image extension

        cls = annot['category_id']  # instance class id
        x_min, y_min, width, height = annot['bbox']
        # Shift the first two bbox values by half the box size to get the centre, then normalise.
        # NOTE(review): 1920 x 1200 is presumably the fixed image size of this camera - confirm.
        x_center = (x_min + width / 2) / 1920.0  # offset and scale
        y_center = (y_min + height / 2) / 1200.0  # offset and scale
        width /= 1920.0  # scale
        height /= 1200.0  # scale

        img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][img['sid']]
        k = str(img_dir / img_label_name)
        if k not in labels:
            # First box for this image: make sure its directory exists (once per image,
            # rather than a filesystem check for every annotation).
            img_dir.mkdir(parents=True, exist_ok=True)
            labels[k] = []
        labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")

    for k, lines in labels.items():
        with open(k, "w") as f:
            f.writelines(lines)
# Download
dir = Path(yaml['path'])  # dataset root dir, taken from this file's 'path' key like the other dataset scripts
urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
download(urls, dir=dir, delete=False)

# Convert
annotations_dir = 'Argoverse-HD/annotations/'
(dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
for d in "train.json", "val.json":
    argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
# Global Wheat 2020 dataset http://www.global-wheat.com/ # Global Wheat 2020 dataset http://www.global-wheat.com/
# Train command: python train.py --data GlobalWheat2020.yaml # Train command: python train.py --data GlobalWheat2020.yaml
# Default dataset location is next to YOLOv5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent
# /datasets/GlobalWheat2020 # /datasets/GlobalWheat2020
# /yolov5 # /yolov5
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
train: # 3422 images path: ../datasets/GlobalWheat2020 # dataset root dir
- ../datasets/GlobalWheat2020/images/arvalis_1 train: # train images (relative to 'path') 3422 images
- ../datasets/GlobalWheat2020/images/arvalis_2 - images/arvalis_1
- ../datasets/GlobalWheat2020/images/arvalis_3 - images/arvalis_2
- ../datasets/GlobalWheat2020/images/ethz_1 - images/arvalis_3
- ../datasets/GlobalWheat2020/images/rres_1 - images/ethz_1
- ../datasets/GlobalWheat2020/images/inrae_1 - images/rres_1
- ../datasets/GlobalWheat2020/images/usask_1 - images/inrae_1
- images/usask_1
val: # 748 images (WARNING: train set contains ethz_1) val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
- ../datasets/GlobalWheat2020/images/ethz_1 - images/ethz_1
test: # test images (optional) 1276 images
test: # 1276 images - images/utokyo_1
- ../datasets/GlobalWheat2020/images/utokyo_1 - images/utokyo_2
- ../datasets/GlobalWheat2020/images/utokyo_2 - images/nau_1
- ../datasets/GlobalWheat2020/images/nau_1 - images/uq_1
- ../datasets/GlobalWheat2020/images/uq_1
# Classes
# number of classes nc: 1 # number of classes
nc: 1 names: [ 'wheat_head' ] # class names
# class names
names: [ 'wheat_head' ] # Download script/URL (optional) ---------------------------------------------------------------------------------------
# download command/URL (optional) --------------------------------------------------------------------------------------
download: | download: |
from utils.general import download, Path from utils.general import download, Path
# Download # Download
dir = Path('../datasets/GlobalWheat2020') # dataset directory dir = Path(yaml['path']) # dataset root dir
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip', urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip'] 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
download(urls, dir=dir) download(urls, dir=dir)
......
# Objects365 dataset https://www.objects365.org/ # Objects365 dataset https://www.objects365.org/
# Train command: python train.py --data objects365.yaml # Train command: python train.py --data Objects365.yaml
# Default dataset location is next to YOLOv5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent
# /datasets/objects365 # /datasets/Objects365
# /yolov5 # /yolov5
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../datasets/objects365/images/train # 1742289 images
val: ../datasets/objects365/images/val # 5570 images
# number of classes # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
nc: 365 path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 5570 images
test: # test images (optional)
# class names # Classes
nc: 365 # number of classes
names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup', names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book', 'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag', 'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
...@@ -56,7 +57,7 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl ...@@ -56,7 +57,7 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl
'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ] 'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]
# download command/URL (optional) -------------------------------------------------------------------------------------- # Download script/URL (optional) ---------------------------------------------------------------------------------------
download: | download: |
from pycocotools.coco import COCO from pycocotools.coco import COCO
from tqdm import tqdm from tqdm import tqdm
...@@ -64,7 +65,7 @@ download: | ...@@ -64,7 +65,7 @@ download: |
from utils.general import download, Path from utils.general import download, Path
# Make Directories # Make Directories
dir = Path('../datasets/objects365') # dataset directory dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels': for p in 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True) (dir / p).mkdir(parents=True, exist_ok=True)
for q in 'train', 'val': for q in 'train', 'val':
......
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
# Train command: python train.py --data SKU-110K.yaml # Train command: python train.py --data SKU-110K.yaml
# Default dataset location is next to YOLOv5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent
# /datasets/SKU-110K # /datasets/SKU-110K
# /yolov5 # /yolov5
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
train: ../datasets/SKU-110K/train.txt # 8219 images path: ../datasets/SKU-110K # dataset root dir
val: ../datasets/SKU-110K/val.txt # 588 images train: train.txt # train images (relative to 'path') 8219 images
test: ../datasets/SKU-110K/test.txt # 2936 images val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images
# number of classes # Classes
nc: 1 nc: 1 # number of classes
names: [ 'object' ] # class names
# class names
names: [ 'object' ]
# Download script/URL (optional) ---------------------------------------------------------------------------------------
# download command/URL (optional) --------------------------------------------------------------------------------------
download: | download: |
import shutil import shutil
from tqdm import tqdm from tqdm import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh from utils.general import np, pd, Path, download, xyxy2xywh
# Download # Download
datasets = Path('../datasets') # download directory dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz'] urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
download(urls, dir=datasets, delete=False) download(urls, dir=parent, delete=False)
# Rename directories # Rename directories
dir = (datasets / 'SKU-110K')
if dir.exists(): if dir.exists():
shutil.rmtree(dir) shutil.rmtree(dir)
(datasets / 'SKU110K_fixed').rename(dir) # rename dir (parent / 'SKU110K_fixed').rename(dir) # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
# Convert labels # Convert labels
......
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Train command: python train.py --data VOC.yaml
# Default dataset location is next to YOLOv5:
# /parent
# /datasets/VOC
# /yolov5
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
- images/train2012
- images/train2007
- images/val2012
- images/val2007
val: # val images (relative to 'path') 4952 images
- images/test2007
test: # test images (optional)
- images/test2007
# Classes
nc: 20 # number of classes
names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] # class names
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import xml.etree.ElementTree as ET
from tqdm import tqdm
from utils.general import download, Path
def convert_label(path, lb_path, year, image_id):
    """Convert one PASCAL VOC XML annotation into a YOLO-format label file.

    Args:
        path: pathlib.Path of the VOCdevkit directory that contains 'VOC<year>/Annotations'.
        lb_path: destination label file; it is created/overwritten even when no object qualifies.
        year: VOC release year used in the directory name, e.g. '2007'.
        image_id: annotation file stem (without '.xml').

    Objects whose class is not in yaml['names'] or that are flagged 'difficult' are skipped.
    """
    def convert_box(size, box):
        # size is (width, height); box is (xmin, xmax, ymin, ymax) in pixels.
        # Returns (x_center, y_center, w, h) scaled by the image size.
        dw, dh = 1. / size[0], 1. / size[1]
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    # Parse first, inside a context manager, so the XML handle is closed and a parse failure
    # does not leave an empty label file behind.
    with open(path / f'VOC{year}/Annotations/{image_id}.xml') as in_file:
        root = ET.parse(in_file).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the label lines are flushed and the handle is closed.
    with open(lb_path, 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls in yaml['names'] and int(obj.find('difficult').text) != 1:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = yaml['names'].index(cls)  # class id
                out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
# Download
dir = Path(yaml['path'])  # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
        url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
        url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
download(urls, dir=dir / 'images', delete=False)

# Convert
path = dir / 'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
    imgs_path = dir / 'images' / f'{image_set}{year}'
    lbs_path = dir / 'labels' / f'{image_set}{year}'
    imgs_path.mkdir(exist_ok=True, parents=True)
    lbs_path.mkdir(exist_ok=True, parents=True)

    # read_text() opens and closes the image-set file; each whitespace-separated token is an image id
    image_ids = (path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read_text().strip().split()
    for image_id in tqdm(image_ids, desc=f'{image_set}{year}'):  # 'image_id' avoids shadowing builtin id()
        f = path / f'VOC{year}/JPEGImages/{image_id}.jpg'  # old img path
        lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
        f.rename(imgs_path / f.name)  # move image
        convert_label(path, lb_path, year, image_id)  # convert labels to YOLO format
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
# Train command: python train.py --data VisDrone.yaml # Train command: python train.py --data VisDrone.yaml
# Default dataset location is next to YOLOv5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent
# /VisDrone # /datasets/VisDrone
# /yolov5 # /yolov5
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
train: ../VisDrone/VisDrone2019-DET-train/images # 6471 images path: ../datasets/VisDrone # dataset root dir
val: ../VisDrone/VisDrone2019-DET-val/images # 548 images train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
test: ../VisDrone/VisDrone2019-DET-test-dev/images # 1610 images val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
# number of classes # Classes
nc: 10 nc: 10 # number of classes
# class names
names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ] names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
# download command/URL (optional) -------------------------------------------------------------------------------------- # Download script/URL (optional) ---------------------------------------------------------------------------------------
download: | download: |
from utils.general import download, os, Path from utils.general import download, os, Path
...@@ -49,7 +48,7 @@ download: | ...@@ -49,7 +48,7 @@ download: |
# Download # Download
dir = Path('../VisDrone') # dataset directory dir = Path(yaml['path']) # dataset root dir
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip', urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
......
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
# Train command: python train.py --data argoverse_hd.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /argoverse
# /yolov5
# download command/URL (optional)
download: bash data/scripts/get_argoverse_hd.sh
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../argoverse/Argoverse-1.1/images/train/ # 39384 images
val: ../argoverse/Argoverse-1.1/images/val/ # 15062 images
test: ../argoverse/Argoverse-1.1/images/test/ # Submit to: https://eval.ai/web/challenges/challenge-page/800/overview
# number of classes
nc: 8
# class names
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ]
# COCO 2017 dataset http://cocodataset.org # COCO 2017 dataset http://cocodataset.org
# Train command: python train.py --data coco.yaml # Train command: python train.py --data coco.yaml
# Default dataset location is next to YOLOv5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent
# /coco # /datasets/coco
# /yolov5 # /yolov5
# download command/URL (optional) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
download: bash data/scripts/get_coco.sh path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] # Classes
train: ../coco/train2017.txt # 118287 images nc: 80 # number of classes
val: ../coco/val2017.txt # 5000 images
test: ../coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# number of classes
nc: 80
# class names
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
...@@ -26,10 +22,22 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', ' ...@@ -26,10 +22,22 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush' ] 'hair drier', 'toothbrush' ] # class names
# Download script/URL (optional)
download: |
from utils.general import download, Path
# Download labels
segments = False # segment or box labels
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
download(urls, dir=dir.parent)
# Print classes # Download data
# with open('data/coco.yaml') as f: urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
# d = yaml.safe_load(f) # dict 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
# for i, x in enumerate(d['names']): 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
# print(i, x) download(urls, dir=dir / 'images', threads=3)
# COCO 2017 dataset http://cocodataset.org - first 128 training images # COCO 2017 dataset http://cocodataset.org - first 128 training images
# Train command: python train.py --data coco128.yaml # Train command: python train.py --data coco128.yaml
# Default dataset location is next to YOLOv5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent
# /coco128 # /datasets/coco128
# /yolov5 # /yolov5
# download command/URL (optional) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] # Classes
train: ../coco128/images/train2017/ # 128 images nc: 80 # number of classes
val: ../coco128/images/train2017/ # 128 images
# number of classes
nc: 80
# class names
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
...@@ -25,4 +22,8 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', ' ...@@ -25,4 +22,8 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush' ] 'hair drier', 'toothbrush' ] # class names
# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
\ No newline at end of file
# Hyperparameters for VOC finetuning # Hyperparameters for VOC finetuning
# python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50 # python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
......
#!/bin/bash
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
# Download command: bash data/scripts/get_argoverse_hd.sh
# Train command: python train.py --data argoverse_hd.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /argoverse
# /yolov5
# Download/unzip images
d='../argoverse/' # unzip directory
mkdir $d
url=https://argoverse-hd.s3.us-east-2.amazonaws.com/
f=Argoverse-HD-Full.zip
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &# download, unzip, remove in background
wait # finish background tasks
cd ../argoverse/Argoverse-1.1/
ln -s tracking images
cd ../Argoverse-HD/annotations/
python3 - "$@" <<END
import json
from pathlib import Path

# Convert the Argoverse-HD train/val annotation JSON files in the current directory into
# YOLO label files under ./labels/<sequence dir>/<image stem>.txt
annotation_files = ["train.json", "val.json"]
print("Converting annotations to YOLOv5 format...")

for val in annotation_files:
    with open(val, "rb") as f:  # close the handle once the JSON is parsed
        a = json.load(f)

    label_dict = {}  # label file path -> list of YOLO-format lines
    for annot in a['annotations']:
        img_id = annot['image_id']
        img_name = a['images'][img_id]['name']
        img_label_name = img_name[:-3] + "txt"  # replace the 3-character image extension

        cls = annot['category_id']  # instance class id
        x_center, y_center, width, height = annot['bbox']
        x_center = (x_center + width / 2) / 1920.  # offset and scale
        y_center = (y_center + height / 2) / 1200.  # offset and scale
        width /= 1920.  # scale
        height /= 1200.  # scale

        img_dir = "./labels/" + a['seq_dirs'][a['images'][annot['image_id']]['sid']]
        label_path = img_dir + "/" + img_label_name  # built once instead of three times
        if label_path not in label_dict:
            # first box for this image: create its directory once, not per annotation
            Path(img_dir).mkdir(parents=True, exist_ok=True)
            label_dict[label_path] = []
        label_dict[label_path].append(f"{cls} {x_center} {y_center} {width} {height}\n")

    for filename, lines in label_dict.items():
        with open(filename, "w") as file:
            file.writelines(lines)
END
mv ./labels ../../Argoverse-1.1/
#!/bin/bash
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Download command: bash data/scripts/get_voc.sh
# Train command: python train.py --data voc.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /VOC
# /yolov5
start=$(date +%s)
mkdir -p ../tmp
cd ../tmp/
# Download/unzip images and labels
d='.' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
for f in $f3 $f2 $f1; do
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
done
wait # finish background tasks
end=$(date +%s)
runtime=$((end - start))
echo "Completed in" $runtime "seconds"
echo "Splitting dataset..."
python3 - "$@" <<END
import os
import xml.etree.ElementTree as ET
from os import getcwd
sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
"horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
def convert_box(size, box):
    """Return (x_center, y_center, width, height) of a pixel box scaled by the image size.

    size is indexed as (width, height) and box as (xmin, xmax, ymin, ymax).
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    # NOTE(review): the "- 1" is presumably a shift from 1-based pixel coordinates - confirm.
    center_x = (box[0] + box[1]) / 2.0 - 1
    center_y = (box[2] + box[3]) / 2.0 - 1
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]
    return center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y
def convert_annotation(year, image_id):
    """Convert one VOC XML annotation into a YOLO label file next to the devkit data.

    Reads  VOCdevkit/VOC<year>/Annotations/<image_id>.xml and
    writes VOCdevkit/VOC<year>/labels/<image_id>.txt (created even when it ends up empty).
    Objects whose class is not in `classes` or that are flagged 'difficult' are skipped.
    """
    # Context managers close both handles; the original leaked them.
    with open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)) as in_file:
        root = ET.parse(in_file).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w') as out_file:
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert_box expects the order (xmin, xmax, ymin, ymax)
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
                 float(xmlbox.find('ymax').text))
            bb = convert_box((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
cwd = getcwd()

# For every (year, image_set) pair: write '<year>_<image_set>.txt' listing absolute image paths
# and convert each image's XML annotation to a YOLO label file.
for year, image_set in sets:
    os.makedirs('VOCdevkit/VOC%s/labels/' % year, exist_ok=True)  # no separate exists() check needed
    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()
    # the context manager closes the list file even if a conversion raises
    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (cwd, year, image_id))
            convert_annotation(year, image_id)
END
cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
mkdir ../VOC ../VOC/images ../VOC/images/train ../VOC/images/val
mkdir ../VOC/labels ../VOC/labels/train ../VOC/labels/val
python3 - "$@" <<END
import os
import shutil


def _copy_split(list_file, split):
    """Copy each image listed in list_file, and its label if present, into ../VOC/{images,labels}/<split>."""
    print(os.path.exists(list_file))
    with open(list_file, 'r') as f:
        for line in f:
            # keep the last five path components, e.g. tmp/VOCdevkit/VOC2007/JPEGImages/<id>.jpg
            line = "/".join(line.split('/')[-5:]).strip()
            if os.path.exists("../" + line):
                # shutil.copy instead of os.system("cp ..."): no shell, so odd characters in paths are safe
                shutil.copy("../" + line, "../VOC/images/" + split)

            line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
            if os.path.exists("../" + line):
                shutil.copy("../" + line, "../VOC/labels/" + split)


_copy_split('../tmp/train.txt', 'train')
_copy_split('../tmp/2007_test.txt', 'val')
END
rm -rf ../tmp # remove temporary directory
echo "VOC download done."
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Train command: python train.py --data voc.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /VOC
# /yolov5
# download command/URL (optional)
download: bash data/scripts/get_voc.sh
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../VOC/images/train/ # 16551 images
val: ../VOC/images/val/ # 4952 images
# number of classes
nc: 20
# class names
names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]
...@@ -76,6 +76,11 @@ def run(data, ...@@ -76,6 +76,11 @@ def run(data,
# if device.type != 'cpu' and torch.cuda.device_count() > 1: # if device.type != 'cpu' and torch.cuda.device_count() > 1:
# model = nn.DataParallel(model) # model = nn.DataParallel(model)
# Data
with open(data) as f:
data = yaml.safe_load(f)
check_dataset(data) # check
# Half # Half
half &= device.type != 'cpu' # half precision only supported on CUDA half &= device.type != 'cpu' # half precision only supported on CUDA
if half: if half:
...@@ -83,10 +88,6 @@ def run(data, ...@@ -83,10 +88,6 @@ def run(data,
# Configure # Configure
model.eval() model.eval()
if isinstance(data, str):
with open(data) as f:
data = yaml.safe_load(f)
check_dataset(data) # check
is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt') # COCO dataset is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt') # COCO dataset
nc = 1 if single_cls else int(data['nc']) # number of classes nc = 1 if single_cls else int(data['nc']) # number of classes
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
......
...@@ -453,7 +453,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary ...@@ -453,7 +453,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
if not evolve: if not evolve:
if is_coco: # COCO dataset if is_coco: # COCO dataset
for m in [last, best] if best.exists() else [last]: # speed, mAP tests for m in [last, best] if best.exists() else [last]: # speed, mAP tests
results, _, _ = test.run(data, results, _, _ = test.run(data_dict,
batch_size=batch_size // WORLD_SIZE * 2, batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz_test, imgsz=imgsz_test,
conf_thres=0.001, conf_thres=0.001,
......
...@@ -1255,7 +1255,7 @@ ...@@ -1255,7 +1255,7 @@
"source": [ "source": [
"# VOC\n", "# VOC\n",
"for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']): # zip(batch_size, model)\n", "for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']): # zip(batch_size, model)\n",
" !python train.py --batch {b} --weights {m}.pt --data voc.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}" " !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
], ],
"execution_count": null, "execution_count": null,
"outputs": [] "outputs": []
......
...@@ -222,9 +222,14 @@ def check_file(file): ...@@ -222,9 +222,14 @@ def check_file(file):
def check_dataset(data, autodownload=True): def check_dataset(data, autodownload=True):
# Download dataset if not found locally # Download dataset if not found locally
val, s = data.get('val'), data.get('download') path = Path(data.get('path', '')) # optional 'path' field
if path:
for k in 'train', 'val', 'test':
if data.get(k): # prepend path
data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
if val: if val:
root = Path(val).parts[0] + os.sep # unzip directory i.e. '../'
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
if not all(x.exists() for x in val): if not all(x.exists() for x in val):
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
...@@ -233,12 +238,14 @@ def check_dataset(data, autodownload=True): ...@@ -233,12 +238,14 @@ def check_dataset(data, autodownload=True):
f = Path(s).name # filename f = Path(s).name # filename
print(f'Downloading {s} ...') print(f'Downloading {s} ...')
torch.hub.download_url_to_file(s, f) torch.hub.download_url_to_file(s, f)
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
Path(root).mkdir(parents=True, exist_ok=True) # create root
r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip
elif s.startswith('bash '): # bash script elif s.startswith('bash '): # bash script
print(f'Running {s} ...') print(f'Running {s} ...')
r = os.system(s) r = os.system(s)
else: # python script else: # python script
r = exec(s) # return None r = exec(s, {'yaml': data}) # return None
print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result
else: else:
raise Exception('Dataset not found.') raise Exception('Dataset not found.')
...@@ -258,7 +265,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1): ...@@ -258,7 +265,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
if unzip and f.suffix in ('.zip', '.gz'): if unzip and f.suffix in ('.zip', '.gz'):
print(f'Unzipping {f}...') print(f'Unzipping {f}...')
if f.suffix == '.zip': if f.suffix == '.zip':
s = f'unzip -qo {f} -d {dir} && rm {f}' # unzip -quiet -overwrite s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite
elif f.suffix == '.gz': elif f.suffix == '.gz':
s = f'tar xfz {f} --directory {f.parent}' # unzip s = f'tar xfz {f} --directory {f.parent}' # unzip
if delete: # delete zip file after unzip if delete: # delete zip file after unzip
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论