Unverified 提交 78fd0776 authored 作者: Glenn Jocher's avatar Glenn Jocher 提交者: GitHub

VisDrone2019-DET Dataset Auto-Download (#2882)

* VisDrone Dataset Auto-Download
* add visdrone.yaml
* cleanup
* add VisDrone2019-DET-test-dev
* cleanup VOC
上级 d48a34dc
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
# Train command: python train.py --data argoverse_hd.yaml # Train command: python train.py --data argoverse_hd.yaml
# Default dataset location is next to /yolov5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent_folder
# /argoverse # /argoverse
# /yolov5 # /yolov5
......
# COCO 2017 dataset http://cocodataset.org # COCO 2017 dataset http://cocodataset.org
# Train command: python train.py --data coco.yaml # Train command: python train.py --data coco.yaml
# Default dataset location is next to /yolov5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent_folder
# /coco # /coco
# /yolov5 # /yolov5
......
# COCO 2017 dataset http://cocodataset.org - first 128 training images # COCO 2017 dataset http://cocodataset.org - first 128 training images
# Train command: python train.py --data coco128.yaml # Train command: python train.py --data coco128.yaml
# Default dataset location is next to /yolov5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent_folder
# /coco128 # /coco128
# /yolov5 # /yolov5
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
# Download command: bash data/scripts/get_argoverse_hd.sh # Download command: bash data/scripts/get_argoverse_hd.sh
# Train command: python train.py --data argoverse_hd.yaml # Train command: python train.py --data argoverse_hd.yaml
# Default dataset location is next to /yolov5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent_folder
# /argoverse # /argoverse
# /yolov5 # /yolov5
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# COCO 2017 dataset http://cocodataset.org # COCO 2017 dataset http://cocodataset.org
# Download command: bash data/scripts/get_coco.sh # Download command: bash data/scripts/get_coco.sh
# Train command: python train.py --data coco.yaml # Train command: python train.py --data coco.yaml
# Default dataset location is next to /yolov5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent_folder
# /coco # /coco
# /yolov5 # /yolov5
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Download command: bash data/scripts/get_voc.sh # Download command: bash data/scripts/get_voc.sh
# Train command: python train.py --data voc.yaml # Train command: python train.py --data voc.yaml
# Default dataset location is next to /yolov5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent_folder
# /VOC # /VOC
# /yolov5 # /yolov5
...@@ -29,34 +29,27 @@ echo "Completed in" $runtime "seconds" ...@@ -29,34 +29,27 @@ echo "Completed in" $runtime "seconds"
echo "Splitting dataset..." echo "Splitting dataset..."
python3 - "$@" <<END python3 - "$@" <<END
import xml.etree.ElementTree as ET
import pickle
import os import os
from os import listdir, getcwd import xml.etree.ElementTree as ET
from os.path import join from os import getcwd
sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
"horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
def convert_box(size, box):
    """Convert a pixel box to a box normalized by the image size.

    Args:
        size: indexable as (width, height) of the image.
        box: indexable as (xmin, xmax, ymin, ymax), the order used by the caller.

    Returns:
        Tuple (x_center, y_center, width, height), each scaled by 1/width or 1/height.
    """
    inv_w, inv_h = 1. / (size[0]), 1. / (size[1])
    box_w = box[1] - box[0]
    box_h = box[3] - box[2]
    # The centre is shifted by one pixel before scaling
    # (presumably to compensate for 1-based VOC pixel coordinates - TODO confirm).
    center_x = (box[0] + box[1]) / 2.0 - 1
    center_y = (box[2] + box[3]) / 2.0 - 1
    return center_x * inv_w, center_y * inv_h, box_w * inv_w, box_h * inv_h
def convert(size, box):
    """Normalize a pixel bounding box by the image dimensions.

    Args:
        size: indexable as (width, height) of the image.
        box: indexable as (xmin, xmax, ymin, ymax), the order used by the caller.

    Returns:
        Tuple (x, y, w, h): box centre and extent, scaled by 1/width and 1/height.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])
    left, right, top, bottom = box[0], box[1], box[2], box[3]
    # Centre of the box, moved back by one pixel before normalization
    # (presumably because VOC pixel indices start at 1 - TODO confirm).
    mid_x = (left + right) / 2.0 - 1
    mid_y = (top + bottom) / 2.0 - 1
    return (mid_x * scale_x, mid_y * scale_y, (right - left) * scale_x, (bottom - top) * scale_y)
def convert_annotation(year, image_id): def convert_annotation(year, image_id):
in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
tree=ET.parse(in_file) tree = ET.parse(in_file)
root = tree.getroot() root = tree.getroot()
size = root.find('size') size = root.find('size')
w = int(size.find('width').text) w = int(size.find('width').text)
...@@ -65,74 +58,58 @@ def convert_annotation(year, image_id): ...@@ -65,74 +58,58 @@ def convert_annotation(year, image_id):
for obj in root.iter('object'): for obj in root.iter('object'):
difficult = obj.find('difficult').text difficult = obj.find('difficult').text
cls = obj.find('name').text cls = obj.find('name').text
if cls not in classes or int(difficult)==1: if cls not in classes or int(difficult) == 1:
continue continue
cls_id = classes.index(cls) cls_id = classes.index(cls)
xmlbox = obj.find('bndbox') xmlbox = obj.find('bndbox')
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
bb = convert((w,h), b) float(xmlbox.find('ymax').text))
bb = convert_box((w, h), b)
out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
wd = getcwd()
cwd = getcwd()
for year, image_set in sets: for year, image_set in sets:
if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): if not os.path.exists('VOCdevkit/VOC%s/labels/' % year):
os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) os.makedirs('VOCdevkit/VOC%s/labels/' % year)
image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
list_file = open('%s_%s.txt'%(year, image_set), 'w') list_file = open('%s_%s.txt' % (year, image_set), 'w')
for image_id in image_ids: for image_id in image_ids:
list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (cwd, year, image_id))
convert_annotation(year, image_id) convert_annotation(year, image_id)
list_file.close() list_file.close()
END END
cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
python3 - "$@" <<END mkdir ../VOC ../VOC/images ../VOC/images/train ../VOC/images/val
mkdir ../VOC/labels ../VOC/labels/train ../VOC/labels/val
import shutil python3 - "$@" <<END
import os import os
os.system('mkdir ../VOC/')
os.system('mkdir ../VOC/images')
os.system('mkdir ../VOC/images/train')
os.system('mkdir ../VOC/images/val')
os.system('mkdir ../VOC/labels')
os.system('mkdir ../VOC/labels/train')
os.system('mkdir ../VOC/labels/val')
import os
print(os.path.exists('../tmp/train.txt')) print(os.path.exists('../tmp/train.txt'))
f = open('../tmp/train.txt', 'r') with open('../tmp/train.txt', 'r') as f:
lines = f.readlines() for line in f.readlines():
line = "/".join(line.split('/')[-5:]).strip()
for line in lines: if os.path.exists("../" + line):
line = "/".join(line.split('/')[-5:]).strip() os.system("cp ../" + line + " ../VOC/images/train")
if (os.path.exists("../" + line)):
os.system("cp ../"+ line + " ../VOC/images/train")
line = line.replace('JPEGImages', 'labels')
line = line.replace('jpg', 'txt')
if (os.path.exists("../" + line)):
os.system("cp ../"+ line + " ../VOC/labels/train")
line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
if os.path.exists("../" + line):
os.system("cp ../" + line + " ../VOC/labels/train")
print(os.path.exists('../tmp/2007_test.txt')) print(os.path.exists('../tmp/2007_test.txt'))
f = open('../tmp/2007_test.txt', 'r') with open('../tmp/2007_test.txt', 'r') as f:
lines = f.readlines() for line in f.readlines():
line = "/".join(line.split('/')[-5:]).strip()
for line in lines: if os.path.exists("../" + line):
line = "/".join(line.split('/')[-5:]).strip() os.system("cp ../" + line + " ../VOC/images/val")
if (os.path.exists("../" + line)):
os.system("cp ../"+ line + " ../VOC/images/val") line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
if os.path.exists("../" + line):
line = line.replace('JPEGImages', 'labels') os.system("cp ../" + line + " ../VOC/labels/val")
line = line.replace('jpg', 'txt')
if (os.path.exists("../" + line)):
os.system("cp ../"+ line + " ../VOC/labels/val")
END END
rm -rf ../tmp # remove temporary directory rm -rf ../tmp # remove temporary directory
......
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
# Train command: python train.py --data visdrone.yaml
# Default dataset location is next to YOLOv5:
# /parent_folder
# /VisDrone
# /yolov5
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../VisDrone/VisDrone2019-DET-train/images # 6471 images
val: ../VisDrone/VisDrone2019-DET-val/images # 548 images
test: ../VisDrone/VisDrone2019-DET-test-dev/images # 1610 images
# number of classes
nc: 10
# class names
names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
# download command/URL (optional) --------------------------------------------------------------------------------------
download: |
  # Python script: downloads the VisDrone2019-DET archives and converts their annotations to YOLO labels.
  import os
  from pathlib import Path

  from utils.general import download


  def visdrone2yolo(dir):
      # Convert every annotation file under dir/annotations into a YOLO label file under dir/labels
      from PIL import Image
      from tqdm import tqdm

      def convert_box(size, box):
          # Convert VisDrone box to YOLO xywh box
          dw = 1. / size[0]
          dh = 1. / size[1]
          return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

      (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
      pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
      for f in pbar:
          img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
          lines = []
          with open(f, 'r') as file:  # read annotation.txt
              for row in [x.split(',') for x in file.read().strip().splitlines()]:
                  if row[4] == '0':  # VisDrone 'ignored regions' class 0
                      continue
                  cls = int(row[5]) - 1
                  box = convert_box(img_size, tuple(map(int, row[:4])))
                  lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
          # Write the label file once per annotation file, after all rows have been collected
          with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
              fl.writelines(lines)  # write label.txt


  # Download
  dir = Path('../VisDrone')  # dataset directory
  # NOTE: every entry needs its own trailing comma; adjacent string literals are silently
  # concatenated by Python, which would merge two URLs into a single invalid one.
  urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
  download(urls, dir=dir)

  # Convert
  for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
      visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Train command: python train.py --data voc.yaml # Train command: python train.py --data voc.yaml
# Default dataset location is next to /yolov5: # Default dataset location is next to YOLOv5:
# /parent_folder # /parent_folder
# /VOC # /VOC
# /yolov5 # /yolov5
......
...@@ -9,6 +9,8 @@ import random ...@@ -9,6 +9,8 @@ import random
import re import re
import subprocess import subprocess
import time import time
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path from pathlib import Path
import cv2 import cv2
...@@ -161,18 +163,40 @@ def check_dataset(dict): ...@@ -161,18 +163,40 @@ def check_dataset(dict):
if not all(x.exists() for x in val): if not all(x.exists() for x in val):
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
if s and len(s): # download script if s and len(s): # download script
print('Downloading %s ...' % s)
if s.startswith('http') and s.endswith('.zip'): # URL if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename f = Path(s).name # filename
print(f'Downloading {s} ...')
torch.hub.download_url_to_file(s, f) torch.hub.download_url_to_file(s, f)
r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip r = os.system(f'unzip -q {f} -d ../ && rm {f}') # unzip
else: # bash script elif s.startswith('bash '): # bash script
print(f'Running {s} ...')
r = os.system(s) r = os.system(s)
print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value else: # python script
r = exec(s) # return None
print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result
else: else:
raise Exception('Dataset not found.') raise Exception('Dataset not found.')
def download(url, dir='.', multi_thread=False):
    """Download one or more files into a directory, unzipping any .zip archives.

    Args:
        url: a single URL (str) or an iterable of URLs.
        dir: destination directory (str or Path); created, with parents, if missing.
        multi_thread: if True, download with a pool of 8 threads; otherwise sequentially.

    Returns:
        None. The call blocks until every download has finished in both modes.

    Raises:
        Whatever torch.hub.download_url_to_file raises for a failed download.
    """

    def download_one(url, dir):
        # Download 1 file
        f = dir / Path(url).name  # filename
        print(f'Downloading {url} to {f}...')
        torch.hub.download_url_to_file(url, f, progress=True)  # download
        if f.suffix == '.zip':
            os.system(f'unzip -qo {f} -d {dir} && rm {f}')  # unzip -quiet -overwrite

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory

    # A lone URL must be wrapped, not iterated: tuple('http...') would yield single characters.
    urls = [url] if isinstance(url, (str, Path)) else list(url)

    if multi_thread:
        # map() blocks until all downloads complete and re-raises worker exceptions;
        # the context manager releases the pool threads afterwards.
        with ThreadPool(8) as pool:  # 8 threads
            pool.map(lambda u: download_one(u, dir), urls)
    else:
        for u in urls:
            download_one(u, dir)
def make_divisible(x, divisor): def make_divisible(x, divisor):
# Returns x evenly divisible by divisor # Returns x evenly divisible by divisor
return math.ceil(x / divisor) * divisor return math.ceil(x / divisor) * divisor
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论