Administrator / yolov5 / Commits

Commit 47288407 (Unverified)
Authored March 06, 2022 by Glenn Jocher; committed by GitHub on March 06, 2022
Update `--cache disk` deprecate `*_npy/` dirs (#6876)
* Updates * Updates * Updates * Updates * Updates * Updates * Updates * Updates * Updates * Updates * Cleanup * Cleanup
Parent: 8a66ebad
Showing 3 changed files with 40 additions and 40 deletions:

  utils/datasets.py                      +38  −38
  utils/loggers/wandb/wandb_utils.py      +1   −1
  val.py                                  +1   −1
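The bulk of this diff is mechanical: the dataset attributes `img_files`, `imgs`, `img_hw0` and `img_hw` become `im_files`, `ims`, `im_hw0` and `im_hw`, and `--cache disk` drops the parallel `*_npy/` directory in favour of a `.npy` file beside each image. Downstream code reading the old attribute names will break; a minimal, hypothetical compatibility shim (where `dataset` stands in for any `LoadImagesAndLabels` instance):

# Hypothetical shim: prefer the new attribute name, fall back to the old one
# for YOLOv5 versions predating this commit.
im_files = getattr(dataset, 'im_files', None) or getattr(dataset, 'img_files', None)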
utils/datasets.py
@@ -407,19 +407,19 @@ class LoadImagesAndLabels(Dataset):
                 # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                 else:
                     raise Exception(f'{prefix}{p} does not exist')
-            self.img_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
+            self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
             # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
-            assert self.img_files, f'{prefix}No images found'
+            assert self.im_files, f'{prefix}No images found'
         except Exception as e:
             raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}')

         # Check cache
-        self.label_files = img2label_paths(self.img_files)  # labels
+        self.label_files = img2label_paths(self.im_files)  # labels
         cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')
         try:
             cache, exists = np.load(cache_path, allow_pickle=True).item(), True  # load dict
             assert cache['version'] == self.cache_version  # same version
-            assert cache['hash'] == get_hash(self.label_files + self.img_files)  # same hash
+            assert cache['hash'] == get_hash(self.label_files + self.im_files)  # same hash
         except Exception:
             cache, exists = self.cache_labels(cache_path, prefix), False  # cache
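For reference, the renamed `im_files` line above keeps the original filtering logic: split each path on its last '.' and compare the lowercased extension against `IMG_FORMATS`. A standalone sketch (this `IMG_FORMATS` list is a shortened stand-in for the constant defined at the top of utils/datasets.py):

import os

IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png']  # shortened stand-in for the real constant

f = ['data/images/a.jpg', 'data/images/b.PNG', 'data/labels/a.txt', 'notes.md']
im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
print(im_files)  # ['data/images/a.jpg', 'data/images/b.PNG'] on POSIX systems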
@@ -437,7 +437,7 @@ class LoadImagesAndLabels(Dataset):
         labels, shapes, self.segments = zip(*cache.values())
         self.labels = list(labels)
         self.shapes = np.array(shapes, dtype=np.float64)
-        self.img_files = list(cache.keys())  # update
+        self.im_files = list(cache.keys())  # update
         self.label_files = img2label_paths(cache.keys())  # update
         n = len(shapes)  # number of images
         bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
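The unchanged `bi` context line above computes which batch each image falls into. A worked example (note `np.int` is deprecated in newer NumPy releases; plain `int` behaves identically here):

import numpy as np

n, batch_size = 10, 4
bi = np.floor(np.arange(n) / batch_size).astype(int)
print(bi)  # [0 0 0 0 1 1 1 1 2 2] -> images 0-3 form batch 0, 4-7 batch 1, 8-9 batch 2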
@@ -466,7 +466,7 @@ class LoadImagesAndLabels(Dataset):
             s = self.shapes  # wh
             ar = s[:, 1] / s[:, 0]  # aspect ratio
             irect = ar.argsort()
-            self.img_files = [self.img_files[i] for i in irect]
+            self.im_files = [self.im_files[i] for i in irect]
             self.label_files = [self.label_files[i] for i in irect]
             self.labels = [self.labels[i] for i in irect]
             self.shapes = s[irect]  # wh
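This rectangular-training block reorders every per-image list by aspect ratio so each batch holds similarly shaped images; only the list being reordered was renamed. A minimal sketch of the sort itself:

import numpy as np

s = np.array([[640, 480], [480, 640], [512, 512]], dtype=np.float64)  # per-image (w, h)
ar = s[:, 1] / s[:, 0]   # h/w aspect ratios: [0.75, 1.333..., 1.0]
irect = ar.argsort()     # [0, 2, 1]: wide, square, tall
print(s[irect])          # shapes reordered so similar aspect ratios batch together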
@@ -485,24 +485,20 @@ class LoadImagesAndLabels(Dataset):
             self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride

         # Cache images into RAM/disk for faster training (WARNING: large datasets may exceed system resources)
-        self.imgs, self.img_npy = [None] * n, [None] * n
+        self.ims = [None] * n
+        self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
         if cache_images:
-            if cache_images == 'disk':
-                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
-                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
-                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
             gb = 0  # Gigabytes of cached images
-            self.img_hw0, self.img_hw = [None] * n, [None] * n
-            results = ThreadPool(NUM_THREADS).imap(self.load_image, range(n))
+            self.im_hw0, self.im_hw = [None] * n, [None] * n
+            fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image
+            results = ThreadPool(NUM_THREADS).imap(fcn, range(n))
             pbar = tqdm(enumerate(results), total=n)
             for i, x in pbar:
                 if cache_images == 'disk':
-                    if not self.img_npy[i].exists():
-                        np.save(self.img_npy[i].as_posix(), x[0])
-                    gb += self.img_npy[i].stat().st_size
+                    gb += self.npy_files[i].stat().st_size
                 else:  # 'ram'
-                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
-                    gb += self.imgs[i].nbytes
+                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
+                    gb += self.ims[i].nbytes
                 pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB {cache_images})'
             pbar.close()
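This hunk is the heart of the deprecation: instead of mirroring the image directory into a sibling `*_npy/` folder, each cache file now sits next to its source image. A sketch of both path derivations (the example path is illustrative):

from pathlib import Path

f = 'datasets/coco128/images/train2017/000000000009.jpg'  # illustrative path

# Old (deprecated) layout: all caches collected in a sibling '*_npy/' directory
old_npy = Path(Path(f).parent.as_posix() + '_npy') / Path(f).with_suffix('.npy').name
print(old_npy)  # datasets/coco128/images/train2017_npy/000000000009.npy

# New layout: the .npy cache lives beside the image it was decoded from
new_npy = Path(f).with_suffix('.npy')
print(new_npy)  # datasets/coco128/images/train2017/000000000009.npy

This removes the need to create and manage a separate cache directory (the `im_cache_dir.mkdir(...)` call disappears above), at the cost of interleaving `.npy` files with the original images.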
@@ -512,8 +508,8 @@ class LoadImagesAndLabels(Dataset):
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
         desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
         with Pool(NUM_THREADS) as pool:
-            pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
-                        desc=desc, total=len(self.img_files))
+            pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))),
+                        desc=desc, total=len(self.im_files))
             for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                 nm += nm_f
                 nf += nf_f
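The only change here is the renamed list passed to `verify_image_label`; the `zip(..., repeat(prefix))` idiom itself pairs every (image, label) tuple with the same constant prefix:

from itertools import repeat

im_files = ['a.jpg', 'b.jpg']
label_files = ['a.txt', 'b.txt']
print(list(zip(im_files, label_files, repeat('train: '))))
# [('a.jpg', 'a.txt', 'train: '), ('b.jpg', 'b.txt', 'train: ')]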
@@ -530,8 +526,8 @@ class LoadImagesAndLabels(Dataset):
             LOGGER.info('\n'.join(msgs))
         if nf == 0:
             LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
-        x['hash'] = get_hash(self.label_files + self.img_files)
-        x['results'] = nf, nm, ne, nc, len(self.img_files)
+        x['hash'] = get_hash(self.label_files + self.im_files)
+        x['results'] = nf, nm, ne, nc, len(self.im_files)
         x['msgs'] = msgs  # warnings
         x['version'] = self.cache_version  # cache version
         try:
@@ -543,7 +539,7 @@ class LoadImagesAndLabels(Dataset):
         return x

     def __len__(self):
-        return len(self.img_files)
+        return len(self.im_files)

     # def __iter__(self):
     #     self.count = -1
@@ -622,17 +618,15 @@ class LoadImagesAndLabels(Dataset):
         img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
         img = np.ascontiguousarray(img)

-        return torch.from_numpy(img), labels_out, self.img_files[index], shapes
+        return torch.from_numpy(img), labels_out, self.im_files[index], shapes

     def load_image(self, i):
-        # loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
-        im = self.imgs[i]
+        # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
+        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i],
         if im is None:  # not cached in RAM
-            npy = self.img_npy[i]
-            if npy and npy.exists():  # load npy
-                im = np.load(npy)
+            if fn.exists():  # load npy
+                im = np.load(fn)
             else:  # read image
-                f = self.img_files[i]
                 im = cv2.imread(f)  # BGR
                 assert im is not None, f'Image Not Found {f}'
             h0, w0 = im.shape[:2]  # orig hw
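The reworked `load_image` resolves an image through three tiers: the RAM cache (`self.ims`), then the on-disk `.npy` cache, then a fresh `cv2.imread`. A self-contained sketch of that lookup order, with `load_cached` as a hypothetical free-function stand-in:

import cv2
import numpy as np
from pathlib import Path

def load_cached(im, f, fn):
    # im: RAM-cached array or None; f: image path; fn: corresponding .npy path
    if im is not None:              # tier 1: already decoded in RAM
        return im
    if Path(fn).exists():           # tier 2: pre-decoded .npy on disk
        return np.load(fn)
    im = cv2.imread(str(f))         # tier 3: decode the original image (BGR)
    assert im is not None, f'Image Not Found {f}'
    return im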
@@ -643,7 +637,13 @@ class LoadImagesAndLabels(Dataset):
                                 interpolation=cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA)
             return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
         else:
-            return self.imgs[i], self.img_hw0[i], self.img_hw[i]  # im, hw_original, hw_resized
+            return self.ims[i], self.im_hw0[i], self.im_hw[i]  # im, hw_original, hw_resized

+    def cache_images_to_disk(self, i):
+        # Saves an image as an *.npy file for faster loading
+        f = self.npy_files[i]
+        if not f.exists():
+            np.save(f.as_posix(), cv2.imread(self.im_files[i]))
+
     def load_mosaic(self, index):
         # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
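The new `cache_images_to_disk` method is driven by the `ThreadPool(...).imap(fcn, range(n))` loop added earlier; iterating the results is what triggers the work. A hedged usage sketch, assuming `ds` is a constructed `LoadImagesAndLabels` instance and `NUM_THREADS` matches the module constant:

from multiprocessing.pool import ThreadPool

NUM_THREADS = 8  # utils/datasets.py derives this from the CPU count; fixed here for illustration
for _ in ThreadPool(NUM_THREADS).imap(ds.cache_images_to_disk, range(len(ds))):
    pass  # the work is the side effect: one .npy file written per image, if absent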
@@ -777,16 +777,16 @@ class LoadImagesAndLabels(Dataset):
     @staticmethod
     def collate_fn(batch):
-        img, label, path, shapes = zip(*batch)  # transposed
+        im, label, path, shapes = zip(*batch)  # transposed
         for i, lb in enumerate(label):
             lb[:, 0] = i  # add target image index for build_targets()
-        return torch.stack(img, 0), torch.cat(label, 0), path, shapes
+        return torch.stack(im, 0), torch.cat(label, 0), path, shapes

     @staticmethod
     def collate_fn4(batch):
         img, label, path, shapes = zip(*batch)  # transposed
         n = len(shapes) // 4
-        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
+        im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

         ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
         wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
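`collate_fn` only renames its local `img` to `im`; the interesting unchanged line is `lb[:, 0] = i`, which stamps each label row with the index of its image inside the batch so `build_targets()` can route labels back to images after `torch.cat`:

import torch

label = (torch.zeros(1, 6), torch.zeros(2, 6))  # sample 0 has 1 label, sample 1 has 2
for i, lb in enumerate(label):
    lb[:, 0] = i  # column 0 <- image index within the batch
print(torch.cat(label, 0)[:, 0])  # tensor([0., 1., 1.])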
@@ -800,13 +800,13 @@ class LoadImagesAndLabels(Dataset):
             else:
                 im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                 lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
-            img4.append(im)
+            im4.append(im)
             label4.append(lb)

         for i, lb in enumerate(label4):
             lb[:, 0] = i  # add target image index for build_targets()

-        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
+        return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4


 # Ancillary functions --------------------------------------------------------------------------------------------------
@@ -999,12 +999,12 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil
                 'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
                                 'per_class': (x > 0).sum(0).tolist()},
                 'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
-                           zip(dataset.img_files, dataset.labels)]}
+                           zip(dataset.im_files, dataset.labels)]}

         if hub:
             im_dir = hub_dir / 'images'
             im_dir.mkdir(parents=True, exist_ok=True)
-            for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.img_files), total=dataset.n, desc='HUB Ops'):
+            for _ in tqdm(ThreadPool(NUM_THREADS).imap(hub_ops, dataset.im_files), total=dataset.n, desc='HUB Ops'):
                 pass

     # Profile
utils/loggers/wandb/wandb_utils.py
@@ -403,7 +403,7 @@ class WandbLogger():
         # TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging
         artifact = wandb.Artifact(name=name, type="dataset")
         img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
-        img_files = tqdm(dataset.img_files) if not img_files else img_files
+        img_files = tqdm(dataset.im_files) if not img_files else img_files
         for img_file in img_files:
             if Path(img_file).is_dir():
                 artifact.add_dir(img_file, name='data/images')
val.py
@@ -297,7 +297,7 @@ def run(data,
         pred = anno.loadRes(pred_json)  # init predictions api
         eval = COCOeval(anno, pred, 'bbox')
         if is_coco:
-            eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
+            eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files]  # image IDs to evaluate
         eval.evaluate()
         eval.accumulate()
         eval.summarize()
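The val.py change is the same rename; the surrounding logic derives numeric COCO image IDs from file stems, which assumes COCO-style zero-padded numeric filenames:

from pathlib import Path

print(int(Path('000000000139.jpg').stem))  # 139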