Unverified commit 1aea74cd, authored by VELC, committed by GitHub

Add new `--vid-stride` inference parameter for videos (#9256)

* fps feature/skip frame added * [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci * predict.py updates * Update dataloaders.py Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> * Update dataloaders.py Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> * remove unused attribute Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> * Cleanup Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci * Update predict.py Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> * Update detect.py Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> * Update dataloaders.py Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> * Rename skip_frame to vid_stride * cleanup * cleanup2 Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Parent commit: e45d335b
...@@ -66,6 +66,7 @@ def run( ...@@ -66,6 +66,7 @@ def run(
exist_ok=False, # existing project/name ok, do not increment exist_ok=False, # existing project/name ok, do not increment
half=False, # use FP16 half-precision inference half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
): ):
source = str(source) source = str(source)
save_img = not nosave and not source.endswith('.txt') # save inference images save_img = not nosave and not source.endswith('.txt') # save inference images
...@@ -88,10 +89,10 @@ def run( ...@@ -88,10 +89,10 @@ def run(
# Dataloader # Dataloader
if webcam: if webcam:
view_img = check_imshow() view_img = check_imshow()
dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0])) dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
bs = len(dataset) # batch_size bs = len(dataset) # batch_size
else: else:
dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0])) dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
bs = 1 # batch_size bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs vid_path, vid_writer = [None] * bs, [None] * bs
...@@ -196,6 +197,7 @@ def parse_opt(): ...@@ -196,6 +197,7 @@ def parse_opt():
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
opt = parser.parse_args() opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt)) print_args(vars(opt))
......
...@@ -74,6 +74,7 @@ def run( ...@@ -74,6 +74,7 @@ def run(
hide_conf=False, # hide confidences hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
): ):
source = str(source) source = str(source)
save_img = not nosave and not source.endswith('.txt') # save inference images save_img = not nosave and not source.endswith('.txt') # save inference images
...@@ -96,10 +97,10 @@ def run( ...@@ -96,10 +97,10 @@ def run(
# Dataloader # Dataloader
if webcam: if webcam:
view_img = check_imshow() view_img = check_imshow()
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
bs = len(dataset) # batch_size bs = len(dataset) # batch_size
else: else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
bs = 1 # batch_size bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs vid_path, vid_writer = [None] * bs, [None] * bs
...@@ -236,6 +237,7 @@ def parse_opt(): ...@@ -236,6 +237,7 @@ def parse_opt():
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
opt = parser.parse_args() opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt)) print_args(vars(opt))
......
...@@ -187,7 +187,7 @@ class _RepeatSampler: ...@@ -187,7 +187,7 @@ class _RepeatSampler:
class LoadImages: class LoadImages:
# YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None): def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
files = [] files = []
for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
p = str(Path(p).resolve()) p = str(Path(p).resolve())
...@@ -212,6 +212,7 @@ class LoadImages: ...@@ -212,6 +212,7 @@ class LoadImages:
self.mode = 'image' self.mode = 'image'
self.auto = auto self.auto = auto
self.transforms = transforms # optional self.transforms = transforms # optional
self.vid_stride = vid_stride # video frame-rate stride
if any(videos): if any(videos):
self._new_video(videos[0]) # new video self._new_video(videos[0]) # new video
else: else:
...@@ -232,6 +233,7 @@ class LoadImages: ...@@ -232,6 +233,7 @@ class LoadImages:
# Read video # Read video
self.mode = 'video' self.mode = 'video'
ret_val, im0 = self.cap.read() ret_val, im0 = self.cap.read()
self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.vid_stride * (self.frame + 1)) # read at vid_stride
while not ret_val: while not ret_val:
self.count += 1 self.count += 1
self.cap.release() self.cap.release()
...@@ -242,7 +244,7 @@ class LoadImages: ...@@ -242,7 +244,7 @@ class LoadImages:
ret_val, im0 = self.cap.read() ret_val, im0 = self.cap.read()
self.frame += 1 self.frame += 1
# im0 = self._cv2_rotate(im0) # for use if cv2 auto rotation is False # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False
s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
else: else:
...@@ -265,7 +267,7 @@ class LoadImages: ...@@ -265,7 +267,7 @@ class LoadImages:
# Create a new video capture object # Create a new video capture object
self.frame = 0 self.frame = 0
self.cap = cv2.VideoCapture(path) self.cap = cv2.VideoCapture(path)
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees
# self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493
...@@ -285,11 +287,12 @@ class LoadImages: ...@@ -285,11 +287,12 @@ class LoadImages:
class LoadStreams: class LoadStreams:
# YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams` # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`
def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, transforms=None): def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
torch.backends.cudnn.benchmark = True # faster for fixed-size inference torch.backends.cudnn.benchmark = True # faster for fixed-size inference
self.mode = 'stream' self.mode = 'stream'
self.img_size = img_size self.img_size = img_size
self.stride = stride self.stride = stride
self.vid_stride = vid_stride # video frame-rate stride
sources = Path(sources).read_text().rsplit() if Path(sources).is_file() else [sources] sources = Path(sources).read_text().rsplit() if Path(sources).is_file() else [sources]
n = len(sources) n = len(sources)
self.sources = [clean_str(x) for x in sources] # clean source names for later self.sources = [clean_str(x) for x in sources] # clean source names for later
...@@ -329,11 +332,11 @@ class LoadStreams: ...@@ -329,11 +332,11 @@ class LoadStreams:
def update(self, i, cap, stream): def update(self, i, cap, stream):
# Read stream `i` frames in daemon thread # Read stream `i` frames in daemon thread
n, f, read = 0, self.frames[i], 1 # frame number, frame array, inference every 'read' frame n, f = 0, self.frames[i] # frame number, frame array
while cap.isOpened() and n < f: while cap.isOpened() and n < f:
n += 1 n += 1
cap.grab() # .read() = .grab() followed by .retrieve() cap.grab() # .read() = .grab() followed by .retrieve()
if n % read == 0: if n % self.vid_stride == 0:
success, im = cap.retrieve() success, im = cap.retrieve()
if success: if success:
self.imgs[i] = im self.imgs[i] = im
......
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment