Unverified commit 2bf34f50, authored by Glenn Jocher, committed by GitHub

PyTorch Hub amp.autocast() inference (#2641)

I think this should help speed up CUDA inference, as models may currently be running FP32 inference on CUDA devices unnecessarily.
Parent ee169834
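For background on the change: torch.cuda.amp.autocast casts eligible CUDA ops (convolutions, matmuls) to FP16 on the fly, which is where the speedup comes from. Below is a minimal sketch of the pattern the diff adopts; the stand-in Sequential model and the 640x640 input are illustrative, not from the repo.

import torch
from torch.cuda import amp

# Stand-in model for illustration; any nn.Module behaves the same way.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 16, 3), torch.nn.ReLU())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device).eval()

x = torch.rand(1, 3, 640, 640, device=device)

# Same pattern as the diff: no gradients, and autocast enabled only on
# non-CPU devices (torch.cuda.amp.autocast has no effect on CPU ops).
with torch.no_grad(), amp.autocast(enabled=device.type != 'cpu'):
    y = model(x)

print(y.dtype)  # torch.float16 on CUDA, torch.float32 on CPU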
models/common.py
@@ -8,6 +8,7 @@ import requests
 import torch
 import torch.nn as nn
 from PIL import Image
+from torch.cuda import amp
 
 from utils.datasets import letterbox
 from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
@@ -219,8 +220,8 @@ class autoShape(nn.Module):
             x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
         t.append(time_synchronized())
 
-        # Inference
-        with torch.no_grad():
-            y = self.model(x, augment, profile)[0]  # forward
-        t.append(time_synchronized())
+        with torch.no_grad(), amp.autocast(enabled=p.device.type != 'cpu'):
+            # Inference
+            y = self.model(x, augment, profile)[0]  # forward
+            t.append(time_synchronized())
@@ -228,8 +229,8 @@ class autoShape(nn.Module):
-        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
-        for i in range(n):
-            scale_coords(shape1, y[i][:, :4], shape0[i])
-
-        t.append(time_synchronized())
-        return Detections(imgs, y, files, t, self.names, x.shape)
+            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
+            for i in range(n):
+                scale_coords(shape1, y[i][:, :4], shape0[i])
+
+            t.append(time_synchronized())
+            return Detections(imgs, y, files, t, self.names, x.shape)
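For PyTorch Hub users the change is transparent: autoShape.forward now applies the autocast context internally, so CUDA inference picks up FP16 with no caller-side changes. A usage sketch (the image URL is the repo's standard example):

import torch

# Load YOLOv5s from PyTorch Hub; autoShape wraps the model, so the
# autocast context added in this commit applies on every forward call.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

imgs = ['https://ultralytics.com/images/zidane.jpg']  # batch of 1 image
results = model(imgs, size=640)  # inference + NMS run under autocast on CUDA
results.print()  # print a results summary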