Unverified commit 2bf34f50 authored by Glenn Jocher, committed by GitHub

PyTorch Hub amp.autocast() inference (#2641)

I think this should help speed up CUDA inference, as models may currently be running FP32 inference on CUDA devices unnecessarily.
Parent ee169834
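For background, torch.cuda.amp.autocast runs eligible ops in FP16 on CUDA and is a no-op when disabled, which is why the change gates it on `p.device.type != 'cpu'`: the same forward code serves CPU (FP32) and GPU (mixed precision) without separate branches. A minimal sketch of the pattern, using a placeholder model and dummy input rather than anything from this repo:

import torch
import torch.nn as nn
from torch.cuda import amp

model = nn.Conv2d(3, 16, 3).eval()  # placeholder model, not the YOLOv5 model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model, x = model.to(device), torch.zeros(1, 3, 64, 64, device=device)

# Mixed-precision inference on CUDA, plain FP32 on CPU (autocast disabled there)
with torch.no_grad(), amp.autocast(enabled=device != 'cpu'):
    y = model(x)
print(y.dtype)  # torch.float16 on CUDA, torch.float32 on CPU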
@@ -8,6 +8,7 @@ import requests
 import torch
 import torch.nn as nn
 from PIL import Image
+from torch.cuda import amp
 
 from utils.datasets import letterbox
 from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
@@ -219,8 +220,8 @@ class autoShape(nn.Module):
         x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
         t.append(time_synchronized())
 
+        with torch.no_grad(), amp.autocast(enabled=p.device.type != 'cpu'):
             # Inference
-        with torch.no_grad():
             y = self.model(x, augment, profile)[0]  # forward
             t.append(time_synchronized())
@@ -228,8 +229,8 @@ class autoShape(nn.Module):
             y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
             for i in range(n):
                 scale_coords(shape1, y[i][:, :4], shape0[i])
-        t.append(time_synchronized())
+            t.append(time_synchronized())
             return Detections(imgs, y, files, t, self.names, x.shape)
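As a usage sketch (the hub entrypoint and example image URL below follow the YOLOv5 README of that era and are illustrative, not part of this diff), PyTorch Hub users on CUDA should pick up the mixed-precision path automatically:

import torch

# Load YOLOv5s through PyTorch Hub; the returned model is wrapped in autoShape
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
if torch.cuda.is_available():
    model = model.to('cuda')  # on GPU, forward() now runs under amp.autocast()

# autoShape accepts filenames, URLs, PIL/OpenCV/numpy images or torch tensors
imgs = ['https://ultralytics.com/images/zidane.jpg']  # illustrative example image
results = model(imgs, size=640)  # inference + NMS, mixed precision on CUDA
results.print()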