Merge branch 'master' into advanced_logging

e18e6811 · Alex Stoken · GitHub · de191655 · 77fb8ee0 · e18e6811
--- a/.github/ISSUE_TEMPLATE/--bug-report.md
+++ b/.github/ISSUE_TEMPLATE/--bug-report.md
@@ -7,29 +7,43 @@ assignees: ''

 ---

-Before submitting a bug report, please ensure that you are using the latest versions of:
- - Python
- - PyTorch
- - This repository (run `git fetch && git status -uno` to check and `git pull` to update)
+Before submitting a bug report, please be aware that your issue **must be reproducible** with all of the following; otherwise it is non-actionable and we cannot help you:
+ - **Current repo**: run `git fetch && git status -uno` to check and `git pull` to update repo
+ - **Common dataset**: coco.yaml or coco128.yaml
+ - **Common environment**: Colab, Google Cloud, or Docker image. See https://github.com/ultralytics/yolov5#reproduce-our-environment
 
-**Your issue must be reproducible on a public dataset (i.e COCO) using the latest version of the repository, and you must supply code to reproduce, or we can not help you.**
-
-If this is a custom training question we suggest you include your `train*.jpg`, `test*.jpg` and `results.png` figures.
+If this is a custom dataset/training question, you **must include** your `train*.jpg`, `test*.jpg` and `results.png` figures, or we cannot help you. You can generate these with `utils.plot_results()`.


 ## 🐛 Bug
 A clear and concise description of what the bug is.

-## To Reproduce
-**REQUIRED**: Code to reproduce your issue below
+
+## To Reproduce (REQUIRED)
+
+Input:
+```
+import torch
+
+a = torch.tensor([5])
+c = a / 0
+```
+
+Output:
 ```
-python train.py ...
+Traceback (most recent call last):
+  File "/Users/glennjocher/opt/anaconda3/envs/env1/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
+    exec(code_obj, self.user_global_ns, self.user_ns)
+  File "<ipython-input-5-be04c762b799>", line 5, in <module>
+    c = a / 0
+RuntimeError: ZeroDivisionError
 ```


 ## Expected behavior
 A clear and concise description of what you expected to happen.

+
 ## Environment
 If applicable, add screenshots to help explain your problem.


--- a/Dockerfile
+++ b/Dockerfile
 # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
 FROM nvcr.io/nvidia/pytorch:20.03-py3
-
-# Install dependencies (pip or conda)
 RUN pip install -U gsutil
-# RUN pip install -U -r requirements.txt

 # Create working directory
 RUN mkdir -p /usr/src/app
@@ -12,6 +9,9 @@ WORKDIR /usr/src/app
 # Copy contents
 COPY . /usr/src/app

+# Install dependencies (pip or conda)
+#RUN pip install -r requirements.txt
+
 # Copy weights
 #RUN python3 -c "from models import *; \
 #attempt_download('weights/yolov5s.pt'); \
@@ -41,7 +41,7 @@ COPY . /usr/src/app

 # Bash into running container
 # sudo docker container exec -it ba65811811ab bash
-# python -c "from utils.utils import *; create_backbone('weights/last.pt')" && gsutil cp weights/backbone.pt gs://*
+# python -c "from utils.utils import *; create_pretrained('weights/last.pt')" && gsutil cp weights/pretrained.pt gs://*

 # Bash into stopped container
 # sudo docker commit 6d525e299258 user/test_image && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco --entrypoint=sh user/test_image

--- a/README.md
+++ b/README.md
--- a/detect.py
+++ b/detect.py
 import argparse

+import torch.backends.cudnn as cudnn
+
+from utils import google_utils
 from utils.datasets import *
 from utils.utils import *

@@ -36,14 +39,14 @@ def detect(save_img=False):
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
-        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
+        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
-    names = model.names if hasattr(model, 'names') else model.modules.names
+    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
@@ -62,8 +65,7 @@ def detect(save_img=False):
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
-        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
-                                   fast=True, classes=opt.classes, agnostic=opt.agnostic_nms)
+        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = torch_utils.time_synchronized()

        # Apply Classifier
@@ -78,6 +80,7 @@ def detect(save_img=False):
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
+            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  #  normalization gain whwh
            if det is not None and len(det):
@@ -93,8 +96,8 @@ def detect(save_img=False):
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
-                        with open(save_path[:save_path.rfind('.')] + '.txt', 'a') as file:
-                            file.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format
+                        with open(txt_path + '.txt', 'a') as f:
+                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
@@ -154,3 +157,8 @@ if __name__ == '__main__':

    with torch.no_grad():
        detect()
+
+        # Update all models
+        # for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
+        #    detect()
+        #    create_pretrained(opt.weights, opt.weights)
--- a/models/common.py
+++ b/models/common.py
@@ -13,7 +13,8 @@ class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super(Conv, self).__init__()
-        self.conv = nn.Conv2d(c1, c2, k, s, k // 2, groups=g, bias=False)
+        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # padding
+        self.conv = nn.Conv2d(c1, c2, k, s, p, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity()


--- a/models/yolov3-spp.yaml
+++ b/models/yolov3-spp.yaml
@@ -25,8 +25,7 @@ backbone:
   [-1, 4, Bottleneck, [1024]],  # 10
  ]

-# yolov3-spp head
-# na = len(anchors[0])
+# YOLOv3-SPP head
 head:
  [[-1, 1, Bottleneck, [1024, False]],  # 11
   [-1, 1, SPP, [512, [5, 9, 13]]],

--- a/models/hub/yolov5-fpn.yaml
+++ b/models/hub/yolov5-fpn.yaml
+# parameters
+nc: 80  # number of classes
+depth_multiple: 1.0  # model depth multiple
+width_multiple: 1.0  # layer channel multiple
+
+# anchors
+anchors:
+  - [10,13, 16,30, 33,23]  # P3/8
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [116,90, 156,198, 373,326]  # P5/32
+
+# YOLOv5 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, Bottleneck, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 9, BottleneckCSP, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, BottleneckCSP, [512]],
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+   [-1, 1, SPP, [1024, [5, 9, 13]]],
+   [-1, 6, BottleneckCSP, [1024]],  # 9
+  ]
+
+# YOLOv5 FPN head
+head:
+  [[-1, 3, BottleneckCSP, [1024, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 11 (P5/32-large)
+
+   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 1, Conv, [512, 1, 1]],
+   [-1, 3, BottleneckCSP, [512, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 16 (P4/16-medium)
+
+   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 3, BottleneckCSP, [256, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 21 (P3/8-small)
+
+   [[], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+  ]
--- a/models/hub/yolov5-panet.yaml
+++ b/models/hub/yolov5-panet.yaml
+# parameters
+nc: 80  # number of classes
+depth_multiple: 1.0  # model depth multiple
+width_multiple: 1.0  # layer channel multiple
+
+# anchors
+anchors:
+  - [116,90, 156,198, 373,326]  # P5/32
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [10,13, 16,30, 33,23]  # P3/8
+
+# YOLOv5 backbone
+backbone:
+  # [from, number, module, args]
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, BottleneckCSP, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
+   [-1, 9, BottleneckCSP, [256]],
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
+   [-1, 9, BottleneckCSP, [512]],
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+   [-1, 1, SPP, [1024, [5, 9, 13]]],
+  ]
+
+# YOLOv5 PANet head
+head:
+  [[-1, 3, BottleneckCSP, [1024, False]],
+   [-1, 1, Conv, [512, 1, 1]],  # 10
+
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, BottleneckCSP, [512, False]],
+   [-1, 1, Conv, [256, 1, 1]],  # 14
+
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
+   [-1, 3, BottleneckCSP, [256, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 18 (P3/8-small)
+
+   [-2, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, BottleneckCSP, [512, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P4/16-medium)
+
+   [-2, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, BottleneckCSP, [1024, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 26 (P5/32-large)
+
+   [[], 1, Detect, [nc, anchors]],  # Detect(P5, P4, P3)
+  ]
--- a/models/onnx_export.py
+++ b/models/onnx_export.py
 """Exports a pytorch *.pt model to *.onnx format

 Usage:
-    import torch
    $ export PYTHONPATH="$PWD" && python models/onnx_export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
 """

@@ -10,6 +9,7 @@ import argparse
 import onnx

 from models.common import *
+from utils import google_utils

 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
@@ -25,7 +25,7 @@ if __name__ == '__main__':

    # Load pytorch model
    google_utils.attempt_download(opt.weights)
-    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model']
+    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
    model.eval()
    model.fuse()


--- a/models/yolo.py
+++ b/models/yolo.py
 import argparse

-import yaml
-
 from models.experimental import *


@@ -61,8 +59,9 @@ class Model(nn.Module):

        # Build strides, anchors
        m = self.model[-1]  # Detect()
-        m.stride = torch.tensor([64 / x.shape[-2] for x in self.forward(torch.zeros(1, ch, 64, 64))])  # forward
+        m.stride = torch.tensor([128 / x.shape[-2] for x in self.forward(torch.zeros(1, ch, 128, 128))])  # forward
        m.anchors /= m.stride.view(-1, 1, 1)
+        check_anchor_order(m)
        self.stride = m.stride

        # Init weights, biases
@@ -97,8 +96,11 @@ class Model(nn.Module):
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            if profile:
-                import thop
-                o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # FLOPS
+                try:
+                    import thop
+                    o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # FLOPS
+                except:
+                    o = 0
                t = torch_utils.time_synchronized()
                for _ in range(10):
                    _ = m(x)
@@ -208,7 +210,7 @@ if __name__ == '__main__':
    parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = parser.parse_args()
-    opt.cfg = glob.glob('./**/' + opt.cfg, recursive=True)[0]  # find file
+    opt.cfg = check_file(opt.cfg)  # check file
    device = torch_utils.select_device(opt.device)

    # Create model
@@ -218,11 +220,10 @@ if __name__ == '__main__':
    # Profile
    # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
    # y = model(img, profile=True)
-    # print([y[0].shape] + [x.shape for x in y[1]])

    # ONNX export
    # model.model[-1].export = True
-    # torch.onnx.export(model, img, f.replace('.yaml', '.onnx'), verbose=True, opset_version=11)
+    # torch.onnx.export(model, img, opt.cfg.replace('.yaml', '.onnx'), verbose=True, opset_version=11)

    # Tensorboard
    # from torch.utils.tensorboard import SummaryWriter

--- a/models/yolov5l.yaml
+++ b/models/yolov5l.yaml
@@ -5,41 +5,48 @@ width_multiple: 1.0  # layer channel multiple

 # anchors
 anchors:
-  - [10,13, 16,30, 33,23]  # P3/8
-  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [10,13, 16,30, 33,23]  # P3/8

-# yolov5 backbone
+# YOLOv5 backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 1-P1/2
-   [-1, 1, Conv, [128, 3, 2]],  # 2-P2/4
-   [-1, 3, Bottleneck, [128]],
-   [-1, 1, Conv, [256, 3, 2]],  # 4-P3/8
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, BottleneckCSP, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
-   [-1, 1, Conv, [512, 3, 2]],  # 6-P4/16
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
-   [-1, 1, Conv, [1024, 3, 2]], # 8-P5/32
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 6, BottleneckCSP, [1024]],  # 10
  ]

-# yolov5 head
+# YOLOv5 head
 head:
-  [[-1, 3, BottleneckCSP, [1024, False]],  # 11
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 12 (P5/32-large)
+  [[-1, 3, BottleneckCSP, [1024, False]],  # 9

-   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 3, BottleneckCSP, [512, False]],
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 17 (P4/16-medium)
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, BottleneckCSP, [512, False]],  # 13

-   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, BottleneckCSP, [256, False]],
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P3/8-small)
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 18 (P3/8-small)
+
+   [-2, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, BottleneckCSP, [512, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P4/16-medium)
+
+   [-2, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, BottleneckCSP, [1024, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 26 (P5/32-large)

-   [[], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+   [[], 1, Detect, [nc, anchors]],  # Detect(P5, P4, P3)
  ]
--- a/models/yolov5m.yaml
+++ b/models/yolov5m.yaml
@@ -5,41 +5,48 @@ width_multiple: 0.75  # layer channel multiple

 # anchors
 anchors:
-  - [10,13, 16,30, 33,23]  # P3/8
-  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [10,13, 16,30, 33,23]  # P3/8

-# yolov5 backbone
+# YOLOv5 backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 1-P1/2
-   [-1, 1, Conv, [128, 3, 2]],  # 2-P2/4
-   [-1, 3, Bottleneck, [128]],
-   [-1, 1, Conv, [256, 3, 2]],  # 4-P3/8
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, BottleneckCSP, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
-   [-1, 1, Conv, [512, 3, 2]],  # 6-P4/16
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
-   [-1, 1, Conv, [1024, 3, 2]], # 8-P5/32
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 6, BottleneckCSP, [1024]],  # 10
  ]

-# yolov5 head
+# YOLOv5 head
 head:
-  [[-1, 3, BottleneckCSP, [1024, False]],  # 11
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 12 (P5/32-large)
+  [[-1, 3, BottleneckCSP, [1024, False]],  # 9

-   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 3, BottleneckCSP, [512, False]],
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 17 (P4/16-medium)
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, BottleneckCSP, [512, False]],  # 13

-   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, BottleneckCSP, [256, False]],
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P3/8-small)
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 18 (P3/8-small)
+
+   [-2, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, BottleneckCSP, [512, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P4/16-medium)
+
+   [-2, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, BottleneckCSP, [1024, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 26 (P5/32-large)

-   [[], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+   [[], 1, Detect, [nc, anchors]],  # Detect(P5, P4, P3)
  ]
--- a/models/yolov5s.yaml
+++ b/models/yolov5s.yaml
@@ -5,41 +5,48 @@ width_multiple: 0.50  # layer channel multiple

 # anchors
 anchors:
-  - [10,13, 16,30, 33,23]  # P3/8
-  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [10,13, 16,30, 33,23]  # P3/8

-# yolov5 backbone
+# YOLOv5 backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 1-P1/2
-   [-1, 1, Conv, [128, 3, 2]],  # 2-P2/4
-   [-1, 3, Bottleneck, [128]],
-   [-1, 1, Conv, [256, 3, 2]],  # 4-P3/8
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, BottleneckCSP, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
-   [-1, 1, Conv, [512, 3, 2]],  # 6-P4/16
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
-   [-1, 1, Conv, [1024, 3, 2]], # 8-P5/32
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 6, BottleneckCSP, [1024]],  # 10
  ]

-# yolov5 head
+# YOLOv5 head
 head:
-  [[-1, 3, BottleneckCSP, [1024, False]],  # 11
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 12 (P5/32-large)
+  [[-1, 3, BottleneckCSP, [1024, False]],  # 9

-   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 3, BottleneckCSP, [512, False]],
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 17 (P4/16-medium)
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, BottleneckCSP, [512, False]],  # 13

-   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, BottleneckCSP, [256, False]],
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P3/8-small)
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 18 (P3/8-small)
+
+   [-2, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, BottleneckCSP, [512, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P4/16-medium)
+
+   [-2, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, BottleneckCSP, [1024, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 26 (P5/32-large)

-   [[], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+   [[], 1, Detect, [nc, anchors]],  # Detect(P5, P4, P3)
  ]
--- a/models/yolov5x.yaml
+++ b/models/yolov5x.yaml
@@ -5,41 +5,48 @@ width_multiple: 1.25  # layer channel multiple

 # anchors
 anchors:
-  - [10,13, 16,30, 33,23]  # P3/8
-  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32
+  - [30,61, 62,45, 59,119]  # P4/16
+  - [10,13, 16,30, 33,23]  # P3/8

-# yolov5 backbone
+# YOLOv5 backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 1-P1/2
-   [-1, 1, Conv, [128, 3, 2]],  # 2-P2/4
-   [-1, 3, Bottleneck, [128]],
-   [-1, 1, Conv, [256, 3, 2]],  # 4-P3/8
+  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
+   [-1, 3, BottleneckCSP, [128]],
+   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
-   [-1, 1, Conv, [512, 3, 2]],  # 6-P4/16
+   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
-   [-1, 1, Conv, [1024, 3, 2]], # 8-P5/32
+   [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 6, BottleneckCSP, [1024]],  # 10
  ]

-# yolov5 head
+# YOLOv5 head
 head:
-  [[-1, 3, BottleneckCSP, [1024, False]],  # 11
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 12 (P5/32-large)
+  [[-1, 3, BottleneckCSP, [1024, False]],  # 9

-   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 3, BottleneckCSP, [512, False]],
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 17 (P4/16-medium)
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
+   [-1, 3, BottleneckCSP, [512, False]],  # 13

-   [-2, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
+   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, BottleneckCSP, [256, False]],
-   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P3/8-small)
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 18 (P3/8-small)
+
+   [-2, 1, Conv, [256, 3, 2]],
+   [[-1, 14], 1, Concat, [1]],  # cat head P4
+   [-1, 3, BottleneckCSP, [512, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 22 (P4/16-medium)
+
+   [-2, 1, Conv, [512, 3, 2]],
+   [[-1, 10], 1, Concat, [1]],  # cat head P5
+   [-1, 3, BottleneckCSP, [1024, False]],
+   [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]],  # 26 (P5/32-large)

-   [[], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+   [[], 1, Detect, [nc, anchors]],  # Detect(P5, P4, P3)
  ]
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
 Cython
 numpy==1.17
 opencv-python
-torch>=1.5
+torch>=1.4
 matplotlib
 pillow
 tensorboard
@@ -21,4 +21,4 @@ git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
 # conda install -yc conda-forge scikit-image pycocotools tensorboard
 # conda install -yc spyder-ide spyder-line-profiler
 # conda install -yc pytorch pytorch torchvision
-# conda install -yc conda-forge protobuf numpy && pip install onnx  # https://github.com/onnx/onnx#linux-and-macos
+# conda install -yc conda-forge protobuf numpy && pip install onnx==1.6.0  # https://github.com/onnx/onnx#linux-and-macos
--- a/test.py
+++ b/test.py
 import argparse
 import json

-import yaml
-from torch.utils.data import DataLoader
-
+from utils import google_utils
 from utils.datasets import *
 from utils.utils import *

@@ -17,16 +15,18 @@ def test(data,
         save_json=False,
         single_cls=False,
         augment=False,
+         verbose=False,
         model=None,
         dataloader=None,
         fast=False,
         verbose=False,
-         save_dir='.'):
+         save_dir='.',
+         merge=False):
+
    # Initialize/load model and set device
    if model is None:
        training = False
        device = torch_utils.select_device(opt.device, batch_size=batch_size)
-        half = device.type != 'cpu'  # half precision only supported on CUDA

        # Remove previous
        for f in glob.glob(f'{save_dir}/test_batch*.jpg'):
@@ -38,18 +38,19 @@ def test(data,
        torch_utils.model_info(model)
        model.fuse()
        model.to(device)
-        if half:
-            model.half()  # to FP16

-        if device.type != 'cpu' and torch.cuda.device_count() > 1:
-            model = nn.DataParallel(model)
+        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
+        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
+        #     model = nn.DataParallel(model)

    else:  # called by train.py
        training = True
        device = next(model.parameters()).device  # get model device
-        half = device.type != 'cpu'  # half precision only supported on CUDA
-        if half:
-            model.half()  # to FP16
+
+    # Half
+    half = device.type != 'cpu' and torch.cuda.device_count() == 1  # half precision only supported on single-GPU
+    if half:
+        model.half()  # to FP16

    # Configure
    model.eval()
@@ -57,29 +58,16 @@ def test(data,
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
-    # iouv = iouv[0].view(1)  # comment for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if dataloader is None:  # not training
+        merge = opt.merge  # use Merge NMS
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
-
-        fast |= conf_thres > 0.001  # enable fast mode
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
-        dataset = LoadImagesAndLabels(path,
-                                      imgsz,
-                                      batch_size,
-                                      rect=True,  # rectangular inference
-                                      single_cls=opt.single_cls,  # single class mode
-                                      pad=0.5)  # padding
-        batch_size = min(batch_size, len(dataset))
-        nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
-        dataloader = DataLoader(dataset,
-                                batch_size=batch_size,
-                                num_workers=nw,
-                                pin_memory=True,
-                                collate_fn=dataset.collate_fn)
+        dataloader = create_dataloader(path, imgsz, batch_size, int(max(model.stride)), opt,
+                                       hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
@@ -109,7 +97,7 @@ def test(data,

            # Run NMS
            t = torch_utils.time_synchronized()
-            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, fast=fast)
+            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
@@ -235,6 +223,7 @@ def test(data,
                  'See https://github.com/cocodataset/cocoapi/issues/356')

    # Return results
+    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
@@ -244,7 +233,7 @@ def test(data,
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='test.py')
    parser.add_argument('--weights', type=str, default='weights/yolov5s.pt', help='model.pt path')
-    parser.add_argument('--data', type=str, default='data/coco.yaml', help='*.data path')
+    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
    parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
@@ -254,6 +243,7 @@ if __name__ == '__main__':
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
+    parser.add_argument('--merge', action='store_true', help='use Merge NMS')
    parser.add_argument('--verbose', action='store_true', help='report mAP by class')
    opt = parser.parse_args()
    opt.img_size = check_img_size(opt.img_size)
@@ -271,12 +261,13 @@ if __name__ == '__main__':
             opt.iou_thres,
             opt.save_json,
             opt.single_cls,
-             opt.augment)
+             opt.augment,
+             opt.verbose)

    elif opt.task == 'study':  # run over a range of settings and save/plot
-        for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
+        for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
            f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem)  # filename to save to
-            x = list(range(288, 896, 64))  # x axis
+            x = list(range(352, 832, 64))  # x axis
            y = []  # y axis
            for i in x:  # img-size
                print('\nRunning %s point %s...' % (f, i))

--- a/train.py
+++ b/train.py
@@ -4,10 +4,12 @@ import torch.distributed as dist
 import torch.nn.functional as F
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler
+import torch.utils.data
 from torch.utils.tensorboard import SummaryWriter

 import test  # import test.py to get mAP after each epoch
 from models.yolo import Model
+from utils import google_utils
 from utils.datasets import *
 from utils.utils import *

@@ -72,6 +74,7 @@ def train(hyp):
    # Create model
    model = Model(opt.cfg).to(device)
    assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc'])
+    model.names = data_dict['names']

    # Image sizes
    gs = int(max(model.stride))  # grid size (max stride)
@@ -148,37 +151,17 @@ def train(hyp):
                                world_size=1,  # number of nodes
                                rank=0)  # node rank
        model = torch.nn.parallel.DistributedDataParallel(model)
+        # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html

-    # Dataset
-    dataset = LoadImagesAndLabels(train_path, imgsz, batch_size,
-                                  augment=True,
-                                  hyp=hyp,  # augmentation hyperparameters
-                                  rect=opt.rect,  # rectangular training
-                                  cache_images=opt.cache_images,
-                                  single_cls=opt.single_cls)
+    # Trainloader
+    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
+                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Correct your labels or your model.' % (mlc, nc, opt.cfg)

-    # Dataloader
-    batch_size = min(batch_size, len(dataset))
-    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
-    dataloader = torch.utils.data.DataLoader(dataset,
-                                             batch_size=batch_size,
-                                             num_workers=nw,
-                                             shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
-                                             pin_memory=True,
-                                             collate_fn=dataset.collate_fn)
-
    # Testloader
-    testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path, imgsz_test, batch_size,
-                                                                 hyp=hyp,
-                                                                 rect=True,
-                                                                 cache_images=opt.cache_images,
-                                                                 single_cls=opt.single_cls),
-                                             batch_size=batch_size,
-                                             num_workers=nw,
-                                             pin_memory=True,
-                                             collate_fn=dataset.collate_fn)
+    testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
+                                            hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]

    # Model parameters
    hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
@@ -186,7 +169,6 @@ def train(hyp):
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
-    model.names = data_dict['names']

    #save hyperparamter and training options in run folder
    with open(os.path.join(log_dir, 'hyp.yaml'), 'w') as f:
@@ -200,11 +182,17 @@ def train(hyp):
    c = torch.tensor(labels[:, 0])  # classes
    # cf = torch.bincount(c.long(), minlength=nc) + 1.
    # model._initialize_biases(cf.to(device))
+
+    #always plot labels to log_dir
    plot_labels(labels, save_dir=log_dir)
-    tb_writer.add_histogram('classes', c, 0)
+
+    if tb_writer:
+        tb_writer.add_histogram('classes', c, 0)
+

    # Check anchors
-    check_best_possible_recall(dataset, anchors=model.model[-1].anchor_grid, thr=hyp['anchor_t'], imgsz=imgsz)
+    if not opt.noautoanchor:
+        check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

    # Exponential moving average
    ema = torch_utils.ModelEMA(model)
@@ -216,7 +204,7 @@ def train(hyp):
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
-    print('Using %g dataloader workers' % nw)
+    print('Using %g dataloader workers' % dataloader.num_workers)
    print('Starting training for %g epochs...' % epochs)
    # torch.autograd.set_detect_anomaly(True)
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
@@ -285,10 +273,10 @@ def train(hyp):

            # Plot
            if ni < 3:
-                f = os.path.join(log_dir, 'train_batch%g.jpg' % i)  # filename
-                res = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
-                if tb_writer:
-                    tb_writer.add_image(f, res, dataformats='HWC', global_step=epoch)
+                f = os.path.join(log_dir, 'train_batch%g.jpg' % ni)  # filename
+                result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
+                if tb_writer and result is not None:
+                    tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------
@@ -307,7 +295,6 @@ def train(hyp):
                                             model=ema.ema,
                                             single_cls=opt.single_cls,
                                             dataloader=testloader,
-                                             fast=epoch < epochs / 2,
                                             save_dir=log_dir)

        # Write
@@ -362,7 +349,7 @@ def train(hyp):
    if not opt.evolve:
        plot_results(save_dir = log_dir)  # save as results.png
    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
-    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
+    dist.destroy_process_group() if device.type != 'cpu' and torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()
    return results

@@ -379,6 +366,7 @@ if __name__ == '__main__':
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')

--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -18,7 +18,7 @@ from utils.utils import xyxy2xywh, xywh2xyxy

 help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
 img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
-vid_formats = ['.mov', '.avi', '.mp4']
+vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']

 # Get orientation exif tag
 for orientation in ExifTags.TAGS.keys():
@@ -41,6 +41,26 @@ def exif_size(img):
    return s


+def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False):
+    dataset = LoadImagesAndLabels(path, imgsz, batch_size,
+                                  augment=augment,  # augment images
+                                  hyp=hyp,  # augmentation hyperparameters
+                                  rect=rect,  # rectangular training
+                                  cache_images=cache,
+                                  single_cls=opt.single_cls,
+                                  stride=stride,
+                                  pad=pad)
+
+    batch_size = min(batch_size, len(dataset))
+    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
+    dataloader = torch.utils.data.DataLoader(dataset,
+                                             batch_size=batch_size,
+                                             num_workers=nw,
+                                             pin_memory=True,
+                                             collate_fn=LoadImagesAndLabels.collate_fn)
+    return dataloader, dataset
+
+
 class LoadImages:  # for inference
    def __init__(self, path, img_size=416):
        path = str(Path(path))  # os-agnostic
@@ -63,7 +83,8 @@ class LoadImages:  # for inference
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
-        assert self.nF > 0, 'No images or videos found in ' + path
+        assert self.nF > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
+                            (path, img_formats, vid_formats)

    def __iter__(self):
        self.count = 0
@@ -257,7 +278,7 @@ class LoadStreams:  # multiple IP or RTSP cameras

 class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
-                 cache_images=False, single_cls=False, pad=0.0):
+                 cache_images=False, single_cls=False, stride=32, pad=0.0):
        try:
            path = str(Path(path))  # os-agnostic
            parent = str(Path(path).parent) + os.sep
@@ -324,7 +345,7 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

-            self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32. + pad).astype(np.int) * 32
+            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride

        # Cache labels
        self.imgs = [None] * n
@@ -711,7 +732,7 @@ def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10,
        area = w * h
        area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))  # aspect ratio
-        i = (w > 4) & (h > 4) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 10)
+        i = (w > 2) & (h > 2) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 20)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -7,6 +7,7 @@ import torch
 import torch.backends.cudnn as cudnn
 import torch.nn as nn
 import torch.nn.functional as F
+import torchvision.models as models


 def init_seeds(seed=0):
@@ -120,18 +121,22 @@ def model_info(model, verbose=False):

 def load_classifier(name='resnet101', n=2):
    # Loads a pretrained model reshaped to n-class output
-    import pretrainedmodels  # https://github.com/Cadene/pretrained-models.pytorch#torchvision
-    model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet')
+    model = models.__dict__[name](pretrained=True)

    # Display model properties
-    for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']:
+    input_size = [3, 224, 224]
+    input_space = 'RGB'
+    input_range = [0, 1]
+    mean = [0.485, 0.456, 0.406]
+    std = [0.229, 0.224, 0.225]
+    for x in [input_size, input_space, input_range, mean, std]:
        print(x + ' =', eval(x))

    # Reshape output to n classes
-    filters = model.last_linear.weight.shape[1]
-    model.last_linear.bias = torch.nn.Parameter(torch.zeros(n))
-    model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters))
-    model.last_linear.out_features = n
+    filters = model.fc.weight.shape[1]
+    model.fc.bias = torch.nn.Parameter(torch.zeros(n), requires_grad=True)
+    model.fc.weight = torch.nn.Parameter(torch.zeros(n, filters), requires_grad=True)
+    model.fc.out_features = n
    return model



--- a/utils/utils.py
+++ b/utils/utils.py