Single-source training (#680)

* Single-source training * Extract hyperparameters into separate files * weight decay scientific notation yaml reader bug fix * remove import glob * intersect_dicts() implementation * 'or' bug fix * .to(device) bug fix

Single-source training (#680)
3c6e2f76 · Glenn Jocher · GitHub · d7cfbc47 · 3c6e2f76 · 3c6e2f76
--- a/data/hyp.finetune.yaml
+++ b/data/hyp.finetune.yaml
+# Hyperparameters for VOC fine-tuning
+# python train.py --batch 64 --cfg '' --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
+# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
+lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+momentum: 0.937  # SGD momentum/Adam beta1
+weight_decay: 0.0005  # optimizer weight decay 5e-4
+giou: 0.05  # GIoU loss gain
+cls: 0.5  # cls loss gain
+cls_pw: 1.0  # cls BCELoss positive_weight
+obj: 1.0  # obj loss gain (scale with pixels)
+obj_pw: 1.0  # obj BCELoss positive_weight
+iou_t: 0.20  # IoU training threshold
+anchor_t: 4.0  # anchor-multiple threshold
+fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # image HSV-Value augmentation (fraction)
+degrees: 0.0  # image rotation (+/- deg)
+translate: 0.5  # image translation (+/- fraction)
+scale: 0.5  # image scale (+/- gain)
+shear: 0.0  # image shear (+/- deg)
+perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # image flip up-down (probability)
+fliplr: 0.5  # image flip left-right (probability)
+mixup: 0.0  # image mixup (probability)
--- a/data/hyp.scratch.yaml
+++ b/data/hyp.scratch.yaml
+# Hyperparameters for COCO training from scratch
+# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
+# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
+lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+momentum: 0.937  # SGD momentum/Adam beta1
+weight_decay: 0.0005  # optimizer weight decay 5e-4
+giou: 0.05  # GIoU loss gain
+cls: 0.5  # cls loss gain
+cls_pw: 1.0  # cls BCELoss positive_weight
+obj: 1.0  # obj loss gain (scale with pixels)
+obj_pw: 1.0  # obj BCELoss positive_weight
+iou_t: 0.20  # IoU training threshold
+anchor_t: 4.0  # anchor-multiple threshold
+fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # image HSV-Value augmentation (fraction)
+degrees: 0.0  # image rotation (+/- deg)
+translate: 0.5  # image translation (+/- fraction)
+scale: 0.5  # image scale (+/- gain)
+shear: 0.0  # image shear (+/- deg)
+perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # image flip up-down (probability)
+fliplr: 0.5  # image flip left-right (probability)
+mixup: 0.0  # image mixup (probability)
--- a/train.py
+++ b/train.py
--- a/utils/general.py
+++ b/utils/general.py
@@ -120,7 +120,7 @@ def check_anchor_order(m):
 def check_file(file):
    # Searches for file if not found locally
-    if os.path.isfile(file):
+    if os.path.isfile(file) or file == '':
        return file
    else:
        files = glob.glob('./**/' + file, recursive=True)  # find file

--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -55,10 +55,14 @@ def time_synchronized():
 def is_parallel(model):
-    # is model is parallel with DP or DDP
    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
+def intersect_dicts(da, db, exclude=()):
+    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
+    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
 def initialize_weights(model):
    for m in model.modules():
        t = type(m)
@@ -72,7 +76,7 @@ def initialize_weights(model):
 def find_modules(model, mclass=nn.Conv2d):
-    # finds layer indices matching module class 'mclass'
+    # Finds layer indices matching module class 'mclass'
    return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]