Curl update (#3004)

* Curl update
* Curl update

Curl update (#3004)
37eaffec · Glenn Jocher · GitHub · 4d7bca76 · 37eaffec · 37eaffec
--- a/data/scripts/get_objects365.py
+++ b/data/scripts/get_objects365.py
@@ -22,7 +22,7 @@ for p in 'images', 'labels':
 # Download
 url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
 download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir)  # annotations json
-download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', threads=8)
+download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', curl=True, threads=8)

 # Labels
 coco = COCO(dir / 'zhiyuan_objv2_train.json')

--- a/utils/general.py
+++ b/utils/general.py
@@ -183,14 +183,17 @@ def check_dataset(dict):
                raise Exception('Dataset not found.')


-def download(url, dir='.', unzip=True, threads=1):
+def download(url, dir='.', unzip=True, curl=False, threads=1):
    # Multi-threaded file download and unzip function
    def download_one(url, dir):
        # Download 1 file
        f = dir / Path(url).name  # filename
        if not f.exists():
            print(f'Downloading {url} to {f}...')
-            torch.hub.download_url_to_file(url, f, progress=True)  # download
+            if curl:
+                os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -")  # curl download, retry and resume on fail
+            else:
+                torch.hub.download_url_to_file(url, f, progress=True)  # torch download
        if unzip and f.suffix in ('.zip', '.gz'):
            print(f'Unzipping {f}...')
            if f.suffix == '.zip':

--- a/utils/google_utils.py
+++ b/utils/google_utils.py
@@ -47,7 +47,7 @@ def attempt_download(file, repo='ultralytics/yolov5'):
                assert redundant, 'No secondary mirror'
                url = f'https://storage.googleapis.com/{repo}/ckpt/{name}'
                print(f'Downloading {url} to {file}...')
-                os.system(f'curl -L {url} -o {file}')  # torch.hub.download_url_to_file(url, weights)
+                os.system(f"curl -L '{url}' -o '{file}' --retry 3 -C -")  # curl download, retry and resume on fail
            finally:
                if not file.exists() or file.stat().st_size < 1E6:  # check
                    file.unlink(missing_ok=True)  # remove partial downloads