Unverified commit a64a4c83, authored by Glenn Jocher, committed by GitHub

Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()` (#4919)

* Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()`
* Cleanup
Parent 14926327
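The change replaces shell calls to the external `unzip` binary with Python's built-in `zipfile` module, so extraction no longer depends on `unzip` being installed and behaves the same on Windows. A minimal before/after sketch of the pattern applied throughout this commit (the archive path is a hypothetical example, not taken from the diff):

    from pathlib import Path
    from zipfile import ZipFile

    archive = Path('datasets/coco128.zip')  # hypothetical example archive

    # Before: shell out to `unzip`, success signalled only by the exit code
    # os.system(f'unzip -q {archive} -d {archive.parent}')

    # After: pure Python, cross-platform; raises FileNotFoundError/BadZipFile on error
    ZipFile(archive).extractall(path=archive.parent)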
@@ -15,6 +15,7 @@ from itertools import repeat
 from multiprocessing.pool import ThreadPool, Pool
 from pathlib import Path
 from threading import Thread
+from zipfile import ZipFile
 
 import cv2
 import numpy as np
@@ -928,8 +929,8 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil
         # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
         if str(path).endswith('.zip'):  # path is data.zip
             assert Path(path).is_file(), f'Error unzipping {path}, file not found'
-            assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}'
-            dir = path.with_suffix('')  # dataset directory
+            ZipFile(path).extractall(path=path.parent)  # unzip
+            dir = path.with_suffix('')  # dataset directory == zip name
             return True, str(dir), next(dir.rglob('*.yaml'))  # zipped, data_dir, yaml_path
         else:  # path is data.yaml
             return False, None, path
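Note that the TODO constraint in this hunk still holds after the switch: `extractall(path=path.parent)` writes whatever top-level entries the archive contains, and `path.with_suffix('')` simply assumes a directory named after the zip stem now exists there. A short sketch of that assumption (paths are hypothetical):

    from pathlib import Path
    from zipfile import ZipFile

    path = Path('path/to/abc.zip')              # hypothetical dataset archive
    ZipFile(path).extractall(path=path.parent)  # extracts into 'path/to/'

    dir = path.with_suffix('')                  # 'path/to/abc' == expected dataset directory
    yaml_file = next(dir.rglob('*.yaml'))       # fails unless the zip contained an 'abc/' folder with a yaml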
@@ -9,6 +9,7 @@ import subprocess
 import time
 import urllib
 from pathlib import Path
+from zipfile import ZipFile
 
 import requests
 import torch
@@ -104,8 +105,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
     # Unzip if archive
     if file.suffix == '.zip':
         print('unzipping... ', end='')
-        os.system(f'unzip -q {file}')  # unzip
-        file.unlink()  # remove zip to free space
+        ZipFile(file).extractall(path=file.parent)  # unzip
+        file.unlink()  # remove zip
 
     print(f'Done ({time.time() - t:.1f}s)')
     return r
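One behavioural detail in `gdrive_download`: the old `unzip -q {file}` expanded the archive into the current working directory, whereas `ZipFile(file).extractall(path=file.parent)` always expands it next to the downloaded file. A sketch of the new behaviour (the download path is hypothetical):

    from pathlib import Path
    from zipfile import ZipFile

    file = Path('/tmp/weights/tmp.zip')         # hypothetical download location

    ZipFile(file).extractall(path=file.parent)  # contents land in /tmp/weights/, not os.getcwd()
    file.unlink()                               # remove the zip afterwards, as the patched code does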
@@ -18,6 +18,7 @@ from itertools import repeat
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 from subprocess import check_output
+from zipfile import ZipFile
 
 import cv2
 import numpy as np
@@ -353,17 +354,19 @@ def check_dataset(data, autodownload=True):
         if s and autodownload:  # download script
             if s.startswith('http') and s.endswith('.zip'):  # URL
                 f = Path(s).name  # filename
-                print(f'Downloading {s} ...')
+                print(f'Downloading {s} to {f}...')
                 torch.hub.download_url_to_file(s, f)
                 root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
                 Path(root).mkdir(parents=True, exist_ok=True)  # create root
-                r = os.system(f'unzip -q {f} -d {root} && rm {f}')  # unzip
+                ZipFile(f).extractall(path=root)  # unzip
+                Path(f).unlink()  # remove zip
+                r = None  # success
             elif s.startswith('bash '):  # bash script
                 print(f'Running {s} ...')
                 r = os.system(s)
             else:  # python script
                 r = exec(s, {'yaml': data})  # return None
-            print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure'))  # print result
+            print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}")
         else:
             raise Exception('Dataset not found.')
 
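The two added lines in the zip branch keep the status convention intact: `os.system` returned `0` on success, while `ZipFile.extractall()` returns `None` and raises on failure, so the branch now deletes the archive itself and sets `r = None` so that the final `r in (0, None)` check still reports success. A hedged sketch of that convention (the helper name is made up for illustration):

    import os

    def run_autodownload(s):                 # hypothetical helper mirroring the three branches
        if s.startswith('http') and s.endswith('.zip'):
            ...                              # download + ZipFile(f).extractall(...); raises on failure
            return None                      # None -> success
        elif s.startswith('bash '):
            return os.system(s)              # shell exit code, 0 -> success
        else:
            return exec(s, {'yaml': {}})     # exec() always returns None

    ok = run_autodownload("print('hello')") in (0, None)  # True: python-script branch returns None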
@@ -393,12 +396,11 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
         if unzip and f.suffix in ('.zip', '.gz'):
             print(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                s = f'unzip -qo {f} -d {dir}'  # unzip -quiet -overwrite
+                ZipFile(f).extractall(path=dir)  # unzip
             elif f.suffix == '.gz':
-                s = f'tar xfz {f} --directory {f.parent}'  # unzip
-            if delete:  # delete zip file after unzip
-                s += f' && rm {f}'
-            os.system(s)
+                os.system(f'tar xfz {f} --directory {f.parent}')  # unzip
+            if delete:
+                f.unlink()  # remove zip
 
     dir = Path(dir)
     dir.mkdir(parents=True, exist_ok=True)  # make directory
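In `download()` only the `.zip` branch moves to `zipfile`; `.gz` archives are still handed to `tar` through `os.system`. If a shell-free path were wanted there as well, the standard-library `tarfile` module could cover it. A sketch of that alternative (not what the patch does; the helper is hypothetical):

    import tarfile
    from pathlib import Path
    from zipfile import ZipFile

    def extract(f, dir='.', delete=True):            # hypothetical helper mirroring the patched logic
        f, dir = Path(f), Path(dir)
        if f.suffix == '.zip':
            ZipFile(f).extractall(path=dir)          # as in the patch
        elif f.suffix == '.gz':
            # the patch keeps: os.system(f'tar xfz {f} --directory {f.parent}')
            with tarfile.open(f, 'r:gz') as t:       # pure-Python alternative
                t.extractall(path=f.parent)
        if delete:
            f.unlink()                               # remove the archive after extraction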