Unverified commit a64a4c83, authored by Glenn Jocher, committed by GitHub

Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()` (#4919)

* Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()`
* Cleanup
Parent 14926327
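The change replaces shell calls to the external `unzip` binary with Python's built-in `zipfile` module, so extraction no longer depends on `unzip` being installed and behaves the same on Windows. A minimal before/after sketch of the pattern applied throughout this commit (the archive path is a hypothetical example, not taken from the diff):

    from pathlib import Path
    from zipfile import ZipFile

    archive = Path('datasets/coco128.zip')  # hypothetical example archive

    # Before: shell out to `unzip`, success signalled only by the exit code
    # os.system(f'unzip -q {archive} -d {archive.parent}')

    # After: pure Python, cross-platform; raises FileNotFoundError/BadZipFile on error
    ZipFile(archive).extractall(path=archive.parent)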
@@ -15,6 +15,7 @@ from itertools import repeat
 from multiprocessing.pool import ThreadPool, Pool
 from pathlib import Path
 from threading import Thread
+from zipfile import ZipFile
 
 import cv2
 import numpy as np
@@ -928,8 +929,8 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil
         # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
         if str(path).endswith('.zip'):  # path is data.zip
             assert Path(path).is_file(), f'Error unzipping {path}, file not found'
-            assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}'
-            dir = path.with_suffix('')  # dataset directory
+            ZipFile(path).extractall(path=path.parent)  # unzip
+            dir = path.with_suffix('')  # dataset directory == zip name
             return True, str(dir), next(dir.rglob('*.yaml'))  # zipped, data_dir, yaml_path
         else:  # path is data.yaml
             return False, None, path
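Note that the TODO constraint in this hunk still holds after the switch: `extractall(path=path.parent)` writes whatever top-level entries the archive contains, and `path.with_suffix('')` simply assumes a directory named after the zip stem now exists there. A short sketch of that assumption (paths are hypothetical):

    from pathlib import Path
    from zipfile import ZipFile

    path = Path('path/to/abc.zip')              # hypothetical dataset archive
    ZipFile(path).extractall(path=path.parent)  # extracts into 'path/to/'

    dir = path.with_suffix('')                  # 'path/to/abc' == expected dataset directory
    yaml_file = next(dir.rglob('*.yaml'))       # fails unless the zip contained an 'abc/' folder with a yaml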
@@ -9,6 +9,7 @@ import subprocess
 import time
 import urllib
 from pathlib import Path
+from zipfile import ZipFile
 
 import requests
 import torch
@@ -104,8 +105,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
     # Unzip if archive
     if file.suffix == '.zip':
         print('unzipping... ', end='')
-        os.system(f'unzip -q {file}')  # unzip
-        file.unlink()  # remove zip to free space
+        ZipFile(file).extractall(path=file.parent)  # unzip
+        file.unlink()  # remove zip
 
     print(f'Done ({time.time() - t:.1f}s)')
     return r
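One behavioural detail in `gdrive_download`: the old `unzip -q {file}` expanded the archive into the current working directory, whereas `ZipFile(file).extractall(path=file.parent)` always expands it next to the downloaded file. A sketch of the new behaviour (the download path is hypothetical):

    from pathlib import Path
    from zipfile import ZipFile

    file = Path('/tmp/weights/tmp.zip')         # hypothetical download location

    ZipFile(file).extractall(path=file.parent)  # contents land in /tmp/weights/, not os.getcwd()
    file.unlink()                               # remove the zip afterwards, as the patched code does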
@@ -18,6 +18,7 @@ from itertools import repeat
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 from subprocess import check_output
+from zipfile import ZipFile
 
 import cv2
 import numpy as np
@@ -353,17 +354,19 @@ def check_dataset(data, autodownload=True):
         if s and autodownload:  # download script
             if s.startswith('http') and s.endswith('.zip'):  # URL
                 f = Path(s).name  # filename
-                print(f'Downloading {s} ...')
+                print(f'Downloading {s} to {f}...')
                 torch.hub.download_url_to_file(s, f)
                 root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
                 Path(root).mkdir(parents=True, exist_ok=True)  # create root
-                r = os.system(f'unzip -q {f} -d {root} && rm {f}')  # unzip
+                ZipFile(f).extractall(path=root)  # unzip
+                Path(f).unlink()  # remove zip
+                r = None  # success
             elif s.startswith('bash '):  # bash script
                 print(f'Running {s} ...')
                 r = os.system(s)
             else:  # python script
                 r = exec(s, {'yaml': data})  # return None
-            print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure'))  # print result
+            print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}")
         else:
             raise Exception('Dataset not found.')
 
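The two added lines in the zip branch keep the status convention intact: `os.system` returned `0` on success, while `ZipFile.extractall()` returns `None` and raises on failure, so the branch now deletes the archive itself and sets `r = None` so that the final `r in (0, None)` check still reports success. A hedged sketch of that convention (the helper name is made up for illustration):

    import os

    def run_autodownload(s):                 # hypothetical helper mirroring the three branches
        if s.startswith('http') and s.endswith('.zip'):
            ...                              # download + ZipFile(f).extractall(...); raises on failure
            return None                      # None -> success
        elif s.startswith('bash '):
            return os.system(s)              # shell exit code, 0 -> success
        else:
            return exec(s, {'yaml': {}})     # exec() always returns None

    ok = run_autodownload("print('hello')") in (0, None)  # True: python-script branch returns None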
@@ -393,12 +396,11 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
         if unzip and f.suffix in ('.zip', '.gz'):
             print(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                s = f'unzip -qo {f} -d {dir}'  # unzip -quiet -overwrite
+                ZipFile(f).extractall(path=dir)  # unzip
             elif f.suffix == '.gz':
-                s = f'tar xfz {f} --directory {f.parent}'  # unzip
-            if delete:  # delete zip file after unzip
-                s += f' && rm {f}'
-            os.system(s)
+                os.system(f'tar xfz {f} --directory {f.parent}')  # unzip
+            if delete:
+                f.unlink()  # remove zip
 
     dir = Path(dir)
     dir.mkdir(parents=True, exist_ok=True)  # make directory
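In `download()` only the `.zip` branch moves to `zipfile`; `.gz` archives are still handed to `tar` through `os.system`. If a shell-free path were wanted there as well, the standard-library `tarfile` module could cover it. A sketch of that alternative (not what the patch does; the helper is hypothetical):

    import tarfile
    from pathlib import Path
    from zipfile import ZipFile

    def extract(f, dir='.', delete=True):            # hypothetical helper mirroring the patched logic
        f, dir = Path(f), Path(dir)
        if f.suffix == '.zip':
            ZipFile(f).extractall(path=dir)          # as in the patch
        elif f.suffix == '.gz':
            # the patch keeps: os.system(f'tar xfz {f} --directory {f.parent}')
            with tarfile.open(f, 'r:gz') as t:       # pure-Python alternative
                t.extractall(path=f.parent)
        if delete:
            f.unlink()                               # remove the archive after extraction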