Unverified 提交 d0df6c84 authored 作者: Glenn Jocher's avatar Glenn Jocher 提交者: GitHub

Update ZipFile to context manager (#9843)

* Update zipFile to context manager * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Cleanup * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Cleanup Co-authored-by: 's avatarpre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
上级 010cd0db
...@@ -17,7 +17,6 @@ from multiprocessing.pool import Pool, ThreadPool ...@@ -17,7 +17,6 @@ from multiprocessing.pool import Pool, ThreadPool
from pathlib import Path from pathlib import Path
from threading import Thread from threading import Thread
from urllib.parse import urlparse from urllib.parse import urlparse
from zipfile import ZipFile
import numpy as np import numpy as np
import torch import torch
...@@ -31,7 +30,8 @@ from tqdm import tqdm ...@@ -31,7 +30,8 @@ from tqdm import tqdm
from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
cutout, letterbox, mixup, random_perspective) cutout, letterbox, mixup, random_perspective)
from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn) cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, xywh2xyxy, xywhn2xyxy,
xyxy2xywhn)
from utils.torch_utils import torch_distributed_zero_first from utils.torch_utils import torch_distributed_zero_first
# Parameters # Parameters
...@@ -1053,7 +1053,7 @@ class HUBDatasetStats(): ...@@ -1053,7 +1053,7 @@ class HUBDatasetStats():
if not str(path).endswith('.zip'): # path is data.yaml if not str(path).endswith('.zip'): # path is data.yaml
return False, None, path return False, None, path
assert Path(path).is_file(), f'Error unzipping {path}, file not found' assert Path(path).is_file(), f'Error unzipping {path}, file not found'
ZipFile(path).extractall(path=path.parent) # unzip unzip_file(path, path=path.parent)
dir = path.with_suffix('') # dataset directory == zip name dir = path.with_suffix('') # dataset directory == zip name
assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/' assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
......
...@@ -5,12 +5,9 @@ Download utils ...@@ -5,12 +5,9 @@ Download utils
import logging import logging
import os import os
import platform
import subprocess import subprocess
import time
import urllib import urllib
from pathlib import Path from pathlib import Path
from zipfile import ZipFile
import requests import requests
import torch import torch
...@@ -109,81 +106,3 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'): ...@@ -109,81 +106,3 @@ def attempt_download(file, repo='ultralytics/yolov5', release='v6.2'):
error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}') error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')
return str(file) return str(file)
def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
    """Download a file from Google Drive by sharing id.

    Usage: from yolov5.utils.downloads import *; gdrive_download()

    Args:
        id: Google Drive file id.
        file: destination filename; a '.zip' file is extracted in place and removed.

    Returns:
        int: curl exit code (0 on success, non-zero on failure).
    """
    t = time.time()
    file = Path(file)
    cookie = Path('cookie')  # gdrive session cookie written by the first curl call
    print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
    if file.exists():
        file.unlink()  # remove existing file
    if cookie.exists():
        cookie.unlink()  # remove existing cookie

    # Attempt file download. Large files require a confirmation token parsed from the cookie.
    # NOTE(review): these shell strings interpolate `id`/`file` unescaped — safe only for
    # trusted arguments; subprocess.run([...]) with a list would be more robust.
    out = "NUL" if platform.system() == "Windows" else "/dev/null"
    os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
    if os.path.exists('cookie'):  # large file: confirm token needed
        s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
    else:  # small file: direct download
        s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
    r = os.system(s)  # execute, capture curl return code
    if cookie.exists():
        cookie.unlink()  # remove cookie

    # Error check
    if r != 0:
        if file.exists():
            file.unlink()  # remove partial download
        print('Download error ')  # raise Exception('Download error')
        return r

    # Unzip if archive; context manager guarantees the zip handle is closed
    if file.suffix == '.zip':
        print('unzipping... ', end='')
        with ZipFile(file) as zip_obj:
            zip_obj.extractall(path=file.parent)
        file.unlink()  # remove zip

    print(f'Done ({time.time() - t:.1f}s)')
    return r
def get_token(cookie="./cookie"):
    """Return the Google Drive download confirmation token found in a curl cookie file, or ''."""
    with open(cookie) as fh:
        entries = fh.readlines()
    for entry in entries:
        if "download" not in entry:
            continue
        return entry.split()[-1]  # token is the last whitespace-separated field
    return ""
# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
#
#
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
# # Uploads a file to a bucket
# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(destination_blob_name)
#
# blob.upload_from_filename(source_file_name)
#
# print('File {} uploaded to {}.'.format(
# source_file_name,
# destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
# # Uploads a blob from a bucket
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(source_blob_name)
#
# blob.download_to_filename(destination_file_name)
#
# print('Blob {} downloaded to {}.'.format(
# source_blob_name,
# destination_file_name))
...@@ -511,7 +511,7 @@ def check_dataset(data, autodownload=True): ...@@ -511,7 +511,7 @@ def check_dataset(data, autodownload=True):
LOGGER.info(f'Downloading {s} to {f}...') LOGGER.info(f'Downloading {s} to {f}...')
torch.hub.download_url_to_file(s, f) torch.hub.download_url_to_file(s, f)
Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root
ZipFile(f).extractall(path=DATASETS_DIR) # unzip unzip_file(f, path=DATASETS_DIR) # unzip
Path(f).unlink() # remove zip Path(f).unlink() # remove zip
r = None # success r = None # success
elif s.startswith('bash '): # bash script elif s.startswith('bash '): # bash script
...@@ -566,6 +566,16 @@ def yaml_save(file='data.yaml', data={}): ...@@ -566,6 +566,16 @@ def yaml_save(file='data.yaml', data={}):
yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False) yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False)
def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
    """Extract a *.zip archive to path/ (default: the archive's parent directory),
    skipping members whose names contain any of the strings in `exclude`."""
    if path is None:
        path = Path(file).parent  # default path
    with ZipFile(file) as archive:
        wanted = [m for m in archive.namelist() if not any(s in m for s in exclude)]
        for member in wanted:
            archive.extract(member, path=path)
def url2file(url): def url2file(url):
# Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/
...@@ -601,7 +611,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry ...@@ -601,7 +611,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry
if unzip and success and f.suffix in ('.zip', '.tar', '.gz'): if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
LOGGER.info(f'Unzipping {f}...') LOGGER.info(f'Unzipping {f}...')
if f.suffix == '.zip': if f.suffix == '.zip':
ZipFile(f).extractall(path=dir) # unzip unzip_file(f, dir) # unzip
elif f.suffix == '.tar': elif f.suffix == '.tar':
os.system(f'tar xf {f} --directory {f.parent}') # unzip os.system(f'tar xf {f} --directory {f.parent}') # unzip
elif f.suffix == '.gz': elif f.suffix == '.gz':
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论