Unverified 提交 f8651c38 作者: Glenn Jocher 提交者: GitHub

Parse URL authentication (#3424)

* Parse URL authentication
* urllib.parse.unquote()
* improved error handling
* improved error handling
* remove %3F
* update check_file()
上级 3cb9ad4f
......@@ -9,6 +9,7 @@ import random
import re
import subprocess
import time
import urllib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
......@@ -183,7 +184,8 @@ def check_file(file):
if Path(file).is_file() or file == '': # exists
return file
elif file.startswith(('http://', 'https://')): # download
url, file = file, Path(file).name
url, file = file, Path(urllib.parse.unquote(str(file))).name # url, file (decode '%2F' to '/' etc.)
file = file.split('?')[0] # parse authentication https://url.com/file.txt?auth...
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, file)
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
......
......@@ -4,6 +4,7 @@ import os
import platform
import subprocess
import time
import urllib
from pathlib import Path
import requests
......@@ -19,30 +20,32 @@ def gsutil_getsize(url=''):
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
file = Path(file)
try: # GitHub
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, str(file))
assert file.exists() and file.stat().st_size > min_bytes # check
except Exception as e: # GCP
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
file.unlink(missing_ok=True) # remove partial downloads
print(f'Download error: {e}\nRe-attempting {url2 or url} to {file}...')
print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
finally:
if not file.exists() or file.stat().st_size < min_bytes: # check
file.unlink(missing_ok=True) # remove partial downloads
print(f'ERROR: Download failure: {error_msg or url}')
print(f"ERROR: {assert_msg}\n{error_msg}")
print('')
def attempt_download(file, repo='ultralytics/yolov5'):
def attempt_download(file, repo='ultralytics/yolov5'): # from utils.google_utils import *; attempt_download()
# Attempt file download if does not exist
file = Path(str(file).strip().replace("'", ''))
if not file.exists():
# URL specified
name = file.name
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(('http:/', 'https:/')): # download
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
safe_download(file=name, url=url, min_bytes=1E5)
return name
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论