Unverified 提交 f8651c38 作者: Glenn Jocher 提交者: GitHub

Parse URL authentication (#3424)

* Parse URL authentication
* urllib.parse.unquote()
* improved error handling
* improved error handling
* remove %3F
* update check_file()
上级 3cb9ad4f
...@@ -9,6 +9,7 @@ import random ...@@ -9,6 +9,7 @@ import random
import re import re
import subprocess import subprocess
import time import time
import urllib
from itertools import repeat from itertools import repeat
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
from pathlib import Path from pathlib import Path
...@@ -183,7 +184,8 @@ def check_file(file): ...@@ -183,7 +184,8 @@ def check_file(file):
if Path(file).is_file() or file == '': # exists if Path(file).is_file() or file == '': # exists
return file return file
elif file.startswith(('http://', 'https://')): # download elif file.startswith(('http://', 'https://')): # download
url, file = file, Path(file).name url, file = file, Path(urllib.parse.unquote(str(file))).name # url, file (decode '%2F' to '/' etc.)
file = file.split('?')[0] # parse authentication https://url.com/file.txt?auth...
print(f'Downloading {url} to {file}...') print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, file) torch.hub.download_url_to_file(url, file)
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
......
...@@ -4,6 +4,7 @@ import os ...@@ -4,6 +4,7 @@ import os
import platform import platform
import subprocess import subprocess
import time import time
import urllib
from pathlib import Path from pathlib import Path
import requests import requests
...@@ -19,30 +20,32 @@ def gsutil_getsize(url=''): ...@@ -19,30 +20,32 @@ def gsutil_getsize(url=''):
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
file = Path(file) file = Path(file)
try: # GitHub assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
print(f'Downloading {url} to {file}...') print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, str(file)) torch.hub.download_url_to_file(url, str(file))
assert file.exists() and file.stat().st_size > min_bytes # check assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # GCP except Exception as e: # url2
file.unlink(missing_ok=True) # remove partial downloads file.unlink(missing_ok=True) # remove partial downloads
print(f'Download error: {e}\nRe-attempting {url2 or url} to {file}...') print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
finally: finally:
if not file.exists() or file.stat().st_size < min_bytes: # check if not file.exists() or file.stat().st_size < min_bytes: # check
file.unlink(missing_ok=True) # remove partial downloads file.unlink(missing_ok=True) # remove partial downloads
print(f'ERROR: Download failure: {error_msg or url}') print(f"ERROR: {assert_msg}\n{error_msg}")
print('') print('')
def attempt_download(file, repo='ultralytics/yolov5'): def attempt_download(file, repo='ultralytics/yolov5'): # from utils.google_utils import *; attempt_download()
# Attempt file download if does not exist # Attempt file download if does not exist
file = Path(str(file).strip().replace("'", '')) file = Path(str(file).strip().replace("'", ''))
if not file.exists(): if not file.exists():
# URL specified # URL specified
name = file.name name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(('http:/', 'https:/')): # download if str(file).startswith(('http:/', 'https:/')): # download
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
safe_download(file=name, url=url, min_bytes=1E5) safe_download(file=name, url=url, min_bytes=1E5)
return name return name
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论