Unverified 提交 c6b5bfca 作者: Glenn Jocher 提交者: GitHub

Updated cache v0.2 with `hashlib` (#3350)

* Update cache v0.2 to include parent hash. Possible fix for https://github.com/ultralytics/yolov5/issues/3349
* Update datasets.py
上级 1f8d716e
# Dataset utils and dataloaders # Dataset utils and dataloaders
import glob import glob
import hashlib
import logging import logging
import math import math
import os import os
...@@ -36,9 +37,12 @@ for orientation in ExifTags.TAGS.keys(): ...@@ -36,9 +37,12 @@ for orientation in ExifTags.TAGS.keys():
break break
def get_hash(files): def get_hash(paths):
# Returns a single hash value of a list of files # Returns a single hash value of a list of paths (files or dirs)
return sum(os.path.getsize(f) for f in files if os.path.isfile(f)) size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
h = hashlib.md5(str(size).encode()) # hash sizes
h.update(''.join(paths).encode()) # hash paths
return h.hexdigest() # return hash
def exif_size(img): def exif_size(img):
...@@ -383,7 +387,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing ...@@ -383,7 +387,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
if cache_path.is_file(): if cache_path.is_file():
cache, exists = torch.load(cache_path), True # load cache, exists = torch.load(cache_path), True # load
if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed if cache['hash'] != get_hash(self.label_files + self.img_files): # changed
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
else: else:
cache, exists = self.cache_labels(cache_path, prefix), False # cache cache, exists = self.cache_labels(cache_path, prefix), False # cache
...@@ -501,9 +505,9 @@ class LoadImagesAndLabels(Dataset): # for training/testing ...@@ -501,9 +505,9 @@ class LoadImagesAndLabels(Dataset): # for training/testing
x['hash'] = get_hash(self.label_files + self.img_files) x['hash'] = get_hash(self.label_files + self.img_files)
x['results'] = nf, nm, ne, nc, i + 1 x['results'] = nf, nm, ne, nc, i + 1
x['version'] = 0.1 # cache version x['version'] = 0.2 # cache version
try: try:
torch.save(x, path) # save for next time torch.save(x, path) # save cache for next time
logging.info(f'{prefix}New cache created: {path}') logging.info(f'{prefix}New cache created: {path}')
except Exception as e: except Exception as e:
logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论