Coverage for langbrainscore/utils/cache.py: 55%

20 statements  

« prev     ^ index     » next       coverage.py v6.4, created at 2022-06-07 21:22 +0000

1''' 

2utilities related to caching, including creating a directory structure, 

3managing a disk-backed LRU cache, etc.  

4''' 

5 

6import typing 

7from pathlib import Path 

8import os 

9from dataclasses import dataclass 

10 

@dataclass
class CacheDescriptor:
    '''
    A class to conveniently hold various paths within the LBS_CACHE directory structure

    Attributes:
        root: top-level directory of the cache
        subdir: a working directory for one category of cached objects
            (presumably located beneath `root`; this is not enforced here)
    '''
    root: Path
    subdir: Path
    # human_readable_name: Path

    def mkdirs(self):
        '''creates `root` and `subdir` (plus any missing parents) if they don't already exist'''
        # `root` is created explicitly rather than relying on it being an
        # ancestor of `subdir`, so both paths exist after this call
        for directory in (self.root, self.subdir):
            directory.mkdir(parents=True, exist_ok=True)

23 

24 

def pathify(fpth: typing.Union[Path, str, typing.Any]) -> Path:
    '''
    converts `fpth` into a `Path`, expands a leading `~` to the user's home
    directory, and returns the resolved result (made absolute, with symlinks
    and `..` components resolved)
    '''
    as_path = Path(fpth)
    with_home = as_path.expanduser()
    return with_home.resolve()

30 

31 

def get_cache_directory(prefix: typing.Union[str, Path] = '~/.cache',
                        calling_class=None,
                        ) -> CacheDescriptor:
    '''
    builds, creates on disk, and returns a `CacheDescriptor` for the langbrainscore cache.

    the cache root is `<prefix>/langbrainscore`, where `<prefix>` is taken from the
    `LBS_CACHE` environment variable when it is set to a non-empty value, and from
    the `prefix` argument otherwise. the descriptor's `subdir` is
    `<root>/<calling_class>`, or `<root>/uncategorized` if `calling_class` is not given.

    any instance-specific runs must make sure to make their own directory structure
    within this root and identify themselves uniquely so as not to get overwritten
    by other runs

    Args:
        prefix: directory under which the `langbrainscore` cache root is placed;
            ignored when `LBS_CACHE` is set to a non-empty value
        calling_class: name of the subdirectory to use beneath the cache root
            (NOTE(review): presumably a string such as a class name; it is joined
            onto a `Path`, so it must be a `str` or path-like object)

    Returns:
        a `CacheDescriptor` whose directories have been created
    '''
    # the environment variable takes first priority; an empty value is treated as
    # unset, because an empty path would resolve to the current working directory
    env_prefix = os.environ.get('LBS_CACHE')
    if env_prefix:
        prefix = env_prefix

    root = pathify(prefix) / 'langbrainscore'
    subdir = root / (calling_class or 'uncategorized')

    # TODO: optionally generate a unique human-readable run name under `root`

    descriptor = CacheDescriptor(root=root, subdir=subdir)
    descriptor.mkdirs()
    return descriptor

56 

57