def get_metadata(self, recordID, sandbox=False, exceptions=True, timeout=15):
    """Fetch the file listing of one record and register it in the caches.

    Every file entry of the record is stored in ``self.attr_cache`` under
    ``/<zenodo|sandbox>/<recordID>/<key>``, and JSON and YAML renderings of
    the listing are stored (as bytes) in ``self.content`` under
    ``/<zenodo|sandbox>/<recordID>.json`` and ``.yaml``.

    Args:
        recordID: Identifier appended to the records API URL.
        sandbox: Query ``sandbox.zenodo.org`` instead of ``zenodo.org``.
        exceptions: Accepted for interface compatibility; not used here.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The value of the ``'files'`` entry of the JSON response, or an
        empty dict when the response status is not OK.

    Raises:
        Exception: Whatever ``requests.get`` raises is logged at critical
            level and re-raised unchanged.
    """
    if not sandbox:
        url = 'https://zenodo.org/api/records/'
    else:
        url = 'https://sandbox.zenodo.org/api/records/'
    try:
        # f-string instead of ``+`` so a non-string ID is not reported as a
        # connection error by the generic handler below.
        r = requests.get(f'{url}{recordID}', timeout=timeout)
    except requests.exceptions.ConnectTimeout:
        self.logger.critical('Connection timeout during metadata reading.')
        raise
    except Exception:
        self.logger.critical('Connection error during metadata reading.')
        raise

    js = {}
    if r.ok:
        # Parse the body once and reuse the result for both caches.
        js = json.loads(r.text)['files']
        path = 'zenodo' if not sandbox else 'sandbox'
        for f in js:
            self.attr_cache[f'/{path}/{recordID}/{f["key"]}'] = SBox(
                f, default_box=True)
        self.content[f"/{path}/{recordID}.json"] = (
            SBox(metadata=js).to_json() + "\n").encode()
        self.content[f"/{path}/{recordID}.yaml"] = SBox(
            metadata=js).to_yaml().encode()
    return js
def __init__(self, recordIDs, sandbox_recordIDs, chunksize=64, largefile=1024):
    """Set up the caches and pre-load metadata for the given records.

    Args:
        recordIDs: Record IDs fetched with ``sandbox=False``.
        sandbox_recordIDs: Record IDs fetched with ``sandbox=True``.
        chunksize: Stored on the instance as ``self.chunksize``.
        largefile: Stored on the instance as ``self.largefile``.
    """
    self.records = dict(sandbox=[], zenodo=[])
    self.attr_cache = SBox(default_box=True)
    self.dir_cache = SBox(default_box=True)
    self.open_files = {}
    self.content = {}
    self.chunksize = chunksize
    self.largefile = largefile
    self.logger = logging.getLogger()

    # Production records are loaded first, then the sandbox ones.
    batches = ((False, recordIDs), (True, sandbox_recordIDs))
    for use_sandbox, record_ids in batches:
        for record_id in record_ids:
            self.get_metadata(record_id, sandbox=use_sandbox)
def test_property_box(self):
    """Exercise the json/yaml/dict/toml shorthand properties of an SBox."""
    source = test_dict.copy()
    source["inner"] = {"CamelCase": "Item"}
    sbox = SBox(source, camel_killer_box=True)

    # Attribute access: the nested dict is an SBox with a snake_case key.
    assert isinstance(sbox.inner, SBox)
    assert sbox.inner.camel_case == "Item"

    # Serialised forms keep the converted key.
    from_json = json.loads(sbox.json)
    assert from_json["inner"]["camel_case"] == "Item"
    from_yaml = yaml.load(sbox.yaml, Loader=yaml.SafeLoader)
    assert from_yaml["inner"]["camel_case"] == "Item"

    assert repr(sbox["inner"]).startswith("<ShorthandBox")

    # ``dict`` yields a non-Box mapping.
    assert not isinstance(sbox.dict, Box)
    assert sbox.dict["inner"]["camel_case"] == "Item"

    assert sbox.toml.startswith('key1 = "value1"')
def test_property_box(self):
    """Verify SBox shorthand properties on a camel-killer box."""
    payload = test_dict.copy()
    payload['inner'] = {'CamelCase': 'Item'}
    box_under_test = SBox(payload, camel_killer_box=True)

    # The nested mapping is wrapped and its key is exposed in snake_case.
    assert isinstance(box_under_test.inner, SBox)
    assert box_under_test.inner.camel_case == 'Item'

    # Round-trip through the JSON and YAML shorthand properties.
    decoded_json = json.loads(box_under_test.json)
    assert decoded_json['inner']['camel_case'] == 'Item'
    decoded_yaml = yaml.load(box_under_test.yaml, Loader=yaml.SafeLoader)
    assert decoded_yaml['inner']['camel_case'] == 'Item'

    assert repr(box_under_test['inner']).startswith('<ShorthandBox')

    # The ``dict`` property must not hand back a Box.
    assert not isinstance(box_under_test.dict, Box)
    assert box_under_test.dict['inner']['camel_case'] == 'Item'

    assert box_under_test.toml.startswith('key1 = "value1"')
'frozen': 'box', }, frozen_box=True) # frozen_box.frozen = 'Box' # box.BoxError: Box is frozen empty_box = Box(default_box=True) print(empty_box.a.b.c) # <Box: {}> empty_box.a.b.c.d = "h" print(empty_box) camel_killer_box = Box(HelloWorld="HELLO, WORLD!", camel_killer_box=True) print(camel_killer_box.hello_world) box_of_order = Box(ordered_box=True) box_of_order.c = 1 box_of_order.a = 2 box_of_order.d = 3 print(box_of_order.keys() == ['c', 'a', 'd']) from box import BoxList box_list = BoxList({'item': x} for x in range(10)) print(box_list[5].item) from box import SBox s_box = SBox(hello='world') print(s_box.json)
class ZenodoFS(LoggingMixIn, Operations):
    """Read-only filesystem operations exposing Zenodo records as a tree.

    Layout served by the methods below::

        /zenodo/<recordID>/<file key>
        /zenodo/<recordID>.json
        /zenodo/<recordID>.yaml
        /sandbox/...                  (same, from sandbox.zenodo.org)

    ``attr_cache`` maps file paths to the per-file metadata returned by the
    records API; ``content`` maps the synthetic ``.json`` / ``.yaml`` paths
    to their encoded bytes; ``open_files`` maps paths to ``WebFile`` objects.
    """

    def __init__(self, recordIDs, sandbox_recordIDs, chunksize=64,
                 largefile=1024):
        """Set up the caches and pre-load metadata for the given records.

        Args:
            recordIDs: Record IDs fetched with ``sandbox=False``.
            sandbox_recordIDs: Record IDs fetched with ``sandbox=True``.
            chunksize: Passed to every ``WebFile`` created by ``open``.
            largefile: Passed to every ``WebFile`` created by ``open``.
        """
        # NOTE(review): ``records`` and ``dir_cache`` are not read by any
        # method visible in this class.
        self.records = {
            'sandbox': [],
            'zenodo': [],
        }
        self.attr_cache = SBox(default_box=True)
        self.dir_cache = SBox(default_box=True)
        self.open_files = {}
        self.content = {}
        self.chunksize = chunksize
        self.largefile = largefile
        self.logger = logging.getLogger()
        for rid in recordIDs:
            self.get_metadata(rid, sandbox=False)
        for rid in sandbox_recordIDs:
            self.get_metadata(rid, sandbox=True)

    @staticmethod
    def _locate_record(parts):
        """Return ``(recordID, sandbox)`` for a path split on ``'/'``.

        ``parts`` must have at least three items (``['', root, record]``).
        For a three-part path the ``.json`` / ``.yaml`` suffix of the
        synthetic metadata files is removed, so the ID names the record.
        """
        record_id = parts[2]
        if len(parts) == 3:
            for ext in ('.json', '.yaml'):
                if record_id.endswith(ext):
                    record_id = record_id[:-len(ext)]
                    break
        return record_id, parts[1] == 'sandbox'

    # NOTE(review): ``functools.cache`` takes no arguments, and caching a
    # method keys on ``self`` - confirm which ``cache`` this file imports.
    @cache(maxsize=1024)
    def get_metadata(self, recordID, sandbox=False, exceptions=True,
                     timeout=15):
        """Fetch the file listing of one record and register it.

        Every file entry is stored in ``self.attr_cache`` under
        ``/<zenodo|sandbox>/<recordID>/<key>``; JSON and YAML renderings of
        the listing are stored as bytes in ``self.content``.

        Args:
            recordID: Identifier appended to the records API URL.
            sandbox: Query ``sandbox.zenodo.org`` instead of ``zenodo.org``.
            exceptions: Accepted for interface compatibility; not used here.
            timeout: Timeout in seconds handed to ``requests.get``.

        Returns:
            The value of the ``'files'`` entry of the JSON response, or an
            empty dict when the response status is not OK.

        Raises:
            Exception: Whatever ``requests.get`` raises is logged at
                critical level and re-raised unchanged.
        """
        if not sandbox:
            url = 'https://zenodo.org/api/records/'
        else:
            url = 'https://sandbox.zenodo.org/api/records/'
        try:
            r = requests.get(f'{url}{recordID}', timeout=timeout)
        except requests.exceptions.ConnectTimeout:
            self.logger.critical(
                'Connection timeout during metadata reading.')
            raise
        except Exception:
            self.logger.critical('Connection error during metadata reading.')
            raise

        js = {}
        if r.ok:
            # Parse the body once and reuse the result for both caches.
            js = json.loads(r.text)['files']
            path = 'zenodo' if not sandbox else 'sandbox'
            for f in js:
                self.attr_cache[f'/{path}/{recordID}/{f["key"]}'] = SBox(
                    f, default_box=True)
            self.content[f"/{path}/{recordID}.json"] = (
                SBox(metadata=js).to_json() + "\n").encode()
            self.content[f"/{path}/{recordID}.yaml"] = SBox(
                metadata=js).to_yaml().encode()
        return js

    def readdir(self, path, fh):
        """List the names directly below ``path``.

        The root lists the two fixed top-level directories. Below that the
        listing is derived from the keys of ``attr_cache``; at the
        ``/zenodo`` / ``/sandbox`` level a ``<record>.yaml`` and a
        ``<record>.json`` name is added for every record directory.
        """
        if path == '/':
            return ['.', 'sandbox', 'zenodo']

        parts = path.split('/')
        if len(parts) >= 3:
            # Make sure the record is loaded, from the server that matches
            # the top-level directory of the path.
            record_id, sandbox = self._locate_record(parts)
            self.get_metadata(record_id, sandbox=sandbox)

        # Keep only keys strictly below ``path`` and take their next path
        # component; the '/' check rejects siblings sharing a name prefix.
        n = len(path) + 1
        entries = {
            name[n:].split('/')[0]
            for name in self.attr_cache.keys()
            if name.startswith(path) and len(name) > n
            and name[n - 1] == '/'
        }
        if len(parts) == 2:
            records = [name for name in entries if '.' not in name]
            entries.update(f"{name}.yaml" for name in records)
            entries.update(f"{name}.json" for name in records)
        return list(entries)

    def getattr(self, path, fh=None):
        """Return the stat dictionary for ``path``.

        Directories are reported with mode ``0o755`` and regular files with
        mode ``0o444``. All three timestamps are set to the current time.

        Raises:
            KeyError: For a three-part path containing a ``'.'`` that is
                not present in ``self.content``.
        """
        parts = path.split('/')
        level = len(parts)
        st = {}
        if path in ['/', '/sandbox', '/zenodo']:
            st['st_mode'] = (S_IFDIR | 0o755)
            st['st_nlink'] = 2
        elif level == 3:
            if '.' in path:
                # Synthetic metadata file served from ``self.content``.
                size = len(self.content[path])
                st = {'st_mode': (S_IFREG | 0o444), 'st_size': size}
            else:
                st['st_mode'] = (S_IFDIR | 0o755)
                st['st_nlink'] = 2
        else:
            size = 0
            st = {'st_mode': (S_IFREG | 0o444), 'st_size': size}

        if level >= 3:
            record_id, sandbox = self._locate_record(parts)
            self.get_metadata(record_id, sandbox=sandbox)
        # Membership test first: ``attr_cache`` is a default_box, so plain
        # indexing of an unknown path may create an empty entry that
        # ``readdir`` would then list.
        if level == 4 and path in self.attr_cache:
            fn = self.attr_cache[path]
            if 'size' in fn:
                st['st_size'] = fn['size']
        st['st_ctime'] = st['st_mtime'] = st['st_atime'] = time()
        return st

    def open(self, path, mode):
        """Create a ``WebFile`` for ``path`` unless one is already open.

        The URL is taken from the ``links.self`` entry of the cached file
        metadata and the size from its ``size`` entry (0 when absent).
        Always returns 0.
        """
        if path not in self.open_files:
            url = self.attr_cache[path]['links'].get('self')
            size = self.attr_cache[path].get('size', 0)
            self.open_files[path] = WebFile(url, size, self.chunksize,
                                            self.largefile)
        return 0

    def read(self, path, size, offset, fh):
        """Return ``size`` bytes of ``path`` starting at ``offset``.

        Synthetic metadata files are sliced from ``self.content``; all
        other paths are sliced from the ``WebFile`` created by ``open``.
        """
        if path in self.content:
            return self.content[path][offset:offset + size]
        return self.open_files[path][offset:offset + size]

    def release(self, path, fh):
        """Close and forget the ``WebFile`` of ``path``, if one is open."""
        if path in self.open_files:
            wf = self.open_files.pop(path)
            wf.close()