def generate_remote_files_from_manifest(remote_file_manifest, algs, strict=False):
    """Build remote file references from a remote file manifest.

    The manifest is read in one of two layouts, chosen by looking at the
    first line of the file:

    * if the first line (ignoring leading whitespace) starts with ``{``, the
      file is treated as a JSON stream and each line is parsed as one entry;
    * otherwise the whole file is parsed as a single JSON document and the
      result is iterated as the sequence of entries.

    Each entry is read for ``filename`` (prefixed with ``data/``), ``url``
    (when it is a list, its first element is used), ``length`` and one or
    more keys named after members of ``bdbagit.CHECKSUM_ALGOS`` holding the
    checksum values.

    :param remote_file_manifest: path of the manifest, passed to ``open()``.
    :param algs: checksum algorithm names that are accepted when ``strict``
        is true; ignored otherwise.
    :param strict: when true, checksums whose algorithm is not in ``algs``
        are skipped; checksums for algorithms in ``algs`` are always kept.
    :return: the dict handed to ``bdbagit.make_remote_file_entry`` for every
        accepted checksum.
    :raises ValueError: if an entry has no key naming a checksum algorithm
        from ``bdbagit.CHECKSUM_ALGOS``.
    """
    logger.info("Generating remote file references from %s" % remote_file_manifest)
    remote_files = dict()
    # The ``with`` statement closes the file on every exit path, so no
    # explicit close() is needed.
    with open(remote_file_manifest, "r") as rfm_in:
        # Peek at the first line to detect the layout, then rewind so the
        # chosen parser sees the file from the beginning.
        first_line = rfm_in.readline().lstrip()
        rfm_in.seek(0)
        is_json_stream = first_line.startswith('{')
        if is_json_stream:
            fetch = rfm_in
        else:
            fetch = json.load(rfm_in, object_pairs_hook=OrderedDict)

        for entry in fetch:
            if is_json_stream:
                # Blank lines (e.g. a trailing newline at the end of the
                # stream) carry no entry; skip them instead of failing.
                if not entry.strip():
                    continue
                entry = json.loads(entry, object_pairs_hook=OrderedDict)

            filename = ''.join(['data', '/', entry['filename']])
            url = entry['url'][0] if isinstance(entry['url'], list) else entry['url']

            if not any(alg in entry for alg in bdbagit.CHECKSUM_ALGOS):
                raise ValueError(
                    "A remote file manifest entry did not provide a required hash value: %s" %
                    json.dumps(entry))

            for alg in bdbagit.CHECKSUM_ALGOS:
                if alg not in entry:
                    continue
                # Decide per algorithm: a disallowed checksum must not
                # suppress allowed ones that happen to be visited after it,
                # otherwise the result depends on iteration order.
                if strict and alg not in algs:
                    continue
                bdbagit.make_remote_file_entry(
                    remote_files, filename, url, entry['length'], alg, entry[alg])
    return remote_files
def generate_remote_files_from_manifest(remote_file_manifest, algs, strict=False):
    """Build remote file references from a remote file manifest.

    The manifest is read in one of two layouts, chosen by looking at the
    first line of the file:

    * if the first line (ignoring leading whitespace) starts with ``{``, the
      file is treated as a JSON stream and each line is parsed as one entry;
    * otherwise the whole file is parsed as a single JSON document and the
      result is iterated as the sequence of entries.

    Each entry is read for ``filename`` (prefixed with ``data/``), ``url``,
    ``length`` and any keys named after members of
    ``bdbagit.CHECKSUM_ALGOS`` holding the checksum values. Entries without
    any such key contribute nothing.

    :param remote_file_manifest: path of the manifest, passed to ``open()``.
    :param algs: checksum algorithm names that are accepted when ``strict``
        is true; ignored otherwise.
    :param strict: when true, checksums whose algorithm is not in ``algs``
        are skipped; checksums for algorithms in ``algs`` are always kept.
    :return: the dict handed to ``bdbagit.make_remote_file_entry`` for every
        accepted checksum.
    """
    logger.info("Generating remote file references from %s" % remote_file_manifest)
    remote_files = dict()
    # The ``with`` statement closes the file on every exit path, so no
    # explicit close() is needed.
    with open(remote_file_manifest, "r") as rfm_in:
        # Peek at the first line to detect the layout, then rewind so the
        # chosen parser sees the file from the beginning.
        first_line = rfm_in.readline().lstrip()
        rfm_in.seek(0)
        is_json_stream = first_line.startswith('{')
        if is_json_stream:
            fetch = rfm_in
        else:
            fetch = json.load(rfm_in, object_pairs_hook=OrderedDict)

        for entry in fetch:
            if is_json_stream:
                # Blank lines (e.g. a trailing newline at the end of the
                # stream) carry no entry; skip them instead of failing.
                if not entry.strip():
                    continue
                entry = json.loads(entry, object_pairs_hook=OrderedDict)

            # Keep the bag-relative path in a local rather than rewriting
            # the parsed entry in place.
            filename = ''.join(['data', '/', entry['filename']])

            for alg in bdbagit.CHECKSUM_ALGOS:
                if alg not in entry:
                    continue
                # Decide per algorithm: a disallowed checksum must not
                # suppress allowed ones that happen to be visited after it,
                # otherwise the result depends on iteration order.
                if strict and alg not in algs:
                    continue
                bdbagit.make_remote_file_entry(
                    remote_files, filename, entry['url'], entry['length'], alg, entry[alg])
    return remote_files