def fetch_and_preprocess_irods_metadata_by_metadata(search_criteria, irods_zone, issues_dict, reference):
    """
    Fetch from iRODS the metadata of every file matching some metadata criteria, and check it.

    Each raw metadata object returned by iRODS is converted to an IrodsSeqFileMetadata and
    checked against ``reference``; the check results are appended to ``issues_dict`` under the
    file's path. If the retrieval from iRODS fails, the error is printed and the process exits
    with status 1.

    :param search_criteria: a dict formed of key = attr name, val = attr value. The operator is by default =.
    :param irods_zone: the irods zone where to search for the data matching the criteria given
    :param issues_dict: an existing dictionary of issues (key: fpath, value: list of check results),
        which this function updates in place with the issues found. Both plain dicts and
        defaultdicts are accepted; an entry is created for every file found.
    :param reference: passed through unchanged to ``check_metadata`` of each file's metadata
    :return: a dict of key: fpath, value: the IrodsSeqFileMetadata for that path
    """
    try:
        all_files_metadata_objs_list = iRODSMetadataProvider.retrieve_raw_files_metadata_by_metadata(
            search_criteria, irods_zone)
    except Exception as e:
        # Retrieval failure is treated as fatal for the whole run.
        print(e)
        sys.exit(1)

    irods_metadata_by_path = {}
    for raw_metadata in all_files_metadata_objs_list:
        file_metadata = IrodsSeqFileMetadata.from_raw_metadata(raw_metadata)
        check_results = file_metadata.check_metadata(reference)
        irods_metadata_by_path[raw_metadata.fpath] = file_metadata
        # setdefault (rather than plain indexing) so that a regular dict without an entry
        # for this path does not raise KeyError; behaves the same on a defaultdict(list).
        issues_dict.setdefault(raw_metadata.fpath, []).extend(check_results)
    return irods_metadata_by_path
def fetch_and_preprocess_irods_metadata_by_path(irods_fpaths, issues_dict, reference):
    """
    Fetch the iRODS metadata of each given file path, preprocess it and check it.

    Each raw metadata object is converted to an IrodsSeqFileMetadata and checked against
    ``reference``; the check results are appended to ``issues_dict`` under the file's path.
    If fetching the metadata of any path fails, the error is printed and the process exits
    with status 1.

    :param irods_fpaths: an iterable of iRODS file paths to fetch the metadata for
    :param issues_dict: an existing dictionary of issues (key: fpath, value: list of check results),
        which this function updates in place with the issues found. Both plain dicts and
        defaultdicts are accepted; an entry is created for every path processed.
    :param reference: passed through unchanged to ``check_metadata`` of each file's metadata
    :return: a defaultdict of key: fpath, value: the IrodsSeqFileMetadata for that path
    """
    # Kept as defaultdict(list) for backward compatibility with callers that may rely on
    # lookups of unknown paths not raising.
    irods_metadata_dict = defaultdict(list)
    for fpath in irods_fpaths:
        try:
            raw_metadata = iRODSMetadataProvider.fetch_raw_file_metadata_by_path(fpath)
        except Exception as e:
            # Fetch failure is treated as fatal for the whole run.
            print(e)
            sys.exit(1)
        file_metadata = IrodsSeqFileMetadata.from_raw_metadata(raw_metadata)
        check_results = file_metadata.check_metadata(reference)
        irods_metadata_dict[fpath] = file_metadata
        # setdefault (rather than plain indexing) so that a regular dict without an entry
        # for this path does not raise KeyError; behaves the same on a defaultdict(list).
        issues_dict.setdefault(fpath, []).extend(check_results)
    return irods_metadata_dict
def test_from_raw_metadata_only_replicas(self):
    """Raw metadata built from only a path and replicas gives empty samples, libraries and checksum."""
    # Only fpath and file_replicas are supplied to the raw metadata object.
    raw = IrodsRawFileMetadata(
        fpath='/seq/123.bam',
        file_replicas=[
            baton_models.DataObjectReplica(number=1, checksum="123abc"),
            baton_models.DataObjectReplica(number=2, checksum="abc"),
        ],
    )
    result = IrodsSeqFileMetadata.from_raw_metadata(raw)

    # Every identifier type is expected to map to an empty set.
    empty_ids = {id_type: set() for id_type in ('name', 'accession_number', 'internal_id')}
    self.assertEqual(result.samples, empty_ids)
    self.assertEqual(result.libraries, empty_ids)
    self.assertEqual(result.checksum_in_meta, set())