import logging
import os

import boto3

logger = logging.getLogger(__name__)

# get_id_name_mapping, get_id_synonyms_mapping, get_xrefs_df, get_relations_df,
# get_properties_df, get_id_to_alts, prefix_cache_join, get_version, and
# upload_file are assumed to be imported from elsewhere in this package.


def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
    """Upload compiled parts for the given prefix to AWS."""
    if s3_client is None:
        s3_client = boto3.client("s3")

    logger.info("[%s] getting id->name mapping", prefix)
    get_id_name_mapping(prefix)
    id_name_path = prefix_cache_join(prefix, name="names.tsv", version=get_version(prefix))
    if not id_name_path.exists():
        raise FileNotFoundError(id_name_path)
    id_name_key = os.path.join(prefix, "cache", "names.tsv")
    logger.info("[%s] uploading id->name mapping", prefix)
    upload_file(path=id_name_path, bucket=bucket, key=id_name_key, s3_client=s3_client)

    logger.info("[%s] getting id->synonyms mapping", prefix)
    get_id_synonyms_mapping(prefix)
    id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=get_version(prefix))
    if not id_synonyms_path.exists():
        raise FileNotFoundError(id_synonyms_path)
    id_synonyms_key = os.path.join(prefix, "cache", "synonyms.tsv")
    logger.info("[%s] uploading id->synonyms mapping", prefix)
    upload_file(path=id_synonyms_path, bucket=bucket, key=id_synonyms_key, s3_client=s3_client)

    logger.info("[%s] getting xrefs", prefix)
    get_xrefs_df(prefix)
    xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=get_version(prefix))
    if not xrefs_path.exists():
        raise FileNotFoundError(xrefs_path)
    xrefs_key = os.path.join(prefix, "cache", "xrefs.tsv")
    logger.info("[%s] uploading xrefs", prefix)
    upload_file(path=xrefs_path, bucket=bucket, key=xrefs_key, s3_client=s3_client)

    logger.info("[%s] getting relations", prefix)
    get_relations_df(prefix)
    relations_path = prefix_cache_join(prefix, name="relations.tsv", version=get_version(prefix))
    if not relations_path.exists():
        raise FileNotFoundError(relations_path)
    relations_key = os.path.join(prefix, "cache", "relations.tsv")
    logger.info("[%s] uploading relations", prefix)
    upload_file(path=relations_path, bucket=bucket, key=relations_key, s3_client=s3_client)

    logger.info("[%s] getting properties", prefix)
    get_properties_df(prefix)
    properties_path = prefix_cache_join(prefix, name="properties.tsv", version=get_version(prefix))
    if not properties_path.exists():
        raise FileNotFoundError(properties_path)
    properties_key = os.path.join(prefix, "cache", "properties.tsv")
    logger.info("[%s] uploading properties", prefix)
    upload_file(path=properties_path, bucket=bucket, key=properties_key, s3_client=s3_client)

    logger.info("[%s] getting alternative identifiers", prefix)
    get_id_to_alts(prefix)
    alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=get_version(prefix))
    if not alts_path.exists():
        raise FileNotFoundError(alts_path)
    alts_key = os.path.join(prefix, "cache", "alt_ids.tsv")
    logger.info("[%s] uploading alternative identifiers", prefix)
    upload_file(path=alts_path, bucket=bucket, key=alts_key, s3_client=s3_client)
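# The six blocks above differ only in the getter, the cached file name, and the
# log label, so they could be driven from a table. The sketch below is a
# hypothetical refactor (upload_artifacts_for_prefix_compact and _ARTIFACTS are
# not part of the package); it assumes each getter populates the on-disk cache
# as a side effect, as the function above relies on.
_ARTIFACTS = [
    (get_id_name_mapping, "names.tsv", "id->name mapping"),
    (get_id_synonyms_mapping, "synonyms.tsv", "id->synonyms mapping"),
    (get_xrefs_df, "xrefs.tsv", "xrefs"),
    (get_relations_df, "relations.tsv", "relations"),
    (get_properties_df, "properties.tsv", "properties"),
    (get_id_to_alts, "alt_ids.tsv", "alternative identifiers"),
]


def upload_artifacts_for_prefix_compact(*, prefix: str, bucket: str, s3_client=None):
    """Sketch: same behavior as upload_artifacts_for_prefix, expressed as a loop."""
    if s3_client is None:
        s3_client = boto3.client("s3")
    version = get_version(prefix)
    for getter, name, label in _ARTIFACTS:
        logger.info("[%s] getting %s", prefix, label)
        getter(prefix)  # populates the cached TSV as a side effect
        path = prefix_cache_join(prefix, name=name, version=version)
        if not path.exists():
            raise FileNotFoundError(path)
        logger.info("[%s] uploading %s", prefix, label)
        upload_file(
            path=path,
            bucket=bucket,
            key=os.path.join(prefix, "cache", name),
            s3_client=s3_client,
        )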
def test_get_xrefs(self):
    """Test getting xrefs."""
    with chebi_patch:
        df = get_xrefs_df("chebi")
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(["chebi_id", TARGET_PREFIX, TARGET_ID], list(df.columns))
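# For reference, a sketch of consuming the frame that get_xrefs_df returns.
# The column layout follows the test above; the sample row is invented, and
# TARGET_PREFIX/TARGET_ID resolving to "target_ns"/"target_id" (mirroring the
# source_ns/source_id columns in the next test) is an assumption.
import pandas as pd

df = pd.DataFrame(
    [("24867", "cas", "7439-93-2")],  # invented row for illustration
    columns=["chebi_id", "target_ns", "target_id"],
)
# Index xrefs by (namespace, identifier) for O(1) reverse lookup into ChEBI.
xref_index = {
    (ns, xref_id): chebi_id
    for chebi_id, ns, xref_id in df.itertuples(index=False)
}
print(xref_index[("cas", "7439-93-2")])  # -> '24867'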
def test_get_xrefs(self):
    """Test getting xrefs."""
    df = get_xrefs_df('chebi', url=TEST_CHEBI_OBO_PATH, local=True)
    self.assertIsInstance(df, pd.DataFrame)

    # No need to check the targets, since they are external by construction
    for key, value in df[['source_ns', 'source_id']].values:
        self.assertFalse(value.startswith(key))
        self.assertFalse(value.lower().startswith(key.lower()), msg=f'Bad value: {value}')
        self.assertFalse(value.startswith(f'{key}:'))
        self.assertFalse(value.lower().startswith(f'{key.lower()}:'))
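# The four assertions above enforce one invariant: a source identifier must not
# redundantly echo its namespace, with or without a colon, in any casing. A
# hypothetical normalizer (strip_redundant_prefix is not part of the package)
# that would repair an offending value:
def strip_redundant_prefix(ns: str, identifier: str) -> str:
    """Strip a leading 'ns:' or 'ns' echo from the identifier, if present."""
    lowered = identifier.lower()
    for echo in (f'{ns.lower()}:', ns.lower()):
        if lowered.startswith(echo):
            return identifier[len(echo):]
    return identifier


assert strip_redundant_prefix('chebi', 'CHEBI:1234') == '1234'
assert strip_redundant_prefix('chebi', '1234') == '1234'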