Exemplo n.º 1
0
def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
    """Upload compiled parts for the given prefix to AWS.

    For each artifact (names, synonyms, xrefs, relations, properties and
    alternative identifiers) the matching getter is called with ``prefix``,
    the versioned cache file is located via ``prefix_cache_join`` and the file
    is handed to ``upload_file``.

    :param prefix: The prefix whose artifacts are generated and uploaded. It is
        also the first component of every upload key.
    :param bucket: The bucket name forwarded to ``upload_file``.
    :param s3_client: The client forwarded to every ``upload_file`` call. If
        ``None``, a single ``boto3.client("s3")`` is created and reused.
    :raises FileNotFoundError: If an expected cache file does not exist after
        its getter has been called.
    """
    if s3_client is None:
        s3_client = boto3.client("s3")

    # (human-readable label for logging, getter to call, cache file name)
    artifacts = (
        ("id->name mapping", get_id_name_mapping, "names.tsv"),
        ("id->synonyms mapping", get_id_synonyms_mapping, "synonyms.tsv"),
        ("xrefs", get_xrefs_df, "xrefs.tsv"),
        ("relations", get_relations_df, "relations.tsv"),
        ("properties", get_properties_df, "properties.tsv"),
        ("alternative identifiers", get_id_to_alts, "alt_ids.tsv"),
    )

    for label, getter, file_name in artifacts:
        logger.info("[%s] getting %s", prefix, label)
        # The return value is not used here; the file on disk is what gets uploaded.
        getter(prefix)

        # The version is looked up after the getter has run, matching the
        # original statement order for every artifact.
        path = prefix_cache_join(prefix, name=file_name, version=get_version(prefix))
        if not path.exists():
            raise FileNotFoundError(f"[{prefix}] missing cache file: {path}")

        # NOTE(review): os.path.join uses the OS path separator, so on Windows
        # the key would contain backslashes - confirm whether that matters.
        key = os.path.join(prefix, "cache", file_name)
        logger.info("[%s] uploading %s", prefix, label)
        # Every upload, including alt_ids.tsv, uses the same client so that a
        # caller-supplied client is never silently ignored.
        upload_file(path=path, bucket=bucket, key=key, s3_client=s3_client)
Exemplo n.º 2
0
 def test_get_xrefs(self):
     """Check that ChEBI xrefs come back as a data frame with the expected columns."""
     # The lookup runs inside the ``chebi_patch`` context manager.
     with chebi_patch:
         xrefs_df = get_xrefs_df("chebi")
     self.assertIsInstance(xrefs_df, pd.DataFrame)

     expected_columns = ["chebi_id", TARGET_PREFIX, TARGET_ID]
     actual_columns = list(xrefs_df.columns)
     self.assertEqual(expected_columns, actual_columns)
Exemplo n.º 3
0
    def test_get_xrefs(self):
        """Check that ChEBI xref source identifiers do not start with their own namespace."""
        xrefs_df = get_xrefs_df('chebi', url=TEST_CHEBI_OBO_PATH, local=True)
        self.assertIsInstance(xrefs_df, pd.DataFrame)

        # Only the source columns are inspected; targets are external.
        source_pairs = xrefs_df[['source_ns', 'source_id']].values
        for namespace, identifier in source_pairs:
            # Exact-case check on the bare namespace.
            self.assertFalse(identifier.startswith(namespace))

            # Case-insensitive check on the bare namespace.
            identifier_lower = identifier.lower()
            namespace_lower = namespace.lower()
            self.assertFalse(identifier_lower.startswith(namespace_lower), msg=f'Bad value: {identifier}')

            # Same two checks with the namespace followed by a colon.
            self.assertFalse(identifier.startswith(f'{namespace}:'))
            self.assertFalse(identifier_lower.startswith(f'{namespace_lower}:'))