def get_gene_name_of_transcript_id(transcript_id: str,
                                   ensembl: pyensembl.Genome,
                                   raise_on_error: bool = False):
    """ Look up the gene name (symbol) that a transcript id belongs to.

    This wraps ensembl.gene_name_of_transcript_id so that an unknown
    transcript id can (optionally) be reported with a warning instead of
    an exception.

    Parameters
    ----------
    transcript_id: string
        The transcript identifier (e.g., "ENSMUST00000035194")

    ensembl: pyensembl.Genome
        The annotation database

    raise_on_error: bool
        If True, a failed lookup raises a ValueError (chained to the
        original error). If False, a warning is logged instead.

    Returns
    -------
    gene_name: string
        The gene name (also called gene symbol, e.g., "Mapkapk3")

        --- OR ---

    None, if the lookup failed and raise_on_error is False
    """
    try:
        return ensembl.gene_name_of_transcript_id(transcript_id)
    except ValueError as lookup_error:
        message = (
            "[pyensembl_utils.get_gene_name_of_transcript_id]: could not "
            "find match for transcript id: {}".format(transcript_id)
        )

        if not raise_on_error:
            # best-effort mode: report the miss and fall back to None
            logger.warning(message)
            return None

        raise ValueError(message) from lookup_error
def get_gene_ids_of_transcript_id(transcript_id: str,
                                  ensembl: pyensembl.Genome,
                                  raise_on_error: bool = False):
    """ Extract all gene ids associated with the given transcript.

    The transcript id is first resolved to its gene name, and then all
    gene ids which the annotations report for that gene name are collected.

    Parameters
    ----------
    transcript_id: string
        The transcript identifier

    ensembl: pyensembl.Genome
        The annotations

    raise_on_error: bool
        Whether to raise a ValueError (True) or log a warning and return
        None (False) if either lookup fails with a ValueError

    Returns
    -------
    transcript_gene_ids: list of dicts
        One record for each gene id reported by
        ensembl.gene_ids_of_gene_name. Each record has the keys:

            transcript_id
            gene_id

        The records are suitable for building a data frame which maps
        between transcripts and genes.

        --- OR ---

    None, if the lookup failed and raise_on_error is False

    Raises
    ------
    ValueError
        If the lookup failed and raise_on_error is True. The original
        exception is attached as the cause.
    """
    try:
        gene_name = ensembl.gene_name_of_transcript_id(transcript_id)
        gene_ids = ensembl.gene_ids_of_gene_name(gene_name)
    except ValueError as ve:
        # the prefix previously contained a stray quote ("['pyensembl_...")
        msg = ("[pyensembl_utils.get_gene_ids_of_transcript_id]: could not "
               "find transcript id in database: {}".format(transcript_id))

        if raise_on_error:
            raise ValueError(msg) from ve

        logger.warning(msg)
        return None

    return [
        {'transcript_id': transcript_id, 'gene_id': gene_id}
        for gene_id in gene_ids
    ]