def pubtator_results_for_lex(lex, gene_name=None):
    """ Takes an LVG object ("lex") (metavariant.VariantLVG) and searches PubTator
    for every variant found in lex.variants.

    Each variant is handed to pubtator_results_for_seqvar together with the
    resolved gene ID, and the per-variant result mappings are merged into one
    dictionary. After each merge, the rows stored under '%s' % seqvar are
    logged; each row is read by its 'Mentions', 'PMID' and 'Components' keys.

    Variants that raise RejectedSeqVar or PubtatorDBError are logged and skipped.

    :param lex: lexical variant object (metavariant.VariantLVG)
    :param gene_name: (optional) gene name; when falsy, lex.gene_name is used
    :return: dictionary of merged results (empty when no gene ID can be resolved)
    """
    if gene_name:
        gene_id = GeneID(gene_name)
    else:
        try:
            gene_name = lex.gene_name
            gene_id = GeneID(gene_name)
        except TypeError:
            # no gene_name? it happens -- but our results will be basically bunk
            # without it. Return an empty mapping so the return type matches the
            # success path (an empty list was returned here previously).
            return {}

    log.info('[%s] %s (Gene ID: %s)', lex.seqvar, gene_name, gene_id)

    results = {}
    for seqtype in lex.variants:
        for seqvar in lex.variants[seqtype].values():
            try:
                results.update(pubtator_results_for_seqvar(seqvar, gene_id))
                try:
                    for row in results['%s' % seqvar]:
                        log.info(
                            '[%s] [[%s]] Mentions: %s PMID: %s Components: %s',
                            lex.seqvar, seqvar,
                            row['Mentions'], row['PMID'], row['Components'])
                except Exception as error:
                    # Row logging is best-effort only: a malformed or missing
                    # entry must not abort the search, but it is reported via
                    # the logger rather than stdout.
                    log.warning('[%s] [[%s]] could not log result rows: %r',
                                lex.seqvar, seqvar, error)
            except RejectedSeqVar:
                log.debug(
                    '[%s] [[%s]] VariantComponents raised RejectedSeqVar',
                    lex.seqvar, seqvar)
            except PubtatorDBError as error:
                log.info('[%s] [[%s]] %r', lex.seqvar, seqvar, error)
    return results
def pubtator_lex_to_pmid(lex, gene_name=None):
    """ Takes an LVG object ("lex") (metavariant.VariantLVG) and uses each variant
    found in lex.variants to do a search in PubTator for associated PMIDs.

    Protein variants (seqtype 'p') are searched with pubtator_db.search_proteins;
    every other sequence type is searched with pubtator_db.search_m2p. The 'PMID'
    value of every returned row is collected into a set, so duplicates across
    variants are collapsed.

    Variants that raise RejectedSeqVar or PubtatorDBError are logged and skipped.

    :param lex: lexical variant object (metavariant.VariantLVG)
    :param gene_name: (optional) gene name; when falsy, lex.gene_name is used
    :return: list of distinct PMIDs, in no particular order (empty list when no
             gene ID can be resolved)
    """
    if gene_name:
        gene_id = GeneID(gene_name)
    else:
        try:
            gene_name = lex.gene_name
            gene_id = GeneID(gene_name)
        except TypeError:
            # no gene_name? it happens -- but our results will be basically bunk without it.
            return []

    log.info('[%s] %s (Gene ID: %s)', lex.seqvar, gene_name, gene_id)

    pmids = set()
    for seqtype in lex.variants:
        # Protein variants have their own search; everything else goes through m2p.
        if seqtype == 'p':
            search = pubtator_db.search_proteins
        else:
            search = pubtator_db.search_m2p

        for seqvar in lex.variants[seqtype].values():
            try:
                components = VariantComponents(seqvar)
            except RejectedSeqVar:
                # Lazy logger arguments: the message is only formatted if emitted.
                log.debug('[%s] Rejected sequence variant: %r', lex.seqvar, seqvar)
                continue

            log.info('[%s] [[%s]] %s', lex.seqvar, seqvar, components)

            try:
                for res in search(components, gene_id):
                    pmids.add(res['PMID'])
            except PubtatorDBError as error:
                log.info('[%s] (%s) %r', lex.seqvar, seqvar, error)
    return list(pmids)
def __init__(self, gene_id=None, gene_name=None):
    """ Set gene_id and gene_name from whichever identifier is supplied.

    When gene_id is truthy it wins and gene_name is looked up via GeneName;
    otherwise a truthy gene_name is kept and gene_id is looked up via GeneID.
    When neither is supplied both attributes are left as None, so they always
    exist on the instance.

    :param gene_id: (optional) gene identifier
    :param gene_name: (optional) gene name, used only when gene_id is falsy
    """
    # Always define both attributes so later reads cannot raise AttributeError
    # on an instance created without arguments.
    self.gene_id = None
    self.gene_name = None

    if gene_id:
        self.gene_id = gene_id
        self.gene_name = GeneName(gene_id)
    elif gene_name:
        self.gene_name = gene_name
        self.gene_id = GeneID(gene_name)
def search_aminoDBs(gene, achg):
    """ Print ClinVar (strict) and PubTator protein matches for an amino-acid change.

    Builds a VariantComponents object from the amino change, prints its posedit
    and posedit_slang, resolves the gene ID with GeneID, then prints the match
    count and each row from cvdb.search (strict=True) and from
    pubdb.search_proteins. All output goes to stdout.

    :param gene: gene name passed to GeneID
    :param achg: amino-acid change text passed to VariantComponents(aminochange=...)
    :return: None
    """
    print('[%s]' % achg)

    components = VariantComponents(aminochange=achg)
    if not components:
        print('[%s] INVALID Amino Change' % achg)
        return

    print('[%s] Posedit: %s' % (achg, components.posedit))
    print('[%s] Slang: %r' % (achg, components.posedit_slang))

    gene_id = GeneID(gene)
    print('[%s] Gene: %s (ID: %i)' % (achg, gene, gene_id))

    # NOTE: a non-strict ClinVar search (strict=False) used to be reported
    # here as "LOOSE matches"; it is currently disabled.
    clinvar_hits = cvdb.search(components, gene_id, strict=True)
    print('[%s] Clinvar STRICT matches: %i' % (achg, len(clinvar_hits)))
    for hit in clinvar_hits:
        print('[%s]' % achg,
              hit['PMID'], hit['HGVS'], hit['VariationID'], hit['GeneSymbol'],
              hit['Ref'], hit['Pos'], hit['Alt'])

    pubtator_hits = pubdb.search_proteins(components, gene_id)
    print('[%s] PubtatorDB matches: %i' % (achg, len(pubtator_hits)))
    for hit in pubtator_hits:
        print(hit)
def process_hgvs_through_pubtator(hgvs_text):
    """ Build an LVG from an HGVS string and collect PMIDs from PubTator for each
    of its variants.

    Progress is printed to stdout. Only edit types SUB, DEL, INS, FS and INDEL
    are processed. Protein variants (seqtype 'p') are searched with
    pubtator_db.search_proteins; all other sequence types use
    pubtator_db.search_m2p. Variants rejected by VariantComponents are skipped.

    :param hgvs_text: HGVS string handed to LVG
    :return: set of PMIDs, or None when the edit type is not supported
    """
    print()
    print('[%s]' % hgvs_text)

    lex = LVG(hgvs_text)

    edittype = VariantComponents(lex.seqvar).edittype
    if edittype not in ['SUB', 'DEL', 'INS', 'FS', 'INDEL']:
        print('[%s] Cannot process edit type %s; skipping' % (hgvs_text, edittype))
        return None

    try:
        gene_id = GeneID(lex.gene_name)
    except TypeError:
        # no gene_name? it happens.
        gene_id = None
    print('[%s]' % hgvs_text, lex.gene_name, '(Gene ID: %s)' % gene_id)

    pmids = set()
    for seqtype in lex.variants:
        variants = lex.variants[seqtype]
        # Sibling lookups iterate .values() of this container to obtain the
        # seqvar objects; iterating a mapping directly would yield its keys.
        # NOTE(review): container type is not visible here -- plain sequences
        # are still iterated as-is; confirm against LVG.
        seqvars = variants.values() if hasattr(variants, 'values') else variants

        for seqvar in seqvars:
            try:
                components = VariantComponents(seqvar)
            except RejectedSeqVar:
                print('[%s] Rejected sequence variant: %r' % (hgvs_text, seqvar))
                continue

            print('[%s]' % hgvs_text, seqtype, components)
            if seqtype == 'p':
                results = pubtator_db.search_proteins(components, gene_id)
            else:
                results = pubtator_db.search_m2p(components, gene_id)

            pmids.update(res['PMID'] for res in results)
    return pmids
def hgvs_to_pmid_results_dict(hgvs_text):
    """ Build an LVG from an HGVS string and gather PMID lookups from PubTator and
    ClinVar into one dictionary.

    Progress is printed to stdout. Only edit types SUB, DEL, INS, FS and INDEL
    are processed. The gene ID is resolved for the progress output only; the
    lookups themselves receive the LVG object.

    :param hgvs_text: HGVS string handed to LVG
    :return: {'PubTator': PubtatorHgvs2Pmid(lex), 'ClinVar': ClinvarHgvs2Pmid(lex)},
             or None when the edit type is not supported
    """
    print()
    print('[%s]' % hgvs_text)

    lex = LVG(hgvs_text)

    supported_edittypes = ('SUB', 'DEL', 'INS', 'FS', 'INDEL')
    edittype = VariantComponents(lex.seqvar).edittype
    if edittype not in supported_edittypes:
        print('[%s] Cannot process edit type %s; skipping' % (hgvs_text, edittype))
        return None

    try:
        gene_id = GeneID(lex.gene_name)
    except TypeError:
        # A lex without a usable gene name is tolerated; report None instead.
        gene_id = None
    print('[%s]' % hgvs_text, lex.gene_name, '(Gene ID: %s)' % gene_id)

    # PubTator is queried first, then ClinVar.
    return {
        'PubTator': PubtatorHgvs2Pmid(lex),
        'ClinVar': ClinvarHgvs2Pmid(lex),
    }
def test_id(self):
    """ Check gene lookups by ID against the fixture's gene_id and gene_name.

    Asserts that GeneID and GeneName, called with the fixture ID, give back the
    fixture ID and name, and that a Gene built from that ID exposes the same
    values as .id and .name.
    """
    expected_id = self.gene_id
    gene = Gene(expected_id)

    # Module-level lookup helpers.
    assert_that(GeneID(expected_id), is_(expected_id))
    assert_that(GeneName(expected_id), is_(self.gene_name))

    # Attributes of the Gene object.
    assert_that(gene.id, is_(expected_id))
    assert_that(gene.name, is_(self.gene_name))