Exemplo n.º 1
0
def pubtator_results_for_lex(lex, gene_name=None):
    """Search PubTator for every variant of an LVG object and merge the results.

    Each sequence variant in ``lex.variants`` is passed, together with the
    resolved gene ID, to ``pubtator_results_for_seqvar``; the dictionary it
    returns is merged into a single results dictionary via ``dict.update``.

    The exact shape of each entry is decided by ``pubtator_results_for_seqvar``.
    The logging code below looks entries up by the string form of the variant
    and expects an iterable of rows exposing ``'Mentions'``, ``'PMID'`` and
    ``'Components'`` -- NOTE(review): confirm against that helper.

    Variants that raise ``RejectedSeqVar`` or ``PubtatorDBError`` are logged
    and skipped; they never abort the overall search.

    :param lex: lexical variant object (metavariant.VariantLVG)
    :param gene_name: optional gene name; when falsy, ``lex.gene_name`` is used
    :return: dictionary of merged results; an empty dictionary when the gene
        ID lookup on ``lex.gene_name`` raises ``TypeError``
    """
    gene_id = None
    if gene_name:
        gene_id = GeneID(gene_name)
    else:
        try:
            gene_name = lex.gene_name
            gene_id = GeneID(lex.gene_name)
        except TypeError:
            # no gene_name? it happens -- but our results will be basically
            # bunk without it.  Return an empty *dict* so the return type is
            # the same on every path.
            return {}

    log.info('[%s] %s (Gene ID: %s)', lex.seqvar, gene_name, gene_id)

    results = {}
    for seqtype in lex.variants:
        for seqvar in lex.variants[seqtype].values():

            try:
                result = pubtator_results_for_seqvar(seqvar, gene_id)
                results.update(result)
                try:
                    for row in results['%s' % seqvar]:
                        log.info(
                            '[%s] [[%s]] Mentions: %s  PMID: %s  Components: %s',
                            lex.seqvar, seqvar, row['Mentions'], row['PMID'],
                            row['Components'])
                except Exception as error:
                    # Reporting the rows is best-effort: a failure here must
                    # not discard results that were already merged above, so
                    # it is recorded through the logger and the loop goes on.
                    log.warning(
                        '[%s] [[%s]] could not log result rows: %r',
                        lex.seqvar, seqvar, error)

            except RejectedSeqVar:
                log.debug(
                    '[%s] [[%s]] VariantComponents raised RejectedSeqVar',
                    lex.seqvar, seqvar)

            except PubtatorDBError as error:
                log.info('[%s] [[%s]] %r', lex.seqvar, seqvar, error)

    return results
Exemplo n.º 2
0
def pubtator_lex_to_pmid(lex, gene_name=None):
    """Search PubTator for every variant of an LVG object and collect PMIDs.

    Each sequence variant in ``lex.variants`` is converted to a
    ``VariantComponents`` object.  Protein variants (``seqtype == 'p'``) are
    looked up with ``pubtator_db.search_proteins``; every other sequence type
    is looked up with ``pubtator_db.search_m2p``.  The ``'PMID'`` value of each
    returned row is collected.

    Variants rejected by ``VariantComponents`` (``RejectedSeqVar``) are skipped,
    and a ``PubtatorDBError`` raised by a search is logged without aborting
    the remaining searches.

    :param lex: lexical variant object (metavariant.VariantLVG)
    :param gene_name: optional gene name; when falsy, ``lex.gene_name`` is used
    :return: list of the distinct PMIDs found (built from a set, so in no
        particular order); an empty list when the gene ID lookup on
        ``lex.gene_name`` raises ``TypeError``
    """
    gene_id = None
    if gene_name:
        gene_id = GeneID(gene_name)
    else:
        try:
            gene_name = lex.gene_name
            gene_id = GeneID(lex.gene_name)
        except TypeError:
            # no gene_name? it happens -- but our results will be basically bunk without it.
            return []

    log.info('[%s] %s (Gene ID: %s)', lex.seqvar, gene_name, gene_id)

    pmids = set()
    for seqtype in lex.variants:
        for seqvar in lex.variants[seqtype].values():
            try:
                components = VariantComponents(seqvar)
            except RejectedSeqVar:
                # Pass the arguments to the logger instead of pre-formatting,
                # matching every other log call in this function.
                log.debug('[%s] Rejected sequence variant: %r',
                          lex.seqvar, seqvar)
                continue

            log.info('[%s] [[%s]] %s', lex.seqvar, seqvar, components)
            try:
                if seqtype == 'p':
                    results = pubtator_db.search_proteins(components, gene_id)
                else:
                    results = pubtator_db.search_m2p(components, gene_id)
                for res in results:
                    pmids.add(res['PMID'])
            except PubtatorDBError as error:
                log.info('[%s] (%s) %r', lex.seqvar, seqvar, error)

    return list(pmids)
Exemplo n.º 3
0
 def __init__(self, gene_id=None, gene_name=None):
     """Fill in ``gene_id`` and ``gene_name`` from whichever one is supplied.

     A truthy ``gene_id`` takes precedence: it is stored and the name is
     looked up with ``GeneName``.  Otherwise a truthy ``gene_name`` is stored
     and the ID is looked up with ``GeneID``.  When neither argument is
     truthy, no attribute is assigned.
     """
     if not (gene_id or gene_name):
         return

     if gene_id:
         self.gene_id = gene_id
         self.gene_name = GeneName(gene_id)
         return

     self.gene_name = gene_name
     self.gene_id = GeneID(gene_name)
Exemplo n.º 4
0
def search_aminoDBs(gene, achg):
    """Print the ClinVar and PubTator matches for an amino-acid change.

    Builds a ``VariantComponents`` object from ``achg``, resolves ``gene`` to
    a gene ID with ``GeneID``, then prints the strict ``cvdb.search`` matches
    followed by the ``pubdb.search_proteins`` matches.  Everything is written
    to stdout and nothing is returned.

    :param gene: gene identifier passed to ``GeneID``
    :param achg: amino-acid change text passed to ``VariantComponents``
    :return: None
    """
    print('[%s]' % achg)
    components = VariantComponents(aminochange=achg)
    if not components:
        print('[%s] INVALID Amino Change' % achg)
        return

    print('[%s] Posedit: %s' % (achg, components.posedit))
    print('[%s] Slang: %r' % (achg, components.posedit_slang))

    gene_id = GeneID(gene)
    print('[%s] Gene: %s (ID: %i)' % (achg, gene, gene_id))

    clinvar_hits = cvdb.search(components, gene_id, strict=True)
    print('[%s] Clinvar STRICT matches: %i' % (achg, len(clinvar_hits)))

    # Columns printed for each ClinVar hit, in output order.
    clinvar_fields = (
        'PMID', 'HGVS', 'VariationID', 'GeneSymbol', 'Ref', 'Pos', 'Alt',
    )
    for hit in clinvar_hits:
        print('[%s]' % achg, *[hit[field] for field in clinvar_fields])

    pubtator_hits = pubdb.search_proteins(components, gene_id)
    print('[%s] PubtatorDB matches: %i' % (achg, len(pubtator_hits)))
    for hit in pubtator_hits:
        print(hit)
Exemplo n.º 5
0
def process_hgvs_through_pubtator(hgvs_text):
    """Collect the PubTator PMIDs for every variant derived from an HGVS string.

    An ``LVG`` object is built from ``hgvs_text``.  If the edit type of its
    ``seqvar`` is not one of SUB, DEL, INS, FS or INDEL, a message is printed
    and the function gives up.  Otherwise each entry of ``lex.variants`` is
    searched in ``pubtator_db`` (``search_proteins`` for sequence type ``'p'``,
    ``search_m2p`` for all others) and the ``'PMID'`` of every row is kept.
    Progress is printed to stdout along the way.

    :param hgvs_text: HGVS text passed to ``LVG``
    :return: set of PMIDs, or None when the edit type is not supported
    """
    print()
    print('[%s]' % hgvs_text)

    lex = LVG(hgvs_text)

    supported_edits = ('SUB', 'DEL', 'INS', 'FS', 'INDEL')
    edittype = VariantComponents(lex.seqvar).edittype
    if edittype not in supported_edits:
        print('[%s] Cannot process edit type %s; skipping' %
              (hgvs_text, edittype))
        return None

    try:
        gene_id = GeneID(lex.gene_name)
    except TypeError:
        # A missing gene name is tolerated; the search runs without a gene ID.
        gene_id = None

    print('[%s]' % hgvs_text, lex.gene_name, '(Gene ID: %s)' % gene_id)

    found_pmids = set()
    for seqtype in lex.variants:
        for seqvar in lex.variants[seqtype]:
            try:
                components = VariantComponents(seqvar)
            except RejectedSeqVar:
                print('[%s] Rejected sequence variant: %r' %
                      (hgvs_text, seqvar))
                continue

            print('[%s]' % hgvs_text, seqtype, components)
            # Protein variants have their own search; everything else goes
            # through the m2p search.
            search = (pubtator_db.search_proteins if seqtype == 'p'
                      else pubtator_db.search_m2p)
            found_pmids.update(
                row['PMID'] for row in search(components, gene_id))

    return found_pmids
Exemplo n.º 6
0
def hgvs_to_pmid_results_dict(hgvs_text):
    """Look up an HGVS string in PubTator and ClinVar.

    An ``LVG`` object is built from ``hgvs_text``.  If the edit type of its
    ``seqvar`` is not one of SUB, DEL, INS, FS or INDEL, a message is printed
    and the function gives up.  Otherwise the LVG object is handed to
    ``PubtatorHgvs2Pmid`` and ``ClinvarHgvs2Pmid``.  The gene ID is resolved
    only so it can be shown in the printed progress line.

    :param hgvs_text: HGVS text passed to ``LVG``
    :return: dictionary with the keys ``'PubTator'`` and ``'ClinVar'``, or
        None when the edit type is not supported
    """
    print()
    print('[%s]' % hgvs_text)

    lex = LVG(hgvs_text)

    supported_edits = ('SUB', 'DEL', 'INS', 'FS', 'INDEL')
    edittype = VariantComponents(lex.seqvar).edittype
    if edittype not in supported_edits:
        print('[%s] Cannot process edit type %s; skipping' %
              (hgvs_text, edittype))
        return None

    try:
        gene_id = GeneID(lex.gene_name)
    except TypeError:
        # A missing gene name is tolerated; it is only reported as None.
        gene_id = None

    print('[%s]' % hgvs_text, lex.gene_name, '(Gene ID: %s)' % gene_id)

    # The PubTator lookup runs before the ClinVar lookup.
    return {
        'PubTator': PubtatorHgvs2Pmid(lex),
        'ClinVar': ClinvarHgvs2Pmid(lex),
    }
Exemplo n.º 7
0
 def test_id(self):
     """Check that a gene ID resolves consistently through every lookup.

     Builds a ``Gene`` from ``self.gene_id`` and asserts that ``GeneID``,
     ``GeneName`` and the ``Gene`` object's ``id`` / ``name`` attributes all
     agree with the fixture's ``gene_id`` and ``gene_name``.
     """
     gene_under_test = Gene(self.gene_id)

     # Function-style lookups.
     assert_that(GeneID(self.gene_id), is_(self.gene_id))
     assert_that(GeneName(self.gene_id), is_(self.gene_name))

     # Object-style lookups.
     assert_that(gene_under_test.id, is_(self.gene_id))
     assert_that(gene_under_test.name, is_(self.gene_name))