Пример #1
0
 def test1(self):
     # Ask for the pubmed neighbors (IDs plus relevancy scores) of
     # PMID 9298984.  The matching ELink query is
     #   dbfrom=pubmed&id=9298984&cmd=neighbor
     self.setup()
     records = self.client.from_dbids(EUtils.DBIds("pubmed", "9298984"))
     # The links are fetched but not inspected any further here.
     results = records.neighbor_links()
Пример #2
0
 def test2(self):
     # Follow the nucleotide -> protein links for three GI numbers.
     # The matching ELink query has the form
     #    dbfrom=nucleotide&db=protein&id=<comma separated GI list>
     self.setup()
     gi_numbers = ["18250303", "18250301", "18250299"]
     records = self.client.from_dbids(EUtils.DBIds("nucleotide", gi_numbers))
     # The links are fetched but not inspected any further here.
     neighbors = records.neighbor_links("protein")
Пример #3
0
    def testRelatedSequences(self):
        # Get all protein sequences similar to 4579714 (bacteriorhodopsin)
        # in FASTA format
        client = HistoryClient.HistoryClient(eutils=picklestore.client())

        result = client.post(EUtils.DBIds("protein", "4579714"))
        related = result.neighbor_links("protein")
        related_dbids = related.linksetdbs["protein_protein"].dbids

        proteins = client.post(related_dbids)
        fasta_infile = proteins.efetch(retmode="text", rettype="fasta")
        fasta = fasta_infile.read()
        print fasta[:1000]
Пример #4
0
 def test3(self):
     # Retrieve the pubmed articles related to PMIDs 11812492 and 11774222
     # whose publication date lies between 1995 and a fixed end date.
     # The example query this is based on is
     #   dbfrom=pubmed&id=11812492,11774222&db=pubmed&mindate=1995&datetype=pdat
     # NOTE: that example is wrong -- it needs an end date too!
     self.setup()
     records = self.client.from_dbids(
         EUtils.DBIds("pubmed", ["11812492", "11774222"]))
     # The end date is pinned rather than taken from the current date
     # (time.strftime("%Y/%m/%d")), since a moving date would cause the
     # regression tests to fail.
     today = "2003/01/12"
     related = records.neighbor_links(
         daterange=EUtils.DateRange("1995", today, "pdat"))
Пример #5
0
    def testRelatedItems(self):
        # Get proteins similar to 4579714 (bacteriorhodopsin) which
        # were published in 2002
        client = DBIdsClient.DBIdsClient(eutils=picklestore.client())
        results = client.from_dbids(EUtils.DBIds("protein", "4579714"))

        neighbors = results.neighbor_links("protein",
                                           daterange=EUtils.DateRange(
                                               "2002/01/01", "2002/12/31",
                                               "pdat"))

        dbids = neighbors.linksetdbs["protein_protein"].dbids

        print
        print len(
            dbids), "sequences similar to GI:4579714 were published in 2002"
        print "The record identifiers are:", ", ".join(map(str, dbids))
        print client.from_dbids(dbids).efetch(retmode="text",
                                              rettype="summary").read()
Пример #6
0
    def testSeqFetch(self):
        # Fetch nucleotide record 8886082 in GenBank ("gb") text format and
        # check the final sequence line for the whole record, for a
        # sub-range, and for the same sub-range on each strand.
        entrez = HistoryClient.HistoryClient(eutils=picklestore.client())
        dbids = EUtils.DBIds("nucleotide", "8886082")
        result = entrez.post(dbids)

        def last_sequence_line(**efetch_args):
            # Download the record with the given extra efetch arguments,
            # run _trim_lines() over the list of lines, and return the
            # second-to-last line, which is the one every check compares.
            lines = result.efetch(retmode="text", rettype="gb",
                                  **efetch_args).readlines()
            _trim_lines(lines)
            return lines[-2]

        # The expected lines are spelled out as explicit literals so that
        # their leading spaces are not confused with code indentation.
        # assertEqual is used instead of the deprecated assertEquals alias.

        # Whole record: the last line of the sequence.
        self.assertEqual(last_sequence_line(),
                         "     2761 ttcctagata aacac\n")

        # Bases 1-60.
        self.assertEqual(
            last_sequence_line(seq_start=1, seq_stop=60),
            "        1 ttcagtttat ttggacggaa gaatggtggc tcaattattg "
            "acgacttgtg gcctaaattt\n")

        # Bases 3-40 on the plus strand.
        self.assertEqual(
            last_sequence_line(seq_start=3,
                               seq_stop=40,
                               strand=EUtils.PLUS_STRAND),
            "        1 cagtttattt ggacggaaga atggtggctc aattattg\n")

        # Bases 3-40 on the minus strand.
        self.assertEqual(
            last_sequence_line(seq_start=3,
                               seq_stop=40,
                               strand=EUtils.MINUS_STRAND),
            "        1 caataattga gccaccattc ttccgtccaa ataaactg\n")
Пример #7
0
    def testForNewItems(self):
        # See if there are Halobacterium salinarum hits that are not in a
        # list of already-known protein identifiers.
        #
        # The known identifiers are posted to the history server, then a
        # search for "Halobacterium salinarum" excludes that posted set via
        # "BUTNOT #<query_key>".  The five identifiers that are commented
        # out of the known list must therefore show up as "new".

        known_dbids = EUtils.DBIds(
            'protein',
            [
                # Deliberately left out of the known set; the assertions at
                # the end of this test expect these to be reported as new:
                #            '461608', '133739', '121859', '121858', '114808',
                '26399547',
                '21362599',
                '20807968',
                '18310162',
                '10954592',
                '7674074',
                '3913880',
                '3913879',
                '3913877',
                '1350915',
                '462366',
                '462365',
                '461784',
                '119171',
                '21617825',
                '18202987',
                '14423949',
                '13878670',
                '13431821',
                '13124532',
                '12230579',
                '7531150',
                '6094165',
                '1352354',
                '1350833',
                '1169376',
                '140149',
                '133039',
                '133038',
                '132873',
                '132841',
                '114547',
                '114516',
                '18138454',
                '11385314',
                '11385312',
                '11385311',
                '11383975',
                '11383843',
                '11383705',
                '11383685',
                '11383631',
                '11383600',
                '11383544',
                '11383521',
                '11383449',
                '11383413',
                '11362758',
                '11362757',
                '11362756',
                '11362633',
                '11361208',
                '11361204',
                '11361203',
                '11361202',
                '11360968',
                '11360864',
                '11360863',
                '11360531',
                '11291672',
                '11290031',
                '11283208',
                '11281612',
                '11280741',
                '11280740',
                '11280737',
                '11280563',
                '11280281',
                '11280223',
                '11279456',
                '11277771',
                '11277752',
                '11277727',
                '11277613',
                '11277612',
                '11277611',
                '11277609',
                '11277608',
                '11277607',
                '11277606',
                '11277605',
                '11277604',
                '11277603',
                '11277602',
                '11277601',
                '11277597',
                '11277160',
                '11277109',
                '11277100',
                '11271301',
                '11270360',
                '11261072',
                '11260624',
                '11255573',
                '9972746',
                '7443396',
                '7440307',
                '7427602',
                '2129406',
                '2129405',
                '2129404',
                '2117914',
                '1363464',
                '1363463',
                '1084274',
                '1076149',
                '1076148',
                '1076147',
                '629399',
                '629398',
                '629396',
                '629394',
                '487075',
                '487074',
                '486684',
                '477959',
                '477726',
                '477027',
                '421715',
                '421714',
                '282664',
                '282663',
                '282662',
                '282661',
                '282660',
                '282659',
                '282658',
                '282657',
                '282656',
                '282655',
                '282654',
                '282653',
                '282652',
                '281164',
                '281162',
                '281161',
                '281160',
                '281159',
                '281158',
                '281157',
                '280348',
                '99214',
                '99212',
                '99211',
                '99210',
                '99209',
                '99207',
                '99206',
                '99205',
                '99203',
                '99202',
                '99201',
                '99200',
                '99198',
                '99195',
                '99192',
                '99191',
                '99190',
                '99188',
                '99187',
                '99185',
                '99183',
                '81076',
                '81071',
                '81070',
                '81069',
                '81067',
                '81065',
                '81063',
                '81062',
                '81061',
                '81060',
                '81058',
                '81057',
                '81055',
                '81054',
                '81052',
                '81051',
                '81047',
                '81045',
                '81043',
                '81042',
                '81041',
                '81040',
                '81039',
                '81038',
                '81037',
                '81036',
                '81035',
                '81034',
                '81032',
                '81031',
                '81029',
                '81026',
                '81023',
                '81022',
                '81020',
                '81019',
                '81018',
                '81017',
                '81016',
                '81015',
                '81013',
                '81012',
                '81011',
                '76372',
                '76329',
                '76324',
                '72632',
                '71284',
                '71211',
                '71179',
                '71146',
                '71063',
                '66382',
                '21264507',
                '18000395',
                '7531067',
                '132751',
                '117568',
                '14916722',
                '133992',
                '3122880',
                '114811',
                '24158915',
                '24158914',
                '24158913',
                '10640268',
                '3913878',
                '3023997',
                '544287',
                '19909691',
                '19909603',
                '3183098',
                '141254',
                '2492920',
                '809698',
                '43513',
                '18144841',
                '4467437',
                '4467436',
                '6226495',
                '3122803',
                '3122663',
                '2499389',
                '2494605',
                '1350834',
                '548760',
                '141356',
                '141353',
                '140804',
                '139955',
                '134663',
                '134662',
                '133462',
                '133078',
                '132785',
                '132646',
                '120245',
                '120232',
                '114812',
                '20873485',
                '20873480',
                '17942995',
                '17942994',
                '17942993',
                '20516540',
                '20151159',
                '20150922',
                '20150921',
                '12644298',
                '12644014',
                '120249',
                '10120917',
                '13124533',
                '133398',
                '11132078',
                '6225334',
                '1168858',
                '549772',
                '140827',
                '140825',
                '120239',
                '114838',
                '16974947',
                '16974946',
                '3219755',
                '347255',
                '8569313',
                '4929918',
                '899266',
                '10121032',
                '1621047',
                '14278674',
                '14278519',
                '11071687',
                '11071686',
                '11071685',
                '11071684',
                '6729723',
                '6729722',
                '6435624',
                '6435623',
                '3745775',
                '12655906',
                '12655904',
                '12655903',
                '6435626',
                '6435625',
                '11992133',
                '8918496',
                '10121037',
                '10121036',
                '10121033',
                '10120892',
                '10120851',
                '10120850',
                '2851428',
                '1710568',
                '1710498',
                '548772',
                '548769',
                '133031',
                '132995',
                '132985',
                '132879',
                '132784',
                '132750',
                '132728',
                '132708',
                '132639',
                '120251',
                '7327959',
                '7107278',
                '6435593',
                '455306',
                '455305',
                '455304',
                '455303',
                '455302',
                '455301',
                '455300',
                '455299',
                '455298',
                '455297',
                '150418',
                '150417',
                '6435594',
                '6172231',
                '6172230',
                '6172229',
                '6172228',
                '6172227',
                '6137453',
                '130153',
                '118549',
                '2499384',
                '2499383',
                '461612',
                '461611',
                '461610',
                '5822280',
                '285806',
                '4930169',
                '2425186',
                '2425185',
                '2425184',
                '2425183',
                '2425182',
                '2425181',
                '2425180',
                '2425179',
                '2425178',
                '2425177',
                '2425176',
                '2425175',
                '2425174',
                '3659953',
                '3659944',
                '4469246',
                '4001706',
                '4001704',
                '4388967',
                '4378986',
                '4377598',
                '809702',
                '809701',
                '43668',
                '43667',
                '43666',
                '43665',
                '43663',
                '43662',
                '43661',
                '43660',
                '43658',
                '43491',
                '4322492',
                '4104487',
                '4104485',
                '4104483',
                '3928158',
                '43508',
                '1633466',
                '2072795',
                '1527138',
                '1527137',
                '1353676',
                '3015619',
                '598123',
                '515085',
                '493889',
                '493888',
                '229726',
                '2351849',
                '2351848',
                '1154790',
                '1154789',
                '1154788',
                '1154787',
                '1154786',
                '1154785',
                '1154784',
                '1154783',
                '1154782',
                '1154781',
                '1154780',
                '1154779',
                '1154778',
                '1154777',
                '2190417',
                '2190416',
                '984742',
                '984741',
                '984740',
                '984739',
                '2760612',
                '2648028',
                '1070346',
                '1070345',
                '1070344',
                '2209068',
                '1487875',
                '1199752',
                '1199750',
                '509675',
                '509674',
                '285817',
                '285810',
                '285808',
                '216709',
                '43455',
                '43454',
                '43453',
                '43452',
                '43451',
                '43450',
                '807110',
                '1654427',
                '1654425',
                '1654423',
                '1654421',
                '1654419',
                '225948',
                '1583108',
                '1583107',
                '1094422',
                '448273',
                '226716',
                '226715',
                '350080',
                '226309',
                '225904',
                '225428',
                '1483625',
                '1235894',
                '1435134',
                '1435132',
                '1435131',
                '1435129',
                '994803',
                '994802',
                '226310',
                '223370',
                '223077',
                '223076',
                '223063',
                '1333716',
                '671101',
                '671100',
                '550341',
                '311841',
                '297410',
                '49046',
                '43656',
                '43655',
                '43654',
                '43653',
                '43641',
                '43640',
                '43559',
                '43557',
                '43552',
                '43551',
                '43550',
                '43548',
                '43546',
                '43545',
                '43544',
                '43543',
                '43542',
                '43541',
                '43540',
                '43539',
                '43537',
                '43535',
                '43534',
                '43533',
                '43531',
                '43530',
                '43526',
                '43525',
                '43524',
                '43523',
                '43522',
                '43521',
                '43520',
                '43519',
                '43518',
                '43517',
                '43511',
                '43510',
                '43505',
                '43504',
                '43503',
                '43501',
                '43499',
                '43498',
                '43496',
                '43495',
                '43493',
                '517390',
                '305353',
                '305352',
                '148816',
                '148814',
                '148812',
                '148794',
                '148793',
                '148792',
                '148768',
                '148767',
                '148766',
                '148764',
                '148763',
                '148759',
                '148757',
                '148753',
                '148749',
                '148747',
                '148745',
                '305350'
            ])

        # Upload the known identifiers to the server so that the search
        # below can refer to them by history query key.
        client = HistoryClient.HistoryClient(eutils=picklestore.client())

        old_records = client.post(known_dbids)

        # Now see if there's anything new: search the protein database
        # for the organism, excluding the posted history entry.
        new_records = client.search("Halobacterium salinarum BUTNOT #%s" %
                                    (old_records.query_key, ),
                                    db="protein")
        # Report the number of new records, their identifiers and their
        # text summaries (Python 2 print statements).
        print
        print "There are", len(
            new_records), "new Halobacterium salinarum records"
        new_dbids = new_records.dbids
        print "The record identifiers are:", ", ".join(map(str, new_dbids))
        print new_records.efetch(retmode="text", rettype="summary").read()

        # These should exist, since I commented them out above :)
        for x in ('461608', '133739', '121859', '121858', '114808'):
            assert x in new_dbids, "Cannot find expected record %r" % x
Пример #8
0
def post(ids, entrez_db="protein"):
    """Post identifiers to NCBI and yield each GBSeq node of the result.

    The identifiers are posted through ``client()``, the posted set is
    fetched with ``efetch()`` and the response is parsed with ``ET.parse``.

    ids -- the identifiers to post; passed straight to ``EUtils.DBIds``
    entrez_db -- name of the Entrez database the identifiers belong to
        (defaults to "protein").  This used to be ignored in favor of a
        hard-coded "protein"; it is now honored.

    This is a generator: every element matching './/GBSeq' in the parsed
    response is yielded separately, and no request is made until the
    first item is asked for.
    """
    result = client().post(EUtils.DBIds(entrez_db, ids))
    tree = ET.parse(result.efetch())
    for seq_node in tree.findall('.//GBSeq'):
        yield seq_node
Пример #9
0
 def setup(self):
     # Build a history client on top of picklestore.client(1) and post two
     # identifier sets for later use: one protein record and three
     # nucleotide records.
     eutils = picklestore.client(1)
     self.entrez = HistoryClient.HistoryClient(eutils=eutils)

     protein_ids = EUtils.DBIds("protein", "4579714")
     self.result1 = self.entrez.post(protein_ids)

     nucleotide_ids = EUtils.DBIds("nucleotide",
                                   ["18250303", "18250301", "18250299"])
     self.result2 = self.entrez.post(nucleotide_ids)
Пример #10
0
    def testClient(self):
        # End-to-end exercise of the low-level client returned by
        # picklestore.client(): esearch with history, efetch and esummary
        # (by history and by explicit ids), epost, and several elink
        # variants.  Responses are checked with regular expressions on the
        # raw text.
        #
        # Every try/except below follows the same pattern: on any failure
        # the raw response(s) are printed to help debugging and the
        # original exception is re-raised.
        #
        # This is Python 2 code (print statements, urllib.unquote, and
        # len() applied to the result of map()).
        eutils = picklestore.client()
        # Author search limited to 1995-1998, returning 5 hits starting at
        # offset 1, and stored in a fresh history (webenv=None).
        infile = eutils.esearch("Dalke",
                                field="au",
                                daterange=EUtils.DateRange("1995", "1998"),
                                retstart=1,
                                retmax=5,
                                usehistory=1,
                                webenv=None)
        s = infile.read()
        try:
            # The response is expected to contain four <Count> elements.
            counts = map(int, re.findall(r"<Count>(-?\d+)</Count>", s))
            assert len(counts) == 4, counts
            assert counts[0] == 10
            assert counts[1] >= 30
            assert counts[2] == -1
            assert counts[3] == -1

            assert s.find("<RetMax>5</RetMax>") != -1
            assert s.find("<RetStart>1</RetStart>") != -1
            ids = re.findall(r"<Id>(\d+)</Id>", s)
            assert len(ids) == 5, ids

            # The query translation: the author term plus both date bounds.
            terms = re.findall(r"<Term>([^<]+)</Term>", s)
            assert len(terms) == 3
            assert terms[0] == "Dalke[Author]"
            assert terms[1] == "1995[EDAT]"
            assert terms[2] == "1998[EDAT]"

            query_key1 = re.findall(r"<QueryKey>(\d+)</QueryKey>", s)[0]
            assert query_key1 == "1", query_key1  # always true?

            # The WebEnv value is unquoted before it is passed back to
            # later history-based calls.
            quoted_webenv = re.findall("<WebEnv>([^>]+)</WebEnv>", s)[0]
            webenv = urllib.unquote(quoted_webenv)

        except:
            print "ERROR!"
            print s
            raise

        try:
            # Can I refetch those same Ids using the history?
            # (t is pre-set so the except clause can always print it.)
            t = ""
            t = eutils.efetch_using_history(db="pubmed",
                                            webenv=webenv,
                                            query_key=query_key1,
                                            retstart=1,
                                            retmax=5,
                                            retmode="text",
                                            rettype="uilist").read()
            new_ids = t.split()
            assert ids == new_ids, (ids, new_ids)  # Must be in same order too!
        except:
            print "ERROR!"
            print s
            print " -- and --"
            print t
            raise

        # Make sure I'm getting the same XML summary through history and id
        sum1 = sum2 = None
        try:
            sum1 = eutils.esummary_using_history(db="pubmed",
                                                 webenv=webenv,
                                                 query_key=query_key1,
                                                 retstart=1,
                                                 retmax=1).read()
            sum2 = eutils.esummary_using_dbids(
                dbids=EUtils.DBIds("pubmed", [ids[0]])).read()
            assert sum1 == sum2
        except:
            print "Summary 1"
            print sum1
            print "-----------------"
            print "Summary 2"
            print sum2
            raise

        # Make sure I'm getting the same XML version of the records
        rec1 = rec2 = None
        try:
            rec1 = eutils.efetch_using_history(db="pubmed",
                                               webenv=webenv,
                                               query_key=query_key1,
                                               retmode="xml",
                                               retstart=1,
                                               retmax=1).read()
            rec2 = eutils.efetch_using_dbids(dbids=EUtils.DBIds(
                "pubmed", [ids[0]]),
                                             retmode="xml").read()
            assert rec1 == rec2
        except:
            print "Record 1"
            print rec1
            print "-----------------"
            print "Record 2"
            print rec2
            raise

        # Post a few GIs (from the protein database) to the server
        # This appends to the existing history so should be query_key #2.
        post_ids = ["914034", "5263173", "1769808", "1060883"]
        infile = eutils.epost(EUtils.DBIds("protein", post_ids), webenv=webenv)
        post_results = infile.read()
        try:
            query_key2 = re.findall(r"<QueryKey>(\d+)</QueryKey>",
                                    post_results)[0]
            assert query_key2 == "2"

            # Use the WebEnv from the post response for all later calls.
            quoted_webenv = re.findall("<WebEnv>([^>]+)</WebEnv>",
                                       post_results)[0]
            webenv = urllib.unquote(quoted_webenv)
        except:
            print "ERROR"
            print post_results
            raise

        # Verify that the posted ids are correct
        # NOTE(review): the ids were posted to "protein" but this fetch
        # passes db="pubmed" -- looks inconsistent; confirm against the
        # recorded responses before changing it.
        posted_ids = eutils.efetch_using_history(
            db="pubmed",
            webenv=webenv,
            query_key=query_key2,
            retstart=0,
            retmax=len(post_ids),
            retmode="text",
            rettype="uilist").read().split()
        x1 = posted_ids[:]  # Make copies since I need the correct
        x1.sort()  # order for getting the FASTA version, below
        x2 = post_ids[:]
        x2.sort()
        assert x1 == x2, (post_ids, posted_ids)

        # Now fetch them as FASTA format, once through the history and
        # once by explicit ids (in the server's order), and compare.
        fasta1 = fasta2 = None
        try:
            fasta1 = eutils.efetch_using_history(db="protein",
                                                 webenv=webenv,
                                                 query_key=query_key2,
                                                 retstart=0,
                                                 retmax=len(post_ids),
                                                 retmode="text",
                                                 rettype="fasta").read()
            fasta2 = eutils.efetch_using_dbids(dbids=EUtils.DBIds(
                "protein", posted_ids),
                                               retmode="text",
                                               rettype="fasta").read()
            assert fasta1 == fasta2
        except:
            print "ERROR FASTA1"
            print fasta1
            print "ERROR FASTA2"
            print fasta2
            raise

        # It's much harder to test the ELink capabilities.

        # Get the VMD paper
        results = None
        try:
            results = eutils.esearch(
                "Humphrey W. AND Dalke A. AND Schulten K. AND VMD[Title]",
                field="au").read()
            # There should only be one match
            ids = re.findall(r"<Id>(\d+)</Id>", results)
            assert ids == ["8744570"]
        except:
            print "Error"
            print results
            raise

        # Look at the related publications and we should find
        # my Tcl paper, which is 9390282
        links = None
        try:
            links = eutils.elink_using_dbids(EUtils.DBIds("pubmed", ids),
                                             cmd="neighbor").read()

            # remember, the first id comes from the <Id> in <IdList>
            related_ids = re.findall(r"<Id>(\d+)</Id>", links)[1:]
            assert "9390282" in related_ids
        except:
            print "Error"
            print links
            raise

        # Get the taxonomy record for the "posted_ids".
        # NOTE: This test originally compared the 2nd element from
        # that list, but elink_using_history doesn't support the
        # retstart/retmax parameters.
        #
        # This comes from query_key2 in the history.  Do it both ways
        # to compare results.
        link1 = link2 = None
        try:
            link1 = eutils.elink_using_dbids(EUtils.DBIds(
                "protein", posted_ids),
                                             db="taxonomy",
                                             cmd="neighbor").read()
            link2 = eutils.elink_using_history(dbfrom="protein",
                                               webenv=webenv,
                                               query_key=query_key2,
                                               db="taxonomy",
                                               cmd="neighbor").read()
            assert link1 == link2
            # Skip the leading <Id> entries that echo the input ids; the
            # remaining ones are the linked taxonomy ids.
            taxids = re.findall(r"<Id>(\d+)</Id>", link1)[len(posted_ids):]
            assert taxids == ["43776", "29282", "28442", "2237"], taxids
        except:
            print "Error",
            print link1
            print "----------------"
            print link2
            raise

        # See if there are linkouts 914034
        # Should be at least one, to DART 3240.
        # NOTE(review): the code queries posted_ids[1], which is only
        # 914034 if the server returned the ids in that order -- confirm.
        llinks = None
        try:
            llinks = eutils.elink_using_dbids(EUtils.DBIds(
                "protein", [posted_ids[1]]),
                                              cmd="llinks").read()
            assert llinks.find("<ObjUrl>") != -1
            assert "3240" in re.findall("<Id>(\d+)</Id>", llinks)
        except:
            print "ERROR"
            print llinks
            raise

        # Finally, check that I can limit the search to an Entrez query string
        # I'm using the example
        #   "retrieve MEDLINE indexed only related articles for PMID 12242737"
        #   elink.fcgi?dbfrom=pubmed&id=12242737&db=pubmed&term=medline[sb]
        full = restricted = None
        try:
            full = eutils.elink_using_dbids(
                EUtils.DBIds("pubmed", ["12242737"])).read()
            restricted = eutils.elink_using_dbids(EUtils.DBIds(
                "pubmed", ["12242737"]),
                                                  term="medline[sb]").read()
            # The restricted query must return strictly fewer links.
            counts1 = full.count("<Link>")
            counts2 = restricted.count("<Link>")
            assert counts1 > counts2
        except:
            print "ERROR"
            print full
            print "---------"
            print restricted
            raise