Exemplo n.º 1
0
    def search(self):
        """Search PubMed for self.searchTerm and record the hits in self.items.

        self.maxResults is passed to the PubMed search as ``max_ids``.  For
        every ID the search returns, the entry is looked up both through a
        PubMed dictionary (parsed with Medline.RecordParser) and through a
        GenBank NCBI dictionary (parsed with GenBank.FeatureParser), and the
        two results are combined into a DBItem stored in self.items under
        that ID.

        Raises:
            ValueError: if self.database is anything other than 'PubMed'.
        """
        if self.database != 'PubMed':
            # The Biopython modules used below were previously imported only
            # when the database was 'PubMed'; any other value fell through
            # and failed with an UnboundLocalError on the first PubMed call.
            # Fail early with an explicit message instead.
            raise ValueError("Unsupported database: %r" % (self.database,))

        # Kept as function-local imports, as in the original code.
        from Bio import PubMed
        from Bio import GenBank
        from Bio import Medline

        hit_ids = PubMed.search_for(self.searchTerm, max_ids=self.maxResults)

        genbank_records = GenBank.NCBIDictionary(self.type,
                                                 'genbank',
                                                 parser=GenBank.FeatureParser())
        # NOTE(review): delay=1.0 presumably throttles requests to NCBI --
        # confirm against the Bio.PubMed documentation.
        medline_records = PubMed.Dictionary(delay=1.0,
                                            parser=Medline.RecordParser())

        # "entry_id" rather than "id" so the builtin id() is not shadowed.
        for entry_id in hit_ids:
            medline_record = medline_records[entry_id]
            # NOTE(review): the GenBank lookup reuses the ID returned by the
            # PubMed search -- verify that these IDs are valid GenBank keys.
            genbank_record = genbank_records[entry_id]
            self.items[entry_id] = DBItem(self.project,
                                          seq=genbank_record.seq,
                                          descript=genbank_record.description,
                                          id=entry_id,
                                          record=medline_record)
Exemplo n.º 2
0
"""Example script showing how to interact with PubMed."""
# standard library
import string

# biopython
from Bio import PubMed
from Bio import Medline

# do the search and get the ids
search_term = 'orchid'
orchid_ids = PubMed.search_for(search_term)

print orchid_ids

# access Medline through a dictionary interface that returns PubMed Records
rec_parser = Medline.RecordParser()
medline_dict = PubMed.Dictionary(parser=rec_parser)

for id in orchid_ids[0:5]:
    cur_record = medline_dict[id]
    print 'title:', string.rstrip(cur_record.title)
    print 'authors:', cur_record.authors
    print 'source:', string.strip(cur_record.source)
    print
Exemplo n.º 3
0
        elif opt == '-c':
            count_only = 1
        elif opt == '-d':
            try:
                delay = float(arg)
            except ValueError:
                print "Delay must be a floating point value"
                sys.exit(0)
            if delay < 0:
                print "Delay cannot be negative"
                sys.exit(0)
    if help:
        print_usage()
        sys.exit(0)

    print "Doing a PubMed search for %s..." % repr(query)

    ids = PubMed.search_for(query)
    print "Found %d citations" % len(ids)

    if count_only:
        sys.exit(0)

    pm = PubMed.Dictionary(delay=delay)
    for id in ids:
        try:
            print pm[id]
        except KeyError, x:
            print "Couldn't download %s, %s" % (id, x)
        sys.stdout.flush()
Exemplo n.º 4
0
         if reduce(operator.or_, 
                   [x in chunk.lower() for x in useless_lines]): continue
         if len(chunk) < 2: continue
         outfile.write('%010d|%s\n' % (chunkid, chunk))
         chunkmap[fakename].append(chunkid)
         chunkid += 1
 outfile.close()
 print "Saving chunkmap"
 pickle.dump(chunkmap, open(outmapname, "wb"), pickle.HIGHEST_PROTOCOL)
 print "These files couldn't be processed:"
 print '\n'.join(skipped)
 print "Opening (or creating) cache in", sys.argv[2]
 the_cache=StringDBDict(os.path.join(sys.argv[2], DEFAULT_CACHE_NAME),
                        file_mode='c')
 PubMed.download_many([str(x) for x in known_articles if str(x) not in 
                       the_cache.keys()], download_callback,
                      parser=Medline.RecordParser())
 mti_filename=sys.argv[1]+'.mti'
 print "Finished processing the cache. Using the cache to build", \
        mti_filename
 mti_file=open(mti_filename, "w")
 chunkmap={}
 hexfinder=re.compile(r'\\x[a-f0-9][a-f0-9]', re.IGNORECASE)
 for article in known_articles:
     try:
         article_record=the_cache[str(article)]
     except KeyError:
         print "Article doesn't exist in cache. Skipping."
         continue
     if article_record.abstract=='':
         print "Article", article, "has no abstract. Skipping."
Exemplo n.º 5
0
                      help="output format." , action = "store_true"  )

    parser.add_option("-l", "--library", dest="library",
                      help="library to add medline entries to." , metavar="FILE"  )

    parser.set_defaults(
        pmid = None,
        format= "oo",
        clipboard = False,
        library = None,
        )

    (options, args) = Experiment.Start( parser )

    if options.pmid:
        ids = PubMed.search_for(options.pmid)

    outlines = []

    medline_parser = Medline.RecordParser()
    medline_dict = PubMed.Dictionary(parser = medline_parser)
            
    for id in ids:

        this_record = medline_dict[id]

        year = this_record.publication_date.split(" ")[0]

        last_names = map( lambda x: x.split(" ")[0], this_record.authors)

        if options.format == "oo":
Exemplo n.º 6
0
        elif opt == '-c':
            count_only = 1
        elif opt == '-d':
            try:
                delay = float(arg)
            except ValueError:
                print "Delay must be a floating point value"
                sys.exit(0)
            if delay < 0:
                print "Delay cannot be negative"
                sys.exit(0)
    if help:
        print_usage()
        sys.exit(0)

    print "Doing a PubMed search for %s..." % repr(query)
    
    ids = PubMed.search_for(query)
    print "Found %d citations" % len(ids)

    if count_only:
        sys.exit(0)

    pm = PubMed.Dictionary(delay=delay)
    for id in ids:
        try:
            print pm[id]
        except KeyError, x:
            print "Couldn't download %s, %s" % (id, x)
        sys.stdout.flush()
Exemplo n.º 7
0
"""Example script showing how to interact with PubMed."""
# standard library
import string

# biopython
from Bio import PubMed
from Bio import Medline

# do the search and get the ids
search_term = 'orchid'
orchid_ids = PubMed.search_for(search_term)

print orchid_ids

# access Medline through a dictionary interface that returns PubMed Records
rec_parser = Medline.RecordParser()
medline_dict = PubMed.Dictionary(parser = rec_parser)

for id in orchid_ids[0:5]:
    cur_record = medline_dict[id]
    print 'title:', string.rstrip(cur_record.title)
    print 'authors:', cur_record.authors
    print 'source:', string.strip(cur_record.source)
    print