def fetch_abstr(term, **kwargs):
    """Query PubMed for *term* and return a list of JSON-like abstract models.

    Extra keyword arguments are forwarded to fetch_XML unchanged.
    Note: memory usage is roughly 1 MB per 100 PubMed abstracts.
    """
    results = []
    sax_parser = make_parser()
    # The handler appends one abstract model to `results` per record parsed.
    handler = eFetchResultHandler(results)
    sax_parser.setContentHandler(handler)
    xml_stream = fetch_XML(term, **kwargs)
    sax_parser.parse(xml_stream)
    return results
def fetch_ids(id_list, **kwargs):
    """Like fetch_abstr, but query PubMed with a list of PMID strings.

    PMIDs are requested in comma-joined chunks of at most
    MAX_ID_NUMBER_PER_REQUEST ids per request, because a single request
    with all ids would make the URL too long.

    Returns a list of JSON-like abstract data models (possibly empty).
    """
    k = MAX_ID_NUMBER_PER_REQUEST
    # BUGFIX: the original sliced id_list[i*k:(i+1)*k-1], which silently
    # dropped the last PMID of every chunk -- Python slice ends are already
    # exclusive, so no "-1" is needed.
    # BUGFIX: the original computed range(1+(n-1)/k); on Python 3 `/` is
    # float division and range() raises TypeError, and for an empty id_list
    # it produced a spurious empty chunk. Stepping by k avoids both issues.
    id_chunk_list = [
        # Looks like ['12345678,12345678,...', '12345678,...', ...]
        ','.join(id_list[i:i + k])
        for i in range(0, len(id_list), k)
    ]
    abstr_list = []
    parser = make_parser()
    parser.setContentHandler(eFetchResultHandler(abstr_list))
    for id_chunk in id_chunk_list:
        parser.parse(eFetch_query(id=id_chunk))
    return abstr_list