def run_new_query(self, prior_query):
    """
    Execute this query against PubMed, persist the outcome, and return it.

    The search term is read from ``self.search.search_string_text``. If a
    truthy ``prior_query`` is supplied, its JSON-encoded ``results`` are
    decoded and its ``'ids'`` entry is used to compute which IDs were
    added or removed relative to that earlier run; otherwise every ID found
    is reported as added and nothing as removed.

    Returns a dict with the keys ``"ids"``, ``"added"`` and ``"removed"``.
    The same dict is JSON-serialized into ``self.results`` before
    ``self.save()`` and ``self.create_identifiers()`` are called.

    Raises ``TooManyPubMedResults`` when the reported ID count is greater
    than ``self.MAX_QUERY_SIZE``.
    """
    pubmed = PubMedSearch(term=self.search.search_string_text)

    # Ask for the hit count first so an oversized query is rejected before
    # any IDs are fetched.
    pubmed.get_ids_count()
    if pubmed.id_count > self.MAX_QUERY_SIZE:
        message = (
            "Too many PubMed references found: {0}; reduce query scope to "
            "fewer than {1}"
        ).format(pubmed.id_count, self.MAX_QUERY_SIZE)
        raise TooManyPubMedResults(message)

    pubmed.get_ids()

    if prior_query:
        previous_ids = json.loads(prior_query.results)['ids']
        delta = pubmed.get_changes_from_previous_search(
            old_ids_list=previous_ids)
        added = list(delta['added'])
        removed = list(delta['removed'])
    else:
        # No earlier run to compare against: everything counts as new.
        added = pubmed.ids
        removed = []

    results = {"ids": pubmed.ids, "added": added, "removed": removed}

    self.results = json.dumps(results)
    self.save()
    self.create_identifiers()
    return results
def run_new_query(self, prior_query):
    """
    Run this query against PubMed, store the results, and return them.

    The search term is read from ``self.search.search_string_text``. When
    ``prior_query`` is truthy, its JSON-encoded ``results`` are decoded and
    the ``'ids'`` list is passed to
    ``PubMedSearch.get_changes_from_previous_search`` to determine which
    IDs were added and removed. Without a prior query, all IDs found are
    reported as added and ``"removed"`` is an empty list.

    Returns a dict with the keys ``"ids"``, ``"added"`` and ``"removed"``;
    the same dict is JSON-serialized into ``self.results`` before
    ``self.save()`` and ``self.create_identifiers()`` are called.

    Raises ``TooManyPubMedResults`` when the reported ID count exceeds the
    query size limit.
    """
    # Create a new search
    search = PubMedSearch(term=self.search.search_string_text)
    search.get_ids_count()

    # Honour an optional MAX_QUERY_SIZE attribute on the object so the limit
    # can be configured; 5000 remains the limit when none is defined.
    query_size_limit = getattr(self, 'MAX_QUERY_SIZE', 5000)
    if search.id_count > query_size_limit:
        raise TooManyPubMedResults(
            "Too many PubMed references found: {0}; reduce query scope to fewer than {1}"
            .format(search.id_count, query_size_limit))

    search.get_ids()

    # "removed" is always a list (never None) so that consumers of the
    # returned dict and of the stored JSON see one type regardless of
    # whether a prior query existed.
    results = {"ids": search.ids, "added": search.ids, "removed": []}

    if prior_query:
        old_ids_list = json.loads(prior_query.results)['ids']
        changes = search.get_changes_from_previous_search(
            old_ids_list=old_ids_list)
        results['added'] = list(changes['added'])
        results['removed'] = list(changes['removed'])

    self.results = json.dumps(results)
    self.save()
    self.create_identifiers()
    return results
class PubMedSearchTest(TestCase):
    """
    Make sure that a PubMed search returns the expected number of IDs, and
    that all IDs are identical to what was expected.

    Example from the PubMed quickstart guide here:
    http://www.ncbi.nlm.nih.gov/books/NBK25500/
    """

    def setUp(self):
        # Search term and the IDs the tests below expect it to return.
        self.term = "science[journal] AND breast cancer AND 2008[pdat]"
        self.results_list = [
            19008416, 18927361, 18787170,
            18487186, 18239126, 18239125,
        ]

    def test_standard_query(self):
        """Default settings: the expected IDs arrive in a single request."""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        self.assertEqual(self.search.request_count, 1)
        self._results_check()

    def test_multiquery(self):
        """With ``retmax=3`` the same six IDs are expected after two requests."""
        self.search = PubMedSearch(term=self.term, retmax=3)
        self.search.get_ids_count()
        self.search.get_ids()
        self.assertEqual(self.search.request_count, 2)
        self._results_check()

    def test_changes_from_previous_search(self):
        """Added/removed IDs are reported relative to an older ID list."""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        # 999999 is absent from the expected results; the last two expected
        # IDs are absent from this old list.
        old_ids_list = [999999, 19008416, 18927361, 18787170, 18487186]
        changes = self.search.get_changes_from_previous_search(
            old_ids_list=old_ids_list)
        self.assertEqual(changes['added'], set([18239126, 18239125]))
        self.assertEqual(changes['removed'], set([999999]))

    def test_complex_query(self):
        """
        Make sure that we can send a very complicated search term and that
        at least some results are returned. This is commonly done when
        using MeSH search terms in PubMed.
        """
        self.term = """(monomethyl OR MEP OR mono-n-butyl OR MBP OR mono (3-carboxypropyl) OR mcpp OR monobenzyl OR mbzp OR mono-isobutyl OR mibp OR mono (2-ethylhexyl) OR mono (2-ethyl-5-oxohexyl) OR meoph OR mono (2-ethyl-5-carboxypentyl) OR mecpp OR mepp OR mono (2-ethyl-5-hydroxyhexyl) OR mehp OR mono (2-ethyl-5-oxyhexyl) OR mono (2-ethyl-4-hydroxyhexyl) OR mono (2-ethyl-4-oxyhexyl) OR mono (2-carboxymethyl) OR mmhp OR mehp OR dehp OR 2-ethylhexanol OR (phthalic acid)) AND (liver OR hepato* OR hepat*) AND ((cell proliferation) OR (cell growth) OR (dna replication) OR (dna synthesis) OR (replicative dna synthesis) OR mitosis OR (cell division) OR (growth response) OR hyperplasia OR hepatomegaly) AND (mouse OR rat OR hamster OR rodent OR murine OR Mus musculus or Rattus)"""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        # Compare the NUMBER of IDs with the threshold. Comparing the list
        # itself with an int is always true on Python 2 and raises TypeError
        # on Python 3, so it never checked anything.
        self.assertGreaterEqual(len(self.search.ids), 212)

    def _results_check(self):
        """Assert that the count and the exact ID list match the fixture."""
        self.assertEqual(self.search.id_count, 6)
        self.assertListEqual(self.search.ids, self.results_list)
class PubMedSearchTest(TestCase):
    """
    Make sure that a PubMed search returns the expected number of IDs, and
    that all IDs are identical to what was expected.

    Example from the PubMed quickstart guide here:
    http://www.ncbi.nlm.nih.gov/books/NBK25500/
    """

    def setUp(self):
        # Query used by most tests, plus the ID list those tests expect.
        self.term = "science[journal] AND breast cancer AND 2008[pdat]"
        self.results_list = [19008416, 18927361, 18787170,
                             18487186, 18239126, 18239125]

    def test_standard_query(self):
        """A default search is expected to need exactly one request."""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        self.assertEqual(self.search.request_count, 1)
        self._results_check()

    def test_multiquery(self):
        """A search built with ``retmax=3`` is expected to need two requests."""
        self.search = PubMedSearch(term=self.term, retmax=3)
        self.search.get_ids_count()
        self.search.get_ids()
        self.assertEqual(self.search.request_count, 2)
        self._results_check()

    def test_changes_from_previous_search(self):
        """Check the added/removed sets computed against an older ID list."""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        # The old list holds one ID that is not in the expected results
        # (999999) and lacks the last two expected IDs.
        old_ids_list = [999999, 19008416, 18927361, 18787170, 18487186]
        changes = self.search.get_changes_from_previous_search(old_ids_list=old_ids_list)
        self.assertEqual(changes['added'], set([18239126, 18239125]))
        self.assertEqual(changes['removed'], set([999999]))

    def test_complex_query(self):
        """
        Make sure that we can send a very complicated search term and that
        at least some results are returned. This is commonly done when
        using MeSH search terms in PubMed.
        """
        self.term = """(monomethyl OR MEP OR mono-n-butyl OR MBP OR mono (3-carboxypropyl) OR mcpp OR monobenzyl OR mbzp OR mono-isobutyl OR mibp OR mono (2-ethylhexyl) OR mono (2-ethyl-5-oxohexyl) OR meoph OR mono (2-ethyl-5-carboxypentyl) OR mecpp OR mepp OR mono (2-ethyl-5-hydroxyhexyl) OR mehp OR mono (2-ethyl-5-oxyhexyl) OR mono (2-ethyl-4-hydroxyhexyl) OR mono (2-ethyl-4-oxyhexyl) OR mono (2-carboxymethyl) OR mmhp OR mehp OR dehp OR 2-ethylhexanol OR (phthalic acid)) AND (liver OR hepato* OR hepat*) AND ((cell proliferation) OR (cell growth) OR (dna replication) OR (dna synthesis) OR (replicative dna synthesis) OR mitosis OR (cell division) OR (growth response) OR hyperplasia OR hepatomegaly) AND (mouse OR rat OR hamster OR rodent OR murine OR Mus musculus or Rattus)"""
        self.search = PubMedSearch(term=self.term)
        self.search.get_ids_count()
        self.search.get_ids()
        # The threshold applies to how many IDs came back. A list-vs-int
        # comparison is vacuously true on Python 2 and a TypeError on
        # Python 3, so assert on the length instead.
        returned_count = len(self.search.ids)
        self.assertGreaterEqual(returned_count, 212)

    def _results_check(self):
        """Shared assertions: six IDs reported and the exact expected list."""
        self.assertEqual(self.search.id_count, 6)
        self.assertListEqual(self.search.ids, self.results_list)