def test_descs(self):
    # A 'GSM' id should come back from the factory as a Sample that carries
    # the same geo_id, and its descriptions() call should yield a dict.
    expected_id = 'GSM32100'
    factory = Factory()
    gsm = factory.newGEO(expected_id)

    self.assertIsInstance(gsm, Sample)
    self.assertEqual(gsm.geo_id, expected_id)
    self.assertIsInstance(gsm.descriptions(), dict)
def test_new_geo(self):
    # For every known id, Factory.newGEO must build an instance of the
    # expected class, and that instance must report the id it was built from.
    factory = Factory()
    expected_classes = {
        'GSE10072': Series,
        'GSM15718': Sample,
        'GDS994': Dataset,
        'GPL96': Platform,
    }
    for geo_id in expected_classes:
        built = factory.newGEO(geo_id)
        self.assertIsInstance(built, expected_classes[geo_id])
        self.assertEqual(built.geo_id, geo_id)
def test_new_geo(self):
    # Each id below is handed to the factory; the object returned has to be
    # of the class mapped to that id and has to echo the id back as geo_id.
    new_geo = Factory().newGEO
    class_by_id = {
        'GSE10072': Series,
        'GSM15718': Sample,
        'GDS994': Dataset,
        'GPL96': Platform,
    }
    for wanted_id, wanted_class in class_by_id.items():
        instance = new_geo(wanted_id)
        self.assertIsInstance(instance, wanted_class)
        self.assertEqual(instance.geo_id, wanted_id)
def test_get_field_words(self):
    # Word2Geo.get_field_words is run on the GEO object for 'GSE10072' and
    # the length of the value stored under each field key is checked.
    series = Factory().newGEO('GSE10072')
    field_words = Word2Geo.get_field_words(series)

    # Kept as an ordered sequence so the checks run in a fixed order.
    expected_lengths = (
        ('title', 42),
        ('description', 0),
        ('summary', 738),  # not quite sure why this isn't 741
    )
    for field, length in expected_lengths:
        self.assertEqual(len(field_words[field]), length)
def main():
    '''
    Insert the words of every selected geo object, logging progress.

    The ids come from get_geo_ids(get_options()).  Each id is turned into a
    geo object by a single shared Factory, passed to insert_series(), and the
    returned stats are logged through warn().  options.fuse limits how many
    ids are processed: the loop stops once the countdown reaches exactly 0,
    so a fuse that starts at 0 or below never stops the loop early.

    Always returns 0.
    '''
    options = get_options()
    geo_ids = get_geo_ids(options)
    factory = Factory()
    warn("insert_geo_words starting: %s" % str(datetime.datetime.now()))

    remaining = options.fuse
    for geo_id in geo_ids:
        geo = factory.newGEO(geo_id)
        warn("inserting %s" % geo.geo_id)
        warn("%s: %s" % (geo_id, insert_series(geo)))

        remaining -= 1
        if remaining == 0:
            break

    warn("insert_geo_words done: %s" % str(datetime.datetime.now()))
    return 0
def _geolist2pmidlist(geo_ids):
    '''
    Convert a list of mixed pmids and geo_ids to a flat list of pmids.

    Entries made up only of digits are taken to be pmids and are kept as-is.
    Every other entry is looked up with Factory().newGEO() and replaced by
    the pubmed_id value(s) of the resulting geo object.  When the lookup or
    the pubmed_id access raises (id not a geo id, or geo didn't have any
    pubmed_id), the error is reported through warn() and the entry is
    skipped.

    geo_ids -- iterable of id strings (pmids and/or geo ids)

    Returns the list of pmids, in the order of the input entries.
    '''
    pmidlist = []
    for geo_id in geo_ids:
        if re.match(r'^\d+$', geo_id):
            pmidlist.append(geo_id)
            continue

        try:
            pmids = Factory().newGEO(geo_id).pubmed_id
        except Exception as e:
            warn("caught %s" % (e))
            continue  # id not a geo id, or geo didn't have any pubmed_id

        # pubmed_id might be a single value or a collection of values.
        # list.append() never raises, so relying on an exception to detect
        # the collection case would nest the whole collection as one
        # element; the type has to be checked explicitly to keep the
        # result flat.
        if isinstance(pmids, (list, tuple, set)):
            pmidlist.extend(pmids)
        else:
            pmidlist.append(pmids)
    return pmidlist