def create_filled_repository_from_scratch(self, sources=2):
    """Fill the test repository from the bundled fixture data and dump it to SQL.

    Calls create_all() on the database metadata, downloads the biographies
    of the 'knaw' fixture source (and of 'knaw2' too when ``sources`` is
    greater than 1), registers the 'bioport' source with quality 10000 and
    then runs mysqldump on the ``bioport_test`` database, redirecting the
    output to SQLDUMP_FILENAME.

    arguments:
        sources - how many fixture sources to load; any value above 1
            loads both 'knaw' and 'knaw2'
    returns:
        self.repo (self._is_filled has been set to True by then)
    """
    repo = self.repo
    repo.db.metadata.create_all()

    # (source id, path of its list.xml relative to THIS_DIR)
    fixtures = [(u'knaw', 'data/knaw/list.xml')]
    if sources > 1:
        fixtures.append((u'knaw2', 'data/knaw2/list.xml'))

    for source_id, relative_path in fixtures:
        list_url = 'file://%s' % os.path.abspath(os.path.join(THIS_DIR, relative_path))
        fixture_source = Source(id=source_id, url=list_url, description='test')
        repo.add_source(fixture_source)
        repo.download_biographies(fixture_source)
    repo.db._update_category_table()

    # also add Bioport source
    bioport_source = Source('bioport', repository=repo)
    repo.add_source(bioport_source)
    bioport_source.set_quality(10000)

    # Credentials for mysqldump are taken from the module-level DSN.
    parsed_dsn = sqlalchemy.engine.url._parse_rfc1738_args(DSN)
    user = parsed_dsn.username or ""
    password = parsed_dsn.password or ""
    # NOTE(review): the values are interpolated unquoted into the command
    # string; sh() presumably hands it to a shell (the '>' redirect relies
    # on that) - confirm that DSN never contains shell metacharacters.
    if password:
        dump_command = 'mysqldump -u %s -p%s bioport_test > %s' % (user, password, SQLDUMP_FILENAME)
    else:
        # without a password the -p option is left out entirely
        dump_command = 'mysqldump -u %s bioport_test > %s' % (user, SQLDUMP_FILENAME)
    sh(dump_command)

    self._is_filled = True
    return repo
def get_bioport_biography(self, person, create_if_not_exists=True):
    """Get or, if it does not yet exist, create the Bioport biography of a person.

    The Bioport biography is the biodes document that represents the
    interventions of the editors in the biographical portal.

    If the 'bioport' source is not yet among self.get_sources(), it is
    added first (with quality 10000).

    arguments:
        person - an instance of Person
        create_if_not_exists - if true (the default), a new biography is
            created with self._create_bioport_biography(person) when none
            is found
    returns:
        an instance of Biography, or None when no biography was found and
        create_if_not_exists is false
    """
    source = BioPortSource()
    known_source_ids = [s.id for s in self.get_sources()]
    if source.id not in known_source_ids:
        src = Source('bioport', repository=self)
        self.add_source(src)
        src.set_quality(10000)

    # get_biographies() may return a generator, so materialise it before
    # testing for emptiness
    biographies = list(
        self.get_biographies(source=source, bioport_id=person.get_bioport_id())
    )
    if biographies:
        # Finding more than one Bioport biography for a person is tolerated
        # (the warning for that case was disabled on purpose); we simply use
        # the first one in the order in which get_biographies() returns them.
        return biographies[0]

    if create_if_not_exists:
        # create a new biography
        return self._create_bioport_biography(person)
    return None
def test_set_quality(self):
    """Check the default quality of saved sources and the effect of set_quality().

    Four sources are saved one after the other; each is expected to report
    quality 0 right after saving and to be appended to the end of
    get_sources(). After re-ranking three of them, the fourth is given a
    quality of 9 and is expected to end up with len(get_sources()) - 1.
    """
    repo = self.repo
    initial_sources = repo.get_sources()

    created = []
    for source_id in (u'test1', u'test2', u'test3', u'test4'):
        new_source = Source(id=source_id, url='x', repository=repo)
        repo.save(new_source)
        self.assertEqual(new_source.quality, 0)
        created.append(new_source)
    first, second, third, fourth = created

    # last in is, by default, of lowest quality
    self.assertEqual(repo.get_sources(), initial_sources + created)

    # we say that the second source should have lowest quality, followed by
    # the first and then the fourth
    for quality, ranked_source in enumerate((second, first, fourth)):
        ranked_source.set_quality(quality)
    # on failure, show the (id, quality) pairs of all sources
    self.assertEqual(
        fourth.quality,
        2,
        [(s.id, s.quality) for s in repo.get_sources()],
    )

    # presumably a quality above the highest possible rank is capped -
    # the assertion only pins the resulting value
    third.set_quality(9)
    self.assertEqual(third.quality, len(repo.get_sources()) - 1)