def _create_harvester(self, config=True):
    """Build a DDI harvester plus a harvest job registered with the Session.

    The job gets a fresh HarvestSource titled "Test" that points at
    "http://foo" and has type "DDI".

    :param config: when truthy the source config is set to an empty
        string, otherwise it is set to None.
    :returns: a ``(harvester, harvest_job)`` tuple.
    """
    harvester = DDIHarvester()
    harvester.config = "{}"

    job = HarvestJob()
    job.source = HarvestSource()
    source = job.source
    source.title = "Test"
    source.url = "http://foo"
    source.config = '' if config else None
    source.type = "DDI"

    Session.add(job)
    return harvester, job
def test_zzcomplete(self):
    """Time the gather, fetch and import stages for every gathered object.

    The test is skipped unconditionally by the ``raise SkipTest`` on its
    first line; everything after that only runs if the skip is removed by
    hand. When enabled, it configures a harvest source pointing at the FSD
    record URL list, gathers harvest objects, and for each one prints its
    URL and the time taken by fetch + import, then prints the total time.

    :raises SkipTest: always, as long as the first statement is in place.
    """
    raise SkipTest('Takes ages, do not run')

    # NOTE(review): presumably puts back the real urlopen after a stub
    # installed elsewhere in this file -- confirm against module setup.
    urllib2.urlopen = realopen

    harv = DDIHarvester()
    harv.config = "{}"
    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://www.fsd.uta.fi/fi/aineistot/luettelo/fsd-ddi-records-uris-fi.txt"
    harvest_job.source.config = ''
    harvest_job.source.type = "DDI"
    Session.add(harvest_job)

    gathered = harv.gather_stage(harvest_job)
    diffs = []
    for gath in gathered:
        harvest_object = HarvestObject.get(gath)
        # Single-argument parenthesised print behaves the same with the
        # Python 2 print statement and the print function.
        print(json.loads(harvest_object.content)['url'])
        before = datetime.now()
        harv.fetch_stage(harvest_object)
        harv.import_stage(harvest_object)
        diff = datetime.now() - before
        print(diff)
        diffs.append(diff)

    # The start value must be a timedelta *instance*: the default start of
    # 0 cannot be added to a timedelta, and neither can the class itself.
    print(sum(diffs, timedelta()))