def test_archiveitems_are_executed_in_right_order(self):
    """Archiveitems are dispatched ordered by ``first_workflow_success``.

    The patch/dispatch/capture sequence was duplicated verbatim for the
    two orderings; it is factored into a local helper so both halves of
    the test stay in sync.
    """
    from datetime import datetime, timedelta

    def _dispatched_archiveitems():
        # Run the aggregator with the processing step patched out and
        # capture the archiveitems that would have been processed.
        with patch('webui.scheduler.tasks._aggregator_process_archiveitems') \
                as process_archiveitems:
            process_aggregator.delay(self.aggregator, force=False)
            return tuple(process_archiveitems.call_args[0][0])

    aai1, aai2 = self.aggregator.aggregatorarchiveitem_set.all()
    # aai1 succeeded an hour before aai2, so it must be dispatched first.
    aai1.first_workflow_success = datetime.utcnow() - timedelta(hours=1)
    aai2.first_workflow_success = datetime.utcnow()
    aai1.save()
    aai2.save()
    archiveitems = _dispatched_archiveitems()
    self.assertEqual(len(archiveitems), 2)
    self.assertEqual(archiveitems[0].pk, aai1.archiveitem.pk)
    self.assertEqual(archiveitems[1].pk, aai2.archiveitem.pk)

    # double check this: execute again in reverse order
    aai1.first_workflow_success = datetime.utcnow()
    aai2.first_workflow_success = datetime.utcnow() - timedelta(hours=1)
    aai1.save()
    aai2.save()
    archiveitems = _dispatched_archiveitems()
    self.assertEqual(len(archiveitems), 2)
    self.assertEqual(archiveitems[0].pk, aai2.archiveitem.pk)
    self.assertEqual(archiveitems[1].pk, aai1.archiveitem.pk)
def test_data_copied_if_rule_missing(self):
    """Without a silk rule the data is still copied across stores,
    but the scheduler run is marked INCOMPLETE instead of SUCCESS.
    """
    Scheduler.objects.all().delete()
    self.aggregator.silk_rule = ''
    self.aggregator.save()

    process_aggregator.delay(self.aggregator, force=True)

    # Exactly one scheduler record, flagged as incomplete.
    self.assertEqual(Scheduler.objects.count(), 1)
    run = Scheduler.objects.get()
    self.assertEqual(run.status, Scheduler.INCOMPLETE, run.error)

    # Every archiveitem's mapped graph made it into both stores.
    for item in self.aggregator.archiveitems.all():
        graph = item.datagraph_mapped_name
        sparql = "SELECT * WHERE {GRAPH <%s> {?a ?b ?c}}" \
            "ORDER BY ?a ?b ?c" % graph
        gremlin = "g.V('name', 'gt:{}').in('source')".format(
            item.tablename
        )
        triples = list(self.virtuoso.client_query(sparql).fetchall())
        vertices = list(self.virtuoso_master.client_query(gremlin))
        self.assertEqual(len(triples), 20)
        self.assertEqual(len(vertices), 4)
        for triple in triples:
            self._assertTitanTriple(triple, vertices)
def test_archiveitems_are_executed_again_if_forced(self):
    """A forced run re-dispatches every archiveitem, even those a
    previous run already processed.
    """
    process_aggregator.delay(self.aggregator)

    target = 'webui.scheduler.tasks._aggregator_process_archiveitems'
    with patch(target) as mocked_process:
        process_aggregator.delay(self.aggregator, force=True)
        dispatched = tuple(mocked_process.call_args[0][0])
    self.assertEqual(len(dispatched), 2)
def test_does_not_crash(self):
    """End-to-end smoke test: ingest a source, run the aggregator,
    then dump the slicer over HTTP and parse the streamed JSON.
    """
    from webui.controller.models import Aggregator, Source
    from webui.scheduler.tasks import process_aggregator, process_source

    process_source.delay(Source.objects.get(name='OSM (test)'))
    process_aggregator.delay(Aggregator.objects.get(name='POI'))

    url = '/l/slicer/{}/dump/'.format(self.slicer.pk)
    response = self.client.get(url)
    self.assertEqual(response.status_code, 200)

    payload = json.loads(''.join(response.streaming_content))
    self.assertEqual(len(payload['features']), 10)
def test_archiveitems_are_executed_again_if_they_change(self): process_aggregator.delay(self.aggregator) # create fake Schedulers (needed by aggregators) for archiveitem in self.aggregator.archiveitems.all()[:1]: dataset = archiveitem.dataset SchedulerFactory( content_object=dataset ) with patch('webui.scheduler.tasks._aggregator_process_archiveitems') \ as process_archiveitems: process_aggregator.delay(self.aggregator, force=False) archiveitems = tuple(process_archiveitems.call_args[0][0]) self.assertEqual(len(archiveitems), 1) self.assertEqual(archiveitems[0].pk, archiveitem.pk)
def test_silk_executed_correctly(self):
    """With a silk rule in place, a forced run succeeds and creates the
    expected 'bristle' links in the master graph.
    """
    Scheduler.objects.all().delete()
    # Before the run no BoardGame vertex has outgoing 'bristle' edges:
    # (count of out-edges) - (count of vertices) == 0 — presumably each
    # vertex contributes 0 here; TODO confirm the gremlin arithmetic.
    self.assertEqual(
        self.virtuoso_master.graph.gremlin.command(
            "g.V('type', '{0}').out('bristle').count() "
            "- g.V('type', '{0}').count()".format("sd:BoardGame")
        ),
        0
    )
    process_aggregator.delay(self.aggregator, force=True)
    # Exactly one scheduler record, and this time the run succeeded.
    self.assertEqual(Scheduler.objects.count(), 1)
    scheduler = Scheduler.objects.get()
    self.assertEqual(scheduler.status, Scheduler.SUCCESS, scheduler.error)
    # Data was copied for every archiveitem, same as the rule-less case.
    for archive_item in self.aggregator.archiveitems.all():
        graph_name = archive_item.datagraph_mapped_name
        query_a = "SELECT * WHERE {GRAPH <%s> {?a ?b ?c}}" \
            "ORDER BY ?a ?b ?c" % graph_name
        query_b = "g.V('name', 'gt:{}').in('source')".format(
            archive_item.tablename
        )
        result_a = list(self.virtuoso.client_query(query_a).fetchall())
        result_b = list(self.virtuoso_master.client_query(query_b))
        self.assertEqual(len(result_a), 20)
        self.assertEqual(len(result_b), 4)
        for elem_a in result_a:
            self._assertTitanTriple(elem_a, result_b)
    # After the run the same difference is 2: the silk rule added
    # 'bristle' edges between matched vertices.
    self.assertEqual(
        self.virtuoso_master.graph.gremlin.command(
            "g.V('type', '{0}').out('bristle').count() "
            "- g.V('type', '{0}').count()".format("sd:BoardGame")
        ),
        2
    )
    # All 'bristle' edges pointing at 'Dominion' come from one vertex.
    self.assertEqual(
        len({
            x.eid
            for x in self.virtuoso_master.client_query(
                "g.V('sd:name', 'Dominion').in('bristle')"
            )
        }),
        1
    )
def test_master_namedgraph_is_dropped_before_add(self):
    """The master store's named graph is dropped before re-ingestion, so
    stale triples from a previous (larger) ingest do not survive.
    """
    # Seed the master store with the FULL mapped file, then put only the
    # CROPPED file into the source store: if the master graph were not
    # dropped first, the extra triples would still be there afterwards.
    self.virtuoso.clear(self.bgg_archiveitem.datagraph_mapped_name)
    self.virtuoso_master.ingest(
        self._get_test_file('boardgamegeek-games-mapped.nt', 'scheduler'),
        graph=self.bgg_archiveitem.datagraph_mapped_name
    )
    self.virtuoso.ingest(
        self._get_test_file('boardgamegeek-games-mapped-cropped.nt',
                            'scheduler'),
        graph=self.bgg_archiveitem.datagraph_mapped_name
    )
    Scheduler.objects.all().delete()
    # No silk rule -> run completes as INCOMPLETE (data copy still runs).
    self.aggregator.silk_rule = ''
    self.aggregator.save()
    process_aggregator.delay(self.aggregator, force=True)
    self.assertEqual(Scheduler.objects.count(), 1)
    scheduler = Scheduler.objects.get()
    self.assertEqual(
        scheduler.status, Scheduler.INCOMPLETE, scheduler.error)
    # Only the cropped data (10 triples / 2 vertices) must remain —
    # half of the 20/4 seen when the full file is ingested.
    archive_item = self.bgg_archiveitem
    graph_name = archive_item.datagraph_mapped_name
    query_a = "SELECT * WHERE {GRAPH <%s> {?a ?b ?c}}" \
        "ORDER BY ?a ?b ?c" % graph_name
    query_b = "g.V('name', 'gt:{}').in('source')".format(
        archive_item.tablename
    )
    result_a = list(self.virtuoso.client_query(query_a).fetchall())
    result_b = list(self.virtuoso_master.client_query(query_b))
    self.assertEqual(len(result_a), 10)
    self.assertEqual(len(result_b), 2)
    for elem_a in result_a:
        self._assertTitanTriple(elem_a, result_b)