Пример #1
0
    def test_archiveitems_are_executed_in_right_order(self):
        """Archive items are handed to processing by ``first_workflow_success``.

        The two aggregator/archive-item links get timestamps one hour apart.
        The sequence passed as first positional argument to the patched
        ``_aggregator_process_archiveitems`` must list the archive item with
        the older timestamp first. The timestamps are then swapped and the
        check is repeated as a double check.
        """
        from datetime import datetime, timedelta

        patch_target = 'webui.scheduler.tasks._aggregator_process_archiveitems'
        hour = timedelta(hours=1)

        def run_and_capture():
            # Run the aggregator with the processing step mocked out and
            # return the sequence it was asked to process.
            with patch(patch_target) as mocked_step:
                process_aggregator.delay(self.aggregator, force=False)
                return tuple(mocked_step.call_args[0][0])

        link_a, link_b = self.aggregator.aggregatorarchiveitem_set.all()

        # First pass: link_a succeeded an hour before link_b.
        link_a.first_workflow_success = datetime.utcnow() - hour
        link_b.first_workflow_success = datetime.utcnow()
        link_a.save()
        link_b.save()

        processed = run_and_capture()
        self.assertEqual(len(processed), 2)
        self.assertEqual(processed[0].pk, link_a.archiveitem.pk)
        self.assertEqual(processed[1].pk, link_b.archiveitem.pk)

        # double check this: execute again in reverse order
        link_a.first_workflow_success = datetime.utcnow()
        link_b.first_workflow_success = datetime.utcnow() - hour
        link_a.save()
        link_b.save()

        processed = run_and_capture()
        self.assertEqual(len(processed), 2)
        self.assertEqual(processed[0].pk, link_b.archiveitem.pk)
        self.assertEqual(processed[1].pk, link_a.archiveitem.pk)
Пример #2
0
    def test_data_copied_if_rule_missing(self):
        """A forced run with an empty silk rule still yields queryable data.

        After clearing ``silk_rule`` and forcing the aggregator, exactly one
        Scheduler must exist with status ``INCOMPLETE``. For every archive
        item the SPARQL query on its mapped graph must return 20 rows and the
        Gremlin query on the master store must return 4 results; each row is
        then checked against those results with ``_assertTitanTriple``.
        """
        Scheduler.objects.all().delete()
        self.aggregator.silk_rule = ''
        self.aggregator.save()

        process_aggregator.delay(self.aggregator, force=True)

        self.assertEqual(Scheduler.objects.count(), 1)
        run = Scheduler.objects.get()
        # The scheduler error is passed as the failure message.
        self.assertEqual(run.status, Scheduler.INCOMPLETE, run.error)

        sparql_template = (
            "SELECT * WHERE {GRAPH <%s> {?a ?b ?c}}"
            "ORDER BY ?a ?b ?c"
        )
        gremlin_template = "g.V('name', 'gt:{}').in('source')"

        for item in self.aggregator.archiveitems.all():
            sparql = sparql_template % item.datagraph_mapped_name
            gremlin = gremlin_template.format(item.tablename)

            triples = list(self.virtuoso.client_query(sparql).fetchall())
            vertices = list(self.virtuoso_master.client_query(gremlin))

            self.assertEqual(len(triples), 20)
            self.assertEqual(len(vertices), 4)
            for triple in triples:
                self._assertTitanTriple(triple, vertices)
Пример #3
0
    def test_archiveitems_are_executed_again_if_forced(self):
        """A forced run submits both archive items even after a prior run.

        The aggregator is processed once normally, then again with
        ``force=True`` while ``_aggregator_process_archiveitems`` is patched.
        The sequence passed to the patched step must contain two items.
        """
        patch_target = 'webui.scheduler.tasks._aggregator_process_archiveitems'

        # Initial, unpatched run.
        process_aggregator.delay(self.aggregator)

        with patch(patch_target) as mocked_step:
            process_aggregator.delay(self.aggregator, force=True)
            positional_args = mocked_step.call_args[0]
            submitted = tuple(positional_args[0])

        self.assertEqual(len(submitted), 2)
Пример #4
0
    def test_does_not_crash(self):
        """Smoke-test the slicer dump endpoint after processing test data.

        Processes the 'OSM (test)' source and the 'POI' aggregator, then
        requests the dump URL of ``self.slicer``. The response must have
        status 200 and a streamed JSON body with 10 entries under
        ``features``.
        """
        from webui.controller.models import Aggregator, Source
        from webui.scheduler.tasks import process_aggregator, process_source

        osm_source = Source.objects.get(name='OSM (test)')
        process_source.delay(osm_source)

        poi_aggregator = Aggregator.objects.get(name='POI')
        process_aggregator.delay(poi_aggregator)

        response = self.client.get('/l/slicer/{}/dump/'.format(self.slicer.pk))
        self.assertEqual(response.status_code, 200)

        # streaming_content yields bytestrings; joining them with a text ''
        # raises TypeError on Python 3. Join as bytes and decode explicitly,
        # which behaves the same on Python 2.
        # NOTE(review): assumes the response uses the UTF-8 charset.
        body = b''.join(response.streaming_content).decode('utf-8')
        data = json.loads(body)

        self.assertEqual(len(data['features']), 10)
Пример #5
0
    def test_does_not_crash(self):
        """Smoke-test the slicer dump endpoint after processing test data.

        Processes the 'OSM (test)' source and the 'POI' aggregator, then
        requests the dump URL of ``self.slicer``. The response must have
        status 200 and a streamed JSON body with 10 entries under
        ``features``.
        """
        from webui.controller.models import Aggregator, Source
        from webui.scheduler.tasks import process_aggregator, process_source

        osm_source = Source.objects.get(name='OSM (test)')
        process_source.delay(osm_source)

        poi_aggregator = Aggregator.objects.get(name='POI')
        process_aggregator.delay(poi_aggregator)

        response = self.client.get('/l/slicer/{}/dump/'.format(self.slicer.pk))
        self.assertEqual(response.status_code, 200)

        # streaming_content yields bytestrings; joining them with a text ''
        # raises TypeError on Python 3. Join as bytes and decode explicitly,
        # which behaves the same on Python 2.
        # NOTE(review): assumes the response uses the UTF-8 charset.
        body = b''.join(response.streaming_content).decode('utf-8')
        data = json.loads(body)

        self.assertEqual(len(data['features']), 10)
Пример #6
0
    def test_archiveitems_are_executed_again_if_they_change(self):
        """Only the archive item with a new dataset Scheduler is resubmitted.

        After an initial run, a Scheduler is created for the dataset of one
        archive item. A following non-forced run, with
        ``_aggregator_process_archiveitems`` patched, must be asked to process
        exactly that archive item.
        """
        patch_target = 'webui.scheduler.tasks._aggregator_process_archiveitems'

        process_aggregator.delay(self.aggregator)

        # create fake Schedulers (needed by aggregators)
        for archiveitem in self.aggregator.archiveitems.all()[:1]:
            SchedulerFactory(content_object=archiveitem.dataset)

        with patch(patch_target) as mocked_step:
            process_aggregator.delay(self.aggregator, force=False)
            resubmitted = tuple(mocked_step.call_args[0][0])

        # ``archiveitem`` is still bound to the item visited by the loop above.
        self.assertEqual(len(resubmitted), 1)
        self.assertEqual(resubmitted[0].pk, archiveitem.pk)
Пример #7
0
    def test_silk_executed_correctly(self):
        """A forced aggregator run succeeds and adds the expected links.

        A Gremlin expression over ``sd:BoardGame`` vertices and their
        'bristle' out-neighbours must evaluate to 0 before the run and to 2
        after it. The run must leave exactly one Scheduler with status
        ``SUCCESS``. For every archive item the SPARQL query must return 20
        rows and the Gremlin query 4 results, each row being checked with
        ``_assertTitanTriple``. Finally, the results reached via 'bristle'
        from the vertex named 'Dominion' must share a single ``eid``.
        """
        delta_query = (
            "g.V('type', '{0}').out('bristle').count() "
            "- g.V('type', '{0}').count()"
        ).format("sd:BoardGame")

        def bristle_delta():
            # Evaluate the count difference on the master graph.
            return self.virtuoso_master.graph.gremlin.command(delta_query)

        Scheduler.objects.all().delete()

        self.assertEqual(bristle_delta(), 0)

        process_aggregator.delay(self.aggregator, force=True)

        self.assertEqual(Scheduler.objects.count(), 1)
        run = Scheduler.objects.get()
        # The scheduler error is passed as the failure message.
        self.assertEqual(run.status, Scheduler.SUCCESS, run.error)

        sparql_template = (
            "SELECT * WHERE {GRAPH <%s> {?a ?b ?c}}"
            "ORDER BY ?a ?b ?c"
        )
        gremlin_template = "g.V('name', 'gt:{}').in('source')"

        for item in self.aggregator.archiveitems.all():
            sparql = sparql_template % item.datagraph_mapped_name
            gremlin = gremlin_template.format(item.tablename)

            triples = list(self.virtuoso.client_query(sparql).fetchall())
            vertices = list(self.virtuoso_master.client_query(gremlin))

            self.assertEqual(len(triples), 20)
            self.assertEqual(len(vertices), 4)
            for triple in triples:
                self._assertTitanTriple(triple, vertices)

        self.assertEqual(bristle_delta(), 2)

        dominion_results = self.virtuoso_master.client_query(
            "g.V('sd:name', 'Dominion').in('bristle')"
        )
        distinct_eids = set(result.eid for result in dominion_results)
        self.assertEqual(len(distinct_eids), 1)
Пример #8
0
    def test_master_namedgraph_is_dropped_before_add(self):
        """A forced run leaves only the current graph content in the master.

        The master store is pre-loaded with the full mapped file, while the
        regular store is cleared and loaded with the cropped file under the
        same graph name. After a forced run with an empty silk rule there must
        be one Scheduler with status ``INCOMPLETE``, the SPARQL query must
        return 10 rows and the Gremlin query 2 results, and each row is
        checked with ``_assertTitanTriple``.
        """
        item = self.bgg_archiveitem
        graph_name = item.datagraph_mapped_name

        self.virtuoso.clear(graph_name)

        full_dump = self._get_test_file(
            'boardgamegeek-games-mapped.nt', 'scheduler')
        self.virtuoso_master.ingest(full_dump, graph=graph_name)

        cropped_dump = self._get_test_file(
            'boardgamegeek-games-mapped-cropped.nt', 'scheduler')
        self.virtuoso.ingest(cropped_dump, graph=graph_name)

        Scheduler.objects.all().delete()
        self.aggregator.silk_rule = ''
        self.aggregator.save()

        process_aggregator.delay(self.aggregator, force=True)

        self.assertEqual(Scheduler.objects.count(), 1)
        run = Scheduler.objects.get()
        # The scheduler error is passed as the failure message.
        self.assertEqual(run.status, Scheduler.INCOMPLETE, run.error)

        sparql = (
            "SELECT * WHERE {GRAPH <%s> {?a ?b ?c}}"
            "ORDER BY ?a ?b ?c"
        ) % graph_name
        gremlin = "g.V('name', 'gt:{}').in('source')".format(item.tablename)

        triples = list(self.virtuoso.client_query(sparql).fetchall())
        vertices = list(self.virtuoso_master.client_query(gremlin))

        self.assertEqual(len(triples), 10)
        self.assertEqual(len(vertices), 2)
        for triple in triples:
            self._assertTitanTriple(triple, vertices)