Exemplo n.º 1
0
    def test_no_merge_on_blank_value(self, Graph):
        '''
        Publisher relations whose ``cited_as`` is an empty string must not be
        collapsed into one another: ingesting a second publisher (also with a
        blank ``cited_as``) for a different organization yields two
        publishers and two organizations on the same publication.
        '''
        first_cg = ChangeGraph(Graph(
            Publication(
                identifiers=[WorkIdentifier(1)],
                agent_relations=[
                    Publisher(cited_as='', agent=Organization(1)),
                ],
            ),
        ))
        first_cg.process()
        first_change_set = ChangeSet.objects.from_graph(
            first_cg, NormalizedDataFactory().id)
        first_change_set.accept()

        # State after the first ingest: one of each.
        for model, expected in (
            (models.Publication, 1),
            (models.Publisher, 1),
            (models.Organization, 1),
        ):
            assert model.objects.count() == expected

        second_cg = ChangeGraph(Graph(
            Publication(
                identifiers=[WorkIdentifier(1)],
                agent_relations=[
                    Publisher(cited_as='', agent=Organization(1)),
                    Publisher(cited_as='', agent=Organization(2)),
                ],
            ),
        ))
        second_cg.process()
        second_change_set = ChangeSet.objects.from_graph(
            second_cg, NormalizedDataFactory().id)
        second_change_set.accept()

        # Same publication, but the extra publisher/organization was added.
        for model, expected in (
            (models.Publication, 1),
            (models.Publisher, 2),
            (models.Organization, 2),
        ):
            assert model.objects.count() == expected
Exemplo n.º 2
0
    def test_can_delete_work(self, john_doe, normalized_data_id):
        '''
        Create a preprint with an identifier, then ingest the same identifier
        with ``is_deleted`` set on the work. The stored preprint should end up
        flagged as deleted.
        '''
        creation_graph = ChangeGraph([
            {
                '@id': '_:abc',
                '@type': 'workidentifier',
                'uri': 'http://osf.io/faq',
                'creative_work': {'@id': '_:789', '@type': 'preprint'}
            },
            {
                '@id': '_:789',
                '@type': 'preprint',
                'title': 'All About Cats',
            },
        ])
        creation_graph.process()

        creation_set = models.ChangeSet.objects.from_graph(
            creation_graph, normalized_data_id)
        preprint, _identifier = creation_set.accept()

        assert preprint.is_deleted is False

        # Fresh dicts on purpose: the payload is rebuilt rather than reused.
        deletion_graph = ChangeGraph([
            {
                '@id': '_:abc',
                '@type': 'workidentifier',
                'uri': 'http://osf.io/faq',
                'creative_work': {'@id': '_:789', '@type': 'preprint'}
            },
            {
                '@id': '_:789',
                'is_deleted': True,
                '@type': 'preprint',
            },
        ])
        deletion_graph.process()

        deletion_set = models.ChangeSet.objects.from_graph(
            deletion_graph, normalized_data_id)
        deletion_set.accept()

        preprint.refresh_from_db()
        assert preprint.is_deleted is True
Exemplo n.º 3
0
    def test_no_timetraveling(self, Graph):
        '''
        Accept a publication, then process data for the same publication
        carrying an earlier ``date_updated``. The resulting change should
        contain only the previously unset title; the stale ``is_deleted``
        value must not appear in it.
        '''
        newer_publication = Publication(
            id=1,
            sparse=True,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:53.385000',
            is_deleted=False,
        )
        newer_cg = ChangeGraph(Graph(newer_publication))
        newer_cg.process()
        newer_change_set = ChangeSet.objects.from_graph(
            newer_cg, NormalizedDataFactory().id)
        newer_change_set.accept()

        stale_publication = Publication(
            id=1,
            sparse=True,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:50.000000',
            is_deleted=True,
            title='Not Previously Changed',
        )
        stale_cg = ChangeGraph(Graph(stale_publication))
        stale_cg.process()

        expected_change = {'title': 'Not Previously Changed'}
        assert stale_cg.nodes[0].change == expected_change
Exemplo n.º 4
0
    def test_add_relation_related(self, normalized_data_id):
        '''
        Start with one article. Ingest a preprint that cites a generic
        creative work sharing the article's identifier URI. Expect the
        citation to be stored from the preprint (subject) to the existing
        article (related), with no generic creative work created.
        '''
        uri = 'http://osf.io/special-snowflake'

        article_graph = ChangeGraph([
            {
                '@id': '_:1234',
                '@type': 'article',
                'title': 'All About Cats',
                'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
            },
            {
                '@id': '_:2345',
                '@type': 'workidentifier',
                'uri': uri,
                'creative_work': {'@id': '_:1234', '@type': 'article'}
            },
        ])
        article_set = models.ChangeSet.objects.from_graph(
            article_graph, normalized_data_id)
        article_set.accept()

        assert models.Article.objects.count() == 1

        citing_graph = ChangeGraph([
            {
                '@id': '_:1234',
                '@type': 'preprint',
                'title': 'Dogs are okay too',
                'related_works': [{'@id': '_:foo', '@type': 'cites'}]
            },
            {
                '@id': '_:foo',
                '@type': 'cites',
                'subject': {'@id': '_:1234', '@type': 'preprint'},
                'related': {'@id': '_:2345', '@type': 'creativework'},
            },
            {
                '@id': '_:2345',
                '@type': 'creativework',
                'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}]
            },
            {
                '@id': '_:4567',
                '@type': 'workidentifier',
                'uri': uri,
                'creative_work': {'@id': '_:2345', '@type': 'creativework'}
            },
        ])
        citing_graph.process()
        citing_set = models.ChangeSet.objects.from_graph(
            citing_graph, normalized_data_id)
        citing_set.accept()

        assert models.Article.objects.count() == 1
        assert models.Preprint.objects.count() == 1
        generic_works = models.CreativeWork.objects.filter(
            type='share.creativework')
        assert generic_works.count() == 0

        cat = models.Article.objects.first()
        dog = models.Preprint.objects.first()

        # The preprint points at the article...
        assert dog.outgoing_creative_work_relations.count() == 1
        outgoing = dog.outgoing_creative_work_relations.first()
        assert outgoing._meta.model_name == 'cites'
        assert dog.outgoing_creative_work_relations.first().related == cat

        # ...and the article sees the same relation from the other side.
        assert cat.incoming_creative_work_relations.count() == 1
        incoming = cat.incoming_creative_work_relations.first()
        assert incoming._meta.model_name == 'cites'
        assert cat.incoming_creative_work_relations.first().subject == dog
Exemplo n.º 5
0
    def test_no_changes(self, Graph):
        '''
        Accept the ``initial`` graph, then build it again after clearing
        ``Graph.discarded_ids``. The second pass should produce no changeset
        at all (``from_graph`` returns ``None``).
        '''
        first_pass = ChangeGraph(Graph(*initial))
        first_pass.process()
        first_change_set = ChangeSet.objects.from_graph(
            first_pass, NormalizedDataFactory().id)
        first_change_set.accept()

        Graph.discarded_ids.clear()

        second_pass = ChangeGraph(Graph(*initial))
        second_pass.process()
        second_change_set = ChangeSet.objects.from_graph(
            second_pass, NormalizedDataFactory().id)
        assert second_change_set is None
Exemplo n.º 6
0
    def test_change_work_type(self, normalized_data_id):
        '''
        A Project with an Identifier exists. Accept a new changeset
        describing a Preprint with the same Identifier. The preprint should
        resolve to the existing work, whose type becomes Preprint while its
        title and primary key are kept and its change count goes up to two.
        '''
        title = 'Ambiguous Earthquakes'
        uri = 'http://osf.io/special-snowflake'

        project_graph = ChangeGraph([
            {
                '@id': '_:1234',
                '@type': 'project',
                'title': title,
                'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
            },
            {
                '@id': '_:2345',
                '@type': 'workidentifier',
                'uri': uri,
                'creative_work': {'@id': '_:1234', '@type': 'project'}
            },
        ])
        project_graph.process()

        project_set = models.ChangeSet.objects.from_graph(
            project_graph, normalized_data_id)
        work, identifier = project_set.accept()
        work_pk = work.id

        assert identifier.uri == uri
        assert models.Project.objects.count() == 1
        assert models.Preprint.objects.count() == 0
        assert models.CreativeWork.objects.count() == 1
        assert models.Project.objects.all()[0].changes.count() == 1

        preprint_graph = ChangeGraph([
            {
                '@id': '_:1234',
                '@type': 'preprint',
                'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
            },
            {
                '@id': '_:2345',
                '@type': 'workidentifier',
                'uri': uri,
                'creative_work': {'@id': '_:1234', '@type': 'preprint'}
            },
        ])
        preprint_graph.process()

        preprint_set = models.ChangeSet.objects.from_graph(
            preprint_graph, normalized_data_id)
        preprint_set.accept()

        assert models.Project.objects.count() == 0
        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.count() == 1
        assert models.Preprint.objects.get(id=work_pk).title == title
        assert models.Preprint.objects.all()[0].changes.count() == 2
Exemplo n.º 7
0
    def test_generic_creative_work(self, normalized_data_id):
        '''
        A Preprint with an Identifier exists. Accept a changeset describing a
        generic CreativeWork with the same Identifier and a new title. The
        stored Preprint should take the new title and remain a Preprint; no
        generic CreativeWork row should appear.
        '''
        uri = 'http://osf.io/special-snowflake'
        old_title = 'Ambiguous Earthquakes'

        preprint_graph = ChangeGraph([
            {
                '@id': '_:1234',
                '@type': 'preprint',
                'title': old_title,
                'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
            },
            {
                '@id': '_:2345',
                '@type': 'workidentifier',
                'uri': uri,
                'creative_work': {'@id': '_:1234', '@type': 'preprint'}
            },
        ])
        preprint_set = models.ChangeSet.objects.from_graph(
            preprint_graph, normalized_data_id)

        preprint, identifier = preprint_set.accept()
        preprint_pk = preprint.id

        assert identifier.uri == uri
        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.filter(
            type='share.creativework').count() == 0
        assert models.Preprint.objects.get(id=preprint_pk).title == old_title

        new_title = 'Ambidextrous Earthquakes'

        generic_graph = ChangeGraph([
            {
                '@id': '_:1234',
                '@type': 'creativework',
                'title': new_title,
                'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
            },
            {
                '@id': '_:2345',
                '@type': 'workidentifier',
                'uri': uri,
                'creative_work': {'@id': '_:1234', '@type': 'creativework'}
            },
        ])
        generic_graph.process()

        generic_set = models.ChangeSet.objects.from_graph(
            generic_graph, normalized_data_id)
        generic_set.accept()

        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.filter(
            type='share.creativework').count() == 0
        assert models.Preprint.objects.get(id=preprint_pk).title == new_title
Exemplo n.º 8
0
    def test_split_brain(self, Graph):
        '''
        After the ``initial`` graph is accepted, a preprint carrying two
        identifiers that resolve to more than one stored work cannot be
        processed: ``process`` raises ``NotImplementedError``.
        '''
        seeded_cg = ChangeGraph(Graph(*initial))
        seeded_cg.process()
        seeded_set = ChangeSet.objects.from_graph(
            seeded_cg, NormalizedDataFactory().id)
        seeded_set.accept()

        # One node, two identifiers: this is the ambiguous input.
        ambiguous_preprint = Preprint(
            identifiers=[WorkIdentifier(1), WorkIdentifier(2)])
        ambiguous_cg = ChangeGraph(Graph(ambiguous_preprint))

        with pytest.raises(NotImplementedError) as excinfo:
            ambiguous_cg.process()

        expected_message = (
            "Multiple <class 'share.models.creative.Preprint'>s found")
        assert excinfo.value.args[0] == expected_message
Exemplo n.º 9
0
    def test_accept_subject(self, normalized_data_id):
        '''
        With a 'Felines' subject already stored, accept a preprint linked to
        a subject of that name through a ``throughsubjects`` node. Exactly one
        preprint should then be reachable via that subject name.
        '''
        subject_name = 'Felines'
        models.Subject.objects.bulk_create([
            models.Subject(name=subject_name),
        ])

        assert models.Subject.objects.filter(name=subject_name).count() == 1

        graph = ChangeGraph([
            {
                '@id': '_:987',
                '@type': 'subject',
                'name': 'Felines'
            },
            {
                '@id': '_:678',
                '@type': 'throughsubjects',
                'subject': {'@id': '_:987', '@type': 'subject'},
                'creative_work': {'@id': '_:789', '@type': 'preprint'},
            },
            {
                '@id': '_:789',
                '@type': 'preprint',
                'title': 'All About Cats',
            },
        ])
        graph.process()

        change_set = models.ChangeSet.objects.from_graph(
            graph, normalized_data_id)
        change_set.accept()

        assert models.Preprint.objects.filter(
            subjects__name='Felines').count() == 1
        tagged_preprint = models.Preprint.objects.filter(
            subjects__name='Felines').first()
        assert tagged_preprint.title == 'All About Cats'
Exemplo n.º 10
0
    def test_disambiguate(self, input, model, delta, Graph):
        '''
        Seed the database with ``initial`` (without disambiguation), then
        ingest ``input`` and check that the number of ``model`` rows that have
        a change attached grew by exactly ``delta``.
        '''
        seeded_cg = ChangeGraph(Graph(*initial))
        seeded_cg.process(disambiguate=False)
        seeded_set = ChangeSet.objects.from_graph(
            seeded_cg, NormalizedDataFactory().id)
        seeded_set.accept()

        Graph.reseed()

        def tracked_count():
            # Only rows with a change are counted; the original author
            # describes this as a workaround for fuzzy counting
            # (presumably a Postgres behavior -- not verifiable from here).
            return model.objects.exclude(change=None).count()

        count_before = tracked_count()

        input_cg = ChangeGraph(Graph(*input))
        input_cg.process()
        input_set = ChangeSet.objects.from_graph(
            input_cg, NormalizedDataFactory().id)
        # ``from_graph`` may return None when there is nothing to change.
        if input_set is not None:
            input_set.accept()

        assert (tracked_count() - count_before) == delta
Exemplo n.º 11
0
    def _apply_changes(self, job, normalized_datum):
        '''
        Turn ``normalized_datum`` into a ChangeSet and accept it when its
        source is a robot, is trusted, or is the user of some ``Source``.

        On ``IngestConflict`` or ``OperationalError`` the job's retry counter
        is bumped and saved; the job is rescheduled unless the counter
        exceeds ``self.MAX_RETRIES``, in which case the error propagates.

        Returns a list of creative-work ids (objects returned by ``accept``
        plus instances matched by graph nodes), or ``None`` when nothing was
        accepted or the job was rescheduled.
        '''
        accepted = None

        try:
            # Warm the ContentType cache for every 'share' model in one query
            ContentType.objects.get_for_models(*apps.get_models('share'), for_concrete_models=False)

            with transaction.atomic():
                graph_data = normalized_datum.data['@graph']
                submitter = normalized_datum.source
                cg = ChangeGraph(graph_data, namespace=submitter.username)
                cg.process()
                cs = ChangeSet.objects.from_graph(cg, normalized_datum.id)
                if cs:
                    may_accept = (
                        submitter.is_robot
                        or submitter.is_trusted
                        or Source.objects.filter(user=submitter).exists()
                    )
                    if may_accept:
                        accepted = cs.accept()

        # A conflict or database hiccup is treated as transient: try again
        # later unless the retry budget is exhausted.
        except (exceptions.IngestConflict, OperationalError):
            job.retries = (job.retries or 0) + 1
            job.save(update_fields=('retries',))
            if job.retries > self.MAX_RETRIES:
                raise
            job.reschedule()
            return None

        if not accepted:
            return None  # Nothing to index

        # Works that were created or directly modified by the changeset
        updated_works = {
            obj.id for obj in accepted
            if isinstance(obj, AbstractCreativeWork)
        }
        # Works that merely matched a node; their related objects may have changed
        existing_works = {
            node.instance.id for node in cg.nodes
            if isinstance(node.instance, AbstractCreativeWork)
        }

        return list(updated_works.union(existing_works))
Exemplo n.º 12
0
def change_node():
    '''
    Return the single node of a one-person ChangeGraph.
    '''
    person = {
        '@id': '_:1234',
        '@type': 'person',
        'given_name': 'No',
        'family_name': 'Matter',
    }
    graph = ChangeGraph([person])
    return graph.nodes[0]
Exemplo n.º 13
0
def create_graph():
    '''
    Return a ChangeGraph holding one blank person node (Jane Doe).
    '''
    jane_doe = {
        '@id': '_:1234',
        '@type': 'person',
        'given_name': 'Jane',
        'family_name': 'Doe',
    }
    return ChangeGraph([jane_doe])
Exemplo n.º 14
0
    def test_topological_sort_many_to_many(self):
        '''
        A contributor node references both a person and a preprint. Once the
        graph is built, the contributor should be the last of the three nodes.
        '''
        preprint = {
            '@id': '_:91011',
            '@type': 'preprint',
            'contributors': [{'@id': '_:5678', '@type': 'contributor'}]
        }
        contributor = {
            '@id': '_:5678',
            '@type': 'contributor',
            'agent': {'@id': '_:1234', '@type': 'person'},
            'creative_work': {'@id': '_:91011', '@type': 'preprint'},
        }
        person = {
            '@id': '_:1234',
            '@type': 'person',
            'given_name': 'Doe',
            'family_name': 'Jane',
        }

        graph = ChangeGraph([preprint, contributor, person])

        assert len(graph.nodes) == 3
        # Only the final position is checked; the relative order of the
        # person and the preprint is deliberately left unasserted.
        assert graph.nodes[2].id == '_:5678'
Exemplo n.º 15
0
    def test_update_dependencies_accept(self, john_doe, normalized_data_id):
        '''
        Accept a graph that renames an existing person (addressed by its
        encoded id) and adds a preprint that person created. The person's
        given name should change and the preprint should be reachable through
        its agent relations.
        '''
        person_ref = IDObfuscator.encode(john_doe)

        graph = ChangeGraph([
            {
                '@id': person_ref,
                '@type': 'person',
                'given_name': 'Jane',
            },
            {
                '@id': '_:456',
                '@type': 'Creator',
                'agent': {'@id': person_ref, '@type': 'person'},
                'creative_work': {'@id': '_:789', '@type': 'preprint'},
            },
            {
                '@id': '_:789',
                '@type': 'preprint',
                'title': 'All About Cats',
            },
        ])

        change_set = models.ChangeSet.objects.from_graph(
            graph, normalized_data_id)
        change_set.accept()

        john_doe.refresh_from_db()
        assert john_doe.given_name == 'Jane'

        assert models.Preprint.objects.filter(
            agent_relations__agent=john_doe).count() == 1
        created_preprint = models.Preprint.objects.filter(
            agent_relations__agent=john_doe).first()
        assert created_preprint.title == 'All About Cats'
Exemplo n.º 16
0
 def test_normalize_workidentifier(self, input, output, Graph):
     '''
     Process a lone work identifier built from ``input`` (no
     disambiguation). The serialized graph should equal an identifier built
     from ``output``, or be empty when ``output`` is falsy.
     '''
     identifier = WorkIdentifier(uri=input, creative_work=None)
     graph = ChangeGraph(Graph(identifier))
     graph.process(disambiguate=False)

     serialized = graph.serialize()
     if output:
         expected = Graph(
             WorkIdentifier(uri=output, parse=True, creative_work=None))
     else:
         expected = []
     assert serialized == expected
Exemplo n.º 17
0
def create_graph_dependencies():
    '''
    Return a ChangeGraph of three mutually referencing nodes: a person, a
    preprint, and the Creator relation that links them.
    '''
    person = {
        '@id': '_:123',
        '@type': 'person',
        'given_name': 'Jane',
        'family_name': 'Doe',
    }
    creator = {
        '@id': '_:456',
        '@type': 'Creator',
        'agent': {'@id': '_:123', '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }
    preprint = {
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
        'related_agents': [{'@id': '_:456', '@type': 'Creator'}]
    }
    return ChangeGraph([person, creator, preprint])
Exemplo n.º 18
0
    def do_run(self, *args, **kwargs):
        '''
        Build a ChangeSet from ``self.normalized`` and accept it when
        ``self.source`` is a robot or trusted.

        Start and finish are logged at info level. Any exception raised while
        building or accepting is logged and converted into a retry with a
        10 second countdown.
        '''
        # Warm the ContentType cache for every 'share' model in one query
        share_models = apps.get_models('share')
        ContentType.objects.get_for_models(*share_models, for_concrete_models=False)

        logger.info(
            '%s started make JSON patches for NormalizedData %s at %s',
            self.started_by,
            self.normalized.id,
            datetime.datetime.utcnow().isoformat(),
        )

        try:
            with transaction.atomic():
                cg = ChangeGraph(
                    self.normalized.data['@graph'],
                    namespace=self.normalized.source.username,
                )
                cg.process()
                cs = ChangeSet.objects.from_graph(cg, self.normalized.id)
                if cs:
                    if self.source.is_robot or self.source.is_trusted:
                        # TODO: verify change set is not overwriting user created object
                        cs.accept()
        except Exception as exc:
            logger.info(
                'Failed make JSON patches for NormalizedData %s with exception %s. Retrying...',
                self.normalized.id,
                exc,
            )
            raise self.retry(countdown=10, exc=exc)

        logger.info(
            'Finished make JSON patches for NormalizedData %s by %s at %s',
            self.normalized.id,
            self.started_by,
            datetime.datetime.utcnow().isoformat(),
        )
Exemplo n.º 19
0
    def test_is_blank(self):
        '''
        After a node's ``_id`` is overwritten with ``'1234'`` (it was built
        as ``'_:1234'``), ``is_blank`` should be ``False``.
        '''
        person = {
            '@id': '_:1234',
            '@type': 'person',
        }
        graph = ChangeGraph([person])
        node = graph.nodes[0]

        node._id = '1234'
        assert node.is_blank is False
Exemplo n.º 20
0
    def test_all_disambiguate(self, input, Graph, normalized_data_id):
        '''
        Accept ``input`` without processing it, so no node has an instance
        yet. Running ``GraphDisambiguator.find_instances`` afterwards should
        attach to every node an instance whose model name equals the node
        type.
        '''
        graph = ChangeGraph(Graph(*input))
        change_set = ChangeSet.objects.from_graph(graph, normalized_data_id)
        change_set.accept()

        for node in graph.nodes:
            assert node.instance is None

        disambiguator = GraphDisambiguator()
        disambiguator.find_instances(graph)

        for node in graph.nodes:
            assert node.instance
        for node in graph.nodes:
            assert node.instance._meta.model_name == node.type
Exemplo n.º 21
0
    def test_reaccept(self, input, Graph):
        '''
        On top of the accepted ``initial`` graph, ingesting ``input`` once
        must yield a changeset; ingesting the very same ``input`` again must
        yield none.
        '''
        seeded_cg = ChangeGraph(Graph(*initial))
        seeded_cg.process()
        seeded_set = ChangeSet.objects.from_graph(
            seeded_cg, NormalizedDataFactory().id)
        seeded_set.accept()

        # Reseed so the next graphs are generated with new values
        Graph.reseed()

        first_cg = ChangeGraph(Graph(*input))
        first_cg.process()
        first_set = ChangeSet.objects.from_graph(
            first_cg, NormalizedDataFactory().id)
        assert first_set is not None
        first_set.accept()

        repeat_cg = ChangeGraph(Graph(*input))
        repeat_cg.process()
        repeat_set = ChangeSet.objects.from_graph(
            repeat_cg, NormalizedDataFactory().id)
        assert repeat_set is None
Exemplo n.º 22
0
 def test_normalize_tags_on_work(self, input, output, Graph):
     '''
     Normalize and prune a creative work tagged with ``input``. Its nodes,
     serialized in order of type followed by id, should equal a work tagged
     with ``output``.
     '''
     graph = ChangeGraph(Graph(CreativeWork(tags=input)))
     graph.normalize()
     graph.prune()

     def type_then_id(node):
         return node.type + str(node.id)

     serialized = [
         node.serialize() for node in sorted(graph.nodes, key=type_then_id)
     ]
     assert serialized == Graph(CreativeWork(tags=output))
Exemplo n.º 23
0
    def test_single_node(self):
        '''
        A graph built from one JSON-LD object contains exactly one node.
        '''
        person = {
            '@id': '_:1234',
            '@type': 'person',
            'given_name': 'Doe',
            'family_name': 'Jane',
        }
        graph = ChangeGraph([person])

        assert len(graph.nodes) == 1
Exemplo n.º 24
0
 def test_create_extra(self):
     '''
     A node created through ``graph.create`` carries the namespace the
     graph was constructed with.
     '''
     graph = ChangeGraph([], namespace='testing')
     tag_attrs = {
         'name': 'Foo',
         'extra': {'tag': 'Foo'},
     }
     node = graph.create(None, 'tag', tag_attrs)
     assert node.namespace == 'testing'
Exemplo n.º 25
0
    def test_peels_context(self):
        '''
        The ``@context`` entry of a JSON-LD object is exposed as the node's
        ``context``.
        '''
        contributor = {
            '@id': '_:5678',
            '@type': 'contributor',
            '@context': {'schema': 'www.example.com'},
        }
        graph = ChangeGraph([contributor])
        node = graph.nodes[0]

        assert node.context == {'schema': 'www.example.com'}
Exemplo n.º 26
0
 def test_unresolved_reference(self):
     '''
     A contributor that references a person absent from the graph raises
     ``UnresolvableReference`` carrying the missing (id, type) pair.
     '''
     with pytest.raises(UnresolvableReference) as excinfo:
         # Construction stays inside the context manager, so an error
         # raised while building the graph is caught as well.
         dangling = ChangeGraph([{
             '@id': '_:5678',
             '@type': 'contributor',
             'agent': {'@id': '_:1234', '@type': 'person'}
         }])
         dangling.process()

     missing_reference = ('_:1234', 'person')
     assert excinfo.value.args == (missing_reference, )
Exemplo n.º 27
0
    def test_from_ld(self):
        '''
        A node built from a JSON-LD object exposes its id and type, reports
        itself as blank, and keeps the remaining keys as ``attrs``.
        '''
        person = {
            '@id': '_:1234',
            '@type': 'person',
            'given_name': 'Doe',
            'family_name': 'Jane',
        }
        graph = ChangeGraph([person])
        node = graph.nodes[0]

        assert node.id == '_:1234'
        assert node.type == 'person'
        assert node.is_blank is True
        expected_attrs = {'given_name': 'Doe', 'family_name': 'Jane'}
        assert node.attrs == expected_attrs
Exemplo n.º 28
0
    def test_get_data(self, generator, model, route, controlled_values, client, Graph):
        '''
        Seed the database with ``initial``, process ``generator`` and fetch
        the serialized node of type ``model`` from ``/api/v2/<route>/<id>/``.

        The response must be a 200 whose ``data.id`` and ``attributes.type``
        match the node, and each name in ``controlled_values`` must match the
        node's value under the underscore form of that name.
        '''
        initial_cg = ChangeGraph(Graph(*initial))
        initial_cg.process(disambiguate=False)
        ChangeSet.objects.from_graph(initial_cg, NormalizedDataFactory().id).accept()

        cg = ChangeGraph(Graph(*generator))
        cg.process()

        # Use the last serialized node of the requested type. Starting from
        # None and asserting turns a missing node into a readable failure
        # instead of an UnboundLocalError further down.
        expected = None
        for obj in cg.serialize():
            if obj['@type'] == model:
                expected = obj
        assert expected is not None, 'no serialized node of type {!r}'.format(model)
        expected_id = expected['@id']

        response = client.get('/api/v2/{}/{}/'.format(route, expected_id))

        # Check the status before decoding: an error response may not be JSON
        # and would otherwise surface as a confusing decode error.
        assert response.status_code == 200

        actual = json.loads(response.content.decode(encoding='UTF-8'))

        assert actual['data']['id'] == expected_id
        assert actual['data']['attributes']['type'] == expected['@type']
        for value in controlled_values:
            assert actual['data']['attributes'][value] == expected[camelCase_to_underscore(value)]
Exemplo n.º 29
0
    def test_delete_cascade(self, queryset, deltas, Graph):
        '''
        Seed the database with ``self.initial``, delete ``queryset`` and
        check that each model in ``deltas`` changed its row count by the
        mapped amount.
        '''
        seeded_cg = ChangeGraph(Graph(*self.initial))
        seeded_cg.process(disambiguate=False)
        normalized_id = factories.NormalizedDataFactory().id
        ChangeSet.objects.from_graph(seeded_cg, normalized_id).accept()

        # Snapshot the row counts before anything is deleted.
        counts_before = {}
        for model in deltas:
            counts_before[model] = model.objects.count()

        queryset.delete()

        for model in deltas:
            observed = model.objects.count() - counts_before[model]
            assert observed == deltas[model]
Exemplo n.º 30
0
    def test_extras(self):
        '''
        The ``extra`` entry of a JSON-LD object is kept out of the node's
        ``attrs`` and exposed separately as ``node.extra``.
        '''
        person = {
            '@id': '_:1234',
            '@type': 'person',
            'given_name': 'Doe',
            'family_name': 'Jane',
            'extra': {'likes': ['cats']}
        }
        graph = ChangeGraph([person])
        node = graph.nodes[0]

        assert 'extra' not in node.attrs
        assert node.extra == {'likes': ['cats']}