Exemplo n.º 1
0
    def test_can_delete_work(self, john_doe, normalized_data_id):
        """Create a preprint, then accept a second graph that sets ``is_deleted`` on it."""

        def identifier_node():
            # Built fresh on every call so the two graphs never share a dict.
            return {
                '@id': '_:abc',
                '@type': 'workidentifier',
                'uri': 'http://osf.io/faq',
                'creative_work': {'@id': '_:789', '@type': 'preprint'}
            }

        creation_graph = ChangeGraph([identifier_node(), {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        }])
        creation_graph.process()

        creation_change_set = models.ChangeSet.objects.from_graph(creation_graph, normalized_data_id)
        preprint, identifier = creation_change_set.accept()

        # Freshly created work starts out not deleted.
        assert preprint.is_deleted is False

        deletion_graph = ChangeGraph([identifier_node(), {
            '@id': '_:789',
            'is_deleted': True,
            '@type': 'preprint',
        }])
        deletion_graph.process()

        deletion_change_set = models.ChangeSet.objects.from_graph(deletion_graph, normalized_data_id)
        deletion_change_set.accept()

        # Reload the row so the assertion sees the stored value.
        preprint.refresh_from_db()
        assert preprint.is_deleted is True
Exemplo n.º 2
0
    def test_no_timetraveling(self, Graph):
        """Process an older update after accepting a newer one for the same work.

        The older graph sets both ``is_deleted`` and ``title``; the final
        assertion expects only ``title`` to appear in the computed change.
        """
        newer_publication = Publication(
            id=1,
            sparse=True,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:53.385000',
            is_deleted=False,
        )
        newer_graph = ChangeGraph(Graph(newer_publication))
        newer_graph.process()

        newer_change_set = ChangeSet.objects.from_graph(newer_graph, NormalizedDataFactory().id)
        newer_change_set.accept()

        # Same work, but stamped a few seconds earlier than the accepted update.
        older_publication = Publication(
            id=1,
            sparse=True,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:50.000000',
            is_deleted=True,
            title='Not Previously Changed',
        )
        older_graph = ChangeGraph(Graph(older_publication))
        older_graph.process()

        expected_change = {'title': 'Not Previously Changed'}
        assert older_graph.nodes[0].change == expected_change
Exemplo n.º 3
0
    def test_accept_subject(self, normalized_data_id):
        """Accept a preprint linked to a subject whose name already exists in the database."""
        models.Subject.objects.bulk_create([models.Subject(name='Felines')])

        felines = models.Subject.objects.filter(name='Felines')
        assert felines.count() == 1

        subject_node = {
            '@id': '_:987',
            '@type': 'subject',
            'name': 'Felines'
        }
        through_node = {
            '@id': '_:678',
            '@type': 'throughsubjects',
            'subject': {'@id': '_:987', '@type': 'subject'},
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }
        preprint_node = {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        }

        graph = ChangeGraph([subject_node, through_node, preprint_node])
        graph.process()

        models.ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        # Exactly one preprint should now be reachable through the subject name.
        tagged_preprints = models.Preprint.objects.filter(subjects__name='Felines')
        assert tagged_preprints.count() == 1
        assert tagged_preprints.first().title == 'All About Cats'
Exemplo n.º 4
0
    def test_no_merge_on_blank_value(self, Graph):
        """Two publishers with a blank ``cited_as`` but different agents are both stored."""

        def assert_counts(publications, publishers, organizations):
            # Compare the current row counts against the expected totals.
            assert models.Publication.objects.count() == publications
            assert models.Publisher.objects.count() == publishers
            assert models.Organization.objects.count() == organizations

        def accept(nodes):
            # Process the nodes as a graph and accept the resulting change set.
            change_graph = ChangeGraph(Graph(*nodes))
            change_graph.process()
            ChangeSet.objects.from_graph(change_graph, NormalizedDataFactory().id).accept()

        single_publisher = [
            Publication(
                identifiers=[WorkIdentifier(1)],
                agent_relations=[
                    Publisher(cited_as='', agent=Organization(1)),
                ]
            )
        ]
        accept(single_publisher)
        assert_counts(1, 1, 1)

        # Same work again, now with a second blank-cited publisher.
        two_publishers = [
            Publication(
                identifiers=[WorkIdentifier(1)],
                agent_relations=[
                    Publisher(cited_as='', agent=Organization(1)),
                    Publisher(cited_as='', agent=Organization(2)),
                ]
            )
        ]
        accept(two_publishers)
        assert_counts(1, 2, 2)
Exemplo n.º 5
0
    def _apply_changes(self, job, normalized_datum):
        """Build a ChangeSet for ``normalized_datum`` and accept it when permitted.

        The change set is accepted only if the datum's source is a robot, is
        trusted, or is the user of some ``Source``.

        Returns:
            A list of creative-work ids to index (works that were updated plus
            works matched by the graph), or ``None`` when nothing was accepted
            or when the job was rescheduled after a conflict.

        Raises:
            exceptions.IngestConflict, OperationalError: re-raised once
                ``job.retries`` exceeds ``self.MAX_RETRIES``.
        """
        updated = None

        try:
            # Load all relevant ContentTypes in a single query
            ContentType.objects.get_for_models(*apps.get_models('share'), for_concrete_models=False)

            with transaction.atomic():
                cg = ChangeGraph(normalized_datum.data['@graph'], namespace=normalized_datum.source.username)
                cg.process()
                cs = ChangeSet.objects.from_graph(cg, normalized_datum.id)
                if cs:
                    source = normalized_datum.source
                    # Short-circuit order matters: the DB lookup only runs when
                    # neither flag is set.
                    if source.is_robot or source.is_trusted or Source.objects.filter(user=source).exists():
                        updated = cs.accept()

        # Retry if it was just the wrong place at the wrong time
        except (exceptions.IngestConflict, OperationalError):
            job.retries = (job.retries or 0) + 1
            job.save(update_fields=('retries',))
            if job.retries > self.MAX_RETRIES:
                raise
            job.reschedule()
            return

        if not updated:
            return  # Nothing to index

        # Index works that were added or directly updated
        updated_works = {x.id for x in updated if isinstance(x, AbstractCreativeWork)}
        # and works that matched, even if they didn't change, in case any related objects did
        existing_works = {n.instance.id for n in cg.nodes if isinstance(n.instance, AbstractCreativeWork)}

        return list(updated_works | existing_works)
Exemplo n.º 6
0
    def test_no_merge_on_blank_value(self, Graph):
        """Blank ``cited_as`` values on different agents must not collapse into one publisher."""

        def current_counts():
            # (publications, publishers, organizations) currently stored.
            return (
                models.Publication.objects.count(),
                models.Publisher.objects.count(),
                models.Organization.objects.count(),
            )

        first_round = [
            Publication(
                identifiers=[WorkIdentifier(1)],
                agent_relations=[Publisher(cited_as='', agent=Organization(1))],
            )
        ]
        first_graph = ChangeGraph(Graph(*first_round))
        first_graph.process()
        first_change_set = ChangeSet.objects.from_graph(first_graph, NormalizedDataFactory().id)
        first_change_set.accept()
        assert current_counts() == (1, 1, 1)

        # Resubmit the work with an additional blank-cited publisher.
        second_round = [
            Publication(
                identifiers=[WorkIdentifier(1)],
                agent_relations=[
                    Publisher(cited_as='', agent=Organization(1)),
                    Publisher(cited_as='', agent=Organization(2)),
                ],
            )
        ]
        second_graph = ChangeGraph(Graph(*second_round))
        second_graph.process()
        second_change_set = ChangeSet.objects.from_graph(second_graph, NormalizedDataFactory().id)
        second_change_set.accept()
        assert current_counts() == (1, 2, 2)
Exemplo n.º 7
0
    def do_run(self, *args, **kwargs):
        """Build a ChangeSet from ``self.normalized`` and accept it for robot or trusted sources.

        Any exception raised while building or accepting the change set is
        logged and converted into ``self.retry(countdown=10, ...)``.
        """
        # Fetch the ContentTypes of every 'share' model with one query
        share_models = apps.get_models('share')
        ContentType.objects.get_for_models(*share_models, for_concrete_models=False)

        logger.info(
            '%s started make JSON patches for NormalizedData %s at %s',
            self.started_by,
            self.normalized.id,
            datetime.datetime.utcnow().isoformat(),
        )

        try:
            with transaction.atomic():
                change_graph = ChangeGraph(
                    self.normalized.data['@graph'],
                    namespace=self.normalized.source.username,
                )
                change_graph.process()
                change_set = ChangeSet.objects.from_graph(change_graph, self.normalized.id)
                if change_set:
                    if self.source.is_robot or self.source.is_trusted:
                        # TODO: verify change set is not overwriting user created object
                        change_set.accept()
        except Exception as exc:
            logger.info(
                'Failed make JSON patches for NormalizedData %s with exception %s. Retrying...',
                self.normalized.id,
                exc,
            )
            raise self.retry(countdown=10, exc=exc)

        logger.info(
            'Finished make JSON patches for NormalizedData %s by %s at %s',
            self.normalized.id,
            self.started_by,
            datetime.datetime.utcnow().isoformat(),
        )
Exemplo n.º 8
0
    def test_no_timetraveling(self, Graph):
        """An update dated earlier than the accepted one should only yield the ``title`` change.

        The earlier-dated graph also carries ``is_deleted=True``; per the final
        assertion that key is expected to be absent from the node's change.
        """
        latest = Publication(
            id=1,
            sparse=True,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:53.385000',
            is_deleted=False,
        )
        latest_graph = ChangeGraph(Graph(latest))
        latest_graph.process()
        latest_change_set = ChangeSet.objects.from_graph(latest_graph, NormalizedDataFactory().id)
        latest_change_set.accept()

        stale = Publication(
            id=1,
            sparse=True,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:50.000000',
            is_deleted=True,
            title='Not Previously Changed'
        )
        stale_graph = ChangeGraph(Graph(stale))
        stale_graph.process()

        stale_change = stale_graph.nodes[0].change
        assert stale_change == {'title': 'Not Previously Changed'}
Exemplo n.º 9
0
 def test_normalize_workidentifier(self, input, output, Graph):
     """Process a lone WorkIdentifier and compare the serialized graph with ``output``.

     A falsy ``output`` means the serialized graph is expected to be empty.
     """
     identifier = WorkIdentifier(uri=input, creative_work=None)
     graph = ChangeGraph(Graph(identifier))
     graph.process(disambiguate=False)

     # Serialize first, then build the expectation, as the original did.
     actual = graph.serialize()
     if output:
         expected = Graph(
             WorkIdentifier(uri=output, parse=True, creative_work=None))
     else:
         expected = []
     assert actual == expected
Exemplo n.º 10
0
    def test_add_relation_related(self, normalized_data_id):
        '''
        Start with one stored article. Accept a preprint that cites a work
        carrying the article's identifier. The preprint should end up with an
        outgoing "cites" relation to the article, and the article with the
        matching incoming relation from the preprint.
        '''

        uri = 'http://osf.io/special-snowflake'

        article_nodes = [{
            '@id': '_:1234',
            '@type': 'article',
            'title': 'All About Cats',
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }, {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'article'}
        }]
        # Note: this first graph is handed to from_graph without process().
        article_change_set = models.ChangeSet.objects.from_graph(ChangeGraph(article_nodes), normalized_data_id)
        article_change_set.accept()

        assert models.Article.objects.count() == 1

        preprint_node = {
            '@id': '_:1234',
            '@type': 'preprint',
            'title': 'Dogs are okay too',
            'related_works': [{'@id': '_:foo', '@type': 'cites'}]
        }
        cites_node = {
            '@id': '_:foo',
            '@type': 'cites',
            'subject': {'@id': '_:1234', '@type': 'preprint'},
            'related': {'@id': '_:2345', '@type': 'creativework'},
        }
        cited_work_node = {
            '@id': '_:2345',
            '@type': 'creativework',
            'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}]
        }
        cited_identifier_node = {
            '@id': '_:4567',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:2345', '@type': 'creativework'}
        }

        graph = ChangeGraph([preprint_node, cites_node, cited_work_node, cited_identifier_node])
        graph.process()
        models.ChangeSet.objects.from_graph(graph, normalized_data_id).accept()

        # No extra generic work should have been created for the cited node.
        assert models.Article.objects.count() == 1
        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0

        cat = models.Article.objects.first()
        dog = models.Preprint.objects.first()

        outgoing = dog.outgoing_creative_work_relations
        assert outgoing.count() == 1
        assert outgoing.first()._meta.model_name == 'cites'
        assert outgoing.first().related == cat

        incoming = cat.incoming_creative_work_relations
        assert incoming.count() == 1
        assert incoming.first()._meta.model_name == 'cites'
        assert incoming.first().subject == dog
Exemplo n.º 11
0
    def test_no_changes(self, Graph):
        """Resubmitting the already accepted ``initial`` graph should yield no ChangeSet."""
        first_pass = ChangeGraph(Graph(*initial))
        first_pass.process()
        seed_change_set = ChangeSet.objects.from_graph(first_pass, NormalizedDataFactory().id)
        seed_change_set.accept()

        Graph.discarded_ids.clear()

        second_pass = ChangeGraph(Graph(*initial))
        second_pass.process()
        resubmitted = ChangeSet.objects.from_graph(second_pass, NormalizedDataFactory().id)
        assert resubmitted is None
Exemplo n.º 12
0
    def test_no_changes(self, Graph):
        """``from_graph`` returns ``None`` when the same ``initial`` data is submitted twice."""

        def build_processed_graph():
            # Build a fresh graph from ``initial`` and run processing on it.
            change_graph = ChangeGraph(Graph(*initial))
            change_graph.process()
            return change_graph

        accepted_graph = build_processed_graph()
        ChangeSet.objects.from_graph(accepted_graph, NormalizedDataFactory().id).accept()

        Graph.discarded_ids.clear()

        repeated_graph = build_processed_graph()
        repeated_change_set = ChangeSet.objects.from_graph(repeated_graph, NormalizedDataFactory().id)
        assert repeated_change_set is None
Exemplo n.º 13
0
    def test_change_work_type(self, normalized_data_id):
        '''
        A Project with an Identifier exists. Accept a new changeset
        with a Preprint with the same Identifier. The preprint should
        disambiguate to the existing work, and the work's type should be
        updated to Preprint while keeping its title and change history.
        '''
        title = 'Ambiguous Earthquakes'
        uri = 'http://osf.io/special-snowflake'

        cg = ChangeGraph([{
            '@id': '_:1234',
            '@type': 'project',
            'title': title,
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }, {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'project'}
        }])

        cg.process()

        original_change_set = models.ChangeSet.objects.from_graph(cg, normalized_data_id)

        work, identifier = original_change_set.accept()
        # Named work_id (not id) so the builtin id() is not shadowed.
        work_id = work.id

        assert identifier.uri == uri
        assert models.Project.objects.count() == 1
        assert models.Preprint.objects.count() == 0
        assert models.CreativeWork.objects.count() == 1
        assert models.Project.objects.all()[0].changes.count() == 1

        # Same identifier URI, but the work is now described as a preprint.
        cg = ChangeGraph([{
            '@id': '_:1234',
            '@type': 'preprint',
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }, {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'preprint'}
        }])

        cg.process()
        change_set = models.ChangeSet.objects.from_graph(cg, normalized_data_id)

        change_set.accept()

        # The existing row changed type instead of a second work being created.
        assert models.Project.objects.count() == 0
        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.count() == 1
        assert models.Preprint.objects.get(id=work_id).title == title
        assert models.Preprint.objects.all()[0].changes.count() == 2
Exemplo n.º 14
0
    def test_split_brain(self, Graph):
        """Processing a preprint that matches several stored works raises NotImplementedError."""
        seed_graph = ChangeGraph(Graph(*initial))
        seed_graph.process()
        seed_change_set = ChangeSet.objects.from_graph(seed_graph, NormalizedDataFactory().id)
        seed_change_set.accept()

        # One preprint carrying two identifiers: multiple matches should break
        ambiguous_preprint = Preprint(identifiers=[WorkIdentifier(1), WorkIdentifier(2)])
        cg = ChangeGraph(Graph(ambiguous_preprint))
        with pytest.raises(NotImplementedError) as excinfo:
            cg.process()

        message = excinfo.value.args[0]
        assert message == "Multiple <class 'share.models.creative.Preprint'>s found"
Exemplo n.º 15
0
    def test_delete_cascade(self, queryset, deltas, Graph):
        """Delete ``queryset`` and check each model's row count moved by its expected delta."""
        seed_graph = ChangeGraph(Graph(*self.initial))
        seed_graph.process(disambiguate=False)
        seed_change_set = ChangeSet.objects.from_graph(seed_graph, factories.NormalizedDataFactory().id)
        seed_change_set.accept()

        # Snapshot the row counts before deleting anything.
        before = {}
        for model in deltas:
            before[model] = model.objects.count()

        queryset.delete()

        for model, delta in deltas.items():
            observed = model.objects.count() - before[model]
            assert observed == delta
Exemplo n.º 16
0
    def test_delete_cascade(self, queryset, deltas, Graph):
        """Check the per-model change in row count caused by ``queryset.delete()``."""

        def row_count(model):
            # Current number of rows for the given model.
            return model.objects.count()

        base_graph = ChangeGraph(Graph(*self.initial))
        base_graph.process(disambiguate=False)
        normalized_id = factories.NormalizedDataFactory().id
        ChangeSet.objects.from_graph(base_graph, normalized_id).accept()

        counts_before = {model: row_count(model) for model in deltas}

        queryset.delete()

        for model, expected_delta in deltas.items():
            assert row_count(model) - counts_before[model] == expected_delta
Exemplo n.º 17
0
    def test_generic_creative_work(self, normalized_data_id):
        '''
        A Preprint with an Identifier exists. Accept a changeset with a
        CreativeWork with the same Identifier and a different title.
        The Preprint's title should be updated to the new value, but its type
        should remain the same.
        '''
        old_title = 'Ambiguous Earthquakes'
        uri = 'http://osf.io/special-snowflake'

        # Note: this first graph is handed to from_graph without process().
        original_change_set = models.ChangeSet.objects.from_graph(ChangeGraph([{
            '@id': '_:1234',
            '@type': 'preprint',
            'title': old_title,
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }, {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'preprint'}
        }]), normalized_data_id)

        preprint, identifier = original_change_set.accept()
        # Named preprint_id (not id) so the builtin id() is not shadowed.
        preprint_id = preprint.id

        assert identifier.uri == uri
        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
        assert models.Preprint.objects.get(id=preprint_id).title == old_title

        new_title = 'Ambidextrous Earthquakes'

        # Same identifier URI, described with the generic creativework type.
        graph = ChangeGraph([{
            '@id': '_:1234',
            '@type': 'creativework',
            'title': new_title,
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }, {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'creativework'}
        }])

        graph.process()
        change_set = models.ChangeSet.objects.from_graph(graph, normalized_data_id)
        change_set.accept()

        # Still one preprint, no generic work, and the title was replaced.
        assert models.Preprint.objects.count() == 1
        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
        assert models.Preprint.objects.get(id=preprint_id).title == new_title
Exemplo n.º 18
0
    def test_split_brain(self, Graph):
        """A preprint whose identifiers match more than one stored work cannot be processed."""
        baseline = ChangeGraph(Graph(*initial))
        baseline.process()
        ChangeSet.objects.from_graph(baseline, NormalizedDataFactory().id).accept()

        expected_message = "Multiple <class 'share.models.creative.Preprint'>s found"

        # Multiple matches found for a thing should break
        identifiers = [WorkIdentifier(1), WorkIdentifier(2)]
        cg = ChangeGraph(Graph(Preprint(identifiers=identifiers)))
        with pytest.raises(NotImplementedError) as raised:
            cg.process()

        assert raised.value.args[0] == expected_message
Exemplo n.º 19
0
    def test_disambiguate(self, input, model, delta, Graph):
        """Accept ``input`` on top of ``initial`` and check how many ``model`` rows were added."""
        seed_graph = ChangeGraph(Graph(*initial))
        seed_graph.process(disambiguate=False)
        ChangeSet.objects.from_graph(seed_graph, NormalizedDataFactory().id).accept()

        Graph.reseed()

        def tracked_count():
            # Rows with change=None are excluded; the original note calls this a
            # hack to avoid fuzzy counting (presumably by Postgres).
            return model.objects.exclude(change=None).count()

        before = tracked_count()

        cg = ChangeGraph(Graph(*input))
        cg.process()
        change_set = ChangeSet.objects.from_graph(cg, NormalizedDataFactory().id)
        # from_graph may return None when there is nothing to accept.
        if change_set is not None:
            change_set.accept()

        assert (tracked_count() - before) == delta
Exemplo n.º 20
0
    def test_disambiguate(self, input, model, delta, Graph):
        """The number of ``model`` rows with a change should grow by ``delta`` after ``input``."""
        baseline = ChangeGraph(Graph(*initial))
        baseline.process(disambiguate=False)
        baseline_change_set = ChangeSet.objects.from_graph(baseline, NormalizedDataFactory().id)
        baseline_change_set.accept()

        Graph.reseed()
        # Count only rows that have a change attached; the original comment
        # describes this as a workaround for fuzzy counting.
        rows_with_change = model.objects.exclude(change=None)
        before = rows_with_change.count()

        incoming = ChangeGraph(Graph(*input))
        incoming.process()
        incoming_change_set = ChangeSet.objects.from_graph(incoming, NormalizedDataFactory().id)
        if incoming_change_set is not None:
            incoming_change_set.accept()

        after = model.objects.exclude(change=None).count()
        assert (after - before) == delta
Exemplo n.º 21
0
    def test_no_timetraveling_many(self, Graph):
        """Accept three dated updates, then process one dated between them.

        The late-arriving graph sets ``is_deleted``, ``title`` and
        ``description``; the final assertion expects only ``description`` in
        the computed change.
        """

        def process_and_accept(change_graph):
            # Run processing, then accept the resulting change set.
            change_graph.process()
            ChangeSet.objects.from_graph(change_graph, NormalizedDataFactory().id).accept()

        oldest = Publication(
            id=1,
            sparse=True,
            is_deleted=True,
            title='The first title',
            description='The first description',
            identifiers=[WorkIdentifier(1)],
            date_updated='2016-02-03T18:07:50.000000',
        )
        process_and_accept(ChangeGraph(Graph(oldest)))

        newer = Publication(
            id=1,
            sparse=True,
            is_deleted=False,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:50.000000',
        )
        process_and_accept(ChangeGraph(Graph(newer)))

        newest = Publication(
            id=1,
            sparse=True,
            title='The final title',
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:53.385000',
        )
        process_and_accept(ChangeGraph(Graph(newest)))

        # Dated after the oldest update but before the two most recent ones.
        late_arrival = Publication(
            id=1,
            sparse=True,
            is_deleted=True,
            title='The second title',
            description='The final description',
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-01-01T18:00:00.000000',
        )
        older_graph = ChangeGraph(Graph(late_arrival))
        older_graph.process()

        expected_change = {'description': 'The final description'}
        assert older_graph.nodes[0].change == expected_change
Exemplo n.º 22
0
def disambiguate(self, normalized_id):
    """Build and accept a ChangeSet for one NormalizedData row, then index affected works.

    NOTE(review): ``self.retry``/``self.request``/``self.update_state`` suggest
    this is a bound Celery task; the decorator is not visible here.

    Args:
        normalized_id: primary key of the ``NormalizedData`` row to process.

    Returns:
        ``None``. Returns early when no change set was accepted.

    Raises:
        Whatever ``self.retry`` produces when processing fails, and any
        exception raised by the search indexer (logged, then re-raised).
    """
    normalized = NormalizedData.objects.select_related('source__source').get(
        pk=normalized_id)

    if self.request.id:
        self.update_state(meta={'source': normalized.source.source.long_title})

    # Load all relevant ContentTypes in a single query
    ContentType.objects.get_for_models(*apps.get_models('share'),
                                       for_concrete_models=False)

    updated = None

    try:
        with transaction.atomic():
            cg = ChangeGraph(normalized.data['@graph'],
                             namespace=normalized.source.username)
            cg.process()
            cs = ChangeSet.objects.from_graph(cg, normalized.id)
            if cs and (normalized.source.is_robot
                       or normalized.source.is_trusted or
                       Source.objects.filter(user=normalized.source).exists()):
                # TODO: verify change set is not overwriting user created object
                updated = cs.accept()
    except Exception as e:
        # Random factor in [1, 2) times a delay that grows with the retry
        # count and is capped at 15 minutes.
        jitter = random.random() + 1
        capped_delay = min(
            settings.CELERY_RETRY_BACKOFF_BASE ** self.request.retries,
            60 * 15)
        raise self.retry(exc=e, countdown=jitter * capped_delay)

    if not updated:
        return
    # Only index creativeworks on the fly, for the moment.
    updated_works = {x.id for x in updated
                     if isinstance(x, AbstractCreativeWork)}
    # Works matched by the graph are indexed too, changed or not.
    existing_works = {n.instance.id for n in cg.nodes
                      if isinstance(n.instance, AbstractCreativeWork)}
    ids = list(updated_works | existing_works)

    try:
        SearchIndexer(self.app).index('creativework', *ids)
    except Exception:
        logger.exception('Could not add results from %r to elasticqueue',
                         normalized)
        raise
Exemplo n.º 23
0
    def test_disambiguate(self, input, model_delta, Graph):
        """Accept ``input`` on top of ``initial`` and check the per-model row-count deltas."""
        seed_graph = ChangeGraph(Graph(*initial))
        seed_graph.process(disambiguate=False)
        ChangeSet.objects.from_graph(seed_graph, NormalizedDataFactory().id).accept()

        Graph.reseed()

        def typed_count(model):
            # Count only rows whose ``type`` equals the model's own lowercase label.
            return model.objects.filter(type=model._meta.label_lower).count()

        before_count = {model: typed_count(model) for model in model_delta}

        cg = ChangeGraph(Graph(*input))
        cg.process()
        change_set = ChangeSet.objects.from_graph(cg, NormalizedDataFactory().id)
        if change_set is not None:
            change_set.accept()

        for model, expected_delta in model_delta.items():
            assert typed_count(model) - before_count[model] == expected_delta
Exemplo n.º 24
0
    def do_run(self, *args, **kwargs):
        """Create a ChangeSet for ``self.normalized``; accept it if the source is a robot or trusted.

        Failures inside the transaction are logged and re-raised through
        ``self.retry`` with a 10 second countdown.
        """
        # Load all relevant ContentTypes in a single query
        ContentType.objects.get_for_models(
            *apps.get_models('share'),
            for_concrete_models=False,
        )

        logger.info(
            '%s started make JSON patches for NormalizedData %s at %s',
            self.started_by,
            self.normalized.id,
            datetime.datetime.utcnow().isoformat(),
        )

        try:
            with transaction.atomic():
                graph = ChangeGraph(
                    self.normalized.data['@graph'],
                    namespace=self.normalized.source.username,
                )
                graph.process()
                pending = ChangeSet.objects.from_graph(graph, self.normalized.id)
                if pending:
                    if self.source.is_robot or self.source.is_trusted:
                        # TODO: verify change set is not overwriting user created object
                        pending.accept()
        except Exception as error:
            logger.info(
                'Failed make JSON patches for NormalizedData %s with exception %s. Retrying...',
                self.normalized.id,
                error,
            )
            raise self.retry(countdown=10, exc=error)

        logger.info(
            'Finished make JSON patches for NormalizedData %s by %s at %s',
            self.normalized.id,
            self.started_by,
            datetime.datetime.utcnow().isoformat(),
        )
Exemplo n.º 25
0
    def test_get_data(self, generator, model, route, controlled_values, client, Graph):
        """Fetch one serialized node through the v2 API and compare selected attributes.

        ``initial`` is accepted first; ``generator`` is then processed and the
        last serialized node whose ``@type`` equals ``model`` is requested at
        ``/api/v2/<route>/<id>/``.
        """
        initial_cg = ChangeGraph(Graph(*initial))
        initial_cg.process(disambiguate=False)
        ChangeSet.objects.from_graph(initial_cg, NormalizedDataFactory().id).accept()

        cg = ChangeGraph(Graph(*generator))
        cg.process()

        matching = [obj for obj in cg.serialize() if obj['@type'] == model]
        # Fail with a clear message instead of an UnboundLocalError further down.
        assert matching, 'No node of type {!r} found in the serialized graph'.format(model)
        # Keep the last match, as the original loop did.
        expected = matching[-1]
        expected_id = expected['@id']

        response = client.get('/api/v2/{}/{}/'.format(route, expected_id))

        actual = json.loads(response.content.decode(encoding='UTF-8'))

        assert response.status_code == 200
        assert actual['data']['id'] == expected_id
        assert actual['data']['attributes']['type'] == expected['@type']
        for value in controlled_values:
            # API attribute names are converted before looking them up on the node.
            assert actual['data']['attributes'][value] == expected[camelCase_to_underscore(value)]
Exemplo n.º 26
0
    def test_canonical(self, Graph, first_canonical, second_canonical, change):
        """Accept the same work from two sources and check which fields the second one changes.

        ``change`` is the expected change on the second graph's first node.
        Keys missing from it mean the stored work keeps the first source's value.
        """
        first_source = factories.SourceFactory(canonical=first_canonical)
        second_source = factories.SourceFactory(canonical=second_canonical)

        first_work = Preprint(
            id=1,
            title='The first title',
            identifiers=[WorkIdentifier(1)],
        )
        first_graph = ChangeGraph(Graph(first_work), namespace=first_source.user.username)

        second_work = Article(
            id=1,
            title='The Second Title',
            identifiers=[WorkIdentifier(1)],
        )
        second_graph = ChangeGraph(Graph(second_work), namespace=second_source.user.username)

        first_graph.process()
        first_change_set = ChangeSet.objects.from_graph(
            first_graph,
            NormalizedDataFactory(source=first_source.user).id,
        )
        cw, _ = first_change_set.accept()

        assert cw.type == 'share.preprint'
        assert cw.title == 'The first title'

        second_graph.process()
        second_change_set = ChangeSet.objects.from_graph(
            second_graph,
            NormalizedDataFactory(source=second_source.user).id,
        )
        second_change_set.accept()

        # Re-read the work so the assertions see the stored values.
        cw = models.AbstractCreativeWork.objects.get(id=cw.id)

        assert second_graph.nodes[0].change == change
        expected_type = change.get('type', 'share.preprint')
        expected_title = change.get('title', 'The first title')
        assert cw.type == expected_type
        assert cw.title == expected_title
Exemplo n.º 27
0
    def test_reaccept(self, input, Graph):
        """Accepting ``input`` once yields a ChangeSet; submitting it again yields none."""

        def change_set_for(nodes):
            # Build, process and convert the nodes into a (possibly None) ChangeSet.
            change_graph = ChangeGraph(Graph(*nodes))
            change_graph.process()
            return ChangeSet.objects.from_graph(change_graph, NormalizedDataFactory().id)

        change_set_for(initial).accept()

        Graph.reseed()  # Force new values to be generated

        first_cs = change_set_for(input)
        assert first_cs is not None
        first_cs.accept()

        second_cs = change_set_for(input)
        assert second_cs is None
Exemplo n.º 28
0
    def test_reaccept(self, input, Graph):
        """A graph that was already accepted should not produce a second ChangeSet."""
        baseline = ChangeGraph(Graph(*initial))
        baseline.process()
        baseline_change_set = ChangeSet.objects.from_graph(baseline, NormalizedDataFactory().id)
        baseline_change_set.accept()

        Graph.reseed()  # Force new values to be generated

        # First submission: there must be something to accept.
        first_attempt = ChangeGraph(Graph(*input))
        first_attempt.process()
        first_result = ChangeSet.objects.from_graph(first_attempt, NormalizedDataFactory().id)
        assert first_result is not None
        first_result.accept()

        # Second submission of the same input: nothing should be left.
        second_attempt = ChangeGraph(Graph(*input))
        second_attempt.process()
        second_result = ChangeSet.objects.from_graph(second_attempt, NormalizedDataFactory().id)
        assert second_result is None
Exemplo n.º 29
0
 def test_normalize_contributor_creator_relation(self, input, output, Graph):
     """Process a work with ``input`` agent relations; it should serialize like ``output``."""
     work = CreativeWork(agent_relations=input)
     graph = ChangeGraph(Graph(work))
     graph.process(disambiguate=False)

     # Serialize first, then build the expectation, as the original did.
     actual = graph.serialize()
     expected = Graph(CreativeWork(agent_relations=output))
     assert actual == expected
Exemplo n.º 30
0
 def test_normalize_creativework(self, input, output, Graph):
     """Process a CreativeWork built from ``input``; it should serialize like one built from ``output``."""
     source_work = CreativeWork(**input)
     graph = ChangeGraph(Graph(source_work))
     graph.process(disambiguate=False)

     serialized = graph.serialize()
     expected = Graph(CreativeWork(**output))
     assert serialized == expected
Exemplo n.º 31
0
 def test_normalize_organization_institution_name(self, input, output, Graph):
     """Process the ``input`` nodes without disambiguation; they should serialize like ``output``."""
     source_nodes = Graph(*input)
     graph = ChangeGraph(source_nodes)
     graph.process(disambiguate=False)

     serialized = graph.serialize()
     expected = Graph(*output)
     assert serialized == expected
Exemplo n.º 32
0
    def test_normalize_tag(self, input, output, Graph):
        """Process a work with the single tag ``input``; its tags should serialize as ``output``."""
        tagged_work = CreativeWork(tags=[input])
        graph = ChangeGraph(Graph(tagged_work))
        graph.process(disambiguate=False)

        serialized = graph.serialize()
        expected = Graph(CreativeWork(tags=output))
        assert serialized == expected
Exemplo n.º 33
0
 def test_normalize_organization_institution_name(self, input, output, Graph):
     """The serialized form of the processed ``input`` nodes should equal ``Graph(*output)``."""
     change_graph = ChangeGraph(Graph(*input))
     change_graph.process(disambiguate=False)

     # Serialize before building the expected graph, matching the original order.
     actual = change_graph.serialize()
     wanted = Graph(*output)
     assert actual == wanted
Exemplo n.º 34
0
 def test_normalize_agent(self, input, output, Graph):
     """Process a single agent node; a falsy ``output`` means an empty serialized graph."""
     graph = ChangeGraph(Graph(input))
     graph.process(disambiguate=False)

     actual = graph.serialize()
     if output:
         expected = Graph(output)
     else:
         expected = []
     assert actual == expected
Exemplo n.º 35
0
 def test_normalize_agentidentifier(self, input, output, Graph):
     """Process a lone AgentIdentifier; a falsy ``output`` means an empty serialized graph."""
     identifier = AgentIdentifier(uri=input, agent=None)
     graph = ChangeGraph(Graph(identifier))
     graph.process(disambiguate=False)

     actual = graph.serialize()
     if output:
         expected = Graph(AgentIdentifier(uri=output, parse=True, agent=None))
     else:
         expected = []
     assert actual == expected
Exemplo n.º 36
0
    def test_add_work_with_existing_relation(self, normalized_data_id):
        '''
        First accept a preprint that cites a bare creativework known only by
        one identifier URI. Then accept an article carrying the same URI.

        The second change set is expected to leave no plain creativework
        behind: there should be exactly one article and one preprint, joined
        by a single 'cites' relation from the preprint to the article.
        '''

        uri = 'http://osf.io/special-snowflake'

        citing_preprint = {
            '@id': '_:1234',
            '@type': 'preprint',
            'title': 'Dogs are okay',
            'related_works': [{'@id': '_:foo', '@type': 'cites'}]
        }
        citation = {
            '@id': '_:foo',
            '@type': 'cites',
            'subject': {'@id': '_:1234', '@type': 'preprint'},
            'related': {'@id': '_:2345', '@type': 'creativework'},
        }
        cited_stub = {
            '@id': '_:2345',
            '@type': 'creativework',
            'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}]
        }
        stub_identifier = {
            '@id': '_:4567',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:2345', '@type': 'creativework'}
        }

        # The first graph is handed to from_graph without calling process().
        first_graph = ChangeGraph([citing_preprint, citation, cited_stub, stub_identifier])
        first_change_set = models.ChangeSet.objects.from_graph(first_graph, normalized_data_id)
        first_change_set.accept()

        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 1
        assert models.Preprint.objects.count() == 1
        assert models.Article.objects.count() == 0

        article_node = {
            '@id': '_:1234',
            '@type': 'article',
            'title': 'All About Cats',
            'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}]
        }
        article_identifier = {
            '@id': '_:2345',
            '@type': 'workidentifier',
            'uri': uri,
            'creative_work': {'@id': '_:1234', '@type': 'article'}
        }
        change = ChangeGraph([article_node, article_identifier])
        change.process()

        second_change_set = models.ChangeSet.objects.from_graph(change, normalized_data_id)
        second_change_set.accept()

        assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
        assert models.Article.objects.count() == 1
        assert models.Preprint.objects.count() == 1

        cat = models.Article.objects.first()
        dog = models.Preprint.objects.first()

        # The preprint's only outgoing relation must be a 'cites' pointing at the article.
        assert dog.outgoing_creative_work_relations.count() == 1
        outgoing = dog.outgoing_creative_work_relations.first()
        assert outgoing._meta.model_name == 'cites'
        assert dog.outgoing_creative_work_relations.first().related == cat

        # Seen from the article, the same relation must come from the preprint.
        assert cat.incoming_creative_work_relations.count() == 1
        incoming = cat.incoming_creative_work_relations.first()
        assert incoming._meta.model_name == 'cites'
        assert cat.incoming_creative_work_relations.first().subject == dog
Exemplo n.º 37
0
 def test_normalize_workidentifier(self, input, output, Graph):
     '''Processing a WorkIdentifier with uri ``input`` yields uri ``output``.

     The identifier is built without a creative work. A falsy ``output``
     means the serialized graph is expected to be an empty list.
     '''
     identifier = WorkIdentifier(uri=input, creative_work=None)
     change_graph = ChangeGraph(Graph(identifier))
     change_graph.process(disambiguate=False)

     serialized = change_graph.serialize()
     if output:
         expected = Graph(WorkIdentifier(uri=output, parse=True, creative_work=None))
     else:
         expected = []
     assert serialized == expected
Exemplo n.º 38
0
 def from_graph(self, graph, disambiguate=False):
     '''Persist ``graph`` as NormalizedData and build a ChangeSet from it.

     The whole ``graph`` document is stored with ``share_source`` as its
     source; the nodes under its '@graph' key are processed as a ChangeGraph,
     passing ``disambiguate`` through. Returns whatever
     ChangeSet.objects.from_graph returns for that graph and the new row's pk.
     '''
     normalized = NormalizedData.objects.create(data=graph, source=share_source)

     change_graph = ChangeGraph(graph['@graph'])
     change_graph.process(disambiguate=disambiguate)

     return ChangeSet.objects.from_graph(change_graph, normalized.pk)
Exemplo n.º 39
0
 def test_normalize_agentworkrelation(self, input, output, Graph):
     '''The processed graph of ``input`` serializes the same as Graph(output).

     Processing runs with disambiguation disabled.
     '''
     change_graph = ChangeGraph(Graph(input))
     change_graph.process(disambiguate=False)

     serialized = change_graph.serialize()
     expected = Graph(output)
     assert serialized == expected
Exemplo n.º 40
0
 def test_normalize_agent(self, input, output, Graph):
     '''Check the serialized result of processing a single agent node.

     A truthy ``output`` is wrapped in Graph for the comparison; a falsy one
     means an empty list is expected.
     '''
     source_nodes = Graph(input)
     change_graph = ChangeGraph(source_nodes)
     change_graph.process(disambiguate=False)

     result = change_graph.serialize()
     if not output:
         assert result == []
     else:
         assert result == Graph(output)
Exemplo n.º 41
0
    def test_normalize_tag(self, input, output, Graph):
        '''Process a work with the single tag ``input`` and compare its tags.

        The expected graph is a CreativeWork whose tags value is ``output``;
        processing runs with disambiguation disabled.
        '''
        source_nodes = Graph(CreativeWork(tags=[input]))
        change_graph = ChangeGraph(source_nodes)
        change_graph.process(disambiguate=False)

        result = change_graph.serialize()
        assert result == Graph(CreativeWork(tags=output))
Exemplo n.º 42
0
 def test_normalize_agentworkrelation(self, input, output, Graph):
     '''Process Graph(input) without disambiguation and expect Graph(output).'''
     source_nodes = Graph(input)
     change_graph = ChangeGraph(source_nodes)
     change_graph.process(disambiguate=False)

     result = change_graph.serialize()
     assert result == Graph(output)
Exemplo n.º 43
0
    def test_add_work_with_existing_relation(self, normalized_data_id):
        '''
        Accept a preprint that cites a creativework described only by one
        identifier, then accept an article with that same identifier URI.

        Afterwards no plain creativework should remain; the single article
        and the single preprint should be linked by one 'cites' relation
        whose subject is the preprint and whose related work is the article.
        '''

        def ref(node_id, node_type):
            # Bare reference to another node in the same graph.
            return {'@id': node_id, '@type': node_type}

        uri = 'http://osf.io/special-snowflake'

        # This first graph goes to from_graph without a process() call.
        stub_graph = ChangeGraph([
            dict(
                ref('_:1234', 'preprint'),
                title='Dogs are okay',
                related_works=[ref('_:foo', 'cites')],
            ),
            dict(
                ref('_:foo', 'cites'),
                subject=ref('_:1234', 'preprint'),
                related=ref('_:2345', 'creativework'),
            ),
            dict(
                ref('_:2345', 'creativework'),
                identifiers=[ref('_:4567', 'workidentifier')],
            ),
            dict(
                ref('_:4567', 'workidentifier'),
                uri=uri,
                creative_work=ref('_:2345', 'creativework'),
            ),
        ])
        models.ChangeSet.objects.from_graph(stub_graph, normalized_data_id).accept()

        plain_work_count = models.CreativeWork.objects.filter(type='share.creativework').count()
        assert plain_work_count == 1
        assert models.Preprint.objects.count() == 1
        assert models.Article.objects.count() == 0

        change = ChangeGraph([
            dict(
                ref('_:1234', 'article'),
                title='All About Cats',
                identifiers=[ref('_:2345', 'workidentifier')],
            ),
            dict(
                ref('_:2345', 'workidentifier'),
                uri=uri,
                creative_work=ref('_:1234', 'article'),
            ),
        ])
        change.process()

        models.ChangeSet.objects.from_graph(change, normalized_data_id).accept()

        plain_work_count = models.CreativeWork.objects.filter(type='share.creativework').count()
        assert plain_work_count == 0
        assert models.Article.objects.count() == 1
        assert models.Preprint.objects.count() == 1

        cat = models.Article.objects.first()
        dog = models.Preprint.objects.first()

        dog_relations = dog.outgoing_creative_work_relations
        assert dog_relations.count() == 1
        assert dog_relations.first()._meta.model_name == 'cites'
        assert dog_relations.first().related == cat

        cat_relations = cat.incoming_creative_work_relations
        assert cat_relations.count() == 1
        assert cat_relations.first()._meta.model_name == 'cites'
        assert cat_relations.first().subject == dog
Exemplo n.º 44
0
def accept_changes(context, nodes, username):
    '''Process ``nodes`` on behalf of ``username`` and accept the change set.

    Looks up the ShareUser by username, builds a ChangeGraph namespaced with
    that username, processes it, creates NormalizedData sourced from the
    user, and returns the result of accepting the resulting ChangeSet.
    ``context`` is not used by this function.
    '''
    share_user = models.ShareUser.objects.get(username=username)

    change_graph = ChangeGraph(nodes, namespace=share_user.username)
    change_graph.process()

    normalized_data = NormalizedDataFactory(source=share_user)
    change_set = ChangeSet.objects.from_graph(change_graph, normalized_data.id)
    return change_set.accept()
Exemplo n.º 45
0
 def test_normalize_contributor_creator_relation(self, input, output, Graph):
     '''Process a work with ``input`` agent relations and expect ``output``.

     Processing runs with disambiguation disabled; the comparison is made
     against a graph of a CreativeWork whose agent_relations are ``output``.
     '''
     source_nodes = Graph(CreativeWork(agent_relations=input))
     change_graph = ChangeGraph(source_nodes)
     change_graph.process(disambiguate=False)

     result = change_graph.serialize()
     assert result == Graph(CreativeWork(agent_relations=output))
Exemplo n.º 46
0
 def test_normalize_person_relation(self, input, output, Graph):
     '''The processed graph of ``input`` nodes serializes like ``output`` nodes.

     Both sequences are unpacked into the Graph fixture; processing runs with
     disambiguation disabled.
     '''
     change_graph = ChangeGraph(Graph(*input))
     change_graph.process(disambiguate=False)

     serialized = change_graph.serialize()
     expected = Graph(*output)
     assert serialized == expected
Exemplo n.º 47
0
    def test_no_timetraveling_many(self, Graph):
        '''
        Accept three graphs for the same publication, with date_updated
        values of 2016-02-03, 2017-02-03T18:07:50 and 2017-02-03T18:07:53.385,
        then process a fourth graph dated 2017-01-01.

        The fourth graph carries is_deleted, title and description, but the
        change on its first node is expected to contain only the description.
        NOTE(review): presumably title and is_deleted are dropped because
        newer data already set them -- confirm against ChangeGraph.
        '''

        def processed(**fields):
            # Build a one-publication change graph and process it.
            change_graph = ChangeGraph(Graph(Publication(**fields)))
            change_graph.process()
            return change_graph

        def accept(change_graph):
            # Each accepted graph gets its own freshly created NormalizedData.
            change_set = ChangeSet.objects.from_graph(change_graph, NormalizedDataFactory().id)
            change_set.accept()

        oldest_graph = processed(
            id=1,
            sparse=True,
            is_deleted=True,
            title='The first title',
            description='The first description',
            identifiers=[WorkIdentifier(1)],
            date_updated='2016-02-03T18:07:50.000000',
        )
        accept(oldest_graph)

        newer_graph = processed(
            id=1,
            sparse=True,
            is_deleted=False,
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:50.000000',
        )
        accept(newer_graph)

        newest_graph = processed(
            id=1,
            sparse=True,
            title='The final title',
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-02-03T18:07:53.385000',
        )
        accept(newest_graph)

        # This graph is only processed, never accepted.
        older_graph = processed(
            id=1,
            sparse=True,
            is_deleted=True,
            title='The second title',
            description='The final description',
            identifiers=[WorkIdentifier(1)],
            date_updated='2017-01-01T18:00:00.000000',
        )

        expected_change = {'description': 'The final description'}
        assert older_graph.nodes[0].change == expected_change
Exemplo n.º 48
0
 def test_normalize_agentidentifier(self, input, output, Graph):
     '''Check the serialized result of processing one agent identifier URI.

     The source identifier has uri ``input`` and no agent. With a truthy
     ``output`` the result must match an identifier built with that uri and
     parse=True; otherwise an empty list is expected.
     '''
     source_nodes = Graph(AgentIdentifier(uri=input, agent=None))
     change_graph = ChangeGraph(source_nodes)
     change_graph.process(disambiguate=False)

     result = change_graph.serialize()
     if not output:
         assert result == []
     else:
         assert result == Graph(AgentIdentifier(uri=output, parse=True, agent=None))
Exemplo n.º 49
0
 def test_normalize_person_relation(self, input, output, Graph):
     '''Process Graph(*input) without disambiguation and expect Graph(*output).'''
     source_nodes = Graph(*input)
     change_graph = ChangeGraph(source_nodes)
     change_graph.process(disambiguate=False)

     result = change_graph.serialize()
     assert result == Graph(*output)
Exemplo n.º 50
0
 def from_graph(self, graph, disambiguate=False):
     '''Persist ``graph`` as NormalizedData and build a ChangeSet from it.

     The whole ``graph`` document is stored with ``share_user`` as its
     source; the nodes under its '@graph' key are processed as a ChangeGraph,
     passing ``disambiguate`` through. Returns whatever
     ChangeSet.objects.from_graph returns for that graph and the new row's pk.
     '''
     normalized = NormalizedData.objects.create(data=graph, source=share_user)

     change_graph = ChangeGraph(graph['@graph'])
     change_graph.process(disambiguate=disambiguate)

     return ChangeSet.objects.from_graph(change_graph, normalized.pk)
Exemplo n.º 51
0
 def test_normalize_creativework(self, input, output, Graph):
     '''Process a CreativeWork built from ``input`` and expect ``output``.

     Both arguments are keyword mappings expanded into CreativeWork;
     processing runs with disambiguation disabled.
     '''
     source_nodes = Graph(CreativeWork(**input))
     change_graph = ChangeGraph(source_nodes)
     change_graph.process(disambiguate=False)

     result = change_graph.serialize()
     assert result == Graph(CreativeWork(**output))