def test_infinite_regulate(self):
    """Regulating with an `InfiniteGraphStep` must raise `InfiniteRegulationError`."""
    regulator = Regulator()
    # Replace the regulator's default graph steps with the single step under test.
    regulator._default_steps.graph_steps = (InfiniteGraphStep(),)

    agent_graph = MutableGraph()
    agent_graph.add_node(None, 'agent', {'name': 'Agent Agent'})

    with pytest.raises(InfiniteRegulationError):
        regulator.regulate(agent_graph)
def test_blocks(self, schemes, authorities, expected_identifiers):
    """Check that `NormalizeIRIs` honours blocked schemes and authorities.

    Each sample URI is added to a fresh graph as a `workidentifier` node and
    run through `regulate_node`.  For samples whose expected scheme and
    authority are both absent from the block lists, the node must afterwards
    carry those values in `scheme` and `host`.  Finally the number of
    `workidentifier` nodes left in the graph must equal
    `expected_identifiers`.

    Args:
        schemes: schemes passed as `blocked_schemes`.
        authorities: authorities passed as `blocked_authorities`.
        expected_identifiers: expected count of remaining identifier nodes.
    """
    identifiers = [
        # (uri, scheme, authority)
        ('http://osf.io/mst3k/', 'http', 'osf.io'),
        # A well-formed address is required here: the authority asserted
        # below is the domain part of the mailto URI.
        ('mailto:share@example.com', 'mailto', 'example.com'),
        ('2049-3630', 'urn', 'issn'),
        ('0000-0002-1825-0097', 'http', 'orcid.org'),
    ]
    step = NormalizeIRIs(blocked_schemes=schemes, blocked_authorities=authorities)
    graph = MutableGraph()

    for uri, scheme, authority in identifiers:
        node = graph.add_node('id_{}'.format(authority), 'workidentifier', {'uri': uri})
        # Nothing is parsed until the step has run.
        assert node['scheme'] is None
        assert node['host'] is None

        step.regulate_node(node)

        if scheme in schemes or authority in authorities:
            # Blocked samples are only checked through the final count.
            continue
        assert node['scheme'] == scheme
        assert node['host'] == authority

    assert len(graph.filter_type('workidentifier')) == expected_identifiers
def test_calls_run(self, mock_steps, num_nodes):
    """Each mocked step's `run` must be called exactly once by `regulate`."""
    graph = MutableGraph()
    for node_id in range(num_nodes):
        graph.add_node(node_id, 'creativework')

    regulator = Regulator(regulator_config={'not': 'empty'})
    regulator.regulate(graph)

    # mock_steps maps to collections of step mocks; check every one of them.
    run_counts = [
        step.run.call_count
        for step_group in mock_steps.values()
        for step in step_group
    ]
    assert all(count == 1 for count in run_counts)
def test_infinite_regulate(self):
    """Expect `InfiniteRegulationError` when the only graph step is an `InfiniteGraphStep`."""
    looping_regulator = Regulator()
    # Override the default graph steps so only the InfiniteGraphStep runs.
    looping_regulator._default_steps.graph_steps = (InfiniteGraphStep(),)

    graph_under_test = MutableGraph()
    graph_under_test.add_node(None, 'agent', {'name': 'Agent Agent'})

    with pytest.raises(InfiniteRegulationError):
        looping_regulator.regulate(graph_under_test)
def test_can_delete_work(self, john_doe, normalized_data):
    """A later changeset carrying `is_deleted: True` must mark the stored preprint as deleted."""

    def identifier_node():
        # Fresh dict on every call so the two graphs never share input objects.
        return {
            '@id': '_:abc',
            '@type': 'workidentifier',
            'uri': 'http://osf.io/faq',
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }

    # First pass: create the preprint together with its identifier.
    creation_graph = MutableGraph.from_jsonld([
        identifier_node(),
        {
            '@id': '_:789',
            '@type': 'preprint',
            'title': 'All About Cats',
        },
    ])
    creation_set = ChangeSetBuilder(creation_graph, normalized_data, disambiguate=True).build_change_set()
    preprint, _identifier = creation_set.accept()
    assert preprint.is_deleted is False

    # Second pass: same identifier URI, preprint now flagged as deleted.
    deletion_graph = MutableGraph.from_jsonld([
        identifier_node(),
        {
            '@id': '_:789',
            'is_deleted': True,
            '@type': 'preprint',
        },
    ])
    deletion_set = ChangeSetBuilder(deletion_graph, normalized_data, disambiguate=True).build_change_set()
    deletion_set.accept()

    preprint.refresh_from_db()
    assert preprint.is_deleted is True
def test_normalize_agentidentifier(self, input, output):
    """Regulate one `agentidentifier` node and compare its URI with `output`.

    When `output` is falsy the graph is expected to be empty afterwards.
    """
    graph = MutableGraph()
    identifier_node = graph.add_node('1', 'agentidentifier', uri=input)

    normalizer = NormalizeIRIs()
    normalizer.regulate_node(identifier_node)

    if not output:
        assert len(graph) == 0
    else:
        assert identifier_node['uri'] == output
def test_normalize_agentidentifier(self, input, output):
    """Regulate one `agentidentifier` node and compare its URI with `output`.

    When `output` is falsy the graph is expected to be empty afterwards.
    """
    graph = MutableGraph()
    identifier_node = graph.add_node('1', 'agentidentifier', {'uri': input})

    normalizer = NormalizeIRIs()
    normalizer.regulate_node(identifier_node)

    if not output:
        assert len(graph) == 0
    else:
        assert identifier_node['uri'] == output
def test_add_relation_related(self, normalized_data):
    '''
    A work exists. Add a second work with a relation to the first work.
    The first work should have the appropriate inverse relation to the
    second work.
    '''
    uri = 'http://osf.io/special-snowflake'

    # Step 1: store the article together with its identifier.
    article_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'article',
        'title': 'All About Cats',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'article'},
    }])
    ChangeSetBuilder(article_graph, normalized_data).build_change_set().accept()
    assert models.Article.objects.count() == 1

    # Step 2: a preprint citing a bare creativework that shares the article's URI.
    citing_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay too',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }, {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }, {
        '@id': '_:2345',
        '@type': 'creativework',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }, {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }])
    citing_set = ChangeSetBuilder(citing_graph, normalized_data, disambiguate=True).build_change_set()
    citing_set.accept()

    assert models.Article.objects.count() == 1
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0

    cat = models.Article.objects.first()
    dog = models.Preprint.objects.first()

    # Outgoing side: the preprint cites the article.
    outgoing = dog.outgoing_creative_work_relations
    assert outgoing.count() == 1
    assert outgoing.first()._meta.model_name == 'cites'
    assert outgoing.first().related == cat

    # Incoming side: the article is cited by the preprint.
    incoming = cat.incoming_creative_work_relations
    assert incoming.count() == 1
    assert incoming.first()._meta.model_name == 'cites'
    assert incoming.first().subject == dog
def test_normalize_workidentifier(self, input, output):
    """Regulate one `workidentifier` node with `mailto`, `issn` and `orcid.org` blocked.

    A truthy `output` is the URI expected on the node afterwards; a falsy
    `output` means the graph is expected to be empty.
    """
    graph = MutableGraph()
    identifier_node = graph.add_node('1', 'workidentifier', {'uri': input})

    normalizer = NormalizeIRIs(
        blocked_schemes=['mailto'],
        blocked_authorities=['issn', 'orcid.org'],
    )
    normalizer.regulate_node(identifier_node)

    if not output:
        assert len(graph) == 0
    else:
        assert identifier_node['uri'] == output
def test_calls_steps(self, steps, num_nodes):
    """Node steps run once per node; graph and validation steps run once per graph."""
    graph = MutableGraph()
    for node_id in range(num_nodes):
        graph.add_node(node_id, 'creativework')

    Regulator().regulate(graph)

    node_steps = steps['node']
    assert all(step.should_regulate.call_count == num_nodes for step in node_steps)
    assert all(step.regulate_node.call_count == num_nodes for step in node_steps)

    graph_steps = steps['graph']
    assert all(step.regulate_graph.call_count == 1 for step in graph_steps)

    validation_steps = steps['validation']
    assert all(step.validate_graph.call_count == 1 for step in validation_steps)
def test_normalize_workidentifier(self, input, output):
    """Regulate one `workidentifier` node with `mailto`, `issn` and `orcid.org` blocked.

    A truthy `output` is the URI expected on the node afterwards; a falsy
    `output` means the graph is expected to be empty.
    """
    graph = MutableGraph()
    identifier_node = graph.add_node('1', 'workidentifier', uri=input)

    normalizer = NormalizeIRIs(
        blocked_schemes=['mailto'],
        blocked_authorities=['issn', 'orcid.org'],
    )
    normalizer.regulate_node(identifier_node)

    if not output:
        assert len(graph) == 0
    else:
        assert identifier_node['uri'] == output
def test_change_work_type(self, normalized_data):
    '''
    A CreativeWork with an Identifier exists. Accept a new changeset with a
    Preprint with the same Identifier. The preprint should disambiguate to
    the existing work, and the work's type should be updated to Preprint.
    '''
    title = 'Ambiguous Earthquakes'
    uri = 'http://osf.io/special-snowflake'

    # First changeset: a project carrying the title and the identifier.
    project_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'project',
        'title': title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'project'},
    }])
    project_builder = ChangeSetBuilder(project_graph, normalized_data, disambiguate=True)
    work, identifier = project_builder.build_change_set().accept()
    work_id = work.id

    assert identifier.uri == uri
    assert models.Project.objects.count() == 1
    assert models.Preprint.objects.count() == 0
    assert models.CreativeWork.objects.count() == 1
    assert models.Project.objects.all()[0].changes.count() == 1

    # Second changeset: same identifier URI, typed as a preprint, no title.
    preprint_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'preprint'},
    }])
    preprint_builder = ChangeSetBuilder(preprint_graph, normalized_data, disambiguate=True)
    preprint_builder.build_change_set().accept()

    assert models.Project.objects.count() == 0
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.count() == 1
    # Same row as before: the original title is still there under the original id.
    assert models.Preprint.objects.get(id=work_id).title == title
    assert models.Preprint.objects.all()[0].changes.count() == 2
def test_generic_creative_work(self, normalized_data):
    '''
    A Preprint with an Identifier exists. Accept a changeset with a
    CreativeWork with the same Identifier and a different title. The
    Preprint's title should be updated to the new value, but its type
    should remain the same.
    '''
    old_title = 'Ambiguous Earthquakes'
    uri = 'http://osf.io/special-snowflake'

    # First changeset: the preprint and its identifier.
    preprint_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': old_title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'preprint'},
    }])
    original_change_set = ChangeSetBuilder(preprint_graph, normalized_data).build_change_set()
    preprint, identifier = original_change_set.accept()
    preprint_id = preprint.id

    assert identifier.uri == uri
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Preprint.objects.get(id=preprint_id).title == old_title

    # Second changeset: a generic creativework with the same URI and a new title.
    new_title = 'Ambidextrous Earthquakes'
    generic_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'creativework',
        'title': new_title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'creativework'},
    }])
    update_set = ChangeSetBuilder(generic_graph, normalized_data, disambiguate=True).build_change_set()
    update_set.accept()

    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Preprint.objects.get(id=preprint_id).title == new_title
def change_node():
    """Return the first node yielded by a one-person `MutableGraph`."""
    person_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'person',
        'given_name': 'No',
        'family_name': 'Matter',
    }])
    return next(iter(person_graph))
def create_graph():
    """Build a `MutableGraph` holding a single `person` node (Jane Doe)."""
    jane = {
        '@id': '_:1234',
        '@type': 'person',
        'given_name': 'Jane',
        'family_name': 'Doe',
    }
    return MutableGraph.from_jsonld([jane])
def test_update_dependencies_accept(self, john_doe, normalized_data):
    """Accept a changeset that renames an existing person and links a new preprint to them.

    The person node is matched explicitly to `john_doe` through the
    `matches` argument, keyed by the encoded id.
    """
    john_doe_id = IDObfuscator.encode(john_doe)

    graph = MutableGraph.from_jsonld([{
        '@id': john_doe_id,
        '@type': 'person',
        'given_name': 'Jane',
    }, {
        '@id': '_:456',
        '@type': 'Creator',
        'agent': {'@id': john_doe_id, '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }, {
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
    }])

    builder = ChangeSetBuilder(graph, normalized_data, matches={john_doe_id: john_doe})
    builder.build_change_set().accept()

    john_doe.refresh_from_db()
    assert john_doe.given_name == 'Jane'

    # Preprints linked to the person through an agent relation.
    authored = models.Preprint.objects.filter(agent_relations__agent=john_doe)
    assert authored.count() == 1
    assert authored.first().title == 'All About Cats'
def transform(self, datum, **kwargs):
    """Transform a RawDatum

    Args:
        datum: RawDatum to transform. A plain `str` or `bytes` payload is
            accepted as well; in that case no source identifier is added.
        **kwargs: Forwarded to do_transform. Overrides values in the source
            config's transformer_kwargs

    Returns a MutableGraph, or None when do_transform produces no JSON-LD
    """
    is_plain_payload = isinstance(datum, (str, bytes))
    # Only RawDatum-like objects carry a source-unique identifier.
    source_id = None if is_plain_payload else datum.suid.identifier
    payload = datum if is_plain_payload else datum.datum
    if isinstance(payload, bytes):
        payload = payload.decode()

    transform_kwargs = self._get_kwargs(**kwargs)
    jsonld, root_ref = self.do_transform(payload, **transform_kwargs)
    if not jsonld:
        return None

    # jsonld is known to be truthy here, so only the other two need checking.
    if source_id and root_ref:
        self.add_source_identifier(source_id, jsonld, root_ref)

    # TODO return a MutableGraph from do_transform, maybe build it directly in Parser?
    return MutableGraph.from_jsonld(jsonld)
def _consume_job(self, job, superfluous, force, apply_changes=True, index=True, urgent=False):
    """Transform and regulate a job's data (or reuse a stored result), then apply and index it.

    Args:
        job: job to consume; supplies `ingested_normalized_data`, `suid` and `raw`.
        superfluous: when truthy, skip the lookup of previously stored
            NormalizedData and always transform/regulate again.
        force: not used in this method body.
        apply_changes: when truthy, pass the graph to `_apply_changes`.
        index: when truthy, index the work ids returned by `_apply_changes`.
        urgent: forwarded to `_update_index`.
    """
    # Check whether we've already done transform/regulate
    datum = None
    if not superfluous:
        datum = job.ingested_normalized_data.order_by('-created_at').first()

    if datum is not None:
        graph = MutableGraph.from_jsonld(datum.data)
    else:
        graph = self._transform(job)
        if not graph:
            return
        graph = self._regulate(job, graph)
        if not graph:
            return

        # Persist the regulated graph so a later non-superfluous run can reuse it.
        datum = NormalizedData.objects.create(
            data={'@graph': graph.to_jsonld()},
            source=job.suid.source_config.source.user,
            raw=job.raw,
        )
        job.ingested_normalized_data.add(datum)

    if not apply_changes:
        return

    updated_work_ids = self._apply_changes(job, graph, datum)
    if index and updated_work_ids:
        self._update_index(updated_work_ids, urgent)
def accept_changes(context, nodes, username):
    """Regulate `nodes`, build a disambiguated change set for `username` and accept it.

    Args:
        context: not used in this function body.
        nodes: JSON-LD input handed to `MutableGraph.from_jsonld`.
        username: username of the `ShareUser` used as the data source.

    Returns:
        The result of `change_set.accept()`, or None when the built change
        set is falsy.
    """
    source_user = models.ShareUser.objects.get(username=username)

    graph = MutableGraph.from_jsonld(nodes)
    Regulator().regulate(graph)

    normalized_data = NormalizedDataFactory(source=source_user)
    builder = ChangeSetBuilder(graph, normalized_data, disambiguate=True)
    change_set = builder.build_change_set()

    if not change_set:
        return None
    return change_set.accept()
def create_graph_dependencies():
    """Build a graph of a person, a preprint and the `Creator` relation joining them."""
    person = {
        '@id': '_:123',
        '@type': 'person',
        'given_name': 'Jane',
        'family_name': 'Doe',
    }
    creator = {
        '@id': '_:456',
        '@type': 'Creator',
        'agent': {'@id': '_:123', '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }
    preprint = {
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
        'related_agents': [{'@id': '_:456', '@type': 'Creator'}],
    }
    return MutableGraph.from_jsonld([person, creator, preprint])
def _consume_job(self, job, superfluous, force, apply_changes=True, index=True, urgent=False):
    """Produce a regulated graph for `job`, then optionally apply and index its changes.

    The newest NormalizedData already attached to the job is reused unless
    `superfluous` is truthy or none exists; otherwise the job is transformed
    and regulated, and the result is stored on the job.

    Args:
        job: job to consume; supplies `ingested_normalized_data`, `suid` and `raw`.
        superfluous: when truthy, never reuse stored NormalizedData.
        force: not used in this method body.
        apply_changes: when truthy, call `_apply_changes` with the graph.
        index: when truthy, index whatever work ids `_apply_changes` returns.
        urgent: forwarded to `_update_index`.
    """
    # Check whether we've already done transform/regulate
    stored = None if superfluous else job.ingested_normalized_data.order_by('-created_at').first()

    if stored is None:
        graph = self._transform(job)
        if not graph:
            return
        graph = self._regulate(job, graph)
        if not graph:
            return

        stored = NormalizedData.objects.create(
            data={'@graph': graph.to_jsonld()},
            source=job.suid.source_config.source.user,
            raw=job.raw,
        )
        job.ingested_normalized_data.add(stored)
    else:
        graph = MutableGraph.from_jsonld(stored.data)

    if apply_changes:
        updated_work_ids = self._apply_changes(job, graph, stored)
        # Nothing to index when no work ids came back.
        if index and updated_work_ids:
            self._update_index(updated_work_ids, urgent)
def graph(self):
    """Build a graph of three `creativework` nodes; the first two carry an `extra` dict."""
    first_extra = {
        'foo': 'flooby',
        'bah': 'hab',
    }
    second_extra = {
        'extra': 'extra',
        'bah': 'hab',
    }

    works = MutableGraph()
    works.add_node(1, 'creativework', title='A work!', extra=first_extra)
    works.add_node(2, 'creativework', title='Another work!', extra=second_extra)
    works.add_node(3, 'creativework', title='No extra :(')
    return works
def test_can_delete_work(self, john_doe, normalized_data):
    """Accepting a second changeset with `is_deleted: True` must flip the stored preprint's flag."""

    def build_change_set(preprint_node):
        # The identifier node is the same in both rounds; only the preprint node varies.
        graph = MutableGraph.from_jsonld([{
            '@id': '_:abc',
            '@type': 'workidentifier',
            'uri': 'http://osf.io/faq',
            'creative_work': {'@id': '_:789', '@type': 'preprint'},
        }, preprint_node])
        return ChangeSetBuilder(graph, normalized_data, disambiguate=True).build_change_set()

    # Round one: create the preprint.
    created = build_change_set({
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
    })
    preprint, _identifier = created.accept()
    assert preprint.is_deleted is False

    # Round two: mark it deleted.
    build_change_set({
        '@id': '_:789',
        'is_deleted': True,
        '@type': 'preprint',
    }).accept()

    preprint.refresh_from_db()
    assert preprint.is_deleted is True
def test_legacy_pipeline(self, legacy, monkeypatch):
    """Changes are applied and NormalizedData is stored only when `SHARE_LEGACY_PIPELINE` is truthy."""
    apply_changes_mock = mock.Mock(return_value=[])
    monkeypatch.setattr('share.tasks.jobs.IngestJobConsumer._apply_changes', apply_changes_mock)
    monkeypatch.setattr('django.conf.settings.SHARE_LEGACY_PIPELINE', legacy)

    # The raw datum is a serialized single-node graph.
    source_graph = MutableGraph()
    source_graph.add_node('_:id', 'creativework', title='This is a title')
    raw_payload = json.dumps({'@graph': source_graph.to_jsonld(in_edges=False)})
    job = factories.IngestJobFactory(raw__datum=raw_payload)

    ingest.apply(kwargs={'job_id': job.id}, throw=True)

    if not legacy:
        assert NormalizedData.objects.count() == 0
        assert not apply_changes_mock.called
    else:
        assert NormalizedData.objects.count() == 1
        assert apply_changes_mock.call_count == 1
def create_graph_dependencies():
    """Return a `MutableGraph` with a person, a preprint and a `Creator` node linking the two."""
    nodes = []
    nodes.append({
        '@id': '_:123',
        '@type': 'person',
        'given_name': 'Jane',
        'family_name': 'Doe',
    })
    # The Creator node references both of the other nodes by id.
    nodes.append({
        '@id': '_:456',
        '@type': 'Creator',
        'agent': {'@id': '_:123', '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    })
    nodes.append({
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
        'related_agents': [{'@id': '_:456', '@type': 'Creator'}],
    })
    return MutableGraph.from_jsonld(nodes)
def test_related_works(self, normalized_data):
    '''
    Create two works with a relation between them.
    '''
    uri = 'http://osf.io/special-snowflake'

    graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay too',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }, {
        '@id': '_:2345',
        '@type': 'creativework',
        'title': 'Cats, tho',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }, {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }, {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }])
    change_set = ChangeSetBuilder(graph, normalized_data).build_change_set()
    change_set.accept()

    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 1

    citing = models.Preprint.objects.first()
    cited = models.AbstractCreativeWork.objects.get(title='Cats, tho')

    assert citing.related_works.count() == 1
    assert citing.related_works.first() == cited

    # The relation seen from the citing preprint ...
    outgoing = citing.outgoing_creative_work_relations
    assert outgoing.count() == 1
    assert outgoing.first()._meta.model_name == 'cites'
    assert outgoing.first().related == cited

    # ... and from the cited work.
    incoming = cited.incoming_creative_work_relations
    assert incoming.count() == 1
    assert incoming.first()._meta.model_name == 'cites'
    assert incoming.first().subject == citing
def graph(self):
    """Build a graph of three `creativework` nodes; the first two carry an `extra` dict."""
    # (node id, attributes) in insertion order.
    node_specs = (
        (1, {
            'title': 'A work!',
            'extra': {
                'foo': 'flooby',
                'bah': 'hab',
            },
        }),
        (2, {
            'title': 'Another work!',
            'extra': {
                'extra': 'extra',
                'bah': 'hab',
            },
        }),
        (3, {'title': 'No extra :('}),
    )

    works = MutableGraph()
    for node_id, attrs in node_specs:
        works.add_node(node_id, 'creativework', attrs)
    return works
def test_update_dependencies_accept(self, john_doe, normalized_data):
    """Rename a matched person and attach a new preprint to them in one changeset.

    `john_doe` is matched explicitly via `matches`, keyed by the encoded id
    that the person node uses as its `@id`.
    """
    encoded_id = IDObfuscator.encode(john_doe)

    person_node = {
        '@id': encoded_id,
        '@type': 'person',
        'given_name': 'Jane',
    }
    creator_node = {
        '@id': '_:456',
        '@type': 'Creator',
        'agent': {'@id': encoded_id, '@type': 'person'},
        'creative_work': {'@id': '_:789', '@type': 'preprint'},
    }
    preprint_node = {
        '@id': '_:789',
        '@type': 'preprint',
        'title': 'All About Cats',
    }
    graph = MutableGraph.from_jsonld([person_node, creator_node, preprint_node])

    change_set = ChangeSetBuilder(
        graph,
        normalized_data,
        matches={encoded_id: john_doe},
    ).build_change_set()
    change_set.accept()

    john_doe.refresh_from_db()
    assert john_doe.given_name == 'Jane'

    preprints_by_john = models.Preprint.objects.filter(agent_relations__agent=john_doe)
    assert preprints_by_john.count() == 1
    assert preprints_by_john.first().title == 'All About Cats'
def test_change_work_type(self, normalized_data):
    '''
    A CreativeWork with an Identifier exists. Accept a new changeset with a
    Preprint with the same Identifier. The preprint should disambiguate to
    the existing work, and the work's type should be updated to Preprint.
    '''
    title = 'Ambiguous Earthquakes'
    uri = 'http://osf.io/special-snowflake'

    def accept(graph):
        # Both rounds build their change set the same way.
        return ChangeSetBuilder(graph, normalized_data, disambiguate=True).build_change_set().accept()

    # Round one: a titled project with an identifier.
    work, identifier = accept(MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'project',
        'title': title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'project'},
    }]))
    existing_id = work.id

    assert identifier.uri == uri
    assert models.Project.objects.count() == 1
    assert models.Preprint.objects.count() == 0
    assert models.CreativeWork.objects.count() == 1
    assert models.Project.objects.all()[0].changes.count() == 1

    # Round two: an untitled preprint with the same identifier URI.
    accept(MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'preprint'},
    }]))

    assert models.Project.objects.count() == 0
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.count() == 1
    assert models.Preprint.objects.get(id=existing_id).title == title
    assert models.Preprint.objects.all()[0].changes.count() == 2
def from_graph(self, jsonld, disambiguate=False):
    """Store `jsonld` as NormalizedData and build a change set from the same JSON-LD.

    Args:
        jsonld: JSON-LD used both as the stored data and as the graph input.
        disambiguate: forwarded to `ChangeSetBuilder`.

    Returns:
        Whatever `ChangeSetBuilder.build_change_set()` returns.
    """
    normalized = NormalizedData.objects.create(data=jsonld, source=share_user)
    builder = ChangeSetBuilder(
        MutableGraph.from_jsonld(jsonld),
        normalized,
        disambiguate=disambiguate,
    )
    return builder.build_change_set()
def update_graph(jane_doe):
    """Build a one-node graph that sets `family_name` to 'Dough' on `jane_doe`'s encoded id."""
    person_update = {
        '@id': IDObfuscator.encode(jane_doe),
        '@type': 'person',
        'family_name': 'Dough',
    }
    return MutableGraph.from_jsonld([person_update])
def test_add_work_with_existing_relation(self, normalized_data):
    '''
    Harvest a work that has a relation to some work identified by a DOI.
    The related work should be a CreativeWork with no information except
    the one Identifier.
    Then harvest a work with the same DOI. It should update the
    CreativeWork's type and attributes instead of creating a new work.
    '''
    uri = 'http://osf.io/special-snowflake'

    # Step 1: a preprint citing a bare creativework known only by its identifier.
    citing_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }, {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }, {
        '@id': '_:2345',
        '@type': 'creativework',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }, {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }])
    ChangeSetBuilder(citing_graph, normalized_data).build_change_set().accept()

    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 1
    assert models.Preprint.objects.count() == 1
    assert models.Article.objects.count() == 0

    # Step 2: an article arrives with the same identifier URI.
    article_graph = MutableGraph.from_jsonld([{
        '@id': '_:1234',
        '@type': 'article',
        'title': 'All About Cats',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'article'},
    }])
    ChangeSetBuilder(article_graph, normalized_data, disambiguate=True).build_change_set().accept()

    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Article.objects.count() == 1
    assert models.Preprint.objects.count() == 1

    cat = models.Article.objects.first()
    dog = models.Preprint.objects.first()

    # The citation must now join the preprint and the article.
    outgoing = dog.outgoing_creative_work_relations
    assert outgoing.count() == 1
    assert outgoing.first()._meta.model_name == 'cites'
    assert outgoing.first().related == cat

    incoming = cat.incoming_creative_work_relations
    assert incoming.count() == 1
    assert incoming.first()._meta.model_name == 'cites'
    assert incoming.first().subject == dog
def test_add_relation_related(self, normalized_data):
    '''
    A work exists. Add a second work with a relation to the first work.
    The first work should have the appropriate inverse relation to the
    second work.
    '''
    uri = 'http://osf.io/special-snowflake'

    # Existing work: an article with one identifier.
    existing_nodes = [{
        '@id': '_:1234',
        '@type': 'article',
        'title': 'All About Cats',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'article'},
    }]
    existing_builder = ChangeSetBuilder(MutableGraph.from_jsonld(existing_nodes), normalized_data)
    existing_builder.build_change_set().accept()
    assert models.Article.objects.count() == 1

    # New work: a preprint whose `cites` relation targets a creativework
    # that carries the same identifier URI as the article.
    new_nodes = [{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay too',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }, {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }, {
        '@id': '_:2345',
        '@type': 'creativework',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }, {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }]
    graph = MutableGraph.from_jsonld(new_nodes)
    change_set = ChangeSetBuilder(graph, normalized_data, disambiguate=True).build_change_set()
    change_set.accept()

    assert models.Article.objects.count() == 1
    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0

    cat = models.Article.objects.first()
    dog = models.Preprint.objects.first()

    dog_relations = dog.outgoing_creative_work_relations
    assert dog_relations.count() == 1
    assert dog_relations.first()._meta.model_name == 'cites'
    assert dog_relations.first().related == cat

    cat_relations = cat.incoming_creative_work_relations
    assert cat_relations.count() == 1
    assert cat_relations.first()._meta.model_name == 'cites'
    assert cat_relations.first().subject == dog
def test_add_work_with_existing_relation(self, normalized_data):
    '''
    Harvest a work that has a relation to some work identified by a DOI.
    The related work should be a CreativeWork with no information except
    the one Identifier.
    Then harvest a work with the same DOI. It should update the
    CreativeWork's type and attributes instead of creating a new work.
    '''
    uri = 'http://osf.io/special-snowflake'

    # First harvest: preprint -> cites -> creativework (identifier only).
    first_harvest = [{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }, {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }, {
        '@id': '_:2345',
        '@type': 'creativework',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }, {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }]
    first_builder = ChangeSetBuilder(MutableGraph.from_jsonld(first_harvest), normalized_data)
    first_builder.build_change_set().accept()

    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 1
    assert models.Preprint.objects.count() == 1
    assert models.Article.objects.count() == 0

    # Second harvest: an article identified by the same URI.
    second_harvest = [{
        '@id': '_:1234',
        '@type': 'article',
        'title': 'All About Cats',
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'article'},
    }]
    change = MutableGraph.from_jsonld(second_harvest)
    ChangeSetBuilder(change, normalized_data, disambiguate=True).build_change_set().accept()

    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 0
    assert models.Article.objects.count() == 1
    assert models.Preprint.objects.count() == 1

    cat = models.Article.objects.first()
    dog = models.Preprint.objects.first()

    dog_relations = dog.outgoing_creative_work_relations
    assert dog_relations.count() == 1
    assert dog_relations.first()._meta.model_name == 'cites'
    assert dog_relations.first().related == cat

    cat_relations = cat.incoming_creative_work_relations
    assert cat_relations.count() == 1
    assert cat_relations.first()._meta.model_name == 'cites'
    assert cat_relations.first().subject == dog
def mutable_graph(mutable_graph_nodes):
    """Build a `MutableGraph` from the given JSON-LD nodes."""
    built_graph = MutableGraph.from_jsonld(mutable_graph_nodes)
    return built_graph
def from_graph(self, jsonld, disambiguate=False):
    """Persist `jsonld` as NormalizedData, then build a change set from it.

    Args:
        jsonld: JSON-LD stored as the NormalizedData payload and parsed into
            the graph handed to the builder.
        disambiguate: forwarded to `ChangeSetBuilder`.

    Returns:
        The result of `ChangeSetBuilder.build_change_set()`.
    """
    # The record must exist before the builder is constructed with it.
    record = NormalizedData.objects.create(data=jsonld, source=share_user)
    parsed_graph = MutableGraph.from_jsonld(jsonld)
    change_set_builder = ChangeSetBuilder(parsed_graph, record, disambiguate=disambiguate)
    return change_set_builder.build_change_set()
def test_generic_creative_work(self, normalized_data):
    '''
    A Preprint with an Identifier exists. Accept a changeset with a
    CreativeWork with the same Identifier and a different title. The
    Preprint's title should be updated to the new value, but its type
    should remain the same.
    '''
    old_title = 'Ambiguous Earthquakes'
    new_title = 'Ambidextrous Earthquakes'
    uri = 'http://osf.io/special-snowflake'

    def generic_works():
        # Works stored with the generic creativework type.
        return models.CreativeWork.objects.filter(type='share.creativework')

    # Existing state: a preprint with the old title.
    existing_nodes = [{
        '@id': '_:1234',
        '@type': 'preprint',
        'title': old_title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'preprint'},
    }]
    original_change_set = ChangeSetBuilder(
        MutableGraph.from_jsonld(existing_nodes),
        normalized_data,
    ).build_change_set()
    preprint, identifier = original_change_set.accept()
    stored_id = preprint.id

    assert identifier.uri == uri
    assert models.Preprint.objects.count() == 1
    assert generic_works().count() == 0
    assert models.Preprint.objects.get(id=stored_id).title == old_title

    # Incoming update: generic type, same identifier URI, new title.
    update_nodes = [{
        '@id': '_:1234',
        '@type': 'creativework',
        'title': new_title,
        'identifiers': [{'@id': '_:2345', '@type': 'workidentifier'}],
    }, {
        '@id': '_:2345',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:1234', '@type': 'creativework'},
    }]
    graph = MutableGraph.from_jsonld(update_nodes)
    change_set = ChangeSetBuilder(graph, normalized_data, disambiguate=True).build_change_set()
    change_set.accept()

    assert models.Preprint.objects.count() == 1
    assert generic_works().count() == 0
    assert models.Preprint.objects.get(id=stored_id).title == new_title
def test_related_works(self, normalized_data):
    '''
    Create two works with a relation between them.
    '''
    uri = 'http://osf.io/special-snowflake'

    preprint_node = {
        '@id': '_:1234',
        '@type': 'preprint',
        'title': 'Dogs are okay too',
        'related_works': [{'@id': '_:foo', '@type': 'cites'}],
    }
    creativework_node = {
        '@id': '_:2345',
        '@type': 'creativework',
        'title': 'Cats, tho',
        'identifiers': [{'@id': '_:4567', '@type': 'workidentifier'}],
    }
    cites_node = {
        '@id': '_:foo',
        '@type': 'cites',
        'subject': {'@id': '_:1234', '@type': 'preprint'},
        'related': {'@id': '_:2345', '@type': 'creativework'},
    }
    identifier_node = {
        '@id': '_:4567',
        '@type': 'workidentifier',
        'uri': uri,
        'creative_work': {'@id': '_:2345', '@type': 'creativework'},
    }
    # Node order matches the original input list.
    graph = MutableGraph.from_jsonld([preprint_node, creativework_node, cites_node, identifier_node])

    change_set = ChangeSetBuilder(graph, normalized_data).build_change_set()
    change_set.accept()

    assert models.Preprint.objects.count() == 1
    assert models.CreativeWork.objects.filter(type='share.creativework').count() == 1

    p = models.Preprint.objects.first()
    c = models.AbstractCreativeWork.objects.get(title='Cats, tho')

    assert p.related_works.count() == 1
    assert p.related_works.first() == c

    p_outgoing = p.outgoing_creative_work_relations
    assert p_outgoing.count() == 1
    assert p_outgoing.first()._meta.model_name == 'cites'
    assert p_outgoing.first().related == c

    c_incoming = c.incoming_creative_work_relations
    assert c_incoming.count() == 1
    assert c_incoming.first()._meta.model_name == 'cites'
    assert c_incoming.first().subject == p