def test_insert_claim(self):
    """It should be able to create a series of claims.

    ``importer.insert_claims`` is given two plain dicts and one value
    returned by ``importer.create_claim``.  The test expects three items
    back and three ``ClaimsLog`` rows stored for the bibcode.
    """
    bibcode = "b123456789123456789"
    orcidid = "0000-0000-0000-0001"

    inserted = importer.insert_claims(
        [
            {"bibcode": bibcode, "orcidid": orcidid, "provenance": "ads test"},
            {"bibcode": bibcode, "orcidid": orcidid, "status": "updated"},
            importer.create_claim(bibcode=bibcode, orcidid=orcidid, status="removed"),
        ]
    )
    # assertEqual (not the deprecated assertEquals alias) reports both values
    # when the check fails.
    self.assertEqual(len(inserted), 3)

    stored = self.app.session.query(ClaimsLog).filter_by(bibcode=bibcode).all()
    self.assertEqual(len(stored), 3)
self.logger.warning('Error processing a record: ' '{0} ({1})'.format(w, traceback.format_exc())) continue except TypeError, e: self.logger.warning('Error processing a record: ' '{0} ({1})'.format(w, traceback.format_exc())) continue #always insert a record that marks the beginning of a full-import #TODO: record orcid's last-modified-date to_claim.append(importer.create_claim(bibcode='', orcidid=orcidid, provenance=self.__class__.__name__, status='#full-import', date=updt )) # find difference between what we have and what orcid has claims_we_have = set(updated.keys()).difference(set(removed.keys())) claims_orcid_has = set(orcid_present.keys()) # those guys will be added (with ORCID date signature) for c in claims_orcid_has.difference(claims_we_have): claim = orcid_present[c] to_claim.append(importer.create_claim(bibcode=claim[0], orcidid=orcidid, provenance=claim[2], status='claimed', date=claim[1])
def test_ingester_logic(self, updater_retrieve_metadata):
    """Has to be able to diff an ORCID profile against the existing claims log.

    The three HTTP endpoints named in ``self.app.config`` are stubbed with
    the canned files under ``stub_data`` and ``OrcidImporter.publish`` is
    patched.  The contents of ``ClaimsLog`` are then checked after:

    1. the first import of the profile,
    2. a second run that must not add any record,
    3. a run after the ``#full-import`` marker's date was moved, and
    4. a run after one claim was deleted and a ``removed`` entry was added.

    ``updater_retrieve_metadata`` is not used in the body; presumably it is
    a mock injected by a ``mock.patch`` decorator -- confirm where the test
    is declared.
    """
    orcidid = "0000-0003-3041-2092"
    first_start = "1974-11-09T22:56:52.518002+00:00"
    last_check = "2015-11-05T11:37:36.381000+00:00"
    import_date = "2015-11-05T16:37:33.381000+00:00"
    shifted_import_date = "2015-11-05T16:37:33.382000+00:00"  # import_date + 1000 microseconds
    claim_date = "2015-09-16T10:59:01.721000+00:00"
    importer_name = "OrcidImporter"
    publish_target = "ADSOrcid.pipeline.OrcidImporter.OrcidImporter.publish"

    def stub(suffix):
        # Return the canned response body; the with-block closes the file.
        path = os.path.join(self.app.config["TEST_UNIT_DIR"], "stub_data", orcidid + suffix)
        with open(path) as handle:
            return handle.read()

    def payload(**extra):
        # Fresh message dict for every call, as in the original literals.
        message = {"orcidid": u"0000-0003-3041-2092"}
        message.update(extra)
        return message

    def rec(rec_id, status, bibcode, provenance, created=claim_date):
        # Expected ``toJSON()`` output of one ClaimsLog row.
        return {
            "status": status,
            "bibcode": bibcode,
            "created": created,
            "provenance": provenance,
            "orcidid": orcidid,
            "id": rec_id,
        }

    def full_import(rec_id, created):
        # Expected marker row that opens a full import.
        return rec(rec_id, "#full-import", "", importer_name, created)

    def stored_claims(session):
        # Every ClaimsLog row, serialised, in query order.
        return [row.toJSON() for row in session.query(ClaimsLog).all()]

    def last_check_entry(session):
        # The KeyValue row stored under the 'last.check' key.
        return session.query(KeyValue).filter(KeyValue.key == "last.check").first()

    # claims expected after the very first import (ids 2-8)
    imported = [
        rec(2, "claimed", "2015arXiv150304194A", "NASA ADS"),
        rec(3, "claimed", "2015AAS...22533655A", "NASA ADS"),
        rec(4, "claimed", "2014arXiv1406.4542H", "NASA ADS"),
        rec(5, "claimed", "2015arXiv150305881C", "Roman Chyla"),
        rec(6, "claimed", "2015ASPC..492..150T", "NASA ADS"),
        rec(7, "claimed", "2015ASPC..492..208G", "NASA ADS"),
        rec(8, "claimed", "2014AAS...22325503A", "NASA ADS"),
    ]

    stubbed_endpoints = (
        (self.app.config["API_ORCID_EXPORT_PROFILE"] % orcidid, ".ads.json"),
        (re.compile(self.app.config["API_ORCID_UPDATES_ENDPOINT"] % ".*"), ".orcid-updates.json"),
        (re.compile(self.app.config["API_SOLR_QUERY_ENDPOINT"] + ".*"), ".solr.json"),
    )
    for url, suffix in stubbed_endpoints:
        httpretty.register_uri(
            httpretty.GET,
            url,
            content_type="application/json",
            body=stub(suffix),
        )

    with mock.patch(publish_target):
        worker = OrcidImporter.OrcidImporter()
        worker.check_orcid_updates()
        worker.publish.assert_called_with(
            payload(start=first_start),
            topic="ads.orcid.fresh-claims",
        )
        worker.publish.reset_mock()

        worker.process_payload(payload(start=first_start))
        with app.session_scope() as session:
            self.assertEqual(last_check, last_check_entry(session).value)
            self.assertEqual(
                stored_claims(session),
                [full_import(1, import_date)] + imported,
            )

            # blank the 'last.check' marker before the second pass
            kv = last_check_entry(session)
            kv.value = ""
            session.commit()

    # do the same stuff again (it should not bother with new recs)
    with mock.patch(publish_target):
        worker.check_orcid_updates()
        self.assertNotEqual(worker.publish.call_args[0][0]["start"], first_start)
        worker.publish.reset_mock()

        worker.process_payload(payload())
        with app.session_scope() as session:
            self.assertEqual(len(session.query(ClaimsLog).all()), 8)
            self.assertEqual(last_check, last_check_entry(session).value)

            # now change the date of the #full-import (this will force the
            # logic to re-evaluate the batch against the existing claims)
            marker = session.query(ClaimsLog).filter(ClaimsLog.status == "#full-import").first()
            marker.created = marker.created + datetime.timedelta(microseconds=1000)

        # NOTE(review): runs after the scope above has exited, on the
        # assumption that session_scope() commits the changed date on exit
        # -- confirm against its definition.
        worker.process_payload(payload())
        with app.session_scope() as session:
            self.assertEqual(
                stored_claims(session),
                [full_import(1, shifted_import_date)]
                + imported
                + [
                    full_import(9, import_date),
                    rec(10, "unchanged", "2015arXiv150304194A", importer_name),
                    rec(11, "unchanged", "2015AAS...22533655A", importer_name),
                    rec(12, "unchanged", "2014arXiv1406.4542H", importer_name),
                    rec(13, "unchanged", "2015arXiv150305881C", importer_name),
                    rec(14, "unchanged", "2015ASPC..492..150T", importer_name),
                    rec(15, "unchanged", "2015ASPC..492..208G", importer_name),
                    rec(16, "unchanged", "2014AAS...22325503A", importer_name),
                ],
            )

        # now let's pretend that we have one extra claim and there was one deletion
        with app.session_scope() as session:
            session.query(ClaimsLog).filter(ClaimsLog.id > 8).delete()  # clean up
            session.query(ClaimsLog).filter_by(id=5).delete()
        # NOTE(review): the insert happens once the clean-up scope has
        # exited (same commit-on-exit assumption as above) -- confirm.
        importer.insert_claims(
            [
                importer.create_claim(
                    bibcode="2014AAS...22325503A",
                    orcidid=orcidid,
                    status="removed",
                    date="2015-11-05 11:37:33.381000+00:00",
                )
            ]
        )

        worker.process_payload(payload())
        with app.session_scope() as session:
            self.assertEqual(
                stored_claims(session),
                [full_import(1, shifted_import_date)]
                + [claim for claim in imported if claim["id"] != 5]
                + [
                    rec(
                        9,
                        "removed",
                        "2014AAS...22325503A",
                        "None",
                        created="2015-11-05T11:37:33.381000+00:00",
                    ),
                    full_import(10, import_date),
                    rec(11, "claimed", "2015arXiv150305881C", "Roman Chyla"),
                    rec(12, "claimed", "2014AAS...22325503A", "NASA ADS"),
                    rec(13, "unchanged", "2014arXiv1406.4542H", importer_name),
                    rec(14, "unchanged", "2015ASPC..492..150T", importer_name),
                    rec(15, "unchanged", "2015ASPC..492..208G", importer_name),
                    rec(16, "unchanged", "2015arXiv150304194A", importer_name),
                    rec(17, "unchanged", "2015AAS...22533655A", importer_name),
                ],
            )
orcid_present[bibc.lower().strip()] = (bibc.strip(), get_date(ts.isoformat()), provenance) else: self.logger.warning("Found no bibcode for {0}".format(ids)) except KeyError, e: self.logger.warning("Error processing a record: " "{0} ({1})".format(w, traceback.format_exc())) continue except TypeError, e: self.logger.warning("Error processing a record: " "{0} ({1})".format(w, traceback.format_exc())) continue # always insert a record that marks the beginning of a full-import # TODO: record orcid's last-modified-date to_claim.append( importer.create_claim( bibcode="", orcidid=orcidid, provenance=self.__class__.__name__, status="#full-import", date=updt ) ) # find difference between what we have and what orcid has claims_we_have = set(updated.keys()).difference(set(removed.keys())) claims_orcid_has = set(orcid_present.keys()) # those guys will be added (with ORCID date signature) for c in claims_orcid_has.difference(claims_we_have): claim = orcid_present[c] to_claim.append( importer.create_claim( bibcode=claim[0], orcidid=orcidid, provenance=claim[2], status="claimed", date=claim[1] ) )