def refetch_orcidids(since=None, orcid_ids=None, **kwargs):
    """
    Gets all orcidids that were updated since time X.

    :param: since - ISO 8601 formatted string
    :type: str

    :return: no return
    """
    worker = RabbitMQWorker(params={
        'publish': 'ads.orcid.fresh-claims',
        'exchange': app.config.get('EXCHANGE', 'ads-orcid')
    })
    worker.connect(app.config.get('RABBITMQ_URL'))

    if orcid_ids:
        for oid in orcid_ids.split(','):
            worker.publish({'orcidid': oid, 'force': False})
        if not since:
            print 'Done (just the supplied orcidids)'
            return

    logging.captureWarnings(True)
    if not since or isinstance(since, basestring) and since.strip() == "":
        with app.session_scope() as session:
            kv = session.query(KeyValue).filter_by(key='last.refetch').first()
            if kv is not None:
                since = kv.value
            else:
                since = '1974-11-09T22:56:52.518001Z'

    from_date = get_date(since)
    logger.info('Re-fetching orcidids updated since: {0}'.format(from_date.isoformat()))

    # then get all new/old orcidids from orcid-service
    orcidids = set(updater.get_all_touched_profiles(from_date.isoformat()))
    from_date = get_date()

    for orcidid in orcidids:
        try:
            worker.publish({'orcidid': orcidid, 'force': False})
        except:  # potential backpressure (we are too fast)
            time.sleep(2)
            print 'Conn problem, retrying...', orcidid
            worker.publish({'orcidid': orcidid, 'force': False})

    with app.session_scope() as session:
        kv = session.query(KeyValue).filter_by(key='last.refetch').first()
        if kv is None:
            kv = KeyValue(key='last.refetch', value=from_date.isoformat())
            session.add(kv)
        else:
            kv.value = from_date.isoformat()
        session.commit()

    print 'Done'
    logger.info('Done submitting {0} orcid ids.'.format(len(orcidids)))
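
# Usage sketch (illustrative, not part of the original module): how refetch_orcidids()
# above might be driven from a maintenance shell. Only the function and its keyword
# arguments come from this file; the example values and the interactive setting are
# assumptions.
#
#   refetch_orcidids(orcid_ids='0000-0003-3041-2092,0000-0001-8178-9506')
#       # re-queues just the supplied profiles; 'last.refetch' is left untouched
#   refetch_orcidids(since='2015-11-05T00:00:00Z')
#       # re-queues every profile touched since the given ISO 8601 date,
#       # then advances 'last.refetch'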
def repush_claims(since=None, **kwargs):
    """
    Re-pushes all recs that were added since date 'X' to the output
    (i.e. forwards them onto the Solr queue)

    :param: since - ISO 8601 formatted string
    :type: str

    :return: no return
    """
    logging.captureWarnings(True)
    if not since or isinstance(since, basestring) and since.strip() == "":
        with app.session_scope() as session:
            kv = session.query(KeyValue).filter_by(key='last.repush').first()
            if kv is not None:
                since = kv.value
            else:
                since = '1974-11-09T22:56:52.518001Z'

    from_date = get_date(since)
    logger.info('Re-pushing records since: {0}'.format(from_date.isoformat()))

    worker = RabbitMQWorker(params={
        'publish': 'ads.orcid.output',
        'exchange': app.config.get('EXCHANGE', 'ads-orcid')
    })
    worker.connect(app.config.get('RABBITMQ_URL'))

    num_bibcodes = 0
    with app.session_scope() as session:
        for rec in session.query(Records) \
                          .filter(Records.updated >= from_date) \
                          .order_by(Records.updated.asc()) \
                          .all():
            data = rec.toJSON()
            try:
                worker.publish({'bibcode': data['bibcode'],
                                'authors': data['authors'],
                                'claims': data['claims']})
            except:  # potential backpressure (we are too fast)
                time.sleep(2)
                print 'Conn problem, retrying ', data['bibcode']
                worker.publish({'bibcode': data['bibcode'],
                                'authors': data['authors'],
                                'claims': data['claims']})
            num_bibcodes += 1

    with app.session_scope() as session:
        kv = session.query(KeyValue).filter_by(key='last.repush').first()
        if kv is None:
            kv = KeyValue(key='last.repush', value=get_date().isoformat())
            session.add(kv)
        else:
            kv.value = get_date().isoformat()
        session.commit()

    logger.info('Done processing {0} records.'.format(num_bibcodes))
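
# Usage sketch (illustrative, not part of the original module): repush_claims() can be
# pointed at an explicit ISO 8601 date, or called with no argument to fall back on the
# stored 'last.repush' value (or the 1974 epoch default). The date below is an assumption.
#
#   repush_claims(since='2015-11-05T00:00:00Z')
#   repush_claims()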
def test_dates(self):
    '''We want to use only UTC dates'''

    with self.assertRaisesRegexp(Exception, 'ValueError'):
        with app.session_scope() as session:
            rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')
            session.add(rec)
            rec.updated = datetime.now()
            session.commit()

    with app.session_scope() as session:
        rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')
        session.add(rec)
        rec.updated = get_date()
        session.commit()
def print_kvs():
    """Prints the values stored in the KeyValue table."""
    print 'Key, Value from the storage:'
    print '-' * 80
    with app.session_scope() as session:
        for kv in session.query(KeyValue).order_by('key').all():
            print kv.key, kv.value
def test_update_database(self):
    """Inserts a record (of claims) into the database"""
    updater.record_claims('bibcode', {'verified': ['foo', '-', 'bar'],
                                      'unverified': ['-', '-', '-']})
    with app.session_scope() as session:
        r = session.query(Records).filter_by(bibcode='bibcode').first()
        self.assertEquals(json.loads(r.claims),
                          {'verified': ['foo', '-', 'bar'],
                           'unverified': ['-', '-', '-']})
        self.assertTrue(r.created == r.updated)
        self.assertFalse(r.processed)

    updater.record_claims('bibcode', {'verified': ['foo', 'zet', 'bar'],
                                      'unverified': ['-', '-', '-']})
    with app.session_scope() as session:
        r = session.query(Records).filter_by(bibcode='bibcode').first()
        self.assertEquals(json.loads(r.claims),
                          {'verified': ['foo', 'zet', 'bar'],
                           'unverified': ['-', '-', '-']})
        self.assertTrue(r.created != r.updated)
        self.assertFalse(r.processed)

    updater.mark_processed('bibcode')
    with app.session_scope() as session:
        r = session.query(Records).filter_by(bibcode='bibcode').first()
        self.assertTrue(r.processed)
def test_dates(self):
    '''We want to use only UTC dates'''
    app = self.app
    with app.session_scope() as session:
        rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')
        session.add(rec)
        rec.updated = utils.get_date()
        session.commit()

        r = session.query(Records).first()
        assert r.updated.tzname() == 'UTC'
def test_models(self):
    """Check serialization into JSON"""
    claim = ClaimsLog(bibcode='foo', orcidid='bar',
                      created='2009-09-03T20:56:35.450686Z')
    self.assertDictEqual(claim.toJSON(),
                         {'status': None, 'bibcode': 'foo',
                          'created': '2009-09-03T20:56:35.450686+00:00',
                          'provenance': 'None', 'orcidid': 'bar', 'id': None})

    ainfo = AuthorInfo(orcidid='bar', created='2009-09-03T20:56:35.450686Z')
    self.assertDictEqual(ainfo.toJSON(),
                         {'status': None, 'updated': None, 'name': None,
                          'created': '2009-09-03T20:56:35.450686+00:00',
                          'facts': {}, 'orcidid': 'bar', 'id': None,
                          'account_id': None})

    rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')
    self.assertDictEqual(rec.toJSON(),
                         {'bibcode': 'foo',
                          'created': '2009-09-03T20:56:35.450686+00:00',
                          'updated': None, 'processed': None, 'claims': {},
                          'id': None, 'authors': []})

    with self.assertRaisesRegexp(Exception, 'IntegrityError'):
        with app.session_scope() as session:
            c = ClaimsLog(bibcode='foo', orcidid='bar', status='hey')
            session.add(c)
            session.commit()

    for s in ['blacklisted', 'postponed']:
        with app.session_scope() as session:
            session.add(AuthorInfo(orcidid='bar' + s, status=s))
            session.commit()

    with self.assertRaisesRegexp(Exception, 'IntegrityError'):
        with app.session_scope() as session:
            c = AuthorInfo(orcidid='bar', status='hey')
            session.add(c)
            session.commit()

    for s in ['claimed', 'updated', 'removed', 'unchanged', '#full-import']:
        with app.session_scope() as session:
            session.add(ClaimsLog(bibcode='foo' + s, orcidid='bar', status=s))
            session.commit()
def test_models(self):
    """Check serialization into JSON"""
    app = self.app

    claim = ClaimsLog(bibcode='foo', orcidid='bar',
                      created='2009-09-03T20:56:35.450686Z')
    self.assertDictEqual(
        claim.toJSON(),
        {
            'status': None,
            'bibcode': 'foo',
            'created': '2009-09-03T20:56:35.450686+00:00',
            'provenance': 'None',
            'orcidid': 'bar',
            'id': None
        })

    ainfo = AuthorInfo(orcidid='bar', created='2009-09-03T20:56:35.450686Z')
    self.assertDictEqual(
        ainfo.toJSON(),
        {
            'status': None,
            'updated': None,
            'name': None,
            'created': '2009-09-03T20:56:35.450686+00:00',
            'facts': {},
            'orcidid': 'bar',
            'id': None,
            'account_id': None
        })

    rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')
    self.assertDictEqual(
        rec.toJSON(),
        {
            'bibcode': 'foo',
            'created': '2009-09-03T20:56:35.450686+00:00',
            'updated': None,
            'processed': None,
            'claims': {},
            'id': None,
            'authors': []
        })

    with self.assertRaisesRegexp(Exception, 'IntegrityError'):
        with app.session_scope() as session:
            c = ClaimsLog(bibcode='foo', orcidid='bar', status='hey')
            session.add(c)
            session.commit()

    for s in ['blacklisted', 'postponed']:
        with app.session_scope() as session:
            session.add(AuthorInfo(orcidid='bar' + s, status=s))
            session.commit()

    with self.assertRaisesRegexp(Exception, 'IntegrityError'):
        with app.session_scope() as session:
            c = AuthorInfo(orcidid='bar', status='hey')
            session.add(c)
            session.commit()

    for s in ['claimed', 'updated', 'removed', 'unchanged', '#full-import']:
        with app.session_scope() as session:
            session.add(ClaimsLog(bibcode='foo' + s, orcidid='bar', status=s))
            session.commit()
def reindex_claims(since=None, **kwargs):
    """
    Re-runs all claims, both from the pipeline and from the orcid-service
    storage.

    :param: since - ISO 8601 formatted string
    :type: str

    :return: no return
    """
    logging.captureWarnings(True)
    if not since or isinstance(since, basestring) and since.strip() == "":
        with app.session_scope() as session:
            kv = session.query(KeyValue).filter_by(key='last.reindex').first()
            if kv is not None:
                since = kv.value
            else:
                since = '1974-11-09T22:56:52.518001Z'

    from_date = get_date(since)
    orcidids = set()

    # trigger re-indexing
    worker = RabbitMQWorker(params={
        'publish': 'ads.orcid.fresh-claims',
        'exchange': app.config.get('EXCHANGE', 'ads-orcid')
    })
    worker.connect(app.config.get('RABBITMQ_URL'))

    logger.info('Loading records since: {0}'.format(from_date.isoformat()))

    # first re-check our own database (replay the logs)
    with app.session_scope() as session:
        for author in session.query(AuthorInfo.orcidid.distinct().label('orcidid')).all():
            orcidid = author.orcidid
            if orcidid and orcidid.strip() != "":
                try:
                    changed = updater.reindex_all_claims(orcidid,
                                                         since=from_date.isoformat(),
                                                         ignore_errors=True)
                    if len(changed):
                        orcidids.add(orcidid)
                    worker.publish({'orcidid': orcidid, 'force': True})
                except:
                    print 'Error processing: {0}'.format(orcidid)
                    traceback.print_exc()
                    continue
            if len(orcidids) % 100 == 0:
                print 'Done replaying {0} profiles'.format(len(orcidids))

    print 'Now harvesting orcid profiles...'

    # then get all new/old orcidids from orcid-service
    all_orcids = set(updater.get_all_touched_profiles(from_date.isoformat()))
    orcidids = all_orcids.difference(orcidids)
    from_date = get_date()

    for orcidid in orcidids:
        try:
            worker.publish({'orcidid': orcidid, 'force': True})
        except:  # potential backpressure (we are too fast)
            time.sleep(2)
            print 'Conn problem, retrying...', orcidid
            worker.publish({'orcidid': orcidid, 'force': True})

    with app.session_scope() as session:
        kv = session.query(KeyValue).filter_by(key='last.reindex').first()
        if kv is None:
            kv = KeyValue(key='last.reindex', value=from_date.isoformat())
            session.add(kv)
        else:
            kv.value = from_date.isoformat()
        session.commit()

    print 'Done'
    logger.info('Done submitting {0} orcid ids.'.format(len(orcidids)))
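
# Usage sketch (illustrative, not part of the original module): a full re-index can be
# forced by passing the epoch default used above; print_kvs() (defined earlier) then shows
# the refreshed 'last.reindex' bookkeeping entry. The literal date is the default from the
# function body; the interactive workflow itself is an assumption.
#
#   reindex_claims(since='1974-11-09T22:56:52.518001Z')   # replay + re-harvest everything
#   print_kvs()                                           # inspect 'last.reindex' afterwards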
def test_functionality_on_new_claim(self):
    """
    Main test, it pretends we have received claims from the ADSWS

    For this, you need to have 'db' and 'rabbitmq' containers running.

    :return: no return
    """

    # fire up the real queue
    self.TM.start_workers(verbose=True)

    # clean the slate (production: 0000-0003-3041-2092, staging: 0000-0001-8178-9506)
    with app.session_scope() as session:
        session.query(models.AuthorInfo).filter_by(orcidid='0000-0003-3041-2092').delete()
        session.query(models.ClaimsLog).filter_by(orcidid='0000-0003-3041-2092').delete()
        session.query(models.Records).filter_by(bibcode='2015ASPC..495..401C').delete()
        kv = session.query(models.KeyValue).filter_by(key='last.check').first()
        if kv is None:
            kv = models.KeyValue(key='last.check')
            session.add(kv)  # a freshly created key has to be added to the session to persist
        kv.value = '2051-11-09T22:56:52.518001Z'

    # setup/check the MongoDB has the proper data for authors
    mworker = workers.OutputHandler.OutputHandler(
        params=app.config.get('WORKERS').get('OutputHandler'))
    mworker.mongodb[self.app.config.get('MONGODB_COLL', 'orcid_claims')].remove(
        {'_id': '2015ASPC..495..401C'})
    r = mworker.mongodb['authors'].find_one({'_id': '2015ASPC..495..401C'})
    if not r or 'authors' not in r:
        mworker.mongodb['authors'].insert({
            "_id": "2015ASPC..495..401C",
            "authors": [
                "Chyla, R",
                "Accomazzi, A",
                "Holachek, A",
                "Grant, C",
                "Elliott, J",
                "Henneken, E",
                "Thompson, D",
                "Kurtz, M",
                "Murray, S",
                "Sudilovsky, V"
            ]
        })

    test_worker = GenericWorker.RabbitMQWorker(params={
        'publish': 'ads.orcid.claims',
        'exchange': 'ads-orcid-test'
    })
    test_worker.connect(self.TM.rabbitmq_url)

    # send a test claim
    test_worker.publish({'orcidid': '0000-0003-3041-2092', 'bibcode': '2015ASPC..495..401C'})

    time.sleep(2)

    # check results
    claim = mworker.mongodb[self.app.config.get('MONGODB_COLL', 'orcid_claims')].find_one(
        {'_id': '2015ASPC..495..401C'})
    self.assertTrue(claim)
    self.assertEquals(claim['verified'],
                      ['0000-0003-3041-2092', '-', '-', '-', '-', '-', '-', '-', '-', '-'])

    with app.session_scope() as session:
        r = session.query(models.Records).filter_by(bibcode='2015ASPC..495..401C').first()
        self.assertEquals(json.loads(r.claims)['verified'],
                          ['0000-0003-3041-2092', '-', '-', '-', '-', '-', '-', '-', '-', '-'])
def test_update_author(self):
    """Has to update AuthorInfo and also create a log of events about the changes."""

    # bootstrap the db with already existing author info
    with app.session_scope() as session:
        ainfo = AuthorInfo(orcidid='0000-0003-2686-9241',
                           facts=json.dumps({'orcid_name': [u'Stern, Daniel'],
                                             'author': [u'Stern, D', u'Stern, D K', u'Stern, Daniel'],
                                             'author_norm': [u'Stern, D'],
                                             'name': u'Stern, D K'
                                             }),
                           )
        session.add(ainfo)
        session.commit()

    with app.session_scope() as session:
        ainfo = session.query(AuthorInfo).filter_by(orcidid='0000-0003-2686-9241').first()

        with mock.patch('ADSOrcid.matcher.harvest_author_info',
                        return_value={'orcid_name': [u'Sternx, Daniel'],
                                      'author': [u'Stern, D', u'Stern, D K', u'Sternx, Daniel'],
                                      'author_norm': [u'Stern, D'],
                                      'name': u'Sternx, D K'
                                      }) as context:
            matcher.cache.clear()
            matcher.orcid_cache.clear()
            matcher.ads_cache.clear()
            author = matcher.retrieve_orcid('0000-0003-2686-9241')
            self.assertDictContainsSubset({'status': None,
                                           'name': u'Sternx, D K',
                                           'facts': {u'author': [u'Stern, D', u'Stern, D K', u'Sternx, Daniel'],
                                                     u'orcid_name': [u'Sternx, Daniel'],
                                                     u'author_norm': [u'Stern, D'],
                                                     u'name': u'Sternx, D K'},
                                           'orcidid': u'0000-0003-2686-9241',
                                           'id': 1,
                                           'account_id': None},
                                          author)
            self.assertDictContainsSubset({'oldvalue': json.dumps([u'Stern, Daniel']),
                                           'newvalue': json.dumps([u'Sternx, Daniel'])},
                                          session.query(ChangeLog).filter_by(
                                              key='0000-0003-2686-9241:update:orcid_name').first().toJSON())
            self.assertDictContainsSubset({'oldvalue': json.dumps(u'Stern, D K'),
                                           'newvalue': json.dumps(u'Sternx, D K')},
                                          session.query(ChangeLog).filter_by(
                                              key='0000-0003-2686-9241:update:name').first().toJSON())
            self.assertDictContainsSubset({'oldvalue': json.dumps([u'Stern, D', u'Stern, D K', u'Stern, Daniel']),
                                           'newvalue': json.dumps([u'Stern, D', u'Stern, D K', u'Sternx, Daniel'])},
                                          session.query(ChangeLog).filter_by(
                                              key='0000-0003-2686-9241:update:author').first().toJSON())

    with app.session_scope() as session:
        ainfo = session.query(AuthorInfo).filter_by(orcidid='0000-0003-2686-9241').first()

        with mock.patch('ADSOrcid.matcher.harvest_author_info',
                        return_value={'name': u'Sternx, D K',
                                      'authorized': True
                                      }) as context:
            matcher.cache.clear()
            matcher.orcid_cache.clear()
            matcher.ads_cache.clear()
            author = matcher.retrieve_orcid('0000-0003-2686-9241')
            self.assertDictContainsSubset({'status': None,
                                           'name': u'Sternx, D K',
                                           'facts': {u'authorized': True,
                                                     u'name': u'Sternx, D K'},
                                           'orcidid': u'0000-0003-2686-9241',
                                           'id': 1,
                                           'account_id': 1},
                                          author)
            self.assertDictContainsSubset({'oldvalue': json.dumps([u'Stern, Daniel']),
                                           'newvalue': json.dumps([u'Sternx, Daniel'])},
                                          session.query(ChangeLog).filter_by(
                                              key='0000-0003-2686-9241:update:orcid_name').first().toJSON())
            self.assertDictContainsSubset({'oldvalue': json.dumps(u'Stern, D K'),
                                           'newvalue': json.dumps(u'Sternx, D K')},
                                          session.query(ChangeLog).filter_by(
                                              key='0000-0003-2686-9241:update:name').first().toJSON())
            self.assertDictContainsSubset({'oldvalue': json.dumps([u'Stern, D', u'Stern, D K', u'Stern, Daniel']),
                                           'newvalue': json.dumps([u'Stern, D', u'Stern, D K', u'Sternx, Daniel'])},
                                          session.query(ChangeLog).filter_by(
                                              key='0000-0003-2686-9241:update:author').first().toJSON())
def test_ingester_logic(self, updater_retrieve_metadata):
    """Has to be able to diff orcid profile against the existing log in a database"""
    # self.maxDiff = None
    orcidid = "0000-0003-3041-2092"

    httpretty.register_uri(
        httpretty.GET,
        self.app.config["API_ORCID_EXPORT_PROFILE"] % orcidid,
        content_type="application/json",
        body=open(os.path.join(self.app.config["TEST_UNIT_DIR"], "stub_data",
                               orcidid + ".ads.json")).read(),
    )
    httpretty.register_uri(
        httpretty.GET,
        re.compile(self.app.config["API_ORCID_UPDATES_ENDPOINT"] % ".*"),
        content_type="application/json",
        body=open(os.path.join(self.app.config["TEST_UNIT_DIR"], "stub_data",
                               orcidid + ".orcid-updates.json")).read(),
    )
    httpretty.register_uri(
        httpretty.GET,
        re.compile(self.app.config["API_SOLR_QUERY_ENDPOINT"] + ".*"),
        content_type="application/json",
        body=open(os.path.join(self.app.config["TEST_UNIT_DIR"], "stub_data",
                               orcidid + ".solr.json")).read(),
    )

    with mock.patch("ADSOrcid.pipeline.OrcidImporter.OrcidImporter.publish") as m:
        worker = OrcidImporter.OrcidImporter()
        worker.check_orcid_updates()
        worker.publish.assert_called_with(
            {"orcidid": u"0000-0003-3041-2092", "start": "1974-11-09T22:56:52.518002+00:00"},
            topic="ads.orcid.fresh-claims",
        )
        worker.publish.reset_mock()
        worker.process_payload({"orcidid": u"0000-0003-3041-2092",
                                "start": "1974-11-09T22:56:52.518002+00:00"})

    with app.session_scope() as session:
        self.assertEquals(
            "2015-11-05T11:37:36.381000+00:00",
            session.query(KeyValue).filter(KeyValue.key == "last.check").first().value,
        )
        recs = []
        for x in session.query(ClaimsLog).all():
            recs.append(x.toJSON())
        self.assertEqual(
            recs,
            [
                {"status": u"#full-import", "bibcode": u"", "created": "2015-11-05T16:37:33.381000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 1},
                {"status": u"claimed", "bibcode": u"2015arXiv150304194A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 2},
                {"status": u"claimed", "bibcode": u"2015AAS...22533655A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 3},
                {"status": u"claimed", "bibcode": u"2014arXiv1406.4542H", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 4},
                {"status": u"claimed", "bibcode": u"2015arXiv150305881C", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"Roman Chyla", "orcidid": u"0000-0003-3041-2092", "id": 5},
                {"status": u"claimed", "bibcode": u"2015ASPC..492..150T", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 6},
                {"status": u"claimed", "bibcode": u"2015ASPC..492..208G", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 7},
                {"status": u"claimed", "bibcode": u"2014AAS...22325503A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 8},
            ],
        )
        kv = session.query(KeyValue).filter(KeyValue.key == "last.check").first()
        kv.value = ""
        session.commit()

    # do the same stuff again (it should not bother with new recs)
    with mock.patch("ADSOrcid.pipeline.OrcidImporter.OrcidImporter.publish") as m:
        worker.check_orcid_updates()
        assert worker.publish.call_args[0][0]["start"] != "1974-11-09T22:56:52.518002+00:00"
        worker.publish.reset_mock()
        worker.process_payload({"orcidid": u"0000-0003-3041-2092"})

    with app.session_scope() as session:
        self.assertEquals(len(session.query(ClaimsLog).all()), 8)
        new_value = parser.parse(session.query(KeyValue).filter(KeyValue.key == "last.check").first().value)
        self.assertEquals(
            "2015-11-05T11:37:36.381000+00:00",
            session.query(KeyValue).filter(KeyValue.key == "last.check").first().value,
        )
        # now change the date of the #full-import (this will force the logic to
        # re-evaluate the batch against the existing claims)
        c = session.query(ClaimsLog).filter(ClaimsLog.status == "#full-import").first()
        c.created = c.created + datetime.timedelta(microseconds=1000)

    worker.process_payload({"orcidid": u"0000-0003-3041-2092"})

    with app.session_scope() as session:
        recs = []
        for x in session.query(ClaimsLog).all():
            recs.append(x.toJSON())
        self.assertEqual(
            recs,
            [
                {"status": u"#full-import", "bibcode": u"", "created": "2015-11-05T16:37:33.382000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 1},
                {"status": u"claimed", "bibcode": u"2015arXiv150304194A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 2},
                {"status": u"claimed", "bibcode": u"2015AAS...22533655A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 3},
                {"status": u"claimed", "bibcode": u"2014arXiv1406.4542H", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 4},
                {"status": u"claimed", "bibcode": u"2015arXiv150305881C", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"Roman Chyla", "orcidid": u"0000-0003-3041-2092", "id": 5},
                {"status": u"claimed", "bibcode": u"2015ASPC..492..150T", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 6},
                {"status": u"claimed", "bibcode": u"2015ASPC..492..208G", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 7},
                {"status": u"claimed", "bibcode": u"2014AAS...22325503A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 8},
                {"status": u"#full-import", "bibcode": u"", "created": "2015-11-05T16:37:33.381000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 9},
                {"status": u"unchanged", "bibcode": u"2015arXiv150304194A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 10},
                {"status": u"unchanged", "bibcode": u"2015AAS...22533655A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 11},
                {"status": u"unchanged", "bibcode": u"2014arXiv1406.4542H", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 12},
                {"status": u"unchanged", "bibcode": u"2015arXiv150305881C", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 13},
                {"status": u"unchanged", "bibcode": u"2015ASPC..492..150T", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 14},
                {"status": u"unchanged", "bibcode": u"2015ASPC..492..208G", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 15},
                {"status": u"unchanged", "bibcode": u"2014AAS...22325503A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 16},
            ],
        )

    # now let's pretend that we have one extra claim and there was one deletion
    with app.session_scope() as session:
        session.query(ClaimsLog).filter(ClaimsLog.id > 8).delete()  # clean up
        session.query(ClaimsLog).filter_by(id=5).delete()

    importer.insert_claims(
        [
            importer.create_claim(
                bibcode="2014AAS...22325503A",
                orcidid=orcidid,
                status="removed",
                date="2015-11-05 11:37:33.381000+00:00",
            )
        ]
    )

    worker.process_payload({"orcidid": u"0000-0003-3041-2092"})

    with app.session_scope() as session:
        recs = []
        for x in session.query(ClaimsLog).all():
            recs.append(x.toJSON())
        self.assertEqual(
            recs,
            [
                {"status": u"#full-import", "bibcode": u"", "created": "2015-11-05T16:37:33.382000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 1},
                {"status": u"claimed", "bibcode": u"2015arXiv150304194A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 2},
                {"status": u"claimed", "bibcode": u"2015AAS...22533655A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 3},
                {"status": u"claimed", "bibcode": u"2014arXiv1406.4542H", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 4},
                {"status": u"claimed", "bibcode": u"2015ASPC..492..150T", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 6},
                {"status": u"claimed", "bibcode": u"2015ASPC..492..208G", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 7},
                {"status": u"claimed", "bibcode": u"2014AAS...22325503A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 8},
                {"status": u"removed", "bibcode": u"2014AAS...22325503A", "created": "2015-11-05T11:37:33.381000+00:00",
                 "provenance": u"None", "orcidid": u"0000-0003-3041-2092", "id": 9},
                {"status": u"#full-import", "bibcode": u"", "created": "2015-11-05T16:37:33.381000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 10},
                {"status": u"claimed", "bibcode": u"2015arXiv150305881C", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"Roman Chyla", "orcidid": u"0000-0003-3041-2092", "id": 11},
                {"status": u"claimed", "bibcode": u"2014AAS...22325503A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"NASA ADS", "orcidid": u"0000-0003-3041-2092", "id": 12},
                {"status": u"unchanged", "bibcode": u"2014arXiv1406.4542H", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 13},
                {"status": u"unchanged", "bibcode": u"2015ASPC..492..150T", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 14},
                {"status": u"unchanged", "bibcode": u"2015ASPC..492..208G", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 15},
                {"status": u"unchanged", "bibcode": u"2015arXiv150304194A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 16},
                {"status": u"unchanged", "bibcode": u"2015AAS...22533655A", "created": "2015-09-16T10:59:01.721000+00:00",
                 "provenance": u"OrcidImporter", "orcidid": u"0000-0003-3041-2092", "id": 17},
            ],
        )