Example #1
def refetch_orcidids(since=None, orcid_ids=None, **kwargs):
    """
    Gets all orcidids that were updated since time X.
    
    :param since: ISO 8601 (RFC 3339) formatted timestamp string
    :type since: str
    :param orcid_ids: optional comma-separated list of ORCID iDs to re-fetch
    :type orcid_ids: str

    :return: None
    """
    worker = RabbitMQWorker(params={
        'publish': 'ads.orcid.fresh-claims',
        'exchange': app.config.get('EXCHANGE', 'ads-orcid')
    })
    worker.connect(app.config.get('RABBITMQ_URL'))
    if orcid_ids:
        for oid in orcid_ids.split(','):
            worker.publish({'orcidid': oid, 'force': False})
        if not since:
            print('Done (just the supplied orcidids)')
            return
    
    
    logging.captureWarnings(True)
    if not since or (isinstance(since, basestring) and since.strip() == ""):
        with app.session_scope() as session:
            kv = session.query(KeyValue).filter_by(key='last.refetch').first()
            if kv is not None:
                since = kv.value
            else:
                since = '1974-11-09T22:56:52.518001Z' 
    
    from_date = get_date(since)
    
    logger.info('Re-fetching orcidids updated since: {0}'.format(from_date.isoformat()))
    
        
    # then get all new/old orcidids from orcid-service
    orcidids = set(updater.get_all_touched_profiles(from_date.isoformat()))
    from_date = get_date()
    
      
    for orcidid in orcidids:
        try:
            worker.publish({'orcidid': orcidid, 'force': False})
        except Exception:  # potential backpressure (we are publishing too fast)
            time.sleep(2)
            print('Conn problem, retrying... {0}'.format(orcidid))
            worker.publish({'orcidid': orcidid, 'force': False})
        
    with app.session_scope() as session:
        kv = session.query(KeyValue).filter_by(key='last.refetch').first()
        if kv is None:
            kv = KeyValue(key='last.refetch', value=from_date.isoformat())
            session.add(kv)
        else:
            kv.value = from_date.isoformat()
        session.commit()

    print('Done')
    logger.info('Done submitting {0} orcid ids.'.format(len(orcidids)))
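Every snippet on this page goes through app.session_scope(). The helper itself is not shown here, but the name follows the standard SQLAlchemy transactional-scope recipe; a minimal sketch of what these examples assume (the engine URL and sessionmaker names are illustrative, not taken from ADSOrcid):

from contextlib import contextmanager

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite://')  # stand-in for the configured database URL
Session = sessionmaker(bind=engine)

@contextmanager
def session_scope():
    """Provide a transactional scope around a series of operations."""
    session = Session()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

Under that recipe the explicit session.commit() calls inside the with blocks above are redundant but harmless: committing an already-clean session is a no-op.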
Example #2
def repush_claims(since=None, **kwargs):
    """
    Re-pushes all records that were added since date 'X'
    to the output (i.e. forwards them onto the Solr queue).

    :param since: ISO 8601 (RFC 3339) formatted timestamp string
    :type since: str

    :return: None
    """
    logging.captureWarnings(True)
    if not since or (isinstance(since, basestring) and since.strip() == ""):
        with app.session_scope() as session:
            kv = session.query(KeyValue).filter_by(key='last.repush').first()
            if kv is not None:
                since = kv.value
            else:
                since = '1974-11-09T22:56:52.518001Z' 
    
    from_date = get_date(since)
    orcidids = set()
    
    logger.info('Re-pushing records since: {0}'.format(from_date.isoformat()))
    
    worker = RabbitMQWorker(params={
        'publish': 'ads.orcid.output',
        'exchange': app.config.get('EXCHANGE', 'ads-orcid')
    })
    worker.connect(app.config.get('RABBITMQ_URL'))
    
    num_bibcodes = 0
    with app.session_scope() as session:
        for rec in session.query(Records) \
            .filter(Records.updated >= from_date) \
            .order_by(Records.updated.asc()) \
            .all():
            
            data = rec.toJSON()
            try:
                worker.publish({'bibcode': data['bibcode'], 'authors': data['authors'], 'claims': data['claims']})
            except Exception:  # potential backpressure (we are publishing too fast)
                time.sleep(2)
                print('Conn problem, retrying {0}'.format(data['bibcode']))
                worker.publish({'bibcode': data['bibcode'], 'authors': data['authors'], 'claims': data['claims']})
            num_bibcodes += 1
    
    with app.session_scope() as session:
        kv = session.query(KeyValue).filter_by(key='last.repush').first()
        if kv is None:
            kv = KeyValue(key='last.repush', value=get_date().isoformat())
            session.add(kv)
        else:
            kv.value = get_date().isoformat()
        session.commit()
        
    logger.info('Done processing {0} records.'.format(num_bibcodes))
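The last.refetch / last.repush bookkeeping in these two functions implies a trivial key-value table. A sketch of the model the snippets assume (the table name and column sizes are inferred from usage, not copied from the ADSOrcid models module):

from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class KeyValue(Base):
    """One row per bookkeeping key, e.g. 'last.repush' -> ISO timestamp."""
    __tablename__ = 'storage'

    id = Column(Integer, primary_key=True)
    key = Column(String(255), unique=True)
    value = Column(String(255))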
Example #3
    def test_dates(self):
        '''We want to use only UTC dates'''
        
        with self.assertRaisesRegexp(Exception, 'ValueError'):
            with app.session_scope() as session:
                rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')
                session.add(rec)
                rec.updated = datetime.now()
                session.commit()

        with app.session_scope() as session:
            rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')
            session.add(rec)
            rec.updated = get_date()
            session.commit()
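The first block is expected to fail because datetime.now() is naive; the second passes because get_date() hands back a timezone-aware UTC datetime. A minimal get_date with that behavior, assuming dateutil is available (a sketch, not the actual ADSOrcid utility):

import datetime

from dateutil import parser, tz

UTC = tz.tzutc()

def get_date(timestr=None):
    """Return a tz-aware UTC datetime; parse timestr if given, else use now."""
    if timestr is None:
        return datetime.datetime.now(UTC)
    date = parser.parse(timestr)
    if date.tzinfo is None:
        date = date.replace(tzinfo=UTC)  # assume UTC for naive inputs
    return date.astimezone(UTC)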
Example #4
def print_kvs():    
    """Prints the values stored in the KeyValue table."""
    print('Key, Value from the storage:')
    print('-' * 80)
    with app.session_scope() as session:
        for kv in session.query(KeyValue).order_by('key').all():
            print('{0} {1}'.format(kv.key, kv.value))
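One portability note: relying on plain-string coercion in order_by is discouraged in modern SQLAlchemy, which warns on and eventually rejects raw SQL strings. Inside the same session_scope block, either of these spellings avoids the coercion:

from sqlalchemy import text

session.query(KeyValue).order_by(KeyValue.key).all()   # reference the mapped column
session.query(KeyValue).order_by(text('key')).all()    # or mark the fragment as SQL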
Example #5
    def test_update_database(self):
        """Inserts a record (of claims) into the database"""
        updater.record_claims('bibcode', {'verified': ['foo', '-', 'bar'], 'unverified': ['-', '-', '-']})
        with app.session_scope() as session:
            r = session.query(Records).filter_by(bibcode='bibcode').first()
            self.assertEquals(json.loads(r.claims), {'verified': ['foo', '-', 'bar'], 'unverified': ['-', '-', '-']})
            self.assertTrue(r.created == r.updated)
            self.assertFalse(r.processed)

        updater.record_claims('bibcode', {'verified': ['foo', 'zet', 'bar'], 'unverified': ['-', '-', '-']})
        with app.session_scope() as session:
            r = session.query(Records).filter_by(bibcode='bibcode').first()
            self.assertEquals(json.loads(r.claims), {'verified': ['foo', 'zet', 'bar'], 'unverified': ['-', '-', '-']})
            self.assertTrue(r.created != r.updated)
            self.assertFalse(r.processed)

        updater.mark_processed('bibcode')
        with app.session_scope() as session:
            r = session.query(Records).filter_by(bibcode='bibcode').first()
            self.assertTrue(r.processed)
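The assertions pin down the contract of updater.record_claims: on the first insert created equals updated, a second call moves only updated, and processed stays false until mark_processed flips it. A sketch of an upsert honoring that contract, inferred from the test rather than copied from the updater module (it borrows the Records model and get_date helper from the surrounding examples, and takes the session explicitly):

import json

def record_claims(session, bibcode, claims):
    """Insert or update a Records row, keeping created == updated on insert."""
    now = get_date()
    rec = session.query(Records).filter_by(bibcode=bibcode).first()
    if rec is None:
        session.add(Records(bibcode=bibcode, claims=json.dumps(claims),
                            created=now, updated=now))
    else:
        rec.claims = json.dumps(claims)
        rec.updated = now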
Example #6
    def test_dates(self):
        '''We want to use only UTC dates'''
        app = self.app

        with app.session_scope() as session:
            rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')
            session.add(rec)
            rec.updated = utils.get_date()
            session.commit()

            r = session.query(Records).first()
            assert r.updated.tzname() == 'UTC'
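For r.updated.tzname() == 'UTC' to hold after a round trip, the column type has to re-attach the timezone on load; plain DateTime columns usually come back naive, especially on SQLite. The usual SQLAlchemy answer is a TypeDecorator. A sketch along these lines (an assumption about the models, not the actual ADSOrcid type) would also explain the ValueError expected in the earlier date test, since it accepts the ISO strings the tests pass for created and rejects naive datetimes:

from dateutil import parser, tz
from sqlalchemy import types

UTC = tz.tzutc()

class UTCDateTime(types.TypeDecorator):
    """Store values as naive UTC, return them tz-aware."""
    impl = types.DateTime

    def process_bind_param(self, value, dialect):
        if value is None:
            return None
        if isinstance(value, basestring):  # the tests pass ISO strings for 'created'
            value = parser.parse(value)
        if value.tzinfo is None:
            raise ValueError('naive datetime not allowed: %r' % (value,))
        return value.astimezone(UTC).replace(tzinfo=None)

    def process_result_value(self, value, dialect):
        if value is not None:
            return value.replace(tzinfo=UTC)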
Example #7
    def test_models(self):
        """Check serialization into JSON"""
        
        claim = ClaimsLog(bibcode='foo', orcidid='bar',
                          created='2009-09-03T20:56:35.450686Z')
        self.assertDictEqual(claim.toJSON(),
             {'status': None, 'bibcode': 'foo', 'created': '2009-09-03T20:56:35.450686+00:00', 'provenance': 'None', 'orcidid': 'bar', 'id': None})
        
        ainfo = AuthorInfo(orcidid='bar',
                          created='2009-09-03T20:56:35.450686Z')
        
        self.assertDictEqual(ainfo.toJSON(),
             {'status': None, 'updated': None, 'name': None, 'created': '2009-09-03T20:56:35.450686+00:00', 'facts': {}, 'orcidid': 'bar', 'id': None, 'account_id': None})
        
        rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')

        self.assertDictEqual(rec.toJSON(),
             {'bibcode': 'foo', 'created': '2009-09-03T20:56:35.450686+00:00', 'updated': None, 'processed': None, 'claims': {}, 'id': None, 'authors': []})
        
        with self.assertRaisesRegexp(Exception, 'IntegrityError'):
            with app.session_scope() as session:
                c = ClaimsLog(bibcode='foo', orcidid='bar', status='hey')
                session.add(c)
                session.commit()
        
        for s in ['blacklisted', 'postponed']:
            with app.session_scope() as session:
                session.add(AuthorInfo(orcidid='bar' + s, status=s))
                session.commit()
        
        with self.assertRaisesRegexp(Exception, 'IntegrityError'):
            with app.session_scope() as session:
                c = AuthorInfo(orcidid='bar', status='hey')
                session.add(c)
                session.commit()
        
        for s in ['claimed', 'updated', 'removed', 'unchanged', '#full-import']:
            with app.session_scope() as session:
                session.add(ClaimsLog(bibcode='foo'+s, orcidid='bar', status=s))
                session.commit()
Example #8
    def test_models(self):
        """Check serialization into JSON"""

        app = self.app
        claim = ClaimsLog(bibcode='foo',
                          orcidid='bar',
                          created='2009-09-03T20:56:35.450686Z')
        self.assertDictEqual(
            claim.toJSON(), {
                'status': None,
                'bibcode': 'foo',
                'created': '2009-09-03T20:56:35.450686+00:00',
                'provenance': 'None',
                'orcidid': 'bar',
                'id': None
            })

        ainfo = AuthorInfo(orcidid='bar',
                           created='2009-09-03T20:56:35.450686Z')

        self.assertDictEqual(
            ainfo.toJSON(), {
                'status': None,
                'updated': None,
                'name': None,
                'created': '2009-09-03T20:56:35.450686+00:00',
                'facts': {},
                'orcidid': 'bar',
                'id': None,
                'account_id': None
            })

        rec = Records(bibcode='foo', created='2009-09-03T20:56:35.450686Z')

        self.assertDictEqual(
            rec.toJSON(), {
                'bibcode': 'foo',
                'created': '2009-09-03T20:56:35.450686+00:00',
                'updated': None,
                'processed': None,
                'claims': {},
                'id': None,
                'authors': []
            })

        with self.assertRaisesRegexp(Exception, 'IntegrityError'):
            with app.session_scope() as session:
                c = ClaimsLog(bibcode='foo', orcidid='bar', status='hey')
                session.add(c)
                session.commit()

        for s in ['blacklisted', 'postponed']:
            with app.session_scope() as session:
                session.add(AuthorInfo(orcidid='bar' + s, status=s))
                session.commit()

        with self.assertRaisesRegexp(Exception, 'IntegrityError'):
            with app.session_scope() as session:
                c = AuthorInfo(orcidid='bar', status='hey')
                session.add(c)
                session.commit()

        for s in [
                'claimed', 'updated', 'removed', 'unchanged', '#full-import'
        ]:
            with app.session_scope() as session:
                session.add(
                    ClaimsLog(bibcode='foo' + s, orcidid='bar', status=s))
                session.commit()
Example #9
def reindex_claims(since=None, **kwargs):
    """
    Re-runs all claims, both from the pipeline and
    from the orcid-service storage.
    
    :param since: ISO 8601 (RFC 3339) formatted timestamp string
    :type since: str

    :return: None
    """
    logging.captureWarnings(True)
    if not since or (isinstance(since, basestring) and since.strip() == ""):
        with app.session_scope() as session:
            kv = session.query(KeyValue).filter_by(key='last.reindex').first()
            if kv is not None:
                since = kv.value
            else:
                since = '1974-11-09T22:56:52.518001Z' 
    
    from_date = get_date(since)
    orcidids = set()
    
    # trigger re-indexing
    worker = RabbitMQWorker(params={
        'publish': 'ads.orcid.fresh-claims',
        'exchange': app.config.get('EXCHANGE', 'ads-orcid')
    })
    worker.connect(app.config.get('RABBITMQ_URL'))
    
    
    logger.info('Loading records since: {0}'.format(from_date.isoformat()))
    
    # first re-check our own database (replay the logs)
    with app.session_scope() as session:
        for author in session.query(AuthorInfo.orcidid.distinct().label('orcidid')).all():
            orcidid = author.orcidid
            if orcidid and orcidid.strip() != "":
                try:
                    changed = updater.reindex_all_claims(orcidid, since=from_date.isoformat(), ignore_errors=True)
                    if len(changed):
                        orcidids.add(orcidid)
                    worker.publish({'orcidid': orcidid, 'force': True})
                except Exception:
                    print('Error processing: {0}'.format(orcidid))
                    traceback.print_exc()
                    continue
                if orcidids and len(orcidids) % 100 == 0:
                    print('Done replaying {0} profiles'.format(len(orcidids)))
    
    print('Now harvesting orcid profiles...')
    
    # then get all new/old orcidids from orcid-service
    all_orcids = set(updater.get_all_touched_profiles(from_date.isoformat()))
    orcidids = all_orcids.difference(orcidids)
    from_date = get_date()
    
      
    for orcidid in orcidids:
        try:
            worker.publish({'orcidid': orcidid, 'force': True})
        except Exception:  # potential backpressure (we are publishing too fast)
            time.sleep(2)
            print('Conn problem, retrying... {0}'.format(orcidid))
            worker.publish({'orcidid': orcidid, 'force': True})
        
    with app.session_scope() as session:
        kv = session.query(KeyValue).filter_by(key='last.reindex').first()
        if kv is None:
            kv = KeyValue(key='last.reindex', value=from_date.isoformat())
            session.add(kv)
        else:
            kv.value = from_date.isoformat()
        session.commit()

    print('Done')
    logger.info('Done submitting {0} orcid ids.'.format(len(orcidids)))
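The publish / sleep / publish pattern used here and in the refetch and repush commands retries exactly once and still raises if the second attempt fails. A small bounded-retry helper makes the backpressure handling explicit (a sketch, not part of the pipeline):

import time

def publish_with_retry(worker, payload, attempts=3, delay=2):
    """Publish a payload, sleeping and retrying on broker backpressure."""
    for i in range(attempts):
        try:
            return worker.publish(payload)
        except Exception:
            if i == attempts - 1:
                raise  # give up after the last attempt
            time.sleep(delay * (i + 1))  # linear backoff between attempts

The loops above would then call publish_with_retry(worker, {'orcidid': orcidid, 'force': True}) instead of hand-rolling the try/except.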
Example #10
    def test_functionality_on_new_claim(self):
        """
        Main test, it pretends we have received claims from the 
        ADSWS
        
        For this, you need to have 'db' and 'rabbitmq' containers running.
        :return: no return
        """
        
        # fire up the real queue
        self.TM.start_workers(verbose=True)
        
        # clean the slate (production: 0000-0003-3041-2092, staging: 0000-0001-8178-9506) 
        with app.session_scope() as session:
            session.query(models.AuthorInfo).filter_by(orcidid='0000-0003-3041-2092').delete()
            session.query(models.ClaimsLog).filter_by(orcidid='0000-0003-3041-2092').delete()
            session.query(models.Records).filter_by(bibcode='2015ASPC..495..401C').delete()
            kv = session.query(models.KeyValue).filter_by(key='last.check').first()
            if kv is None:
                kv = models.KeyValue(key='last.check')
                session.add(kv)
            kv.value = '2051-11-09T22:56:52.518001Z'
                
        # setup/check the MongoDB has the proper data for authors
        mworker = workers.OutputHandler.OutputHandler(params=app.config.get('WORKERS').get('OutputHandler'))
        mworker.mongodb[self.app.config.get('MONGODB_COLL', 'orcid_claims')].remove({'_id': '2015ASPC..495..401C'})
        r = mworker.mongodb['authors'].find_one({'_id': '2015ASPC..495..401C'})
        if not r or 'authors' not in r:
            mworker.mongodb['authors'].insert({
                "_id" : "2015ASPC..495..401C",
                "authors" : [
                    "Chyla, R",
                    "Accomazzi, A",
                    "Holachek, A",
                    "Grant, C",
                    "Elliott, J",
                    "Henneken, E",
                    "Thompson, D",
                    "Kurtz, M",
                    "Murray, S",
                    "Sudilovsky, V"
                ]
            })

        
        
        
        test_worker = GenericWorker.RabbitMQWorker(params={
                            'publish': 'ads.orcid.claims',
                            'exchange': 'ads-orcid-test'
                        })
        test_worker.connect(self.TM.rabbitmq_url)
        
        # send a test claim
        test_worker.publish({'orcidid': '0000-0003-3041-2092', 'bibcode': '2015ASPC..495..401C'})
        
        time.sleep(2)
        
        # check results
        claim = mworker.mongodb[self.app.config.get('MONGODB_COLL', 'orcid_claims')].find_one({'_id': '2015ASPC..495..401C'})
        self.assertTrue(claim)
        self.assertEquals(claim['verified'],
                          ['0000-0003-3041-2092'] + ['-'] * 9)
        
        with app.session_scope() as session:
            r = session.query(models.Records).filter_by(bibcode='2015ASPC..495..401C').first()
            self.assertEquals(json.loads(r.claims)['verified'],
                              ['0000-0003-3041-2092'] + ['-'] * 9)
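A driver-version note: Collection.remove and Collection.insert are the PyMongo 2.x API; both were deprecated in PyMongo 3 and removed in PyMongo 4. Against a modern driver the same setup would read (a sketch over the same collections):

coll = mworker.mongodb[self.app.config.get('MONGODB_COLL', 'orcid_claims')]
coll.delete_one({'_id': '2015ASPC..495..401C'})  # was coll.remove(...)
mworker.mongodb['authors'].insert_one({          # was .insert(...)
    '_id': '2015ASPC..495..401C',
    'authors': ['Chyla, R', 'Accomazzi, A'],     # list shortened for brevity
})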
Example #11
    def test_update_author(self):
        """Has to update AuthorInfo and also create a log of events about the changes."""

        # bootstrap the db with already existing author info
        with app.session_scope() as session:
            ainfo = AuthorInfo(orcidid='0000-0003-2686-9241',
                               facts=json.dumps({'orcid_name': [u'Stern, Daniel'],
                                                 'author': [u'Stern, D', u'Stern, D K', u'Stern, Daniel'],
                                                 'author_norm': [u'Stern, D'],
                                                 'name': u'Stern, D K'}))
            session.add(ainfo)
            session.commit()

        with app.session_scope() as session:
            ainfo = session.query(AuthorInfo).filter_by(orcidid='0000-0003-2686-9241').first()
            with mock.patch('ADSOrcid.matcher.harvest_author_info',
                            return_value={'orcid_name': [u'Sternx, Daniel'],
                                          'author': [u'Stern, D', u'Stern, D K', u'Sternx, Daniel'],
                                          'author_norm': [u'Stern, D'],
                                          'name': u'Sternx, D K'}) as context:
                matcher.cache.clear()
                matcher.orcid_cache.clear()
                matcher.ads_cache.clear()
                author = matcher.retrieve_orcid('0000-0003-2686-9241')
                self.assertDictContainsSubset({'status': None,
                                               'name': u'Sternx, D K',
                                               'facts': {u'author': [u'Stern, D', u'Stern, D K', u'Sternx, Daniel'],
                                                         u'orcid_name': [u'Sternx, Daniel'],
                                                         u'author_norm': [u'Stern, D'],
                                                         u'name': u'Sternx, D K'},
                                               'orcidid': u'0000-0003-2686-9241',
                                               'id': 1,
                                               'account_id': None},
                                              author)
                self.assertDictContainsSubset({'oldvalue': json.dumps([u'Stern, Daniel']),
                                               'newvalue': json.dumps([u'Sternx, Daniel'])},
                                              session.query(ChangeLog).filter_by(key='0000-0003-2686-9241:update:orcid_name').first().toJSON())
                self.assertDictContainsSubset({'oldvalue': json.dumps(u'Stern, D K'),
                                               'newvalue': json.dumps(u'Sternx, D K')},
                                              session.query(ChangeLog).filter_by(key='0000-0003-2686-9241:update:name').first().toJSON())
                self.assertDictContainsSubset({'oldvalue': json.dumps([u'Stern, D', u'Stern, D K', u'Stern, Daniel']),
                                               'newvalue': json.dumps([u'Stern, D', u'Stern, D K', u'Sternx, Daniel'])},
                                              session.query(ChangeLog).filter_by(key='0000-0003-2686-9241:update:author').first().toJSON())

        with app.session_scope() as session:
            ainfo = session.query(AuthorInfo).filter_by(orcidid='0000-0003-2686-9241').first()
            with mock.patch('ADSOrcid.matcher.harvest_author_info',
                            return_value={'name': u'Sternx, D K',
                                          'authorized': True}) as context:
                matcher.cache.clear()
                matcher.orcid_cache.clear()
                matcher.ads_cache.clear()
                author = matcher.retrieve_orcid('0000-0003-2686-9241')
                self.assertDictContainsSubset({'status': None,
                                               'name': u'Sternx, D K',
                                               'facts': {u'authorized': True, u'name': u'Sternx, D K'},
                                               'orcidid': u'0000-0003-2686-9241',
                                               'id': 1,
                                               'account_id': 1},
                                              author)
                self.assertDictContainsSubset({'oldvalue': json.dumps([u'Stern, Daniel']),
                                               'newvalue': json.dumps([u'Sternx, Daniel'])},
                                              session.query(ChangeLog).filter_by(key='0000-0003-2686-9241:update:orcid_name').first().toJSON())
                self.assertDictContainsSubset({'oldvalue': json.dumps(u'Stern, D K'),
                                               'newvalue': json.dumps(u'Sternx, D K')},
                                              session.query(ChangeLog).filter_by(key='0000-0003-2686-9241:update:name').first().toJSON())
                self.assertDictContainsSubset({'oldvalue': json.dumps([u'Stern, D', u'Stern, D K', u'Stern, Daniel']),
                                               'newvalue': json.dumps([u'Stern, D', u'Stern, D K', u'Sternx, Daniel'])},
                                              session.query(ChangeLog).filter_by(key='0000-0003-2686-9241:update:author').first().toJSON())
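A note on the assertion helpers used throughout these tests: assertEquals and assertRaisesRegexp are deprecated aliases of assertEqual and assertRaisesRegex, and assertDictContainsSubset was deprecated in Python 3.2 and removed in 3.12. On a modern interpreter the subset checks can be rewritten with a one-line helper:

def assert_dict_contains_subset(testcase, subset, actual):
    """Drop-in for the deprecated assertDictContainsSubset."""
    # merging subset into a copy of actual changes nothing iff subset is contained
    testcase.assertEqual(actual, dict(actual, **subset))

# e.g. instead of self.assertDictContainsSubset({'id': 1}, author):
# assert_dict_contains_subset(self, {'id': 1}, author)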
Example #12
    def test_ingester_logic(self, updater_retrieve_metadata):
        """Has to be able to diff orcid profile against the 
        existing log in a database"""
        # self.maxDiff = None
        orcidid = "0000-0003-3041-2092"

        httpretty.register_uri(
            httpretty.GET,
            self.app.config["API_ORCID_EXPORT_PROFILE"] % orcidid,
            content_type="application/json",
            body=open(os.path.join(self.app.config["TEST_UNIT_DIR"], "stub_data", orcidid + ".ads.json")).read(),
        )
        httpretty.register_uri(
            httpretty.GET,
            re.compile(self.app.config["API_ORCID_UPDATES_ENDPOINT"] % ".*"),
            content_type="application/json",
            body=open(
                os.path.join(self.app.config["TEST_UNIT_DIR"], "stub_data", orcidid + ".orcid-updates.json")
            ).read(),
        )
        httpretty.register_uri(
            httpretty.GET,
            re.compile(self.app.config["API_SOLR_QUERY_ENDPOINT"] + ".*"),
            content_type="application/json",
            body=open(os.path.join(self.app.config["TEST_UNIT_DIR"], "stub_data", orcidid + ".solr.json")).read(),
        )

        with mock.patch("ADSOrcid.pipeline.OrcidImporter.OrcidImporter.publish") as m:
            worker = OrcidImporter.OrcidImporter()
            worker.check_orcid_updates()
            worker.publish.assert_called_with(
                {"orcidid": u"0000-0003-3041-2092", "start": "1974-11-09T22:56:52.518002+00:00"},
                topic="ads.orcid.fresh-claims",
            )
            worker.publish.reset_mock()

            worker.process_payload({"orcidid": u"0000-0003-3041-2092", "start": "1974-11-09T22:56:52.518002+00:00"})
            with app.session_scope() as session:
                self.assertEquals(
                    "2015-11-05T11:37:36.381000+00:00",
                    session.query(KeyValue).filter(KeyValue.key == "last.check").first().value,
                )
                recs = []
                for x in session.query(ClaimsLog).all():
                    recs.append(x.toJSON())
                self.assertEqual(
                    recs,
                    [
                        {
                            "status": u"#full-import",
                            "bibcode": u"",
                            "created": "2015-11-05T16:37:33.381000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 1,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015arXiv150304194A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 2,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015AAS...22533655A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 3,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2014arXiv1406.4542H",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 4,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015arXiv150305881C",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"Roman Chyla",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 5,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015ASPC..492..150T",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 6,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015ASPC..492..208G",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 7,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2014AAS...22325503A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 8,
                        },
                    ],
                )
                kv = session.query(KeyValue).filter(KeyValue.key == "last.check").first()
                kv.value = ""
                session.commit()

        # do the same stuff again (it should not bother with new recs)
        with mock.patch("ADSOrcid.pipeline.OrcidImporter.OrcidImporter.publish") as m:
            worker.check_orcid_updates()
            assert worker.publish.call_args[0][0]["start"] != "1974-11-09T22:56:52.518002+00:00"
            worker.publish.reset_mock()

            worker.process_payload({"orcidid": u"0000-0003-3041-2092"})
            with app.session_scope() as session:
                self.assertEquals(len(session.query(ClaimsLog).all()), 8)
                new_value = parser.parse(session.query(KeyValue).filter(KeyValue.key == "last.check").first().value)
                self.assertEquals(
                    "2015-11-05T11:37:36.381000+00:00",
                    session.query(KeyValue).filter(KeyValue.key == "last.check").first().value,
                )

                # now change the date of the #full-import (this will force the logic to re-evaluate the batch against the
                # existing claims)
                c = session.query(ClaimsLog).filter(ClaimsLog.status == "#full-import").first()
                c.created = c.created + datetime.timedelta(microseconds=1000)

            worker.process_payload({"orcidid": u"0000-0003-3041-2092"})

            with app.session_scope() as session:
                recs = []
                for x in session.query(ClaimsLog).all():
                    recs.append(x.toJSON())
                self.assertEqual(
                    recs,
                    [
                        {
                            "status": u"#full-import",
                            "bibcode": u"",
                            "created": "2015-11-05T16:37:33.382000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 1,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015arXiv150304194A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 2,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015AAS...22533655A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 3,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2014arXiv1406.4542H",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 4,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015arXiv150305881C",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"Roman Chyla",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 5,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015ASPC..492..150T",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 6,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2015ASPC..492..208G",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 7,
                        },
                        {
                            "status": u"claimed",
                            "bibcode": u"2014AAS...22325503A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"NASA ADS",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 8,
                        },
                        {
                            "status": u"#full-import",
                            "bibcode": u"",
                            "created": "2015-11-05T16:37:33.381000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 9,
                        },
                        {
                            "status": u"unchanged",
                            "bibcode": u"2015arXiv150304194A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 10,
                        },
                        {
                            "status": u"unchanged",
                            "bibcode": u"2015AAS...22533655A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 11,
                        },
                        {
                            "status": u"unchanged",
                            "bibcode": u"2014arXiv1406.4542H",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 12,
                        },
                        {
                            "status": u"unchanged",
                            "bibcode": u"2015arXiv150305881C",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 13,
                        },
                        {
                            "status": u"unchanged",
                            "bibcode": u"2015ASPC..492..150T",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 14,
                        },
                        {
                            "status": u"unchanged",
                            "bibcode": u"2015ASPC..492..208G",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 15,
                        },
                        {
                            "status": u"unchanged",
                            "bibcode": u"2014AAS...22325503A",
                            "created": "2015-09-16T10:59:01.721000+00:00",
                            "provenance": u"OrcidImporter",
                            "orcidid": u"0000-0003-3041-2092",
                            "id": 16,
                        },
                    ],
                )

            # now let's pretend that we have one extra claim and there was one deletion
            with app.session_scope() as session:
                session.query(ClaimsLog).filter(ClaimsLog.id > 8).delete()  # clean up
                session.query(ClaimsLog).filter_by(id=5).delete()
                importer.insert_claims(
                    [
                        importer.create_claim(
                            bibcode="2014AAS...22325503A",
                            orcidid=orcidid,
                            status="removed",
                            date="2015-11-05 11:37:33.381000+00:00",
                        )
                    ]
                )

            worker.process_payload({"orcidid": u"0000-0003-3041-2092"})

        with app.session_scope() as session:
            recs = []
            for x in session.query(ClaimsLog).all():
                recs.append(x.toJSON())
            self.assertEqual(
                recs,
                [
                    {
                        "status": u"#full-import",
                        "bibcode": u"",
                        "created": "2015-11-05T16:37:33.382000+00:00",
                        "provenance": u"OrcidImporter",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 1,
                    },
                    {
                        "status": u"claimed",
                        "bibcode": u"2015arXiv150304194A",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        "provenance": u"NASA ADS",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 2,
                    },
                    {
                        "status": u"claimed",
                        "bibcode": u"2015AAS...22533655A",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        "provenance": u"NASA ADS",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 3,
                    },
                    {
                        "status": u"claimed",
                        "bibcode": u"2014arXiv1406.4542H",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        "provenance": u"NASA ADS",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 4,
                    },
                    {
                        "status": u"claimed",
                        "bibcode": u"2015ASPC..492..150T",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        "provenance": u"NASA ADS",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 6,
                    },
                    {
                        "status": u"claimed",
                        "bibcode": u"2015ASPC..492..208G",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        "provenance": u"NASA ADS",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 7,
                    },
                    {
                        "status": u"claimed",
                        "bibcode": u"2014AAS...22325503A",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        "provenance": u"NASA ADS",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 8,
                    },
                    {
                        "status": u"removed",
                        "bibcode": u"2014AAS...22325503A",
                        "created": "2015-11-05T11:37:33.381000+00:00",
                        "provenance": u"None",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 9,
                    },
                    {
                        "status": u"#full-import",
                        "bibcode": u"",
                        "created": "2015-11-05T16:37:33.381000+00:00",
                        "provenance": u"OrcidImporter",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 10,
                    },
                    {
                        "status": u"claimed",
                        "bibcode": u"2015arXiv150305881C",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        u"provenance": "Roman Chyla",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 11,
                    },
                    {
                        "status": u"claimed",
                        "bibcode": u"2014AAS...22325503A",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        u"provenance": "NASA ADS",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 12,
                    },
                    {
                        "status": u"unchanged",
                        "bibcode": u"2014arXiv1406.4542H",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        u"provenance": "OrcidImporter",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 13,
                    },
                    {
                        "status": u"unchanged",
                        "bibcode": u"2015ASPC..492..150T",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        u"provenance": "OrcidImporter",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 14,
                    },
                    {
                        "status": u"unchanged",
                        "bibcode": u"2015ASPC..492..208G",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        u"provenance": "OrcidImporter",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 15,
                    },
                    {
                        "status": u"unchanged",
                        "bibcode": u"2015arXiv150304194A",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        u"provenance": "OrcidImporter",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 16,
                    },
                    {
                        "status": u"unchanged",
                        "bibcode": u"2015AAS...22533655A",
                        "created": "2015-09-16T10:59:01.721000+00:00",
                        u"provenance": "OrcidImporter",
                        "orcidid": u"0000-0003-3041-2092",
                        "id": 17,
                    },
                ],
            )
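register_uri only intercepts requests while httpretty is active, so this test presumably enables it in setUp; otherwise the stub registrations above would let calls hit the network. The conventional shape, for reference (a sketch using httpretty's standard enable/disable API):

import unittest

import httpretty

class IngesterTestCase(unittest.TestCase):
    def setUp(self):
        httpretty.enable()   # monkey-patch the socket layer

    def tearDown(self):
        httpretty.disable()  # restore real sockets
        httpretty.reset()    # forget registered URIs between tests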