def test_ignore_checksums_metrics(self):
    """Verify ignore_checksums works with metrics updates.

    The mocked record already carries a ``metrics_checksum``.  The first
    call (``ignore_checksums=False``) is expected to leave
    ``update_metrics_db`` untouched; the second call
    (``ignore_checksums=True``) is expected to invoke it exactly once.

    NOTE(review): another method with this same name is defined later in
    this file; if both live in the same class this one is shadowed and
    never runs -- confirm which version is intended.
    """
    # put bibcode in database
    self._reset_checksum('metricstest')

    record = {
        'bibcode': 'metricstest',
        'bib_data_updated': get_date(),
        'metrics': {'refereed': False, 'author_num': 2},
        'processed': get_date('2025'),
        'metrics_checksum': '0x424cb03e',
    }
    with patch.object(self.app, 'get_record', return_value=record), \
            patch.object(self.app, 'update_metrics_db',
                         return_value=(['metricstest'], None)) as u:
        # update with matching checksum and then update and ignore checksums
        tasks.task_index_records(['metricstest'], update_solr=False,
                                 update_metrics=True, update_links=False,
                                 force=True, ignore_checksums=False)
        self.assertEqual(u.call_count, 0)

        tasks.task_index_records(['metricstest'], update_solr=False,
                                 update_metrics=True, update_links=False,
                                 force=True, ignore_checksums=True)
        self.assertEqual(u.call_count, 1)
def test_ignore_checksums_solr(self):
    """Verify ignore_checksums works with solr updates.

    The mocked record already carries a ``solr_checksum``.  With
    ``ignore_checksums=False`` the solr updater is expected not to be
    called; with ``ignore_checksums=True`` it is expected to be called once.
    """
    # put bibcode in database
    self._reset_checksum('foo')

    with patch.object(self.app, 'get_record') as getter, \
            patch.object(self.app, 'update_processed_timestamp',
                         return_value=None), \
            patch('adsmp.solr_updater.update_solr',
                  return_value=[200]) as update_solr:
        getter.return_value = {
            'bibcode': 'foo',
            'metrics_updated': get_date('1972-04-02'),
            'bib_data_updated': get_date('1972-04-01'),
            'solr_checksum': '0xf2708ee8',
        }

        # update with matching checksum and then update and ignore checksums
        tasks.task_index_records(['foo'], force=True, update_metrics=False,
                                 update_links=False, ignore_checksums=False)
        self.assertEqual(update_solr.call_count, 0)

        tasks.task_index_records(['foo'], force=True, update_metrics=False,
                                 update_links=False, ignore_checksums=True)
        self.assertEqual(update_solr.call_count, 1)
def test_ignore_checksums_datalinks(self):
    """Verify ignore_checksums works with datalinks updates.

    The mocked record already carries a ``links_checksum``.  With
    ``ignore_checksums=False`` no ``requests.put`` call is expected; with
    ``ignore_checksums=True`` exactly one is expected.
    """
    # put bibcode in database
    self._reset_checksum('linkstest')

    # canned response handed back by the patched requests.put
    r = Mock()
    r.status_code = 200

    record = {
        'bibcode': 'linkstest',
        'nonbib_data': {'data_links_rows': [{'baz': 0}]},
        'bib_data_updated': get_date(),
        'nonbib_data_updated': get_date(),
        'processed': get_date('2025'),
        'links_checksum': '0x80e85169',
    }
    with patch.object(self.app, 'get_record', return_value=record), \
            patch('requests.put', return_value=r,
                  new_callable=CopyingMock) as p:
        # update with matching checksum and then update and ignore checksums
        tasks.task_index_records(['linkstest'], update_solr=False,
                                 update_metrics=False, update_links=True,
                                 force=True, ignore_checksums=False)
        self.assertEqual(p.call_count, 0)

        tasks.task_index_records(['linkstest'], update_solr=False,
                                 update_metrics=False, update_links=True,
                                 force=True, ignore_checksums=True)
        self.assertEqual(p.call_count, 1)
def test_task_index_links(self):
    """Verify data is sent to links microservice update endpoint.

    After the indexing call, the stored record is read back (outside the
    ``get_record`` patch) to check that only the datalinks checksum was set.
    """
    # canned response handed back by the patched requests.put
    r = Mock()
    r.status_code = 200

    # just make sure we have the entry in a database
    tasks.task_update_record(DenormalizedRecord(bibcode='linkstest'))

    record = {
        'bibcode': 'linkstest',
        'nonbib_data': {'data_links_rows': [{'baz': 0}]},
        'bib_data_updated': get_date(),
        'nonbib_data_updated': get_date(),
        'processed': get_date('2025'),
    }
    with patch.object(self.app, 'get_record', return_value=record), \
            patch('requests.put', return_value=r,
                  new_callable=CopyingMock) as p:
        tasks.task_index_records(['linkstest'], update_solr=False,
                                 update_metrics=False, update_links=True,
                                 force=True)
        p.assert_called_with(
            'http://localhost:8080/update',
            data=json.dumps([{'bibcode': 'linkstest',
                              'data_links_rows': [{'baz': 0}]}]),
            headers={'Authorization': 'Bearer api_token'})

    # get_record is no longer patched here, so this reads the real record
    rec = self.app.get_record(bibcode='linkstest')
    self.assertEqual(rec['datalinks_checksum'], '0x80e85169')
    self.assertIsNone(rec['solr_checksum'])
    self.assertIsNone(rec['metrics_checksum'])
def test_avoid_duplicates(self):
    """Verify solr is not updated again when the solr checksum matches.

    The first indexing call is expected to hit the solr updater once and
    store checksum ``0xf2708ee8``.  The second call returns a record that
    already carries that checksum (only ``metrics_updated`` is new), so the
    solr updater call count is expected to stay at one.
    """
    # just make sure we have the entry in a database
    self._reset_checksum('foo')
    self._reset_checksum('bar')

    with patch.object(self.app, 'get_record') as getter, \
            patch.object(self.app, 'update_processed_timestamp',
                         return_value=None), \
            patch('adsmp.solr_updater.update_solr',
                  return_value=[200]) as update_solr:
        getter.return_value = {
            'bibcode': 'foo',
            'bib_data_updated': get_date('1972-04-01'),
        }
        tasks.task_index_records(['foo'], force=True)

        self.assertEqual(update_solr.call_count, 1)
        self._check_checksum('foo', solr='0xf2708ee8')

        # now change metrics (solr shouldn't be called)
        getter.return_value = {
            'bibcode': 'foo',
            'metrics_updated': get_date('1972-04-02'),
            'bib_data_updated': get_date('1972-04-01'),
            'solr_checksum': '0xf2708ee8',
        }
        tasks.task_index_records(['foo'], force=True)
        self.assertEqual(update_solr.call_count, 1)
def test_task_index_records(self):
    """Check argument validation and the unknown-bibcode error path.

    Calling the task with both ``update_solr`` and ``update_metrics``
    switched off must raise; calling it for a bibcode that is not known
    must log an error with the expected message and argument.
    """
    with self.assertRaises(Exception):
        tasks.task_index_records(['foo', 'bar'],
                                 update_solr=False,
                                 update_metrics=False)

    with patch.object(tasks.logger, 'error', return_value=None) as log_error:
        tasks.task_index_records(['non-existent'])
        log_error.assert_called_with(u"The bibcode %s doesn't exist!",
                                     'non-existent')
def test_index_metrics_no_data(self):
    """Verify indexing works where there is no metrics data.

    The mocked record has no ``metrics`` entry; ``update_remote_targets``
    is expected not to be called.

    NOTE(review): another method with this same name is defined later in
    this file; if both live in the same class this one is shadowed and
    never runs -- confirm which version is intended.
    """
    # local import keeps this fix self-contained
    from datetime import datetime

    # The 'processed' stamp was a fixed year ('2025'); the sibling tests in
    # this file derive it from the current year so it stays in the future.
    future_year = str(datetime.now().year + 1)

    record = {
        'bibcode': 'noMetrics',
        'nonbib_data': {'boost': 1.2},
        'bib_data_updated': get_date(),
        'nonbib_data_updated': get_date(),
        'processed': get_date(future_year),
    }
    with patch.object(self.app, 'get_record', return_value=record), \
            patch('adsmp.app.ADSMasterPipelineCelery.update_remote_targets',
                  new_callable=CopyingMock) as u:
        tasks.task_index_records(['noMetrics'], ignore_checksums=True)
        u.assert_not_called()
def test_index_metrics_no_data(self):
    """Verify indexing works where there is no metrics data.

    The mocked record has no ``metrics`` entry and a ``processed`` stamp in
    next year; the metrics indexing sub-task is expected not to be queued.
    """
    next_year = str(datetime.now().year + 1)
    record = {
        'bibcode': 'noMetrics',
        'nonbib_data': {'boost': 1.2},
        'bib_data_updated': get_date(),
        'nonbib_data_updated': get_date(),
        'processed': get_date(next_year),
    }

    record_patch = patch.object(self.app, 'get_record', return_value=record)
    metrics_patch = patch('adsmp.tasks.task_index_metrics.apply_async',
                          wraps=unwind_task_index_metrics_apply_async)
    with record_patch, metrics_patch as queued_metrics:
        tasks.task_index_records(['noMetrics'], ignore_checksums=True)
        queued_metrics.assert_not_called()
def test_task_index_links_no_data(self):
    """Verify data links works when no data_links_rows is present.

    The mocked record's ``nonbib_data`` has no ``data_links_rows`` key;
    ``requests.put`` is expected not to be called.

    NOTE(review): another method with this same name is defined later in
    this file; if both live in the same class this one is shadowed and
    never runs -- confirm which version is intended.
    """
    next_year = str(datetime.now().year + 1)
    record = {
        'bibcode': 'linkstest',
        'nonbib_data': {'boost': 1.2},
        'bib_data_updated': get_date(),
        'nonbib_data_updated': get_date(),
        'processed': get_date(next_year),
    }

    record_patch = patch.object(self.app, 'get_record', return_value=record)
    resolver_patch = patch(
        'adsmp.tasks.task_index_data_links_resolver.apply_async',
        wraps=unwind_task_index_data_links_resolver_apply_async)
    put_patch = patch('requests.put', new_callable=CopyingMock)
    with record_patch, resolver_patch, put_patch as put_mock:
        tasks.task_index_records(['linkstest'],
                                 update_solr=False,
                                 update_metrics=False,
                                 update_links=True,
                                 force=True)
        put_mock.assert_not_called()
def test_task_index_links_no_data(self):
    """Verify data links works when no data_links_rows is present.

    The mocked record's ``nonbib_data`` has no ``data_links_rows`` key;
    ``requests.put`` is expected not to be called.
    """
    record = {
        'bibcode': 'linkstest',
        'nonbib_data': {'boost': 1.2},
        'bib_data_updated': get_date(),
        'nonbib_data_updated': get_date(),
        'processed': get_date('2025'),
    }

    record_patch = patch.object(self.app, 'get_record', return_value=record)
    put_patch = patch('requests.put', new_callable=CopyingMock)
    with record_patch, put_patch as put_mock:
        tasks.task_index_records(['linkstest'],
                                 update_solr=False,
                                 update_metrics=False,
                                 update_links=True,
                                 force=True)
        put_mock.assert_not_called()
def test_ignore_checksums_metrics(self):
    """Verify ignore_checksums works with metrics updates.

    The mocked record already carries a ``metrics_checksum``.  The first
    call (``ignore_checksums=False``) is expected to leave
    ``index_metrics`` untouched; the second call
    (``ignore_checksums=True``) is expected to invoke it exactly once.
    """
    # put bibcode in database
    self._reset_checksum('metricstest')

    future_year = datetime.now().year + 1
    record = {
        'bibcode': 'metricstest',
        'bib_data_updated': get_date(),
        'metrics': {'refereed': False, 'author_num': 2},
        'processed': get_date(str(future_year)),
        'metrics_checksum': '0x424cb03e',
    }
    with patch.object(self.app, 'get_record', return_value=record), \
            patch('adsmp.tasks.task_index_metrics.apply_async',
                  wraps=unwind_task_index_metrics_apply_async), \
            patch.object(self.app, 'index_metrics',
                         return_value=(['metricstest'], None)) as u:
        # update with matching checksum and then update and ignore checksums
        tasks.task_index_records(['metricstest'], update_solr=False,
                                 update_metrics=True, update_links=False,
                                 force=True, ignore_checksums=False)
        self.assertEqual(u.call_count, 0)

        tasks.task_index_records(['metricstest'], update_solr=False,
                                 update_metrics=True, update_links=False,
                                 force=True, ignore_checksums=True)
        self.assertEqual(u.call_count, 1)
def test_task_update_solr(self):
    """Check when task_index_records triggers a solr update.

    Runs six scenarios against mocked records with different combinations
    of ``*_updated`` / ``processed`` timestamps and the ``force`` flag, and
    asserts whether ``adsmp.solr_updater.update_solr`` and the
    processed-timestamp hook were called.

    NOTE(review): another method with this same name is defined later in
    this file; if both live in the same class this one is shadowed and
    never runs -- confirm which version is intended.
    """
    # local import keeps this fix self-contained
    from datetime import datetime

    # The "already processed" scenarios used a fixed year ('2025'); derive
    # it from the current year (as the sibling tests in this file do) so it
    # stays in the future.
    future_year = str(datetime.now().year + 1)
    bibcode = '2015ApJ...815..133S'

    # just make sure we have the entry in a database
    self._reset_checksum('foobar')

    # 1) processed in 2012, everything else updated now: update expected
    with patch.object(self.app, 'mark_processed', return_value=None) as update_timestamp, \
            patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
            patch.object(self.app, 'get_record', return_value={
                'bibcode': 'foobar',
                'bib_data_updated': get_date(),
                'nonbib_data_updated': get_date(),
                'orcid_claims_updated': get_date(),
                'processed': get_date('2012'),
            }), \
            patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
        self.assertFalse(update_solr.called)
        tasks.task_index_records(bibcode)
        self.assertTrue(update_solr.called)
        self.assertTrue(update_timestamp.called)
        self._check_checksum('foobar', solr=True)
        self._reset_checksum('foobar')

    # 2) processed stamp in the future, no force: no update expected
    with patch.object(self.app, 'update_processed_timestamp', return_value=None) as update_timestamp, \
            patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
            patch.object(self.app, 'get_record', return_value={
                'bibcode': 'foobar',
                'bib_data_updated': get_date(),
                'nonbib_data_updated': get_date(),
                'orcid_claims_updated': get_date(),
                'processed': get_date(future_year),
            }), \
            patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
        self.assertFalse(update_solr.called)
        tasks.task_index_records(bibcode)
        self.assertFalse(update_solr.called)
        self.assertFalse(update_timestamp.called)
        self._check_checksum('foobar', solr=None)
        self._reset_checksum('foobar')

    # 3) same record as (2) but forced: update expected
    with patch.object(self.app, 'mark_processed', return_value=None) as update_timestamp, \
            patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
            patch.object(self.app, 'get_record', return_value={
                'bibcode': 'foobar',
                'bib_data_updated': get_date(),
                'nonbib_data_updated': get_date(),
                'orcid_claims_updated': get_date(),
                'processed': get_date(future_year),
            }), \
            patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
        self.assertFalse(update_solr.called)
        tasks.task_index_records(bibcode, force=True)
        self.assertTrue(update_solr.called)
        self.assertTrue(update_timestamp.called)
        self._check_checksum('foobar', solr=True)
        self._reset_checksum('foobar')

    # 4) bib_data_updated missing, no force: no update expected
    with patch.object(self.app, 'update_processed_timestamp', return_value=None) as update_timestamp, \
            patch('adsmp.solr_updater.update_solr', return_value=None) as update_solr, \
            patch.object(self.app, 'get_record', return_value={
                'bibcode': 'foobar',
                'bib_data_updated': None,
                'nonbib_data_updated': get_date(),
                'orcid_claims_updated': get_date(),
                'processed': None,
            }), \
            patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
        self.assertFalse(update_solr.called)
        tasks.task_index_records(bibcode)
        self.assertFalse(update_solr.called)
        self.assertFalse(update_timestamp.called)
        self._check_checksum('foobar', solr=None)
        self._reset_checksum('foobar')

    # 5) nonbib_data_updated missing but forced: update expected, and the
    #    task must not re-queue itself
    with patch.object(self.app, 'mark_processed', return_value=None) as update_timestamp, \
            patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
            patch.object(self.app, 'get_record', return_value={
                'bibcode': 'foobar',
                'bib_data_updated': get_date(),
                'nonbib_data_updated': None,
                'orcid_claims_updated': get_date(),
                'processed': None,
            }), \
            patch('adsmp.tasks.task_index_records.apply_async', return_value=None) as task_index_records:
        self.assertFalse(update_solr.called)
        tasks.task_index_records(bibcode, force=True)
        self.assertTrue(update_solr.called)
        self.assertTrue(update_timestamp.called)
        self.assertFalse(task_index_records.called)

    # 6) only fulltext_claims_updated is set, no force: no update expected
    with patch.object(self.app, 'update_processed_timestamp', return_value=None) as update_timestamp, \
            patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
            patch.object(self.app, 'get_record', return_value={
                'bibcode': 'foobar',
                'bib_data_updated': None,
                'nonbib_data_updated': None,
                'orcid_claims_updated': None,
                'fulltext_claims_updated': get_date(),
                'processed': None,
            }), \
            patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
        self.assertFalse(update_solr.called)
        tasks.task_index_records(bibcode)
        self.assertFalse(update_solr.called)
        self.assertFalse(update_timestamp.called)
def test_task_update_solr(self):
    """Check when task_index_records triggers a solr update.

    Runs seven scenarios against mocked records with different combinations
    of ``*_updated`` / ``processed`` timestamps and the ``force`` flag, and
    asserts whether ``adsmp.solr_updater.update_solr`` (and, where patched,
    ``mark_processed``) was called.
    """
    bibcode = '2015ApJ...815..133S'
    solr_target = 'adsmp.solr_updater.update_solr'
    solr_task_target = 'adsmp.tasks.task_index_solr.apply_async'
    requeue_target = 'adsmp.tasks.task_index_records.apply_async'

    # just make sure we have the entry in a database
    self._reset_checksum('foobar')

    # 1) processed in 2012, everything else updated now: update expected
    record = {
        'bibcode': 'foobar',
        'augments_updated': get_date(),
        'bib_data': {},
        'metrics': {},
        'bib_data_updated': get_date(),
        'nonbib_data_updated': get_date(),
        'orcid_claims_updated': get_date(),
        'processed': get_date('2012'),
    }
    with patch.object(self.app, 'mark_processed', return_value=None) as mark_mock, \
            patch(solr_target, return_value=[200]) as solr_mock, \
            patch(solr_task_target, wraps=unwind_task_index_solr_apply_async), \
            patch.object(self.app, 'get_record', return_value=record), \
            patch(requeue_target, return_value=None):
        self.assertFalse(solr_mock.called)
        tasks.task_index_records(bibcode)
        self.assertTrue(solr_mock.called)
        self.assertTrue(mark_mock.called)
        # NOTE: the solr checksum check is disabled in this scenario
        # (was: self._check_checksum('foobar', solr=True))
        self._reset_checksum('foobar')

    next_year = str(datetime.now().year + 1)

    # 2) processed stamp in next year, no force: no update expected
    record = {
        'bibcode': 'foobar',
        'augments_updated': get_date(),
        'bib_data_updated': get_date(),
        'nonbib_data_updated': get_date(),
        'orcid_claims_updated': get_date(),
        'processed': get_date(next_year),
    }
    with patch(solr_target, return_value=[200]) as solr_mock, \
            patch(solr_task_target, wraps=unwind_task_index_solr_apply_async), \
            patch.object(self.app, 'get_record', return_value=record), \
            patch(requeue_target, return_value=None):
        self.assertFalse(solr_mock.called)
        tasks.task_index_records(bibcode)
        self.assertFalse(solr_mock.called)
        self._check_checksum('foobar', solr=None)
        self._reset_checksum('foobar')

    # 3) processed stamp in next year but forced: update expected
    record = {
        'bibcode': 'foobar',
        'augments_updated': get_date(),
        'bib_data_updated': get_date(),
        'bib_data': {},
        'metrics': {},
        'nonbib_data_updated': get_date(),
        'orcid_claims_updated': get_date(),
        'processed': get_date(next_year),
    }
    with patch.object(self.app, 'mark_processed', return_value=None) as mark_mock, \
            patch(solr_target, return_value=[200]) as solr_mock, \
            patch(solr_task_target, wraps=unwind_task_index_solr_apply_async), \
            patch.object(self.app, 'get_record', return_value=record), \
            patch(requeue_target, return_value=None):
        self.assertFalse(solr_mock.called)
        tasks.task_index_records(bibcode, force=True)
        self.assertTrue(solr_mock.called)
        self.assertTrue(mark_mock.called)
        # NOTE: the solr checksum check is disabled in this scenario
        # (was: self._check_checksum('foobar', solr=True))
        self._reset_checksum('foobar')

    # 4) bib_data_updated missing, no force: no update expected
    record = {
        'bibcode': 'foobar',
        'augments_updated': get_date(),
        'bib_data_updated': None,
        'nonbib_data_updated': get_date(),
        'orcid_claims_updated': get_date(),
        'processed': None,
    }
    with patch(solr_target, return_value=None) as solr_mock, \
            patch(solr_task_target, wraps=unwind_task_index_solr_apply_async), \
            patch.object(self.app, 'get_record', return_value=record), \
            patch(requeue_target, return_value=None):
        self.assertFalse(solr_mock.called)
        tasks.task_index_records(bibcode)
        self.assertFalse(solr_mock.called)
        self._check_checksum('foobar', solr=None)
        self._reset_checksum('foobar')

    # 5) nonbib_data_updated missing but forced: update expected, and the
    #    task must not re-queue itself
    record = {
        'bibcode': 'foobar',
        'augments_updated': get_date(),
        'bib_data_updated': get_date(),
        'bib_data': {},
        'metrics': {},
        'nonbib_data_updated': None,
        'orcid_claims_updated': get_date(),
        'processed': None,
    }
    with patch.object(self.app, 'mark_processed', return_value=None) as mark_mock, \
            patch(solr_target, return_value=[200]) as solr_mock, \
            patch(solr_task_target, wraps=unwind_task_index_solr_apply_async), \
            patch.object(self.app, 'get_record', return_value=record), \
            patch(requeue_target, return_value=None) as requeue_mock:
        self.assertFalse(solr_mock.called)
        tasks.task_index_records(bibcode, force=True)
        self.assertTrue(solr_mock.called)
        self.assertTrue(mark_mock.called)
        self.assertFalse(requeue_mock.called)

    # 6) only augments/fulltext stamps are set, no force: no update expected
    record = {
        'bibcode': 'foobar',
        'augments_updated': get_date(),
        'bib_data_updated': None,
        'nonbib_data_updated': None,
        'orcid_claims_updated': None,
        'fulltext_claims_updated': get_date(),
        'processed': None,
    }
    with patch(solr_target, return_value=[200]) as solr_mock, \
            patch(solr_task_target, wraps=unwind_task_index_solr_apply_async), \
            patch.object(self.app, 'get_record', return_value=record), \
            patch(requeue_target, return_value=None):
        self.assertFalse(solr_mock.called)
        tasks.task_index_records(bibcode)
        self.assertFalse(solr_mock.called)

    # 7) bib/nonbib/orcid stamps from 2012, processed in 2014, augments
    #    updated now: update expected
    record = {
        'bibcode': 'foobar',
        'augments_updated': get_date(),
        'bib_data_updated': get_date('2012'),
        'bib_data': {},
        'metrics': {},
        'nonbib_data_updated': get_date('2012'),
        'orcid_claims_updated': get_date('2012'),
        'processed': get_date('2014'),
    }
    with patch.object(self.app, 'mark_processed', return_value=None) as mark_mock, \
            patch(solr_target, return_value=[200]) as solr_mock, \
            patch(solr_task_target, wraps=unwind_task_index_solr_apply_async), \
            patch.object(self.app, 'get_record', return_value=record), \
            patch(requeue_target, return_value=None):
        self.assertFalse(solr_mock.called)
        tasks.task_index_records(bibcode)
        self.assertTrue(solr_mock.called)
        self.assertTrue(mark_mock.called)
        # NOTE: the solr checksum check is disabled in this scenario
        # (was: self._check_checksum('foobar', solr=True))
        self._reset_checksum('foobar')