def test_solr_to_solr_reindex_and_resume_reverse(self):
    """
    Reindex a date-restricted slice of the source first, then resume.

    The initial reindex is limited by a filter query to documents dated at
    or after a midpoint, so only part of the source is copied. A following
    ``resume()`` is expected to leave both collections with equal counts.
    """
    self._index_docs(50000, self.colls[0])
    solr = SolrClient(
        test_config["SOLR_SERVER"][0],
        auth=test_config["SOLR_CREDENTIALS"],
    )

    def doc_count(coll):
        # Counts by fetching every match and measuring the returned list.
        response = solr.query(coll, {"q": "*:*", "rows": 10000000})
        return len(response.docs)

    reindexer = Reindexer(
        source=solr,
        source_coll="source_coll",
        dest=solr,
        dest_coll="dest_coll",
        date_field="date",
    )

    # Precondition: the source is populated and the destination is empty.
    self.assertEqual(doc_count(self.colls[0]), 50000)
    self.assertEqual(doc_count(self.colls[1]), 0)

    # Step back from "now" by half the number of days between the start and
    # end dates to obtain an approximate midpoint.
    span_days = (self._end_date - self._start_date).days
    midpoint = datetime.datetime.now() - datetime.timedelta(days=span_days / 2)

    # First pass: only documents dated from the midpoint onwards.
    lower_bound = midpoint.isoformat() + "Z"
    reindexer.reindex(fq=["date:[{} TO *]".format(lower_bound)])
    # NOTE(review): presumably gives Solr time to expose the new documents
    # to queries before they are counted - confirm.
    sleep(10)

    # The destination must hold more than 20% of the source, but not all.
    dest_count = doc_count(self.colls[1])
    s_count = doc_count(self.colls[0])
    self.assertTrue(s_count * 0.20 < dest_count < s_count)

    reindexer.resume()
    sleep(10)

    # After resuming, both collections must report the same count.
    source_total = doc_count(self.colls[0])
    dest_total = doc_count(self.colls[1])
    self.assertEqual(source_total, dest_total)
def test_solr_to_solr_reindex_and_resume_reverse(self):
    """
    Reindex only part of the collection first, then resume the rest.

    The first reindex is restricted by a date filter query so that only
    documents dated at or after a midpoint are copied. ``resume()`` is then
    expected to bring the destination to the same count as the source.
    """
    self._index_docs(50000, self.colls[0])
    solr = SolrClient(test_config['SOLR_SERVER'][0],
                      auth=test_config['SOLR_CREDENTIALS'])
    reindexer = Reindexer(source=solr,
                          source_coll='source_coll',
                          dest=solr,
                          dest_coll='dest_coll',
                          date_field='date')

    def count_docs(coll):
        # Fetch every matching document and count the returned list.
        return len(solr.query(coll, {'q': '*:*', 'rows': 10000000}).docs)

    # Make sure only the source has data.
    self.assertEqual(count_docs(self.colls[0]), 50000)
    self.assertEqual(count_docs(self.colls[1]), 0)

    # Approximate midpoint: "now" minus half the number of days between
    # the start and end dates.
    half_span = datetime.timedelta(
        days=(self._end_date - self._start_date).days / 2)
    midpoint = datetime.datetime.now() - half_span

    # Reindex only the documents dated from the midpoint onwards.
    reindexer.reindex(
        fq=['date:[{} TO *]'.format(midpoint.isoformat() + 'Z')])
    # Wait before counting so the assertion does not race the index.
    # NOTE(review): assumes newly indexed documents may not be searchable
    # immediately - confirm against the Solr commit settings.
    sleep(10)

    # Make sure the destination has more than 20% of the data, but not all.
    dest_count = count_docs(self.colls[1])
    s_count = count_docs(self.colls[0])
    self.assertTrue(s_count > dest_count > s_count * .20)

    reindexer.resume()
    # Same wait as above before comparing the final counts.
    sleep(10)

    # Make sure the counts match up after the resume.
    self.assertEqual(count_docs(self.colls[0]), count_docs(self.colls[1]))
def test_solr_to_solr_resume_checkonly(self):
    """
    Verify that ``resume(check=True)`` does not index anything.

    The source is populated with 50000 documents and the destination is
    left empty; both counts must be unchanged after the check-only resume.
    """
    self._index_docs(50000, self.colls[0])
    solr = SolrClient(test_config['SOLR_SERVER'][0],
                      devel=True,
                      auth=test_config['SOLR_CREDENTIALS'])
    reindexer = Reindexer(source=solr,
                          source_coll='source_coll',
                          dest=solr,
                          dest_coll='dest_coll',
                          date_field='date')

    def assert_only_source_has_data():
        # Source first, then destination: full source, empty destination.
        source_docs = solr.query(self.colls[0],
                                 {'q': '*:*', 'rows': 10000000}).docs
        self.assertEqual(len(source_docs), 50000)
        dest_docs = solr.query(self.colls[1],
                               {'q': '*:*', 'rows': 10000000}).docs
        self.assertEqual(len(dest_docs), 0)

    # State before the check-only resume.
    assert_only_source_has_data()

    reindexer.resume(check=True)

    # Nothing may have been indexed, so the state must be exactly the same.
    assert_only_source_has_data()
def test_solr_to_solr_resume_checkonly(self):
    """
    Check that a check-only resume leaves the destination untouched.

    After ``resume(check=True)`` the source must still hold its 50000
    documents and the destination must still be empty.
    """
    self._index_docs(50000, self.colls[0])
    solr = SolrClient(
        test_config["SOLR_SERVER"][0],
        devel=True,
        auth=test_config["SOLR_CREDENTIALS"],
    )

    def doc_count(coll):
        # Counts by fetching every match and measuring the returned list.
        response = solr.query(coll, {"q": "*:*", "rows": 10000000})
        return len(response.docs)

    reindexer = Reindexer(
        source=solr,
        source_coll="source_coll",
        dest=solr,
        dest_coll="dest_coll",
        date_field="date",
    )

    # Precondition: data lives in the source only.
    self.assertEqual(doc_count(self.colls[0]), 50000)
    self.assertEqual(doc_count(self.colls[1]), 0)

    reindexer.resume(check=True)

    # Postcondition: the check-only run indexed nothing.
    self.assertEqual(doc_count(self.colls[0]), 50000)
    self.assertEqual(doc_count(self.colls[1]), 0)
def test_solr_to_solr_resume_basic(self):
    """
    Run ``resume()`` against an empty destination and compare counts.

    Starting with 50000 documents in the source and none in the
    destination, both collections must report the same document count
    once the resume has run.
    """
    self._index_docs(50000, self.colls[0])
    solr = SolrClient(
        test_config["SOLR_SERVER"][0],
        auth=test_config["SOLR_CREDENTIALS"],
    )

    def doc_count(coll):
        # Counts by fetching every match and measuring the returned list.
        response = solr.query(coll, {"q": "*:*", "rows": 10000000})
        return len(response.docs)

    reindexer = Reindexer(
        source=solr,
        source_coll="source_coll",
        dest=solr,
        dest_coll="dest_coll",
        date_field="date",
    )

    # Precondition: the source is populated and the destination is empty.
    self.assertEqual(doc_count(self.colls[0]), 50000)
    self.assertEqual(doc_count(self.colls[1]), 0)

    reindexer.resume()
    # NOTE(review): presumably gives Solr time to expose the new documents
    # to queries before they are counted - confirm.
    sleep(10)

    # Both collections must now hold the same number of documents.
    source_total = doc_count(self.colls[0])
    dest_total = doc_count(self.colls[1])
    self.assertEqual(source_total, dest_total)
def test_solr_to_solr_resume_basic(self):
    """
    Resume into an empty destination and expect matching document counts.

    The source starts with 50000 documents and the destination with none;
    after ``resume()`` the two collections must contain equally many.
    """
    self._index_docs(50000, self.colls[0])
    solr = SolrClient(test_config['SOLR_SERVER'][0],
                      auth=test_config['SOLR_CREDENTIALS'])
    reindexer = Reindexer(source=solr,
                          source_coll='source_coll',
                          dest=solr,
                          dest_coll='dest_coll',
                          date_field='date')

    def fetch_all(coll):
        # Return every document in the collection via a match-all query.
        return solr.query(coll, {'q': '*:*', 'rows': 10000000}).docs

    # Make sure only the source has data.
    self.assertEqual(len(fetch_all(self.colls[0])), 50000)
    self.assertEqual(len(fetch_all(self.colls[1])), 0)

    reindexer.resume()
    # NOTE(review): presumably lets the indexed documents become searchable
    # before counting - confirm.
    sleep(10)

    # Make sure the counts match up after the resume.
    source_docs = fetch_all(self.colls[0])
    dest_docs = fetch_all(self.colls[1])
    self.assertEqual(len(source_docs), len(dest_docs))