def test_chunked(self):
    # chunking nothing yields nothing.
    eq_(list(chunked([], 1)), [])

    # chunking list where len(list) < n
    eq_(list(chunked([1], 10)), [(1, )])

    # chunking a list where len(list) == n
    eq_(list(chunked([1, 2], 2)), [(1, 2)])

    # chunking list where len(list) > n
    eq_(list(chunked([1, 2, 3, 4, 5], 2)),
        [(1, 2), (3, 4), (5, )])
def test_chunked(self):
    # chunking nothing yields nothing.
    eq_(list(chunked([], 1)), [])

    # chunking list where len(list) < n
    eq_(list(chunked([1], 10)), [(1,)])

    # chunking a list where len(list) == n
    eq_(list(chunked([1, 2], 2)), [(1, 2)])

    # chunking list where len(list) > n
    eq_(list(chunked([1, 2, 3, 4, 5], 2)),
        [(1, 2), (3, 4), (5,)])
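# The tests above pin down the contract of chunked(): it takes an iterable
# and a size n, returns a generator (hence the list() calls), yields tuples
# of up to n items, and the final tuple may be short. Below is a minimal
# sketch consistent with those expectations; chunked_sketch is a hypothetical
# name and the real kitsune.search.utils.chunked may be implemented
# differently.
import itertools


def chunked_sketch(iterable, n):
    """Yield tuples of up to n items from iterable (illustrative only)."""
    iterator = iter(iterable)
    while True:
        # islice pulls at most n items; an empty tuple means we're done.
        chunk = tuple(itertools.islice(iterator, n))
        if not chunk:
            return
        yield chunk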
def reindex(mapping_type_names):
    """Reindex all instances of a given mapping type with celery tasks

    :arg mapping_type_names: list of mapping types to reindex

    """
    outstanding = Record.objects.outstanding().count()
    if outstanding > 0:
        raise ReindexError('There are %s outstanding chunks.' % outstanding)

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        chunks.extend((cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    for cls, id_list in chunks:
        index = cls.get_index()
        chunk_name = 'Indexing: %s %d -> %d' % (cls.get_mapping_type_name(),
                                                id_list[0], id_list[-1])
        rec = Record.objects.create(batch_id=batch_id, name=chunk_name)
        index_chunk_task.delay(index, batch_id, rec.id,
                               (to_class_path(cls), id_list))
def handle(self, **options):
    # Set up logging so it doesn't send Ricky email.
    logging.basicConfig(level=logging.ERROR)

    # Get a list of ids of questions we're going to go change. We need
    # a list of ids so that we can feed it to the update, but then
    # also know what we need to update in the index.
    days_180 = datetime.now() - timedelta(days=180)
    q_ids = list(
        Question.objects.filter(is_archived=False).filter(
            created__lte=days_180).values_list("id", flat=True))

    if q_ids:
        log.info("Updating %d questions", len(q_ids))

        sql = """
            UPDATE questions_question
            SET is_archived = 1
            WHERE id IN (%s)
            """ % ",".join(map(str, q_ids))

        cursor = connection.cursor()
        cursor.execute(sql)
        if not transaction.get_connection().in_atomic_block:
            transaction.commit()

        if settings.ES_LIVE_INDEXING:
            try:
                # So... the first time this runs, it'll handle 160K
                # questions or so which stresses everything. Thus we
                # do it in chunks because otherwise this won't work.
                #
                # After we've done this for the first time, we can nix
                # the chunking code.
                from kitsune.search.utils import chunked
                for chunk in chunked(q_ids, 100):
                    # Fetch all the documents we need to update.
                    es_docs = get_documents(QuestionMappingType, chunk)

                    log.info("Updating %d index documents", len(es_docs))

                    documents = []

                    # For each document, update the data and stick it
                    # back in the index.
                    for doc in es_docs:
                        doc["question_is_archived"] = True
                        doc["indexed_on"] = int(time.time())
                        documents.append(doc)

                    QuestionMappingType.bulk_index(documents)

            except ES_EXCEPTIONS:
                # Something happened with ES, so let's push index
                # updating into an index_task which retries when it
                # fails because of ES issues.
                index_task.delay(to_class_path(QuestionMappingType), q_ids)
def handle(self, **options):
    # Set up logging so it doesn't send Ricky email.
    logging.basicConfig(level=logging.ERROR)

    # Get a list of ids of questions we're going to go change. We need
    # a list of ids so that we can feed it to the update, but then
    # also know what we need to update in the index.
    days_180 = datetime.now() - timedelta(days=180)
    q_ids = list(
        Question.objects.filter(is_archived=False)
        .filter(created__lte=days_180)
        .values_list('id', flat=True))

    if q_ids:
        log.info('Updating %d questions', len(q_ids))

        sql = """
            UPDATE questions_question
            SET is_archived = 1
            WHERE id IN (%s)
            """ % ','.join(map(str, q_ids))

        cursor = connection.cursor()
        cursor.execute(sql)
        if not transaction.get_connection().in_atomic_block:
            transaction.commit()

        if settings.ES_LIVE_INDEXING:
            try:
                # So... the first time this runs, it'll handle 160K
                # questions or so which stresses everything. Thus we
                # do it in chunks because otherwise this won't work.
                #
                # After we've done this for the first time, we can nix
                # the chunking code.
                from kitsune.search.utils import chunked
                for chunk in chunked(q_ids, 100):
                    # Fetch all the documents we need to update.
                    es_docs = get_documents(QuestionMappingType, chunk)

                    log.info('Updating %d index documents', len(es_docs))

                    documents = []

                    # For each document, update the data and stick it
                    # back in the index.
                    for doc in es_docs:
                        doc[u'question_is_archived'] = True
                        doc[u'indexed_on'] = int(time.time())
                        documents.append(doc)

                    QuestionMappingType.bulk_index(documents)

            except ES_EXCEPTIONS:
                # Something happened with ES, so let's push index
                # updating into an index_task which retries when it
                # fails because of ES issues.
                index_task.delay(QuestionMappingType, q_ids)
def reindex(mapping_type_names):
    """Reindex all instances of a given mapping type with celery tasks

    :arg mapping_type_names: list of mapping types to reindex

    """
    outstanding = Record.objects.outstanding().count()
    if outstanding > 0:
        raise ReindexError('There are %s outstanding chunks.' % outstanding)

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        chunks.extend(
            (cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    for cls, id_list in chunks:
        index = cls.get_index()
        chunk_name = 'Indexing: %s %d -> %d' % (
            cls.get_mapping_type_name(), id_list[0], id_list[-1])
        rec = Record.objects.create(batch_id=batch_id, name=chunk_name)
        index_chunk_task.delay(index, batch_id, rec.id,
                               (to_class_path(cls), id_list))
def reindex_with_scoreboard(mapping_type_names):
    """Reindex all instances of a given mapping type with celery tasks.

    This will use Redis to keep track of outstanding tasks so nothing
    gets screwed up by two jobs running at once.

    """
    # TODO: If this gets fux0rd, then it's possible this could be
    # non-zero and we really want to just ignore it. Need the ability
    # to ignore it.
    try:
        client = redis_client('default')
        val = client.get(OUTSTANDING_INDEX_CHUNKS)
        if val is not None and int(val) > 0:
            raise ReindexError('There are %s outstanding chunks.' % val)

        # We don't know how many chunks we're building, but we do want
        # to make sure another reindex request doesn't slide in here
        # and kick off a bunch of chunks.
        #
        # There is a race condition here.
        client.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. Can not check if there are '
                    'outstanding tasks.')

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size. Also generate
    # reconcile_tasks.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        chunks.extend(
            (cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

        reconcile_task.delay(cls.get_index(), batch_id,
                             cls.get_mapping_type_name())

    chunks_count = len(chunks)

    try:
        client = redis_client('default')
        client.set(OUTSTANDING_INDEX_CHUNKS, chunks_count)
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for chunk in chunks:
        index = chunk[0].get_index()
        index_chunk_task.delay(index, batch_id, chunk)
def reindex_with_scoreboard(mapping_type_names):
    """Reindex all instances of a given mapping type with celery tasks.

    This will use Redis to keep track of outstanding tasks so nothing
    gets screwed up by two jobs running at once.

    """
    # TODO: If this gets fux0rd, then it's possible this could be
    # non-zero and we really want to just ignore it. Need the ability
    # to ignore it.
    try:
        client = redis_client('default')
        val = client.get(OUTSTANDING_INDEX_CHUNKS)
        if val is not None and int(val) > 0:
            raise ReindexError('There are %s outstanding chunks.' % val)

        # We don't know how many chunks we're building, but we do want
        # to make sure another reindex request doesn't slide in here
        # and kick off a bunch of chunks.
        #
        # There is a race condition here.
        client.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. Can not check if there are '
                    'outstanding tasks.')

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size. Also generate
    # reconcile_tasks.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        chunks.extend((cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

        reconcile_task.delay(cls.get_index(), batch_id,
                             cls.get_mapping_type_name())

    chunks_count = len(chunks)

    try:
        client = redis_client('default')
        client.set(OUTSTANDING_INDEX_CHUNKS, chunks_count)
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for chunk in chunks:
        index = chunk[0].get_index()
        index_chunk_task.delay(index, batch_id, chunk)
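# The scoreboard code above (and the handle_reindex view further down) notes
# "There is a race condition here": the GET of the outstanding-chunks key and
# the later SET are separate Redis commands, so two reindex requests can
# interleave between them. A hypothetical way to narrow that window is
# redis-py's atomic SET with nx=True, which only writes when the key is
# absent; try_claim_reindex is an illustrative helper, not part of the
# original code, and its semantics differ slightly (an existing key with
# value 0 would block a claim).
def try_claim_reindex(client, key):
    """Atomically claim the scoreboard key; return True if we got it.

    Callers would pass the same key the code above uses
    (OUTSTANDING_INDEX_CHUNKS).
    """
    # SET key 1 NX: succeeds for exactly one caller when the key is unset.
    return bool(client.set(key, 1, nx=True))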
def index_chunk(cls, id_list, reraise=False):
    """Index a chunk of documents.

    :arg cls: The MappingType class.
    :arg id_list: Iterable of ids of that MappingType to index.
    :arg reraise: False if you want errors to be swallowed and True
        if you want errors to be thrown.

    """
    # Note: This bulk indexes in batches of 80. I didn't arrive at
    # this number through a proper scientific method. It's possible
    # there's a better number. It takes a while to fiddle with,
    # though. Probably best to expose the number as an environment
    # variable, then run a script that takes timings for
    # --criticalmass, runs overnight and returns a more "optimal"
    # number.
    for ids in chunked(id_list, 80):
        documents = []
        for id_ in ids:
            try:
                documents.append(cls.extract_document(id_))

            except UnindexMeBro:
                # extract_document throws this in cases where we need
                # to remove the item from the index.
                cls.unindex(id_)

            except Exception:
                log.exception('Unable to extract/index document (id: %d)',
                              id_)
                if reraise:
                    raise

        if documents:
            cls.bulk_index(documents, id_field='id')

    if settings.DEBUG:
        # Nix queries so that this doesn't become a complete
        # memory hog and make Will's computer sad when DEBUG=True.
        reset_queries()
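# The note in index_chunk() above suggests exposing the hard-coded batch size
# of 80 as an environment variable so it can be tuned against --criticalmass
# timings. A sketch of that idea follows; the ES_INDEX_BATCH_SIZE variable
# name, the helper, and the default are assumptions for illustration, not
# part of the original code.
import os


def get_index_batch_size(default=80):
    """Read the bulk-index batch size from the environment, falling back
    to the current hard-coded default when unset or malformed."""
    try:
        return int(os.environ.get('ES_INDEX_BATCH_SIZE', default))
    except (TypeError, ValueError):
        return default

# index_chunk() would then call chunked(id_list, get_index_batch_size())
# instead of chunked(id_list, 80).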
def handle_reindex(request):
    """Calculates and kicks off indexing tasks"""
    # This is truthy if the user wants us to delete and recreate
    # the index first.
    delete_index_first = bool(request.POST.get('delete_index'))

    if delete_index_first:
        # Coming from the delete form, so we reindex all models.
        mapping_types_to_index = None
    else:
        # Coming from the reindex form, so we reindex whatever we're
        # told.
        mapping_types_to_index = [name.replace('check_', '')
                                  for name in request.POST.keys()
                                  if name.startswith('check_')]

    # TODO: If this gets fux0rd, then it's possible this could be
    # non-zero and we really want to just ignore it. Need the ability
    # to ignore it.
    try:
        client = redis_client('default')
        val = client.get(OUTSTANDING_INDEX_CHUNKS)
        if val is not None and int(val) > 0:
            raise ReindexError('There are %s outstanding chunks.' % val)

        # We don't know how many chunks we're building, but we do want
        # to make sure another reindex request doesn't slide in here
        # and kick off a bunch of chunks.
        #
        # There is a race condition here.
        client.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. Can not check if there are '
                    'outstanding tasks.')

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_types_to_index):
        chunks.extend(
            (cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    if delete_index_first:
        # The previous lines do a lot of work and take some time to
        # execute. So we wait until here to wipe and rebuild the
        # index. That reduces the time that there is no index by a little.
        recreate_index()

    chunks_count = len(chunks)

    try:
        client = redis_client('default')
        client.set(OUTSTANDING_INDEX_CHUNKS, chunks_count)
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for chunk in chunks:
        index_chunk_task.delay(write_index(), batch_id, chunk)

    return HttpResponseRedirect(request.path)
def es_status_cmd(checkindex=False, log=log):
    """Shows elastic search index status"""
    es_deets = None
    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    read_doctype_stats = {}
    for index in all_read_indexes():
        try:
            read_doctype_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            read_doctype_stats[index] = None

    if set(all_read_indexes()) == set(all_write_indexes()):
        write_doctype_stats = read_doctype_stats
    else:
        write_doctype_stats = {}
        for index in all_write_indexes():
            try:
                write_doctype_stats[index] = get_doctype_stats(index)
            except ES_EXCEPTIONS:
                write_doctype_stats[index] = None

    try:
        indexes = get_indexes(all_indexes=True)
    except ES_EXCEPTIONS:
        log.error('Your elasticsearch process is not running or ES_URLS '
                  'is set wrong in your settings_local.py file.')
        return

    log.info('Elasticsearch:')
    # Guard against es_deets never getting set if the request above failed.
    log.info(' Version : %s',
             es_deets['version']['number'] if es_deets else 'unknown')

    log.info('Settings:')
    log.info(' ES_URLS : %s', settings.ES_URLS)
    log.info(' ES_INDEX_PREFIX : %s', settings.ES_INDEX_PREFIX)
    log.info(' ES_LIVE_INDEXING : %s', settings.ES_LIVE_INDEXING)
    log.info(' ES_INDEXES : %s', settings.ES_INDEXES)
    log.info(' ES_WRITE_INDEXES : %s', settings.ES_WRITE_INDEXES)

    log.info('Index stats:')

    if indexes:
        log.info(' List of indexes:')
        for name, count in sorted(indexes):
            read_write = []
            if name in all_read_indexes():
                read_write.append('READ')
            if name in all_write_indexes():
                read_write.append('WRITE')
            log.info(' %-22s: %s %s', name, count,
                     '/'.join(read_write))
    else:
        log.info(' There are no %s indexes.', settings.ES_INDEX_PREFIX)

    if not read_doctype_stats:
        read_index_names = ', '.join(all_read_indexes())
        log.info(' No read indexes exist. (%s)', read_index_names)
    else:
        log.info(' Read indexes:')
        for index, stats in read_doctype_stats.items():
            if stats is None:
                log.info(' %s does not exist', index)
            else:
                log.info(' %s:', index)
                for name, count in sorted(stats.items()):
                    log.info(' %-22s: %d', name, count)

    if set(all_read_indexes()) == set(all_write_indexes()):
        log.info(' Write indexes are the same as the read indexes.')
    else:
        if not write_doctype_stats:
            write_index_names = ', '.join(all_write_indexes())
            log.info(' No write indexes exist. (%s)', write_index_names)
        else:
            log.info(' Write indexes:')
            for index, stats in write_doctype_stats.items():
                if stats is None:
                    log.info(' %s does not exist', index)
                else:
                    log.info(' %s:', index)
                    for name, count in sorted(stats.items()):
                        log.info(' %-22s: %d', name, count)

    if checkindex:
        # Go through the index and verify everything
        log.info('Checking index contents....')

        missing_docs = 0

        for cls, id_list in get_indexable():
            for id_group in chunked(id_list, 100):
                doc_list = get_documents(cls, id_group)

                if len(id_group) != len(doc_list):
                    doc_list_ids = [doc['id'] for doc in doc_list]
                    for id_ in id_group:
                        if id_ not in doc_list_ids:
                            log.info(' Missing %s %s',
                                     cls.get_model_name(), id_)
                            missing_docs += 1

        if missing_docs:
            print 'There were %d missing_docs' % missing_docs
def es_reindex_cmd(percent=100, delete=False, mapping_types=None,
                   criticalmass=False, log=log):
    """Rebuild ElasticSearch indexes

    :arg percent: 1 to 100--the percentage of the db to index
    :arg delete: whether or not to wipe the index before reindexing
    :arg mapping_types: list of mapping types to index
    :arg criticalmass: whether or not to index just a critical mass of
        things
    :arg log: the logger to use

    """
    es = get_es()

    if mapping_types is None:
        indexes = all_write_indexes()
    else:
        indexes = indexes_for_doctypes(mapping_types)

    need_delete = False
    for index in indexes:
        try:
            # This is used to see if the index exists.
            get_doctype_stats(index)
        except ES_EXCEPTIONS:
            if not delete:
                log.error('The index "%s" does not exist. '
                          'You must specify --delete.' % index)
                need_delete = True
    if need_delete:
        return

    if delete:
        log.info('wiping and recreating %s...', ', '.join(indexes))
        recreate_indexes(es, indexes)

    if criticalmass:
        # The critical mass is defined as the entire KB plus the most
        # recent 15k questions (which is about how many questions
        # there were created in the last 180 days). We build that
        # indexable here.

        # Get only questions and wiki document stuff.
        all_indexable = get_indexable(
            mapping_types=['questions_question', 'wiki_document'])

        # The first item is questions because we specified that
        # order. Old questions don't show up in searches, so we nix
        # them by reversing the list (ordered by id ascending) and
        # slicing it.
        all_indexable[0] = (all_indexable[0][0],
                            list(reversed(all_indexable[0][1]))[:15000])

    elif mapping_types:
        all_indexable = get_indexable(percent, mapping_types)

    else:
        all_indexable = get_indexable(percent)

    try:
        old_refreshes = {}
        # We're doing a lot of indexing, so we get the refresh_interval of
        # the index currently, then nix refreshing. Later we'll restore it.
        for index in indexes:
            old_refreshes[index] = (get_index_settings(index).get(
                'index.refresh_interval', '1s'))
            # Disable automatic refreshing
            es.indices.put_settings(index=index,
                                    body={'index': {'refresh_interval': '-1'}})

        start_time = time.time()
        for cls, indexable in all_indexable:
            cls_start_time = time.time()
            total = len(indexable)

            if total == 0:
                continue

            chunk_start_time = time.time()
            log.info('reindexing %s. %s to index....',
                     cls.get_mapping_type_name(), total)

            i = 0
            for chunk in chunked(indexable, 1000):
                chunk_start_time = time.time()
                index_chunk(cls, chunk)

                i += len(chunk)
                time_to_go = (total - i) * ((time.time() - cls_start_time) / i)
                per_1000 = (time.time() - cls_start_time) / (i / 1000.0)
                this_1000 = time.time() - chunk_start_time

                log.info(' %s/%s %s... (%s/1000 avg, %s ETA)',
                         i, total,
                         format_time(this_1000),
                         format_time(per_1000),
                         format_time(time_to_go))

            delta_time = time.time() - cls_start_time
            log.info(' done! (%s total, %s/1000 avg)',
                     format_time(delta_time),
                     format_time(delta_time / (total / 1000.0)))

        delta_time = time.time() - start_time
        log.info('done! (%s total)', format_time(delta_time))

    finally:
        # Re-enable automatic refreshing
        for index, old_refresh in old_refreshes.items():
            es.indices.put_settings(
                index=index,
                body={'index': {'refresh_interval': old_refresh}})
def es_status_cmd(checkindex=False, log=log):
    """Shows elastic search index status"""
    es_deets = None
    try:
        # TODO: SUMO has a single ES_URL and that's the ZLB and does
        # the balancing. If that ever changes and we have multiple
        # ES_URLs, then this should get fixed.
        es_deets = requests.get(settings.ES_URLS[0]).json()
    except requests.exceptions.RequestException:
        pass

    read_doctype_stats = {}
    for index in all_read_indexes():
        try:
            read_doctype_stats[index] = get_doctype_stats(index)
        except ES_EXCEPTIONS:
            read_doctype_stats[index] = None

    if set(all_read_indexes()) == set(all_write_indexes()):
        write_doctype_stats = read_doctype_stats
    else:
        write_doctype_stats = {}
        for index in all_write_indexes():
            try:
                write_doctype_stats[index] = get_doctype_stats(index)
            except ES_EXCEPTIONS:
                write_doctype_stats[index] = None

    try:
        indexes = get_indexes(all_indexes=True)
    except ES_EXCEPTIONS:
        log.error("Your elasticsearch process is not running or ES_URLS "
                  "is set wrong in your settings_local.py file.")
        return

    log.info("Elasticsearch:")
    # Guard against es_deets never getting set if the request above failed.
    log.info(" Version : %s",
             es_deets["version"]["number"] if es_deets else "unknown")

    log.info("Settings:")
    log.info(" ES_URLS : %s", settings.ES_URLS)
    log.info(" ES_INDEX_PREFIX : %s", settings.ES_INDEX_PREFIX)
    log.info(" ES_LIVE_INDEXING : %s", settings.ES_LIVE_INDEXING)
    log.info(" ES_INDEXES : %s", settings.ES_INDEXES)
    log.info(" ES_WRITE_INDEXES : %s", settings.ES_WRITE_INDEXES)

    log.info("Index stats:")

    if indexes:
        log.info(" List of indexes:")
        for name, count in sorted(indexes):
            read_write = []
            if name in all_read_indexes():
                read_write.append("READ")
            if name in all_write_indexes():
                read_write.append("WRITE")
            log.info(" %-22s: %s %s", name, count,
                     "/".join(read_write))
    else:
        log.info(" There are no %s indexes.", settings.ES_INDEX_PREFIX)

    if not read_doctype_stats:
        read_index_names = ", ".join(all_read_indexes())
        log.info(" No read indexes exist. (%s)", read_index_names)
    else:
        log.info(" Read indexes:")
        for index, stats in list(read_doctype_stats.items()):
            if stats is None:
                log.info(" %s does not exist", index)
            else:
                log.info(" %s:", index)
                for name, count in sorted(stats.items()):
                    log.info(" %-22s: %d", name, count)

    if set(all_read_indexes()) == set(all_write_indexes()):
        log.info(" Write indexes are the same as the read indexes.")
    else:
        if not write_doctype_stats:
            write_index_names = ", ".join(all_write_indexes())
            log.info(" No write indexes exist. (%s)", write_index_names)
        else:
            log.info(" Write indexes:")
            for index, stats in list(write_doctype_stats.items()):
                if stats is None:
                    log.info(" %s does not exist", index)
                else:
                    log.info(" %s:", index)
                    for name, count in sorted(stats.items()):
                        log.info(" %-22s: %d", name, count)

    if checkindex:
        # Go through the index and verify everything
        log.info("Checking index contents....")

        missing_docs = 0

        for cls, id_list in get_indexable():
            for id_group in chunked(id_list, 100):
                doc_list = get_documents(cls, id_group)

                if len(id_group) != len(doc_list):
                    doc_list_ids = [doc["id"] for doc in doc_list]
                    for id_ in id_group:
                        if id_ not in doc_list_ids:
                            log.info(" Missing %s %s",
                                     cls.get_model_name(), id_)
                            missing_docs += 1

        if missing_docs:
            print("There were %d missing_docs" % missing_docs)
def es_reindex_cmd(percent=100, delete=False, mapping_types=None,
                   criticalmass=False, log=log):
    """Rebuild ElasticSearch indexes

    :arg percent: 1 to 100--the percentage of the db to index
    :arg delete: whether or not to wipe the index before reindexing
    :arg mapping_types: list of mapping types to index
    :arg criticalmass: whether or not to index just a critical mass of
        things
    :arg log: the logger to use

    """
    es = get_es()

    if mapping_types is None:
        indexes = all_write_indexes()
    else:
        indexes = indexes_for_doctypes(mapping_types)

    need_delete = False
    for index in indexes:
        try:
            # This is used to see if the index exists.
            get_doctype_stats(index)
        except ES_EXCEPTIONS:
            if not delete:
                log.error('The index "%s" does not exist. '
                          'You must specify --delete.' % index)
                need_delete = True
    if need_delete:
        return

    if delete:
        log.info('wiping and recreating %s...', ', '.join(indexes))
        recreate_indexes(es, indexes)

    if criticalmass:
        # The critical mass is defined as the entire KB plus the most
        # recent 15k questions (which is about how many questions
        # there were created in the last 180 days). We build that
        # indexable here.

        # Get only questions and wiki document stuff.
        all_indexable = get_indexable(
            mapping_types=['questions_question', 'wiki_document'])

        # The first item is questions because we specified that
        # order. Old questions don't show up in searches, so we nix
        # them by reversing the list (ordered by id ascending) and
        # slicing it.
        all_indexable[0] = (all_indexable[0][0],
                            list(reversed(all_indexable[0][1]))[:15000])

    elif mapping_types:
        all_indexable = get_indexable(percent, mapping_types)

    else:
        all_indexable = get_indexable(percent)

    try:
        old_refreshes = {}
        # We're doing a lot of indexing, so we get the refresh_interval of
        # the index currently, then nix refreshing. Later we'll restore it.
        for index in indexes:
            old_refreshes[index] = (get_index_settings(index)
                                    .get('index.refresh_interval', '1s'))
            # Disable automatic refreshing
            es.indices.put_settings(index=index,
                                    body={'index': {'refresh_interval': '-1'}})

        start_time = time.time()
        for cls, indexable in all_indexable:
            cls_start_time = time.time()
            total = len(indexable)

            if total == 0:
                continue

            chunk_start_time = time.time()
            log.info('reconciling %s: %s in db....',
                     cls.get_mapping_type_name(), total)
            ret = reconcile_chunk(cls, cls.get_indexable())
            log.info(' done! reconciled %s index documents (%s total)',
                     ret, format_time(time.time() - chunk_start_time))

            log.info('reindexing %s. %s to index....',
                     cls.get_mapping_type_name(), total)

            i = 0
            for chunk in chunked(indexable, 1000):
                chunk_start_time = time.time()
                index_chunk(cls, chunk)

                i += len(chunk)
                time_to_go = (total - i) * ((time.time() - cls_start_time) / i)
                per_1000 = (time.time() - cls_start_time) / (i / 1000.0)
                this_1000 = time.time() - chunk_start_time

                log.info(' %s/%s %s... (%s/1000 avg, %s ETA)',
                         i, total,
                         format_time(this_1000),
                         format_time(per_1000),
                         format_time(time_to_go))

            delta_time = time.time() - cls_start_time
            log.info(' done! (%s total, %s/1000 avg)',
                     format_time(delta_time),
                     format_time(delta_time / (total / 1000.0)))

        delta_time = time.time() - start_time
        log.info('done! (%s total)', format_time(delta_time))

    finally:
        # Re-enable automatic refreshing
        for index, old_refresh in old_refreshes.items():
            es.indices.put_settings(
                index=index,
                body={'index': {'refresh_interval': old_refresh}})
def handle_reindex(request):
    """Calculates and kicks off indexing tasks"""
    # This is truthy if the user wants us to delete and recreate
    # the index first.
    delete_index_first = bool(request.POST.get('delete_index'))

    if delete_index_first:
        # Coming from the delete form, so we reindex all models.
        mapping_types_to_index = None
    else:
        # Coming from the reindex form, so we reindex whatever we're
        # told.
        mapping_types_to_index = [
            name.replace('check_', '')
            for name in request.POST.keys()
            if name.startswith('check_')
        ]

    # TODO: If this gets fux0rd, then it's possible this could be
    # non-zero and we really want to just ignore it. Need the ability
    # to ignore it.
    try:
        client = redis_client('default')
        val = client.get(OUTSTANDING_INDEX_CHUNKS)
        if val is not None and int(val) > 0:
            raise ReindexError('There are %s outstanding chunks.' % val)

        # We don't know how many chunks we're building, but we do want
        # to make sure another reindex request doesn't slide in here
        # and kick off a bunch of chunks.
        #
        # There is a race condition here.
        client.set(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        log.warning('Redis not running. Can not check if there are '
                    'outstanding tasks.')

    batch_id = create_batch_id()

    # Break up all the things we want to index into chunks. This
    # chunkifies by class then by chunk size.
    chunks = []
    for cls, indexable in get_indexable(mapping_types=mapping_types_to_index):
        chunks.extend((cls, chunk) for chunk in chunked(indexable, CHUNK_SIZE))

    if delete_index_first:
        # The previous lines do a lot of work and take some time to
        # execute. So we wait until here to wipe and rebuild the
        # index. That reduces the time that there is no index by a little.
        recreate_index()

    chunks_count = len(chunks)

    try:
        client = redis_client('default')
        client.set(OUTSTANDING_INDEX_CHUNKS, chunks_count)
    except RedisError:
        log.warning('Redis not running. Can\'t denote outstanding tasks.')

    for chunk in chunks:
        index_chunk_task.delay(write_index(), batch_id, chunk)

    return HttpResponseRedirect(request.path)