def handle(self, **options):
    """Update last_contribution_date for users who contributed in the last day.

    Gathers the ids of users who answered in the support forum, edited a
    KB article, or reviewed a KB revision, then queues one index task
    for the deduplicated set.
    """
    now = datetime.now()
    one_day_ago = now - timedelta(days=1)

    contributor_ids = set()

    # Support Forum answers
    contributor_ids.update(
        Answer.objects.filter(
            created__gte=one_day_ago, created__lt=now
        ).values_list("creator_id", flat=True)
    )

    # KB Edits
    contributor_ids.update(
        Revision.objects.filter(
            created__gte=one_day_ago, created__lt=now
        ).values_list("creator_id", flat=True)
    )

    # KB Reviews
    contributor_ids.update(
        Revision.objects.filter(
            reviewed__gte=one_day_ago, reviewed__lt=now
        ).values_list("reviewer_id", flat=True)
    )

    # Note:
    # Army of Awesome replies are live indexed. No need to do anything here.

    index_task.delay(to_class_path(UserMappingType), list(contributor_ids))
def reindex(mapping_type_names):
    """Reindex all instances of a given mapping type with celery tasks

    :arg mapping_type_names: list of mapping types to reindex

    :raises ReindexError: if there are still outstanding chunks from a
        previous reindex run
    """
    outstanding = Record.objects.outstanding().count()
    if outstanding > 0:
        raise ReindexError('There are %s outstanding chunks.' % outstanding)

    batch_id = create_batch_id()

    # Walk each class' indexable ids in CHUNK_SIZE pieces and fire one
    # celery task per piece, tracked by a Record row.
    for cls, indexable in get_indexable(mapping_types=mapping_type_names):
        for id_list in chunked(indexable, CHUNK_SIZE):
            index = cls.get_index()
            chunk_name = 'Indexing: %s %d -> %d' % (
                cls.get_mapping_type_name(), id_list[0], id_list[-1])
            rec = Record.objects.create(batch_id=batch_id, name=chunk_name)
            index_chunk_task.delay(
                index, batch_id, rec.id, (to_class_path(cls), id_list))
def handle(self, **options):
    """Archive questions created more than 180 days ago.

    Flags matching rows directly in the db with one UPDATE, then (if
    live indexing is on) pushes the change into the ES index in chunks,
    falling back to a retrying celery task on ES errors.
    """
    # Set up logging so it doesn't send Ricky email.
    logging.basicConfig(level=logging.ERROR)

    # Get a list of ids of questions we're going to go change. We need
    # a list of ids so that we can feed it to the update, but then
    # also know what we need to update in the index.
    days_180 = datetime.now() - timedelta(days=180)
    q_ids = list(
        Question.objects.filter(is_archived=False)
        .filter(created__lte=days_180)
        .values_list("id", flat=True)
    )

    if q_ids:
        log.info("Updating %d questions", len(q_ids))

        # Use a parameterized query instead of interpolating the ids
        # into the SQL string -- the db driver handles quoting/escaping.
        placeholders = ",".join(["%s"] * len(q_ids))
        sql = """
            UPDATE questions_question
            SET is_archived = 1
            WHERE id IN (%s)
            """ % placeholders

        cursor = connection.cursor()
        cursor.execute(sql, q_ids)
        if not transaction.get_connection().in_atomic_block:
            transaction.commit()

        if settings.ES_LIVE_INDEXING:
            try:
                # So... the first time this runs, it'll handle 160K
                # questions or so which stresses everything. Thus we
                # do it in chunks because otherwise this won't work.
                #
                # After we've done this for the first time, we can nix
                # the chunking code.
                from kitsune.search.utils import chunked

                for chunk in chunked(q_ids, 100):
                    # Fetch all the documents we need to update.
                    es_docs = get_documents(QuestionMappingType, chunk)

                    log.info("Updating %d index documents", len(es_docs))

                    documents = []

                    # For each document, update the data and stick it
                    # back in the index.
                    for doc in es_docs:
                        doc["question_is_archived"] = True
                        doc["indexed_on"] = int(time.time())
                        documents.append(doc)

                    QuestionMappingType.bulk_index(documents)

            except ES_EXCEPTIONS:
                # Something happened with ES, so let's push index
                # updating into an index_task which retries when it
                # fails because of ES issues.
                index_task.delay(to_class_path(QuestionMappingType), q_ids)
def reindex_questions_answers(sender, instance, **kw):
    """Reindex a saved question's answers.

    This is needed because the solution may have changed.
    """
    if not instance.id:
        return

    ids = list(instance.answers.all().values_list("id", flat=True))
    index_task.delay(to_class_path(AnswerMetricsMappingType), ids)
def update_question_vote_chunk(data):
    """Update num_votes_past_week for a chunk of questions.

    :arg data: list of question ids to recalculate
    """
    data = list(data)
    if not data:
        # Nothing to do -- and an empty "IN ()" clause would be a SQL
        # syntax error, so bail out early.
        return

    # First we recalculate num_votes_past_week in the db.
    log.info("Calculating past week votes for %s questions.", len(data))

    # Parameterized placeholders for the IN (...) clauses so the db
    # driver handles quoting instead of string interpolation.
    placeholders = ",".join(["%s"] * len(data))

    sql = """
        UPDATE questions_question q
        SET num_votes_past_week = (
            SELECT COUNT(created)
            FROM questions_questionvote qv
            WHERE qv.question_id = q.id
            AND qv.created >= DATE(SUBDATE(NOW(), 7))
        )
        WHERE q.id IN (%s);
        """ % placeholders

    cursor = connection.cursor()
    cursor.execute(sql, data)
    if not transaction.get_connection().in_atomic_block:
        transaction.commit()

    # Next we update our index with the changes we made directly in
    # the db.
    if settings.ES_LIVE_INDEXING:
        # Get the data we just updated from the database.
        sql = """
            SELECT id, num_votes_past_week
            FROM questions_question
            WHERE id IN (%s);
            """ % placeholders
        cursor = connection.cursor()
        cursor.execute(sql, data)

        # Since this returns (id, num_votes_past_week) tuples, we can
        # convert that directly to a dict.
        id_to_num = dict(cursor.fetchall())

        try:
            # Fetch all the documents we need to update.
            from kitsune.questions.models import QuestionMappingType
            from kitsune.search import es_utils

            es_docs = es_utils.get_documents(QuestionMappingType, data)

            # For each document, update the data and stick it back in
            # the index.
            for doc in es_docs:
                # Note: Need to keep this in sync with
                # Question.extract_document.
                num = id_to_num[int(doc["id"])]
                doc["question_num_votes_past_week"] = num

                QuestionMappingType.index(doc, id_=doc["id"])
        except ES_EXCEPTIONS:
            # Something happened with ES, so let's push index updating
            # into an index_task which retries when it fails because
            # of ES issues.
            index_task.delay(
                to_class_path(QuestionMappingType), list(id_to_num.keys()))
def reindex(mapping_type_names):
    """Reindex all instances of a given mapping type with celery tasks

    :arg mapping_type_names: list of mapping types to reindex

    :raises ReindexError: if a previous reindex run still has
        outstanding chunks
    """
    pending = Record.objects.outstanding().count()
    if pending > 0:
        raise ReindexError('There are %s outstanding chunks.' % pending)

    batch_id = create_batch_id()

    # Build the full (class, id-chunk) work list up front: chunkified
    # by class then by chunk size.
    work = [
        (cls, id_chunk)
        for cls, indexable in get_indexable(mapping_types=mapping_type_names)
        for id_chunk in chunked(indexable, CHUNK_SIZE)
    ]

    # One Record row and one celery task per chunk.
    for cls, id_list in work:
        index = cls.get_index()
        name = 'Indexing: %s %d -> %d' % (
            cls.get_mapping_type_name(), id_list[0], id_list[-1])
        record = Record.objects.create(batch_id=batch_id, name=name)
        index_chunk_task.delay(
            index, batch_id, record.id, (to_class_path(cls), id_list))
def handle(self, **options):
    """Queue a celery task that reindexes every indexable document."""
    indexable = DocumentMappingType.get_indexable()
    index_task.delay(to_class_path(DocumentMappingType), indexable)
def test_to_class_path():
    # to_class_path should render "dotted.module.path:ClassName".
    path = to_class_path(FooBarClassOfAwesome)
    eq_(path, 'kitsune.search.tests.test_utils:FooBarClassOfAwesome')
def test_to_class_path():
    """to_class_path joins the module path and class name with a colon."""
    expected = "kitsune.search.tests.test_utils:FooBarClassOfAwesome"
    eq_(to_class_path(FooBarClassOfAwesome), expected)
def unindex_later(self):
    """Register myself to be unindexed at the end of the request."""
    task_args = (to_class_path(self.get_mapping_type()), (self.pk,))
    _local_tasks().add((unindex_task.delay, task_args))