def unindex_task(cls, id_list, **kw):
    """Unindex documents specified by cls and ids"""
    statsd.incr('search.tasks.unindex_task.%s' % cls.get_mapping_type_name())
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        for id_ in id_list:
            cls.unindex(id_)
    except Exception as exc:
        retries = unindex_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()
        statsd.incr('search.tasks.unindex_task.retry', 1)
        statsd.incr('search.tasks.unindex_task.retry%d' % RETRY_TIMES[retries],
                    1)
        unindex_task.retry(exc=exc, max_retries=MAX_RETRIES,
                           countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
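# The retry constants used by the indexing tasks above and below are not part
# of this collection. A minimal sketch, assuming a fixed backoff schedule
# indexed by the retry count; the names MAX_RETRIES / RETRY_TIMES match the
# callers, but the values here are assumed for illustration only.
MAX_RETRIES = 5
# Seconds to wait before each retry attempt; RETRY_TIMES[retries] is passed
# as the Celery countdown in the tasks that retry.
RETRY_TIMES = (
    60,        # 1 minute
    5 * 60,    # 5 minutes
    10 * 60,   # 10 minutes
    30 * 60,   # 30 minutes
    60 * 60,   # 60 minutes
)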
def render_document_cascade(base):
    """Given a document, render it and all documents that may be affected."""
    # This walks along the graph of links between documents. If there is
    # a document A that includes another document B as a template, then
    # there is an edge from A to B in this graph. The goal here is to
    # process every node exactly once. This is robust to cycles and
    # diamonds in the graph, since it keeps track of what nodes have
    # been visited already.

    # In case anything goes wrong, this guarantees we unpin the DB.
    try:
        # Sends all writes to the master DB. Slaves are readonly.
        pin_this_thread()

        todo = set([base])
        done = set()

        while todo:
            d = todo.pop()
            if d in done:
                # Don't process a node twice.
                continue
            d.html = d.parse_and_calculate_links()
            d.save()
            done.add(d)
            todo.update(
                l.linked_from for l in
                d.links_to().filter(kind__in=['template', 'include']))
    finally:
        unpin_this_thread()
def _rebuild_kb_chunk(data, **kwargs):
    """Re-render a chunk of documents."""
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)
            document.html = document.current_revision.content_cleaned
            document.save()
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    transaction.commit_unless_managed()

    unpin_this_thread()  # Not all tasks need to use the master.
def index_chunk_task(index, batch_id, rec_id, chunk):
    """Index a chunk of things.

    :arg index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg rec_id: the id for the record for this task
    :arg chunk: a (class, id_list) of things to index

    """
    cls, id_list = chunk

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        # Update record data.
        rec = Record.objects.get(pk=rec_id)
        rec.start_time = datetime.datetime.now()
        rec.message = u'Reindexing into %s' % index
        rec.status = Record.STATUS_IN_PROGRESS
        rec.save()

        index_chunk(cls, id_list, reraise=True)

        rec.mark_success()
    except Exception:
        rec.mark_fail(u'Errored out %s %s' % (
            sys.exc_type, sys.exc_value))
        raise
    finally:
        unpin_this_thread()
def log_answer(answer):
    pin_this_thread()

    creator = answer.creator
    created = answer.created
    question = answer.question
    users = [a.creator for a in
             question.answers.select_related('creator').exclude(
                 creator=creator)]
    if question.creator != creator:
        users += [question.creator]
    users = set(users)  # Remove duplicates.

    if users:
        action = Action.objects.create(
            creator=creator,
            created=created,
            url=answer.get_absolute_url(),
            content_object=answer,
            formatter_cls='questions.formatters.AnswerFormatter')
        action.users.add(*users)

    transaction.commit_unless_managed()

    # Record karma actions
    AnswerAction(answer.creator, answer.created.date()).save()
    if answer == answer.question.answers.order_by('created')[0]:
        FirstAnswerAction(answer.creator, answer.created.date()).save()

    unpin_this_thread()
def index_task(cls_path, id_list, **kw):
    """Index documents specified by cls and ids"""
    cls = from_class_path(cls_path)
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        qs = cls.get_model().objects.filter(pk__in=id_list).values_list(
            "pk", flat=True)
        for id_ in qs:
            try:
                cls.index(cls.extract_document(id_), id_=id_)
            except UnindexMeBro:
                # If extract_document throws this, then we need to
                # remove this item from the index.
                cls.unindex(id_)
    except Exception as exc:
        retries = index_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()

        index_task.retry(exc=exc, max_retries=MAX_RETRIES,
                         countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
def _rebuild_kb_chunk(data):
    """Re-render a chunk of documents.

    Note: Don't use host components when making redirects to wiki pages;
    those redirects won't be auto-pruned when they're 404s.

    """
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    start = time.time()
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)

            # If we know a redirect link to be broken (i.e. if it looks like
            # a link to a document but the document isn't there), log an
            # error:
            url = document.redirect_url()
            if (url and points_to_document_view(url) and
                    not document.redirect_document()):
                log.warn('Invalid redirect document: %d' % pk)

            html = document.parse_and_calculate_links()
            if document.html != html:
                # We are calling update here so that we only update the html
                # column instead of all of them. This bypasses post_save
                # signal handlers like the one that triggers reindexing.
                # See bug 797038 and bug 797352.
                Document.objects.filter(pk=pk).update(html=html)
                statsd.incr('wiki.rebuild_chunk.change')
            else:
                statsd.incr('wiki.rebuild_chunk.nochange')
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except Revision.DoesNotExist:
            message = 'Missing revision for document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk
        except TitleCollision:
            message = 'TitleCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)
    d = time.time() - start
    statsd.timing('wiki.rebuild_chunk', int(round(d * 1000)))

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    if not transaction.get_connection().in_atomic_block:
        transaction.commit()

    unpin_this_thread()  # Not all tasks need to use the master.
def log_answer(answer):
    pin_this_thread()

    creator = answer.creator
    created = answer.created
    question = answer.question
    users = [
        a.creator for a in
        question.answers.select_related('creator').exclude(creator=creator)
    ]
    if question.creator != creator:
        users += [question.creator]
    users = set(users)  # Remove duplicates.

    if users:
        action = Action.objects.create(
            creator=creator,
            created=created,
            url=answer.get_absolute_url(),
            content_object=answer,
            formatter_cls='questions.formatters.AnswerFormatter')
        action.users.add(*users)

    transaction.commit_unless_managed()

    unpin_this_thread()
def _rebuild_kb_chunk(data, **kwargs):
    """Re-render a chunk of documents."""
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)
            document.html = document.current_revision.content_cleaned
            document.save()
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk
        except TitleCollision:
            message = 'TitleCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    transaction.commit_unless_managed()

    unpin_this_thread()  # Not all tasks need to use the master.
def index_chunk_task(index, batch_id, rec_id, chunk):
    """Index a chunk of things.

    :arg index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg rec_id: the id for the record for this task
    :arg chunk: a (cls_path, id_list) of things to index

    """
    cls_path, id_list = chunk
    cls = from_class_path(cls_path)
    rec = None

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        # Update record data.
        rec = Record.objects.get(pk=rec_id)
        rec.start_time = datetime.datetime.now()
        rec.message = u'Reindexing into %s' % index
        rec.status = Record.STATUS_IN_PROGRESS
        rec.save()

        index_chunk(cls, id_list)

        rec.mark_success()
    except Exception:
        if rec is not None:
            rec.mark_fail(u'Errored out %s %s' % (sys.exc_type,
                                                  sys.exc_value))
        raise
    finally:
        unpin_this_thread()
def handle(self, **options):
    try:
        # Sends all writes to the master DB. Slaves are readonly.
        pin_this_thread()

        # Since we currently use MySQL, we have to load the whole table into
        # memory at once - iterator() won't chunk requests to MySQL. However,
        # we can massively reduce memory usage by only loading the columns we
        # need:
        docs = Document.objects.all().values("id", "current_revision_id")
        for d in docs.iterator():
            revs = Revision.objects.filter(document_id=d["id"],
                                           is_approved=True)
            revs = revs.order_by("-reviewed").values_list("id", flat=True)[:1]

            if len(revs):
                rev_id = revs[0]

                if d["current_revision_id"] != rev_id:
                    doc = Document.objects.get(id=d["id"])
                    doc.current_revision_id = rev_id
                    doc.save()
                    print(doc.get_absolute_url())
    finally:
        unpin_this_thread()
def test_pinning_encapsulation(self):
    """Check the pinning getters and setters."""
    self.assertFalse(this_thread_is_pinned())
    pin_this_thread()
    self.assertTrue(this_thread_is_pinned())
    unpin_this_thread()
    self.assertFalse(this_thread_is_pinned())
def index_task(cls, id_list, **kw):
    """Index documents specified by cls and ids"""
    statsd.incr('search.tasks.index_task.%s' % cls.get_mapping_type_name())
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        qs = cls.get_model().uncached.filter(id__in=id_list).values_list(
            'id', flat=True)
        for id_ in qs:
            try:
                cls.index(cls.extract_document(id_), id_=id_)
            except UnindexMeBro:
                # If extract_document throws this, then we need to
                # remove this item from the index.
                cls.unindex(id_)
    except Exception as exc:
        retries = index_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()
        statsd.incr('search.tasks.index_task.retry', 1)
        statsd.incr('search.tasks.index_task.retry%d' % RETRY_TIMES[retries],
                    1)
        index_task.retry(exc=exc, max_retries=MAX_RETRIES,
                         countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
def index_task(cls, id_list, **kw):
    """Index documents specified by cls and ids"""
    statsd.incr('search.tasks.index_task.%s' % cls.get_mapping_type_name())
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        qs = cls.get_model().uncached.filter(pk__in=id_list).values_list(
            'pk', flat=True)
        for id_ in qs:
            try:
                cls.index(cls.extract_document(id_), id_=id_)
            except UnindexMeBro:
                # If extract_document throws this, then we need to
                # remove this item from the index.
                cls.unindex(id_)
    except Exception as exc:
        retries = index_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()
        statsd.incr('search.tasks.index_task.retry', 1)
        statsd.incr('search.tasks.index_task.retry%d' % RETRY_TIMES[retries],
                    1)
        index_task.retry(exc=exc, max_retries=MAX_RETRIES,
                         countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
def test_slave_context_manager_exception(self):
    pin_this_thread()
    self.assertTrue(this_thread_is_pinned())
    with self.assertRaises(ValueError):
        with use_slave:
            self.assertFalse(this_thread_is_pinned())
            raise ValueError
    self.assertTrue(this_thread_is_pinned())
def middleware(request):
    if (request.method == 'POST' or
            request.path.startswith('/admin/') or
            request.path.startswith('/accounts/') or
            '/edit' in request.path):
        pin_this_thread()
    else:
        unpin_this_thread()
    return get_response(request)
def thread2_worker():
    pin_this_thread()
    with use_primary_db:
        orchestrator.release()
        thread2_lock.acquire()

    pinned[2] = this_thread_is_pinned()
    orchestrator.release()
def test_decorator_resets(self):
    @use_primary_db
    def check():
        assert this_thread_is_pinned()

    pin_this_thread()
    assert this_thread_is_pinned()
    check()
    assert this_thread_is_pinned()
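# The "resets" tests above and below rely on use_primary_db / use_master /
# use_slave restoring the thread's previous pinning state when they exit.
# A minimal sketch of that behaviour, built only from the pinning helpers
# shown elsewhere in this collection; the name use_primary_db_sketch is
# deliberately distinct because this is an illustration, not the library's
# actual implementation.
from contextlib import contextmanager

@contextmanager
def use_primary_db_sketch():
    was_pinned = this_thread_is_pinned()
    pin_this_thread()
    try:
        yield
    finally:
        # Only unpin if the thread wasn't already pinned on entry, so a
        # previously pinned thread stays pinned afterwards.
        if not was_pinned:
            unpin_this_thread()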
def process_request(self, request):
    """Set the thread's pinning flag according to the presence of the
    incoming cookie."""
    if PINNING_COOKIE in request.COOKIES or request.method not in READ_ONLY_METHODS:
        pin_this_thread()
    else:
        # In case the last request this thread served was pinned:
        unpin_this_thread()
def test_decorator_resets(self):
    @use_master
    def check():
        assert this_thread_is_pinned()

    pin_this_thread()
    assert this_thread_is_pinned()
    check()
    assert this_thread_is_pinned()
def process_request(self, request):
    """Set the thread's pinning flag according to the presence of the
    incoming cookie."""
    if (PINNING_COOKIE in request.COOKIES or
            request.method not in READ_ONLY_METHODS):
        pin_this_thread()
    else:
        # In case the last request this thread served was pinned:
        unpin_this_thread()
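# The middleware above only reads the pinning cookie; something has to set it
# after a write so follow-up requests from the same client also hit the
# master. A minimal process_response sketch under assumed names: the
# PINNING_SECONDS setting and the "any non-read method" trigger condition are
# assumptions for illustration, not the library's exact code.
def process_response(self, request, response):
    if request.method not in READ_ONLY_METHODS:
        # A write likely just happened; ask the client to send the pinning
        # cookie back for a while so its subsequent reads are pinned too.
        response.set_cookie(PINNING_COOKIE, value='y',
                            max_age=PINNING_SECONDS)
    return response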
def test_pinned_reads(self):
    """Test PinningMasterSlaveRouter.db_for_read() when pinned and when
    not."""
    router = PinningMasterSlaveRouter()

    eq_(router.db_for_read(TestModel), get_slave())

    pin_this_thread()
    eq_(router.db_for_read(TestModel), MASTER_DATABASE)
def test_pinned_reads(self):
    """Test PinningMasterSlaveRouter.db_for_read() when pinned and when
    not."""
    router = PinningMasterSlaveRouter()

    eq_(router.db_for_read(None), get_slave())

    pin_this_thread()
    eq_(router.db_for_read(None), DEFAULT_DB_ALIAS)
def test_pinning_encapsulation(self):
    """Check the pinning getters and setters."""
    assert not this_thread_is_pinned(), \
        "Thread started out pinned or this_thread_is_pinned() is broken."

    pin_this_thread()
    assert this_thread_is_pinned(), \
        "pin_this_thread() didn't pin the thread."

    unpin_this_thread()
    assert not this_thread_is_pinned(), \
        "Thread remained pinned after unpin_this_thread()."
def test_decorator_resets(self):
    @use_master
    def check():
        self.assertTrue(this_thread_is_pinned())

    pin_this_thread()
    self.assertTrue(this_thread_is_pinned())
    check()
    self.assertTrue(this_thread_is_pinned())
def test_slave_decorator(self):
    @use_slave
    def check():
        self.assertFalse(this_thread_is_pinned())

    pin_this_thread()
    self.assertTrue(this_thread_is_pinned())
    check()
    self.assertTrue(this_thread_is_pinned())
def process_request(self, request):
    if not getattr(request, 'API', False):
        return super(APIPinningMiddleware, self).process_request(request)

    if request.amo_user and not request.amo_user.is_anonymous():
        statsd.incr('api.db.pinned')
        pin_this_thread()
        return

    statsd.incr('api.db.unpinned')
    unpin_this_thread()
def process_request(self, request):
    if not getattr(request, 'API', False):
        return super(APIPinningMiddleware, self).process_request(request)

    if (request.amo_user and not request.amo_user.is_anonymous() and
        (cache.get(self.cache_key(request)) or
         request.method in ['DELETE', 'PATCH', 'POST', 'PUT'])):
        statsd.incr('api.db.pinned')
        pin_this_thread()
        return

    statsd.incr('api.db.unpinned')
    unpin_this_thread()
def process_request(self, request):
    if not getattr(request, 'API', False):
        return super(APIPinningMiddleware, self).process_request(request)

    if (request.user and not request.user.is_anonymous() and
        (cache.get(self.cache_key(request)) or
         request.method in ['DELETE', 'PATCH', 'POST', 'PUT'])):
        statsd.incr('api.db.pinned')
        pin_this_thread()
        return

    statsd.incr('api.db.unpinned')
    unpin_this_thread()
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index

    """
    # Need to import Record here to prevent circular import
    from kitsune.search.models import Record

    cls, id_list = chunk

    task_name = '{0} {1} -> {2}'.format(cls.get_mapping_type_name(),
                                        id_list[0], id_list[-1])

    rec = Record.objects.create(
        starttime=datetime.datetime.now(),
        text=u'Batch: {0} Task: {1}: Reindexing into {2}'.format(
            batch_id, task_name, write_index))

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        index_chunk(cls, id_list, reraise=True)
    except Exception:
        rec.text = u'{0}: Errored out {1} {2}'.format(
            rec.text, sys.exc_type, sys.exc_value)[:255]  # Truncate at 255 chars.
        log.exception('Error while indexing a chunk')

        # Some exceptions aren't pickleable and we need this to throw
        # things that are pickleable.
        raise IndexingTaskError()
    finally:
        unpin_this_thread()
        rec.endtime = datetime.datetime.now()
        rec.save()

    try:
        client = redis_client('default')
        client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        # If Redis isn't running, then we just log that the task
        # was completed.
        log.info('Index task %s completed.', task_name)
def migrate_helpfulvotes(start_id, end_id):
    """Transfer helpfulvotes from old to new version."""

    if not waffle.switch_is_active('migrate-helpfulvotes'):
        raise  # Celery emails the failed IDs so we know which to rerun.

    start = time.time()

    pin_this_thread()  # Pin to master

    transaction.enter_transaction_management()
    transaction.managed(True)
    try:
        cursor = connection.cursor()
        cursor.execute("""INSERT INTO `wiki_helpfulvote`
            (revision_id, helpful, created, creator_id, anonymous_id,
             user_agent)
            SELECT COALESCE(
                (SELECT id FROM `wiki_revision`
                    WHERE `document_id` = wiki_helpfulvoteold.document_id
                    AND `is_approved`=1
                    AND (`reviewed` <= wiki_helpfulvoteold.created
                         OR `reviewed` IS NULL)
                    ORDER BY CASE WHEN `reviewed` IS NULL THEN 1 ELSE 0 END,
                        `wiki_revision`.`created` DESC LIMIT 1),
                (SELECT id FROM `wiki_revision`
                    WHERE `document_id` = wiki_helpfulvoteold.document_id
                    AND (`reviewed` <= wiki_helpfulvoteold.created
                         OR `reviewed` IS NULL)
                    ORDER BY CASE WHEN `reviewed` IS NULL THEN 1 ELSE 0 END,
                        `wiki_revision`.`created` DESC LIMIT 1),
                (SELECT id FROM `wiki_revision`
                    WHERE `document_id` = wiki_helpfulvoteold.document_id
                    ORDER BY `created` ASC LIMIT 1)),
                helpful, created, creator_id, anonymous_id, user_agent
            FROM `wiki_helpfulvoteold`
            WHERE id >= %s AND id < %s""", [start_id, end_id])
        transaction.commit()
    except:
        transaction.rollback()
        raise

    transaction.leave_transaction_management()

    unpin_this_thread()

    d = time.time() - start
    statsd.timing('wiki.migrate_helpfulvotes', int(round(d * 1000)))
def _rebuild_kb_chunk(data, **kwargs):
    """Re-render a chunk of documents.

    Note: Don't use host components when making redirects to wiki pages;
    those redirects won't be auto-pruned when they're 404s.

    """
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    start = time.time()
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)

            # If we know a redirect link to be broken (i.e. if it looks like
            # a link to a document but the document isn't there), log an
            # error:
            url = document.redirect_url()
            if (url and points_to_document_view(url) and
                    not document.redirect_document()):
                log.error('Invalid redirect document: %d' % pk)

            document.html = document.current_revision.content_parsed
            document.save()
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk
        except TitleCollision:
            message = 'TitleCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)
    d = time.time() - start
    statsd.timing('wiki.rebuild_chunk', int(round(d * 1000)))

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    transaction.commit_unless_managed()

    unpin_this_thread()  # Not all tasks need to use the master.
def _rebuild_kb_chunk(data, **kwargs):
    """Re-render a chunk of documents.

    Note: Don't use host components when making redirects to wiki pages;
    those redirects won't be auto-pruned when they're 404s.

    """
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    start = time.time()
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)

            # If we know a redirect link to be broken (i.e. if it looks like
            # a link to a document but the document isn't there), delete it:
            url = document.redirect_url()
            if (url and points_to_document_view(url) and
                    not document.redirect_document()):
                document.delete()
            else:
                document.html = document.current_revision.content_parsed
                document.save()
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk
        except TitleCollision:
            message = 'TitleCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)
    d = time.time() - start
    statsd.timing('wiki.rebuild_chunk', int(round(d * 1000)))

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    transaction.commit_unless_managed()

    unpin_this_thread()  # Not all tasks need to use the master.
def log_answer(answer):
    pin_this_thread()

    # Record karma actions
    AnswerAction(answer.creator, answer.created.date()).save()
    try:
        from kitsune.questions.models import Answer
        answers = Answer.uncached.filter(question=answer.question_id)
        if answer == answers.order_by('created')[0]:
            FirstAnswerAction(answer.creator, answer.created.date()).save()
    except IndexError:
        # If we hit an IndexError, we assume this is the first answer.
        FirstAnswerAction(answer.creator, answer.created.date()).save()

    unpin_this_thread()
def process_request(self, request):
    if not getattr(request, "API", False):
        return super(APIPinningMiddleware, self).process_request(request)

    if (request.user and not request.user.is_anonymous() and
        (cache.get(self.cache_key(request)) or
         request.method in ["DELETE", "PATCH", "POST", "PUT"])):
        statsd.incr("api.db.pinned")
        pin_this_thread()
        return

    statsd.incr("api.db.unpinned")
    unpin_this_thread()
def add_short_links(doc_ids):
    """Create short_url's for a list of docs."""
    base_url = "https://{0}%s".format(Site.objects.get_current().domain)
    docs = Document.objects.filter(id__in=doc_ids)
    try:
        pin_this_thread()  # Stick to master.
        for doc in docs:
            # Use django's reverse so the locale isn't included.
            endpoint = django_reverse("wiki.document", args=[doc.slug])
            doc.update(share_link=generate_short_url(base_url % endpoint))
    except BitlyRateLimitException:
        # The next run of the `generate_missing_share_links` cron job will
        # catch all documents that were unable to be processed.
        pass
    finally:
        unpin_this_thread()
def update_question_votes(question_id):
    from kitsune.questions.models import Question

    log.debug("Got a new QuestionVote for question_id=%s." % question_id)

    # Pin to master db to avoid lag delay issues.
    pin_this_thread()

    try:
        q = Question.objects.get(id=question_id)
        q.sync_num_votes_past_week()
        q.save(force_update=True)
    except Question.DoesNotExist:
        log.info("Question id=%s deleted before task." % question_id)

    unpin_this_thread()
def update_question_votes(question_id):
    from kitsune.questions.models import Question

    log.debug('Got a new QuestionVote for question_id=%s.' % question_id)
    statsd.incr('questions.tasks.update')

    # Pin to master db to avoid lag delay issues.
    pin_this_thread()

    try:
        q = Question.uncached.get(id=question_id)
        q.sync_num_votes_past_week()
        q.save(force_update=True)
    except Question.DoesNotExist:
        log.info('Question id=%s deleted before task.' % question_id)

    unpin_this_thread()
def add_short_links(doc_ids):
    """Create short_url's for a list of docs."""
    base_url = 'https://{0}%s'.format(Site.objects.get_current().domain)
    docs = Document.objects.filter(id__in=doc_ids)
    try:
        pin_this_thread()  # Stick to master.
        for doc in docs:
            endpoint = reverse('wiki.document', locale=doc.locale,
                               args=[doc.slug])
            doc.share_link = generate_short_url(base_url % endpoint)
            doc.save()
    except BitlyRateLimitException:
        # The next run of the `generate_missing_share_links` cron job will
        # catch all documents that were unable to be processed.
        pass
    finally:
        unpin_this_thread()
def add_short_links(doc_ids):
    """Create short_url's for a list of docs."""
    base_url = 'https://{0}%s'.format(Site.objects.get_current().domain)
    docs = Document.objects.filter(id__in=doc_ids)
    try:
        pin_this_thread()  # Stick to master.
        for doc in docs:
            # Use django's reverse so the locale isn't included.
            endpoint = django_reverse('wiki.document', args=[doc.slug])
            doc.update(share_link=generate_short_url(base_url % endpoint))
            statsd.incr('wiki.add_short_links.success')
    except BitlyRateLimitException:
        # The next run of the `generate_missing_share_links` cron job will
        # catch all documents that were unable to be processed.
        statsd.incr('wiki.add_short_links.rate_limited')
        pass
    finally:
        unpin_this_thread()
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index

    """
    # Need to import Record here to prevent circular import
    from search.models import Record

    cls, id_list = chunk

    task_name = '%s %d -> %d' % (cls.get_model_name(), id_list[0],
                                 id_list[-1])

    rec = Record(
        starttime=datetime.datetime.now(),
        text=(u'Batch: %s Task: %s: Reindexing into %s' % (
            batch_id, task_name, write_index)))
    rec.save()

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        index_chunk(cls, id_list, reraise=True)
    except Exception:
        rec.text = (u'%s: Errored out %s %s' % (
            rec.text, sys.exc_type, sys.exc_value))
        raise
    finally:
        unpin_this_thread()
        rec.endtime = datetime.datetime.now()
        rec.save()

    try:
        client = redis_client('default')
        client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
    except RedisError:
        # If Redis isn't running, then we just log that the task
        # was completed.
        log.info('Index task %s completed.', task_name)
def purge_tweets():
    """Periodically purge old tweets for each locale.

    This does a lot of DELETEs on master, so it shouldn't run too
    frequently. Probably once every hour or more.

    """
    # Pin to master
    pin_this_thread()

    # Build list of tweets to delete, by id.
    for locale in settings.SUMO_LANGUAGES:
        locale = settings.LOCALES[locale].iso639_1
        # Some locales don't have an iso639_1 code, too bad for them.
        if not locale:
            continue
        oldest = _get_oldest_tweet(locale, settings.CC_MAX_TWEETS)
        if oldest:
            log.debug("Truncating tweet list: Removing tweets older than %s, "
                      "for [%s]." % (oldest.created, locale))
            Tweet.objects.filter(locale=locale,
                                 created__lte=oldest.created).delete()
def handle(self, **options):
    """
    This does a lot of DELETEs on master, so it shouldn't run too
    frequently. Probably once every hour or more.
    """
    # Pin to master
    pin_this_thread()

    # Build list of tweets to delete, by id.
    for locale in settings.SUMO_LANGUAGES:
        locale = settings.LOCALES[locale].iso639_1
        # Some locales don't have an iso639_1 code, too bad for them.
        if not locale:
            continue
        oldest = _get_oldest_tweet(locale, settings.CC_MAX_TWEETS)
        if oldest:
            log.debug(
                'Truncating tweet list: Removing tweets older than %s, '
                'for [%s].' % (oldest.created, locale))
            Tweet.objects.filter(locale=locale,
                                 created__lte=oldest.created).delete()
def handle(self, **options):
    try:
        # Sends all writes to the master DB. Slaves are readonly.
        pin_this_thread()

        docs = Document.objects.all()
        for d in docs:
            revs = Revision.objects.filter(document=d, is_approved=True)
            revs = list(revs.order_by('-reviewed')[:1])

            if len(revs):
                rev = revs[0]

                if d.current_revision != rev:
                    d.current_revision = rev
                    d.save()
                    print(d.get_absolute_url())
    finally:
        unpin_this_thread()
def index_chunk_task(write_index, batch_id, rec_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg rec_id: the id for the record for this task
    :arg chunk: a (class, id_list) of things to index

    """
    cls_path, id_list = chunk
    cls = from_class_path(cls_path)
    rec = None

    # Need to import Record here to prevent circular import
    from kitsune.search.models import Record

    try:
        # Pin to master db to avoid replication lag issues and stale data.
        pin_this_thread()

        # Update record data.
        rec = Record.objects.get(pk=rec_id)
        rec.start_time = datetime.datetime.now()
        rec.message = "Reindexing into %s" % write_index
        rec.status = Record.STATUS_IN_PROGRESS
        rec.save()

        index_chunk(cls, id_list, reraise=True)

        rec.mark_success()
    except Exception:
        if rec is not None:
            rec.mark_fail("Errored out %s %s" % (sys.exc_info()[0],
                                                 sys.exc_info()[1]))
        log.exception("Error while indexing a chunk")

        # Some exceptions aren't pickleable and we need this to throw
        # things that are pickleable.
        raise IndexingTaskError()
    finally:
        unpin_this_thread()
def unindex_task(cls_path, id_list, **kw):
    """Unindex documents specified by cls and ids"""
    cls = from_class_path(cls_path)
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        for id_ in id_list:
            cls.unindex(id_)
    except Exception as exc:
        retries = unindex_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()

        unindex_task.retry(exc=exc, max_retries=MAX_RETRIES,
                           countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
def log_reply(post):
    pin_this_thread()

    creator = post.author  # TODO: Rename post.author to post.creator.
    created = post.created
    thread = post.thread
    users = [p.author for p in
             thread.post_set.select_related('author').exclude(author=creator)]
    users = set(users)  # Remove duplicates.

    if users:
        action = Action.objects.create(
            creator=creator,
            created=created,
            url=post.get_absolute_url(),
            content_object=post,
            formatter_cls='forums.formatters.ForumReplyFormatter')
        action.users.add(*users)

    transaction.commit_unless_managed()

    unpin_this_thread()