Example #1
def unindex_task(cls, id_list, **kw):
    """Unindex documents specified by cls and ids"""
    statsd.incr('search.tasks.unindex_task.%s' % cls.get_mapping_type_name())
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        for id_ in id_list:
            cls.unindex(id_)
    except Exception as exc:
        retries = unindex_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()

        statsd.incr('search.tasks.unindex_task.retry', 1)
        statsd.incr('search.tasks.unindex_task.retry%d' % RETRY_TIMES[retries],
                    1)

        unindex_task.retry(exc=exc,
                           max_retries=MAX_RETRIES,
                           countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
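
Almost every task in this list follows the same shape: pin, do the work that needs fresh data, and unpin in a finally block so a reused worker thread is never left pinned. A minimal sketch of that shared skeleton, assuming the helpers come from django-multidb-router's multidb.pinning module (the task name and do_work are placeholders):

from multidb.pinning import pin_this_thread, unpin_this_thread

def some_task(*args, **kw):
    try:
        # Send all reads below to the master DB to avoid replication lag.
        pin_this_thread()
        do_work(*args)  # placeholder for the task's real work
    finally:
        # Worker threads are reused, so never leave one pinned.
        unpin_this_thread()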
Example #2
def render_document_cascade(base):
    """Given a document, render it and all documents that may be affected."""

    # This walks along the graph of links between documents. If there is
    # a document A that includes another document B as a template, then
    # there is an edge from A to B in this graph. The goal here is to
    # process every node exactly once. This is robust to cycles and
    # diamonds in the graph, since it keeps track of what nodes have
    # been visited already.

    # In case anything goes wrong, this guarantees we unpin the DB
    try:
        # Sends all writes to the master DB. Slaves are readonly.
        pin_this_thread()

        todo = set([base])
        done = set()

        while todo:
            d = todo.pop()
            if d in done:
                # Don't process a node twice.
                continue
            d.html = d.parse_and_calculate_links()
            d.save()
            done.add(d)
            todo.update(
                l.linked_from
                for l in d.links_to().filter(kind__in=['template', 'include']))

    finally:
        unpin_this_thread()
Example #3
def render_document_cascade(base):
    """Given a document, render it and all documents that may be affected."""

    # This walks along the graph of links between documents. If there is
    # a document A that includes another document B as a template, then
    # there is an edge from A to B in this graph. The goal here is to
    # process every node exactly once. This is robust to cycles and
    # diamonds in the graph, since it keeps track of what nodes have
    # been visited already.

    # In case anything goes wrong, this guarantees we unpin the DB
    try:
        # Sends all writes to the master DB. Slaves are readonly.
        pin_this_thread()

        todo = set([base])
        done = set()

        while todo:
            d = todo.pop()
            if d in done:
                # Don't process a node twice.
                continue
            d.html = d.parse_and_calculate_links()
            d.save()
            done.add(d)
            todo.update(l.linked_from for l in d.links_to()
                        .filter(kind__in=['template', 'include']))

    finally:
        unpin_this_thread()
Example #4
def _rebuild_kb_chunk(data, **kwargs):
    """Re-render a chunk of documents."""
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)
            document.html = document.current_revision.content_cleaned
            document.save()
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    transaction.commit_unless_managed()

    unpin_this_thread()  # Not all tasks need to use the master.
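
Note that transaction.commit_unless_managed() only exists on older Django; it was removed in Django 1.8. On newer versions the closest equivalent is the guard used by a later example in this list:

if not transaction.get_connection().in_atomic_block:
    transaction.commit()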
Example #5
def index_chunk_task(index, batch_id, rec_id, chunk):
    """Index a chunk of things.

    :arg index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg rec_id: the id for the record for this task
    :arg chunk: a (class, id_list) of things to index
    """
    cls, id_list = chunk

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        # Update record data.
        rec = Record.objects.get(pk=rec_id)
        rec.start_time = datetime.datetime.now()
        rec.message = u'Reindexing into %s' % index
        rec.status = Record.STATUS_IN_PROGRESS
        rec.save()

        index_chunk(cls, id_list, reraise=True)

        rec.mark_success()

    except Exception:
        rec.mark_fail(u'Errored out %s %s' % (
                sys.exc_info()[0], sys.exc_info()[1]))
        raise

    finally:
        unpin_this_thread()
Example #6
def log_answer(answer):
    pin_this_thread()

    creator = answer.creator
    created = answer.created
    question = answer.question
    users = [a.creator for a in
             question.answers.select_related('creator').exclude(
                creator=creator)]
    if question.creator != creator:
        users += [question.creator]
    users = set(users)  # Remove duplicates.

    if users:
        action = Action.objects.create(
            creator=creator,
            created=created,
            url=answer.get_absolute_url(),
            content_object=answer,
            formatter_cls='questions.formatters.AnswerFormatter')
        action.users.add(*users)

    transaction.commit_unless_managed()

    # Record karma actions
    AnswerAction(answer.creator, answer.created.date()).save()
    if answer == answer.question.answers.order_by('created')[0]:
        FirstAnswerAction(answer.creator, answer.created.date()).save()

    unpin_this_thread()
Example #7
def index_task(cls_path, id_list, **kw):
    """Index documents specified by cls and ids"""
    cls = from_class_path(cls_path)
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        qs = cls.get_model().objects.filter(pk__in=id_list).values_list(
            "pk", flat=True)
        for id_ in qs:
            try:
                cls.index(cls.extract_document(id_), id_=id_)
            except UnindexMeBro:
                # If extract_document throws this, then we need to
                # remove this item from the index.
                cls.unindex(id_)

    except Exception as exc:
        retries = index_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()

        index_task.retry(exc=exc,
                         max_retries=MAX_RETRIES,
                         countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
Example #8
def _rebuild_kb_chunk(data):
    """Re-render a chunk of documents.

    Note: Don't use host components when making redirects to wiki pages; those
    redirects won't be auto-pruned when they're 404s.

    """
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    start = time.time()
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)

            # If we know a redirect link to be broken (i.e. if it looks like a
            # link to a document but the document isn't there), log an error:
            url = document.redirect_url()
            if (url and points_to_document_view(url) and
                    not document.redirect_document()):
                log.warn('Invalid redirect document: %d' % pk)

            html = document.parse_and_calculate_links()
            if document.html != html:
                # We are calling update here to so we only update the html
                # column instead of all of them. This bypasses post_save
                # signal handlers like the one that triggers reindexing.
                # See bug 797038 and bug 797352.
                Document.objects.filter(pk=pk).update(html=html)
                statsd.incr('wiki.rebuild_chunk.change')
            else:
                statsd.incr('wiki.rebuild_chunk.nochange')
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except Revision.DoesNotExist:
            message = 'Missing revision for document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk
        except TitleCollision:
            message = 'TitleCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)
    d = time.time() - start
    statsd.timing('wiki.rebuild_chunk', int(round(d * 1000)))

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    if not transaction.get_connection().in_atomic_block:
        transaction.commit()

    unpin_this_thread()  # Not all tasks need to use the master.
Example #9
def log_answer(answer):
    pin_this_thread()

    creator = answer.creator
    created = answer.created
    question = answer.question
    users = [
        a.creator for a in question.answers.select_related('creator').exclude(
            creator=creator)
    ]
    if question.creator != creator:
        users += [question.creator]
    users = set(users)  # Remove duplicates.

    if users:
        action = Action.objects.create(
            creator=creator,
            created=created,
            url=answer.get_absolute_url(),
            content_object=answer,
            formatter_cls='questions.formatters.AnswerFormatter')
        action.users.add(*users)

    transaction.commit_unless_managed()
    unpin_this_thread()
Example #10
def _rebuild_kb_chunk(data, **kwargs):
    """Re-render a chunk of documents."""
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)
            document.html = document.current_revision.content_cleaned
            document.save()
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk
        except TitleCollision:
            message = 'TitleCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    transaction.commit_unless_managed()

    unpin_this_thread()  # Not all tasks need to use the master.
Example #11
def index_chunk_task(index, batch_id, rec_id, chunk):
    """Index a chunk of things.

    :arg index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg rec_id: the id for the record for this task
    :arg chunk: a (cls_path, id_list) of things to index
    """
    cls_path, id_list = chunk
    cls = from_class_path(cls_path)
    rec = None

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        # Update record data.
        rec = Record.objects.get(pk=rec_id)
        rec.start_time = datetime.datetime.now()
        rec.message = u'Reindexing into %s' % index
        rec.status = Record.STATUS_IN_PROGRESS
        rec.save()

        index_chunk(cls, id_list)

        rec.mark_success()

    except Exception:
        if rec is not None:
            rec.mark_fail(u'Errored out %s %s' %
                          (sys.exc_info()[0], sys.exc_info()[1]))
        raise

    finally:
        unpin_this_thread()
Example #12
    def handle(self, **options):
        try:
            # Sends all writes to the master DB. Slaves are readonly.
            pin_this_thread()

            # Since we currently use MySQL, we have to load the whole table into memory
            # at once - iterator() won't chunk requests to MySQL. However, we can massively
            # reduce memory usage by only loading the columns we need:
            docs = Document.objects.all().values("id", "current_revision_id")

            for d in docs.iterator():
                revs = Revision.objects.filter(document_id=d["id"],
                                               is_approved=True)
                revs = revs.order_by("-reviewed").values_list("id",
                                                              flat=True)[:1]

                if len(revs):
                    rev_id = revs[0]

                    if d["current_revision_id"] != rev_id:
                        doc = Document.objects.get(id=d["id"])
                        doc.current_revision_id = rev_id
                        doc.save()
                        print(doc.get_absolute_url())
        finally:
            unpin_this_thread()
Example #13
 def test_pinning_encapsulation(self):
     """Check the pinning getters and setters."""
     self.assertFalse(this_thread_is_pinned())
     pin_this_thread()
     self.assertTrue(this_thread_is_pinned())
     unpin_this_thread()
     self.assertFalse(this_thread_is_pinned())
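
The getters and setters under test are thin wrappers around thread-local state. A minimal sketch of how they can be implemented, inferred from the behavior the tests assert rather than taken from the library's source:

import threading

_locals = threading.local()

def this_thread_is_pinned():
    """Return whether this thread currently reads from the master DB."""
    return getattr(_locals, 'pinned', False)

def pin_this_thread():
    """Route this thread's reads to the master DB."""
    _locals.pinned = True

def unpin_this_thread():
    """Let this thread's reads go back to the slaves."""
    _locals.pinned = False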
Example #14
def index_task(cls, id_list, **kw):
    """Index documents specified by cls and ids"""
    statsd.incr('search.tasks.index_task.%s' % cls.get_mapping_type_name())
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        qs = cls.get_model().uncached.filter(id__in=id_list).values_list(
            'id', flat=True)
        for id_ in qs:
            try:
                cls.index(cls.extract_document(id_), id_=id_)
            except UnindexMeBro:
                # If extract_document throws this, then we need to
                # remove this item from the index.
                cls.unindex(id_)

    except Exception as exc:
        retries = index_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()

        statsd.incr('search.tasks.index_task.retry', 1)
        statsd.incr('search.tasks.index_task.retry%d' % RETRY_TIMES[retries],
                    1)

        index_task.retry(exc=exc, max_retries=MAX_RETRIES,
                         countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
Example #15
def index_task(cls, id_list, **kw):
    """Index documents specified by cls and ids"""
    statsd.incr('search.tasks.index_task.%s' % cls.get_mapping_type_name())
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        qs = cls.get_model().uncached.filter(pk__in=id_list).values_list(
            'pk', flat=True)
        for id_ in qs:
            try:
                cls.index(cls.extract_document(id_), id_=id_)
            except UnindexMeBro:
                # If extract_document throws this, then we need to
                # remove this item from the index.
                cls.unindex(id_)

    except Exception as exc:
        retries = index_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()

        statsd.incr('search.tasks.index_task.retry', 1)
        statsd.incr('search.tasks.index_task.retry%d' % RETRY_TIMES[retries],
                    1)

        index_task.retry(exc=exc,
                         max_retries=MAX_RETRIES,
                         countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
Example #16
 def test_slave_context_manager_exception(self):
     pin_this_thread()
     self.assertTrue(this_thread_is_pinned())
     with self.assertRaises(ValueError):
         with use_slave:
             self.assertFalse(this_thread_is_pinned())
             raise ValueError
     self.assertTrue(this_thread_is_pinned())
Example #17
 def middleware(request):
     if (request.method == 'POST' or request.path.startswith('/admin/')
             or request.path.startswith('/accounts/')
             or '/edit' in request.path):
         pin_this_thread()
     else:
         unpin_this_thread()
     return get_response(request)
Example #18
        def thread2_worker():
            pin_this_thread()
            with use_primary_db:
                orchestrator.release()
                thread2_lock.acquire()

            pinned[2] = this_thread_is_pinned()
            orchestrator.release()
Example #19
 def test_decorator_resets(self):
     @use_primary_db
     def check():
         assert this_thread_is_pinned()
     pin_this_thread()
     assert this_thread_is_pinned()
     check()
     assert this_thread_is_pinned()
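
use_primary_db (like the older use_master) works both as a decorator and as a context manager, and the tests above show it restores the thread's previous pin state on exit. One way such an object could be built; a simplified, non-reentrant sketch of that assumed behavior, not the library's source:

from contextlib import ContextDecorator

from multidb.pinning import (
    pin_this_thread, this_thread_is_pinned, unpin_this_thread)

class UsePrimaryDB(ContextDecorator):
    def __enter__(self):
        self._was_pinned = this_thread_is_pinned()
        pin_this_thread()

    def __exit__(self, exc_type, exc, tb):
        # Only unpin if the thread wasn't already pinned on entry.
        if not self._was_pinned:
            unpin_this_thread()
        return False  # never swallow exceptions

use_primary_db = UsePrimaryDB()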
Example #20
 def process_request(self, request):
     """Set the thread's pinning flag according to the presence of the
     incoming cookie."""
     if PINNING_COOKIE in request.COOKIES or request.method not in READ_ONLY_METHODS:
         pin_this_thread()
     else:
         # In case the last request this thread served was pinned:
         unpin_this_thread()
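
The cookie-based middleware pairs this request hook with a response hook: after a write, it hands the client a short-lived cookie so the client's next requests keep reading from the master while replication catches up. A sketch of that response half, assuming the same PINNING_COOKIE name plus a PINNING_SECONDS max-age setting:

def process_response(self, request, response):
    if request.method not in READ_ONLY_METHODS:
        # The client's follow-up reads should see the write it just made.
        response.set_cookie(PINNING_COOKIE, value='y',
                            max_age=PINNING_SECONDS)
    return response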
Example #21
 def test_decorator_resets(self):
     @use_master
     def check():
         assert this_thread_is_pinned()
     pin_this_thread()
     assert this_thread_is_pinned()
     check()
     assert this_thread_is_pinned()
Example #22
    def test_decorator_resets(self):
        @use_primary_db
        def check():
            assert this_thread_is_pinned()

        pin_this_thread()
        assert this_thread_is_pinned()
        check()
        assert this_thread_is_pinned()
Example #23
 def process_request(self, request):
     """Set the thread's pinning flag according to the presence of the
     incoming cookie."""
     if (PINNING_COOKIE in request.COOKIES
             or request.method not in READ_ONLY_METHODS):
         pin_this_thread()
     else:
         # In case the last request this thread served was pinned:
         unpin_this_thread()
Example #24
    def test_pinned_reads(self):
        """Test PinningMasterSlaveRouter.db_for_read() when pinned and when
        not."""
        router = PinningMasterSlaveRouter()

        eq_(router.db_for_read(TestModel), get_slave())

        pin_this_thread()
        eq_(router.db_for_read(TestModel), MASTER_DATABASE)
Example #25
    def test_pinned_reads(self):
        """Test PinningMasterSlaveRouter.db_for_read() when pinned and when
        not."""
        router = PinningMasterSlaveRouter()

        eq_(router.db_for_read(None), get_slave())

        pin_this_thread()
        eq_(router.db_for_read(None), DEFAULT_DB_ALIAS)
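
The routing logic these tests exercise is a one-liner on top of the pin flag. A sketch consistent with the assertions above, using the same MASTER_DATABASE and get_slave names; not necessarily the library's exact source:

class PinningMasterSlaveRouter(object):
    def db_for_read(self, model, **hints):
        # Pinned threads read from the master; everyone else gets a slave.
        return MASTER_DATABASE if this_thread_is_pinned() else get_slave()

    def db_for_write(self, model, **hints):
        # Writes always go to the master.
        return MASTER_DATABASE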
Example #26
    def test_pinning_encapsulation(self):
        """Check the pinning getters and setters."""
        assert not this_thread_is_pinned(), "Thread started out pinned or this_thread_is_pinned() is broken."

        pin_this_thread()
        assert this_thread_is_pinned(), "pin_this_thread() didn't pin the thread."

        unpin_this_thread()
        assert not this_thread_is_pinned(), "Thread remained pinned after unpin_this_thread()."
Example #27
    def test_decorator_resets(self):

        @use_master
        def check():
            self.assertTrue(this_thread_is_pinned())

        pin_this_thread()
        self.assertTrue(this_thread_is_pinned())
        check()
        self.assertTrue(this_thread_is_pinned())
Example #28
    def test_slave_decorator(self):

        @use_slave
        def check():
            self.assertFalse(this_thread_is_pinned())

        pin_this_thread()
        self.assertTrue(this_thread_is_pinned())
        check()
        self.assertTrue(this_thread_is_pinned())
Example #29
    def process_request(self, request):
        if not getattr(request, 'API', False):
            return super(APIPinningMiddleware, self).process_request(request)

        if request.amo_user and not request.amo_user.is_anonymous():
            statsd.incr('api.db.pinned')
            pin_this_thread()
            return

        statsd.incr('api.db.unpinned')
        unpin_this_thread()
Example #30
    def test_pinning_encapsulation(self):
        """Check the pinning getters and setters."""
        assert not this_thread_is_pinned(), \
            "Thread started out pinned or this_thread_is_pinned() is broken."

        pin_this_thread()
        assert this_thread_is_pinned(), \
            "pin_this_thread() didn't pin the thread."

        unpin_this_thread()
        assert not this_thread_is_pinned(), \
            "Thread remained pinned after unpin_this_thread()."
Example #31
    def process_request(self, request):
        if not getattr(request, 'API', False):
            return super(APIPinningMiddleware, self).process_request(request)

        if (request.amo_user and not request.amo_user.is_anonymous()
                and (cache.get(self.cache_key(request))
                     or request.method in ['DELETE', 'PATCH', 'POST', 'PUT'])):
            statsd.incr('api.db.pinned')
            pin_this_thread()
            return

        statsd.incr('api.db.unpinned')
        unpin_this_thread()
Example #32
    def process_request(self, request):
        if not getattr(request, 'API', False):
            return super(APIPinningMiddleware, self).process_request(request)

        if (request.user and not request.user.is_anonymous() and
                (cache.get(self.cache_key(request)) or
                 request.method in ['DELETE', 'PATCH', 'POST', 'PUT'])):
            statsd.incr('api.db.pinned')
            pin_this_thread()
            return

        statsd.incr('api.db.unpinned')
        unpin_this_thread()
Example #33
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index
    """
    # Need to import Record here to prevent circular import
    from kitsune.search.models import Record

    cls, id_list = chunk

    task_name = '{0} {1} -> {2}'.format(cls.get_mapping_type_name(),
                                        id_list[0], id_list[-1])

    rec = Record.objects.create(
        starttime=datetime.datetime.now(),
        text=u'Batch: {0} Task: {1}: Reindexing into {2}'.format(
            batch_id, task_name, write_index))

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        index_chunk(cls, id_list, reraise=True)

    except Exception:
        rec.text = u'{0}: Errored out {1} {2}'.format(
            rec.text, sys.exc_info()[0],
            sys.exc_info()[1])[:255]  # Truncate at 255 chars.

        log.exception('Error while indexing a chunk')

        # Some exceptions aren't pickleable and we need this to throw
        # things that are pickleable.
        raise IndexingTaskError()

    finally:
        unpin_this_thread()
        rec.endtime = datetime.datetime.now()
        rec.save()

        try:
            client = redis_client('default')
            client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
        except RedisError:
            # If Redis isn't running, then we just log that the task
            # was completed.
            log.info('Index task %s completed.', task_name)
Example #34
def migrate_helpfulvotes(start_id, end_id):
    """Transfer helpfulvotes from old to new version."""

    if not waffle.switch_is_active('migrate-helpfulvotes'):
        # Celery emails the failed IDs so we know which chunks to rerun.
        raise RuntimeError('migrate-helpfulvotes waffle switch is not active')

    start = time.time()

    pin_this_thread()  # Pin to master

    transaction.enter_transaction_management()
    transaction.managed(True)
    try:
        cursor = connection.cursor()
        cursor.execute("""INSERT INTO `wiki_helpfulvote`
            (revision_id, helpful, created,
            creator_id, anonymous_id, user_agent)
            SELECT COALESCE(
                    (SELECT id FROM `wiki_revision`
                        WHERE `document_id` = wiki_helpfulvoteold.document_id
                            AND `is_approved`=1 AND
                            (`reviewed` <= wiki_helpfulvoteold.created
                                OR `reviewed` IS NULL)
                        ORDER BY CASE WHEN `reviewed`
                            IS NULL THEN 1 ELSE 0 END,
                                  `wiki_revision`.`created` DESC LIMIT 1),
                    (SELECT id FROM `wiki_revision`
                        WHERE `document_id` = wiki_helpfulvoteold.document_id
                            AND (`reviewed` <= wiki_helpfulvoteold.created
                                OR `reviewed` IS NULL)
                        ORDER BY CASE WHEN `reviewed`
                            IS NULL THEN 1 ELSE 0 END,
                                `wiki_revision`.`created`  DESC LIMIT 1),
                    (SELECT id FROM `wiki_revision`
                        WHERE `document_id` = wiki_helpfulvoteold.document_id
                        ORDER BY `created` ASC LIMIT 1)),
                helpful, created, creator_id, anonymous_id, user_agent
            FROM `wiki_helpfulvoteold` WHERE id >= %s AND id < %s""",
            [start_id, end_id])
        transaction.commit()
    except:
        transaction.rollback()
        raise

    transaction.leave_transaction_management()

    unpin_this_thread()

    d = time.time() - start
    statsd.timing('wiki.migrate_helpfulvotes', int(round(d * 1000)))
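
enter_transaction_management(), managed() and leave_transaction_management() were removed in Django 1.8. A sketch of the same pin-then-write pattern on modern Django, assuming the INSERT ... SELECT above is bound to a MIGRATE_SQL constant:

from django.db import connection, transaction

def migrate_helpfulvotes_modern(start_id, end_id):
    pin_this_thread()  # keep the writes on the master
    try:
        # atomic() replaces the manual enter/commit/rollback/leave dance.
        with transaction.atomic():
            with connection.cursor() as cursor:
                cursor.execute(MIGRATE_SQL, [start_id, end_id])
    finally:
        unpin_this_thread()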
Example #35
def _rebuild_kb_chunk(data, **kwargs):
    """Re-render a chunk of documents.

    Note: Don't use host components when making redirects to wiki pages; those
    redirects won't be auto-pruned when they're 404s.

    """
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    start = time.time()
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)

            # If we know a redirect link to be broken (i.e. if it looks like a
            # link to a document but the document isn't there), log an error:
            url = document.redirect_url()
            if (url and points_to_document_view(url)
                    and not document.redirect_document()):
                log.error('Invalid redirect document: %d' % pk)

            document.html = document.current_revision.content_parsed
            document.save()
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk
        except TitleCollision:
            message = 'TitleCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)
    d = time.time() - start
    statsd.timing('wiki.rebuild_chunk', int(round(d * 1000)))

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    transaction.commit_unless_managed()

    unpin_this_thread()  # Not all tasks need to use the master.
Example #36
def _rebuild_kb_chunk(data, **kwargs):
    """Re-render a chunk of documents.

    Note: Don't use host components when making redirects to wiki pages; those
    redirects won't be auto-pruned when they're 404s.

    """
    log.info('Rebuilding %s documents.' % len(data))

    pin_this_thread()  # Stick to master.

    messages = []
    start = time.time()
    for pk in data:
        message = None
        try:
            document = Document.objects.get(pk=pk)

            # If we know a redirect link to be broken (i.e. if it looks like a
            # link to a document but the document isn't there), delete it:
            url = document.redirect_url()
            if (url and points_to_document_view(url) and
                    not document.redirect_document()):
                document.delete()
            else:
                document.html = document.current_revision.content_parsed
                document.save()
        except Document.DoesNotExist:
            message = 'Missing document: %d' % pk
        except ValidationError as e:
            message = 'ValidationError for %d: %s' % (pk, e.messages[0])
        except SlugCollision:
            message = 'SlugCollision: %d' % pk
        except TitleCollision:
            message = 'TitleCollision: %d' % pk

        if message:
            log.debug(message)
            messages.append(message)
    d = time.time() - start
    statsd.timing('wiki.rebuild_chunk', int(round(d * 1000)))

    if messages:
        subject = ('[%s] Exceptions raised in _rebuild_kb_chunk()' %
                   settings.PLATFORM_NAME)
        mail_admins(subject=subject, message='\n'.join(messages))
    transaction.commit_unless_managed()

    unpin_this_thread()  # Not all tasks need to use the master.
Example #37
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index
    """
    # Need to import Record here to prevent circular import
    from kitsune.search.models import Record

    cls, id_list = chunk

    task_name = '{0} {1} -> {2}'.format(
        cls.get_mapping_type_name(), id_list[0], id_list[-1])

    rec = Record.objects.create(
        starttime=datetime.datetime.now(),
        text=u'Batch: {0} Task: {1}: Reindexing into {2}'.format(
            batch_id, task_name, write_index))

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()

        index_chunk(cls, id_list, reraise=True)

    except Exception:
        rec.text = u'{0}: Errored out {1} {2}'.format(
            rec.text, sys.exc_info()[0],
            sys.exc_info()[1])[:255]  # Truncate at 255 chars.

        log.exception('Error while indexing a chunk')

        # Some exceptions aren't pickleable and we need this to throw
        # things that are pickleable.
        raise IndexingTaskError()

    finally:
        unpin_this_thread()
        rec.endtime = datetime.datetime.now()
        rec.save()

        try:
            client = redis_client('default')
            client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
        except RedisError:
            # If Redis isn't running, then we just log that the task
            # was completed.
            log.info('Index task %s completed.', task_name)
Example #38
def log_answer(answer):
    pin_this_thread()

    # Record karma actions
    AnswerAction(answer.creator, answer.created.date()).save()
    try:
        from kitsune.questions.models import Answer
        answers = Answer.uncached.filter(question=answer.question_id)
        if answer == answers.order_by('created')[0]:
            FirstAnswerAction(answer.creator, answer.created.date()).save()
    except IndexError:
        # If we hit an IndexError, we assume this is the first answer.
        FirstAnswerAction(answer.creator, answer.created.date()).save()

    unpin_this_thread()
Example #39
    def process_request(self, request):
        if not getattr(request, "API", False):
            return super(APIPinningMiddleware, self).process_request(request)

        if (
            request.user
            and not request.user.is_anonymous()
            and (cache.get(self.cache_key(request)) or request.method in ["DELETE", "PATCH", "POST", "PUT"])
        ):
            statsd.incr("api.db.pinned")
            pin_this_thread()
            return

        statsd.incr("api.db.unpinned")
        unpin_this_thread()
Example #40
def add_short_links(doc_ids):
    """Create short_url's for a list of docs."""
    base_url = "https://{0}%s".format(Site.objects.get_current().domain)
    docs = Document.objects.filter(id__in=doc_ids)
    try:
        pin_this_thread()  # Stick to master.
        for doc in docs:
            # Use django's reverse so the locale isn't included.
            endpoint = django_reverse("wiki.document", args=[doc.slug])
            doc.update(share_link=generate_short_url(base_url % endpoint))
    except BitlyRateLimitException:
        # The next run of the `generate_missing_share_links` cron job will
        # catch all documents that could not be processed.
        pass
    finally:
        unpin_this_thread()
Example #41
def update_question_votes(question_id):
    from kitsune.questions.models import Question

    log.debug("Got a new QuestionVote for question_id=%s." % question_id)

    # Pin to master db to avoid lag delay issues.
    pin_this_thread()

    try:
        q = Question.objects.get(id=question_id)
        q.sync_num_votes_past_week()
        q.save(force_update=True)
    except Question.DoesNotExist:
        log.info("Question id=%s deleted before task." % question_id)

    unpin_this_thread()
Example #42
def update_question_votes(question_id):
    from kitsune.questions.models import Question

    log.debug('Got a new QuestionVote for question_id=%s.' % question_id)
    statsd.incr('questions.tasks.update')

    # Pin to master db to avoid lag delay issues.
    pin_this_thread()

    try:
        q = Question.uncached.get(id=question_id)
        q.sync_num_votes_past_week()
        q.save(force_update=True)
    except Question.DoesNotExist:
        log.info('Question id=%s deleted before task.' % question_id)

    unpin_this_thread()
Example #43
def add_short_links(doc_ids):
    """Create short_url's for a list of docs."""
    base_url = 'https://{0}%s'.format(Site.objects.get_current().domain)
    docs = Document.objects.filter(id__in=doc_ids)
    try:
        pin_this_thread()  # Stick to master.
        for doc in docs:
            endpoint = reverse('wiki.document',
                               locale=doc.locale,
                               args=[doc.slug])
            doc.share_link = generate_short_url(base_url % endpoint)
            doc.save()
    except BitlyRateLimitException:
        # The next run of the `generate_missing_share_links` cron job will
        # catch all documents that could not be processed.
        pass
    finally:
        unpin_this_thread()
Example #44
def add_short_links(doc_ids):
    """Create short_url's for a list of docs."""
    base_url = 'https://{0}%s'.format(Site.objects.get_current().domain)
    docs = Document.objects.filter(id__in=doc_ids)
    try:
        pin_this_thread()  # Stick to master.
        for doc in docs:
            # Use django's reverse so the locale isn't included.
            endpoint = django_reverse('wiki.document', args=[doc.slug])
            doc.update(share_link=generate_short_url(base_url % endpoint))
            statsd.incr('wiki.add_short_links.success')
    except BitlyRateLimitException:
        # The next run of the `generate_missing_share_links` cron job will
        # catch all documents that could not be processed.
        statsd.incr('wiki.add_short_links.rate_limited')
        pass
    finally:
        unpin_this_thread()
Example #45
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index
    """
    # Need to import Record here to prevent circular import
    from search.models import Record

    cls, id_list = chunk

    task_name = '%s %d -> %d' % (cls.get_model_name(), id_list[0], id_list[-1])

    rec = Record(
        starttime=datetime.datetime.now(),
        text=(u'Batch: %s Task: %s: Reindexing into %s' % (
                batch_id, task_name, write_index)))
    rec.save()

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        index_chunk(cls, id_list, reraise=True)

    except Exception:
        rec.text = (u'%s: Errored out %s %s' % (
                rec.text, sys.exc_info()[0], sys.exc_info()[1]))
        raise
    finally:
        unpin_this_thread()
        rec.endtime = datetime.datetime.now()
        rec.save()

        try:
            client = redis_client('default')
            client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
        except RedisError:
            # If Redis isn't running, then we just log that the task
            # was completed.
            log.info('Index task %s completed.', task_name)
Example #46
def purge_tweets():
    """Periodically purge old tweets for each locale.

    This does a lot of DELETEs on master, so it shouldn't run too frequently.
    Probably once every hour or more.

    """
    # Pin to master
    pin_this_thread()

    # Build list of tweets to delete, by id.
    for locale in settings.SUMO_LANGUAGES:
        locale = settings.LOCALES[locale].iso639_1
        # Some locales don't have an iso639_1 code, too bad for them.
        if not locale:
            continue
        oldest = _get_oldest_tweet(locale, settings.CC_MAX_TWEETS)
        if oldest:
            log.debug("Truncating tweet list: Removing tweets older than %s, " "for [%s]." % (oldest.created, locale))
            Tweet.objects.filter(locale=locale, created__lte=oldest.created).delete()
Example #47
    def handle(self, **options):
        """
        This does a lot of DELETEs on master, so it shouldn't run too frequently.
        Probably once every hour or more.
        """
        # Pin to master
        pin_this_thread()

        # Build list of tweets to delete, by id.
        for locale in settings.SUMO_LANGUAGES:
            locale = settings.LOCALES[locale].iso639_1
            # Some locales don't have an iso639_1 code, too bad for them.
            if not locale:
                continue
            oldest = _get_oldest_tweet(locale, settings.CC_MAX_TWEETS)
            if oldest:
                log.debug(
                    'Truncating tweet list: Removing tweets older than %s, for [%s].' %
                    (oldest.created, locale))
                Tweet.objects.filter(locale=locale, created__lte=oldest.created).delete()
Example #48
    def handle(self, **options):
        try:
            # Sends all writes to the master DB. Slaves are readonly.
            pin_this_thread()

            docs = Document.objects.all()

            for d in docs:
                revs = Revision.objects.filter(document=d, is_approved=True)
                revs = list(revs.order_by('-reviewed')[:1])

                if len(revs):
                    rev = revs[0]

                    if d.current_revision != rev:
                        d.current_revision = rev
                        d.save()
                        print(d.get_absolute_url())
        finally:
            unpin_this_thread()
Example #49
def index_chunk_task(write_index, batch_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg chunk: a (class, id_list) of things to index
    """
    # Need to import Record here to prevent circular import
    from search.models import Record

    cls, id_list = chunk

    task_name = '%s %d -> %d' % (cls.get_model_name(), id_list[0], id_list[-1])

    rec = Record(starttime=datetime.datetime.now(),
                 text=(u'Batch: %s Task: %s: Reindexing into %s' %
                       (batch_id, task_name, write_index)))
    rec.save()

    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        index_chunk(cls, id_list, reraise=True)

    except Exception:
        rec.text = (u'%s: Errored out %s %s' %
                    (rec.text, sys.exc_info()[0], sys.exc_info()[1]))
        raise
    finally:
        unpin_this_thread()
        rec.endtime = datetime.datetime.now()
        rec.save()

        try:
            client = redis_client('default')
            client.decr(OUTSTANDING_INDEX_CHUNKS, 1)
        except RedisError:
            # If Redis isn't running, then we just log that the task
            # was completed.
            log.info('Index task %s completed.', task_name)
Example #50
def index_chunk_task(write_index, batch_id, rec_id, chunk):
    """Index a chunk of things.

    :arg write_index: the name of the index to index to
    :arg batch_id: the name for the batch this chunk belongs to
    :arg rec_id: the id for the record for this task
    :arg chunk: a (class, id_list) of things to index
    """
    cls_path, id_list = chunk
    cls = from_class_path(cls_path)
    rec = None

    # Need to import Record here to prevent circular import
    from kitsune.search.models import Record

    try:
        # Pin to master db to avoid replication lag issues and stale data.
        pin_this_thread()

        # Update record data.
        rec = Record.objects.get(pk=rec_id)
        rec.start_time = datetime.datetime.now()
        rec.message = "Reindexing into %s" % write_index
        rec.status = Record.STATUS_IN_PROGRESS
        rec.save()

        index_chunk(cls, id_list, reraise=True)
        rec.mark_success()

    except Exception:
        if rec is not None:
            rec.mark_fail("Errored out %s %s" %
                          (sys.exc_info()[0], sys.exc_info()[1]))

        log.exception("Error while indexing a chunk")
        # Some exceptions aren't pickleable and we need this to throw
        # things that are pickleable.
        raise IndexingTaskError()

    finally:
        unpin_this_thread()
Example #51
def unindex_task(cls_path, id_list, **kw):
    """Unindex documents specified by cls and ids"""
    cls = from_class_path(cls_path)
    try:
        # Pin to master db to avoid replication lag issues and stale
        # data.
        pin_this_thread()
        for id_ in id_list:
            cls.unindex(id_)
    except Exception as exc:
        retries = unindex_task.request.retries
        if retries >= MAX_RETRIES:
            # Some exceptions aren't pickleable and we need this to
            # throw things that are pickleable.
            raise IndexingTaskError()

        unindex_task.retry(exc=exc,
                           max_retries=MAX_RETRIES,
                           countdown=RETRY_TIMES[retries])
    finally:
        unpin_this_thread()
Example #52
def log_reply(post):
    pin_this_thread()

    creator = post.author  # TODO: Rename post.author to post.creator.
    created = post.created
    thread = post.thread
    users = [p.author for p in
             thread.post_set.select_related('author').exclude(author=creator)]
    users = set(users)  # Remove duplicates.

    if users:
        action = Action.objects.create(
            creator=creator,
            created=created,
            url=post.get_absolute_url(),
            content_object=post,
            formatter_cls='forums.formatters.ForumReplyFormatter')
        action.users.add(*users)

    transaction.commit_unless_managed()
    unpin_this_thread()