Example #1
    def test_contribution_msec(self):
        # Test that the contribution time shows up correctly as None.
        self.signup(self.EMAIL, self.USERNAME)
        self.login(self.EMAIL)
        user_id = self.get_user_id_from_email(self.EMAIL)
        response_dict = self.get_json(
            '/profilehandler/data/%s' % self.USERNAME)
        self.assertIsNone(response_dict['first_contribution_msec'])

        # Update the first_contribution_msec to the current time in
        # milliseconds.
        first_time_in_msecs = utils.get_current_time_in_millisecs()
        user_services.update_first_contribution_msec_if_not_set(
            user_id, first_time_in_msecs)

        # Test that the contribution date correctly changes to
        # first_time_in_msecs.
        response_dict = self.get_json(
            '/profilehandler/data/%s' % self.USERNAME)
        self.assertEqual(
            response_dict['first_contribution_msec'],
            first_time_in_msecs)

        # Test that the contribution date is not changed after the first time it
        # is set.
        second_time_in_msecs = utils.get_current_time_in_millisecs()
        user_services.update_first_contribution_msec_if_not_set(
            user_id, second_time_in_msecs)
        response_dict = self.get_json(
            '/profilehandler/data/%s' % self.USERNAME)
        self.assertEqual(
            response_dict['first_contribution_msec'],
            first_time_in_msecs)
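Note: every example on this page calls utils.get_current_time_in_millisecs(). As a rough reference only, a helper with the assumed behavior (milliseconds since the Unix epoch, as a float) could be sketched like this; it is not necessarily the project's actual implementation:

import datetime

def get_current_time_in_millisecs():
    # Assumed behavior: milliseconds elapsed since the Unix epoch, as a float.
    epoch = datetime.datetime(1970, 1, 1)
    return (datetime.datetime.utcnow() - epoch).total_seconds() * 1000.0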
Example #2
    def _run_job(cls, job_id, additional_job_params):
        """Starts the job."""
        logging.info(
            'Job %s started at %s' %
            (job_id, utils.get_current_time_in_millisecs()))
        cls.register_start(job_id)

        try:
            result = cls._run(additional_job_params)
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.error(
                'Job %s failed at %s' %
                (job_id, utils.get_current_time_in_millisecs()))
            cls.register_failure(
                job_id, '%s\n%s' % (unicode(e), traceback.format_exc()))
            raise taskqueue_services.PermanentTaskFailure(
                'Task failed: %s\n%s' % (unicode(e), traceback.format_exc()))

        # Note that the job may have been canceled after it started and before
        # it reached this stage. This will result in an exception when the
        # validity of the status code transition is checked.
        cls.register_completion(job_id, result)
        logging.info(
            'Job %s completed at %s' %
            (job_id, utils.get_current_time_in_millisecs()))
Example #3
    def generate_new_thread_id(cls, entity_type: str, entity_id: str) -> str:
        """Generates a new thread ID which is unique.

        Args:
            entity_type: str. The type of the entity.
            entity_id: str. The ID of the entity.

        Returns:
            str. A thread ID that is different from the IDs of all
            the existing threads within the given entity.

        Raises:
            Exception. There were too many collisions with existing thread IDs
                when attempting to generate a new thread ID.
        """
        for _ in python_utils.RANGE(_MAX_RETRIES):
            thread_id = (
                '%s.%s.%s%s' %
                (entity_type, entity_id,
                 utils.base64_from_int(
                     int(utils.get_current_time_in_millisecs())),
                 utils.base64_from_int(utils.get_random_int(_RAND_RANGE))))
            if not cls.get_by_id(thread_id):
                return thread_id
        raise Exception(
            'New thread id generator is producing too many collisions.')
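Note: the generated ID concatenates the entity type, the entity ID, a base64 encoding of the current time in milliseconds, and a base64-encoded random integer. The snippet below rebuilds that shape outside the model class; base64_from_int is an assumed stand-in for utils.base64_from_int, and the random range stands in for _RAND_RANGE:

import base64
import random
import time

def base64_from_int(value):
    # Assumed stand-in for utils.base64_from_int: URL-safe base64 of the
    # integer's decimal string.
    encoded = base64.urlsafe_b64encode(str(int(value)).encode('utf-8'))
    return encoded.decode('utf-8')

# Same shape as above: '<entity_type>.<entity_id>.<time_b64><rand_b64>'.
thread_id = '%s.%s.%s%s' % (
    'exploration', 'exp_1',
    base64_from_int(time.time() * 1000),
    base64_from_int(random.randint(0, 100000)))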
Example #4
    def _real_enqueue(cls, job_id):
        entity_class_types = cls.entity_classes_to_map_over()
        entity_class_names = [
            '%s.%s' %
            (entity_class_type.__module__, entity_class_type.__name__)
            for entity_class_type in entity_class_types
        ]

        kwargs = {
            'job_name': job_id,
            'mapper_spec': '%s.%s.map' % (cls.__module__, cls.__name__),
            'reducer_spec': '%s.%s.reduce' % (cls.__module__, cls.__name__),
            'input_reader_spec': (
                'core.jobs.MultipleDatastoreEntitiesInputReader'),
            'output_writer_spec': (
                'mapreduce.output_writers.BlobstoreRecordsOutputWriter'),
            'mapper_params': {
                MAPPER_PARAM_KEY_ENTITY_KINDS: entity_class_names,
                # Note that all parameters passed to the mapper need to be
                # strings. Also note that the value for this key is determined
                # just before enqueue time, so it will be roughly equal to the
                # actual enqueue time.
                MAPPER_PARAM_KEY_QUEUED_TIME_MSECS: str(
                    utils.get_current_time_in_millisecs()),
            }
        }
        mr_pipeline = MapReduceJobPipeline(
            job_id, '%s.%s' % (cls.__module__, cls.__name__), kwargs)
        mr_pipeline.start(base_path='/mapreduce/worker/pipeline')
Example #5
    def enqueue(
            cls, job_id, queue_name,
            additional_job_params=None, shard_count=None):
        """Marks a job as queued and adds it to a queue for processing.

        Args:
            job_id: str. The ID of the job to enqueue.
            queue_name: str. The queue name the job should be run in. See
                core.platform.taskqueue.gae_taskqueue_services for supported
                values.
            additional_job_params: dict(str : *) or None. Additional parameters
                for the job.
            shard_count: int. Number of shards used for the job.
        """
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(
            job_id, model.status_code, STATUS_CODE_QUEUED)
        cls._require_correct_job_type(model.job_type)

        # Enqueue the job.
        cls._real_enqueue(
            job_id, queue_name, additional_job_params, shard_count)

        model.status_code = STATUS_CODE_QUEUED
        model.time_queued_msec = utils.get_current_time_in_millisecs()
        model.additional_job_params = additional_job_params
        model.update_timestamps()
        model.put()
Example #6
def _change_activity_status(committer_id, activity_id, activity_type,
                            new_status, commit_message):
    """Changes the status of the given activity.

    Args:
        committer_id: str. ID of the user who is performing the update action.
        activity_id: str. ID of the activity.
        activity_type: str. The type of activity. Possible values:
            constants.ACTIVITY_TYPE_EXPLORATION
            constants.ACTIVITY_TYPE_COLLECTION
        new_status: str. The new status of the activity.
        commit_message: str. The human-written commit message for this change.
    """
    activity_rights = _get_activity_rights(activity_type, activity_id)
    old_status = activity_rights.status
    activity_rights.status = new_status
    if activity_type == constants.ACTIVITY_TYPE_EXPLORATION:
        cmd_type = CMD_CHANGE_EXPLORATION_STATUS
    elif activity_type == constants.ACTIVITY_TYPE_COLLECTION:
        cmd_type = CMD_CHANGE_COLLECTION_STATUS
    commit_cmds = [{
        'cmd': cmd_type,
        'old_status': old_status,
        'new_status': new_status
    }]

    if new_status != ACTIVITY_STATUS_PRIVATE:
        activity_rights.viewer_ids = []
        if activity_rights.first_published_msec is None:
            activity_rights.first_published_msec = (
                utils.get_current_time_in_millisecs())

    _save_activity_rights(committer_id, activity_rights, activity_type,
                          commit_message, commit_cmds)
    _update_activity_summary(activity_type, activity_rights)
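Note: for an exploration moving from a private to a public status, the commit command list built above has the following shape; the string values are illustrative assumptions, not the project's actual constants:

# Illustrative values only.
commit_cmds = [{
    'cmd': 'change_exploration_status',  # value of CMD_CHANGE_EXPLORATION_STATUS
    'old_status': 'private',             # previous activity_rights.status
    'new_status': 'public'               # the requested new_status
}]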
Example #7
def _change_activity_status(committer_id, activity_id, activity_type,
                            new_status, commit_message):
    """Change the status of an activity. Commits changes.

    Args:
    - committer_id: str. The id of the user who is performing the update
        action.
    - activity_id: str. The id of the collection or activity.
    - activity_type: str. One of feconf.ACTIVITY_TYPE_EXPLORATION or
        feconf.ACTIVITY_TYPE_COLLECTION.
    - new_status: str. The new status of the exploration.
    - commit_message: str. The human-written commit message for this change.
    """
    activity_rights = _get_activity_rights(activity_type, activity_id)
    old_status = activity_rights.status
    activity_rights.status = new_status
    if activity_type == feconf.ACTIVITY_TYPE_EXPLORATION:
        cmd_type = CMD_CHANGE_EXPLORATION_STATUS
    elif activity_type == feconf.ACTIVITY_TYPE_COLLECTION:
        cmd_type = CMD_CHANGE_COLLECTION_STATUS
    commit_cmds = [{
        'cmd': cmd_type,
        'old_status': old_status,
        'new_status': new_status
    }]

    if new_status != ACTIVITY_STATUS_PRIVATE:
        activity_rights.viewer_ids = []
        if activity_rights.first_published_msec is None:
            activity_rights.first_published_msec = (
                utils.get_current_time_in_millisecs())

    _save_activity_rights(committer_id, activity_rights, activity_type,
                          commit_message, commit_cmds)
    _update_activity_summary(activity_type, activity_rights)
Example #8
def update_collection(committer_id, collection_id, change_list,
                      commit_message):
    """Updates a collection. Commits changes.

    Args:
        committer_id: str. The id of the user who is performing the update
            action.
        collection_id: str. The collection id.
        change_list: list(dict). Each entry represents a CollectionChange
            object. These changes are applied in sequence to produce the
            resulting collection.
        commit_message: str or None. A description of changes made to the
            collection. For published collections, this must be present; for
            unpublished collections, it may be equal to None.
    """
    is_public = rights_manager.is_collection_public(collection_id)

    if is_public and not commit_message:
        raise ValueError(
            'Collection is public so expected a commit message but '
            'received none.')

    collection = apply_change_list(collection_id, change_list)

    _save_collection(committer_id, collection, commit_message, change_list)
    update_collection_summary(collection.id, committer_id)

    if (not rights_manager.is_collection_private(collection.id)
            and committer_id != feconf.MIGRATION_BOT_USER_ID):
        user_services.update_first_contribution_msec_if_not_set(
            committer_id, utils.get_current_time_in_millisecs())
Example #9
File: jobs.py Project: miyucy/oppia
    def _real_enqueue(cls, job_id):
        entity_class_types = cls.entity_classes_to_map_over()
        entity_class_names = [
            '%s.%s' % (
                entity_class_type.__module__, entity_class_type.__name__)
            for entity_class_type in entity_class_types]

        kwargs = {
            'job_name': job_id,
            'mapper_spec': '%s.%s.map' % (cls.__module__, cls.__name__),
            'reducer_spec': '%s.%s.reduce' % (cls.__module__, cls.__name__),
            'input_reader_spec': (
                'core.jobs.MultipleDatastoreEntitiesInputReader'),
            'output_writer_spec': (
                'mapreduce.output_writers.BlobstoreRecordsOutputWriter'),
            'mapper_params': {
                MAPPER_PARAM_KEY_ENTITY_KINDS: entity_class_names,
                # Note that all parameters passed to the mapper need to be
                # strings. Also note that the value for this key is determined
                # just before enqueue time, so it will be roughly equal to the
                # actual enqueue time.
                MAPPER_PARAM_KEY_QUEUED_TIME_MSECS: str(
                    utils.get_current_time_in_millisecs()),
            }
        }
        mr_pipeline = MapReduceJobPipeline(
            job_id, '%s.%s' % (cls.__module__, cls.__name__), kwargs)
        mr_pipeline.start(base_path='/mapreduce/worker/pipeline')
Example #10
    def register_completion(
            cls, job_id, output_list, max_output_len_chars=None):
        """Marks a job as completed.

        Args:
            job_id: str. The ID of the job to complete.
            output_list: list(object). The output produced by the job.
            max_output_len_chars: int or None. Max length of output_list.
                If None, the default maximum output length is used.
        """
        _default_max_len_chars = 900000
        _max_output_len_chars = (
            _default_max_len_chars if max_output_len_chars is None else
            max_output_len_chars)
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(
            job_id, model.status_code, STATUS_CODE_COMPLETED)
        cls._require_correct_job_type(model.job_type)

        model.status_code = STATUS_CODE_COMPLETED
        model.time_finished_msec = utils.get_current_time_in_millisecs()
        model.output = cls._compress_output_list(
            output_list, _max_output_len_chars)
        model.update_timestamps()
        model.put()

        cls._post_completed_hook(job_id)
Example #11
    def _validate_time_fields(cls, item):
        """Validate the time fields in entity.

        Args:
            item: datastore_services.Model. JobModel to validate.
        """
        if item.time_started_msec and (
                item.time_queued_msec > item.time_started_msec):
            cls._add_error(
                'time queued check',
                'Entity id %s: time queued %s is greater '
                'than time started %s' % (
                    item.id, item.time_queued_msec, item.time_started_msec))

        if item.time_finished_msec and (
                item.time_started_msec > item.time_finished_msec):
            cls._add_error(
                'time started check',
                'Entity id %s: time started %s is greater '
                'than time finished %s' % (
                    item.id, item.time_started_msec, item.time_finished_msec))

        current_time_msec = utils.get_current_time_in_millisecs()
        if item.time_finished_msec > current_time_msec:
            cls._add_error(
                'time finished check',
                'Entity id %s: time finished %s is greater '
                'than the current time' % (
                    item.id, item.time_finished_msec))
Example #12
    def run(self, job_id, job_class_str, output):
        """Extracts the results of a MR job and registers its completion.

        Args:
            job_id: str. The ID of the job to run.
            job_class_str: str. Should uniquely identify each type of job.
            output: str. The output produced by the job.
        """
        job_class = mapreduce_util.for_name(job_class_str)

        try:
            iterator = input_readers.GoogleCloudStorageInputReader(
                output, 0)
            results_list = []
            for item_reader in iterator:
                for item in item_reader:
                    results_list.append(json.loads(item))
            job_class.register_completion(job_id, results_list)
        except Exception as e:
            logging.exception(
                'Job %s failed at %s' % (
                    job_id, utils.get_current_time_in_millisecs()
                )
            )
            job_class.register_failure(
                job_id,
                '%s\n%s' % (python_utils.UNICODE(e), traceback.format_exc()))
Example #13
    def cancel(cls, job_id, user_id):
        """Marks a job as canceled.

        Args:
            job_id: str. The ID of the job to cancel.
            user_id: str. The id of the user who cancelled the job.
        """
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(
            job_id, model.status_code, STATUS_CODE_CANCELED)
        cls._require_correct_job_type(model.job_type)

        cancel_message = 'Canceled by %s' % (user_id or 'system')

        # Cancel the job.
        cls._pre_cancel_hook(job_id, cancel_message)

        model.status_code = STATUS_CODE_CANCELED
        model.time_finished_msec = utils.get_current_time_in_millisecs()
        model.error = cancel_message
        model.update_timestamps()
        model.put()

        cls._post_cancel_hook(job_id, cancel_message)
Example #14
def _get_search_rank(collection_id):
    """Returns an integer determining the document's rank in search.

    Featured collections get a ranking bump, and so do collections that
    have been more recently updated.
    """
    rights = rights_manager.get_collection_rights(collection_id)
    rank = _DEFAULT_RANK + (_STATUS_PUBLICIZED_BONUS if rights.status ==
                            rights_manager.ACTIVITY_STATUS_PUBLICIZED else 0)

    # Iterate backwards through the collection history metadata until we find
    # the most recent snapshot that was committed by a human.
    last_human_update_ms = 0
    snapshots_metadata = get_collection_snapshots_metadata(collection_id)
    for snapshot_metadata in reversed(snapshots_metadata):
        if snapshot_metadata['committer_id'] != feconf.MIGRATION_BOT_USER_ID:
            last_human_update_ms = snapshot_metadata['created_on_ms']
            break

    _time_now_ms = utils.get_current_time_in_millisecs()
    time_delta_days = int(
        (_time_now_ms - last_human_update_ms) / _MS_IN_ONE_DAY)
    if time_delta_days == 0:
        rank += 80
    elif time_delta_days == 1:
        rank += 50
    elif 2 <= time_delta_days <= 7:
        rank += 35

    # Ranks must be non-negative.
    return max(rank, 0)
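Note: as a worked example of the arithmetic above, using the constant values shown in a later example on this page (_DEFAULT_RANK = 20, _STATUS_PUBLICIZED_BONUS = 30), a publicized collection whose last human edit was the previous day scores 20 + 30 + 50 = 100:

_DEFAULT_RANK = 20
_STATUS_PUBLICIZED_BONUS = 30

rank = _DEFAULT_RANK + _STATUS_PUBLICIZED_BONUS  # publicized -> 50
time_delta_days = 1                              # last human edit: yesterday
if time_delta_days == 0:
    rank += 80
elif time_delta_days == 1:
    rank += 50
elif 2 <= time_delta_days <= 7:
    rank += 35
print(max(rank, 0))  # 100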
Example #15
def _get_search_rank(collection_id):
    """Returns an integer determining the document's rank in search.

    Featured collections get a ranking bump, and so do collections that
    have been more recently updated.
    """
    rights = rights_manager.get_collection_rights(collection_id)
    rank = _DEFAULT_RANK + (
        _STATUS_PUBLICIZED_BONUS
        if rights.status == rights_manager.ACTIVITY_STATUS_PUBLICIZED
        else 0)

    # Iterate backwards through the collection history metadata until we find
    # the most recent snapshot that was committed by a human.
    last_human_update_ms = 0
    snapshots_metadata = get_collection_snapshots_metadata(collection_id)
    for snapshot_metadata in reversed(snapshots_metadata):
        if snapshot_metadata['committer_id'] != feconf.MIGRATION_BOT_USER_ID:
            last_human_update_ms = snapshot_metadata['created_on_ms']
            break

    _time_now_ms = utils.get_current_time_in_millisecs()
    time_delta_days = int(
        (_time_now_ms - last_human_update_ms) / _MS_IN_ONE_DAY)
    if time_delta_days == 0:
        rank += 80
    elif time_delta_days == 1:
        rank += 50
    elif 2 <= time_delta_days <= 7:
        rank += 35

    # Ranks must be non-negative.
    return max(rank, 0)
Example #16
def _change_activity_status(committer_id, activity_id, activity_type, new_status, commit_message):
    """Change the status of an activity. Commits changes.

    Args:
    - committer_id: str. The id of the user who is performing the update
        action.
    - activity_id: str. The id of the collection or activity.
    - activity_type: str. One of feconf.ACTIVITY_TYPE_EXPLORATION or
        feconf.ACTIVITY_TYPE_COLLECTION.
    - new_status: str. The new status of the exploration.
    - commit_message: str. The human-written commit message for this change.
    """
    activity_rights = _get_activity_rights(activity_type, activity_id)
    old_status = activity_rights.status
    activity_rights.status = new_status
    if activity_type == feconf.ACTIVITY_TYPE_EXPLORATION:
        cmd_type = CMD_CHANGE_EXPLORATION_STATUS
    elif activity_type == feconf.ACTIVITY_TYPE_COLLECTION:
        cmd_type = CMD_CHANGE_COLLECTION_STATUS
    commit_cmds = [{"cmd": cmd_type, "old_status": old_status, "new_status": new_status}]

    if new_status != ACTIVITY_STATUS_PRIVATE:
        activity_rights.viewer_ids = []
        if activity_rights.first_published_msec is None:
            activity_rights.first_published_msec = utils.get_current_time_in_millisecs()

    _save_activity_rights(committer_id, activity_rights, activity_type, commit_message, commit_cmds)
    _update_activity_summary(activity_type, activity_rights)
Example #17
    def get(self):
        """Handles GET requests."""
        recent_job_data = jobs.get_data_for_recent_jobs()
        unfinished_job_data = jobs.get_data_for_unfinished_jobs()

        for job in unfinished_job_data:
            job['can_be_canceled'] = job['is_cancelable'] and any([
                klass.__name__ == job['job_type']
                for klass in (
                    jobs_registry.ONE_OFF_JOB_MANAGERS + (
                        jobs_registry.AUDIT_JOB_MANAGERS))])

        queued_or_running_job_types = set([
            job['job_type'] for job in unfinished_job_data])
        one_off_job_status_summaries = [{
            'job_type': klass.__name__,
            'is_queued_or_running': (
                klass.__name__ in queued_or_running_job_types)
        } for klass in jobs_registry.ONE_OFF_JOB_MANAGERS]
        audit_job_status_summaries = [{
            'job_type': klass.__name__,
            'is_queued_or_running': (
                klass.__name__ in queued_or_running_job_types)
        } for klass in jobs_registry.AUDIT_JOB_MANAGERS]

        self.render_json({
            'human_readable_current_time': (
                utils.get_human_readable_time_string(
                    utils.get_current_time_in_millisecs())),
            'one_off_job_status_summaries': one_off_job_status_summaries,
            'audit_job_status_summaries': audit_job_status_summaries,
            'recent_job_data': recent_job_data,
            'unfinished_job_data': unfinished_job_data,
        })
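Note: the handler above also relies on utils.get_human_readable_time_string(). A minimal sketch with the assumed behavior (format epoch milliseconds as a UTC timestamp; the exact format string is an assumption):

import datetime

def get_human_readable_time_string(time_msec):
    # Assumed behavior: render epoch milliseconds as a readable UTC string.
    return datetime.datetime.utcfromtimestamp(
        time_msec / 1000.0).strftime('%B %d %H:%M:%S')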
Example #18
def update_collection(
        committer_id, collection_id, change_list, commit_message):
    """Update an collection. Commits changes.

    Args:
    - committer_id: str. The id of the user who is performing the update
        action.
    - collection_id: str. The collection id.
    - change_list: list of dicts, each representing a CollectionChange object.
        These changes are applied in sequence to produce the resulting
        collection.
    - commit_message: str or None. A description of changes made to the
        collection. For published collections, this must be present; for
        unpublished collections, it may be equal to None.
    """
    is_public = rights_manager.is_collection_public(collection_id)

    if is_public and not commit_message:
        raise ValueError(
            'Collection is public so expected a commit message but '
            'received none.')

    collection = apply_change_list(collection_id, change_list)
    _save_collection(committer_id, collection, commit_message, change_list)
    update_collection_summary(collection.id, committer_id)

    if not rights_manager.is_collection_private(collection.id):
        user_services.update_first_contribution_msec_if_not_set(
            committer_id, utils.get_current_time_in_millisecs())
Example #19
    def _validate_time_fields(cls, item):
        """Validate the time fields in entity.

        Args:
            item: datastore_services.Model. ContinuousComputationModel to
                validate.
        """
        if item.last_started_msec > item.last_finished_msec and (
                item.last_started_msec > item.last_stopped_msec):
            cls._add_error(
                'last started check',
                'Entity id %s: last started %s is greater '
                'than both last finished %s and last stopped %s' % (
                    item.id, item.last_started_msec, item.last_finished_msec,
                    item.last_stopped_msec))

        current_time_msec = utils.get_current_time_in_millisecs()
        if item.last_finished_msec > current_time_msec:
            cls._add_error(
                'last finished check',
                'Entity id %s: last finished %s is greater '
                'than the current time' % (
                    item.id, item.last_finished_msec))

        if item.last_stopped_msec > current_time_msec:
            cls._add_error(
                'last stopped check',
                'Entity id %s: last stopped %s is greater '
                'than the current time' % (
                    item.id, item.last_stopped_msec))
Example #20
    def get(self):
        """Handles GET requests."""
        recent_job_data = jobs.get_data_for_recent_jobs()
        unfinished_job_data = jobs.get_data_for_unfinished_jobs()

        for job in unfinished_job_data:
            job['can_be_canceled'] = job['is_cancelable'] and any([
                klass.__name__ == job['job_type']
                for klass in (jobs_registry.ONE_OFF_JOB_MANAGERS +
                              (jobs_registry.AUDIT_JOB_MANAGERS))
            ])

        queued_or_running_job_types = set(
            [job['job_type'] for job in unfinished_job_data])
        one_off_job_status_summaries = [{
            'job_type': klass.__name__,
            'is_queued_or_running': (
                klass.__name__ in queued_or_running_job_types)
        } for klass in jobs_registry.ONE_OFF_JOB_MANAGERS]
        audit_job_status_summaries = [{
            'job_type': klass.__name__,
            'is_queued_or_running': (
                klass.__name__ in queued_or_running_job_types)
        } for klass in jobs_registry.AUDIT_JOB_MANAGERS]

        continuous_computations_data = jobs.get_continuous_computations_info(
            jobs_registry.ALL_CONTINUOUS_COMPUTATION_MANAGERS)
        for computation in continuous_computations_data:
            if computation['last_started_msec']:
                computation['human_readable_last_started'] = (
                    utils.get_human_readable_time_string(
                        computation['last_started_msec']))
            if computation['last_stopped_msec']:
                computation['human_readable_last_stopped'] = (
                    utils.get_human_readable_time_string(
                        computation['last_stopped_msec']))
            if computation['last_finished_msec']:
                computation['human_readable_last_finished'] = (
                    utils.get_human_readable_time_string(
                        computation['last_finished_msec']))

        self.render_json({
            'continuous_computations_data': continuous_computations_data,
            'human_readable_current_time': (
                utils.get_human_readable_time_string(
                    utils.get_current_time_in_millisecs())),
            'one_off_job_status_summaries': one_off_job_status_summaries,
            'audit_job_status_summaries': audit_job_status_summaries,
            'recent_job_data': recent_job_data,
            'unfinished_job_data': unfinished_job_data,
        })
Example #21
def cleanup_old_jobs_pipelines():
    """Clean the pipelines of old jobs."""
    num_cleaned = 0
    max_age_msec = (
        MAX_MAPREDUCE_METADATA_RETENTION_MSECS + 7 * 24 * 60 * 60 * 1000)
    # Only consider jobs that started at most 1 week before recency_msec.
    # The latest start time that a job scheduled for cleanup may have.
    max_start_time_msec = (
        utils.get_current_time_in_millisecs() -
        MAX_MAPREDUCE_METADATA_RETENTION_MSECS
    )
    # Get the pipeline ids of jobs that were queued within the last
    # max_age_msec and that started before max_start_time_msec.
    pipeline_id_to_job_instance = {}

    job_instances = job_models.JobModel.get_recent_jobs(1000, max_age_msec)
    for job_instance in job_instances:
        if (
                job_instance.time_started_msec < max_start_time_msec and
                not job_instance.has_been_cleaned_up
        ):
            if 'root_pipeline_id' in job_instance.metadata:
                pipeline_id = job_instance.metadata['root_pipeline_id']
                pipeline_id_to_job_instance[pipeline_id] = job_instance

    # Clean up pipelines.
    for pline in pipeline.get_root_list()['pipelines']:
        pipeline_id = pline['pipelineId']
        job_definitely_terminated = (
            pline['status'] == 'done' or
            pline['status'] == 'aborted' or
            pline['currentAttempt'] > pline['maxAttempts']
        )
        have_start_time = 'startTimeMs' in pline
        job_started_too_long_ago = (
            have_start_time and
            pline['startTimeMs'] < max_start_time_msec
        )

        if (job_started_too_long_ago or
                (not have_start_time and job_definitely_terminated)):
            # At this point, the map/reduce pipeline is either in a
            # terminal state, or has taken so long that there's no
            # realistic possibility that there might be a race condition
            # between this and the job actually completing.
            if pipeline_id in pipeline_id_to_job_instance:
                job_instance = pipeline_id_to_job_instance[pipeline_id]
                job_instance.has_been_cleaned_up = True
                job_instance.update_timestamps()
                job_instance.put()

            # This enqueues a deferred cleanup item.
            p = pipeline.Pipeline.from_id(pipeline_id)
            if p:
                p.cleanup()
                num_cleaned += 1

    logging.warning('%s MR jobs cleaned up.' % num_cleaned)
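Note: to make the two cut-offs above concrete, here is the window arithmetic with a hypothetical retention period of two days; MAX_MAPREDUCE_METADATA_RETENTION_MSECS is defined elsewhere in jobs.py and the value below is only an assumption:

MAX_MAPREDUCE_METADATA_RETENTION_MSECS = 2 * 24 * 60 * 60 * 1000  # assumed

# Jobs queued within the last (retention + 1 week) are fetched...
max_age_msec = MAX_MAPREDUCE_METADATA_RETENTION_MSECS + 7 * 24 * 60 * 60 * 1000

# ...but only those that started before (now - retention) are cleaned up.
now_msec = 1600000000000.0
max_start_time_msec = now_msec - MAX_MAPREDUCE_METADATA_RETENTION_MSECS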
Example #22
    def get(self):
        """Handles GET requests."""
        demo_exploration_ids = feconf.DEMO_EXPLORATIONS.keys()

        recent_job_data = jobs.get_data_for_recent_jobs()
        unfinished_job_data = jobs.get_data_for_unfinished_jobs()
        for job in unfinished_job_data:
            job['can_be_canceled'] = job['is_cancelable'] and any([
                klass.__name__ == job['job_type']
                for klass in jobs_registry.ONE_OFF_JOB_MANAGERS])

        queued_or_running_job_types = set([
            job['job_type'] for job in unfinished_job_data])
        one_off_job_specs = [{
            'job_type': klass.__name__,
            'is_queued_or_running': (
                klass.__name__ in queued_or_running_job_types)
        } for klass in jobs_registry.ONE_OFF_JOB_MANAGERS]

        continuous_computations_data = jobs.get_continuous_computations_info(
            jobs_registry.ALL_CONTINUOUS_COMPUTATION_MANAGERS)
        for computation in continuous_computations_data:
            if computation['last_started_msec']:
                computation['human_readable_last_started'] = (
                    utils.get_human_readable_time_string(
                        computation['last_started_msec']))
            if computation['last_stopped_msec']:
                computation['human_readable_last_stopped'] = (
                    utils.get_human_readable_time_string(
                        computation['last_stopped_msec']))
            if computation['last_finished_msec']:
                computation['human_readable_last_finished'] = (
                    utils.get_human_readable_time_string(
                        computation['last_finished_msec']))

        self.values.update({
            'continuous_computations_data': continuous_computations_data,
            'demo_collections': sorted(feconf.DEMO_COLLECTIONS.iteritems()),
            'demo_explorations': sorted(feconf.DEMO_EXPLORATIONS.iteritems()),
            'demo_exploration_ids': demo_exploration_ids,
            'human_readable_current_time': (
                utils.get_human_readable_time_string(
                    utils.get_current_time_in_millisecs())),
            'one_off_job_specs': one_off_job_specs,
            'recent_job_data': recent_job_data,
            'rte_components_html': jinja2.utils.Markup(
                rte_component_registry.Registry.get_html_for_all_components()),
            'unfinished_job_data': unfinished_job_data,
            'value_generators_js': jinja2.utils.Markup(
                editor.get_value_generators_js()),
        })

        self.render_template('pages/admin/admin.html')
Example #23
    def get_new_id(cls, entity_name):
        """Overwrites superclass method.

        Args:
            entity_name: str. The name of the entity to create a new job id for.

        Returns:
            str. A job id.
        """
        job_type = entity_name
        current_time_str = str(int(utils.get_current_time_in_millisecs()))
        random_int = random.randint(0, 1000)
        return '%s-%s-%s' % (job_type, current_time_str, random_int)
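Note: the resulting ID has the shape '<job_type>-<time in msec>-<random int>'. The same recipe, inlined with the standard library (the job type name is hypothetical):

import random
import time

job_type = 'SomeOneOffJob'  # hypothetical job type name
job_id = '%s-%s-%s' % (
    job_type, str(int(time.time() * 1000)), random.randint(0, 1000))
# e.g. 'SomeOneOffJob-1600000000000-123'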
Example #24
    def generate_new_thread_id(cls, exploration_id):
        """Generates a new thread id, unique within the exploration.

        The exploration ID combined with the generated thread ID is
        globally unique.
        """
        for _ in range(_MAX_RETRIES):
            thread_id = (
                utils.base64_from_int(utils.get_current_time_in_millisecs()) +
                utils.base64_from_int(utils.get_random_int(_RAND_RANGE)))
            if not cls.get_by_exp_and_thread_id(exploration_id, thread_id):
                return thread_id
        raise Exception(
            'New thread id generator is producing too many collisions.')
Example #25
    def test_invalid_last_stopped_msec(self):
        current_time_msec = utils.get_current_time_in_millisecs()
        self.model_instance.last_stopped_msec = current_time_msec * 10.0
        self.model_instance.update_timestamps()
        self.model_instance.put()
        expected_output = [
            (
                u'[u\'failed validation check for last stopped check '
                'of ContinuousComputationModel\', [u\'Entity id %s: '
                'last stopped %s is greater than the current time\']]'
            ) % (self.model_instance.id, self.model_instance.last_stopped_msec)]
        self.run_job_and_check_output(
            expected_output, sort=False, literal_eval=False)
Example #26
    def _stop_computation_transactional():
        """Transactional implementation for marking a continuous
        computation as stopping/idle.
        """
        cc_model = job_models.ContinuousComputationModel.get(cls.__name__)
        # If there is no job currently running, go to IDLE immediately.
        new_status_code = (
            job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_STOPPING
            if do_unfinished_jobs_exist else
            job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_IDLE)
        cc_model.status_code = new_status_code
        cc_model.last_stopped_msec = utils.get_current_time_in_millisecs()
        cc_model.put()
Example #27
File: jobs.py Project: oppia/oppia
    def register_failure(cls, job_id, error):
        """Marks a job as failed."""
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(job_id, model.status_code, STATUS_CODE_FAILED)
        cls._require_correct_job_type(model.job_type)

        model.status_code = STATUS_CODE_FAILED
        model.time_finished_msec = utils.get_current_time_in_millisecs()
        model.error = error
        model.put()

        cls._post_failure_hook(job_id)
Example #28
    def get_new_id(cls, entity_name):
        """Overwrites superclass method.

        Args:
            entity_name: str. The name of the entity to create a new job id for.

        Returns:
            str. A job id.
        """
        job_type = entity_name
        current_time_str = str(int(utils.get_current_time_in_millisecs()))
        random_int = random.randint(0, 1000)
        return '%s-%s-%s' % (job_type, current_time_str, random_int)
Example #29
File: jobs.py Project: oppia/oppia
    def register_completion(cls, job_id, output):
        """Marks a job as completed."""
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(job_id, model.status_code, STATUS_CODE_COMPLETED)
        cls._require_correct_job_type(model.job_type)

        model.status_code = STATUS_CODE_COMPLETED
        model.time_finished_msec = utils.get_current_time_in_millisecs()
        model.output = output
        model.put()

        cls._post_completed_hook(job_id)
Example #30
    def generate_new_thread_id(cls, exploration_id):
        """Generates a new thread id, unique within the exploration.

        The exploration ID combined with the generated thread ID is
        globally unique.
        """
        for _ in range(_MAX_RETRIES):
            thread_id = (
                utils.base64_from_int(utils.get_current_time_in_millisecs()) +
                utils.base64_from_int(utils.get_random_int(_RAND_RANGE)))
            if not cls.get_by_exp_and_thread_id(exploration_id, thread_id):
                return thread_id
        raise Exception(
            'New thread id generator is producing too many collisions.')
Example #31
    def _stop_computation_transactional():
        """Transactional implementation for marking a continuous
        computation as stopping/idle.
        """
        cc_model = job_models.ContinuousComputationModel.get(cls.__name__)
        # If there is no job currently running, go to IDLE immediately.
        new_status_code = (
            job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_STOPPING
            if do_unfinished_jobs_exist else
            job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_IDLE)
        cc_model.status_code = new_status_code
        cc_model.last_stopped_msec = utils.get_current_time_in_millisecs()
        cc_model.put()
Example #32
File: jobs.py Project: oppia/oppia
    def register_start(cls, job_id, metadata=None):
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(job_id, model.status_code, STATUS_CODE_STARTED)
        cls._require_correct_job_type(model.job_type)

        cls._pre_start_hook(job_id)

        model.metadata = metadata
        model.status_code = STATUS_CODE_STARTED
        model.time_started_msec = utils.get_current_time_in_millisecs()
        model.put()

        cls._post_start_hook(job_id)
Example #33
File: jobs.py Project: yarinf/oppia
    def _real_enqueue(cls, job_id, additional_job_params):
        entity_class_types = cls.entity_classes_to_map_over()
        entity_class_names = [
            '%s.%s' %
            (entity_class_type.__module__, entity_class_type.__name__)
            for entity_class_type in entity_class_types
        ]

        kwargs = {
            'job_name': job_id,
            'mapper_spec': '%s.%s.map' % (cls.__module__, cls.__name__),
            'reducer_spec': '%s.%s.reduce' % (cls.__module__, cls.__name__),
            'input_reader_spec': (
                'core.jobs.MultipleDatastoreEntitiesInputReader'),
            'output_writer_spec': (
                'core.jobs.GoogleCloudStorageConsistentJsonOutputWriter'),
            'mapper_params': {
                MAPPER_PARAM_KEY_ENTITY_KINDS: entity_class_names,
                # Note that all parameters passed to the mapper need to be
                # strings. Also note that the value for this key is determined
                # just before enqueue time, so it will be roughly equal to the
                # actual enqueue time.
                MAPPER_PARAM_KEY_QUEUED_TIME_MSECS: str(
                    utils.get_current_time_in_millisecs()),
            },
            'reducer_params': {
                'output_writer': {
                    'bucket_name': app_identity.get_default_gcs_bucket_name(),
                    'content_type': 'text/plain',
                    'naming_format': 'mrdata/$name/$id/output-$num',
                }
            }
        }

        if additional_job_params is not None:
            for param_name in additional_job_params:
                if param_name in kwargs['mapper_params']:
                    raise Exception(
                        'Additional job param %s shadows an existing mapper '
                        'param' % param_name)
                kwargs['mapper_params'][param_name] = copy.deepcopy(
                    additional_job_params[param_name])

        mr_pipeline = MapReduceJobPipeline(
            job_id, '%s.%s' % (cls.__module__, cls.__name__), kwargs)
        mr_pipeline.start(base_path='/mapreduce/worker/pipeline')
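Note: the loop at the end of the method guards against additional job params shadowing the reserved mapper params. A standalone illustration of that check, with assumed key names:

# Assumed key names, for illustration only.
mapper_params = {
    'entity_kinds': ['ExpModel'],
    'queued_time_msecs': '1600000000000',
}
additional_job_params = {'shard_size': 8}

for param_name in additional_job_params:
    if param_name in mapper_params:
        raise Exception(
            'Additional job param %s shadows an existing mapper '
            'param' % param_name)
    mapper_params[param_name] = additional_job_params[param_name]
# mapper_params now also holds 'shard_size'; a clashing key such as
# 'entity_kinds' would have raised instead.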
Example #34
File: jobs.py Project: oppia/oppia
        def _start_computation_transactional():
            """Transactional implementation for marking a continuous
            computation as started.
            """
            cc_model = job_models.ContinuousComputationModel.get(cls.__name__, strict=False)
            if cc_model is None:
                cc_model = job_models.ContinuousComputationModel(id=cls.__name__)

            if cc_model.status_code != job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_IDLE:
                raise Exception("Attempted to start computation %s, which is already " "running." % cls.__name__)

            cc_model.status_code = job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_RUNNING
            cc_model.last_started_msec = utils.get_current_time_in_millisecs()
            cc_model.put()
Example #35
def publish_collection_and_update_user_profiles(committer_id, col_id):
    """Publishes the collection with publish_collection() function in
    rights_manager.py, as well as updates first_contribution_msec.

    It is the responsibility of the caller to check that the collection is
    valid prior to publication.
    """
    rights_manager.publish_collection(committer_id, col_id)
    contribution_time_msec = utils.get_current_time_in_millisecs()
    collection_summary = get_collection_summary_by_id(col_id)
    contributor_ids = collection_summary.contributor_ids
    for contributor in contributor_ids:
        user_services.update_first_contribution_msec_if_not_set(
            contributor, contribution_time_msec)
Example #36
File: jobs.py Project: oppia/oppia
    def run(self, job_id, job_class_str, output):
        job_class = mapreduce_util.for_name(job_class_str)

        try:
            iterator = input_readers.GoogleCloudStorageInputReader(output, 0)
            results_list = []
            for item_reader in iterator:
                for item in item_reader:
                    results_list.append(json.loads(item))
            job_class.register_completion(job_id, results_list)
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.error("Job %s failed at %s" % (job_id, utils.get_current_time_in_millisecs()))
            job_class.register_failure(job_id, "%s\n%s" % (unicode(e), traceback.format_exc()))
Example #37
    def _run_job(cls, job_id, additional_job_params):
        """Starts the job."""
        logging.info('Job %s started at %s' %
                     (job_id, utils.get_current_time_in_millisecs()))
        cls.register_start(job_id)

        try:
            result = cls._run(additional_job_params)
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.error('Job %s failed at %s' %
                          (job_id, utils.get_current_time_in_millisecs()))
            cls.register_failure(
                job_id, '%s\n%s' % (unicode(e), traceback.format_exc()))
            raise taskqueue_services.PermanentTaskFailure(
                'Task failed: %s\n%s' % (unicode(e), traceback.format_exc()))

        # Note that the job may have been canceled after it started and before
        # it reached this stage. This will result in an exception when the
        # validity of the status code transition is checked.
        cls.register_completion(job_id, result)
        logging.info('Job %s completed at %s' %
                     (job_id, utils.get_current_time_in_millisecs()))
Example #38
    def register_failure(cls, job_id, error):
        """Marks a job as failed."""
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(job_id, model.status_code,
                                      STATUS_CODE_FAILED)
        cls._require_correct_job_type(model.job_type)

        model.status_code = STATUS_CODE_FAILED
        model.time_finished_msec = utils.get_current_time_in_millisecs()
        model.error = error
        model.put()

        cls._post_failure_hook(job_id)
Example #39
    def register_completion(cls, job_id, output):
        """Marks a job as completed."""
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(job_id, model.status_code,
                                      STATUS_CODE_COMPLETED)
        cls._require_correct_job_type(model.job_type)

        model.status_code = STATUS_CODE_COMPLETED
        model.time_finished_msec = utils.get_current_time_in_millisecs()
        model.output = output
        model.put()

        cls._post_completed_hook(job_id)
Example #40
def publish_collection_and_update_user_profiles(committer_id, col_id):
    """Publishes the collection with publish_collection() function in
    rights_manager.py, as well as updates first_contribution_msec.

    It is the responsibility of the caller to check that the collection is
    valid prior to publication.
    """
    rights_manager.publish_collection(committer_id, col_id)
    contribution_time_msec = utils.get_current_time_in_millisecs()
    collection_summary = get_collection_summary_by_id(col_id)
    contributor_ids = collection_summary.contributor_ids
    for contributor in contributor_ids:
        user_services.update_first_contribution_msec_if_not_set(
            contributor, contribution_time_msec)
Example #41
    def register_start(cls, job_id, metadata=None):
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(job_id, model.status_code,
                                      STATUS_CODE_STARTED)
        cls._require_correct_job_type(model.job_type)

        cls._pre_start_hook(job_id)

        model.metadata = metadata
        model.status_code = STATUS_CODE_STARTED
        model.time_started_msec = utils.get_current_time_in_millisecs()
        model.put()

        cls._post_start_hook(job_id)
Example #42
    def test_invalid_time_finished_msec(self):
        current_time_msec = utils.get_current_time_in_millisecs()
        self.model_instance.time_finished_msec = current_time_msec * 10.0
        self.model_instance.update_timestamps()
        self.model_instance.put()
        expected_output = [
            (u'[u\'failed validation check for time finished '
             'check of JobModel\', [u\'Entity id %s: time '
             'finished %s is greater than the current time\']]') %
            (self.model_instance.id, self.model_instance.time_finished_msec),
            u'[u\'fully-validated JobModel\', 1]'
        ]
        self.run_job_and_check_output(
            expected_output, sort=True, literal_eval=False)
Example #43
    def setUp(self):
        super(JobModelValidatorTests, self).setUp()

        current_time_str = python_utils.UNICODE(
            int(utils.get_current_time_in_millisecs()))
        random_int = random.randint(0, 1000)
        self.model_instance = job_models.JobModel(
            id='test-%s-%s' % (current_time_str, random_int),
            status_code=job_models.STATUS_CODE_NEW, job_type='test',
            time_queued_msec=1, time_started_msec=10, time_finished_msec=20)
        self.model_instance.update_timestamps()
        self.model_instance.put()

        self.job_class = (
            prod_validation_jobs_one_off.JobModelAuditOneOffJob)
Example #44
File: jobs.py Project: yarinf/oppia
    def run(self, job_id, job_class_str, output):
        job_class = mapreduce_util.for_name(job_class_str)

        try:
            iterator = input_readers.GoogleCloudStorageInputReader(output, 0)
            results_list = []
            for item_reader in iterator:
                for item in item_reader:
                    results_list.append(json.loads(item))
            job_class.register_completion(job_id, results_list)
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.error('Job %s failed at %s' %
                          (job_id, utils.get_current_time_in_millisecs()))
            job_class.register_failure(
                job_id, '%s\n%s' % (unicode(e), traceback.format_exc()))
Example #45
    def get_recent_jobs(cls, limit, recency_msec):
        """Gets at most limit jobs with respect to a time after recency_msec.

        Args:
            limit: int. A limit on the number of jobs to return.
            recency_msec: int. The number of milliseconds earlier
                than the current time.

        Returns:
            list(JobModel) or None. A list of at most `limit` jobs
            that come after recency_msec time.
        """
        earliest_time_msec = (utils.get_current_time_in_millisecs() -
                              recency_msec)
        return cls.query().filter(cls.time_queued_msec > earliest_time_msec
                                  ).order(-cls.time_queued_msec).fetch(limit)
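Note: inside the application, a call such as the following would fetch at most 50 jobs queued within the last hour:

one_hour_msec = 60 * 60 * 1000
recent_jobs = job_models.JobModel.get_recent_jobs(50, one_hour_msec)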
Example #46
    def enqueue(cls, job_id, additional_job_params=None):
        """Marks a job as queued and adds it to a queue for processing."""
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(job_id, model.status_code,
                                      STATUS_CODE_QUEUED)
        cls._require_correct_job_type(model.job_type)

        # Enqueue the job.
        cls._pre_enqueue_hook(job_id)
        cls._real_enqueue(job_id, additional_job_params)

        model.status_code = STATUS_CODE_QUEUED
        model.time_queued_msec = utils.get_current_time_in_millisecs()
        model.put()

        cls._post_enqueue_hook(job_id)
Example #47
    def enqueue(cls, job_id, additional_job_params=None):
        """Marks a job as queued and adds it to a queue for processing."""
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(
            job_id, model.status_code, STATUS_CODE_QUEUED)
        cls._require_correct_job_type(model.job_type)

        # Enqueue the job.
        cls._pre_enqueue_hook(job_id)
        cls._real_enqueue(job_id, additional_job_params)

        model.status_code = STATUS_CODE_QUEUED
        model.time_queued_msec = utils.get_current_time_in_millisecs()
        model.put()

        cls._post_enqueue_hook(job_id)
Example #48
    def get_recent_jobs(cls, limit, recency_msec):
        """Gets at most limit jobs with respect to a time after recency_msec.

        Args:
            limit: int. A limit on the number of jobs to return.
            recency_msec: int. The number of milliseconds earlier
                than the current time.

        Returns:
            list(JobModel) or None. A list of at most `limit` jobs
            that come after recency_msec time.
        """
        earliest_time_msec = (
            utils.get_current_time_in_millisecs() - recency_msec)
        return cls.query().filter(
            cls.time_queued_msec > earliest_time_msec
        ).order(-cls.time_queued_msec).fetch(limit)
Example #49
File: jobs.py Project: oppia/oppia
    def cancel(cls, job_id, user_id):
        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(job_id, model.status_code, STATUS_CODE_CANCELED)
        cls._require_correct_job_type(model.job_type)

        cancel_message = "Canceled by %s" % (user_id or "system")

        # Cancel the job.
        cls._pre_cancel_hook(job_id, cancel_message)

        model.status_code = STATUS_CODE_CANCELED
        model.time_finished_msec = utils.get_current_time_in_millisecs()
        model.error = cancel_message
        model.put()

        cls._post_cancel_hook(job_id, cancel_message)
Example #50
    def _real_enqueue(cls, job_id, additional_job_params):
        entity_class_types = cls.entity_classes_to_map_over()
        entity_class_names = [
            '%s.%s' % (
                entity_class_type.__module__, entity_class_type.__name__)
            for entity_class_type in entity_class_types]

        kwargs = {
            'job_name': job_id,
            'mapper_spec': '%s.%s.map' % (cls.__module__, cls.__name__),
            'reducer_spec': '%s.%s.reduce' % (cls.__module__, cls.__name__),
            'input_reader_spec': (
                'core.jobs.MultipleDatastoreEntitiesInputReader'),
            'output_writer_spec': (
                'core.jobs.GoogleCloudStorageConsistentJsonOutputWriter'),
            'mapper_params': {
                MAPPER_PARAM_KEY_ENTITY_KINDS: entity_class_names,
                # Note that all parameters passed to the mapper need to be
                # strings. Also note that the value for this key is determined
                # just before enqueue time, so it will be roughly equal to the
                # actual enqueue time.
                MAPPER_PARAM_KEY_QUEUED_TIME_MSECS: str(
                    utils.get_current_time_in_millisecs()),
            },
            'reducer_params': {
                'output_writer': {
                    'bucket_name': app_identity.get_default_gcs_bucket_name(),
                    'content_type': 'text/plain',
                    'naming_format': 'mrdata/$name/$id/output-$num',
                }
            }
        }

        if additional_job_params is not None:
            for param_name in additional_job_params:
                if param_name in kwargs['mapper_params']:
                    raise Exception(
                        'Additional job param %s shadows an existing mapper '
                        'param' % param_name)
                kwargs['mapper_params'][param_name] = copy.deepcopy(
                    additional_job_params[param_name])

        mr_pipeline = MapReduceJobPipeline(
            job_id, '%s.%s' % (cls.__module__, cls.__name__), kwargs)
        mr_pipeline.start(base_path='/mapreduce/worker/pipeline')
Example #51
def _get_search_rank(collection_id):
    """Returns an integer determining the document's rank in search.

    Featured collections get a ranking bump, and so do collections that
    have been more recently updated.
    """
    # TODO(bhenning): Improve this calculation. Some possible suggestions for
    # a better ranking include using an average of the search ranks of each
    # exploration referenced in the collection and/or demoting collections
    # for any validation errors from explorations referenced in the collection.
    _STATUS_PUBLICIZED_BONUS = 30
    # This is done to prevent the rank hitting 0 too easily. Note that
    # negative ranks are disallowed in the Search API.
    _DEFAULT_RANK = 20

    collection = get_collection_by_id(collection_id)
    rights = rights_manager.get_collection_rights(collection_id)
    summary = get_collection_summary_by_id(collection_id)
    rank = _DEFAULT_RANK + (
        _STATUS_PUBLICIZED_BONUS
        if rights.status == rights_manager.ACTIVITY_STATUS_PUBLICIZED
        else 0)

    # Iterate backwards through the collection history metadata until we find
    # the most recent snapshot that was committed by a human.
    last_human_update_ms = 0
    snapshots_metadata = get_collection_snapshots_metadata(collection_id)
    for snapshot_metadata in reversed(snapshots_metadata):
        if snapshot_metadata['committer_id'] != feconf.MIGRATION_BOT_USER_ID:
            last_human_update_ms = snapshot_metadata['created_on_ms']
            break

    _TIME_NOW_MS = utils.get_current_time_in_millisecs()
    _MS_IN_ONE_DAY = 24 * 60 * 60 * 1000
    time_delta_days = int(
        (_TIME_NOW_MS - last_human_update_ms) / _MS_IN_ONE_DAY)
    if time_delta_days == 0:
        rank += 80
    elif time_delta_days == 1:
        rank += 50
    elif 2 <= time_delta_days <= 7:
        rank += 35

    # Ranks must be non-negative.
    return max(rank, 0)
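For concreteness, here is a small standalone sketch of the same rank arithmetic, with the Oppia-specific lookups (rights, snapshot metadata) replaced by plain arguments; the bonus values mirror the constants above:

def compute_search_rank(is_publicized, days_since_last_human_update):
    # Mirrors the constants in _get_search_rank above.
    default_rank = 20
    publicized_bonus = 30

    rank = default_rank + (publicized_bonus if is_publicized else 0)

    # Recency bonus, identical to the branches above.
    if days_since_last_human_update == 0:
        rank += 80
    elif days_since_last_human_update == 1:
        rank += 50
    elif 2 <= days_since_last_human_update <= 7:
        rank += 35

    # Ranks must be non-negative.
    return max(rank, 0)

# A publicized collection last edited by a human 3 days ago:
# 20 + 30 + 35 == 85.
assert compute_search_rank(True, 3) == 85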
Example No. 52
0
    def run(self, job_id, job_class_str, output):
        job_class = mapreduce_util.for_name(job_class_str)

        try:
            iterator = input_readers.RecordsReader(output, 0)
            results_list = []
            for item in iterator:
                # Map/reduce puts reducer output into blobstore files as a
                # string obtained via "str(result)".  Use AST as a safe
                # alternative to eval() to get the Python object back.
                results_list.append(ast.literal_eval(item))
            job_class.register_completion(job_id, results_list)
        except Exception as e:
            logging.error(traceback.format_exc())
            logging.error(
                'Job %s failed at %s' %
                (job_id, utils.get_current_time_in_millisecs()))
            job_class.register_failure(
                job_id,
                '%s\n%s' % (unicode(e), traceback.format_exc()))
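The comment above is the key point: the reducer output was serialized with str(result), and ast.literal_eval can parse such literals back without the code-execution risk of eval(). A minimal illustration:

import ast

# Each result is written out as str(result); ast.literal_eval only evaluates
# Python literals, so a corrupted or malicious record cannot execute arbitrary
# code the way it could with eval().
stored_record = str(['exp_id_1', 3, {'errors': 0}])
result = ast.literal_eval(stored_record)
assert result == ['exp_id_1', 3, {'errors': 0}]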
Example No. 53
0
    def register_completion(cls, job_id, output_list):
        """Marks a job as completed."""
        _MAX_OUTPUT_LENGTH_CHARS = 900000

        # Ensure that preconditions are met.
        model = job_models.JobModel.get(job_id, strict=True)
        cls._require_valid_transition(
            job_id, model.status_code, STATUS_CODE_COMPLETED)
        cls._require_correct_job_type(model.job_type)

        model.status_code = STATUS_CODE_COMPLETED
        model.time_finished_msec = utils.get_current_time_in_millisecs()

        # TODO(bhenning): Add tests for this.
        output_str_list = ['%s' % output_value for output_value in output_list]

        # De-duplicate the lines of output since it's not very useful to repeat
        # them.
        counter = collections.Counter(list(output_str_list))
        output_str_frequency_list = [
            (output_str, counter[output_str]) for output_str in counter]
        output_str_list = [
            line if freq == 1 else '%s (%d times)' % (line, freq)
            for (line, freq) in output_str_frequency_list
        ]

        cutoff_index = 0
        total_output_size = 0
        for idx, output_str in enumerate(output_str_list):
            cutoff_index += 1
            total_output_size += len(output_str)
            if total_output_size >= _MAX_OUTPUT_LENGTH_CHARS:
                max_element_length = (
                    total_output_size - _MAX_OUTPUT_LENGTH_CHARS)
                output_str_list[idx] = output_str[:max_element_length]
                output_str_list[idx] += ' <TRUNCATED>'
                break
        model.output = output_str_list[:cutoff_index]
        model.put()

        cls._post_completed_hook(job_id)
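The de-duplication step above can be shown in isolation. A minimal standalone sketch of the same Counter-based collapsing (like the original, it does not preserve the order of the input lines):

import collections

def deduplicate_output(output_str_list):
    # Collapses repeated output lines into a single '<line> (N times)' entry,
    # mirroring the Counter-based logic in register_completion above.
    counter = collections.Counter(output_str_list)
    return [
        line if freq == 1 else '%s (%d times)' % (line, freq)
        for line, freq in counter.items()
    ]

# ['ok (3 times)', 'missing title'] (in some order).
print(deduplicate_output(['ok', 'missing title', 'ok', 'ok']))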
Example No. 54
0
    def generate_new_thread_id(cls, exploration_id):
        """Generates a new thread ID which is unique within the exploration.

        Args:
            exploration_id: str. The ID of the exploration.

        Returns:
            str. A thread ID that is different from the IDs of all
                the existing threads within the given exploration.

        Raises:
            Exception: There were too many collisions with existing thread IDs
                when attempting to generate a new thread ID.
        """
        for _ in range(_MAX_RETRIES):
            thread_id = (
                utils.base64_from_int(utils.get_current_time_in_millisecs()) +
                utils.base64_from_int(utils.get_random_int(_RAND_RANGE)))
            if not cls.get_by_exp_and_thread_id(exploration_id, thread_id):
                return thread_id
        raise Exception(
            'New thread id generator is producing too many collisions.')
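The retry-on-collision pattern can be reproduced without the Oppia helpers. A minimal sketch, with stand-ins for utils.base64_from_int and assumed values for the module-level constants:

import base64
import random
import time

_MAX_RETRIES = 10        # assumed to match the module-level constant
_RAND_RANGE = 127 * 127  # assumed; any sufficiently large range would do

def base64_from_int(value):
    # Rough stand-in for utils.base64_from_int().
    return base64.b64encode(str(int(value)).encode('utf-8')).decode('utf-8')

def generate_thread_id(existing_ids):
    # Retry a bounded number of times rather than looping forever, so a
    # pathological collision rate surfaces as an explicit error.
    for _ in range(_MAX_RETRIES):
        thread_id = (
            base64_from_int(time.time() * 1000) +
            base64_from_int(random.randint(0, _RAND_RANGE)))
        if thread_id not in existing_ids:
            return thread_id
    raise Exception(
        'New thread id generator is producing too many collisions.')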
Example No. 55
0
File: jobs.py Project: oppia/oppia
    def _real_enqueue(cls, job_id, additional_job_params):
        entity_class_types = cls.entity_classes_to_map_over()
        entity_class_names = [
            "%s.%s" % (entity_class_type.__module__, entity_class_type.__name__)
            for entity_class_type in entity_class_types
        ]

        kwargs = {
            "job_name": job_id,
            "mapper_spec": "%s.%s.map" % (cls.__module__, cls.__name__),
            "reducer_spec": "%s.%s.reduce" % (cls.__module__, cls.__name__),
            "input_reader_spec": ("core.jobs.MultipleDatastoreEntitiesInputReader"),
            "output_writer_spec": ("core.jobs.GoogleCloudStorageConsistentJsonOutputWriter"),
            "mapper_params": {
                MAPPER_PARAM_KEY_ENTITY_KINDS: entity_class_names,
                # Note that all parameters passed to the mapper need to be
                # strings. Also note that the value for this key is determined
                # just before enqueue time, so it will be roughly equal to the
                # actual enqueue time.
                MAPPER_PARAM_KEY_QUEUED_TIME_MSECS: str(
                    utils.get_current_time_in_millisecs()),
            },
            "reducer_params": {
                "output_writer": {
                    "bucket_name": app_identity.get_default_gcs_bucket_name(),
                    "content_type": "text/plain",
                    "naming_format": "mrdata/$name/$id/output-$num",
                }
            },
        }

        if additional_job_params is not None:
            for param_name in additional_job_params:
                if param_name in kwargs["mapper_params"]:
                    raise Exception("Additional job param %s shadows an existing mapper " "param" % param_name)
                kwargs["mapper_params"][param_name] = copy.deepcopy(additional_job_params[param_name])

        mr_pipeline = MapReduceJobPipeline(
            job_id, "%s.%s" % (cls.__module__, cls.__name__), kwargs)
        mr_pipeline.start(base_path="/mapreduce/worker/pipeline")
Example No. 56
0
    def get_recent_jobs(cls, limit, recency_msec):
        earliest_time_msec = (
            utils.get_current_time_in_millisecs() - recency_msec)
        return cls.query().filter(
            cls.time_queued_msec > earliest_time_msec
        ).order(-cls.time_queued_msec).fetch(limit)
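The recency window reduces to a single subtraction. A minimal sketch of the cutoff computation, using time.time() in place of utils.get_current_time_in_millisecs():

import time

def earliest_allowed_time_msec(recency_msec):
    # A job counts as recent if it was queued within the last recency_msec
    # milliseconds, i.e. time_queued_msec > now_msec - recency_msec.
    now_msec = time.time() * 1000.0
    return now_msec - recency_msec

# Cutoff for jobs queued within the last 7 days.
cutoff_msec = earliest_allowed_time_msec(7 * 24 * 60 * 60 * 1000)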
Example No. 57
0
    def get(self):
        """Handles GET requests."""
        self.values['counters'] = [{
            'name': counter.name,
            'description': counter.description,
            'value': counter.value
        } for counter in counters.Registry.get_all_counters()]

        if counters.HTML_RESPONSE_COUNT.value:
            average_time = (
                counters.HTML_RESPONSE_TIME_SECS.value /
                counters.HTML_RESPONSE_COUNT.value)
            self.values['counters'].append({
                'name': 'average-html-response-time-secs',
                'description': 'Average HTML response time in seconds',
                'value': average_time
            })

        if counters.JSON_RESPONSE_COUNT.value:
            average_time = (
                counters.JSON_RESPONSE_TIME_SECS.value /
                counters.JSON_RESPONSE_COUNT.value)
            self.values['counters'].append({
                'name': 'average-json-response-time-secs',
                'description': 'Average JSON response time in seconds',
                'value': average_time
            })

        demo_exploration_ids = feconf.DEMO_EXPLORATIONS.keys()

        recent_job_data = jobs.get_data_for_recent_jobs()
        unfinished_job_data = jobs.get_data_for_unfinished_jobs()
        for job in unfinished_job_data:
            job['can_be_canceled'] = job['is_cancelable'] and any([
                klass.__name__ == job['job_type']
                for klass in jobs_registry.ONE_OFF_JOB_MANAGERS])

        queued_or_running_job_types = set([
            job['job_type'] for job in unfinished_job_data])
        one_off_job_specs = [{
            'job_type': klass.__name__,
            'is_queued_or_running': (
                klass.__name__ in queued_or_running_job_types)
        } for klass in jobs_registry.ONE_OFF_JOB_MANAGERS]

        continuous_computations_data = jobs.get_continuous_computations_info(
            jobs_registry.ALL_CONTINUOUS_COMPUTATION_MANAGERS)
        for computation in continuous_computations_data:
            if computation['last_started_msec']:
                computation['human_readable_last_started'] = (
                    utils.get_human_readable_time_string(
                        computation['last_started_msec']))
            if computation['last_stopped_msec']:
                computation['human_readable_last_stopped'] = (
                    utils.get_human_readable_time_string(
                        computation['last_stopped_msec']))
            if computation['last_finished_msec']:
                computation['human_readable_last_finished'] = (
                    utils.get_human_readable_time_string(
                        computation['last_finished_msec']))

        self.values.update({
            'continuous_computations_data': continuous_computations_data,
            'demo_collections': sorted(feconf.DEMO_COLLECTIONS.iteritems()),
            'demo_explorations': sorted(feconf.DEMO_EXPLORATIONS.iteritems()),
            'demo_exploration_ids': demo_exploration_ids,
            'human_readable_current_time': (
                utils.get_human_readable_time_string(
                    utils.get_current_time_in_millisecs())),
            'one_off_job_specs': one_off_job_specs,
            'recent_job_data': recent_job_data,
            'rte_components_html': jinja2.utils.Markup(
                rte_component_registry.Registry.get_html_for_all_components()),
            'unfinished_job_data': unfinished_job_data,
            'value_generators_js': jinja2.utils.Markup(
                editor.VALUE_GENERATORS_JS.value),
        })

        self.render_template('admin/admin.html')
Example No. 58
0
    def get(self):
        """Clean up intermediate data items for completed M/R jobs that
        started more than MAX_MAPREDUCE_METADATA_RETENTION_MSECS milliseconds
        ago.

        Map/reduce runs leave around a large number of rows in several
        tables.  This data is useful to have around for a while:
        - it helps diagnose any problems with jobs that may be occurring
        - it shows where resource usage is occurring
        However, after a few days, this information is less relevant, and
        should be cleaned up.
        """
        recency_msec = MAX_MAPREDUCE_METADATA_RETENTION_MSECS

        num_cleaned = 0

        min_age_msec = recency_msec
        # Only consider jobs that started at most 1 week before recency_msec.
        max_age_msec = recency_msec + 7 * 24 * 60 * 60 * 1000
        # The latest start time that a job scheduled for cleanup may have.
        max_start_time_msec = (
            utils.get_current_time_in_millisecs() - min_age_msec)

        # Get the pipeline ids of jobs that started between min_age_msec and
        # max_age_msec milliseconds before now.
        pipeline_id_to_job_instance = {}

        job_instances = job_models.JobModel.get_recent_jobs(1000, max_age_msec)
        for job_instance in job_instances:
            if (job_instance.time_started_msec < max_start_time_msec and not
                    job_instance.has_been_cleaned_up):
                if 'root_pipeline_id' in job_instance.metadata:
                    pipeline_id = job_instance.metadata['root_pipeline_id']
                    pipeline_id_to_job_instance[pipeline_id] = job_instance

        # Clean up pipelines.
        for pline in pipeline.get_root_list()['pipelines']:
            pipeline_id = pline['pipelineId']
            job_definitely_terminated = (
                pline['status'] == 'done' or
                pline['status'] == 'aborted' or
                pline['currentAttempt'] > pline['maxAttempts'])
            have_start_time = 'startTimeMs' in pline
            job_started_too_long_ago = (
                have_start_time and
                pline['startTimeMs'] < max_start_time_msec)

            if (job_started_too_long_ago or
                (not have_start_time and job_definitely_terminated)):
                # At this point, the map/reduce pipeline is either in a
                # terminal state, or has taken so long that there's no
                # realistic possibility that there might be a race condition
                # between this and the job actually completing.
                if pipeline_id in pipeline_id_to_job_instance:
                    job_instance = pipeline_id_to_job_instance[pipeline_id]
                    job_instance.has_been_cleaned_up = True
                    job_instance.put()

                # This enqueues a deferred cleanup item.
                p = pipeline.Pipeline.from_id(pipeline_id)
                if p:
                    p.cleanup()
                    num_cleaned += 1

        logging.warning('%s MR jobs cleaned up.' % num_cleaned)

        if job_models.JobModel.do_unfinished_jobs_exist(
                jobs.JobCleanupManager.__name__):
            logging.warning('A previous cleanup job is still running.')
        else:
            jobs.JobCleanupManager.enqueue(
                jobs.JobCleanupManager.create_new(), additional_job_params={
                    jobs.MAPPER_PARAM_MAX_START_TIME_MSEC: max_start_time_msec
                })
            logging.warning('Deletion jobs for auxiliary entities kicked off.')
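The cleanup window is just two offsets from the current time. A minimal sketch of that arithmetic, with an assumed value for the retention constant (the real MAX_MAPREDUCE_METADATA_RETENTION_MSECS is defined elsewhere in the module):

import time

# Assumed retention period of 3 days, purely for illustration.
MAX_MAPREDUCE_METADATA_RETENTION_MSECS = 3 * 24 * 60 * 60 * 1000

def cleanup_window_msec(now_msec=None):
    # Returns (max_start_time_msec, max_age_msec): a job is considered for
    # cleanup only if it started at least the retention period ago, and is
    # fetched only if it was queued no more than a week before that.
    if now_msec is None:
        now_msec = time.time() * 1000.0
    min_age_msec = MAX_MAPREDUCE_METADATA_RETENTION_MSECS
    max_age_msec = min_age_msec + 7 * 24 * 60 * 60 * 1000
    max_start_time_msec = now_msec - min_age_msec
    return max_start_time_msec, max_age_msec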
Example No. 59
0
    def _update_last_finished_time_transactional():
        cc_model = job_models.ContinuousComputationModel.get(cls.__name__)
        cc_model.last_finished_msec = utils.get_current_time_in_millisecs()
        cc_model.put()
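The function above is written as a closure so it can be handed to a transaction runner and executed atomically. A minimal sketch of that pattern, with a dummy run_in_transaction and a fake model standing in for the real datastore machinery (assumptions, not the actual Oppia API):

import time

def run_in_transaction(fn, *args, **kwargs):
    # Dummy transaction runner; a real one would open a datastore transaction
    # around the call.
    return fn(*args, **kwargs)

class FakeContinuousComputationModel(object):
    last_finished_msec = None

cc_model = FakeContinuousComputationModel()

def _update_last_finished_time_transactional():
    # All reads and writes happen inside the function body, so the whole
    # update is atomic when executed by a real transaction runner.
    cc_model.last_finished_msec = time.time() * 1000.0

run_in_transaction(_update_last_finished_time_transactional)
assert cc_model.last_finished_msec is not None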
Example No. 60
0
    def get_new_id(cls, entity_name):
        """Overrides the superclass method."""
        job_type = entity_name
        current_time_str = str(int(utils.get_current_time_in_millisecs()))
        random_int = random.randint(0, 1000)
        return '%s-%s-%s' % (job_type, current_time_str, random_int)
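The resulting ID embeds the job type and queue time, which makes IDs easy to read in logs. A minimal standalone sketch of the same format, with time.time() standing in for utils.get_current_time_in_millisecs():

import random
import time

def generate_job_id(job_type):
    # Mirrors get_new_id above: '<job type>-<queue time in ms>-<random int>'.
    current_time_str = str(int(time.time() * 1000))
    random_int = random.randint(0, 1000)
    return '%s-%s-%s' % (job_type, current_time_str, random_int)

# e.g. 'SomeOneOffJob-1462345678901-417' (hypothetical job class name used
# purely for illustration).
print(generate_job_id('SomeOneOffJob'))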