Пример #1
0
def clone_model(model: datastore_services.TYPE_MODEL_SUBCLASS,
                **new_values: Any) -> datastore_services.TYPE_MODEL_SUBCLASS:
    """Returns a new entity copied from the given model.

    Every property declared on the model's class is read from the original
    entity and handed to the constructor of the clone. Keyword arguments
    supplied by the caller take precedence over the copied values. The clone
    uses the 'id' keyword argument as its ID when one is supplied and truthy,
    otherwise the value returned by get_model_id(model).

    IMPORTANT: This function should be used in EVERY DoFn, because one of
    Apache Beam's invariants is that all input values are IMMUTABLE.
    TODO(#12449): Use a metaclass to wrap DoFn.process() with a function that
    clones inputs, so that contributors don't need to remember to.

    Args:
        model: datastore_services.Model. Model to clone.
        **new_values: dict(str: *). Keyword arguments to override when
            invoking the cloned entity's constructor.

    Returns:
        datastore_services.Model. A cloned, and possibly modified, copy of
        the given model. The clone is built with the class of the original
        model, so it has the same type.
    """
    # Reference implementation: https://stackoverflow.com/a/2712401/4859885.
    overriding_id = new_values.pop('id', None)
    model_id = overriding_id or get_model_id(model)
    model_cls = model.__class__
    model_properties = model_cls._properties  # pylint: disable=protected-access
    constructor_kwargs = {}
    for prop_name, prop in model_properties.items():
        constructor_kwargs[prop_name] = prop.__get__(model, model_cls)
    # Caller-supplied values win over the values copied from the original.
    constructor_kwargs.update(new_values)
    with datastore_services.get_ndb_context():
        return model_cls(id=model_id, **constructor_kwargs)
    def process(
        self, ref_exp_summary_model: exp_models.ExpSummaryModel,
        compared_exp_summary_models: Iterable[exp_models.ExpSummaryModel]
    ) -> Iterable[Tuple[str, Dict[str, Union[str, float]]]]:
        """Computes similarities between explorations.

        The reference summary is never compared against itself, and only
        pairs whose score reaches SIMILARITY_SCORE_THRESHOLD are emitted.

        Args:
            ref_exp_summary_model: ExpSummaryModel. Reference exploration
                summary. We are trying to find explorations similar to this
                reference summary.
            compared_exp_summary_models: iterable(ExpSummaryModel). Other
                exploration summaries against which we compare the reference
                summary.

        Yields:
            (str, dict(str, str|float)). Tuple, the first element is
            the exploration ID of the reference exploration summary.
            The second is a dictionary. The structure of the dictionary is:
                exp_id: str. The ID of the similar exploration.
                similarity_score: float. The similarity score for
                    the exploration.
        """
        with datastore_services.get_ndb_context():
            for other_summary_model in compared_exp_summary_models:
                if other_summary_model.id != ref_exp_summary_model.id:
                    score = recommendations_services.get_item_similarity(
                        ref_exp_summary_model, other_summary_model)
                    if score >= SIMILARITY_SCORE_THRESHOLD:
                        similarity = {
                            'similarity_score': score,
                            'exp_id': other_summary_model.id
                        }
                        yield (ref_exp_summary_model.id, similarity)
Пример #3
0
def run_job(
    job_class: Type[base_jobs.JobBase],
    sync: bool,
    namespace: Optional[str] = None,
    pipeline: Optional[beam.Pipeline] = None
) -> beam_job_models.BeamJobRunModel:
    """Runs the specified job and returns the model tracking the run.

    When `sync` is True, the function waits for the pipeline to finish before
    returning and marks the run as DONE. Otherwise, the pipeline is only
    started, and the job ID and state reported by the run result are recorded
    on the returned model.

    Args:
        job_class: type(base_jobs.JobBase). The type of job to run.
        sync: bool. Whether to run the job synchronously.
        namespace: str. The namespace in which models should be created.
        pipeline: Pipeline. The pipeline to run the job upon. If omitted, then
            a new pipeline will be used instead: one using a DirectRunner
            when `sync` is True, and a DataflowRunner otherwise.

    Returns:
        BeamJobRunModel. Contains metadata related to the execution status of
        the job.

    Raises:
        RuntimeError. Failed to deploy given job to the Dataflow service.
            NOTE: This is raised inside the job bookkeeping context; see the
            note in the body about exceptions raised there.
    """
    if pipeline is None:
        if sync:
            runner = runners.DirectRunner()
        else:
            runner = runners.DataflowRunner()
        pipeline = beam.Pipeline(
            runner=runner,
            options=job_options.JobOptions(namespace=namespace))

    job = job_class(pipeline)
    job_name = job_class.__name__

    # NOTE: Exceptions raised within this context are logged and suppressed.
    with _job_bookkeeping_context(job_name) as run_model:
        _ = job.run() | job_io.PutResults(run_model.id)

        run_result = pipeline.run()

        if sync:
            run_result.wait_until_finish()
            run_model.latest_job_state = beam_job_models.BeamJobState.DONE.value

        elif not run_result.has_job:
            raise RuntimeError(
                'Failed to deploy %s to the Dataflow service. Please try again '
                'after a few minutes.' % job_name)

        else:
            run_model.dataflow_job_id = run_result.job_id()
            run_model.latest_job_state = run_result.state

    # NDB operations in Beam do not properly update the context cache
    # (this cache is separate for every application thread), thus we clear
    # it ourselves.
    with datastore_services.get_ndb_context() as ndb_context:
        ndb_context.clear_cache()

    return run_model
Пример #4
0
    def _create_skill_opportunity_model(
        skill: skill_models.SkillModel,
        question_skill_links: List[question_models.QuestionSkillLinkModel]
    ) -> result.Result[opportunity_models.SkillOpportunityModel, Exception]:
        """Builds a skill opportunity model from a skill and its question
        links.

        A SkillOpportunity domain object is created and validated first, and
        the model is then populated from that domain object. Any exception
        raised along the way is captured and returned instead of propagating.

        Args:
            skill: skill_models.SkillModel. The skill to create the opportunity
                for.
            question_skill_links: list(question_models.QuestionSkillLinkModel).
                The list of QuestionSkillLinkModel for the given skill.

        Returns:
            Result[opportunity_models.SkillOpportunityModel, Exception].
            Result object that contains SkillOpportunityModel when the operation
            is successful and Exception when an exception occurs.
        """
        try:
            skill_id = skill.id
            skill_description = skill.description
            question_count = (
                GenerateSkillOpportunityModelJob.
                _count_unique_question_ids(question_skill_links))
            opportunity = opportunity_domain.SkillOpportunity(
                skill_id=skill_id,
                skill_description=skill_description,
                question_count=question_count)
            opportunity.validate()
            with datastore_services.get_ndb_context():
                model = opportunity_models.SkillOpportunityModel(
                    id=opportunity.id,
                    skill_description=opportunity.skill_description,
                    question_count=opportunity.question_count)
                model.update_timestamps()
                return result.Ok(model)
        except Exception as e:
            # Failures are reported through the Result value rather than
            # raised to the caller.
            return result.Err(e)
Пример #5
0
 def __call__(self, environ: Dict[str, str],
              start_response: webapp2.Response) -> Any:
     """Runs the wrapped WSGI application inside an NDB context.

     The NDB context is created with a RedisCache as its global cache.

     Args:
         environ: dict(str, str). The WSGI environment, passed through to
             the wrapped application.
         start_response: webapp2.Response. Passed through unchanged to the
             wrapped application.

     Returns:
         *. Whatever the wrapped WSGI application returns.
     """
     redis_cache = datastore_services.RedisCache(
         cache_services.CLOUD_NDB_REDIS_CLIENT
     )  # type: ignore[attr-defined]
     ndb_context = datastore_services.get_ndb_context(
         global_cache=redis_cache)
     with ndb_context:
         return self.wsgi_app(environ, start_response)
Пример #6
0
    def _update_skill(
        skill_model: skill_models.SkillModel,
        migrated_skill: skill_domain.Skill,
        skill_changes: Sequence[skill_domain.SkillChange]
    ) -> Sequence[base_models.BaseModel]:
        """Generates newly updated skill models.

        Args:
            skill_model: SkillModel. The skill which should be updated.
            migrated_skill: Skill. The migrated skill domain object.
            skill_changes: sequence(SkillChange). The skill changes to apply.

        Returns:
            list(BaseModel). The models which should be put into the
            datastore, with their timestamps already updated.
        """
        updated_skill_model = (skill_services.populate_skill_model_fields(
            skill_model, migrated_skill))
        commit_message = ('Update skill content schema version to %d and '
                          'skill misconceptions schema version to %d and '
                          'skill rubrics schema version to %d.') % (
                              feconf.CURRENT_SKILL_CONTENTS_SCHEMA_VERSION,
                              feconf.CURRENT_MISCONCEPTIONS_SCHEMA_VERSION,
                              feconf.CURRENT_RUBRIC_SCHEMA_VERSION)
        change_dicts = [change.to_dict() for change in skill_changes]
        with datastore_services.get_ndb_context():
            # Materialize the values into a list: a dict view is not a
            # Sequence (it cannot be indexed), which would contradict the
            # declared return type.
            models_to_put = list(
                updated_skill_model.compute_models_to_commit(
                    feconf.MIGRATION_BOT_USERNAME,
                    feconf.COMMIT_TYPE_EDIT,
                    commit_message,
                    change_dicts,
                    additional_models={}).values())
        datastore_services.update_timestamps_multi(models_to_put)
        return models_to_put
Пример #7
0
    def _update_story(
        story_model: story_models.StoryModel,
        migrated_story: story_domain.Story,
        story_change: story_domain.StoryChange
    ) -> Sequence[base_models.BaseModel]:
        """Produces the models that must be stored for a migrated story.

        Args:
            story_model: StoryModel. The story which should be updated.
            migrated_story: Story. The migrated story domain object.
            story_change: StoryChange. The story change to apply.

        Returns:
            sequence(BaseModel). Sequence of models which should be put into
            the datastore.
        """
        populated_story_model = story_services.populate_story_model_fields(
            story_model, migrated_story)
        change_dicts = [story_change.to_dict()]
        with datastore_services.get_ndb_context():
            commit_message = 'Update story contents schema version to %d.' % (
                feconf.CURRENT_STORY_CONTENTS_SCHEMA_VERSION)
            models_to_commit = populated_story_model.compute_models_to_commit(
                feconf.MIGRATION_BOT_USERNAME,
                feconf.COMMIT_TYPE_EDIT,
                commit_message,
                change_dicts,
                additional_models={}
            )
        narrowed_models = []
        for candidate in models_to_commit.values():
            # Here, we are narrowing down the type from object to BaseModel.
            assert isinstance(candidate, base_models.BaseModel)
            narrowed_models.append(candidate)
        datastore_services.update_timestamps_multi(narrowed_models)
        return narrowed_models
Пример #8
0
 def __call__(self, environ: Dict[str, str],
              start_response: webapp2.Response) -> webapp2.Response:
     """Runs the wrapped WSGI application inside an NDB context.

     The NDB context is created with a RedisCache as its global cache.

     Args:
         environ: dict(str, str). The WSGI environment, passed through to
             the wrapped application.
         start_response: webapp2.Response. Passed through unchanged to the
             wrapped application.

     Returns:
         webapp2.Response. The value returned by the wrapped WSGI
         application.
     """
     redis_cache = datastore_services.RedisCache(
         cache_services.CLOUD_NDB_REDIS_CLIENT
     )  # type: ignore[attr-defined]
     ndb_context = datastore_services.get_ndb_context(
         global_cache=redis_cache)
     with ndb_context:
         # Cast is needed since webapp2.WSGIApplication is not
         # correctly typed.
         wsgi_response = self.wsgi_app(environ, start_response)
         return cast(webapp2.Response, wsgi_response)
Пример #9
0
    def create_beam_job_run_result_model(
            self, result: job_run_result.JobRunResult,
            namespace: Optional[str]) -> beam_job_models.BeamJobRunResultModel:
        """Builds the NDB model that stores the given JobRunResult.

        The model is created for this object's job ID, inside an NDB context
        opened for the given namespace.

        Args:
            result: job_run_result.JobRunResult. The result.
            namespace: str. The namespace in which models should be created.

        Returns:
            BeamJobRunResultModel. The NDB model.
        """
        ndb_context = datastore_services.get_ndb_context(namespace=namespace)
        with ndb_context:
            return beam_job_services.create_beam_job_run_result_model(
                self.job_id, result.stdout, result.stderr)
    def process(
        self, user_settings_model: user_models.UserSettingsModel
    ) -> Iterable[user_models.UserStatsModel]:
        """Creates an empty user stats model sharing the settings model's ID.

        Args:
            user_settings_model: UserSettingsModel. Model from which to
                create the user stats model.

        Yields:
            UserStatsModel. The created user stats model.
        """
        with datastore_services.get_ndb_context():
            stats_model_id = user_settings_model.id
            empty_stats_model = user_models.UserStatsModel(id=stats_model_id)
        empty_stats_model.update_timestamps()
        yield empty_stats_model
Пример #11
0
    def run_job(self) -> beam.PCollection[job_run_result.JobRunResult]:
        """Runs self.job and returns its output.

        Test authors should override this method if their jobs need arguments
        for their run() method. For example, tests for the following job
        should override this method to provide a value for `model_kind`:

            class FooJob(JobBase):
                def run(self, model_kind):
                    pass

        Returns:
            PCollection. The output of the job.
        """
        job_output = self.job.run()
        # NDB operations in Beam do not properly update the context cache
        # (this cache is separate for every application thread), thus we clear
        # it ourselves.
        ndb_context_manager = datastore_services.get_ndb_context()
        with ndb_context_manager as active_context:
            active_context.clear_cache()
        return job_output
Пример #12
0
    def _generate_translation_contribution_model(
        entity_id: str,
        translation: suggestion_registry.TranslationContributionStats
    ) -> suggestion_models.TranslationContributionStatsModel:
        """Builds a translation contribution stats model from a domain object.

        The language code, contributor user ID and topic ID are taken from
        the three dot-separated parts of the entity ID, in that order. All
        counts and the contribution dates are copied from the domain object.

        Args:
            entity_id: str. The ID of the model. It must consist of exactly
                three parts separated by dots.
            translation: TranslationContributionStats. Domain object.

        Returns:
            TranslationContributionStatsModel. The created model.
        """
        id_parts = entity_id.split('.')
        language_code, contributor_user_id, topic_id = id_parts
        with datastore_services.get_ndb_context():
            stats_model = suggestion_models.TranslationContributionStatsModel(
                id=entity_id,
                language_code=language_code,
                contributor_user_id=contributor_user_id,
                topic_id=topic_id,
                submitted_translations_count=(
                    translation.submitted_translations_count),
                submitted_translation_word_count=(
                    translation.submitted_translation_word_count),
                accepted_translations_count=(
                    translation.accepted_translations_count),
                accepted_translations_without_reviewer_edits_count=(
                    translation
                    .accepted_translations_without_reviewer_edits_count
                ),
                accepted_translation_word_count=(
                    translation.accepted_translation_word_count),
                rejected_translations_count=(
                    translation.rejected_translations_count),
                rejected_translation_word_count=(
                    translation.rejected_translation_word_count),
                contribution_dates=translation.contribution_dates
            )
            stats_model.update_timestamps()
            return stats_model
Пример #13
0
    def _create_recommendation(
            exp_id: str, recommended_exp_ids: Iterable[str]
    ) -> recommendations_models.ExplorationRecommendationsModel:
        """Builds the recommendations model for a single exploration.

        Args:
            exp_id: str. The exploration ID for which the recommendation is
                created. It is also used as the ID of the created model.
            recommended_exp_ids: iterable(str). The recommended exploration
                IDs.

        Returns:
            ExplorationRecommendationsModel. The created model.
        """
        ndb_context = datastore_services.get_ndb_context()
        with ndb_context:
            recommendation_model = (
                recommendations_models.ExplorationRecommendationsModel(
                    id=exp_id,
                    recommended_exploration_ids=recommended_exp_ids))
        recommendation_model.update_timestamps()
        return recommendation_model
    def _generate_opportunities_related_to_topic(
        topic: topic_domain.Topic,
        stories_dict: Dict[str, story_domain.Story],
        exps_dict: Dict[str, exp_domain.Exploration]
    ) -> result.Result[
        List[opportunity_models.ExplorationOpportunitySummaryModel], Exception
    ]:
        """Generate opportunities related to a topic.

        One opportunity summary model is generated for every exploration
        linked from the topic's canonical stories. If any canonical story, or
        any exploration linked from those stories, is missing from the given
        dicts, no models are generated and an error is returned instead.

        Args:
            topic: Topic. Topic for which to generate the opportunities.
            stories_dict: dict(str, Story). All stories in the datastore, keyed
                by their ID.
            exps_dict: dict(str, Exploration). All explorations in
                the datastore, keyed by their ID.

        Returns:
            Result[list(ExplorationOpportunitySummaryModel), Exception].
            Result object that contains the generated models when the
            operation is successful and the Exception when an exception
            occurs.
        """
        try:
            story_ids = topic.get_canonical_story_ids() # type: ignore[no-untyped-call]
            existing_story_ids = (
                set(stories_dict.keys()).intersection(story_ids))
            exp_ids: List[str] = list(itertools.chain.from_iterable(
                stories_dict[story_id].story_contents.get_all_linked_exp_ids()
                for story_id in existing_story_ids))
            existing_exp_ids = set(exps_dict.keys()).intersection(exp_ids)

            missing_story_ids = set(story_ids).difference(existing_story_ids)
            missing_exp_ids = set(exp_ids).difference(existing_exp_ids)
            # Abort before building anything when a referenced story or
            # exploration cannot be found.
            if missing_exp_ids or missing_story_ids:
                raise Exception(
                    'Failed to regenerate opportunities for topic id: %s, '
                    'missing_exp_with_ids: %s, missing_story_with_ids: %s' % (
                        topic.id,
                        list(missing_exp_ids),
                        list(missing_story_ids)))

            exploration_opportunity_summary_list = []
            for story_id in existing_story_ids:
                story = stories_dict[story_id]
                for exp_id in story.story_contents.get_all_linked_exp_ids():
                    exploration_opportunity_summary_list.append(
                        opportunity_services.create_exp_opportunity_summary( # type: ignore[no-untyped-call]
                            topic, story, exps_dict[exp_id]))

            exploration_opportunity_summary_model_list = []
            with datastore_services.get_ndb_context():
                for opportunity in exploration_opportunity_summary_list:
                    model = (
                        opportunity_models.ExplorationOpportunitySummaryModel(
                            id=opportunity.id,
                            topic_id=opportunity.topic_id,
                            topic_name=opportunity.topic_name,
                            story_id=opportunity.story_id,
                            story_title=opportunity.story_title,
                            chapter_title=opportunity.chapter_title,
                            content_count=opportunity.content_count,
                            incomplete_translation_language_codes=(
                                opportunity
                                .incomplete_translation_language_codes
                            ),
                            translation_counts=opportunity.translation_counts,
                            language_codes_needing_voice_artists=(
                                opportunity
                                .language_codes_needing_voice_artists
                            ),
                            language_codes_with_assigned_voice_artists=(
                                opportunity
                                .language_codes_with_assigned_voice_artists
                            )
                        )
                    )
                    model.update_timestamps()
                    exploration_opportunity_summary_model_list.append(model)

            return result.Ok(exploration_opportunity_summary_model_list)
        except Exception as e:
            # Failures, including the missing-entity error raised above, are
            # reported through the Result value rather than raised.
            return result.Err(e)