示例#1
0
    def _reload_collection(self, collection_id):
        """Reloads the demo collection with the given id.

        Reloading is only permitted when constants.DEV_MODE is truthy. The
        collection is loaded via collection_services.load_demo and its
        ownership is then released on behalf of the system user.

        Args:
            collection_id: str. The collection id.

        Raises:
            Exception. Cannot reload a collection in production.
        """
        if not constants.DEV_MODE:
            raise Exception('Cannot reload a collection in production.')

        admin_log_message = '[ADMIN] %s reloaded collection %s' % (
            self.user_id, collection_id)
        logging.info(admin_log_message)

        collection_services.load_demo(
            python_utils.convert_to_bytes(collection_id))

        system_user = user_services.get_system_user()
        rights_manager.release_ownership_of_collection(
            system_user, python_utils.convert_to_bytes(collection_id))
 def map(item):
     """Re-saves the given model and reports whether the save succeeded.

     The model is stored with update_last_updated_time=False.

     Args:
         item: *. The model instance to store again.

     Yields:
         tuple. ('SUCCESS', 1) when the put call completes, otherwise
         ('FAILURE', message) where the message names the model class, its id
         and the error that was raised.
     """
     try:
         item.put(update_last_updated_time=False)
     except Exception as e:
         failure_details = '%s with id %s failed with error: %s' % (
             item.__class__.__name__,
             item.id,
             python_utils.convert_to_bytes(e))
         yield ('FAILURE', failure_details)
     else:
         yield ('SUCCESS', 1)
    def __init__(self, id_property, model_id, target_kind, target_id):
        """Builds the error for an ID property that points at a missing model.

        Args:
            id_property: ModelProperty. The property referring to the ID of the
                target model.
            model_id: bytes. The ID of the model with problematic ID property.
            target_kind: str. The kind of model the property refers to.
            target_id: bytes. The ID of the specific model that the property
                refers to. NOTE: This is the value of the ID property.
        """
        # NOTE: Both IDs are passed through convert_to_bytes because that is
        # how they are read from and written to the datastore.
        source_kind = id_property.model_kind
        source_id = python_utils.convert_to_bytes(model_id)
        super(ModelRelationshipError, self).__init__(
            source_kind, model_id=source_id)

        message_template = (
            '%s=%r should correspond to the ID of an existing %s, '
            'but no such model exists')
        self.message = message_template % (
            id_property, python_utils.convert_to_bytes(target_id), target_kind)
示例#4
0
    def get(self, page_context, page_identifier, asset_type, encoded_filename):
        """Returns an asset file.

        The handler only serves assets when constants.DEV_MODE is truthy.

        Args:
            page_context: str. The context of the page where the asset is
                required.
            page_identifier: str. The unique identifier for the particular
                context. Valid page_context: page_identifier pairs:
                exploration: exp_id
                story: story_id
                topic: topic_id
                skill: skill_id
                subtopic: topic_name of the topic that it is part of.
            asset_type: str. Type of the asset, either image or audio.
            encoded_filename: str. The asset filename. This
                string is encoded in the frontend using encodeURIComponent().

        Raises:
            PageNotFoundException. The server is not in dev mode, or any
                error (including an unsupported page_context) occurred while
                locating or reading the asset.
        """
        if not constants.DEV_MODE:
            raise self.PageNotFoundException

        try:
            filename = python_utils.urllib_unquote(encoded_filename)
            file_format = filename[(filename.rfind('.') + 1):]

            # If the following is not cast to str, an error occurs in the wsgi
            # library because unicode gets used.
            self.response.headers[
                'Content-Type'] = python_utils.convert_to_bytes(
                    '%s/%s' % (asset_type, file_format))

            if page_context == feconf.ENTITY_TYPE_SUBTOPIC:
                # Subtopic assets are stored under the parent topic, which is
                # looked up by name.
                entity_type = feconf.ENTITY_TYPE_TOPIC
                topic = topic_fetchers.get_topic_by_name(page_identifier)
                entity_id = topic.id
            elif page_context in (
                    feconf.ENTITY_TYPE_EXPLORATION,
                    feconf.ENTITY_TYPE_SKILL,
                    feconf.ENTITY_TYPE_TOPIC,
                    feconf.ENTITY_TYPE_STORY):
                entity_type = page_context
                entity_id = page_identifier
            else:
                raise self.InvalidInputException

            fs = fs_domain.AbstractFileSystem(
                fs_domain.DatastoreBackedFileSystem(entity_type, entity_id))
            raw = fs.get('%s/%s' % (asset_type, filename))

            self.response.cache_control.no_cache = None
            self.response.cache_control.public = True
            self.response.cache_control.max_age = 600
            self.response.write(raw)
        except Exception:
            # Every ordinary failure is reported as a 404. A bare "except:"
            # would additionally trap SystemExit and KeyboardInterrupt, which
            # must be allowed to propagate.
            raise self.PageNotFoundException
示例#5
0
    def commit(self, filepath, raw_bytes, mimetype=None):
        """Overwrites the file at the given path with the supplied content.

        The content is converted with python_utils.convert_to_bytes before
        the filepath is checked and the write is delegated to self._impl.

        Args:
            filepath: str. The path to the relevant file within the entity's
                assets folder.
            raw_bytes: str. The content to be stored in the file.
            mimetype: str. The content-type of the file.
        """
        content_as_bytes = python_utils.convert_to_bytes(raw_bytes)
        self._check_filepath(filepath)
        self._impl.commit(filepath, content_as_bytes, mimetype)
示例#6
0
def base64_from_int(value):
    # type: (int) -> Text
    """Encodes a number, wrapped in square brackets, as base64.

    The value is first converted with python_utils.convert_to_bytes, then
    surrounded by b'[' and b']' before being base64-encoded.

    Args:
        value: int. Integer value for conversion into base64.

    Returns:
        *. The result of base64.b64encode for the bracketed value.
    """
    value_bytes = python_utils.convert_to_bytes(value) # type: ignore[no-untyped-call]
    return base64.b64encode(b'[' + value_bytes + b']')
示例#7
0
def generate_signature(secret, message, vm_id):
    """Computes the HMAC-SHA256 signature for a message sent by a VM.

    The signed payload is the base64-encoded message and the VM id, joined
    by a '|' separator.

    Args:
        secret: bytes. The secret used to communicate with Oppia-ml.
        message: bytes. The message payload data.
        vm_id: str. The ID of the VM that generated the message.

    Returns:
        str. The hex digest of the signature of the payload data.
    """
    vm_id_bytes = python_utils.convert_to_bytes(vm_id)
    encoded_message = base64.b64encode(message)
    signed_payload = b'%s|%s' % (encoded_message, vm_id_bytes)
    signer = hmac.new(secret, msg=signed_payload, digestmod=hashlib.sha256)
    return signer.hexdigest()
    def test_redis_configuration_file_matches_feconf_redis_configuration(self):
        """Checks that redis.conf declares the same port as feconf.REDISPORT.

        The first line of redis.conf must consist of exactly two
        whitespace-separated tokens, the second of which is the port.
        """
        redis_conf_path = os.path.join(common.CURR_DIR, 'redis.conf')
        self.assertTrue(os.path.exists(redis_conf_path))

        with python_utils.open_file(redis_conf_path, 'r') as redis_conf:
            first_line = redis_conf.readlines()[0]

        port_definition = first_line.split()
        self.assertEqual(len(port_definition), 2)
        expected_port = python_utils.convert_to_bytes(feconf.REDISPORT)
        self.assertEqual(port_definition[1], expected_port)
示例#9
0
    def map(item):
        """Validates a non-deleted exploration and reports validation errors.

        Explorations whose rights status is not private are validated with
        strict=True; private ones are validated with the default arguments.

        Args:
            item: *. The exploration model to validate.

        Yields:
            tuple. (item.id, error) when validation raises
            utils.ValidationError. Nothing is yielded otherwise.
        """
        if item.deleted:
            return

        exploration = exp_fetchers.get_exploration_from_model(item)
        exp_rights = rights_manager.get_exploration_rights(item.id)

        try:
            if exp_rights.status != rights_domain.ACTIVITY_STATUS_PRIVATE:
                exploration.validate(strict=True)
            else:
                exploration.validate()
        except utils.ValidationError as validation_error:
            yield (
                item.id, python_utils.convert_to_bytes(validation_error))
示例#10
0
def get_e2e_suite_names_from_script_travis_yml_file():
    """Collects the e2e suite names used in the .travis.yml script section.

    Returns:
        list(str). An alphabetically-sorted list of names of test suites
        from the script section in the .travis.yml file.
    """
    travis_config = read_and_parse_travis_yml_file()
    script_section = python_utils.convert_to_bytes(travis_config['script'])
    # Suite names appear in commands such as
    # python -m scripts.run_e2e_tests --suite="accessibility".
    suite_pattern = re.compile(r'--suite="([a-zA-Z_-]*)"')
    return sorted(suite_pattern.findall(script_section))
示例#11
0
def verify_signature(oppia_ml_auth_info):
    """Function that checks if the signature received from the VM is valid.

    The shared secret for the VM is looked up in
    config_domain.VMID_SHARED_SECRET_KEY_MAPPING, the expected signature is
    regenerated from the message and VM id, and the two signatures are
    compared in constant time.

    Args:
        oppia_ml_auth_info: OppiaMLAuthInfo. Domain object containing
            authentication information.

    Returns:
        bool. Whether the incoming request is valid.
    """
    # Imported locally so that this function does not rely on a module-level
    # import being present.
    import hmac

    secret = None
    for val in config_domain.VMID_SHARED_SECRET_KEY_MAPPING.value:
        if val['vm_id'] == oppia_ml_auth_info.vm_id:
            secret = python_utils.convert_to_bytes(val['shared_secret_key'])
            break
    if secret is None:
        # No shared secret is registered for this VM id.
        return False

    generated_signature = generate_signature(
        secret, python_utils.convert_to_bytes(oppia_ml_auth_info.message),
        oppia_ml_auth_info.vm_id)

    # A plain "!=" comparison stops at the first differing character, which
    # leaks timing information about the expected signature. Both values are
    # converted to bytes because compare_digest requires operands of the
    # same type.
    try:
        return hmac.compare_digest(
            python_utils.convert_to_bytes(generated_signature),
            python_utils.convert_to_bytes(oppia_ml_auth_info.signature))
    except TypeError:
        # A signature that cannot be converted to bytes can never match.
        return False
示例#12
0
    def _generate_id(cls, exp_id, exp_version, state_name):
        """Builds the ID of a Classifier Exploration Mapping, which has the
        form [exp_id].[exp_version].[state_name].

        Args:
            exp_id: str. ID of the exploration.
            exp_version: int. The exploration version at the time
                this training job was created.
            state_name: unicode. The name of the state to which the classifier
                belongs.

        Returns:
            str. ID of the new Classifier Exploration Mapping instance.
        """
        id_components = (exp_id, exp_version, state_name)
        return python_utils.convert_to_bytes('%s.%s.%s' % id_components)
示例#13
0
    def post(self):
        """Runs the deferred task function named in the request payload.

        The JSON request body must contain 'fn_identifier', which selects an
        entry of self.DEFERRED_TASK_FUNCTIONS, together with the 'args' and
        'kwargs' to call it with. An empty JSON object is rendered afterwards.

        Raises:
            Exception. The payload has no 'fn_identifier' key.
            Exception. The function identifier is not a known deferred task
                function.
        """
        payload = json.loads(self.request.body.decode())
        if 'fn_identifier' not in payload:
            raise Exception(
                'This request cannot defer tasks because it does not contain a '
                'function identifier attribute (fn_identifier). Deferred tasks '
                'must contain a function_identifier in the payload.')

        fn_identifier = payload['fn_identifier']
        if fn_identifier not in self.DEFERRED_TASK_FUNCTIONS:
            raise Exception(
                'The function id, %s, is not valid.' %
                python_utils.convert_to_bytes(fn_identifier))

        task_function = self.DEFERRED_TASK_FUNCTIONS[fn_identifier]
        task_function(*payload['args'], **payload['kwargs'])
        self.render_json({})
示例#14
0
    def get(self, page_context, page_identifier, asset_type, encoded_filename):
        """Serves an asset file when the server runs in emulator mode.

        Args:
            page_context: str. The context of the page where the asset is
                required.
            page_identifier: str. The unique identifier for the particular
                context. Valid page_context: page_identifier pairs:
                exploration: exp_id
                story: story_id
                topic: topic_id
                skill: skill_id
                subtopic: topic_name of the topic that it is part of.
            asset_type: str. Type of the asset, either image or audio.
            encoded_filename: str. The asset filename. This
                string is encoded in the frontend using encodeURIComponent().

        Raises:
            PageNotFoundException. The server is not in emulator mode, or any
                error (including an unsupported page_context) occurred while
                serving the asset.
        """
        if not constants.EMULATOR_MODE:
            raise self.PageNotFoundException

        try:
            filename = python_utils.urllib_unquote(encoded_filename)
            extension_start = filename.rfind('.') + 1
            file_format = filename[extension_start:]

            if file_format == 'svg':
                content_type = 'image/svg+xml'
            else:
                content_type = '%s/%s' % (asset_type, file_format)
            # The header value must not be unicode, otherwise an error occurs
            # in the wsgi library.
            self.response.headers[b'Content-Type'] = (
                python_utils.convert_to_bytes(content_type))

            if page_context not in self._SUPPORTED_PAGE_CONTEXTS:
                raise self.InvalidInputException

            gcs_file_system = fs_domain.GcsFileSystem(
                page_context, page_identifier)
            fs = fs_domain.AbstractFileSystem(gcs_file_system)
            asset_path = '%s/%s' % (asset_type, filename)
            raw = fs.get(asset_path)

            self.response.cache_control.no_cache = None
            self.response.cache_control.public = True
            self.response.cache_control.max_age = 600
            self.response.write(raw)
        except Exception as e:
            # Any failure above is logged and then reported as a 404.
            logging.exception(
                'File not found: %s. %s' % (encoded_filename, e))
            raise self.PageNotFoundException
示例#15
0
 def run(self):
     """Runs self.func, stores its result and logs how long it took.

     On success the return value is stored in self.output. On failure the
     exception is stored in self.exception and an ERROR line is logged,
     unless the first exception argument mentions 'KeyboardInterrupt'. In
     both cases self.finished is set to True.
     """
     try:
         self.output = self.func()
         if self.verbose:
             log('LOG %s:' % self.name, show_time=True)
             log(self.output)
             log('----------------------------------------')
         log('FINISHED %s: %.1f secs' %
             (self.name, time.time() - self.start_time), show_time=True)
         self.finished = True
     except Exception as e:
         self.exception = e
         # An exception raised without arguments has an empty args tuple.
         # Indexing args[0] unconditionally would raise IndexError here and
         # leave self.finished unset.
         first_arg = e.args[0] if e.args else ''
         if 'KeyboardInterrupt' not in python_utils.convert_to_bytes(
                 first_arg):
             log('ERROR %s: %.1f secs' %
                 (self.name, time.time() - self.start_time), show_time=True)
         self.finished = True
示例#16
0
    def setUp(self):
        """Creates a new training job with stored classifier data, the
        response expected for it, and a signed request payload.
        """
        super(NextJobHandlerTest, self).setUp()

        interaction_id = 'TextInput'
        classifier_mapping = (
            feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id])

        self.exp_id = 'exp_id1'
        self.title = 'Testing Classifier storing'
        self.category = 'Test'
        self.algorithm_id = classifier_mapping['algorithm_id']
        self.algorithm_version = classifier_mapping['algorithm_version']
        self.training_data = [
            {u'answer_group_index': 1, u'answers': [u'a1', u'a2']},
            {u'answer_group_index': 2, u'answers': [u'a2', u'a3']},
        ]

        self.job_id = classifier_models.ClassifierTrainingJobModel.create(
            self.algorithm_id, interaction_id, self.exp_id, 1,
            datetime.datetime.utcnow(), self.training_data, 'Home',
            feconf.TRAINING_JOB_STATUS_NEW, 1)

        self.classifier_data = text_classifier_pb2.TextClassifierFrozenModel()
        self.classifier_data.model_json = ''
        fs_services.save_classifier_data(
            self.exp_id, self.job_id, self.classifier_data)

        self.expected_response = {
            u'job_id': self.job_id,
            u'training_data': self.training_data,
            u'algorithm_id': self.algorithm_id,
            u'algorithm_version': self.algorithm_version
        }

        # The request payload is signed with the default VM's shared secret.
        vm_id = feconf.DEFAULT_VM_ID
        shared_secret = feconf.DEFAULT_VM_SHARED_SECRET
        message = json.dumps({})
        self.payload = {'vm_id': vm_id, 'message': message}
        self.payload['signature'] = classifier_services.generate_signature(
            python_utils.convert_to_bytes(shared_secret), message, vm_id)
示例#17
0
    def map(item):
        """Checks the HTML of a non-deleted exploration against the CKEditor
        RTE format.

        Args:
            item: *. The exploration model to check.

        Yields:
            tuple. Either a single (error message, [item.id]) pair when the
            exploration cannot be loaded, or one
            ('<key> Exp Id: <id>', errors) pair for every key of the
            validation result that has a non-empty value.
        """
        if item.deleted:
            return

        try:
            exploration = exp_fetchers.get_exploration_from_model(item)
        except Exception as e:
            load_failure_key = (
                'Error %s when loading exploration' %
                python_utils.convert_to_bytes(e))
            yield (load_failure_key, [item.id])
            return

        err_dict = html_validation_service.validate_rte_format(
            exploration.get_all_html_content_strings(),
            feconf.RTE_FORMAT_CKEDITOR)

        for key in err_dict:
            invalid_strings = err_dict[key]
            if invalid_strings:
                yield ('%s Exp Id: %s' % (key, item.id), invalid_strings)
示例#18
0
    def commit(self, filepath, raw_bytes, mimetype=None):
        """Overwrites the file at the given path with the supplied content.

        Args:
            filepath: str. The path to the relevant file within the entity's
                assets folder.
            raw_bytes: str. The content to be stored in the file.
            mimetype: str. The content-type of the file. If mimetype is set to
                'application/octet-stream' then raw_bytes is expected to
                contain binary data. In all other cases, raw_bytes is expected
                to be textual data.
        """
        if mimetype == 'application/octet-stream':
            # Binary data is stored exactly as it was given.
            file_content = raw_bytes
        else:
            # Textual data needs to be converted to bytes so that it can be
            # stored in a file opened in binary mode.
            file_content = python_utils.convert_to_bytes(raw_bytes)

        self._check_filepath(filepath)
        self._impl.commit(filepath, file_content, mimetype)
示例#19
0
def update_flaky_tests_count(sheet, row_index, current_count):
    """Increments the flaky tests count stored in the google sheet.

    Nothing happens when the FLAKY_E2E_TEST_SHEET_ID environment variable is
    not set. Otherwise column F of the 'Log' sheet, at row row_index + 5, is
    set to current_count + 1.

    Args:
        sheet: googleapiclient.discovery.Resource. The spreadsheet object.
        row_index: int. The index of the row to update in the sheet.
        current_count: int. The current count of this flake in the sheet.
    """
    sheet_id = os.getenv('FLAKY_E2E_TEST_SHEET_ID')
    if sheet_id is None:
        return

    request_body = {'values': [[current_count + 1]]}
    sheet_values = sheet.values()
    update_request = sheet_values.update(
        spreadsheetId=sheet_id,
        range='Log!F' + python_utils.convert_to_bytes(row_index + 5),
        valueInputOption='USER_ENTERED',
        body=request_body)
    update_request.execute()
    python_utils.PRINT('** NOTE: Updated sheet for first failing test **')
示例#20
0
def get_e2e_suite_names_from_jobs_travis_yml_file():
    """Collects the e2e suite names listed in the env/jobs section of the
    .travis.yml file.

    Returns:
        list(str). An alphabetically-sorted list of names of test suites
        from the jobs section in the .travis.yml file.
    """
    travis_config = read_and_parse_travis_yml_file()
    jobs_section = python_utils.convert_to_bytes(
        travis_config['env']['jobs'])
    # Jobs are declared in the form RUN_E2E_TESTS_ACCESSIBILITY=true; the
    # captured group is the upper-case suite name.
    job_pattern = re.compile(r'RUN_E2E_TESTS_([A-Z_]*)=')
    return sorted(
        utils.snake_case_to_camel_case(job_name.lower())
        for job_name in job_pattern.findall(jobs_section))
    def map(model_instance):
        """Validates a model with its pre-defined validator class.

        Args:
            model_instance: *. The model to validate.

        Yields:
            tuple. One ('failed validation check for <key> of <model>',
            errors) pair per error key reported by the validator, or a single
            success pair with the value 1 when there are no errors.
        """
        model_name = model_instance.__class__.__name__
        # Module name for models is of the form:
        # 'core.storage.<model-type>.gae_models'.
        # Module name for validators is of the form:
        # 'core.domain.<model-type>_validators'.
        # The model type is therefore taken from the model's module name. No
        # extra test is needed to verify this naming scheme, because the
        # validators test fails on the import below when it is not followed.
        model_type = model_instance.__module__.split('.')[2]
        validator_module_name = '%s_validators' % model_type
        # TODO(#10415): This try catch is required until all the validators are
        # refactored. Remove the try catch block once #10415 is fixed.
        try:
            validator_module = importlib.import_module(
                'core.domain.%s' % validator_module_name)
        except ImportError:
            validator_module = importlib.import_module(
                'core.domain.prod_validators')

        validator = getattr(validator_module, '%sValidator' % model_name)
        if model_instance.deleted:
            validator.validate_deleted(model_instance)
        else:
            validator.validate(model_instance)

        if len(validator.errors) == 0:
            yield ('%s %s' % (VALIDATION_STATUS_SUCCESS, model_name), 1)
            return

        for error_key, error_list in validator.errors.items():
            # Duplicate messages are collapsed before they are joined.
            joined_errors = (',').join(set(error_list))
            error_message = joined_errors.encode(encoding='utf-8')
            failure_key = 'failed validation check for %s of %s' % (
                error_key, model_name)
            yield (failure_key, python_utils.convert_to_bytes(error_message))
示例#22
0
文件: utils.py 项目: sajalasati/oppia
def convert_png_binary_to_data_url(content: Union[str, bytes]) -> str:
    """Builds a data URL from the binary content of a PNG image.

    Args:
        content: str. PNG binary file content.

    Returns:
        str. Data URL created from the binary content of the PNG.

    Raises:
        Exception. The given binary string does not represent a PNG image.
    """
    # Unicode input is accepted, but imghdr.what(file, h) needs 'h' to be of
    # type bytes, so the content is converted first.
    png_bytes = python_utils.convert_to_bytes(content)
    if imghdr.what(None, h=png_bytes) != 'png':
        raise Exception('The given string does not represent a PNG image.')

    encoded_png = base64.b64encode(png_bytes)
    quoted_png = python_utils.url_quote(encoded_png)  # type: ignore[no-untyped-call]
    return '%s%s' % (PNG_DATA_URL_PREFIX, quoted_png)
示例#23
0
 def run(self):
     """Runs self.func, stores its result and logs how long it took.

     On success the return value is stored in self.output. On failure the
     exception and its formatted traceback are stored in self.exception and
     self.stacktrace, and the error is logged unless the first exception
     argument mentions 'KeyboardInterrupt'. The semaphore is always released
     and self.finished is always set to True.
     """
     try:
         self.output = self.func()
         if self.verbose:
             log('LOG %s:' % self.name, show_time=True)
             log(self.output)
             log('----------------------------------------')
         log('FINISHED %s: %.1f secs' %
             (self.name, time.time() - self.start_time),
             show_time=True)
     except Exception as e:
         self.exception = e
         self.stacktrace = traceback.format_exc()
         # An exception raised without arguments has an empty args tuple.
         # Indexing args[0] unconditionally would raise IndexError from
         # inside this handler.
         first_arg = e.args[0] if e.args else ''
         if 'KeyboardInterrupt' not in python_utils.convert_to_bytes(
                 first_arg):
             log(e)
             log('ERROR %s: %.1f secs' %
                 (self.name, time.time() - self.start_time),
                 show_time=True)
     finally:
         self.semaphore.release()
         self.finished = True
示例#24
0
def verify_signature(message, vm_id, received_signature):
    """Function that checks if the signature received from the VM is valid.

    The shared secret for the VM is looked up in
    config_domain.VMID_SHARED_SECRET_KEY_MAPPING, the expected signature is
    regenerated from the message, and the two signatures are compared in
    constant time.

    Args:
        message: dict. The message payload data.
        vm_id: str. The ID of the VM instance.
        received_signature: str. The signature received from the VM.

    Returns:
        bool. Whether the incoming request is valid.
    """
    # Imported locally so that this function does not rely on a module-level
    # import being present.
    import hmac

    secret = None
    for val in config_domain.VMID_SHARED_SECRET_KEY_MAPPING.value:
        if val['vm_id'] == vm_id:
            secret = python_utils.convert_to_bytes(val['shared_secret_key'])
            break
    if secret is None:
        # No shared secret is registered for this VM id.
        return False

    generated_signature = generate_signature(secret, message)

    # A plain "!=" comparison stops at the first differing character, which
    # leaks timing information about the expected signature. Both values are
    # converted to bytes because compare_digest requires operands of the
    # same type.
    try:
        return hmac.compare_digest(
            python_utils.convert_to_bytes(generated_signature),
            python_utils.convert_to_bytes(received_signature))
    except TypeError:
        # A signature that cannot be converted to bytes can never match.
        return False
示例#25
0
def get_user_id_from_email(email):
    """Looks up the user id that belongs to an email address.

    A temporary _FakeUser entity holding a users.User for the email is
    stored and read back, and the user id is taken from the stored entity.

    Args:
        email: str. The email address to look up.

    Returns:
        bytes|None. The user id, or None if the email address does not
        correspond to a valid user id.
    """
    class _FakeUser(ndb.Model):
        """Model used only to store a users.User for the lookup."""
        _use_memcache = False
        _use_cache = False
        user = ndb.UserProperty(required=True)

    try:
        fake_user = users.User(email)
    except users.UserNotFoundError:
        logging.error(
            'The email address %s does not correspond to a valid user_id' %
            email)
        return None

    stored_key = _FakeUser(id=email, user=fake_user).put()
    stored_entity = _FakeUser.get_by_id(stored_key.id())
    user_id = stored_entity.user.user_id()
    if not user_id:
        return None
    return python_utils.convert_to_bytes(user_id)
示例#26
0
 def run(self):
     """Runs self.func, stores its task results and logs how long it took.

     When self.verbose is set, each task result is logged either as a lint
     report (self.report_enabled) or as backend test output. On failure the
     exception and its formatted traceback are stored in self.exception and
     self.stacktrace, and the error is logged unless the first exception
     argument mentions 'KeyboardInterrupt'. The semaphore is always released
     and self.finished is always set to True.
     """
     try:
         self.task_results = self.func()
         if self.verbose:
             for task_result in self.task_results:
                 # The following section will print the output of the lint
                 # checks.
                 if self.report_enabled:
                     log(
                         'Report from %s check\n'
                         '----------------------------------------\n'
                         '%s' % (task_result.name, '\n'.join(
                             task_result.get_report())), show_time=True)
                 # The following section will print the output of backend
                 # tests.
                 else:
                     log(
                         'LOG %s:\n%s'
                         '----------------------------------------' %
                         (self.name, task_result.messages[0]),
                         show_time=True)
         log(
             'FINISHED %s: %.1f secs' % (
                 self.name, time.time() - self.start_time), show_time=True)
     except Exception as e:
         self.exception = e
         self.stacktrace = traceback.format_exc()
         # An exception raised without arguments has an empty args tuple.
         # Indexing args[0] unconditionally would raise IndexError from
         # inside this handler.
         first_arg = e.args[0] if e.args else ''
         if 'KeyboardInterrupt' not in python_utils.convert_to_bytes(
                 first_arg):
             log(e)
             log(
                 'ERROR %s: %.1f secs' %
                 (self.name, time.time() - self.start_time), show_time=True)
     finally:
         self.semaphore.release()
         self.finished = True
示例#27
0
def get_exploration_by_id(exploration_id, strict=True, version=None):
    """Fetches an Exploration domain object, consulting the cache first.

    When the exploration is not cached it is loaded from the exploration
    model and stored in the cache before being returned.

    Args:
        exploration_id: str. The id of the exploration to be returned.
        strict: bool. Whether to fail noisily if no exploration with a given id
            exists.
        version: int or None. The version of the exploration to be returned.
            If None, the latest version of the exploration is returned.

    Returns:
        Exploration|None. The domain object corresponding to the given
        exploration, or None if no exploration model was found.
    """
    # A falsy version shares the sub-namespace None in the cache.
    sub_namespace = None
    if version:
        sub_namespace = python_utils.convert_to_bytes(version)

    cached_explorations = caching_services.get_multi(
        caching_services.CACHE_NAMESPACE_EXPLORATION,
        sub_namespace,
        [exploration_id])
    cached_exploration = cached_explorations.get(exploration_id)
    if cached_exploration is not None:
        return cached_exploration

    exploration_model = exp_models.ExplorationModel.get(
        exploration_id, strict=strict, version=version)
    if not exploration_model:
        return None

    exploration = get_exploration_from_model(exploration_model)
    caching_services.set_multi(
        caching_services.CACHE_NAMESPACE_EXPLORATION,
        sub_namespace,
        {exploration_id: exploration})
    return exploration
示例#28
0
def main(args=None):
    """Runs the backend tests and prints a summary of the results.

    One task is created per test target and all tasks are executed through
    concurrent_task_utils, sharing a semaphore sized to
    min(multiprocessing.cpu_count(), 25). After execution, the outcome of
    every task is classified (canceled, no tests found, failed, failed to
    run, or success), totals are printed, and, if requested, a coverage
    report is generated and checked.

    Args:
        args: list(str)|None. The arguments handed to _PARSER.parse_args.

    Raises:
        Exception. A directory in DIRS_TO_ADD_TO_SYS_PATH fails the existence
            check.
        Exception. A coverage report was requested but the expected coverage
            directory is missing from common.OPPIA_TOOLS_DIR.
        Exception. Both test_path and test_target are specified, or either
            of them uses the wrong delimiter.
        Exception. No tests were run.
        Exception. Task execution failed, or at least one test errored or
            failed.
        Exception. The coverage report contains no TOTAL line that can be
            parsed, or the total coverage is not 100%.
    """
    parsed_args = _PARSER.parse_args(args=args)

    setup.main(args=[])
    setup_gae.main(args=[])

    for directory in DIRS_TO_ADD_TO_SYS_PATH:
        # NOTE(review): this checks the parent of each entry rather than the
        # entry itself, while the message names the entry -- confirm that this
        # is intended.
        if not os.path.exists(os.path.dirname(directory)):
            raise Exception('Directory %s does not exist.' % directory)

        # The directories should only be inserted starting at index 1. See
        # https://stackoverflow.com/a/10095099 and
        # https://stackoverflow.com/q/10095037 for more details.
        sys.path.insert(1, directory)

    # This import can only succeed once the directories above are on sys.path.
    import dev_appserver
    dev_appserver.fix_sys_path()

    if parsed_args.generate_coverage_report:
        python_utils.PRINT('Checking whether coverage is installed in %s' %
                           common.OPPIA_TOOLS_DIR)
        if not os.path.exists(
                os.path.join(common.OPPIA_TOOLS_DIR,
                             'coverage-%s' % common.COVERAGE_VERSION)):
            raise Exception('Coverage is not installed, please run the start '
                            'script.')

        # Prepend the coverage directory to any PYTHONPATH already set in the
        # environment.
        pythonpath_components = [COVERAGE_DIR]
        if os.environ.get('PYTHONPATH'):
            pythonpath_components.append(os.environ.get('PYTHONPATH'))

        os.environ['PYTHONPATH'] = os.pathsep.join(pythonpath_components)

    if parsed_args.test_target and parsed_args.test_path:
        raise Exception('At most one of test_path and test_target '
                        'should be specified.')
    if parsed_args.test_path and '.' in parsed_args.test_path:
        raise Exception('The delimiter in test_path should be a slash (/)')
    if parsed_args.test_target and '/' in parsed_args.test_target:
        raise Exception('The delimiter in test_target should be a dot (.)')

    if parsed_args.test_target:
        if '_test' in parsed_args.test_target:
            all_test_targets = [parsed_args.test_target]
        else:
            python_utils.PRINT('')
            python_utils.PRINT(
                '---------------------------------------------------------')
            python_utils.PRINT(
                'WARNING : test_target flag should point to the test file.')
            python_utils.PRINT(
                '---------------------------------------------------------')
            python_utils.PRINT('')
            time.sleep(3)
            python_utils.PRINT('Redirecting to its corresponding test file...')
            all_test_targets = [parsed_args.test_target + '_test']
    else:
        include_load_tests = not parsed_args.exclude_load_tests
        all_test_targets = _get_all_test_targets(
            test_path=parsed_args.test_path,
            include_load_tests=include_load_tests)

    # Prepare tasks.
    max_concurrent_runs = 25
    concurrent_count = min(multiprocessing.cpu_count(), max_concurrent_runs)
    semaphore = threading.Semaphore(concurrent_count)

    task_to_taskspec = {}
    tasks = []
    for test_target in all_test_targets:
        test = TestingTaskSpec(test_target,
                               parsed_args.generate_coverage_report)
        task = concurrent_task_utils.create_task(test.run,
                                                 parsed_args.verbose,
                                                 semaphore,
                                                 name=test_target)
        task_to_taskspec[task] = test
        tasks.append(task)

    # A failure here is remembered and only raised after the summary has been
    # printed, so that the results of the tasks that did run are still shown.
    task_execution_failed = False
    try:
        concurrent_task_utils.execute_tasks(tasks, semaphore)
    except Exception:
        task_execution_failed = True

    for task in tasks:
        if task.exception:
            concurrent_task_utils.log(
                python_utils.convert_to_bytes(task.exception.args[0]))

    python_utils.PRINT('')
    python_utils.PRINT('+------------------+')
    python_utils.PRINT('| SUMMARY OF TESTS |')
    python_utils.PRINT('+------------------+')
    python_utils.PRINT('')

    # Check we ran all tests as expected.
    total_count = 0
    total_errors = 0
    total_failures = 0
    for task in tasks:
        spec = task_to_taskspec[task]

        if not task.finished:
            python_utils.PRINT('CANCELED  %s' % spec.test_target)
            test_count = 0
        elif (task.exception and 'No tests were run'
              in python_utils.convert_to_bytes(task.exception.args[0])):
            python_utils.PRINT('ERROR     %s: No tests found.' %
                               spec.test_target)
            test_count = 0
        elif task.exception:
            exc_str = python_utils.convert_to_bytes(task.exception.args[0])
            python_utils.PRINT(exc_str[exc_str.find('='):exc_str.rfind('-')])

            tests_failed_regex_match = re.search(
                r'Test suite failed: ([0-9]+) tests run, ([0-9]+) errors, '
                '([0-9]+) failures',
                exc_str)

            try:
                test_count = int(tests_failed_regex_match.group(1))
                errors = int(tests_failed_regex_match.group(2))
                failures = int(tests_failed_regex_match.group(3))
                total_errors += errors
                total_failures += failures
                python_utils.PRINT('FAILED    %s: %s errors, %s failures' %
                                   (spec.test_target, errors, failures))
            except AttributeError:
                # There was an internal error, and the tests did not run (The
                # error message did not match `tests_failed_regex_match`).
                test_count = 0
                total_errors += 1
                python_utils.PRINT('')
                python_utils.PRINT(
                    '------------------------------------------------------')
                python_utils.PRINT('    WARNING: FAILED TO RUN %s' %
                                   spec.test_target)
                python_utils.PRINT('')
                python_utils.PRINT(
                    '    This is most likely due to an import error.')
                python_utils.PRINT(
                    '------------------------------------------------------')
        else:
            # Default to zero so that a failure to parse the task output
            # cannot leave test_count unbound (for the first task) or still
            # holding the count of the previous task.
            test_count = 0
            try:
                tests_run_regex_match = re.search(
                    r'Ran ([0-9]+) tests? in ([0-9\.]+)s', task.output)
                test_count = int(tests_run_regex_match.group(1))
                test_time = float(tests_run_regex_match.group(2))
                python_utils.PRINT('SUCCESS   %s: %d tests (%.1f secs)' %
                                   (spec.test_target, test_count, test_time))
            except Exception:
                python_utils.PRINT('An unexpected error occurred. '
                                   'Task output:\n%s' % task.output)

        total_count += test_count

    python_utils.PRINT('')
    if total_count == 0:
        raise Exception('WARNING: No tests were run.')

    python_utils.PRINT('Ran %s test%s in %s test class%s.' %
                       (total_count, '' if total_count == 1 else 's',
                        len(tasks), '' if len(tasks) == 1 else 'es'))

    if total_errors or total_failures:
        python_utils.PRINT('(%s ERRORS, %s FAILURES)' %
                           (total_errors, total_failures))
    else:
        python_utils.PRINT('All tests passed.')

    if task_execution_failed:
        raise Exception('Task execution failed.')
    elif total_errors or total_failures:
        raise Exception('%s errors, %s failures' %
                        (total_errors, total_failures))

    if parsed_args.generate_coverage_report:
        subprocess.check_call(
            [sys.executable, COVERAGE_MODULE_PATH, 'combine'])
        process = subprocess.Popen([
            sys.executable, COVERAGE_MODULE_PATH, 'report',
            '--omit="%s*","third_party/*","/usr/share/*"' %
            common.OPPIA_TOOLS_DIR, '--show-missing'
        ],
                                   stdout=subprocess.PIPE)

        report_stdout, _ = process.communicate()
        python_utils.PRINT(report_stdout)

        coverage_result = re.search(
            r'TOTAL\s+(\d+)\s+(\d+)\s+(?P<total>\d+)%\s+', report_stdout)
        # re.search returns None when the report has no matching TOTAL line;
        # fail with a descriptive message instead of an AttributeError.
        if coverage_result is None:
            raise Exception(
                'Could not find the TOTAL line in the coverage report.')
        if coverage_result.group('total') != '100':
            raise Exception('Backend test coverage is not 100%')

    python_utils.PRINT('')
    python_utils.PRINT('Done!')
示例#29
0
    def setUp(self):
        """Prepares the fixtures shared by the trained-classifier tests.

        Saves an exploration from the string-classifier test YAML with ML
        classifiers enabled, sets the status of its classifier training job
        to pending, and builds the signed job-result payload as well as the
        signed payload used for fetching the next job.
        """
        super(TrainedClassifierHandlerTests, self).setUp()

        self.exp_id = 'exp_id1'
        self.title = 'Testing Classifier storing'
        self.category = 'Test'
        test_yaml_filepath = os.path.join(
            feconf.TESTS_DATA_DIR, 'string_classifier_test.yaml')
        with python_utils.open_file(test_yaml_filepath, 'r') as yaml_file:
            self.yaml_content = yaml_file.read()
        self.signup(
            self.CURRICULUM_ADMIN_EMAIL, self.CURRICULUM_ADMIN_USERNAME)
        self.signup('*****@*****.**', 'mod')

        # The exploration is saved without any assets.
        with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
            exp_services.save_new_exploration_from_yaml_and_assets(
                feconf.SYSTEM_COMMITTER_ID, self.yaml_content, self.exp_id,
                [])
        self.exploration = exp_fetchers.get_exploration_by_id(self.exp_id)

        # Both algorithm settings come from the mapping entry of the
        # interaction used by the 'Home' state.
        home_interaction_id = self.exploration.states['Home'].interaction.id
        classifier_mapping_entry = (
            feconf.INTERACTION_CLASSIFIER_MAPPING[home_interaction_id])
        self.algorithm_id = classifier_mapping_entry['algorithm_id']
        self.algorithm_version = classifier_mapping_entry['algorithm_version']

        self.classifier_data = {
            '_alpha': 0.1,
            '_beta': 0.001,
            '_prediction_threshold': 0.5,
            '_training_iterations': 25,
            '_prediction_iterations': 5,
            '_num_labels': 10,
            '_num_docs': 12,
            '_num_words': 20,
            '_label_to_id': {'text': 1},
            '_word_to_id': {'hello': 2},
            '_w_dp': [],
            '_b_dl': [],
            '_l_dp': [],
            '_c_dl': [],
            '_c_lw': [],
            '_c_l': [],
        }

        training_job = classifier_services.get_classifier_training_job(
            self.exp_id, self.exploration.version, 'Home', self.algorithm_id)
        self.assertIsNotNone(training_job)
        self.job_id = training_job.job_id

        # TODO(pranavsid98): Replace the three commands below with
        # mark_training_job_pending after Giritheja's PR gets merged.
        training_job_model = classifier_models.ClassifierTrainingJobModel.get(
            self.job_id, strict=False)
        training_job_model.status = feconf.TRAINING_JOB_STATUS_PENDING
        training_job_model.update_timestamps()
        training_job_model.put()

        response_payload_cls = (
            training_job_response_payload_pb2.TrainingJobResponsePayload)

        self.job_result = response_payload_cls.JobResult()
        self.job_result.job_id = self.job_id

        frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
        frozen_model.model_json = json.dumps(self.classifier_data)
        self.job_result.text_classifier.CopyFrom(frozen_model)

        self.payload_proto = response_payload_cls()
        self.payload_proto.job_result.CopyFrom(self.job_result)
        self.payload_proto.vm_id = feconf.DEFAULT_VM_ID
        self.secret = feconf.DEFAULT_VM_SHARED_SECRET
        secret_for_result = python_utils.convert_to_bytes(self.secret)
        serialized_job_result = python_utils.convert_to_bytes(
            self.payload_proto.job_result.SerializeToString())
        self.payload_proto.signature = classifier_services.generate_signature(
            secret_for_result, serialized_job_result,
            self.payload_proto.vm_id)

        next_job_request = {
            'vm_id': feconf.DEFAULT_VM_ID,
            'message': json.dumps({})
        }
        self.payload_for_fetching_next_job_request = next_job_request

        secret_for_request = python_utils.convert_to_bytes(self.secret)
        request_message = python_utils.convert_to_bytes(
            next_job_request['message'])
        next_job_request['signature'] = classifier_services.generate_signature(
            secret_for_request, request_message, next_job_request['vm_id'])
示例#30
0
def convert_to_textangular(html_data):
    """Converts an HTML string to the TextAngular supported format.

    The input is parsed with BeautifulSoup, after which tags are renamed,
    replaced, wrapped or unwrapped according to the 'RTE_TYPE_TEXTANGULAR'
    entry of feconf.RTE_CONTENT_SPEC. The resulting tree is serialized back
    to a string in which every '<br/>' is written as '<br>'.

    Args:
        html_data: str. HTML string to be converted.

    Returns:
        str. The converted HTML string. An empty input is returned unchanged.
    """
    # Nothing to convert for an empty string.
    if not len(html_data):
        return html_data

    # <br> is replaced with <br/> before conversion because BeautifulSoup
    # in some cases adds </br> closing tag and br is reported as parent
    # of other tags which produces issues in migration.
    html_data = html_data.replace('<br>', '<br/>')

    # To convert the rich text content within tabs and collapsible components
    # to valid TextAngular format. If there is no tabs or collapsible component
    # convert_tag_contents_to_rte_format will make no change to html_data.
    # This function itself is passed as the conversion callback.
    html_data = convert_tag_contents_to_rte_format(
        html_data, convert_to_textangular)

    soup = bs4.BeautifulSoup(html_data.encode(encoding='utf-8'), 'html.parser')

    allowed_tag_list = (
        feconf.RTE_CONTENT_SPEC[
            'RTE_TYPE_TEXTANGULAR']['ALLOWED_TAG_LIST'])
    allowed_parent_list = (
        feconf.RTE_CONTENT_SPEC[
            'RTE_TYPE_TEXTANGULAR']['ALLOWED_PARENT_LIST'])

    # The td tag will be unwrapped and tr tag will be replaced with p tag.
    # So if td is parent of blockquote after migration blockquote should
    # be parent of the p tag to get almost the same appearance. p cannot
    # remain parent of blockquote since that is not allowed in TextAngular.
    # If blockquote is wrapped in p we need to unwrap the p but here
    # we need to make blockquote the parent of p. Since this cannot
    # be distinguished after migration to p, this part is checked
    # before migration.
    for blockquote in soup.findAll(name='blockquote'):
        if blockquote.parent.name == 'td':
            blockquote.parent.parent.wrap(soup.new_tag('blockquote'))
            blockquote.unwrap()

    # If p tags are left within a td tag, the contents of a table row
    # in final output will span to multiple lines instead of all
    # items being in a single line. So, any p tag within
    # td tag is unwrapped.
    for p in soup.findAll(name='p'):
        if p.parent.name == 'td':
            p.unwrap()

    # To remove all tags except those in allowed tag list.
    all_tags = soup.findAll()
    for tag in all_tags:
        if tag.name == 'strong':
            tag.name = 'b'
        elif tag.name == 'em':
            tag.name = 'i'
        # Current rte does not support horizontal rule, the closest
        # replacement of a horizontal rule is a line break to obtain
        # the same appearance.
        elif tag.name == 'hr':
            tag.name = 'br'
        # 'a' tag is to be replaced with oppia-noninteractive-link.
        # For this the attributes and text within the a tag are used to
        # create a new link tag which is wrapped around the a tag, and then
        # the a tag is removed.
        # In case where there is no href attribute or no text within the
        # a tag, the tag is simply unwrapped.
        elif tag.name == 'a':
            replace_with_link = True
            if tag.has_attr('href') and tag.get_text():
                children = tag.findChildren()
                # If the a tag already contains a link component, only the
                # a tag is unwrapped and no new link component is created.
                for child in children:
                    if child.name == 'oppia-noninteractive-link':
                        tag.unwrap()
                        replace_with_link = False
                if replace_with_link:
                    link = soup.new_tag('oppia-noninteractive-link')
                    url = tag['href']
                    text = tag.get_text()
                    link['url-with-value'] = escape_html(json.dumps(url))
                    link['text-with-value'] = escape_html(json.dumps(text))
                    tag.wrap(link)
                    # If any part of text in a tag is wrapped in b or i tag
                    # link tag is also wrapped in those tags to maintain
                    # almost similar appearance. The counters ensure that the
                    # link is wrapped at most once in each of b and i.
                    count_of_b_parent = 0
                    count_of_i_parent = 0
                    for child in children:
                        if child.name == 'b' and not count_of_b_parent:
                            link.wrap(soup.new_tag('b'))
                            count_of_b_parent = 1
                        if child.name == 'i' and not count_of_i_parent:
                            link.wrap(soup.new_tag('i'))
                            count_of_i_parent = 1
                    tag.extract()
            else:
                tag.unwrap()
        # To maintain the appearance of a table, a separator is added after
        # each element in a row that has a following sibling, and the td tag
        # itself is unwrapped. In one of the cases the elements were
        # p tags with some text and line breaks. In such case td.string
        # is None and there is no need to add tabs since linebreak is
        # already present.
        # NOTE(review): the separator inserted below is a single space rather
        # than a tab character, and the condition checks next_sibling rather
        # than td.string -- confirm this is intended.
        elif tag.name == 'td' and tag.next_sibling:
            tag.insert_after(' ')
            tag.unwrap()
        # Divs and table rows are both replaced with p tag
        # to maintain almost same appearance.
        elif tag.name == 'div' or tag.name == 'tr':
            tag.name = 'p'
        # All other invalid tags are simply removed.
        elif tag.name not in allowed_tag_list:
            tag.unwrap()

    # Removal of tags can break the soup into parts which are continuous
    # and not wrapped in any tag. This part recombines the continuous
    # parts not wrapped in any tag by serializing and re-parsing the soup.
    soup = bs4.BeautifulSoup(
        python_utils.convert_to_bytes(soup), 'html.parser')

    # Ensure that blockquote tag is wrapped in an allowed parent.
    for blockquote in soup.findAll(name='blockquote'):
        while blockquote.parent.name not in allowed_parent_list['blockquote']:
            blockquote.parent.unwrap()

    # Ensure that pre tag is not wrapped in p tags.
    for pre in soup.findAll(name='pre'):
        while pre.parent.name == 'p':
            pre.parent.unwrap()

    # Ensure that ol and ul are not wrapped in p tags.
    for tag_name in ['ol', 'ul']:
        for tag in soup.findAll(name=tag_name):
            while tag.parent.name == 'p':
                tag.parent.unwrap()

    # Ensure that br tag is wrapped in an allowed parent. Inside a pre tag
    # the br is replaced by a newline character instead.
    for br in soup.findAll(name='br'):
        if br.parent.name == 'pre':
            br.insert_after('\n')
            br.unwrap()
        elif br.parent.name not in allowed_parent_list['br']:
            wrap_with_siblings(br, soup.new_tag('p'))

    # Ensure that b and i tags are wrapped in an allowed parent.
    for tag_name in ['b', 'i']:
        for tag in soup.findAll(name=tag_name):
            # A b/i tag directly inside a link component is moved outside:
            # the link is wrapped in a new tag of the same name and the inner
            # tag is unwrapped.
            if tag.parent.name == 'oppia-noninteractive-link':
                tag.parent.wrap(soup.new_tag(tag_name))
                parent = tag.parent.parent
                tag.unwrap()
                tag = parent
            # A tag nested directly in a tag of the same name is redundant,
            # so only the outer one is kept.
            if tag.parent.name == tag_name:
                parent = tag.parent
                tag.unwrap()
                tag = parent
            if tag.parent.name in ['blockquote', '[document]']:
                wrap_with_siblings(tag, soup.new_tag('p'))

    # Ensure that oppia inline components are wrapped in an allowed parent.
    for tag_name in INLINE_COMPONENT_TAG_NAMES:
        for tag in soup.findAll(name=tag_name):
            if tag.parent.name in ['blockquote', '[document]']:
                wrap_with_siblings(tag, soup.new_tag('p'))

    # Ensure oppia link component is not a child of another link component.
    for link in soup.findAll(name='oppia-noninteractive-link'):
        if link.parent.name == 'oppia-noninteractive-link':
            link.unwrap()

    # Ensure that oppia block components are wrapped in an allowed parent.
    for tag_name in BLOCK_COMPONENT_TAG_NAMES:
        for tag in soup.findAll(name=tag_name):
            if tag.parent.name in ['blockquote', '[document]']:
                wrap_with_siblings(tag, soup.new_tag('p'))

    # Ensure that every content in html is wrapped in a tag. Top-level nodes
    # without a name are wrapped in a new p tag.
    for content in soup.contents:
        if not content.name:
            content.wrap(soup.new_tag('p'))

    # Ensure that p tag has a valid parent.
    for p in soup.findAll(name='p'):
        if p.parent.name != 'p' and (
                p.parent.name not in allowed_parent_list['p']):
            p.parent.unwrap()

    # Ensure that p tag is not wrapped in p tag. The children of the outer
    # p tag are regrouped so that each of them ends up inside a p tag, and
    # then the outer p tag is unwrapped.
    for p in soup.findAll(name='p'):
        if p.parent.name == 'p':
            child_tags = p.parent.contents
            index = 0
            while index < len(child_tags):
                current_tag = child_tags[index]

                # If the current tag is not a paragraph tag, wrap it and all
                # consecutive non-p tags after it into a single p-tag.
                new_p = soup.new_tag('p')
                while current_tag.name != 'p':
                    current_tag = current_tag.wrap(new_p)
                    # Continue from the position right after the new p tag.
                    index = child_tags.index(current_tag) + 1
                    if index >= len(child_tags):
                        break
                    current_tag = child_tags[index]

                index += 1
            p.parent.unwrap()

    # Beautiful soup automatically changes some <br> to <br/>,
    # so it has to be replaced directly in the string.
    # Also, when any html string with <br/> is stored in exploration
    # html strings they are stored as <br>. Since both of these
    # should match and <br> and <br/> have same working,
    # so the tag has to be replaced in this way.
    return python_utils.STR(soup).replace('<br/>', '<br>')