def test_convert_to_bytes(self):
    """Checks that convert_to_bytes matches UTF-8 encoding for both an
    ASCII string and a non-ASCII (Cyrillic) string.
    """
    ascii_text = 'Home'
    cyrillic_text = u'Лорем'
    for text in (ascii_text, cyrillic_text):
        self.assertEqual(
            python_utils.convert_to_bytes(text),
            text.encode(encoding='utf-8'))
def test_deferred_tasks_handler_raises_correct_exceptions(self):
    """Checks that deferring an unknown function id raises when pending
    tasks are processed, and that posting to the deferred-task URL
    responds with a 500 status.
    """
    bad_function_id = 'incorrect_function_id'
    taskqueue_services.defer(
        bad_function_id, taskqueue_services.QUEUE_NAME_DEFAULT)

    expected_error_regex = (
        'The function id, %s, is not valid.' % bad_function_id)
    with self.assertRaisesRegexp(Exception, expected_error_regex):
        self.process_and_flush_pending_tasks()

    # Need to convert to bytes since test app doesn't allow unicode.
    request_headers = {
        'X-Appengine-QueueName': python_utils.convert_to_bytes('queue'),
        'X-Appengine-TaskName': python_utils.convert_to_bytes('None'),
        'X-AppEngine-Fake-Is-Admin': python_utils.convert_to_bytes('1')
    }
    self.post_task(
        feconf.TASK_URL_DEFERRED, {}, request_headers,
        csrf_token=self.get_new_csrf_token(),
        expect_errors=True, expected_status_int=500)
def test_error_on_invalid_classifier_data_in_message(self):
    """Checks that the trained-classifier handler responds with a 400
    status when the job result has its classifier data cleared.
    """
    # Clear the classifier data so that the job result becomes invalid.
    self.payload_proto.job_result.ClearField('classifier_frozen_model')

    # The signature is computed over the serialized job result, so it has
    # to be regenerated after the job result is modified.
    secret_bytes = python_utils.convert_to_bytes(self.secret)
    message_bytes = python_utils.convert_to_bytes(
        self.payload_proto.job_result.SerializeToString())
    self.payload_proto.signature = classifier_services.generate_signature(
        secret_bytes, message_bytes, self.payload_proto.vm_id)

    self.post_blob(
        '/ml/trainedclassifierhandler',
        self.payload_proto.SerializeToString(),
        expected_status_int=400)
def convert_to_hash(input_string: str, max_length: int) -> str:
    """Convert a string to a truncated, alphanumeric SHA1 hash.

    The SHA1 digest of the input is base64-encoded with '+', '/' and '='
    replaced by 'a', 'b' and 'c' respectively, so that the result only
    uses the character set [A-Za-z0-9].

    Args:
        input_string: str. Input string for conversion to hash.
        max_length: int. Maximum length of the generated hash.

    Returns:
        str. Hash value generated from input_string, cut to at most
        max_length characters.

    Raises:
        Exception. If the input string is not an instance of str, then
            this exception is raised.
    """
    if not isinstance(input_string, python_utils.BASESTRING):
        raise Exception(
            'Expected string, received %s of type %s' %
            (input_string, type(input_string)))

    sha1_digest = hashlib.sha1(
        python_utils.convert_to_bytes(input_string)).digest()
    # The altchars and the replacement values are bytes literals so that
    # every intermediate value stays a byte string until the final decode.
    base64_digest = base64.b64encode(sha1_digest, altchars=b'ab')
    alphanumeric_digest = base64_digest.replace(b'=', b'c')
    return alphanumeric_digest[:max_length].decode('utf-8')
def len_in_bytes(self) -> int:
    """Returns the number of bytes encoded by the JobRunResult instance.

    Returns:
        int. The combined byte length of stdout and stderr once each has
        been converted to bytes.
    """
    total_bytes = 0
    for stream_output in (self.stdout, self.stderr):
        total_bytes += len(python_utils.convert_to_bytes(stream_output))
    return total_bytes
def setUp(self):
    """Creates a new classifier training job, stores empty classifier
    data for it, and prepares the expected response together with a
    signed request payload.
    """
    super(NextJobHandlerTest, self).setUp()

    self.exp_id = 'exp_id1'
    self.title = 'Testing Classifier storing'
    self.category = 'Test'

    interaction_id = 'TextInput'
    classifier_mapping = (
        feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id])
    self.algorithm_id = classifier_mapping['algorithm_id']
    self.algorithm_version = classifier_mapping['algorithm_version']

    self.training_data = [
        {u'answer_group_index': 1, u'answers': [u'a1', u'a2']},
        {u'answer_group_index': 2, u'answers': [u'a2', u'a3']},
    ]
    self.job_id = classifier_models.ClassifierTrainingJobModel.create(
        self.algorithm_id, interaction_id, self.exp_id, 1,
        datetime.datetime.utcnow(), self.training_data, 'Home',
        feconf.TRAINING_JOB_STATUS_NEW, 1)

    self.classifier_data = text_classifier_pb2.TextClassifierFrozenModel()
    self.classifier_data.model_json = ''
    fs_services.save_classifier_data(
        self.exp_id, self.job_id, self.classifier_data)

    self.expected_response = {
        u'job_id': self.job_id,
        u'training_data': self.training_data,
        u'algorithm_id': self.algorithm_id,
        u'algorithm_version': self.algorithm_version
    }

    # The request carries an empty JSON message signed with the default
    # VM's shared secret.
    vm_id = feconf.DEFAULT_VM_ID
    shared_secret = feconf.DEFAULT_VM_SHARED_SECRET
    message = json.dumps({})
    signature = classifier_services.generate_signature(
        python_utils.convert_to_bytes(shared_secret),
        python_utils.convert_to_bytes(message),
        vm_id)
    self.payload = {
        'vm_id': vm_id,
        'message': message,
        'signature': signature,
    }
def verify_signature(oppia_ml_auth_info):
    """Function that checks if the signature received from the VM is valid.

    Args:
        oppia_ml_auth_info: OppiaMLAuthInfo. Domain object containing
            authentication information.

    Returns:
        bool. Whether the incoming request is valid. False is returned when
        no shared secret is registered for the VM ID, when the received
        signature is not a string, or when it does not match the signature
        generated from the message.
    """
    secret = None
    for val in config_domain.VMID_SHARED_SECRET_KEY_MAPPING.value:
        if val['vm_id'] == oppia_ml_auth_info.vm_id:
            secret = python_utils.convert_to_bytes(val['shared_secret_key'])
            break
    if secret is None:
        return False

    generated_signature = generate_signature(
        secret,
        python_utils.convert_to_bytes(oppia_ml_auth_info.message),
        oppia_ml_auth_info.vm_id)

    received_signature = oppia_ml_auth_info.signature
    # A non-string signature can never equal a hex digest. Rejecting it
    # here also keeps the byte conversion below from raising.
    if not isinstance(received_signature, python_utils.BASESTRING):
        return False

    # The received signature is supplied by the caller, so compare in
    # constant time to avoid leaking how many leading characters matched.
    # Both sides are converted to bytes because compare_digest only accepts
    # ASCII-only text or bytes-like objects of the same type.
    return hmac.compare_digest(
        python_utils.convert_to_bytes(generated_signature),
        python_utils.convert_to_bytes(received_signature))
def generate_signature(secret, message, vm_id):
    """Generates digital signature for given data.

    The signed payload is the base64-encoded message and the VM ID joined
    by a '|' separator, authenticated with HMAC-SHA256.

    Args:
        secret: bytes. The secret used to communicate with Oppia-ml.
        message: bytes. The message payload data.
        vm_id: str. The ID of the VM that generated the message.

    Returns:
        str. The signature of the payload data, as a hexadecimal digest.
    """
    vm_id_bytes = python_utils.convert_to_bytes(vm_id)
    message_base64 = base64.b64encode(message)
    signed_payload = b'|'.join((message_base64, vm_id_bytes))
    signer = hmac.new(secret, msg=signed_payload, digestmod=hashlib.sha256)
    return signer.hexdigest()
def commit(self, filepath, raw_bytes, mimetype=None):
    """Replaces the contents of the file with the given byte string.

    Args:
        filepath: str. The path to the relevant file within the entity's
            assets folder.
        raw_bytes: str. The content to be stored in the file.
        mimetype: str. The content-type of the file. If mimetype is set to
            'application/octet-stream' then raw_bytes is expected to
            contain binary data. In all other cases, raw_bytes is expected
            to be textual data.
    """
    if mimetype == 'application/octet-stream':
        # Binary data can be stored as is.
        file_content = raw_bytes
    else:
        # Textual data needs to be converted to bytes so that it can be
        # stored in a file opened in binary mode.
        file_content = python_utils.convert_to_bytes(raw_bytes)

    self._check_filepath(filepath)
    self._impl.commit(filepath, file_content, mimetype)
def convert_png_binary_to_data_url(content: Union[str, bytes]) -> str:
    """Converts a PNG image string (represented by 'content') to a data URL.

    Args:
        content: str|bytes. PNG binary file content.

    Returns:
        str. Data URL created from the binary content of the PNG.

    Raises:
        Exception. The given binary string does not represent a PNG image.
    """
    # Unicode input is accepted, but imghdr.what(file, h) only takes 'h'
    # of type bytes, so the content is cast to bytes first.
    png_bytes = python_utils.convert_to_bytes(content)
    if imghdr.what(None, h=png_bytes) != 'png':
        raise Exception('The given string does not represent a PNG image.')

    quoted_base64 = urllib.parse.quote(base64.b64encode(png_bytes))
    return '%s%s' % (PNG_DATA_URL_PREFIX, quoted_base64)
def setUp(self):
    """Saves an exploration from the string classifier test YAML with ML
    classifiers enabled, marks its training job as pending, and prepares
    signed payloads for storing a trained classifier and for fetching the
    next job.
    """
    super(TrainedClassifierHandlerTests, self).setUp()

    self.exp_id = 'exp_id1'
    self.title = 'Testing Classifier storing'
    self.category = 'Test'

    yaml_path = os.path.join(
        feconf.TESTS_DATA_DIR, 'string_classifier_test.yaml')
    with python_utils.open_file(yaml_path, 'r') as yaml_file:
        self.yaml_content = yaml_file.read()

    self.signup(
        self.CURRICULUM_ADMIN_EMAIL, self.CURRICULUM_ADMIN_USERNAME)
    self.signup('*****@*****.**', 'mod')

    no_assets = []
    with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
        exp_services.save_new_exploration_from_yaml_and_assets(
            feconf.SYSTEM_COMMITTER_ID, self.yaml_content, self.exp_id,
            no_assets)
    self.exploration = exp_fetchers.get_exploration_by_id(self.exp_id)

    interaction_id = self.exploration.states['Home'].interaction.id
    classifier_mapping = (
        feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id])
    self.algorithm_id = classifier_mapping['algorithm_id']
    self.algorithm_version = classifier_mapping['algorithm_version']

    self.classifier_data = {
        '_alpha': 0.1,
        '_beta': 0.001,
        '_prediction_threshold': 0.5,
        '_training_iterations': 25,
        '_prediction_iterations': 5,
        '_num_labels': 10,
        '_num_docs': 12,
        '_num_words': 20,
        '_label_to_id': {'text': 1},
        '_word_to_id': {'hello': 2},
        '_w_dp': [],
        '_b_dl': [],
        '_l_dp': [],
        '_c_dl': [],
        '_c_lw': [],
        '_c_l': [],
    }

    training_job = classifier_services.get_classifier_training_job(
        self.exp_id, self.exploration.version, 'Home', self.algorithm_id)
    self.assertIsNotNone(training_job)
    self.job_id = training_job.job_id

    # TODO(pranavsid98): Replace the three commands below with
    # mark_training_job_pending after Giritheja's PR gets merged.
    job_model = classifier_models.ClassifierTrainingJobModel.get(
        self.job_id, strict=False)
    job_model.status = feconf.TRAINING_JOB_STATUS_PENDING
    job_model.update_timestamps()
    job_model.put()

    payload_cls = training_job_response_payload_pb2.TrainingJobResponsePayload
    self.job_result = payload_cls.JobResult()
    self.job_result.job_id = self.job_id
    frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
    frozen_model.model_json = json.dumps(self.classifier_data)
    self.job_result.text_classifier.CopyFrom(frozen_model)

    # Payload for storing the trained classifier, signed over the
    # serialized job result.
    self.payload_proto = payload_cls()
    self.payload_proto.job_result.CopyFrom(self.job_result)
    self.payload_proto.vm_id = feconf.DEFAULT_VM_ID
    self.secret = feconf.DEFAULT_VM_SHARED_SECRET
    self.payload_proto.signature = classifier_services.generate_signature(
        python_utils.convert_to_bytes(self.secret),
        python_utils.convert_to_bytes(
            self.payload_proto.job_result.SerializeToString()),
        self.payload_proto.vm_id)

    # Payload for fetching the next job, signed over an empty JSON message.
    next_job_vm_id = feconf.DEFAULT_VM_ID
    next_job_message = json.dumps({})
    self.payload_for_fetching_next_job_request = {
        'vm_id': next_job_vm_id,
        'message': next_job_message,
        'signature': classifier_services.generate_signature(
            python_utils.convert_to_bytes(self.secret),
            python_utils.convert_to_bytes(next_job_message),
            next_job_vm_id),
    }