def test_store_classifier_data(self):
    """Checks that store_classifier_data overwrites a job's stored data.

    A pending training job is created with an empty classifier payload.
    After store_classifier_data is called with a new frozen model, reading
    the job's classifier data back must yield the new content.
    """
    interaction_id = 'TextInput'
    algorithm_id = feconf.INTERACTION_CLASSIFIER_MAPPING[
        interaction_id]['algorithm_id']

    def _read_stored_model(training_job_id):
        """Returns the decoded model_json currently stored for the job."""
        training_job = (
            classifier_services.get_classifier_training_job_by_id(
                training_job_id))
        stored_data = (
            self._get_classifier_data_from_classifier_training_job(
                training_job))
        return json.loads(stored_data.model_json)

    job_id = self._create_classifier_training_job(
        algorithm_id, interaction_id, u'1', 1,
        datetime.datetime.utcnow(), [], 'Home',
        feconf.TRAINING_JOB_STATUS_PENDING, {}, 1)

    # Before any update, the job must expose the empty payload that was
    # passed in when it was created.
    self.assertEqual(_read_stored_model(job_id), {})

    updated_content = {'classifier_data': 'data'}
    new_model = text_classifier_pb2.TextClassifierFrozenModel()
    new_model.model_json = json.dumps(updated_content)
    classifier_services.store_classifier_data(job_id, new_model)

    # Re-fetch the job so the assertion sees the freshly stored data.
    self.assertDictEqual(_read_stored_model(job_id), updated_content)
def test_save_and_get_classifier_data(self):
    """Checks that saved classifier data can be read back unchanged.

    The proto prepared in setUp is saved through fs_services. The raw file
    is then fetched from the exploration's file system, decompressed and
    parsed into a fresh proto whose model_json must match the original.
    """
    exp_id = 'exp_id'
    job_id = 'job_id'
    fs_services.save_classifier_data(
        exp_id, job_id, self.classifier_data_proto)

    # Read the file directly through the file system layer rather than
    # through fs_services, so that the stored bytes themselves are checked.
    backend = fs_services.get_entity_file_system_class()(
        feconf.ENTITY_TYPE_EXPLORATION, exp_id)
    file_system = fs_domain.AbstractFileSystem(backend)
    compressed_bytes = file_system.get('job_id-classifier-data.pb.xz')

    restored_proto = text_classifier_pb2.TextClassifierFrozenModel()
    restored_proto.ParseFromString(
        utils.decompress_from_zlib(compressed_bytes))

    self.assertEqual(
        restored_proto.model_json, self.classifier_data_proto.model_json)
def setUp(self):
    """Prepares a GCS-backed file system and a sample classifier proto.

    Sets self.fs to an AbstractFileSystem wrapping a GcsFileSystem for the
    exploration 'exp_id', and self.classifier_data_proto to a frozen text
    classifier model carrying a small nested JSON payload.
    """
    super(FileSystemClassifierDataTests, self).setUp()

    gcs_backend = fs_domain.GcsFileSystem(
        feconf.ENTITY_TYPE_EXPLORATION, 'exp_id')
    self.fs = fs_domain.AbstractFileSystem(gcs_backend)

    # The payload mixes ints, floats and a nested dict.
    model_content = {
        'param1': 40,
        'param2': [34.2, 54.13, 95.23],
        'submodel': {
            'param1': 12
        }
    }
    frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
    frozen_model.model_json = json.dumps(model_content)
    self.classifier_data_proto = frozen_model
def _create_classifier_training_job(
        self, algorithm_id, interaction_id, exp_id, exp_version,
        next_scheduled_check_time, training_data, state_name, status,
        classifier_data, algorithm_version):
    """Creates a new classifier training job model and stores its
    classifier data in a file.

    Every argument except classifier_data is forwarded unchanged to
    ClassifierTrainingJobModel.create.

    Args:
        algorithm_id: ID of the algorithm the job trains.
        interaction_id: ID of the interaction the job belongs to.
        exp_id: ID of the exploration. Also used as the location under
            which the classifier data file is saved.
        exp_version: Version of the exploration.
        next_scheduled_check_time: Time at which the job is next checked.
        training_data: Training data passed to the job model.
        state_name: Name of the state the job is created for.
        status: Initial status of the training job.
        classifier_data: JSON-serializable object. It is dumped with
            json.dumps and stored as the model_json of a
            TextClassifierFrozenModel.
        algorithm_version: Version of the algorithm.

    Returns:
        The ID returned by ClassifierTrainingJobModel.create.
    """
    new_job_id = classifier_models.ClassifierTrainingJobModel.create(
        algorithm_id, interaction_id, exp_id, exp_version,
        next_scheduled_check_time, training_data, state_name, status,
        algorithm_version)

    # The classifier data lives in a file, not on the model, so it has to
    # be written separately once the job ID is known.
    frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
    frozen_model.model_json = json.dumps(classifier_data)
    fs_services.save_classifier_data(exp_id, new_job_id, frozen_model)

    return new_job_id
def setUp(self):
    """Creates two explorations with one classifier training job each.

    After this runs, self.model_instance_0 and self.model_instance_1 hold
    the ClassifierTrainingJobModel instances for explorations '0' and '1',
    and self.job_class is the audit one-off job under test.
    """
    super(ClassifierTrainingJobModelValidatorTests, self).setUp()

    self.signup(self.OWNER_EMAIL, self.OWNER_USERNAME)
    self.owner_id = self.get_user_id_from_email(self.OWNER_EMAIL)

    # Build both default explorations first, then add a state to each and
    # persist them.
    explorations = []
    for index in python_utils.RANGE(2):
        explorations.append(
            exp_domain.Exploration.create_default_exploration(
                '%s' % index,
                title='title %d' % index,
                category='category%d' % index,
            ))
    for exploration in explorations:
        exploration.add_states(['StateTest%s' % exploration.id])
        exp_services.save_new_exploration(self.owner_id, exploration)

    next_scheduled_check_time = datetime.datetime.utcnow()
    frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
    frozen_model.model_json = json.dumps({'classifier_data': 'data'})

    # One training job per exploration. Both share the same check time
    # and the same classifier payload.
    job_models = []
    for exp_id, state_name in (('0', 'StateTest0'), ('1', 'StateTest1')):
        job_id = classifier_models.ClassifierTrainingJobModel.create(
            'TextClassifier', 'TextInput', exp_id, 1,
            next_scheduled_check_time,
            [{'answer_group_index': 1, 'answers': ['a1', 'a2']}],
            state_name, feconf.TRAINING_JOB_STATUS_NEW, 1)
        # NOTE(review): the first argument is the literal 'TextClassifier',
        # not the exploration id. Kept as-is to preserve the fixture;
        # confirm whether exp_id was intended here.
        fs_services.save_classifier_data(
            'TextClassifier', job_id, frozen_model)
        job_models.append(
            classifier_models.ClassifierTrainingJobModel.get_by_id(job_id))
    self.model_instance_0, self.model_instance_1 = job_models

    self.job_class = (
        prod_validation_jobs_one_off
        .ClassifierTrainingJobModelAuditOneOffJob)
def setUp(self):
    """Creates a new training job and a signed next-job request payload.

    Populates self.job_id, self.expected_response (the job description the
    tests compare against) and self.payload (vm_id, message and signature
    built from the default VM credentials in feconf).
    """
    super(NextJobHandlerTest, self).setUp()
    self.exp_id = 'exp_id1'
    self.title = 'Testing Classifier storing'
    self.category = 'Test'

    interaction_id = 'TextInput'
    classifier_config = feconf.INTERACTION_CLASSIFIER_MAPPING[
        interaction_id]
    self.algorithm_id = classifier_config['algorithm_id']
    self.algorithm_version = classifier_config['algorithm_version']

    self.training_data = [
        {
            u'answer_group_index': 1,
            u'answers': [u'a1', u'a2']
        },
        {
            u'answer_group_index': 2,
            u'answers': [u'a2', u'a3']
        }
    ]
    self.job_id = classifier_models.ClassifierTrainingJobModel.create(
        self.algorithm_id, interaction_id, self.exp_id, 1,
        datetime.datetime.utcnow(), self.training_data, 'Home',
        feconf.TRAINING_JOB_STATUS_NEW, 1)

    # Store a frozen model whose model_json is the empty string.
    empty_model = text_classifier_pb2.TextClassifierFrozenModel()
    empty_model.model_json = ''
    self.classifier_data = empty_model
    fs_services.save_classifier_data(
        self.exp_id, self.job_id, self.classifier_data)

    self.expected_response = {
        u'job_id': self.job_id,
        u'training_data': self.training_data,
        u'algorithm_id': self.algorithm_id,
        u'algorithm_version': self.algorithm_version
    }

    # The signature is computed over the message and vm_id that are sent
    # in the payload, using the default shared secret.
    vm_id = feconf.DEFAULT_VM_ID
    message = json.dumps({})
    secret = feconf.DEFAULT_VM_SHARED_SECRET
    self.payload = {
        'vm_id': vm_id,
        'message': message,
    }
    self.payload['signature'] = classifier_services.generate_signature(
        python_utils.convert_to_bytes(secret),
        self.payload['message'], self.payload['vm_id'])
def setUp(self):
    """Creates an exploration with a pending training job, plus payloads.

    Loads the string classifier test exploration from YAML with ML
    classifiers enabled, looks up the training job for its 'Home' state
    and marks that job as pending. It then prepares:
        - self.payload_proto: a signed TrainingJobResponsePayload that
          carries a text classifier result for the job.
        - self.payload_for_fetching_next_job_request: a signed dict
          payload with an empty JSON message.
    """
    super(TrainedClassifierHandlerTests, self).setUp()
    self.exp_id = 'exp_id1'
    self.title = 'Testing Classifier storing'
    self.category = 'Test'

    yaml_path = os.path.join(
        feconf.TESTS_DATA_DIR, 'string_classifier_test.yaml')
    with python_utils.open_file(yaml_path, 'r') as yaml_file:
        self.yaml_content = yaml_file.read()

    self.signup(self.ADMIN_EMAIL, self.ADMIN_USERNAME)
    self.signup('*****@*****.**', 'mod')

    # The exploration is saved with ML classifiers switched on. The
    # training job looked up below is presumably created by this save.
    with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
        exp_services.save_new_exploration_from_yaml_and_assets(
            feconf.SYSTEM_COMMITTER_ID, self.yaml_content, self.exp_id,
            [])
    self.exploration = exp_fetchers.get_exploration_by_id(self.exp_id)

    classifier_config = feconf.INTERACTION_CLASSIFIER_MAPPING[
        self.exploration.states['Home'].interaction.id]
    self.algorithm_id = classifier_config['algorithm_id']
    self.algorithm_version = classifier_config['algorithm_version']

    self.classifier_data = {
        '_alpha': 0.1,
        '_beta': 0.001,
        '_prediction_threshold': 0.5,
        '_training_iterations': 25,
        '_prediction_iterations': 5,
        '_num_labels': 10,
        '_num_docs': 12,
        '_num_words': 20,
        '_label_to_id': {'text': 1},
        '_word_to_id': {'hello': 2},
        '_w_dp': [],
        '_b_dl': [],
        '_l_dp': [],
        '_c_dl': [],
        '_c_lw': [],
        '_c_l': [],
    }

    training_job = classifier_services.get_classifier_training_job(
        self.exp_id, self.exploration.version, 'Home', self.algorithm_id)
    self.assertIsNotNone(training_job)
    self.job_id = training_job.job_id

    # TODO(pranavsid98): Replace the three commands below with
    # mark_training_job_pending after Giritheja's PR gets merged.
    job_model = classifier_models.ClassifierTrainingJobModel.get(
        self.job_id, strict=False)
    job_model.status = feconf.TRAINING_JOB_STATUS_PENDING
    job_model.update_timestamps()
    job_model.put()

    # Build the job result: the job ID plus the frozen classifier model.
    frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
    frozen_model.model_json = json.dumps(self.classifier_data)
    job_result = (
        training_job_response_payload_pb2.TrainingJobResponsePayload.
        JobResult())
    job_result.job_id = self.job_id
    job_result.text_classifier.CopyFrom(frozen_model)
    self.job_result = job_result

    # Wrap the result in the response payload. The signature is taken
    # over the serialized job_result held by the payload itself.
    self.secret = feconf.DEFAULT_VM_SHARED_SECRET
    payload_proto = (
        training_job_response_payload_pb2.TrainingJobResponsePayload())
    payload_proto.job_result.CopyFrom(self.job_result)
    payload_proto.vm_id = feconf.DEFAULT_VM_ID
    payload_proto.signature = classifier_services.generate_signature(
        python_utils.convert_to_bytes(self.secret),
        payload_proto.job_result.SerializeToString(),
        payload_proto.vm_id)
    self.payload_proto = payload_proto

    next_job_request = {
        'vm_id': feconf.DEFAULT_VM_ID,
        'message': json.dumps({})
    }
    next_job_request['signature'] = (
        classifier_services.generate_signature(
            python_utils.convert_to_bytes(self.secret),
            next_job_request['message'],
            next_job_request['vm_id']))
    self.payload_for_fetching_next_job_request = next_job_request