def __init__(self, metadata):
    """Set up the speech-domain model wrapper.

    Args:
        metadata: an AutoDLMetadata object. Its definition can be found in
            AutoDL_ingestion_program/dataset.py
    """
    self.done_training = False
    self.metadata = metadata
    self.domain = "speech"
    # The test split's metadata sits beside the train split; derive its path
    # by swapping the split name in the dataset name.
    test_metadata_filename = (
        self.metadata.get_dataset_name().replace('train', 'test')
        + '/metadata.textproto')
    # Parse the test sample count from the first three lines of the
    # textproto. Use a context manager so the file handle is always closed
    # (the original leaked the handle returned by open()).
    with open(test_metadata_filename, 'r') as f:
        self.test_num = [
            int(line.split(':')[1])
            for line in f.readlines()[:3]
            if 'sample_count' in line
        ][0]
    self.domain_metadata = get_domain_metadata(metadata, self.domain)
    self.domain_metadata["test_num"] = self.test_num
    self.class_num = self.domain_metadata["class_num"]
    self.train_num = self.domain_metadata["train_num"]
    logger.info("Note:domain={}, domain_metadata is {}".format(
        self.domain, self.domain_metadata))
    self.domain_model = ModelExecutor(self.domain_metadata)
    # Bookkeeping for ensembling across train/test rounds.
    self.ensemble_val_record_list = list()
    self.ensemble_val_nauc_list = list()
    self.cur_cls_name = None
    self.cur_train_his_report = dict()
    self.g_predevel_space = list()
    self.g_train_loss_list = list()
    as_timer("model_speech_init")
def offline_fit(self, train_examples_x: np.ndarray, train_examples_y: np.ndarray, fit_params: dict = None):
    """Fit the linear classifier(s) on the given examples.

    Args:
        train_examples_x: feature matrix.
        train_examples_y: one-hot (single-label) or multi-hot (multi-label)
            label matrix.
        fit_params: optional dict; key "if_multilabel" selects the branch.
    """
    # Guard: the default fit_params=None would crash on .get() below.
    fit_params = fit_params or {}
    if fit_params.get("if_multilabel") is False:
        # Single-label path: sklearn wants categorical labels, not one-hot.
        train_examples_y = ohe2cat(train_examples_y)
        self.model.fit(train_examples_x, train_examples_y)
        # Remember which classes were actually seen in this fit.
        self.label_map = self.model.classes_
    else:
        # Multi-label path: strategy chosen by self.ml_mode.
        if self.ml_mode == 1:
            # One independent binary model per class.
            for cls in range(self.class_num):
                cls_y = train_examples_y[:, cls]
                self.ml_models[cls].fit(train_examples_x, cls_y)
        elif self.ml_mode == 2:
            # Single model that accepts the full multi-hot target.
            self.ml_model.fit(train_examples_x, train_examples_y)
        elif self.ml_mode == 3:
            # One shared logistic-regression pipeline, refit per class.
            for cls in range(self.class_num):
                cls_y = train_examples_y[:, cls]
                self.logReg_pipeline.fit(train_examples_x, cls_y)
        else:
            error("Error: wrong ml_mode={}".format(self.ml_mode))
    as_timer("lr_liblinear_fit_{}".format(len(train_examples_x)))
def test(self, dataset, remaining_time_budget=None):
    """Test method of domain-specific model.

    Runs the domain model's test pipeline and, for a sufficiently trained
    TR34 classifier, soft-voting-ensembles recent predictions.

    Returns:
        Prediction probabilities (current round's, or the ensemble result).
    """
    cur_y_pred = self.domain_model.test_pipeline(dataset)
    self.cur_train_his_report["pred_probas"] = cur_y_pred
    # Default to the current prediction; the ensemble branch below may
    # override it. (Fixes a potential UnboundLocalError at `return` when
    # the TR34 branch is not entered.)
    pred_ensemble = cur_y_pred
    if self.cur_cls_name == CLS_TR34 and self.domain_model.tr34_cls_train_pip_run >= 8:
        ensemble_learner = self.domain_model.decision_maker.ensemble_learner
        # Only keep this round's prediction for ensembling when the train
        # loss has been going down often enough over the recent tail.
        loss_godown_rate = ensemble_learner.get_loss_godown_rate(
            self.g_train_loss_list, EVAL_TLOSS_TAIL_SIZE)
        if loss_godown_rate >= EVAL_TLOSS_GODOWN_RATE_THRES:
            ensemble_learner.add_eval_pred_item(self.cur_train_his_report)
        if self.domain_model.tr34_cls_train_pip_run >= 15:
            # Weight rule for soft voting: use the 5 best-by-train-loss preds.
            pred_rule = {
                "t_loss": 5,
            }
            pred_ensemble = ensemble_learner.softvoting_ensemble_preds(pred_rule)
    self.done_training = False
    as_timer("test_start")
    return pred_ensemble
def online_fit(self, train_examples_x: np.ndarray, train_examples_y: np.ndarray, fit_params: dict):
    """Run one incremental training round of the TR34 classifier.

    Args:
        train_examples_x: this round's training features.
        train_examples_y: this round's training labels.
        fit_params: dict with optional keys "first_epoch", "left_epoch",
            "valid_data".

    Returns:
        dict with this round's final train loss ("t_loss") and accuracy
        ("t_acc").
    """
    self.trn_gen = Tr34DataGenerator(train_examples_x, train_examples_y,
                                     **self.tr34_cls_params)
    cur_train_len = len(train_examples_x)
    self.first_r_data_generator = self.trn_gen
    # Epoch budget for this round depends on whether it is the first round.
    cur_epoch = self.decide_epoch_curround(
        fit_params.get("first_epoch", 14), fit_params.get("left_epoch", 1))
    # Stop early once training accuracy saturates.
    early_stopping = TerminateOnBaseline(monitor="acc", baseline=0.999)
    cur_fit_history = self.model.fit_generator(
        self.first_r_data_generator,
        steps_per_epoch=self.decide_stepperepoch_curround(cur_train_len),
        validation_data=fit_params.get("valid_data"),
        epochs=cur_epoch,
        max_queue_size=10,
        callbacks=self.callbacks + [early_stopping],
        use_multiprocessing=False,
        workers=1,
        verbose=ThinRes34Config.VERBOSE,
    )
    self.round_idx += 1
    as_timer("TR34Cls_r{}_fit".format(self.round_idx))
    # Report the last epoch's metrics. (The unused `cur_lr` local from the
    # original was removed.)
    cur_train_loss = round(cur_fit_history.history.get("loss")[-1], 6)
    cur_train_acc = round(cur_fit_history.history.get("acc")[-1], 6)
    cur_fit_history_report = {
        "t_loss": cur_train_loss,
        "t_acc": cur_train_acc
    }
    return cur_fit_history_report
def online_fit(self, train_examples_x: np.ndarray, train_examples_y: np.ndarray, fit_params: dict):
    """Run one incremental CNN training round with mixup augmentation."""
    self.cur_train_num = len(train_examples_x)
    mixup_gen = MixupGenerator(
        train_examples_x,
        train_examples_y,
        batch_size=self.train_batch_size,
        datagen=self.img_freqmasking_datagen,
    )()
    # Refresh the epochs/steps schedule for this round.
    self._fit_params_decision(fit_params)
    schedule = self.fit_params_res
    self.model.fit_generator(
        mixup_gen,
        steps_per_epoch=schedule["steps_per_epoch"],
        epochs=schedule["epochs"],
        initial_epoch=schedule["initial_epoch"],
        shuffle=True,
        verbose=1,
    )
    self.n_iter += 5
    as_timer("CNNCls_fit_{}_{}_{}_{}".format(
        self.cur_train_num,
        schedule["initial_epoch"],
        schedule["epochs"],
        schedule["steps_per_epoch"],
    ))
def init(self, class_num: int, init_params: dict):
    """Build the pretrained-CNN backbone classifier.

    :param class_num: number of target classes.
    :param init_params: dict providing:
        - n_mels
        - pretrain_path
        - train_batch_size
        - predict_batch_size
    :return: None
    """
    self.class_num = class_num
    self.clf_name = "cnn_pret"
    # 64, fixed, as pretrained.
    self.n_mels = init_params.get("n_mels")
    input_shape = (self.n_mels, self.n_mels, 1)
    self.model = cnn_load_pretrained_model(
        input_shape=input_shape, n_classes=self.class_num)
    info("Backbone classifier={} is init, class_num={}, init_params={}".
         format(self.clf_name, self.class_num, init_params))
    as_timer("clf_{}_init".format(self.clf_name))
    self.train_batch_size = init_params.get("train_batch_size")
    self.predict_batch_size = init_params.get("predict_batch_size")
    self.n_iter = 0
    # Optional frequency-masking augmentation for spectrogram inputs.
    self.img_freqmasking_datagen = ImageDataGenerator(
        preprocessing_function=DNpAugPreprocessor.frequency_masking)
def predict_proba(self, test_examples: np.ndarray, predict_prob_params: dict) -> np.ndarray:
    """Predict class probabilities over TTA batches of the test set."""
    n_test = len(test_examples)
    tta_gen = TTAGenerator(test_examples, batch_size=self.predict_batch_size)()
    n_steps = int(np.ceil(n_test / self.predict_batch_size))
    probas = self.model.predict_generator(tta_gen, steps=n_steps)
    as_timer("CNNCls_testpred")
    return probas
def eval_val(self, val_examples_x, val_examples_y):
    """Score the model on the validation split; returns macro AUC."""
    n_valid = len(val_examples_x)
    val_gen = TTAGenerator(val_examples_x, batch_size=self.predict_batch_size)()
    n_steps = int(np.ceil(n_valid / self.predict_batch_size))
    val_probas = self.model.predict_generator(val_gen, steps=n_steps)
    # sklearn macro AUC (the autodl_auc variant was left disabled upstream).
    val_auc = ATEvaluator.skl_auc_macro(val_examples_y, val_probas)
    as_timer("CNNCls_evalval_{}_{}".format(n_valid, val_auc))
    return val_auc
def predict_proba(self, test_examples: np.ndarray, predict_prob_params: dict = None) -> np.ndarray:
    """Predict probabilities, reusing the cached prediction when the model
    is not yet considered good enough to predict again."""
    K.set_learning_phase(0)
    if not self.good_to_predict():
        # Skip inference this round; serve the previous round's output.
        return self.last_y_pred
    y_pred = self.model.predict(
        test_examples, batch_size=self.tr34_mconfig.PRED_SIZE)
    as_timer("TR34Cls_Test")
    self.test_idx += 1
    # Cache for rounds where good_to_predict() says no.
    self.last_y_pred = y_pred
    self.last_y_pred_round = self.round_idx
    return y_pred
def offline_fit(self, train_examples_x: np.ndarray, train_examples_y: np.ndarray, fit_params: dict = None):
    """Fit the classifier on the given examples.

    Args:
        train_examples_x: feature matrix.
        train_examples_y: one-hot (single-label) or multi-hot (multi-label)
            labels.
        fit_params: optional dict; key "if_multilabel" selects the branch.
    """
    # Guard: the default fit_params=None would crash on .get() below.
    fit_params = fit_params or {}
    if fit_params.get("if_multilabel") is False:
        # Single-label: sklearn expects categorical labels, not one-hot.
        train_examples_y = ohe2cat(train_examples_y)
        self.model.fit(train_examples_x, train_examples_y)
        # Remember which classes were actually seen in this fit.
        self.label_map = self.model.classes_
    else:
        self.ml_model.fit(train_examples_x, train_examples_y)
    as_timer("lr_sag_fit_{}".format(len(train_examples_x)))
def predict_proba(self, test_examples: np.ndarray, predict_prob_params: dict = None) -> np.ndarray:
    """Predict class probabilities.

    Args:
        test_examples: feature matrix.
        predict_prob_params: optional dict; key "if_multilabel" routes to the
            multi-label predictor.

    Returns:
        Probability matrix of shape (n_examples, class_num); columns for
        classes unseen during fit are rebuilt as needed.
    """
    # Guard: the default predict_prob_params=None would crash on .get().
    predict_prob_params = predict_prob_params or {}
    if predict_prob_params.get("if_multilabel") is True:
        return self.predict_proba_multilabel(test_examples)
    raw_pred_probas = self.model.predict_proba(test_examples)
    if len(self.label_map) < self.class_num:
        # Fit saw only a subset of classes; expand to the full class set.
        rebuilt_pred_proba = self.rebuild_prob_res(
            self.label_map, raw_pred_probas)
        as_timer("lr_liblinaer_pred_proba_{}".format(
            len(test_examples)))
        return rebuilt_pred_proba
    return raw_pred_probas
def init_kapre_melspectrogram_extractor(self):
    """Build the Kapre mel-spectrogram extractor and optionally warm it up.

    Warmup runs the feature maker once on random audio so that later calls
    don't pay first-use initialization cost.
    """
    self.kapre_melspectrogram_extractor = self.make_melspectrogram_extractor(
        (1, self.kape_params.get("CROP_SEC") * self.kape_params.get("SAMPLING_RATE")))
    if KAPRE_FMAKER_WARMUP:
        warmup_size = 10
        # The warmup audio content is irrelevant, so draw each 48000-sample
        # clip with one vectorized call instead of a per-sample Python loop.
        warmup_x = [
            np.random.uniform(size=48000).astype(np.float32)
            for _ in range(warmup_size)
        ]
        # Call for its side effect only; the features are discarded.
        self.make_features(warmup_x, feats_maker_params={
            "len_sample": 5,
            "sr": 16000
        })
        info("Kpare_featmaker warmup.")
        as_timer("Kpare_featmaker_warmup")
def train(self, dataset, remaining_time_budget=None):
    """Train method of domain-specific model."""
    # NOTE(review): this source was reconstructed from collapsed lines; the
    # nesting of the two branches below is the most plausible reading —
    # confirm against the original file.
    logger.info("Note: speech_train_process model.py starts train")
    as_timer("train_start")
    if IF_TRAIN_BREAK_CONDITION:
        # Keep running train rounds until one produces an acceptable
        # validation nAUC (or the pipeline signals -1).
        while True:
            self.cur_train_his_report = self.domain_model.train_pipeline(dataset)
            self.cur_cls_name = self.cur_train_his_report.get("cls_name")
            cur_val_nauc = self.cur_train_his_report["val_nauc"]
            # Record every round for later ensembling decisions.
            self.ensemble_val_record_list.append([self.cur_cls_name, cur_val_nauc])
            self.ensemble_val_nauc_list.append(cur_val_nauc)
            # -1 appears to mean "no validation score available" — TODO confirm.
            if cur_val_nauc == -1 or cur_val_nauc > self.get_accept_nauc():
                info("Decision=Yes, cur_cls_name={}, cur_val_nauc={}, his_top_nauc={}".format(self.cur_cls_name, cur_val_nauc, max(self.ensemble_val_nauc_list)))
                break
            else:
                info("Decision=No, cur_cls_name={}, cur_val_nauc={}, his_top_nauc={}".format(self.cur_cls_name, cur_val_nauc, max(self.ensemble_val_nauc_list)))
    else:
        # Single train round per call; additionally track the train loss
        # history used by test() for ensembling decisions.
        self.cur_train_his_report = self.domain_model.train_pipeline(dataset)
        self.cur_cls_name = self.cur_train_his_report.get("cls_name")
        cur_t_loss = self.cur_train_his_report.get("t_loss")
        if cur_t_loss is None:
            # Sentinel: a huge loss stands in for "no loss reported".
            self.g_train_loss_list.append(100000)
        else:
            self.g_train_loss_list.append(cur_t_loss)
        info("train_his_report={}".format(self.cur_train_his_report))
        cur_val_nauc = self.cur_train_his_report["val_nauc"]
        self.ensemble_val_record_list.append([self.cur_cls_name, cur_val_nauc])
        self.ensemble_val_nauc_list.append(cur_val_nauc)
    as_timer("speech_model_basic_train")
def predict_proba_2(self, test_examples: np.ndarray, predict_prob_params: dict = None) -> np.ndarray:
    """Always run a fresh prediction (no caching shortcut)."""
    K.set_learning_phase(0)
    preds = self.model.predict(
        test_examples, batch_size=self.tr34_mconfig.PRED_SIZE)
    as_timer("TR34Cls_Test")
    self.test_idx += 1
    return preds
def init(self, class_num: int, init_params: dict = None):
    """Initialize the Thin-ResNet34 classifier and its callbacks."""
    self.class_num = class_num
    model, callbacks = self.tr34_model_init(class_num)
    self.model = model
    self.callbacks = callbacks
    # Pick the spectrogram length schedule for the coming rounds.
    self.choose_round_spec_len()
    as_timer("TR34Cls_init")