def test_seq_ml_reader(self):
    """Check the shapes produced by ``seq_ml_reader.DatasetReader``.

    For each supported task ('binaryclass', 'multiclass', 'multilabel') a
    simulated sequence dataset is generated and read back with
    ``data_type='aggregation'``. The test asserts that ``X`` has
    ``n_sample`` rows and ``n_feat * sub_group`` columns, and that ``Y`` has
    ``n_sample`` rows and the expected number of label columns (1 for the
    single-label tasks, ``n_class`` for multilabel).
    """
    n_sample = 10
    n_feat = 30
    sub_group = 3
    n_class = 3

    # (task name, extra kwargs for the data generator, expected Y width).
    # The binary scenario deliberately passes no ``n_class`` so the
    # generator is called exactly as before.
    scenarios = [
        ('binaryclass', {}, 1),
        ('multiclass', {'n_class': n_class}, 1),
        ('multilabel', {'n_class': n_class}, n_class),
    ]

    for task, generator_kwargs, expected_y_width in scenarios:
        data = generate_simulation_sequence_data(
            n_sample=n_sample, n_feat=n_feat, task=task,
            **generator_kwargs)()
        seq_ds = seq_ml_reader.DatasetReader(
            data,
            sub_group=sub_group,
            data_type='aggregation',
            task_type=task).get_data()

        x_shape = np.shape(seq_ds['X'])
        y_shape = np.shape(seq_ds['Y'])

        # The task name is attached so a failure identifies the scenario.
        assert x_shape[0] == n_sample, task
        assert x_shape[1] == n_feat * sub_group, task
        assert y_shape[0] == n_sample, task
        assert y_shape[1] == expected_y_width, task
def fit(self, data_dict, X=None, y=None, assign_task_type=None):
    """Train the predictor and save it to ``<checkout_dir>/best.model``.

    Training data comes either from ``data_dict`` (loaded through
    ``ml_reader.DatasetReader``) or, when ``data_dict`` is ``None``, from
    the ``X`` / ``y`` arrays passed directly.

    Parameters
    ----------
    data_dict : dict or None
        Dataset description handed to ``ml_reader.DatasetReader``. The
        layout documented for this method so far is::

            {
                'x': list[episode_file_path],
                'y': list[label],
                'l': list[seq_len],
                'feat_n': n of feature space,
                'label_n': n of label space
            }

        (not verifiable from this method alone -- confirm against the
        reader). Pass ``None`` to use ``X`` and ``y`` instead.
    X : array-like, optional
        Feature matrix, used only when ``data_dict`` is ``None``.
    y : array-like, optional
        Labels, used only when ``data_dict`` is ``None``.
    assign_task_type : str, optional
        Stored on ``self.task_type`` (also when it is ``None``) and
        forwarded to the dataset reader.

    Returns
    -------
    self : object
        Fitted estimator.

    Raises
    ------
    ValueError
        If neither ``data_dict`` nor both ``X`` and ``y`` are supplied.
    """
    self.task_type = assign_task_type

    # Identity checks, not ``!= None``: comparing a NumPy array with
    # ``!=`` is elementwise and cannot be used as a truth value.
    if data_dict is not None:
        self._data_check([data_dict])
        data = ml_reader.DatasetReader(
            data_dict, task_type=self.task_type).get_data()
        _X = np.array(data['X'])
        _y = np.array(data['Y'])
    elif X is not None and y is not None:
        self._data_check([{'X': X, 'Y': y}])
        _X = X
        _y = y
    else:
        raise ValueError('fill in correct data for model train')

    print(np.shape(_X), np.shape(_y))

    # presumably (re)creates ``self.predictor`` -- defined outside this block
    self._build_model()
    self.predictor.fit(_X, _y)

    model_path = os.path.join(self.checkout_dir, 'best.model')
    joblib.dump(self.predictor, model_path)
    return self
def inference(self, data_dict, X=None, y=None):
    """Run the predictor on test data and pickle predictions and labels.

    Test data comes either from ``data_dict`` (loaded through
    ``ml_reader.DatasetReader``) or, when ``data_dict`` is ``None``, from
    the ``X`` / ``y`` arrays passed directly. Two pickle files are written
    into ``self.result_dir``: ``hat_y`` (predicted probabilities) and ``y``
    (the reference labels). Nothing is returned.

    Parameters
    ----------
    data_dict : dict or None
        Dataset description handed to ``ml_reader.DatasetReader``. The
        layout documented for this method so far is::

            {
                'x': list[episode_file_path],
                'y': list[label],
                'l': list[seq_len],
                'feat_n': n of feature space,
                'label_n': n of label space
            }

        (not verifiable from this method alone -- confirm against the
        reader). Pass ``None`` to use ``X`` and ``y`` instead.
    X : array-like, optional
        Feature matrix, used only when ``data_dict`` is ``None``.
    y : array-like, optional
        Labels, used only when ``data_dict`` is ``None``.

    Raises
    ------
    ValueError
        If neither ``data_dict`` nor both ``X`` and ``y`` are supplied, or
        if ``self.task_type`` is not one of 'binaryclass', 'regression',
        'multiclass' or 'multilabel'.
    """
    # Identity checks, not ``!= None``: comparing a NumPy array with
    # ``!=`` is elementwise and cannot be used as a truth value.
    if data_dict is not None:
        self._data_check([data_dict])
        data = ml_reader.DatasetReader(
            data_dict, task_type=self.task_type).get_data()
        _X = data['X']
        _y = data['Y']
    elif X is not None and y is not None:
        # Wrapped in a list like every other _data_check call site.
        self._data_check([{'X': X, 'Y': y}])
        _X = X
        _y = y
    else:
        raise ValueError('fill in correct data for model inference')

    if self.task_type in ('binaryclass', 'regression'):
        # asarray so plain Python lists of labels can be reshaped too.
        real_v = np.asarray(_y).reshape(-1, 1)
        # NOTE(review): 'regression' also goes through predict_proba and
        # takes column 1 -- confirm the regression predictor provides it.
        prob_v = self.predictor.predict_proba(_X)[:, 1].reshape(-1, 1)
    elif self.task_type == 'multiclass':
        real_v = np.array(_y)
        prob_v = self.predictor.predict_proba(_X).reshape(
            -1, np.shape(real_v)[1])
    elif self.task_type == 'multilabel':
        real_v = np.array(_y)
        per_label = []
        # predict_proba is iterated as one entry per label here; column 1
        # of each entry is kept as that label's score.
        for each_class in self.predictor.predict_proba(_X):
            # NOTE(review): kept as-is; for a (1, k) entry this wrap gives
            # shape (1, 1, k) and the slice below comes out empty --
            # confirm the intended single-sample behaviour.
            if len(each_class) == 1:
                each_class = np.array([each_class])
            per_label.append(each_class[:, 1:2])
        prob_v = np.concatenate(per_label, 1)
    else:
        # Previously this fell through to an UnboundLocalError at the dump.
        raise ValueError(
            'unsupported task_type: {!r}'.format(self.task_type))

    # Context managers so both result files are flushed and closed.
    with open(os.path.join(self.result_dir, 'hat_y'), 'wb') as hat_y_file:
        pickle.dump(prob_v, hat_y_file)
    with open(os.path.join(self.result_dir, 'y'), 'wb') as y_file:
        pickle.dump(real_v, y_file)