def inference(self, data_dict, X=None, y=None):
    """Score test samples with the fitted predictor and pickle the results.

    Parameters
    ----------
    data_dict : dict or None
        The input test samples dict, e.g. ::

            {
                'x': list[episode_file_path],
                'y': list[label],
                'l': list[seq_len],
                'feat_n': n of feature space,
                'label_n': n of label space
            }

        When not None it is loaded through ``ml_reader.DatasetReader`` and
        takes precedence over ``X`` / ``y``.
    X : array-like, optional
        Feature matrix, used only when ``data_dict`` is None.
    y : array-like, optional
        Labels matching ``X``, used only when ``data_dict`` is None.

    Raises
    ------
    Exception
        If neither ``data_dict`` nor both ``X`` and ``y`` are supplied.
    ValueError
        If ``self.task_type`` is not one of 'binaryclass', 'regression',
        'multiclass' or 'multilabel'.

    Notes
    -----
    Nothing is returned. The predicted scores are pickled to
    ``<self.result_dir>/hat_y`` and the ground-truth labels to
    ``<self.result_dir>/y``.
    """
    # Identity checks: ``array != None`` is an element-wise comparison whose
    # truth value is ambiguous, so ``!=`` breaks as soon as X / y are arrays.
    if data_dict is not None:
        self._data_check([data_dict])
        data = ml_reader.DatasetReader(
            data_dict,
            device=self.device,
            task_type=self.task_type).get_data()
        _X = data['X']
        _y = data['Y']
    elif X is not None and y is not None:
        # Wrapped in a list to match the other _data_check call sites.
        self._data_check([{'X': X, 'Y': y}])
        _X = X
        _y = y
    else:
        raise Exception('fill in correct data for model inference')

    if self.task_type in ['binaryclass', 'regression']:
        # np.asarray lets plain Python lists of labels be reshaped too.
        real_v = np.asarray(_y).reshape(-1, 1)
        # NOTE(review): 'regression' shares the predict_proba path here;
        # confirm the configured regressor actually exposes predict_proba.
        prob_v = self.predictor.predict_proba(_X)[:, 1].reshape(-1, 1)
    elif self.task_type in ['multiclass']:
        real_v = np.array(_y)
        # Uses the label matrix width as the number of classes, so labels
        # must be 2-D (presumably one-hot -- TODO confirm).
        prob_v = self.predictor.predict_proba(_X).reshape(
            -1, np.shape(real_v)[1])
    elif self.task_type in ['multilabel']:
        real_v = np.array(_y)
        columns = []
        for each_class in self.predictor.predict_proba(_X):
            each_class = np.asarray(each_class)
            # Promote only genuinely 1-D outputs to a single row. Testing
            # ``len(...) == 1`` instead would also re-wrap a valid (1, k)
            # matrix produced for a single sample and corrupt the shape.
            if each_class.ndim == 1:
                each_class = each_class.reshape(1, -1)
            if each_class.shape[1] == 2:
                # Two columns: keep only the second (index 1) as the score.
                columns.append(each_class[:, 1].reshape((-1, 1)))
            else:
                columns.append(each_class)
        prob_v = np.concatenate(columns, 1)
    else:
        raise ValueError(
            'unsupported task_type for inference: %r' % (self.task_type,))

    # Context managers guarantee the result files are flushed and closed.
    with open(os.path.join(self.result_dir, 'hat_y'), 'wb') as fout:
        pickle.dump(prob_v, fout)
    with open(os.path.join(self.result_dir, 'y'), 'wb') as fout:
        pickle.dump(real_v, fout)
def fit(self, data_dict, X=None, y=None, assign_task_type=None):
    """Build the predictor, fit it on the training data and save it.

    Parameters
    ----------
    data_dict : dict or None
        The input train samples dict, e.g. ::

            {
                'x': list[episode_file_path],
                'y': list[label],
                'l': list[seq_len],
                'feat_n': n of feature space,
                'label_n': n of label space
            }

        When not None it is loaded through ``ml_reader.DatasetReader`` and
        takes precedence over ``X`` / ``y``.
    X : array-like, optional
        Feature matrix, used only when ``data_dict`` is None.
    y : array-like, optional
        Labels matching ``X``, used only when ``data_dict`` is None.
    assign_task_type : optional
        Value stored in ``self.task_type`` before the data is loaded.

    Returns
    -------
    self : object
        Fitted estimator.

    Raises
    ------
    Exception
        If neither ``data_dict`` nor both ``X`` and ``y`` are supplied.

    Notes
    -----
    The fitted predictor is dumped with joblib to
    ``<self.checkout_dir>/best.model``.
    """
    # NOTE(review): this overwrites any previously configured task_type,
    # including with None when assign_task_type is omitted -- confirm that
    # callers always pass it.
    self.task_type = assign_task_type

    # Identity checks: ``array != None`` is an element-wise comparison whose
    # truth value is ambiguous, so ``!=`` breaks as soon as X / y are arrays.
    if data_dict is not None:
        self._data_check([data_dict])
        data = ml_reader.DatasetReader(
            data_dict,
            device=self.device,
            task_type=self.task_type).get_data()
        _X = np.array(data['X'])
        _y = np.array(data['Y'])
    elif X is not None and y is not None:
        # The parameter is lowercase ``y``; an uppercase ``Y`` here is an
        # undefined name and would raise NameError on this whole branch.
        self._data_check([{'X': X, 'Y': y}])
        _X = X
        _y = y
    else:
        raise Exception('fill in correct data for model train')

    print(np.shape(_X), np.shape(_y))

    self._build_model()
    self.predictor.fit(_X, _y)

    model_path = os.path.join(self.checkout_dir, 'best.model')
    joblib.dump(self.predictor, model_path)
    # Return the estimator, as the documented contract promises.
    return self