示例#1
0
    def test_seq_ml_reader(self):
        """Check the output shapes of the aggregation sequence reader.

        For each task type, simulated sequence data is generated, passed
        through ``seq_ml_reader.DatasetReader`` with
        ``data_type='aggregation'``, and the shapes of ``X`` and ``Y`` in the
        returned dict are asserted:

        * ``X`` has ``n_sample`` rows and ``n_feat * sub_group`` columns.
        * ``Y`` has ``n_sample`` rows and one column for 'binaryclass' and
          'multiclass', or ``n_class`` columns for 'multilabel'.
        """
        test_n_sample = 10
        test_n_feat = 30
        test_sub_group = 3
        test_n_class = 3

        # (task, extra generator kwargs, expected number of Y columns).
        # 'binaryclass' passes no n_class, exactly as the original call did.
        cases = [
            ('binaryclass', {}, 1),
            ('multiclass', {'n_class': test_n_class}, 1),
            ('multilabel', {'n_class': test_n_class}, test_n_class),
        ]

        for task, extra_kwargs, expected_y_cols in cases:
            data = generate_simulation_sequence_data(n_sample=test_n_sample,
                                                     n_feat=test_n_feat,
                                                     task=task,
                                                     **extra_kwargs)()
            seq_ds = seq_ml_reader.DatasetReader(
                data,
                sub_group=test_sub_group,
                data_type='aggregation',
                task_type=task).get_data()

            x_shape = np.shape(seq_ds['X'])
            y_shape = np.shape(seq_ds['Y'])
            # The task name is attached so a failure identifies the case.
            assert x_shape[0] == test_n_sample, task
            assert x_shape[1] == test_n_feat * test_sub_group, task
            assert y_shape[0] == test_n_sample, task
            assert y_shape[1] == expected_y_cols, task
示例#2
0
文件: rf.py 项目: zyh1234/PyHealth
    def fit(self, data_dict, X=None, y=None, assign_task_type=None):
        """Train the predictor and dump it to ``<checkout_dir>/best.model``.

        Training data comes either from ``data_dict`` (loaded through
        ``ml_reader.DatasetReader``) or, when ``data_dict`` is ``None``,
        directly from ``X`` and ``y``.

        Parameters
        ----------
        data_dict : dict or None
            Sample description handed to ``ml_reader.DatasetReader``. The
            original docstring describes the expected layout as::

                {
                  'x': list[episode_file_path],
                  'y': list[label],
                  'l': list[seq_len],
                  'feat_n': n of feature space,
                  'label_n': n of label space
                }

            When not ``None`` it takes precedence over ``X``/``y``.
        X : array-like, optional
            In-memory training features; used only when ``data_dict`` is
            ``None``.
        y : array-like, optional
            In-memory training labels; used only when ``data_dict`` is
            ``None``.
        assign_task_type : optional
            Stored unconditionally as ``self.task_type`` (including when it
            is ``None``) and forwarded to the dataset reader.

        Returns
        -------
        None
            The fitted model is kept on ``self.predictor`` and written to
            disk with ``joblib.dump``.

        Raises
        ------
        Exception
            If neither ``data_dict`` nor both ``X`` and ``y`` are provided.
        """
        self.task_type = assign_task_type
        # Identity checks: `!= None` on NumPy arrays compares elementwise and
        # makes the `and` below raise "truth value ... is ambiguous".
        if data_dict is not None:
            self._data_check([data_dict])
            data = ml_reader.DatasetReader(
                data_dict, task_type=self.task_type).get_data()
            _X = np.array(data['X'])
            _y = np.array(data['Y'])
        elif X is not None and y is not None:
            # The parameter is lowercase `y`; the former `Y` was undefined.
            self._data_check([{'X': X, 'Y': y}])
            _X = X
            _y = y
        else:
            raise Exception('fill in correct data for model train')

        print(np.shape(_X), np.shape(_y))
        self._build_model()
        self.predictor.fit(_X, _y)
        model_path = os.path.join(self.checkout_dir, 'best.model')
        joblib.dump(self.predictor, model_path)
示例#3
0
文件: rf.py 项目: zyh1234/PyHealth
    def inference(self, data_dict, X=None, y=None):
        """Score test data with the fitted predictor and pickle the results.

        Test data comes either from ``data_dict`` (loaded through
        ``ml_reader.DatasetReader``) or, when ``data_dict`` is ``None``,
        directly from ``X`` and ``y``. Predicted scores are pickled to
        ``<result_dir>/hat_y`` and the true labels to ``<result_dir>/y``.

        Parameters
        ----------
        data_dict : dict or None
            Sample description handed to ``ml_reader.DatasetReader``. The
            original docstring describes the expected layout as::

                {
                  'x': list[episode_file_path],
                  'y': list[label],
                  'l': list[seq_len],
                  'feat_n': n of feature space,
                  'label_n': n of label space
                }

            When not ``None`` it takes precedence over ``X``/``y``.
        X : array-like, optional
            In-memory test features; used only when ``data_dict`` is ``None``.
        y : array-like, optional
            In-memory test labels; used only when ``data_dict`` is ``None``.

        Raises
        ------
        Exception
            If neither ``data_dict`` nor both ``X`` and ``y`` are provided,
            or if ``self.task_type`` is not one of the handled task types.
        """
        # Identity checks: `!= None` on NumPy arrays compares elementwise and
        # makes the `and` below raise "truth value ... is ambiguous".
        if data_dict is not None:
            self._data_check([data_dict])
            data = ml_reader.DatasetReader(
                data_dict, task_type=self.task_type).get_data()
            _X = data['X']
            _y = data['Y']
        elif X is not None and y is not None:
            # Wrapped in a list to match every other _data_check call site.
            self._data_check([{'X': X, 'Y': y}])
            _X = X
            _y = y
        else:
            raise Exception('fill in correct data for model inference')

        # Labels may arrive as a plain list, so coerce before reshaping.
        real_v = np.asarray(_y)
        if self.task_type in ['binaryclass', 'regression']:
            # NOTE(review): 'regression' is scored via predict_proba as well;
            # confirm the predictor built for regression provides it.
            real_v = real_v.reshape(-1, 1)
            prob_v = self.predictor.predict_proba(_X)[:, 1].reshape(-1, 1)
        elif self.task_type in ['multiclass']:
            prob_v = self.predictor.predict_proba(_X).reshape(
                -1,
                np.shape(real_v)[1])
        elif self.task_type in ['multilabel']:
            # predict_proba is iterated and each item column-sliced, so it is
            # presumably one array per label here — TODO confirm.
            prob_v = []
            _prob_v = self.predictor.predict_proba(_X)
            for each_class in _prob_v:
                # NOTE(review): single-row handling kept exactly as written;
                # verify that it yields the intended shape.
                if len(each_class) == 1:
                    each_class = np.array([each_class])
                prob_v.append(each_class[:, 1:2])
            prob_v = np.concatenate(prob_v, 1)
        else:
            # Previously fell through and failed below on an unbound prob_v.
            raise Exception(
                'unsupported task type for model inference: {0}'.format(
                    self.task_type))

        # Context managers make sure both result files are flushed and closed.
        with open(os.path.join(self.result_dir, 'hat_y'), 'wb') as hat_y_file:
            pickle.dump(prob_v, hat_y_file)
        with open(os.path.join(self.result_dir, 'y'), 'wb') as y_file:
            pickle.dump(real_v, y_file)