示例#1
0
class TestDataSetSplitter(unittest.TestCase):
    """Tests for ratio-based splitting performed by ``DataSetSplitter``."""

    def setUp(self):
        """Create a ten-row data model with a target and two numeric features."""
        # Every column holds the integers 0..9, each in its own list.
        self._data = {
            column: list(range(10))
            for column in ('target', 'feature_1', 'feature_2')
        }
        self._dataframe = pd.DataFrame(self._data)

        model = self._data_model = DataModel(self._dataframe)
        model.set_tf_feature_columns([
            tf.feature_column.numeric_column(name)
            for name in ('feature_1', 'feature_2')
        ])
        model.set_target_column('target')

    def test_split_data(self):
        """A [20, 80] split of ten rows yields parts of length 2 and 8."""
        # The returned parts are unpacked in the same order as the ratios.
        eval_part, train_part = DataSetSplitter(self._data_model).split_by_ratio(
            ratios=[20, 80])

        features_of_train = train_part.get_feature_columns()
        target_of_train = train_part.get_target_column()

        features_of_eval = eval_part.get_feature_columns()
        target_of_eval = eval_part.get_target_column()

        expectations = (
            (target_of_train, 8),
            (features_of_train, 8),
            (target_of_eval, 2),
            (features_of_eval, 2),
        )
        for values, expected_length in expectations:
            self.assertEqual(len(values), expected_length)
示例#2
0
 def render_tf_feature_columns(self, data_model: DataModel):
     """Rebuild the TF feature columns stored on ``data_model``.

     The model's feature-column list is first reset to an empty list. Then,
     for every entry returned by ``self.feature_columns()``, a strategy is
     obtained from ``FeatureColumnStrategyFactory`` using the entry's
     ``'name'`` and ``'type'`` values, and each column produced by the
     strategy's ``build()`` is added to the model.

     Args:
         data_model: Model whose TF feature columns are reset and repopulated.
     """
     # Start from a clean slate so columns from an earlier call are dropped.
     data_model.set_tf_feature_columns([])

     for column_info in self.feature_columns():
         column_name = column_info['name']
         column_type = column_info['type']
         strategy = FeatureColumnStrategyFactory.get_strategy(
             column_name,
             column_type,
             data_model,
             self.feature_config(),
         )
         # build() is iterated because a strategy may yield several columns.
         for built_column in strategy.build():
             data_model.add_tf_feature_columns(built_column)