예제 #1
0
 def load_features(self):
     """Load the configured CSV file and derive the feature metadata.

     The CSV whose path is stored under ``'file'`` is read and kept under
     ``'df'``.  ``assign_category`` is then run on that frame, and the
     output of ``preprocessing.insert_data`` is stored under ``'data'``.
     ``'defaults'`` pairs each index label of the stored data, in order,
     with the stringified default values, and ``'category_list'`` holds
     the categories.

     Returns:
         The categories produced by ``assign_category``.
     """
     csv_path = self.get('file')
     self.set('df', pd.read_csv(csv_path))
     frame = self.get('df')
     # Renumber the rows in place; drop=True discards the previous index
     # instead of keeping it as a column.
     frame.reset_index(drop=True, inplace=True)

     assigned = self.assign_category(frame)
     categories, unique_values, default_list, frequent_values2frequency = assigned
     default_values = list(map(str, default_list.values()))

     sample = preprocessing.insert_data(
         frame, categories, unique_values, default_list,
         frequent_values2frequency, SAMPLE_DATA_SIZE)
     self.set('data', sample)

     # Read the data back through the getter before taking its index.
     row_labels = self.get('data').index.tolist()
     self.set('defaults', dict(zip(row_labels, default_values)))
     self.set('category_list', categories)
     return categories
예제 #2
0
# Module-level fixtures shared by the tests in this file. They rely on names
# defined outside this section (df, categories, unique_values, target, fs,
# config_file), so statement order matters.

# Per-column default values handed to preprocessing.insert_data.
# NOTE(review): column names look like the Iris dataset -- confirm.
default_list = {
    'sepal_length': 5.8,
    'sepal_width': 3.0,
    'petal_length': 4.35,
    'petal_width': 1.3,
    'class': 'Iris-setosa'
}
# Per-column (value, count) pairs handed to preprocessing.insert_data.
# Presumably the most frequent value and how often it occurs, as the name
# suggests -- TODO confirm against the helper that produces this mapping.
frequent_values2frequency = {
    'sepal_length': (5.0, 10),
    'sepal_width': (3.0, 26),
    'petal_length': (1.5, 14),
    'petal_width': (0.2, 28),
    'class': ('Iris-setosa', 50)
}
# Last argument passed to preprocessing.insert_data below.
SAMPLE_DATA_SIZE = 5
data = preprocessing.insert_data(df, categories, unique_values, default_list,
                                 frequent_values2frequency, SAMPLE_DATA_SIZE)
# Attach the categories to the insert_data result so that the
# data.Category[target] lookup below works.
data.Category = categories

labels = feature_util.get_target_labels(target, data.Category[target], fs)
all_params_config = config_reader.read_config(config_file)
export_dir = all_params_config.export_dir()
dtypes = fs.group_by(categories)
# NOTE(review): category_list is not referenced in this section; the calls
# here pass `categories` instead -- check whether other tests use it.
category_list = [
    'numerical', 'numerical', 'numerical', 'numerical', 'categorical'
]
features = fs.create_tf_features(categories, target)


def test_add_and_get_port():
    """Check that a port stored with ``t.add_port`` is returned by ``t.get_port``."""
    t.add_port(username, config_file, port)
    stored_port = t.get_port(username, config_file)
    assert stored_port == port
예제 #3
0
def test_insert_data():
    """Check the element count and shape of the ``preprocessing.insert_data`` result."""
    expected_shape = (5, 9)
    inserted = preprocessing.insert_data(
        df,
        categories,
        unique_values,
        default_list,
        frequent_values2frequency,
        SAMPLE_DATA_SIZE,
    )
    # 45 elements is consistent with the expected 5 x 9 shape.
    assert inserted.values.size == 45
    assert inserted.shape == expected_shape