def load_features(self):
    """Load the CSV named by the 'file' entry and populate derived entries.

    Reads the CSV into a DataFrame stored under 'df', resets its index
    in place (dropping the old one), and passes it to
    ``self.assign_category``. The result is used to store:

    * 'data' -- whatever ``preprocessing.insert_data`` returns for a
      sample of ``SAMPLE_DATA_SIZE``,
    * 'defaults' -- index labels of the stored 'data' paired, in order,
      with the stringified default values,
    * 'category_list' -- the categories from ``assign_category``.

    Returns:
        The categories produced by ``self.assign_category``.
    """
    # retrieve values from config, assign_category does this
    self.set('df', pd.read_csv(self.get('file')))
    frame = self.get('df')
    frame.reset_index(inplace=True, drop=True)

    (categories,
     unique_values,
     default_list,
     frequent_values2frequency) = self.assign_category(frame)

    # Stringify the defaults before insert_data runs, as the original did.
    defaults_as_text = list(map(str, default_list.values()))

    sample = preprocessing.insert_data(
        frame,
        categories,
        unique_values,
        default_list,
        frequent_values2frequency,
        SAMPLE_DATA_SIZE,
    )
    self.set('data', sample)

    # Read 'data' back through the accessor so the labels come from what
    # was actually stored.
    index_labels = self.get('data').index.tolist()
    self.set(
        'defaults',
        {label: text for label, text in zip(index_labels, defaults_as_text)},
    )
    self.set('category_list', categories)
    return categories
# Per-column default values (column names presumably follow the Iris
# dataset -- confirm against the CSV fixture).
default_list = {
    'sepal_length': 5.8,
    'sepal_width': 3.0,
    'petal_length': 4.35,
    'petal_width': 1.3,
    'class': 'Iris-setosa',
}

# Per-column (value, count) pairs; presumably the most frequent value and
# how often it occurs -- verify against the fixture data.
frequent_values2frequency = {
    'sepal_length': (5.0, 10),
    'sepal_width': (3.0, 26),
    'petal_length': (1.5, 14),
    'petal_width': (0.2, 28),
    'class': ('Iris-setosa', 50),
}

SAMPLE_DATA_SIZE = 5

# Shared fixtures built once at import time; order matters because each
# statement consumes names bound by the ones before it.
data = preprocessing.insert_data(
    df,
    categories,
    unique_values,
    default_list,
    frequent_values2frequency,
    SAMPLE_DATA_SIZE,
)
data.Category = categories
labels = feature_util.get_target_labels(target, data.Category[target], fs)

all_params_config = config_reader.read_config(config_file)
export_dir = all_params_config.export_dir()

dtypes = fs.group_by(categories)
category_list = ['numerical'] * 4 + ['categorical']
features = fs.create_tf_features(categories, target)


def test_add_and_get_port():
    """A port added for a user/config pair is returned by get_port."""
    t.add_port(username, config_file, port)
    stored_port = t.get_port(username, config_file)
    assert stored_port == port
def test_insert_data():
    """insert_data on the shared fixtures yields a 5 x 9 result (45 cells)."""
    insert_args = (
        df,
        categories,
        unique_values,
        default_list,
        frequent_values2frequency,
        SAMPLE_DATA_SIZE,
    )
    inserted = preprocessing.insert_data(*insert_args)

    assert inserted.values.size == 45
    assert inserted.shape == (5, 9)