Exemplo n.º 1
0
 def test_should_equal_n_features_length_of_feature_names(self):
     """Two named, weighted features should produce a three-column result."""
     generated = gsd.generate_synthetic_dataset(
         n_features=2,
         feature_names=['A', 'B'],
         feature_weights=[0.5, 0.5],
     )
     # One more column than the two features requested; presumably the
     # extra one is the target/label column -- confirm against gsd.
     column_count = len(generated.columns)
     self.assertEqual(column_count, 3)
Exemplo n.º 2
0
 def test_should_throw_error_if_n_samples_none(self):
     """Passing ``n_samples=None`` must raise ``ValueError``."""
     # Callable form of assertRaises: the function is invoked with the
     # given keyword arguments and the raised exception type is checked.
     self.assertRaises(
         ValueError,
         gsd.generate_synthetic_dataset,
         n_samples=None,
     )
Exemplo n.º 3
0
 def test_should_return_dataframe(self):
     """The generator called with no arguments returns a ``pd.DataFrame``."""
     dataset = gsd.generate_synthetic_dataset()
     # assertIsInstance reports the offending object and expected type on
     # failure, whereas assertTrue(isinstance(...)) only reports
     # "False is not true".
     self.assertIsInstance(dataset, pd.DataFrame)
Exemplo n.º 4
0
 def test_should_equal_n_samples(self):
     """The length of the generated dataset equals the requested ``n_samples``."""
     requested_samples = 10
     dataset = gsd.generate_synthetic_dataset(n_samples=requested_samples)
     self.assertEqual(len(dataset), requested_samples)
Exemplo n.º 5
0
 def test_should_throw_error_if_n_features_does_not_equal_length_of_feature_weights(self):
     """Three features paired with only two weights must raise ``ValueError``."""
     # Callable form of assertRaises: the function is invoked with the
     # given keyword arguments and the raised exception type is checked.
     self.assertRaises(
         ValueError,
         gsd.generate_synthetic_dataset,
         n_features=3,
         feature_names=['A', 'B', 'C'],
         feature_weights=[0.0, 0.0],
     )
Exemplo n.º 6
0
 def test_should_throw_error_if_feature_names_is_None(self):
     """Passing ``feature_names=None`` must raise ``ValueError``."""
     # Callable form of assertRaises: the function is invoked with the
     # given keyword arguments and the raised exception type is checked.
     self.assertRaises(
         ValueError,
         gsd.generate_synthetic_dataset,
         feature_names=None,
     )
Exemplo n.º 7
0
 def test_should_throw_error_if_class_weights_dont_add_up_to_1(self):
     """Class weights of ``[0, 0]`` (sum 0, not 1) must raise ``ValueError``."""
     # Callable form of assertRaises: the function is invoked with the
     # given keyword arguments and the raised exception type is checked.
     self.assertRaises(
         ValueError,
         gsd.generate_synthetic_dataset,
         class_weights=[0, 0],
     )
Exemplo n.º 8
0
 def test_should_throw_error_if_n_features_is_less_than_1(self):
     """Requesting zero features must raise ``ValueError``."""
     # Callable form of assertRaises: the function is invoked with the
     # given keyword arguments and the raised exception type is checked.
     self.assertRaises(
         ValueError,
         gsd.generate_synthetic_dataset,
         n_features=0,
     )
Exemplo n.º 9
0
import generate_synthetic_dataset as gsd
import json
import sys

# Command-line entry point: read a JSON dataset specification from the path
# given as the first argument, generate the dataset, and write it to
# "<dataset_name>.csv".
#
# Usage: python <this script> <spec.json>
if len(sys.argv) < 2:
    # Fail with a readable usage message instead of a bare IndexError.
    sys.exit(f"usage: {sys.argv[0]} <spec.json>")

# Keep the file open only while parsing; dataset generation and the CSV
# write below do not need the handle.
with open(sys.argv[1]) as f:
    json_from_file = json.load(f)

# The top-level JSON keys are passed as keyword arguments to the model.
data_model = gsd.SyntheticDataModel(**json_from_file)
dataset = gsd.generate_synthetic_dataset(
    n_samples=data_model.n_samples,
    n_classes=data_model.n_classes,
    class_weights=data_model.class_weights,
    class_names=data_model.class_names,
    y_column_name=data_model.y_column_name,
    n_features=data_model.n_features,
    feature_names=data_model.feature_names,
    feature_types=data_model.feature_types,
    feature_number_range=data_model.feature_number_range,
    feature_positive_class_ratio=data_model.feature_positive_class_ratio,
    feature_negative_class_ratio=data_model.feature_negative_class_ratio,
)
# index=False: do not write the row index as an extra CSV column.
dataset.to_csv(f"{data_model.dataset_name}.csv", index=False)