def setup_training_data(self, train_file, target):
    # Loading training data
    self.train_data = data_io.get_data(setup.get_datasets_path(), train_file)
    # Setting what to predict
    self.target_column = target
def setUp(self):
    self.data = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
    # Add an all-NaN column and coerce every column to numeric
    self.data["Nonsense"] = np.nan
    self.data = self.data.apply(pd.to_numeric, errors='coerce')
    # Append an all-NaN row
    self.data.loc[self.data.shape[0]] = [np.nan] * self.data.shape[1]
    self.target = 'Survived'
    self.start = self.data.shape
def standard_pred_type(self, filename, target, goal, message):
    # Arrange
    data = data_io.get_data(setup.get_datasets_path(), filename)
    # Act
    ml_type = dataset_insight.get_prediction_type(data[target])
    # Assert
    self.assertEqual(ml_type, goal, msg=message)
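# For reference, a minimal sketch of what dataset_insight.get_prediction_type might
# do. The heuristic (treat non-numeric or low-cardinality targets as classification)
# and the return values are assumptions, not the project's actual implementation.
import pandas as pd

def get_prediction_type_sketch(target_series: pd.Series, max_classes: int = 20) -> str:
    # Hypothetical heuristic: non-numeric or low-cardinality columns look like
    # classification targets; everything else is treated as regression.
    if target_series.dtype == object or target_series.nunique() <= max_classes:
        return "classification"
    return "regression"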
def test_merge(self):
    # Arrange
    filepath = setup.get_datasets_path() + "titanic_train.csv"
    full_csv = False
    full_rds = False
    # Act
    merge = mg.Merger()
    merge.merge(["Name", filepath, filepath, filepath])
    for file_name in os.listdir(setup.get_datasets_path()):
        if file_name.endswith("full.dataset.csv"):
            full_csv = True
            os.remove(setup.get_datasets_path() + file_name)
        if file_name.endswith("full.dataset.rds"):
            full_rds = True
            os.remove(setup.get_datasets_path() + file_name)
    # Assert
    self.assertTrue(full_csv)
    self.assertTrue(full_rds)
def test_scaling(self):
    # Arrange
    data = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")
    # Act
    target = data_scaling.scale_numeric_data(data)
    target = target.to_numpy()
    # Assert
    self.assertAlmostEqual(target.mean(), 0, 5)
    self.assertAlmostEqual(target.std(), 1, 5)
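# The assertions above expect zero mean and unit standard deviation; below is a
# minimal sketch of such a scaler. Whether data_scaling.scale_numeric_data actually
# uses sklearn's StandardScaler is an assumption.
import pandas as pd
from sklearn.preprocessing import StandardScaler

def scale_numeric_data_sketch(data: pd.DataFrame) -> pd.DataFrame:
    # Standardise every numeric column to zero mean and unit variance.
    numeric = data.select_dtypes(include="number")
    scaled = StandardScaler().fit_transform(numeric)
    return pd.DataFrame(scaled, columns=numeric.columns, index=numeric.index)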
def test_data_split(self):
    # Arrange
    data = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")
    # Act
    x_train, x_test, y_train, y_test = data_splitting.get_train_test(data, "quality")
    # Assert
    self.assertEqual(len(x_train[:, 0]), len(y_train))
    self.assertEqual(len(x_test[:, 0]), len(y_test))
    self.assertAlmostEqual(len(y_train) / len(y_test), 4, 2)
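# The 4:1 ratio asserted above corresponds to an 80/20 split; a minimal sketch of
# get_train_test under that assumption (the real data_splitting module may differ).
import pandas as pd
from sklearn.model_selection import train_test_split

def get_train_test_sketch(data: pd.DataFrame, target: str, test_size: float = 0.2):
    # Separate features from the target column and split 80/20 by default.
    features = data.drop(columns=[target]).to_numpy()
    labels = data[target].to_numpy()
    return train_test_split(features, labels, test_size=test_size)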
class TestSplitter(TestCase):
    # Tests the functionality of the splitter with comma and semicolon delimiters.
    test_csv = False
    train_csv = False
    test_rds = False
    train_rds = False
    filepath = setup.get_datasets_path()

    def helper(self):
        # Record which output files the splitter produced, then clean them up.
        self.test_csv = False
        self.train_csv = False
        self.test_rds = False
        self.train_rds = False
        for file_name in os.listdir(self.filepath):
            if file_name.endswith(".testing.data.csv"):
                self.test_csv = True
                os.remove(self.filepath + file_name)
            if file_name.endswith(".training.data.csv"):
                self.train_csv = True
                os.remove(self.filepath + file_name)
            if file_name.endswith(".training.data.rds"):
                self.train_rds = True
                os.remove(self.filepath + file_name)
            if file_name.endswith(".testing.data.rds"):
                self.test_rds = True
                os.remove(self.filepath + file_name)

    def split_body(self, path, delim):
        # Arrange
        splitter = sp.Splitter()
        # Act
        splitter.Split(path, delim)
        self.helper()
        # Assert
        self.assertTrue(self.test_csv)
        self.assertTrue(self.train_csv)
        self.assertTrue(self.test_rds)
        self.assertTrue(self.train_rds)

    # test_comma_split tests the splitter with a comma delimiter.
    def test_comma_split(self):
        self.split_body(self.filepath + "titanic_train.csv", ",")

    # test_semicolon_split tests the splitter with a semicolon delimiter.
    def test_semicolon_split(self):
        self.split_body(self.filepath + "winequality-red.csv", ";")
def get_img_size(path, size):
    # Count images that are exactly size x size pixels versus those that are not.
    size_count = 0
    bad_count = 0
    dirs = os.listdir(path)
    for item in dirs:
        if imghdr.what(path + item) in imgfiles:  # if the file has the correct type
            img = Image.open(path + item)
            width, height = img.size  # PIL reports (width, height)
            if height == size and width == size:
                size_count = size_count + 1
            else:
                bad_count = bad_count + 1
    return size_count, bad_count


# Performing Testing
path = get_datasets_path()

# img count
img, not_img = get_file_count(path)
print("This classifier only works on image files.")
print("You have " + str(img) + " image files and " + str(not_img) + " non-image files.")

# img sizes
size = 28
good_size, bad_size = get_img_size(path, size)
print("Each image in the path must have the same length and width.")
print("You have " + str(good_size) + " files of size " + str(size) + " and " + str(bad_size) + " files not of that size.")

# scoring
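# get_file_count is not shown above; a plausible sketch using the same imghdr check
# follows. Its signature and behaviour here are assumptions.
import imghdr
import os

def get_file_count_sketch(path):
    # Count files that imghdr recognises as images versus everything else.
    img_count = 0
    not_img_count = 0
    for item in os.listdir(path):
        if imghdr.what(os.path.join(path, item)) is not None:
            img_count += 1
        else:
            not_img_count += 1
    return img_count, not_img_count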
def setUp(self):
    self.data = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")
    self.target = 'quality'
    self.start = self.data.shape
def setUp(self):
    self.data = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
def setUp(self):
    self.data = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")
    self.strategy = filler_strategy.FillerStrategy(self.data)
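# For context, a minimal sketch of the kind of missing-value handling a filler
# strategy might perform; FillerStrategy's real interface is not shown here, so the
# class name, method name, and behaviour below are hypothetical.
import pandas as pd

class FillerStrategySketch:
    def __init__(self, data: pd.DataFrame):
        self.data = data

    def fill_missing(self) -> pd.DataFrame:
        # Fill numeric NaNs with the column mean, leaving other columns untouched.
        filled = self.data.copy()
        numeric_cols = filled.select_dtypes(include="number").columns
        filled[numeric_cols] = filled[numeric_cols].fillna(filled[numeric_cols].mean())
        return filled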
def standard_delim(self, filename, target_delim):
    # Act
    delim = dataset_insight.get_delimiter(setup.get_datasets_path() + filename)
    # Assert
    self.assertEqual(delim, target_delim, "Delimiter detected incorrectly.")
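# A minimal sketch of delimiter detection using the standard library's csv.Sniffer;
# whether dataset_insight.get_delimiter works this way is an assumption.
import csv

def get_delimiter_sketch(filepath):
    # Let csv.Sniffer infer the delimiter from a sample of the file.
    with open(filepath, newline="") as handle:
        sample = handle.read(4096)
    return csv.Sniffer().sniff(sample, delimiters=",;").delimiter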
def setUp(self):
    self.data = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
    # Add one all-NaN column and one constant column, then coerce to numeric
    self.data["Nonsense"] = np.nan
    self.data["Filled"] = 1
    self.data = self.data.apply(pd.to_numeric, errors='coerce')
def setup_test_data(self, test_file):
    # Loading testing data
    self.test_data = data_io.get_data(setup.get_datasets_path(), test_file)