Пример #1
0
    def setup_training_data(self, train_file, target):
        """Load the training dataset and remember which column to predict.

        Args:
            train_file: File name passed to ``data_io.get_data`` together
                with the datasets directory.
            target: Stored unchanged in ``self.target_column``.
        """
        datasets_dir = setup.get_datasets_path()

        # Training data is read from the shared datasets directory.
        self.train_data = data_io.get_data(datasets_dir, train_file)

        # Column the model is expected to predict.
        self.target_column = target
Пример #2
0
 def setUp(self):
     """Build the fixture frame from ``titanic_train.csv``.

     The frame gets an all-NaN ``Nonsense`` column, every column is passed
     through ``pd.to_numeric`` with ``errors='coerce'``, and one all-NaN
     row is added under the label equal to the current row count.
     """
     frame = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
     frame["Nonsense"] = np.nan
     frame = frame.apply(pd.to_numeric, errors='coerce')

     # Add one row made entirely of NaN, labelled with the row count.
     row_count, column_count = frame.shape
     frame.loc[row_count] = [np.nan] * column_count

     self.data = frame
     self.target = 'Survived'
     # Shape of the prepared frame, recorded at the end of set-up.
     self.start = frame.shape
Пример #3
0
    def standard_pred_type(self, filename, target, goal, message):
        """Assert the prediction type detected for one dataset column.

        Args:
            filename: Dataset file name inside the datasets directory.
            target: Key used to select the column from the loaded data.
            goal: Expected result of ``dataset_insight.get_prediction_type``.
            message: Failure message passed to the assertion.
        """
        # Arrange
        frame = data_io.get_data(setup.get_datasets_path(), filename)
        target_column = frame[target]

        # Act
        detected = dataset_insight.get_prediction_type(target_column)

        # Assert
        self.assertEqual(detected, goal, msg=message)
Пример #4
0
    def test_merge(self):
        """Check that merging leaves both a ``full.dataset`` csv and rds file.

        Every matching file found in the datasets directory is deleted as it
        is detected, so the directory is cleaned up as part of the check.
        """
        # Arrange
        datasets_dir = setup.get_datasets_path()
        filepath = datasets_dir + "titanic_train.csv"
        # Maps each expected output suffix to whether such a file was seen.
        found = {"full.dataset.csv": False, "full.dataset.rds": False}

        # Act
        merger = mg.Merger()
        merger.merge(["Name", filepath, filepath, filepath])

        for entry in os.listdir(datasets_dir):
            for suffix in found:
                if entry.endswith(suffix):
                    found[suffix] = True
                    os.remove(datasets_dir + entry)

        # Assert
        self.assertEqual(found["full.dataset.csv"], True)
        self.assertEqual(found["full.dataset.rds"], True)
Пример #5
0
    def test_scaling(self):
        """Check that scaled data has mean 0 and standard deviation 1.

        The mean and standard deviation are taken over all values of the
        scaled result at once and compared to 5 decimal places.
        """

        # Arrange
        data = data_io.get_data(setup.get_datasets_path(),
                                "winequality-red.csv")

        # Act
        scaled = data_scaling.scale_numeric_data(data)
        # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
        # 1.0; ``.values`` gives the same ndarray on old and new versions.
        target = scaled.values

        # Assert
        self.assertAlmostEqual(target.mean(), 0, 5)
        self.assertAlmostEqual(target.std(), 1, 5)
Пример #6
0
    def test_data_split(self):
        """Check the train/test split sizes for the ``quality`` target.

        Features and labels must have matching lengths in each partition,
        and the train-to-test length ratio must equal 4 to 2 decimal places.
        """
        # Arrange
        wine = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")

        # Act
        features_train, features_test, labels_train, labels_test = (
            data_splitting.get_train_test(wine, "quality"))

        # Assert
        train_rows = len(features_train[:, 0])
        test_rows = len(features_test[:, 0])
        self.assertEqual(train_rows, len(labels_train))
        self.assertEqual(test_rows, len(labels_test))

        split_ratio = len(labels_train) / len(labels_test)
        self.assertAlmostEqual(split_ratio, 4, 2)
Пример #7
0
class TestSplitter(TestCase):
    """Tests that ``sp.Splitter`` writes its four train/test output files."""

    # Flags recording which output files the most recent ``helper`` run found.
    test_csv = False
    train_csv = False
    test_rds = False
    train_rds = False
    # Directory scanned for, and cleaned of, the splitter's output files.
    filepath = setup.get_datasets_path()

    # Output file suffix -> name of the flag attribute it sets.
    _OUTPUT_FLAGS = (
        (".testing.data.csv", "test_csv"),
        (".training.data.csv", "train_csv"),
        (".training.data.rds", "train_rds"),
        (".testing.data.rds", "test_rds"),
    )

    def helper(self):
        """Record which output files exist in ``filepath`` and delete them.

        All four flags are reset first, so they describe only this scan.
        """
        for _, flag in self._OUTPUT_FLAGS:
            setattr(self, flag, False)

        for entry in os.listdir(self.filepath):
            for suffix, flag in self._OUTPUT_FLAGS:
                if entry.endswith(suffix):
                    setattr(self, flag, True)
                    os.remove(self.filepath + entry)
                    # The suffixes are mutually exclusive; a file matches
                    # at most one of them.
                    break

    def split_body(self, path, delim):
        """Split the file at ``path`` and assert all four outputs were made.

        Args:
            path: Path of the dataset file handed to ``Splitter.Split``.
            delim: Delimiter handed to ``Splitter.Split``.
        """
        # Arrange
        splitter = sp.Splitter()

        # Act
        splitter.Split(path, delim)
        self.helper()

        # Assert
        # These were previously written as ``self.assertEquals = (...)``,
        # which assigns a tuple and never checks anything.
        self.assertTrue(self.test_csv, "testing csv file was not created")
        self.assertTrue(self.train_csv, "training csv file was not created")
        self.assertTrue(self.test_rds, "testing rds file was not created")
        self.assertTrue(self.train_rds, "training rds file was not created")

    # test_comma_split tests the functionality of the splitter, with the delimiter as a comma.
    def test_comma_split(self):
        self.split_body(self.filepath + "titanic_train.csv", ",")

    # test_semicolon_split tests the functionality of the splitter, with the delimiter as a semicolon.
    def test_semicolon_split(self):
        self.split_body(self.filepath + "winequality-red.csv", ";")
Пример #8
0
    for item in dirs:
        if imghdr.what(path +
                       item) in imgfiles:  # if the file has the correct type
            img = Image.open(path + item)
            height, width = img.size
            if height == size and width == size:
                size_count = size_count + 1
            else:
                bad_count = bad_count + 1

    return size_count, bad_count


# Performing Testing
# Reports how many files in the datasets directory are images, and how many
# of them have the expected square size.
path = get_datasets_path()

# img count
img, not_img = get_file_count(path)
print("This classifier only works on image files.")
print("You have " + str(img) + " image files and " + str(not_img) +
      " non-image files.")

# img sizes
size = 28
good_size, bad_size = get_img_size(path, size)
print("Each image in the path must have same the length and width.")
# Report the count of correctly sized images (good_size); this line used to
# print the total image-file count (img) instead.
print("You have " + str(good_size) + " files of size " + str(size) + " and " +
      str(bad_size) + " files not of that size.")

# scoring
Пример #9
0
 def setUp(self):
     """Load ``winequality-red.csv`` and record the target and shape."""
     wine = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")
     self.data = wine
     self.target = 'quality'
     # Shape of the data as loaded.
     self.start = wine.shape
Пример #10
0
 def setUp(self):
     """Load ``titanic_train.csv`` from the datasets directory into ``self.data``."""
     datasets_dir = setup.get_datasets_path()
     self.data = data_io.get_data(datasets_dir, "titanic_train.csv")
Пример #11
0
 def setUp(self):
     """Load ``winequality-red.csv`` and build a ``FillerStrategy`` over it."""
     wine = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")
     self.data = wine
     # The strategy receives the same object that is stored in self.data.
     self.strategy = filler_strategy.FillerStrategy(wine)
Пример #12
0
    def standard_delim(self, filename, target_delim):
        """Assert the delimiter detected for a dataset file.

        Args:
            filename: File name appended to the datasets directory path.
            target_delim: Delimiter the detection is expected to return.
        """
        # Act
        dataset_path = setup.get_datasets_path() + filename
        detected = dataset_insight.get_delimiter(dataset_path)

        # Assert
        self.assertEqual(detected, target_delim, "Delimiter detected incorrectly.")
Пример #13
0
 def setUp(self):
     """Build the fixture frame from ``titanic_train.csv``.

     Adds an all-NaN ``Nonsense`` column and a constant ``Filled`` column
     (value 1), then passes every column through ``pd.to_numeric`` with
     ``errors='coerce'``.
     """
     frame = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
     frame["Nonsense"] = np.nan
     frame["Filled"] = 1
     self.data = frame.apply(pd.to_numeric, errors='coerce')
Пример #14
0
 def setup_test_data(self, test_file):
     """Load the testing dataset into ``self.test_data``.

     Args:
         test_file: File name passed to ``data_io.get_data`` together with
             the datasets directory.
     """
     datasets_dir = setup.get_datasets_path()
     self.test_data = data_io.get_data(datasets_dir, test_file)