Python Data.preprocess примеры использования

Язык программирования: Python

Пространство имен/Пакет: preprocessing

Класс/Тип: Data

Метод/Функция: preprocess

Примеров на hotexamples.com: 3

Python Data.preprocess — найдено 3 примера. Это лучшие примеры Python-кода для preprocessing.Data.preprocess, полученные из проектов с открытым исходным кодом. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

Data(24)

load_data(5)

create_data(4)

preprocess(3)

fill_missing_data(2)

selectFeature(2)

LLR(1)

clear_memory(1)

featurewise_center(1)

featurewise_std_normalization(1)

get_batch(1)

label_making(1)

load(1)

split_data(1)

Пример #1

Показать файл

Файл: Test.py Проект: tico2303/AI

def TestFeatureSelection():
    """Print ``Data.selectFeature`` results for a growing list of column indices.

    Builds a ``Data`` object from ``cs_170_small80.txt``, preprocesses it and
    prints the shape and type of the preprocessed result.  Then, for each
    column index ``d`` in ``range(dat.shape[1])``, appends ``d`` to the index
    list and prints ``data.selectFeature(dat, feature_indices)``, so the call
    is made with ``[0]``, ``[0, 1]``, ``[0, 1, 2]`` and so on.

    The function only prints; it returns ``None`` and makes no assertions.
    """
    data = Data("cs_170_small80.txt")
    dat = data.preprocess()

    # Single-argument print(...) calls parse on both Python 2 and Python 3,
    # unlike the original ``print x, y`` statements (a SyntaxError on 3).
    print(dat.shape)
    # Two spaces after the colon reproduce what ``print "dat type: ", x``
    # emitted: the literal's trailing space plus the item separator.
    print("dat type:  %s" % (type(dat),))

    col_nums = dat.shape[1]
    feature_indices = []
    for d in range(col_nums):
        # The index list grows on purpose: each iteration evaluates the
        # selection on one more column than the previous one.
        feature_indices.append(d)
        print(data.selectFeature(dat, feature_indices))

Пример #2

Показать файл

Файл: Test.py Проект: tico2303/AI

def TestDistance():
    """Print one leave-one-out split and the distance computed between its parts.

    Builds a ``Data`` object from ``testData.txt``, creates a
    ``LeaveOneOutValidator`` from ``data.data`` and the ``KnearestNeighbor``
    class, preprocesses the data and calls ``validator.leaveOneOut(d, 0)``.
    The returned pair is unpacked as ``(test, train)``; both values and the
    result of ``knn.distance(train, test)`` are printed.

    The function only prints; it returns ``None`` and makes no assertions.
    """
    data = Data("testData.txt")
    validator = LeaveOneOutValidator(data.data, KnearestNeighbor)
    d = data.preprocess()
    # NOTE(review): the second argument is presumably the index of the sample
    # to hold out -- confirm against LeaveOneOutValidator.leaveOneOut.
    test, train = validator.leaveOneOut(d, 0)
    knn = KnearestNeighbor()

    # Single-argument print(...) calls parse on both Python 2 and Python 3,
    # unlike the original ``print x, y`` statements (a SyntaxError on 3).
    # The spacing in the format strings reproduces the Python 2 output:
    # a separator space follows "test: " and "distance: ", but none follows
    # "train: \n" because that item ends with a newline.
    print("test:  %s" % (test,))
    print("train: \n%s" % (train,))

    print("distance:  %s" % (knn.distance(train, test),))
    print("\n\n")

Пример #3

Показать файл

Файл: test_preprocessing.py Проект: datadonK23/Thoughtful_DL

class PreprocessingTests(unittest.TestCase):
    """Checks for ``Data``: loading, preprocessing, splitting and final sets."""

    def setUp(self):
        # Each test method gets its own freshly constructed Data object.
        self.data = Data()

    def test_load(self):
        """After ``load()``, ``_dataset`` exists and is a tuple of length 2."""
        self.data.load()

        loaded = self.data._dataset
        self.assertIsNotNone(loaded, "loaded no data")
        self.assertEqual(tuple, type(loaded), "loaded no tuple")
        self.assertEqual(2, len(loaded), "loaded tuple has false length")

    def test_preprocess(self):
        """After ``preprocess()``, data holds only 0./1. and dtypes are as expected."""
        self.data.load()
        self.data.preprocess()

        dataset = self.data._dataset
        binary_values = [0., 1.]

        # one-hot-encodings: the unique values of each data array are 0. and 1.
        # Each array is looked up right before its first check so that a
        # malformed dataset surfaces at the same point as a direct lookup.
        train_x = dataset[0][0]
        np.testing.assert_array_equal(
            binary_values,
            np.unique(train_x),
            err_msg="false one-hot-encoding of train_data",
        )
        test_x = dataset[1][0]
        np.testing.assert_array_equal(
            binary_values,
            np.unique(test_x),
            err_msg="false one-hot-encoding of test_data",
        )
        self.assertEqual(
            "float64", train_x.dtype, "wrong type of train_data values"
        )
        self.assertEqual(
            "float64", test_x.dtype, "wrong type of test_data values"
        )

        # label vectorization: labels are float32 numpy arrays
        train_y = dataset[0][1]
        self.assertEqual(
            np.ndarray, type(train_y), "wrong type of train_labels set"
        )
        test_y = dataset[1][1]
        self.assertEqual(
            np.ndarray, type(test_y), "wrong type of test_labels set"
        )
        self.assertEqual(
            "float32", train_y.dtype, "wrong type of train_labels values"
        )
        self.assertEqual(
            "float32", test_y.dtype, "wrong type of test_labels values"
        )

    def test_split_data(self):
        """After ``split_data()``, ``_dataset`` has three non-empty two-item splits."""
        self.data.load()
        self.data.preprocess()
        self.data.split_data()

        splits = self.data._dataset

        # correct number of tuples
        self.assertEqual(3, len(splits), "wrong number of splits")
        train_split = splits[0]
        self.assertEqual(2, len(train_split), "wrong number of train splits")
        dev_split = splits[1]
        self.assertEqual(2, len(dev_split), "wrong number of dev splits")
        test_split = splits[2]
        self.assertEqual(2, len(test_split), "wrong number of test splits")

        # existence
        self.assertIsNotNone(train_split[0], "train_data is None")
        self.assertIsNotNone(train_split[1], "train_labels is None")
        self.assertIsNotNone(dev_split[0], "dev_data is None")
        self.assertIsNotNone(dev_split[1], "dev_labels is None")
        self.assertIsNotNone(test_split[0], "test_data is None")
        self.assertIsNotNone(test_split[1], "test_labels is None")

    def test_train_dev_test(self):
        """``train_dev_test`` yields numpy arrays with the expected shapes."""
        train_pair, dev_pair, test_pair = self.data.train_dev_test
        train_data, train_labels = train_pair
        dev_data, dev_labels = dev_pair
        test_data, test_labels = test_pair

        # type
        array_type = np.ndarray
        self.assertEqual(
            array_type, type(train_data), "wrong type of train_data"
        )
        self.assertEqual(
            array_type, type(train_labels), "wrong type of train_labels"
        )
        self.assertEqual(array_type, type(dev_data), "wrong type of dev_data")
        self.assertEqual(
            array_type, type(dev_labels), "wrong type of dev_labels"
        )
        self.assertEqual(
            array_type, type(test_data), "wrong type of test_data"
        )
        self.assertEqual(
            array_type, type(test_labels), "wrong type of test_labels"
        )

        # shape
        self.assertEqual(
            (15000, 10000), train_data.shape, "train_data has wrong shape"
        )
        self.assertEqual(
            (15000, ), train_labels.shape, "train_labels have wrong shape"
        )
        self.assertEqual(
            (10000, 10000), dev_data.shape, "dev_data has wrong shape"
        )
        self.assertEqual(
            (10000, ), dev_labels.shape, "dev_labels have wrong shape"
        )
        self.assertEqual(
            (25000, 10000), test_data.shape, "test_data has wrong shape"
        )
        self.assertEqual(
            (25000, ), test_labels.shape, "test_labels have wrong shape"
        )