def test_pipeline_optimize_works_fine(self):
    pipeline = Pipeline(
        feature_selection_algorithm=SelectKBest(),
        feature_transform_algorithm=Normalizer(),
        classifier=RandomForest())
    data_reader = CSVDataReader(
        src=os.path.dirname(os.path.abspath(__file__)) +
        '/tests_files/dataset_header_classes.csv',
        has_header=True,
        contains_classes=True)

    self.assertIsInstance(pipeline.get_classifier(), RandomForest)
    self.assertIsInstance(pipeline.get_feature_selection_algorithm(), SelectKBest)
    self.assertIsInstance(pipeline.get_feature_transform_algorithm(), Normalizer)

    accuracy = pipeline.optimize(
        data_reader.get_x(), data_reader.get_y(), 20, 40,
        'ParticleSwarmAlgorithm', 'Accuracy')
    if accuracy != float('inf'):
        self.assertGreaterEqual(accuracy, -1.0)
        self.assertLessEqual(accuracy, 0.0)

    self.assertIsInstance(pipeline.get_classifier(), RandomForest)
    self.assertIsInstance(pipeline.get_feature_selection_algorithm(), SelectKBest)
    self.assertIsInstance(pipeline.get_feature_transform_algorithm(), Normalizer)
def setUp(self):
    self.__data = CSVDataReader(
        src=os.path.dirname(os.path.abspath(__file__)) +
        '/tests_files/dataset_header_classes.csv',
        has_header=True,
        contains_classes=True)
    self.__x_train, self.__x_test, self.__y_train, self.__y_test = train_test_split(
        self.__data.get_x(), self.__data.get_y(), test_size=0.2)
def test_pipeline_optimizer_missing_values_categorical_attributes_run_works_fine(self):
    data_reader = CSVDataReader(
        src=os.path.dirname(os.path.abspath(__file__)) +
        '/tests_files/dataset_header_classes_cat_miss.csv',
        has_header=True,
        contains_classes=True)
    # pass the data reader created above (the original referenced a
    # non-existent self.__data_reader attribute)
    ppo = PipelineOptimizer(
        data=data_reader,
        feature_selection_algorithms=['SelectKBest', 'SelectPercentile'],
        feature_transform_algorithms=['Normalizer', 'StandardScaler'],
        classifiers=['AdaBoost', 'Bagging'],
        categorical_features_encoder='OneHotEncoder',
        imputer='SimpleImputer',
        log=False)
    pipeline = ppo.run('Accuracy', 10, 10, 20, 20, 'ParticleSwarmAlgorithm')

    self.assertIsInstance(pipeline, Pipeline)
    self.assertIsInstance(pipeline.get_classifier(), (AdaBoost, Bagging))
    self.assertIsInstance(
        pipeline.get_feature_selection_algorithm(), (SelectKBest, SelectPercentile))
    self.assertTrue(
        pipeline.get_feature_transform_algorithm() is None or isinstance(
            pipeline.get_feature_transform_algorithm(), (Normalizer, StandardScaler)))
def analyze_data(
    self,
    src: str,
    fitness_name: str,
    population_size: int,
    number_of_evaluations: int,
    optimization_algorithm: str,
    classifiers: Iterable,
    feature_selection_algorithms: Iterable = None,
    feature_transform_algorithms: Iterable = None,
    imputer: str = None,
) -> Pipeline:
    """Run the AutoML process using a NiaAML PipelineOptimizer class instance.

    Args:
        src (str): path to a CSV file
        fitness_name (str): name of the fitness class to use as a function
        population_size (int): number of individuals in the optimization process
        number_of_evaluations (int): maximum number of evaluations
        optimization_algorithm (str): name of the optimization algorithm to use
        classifiers (Iterable[str]): names of possible classifiers
        feature_selection_algorithms (Optional[Iterable[str]]): names of possible feature selection algorithms
        feature_transform_algorithms (Optional[Iterable[str]]): names of possible feature transform algorithms
        imputer (Optional[str]): name of the imputer used for features that contain missing values

    Returns:
        Pipeline: instance of a Pipeline object from the NiaAML framework

    Note:
        See NiaAML's documentation for more details on possible input
        parameters' values and further usage of the returned Pipeline object.
    """
    data = CSVDataReader(src=src, contains_classes=True, has_header=True)
    pipeline_optimizer = PipelineOptimizer(
        data=data,
        classifiers=classifiers,
        feature_selection_algorithms=feature_selection_algorithms,
        feature_transform_algorithms=feature_transform_algorithms,
        imputer=imputer,
    )
    pipeline = pipeline_optimizer.run_v1(
        fitness_name, population_size, number_of_evaluations, optimization_algorithm
    )
    return pipeline
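# A minimal usage sketch for analyze_data, kept as comments because the
# enclosing service class is not shown in the source; the instance name and
# all argument values below are illustrative, not part of the original:
#
# service = ...  # an instance of the class that defines analyze_data
# pipeline = service.analyze_data(
#     src='dataset.csv',
#     fitness_name='Accuracy',
#     population_size=10,
#     number_of_evaluations=20,
#     optimization_algorithm='ParticleSwarmAlgorithm',
#     classifiers=['AdaBoost', 'Bagging'],
#     feature_selection_algorithms=['SelectKBest', 'SelectPercentile'],
#     feature_transform_algorithms=['Normalizer'],
#     imputer='SimpleImputer',
# )
# print(pipeline.to_string())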
def run(self):
    dataReader = CSVDataReader(
        src=self.__data.csvSrc, has_header=self.__data.csvHasHeader)
    # the constructor expects the plural feature_transform_algorithms
    # (the original passed the singular form)
    optimizer = PipelineOptimizer(
        data=dataReader,
        feature_selection_algorithms=self.__data.fsas,
        feature_transform_algorithms=self.__data.ftas,
        classifiers=self.__data.classifiers,
        categorical_features_encoder=self.__data.encoder,
        imputer=self.__data.imputer
    )
    # swap in a logger that forwards progress messages to the GUI via the
    # Qt signal, reaching past name mangling into the optimizer's private field
    optimizer._PipelineOptimizer__logger = HackyLogger(self.progress.emit)

    if self.__data.isOptimization is True:
        pipeline = optimizer.run(
            self.__data.fitnessFunctionName, self.__data.popSize,
            self.__data.popSizeInner, self.__data.numEvals,
            self.__data.numEvalsInner, self.__data.optAlgName,
            self.__data.optAlgInnerName)
    else:
        pipeline = optimizer.run_v1(
            self.__data.fitnessFunctionName, self.__data.popSize,
            self.__data.numEvals, self.__data.optAlgName)

    pipeline.export(os.path.join(self.__data.outputFolder, 'niaamlGUIoutput'))
    pipeline.export_text(os.path.join(self.__data.outputFolder, 'niaamlGUIoutput'))
    self.optimized.emit(pipeline.to_string())
def test_pipeline_run_works_fine(self):
    pipeline = Pipeline(
        feature_selection_algorithm=SelectKBest(),
        feature_transform_algorithm=Normalizer(),
        classifier=RandomForest())
    data_reader = CSVDataReader(
        src=os.path.dirname(os.path.abspath(__file__)) +
        '/tests_files/dataset_header_classes.csv',
        has_header=True,
        contains_classes=True)
    pipeline.optimize(
        data_reader.get_x(), data_reader.get_y(), 20, 40,
        'ParticleSwarmAlgorithm', 'Accuracy')
    predicted = pipeline.run(
        pandas.DataFrame(
            numpy.random.uniform(
                low=0.0, high=15.0,
                size=(30, data_reader.get_x().shape[1]))))

    self.assertEqual(predicted.shape, (30, ))

    s1 = set(data_reader.get_y())
    s2 = set(predicted)
    self.assertTrue(s2.issubset(s1))
    self.assertTrue(0 < len(s2) <= 2)
class FeatureSelectionTestCase(TestCase):
    def setUp(self):
        self.__data = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__)) +
            '/tests_files/dataset_header_classes.csv',
            has_header=True,
            contains_classes=True)

    def test_PSO_works_fine(self):
        algo = fs.ParticleSwarmOptimization()
        selected_features_mask = algo.select_features(
            self.__data.get_x(), self.__data.get_y())
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_select_k_best_works_fine(self):
        algo = fs.SelectKBest()
        selected_features_mask = algo.select_features(
            self.__data.get_x(), self.__data.get_y())
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_select_percentile_works_fine(self):
        algo = fs.SelectPercentile()
        selected_features_mask = algo.select_features(
            self.__data.get_x(), self.__data.get_y())
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_bat_algorithm_works_fine(self):
        algo = fs.BatAlgorithm()
        selected_features_mask = algo.select_features(
            self.__data.get_x(), self.__data.get_y())
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_de_works_fine(self):
        algo = fs.DifferentialEvolution()
        selected_features_mask = algo.select_features(
            self.__data.get_x(), self.__data.get_y())
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_gwo_works_fine(self):
        algo = fs.GreyWolfOptimizer()
        selected_features_mask = algo.select_features(
            self.__data.get_x(), self.__data.get_y())
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_jdefsth_works_fine(self):
        algo = fs.jDEFSTH()
        selected_features_mask = algo.select_features(
            self.__data.get_x(), self.__data.get_y())
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))

    def test_vt_works_fine(self):
        algo = fs.VarianceThreshold()
        selected_features_mask = algo.select_features(
            self.__data.get_x(), self.__data.get_y())
        self.assertEqual(self.__data.get_x().shape[1], len(selected_features_mask))
class ClassifierTestCase(TestCase):
    def setUp(self):
        self.__data = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__)) +
            '/tests_files/dataset_header_classes.csv',
            has_header=True,
            contains_classes=True)
        self.__x_train, self.__x_test, self.__y_train, self.__y_test = train_test_split(
            self.__data.get_x(), self.__data.get_y(), test_size=0.2)

    def test_adaboost_works_fine(self):
        algo = c.AdaBoost()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_bagging_works_fine(self):
        algo = c.Bagging()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_ert_works_fine(self):
        algo = c.ExtremelyRandomizedTrees()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_lsvc_works_fine(self):
        algo = c.LinearSVC()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_mlp_works_fine(self):
        algo = c.MultiLayerPerceptron()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_rf_works_fine(self):
        algo = c.RandomForest()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_dt_works_fine(self):
        algo = c.DecisionTree()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_kn_works_fine(self):
        algo = c.KNeighbors()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_gp_works_fine(self):
        algo = c.GaussianProcess()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_gnb_works_fine(self):
        algo = c.GaussianNB()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)

    def test_qda_works_fine(self):
        algo = c.QuadraticDiscriminantAnalysis()
        algo.fit(self.__x_train, self.__y_train)
        predictions = algo.predict(self.__x_test)
        self.assertEqual(predictions.shape, self.__y_test.shape)
from niaaml.fitness import Precision
from niaaml.data import CSVDataReader
import os
import numpy

"""
This example presents how to use an implemented fitness function and its
method individually. In this case, we use Precision for demonstration, but
you can use any of the implemented fitness functions in the same way.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv',
    has_header=False,
    contains_classes=True)

# let's say the following array contains predictions after the classification process
predictions = numpy.random.choice(['Class 1', 'Class 2'], size=data_reader.get_y().shape)

# instantiate a fitness function (Precision in this case)
fitness_func = Precision()

# calculate the fitness value
precision = fitness_func.get_fitness(predictions, data_reader.get_y())

# precision will probably be low due to the random dummy predictions
print(precision)
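# A quick sanity check (a sketch reusing the objects above, under the
# assumption implied by the comment above that higher scores are better):
# feeding the true labels back in as predictions should give the best
# attainable precision value
perfect_precision = fitness_func.get_fitness(data_reader.get_y(), data_reader.get_y())
print(perfect_precision)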
from niaaml.preprocessing.feature_transform import Normalizer
import os
from niaaml.data import CSVDataReader

"""
This example presents how to use an implemented feature transform algorithm
and its methods individually. In this case, we use Normalizer for
demonstration, but you can use any of the implemented feature transform
algorithms in the same way.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv',
    has_header=False,
    contains_classes=True)

# instantiate Normalizer
ft = Normalizer()

# set parameters of the Normalizer
ft.set_parameters(norm='l2')

# fit the algorithm to the input data
ft.fit(data_reader.get_x())

# transform features
transformed_features = ft.transform(data_reader.get_x())

# print the feature transform algorithm in a user-friendly form
print(ft.to_string())
from niaaml.preprocessing.feature_selection import SelectKBest
import os
from niaaml.data import CSVDataReader
from sklearn.feature_selection import chi2

"""
This example presents how to use an implemented feature selection algorithm
and its methods individually. In this case, we use SelectKBest for
demonstration, but you can use any of the implemented feature selection
algorithms in the same way.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv',
    has_header=False,
    contains_classes=True)

# instantiate the SelectKBest feature selection algorithm
fs = SelectKBest()

# set parameters of the object
fs.set_parameters(k=4, score_func=chi2)

# select the best features according to the SelectKBest algorithm
# (returns a boolean mask of the selected features - True if selected, False if not)
features_mask = fs.select_features(data_reader.get_x(), data_reader.get_y())

# print the feature selection algorithm in a user-friendly form
print(fs.to_string())
import os
from niaaml.data import CSVDataReader

"""
This example presents how to instantiate CSVDataReader and use its methods.
You can use it to hold data in a single variable, or as an input to an
instance of the PipelineOptimizer class.
"""

# CSVDataReader takes a path to a csv file as input, then reads and parses it into the x and y arrays
# the has_header and contains_classes arguments need to be set according to the input csv file's structure
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv',
    has_header=False,
    contains_classes=True)

# get x and y arrays and print them
print(data_reader.get_x())
print(data_reader.get_y())
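# A self-contained sketch of the expected file layout (the rows below are made
# up for illustration): with contains_classes=True and no header, the class
# labels are expected in the last column, as in the bundled example datasets,
# and the remaining columns become x
import csv
import tempfile

rows = [
    [0.1, 1.2, 'Class 1'],
    [0.4, 0.8, 'Class 2'],
    [0.9, 1.1, 'Class 1'],
]
with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False, newline='') as f:
    csv.writer(f).writerows(rows)
    tmp_src = f.name

tmp_reader = CSVDataReader(src=tmp_src, has_header=False, contains_classes=True)
print(tmp_reader.get_x())  # the two numeric feature columns
print(tmp_reader.get_y())  # the class column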
def run(self):
    dataReader = CSVDataReader(
        src=self.__data.csvSrc,
        contains_classes=False,
        has_header=self.__data.csvHasHeader)
    pipeline = Pipeline.load(self.__data.pipelineSrc)
    predictions = pipeline.run(dataReader.get_x())
    self.ran.emit(str(predictions))
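# The same load-and-predict flow as a plain script, outside the GUI worker,
# kept as comments because the file paths are placeholders; Pipeline.load
# restores a pipeline previously persisted with pipeline.export:
#
# from niaaml import Pipeline
# from niaaml.data import CSVDataReader
#
# data_reader = CSVDataReader(src='unlabeled.csv', has_header=True, contains_classes=False)
# pipeline = Pipeline.load('exported_pipeline.ppln')
# predictions = pipeline.run(data_reader.get_x())
# print(predictions)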
from niaaml import Pipeline
from niaaml.classifiers import MultiLayerPerceptron
from niaaml.preprocessing.feature_selection import VarianceThreshold
from niaaml.preprocessing.feature_transform import Normalizer
from niaaml.data import CSVDataReader
from niaaml.preprocessing.encoding import encode_categorical_features
import os
import numpy
import pandas

"""
This example presents how to use the Pipeline class individually. You may use
this if you want to test out a specific classification pipeline. We use a
dataset that contains categorical and numerical features.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset_categorical.csv',
    has_header=False,
    contains_classes=True)

# we use the utility method encode_categorical_features to get encoders for the categorical
# features, but you may instantiate and fit feature encoders separately and pass them as an array
# (as long as they are implemented as this framework suggests)
# there should be as many encoders as there are categorical features
# this example uses One-Hot Encoding
_, encoders = encode_categorical_features(data_reader.get_x(), 'OneHotEncoder')

# instantiate a Pipeline object
pipeline = Pipeline(
    feature_selection_algorithm=VarianceThreshold(),
    feature_transform_algorithm=Normalizer(),
    classifier=MultiLayerPerceptron(),
    categorical_features_encoders=encoders)

# run pipeline optimization process (returns a fitness value, but sets the best parameters for the
# classifier, feature selection algorithm and feature transform algorithm during the process)
pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 10, 50, 'ParticleSwarmAlgorithm', 'Accuracy')
import os
from niaaml import PipelineOptimizer, Pipeline
from niaaml.data import CSVDataReader

"""
In this example, we show how to use the PipelineOptimizer class. This example
uses an instance of CSVDataReader. The instantiated PipelineOptimizer will try
to assemble the best pipeline from the components specified in its
constructor. We use a dataset with one categorical feature and missing values
to demonstrate the use of a PipelineOptimizer instance with automatic feature
encoding and imputation.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset_categorical_missing.csv',
    has_header=False,
    contains_classes=True)

# instantiate PipelineOptimizer that chooses among the specified classifiers,
# feature selection algorithms and feature transform algorithms
# OneHotEncoder is used for encoding categorical features in this example
# SimpleImputer is used for imputing missing values in this example
# log is True by default; log_verbose=True means more information is logged,
# and log_output_file is the destination of the log file
# if log_output_file is not provided, no file is created
# if log is False, logging is turned off
pipeline_optimizer = PipelineOptimizer(
    data=data_reader,
    classifiers=['AdaBoost', 'Bagging', 'MultiLayerPerceptron', 'RandomForest', 'ExtremelyRandomizedTrees', 'LinearSVC'],
    feature_selection_algorithms=['SelectKBest', 'SelectPercentile', 'ParticleSwarmOptimization', 'VarianceThreshold'],
    feature_transform_algorithms=['Normalizer', 'StandardScaler'],
    categorical_features_encoder='OneHotEncoder',
    imputer='SimpleImputer',
    log=True,
    log_verbose=True,
    log_output_file='output.log'
)
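# with the optimizer configured, the search would typically be started like
# this; the argument values are illustrative and mirror the tests above
# (fitness name, outer/inner population sizes, outer/inner evaluation counts,
# optimization algorithm)
pipeline = pipeline_optimizer.run('Accuracy', 10, 10, 20, 20, 'ParticleSwarmAlgorithm')

# print the best pipeline found
print(pipeline.to_string())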
from niaaml.classifiers import AdaBoost
import os
from niaaml.data import CSVDataReader
import numpy

"""
In this example, we show how to individually use an implemented classifier and
its methods. In this case, we use AdaBoost for demonstration, but you can use
any of the implemented classifiers in the same way.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv',
    has_header=False,
    contains_classes=True)

# instantiate AdaBoost classifier
classifier = AdaBoost()

# set parameters of the classifier
classifier.set_parameters(n_estimators=50, algorithm='SAMME')

# fit the classifier to the data
classifier.fit(data_reader.get_x(), data_reader.get_y())

# predict classes of the dummy input
predicted = classifier.predict(
    numpy.random.uniform(low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1])))

# print the classifier in a user-friendly form
print(classifier.to_string())
from niaaml import Pipeline
from niaaml.classifiers import MultiLayerPerceptron
from niaaml.preprocessing.feature_selection import VarianceThreshold
from niaaml.preprocessing.feature_transform import Normalizer
from niaaml.data import CSVDataReader
from niaaml.preprocessing.encoding import encode_categorical_features
from niaaml.preprocessing.imputation import impute_features
import os
import numpy
import pandas

"""
This example presents how to use the Pipeline class individually. You may use
this if you want to test out a specific classification pipeline. We use a
dataset that contains categorical and numerical features with missing values.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset_categorical_missing.csv',
    has_header=False,
    contains_classes=True)

features = data_reader.get_x()

# we use the utility method impute_features to get imputers for the features with missing values,
# but you may instantiate and fit imputers separately and pass them as a dictionary (as long as
# they are implemented as this framework suggests), with keys as column names or indices
# (if there is no header in the csv)
# there should be as many imputers as there are features with missing values
# this example uses the Simple Imputer
features, imputers = impute_features(features, 'SimpleImputer')

# exactly the same goes for encoders
_, encoders = encode_categorical_features(features, 'OneHotEncoder')

# instantiate a Pipeline object (the classifier choice here is illustrative;
# any implemented classifier works)
pipeline = Pipeline(
    feature_selection_algorithm=VarianceThreshold(),
    feature_transform_algorithm=Normalizer(),
    classifier=MultiLayerPerceptron(),
    categorical_features_encoders=encoders,
    imputers=imputers)
from niaaml import Pipeline
from niaaml.classifiers import MultiLayerPerceptron
from niaaml.preprocessing.feature_selection import VarianceThreshold
from niaaml.preprocessing.feature_transform import Normalizer
from niaaml.data import CSVDataReader
import os
import numpy
import pandas

"""
This example presents how to use the Pipeline class individually. You may use
this if you want to test out a specific classification pipeline.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(
    src=os.path.dirname(os.path.abspath(__file__)) + '/example_files/dataset.csv',
    has_header=False,
    contains_classes=True)

# instantiate a Pipeline object
pipeline = Pipeline(
    feature_selection_algorithm=VarianceThreshold(),
    feature_transform_algorithm=Normalizer(),
    classifier=MultiLayerPerceptron())

# run pipeline optimization process (returns a fitness value, but sets the best parameters for the
# classifier, feature selection algorithm and feature transform algorithm during the process)
pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 10, 50, 'ParticleSwarmAlgorithm', 'Accuracy')

# run the pipeline using dummy data
# you could run the pipeline before the optimization process, but the predictions would be
# meaningless as nothing in the pipeline has been fitted to the given dataset
predicted = pipeline.run(
    pandas.DataFrame(
        numpy.random.uniform(low=0.0, high=15.0, size=(30, data_reader.get_x().shape[1]))))

# print the predictions
print(predicted)
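# once optimized, the pipeline can be persisted and restored with the same
# export/load calls the GUI worker uses above (the file name is illustrative)
pipeline.export('exported_pipeline.ppln')
loaded_pipeline = Pipeline.load('exported_pipeline.ppln')
print(loaded_pipeline.to_string())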
class FeatureTransformTestCase(TestCase):
    def setUp(self):
        self.__data = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__)) +
            '/tests_files/dataset_header_classes.csv',
            has_header=True,
            contains_classes=True)

    def test_mas_works_fine(self):
        algo = ft.MaxAbsScaler()
        algo.fit(self.__data.get_x())
        transformed = algo.transform(self.__data.get_x())
        self.assertEqual(transformed.shape, self.__data.get_x().shape)

    def test_norm_works_fine(self):
        algo = ft.Normalizer()
        algo.fit(self.__data.get_x())
        transformed = algo.transform(self.__data.get_x())
        self.assertEqual(transformed.shape, self.__data.get_x().shape)

    def test_qt_works_fine(self):
        algo = ft.QuantileTransformer()
        algo.fit(self.__data.get_x())
        transformed = algo.transform(self.__data.get_x())
        self.assertEqual(transformed.shape, self.__data.get_x().shape)

    def test_rs_works_fine(self):
        algo = ft.RobustScaler()
        algo.fit(self.__data.get_x())
        transformed = algo.transform(self.__data.get_x())
        self.assertEqual(transformed.shape, self.__data.get_x().shape)

    def test_ss_works_fine(self):
        algo = ft.StandardScaler()
        algo.fit(self.__data.get_x())
        transformed = algo.transform(self.__data.get_x())
        self.assertEqual(transformed.shape, self.__data.get_x().shape)
def test_header_classes_works_fine(self):
    data_reader = CSVDataReader(
        src=os.path.dirname(os.path.abspath(__file__)) +
        '/tests_files/dataset_header_classes.csv',
        has_header=True,
        contains_classes=True)
    x = data_reader.get_x()
    y = data_reader.get_y()
    self.assertEqual(x.shape, (100, 6))
    self.assertEqual(y.shape, (100, ))
def setUp(self):
    self.__data = CSVDataReader(
        src=os.path.dirname(os.path.abspath(__file__)) +
        '/tests_files/dataset_header_classes.csv',
        has_header=True,
        contains_classes=True)