Exemplo n.º 1
0
    def test_binarizer(self):
        """Check the derived field that ``pp.binarizer`` builds for a Binarizer.

        The export is computed a single time and every assertion inspects that
        one result, rather than re-running the exporter for each check.
        """
        trfm_obj = Binarizer()
        # The dataset helper hands back the transformer together with the
        # feature names; the third element (target name) is not needed here.
        trfm_obj, feature_names, _ = auto_dataset_for_regression(trfm_obj)

        self.assertEqual(pp.get_class_name(trfm_obj),
                         trfm_obj.__class__.__name__)

        self.assertEqual(
            pp.get_derived_colnames('binarizer', ['displacement']),
            ['binarizer(displacement)'])

        # Run the exporter once; all remaining checks look at its first field.
        first_field = pp.binarizer(trfm_obj, feature_names)['der_fld'][0]

        self.assertEqual(first_field.__class__.__name__,
                         pml.DerivedField().__class__.__name__)
        self.assertEqual(first_field.get_optype(), "continuous")
        self.assertEqual(first_field.get_dataType(), "double")

        # The first constant under the Apply element is expected to equal the
        # transformer's threshold.
        self.assertEqual(
            first_field.get_Apply().get_Constant()[0].get_valueOf_(),
            trfm_obj.threshold)
Exemplo n.º 2
0
    def test_max_abs_scaler(self):
        """Check the derived field ``pp.max_abs_scaler`` builds for MaxAbsScaler.

        The export is computed a single time and every assertion inspects that
        one result, rather than re-running the exporter for each check.
        """
        trfm_obj = MaxAbsScaler()
        # The dataset helper hands back the transformer together with the
        # feature names; the third element (target name) is not needed here.
        trfm_obj, feature_names, _ = auto_dataset_for_regression(trfm_obj)

        self.assertEqual(pp.get_class_name(trfm_obj),
                         trfm_obj.__class__.__name__)

        # NOTE(review): the double underscore in 'max_abs__scaler' is kept as
        # written; this check only exercises the name formatting helper --
        # confirm whether the exporter itself uses this prefix.
        self.assertEqual(
            pp.get_derived_colnames('max_abs__scaler', ['displacement']),
            ['max_abs__scaler(displacement)'])

        # Run the exporter once; all remaining checks look at its first field.
        first_field = pp.max_abs_scaler(trfm_obj, feature_names)['der_fld'][0]

        self.assertEqual(first_field.__class__.__name__,
                         pml.DerivedField().__class__.__name__)
        self.assertEqual(first_field.get_optype(), "continuous")
        self.assertEqual(first_field.get_dataType(), "double")

        # The exported constant is compared as a string: the first max_abs_
        # entry formatted with 25 decimal places.
        self.assertEqual(
            first_field.get_Apply().get_Constant()[0].get_valueOf_(),
            "{:.25f}".format(trfm_obj.max_abs_[0]))
Exemplo n.º 3
0
 def test_tfidf_vectorizer(self):
     """Check the export ``pp.tfidf_vectorizer`` builds for a TfidfVectorizer.

     The export and the vectorizer's feature names are each computed a single
     time and shared by all assertions, rather than recomputed per check.
     """
     trfm_obj = TfidfVectorizer(norm=None)
     # The dataset helper hands back the transformer together with the
     # feature names; the third element (target name) is not needed here.
     trfm_obj, feature_names, _ = auto_dataset_for_tfidf_and_count_vec(
         trfm_obj)

     self.assertEqual(pp.get_class_name(trfm_obj),
                      trfm_obj.__class__.__name__)

     # Run the exporter once and reuse its result dictionary below.
     exported = pp.tfidf_vectorizer(trfm_obj, feature_names)
     # NOTE(review): get_feature_names() was deprecated in scikit-learn 1.0
     # and removed in 1.2 in favour of get_feature_names_out(); kept as-is
     # because the scikit-learn version in use is not visible here.
     vocab = trfm_obj.get_feature_names()

     # One derived column name per vocabulary term, named after the source
     # feature and the term.
     self.assertEqual(len(exported['der_col_names']), len(vocab))
     self.assertEqual(
         exported['der_col_names'][0],
         'tfidf@[' + feature_names[0] + '](' + vocab[0] + ')')

     der_fld = exported['der_fld']

     # One extra derived field is expected beyond the idf_ entries: index 0
     # applies the 'lowercase' function.
     self.assertEqual(len(der_fld) - 1, len(trfm_obj.idf_))
     self.assertEqual(der_fld[0].get_Apply().get_function(), 'lowercase')

     self.assertEqual(exported['pp_feat_name'], feature_names[0])
     self.assertEqual(len(exported['pp_feat_class_lbl']), 0)

     self.assertEqual(der_fld[0].get_optype(), 'categorical')
     self.assertEqual(der_fld[1].get_optype(), 'continuous')
     self.assertEqual(der_fld[0].get_dataType(), 'string')
     self.assertEqual(der_fld[1].get_dataType(), 'double')

     # Field 1 should carry the first idf weight and the first term; the
     # last field should carry the last term.
     self.assertEqual(
         der_fld[1].get_Apply().get_Constant()[0].get_valueOf_(),
         trfm_obj.idf_[0])
     self.assertEqual(
         der_fld[1].get_Apply().get_TextIndex()[0].get_Constant()
         .get_valueOf_(),
         vocab[0])
     self.assertEqual(
         der_fld[-1].get_Apply().get_TextIndex()[0].get_Constant()
         .get_valueOf_(),
         vocab[-1])
Exemplo n.º 4
0
    def test_lbl_encoder(self):
        """Check the export that ``pp.lbl_encoder`` builds for a LabelEncoder.

        The export is computed a single time and every assertion inspects that
        one result, rather than re-running the exporter for each check.
        """
        trfm_obj = LabelEncoder()
        # The dataset helper hands back the transformer together with the
        # feature names; the third element (target name) is not needed here.
        trfm_obj, feature_names, _ = auto_dataset_for_regression(trfm_obj)

        self.assertEqual(pp.get_class_name(trfm_obj),
                         trfm_obj.__class__.__name__)

        self.assertEqual(pp.get_derived_colnames('labelEncoder', ['origin']),
                         ['labelEncoder(origin)'])

        # Run the exporter once and reuse its result dictionary below.
        exported = pp.lbl_encoder(trfm_obj, feature_names)
        first_field = exported['der_fld'][0]

        self.assertEqual(first_field.__class__.__name__,
                         pml.DerivedField().__class__.__name__)
        self.assertEqual(first_field.get_optype(), "continuous")
        self.assertEqual(first_field.get_dataType(), "double")

        self.assertEqual(exported['der_col_names'][0], "labelEncoder(origin)")
        # The first exported class label should match the encoder's first
        # class.
        self.assertEqual(exported['pp_feat_class_lbl'][0],
                         trfm_obj.classes_[0])

        self.assertEqual(first_field.get_MapValues().get_outputColumn(),
                         "output")

        self.assertEqual(exported['pp_feat_name'], "origin")
Exemplo n.º 5
0
    def test_lbl_binarizer(self):
        """Check the export ``pp.lbl_binarizer`` builds for a LabelBinarizer.

        The export is computed a single time and every assertion inspects that
        one result, rather than re-running the exporter for each check.
        """
        trfm_obj = LabelBinarizer()
        # The dataset helper hands back the transformer together with the
        # feature names; the third element (target name) is not needed here.
        trfm_obj, feature_names, _ = auto_dataset_for_regression(trfm_obj)

        self.assertEqual(pp.get_class_name(trfm_obj),
                         trfm_obj.__class__.__name__)

        # Run the exporter once and reuse its result dictionary below.
        exported = pp.lbl_binarizer(trfm_obj, feature_names)
        first_field = exported['der_fld'][0]

        self.assertEqual(first_field.__class__.__name__,
                         pml.DerivedField().__class__.__name__)
        self.assertEqual(first_field.get_optype(), "categorical")
        self.assertEqual(first_field.get_dataType(), "double")

        # The first exported class label should match the binarizer's first
        # class.
        self.assertEqual(exported['pp_feat_class_lbl'][0],
                         trfm_obj.classes_[0])

        self.assertEqual(first_field.get_NormDiscrete().get_field(), "origin")

        self.assertEqual(exported['pp_feat_name'], "origin")