Exemplo n.º 1
0
    def test_from_numpy_names(self):
        """Check the default names ``Domain.from_numpy`` gives to variables."""
        # Attribute-only domains: the expected name template for each width.
        for width, template in ((5, "Feature {}"),
                                (99, "Feature {:02}"),
                                (100, "Feature {:03}")):
            dom = Domain.from_numpy(np.zeros((1, width)))
            self.assertTrue(dom.anonymous)
            expected = [template.format(k) for k in range(1, width + 1)]
            self.assertEqual([v.name for v in dom.attributes], expected)

        # A single column is expected to be called just "Feature".
        single = Domain.from_numpy(np.zeros((1, 1)))
        self.assertTrue(single.anonymous)
        self.assertEqual(single.attributes[0].name, "Feature")

        # Three attribute columns, one class column and 100 meta columns.
        x_part = np.zeros((1, 3))
        y_part = np.zeros((1, 1))
        meta_part = np.zeros((1, 100))
        full = Domain.from_numpy(x_part, y_part, meta_part)
        self.assertTrue(full.anonymous)
        self.assertEqual([v.name for v in full.attributes],
                         ["Feature {}".format(k) for k in range(1, 4)])
        self.assertEqual(full.class_var.name, "Target")
        self.assertEqual([v.name for v in full.metas],
                         ["Meta {:03}".format(k) for k in range(1, 101)])
Exemplo n.º 2
0
    def test_do_not_recluster_on_same_data(self):
        """Clustering is recomputed only when the feature matrix changes."""
        features = np.eye(5)
        targets_a, targets_b = np.ones((5, 1)), np.ones((5, 2))
        metas_a, metas_b = np.ones((5, 1)), np.ones((5, 2))

        def make_table(targets, metas):
            # Table over ``features`` with a domain derived from the arrays.
            return Table.from_numpy(
                domain=Domain.from_numpy(X=features, Y=targets, metas=metas),
                X=features, Y=targets, metas=metas,
            )

        reference = make_table(targets_a, metas_a)
        # Same X as the reference, only Y and metas differ.
        same_features = make_table(targets_b, metas_b)
        # Copy of the reference whose first X column is overwritten.
        changed_features = reference.copy()
        changed_features.X[:, 0] = 1

        with patch.object(self.widget, 'commit') as commit_mock:
            self.send_signal(self.widget.Inputs.data, reference)
            self.commit_and_wait()
            baseline = commit_mock.call_count

            # Identical X: the commit count must stay where it was.
            self.send_signal(self.widget.Inputs.data, same_features)
            self.commit_and_wait()
            self.assertEqual(baseline, commit_mock.call_count)

            # Different X: exactly one additional commit is expected.
            self.send_signal(self.widget.Inputs.data, changed_features)
            self.commit_and_wait()
            self.assertEqual(baseline + 1, commit_mock.call_count)
Exemplo n.º 3
0
    def test_index_error(self):
        """``Domain.index`` must raise for keys it cannot resolve.

        Out-of-range integers (plain and NumPy), a variable that is not
        part of the domain and an unknown name are expected to raise
        ``ValueError``; a list is expected to raise ``TypeError``.
        """
        d = Domain((age, gender, income), metas=(ssn, race))
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy (AttributeError since 1.24).  ``np.int_`` builds
        # a real NumPy integer, which is also what ``test_index`` exercises.
        for idx in (3, np.int_(3), -3, np.int_(-3), incomeA, "no_such_thing"):
            with self.assertRaises(ValueError):
                d.index(idx)

        with self.assertRaises(TypeError):
            d.index([2])
Exemplo n.º 4
0
    def test_copy(self):
        """Editing a variable of a copied domain must not touch the source."""
        age.number_of_decimals = 5

        original = Domain((age, gender, income), [race], [ssn])
        duplicate = original.copy()
        duplicate[age].number_of_decimals = 10

        # The source keeps 5 decimals, only the copy reports 10.
        self.assertEqual(original[age].number_of_decimals, 5)
        self.assertEqual(duplicate[age].number_of_decimals, 10)
def select_attrs(table, features, class_var=None,
                 class_vars=None, metas=None):
    """ Return a new table built from ``table`` over ``features``.

    A ``Domain`` is created from ``features`` and ``class_var``;
    ``class_vars`` is forwarded as a keyword argument only when it is not
    ``None``.  If ``metas`` is truthy it is added to the domain with
    ``add_metas``.  The result is ``Table(domain, table)``.
    """
    # Only pass ``class_vars`` along when the caller supplied it.
    extra = {} if class_vars is None else {"class_vars": class_vars}
    domain = Domain(features, class_var, **extra)
    if metas:
        domain.add_metas(metas)
    return Table(domain, table)
Exemplo n.º 6
0
    def test_conversion(self):
        """``Domain.convert`` returns a ``(values, metas)`` pair here."""
        domain = Domain([age, income], [race],
                        [gender, education, ssn])

        # Only attributes and class given: the expected metas are unknowns.
        short_row = [42, 13, "White"]
        values, metas = domain.convert(short_row)
        assert_array_equal(values, np.array([42, 13, 0]))
        assert_array_equal(metas, np.array([Unknown, Unknown, None]))

        # Meta values given as well.
        long_row = short_row + ["M", "HS", "1234567"]
        values, metas = domain.convert(long_row)
        assert_array_equal(values, np.array([42, 13, 0]))
        assert_array_equal(metas, np.array([0, 1, "1234567"], dtype=object))
Exemplo n.º 7
0
    def test_conversion_size(self):
        """``Domain.convert`` must reject value lists of unsupported length."""
        def assert_rejected(dom, lengths):
            # Every listed length has to raise ValueError.
            for size in lengths:
                with self.assertRaises(ValueError):
                    dom.convert([0] * size)

        assert_rejected(Domain([age, gender, income], [race]), (3, 5))

        with_metas = Domain([age, income], [race],
                            [gender, education, ssn])
        assert_rejected(with_metas, (2, 4, 7))
        # 3 values (no metas) and 6 values (with metas) must be accepted.
        with_metas.convert([0] * 3)
        with_metas.convert([0] * 6)
Exemplo n.º 8
0
    def test_from_numpy_dimensions(self):
        """Check which array ranks ``Domain.from_numpy`` accepts."""
        # A 1-D and a single-column 2-D target both give one class variable.
        for targets in (np.zeros(5), np.zeros((5, 1))):
            dom = Domain.from_numpy(np.zeros((1, 1)), targets)
            self.assertTrue(dom.anonymous)
            self.assertEqual(len(dom.class_vars), 1)

        # 1-D X, 3-D X and 3-D Y have to be refused.
        rejected = (
            (np.zeros(2),),
            (np.zeros((2, 2, 2)),),
            (np.zeros((2, 2)), np.zeros((2, 2, 2))),
        )
        for bad_args in rejected:
            with self.assertRaises(ValueError):
                Domain.from_numpy(*bad_args)
Exemplo n.º 9
0
    def test_conversion(self):
        """``Domain.convert`` returns an ``(x, y, metas)`` triple here."""
        domain = Domain([age, income], [race],
                        [gender, education, ssn])

        def convert_and_check_xy(row):
            # x and y are the same for both rows; hand back the metas.
            x, y, metas = domain.convert(row)
            assert_array_equal(x, np.array([42, 13]))
            assert_array_equal(y, np.array([0]))
            return metas

        # No metas supplied: all of them must come back as NaN.
        missing = convert_and_check_xy([42, 13, "White"])
        self.assertTrue(all(np.isnan(np.array(missing, dtype=float))))

        given = convert_and_check_xy(
            [42, 13, "White", "M", "HS", "1234567"])
        assert_array_equal(given, np.array([0, 1, "1234567"], dtype=object))
Exemplo n.º 10
0
 def test_var_from_domain(self):
     """Exercise ``Domain.var_from_domain`` with ``incomeA`` and with ints."""
     dom = Domain((age, gender, income), metas=(ssn, race))

     # With the default and with an explicit False second argument the
     # variable is handed back unchanged.
     for extra in ((), (False,)):
         self.assertEqual(dom.var_from_domain(incomeA, *extra), incomeA)

     # With True as the second argument an IndexError is expected.
     with self.assertRaises(IndexError):
         dom.var_from_domain(incomeA, True)

     # Integers are refused when ``no_index`` is set.
     for number in (1, -1):
         with self.assertRaises(TypeError):
             dom.var_from_domain(number, no_index=True)
Exemplo n.º 11
0
    def test_has_discrete(self):
        """Check ``Domain.has_discrete_attributes`` for several domains."""
        def check(expected, domain_args, *flags):
            # Build the domain, query it with ``flags`` and assert the
            # truthiness of the answer.
            verdict = Domain(*domain_args).has_discrete_attributes(*flags)
            if expected:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)

        # Default call: domains without attributes.
        check(False, ([],))
        check(False, ([], [age]))
        check(False, ([], race))

        # Default call: domains with attributes.
        check(False, ([age], None))
        check(True, ([race], None))
        check(True, ([age, race], None))
        check(True, ([race, age], None))

        # First flag set: class variables count too in these cases.
        check(False, ([], [age]), True)
        check(True, ([], [race]), True)
        check(False, ([age], None), True)
        check(True, ([race], None), True)
        check(True, ([age], race), True)
        check(True, ([race], age), True)
        check(True, ([], [race, age]), True)

        # Second flag set: metas are taken into account.
        check(True, ([], None, [gender]), False, True)
        check(False, ([], None, [age]), False, True)
        check(True, ([], [age], [gender]), True, True)
        check(False, ([], [incomeA], [age]), True, True)
Exemplo n.º 12
0
    def test_from_numpy_values(self):
        """Check the class variable type ``Domain.from_numpy`` infers."""
        def class_var_for(start, stop):
            # Single-column target holding the integers start..stop-1.
            targets = np.arange(start, stop).reshape(stop - start, 1)
            dom = Domain.from_numpy(np.zeros((1, 1)), targets)
            self.assertTrue(dom.anonymous)
            return dom.class_var

        self.assertIsInstance(class_var_for(1, 3), ContinuousVariable)

        # Targets 0 and 1 are expected to give a discrete variable.
        binary = class_var_for(0, 2)
        self.assertIsInstance(binary, DiscreteVariable)
        self.assertEqual(binary.values,
                         ["v{}".format(k) for k in range(1, 3)])

        self.assertIsInstance(class_var_for(18, 23), ContinuousVariable)
Exemplo n.º 13
0
    def test_has_continuous(self):
        """Check ``Domain.has_continuous_attributes`` for several domains."""
        def check(expected, domain_args, *flags):
            # Build the domain, query it with ``flags`` and assert the
            # truthiness of the answer.
            verdict = Domain(*domain_args).has_continuous_attributes(*flags)
            if expected:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)

        # Default call: domains without attributes.
        check(False, ([],))
        check(False, ([], [age]))
        check(False, ([], [race]))

        # Default call: domains with attributes.
        check(True, ([age], None))
        check(False, ([race], None))
        check(True, ([age, race], None))
        check(True, ([race, age], None))

        # First flag set: class variables count too in these cases.
        check(True, ([], [age]), True)
        check(False, ([], [race]), True)
        check(True, ([age], None), True)
        check(False, ([race], None), True)
        check(True, ([age], race), True)
        check(True, ([race], age), True)
        check(True, ([], [race, age]), True)

        # Second flag set: metas are taken into account.
        check(True, ([], None, [age]), False, True)
        check(False, ([], None, [gender]), False, True)
        check(True, ([], [gender], [age]), True, True)
        check(False, ([], [race], [gender]), True, True)
Exemplo n.º 14
0
    def test_get_conversion(self):
        """Check the index lists produced by ``Domain.get_conversion``.

        Conversions into ``e``, ``f`` and ``g`` are requested and their
        ``source``, ``attributes``, ``class_vars`` and ``metas`` fields are
        compared with the expected values.  Several conversions are
        requested more than once and must give the same result every time.
        """
        d = Domain((age, gender, income), metas=(ssn, race))
        e = Domain((gender, race), None, metas=(age, gender, ssn))
        f = Domain((gender,), (race, income), metas=(age, income, ssn))
        g = Domain((), metas=(age, gender, ssn))

        # d -> e, first request.
        d_to_e = e.get_conversion(d)
        self.assertIs(d_to_e.source, d)
        self.assertEqual(d_to_e.attributes, [1, -2])
        self.assertEqual(d_to_e.class_vars, [])
        self.assertEqual(d_to_e.metas, [0, 1, -1])

        # d -> e again; the same values are expected.
        d_to_e = e.get_conversion(d)
        self.assertIs(d_to_e.source, d)
        self.assertEqual(d_to_e.attributes, [1, -2])
        self.assertEqual(d_to_e.class_vars, [])
        self.assertEqual(d_to_e.metas, [0, 1, -1])

        # d -> f, first request.
        d_to_f = f.get_conversion(d)
        self.assertIs(d_to_f.source, d)
        self.assertEqual(d_to_f.attributes, [1])
        self.assertEqual(d_to_f.class_vars, [-2, 2])
        self.assertEqual(d_to_f.metas, [0, 2, -1])

        # d -> e once more, after a different conversion was requested.
        d_to_e = e.get_conversion(d)
        self.assertIs(d_to_e.source, d)
        self.assertEqual(d_to_e.attributes, [1, -2])
        self.assertEqual(d_to_e.class_vars, [])
        self.assertEqual(d_to_e.metas, [0, 1, -1])

        # d -> f again.
        d_to_f = f.get_conversion(d)
        self.assertIs(d_to_f.source, d)
        self.assertEqual(d_to_f.attributes, [1])
        self.assertEqual(d_to_f.class_vars, [-2, 2])
        self.assertEqual(d_to_f.metas, [0, 2, -1])

        # f -> g: g has no attributes or class variables, only metas.
        f_to_g = g.get_conversion(f)
        self.assertIs(f_to_g.source, f)
        self.assertEqual(f_to_g.attributes, [])
        self.assertEqual(f_to_g.class_vars, [])
        self.assertEqual(f_to_g.metas, [-1, 0, -3])

        # NOTE(review): ``income`` is defined outside this test and its
        # ``compute_value`` is overwritten here without being restored --
        # confirm that no later test depends on the previous value.
        x = lambda: 42
        income.compute_value = x
        # g -> f: the expected lists hold ``compute_value`` objects in the
        # positions of ``race`` and ``income``.
        g_to_f = f.get_conversion(g)
        self.assertIs(g_to_f.source, g)
        self.assertEqual(g_to_f.attributes, [-2])
        self.assertEqual(g_to_f.class_vars, [Variable.compute_value, x])
        self.assertEqual(g_to_f.metas, [-1, x, -3])
def take(table, indices, axis=0):
    """ Take values from the ``table`` along the ``axis``.

    ``indices`` is first normalised with ``mask_to_indices``.  For
    ``axis == 0`` the selected rows are collected into a new table (an
    empty table over the same domain when nothing is selected).  For
    ``axis == 1`` a new domain is built from the selected variables, the
    metas of the original domain are added and the table is converted to
    it.  For any other ``axis`` the table is returned unchanged.
    """
    indices = mask_to_indices(indices, (len(table), len(table.domain)), axis)

    if axis == 0:
        # Row (instance) selection.
        rows = [table[i] for i in indices]
        return Table(rows) if rows else Table(table.domain)

    if axis == 1:
        # Column (variable) selection.
        all_variables = table.domain.variables
        picked = [all_variables[i] for i in indices]
        # The second argument is a bool: whether the original class
        # variable is among the selected variables.
        new_domain = Domain(picked, table.domain.class_var in picked)
        new_domain.add_metas(table.domain.get_metas())
        return Table(new_domain, table)

    return table
Exemplo n.º 16
0
 def test_index(self):
     """``Domain.index`` resolves variables, names, ints and NumPy ints."""
     dom = Domain((age, gender, income), metas=(ssn, race))
     # (key, expected index) pairs; negative indices address the metas.
     lookups = (
         (age, 0), ("AGE", 0), (0, 0), (np.int_(0), 0),
         (income, 2), ("income", 2), (2, 2), (np.int_(2), 2),
         (ssn, -1), ("SSN", -1), (-1, -1), (np.int_(-1), -1),
         (-2, -2), (np.int_(-2), -2),
     )
     for key, expected in lookups:
         self.assertEqual(dom.index(key), expected)
Exemplo n.º 17
0
    def test_latlon_detection_heuristic(self):
        """The widget must pick lat/lon attributes from unnamed columns."""
        # Two random columns, in [-180, 180] and [-90, 90] respectively.
        wide_range = np.random.uniform(-180, 180, 100)
        narrow_range = np.random.uniform(-90, 90, 100)
        coords = np.c_[wide_range, narrow_range]

        table = Table.from_numpy(Domain.from_numpy(coords), coords)
        self.widget.set_data(table)

        # Both detected attributes have to belong to the table's domain.
        self.assertIn(self.widget.lat_attr, table.domain)
        self.assertIn(self.widget.lon_attr, table.domain)
Exemplo n.º 18
0
 def test_from_numpy_values(self):
     """Check the class variable type ``Domain.from_numpy`` infers.

     Each case is ``(first value, stop value, expected variable type)``;
     the target column holds the integers ``first .. stop - 1``.
     """
     cases = [(1, 3, ContinuousVariable),
              (0, 2, DiscreteVariable),
              (18, 23, ContinuousVariable)]
     for aran_min, aran_max, vartype in cases:
         n_rows, n_cols = aran_max - aran_min, 1
         targets = np.arange(aran_min, aran_max).reshape(n_rows, n_cols)
         d = Domain.from_numpy(np.zeros((1, 1)), targets)
         self.assertTrue(d.anonymous)
         self.assertIsInstance(d.class_var, vartype)
         # ``vartype`` is a class, not an instance, so the previous
         # ``isinstance(vartype, DiscreteVariable)`` was always False and
         # the value names were never checked.
         if vartype is DiscreteVariable:
             # list() so the comparison works for any sequence type.
             self.assertEqual(list(d.class_var.values),
                              ["v{}".format(i) for i in range(1, 3)])
def join_domains(domain1, domain2):
    """Join the variables and metas of two domains into a new domain.

    Variables are taken from ``domain1`` first and then from ``domain2``;
    a variable that has already been seen is skipped.  When ``domain2``
    has a class variable, its last variable is always kept.

    Returns a tuple ``(joined_domain, used_mask1, used_mask2)`` where each
    mask is a list of booleans, one per variable of the corresponding
    domain, telling whether that variable was put into the joined domain.
    """
    seen = set()

    def first_occurrence_mask(variables):
        # True for every variable not encountered before; records it.
        mask = []
        for var in variables:
            mask.append(var not in seen)
            seen.add(var)
        return mask

    used_mask1 = first_occurrence_mask(domain1.variables)
    used_mask2 = first_occurrence_mask(domain2.variables)
    if domain2.classVar:
        # Assumes the class variable is the last entry of
        # ``domain2.variables`` -- TODO confirm.
        # NOTE(review): if that variable also occurs in ``domain1`` it is
        # kept twice; verify that ``Domain`` accepts this.
        used_mask2[-1] = True

    # Concatenate the variables of BOTH domains and keep the masked ones.
    candidates = list(domain1.variables) + list(domain2.variables)
    variables = [var for var, keep
                 in zip(candidates, used_mask1 + used_mask2) if keep]

    joined_domain = Domain(variables, domain2.classVar)
    joined_domain.add_metas(domain1.get_metas())
    joined_domain.add_metas(domain2.get_metas())
    return joined_domain, used_mask1, used_mask2
Exemplo n.º 20
0
 def test_anova(self):
     """ANOVA must give the highest score to column 1 of the random data."""
     n_rows, n_cols = 500, 5
     features = np.random.rand(n_rows, n_cols)
     # Target computed from columns 1 and 3 only.
     target = 4 + (-3*features[:, 1] + features[:, 3]) // 2

     anonymous = Domain.from_numpy(features, target)
     class_var = DiscreteVariable('c', values=np.unique(target))
     table = Table(Domain(anonymous.attributes, class_var),
                   features, target)

     anova = score.ANOVA()
     scores = [anova(table, column) for column in range(n_cols)]
     self.assertTrue(np.argmax(scores) == 1)
Exemplo n.º 21
0
    def test_improved_randomized_pca_properly_called(self):
        """``pca.randomized_pca`` is used only by the "randomized" solver.

        The function is temporarily wrapped in a ``MagicMock`` so calls can
        be counted; the original is put back afterwards so that the mock
        does not leak into other tests.
        """
        # It doesn't matter what we put into the matrix
        x_ = np.random.normal(0, 1, (100, 20))
        x = Table.from_numpy(Domain.from_numpy(x_), x_)

        original = pca.randomized_pca
        pca.randomized_pca = MagicMock(wraps=original)
        try:
            PCA(10, svd_solver="randomized", random_state=42)(x)
            pca.randomized_pca.assert_called_once()

            pca.randomized_pca.reset_mock()
            PCA(10, svd_solver="arpack", random_state=42)(x)
            pca.randomized_pca.assert_not_called()
        finally:
            # Restore the real implementation even when an assertion fails.
            pca.randomized_pca = original
Exemplo n.º 22
0
    def test_from_numpy_names(self):
        """Check the default names ``Domain.from_numpy`` gives to variables."""
        def check_feature_names(n_cols, template):
            # Attribute-only domain with ``n_cols`` columns.
            dom = Domain.from_numpy(np.zeros((1, n_cols)))
            self.assertTrue(dom.anonymous)
            self.assertEqual(
                [v.name for v in dom.attributes],
                [template.format(k) for k in range(1, n_cols + 1)])

        check_feature_names(5, "Feature {}")
        check_feature_names(99, "Feature {:02}")
        check_feature_names(100, "Feature {:03}")

        # A single column is expected to be called just "Feature".
        single = Domain.from_numpy(np.zeros((1, 1)))
        self.assertTrue(single.anonymous)
        self.assertEqual(single.attributes[0].name, "Feature")

        # Three attribute columns, one class column and 100 meta columns.
        full = Domain.from_numpy(np.zeros((1, 3)), np.zeros((1, 1)),
                                 np.zeros((1, 100)))
        self.assertTrue(full.anonymous)
        attribute_names = [v.name for v in full.attributes]
        meta_names = [v.name for v in full.metas]
        self.assertEqual(attribute_names,
                         ["Feature {}".format(k) for k in range(1, 4)])
        self.assertEqual(full.class_var.name, "Target")
        self.assertEqual(meta_names,
                         ["Meta {:03}".format(k) for k in range(1, 101)])
Exemplo n.º 23
0
 def test_chi2(self):
     """Chi2 must give the highest score to column 1 of the random data."""
     n_rows, n_cols = 500, 5
     features = np.random.randint(4, size=(n_rows, n_cols))
     # Target computed from columns 1 and 3 only.
     target = 10 + (-3*features[:, 1] + features[:, 3]) // 2

     anonymous = Domain.from_numpy(features, target)
     class_var = DiscreteVariable('c', values=np.unique(target))
     raw = Table(Domain(anonymous.attributes, class_var),
                 features, target)
     # The table is run through Discretize before scoring.
     discretized = preprocess.Discretize()(raw)

     chi2 = score.Chi2()
     scores = [chi2(discretized, column) for column in range(n_cols)]
     self.assertTrue(np.argmax(scores) == 1)
Exemplo n.º 24
0
    def test_conversion(self):
        """``Domain.convert`` fills omitted metas with each var's Unknown."""
        domain = Domain([age, income], [race],
                        [gender, education, ssn])

        def same(left, right):
            # Two real NaNs count as equal; otherwise plain ``==``.
            both_nan = (isinstance(left, Real) and isinstance(right, Real)
                        and np.isnan(left) and np.isnan(right))
            if both_nan:
                return True
            return left == right

        x, y, metas = domain.convert([42, 13, "White"])
        assert_array_equal(x, np.array([42, 13]))
        assert_array_equal(y, np.array([0]))
        expected_metas = [gender.Unknown, education.Unknown, ssn.Unknown]
        self.assertTrue(
            all(same(got, want) for got, want in zip(metas, expected_metas)))

        # Meta values given explicitly.
        x, y, metas = domain.convert([42, 13, "White", "M", "HS", "1234567"])
        assert_array_equal(x, np.array([42, 13]))
        assert_array_equal(y, np.array([0]))
        assert_array_equal(metas, np.array([0, 1, "1234567"], dtype=object))
Exemplo n.º 25
0
    def test_rrelieff(self):
        """RReliefF must rank the informative features highest."""
        features = np.random.random((100, 5))
        # Target built from columns 0 and 1 only.  Note that ``-`` binds
        # tighter than ``^``, so this is ``a ^ (b - 1)``.
        target = ((features[:, 0] > .5) ^ (features[:, 1] < .5) - 1).astype(float)
        xor = Table.from_numpy(Domain.from_numpy(features, target),
                               features, target)

        scorer = score.RReliefF()

        # The two best-scored attributes must be the first two columns.
        xor_weights = scorer(xor, None)
        top_two = {xor.domain[idx].name for idx in xor_weights.argsort()[-2:]}
        first_two = set(var.name for var in xor.domain.attributes[:2])
        self.assertSetEqual(first_two, top_two)

        # On the housing data three known features must be in the top six.
        housing_weights = scorer(self.housing, None)
        top_six = {self.housing.domain[idx].name
                   for idx in housing_weights.argsort()[-6:]}
        for feature in ('LSTAT', 'RM', 'AGE'):
            self.assertIn(feature, top_six)
Exemplo n.º 26
0
    def test_improved_randomized_pca_dense_data(self):
        """Randomized PCA should work well on dense data."""
        rng = check_random_state(42)
        compared_fields = ("components_", "explained_variance_",
                           "singular_values_")

        # First a tall, skinny matrix, then a short, fat one.
        for shape in ((100, 20), (20, 100)):
            raw = rng.normal(0, 1, shape)
            table = Table.from_numpy(Domain.from_numpy(raw), raw)

            exact = PCA(10, svd_solver="full", random_state=rng)(table)
            approx = PCA(10, svd_solver="randomized", random_state=rng)(table)

            # Both solvers have to agree to 8 decimals on every field.
            for field in compared_fields:
                np.testing.assert_almost_equal(
                    getattr(exact, field), getattr(approx, field), decimal=8
                )
Exemplo n.º 27
0
    def test_different_domains_with_same_attributes_are_equal(self):
        """Domains compare equal exactly when all three parts match."""
        first = Domain([])
        second = Domain([])
        self.assertEqual(first, second)

        # Change one part on the first domain (must differ), then make the
        # same change on the second (must be equal again).
        for part in ("attributes", "class_vars", "_metas"):
            setattr(first, part, (ContinuousVariable('var1'),))
            self.assertNotEqual(first, second)

            setattr(second, part, (ContinuousVariable('var1'),))
            self.assertEqual(first, second)
Exemplo n.º 28
0
    def test_clusters_ordered_by_size(self):
        """Cluster names should be sorted based on the number of instances."""
        # Four distinct points repeated 20, 15, 10 and 5 times.
        groups = (([0, 0], 20), ([1, 0], 15), ([0, 1], 10), ([1, 1], 5))
        points = np.vstack(
            tuple(np.array([point] * count) for point, count in groups))
        # Remove any order dependence in the data.
        np.random.shuffle(points)

        table = Table.from_numpy(domain=Domain.from_numpy(X=points), X=points)

        self.send_signal(self.widget.Inputs.data, table)
        self.widget.k_neighbors = 4
        self.commit_and_wait()
        annotated = self.get_output(self.widget.Outputs.annotated_data)

        labels = annotated.get_column_view('Cluster')[0].astype(int)
        sizes = np.bincount(labels)
        # Cluster 0 must be the largest, cluster 1 the next, and so on.
        np.testing.assert_equal(sizes, sorted(sizes, reverse=True))
Exemplo n.º 29
0
def dataCombine(corpus,liwcResultList,featureNames,markedTexts):
    """Combine the corpus with the per-row LIWC results into one table.

    Each output row holds a 1-based message id, the values of the
    corresponding corpus row and one column per name returned by
    ``sortKeys(columnNames)``.  The file name, the counselor id and the
    marked text of the row are stored as metas.

    A column whose name starts with digits followed by whitespace, or
    whose name equals ``NUMBERCOUNT``, is divided by the row's
    ``NBROFMATCHES`` value when that value is non-zero; every other value
    is stored as ``int``.

    Returns the ``Table`` built with ``Table.from_numpy``.
    """
    liwcResultTable, columnNames = list2table(liwcResultList, featureNames)
    fieldIdFile = getFieldId(corpus, FIELDNAMEFILE)
    fieldIdCounselor = getFieldId(corpus, FIELDNAMECOUNSELOR)
    # The column order is the same for the domain and for every row, so it
    # is computed once (list() guards against a one-shot iterator).
    sortedColumnNames = list(sortKeys(columnNames))
    # Raw string: "\d" in a normal string literal is an invalid escape.
    startsWithNumber = re.compile(r"^\d+\s")

    domain = [ContinuousVariable(name=FIELDNAMEMSGID)] + list(corpus.domain.variables)
    for columnName in sortedColumnNames:
        domain.append(ContinuousVariable(name=columnName, number_of_decimals=NBROFDECIMALS))
    metas = [StringVariable(name=FIELDNAMEFILE),
             StringVariable(name=FIELDNAMECOUNSELOR),
             StringVariable(name=FIELDNAMEMARKEDTEXT)]

    dataOut = []
    metasOut = []
    for i in range(len(corpus)):
        fileName = corpus.metas[i][fieldIdFile]
        counselorId = corpus.metas[i][fieldIdCounselor]
        metasOut.append([fileName, counselorId, markedTexts[i]])
        row = [i+1] + list(corpus[i].values())
        for columnName in sortedColumnNames:
            if (not startsWithNumber.match(columnName) and columnName != NUMBERCOUNT) or int(liwcResultTable[i][NBROFMATCHES]) == 0:
                row.append(int(liwcResultTable[i][columnName]))
            else:
                # Relative frequency with respect to the number of matches.
                row.append(float(liwcResultTable[i][columnName])/float(liwcResultTable[i][NBROFMATCHES]))
        dataOut.append(row)
    table = Table.from_numpy(Domain(domain, metas=metas), np.array(dataOut), metas=np.array(metasOut))
    return table
Exemplo n.º 30
0
    def setUp(self):
        """Create the domain, the matching ``args`` tuple and the handler."""
        attributes = [
            ContinuousVariable('c1'),
            DiscreteVariable('d1', values='abc'),
            DiscreteVariable('d2', values='def'),
        ]
        class_vars = [DiscreteVariable('d3', values='ghi')]
        metas = [
            ContinuousVariable('c2'),
            DiscreteVariable('d4', values='jkl'),
        ]
        self.domain = Domain(attributes=attributes,
                             class_vars=class_vars,
                             metas=metas)

        # Name -> type mappings for attributes/class variables and metas.
        variable_types = {
            'c1': Continuous,
            'd1': Discrete,
            'd2': Discrete,
            'd3': Discrete,
        }
        meta_types = {
            'c2': Continuous,
            'd4': Discrete,
        }
        self.args = (self.domain, variable_types, meta_types)

        handler = SelectAttributesDomainContextHandler(first_match=False)
        # Replace ``read_defaults`` with a no-op.
        handler.read_defaults = lambda: None
        self.handler = handler
Exemplo n.º 31
0
    def test_mismatching_targets(self):
        """``wrong_targets`` is shown only for data plus a mismatched model."""
        warning = self.widget.Warning

        full_model = ConstantLearner()(self.iris)
        iris_domain = self.iris.domain
        # Same data with the variable at index 3 used as the class.
        reduced = self.iris.transform(Domain(iris_domain[:3], iris_domain[3]))
        reduced_model = ConstantLearner()(reduced)

        def send_model(model, slot):
            self.send_signal(self.widget.Inputs.predictors, model, slot)

        def send_data(data):
            self.send_signal(self.widget.Inputs.data, data)

        def shown():
            return warning.wrong_targets.is_shown()

        # Models alone, without data: no warning.
        send_model(full_model, 1)
        send_model(reduced_model, 2)
        self.assertFalse(shown())

        # Data arrives while both models are connected: warning.
        send_data(self.iris)
        self.assertTrue(shown())

        # Removing the second model clears the warning ...
        send_model(None, 2)
        self.assertFalse(shown())

        # ... and sending it again brings the warning back.
        send_model(reduced_model, 2)
        self.assertTrue(shown())

        # Without data the warning disappears.
        send_data(None)
        self.assertFalse(shown())
Exemplo n.º 32
0
    def test_annotation_bool(self):
        """Check if bool labels remain bool"""
        flagged = ContinuousVariable("a")
        flagged.attributes["hidden"] = True
        table = Table.from_domain(Domain([flagged]))

        self.send_signal(self.widget.Inputs.data, table)

        assert isinstance(self.widget, OWEditDomain)
        # Select the first variable in the domain view.
        first_row = self.widget.domain_view.model().index(0)
        self.widget.domain_view.setCurrentIndex(first_row)

        # Write the text "False" into cell (0, 1) of the labels model.
        editor = self.widget.findChild(ContinuousVariableEditor)
        assert isinstance(editor, ContinuousVariableEditor)
        value_cell = editor.labels_model.index(0, 1)
        editor.labels_model.setData(value_cell, "False", Qt.EditRole)

        self.widget.commit()
        edited = self.get_output(self.widget.Outputs.data)
        # The stored attribute must be falsy, not the non-empty string.
        self.assertFalse(edited.domain["a"].attributes["hidden"])
def create_coef_table(classifier):
    """Return a table named "coefficients" built from ``classifier``.

    The table has one continuous column per reported class value and one
    row per term: the first row holds ``classifier.intercept`` and the
    following rows hold the transposed ``classifier.coefficients``. A
    string meta column "name" labels the rows with "intercept" followed by
    the names of ``classifier.domain.attributes``.

    When ``coefficients`` has more than two rows, a column is created for
    every entry of ``classifier.used_vals[0]``; otherwise a single column
    is created for its second entry.
    """
    intercept = classifier.intercept
    coefficients = classifier.coefficients
    class_values = classifier.domain.class_var.values
    used = classifier.used_vals[0]

    if coefficients.shape[0] > 2:
        column_names = [class_values[int(index)] for index in used]
    else:
        column_names = [class_values[int(used[1])]]

    columns = [ContinuousVariable(column_name, number_of_decimals=7)
               for column_name in column_names]
    domain = Domain(columns, metas=[StringVariable("name")])

    values = np.vstack(
        (intercept.reshape(1, len(intercept)), coefficients.T))
    row_names = [["intercept"]]
    row_names += [[attr.name] for attr in classifier.domain.attributes]
    row_names = np.array(row_names, dtype=object)

    table = Table.from_numpy(domain, X=values, metas=row_names)
    table.name = "coefficients"
    return table
Exemplo n.º 34
0
    def test_vizrank_class_nan(self):
        """
        Vizrank must be disabled when all class values are nan, exactly as
        if the class column were missing, and enabled again for data with
        class values.
        GH-2757
        """
        def iris_sample():
            return Table("iris")[::30]

        with_class = iris_sample()

        # All-nan class values combined with a class variable that has no
        # values at all.
        blanked = iris_sample()
        blanked.Y[:] = np.nan
        empty_class_domain = Domain(
            attributes=blanked.domain.attributes[:4],
            class_vars=DiscreteVariable("iris", values=[]))
        no_values = Table(empty_class_domain, blanked.X, Y=blanked.Y)

        # All-nan class values with the original class variable.
        nan_class = iris_sample()
        nan_class.Y[:] = np.nan

        expectations = (
            (with_class, True),
            (no_values, False),
            (with_class, True),
            (nan_class, False),
            (with_class, True),
        )
        for data, enabled in expectations:
            self.send_signal(self.widget.Inputs.data, data)
            self.assertEqual(enabled,
                             self.widget.vizrank_button.isEnabled())
Exemplo n.º 35
0
    def test_set_data_no_class(self):
        """Widget is properly set up when there is no class"""
        widget = self.widget
        iris = self.iris
        var_model = widget.controls.var.model()
        cvar_model = widget.controls.cvar.model()

        # Turn the class variable into an ordinary attribute, so the data
        # has no class.
        classless = Domain(iris.domain.attributes + iris.domain.class_vars)
        self.send_signal(widget.Inputs.data, iris.transform(classless))

        shown_names = {var.name for var in var_model}
        expected_names = {var.name for var in classless.attributes}
        self.assertEqual(shown_names, expected_names)
        self.assertEqual(
            list(cvar_model),
            [None, DomainModel.Separator, iris.domain.class_var])

        self.assertIs(widget.var, classless[0])
        self.assertIs(widget.cvar, None)
        np.testing.assert_equal(widget.valid_data, self.iris.X[:, 0])
        self.assertIsNone(widget.valid_group_data)

        outputs = widget.Outputs
        self.assertIsNotNone(self.get_output(outputs.histogram_data))
        self.assertIsNotNone(self.get_output(outputs.annotated_data))
        self.assertIsNone(self.get_output(outputs.selected_data))
Exemplo n.º 36
0
    def send_coefficients(self):
        """
        Send a table of coefficients on the widget's output.

        A table is sent only when there is a model, the learner is a
        LogisticRegressionLearner and the model has an ``skl_model``
        attribute; in every other case ``None`` is sent.
        """
        has_coefficients = (
            self.model is not None
            and isinstance(self.learner, LogisticRegressionLearner)
            and hasattr(self.model, 'skl_model'))
        if not has_coefficients:
            self.Outputs.coefficients.send(None)
            return

        skl_model = self.model.skl_model
        domain = Domain([ContinuousVariable("coef", number_of_decimals=7)],
                        metas=[StringVariable("name")])
        # Intercept values first, then the first row of coef_.
        values = skl_model.intercept_.tolist()
        values = values + skl_model.coef_[0].tolist()

        # Attribute names are read from the model's instances after they
        # pass through the learner's preprocessors.
        preprocessed = self.model.instances
        for preprocess in self.learner.preprocessors:
            preprocessed = preprocess(preprocessed)
        names = ["Intercept"]
        names += [var.name for var in preprocessed.domain.attributes]

        rows = list(zip(values, names))
        self.Outputs.coefficients.send(Table(domain, rows))
Exemplo n.º 37
0
    def compute_distances(self):
        """Store in ``self.distances`` each row's distance to the reference.

        ``self.distances`` is set to ``None`` when the data or the
        reference is missing or empty, or when their attribute sets differ
        (the latter also raises the ``diff_domains`` error). Otherwise it
        holds, for every data row, the minimum over the reference rows of
        the selected metric.
        """
        self.Error.diff_domains.clear()
        if not (self.data and self.reference):
            self.distances = None
            return

        reference_attrs = set(self.reference.domain.attributes)
        data_attrs = set(self.data.domain.attributes)
        if reference_attrs != data_attrs:
            self.Error.diff_domains()
            self.distances = None
            return

        metric = METRICS[self.distance_index][1]
        n_ref = len(self.reference)

        # comparing only attributes, no metas and class-vars
        attrs_only = Domain(self.data.domain.attributes)
        reference = self.reference.transform(attrs_only)
        data = self.data.transform(attrs_only)

        # Preprocess reference and data together, then split them again.
        stacked = Table.concatenate([reference, data], 0)
        preprocessed = Impute()(RemoveNaNColumns()(stacked))
        pp_reference = preprocessed[:n_ref]
        pp_data = preprocessed[n_ref:]
        self.distances = metric(pp_data, pp_reference).min(axis=1)
Exemplo n.º 38
0
 def test_random(self):
     """kNN fitted on random data: accuracy must lie between 0.1 and 0.3.

     Features and targets are independent random integers; the targets
     take five distinct values (-2..2).
     """
     nrows, ncols = 1000, 5
     x = np.random.randint(-20, 51, (nrows, ncols))
     y = np.random.randint(-2, 3, (nrows, 1))
     # First half is used for fitting, second half for prediction.
     train_x, test_x = np.split(x, 2)
     train_y, test_y = np.split(y, 2)

     features = tuple(ContinuousVariable("Feature {}".format(number))
                      for number in range(1, ncols + 1))
     targets = (DiscreteVariable("Target 1"),)
     train = Table(Domain(features, targets), train_x, train_y)

     model = KNNLearner()(train)
     predicted = model(test_x)
     hits = predicted == test_y.flatten()
     accuracy = sum(hits) / len(hits)
     self.assertGreater(accuracy, 0.1)
     self.assertLess(accuracy, 0.3)
Exemplo n.º 39
0
 def test_XY_large(self):
     """X/Y of a table with more than AUTO_DL_LIMIT rows need a download.

     Reading X or Y, and a non-partial download limited to fewer rows
     than the table has, must raise ValueError; a partial download and a
     full download must succeed and expose the expected values.
     """
     from Orange.data.sql.table import AUTO_DL_LIMIT as limit

     n_partial = limit + 10
     mat = np.random.randint(0, 2, (limit + 100, 3))
     conn, table_name = self.create_sql_table(mat)
     hints = Domain(
         [], DiscreteVariable(name='col2', values=['0', '1', '2']))
     sql_table = SqlTable(conn, table_name, type_hints=hints)

     with self.assertRaises(ValueError):
         _ = sql_table.X
     with self.assertRaises(ValueError):
         _ = sql_table.Y
     with self.assertRaises(ValueError):
         sql_table.download_data(n_partial)

     # Download partial data
     sql_table.download_data(n_partial, partial=True)
     assert_almost_equal(sql_table.X, mat[:n_partial, :2])
     assert_almost_equal(sql_table.Y.flatten()[:n_partial],
                         mat[:n_partial, 2])

     # Download all data
     sql_table.download_data()
     assert_almost_equal(sql_table.X, mat[:, :2])
     assert_almost_equal(sql_table.Y.flatten(), mat[:, 2])
Exemplo n.º 40
0
    def __call__(self, data, feature=None):
        """Validate ``data`` and return ``self.score_data(data, feature)``.

        Parameters
        ----------
        data
            Data with a target variable of type ``self.class_type``.
        feature
            Optional key of a single variable in ``data.domain``; when
            given, the data is reduced to that variable plus the class
            variables before preprocessing.

        Raises
        ------
        ValueError
            If the data has no target variable, the target is not of type
            ``self.class_type``, or, after preprocessing, an attribute is
            not of type ``self.feature_type``.
        """
        class_var = data.domain.class_var
        if not class_var:
            raise ValueError("{} requires data with a target variable.".format(
                self.friendly_name))
        if not isinstance(class_var, self.class_type):
            raise ValueError("{} requires a {} target variable.".format(
                self.friendly_name,
                self._friendly_vartype_name(self.class_type)))

        if feature is not None:
            single = Domain([data.domain[feature]], data.domain.class_vars)
            data = data.transform(single)

        for preprocess in self.preprocessors:
            data = preprocess(data)

        # Report the first attribute of an unsupported type, if any.
        unsupported = next(
            (var for var in data.domain.attributes
             if not isinstance(var, self.feature_type)),
            None)
        if unsupported is not None:
            raise ValueError("{} cannot score {} variables.".format(
                self.friendly_name,
                self._friendly_vartype_name(type(unsupported))))

        return self.score_data(data, feature)
Exemplo n.º 41
0
    def _score_heuristic(self):
        """Return the continuous variables ordered by relief weight.

        The candidates are all continuous variables and metas of the
        master's data except the color attribute, which serves as the
        class. Columns are centered on their mean and divided by their
        span before scoring. The result is sorted by descending weight,
        ties broken by variable name.
        """
        master = self.master
        color_var = master.attr_color
        source = master.data.domain

        candidates = [var
                      for var in chain(source.variables, source.metas)
                      if var.is_continuous and var is not color_var]
        domain = Domain(attributes=candidates, class_vars=color_var)

        data = master.data.transform(domain).copy()
        with data.unlocked():
            columns = data.X
            span = np.max(columns, axis=0) - np.min(columns, axis=0)
            span[span == 0] = 1  # constant columns: avoid dividing by zero
            data.X = (columns - np.mean(columns, axis=0)) / span

        if color_var.is_discrete:
            relief = ReliefF
        else:
            relief = RReliefF
        weights = relief(n_iterations=100, k_nearest=self.minK)(data)

        ranked = sorted(zip(weights, domain.attributes),
                        key=lambda pair: (-pair[0], pair[1].name))
        return [var for _, var in ranked]
Exemplo n.º 42
0
 def test_NaiveBayes(self):
     """Naive Bayes on the discretized SQL iris table.

     The first instance must be classified correctly and the accuracy on
     the whole table must lie between 0.95 and 1.
     """
     class_hint = DiscreteVariable(
         "iris",
         values=['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'])
     table = SqlTable(dict(host='localhost', database='test'),
                      'iris',
                      type_hints=Domain([], class_hint))
     table = DiscretizeTable(table)
     learner = nb.NaiveBayesLearner()
     clf = learner(table)

     # Single instance prediction
     self.assertEqual(clf(table[0]), table[0].get_class())

     # Table prediction
     predicted = clf(table)
     actual = array([row.get_class() for row in table])
     hits = predicted == actual
     accuracy = hits.sum() / len(hits)
     self.assertGreater(accuracy, 0.95)
     self.assertLess(accuracy, 1.)
    def test_fix_values(self, msgbox):
        """Check how ``fix_expressions`` rewrites ``<name>.value`` expressions.

        ``msgbox`` stands in for the message box used by the widget; it is
        presumably injected by a patch decorator that is not part of this
        block -- TODO confirm.
        """
        w = self.widget

        # Give the stand-in distinct role markers and make the object it
        # creates expose a controllable ``exec``.
        msgbox.ApplyRole, msgbox.RejectRole = object(), object()
        msgbox.return_value = Mock()
        dlgexec = msgbox.return_value.exec = Mock()

        # Three discrete variables that share the values "a", "b", "c".
        v = [DiscreteVariable(name, values=tuple("abc"))
             for name in ("ana", "berta", "cilka")]
        domain = Domain(v, [])
        self.send_signal(w.Inputs.data, Table.from_numpy(domain, [[0, 1, 2]]))

        w.descriptors = [StringDescriptor(
            "y", "ana.value + berta.value + cilka.value")]

        # Reject fixing - no changes
        dlgexec.return_value=msgbox.RejectRole
        w.fix_expressions()
        self.assertEqual(w.descriptors[0].expression,
                         "ana.value + berta.value + cilka.value")

        # From here on exec() returns a Mock rather than RejectRole.
        # NOTE(review): ``AcceptRole`` is not one of the roles assigned
        # above, and the Mock itself (not the role) is what exec() returns.
        dlgexec.return_value = Mock(return_value=msgbox.AcceptRole)

        # The ".value" suffixes of existing variables are removed.
        w.fix_expressions()
        self.assertEqual(w.descriptors[0].expression, "ana + berta + cilka")

        # A name that is not in the domain keeps its ".value" suffix.
        w.descriptors = [StringDescriptor(
            "y", "ana.value + dani.value + cilka.value")]
        with patch.object(w, "apply"):  # dani doesn't exist and will fail
            w.fix_expressions()
        self.assertEqual(w.descriptors[0].expression,
                         "ana + dani.value + cilka")

        # In a continuous descriptor, the discrete variable passed to a
        # function is replaced by a lookup from its values to their indices.
        w.descriptors = [ContinuousDescriptor("y", "sqrt(berta)", 1)]
        w.fix_expressions()
        self.assertEqual(w.descriptors[0].expression,
                         "sqrt({'a': 0, 'b': 1, 'c': 2}[berta])")
Exemplo n.º 44
0
    def __call__(self, data):
        """
        Purge the attributes, classes and metas of ``data`` according to
        the respective flags and return the data in the resulting domain.

        As a side effect, the results for each of the three groups are
        stored in ``attr_results``, ``class_results`` and ``meta_results``.

        Parameters
        ----------
        data : Orange.data.Table
            A data table to remove features or classes from.

        Returns
        -------
        data : Orange.data.Table
            New data table, or None if ``data`` is None.
        """
        if data is None:
            return None

        source = data.domain

        def purge_all(variables, flags):
            return [purge_var_M(var, data, flags) for var in variables]

        # Evaluate all three groups first, then collect their results.
        attrs_state = purge_all(source.attributes, self.attr_flags)
        class_state = purge_all(source.class_vars, self.class_flags)
        metas_state = purge_all(source.metas, self.meta_flags)

        att_vars, self.attr_results = self.get_vars_and_results(attrs_state)
        cls_vars, self.class_results = self.get_vars_and_results(class_state)
        meta_vars, self.meta_results = self.get_vars_and_results(metas_state)

        return data.transform(Domain(att_vars, cls_vars, meta_vars))
Exemplo n.º 45
0
    def recompute_heatmap(self, points):
        """Predict at ``points`` and expose the result for the heatmap.

        Without a model or data, an empty ``model_predictions`` object is
        exposed. If the model raises, the error is reported through the
        owning widget and nothing is drawn. Otherwise the predictions are
        exposed together with the legend information and the heatmap is
        redrawn.

        Parameters
        ----------
        points
            Rows of values for the latitude and longitude attributes, in
            that order.
        """
        if self.model is None or self.data is None:
            self.exposeObject('model_predictions', {})
            self.evalJS('draw_heatmap()')
            return

        coordinates = np.array(points)
        table = Table(Domain([self.lat_attr, self.lon_attr]), coordinates)
        model_error = self._owwidget.Error.model_error
        try:
            predictions = self.model(table)
        except Exception as err:
            model_error(err)
            return
        model_error.clear()

        class_var = self.model.domain.class_var
        if class_var.is_continuous:
            predictions = scale(np.round(predictions, 7))  # Avoid small errors
            bounds = [np.nanmin(predictions), np.nanmax(predictions)]
            kwargs = dict(extrema=self._legend_values(class_var, bounds))
        else:
            n_values = len(class_var.values)
            colorgen = ColorPaletteGenerator(n_values, class_var.colors)
            predictions = colorgen.getRGB(predictions)
            legend_labels = self._legend_values(class_var, range(n_values))
            full_labels = list(class_var.values)
            colors = [color_to_hex(colorgen.getRGB(index))
                      for index in range(n_values)]
            kwargs = dict(legend_labels=legend_labels,
                          full_labels=full_labels,
                          colors=colors)

        payload = dict(data=predictions, **kwargs)
        self.exposeObject('model_predictions', payload)
        self.evalJS('draw_heatmap()')
Exemplo n.º 46
0
    def compute(self):
        """Run the undulator power density calculation and send its results.

        The name of the file returned by ``xoppy_calc_und_power_density``
        is sent as "xoppy_specfile". The numeric content of that file is
        then sent as "xoppy_table", with one continuous variable per label
        found on the file's first "#L" line.

        Raises
        ------
        ValueError
            If the file contains no "#L" line.
        """
        fileName = xoppy_calc_und_power_density(
            ELECTRONENERGY=self.ELECTRONENERGY,
            ELECTRONENERGYSPREAD=self.ELECTRONENERGYSPREAD,
            ELECTRONCURRENT=self.ELECTRONCURRENT,
            ELECTRONBEAMSIZEH=self.ELECTRONBEAMSIZEH,
            ELECTRONBEAMSIZEV=self.ELECTRONBEAMSIZEV,
            ELECTRONBEAMDIVERGENCEH=self.ELECTRONBEAMDIVERGENCEH,
            ELECTRONBEAMDIVERGENCEV=self.ELECTRONBEAMDIVERGENCEV,
            PERIODID=self.PERIODID,
            NPERIODS=self.NPERIODS,
            KV=self.KV,
            DISTANCE=self.DISTANCE,
            GAPH=self.GAPH,
            GAPV=self.GAPV,
            HSLITPOINTS=self.HSLITPOINTS,
            VSLITPOINTS=self.VSLITPOINTS,
            METHOD=self.METHOD)
        # send specfile
        self.send("xoppy_specfile", fileName)

        print("Loading file:  ", fileName)
        # load spec file with one scan, # is comment
        out = np.loadtxt(fileName)
        print("data shape: ", out.shape)

        # get labels: take the first line containing "#L". The file is
        # closed as soon as that line is found.
        with open(fileName) as spec_file:
            label_line = next(
                (line for line in spec_file if "#L" in line), None)
        if label_line is None:
            raise ValueError(
                "No '#L' label line found in {}".format(fileName))
        # Drop the line ending so that it does not end up in the name of
        # the last variable.
        labels = label_line.rstrip("\r\n").replace("#L ", "").split("  ")
        print("data labels: ", labels)
        #
        # build and send orange table
        #
        domain = Domain([ContinuousVariable(label) for label in labels])
        table = Table.from_numpy(domain, out)
        self.send("xoppy_table", table)
Exemplo n.º 47
0
def finance_data(symbol, since=None, until=None, granularity='d'):
    """Download Yahoo Finance quotes for `symbol` as a time series.

    The 'Adj Close' column becomes the class variable and the 'Date'
    column becomes the time variable of the result.

    Parameters
    ----------
    symbol: str
        A stock or index symbol, as supported by Yahoo Finance.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        Intended to choose between daily, weekly, monthly data, or
        dividends. NOTE: the body of this function does not use it.

    Returns
    -------
    data : Timeseries
    """
    start = date(1900, 1, 1) if since is None else since
    end = date.today() if until is None else until

    frame = web.DataReader(symbol, 'yahoo', start, end)
    series = Timeseries.from_data_table(table_from_frame(frame))

    # Make Adjusted Close a class variable
    feature_names = [var.name for var in series.domain.attributes]
    feature_names.remove('Adj Close')
    target = series.domain['Adj Close']
    new_domain = Domain(feature_names, [target], None,
                        source=series.domain)
    series = Timeseries.from_table(new_domain, series)

    series.name = symbol
    series.time_variable = series.domain['Date']
    return series
Exemplo n.º 48
0
def _corpus_from_records(records, includes_metadata):
    """Receives PubMed records and transforms them into a corpus.

    Args:
        records (list): A list of PubMed entries.
        includes_metadata (list): Pairs whose first item is the name of a
            field to include as a meta variable. The second item is not
            used here; the whole list is also passed on to
            `_records_to_corpus_entries`.

    Returns:
        corpus: The output Corpus, with a discrete class variable
            'section' and one meta variable per included field.
    """
    meta_vars = []
    time_var = None
    for field_name, _ in includes_metadata:
        if field_name == PUBMED_FIELD_DATE:
            # The date field is the only one stored as a time variable.
            time_var = TimeVariable(field_name)
            meta_vars.append(time_var)
            continue
        meta_var = StringVariable.make(field_name)
        if field_name == PUBMED_FIELD_TITLE:
            meta_var.attributes["title"] = True
        meta_vars.append(meta_var)

    meta_values, class_values = _records_to_corpus_entries(
        records,
        includes_metadata=includes_metadata,
        time_var=time_var,
    )

    # Sort the distinct, non-empty class values: iterating a set of
    # strings directly gives an order that can differ between runs, which
    # would make the value indices stored in Y irreproducible.
    section_names = sorted({str(value) for value in class_values if value})
    class_vars = [DiscreteVariable('section', values=section_names)]
    domain = Domain([], class_vars=class_vars, metas=meta_vars)

    section_var = class_vars[0]
    Y = np.array([section_var.to_val(cv) for cv in class_values])[:, None]

    return Corpus(domain=domain, Y=Y, metas=meta_values)
Exemplo n.º 49
0
    def commit(self):
        """Send the selected rows, the annotated table and the components.

        All three outputs are ``None`` when ``self.plotdata.data`` is
        ``None``. The selected output is also ``None`` when no row is
        selected.
        """
        selected = annotated = components = None
        graph = self.graph
        if self.plotdata.data is not None:
            name = self.data.name
            data = self.plotdata.data

            # Start from the 0/1 validity mask and write the graph's
            # selection values onto the valid positions (0 when the graph
            # has no selection), so non-zero entries mark selected rows.
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = \
                graph.selection if graph.selection is not None else 0
            selection = np.flatnonzero(mask)
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes

            # Selection values above 1 mean there are several groups.
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            # The third column of ``points`` supplies the attributes of
            # the components domain; the first two columns are output as
            # the rows "RX" and "RY".
            points = self.plotdata.points
            comp_domain = Domain(
                points[:, 2],
                metas=[StringVariable(name='component')])

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(np.array(points[:, 1].T, dtype=float),
                               np.array(points[:, 0].T, dtype=float))
            # np.vstack replaces np.row_stack, its deprecated alias.
            components = Table.from_numpy(
                comp_domain,
                X=np.vstack((points[:, :2].T, angle)),
                metas=metas)
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)
Exemplo n.º 50
0
        def set_actual_data():
            """Choose ``self.data`` from the node data or the network nodes.

            Reads ``network`` and ``self`` from the enclosing scope. The
            current context is closed first; a new one is opened only when
            data ends up being set.
            """
            self.closeContext()
            self.Error.data_size_mismatch.clear()
            self.Warning.no_graph_found.clear()
            self._invalid_data = False
            if network is None:
                # Node data without a network cannot be shown; warn and
                # stop. ``self.data`` is not modified on this path.
                if self.node_data is not None:
                    self.Warning.no_graph_found()
                return
            # NOTE(review): this reads ``self.network`` while the rest of
            # the function uses the closure variable ``network`` --
            # presumably the same object; confirm against the caller.
            n_nodes = len(self.network.nodes)
            if self.node_data is not None:
                # Explicit node data is used only when it has exactly one
                # row per node.
                if len(self.node_data) != n_nodes:
                    self.Error.data_size_mismatch()
                    self._invalid_data = True
                    self.data = None
                else:
                    self.data = self.node_data
            if self.node_data is None:
                # Without node data, fall back to what the network holds.
                if isinstance(network.nodes, Table):
                    self.data = network.nodes
                elif isinstance(network.nodes, np.ndarray) \
                        and (len(network.nodes.shape) == 1
                             or network.nodes.shape[1] == 1):
                    # A 1-d array or a single column becomes a table with
                    # no attributes and one string meta named "label".
                    self.data = Table.from_numpy(
                        Domain([], None, [StringVariable("label")]),
                        np.zeros((len(network.nodes), 0)),
                        None,
                        metas=network.nodes.reshape((n_nodes, 1)))
                else:
                    self.data = None

            if self.data is not None:
                # Replicate the necessary parts of set_data
                self.valid_data = np.full(len(self.data), True, dtype=bool)
                self.init_attr_values()
                self.openContext(self.data)
                self.cb_class_density.setEnabled(self.can_draw_density())
Exemplo n.º 51
0
    def test_callbacks_called_on_value(self):
        """Setting a graph or changing either value control updates output.

        Each of the three actions must call ``update_output`` and send on
        the network output.
        """
        widget = self.widget
        send = widget.Outputs.network.send = Mock()
        update = widget.update_output = Mock(side_effect=widget.update_output)

        def assert_updated_and_reset():
            update.assert_called()
            update.reset_mock()
            send.assert_called()
            send.reset_mock()

        self._set_graph(Table(Domain([self.c])))
        assert_updated_and_reset()

        widget.connect_value = 1
        widget.controls.connect_value.activated[int].emit(1)
        assert_updated_and_reset()

        widget.connector_value = 1
        widget.controls.connector_value.activated[int].emit(1)
        assert_updated_and_reset()
Exemplo n.º 52
0
    def test_value_combo_updates(self):
        """The value combo is refilled when the chosen variable changes.

        Every change must also call ``update_output``; going back to a
        variable with fewer values resets ``connect_value`` to 0.
        """
        widget = self.widget
        update = widget.update_output = Mock()
        value_combo = widget.controls.connect_value
        var_a, var_c = self.a, self.c

        def assert_updated_and_reset():
            update.assert_called()
            update.reset_mock()

        self._set_graph(Table(Domain([var_a, var_c])))
        self.assertEqual(len(value_combo), 2)
        assert_updated_and_reset()

        # Switch to the second variable.
        widget.variable = var_c
        widget.controls.variable.activated[int].emit(1)
        self.assertEqual(len(value_combo), 4)
        assert_updated_and_reset()

        # Switch back while a value index beyond the new range is chosen.
        widget.connect_value = 3
        widget.variable = var_a
        widget.controls.variable.activated[int].emit(0)
        self.assertEqual(len(value_combo), 2)
        self.assertEqual(widget.connect_value, 0)
        assert_updated_and_reset()
Exemplo n.º 53
0
    def test_file_not_found(self):
        """Reloading a deleted file shows an error and outputs no data."""
        # Create a dummy file
        file_name = "test_owfile_data.tab"
        domain = Domain([DiscreteVariable("d1", values=("a", "b"))],
                        DiscreteVariable("c1", values=("aaa", "bbb")))
        x = np.array([[0], [1], [0], [np.nan]])
        y = np.array([0, 1, 0, 1])
        saved = Table(domain, x, y)
        saved.save(file_name)

        widget = self.widget
        data_output = widget.Outputs.data

        # Open the file with the widget
        self.open_dataset(file_name)
        self.assertEqual(self.get_output(data_output).domain, saved.domain)

        # Delete the file and try to reload it
        remove(file_name)
        widget.load_data()
        self.assertEqual(file_name, path.basename(widget.last_path()))
        self.assertTrue(widget.Error.file_not_found.is_shown())
        self.assertIsNone(self.get_output(data_output))
        self.assertEqual(widget.infolabel.text(), "No data.")

        # Open a sample dataset
        self.open_dataset("iris")
        self.assertFalse(widget.Error.file_not_found.is_shown())
Exemplo n.º 54
0
    def test_no_values_target(self):
        """Predicting on data whose target has no values must not crash.

        The evaluation results are also fed to the confusion matrix, ROC
        analysis, lift curve and calibration plot widgets.
        """
        model = ConstantLearner()(Table("titanic"))
        self.send_signal(self.widget.Inputs.predictors, model)

        attributes = [
            DiscreteVariable("status", values=["first", "third"]),
            DiscreteVariable("age", values=["adult", "child"]),
            DiscreteVariable("sex", values=["female", "male"])
        ]
        target = DiscreteVariable("survived", values=[])
        x = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]])
        y = np.full((3, 1), np.nan)
        test = Table(Domain(attributes, [target]), x, y)

        self.send_signal(self.widget.Inputs.data, test)
        pred = self.get_output(self.widget.Outputs.predictions)
        self.assertEqual(len(pred), len(test))

        results = self.get_output(self.widget.Outputs.evaluation_results)

        # Keep a reference to every consumer widget until the test ends.
        consumers = []
        for widget_class in (OWConfusionMatrix, OWROCAnalysis,
                             OWLiftCurve, OWCalibrationPlot):
            consumer = self.create_widget(widget_class)
            consumers.append(consumer)
            self.send_signal(consumer.Inputs.evaluation_results,
                             results,
                             widget=consumer)
Exemplo n.º 55
0
    def test_result(self):
        """p-values and FDR values for six words match the reference."""
        preprocess = PreprocessorList([BASE_TRANSFORMER, RegexpTokenizer()])
        corpus = preprocess(Corpus.from_file("book-excerpts")[::3])
        bow = BowVectorizer().transform(corpus)

        # Keep only the columns of the chosen words.
        words = ["beheld", "events", "dragged", "basin", "visit", "have"]
        word_domain = Domain([bow.domain[word] for word in words])
        bow = bow.transform(word_domain)

        # The first document is the selected subset.
        self.send_signal(self.widget.Inputs.data, bow)
        self.send_signal(self.widget.Inputs.selected_data, bow[:1])
        self.wait_until_finished(timeout=100000)

        expected_p = [0.02128, 1, 0.04255, 0.06383, 0.08511, 0.97872]
        expected_fdr = [0.12766, 1, 0.12766, 0.12766, 0.12766, 1]
        np.testing.assert_array_almost_equal(
            self.widget.results.p_values, expected_p, decimal=5)
        np.testing.assert_array_almost_equal(
            self.widget.results.fdr_values, expected_fdr, decimal=5)
Exemplo n.º 56
0
    def test_empty_data(self):
        """No crash on empty data"""
        widget = self.widget
        sample = Table("iris")[::3]
        widget.default_method_index = Method.Model

        # The sample must pass through with X and Y unchanged.
        self.send_signal(widget.Inputs.data, sample, wait=1000)
        imputed = self.get_output(widget.Outputs.data)
        np.testing.assert_equal(imputed.X, sample.X)
        np.testing.assert_equal(imputed.Y, sample.Y)

        # A table with the same domain but no rows.
        no_rows = Table.from_domain(sample.domain)
        self.send_signal(widget.Inputs.data, no_rows, wait=1000)
        imputed = self.get_output(widget.Outputs.data)
        self.assertEqual(len(imputed), 0)

        # only meta columns
        metas_only = sample.transform(
            Domain([], [], sample.domain.attributes))
        self.send_signal("Data", metas_only, wait=1000)
        imputed = self.get_output("Data")
        self.assertEqual(len(imputed), len(metas_only))
        self.assertEqual(imputed.domain, metas_only.domain)
        np.testing.assert_equal(imputed.metas, metas_only.metas)
Exemplo n.º 57
0
 def prepare_data():
     """Set ``self.data`` and ``self.cont_x`` from the continuous attributes.

     Reads ``cont_attrs``, ``attrs`` and ``data`` from the enclosing scope.
     Sparse input is stored as CSR without further processing. For dense
     input, rows with non-finite values are dropped and every column is
     shifted to a zero minimum and divided by its sum. If no dense row is
     fully finite, an error is shown and nothing is stored.
     """
     if len(cont_attrs) < len(attrs):
         self.Warning.ignoring_disc_variables()
     if len(cont_attrs) == 1:
         self.Warning.single_attribute()

     x = Table.from_table(Domain(cont_attrs), data).X
     if sp.issparse(x):
         self.data = data
         self.cont_x = x.tocsr()
         return

     finite_rows = np.all(np.isfinite(x), axis=1)
     if not np.any(finite_rows):
         self.Error.no_defined_rows()
         return

     if np.all(finite_rows):
         self.data = data
         self.cont_x = x.copy()
     else:
         self.data = data[finite_rows]
         self.cont_x = x[finite_rows]

     self.cont_x -= np.min(self.cont_x, axis=0)[None, :]
     column_sums = np.sum(self.cont_x, axis=0)[None, :]
     column_sums[column_sums == 0] = 1  # avoid dividing by zero
     self.cont_x /= column_sums
    def test_select_data_discrete(self):
        """
        Test select data function on data with a discrete class
        """
        w = self.widget

        # test with data set for logistic regression - class discrete
        features = [ContinuousVariable('a'), ContinuousVariable('b')]
        target = DiscreteVariable('c', values=['a', 'b'])
        data = Table(Domain(features, target), [[1, 2], [1, 2]], [0, 1])
        expected_values = data.domain.class_var.values

        self.send_signal(w.Inputs.data, data)

        # Size of the selection: all rows, two attributes, two classes.
        self.assertEqual(len(w.select_data()), len(data))
        self.assertEqual(len(w.select_data().domain.attributes), 2)
        self.assertEqual(len(w.select_data().domain.class_var.values), 2)

        # Class values keep their order.
        self.assertEqual(w.select_data().domain.class_var.values[1],
                         expected_values[1])
        self.assertEqual(w.select_data().domain.class_var.values[0],
                         expected_values[0])

        # Attributes and target class agree with the widget settings.
        self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
        self.assertEqual(w.select_data().domain.attributes[1].name, w.attr_y)
        self.assertEqual(w.select_data().domain.class_var.values[0],
                         w.target_class)
Exemplo n.º 59
0
    def test_index(self):
        """``Domain.index`` accepts a variable, a name or an integer.

        Attributes map to their non-negative position; metas map to
        negative indices starting at -1.
        """
        d = Domain((age, gender, income), metas=(ssn, race))

        cases = (
            # first attribute
            (age, 0), ("AGE", 0), (0, 0), (np.int_(0), 0),
            # last attribute
            (income, 2), ("income", 2), (2, 2), (np.int_(2), 2),
            # first meta
            (ssn, -1), ("SSN", -1), (-1, -1), (np.int_(-1), -1),
            # second meta, by integer only
            (-2, -2), (np.int_(-2), -2),
        )
        for key, expected in cases:
            self.assertEqual(d.index(key), expected)
Exemplo n.º 60
0
 def test_index_error(self):
     """``Domain.index`` raises on keys that do not identify a variable."""
     # Three attributes and two metas: valid integers are 0..2 and -1, -2.
     d = Domain((age, gender, income), metas=(ssn, race))
     # Integers just outside the valid range, plain and numpy.
     with self.assertRaises(ValueError):
         d.index(3)
     with self.assertRaises(ValueError):
         d.index(np.int_(3))
     with self.assertRaises(ValueError):
         d.index(-3)
     with self.assertRaises(ValueError):
         d.index(np.int_(-3))
     # A variable that was not passed to this domain.
     with self.assertRaises(ValueError):
         d.index(incomeA)
     # An unknown name.
     with self.assertRaises(ValueError):
         d.index("no_such_thing")
     # A list is not an accepted key type.
     with self.assertRaises(TypeError):
         d.index([2])