Exemplo n.º 1
0
    def test_matching_dissim(self):
        """Check matching_dissim on integer, NaN-containing and string data."""
        # Integer rows differing in exactly one position.
        a = np.array([[0, 1, 2, 0, 1, 2]])
        b = np.array([[0, 1, 2, 0, 1, 0]])
        assert_equal(1, matching_dissim(a, b))

        # A NaN entry is expected to count as one additional mismatch.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        a = np.array([[np.nan, 1, 2, 0, 1, 2]])
        b = np.array([[0, 1, 2, 0, 1, 0]])
        assert_equal(2, matching_dissim(a, b))

        # One string row compared against two rows: an identical one and a
        # reversed one that differs in all four positions.
        a = np.array([['a', 'b', 'c', 'd']])
        b = np.array([['a', 'b', 'c', 'd'], ['d', 'c', 'b', 'a']])
        assert_array_equal(np.array([0, 4]), matching_dissim(a, b))
Exemplo n.º 2
0
    def test_matching_dissim(self):
        """Check matching_dissim on integer, NaN-containing and string data."""
        # Integer rows differing in exactly one position.
        a = np.array([[0, 1, 2, 0, 1, 2]])
        b = np.array([[0, 1, 2, 0, 1, 0]])
        assert_equal(1, matching_dissim(a, b))

        # A NaN entry is expected to count as one additional mismatch.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        a = np.array([[np.nan, 1, 2, 0, 1, 2]])
        b = np.array([[0, 1, 2, 0, 1, 0]])
        assert_equal(2, matching_dissim(a, b))

        # One string row compared against two rows: an identical one and a
        # reversed one that differs in all four positions.
        a = np.array([['a', 'b', 'c', 'd']])
        b = np.array([['a', 'b', 'c', 'd'], ['d', 'c', 'b', 'a']])
        assert_array_equal(np.array([0, 4]), matching_dissim(a, b))
Exemplo n.º 3
0
    def _get_initial_centers(self, dataset, categorical_indices):
        """Pick ``self.cluster_number`` initial centroids from ``dataset``.

        The first centroid is a randomly drawn row. Each further centroid is
        drawn with probability proportional to every row's distance to its
        nearest already-chosen centroid. That distance is the numerical
        dissimilarity (``dissimilarity_python.euclidean``) plus
        ``categorical_weight`` times the categorical dissimilarity
        (``matching_dissim``).

        Parameters
        ----------
        dataset : DataFrame-like
            Data to cluster; must support ``take``, ``drop``, ``columns``,
            ``values`` and ``shape``.
        categorical_indices : sequence of int
            Positional indices of the categorical columns. All remaining
            columns are treated as numerical.

        Returns
        -------
        list
            ``[numerical_centroids, categorical_centroids]``, two arrays
            with one row per cluster.
        """
        dataset_cat = dataset.take(categorical_indices, axis=1).values
        categorical_labels = [
            column for index, column in enumerate(dataset.columns)
            if index in categorical_indices
        ]
        dataset_num = dataset.drop(categorical_labels, axis=1).values

        # A missing or negative weight falls back to half the overall
        # standard deviation of the numerical part.
        categorical_weight = self.categorical_weight
        if categorical_weight is None or categorical_weight < 0:
            categorical_weight = 0.5 * dataset_num.std()

        n_samples = dataset.shape[0]
        initial_centroids_num = np.zeros(
            (self.cluster_number, dataset_num.shape[1]))
        # NOTE(review): this buffer is a float array, so the categorical
        # values presumably have to be numerically encoded -- confirm
        # against callers.
        initial_centroids_cat = np.zeros(
            (self.cluster_number, dataset_cat.shape[1]))

        # NOTE(review): assumes randint's upper bound is inclusive (as in
        # random.randint) -- confirm which randint this module imports.
        rand_index = randint(0, n_samples - 1)
        initial_centroids_num[0], initial_centroids_cat[0] = dataset_num[
            rand_index], dataset_cat[rand_index]

        for i in range(1, self.cluster_number):
            # Row j holds the distance of every sample to centroid j.
            distances_num_cat = [
                np.zeros((i, n_samples), dtype=np.float64),
                np.zeros((i, n_samples))
            ]
            for j in range(0, i):
                distances_num_cat[0][j] = dissimilarity_python.euclidean(
                    dataset_num, initial_centroids_num[j])
                distances_num_cat[1][j] = matching_dissim(
                    dataset_cat, initial_centroids_cat[j])
            # Distance of each sample to its nearest chosen centroid.
            distances = np.amin(distances_num_cat[0] +
                                categorical_weight * distances_num_cat[1],
                                axis=0)

            total_distance = np.sum(distances)
            if total_distance == 0:
                # Every sample coincides with an already chosen centroid, so
                # normalising would divide by zero and np.random.choice would
                # reject the resulting NaN probabilities. Any sample is an
                # equally good pick; p=None makes the draw uniform.
                probabilities = None
            else:
                probabilities = distances / total_distance
            chosen_point = np.random.choice(n_samples, p=probabilities)
            initial_centroids_num[i] = dataset_num[chosen_point]
            initial_centroids_cat[i] = dataset_cat[chosen_point]

        initial_centroids = [initial_centroids_num, initial_centroids_cat]
        return initial_centroids
Exemplo n.º 4
0
    def test_ng_dissim(self):
        """Check ng_dissim on integer, NaN-containing and string data.

        Each data set is tested twice: once with a real membership matrix
        against hand-computed values, and once with an all-zero membership
        matrix (initialization), where ng_dissim is expected to give the
        same result as matching_dissim.
        """
        X = np.array([[0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 1]])
        centroids = X
        membship = np.array([[1, 0], [0, 1]])

        assert_array_equal(np.array([0., 1.]),
                           ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([1., 0.]),
                           ng_dissim(centroids, X[1], X=X, membship=membship))

        # Unit test for initialization (i.e., same as matching_dissim)
        membship = np.array([[0, 0], [0, 0]])
        mdiss_00 = matching_dissim(np.array([X[0]]), np.array([X[0]]))[0]
        mdiss_01 = matching_dissim(np.array([X[0]]), np.array([X[1]]))[0]
        mdiss_11 = matching_dissim(np.array([X[1]]), np.array([X[1]]))[0]

        assert_array_equal(np.array([mdiss_00, mdiss_01]),
                           ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([mdiss_01, mdiss_11]),
                           ng_dissim(centroids, X[1], X=X, membship=membship))

        # Unit test for NaN
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        X = np.array([[np.nan, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 1]])
        centroids = X
        membship = np.array([[1, 0], [0, 1]])

        assert_array_equal(np.array([1., 2.]),
                           ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([2., 0.]),
                           ng_dissim(centroids, X[1], X=X, membship=membship))

        # Unit test for initialization with NaN (i.e., same as
        # matching_dissim)
        membship = np.array([[0, 0], [0, 0]])
        mdiss_00 = matching_dissim(np.array([X[0]]), np.array([X[0]]))[0]
        mdiss_01 = matching_dissim(np.array([X[0]]), np.array([X[1]]))[0]
        mdiss_11 = matching_dissim(np.array([X[1]]), np.array([X[1]]))[0]

        assert_array_equal(np.array([mdiss_00, mdiss_01]),
                           ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([mdiss_01, mdiss_11]),
                           ng_dissim(centroids, X[1], X=X, membship=membship))

        # String data: three points, two centroids (equal to X[0] and X[2]).
        X = np.array([['a', 'b', 'c', 'd'], ['a', 'b', 'e', 'd'],
                      ['d', 'c', 'b', 'a']])
        centroids = np.array([['a', 'b', 'c', 'd'], ['d', 'c', 'b', 'a']])
        membship = np.array([[1, 1, 0], [0, 0, 1]])

        assert_array_equal(np.array([0.5, 4.]),
                           ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([1., 4.]),
                           ng_dissim(centroids, X[1], X=X, membship=membship))
        assert_array_equal(np.array([4., 0.]),
                           ng_dissim(centroids, X[2], X=X, membship=membship))

        # Unit test for initialization (i.e., same as matching_dissim).
        # The centroids are X[0] and X[2], so the expected pair for X[i] is
        # (dissimilarity to X[0], dissimilarity to X[2]).
        membship = np.array([[0, 0, 0], [0, 0, 0]])
        mdiss_00 = matching_dissim(np.array([X[0]]), np.array([X[0]]))[0]
        mdiss_01 = matching_dissim(np.array([X[0]]), np.array([X[1]]))[0]
        mdiss_02 = matching_dissim(np.array([X[0]]), np.array([X[2]]))[0]
        mdiss_12 = matching_dissim(np.array([X[1]]), np.array([X[2]]))[0]
        mdiss_22 = matching_dissim(np.array([X[2]]), np.array([X[2]]))[0]

        assert_array_equal(np.array([mdiss_00, mdiss_02]),
                           ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([mdiss_01, mdiss_12]),
                           ng_dissim(centroids, X[1], X=X, membship=membship))
        assert_array_equal(np.array([mdiss_02, mdiss_22]),
                           ng_dissim(centroids, X[2], X=X, membship=membship))
Exemplo n.º 5
0
    def test_ng_dissim(self):
        """Check ng_dissim on integer, NaN-containing and string data.

        Each data set is tested twice: once with a real membership matrix
        against hand-computed values, and once with an all-zero membership
        matrix (initialization), where ng_dissim is expected to give the
        same result as matching_dissim.
        """
        X = np.array([[0, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 1]])
        centroids = X
        membship = np.array([[1, 0], [0, 1]])

        assert_array_equal(np.array([0., 1.]), ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([1., 0.]), ng_dissim(centroids, X[1], X=X, membship=membship))

        # Unit test for initialization (i.e., same as matching_dissim)
        membship = np.array([[0, 0], [0, 0]])
        mdiss_00 = matching_dissim(np.array([X[0]]), np.array([X[0]]))[0]
        mdiss_01 = matching_dissim(np.array([X[0]]), np.array([X[1]]))[0]
        mdiss_11 = matching_dissim(np.array([X[1]]), np.array([X[1]]))[0]

        assert_array_equal(np.array([mdiss_00, mdiss_01]), ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([mdiss_01, mdiss_11]), ng_dissim(centroids, X[1], X=X, membship=membship))

        # Unit test for NaN
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        X = np.array([[np.nan, 1, 2, 0, 1, 2], [0, 1, 2, 0, 1, 1]])
        centroids = X
        membship = np.array([[1, 0], [0, 1]])

        assert_array_equal(np.array([1., 2.]), ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([2., 0.]), ng_dissim(centroids, X[1], X=X, membship=membship))

        # Unit test for initialization with NaN (i.e., same as matching_dissim)
        membship = np.array([[0, 0], [0, 0]])
        mdiss_00 = matching_dissim(np.array([X[0]]), np.array([X[0]]))[0]
        mdiss_01 = matching_dissim(np.array([X[0]]), np.array([X[1]]))[0]
        mdiss_11 = matching_dissim(np.array([X[1]]), np.array([X[1]]))[0]

        assert_array_equal(np.array([mdiss_00, mdiss_01]), ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([mdiss_01, mdiss_11]), ng_dissim(centroids, X[1], X=X, membship=membship))

        # String data: three points, two centroids (equal to X[0] and X[2]).
        X = np.array([['a', 'b', 'c', 'd'], ['a', 'b', 'e', 'd'], ['d', 'c', 'b', 'a']])
        centroids = np.array([['a', 'b', 'c', 'd'], ['d', 'c', 'b', 'a']])
        membship = np.array([[1, 1, 0], [0, 0, 1]])

        assert_array_equal(np.array([0.5, 4.]), ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([1., 4.]), ng_dissim(centroids, X[1], X=X, membship=membship))
        assert_array_equal(np.array([4., 0.]), ng_dissim(centroids, X[2], X=X, membship=membship))

        # Unit test for initialization (i.e., same as matching_dissim).
        # The centroids are X[0] and X[2], so the expected pair for X[i] is
        # (dissimilarity to X[0], dissimilarity to X[2]).
        membship = np.array([[0, 0, 0], [0, 0, 0]])
        mdiss_00 = matching_dissim(np.array([X[0]]), np.array([X[0]]))[0]
        mdiss_01 = matching_dissim(np.array([X[0]]), np.array([X[1]]))[0]
        mdiss_02 = matching_dissim(np.array([X[0]]), np.array([X[2]]))[0]
        mdiss_12 = matching_dissim(np.array([X[1]]), np.array([X[2]]))[0]
        mdiss_22 = matching_dissim(np.array([X[2]]), np.array([X[2]]))[0]

        assert_array_equal(np.array([mdiss_00, mdiss_02]), ng_dissim(centroids, X[0], X=X, membship=membship))
        assert_array_equal(np.array([mdiss_01, mdiss_12]), ng_dissim(centroids, X[1], X=X, membship=membship))
        assert_array_equal(np.array([mdiss_02, mdiss_22]), ng_dissim(centroids, X[2], X=X, membship=membship))