Exemplo n.º 1
0
def get_nearest_cluster (clusters, point):
    """ Return the cluster from `clusters` closest to `point`.

    Distances are measured with distance.euclidian; the first cluster
    found at the minimal distance wins ties.
    """
    # The first cluster is the initial candidate.
    best = clusters[0]
    best_distance = distance.euclidian(best, point)

    # Keep whichever remaining cluster is strictly closer.
    for candidate in clusters[1:]:
        current_distance = distance.euclidian(candidate, point)
        if current_distance < best_distance:
            best = candidate
            best_distance = current_distance

    return best
Exemplo n.º 2
0
    def get_nearest_cluster(self, point):
        """ Return the cluster in self._clusters closest to `point`.

        Distances are measured with distance.euclidian; the first cluster
        found at the minimal distance wins ties.
        """
        clusters = self._clusters
        # The first cluster is the initial candidate.
        best = clusters[0]
        best_distance = distance.euclidian(best, point)

        # Keep whichever remaining cluster is strictly closer.
        for candidate in clusters[1:]:
            current_distance = distance.euclidian(candidate, point)
            if current_distance < best_distance:
                best = candidate
                best_distance = current_distance

        return best
Exemplo n.º 3
0
    def class_probability (self, point, c):
        """ Estimate P(label == c | point) with a K-nearest-neighbours vote.

        Computes the distance from `point` to every element of the
        training set, keeps the self.k nearest, and returns the fraction
        of those neighbours whose label equals `c` (a float in [0, 1]).

        Raises ZeroDivisionError when no neighbour is kept (empty data
        or self.k == 0), as the original did.
        """
        # Distance from `point` to each training sample, paired with label.
        distances = []
        for i in xrange (len (self.__data)):
            dist = distance.euclidian (self.__data[i], point)
            distances.append ((dist, self.__labels[i]))

        # Keep only K nearest points
        distances.sort (dist_cmp)
        distances = distances[:self.k]

        # Fraction of the K neighbours carrying the reference label c.
        # NOTE(review): the original also built a label-frequency table to
        # find the modal label, but that result was never used anywhere
        # (and its counting skipped each label's first occurrence); the
        # dead code was removed — the returned value is unchanged.
        freq_tot = 0.0
        ref_freq = 0.0
        for _dist, label in distances:
            freq_tot += 1
            if label == c:
                ref_freq += 1

        return ref_freq / freq_tot
Exemplo n.º 4
0
    def class_probability(self, point, c):
        """ Estimate P(label == c | point) with a K-nearest-neighbours vote.

        Computes the distance from `point` to every element of the
        training set, keeps the self.k nearest, and returns the fraction
        of those neighbours whose label equals `c` (a float in [0, 1]).

        Raises ZeroDivisionError when no neighbour is kept (empty data
        or self.k == 0), as the original did.
        """
        # Distance from `point` to each training sample, paired with label.
        distances = []
        for i in xrange(len(self.__data)):
            dist = distance.euclidian(self.__data[i], point)
            distances.append((dist, self.__labels[i]))

        # Keep only K nearest points
        distances.sort(dist_cmp)
        distances = distances[: self.k]

        # Fraction of the K neighbours carrying the reference label c.
        # NOTE(review): the original also built a label-frequency table to
        # find the modal label, but that result was never used anywhere
        # (and its counting skipped each label's first occurrence); the
        # dead code was removed — the returned value is unchanged.
        freq_tot = 0.0
        ref_freq = 0.0
        for _dist, label in distances:
            freq_tot += 1
            if label == c:
                ref_freq += 1

        return ref_freq / freq_tot
Exemplo n.º 5
0
def clusterize(neurons, data_dimension, clusters, dataset, nb_clusters=0):
    """ Build one cluster per neuron, label clusters from the dataset,
    and optionally merge clusters down to `nb_clusters`.

    `clusters` is filled in place (one cluster.Cluster per neuron); each
    dataset element's labels are attached to its nearest cluster; when
    nb_clusters > 0 the two closest clusters are repeatedly merged until
    only nb_clusters remain.
    """
    # Each neuron is put in its own cluster.
    for neuron in neurons:
        c = cluster.Cluster(data_dimension)
        c.add_neuron(neuron)
        c.compute_average()
        clusters.append(c)

    # Every element from the dataset is classified and the corresponding
    # cluster stores its label.
    for z in dataset:
        nearest_cluster = clusters[0]
        best_distance = distance.euclidian(clusters[0], z.data())
        for i in xrange(1, len(clusters)):
            tmp_distance = distance.euclidian(clusters[i], z.data())
            if tmp_distance < best_distance:
                nearest_cluster = clusters[i]
                best_distance = tmp_distance
        nearest_cluster.add_label(z.labels())
        nearest_cluster._neurons[0]._labels.append(z.labels())

    # Here we reduce the number of clusters to the one specified: look
    # for the two closest clusters and merge them until we have the
    # requested number of clusters.
    if nb_clusters > 0:
        while len(clusters) > nb_clusters:
            best_distance = clusters[0].distance_with(clusters[1].get_center())
            cluster1 = 0
            cluster2 = 1
            for i in xrange(len(clusters)):
                for j in xrange(len(clusters)):
                    if i != j:
                        tmp_distance = clusters[i].distance_with(
                            clusters[j].get_center())
                        if tmp_distance < best_distance:
                            cluster1 = i
                            cluster2 = j
                            best_distance = tmp_distance
            # BUG FIX: merge the recorded best pair (cluster1, cluster2),
            # not whatever values the loop indices i/j ended on — the
            # original merged clusters[i] with clusters[j], i.e. always
            # the last pair visited.
            clusters[cluster1].merge_with(clusters[cluster2])
            clusters.pop(cluster2)
Exemplo n.º 6
0
def clusterize (neurons, data_dimension, clusters, dataset, nb_clusters=0):
    """ Build one cluster per neuron, label clusters from the dataset,
    and optionally merge clusters down to `nb_clusters`.

    `clusters` is filled in place (one cluster.Cluster per neuron); each
    dataset element's labels are attached to its nearest cluster; when
    nb_clusters > 0 the two closest clusters are repeatedly merged until
    only nb_clusters remain.
    """
    # Each neuron is put in its own cluster.
    for neuron in neurons:
        c = cluster.Cluster(data_dimension)
        c.add_neuron(neuron)
        c.compute_average()
        clusters.append(c)

    # Every element from the dataset is classified and the corresponding
    # cluster stores its label.
    for z in dataset:
        nearest_cluster = clusters[0]
        best_distance = distance.euclidian(clusters[0], z.data())
        for i in xrange(1, len(clusters)):
            tmp_distance = distance.euclidian(clusters[i], z.data())
            if tmp_distance < best_distance:
                nearest_cluster = clusters[i]
                best_distance = tmp_distance
        nearest_cluster.add_label(z.labels())
        nearest_cluster._neurons[0]._labels.append(z.labels())

    # Here we reduce the number of clusters to the one specified: look
    # for the two closest clusters and merge them until we have the
    # requested number of clusters.
    if nb_clusters > 0:
        while len(clusters) > nb_clusters:
            best_distance = clusters[0].distance_with(clusters[1].get_center())
            cluster1 = 0
            cluster2 = 1
            for i in xrange(len(clusters)):
                for j in xrange(len(clusters)):
                    if i != j:
                        tmp_distance = clusters[i].distance_with(clusters[j].get_center())
                        if tmp_distance < best_distance:
                            cluster1 = i
                            cluster2 = j
                            best_distance = tmp_distance
            # BUG FIX: merge the recorded best pair (cluster1, cluster2),
            # not whatever values the loop indices i/j ended on — the
            # original merged clusters[i] with clusters[j], i.e. always
            # the last pair visited.
            clusters[cluster1].merge_with(clusters[cluster2])
            clusters.pop(cluster2)
Exemplo n.º 7
0
    def learn (self):
        """ Train the self-organizing map, then clusterize the neurons.

        Runs at most ITERATION_MAX iterations of on-line learning: each
        iteration draws a random observation z, finds its nearest neuron
        (best matching unit) and pulls every neuron towards z, weighted
        by a Gaussian kernel of its distance to the winner.  Stops early
        when at most PERC_NEURONS_CHANGED percent of the neurons moved
        during a check round.  Returns the number of iterations run.
        """
        if self._verbose:
            self.draw_all()
        # Parameters initialization
        neurons = self._neurons
        # Initial neighbourhood radius: distance between the middle and
        # the last neuron of the map (Python 2 integer division on the
        # middle index).
        sigma_init = distance.euclidian(neurons[len(neurons) / 2].get_position(), \
                neurons[len(neurons) - 1].get_position(), \
                dimension=len(neurons[0].get_position()))
        # Time constants for the exponential decays of sigma and nu.
        t1 = 1000 / math.log(sigma_init)
        nu0 = 0.1
        t2 = 1000

        # Learning rate at iteration n.
        # NOTE(review): t2 and n are both ints, so under Python 2 the
        # expression -n / t2 floors — nu decays in discrete jumps every
        # 1000 iterations rather than smoothly.  Confirm this is intended.
        nu = lambda n: nu0 * math.exp(-n / t2)

        iter = 1
        data = self._dataset.data()

        while (iter <= ITERATION_MAX):
            # Pick a random observation from the dataset.
            z = data[int(random.random() * len(data))]

            # Current neighbourhood radius (exponential decay in iter).
            sigma = sigma_init * math.exp(-iter / t1)

            # Get nearest neuron for observation z
            nearest_neuron = neurons[0]
            best_distance = distance.euclidian(neurons[0], z)
            for i in xrange(1, len(neurons)):
                tmp_distance = distance.euclidian(neurons[i], z)
                if tmp_distance < best_distance:
                    nearest_neuron = neurons[i]
                    best_distance = tmp_distance

            # Every len(neurons) iterations, start a check round that
            # counts how many neurons actually move (convergence test).
            if (iter % len(neurons) == 0):
                changed_neurons = 0
            else:
                changed_neurons = None
            for neuron in neurons:
                # Distance between this neuron and the winning neuron.
                d = distance.euclidian(neuron.get_position(), \
                        nearest_neuron.get_position(),         \
                        dimension=len(neuron.get_position()))
                h = gaussian_kernel(d, sigma)
                dmoved = 0
                # Pull the neuron towards z, scaled by the learning rate
                # and the kernel; track the total displacement.
                for i in xrange(len(neuron)):
                    step = nu(iter) * h * (z[i] - neuron[i])
                    neuron[i] += step
                    dmoved += abs(step)

                if not changed_neurons is None:
                    if dmoved > 0:
                        changed_neurons += 1

            # Convergence: stop when too few neurons moved this round.
            if not changed_neurons is None:
                perc = float(changed_neurons) / len(neurons) * 100
                if perc <= PERC_NEURONS_CHANGED:
                    break

            if iter % 1000 == 0 and self._verbose:
                # NOTE(review): perc is only bound after a check round has
                # run (iter % len(neurons) == 0); with more than 1000
                # neurons this print could raise NameError — confirm.
                print "%f%% of neurons moved at last check" % perc
                print "iteration %i" % iter
                self.draw_all()

            iter += 1

        if self._verbose:
            print "%i%% of neurons moved at last check" % int(perc)
            self.draw_all()

        # Group the trained neurons into labelled clusters.
        clusterize(self._neurons, self._data_dimension, self._clusters, self._dataset, self._nb_clusters)

        return iter - 1
Exemplo n.º 8
0
    def learn(self):
        """ Train the self-organizing map, then clusterize the neurons.

        Runs at most ITERATION_MAX iterations of on-line learning: each
        iteration draws a random observation z, finds its nearest neuron
        (best matching unit) and pulls every neuron towards z, weighted
        by a Gaussian kernel of its distance to the winner.  Stops early
        when at most PERC_NEURONS_CHANGED percent of the neurons moved
        during a check round.  Returns the number of iterations run.
        """
        if self._verbose:
            self.draw_all()
        # Parameters initialization
        neurons = self._neurons
        # Initial neighbourhood radius: distance between the middle and
        # the last neuron of the map (Python 2 integer division on the
        # middle index).
        sigma_init = distance.euclidian(neurons[len(neurons) / 2].get_position(), \
                neurons[len(neurons) - 1].get_position(), \
                dimension=len(neurons[0].get_position()))
        # Time constants for the exponential decays of sigma and nu.
        t1 = 1000 / math.log(sigma_init)
        nu0 = 0.1
        t2 = 1000

        # Learning rate at iteration n.
        # NOTE(review): t2 and n are both ints, so under Python 2 the
        # expression -n / t2 floors — nu decays in discrete jumps every
        # 1000 iterations rather than smoothly.  Confirm this is intended.
        nu = lambda n: nu0 * math.exp(-n / t2)

        iter = 1
        data = self._dataset.data()

        while (iter <= ITERATION_MAX):
            # Pick a random observation from the dataset.
            z = data[int(random.random() * len(data))]

            # Current neighbourhood radius (exponential decay in iter).
            sigma = sigma_init * math.exp(-iter / t1)

            # Get nearest neuron for observation z
            nearest_neuron = neurons[0]
            best_distance = distance.euclidian(neurons[0], z)
            for i in xrange(1, len(neurons)):
                tmp_distance = distance.euclidian(neurons[i], z)
                if tmp_distance < best_distance:
                    nearest_neuron = neurons[i]
                    best_distance = tmp_distance

            # Every len(neurons) iterations, start a check round that
            # counts how many neurons actually move (convergence test).
            if (iter % len(neurons) == 0):
                changed_neurons = 0
            else:
                changed_neurons = None
            for neuron in neurons:
                # Distance between this neuron and the winning neuron.
                d = distance.euclidian(neuron.get_position(), \
                        nearest_neuron.get_position(),         \
                        dimension=len(neuron.get_position()))
                h = gaussian_kernel(d, sigma)
                dmoved = 0
                # Pull the neuron towards z, scaled by the learning rate
                # and the kernel; track the total displacement.
                for i in xrange(len(neuron)):
                    step = nu(iter) * h * (z[i] - neuron[i])
                    neuron[i] += step
                    dmoved += abs(step)

                if not changed_neurons is None:
                    if dmoved > 0:
                        changed_neurons += 1

            # Convergence: stop when too few neurons moved this round.
            if not changed_neurons is None:
                perc = float(changed_neurons) / len(neurons) * 100
                if perc <= PERC_NEURONS_CHANGED:
                    break

            if iter % 1000 == 0 and self._verbose:
                # NOTE(review): perc is only bound after a check round has
                # run (iter % len(neurons) == 0); with more than 1000
                # neurons this print could raise NameError — confirm.
                print "%f%% of neurons moved at last check" % perc
                print "iteration %i" % iter
                self.draw_all()

            iter += 1

        if self._verbose:
            print "%i%% of neurons moved at last check" % int(perc)
            self.draw_all()

        # Group the trained neurons into labelled clusters.
        clusterize(self._neurons, self._data_dimension, self._clusters,
                   self._dataset, self._nb_clusters)

        return iter - 1