def __init__(self, alphabet, size=20, mean=0.075, verbose=False):
    """Set up a GrowingNeuralGas whose feature length matches *alphabet*.

    The underlying network is locked (read-only) immediately after
    construction; training code is expected to unlock it explicitly.
    """
    self.alphabet = alphabet
    self.run_iterations = 0  # no training passes have happened yet
    self.gng = GrowingNeuralGas(
        size,
        mean=mean,
        feature_length=len(alphabet),
        verbose=verbose,
    )
    self.gng.lock(True)
def evaluate():
    """Cluster the weekly sales dataset with a Growing Neural Gas.

    Reads ``Sales_Transactions_Dataset_Weekly.csv`` from the working
    directory, fits a GNG on the normalized weekly columns, plots the
    clusters and prints the cluster membership as lists of 1-based row
    indices.
    """
    ds = pd.read_csv("Sales_Transactions_Dataset_Weekly.csv")
    # NOTE(review): "Normalized 50" is skipped (0-49 then 51) — this mirrors
    # the original hand-written column list; confirm against the CSV schema.
    columns = ["Normalized %d" % i for i in range(50)] + ["Normalized 51"]
    data = ds[columns]
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    gng = GrowingNeuralGas(data.values, output_folder="visualization")
    gng.fit_network(e_b=0.05, e_n=0.006, a_max=8, l=100, a=0.5, d=0.995,
                    passes=10, plot_evolution=True)
    clustered_data = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))
    target_infered = [cluster for _, cluster in clustered_data]
    # homogeneity = metrics.homogeneity_score(data, target_infered)
    # print(homogeneity)
    gng.plot_clusters(gng.cluster_data())
    # Group 1-based observation indices by cluster id.  The original loop
    # shadowed the builtin ``len`` and appended elem[1]+1 empty lists every
    # time a new cluster id appeared, leaving spurious empty clusters.
    clusters = [[]]
    for index, elem in enumerate(gng.cluster_data(), start=1):
        cluster_id = int(elem[1])
        while len(clusters) <= cluster_id:
            clusters.append([])
        clusters[cluster_id].append(index)
    print(clusters)
def evaluate(e_b, e_n, a_max, l, a, d, passes):
    """Fit a GNG on the weekly sales data and return cluster membership.

    All parameters are forwarded verbatim to
    ``GrowingNeuralGas.fit_network``.  Returns a list of clusters, each a
    list of 1-based row indices of the observations it contains.
    """
    ds = pd.read_csv("Sales_Transactions_Dataset_Weekly.csv")
    # NOTE(review): "W50" is skipped (W0-W49 then W51) — this mirrors the
    # original hand-written column list; confirm against the CSV schema.
    data = ds[["W%d" % i for i in range(50)] + ["W51"]]
    # plt.plot(data, label='Data')
    # plt.show()
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    gng = GrowingNeuralGas(data.values, output_folder="visualization")
    gng.fit_network(e_b=e_b, e_n=e_n, a_max=a_max, l=l, a=a, d=d,
                    passes=passes, plot_evolution=True)
    clustered_data = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))
    target_infered = [cluster for _, cluster in clustered_data]
    # homogeneity = metrics.homogeneity_score(data, target_infered)
    # print(homogeneity)
    gng.plot_clusters(gng.cluster_data())
    # Group 1-based observation indices by cluster id.  The original loop
    # shadowed the builtin ``len`` and over-allocated empty cluster slots.
    clusters = [[]]
    for index, elem in enumerate(gng.cluster_data(), start=1):
        cluster_id = int(elem[1])
        while len(clusters) <= cluster_id:
            clusters.append([])
        clusters[cluster_id].append(index)
    # Report only non-empty clusters, numbering them consecutively.
    ind = 0
    for clust in clusters:
        if clust:
            print('Cluster №' + str(ind) + ' size: (' + str(len(clust))
                  + ') contains data:')
            ind += 1
            print(clust)
    print('Global error all network(on euclidean distance): '
          + str(gng.compute_global_error()))
    return clusters
class GNGNetwork:
    """Wrapper around a GrowingNeuralGas trained on fixed-length columns.

    Columns are feature vectors whose length equals ``len(alphabet)``.
    The wrapped network is kept locked except while training.
    """

    def __init__(self, alphabet, size=20, mean=0.075, verbose=False):
        self.gng = GrowingNeuralGas(size, mean=mean,
                                    feature_length=len(alphabet),
                                    verbose=verbose)
        self.alphabet = alphabet
        self.run_iterations = 0  # cumulative training iterations
        self.gng.lock(True)  # read-only until train() unlocks it

    def save(self, filename):
        """Pickle this network to *filename*."""
        # ``with`` guarantees the handle is closed (the original leaked it).
        with open(filename, "wb") as f:
            pickle.dump(self, f)

    @staticmethod
    def load(filename, mean):
        """Unpickle a network from *filename*.

        ``mean`` is accepted for backward compatibility but is unused.
        WARNING: ``pickle.load`` can execute arbitrary code — only load
        files from a trusted source.
        """
        with open(filename, "rb") as f:
            return pickle.load(f)

    def train(self, columns, iterations, fraction=1.0, verbose=False):
        """Feed shuffled columns through the network *iterations* times.

        Per iteration only the first ``fraction`` of the shuffled index
        order is presented to the network.
        """
        if verbose:
            print("Training GNG network (iterations: %d)..." % iterations)
        self.gng.lock(False)
        indices = list(range(len(columns)))
        for iteration in range(iterations):
            self.run_iterations += 1
            shuffle(indices)
            if verbose:
                print("Iter %4d" % iteration)
            for i in indices[:int(fraction * len(indices))]:
                self.gng.feedforward(columns[i])
        self.gng.lock(True)

    def evaluate(self, sequences, length, indices):
        """Sum evaluate_column scores over all generated columns."""
        return sum(self.evaluate_column(column)
                   for column in gen_columns(self.alphabet, sequences,
                                             length, indices))

    def evaluate_column(self, column, verbose=False):
        """Score one column as the squared maximum active-neuron output."""
        output = self.gng.feedforward(column)
        active = self.gng.active_neurons
        # Keep only outputs of neurons flagged active.
        output = [o for o, a in zip(output, active) if a]
        result = max(output) ** 2
        if verbose:
            print(" ".join("%.4f" % x if x > 0.0 else " " for x in column))
            print(" ".join("%.4f" % x for x in output))
            print("")
        return result
def create_cluster_from_neuralgasnetwork(model: model, a=0.5, passes=80,
                                         distance_toremove_edge=8):
    """Cluster *model*'s measurements with a Growing Neural Gas.

    If the model already holds a stored clustering (``load_cluster``),
    the fit is skipped.  Returns the renamed model with clusters attached.
    """
    data = model.mesures().values
    model.setname("NEURALGAS avec distance_toremove="
                  + str(distance_toremove_edge) + " passes=" + str(passes))
    if not model.load_cluster():
        model.start_treatment()
        gng = GrowingNeuralGas(data)
        # BUG FIX: forward the ``a`` parameter — the original hard-coded
        # a=0.5, silently ignoring the caller's value.
        gng.fit_network(e_b=0.05, e_n=0.006,
                        distance_toremove_edge=distance_toremove_edge,
                        l=100, a=a, d=0.995, passes=passes,
                        plot_evolution=False)
        model.end_treatment()
        print('Found %d clusters.' % gng.number_of_clusters())
        model.clusters_from_real(gng.cluster_data(), "NEURALGAS_")
        #gng.plot_clusters(gng.cluster_data())
    return model
def evaluate_on_digits():
    """Fit a GNG on sklearn's digits data and report cluster homogeneity.

    Prints the number of connected components found, the homogeneity
    score of the inferred labels against the true digit labels, and plots
    the dimensionality-reduced clusters.
    """
    digits = datasets.load_digits()
    data, target = digits.data, digits.target
    gng = GrowingNeuralGas(data)
    gng.fit_network(e_b=0.05, e_n=0.006, a_max=8, l=100, a=0.5, d=0.995,
                    passes=5, plot_evolution=False)
    clustered_data = gng.cluster_data()
    print('Found %d clusters.' % nx.number_connected_components(gng.network))
    target_infered = [cluster for _, cluster in clustered_data]
    homogeneity = metrics.homogeneity_score(target, target_infered)
    print(homogeneity)
    gng.plot_clusters(gng.reduce_dimension(gng.cluster_data()))
values = normalize(values).values # returning values return values if __name__ == '__main__': if os.path.exists('visualization/sequence'): shutil.rmtree('visualization/sequence') os.makedirs('visualization/sequence') n_samples = 1500 data = None #data = dataset() # in case the user wants to use a specific dataset #data = datasets.load_breast_cancer() #data = datasets.load_iris(n_samples=n_samples, random_state=8) #data = datasets.make_blobs(n_samples=n_samples, random_state=8) data = datasets.make_moons(n_samples=n_samples, noise=.05) #data = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05) #data = StandardScaler().fit_transform(data.data) # depends on the data variable data = StandardScaler().fit_transform(data[0]) # depends on the data variable print('Done.') print('Fitting neural network...') gng = GrowingNeuralGas(data) gng.fit_network(e_b=0.1, e_n=0.006, a_max=10, l=200, a=0.5, d=0.995, passes=5, plot_evolution=True) print('Found %d clusters.' % gng.number_of_clusters()) gng.plot_clusters(gng.cluster_data())
        # Fragment of a point-extraction method; the enclosing ``def`` is
        # outside this view.  ``colors``, ``x``, ``y`` and
        # ``backgroundPoint`` are presumably set earlier — TODO confirm.
        for i in range(len(self.background)):
            # Any channel differing from the background by more than 50
            # marks the pixel as foreground.
            if abs(colors[i] - self.background[i]) > 50:
                backgroundPoint = False
        # Scale each channel into [0, 1].
        for i in range(len(colors)):
            colors[i] = self.normalize(colors[i], 255)
        # Remove alpha channel + others if we find them.
        while len(colors) > 3:
            colors.pop(len(colors) - 1)
        # NOTE(review): y is normalized by self.width, not self.height —
        # confirm whether this is intentional.
        point = [self.normalize(x, self.width), self.normalize(y, self.width)] + colors
        return point


# Script (Python 2 — note the print statement): train a 5-dimensional GNG
# on a series of RGB images, then display the resulting units in a Qt view.
gng = GrowingNeuralGas(dim=5)
for i in range(1, 12):
    reader = ImageReader("images/rgb/rgb%s.png" % i)
    gng.run(2000, reader)
print "Number of units:" + str(len(gng.units))
application = QApplication([])
view = GNGPlotter(reader.width, reader.height)
view.gng = gng
view.show()
application.exec_()
from gng import GrowingNeuralGas
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
import os
import shutil

__authors__ = 'Adrien Guille'
__email__ = '*****@*****.**'


if __name__ == '__main__':
    # Recreate the folder that receives the network-evolution snapshots.
    if os.path.exists('visualization/sequence'):
        shutil.rmtree('visualization/sequence')
    os.makedirs('visualization/sequence')

    n_samples = 2000
    dataset_type = 'moons'
    data = None
    print('Preparing data...')
    # Dispatch the synthetic-data generator by name.
    generators = {
        'blobs': lambda: datasets.make_blobs(n_samples=n_samples, random_state=8),
        'moons': lambda: datasets.make_moons(n_samples=n_samples, noise=.05),
        'circles': lambda: datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05),
    }
    if dataset_type in generators:
        data = generators[dataset_type]()
    # Generators return (X, y); standardize only the sample matrix.
    data = StandardScaler().fit_transform(data[0])
    print('Done.')
    print('Fitting neural network...')
    gng = GrowingNeuralGas(data)
    gng.fit_network(e_b=0.1, e_n=0.006, a_max=10, l=200, a=0.5, d=0.995,
                    passes=8, plot_evolution=True)
    print('Found %d clusters.' % gng.number_of_clusters())
    gng.plot_clusters(gng.cluster_data())
plt.show()

# Standardize the features before feeding them to the network.
scaler = preprocessing.StandardScaler()
data_scaled = scaler.fit_transform(data)
eps = np.std(data_scaled)  # spread of the scaled data (reported below)
n_mature = 20  # maturity threshold passed as ``amature``
numData = 0
global_train_err = []
global_test_err = []
num_mature_neurons = []
# Shuffle, then hold out 25% of the data for testing.
train_data, test_data = train_test_split(shuffle(data_scaled), test_size=0.25)
# The first two test observations seed the network's start nodes.
gng = GrowingNeuralGas(amature=n_mature,
                       alfac1=0.1,
                       alfacN=0.01,
                       startA=torch.tensor(test_data[0]),
                       startB=torch.tensor(test_data[1]),
                       lambdaParam=20,
                       alfaParam=0.5,
                       dParam=0.995)
print("Train, test size: {} {} - eps: {} - matureNeurons: {}".format(
    train_data.shape[0], test_data.shape[0], eps, n_mature))
try:
    mean_train_error = np.inf
    mean_test_error = np.inf
    mature_neurons_ratio = 0
    for ep in range(epochs):
        start = time.time()
        # One pass over the training data.  The fragment is truncated here:
        # the loop body (and the matching except/finally) continues beyond
        # this view.
        for d in train_data:
            gng.forward(torch.tensor(d).view(1, -1))
            gng.CountSignal += 1
if __name__ == '__main__':
    # Recreate the folder used for evolution snapshots.
    if os.path.exists('visualization/sequence'):
        shutil.rmtree('visualization/sequence')
    os.makedirs('visualization/sequence')

    n_samples = 2000
    dataset_type = 'blobs'
    data = None
    print('Preparing data...')
    # Select the synthetic-data generator by name.
    makers = {
        'blobs': lambda: datasets.make_blobs(n_samples=n_samples, random_state=8),
        'moons': lambda: datasets.make_moons(n_samples=n_samples, noise=.05),
        'circles': lambda: datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05),
    }
    if dataset_type in makers:
        data = makers[dataset_type]()
    # Generators return (X, y); standardize only the sample matrix.
    data = StandardScaler().fit_transform(data[0])
    print('Done.')
    print('Fitting neural network...')
    gng = GrowingNeuralGas(data, output_folder="visualization")
    gng.fit_network(e_b=0.1, e_n=0.006, a_max=10, l=200, a=0.5, d=0.995,
                    passes=8, plot_evolution=False)
    print('Found %d clusters.' % gng.number_of_clusters())
    gng.plot_clusters(gng.cluster_data())
    print('Global error all network(on euclidean distance): ' + str(gng.compute_global_error()))
        # Fragment of a point-extraction method; the enclosing ``def`` is
        # outside this view.  ``x``, ``y`` and ``backgroundPoint`` are
        # presumably set earlier — TODO confirm.
        colors = list(self.pixels[x, y])
        for i in range(len(self.background)):
            # Any channel differing from the background by more than 50
            # marks the pixel as foreground.
            if abs(colors[i] - self.background[i]) > 50:
                backgroundPoint = False
        # Scale each channel into [0, 1].
        for i in range(len(colors)):
            colors[i] = self.normalize(colors[i], 255)
        # Remove alpha channel + others if we find them.
        while len(colors) > 3:
            colors.pop(len(colors)-1)
        # NOTE(review): y is normalized by self.width, not self.height —
        # confirm whether this is intentional.
        point = [self.normalize(x, self.width), self.normalize(y, self.width)] + colors
        return point


# Script (Python 2 — note the print statement): train a 5-dimensional GNG
# on a series of RGB images, then display the resulting units in a Qt view.
gng = GrowingNeuralGas(dim=5)
for i in range(1, 12):
    reader = ImageReader("images/rgb/rgb%s.png" % i)
    gng.run(2000, reader)
print "Number of units:" + str(len(gng.units))
application = QApplication([])
view = GNGPlotter(reader.width, reader.height)
view.gng = gng
view.show()
application.exec_()