def SOM(data, leninput, lentarget):
    som = MiniSom(16, 16, leninput, sigma=1.0, learning_rate=0.5)
    som.random_weights_init(data)
    print("Training...")
    som.train_random(data, 10000)  # training with 10000 iterations
    print("\n...ready!")
    numpy.save('weight_som', som.weights)  # numpy.save appends .npy: writes weight_som.npy
class SomModel(Model):
    def __init__(self, input_length):
        from minisom import MiniSom
        # note: normalize= and train_single_instance come from a
        # modified MiniSom fork, not the stock library
        self.som = MiniSom(10, 10, input_length, sigma=0.3,
                           learning_rate=0.1, normalize=True)

    def run(self, inp):
        self.som.train_single_instance(inp.flatten())
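The `normalize` flag and `train_single_instance` above belong to a modified MiniSom fork; stock MiniSom has neither. With recent stock versions, a roughly equivalent per-sample update can be sketched with `update()`, which takes the sample, its winner, the current step, and the step budget (an approximation under that assumption, not the fork's exact semantics):

# Sketch: per-sample updates with recent stock MiniSom, assuming a fixed
# iteration budget. update(x, winner, t, max_iter) decays sigma and the
# learning rate as t approaches max_iter.
import numpy as np
from minisom import MiniSom

som = MiniSom(10, 10, 4, sigma=0.3, learning_rate=0.1)
stream = np.random.rand(1000, 4)  # stand-in for incoming samples
for t, x in enumerate(stream):
    som.update(x, som.winner(x), t, len(stream))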
class Som:
    def __init__(self):
        self.core = MiniSom(50, 50, 6, sigma=.8, learning_rate=.5)
        # parameters need to match the generating minisom command
        # (specifically for load_map, a method of this custom fork)
        self.core.load_map()
        self.callme = rospy.Service("mapping", Compute, self.callback)
        print("SOM setup complete")

    def callback(self, data):
        # format the force/torque reading as a 6-vector
        vector = np.array([data.fx, data.fy, data.fz, data.tx, data.ty, data.tz])
        print(vector)
        w = self.core.winner(vector)
        return w[0], w[1]
def test_som():
    print("Clustering..")
    session_log_db = db.session_log
    allTopic = articles.distinct("topic")
    lentopic = len(allTopic)
    uniqueTopic = []
    for t in allTopic:
        uniqueTopic.append("Topik " + str(t).strip())
    lebarSOM = lentopic * lentopic + lentopic * 2 + 1
    panjangSOM = session_log_db.find({"data_uji": no_uji}).count()
    somInput = []
    oriSess = []
    for s in session_log_db.find({"data_uji": no_uji}):
        somInput.append(getPresedenceMatrix(convertSession(s["session"], uniqueTopic), uniqueTopic, 1))
        oriSess.append(s["session"])
    som = MiniSom(16, 16, lentopic, sigma=1.0, learning_rate=0.5)
    som.weights = numpy.load('weight_som.npy')  # restore the trained codebook
    outfile = open('cluster-result.csv', 'w')
    seq_number = 0
    cluster_mongo = db.cluster_result
    cluster_mongo.remove({"data_uji": no_uji})
    for cnt, xx in enumerate(somInput):
        w = som.winner(xx)  # getting the winner
        outfile.write("%s " % str("|".join(oriSess[seq_number])))
        outfile.write("%s-%s \n" % (str(w[0]), str(w[1])))
        cluster_mongo.insert({"topik": "|".join(oriSess[seq_number]),
                              "cluster": str(w[0]) + "-" + str(w[1]),
                              "data_uji": no_uji})
        seq_number = seq_number + 1
    outfile.close()
    html = '<div role="alert" class="alert alert-success alert-dismissible fade in">'
    html += ' <button aria-label="Close" data-dismiss="alert" class="close" type="button"><span aria-hidden="true">Close</span></button>'
    html += 'Berhasil Melakukan Clustering</div>'  # "clustering completed successfully"
    return html
def test_recommendation():
    uji_profil = db.uji_profil
    current_seq = []
    for t in uji_profil.find({}):
        current_seq.append("Topik " + str(t['topic']))

    ''' APPLY SOM '''
    allTopic = articles.distinct("topic")
    lentopic = len(allTopic)
    uniqueTopic = []
    for t in allTopic:
        uniqueTopic.append("Topik " + str(t).strip())
    lebarSOM = lentopic * lentopic + lentopic * 2 + 1
    somInput = []
    somInput.append(getPresedenceMatrix(convertSession(current_seq, uniqueTopic), uniqueTopic, 1))
    som = MiniSom(16, 16, lentopic, sigma=1.0, learning_rate=0.5)
    som.weights = numpy.load('weight_som.npy')
    cluster_winner = ""
    for cnt, xx in enumerate(somInput):
        w = som.winner(xx)  # getting the winner
        cluster_winner = str(w[0]) + "-" + str(w[1])

    ''' SEARCH FOR THE PATTERN IN PARTICULAR CLUSTER '''
    print(cluster_winner)
    print(current_seq)
    prefix_result = db.prefix_result
    prefix_cluster = prefix_result.find({"cluster": cluster_winner, "data_uji": no_uji}).sort("min_sup", pymongo.DESCENDING)
    topik_rekomendasi = getTopikRekomendasi(current_seq, prefix_cluster)
    if topik_rekomendasi == "":
        # no pattern found in this cluster: fall back to patterns from all clusters
        prefix_cluster = prefix_result.find({"data_uji": no_uji}).sort("min_sup", pymongo.DESCENDING)
        topik_rekomendasi = getTopikRekomendasi(current_seq, prefix_cluster)
    html = "--tidak ada topik rekomendasi--"  # "no recommended topic"
    if topik_rekomendasi != "":
        the_topik = topik_rekomendasi.replace("Topik", "").strip()
        html = getTestArticle(the_topik, "Rekomendasi 1", "accordion_recommendation", 'col_rek1', "")
        html += getTestArticle(the_topik, "Rekomendasi 2", "accordion_recommendation", 'col_rek2', "")
        html += getTestArticle(the_topik, "Rekomendasi 3", "accordion_recommendation", 'col_rek3', "")
    return html
class KuKuModel(Model):
    def __init__(self, proprioception_input_length, sensory_input_length, reservoir_size):
        # Build the reservoir
        tau = .1      # execution timestep for the cortical rate model
        sigma = .001  # intra-reservoir weights
        eps = .1      # learning rate
        som_size = 10 * 10
        self.sensory_input_length = sensory_input_length
        self.proprioception_input_length = proprioception_input_length
        full_reservoir_input_length = proprioception_input_length + som_size

        # Nodes: units, tau, method
        self.reservoir_input = esn.Node((full_reservoir_input_length,), 0, esn._load)
        self.reservoir = esn.Node((reservoir_size,), tau, esn._reservoir)
        self.reservoir_output = esn.Node((som_size,), 0, esn._load)

        # Arcs: target, source, weight, eps
        # input from the SOM into the reservoir
        self.d_P = esn.Arc(self.reservoir, self.reservoir_input, sigma, 0)
        self.d_P.initConnections(numpy.random.randn, self.reservoir.shape + self.reservoir_input.shape)
        # recurrent connections within the reservoir
        self.r_P = esn.Arc(self.reservoir, self.reservoir, sigma, 0)
        self.r_P.initConnections(numpy.random.randn, self.reservoir.shape + self.reservoir.shape)
        # readout from the reservoir (the only trained connection, eps > 0)
        self.d_out = esn.Arc(self.reservoir_output, self.reservoir, 0, eps)
        self.d_out.initConnections(numpy.random.randn, self.reservoir_output.shape + self.reservoir.shape)

        from minisom import MiniSom
        # normalize= and train_single_instance come from a modified MiniSom fork
        self.som = MiniSom(10, 10, sensory_input_length, sigma=0.3,
                           learning_rate=0.1, normalize=True)
        self.previous_som_activation = numpy.zeros((10, 10))

    def run(self, inp):
        self.som.train_single_instance(inp[:self.sensory_input_length])
        self.reservoir_input.update(numpy.append(self.previous_som_activation.flatten().copy(),
                                                 inp[-self.proprioception_input_length:]))
        self.reservoir.update(self.d_P.read())
        self.reservoir_output.update(self.d_out.read())
        print("error:", self.som.activation_map.flatten() - self.reservoir_output.state)
        self.d_out.learn(self.som.activation_map.flatten() - self.reservoir_output.state)
        self.previous_som_activation = self.som.activation_map.flatten().copy()
def setUp(self):
    self.som = MiniSom(5, 5, 1)
    for w in self.som.weights:  # checking weights normalization
        assert_almost_equal(1.0, np.linalg.norm(w))
    self.som.weights = np.zeros((5, 5))  # fake weights
    self.som.weights[2, 3] = 5.0
    self.som.weights[1, 1] = 2.0
def SOM(data, leninput, lentarget):
    som = MiniSom(5, 5, leninput, sigma=1.0, learning_rate=0.5)
    som.random_weights_init(data)
    print("Training...")
    som.train_batch(data, 10000)  # training with 10000 iterations
    print("\n...ready!")
    # numpy.save appends .npy, so this actually writes weight_som.txt.npy
    numpy.save('weight_som.txt', som.weights)

    bone()
    pcolor(som.distance_map().T)  # distance map as background
    colorbar()
    t = zeros(lentarget, dtype=int)
    # use different colors and markers for each label
    markers = ['o', 's', 'D']
    colors = ['r', 'g', 'b']
    outfile = open('cluster-result.csv', 'w')
    for cnt, xx in enumerate(data):
        w = som.winner(xx)  # getting the winner
        for z in xx:
            outfile.write("%s " % str(z))
        outfile.write("%s-%s \n" % (str(w[0]), str(w[1])))
        # place a marker on the winning position for the sample xx
        # plot(w[0]+.5, w[1]+.5, markers[t[cnt]], markerfacecolor='None',
        #      markeredgecolor=colors[t[cnt]], markersize=12, markeredgewidth=2)
    outfile.close()
def testSOMs():
    from sklearn import datasets
    from minisom import MiniSom
    d = datasets.load_iris()
    data = np.apply_along_axis(lambda x: x / np.linalg.norm(x), 1, d['data'])  # data normalization
    som = MiniSom(7, 7, 4, sigma=1.0, learning_rate=0.5)
    som.random_weights_init(data)
    print("Training...")
    som.train_random(data, 1000)  # random training
    print("\n...ready!")

    ### Plotting the response for each pattern in the iris dataset ###
    from pylab import plot, axis, show, pcolor, colorbar, bone
    bone()
    pcolor(som.distance_map().T)  # plotting the distance map as background
    colorbar()
    t = d['target']
    # use different colors and markers for each label
    markers = ['o', 's', 'D']
    colors = ['r', 'g', 'b']
    for cnt, xx in enumerate(data):
        w = som.winner(xx)  # getting the winner
        # place a marker on the winning position for the sample xx
        plot(w[0] + .5, w[1] + .5, markers[t[cnt]], markerfacecolor='None',
             markeredgecolor=colors[t[cnt]], markersize=12, markeredgewidth=2)
    axis([0, som.weights.shape[0], 0, som.weights.shape[1]])
    show()  # show the figure
def SOM(data, leninput, lentarget, alpha_som, omega_som):
    som = MiniSom(16, 16, leninput, sigma=omega_som, learning_rate=alpha_som)
    som.random_weights_init(data)
    print("Training...")
    som.train_batch(data, 20000)  # training with 20000 iterations
    print("\n...ready!")
    numpy.save('weight_som', som.weights)  # saved as weight_som.npy

    bone()
    pcolor(som.distance_map().T)  # distance map as background
    colorbar()
    t = zeros(lentarget, dtype=int)
    # use different colors and markers for each label
    markers = ['o', 's', 'D']
    colors = ['r', 'g', 'b']
    outfile = open('cluster-result.csv', 'w')
    for cnt, xx in enumerate(data):
        w = som.winner(xx)  # getting the winner
        for z in xx:
            outfile.write("%s " % str(z))
        outfile.write("%s-%s \n" % (str(w[0]), str(w[1])))
    outfile.close()
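Several snippets here persist the codebook with `numpy.save(...)` and restore it by assigning `som.weights`. Against the newer MiniSom API the codebook is private, so the same round-trip looks slightly different; a sketch (assigning `_weights` pokes a private attribute, while pickling the whole object, as the unit tests further down do, avoids that):

# Sketch: persisting a trained SOM with the newer API. get_weights()
# returns a copy of the codebook; there is no public weight setter.
import pickle
import numpy as np
from minisom import MiniSom

som = MiniSom(16, 16, 8)
np.save('weight_som.npy', som.get_weights())  # codebook only

with open('som.p', 'wb') as f:                # or pickle the whole object
    pickle.dump(som, f)

som2 = MiniSom(16, 16, 8)                     # dimensions must match
som2._weights = np.load('weight_som.npy')     # private attribute, use with care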
def train_som(self):
    training_data = [v[0] for v in self.vectors]
    from minisom import MiniSom
    size = len(training_data[0])
    self.som = MiniSom(10, 10, size, sigma=0.3, learning_rate=0.5)
    print("Training SOM...")
    self.som.train_random(training_data, 100)
    print("...ready!")
def train_som(data, offset=None):
    """
    offset: offset between points used for training
    """
    if offset:
        data = data[::offset, :]
    # note: the positional data argument, norm='minmax', and the
    # single-argument train_random below belong to a modified MiniSom
    # fork, not the stock library
    som = MiniSom(param['nr_rows'], param['nr_cols'], data.shape[1], data,
                  sigma=param['sigma'], learning_rate=param['learning_rate'],
                  norm='minmax')
    # som.random_weights_init()  # choose initial nodes from data points
    som.train_random(param['nr_epochs'])  # random training
    return som
def test_som(alpha_som, omega_som):
    print("Clustering pada Data Uji " + str(no_uji))  # "clustering on test set <no_uji>"
    session_log_db = db.session_log
    allTopic = articles.distinct("topic")
    lentopic = len(allTopic)
    uniqueTopic = []
    for t in allTopic:
        uniqueTopic.append("Topik " + str(t).strip())
    lebarSOM = lentopic * lentopic + lentopic * 2 + 1
    panjangSOM = session_log_db.find({"data_uji": no_uji}).count()
    somInput = []
    oriSess = []
    for s in session_log_db.find({"data_uji": no_uji}):
        somInput.append(getPresedenceMatrix(convertSession(s["session"], uniqueTopic), uniqueTopic, 1))
        oriSess.append(s["session"])
    som = MiniSom(16, 16, lentopic, sigma=omega_som, learning_rate=alpha_som)
    som.weights = numpy.load('weight_som.npy')  # restore the trained codebook
    outfile = open('cluster-result.csv', 'w')
    seq_number = 0
    cluster_mongo = db.cluster_result
    cluster_mongo.remove({"data_uji": no_uji})
    for cnt, xx in enumerate(somInput):
        w = som.winner(xx)  # getting the winner
        outfile.write("%s " % str("|".join(oriSess[seq_number])))
        outfile.write("%s-%s \n" % (str(w[0]), str(w[1])))
        cluster_mongo.insert({"topik": "|".join(oriSess[seq_number]),
                              "cluster": str(w[0]) + "-" + str(w[1]),
                              "data_uji": no_uji})
        seq_number = seq_number + 1
    outfile.close()
    return "Berhasil Melakukan Clustering"  # "clustering completed successfully"
def __init__(self, parent, controller):
    # initialize the tk.Frame
    tk.Frame.__init__(self, parent)
    style.use("ggplot")
    self.figure = pl.figure(1)
    self.a = self.figure.add_subplot(111)
    self.canvas = FigureCanvasTkAgg(self.figure, self)
    self.canvas.get_tk_widget().grid(sticky="news")
    self.canvas._tkcanvas.grid(sticky="news")
    # SOM initialization: a 10x10 map over 136-dimensional inputs
    self.som = MiniSom(10, 10, 136, sigma=1.0, learning_rate=0.5)
def _minisomrandom(self):
    """Clusters sentence vectors using the minisomrandom algorithm

    Returns
    -------
    numpy ndarray
        codebook (weights) of the trained SOM
    """
    H = int(self.opts['size'])
    W = int(self.opts['size'])
    N = self.X.shape[1]
    som = MiniSom(H, W, N, sigma=1.0, random_seed=1)
    if self.opts['initialization']:
        som.random_weights_init(self.X)
    som.train_random(self.X, self.opts['niterations'])
    return som.get_weights()
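For reference, the codebook returned by `get_weights()` has shape `(H, W, N)`, one weight vector per map node; a quick self-contained check (the sizes here are arbitrary):

# Sketch: get_weights() returns one N-dimensional weight vector per node.
import numpy as np
from minisom import MiniSom

H, W, N = 6, 6, 50
som = MiniSom(H, W, N, sigma=1.0, random_seed=1)
som.train_random(np.random.rand(200, N), 1000)
assert som.get_weights().shape == (H, W, N)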
import numpy as np
import pandas as pd

# Import the dataset
df = pd.read_csv("Credit_Card_Applications.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)

# Training the SOM
from minisom import MiniSom
som_model = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som_model.random_weights_init(X)
som_model.train_random(data=X, num_iteration=200)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som_model.distance_map().T)
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som_model.winner(x)
    # place a marker on the winning node, colored by the class label
    plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]],
         markerfacecolor='None', markeredgecolor=colors[y[i]],
         markersize=10, markeredgewidth=2)
show()
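In this style of fraud-detection tutorial, the step after the visualization is usually pulling out the customers mapped to high-distance (white) nodes with `win_map` and undoing the scaling. A sketch reusing the names above; the node coordinates are placeholders to be read off your own distance map:

# Sketch: extract observations that landed on suspicious (high-MID) nodes.
# (8, 1) and (6, 8) are placeholder coordinates, not real results.
import numpy as np

mappings = som_model.win_map(X)
frauds = np.concatenate((mappings[(8, 1)], mappings[(6, 8)]), axis=0)
frauds = scaler.inverse_transform(frauds)  # back to the original feature scale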
class SOM:
    def If_running(self):
        self.play.set_sensitive(not self.som.running)
        return self.som.running

    def If_paused(self):
        return False

    def Status_update(self):
        if self.som.running:
            context_id = self.status_bar.get_context_id("Running")
            text = ("Iteration: "
                    + str(self.som.tick).zfill(len(str(self.som.ticks)))
                    + "/" + str(self.som.ticks).zfill(len(str(self.som.ticks))))
            if self.som.paused:
                text += ", Paused"
            self.status_bar.push(context_id, text)
            return True  # keep updating while the model is running
        elif not self.som.running:
            if not self.som.paused:
                self.status_bar.remove_all(self.status_bar.get_context_id("Running"))
                self.status_bar.remove_all(self.status_bar.get_context_id("Ready"))
                context_id = self.status_bar.get_context_id("Ready")
                self.status_bar.push(context_id, "Ready")
            return False

    def open_file(self, file_name):
        try:
            self.data = np.genfromtxt(file_name, delimiter=',',
                                      usecols=(self.visual_and_acoustic), skip_header=1)
            self.pattern_labels = np.genfromtxt(file_name, delimiter=',',
                                                usecols=(self.visual_and_acoustic),
                                                skip_footer=14, dtype=str)
            self.file_name = file_name
            self.update_treeview(self.data, self.patterns_liststore)
        except:
            print("File is probably not in the right format: %s" % file_name)
            raise

    def select_file(self, widget=None, data=None):
        dialog = gtk.FileChooserDialog("Open..", None,
                                       gtk.FILE_CHOOSER_ACTION_OPEN,
                                       (gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                        gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        dialog.set_default_response(gtk.RESPONSE_OK)
        filter = gtk.FileFilter()
        filter.set_name("All files")
        filter.add_pattern("*")
        dialog.add_filter(filter)
        filter = gtk.FileFilter()
        filter.set_name("Comma-separated values")
        filter.add_pattern("*.csv")
        dialog.add_filter(filter)
        dialog.set_filter(filter)
        response = dialog.run()
        if response == gtk.RESPONSE_OK:
            self.open_file(dialog.get_filename())
        dialog.destroy()

    def Run(self, widget=None, data=None):
        if not self.som.running:
            ### Training ###
            for i in range(1):
                self.train_som()
            self.Draw_figure()
            self.canvas.draw()
            self.canvas.draw_idle()
            # we need to draw *and* flush
            self.figure.canvas.draw()
            self.figure.canvas.flush_events()
            self.update_treeview(self.test_data, self.test_liststore)
            self.update_treeview(self.data, self.patterns_liststore)
        glib.idle_add(self.Status_update)
        glib.idle_add(self.If_running)
        glib.idle_add(self.If_paused)

    def Test(self, widget=None, data=None):
        if not self.som.running:
            self.test_som()
            self.Draw_figure()
            self.canvas.draw()
            self.canvas.draw_idle()
            # we need to draw *and* flush
            self.figure.canvas.draw()
            self.figure.canvas.flush_events()
        glib.idle_add(self.Status_update)
        glib.idle_add(self.If_running)
        glib.idle_add(self.If_paused)

    def Reset(self, widget=None, data=None):
        self.init_som()
        self.Draw_figure()
        self.canvas.draw()
        self.canvas.draw_idle()
        # we need to draw *and* flush
        self.figure.canvas.draw()
        self.figure.canvas.flush_events()
        self.update_treeview(self.test_data, self.test_liststore)
        self.update_treeview(self.data, self.patterns_liststore)
        glib.idle_add(self.Status_update)
        glib.idle_add(self.If_running)
        glib.idle_add(self.If_paused)

    def delete_event(self, widget=None, event=None, data=None):
        # Returning False in the "delete_event" handler lets GTK emit the
        # "destroy" signal; returning True would keep the window alive
        # (useful for "are you sure you want to quit?" dialogs).
        return False

    def destroy(self, widget=None, data=None):
        gtk.main_quit()

    def Draw_figure(self):
        self.axes.cla()  # clear axis
        cols = self.columns[self.combobox.get_active()]
        data = self.data[:, 0:len(cols)]
        bone()
        # plotting the distance map as background
        background = self.axes.pcolor(self.som.distance_map().T)
        t = np.zeros(len(self.target), dtype=int)
        t[self.target == 'A'] = 0
        t[self.target == 'B'] = 1
        t[self.target == 'C'] = 2
        t[self.target == 'D'] = 3
        # use different colors and markers for each label
        markers = ['o', 's', 'D', '+']
        colors = ['r', 'g', 'b', 'y']
        for cnt, xx in enumerate(data):
            w = self.som.winner(xx)  # getting the winner
            # place a marker on the winning position for the sample xx
            tmp = self.axes.plot(w[0] + .5, w[1] + .5, markers[t[cnt]],
                                 markerfacecolor='None',
                                 markeredgecolor=colors[t[cnt]],
                                 markersize=12, markeredgewidth=2)
        self.axes.axis([0, self.som.weights.shape[0], 0, self.som.weights.shape[1]])

    def init_som(self, widget=None, data=None):
        ### Initialization ###
        cols = self.columns[self.combobox.get_active()]
        data = self.data[:, 0:len(cols)]
        self.som = MiniSom(self.width_spin_button.get_value_as_int(),
                           self.height_spin_button.get_value_as_int(),
                           len(cols), sigma=1.2, learning_rate=0.5)
        # self.som.weights_init_gliozzi(data)
        self.som.random_weights_init(data)

    def train_som(self):
        cols = self.columns[self.combobox.get_active()]
        data = self.data[:, 0:len(cols)]
        print("Training...")
        # self.som.train_gliozzi(data)  # Gliozzi et al. training
        self.som.train_random(data, 20)
        print("\n...ready!")

    def make_treeview(self, data, liststore):
        cols = self.columns[self.combobox.get_active()]
        for d in data:
            tmp = d.tolist()
            # quantization_error_subset comes from a modified MiniSom fork
            Qe = MiniSom.quantization_error_subset(self.som, d, len(cols))
            tmp.append(Qe)
            tmp.append(4 * Qe ** 0.5)
            liststore.append(tmp)
        treeview = gtk.TreeView(model=liststore)
        for d in range(len(self.test_data[0])):
            renderer_text = gtk.CellRendererText()
            column_text = gtk.TreeViewColumn(self.pattern_labels[d], renderer_text, text=d)
            treeview.append_column(column_text)
        column_text = gtk.TreeViewColumn('Qe', renderer_text, text=d + 1)
        treeview.append_column(column_text)
        column_text = gtk.TreeViewColumn('NLT', renderer_text, text=d + 2)
        treeview.append_column(column_text)
        return treeview

    def update_treeview(self, data, liststore):
        cols = len(self.columns[self.combobox.get_active()])
        for i, d in enumerate(data):
            for j in range(len(d)):
                liststore[i][j] = d[j]
                if j >= cols:
                    liststore[i][j] = -999
            Qe = MiniSom.quantization_error_subset(self.som, d, cols)
            liststore[i][-2] = Qe
            liststore[i][-1] = 4 * Qe ** 0.5

    def select_columns(self, widget=None):
        self.update_treeview(self.test_data, self.test_liststore)
        self.update_treeview(self.data, self.patterns_liststore)

    # ----------------------------------------
    # SAM added these functions here
    def pertSomWeights(self, widget=None, data=None):
        scale = .5  # perturbation amplitude
        print('Adding noise to SOM weights')
        pertAmount = scale * (np.random.random_sample(self.som.weights.shape) - .5)
        self.som.weights = self.som.weights + pertAmount
        self.Draw_figure()
        self.canvas.draw()
        self.canvas.draw_idle()
        # we need to draw *and* flush
        self.figure.canvas.draw()
        self.figure.canvas.flush_events()

    def pertInputs(self, widget=None, data=None):
        p = .2
        print('Making %f prop of inputs 0.5' % p)
        # randomly pick indices to switch, then replace
        noiseIndex = np.random.binomial(1, p, self.data.shape)  # ones at proportion p of samples
        self.data[noiseIndex == 1] = .5
        print(self.data)
        # update the treeview for the "Patterns" tab to see the result graphically
        self.update_treeview(self.data, self.patterns_liststore)
    # ----------------------------------------

    def __init__(self):
        # create a new window
        self.window = gtk.Window(gtk.WINDOW_TOPLEVEL)
        # "delete_event" is given by the window manager (usually the
        # "close" option or the titlebar); "destroy" fires when
        # gtk_widget_destroy() is called on the window or when
        # delete_event returns False.
        self.window.connect("delete_event", self.delete_event)
        self.window.connect("destroy", self.destroy)
        self.window.set_title("SOM model")
        self.window.set_default_size(500, 500)  # keep the window as small as it can be

        # box arguments: homogeneous, spacing, expand, fill, padding
        homogeneous = False
        spacing = 0
        expand = False
        fill = False
        padding = 10
        self.hbox = gtk.HBox(homogeneous, spacing)
        self.vbox = gtk.VBox(homogeneous, spacing)
        self.window.add(self.vbox)

        self.label = gtk.Label("Dimensions:")
        self.adjustment = gtk.Adjustment(value=5, lower=1, upper=100, step_incr=2, page_incr=5)
        self.width_spin_button = gtk.SpinButton(self.adjustment, climb_rate=0, digits=0)
        self.adjustment = gtk.Adjustment(value=10, lower=1, upper=100, step_incr=2, page_incr=5)
        self.height_spin_button = gtk.SpinButton(self.adjustment, climb_rate=0, digits=0)

        # Create a series of buttons with the appropriate settings
        # (stock icons from http://www.pygtk.org/docs/pygtk/gtk-stock-items.html)
        image = gtk.Image()
        image.set_from_stock(gtk.STOCK_EXECUTE, 1)
        self.play = gtk.Button()
        self.play.set_image(image)
        self.play.set_label("Train")

        image = gtk.Image()
        image.set_from_stock(gtk.STOCK_OPEN, 1)
        self.open = gtk.Button()
        self.open.set_image(image)
        self.open.set_label("Open patterns")

        image = gtk.Image()
        image.set_from_stock(gtk.STOCK_REFRESH, 1)
        self.reset = gtk.Button()
        self.reset.set_image(image)
        self.reset.set_label("Reset")

        self.play.connect("clicked", self.Run, None)
        self.open.connect("clicked", self.select_file, None)
        self.reset.connect("clicked", self.Reset, None)
        self.height_spin_button.connect("value-changed", self.Reset, "Height changed")
        self.width_spin_button.connect("value-changed", self.Reset, "Width changed")

        # button to perturb the trained SOM weights
        self.perturb = gtk.Button("Perturb SOM")
        self.perturb.connect("clicked", self.pertSomWeights, None)
        self.perturb.show()  # tell GTK to show the button, but not where

        # button to add a noisy encoding to the training inputs
        self.perturbInputButton = gtk.Button("Perturb Inputs")
        self.perturbInputButton.connect("clicked", self.pertInputs, None)
        self.perturbInputButton.show()

        allFileName = '4750.csv'
        self.file_name = allFileName
        self.test_file_name = allFileName
        self.visual_only = [0, 1, 2, 3, 4, 5, 6, 7]
        self.visual_and_acoustic = [0, 1, 2, 3, 4, 5, 6, 7, 8]
        self.columns = [self.visual_only, self.visual_and_acoustic]

        self.combobox = gtk.combo_box_new_text()
        self.combobox.append_text('Visual only')
        self.combobox.append_text('Visual and acoustic')

        self.test_data = np.genfromtxt(self.test_file_name, delimiter=',',
                                       usecols=(self.visual_and_acoustic), skip_header=1)
        self.test_data += -.5
        self.test_data = np.apply_along_axis(lambda x: x / np.linalg.norm(x), 1, self.test_data)  # data normalization
        # loading the labels for use in the figure
        self.target = np.genfromtxt(self.file_name, delimiter=',', usecols=(9), dtype=str, skip_header=1)
        self.combobox.set_active(1)
        self.combobox.connect('changed', self.Reset)

        self.data = np.genfromtxt(self.file_name, delimiter=',',
                                  usecols=(self.visual_and_acoustic), skip_header=1)
        self.data += -.5
        self.data = np.apply_along_axis(lambda x: x / np.linalg.norm(x), 1, self.data)  # data normalization
        self.pattern_labels = np.genfromtxt(self.file_name, delimiter=',',
                                            usecols=(self.visual_and_acoustic), dtype=str)[0]

        self.init_som()

        # 9 data columns plus the Qe and NLT columns
        self.test_liststore = gtk.ListStore(*([float] * 11))
        self.patterns_liststore = gtk.ListStore(*([float] * 11))
        self.test_treeview = self.make_treeview(self.test_data, self.test_liststore)
        self.patterns_treeview = self.make_treeview(self.data, self.patterns_liststore)

        self.figure, self.axes = plt.subplots()
        # Create canvas.
        self.canvas = FigureCanvas(self.figure)  # a gtk.DrawingArea
        self.canvas.set_size_request(300, 400)
        self.Draw_figure()

        self.notebook = gtk.Notebook()
        self.notebook.set_tab_pos(gtk.POS_TOP)
        self.vbox.pack_start(self.notebook)
        label = gtk.Label("Distance map")
        self.notebook.append_page(self.canvas, label)
        label = gtk.Label("Patterns")
        self.notebook.append_page(self.patterns_treeview, label)
        label = gtk.Label("Testing")
        self.notebook.append_page(self.test_treeview, label)
        self.patterns_treeview.show()
        self.test_treeview.show()
        self.canvas.draw_idle()
        self.canvas.show()
        self.figure.canvas.draw()

        self.vbox.pack_start(self.hbox, expand, fill, 10)
        self.status_bar = gtk.Statusbar()
        self.vbox.pack_start(self.status_bar, expand, fill, 0)
        self.status_bar.show()
        glib.idle_add(self.Status_update)
        self.hbox.show()
        self.vbox.show()
        self.play.show()
        self.open.show()
        self.reset.show()
        self.width_spin_button.show()
        self.height_spin_button.show()
        self.hbox.pack_start(self.play, expand, fill, padding)
        self.hbox.pack_start(self.open, expand, fill, padding)
        self.hbox.pack_start(self.combobox, expand, fill, padding)
        self.hbox.pack_start(self.reset, expand, fill, padding)
        self.hbox.pack_start(self.label, expand, fill, padding)
        self.hbox.pack_start(self.width_spin_button, expand, fill, padding)
        self.hbox.pack_start(self.height_spin_button, expand, fill, 0)
        self.hbox.pack_start(self.perturb, expand, fill, padding)
        self.hbox.pack_start(self.perturbInputButton, expand, fill, padding)

        self.quit = gtk.Button(stock=gtk.STOCK_QUIT)
        self.combobox.connect('changed', self.select_columns)
        self.quit.connect("clicked", self.destroy, None)
        self.hbox.pack_end(self.quit, expand, fill, padding)
        self.quit.show()

        self.window.show_all()
        self.window.present()
        # Control returns to gtk.main() when main_quit() is called
        return None

    def main(self):
        # All PyGTK applications must have a gtk.main(). Control ends here
        # and waits for an event to occur (like a key press or mouse event).
        gtk.main()
import numpy as np
import pandas as pd

dataset = pd.read_csv('Credit_Card_Applications.csv')
X = dataset.iloc[:, 1:-1].values
Y = dataset.iloc[:, -1].values

from sklearn.preprocessing import MinMaxScaler
Normalizer = MinMaxScaler(feature_range=(0, 1))
X = Normalizer.fit_transform(X)

# Training the SOM
from minisom import MiniSom
som = MiniSom(x=15, y=15, sigma=0.8, learning_rate=0.5, input_len=14)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=200)

# Visualizing the Results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
for i, j in enumerate(X):
    if Y[i] == 1:
        W_node = som.winner(j)
        # place a marker on the winning node (style as in the examples above)
        plot(W_node[0] + 0.5, W_node[1] + 0.5, 'o',
             markerfacecolor='None', markeredgecolor='g',
             markersize=10, markeredgewidth=2)
show()
from pylab import imread, imshow, figure, show, subplot, title
from numpy import reshape, flipud, unravel_index, zeros
from minisom import MiniSom

# read the image
img = imread('tree.jpg')

# reshaping the pixels matrix
pixels = reshape(img, (img.shape[0] * img.shape[1], 3))

# SOM initialization and training
print('training...')
som = MiniSom(3, 3, 3, sigma=0.1, learning_rate=0.2)  # 3x3 = 9 final colors
som.random_weights_init(pixels)
starting_weights = som.weights.copy()  # saving the starting weights
som.train_random(pixels, 100)

print('quantization...')
qnt = som.quantization(pixels)  # quantize each pixel of the image
print('building new image...')
clustered = zeros(img.shape)
for i, q in enumerate(qnt):  # place the quantized values into a new image
    # note: newer numpy renamed unravel_index's dims= keyword to shape=
    clustered[unravel_index(i, dims=(img.shape[0], img.shape[1]))] = q
print('done.')

# show the result
figure(1)
subplot(221)
title('original')
imshow(flipud(img))
subplot(222)
title('result')
imshow(flipud(clustered))
show()
def create_som():
    som = MiniSom(20, 20, 64, sigma=1.5, learning_rate=0.5)
    return som
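The 64-dimensional input length matches 8x8 image data such as sklearn's digits; a hedged usage sketch of `create_som` (the dataset choice is an assumption, not something the snippet specifies):

# Sketch: train the 20x20 map from create_som() on sklearn's 8x8 digit
# images (64 pixels per sample).
from sklearn.datasets import load_digits

digits = load_digits()
data = digits.data / 16.0        # pixel values 0..16 scaled to 0..1
som = create_som()
som.train_random(data, 5000)
print(som.quantization_error(data))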
class ContextualSom:
    def __init__(self, corpus):
        self._corpus = corpus
        self._tokens = []
        self._token_to_vector = {}
        self._token_to_avg_vector = {}
        self._som = None
        self._all_labels = [
            "noun", "verb", "closed_class", "quantifier", "classifier",
            "adjective", "adverb", "interjection", "unknown"
        ]

    def _average_vector(self, token):
        before = np.zeros(100)
        after = np.zeros(100)
        before_count = 0
        after_count = 0
        # Sweep a window through the processed corpus and average the
        # vectors of all tokens appearing immediately before and after
        # the given token.
        for i in range(len(self._tokens)):
            if self._tokens[i] == token:
                if i > 0 and self._tokens[i - 1] in self._token_to_vector:
                    before += self._token_to_vector[self._tokens[i - 1]]
                    before_count += 1
                if i < len(self._tokens) - 1 and self._tokens[i + 1] in self._token_to_vector:
                    after += self._token_to_vector[self._tokens[i + 1]]
                    after_count += 1
        if before_count != 0:
            before = before / before_count
        if after_count != 0:
            after = after / after_count
        return normalize(np.concatenate([before, after]))

    @staticmethod
    def _get_category(pos):
        if pos in ["NNG", "NNP"]:
            return "noun"
        if pos == "VV":
            return "verb"
        if pos in ["VA"]:
            return "adjective"
        if pos in ["NR", "SN"]:
            return "quantifier"
        if pos == "NNBC":
            return "classifier"
        if pos == "MAG":
            return "adverb"
        if pos == "IC":
            return "interjection"
        if pos == "UNKNOWN":
            return "unknown"
        return "closed_class"

    @staticmethod
    def _get_colour(category):
        map_ = {
            "noun": "yellow",
            "verb": "blue",
            "closed_class": "red",
            "quantifier": "pink",
            "classifier": "cyan",
            "adjective": "green",
            "adverb": "orange",
            "interjection": "purple",
            "unknown": "gray"
        }
        return map_[category]

    def preprocess(self):
        mecab = Mecab()  # parts-of-speech tagger
        token_pos = mecab.pos(self._corpus)
        # Mecab sometimes returns multiple POS tags for a token;
        # we take the first one for simplicity
        self._tokens = [(token, pos.split("+")[0]) for token, pos in token_pos]
        counter = Counter(self._tokens)
        counter = {token: count for token, count in counter.most_common(500)}
        # Assign random vectors to each token
        self._token_to_vector = {
            token: normalize(np.random.normal(size=100)) for token in counter
        }
        self._token_to_avg_vector = {
            token: self._average_vector(token) for token in counter
        }

    def train(self, x, y, epochs, verbose=False, **kwargs):
        som_input = np.asarray(list(self._token_to_avg_vector.values()))
        # All hyperparameters from Zhao, Li, et al., 2011
        self._som = MiniSom(x, y, som_input.shape[1], **kwargs)
        self._som.train(som_input, epochs, verbose=verbose)

    def scores(self):
        positions = []
        labels = []
        for token, v in self._token_to_avg_vector.items():
            labels.append(self._get_category(token[1]))
            positions.append(self._som.winner(v))
        positions = np.asarray(positions)
        labels = np.asarray(labels)
        label_ind = np.asarray([self._all_labels.index(l) for l in labels])
        # Leave-one-out k-NN over the map positions: predict each token's
        # category from its five nearest neighbours on the SOM
        predictions = []
        for ind, p in enumerate(positions):
            knn = KNeighborsClassifier(n_neighbors=5)
            knn.fit(np.delete(positions, ind, axis=0),
                    np.delete(label_ind, ind, axis=0))
            predictions.append(knn.predict([p])[0])
        predictions = np.asarray(predictions)
        # Per-category accuracy
        scores = {}
        for label in range(len(self._all_labels)):
            cat_labels = label_ind[label_ind == label]
            cat_predictions = predictions[label_ind == label]
            correct = cat_labels == cat_predictions
            correct_percentage = correct.sum() / len(cat_labels)
            scores[self._all_labels[label]] = correct_percentage
        return scores
def create_som(data, labels, one_hots, filename_load_weights,
               filename_save_weights, load_weights=False, num_iteration=100,
               plot_data=False, plot_labels=False, save_plot=False,
               plot_distance_map=False, show_activations=False,
               show_single_chars=False, filename_plots='unspecified.png'):
    assert len(data) == len(labels)
    size = int(np.ceil(np.sqrt(len(data))))
    input_len = len(data[0])

    # Initialization and training. Note: the model= argument and the
    # load_weights/save_weights methods below come from a modified
    # MiniSom fork; stock MiniSom has neither.
    som = MiniSom(x=size, y=size, input_len=input_len, model=rnn,
                  sigma=1.0, learning_rate=0.5)
    if load_weights:
        som.load_weights(filename=filename_load_weights)
    else:
        som.random_weights_init(data)
        print("Training...")
        som.train_random(data, num_iteration=num_iteration)  # random training
        print("\n...ready!")
        som.save_weights(filename=filename_save_weights)

    print("begin mapping vectors")
    # Plotting the response for each pattern in the data set
    if plot_distance_map:
        plt.bone()
        plt.pcolor(som.distance_map().T)  # plotting the distance map as background
        plt.colorbar()
    else:
        plt.figure(figsize=(size, size))
    for i, data_point in enumerate(data):
        w = som.winner(data_point)  # getting the winner
        if plot_data:
            # place a string of the vector on the winning position for the sample
            plt.text(x=w[0], y=w[1] + np.random.rand() * 0.9,
                     s=str(data_point), size='small', color='r')
        if plot_labels:
            # place the string of the label on the winning position for the sample
            plt.text(x=w[0] + 0.75, y=w[1] + np.random.rand() * 0.9,
                     s=labels[i], size='small', color='b')
    # add axis
    plt.axis([0, size, 0, size])
    # save if specified
    if save_plot:
        plt.savefig('../RNN/SOM_graphics/{}.png'.format(filename_plots))
    plt.show()

    if show_activations:
        for i in range(len(one_hots)):
            plt.bone()
            # activation_map(x) is also a fork extension
            plt.pcolor(som.activation_map(one_hots[i]))
            plt.colorbar()
            plt.title('vec_{}'.format(one_hots[i]))
            plt.show()

    if show_single_chars:
        unique_labels = np.unique(labels)
        for unique_label in unique_labels:
            plt.bone()
            plt.pcolor(som.distance_map().T)  # distance map as background
            plt.colorbar()
            for i, data_point in enumerate(data):
                if unique_label == labels[i]:
                    w = som.winner(data_point)  # getting the winner
                    plt.text(x=w[0] + 0.75, y=w[1] + np.random.rand() * 0.9,
                             s=labels[i], size='small', color='r')
                    # plot the vectors
                    plt.text(x=w[0], y=w[1] + np.random.rand() * 0.9,
                             s=str(data_point), size='small', color='b')
            # add axis
            plt.axis([0, size, 0, size])
            plt.show()
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Credit_Card_Applications.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# Training the SOM
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som.winner(x)
    # place a marker on the winning node, colored by the class label
    plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]],
         markerfacecolor='None', markeredgecolor=colors[y[i]],
         markersize=10, markeredgewidth=2)
show()
# (the definition of data_path_list is truncated in the source)
data_list = list(map(lambda path: pickle.load(open(path, 'rb')), data_path_list))
# concatenate the data vertically
dataset = np.concatenate(data_list, axis=0)

# Without the timestamp ###############################
# Change 200 to 201 depending on whether the data has a timestamp
# (pre-flattened with test_radar_data_labeler)
X = dataset.reshape((len(dataset), 200,))
som = MiniSom(x=100, y=100, input_len=200, sigma=1.0, learning_rate=0.5)

# With the timestamp ##########################
# min-max normalize the timestamp:
# X = dataset.reshape((len(dataset), 201,))
# timestamp_scaler = MinMaxScaler(feature_range=(0, 1))
# timestamp_col_scaled = timestamp_scaler.fit_transform(X[:, 0].reshape(-1, 1))
# X[:, 0] = timestamp_col_scaled.reshape((len(timestamp_col_scaled)))
# som = MiniSom(x=50, y=50, input_len=201, sigma=1.0, learning_rate=0.5)

# som.random_weights_init(X)
som.train_random(data=X, num_iteration=1000)

# visualize results
label_path = 'F:\config_detection\labels/labeled_onNotOn_080719.csv'
label_array = pd.read_csv(label_path).values[:, 1:]
testFeaturesPath = '../Feature_Vectors/normalised_features_match.pickle'
TestFeatures = pickle.load(open(testFeaturesPath, 'rb'))

# List of all features in the training data
# (DataFeatures is loaded earlier; its definition is truncated in the source)
Features = []
for val in DataFeatures.values():
    Features.append(val)

# (x, y) -- size of the output grid for the SOM
x = 5
y = 5

# Number of iterations to run
iteration = int(input("Input number of iterations: "))

# Create a SOM
som = MiniSom(x, y, 20, sigma=0.3, learning_rate=0.5)
print("Training...")
som.train_random(Features, iteration)  # trains the SOM for the requested iterations
print("...ready!")

# Map each output neuron position to a unique cluster id:
# (0,0) --> 0, (0,1) --> 1, and so on.
feature_map = {}
k = 0
for i in range(x):
    for j in range(y):
        feature_map[(i, j)] = k
        k += 1

# Open a csv file to write the attribute name and its corresponding cluster id
# (the beginning of _plot_distribution is truncated in the source)
    plt.pcolor(som.distance_map().T)
    return fig, ax


RS = 20160101

if __name__ == '__main__':
    args = _parse_file_argument()
    data = pd.read_csv(args.csv)
    data.fillna(0, inplace=True)
    label_column = args.label_prefix
    label_prefix = data[label_column].values
    data.drop(label_column, axis=1, inplace=True)
    label_column = args.label_sufix
    label_sufix = data[label_column].values
    data.drop(label_column, axis=1, inplace=True)
    id_column = 'id'
    data.drop(id_column, axis=1, inplace=True)
    som = MiniSom(8, 8, len(data.columns), sigma=1.0, learning_rate=0.5, random_seed=RS)
    # as_matrix() is deprecated in newer pandas; .values is the equivalent
    som.random_weights_init(data.as_matrix())
    som.train_random(data.as_matrix(), 100)
    _plot_distribution(som)
    plt.savefig('som.png', dpi=120)
class TestMinisom(unittest.TestCase):
    def setUp(self):
        self.som = MiniSom(5, 5, 1)
        for i in range(5):
            for j in range(5):
                # checking weights normalization
                assert_almost_equal(1.0, linalg.norm(self.som._weights[i, j]))
        self.som._weights = zeros((5, 5, 1))  # fake weights
        self.som._weights[2, 3] = 5.0
        self.som._weights[1, 1] = 2.0

    def test_decay_function(self):
        assert self.som._decay_function(1., 2., 3.) == 1. / (1. + 2. / (3. / 2))

    def test_fast_norm(self):
        assert fast_norm(array([1, 3])) == sqrt(1 + 9)

    def test_check_input_len(self):
        with self.assertRaises(ValueError):
            self.som.train_batch([[1, 2]], 1)
        with self.assertRaises(ValueError):
            self.som.random_weights_init(array([[1, 2]]))
        with self.assertRaises(ValueError):
            self.som._check_input_len(array([[1, 2]]))
        self.som._check_input_len(array([[1]]))
        self.som._check_input_len([[1]])

    def test_unavailable_neigh_function(self):
        with self.assertRaises(ValueError):
            MiniSom(5, 5, 1, neighborhood_function='boooom')

    def test_gaussian(self):
        bell = self.som._gaussian((2, 2), 1)
        assert bell.max() == 1.0
        assert bell.argmax() == 12  # unravel(12) = (2,2)

    def test_mexican_hat(self):
        bell = self.som._mexican_hat((2, 2), 1)
        assert bell.max() == 1.0
        assert bell.argmax() == 12  # unravel(12) = (2,2)

    def test_bubble(self):
        bubble = self.som._bubble((2, 2), 1)
        assert bubble[2, 2] == 1
        assert sum(sum(bubble)) == 1

    def test_triangle(self):
        bubble = self.som._triangle((2, 2), 1)
        assert bubble[2, 2] == 1
        assert sum(sum(bubble)) == 1

    def test_win_map(self):
        winners = self.som.win_map([[5.0], [2.0]])
        assert winners[(2, 3)][0] == [5.0]
        assert winners[(1, 1)][0] == [2.0]

    def test_labels_map(self):
        labels_map = self.som.labels_map([[5.0], [2.0]], ['a', 'b'])
        assert labels_map[(2, 3)]['a'] == 1
        assert labels_map[(1, 1)]['b'] == 1
        with self.assertRaises(ValueError):
            self.som.labels_map([[5.0]], ['a', 'b'])

    def test_activation_response(self):
        response = self.som.activation_response([[5.0], [2.0]])
        assert response[2, 3] == 1
        assert response[1, 1] == 1

    def test_activate(self):
        assert self.som.activate(5.0).argmin() == 13.0  # unravel(13) = (2,3)

    def test_quantization_error(self):
        assert self.som.quantization_error([[5], [2]]) == 0.0
        assert self.som.quantization_error([[4], [1]]) == 1.0

    def test_quantization(self):
        q = self.som.quantization(array([[4], [2]]))
        assert q[0] == 5.0
        assert q[1] == 2.0

    def test_random_seed(self):
        som1 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        som2 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        # same initialization
        assert_array_almost_equal(som1._weights, som2._weights)
        data = random.rand(100, 2)
        som1 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        som1.train_random(data, 10)
        som2 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        som2.train_random(data, 10)
        # same state after training
        assert_array_almost_equal(som1._weights, som2._weights)

    def test_train_batch(self):
        som = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        data = array([[4, 2], [3, 1]])
        q1 = som.quantization_error(data)
        som.train_batch(data, 10)
        assert q1 > som.quantization_error(data)
        data = array([[1, 5], [6, 7]])
        q1 = som.quantization_error(data)
        som.train_batch(data, 10, verbose=True)
        assert q1 > som.quantization_error(data)

    def test_train_random(self):
        som = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        data = array([[4, 2], [3, 1]])
        q1 = som.quantization_error(data)
        som.train_random(data, 10)
        assert q1 > som.quantization_error(data)
        data = array([[1, 5], [6, 7]])
        q1 = som.quantization_error(data)
        som.train_random(data, 10, verbose=True)
        assert q1 > som.quantization_error(data)

    def test_random_weights_init(self):
        som = MiniSom(2, 2, 2, random_seed=1)
        som.random_weights_init(array([[1.0, .0]]))
        for w in som._weights:
            assert_array_equal(w[0], array([1.0, .0]))

    def test_pca_weights_init(self):
        som = MiniSom(2, 2, 2)
        som.pca_weights_init(array([[1., 0.], [0., 1.], [1., 0.], [0., 1.]]))
        expected = array([[[0., -1.41421356], [-1.41421356, 0.]],
                          [[1.41421356, 0.], [0., 1.41421356]]])
        assert_array_almost_equal(som._weights, expected)

    def test_distance_map(self):
        som = MiniSom(2, 2, 2, random_seed=1)
        som._weights = array([[[1., 0.], [0., 1.]],
                              [[1., 0.], [0., 1.]]])
        assert_array_equal(som.distance_map(), array([[1., 1.], [1., 1.]]))

    def test_pickling(self):
        with open('som.p', 'wb') as outfile:
            pickle.dump(self.som, outfile)
        with open('som.p', 'rb') as infile:
            pickle.load(infile)
        os.remove('som.p')
from minisom import MiniSom
from numpy import genfromtxt, zeros, apply_along_axis, linalg
import matplotlib.pyplot as plt
from collections import OrderedDict

sample = 'K562.tab'

data = genfromtxt(sample, delimiter='\t', usecols=(4, 5, 6, 7, 8, 9, 10, 11))
data = apply_along_axis(lambda x: x / linalg.norm(x), 1, data)  # data normalization
som = MiniSom(20, 20, 8, sigma=1.0, learning_rate=0.5, random_seed=1234)
som.train_random(data, int(1.5 * len(data)))  # random training

### Plotting the response for each pattern in the K562 dataset ###
import pylab
from pylab import plot, axis, show, pcolor, colorbar, bone
bone()
pcolor(som.distance_map().T)  # plotting the distance map as background
colorbar()
target = genfromtxt(sample, delimiter='\t', usecols=(3), dtype=str)  # loading the labels
t = zeros(len(target), dtype=int)
t[target == 'insulator'] = 0
t[target == 'gene_body'] = 1
t[target == 'active_promoter'] = 2
t[target == 'enhancer'] = 3
t[target == 'poised_promoter'] = 4
# use different colors and markers for each label
markers = ['o', 's', 'D', 'v', '^']
colors = ['r', 'g', 'b', 'c', 'm']
plotTitle = 'K562'
pngName = 'K562.png'
tags = []  # save the winner for each step
for cnt, xx in enumerate(data):
    w = som.winner(xx)  # getting the winner
    tags.append(w)
    # place a marker on the winning position for the sample xx
    plot(w[0] + .5, w[1] + .5, markers[t[cnt]], markerfacecolor='None',
         markeredgecolor=colors[t[cnt]], markersize=12, markeredgewidth=2)
pylab.title(plotTitle)
axis([0, 20, 0, 20])
pylab.savefig(pngName)
import numpy as np
from PIL import Image, ImageDraw
from minisom import MiniSom


class SOMDiscretizer(Discretizer):
    def __init__(self, width=4, height=4, sigma=0.3, learning_rate=0.5):
        self.width = width
        self.height = height
        self.sigma = sigma
        self.learning_rate = learning_rate

    def train(self, data):
        self.som = MiniSom(self.width, self.height, len(data[0]),
                           sigma=self.sigma, learning_rate=self.learning_rate)
        self.som.train_random(data, 1000000)

    def discretize(self, data_point):
        # snap a data point to the prototype vector of its winning node
        x, y = self.som.winner(data_point)
        return self.som.weights[x, y]

    def visualize(self, filename, subset=None):
        box_side = 150
        border = 30
        text_height = 10
        text_offset_x = 60
        text_offset_y = 30
        w = (self.width * box_side) + ((self.width - 1) * border)
        h = (self.height * box_side) + ((self.height - 1) * border)
        img = Image.new('RGB', (w, h))
        draw = ImageDraw.Draw(img)
        for i in range(self.width):
            for j in range(self.height):
                offset = np.array([
                    i * (box_side + border),
                    j * (box_side + border),
                    (i + 1) * (box_side + border),
                    (j + 1) * (box_side + border)
                ])

                def coords(arr, offset):
                    a = arr + offset
                    return [(a[0], a[1]), (a[2], a[3])]

                def dimension_subset(vector, subset):
                    if subset is not None:
                        return vector[subset[0]:subset[1] + 1]
                    return vector

                # Draw the prototype vector box
                box_position = coords(np.array([0, 0, box_side, box_side]), offset)
                prototype_vector = dimension_subset(self.som.weights[i, j], subset)
                fill = int(self.norm_data_vector(prototype_vector) * 200) + 55
                draw.rectangle(box_position, fill=(0, fill, 0))

                # Write the prototype vector as text
                text_position = box_position[0]
                line_no = 0
                for value in prototype_vector:
                    rounded_value = round(value * 100) / 100
                    base_x, base_y = box_position[0]
                    text_position = (base_x + text_offset_x,
                                     base_y + text_offset_y + text_height * line_no)
                    draw.text(text_position, str(rounded_value))
                    line_no += 1

                right_fill, bottom_fill, diagonal_fill = 0, 0, 0

                # Draw the right border of the U-matrix
                if i != self.width - 1:
                    right_border_position = coords(
                        np.array([box_side + 1, 0, box_side + 1 + border, box_side]), offset)
                    prototype_vector_a = dimension_subset(self.som.weights[i, j], subset)
                    prototype_vector_b = dimension_subset(self.som.weights[i + 1, j], subset)
                    right_fill = 255 - int(self.data_vector_difference(
                        prototype_vector_a, prototype_vector_b) * 255)
                    draw.rectangle(right_border_position,
                                   fill=(right_fill, right_fill, right_fill))

                # Draw the bottom border of the U-matrix
                if j != self.height - 1:
                    bottom_border_position = coords(
                        np.array([0, box_side + 1, box_side, box_side + 1 + border]), offset)
                    prototype_vector_a = dimension_subset(self.som.weights[i, j], subset)
                    prototype_vector_b = dimension_subset(self.som.weights[i, j + 1], subset)
                    bottom_fill = 255 - int(self.data_vector_difference(
                        prototype_vector_a, prototype_vector_b) * 255)
                    draw.rectangle(bottom_border_position,
                                   fill=(bottom_fill, bottom_fill, bottom_fill))

                # Draw the diagonal border of the U-matrix
                if i != self.width - 1 and j != self.height - 1:
                    diagonal_border_position = coords(
                        np.array([box_side + 1, box_side + 1,
                                  box_side + 1 + border, box_side + 1 + border]), offset)
                    prototype_vector_a = dimension_subset(self.som.weights[i, j], subset)
                    prototype_vector_b = dimension_subset(self.som.weights[i + 1, j + 1], subset)
                    diagonal_fill = 255 - int(self.data_vector_difference(
                        prototype_vector_a, prototype_vector_b) * 255)
                    draw.rectangle(diagonal_border_position,
                                   fill=(diagonal_fill, diagonal_fill, diagonal_fill))
        img.save(filename)
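A minimal usage sketch of the discretizer above, on synthetic data, assuming the older MiniSom that exposes `som.weights` (which is what `discretize` relies on); note that `train` hardcodes a million iterations, so this is slow by design:

# Sketch: discretize continuous 3-D points onto a 4x4 SOM codebook.
# visualize() additionally needs the norm_data_vector and
# data_vector_difference helpers inherited from Discretizer.
import numpy as np

data = np.random.rand(500, 3)
disc = SOMDiscretizer(width=4, height=4)
disc.train(data)                  # runs the hardcoded 1e6 training iterations
print(disc.discretize(data[0]))   # nearest prototype vector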
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# Training the SOM
from minisom import MiniSom
# For the grid dimensions: there are not many customers, i.e. not many
# observations, so a 10 x 10 grid is enough.
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
# Initialize the weights
som.random_weights_init(X)
# Train the SOM on X (executes quickly, as the dataset is small)
som.train_random(X, 100)

# The map is a 2-dimensional grid containing all the final winning nodes;
# for each winning node we compute the MID (Mean Interneuron Distance).
"""The MID of a specific winning node is the mean of the distances between
that node and all the neurons inside a neighborhood of radius sigma. The
higher the MID, the farther the winning node is from its neighbors, so the
more it is an outlier. Since the majority of winning nodes represent the
patterns that are respected, a node far from that majority is far from the
general patterns, and that is how outliers are detected."""
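Reading the MID off the map programmatically: `distance_map()` returns each node's normalized mean inter-neuron distance, so outlier nodes can be ranked directly instead of eyeballed. A sketch reusing `som` and `X` from above; the 0.9 threshold is an arbitrary choice:

# Sketch: rank winning nodes by normalized MID and collect the samples
# mapped to nodes above an arbitrary 0.9 threshold as outlier candidates.
import numpy as np

mid = som.distance_map()                       # shape (10, 10), values in [0, 1]
suspect_nodes = [tuple(n) for n in np.argwhere(mid > 0.9)]
mappings = som.win_map(X)
outliers = [x for node in suspect_nodes for x in mappings.get(node, [])]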
sc = MinMaxScaler(feature_range=(0, 1))
# Fit the sc object to X so sc learns everything it needs for normalization
# (the min and max); fit_transform returns the normalized version of X.
X = sc.fit_transform(X)

# Training the SOM
# Unsupervised learning: we don't consider the dependent variable.
# sigma is the radius of the different neighborhoods.
# learning_rate is the hyperparameter that decides how much the weights are
# updated at each step: the higher the learning rate, the faster the
# convergence; the lower it is, the slower the SOM takes to build.
from minisom import MiniSom
som = MiniSom(x=25, y=25, input_len=30, sigma=1.0, learning_rate=0.3)
# Randomly initialize the weight vectors to small numbers close to 0.
som.random_weights_init(X)
# Train the SOM on X, the matrix of features in which patterns are recognized.
som.train_random(data=X, num_iteration=100)

# Visualising the results
# Two-dimensional grid of the winning nodes; for each we get the MID (Mean
# Inter-neuron Distance) inside the neighborhood defined by the radius. The
# higher the MID, the farther the winning node is from the general neurons --
# an outlier, i.e. a potential fraud. Frauds correspond to the winning nodes
# with the highest MID.
from pylab import bone, pcolor, colorbar, plot, show
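# The comments above describe the standard MiniSom visualization; a minimal
# sketch of that plot (what follows is illustrative, not the original code).
bone()                        # blank figure window
pcolor(som.distance_map().T)  # MID of every node as the background
colorbar()                    # legend: white = high MID = likely outlier
show()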
def train(self, som_dim: tuple = (250, 250), sigma: float = 1.0,
          learning_rate: float = 0.5, batch_size: int = 500,
          seed: int = 42, weight_init: str = 'random'):
    """Train self-organising map.

    Parameters
    ----------
    som_dim : tuple, (default=(250, 250))
        dimensions of SOM embedding (number of nodes)
    sigma : float, (default=1.0)
        the radius of the different neighbors in the SOM
    learning_rate : float, (default=0.5)
        alters the rate at which weights are updated
    batch_size : int, (default=500)
        size of batches used in training (alters number of total iterations)
    seed : int, (default=42)
        random seed
    weight_init : str, (default='random')
        how to initialise weights: either 'random' or 'pca' (initializes the
        weights to span the first two principal components)

    Returns
    -------
    None
    """
    som = MiniSom(som_dim[0], som_dim[1], self.dims, sigma=sigma,
                  learning_rate=learning_rate, neighborhood_function=self.nf,
                  random_seed=seed)
    if weight_init == 'random':
        som.random_weights_init(self.data)
    elif weight_init == 'pca':
        if not self.normalisation:
            print('Warning: It is strongly recommended to normalize the data '
                  'before initializing the weights if using PCA.')
        som.pca_weights_init(self.data)
    else:
        print('Warning: invalid value provided for "weight_init", valid input '
              'is either "random" or "pca". Defaulting to random '
              'initialisation of weights')
        som.random_weights_init(self.data)
    print("------------- Training SOM -------------")
    som.train_batch(self.data, batch_size, verbose=True)  # batch training
    self.xn = som_dim[0]
    self.yn = som_dim[1]
    self.map = som
    self.weights = som.get_weights()
    self.flatten_weights = self.weights.reshape(self.xn * self.yn, self.dims)
    print("\nTraining complete!")
    print("----------------------------------------")
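# After training, the map can be sanity-checked with MiniSom's built-in error
# metrics -- a sketch assuming an instance of this class named `model`
# (a hypothetical name; the attribute names follow the method above).
qe = model.map.quantization_error(model.data)  # mean distance to winning node
te = model.map.topographic_error(model.data)   # fraction of samples whose two
                                               # best nodes are not adjacent
print(f'quantization error: {qe:.4f}, topographic error: {te:.4f}')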
    # (tail of generateData(N); x3 and y3 are produced in the elided part above)
    x = []
    y = []
    for i in range(N):
        if x3[i] - y3[i] + 1 > 0 and -x3[i] - y3[i] + 1 > 0 and y3[i] > 0:
            x.append(x3[i])
            y.append(y3[i])
    trainData = np.zeros((len(x), 2))
    for i in range(len(x)):
        trainData[i][0] = x[i]
        trainData[i][1] = y[i]
    return trainData

data = generateData(200)
som = MiniSom(10, 10, input_len=2, sigma=0.3, learning_rate=0.5)  # initialization of a 10x10 SOM
som.random_weights_init(data)
som.train_random(data, 5000)  # trains the SOM with 5000 iterations
weights = som.get_weights()
mappings = som.win_map(data)

# Visualizing the result
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)  # mean distance of each node to its neighbors
colorbar()
for i, x in enumerate(data):
    w = som.winner(x)
    plot(w[0] + 0.5, w[1] + 0.5, '^r', markersize=10)
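# win_map (computed above but unused) groups samples by their winning node;
# a quick illustration of what it contains.
# It returns a dict: (i, j) node coordinates -> list of samples that node won.
for node, samples in sorted(mappings.items()):
    print(node, len(samples))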
from minisom import MiniSom


def learn(dataset, sigma=0.3, learning_rate=0.5, nb_iter=10000):
    nb_sample, nb_features = dataset.shape
    som = MiniSom(6, 6, nb_features, sigma=sigma, learning_rate=learning_rate)
    som.train_random(dataset, nb_iter)
    return som
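# A usage example for learn(); the dataset here is synthetic, purely for
# illustration.
import numpy as np

dataset = np.random.rand(300, 5)   # 300 samples, 5 features
som = learn(dataset, nb_iter=5000)
print(som.winner(dataset[0]))      # grid coordinates of the first sample's BMU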
import time

import numpy as np
from minisom import MiniSom


def get_dataset(file):
    """Returns the normalized data set of the specified file"""
    data = np.genfromtxt(file, delimiter=",", usecols=(0, 1, 2, 3))
    return np.apply_along_axis(lambda x: x / np.linalg.norm(x), 1, data)


train_dataset = get_dataset("data_sets/train.csv")
test_dataset = get_dataset("data_sets/test.csv")
validation_dataset = get_dataset("data_sets/validation.csv")

# Creates a SOM of 8x8 dimensions
som = MiniSom(8, 8, 4, sigma=1.3, learning_rate=0.5)
# Initializes the weights with info from the dataset
som.pca_weights_init(train_dataset)

print("Training started")
start = time.perf_counter()
som.train_batch(train_dataset, 10000)
end = time.perf_counter()
print("Training took {} seconds!".format(end - start))

# Generation of graphs
classification_graph("test", test_dataset)
classification_graph("validation", validation_dataset)
classification_graph("train", train_dataset)
frequency_graph(train_dataset)
error_graph(train_dataset)
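# frequency_graph is not defined in this excerpt; one plausible implementation,
# sketched here as an assumption, uses MiniSom's activation_response, which
# counts how often each node wins.
import matplotlib.pyplot as plt


def frequency_graph(dataset):
    # activation_response returns, per node, how many samples it won
    freq = som.activation_response(dataset)
    plt.pcolor(freq.T, cmap='Blues')
    plt.colorbar()
    plt.title('Winning frequency per node')
    plt.savefig('frequency.png')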
from pylab import imread, imshow, figure, show, subplot, title
from numpy import reshape, flipud, unravel_index, zeros
from minisom import MiniSom

# read the image
img = imread('tree.jpg')

# reshaping the pixels matrix
pixels = reshape(img, (img.shape[0] * img.shape[1], 3))

# SOM initialization and training
print('training...')
som = MiniSom(3, 3, 3, sigma=0.1, learning_rate=0.2)  # 3x3 = 9 final colors
som.random_weights_init(pixels)
starting_weights = som.get_weights().copy()  # saving the starting weights
som.train_random(pixels, 100)

print('quantization...')
qnt = som.quantization(pixels)  # quantize each pixel of the image
print('building new image...')
clustered = zeros(img.shape)
for i, q in enumerate(qnt):  # place the quantized values into a new image
    clustered[unravel_index(i, (img.shape[0], img.shape[1]))] = q
print('done.')

# show the result
figure(1)
subplot(221)
title('original')
imshow(flipud(img))
subplot(222)
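# The excerpt stops after subplot(222); based on MiniSom's color-quantization
# example, the remaining panels plausibly look like this (a reconstruction,
# not the original code).
title('result')
imshow(flipud(clustered))
subplot(223)
title('initial colors')
imshow(starting_weights, interpolation='none')
subplot(224)
title('learned colors')
imshow(som.get_weights(), interpolation='none')
show()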
        dataset[col] = dataset[col].str.rstrip('%').astype('float') / 100
    else:
        dataset[col] = dataset[col].replace(r'[\$,)]', '', regex=True) \
                                   .replace(r'[(]', '-', regex=True).astype(float)
dataset = dataset.fillna(dataset.mean())
X = dataset.iloc[:, :].values
y = dataset.iloc[:, 0].values

# feature scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# train SOM
from minisom import MiniSom
som = MiniSom(4, 4, input_len=30)
som.random_weights_init(X)
som.train_random(X, 100)

# visualization
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
show()

# find cities: map each node's samples back to the original feature scale
location_list = []
mapping = som.win_map(X)
for keys in mapping.keys():
    location_list.append(sc.inverse_transform(mapping.get(keys)))
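# Each entry of location_list holds the de-normalized samples grouped under
# one node; a small sketch to inspect the grouping (illustrative only).
for node, rows in zip(mapping.keys(), location_list):
    print('node', node, '->', len(rows), 'cities')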
# importing the dataset
dataset = pd.read_csv('Credit_Card_Applications.csv')
x = dataset.iloc[:, :-1]  # everything except the last column
y = dataset.iloc[:, -1]   # the last column only

# feature scaling (data preprocessing)
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(x)  # normalization

# we are going to use a library
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_random(X, num_iteration=100)

# we have to visualize the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)  # all distances for all the nodes
colorbar()  # add a bar
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):  # each x is one customer
    w = som.winner(x)  # the winning node for a customer
    # (the call below was truncated in the original; completed here following
    # the pattern of the other credit-card snippets in this collection)
    plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]], markerfacecolor='None',
         markeredgecolor=colors[y[i]], markersize=10, markeredgewidth=2)
show()
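# To go from the picture to a concrete list of suspicious customers, the usual
# follow-up is sketched below; (8, 1) and (6, 8) are placeholder coordinates
# for the white (high-MID) nodes you would read off the plot.
import numpy as np

mappings = som.win_map(X)
frauds = np.concatenate((mappings[(8, 1)], mappings[(6, 8)]), axis=0)
frauds = sc.inverse_transform(frauds)   # back to the original feature scale
print('potential frauds:', frauds[:, 0])  # first column is the customer id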
y = dataset.iloc[:, -1].values
# We only use X in training because this is unsupervised deep learning: we are
# judging customer eligibility, not predicting classes, so no dependent
# variable is considered.

# Feature scaling (between 0 and 1) -- practically compulsory for deep
# learning, given the high computation involved.
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# ------- Training the SOM
# Here we are using MiniSom 1.0: https://test.pypi.org/project/MiniSom/1.0/
# Keep the developer's minisom.py file in your working directory.
from minisom import MiniSom
# x and y are the dimensions of the SOM (the more data, i.e. the more
# customers, the larger the grid); input_len is the number of features in the
# training dataset X (14), plus 1 for the customer id.
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
# sigma is the radius of a neighborhood (default 1.0); learning_rate decides
# how much the weights are updated at each step (default 0.5) -- the higher
# the learning_rate, the faster the convergence; the lower, the longer the
# self-organising map takes to build. A decay_function can be used to improve
# convergence.
som.random_weights_init(X)  # the weight-initialization method provided by MiniSom 1.0
som.train_random(data=X, num_iteration=100)  # num_iteration = number of repetitions

# --------- Visualizing the results
# We compute the mean inter-neuron distance (MID), i.e. the mean Euclidean
# distance between a neuron and its neighbourhood, so we can detect outliers
# that lie far from their neighbouring neurons. The larger the MID, the closer
# to white in colour.
from pylab import bone, pcolor, colorbar, plot, show
# BUILDING the self-organising map
bone()  # initialize the figure, i.e. the window that contains the map
pcolor(som.distance_map().T)
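# The excerpt was cut off at pcolor(; the customary continuation of this
# tutorial, reconstructed here as a sketch mirroring the other credit-card
# snippets in this collection.
colorbar()  # legend: white cells have the highest MID, i.e. likely outliers
markers = ['o', 's']  # circle = not approved, square = approved
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som.winner(x)
    plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]], markerfacecolor='None',
         markeredgecolor=colors[y[i]], markersize=10, markeredgewidth=2)
show()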
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pylab import pcolor, colorbar, plot  # for data visualization
from minisom import MiniSom

base = pd.read_csv("wines.csv")
X = base.iloc[:, 1:14].values
y = base.iloc[:, 0].values

# The values of X are not normalized, so we need the following code.
normalizador = MinMaxScaler(feature_range=(0, 1))
X = normalizador.fit_transform(X)

som = MiniSom(8, 8, input_len=X.shape[1], sigma=1.0, learning_rate=0.5,
              random_seed=2)
'''To choose the map size, use the rule of 5*sqrt(N). With 178 records:
5 * sqrt(178) ≈ 66.7, rounded down to 64, which gives an 8x8 matrix.
Sigma = radius around the BMU (best matching unit).'''
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)  # 100 is enough for most cases

som._weights
'''All the values shown in the weights are the new points created to help
build the map.'''
som._activation_map  # here you can inspect the values of the map itself
q = som.activation_response(X)  # how many times each node won
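# A short sketch of how this wine map is typically visualized; the marker
# shapes and the assumption that classes are labelled 1-3 are illustrative.
pcolor(som.distance_map().T)  # inter-neuron distances as background
colorbar()
markers = ['o', 's', 'D']     # one marker per wine class
colors = ['r', 'g', 'b']
for i, x in enumerate(X):
    w = som.winner(x)
    plot(w[0] + 0.5, w[1] + 0.5, markers[y[i] - 1], markerfacecolor='None',
         markeredgecolor=colors[y[i] - 1], markersize=10, markeredgewidth=2)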
onehotencoder = OneHotEncoder(categorical_features=[0])
X = onehotencoder.fit_transform(X).toarray()  # already a NumPy array after toarray()
X12 = np.asarray(X12, dtype=int)
Xnew = np.append(X, X12, axis=1)
Xfin = np.delete(Xnew, 12, axis=1)

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X_SOM = sc.fit_transform(X)

# Training the SOM
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=17, sigma=1.0, learning_rate=0.5)
som.random_weights_init(X_SOM)
som.train_random(data=X_SOM, num_iteration=100)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's', 'x', 'o', 's', 'x', 'v']
colors = ['r', 'g', 'b', 'w', 'y', 'c', 'm']
for i, x in enumerate(X_SOM):
    w = som.winner(x)
    # (the call below was truncated in the original; completed following the
    # pattern of the other snippets in this collection)
    plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]], markerfacecolor='None',
         markeredgecolor=colors[y[i]], markersize=10, markeredgewidth=2)
show()
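# Note: the categorical_features argument was removed from OneHotEncoder in
# scikit-learn 0.22; on current versions the equivalent preprocessing would be
# written with ColumnTransformer -- a sketch, adjust column indices to your data.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(
    [('onehot', OneHotEncoder(), [0])],  # one-hot encode column 0
    remainder='passthrough')             # keep the other columns unchanged
X = ct.fit_transform(X)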
# Importing the data
dataset = pd.read_csv("Credit_Card_Applications.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Feature scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# Training the SOM
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15)
som.random_weights_init(X)
som.train_random(X, num_iteration=100)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ("o", "s")
colors = ("r", "g")
for i, x in enumerate(X):
    w = som.winner(x)
Features = []
for i in range(0, len(c_features[0])):
    # column = c_features[:][i]
    column = [row[i] for row in c_features]
    Features.append(column)
print "=== Size of Features used in SOM: %d" % len(Features)

# Self Organising Map
# x = input('enter x value for grid: ')
# y = input('enter y value for grid: ')
# iteration = input("Input number of iterations: ")
x = 6
y = 6
iteration = 100

# Create a SOM
som = MiniSom(x, y, 20, sigma=0.3, learning_rate=0.5)
print "Training..."
som.train_random(Features, iteration)  # trains the SOM with 100 iterations
print "...ready!"

# Map the output neuron position to a unique cluster id:
# (0,0) --> 0, (0,1) --> 1, and so on.
feature_map = {}
k = 0
for i in range(x):
    for j in range(y):
        feature_map[(i, j)] = k
        k += 1
# print feature_map, '\n'
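# With feature_map in place, each sample can be assigned its cluster id --
# a short sketch continuing the code above.
cluster_ids = [feature_map[som.winner(f)] for f in Features]
print(cluster_ids)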
# load the digits dataset from scikit-learn
# about 720 samples, roughly 180 samples per class
# the digits represented are 0, 1, 2 and 3
from sklearn import datasets

digits = datasets.load_digits(n_class=4)
data = digits.data   # matrix where each row is a vector that represents a digit
num = digits.target  # num[i] is the digit represented by data[i]

# training the som
from minisom import MiniSom
som = MiniSom(20, 20, 64, sigma=.8, learning_rate=0.5)
print("Training...")
som.train_random(data, 1500)  # random training
print("\n...ready!")

# plotting the results
from pylab import text, show, cm, axis, figure, subplot, imshow, zeros
wmap = {}
figure(1)
im = 0
for x, t in zip(data, num):  # scatterplot
    w = som.winner(x)
    wmap[w] = im
    text(w[0] + .5, w[1] + .5, str(t),
         color=cm.Dark2(t / 4.), fontdict={'weight': 'bold', 'size': 11})
    im = im + 1
axis([0, som.get_weights().shape[0], 0, som.get_weights().shape[1]])

figure(2, facecolor='white')
cnt = 0
for j in reversed(range(20)):  # images mosaic
    for i in range(20):
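        # The mosaic loop was cut off here; in MiniSom's digits example the
        # body (reconstructed as a sketch) renders the digit mapped to each node.
        subplot(20, 20, cnt + 1, frameon=False, xticks=[], yticks=[])
        if (i, j) in wmap:
            # show the sample mapped to this node as an 8x8 image
            imshow(digits.images[wmap[(i, j)]],
                   cmap='Greys', interpolation='nearest')
        else:
            imshow(zeros((8, 8)), cmap='Greys')
        cnt = cnt + 1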
# Take only the last column
y = credit_card_applications_df.iloc[:, -1].values

# Feature scaling: use normalization (MinMaxScaler)
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# We will be using the minisom library here
from minisom import MiniSom
# Our dataset is small, so we just create a 10 by 10 grid: x = 10, y = 10.
# input_len = number of features in our dataset, 14 + 1 = 15.
# sigma: radius of the different neighborhoods in the grid.
minisom = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
minisom.random_weights_init(X)
minisom.train_random(data=X, num_iteration=200)

# Visualize the SOM
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(minisom.distance_map().T)
colorbar()  # colors close to white are frauds
markers = ["o", "s"]  # s = square
colors = ["r", "g"]   # red = didn't get approval, green = got approval
for i, x in enumerate(X):
    winning_node = minisom.winner(x)
    # (the call below was truncated in the original; completed following the
    # pattern of the other credit-card snippets in this collection)
    plot(winning_node[0] + 0.5,  # place the marker at the cell center using .5
         winning_node[1] + 0.5,
         markers[y[i]], markerfacecolor='None',
         markeredgecolor=colors[y[i]], markersize=10, markeredgewidth=2)
show()
In particular it shows how to train MiniSom and how to visualize the result.
ATTENTION: pylab is required for the visualization.
"""
# reading the iris dataset in the csv format
# (downloaded from http://aima.cs.berkeley.edu/data/iris.csv)
# rn = len(open('iris4.csv').readlines())
data = genfromtxt('data5.csv', delimiter=',', dtype=float)
data = numpy.nan_to_num(data)
print(data)
data = apply_along_axis(lambda x: x / linalg.norm(x), 1, data)  # data normalization

### Initialization and training ###
som = MiniSom(40, 40, 136, sigma=1.0, learning_rate=0.5)
som.random_weights_init(data)
print("Training...")
som.train_random(data, 10000)  # random training
print("\n...ready!")

### Plotting the response for each pattern in the iris dataset ###
from pylab import plot, axis, show, pcolor, colorbar, bone
bone()
pcolor(som.distance_map().T)  # plotting the distance map as background
colorbar()
target = genfromtxt('class5.csv', delimiter=',', usecols=(0), dtype=int)  # loading the labels
t = zeros(len(target), dtype=int)
print(target)
dataset = pd.read_csv('Credit_Card_Applications.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Feature scaling:
# no need to scale y (the class) -- just X.
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# Train the SOM:
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5,
              random_seed=1)
# initialize weights:
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)

# Visualization:
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']  # circle, square
colors = ['r', 'g']   # red, green
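# The snippet stops after defining the markers; the plotting loop that usually
# follows (a sketch consistent with the other credit-card examples above):
for i, x in enumerate(X):
    w = som.winner(x)  # winning node for customer i
    # circle/red = not approved, square/green = approved
    plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]], markerfacecolor='None',
         markeredgecolor=colors[y[i]], markersize=10, markeredgewidth=2)
show()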
# Taking all rows and all columns except the last; the SOM is trained on every
# column except the last.
X = dataset.iloc[:, :-1].values
print "Independent variables:\n", X
y = dataset.iloc[:, -1].values  # taking all rows, last column only
print "Account Approval:\n", y

## Feature Scaling
# Scaling makes it easier for a deep learning model to train when there are
# many dimensions.
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)
print "Normalized X:\n", X

## Training the SOM
# Definition: MiniSom(x, y, input_len, sigma=1.0, learning_rate=0.5,
# decay_function=None, random_seed=None).
# A 10 by 10 grid is chosen because the number of observations is small; use a
# larger grid for a larger user base. input_len = number of features,
# including the customer id so customers can be identified later.
# sigma = radius of the neighbourhoods on the grid. The higher the learning
# rate, the faster the convergence; decay_function can be used to improve it.
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
# Initializing the weights randomly, passing in the data to be trained.
som.random_weights_init(X)
som.train_random(X, num_iteration=100)  # apply steps 4 to 9, for 100 iterations

## Visualizing the results
bone()
# som.distance_map() returns all the Mean Inter-Neuron Distances (MID) in one matrix
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']