def SOM(data,leninput,lentarget):
    som = MiniSom(16,16,leninput,sigma=1.0,learning_rate=0.5)
    som.random_weights_init(data)
    print("Training...")
    som.train_random(data,10000) # training with 10000 iterations
    print("\n...ready!")
    
    numpy.save('weight_som',som.weights)
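If the saved codebook is reused later (as the clustering snippets further down do), it must be loaded into a SOM with the same grid geometry. A minimal sketch of the round trip, assuming the same `data`/`leninput` as above and the attribute-style weights API this page uses (recent MiniSom releases store the codebook as `_weights` and expose `get_weights()` instead):

som = MiniSom(16, 16, leninput, sigma=1.0, learning_rate=0.5)  # same geometry as the trained map
som.weights = numpy.load('weight_som.npy')  # numpy.save added the .npy suffix
for sample in data:
    print(som.winner(sample))  # (row, col) of the best-matching unit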
Example #2
class SomModel(Model):
      def __init__(self,input_length):
          from minisom import MiniSom
          self.som = MiniSom(10, 10, input_length,sigma=0.3,learning_rate=0.1,normalize=True)

      def run(self,inp):
          self.som.train_single_instance(inp.flatten())
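Note that `normalize=True` and `train_single_instance` are not part of upstream MiniSom; they come from a modified fork. With the stock API, a single online learning step can be approximated through `update` (a sketch; the iteration counters are arbitrary):

import numpy as np
from minisom import MiniSom

som = MiniSom(10, 10, 4, sigma=0.3, learning_rate=0.1)
x = np.random.rand(4)
# update(sample, winner, t, max_iteration): one learning step, with t and
# max_iteration driving the decay of sigma and the learning rate
som.update(x, som.winner(x), 0, 1)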
Example #3
class Som:
    def __init__(self):
        self.core = MiniSom(50,50,6,sigma=.8,learning_rate=.5) # needs to match generating minisom command (specifically the load_map)
        self.core.load_map()
        self.callme = rospy.Service("mapping", Compute, self.callback)
	print "SOM setup complete"
    
    def callback(self, data):
        vector = np.array([data.fx, data.fy, data.fz, data.tx, data.ty, data.tz]) # format as needed
        print vector
        w = self.core.winner(vector)
        return w[0],w[1]
def test_som():
    print "Clustering.."
    
    session_log_db = db.session_log
    allTopic = articles.distinct("topic")
    lentopic = len(allTopic)
    uniqueTopic = []
    for t in allTopic:
        uniqueTopic.append("Topik " + str(t).strip())
    
    lebarSOM = lentopic*lentopic + lentopic*2 + 1
    panjangSOM = session_log_db.find({"data_uji":no_uji}).count()
    #somInput = zeros((panjangSOM,lebarSOM),dtype=int16)
    somInput = []
    oriSess = []
    for s in session_log_db.find({"data_uji":no_uji}):
        somInput.append(getPresedenceMatrix(convertSession(s["session"],uniqueTopic),uniqueTopic,1))
        oriSess.append(s["session"])

    som = MiniSom(16,16,lentopic,sigma=1.0,learning_rate=0.5)
    som.weights = numpy.load('weight_som.npy')
    #print som.weights
    outfile = open('cluster-result.csv','w')
    seq_number = 0
    cluster_mongo = db.cluster_result
    cluster_mongo.remove({"data_uji":no_uji})
    for cnt,xx in enumerate(somInput):
        w = som.winner(xx) # getting the winner
        #print cnt
        #print xx
        #print w
        
        #for z in xx:
        #    outfile.write("%s " % str(z))
        outfile.write("%s " % str(("|".join(oriSess[seq_number]))))
        outfile.write("%s-%s \n" % (str(w[0]),str(w[1])))
        cluster_mongo.insert({"topik":"|".join(oriSess[seq_number]),"cluster":(str(w[0])+"-"+str(w[1])),"data_uji":no_uji})
        seq_number = seq_number + 1
        #outfile.write("%s %s\n" % str(xx),str(w))
        # place a marker on the winning position for the sample xx
        #plot(w[0]+.5,w[1]+.5,markers[t[cnt]],markerfacecolor='None',
        #     markeredgecolor=colors[t[cnt]],markersize=12,markeredgewidth=2)
    outfile.close()
    #TopikCluster()
    
    html = '<div role="alert" class="alert alert-success alert-dismissible fade in">'
    html = html + ' <button aria-label="Close" data-dismiss="alert" class="close" type="button"><span aria-hidden="true">Close</span></button>'
    html = html + 'Berhasil Melakukan Clustering</div>'  # Indonesian: "Clustering performed successfully"
    
    return html
def test_recommendation():
    uji_profil = db.uji_profil
    current_seq = []
    for t in uji_profil.find({}):
        current_seq.append("Topik " + str(t['topic']))

    '''
    APPLY SOM
    '''
    allTopic = articles.distinct("topic")
    lentopic = len(allTopic)
    uniqueTopic = []
    for t in allTopic:
        uniqueTopic.append("Topik " + str(t).strip())

    lebarSOM = lentopic*lentopic + lentopic*2 + 1
    
    somInput = []
    somInput.append(getPresedenceMatrix(convertSession(current_seq,uniqueTopic),uniqueTopic,1))
    som = MiniSom(16,16,lentopic,sigma=1.0,learning_rate=0.5)
    som.weights = numpy.load('weight_som.npy')
    cluster_winner = ""
    for cnt,xx in enumerate(somInput):
        w = som.winner(xx) # getting the winner
        cluster_winner = (str(w[0])+"-"+str(w[1]))

    '''
    SEARCH FOR THE PATTERN IN PARTICULAR CLUSTER
    '''

    print cluster_winner
    print current_seq

    prefix_result = db.prefix_result
    prefix_cluster = prefix_result.find({"cluster":cluster_winner,"data_uji":no_uji}).sort("min_sup",pymongo.DESCENDING)

    topik_rekomendasi = getTopikRekomendasi(current_seq,prefix_cluster)

    if topik_rekomendasi == "":
        prefix_cluster = prefix_result.find({"data_uji":no_uji}).sort("min_sup",pymongo.DESCENDING)
        topik_rekomendasi = getTopikRekomendasi(current_seq,prefix_cluster)
    
    html = "--tidak ada topik rekomendasi--"
    if(topik_rekomendasi!=""):
        the_topik = topik_rekomendasi.replace("Topik","").strip()
        html = getTestArticle(the_topik,"Rekomendasi 1","accordion_recommendation",'col_rek1',"")
        html += getTestArticle(the_topik,"Rekomendasi 2","accordion_recommendation",'col_rek2',"")
        html += getTestArticle(the_topik,"Rekomendasi 3","accordion_recommendation",'col_rek3',"")

    return html
Example #6
class KuKuModel(Model):
    
      def __init__(self,proprioception_input_length,sensory_input_length,reservoir_size):
            # Build the Reservoir
            tau = .1 # execution timestep for the cortical rate model
            sigma = .001 # intra-reservoir weights
            eps = .1 # learning rate
            som_size = 10*10
            self.sensory_input_length = sensory_input_length
            self.proprioception_input_length = proprioception_input_length
            
            full_reservoir_input_length = proprioception_input_length+som_size
            # Nodes: units, tau, method
            self.reservoir_input = esn.Node((full_reservoir_input_length,), 0, esn._load )
            self.reservoir = esn.Node((reservoir_size,), tau, esn._reservoir )
            self.reservoir_output = esn.Node((som_size,), 0, esn._load )
            
            # Arcs: target, source, weight, eps
            # input from som
            self.d_P  = esn.Arc( self.reservoir, self.reservoir_input, sigma, 0 )
            self.d_P.initConnections( numpy.random.randn, self.reservoir.shape+self.reservoir_input.shape ) # type of init numpy func
            #print d_P.connections

            # recurrent connections intra node
            self.r_P  = esn.Arc( self.reservoir, self.reservoir, sigma, 0 )
            self.r_P.initConnections( numpy.random.randn, self.reservoir.shape+self.reservoir.shape ) # type of init numpy func
            #print r_P.connections

            # input from som
            self.d_out  = esn.Arc( self.reservoir_output, self.reservoir, 0, eps )
            self.d_out.initConnections( numpy.random.randn, self.reservoir_output.shape+self.reservoir.shape ) # type of init numpy func
            #print d_out.connections
                      
            from minisom import MiniSom
            self.som = MiniSom(10, 10, sensory_input_length,sigma=0.3,learning_rate=0.1,normalize=True)
            
            self.previous_som_activation = numpy.zeros((10,10))
          

      def run(self,inp):
          self.som.train_single_instance(inp[:self.sensory_input_length])
          
          self.reservoir_input.update(numpy.append(self.previous_som_activation.flatten().copy(),inp[-self.proprioception_input_length:])) # 3
          self.reservoir.update(self.d_P.read())
          self.reservoir_output.update(self.d_out.read())
          print "error:",self.som.activation_map.flatten()  - self.reservoir_output.state
          self.d_out.learn(self.som.activation_map.flatten()  - self.reservoir_output.state )
          
          self.previous_som_activation = self.som.activation_map.flatten().copy()
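The reservoir reads `self.som.activation_map`, again an attribute of the modified fork. Upstream MiniSom exposes the same information through `activate`, which returns the matrix of distances between a sample and every codebook vector; a small sketch:

import numpy as np
from minisom import MiniSom

som = MiniSom(10, 10, 8, sigma=0.3, learning_rate=0.1)
x = np.random.rand(8)
response = som.activate(x)  # 10x10 matrix; the winner is the argmin
print(response.shape, np.unravel_index(response.argmin(), response.shape))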
Example #7
    def make_treeview(self, data, liststore):
	#i = 0
	cols = self.columns[self.combobox.get_active()]
	#print type(cols)
	#print len(cols)
	for d in data:
	  #i += 1

	  tmp = d.tolist()
	  #print 'tmp', tmp
	  #while len(tmp) < cols:
	    #tmp.append(False)
	    #print 'tmp', tmp
	    #cols = cols - 1
	  Qe = MiniSom.quantization_error_subset(self.som,d,len(cols))
	  #print tmp
	  tmp.append(Qe)
	  tmp.append(4 * Qe ** 0.5)
	  liststore.append(tmp)

	treeview = gtk.TreeView(model=liststore)
	#i = 0
	for d in range(len(self.test_data[0])):
	  #print i
	  #i += 1
	  renderer_text = gtk.CellRendererText()
	  column_text = gtk.TreeViewColumn(self.pattern_labels[d], renderer_text, text=d)
	  treeview.append_column(column_text)
	column_text = gtk.TreeViewColumn('Qe', renderer_text, text=d+1)
	treeview.append_column(column_text)
	column_text = gtk.TreeViewColumn('NLT', renderer_text, text=d+2)
	treeview.append_column(column_text)

	return treeview
Example #8
 def setUp(self):
     self.som = MiniSom(5, 5, 1)
     for w in self.som.weights:  # checking weights normalization
         assert_almost_equal(1.0, np.linalg.norm(w))
     self.som.weights = np.zeros((5, 5))  # fake weights
     self.som.weights[2, 3] = 5.0
     self.som.weights[1, 1] = 2.0
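This fixture uses the old public `weights` attribute; current MiniSom renames it to `_weights` and adds a `get_weights()` accessor, as the full test class near the bottom of this page shows. For read access the accessor is enough:

from minisom import MiniSom

som = MiniSom(5, 5, 1, random_seed=1)
codebook = som.get_weights()  # ndarray of shape (5, 5, 1)
print(codebook.shape)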
Example #9
      def __init__(self,proprioception_input_length,sensory_input_length,reservoir_size):
            # Build the Reservoir
            tau = .1 # execution timestep for the cortical rate model
            sigma = .001 # intra-reservoir weights
            eps = .1 # learning rate
            som_size = 10*10
            self.sensory_input_length = sensory_input_length
            self.proprioception_input_length = proprioception_input_length
            
            full_reservoir_input_length = proprioception_input_length+som_size
            # Nodes: units, tau, method
            self.reservoir_input = esn.Node((full_reservoir_input_length,), 0, esn._load )
            self.reservoir = esn.Node((reservoir_size,), tau, esn._reservoir )
            self.reservoir_output = esn.Node((som_size,), 0, esn._load )
            
            # Arcs: target, source, weight, eps
            # input from som
            self.d_P  = esn.Arc( self.reservoir, self.reservoir_input, sigma, 0 )
            self.d_P.initConnections( numpy.random.randn, self.reservoir.shape+self.reservoir_input.shape ) # type of init numpy func
            #print d_P.connections

            # recurrent connections intra node
            self.r_P  = esn.Arc( self.reservoir, self.reservoir, sigma, 0 )
            self.r_P.initConnections( numpy.random.randn, self.reservoir.shape+self.reservoir.shape ) # type of init numpy func
            #print r_P.connections

            # input from som
            self.d_out  = esn.Arc( self.reservoir_output, self.reservoir, 0, eps )
            self.d_out.initConnections( numpy.random.randn, self.reservoir_output.shape+self.reservoir.shape ) # type of init numpy func
            #print d_out.connections
                      
            from minisom import MiniSom
            self.som = MiniSom(10, 10, sensory_input_length,sigma=0.3,learning_rate=0.1,normalize=True)
            
            self.previous_som_activation = numpy.zeros((10,10))
def SOM(data,leninput,lentarget):
    som = MiniSom(5,5,leninput,sigma=1.0,learning_rate=0.5)
    som.random_weights_init(data)
    print("Training...")
    som.train_batch(data,10000) # training with 10000 iterations
    print("\n...ready!")
    
    numpy.save('weight_som', som.weights)  # numpy.save appends .npy, matching the 'weight_som.npy' loaded elsewhere
   
    bone()
    pcolor(som.distance_map().T) # distance map as background
    colorbar()
    
    t = zeros(lentarget,dtype=int)
    
    # use different colors and markers for each label
    markers = ['o','s','D']
    colors = ['r','g','b']
    outfile = open('cluster-result.csv','w')
    for cnt,xx in enumerate(data):
        w = som.winner(xx) # getting the winner
        #print cnt
        #print xx
        #print w
        
        for z in xx:
            outfile.write("%s " % str(z))
        outfile.write("%s-%s \n" % (str(w[0]),str(w[1])))
        
        #outfile.write("%s %s\n" % str(xx),str(w))
        # place a marker on the winning position for the sample xx
        #plot(w[0]+.5,w[1]+.5,markers[t[cnt]],markerfacecolor='None',
        #     markeredgecolor=colors[t[cnt]],markersize=12,markeredgewidth=2)
    outfile.close()
Example #11
def testSOMs():
    from sklearn import datasets
    from minisom import MiniSom

    d = datasets.load_iris()
    data = np.apply_along_axis(lambda x: x/np.linalg.norm(x), 1, d['data']) # data normalization

    som = MiniSom(7, 7, 4, sigma=1.0, learning_rate=0.5)

    som.random_weights_init(data)
    print("Training...")
    som.train_random(data, 1000) # random training
    print("\n...ready!")

    ### Plotting the response for each pattern in the iris dataset ###
    from pylab import plot,axis,show,pcolor,colorbar,bone
    bone()
    pcolor(som.distance_map().T) # plotting the distance map as background
    colorbar()
    t = d['target']
    # use different colors and markers for each label
    markers = ['o','s','D']
    colors = ['r','g','b']
    for cnt,xx in enumerate(data):
     w = som.winner(xx) # getting the winner
     # place a marker on the winning position for the sample xx
     plot(w[0]+.5,w[1]+.5,markers[t[cnt]],markerfacecolor='None',
        markeredgecolor=colors[t[cnt]],markersize=12,markeredgewidth=2)
    axis([0,som.weights.shape[0],0,som.weights.shape[1]])
    show() # show the figure
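Besides eyeballing the distance map, the fit can be checked numerically with `quantization_error`, the average distance between each sample and its best-matching codebook vector (the unit tests below rely on it). A one-line continuation of the iris example:

print('quantization error:', som.quantization_error(data))  # lower is a tighter fit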
def SOM(data,leninput,lentarget,alpha_som,omega_som):
    som = MiniSom(16,16,leninput,sigma=omega_som,learning_rate=alpha_som)
    som.random_weights_init(data)
    print("Training...")
    som.train_batch(data,20000) # training with 20000 iterations
    print("\n...ready!")
    
    numpy.save('weight_som',som.weights)
   
    bone()
    pcolor(som.distance_map().T) # distance map as background
    colorbar()
    
    t = zeros(lentarget,dtype=int)
    
    # use different colors and markers for each label
    markers = ['o','s','D']
    colors = ['r','g','b']
    outfile = open('cluster-result.csv','w')
    for cnt,xx in enumerate(data):
        w = som.winner(xx) # getting the winner
        
        
        for z in xx:
            outfile.write("%s " % str(z))
        outfile.write("%s-%s \n" % (str(w[0]),str(w[1])))
        
        
    outfile.close()
Example #13
    def train_som(self):

        training_data = [v[0] for v in self.vectors]
        from minisom import MiniSom
        size = len(training_data[0])
        self.som = MiniSom(10, 10, size, sigma=0.3, learning_rate=0.5)
        print "Training SOM..."
        self.som.train_random(training_data, 100)
        print "...ready!"
Example #14
    def init_som(self, widget=None, data=None):
      ##print self.data
      ### Initialization and training ###
      cols = self.columns[self.combobox.get_active()]
      data = self.data[:, 0:len(cols)]

      #print len(cols)
      self.som = MiniSom(self.width_spin_button.get_value_as_int(), self.height_spin_button.get_value_as_int(), len(cols),sigma=1.2,learning_rate=0.5)
#      self.som.weights_init_gliozzi(data)
      self.som.random_weights_init(data)
Example #15
def train_som(data, offset=None):
    """
    offset: offset between points used for training
    """
    
    if offset:
        data = data[::offset, :]
    
    som = MiniSom(
        param['nr_rows'],
        param['nr_cols'], 
        data.shape[1], 
        data, 
        sigma=param['sigma'], 
        learning_rate=param['learning_rate'], 
        norm='minmax')
        
    #som.random_weights_init() # choose initial nodes from data points
    som.train_random(param['nr_epochs']) # random training
    
    return som
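This constructor signature (passing `data` and `norm='minmax'` straight to `MiniSom`) belongs to a modified fork as well; upstream takes only the grid size, input length, and hyperparameters, and expects normalization to be done beforehand. A rough upstream equivalent, with placeholder values standing in for the entries of `param`:

import numpy as np
from minisom import MiniSom

def train_som_upstream(data, nr_rows=10, nr_cols=10, sigma=1.0,
                       learning_rate=0.5, nr_epochs=1000, offset=None):
    if offset:
        data = data[::offset, :]
    # min-max scaling by hand, since upstream has no norm= option
    mins, maxs = data.min(axis=0), data.max(axis=0)
    data = (data - mins) / np.where(maxs > mins, maxs - mins, 1)
    som = MiniSom(nr_rows, nr_cols, data.shape[1], sigma=sigma,
                  learning_rate=learning_rate)
    som.train_random(data, nr_epochs)
    return som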
def test_som(alpha_som,omega_som):
    
    print "Clustering pada Data Uji " + str(no_uji)
    
    session_log_db = db.session_log
    allTopic = articles.distinct("topic")
    lentopic = len(allTopic)
    uniqueTopic = []
    for t in allTopic:
        uniqueTopic.append("Topik " + str(t).strip())
    
    lebarSOM = lentopic*lentopic + lentopic*2 + 1
    panjangSOM = session_log_db.find({"data_uji":no_uji}).count()
    #somInput = zeros((panjangSOM,lebarSOM),dtype=int16)
    somInput = []
    oriSess = []
    for s in session_log_db.find({"data_uji":no_uji}):
        somInput.append(getPresedenceMatrix(convertSession(s["session"],uniqueTopic),uniqueTopic,1))
        oriSess.append(s["session"])

    
    som = MiniSom(16,16,lentopic,sigma=omega_som,learning_rate=alpha_som)
    som.weights = numpy.load('weight_som.npy')
    #print som.weights
    outfile = open('cluster-result.csv','w')
    seq_number = 0
    cluster_mongo = db.cluster_result
    cluster_mongo.remove({"data_uji":no_uji})
    for cnt,xx in enumerate(somInput):
        w = som.winner(xx) # getting the winner
        outfile.write("%s " % str(("|".join(oriSess[seq_number]))))
        outfile.write("%s-%s \n" % (str(w[0]),str(w[1])))
        cluster_mongo.insert({"topik":"|".join(oriSess[seq_number]),"cluster":(str(w[0])+"-"+str(w[1])),"data_uji":no_uji})
        seq_number = seq_number + 1
    outfile.close()
    #TopikCluster()
    
    return "Berhasil Melakukan Clustering"
Example #17
    def __init__(self, parent, controller):
     
        ## initialize the tk.Frame
        tk.Frame.__init__(self, parent)
        
        style.use("ggplot")

        self.figure = pl.figure(1)
        self.a = self.figure.add_subplot(111)

        self.canvas = FigureCanvasTkAgg(self.figure, self)
        self.canvas.get_tk_widget().grid(sticky="news")
        self.canvas._tkcanvas.grid(sticky="news")
        
        ## Initialization
        self.som = MiniSom(10,10,136,sigma=1.0,learning_rate=0.5)
Example #18
    def update_treeview(self, data, liststore):
      	cols = len(self.columns[self.combobox.get_active()])

	for i, d in enumerate(data):

	  for j in range(len(d)):
	    #print j

	    liststore[i][j] = d[j]

	    if j >= cols:
	      liststore[i][j] = -999
	  Qe = MiniSom.quantization_error_subset(self.som,d,cols)

	  #print d, liststore[i]
	  liststore[i][-2]= Qe
	  liststore[i][-1]= 4 * Qe ** 0.5
Example #19
    def _minisomrandom(self):
        """Clusters sentence vectors using minisomrandom algorithm
        
        Returns
        -------
        numpy ndarray
            codebook (weights) of the trained SOM
        """

        H = int(self.opts['size'])
        W = int(self.opts['size'])
        N = self.X.shape[1]
        som = MiniSom(H, W, N, sigma=1.0, random_seed=1)
        if self.opts['initialization']:
            som.random_weights_init(self.X)
        som.train_random(self.X, self.opts['niterations'])
        return som.get_weights()
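The returned codebook has shape (H, W, N). When the caller wants one prototype vector per map cell (for example, to cluster the sentence vectors against them), flattening the first two axes is the usual move; a sketch reusing the `som` from above:

codebook = som.get_weights()                            # shape (H, W, N)
prototypes = codebook.reshape(-1, codebook.shape[-1])   # (H*W, N), one row per cell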
import numpy as np
import pandas as pd

# Import the dataset
df = pd.read_csv("Credit_Card_Applications.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)

# Training the SOM
from minisom import MiniSom
som_model = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
som_model.random_weights_init(X)
som_model.train_random(data=X, num_iteration=200)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som_model.distance_map().T)
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som_model.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y[i]],
         markerfacecolor='None',
         markeredgecolor=colors[y[i]],
         markersize=10,
         markeredgewidth=2)
show()
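A common next step in this credit-card example is pulling out the applications that land on a suspicious (high inter-neuron distance) cell. `win_map` groups the scaled samples by winning cell; the cell coordinates below are placeholders to be read off the distance map:

mappings = som_model.win_map(X)          # dict: (row, col) -> list of samples
suspects = mappings.get((8, 1), [])      # hypothetical coordinates of a light cell
if suspects:
    print(scaler.inverse_transform(suspects))  # back to the original feature scale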
Example #21
class SOM:



    def If_running(self):
      #print som.running
      self.play.set_sensitive(not self.som.running)
      return self.som.running

    def If_paused(self):
      #print som.running
      #self.pause.set_sensitive(self.som.running)
      return False

    def Status_update(self):
      if self.som.running:
	context_id = self.status_bar.get_context_id("Running")
	#print context_id
	text = "Iteration: " +  str(self.som.tick).zfill(len(str(self.som.ticks))) + "/" + str(self.som.ticks).zfill(len(str(self.som.ticks)))
	if self.som.paused:
	  text += ", Paused"
	self.status_bar.push(context_id, text)
	return True # we need it to keep updating if the model is running
      elif not self.som.running:
	if not self.som.paused:
	  self.status_bar.remove_all(self.status_bar.get_context_id("Running"))
	  self.status_bar.remove_all(self.status_bar.get_context_id("Ready"))
	  context_id = self.status_bar.get_context_id("Ready")
	  #print context_id
	  text = "Ready"
	  self.status_bar.push(context_id, text)
	return False

    #def Quit(self, widget, data=None):
      ##print 'Byez!'
      #gtk.main_quit()

    #def Pause(self, widget=None, data=None):
	#self.som.Pause()
	#if self.som.paused:
	  #self.pause.set_label("Unpause")
	#else:
	  #self.pause.set_label("Pause")
	  #glib.idle_add(self.som.Run)
	  #glib.idle_add(self.If_running)
	#glib.idle_add(self.Status_update)


    def open_file(self, file_name):
      try:
	  #cols = self.columns[self.combobox.get_active()]
	  #print cols
	  self.data = np.genfromtxt(file_name, delimiter=',',usecols=(self.visual_and_acoustic),skip_header=1)
	  self.pattern_labels = np.genfromtxt(file_name, delimiter=',',usecols=(self.visual_and_acoustic), skip_footer=14, dtype=str)
	  self.file_name = file_name

	  self.update_treeview(self.data, self.patterns_liststore)

	  #print self.data
      except:
	  print "File is probably not in the right format:", file_name
	  raise

    def select_file(self, widget=None, data=None):
      #response = self.dialog.run()
      #if response == gtk.RESPONSE_OK:
	#self.open_file(self.dialog.get_filename())

      #elif response == gtk.RESPONSE_CANCEL:
	#print 'Closed, no files selected'

      #self.dialog.destroy()

      dialog = gtk.FileChooserDialog("Open..", None, gtk.FILE_CHOOSER_ACTION_OPEN, (gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL, gtk.STOCK_OPEN, gtk.RESPONSE_OK))
      dialog.set_default_response(gtk.RESPONSE_OK)
      tmp = os.getcwd()
      tmp = 'file://' + tmp
      #print tmp
      #print dialog.set_current_folder_uri(tmp)
      #print dialog.get_current_folder_uri()
      filter = gtk.FileFilter()
      filter.set_name("All files")
      filter.add_pattern("*")
      dialog.add_filter(filter)

      filter = gtk.FileFilter()
      filter.set_name("Comma-separated values")

      filter.add_pattern("*.csv")
      dialog.add_filter(filter)
      dialog.set_filter(filter)

        #dialog = gtk.FileChooserDialog("Please choose a file", self,
            #gtk.FileChooserAction.OPEN,
            #(gtk.STOCK_CANCEL, gtk.ResponseType.CANCEL,
             #gtk.STOCK_OPEN, gtk.ResponseType.OK))


      response = dialog.run()
      if response == gtk.RESPONSE_OK:
	  #print("Open clicked")
	  #print("File selected: " + dialog.get_filename())
	  self.open_file(dialog.get_filename())
      #elif response == gtk.RESPONSE_CANCEL:
	  #print("Cancel clicked")

      dialog.destroy()

    def Run(self, widget=None, data=None):
      #self.som.ticks += self.iterations_spin_button.get_value_as_int()

      if not self.som.running:
	### Initialization and training ###
	#self.som = MiniSom(5, 15, 8,sigma=1.2,learning_rate=0.5)
	#self.init_som()
	for i in range(1):
	  self.train_som()
	  #self.figure.clf()
	  self.Draw_figure()
	  self.canvas.draw()
	  self.canvas.draw_idle()
	  #We need to draw *and* flush
	  self.figure.canvas.draw()
	  self.figure.canvas.flush_events()
	  #print "draw"

	  self.update_treeview(self.test_data, self.test_liststore)
	  self.update_treeview(self.data, self.patterns_liststore)



	  glib.idle_add(self.Status_update)
	  glib.idle_add(self.If_running)
	  glib.idle_add(self.If_paused)


    def Test(self, widget=None, data=None):
      #self.som.ticks += self.iterations_spin_button.get_value_as_int()

      if not self.som.running:
	### Initialization and training ###
	#self.som = MiniSom(5, 15, 8,sigma=1.2,learning_rate=0.5)
	self.test_som()
	#self.figure.clf()
	self.Draw_figure()
	self.canvas.draw()
	self.canvas.draw_idle()
	#We need to draw *and* flush
        self.figure.canvas.draw()
        self.figure.canvas.flush_events()
	#print "draw"

      glib.idle_add(self.Status_update)
      glib.idle_add(self.If_running)
      glib.idle_add(self.If_paused)


    def Reset(self, widget=None, data=None):
      self.init_som()
      self.Draw_figure()
      self.canvas.draw()
      self.canvas.draw_idle()
      #We need to draw *and* flush
      self.figure.canvas.draw()
      self.figure.canvas.flush_events()
      #print "draw"

      self.update_treeview(self.test_data, self.test_liststore)
      self.update_treeview(self.data, self.patterns_liststore)



      glib.idle_add(self.Status_update)
      glib.idle_add(self.If_running)
      glib.idle_add(self.If_paused)



    def delete_event(self, widget=None, event=None, data=None):
        # If you return FALSE in the "delete_event" signal handler,
        # GTK will emit the "destroy" signal. Returning TRUE means
        # you don't want the window to be destroyed.
        # This is useful for popping up 'are you sure you want to quit?'
        # type dialogs.
        #print "delete event occurred"

        # Change FALSE to TRUE and the main window will not be destroyed
        # with a "delete_event".
        return False

    #def on_key_event(self, event):
      #print('you pressed %s'%event.key)
      #key_press_handler(event, self.canvas, self.toolbar)

    def destroy(self, widget=None, data=None):
        #print "destroy signal occurred"
        gtk.main_quit()

    def Draw_figure(self):
  	self.axes.cla()   # Clear axis
	cols = self.columns[self.combobox.get_active()]
	data = self.data[:, 0:len(cols)]


	#ion()       # Turn on interactive mode.
	#hold(True) # Clear the plot before adding new data.


	#print som.distance_map().T
	#exit()
	bone()

	background = self.axes.pcolor(self.som.distance_map().T) # plotting the distance map as background
	#f.colorbar(a)
	t = np.zeros(len(self.target),dtype=int)
	t[self.target == 'A'] = 0
	t[self.target == 'B'] = 1
	t[self.target == 'C'] = 2
	t[self.target == 'D'] = 3

	# use different colors and markers for each label
	markers = ['o','s','D', '+']
	colors = ['r','g','b', 'y']
	for cnt,xx in enumerate(data):
	  w = self.som.winner(xx) # getting the winner
	  # place a marker on the winning position for the sample xx
	  tmp = self.axes.plot(w[0]+.5,w[1]+.5,markers[t[cnt]],markerfacecolor='None',
	      markeredgecolor=colors[t[cnt]],markersize=12,markeredgewidth=2)
	self.axes.axis([0,self.som.weights.shape[0],0,self.som.weights.shape[1]])
	#show() # show the figure
	#print "drawing"
	#self.figure.canvas.draw()



    def init_som(self, widget=None, data=None):
      ##print self.data
      ### Initialization and training ###
      cols = self.columns[self.combobox.get_active()]
      data = self.data[:, 0:len(cols)]

      #print len(cols)
      self.som = MiniSom(self.width_spin_button.get_value_as_int(), self.height_spin_button.get_value_as_int(), len(cols),sigma=1.2,learning_rate=0.5)
#      self.som.weights_init_gliozzi(data)
      self.som.random_weights_init(data)

    def train_som(self):
      cols = self.columns[self.combobox.get_active()]
      data = self.data[:, 0:len(cols)]
      print("Training...")
      #self.som.train_gliozzi(data) # Gliozzi et al training

      self.som.train_random(data,20)


      print("\n...ready!")

    def make_treeview(self, data, liststore):
	#i = 0
	cols = self.columns[self.combobox.get_active()]
	#print type(cols)
	#print len(cols)
	for d in data:
	  #i += 1

	  tmp = d.tolist()
	  #print 'tmp', tmp
	  #while len(tmp) < cols:
	    #tmp.append(False)
	    #print 'tmp', tmp
	    #cols = cols - 1
	  Qe = MiniSom.quantization_error_subset(self.som,d,len(cols))
	  #print tmp
	  tmp.append(Qe)
	  tmp.append(4 * Qe ** 0.5)
	  liststore.append(tmp)

	treeview = gtk.TreeView(model=liststore)
	#i = 0
	for d in range(len(self.test_data[0])):
	  #print i
	  #i += 1
	  renderer_text = gtk.CellRendererText()
	  column_text = gtk.TreeViewColumn(self.pattern_labels[d], renderer_text, text=d)
	  treeview.append_column(column_text)
	column_text = gtk.TreeViewColumn('Qe', renderer_text, text=d+1)
	treeview.append_column(column_text)
	column_text = gtk.TreeViewColumn('NLT', renderer_text, text=d+2)
	treeview.append_column(column_text)

	return treeview

    def update_treeview(self, data, liststore):
      	cols = len(self.columns[self.combobox.get_active()])

	for i, d in enumerate(data):

	  for j in range(len(d)):
	    #print j

	    liststore[i][j] = d[j]

	    if j >= cols:
	      liststore[i][j] = -999
	  Qe = MiniSom.quantization_error_subset(self.som,d,cols)

	  #print d, liststore[i]
	  liststore[i][-2]= Qe
	  liststore[i][-1]= 4 * Qe ** 0.5

    def select_columns(self, widget=None):
      #self.open_file(self.file_name)
      #self.init_som()
      self.update_treeview(self.test_data, self.test_liststore)
      self.update_treeview(self.data, self.patterns_liststore)


#----------------------------------------
# SAM added these functions here

    def pertSomWeights(self, widget=None, data=None):
        #if scale == None:
        scale = .5
        print('Adding noise to SOM weights')
        # print(self.som.weights)
        # print(self.som.weights.shape)
        pertAmount = scale * (np.random.random_sample(self.som.weights.shape) - .5)
        self.som.weights = self.som.weights + pertAmount
        # print self.som.weights
        self.Draw_figure()
        self.canvas.draw()
        self.canvas.draw_idle()
        # We need to draw *and* flush
        self.figure.canvas.draw()
        self.figure.canvas.flush_events()


    def pertInputs(self, widget=None, data=None):
        #if scale == None:
        p = .2
        print('Making %f prop of inputs 0.5' % p)
        # print(self.data.shape)

        # randomly get indices to switch, then replace
        noiseIndex = np.random.binomial(1, p, self.data.shape)  # ones at p proportion of samples
        self.data[noiseIndex == 1] = .5
        print(self.data)
        # update the treeview for the "Patterns" tab to see the result graphically
        self.update_treeview(self.data, self.patterns_liststore)


#----------------------------------------
    def __init__(self):
      # create a new window
      self.window = gtk.Window(gtk.WINDOW_TOPLEVEL)
      # When the window is given the "delete_event" signal (this is given
      # by the window manager, usually by the "close" option, or on the
      # titlebar), we ask it to call the delete_event () function
      # as defined above. The data passed to the callback
      # function is NULL and is ignored in the callback function.
      self.window.connect("delete_event", self.delete_event)
      # Here we connect the "destroy" event to a signal handler.
      # This event occurs when we call gtk_widget_destroy() on the window,
      # or if we return FALSE in the "delete_event" callback.
      self.window.connect("destroy", self.destroy)

      #window.set_icon_from_file(get_resource_path("icon.png"))
      #window.connect("delete-event", Quit)
      #window.connect("destroy", Quit)
      self.window.set_title("SOM model")
      self.window.set_default_size(500, 500) #this is to ensure the window is always the smallest it can be
      #self.window.set_resizable(False)
      #window.set_border_width(10)

      # Args are: homogeneous, spacing, expand, fill, padding
      homogeneous = False
      spacing = 0
      expand = False
      fill = False
      padding = 10

      self.hbox = gtk.HBox(homogeneous, spacing)
      self.vbox = gtk.VBox(homogeneous, spacing)
      self.window.add(self.vbox)


      #self.adjustment = gtk.Adjustment(value=10000, lower=1, upper=100000000, step_incr=1000, page_incr=10000)
      #self.iterations_spin_button = gtk.SpinButton(self.adjustment, climb_rate=0, digits=0)
      self.label = gtk.Label("Dimensions:")

      self.adjustment = gtk.Adjustment(value=5, lower=1, upper=100, step_incr=2, page_incr=5)
      self.width_spin_button = gtk.SpinButton(self.adjustment, climb_rate=0, digits=0)
      self.adjustment = gtk.Adjustment(value=10, lower=1, upper=100, step_incr=2, page_incr=5)
      self.height_spin_button = gtk.SpinButton(self.adjustment, climb_rate=0, digits=0)


      # Create a series of buttons with the appropriate settings

      image = gtk.Image()
      #  (from http://www.pygtk.org/docs/pygtk/gtk-stock-items.html)
      image.set_from_stock(gtk.STOCK_EXECUTE, 1)
      self.play = gtk.Button()
      self.play.set_image(image)
      self.play.set_label("Train")

      #image = gtk.Image()
      ##  (from http://www.pygtk.org/docs/pygtk/gtk-stock-items.html)
      #image.set_from_stock(gtk.STOCK_APPLY, 1)
      #self.test = gtk.Button()
      #self.test.set_image(image)
      #self.test.set_label("Test")

      image = gtk.Image()
      #  (from http://www.pygtk.org/docs/pygtk/gtk-stock-items.html)
      image.set_from_stock(gtk.STOCK_OPEN, 1)
      self.open = gtk.Button()
      self.open.set_image(image)
      self.open.set_label("Open patterns")

      #self.pause = gtk.Button(stock = gtk.STOCK_MEDIA_PAUSE)

      image = gtk.Image()
      image.set_from_stock(gtk.STOCK_REFRESH, 1)
      self.reset = gtk.Button()
      self.reset.set_image(image)
      self.reset.set_label("Reset")

      self.play.connect("clicked", self.Run, None)
      #self.test.connect("clicked", self.Test, None)
      self.open.connect("clicked", self.select_file, None)

      #self.pause.connect("clicked", self.Pause, None)
      self.reset.connect("clicked", self.Reset, None)
      self.height_spin_button.connect("value-changed", self.Reset, "Height changed")
      self.width_spin_button.connect("value-changed", self.Reset, "Width changed")

      # add perturb button to disturb trained som weights
      self.perturb = gtk.Button("Perturb SOM") # create gtk button to perturb som weights
      self.perturb.connect( "clicked", self.pertSomWeights, None ) # run self.pertSomWeights
      self.perturb.show() # tell GTK to show button, but not where
       
      # add button to add noisy encoding to training inputs
      self.perturbInputButton = gtk.Button("Perturb Inputs") # create gtk button to perturb som weights
      self.perturbInputButton.connect( "clicked", self.pertInputs, None ) # run self.pertSomWeights
      self.perturbInputButton.show() # tell GTK to show button, but not where
	


      #self.width_spin_button.connect("value_changed", self.init_som)
      #self.height_spin_button.connect("value_changed", self.init_som)

      #self.som = Environment(width = self.width_spin_button.get_value_as_int(), height = self.height_spin_button.get_value_as_int())
      #self.som.show()
      #self.pause.set_sensitive(self.som.paused)
      #self.vbox.pack_start(self.som, True, True, 0)
      #file_names =  #  ['stimuli.csv']

      allFileName = '4750.csv' #'stimuli.csv'	
      self.file_name =  allFileName  #'4749.csv' # 'stimuli.csv' # file_names[0]
      self.test_file_name = allFileName #'4749.csv' # 'stimuli.csv'

      self.visual_only = [0,1,2,3,4,5,6,7]
      self.visual_and_acoustic = [0,1,2,3,4,5,6,7,8]
      self.columns = [self.visual_only, self.visual_and_acoustic]

      
      #f = Figure(figsize=(5,4), dpi=100)
      #a = f.add_subplot(111)
      self.combobox = gtk.combo_box_new_text()
      self.combobox.append_text('Visual only')
      self.combobox.append_text('Visual and acoustic')
      self.test_data = np.genfromtxt(self.test_file_name, delimiter=',',usecols=(self.visual_and_acoustic),skip_header=1)
      self.test_data +=  -.5 #0.00001



      self.test_data = np.apply_along_axis(lambda x: x/np.linalg.norm(x),1,self.test_data) # data normalization

      self.target = np.genfromtxt(self.file_name,delimiter=',',usecols=(9),dtype=str,skip_header=1) # loading the labels for use in the figure
      self.combobox.set_active(1)
      self.combobox.connect('changed', self.Reset)
      #cols = self.columns[self.combobox.get_active()]
      #print cols
      self.data = np.genfromtxt(self.file_name, delimiter=',',usecols=(self.visual_and_acoustic),skip_header=1)
      self.data += -.5  #0.00001
      self.data = np.apply_along_axis(lambda x: x/np.linalg.norm(x),1,self.data) # data normalization

      #self.pattern_labels = np.genfromtxt(self.file_name, delimiter=',',usecols=(self.visual_and_acoustic), skip_footer=14, dtype=str)
      self.pattern_labels = np.genfromtxt(self.file_name, delimiter=',',usecols=(self.visual_and_acoustic), dtype=str)[0]


      #print self.pattern_labels
      self.init_som()
      #self.toolbar = NavigationToolbar(self.canvas, self.window)
      #self.vbox.pack_start(self.toolbar, False, False)
      #self.vbox.pack_start(self.canvas)
      self.test_liststore = gtk.ListStore(float, float, float, float, float, float, float, float, float, float, float)
      self.patterns_liststore = gtk.ListStore(float, float, float, float, float, float, float, float, float, float, float)

      self.test_treeview = self.make_treeview(self.test_data, self.test_liststore)
      self.patterns_treeview = self.make_treeview(self.data, self.patterns_liststore)
      #self.data = np.genfromtxt(self.file_name, delimiter=',',usecols=(0,1,2,3,4,5,6,7),skip_header=1)
      #self.pattern_labels = np.genfromtxt(self.file_name, delimiter=',',usecols=(0,1,2,3,4,5,6,7), skip_footer=8, dtype=str)
      ##self.data = np.apply_along_axis(lambda x: x/np.linalg.norm(x),1,self.data) # data normalization





      self.figure, self.axes= plt.subplots()

      # Create canvas.
      self.canvas = FigureCanvas(self.figure)  # a gtk.DrawingArea
      self.canvas.set_size_request(300, 400)
      self.Draw_figure()





      self.notebook = gtk.Notebook()
      self.notebook.set_tab_pos(gtk.POS_TOP)
      self.vbox.pack_start(self.notebook)

      label = gtk.Label("Distance map")
      self.notebook.append_page(self.canvas, label)
      label = gtk.Label("Patterns")
      self.notebook.append_page(self.patterns_treeview, label)
      label = gtk.Label("Testing")
      #hbox = gtk.HBox(homogeneous, spacing)

      self.notebook.append_page(self.test_treeview, label)
      #hbox.pack_start(test_treeview, expand, fill, 0)
      #hbox.pack_start(test_treeview, expand, fill, 0)


      self.patterns_treeview.show()
      self.test_treeview.show()


      self.canvas.draw_idle()
      self.canvas.show()
      self.figure.canvas.draw()

      self.vbox.pack_start(self.hbox, expand, fill, 10)
      self.status_bar = gtk.Statusbar()
      self.vbox.pack_start(self.status_bar, expand, fill, 0)
      self.status_bar.show()
      glib.idle_add(self.Status_update)
      self.hbox.show()
      self.vbox.show()
      self.play.show()
      #self.test.show()
      self.open.show()

      #self.pause.show()
      self.reset.show()
      #self.iterations_spin_button.show()
      self.width_spin_button.show()
      self.height_spin_button.show()



      self.hbox.pack_start(self.play, expand, fill, padding)
      #self.hbox.pack_start(self.test, expand, fill, padding)
      self.hbox.pack_start(self.open, expand, fill, padding)
      self.hbox.pack_start(self.combobox, expand, fill, padding)
      #self.hbox.pack_start(self.pause, expand, fill, 0)
      self.hbox.pack_start(self.reset, expand, fill, padding)
      #self.hbox.pack_start(self.iterations_spin_button, expand, fill, 0)
      self.hbox.pack_start(self.label, expand, fill, padding)

      self.hbox.pack_start(self.width_spin_button, expand, fill, padding)
      self.hbox.pack_start(self.height_spin_button, expand, fill, 0)
      self.hbox.pack_start( self.perturb, expand, fill, padding)
      self.hbox.pack_start( self.perturbInputButton, expand, fill, padding)

	


      #self.quit = gtk.Button("Quit")
      self.quit = gtk.Button(stock = gtk.STOCK_QUIT)
      self.combobox.connect('changed', self.select_columns)

      self.quit.connect("clicked", self.destroy, None)
      self.hbox.pack_end(self.quit, expand, fill, padding)
      self.quit.show()
      #print window.get_size()





      self.window.show_all()



      self.window.present()
      #gtk.main()
      # And of course, our main loop.
      #gtk.main()
      # Control returns here when main_quit() is called


      return None

    def main(self):

    # All PyGTK applications must have a gtk.main(). Control ends here
    # and waits for an event to occur (like a key press or mouse event).
      gtk.main()
Example #22
import numpy as np
import pandas as pd

dataset = pd.read_csv('Credit_Card_Applications.csv')
X = dataset.iloc[:, 1:-1].values
Y = dataset.iloc[:, -1].values

from sklearn.preprocessing import MinMaxScaler
Normalizer = MinMaxScaler(feature_range=(0, 1))
X = Normalizer.fit_transform(X)

#Training the SOM
from minisom import MiniSom

som = MiniSom(x=15, y=15, sigma=0.8, learning_rate=0.5, input_len=14)

som.random_weights_init(X)

som.train_random(data=X, num_iteration=200)

#Visualizing the Results
from pylab import bone, pcolor, colorbar, plot, show

bone()
pcolor(som.distance_map().T)
colorbar()
for i, j in enumerate(X):
    if Y[i] == 1:
        W_node = som.winner(j)
        plot(W_node[0] + 0.5,
             W_node[1] + 0.5,
             'o',  # marker style chosen for illustration; the source snippet is truncated here
             markerfacecolor='None',
             markeredgecolor='r',
             markersize=10,
             markeredgewidth=2)
show()
Example #23
from pylab import imread,imshow,figure,show,subplot,title
from numpy import reshape,flipud,unravel_index,zeros
from minisom import MiniSom

# read the image
img = imread('tree.jpg')

# reshaping the pixels matrix
pixels = reshape(img,(img.shape[0]*img.shape[1],3))

# SOM initialization and training
print('training...')
som = MiniSom(3,3,3,sigma=0.1,learning_rate=0.2) # 3x3 = 9 final colors
som.random_weights_init(pixels)
starting_weights = som.weights.copy() # saving the starting weights
som.train_random(pixels,100)

print('quantization...')
qnt = som.quantization(pixels) # quantize each pixels of the image
print('building new image...')
clustered = zeros(img.shape)
for i,q in enumerate(qnt): # place the quantized values into a new image
    clustered[unravel_index(i, (img.shape[0], img.shape[1]))] = q
print('done.')

# show the result
figure(1)
subplot(221)
title('original')
imshow(flipud(img))
subplot(222)
title('result')
imshow(flipud(clustered))
show()
Example #24
 def test_random_weights_init(self):
     som = MiniSom(2, 2, 2, random_seed=1)
     som.random_weights_init(array([[1.0, .0]]))
     for w in som._weights:
         assert_array_equal(w[0], array([1.0, .0]))
Example #25
 def test_distance_map(self):
     som = MiniSom(2, 2, 2, random_seed=1)
     som._weights = array([[[1., 0.], [0., 1.]], [[1., 0.], [0., 1.]]])
     assert_array_equal(som.distance_map(), array([[1., 1.], [1., 1.]]))
Example #26
def create_som():
    som = MiniSom(20, 20, 64, sigma=1.5, learning_rate=0.5)
    return som
Example #27
 def test_unavailable_neigh_function(self):
     with self.assertRaises(ValueError):
         MiniSom(5, 5, 1, neighborhood_function='boooom')
Example #28
class ContextualSom:
    def __init__(self, corpus):
        self._corpus = corpus
        self._tokens = []
        self._token_to_vector = {}
        self._token_to_avg_vector = {}
        self._som = None
        self._all_labels = [
            "noun", "verb", "closed_class", "quantifier", "classifier",
            "adjective", "adverb", "interjection", "unknown"
        ]

    def _average_vector(self, token):
        before = np.zeros(100)
        after = np.zeros(100)
        before_count = 0
        after_count = 0

        # Sweep a window through processed corpus
        # Calculate the average of all the vectors appearing before and after the token
        for i in range(len(self._tokens)):
            if self._tokens[i] == token:
                if i > 0 and self._tokens[i - 1] in self._token_to_vector:
                    before += self._token_to_vector[self._tokens[i - 1]]
                    before_count += 1
                if i < len(self._tokens) - 2 and self._tokens[
                        i + 1] in self._token_to_vector:
                    after += self._token_to_vector[self._tokens[i + 1]]
                    after_count += 1

        if before_count != 0:
            before = before / before_count
        if after_count != 0:
            after = after / after_count

        return normalize(np.concatenate([before, after]))

    @staticmethod
    def _get_category(pos):
        if pos in ["NNG", "NNP"]:
            return "noun"
        if pos == "VV":
            return "verb"
        if pos in ["VA"]:
            return "adjective"
        if pos in ["NR", "SN"]:
            return "quantifier"
        if pos == "NNBC":
            return "classifier"
        if pos == "MAG":
            return "adverb"
        if pos == "IC":
            return "interjection"
        if pos == "UNKNOWN":
            return "unknown"

        return "closed_class"

    @staticmethod
    def _get_colour(category):
        map_ = {
            "noun": "yellow",
            "verb": "blue",
            "closed_class": "red",
            "quantifier": "pink",
            "classifier": "cyan",
            "adjective": "green",
            "adverb": "orange",
            "interjection": "purple",
            "unknown": "gray"
        }

        return map_[category]

    def preprocess(self):
        mecab = Mecab()  # Parts-of-speech tagger
        token_pos = mecab.pos(self._corpus)

        # Mecab sometimes returns multiple POS tags for a token; we take the first one for simplicity
        self._tokens = [(token, pos.split("+")[0]) for token, pos in token_pos]

        counter = Counter(self._tokens)
        counter = {token: count for token, count in counter.most_common(500)}

        # Assign random vectors to each token
        self._token_to_vector = {
            token: normalize(np.random.normal(size=100))
            for token in counter
        }
        self._token_to_avg_vector = {
            token: self._average_vector(token)
            for token in counter
        }

    def train(self, x, y, epochs, verbose=False, **kwargs):
        som_input = np.asarray(list(self._token_to_avg_vector.values()))

        # All hyperparameters from Zhao, Li, et al., 2011
        self._som = MiniSom(x, y, som_input.shape[1], **kwargs)
        self._som.train(som_input, epochs, verbose=verbose)

    def scores(self):
        positions = []
        labels = []

        for token, v in self._token_to_avg_vector.items():
            labels.append(self._get_category(token[1]))
            positions.append(self._som.winner(v))

        positions = np.asarray(positions)
        labels = np.asarray(labels)
        label_ind = np.asarray([self._all_labels.index(l) for l in labels])

        predictions = []
        for ind, p in enumerate(positions):
            knn = KNeighborsClassifier(n_neighbors=5)
            knn.fit(np.delete(positions, ind, axis=0),
                    np.delete(label_ind, ind, axis=0))
            predictions.append(knn.predict([p])[0])

        predictions = np.asarray(predictions)

        scores = {}

        for label in range(len(self._all_labels)):
            cat_labels = label_ind[label_ind == label]
            cat_predictions = predictions[label_ind == label]

            correct = cat_labels == cat_predictions
            correct_percentage = correct.sum() / len(cat_labels)

            scores[self._all_labels[label]] = correct_percentage

        return scores
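A usage sketch for the class above, with a hypothetical corpus path and the grid size and hyperparameters chosen arbitrarily:

with open('corpus.txt', encoding='utf-8') as f:  # hypothetical corpus file
    corpus = f.read()

csom = ContextualSom(corpus)
csom.preprocess()
csom.train(15, 15, epochs=1000, sigma=1.0, learning_rate=0.5)
print(csom.scores())  # per-category kNN accuracy on the map positions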
Example #29
def create_som(data,
               labels,
               one_hots,
               filename_load_weights,
               filename_save_weights,
               load_weights=False,
               num_iteration=100,
               plot_data=False,
               plot_labels=False,
               save_plot=False,
               plot_distance_map=False,
               show_activations=False,
               show_single_chars=False,
               filename_plots='unspecified.png'):
    assert len(data) == len(labels)

    size = int(np.ceil(np.sqrt(len(data))))
    input_len = len(data[0])
    # Initialization and training
    som = MiniSom(x=size,
                  y=size,
                  input_len=input_len,
                  model=rnn,
                  sigma=1.0,
                  learning_rate=0.5)
    if load_weights:
        som.load_weights(filename=filename_load_weights)
    else:
        som.random_weights_init(data)
        print("Training...")
        som.train_random(data, num_iteration=num_iteration)  # random training
        print("\n...ready!")
        som.save_weights(filename=filename_save_weights)

    print("beginn mapping vectors")

    # Plotting the response for each pattern in the data set
    if plot_distance_map:
        plt.bone()
        plt.pcolor(
            som.distance_map().T)  # plotting the distance map as background
        plt.colorbar()
    else:
        plt.figure(figsize=(size, size))

    for i, data_point in enumerate(data):
        w = som.winner(data_point)  # getting the winner
        if plot_data:
            # place a string of the vector on the winning position for the sample
            plt.text(x=w[0],
                     y=w[1] + np.random.rand() * 0.9,
                     s=str(data_point),
                     size='small',
                     color='r')

        if plot_labels:
            #place the string of the label on the winning position for the sample
            plt.text(x=w[0] + 0.75,
                     y=w[1] + np.random.rand() * 0.9,
                     s=labels[i],
                     size='small',
                     color='b')

    #add axis
    plt.axis([0, size, 0, size])

    #save if specified
    if save_plot:
        plt.savefig('../RNN/SOM_graphics/{}.png'.format(filename_plots))
    plt.show()

    if show_activations:
        for i in range(len(one_hots)):
            plt.bone()
            plt.pcolor(som.activation_map(
                one_hots[i]))  # plotting the distance map as background
            plt.colorbar()
            plt.title('vec_{}'.format(one_hots[i]))
            plt.show()

    if show_single_chars:
        unique_labels = np.unique(labels)
        for unique_label in unique_labels:
            #plt.figure(figsize=(size, size))
            plt.bone()
            plt.pcolor(som.distance_map().T
                       )  # plotting the distance map as background
            plt.colorbar()
            for i, data_point in enumerate(data):
                if unique_label == labels[i]:
                    w = som.winner(data_point)  # getting the winner
                    plt.text(x=w[0] + 0.75,
                             y=w[1] + np.random.rand() * 0.9,
                             s=labels[i],
                             size='small',
                             color='r')
                    #plot the vectors
                    plt.text(x=w[0],
                             y=w[1] + np.random.rand() * 0.9,
                             s=str(data_point),
                             size='small',
                             color='b')
            # add axis
            plt.axis([0, size, 0, size])
            plt.show()
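`model=rnn` in the constructor and the `load_weights`/`save_weights` methods are not upstream MiniSom; they come from a project-specific fork. With the stock library, persisting a trained map is usually done by pickling the whole object (exactly what `test_pickling` at the bottom of this page exercises) or by saving `get_weights()` with numpy:

import pickle
from minisom import MiniSom

som = MiniSom(10, 10, 8, sigma=1.0, learning_rate=0.5)
with open('som.p', 'wb') as outfile:  # persist the trained object
    pickle.dump(som, outfile)
with open('som.p', 'rb') as infile:   # restore it later
    som = pickle.load(infile)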
Example #30
    def train(self, x, y, epochs, verbose=False, **kwargs):
        som_input = np.asarray(list(self._token_to_avg_vector.values()))

        # All hyperparameters from Zhao, Li, et al., 2011
        self._som = MiniSom(x, y, som_input.shape[1], **kwargs)
        self._som.train(som_input, epochs, verbose=verbose)
Example #31
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Credit_Card_Applications.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
X = sc.fit_transform(X)

# Training the SOM
from minisom import MiniSom
som = MiniSom(x = 10, y = 10, input_len = 15, sigma = 1.0, learning_rate = 0.5)
som.random_weights_init(X)
som.train_random(data = X, num_iteration = 100)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y[i]],
         markerfacecolor='None',
         markeredgecolor=colors[y[i]],
         markersize=10,
         markeredgewidth=2)
show()
Example #32
import pickle

import numpy as np
import pandas as pd
from minisom import MiniSom

data_path_list = [
    # ... paths to the pickled data files (the list contents are truncated in the source)
]

data_list = list(
    map(lambda path: pickle.load(open(path, 'rb')), data_path_list))

# concatenate the data vertically
dataset = np.concatenate(data_list, axis=0)

# Without the Timestamp ###############################
X = dataset.reshape(
    (
        len(dataset),
        200,
    )
)  # Change 200 to 201 depending on whether the data has timestamp (pre-flattened with test_radar_data_labeler)
som = MiniSom(x=100, y=100, input_len=200, sigma=1.0, learning_rate=0.5)

# With the Timestamp ##########################
# min-max normalize the timestamp,
# X = dataset.reshape((len(dataset), 201,))  # Change 200 to 201 depending on whether the data has timestamp (pre-flattened with test_radar_data_labeler)
# timestamp_scaler = MinMaxScaler(feature_range=(0, 1))
# timestamp_col_scaled = timestamp_scaler.fit_transform(X[:, 0].reshape(-1, 1))
# X[:, 0] = timestamp_col_scaled.reshape((len(timestamp_col_scaled)))
# som = MiniSom(x=50, y=50, input_len=201, sigma=1.0, learning_rate=0.5)

# som.random_weights_init(X)
som.train_random(data=X, num_iteration=1000)

# visualize results
label_path = r'F:\config_detection\labels/labeled_onNotOn_080719.csv'
label_array = pd.read_csv(label_path).values[:, 1:]
testFeaturesPath = '../Feature_Vectors/normalised_features_match.pickle'
TestFeatures = pickle.load(open(testFeaturesPath, 'rb'))

#List of all features in training data (DataFeatures is defined earlier in the original script, not shown here)
Features = []
for val in DataFeatures.values():
    Features.append(val)

#(x,y) -- size of output grid for SOM
x = 5
y = 5
#Number of iterations to run
iteration = input("Input number of iterations: ")

#Create a SOM
som = MiniSom(x, y, 20, sigma=0.3, learning_rate=0.5)
print "Training..."
som.train_random(Features, iteration)  # trains the SOM for the requested number of iterations
print "...ready!"

#Map the output neuron position to a unique cluster id. (0,0) --> 0, (0,1) --> 1 and so on.
feature_map = {}
k = 0

for i in range(x):
    for j in range(y):
        feature_map[(i, j)] = k
        k += 1

#Open a csv file to write the attribute name and its corresponding cluster id
#print 'attribute			Spatial Position'
Example #34
    plt.pcolor(som.distance_map().T)

    return fig, ax

RS = 20160101

if __name__ == '__main__':
    args = _parse_file_argument()
    data = pd.read_csv(args.csv)
    data.fillna(0, inplace=True)

    label_column = args.label_prefix
    label_prefix = data[label_column].values
    data.drop(label_column, axis=1, inplace=True)

    label_column = args.label_sufix
    label_sufix = data[label_column].values
    data.drop(label_column, axis=1, inplace=True)

    id_column = 'id'
    data.drop(id_column, axis=1, inplace=True)

    som = MiniSom(8,8,len(data.columns),sigma=1.0,learning_rate=0.5,random_seed=RS)
    som.random_weights_init(data.values)

    som.train_random(data.values, 100)

    _plot_distribution(som)
    plt.savefig('som.png', dpi=120)
Example #35
class TestMinisom(unittest.TestCase):
    def setUp(self):
        self.som = MiniSom(5, 5, 1)
        for i in range(5):
            for j in range(5):
                # checking weights normalization
                assert_almost_equal(1.0, linalg.norm(self.som._weights[i, j]))
        self.som._weights = zeros((5, 5, 1))  # fake weights
        self.som._weights[2, 3] = 5.0
        self.som._weights[1, 1] = 2.0

    def test_decay_function(self):
        assert self.som._decay_function(1., 2.,
                                        3.) == 1. / (1. + 2. / (3. / 2))

    def test_fast_norm(self):
        assert fast_norm(array([1, 3])) == sqrt(1 + 9)

    def test_check_input_len(self):
        with self.assertRaises(ValueError):
            self.som.train_batch([[1, 2]], 1)

        with self.assertRaises(ValueError):
            self.som.random_weights_init(array([[1, 2]]))

        with self.assertRaises(ValueError):
            self.som._check_input_len(array([[1, 2]]))

        self.som._check_input_len(array([[1]]))
        self.som._check_input_len([[1]])

    def test_unavailable_neigh_function(self):
        with self.assertRaises(ValueError):
            MiniSom(5, 5, 1, neighborhood_function='boooom')

    def test_gaussian(self):
        bell = self.som._gaussian((2, 2), 1)
        assert bell.max() == 1.0
        assert bell.argmax() == 12  # unravel(12) = (2,2)

    def test_mexican_hat(self):
        bell = self.som._mexican_hat((2, 2), 1)
        assert bell.max() == 1.0
        assert bell.argmax() == 12  # unravel(12) = (2,2)

    def test_bubble(self):
        bubble = self.som._bubble((2, 2), 1)
        assert bubble[2, 2] == 1
        assert sum(sum(bubble)) == 1

    def test_triangle(self):
        bubble = self.som._triangle((2, 2), 1)
        assert bubble[2, 2] == 1
        assert sum(sum(bubble)) == 1

    def test_win_map(self):
        winners = self.som.win_map([[5.0], [2.0]])
        assert winners[(2, 3)][0] == [5.0]
        assert winners[(1, 1)][0] == [2.0]

    def test_labels_map(self):
        labels_map = self.som.labels_map([[5.0], [2.0]], ['a', 'b'])
        assert labels_map[(2, 3)]['a'] == 1
        assert labels_map[(1, 1)]['b'] == 1
        with self.assertRaises(ValueError):
            self.som.labels_map([[5.0]], ['a', 'b'])

    def test_activation_response(self):
        response = self.som.activation_response([[5.0], [2.0]])
        assert response[2, 3] == 1
        assert response[1, 1] == 1

    def test_activate(self):
        assert self.som.activate(5.0).argmin() == 13.0  # unravel(13) = (2,3)

    def test_quantization_error(self):
        assert self.som.quantization_error([[5], [2]]) == 0.0
        assert self.som.quantization_error([[4], [1]]) == 1.0
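        # with prototypes 5.0 at (2,3), 2.0 at (1,1) and 0.0 elsewhere (see setUp), the
        # nearest prototypes to 4 and 1 are both one unit away, so the mean error is 1.0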

    def test_quantization(self):
        q = self.som.quantization(array([[4], [2]]))
        assert q[0] == 5.0
        assert q[1] == 2.0

    def test_random_seed(self):
        som1 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        som2 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        # same initialization
        assert_array_almost_equal(som1._weights, som2._weights)
        data = random.rand(100, 2)
        som1 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        som1.train_random(data, 10)
        som2 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        som2.train_random(data, 10)
        # same state after training
        assert_array_almost_equal(som1._weights, som2._weights)

    def test_train_batch(self):
        som = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        data = array([[4, 2], [3, 1]])
        q1 = som.quantization_error(data)
        som.train_batch(data, 10)
        assert q1 > som.quantization_error(data)

        data = array([[1, 5], [6, 7]])
        q1 = som.quantization_error(data)
        som.train_batch(data, 10, verbose=True)
        assert q1 > som.quantization_error(data)

    def test_train_random(self):
        som = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
        data = array([[4, 2], [3, 1]])
        q1 = som.quantization_error(data)
        som.train_random(data, 10)
        assert q1 > som.quantization_error(data)

        data = array([[1, 5], [6, 7]])
        q1 = som.quantization_error(data)
        som.train_random(data, 10, verbose=True)
        assert q1 > som.quantization_error(data)

    def test_random_weights_init(self):
        som = MiniSom(2, 2, 2, random_seed=1)
        som.random_weights_init(array([[1.0, .0]]))
        for w in som._weights:
            assert_array_equal(w[0], array([1.0, .0]))

    def test_pca_weights_init(self):
        som = MiniSom(2, 2, 2)
        som.pca_weights_init(array([[1., 0.], [0., 1.], [1., 0.], [0., 1.]]))
        expected = array([[[0., -1.41421356], [-1.41421356, 0.]],
                          [[1.41421356, 0.], [0., 1.41421356]]])
        assert_array_almost_equal(som._weights, expected)
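        # 1.41421356 is sqrt(2): the corner weights are stretched to span the
        # first two principal components of the centered data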

    def test_distance_map(self):
        som = MiniSom(2, 2, 2, random_seed=1)
        som._weights = array([[[1., 0.], [0., 1.]], [[1., 0.], [0., 1.]]])
        assert_array_equal(som.distance_map(), array([[1., 1.], [1., 1.]]))

    def test_pickling(self):
        with open('som.p', 'wb') as outfile:
            pickle.dump(self.som, outfile)
        with open('som.p', 'rb') as infile:
            pickle.load(infile)
        os.remove('som.p')
Example #36
from minisom import MiniSom 
from numpy import genfromtxt,zeros,apply_along_axis,linalg
import matplotlib.pyplot as plt 
sample='K562.tab'
from collections import OrderedDict
data = genfromtxt(sample,delimiter='\t',usecols=(4,5,6,7,8,9,10,11))
data = apply_along_axis(lambda x: x/linalg.norm(x),1,data) # data normalization
       
som = MiniSom(20,20,8,sigma=1.0,learning_rate=0.5,random_seed=1234)
som.train_random(data,int(1.5*len(data))) # random training

### Plotting the response for each pattern in the dataset ###
import pylab
from pylab import plot,axis,show,pcolor,colorbar,bone
bone()
pcolor(som.distance_map().T) # plotting the distance map as background
colorbar()
target = genfromtxt(sample,delimiter='\t',usecols=(3),dtype=str) # loading the labels
t = zeros(len(target),dtype=int)
t[target == 'insulator'] = 0
t[target == 'gene_body'] = 1
t[target == 'active_promoter'] = 2
t[target == 'enhancer'] = 3
t[target == 'poised_promoter'] = 4
# use different colors and markers for each label
markers = ['o','s','D','v','^']
colors = ['r','g','b','c','m']
plotTitle = 'K562'
pngName  = 'K562.png'
tags=[] #save the winner for each step
for cnt,xx in enumerate(data):
Example #37
def test_random_seed(self):
    som1 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
    som2 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
    # same initialization
    assert_array_almost_equal(som1._weights, som2._weights)
    data = random.rand(100, 2)
    som1 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
    som1.train_random(data, 10)
    som2 = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
    som2.train_random(data, 10)
    # same state after training
    assert_array_almost_equal(som1._weights, som2._weights)
Example #38
class SOMDiscretizer(Discretizer):

    def __init__(self, width=4, height=4, sigma=0.3, learning_rate=0.5):
        self.width = width
        self.height = height
        self.sigma = sigma
        self.learning_rate = learning_rate

    def train(self, data):
        self.som = MiniSom(self.width, self.height, len(data[0]), sigma=self.sigma, learning_rate=self.learning_rate)
        self.som.train_random(data, 1000000)

    def discretize(self, data_point):
        x, y = self.som.winner(data_point)
        return self.som.weights[x,y]

    def visualize(self, filename, subset=None):
        box_side = 150
        border = 30

        text_height = 10
        text_offset_x = 60
        text_offset_y = 30

        w = (self.width * box_side) + ((self.width-1) * border)
        h = (self.height * box_side) + ((self.height-1) * border)
        img = Image.new('RGB', (w, h))

        draw = ImageDraw.Draw(img)
        for i in range(self.width):
            for j in range(self.height):
                offset = np.array([
                    i*(box_side + border), j*(box_side + border),
                    (i+1)*(box_side + border), (j+1)*(box_side + border)
                ])

                def coords(arr, offset):
                    a = arr + offset
                    return [ (a[0], a[1]), (a[2], a[3]) ]

                def dimension_subset(vector, subset):
                    if subset is not None:
                        return vector[subset[0]:subset[1]+1]
                    return vector

                # Draw the prototype vector box
                box_position = coords(np.array([ 
                    0, 0,
                    box_side, box_side
                ]), offset)

                prototype_vector = dimension_subset(self.som.weights[i, j], subset)
                fill = int(self.norm_data_vector(prototype_vector) * 200) + 55

                draw.rectangle(box_position, fill=(0, fill, 0))

                # Write the prototype vector as text
                text_position = box_position[0]
                line_no = 0
                for value in prototype_vector:
                    rounded_value = round(value * 100) / 100
                    base_x, base_y = box_position[0]
                    text_position = (base_x + text_offset_x, base_y + text_offset_y + text_height*line_no)
                    draw.text(text_position, str(rounded_value))
                    line_no += 1

                right_fill, bottom_fill, diagonal_fill = 0, 0, 0

                # Draw right border of U-matrix
                if i != self.width - 1:
                    right_border_position = coords(np.array([ 
                        box_side+1, 0,
                        box_side+1+border, box_side
                    ]), offset)

                    prototype_vector_a = dimension_subset(self.som.weights[i, j], subset)
                    prototype_vector_b = dimension_subset(self.som.weights[i+1, j], subset)
                    right_fill = 255 - int(self.data_vector_difference(prototype_vector_a, prototype_vector_b) * 255)

                    draw.rectangle(right_border_position, fill=(right_fill, right_fill, right_fill))

                # Draw bottom border of U-matrix
                if j != self.height - 1:
                    bottom_border_position = coords(np.array([
                        0, box_side+1,
                        box_side, box_side+1+border
                    ]), offset)

                    prototype_vector_a = dimension_subset(self.som.weights[i, j], subset)
                    prototype_vector_b = dimension_subset(self.som.weights[i, j+1], subset)
                    bottom_fill = 255 - int(self.data_vector_difference(prototype_vector_a, prototype_vector_b) * 255)

                    draw.rectangle(bottom_border_position, fill=(bottom_fill, bottom_fill, bottom_fill))

                # Draw diagonal border of U-matrix
                if i != self.width - 1 and j != self.height - 1:
                    diagonal_border_position = coords(np.array([
                        box_side+1, box_side+1,
                        box_side+1+border, box_side+1+border
                    ]), offset)

                    prototype_vector_a = dimension_subset(self.som.weights[i, j], subset)
                    prototype_vector_b = dimension_subset(self.som.weights[i+1, j+1], subset)
                    diagonal_fill = 255 - int(self.data_vector_difference(prototype_vector_a, prototype_vector_b) * 255)

                    draw.rectangle(diagonal_border_position, fill=(diagonal_fill, diagonal_fill, diagonal_fill))

        img.save(filename)
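Hypothetical usage of the SOMDiscretizer above (the Discretizer base class and the norm_data_vector and data_vector_difference helpers are defined elsewhere in that project, and the data names are assumptions, so this is only a sketch):

# disc = SOMDiscretizer(width=4, height=4, sigma=0.3, learning_rate=0.5)
# disc.train(training_data)                      # fits a 4x4 SOM to the data
# prototype = disc.discretize(training_data[0])  # nearest prototype vector
# disc.visualize('umatrix.png', subset=(0, 2))   # U-matrix image of dimensions 0..2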
Example #39
def test_pca_weights_init(self):
    som = MiniSom(2, 2, 2)
    som.pca_weights_init(array([[1., 0.], [0., 1.], [1., 0.], [0., 1.]]))
    expected = array([[[0., -1.41421356], [-1.41421356, 0.]],
                      [[1.41421356, 0.], [0., 1.41421356]]])
    assert_array_almost_equal(som._weights, expected)
Example #40
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Feature Scaling

from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range=(0, 1))

X = sc.fit_transform(X)

# Training the SOM
from minisom import MiniSom
# For the grid dimensions: there are not many customers, i.e. not many observations, so a 10 x 10 grid is enough.
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)

# Initialize the weights
som.random_weights_init(X)

#Train the SOM on X
som.train_random(X, 100)
# Executes quickly as the dataset is small

# 2-dimensional grid that will contain all the final winning nodes; for each winning node we get the MID (Mean Interneuron Distance)
"""The MID of a specific winning node is the mean of the distances to all the neurons around it,
inside a neighborhood defined by sigma, which is the radius of that neighborhood.
The higher the MID, the farther the winning node is from its neighbors inside the neighborhood.
Therefore, the higher the MID, the more likely the winning node is an outlier: since the majority
of the winning nodes represent the rules that are respected, a node far from that majority is far
from the general rules, and that is how outliers are detected."""
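A self-contained sketch (an addition, not from the original) of turning the MID idea above into outlier detection: MiniSom's distance_map() returns each node's mean distance to its neighbours, normalised to [0, 1], so samples won by nodes close to 1 are the outlier candidates. The 0.9 threshold and the random data are assumptions.

from minisom import MiniSom
import numpy as np

_X = np.random.rand(200, 15)
_som = MiniSom(10, 10, 15, sigma=1.0, learning_rate=0.5, random_seed=1)
_som.train_random(_X, 100)
_suspect_nodes = {tuple(n) for n in np.argwhere(_som.distance_map() > 0.9)}
_outliers = [x for x in _X if _som.winner(x) in _suspect_nodes]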
Example #41
sc = MinMaxScaler(feature_range=(0, 1))

# fit sc object to X so sc gets all info (min and max) and all info for normalization
# apply normalization to X, fit method returns normalized version of X
X = sc.fit_transform(X)

# Training the SOM
# Unsupervised learning: we don't consider a dependent variable
# sigma is the radius of the different neighborhoods
# learning_rate is the hyperparameter that decides how much the weights are updated each step
# the higher the learning rate, the faster the convergence
# the lower the learning rate, the longer the SOM takes to build
from minisom import MiniSom

som = MiniSom(x=25, y=25, input_len=30, sigma=1.0, learning_rate=0.3)

# randomly initialize the weight vectors to small numbers close to 0
som.random_weights_init(X)

# train som on X, matrix of features and patterns recognized
som.train_random(data=X, num_iteration=100)

# Visualising the results
# two-dimensional grid of the winning nodes
# for each node we get the MID (Mean Inter-neuron Distance) inside the neighborhood radius
# the higher the MID, the farther the winning neuron is from its neighbors, so the more likely
# it marks an outlier (a potential fraud); we look for the winning nodes with the highest MID

from pylab import bone, pcolor, colorbar, plot, show
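The example is truncated here; a minimal sketch of the visualisation the comments above describe, assuming the som and X defined in this example:

# bone()
# pcolor(som.distance_map().T)  # MID map: the lighter the cell, the more likely an outlier
# colorbar()
# show()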
Example #42
    def train(self, som_dim: tuple = (250, 250),
              sigma: float = 1.0,
              learning_rate: float = 0.5,
              batch_size: int = 500,
              seed: int = 42,
              weight_init: str = 'random'):
        """Train self-organising map.

        Parameters
        ----------
        som_dim : tuple, (default=(250, 250))
            dimensions of SOM embedding (number of nodes)
        sigma : float, (default=1.0)
            the radius of the neighbourhood in the SOM
        learning_rate : float, (default=0.5)
            alters the rate at which weights are updated
        batch_size : int, (default=500)
            size of batches used in training (alters number of total iterations)
        seed : int, (default=42)
            random seed
        weight_init : str, (default='random')
            how to initialise weights: either 'random' or 'pca' (Initializes the weights to span the
            first two principal components)

        Returns
        -------
        None
        """

        som = MiniSom(som_dim[0], som_dim[1],
                      self.dims, sigma=sigma,
                      learning_rate=learning_rate,
                      neighborhood_function=self.nf,
                      random_seed=seed)
        if weight_init == 'random':
            som.random_weights_init(self.data)
        elif weight_init == 'pca':
            if not self.normalisation:
                print('Warning: It is strongly recommended to normalize the data before initializing '
                      'the weights if using PCA.')
            som.pca_weights_init(self.data)
        else:
            print('Warning: invalid value provided for "weight_init", valid input is either "random" or "pca". '
                  'Defaulting to random initialisation of weights')
            som.random_weights_init(self.data)

        print("------------- Training SOM -------------")
        som.train_batch(self.data, batch_size, verbose=True)  # batch training
        self.xn = som_dim[0]
        self.yn = som_dim[1]
        self.map = som
        self.weights = som.get_weights()
        self.flatten_weights = self.weights.reshape(self.xn*self.yn, self.dims)
        print("\nTraining complete!")
        print("----------------------------------------")
Example #43
    x=[]
    y=[]
    for i in range(N):
        if x3[i]-y3[i]+1 > 0 and -x3[i]-y3[i]+1 > 0 and y3[i]>0:
            x.append(x3[i])
            y.append(y3[i])
            
    trainData = np.zeros((len(x),2))
    for i in range(len(x)):
        trainData[i][0]=x[i]
        trainData[i][1]=y[i]

    return trainData

data = generateData(200)
som = MiniSom(10, 10, input_len=2, sigma=0.3, learning_rate=0.5) # initialization of a 10x10 SOM
som.random_weights_init(data)
som.train_random(data, 5000) # trains the SOM with 5000 iterations

weights = som.get_weights()
mappings = som.win_map(data)

#Visualizing the result
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)  # mean inter-neuron distance as background
colorbar()
for i,x in enumerate(data):
    w = som.winner(x)
    plot(w[0]+0.5, w[1]+0.5, 'r^', markersize=10)
Example #44
def __init__(self, input_length):
    from minisom import MiniSom
    self.som = MiniSom(10, 10, input_length, sigma=0.3, learning_rate=0.1, normalize=True)
Example #45
def learn(dataset,sigma=0.3,learning_rate=0.5,nb_iter=10000):
	nb_sample, nb_features = dataset.shape
	som = MiniSom(6,6,nb_features,sigma=sigma,learning_rate=learning_rate)
	som.train_random(dataset,nb_iter)
	return som
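A short usage sketch for learn() above (an addition; assumes MiniSom is imported and uses synthetic data):

import numpy as np
_dataset = np.random.rand(100, 4)
_som = learn(_dataset, sigma=0.3, learning_rate=0.5, nb_iter=1000)
print(_som.winner(_dataset[0]))  # best matching unit of the first sample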
Example #46

def get_dataset(file):
    """
    Returns the normalized data set of the specified file
    """
    data = np.genfromtxt(file, delimiter=",", usecols=(0, 1, 2, 3))
    return np.apply_along_axis(lambda x: x / np.linalg.norm(x), 1, data)


train_dataset = get_dataset("data_sets/train.csv")
test_dataset = get_dataset("data_sets/test.csv")
validation_dataset = get_dataset("data_sets/validation.csv")

# Creates a SOM with an 8x8 grid
som = MiniSom(8, 8, 4, sigma=1.3, learning_rate=0.5)
# Initializes the weights with info of the dataset
som.pca_weights_init(train_dataset)

print("Training started")
start = time.perf_counter()
som.train_batch(train_dataset, 10000)
end = time.perf_counter()
print("Training took {} seconds!".format(end - start))

# Generation of graphs
classification_graph("test", test_dataset)
classification_graph("validation", validation_dataset)
classification_graph("train", train_dataset)
frequency_graph(train_dataset)
error_graph(train_dataset)
Example #47
from pylab import imread, imshow, figure, show, subplot, title
from numpy import reshape, flipud, unravel_index, zeros
from minisom import MiniSom

# read the image
img = imread('tree.jpg')

# reshaping the pixels matrix
pixels = reshape(img, (img.shape[0] * img.shape[1], 3))

# SOM initialization and training
print('training...')
som = MiniSom(3, 3, 3, sigma=0.1, learning_rate=0.2)  # 3x3 = 9 final colors
som.random_weights_init(pixels)
starting_weights = som.weights.copy()  # saving the starting weights
som.train_random(pixels, 100)

print('quantization...')
qnt = som.quantization(pixels)  # quantize each pixels of the image
print('building new image...')
clustered = zeros(img.shape)
for i, q in enumerate(qnt):  # place the quantized values into a new image
    clustered[unravel_index(i, (img.shape[0], img.shape[1]))] = q
print('done.')

# show the result
figure(1)
subplot(221)
title('original')
imshow(flipud(img))
subplot(222)
Example #48
        dataset[col] = dataset[col].str.rstrip('%').astype('float') / 100
    else:
        dataset[col]=dataset[col].replace( '[\$,)]','', regex=True )\
               .replace( '[(]','-',   regex=True ).astype(float)
dataset = dataset.fillna(dataset.mean())
X = dataset.iloc[:, :].values
y = dataset.iloc[:, 0].values

#feature scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

#train SOM
from minisom import MiniSom
som = MiniSom(4, 4, input_len=30)
som.random_weights_init(X)
som.train_random(X, 100)

#visualization
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
show()

#find cities
location_list = []
mapping = som.win_map(X)
for keys in mapping.keys():
    location_list.append(sc.inverse_transform(mapping.get(keys)))
Example #49
#importing the data set
dataset = pd.read_csv('Credit_Card_Applications.csv')
x = dataset.iloc[:,:-1] # :-1 drops the last column (the class)
y = dataset.iloc[:,-1] # -1 selects the last column only




#feature scaling (data preprocessing)
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0,1))
X = sc.fit_transform(x) # NORMALIZATION

# we are going to use a library (MiniSom).
from minisom import MiniSom
som  = MiniSom(x = 10, y =10, input_len = 15, sigma =1.0, learning_rate = 0.5) 
som.random_weights_init(X)
som.train_random(X, num_iteration = 100)


# we have to visualize the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T) # all distances for all the neural nets 
colorbar() #add a bar 
markers = ['o', 's']
colors = ['r', 'g']
for i, x in enumerate(X): # customers is x 
    w = som.winner(x)      #the wining node for a customer
    plot(w[0] + 0.5,
         w[1] + 0.5,
Example #50
y = dataset.iloc[:, -1].values
#here we only use X for training because this is unsupervised deep learning: we are flagging customer eligibility patterns, not predicting classes, so no dependent variable is used.

# Feature Scaling(between 0 & 1)
#essential for deep learning, because of the heavy computation in high dimensions
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# -------Training the SOM
#here we are using MiniSom 1.0
#https://test.pypi.org/project/MiniSom/1.0/
#the minisom.py file written by the developer must be kept in the working directory
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
#x and y are the dimensions of the SOM (the more data, i.e. customers, the larger the grid)
#input_len is the number of features in the training dataset X (14), plus 1 for the customer id
#sigma is the radius of the different neighbourhoods (default 1.0)
#learning_rate decides how much the weights are updated at each step (default 0.5): the higher it
#is, the faster the convergence; the lower it is, the longer the self-organising map takes to build
#a decay_function can be used to improve convergence
som.random_weights_init(X)  #random_weights_init is the weight-initialisation method provided by MiniSom 1.0
som.train_random(data=X, num_iteration=100)  #num_iteration is the number of training repetitions

# ---------Visualizing the results
#here we calculate the mean interneuron distance (MID), i.e. the mean of the Euclidean distances
#between a given neuron and the neurons in its neighbourhood, so we can detect outliers, which
#lie far (in Euclidean distance) from their neighbouring neurons
#the larger the MID, the closer the cell is to white in colour
from pylab import bone, pcolor, colorbar, plot, show  #BUILDING the self-organising map
bone()  #initialise the figure, i.e. the window that contains the map
pcolor(
Example #51
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pylab import pcolor, colorbar, plot  # for data visualisation

base = pd.read_csv("wines.csv")

X = base.iloc[:, 1:14].values
y = base.iloc[:, 0].values

# since the values of X are not normalised, we need to run the following lines
normalizador = MinMaxScaler(feature_range=(0, 1))
X = normalizador.fit_transform(X)

som = MiniSom(8,
              8,
              input_len=X.shape[1],
              sigma=1.0,
              learning_rate=0.5,
              random_seed=2)
'''To choose the map size, use the 5*sqrt(N) rule; with 178 records:
    5 * sqrt(178) ≈ 66.7, rounded to 64 nodes, which gives an 8x8 matrix. Sigma = radius around the BMU
    (best matching unit)'''
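A small sketch (an addition) of the 5*sqrt(N) sizing rule quoted above:

import math
_n_records = 178
_n_nodes = 5 * math.sqrt(_n_records)  # ~66.7 suggested nodes
_side = round(math.sqrt(_n_nodes))    # ~8, hence the 8x8 map used here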

som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)  # 100 is enough for most cases

som._weights
'''All these weight values represent the new points created to help build the map'''
som._activation_map  # here you can inspect the values of the map itself
q = som.activation_response(
Example #52
onehotencoder = OneHotEncoder(categorical_features = [0])
X = onehotencoder.fit_transform(X).toarray()

X12 = np.asarray(X12, dtype = int)
Xnew = np.append(X, X12, axis=1)
Xfin = np.delete(Xnew, 12, axis=1)

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
X_SOM = sc.fit_transform(X)

# Training the SOM
from minisom import MiniSom
som = MiniSom(x = 10, y = 10, input_len = 17, sigma = 1.0, learning_rate = 0.5)
som.random_weights_init(X_SOM)
som.train_random(data = X_SOM, num_iteration = 100)

# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's', 'x', 'o', 's', 'x', 'v']
colors = ['r', 'g', 'b', 'w', 'y', 'c', 'm']
for i, x in enumerate(X_SOM):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y[i]],
Example #53
# Importing the data
dataset = pd.read_csv("Credit_Card_Applications.csv")

X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

#Changement d'échelle

from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
X = sc.fit_transform(X)

# Entraînement du SOM
from minisom import MiniSom
som = MiniSom(x=10, y=10, input_len=15)

som.random_weights_init(X)
som.train_random(X,num_iteration=100)

# Visualisation des résultats
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()

markers = ("o", "s")
colors = ("r", "g")

for i, x in enumerate(X):
    w = som.winner(x)
Example #54
Features = []
for i in range(0, len(c_features[0])):
    # column = c_features[:][i]
    column = [row[i] for row in c_features]
    Features.append(column)

print "=== Size of Features used in SOM: %d" % len(Features)
# Self Organising Map
# x = input('enter x value for grid: ')
# y = input('enter y value for grid: ')
# iteration = input("Input number of iterations: ")
x = 6
y = 6
iteration = 100
# Create a SOM
som = MiniSom(x, y, 20, sigma=0.3, learning_rate=0.5)
print "Training..."
som.train_random(Features, iteration)  # trains the SOM with 100 iterations
print "...ready!"

# Map the output neuron position to a unique cluster id. (0,0) --> 0, (0,1) --> 1 and so on.
feature_map = {}
k = 0

for i in range(x):
    for j in range(y):
        feature_map[(i, j)] = k
        k += 1

# print feature_map, '\n'
Example #55
# load the digits dataset from scikit-learn
# about 720 samples in total, roughly 180 samples per class
# the digits represented: 0,1,2,3
from sklearn import datasets
digits = datasets.load_digits(n_class=4)
data = digits.data # matrix where each row is a vector that represent a digit.
num = digits.target # num[i] is the digit represented by data[i]

# training the som
from minisom import MiniSom
som = MiniSom(20,20,64,sigma=.8,learning_rate=0.5)
print("Training...")
som.train_random(data,1500) # random training
print("\n...ready!")

# plotting the results
from pylab import text,show,cm,axis,figure,subplot,imshow,zeros
wmap = {}
figure(1)
im = 0
for x,t in zip(data,num): # scatterplot
	w = som.winner(x)
	wmap[w] = im
	text(w[0]+.5, w[1]+.5, str(t), color=cm.Dark2(t / 4.), fontdict={'weight': 'bold', 'size': 11})
	im = im + 1
axis([0,som.weights.shape[0],0,som.weights.shape[1]])

figure(2,facecolor='white')
cnt = 0
for j in reversed(range(20)): # images mosaic
	for i in range(20):
Example #56
# Take only the last column
y = credit_card_applications_df.iloc[:, -1].values

# Feature scaling: Use Normalization MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

# We will be using the minisom library here
from minisom import MiniSom

# Our dataset is small
# So we will just create a 10 by 10 grid, x = 10, y = 10
# input_len = number of features in our dataset: 14+1=15
# Sigma: radius of the different neighborhoods in the grid
minisom = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
minisom.random_weights_init(X)
minisom.train_random(data=X, num_iteration=200)

# Visualize the SOM
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(minisom.distance_map().T)
colorbar()  # cells close to white indicate frauds
markers = ["o", "s"]  # s=square
colors = ["r", "g"]  # red = didn't get approval, green=got approval
for i, x in enumerate(X):
    winning_node = minisom.winner(x)
    plot(
        winning_node[0] + 0.5,  # offset by 0.5 to centre the marker on the node
        winning_node[1] + 0.5,
Example #57
    In particular it shows how to train MiniSom and how to visualize the result.
    ATTENTION: pylab is required for the visualization.
"""


# reading the iris dataset in the csv format    
# (downloaded from http://aima.cs.berkeley.edu/data/iris.csv)
#rn = len(open('iris4.csv').readlines())

data = genfromtxt('data5.csv', delimiter=',',dtype = float)
data = numpy.nan_to_num(data)
print (data)
data = apply_along_axis(lambda x: x/linalg.norm(x),1,data) # data normalization

### Initialization and training ###
som = MiniSom(40,40,136,sigma=1.0,learning_rate=0.5)
som.random_weights_init(data)
print("Training...")
som.train_random(data,10000) # random training
print("\n...ready!")

### Plotting the response for each pattern in the iris dataset ###
from pylab import plot,axis,show,pcolor,colorbar,bone

bone()
pcolor(som.distance_map().T) # plotting the distance map as background
colorbar()

target = genfromtxt('class5.csv',delimiter=',',usecols=(0),dtype=int) # loading the labels
t = zeros(len(target),dtype=int)
print (target)
Example #58
dataset = pd.read_csv('Credit_Card_Applications.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

#Feature Scaling:
#no need to scale y (the class); we only scale X.
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)

#Train the SOM:
#import minisom
from minisom import MiniSom
som = MiniSom(x=10,
              y=10,
              input_len=15,
              sigma=1.0,
              learning_rate=0.5,
              random_seed=1)

#initialize weights:
som.random_weights_init(X)
som.train_random(data=X, num_iteration=100)

#VIZ:
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o', 's']  #circle, square
colors = ['r', 'g']  # red, green
Example #59
def train(self, data):
    self.som = MiniSom(self.width, self.height, len(data[0]), sigma=self.sigma, learning_rate=self.learning_rate)
    self.som.train_random(data, 1000000)
Example #60
X = dataset.iloc[:, :-1].values  # all rows, every column except the last; the SOM is trained on these features
print "Independent variables:\n", X
y = dataset.iloc[:, -1].values  # all rows, last column only
print "Account Approval:\n", y

## Feature Scaling
sc = MinMaxScaler(feature_range=(0, 1))  # scaling makes training easier for deep learning models with many dimensions
X = sc.fit_transform(X)
print "Normalized X:\n", X

## Training the SOM
som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
# Definition: MiniSom(x, y, input_len, sigma=1.0, learning_rate=0.5, decay_function=None, random_seed=None).
# A 10x10 grid is chosen because the number of observations is small; use a larger grid for a larger user base.
# input_len = number of features, including the customer id so that customers can be identified later.
# sigma = radius of the neighbourhoods on the grid. The higher the learning rate, the faster the
# convergence; a decay_function can be used to improve convergence.
som.random_weights_init(X)  # initialise the weights randomly from the data to be trained
som.train_random(X, num_iteration=100)  # apply steps 4 to 9 of the algorithm, for 100 iterations

## Visualizing the results
bone()
pcolor(som.distance_map().T)  # som.distance_map() returns all the Mean Inter-Neuron Distances (MID) in one matrix
colorbar()
markers = ['o', 's']