Пример #1
0
 def train_fmeasure(self, patterns,iteration_no,learning_rate,momentum_factor):
     """Train for ``iteration_no`` epochs and score every epoch with ``mean_f1``.

     Each element of ``patterns`` is a comma-separated string: the last field
     is the target value, the remaining non-empty fields are the inputs.

     For every pattern the network output is computed with ``self.update``
     and the weights are adjusted with ``self.backPropagate``.  The value
     recorded for the pattern is the target itself when the rounded error is
     within 0.01, otherwise the last element of the network output.

     Returns a list holding one ``mean_f1`` score (multiplied by 100) per
     epoch; the list is empty when ``iteration_no`` is 0.
     """
     scores = []
     for _ in range(iteration_no):
         running_error = 0.0
         recorded = []
         expected = []
         for record in patterns:
             fields = record.split(',')
             target = float(fields.pop())
             features = [float(tok) for tok in fields if len(tok) > 0]
             # Forward pass; only the last element of the returned output is kept.
             predicted = self.update(features).pop()
             running_error = running_error + self.backPropagate(
                 target, learning_rate, momentum_factor)
             # NOTE(review): the tolerance test uses the error summed over the
             # epoch so far, not this pattern's error alone -- confirm intent.
             within_tolerance = abs(round(running_error, 2)) <= 0.01
             recorded.append([target] if within_tolerance else [predicted])
             expected.append([target])
         scores.append(mean_f1(recorded, expected) * 100)
     return scores
Пример #2
0
    def train_fmeasure(self, iterations=1000, train_vector=[[[0.0],[0.0]]]):
        """Train the node grid and return one ``mean_f1`` score per iteration.

        ``train_vector`` is a sequence of ``[FV, PV]`` pairs.  For every
        sample the best matching node is looked up, and each node closer to
        it than the current (exponentially decayed) radius has its ``FV`` and
        ``PV`` moved toward the sample.  The new values are buffered and
        written back only after all nodes have been scanned, so every update
        for a sample is computed from the pre-update weights.

        After each sample the sample's ``PV`` and the rounded first element
        of ``self.predict(FV)`` are appended to the running label lists.
        These lists are never cleared, so the score stored for an iteration
        covers all samples seen since training started.

        Returns a list of ``mean_f1`` scores multiplied by 100, one entry per
        iteration.
        """
        expected = []
        predicted = []
        scores = []
        time_constant = iterations/log(self.radius)

        for step in range(1, iterations+1):
            # Radius and learning rate shrink by the same exponential factor.
            decay = exp(-1.0*step/time_constant)
            radius_now = self.radius*decay
            rate_now = self.learning_rate*decay

            for sample in train_vector:
                input_FV = sample[0]
                input_PV = sample[1]
                winner = self.best_match(input_FV)

                # Updates waiting to be written back: (node index, FV, PV).
                pending = []
                for k in range(self.total):
                    dist = self.distance(self.nodes[winner], self.nodes[k])
                    if not dist < radius_now:
                        continue
                    influence = exp((-1.0*(dist**2))/(2*radius_now*step))
                    gain = influence*rate_now
                    node = self.nodes[k]
                    new_FV = [node.FV[d] + gain*(input_FV[d] - node.FV[d])
                              for d in range(self.FV_size)]
                    new_PV = [node.PV[d] + gain*(input_PV[d] - node.PV[d])
                              for d in range(self.PV_size)]
                    # Newest entry goes to the front, as in a stack.
                    pending.insert(0, (k, new_FV, new_PV))

                for k, new_FV, new_PV in pending:
                    self.nodes[k].FV[:] = new_FV[:]
                    self.nodes[k].PV[:] = new_PV[:]

                expected.append(input_PV)
                predicted.append([int(round(self.predict(input_FV)[0], 0))])

            scores.append(mean_f1(expected, predicted)*100)

        return scores
Пример #3
0
def kmeans_fmeasure(file_name,iteration_no,learning_rate):
    """Run k-means ``iteration_no`` times and score each run with ``mean_f1``.

    The data set and its targets come from ``loadDataset(file_name)``.  Each
    run clusters the data into ``k = 3`` groups and compares the cluster
    label of every sample with its target.  ``learning_rate`` is accepted for
    signature parity with the other ``*_fmeasure`` functions and is not used.

    When ``file_name`` is ``"iris.csv"`` and at least one run was made, the
    clusters of the last run are plotted with pylab (skipped silently when
    pylab is not installed).

    Returns a list with one ``mean_f1`` score (multiplied by 100) per run.
    """
    dataSet, target = loadDataset(file_name)

    k = 3
    accl = []
    # Stay None when iteration_no is 0 so the plot below can be skipped
    # instead of failing on unbound names.
    labels = None
    centroids = None

    data_length = len(dataSet)
    for _ in range(iteration_no):
        labels, centroids = kmeans(dataSet, k)
        y_true = [[target[i]] for i in range(data_length)]
        y_pred = [[labels[i]] for i in range(data_length)]
        accl.append(mean_f1(y_true, y_pred)*100)

    if file_name == "iris.csv" and labels is not None:
        clusters2 = [[] for _ in range(k)]
        for i, p in enumerate(dataSet):
            clusters2[labels[i]].append(p)
        colors = cycle("rgbcmyk")
        # Sorting fixes which cluster gets which colour.  It also breaks the
        # cluster/centroid pairing, which is harmless here because every
        # centroid is drawn with the same marker.
        clusters2.sort()
        try:
            from pylab import plot,show,grid,xlabel,ylabel
        except ImportError:
            # Plotting is optional; the scores are still returned.
            pass
        else:
            for group, (ca,cb,cc,cd), color in zip(clusters2,centroids,colors):
                # An empty cluster has no points to draw (and zip(*[]) cannot
                # be unpacked), but its centroid is still shown.
                if group:
                    pa,pb,pc,pd = zip(*group)
                    x_cords = [a + b for a, b in zip(pa, pb)]
                    y_cords = [c + d for c, d in zip(pc, pd)]
                    plot(x_cords,y_cords, "o" + color)

                plot([ca+cb],[cc+cd], '^' + 'y')
            xlabel('Sepal Length + Sepal Width')
            ylabel('Petal Length + Petal Width')
            grid(True)
            show()
    return accl
    print "Number of test examples", num_test
    '''
    predictor = learn_one_vs_all(X_train,Y_train,classifier)
    raw_predictions = predictor.decision_function(X_test)
    boolean_predictions = make_class_labels(raw_predictions,topk=3)
    label_predictions = class_inverse_transform(boolean_predictions,uniq_label_mapping)
    #label_predictions = uniq_label_mapping.inverse_transform(numpy.array(boolean_predictions))

    label_predictions = [list(item) for item in label_predictions]
    assert(len(label_predictions)==len(true_labels))

    '''

    #new custom classifier code, will give n tag classifiers. sparsified
    class_label_preds = train_custom_one_vs_all(X_train,
                                                X_test,
                                                Y_train,
                                                topk=3)
    label_predictions = []
    for pred in class_label_preds:
        t = [uniq_label_mapping[i] for i in pred]
        label_predictions += [t]

    # print the predictions mapping
    for orig, lp in zip(true_labels, label_predictions):
        print orig, "----", lp, "\n"

    #calculate the mean-fscore
    print "The mean f-score is ", mean_f1.mean_f1(true_labels,
                                                  label_predictions)
    '''

    label_predictions =  run_multiclass(X_train,X_test,Y_train_labels,fp_set_list)

    ''' 
    #new custom classifier code, will give n tag classifiers. sparsified
    class_label_preds = train_custom_one_vs_all(X_train,X_test,Y_train,topk=3)
    label_predictions = []
    for pred in class_label_preds:   
        t = [uniq_label_mapping[i] for i in pred]
        label_predictions += [t]
    '''

    # print the predictions mapping 
    for orig,lp in zip(true_labels,label_predictions):
            print orig, "----" ,lp,"\n"
       
 
    #calculate the mean-fscore
    print "The mean f-score is ", mean_f1.mean_f1(true_labels,label_predictions)
        
        


    
    
    



Пример #6
0
def lms_fmeasure(file_name,iteration_no,learning_rate):
    """Train a single LMS unit on a CSV file and score every epoch.

    Every non-blank line of ``file_name`` is a comma-separated sample whose
    last field is the target and whose remaining non-empty fields are the
    inputs.  Blank lines (for example the one produced by a trailing newline)
    are skipped instead of raising ``ValueError``.

    In each of the ``iteration_no`` epochs every sample is passed through
    ``summation`` and ``act_func``; when the rounded error exceeds 0.01 the
    weights are replaced by the result of ``modify_wb`` and the unit's output
    is recorded, otherwise the target itself is recorded.  The recorded
    values are compared with the targets using ``mean_f1``.

    When ``file_name`` is ``"iris.csv"`` a scatter plot of the three classes
    is shown: columns 0+1 on the x axis, columns 2+3 on the y axis.

    Returns a list with one ``mean_f1`` score (multiplied by 100) per epoch.
    """
    with open(file_name,'r') as calc:
        lines = calc.read().split('\n')
    # Rows that actually carry a sample; float('') would fail on blank ones.
    rows = [line for line in lines if line.strip()]

    # One weight per input column (every field of the first line but the target).
    w = numpy.matrix(numpy.random.random(len(lines[0].split(','))-1))
    b = numpy.matrix(random.random())

    scores = []
    epoch = 0
    while epoch < iteration_no:
        y_true = []
        y_pred = []
        for row in rows:
            fields = row.split(',')
            target = float(fields.pop())
            p = numpy.matrix([float(v) for v in fields if len(v) > 0])
            a = act_func(summation(p,w,b))
            e = target-a
            if abs(round(e,2)) <= 0.01:
                y_true.append([target])
            else:
                w = modify_wb(w,learning_rate,target,p)
                y_true.append([a])
            y_pred.append([target])

        scores.append(mean_f1(y_true, y_pred)*100)
        epoch += 1

    if file_name=="iris.csv":
        print(len(lines))
        classes = ((0.0, 'red'), (1.0, 'green'), (2.0, 'blue'))
        # class label -> (x coordinates, y coordinates)
        coords = dict((label, ([], [])) for label, _ in classes)
        for row in rows:
            fields = row.split(',')
            label = float(fields.pop())
            inputs = [float(v) for v in fields if len(v) > 0]
            if label in coords:
                xs, ys = coords[label]
                xs.append(inputs[0]+inputs[1])
                ys.append(inputs[2]+inputs[3])

        fig = plt.figure()
        ax = fig.add_subplot(111)
        handles = [ax.scatter(coords[label][0], coords[label][1], s=50, c=color)
                   for label, color in classes]

        ax.set_title('Petal size vs Sepal size', fontsize=14)
        # x holds columns 0+1 and y holds columns 2+3; in the usual iris.csv
        # layout those are the sepal and petal measurements respectively.
        ax.set_xlabel('Sepal size (cm)')
        ax.set_ylabel('Petal size (cm)')
        ax.legend(handles, ["Iris Setosa", "Iris Versicolor", "Iris Virginica"], loc=2)

        ax.grid(True,linestyle='-',color='0.75')

        plt.show()

    return scores
Пример #7
0
def knn_fmeasure(file_name,iteration_no,learning_rate):
    """Classify the test split with 3-nearest-neighbours and score each pass.

    ``loadDataset`` fills the training and test lists using a 0.67 split.
    Each of the ``iteration_no`` passes predicts every test sample with
    ``getNeighbors``/``getResponse`` and compares the predictions with the
    last field of each test sample using ``mean_f1``.  ``learning_rate`` is
    accepted for signature parity with the other ``*_fmeasure`` functions and
    is not used.

    When ``file_name`` is ``"iris.csv"`` a scatter plot of the three classes
    is shown: columns 0+1 on the x axis, columns 2+3 on the y axis.  Blank
    lines in the file are skipped instead of raising ``ValueError``.

    Returns a list with one ``mean_f1`` score (multiplied by 100) per pass.
    """
    # prepare data
    trainingSet = []
    testSet = []
    split = 0.67
    loadDataset(file_name, split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    accl = []
    k = 3
    for _ in range(iteration_no):
        y_true = []
        y_pred = []
        for sample in testSet:
            neighbors = getNeighbors(trainingSet, sample, k)
            result = getResponse(neighbors)
            y_true.append([float(sample[-1])])
            y_pred.append([float(result)])
        accl.append(mean_f1(y_true, y_pred)*100)

    if file_name=="iris.csv":
        with open(file_name,'r') as calc:
            pat = calc.read().split('\n')
        print(len(pat))
        classes = ((0.0, 'red'), (1.0, 'green'), (2.0, 'blue'))
        # class label -> (x coordinates, y coordinates)
        coords = dict((label, ([], [])) for label, _ in classes)
        for row in pat:
            # A trailing newline leaves a blank final row with no target.
            if not row.strip():
                continue
            fields = row.split(',')
            label = float(fields.pop())
            inputs = [float(v) for v in fields if len(v) > 0]
            if label in coords:
                xs, ys = coords[label]
                xs.append(inputs[0]+inputs[1])
                ys.append(inputs[2]+inputs[3])

        fig = plt.figure()
        ax = fig.add_subplot(111)
        handles = [ax.scatter(coords[label][0], coords[label][1], s=50, c=color)
                   for label, color in classes]

        ax.set_title('Petal size vs Sepal size', fontsize=14)
        # x holds columns 0+1 and y holds columns 2+3; in the usual iris.csv
        # layout those are the sepal and petal measurements respectively.
        ax.set_xlabel('Sepal size (cm)')
        ax.set_ylabel('Petal size (cm)')
        ax.legend(handles, ["Iris Setosa", "Iris Versicolor", "Iris Virginica"], loc=2)

        ax.grid(True,linestyle='-',color='0.75')

        plt.show()
    return accl