Exemplo n.º 1
0
    def test_kernel_regression_rbf4(self):
        """Kernel ridge regression (default kernel) on quadratic synthetic data.

        Trains on the first 100 samples of synth1_quad.txt and checks the
        mean squared error against both the noisy measurements (mse) and
        the noise-free ground truth (ase) on the remaining samples.
        """
        rega = KernelRidgeRegression(random_seed=28378)
        filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic',
                                'synth1_quad.txt')
        labels = []
        vectors = []
        truth = []
        # Each line holds: x, noisy measurement, noise-free truth.
        # Use a context manager so the file is always closed.
        with open(filename, 'r') as reg_file:
            for line in reg_file:
                datapoint = line.split()
                labels.append(float(datapoint[1]))
                vectors.append([float(datapoint[0])])
                truth.append(float(datapoint[2]))

        for i in range(100):
            rega.addTraining(labels[i], vectors[i])
        rega.train(verbose=False)

        mse = 0.0  # squared error vs. the noisy measurements
        ase = 0.0  # squared error vs. the noise-free truth
        table = Table()
        for i in range(100, len(labels)):
            p = rega.predict(vectors[i])
            e = p - labels[i]
            mse += e * e
            a = p - truth[i]
            ase += a * a
            table.setElement(i, 'x', vectors[i][0])
            table.setElement(i, 'measure', labels[i])
            table.setElement(i, 'truth', truth[i])
            table.setElement(i, 'pred', p)
            table.setElement(i, 'e', e)
            table.setElement(i, 'a', a)

        mse = mse / (len(labels) - 100)
        ase = ase / (len(labels) - 100)

        self.assertAlmostEqual(mse, 0.063883259792847411, places=7)
        self.assertAlmostEqual(ase, 0.028811752673991175, places=7)
Exemplo n.º 2
0
    def test_kernel_regression_rbf2(self):
        """Kernel ridge regression with an RBF kernel on mixed synthetic data.

        Trains on the first 100 samples of synth1_mix.txt and checks the
        mean squared error against both the noisy measurements (mse) and
        the noise-free ground truth (ase) on the remaining samples.
        """
        rega = KernelRidgeRegression(lams=0.1, kernels=RBF())
        filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic',
                                'synth1_mix.txt')
        labels = []
        vectors = []
        truth = []
        # Each line holds: x, noisy measurement, noise-free truth.
        # Use a context manager so the file is always closed.
        with open(filename, 'r') as reg_file:
            for line in reg_file:
                datapoint = line.split()
                labels.append(float(datapoint[1]))
                vectors.append([float(datapoint[0])])
                truth.append(float(datapoint[2]))

        for i in range(100):
            rega.addTraining(labels[i], vectors[i])
        rega.train()

        mse = 0.0  # squared error vs. the noisy measurements
        ase = 0.0  # squared error vs. the noise-free truth
        table = Table()
        for i in range(100, len(labels)):
            p = rega.predict(vectors[i])
            e = p - labels[i]
            mse += e * e
            a = p - truth[i]
            ase += a * a
            table.setElement(i, 'x', vectors[i][0])
            table.setElement(i, 'measure', labels[i])
            table.setElement(i, 'truth', truth[i])
            table.setElement(i, 'pred', p)
            table.setElement(i, 'e', e)
            table.setElement(i, 'a', a)

        mse = mse / (len(labels) - 100)
        ase = ase / (len(labels) - 100)

        self.assertAlmostEqual(mse, 0.563513669235162, places=7)
        self.assertAlmostEqual(ase, 0.51596869146460422, places=7)
Exemplo n.º 3
0
 def test_kernel_regression_rbf4(self):
     """Kernel ridge regression (default kernel) on quadratic synthetic data.

     Trains on the first 100 samples of synth1_quad.txt and checks the
     mean squared error against both the noisy measurements (mse) and
     the noise-free ground truth (ase) on the remaining samples.
     """
     rega = KernelRidgeRegression(random_seed=28378)
     filename = os.path.join(pyvision.__path__[0],'data','synthetic','synth1_quad.txt')
     labels = []
     vectors = []
     truth = []
     # Each line holds: x, noisy measurement, noise-free truth.
     # Use a context manager so the file is always closed.
     with open(filename,'r') as reg_file:
         for line in reg_file:
             datapoint = line.split()
             labels.append(float(datapoint[1]))
             vectors.append([float(datapoint[0])])
             truth.append(float(datapoint[2]))

     for i in range(100):
         rega.addTraining(labels[i],vectors[i])
     rega.train(verbose=False)

     mse = 0.0  # squared error vs. the noisy measurements
     ase = 0.0  # squared error vs. the noise-free truth
     table = Table()
     for i in range(100,len(labels)):
         p = rega.predict(vectors[i])
         e = p - labels[i]
         mse += e*e
         a = p - truth[i]
         ase += a*a
         table.setElement(i,'x',vectors[i][0])
         table.setElement(i,'measure',labels[i])
         table.setElement(i,'truth',truth[i])
         table.setElement(i,'pred',p)
         table.setElement(i,'e',e)
         table.setElement(i,'a',a)

     mse = mse/(len(labels)-100)
     ase = ase/(len(labels)-100)

     self.assertAlmostEqual(mse,0.063883259792847411,places=7)
     self.assertAlmostEqual(ase,0.028811752673991175,places=7)
Exemplo n.º 4
0
 def test_kernel_regression_rbf2(self):
     """Kernel ridge regression with an RBF kernel on mixed synthetic data.

     Trains on the first 100 samples of synth1_mix.txt and checks the
     mean squared error against both the noisy measurements (mse) and
     the noise-free ground truth (ase) on the remaining samples.
     """
     rega = KernelRidgeRegression(lams=0.1,kernels=RBF())
     filename = os.path.join(pyvision.__path__[0],'data','synthetic','synth1_mix.txt')
     labels = []
     vectors = []
     truth = []
     # Each line holds: x, noisy measurement, noise-free truth.
     # Use a context manager so the file is always closed.
     with open(filename,'r') as reg_file:
         for line in reg_file:
             datapoint = line.split()
             labels.append(float(datapoint[1]))
             vectors.append([float(datapoint[0])])
             truth.append(float(datapoint[2]))

     for i in range(100):
         rega.addTraining(labels[i],vectors[i])
     rega.train()

     mse = 0.0  # squared error vs. the noisy measurements
     ase = 0.0  # squared error vs. the noise-free truth
     table = Table()
     for i in range(100,len(labels)):
         p = rega.predict(vectors[i])
         e = p - labels[i]
         mse += e*e
         a = p - truth[i]
         ase += a*a
         table.setElement(i,'x',vectors[i][0])
         table.setElement(i,'measure',labels[i])
         table.setElement(i,'truth',truth[i])
         table.setElement(i,'pred',p)
         table.setElement(i,'e',e)
         table.setElement(i,'a',a)

     mse = mse/(len(labels)-100)
     ase = ase/(len(labels)-100)

     self.assertAlmostEqual(mse,0.563513669235162,places=7)
     self.assertAlmostEqual(ase,0.51596869146460422,places=7)
Exemplo n.º 5
0
    def test_regression_linear(self):
        """Linear ridge regression on synthetic data.

        Trains on the first 50 samples of regression.dat (target in
        column 0, features in columns 3-5) and checks the mean squared
        error on the remaining samples.
        """
        rega = RidgeRegression()
        filename = os.path.join(pyvision.__path__[0],'data','synthetic','regression.dat')
        labels = []
        vectors = []
        # Use a context manager so the file is always closed.
        with open(filename,'r') as reg_file:
            for line in reg_file:
                datapoint = line.split()
                labels.append(float(datapoint[0]))
                vectors.append([float(datapoint[3]),float(datapoint[4]),float(datapoint[5])])

        for i in range(50):
            rega.addTraining(labels[i],vectors[i])
        rega.train()

        mse = 0.0
        table = Table()
        for i in range(50,len(labels)):
            p = rega.predict(vectors[i])
            e = p - labels[i]
            table.setElement(i,'truth',labels[i])
            table.setElement(i,'pred',p)
            table.setElement(i,'Residual',e)
            mse += e*e

        mse = mse/(len(labels)-50)
        self.assertAlmostEqual(mse,0.24301122718491874,places=4)
Exemplo n.º 6
0
    def test_regression_linear(self):
        """Linear ridge regression on synthetic data.

        Trains on the first 50 samples of regression.dat (target in
        column 0, features in columns 3-5) and checks the mean squared
        error on the remaining samples.
        """
        rega = RidgeRegression()
        filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic',
                                'regression.dat')
        labels = []
        vectors = []
        # Use a context manager so the file is always closed.
        with open(filename, 'r') as reg_file:
            for line in reg_file:
                datapoint = line.split()
                labels.append(float(datapoint[0]))
                vectors.append([
                    float(datapoint[3]),
                    float(datapoint[4]),
                    float(datapoint[5])
                ])

        for i in range(50):
            rega.addTraining(labels[i], vectors[i])
        rega.train()

        mse = 0.0
        table = Table()
        for i in range(50, len(labels)):
            p = rega.predict(vectors[i])
            e = p - labels[i]
            table.setElement(i, 'truth', labels[i])
            table.setElement(i, 'pred', p)
            table.setElement(i, 'Residual', e)
            mse += e * e

        mse = mse / (len(labels) - 50)
        self.assertAlmostEqual(mse, 0.24301122718491874, places=4)
Exemplo n.º 7
0
    def train_SVR_Linear(self,
                         labels,
                         vectors,
                         verbose,
                         C_range,
                         callback=None):
        '''
        Private use only.

        Grid-search the SVM cost parameter C over C_range for a linear
        support vector regression: each candidate model is trained on a
        shuffled training split and scored by mean squared error on a
        held-out validation split; the best model is kept on self.svm.

        labels   -- target values, one per training vector
        vectors  -- feature vectors parallel to labels
        verbose  -- if true, print tuning tables to stdout
        C_range  -- iterable of candidate C values to evaluate
        callback -- optional callable, invoked with an int percentage
                    (0-100) of candidates evaluated so far
        '''
        # combine the labels and vectors into one set.
        data = []
        for i in range(len(labels)):
            data.append([labels[i], vectors[i]])

        #shuffle the data
        # Seeding only when random_seed is set keeps runs reproducible
        # on demand while remaining random by default.
        rng = random.Random()
        if self.random_seed != None:
            rng.seed(self.random_seed)
        rng.shuffle(data)

        # partition into validation and training
        # A float in (0, 1) is interpreted as a fraction of the data;
        # an int as an absolute number of validation samples.
        if type(
                self.validation_size
        ) == float and self.validation_size > 0.0 and self.validation_size < 1.0:
            training_cutoff = int(len(data) * (1.0 - self.validation_size))
        elif type(self.validation_size
                  ) == int and self.validation_size < len(labels):
            training_cutoff = len(labels) - self.validation_size
        else:
            raise NotImplementedError(
                "Cannot determine validation set from %s" %
                self.validation_size)

        if verbose: print "Training Cutoff:", len(labels), training_cutoff
        training_data = data[:training_cutoff]
        validation_data = data[training_cutoff:]

        # Unzip the shuffled training partition back into parallel lists
        # for the svm problem constructor.
        tmp_labels = []
        tmp_vectors = []
        for each in training_data:
            tmp_labels.append(each[0])
            tmp_vectors.append(each[1])

        prob = svm.svm_problem(tmp_labels, tmp_vectors)

        # One entry per candidate C: the trained model (training_svm)
        # and its [C, mse] record (training_info).
        training_info = []
        training_svm = []
        training_table = Table()
        self.training_table = training_table
        i = 0
        for C in C_range:

            param = svm.svm_parameter(svm_type=self.svm_type,
                                      kernel_type=svm.LINEAR,
                                      C=C,
                                      p=self.epsilon,
                                      nu=self.nu)

            test_svm = svm.svm_model(prob, param)

            # Mean squared error of this candidate on the validation split.
            mse = 0.0
            total = len(validation_data)
            for label, vector in validation_data:
                pred = test_svm.predict(vector)
                error = label - pred
                mse += error * error
            mse = mse / total

            training_svm.append(test_svm)
            training_info.append([C, mse])
            training_table.setElement(i, 'C', C)
            training_table.setElement(i, 'mse', mse)
            i += 1

            if callback != None:
                callback(int(100 * float(i) / len(C_range)))

        if verbose: print
        if verbose: print "------------------------------"
        if verbose: print " Tuning Information:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        # Select the candidate with the lowest validation error
        # (each record is [C, mse], so [-1] is the mse).
        best = training_info[0]
        best_svm = training_svm[0]
        for i in range(len(training_info)):
            each = training_info[i]
            if verbose: print " %8.3e  %0.8f" % (each[0], each[1])
            if best[-1] > each[-1]:
                best = each
                best_svm = training_svm[i]
        if verbose: print "------------------------------"
        if verbose: print
        if verbose: print "------------------------------"
        if verbose: print " Best Tuning:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        if verbose: print " %8.3e  %0.8f" % (best[0], best[1])
        if verbose: print "------------------------------"
        if verbose: print
        # Record the full tuning history and keep the winning model.
        self.training_info = training_info
        self.C = best[0]
        self.error = best[1]

        self.svm = best_svm
Exemplo n.º 8
0
    def train_SVR_Linear(self,labels,vectors,verbose, C_range, callback=None):
        '''
        Private use only.

        Grid-search the SVM cost parameter C over C_range for a linear
        support vector regression: each candidate model is trained on a
        shuffled training split and scored by mean squared error on a
        held-out validation split; the best model is kept on self.svm.

        labels   -- target values, one per training vector
        vectors  -- feature vectors parallel to labels
        verbose  -- if true, print tuning tables to stdout
        C_range  -- iterable of candidate C values to evaluate
        callback -- optional callable, invoked with an int percentage
                    (0-100) of candidates evaluated so far
        '''
        # combine the labels and vectors into one set.
        data = []
        for i in range(len(labels)):
            data.append([labels[i],vectors[i]])
            
        #shuffle the data
        # Seeding only when random_seed is set keeps runs reproducible
        # on demand while remaining random by default.
        rng = random.Random()
        if self.random_seed != None:
            rng.seed(self.random_seed)
        rng.shuffle(data)
                
        # partition into validation and training
        # A float in (0, 1) is interpreted as a fraction of the data;
        # an int as an absolute number of validation samples.
        if type(self.validation_size) == float and self.validation_size > 0.0 and self.validation_size < 1.0:
            training_cutoff = int(len(data)*(1.0-self.validation_size))
        elif type(self.validation_size) == int and self.validation_size < len(labels):
            training_cutoff = len(labels)-self.validation_size
        else:
            raise NotImplementedError("Cannot determine validation set from %s"%self.validation_size)
            
        if verbose: print "Training Cutoff:",len(labels),training_cutoff
        training_data = data[:training_cutoff]
        validation_data = data[training_cutoff:]
        
        # Unzip the shuffled training partition back into parallel lists
        # for the svm problem constructor.
        tmp_labels = []
        tmp_vectors = []
        for each in training_data:
            tmp_labels.append(each[0])
            tmp_vectors.append(each[1])
        
        prob = svm.svm_problem(tmp_labels,tmp_vectors)
        
        # One entry per candidate C: the trained model (training_svm)
        # and its [C, mse] record (training_info).
        training_info = []
        training_svm = []
        training_table = Table()
        self.training_table = training_table
        i=0
        for C in C_range:
                
            param = svm.svm_parameter(svm_type=self.svm_type,kernel_type = svm.LINEAR, C = C, p=self.epsilon,nu=self.nu)
                
            test_svm = svm.svm_model(prob, param)
                
            # Mean squared error of this candidate on the validation split.
            mse = 0.0
            total = len(validation_data)
            for label,vector in validation_data:
                pred = test_svm.predict(vector)
                error = label - pred
                mse += error*error
            mse = mse/total
 
            training_svm.append(test_svm)
            training_info.append([C,mse])
            training_table.setElement(i,'C',C)
            training_table.setElement(i,'mse',mse)
            i+=1

            if callback != None:
                callback(int(100*float(i)/len(C_range)))
                
        if verbose: print 
        if verbose: print "------------------------------"
        if verbose: print " Tuning Information:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        # Select the candidate with the lowest validation error
        # (each record is [C, mse], so [-1] is the mse).
        best = training_info[0]
        best_svm = training_svm[0]
        for i in range(len(training_info)):
            each = training_info[i]
            if verbose: print " %8.3e  %0.8f"%(each[0],each[1])
            if best[-1] > each[-1]:
                best = each
                best_svm = training_svm[i]
        if verbose: print "------------------------------"
        if verbose: print 
        if verbose: print "------------------------------"
        if verbose: print " Best Tuning:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        if verbose: print " %8.3e  %0.8f"%(best[0],best[1])
        if verbose: print "------------------------------"
        if verbose: print
        # Record the full tuning history and keep the winning model.
        self.training_info = training_info
        self.C     = best[0]
        self.error = best[1]

        self.svm = best_svm