# Example #1
    def test_kernel_regression_rbf4(self):
        '''
        Regression test: seeded kernel ridge regression on the quadratic
        synthetic data set; checks the measured-label MSE and the
        true-signal MSE against known-good values.
        '''
        rega = KernelRidgeRegression(random_seed=28378)
        filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic',
                                'synth1_quad.txt')
        labels = []
        vectors = []
        truth = []
        # Each line holds: x, noisy measurement, noise-free truth.
        # 'with' guarantees the file is closed (the original leaked it).
        with open(filename, 'r') as reg_file:
            for line in reg_file:
                datapoint = line.split()
                labels.append(float(datapoint[1]))
                vectors.append([float(datapoint[0])])
                truth.append(float(datapoint[2]))

        # Train on the first 100 points only.
        for i in range(100):
            rega.addTraining(labels[i], vectors[i])
        rega.train(verbose=False)

        # Evaluate on the remainder: 'e' is error against the noisy
        # measurement, 'a' is error against the underlying truth.
        mse = 0.0
        ase = 0.0
        table = Table()
        for i in range(100, len(labels)):
            p = rega.predict(vectors[i])
            e = p - labels[i]
            mse += e * e
            a = p - truth[i]
            ase += a * a
            table.setElement(i, 'x', vectors[i][0])
            table.setElement(i, 'measure', labels[i])
            table.setElement(i, 'truth', truth[i])
            table.setElement(i, 'pred', p)
            table.setElement(i, 'e', e)
            table.setElement(i, 'a', a)

        n_test = len(labels) - 100
        mse = mse / n_test
        ase = ase / n_test

        self.assertAlmostEqual(mse, 0.063883259792847411, places=7)
        self.assertAlmostEqual(ase, 0.028811752673991175, places=7)
# Example #2
    def test_kernel_regression_rbf2(self):
        '''
        Regression test: RBF-kernel ridge regression (lams=0.1) on the
        mixed synthetic data set; checks the measured-label MSE and the
        true-signal MSE against known-good values.
        '''
        rega = KernelRidgeRegression(lams=0.1, kernels=RBF())
        filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic',
                                'synth1_mix.txt')
        labels = []
        vectors = []
        truth = []
        # Each line holds: x, noisy measurement, noise-free truth.
        # 'with' guarantees the file is closed (the original leaked it).
        with open(filename, 'r') as reg_file:
            for line in reg_file:
                datapoint = line.split()
                labels.append(float(datapoint[1]))
                vectors.append([float(datapoint[0])])
                truth.append(float(datapoint[2]))

        # Train on the first 100 points only.
        for i in range(100):
            rega.addTraining(labels[i], vectors[i])
        rega.train()

        # Evaluate on the remainder: 'e' is error against the noisy
        # measurement, 'a' is error against the underlying truth.
        mse = 0.0
        ase = 0.0
        table = Table()
        for i in range(100, len(labels)):
            p = rega.predict(vectors[i])
            e = p - labels[i]
            mse += e * e
            a = p - truth[i]
            ase += a * a
            table.setElement(i, 'x', vectors[i][0])
            table.setElement(i, 'measure', labels[i])
            table.setElement(i, 'truth', truth[i])
            table.setElement(i, 'pred', p)
            table.setElement(i, 'e', e)
            table.setElement(i, 'a', a)

        n_test = len(labels) - 100
        mse = mse / n_test
        ase = ase / n_test

        self.assertAlmostEqual(mse, 0.563513669235162, places=7)
        self.assertAlmostEqual(ase, 0.51596869146460422, places=7)
# Example #3
    def test_regression_linear(self):
        '''
        Regression test: plain ridge regression on a synthetic linear
        data set; checks held-out MSE against a known-good value.
        '''
        rega = RidgeRegression()
        filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic',
                                'regression.dat')
        labels = []
        vectors = []
        # Column 0 is the target; columns 3-5 are the features used.
        # 'with' guarantees the file is closed (the original leaked it).
        with open(filename, 'r') as reg_file:
            for line in reg_file:
                datapoint = line.split()
                labels.append(float(datapoint[0]))
                vectors.append([
                    float(datapoint[3]),
                    float(datapoint[4]),
                    float(datapoint[5])
                ])

        # Train on the first 50 points only.
        for i in range(50):
            rega.addTraining(labels[i], vectors[i])
        rega.train()

        # Evaluate on the remainder.
        mse = 0.0
        table = Table()
        for i in range(50, len(labels)):
            p = rega.predict(vectors[i])
            e = p - labels[i]
            table.setElement(i, 'truth', labels[i])
            table.setElement(i, 'pred', p)
            table.setElement(i, 'Residual', e)
            mse += e * e

        mse = mse / (len(labels) - 50)
        self.assertAlmostEqual(mse, 0.24301122718491874, places=4)
# Example #4
    def train_SVR_Linear(self,
                         labels,
                         vectors,
                         verbose,
                         C_range,
                         callback=None):
        '''Private use only'''
        # combine the labels and vectors into one set.
        data = []
        for i in range(len(labels)):
            data.append([labels[i], vectors[i]])

        #shuffle the data
        rng = random.Random()
        if self.random_seed != None:
            rng.seed(self.random_seed)
        rng.shuffle(data)

        # partition into validation and training
        if type(
                self.validation_size
        ) == float and self.validation_size > 0.0 and self.validation_size < 1.0:
            training_cutoff = int(len(data) * (1.0 - self.validation_size))
        elif type(self.validation_size
                  ) == int and self.validation_size < len(labels):
            training_cutoff = len(labels) - self.validation_size
        else:
            raise NotImplementedError(
                "Cannot determine validation set from %s" %
                self.validation_size)

        if verbose: print "Training Cutoff:", len(labels), training_cutoff
        training_data = data[:training_cutoff]
        validation_data = data[training_cutoff:]

        tmp_labels = []
        tmp_vectors = []
        for each in training_data:
            tmp_labels.append(each[0])
            tmp_vectors.append(each[1])

        prob = svm.svm_problem(tmp_labels, tmp_vectors)

        training_info = []
        training_svm = []
        training_table = Table()
        self.training_table = training_table
        i = 0
        for C in C_range:

            param = svm.svm_parameter(svm_type=self.svm_type,
                                      kernel_type=svm.LINEAR,
                                      C=C,
                                      p=self.epsilon,
                                      nu=self.nu)

            test_svm = svm.svm_model(prob, param)

            mse = 0.0
            total = len(validation_data)
            for label, vector in validation_data:
                pred = test_svm.predict(vector)
                error = label - pred
                mse += error * error
            mse = mse / total

            training_svm.append(test_svm)
            training_info.append([C, mse])
            training_table.setElement(i, 'C', C)
            training_table.setElement(i, 'mse', mse)
            i += 1

            if callback != None:
                callback(int(100 * float(i) / len(C_range)))

        if verbose: print
        if verbose: print "------------------------------"
        if verbose: print " Tuning Information:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        best = training_info[0]
        best_svm = training_svm[0]
        for i in range(len(training_info)):
            each = training_info[i]
            if verbose: print " %8.3e  %0.8f" % (each[0], each[1])
            if best[-1] > each[-1]:
                best = each
                best_svm = training_svm[i]
        if verbose: print "------------------------------"
        if verbose: print
        if verbose: print "------------------------------"
        if verbose: print " Best Tuning:"
        if verbose: print "         C   error"
        if verbose: print "------------------------------"
        if verbose: print " %8.3e  %0.8f" % (best[0], best[1])
        if verbose: print "------------------------------"
        if verbose: print
        self.training_info = training_info
        self.C = best[0]
        self.error = best[1]

        self.svm = best_svm