Пример #1
0
    def runTests(self, data, sizes, iterations, graph=False):
        """
        Train and score every model in ``self.models`` for each size setting.

        For every entry of ``sizes`` the data is re-split ``iterations`` times
        (``train_percent=.9``). On each split a fresh ``Model`` is built for
        every entry of ``self.models``. Models whose ``sample`` attribute is
        ``'Active'`` are trained with ``activeLearn`` on the whole training
        split; all other models are fitted on a random sample of ``end size``
        training instances. Each trained model is then scored on the test
        split with ``test_metric(..., f1=True)``.

        Parameters
        ----------
        data : dataset
        data to be sampled from for testing. Must provide
        ``test_train_split(train_percent=...)`` returning ``(train, test)``;
        both parts are read through ``get_x()`` / ``get_y()`` and the
        training part must also provide ``random_sample(size=...)``.

        sizes : array
        array containing tuples of
            (start size,
            step size,
            end size,
            random_size (optional),
            outlier_size (optional))
        to be used for testing. An omitted optional entry defaults to 0.

        iterations : int
        Number of times to run each test defined by sizes array

        graph : bool
        Currently unused by this method; kept for interface compatibility.

        Returns
        -------
        dict
        Maps each tuple's end size to a list with one inner list per entry
        of ``self.models`` (same order), holding one ``test_metric`` result
        per iteration. Because the key is the end size, two tuples sharing
        an end size overwrite each other. The same dict is also stored in
        ``self.currentResults``.
        """

        results = {}
        for size in tqdm(sizes):
            size_F1s = [[] for _ in self.models]

            # Each optional entry is guarded by its own index, so a 4-tuple
            # (random_size given, outlier_size omitted) no longer raises
            # IndexError when reading size[4].
            r_size = size[3] if len(size) > 3 else 0
            o_size = size[4] if len(size) > 4 else 0

            for iteration in tqdm(range(iterations)):
                (train, test) = data.test_train_split(train_percent=.9)

                # train and test models; each spec is indexed as
                # (type, sample, name)
                for f1_scores, spec in zip(size_F1s, self.models):
                    model = Model(type=spec[0],
                                  sample=spec[1],
                                  name=(spec[2] + str(iteration)))
                    if model.sample == 'Active':
                        model.activeLearn(train.get_x(),
                                          train.get_y(),
                                          start_size=size[0],
                                          end_size=size[2],
                                          step_size=size[1],
                                          random_size=r_size,
                                          outlier_size=o_size)
                    else:
                        rand_train = train.random_sample(size=size[2])
                        model.fit(rand_train.get_x(), rand_train.get_y())

                    f1_scores.append(
                        model.test_metric(test.get_x(), test.get_y(), f1=True))

                    # Remove everything matching 'NN/*' after each 'NN'-type
                    # model has been scored.
                    if model.type == 'NN':
                        for f in glob.glob('NN/*'):
                            os.remove(f)
            results[size[2]] = size_F1s

        self.currentResults = results
        return results
Пример #2
0
# Experiment script: trains an actively-sampled SVM on five random splits of
# the pickled PCA-reduced MNIST dataset and stores the resulting F1 scores.

# NOTE(review): pickle can execute arbitrary code while loading; only point
# this at data files the project produced itself.
with open("../data/pickled/mnist_data_pca50.p", "rb") as pca_file:
    mnist_pca = pickle.load(pca_file)

# Not used by the loop below, which splits the full mnist_pca.
# NOTE(review): the original comment said this yields "24 instances" --
# TODO confirm against the dataset size.
mnist_pca_sample = mnist_pca.random_sample(percent=.5)

activeSVMF1s = []

for _ in range(5):
    # fresh train/test split for this run
    (train_pca, test_pca) = mnist_pca.test_train_split(train_percent=.8)

    # make active model
    active_SVM = Model('SVM', sample='Active')
    active_SVM.activeLearn(train_pca.get_x(),
                           train_pca.get_y(),
                           start_size=10,
                           end_size=150,
                           step_size=10,
                           random_size=2,
                           outlier_size=1)
    activeSVMF1s.append(
        active_SVM.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True))

# Write through a context manager so the file is flushed and closed even if
# pickling fails part-way.
with open("activeSVMDistF1s150.p", "wb") as out_file:
    pickle.dump(activeSVMF1s, out_file)

print(datetime.datetime.now())

print(activeSVMF1s)
Пример #3
0
# Experiment script: over 1000 random splits, compares a random-forest model
# fitted on 250 randomly sampled training instances against one trained with
# active learning up to the same size, and stores both lists of F1 scores.
randRFF1s = []
activeRFF1s = []

for _ in range(1000):
    # getting test data to use for both models
    (train_pca, test_pca) = mnist_pca.test_train_split(train_percent=.8)

    # make random train data and model
    rand_train_PCA = train_pca.random_sample(size=250)
    rand_RF = Model('RF')
    rand_RF.fit(rand_train_PCA.get_x(), rand_train_PCA.get_y())
    randRFF1s.append(
        rand_RF.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True))

    # make active model
    active_RF = Model('RF', sample='Active')
    active_RF.activeLearn(train_pca.get_x(),
                          train_pca.get_y(),
                          start_size=150,
                          end_size=250,
                          step_size=10)
    activeRFF1s.append(
        active_RF.test_metric(test_pca.get_x(), test_pca.get_y(), f1=True))

# Write through context managers so each file is flushed and closed even if
# pickling fails part-way.
with open("randRFF1s.p", "wb") as rand_out:
    pickle.dump(randRFF1s, rand_out)

with open("activeRFF1s.p", "wb") as active_out:
    pickle.dump(activeRFF1s, active_out)

print(datetime.datetime.now())
Пример #4
0
        rand_SVM = Model('SVM')
        rand_SVM.fit(rand_train_PCA.get_x(), rand_train_PCA.get_y())
        randSVMF1s.append(
            rand_SVM.test_metric(test_pca.get_x(),
                                 test_pca.get_y(),
                                 f1=True,
                                 avg='weighted'))
        randSVMRaw.append(rand_SVM.predict(test_pca.get_x()))

        #make active model for step size 5, 10, 15
        for stepSize in [5, 10, 15]:
            active_SVM = Model('SVM', sample='Active')
            active_SVM.activeLearn(train_pca.get_x(),
                                   train_pca.get_y(),
                                   start_size=startSize,
                                   end_size=sampleSize,
                                   step_size=stepSize)
            activeSVMF1s[stepSize].append(
                active_SVM.test_metric(test_pca.get_x(),
                                       test_pca.get_y(),
                                       f1=True,
                                       avg='weighted'))
            activeSVMRaw[stepSize].append(active_SVM.predict(test_pca.get_x()))

    pickle.dump(TESTDATA, open(testfnam, "wb"))
    pickle.dump(RANDTRAIN, open(randtrainfnam, "wb"))

    pickle.dump(randSVMF1s, open(rSVMF1fname, "wb"))
    pickle.dump(randSVMRaw, open(rSVMRawfname, "wb"))
    pickle.dump(activeSVMF1s, open(aSVMF1fname, "wb"))