Пример #1
0
    def train(self, x, y):
        """Fit the decision tree to a set of training instances.

        Parameters
        ----------
        x : numpy.array
            Attribute matrix of shape (N, K): one row per instance, one
            column per attribute.
        y : numpy.array
            Array of length N holding one entry per row of ``x``.

        Returns
        -------
        DecisionTreeClassifier
            This same instance (``self``); the built tree is stored in
            ``self.classifierTree`` and ``self.is_trained`` is set to True.

        """

        # Refuse to train when attributes and targets do not line up
        mismatch_msg = \
            "Training failed. x and y must have the same number of instances."
        assert x.shape[0] == len(y), mismatch_msg

        # Wrap the raw arrays in the project's dataset container
        training_set = ds.ClassifierDataset()
        training_set.initFromData(x, y)

        # The tree is built from the dataset, the split object for its root
        # and a fresh statistics object (arguments evaluated in that order)
        self.classifierTree = ClassifierTree(
            training_set,
            training_set.getSplitObjectForRoot(),
            ClassifierTreeStats(),
        )

        # Record that training has happened so later calls can check for it
        self.is_trained = True

        return self
Пример #2
0
import prune
import cProfile

# Profiling driver: trains the decision tree classifier several times on each
# data file and prints a cProfile report for every run.
# NOTE(review): `ds` and `cs` are used below but not imported in the visible
# part of this script — confirm they are imported further up.

# Number of profiled training runs per data file.
repeats = 3

# Data file locations, relative to the current working directory.
pathToSimple1 = './data/simple1.txt'
pathToSimple2 = './data/simple2.txt'
pathToTest = './data/test.txt'
pathToToy = './data/toy.txt'
pathToToy2 = './data/toy2.txt'
pathToFull = './data/train_full.txt'
pathToNoisy = './data/train_noisy.txt'
pathToSub = './data/train_sub.txt'
pathToValid = './data/validation.txt'

# One dataset object is created and then re-initialised from each file in turn.
dataset = ds.ClassifierDataset()
dataset.initFromFile(pathToFull)

# The same classifier instance is retrained in every profiled run.
dtc = cs.DecisionTreeClassifier()
print("FULL")
for i in range(repeats):
    # cProfile.run receives the statement as a string and executes it in the
    # __main__ namespace, so `dtc` and `dataset` must remain module-level
    # names. filename=None prints the report instead of saving it; 'time'
    # sorts the report by internal time.
    cProfile.run('dtc.train(dataset.attrib, dataset.labels)', None, 'time')

# Re-initialise the dataset from pathToSub and profile again.
dataset.initFromFile(pathToSub)
print("\n\n\n=====\n\n\n\n")
print("SUB")
for i in range(repeats):
    cProfile.run('dtc.train(dataset.attrib, dataset.labels)', None, 'time')

# Re-initialise the dataset from pathToNoisy and print a separator.
dataset.initFromFile(pathToNoisy)
print("\n\n\n=====\n\n\n\n")
Пример #3
0
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import optim
import torch.nn.functional as F
from model import BYOL, ClassifierBYOL, ClassifierScratch
import dataset
from tqdm import tqdm
import glob
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Batch size handed to the DataLoader below.
BATCH_SIZE = 16
# Presumably the number of training epochs — not used in the visible code.
EPOCHS = 50

# %%
# Training data pipeline: an ImageFolder rooted at 'data/label/train' is
# wrapped in the project's ClassifierDataset and served in shuffled batches.
# NOTE(review): `torchvision`, `DataLoader` and `NUM_WORKERS` are not
# imported/defined in the visible part of this script — confirm they exist.
train_data = torchvision.datasets.ImageFolder('data/label/train')
train_dataset = dataset.ClassifierDataset(train_data)
train_dataloader = DataLoader(train_dataset,
                              shuffle=True,
                              num_workers=NUM_WORKERS,
                              batch_size=BATCH_SIZE)


# %%
def acc_fn(logits, targets):
    """Return the accuracy of a batch of predictions.

    The predicted class of each row is the index of its largest value along
    dim 1 of ``logits``; accuracy is the mean of the element-wise equality
    between those predictions and ``targets``. Both tensors are detached and
    moved to the CPU before comparison.

    Returns a 0-d NumPy array obtained from the resulting scalar tensor.
    """
    predicted = logits.argmax(dim=1).detach().cpu()
    expected = targets.detach().cpu()
    # Multiplying by 1.0 turns the boolean matches into floats so that the
    # mean is defined.
    hits = (predicted == expected) * 1.0
    return hits.mean().numpy()


def train_batch(batch, model, optimizer):
Пример #4
0
import numpy as np
import dataset as ds
from prune import Prune
import classification as cs

# Driver script: trains a decision tree on the file at pathToNoisy, then hands
# the trained classifier and a validation set to Prune.

# Data file locations, relative to the current working directory.
pathToSimple1 = './data/simple1.txt'
pathToSimple2 = './data/simple2.txt'
pathToTest = './data/test.txt'
pathToToy = './data/toy.txt'
pathToToy2 = './data/toy2.txt'
pathToFull = './data/train_full.txt'
pathToNoisy = './data/train_noisy.txt'
pathToSub = './data/train_sub.txt'
pathToValid = './data/validation.txt'
pathToToyValid = "./data/toyvalid.txt"

# Load the training data.
dataset = ds.ClassifierDataset()
dataset.initFromFile(pathToNoisy)

# Train the classifier on the loaded attributes and labels.
dtc = cs.DecisionTreeClassifier()
dtc.train(dataset.attrib, dataset.labels)

# Load the validation data into its own dataset object.
validationDataset = ds.ClassifierDataset()
validationDataset.initFromFile(pathToValid)

# Prune is invoked for its effect only; its result is discarded here.
# Presumably it prunes dtc's tree against the validation data — confirm in
# the prune module.
Prune(dtc, validationDataset.attrib, validationDataset.labels)