Exemplo n.º 1
0
def autoencoder():
    """Train a one-hidden-layer autoencoder on the Mnist dataset.

    The dataset is built with an 8:1:1 train/valid/test ratio, batch size 100
    and a ``GCN`` preprocessor.  Targets of every split are replaced by the
    inputs, a 500-unit ``Tanh`` encoder and a mirrored ``Tanh`` decoder are
    added to an ``AutoEncoder``, and the model is trained through a
    ``TrainObject`` using ``AdaGrad`` and an "mse" training cost.

    Returns:
        None.  The function only runs the training.
    """
    dataset = Mnist(train_valid_test_ratio=[8, 1, 1], batch_size=100, preprocessor=GCN())

    # An autoencoder reconstructs its input, so each split gets X as its target.
    for fetch, store in (
        (dataset.get_train, dataset.set_train),
        (dataset.get_valid, dataset.set_valid),
        (dataset.get_test, dataset.set_test),
    ):
        split = fetch()
        store(split.X, split.X)

    model = AutoEncoder(input_dim=dataset.feature_size(), rand_seed=123)

    # The encoder is attached before the decoder is created because the
    # decoder is built from the encoder's weight matrix (W = encoder.W.T).
    encoder = Tanh(dim=500, name="h1_layer", W=None, b=None)
    model.add_encode_layer(encoder)

    decoder = Tanh(dim=model.input_dim, name="h1_mirror", W=encoder.W.T, b=None)
    model.add_decode_layer(decoder)

    optimizer = AdaGrad(learning_rate=0.1, momentum=0.9)

    reconstruction_cost = Cost(type="mse")
    stopping = {
        "max_epoch": 300,
        "epoch_look_back": 10,
        "cost": Cost(type="error"),
        "percent_decrease": 0.01,
    }
    rule = LearningRule(
        max_col_norm=10,
        L1_lambda=None,
        L2_lambda=None,
        training_cost=reconstruction_cost,
        learning_rate_decay_factor=None,
        stopping_criteria=stopping,
    )

    # Bundle model, data, rule and optimizer, then launch the training run.
    trainer = TrainObject(model=model, dataset=dataset, learning_rule=rule, learning_method=optimizer)
    trainer.run()
Exemplo n.º 2
0
    def build_dataset(self):
        """Build the dataset object selected by ``self.state.dataset``.

        An optional preprocessor and an optional noise object are created by
        class name from the ``preproc`` and ``data_noise`` modules.  The
        dataset class is then chosen from ``self.state.dataset.type``: the
        exact name 'Mnist', or one of the prefixes 'Mnist_Blocks', 'P276',
        'Laura', 'TransFactor_Blocks', 'TransFactor', 'I2R_Posterior'
        (checked in that order).

        For 'Mnist', 'P276*' and 'TransFactor*' the train/valid/test splits
        are rewritten so that input and target are the same array.

        Returns:
            The constructed dataset, or ``None`` when the type matches none
            of the names above.
        """
        cfg = self.state.dataset

        preprocessor = None
        if cfg.preprocessor.type is not None:
            preprocessor = getattr(preproc, cfg.preprocessor.type)()

        noise = None
        if cfg.dataset_noise.type is not None:
            noise = getattr(data_noise, cfg.dataset_noise.type)()

        if cfg.dataset_noise.type == 'Gaussian':
            noise.std = cfg.dataset_noise.std

        if cfg.preprocessor.type == 'Scale':
            # Copy the scaling bounds from the config onto the preprocessor.
            for dest, src in (('max', 'global_max'),
                              ('min', 'global_min'),
                              ('buffer', 'buffer'),
                              ('scale_range', 'scale_range')):
                setattr(preprocessor, dest, getattr(cfg.preprocessor, src))

        kind = cfg.type

        def has_prefix(prefix):
            # Same test as slicing the type to the prefix length and comparing.
            return kind[:len(prefix)] == prefix

        def construct(factory, **extra):
            # Arguments shared by every dataset class; read only once a
            # class has actually been selected.
            return factory(
                train_valid_test_ratio=cfg.train_valid_test_ratio,
                preprocessor=preprocessor,
                noise=noise,
                batch_size=cfg.batch_size,
                num_batches=cfg.num_batches,
                iter_class=cfg.iter_class,
                rng=cfg.rng,
                **extra)

        def mirror(ds, field):
            # Overwrite (X, y) of each split with (field, field).
            for split in ('train', 'valid', 'test'):
                part = getattr(ds, 'get_' + split)()
                getattr(ds, 'set_' + split)(getattr(part, field),
                                            getattr(part, field))
            return ds

        if kind == 'Mnist':
            # NOTE(review): this branch uses the labels ``y`` as both input
            # and target, while the other branches use ``X`` -- confirm
            # that this is intended.
            return mirror(construct(Mnist), 'y')

        if has_prefix('Mnist_Blocks'):
            return construct(getattr(mnist, kind),
                             feature_size=cfg.feature_size,
                             target_size=cfg.feature_size)

        if has_prefix('P276'):
            return mirror(construct(getattr(spec, kind)), 'X')

        if has_prefix('Laura'):
            return construct(getattr(spec, kind),
                             feature_size=cfg.feature_size,
                             target_size=cfg.feature_size,
                             num_blocks=cfg.num_blocks)

        # 'TransFactor_Blocks' must be tested before the shorter 'TransFactor'.
        if has_prefix('TransFactor_Blocks'):
            return construct(getattr(tf, kind),
                             feature_size=cfg.feature_size,
                             target_size=cfg.feature_size,
                             one_hot=cfg.one_hot,
                             num_blocks=cfg.num_blocks)

        if has_prefix('TransFactor'):
            return mirror(construct(getattr(tf, kind)), 'X')

        if has_prefix('I2R_Posterior'):
            return construct(getattr(i2r, kind))

        return None
Exemplo n.º 3
0
Arquivo: model.py Projeto: hycis/Pynet
    def build_dataset(self):
        """Instantiate the dataset configured in ``self.state.dataset``.

        Steps, in order:
          1. create the preprocessor (class looked up by name in ``preproc``)
             and the noise object (looked up in ``data_noise``), each only
             when its configured type is not ``None``;
          2. copy ``std`` onto a 'Gaussian' noise object and the scaling
             settings onto a 'Scale' preprocessor;
          3. pick the dataset class from ``self.state.dataset.type`` and
             construct it; for 'Mnist', 'P276*' and 'TransFactor*' also make
             input and target of every split the same array.

        Returns:
            The dataset instance, or ``None`` if the type is not recognised.
        """
        conf = self.state.dataset

        if conf.preprocessor.type is None:
            preprocessor = None
        else:
            preprocessor = getattr(preproc, conf.preprocessor.type)()

        if conf.dataset_noise.type is None:
            noise = None
        else:
            noise = getattr(data_noise, conf.dataset_noise.type)()

        if conf.dataset_noise.type == 'Gaussian':
            noise.std = conf.dataset_noise.std

        if conf.preprocessor.type == 'Scale':
            scale_conf = conf.preprocessor
            preprocessor.max = scale_conf.global_max
            preprocessor.min = scale_conf.global_min
            preprocessor.buffer = scale_conf.buffer
            preprocessor.scale_range = scale_conf.scale_range

        def shared():
            # Keyword arguments common to every dataset constructor.  Built
            # on demand so an unrecognised type reads no further settings.
            return dict(
                train_valid_test_ratio=conf.train_valid_test_ratio,
                preprocessor=preprocessor,
                noise=noise,
                batch_size=conf.batch_size,
                num_batches=conf.num_batches,
                iter_class=conf.iter_class,
                rng=conf.rng)

        def tie_targets(ds, attr):
            # Feed the chosen attribute of each split back as both X and y.
            for fetch, store in ((ds.get_train, ds.set_train),
                                 (ds.get_valid, ds.set_valid),
                                 (ds.get_test, ds.set_test)):
                split = fetch()
                store(getattr(split, attr), getattr(split, attr))

        dataset = None
        dataset_type = conf.type

        if dataset_type == 'Mnist':
            dataset = Mnist(**shared())
            # NOTE(review): labels ``y`` are used as input and target here,
            # unlike the ``X`` used by the other branches -- confirm intent.
            tie_targets(dataset, 'y')

        elif dataset_type[:12] == 'Mnist_Blocks':
            dataset = getattr(mnist, dataset_type)(
                feature_size=conf.feature_size,
                target_size=conf.feature_size,
                **shared())

        elif dataset_type[:4] == 'P276':
            dataset = getattr(spec, dataset_type)(**shared())
            tie_targets(dataset, 'X')

        elif dataset_type[:5] == 'Laura':
            dataset = getattr(spec, dataset_type)(
                feature_size=conf.feature_size,
                target_size=conf.feature_size,
                num_blocks=conf.num_blocks,
                **shared())

        # The longer 'TransFactor_Blocks' prefix is checked before 'TransFactor'.
        elif dataset_type[:18] == 'TransFactor_Blocks':
            dataset = getattr(tf, dataset_type)(
                feature_size=conf.feature_size,
                target_size=conf.feature_size,
                one_hot=conf.one_hot,
                num_blocks=conf.num_blocks,
                **shared())

        elif dataset_type[:11] == 'TransFactor':
            dataset = getattr(tf, dataset_type)(**shared())
            tie_targets(dataset, 'X')

        elif dataset_type[:13] == 'I2R_Posterior':
            dataset = getattr(i2r, dataset_type)(**shared())

        return dataset
Exemplo n.º 4
0
    def build_dataset(self):
        """Create the dataset described by ``self.state.dataset``.

        The preprocessor class is looked up by name in ``preproc`` and the
        noise class in ``noisy``; either is skipped when its configured type
        is ``None``.  A 'Scale' preprocessor additionally receives its
        max/min/buffer/scale_range settings from the config.

        The dataset class is selected from ``self.state.dataset.type``:
        exactly 'Mnist', or a name starting with 'Mnist_Blocks', 'P276',
        'Laura', 'TransFactor_Blocks' or 'TransFactor' (tested in that
        order).  'Mnist', 'P276*' and 'TransFactor*' datasets get their
        inputs ``X`` installed as targets for every split.

        Returns:
            The dataset instance, or ``None`` for an unrecognised type.
        """
        settings = self.state.dataset

        preprocessor = None
        if settings.preprocessor.type is not None:
            preprocessor = getattr(preproc, settings.preprocessor.type)()

        # A disabled earlier variant (kept in the original only as
        # commented-out code) read ``state.dataset.noise`` and passed
        # ``ratio=`` for the 'BlackOut' / 'MaskOut' noise types.  The active
        # code builds the noise object without arguments.
        noise = None
        if settings.dataset_noise.type is not None:
            noise = getattr(noisy, settings.dataset_noise.type)()

        if settings.preprocessor.type == 'Scale':
            scale = settings.preprocessor
            preprocessor.max = scale.global_max
            preprocessor.min = scale.global_min
            preprocessor.buffer = scale.buffer
            preprocessor.scale_range = scale.scale_range

        def make(factory, **extra):
            # Call ``factory`` with the arguments every dataset class takes,
            # plus any class-specific extras.
            return factory(
                train_valid_test_ratio=settings.train_valid_test_ratio,
                preprocessor=preprocessor,
                noise=noise,
                batch_size=settings.batch_size,
                num_batches=settings.num_batches,
                iter_class=settings.iter_class,
                rng=settings.rng,
                **extra)

        def inputs_as_targets(ds):
            # Autoencoder-style targets: every split is set to (X, X).
            train_split = ds.get_train()
            ds.set_train(train_split.X, train_split.X)
            valid_split = ds.get_valid()
            ds.set_valid(valid_split.X, valid_split.X)
            test_split = ds.get_test()
            ds.set_test(test_split.X, test_split.X)

        dataset = None
        wanted = settings.type

        if wanted == 'Mnist':
            dataset = make(Mnist)
            inputs_as_targets(dataset)

        elif wanted[:12] == 'Mnist_Blocks':
            dataset = make(getattr(mnist, wanted),
                           feature_size=settings.feature_size,
                           target_size=settings.feature_size)

        elif wanted[:4] == 'P276':
            dataset = make(getattr(spec, wanted))
            inputs_as_targets(dataset)

        elif wanted[:5] == 'Laura':
            dataset = make(getattr(spec, wanted),
                           feature_size=settings.feature_size,
                           target_size=settings.feature_size)

        # Checked before 'TransFactor', which is a prefix of this name.
        elif wanted[:18] == 'TransFactor_Blocks':
            dataset = make(getattr(tf, wanted),
                           feature_size=settings.feature_size,
                           target_size=settings.feature_size)

        elif wanted[:11] == 'TransFactor':
            dataset = make(getattr(tf, wanted))
            inputs_as_targets(dataset)

        return dataset
Exemplo n.º 5
0
def _use_inputs_as_targets(data):
    """Set every split of ``data`` to (X, X) and return the original splits.

    Returns:
        The ``(train, valid, test)`` objects obtained from ``data`` before
        their targets were replaced.
    """
    train = data.get_train()
    data.set_train(train.X, train.X)

    valid = data.get_valid()
    data.set_valid(valid.X, valid.X)

    test = data.get_test()
    data.set_test(test.X, test.X)

    return train, valid, test


def _run_training(model, data, learning_rule, learning_method):
    """Wrap the components in a ``TrainObject`` and run it."""
    train_object = TrainObject(model=model, dataset=data, learning_rule=learning_rule, learning_method=learning_method)
    train_object.run()


def stacked_autoencoder():
    """Train a two-layer stacked autoencoder on Mnist in three stages.

    1. A first autoencoder (500-unit ``RELU`` encoder plus mirrored decoder)
       is trained to reconstruct the Mnist inputs.
    2. The inputs are encoded with that first autoencoder and a second
       autoencoder (100-unit ``RELU`` encoder plus mirrored decoder) is
       trained to reconstruct the encoded data.
    3. Both encoders and both decoders are stacked into a third
       ``AutoEncoder`` which is trained on a freshly built Mnist dataset.

    The same ``LearningRule`` and ``SGD`` objects are shared by all three
    training runs.

    Returns:
        None.  Progress messages are printed and the models are trained.
    """
    # =====[ Train First layer of stack autoencoder ]=====#
    print("Start training First Layer of AutoEncoder")

    # building dataset and batch_size
    data = Mnist(train_valid_test_ratio=[8, 1, 1], batch_size=100)

    # for AutoEncoder, the inputs and outputs must be the same
    train, valid, test = _use_inputs_as_targets(data)

    # building autoencoder
    ae = AutoEncoder(input_dim=data.feature_size(), rand_seed=123)
    h1_layer = RELU(dim=500, name="h1_layer", W=None, b=None)

    # The encoder is added before the mirror is created because the mirror
    # is built from the encoder's weights (W = h1_layer.W.T).
    ae.add_encode_layer(h1_layer)
    h1_mirror = RELU(dim=ae.input_dim, name="h1_mirror", W=h1_layer.W.T, b=None)
    ae.add_decode_layer(h1_mirror)

    # build learning method
    learning_method = SGD(learning_rate=0.001, momentum=0.9)

    # set the learning rules
    learning_rule = LearningRule(
        max_col_norm=10,
        L1_lambda=None,
        L2_lambda=None,
        training_cost=Cost(type="mse"),
        learning_rate_decay_factor=None,
        stopping_criteria={"max_epoch": 3, "epoch_look_back": 1, "cost": Cost(type="error"), "percent_decrease": 0.01},
    )

    _run_training(ae, data, learning_rule, learning_method)

    # =====[ Train Second Layer of autoencoder ]=====#

    print("Start training Second Layer of AutoEncoder")

    # Encode the original inputs with the trained first autoencoder; the
    # encoded data becomes both input and target for the second one.
    reduced_train_X = ae.encode(train.X)
    reduced_valid_X = ae.encode(valid.X)
    reduced_test_X = ae.encode(test.X)

    data.set_train(X=reduced_train_X, y=reduced_train_X)
    data.set_valid(X=reduced_valid_X, y=reduced_valid_X)
    data.set_test(X=reduced_test_X, y=reduced_test_X)

    # second autoencoder takes the encoded outputs of the first as inputs
    ae2 = AutoEncoder(input_dim=data.feature_size(), rand_seed=None)

    h2_layer = RELU(dim=100, name="h2_layer", W=None, b=None)
    ae2.add_encode_layer(h2_layer)

    h2_mirror = RELU(dim=h1_layer.dim, name="h2_mirror", W=h2_layer.W.T, b=None)
    ae2.add_decode_layer(h2_mirror)

    _run_training(ae2, data, learning_rule, learning_method)

    # =====[ Fine Tuning ]=====#
    print("Fine Tuning")

    # NOTE(review): this dataset is built with Mnist's default arguments,
    # not the ratio/batch_size used for the first stage -- confirm intent.
    data = Mnist()
    _use_inputs_as_targets(data)

    # Stack the layers: encoders in order, decoders in reverse.
    ae3 = AutoEncoder(input_dim=data.feature_size(), rand_seed=None)
    ae3.add_encode_layer(h1_layer)
    ae3.add_encode_layer(h2_layer)
    ae3.add_decode_layer(h2_mirror)
    ae3.add_decode_layer(h1_mirror)

    _run_training(ae3, data, learning_rule, learning_method)
    print("Training Done")