Пример #1
0
def cnn_run_dropout_maxout(data_path, num_rows, num_cols, num_channels,
                           input_path, pred_path):
    """
    Train a two-convolutional-layer regression network with a dropout cost
    and run it on an input image.

    During training the model with the best 'valid_objective' channel is
    written to 'sar_cnn_mlp.pkl'; after training the final network is saved
    to 'cnn_hhv_<num_rows>_<num_cols>.pkl'.

    Parameters
    ----------
    data_path : object
        Not used by this function.
        NOTE(review): the train/valid/test splits are read from a name
        `ds` that is never assigned here; presumably it was meant to be
        loaded from `data_path` -- confirm before relying on this function.
    num_rows, num_cols : int
        Shape of the Conv2DSpace fed to the network. `num_cols` is also
        passed to `gen_center_sub_window`.
    num_channels : int
        Number of channels of the Conv2DSpace.
    input_path : object
        Passed to `read_hhv` to load the image to predict on.
    pred_path : object
        Passed through to `sar_predict`.

    Returns
    -------
    tuple
        `(ann, prediction)` where `ann` is the trained MLP and
        `prediction` is whatever `sar_predict` returns.
    """
    t = time.time()
    sub_window = gen_center_sub_window(76, num_cols)
    trn = SarDataset(ds[0][0], ds[0][1], sub_window)
    vld = SarDataset(ds[1][0], ds[1][1], sub_window)
    tst = SarDataset(ds[2][0], ds[2][1], sub_window)
    print('Take {}s to read data'.format(time.time() - t))

    t = time.time()
    # Single source of truth for the batch size used by both SGD and the MLP.
    batch_size = 100
    # NOTE(review): despite the function name, no maxout layer is part of
    # the network. A Maxout layer used to be constructed here (under the
    # already-used name 'h2') but was never added to `layers`.
    hidden_layer = mlp.ConvRectifiedLinear(layer_name='h2',
                                           output_channels=8,
                                           irange=0.05,
                                           kernel_shape=[5, 5],
                                           pool_shape=[2, 2],
                                           pool_stride=[2, 2],
                                           max_kernel_norm=1.9365)
    hidden_layer2 = mlp.ConvRectifiedLinear(layer_name='h3',
                                            output_channels=8,
                                            irange=0.05,
                                            kernel_shape=[5, 5],
                                            pool_shape=[2, 2],
                                            pool_stride=[2, 2],
                                            max_kernel_norm=1.9365)
    # Linear output unit (a Softplus output was tried previously).
    output_layer = mlp.Linear(dim=1, layer_name='output', irange=0.05)
    trainer = sgd.SGD(learning_rate=0.001,
                      batch_size=batch_size,
                      termination_criterion=EpochCounter(2000),
                      cost=dropout.Dropout(),
                      train_iteration_mode='even_shuffled_sequential',
                      monitor_iteration_mode='even_shuffled_sequential',
                      monitoring_dataset={
                          'test': tst,
                          'valid': vld,
                          'train': trn
                      })
    layers = [hidden_layer, hidden_layer2, output_layer]
    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)

    ann = mlp.MLP(layers, input_space=input_space, batch_size=batch_size)
    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path='sar_cnn_mlp.pkl')
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher])
    print('Take {}s to compile code'.format(time.time() - t))

    t = time.time()
    experiment.main_loop()
    print('Training time: {}s'.format(time.time() - t))
    serial.save('cnn_hhv_{0}_{1}.pkl'.format(num_rows, num_cols),
                ann,
                on_overwrite='backup')

    # Read hh and hv into a 3D numpy array and predict on it.
    image = read_hhv(input_path)
    return ann, sar_predict(ann, image, pred_path)
Пример #2
0
def cnn_transform_ensemble():
    """
    Train one transformer CNN for the current fold, save it, send an SMS
    notice and run batch prediction.

    The configuration comes from `get_default_configure()` with the input
    size forced to 41x41. The trained network is saved to
    ``kwargs['predict_path'] + 'f' + <fold> + '.pkl'`` and
    `predict_batch` is then called on ``kwargs['predict_path']``.
    """
    from datetime import datetime
    import pprint

    print(str(datetime.now()))
    t0 = time.time()
    kwargs = get_default_configure()
    kwargs['num_rows'] = 41
    kwargs['num_cols'] = 41
    pprint.PrettyPrinter(indent=4).pprint(kwargs)

    # NOTE(review): because of the unconditional ``break`` this loop body
    # runs at most once (and not at all when which_fold is -1). It looks
    # like it was meant to iterate until the fold counter wraps around --
    # confirm before removing the break.
    i = -1
    while under_sample_water.which_fold != i:
        if i == -1:
            i = under_sample_water.which_fold
        kwargs['save_path'] = str(under_sample_water.which_fold) + '.pkl'
        t1 = time.time()
        ann = cnn_train_tranformer(**kwargs)
        model_path = kwargs['predict_path'] + 'f' + kwargs['save_path']
        serial.save(model_path, ann, on_overwrite='backup')
        # Report the path that was actually written.
        print('saved to: ' + model_path)
        print('Traing done. Take {}h'.format((time.time() - t1) / 3600))
        break
    utils.sms_notice('Training finished. Taking {}h in total.'.format(
        (time.time() - t0) / 3600))
    print('Traing done. Take {}h'.format((time.time() - t0) / 3600))
    # Sum of all predictions. The directory comes from the configuration;
    # a bare `predict_path` name is never bound inside this function.
    predict_batch(kwargs['predict_path'])
Пример #3
0
def cnn_ensemble_leave_one_out():
    """
    Train and save one CNN per leave-one-out fold, then evaluate them.

    Folds are numbered from 0. Training stops at the first fold for which
    `get_default_configure_leave_one_out` raises (presumably that is how
    the helper signals that there are no more folds -- confirm). Each
    network is saved to ``kwargs['predict_path'] + 'f' + <fold> + '.pkl'``.
    Afterwards an SMS notice is sent and `evaluate_sets` is run on the
    predict path of the last configured fold. If no fold could be
    configured at all, evaluation is skipped.
    """
    from datetime import datetime

    print(str(datetime.now()))
    t0 = time.time()
    which_fold = 0
    kwargs = None  # stays None when not even fold 0 can be configured
    while True:
        print(which_fold)
        try:
            kwargs = get_default_configure_leave_one_out(which_fold=which_fold)
        except Exception:
            # `except Exception` rather than a bare except so that Ctrl-C
            # and SystemExit still terminate the run.
            break
        kwargs['num_rows'] = 40
        kwargs['num_cols'] = 40
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(kwargs)
        kwargs['save_path'] = str(which_fold) + '.pkl'
        t1 = time.time()
        ann = cnn_train(**kwargs)
        serial.save(kwargs['predict_path'] + 'f' + kwargs['save_path'],
                    ann,
                    on_overwrite='backup')
        print('saved to: ' + kwargs['save_path'])
        print('Traing done. Take {}h'.format((time.time() - t1) / 3600))
        which_fold += 1
    utils.sms_notice('Training finished. Taking {}h in total.'.format(
        (time.time() - t0) / 3600))
    print('Traing done. Take {}h'.format((time.time() - t0) / 3600))

    if kwargs is None:
        # Nothing was trained, so there is no predict path to evaluate.
        print('No fold configuration could be loaded; skipping evaluation.')
        return
    evaluate_sets(kwargs['predict_path'])
Пример #4
0
    def train_batch(self, dataset, batch_size):

        x = dataset.get_batch_design(batch_size, include_labels=False)
        if self.flags['truncate_v']:
            x = numpy.clip(x, -self.truncation_bound['v'], self.truncation_bound['v'])
        try:
            self.batch_train_func(x)
            self.enforce_constraints()
        except:
            import pdb; pdb.set_trace()

        # accounting...
        self.examples_seen += self.batch_size
        self.batches_seen += 1

        # save to different path each epoch
        if self.my_save_path and \
           (self.batches_seen in self.save_at or
            self.batches_seen % self.save_every == 0):
            fname = self.my_save_path + '_e%i.pkl' % self.batches_seen
            print 'Saving to %s ...' % fname,
            serial.save(fname, self)
            print 'done'

        return self.batches_seen < self.max_updates
    def save_weights(self):
        """
        Export the model stored at `self.model_path` to `self.weight_path`.

        The output format follows the last four characters of
        `self.weight_path`:

        - '.pkl' : the entire model is saved.
        - '.mat' : a dict mapping layer name to weights is written with
          `scipy.io.savemat`.
        - '.npy' : the same dict is written with `np.save`.

        Raises
        ------
        Exception
            If `self.weight_path` has none of the supported extensions.
            This is checked before the model is loaded.
        """
        extension = self.weight_path[-4:]
        if extension not in ('.pkl', '.mat', '.npy'):
            raise Exception('Only ".mat", ".pkl" and ".npy" files are supported as data formats.')

        model = serial.load(self.model_path)

        if extension == '.pkl':
            # Two spaces on purpose: matches the output of the former
            # ``print 'saving model ', path`` statement.
            print('saving model  %s' % self.weight_path)
            serial.save(self.weight_path, model)
            return

        weight_dict = {}
        for layer in model.layers:
            try:
                weights = layer.get_weights()
            except Exception:
                # Fall back to the topological view. It is stored without
                # reshaping, since the input/output vectors would have to
                # be reshaped the same way, which might lead to problems.
                weights = layer.get_weights_topo()
            weight_dict[layer.layer_name] = weights

        if extension == '.mat':
            scipy.io.savemat(self.weight_path, weight_dict)
        else:
            # np.save appends '.npy' itself, hence the stripped suffix.
            np.save(self.weight_path[:-4], weight_dict)
Пример #6
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Make sure Polyak-averaged model gets monitored.
        Save the model if necessary.

        On the call where `self._count` equals `self.start`, a
        `_PolyakWorker` is created and registered as an update callback.
        On later calls that are multiples of `self.save_freq` (and only
        when `self.save_path` is set), the model is saved with its
        parameters temporarily replaced by the worker's running means.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
            Not used.
        algorithm : object
            Its `update_callbacks` list receives the worker; its
            `monitoring_dataset` is passed to `add_polyak_channels`.
        """
        if self._count == self.start:
            self._worker = _PolyakWorker(model)
            algorithm.update_callbacks.append(self._worker)
            # HACK: only some models know how to add Polyak channels.
            try:
                model.add_polyak_channels(self._worker.param_to_mean,
                                          algorithm.monitoring_dataset)
            except AttributeError:
                pass
        elif self.save_path is not None and self._count > self.start and \
                self._count % self.save_freq == 0:
            saved_params = OrderedDict()
            try:
                for param in model.get_params():
                    saved_params[param] = param.get_value()
                    param.set_value(
                        self._worker.param_to_mean[param].get_value())
                serial.save(self.save_path, model)
            finally:
                # Always put the live (non-averaged) values back, even if
                # saving failed, so training does not continue from the
                # averaged parameters.
                for param, value in saved_params.items():
                    param.set_value(value)
        self._count += 1
Пример #7
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Checkpoint the model every `self.interval` epochs.

        The file name is `self.save_prefix` followed by '_<epoch>.pkl',
        where the epoch count comes from `model.monitor.get_epochs_seen()`.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            Its monitor supplies the number of epochs seen.
        dataset : pylearn2.datasets.dataset.Dataset
            not used
        algorithm : TrainingAlgorithm
            not used
        """
        epochs_seen = model.monitor.get_epochs_seen()
        checkpoint_path = '%s_%d.pkl' % (self.save_prefix, epochs_seen)

        # Nothing to do unless this is a save epoch.
        if np.mod(epochs_seen, self.interval) != 0:
            return

        print('Saving model to %s' % checkpoint_path)
        serial.save(checkpoint_path, model, on_overwrite='backup')
Пример #8
0
    def train_batch(self, dataset, batch_size):

        (x, y) = dataset.get_random_framepair_batch(batch_size)
        if self.flags['truncate_v']:
            x = numpy.clip(x, -self.truncation_bound['v'],
                           self.truncation_bound['v'])

        try:
            self.batch_train_func(x.astype(floatX))
            self.enforce_constraints()
        except:
            import pdb
            pdb.set_trace()

        # accounting...
        self.examples_seen += self.batch_size
        self.batches_seen += 1

        # save to different path each epoch
        if self.my_save_path and \
           (self.batches_seen in self.save_at or
            self.batches_seen % self.save_every == 0):
            fname = self.my_save_path + '_e%i.pkl' % self.batches_seen
            print 'Saving to %s ...' % fname,
            serial.save(fname, self)
            print 'done'

        return self.batches_seen < self.max_updates
Пример #9
0
def main(train_path,
        out_path,
        split,
        **kwargs):
    """
    Train and save one model per training subset ('omnivore', then 'fruit').

    For each subset a fresh `Report` is created, the model is saved to
    `out_path` + '.<subset>.model.pkl' and the report is written to
    `out_path` + '.<subset>.validation_report.txt'.

    Parameters
    ----------
    train_path : passed to `get_features` and `Report`.
    out_path : str
        Prefix of every output file.
    split : passed to `get_features` and `Report`.
    **kwargs : forwarded to `train`.
    """
    print('loading training features')

    train_X = get_features(train_path, split)
    gc.collect()

    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120

    for subset in ('omnivore', 'fruit'):
        report = Report(train_path, split)

        subset_X, subset_y, fold_indices = get_training_subset(train_X, subset)

        model = train(fold_indices, subset_X, subset_y, report, **kwargs)

        serial.save(out_path + '.%s.model.pkl' % subset, model)
        report.write(out_path + '.%s.validation_report.txt' % subset)
Пример #10
0
def cnn_ensemble_leave_one_out():
    """
    Train and save one CNN per leave-one-out fold, then evaluate them.

    Folds are tried in order starting at 0 until
    `get_default_configure_leave_one_out` raises for a fold (presumably
    its way of reporting that the folds are exhausted -- confirm). Every
    trained network is written to
    ``kwargs['predict_path'] + 'f' + <fold> + '.pkl'``. When the loop ends
    an SMS notice is sent and `evaluate_sets` is called with the predict
    path of the last fold that was configured; evaluation is skipped if
    there was no such fold.
    """
    from datetime import datetime

    print(str(datetime.now()))
    t0 = time.time()
    which_fold = 0
    kwargs = None  # remains None if fold 0 cannot be configured
    while True:
        print(which_fold)
        try:
            kwargs = get_default_configure_leave_one_out(which_fold=which_fold)
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must be
            # able to abort a long training run.
            break
        kwargs['num_rows'] = 40
        kwargs['num_cols'] = 40
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(kwargs)
        kwargs['save_path'] = str(which_fold) + '.pkl'
        t1 = time.time()
        ann = cnn_train(**kwargs)
        serial.save(kwargs['predict_path'] + 'f' + kwargs['save_path'],
                    ann, on_overwrite='backup')
        print('saved to: ' + kwargs['save_path'])
        print('Traing done. Take {}h'.format((time.time() - t1) / 3600))
        which_fold += 1
    utils.sms_notice('Training finished. Taking {}h in total.'.format(
        (time.time() - t0) / 3600))
    print('Traing done. Take {}h'.format((time.time() - t0) / 3600))

    if kwargs is None:
        # No fold was trained, so there is nothing to evaluate.
        print('No fold configuration could be loaded; skipping evaluation.')
        return
    evaluate_sets(kwargs['predict_path'])
Пример #11
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Make sure Polyak-averaged model gets monitored.
        Save the model if necessary.

        When `self._count` reaches `self.start` a `_PolyakWorker` is
        attached to the algorithm's update callbacks. On later calls whose
        count is a multiple of `self.save_freq`, and only if
        `self.save_path` is set, the model is saved with the worker's mean
        values loaded into its parameters; the original values are put
        back afterwards.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
            Not used.
        algorithm : object
            Provides `update_callbacks` and `monitoring_dataset`.
        """
        if self._count == self.start:
            self._worker = _PolyakWorker(model)
            algorithm.update_callbacks.append(self._worker)
            # HACK: models without add_polyak_channels are simply skipped.
            try:
                model.add_polyak_channels(self._worker.param_to_mean,
                                          algorithm.monitoring_dataset)
            except AttributeError:
                pass
        elif self.save_path is not None and self._count > self.start and \
                self._count % self.save_freq == 0:
            saved_params = OrderedDict()
            try:
                for param in model.get_params():
                    saved_params[param] = param.get_value()
                    param.set_value(
                        self._worker.param_to_mean[param].get_value())
                serial.save(self.save_path, model)
            finally:
                # Restore whatever was swapped out, even when the save (or
                # the swap itself) raised, so the live parameters are never
                # left holding the averaged values.
                for param, value in saved_params.items():
                    param.set_value(value)
        self._count += 1
Пример #12
0
def cnn_transform_ensemble():
    """
    Train a transformer CNN for the current fold, save it, notify by SMS
    and run batch prediction.

    `get_default_configure()` supplies the settings (input size is
    overridden to 41x41). The network is saved to
    ``kwargs['predict_path'] + 'f' + <fold> + '.pkl'`` and
    `predict_batch` is called with ``kwargs['predict_path']``.
    """
    from datetime import datetime
    import pprint

    print(str(datetime.now()))
    t0 = time.time()
    kwargs = get_default_configure()
    kwargs['num_rows'] = 41
    kwargs['num_cols'] = 41
    pprint.PrettyPrinter(indent=4).pprint(kwargs)

    # NOTE(review): the trailing ``break`` means at most one fold is
    # trained per call (none if which_fold is -1). The loop condition
    # suggests cycling through all folds was intended -- confirm.
    i = -1
    while under_sample_water.which_fold != i:
        if i == -1:
            i = under_sample_water.which_fold
        kwargs['save_path'] = str(under_sample_water.which_fold) + '.pkl'
        t1 = time.time()
        ann = cnn_train_tranformer(**kwargs)
        model_path = kwargs['predict_path'] + 'f' + kwargs['save_path']
        serial.save(model_path, ann, on_overwrite='backup')
        # Log the file that was really written.
        print('saved to: ' + model_path)
        print('Traing done. Take {}h'.format((time.time() - t1) / 3600))
        break
    utils.sms_notice('Training finished. Taking {}h in total.'.format(
        (time.time() - t0) / 3600))
    print('Traing done. Take {}h'.format((time.time() - t0) / 3600))
    # Sum of all predictions. Uses the configured directory, because no
    # local `predict_path` variable exists in this function.
    predict_batch(kwargs['predict_path'])
Пример #13
0
def main(train_path,
        out_path,
        dataset,
        standardize,
        C,
        **kwargs):
    """
    Train a model on precomputed features and save it to `out_path`.

    Parameters
    ----------
    train_path : passed to `get_features`.
    out_path : destination handed to `serial.save`.
    dataset : str
        Exactly one of 'stl10', 'cifar10' or 'cifar100'.
    standardize : forwarded to `get_features`.
    C : forwarded to `train`.
    **kwargs : accepted but not used.
    """
    stl10, cifar10, cifar100 = [dataset == name
                                for name in ('stl10', 'cifar10', 'cifar100')]
    # Exactly one known dataset must have been selected.
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100, stl10)
    gc.collect()
    assert train_y is not None

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)

    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    else:
        # One of the two CIFAR variants, as guaranteed by the check above.
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    print('training model')
    trained = train(train_X, train_y, C)

    print('saving model')
    serial.save(out_path, trained)
Пример #14
0
def main(train_path, out_path, split, **kwargs):
    """
    Fit and store a model for the 'omnivore' subset and then for the
    'fruit' subset of the training features.

    Each subset gets its own `Report`; the model goes to
    `out_path` + '.<subset>.model.pkl' and the report to
    `out_path` + '.<subset>.validation_report.txt'.

    Parameters
    ----------
    train_path : handed to `get_features` and `Report`.
    out_path : str
        Common prefix of the output files.
    split : handed to `get_features` and `Report`.
    **kwargs : passed on to `train`.
    """
    print('loading training features')

    features = get_features(train_path, split)
    gc.collect()

    assert str(features.dtype) == 'float32'
    assert features.shape[0] == 120

    for subset_name in ('omnivore', 'fruit'):
        subset_report = Report(train_path, split)

        X_subset, y_subset, folds = get_training_subset(features, subset_name)

        fitted = train(folds, X_subset, y_subset, subset_report, **kwargs)

        serial.save(out_path + '.%s.model.pkl' % subset_name, fitted)
        subset_report.write(
            out_path + '.%s.validation_report.txt' % subset_name)
Пример #15
0
    def save_weights(self):
        """
        Write the model found at `self.model_path` to `self.weight_path`.

        What is written depends on the four-character suffix of
        `self.weight_path`:

        - '.pkl' : the whole model object.
        - '.mat' : a {layer name: weights} dict via `scipy.io.savemat`.
        - '.npy' : the same dict via `np.save`.

        Raises
        ------
        Exception
            For any other suffix. The check happens before the model is
            loaded.
        """
        suffix = self.weight_path[-4:]
        if suffix not in ('.pkl', '.mat', '.npy'):
            raise Exception(
                'Only ".mat", ".pkl" and ".npy" files are supported as data formats.'
            )

        model = serial.load(self.model_path)

        if suffix == '.pkl':
            # The double space reproduces the output of the previous
            # ``print 'saving model ', path`` statement.
            print('saving model  %s' % self.weight_path)
            serial.save(self.weight_path, model)
            return

        weight_dict = {}
        for layer in model.layers:
            try:
                layer_weights = layer.get_weights()
            except Exception:
                # Use the topological weights as they are: reshaping them
                # would require reshaping the input/output vectors in the
                # same way, which might lead to problems.
                layer_weights = layer.get_weights_topo()
            weight_dict[layer.layer_name] = layer_weights

        if suffix == '.mat':
            scipy.io.savemat(self.weight_path, weight_dict)
        else:
            # The suffix is stripped because np.save adds '.npy' itself.
            np.save(self.weight_path[:-4], weight_dict)
Пример #16
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Record and save the model when the monitored channel has improved.

        The latest value of channel `self.channel_name` is compared with
        `self.best_cost`, both multiplied by `self.coeff`. On a strict
        improvement the best cost is updated, the model is tagged,
        optionally deep-copied to `self.best_model`, and optionally saved
        to `self.save_path`.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            model.monitor must contain a channel with name given by
            self.channel_name
        dataset : pylearn2.datasets.dataset.Dataset
            Not used
        algorithm : TrainingAlgorithm
            Not used
        """
        record = model.monitor.channels[self.channel_name].val_record
        latest_cost = record[-1]

        improved = self.coeff * latest_cost < self.coeff * self.best_cost
        if not improved:
            return

        self.best_cost = latest_cost
        # The tag has to be refreshed before the model is copied or saved.
        self._update_tag(model)
        if self.store_best_model:
            self.best_model = deepcopy(model)
        if self.save_path is None:
            return
        with log_timing(log, 'Saving to ' + self.save_path):
            serial.save(self.save_path, model, on_overwrite='backup')
Пример #17
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Save the model whenever the monitored channel reaches a new best.

        The newest channel value is multiplied by `self.coeff` and
        compared with `self.best_cost`; on a strict improvement the
        scaled value becomes the new best cost and the model is written
        to `self.save_path`.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            model.monitor must contain a channel with name given by
            self.channel_name
        dataset : pylearn2.datasets.dataset.Dataset
            Not used
        algorithm : TrainingAlgorithm
            Not used
        """
        record = model.monitor.channels[self.channel_name].val_record
        scaled_cost = self.coeff * record[-1]

        if not scaled_cost < self.best_cost:
            return

        self.best_cost = scaled_cost
        serial.save(self.save_path, model, on_overwrite='backup')
Пример #18
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Save the model and an image of its filters on each new best cost.

        The newest value of channel `self.channel_name`, multiplied by
        `self.coeff`, is compared with `self.best_cost`. On a strict
        improvement the model is saved to `self.save_path` and the
        weights report is written to 'best_filters.png'.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            model.monitor must contain a channel with name given by
            self.channel_name
        dataset : pylearn2.datasets.dataset.Dataset
            Passed to the weights report.
        algorithm : TrainingAlgorithm
            not used
        """
        record = model.monitor.channels[self.channel_name].val_record
        scaled_cost = self.coeff * record[-1]

        if not scaled_cost < self.best_cost:
            return

        self.best_cost = scaled_cost
        serial.save(self.save_path, model, on_overwrite='backup')

        # XXX: [Kien] Save best filters.
        filters_view = get_weights_report.get_weights_report(model=model,
                                                             dataset=dataset)
        filters_view.save('best_filters.png')
Пример #19
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Tag and save the model whenever the monitored channel improves.

        The newest channel value is scaled by `self.coeff` and compared
        with `self.best_cost`; a strictly smaller value becomes the new
        best cost, after which the model is tagged and saved to
        `self.save_path`.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            model.monitor must contain a channel with name given by
            self.channel_name
        dataset : pylearn2.datasets.dataset.Dataset
            Not used
        algorithm : TrainingAlgorithm
            Not used
        """
        record = model.monitor.channels[self.channel_name].val_record
        scaled_cost = self.coeff * record[-1]

        if not scaled_cost < self.best_cost:
            return

        self.best_cost = scaled_cost
        # The tag must be up to date before the model is serialized.
        self._update_tag(model)
        serial.save(self.save_path, model, on_overwrite='backup')
Пример #20
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Record and save the model when the monitored channel is at least
        as good as the best seen so far.

        Unlike a strict comparison, ties count as an improvement. Nothing
        happens before the monitor has seen `self.start_epoch` epochs.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            model.monitor must contain a channel with name given by
            self.channel_name
        dataset : pylearn2.datasets.dataset.Dataset
            Not used
        algorithm : TrainingAlgorithm
            Not used
        """
        monitor = model.monitor
        latest_cost = monitor.channels[self.channel_name].val_record[-1]

        if not self.coeff * latest_cost <= self.coeff * self.best_cost:
            return
        if not monitor._epochs_seen >= self.start_epoch:
            return

        self.best_cost = latest_cost
        # The tag has to be refreshed before the model is copied or saved.
        self._update_tag(model)
        if self.store_best_model:
            self.best_model = deepcopy(model)
        if self.save_path is None:
            return
        with log_timing(log, 'Saving to ' + self.save_path):
            serial.save(self.save_path, model, on_overwrite='backup')
Пример #21
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Save the model if the current epoch is a multiple of
        `self.interval`.

        The output file is named `self.save_prefix` + '_<epoch>.pkl',
        with the epoch taken from `model.monitor.get_epochs_seen()`.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            Its monitor reports how many epochs have been seen.
        dataset : pylearn2.datasets.dataset.Dataset
            not used
        algorithm : TrainingAlgorithm
            not used
        """
        current_epoch = model.monitor.get_epochs_seen()
        target_file = '%s_%d.pkl' % (self.save_prefix, current_epoch)

        # Skip every epoch that is not on the save schedule.
        if np.mod(current_epoch, self.interval) != 0:
            return

        print('Saving model to %s' % target_file)
        serial.save(target_file, model, on_overwrite='backup')
Пример #22
0
    def train_batch(self, dataset, batch_size):

        x = dataset.get_batch_design(batch_size, include_labels=False)
        [x_spike, x_slab] = self.preproc(x)
        self.batch_train_func(x_spike, x_slab)

        # accounting...
        self.examples_seen += self.batch_size
        self.batches_seen += 1

        # modify learning rate multipliers
        for (k, iter) in self.lr_mults_it.iteritems():
            if iter.next():
                print 'self.batches_seen = ', self.batches_seen
                self.lr_mults_shrd[k].set_value(iter.value)
                print 'lr_mults_shrd[%s] = %f' % (k, iter.value)

        self.enforce_constraints()

        # save to different path each epoch
        if self.my_save_path and \
           (self.batches_seen in self.save_at or
            self.batches_seen % self.save_every == 0):
            fname = self.my_save_path + '_e%i.pkl' % self.batches_seen
            print 'Saving to %s ...' % fname,
            serial.save(fname, self)
            print 'done'

        return self.batches_seen < self.max_updates
Пример #23
0
    def on_monitor(self, model, dataset, algorithm):
        # this shall never happen but better safe than sorry
        if self.predictor is None:
            self.setup(model, dataset, algorithm)

        # obtaining validating set # TODO: finally we want to have train-validation-test set. Or sth.
        valid_x = algorithm.monitoring_dataset['valid'].X
        valid_y = algorithm.monitoring_dataset['valid'].y
        predictions = self.predictor.get_predictions(valid_x)
        threshold, score = self.compute_optimal_threshold_and_score(valid_y, predictions)

        self.threshold_list.append(threshold)
        self.score_list.append(score)

        if self.saving_path is not None and self.save:
            if max(self.score_list) == score:
                try:
                    # Make sure that saving does not serialize the dataset
                    dataset._serialization_guard = SerializationGuard()
                    save_path = self.saving_path
                    serial.save(save_path, model,
                                on_overwrite='backup')
                finally:
                    dataset._serialization_guard = None

        print "F1Score1Threshold score", score, "\ncorresponding threshold:", threshold
Пример #24
0
    def on_monitor(self, model, dataset, algorithm):
        import numpy as np
        # this shall never happen but better safe than sorry
        if self.predictor is None:
            self.setup(model, dataset, algorithm)

        # obtaining validating set #
        valid_x = algorithm.monitoring_dataset['valid'].X
        valid_y = algorithm.monitoring_dataset['valid'].y
        y_pred = self.predictor.get_predictions(valid_x)
        y_classes = [np.argmax(pred) for pred in y_pred]
        score = f1_score(y_true=valid_y, y_pred=y_classes)
        self.score_list.append(score)

        if self.saving_path is not None and self.save:
            if max(self.score_list) == score:
                try:
                    # Make sure that saving does not serialize the dataset
                    dataset._serialization_guard = SerializationGuard()
                    save_path = self.saving_path
                    serial.save(save_path, model,
                                on_overwrite='backup')
                finally:
                    dataset._serialization_guard = None

        print "F1 score:", score
Пример #25
0
def main(train_path, out_path, dataset, standardize, C, **kwargs):
    """
    Fit a model on precomputed features and write it to `out_path`.

    Parameters
    ----------
    train_path : handed to `get_features`.
    out_path : destination passed to `serial.save`.
    dataset : str
        Must be exactly one of 'stl10', 'cifar10' or 'cifar100'.
    standardize : handed to `get_features`.
    C : handed to `train`.
    **kwargs : accepted but not used.
    """
    stl10, cifar10, cifar100 = [dataset == name
                                for name in ('stl10', 'cifar10', 'cifar100')]
    # Reject anything that is not exactly one of the known datasets.
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    train_y, fold_indices = get_labels_and_fold_indices(
        cifar10, cifar100, stl10)
    gc.collect()
    assert train_y is not None

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)

    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    else:
        # Either CIFAR variant, as guaranteed by the check above.
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000, )

    print('training model')
    fitted = train(train_X, train_y, C)

    print('saving model')
    serial.save(out_path, fitted)
Пример #26
0
    def save(self):
        """
        Run the on_save hooks of all extensions and, if `self.save_path`
        is set, serialize the list of trained models.

        Raises
        ------
        IOError
            If `self.save_path` already exists and `self.allow_overwrite`
            is false.
        """
        # Per-trainer (Train) extensions first ...
        for trainer in self.trainers:
            for ext in trainer.extensions:
                ext.on_save(trainer.model, trainer.dataset, trainer.algorithm)

        # ... then the cross-validation (TrainCV) extensions.
        for ext in self.cv_extensions:
            ext.on_save(self.trainers)

        if self.save_path is None:
            return

        models = [t.model for t in self.trainers]
        try:
            # Guard every dataset while the models are being serialized.
            for t in self.trainers:
                t.dataset._serialization_guard = SerializationGuard()
            overwrite_blocked = (not self.allow_overwrite and
                                 os.path.exists(self.save_path))
            if overwrite_blocked:
                raise IOError("Trying to overwrite file when not allowed.")
            serial.save(self.save_path, models, on_overwrite='backup')
        finally:
            for t in self.trainers:
                t.dataset._serialization_guard = None
Пример #27
0
    def on_save(self, trainers):
        """
        Save best model from each cross-validation fold.

        The `best_model` of the first `MonitorBasedSaveBest` extension of
        every trainer is collected and the resulting list is written once
        to `self.save_path`. Nothing happens when `self.save_path` is
        None.

        Parameters
        ----------
        trainers : list
            List of Train objects belonging to the parent TrainCV object.

        Raises
        ------
        AssertionError
            If some trainer has no `MonitorBasedSaveBest` extension.
        """
        if self.save_path is None:
            return
        models = []
        for trainer in trainers:
            for extension in trainer.extensions:
                if isinstance(extension, MonitorBasedSaveBest):
                    models.append(extension.best_model)
                    break
        assert len(models) == len(trainers)
        try:
            for trainer in trainers:
                trainer.dataset._serialization_guard = SerializationGuard()
            # Save exactly once, and only after every dataset is guarded.
            # Saving inside the loop above would rewrite the file once per
            # trainer while later datasets were still unguarded.
            serial.save(self.save_path, models, on_overwrite='backup')
        finally:
            for trainer in trainers:
                trainer.dataset._serialization_guard = None
Пример #28
0
def get_processed_dataset():
    """
    Return the (trainset, testset) pair, reusing cached pickles if present.

    When both 'pp_cifar10_train.pkl' and 'pp_cifar10_test.pkl' exist in the
    current directory they are loaded and returned. Otherwise the raw
    CIFAR-10 sets are loaded, a preprocessing pipeline is applied to the
    training set, both sets are pickled to those two paths, and their
    `yaml_src` attributes are pointed at the pickles.

    The pipeline parameters (patch_shape, patches_per_image, num_components,
    keep_var_fraction, train_size, input_width) are read from module scope.
    """
    train_path = 'pp_cifar10_train.pkl'
    test_path = 'pp_cifar10_test.pkl'

    if os.path.exists(train_path) and os.path.exists(test_path):
        print('loading preprocessed data')
        return serial.load(train_path), serial.load(test_path)

    print('loading raw data...')
    trainset = cifar10.CIFAR10(which_set="train")
    testset = cifar10.CIFAR10(which_set="test")

    pipeline = preprocessing.Pipeline()
    add_step = pipeline.items.append
    add_step(preprocessing.ExtractPatchesWithPosition(
        patch_shape=patch_shape,
        patches_per_image=patches_per_image))
    add_step(preprocessing.GlobalContrastNormalization(
        sqrt_bias=10.,
        use_std=True))
    add_step(preprocessing.PCA(
        num_components=num_components,
        keep_var_fraction=keep_var_fraction))
    add_step(preprocessing.ExtractPatchPairs(
        patches_per_image=patches_per_image,
        num_images=train_size,
        input_width=input_width))

    # NOTE(review): only the training set goes through the pipeline; the
    # test set is pickled exactly as loaded. Confirm this is intended.
    trainset.apply_preprocessor(preprocessor=pipeline, can_fit=True)

    # the pkl-ing is having issues, the dataset is maybe too big.
    serial.save(train_path, trainset)
    serial.save(test_path, testset)

    # this path will be used for visualizing weights after training is done
    trainset.yaml_src = '!pkl: "%s"' % train_path
    testset.yaml_src = '!pkl: "%s"' % test_path

    return trainset, testset
Пример #29
0
    def on_save(self, trainers):
        """
        Save best model from each cross-validation fold.

        For every trainer, the `best_model` of the first
        MonitorBasedSaveBest extension found on it is collected; the
        resulting list is then written to `self.save_path` in a single
        `serial.save` call (with on_overwrite='backup'). Nothing happens
        when `self.save_path` is None.

        Parameters
        ----------
        trainers : list
            List of Train objects belonging to the parent TrainCV object.
        """
        if self.save_path is None:
            return
        models = []
        for trainer in trainers:
            for extension in trainer.extensions:
                if isinstance(extension, MonitorBasedSaveBest):
                    models.append(extension.best_model)
                    break
        # Every trainer must have contributed exactly one best model.
        assert len(models) == len(trainers)
        try:
            # Install the guard on *every* dataset first, then save once.
            # Saving inside this loop would rewrite the file once per
            # trainer and run the early saves with some datasets unguarded.
            for trainer in trainers:
                trainer.dataset._serialization_guard = SerializationGuard()
            serial.save(self.save_path, models, on_overwrite='backup')
        finally:
            # Always remove the guards, even if saving failed.
            for trainer in trainers:
                trainer.dataset._serialization_guard = None
Пример #30
0
    def save(self):
        """
        Run all on_save hooks, then pickle the trained models.

        Each Train extension receives its trainer's model, dataset and
        algorithm; each TrainCV extension receives the full trainer list.
        If `save_path` is set, the list of trainer models is written there
        with on_overwrite='backup'.

        Raises
        ------
        IOError
            If `save_path` already exists and `allow_overwrite` is false.
        """
        # Hooks of the per-fold Train objects.
        for fold in self.trainers:
            for hook in fold.extensions:
                hook.on_save(fold.model, fold.dataset, fold.algorithm)

        # Hooks attached to the TrainCV object itself.
        for cv_hook in self.cv_extensions:
            cv_hook.on_save(self.trainers)

        if self.save_path is None:
            return

        # Serialize the trained models.
        models = [fold.model for fold in self.trainers]
        try:
            for fold in self.trainers:
                fold.dataset._serialization_guard = SerializationGuard()
            overwrite_refused = (not self.allow_overwrite and
                                 os.path.exists(self.save_path))
            if overwrite_refused:
                raise IOError("Trying to overwrite file when not allowed.")
            serial.save(self.save_path, models, on_overwrite='backup')
        finally:
            # Guards are removed whether or not the save succeeded.
            for fold in self.trainers:
                fold.dataset._serialization_guard = None
Пример #31
0
 def on_monitor(self, model, dataset, algorithm):
     """
     Pickle the model to a file named after the current epoch count.

     The target path is `save_path + save_prefix + <epochs seen> + '.pkl'`,
     where the epoch count is read from `algorithm.monitor._epochs_seen`.
     The save is wrapped in `log_timing` and uses on_overwrite='backup'.
     `dataset` is not used.
     """
     epochs_seen = algorithm.monitor._epochs_seen
     target = self.save_path + self.save_prefix + str(epochs_seen) + '.pkl'

     with log_timing(log, 'saving model to {}'.format(target)):
         serial.save(target, model, on_overwrite='backup')
Пример #32
0
    def create_datasets(cls,
                        datasets=None,
                        overwrite=False,
                        img_dir=DATA_DIR,
                        output_dir=DATA_DIR):
        """Creates the requested datasets, and writes them to disk.

        For each dataset name, a '<name>.pkl' and a '<name>.npy' file are
        expected in `output_dir`. The dataset is (re)built and saved when
        `overwrite` is true or when either file is missing.

        Parameters
        ----------
        datasets : iterable of str, optional
            Names to create; falls back to `cls.ALL_DATASETS` when empty
            or None.
        overwrite : bool
            Rebuild even if both output files already exist.
        img_dir : str
            Passed to the class constructor as `img_dir`.
        output_dir : str
            Directory (created via `serial.mkdir`) receiving the files.
        """
        datasets = datasets or cls.ALL_DATASETS
        serial.mkdir(output_dir)

        for dataset_name in list(datasets):
            output_files = {}
            for ext in ['pkl', 'npy']:
                output_files[ext] = os.path.join(
                    output_dir, '%s.%s' % (dataset_name, ext))

            files_missing = np.any(
                [not os.path.isfile(path) for path in output_files.values()])

            if not (overwrite or files_missing):
                continue

            print("Loading the %s data" % dataset_name)
            dataset = cls(which_set=dataset_name, img_dir=img_dir)

            print("Saving the %s data" % dataset_name)
            dataset.use_design_loc(output_files['npy'])
            serial.save(output_files['pkl'], dataset)
Пример #33
0
    def train_batch(self, dataset, batch_size):

        x = dataset.get_batch_design(batch_size, include_labels=False)
        self.batch_train_func(x)

        # accounting...
        self.examples_seen += self.batch_size
        self.batches_seen += 1

        # modify learning rate multipliers
        for (k, iter) in self.lr_mults_it.iteritems():
            if iter.next():
                print 'self.batches_seen = ', self.batches_seen
                self.lr_mults_shrd[k].set_value(iter.value)
                print 'lr_mults_shrd[%s] = %f' % (k,iter.value)

        self.enforce_constraints()

        # save to different path each epoch
        if self.my_save_path and \
           (self.batches_seen in self.save_at or
            self.batches_seen % self.save_every == 0):
            fname = self.my_save_path + '_e%i.pkl' % self.batches_seen
            print 'Saving to %s ...' % fname,
            serial.save(fname, self)
            print 'done'

        return self.batches_seen < self.max_updates
Пример #34
0
def main(train_path, out_path, split, **kwargs):
    """
    Train the omnivore/fruit model and write the model and report.

    Labels and fold indices are fetched, training features are loaded from
    `train_path`, one set of classifiers is built per task ('omnivore' and
    'fruit'), and `train` is run with any extra keyword arguments. The
    model is pickled to `out_path + '.model.pkl'` and the validation
    report is written to `out_path + '.validation_report.txt'`.

    The features must be float32 with 120 rows, and both label vectors
    must have shape (120,); this is enforced with assertions.
    """
    y_fine, y_coarse, fold_indices = get_labels_and_fold_indices()
    gc.collect()

    print('loading training features')
    train_X = get_features(train_path, split)
    #assert train_X.flags.c_contiguous
    gc.collect()

    num_examples = 120
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == num_examples
    assert y_fine.shape == (num_examples, )
    assert y_coarse.shape == (num_examples, )

    report = Report(train_path, split)
    gc.collect()

    print('making omnivore classifiers')
    omnivore_classifiers = get_classifiers(
        'omnivore', train_X, y_fine, y_coarse, fold_indices)
    print('making fruit classifiers')
    fruit_classifiers = get_classifiers(
        'fruit', train_X, y_fine, y_coarse, fold_indices)

    model = train(fold_indices,
                  omnivore_classifiers,
                  fruit_classifiers,
                  train_X,
                  y_fine,
                  y_coarse,
                  report,
                  **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')
Пример #35
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Write a per-epoch snapshot of the model.

        The file name is built from `save_path`, `save_prefix`, the number
        of epochs the monitor has seen, and the '.pkl' suffix. The save is
        timed through `log_timing` and uses on_overwrite='backup'.
        `dataset` is not used.
        """
        epochs_seen = algorithm.monitor._epochs_seen
        snapshot_path = (self.save_path + self.save_prefix +
                         str(epochs_seen) + '.pkl')

        with log_timing(log, 'saving model to {}'.format(snapshot_path)):
            serial.save(snapshot_path, model, on_overwrite='backup')
Пример #36
0
def compute_ZCA_fast(X, normalize, ZCA_filename="zca"):
    """
    Fit a ZCA preprocessor on `X` and pickle it.

    The preprocessor stores its matrices at `ZCA_filename + ".npz"` and is
    itself saved to `ZCA_filename + ".pkl"`. `X` is converted to float32
    (and divided by 255 when `normalize` is true) and passed transposed
    to `fit`. Nothing is returned.
    """
    whitener = preprocessing.ZCA()
    whitener.set_matrices_save_path(ZCA_filename+".npz")

    # astype yields a new float32 array, which is what gets scaled below.
    as_float = X.astype(np.float32)
    if normalize:
        as_float /= 255.0

    whitener.fit(as_float.T)
    serial.save(ZCA_filename+".pkl", whitener)
Пример #37
0
 def save(self, filename):
     """
     Remove the data-set attributes from the experiment, then pickle it.

     `binary_csp.cnt` and `test_cnt` are deleted only when present;
     `cnt` is deleted unconditionally. The experiment is modified in
     place, so these attributes are gone after the call.
     """
     experiment = self.experiment

     # Delete data sets
     if hasattr(experiment.binary_csp, 'cnt'):
         del experiment.binary_csp.cnt
     if hasattr(experiment, 'test_cnt'):
         del experiment.test_cnt
     # NOTE(review): no hasattr guard here, so this raises AttributeError
     # when `cnt` is missing (e.g. on a second call) -- confirm intended.
     del experiment.cnt

     serial.save(filename, experiment)
Пример #38
0
 def save(self, filename):
     """
     Remove the data-set attributes from the experiment, then pickle it.

     `binary_csp.cnt` and `test_cnt` are deleted only when present;
     `cnt` is deleted unconditionally. The experiment is modified in
     place, so these attributes are gone after the call.
     """
     experiment = self.experiment

     # Delete data sets
     if hasattr(experiment.binary_csp, 'cnt'):
         del experiment.binary_csp.cnt
     if hasattr(experiment, 'test_cnt'):
         del experiment.test_cnt
     # NOTE(review): no hasattr guard here, so this raises AttributeError
     # when `cnt` is missing (e.g. on a second call) -- confirm intended.
     del experiment.cnt

     serial.save(filename, experiment)
Пример #39
0
def main(train_path,
        out_path,
        split,
        **kwargs):
    """
    Train the omnivore/fruit model with auxiliary CIFAR-100 examples.

    Training features are loaded from `train_path`; auxiliary features are
    loaded from the same path with 'aux' replaced by 'train'. The auxiliary
    set is restricted to rows whose coarse label is one of
    [4, 11, 3, 12, 7, 6] and then truncated to its first 300 rows. The
    trained model is pickled to `out_path + '.model.pkl'` and the report
    is written to `out_path + '.validation_report.txt'`.

    The training features must be float32 with 120 rows and both label
    vectors must have shape (120,); this is enforced with assertions.
    """
    y_fine, y_coarse, fold_indices = get_labels_and_fold_indices()

    gc.collect()

    print('loading training features')

    train_X = get_features(train_path, split)
    #assert train_X.flags.c_contiguous
    gc.collect()

    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120
    assert y_fine.shape == (120,)
    assert y_coarse.shape == (120,)

    report = Report(train_path, split)

    gc.collect()

    print('making omnivore classifiers')
    omnivore_classifiers = get_classifiers('omnivore', train_X, y_fine,
                                           y_coarse, fold_indices)
    print('making fruit classifiers')
    fruit_classifiers = get_classifiers('fruit', train_X, y_fine, y_coarse,
                                        fold_indices)

    print('loading cifar features')
    aux_features = get_features(train_path.replace('aux', 'train'), False)
    print('loading cifar labels')
    aux_labels = CIFAR100(which_set='train').y_coarse

    print('making masks')
    # The mask must be boolean: NumPy treats an integer (e.g. uint8) array
    # used as an index as a list of row numbers, so a 0/1 integer mask
    # would select rows 0 and 1 repeatedly instead of filtering.
    # Assumes aux_labels holds one label per row of aux_features (1-D)
    # -- TODO confirm.
    mask = np.zeros(aux_labels.shape, dtype=bool)
    for label in [4, 11, 3, 12, 7, 6]:
        mask |= (aux_labels == label)

    print('restricting classes')
    aux_features = aux_features[mask, :]
    aux_labels = aux_labels[mask]

    print('downsampling data')
    aux_features = aux_features[0:300, :]
    aux_labels = aux_labels[0:300]

    print('main train loop')
    model = train(fold_indices, omnivore_classifiers, fruit_classifiers,
                  train_X, y_fine, y_coarse,
                  aux_features, aux_labels, report, **kwargs)

    serial.save(out_path+'.model.pkl', model)
    report.write(out_path+'.validation_report.txt')
Пример #40
0
def main():
    """
    Build the STL-10 8x8 patch dataset and its preprocessing pipeline.

    The downsampled (32x32) unlabeled and train pickles are loaded and
    concatenated, a README is written into the output directory, and a
    pipeline of patch extraction (2 million 8x8 patches), global contrast
    normalization and ZCA is fitted and applied. The design matrix is
    stored at 'data.npy', the dataset at 'data.pkl' and the pipeline at
    'preprocessor.pkl', all under '<data_dir>/stl10_patches_8x8'.
    """
    data_dir = string.preprocess('${PYLEARN2_DATA_PATH}/stl10')

    print('Loading STL10-10 unlabeled and train datasets...')
    downsampled_dir = data_dir + '/stl10_32x32'

    data = serial.load(downsampled_dir + '/unlabeled.pkl')
    supplement = serial.load(downsampled_dir + '/train.pkl')

    print('Concatenating datasets...')
    data.set_design_matrix(np.concatenate((data.X, supplement.X), axis=0))
    # The train set is no longer needed once merged into `data`.
    del supplement

    print("Preparing output directory...")
    patch_dir = data_dir + '/stl10_patches_8x8'
    serial.mkdir(patch_dir)

    # `with` closes the README even if the write fails. The text says 8x8
    # to match the patch_shape actually extracted below.
    with open(patch_dir + '/README', 'w') as README:
        README.write(textwrap.dedent("""
    The .pkl files in this directory may be opened in python using
    cPickle, pickle, or pylearn2.serial.load.

    data.pkl contains a pylearn2 Dataset object defining an unlabeled
    dataset of 2 million 8x8 approximately whitened, contrast-normalized
    patches drawn uniformly at random from a downsampled (to 32x32)
    version of the STL-10 train and unlabeled datasets.

    preprocessor.pkl contains a pylearn2 Pipeline object that was used
    to extract the patches and approximately whiten / contrast normalize
    them. This object is necessary when extracting features for
    supervised learning or test set classification, because the
    extracted features must be computed using inputs that have been
    whitened with the ZCA matrix learned and stored by this Pipeline.

    They were created with the pylearn2 script make_stl10_patches.py.

    All other files in this directory, including this README, were
    created by the same script and are necessary for the other files
    to function correctly.
    """))

    print("Preprocessing the data...")
    pipeline = preprocessing.Pipeline()
    pipeline.items.append(preprocessing.ExtractPatches(patch_shape=(8, 8),
                          num_patches=2*1000*1000))
    pipeline.items.append(
        preprocessing.GlobalContrastNormalization(sqrt_bias=10., use_std=True))
    pipeline.items.append(preprocessing.ZCA())
    data.apply_preprocessor(preprocessor=pipeline, can_fit=True)

    data.use_design_loc(patch_dir + '/data.npy')

    serial.save(patch_dir + '/data.pkl', data)

    serial.save(patch_dir + '/preprocessor.pkl', pipeline)
Пример #41
0
 def main_loop(self):
     """
     Train forever, pickling the agent every 1000 training calls.

     The algorithm is set up with the agent and environment, then
     `algorithm.train()` is called in an endless loop; each call must
     return None. This method never returns normally.
     """
     self.algorithm.setup(agent=self.agent, environment=self.environment)
     steps_done = 0
     while True:
         outcome = self.algorithm.train()
         assert outcome is None
         steps_done += 1
         if steps_done % 1000 != 0:
             continue
         serial.save(self.save_path, self.agent)
         print('saved!')
Пример #42
0
    def save(self):
        """ saves the model """

        #TODO-- save state of dataset and training algorithm so training can be resumed after a crash
        if self.save_path is not None:
            print 'saving to ...'+self.save_path
            t1 = time.time()
            serial.save(self.save_path, self.model)
            t2 = time.time()
            print '...done. saving took ',(t2-t1),' seconds'
Пример #43
0
    def __extract_sequence(self):
        """
        Set `model.sequence` from the model's dataset YAML and re-save it.

        The value defaults to 1. If the text 'sequence:' occurs in
        `model.dataset_yaml_src`, the characters between its first
        occurrence and the next comma are parsed as an int and used
        instead. The model is then pickled to `self.filename`.
        """
        model = self.model
        model.sequence = 1

        pieces = model.dataset_yaml_src.split('sequence:')
        if len(pieces) > 1:
            first_field = pieces[1].split(',')[0]
            model.sequence = int(first_field)

        serial.save(self.filename, model)
    def __call__(self, model, dataset, algorithm):
        if SAVE_MODEL is True:
            save_path = 'toy_sparse_' + str(self.current_epoch) + '_epoch.pkl'
            save_start = datetime.datetime.now()
            serial.save(save_path, model)
            save_end = datetime.datetime.now()
            delta = (save_end - save_start)
            print 'saving model...done. saving took', str(delta)

        self.current_epoch += 1
Пример #45
0
    def __extract_sequence(self):
        """
        Set `model.sequence` from the model's dataset YAML and re-save it.

        The value defaults to 1. If the text 'sequence:' occurs in
        `model.dataset_yaml_src`, the characters between its first
        occurrence and the next comma are parsed as an int and used
        instead. The model is then pickled to `self.filename`.
        """
        model = self.model
        model.sequence = 1

        pieces = model.dataset_yaml_src.split('sequence:')
        if len(pieces) > 1:
            first_field = pieces[1].split(',')[0]
            model.sequence = int(first_field)

        serial.save(self.filename, model)
Пример #46
0
def convert_net():
    """
    Dump the parameter values of every network found under '../networks/'.

    Each file in the tree is loaded with `serial.load`; the result of its
    `get_all_params_values()` is saved next to it as '<file>_PARAMS'.
    Failures while extracting or saving parameters are printed and the
    walk continues; a failure to load a file is not caught.
    """
    # Imported locally because only `walk` is known to be in module scope.
    import os.path

    for (dirpath, dirnames, filenames) in walk('../networks/'):
        for filename in filenames:
            # os.path.join is required here: for sub-directories `dirpath`
            # has no trailing separator, so plain concatenation would
            # produce a non-existent path.
            model = serial.load(os.path.join(dirpath, filename))
            try:
                print("Saving {} parameters".format(filename))
                pvals = model.get_all_params_values()
                serial.save(os.path.join(dirpath, filename + "_PARAMS"),
                            pvals)
            except Exception as e:
                # Best effort: report the problem and move on.
                print(e)
Пример #47
0
def getpx_r(file):
    """
    Save a horizontally mirrored copy of a flattened image array.

    The array loaded from `file` is interpreted as rows of 3x32x32 images
    (3072 values each). Every channel of every image is flipped left to
    right, the result is flattened back to rows of 3072 values and saved
    to `file + "_r"`.

    The number of rows is taken from the data instead of being fixed at
    50000, and the flip is done with one reversed slice rather than one
    `fliplr` assignment per channel. This also avoids assigning a reversed
    view onto its own memory.
    """
    images = serial.load(file).reshape((-1, 3, 32, 32))

    # Reversing the last axis mirrors each 32x32 channel left-to-right;
    # reshaping the reversed view yields a fresh array of flat rows.
    mirrored = images[:, :, :, ::-1].reshape((images.shape[0], 3072))

    serial.save(file+"_r", mirrored)
Пример #48
0
    def save(self):
        """ saves the model """

        #TODO-- save state of dataset and training algorithm so training can be resumed after a crash
        if self.save_path is not None:
            print 'saving to ...'+self.save_path
            t1 = time.time()
            serial.save(self.save_path, self.model)
            t2 = time.time()
            print '...done. saving took ',(t2-t1),' seconds'
Пример #49
0
    def __call__(self, model, dataset, algorithm):
        if SAVE_MODEL is True:
            save_path = 'cifar10_grbm' + str(self.current_epoch) + '_epoch.pkl'
            save_start = datetime.datetime.now()
            serial.save(save_path, model)
            save_end = datetime.datetime.now()
            delta = (save_end - save_start)
            print 'saving model...done. saving took', str(delta)

        self.current_epoch += 1
Пример #50
0
 def __call__(self, model, dataset, algorithm):
     """
     Periodically save the model with its parameters replaced by means.

     On calls where `_count` is positive and a multiple of `save_freq`,
     `self.avg()` is run, every model parameter is temporarily set to the
     value held for it in `self.param_to_mean`, the model is pickled to
     `self.save_path`, and the original values are put back. `_count` is
     incremented at the end of every call that completes.
     `dataset` and `algorithm` are not used.
     """
     if self._count > 0 and self._count % self.save_freq == 0:
         self.avg()
         saved_params = {}
         try:
             for param in model.get_params():
                 saved_params[param] = param.get_value()
                 param.set_value(self.param_to_mean[param].get_value())
             serial.save(self.save_path, model)
         finally:
             # Restore whatever was swapped out, even if saving failed, so
             # training never continues on the averaged parameters.
             for param, original_value in saved_params.items():
                 param.set_value(original_value)
     self._count += 1
Пример #51
0
 def save(self):
     """Saves the model."""
     #TODO-- save state of dataset and training algorithm so training can be
     # resumed after a crash
     if self.save_path is not None:
         print 'saving to', self.save_path, '...'
         save_start = datetime.datetime.now()
         serial.save(self.save_path, self.model)
         save_end = datetime.datetime.now()
         delta = (save_end - save_start)
         print '...done. saving took', str(delta)
Пример #52
0
def main(train_path,
        out_path,
        split,
        dataset,
        standardize,
        **kwargs):
    """
    Train a model on STL-10, CIFAR-10 or CIFAR-100 features and save it.

    `dataset` must be exactly one of 'stl10', 'cifar10' or 'cifar100'.
    Labels, fold indices and features are loaded, the features are made
    C-contiguous if necessary, and `train` is run with any extra keyword
    arguments. The model is pickled to `out_path + '.model.pkl'` and the
    report is written to `out_path + '.validation_report.txt'`.

    Memory usage is printed at several points when the module-level `mem`
    object is truthy. A warning is issued when the features are not
    float32. Expected sizes (5000 rows for STL-10; 50000 rows and labels
    of shape (50000,) for CIFAR) are enforced with assertions.
    """
    def report_mem(prefix):
        # Print `prefix` followed by the current usage, if `mem` is set.
        if mem:
            print(prefix + str(mem.usage()))

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one of the three flags may be set.
    assert stl10 + cifar10 + cifar100 == 1

    report_mem('mem usage before getting labels and folds ')
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100,
                                                        stl10)
    report_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')

    report_mem('mem usage before getting features ')
    train_X = get_features(train_path, split, standardize)
    if not train_X.flags.c_contiguous:
        print('not C contiguous, reshaping')
        assert len(train_X.shape) == 2
        train_X = np.ascontiguousarray(train_X)
        assert train_X.flags.c_contiguous
        print('success, contiguous now')
    gc.collect()
    report_mem('mem usage after getting features ')

    if str(train_X.dtype) != 'float32':
        warnings.warn('Your features are not float32, you may be wasting memory')
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    report = Report(train_path, split, stl10, cifar10, cifar100)

    gc.collect()

    report_mem('mem usage before calling train: ')
    model = train(fold_indices, train_X, train_y, report, **kwargs)

    serial.save(out_path+'.model.pkl', model)
    report.write(out_path+'.validation_report.txt')
Пример #53
0
 def save(self):
     """Saves the model."""
     #TODO-- save state of dataset and training algorithm so training can be
     # resumed after a crash
     if self.save_path is not None:
         print 'saving to', self.save_path, '...'
         save_start = datetime.datetime.now()
         serial.save(self.save_path, self.model)
         save_end = datetime.datetime.now()
         delta = (save_end - save_start)
         print '...done. saving took', str(delta)
    def on_monitor(self, model, dataset, algorithm):

        is_save_interval = (
                model.batches_seen in self.save_at or
                model.batches_seen % self.save_every == 0)

        if self.my_save_path and is_save_interval:
            fname = self.my_save_path + '_e%i.pkl' % model.batches_seen
            model.save_path = fname
            print 'Saving to %s ...' % fname,
            serial.save(fname, model)
            print 'done'
Пример #55
0
def save(stats, targets, fnames, save_path):
    """
    Pickle the statistics, targets and file names as a single dictionary.

    The dictionary written to `save_path` has the keys 'x' (stats),
    'y' (targets) and 'path' (fnames).
    """
    payload = {'x': stats, 'y': targets, 'path': fnames}
    serial.save(save_path, payload)
Пример #56
0
def main():
    """
    Build the CIFAR-100 6x6 patch dataset and its preprocessing pipeline.

    The CIFAR-100 train set is loaded, a README is written into the output
    directory, and a pipeline of patch extraction (2 million 6x6 patches),
    global contrast normalization and ZCA is fitted and applied. The
    design matrix is stored at 'data.npy', the dataset at 'data.pkl' and
    the pipeline at 'preprocessor.pkl', all under
    '<PYLEARN2_DATA_PATH>/cifar100/cifar100_patches'.
    """
    data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}')

    print('Loading CIFAR-100 train dataset...')
    data = CIFAR100(which_set='train')

    print("Preparing output directory...")
    patch_dir = data_dir + '/cifar100/cifar100_patches'
    serial.mkdir(patch_dir)

    # `with` closes the README even if the write fails.
    with open(patch_dir + '/README', 'w') as README:
        README.write(
            textwrap.dedent("""
    The .pkl files in this directory may be opened in python using
    cPickle, pickle, or pylearn2.serial.load.

    data.pkl contains a pylearn2 Dataset object defining an unlabeled
    dataset of 2 million 6x6 approximately whitened, contrast-normalized
    patches drawn uniformly at random from the CIFAR-100 train set.

    preprocessor.pkl contains a pylearn2 Pipeline object that was used
    to extract the patches and approximately whiten / contrast normalize
    them. This object is necessary when extracting features for
    supervised learning or test set classification, because the
    extracted features must be computed using inputs that have been
    whitened with the ZCA matrix learned and stored by this Pipeline.

    They were created with the pylearn2 script make_cifar100_patches.py.

    All other files in this directory, including this README, were
    created by the same script and are necessary for the other files
    to function correctly.
    """))

    print("Preprocessing the data...")
    pipeline = preprocessing.Pipeline()
    pipeline.items.append(
        preprocessing.ExtractPatches(patch_shape=(6, 6),
                                     num_patches=2 * 1000 * 1000))
    pipeline.items.append(
        preprocessing.GlobalContrastNormalization(sqrt_bias=10., use_std=True))
    pipeline.items.append(preprocessing.ZCA())
    data.apply_preprocessor(preprocessor=pipeline, can_fit=True)

    data.use_design_loc(patch_dir + '/data.npy')

    serial.save(patch_dir + '/data.pkl', data)

    serial.save(patch_dir + '/preprocessor.pkl', pipeline)
Пример #57
0
 def __call__(self, model, dataset, algorithm):
     """
     Start parameter averaging, then periodically save the averaged model.

     When `_count` equals `start`, a `_PolyakWorker` is created for the
     model, registered in `algorithm.update_callbacks`, and the model is
     asked to add channels for the worker's `param_to_mean`.

     On later calls where `_count` is a multiple of `save_freq`, every
     model parameter is temporarily set to the value held for it in the
     worker's `param_to_mean`, the model is pickled to `self.save_path`,
     and the original values are put back. `_count` is incremented at the
     end of every call that completes. `dataset` is not used.
     """
     if self._count == self.start:
         self._worker = _PolyakWorker(model)
         algorithm.update_callbacks.append(self._worker)
         #HACK
         model.add_polyak_channels(self._worker.param_to_mean,
                                   algorithm.monitoring_dataset)
     elif self._count > self.start and self._count % self.save_freq == 0:
         saved_params = {}
         try:
             for param in model.get_params():
                 saved_params[param] = param.get_value()
                 param.set_value(
                     self._worker.param_to_mean[param].get_value())
             serial.save(self.save_path, model)
         finally:
             # Restore whatever was swapped out, even if saving failed, so
             # training never continues on the averaged parameters.
             for param, original_value in saved_params.items():
                 param.set_value(original_value)
     self._count += 1
Пример #58
0
 def main_loop(self):
     """
     Train forever, checking parameters and saving every 1000 iterations.

     After setting up the algorithm, every agent parameter is asserted to
     contain no NaN and no Inf values, both before the first training call
     and after each one. Each `algorithm.train()` call must return None.
     The agent is pickled to `save_path` every 1000 iterations. This
     method never returns normally.
     """
     def check_params(iteration):
         # The assertion message carries the iteration and parameter name.
         for param in self.agent.get_params():
             assert not np.any(np.isnan(param.get_value())), \
                 (iteration, param.name)
             assert not np.any(np.isinf(param.get_value())), \
                 (iteration, param.name)

     self.algorithm.setup(agent=self.agent, environment=self.environment)
     i = 0
     check_params(i)
     while True:
         outcome = self.algorithm.train()
         assert outcome is None
         i += 1
         check_params(i)
         if i % 1000 != 0:
             continue
         serial.save(self.save_path, self.agent)
         logger.info('saved!')
Пример #59
0
def test_pkl_yaml_src_field():
    """
    Tests a regression where yaml_src wasn't getting correctly set on pkls.

    An object is pickled to a temporary file, loaded back through a
    '!pkl:' YAML string, and checked for both its payload and its
    `yaml_src` attribute. The temporary file is always removed.
    """
    # Create the file before entering `try`: if mkstemp itself failed
    # inside it, the cleanup below would raise NameError on `fn` and hide
    # the real error.
    fd, fn = mkstemp()
    try:
        close(fd)
        o = DumDum()
        o.x = ('a', 'b', 'c')
        serial.save(fn, o)
        yaml = '!pkl: \'' + fn + '\'\n'
        loaded = load(yaml)
        assert loaded.x == ('a', 'b', 'c')
        assert loaded.yaml_src == yaml
    finally:
        os.remove(fn)