Exemplo n.º 1
0
    def get_extensions(self, ext_array, config_id):
        """
        Build the list of train extensions for one configuration.

        Parameters
        ----------
        ext_array : iterable or None
            Extension ids to look up in ``hps3.extension``. ``None`` yields
            an empty list straight away.
        config_id : object
            Used in the best-model file name and passed on to ``HPSLog``.

        Returns
        -------
        list
            One object per id (built by the matching ``get_ext_<ext_class>``
            method), then a ``MonitorBasedSaveBest`` when
            ``self.mbsb_channel_name`` is set, then an ``HPSLog``.

        Raises
        ------
        HPSData
            When an id has no row in ``hps3.extension``.
        """
        if ext_array is None:
            return []

        query = """
            SELECT ext_class
            FROM hps3.extension
            WHERE ext_id = %s
            """
        collected = []
        for ext_id in ext_array:
            record = self.db.executeSQL(query, (ext_id, ), self.db.FETCH_ONE)
            if not record:
                raise HPSData("No extension for ext_id=" + str(ext_id))
            # Dispatch on the class name stored in the database row.
            builder = getattr(self, 'get_ext_' + record[0])
            collected.append(builder(ext_id))

        # Optional saver that keeps the best model for the monitored channel.
        if self.mbsb_channel_name is not None:
            best_path = self.save_prefix + str(config_id) + "_optimum.pkl"
            collected.append(
                MonitorBasedSaveBest(channel_name=self.mbsb_channel_name,
                                     save_path=best_path))

        # The HPS logger is always the last extension.
        collected.append(HPSLog(self.log_channel_names, self.db, config_id))
        return collected
Exemplo n.º 2
0
    def setup(self, trainers):
        """
        Add tracking to all trainers.

        Parameters
        ----------
        trainers : list
            List of Train objects belonging to the parent TrainCV object.
        """
        for k, trainer in enumerate(trainers):
            if self.save_path is not None and self.save_folds:
                path, ext = os.path.splitext(self.save_path)
                save_path = path + '-{}'.format(k) + ext
            else:
                save_path = None
            if self.tag_key is not None:
                tag_key = '{}-{}'.format(self.tag_key, k)
            else:
                tag_key = None
            extension = MonitorBasedSaveBest(
                self.channel_name,
                save_path=save_path,
                store_best_model=True,
                higher_is_better=self.higher_is_better,
                tag_key=tag_key)
            trainer.extensions.append(extension)
Exemplo n.º 3
0
 def create_training_problem(self, save_best_path):
     """
     Create the Train object for this problem and keep it in ``self.trainer``.

     The trainer uses ``self.alg_datasets['train']``, ``self.model`` and
     ``self.algorithm``. Its only extension is a ``MonitorBasedSaveBest``
     on the ``self.optimize_for`` channel, writing to ``save_best_path``.
     """
     best_saver = MonitorBasedSaveBest(channel_name=self.optimize_for,
                                       save_path=save_best_path)
     self.trainer = Train(dataset=self.alg_datasets['train'],
                          model=self.model,
                          algorithm=self.algorithm,
                          extensions=[best_saver])
def main(job_id, requested_params, cache):
    """
    Train the model described by ``slp_fooddata.yaml`` and return its score.

    Parameters
    ----------
    job_id : object
        Not used by this function.
    requested_params : dict
        Values that override the module-level ``additional_args`` defaults.
        Must (together with the defaults) provide ``start``, ``stop``,
        ``l_wdecay_y``, ``max_norm_y``, ``l_ir_y`` and
        ``log_init_learning_rate``; ``rate``, ``model``, ``converge`` and
        ``save`` are optional switches handled below.
    cache : object
        Not used by this function.

    Returns
    -------
    float
        The value ``read_channel`` returns for ``misclass_channel`` on the
        trained model.
    """
    import copy

    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could potentially
    # be tuned. Work on deep copies: updating the module-level defaults (or
    # the caller's arrays) in place would leak one job's values into the next
    # call made in the same process.
    params = copy.deepcopy(additional_args)
    params.update(copy.deepcopy(requested_params))

    # An explicit 'rate' replaces the first log learning-rate entry.
    if params.get('rate', None) is not None:
        params['log_init_learning_rate'][0] = numpy.array([params['rate']])

    train_params = {
        'train_start': params['start'],
        'train_stop': params['stop'],
        'valid_start': 20000,
        'valid_stop': 24000,
        'test_stop': 4000,
        'batch_size': 100,
        'max_epochs': 20,
        'max_batches': 10,
        'sgd_seed': sgd_seed_str,
        'mlp_seed': mlp_seed_str,
        'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
        'max_col_norm_y': params['max_norm_y'][0],
        'irange_y': math.pow(10, params['l_ir_y'][0]),
        'init_momentum': 0.5,
        'init_learning_rate': math.pow(10,
                                       params['log_init_learning_rate'][0]),
    }

    # The YAML file is a %-template filled in with the values above.
    with open('slp_fooddata.yaml', 'r') as f:
        template = f.read()

    yaml_string = template % train_params
    train_obj = yaml_parse.load(yaml_string)

    # Optionally start from the parameters of a previously saved model.
    pretrained_model_path = params.get('model', None)
    if pretrained_model_path is not None:
        print('loading pre trained model')
        pretrained_model = serial.load(pretrained_model_path)
        print('loading done')
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    # 'converge' runs: raise the epoch limit of the first criterion and save
    # the best model to params['save'].
    if 'converge' in params:
        train_obj.algorithm.termination_criterion._criteria[
            0]._max_epochs = 100
        train_obj.extensions.append(
            MonitorBasedSaveBest('valid_y_misclass', params['save']))

    train_obj.setup()
    train_obj.model.monitor.on_channel_conflict = 'ignore'
    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(
            train_obj.model)
    # setup() was already called above, so the main loop must not repeat it.
    train_obj.main_loop(do_setup=False)
    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass)
Exemplo n.º 5
0
    def create_algorithm(self, data, save_best_path=None):
        """
        Prepare dataset, cost, best-model saver and the SGD algorithm.

        Parameters
        ----------
        data : dict
            Datasets keyed by name; ``'train'`` is required. All entries are
            monitored, except ``'train'`` when ``self.monitor_train`` is
            false.
        save_best_path : str, optional
            Path handed to ``MonitorBasedSaveBest``.

        Sets ``self.cost``, ``self.mbsb``, ``self.algorithm`` and, when
        ``self.plot_monitor`` is truthy, ``self.pm``.
        """
        self.set_dataset(data)
        self.create_adjustors()

        # Epoch limit, combined through And with a MonitorBased criterion on
        # 'valid_objective' when validation-based stopping is enabled.
        epoch_limit = EpochCounter(max_epochs=self.max_epochs)
        if self.valid_stop:
            stagnation = MonitorBased(channel_name='valid_objective',
                                      prop_decrease=.0,
                                      N=3)
            stop_rule = And(criteria=[stagnation, epoch_limit])
        else:
            stop_rule = epoch_limit

        # Cost coefficients have the form (layers, A_weight_decay).
        if self.reg_factors:
            factor = self.reg_factors
            n_hidden = len(self.tagger.hdims)
            n_input = len(self.tagger.layers[0].layers)
            per_layer = [[factor] * n_input] + [factor] * n_hidden + [factor]
            coeffs = (per_layer, factor)
        else:
            coeffs = None
        cost = SeqTaggerCost(coeffs, self.dropout)
        self.cost = cost

        self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                         save_path=save_best_path)

        # Monitor a shallow copy so the stored dataset dict stays complete.
        monitored = dict(self.dataset)
        if not self.monitor_train:
            del monitored['train']

        rule = self.momentum_rule if self.use_momentum else None
        self.algorithm = SGD(batch_size=1,
                             learning_rate=self.lr,
                             termination_criterion=stop_rule,
                             monitoring_dataset=monitored,
                             cost=cost,
                             learning_rule=rule)
        self.algorithm.setup(self, self.dataset['train'])

        if not self.plot_monitor:
            return

        # Optional plotting of the objective channels after every epoch.
        channels = ["valid_objective", "test_objective"]
        if self.monitor_train:
            channels.append("train_objective")
        plots = Plots(channel_names=channels, save_path=self.plot_monitor)
        self.pm = PlotManager([plots], freq=1)
        self.pm.setup(self, None, self.algorithm)
Exemplo n.º 6
0
    def get_extensions(self):
        """
        Build the list of train extensions described by ``self.state``.

        Returns
        -------
        list
            Empty when ``self.state`` has no ``'ext_array'`` entry. Otherwise
            one object per value of ``self.state.ext_array`` (built by the
            matching ``get_ext_<ext_class>`` method), followed by a
            ``MonitorBasedSaveBest`` when ``self.mbsb_channel_name`` is set.

        Notes
        -----
        When the saver is added, its path is also stored in
        ``self.save_path``.
        """
        if 'ext_array' not in self.state:
            return []
        extensions = []

        for ext_obj in self.state.ext_array.values():
            # Dispatch on the class name carried by the extension object.
            fn = getattr(self, 'get_ext_' + ext_obj.ext_class)
            extensions.append(fn(ext_obj))

        # monitor based save best
        # A single pre-formatted argument prints the same text whether print
        # is a statement (Python 2) or a function (Python 3).
        print('save best channel %s' % (self.mbsb_channel_name,))
        if self.mbsb_channel_name is not None:
            self.save_path = (self.save_prefix + str(self.state.config_id) +
                              "_optimum.pkl")
            extensions.append(
                MonitorBasedSaveBest(channel_name=self.mbsb_channel_name,
                                     save_path=self.save_path))

        return extensions
Exemplo n.º 7
0
    def create_algorithm(self, data, save_best_path=None):
        """
        Set up everything needed for training with SGD.

        Parameters
        ----------
        data : dict
            Datasets keyed by name; the ``'train'`` entry is used for
            training. The ``'train'`` entry is only monitored when
            ``self.monitor_train`` is true.
        save_best_path : str, optional
            Where ``MonitorBasedSaveBest`` should write the best model.

        Side effects: assigns ``self.cost``, ``self.mbsb``,
        ``self.algorithm`` and (if ``self.plot_monitor`` is truthy)
        ``self.pm``.
        """
        self.set_dataset(data)
        self.create_adjustors()

        # Termination: an epoch counter, optionally joined (And) with a
        # MonitorBased criterion watching 'valid_objective'.
        term = EpochCounter(max_epochs=self.max_epochs)
        if self.valid_stop:
            term = And(criteria=[
                MonitorBased(channel_name='valid_objective',
                             prop_decrease=.0, N=3),
                term,
            ])

        # Coefficients are passed as (layers, A_weight_decay).
        coeffs = None
        if self.reg_factors:
            rf = self.reg_factors
            input_part = [[rf] * len(self.tagger.layers[0].layers)]
            hidden_part = [rf] * len(self.tagger.hdims)
            coeffs = (input_part + hidden_part + [rf], rf)
        self.cost = cost = SeqTaggerCost(coeffs, self.dropout)

        self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                         save_path=save_best_path)

        # Build the monitoring dict without touching self.dataset itself.
        mon_dataset = dict(self.dataset)
        if not self.monitor_train:
            del mon_dataset['train']

        if self.use_momentum:
            chosen_rule = self.momentum_rule
        else:
            chosen_rule = None
        self.algorithm = SGD(
            batch_size=1,
            learning_rate=self.lr,
            termination_criterion=term,
            monitoring_dataset=mon_dataset,
            cost=cost,
            learning_rule=chosen_rule,
        )
        self.algorithm.setup(self, self.dataset['train'])

        if self.plot_monitor:
            names = ["valid_objective", "test_objective"]
            if self.monitor_train:
                names.append("train_objective")
            self.pm = PlotManager(
                [Plots(channel_names=names, save_path=self.plot_monitor)],
                freq=1)
            self.pm.setup(self, None, self.algorithm)
Exemplo n.º 8
0
def get_layer_trainer_sgd(model, trainset):
    """
    Build a Train object that fits ``model`` on ``trainset`` with SGD.

    Parameters
    ----------
    model : object
        Model handed to ``Train``.
    trainset : object
        Dataset used both for training and for monitoring.

    Returns
    -------
    Train
        Trainer using a dropout cost on layer ``'h0'``, a learning rate of
        0.2, an epoch limit of ``MAX_EPOCHS`` and a ``MonitorBasedSaveBest``
        extension on the ``"y_kl"`` channel.
    """
    drop_cost = Dropout(input_include_probs={'h0': .4},
                        input_scales={'h0': 1.})

    # configs on sgd
    # The mode name was misspelled 'batchwise_shuffled_equential', which is
    # not one of the registered iteration mode names.
    train_algo = SGD(train_iteration_mode='batchwise_shuffled_sequential',
                     learning_rate=0.2,
                     cost=drop_cost,
                     monitoring_dataset=trainset,
                     termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS),
                     update_callbacks=None)

    # Keep the model with the best value of the "y_kl" channel on disk.
    extensions = [
        MonitorBasedSaveBest(channel_name="y_kl",
                             save_path="./convnet_test_best.pkl")
    ]

    return Train(model=model,
                 algorithm=train_algo,
                 extensions=extensions,
                 dataset=trainset)
Exemplo n.º 9
0
def get_trainer(model, trainset, validset, save_path):
    """
    Assemble a Train object that runs SGD with RMSProp on ``model``.

    Parameters
    ----------
    model : object
        Model to train.
    trainset, validset : object
        Datasets monitored as ``'train'`` and ``'valid'``; ``trainset`` is
        also the training dataset.
    save_path : str
        Destination for the best model according to ``valid_y_misclass``.

    Returns
    -------
    Train
    """
    monitored = dict(valid=validset, train=trainset)
    stop_rule = MonitorBased(channel_name='valid_y_misclass',
                             prop_decrease=.001,
                             N=100)

    best_saver = MonitorBasedSaveBest(channel_name='valid_y_misclass',
                                      save_path=save_path)
    lr_decay = LinearDecayOverEpoch(start=1, saturate=200, decay_factor=0.01)

    # Keyword arguments for SGD, collected first so they are easy to tweak.
    sgd_options = dict(
        learning_rate=.01,
        learning_rule=RMSProp(),
        train_iteration_mode='shuffled_sequential',
        batch_size=1200,
        monitoring_dataset=monitored,
        monitor_iteration_mode='shuffled_sequential',
        termination_criterion=stop_rule,
    )

    return Train(model=model,
                 algorithm=SGD(**sgd_options),
                 dataset=trainset,
                 extensions=[best_saver, lr_decay])
def main():
    """
    Build the two-convolution + maxout + multi-softmax network and train it.

    Training and validation data are read through ``ComboDatasetPyTable``
    from the hard-coded ``/Tmp/zumerjer/perturbed_`` prefix. The network is
    trained with ``KeypointADADELTA``; the model is saved every 10 epochs,
    the best model on ``validation_objective`` is written to a separate
    file, and the model is saved once more after the main loop returns.
    """
    # Hyper-parameters and output locations.
    batch_size = 48
    maxout_units = 2000
    num_pcs = 4
    lay1_reg = lay2_reg = maxout_reg = None
    save_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb.joblib'
    best_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb_best.joblib'

    # Datasets.
    train_set = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_',
                                    which_set='train')
    valid_set = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_',
                                    which_set='valid')

    # Two convolutional rectified linear layers.
    conv1 = ConvRectifiedLinear(layer_name='convRect1',
                                output_channels=64,
                                irange=.05,
                                kernel_shape=[5, 5],
                                pool_shape=[4, 4],
                                pool_stride=[2, 2],
                                W_lr_scale=0.1,
                                max_kernel_norm=lay1_reg)
    conv2 = ConvRectifiedLinear(layer_name='convRect2',
                                output_channels=128,
                                irange=.05,
                                kernel_shape=[5, 5],
                                pool_shape=[3, 3],
                                pool_stride=[2, 2],
                                W_lr_scale=0.1,
                                max_kernel_norm=lay2_reg)

    # Maxout layer.
    maxout_layer = Maxout(layer_name='maxout',
                          irange=.005,
                          num_units=maxout_units,
                          num_pieces=num_pcs,
                          W_lr_scale=0.1,
                          max_col_norm=maxout_reg)

    # Multi-softmax output layer.
    output_layer = MultiSoftmax(n_groups=196,
                                irange=0.05,
                                n_classes=96,
                                layer_name='multisoftmax')

    # The MLP itself.
    image_space = Conv2DSpace(shape=[96, 96],
                              num_channels=3,
                              axes=('b', 0, 1, 'c'))
    network = MLP(batch_size=batch_size,
                  input_space=image_space,
                  layers=[conv1, conv2, maxout_layer, output_layer])

    # Cost; -1 is passed as the missing-target marker.
    cost = MLPCost(cost_type='default', missing_target_value=-1)
    cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                       input_scales={'convRect1': 1.})

    # Training algorithm, monitored on the validation set only.
    monitored = {'validation': valid_set}
    stop_rule = MonitorBased(prop_decrease=1e-7,
                             N=100,
                             channel_name='validation_objective')
    optimizer = KeypointADADELTA(decay_factor=0.95,
                                 monitoring_dataset=monitored,
                                 batch_size=batch_size,
                                 termination_criterion=stop_rule,
                                 cost=cost)

    # Train object with a saver for the best validation objective.
    best_saver = MonitorBasedSaveBest(channel_name='validation_objective',
                                      save_path=best_path)
    trainer = Train(dataset=train_set,
                    save_path=save_path,
                    save_freq=10,
                    model=network,
                    algorithm=optimizer,
                    extensions=[best_saver])
    trainer.main_loop()
    trainer.save()
Exemplo n.º 11
0
def test_tagging():
    """Test the tagging functionality of this extension."""
    # TODO: serial.save should be able to take an open file-like object so
    # we can direct its output to a StringIO or something and not need to
    # screw around like this in tests that don't actually need to touch
    # the filesystem. /dev/null would work but the test would fail on
    # Windows.
    #
    # The temporary file is created before entering the try block: if
    # mkstemp itself failed inside it, the finally clause would raise a
    # NameError on ``fn`` and hide the real error.
    fd, fn = tempfile.mkstemp(suffix='.pkl')
    os.close(fd)

    def check_best_costs(model, ext, key, steps):
        """Report each value on 'foobar' and check the tagged best cost."""
        for value, expected in steps:
            model.monitor.channels['foobar'].val_record.append(value)
            model.monitor.report_epoch()
            ext.on_monitor(model, None, None)
            assert model.tag[key]['best_cost'] == expected, (value, expected)

    infinity = float("inf")
    try:
        # Test that the default key gets created.
        def_model = MockModel()
        def_model.monitor = Monitor(def_model)
        def_ext = MonitorBasedSaveBest(channel_name='foobar', save_path=fn)
        def_ext.setup(def_model, None, None)
        assert 'MonitorBasedSaveBest' in def_model.tag

        # Test with a custom key.
        model = MockModel()
        model.monitor = Monitor(model)
        model.monitor.channels['foobar'] = MockChannel()
        ext = MonitorBasedSaveBest(channel_name='foobar', tag_key='test123',
                                   save_path=fn)
        # Best cost is initially infinity.
        ext.setup(model, None, None)
        assert model.tag['test123']['best_cost'] == infinity
        check_best_costs(model, ext, 'test123', [
            (5.0, 5.0),  # first iteration
            (7.0, 5.0),  # second, worse iteration
            (6.0, 5.0),  # third: better than 2 but worse than 1
            (3.0, 3.0),  # fourth, better iteration
        ])

        # setting the starting epoch of saving models.
        model = MockModel()
        model.monitor = Monitor(model)
        model.monitor.channels['foobar'] = MockChannel()
        ext = MonitorBasedSaveBest(channel_name='foobar', tag_key='test12',
                                   start_epoch=4, save_path=fn)
        ext.setup(model, None, None)
        assert model.tag['test12']['best_cost'] == infinity
        check_best_costs(model, ext, 'test12', [
            (5.0, infinity),  # first iteration: before start_epoch
            (3.0, infinity),  # second, better iteration: still ignored
            (7.0, infinity),  # third, worse iteration: still ignored
            (7.0, 7.0),       # fourth iteration: tracking starts
            (10.0, 7.0),      # fifth, worse iteration
            (1.0, 1.0),       # sixth, better iteration
        ])
    finally:
        os.remove(fn)
Exemplo n.º 12
0
# SGD with a momentum learning rule. Training and monitoring both iterate in
# "even_batchwise_shuffled_sequential" mode with batches of ``batchSize``.
algorithm = SGD(
    learning_rate=0.01,
    batch_size=batchSize,
    cost=MethodCost("cost_from_X"),
    monitoring_batch_size=batchSize,
    monitoring_dataset={'train': train, 'valid': valid},
    monitor_iteration_mode="even_batchwise_shuffled_sequential",
    termination_criterion=EpochCounter(max_epochs=200),
    learning_rule=Momentum(init_momentum=0.99),
    train_iteration_mode="even_batchwise_shuffled_sequential",
)

# NOTE: from here on the name ``train`` refers to the Train object, no longer
# to the training dataset it is built from.
train = Train(
    dataset=train,
    model=model,
    algorithm=algorithm,
    save_path="ConvNet4.pkl",
    save_freq=1,
    extensions=[
        # Keep the model with the best validation misclassification.
        MonitorBasedSaveBest(channel_name="valid_y_misclass",
                             save_path="ConvNet4_best.pkl"),
        MomentumAdjustor(final_momentum=0, start=0, saturate=100),
    ],
)

print("Starting training session")

train.main_loop()

print("Done!")
Exemplo n.º 13
0
	def set_extensions(self, extensions=None):
		"""
		Install the default extension list on ``self.extensions``.

		The list holds a single ``MonitorBasedSaveBest`` watching the
		``'test_objective'`` channel. The ``extensions`` argument is
		accepted but not used.
		"""
		best_saver = MonitorBasedSaveBest(
			channel_name='test_objective',
			save_path='./training/training_monitor_best.pkl')
		self.extensions = [best_saver]
Exemplo n.º 14
0
class SequenceTaggerNetwork(Model):
    """
    Sequence tagger built from a ``WordTaggerNetwork`` and a matrix ``A``.

    ``A`` has ``n_classes + 2`` rows and ``n_classes`` columns. ``fprop``
    returns ``A`` concatenated with the tagger output. ``prepare_tagging``
    splits ``A`` into ``start`` (row 0), ``end`` (row 1) and the remaining
    rows, and ``tag_sen`` passes those to ``viterbi``.
    """

    def __init__(self,
                 dataset,
                 w2i,
                 t2i,
                 featurizer,
                 edim=None,
                 hdims=None,
                 fedim=None,
                 max_epochs=100,
                 use_momentum=False,
                 lr=.01,
                 lr_lin_decay=None,
                 lr_scale=False,
                 lr_monitor_decay=False,
                 valid_stop=False,
                 reg_factors=None,
                 dropout=False,
                 dropout_params=None,
                 embedding_init=None,
                 embedded_model=None,
                 monitor_train=True,
                 plot_monitor=None,
                 num=False):
        """
        Store the configuration and create the underlying tagger.

        Parameters
        ----------
        dataset : object
            Must expose ``vocab_size``, ``window_size``, ``total_feats``,
            ``feat_num`` and ``n_classes``.
        w2i, t2i : dict
            Mappings to indices. ``w2i`` is keyed by word (see
            ``set_embedding_weights``); ``t2i`` is keyed by tag (see
            ``get_score``).
        featurizer : object
            Stored as-is.
        edim, hdims, fedim : optional
            Default to 50, ``[100]`` and 5 when ``None``.
        max_epochs : int
            Epoch limit for training.
        use_momentum, lr, lr_lin_decay, lr_scale, lr_monitor_decay,
        valid_stop, reg_factors, monitor_train, plot_monitor, num
            Stored and read by the other methods of this class.
        dropout, dropout_params
            Dropout is enabled when ``dropout`` is true or
            ``dropout_params`` is given.
        embedding_init : str, optional
            Path passed to ``set_embedding_weights``.
        embedded_model : object, optional
            Accepted but not used.
        """
        super(SequenceTaggerNetwork, self).__init__()
        self.vocab_size = dataset.vocab_size
        self.window_size = dataset.window_size
        self.total_feats = dataset.total_feats
        self.feat_num = dataset.feat_num
        self.n_classes = dataset.n_classes
        self.max_epochs = max_epochs
        if edim is None:
            edim = 50
        if hdims is None:
            hdims = [100]
        if fedim is None:
            fedim = 5
        self.edim = edim
        self.fedim = fedim
        self.hdims = hdims

        self.w2i = w2i
        self.t2i = t2i
        self.featurizer = featurizer

        self._create_tagger()

        A_value = numpy.random.uniform(low=-.1,
                                       high=.1,
                                       size=(self.n_classes + 2,
                                             self.n_classes))
        self.A = sharedX(A_value, name='A')
        self.use_momentum = use_momentum
        self.lr = lr
        self.lr_lin_decay = lr_lin_decay
        self.lr_monitor_decay = lr_monitor_decay
        self.lr_scale = lr_scale
        self.valid_stop = valid_stop
        self.reg_factors = reg_factors
        self.close_cache = {}
        self.dropout_params = dropout_params
        self.dropout = dropout or self.dropout_params is not None
        self.monitor_train = monitor_train
        self.num = num
        self.plot_monitor = plot_monitor
        if embedding_init is not None:
            self.set_embedding_weights(embedding_init)

    def _create_tagger(self):
        """Create the ``WordTaggerNetwork`` from the stored dimensions."""
        self.tagger = WordTaggerNetwork(self.vocab_size, self.window_size,
                                        self.total_feats, self.feat_num,
                                        self.hdims, self.edim, self.fedim,
                                        self.n_classes)

    def _create_data_specs(self, dataset):
        """
        Derive input/output spaces and sources from ``dataset.data_specs``.

        All components but the last form the input; the last one is the
        target.
        """
        self.input_space = CompositeSpace([
            dataset.data_specs[0].components[i]
            for i in xrange(len(dataset.data_specs[0].components) - 1)
        ])
        self.output_space = dataset.data_specs[0].components[-1]

        self.input_source = dataset.data_specs[1][:-1]
        self.target_source = dataset.data_specs[1][-1]

    def __getstate__(self):
        """Return the pickled state: only the attributes listed below."""
        names = (
            'vocab_size', 'window_size', 'feat_num', 'total_feats',
            'n_classes', 'input_space', 'output_space', 'input_source',
            'target_source', 'A', 'tagger', 'w2i', 't2i', 'featurizer',
            'max_epochs', 'use_momentum', 'lr', 'lr_lin_decay',
            'lr_monitor_decay', 'lr_scale', 'valid_stop', 'reg_factors',
            'dropout', 'dropout_params', 'monitor_train', 'num',
            'plot_monitor',
        )
        return dict((name, getattr(self, name)) for name in names)

    def fprop(self, data):
        """Return ``A`` concatenated with the tagger output for ``data``."""
        tagger_out = self.tagger.fprop(data)
        probs = T.concatenate([self.A, tagger_out])
        return probs

    def dropout_fprop(self,
                      data,
                      default_input_include_prob=0.5,
                      input_include_probs=None,
                      default_input_scale=2.0,
                      input_scales=None,
                      per_example=True):
        """
        Like ``fprop`` but through the tagger's ``dropout_fprop``.

        When ``self.dropout_params`` has one entry per tagger layer minus
        one, its last entry sets the include probability of
        ``'tagger_out'`` and the others those of ``'h0'``, ``'h1'``, ...;
        the matching scales are the reciprocals. The ``'input'`` layer
        defaults to probability and scale 1.0 when no dictionary is given.
        """
        # Work on copies so dictionaries supplied by the caller are not
        # modified by the per-layer entries added below.
        if input_scales is None:
            input_scales = {'input': 1.0}
        else:
            input_scales = dict(input_scales)
        if input_include_probs is None:
            input_include_probs = {'input': 1.0}
        else:
            input_include_probs = dict(input_include_probs)
        if self.dropout_params is not None:
            if len(self.dropout_params) == len(self.tagger.layers) - 1:
                input_include_probs['tagger_out'] = self.dropout_params[-1]
                input_scales['tagger_out'] = 1.0 / self.dropout_params[-1]
                for i, p in enumerate(self.dropout_params[:-1]):
                    input_include_probs['h{0}'.format(i)] = p
                    input_scales['h{0}'.format(i)] = 1.0 / p
        tagger_out = self.tagger.dropout_fprop(data,
                                               default_input_include_prob,
                                               input_include_probs,
                                               default_input_scale,
                                               input_scales, per_example)
        probs = T.concatenate([self.A, tagger_out])
        return probs

    @functools.wraps(Model.get_lr_scalers)
    def get_lr_scalers(self):
        if not self.lr_scale:
            return {}
        # Tagger scalers plus 1 / n_classes for the matrix A.
        d = self.tagger.get_lr_scalers()
        d[self.A] = 1. / self.n_classes
        return d

    @functools.wraps(Model.get_params)
    def get_params(self):
        return self.tagger.get_params() + [self.A]

    def create_adjustors(self):
        """
        Create the momentum rule/adjustor and, if configured, an LR adjustor.

        ``self.learning_rate_adjustor`` is only set when
        ``lr_monitor_decay`` or ``lr_lin_decay`` is truthy; ``train`` checks
        for its presence with ``hasattr``.
        """
        initial_momentum = .5
        final_momentum = .99
        start = 1
        saturate = self.max_epochs
        self.momentum_adjustor = learning_rule.MomentumAdjustor(
            final_momentum, start, saturate)
        self.momentum_rule = learning_rule.Momentum(initial_momentum,
                                                    nesterov_momentum=True)

        if self.lr_monitor_decay:
            self.learning_rate_adjustor = MonitorBasedLRAdjuster(
                high_trigger=1.,
                shrink_amt=0.9,
                low_trigger=.95,
                grow_amt=1.1,
                channel_name='train_objective')
        elif self.lr_lin_decay:
            self.learning_rate_adjustor = LinearDecayOverEpoch(
                start, saturate, self.lr_lin_decay)

    def compute_used_inputs(self):
        """
        Record which word and feature indices never occur in the train set.

        The result is stored in ``self.notseen`` as sorted numpy arrays
        under the keys ``'words'`` and ``'feats'``.
        """
        seen = {'words': set(), 'feats': set()}
        for sen_w in self.dataset['train'].X1:
            seen['words'] |= reduce(lambda x, y: set(x) | set(y), sen_w, set())
        for sen_f in self.dataset['train'].X2:
            seen['feats'] |= reduce(lambda x, y: set(x) | set(y), sen_f, set())
        words = set(xrange(len(self.w2i)))
        feats = set(xrange(self.total_feats))
        self.notseen = {
            'words': numpy.array(sorted(words - seen['words'])),
            'feats': numpy.array(sorted(feats - seen['feats']))
        }

    def set_dataset(self, data):
        """Store ``data`` and derive data specs and unseen inputs from it."""
        self._create_data_specs(data['train'])
        self.dataset = data
        self.compute_used_inputs()
        self.tagger.notseen = self.notseen

    def create_algorithm(self, data, save_best_path=None):
        """
        Prepare dataset, cost, best-model saver and the SGD algorithm.

        Parameters
        ----------
        data : dict
            Datasets keyed by name; ``'train'`` is required. ``'train'`` is
            removed from the monitored datasets when ``monitor_train`` is
            false.
        save_best_path : str, optional
            Path handed to ``MonitorBasedSaveBest``.
        """
        self.set_dataset(data)
        self.create_adjustors()
        term = EpochCounter(max_epochs=self.max_epochs)
        if self.valid_stop:
            cost_crit = MonitorBased(channel_name='valid_objective',
                                     prop_decrease=.0,
                                     N=3)
            term = And(criteria=[cost_crit, term])

        # (layers, A_weight_decay)
        coeffs = None
        if self.reg_factors:
            rf = self.reg_factors
            lhdims = len(self.tagger.hdims)
            l_inputlayer = len(self.tagger.layers[0].layers)
            coeffs = ([[rf] * l_inputlayer] + ([rf] * lhdims) + [rf], rf)
        cost = SeqTaggerCost(coeffs, self.dropout)
        self.cost = cost

        self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                         save_path=save_best_path)
        # Shallow copy: self.dataset must keep its 'train' entry.
        mon_dataset = dict(self.dataset)
        if not self.monitor_train:
            del mon_dataset['train']

        _learning_rule = (self.momentum_rule if self.use_momentum else None)
        self.algorithm = SGD(
            batch_size=1,
            learning_rate=self.lr,
            termination_criterion=term,
            monitoring_dataset=mon_dataset,
            cost=cost,
            learning_rule=_learning_rule,
        )

        self.algorithm.setup(self, self.dataset['train'])
        if self.plot_monitor:
            cn = ["valid_objective", "test_objective"]
            if self.monitor_train:
                cn.append("train_objective")
            plots = Plots(channel_names=cn, save_path=self.plot_monitor)
            self.pm = PlotManager([plots], freq=1)
            self.pm.setup(self, None, self.algorithm)

    def train(self):
        """
        Train until ``self.algorithm.continue_learning`` returns false.

        After every epoch the monitor is run, followed by the best-model
        saver and whichever adjustors and plot manager are configured.
        """
        while self.algorithm.continue_learning(self):
            self.algorithm.train(dataset=self.dataset['train'])
            self.monitor.report_epoch()
            self.monitor()
            self.mbsb.on_monitor(self, self.dataset['valid'], self.algorithm)
            if self.use_momentum:
                self.momentum_adjustor.on_monitor(self, self.dataset['valid'],
                                                  self.algorithm)
            if hasattr(self, 'learning_rate_adjustor'):
                self.learning_rate_adjustor.on_monitor(self,
                                                       self.dataset['valid'],
                                                       self.algorithm)
            if hasattr(self, 'pm'):
                self.pm.on_monitor(self, self.dataset['valid'], self.algorithm)

    def prepare_tagging(self):
        """Compile ``self.f`` and split the current value of ``A``."""
        X = self.get_input_space().make_theano_batch(batch_size=1)
        Y = self.fprop(X)
        self.f = theano.function([X[0], X[1]], Y)
        self.start = self.A.get_value()[0]
        self.end = self.A.get_value()[1]
        self.A_value = self.A.get_value()[2:]

    def process_input(self, words, feats):
        """Run the compiled function on one sentence."""
        return self.f(words, feats)

    def tag_sen(self, words, feats, debug=False, return_probs=False):
        """
        Tag one sentence with ``viterbi``.

        Returns
        -------
        numpy.ndarray or tuple
            With ``return_probs``: the ``viterbi`` result with every row
            divided by its sum. Otherwise a column array built from
            ``res[1]``; with ``debug`` the tagger output is returned as a
            second element.
        """
        if not hasattr(self, 'f'):
            self.prepare_tagging()
        y = self.process_input(words, feats)
        # fprop prepends the n_classes + 2 rows of A; skip them.
        tagger_out = y[2 + self.n_classes:]
        res = viterbi(self.start, self.A_value, self.end, tagger_out,
                      self.n_classes, return_probs)
        if return_probs:
            return res / res.sum(axis=1)[:, numpy.newaxis]

        if debug:
            return numpy.array([[e] for e in res[1]]), tagger_out
        return numpy.array([[e] for e in res[1]])

    def get_score(self, dataset, mode='pwp'):
        """
        Score the tagger on ``dataset``.

        ``mode='pwp'`` returns a one-element list with the fraction of
        positions whose tag equals the argmax of the gold row;
        ``mode='f1'`` returns the result of ``FScCounter.count_score``.
        Any other mode returns ``None``.
        """
        self.prepare_tagging()
        tagged = (self.tag_sen(w, f) for w, f in izip(dataset.X1, dataset.X2))
        gold = dataset.y
        good, bad = 0., 0.
        if mode == 'pwp':
            for t, g in izip(tagged, gold):
                g = g.argmax(axis=1)
                t = t.flatten()
                good += sum(t == g)
                bad += sum(t != g)
            return [good / (good + bad)]
        elif mode == 'f1':
            # Tags ordered by their index in t2i.
            i2t = [t for t, i in sorted(self.t2i.items(), key=lambda x: x[1])]
            f1c = FScCounter(i2t, binary_input=False)
            gold = map(lambda x: x.argmax(axis=1), gold)
            tagged = map(lambda x: x.flatten(), tagged)
            return f1c.count_score(gold, tagged)

    def set_embedding_weights(self, embedding_init):
        """
        Initialise the first input layer from pre-trained embeddings.

        The file is tried as text word2vec, binary word2vec and native
        gensim format in turn; a ``ValueError`` from the first attempt
        makes it fall back to a whitespace-separated text format (one word
        followed by its vector per line, optionally gzip-compressed).

        Raises
        ------
        Exception
            If the embedding dimension differs from ``self.edim``.
        """
        # load embedding with gensim
        from gensim.models import Word2Vec
        try:
            m = Word2Vec.load_word2vec_format(embedding_init, binary=False)
            edim = m.layer1_size
        except UnicodeDecodeError:
            try:
                m = Word2Vec.load_word2vec_format(embedding_init, binary=True)
                edim = m.layer1_size
            except UnicodeDecodeError:
                # not in word2vec format
                m = Word2Vec.load(embedding_init)
                edim = m.layer1_size
        except ValueError:
            # glove model
            m = {}
            # Stays None for an empty file, which then fails the dimension
            # check below instead of raising a NameError.
            edim = None
            if embedding_init.endswith('gz'):
                fp = gzip.open(embedding_init)
            else:
                fp = open(embedding_init)
            # Close the handle even when a line cannot be parsed.
            try:
                for l in fp:
                    le = l.split()
                    m[le[0].decode('utf-8')] = numpy.array(
                        [float(e) for e in le[1:]],
                        dtype=theano.config.floatX)
                    edim = len(le) - 1
            finally:
                fp.close()

        if edim != self.edim:
            raise Exception("Embedding dim and edim doesn't match")
        m_lower = {}
        vocab = (m.vocab if hasattr(m, 'vocab') else m)
        for k in vocab:
            if k in ['UNKNOWN', 'PADDING']:
                continue
            if self.num:
                m_lower[replace_numerals(k.lower())] = m[k]
            else:
                m_lower[k.lower()] = m[k]
        # transform weight matrix with using self.w2i
        params = numpy.zeros(
            self.tagger.layers[0].layers[0].get_param_vector().shape,
            dtype=theano.config.floatX)
        e = self.edim
        for w in self.w2i:
            if w in m_lower:
                v = m_lower[w]
                i = self.w2i[w]
                params[i * e:(i + 1) * e] = v
        # Look vectors up through m, as the loop above does: for a gensim
        # model ``vocab`` maps words to vocabulary entries, not to vectors.
        if 'UNKNOWN' in vocab:
            params[-1 * e:] = m['UNKNOWN']
        if 'PADDING' in vocab:
            params[-2 * e:-1 * e] = m['PADDING']
        self.tagger.layers[0].layers[0].set_param_vector(params)
Exemplo n.º 15
0
def main(job_id, params, cache):
    """Objective function for one hyper-parameter search job.

    The first time a given (``kernel_size_h2``, ``kernel_size_h3``) pair is
    requested, a trainer is built from ``conv_fooddata_spearmint.yaml``, set
    up and stored in ``cache``.  Later jobs with the same pair reuse the
    cached trainer after resetting its monitor and re-initialising the layers,
    learning rate and momentum from ``params``.

    Parameters
    ----------
    job_id : object
        Not used by this function.
    params : dict
        Requested hyper-parameters; tuned values are read as ``params[name][0]``.
        Updated in place with the module-level ``additional_args``.
    cache : dict-like
        Maps ``'cached_trainer' + str(kernel sizes)`` to a set-up trainer.

    Returns
    -------
    float
        The value read from ``misclass_channel`` after training, times 50.
    """
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could potentially be tuned.
    params.update(additional_args)

    def first(name):
        # Tuned values arrive wrapped in a sequence; take the single entry.
        return params[name][0]

    def pow10(name):
        # Several parameters are searched on a log10 scale.
        return math.pow(10, params[name][0])

    cache_key = 'cached_trainer' + str((first('kernel_size_h2'),
                                        first('kernel_size_h3')))
    converge = 'converge' in params

    if cache_key in cache:
        # Reuse the trainer built for these kernel sizes and reset its state.
        train_obj = cache[cache_key]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()

        model = train_obj.model
        model_params = {p.name: p for p in model.get_params()}
        rng = model.rng

        # Layers 0..2 are the conv layers h2..h4, layer 3 is the softmax y.
        for idx, suffix in enumerate(('h2', 'h3', 'h4')):
            update_conv_layer(model.layers[idx], first('l_ir_' + suffix),
                              first('max_norm_' + suffix), model_params, rng)
        update_softmax_layer(model.layers[3], first('l_ir_y'),
                             first('max_norm_y'), model_params, rng)

        log_lr = first('log_init_learning_rate').astype(numpy.float32)
        train_obj.algorithm.learning_rate.set_value(math.pow(10, log_lr))
        momentum = first('init_momentum').astype(numpy.float32)
        train_obj.algorithm.learning_rule.momentum.set_value(momentum)
    else:
        # Values substituted into the YAML template.
        train_params = {
            'train_stop': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': 1,
            'max_batches': 10,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,
            'save_file': 'result',

            'kernel_size_h2': int(first('kernel_size_h2')),
            'output_channels_h2': 1 * k,
            'irange_h2': pow10('l_ir_h2'),
            'max_kernel_norm_h2': first('max_norm_h2'),

            'kernel_size_h3': int(first('kernel_size_h3')),
            'output_channels_h3': int(1.7 * k),
            'irange_h3': pow10('l_ir_h3'),
            'max_kernel_norm_h3': first('max_norm_h3'),

            'kernel_size_h4': int(first('kernel_size_h4')),
            'output_channels_h4': int(2.5 * k),
            'irange_h4': pow10('l_ir_h4'),
            'max_kernel_norm_h4': first('max_norm_h4'),

            'weight_decay_h2': pow10('l_wdecay_h2'),
            'weight_decay_h3': pow10('l_wdecay_h3'),
            'weight_decay_h4': pow10('l_wdecay_h4'),
            'weight_decay_y': pow10('l_wdecay_y'),
            'max_col_norm_y': first('max_norm_y'),
            'irange_y': pow10('l_ir_y'),
            'init_learning_rate': pow10('log_init_learning_rate'),
            'init_momentum': first('init_momentum'),
            'rectifier_left_slope': 0.2
        }

        with open('conv_fooddata_spearmint.yaml', 'r') as template_file:
            template = template_file.read()
        train_obj = yaml_parse.load(template % train_params)

        if converge:
            # Drop every termination criterion and keep the best model instead.
            del train_obj.algorithm.termination_criterion._criteria[:]
            train_obj.extensions.append(
                MonitorBasedSaveBest('valid_y_misclass', 'best_model.pkl'))

        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        cache[cache_key] = train_obj

    if not converge:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(
            train_obj.model)
    # setup() already ran when the trainer was first built.
    train_obj.main_loop(do_setup=False)
    return float(read_channel(train_obj.model, misclass_channel)) * 50
Exemplo n.º 16
0
 def on_monitor(self, model, dataset, algorithm):
     """Delay save-best tracking until ``self.epoch`` has passed ``self.k``.

     While ``self.epoch`` does not exceed ``self.k`` the call only increments
     the counter and returns ``None``; afterwards every call is forwarded to
     ``MonitorBasedSaveBest.on_monitor`` and the counter is left untouched.
     """
     still_waiting = not self.epoch > self.k
     if still_waiting:
         self.epoch += 1
         return
     return MonitorBasedSaveBest.on_monitor(self, model, dataset, algorithm)
Exemplo n.º 17
0
 def __init__(self, k, channel_name, save_path):
     """Initialise the save-best extension plus a counter starting at 0.

     ``k`` is stored as ``self.k``; presumably it is the threshold that
     ``self.epoch`` is compared against in ``on_monitor`` (confirm against
     the enclosing class).
     """
     MonitorBasedSaveBest.__init__(self,
                                   channel_name=channel_name,
                                   save_path=save_path)
     self.k, self.epoch = k, 0
Exemplo n.º 18
0
class SequenceTaggerNetwork(Model):
    def __init__(self, dataset, w2i, t2i, featurizer,
                 edim=None, hdims=None, fedim=None,
                 max_epochs=100, use_momentum=False, lr=.01, lr_lin_decay=None,
                 lr_scale=False, lr_monitor_decay=False,
                 valid_stop=False, reg_factors=None, dropout=False,
                 dropout_params=None, embedding_init=None,
                 embedded_model=None, monitor_train=True, plot_monitor=None,
                 num=False):
        super(SequenceTaggerNetwork, self).__init__()
        self.vocab_size = dataset.vocab_size
        self.window_size = dataset.window_size
        self.total_feats = dataset.total_feats
        self.feat_num = dataset.feat_num
        self.n_classes = dataset.n_classes
        self.max_epochs = max_epochs
        if edim is None:
            edim = 50
        if hdims is None:
            hdims = [100]
        if fedim is None:
            fedim = 5
        self.edim = edim
        self.fedim = fedim
        self.hdims = hdims

        self.w2i = w2i
        self.t2i = t2i
        self.featurizer = featurizer

        self._create_tagger()

        A_value = numpy.random.uniform(low=-.1, high=.1,
                                       size=(self.n_classes + 2,
                                             self.n_classes))
        self.A = sharedX(A_value, name='A')
        self.use_momentum = use_momentum
        self.lr = lr
        self.lr_lin_decay = lr_lin_decay
        self.lr_monitor_decay = lr_monitor_decay
        self.lr_scale = lr_scale
        self.valid_stop = valid_stop
        self.reg_factors = reg_factors
        self.close_cache = {}
        self.dropout_params = dropout_params
        self.dropout = dropout or self.dropout_params is not None
        self.hdims = hdims
        self.monitor_train = monitor_train
        self.num = num
        self.plot_monitor = plot_monitor
        if embedding_init is not None:
            self.set_embedding_weights(embedding_init)

    def _create_tagger(self):
        """Instantiate ``WordTaggerNetwork`` and store it as ``self.tagger``."""
        tagger_args = (
            self.vocab_size, self.window_size, self.total_feats,
            self.feat_num, self.hdims, self.edim, self.fedim, self.n_classes,
        )
        self.tagger = WordTaggerNetwork(*tagger_args)

    def _create_data_specs(self, dataset):
        """Derive input/output spaces and sources from ``dataset.data_specs``.

        The last component of ``data_specs[0]`` and the last entry of
        ``data_specs[1]`` become the output space and target source; all
        preceding ones form the input space and input source.
        """
        components = dataset.data_specs[0].components
        self.input_space = CompositeSpace(list(components[:-1]))
        self.output_space = components[-1]

        sources = dataset.data_specs[1]
        self.input_source = sources[:-1]
        self.target_source = sources[-1]

    def __getstate__(self):
        """Return the picklable state as a dict of selected attributes.

        Only the attributes listed below are saved; anything else on the
        instance (for example ``dataset``, ``algorithm`` or the compiled
        function ``f``) is left out.
        """
        saved_attrs = (
            'vocab_size', 'window_size', 'feat_num', 'total_feats',
            'n_classes',
            'input_space', 'output_space', 'input_source', 'target_source',
            'A', 'tagger', 'w2i', 't2i', 'featurizer', 'max_epochs',
            'use_momentum', 'lr', 'lr_lin_decay', 'lr_monitor_decay',
            'lr_scale',
            'valid_stop', 'reg_factors', 'dropout', 'dropout_params',
            'monitor_train', 'num', 'plot_monitor',
        )
        return dict((name, getattr(self, name)) for name in saved_attrs)

    def fprop(self, data):
        """Return ``A`` concatenated with the tagger output for ``data``.

        The rows of ``A`` come first, so callers such as ``tag_sen`` skip the
        first ``n_classes + 2`` rows to reach the tagger output.
        """
        emissions = self.tagger.fprop(data)
        return T.concatenate([self.A, emissions])

    def dropout_fprop(self, data, default_input_include_prob=0.5,
                      input_include_probs=None, default_input_scale=2.0,
                      input_scales=None, per_example=True):
        """Dropout variant of ``fprop``.

        When ``self.dropout_params`` has exactly one entry per tagger layer
        after the first, its last value configures ``'tagger_out'`` and the
        others configure ``'h0'``, ``'h1'``, ...; each scale is the inverse
        of its include probability.  The two dicts are modified in place,
        including dicts supplied by the caller.
        """
        if input_scales is None:
            input_scales = {'input': 1.0}
        if input_include_probs is None:
            input_include_probs = {'input': 1.0}

        keep_probs = self.dropout_params
        if (keep_probs is not None
                and len(keep_probs) == len(self.tagger.layers) - 1):
            input_include_probs['tagger_out'] = keep_probs[-1]
            input_scales['tagger_out'] = 1.0 / keep_probs[-1]
            for idx, keep in enumerate(keep_probs[:-1]):
                layer_key = 'h{0}'.format(idx)
                input_include_probs[layer_key] = keep
                input_scales[layer_key] = 1.0 / keep

        emissions = self.tagger.dropout_fprop(
            data, default_input_include_prob, input_include_probs,
            default_input_scale, input_scales, per_example)
        return T.concatenate([self.A, emissions])

    @functools.wraps(Model.get_lr_scalers)
    def get_lr_scalers(self):
        # The docstring is inherited from Model.get_lr_scalers through wraps.
        # With lr_scale enabled, the tagger's scalers are extended with an
        # entry for A equal to 1 / n_classes; otherwise nothing is scaled.
        if self.lr_scale:
            scalers = self.tagger.get_lr_scalers()
            scalers[self.A] = 1. / self.n_classes
            return scalers
        return {}

    @functools.wraps(Model.get_params)
    def get_params(self):
        # The docstring is inherited from Model.get_params through wraps.
        # Tagger parameters come first, followed by A.
        tagger_params = self.tagger.get_params()
        return tagger_params + [self.A]

    def create_adjustors(self):
        """Create the momentum rule/adjustor and, optionally, an LR adjustor.

        Momentum goes from 0.5 to 0.99 between epoch 1 and ``max_epochs``.
        ``self.learning_rate_adjustor`` is only created when
        ``lr_monitor_decay`` or ``lr_lin_decay`` is set; the former takes
        precedence.
        """
        first_epoch, last_epoch = 1, self.max_epochs

        self.momentum_adjustor = learning_rule.MomentumAdjustor(
            final_momentum=.99, start=first_epoch, saturate=last_epoch)
        self.momentum_rule = learning_rule.Momentum(
            init_momentum=.5, nesterov_momentum=True)

        if self.lr_monitor_decay:
            self.learning_rate_adjustor = MonitorBasedLRAdjuster(
                high_trigger=1., shrink_amt=0.9,
                low_trigger=.95, grow_amt=1.1, channel_name='train_objective')
            return
        if self.lr_lin_decay:
            self.learning_rate_adjustor = LinearDecayOverEpoch(
                start=first_epoch, saturate=last_epoch,
                decay_factor=self.lr_lin_decay)

    def compute_used_inputs(self):
        """Record which word and feature indices never occur in training.

        Every sentence in ``self.dataset['train'].X1`` (words) and ``.X2``
        (features) is treated as an iterable of index sequences.  The result
        is stored in ``self.notseen`` as sorted numpy arrays under the keys
        ``'words'`` and ``'feats'``; candidates are ``range(len(self.w2i))``
        and ``range(self.total_feats)`` respectively.
        """
        train = self.dataset['train']

        def collect(sentences):
            # Update one set in place instead of rebuilding the union at
            # every step, which made the old reduce() quadratic.
            used = set()
            for sentence in sentences:
                for window in sentence:
                    used.update(window)
            return used

        seen_words = collect(train.X1)
        seen_feats = collect(train.X2)

        all_words = set(xrange(len(self.w2i)))
        all_feats = set(xrange(self.total_feats))
        self.notseen = {
            'words': numpy.array(sorted(all_words - seen_words)),
            'feats': numpy.array(sorted(all_feats - seen_feats))
        }

    def set_dataset(self, data):
        """Attach ``data`` and derive everything that depends on it.

        ``data['train']`` defines the data specs; the unseen word/feature
        indices are then recomputed and handed to the tagger.
        """
        train_split = data['train']
        self._create_data_specs(train_split)
        self.dataset = data
        # compute_used_inputs reads self.dataset, so it must run after the
        # assignment above.
        self.compute_used_inputs()
        self.tagger.notseen = self.notseen

    def create_algorithm(self, data, save_best_path=None):
        """Prepare everything needed by ``train``.

        Sets the dataset, creates the adjustors, the cost (``self.cost``),
        the save-best extension (``self.mbsb``) and a set-up SGD algorithm
        (``self.algorithm``).  When ``self.plot_monitor`` is set, a plot
        manager is stored as ``self.pm``.

        Parameters
        ----------
        data : dict
            Dataset splits; ``'train'`` is required here and ``'valid'`` is
            used later by ``train``.
        save_best_path : str, optional
            Passed to ``MonitorBasedSaveBest`` as ``save_path``.
        """
        self.set_dataset(data)
        self.create_adjustors()

        # Epoch limit, optionally combined with a validation-based criterion.
        term = EpochCounter(max_epochs=self.max_epochs)
        if self.valid_stop:
            valid_crit = MonitorBased(channel_name='valid_objective',
                                      prop_decrease=.0, N=3)
            term = And(criteria=[valid_crit, term])

        # Regularisation coefficients: (per-layer coefficients, A coefficient)
        coeffs = None
        if self.reg_factors:
            factor = self.reg_factors
            n_hidden = len(self.tagger.hdims)
            n_input_parts = len(self.tagger.layers[0].layers)
            layer_coeffs = [[factor] * n_input_parts] + [factor] * n_hidden
            layer_coeffs = layer_coeffs + [factor]
            coeffs = (layer_coeffs, factor)
        self.cost = SeqTaggerCost(coeffs, self.dropout)

        self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                         save_path=save_best_path)

        # Monitor every split, except training when monitor_train is off.
        mon_dataset = dict(self.dataset)
        if not self.monitor_train:
            mon_dataset.pop('train')

        rule = None
        if self.use_momentum:
            rule = self.momentum_rule
        self.algorithm = SGD(batch_size=1, learning_rate=self.lr,
                             termination_criterion=term,
                             monitoring_dataset=mon_dataset,
                             cost=self.cost,
                             learning_rule=rule,
                             )
        self.algorithm.setup(self, self.dataset['train'])

        if self.plot_monitor:
            channels = ["valid_objective", "test_objective"]
            if self.monitor_train:
                channels.append("train_objective")
            plots = Plots(channel_names=channels, save_path=self.plot_monitor)
            self.pm = PlotManager([plots], freq=1)
            self.pm.setup(self, None, self.algorithm)

    def train(self):
        """Run epochs until the algorithm's termination criterion says stop.

        After each epoch the monitor is updated and the save-best extension,
        the momentum adjustor (with ``use_momentum``), the learning-rate
        adjustor and the plot manager (when they exist) are notified.
        """
        while self.algorithm.continue_learning(self):
            self.algorithm.train(dataset=self.dataset['train'])
            self.monitor.report_epoch()
            self.monitor()

            valid_set = self.dataset['valid']
            callbacks = [self.mbsb]
            if self.use_momentum:
                callbacks.append(self.momentum_adjustor)
            # These two only exist when create_adjustors / create_algorithm
            # decided to build them.
            for optional in ('learning_rate_adjustor', 'pm'):
                if hasattr(self, optional):
                    callbacks.append(getattr(self, optional))

            for callback in callbacks:
                callback.on_monitor(self, valid_set, self.algorithm)

    def prepare_tagging(self):
        """Compile the forward function and snapshot the current value of A.

        ``self.f`` maps the two inputs to the ``fprop`` output.  Row 0 of A
        is stored as ``self.start``, row 1 as ``self.end`` and the remaining
        rows as ``self.A_value``.
        """
        X = self.get_input_space().make_theano_batch(batch_size=1)
        self.f = theano.function([X[0], X[1]], self.fprop(X))

        a_now = self.A.get_value()
        self.start, self.end = a_now[0], a_now[1]
        self.A_value = a_now[2:]

    def process_input(self, words, feats):
        """Evaluate the compiled function ``self.f`` on one sentence.

        ``prepare_tagging`` must have been called first so that ``self.f``
        exists.
        """
        output = self.f(words, feats)
        return output

    def tag_sen(self, words, feats, debug=False, return_probs=False):
        """Tag one sentence with Viterbi decoding.

        Parameters
        ----------
        words, feats
            Inputs forwarded to ``process_input``.
        debug : bool
            When true (and ``return_probs`` is false) also return the raw
            tagger output.
        return_probs : bool
            Forwarded to ``viterbi``; when true the result is returned with
            each row divided by its sum.

        Returns
        -------
        numpy.ndarray or tuple
            A column of tag indices (one single-element row per entry of
            ``res[1]``), optionally paired with the tagger output, or the
            row-normalised ``viterbi`` result.
        """
        if not hasattr(self, 'f'):
            self.prepare_tagging()

        full_output = self.process_input(words, feats)
        # The first n_classes + 2 rows of the output are the matrix A.
        tagger_out = full_output[2 + self.n_classes:]
        res = viterbi(self.start, self.A_value, self.end, tagger_out,
                      self.n_classes, return_probs)

        if return_probs:
            row_sums = res.sum(axis=1)[:, numpy.newaxis]
            return res / row_sums

        tags = numpy.array([[tag] for tag in res[1]])
        if debug:
            return tags, tagger_out
        return tags

    def get_score(self, dataset, mode='pwp'):
        """Score the tagger on ``dataset``.

        Parameters
        ----------
        dataset : object
            Provides ``X1``, ``X2`` (inputs) and ``y`` (gold rows whose
            ``argmax(axis=1)`` gives the tag indices).
        mode : str
            ``'pwp'`` returns a one-element list with the fraction of
            correctly tagged positions; ``'f1'`` returns whatever
            ``FScCounter.count_score`` yields.  Any other value returns
            ``None``.
        """
        self.prepare_tagging()
        # Lazy: sentences are tagged as the generator is consumed.
        predictions = (self.tag_sen(w, f)
                       for w, f in izip(dataset.X1, dataset.X2))
        gold = dataset.y

        if mode == 'pwp':
            good, bad = 0., 0.
            for pred, ref in izip(predictions, gold):
                ref_tags = ref.argmax(axis=1)
                pred_tags = pred.flatten()
                good += sum(pred_tags == ref_tags)
                bad += sum(pred_tags != ref_tags)
            return [good / (good + bad)]

        if mode == 'f1':
            by_index = sorted(self.t2i.items(), key=lambda item: item[1])
            i2t = [tag for tag, _ in by_index]
            f1c = FScCounter(i2t, binary_input=False)
            gold_tags = [ref.argmax(axis=1) for ref in gold]
            pred_tags = [pred.flatten() for pred in predictions]
            return f1c.count_score(gold_tags, pred_tags)

    def set_embedding_weights(self, embedding_init):
        """Initialise the first input layer from a pre-trained embedding file.

        The file is tried as text word2vec, then binary word2vec, then a
        native gensim model; a ``ValueError`` from the text loader makes it
        fall back to a GloVe-style plain-text file (optionally gzipped, one
        ``word v1 v2 ...`` entry per line).

        Words are matched lower-cased (and passed through
        ``replace_numerals`` when ``self.num`` is set).  Words of
        ``self.w2i`` missing from the embedding keep a zero vector.  The
        special entries ``UNKNOWN`` and ``PADDING``, when present, fill the
        last and second-to-last ``edim``-sized slots of the parameter vector.

        Parameters
        ----------
        embedding_init : str
            Path of the embedding file.

        Raises
        ------
        Exception
            If the embedding dimension differs from ``self.edim`` (also when
            a GloVe file contains no entries).
        """
        # load embedding with gensim
        from gensim.models import Word2Vec
        try:
            m = Word2Vec.load_word2vec_format(embedding_init, binary=False)
            edim = m.layer1_size
        except UnicodeDecodeError:
            try:
                m = Word2Vec.load_word2vec_format(embedding_init, binary=True)
                edim = m.layer1_size
            except UnicodeDecodeError:
                # not in word2vec format
                m = Word2Vec.load(embedding_init)
                edim = m.layer1_size
        except ValueError:
            # glove model
            m = {}
            # Stays None for an empty file so the dimension check below
            # reports the problem instead of an UnboundLocalError.
            edim = None
            opener = gzip.open if embedding_init.endswith('gz') else open
            fp = opener(embedding_init)
            try:
                for l in fp:
                    le = l.split()
                    if not le:
                        # tolerate blank lines
                        continue
                    m[le[0].decode('utf-8')] = numpy.array(
                        [float(e) for e in le[1:]],
                        dtype=theano.config.floatX)
                    edim = len(le) - 1
            finally:
                # The handle used to be leaked.
                fp.close()

        if edim != self.edim:
            raise Exception("Embedding dim and edim doesn't match")

        special = ('UNKNOWN', 'PADDING')
        m_lower = {}
        vocab = (m.vocab if hasattr(m, 'vocab') else m)
        for k in vocab:
            if k in special:
                continue
            key = k.lower()
            if self.num:
                key = replace_numerals(key)
            m_lower[key] = m[k]

        # transform weight matrix with using self.w2i
        embedding_layer = self.tagger.layers[0].layers[0]
        params = numpy.zeros(embedding_layer.get_param_vector().shape,
                             dtype=theano.config.floatX)
        e = self.edim
        for w, i in self.w2i.items():
            if w in m_lower:
                params[i * e:(i + 1) * e] = m_lower[w]

        # Vectors are always read through m[...]: for a gensim model,
        # ``vocab`` maps words to vocabulary entries, not to vectors.
        if 'UNKNOWN' in vocab:
            params[-1 * e:] = m['UNKNOWN']
        if 'PADDING' in vocab:
            params[-2 * e:-1 * e] = m['PADDING']
        embedding_layer.set_param_vector(params)
Exemplo n.º 19
0
  
# Network made of the previously defined layers h0, h1 and y.
model = MLP(layers=[h0, h1, y],
            batch_size=batchSize,
            input_space=inputSpace)

# SGD with a momentum rule starting at 0, monitored on both splits and
# stopped after 100 epochs.
algorithm = SGD(learning_rate=1e-3,
                cost=MethodCost("cost_from_X"),
                batch_size=batchSize,
                monitoring_batch_size=batchSize,
                monitoring_dataset={'train': train,
                                    'valid': valid},
                monitor_iteration_mode="even_batchwise_shuffled_sequential",
                termination_criterion=EpochCounter(max_epochs=100),
                learning_rule=Momentum(init_momentum=0.0),
                train_iteration_mode="even_batchwise_shuffled_sequential")

# NOTE: from here on ``train`` names the Train object, no longer the dataset.
# The model is saved every epoch; the best one by valid_y_misclass is kept
# in a separate file.
train = Train(dataset=train,
              model=model,
              algorithm=algorithm,
              save_path="2_layer_conv.pkl",
              save_freq=1,
              extensions=[
                  MonitorBasedSaveBest(channel_name="valid_y_misclass",
                                       save_path="2_layer_conv_best.pkl"),
              ])

print("Starting training session")

train.main_loop()

print("Done!")
Exemplo n.º 20
0
def main():
    """Train a conv + maxout + multi-softmax keypoint model on EmotiW data.

    Hyper-parameters are read from the module-level ``params`` object.  The
    model file names are built from ``params.host``, ``params.device`` and
    ``sys.argv[1]``.  The periodically saved model and the best model (by
    ``validation_objective``) are written under ``./models/``.
    """
    # Hyper-parameters
    batch_size = params.batch_size
    lr = params.lr
    finMomentum = params.momentum
    maxout_units = params.units
    num_pcs = params.pieces
    # The same norm constraint is applied to both conv layers and the maxout.
    norm_reg = params.norm_reg

    model_stem = ('./models/' + params.host + '_' + params.device + '_' +
                  sys.argv[1])
    save_path = model_stem + '.joblib'
    best_path = model_stem + 'best.joblib'
    # Not used below.
    numBatches = 400000 / batch_size

    from emotiw.common.datasets.faces.EmotiwKeypoints import EmotiwKeypoints
    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop =40000)
    ddmValid = EmotiwKeypoints(start=40000, stop = 44000)
    ddmTest = EmotiwKeypoints(start=44000)
    
    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name = 'train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name = 'val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name = 'test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size = 1000)
    GCN.apply(ddmTrain, can_fit =True, name = 'train')
    GCN.apply(ddmValid, can_fit =False, name = 'val')
    GCN.apply(ddmTest, can_fit = False, name = 'test')
    return
    '''

    ddmTrain = EmotiwKeypoints(hack='train', preproc='STD')
    ddmValid = EmotiwKeypoints(hack='val', preproc='STD')

    # Two convolutional rectified-linear layers.
    conv1 = ConvRectifiedLinear(layer_name='convRect1',
                                output_channels=64,
                                irange=.05,
                                kernel_shape=[5, 5],
                                pool_shape=[4, 4],
                                pool_stride=[2, 2],
                                W_lr_scale=0.1,
                                max_kernel_norm=norm_reg)
    conv2 = ConvRectifiedLinear(layer_name='convRect2',
                                output_channels=128,
                                irange=.05,
                                kernel_shape=[5, 5],
                                pool_shape=[3, 3],
                                pool_stride=[2, 2],
                                W_lr_scale=0.1,
                                max_kernel_norm=norm_reg)

    # Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=norm_reg)

    # Multi-softmax output: 196 groups of 96 classes.
    layerMS = MultiSoftmax(n_groups=196,
                           irange=0.05,
                           n_classes=96,
                           layer_name='multisoftmax')

    # Assemble the MLP on 96x96 three-channel input.
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=3),
                 layers=[conv1, conv2, maxout, layerMS])

    # Cost; targets equal to -1 are passed as the missing-target value.
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=-1)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})

    # Training algorithm, monitored and terminated on the validation set.
    term_crit = MonitorBased(prop_decrease=1e-7,
                             N=100,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=lr,
                        init_momentum=0.5,
                        monitoring_dataset={'validation': ddmValid},
                        batch_size=batch_size,
                        termination_criterion=term_crit,
                        cost=mlp_cost)

    # Extensions: LR decay, best-model saving, momentum schedule.
    extensions = [
        LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01),
        MonitorBasedSaveBest(channel_name='validation_objective',
                             save_path=best_path),
        MomentumAdjustor(start=1,
                         saturate=25,
                         final_momentum=finMomentum),
    ]

    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=extensions)
    train.main_loop()
    train.save()
Exemplo n.º 21
0
# Trainer for the multi-column model: SGD at learning rate 0.1 for 100
# epochs, monitored on all three splits.  The model is saved every 5 epochs
# and the best one by test_y_misclass is kept separately.
train = Train(
    dataset=dataset,
    model=mlp,
    algorithm=SGD(
        learning_rate=0.1,
        batch_size=128,
        monitoring_dataset={
            'train': dataset,
            'valid': dataset_valid,
            'test': dataset_test,
        },
        termination_criterion=EpochCounter(max_epochs=100),
        train_iteration_mode='even_shuffled_sequential',
        monitor_iteration_mode='even_sequential',
    ),
    save_path="pkl/multicolumn.pkl",
    save_freq=5,
    extensions=[
        MonitorBasedSaveBest(channel_name='test_y_misclass',
                             save_path="pkl/multicolumn_best.pkl"),
    ],
)

# # Load the saved model
# model = serial.load(saved_model_path)
#
# # Remove last layer
# del model.layers[-1]
#
# # Add new layer
# new_output_layer = <make your new layer here>
# model.add_layers([new_output_layer])

# mlp.layers.extend(pretrained_layers[start_layer:])

# , cost=Dropout(input_include_probs={'composite':1.})))
Exemplo n.º 22
0
# NOTE(review): debugger breakpoint left in; execution stops here and waits
# for interactive input before anything below runs.
pdb.set_trace()

# SGD with dropout applied to the input of layer 'h0' (include probability
# 0.8, scale 1.0).  Training is monitored on the validation set only and
# terminated by a MonitorBased criterion on valid_y_misclass.
algorithm = SGD(
    batch_size=batch_size,
    learning_rate=learning_rate,
    init_momentum=.5,
    monitoring_dataset={'valid': val_ds},
    cost=Dropout(input_include_probs={'h0': .8}, input_scales={'h0': 1.}),
    termination_criterion=MonitorBased(channel_name="valid_y_misclass",
                                       prop_decrease=0.,
                                       N=100),
    #termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter {max_epochs: 1},
    update_callbacks=ExponentialDecay(decay_factor=1.00004, min_lr=.000001))

# Training extensions: keep the best model by validation misclassification
# and adjust momentum towards 0.7 (start=1, saturate=250).
extensions = [
    MonitorBasedSaveBest(channel_name='valid_y_misclass',
                         save_path=save_best_path),
    MomentumAdjustor(start=1, saturate=250, final_momentum=.7)
]

model = MLP(batch_size=batch_size,
            input_space=Conv2DSpace(shape=[48, 48],
                                    num_channels=num_chan,
                                    axes=['c', 0, 1, 'b']),
            layers=[
                MaxoutConvC01B(layer_name='h0',
                               pad=0,
                               num_channels=64,
                               num_pieces=2,
                               kernel_shape=[8, 8],
                               pool_shape=[4, 4],
                               pool_stride=[2, 2],
Exemplo n.º 23
0
def test_works():
    """Train a facial-keypoint model end to end on pre-processed data.

    With ``load`` true (the hard-coded setting) the pre-processed train and
    validation sets are read from ``kpd.pkl``; otherwise they are built,
    standardised, contrast-normalised, pickled to ``kpd.pkl`` and the
    function returns without training.

    The model (two conv layers, one rectified-linear layer, a multi-softmax
    output) is trained with ``KeypointSGD``; the model is saved every epoch
    to ``kpd_model2.pkl`` and the best one by ``validation_objective`` to
    ``kpd_best.pkl``.
    """
    load = True
    if not load:
        ddmTrain = FacialKeypoint(which_set='train', start=0, stop=6000)
        ddmValid = FacialKeypoint(which_set='train', start=6000, stop=7049)
        # Statistics are fitted on the training split only and reused for
        # validation (can_fit=False).
        stndrdz = preprocessing.Standardize()
        stndrdz.apply(ddmTrain, can_fit=True)
        stndrdz.apply(ddmValid, can_fit=False)
        GCN = preprocessing.GlobalContrastNormalization()
        GCN.apply(ddmTrain, can_fit=True)
        GCN.apply(ddmValid, can_fit=False)

        # The context manager closes the file even if pickling fails.
        with open('kpd.pkl', 'wb') as pcklFile:
            pickle.dump((ddmTrain, ddmValid), pcklFile)
        return

    with open('kpd.pkl', 'rb') as pcklFile:
        (ddmTrain, ddmValid) = pickle.load(pcklFile)

    #creating layers
    #2 convolutional rectified layers, border mode valid
    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    # Rectified linear units
    layer3 = RectifiedLinear(dim=3000, sparse_init=15, layer_name='RectLin3')

    #multisoftmax: 30 groups of 98 classes
    layerMS = MultiSoftmax(n_groups=30,
                           irange=0.05,
                           n_classes=98,
                           layer_name='multisoftmax')

    #setting up MLP
    MLPerc = MLP(batch_size=8,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=1),
                 layers=[layer1, layer2, layer3, layerMS])

    #mlp_cost; -1 is passed as the missing-target value
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=-1)

    #algorithm
    # learning rate, momentum, batch size, monitoring dataset, cost, termination criteria
    term_crit = MonitorBased(prop_decrease=0.00001,
                             N=30,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=0.001,
                        init_momentum=0.5,
                        monitoring_dataset={
                            'validation': ddmValid,
                            'training': ddmTrain
                        },
                        batch_size=8,
                        batches_per_iter=750,
                        termination_criterion=term_crit,
                        train_iteration_mode='random_uniform',
                        cost=mlp_cost)

    #train extension
    train_ext = ExponentialDecayOverEpoch(decay_factor=0.998,
                                          min_lr_scale=0.01)
    #train object
    train = Train(dataset=ddmTrain,
                  save_path='kpd_model2.pkl',
                  save_freq=1,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[
                      train_ext,
                      MonitorBasedSaveBest(channel_name='validation_objective',
                                           save_path='kpd_best.pkl'),
                      MomentumAdjustor(start=1, saturate=20, final_momentum=.9)
                  ])
    train.main_loop()
    train.save()
Exemplo n.º 24
0
def main(job_id, requested_params, cache):
    """Build (or reuse) a trainer for one parameter setting, run it, and score it.

    The requested parameters are merged over the module-level
    ``additional_args``, substituted into the ``conv_fooddata_spearmint.yaml``
    template, and the resulting train object is run with ``main_loop``.

    Parameters
    ----------
    job_id : object
        Not used inside this function.
    requested_params : dict
        Parameter values overriding ``additional_args``. Most entries are read
        as ``value[0]``; ``'start'`` and ``'stop'`` are read directly. The
        optional keys ``'rate'``, ``'epochs'``, ``'converge'``, ``'save'``,
        ``'model'`` and ``'savelast'`` change behaviour as commented below.
    cache : mapping
        Looked up with the key ``'cached_trainer' + str(fixed_params)``. When
        the key is present the stored train object is re-initialised and
        reused instead of being rebuilt from the YAML template.

    Returns
    -------
    float
        The value read from ``misclass_channel`` of the trained model,
        multiplied by 50.
    """
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could potentially
    # be tuned. Work on a shallow copy so the module-level defaults are not
    # polluted with this call's values (e.g. 'converge' or 'model' would
    # otherwise silently carry over into every later call).
    params = dict(additional_args)
    params.update(requested_params)

    output_channels_h2 = int(1.00 * 50)
    output_channels_h3 = int(3.42 * 50)
    output_channels_h4 = int(11.67 * 50)

    dropout_h2 = float(params['dropout_h2'][0]) / 10
    dropout_h3 = float(params['dropout_h3'][0]) / 10
    dropout_h4 = float(params['dropout_h4'][0]) / 10
    dropout_y = float(params['dropout_y'][0]) / 10

    if params.get('rate', None) is not None:
        # NOTE(review): this writes into the container stored under
        # 'log_init_learning_rate'; the copy above is shallow, so that is the
        # same object the caller supplied -- confirm this is intended.
        params['log_init_learning_rate'][0] += numpy.array([params['rate']])

    # NOTE(review): kernel_size_h4 is used to build the model below but is not
    # part of this key -- confirm it is never varied between cached runs.
    fixed_params = (params['kernel_size_h2'][0], params['kernel_size_h3'][0], params['dropout_h2'][0],
                    params['dropout_h3'][0], params['dropout_h4'][0], params['dropout_y'][0])
    cache_key = 'cached_trainer' + str(fixed_params)

    if cache_key not in cache:
        # Values substituted into the YAML template via %-formatting.
        train_params = {
            'train_start': params['start'],
            'train_stop': params['stop'],
            'valid_start': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': params.get('epochs', 1),
            'max_batches': 50,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,

            'kernel_size_h2': int(params['kernel_size_h2'][0]),
            'output_channels_h2': output_channels_h2,
            'irange_h2': math.pow(10, params['l_ir_h2'][0]),
            'max_kernel_norm_h2': params['max_norm_h2'][0],
            'dropout_h2': dropout_h2,
            'dscale_h2': params['dfac_h2'][0] * 1.0 / dropout_h2,
            'w_lr_sc_h2': math.pow(dropout_h2, 2),
            'weight_decay_h2': math.pow(10, params['l_wdecay_h2'][0]),

            'kernel_size_h3': int(params['kernel_size_h3'][0]),
            'output_channels_h3': output_channels_h3,
            'irange_h3': math.pow(10, params['l_ir_h3'][0]),
            'max_kernel_norm_h3': params['max_norm_h3'][0],
            'dropout_h3': dropout_h3,
            'dscale_h3': params['dfac_h3'][0] * 1.0 / dropout_h3,
            'w_lr_sc_h3': math.pow(dropout_h3, 2),
            'weight_decay_h3': math.pow(10, params['l_wdecay_h3'][0]),

            'kernel_size_h4': int(params['kernel_size_h4'][0]),
            'output_channels_h4': output_channels_h4,
            'irange_h4': math.pow(10, params['l_ir_h4'][0]),
            'max_kernel_norm_h4': params['max_norm_h4'][0],
            'dropout_h4': dropout_h4,
            'dscale_h4': params['dfac_h4'][0] * 1.0 / dropout_h4,
            'w_lr_sc_h4': math.pow(dropout_h4, 2),
            'weight_decay_h4': math.pow(10, params['l_wdecay_h4'][0]),

            'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
            'max_col_norm_y': params['max_norm_y'][0],
            'irange_y': math.pow(10, params['l_ir_y'][0]),
            'dropout_y': dropout_y,
            'dscale_y': 1.0 / dropout_y,
            'w_lr_sc_y': math.pow(dropout_y, 2),
            'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
            'init_momentum': params['init_momentum'][0],
            'rectifier_left_slope': 0.2
        }

        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            trainer = f.read()

        yaml_string = trainer % train_params
        train_obj = yaml_parse.load(yaml_string)

        if 'converge' in params:
            # Override the epoch limit of the first termination criterion and
            # keep the best model according to 'valid_y_misclass'.
            train_obj.algorithm.termination_criterion._criteria[0]._max_epochs = params.get('epochs', 100)
            train_obj.extensions.append(MonitorBasedSaveBest('valid_y_misclass', params['save']))

        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        # Storing the freshly built trainer in the cache is currently disabled:
        # cache[cache_key] = train_obj

    else:
        # Reuse the cached trainer: reset its monitor and success flag, then
        # push the new initialisation ranges, norms and optimiser settings.
        train_obj = cache[cache_key]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()

        model = train_obj.model
        model_params = {param.name: param for param in model.get_params()}

        rng = model.rng

        update_conv_layer(model.layers[0], params['l_ir_h2'][0], params['max_norm_h2'][0], model_params, rng)
        update_conv_layer(model.layers[1], params['l_ir_h3'][0], params['max_norm_h3'][0], model_params, rng)
        update_conv_layer(model.layers[2], params['l_ir_h4'][0], params['max_norm_h4'][0], model_params, rng)
        update_softmax_layer(model.layers[3], params['l_ir_y'][0], params['max_norm_y'][0], model_params, rng)

        train_obj.algorithm.learning_rate.set_value(
                math.pow(10, params['log_init_learning_rate'][0].astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(params['init_momentum'][0].astype(numpy.float32))

    # Optionally start from the parameter values of a previously saved model.
    pretrained_model_path = params.get('model', None)
    if pretrained_model_path is not None:
        # Single-argument parenthesised form prints identically on Python 2 and 3.
        print('loading pre trained model')
        pretrained_model = serial.load(pretrained_model_path)
        print('loading done')
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)
    # setup() was already called when the trainer was built above
    # (NOTE(review): presumably also for cached trainers -- confirm).
    train_obj.main_loop(do_setup=False)
    if params.get('savelast', False):
        serial.save(params['save'] + 'f', train_obj.model, on_overwrite='backup')
    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50