Example #1
def get_layer_maxout(self, layer_id, layer_name):
    row = self.db.executeSQL(
        """
        SELECT num_units, num_pieces, pool_stride, randomize_pools, irange,
               sparse_init, sparse_stdev, include_prob, init_bias, W_lr_scale,
               b_lr_scale, max_col_norm, max_row_norm
        FROM hps3.layer_maxout
        WHERE layer_id = %s
        """, (layer_id,), self.db.FETCH_ONE)
    if row is None:
        raise HPSData("No maxout layer for layer_id=" + str(layer_id))
    (num_units, num_pieces, pool_stride, randomize_pools, irange,
     sparse_init, sparse_stdev, include_prob, init_bias, W_lr_scale,
     b_lr_scale, max_col_norm, max_row_norm) = row
    return Maxout(num_units=num_units,
                  num_pieces=num_pieces,
                  pool_stride=pool_stride,
                  layer_name=layer_name,
                  randomize_pools=randomize_pools,
                  irange=irange,
                  sparse_init=sparse_init,
                  sparse_stdev=sparse_stdev,
                  include_prob=include_prob,
                  init_bias=init_bias,
                  W_lr_scale=W_lr_scale,
                  b_lr_scale=b_lr_scale,
                  max_col_norm=max_col_norm,
                  max_row_norm=max_row_norm)
Example #2
# Imports assumed for this snippet; get_dataset and get_layer_trainer_sgd are
# helper functions defined elsewhere in the original source.
from pylearn2.models.maxout import Maxout
from pylearn2.models.mlp import MLP, ConvRectifiedLinear, Sigmoid
from pylearn2.space import Conv2DSpace


def test_convnet():
    layers = []
    dataset = get_dataset()
    input_space = Conv2DSpace(shape=[256, 256], num_channels=1)

    conv_layer = ConvRectifiedLinear(output_channels=12,
                                     irange=.005,
                                     layer_name="h0",
                                     kernel_shape=[88, 88],
                                     kernel_stride=[8, 8],
                                     pool_shape=[1, 1],
                                     pool_stride=[1, 1],
                                     max_kernel_norm=1.932)

    layers.append(conv_layer)

    maxout_layer = Maxout(layer_name="h1",
                          irange=.005,
                          num_units=600,
                          num_pieces=4,
                          max_col_norm=1.932)

    layers.append(maxout_layer)
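    # dim = ((256 - 88) / 8 + 1) ** 2 = 22 ** 2 = 484; get_convnet below
    # computes the same value explicitly as conv_out_dim.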
    sigmoid_layer = Sigmoid(layer_name="y",
                            dim=484,
                            monitor_style="detection",
                            irange=.005)

    layers.append(sigmoid_layer)
    model = MLP(batch_size=100, layers=layers, input_space=input_space)

    trainer = get_layer_trainer_sgd(model, dataset)
    trainer.main_loop()
Example #3
# Imports assumed for this snippet:
import numpy as np
import theano.tensor as T
from theano import function

from pylearn2.models.maxout import Maxout
from pylearn2.models.mlp import MLP
from pylearn2.space import VectorSpace


def test_min_zero():
    """
    This test guards against a bug where the size of the zero buffer used with
    the min_zero flag was specified to have the wrong size. The bug only
    manifested when compiled with optimizations off, because the optimizations
    discard information about the size of the zero buffer.
    """
    mlp = MLP(input_space=VectorSpace(1),
              layers=[Maxout(layer_name="test_layer",
                             num_units=1,
                             num_pieces=2,
                             irange=.05,
                             min_zero=True)])
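    # With min_zero=True, each maxout unit includes an implicit zero in the
    # set it maxes over, so the unit computes max(0, max_i(w_i . x + b_i)).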
    X = T.matrix()
    output = mlp.fprop(X)
    # Compile in debug mode so we don't optimize out the size of the buffer
    # of zeros
    f = function([X], output, mode="DEBUG_MODE")
    f(np.zeros((1, 1)).astype(X.dtype))
Example #4
def generateNonConvRegressor(teacher_hintlayer, student_output_space):
    dim = teacher_hintlayer.output_space.get_total_dimension()
    layer_name = 'hint_regressor'

    irng = 0.05
    mcn = 0.9

    if isinstance(teacher_hintlayer, MaxoutConvC01B):
        hint_reg_layer = Maxout(layer_name=layer_name,
                                num_units=dim,
                                num_pieces=teacher_hintlayer.num_pieces,
                                irange=irng,
                                max_col_norm=mcn)
    elif isinstance(teacher_hintlayer, ConvRectifiedLinear):
        hint_reg_layer = RectifiedLinear(dim=dim,
                                         layer_name=layer_name,
                                         irange=irng,
                                         max_col_norm=mcn)
    elif isinstance(teacher_hintlayer, (ConvElemwise, ConvElemwisePL2)):
        if isinstance(teacher_hintlayer.nonlinearity,
                      RectifierConvNonlinearity):
            hint_reg_layer = RectifiedLinear(dim=dim,
                                             layer_name=layer_name,
                                             irange=irng,
                                             max_col_norm=mcn)
        elif isinstance(teacher_hintlayer.nonlinearity,
                        SigmoidConvNonlinearity):
            hint_reg_layer = Sigmoid(dim=dim,
                                     layer_name=layer_name,
                                     irange=irng,
                                     max_col_norm=mcn)
        elif isinstance(teacher_hintlayer.nonlinearity, TanhConvNonlinearity):
            hint_reg_layer = Tanh(dim=dim,
                                  layer_name=layer_name,
                                  irange=irng,
                                  max_col_norm=mcn)
        else:
            raise AssertionError("Unknown layer type")
    else:
        raise AssertionError("Unknown fully-connected layer type")

    return hint_reg_layer
Example #5
def get_convnet(img_shape=[256, 256],
                output_channels=16,
                kernel_shape=[88, 88],
                kernel_stride=[8, 8]):
    layers = []
    dataset = get_dataset()
    input_space = Conv2DSpace(shape=img_shape, num_channels=1)

    conv_layer = ConvRectifiedLinear(output_channels=output_channels,
                                     irange=.005,
                                     layer_name="h0",
                                     kernel_shape=kernel_shape,
                                     kernel_stride=kernel_stride,
                                     pool_shape=[1, 1],
                                     pool_stride=[1, 1],
                                     max_kernel_norm=1.932)

    layers.append(conv_layer)

    maxout_layer = Maxout(layer_name="h1",
                          irange=.005,
                          num_units=600,
                          num_pieces=4,
                          max_col_norm=1.932)

    layers.append(maxout_layer)
    # floor division keeps conv_out_dim an int (required for the layer dim)
    conv_out_dim = ((img_shape[0] - kernel_shape[0]) // kernel_stride[0] + 1) ** 2
    sigmoid_layer = Sigmoid(layer_name="y",
                            dim=conv_out_dim,
                            monitor_style="detection",
                            irange=.005)

    layers.append(sigmoid_layer)

    model = MLP(batch_size=100, layers=layers, input_space=input_space)
    return model
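# A minimal usage sketch (assumed, not in the original), reusing the same
# get_dataset and get_layer_trainer_sgd helpers that test_convnet relies on:
if __name__ == "__main__":
    model = get_convnet(img_shape=[256, 256])
    trainer = get_layer_trainer_sgd(model, get_dataset())
    trainer.main_loop()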
Example #6
def main():

    # creating layers
    # 2 convolutional rectified layers, border mode valid
    batch_size = 48
    lr = 1.0  # 0.1/4
    finMomentum = 0.9
    maxout_units = 2000
    num_pcs = 4
    lay1_reg = lay2_reg = maxout_reg = None
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    #save_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'.joblib'
    #best_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'best.joblib'
    save_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb.joblib'
    best_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb_best.joblib'

    #numBatches = 400000/batch_size

    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop =40000)
    ddmValid = EmotiwKeypoints(start=40000, stop = 44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name = 'train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name = 'val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name = 'test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size = 1000)
    GCN.apply(ddmTrain, can_fit =True, name = 'train')
    GCN.apply(ddmValid, can_fit =False, name = 'val')
    GCN.apply(ddmTest, can_fit = False, name = 'test')
    return
    '''

    ddmTrain = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='train')
    ddmValid = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='valid')
    #ddmSmallTrain = ComboDatasetPyTable('/Tmp/zumerjer/all_', which_set='small_train')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim = 3000,
    #                         sparse_init = 15,
    #                 layer_name = 'RectLin3')

    # Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    # multisoftmax
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    # setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96],
                                         num_channels=3,
                                         axes=('b', 0, 1, 'c')),
                 layers=[layer1, layer2, maxout, layerMS])

    # mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})

    #dropout_cost = Dropout(input_include_probs= { 'convRect1' : .8 },
    #                      input_scales= { 'convRect1': 1. })

    # algorithm
    monitoring_dataset = {'validation': ddmValid}  # , 'mini-train': ddmSmallTrain

    term_crit = MonitorBased(prop_decrease=1e-7,
                             N=100,
                             channel_name='validation_objective')

    kp_ada = KeypointADADELTA(decay_factor=0.95,
                              # init_momentum=0.5,
                              monitoring_dataset=monitoring_dataset,
                              batch_size=batch_size,
                              termination_criterion=term_crit,
                              cost=mlp_cost)

    # train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor = 0.998, min_lr_scale = 0.001)
    #train_ext = LinearDecayOverEpoch(start= 1,saturate= 250,decay_factor= .01)
    #train_ext = ADADELTA(0.95)

    # train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kp_ada,
                  extensions=[
                      # train_ext,
                      MonitorBasedSaveBest(channel_name='validation_objective',
                                           save_path=best_path),
                      # MomentumAdjustor(start=1,
                      #                  saturate=25,
                      #                  final_momentum=finMomentum),
                  ])
    train.main_loop()
    train.save()
Example #7
def main():

    #creating layers
    #2 convolutional rectified layers, border mode valid
    batch_size = params.batch_size
    lr = params.lr
    finMomentum = params.momentum
    maxout_units = params.units
    num_pcs = params.pieces
    lay1_reg = lay2_reg = maxout_reg = params.norm_reg
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    save_path = './models/' + params.host + '_' + params.device + '_' + sys.argv[
        1] + '.joblib'
    best_path = './models/' + params.host + '_' + params.device + '_' + sys.argv[
        1] + 'best.joblib'
    numBatches = 400000 // batch_size

    from emotiw.common.datasets.faces.EmotiwKeypoints import EmotiwKeypoints
    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop =40000)
    ddmValid = EmotiwKeypoints(start=40000, stop = 44000)
    ddmTest = EmotiwKeypoints(start=44000)
    
    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name = 'train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name = 'val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name = 'test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size = 1000)
    GCN.apply(ddmTrain, can_fit =True, name = 'train')
    GCN.apply(ddmValid, can_fit =False, name = 'val')
    GCN.apply(ddmTest, can_fit = False, name = 'test')
    return
    '''

    ddmTrain = EmotiwKeypoints(hack='train', preproc='STD')
    ddmValid = EmotiwKeypoints(hack='val', preproc='STD')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim = 3000,
    #                         sparse_init = 15,
    #                 layer_name = 'RectLin3')

    #Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    #multisoftmax
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=3),
                 layers=[layer1, layer2, maxout, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})

    #dropout_cost = Dropout(input_include_probs= { 'convRect1' : .8 },
    #                      input_scales= { 'convRect1': 1. })

    #algorithm
    monitoring_dataset = {'validation': ddmValid}

    term_crit = MonitorBased(prop_decrease=1e-7,
                             N=100,
                             channel_name='validation_objective')

    kpSGD = KeypointSGD(learning_rate=lr,
                        init_momentum=0.5,
                        monitoring_dataset=monitoring_dataset,
                        batch_size=batch_size,
                        termination_criterion=term_crit,
                        cost=mlp_cost)

    #train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor = 0.998, min_lr_scale = 0.001)
    train_ext = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)

    #train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[
                      train_ext,
                      MonitorBasedSaveBest(channel_name='validation_objective',
                                           save_path=best_path),
                      MomentumAdjustor(start=1,
                                       saturate=25,
                                       final_momentum=finMomentum)
                  ])
    train.main_loop()
    train.save()
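# Standard entry point (assumed, not in the original):
if __name__ == '__main__':
    main()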
Example #8
# (snippet starts mid-file: layers l1-l3 and the opening of the l4 layer
#  definition are not shown)
                kernel_shape=[3, 3], pool_shape=[2, 2], pool_stride=[2, 2],
                max_kernel_norm=1.9365, irange=.025)
l5 = MaxoutConvC01B(layer_name='l5',
                    tied_b=1,
                    num_channels=256, num_pieces=2, pad=2,
                    kernel_shape=[3, 3], pool_shape=[2, 2], pool_stride=[2, 2],
                    max_kernel_norm=1.9365, irange=.025)

l6 = MaxoutConvC01B(layer_name='l6',
                    tied_b=1,
                    num_channels=256, num_pieces=2, pad=2,
                    kernel_shape=[3, 3], pool_shape=[2, 2], pool_stride=[2, 2],
                    max_kernel_norm=1.9365, irange=.025)

# dense layers
l7 = Maxout(layer_name='l7', num_units=1024, num_pieces=2, irange=.025)
l8 = Maxout(layer_name='l8', num_units=2048, num_pieces=2, irange=.025)
output_layer = mlp.Softmax(layer_name='y', n_classes=121, irange=.01)

layers = [l1, l2, l3, l4, l5, l6, l7, l8, output_layer]

images = []
y = []
file_names = []
dimensions = []
    
train_labels = [x for x in os.listdir("train")
                if os.path.isdir(os.path.join("train", x))]
train_directories = [os.path.join("train", x) for x in train_labels]
train_labels, train_directories = zip(*sorted(zip(train_labels, train_directories),
                                              key=lambda x: x[0]))

for idx, folder in enumerate(train_directories):
Example #9
def get_maxout(dim_input):
    # `bsize` (batch size) and `nclass` (number of classes) are module-level
    # globals in the original source.
    config = {
        'batch_size': bsize,
        'input_space': Conv2DSpace(shape=dim_input[:2],
                                   num_channels=dim_input[2],
                                   axes=['c', 0, 1, 'b']),
        'layers': [
            MaxoutConvC01B(layer_name='h0',
                           num_channels=96,
                           num_pieces=2,
                           irange=.005,
                           tied_b=1,
                           max_kernel_norm=.9,
                           kernel_shape=[8, 8],
                           pool_shape=[4, 4],
                           pool_stride=[2, 2],
                           W_lr_scale=.05,
                           b_lr_scale=.05),
            MaxoutConvC01B(layer_name='h1',
                           num_channels=128,
                           num_pieces=2,
                           irange=.005,
                           tied_b=1,
                           max_kernel_norm=0.9,
                           kernel_shape=[7, 7],
                           pad=3,
                           pool_shape=[4, 4],
                           pool_stride=[2, 2],
                           W_lr_scale=.05,
                           b_lr_scale=.05),
            MaxoutConvC01B(layer_name='h2',
                           num_channels=160,
                           num_pieces=3,
                           irange=.005,
                           tied_b=1,
                           max_kernel_norm=0.9,
                           kernel_shape=[6, 6],
                           pad=2,
                           pool_shape=[2, 2],
                           pool_stride=[2, 2],
                           W_lr_scale=.05,
                           b_lr_scale=.05),
            MaxoutConvC01B(layer_name='h3',
                           num_channels=192,
                           num_pieces=4,
                           irange=.005,
                           tied_b=1,
                           max_kernel_norm=0.9,
                           kernel_shape=[5, 5],
                           pad=1,
                           pool_shape=[2, 2],
                           pool_stride=[2, 2],
                           W_lr_scale=.05,
                           b_lr_scale=.05),
            Maxout(layer_name='h4',
                   irange=.005,
                   num_units=500,
                   num_pieces=5,
                   max_col_norm=1.9),
            Softmax(layer_name='y',
                    n_classes=nclass,
                    irange=.005,
                    max_col_norm=1.9)
        ]
    }
    return MLP(**config)
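# A minimal usage sketch (assumed, not in the original): bsize, nclass and the
# input shape are illustrative values, and the cuda-convnet-backed
# MaxoutConvC01B layers require a CUDA-capable GPU to actually run.
if __name__ == "__main__":
    bsize = 128                      # assumed batch size
    nclass = 10                      # assumed number of classes
    model = get_maxout((48, 48, 3))  # (rows, cols, channels)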
Example #10
def test_works():
    load = True

    if not load:
        ddmTrain = FacialKeypoint(which_set='train', start=0, stop=6000)
        ddmValid = FacialKeypoint(which_set='train', start=6000, stop=7049)
        ddmTest = FacialKeypoint(which_set='test')
        # valid can_fit = false
        pipeline = preprocessing.Pipeline()
        stndrdz = preprocessing.Standardize()
        stndrdz.apply(ddmTrain, can_fit=True)
        
        # doubt: how about can_fit = False?
        stndrdz.apply(ddmValid, can_fit=False)
        stndrdz.apply(ddmTest, can_fit=False)

        GCN = preprocessing.GlobalContrastNormalization()
        GCN.apply(ddmTrain, can_fit=True)
        GCN.apply(ddmValid, can_fit=False)
        GCN.apply(ddmTest, can_fit=False)
    
        pcklFile = open('kpd.pkl', 'wb')
        obj = (ddmTrain, ddmValid, ddmTest, GCN, stndrdz)
        pickle.dump(obj, pcklFile)
        pcklFile.close()
        return
    else:
        pcklFile = open('kpd.pkl', 'rb')
        (ddmTrain, ddmValid, ddmTest, GCN, stndrdz) = pickle.load(pcklFile)
        pcklFile.close()
        batch_size = 8
        print('going to compute test error')
        generateTest(ddmTrain, 'kpd_maxout_best.pkl', 'output_maxout2pcs.csv')
        return

    # NOTE: the code below is unreachable as written; both branches above
    # return early.
    # creating layers
    # 2 convolutional rectified layers, border mode valid
    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim = 3000,
    #                         sparse_init = 15,
    #                 layer_name = 'RectLin3')

    # Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=2000,
                    num_pieces=2,
                    max_col_norm=1.9)


    # multisoftmax
    n_groups = 30
    n_classes = 98
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    # setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96],
                                         num_channels=1),
                 layers=[layer1, layer2, maxout, layerMS])

    # mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': .8},
                           input_scales={'convRect1': 1.})

    #dropout_cost = Dropout(input_include_probs= { 'convRect1' : .8 },
    #                      input_scales= { 'convRect1': 1. })

    # algorithm
    # learning rate, momentum, batch size, monitoring dataset, cost,
    # termination criteria
    #monitoring_dataset = {'validation': ddmValid, 'training': ddmTrain}
    term_crit = MonitorBased(prop_decrease=0.00001,
                             N=30,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=0.001,
                        init_momentum=0.5,
                        monitoring_dataset={'validation': ddmValid,
                                            'training': ddmTrain},
                        batch_size=batch_size,
                        batches_per_iter=750,
                        termination_criterion=term_crit,
                        cost=mlp_cost)

    # train extension
    train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.01)
    # train object
    train = Train(dataset=ddmTrain,
                  save_path='kpd_model2pcs_maxout.pkl',
                  save_freq=3,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path='kpd_maxout2pcs_best.pkl'),
                              MomentumAdjustor(start=1,
                                               saturate=20,
                                               final_momentum=.9)])
    train.main_loop()
    train.save()
Example #11
# (snippet starts mid-file: the MLP constructor and the opening of the h0
#  layer are not shown)
                               max_kernel_norm=.9,
                               W_lr_scale=0.5,
                               b_lr_scale=0.5),
                MaxoutConvC01B(layer_name='h1',
                               pad=0,
                               num_channels=64,
                               num_pieces=2,
                               kernel_shape=[8, 8],
                               pool_shape=[3, 3],
                               pool_stride=[2, 2],
                               irange=.005,
                               max_kernel_norm=.9,
                               W_lr_scale=0.5,
                               b_lr_scale=0.5),
                Maxout(layer_name='h2',
                       num_units=last_ndim,
                       num_pieces=2,
                       irange=.005),
                Softmax(max_col_norm=1.9365,
                        layer_name='y',
                        n_classes=n_classes,
                        sparse_init=23)
            ])

train = Train(dataset=train_ds,
              model=model,
              algorithm=algorithm,
              extensions=extensions,
              save_path=save_path,
              save_freq=save_freq)

train.main_loop()
def get_layer_MLP():
    extraset = BlackBoxDataset(which_set='extra')

    processor = Standardize()
    processor.apply(extraset, can_fit=True)

    trainset = BlackBoxDataset(which_set='train',
                               start=0,
                               stop=900,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=True)

    validset = BlackBoxDataset(which_set='train',
                               start=900,
                               stop=1000,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=False)
    
    dropCfg = {'input_include_probs': {'h0': .8},
               'input_scales': {'h0': 1.}}

    config = {'learning_rate': .05,
              'init_momentum': .00,
              'cost': Dropout(**dropCfg),
              'monitoring_dataset': {'train': trainset,
                                     'valid': validset},
              'termination_criterion': MonitorBased(channel_name='valid_y_misclass',
                                                    N=100,
                                                    prop_decrease=0),
              'update_callbacks': None}
     
    config0 = {
        'layer_name': 'h0',
        'num_units': 1875,
        'num_pieces': 2,
        'irange': .05,
        # Rather than using weight decay, we constrain the norms of the
        # weight vectors
        'max_col_norm': 2.
    }

    config1 = {
        'layer_name': 'h1',
        'num_units': 700,
        'num_pieces': 2,
        'irange': .05,
        # Rather than using weight decay, we constrain the norms of the
        # weight vectors
        'max_col_norm': 2.
    }

    sftmaxCfg = {
        'layer_name': 'y',
        'init_bias_target_marginals': trainset,
        # Initialize the weights to all 0s
        'irange': .0,
        'n_classes': 9
    }

    l1 = Maxout(**config0)
    l2 = Maxout(**config1)
    l3 = Softmax(**sftmaxCfg)

    train_algo = SGD(**config)
    model = MLP(batch_size=75, layers=[l1, l2, l3], nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,
                 save_path="maxout_best_model.pkl",
                 save_freq=1)
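# A minimal driver (assumed, not in the original):
if __name__ == "__main__":
    train = get_layer_MLP()
    train.main_loop()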