Example #1
0
            0
        )
    )

    # Construct the network
    net.layer_opts['filter_shape'] = (3,1,8,8)
    net.content['l1'] = ConvLayer(net, net.content['input'])

    net.layer_opts['filter_shape'] = (3,3,1,1)
    net.content['l2'] = ConvLayer(net, net.content['l1'])

    net.layer_opts['softmax_norm_dim'] = 1
    net.content['l3']  = SoftmaxLayer(net, net.content['l2'])
    net.content['cost'] = CategoricalCrossEntropy(net, net.content['l3'])

    # Print the network architecture
    net.simpleprint()

    # Initialize learning rate for each updatable layer
    net.InitLR(0.5)

    # Create params list, grad list, momentum list for the theano function to update
    trainer.InitParams(net)
    trainer.opts['validation'] = False
    trainer.opts['test_emp'] = False
    # Update rule
    train_update_rule = trainer.InitUpdateRule(net)
    net.InitTrainFunction(train_update_rule, input, expected_output, ['l3'])
    main_loop = SGDRMainLoop(net)
    main_loop.run(net, trainer)
def train_Attend_224():
    trained_path = '../../data/trained_model/'
    cap_data_path = "../../data/mscoco/MSCOCO_processed/MSCOCO_224_capdata_train_%d.h5"
    img_data_path = "../../data/mscoco/MSCOCO_processed/MSCOCO_224_imgdata_train_%d.h5"
    val_cap_data_path = "../../data/mscoco/MSCOCO_processed/MSCOCO_224_capdata_val_%d.h5"
    val_img_data_path = "../../data/mscoco/MSCOCO_processed/MSCOCO_224_imgdata_val_%d.h5"
    fourth_cv_mv = "../../data/mscoco/MSCOCO_processed/4thconvo_meanvar.dat"
    [relu_mean, relu_std] = LoadList(fourth_cv_mv)
    relu_mean = theano.shared(relu_mean.astype(theano.config.floatX))
    relu_std = theano.shared(relu_std.astype(theano.config.floatX))

    # LSTM params
    n_word = 1004
    max_len = 40

    memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
    #print('Memory: %.2f avail before putting train data to shared' % (memory[0]/1024./1024/1024))

    #create net
    net = ShowTellNet()
    net = LoadVGG_Attend(net)
    net.name = "ShowAttendTellCOCO_Re14e-5_deep_out_context_dim_512"
    #net.name = "ShowAttendTellBugFind"
    snapshot_list = glob.glob(trained_path + net.name + '*.dat')

    num_big_epoch = 5000
    big_batch_size = np.asarray(2000, dtype=theano.config.floatX)
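    # Number of samples copied to GPU shared storage at a time; each h5
    # shard below is processed in chunks of this size.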

    if (len(snapshot_list) == 0):

        # Trainer params
        trainer = Trainer()
        trainer.opts['batch_size'] = 20
        trainer.opts['save'] = False
        trainer.opts['save_freq'] = 2
        #trainer.opts['num_sample'] = num_sample
        #trainer.opts['num_val_sample'] = num_val_sample
        trainer.opts['validation'] = False
        trainer.opts['num_epoch'] = 1
        trainer.opts['dzdw_norm_thres'] = 1
        trainer.opts['dzdb_norm_thres'] = 0.01

        net.layer_opts['updatable'] = True

        # Setting params
        net.net_opts['l1_learning_rate'] = np.asarray(0.005,
                                                      theano.config.floatX)
        net.reset_opts['min_lr'] = np.asarray(0.005,
                                              dtype=theano.config.floatX)
        net.reset_opts['max_lr'] = net.net_opts['l1_learning_rate']

        #Constructing LSTM_ATTEND network from image_feature_region step-by-step
        # step 1: reshape net.content['relu5_3'] to (N, 512, 196), later transposed to the (N, 196, 512) image_feature_region tensor
        # step 2: use the (N, 196, 512) image_feature_region tensor to compute h0, c0 - the initial state and memory of LSTM_ATTEND
        # step 3: construct LSTM_ATTEND from h0, c0 (kwargs) and the (N, 196, 512) image_feature_region tensor
        # step 4: construct DeepOutLayer from the h_t, z_t outputs of the LSTM_ATTEND layer
        # step 5: use the DeepOutLayer output (instead of h_t) to compute the output vector, then the negative log likelihood via the SoftmaxLogLoss layer
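        # Expected tensor shapes through the pipeline (a sketch, assuming
        # relu5_3 outputs (N, 512, 14, 14) feature maps for 224x224 input):
        #   relu5_3                     (N, 512, 14, 14)
        #   4th_convol_feature_region   (N, 512, 196)  after ReshapeLayer
        #                               (N, 196, 512)  after the transpose below
        #   average_feature_region      (N, 512)       mean over the 196 regions
        #   h0_initial / c0_initial     (1, N, 512)    via two stacked FCLayers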
        #pdb.set_trace()

        feature_shape = net.content['relu5_3'].output.shape
        new_shape = (feature_shape[0], feature_shape[1],
                     T.prod(feature_shape[2:]))
        #pdb.set_trace()
        #net.content['relu5_3_norm'] = NormLayer(net, net.content['relu5_3'], relu_mean, relu_std)

        net.content['4th_convol_feature_region'] = ReshapeLayer(
            net, net.content['relu5_3'], new_shape)

        # Adding dropout to VGG output
        net.content['4th_convol_feature_region'] = DropOut(
            net, net.content['4th_convol_feature_region'], 0.2)

        net.layer_opts['num_region'] = 196
        net.content['average_feature_region'] = AverageLayer(
            net, net.content['4th_convol_feature_region'], 2)

        # Done
        # avg_out = net.content['average_feature_region'].output.eval({net.input[0]:X.eval()})

        net.layer_opts['num_lstm_node'] = 512
        input_shape_h0 = (1, 512)
        output_shape_h0 = (1, net.layer_opts['num_lstm_node'])
        n_hidden_h0 = 512

        #GENERATING H0
        # net.content['h0_initial'] = MLPLayer(net, net.content['average_feature_region'],
        # 	input_shape = input_shape_h0, output_shape= output_shape_h0,n_hidden= n_hidden_h0)
        net.layer_opts['num_fc_node'] = n_hidden_h0
        net.content['h0_hidden_layer'] = FCLayer(
            net, net.content['average_feature_region'], input_shape_h0, T.tanh)

        net.layer_opts['num_fc_node'] = output_shape_h0[1]
        hidden_shape = (input_shape_h0[1], n_hidden_h0)
        net.content['h0_initial'] = FCLayer(net,
                                            net.content['h0_hidden_layer'],
                                            hidden_shape)

        out_shape = net.content['h0_initial'].output.shape
        net.content['h0_initial'].output = net.content[
            'h0_initial'].output.reshape((-1, out_shape[0], out_shape[1]))

        # h0_init_out =net.content['h0_initial'].output.eval({net.input[0]: X.eval()})

        #GENERATING C0
        # net.content['c0_initial'] = MLPLayer(net, net.content['average_feature_region'],
        # 	input_shape = input_shape_h0, output_shape = output_shape_h0,n_hidden= n_hidden_h0)
        net.layer_opts['num_fc_node'] = n_hidden_h0
        net.content['c0_hidden_layer'] = FCLayer(
            net, net.content['average_feature_region'], input_shape_h0, T.tanh)

        net.layer_opts['num_fc_node'] = output_shape_h0[1]
        net.content['c0_initial'] = FCLayer(net,
                                            net.content['c0_hidden_layer'],
                                            hidden_shape)

        out_shape = net.content['c0_initial'].output.shape
        net.content['c0_initial'].output = net.content[
            'c0_initial'].output.reshape((-1, out_shape[0], out_shape[1]))

        #Word Embedding Layer
        net.layer_opts['num_emb'] = 400
        net.content['we'] = WordEmbLayer(
            net, net.content['input_sen'],
            (trainer.opts['batch_size'], max_len - 1, n_word, 1))
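        # 'input_sen' carries one-hot captions of shape
        # (batch_size, max_len - 1, n_word, 1); 'we' maps every word to a
        # num_emb-dimensional embedding.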

        we_shape = net.content['we'].output.shape
        net.content['we'].output = net.content['we'].output.reshape(
            (we_shape[0], we_shape[1], we_shape[2], -1))  # -1 infers the last dim
        net.content['we_dropout'] = DropOut(net, net.content['we'], 0.1)

        net.layer_opts['num_lstm_node'] = 512
        net.layer_opts['context_dim'] = 512
        net.layer_opts['num_dimension_feature'] = 512
        net.layer_opts['num_region'] = 196

        net.content['4th_convol_feature_region'].output = T.transpose(
            net.content['4th_convol_feature_region'].output, (0, 2, 1))

        # X = np.zeros((2,3,224,224),dtype=np.float32)
        # Y = np.zeros((2,max_len,n_word,1), dtype=np.float32)
        # im_f_feature = net.content['4th_convol_feature_region'].output.eval({
        #     net.input[0]:X
        #     })
        # we_out = net.content['we'].output.eval({net.input[1]:Y})
        # pdb.set_trace()
        net.content['lstm_attend'] = LSTM_Attend(
            net,
            net.content['we_dropout'],
            (trainer.opts['batch_size'], max_len - 1,
             net.layer_opts['num_emb'], 1),
            net.content['4th_convol_feature_region'].output,
            initial_h0=net.content['h0_initial'].output,
            initial_c0=net.content['c0_initial'].output)
        #we_out = we_out, f_region=f_region)

        net.layer_opts['num_deep_out_node'] = 400  # same size as the word embedding
        net.layer_opts["n_word"] = n_word
        net.content['deep_out_layer'] = DeepOutputLayer(
            net, net.content['we_dropout'], net.content['lstm_attend'])

        net.layer_opts['num_affine_node'] = n_word
        net.layer_opts['l2_term'] = 0.000014
        net.content['l2'] = L2WeightDecay(net, net.content['deep_out_layer'])

        net.layer_opts['softmax_norm_dim'] = 2
        net.content['smloss'] = SoftmaxLogLoss(net,
                                               net.content['deep_out_layer'])

        net.content['cost'] = AggregateSumLoss(
            [net.content['l2'], net.content['smloss']])
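        # Total training objective: the softmax log loss plus the L2
        # weight-decay term, summed by AggregateSumLoss.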

        net.InitLR(0.2)
        memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
        print('Memory: %.2f avail before initialize params' %
              (memory[0] / 1024. / 1024 / 1024))

        trainer.InitParams(net)
        print("Done init params")
        train_update_rule = trainer.InitUpdateRule(net)
        print("Done init update rule")
        additional_output = ['deep_out_layer', 'l2']

        # net.InitValFunction([val_X, val_Y[:,:-1,:,:]], val_Y[:,1:,:,:],
        # 	additional_output, val_weight, net.content['lstm_attend'].output_z)
        e = 0
        last_big_e = 0
    else:
        snapshot_list = sorted(snapshot_list)
        print('Loading latest snapshot at %s' % snapshot_list[-1])
        e = 0
        [net, trainer, last_big_e] = LoadList(snapshot_list[-1])

        net.layer_opts['l2_term'] = 0.000014
        net.content['l2'] = L2WeightDecay(net, net.content['deep_out_layer'])

        net.content['cost'] = AggregateSumLoss(
            [net.content['l2'], net.content['smloss']])
        net.InitLR(0.2)
        memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
        print('Memory: %.2f avail before initialize params' %
              (memory[0] / 1024. / 1024 / 1024))

        trainer.InitParams(net)
        print("Done init params")
        train_update_rule = trainer.InitUpdateRule(net)
        print("Done init update rule")
        additional_output = ['deep_out_layer', 'l2']
    for big_e in range(last_big_e + 1, num_big_epoch):
        # Load train data
        h_list = list(range(11))  # the 11 training h5 shards
        np.random.shuffle(h_list)
        for h in h_list:
            #break
            #if (not ('train_X' in locals())):
            train_X = LoadH5(img_data_path % h)
            dict_key = list(train_X.keys())[0]
            train_X = train_X[dict_key]
            num_sample = train_X.shape[0]
            # train_Y has the shape of (num_sample, 5, max_len, n_word, 1)
            train_Y = LoadH5(cap_data_path % h)
            dict_key = list(train_Y.keys())[0]
            train_Y = train_Y[dict_key]
            Y_shape = train_Y.shape

            # For debugging
            #train_X = train_X[0:100,:,:,:]
            #train_Y = train_Y[0:100,:,:,:,:]
            #num_sample = 100

            #train_Y = train_Y.reshape(5*num_sample, Y_shape[2], Y_shape[3], 1)
            #random_caption_idx = net.net_opts['rng'].randint(0,5,num_sample) + np.asarray([i*5 for i in range(num_sample)])

            # Each image has 5 captions; the random pick is disabled here and
            # caption 0 is used for every image.
            #train_Y = train_Y[random_caption_idx, :, :, :]
            train_Y = train_Y[:, 0, :, :, :]
            train_Y = train_Y.astype(theano.config.floatX)

            # Create weight from train_Y
            train_weight = np.copy(train_Y)
            train_weight = train_weight[:, 1:, :, :]
            weight_shape = train_weight.shape
            train_weight = (train_weight[:, :, 0, 0] == 0).reshape(
                weight_shape[0], weight_shape[1], 1, 1)
            train_weight = np.repeat(train_weight, weight_shape[2], 2)
            train_weight = np.repeat(train_weight, weight_shape[3], 3)
            train_weight = train_weight.astype(theano.config.floatX)
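            # A small illustration of the mask above (a sketch, assuming word
            # index 0 is the padding/NULL token):
            #   y = np.zeros((1, 3, n_word, 1), dtype=np.float32)
            #   y[0, 0, 0, 0] = 1   # t=0: padding
            #   y[0, 1, 7, 0] = 1   # t=1: a real word
            #   y[0, 2, 0, 0] = 1   # t=2: padding
            #   (y[:, :, 0, 0] == 0) -> [[False, True, False]]
            # so only real words contribute to the loss; np.repeat then
            # broadcasts the mask over the n_word and singleton dimensions.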

            num_big_batch_iteration = int(np.ceil(
                np.asarray(num_sample, dtype=theano.config.floatX) /
                big_batch_size))

            for j in range(0, num_big_batch_iteration):
                big_batch_range = np.arange(j * big_batch_size,
                                            (j + 1) * big_batch_size)

                if ((j + 1) * big_batch_size > num_sample):
                    big_batch_range = np.arange(j * big_batch_size, num_sample)

                trainer.opts['num_sample'] = big_batch_range.shape[0]
                big_batch_range = np.asarray(big_batch_range, dtype=np.uint32)
                np.random.shuffle(big_batch_range)
                memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info(
                )
                print(
                    'Memory: %.2f avail before putting train data to shared' %
                    (memory[0] / 1024. / 1024 / 1024))

                train_Xj = theano.shared(train_X[big_batch_range, :, :, :])
                train_Yj = theano.shared(train_Y[big_batch_range, :, :, :])
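                # Fingerprint of the first caption in this chunk: argmax word
                # indices weighted by 1.3**t and summed. Comparing the printed
                # value across runs shows whether shuffling/chunking changed.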
                hash_weight = np.asarray([1.3**t for t in range(max_len)])
                hash_value = np.sum(
                    np.argmax(train_Yj[0, :, :, 0].eval(), axis=1) *
                    hash_weight)
                print(hash_value)
                #pdb.set_trace()

                train_weightj = theano.shared(
                    train_weight[big_batch_range, :, :, :])
                memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info(
                )
                print('Memory: %.2f avail after' %
                      (memory[0] / 1024. / 1024 / 1024))

                #val_Xtest = train_Xj.eval()[0:2,:,:,:]
                #val_Ytest = train_Yj.eval()[0:2,:-1,:,:]
                #z_m1_dummy = np.zeros((1, 2, net.content['lstm_attend'].Z_shape[0]), dtype=theano.config.floatX)
                #pdb.set_trace()
                #relu5_3norm = net.content['relu5_3_norm'].output.eval({net.input[0]: val_Xtest})
                #relu5_3 = net.content['relu5_3'].output.eval({net.input[0]: val_Xtest})
                #h_out = net.content['lstm_attend'].output.eval({
                #    net.input[0]: val_Xtest,
                #    net.input[1]: val_Ytest,
                #    #net.content['lstm_attend'].z_m1_sym
                #    })
                #z_out = net.content['lstm_attend'].output_z.eval({
                #    net.input[0]: val_Xtest,
                #    net.input[1]: val_Ytest,
                #    #net.content['lstm_attend'].z_m1_sym
                #    })
                #c_out = net.content['lstm_attend'].output_c.eval({
                #    net.input[0]: val_Xtest,
                #    net.input[1]: val_Ytest,
                #    #net.content['lstm_attend'].z_m1_sym
                #    })
                #deep_out0 = net.content['deep_out_layer'].output.eval({ \
                #    net.input[0]: val_Xtest, \
                #    net.input[1]: val_Ytest, \
                #    net.content['lstm_attend'].z_m1_sym: z_m1_dummy \
                #})

                #fourth_cv_out = net.content['4th_convol_feature_region'].output.eval({\
                #        net.input[0]: val_Xtest, \
                #})

                #avg_feature = net.content['average_feature_region'].output.eval({\
                #        net.input[0]: val_Xtest, \
                #})
                #
                #h0_init = net.content['h0_initial'].output.eval({\
                #        net.input[0]: val_Xtest
                #        })

                #img_out = net.content['lstm_attend'].img_out.eval({\
                #        net.input[0]: val_Xtest,\
                #        })
                #pdb.set_trace()
                net.InitTrainFunction(train_update_rule,
                                      [train_Xj, train_Yj[:, :-1, :, :]],
                                      train_Yj[:, 1:, :, :], additional_output,
                                      train_weightj)
                print("Done init train function")

                print("start training")
                trainer.opts['validation'] = False
                trainer.opts['train'] = True
                main_loop = SGDRMainLoop(net, trained_path)
                main_loop.run(net, trainer, e)

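                # Release the GPU-resident chunks and the compiled train
                # function before loading the next chunk.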
                del train_Xj
                del train_Yj
                del train_weightj
                del net.train_function

                train_Xj = None
                train_Yj = None
                train_weightj = None
                net.train_function = None
                print('Finished iteration %d, h5 %d, of big epoch %d' %
                      (j, h, big_e))
                plt.figure()
                plt.plot(trainer.all_i[-1000::5])
                plt.savefig('SAT14e-5_all_i_last1000.png')

                plt.close()

                plt.figure()
                plt.plot(trainer.all_i)
                plt.savefig('SAT14e-5_all_i.png')
                plt.close()

            if (big_e % trainer.opts['save_freq'] == 0):
                net1 = net.NNCopy()
                SaveList([net1, trainer, big_e],
                         '../../data/trained_model/%s_e-%05d.dat' %
                         (net.name, big_e))

        # Validation frequency matches the save frequency
        if (big_e % trainer.opts['save_freq'] == 0):
            for h in range(2):  # Max is 6
                val_X = LoadH5(val_img_data_path % h)
                dict_key = list(val_X.keys())[0]
                val_X = val_X[dict_key]
                num_val_sample = val_X.shape[0]

                # val_Y has the shape of (num_val_sample, 5, max_len, n_word, 1)
                val_Y = LoadH5(val_cap_data_path % h)

                dict_key = list(val_Y.keys())[0]
                val_Y = val_Y[dict_key]
                Y_shape = val_Y.shape
                val_Y = val_Y.reshape(5 * num_val_sample, Y_shape[2],
                                      Y_shape[3], 1)

                random_caption_idx = net.net_opts['rng'].randint(
                    0, 5, num_val_sample) + np.asarray(
                        [i * 5 for i in range(num_val_sample)])
                # Each image has 5 captions, pick one at random
                val_Y = val_Y[random_caption_idx, :, :, :]
                val_Y = val_Y.astype(theano.config.floatX)
                # Create weight from val_Y
                val_weight = np.copy(val_Y)
                val_weight = val_weight[:, 1:, :, :]
                weight_shape = val_weight.shape
                val_weight = (val_weight[:, :, 0, 0] == 0).reshape(
                    weight_shape[0], weight_shape[1], 1, 1)
                val_weight = np.repeat(val_weight, weight_shape[2], 2)
                val_weight = np.repeat(val_weight, weight_shape[3], 3)
                val_weight = val_weight.astype(theano.config.floatX)

                num_big_batch_iteration = int(np.ceil(
                    np.asarray(num_val_sample, dtype=theano.config.floatX) /
                    big_batch_size))

                for j in range(0, num_big_batch_iteration):
                    big_batch_range = np.arange(j * big_batch_size,
                                                (j + 1) * big_batch_size)

                    if ((j + 1) * big_batch_size > num_val_sample):
                        big_batch_range = np.arange(j * big_batch_size,
                                                    num_val_sample)

                    trainer.opts['num_val_sample'] = big_batch_range.shape[0]
                    big_batch_range = np.asarray(big_batch_range,
                                                 dtype=np.uint32)
                    memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info(
                    )
                    print(
                        'Memory: %.2f avail before putting val data to shared'
                        % (memory[0] / 1024. / 1024 / 1024))
                    val_Xj = theano.shared(val_X[big_batch_range, :, :, :])
                    val_Yj = theano.shared(val_Y[big_batch_range, :, :, :])

                    hash_weight = np.asarray([1.3**t for t in range(max_len)])
                    hash_value = np.sum(
                        np.argmax(val_Yj[0, :, :, 0].eval(), axis=1) *
                        hash_weight)
                    print(hash_value)
                    val_weightj = theano.shared(
                        val_weight[big_batch_range, :, :, :])

                    memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info(
                    )
                    print('Memory: %.2f avail after' %
                          (memory[0] / 1024. / 1024 / 1024))

                    net.InitValFunction([val_Xj, val_Yj[:, :-1, :, :]],
                                        val_Yj[:, 1:, :, :], additional_output,
                                        val_weightj)
                    print("Done init val function")

                    print("start validating")
                    trainer.opts['validation'] = True
                    trainer.opts['train'] = False
                    main_loop = SGDRMainLoop(net, trained_path)
                    main_loop.run(net, trainer, e)

                    del val_Xj
                    del val_Yj
                    del val_weightj
                    del net.val_function

                    val_Xj = None
                    val_Yj = None
                    val_weightj = None
                    net.val_function = None
                    print(
                        'Finished validating at iteration %d, h5 %d, of big epoch %d'
                        % (j, h, big_e))
# Show-Attend-Tell training on the Flickr 224x224 data (a second, distinct
# entry point; the MSCOCO variant above keeps the train_Attend_224 name).
def train_Attend_224_flickr():
    trained_path = '../../data/trained_model/'
    # LSTM params
    n_word = 2000
    max_len = 40

    train_X, train_Y, train_weight, val_X, val_Y, val_weight = CreateDataFlick224(
        n_word)

    #pdb.set_trace()
    #create net
    net = ShowTellNet()
    net.name = "ShowAttendTell"
    snapshot_list = glob.glob(trained_path + net.name + '*.dat')

    X = train_X[0:2, :, :, :]
    Y = train_Y[0:2, :, :, :]
    input_Y = train_Y[:, :-1, :, :]
    expected_Y = train_Y[:, 1:, :, :]
    weight = train_weight[0:2, :, :, :]

    num_sample = 6000
    num_big_epoch = 100
    big_batch_size = np.asarray(2000, dtype=theano.config.floatX)
    num_big_batch_iteration = int(np.ceil(
        np.asarray(num_sample, dtype=theano.config.floatX) / big_batch_size))

    if (len(snapshot_list) == 0):

        # Trainer params
        trainer = Trainer()
        trainer.opts['batch_size'] = 20
        trainer.opts['save'] = False
        trainer.opts['save_freq'] = 20
        trainer.opts['num_sample'] = 2000
        trainer.opts['num_val_sample'] = 1000
        trainer.opts['validation'] = False
        trainer.opts['num_epoch'] = 1
        trainer.opts['dzdw_norm_thres'] = 1
        trainer.opts['dzdb_norm_thres'] = 0.01

        net = LoadVGG_Attend(net)
        net.layer_opts['updatable'] = True

        # Setting params
        net.net_opts['l1_learning_rate'] = np.asarray(0.005,
                                                      theano.config.floatX)
        net.reset_opts['min_lr'] = np.asarray(0.005,
                                              dtype=theano.config.floatX)
        net.reset_opts['max_lr'] = net.net_opts['l1_learning_rate']

        #Constructing LSTM_ATTEND network from image_feature_region step-by-step
        # step 1: reshape net.content['relu5_3'] to (N, 512, 196), later transposed to the (N, 196, 512) image_feature_region tensor
        # step 2: use the (N, 196, 512) image_feature_region tensor to compute h0, c0 - the initial state and memory of LSTM_ATTEND
        # step 3: construct LSTM_ATTEND from h0, c0 (kwargs) and the (N, 196, 512) image_feature_region tensor
        # step 4: construct DeepOutLayer from the h_t, z_t outputs of the LSTM_ATTEND layer
        # step 5: use the DeepOutLayer output (instead of h_t) to compute the output vector, then the negative log likelihood via the SoftmaxLogLoss layer

        feature_shape = net.content['relu5_3'].output.shape
        new_shape = (feature_shape[0], feature_shape[1],
                     T.prod(feature_shape[2:]))
        net.content['4th_convol_feature_region'] = ReshapeLayer(
            net, net.content['relu5_3'],
            new_shape)  #net.content['pool4'].output.reshape()

        # Done
        # pdb.set_trace()
        # convol_out = net.content['4th_convol_feature_region'].output.eval({net.input[0]: X.eval()})
        # pdb.set_trace()

        net.layer_opts['num_region'] = 196
        # pdb.set_trace()
        net.content['average_feature_region'] = AverageLayer(
            net, net.content['4th_convol_feature_region'], 2)

        # Done
        # avg_out = net.content['average_feature_region'].output.eval({net.input[0]:X.eval()})

        net.layer_opts['num_lstm_node'] = 512
        input_shape_h0 = (1, 512)
        output_shape_h0 = (1, net.layer_opts['num_lstm_node'])
        n_hidden_h0 = 512

        #GENERATING H0
        # net.content['h0_initial'] = MLPLayer(net, net.content['average_feature_region'],
        # 	input_shape = input_shape_h0, output_shape= output_shape_h0,n_hidden= n_hidden_h0)
        net.layer_opts['num_fc_node'] = n_hidden_h0
        net.content['h0_hidden_layer'] = FCLayer(
            net, net.content['average_feature_region'], input_shape_h0, T.tanh)

        net.layer_opts['num_fc_node'] = output_shape_h0[1]
        hidden_shape = (input_shape_h0[1], n_hidden_h0)
        net.content['h0_initial'] = FCLayer(net,
                                            net.content['h0_hidden_layer'],
                                            hidden_shape)

        out_shape = net.content['h0_initial'].output.shape
        net.content['h0_initial'].output = net.content[
            'h0_initial'].output.reshape((-1, out_shape[0], out_shape[1]))

        # pdb.set_trace()
        # h0_init_out =net.content['h0_initial'].output.eval({net.input[0]: X.eval()})
        # pdb.set_trace()

        #GENERATING C0
        # net.content['c0_initial'] = MLPLayer(net, net.content['average_feature_region'],
        # 	input_shape = input_shape_h0, output_shape = output_shape_h0,n_hidden= n_hidden_h0)
        net.layer_opts['num_fc_node'] = n_hidden_h0
        net.content['c0_hidden_layer'] = FCLayer(
            net, net.content['average_feature_region'], input_shape_h0, T.tanh)

        net.layer_opts['num_fc_node'] = output_shape_h0[1]
        net.content['c0_initial'] = FCLayer(net,
                                            net.content['c0_hidden_layer'],
                                            hidden_shape)

        out_shape = net.content['c0_initial'].output.shape
        net.content['c0_initial'].output = net.content[
            'c0_initial'].output.reshape((-1, out_shape[0], out_shape[1]))

        #Word Embedding Layer
        net.layer_opts['num_emb'] = 512
        net.content['we'] = WordEmbLayer(
            net, net.content['input_sen'],
            (trainer.opts['batch_size'], max_len - 1, n_word, 1))

        # pdb.set_trace()
        # we_out = net.content['we'].output.eval({net.input[1]: Y.eval()})
        # pdb.set_trace()

        net.layer_opts['num_lstm_node'] = 512
        net.layer_opts['context_dim'] = 1024
        net.layer_opts['num_dimension_feature'] = 512
        net.layer_opts['num_region'] = 196

        net.content['4th_convol_feature_region'].output = T.transpose(
            net.content['4th_convol_feature_region'].output, (0, 2, 1))

        net.content['lstm_attend'] = LSTM_Attend(
            net,
            net.content['we'], (trainer.opts['batch_size'], max_len - 1,
                                net.layer_opts['num_emb'], 1),
            net.content['4th_convol_feature_region'].output,
            initial_h0=net.content['h0_initial'].output,
            initial_c0=net.content['c0_initial'].output)

        # pdb.set_trace()

        # lstm_out = net.content['lstm_attend'].output.eval({net.input[0]: X.eval(),
        # 	net.input[1]:Y.eval(),
        # 	net.content['lstm_attend'].z_m1_sym: np.zeros((1, 2, net.layer_opts['num_dimension_feature']), dtype=theano.config.floatX)})
        # print(lstm_out[0].shape)
        # print(lstm_out[1].shape)
        # # print(lstm_out[2].shape)
        # pdb.set_trace()

        net.layer_opts['num_deep_out_node'] = 512  #300
        net.layer_opts["n_word"] = n_word
        net.content['deep_out_layer'] = DeepOutputLayer(
            net, net.content['we'], net.content['lstm_attend'])

        # net.layer_opts['num_affine_node'] = n_word
        # net.content['deep_out_layer'] = AffineLayer(net, net.content['lstm_attend'],
        #                                        (trainer.opts['batch_size'],
        #                                         max_len - 1,
        #                                         net.layer_opts['num_lstm_node'],
        #                                         1))

        # pdb.set_trace()
        # deep_out = net.content['deep_out_layer'].output.eval({net.input[0]: X.eval(),
        # 	net.input[1]: Y.eval(),
        # 	net.content['lstm_attend'].z_m1_sym: np.zeros((1, 2, net.layer_opts['num_dimension_feature']), dtype=theano.config.floatX)})

        net.layer_opts['l2_term'] = 0.125
        net.content['l2'] = L2WeightDecay(net, net.content['deep_out_layer'])

        net.layer_opts['softmax_norm_dim'] = 2
        net.content['smloss'] = SoftmaxLogLoss(net,
                                               net.content['deep_out_layer'])

        net.content['cost'] = AggregateSumLoss(
            [net.content['l2'], net.content['smloss']])

        # pdb.set_trace()
        # print(X.eval().shape)
        # print(Y.eval().shape)
        # print(weight.eval().shape)

        # logloss_out = net.content['cost'].output.eval({net.input[0]: X.eval(),
        # 	net.input[1]: input_Y.eval(),
        # 	net.output[0]: expected_Y.eval(),
        # 	net.weight[0]: weight.eval(),
        # 	net.content['lstm_attend'].z_m1_sym: np.zeros((1, 2, net.layer_opts['num_dimension_feature']), dtype=theano.config.floatX)})

        # print("Done creating layer")
        # pdb.set_trace()

        net.InitLR(0.2)
        trainer.InitParams(net)
        print("Done init params")
        train_update_rule = trainer.InitUpdateRule(net)
        print("Done init update rule")
        additional_output = [
            'input_sen', 'deep_out_layer', 'we', 'lstm_attend'
        ]

        # net.InitValFunction([val_X, val_Y[:,:-1,:,:]], val_Y[:,1:,:,:],
        # 	additional_output, val_weight, net.content['lstm_attend'].output_z)
        e = 0
        last_big_e = 0
    else:
        snapshot_list = sorted(snapshot_list)
        print('Loading latest snapshot at %s' % snapshot_list[-1])
        # Resume state and re-create the training machinery, mirroring the
        # snapshot branch of train_Attend_224 above.
        e = 0
        [net, trainer, last_big_e] = LoadList(snapshot_list[-1])
        net.InitLR(0.2)
        trainer.InitParams(net)
        print("Done init params")
        train_update_rule = trainer.InitUpdateRule(net)
        print("Done init update rule")
        additional_output = [
            'input_sen', 'deep_out_layer', 'we', 'lstm_attend'
        ]

    for big_e in range(last_big_e, num_big_epoch):
        for j in range(0, num_big_batch_iteration):
            big_batch_range = np.arange(j * big_batch_size,
                                        (j + 1) * big_batch_size)
            if ((j + 1) * big_batch_size > num_sample):
                big_batch_range = np.arange(j * big_batch_size, num_sample)
            trainer.opts['num_sample'] = big_batch_range.shape[0]
            big_batch_range = np.asarray(big_batch_range, dtype=np.uint32)
            memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
            print('Memory: %.2f avail before putting train data to shared' %
                  (memory[0] / 1024. / 1024 / 1024))
            train_Xj = theano.shared(train_X[big_batch_range, :, :, :])
            train_Yj = theano.shared(train_Y[big_batch_range, :, :, :])
            train_weightj = theano.shared(
                train_weight[big_batch_range, :, :, :])
            memory = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.mem_info()
            print('Memory: %.2f avail after' %
                  (memory[0] / 1024. / 1024 / 1024))

            net.InitTrainFunction(train_update_rule,
                                  [train_Xj, train_Yj[:, :-1, :, :]],
                                  train_Yj[:, 1:, :, :], additional_output,
                                  train_weightj, net.weight[0])
            print("Done init train function")

            # net.InitValFunction([val_X, val_Y[:,:-1,:,:]], val_Y[:,1:,:,:], additional_output, val_weight)
            # print("Done init val function")

            print("start training")
            trainer.opts['validation'] = False
            trainer.opts['train'] = True
            main_loop = SGDRMainLoop(net, trained_path)
            main_loop.run(net, trainer, e)

            train_Xj = None
            train_Yj = None
            train_weightj = None
            net.train_function = None
            print('Finished iteration %d of big epoch %d' % (j, big_e))
def train():
    trained_path = '../../data/trained_model/'
    # LSTM params
    n_word = 2000
    max_len = 40

    # Create net
    net = ShowTellNet()
    net.name = 'ShowTellCheck'
    #net.name = 'abc'
    # Find latest snapshot
    snapshot_list = glob.glob(trained_path + net.name + '*.dat')

    if (len(snapshot_list) == 0):
        train_X, train_Y, train_weight, val_X, val_Y, val_weight = CreateData(
            n_word)
        #train_X = theano.shared(train_X.eval()[0:200,:,:,:])
        #train_Y = theano.shared(train_Y.eval()[0:200,:,:,:])
        # Trainer params
        trainer = Trainer()
        trainer.opts['batch_size'] = 32
        trainer.opts['save'] = False
        trainer.opts['save_freq'] = 20
        trainer.opts['num_sample'] = 200
        trainer.opts['num_val_sample'] = 1000
        trainer.opts['validation'] = False
        trainer.opts['num_epoch'] = 10000
        trainer.opts['dzdw_norm_thres'] = 1
        trainer.opts['dzdb_norm_thres'] = 0.01
        # Load VGG
        net = LoadVGG(net)
        net.layer_opts['updatable'] = True

        # Setting params
        net.net_opts['l1_learning_rate'] = np.asarray(0.005,
                                                      theano.config.floatX)
        net.reset_opts['min_lr'] = np.asarray(0.005,
                                              dtype=theano.config.floatX)
        net.reset_opts['max_lr'] = net.net_opts['l1_learning_rate']

        # Construct the network

        net.layer_opts['num_fc_node'] = 512
        # net.layer_opts['num_fc_node'] = 128
        # net.content['fc6'] = FCLayer(net, net.content['pool5'], (1, 512, 2, 2))
        net.content['fc6'] = FCLayer(net, net.content['pool5'], (1, 512, 4, 4))

        net.content['fc6_swap'] = SwapDim(net, net.content['fc6'], 1, 2)

        net.layer_opts['num_emb'] = 512
        # net.layer_opts['num_emb'] = 128
        net.content['we'] = WordEmbLayer(
            net, net.content['input_sen'],
            (trainer.opts['batch_size'], max_len - 1, n_word, 1))

        net.content['cat'] = Concat(net, net.content['fc6_swap'],
                                    net.content['we'], 1)

        net.layer_opts['num_lstm_node'] = n_word
        net.content['lstm'] = LSTM(net, net.content['cat'],
                                   (trainer.opts['batch_size'], max_len - 1,
                                    net.layer_opts['num_emb'], 1))
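        # The concatenated sequence is (batch, 1 + (max_len - 1), num_emb, 1):
        # the image embedding takes timestep 0 and the shifted caption the
        # rest, which is why the first LSTM output is dropped below ('lstm_r').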

        ################
        # TESTING LSTM #
        ################

        # h_dummy = np.zeros((1, 1, net.layer_opts['num_lstm_node']), dtype=theano.config.floatX)
        # c_dummy = np.zeros((1, 1, net.layer_opts['num_lstm_node']), dtype=theano.config.floatX)
        # h_dummy2 = np.zeros((1, 2, net.layer_opts['num_lstm_node']), dtype=theano.config.floatX)
        # c_dummy2 = np.zeros((1, 2, net.layer_opts['num_lstm_node']), dtype=theano.config.floatX)
        h_dummy5 = np.zeros((1, 5, net.layer_opts['num_lstm_node']),
                            dtype=theano.config.floatX)
        c_dummy5 = np.zeros((1, 5, net.layer_opts['num_lstm_node']),
                            dtype=theano.config.floatX)

        # cat = net.content['cat'].output.eval({net.input[0]:X , net.input[1]: Y})
        # cat = np.reshape(cat, (2, 41, 128))
        # cat0 = np.reshape(cat[1,0,:], (1,1,128))
        # cat1 = np.reshape(cat[1,1,:], (1,1,128))
        # cat2 = np.reshape(cat[1,2,:], (1,1,128))
        #
        # x0 = cat[0,0,:].reshape(1,1,128)
        # x1 = cat[0,1,:].reshape(1,1,128)
        # x2 = cat[0,2,:].reshape(1,1,128)

        # Wi = net.content['lstm'].W['i'].eval()
        # Wf = net.content['lstm'].W['f'].eval()
        # Wc = net.content['lstm'].W['c'].eval()
        # Wo = net.content['lstm'].W['o'].eval()
        #
        # Ui = net.content['lstm'].U['i'].eval()
        # Uf = net.content['lstm'].U['f'].eval()
        # Uc = net.content['lstm'].U['c'].eval()
        # Uo = net.content['lstm'].U['o'].eval()
        #
        # bi = net.content['lstm'].b['i'].eval()
        # bf = net.content['lstm'].b['f'].eval()
        # bc = net.content['lstm'].b['c'].eval()
        # bo = net.content['lstm'].b['o'].eval()
        # hm1 = h_dummy
        # cm1 = c_dummy
        #
        # # First iteration
        # i0 = npsigmoid(np.dot(x0, Wi) + np.dot(hm1, Ui) + bi)
        # f0 = npsigmoid(np.dot(x0, Wf) + np.dot(hm1, Uf) + bf)
        # o0 = npsigmoid(np.dot(x0, Wo) + np.dot(hm1, Uo) + bo)
        # c0 = f0*cm1 + i0*np.tanh(np.dot(x0, Wc) + np.dot(hm1, Uc) + bc)
        # h0 = o0*c0
        #
        # # 2nd iteration
        # i1 = npsigmoid(np.dot(x1, Wi) + np.dot(h0, Ui) + bi)
        # f1 = npsigmoid(np.dot(x1, Wf) + np.dot(h0, Uf) + bf)
        # o1 = npsigmoid(np.dot(x1, Wo) + np.dot(h0, Uo) + bo)
        # c1 = f1 * c0 + i1 * np.tanh(np.dot(x1, Wc) + np.dot(h0, Uc) + bc)
        # h1 = o1 * c1
        #
        # i2 = npsigmoid(np.dot(x2, Wi) + np.dot(h1, Ui) + bi)
        # f2 = npsigmoid(np.dot(x2, Wf) + np.dot(h1, Uf) + bf)
        # o2 = npsigmoid(np.dot(x2, Wo) + np.dot(h1, Uo) + bo)
        # c2 = f2 * c1 + i2 * np.tanh(np.dot(x2, Wc) + np.dot(h1, Uc) + bc)
        # h3 = o2 * c2
        # bp = 1
        #
        # h1, c1 = onestep(cat0, h_dummy, c_dummy, net.content['lstm'].W['i'], net.content['lstm'].W['f'],
        #                  net.content['lstm'].W['c'], net.content['lstm'].W['o'],
        #                  net.content['lstm'].U['i'], net.content['lstm'].U['f'], net.content['lstm'].U['c'],
        #                  net.content['lstm'].U['o'],
        #                  net.content['lstm'].b['i'], net.content['lstm'].b['f'], net.content['lstm'].b['c'],
        #                  net.content['lstm'].b['o'])
        #
        # h1 = h1.eval()
        # c1 = c1.eval()
        #
        # h2, c2 = onestep(cat1, h1, c1, net.content['lstm'].W['i'], net.content['lstm'].W['f'],
        #                  net.content['lstm'].W['c'], net.content['lstm'].W['o'],
        #                  net.content['lstm'].U['i'], net.content['lstm'].U['f'], net.content['lstm'].U['c'],
        #                  net.content['lstm'].U['o'],
        #                  net.content['lstm'].b['i'], net.content['lstm'].b['f'], net.content['lstm'].b['c'],
        #                  net.content['lstm'].b['o'])
        #
        # h2 = h2.eval()
        # c2 = c2.eval()
        #
        # h3, c3 = onestep(cat2, h2, c2, net.content['lstm'].W['i'], net.content['lstm'].W['f'],
        #                  net.content['lstm'].W['c'], net.content['lstm'].W['o'],
        #                  net.content['lstm'].U['i'], net.content['lstm'].U['f'], net.content['lstm'].U['c'],
        #                  net.content['lstm'].U['o'],
        #                  net.content['lstm'].b['i'], net.content['lstm'].b['f'], net.content['lstm'].b['c'],
        #                  net.content['lstm'].b['o'])
        #
        # h3 = h3.eval()
        # c3 = c3.eval()
        #
        # lstm = net.content['lstm'].output.eval({net.input[0]:X, net.input[1]:Y,
        #                                         net.content['lstm'].h_m1_sym: h_dummy2,
        #                                         net.content['lstm'].c_m1_sym: c_dummy2})

        # Remove the first 'word': it corresponds to the image-embedding
        # timestep and is not part of the actual sentence.
        net.content['lstm_r'] = LSTMRemove(net, net.content['lstm'], 1)
        #a = net.content['lstm_r'].output.eval({net.input[1]: train_Y[0:5,0:-1,:,:].eval(),
        #    net.input[0]: train_X[0:5,:,:,:].eval(),
        #        net.content['lstm'].h_m1_sym: h_dummy5,
        #        net.content['lstm'].c_m1_sym: c_dummy5
        #        })
        #print('lstm_r shape:')
        #print(a.shape)
        net.layer_opts['softmax_norm_dim'] = 2
        net.content['softmax'] = SoftmaxLayer(net, net.content['lstm_r'])

        net.content['cost'] = CategoricalCrossEntropy(net,
                                                      net.content['softmax'])

        net.InitLR(0.2)
        trainer.InitParams(net)
        train_update_rule = trainer.InitUpdateRule(net)
        additional_output = ['input_sen', 'lstm_r', 'softmax']
        net.InitTrainFunction(train_update_rule,
                              [train_X, train_Y[:, :-1, :, :]],
                              train_Y[:, 1:, :, :], additional_output,
                              train_weight)
        net.InitValFunction([val_X, val_Y[:, :-1, :, :]], val_Y[:, 1:, :, :],
                            additional_output, val_weight)
        e = 0
    else:
        snapshot_list = sorted(snapshot_list)
        print('Loading latest snapshot at %s' % snapshot_list[-1])
        net, trainer, e = LoadList(snapshot_list[-1])
        trainer.opts['save_freq'] = 10
        print('Finished loading snapshot')

        train_X, train_Y, train_weight, val_X, val_Y, val_weight = CreateData(
            n_word)
        net.net_opts['l1_learning_rate'] = np.asarray(0.00008,
                                                      theano.config.floatX)
        net.reset_opts['min_lr'] = np.asarray(0.00008,
                                              dtype=theano.config.floatX)
        net.reset_opts['max_lr'] = net.net_opts['l1_learning_rate']
        net.InitLR(1000)
        trainer.InitParams(net)
        train_update_rule = trainer.InitUpdateRule(net)
        additional_output = ['input_sen', 'lstm_r', 'softmax']

        net.InitTrainFunction(train_update_rule,
                              [train_X, train_Y[:, :-1, :, :]],
                              train_Y[:, 1:, :, :], additional_output,
                              train_weight)
        net.InitValFunction([val_X, val_Y[:, :-1, :, :]], val_Y[:, 1:, :, :],
                            additional_output, val_weight)

    main_loop = SGDRMainLoop(net, trained_path)
    main_loop.run(net, trainer, e)
# Toy LSTM sanity check on synthetic data.
def train_lstm_test():
    # theano.config.optimizer='fast_compile'

    trainer = Trainer()

    # Setting training params
    trainer.opts['batch_size'] = 100
    trainer.opts['save'] = True
    trainer.opts['save_freq'] = 100
    trainer.opts['num_sample'] = 300000
    trainer.opts['num_epoch'] = 5000
    trainer.opts['train_sentence_length'] = 11
    trainer.opts['test_setence_length'] = 15
    trainer.opts['num_val_sample'] = 1
    trainer.opts['num_test_sample'] = 1
    # Generate data
    num_class = 16
    np.random.seed(13111991)

    x_dim = 32

    train_X, valid_X, test_X, train_Y, valid_Y, test_Y = CreateData(
        x_dim, num_class, trainer)

    # Create a CNN for debugging by fixing a set of real input
    # net = ConvNeuralNet(train_X[1:16,:,:,:].eval())

    # Create a CNN

    net = ShowTellNet()
    net.name = 'lstm_test'
    trained_path = '../../data/trained_model/'
    #trained_path = '/home/kien/data/trained_model/'
    snapshot_list = glob.glob(trained_path + net.name + '*.dat')
    e = -1
    if (len(snapshot_list) == 0):

        net.net_opts['l1_learning_rate'] = np.asarray(
            0.0001, dtype=theano.config.floatX)
        net.reset_opts['min_lr'] = np.asarray(0.00001,
                                              dtype=theano.config.floatX)
        net.reset_opts['max_lr'] = net.net_opts['l1_learning_rate']

        net.layer_opts['num_fc_node'] = 32
        net.content['img_emb'] = FCLayer(
            net, net.content['input_img'],
            (1, trainer.opts['train_sentence_length'], x_dim, 1))
        net.content['img_emb_swap'] = SwapDim(net, net.content['img_emb'], 1,
                                              2)
        # Construct the network

        net.layer_opts['num_emb'] = 32
        net.content['word_emb'] = WordEmbLayer(
            net, net.content['input_sen'],
            (trainer.opts['batch_size'],
             trainer.opts['train_sentence_length'] - 1, num_class, 1))

        net.content['cat'] = Concat(net, net.content['img_emb_swap'],
                                    net.content['word_emb'], 1)

        net.layer_opts['num_lstm_node'] = num_class
        net.content['lstm'] = LSTM(net, net.content['cat'],
                                   (trainer.opts['batch_size'],
                                    trainer.opts['train_sentence_length'] - 1,
                                    net.layer_opts['num_emb'], 1))

        net.content['lstm_r'] = LSTMRemove(net, net.content['lstm'], 0)

        #################### DEBUG #######################
        # X = np.reshape(train_X[0:2, :, :, :].eval(), (2, 10, x_dim, 1))
        # Y = np.reshape(train_Y[0:2, :, :, :].eval(), (2, 10, num_class, 1))
        # h_dummy = np.zeros((1, 1, net.layer_opts['num_lstm_node']), dtype=theano.config.floatX)
        # c_dummy = np.zeros((1, 1, net.layer_opts['num_lstm_node']), dtype=theano.config.floatX)
        h_dummy5 = np.zeros((1, 5, net.layer_opts['num_lstm_node']),
                            dtype=theano.config.floatX)
        c_dummy5 = np.zeros((1, 5, net.layer_opts['num_lstm_node']),
                            dtype=theano.config.floatX)
        # cat = net.content['cat'].output.eval({net.input[0]:X , net.input[1]: Y})
        # cat = np.reshape(cat, (2, 11, x_dim))
        # cat0 = np.reshape(cat[1,0,:], (1,1,x_dim))
        # cat1 = np.reshape(cat[1,1,:], (1,1,x_dim))
        # cat2 = np.reshape(cat[1,2,:], (1,1,x_dim))
        #
        # x0 = cat[0,0,:].reshape(1,1,x_dim)
        # x1 = cat[0,1,:].reshape(1,1,x_dim)
        # x2 = cat[0,2,:].reshape(1,1,x_dim)
        # x3 = cat[0,3,:].reshape(1,1,x_dim)
        # Wi = net.content['lstm'].W['i'].eval()
        # Wf = net.content['lstm'].W['f'].eval()
        # Wc = net.content['lstm'].W['c'].eval()
        # Wo = net.content['lstm'].W['o'].eval()
        #
        # Ui = net.content['lstm'].U['i'].eval()
        # Uf = net.content['lstm'].U['f'].eval()
        # Uc = net.content['lstm'].U['c'].eval()
        # Uo = net.content['lstm'].U['o'].eval()
        #
        # bi = net.content['lstm'].b['i'].eval()
        # bf = net.content['lstm'].b['f'].eval()
        # bc = net.content['lstm'].b['c'].eval()
        # bo = net.content['lstm'].b['o'].eval()
        #
        # hm1 = h_dummy
        # cm1 = c_dummy
        #
        # # First iteration
        # i0 = npsigmoid(np.dot(x0, Wi) + np.dot(hm1, Ui) + bi)
        # f0 = npsigmoid(np.dot(x0, Wf) + np.dot(hm1, Uf) + bf)
        # o0 = npsigmoid(np.dot(x0, Wo) + np.dot(hm1, Uo) + bo)
        # c0 = f0*cm1 + i0*np.tanh(np.dot(x0, Wc) + np.dot(hm1, Uc) + bc)
        # h0 = o0*c0
        #
        # # 2nd iteration
        # i1 = npsigmoid(np.dot(x1, Wi) + np.dot(h0, Ui) + bi)
        # f1 = npsigmoid(np.dot(x1, Wf) + np.dot(h0, Uf) + bf)
        # o1 = npsigmoid(np.dot(x1, Wo) + np.dot(h0, Uo) + bo)
        # c1 = f1 * c0 + i1 * np.tanh(np.dot(x1, Wc) + np.dot(h0, Uc) + bc)
        # h1 = o1 * c1
        #
        # # 3rd iteration
        # i2 = npsigmoid(np.dot(x2, Wi) + np.dot(h1, Ui) + bi)
        # f2 = npsigmoid(np.dot(x2, Wf) + np.dot(h1, Uf) + bf)
        # o2 = npsigmoid(np.dot(x2, Wo) + np.dot(h1, Uo) + bo)
        # c2 = f2 * c1 + i2 * np.tanh(np.dot(x2, Wc) + np.dot(h1, Uc) + bc)
        # h2 = o2 * c2
        #
        # # 4th iteration
        # i3 = npsigmoid(np.dot(x3, Wi) + np.dot(h2, Ui) + bi)
        # f3 = npsigmoid(np.dot(x3, Wf) + np.dot(h2, Uf) + bf)
        # o3 = npsigmoid(np.dot(x3, Wo) + np.dot(h2, Uo) + bo)
        # c3 = f3 * c2 + i3 * np.tanh(np.dot(x3, Wc) + np.dot(h2, Uc) + bc)
        # h3 = o3 * c3
        # bp = 1
        #
        #
        # lstm = net.content['lstm'].output.eval({net.input[0]:X, net.input[1]:Y,
        #                                         net.content['lstm'].h_m1_sym: h_dummy2,
        #                                         net.content['lstm'].c_m1_sym: c_dummy2})

        #################### END DEBUG ####################

        net.layer_opts['softmax_norm_dim'] = 2
        net.content['softmax'] = SoftmaxLayer(net, net.content['lstm_r'])

        net.content['cost'] = CategoricalCrossEntropy(net,
                                                      net.content['softmax'])

        # net.simpleprint()

        net.InitLR(0.01)

        # Create params list, grad list, momentum list for the theano function to update
        trainer.InitParams(net)

        # Update rule
        train_update_rule = trainer.InitUpdateRule(net)
        additional_output = ['input_img', 'word_emb', 'softmax']
        # Clip train_Y before

        net.InitTrainFunction(train_update_rule,
                              [train_X, train_Y[:, :-1, :, :]],
                              train_Y[:, 1:, :, :], additional_output)
        net.InitValFunction([valid_X, valid_Y[:, :-1, :, :]],
                            valid_Y[:, 1:, :, :], additional_output)
    else:
        snapshot_list = sorted(snapshot_list)
        print('Loading latest snapshot at %s' % snapshot_list[-1])
        net, trainer, e = LoadList(snapshot_list[-1])

        # trainer = Trainer()

        # Setting training params
        # trainer.opts['batch_size'] = 100
        # trainer.opts['save'] = True
        # trainer.opts['save_freq'] = 50
        # trainer.opts['num_sample'] = 1000
        # trainer.opts['num_epoch'] = 5000
        # trainer.opts['train_sentence_length'] = 10
        # trainer.opts['test_setence_length'] = 15
        # trainer.opts['num_val_sample'] = 1
        # trainer.opts['num_test_sample'] = 1
        #
        #

        net.net_opts['l1_learning_rate'] = np.asarray(
            0.0001, dtype=theano.config.floatX)
        net.reset_opts['min_lr'] = np.asarray(0.00001,
                                              dtype=theano.config.floatX)
        net.reset_opts['max_lr'] = net.net_opts['l1_learning_rate']
        net.InitLR(100)
        trainer.InitParams(net)
        # Create params list, grad list, momentum list for the theano function to update
        train_update_rule = trainer.InitUpdateRule(net)
        additional_output = ['input_img', 'word_emb', 'softmax']

        ###########################
        # net = ShowTellNet()
        # net.name = 'lstm_test'
        #
        # net.net_opts['l1_learning_rate'] = np.asarray(0.0001, dtype=theano.config.floatX)
        # net.reset_opts['min_lr'] = np.asarray(0.00001, dtype=theano.config.floatX)
        # net.reset_opts['max_lr'] = net.net_opts['l1_learning_rate']
        #
        # net.layer_opts['num_fc_node'] = 16
        # net.content['img_emb'] = FCLayer(net, net.content['input_img'], (1, 10, x_dim, 1))
        # net.content['img_emb_swap'] = SwapDim(net, net.content['img_emb'], 1, 2)
        # # Construct the network
        #
        # net.layer_opts['num_emb'] = 16
        # net.content['word_emb'] = WordEmbLayer(net, net.content['input_sen'],
        #                                        (trainer.opts['batch_size'], trainer.opts['train_sentence_length'],
        #                                         num_class, 1))
        #
        # net.content['cat'] = Concat(net, net.content['img_emb_swap'], net.content['word_emb'], 1)
        #
        # net.layer_opts['num_lstm_node'] = num_class
        # net.content['lstm'] = LSTM(net, net.content['cat'],
        #                            (trainer.opts['batch_size'], trainer.opts['train_sentence_length'],
        #                             net.layer_opts['num_emb'], 1))
        #
        # net.content['lstm_r'] = LSTMRemove(net, net.content['lstm'], 0, 1)
        #
        # net.layer_opts['softmax_norm_dim'] = 2
        # net.content['softmax'] = SoftmaxLayer(net, net.content['lstm_r'])
        #
        # net.content['cost'] = CategoricalCrossEntropy(net, net.content['softmax'])
        # net.InitLR(100)
        # trainer.InitParams(net)
        # train_update_rule = trainer.InitUpdateRule(net)
        # additional_output = ['input_img', 'word_emb', 'softmax']

        ########################

        # Create params list, grad list, momentum list for the theano function to update

        # net.train_function = theano.function(
        #     [net.index],
        #     outputs=[net.content['cost'].output] + [net.output[0][net.index, :, :, :]],
        #     updates=None,
        #     givens={
        #         net.input[0]: train_X[net.index, :, :, :],
        #         net.input[1]: train_Y[net.index, :, :, :],
        #         net.output[0]: train_X[net.index, :, :, :],
        #         net.content['lstm'].h_m1_sym: T.zeros((1, net.index.shape[0], net.content['lstm'].W_shape[1]),
        #                                               dtype=theano.config.floatX),
        #         net.content['lstm'].c_m1_sym: T.zeros((1, net.index.shape[0], net.content['lstm'].W_shape[1]),
        #                                               dtype=theano.config.floatX)
        #
        #     }
        #
        # )
        net.InitTrainFunction(train_update_rule,
                              [train_X, train_Y[:, :-1, :, :]],
                              train_Y[:, 1:, :, :], additional_output)
        net.InitValFunction([valid_X, valid_Y[:, :-1, :, :]],
                            valid_Y[:, 1:, :, :], additional_output)

    main_loop = SGDRMainLoop(net, trained_path)
    main_loop.run(net, trainer, e)
