Example #1
def train():
    startup_program = fluid.default_startup_program()
    main_program = fluid.default_main_program()

    raw_data = reader.raw_data('fra.txt', num_samples=num_samples)
    train_data = raw_data[0]
    data_vars = raw_data[1]

    model = BaseModel(hidden_size=latent_dim,
                      src_vocab_size=data_vars['num_encoder_tokens'],
                      tar_vocab_size=data_vars['num_decoder_tokens'],
                      batch_size=batch_size,
                      batch_first=True)

    loss = model.build_graph()

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)  # initialize parameters once before training

    ce_ppl = []
    for epoch_id in range(num_epochs):
        print("epoch ", epoch_id)

        train_data_iter = reader.get_data_iter(train_data, batch_size)

        total_loss = 0
        word_count = 0.0
        for batch_id, batch in enumerate(train_data_iter):

            input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
            fetch_outs = exe.run(main_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

            if batch_id > 0 and batch_id % batch_size == 0:  # periodic perplexity report
                print("  ppl", batch_id, np.exp(total_loss / word_count))
                ce_ppl.append(np.exp(total_loss / word_count))
                total_loss = 0.0
                word_count = 0.0
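Example #1 calls prepare_input without showing its definition. A minimal sketch of such a helper, assuming the batch layout (src_ids, src_mask, tar_ids, tar_mask, label) and the feed names used by the seq2seq graph; all names here are illustrative, not the original helper:

import numpy as np

def prepare_input(batch, epoch_id=0):
    # Hypothetical helper: map a raw batch onto the feed dict expected by
    # the seq2seq graph, and count target tokens for the perplexity average.
    src_ids, src_mask, tar_ids, tar_mask, label = batch
    res = {
        'src': src_ids,
        'src_sequence_length': src_mask,
        'tar': tar_ids,
        'tar_sequence_length': tar_mask,
        'label': label,
    }
    word_num = np.sum(tar_mask)  # number of target words in this batch
    return res, word_num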
Example #2
def train():

    model = BaseModel(batch_size=batch_size, maxlen=n_frames)
    loss, acc, output, no_grad_set = model.build_graph()

    main_program = fluid.default_main_program()
    inference_program = main_program.clone(for_test=True)

    optimizer = fluid.optimizer.Adadelta(0.001)
    optimizer.minimize(loss, no_grad_set=no_grad_set)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    log_writer = LogWriter(log_path, sync_cycle=10)  # VisualDL log writer

    with log_writter.mode("train") as logger:          
        log_train_loss = logger.scalar(tag="train_loss") 
        log_train_acc = logger.scalar(tag="train_acc")

    with log_writter.mode("validation") as logger:
        log_valid_loss = logger.scalar(tag="validation_loss")
        log_valid_acc = logger.scalar(tag="validation_acc")

    def prepare_input(batch):
        x, y, x_seqlen = batch
        res = {}

        res['input'] = np.array(x).astype("float32")
        res['input_seqlen'] = np.array(x_seqlen).astype("int64")
        res['label'] = np.array(y).astype("float32")

        return res

    # (samples, seq, width, height, pixel)
    noisy_movies, shifted_movies = reader.generate_movies(n_samples, n_frames)
    data = noisy_movies[:1000], shifted_movies[:1000]
    train_data, validation_data = split(data, validation_split)

    step_id = 0
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)

        valid_data_iter = reader.get_data_iter(validation_data, batch_size) 
        train_data_iter = reader.get_data_iter(train_data, batch_size) 

        # train
        total_loss = 0
        batch_id = 0
        for batch in train_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=main_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = fetch_outs[1]
            total_loss += cost_train

            if batch_id > 0 and batch_id % 5 == 0:
                log_train_loss.add_record(step_id, total_loss) 
                log_train_acc.add_record(step_id, acc_train)
                step_id += 1
                print("current loss: %.7f, for batch %d"  % (total_loss, batch_id))
                total_loss = 0.0

            batch_id += 1


        # validate
        total_loss = 0
        total_acc = 0
        batch_id = 0
        for batch in valid_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_valid = np.array(fetch_outs[0])
            acc_valid = fetch_outs[1]
            total_loss += cost_valid
            total_acc += acc_valid  # accumulate so the per-epoch average below is meaningful
            batch_id += 1

        log_valid_loss.add_record(epoch_id, total_loss)
        log_valid_acc.add_record(epoch_id, total_acc / batch_id)
        print("validation loss: %.7f"  % (total_loss))

    fluid.io.save_inference_model(
        dirname=params_path,
        feeded_var_names=['input', 'input_seqlen'], 
        target_vars=[loss, acc], 
        executor=exe)
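Example #2 relies on a split helper that is not shown. A minimal sketch, assuming it holds out the trailing validation_split fraction of the paired arrays (the signature is inferred from the call site above):

def split(data, validation_split):
    # Hypothetical helper: hold out the last validation_split fraction of
    # the paired (noisy, shifted) arrays as the validation set.
    x, y = data
    n_valid = max(1, int(len(x) * validation_split))
    train = (x[:-n_valid], y[:-n_valid])
    valid = (x[-n_valid:], y[-n_valid:])
    return train, valid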
Example #3
def train():
    def prepare_input(batch):
        src_ids, label = batch
        res = {}

        res['src'] = src_ids
        res['label'] = label

        return res

    # Set parameters:
    # ngram_range = 2 will add bi-grams features
    ngram_range = 2
    max_features = 20000
    maxlen = 400
    batch_size = 32
    embedding_dims = 50
    epochs = 5

    print('Loading data...')
    all_data = reader.raw_data(num_words=max_features)
    x_train, y_train, x_test, y_test = all_data

    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')
    print('Average train sequence length: {}'.format(
        np.mean(list(map(len, x_train)), dtype=int)))
    print('Average test sequence length: {}'.format(
        np.mean(list(map(len, x_test)), dtype=int)))

    if ngram_range > 1:
        print('Adding {}-gram features'.format(ngram_range))
        # Create set of unique n-gram from the training set.
        ngram_set = set()
        for input_list in x_train:
            for i in range(2, ngram_range + 1):
                set_of_ngram = create_ngram_set(input_list, ngram_value=i)
                ngram_set.update(set_of_ngram)

        # Dictionary mapping n-gram token to a unique integer.
        # Integer values are greater than max_features in order
        # to avoid collision with existing features.
        start_index = max_features + 1
        token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
        indice_token = {token_indice[k]: k for k in token_indice}

        # max_features is the highest integer that could be found in the dataset.
        max_features = np.max(list(indice_token.keys())) + 1

        # Augmenting x_train and x_test with n-grams features
        x_train = add_ngram(x_train, token_indice, ngram_range)
        x_test = add_ngram(x_test, token_indice, ngram_range)
        print('Average train sequence length: {}'.format(
            np.mean(list(map(len, x_train)), dtype=int)))
        print('Average test sequence length: {}'.format(
            np.mean(list(map(len, x_test)), dtype=int)))

    print('Pad sequences (samples x time)')
    x_train = reader.pad_sequences(x_train, maxlen=maxlen)
    x_test = reader.pad_sequences(x_test, maxlen=maxlen)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    all_data = x_train, y_train, x_test, y_test

    print('Build model...')
    model = BaseModel(max_features=max_features)
    loss, acc = model.build_graph()

    main_program = fluid.default_main_program()
    inference_program = main_program.clone(for_test=True)

    optimizer = fluid.optimizer.Adam(0.01)
    optimizer.minimize(loss)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    for epoch_id in range(epochs):
        start_time = time.time()
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(all_data, batch_size)

        total_loss = 0
        total_acc = 0
        batch_id = 0
        for batch in train_data_iter:

            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train
            total_acc += acc_train

            if batch_id > 0 and batch_id % 10 == 0:
                # total_acc accumulates over 10 batches, so * 0.1 is the window average
                print("current loss: %.3f, current acc: %.3f for step %d" %
                      (total_loss, total_acc * 0.1, batch_id))
                total_loss = 0.0
                total_acc = 0.0

            batch_id += 1

    test_data_iter = reader.get_data_iter(all_data, batch_size, mode='test')

    all_acc = []

    for batch in test_data_iter:
        input_data_feed = prepare_input(batch)
        fetch_outs = exe.run(program=inference_program,
                             feed=input_data_feed,
                             fetch_list=[loss.name, acc.name],
                             use_program_cache=False)

        all_acc.append(fetch_outs[1])

    all_acc = np.array(all_acc).astype("float32")

    print("test acc: %.3f" % all_acc.mean())
Example #4
def train():
    raw_data, raw_data_test = reader.get_lt5_data()

    model = BaseModel(fine_tune=False)
    loss, acc, output = model.build_graph()

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)

    optimizer = fluid.optimizer.Adadelta(0.01)
    optimizer.minimize(loss)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def prepare_input(batch, epoch_id=0):
        x, y = batch
        res = {}

        res['img'] = np.array(x).astype("float32") / 255
        res['label'] = np.array(y).astype("int64")

        return res

    def train_test(test_batch):
        total_acc = []
        input_data_feed = prepare_input(test_batch)
        fetch_outs = exe.run(program=test_program,
                             feed=input_data_feed,
                             fetch_list=[acc.name],
                             use_program_cache=True)

        acc_train = np.array(fetch_outs[0])
        total_acc.append(acc_train)
        print("test avg acc: {0:.2%}".format(np.mean(total_acc)))

    for epoch_id in range(epochs):
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(raw_data, batch_size)
        test_data_iter  = reader.get_data_iter(raw_data_test, batch_size)

        # zip stops at the shorter iterator, so each epoch pairs a test
        # batch with every train batch it consumes
        data_iter = zip(train_data_iter, test_data_iter)

        total_loss = 0
        total_acc = []
        for batch_id, batch in enumerate(data_iter):
            batch_train, batch_test = batch
            input_data_feed = prepare_input(batch_train)
            fetch_outs = exe.run(program=main_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train * batch_size
            total_acc.append(acc_train)

        print("train total loss: ", total_loss, np.mean(total_acc))
        train_test(batch_test)  # evaluate on the last test batch seen this epoch
        print()

    shutil.rmtree(temp_model_path, ignore_errors=True)
    os.makedirs(temp_model_path)
    fluid.io.save_params(executor=exe, dirname=temp_model_path)
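Example #4 ends with fluid.io.save_params, which stores raw parameter values only, so reusing them requires rebuilding the same graph before loading. A minimal sketch of the reload path in a fresh process, mirroring the load_params call in Example #6:

# Rebuild the graph with the same constructor arguments so parameter
# shapes match, then overwrite the fresh initialization with saved values.
model = BaseModel(fine_tune=False)
loss, acc, output = model.build_graph()

exe = Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
fluid.io.load_params(executor=exe, dirname=temp_model_path)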
Example #5
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    model = BaseModel(hidden_size,
                      src_vocab_size,
                      batch_size,
                      num_layers=num_layers,
                      init_scale=init_scale,
                      dropout=dropout)

    loss, acc = model.build_graph()
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = main_program.clone(for_test=True)

    lr = args.learning_rate
    opt_type = args.optimizer
    if opt_type == "sgd":
        optimizer = fluid.optimizer.SGD(lr)
    elif opt_type == "adam":
        optimizer = fluid.optimizer.Adam(lr)
    else:
        raise ValueError("unsupported optimizer %r, expected 'sgd' or 'adam'" % opt_type)

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, label = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))

        res['src'] = src_ids
        res['label'] = label
        res['src_sequence_length'] = src_mask

        return res

    all_data = reader.raw_data()

    max_epoch = args.max_epoch
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(all_data, batch_size)

        total_loss = 0
        word_count = 0.0
        batch_id = 0
        for batch in train_data_iter:

            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train

            if batch_id > 0 and batch_id % 100 == 0:
                print("current loss: %.3f, for step %d" %
                      (total_loss, batch_id))
                total_loss = 0.0

            batch_id += 1

    test_data_iter = reader.get_data_iter(all_data, batch_size, mode='test')

    all_acc = []

    for batch in test_data_iter:
        input_data_feed = prepare_input(batch)
        fetch_outs = exe.run(program=inference_program,
                             feed=input_data_feed,
                             fetch_list=[acc.name],
                             use_program_cache=False)

        all_acc.append(fetch_outs[0])

    all_acc = np.array(all_acc).astype("float32")

    print("test acc:%.3f" % all_acc.mean())
Example #6
def train():

    model = BaseModel(batch_size=batch_size, maxlen=7)
    pred = model.build_graph(mode='test')

    inference_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    fluid.io.load_params(executor=exe, dirname=infer_model_path)

    def prepare_input(batch):
        x, y, x_seqlen = batch
        res = {}

        res['input'] = np.array(x).astype("float32")
        res['input_seqlen'] = np.array(x_seqlen).astype("int64")
        res['label'] = np.array(y).astype("float32")

        return res

    # (samples, seq, width, height, pixel)
    noisy_movies, shifted_movies = reader.generate_movies(n_samples, n_frames)

    # Testing the network on one movie
    # feed it with the first 7 positions and then
    # predict the new positions
    which = 1004
    track_test = noisy_movies[which][:7, ::, ::, ::]
    track_res = shifted_movies[which][:7, ::, ::, ::]

    track_test = track_test[np.newaxis, ::, ::, ::, ::]
    track_res = track_res[np.newaxis, ::, ::, ::, ::]

    for j in range(16):

        track_raw = track_test, track_res

        data_iter = reader.get_data_iter(track_raw, 1)

        # batch
        for batch in data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[pred.name],
                                 use_program_cache=False)

            guess = fetch_outs[0]
            last_seq = guess[0][-1]

            temp = []
            for row in last_seq:
                temp_row = []
                for ele in row:
                    # argsort(...)[1] picks the index of the larger of the
                    # two class scores, i.e. the per-pixel argmax
                    pred_label = np.argsort(ele)[1]
                    temp_row.append([pred_label])
                temp.append(temp_row)

            guess = [[temp]]
            new = np.array(guess)
            track_test = np.concatenate((track_test, new), axis=1)

    # And then compare the predictions
    # to the ground truth
    track2 = noisy_movies[which][::, ::, ::, ::]
    for i in range(15):
        fig = plt.figure(figsize=(10, 5))

        ax = fig.add_subplot(121)

        if i >= 7:
            ax.text(1, 3, 'Predictions!', fontsize=20, color='w')
        else:
            ax.text(1, 3, 'Initial trajectory', fontsize=20)

        toplot = track_test[0][i, ::, ::, 0]

        plt.imshow(toplot)
        ax = fig.add_subplot(122)
        plt.text(1, 3, 'Ground truth', fontsize=20)

        toplot = track2[i, ::, ::, 0]
        if i >= 2:
            toplot = shifted_movies[which][i - 1, ::, ::, 0]

        plt.imshow(toplot)
        plt.savefig('./res/%i_animate.png' % (i + 1))
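The per-pixel double loop in the prediction step of Example #6 selects the index of the larger of the two class scores at each pixel. Assuming that two-score layout, the temp/temp_row loops can be replaced by a single argmax over the class axis; a vectorized sketch:

# Inside the batch loop, after last_seq = guess[0][-1]:
pred_labels = np.argmax(last_seq, axis=-1)           # (height, width) argmax per pixel
new = pred_labels[np.newaxis, np.newaxis, :, :, np.newaxis].astype('float32')
track_test = np.concatenate((track_test, new), axis=1)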