Example #1
    train_data = loader.load_data('train')
    valid_data = loader.load_data('valid')
    test_data  = loader.load_data('test')
    args.n_train = len(train_data[0])
    print("Number of train:{}, valid:{}, test:{}.".format(len(train_data[0]), len(valid_data[0]), len(test_data[0])))

    plot_config(args)

    heads, tails = loader.heads_tails()
    head_idx, tail_idx, head_cache, tail_cache, head_pos, tail_pos = loader.get_cache_list()
    caches = [head_idx, tail_idx, head_cache, tail_cache, head_pos, tail_pos]

    train_data = [torch.LongTensor(vec) for vec in train_data]
    valid_data = [torch.LongTensor(vec) for vec in valid_data]
    test_data  = [torch.LongTensor(vec) for vec in test_data]

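    # evaluation closures: link prediction on the validation / test splits (filtered via args.filter)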
    tester_val = lambda: model.test_link(valid_data, n_ent, heads, tails, args.filter)
    tester_tst = lambda: model.test_link(test_data, n_ent, heads, tails, args.filter)

    corrupter = BernCorrupter(train_data, n_ent, n_rel)
    model = BaseModel(n_ent, n_rel, args)


    best_str = model.train(train_data, caches, corrupter, tester_val, tester_tst)
    with open(args.perf_file, 'a') as f:
        print('Training finished and best performance:', best_str)
        f.write('best_performance: '+best_str)


Example #2
def infer():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size
    # inference process

    print("src", src_vocab_size)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # dropout type is upscale_in_train, so dropout can be removed at inference time;
        # we simply set dropout to 0 here
        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   beam_size=args.beam_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=0.0,
                                   mode='beam_search')
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              beam_size=args.beam_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=0.0,
                              mode='beam_search')

        source_vocab_file = args.vocab_prefix + "." + args.src_lang
        infer_file = args.infer_file

        infer_data = reader.raw_mono_data(source_vocab_file, infer_file)

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            res = {}
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]

            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

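        # restore the trained parameters from the dygraph checkpoint and switch to eval mode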
        dir_name = args.reload_model
        print("dir name", dir_name)
        state_dict, _ = fluid.dygraph.load_dygraph(dir_name)
        model.set_dict(state_dict)
        model.eval()

        train_data_iter = reader.get_data_iter(infer_data,
                                               batch_size,
                                               mode='infer')

        tar_id2vocab = []
        tar_vocab_file = args.vocab_prefix + "." + args.tar_lang
        with io.open(tar_vocab_file, "r", encoding='utf-8') as f:
            for line in f.readlines():
                tar_id2vocab.append(line.strip())

        infer_output_file = args.infer_output_file
        infer_output_dir = infer_output_file.split('/')[0]
        if not os.path.exists(infer_output_dir):
            os.mkdir(infer_output_dir)

        with io.open(infer_output_file, 'w', encoding='utf-8') as out_file:

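            # decode batch by batch and keep only the tokens before the first </s> marker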
            for batch_id, batch in enumerate(train_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id=0)
                outputs = model(input_data_feed)
                for i in range(outputs.shape[0]):
                    ins = outputs[i].numpy()
                    res = [tar_id2vocab[int(e)] for e in ins[:, 0].reshape(-1)]
                    new_res = []
                    for ele in res:
                        if ele == "</s>":
                            break
                        new_res.append(ele)

                    out_file.write(space_tok.join(new_res))
                    out_file.write(line_tok)
Example #3
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    model = BaseModel(hidden_size,
                      src_vocab_size,
                      batch_size,
                      num_layers=num_layers,
                      init_scale=init_scale,
                      dropout=dropout)

    loss, acc = model.build_graph()
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    lr = args.learning_rate
    opt_type = args.optimizer
    if opt_type == "sgd":
        optimizer = fluid.optimizer.SGD(lr)
    elif opt_type == "adam":
        optimizer = fluid.optimizer.Adam(lr)
    else:
        print("only support [sgd|adam]")
        raise Exception("opt type not support")

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, label = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))

        res['src'] = src_ids
        res['label'] = label
        res['src_sequence_length'] = src_mask

        return res

    all_data = reader.raw_data()

    max_epoch = args.max_epoch
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(all_data, batch_size)

        total_loss = 0
        word_count = 0.0
        batch_id = 0
        for batch in train_data_iter:

            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train

            if batch_id > 0 and batch_id % 100 == 0:
                print("current loss: %.3f, for step %d" %
                      (total_loss, batch_id))
                total_loss = 0.0

            batch_id += 1

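    # after training, report accuracy on the test split using the cloned inference program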
    test_data_iter = reader.get_data_iter(all_data, batch_size, mode='test')

    all_acc = []

    for batch in test_data_iter:
        input_data_feed = prepare_input(batch)
        fetch_outs = exe.run(program=inference_program,
                             feed=input_data_feed,
                             fetch_list=[acc.name],
                             use_program_cache=False)

        all_acc.append(fetch_outs[0])

    all_acc = np.array(all_acc).astype("float32")

    print("test acc:%.3f" % all_acc.mean())
Example #4
def main(args):

    # set number of threads in pytorch
    torch.set_num_threads(6)

    # initialize the logger
    logger_init(args)

    # set gpu
    if args.GPU:
        torch.cuda.set_device(args.gpu)

    # the default hyper-parameter settings for the corresponding dataset
    args = default_search_hyper(args)

    # load data
    # read nary data
    if args.n_arity > 2:
        d = nary_dataloader(args.task_dir)

        entity_idxs = {d.entities[i]: i for i in range(len(d.entities))}
        relation_idxs = {d.relations[i]: i for i in range(len(d.relations))}
        n_ent, n_rel = len(entity_idxs), len(relation_idxs)
        print("Number of train:{}, valid:{}, test:{}.".format(
            len(d.train_data), len(d.valid_data), len(d.test_data)))

        train_data = torch.LongTensor(
            get_data_idxs(d.train_data, entity_idxs, relation_idxs))
        valid_data = torch.LongTensor(
            get_data_idxs(d.valid_data, entity_idxs, relation_idxs))
        test_data = torch.LongTensor(
            get_data_idxs(d.test_data, entity_idxs, relation_idxs))
        e1_sp, e2_sp, e3_sp = n_ary_heads(train_data, valid_data, test_data)


    else:
        loader = DataLoader(args.task_dir)
        n_ent, n_rel = loader.graph_size()
        train_data = loader.load_data('train')
        valid_data = loader.load_data('valid')
        test_data = loader.load_data('test')
        print("Number of train:{}, valid:{}, test:{}.".format(
            len(train_data[0]), len(valid_data[0]), len(test_data[0])))

        heads, tails = loader.heads_tails()

        train_data = torch.LongTensor(train_data).transpose(0, 1)
        valid_data = torch.LongTensor(valid_data).transpose(0, 1)
        test_data = torch.LongTensor(test_data).transpose(0, 1)

    file_path = "oas_nary" + "_" + str(args.num_blocks)
    directory = os.path.join("results", args.dataset, file_path)
    args.out_dir = directory
    if not os.path.exists(directory):
        os.makedirs(directory)
    os.environ["OMP_NUM_THREADS"] = "4"
    os.environ["MKL_NUM_THREADS"] = "4"
    args.perf_file = os.path.join(
        directory, args.dataset + '_oas_nary_' + str(args.num_blocks) + "_" +
        str(args.trial) + '.txt')

    print('output file name:', args.perf_file)

    plot_config(args)

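    # evaluation callbacks: filtered link prediction for binary relations, n-ary evaluation otherwise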
    def tester_val(facts=None, arch=None):
        if args.n_arity == 2:
            if facts is None:
                return model.test_link(test_data=valid_data,
                                       n_ent=n_ent,
                                       heads=heads,
                                       tails=tails,
                                       filt=args.filter,
                                       arch=arch)
            else:
                return model.test_link(test_data=facts,
                                       n_ent=n_ent,
                                       heads=heads,
                                       tails=tails,
                                       filt=args.filter,
                                       arch=arch)

        elif args.n_arity > 2:
            if facts is None:
                return model.evaluate(valid_data, e1_sp, e2_sp, e3_sp, arch)
            else:
                return model.evaluate(facts, e1_sp, e2_sp, e3_sp, arch)

    def tester_tst():
        if args.n_arity == 2:
            return model.test_link(test_data=test_data,
                                   n_ent=n_ent,
                                   heads=heads,
                                   tails=tails,
                                   filt=args.filter)
        elif args.n_arity > 2:
            return model.evaluate(test_data, e1_sp, e2_sp, e3_sp)

    tester_trip_class = None
    model = BaseModel(n_ent, n_rel, args)
    model.train(train_data, valid_data, tester_val, tester_tst,
                tester_trip_class)
Example #5
#!/usr/bin/python3
from base_model import BaseModel

my_model = BaseModel()
my_model.name = "Holberton"
my_model.my_number = 89
print(my_model.id)
print(my_model)
print(type(my_model.created_at))
print("--")
my_model_json = my_model.to_dict()
print(my_model_json)
print("JSON of my_model:")
for key in my_model_json.keys():
    print(
        "\t{}: ({}) - {}".format(
            key, type(my_model_json[key]), my_model_json[key]
        )
    )

print("--")
my_new_model = BaseModel(**my_model_json)
print(my_new_model.id)
print(my_new_model)
print(type(my_new_model.created_at))

print("--")
print(my_model is my_new_model)
Example #6
        space4kge = {
            "lr": hp.uniform("lr", 0, 1),
            "lamb": hp.uniform("lamb", -5, 0),
            "decay_rate": hp.uniform("decay_rate", 0.99, 1.0),
            "n_batch": hp.choice("n_batch", [128, 256, 512, 1024]),
            "n_dim": hp.choice("n_dim", [64]),
        }

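        # TPE search (hyperopt) over the hyper-parameter space defined above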
        trials = Trials()
        best = fmin(run_kge,
                    space4kge,
                    algo=partial(tpe.suggest, n_startup_jobs=30),
                    max_evals=200,
                    trials=trials)

    else:
        plot_config(args)
        model = BaseModel(n_ent, n_rel, args, struct)
        tester_val = lambda: model.test_link(valid_data, valid_head_filter,
                                             valid_tail_filter)
        tester_tst = lambda: model.test_link(test_data, test_head_filter,
                                             test_tail_filter)
        best_mrr, best_str = model.train(train_data, tester_val, tester_tst)

        with open(args.perf_file, 'a') as f:
            print('structure:', struct, best_str)
            for s in struct:
                f.write(str(s) + ' ')
            f.write('\t\tbest_performance: ' + best_str + '\n')
Example #7
def predict(save_dir):

    warnings.filterwarnings("ignore")
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"]=FLAGS.GPU_device[len(FLAGS.GPU_device)-1]
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

    # Load the useful files to build the architecture
    print("Loading the connection matrix...")
    start = time.time()

    adj_matrix = pd.read_csv(os.path.join(FLAGS.dir_data,"adj_matrix.csv"),index_col=0)
    first_matrix_connection = pd.read_csv(os.path.join(FLAGS.dir_data,"first_matrix_connection_GO.csv"),index_col=0)
    csv_go = pd.read_csv(os.path.join(FLAGS.dir_data,"go_level.csv"),index_col=0)

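    # connectivity masks between consecutive layers, derived from the GO hierarchy levels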
    connection_matrix = []
    connection_matrix.append(np.array(first_matrix_connection.values, dtype=np.float32))
    for level in range(7, 2, -1):
        rows = csv_go[str(level)].loc[lambda x: x == 1].index
        cols = csv_go[str(level - 1)].loc[lambda x: x == 1].index
        connection_matrix.append(np.array(adj_matrix.loc[rows, cols].values, dtype=np.float32))
    connection_matrix.append(np.ones((FLAGS.n_hidden_6, FLAGS.n_classes), dtype=np.float32))

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))

    # Load the data
    print("Loading the test dataset...")

    loaded = np.load(os.path.join(FLAGS.dir_data,"X_test.npz"))
    X_test = loaded['x']
    y_test = loaded['y']
    if FLAGS.n_classes>=2:
        y_test=to_categorical(y_test)

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))


    # Launch the model
    print("Launching the evaluation")
    if FLAGS.type_training != "":
        print("with {} and ALPHA={}".format(FLAGS.type_training,FLAGS.alpha))

    tf.reset_default_graph() 
   
    # -- Inputs of the model --
    X = tf.placeholder(tf.float32, shape=[None, FLAGS.n_input])
    Y = tf.placeholder(tf.float32, shape=[None, FLAGS.n_classes])

    # -- Hyperparameters of the neural network --
    is_training = tf.placeholder(tf.bool,name="is_training") # Batch Norm hyperparameter
    keep_prob = tf.placeholder(tf.float32, name="keep_prob") # Dropout hyperparameter

    network=BaseModel(X=X,n_input=FLAGS.n_input,n_classes=FLAGS.n_classes,
        n_hidden_1=FLAGS.n_hidden_1,n_hidden_2=FLAGS.n_hidden_2,n_hidden_3=FLAGS.n_hidden_3,n_hidden_4=FLAGS.n_hidden_4,
        n_hidden_5=FLAGS.n_hidden_5,n_hidden_6=FLAGS.n_hidden_6,keep_prob=keep_prob,is_training=is_training) # Model instantiation
    pred = network()
    # -- Compute the prediction error --
    if FLAGS.n_classes>=2:
        y_hat = tf.argmax(pred,1)
    else:
        y_hat = tf.nn.sigmoid(pred)
        y_hat = tf.cast(y_hat > 0.5, dtype=tf.int64)  # threshold the sigmoid output rather than the raw logits

    # -- Configure the use of the gpu --
    config = tf.ConfigProto(log_device_placement=False,allow_soft_placement=True)
    #config.gpu_options.allow_growth = True, log_device_placement=True

    if FLAGS.restore : saver = tf.train.Saver()

    start = time.time()

    with tf.device(FLAGS.GPU_device):
        with tf.Session(config=config) as sess: 
            if  FLAGS.restore:
                saver.restore(sess,os.path.join(save_dir,"model")) 
            
            # -- Predict the outcome predictions of the samples from the test set --

            y_hat = sess.run([y_hat], feed_dict={X: X_test,Y: y_test,is_training:FLAGS.is_training,keep_prob:1})          

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec ".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))   

    return y_hat
Example #8
def train():
    raw_data, raw_data_test = reader.get_gte5_data()

    model = BaseModel(fine_tune=True)
    loss, acc, output = model.build_graph()

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)

    optimizer = fluid.optimizer.Adadelta(0.01)
    optimizer.minimize(loss)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    fluid.io.load_params(executor=exe, dirname=temp_model_path)

    def prepare_input(batch):
        x, y = batch
        res = {}

        res['img'] = np.array(x).astype("float32") / 255
        res['label'] = np.array(y).astype("int64")

        return res

    def train_test(test_batch):
        total_acc = []
        input_data_feed = prepare_input(test_batch)
        fetch_outs = exe.run(program=test_program,
                             feed=input_data_feed,
                             fetch_list=[acc.name],
                             use_program_cache=True)

        acc_train = np.array(fetch_outs[0])
        total_acc.append(acc_train)
        print("test avg acc: {0:.2%}".format(np.mean(total_acc)))

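    # fine-tune the pre-loaded parameters on the gte5 subset, evaluating on a test batch after each epoch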
    for epoch_id in range(epochs):
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(raw_data, batch_size)
        test_data_iter = reader.get_data_iter(raw_data_test, batch_size)

        data_iter = zip(train_data_iter, test_data_iter)

        total_loss = 0
        total_acc = []
        for batch_id, batch in enumerate(data_iter):
            batch_train, batch_test = batch
            input_data_feed = prepare_input(batch_train)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train * batch_size
            total_acc.append(acc_train)

        print("train total loss: ", total_loss, np.mean(total_acc))
        train_test(batch_test)
        print()
Example #9
def train():

    model = BaseModel(batch_size=batch_size, maxlen=7)
    pred = model.build_graph(mode='test')

    inference_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    fluid.io.load_params(executor=exe, dirname=infer_model_path)

    def prepare_input(batch):
        x, y, x_seqlen = batch
        res = {}

        res['input'] = np.array(x).astype("float32")
        res['input_seqlen'] = np.array(x_seqlen).astype("int64")
        res['label'] = np.array(y).astype("float32")

        return res

    # (samples, seq, width, height, pixel)
    noisy_movies, shifted_movies = reader.generate_movies(n_samples, n_frames)

    # Testing the network on one movie
    # feed it with the first 7 positions and then
    # predict the new positions
    which = 1004
    track_test = noisy_movies[which][:7, ::, ::, ::]
    track_res = shifted_movies[which][:7, ::, ::, ::]

    track_test = track_test[np.newaxis, ::, ::, ::, ::]
    track_res = track_res[np.newaxis, ::, ::, ::, ::]

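    # autoregressive rollout: append each predicted frame to the input track and feed it back for the next step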
    for j in range(16):

        track_raw = track_test, track_res

        data_iter = reader.get_data_iter(track_raw, 1)

        # batch
        for batch in data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[pred.name],
                                 use_program_cache=False)

            guess = fetch_outs[0]
            last_seq = guess[0][-1]

            temp = []
            for row in last_seq:
                temp_row = []
                for ele in row:
                    pred_label = np.argsort(ele)[1]
                    temp_row.append([pred_label])
                temp.append(temp_row)

            guess = [[temp]]
            new = np.array(guess)
            track_test = np.concatenate((track_test, new), axis=1)

    # And then compare the predictions
    # to the ground truth
    track2 = noisy_movies[which][::, ::, ::, ::]
    for i in range(15):
        fig = plt.figure(figsize=(10, 5))

        ax = fig.add_subplot(121)

        if i >= 7:
            ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
        else:
            ax.text(1, 3, 'Initial trajectory', fontsize=20)

        toplot = track_test[0][i, ::, ::, 0]

        plt.imshow(toplot)
        ax = fig.add_subplot(122)
        plt.text(1, 3, 'Ground truth', fontsize=20)

        toplot = track2[i, ::, ::, 0]
        if i >= 2:
            toplot = shifted_movies[which][i - 1, ::, ::, 0]

        plt.imshow(toplot)
        plt.savefig('./res/%i_animate.png' % (i + 1))
Example #10
    def test_fixed_init_xgb(self):
        model = BaseModel('xgb', 'test_params_set')
        model.fit(X, y)
        self.assertEqual(model.predict(X).shape[0], n_samples)
        self.assertEqual(model.predict(X).shape[1], num_classes)
Example #11
    def test_fixed_init_rf(self):
        model = BaseModel('c-rf', 'test_params_rf_set')
        model.fit(X, y)
        self.assertEqual(model.predict_proba(X).shape[0], n_samples)
        self.assertEqual(model.predict_proba(X).shape[1], num_classes)
Example #12
    def test_init_rf(self):
        model = BaseModel('c-rf', 'test_params_rf_random')
        self.assertEqual(model.name, 'c-rf')
        self.assertEqual(model.params['n_jobs'], -1)
Example #13
    def test_init_xgb(self):
        model = BaseModel('xgb', 'test_params_random')
        self.assertEqual(model.name, 'xgb')
        self.assertEqual(model.params['num_class'], 10)
        self.assertTrue(model.num_rounds >= 10)
        self.assertTrue(model.num_rounds <= 75)
Example #14
def train(save_dir):

    warnings.filterwarnings("ignore")
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"]=FLAGS.GPU_device[len(FLAGS.GPU_device)-1]
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

    # Load the useful files to build the architecture
    print("Loading the connection matrix...")
    start = time.time()

    adj_matrix = pd.read_csv(os.path.abspath(os.path.join(FLAGS.dir_data,"adj_matrix.csv")),index_col=0)
    first_matrix_connection = pd.read_csv(os.path.abspath(os.path.join(FLAGS.dir_data,"first_matrix_connection_GO.csv")),index_col=0)
    csv_go = pd.read_csv(os.path.abspath(os.path.join(FLAGS.dir_data,"go_level.csv")),index_col=0)

    connection_matrix = []
    connection_matrix.append(np.array(first_matrix_connection.values, dtype=np.float32))
    for level in range(7, 2, -1):
        rows = csv_go[str(level)].loc[lambda x: x == 1].index
        cols = csv_go[str(level - 1)].loc[lambda x: x == 1].index
        connection_matrix.append(np.array(adj_matrix.loc[rows, cols].values, dtype=np.float32))
    connection_matrix.append(np.ones((FLAGS.n_hidden_6, FLAGS.n_classes), dtype=np.float32))

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))

    # Load the data
    print("Loading the data...")

    start = time.time()
    loaded = np.load(os.path.abspath(os.path.join(FLAGS.dir_data,"X_train.npz")))
    X_train = loaded['x']
    y_train = loaded['y']
    if FLAGS.n_classes>=2:
        y_train=to_categorical(y_train)

    loaded = np.load(os.path.abspath(os.path.join(FLAGS.dir_data,"X_test.npz")))
    X_test = loaded['x']
    y_test = loaded['y']
    if FLAGS.n_classes>=2:
        y_test=to_categorical(y_test)

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))


    # Launch the model
    print("Launching the learning")
    if FLAGS.type_training != "":
        print("with {} and ALPHA={}".format(FLAGS.type_training,FLAGS.alpha))

    tf.reset_default_graph() 
   
    # -- Inputs of the model --
    X = tf.placeholder(tf.float32, shape=[None, FLAGS.n_input])
    Y = tf.placeholder(tf.float32, shape=[None, FLAGS.n_classes])

    # -- Hyperparameters of the neural network --
    is_training = tf.placeholder(tf.bool,name="is_training") # Batch Norm hyperparameter
    learning_rate = tf.placeholder(tf.float32, name="learning_rate") # Optimizer hyperparameter
    keep_prob = tf.placeholder(tf.float32, name="keep_prob") # Dropout hyperparameter
    total_batches=len(X_train)//FLAGS.batch_size

    network=BaseModel(X=X,n_input=FLAGS.n_input,n_classes=FLAGS.n_classes,
        n_hidden_1=FLAGS.n_hidden_1,n_hidden_2=FLAGS.n_hidden_2,n_hidden_3=FLAGS.n_hidden_3,n_hidden_4=FLAGS.n_hidden_4,
        n_hidden_5=FLAGS.n_hidden_5,n_hidden_6=FLAGS.n_hidden_6,keep_prob=keep_prob,is_training=is_training) # Model instantiation
    pred = network()

    # -- Loss function --

    # ---- CE loss  ----
    # Compute the average of the loss across all the dimensions
    if FLAGS.n_classes>=2:
        ce_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=Y)) 
    else:
        ce_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=Y))
    
    # ---- Regularization loss (LGO, L2, L1) ----
    additional_loss = 0
    if FLAGS.type_training=="LGO":
        for idx,weight in enumerate(network.weights.values()):
            additional_loss+=l2_loss_func(weight*(1-connection_matrix[idx])) # Penalization of the noGO connections
    elif FLAGS.type_training=="L2" :
        for weight in network.weights.values():
            additional_loss += l2_loss_func(weight)
    elif FLAGS.type_training=="L1" :
        for idx,weight in enumerate(network.weights.values()):
            additional_loss+=l1_loss_func(weight)
            
    # ---- Total loss  ----
    if FLAGS.type_training!='' :
        total_loss = ce_loss + FLAGS.alpha*additional_loss
    else:
        total_loss = ce_loss
    
    
    # ---- Norm of the weights of the connections ----
    norm_no_go_connections=0
    norm_go_connections=0
    for idx,weight in enumerate(list(network.weights.values())[:-1]):
        norm_no_go_connections+=tf.norm((weight*(1-connection_matrix[idx])),ord=1)/np.count_nonzero(1-connection_matrix[idx])
        norm_go_connections+=tf.norm((weight*connection_matrix[idx]),ord=1)/np.count_nonzero(connection_matrix[idx])
    norm_no_go_connections/=FLAGS.n_layers
    norm_go_connections/=FLAGS.n_layers

    # -- Optimizer --
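    # run the batch-norm update ops (UPDATE_OPS) before each optimizer step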
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        if FLAGS.lr_method=="adam":
            trainer = tf.train.AdamOptimizer(learning_rate = learning_rate)
        elif FLAGS.lr_method=="momentum":
            trainer = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum=0.09, use_nesterov=True)
        elif FLAGS.lr_method=="adagrad":
            trainer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        elif FLAGS.lr_method=="rmsprop":
            trainer = tf.train.RMSPropOptimizer(learning_rate = learning_rate)
        optimizer = trainer.minimize(total_loss)

    # -- Compute the prediction error --
    if FLAGS.n_classes>=2:
        correct_prediction = tf.equal(tf.argmax(pred,1), tf.argmax(Y, 1))
    else:
        sig_pred=tf.nn.sigmoid(pred)
        sig_pred=tf.cast(sig_pred>0.5,dtype=tf.int64)
        ground_truth=tf.cast(Y,dtype=tf.int64)
        correct_prediction = tf.equal(sig_pred,ground_truth)

    # -- Calculate the accuracy across all the given batches and average them out --
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  

    # -- Initialize the variables --
    init = tf.global_variables_initializer()

    # -- Configure the use of the gpu --
    config = tf.ConfigProto(log_device_placement=False,allow_soft_placement=True)
    #config.gpu_options.allow_growth = True, log_device_placement=True

    if FLAGS.save or FLAGS.restore : saver = tf.train.Saver()

    start = time.time()

    with tf.device(FLAGS.GPU_device):
        with tf.Session(config=config) as sess: 
            sess.run(init)

            train_c_accuracy=[]
            train_c_total_loss=[]

            test_c_accuracy=[]
            test_c_total_loss=[]

            c_l1_norm_go=[]
            c_l1_norm_no_go=[]

            if FLAGS.type_training!="":
                train_c_ce_loss=[]
                test_c_ce_loss=[]
                train_c_additional_loss=[]
                test_c_additional_loss=[]

            for epoch in tqdm(np.arange(0,FLAGS.epochs)):

                index = np.arange(X_train.shape[0])
                np.random.shuffle(index)
                batch_X = np.array_split(X_train[index], total_batches)
                batch_Y = np.array_split(y_train[index], total_batches)

                # -- Optimization --
                for batch in range(total_batches):
                    batch_x,batch_y=batch_X[batch],batch_Y[batch]
                    sess.run(optimizer, feed_dict={X: batch_x,Y: batch_y,is_training:FLAGS.is_training,keep_prob:FLAGS.keep_prob,learning_rate:FLAGS.learning_rate})

                if ((epoch+1) % FLAGS.display_step == 0) or (epoch==0) :
                    if not((FLAGS.display_step==FLAGS.epochs) and (epoch==0)):

                        # -- Calculate batch loss and accuracy after a specific epoch on the train and test set --

                        avg_cost,avg_acc,l1_norm_no_go,l1_norm_go = sess.run([total_loss, accuracy,norm_no_go_connections,norm_go_connections], feed_dict={X: X_train,Y: y_train,
                                                               is_training:False,keep_prob:1.0})
                        train_c_total_loss.append(avg_cost)
                        train_c_accuracy.append(avg_acc)
                        c_l1_norm_go.append(l1_norm_go)
                        c_l1_norm_no_go.append(l1_norm_no_go)

                        if FLAGS.type_training!="":
                            avg_ce_loss,avg_additional_loss= sess.run([ce_loss, additional_loss], feed_dict={X: X_train,Y: y_train,is_training:False,keep_prob:1.0})
                            train_c_additional_loss.append(avg_additional_loss)
                            train_c_ce_loss.append(avg_ce_loss)

                        avg_cost,avg_acc = sess.run([total_loss, accuracy], feed_dict={X: X_test,Y: y_test,is_training:False,keep_prob:1.0})
                        test_c_total_loss.append(avg_cost)
                        test_c_accuracy.append(avg_acc)

                        if FLAGS.type_training!="": 
                            avg_ce_loss,avg_additional_loss= sess.run([ce_loss, additional_loss], feed_dict={X: X_test,Y: y_test,is_training:False,keep_prob:1.0})
                            test_c_additional_loss.append(avg_additional_loss)
                            test_c_ce_loss.append(avg_ce_loss)                

                        current_idx=len(train_c_total_loss)-1                   
                        print('| Epoch: {}/{} | Train: Loss {:.6f} Accuracy : {:.6f} '\
                        '| Test: Loss {:.6f} Accuracy : {:.6f}\n'.format(
                        epoch+1, FLAGS.epochs,train_c_total_loss[current_idx], train_c_accuracy[current_idx],test_c_total_loss[current_idx],test_c_accuracy[current_idx]))

            if FLAGS.save: saver.save(sess=sess, save_path=os.path.join(save_dir,"model"))

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec ".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))   

    performances = {
                    'total_loss':train_c_total_loss,'test_total_loss':test_c_total_loss,
                    'acc':train_c_accuracy,'test_acc':test_c_accuracy
                    }

    performances['norm_go']=c_l1_norm_go
    performances['norm_no_go']=c_l1_norm_no_go

    if FLAGS.type_training!="":      
        performances['additional_loss']=train_c_additional_loss
        performances['test_additional_loss']=test_c_additional_loss
        performances['ce_loss']=train_c_ce_loss
        performances['test_ce_loss']=test_c_ce_loss


    return performances
Example #15
def main():
    args = parse_args()
    print(args)
    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    train_program = fluid.Program()
    startup_program = fluid.Program()

    with fluid.program_guard(train_program, startup_program):
        # Training process

        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=dropout)
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=dropout)
        loss = model.build_graph()
        inference_program = train_program.clone(for_test=True)
        fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
            clip_norm=max_grad_norm))
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr)
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr)
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not support")

        optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(startup_program)

    device_count = len(fluid.cuda_places()) if args.use_gpu else len(
        fluid.cpu_places())

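    # compile the training program for data-parallel execution on the available devices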
    CompiledProgram = fluid.CompiledProgram(train_program).with_data_parallel(
        loss_name=loss.name)

    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
    vocab_prefix = args.vocab_prefix
    src_lang = args.src_lang
    tar_lang = args.tar_lang
    print("begin to load data")
    raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                               train_data_prefix, eval_data_prefix,
                               test_data_prefix, args.max_len)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))

        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask

        return res, np.sum(tar_mask)

    # evaluate perplexity (ppl) on a given data split
    def eval(data, epoch_id=0):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, word_num = prepare_input(batch,
                                                      epoch_id,
                                                      with_lr=False)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

        ppl = np.exp(total_loss / word_count)

        return ppl

    def train():
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                fetch_outs = exe.run(program=CompiledProgram,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)

                cost_train = np.mean(fetch_outs[0])
                # print(cost_train)
                total_loss += cost_train * batch_size
                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)

                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f" %
                        (epoch_id, batch_id, batch_time,
                         np.exp(total_loss / word_count)))
                    ce_ppl.append(np.exp(total_loss / word_count))
                    total_loss = 0.0
                    word_count = 0.0

                # profiler tools
                if args.profile and epoch_id == 0 and batch_id == 100:
                    profiler.reset_profiler()
                elif args.profile and epoch_id == 0 and batch_id == 105:
                    return

            end_time = time.time()
            epoch_time = end_time - start_time
            ce_time.append(epoch_time)
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step\n"
                % (epoch_id, epoch_time, sum(batch_times) / len(batch_times)))

            if not args.profile:
                save_path = os.path.join(args.model_path,
                                         "epoch_" + str(epoch_id),
                                         "checkpoint")
                print("begin to save", save_path)
                fluid.save(train_program, save_path)
                print("save finished")
                dev_ppl = eval(valid_data)
                print("dev ppl", dev_ppl)
                test_ppl = eval(test_data)
                print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))

    with profile_context(args.profile, args.profiler_path):
        train()
Example #16
def main():
    args = parse_args()
    print(args)
    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        #args.enable_ce = True
        if args.enable_ce:
            fluid.default_startup_program().random_seed = 102
            fluid.default_main_program().random_seed = 102
            np.random.seed(102)
            random.seed(102)

        # Training process

        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=dropout)
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=dropout)
        global_norm_clip = GradClipByGlobalNorm(max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr,
                                            parameter_list=model.parameters())
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr,
                                             parameter_list=model.parameters())
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not support")

        train_data_prefix = args.train_data_prefix
        eval_data_prefix = args.eval_data_prefix
        test_data_prefix = args.test_data_prefix
        vocab_prefix = args.vocab_prefix
        src_lang = args.src_lang
        tar_lang = args.tar_lang
        print("begin to load data")
        raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                                   train_data_prefix, eval_data_prefix,
                                   test_data_prefix, args.max_len)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            res = {}
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]

            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        # evaluate perplexity (ppl) on a given data split
        def eval(data, epoch_id=0):
            model.eval()
            eval_data_iter = reader.get_data_iter(data,
                                                  batch_size,
                                                  mode='eval')
            total_loss = 0.0
            word_count = 0.0
            for batch_id, batch in enumerate(eval_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id)
                loss = model(input_data_feed)

                total_loss += loss * batch_size
                word_count += word_num
            ppl = np.exp(total_loss.numpy() / word_count)
            model.train()
            return ppl

        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            model.train()
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                loss = model(input_data_feed)
                # print(loss.numpy()[0])
                loss.backward()
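                # gradients are clipped by global norm before the parameter update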
                optimizer.minimize(loss, grad_clip=global_norm_clip)
                model.clear_gradients()
                total_loss += loss * batch_size
                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)

                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f" %
                        (epoch_id, batch_id, batch_time,
                         np.exp(total_loss.numpy() / word_count)))
                    ce_ppl.append(np.exp(total_loss.numpy() / word_count))
                    total_loss = 0.0
                    word_count = 0.0

            end_time = time.time()
            epoch_time = end_time - start_time
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step\n"
                % (epoch_id, epoch_time, sum(batch_times) / len(batch_times)))
            ce_time.append(epoch_time)

            dir_name = os.path.join(args.model_path, "epoch_" + str(epoch_id))
            print("begin to save", dir_name)
            paddle.fluid.save_dygraph(model.state_dict(), dir_name)
            print("save finished")
            dev_ppl = eval(valid_data)
            print("dev ppl", dev_ppl)
            test_ppl = eval(test_data)
            print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
Example #17
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size
    # inference process

    print("src", src_vocab_size)

    # dropout type is upscale_in_train, so dropout can be removed at inference time;
    # we simply set dropout to 0 here
    if args.attention:
        model = AttentionModel(hidden_size,
                               src_vocab_size,
                               tar_vocab_size,
                               batch_size,
                               num_layers=num_layers,
                               init_scale=init_scale,
                               dropout=0.0)
    else:
        model = BaseModel(hidden_size,
                          src_vocab_size,
                          tar_vocab_size,
                          batch_size,
                          num_layers=num_layers,
                          init_scale=init_scale,
                          dropout=0.0)

    beam_size = args.beam_size
    trans_res = model.build_graph(mode='beam_search', beam_size=beam_size)
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    source_vocab_file = args.vocab_prefix + "." + args.src_lang
    infer_file = args.infer_file

    infer_data = reader.raw_mono_data(source_vocab_file, infer_file)

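    # target ids are fed as zero placeholders; beam-search decoding is driven by the source inputs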
    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1], 1))
        in_tar = np.zeros_like(in_tar, dtype='int64')
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))
        label_tar = np.zeros_like(label_tar, dtype='int64')

        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask

        return res, np.sum(tar_mask)

    dir_name = args.reload_model
    print("dir name", dir_name)
    fluid.io.load_params(exe, dir_name)

    train_data_iter = reader.get_data_iter(infer_data, 1, mode='eval')

    tar_id2vocab = []
    tar_vocab_file = args.vocab_prefix + "." + args.tar_lang
    with open(tar_vocab_file, "r") as f:
        for line in f.readlines():
            tar_id2vocab.append(line.strip())

    infer_output_file = args.infer_output_file

    out_file = open(infer_output_file, 'w')

    for batch_id, batch in enumerate(train_data_iter):
        input_data_feed, word_num = prepare_input(batch, epoch_id=0)

        fetch_outs = exe.run(feed=input_data_feed,
                             fetch_list=[trans_res.name],
                             use_program_cache=False)

        res = [tar_id2vocab[e] for e in fetch_outs[0].reshape(-1)]

        res = res[1:]

        new_res = []
        for ele in res:
            if ele == "</s>":
                break
            new_res.append(ele)

        out_file.write(' '.join(new_res))
        out_file.write('\n')

    out_file.close()
Example #18
def train_dense_nodes():
    for dn in [4096, 2048, 1024, 512, 256, 128]:
        m = BaseModel('data/', dense_nodes=dn)
        train_one(m)
Example #19
    def test_should_raise_exception_on_renderer_type(self):
        # given
        model = BaseModel(standalone=False)

        # then
        self.assertRaises(AttributeError, lambda: model.renderer_type)
Example #20
def evaluate(save_dir):

    warnings.filterwarnings("ignore")
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"]=FLAGS.GPU_device[len(FLAGS.GPU_device)-1]
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

    # Load the useful files to build the architecture
    print("Loading the connection matrix...")
    start = time.time()

    adj_matrix = pd.read_csv(os.path.join(FLAGS.dir_data,"adj_matrix.csv"),index_col=0)
    first_matrix_connection = pd.read_csv(os.path.join(FLAGS.dir_data,"first_matrix_connection_GO.csv"),index_col=0)
    csv_go = pd.read_csv(os.path.join(FLAGS.dir_data,"go_level.csv"),index_col=0)

    connection_matrix = []
    connection_matrix.append(np.array(first_matrix_connection.values, dtype=np.float32))
    for level in range(7, 2, -1):
        rows = csv_go[str(level)].loc[lambda x: x == 1].index
        cols = csv_go[str(level - 1)].loc[lambda x: x == 1].index
        connection_matrix.append(np.array(adj_matrix.loc[rows, cols].values, dtype=np.float32))
    connection_matrix.append(np.ones((FLAGS.n_hidden_6, FLAGS.n_classes), dtype=np.float32))

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))

    # Load the data
    print("Loading the test dataset...")

    loaded = np.load(os.path.join(FLAGS.dir_data,"X_test.npz"))
    X_test = loaded['x']
    y_test = loaded['y']
    if FLAGS.n_classes>=2:
        y_test=to_categorical(y_test)

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))


    # Launch the model
    print("Launching the evaluation")
    if FLAGS.type_training != "":
        print("with {} and ALPHA={}".format(FLAGS.type_training,FLAGS.alpha))

    tf.reset_default_graph() 
   
    # -- Inputs of the model --
    X = tf.placeholder(tf.float32, shape=[None, FLAGS.n_input])
    Y = tf.placeholder(tf.float32, shape=[None, FLAGS.n_classes])

    # -- Hyperparameters of the neural network --
    is_training = tf.placeholder(tf.bool,name="is_training") # Batch Norm hyperparameter
    keep_prob = tf.placeholder(tf.float32, name="keep_prob") # Dropout hyperparameter

    network=BaseModel(X=X,n_input=FLAGS.n_input,n_classes=FLAGS.n_classes,
        n_hidden_1=FLAGS.n_hidden_1,n_hidden_2=FLAGS.n_hidden_2,n_hidden_3=FLAGS.n_hidden_3,n_hidden_4=FLAGS.n_hidden_4,
        n_hidden_5=FLAGS.n_hidden_5,n_hidden_6=FLAGS.n_hidden_6,keep_prob=keep_prob,is_training=is_training) # Model instantiation
    pred = network()

    # -- Loss function --

    # ---- CE loss  ----
    # Compute the average of the loss across all the dimensions
    if FLAGS.n_classes>=2:
        ce_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=Y))
    else:
        ce_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=Y))
    
    # ---- Regularization loss (LGO, L2, L1) ----
    additional_loss = 0
    if FLAGS.type_training=="LGO":
        for idx,weight in enumerate(network.weights.values()):
            additional_loss+=l2_loss_func(weight*(1-connection_matrix[idx])) # Penalization of the noGO connections
    elif FLAGS.type_training=="L2" :
        for weight in network.weights.values():
            additional_loss += l2_loss_func(weight)
    elif FLAGS.type_training=="L1" :
        for idx,weight in enumerate(network.weights.values()):
            additional_loss+=l1_loss_func(weight)

    # ---- Total loss  ----
    if FLAGS.type_training!='' :
        total_loss = ce_loss + FLAGS.alpha*additional_loss
    else:
        total_loss = ce_loss

    # -- Compute the prediction error --
    if FLAGS.n_classes>=2:
        correct_prediction = tf.equal(tf.argmax(pred,1), tf.argmax(Y, 1))
    else:
        sig_pred=tf.nn.sigmoid(pred)
        sig_pred=tf.cast(sig_pred>0.5,dtype=tf.int64)
        ground_truth=tf.cast(Y,dtype=tf.int64)
        correct_prediction = tf.equal(sig_pred,ground_truth)

    # -- Calculate the accuracy across all the given batches and average them out --
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  

    # -- Configure the use of the gpu --
    config = tf.ConfigProto(log_device_placement=False,allow_soft_placement=True)
    #config.gpu_options.allow_growth = True, log_device_placement=True

    if FLAGS.restore : saver = tf.train.Saver()

    start = time.time()

    with tf.device(FLAGS.GPU_device):
        with tf.Session(config=config) as sess: 
            if  FLAGS.restore:
                saver.restore(sess,os.path.join(save_dir,"model")) 

            # -- Calculate the final loss and the final accuracy on the test set --

            avg_cost,avg_acc = sess.run([total_loss, accuracy], feed_dict={X: X_test,Y: y_test,is_training:FLAGS.is_training,keep_prob:1})          
         
            print('Test loss {:.6f}, test accuracy : {:.6f}\n'.format(avg_cost,avg_acc))

    end = time.time()
    elapsed=end - start
    print("Total time: {}h {}min {}sec ".format(time.gmtime(elapsed).tm_hour,
    time.gmtime(elapsed).tm_min,
    time.gmtime(elapsed).tm_sec))   

    return