Example #1
import sys

# Project-local imports assumed by this snippet (not shown on the source
# page): read_corpus, zip_data, paths, gconfig, tconfig, routine, and
# MixedPrecisionModel.


def train(load_from=None, save_to=None):
    # Choose the log stream: stdout when configured, stderr otherwise.
    # (print_file is set up but not passed to the print() calls below.)
    print_file = sys.stderr
    if gconfig.printout:
        print_file = sys.stdout
    train_data_src = read_corpus(paths.train_source, source='src')
    train_data_tgt = read_corpus(paths.train_target, source='tgt')

    dev_data_src = read_corpus(paths.dev_source, source='src')
    dev_data_tgt = read_corpus(paths.dev_target, source='tgt')

    train_data = zip_data(train_data_src, train_data_tgt)
    dev_data = zip_data(dev_data_src, dev_data_tgt)

    train_batch_size = tconfig.batch_size
    valid_niter = gconfig.valid_niter
    log_every = gconfig.log_every
    if save_to is not None:
        model_save_path = save_to
    else:
        model_save_path = paths.model_mixed

    max_epoch = tconfig.max_epoch

    if gconfig.sanity:
        log_every = 1
        train_data = train_data[:150]
        dev_data = dev_data[:150]
        max_epoch = 2
    pretraining = gconfig.pretraining
    pretraining_encoder = gconfig.pretraining_encoder
    if load_from is not None:
        print("Loading from", load_from)
        model = MixedPrecisionModel.load(load_from)
        pretraining = False
        pretraining_encoder = False
    else:
        print("No loading file provided : training from scratch")
        model = MixedPrecisionModel()

    if gconfig.cuda:
        model.to_gpu()
    else:
        print("No cuda support")
    model.quantize()
    # Early-stopping and learning-rate schedule settings
    lr = tconfig.lr
    max_patience = tconfig.patience
    max_num_trial = tconfig.max_num_trial
    lr_decay = tconfig.lr_decay

    if pretraining_encoder:
        print("Pretraining the encoder")
        routine.train_encoder(model, train_data, dev_data, model_save_path,
                              train_batch_size, valid_niter, log_every,
                              tconfig.max_epoch_pretraining_encoder, lr,
                              max_patience, max_num_trial, lr_decay)
        model.reset_optimizer()

    if pretraining:
        print("Pretraining the decoder")
        routine.train_decoder(model, train_data, dev_data, model_save_path,
                              train_batch_size, valid_niter, log_every,
                              tconfig.max_epoch_pretraining, lr, max_patience,
                              max_num_trial, lr_decay)
        model.reset_optimizer()

    model = routine.train_model(model, train_data, dev_data, model_save_path,
                                train_batch_size, valid_niter, log_every,
                                max_epoch, lr, max_patience, max_num_trial,
                                lr_decay)
    model.to_cpu()
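
A minimal invocation sketch for this first variant. The argparse flags below are hypothetical, not part of the original project; they only illustrate how load_from and save_to map onto resuming from a checkpoint and choosing the save path:

# Hypothetical driver; the --load-from/--save-to flags are illustrative only.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train the mixed-precision NMT model")
    parser.add_argument("--load-from", default=None,
                        help="checkpoint to resume from (skips both pretraining phases)")
    parser.add_argument("--save-to", default=None,
                        help="where to save the model (defaults to paths.model_mixed)")
    args = parser.parse_args()
    train(load_from=args.load_from, save_to=args.save_to)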
Example #2
import sys

import numpy as np

# Project-local imports assumed by this snippet (not shown on the source
# page): read_corpus, zip_data, paths, config, routine, and NMTModel.


def train(helper=False):
    # Select the log stream (unused by the bare print() calls below).
    print_file = sys.stderr
    if config.printout:
        print_file = sys.stdout
    train_data_src = read_corpus(paths.train_source, source='src')
    train_data_tgt = read_corpus(paths.train_target, source='tgt')

    if config.use_helper:
        train_data_src_helper = read_corpus(paths.train_source_helper,
                                            source='src',
                                            lg=config.helper_language(
                                                config.language))
        train_data_tgt_helper = read_corpus(paths.train_target_helper,
                                            source='tgt',
                                            lg=config.helper_language(
                                                config.language))
        train_data_src = train_data_src + train_data_src_helper
        train_data_tgt = train_data_tgt + train_data_tgt_helper

    dev_data_src = read_corpus(paths.dev_source, source='src')
    dev_data_tgt = read_corpus(paths.dev_target, source='tgt')

    train_data = zip_data(train_data_src, train_data_tgt)
    dev_data = zip_data(dev_data_src, dev_data_tgt)

    train_batch_size = config.batch_size
    valid_niter = config.valid_niter
    log_every = config.log_every
    # NB: train()'s helper argument is unused; the save path always targets
    # the non-helper model.
    model_save_path = paths.model(
        helper=False) + (".subwords" if config.subwords else "")
    max_epoch = config.max_epoch

    if config.sanity:
        log_every = 1
        train_data = train_data[:150]
        dev_data = dev_data[:150]
        max_epoch = 2
    pretraining = config.pretraining
    pretraining_encoder = config.pretraining_encoder
    loaded_model = False
    if config.load:
        try:
            model = NMTModel.load(model_save_path)
            pretraining = False
            pretraining_encoder = False
            loaded_model = True
        except Exception:
            print("Could not load the model; creating a new one.")
    if not loaded_model:
        model = NMTModel()
        if config.encoder_embeddings:
            if config.mode == "normal":
                print("loading encoder embeddings")
                encoder_embeddings = np.load(paths.get_enc_vec())
                model.initialize_enc_embeddings(encoder_embeddings)
            if config.mode == "multi":
                print("loading encoder embeddings")
                lrl_embedding_path, hrl_embedding_path = paths.get_enc_vec()
                lrl_embedding, hrl_embedding = np.load(
                    lrl_embedding_path), np.load(hrl_embedding_path)
                model.initialize_enc_embeddings((lrl_embedding, hrl_embedding))
        if config.decoder_embeddings:
            print("loading decoder embeddings")
            decoder_embeddings = np.load(paths.get_dec_vec())
            model.initialize_dec_embeddings(decoder_embeddings)

    if config.cuda:
        model.to_gpu()
    else:
        print("No cuda support")

    # Early-stopping and learning-rate schedule settings
    lr = config.lr
    max_patience = config.patience
    max_num_trial = config.max_num_trial
    lr_decay = config.lr_decay

    if pretraining_encoder:
        print("Loading monolingual data")
        mono_data_src = read_corpus(paths.data_monolingual)
        # Monolingual data has no target side; pair each source with an
        # empty target.
        mono_data_tgt = [[] for _ in range(len(mono_data_src))]
        source_data = zip_data(mono_data_src, mono_data_tgt, "mono",
                               train_data_src, train_data_tgt, "low")
        print("Pretraining the encoder")
        routine.train_encoder(model, source_data, dev_data, model_save_path,
                              config.mono_batch_size, valid_niter, log_every,
                              config.max_epoch_pretraining_encoder, lr,
                              max_patience, max_num_trial, lr_decay)

    if pretraining:
        print("Pretraining the decoder")
        routine.train_decoder(model, train_data, dev_data, model_save_path,
                              train_batch_size, valid_niter, log_every,
                              config.max_epoch_pretraining, lr, max_patience,
                              max_num_trial, lr_decay)

    model = routine.train_model(model, train_data, dev_data, model_save_path,
                                train_batch_size, valid_niter, log_every,
                                max_epoch, lr, max_patience, max_num_trial,
                                lr_decay)
    model.to_cpu()
    sys.exit(0)
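
These examples lean on a project-local zip_data helper whose source is not shown. From the call sites (untagged source/target pairs that get sliced like lists, and tagged corpora that Example #3 below slices via .items()), one plausible reconstruction is the sketch that follows; the real implementation may differ:

def zip_data(*args):
    """Hypothetical reconstruction of zip_data from its call sites.

    zip_data(src, tgt)                    -> list of (src_sent, tgt_sent) pairs
    zip_data(src1, tgt1, tag1, src2, ...) -> {tag: list of pairs}, one entry
                                             per (src, tgt, tag) triple
    """
    if len(args) == 2:
        src, tgt = args
        return list(zip(src, tgt))
    data = {}
    # Consume (src_list, tgt_list, tag) triples.
    for i in range(0, len(args), 3):
        src, tgt, tag = args[i:i + 3]
        data[tag] = list(zip(src, tgt))
    return data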
Example #3
import sys

# Project-local imports assumed by this snippet (not shown on the source
# page): read_corpus, zip_data, paths, config, routine, and MultiWayModel.


def train():
    # Select the log stream (unused by the bare print() calls below).
    print_file = sys.stderr
    if config.printout:
        print_file = sys.stdout
    train_data_src_low = read_corpus(paths.train_source, source='src')
    train_data_tgt_low = read_corpus(paths.train_target, source='tgt')

    dev_data_src_low = read_corpus(paths.dev_source, source='src')
    dev_data_tgt_low = read_corpus(paths.dev_target, source='tgt')

    train_data_src_helper = read_corpus(paths.train_source_helper,
                                        source='src')
    train_data_tgt_helper = read_corpus(paths.train_target_helper,
                                        source='tgt')

    dev_data_src_helper = read_corpus(paths.dev_source_helper, source='src')
    dev_data_tgt_helper = read_corpus(paths.dev_target_helper, source='tgt')

    train_data = zip_data(train_data_src_low, train_data_tgt_low, "low",
                          train_data_src_helper, train_data_tgt_helper,
                          "helper")

    train_data_low = zip_data(train_data_src_low, train_data_tgt_low, "low")
    train_data_helper = zip_data(train_data_src_helper, train_data_tgt_helper,
                                 "helper")

    dev_data_low = zip_data(dev_data_src_low, dev_data_tgt_low, "low")
    dev_data_helper = zip_data(dev_data_src_helper, dev_data_tgt_helper,
                               "helper")

    train_batch_size = config.batch_size
    valid_niter = config.valid_niter
    log_every = config.log_every
    model_save_path = paths.model(helper=False) + ".multi"
    max_epoch = config.max_epoch
    sampling = config.sampling

    if config.sanity:
        log_every = 1
        valid_niter = 5
        train_data = {k: v[:150] for k, v in train_data.items()}
        dev_data_low = {k: v[:150] for k, v in dev_data_low.items()}
        dev_data_helper = {k: v[:150] for k, v in dev_data_helper.items()}
        train_data_low = {k: v[:150] for k, v in train_data_low.items()}
        train_data_helper = {k: v[:150] for k, v in train_data_helper.items()}
        max_epoch = 2
    pretraining_decoder = config.pretraining_decoder
    pretraining_encoders = config.pretraining_encoders
    if config.load:
        try:
            model = MultiWayModel.load(model_save_path)
            pretraining_decoder = False
            pretraining_encoders = False
        except Exception:
            print("Could not load the model; creating a new one.")
            model = MultiWayModel()
    else:
        model = MultiWayModel()

    if config.cuda:
        model.to_gpu()
    else:
        print("No cuda support")

    # Early-stopping and learning-rate schedule settings
    lr = config.lr
    max_patience = config.patience
    max_num_trial = config.max_num_trial
    lr_decay = config.lr_decay

    if pretraining_decoder:
        print("Pretraining the decoder")
        model.activate_discriminator = False
        routine.train_decoder(model, train_data_helper, dev_data_helper,
                              model_save_path, train_batch_size, valid_niter,
                              log_every, config.max_epoch_pretraining_decoder,
                              lr, max_patience, max_num_trial, lr_decay)
        routine.train_decoder(model, train_data_low, dev_data_low,
                              model_save_path, train_batch_size, valid_niter,
                              log_every, config.max_epoch_pretraining_decoder,
                              lr, max_patience, max_num_trial, lr_decay)

    if pretraining_encoders:
        model.activate_discriminator = False
        print("Pretraining the helper encoder")
        routine.train_model(model, train_data_helper, dev_data_helper,
                            model_save_path, train_batch_size, valid_niter,
                            log_every, config.max_epoch_pretraining_helper, lr,
                            max_patience, max_num_trial, lr_decay)
        print("Pretraining the low-resource encoder")
        routine.train_model(model, train_data_low, dev_data_low,
                            model_save_path, train_batch_size, valid_niter,
                            log_every, config.max_epoch_pretraining_low, lr,
                            max_patience, max_num_trial, lr_decay)

    print("Multitask training")
    model.activate_discriminator = True
    model = routine.train_model(model,
                                train_data,
                                dev_data_low,
                                model_save_path,
                                train_batch_size,
                                valid_niter,
                                log_every,
                                max_epoch,
                                lr,
                                max_patience,
                                max_num_trial,
                                lr_decay,
                                sampling_multi=sampling)
    model.to_cpu()
    sys.exit(0)
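
A quick smoke test for this multitask variant, assuming the config module's attributes are writable at runtime (a hypothetical convenience; the real config may be file-driven):

# Hypothetical smoke test: config.sanity truncates every corpus to 150
# pairs, validates every 5 iterations, and caps training at 2 epochs.
import config

config.sanity = True
train()  # note: this variant calls sys.exit(0) when training finishes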