def retrain(load_from, save_to=None):
    print_file = sys.stderr
    if gconfig.printout:
        print_file = sys.stdout
    train_data_src = read_corpus(paths.train_source, source='src')
    train_data_tgt = read_corpus(paths.train_target, source='tgt')
    dev_data_src = read_corpus(paths.dev_source, source='src')
    dev_data_tgt = read_corpus(paths.dev_target, source='tgt')
    train_data = zip_data(train_data_src, train_data_tgt)
    dev_data = zip_data(dev_data_src, dev_data_tgt)
    train_batch_size = tconfig.batch_size
    valid_niter = gconfig.valid_niter
    log_every = gconfig.log_every
    if save_to is None:
        model_save_path = paths.model_postfactorized_retrained
    else:
        model_save_path = save_to
    max_epoch = cconfig.max_epoch_retraining
    if gconfig.sanity:
        log_every = 1
        train_data = train_data[:150]
        dev_data = dev_data[:150]
        max_epoch = 2
    print("Loading from", load_from)
    model = PostFactorizedModel.load(load_from)
    print("Loaded.")
    if gconfig.cuda:
        model.to_gpu()
    else:
        print("No cuda support")
    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cumulative_tgt_words = report_tgt_words = 0
    cumulative_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    lr = tconfig.lr
    max_patience = tconfig.patience
    max_num_trial = tconfig.max_num_trial
    lr_decay = tconfig.lr_decay
    model = routine.train_model(model, train_data, dev_data, model_save_path,
                                train_batch_size, valid_niter, log_every, max_epoch,
                                lr, max_patience, max_num_trial, lr_decay)
    model.to_cpu()

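# `zip_data` is used throughout these scripts but not defined in this excerpt.
# A minimal sketch consistent with both call patterns seen here (an assumption,
# not the project's actual implementation): called as zip_data(src, tgt) it
# returns a sliceable list of sentence pairs; called with (src, tgt, label)
# triples it returns a dict mapping each label to its list of pairs, matching
# the dict handling in the multi-way training code further down.
def zip_data_sketch(*args):
    if len(args) == 2:
        src, tgt = args
        return list(zip(src, tgt))
    return {args[i + 2]: list(zip(args[i], args[i + 1]))
            for i in range(0, len(args), 3)}
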
def train(helper=False):
    # NOTE: the `helper` parameter is currently unused; helper data is
    # controlled by config.use_helper and the save path is always the
    # non-helper one.
    print_file = sys.stderr
    if config.printout:
        print_file = sys.stdout
    train_data_src = read_corpus(paths.train_source, source='src')
    train_data_tgt = read_corpus(paths.train_target, source='tgt')
    if config.use_helper:
        train_data_src_helper = read_corpus(paths.train_source_helper, source='src',
                                            lg=config.helper_language(config.language))
        train_data_tgt_helper = read_corpus(paths.train_target_helper, source='tgt',
                                            lg=config.helper_language(config.language))
        train_data_src = train_data_src + train_data_src_helper
        train_data_tgt = train_data_tgt + train_data_tgt_helper
    dev_data_src = read_corpus(paths.dev_source, source='src')
    dev_data_tgt = read_corpus(paths.dev_target, source='tgt')
    train_data = zip_data(train_data_src, train_data_tgt)
    dev_data = zip_data(dev_data_src, dev_data_tgt)
    train_batch_size = config.batch_size
    valid_niter = config.valid_niter
    log_every = config.log_every
    model_save_path = paths.model(helper=False) + (".subwords" if config.subwords else "")
    max_epoch = config.max_epoch
    if config.sanity:
        log_every = 1
        train_data = train_data[:150]
        dev_data = dev_data[:150]
        max_epoch = 2
    pretraining = config.pretraining
    pretraining_encoder = config.pretraining_encoder
    loaded_model = False
    if config.load:
        try:
            model = NMTModel.load(model_save_path)
            pretraining = False
            pretraining_encoder = False
            loaded_model = True
        except Exception:
            print("Impossible to load the model; creating a new one.")
    if not loaded_model:
        model = NMTModel()
        if config.encoder_embeddings:
            if config.mode == "normal":
                print("loading encoder embeddings")
                encoder_embeddings = np.load(paths.get_enc_vec())
                model.initialize_enc_embeddings(encoder_embeddings)
            if config.mode == "multi":
                print("loading encoder embeddings")
                lrl_embedding_path, hrl_embedding_path = paths.get_enc_vec()
                lrl_embedding, hrl_embedding = np.load(lrl_embedding_path), np.load(hrl_embedding_path)
                model.initialize_enc_embeddings((lrl_embedding, hrl_embedding))
        if config.decoder_embeddings:
            print("loading decoder embeddings")
            decoder_embeddings = np.load(paths.get_dec_vec())
            model.initialize_dec_embeddings(decoder_embeddings)
    if config.cuda:
        model.to_gpu()
    else:
        print("No cuda support")
    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cumulative_tgt_words = report_tgt_words = 0
    cumulative_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    lr = config.lr
    max_patience = config.patience
    max_num_trial = config.max_num_trial
    lr_decay = config.lr_decay
    if pretraining_encoder:
        # print("Pretraining the encoder")
        # pretrain.train_encoder(model, train_data, dev_data)
        print("Loading monolingual data")
        mono_data_src = read_corpus(paths.data_monolingual)
        mono_data_tgt = [[] for i in range(len(mono_data_src))]
        # train_helper_src = read_corpus(paths.train_source_helper)
        # train_helper_tgt = [[] for i in range(len(train_helper_src))]
        source_data = zip_data(mono_data_src, mono_data_tgt, "mono",
                               train_data_src, train_data_tgt, "low")
        print("Pretraining the encoder")
        routine.train_encoder(model, source_data, dev_data, model_save_path,
                              config.mono_batch_size, valid_niter, log_every,
                              config.max_epoch_pretraining_encoder, lr,
                              max_patience, max_num_trial, lr_decay)
    if pretraining:
        # print("Pretraining the encoder")
        # pretrain.train_encoder(model, train_data, dev_data)
        print("loading all target data")
        # target_data_tgt = []
        # for lg in config.all_languages:
        #     target_data_tgt = target_data_tgt + \
        #         read_corpus(paths.get_data_path(set="train", mode="tg", lg=lg))
        # train_helper_tgt = read_corpus(paths.train_target_helper)
        # train_helper_src = [[] for i in range(len(train_helper_tgt))]
        # target_data = zip_data(train_helper_src, train_helper_tgt, "one")
        print("Pretraining the decoder")
        routine.train_decoder(model, train_data, dev_data, model_save_path,
                              train_batch_size, valid_niter, log_every,
                              config.max_epoch_pretraining, lr, max_patience,
                              max_num_trial, lr_decay)
    model = routine.train_model(model, train_data, dev_data, model_save_path,
                                train_batch_size, valid_niter, log_every, max_epoch,
                                lr, max_patience, max_num_trial, lr_decay)
    model.to_cpu()
    exit(0)

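# For the encoder pretraining above, monolingual source sentences are paired
# with empty target lists so they can flow through the same batching code as
# parallel data. A toy illustration (made-up tokens, assuming the zip_data
# behaviour sketched earlier):
#   mono_data_src = [['le', 'chat'], ['un', 'chien']]
#   mono_data_tgt = [[], []]
#   zip_data(mono_data_src, mono_data_tgt, "mono", train_data_src, train_data_tgt, "low")
#   # -> {"mono": [(['le', 'chat'], []), (['un', 'chien'], [])], "low": [...]}
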
def train(load_from=None, save_to=None):
    print_file = sys.stderr
    if gconfig.printout:
        print_file = sys.stdout
    train_data_src = read_corpus(paths.train_source, source='src')
    train_data_tgt = read_corpus(paths.train_target, source='tgt')
    dev_data_src = read_corpus(paths.dev_source, source='src')
    dev_data_tgt = read_corpus(paths.dev_target, source='tgt')
    train_data = zip_data(train_data_src, train_data_tgt)
    dev_data = zip_data(dev_data_src, dev_data_tgt)
    train_batch_size = tconfig.batch_size
    valid_niter = gconfig.valid_niter
    log_every = gconfig.log_every
    if save_to is not None:
        model_save_path = save_to
    else:
        model_save_path = paths.model_mixed
    max_epoch = tconfig.max_epoch
    if gconfig.sanity:
        log_every = 1
        train_data = train_data[:150]
        dev_data = dev_data[:150]
        max_epoch = 2
    pretraining = gconfig.pretraining
    pretraining_encoder = gconfig.pretraining_encoder
    if load_from is not None:
        print("Loading from", load_from)
        model = MixedPrecisionModel.load(load_from)
        pretraining = False
        pretraining_encoder = False
    else:
        print("No loading file provided: training from scratch")
        model = MixedPrecisionModel()
    if gconfig.cuda:
        model.to_gpu()
    else:
        print("No cuda support")
    model.quantize()
    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cumulative_tgt_words = report_tgt_words = 0
    cumulative_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    lr = tconfig.lr
    max_patience = tconfig.patience
    max_num_trial = tconfig.max_num_trial
    lr_decay = tconfig.lr_decay
    if pretraining_encoder:
        # print("Pretraining the encoder")
        # pretrain.train_encoder(model, train_data, dev_data)
        print("Pretraining the encoder")
        routine.train_encoder(model, train_data, dev_data, model_save_path,
                              train_batch_size, valid_niter, log_every,
                              tconfig.max_epoch_pretraining_encoder, lr,
                              max_patience, max_num_trial, lr_decay)
        model.reset_optimizer()
    if pretraining:
        print("Pretraining the decoder")
        routine.train_decoder(model, train_data, dev_data, model_save_path,
                              train_batch_size, valid_niter, log_every,
                              tconfig.max_epoch_pretraining, lr, max_patience,
                              max_num_trial, lr_decay)
        model.reset_optimizer()
    model = routine.train_model(model, train_data, dev_data, model_save_path,
                                train_batch_size, valid_niter, log_every, max_epoch,
                                lr, max_patience, max_num_trial, lr_decay)
    model.to_cpu()

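# The counters initialised above (patience, num_trial, hist_valid_scores, ...)
# follow the standard early-stopping-with-decay recipe. Presumably
# routine.train_model consumes the knobs along these lines (an assumption
# about its internals, not the actual routine):
#   - every valid_niter iterations, evaluate on dev_data;
#   - if the score beats the best in hist_valid_scores, save to
#     model_save_path and reset patience;
#   - otherwise patience += 1; once patience == max_patience, num_trial += 1,
#     lr *= lr_decay, and the best checkpoint is reloaded;
#   - training stops at max_epoch or when num_trial == max_num_trial.
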
def train(load_helper=False, train_helper=False, load=True):
    if load_helper:
        print("Loading model trained on helper language:", config.helper_language)
    else:
        print("Loading model trained on low-resource language:", config.language)
    print_file = sys.stderr
    if config.printout:
        print_file = sys.stdout
    train_batch_size = config.batch_size
    valid_niter = config.valid_niter(train_helper)
    log_every = config.log_every(train_helper)
    max_epoch = config.max_epoch(train_helper)
    if config.sanity:
        log_every = 1
        max_epoch = 2
    pretraining = config.pretraining
    if load_helper:
        model_save_path = paths.model(helper=True) + ".transfer"
    else:
        model_save_path = paths.model(helper=False) + ".transfer"
    if load:
        try:
            model = TransferModel.load(model_save_path, helper=load_helper)
            pretraining = False
        except Exception:
            print("Impossible to load the model; creating a new one.")
            model = TransferModel()
    else:
        model = TransferModel()
    if config.cuda:
        model.to_gpu()
    else:
        print("No cuda support")
    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cumulative_tgt_words = report_tgt_words = 0
    cumulative_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    lr = config.lr
    max_patience = config.patience
    max_num_trial = config.max_num_trial
    lr_decay = config.lr_decay
    if train_helper:
        print("Training model on helper language:", config.helper_language)
        model_save_path = paths.model(helper=True) + ".transfer"
        if not load_helper:
            print("Transferring model from low-resource language to helper language")
            model.switch()
            # model.save(model_save_path)
    else:
        print("Training model on low-resource language:", config.language)
        model_save_path = paths.model(helper=False) + ".transfer"
        pretraining = False
        if load_helper:
            print("Transferring model from helper language to low-resource language")
            model.switch()
            # model.save(model_save_path)
    if train_helper:
        train_data_src = read_corpus(paths.train_source_helper, source='src')
        train_data_tgt = read_corpus(paths.train_target_helper, source='tgt')
        dev_data_src = read_corpus(paths.dev_source_helper, source='src')
        dev_data_tgt = read_corpus(paths.dev_target_helper, source='tgt')
    else:
        train_data_src = read_corpus(paths.train_source, source='src')
        train_data_tgt = read_corpus(paths.train_target, source='tgt')
        dev_data_src = read_corpus(paths.dev_source, source='src')
        dev_data_tgt = read_corpus(paths.dev_target, source='tgt')
    train_data = list(zip(train_data_src, train_data_tgt))
    dev_data = list(zip(dev_data_src, dev_data_tgt))
    if config.sanity:
        train_data = train_data[:150]
        dev_data = dev_data[:150]
    if pretraining:
        # print("Pretraining the encoder")
        # pretrain.train_encoder(model, train_data, dev_data)
        print("Pretraining the decoder")
        pretrain.train_decoder(model, train_data, dev_data, model_save_path,
                               train_batch_size, valid_niter, log_every,
                               config.max_epoch_pretraining, lr, max_patience,
                               max_num_trial, lr_decay)
    model = routine.train_model(model, train_data, dev_data, model_save_path,
                                train_batch_size, valid_niter, log_every, max_epoch,
                                lr, max_patience, max_num_trial, lr_decay)

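# The transfer schedule implied by the flags of the `train` function above, as
# a hypothetical driver (an assumption about how the project chains the calls,
# not confirmed by this excerpt): first train from scratch on the helper
# language, then reload that model, switch() it to the low-resource side, and
# fine-tune on the low-resource data.
def transfer_schedule():
    # Phase 1: fresh model, trained and saved under the helper path.
    train(load_helper=True, train_helper=True, load=False)
    # Phase 2: reload the helper model, switch(), fine-tune on low-resource.
    train(load_helper=True, train_helper=False, load=True)
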
def train():
    print_file = sys.stderr
    if config.printout:
        print_file = sys.stdout
    train_data_src_low = read_corpus(paths.train_source, source='src')
    train_data_tgt_low = read_corpus(paths.train_target, source='tgt')
    dev_data_src_low = read_corpus(paths.dev_source, source='src')
    dev_data_tgt_low = read_corpus(paths.dev_target, source='tgt')
    train_data_src_helper = read_corpus(paths.train_source_helper, source='src')
    train_data_tgt_helper = read_corpus(paths.train_target_helper, source='tgt')
    dev_data_src_helper = read_corpus(paths.dev_source_helper, source='src')
    dev_data_tgt_helper = read_corpus(paths.dev_target_helper, source='tgt')
    train_data = zip_data(train_data_src_low, train_data_tgt_low, "low",
                          train_data_src_helper, train_data_tgt_helper, "helper")
    train_data_low = zip_data(train_data_src_low, train_data_tgt_low, "low")
    train_data_helper = zip_data(train_data_src_helper, train_data_tgt_helper, "helper")
    dev_data_low = zip_data(dev_data_src_low, dev_data_tgt_low, "low")
    dev_data_helper = zip_data(dev_data_src_helper, dev_data_tgt_helper, "helper")
    train_batch_size = config.batch_size
    valid_niter = config.valid_niter
    log_every = config.log_every
    model_save_path = paths.model(helper=False) + ".multi"
    max_epoch = config.max_epoch
    sampling = config.sampling
    if config.sanity:
        log_every = 1
        valid_niter = 5
        train_data = dict([(k, v[:150]) for (k, v) in train_data.items()])
        dev_data_low = dict([(k, v[:150]) for (k, v) in dev_data_low.items()])
        dev_data_helper = dict([(k, v[:150]) for (k, v) in dev_data_helper.items()])
        train_data_low = dict([(k, v[:150]) for (k, v) in train_data_low.items()])
        train_data_helper = dict([(k, v[:150]) for (k, v) in train_data_helper.items()])
        max_epoch = 2
    pretraining_decoder = config.pretraining_decoder
    pretraining_encoders = config.pretraining_encoders
    if config.load:
        # model = MultiWayModel.load(model_save_path)
        try:
            model = MultiWayModel.load(model_save_path)
            pretraining_decoder = False
            pretraining_encoders = False
        except Exception:
            print("Impossible to load the model; creating a new one.")
            model = MultiWayModel()
    else:
        model = MultiWayModel()
    if config.cuda:
        model.to_gpu()
    else:
        print("No cuda support")
    num_trial = 0
    train_iter = patience = cum_loss = report_loss = cumulative_tgt_words = report_tgt_words = 0
    cumulative_examples = report_examples = epoch = valid_num = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    lr = config.lr
    max_patience = config.patience
    max_num_trial = config.max_num_trial
    lr_decay = config.lr_decay
    if pretraining_decoder:
        # print("Pretraining the encoder")
        # pretrain.train_encoder(model, train_data, dev_data)
        print("Pretraining the decoder")
        model.activate_discriminator = False
        routine.train_decoder(model, train_data_helper, dev_data_helper, model_save_path,
                              train_batch_size, valid_niter, log_every,
                              config.max_epoch_pretraining_decoder, lr, max_patience,
                              max_num_trial, lr_decay)
        routine.train_decoder(model, train_data_low, dev_data_low, model_save_path,
                              train_batch_size, valid_niter, log_every,
                              config.max_epoch_pretraining_decoder, lr, max_patience,
                              max_num_trial, lr_decay)
    if pretraining_encoders:
        # print("Pretraining the encoder")
        # pretrain.train_encoder(model, train_data, dev_data)
        model.activate_discriminator = False
        print("Pretraining the helper encoder")
        routine.train_model(model, train_data_helper, dev_data_helper, model_save_path,
                            train_batch_size, valid_niter, log_every,
                            config.max_epoch_pretraining_helper, lr, max_patience,
                            max_num_trial, lr_decay)
        print("Pretraining the low-resource encoder")
        routine.train_model(model, train_data_low, dev_data_low, model_save_path,
                            train_batch_size, valid_niter, log_every,
                            config.max_epoch_pretraining_low, lr, max_patience,
                            max_num_trial, lr_decay)
    print("Multitask training")
    model.activate_discriminator = True
    model = routine.train_model(model, train_data, dev_data_low, model_save_path,
                                train_batch_size, valid_niter, log_every, max_epoch,
                                lr, max_patience, max_num_trial, lr_decay,
                                sampling_multi=sampling)
    model.to_cpu()
    exit(0)
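
# Hypothetical driver for the multi-way run (a sketch: the function takes no
# arguments and reads everything from `config`/`paths`). `sampling_multi`
# presumably controls how routine.train_model mixes the "low" and "helper"
# portions of the multitask batches (an assumption about its internals).
if __name__ == "__main__":
    train()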