def load_options(model_name):
    # Load model options from the pickle file saved next to the model.
    with open('%s.pkl' % model_name, 'rb') as f:
        options = DefaultOptions.copy()
        options.update(pkl.load(f))

    print 'Options:'
    pprint(options)

    return options

def load_options_test(model_name):
    # Load model options; older option pickles predate the 'fix_dp_bug'
    # flag, so default it to False for backward compatibility.
    with open('%s.pkl' % model_name, 'rb') as f:
        options = DefaultOptions.copy()
        options.update(pkl.load(f))
    if 'fix_dp_bug' not in options:
        options['fix_dp_bug'] = False

    print('Options:')
    pprint(options)

    return options

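
# Usage sketch for the two loaders above (the model name is a hypothetical
# placeholder; both functions expect the name without the '.pkl' suffix):
#
#     options = load_options('model/my_model')        # reads 'model/my_model.pkl'
#     options = load_options_test('model/my_model')   # same, plus 'fix_dp_bug' default
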
def test(model_name, beam_size, reload_=True, Hard=False, k=-1, type=None):
    print('Loading options...')
    with open('%s.pkl' % model_name, 'rb') as f:
        model_options = DefaultOptions.copy()
        model_options.update(pkl.load(f))
        # Optional overrides, kept here for debugging:
        # model_options['temperature'] = 1.0
        # model_options['scale'] = 1.0
        # model_options['gate_dropout'] = 0.0
        # model_options['fix_dp_bug'] = False

    print('Building model...')
    model = NMTModel(model_options)
    # model.O['small_train_datasets'] = (
    #     r'\\GCR\Scratch\RR1\dihe\stochastic_lstm\data\test\test.de-en.bpe.25000.de',
    #     r'\\GCR\Scratch\RR1\dihe\stochastic_lstm\data\test\test.de-en.bpe.25000.en',
    # ) + (r'\\GCR\Scratch\RR1\dihe\stochastic_lstm\data\test\test.de-en.en',)

    print('Initializing parameters...')
    params = model.initializer.init_params()
    print('Loading parameters...')
    params = load_params_v2(model_name, params, k, type)
    print('Creating shared parameters...')
    model.init_tparams(params)

    print(model_options)
    check_options(model_options)

    trng, use_noise, stochastic_mode, hyper_param, \
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost, test_cost, x_emb, stochastic_updates, _ = model.build_model()

    print('Building sampler')
    f_init, f_next = model.build_sampler(
        trng=trng, use_noise=use_noise, batch_mode=True,
        stochastic_mode=stochastic_mode, hyper_param=hyper_param)

    uidx = search_start_uidx(reload_, model_name)

    # Bug fix: 'alpha' was only assigned in the soft branch, which raised a
    # NameError when Hard=True; define it before branching.
    alpha = 1.0

    if Hard:
        stochastic_mode.set_value(2)
        bleu_hard = translate_dev_get_bleu(model, f_init, f_next, trng, use_noise, beam_size, alpha)
        message('{} {} BLEU = {:.2f} at uidx {} beam_size = {}'.format(
            type, k, bleu_hard, uidx, beam_size))
        sys.stdout.flush()
        bleu_soft = 0.0
    else:
        stochastic_mode.set_value(0)
        bleu_soft = translate_dev_get_bleu(model, f_init, f_next, trng, use_noise, beam_size, alpha)
        message('{} {} {} BLEU = {:.2f} at uidx {} beam_size = {}'.format(
            type, k, alpha, bleu_soft, uidx, beam_size))
        sys.stdout.flush()
        bleu_hard = 0.0

    return bleu_soft, bleu_hard

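
# Example calls (hypothetical model name; `test` returns (bleu_soft, bleu_hard),
# with the branch that did not run reported as 0.0):
#
#     bleu_soft, _ = test('model/my_model', beam_size=5, Hard=False)
#     _, bleu_hard = test('model/my_model', beam_size=5, Hard=True)
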
def main(model, dictionary, dictionary_target, source_file, saveto, k=5,
         normalize=False, n_process=5, chr_level=False):
    # Load model options.
    with open('%s.pkl' % model, 'rb') as f:
        options = DefaultOptions.copy()
        options.update(pkl.load(f))

    print 'Options:'
    pprint(options)

    word_dict, word_idict, word_idict_trg = load_translate_data(
        dictionary, dictionary_target, source_file,
        batch_mode=False, chr_level=chr_level, load_input=False, echo=False,
    )

    # Create input and output queues for the worker processes.
    queue = Queue()
    rqueue = Queue()
    processes = [None] * n_process
    for midx in xrange(n_process):
        processes[midx] = Process(
            target=translate_model,
            args=(queue, rqueue, midx, model, options, k, normalize))
        processes[midx].start()

    # Utility functions.
    def _send_jobs(fname):
        with open(fname, 'r') as f:
            for idx, line in enumerate(f):
                if chr_level:
                    words = list(line.decode('utf-8').strip())
                else:
                    words = line.strip().split()
                # Map words to indices; out-of-vocabulary words map to index 1.
                x = map(lambda w: word_dict[w] if w in word_dict else 1, words)
                x = map(lambda ii: ii if ii < options['n_words_src'] else 1, x)
                x += [0]  # Append 0 as the end-of-sentence marker.
                queue.put((idx, x))
        return idx + 1

    def _finish_processes():
        for midx in xrange(n_process):
            queue.put(None)

    def _retrieve_jobs(n_samples):
        trans = [None] * n_samples
        for idx in xrange(n_samples):
            resp = rqueue.get()
            trans[resp[0]] = resp[1]
            if np.mod(idx, 10) == 0:
                print 'Sample ', (idx + 1), '/', n_samples, ' Done'
        return trans

    print 'Translating ', source_file, '...'
    n_samples = _send_jobs(source_file)
    trans = seqs2words(_retrieve_jobs(n_samples), word_idict_trg)
    _finish_processes()

    with open(saveto, 'w') as f:
        print >> f, '\n'.join(trans)
    print 'Done'

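
# Example invocation of the multi-process translator above (all paths are
# hypothetical placeholders):
#
#     main('model/my_model', 'data/vocab.src.pkl', 'data/vocab.trg.pkl',
#          'data/test.src', 'output/test.trans', k=5, normalize=True, n_process=4)
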
def build_regression(args, top_options):
    """The main function to build the regression.

    :param args: Options from the argument parser
    :param top_options: Options from top-level (like options in train_nmt.py)
    """

    # Initialize and load options.
    old_options = DefaultOptions.copy()
    old_options.update(top_options)
    load_options(old_options)

    # Initialize options of the new model.
    new_options = old_options.copy()
    new_options['n_encoder_layers'] = args.n_encoder_layers
    new_options['n_decoder_layers'] = args.n_decoder_layers
    new_options['encoder_many_bidirectional'] = args.connection_type == 1
    new_options['unit'] = args.unit
    new_options['attention_layer_id'] = args.attention_layer_id
    new_options['residual_enc'] = args.residual_enc
    new_options['residual_dec'] = args.residual_dec
    new_options['use_zigzag'] = args.use_zigzag

    # If the decoder depth is unchanged, only the encoder is regressed.
    only_encoder = new_options['n_decoder_layers'] == old_options['n_decoder_layers']

    # Old and new models.
    old_model = NMTModel(old_options)
    new_model = NMTModel(new_options)

    print('Loading data...', end='')
    text_iterator = TextIterator(
        old_options['datasets'][0],
        old_options['datasets'][1],
        old_options['vocab_filenames'][0],
        old_options['vocab_filenames'][1],
        old_options['batch_size'],
        old_options['maxlen'],
        old_options['n_words_src'],
        old_options['n_words'],
    )
    small_train_iterator = TextIterator(
        old_options['small_train_datasets'][0],
        old_options['small_train_datasets'][1],
        old_options['vocab_filenames'][0],
        old_options['vocab_filenames'][1],
        old_options['batch_size'],
        old_options['maxlen'],
        old_options['n_words_src'],
        old_options['n_words'],
    )
    valid_text_iterator = TextIterator(
        old_options['valid_datasets'][0],
        old_options['valid_datasets'][1],
        old_options['vocab_filenames'][0],
        old_options['vocab_filenames'][1],
        old_options['valid_batch_size'],
        old_options['maxlen'],
        old_options['n_words_src'],
        old_options['n_words'],
    )
    print('Done')

    if only_encoder:
        f_initialize = ParameterInitializer.init_input_to_context
    else:
        f_initialize = ParameterInitializer.init_input_to_decoder_context

    # Initialize and reload model parameters.
    print('Initializing and reloading model parameters...', end='')
    f_initialize(old_model.initializer, old_model.P, reload_=True)
    f_initialize(
        new_model.initializer, new_model.P,
        reload_=args.warm_start_file is not None,
        preload=args.warm_start_file,
    )
    print('Done')

    # Build model.
    if only_encoder:
        print('Building model...', end='')
        input_, context_old, _ = old_model.input_to_context()
        x, x_mask, y, y_mask = input_
        _, context_new, _ = new_model.input_to_context(input_)
        print('Done')

        # Build output and MSE loss.
        f_context_old, f_context_new, loss, f_loss = build_loss(
            x, x_mask, context_old, context_new, args)
    else:
        print('Building model...', end='')
        input_, hidden_decoder_old, context_decoder_old = old_model.input_to_decoder_context()
        x, x_mask, y, y_mask = input_
        _, hidden_decoder_new, context_decoder_new = new_model.input_to_decoder_context(input_)
        print('Done')

        f_context_old, f_context_new, loss, f_loss = build_decoder_loss(
            x, x_mask, y, y_mask,
            hidden_decoder_old, hidden_decoder_new,
            context_decoder_old, context_decoder_new, args)

    # Compute gradient.
    print('Computing gradient...', end='')
    trainable_parameters = new_model.P.copy()
    if args.fix_embedding:
        print('Fix word embedding!')
        del trainable_parameters['Wemb']
        if not only_encoder:
            del trainable_parameters['Wemb_dec']

    # Build L2 regularization.
    l2_regularization(loss, trainable_parameters, args.decay_c)

    grads = T.grad(loss, wrt=itemlist(trainable_parameters))

    # Apply gradient clipping.
    _, g2 = apply_gradient_clipping(args.clip_c, grads)
    print('Done')

    # Build optimizer.
    inputs = [x, x_mask] if only_encoder else [x, x_mask, y, y_mask]
    print('Building optimizers...', end='')
    lr = T.scalar(name='lr')
    f_grad_shared, f_update, _ = Optimizers[args.regression_optimizer](
        lr, trainable_parameters, grads, inputs, loss, g2=g2)
    print('Done')

    print('Optimization')

    start_time = time.time()
    iteration = 0
    estop = False

    if args.dump_before_train:
        print('Dumping before train...', end='')
        new_model.save_whole_model(args.model_file, iteration)
        print('Done')

    # Validate before training.
    new_model.save_whole_model(args.model_file, iteration=-1)
    best_val_cost = validate(valid_text_iterator, small_train_iterator, f_loss,
                             only_encoder, top_options['maxlen'])

    learning_rate = args.learning_rate

    for epoch in xrange(args.max_epoch):
        n_samples = 0

        for i, (x, y) in enumerate(text_iterator):
            n_samples += len(x)
            iteration += 1

            x, x_mask, y, y_mask = prepare_data(x, y, maxlen=top_options['maxlen'])

            if x is None:
                print('Minibatch with zero sample under length ', top_options['maxlen'])
                iteration -= 1
                continue

            inputs = [x, x_mask] if only_encoder else [x, x_mask, y, y_mask]

            if args.debug:
                print('Cost before train: {}'.format(f_loss(*inputs)))

            # Train!
            cost, g2_value = f_grad_shared(*inputs)
            f_update(learning_rate)

            if args.debug:
                print('Cost after train: {}'.format(f_loss(*inputs)))

            if np.isnan(cost) or np.isinf(cost):
                print('NaN detected')
                learning_rate *= 0.5
                print('Discount learning rate to {}'.format(learning_rate))
                print('Reloading best model {}...'.format(args.model_file), end='')
                new_model.load_whole_model(args.model_file, iteration=-1)
                print('Done')
                print('Training restart')
                continue

            # Verbose.
            if np.mod(iteration, args.disp_freq) == 0:
                print('Epoch {} Update {} Cost {:.6f} Time {:.6f}min'.format(
                    epoch, iteration, float(cost), (time.time() - start_time) / 60.0,
                ))
                print('G2 value: {:.6f}'.format(float(g2_value)))
                sys.stdout.flush()

            if args.save_freq > 0 and np.mod(iteration, args.save_freq) == 0:
                new_model.save_whole_model(args.model_file, iteration)

            if np.mod(iteration, args.valid_freq) == 0:
                curr_val_cost = validate(valid_text_iterator, small_train_iterator,
                                         f_loss, only_encoder, top_options['maxlen'])
                if curr_val_cost < best_val_cost:
                    best_val_cost = curr_val_cost
                    new_model.save_whole_model(args.model_file, iteration=-1)

                if args.debug and args.dump_hidden is not None:
                    print('Dumping input and hidden state to {}...'.format(args.dump_hidden), end='')
                    np.savez(
                        args.dump_hidden,
                        x=x, x_mask=x_mask, y=y, y_mask=y_mask,
                        hidden_old=f_context_old(*inputs),
                        hidden_new=f_context_new(*inputs),
                    )
                    print('Done')

            if args.discount_lr_freq > 0 and np.mod(iteration, args.discount_lr_freq) == 0:
                learning_rate *= 0.5
                print('Discount learning rate to {}'.format(learning_rate))

            # Finish after this many updates.
            if iteration >= args.finish_after:
                print('Finishing after {} iterations!'.format(iteration))
                estop = True
                break

        print('Seen {} samples'.format(n_samples))

        if estop:
            break

    return 0.

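
# Sketch of how build_regression is typically driven (an assumption based on
# the attributes accessed above, not code from the original script): `args` is
# an argparse namespace carrying the regression options (n_encoder_layers,
# n_decoder_layers, connection_type, unit, attention_layer_id, residual_enc,
# residual_dec, use_zigzag, warm_start_file, fix_embedding, decay_c, clip_c,
# regression_optimizer, learning_rate, max_epoch, disp_freq, save_freq,
# valid_freq, discount_lr_freq, finish_after, dump_before_train, dump_hidden,
# debug, model_file), and `top_options` is the training option dict from
# train_nmt.py:
#
#     build_regression(args, top_options)
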
def main(model, dictionary, dictionary_target, source_file, saveto, k=5,
         normalize=False, chr_level=False, batch_size=-1, args=None):
    batch_mode = batch_size > 0

    # Load model options. If '<model>.pkl' is missing, fall back to the
    # per-iteration option file '<prefix>.iter<N>.npz.pkl'.
    option_file = '%s.pkl' % model
    if not os.path.exists(option_file):
        m = re.search(r'iter(\d+)\.npz', model)
        if m:
            uidx = int(m.group(1))
            option_file = '%s.iter%d.npz.pkl' % (os.path.splitext(model)[0], uidx)
    assert os.path.exists(option_file)

    with open(option_file, 'rb') as f:
        options = DefaultOptions.copy()
        options.update(pkl.load(f))
        if 'fix_dp_bug' not in options:
            options['fix_dp_bug'] = False

    print 'Options:'
    pprint(options)

    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    trng = RandomStreams(1234)
    use_noise = theano.shared(np.float32(0.))

    model_type = 'NMTModel'
    if args.trg_attention:
        model_type = 'TrgAttnNMTModel'

    model, _ = build_and_init_model(model, options=options, build=False,
                                    model_type=model_type)

    f_init, f_next = model.build_sampler(
        trng=trng, use_noise=use_noise, batch_mode=batch_mode,
        dropout=options['use_dropout'])

    if not batch_mode:
        word_dict, word_idict, word_idict_trg, input_ = load_translate_data(
            dictionary, dictionary_target, source_file,
            batch_mode=False, chr_level=chr_level, options=options,
        )

        print 'Translating ', source_file, '...'
        trans = seqs2words(
            translate(input_, model, f_init, f_next, trng, k, normalize),
            word_idict_trg,
        )
    else:
        word_dict, word_idict, word_idict_trg, all_src_blocks, m_block = load_translate_data(
            dictionary, dictionary_target, source_file,
            batch_mode=True, chr_level=chr_level,
            n_words_src=options['n_words_src'], batch_size=batch_size,
        )

        print 'Translating ', source_file, '...'
        all_sample = []
        for bidx, seqs in enumerate(all_src_blocks):
            all_sample.extend(translate_block(seqs, model, f_init, f_next, trng, k))
            print bidx, '/', m_block, 'Done'
        trans = seqs2words(all_sample, word_idict_trg)

    with open(saveto, 'w') as f:
        print >> f, '\n'.join(trans)
    print 'Done'

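
# Example invocation of the translator above (paths are hypothetical
# placeholders; pass batch_size > 0 to enable batch decoding):
#
#     main('model/my_model.iter100000.npz', 'data/vocab.src.pkl',
#          'data/vocab.trg.pkl', 'data/test.src', 'output/test.trans',
#          k=5, batch_size=32, args=cli_args)
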
parser.add_argument('model_prefix', help='model file prefix.')
parser.add_argument('start', type=int, help='start iteration number.')
parser.add_argument('end', type=int, help='end iteration number.')
# nargs='?' makes the positional optional so the default actually applies
# (a default on a required positional is never used).
parser.add_argument('gap', type=int, nargs='?', default=10000,
                    help='the gap between each saved model.')

args = parser.parse_args()

num_of_model = args.end - args.start + 1

option_file = '%s.iter%d.npz.pkl' % (
    os.path.splitext(args.model_prefix)[0], args.start * args.gap)
with open(option_file, 'rb') as f:
    options = DefaultOptions.copy()
    options.update(pkl.load(f))
    if 'fix_dp_bug' not in options:
        options['fix_dp_bug'] = False
# pprint(options)

model = NMTModel(options)

from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
trng = RandomStreams(1234)
use_noise = theano.shared(np.float32(0.))

print('Initializing the model.')
params = model.initializer.init_params()
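
# The excerpt above only loads options and initializes parameters. A plausible
# continuation (a sketch under the assumption that this script averages the
# saved checkpoints between `start` and `end`; the loop below is NOT from the
# original file):
#
#     avg_params = {name: np.zeros_like(value) for name, value in params.iteritems()}
#     for i in xrange(args.start, args.end + 1):
#         model_file = '%s.iter%d.npz' % (
#             os.path.splitext(args.model_prefix)[0], i * args.gap)
#         ckpt = np.load(model_file)
#         for name in avg_params:
#             avg_params[name] += ckpt[name] / num_of_model
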