def get_config_EsFr2En_single():
    """Assemble the read-only configuration saved under 'esfr2en_single'.

    Starts from ``prototype_config_mSrc`` for the ``es_en`` and ``fr_en``
    entries, then overrides the save location, batch sizes, vocabulary and
    dataset paths, vocabulary sizes, and validation-related settings.

    Returns:
        The populated configuration wrapped in ``ReadOnlyDict``.
    """
    pairs = ['es_en', 'fr_en']
    encoder_ids, decoder_ids = get_enc_dec_ids_mSrc(pairs)

    # Model settings layered on top of the prototype.
    config = prototype_config_mSrc(pairs)
    config['saveto'] = 'esfr2en_single'
    config['batch_sizes'] = get_odict(pairs, 80)

    # Base directory handed to every get_paths call below.
    data_root = 'dl4mt-multi-src/data'

    # Vocabulary files.
    vocab_specs = (
        ('src_vocabs', encoder_ids, paths.src_vocabs),
        ('trg_vocabs', decoder_ids, paths.trg_vocabs),
    )
    for key, ids, template in vocab_specs:
        config[key] = get_paths(ids, template, data_root)

    # Vocabulary sizes: one value per encoder id, a single value for decoders.
    config['src_vocab_sizes'] = get_odict_pair(encoder_ids, [20624, 20335])
    config['trg_vocab_sizes'] = get_odict(decoder_ids, 20212)

    # Training data files.
    data_specs = (
        ('src_datas', paths.src_datas),
        ('trg_datas', paths.trg_datas),
    )
    for key, template in data_specs:
        config[key] = get_paths(pairs, template, data_root)

    # Checkpointing / early-stopping settings.
    config['save_freq'] = 5000
    config['val_burn_in'] = 1

    # Validation sets used for log-probability evaluation.
    config['log_prob_sets'] = get_paths(pairs, paths.log_prob_sets, data_root)

    return ReadOnlyDict(config)
def get_config_multiWay():
    """Assemble the read-only configuration saved under 'multiWay'.

    Starts from ``prototype_config_multiCG_08`` for the ``fi_en``, ``de_en``
    and ``en_de`` entries, then overrides the save location, vocabulary and
    dataset paths, BLEU validation settings, and log-probability sets.

    Returns:
        The populated configuration wrapped in ``ReadOnlyDict``.
    """
    pairs = ['fi_en', 'de_en', 'en_de']
    encoder_ids, decoder_ids = get_enc_dec_ids(pairs)

    # Model settings layered on top of the prototype.
    config = prototype_config_multiCG_08(pairs)
    config['saveto'] = 'multiWay'

    # Base directory handed to every get_paths call; left empty here.
    root = ''

    # Vocabulary and training-data files, assigned in this exact order.
    path_specs = (
        ('src_vocabs', encoder_ids, src_vocabs),
        ('trg_vocabs', decoder_ids, trg_vocabs),
        ('src_datas', pairs, src_datas),
        ('trg_datas', pairs, trg_datas),
    )
    for key, ids, template in path_specs:
        config[key] = get_paths(ids, template, root)

    # BLEU-based early stopping.
    config['save_freq'] = 5000
    config['bleu_script'] = '%s/multi-bleu.perl' % root
    config['val_sets'] = get_paths(pairs, val_sets_src, root)
    config['val_set_grndtruths'] = get_paths(pairs, val_sets_ref, root)
    # Reads 'cgs' and 'saveto' back out of the config itself.
    config['val_set_outs'] = get_val_set_outs(
        config['cgs'], config['saveto'])
    config['val_burn_in'] = 1

    # Validation sets used for log-probability evaluation.
    config['log_prob_sets'] = get_paths(pairs, log_prob_sets, root)

    return ReadOnlyDict(config)
def get_config_single():
    """Assemble the read-only configuration saved under 'single'.

    Starts from ``prototype_config_multiCG_08`` for the lone ``de_en`` entry,
    then overrides the save location, batch size, schedule, vocabulary and
    dataset paths, BLEU validation settings, and log-probability sets.

    Returns:
        The populated configuration wrapped in ``ReadOnlyDict``.
    """
    pairs = ['de_en']
    config = prototype_config_multiCG_08(pairs)
    encoder_ids, decoder_ids = get_enc_dec_ids(pairs)

    config['saveto'] = 'single'

    # Base directory handed to every get_paths call; left empty here.
    root = ''

    # Per-entry batch size and schedule value.
    config['batch_sizes'] = OrderedDict([('de_en', 80)])
    config['schedule'] = OrderedDict([('de_en', 12)])

    # Vocabulary and training-data files, assigned in this exact order.
    path_specs = (
        ('src_vocabs', encoder_ids, src_vocabs),
        ('trg_vocabs', decoder_ids, trg_vocabs),
        ('src_datas', pairs, src_datas),
        ('trg_datas', pairs, trg_datas),
    )
    for key, ids, template in path_specs:
        config[key] = get_paths(ids, template, root)

    # Checkpointing and BLEU-based validation.
    config['save_freq'] = 5000
    config['val_burn_in'] = 60000
    config['bleu_script'] = '%s/multi-bleu.perl' % root
    config['val_sets'] = get_paths(pairs, val_sets_src, root)
    config['val_set_grndtruths'] = get_paths(pairs, val_sets_ref, root)
    # Reads 'cgs' and 'saveto' back out of the config itself.
    config['val_set_outs'] = get_val_set_outs(
        config['cgs'], config['saveto'])

    # Validation sets used for log-probability evaluation.
    config['log_prob_sets'] = get_paths(pairs, log_prob_sets, root)

    return ReadOnlyDict(config)