"--tuning-algorithm", dest="alg", default='mert', help="Tuning Algorithm (mert|pro|wpro)") parser.add_argument("-w", "--instance-weights", dest="instance_weights_path", help="Instance weights for wpro algorithm") parser.add_argument("-s", "--predictable-seed", dest="pred_seed", action='store_true', help="Tune with predictable seed to avoid randomness") args = parser.parse_args() U.set_theano_device(args.device) from dlm.reranker import augmenter from dlm.reranker import mosesIniReader as iniReader if os.environ.has_key('MOSES_ROOT'): moses_root = os.environ['MOSES_ROOT'] else: L.error("Set MOSES_ROOT variable to your moses root directory") U.mkdir_p(args.out_dir) #cmd = moses_root + '/bin/moses -show-weights -f ' + args.input_config + ' 2> /dev/null' #features = U.capture(cmd).strip().split('\n') features = iniReader.parseIni(args.input_config)
f.write("\t"+str(val)) f.write("\n") def write_biases(f, biases): for bias in biases: f.write(str(bias) + "\n") # Arguments for this script parser = argparse.ArgumentParser() parser.add_argument("-m", "--corelm-model", dest="corelm_model", required=True, help="The input NPLM model file") parser.add_argument("-v", "--vocab-file", dest="vocab_path", required=True, help="The input vocabulary") parser.add_argument("-dir", "--directory", dest="out_dir", help="The output directory for log file, model, etc.") args = parser.parse_args() U.set_theano_device('cpu',1) from dlm.models.mlp import MLP if args.out_dir is None: args.out_dir = 'corelm_convert-' + U.curr_time() U.mkdir_p(args.out_dir) # Loading CoreLM model and creating classifier class L.info("Loading CoreLM model") classifier = MLP(model_path=args.corelm_model) args_nn = classifier.args params_nn = classifier.params U.xassert(len(params_nn)==7, "CoreLM model is not compatible with NPLM architecture. 2 hidden layers and an output linear layer is required.") embeddings = params_nn[0].get_value() W1 = params_nn[1].get_value()
# Command-line options for the n-best tuning script.
# `parser` is expected to be created before this point (not shown here).
parser.add_argument("-i", "--input-nbest", dest="input_nbest", required=True, help="Input n-best file")
parser.add_argument("-v", "--vocab-file", dest="vocab_path", required=True, help="The vocabulary file that was used in training")
parser.add_argument("-m", "--model-file", dest="model_path", required=True, help="Input CoreLM model file")
parser.add_argument("-r", "--reference-files", dest="ref_paths", required=True, help="A comma-seperated list of reference files")
parser.add_argument("-c", "--config", dest="input_config", required=True, help="Input moses config (ini) file")
parser.add_argument("-o", "--output-dir", dest="out_dir", required=True, help="Output directory")
parser.add_argument("-d", "--device", dest="device", default="gpu", help="The computing device (cpu or gpu)")
parser.add_argument("-t", "--threads", dest="threads", default=14, type=int, help="Number of MERT threads")
parser.add_argument("-iv", "--init-value", dest="init_value", default='0.05', help="The initial value of the feature")
parser.add_argument("-n", "--no-aug", dest="no_aug", action='store_true', help="Augmentation will be skipped, if this flag is set")
parser.add_argument("-a", "--tuning-algorithm", dest="alg", default='mert', help="Tuning Algorithm (mert|pro|wpro)")
parser.add_argument("-w", "--instance-weights", dest="instance_weights_path", help="Instance weights for wpro algorithm")
parser.add_argument("-s", "--predictable-seed", dest="pred_seed", action='store_true', help="Tune with predictable seed to avoid randomness")
args = parser.parse_args()

# NOTE(review): the reranker imports below deliberately follow this call --
# presumably the device has to be configured before those modules load; confirm
# before moving the imports to the top of the file.
U.set_theano_device(args.device)

from dlm.reranker import augmenter
from dlm.reranker import mosesIniReader as iniReader

# Use the `in` operator rather than dict.has_key(): has_key() was removed in
# Python 3, while `in` behaves identically on Python 2 and 3.
if 'MOSES_ROOT' in os.environ:
    moses_root = os.environ['MOSES_ROOT']
else:
    # NOTE(review): moses_root is left unbound on this path; presumably L.error
    # aborts the script -- confirm against dlm.io.logging.
    L.error("Set MOSES_ROOT variable to your moses root directory")

U.mkdir_p(args.out_dir)

# Earlier approach (kept for reference): ask the moses binary for its weights.
#cmd = moses_root + '/bin/moses -show-weights -f ' + args.input_config + ' 2> /dev/null'
#features = U.capture(cmd).strip().split('\n')
features = iniReader.parseIni(args.input_config)
import dlm.io.logging as L


def convert_type(param):
    """Return *param* passed through ``np.float32``."""
    return np.float32(param)


# Arguments for this script
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--nplm-model", dest="nplm_model", required=True, help="The input NPLM model file")
parser.add_argument("-dir", "--directory", dest="out_dir", help="The output directory for log file, model, etc.")
args = parser.parse_args()

# NOTE(review): the MLP import below follows this call -- presumably the device
# must be configured before the model module loads; confirm before reordering.
U.set_theano_device('cpu',1)

from dlm.models.mlp import MLP

# Fall back to a timestamped directory name when none was given.
if args.out_dir is None:
    args.out_dir = 'nplm_convert-' + U.curr_time()
U.mkdir_p(args.out_dir)

# Reading the NPLM Model
args_nn = argparse.Namespace()
model_dict = dict()
lines = []
# Section markers looked for in the NPLM model file. Raw strings keep the
# leading backslash literal without relying on invalid escape sequences such
# as '\c' or '\i', which are deprecated and raise a warning on modern Python.
# The resulting values are identical to the previous literals.
req_attribs = [
    r'\config',
    r'\vocab',
    r'\input_vocab',
    r'\output_vocab',
    r'\input_embeddings',
    r'\hidden_weights 1',
    r'\hidden_biases 1',
    r'\hidden_weights 2',
    r'\hidden_biases 2',
    r'\output_weights',
    r'\output_biases',
    r'\end',
]
attrib = ''
# Send log output to <out_dir>/log.txt, then record how the script was invoked.
L.set_file_path(
    os.path.abspath(args.out_dir) + "/log.txt"
)
L.info('Command: ' + ' '.join(sys.argv))

# The version line is only logged when a version string is available.
curr_version = U.curr_version()
if curr_version:
    L.info("Version: " + curr_version)

# Cross-option checks and adjustments.
if args.emb_path:
    U.xassert(
        args.vocab,
        'When --emb-path is used, vocab file must be given too (using --vocab).',
    )
if args.loss_function == "nll":
    args.num_noise_samples = 0

U.print_args(args)

U.set_theano_device(args.device, args.threads)

# These imports intentionally stay below the device setup call.
import dlm.trainer
from dlm.io.mmapReader import MemMapReader
from dlm.models.mlp import MLP

#########################
## Loading datasets
#

trainset = MemMapReader(
    args.trainset,
    batch_size=args.batchsize,
    instance_weights_path=args.instance_weights_path,
)
devset = MemMapReader(args.devset)
# The test set is optional; it stays None when no path was supplied.
testset = MemMapReader(args.testset) if args.testset else None
# Log the code version when U.curr_version() returns a truthy value.
curr_version = U.curr_version()
if curr_version:
    L.info("Version: " + curr_version)

# --emb-path is only accepted together with a vocabulary file (--vocab).
if args.emb_path:
    U.xassert(
        args.vocab,
        'When --emb-path is used, vocab file must be given too (using --vocab).'
    )

# With the "nll" loss the noise-sample count is forced to 0.
if args.loss_function == "nll":
    args.num_noise_samples = 0

U.print_args(args)
U.set_theano_device(args.device, args.threads)

# NOTE(review): these imports follow set_theano_device -- presumably the device
# must be configured before the dlm modules load; confirm before reordering.
import dlm.trainer
from dlm.io.mmapReader import MemMapReader
from dlm.io.featuresmmapReader import FeaturesMemMapReader
from dlm.models.mlp import MLP

#########################
## Loading datasets
#

# NOTE(review): the features reader is chosen when feature_emb_dim is None,
# which reads as possibly inverted -- confirm the intended condition. Any
# `else` branch of this statement lies outside the visible fragment.
if args.feature_emb_dim is None:
    trainset = FeaturesMemMapReader(
        args.trainset,
        batch_size=args.batchsize,
        instance_weights_path=args.instance_weights_path)