# NOTE(review): this line is a whitespace-collapsed paste of Python 2 script code
# (note the statement-form `print "..."`): it registers optparse options for an
# NPLM training run, parses argv, loads trigram data via load_trigram_data, and
# then starts a call to run_nplm(...).
# NOTE(review): the run_nplm(...) call is TRUNCATED here — it ends at
# `learning_rate=float(options.learning_rate),` with no closing paren, so the
# remaining keyword arguments (and anything after the call) are outside this
# view. Left byte-identical; reflow/complete against the original source file.
# NOTE(review): `options.trigrams` and `options.n_projection` are read below but
# their add_option(...) registrations are not visible in this chunk — presumably
# defined earlier in the file; confirm before editing.
optparser.add_option('--n-hidden', None, dest='n_hidden', default=3, help='num hidden nodes') optparser.add_option('--l1-reg', None, dest='l1_reg', default=0, help='L1 regression weight') optparser.add_option('--l2-reg', None, dest='l2_reg', default=0, help='L2 regression weight') optparser.add_option('--batch-size', None, dest='batch_size', default=100, help='mini batch size') optparser.add_option('--epochs', None, dest='n_epochs', default=10000, help='epochs to run') optparser.add_option('--learning-rate', None, dest='learning_rate', default=0.01, help='weight update learning rate') optparser.add_option('--seed', None, dest='seed', default=92374652, help='rng seed') optparser.add_option('--weights-file', None, dest='weights_file', default=None, help='pickle model params from previous run') optparser.add_option('--print-internal-vars', None, action="store_true", dest='print_internal_vars', default=False, help='whether to wrap selected symbolic nodes in a theano.print') optparser.add_option('--dump-hidden-weights', None, action="store_true", dest='dump_hidden', default=False, help='whether to write projection/hidden/softmax layer weights out to a file') options, arguments = optparser.parse_args() print "options", options # load data from load_data import load_trigram_data vocab_size, input_feature_names, feature_input_indexes, datasets = load_trigram_data(options.trigrams) print "vocab_size", vocab_size print "input_feature_names", input_feature_names print "feature_input_indexes", feature_input_indexes print "datasets", datasets # run run_nplm(datasets=datasets, n_in=2, # use bigram to pick next token vocab_size=vocab_size, projection_dim=int(options.n_projection), n_hidden=int(options.n_hidden), input_feature_names=input_feature_names, feature_input_indexes=feature_input_indexes, L1_reg=float(options.l1_reg), L2_reg=float(options.l2_reg), batch_size=int(options.batch_size), n_epochs=int(options.n_epochs), learning_rate=float(options.learning_rate),
# Training-run setup: optionally materialise an output directory (recording the
# run's options in it), build checkpoint/cost timing gates, load the trigram
# training data, and derive the module-level sizing constants used downstream.

# Create the output dir if one was requested, and record the run options there.
if opts.output_dir is not None:
    if not os.path.exists(opts.output_dir):
        os.makedirs(opts.output_dir)
    with open(opts.output_dir + "/options.json", 'w') as f:
        f.write(str(opts))

# Timing gates: checkpointing is optional (gate is None when no frequency was
# given); cost-progress dumping always gets a gate.
checkpointer_gate = None
if opts.checkpoint_freq:
    checkpointer_gate = FrequencyGate(opts.checkpoint_freq)
dump_cost_gate = FrequencyGate(opts.cost_progress_freq)

# Slurp in training data, converting tokens "C A B" to indexes "0 1 2".
#   idxs  => (w1, w2, w3) for lr mode; (w1, w2) for sm mode
#   label y => 1.0 / 0.0 for lr mode; w3 for sm mode
idxs, y, token_idx = load_trigram_data(opts.trigrams_file, opts.mode)
if opts.output_dir is not None:
    token_idx.write_to_file(opts.output_dir + "/vocab.tsv")
VOCAB_SIZE = token_idx.seq

# Derive batching sizes from the number of examples.
BATCH_SIZE = opts.batch_size
NUM_BATCHES = int(math.ceil(float(len(idxs)) / BATCH_SIZE))
print("#egs", len(idxs), "batch_size", BATCH_SIZE,
      "=> num_batches", NUM_BATCHES, file=sys.stderr)
EPOCHS = opts.epochs
LAMBDA1, LAMBDA2 = opts.lambda1, opts.lambda2

# Network sizing depends on mode: logistic-regression style takes the full
# trigram (3 embedded tokens, 1 output); softmax style takes the bigram and
# predicts over the whole vocab.
if opts.mode == 'lr':
    NUM_EMBEDDING_NODES, NUM_OUTPUT_NODES = 3, 1
else:
    NUM_EMBEDDING_NODES, NUM_OUTPUT_NODES = 2, VOCAB_SIZE
# decide if we're going to dump cost progress
# Run preamble (duplicate of the setup chunk above in the paste): prepare the
# output directory, timing gates, training data, and batch-size constants.

# Output directory: create on demand and snapshot the options into it.
if opts.output_dir is not None:
    if not os.path.exists(opts.output_dir):
        os.makedirs(opts.output_dir)
    with open(opts.output_dir + "/options.json", 'w') as f:
        f.write(str(opts))

# Checkpoint gate only exists when a checkpoint frequency was supplied;
# the cost-progress gate is unconditional.
checkpointer_gate = FrequencyGate(opts.checkpoint_freq) if opts.checkpoint_freq else None
dump_cost_gate = FrequencyGate(opts.cost_progress_freq)

# Load trigrams as index tuples plus labels:
#   idxs => (w1, w2, w3) in lr mode, (w1, w2) in sm mode
#   y    => 1.0/0.0 in lr mode, w3 in sm mode
idxs, y, token_idx = load_trigram_data(opts.trigrams_file, opts.mode)
if opts.output_dir is not None:
    token_idx.write_to_file(opts.output_dir + "/vocab.tsv")
VOCAB_SIZE = token_idx.seq

# Batching: ceil-divide the example count by the batch size.
BATCH_SIZE = opts.batch_size
NUM_BATCHES = int(math.ceil(float(len(idxs)) / BATCH_SIZE))
print("#egs", len(idxs), "batch_size", BATCH_SIZE,
      "=> num_batches", NUM_BATCHES, file=sys.stderr)
EPOCHS = opts.epochs