def score_beams_prog(beams, target, inp, history, print_out=False,
                     test_mode=False):
  """Score beams for program synthesis."""
  tgt_prog = linearize(target, program_utils.prog_vocab, True, 1)
  hist_progs = [linearize(h, program_utils.prog_vocab, True, 1)
                for h in history]
  tgt_set = set(target)
  if print_out:
    print "target: ", tgt_prog
  inps, tgt_outs = [], []
  # Decode the 3 input/output examples stored in rows 1..3 of inp.
  for i in xrange(3):
    ilist = [inp[i + 1, l] for l in xrange(inp.shape[1])]
    clist = [program_utils.prog_vocab[x] for x in ilist if x > 0]
    olist = clist[clist.index("]") + 1:]  # outputs
    clist = clist[1:clist.index("]")]  # inputs
    inps.append([int(x) for x in clist])
    if olist[0] == "[":  # olist may be [int] or just int
      tgt_outs.append(str([int(x) for x in olist[1:-1]]))
    else:
      if len(olist) == 1:
        tgt_outs.append(olist[0])
      else:
        # Debug dump for unexpected output shapes.
        print [program_utils.prog_vocab[x] for x in ilist if x > 0]
        print olist
        print tgt_prog
        print program_utils.evaluate(tgt_prog, {"a": inps[-1]})
        print "AAAAA"
        tgt_outs.append(olist[0])
  # Outside of test mode, add 7 random inputs and their target outputs.
  if not test_mode:
    for _ in xrange(7):
      ilen = np.random.randint(len(target) - 3) + 1
      inps.append([random.choice(range(-15, 15)) for _ in range(ilen)])
    tgt_outs.extend([program_utils.evaluate(tgt_prog, {"a": inp})
                     for inp in inps[3:]])
  best, best_prog, best_score = None, "", -1000.0
  for beam in beams:
    b_prog = linearize(beam, program_utils.prog_vocab, True, 1)
    b_set = set(beam)
    jsim = len(tgt_set & b_set) / float(len(tgt_set | b_set))
    b_outs = [program_utils.evaluate(b_prog, {"a": inp}) for inp in inps]
    errs = len([x for x in b_outs if x == "ERROR"])
    imatches = len([i for i in xrange(3) if b_outs[i] == tgt_outs[i]])
    perfect = 10.0 if imatches == 3 else 0.0
    hist_score = 20.0 if b_prog in hist_progs else 0.0
    if test_mode:
      score = perfect - errs
    else:
      matches = len([i for i in xrange(10) if b_outs[i] == tgt_outs[i]])
      score = perfect + matches + jsim - errs
    if score < 10.0:
      score -= hist_score
    # print b_prog
    # print "jsim: ", jsim, " errs: ", errs, " mtchs: ", matches, " s: ", score
    if score > best_score:
      best = beam
      best_prog = b_prog
      best_score = score
  if print_out:
    print "best score: ", best_score, " best prog: ", best_prog
  return best, best_score
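

# A minimal, self-contained sketch of the scoring heuristic above. It is
# illustrative only and not called anywhere: a beam earns 10.0 for matching
# all three decoded I/O examples, 1.0 per matching example overall, plus the
# Jaccard similarity of its token set to the target's, minus 1.0 per
# evaluation error.
def _beam_score_sketch(tgt_set, b_set, imatches, matches, errs):
  jsim = len(tgt_set & b_set) / float(len(tgt_set | b_set))
  perfect = 10.0 if imatches == 3 else 0.0
  return perfect + matches + jsim - errs
# E.g. _beam_score_sketch(set([2, 5, 9]), set([2, 5, 7]), 3, 10, 0) == 20.5.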


def prog_io_pair(prog, max_len, counter=0):
  """Sample one tokenized input/output example for the program `prog`.

  Retries (by recursing with counter + 1) whenever the sampled output
  tokenizes badly or is too long, shrinking the integer range as the
  retry counter grows.
  """
  try:
    ilen = np.random.randint(max_len - 3) + 1
    bound = max(15 - (counter / 20), 1)
    inp = [random.choice(range(-bound, bound)) for _ in range(ilen)]
    inp_toks = [program_utils.prog_rev_vocab[t]
                for t in program_utils.tokenize(str(inp)) if t != ","]
    out = program_utils.evaluate(prog, {"a": inp})
    out_toks = [program_utils.prog_rev_vocab[t]
                for t in program_utils.tokenize(str(out)) if t != ","]
    if counter > 400:
      out_toks = []  # Give up on the output after too many retries.
    if (out_toks and out_toks[0] == program_utils.prog_rev_vocab["["] and
        len(out_toks) != len([o for o in out if o == ","]) + 3):
      raise ValueError("generated list with too long ints")
    if (out_toks and out_toks[0] != program_utils.prog_rev_vocab["["] and
        len(out_toks) > 1):
      raise ValueError("generated one int but tokenized it to many")
    if len(out_toks) > max_len:
      raise ValueError("output too long")
    return (inp_toks, out_toks)
  except ValueError:
    return prog_io_pair(prog, max_len, counter + 1)
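

# Usage sketch for prog_io_pair, illustrative only: "len(a)" is a stand-in
# for any program string that program_utils.evaluate accepts over the list
# variable "a". Bad samples raise ValueError inside prog_io_pair and are
# retried with a progressively narrower integer range.
def _prog_io_demo():
  inp_toks, out_toks = prog_io_pair("len(a)", max_len=25)
  return inp_toks, out_toks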


def get_best_beam(beam_model, sess, inp, target, batch_size, beam_size,
                  bucket, history, p, test_mode=False):
  """Run beam_model, score beams, and return the best as target and in input."""
  _, output_logits, _, _ = beam_model.step(
      sess, inp, target, None, beam_size=FLAGS.beam_size)
  new_targets, new_firsts, scores, new_inp = [], [], [], np.copy(inp)
  for b in xrange(batch_size):
    outputs = []
    history_b = [[h[b, 0, l] for l in xrange(data.bins[bucket])]
                 for h in history]
    for beam_idx in xrange(beam_size):
      outputs.append([int(o[beam_idx * batch_size + b])
                      for o in output_logits])
    target_t = [target[b, 0, l] for l in xrange(data.bins[bucket])]
    best, best_score = score_beams(
        outputs, [t for t in target_t if t > 0], inp[b, :, :],
        [[t for t in h if t > 0] for h in history_b], p, test_mode=test_mode)
    scores.append(best_score)
    if 1 in best:  # Only until _EOS.
      best = best[:best.index(1) + 1]
    best += [0 for _ in xrange(len(target_t) - len(best))]
    new_targets.append([best])
    first, _ = score_beams(
        outputs, [t for t in target_t if t > 0], inp[b, :, :],
        [[t for t in h if t > 0] for h in history_b], p, test_mode=True)
    if 1 in first:  # Only until _EOS.
      first = first[:first.index(1) + 1]
    first += [0 for _ in xrange(len(target_t) - len(first))]
    new_inp[b, 0, :] = np.array(first, dtype=np.int32)
    new_firsts.append([first])
  # Change target if we found a great answer.
  new_target = np.array(new_targets, dtype=np.int32)
  for b in xrange(batch_size):
    if scores[b] >= 10.0:
      target[b, 0, :] = new_target[b, 0, :]
  new_first = np.array(new_firsts, dtype=np.int32)
  return new_target, new_first, new_inp, scores


def train():
  """Train the model."""
  batch_size = FLAGS.batch_size * FLAGS.num_gpus
  (model, beam_model, min_length, max_length, checkpoint_dir,
   (train_set, dev_set, en_vocab_path, fr_vocab_path), sv, sess) = initialize()
  with sess.as_default():
    quant_op = model.quantize_op
    max_cur_length = min(min_length + 3, max_length)
    prev_acc_perp = [1000000 for _ in xrange(5)]
    prev_seq_err = 1.0
    is_chief = FLAGS.task < 1
    do_report = False

    # Main training loop.
    while not sv.ShouldStop():
      global_step, max_cur_length, learning_rate = sess.run(
          [model.global_step, model.cur_length, model.lr])
      acc_loss, acc_l1, acc_total, acc_errors, acc_seq_err = 0.0, 0.0, 0, 0, 0
      acc_grad_norm, step_count, step_c1, step_time = 0.0, 0, 0, 0.0

      # For words in the word vector file, set their embedding at start.
      bound1 = FLAGS.steps_per_checkpoint - 1
      if FLAGS.word_vector_file_en and global_step < bound1 and is_chief:
        assign_vectors(FLAGS.word_vector_file_en, "embedding:0",
                       en_vocab_path, sess)
        if FLAGS.max_target_vocab < 1:
          assign_vectors(FLAGS.word_vector_file_en, "target_embedding:0",
                         en_vocab_path, sess)
      if FLAGS.word_vector_file_fr and global_step < bound1 and is_chief:
        assign_vectors(FLAGS.word_vector_file_fr, "embedding:0",
                       fr_vocab_path, sess)
        if FLAGS.max_target_vocab < 1:
          assign_vectors(FLAGS.word_vector_file_fr, "target_embedding:0",
                         fr_vocab_path, sess)

      for _ in xrange(FLAGS.steps_per_checkpoint):
        step_count += 1
        step_c1 += 1
        global_step = int(model.global_step.eval())
        train_beam_anneal = global_step / float(FLAGS.train_beam_anneal)
        train_beam_freq = FLAGS.train_beam_freq * min(1.0, train_beam_anneal)
        p = random.choice(FLAGS.problem.split("-"))
        train_set = global_train_set[p][-1]
        bucket_id = get_bucket_id(train_buckets_scale[p][-1], max_cur_length,
                                  train_set)
        # Prefer longer stuff 60% of time if not wmt.
        if np.random.randint(100) < 60 and FLAGS.problem != "wmt":
          bucket1 = get_bucket_id(train_buckets_scale[p][-1], max_cur_length,
                                  train_set)
          bucket_id = max(bucket1, bucket_id)

        # Run a step and time it.
        start_time = time.time()
        inp, target = data.get_batch(bucket_id, batch_size, train_set,
                                     FLAGS.height)
        noise_param = math.sqrt(math.pow(global_step + 1, -0.55) *
                                prev_seq_err) * FLAGS.grad_noise_scale
        # In multi-step mode, we use best from beam for middle steps.
        state, new_target, scores, history = None, None, None, []
        while (FLAGS.beam_size > 1 and
               train_beam_freq > np.random.random_sample()):
          # Get the best beam (no training, just forward model).
          new_target, new_first, new_inp, scores = get_best_beam(
              beam_model, sess, inp, target, batch_size, FLAGS.beam_size,
              bucket_id, history, p)
          history.append(new_first)
          # Training step with the previous input and the best beam as target.
          _, _, _, state = model.step(sess, inp, new_target, FLAGS.do_train,
                                      noise_param, update_mem=True,
                                      state=state)
          # Change input to the new one for the next step.
          inp = new_inp
          # If all results are great, stop (todo: not to wait for all?).
          if FLAGS.nprint > 1:
            print scores
          if sum(scores) / float(len(scores)) >= 10.0:
            break
        # The final step with the true target.
        loss, res, gnorm, _ = model.step(sess, inp, target, FLAGS.do_train,
                                         noise_param, update_mem=True,
                                         state=state)
        step_time += time.time() - start_time
        acc_grad_norm += 0.0 if gnorm is None else float(gnorm)

        # Accumulate statistics.
        acc_loss += loss
        acc_l1 += loss
        errors, total, seq_err = data.accuracy(
            inp, res, target, batch_size, 0, new_target, scores)
        if FLAGS.nprint > 1:
          print "seq_err: ", seq_err
        acc_total += total
        acc_errors += errors
        acc_seq_err += seq_err

        # Report summary every 10 steps.
        if step_count + 3 > FLAGS.steps_per_checkpoint:
          do_report = True  # Don't pollute plot too early.
        if is_chief and step_count % 10 == 1 and do_report:
          cur_loss = acc_l1 / float(step_c1)
          acc_l1, step_c1 = 0.0, 0
          cur_perp = data.safe_exp(cur_loss)
          summary = tf.Summary()
          summary.value.extend(
              [tf.Summary.Value(tag="log_perplexity", simple_value=cur_loss),
               tf.Summary.Value(tag="perplexity", simple_value=cur_perp)])
          sv.SummaryComputed(sess, summary, global_step)

      # Normalize and print out accumulated statistics.
      acc_loss /= step_count
      step_time /= FLAGS.steps_per_checkpoint
      acc_seq_err = float(acc_seq_err) / (step_count * batch_size)
      prev_seq_err = max(0.0, acc_seq_err - 0.02)  # No noise at error < 2%.
      acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0
      t_size = float(sum([len(x) for x in train_set])) / float(1000000)
      msg = ("step %d step-time %.2f train-size %.3f lr %.6f grad-norm %.4f"
             % (global_step + 1, step_time, t_size, learning_rate,
                acc_grad_norm / FLAGS.steps_per_checkpoint))
      data.print_out("%s len %d ppl %.6f errors %.2f sequence-errors %.2f" %
                     (msg, max_cur_length, data.safe_exp(acc_loss),
                      100 * acc_errors, 100 * acc_seq_err))

      # If errors are below the curriculum threshold, move curriculum forward.
      is_good = FLAGS.curriculum_ppx > data.safe_exp(acc_loss)
      is_good = is_good and FLAGS.curriculum_seq > acc_seq_err
      if is_good and is_chief:
        if FLAGS.quantize:
          # Quantize weights.
          data.print_out("  Quantizing parameters.")
          sess.run([quant_op])
        # Increase current length (until the next with training data).
        sess.run(model.cur_length_incr_op)
        # Forget last perplexities if we're not yet at the end.
        if max_cur_length < max_length:
          prev_acc_perp.append(1000000)

      # Lower learning rate if we're worse than the last 5 checkpoints.
      acc_perp = data.safe_exp(acc_loss)
      if acc_perp > max(prev_acc_perp[-5:]) and is_chief:
        sess.run(model.lr_decay_op)
      prev_acc_perp.append(acc_perp)

      # Save checkpoint.
      if is_chief:
        checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt")
        model.saver.save(sess, checkpoint_path,
                         global_step=model.global_step)

        # Run evaluation.
        bin_bound = 4
        for p in FLAGS.problem.split("-"):
          total_loss, total_err, tl_counter = 0.0, 0.0, 0
          for bin_id in xrange(len(data.bins)):
            if bin_id < bin_bound or bin_id % FLAGS.eval_bin_print == 1:
              err, _, loss = single_test(bin_id, model, sess, FLAGS.nprint,
                                         batch_size * 4, dev_set, p,
                                         beam_model=beam_model)
              if loss > 0.0:
                total_loss += loss
                total_err += err
                tl_counter += 1
          test_loss = total_loss / max(1, tl_counter)
          test_err = total_err / max(1, tl_counter)
          test_perp = data.safe_exp(test_loss)
          summary = tf.Summary()
          summary.value.extend(
              [tf.Summary.Value(tag="test/%s/loss" % p,
                                simple_value=test_loss),
               tf.Summary.Value(tag="test/%s/error" % p,
                                simple_value=test_err),
               tf.Summary.Value(tag="test/%s/perplexity" % p,
                                simple_value=test_perp)])
          sv.SummaryComputed(sess, summary, global_step)


def linearize(output, rev_fr_vocab, simple_tokenizer=None, eos_id=wmt.EOS_ID):
  # If there is an EOS symbol in outputs, cut them at that point (WMT).
  if eos_id in output:
    output = output[:output.index(eos_id)]
  # Return the French sentence corresponding to outputs.
  if simple_tokenizer or FLAGS.simple_tokenizer:
    vlen = len(rev_fr_vocab)
    def vget(o):
      if o < vlen:
        return rev_fr_vocab[o]
      return "UNK"
    return " ".join([vget(o) for o in output])
  else:
    return wmt.basic_detokenizer([rev_fr_vocab[o] for o in output])


def evaluate():
  """Evaluate an existing model."""
  batch_size = FLAGS.batch_size * FLAGS.num_gpus
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    (model, beam_model, _, _, _,
     (_, dev_set, en_vocab_path, fr_vocab_path), _, sess) = initialize(sess)
    for p in FLAGS.problem.split("-"):
      for bin_id in xrange(len(data.bins)):
        if (FLAGS.task >= 0 and bin_id > 4) or (
            FLAGS.nprint == 0 and bin_id > 8 and p == "wmt"):
          break
        single_test(bin_id, model, sess, FLAGS.nprint, batch_size, dev_set, p,
                    beam_model=beam_model)
    path = FLAGS.test_file_prefix
    xid = "" if FLAGS.task < 0 else ("%.4d" % (FLAGS.task + FLAGS.decode_offset))
    en_path, fr_path = path + ".en" + xid, path + ".fr" + xid
    # Evaluate the test files if they exist.
    if path and tf.gfile.Exists(en_path) and tf.gfile.Exists(fr_path):
      data.print_out("Translating test set %s" % en_path)
      # Read lines.
      en_lines, fr_lines = [], []
      with tf.gfile.GFile(en_path, mode="r") as f:
        for line in f:
          en_lines.append(line.strip())
      with tf.gfile.GFile(fr_path, mode="r") as f:
        for line in f:
          fr_lines.append(line.strip())
      # Tokenize and convert to ids.
      en_vocab, _ = wmt.initialize_vocabulary(en_vocab_path)
      _, rev_fr_vocab = wmt.initialize_vocabulary(fr_vocab_path)
      if FLAGS.simple_tokenizer:
        en_ids = [wmt.sentence_to_token_ids(
            l, en_vocab, tokenizer=wmt.space_tokenizer,
            normalize_digits=FLAGS.normalize_digits) for l in en_lines]
      else:
        en_ids = [wmt.sentence_to_token_ids(l, en_vocab) for l in en_lines]
      # Translate.
      results = []
      for idx, token_ids in enumerate(en_ids):
        if idx % 5 == 0:
          data.print_out("Translating example %d of %d." % (idx, len(en_ids)))
        # Which bucket does it belong to?
        buckets = [b for b in xrange(len(data.bins))
                   if data.bins[b] >= len(token_ids)]
        if buckets:
          result, result_cost = [], 100000000.0
          for bucket_id in buckets:
            if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR:
              break
            # Get a 1-element batch to feed the sentence to the model.
            used_batch_size = 1  # batch_size
            inp, target = data.get_batch(
                bucket_id, used_batch_size, None,
                FLAGS.height, preset=([token_ids], [[]]))
            loss, output_logits, _, _ = model.step(
                sess, inp, target, None, beam_size=FLAGS.beam_size)
            outputs = [int(o[0]) for o in output_logits]
            loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm)
            if FLAGS.simple_tokenizer:
              cur_out = outputs
              if wmt.EOS_ID in cur_out:
                cur_out = cur_out[:cur_out.index(wmt.EOS_ID)]
              res_tags = [rev_fr_vocab[o] for o in cur_out]
              bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags)
              loss += 1000.0 * bad_words + 100.0 * bad_brack
            # print bucket_id, loss
            if loss < result_cost:
              result = outputs
              result_cost = loss
          final = linearize(result, rev_fr_vocab)
          results.append("%s\t%s\n" % (final, fr_lines[idx]))
          # print result_cost
          sys.stderr.write(results[-1])
          sys.stderr.flush()
        else:
          sys.stderr.write("TOOO_LONG\t%s\n" % fr_lines[idx])
          sys.stderr.flush()
      if xid:
        decode_suffix = "beam%dln%dn" % (FLAGS.beam_size,
                                         int(100 * FLAGS.length_norm))
        with tf.gfile.GFile(path + ".res" + decode_suffix + xid,
                            mode="w") as f:
          for line in results:
            f.write(line)


def mul(l):
  """Multiply all elements of the list l (1.0 for an empty list)."""
  res = 1.0
  for s in l:
    res *= s
  return res


def interactive():
  """Interactively probe an existing model."""
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    # Initialize model.
    (model, _, _, _, _, (_, _, en_path, fr_path), _, _) = initialize(sess)
    # Load vocabularies.
    en_vocab, rev_en_vocab = wmt.initialize_vocabulary(en_path)
    _, rev_fr_vocab = wmt.initialize_vocabulary(fr_path)
    # Print out vectors and variables.
    if FLAGS.nprint > 0 and FLAGS.word_vector_file_en:
      print_vectors("embedding:0", en_path, FLAGS.word_vector_file_en)
    if FLAGS.nprint > 0 and FLAGS.word_vector_file_fr:
      print_vectors("target_embedding:0", fr_path, FLAGS.word_vector_file_fr)
    total = 0
    for v in tf.trainable_variables():
      shape = v.get_shape().as_list()
      total += mul(shape)
      print v.name, shape, mul(shape)
    print total
    # Start interactive loop.
    sys.stdout.write("Input to Neural GPU Translation Model.\n")
    sys.stdout.write("> ")
    sys.stdout.flush()
    inpt = sys.stdin.readline()
    while inpt:
      cures = []
      # Get token-ids for the input sentence.
      if FLAGS.simple_tokenizer:
        token_ids = wmt.sentence_to_token_ids(
            inpt, en_vocab, tokenizer=wmt.space_tokenizer,
            normalize_digits=FLAGS.normalize_digits)
      else:
        token_ids = wmt.sentence_to_token_ids(inpt, en_vocab)
      print [rev_en_vocab[t] for t in token_ids]
      # Which bucket does it belong to?
      buckets = [b for b in xrange(len(data.bins))
                 if data.bins[b] >= max(len(token_ids), len(cures))]
      if cures:
        buckets = [buckets[0]]
      if buckets:
        result, result_cost = [], 10000000.0
        for bucket_id in buckets:
          if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR:
            break
          glen = 1
          for gen_idx in xrange(glen):
            # Get a 1-element batch to feed the sentence to the model.
            inp, target = data.get_batch(
                bucket_id, 1, None, FLAGS.height,
                preset=([token_ids], [cures]))
            loss, output_logits, _, _ = model.step(
                sess, inp, target, None, beam_size=FLAGS.beam_size,
                update_mem=False)
            # If it is a greedy decoder, outputs are argmaxes of output_logits.
            if FLAGS.beam_size > 1:
              outputs = [int(o) for o in output_logits]
            else:
              loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm)
              outputs = [int(np.argmax(logit, axis=1))
                         for logit in output_logits]
            print [rev_fr_vocab[t] for t in outputs]
            print loss, data.bins[bucket_id]
            print linearize(outputs, rev_fr_vocab)
            cures.append(outputs[gen_idx])
            print cures
            print linearize(cures, rev_fr_vocab)
          if FLAGS.simple_tokenizer:
            cur_out = outputs
            if wmt.EOS_ID in cur_out:
              cur_out = cur_out[:cur_out.index(wmt.EOS_ID)]
            res_tags = [rev_fr_vocab[o] for o in cur_out]
            bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags)
            loss += 1000.0 * bad_words + 100.0 * bad_brack
          if loss < result_cost:
            result = outputs
            result_cost = loss
        print "FINAL", result_cost
        print [rev_fr_vocab[t] for t in result]
      # Read the next input line; EOF (empty string) ends the loop.
      sys.stdout.write("> ")
      sys.stdout.flush()
      inpt = sys.stdin.readline()
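

# Illustrative check of the parameter counting in interactive() above: mul()
# folds a variable's shape into its element count, so a [1024, 512] weight
# matrix contributes 1024 * 512 = 524288.0 to the printed total.
def _param_count_demo():
  return mul([1024, 512])  # == 524288.0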