def test_train_predict3():
    '''
    Test that a model trained on sequences of one length can be used for
    predictions on other sequence lengths.
    '''
    import tempfile
    import shutil
    sp = SequencePattern("sorted", in_seq_len=10, out_seq_len=10)
    tempdir = tempfile.mkdtemp()
    ts2s = TFLearnSeq2Seq(sp, seq2seq_model="embedding_attention",
                          data_dir=tempdir, name="attention")
    tf.reset_default_graph()
    ts2s.train(num_epochs=1, num_points=1000, weights_output_fn=1,
               weights_input_fn=0)
    assert os.path.exists(ts2s.weights_output_fn)

    # Rebuild the model with different in/out sequence lengths and reuse
    # the weights produced above.
    tf.reset_default_graph()
    sp = SequencePattern("sorted", in_seq_len=20, out_seq_len=8)
    tf.reset_default_graph()
    ts2s = TFLearnSeq2Seq(sp, seq2seq_model="embedding_attention",
                          data_dir="DATA", name="attention", verbose=1)
    x = np.random.randint(0, 9, 20)
    prediction, y = ts2s.predict(x, weights_input_fn=1)
    # BUG FIX: the original asserted len(prediction == 8), i.e. the length of
    # an elementwise-comparison array, which is truthy for any non-empty
    # prediction.  Assert the prediction length itself.
    assert len(prediction) == 8

    # Portable cleanup instead of os.system("rm -rf %s" % tempdir).
    shutil.rmtree(tempdir, ignore_errors=True)
def test_sp2():
    '''
    Test a SequencePattern instance with lengths different from the default.
    '''
    pattern = SequencePattern("sorted", in_seq_len=20, out_seq_len=5)
    seq_in = np.random.randint(0, 9, 20)
    seq_out = pattern.generate_output_sequence(seq_in)
    assert len(seq_out) == 5
    # The "sorted" pattern should emit the 5 smallest elements in order.
    expected = sorted(seq_in)[:5]
    assert all(seq_out == expected)
def test_sp2():
    '''
    Check SequencePattern("sorted") with non-default sequence lengths.
    '''
    sp = SequencePattern("sorted", in_seq_len=20, out_seq_len=5)
    xs = np.random.randint(0, 9, 20)
    ys = sp.generate_output_sequence(xs)
    assert len(ys) == 5
    # Output is the sorted prefix of the input, truncated to out_seq_len.
    assert all(ys == sorted(xs)[:5])
def test_train_predict2():
    '''
    Test that the embedding_attention model works, with saving and loading
    of weights.
    '''
    import tempfile
    import shutil
    sp = SequencePattern()
    tempdir = tempfile.mkdtemp()
    ts2s = TFLearnSeq2Seq(sp, seq2seq_model="embedding_attention",
                          data_dir=tempdir, name="attention")
    tf.reset_default_graph()
    ts2s.train(num_epochs=1, num_points=1000, weights_output_fn=1,
               weights_input_fn=0)
    assert os.path.exists(ts2s.weights_output_fn)

    # Reload the saved weights into a fresh model and predict.
    tf.reset_default_graph()
    ts2s = TFLearnSeq2Seq(sp, seq2seq_model="embedding_attention",
                          data_dir="DATA", name="attention", verbose=1)
    prediction, y = ts2s.predict(Xin=range(10), weights_input_fn=1)
    # BUG FIX: len(prediction == 10) measured the length of an elementwise
    # comparison array (always truthy); assert the prediction length instead.
    assert len(prediction) == 10

    # Portable cleanup instead of os.system("rm -rf %s" % tempdir).
    shutil.rmtree(tempdir, ignore_errors=True)
def test_sp1():
    '''
    Test several different SequencePattern instances.
    '''
    # (pattern name, input sequence, expected output sequence)
    cases = [
        ("maxmin_dup", list(range(10)), [9, 0, 2, 3, 4, 5, 6, 7, 8, 9]),
        ("sorted", [5, 6, 1, 2, 9], [1, 2, 5, 6, 9]),
        ("reversed", list(range(10)), [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]),
    ]
    for pattern_name, seq_in, expected in cases:
        pattern = SequencePattern(pattern_name)
        result = pattern.generate_output_sequence(seq_in)
        assert all(result == np.array(expected))
def test_predict1():
    '''
    Test simple predictions using weights just produced (in test_train1).
    '''
    sp = SequencePattern()
    ts2s = TFLearnSeq2Seq(sp, verbose=1)
    wfn = "test_%s" % ts2s.canonical_weights_fn(0)
    print("using weights filename %s" % wfn)
    tf.reset_default_graph()
    prediction, y = ts2s.predict(Xin=range(10), weights_input_fn=wfn)
    # BUG FIX: len(prediction == 10) measured the length of an elementwise
    # comparison array (always truthy); assert the prediction length instead.
    assert len(prediction) == 10
def test_train1():
    '''
    Test simple training of an embedding_rnn seq2seq model.
    '''
    pattern = SequencePattern()
    model = TFLearnSeq2Seq(pattern)
    weights_fn = "test_%s" % model.canonical_weights_fn(0)
    print("using weights filename %s" % weights_fn)
    # Remove any stale weights file so the existence check below is meaningful.
    if os.path.exists(weights_fn):
        os.unlink(weights_fn)
    tf.reset_default_graph()
    model.train(num_epochs=1, num_points=10000, weights_output_fn=weights_fn)
    assert os.path.exists(weights_fn)
def test_sp1():
    '''
    Verify output sequences for three SequencePattern variants.
    '''
    pat = SequencePattern("maxmin_dup")
    out = pat.generate_output_sequence(range(10))
    assert all(out == np.array([9, 0, 2, 3, 4, 5, 6, 7, 8, 9]))

    pat = SequencePattern("sorted")
    out = pat.generate_output_sequence([5, 6, 1, 2, 9])
    assert all(out == np.array([1, 2, 5, 6, 9]))

    pat = SequencePattern("reversed")
    out = pat.generate_output_sequence(range(10))
    assert all(out == np.array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]))
def CommandLine(args=None, arglist=None):
    '''
    Main command line.  Accepts args, to allow for simple unit testing.

    Returns the TFLearnSeq2Seq instance used ("train"), the instance with
    prediction_results attached ("predict"), or None for an unknown command.
    '''
    help_text = """
Commands:

train - give size of training set to use, as argument
predict - give input sequence as argument (or specify inputs via --from-file <filename>)
"""
    parser = argparse.ArgumentParser(
        description=help_text,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("cmd", help="command")
    parser.add_argument("cmd_input", nargs='*', help="input to command")
    parser.add_argument(
        '-v', "--verbose", nargs=0,
        help="increase output verbosity (add more -v to increase versbosity)",
        action=VAction, dest='verbose')
    parser.add_argument(
        "-m", "--model",
        help="seq2seq model name: either embedding_rnn (default) or embedding_attention",
        default=None)
    parser.add_argument("-r", "--learning-rate", type=float,
                        help="learning rate (default 0.0001)", default=0.0001)
    parser.add_argument("-e", "--epochs", type=int,
                        help="number of trainig epochs", default=10)
    parser.add_argument("-i", "--input-weights", type=str,
                        help="tflearn file with network weights to load",
                        default=None)
    parser.add_argument(
        "-o", "--output-weights", type=str,
        help="new tflearn file where network weights are to be saved",
        default=None)
    parser.add_argument("-p", "--pattern-name", type=str,
                        help="name of pattern to use for sequence", default=None)
    parser.add_argument(
        "-n", "--name", type=str,
        help="name of model, used when generating default weights filenames",
        default=None)
    parser.add_argument("--in-len", type=int,
                        help="input sequence length (default 10)", default=None)
    parser.add_argument("--out-len", type=int,
                        help="output sequence length (default 10)", default=None)
    parser.add_argument(
        "--from-file", type=str,
        help="name of file to take input data sequences from (json format)",
        default=None)
    parser.add_argument(
        "--iter-num", type=int,
        help="training iteration number; specify instead of input- or output-weights to use generated filenames",
        default=None)
    parser.add_argument(
        "--data-dir",
        help="directory to use for storing checkpoints (also used when generating default weights filenames)",
        default=None)
    # model parameters
    parser.add_argument(
        "-L", "--num-layers", type=int,
        help="number of RNN layers to use in the model (default 1)", default=1)
    parser.add_argument("--cell-size", type=int,
                        help="size of RNN cell to use (default 32)", default=32)
    parser.add_argument("--cell-type", type=str,
                        help="type of RNN cell to use (default BasicLSTMCell)",
                        default="BasicLSTMCell")
    parser.add_argument("--embedding-size", type=int,
                        help="size of embedding to use (default 20)", default=20)
    parser.add_argument("--tensorboard-verbose", type=int,
                        help="tensorboard verbosity level (default 0)", default=0)

    if not args:
        args = parser.parse_args(arglist)

    # --iter-num generates sequential weights filenames: load iteration N,
    # save iteration N+1 (the train/predict API accepts ints here).
    if args.iter_num is not None:
        args.input_weights = args.iter_num
        args.output_weights = args.iter_num + 1

    model_params = dict(
        num_layers=args.num_layers,
        cell_size=args.cell_size,
        cell_type=args.cell_type,
        embedding_size=args.embedding_size,
        learning_rate=args.learning_rate,
        tensorboard_verbose=args.tensorboard_verbose,
    )

    if args.cmd == "train":
        try:
            num_points = int(args.cmd_input[0])
        except (IndexError, ValueError):  # narrowed from a bare except
            raise Exception(
                "Please specify the number of datapoints to use for training, as the first argument"
            )
        sp = SequencePattern(args.pattern_name, in_seq_len=args.in_len,
                             out_seq_len=args.out_len)
        ts2s = TFLearnSeq2Seq(sp, seq2seq_model=args.model,
                              data_dir=args.data_dir, name=args.name,
                              verbose=args.verbose)
        ts2s.train(num_epochs=args.epochs, num_points=num_points,
                   weights_output_fn=args.output_weights,
                   weights_input_fn=args.input_weights,
                   model_params=model_params)
        return ts2s

    elif args.cmd == "predict":
        if args.from_file:
            # BUG FIX: read the JSON sequences from the named file.  The
            # original passed the *filename* to json.loads and then fell
            # through to the cmd_input parse below, clobbering `inputs`.
            with open(args.from_file) as fp:
                inputs = json.load(fp)
        else:
            try:
                input_x = list(map(int, args.cmd_input))
                inputs = [input_x]
            except ValueError:  # narrowed from a bare except
                raise Exception(
                    "Please provide a space-delimited input sequence as the argument"
                )
        sp = SequencePattern(args.pattern_name, in_seq_len=args.in_len,
                             out_seq_len=args.out_len)
        ts2s = TFLearnSeq2Seq(sp, seq2seq_model=args.model,
                              data_dir=args.data_dir, name=args.name,
                              verbose=args.verbose)
        results = []
        for x in inputs:
            prediction, y = ts2s.predict(x, weights_input_fn=args.input_weights,
                                         model_params=model_params)
            print("==> For input %s, prediction=%s (expected=%s)"
                  % (x, prediction, sp.generate_output_sequence(x)))
            results.append([prediction, y])
        ts2s.prediction_results = results
        return ts2s

    else:
        print("Unknown command %s" % args.cmd)
def CommandLine(args=None, arglist=None):
    '''
    Main command line -- hard-coded experiment variant.

    Most CLI flags of the original version are disabled; the experiment
    parameters are set as local constants below.  Accepts args, to allow
    for simple unit testing.
    '''
    help_text = """
Commands:

train - give size of training set to use, as argument
predict - give input sequence as argument (or specify inputs via --from-file <filename>)
"""
    parser = argparse.ArgumentParser(
        description=help_text,
        formatter_class=argparse.RawTextHelpFormatter)
    # Only --iter-num remains active; the remaining flags of the original
    # CLI were intentionally disabled for this experiment.
    parser.add_argument(
        "--iter-num", type=int,
        help="training iteration number; specify instead of input- or output-weights to use generated filenames",
        default=None)

    if not args:
        args = parser.parse_args(arglist)

    # Hard-coded experiment parameters.
    p_num_layers = 1
    p_cell_size = 32
    p_cell_type = 'BasicLSTMCell'
    p_embedding_size = 32
    p_learning_rate = 0.0001
    operation = "train"           # "train" or "predict"
    p_train_data_size = 10000
    p_pattern_name = "sorted"
    p_in_len = 32
    p_out_len = 32
    p_model = "embedding_rnn"
    p_data_dir = "models"
    p_name = "test1"
    p_epochs = 50
    p_input_weights = None        # start training from scratch
    p_output_weights = "orig_32"  # weights filename to save to

    # Histogram data bounds the vocabulary of input/output sequences.
    # NOTE(review): assumes these .npy files exist in the working directory.
    A = np.load("input_histograms_32.npy").astype(np.uint32)
    B = np.load("output_histograms_32.npy").astype(np.uint32)
    max_input = np.max(A)
    # BUG FIX: max_output was computed from the *input* histograms (A);
    # use the output histograms (B), which were loaded for that purpose.
    max_output = np.max(B)

    if args.iter_num is not None:
        args.input_weights = args.iter_num
        args.output_weights = args.iter_num + 1

    model_params = dict(
        num_layers=p_num_layers,
        cell_size=p_cell_size,
        cell_type=p_cell_type,
        embedding_size=p_embedding_size,
        learning_rate=p_learning_rate,
    )

    if operation == "train":
        num_points = p_train_data_size
        sp = SequencePattern(p_pattern_name, in_seq_len=p_in_len,
                             out_seq_len=p_out_len,
                             max_input=max_input, max_output=max_output)
        ts2s = TFLearnSeq2Seq(sp, seq2seq_model=p_model, data_dir=p_data_dir,
                              name=p_name)
        ts2s.train(num_epochs=p_epochs, num_points=num_points,
                   weights_output_fn=p_output_weights,
                   weights_input_fn=p_input_weights,
                   model_params=model_params, batch_size=8)
        return ts2s

    elif operation == "predict":
        # Use the first input histogram as the (single) prediction input.
        A = np.load("input_histograms_32.npy").astype(np.uint32)
        A = A[0:1, :]
        A = np.array(A)
        print(A)
        inputs = A
        print(inputs)
        sp = SequencePattern(p_pattern_name, in_seq_len=p_in_len,
                             out_seq_len=p_out_len,
                             max_input=max_input, max_output=max_output)
        ts2s = TFLearnSeq2Seq(sp, seq2seq_model=p_model, data_dir=p_data_dir,
                              name=p_name)
        results = []
        print("inputs", inputs, A)
        for x in inputs:
            prediction, y = ts2s.predict(x, weights_input_fn=p_input_weights,
                                         model_params=model_params)
            results.append([prediction])
            print(results)
            exit()  # NOTE(debug): deliberately stops after the first prediction
        ts2s.prediction_results = results
        return ts2s

    else:
        # BUG FIX: the original printed args.cmd, but "cmd" is not a parsed
        # argument in this variant and would raise AttributeError.
        print("Unknown command %s" % operation)
def CommandLine(args=None, arglist=None):
    '''
    Main command line.  Accepts args, to allow for simple unit testing.

    Returns the TFLearnSeq2Seq instance used ("train"), the instance with
    prediction_results attached ("predict"), or None for an unknown command.
    '''
    help_text = """
Commands:

train - give size of training set to use, as argument
predict - give input sequence as argument (or specify inputs via --from-file <filename>)
"""
    parser = argparse.ArgumentParser(
        description=help_text,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("cmd", help="command")
    parser.add_argument("cmd_input", nargs='*', help="input to command")
    parser.add_argument(
        '-v', "--verbose", nargs=0,
        help="increase output verbosity (add more -v to increase versbosity)",
        action=VAction, dest='verbose')
    parser.add_argument(
        "-m", "--model",
        help="seq2seq model name: either embedding_rnn (default) or embedding_attention",
        default=None)
    parser.add_argument("-r", "--learning-rate", type=float,
                        help="learning rate (default 0.0001)", default=0.0001)
    parser.add_argument("-e", "--epochs", type=int,
                        help="number of trainig epochs", default=10)
    parser.add_argument("-i", "--input-weights", type=str,
                        help="tflearn file with network weights to load",
                        default=None)
    parser.add_argument(
        "-o", "--output-weights", type=str,
        help="new tflearn file where network weights are to be saved",
        default=None)
    parser.add_argument("-p", "--pattern-name", type=str,
                        help="name of pattern to use for sequence", default=None)
    parser.add_argument(
        "-n", "--name", type=str,
        help="name of model, used when generating default weights filenames",
        default=None)
    parser.add_argument("--in-len", type=int,
                        help="input sequence length (default 10)", default=None)
    parser.add_argument("--out-len", type=int,
                        help="output sequence length (default 10)", default=None)
    parser.add_argument(
        "--from-file", type=str,
        help="name of file to take input data sequences from (json format)",
        default=None)
    parser.add_argument(
        "--iter-num", type=int,
        help="training iteration number; specify instead of input- or output-weights to use generated filenames",
        default=None)
    parser.add_argument(
        "--data-dir",
        help="directory to use for storing checkpoints (also used when generating default weights filenames)",
        default=None)
    # model parameters
    parser.add_argument(
        "-L", "--num-layers", type=int,
        help="number of RNN layers to use in the model (default 1)", default=1)
    parser.add_argument("--cell-size", type=int,
                        help="size of RNN cell to use (default 32)", default=32)
    parser.add_argument("--cell-type", type=str,
                        help="type of RNN cell to use (default BasicLSTMCell)",
                        default="BasicLSTMCell")
    parser.add_argument("--embedding-size", type=int,
                        help="size of embedding to use (default 20)", default=20)
    parser.add_argument("--tensorboard-verbose", type=int,
                        help="tensorboard verbosity level (default 0)", default=0)

    if not args:
        args = parser.parse_args(arglist)

    # --iter-num generates sequential weights filenames: load iteration N,
    # save iteration N+1 (the train/predict API accepts ints here).
    if args.iter_num is not None:
        args.input_weights = args.iter_num
        args.output_weights = args.iter_num + 1

    model_params = dict(
        num_layers=args.num_layers,
        cell_size=args.cell_size,
        cell_type=args.cell_type,
        embedding_size=args.embedding_size,
        learning_rate=args.learning_rate,
        tensorboard_verbose=args.tensorboard_verbose,
    )

    if args.cmd == "train":
        try:
            num_points = int(args.cmd_input[0])
        except (IndexError, ValueError):  # narrowed from a bare except
            raise Exception(
                "Please specify the number of datapoints to use for training, as the first argument"
            )
        sp = SequencePattern(args.pattern_name, in_seq_len=args.in_len,
                             out_seq_len=args.out_len)
        ts2s = TFLearnSeq2Seq(sp, seq2seq_model=args.model,
                              data_dir=args.data_dir, name=args.name,
                              verbose=args.verbose)
        ts2s.train(num_epochs=args.epochs, num_points=num_points,
                   weights_output_fn=args.output_weights,
                   weights_input_fn=args.input_weights,
                   model_params=model_params)
        return ts2s

    elif args.cmd == "predict":
        if args.from_file:
            # BUG FIX: read the JSON sequences from the named file.  The
            # original passed the *filename* to json.loads and then fell
            # through to the cmd_input parse below, clobbering `inputs`.
            with open(args.from_file) as fp:
                inputs = json.load(fp)
        else:
            try:
                input_x = list(map(int, args.cmd_input))
                inputs = [input_x]
            except ValueError:  # narrowed from a bare except
                raise Exception(
                    "Please provide a space-delimited input sequence as the argument"
                )
        sp = SequencePattern(args.pattern_name, in_seq_len=args.in_len,
                             out_seq_len=args.out_len)
        ts2s = TFLearnSeq2Seq(sp, seq2seq_model=args.model,
                              data_dir=args.data_dir, name=args.name,
                              verbose=args.verbose)
        results = []
        for x in inputs:
            prediction, y = ts2s.predict(x, weights_input_fn=args.input_weights,
                                         model_params=model_params)
            print("==> For input %s, prediction=%s (expected=%s)"
                  % (x, prediction, sp.generate_output_sequence(x)))
            results.append([prediction, y])
        ts2s.prediction_results = results
        return ts2s

    else:
        print("Unknown command %s" % args.cmd)