示例#1
0
def loadEmbeddings(filepath=DEFAULT_FILE_PATH, dimensions=50):
    '''
    Read a whitespace-separated embedding file into a matrix and a vocabulary.

    Every non-blank line is expected to hold a token followed by exactly
    `dimensions` float components. Blank lines are skipped.

    Params:
        filepath: path of the embedding text file.
        dimensions: number of float components expected after each token.
    Returns:
        (embeddings, tok2id):
            embeddings: numpy array of shape (number of non-blank lines,
                dimensions); row i holds the vector of the i-th non-blank line.
            tok2id: dict mapping each token to its row index. If a token
                occurs more than once, the last occurrence wins.
    Raises:
        RuntimeError: if a line does not carry exactly `dimensions` components.
    '''
    # First pass: count the non-blank lines so the matrix can be preallocated.
    with open(filepath, "r") as fin:
        count = sum(1 for line in fin if line.strip())
    vprint(True, "Vocabulary size = " + str(count), color="BLUE")

    embeddings = np.zeros((count, dimensions))
    tok2id = {}
    index = 0  # row of the current token (avoids shadowing the builtin `id`)
    # Second pass: parse each line and fill the corresponding matrix row.
    with open(filepath) as ifs:
        for line in ifs:
            row = line.split()
            if not row:
                # Blank (whitespace-only) line.
                continue
            token = row[0]
            tok2id[token] = index
            data = [float(x) for x in row[1:]]
            if len(data) != dimensions:
                # Single-argument print() behaves the same on Python 2 and 3.
                print(len(data))
                print(dimensions)
                raise RuntimeError("wrong number of dimensions")
            embeddings[index] = data
            index += 1
    return embeddings, tok2id
def main():
    '''
    Parse the command-line arguments, build a HierLSTMTransModel from them and
    print the arguments the model reports. Used for unit testing this module.

    Recognised arguments:
        args.hidden_size (i.e. the size of hidden state)
        args.num_layers (default = 1, i.e. no stacking),
        args.input_seq_length,
        args.target_seq_length,
        args.input_embedding_size,
        args.output_vocab_size,
        args.batch_size (i.e. the number of sequences in each batch)
        args.optimizer_choice (default = "rms", also could be "adam", "grad_desc")
        args.learning_rate,
        args.grad_clip
        args.test
        args.verbose
    '''
    parser = argparse.ArgumentParser()
    # RNN cell hidden state's size
    parser.add_argument('--hidden_size', type=int, default=96,
                        help='size of RNN cell hidden state')
    # Number of stacked RNN layers. Only a single layer implemented
    parser.add_argument('--num_layers', type=int, default=1,
                        help='number of stacked RNN layers')
    # Larger than the max length of each input sequence
    parser.add_argument('--input_seq_length', type=int, default=20,
                        help='maximum length of each input sequence or larger')
    # Larger than the max of each target sequence
    parser.add_argument('--target_seq_length', type=int, default=20,
                        help='maximum length of each target sequence or larger')
    # Embedding size of input
    parser.add_argument('--input_embedding_size', type=int, default=96,
                        help='embedding size of input vectors')
    # Size of the output vocabulary
    parser.add_argument('--output_vocab_size', type=int, default=92,
                        help='size of output vocabulary')
    # Batch size
    parser.add_argument('--batch_size', type=int, default=100,
                        help='number of sequences in a batch')
    # Choice of optimizer
    parser.add_argument('--optimizer_choice', type=str, default='rms',
                        help='rms (defualt), adam, grad_desc')
    # Learning rate
    parser.add_argument('--learning_rate', type=float, default=0.002,
                        help='Learning rate')
    # Gradient clip, i.e. maximum value of gradient amplitude allowed
    parser.add_argument('--grad_clip', type=float, default=None,
                        help='gradient upbound, i.e. maximum value of gradient amplitute allowed')
    # Model unit testing flag, default to False
    parser.add_argument('-t', '--test', action='store_true',
                        help='only set to true when performing unit test')
    # Verbosity flag, default to False
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='only set to true when you want verbosity')

    # Parse the arguments, and construct the model
    args = parser.parse_args()
    model = HierLSTMTransModel(args)
    # Single-argument print() behaves the same on Python 2 and 3.
    print("arguments:")
    vprint(args.verbose, model.get_args().__dict__, color=None)
示例#3
0
    def __init__(self,
                 batch_size=10,
                 seq_lengths=[2, 3],
                 token_sizes=[20, 1],
                 usage="train",
                 if_testing=False):
        '''
        Initialization of a data loader.
        Params:
            batch_size: size of batch.
            seq_lengths: an integer list, [input_seq_length, target_seq_length]
            token_sizes: an integer list, [input_embedding_size, target_word_index_size], where target_word_index_size = 1
            usage: what this data loader is used for. Only "train" and "test"
                open a data file here; for any other value no data file is
                opened and self.data_file / self.data_file_path stay unset.
            if_testing: if true, no embeddings are loaded and the number of
                batches is fixed to 15 (random-data test mode).
        Returns:
            None
        '''
        # Sample counts of the data sets: train 84973, dev 10614, test 10617.
        dev_sample_count = 10614

        self.batch_size = batch_size
        # Copy the lists so the instance never aliases the shared default
        # arguments (or a list the caller keeps mutating).
        self.seq_lengths = list(seq_lengths)
        self.complex_length = self.seq_lengths[0]
        self.simple_length = self.seq_lengths[1]
        self.token_sizes = list(token_sizes)
        self.embedding_size = self.token_sizes[0]
        self.usage = usage
        self.if_testing = if_testing
        # Number of whole batches in the dev set, which is the file read below.
        self.num_batches = int(dev_sample_count / batch_size)

        vprint(True, "Loading embeddings...", color="BLUE")
        if self.usage in ("train", "test"):
            # NOTE(review): both usages currently read the dev file; the
            # train/test paths ('../data/train_data.txt',
            # '../data/test_data.txt') were disabled in the original code.
            self.data_file_path = '../data/dev_new_data.txt'
            self.data_file = open(self.data_file_path)

        if self.if_testing:
            # Testing code. Returns random numbers. Do not touch..
            self.num_batches = 15
        else:
            # Real work
            self.embeddings, self.tok2id = loadEmbeddings(
                filepath=DEFAULT_FILE_PATH, dimensions=self.embedding_size)
            vprint(True, "Finished loading embeddings", color="BLUE")
示例#4
0
def loadDict(filepath=DEFAULT_FILE_PATH, dimensions=50):
    '''
    Build the index-to-token dictionary of an embedding file.

    The first whitespace-separated field of every non-blank line is taken as
    a token; tokens are numbered from 0 in file order.

    Params:
        filepath: path of the embedding text file.
        dimensions: unused; kept so the signature matches loadEmbeddings().
    Returns:
        id2tok: dict mapping the 0-based index of each non-blank line to the
        token at the start of that line.
    '''
    # One pass is enough: the vocabulary size equals the number of tokens read.
    with open(filepath, "r") as fin:
        tokens = [line.split()[0] for line in fin if line.strip()]
    vprint(True, "Vocabulary size = " + str(len(tokens)), color="BLUE")
    return dict(enumerate(tokens))
示例#5
0
    def __init__(self, args, training=True):
        '''
        Build the computation graph of the LSTM encoder-decoder model.
        Params:
          args: contains arguments required for the Model creation --
            args.hidden_size (i.e. the size of hidden state)
            args.num_layers (default = 1, i.e. no stacking),
            args.input_seq_length,
            args.target_seq_length,
            args.input_embedding_size,
            args.output_vocab_size,
            args.target_token_size (=1, target token is target word's index)
            args.batch_size (i.e. the number of sequences in each batch),
            args.optimizer_choice (default = "rms", also could be "adam", "grad_desc"),
            args.learning_rate,
            args.grad_clip
            args.test
            args.verbose
          training: indicates whether this is a training session; the
            gradient, clipping and optimizer nodes are only built when true.
        Returns:
            None
        Raises:
            ValueError: if args.num_layers is not a positive integer, or if
                args.optimizer_choice is not one of "rms", "adam", "grad_desc".
        NOTE Each cell's input is batch_size x 1 x input_embedding_size
        NOTE Each cell's output is batch_size x 1 x hidden_size (needs to be converted)
        '''
        # Store the arguments, and print the important argument values
        self.args = args
        print("VanillaLSTMTransModel initializer is called..\n"
              + "Time: " + time.ctime() + "\n"
              + "  args.hidden_size (H) = " + str(self.args.hidden_size) + "\n"
              + "  args.input_embedding_size (Di) = " + str(self.args.input_embedding_size) + "\n"
              + "  args.output_vocab_size (Vo) = " + str(self.args.output_vocab_size) + "\n"
              + "  args.num_layers = " + str(self.args.num_layers) + "\n"
              + "  args.optimizer_choice = " + self.args.optimizer_choice + "\n"
              + "  args.learning_rate = " + str(self.args.learning_rate) + "\n"
              + "  args.grad_clip = " + str(self.args.grad_clip) + "\n")

        if training:
            print("This is a training session..")
        else:
            print("This is a session other than training..")
        print("Input batch size = " + str(self.args.batch_size) + "\n")

        # Initialize LSTM cell units, hidden_size is the dimension of hidden state
        encoder_cell = tf.nn.rnn_cell.LSTMCell(
            num_units=self.args.hidden_size,
            initializer=tf.contrib.layers.xavier_initializer(),
            state_is_tuple=True)
        decoder_cell = tf.nn.rnn_cell.LSTMCell(
            num_units=self.args.hidden_size,
            initializer=tf.contrib.layers.xavier_initializer(),
            state_is_tuple=True)

        # Convert cell's outputs (batch_size x hidden_size for each cell) to batch_size x output_vocab_size
        # y_hat = softmax(tf.matmul(cell_output, output_ws)); there is no bias term for now
        with tf.variable_scope("vanLSTM_decoder/decoder_accessory"):
            self.output_ws = tf.get_variable(
                "output_ws",
                [self.args.hidden_size, self.args.output_vocab_size])
            output_affine_map_lambda = lambda cell_output_: tf.matmul(
                cell_output_, self.output_ws)
            output_converter_lambda = lambda cell_output_: tf.nn.softmax(
                logits=output_affine_map_lambda(cell_output_),
                dim=-1)  # -1: last dimension
            self.output_affine_map_lambda = output_affine_map_lambda
            self.output_converter_lambda = output_converter_lambda

        # Multi-layer RNN construction, if more than one layer.
        # The type is checked first so a non-integer never reaches the comparison.
        num_layers = self.args.num_layers
        if not isinstance(num_layers, int) or num_layers <= 0:
            raise ValueError(
                "Specified number of layers is non-positive or is not an integer."
            )
        elif num_layers >= 2:
            vprint(
                True,
                "Stacked RNN: number of layers = " + str(num_layers))
            # NOTE(review): the same cell object is repeated for every layer;
            # newer TensorFlow 1.x releases may reject reusing one cell
            # instance across layers -- confirm against the TF version in use.
            encoder_cell = tf.nn.rnn_cell.MultiRNNCell(
                [encoder_cell] * num_layers, state_is_tuple=True)
            decoder_cell = tf.nn.rnn_cell.MultiRNNCell(
                [decoder_cell] * num_layers, state_is_tuple=True)

        # TODO: (improve) Dropout layer can be added here
        # Store the recurrent unit
        self.encoder_cell = encoder_cell
        self.decoder_cell = decoder_cell

        # Create encoder and decoder RNNChain instances
        # NOTE(review): the encoder is given name="vanLSTM_decoder" while its
        # scope is "vanLSTM_encoder"; this looks like a copy-paste slip but is
        # left unchanged because the effect of `name` in RNNChain is not
        # visible here.
        encoder = RNNChain(self.encoder_cell,
                           name="vanLSTM_decoder",
                           scope="vanLSTM_encoder")
        decoder = RNNChain(self.decoder_cell,
                           name="vanLSTM_decoder",
                           scope="vanLSTM_decoder")
        self.encoder = encoder
        self.decoder = decoder

        # Input data contains sequences of input tokens of input_embedding_size dimension
        self.input_data = tf.placeholder(
            tf.float32,
            [None, self.args.input_seq_length, self.args.input_embedding_size])
        # Target data contains sequences of output tokens of target_token_size dimension (=1)
        self.target_data = tf.placeholder(
            tf.int32,
            [None, self.args.target_seq_length, self.args.target_token_size])
        # Target lengths list contains numbers of non-padding tokens in each sequence in this batch,
        # each element is an integer, indicating the number of non-padding tokens of a sequence.
        self.target_lens_list = tf.placeholder(tf.int32, [None])

        # Learning rate
        self.lr = tf.Variable(self.args.learning_rate,
                              trainable=False,
                              name="learning_rate")

        # Initial cell state of LSTM (initialized with zeros)
        self.initial_state = encoder_cell.zero_state(
            batch_size=self.args.batch_size, dtype=tf.float32)

        # Preprocessing the information got from placeholders.
        # First, target_lens_list does not need any further actions.
        target_lens_list = self.target_lens_list
        # Second, input_data and target_data need reshaping.
        # Split inputs and targets according to sequences: a 3D Tensor, num_of_seq x seq_length x Di/Vo
        # -> list of size seq_length, each of whose element is of num_of_seq x 1 x Di/Vo
        if tf.__version__.startswith('0.'):
            # TensorFlow 0.x signature: tf.split(split_dim, num_split, value)
            input_data_temp = tf.split(split_dim=1,
                                       num_split=self.args.input_seq_length,
                                       value=self.input_data)
            target_data_temp = tf.split(split_dim=1,
                                        num_split=self.args.target_seq_length,
                                        value=self.target_data)
        else:
            # TensorFlow 1.x signature: tf.split(value, num_or_size_splits, axis)
            input_data_temp = tf.split(
                value=self.input_data,
                num_or_size_splits=self.args.input_seq_length,
                axis=1)
            target_data_temp = tf.split(
                value=self.target_data,
                num_or_size_splits=self.args.target_seq_length,
                axis=1)
        # Squeeze: list of size seq_length, each of which is num_of_seq x 1 x Di/Vo
        # -> list of size seq_length, each of which is num_of_seq x Di/Vo
        input_data_list = [
            tf.squeeze(input=list_member, axis=[1])
            for list_member in input_data_temp
        ]
        target_data_list = [
            tf.squeeze(input=list_member, axis=[1])
            for list_member in target_data_temp
        ]
        del input_data_temp, target_data_temp

        ## This is where the LSTM models differ from each other in substance.
        ## The other code might also differ but they are not substantial.
        # Call the encoder
        vprint(True, "Building encoder...", color="MAG")
        encoder_start_time = time.time()
        _, self.encoder_final_state = encoder.run(
            inputs=input_data_list,
            chain_length=None,
            cell_input_size=[
                self.args.batch_size, self.args.input_embedding_size
            ],
            initial_state=self.initial_state,
            feed_previous=False,
            verbose=self.args.verbose)
        # NOTE(review): encoder_end_state is set to the *initial* state, not
        # to encoder_final_state -- confirm this is intended.
        self.encoder_end_state = self.initial_state
        encoder_end_time = time.time()
        vprint(True,
               " -- Encoder built. Time used: " +
               str(encoder_end_time - encoder_start_time) + " s",
               color="MAG")

        # Call the decoder
        vprint(True, "Building decoder...", color="MAG")
        decoder_start_time = time.time()
        # cell_outputs is list of length target_seq_length, each element is batch_size x hidden_size
        # NOTE(review): the decoder is handed the encoder's input list as
        # `inputs`; how RNNChain.run uses it when feed_previous=True is not
        # visible here.
        self.cell_outputs, _ = decoder.run(
            inputs=input_data_list,
            chain_length=self.args.target_seq_length,
            cell_input_size=[
                self.args.batch_size, self.args.output_vocab_size
            ],
            initial_state=self.encoder_final_state,
            feed_previous=True,
            loop_func=self.output_converter_lambda,
            verbose=self.args.verbose)
        decoder_end_time = time.time()
        vprint(True,
               " -- Decoder built. Time used: " +
               str(decoder_end_time - decoder_start_time) + " s",
               color="MAG")

        vprint(True, "Building output converter...", color="MAG")
        converter_start_time = time.time()
        # output_data is softmaxed. It is a list of length target_seq_length, each element is batch_size x output_vocab_size
        self.output_data = [
            output_converter_lambda(cell_output_)
            for cell_output_ in self.cell_outputs
        ]
        converter_end_time = time.time()
        vprint(True,
               " -- Converter built. Time used: " +
               str(converter_end_time - converter_start_time) + " s",
               color="MAG")

        # Compute the cost scalar: specifically, the average cost per sequence
        vprint(True, "Building cost calculator...", color="MAG")
        sum_of_cost = self.get_sum_of_cost(cell_outputs=self.cell_outputs,
                                           targets=target_data_list,
                                           targets_lens=target_lens_list)
        self.cost = tf.div(sum_of_cost, self.args.batch_size)
        print("\n[DEBUG] self.cost: ")
        # Single-argument print() behaves the same on Python 2 and 3.
        print(self.cost)

        # We only deal with back-propagation during training phase.
        if training:
            # Get trainable_variables list and count them.
            vprint(True,
                   "\nAggregating all trainable variables...",
                   color="BLUE")
            trainable_vars = tf.trainable_variables()
            num_trainable_components = 0
            vprint(True,
                   "\nNumber of trainable Tensors = " +
                   str(len(trainable_vars)),
                   color="GREEN")
            for var in trainable_vars:
                # np.prod replaces the deprecated alias np.product.
                num_trainable_components += np.prod(
                    var.get_shape().as_list())
                vprint(True,
                       " " + str(var.name) +
                       "\t" + str(var.get_shape()) +
                       " x " + str(var.dtype.name),
                       color="GREEN")
            vprint(True,
                   "Number of trainable scalar components = " +
                   str(num_trainable_components),
                   color="GREEN")
            # Report the order of magnitude; the first (largest) lower bound
            # that is reached decides the message. Below 1e3 nothing is printed.
            magnitude_notes = (
                (1e9, " -- that is in the order of 10e9 to 10e-Infinity: billions or higher"),
                (1e8, " -- that is in the order of 10e8: hundreds of millions\n"),
                (1e7, " -- that is in the order of 10e7: tens of millions\n"),
                (1e6, " -- that is in the order of 10e6: millions\n"),
                (1e5, " -- that is in the order of 10e5: hundreds of thousands\n"),
                (1e4, " -- that is in the order of 10e4: tens of thousands\n"),
                (1e3, " -- that is in the order of 10e3: thousands\n"),
            )
            for lower_bound, note in magnitude_notes:
                if num_trainable_components >= lower_bound:
                    vprint(True, note, color="GREEN")
                    break
            self.num_of_trainable_components = num_trainable_components

            # Compute the gradient of cost with respect to the trainable variables.
            vprint(
                True,
                "Calculating gradient expressions for all trainable variables. Be patient...",
                color="BLUE")
            grad_start_time = time.time()
            # self.gradients is a list with one entry per trainable variable
            # (entries are replaced below when they are None in test mode).
            self.gradients = tf.gradients(self.cost, trainable_vars)
            grad_end_time = time.time()
            vprint(
                True,
                " -- Finished calculating gradient expressions. Time used: " +
                str(grad_end_time - grad_start_time) + " s",
                color="BLUE")

            # A hack: when testing, elements in gradients may ALL be None, and
            # it causes problems in clip_by_global_norm(). Replace them by
            # zeros. This is just for validation of the code.
            if self.args.test:
                print("TESTING TESTING TESTING")
                for i, gradient in enumerate(self.gradients):
                    if gradient is None:
                        self.gradients[i] = tf.zeros(
                            shape=trainable_vars[i].get_shape(),
                            dtype=tf.float32)

            # Clip the gradients by global norm if a bound was given.
            if self.args.grad_clip is not None:
                clipped_grads, _ = tf.clip_by_global_norm(
                    self.gradients, self.args.grad_clip)
            else:
                clipped_grads = self.gradients

            # Using RMSprop, inspired by the LSTM paper of Dr. Alahi, Prof. Saverese, and Prof. Fei-Fei Li
            if self.args.optimizer_choice == "rms":
                optimizer = tf.train.RMSPropOptimizer(self.lr)
            elif self.args.optimizer_choice == "adam":
                optimizer = tf.train.AdamOptimizer(self.lr)
            elif self.args.optimizer_choice == "grad_desc":
                optimizer = tf.train.GradientDescentOptimizer(self.lr)
            else:
                raise ValueError("Optimizer not supported: " +
                                 self.args.optimizer_choice)

            # Train operator. Apply the (possibly clipped) gradients to their variables.
            self.train_op = optimizer.apply_gradients(
                zip(clipped_grads, trainable_vars))
示例#6
0
    def __init__(self, args, model_choice="V", if_testing=False):
        '''
        Instantiate a model, a save file, and a log text file.
        Params:
            args: contains arguments required for the model creation. Must
                provide args.continue_training; when it is true the arguments
                pickled in the run directory replace `args` for the model.
            model_choice: specify the choice of model, default to "VanillaLSTMTransModel".
                V: VanillaLSTMTransModel
                AV: AttenVanillaLSTMTransModel
                H: HierLSTMTransModel, i.e. Hierarchical LSTM Model
                A/AH: AttenHierLSTMTransModel, i.e. Hierarchical LSTM Model with Attention
                The full class name is accepted as well. The run directory is
                "../RUN_<abbreviation>" ("A" is stored under "../RUN_AH").
            if_testing: stored on the instance as self.if_testing.
        Returns:
            None
        Raises:
            ValueError: if model_choice is not supported, or if the saved
                arguments of a run to be continued cannot be loaded.
            OSError: from os.mkdir, if the run directory of a first-time
                training already exists.
        '''
        # Save the args
        self.args = args
        self.if_testing = if_testing

        # Map every accepted spelling to its abbreviation. The same table is
        # used for first-time and continued training so both agree on the run
        # directory.
        abbreviations = {
            "VanillaLSTMTransModel": "V", "V": "V",
            "AttenVanillaLSTMTransModel": "AV", "AV": "AV",
            "HierLSTMTransModel": "H", "H": "H",
            "AttenHierLSTMTransModel": "AH", "AH": "AH", "A": "AH",
        }
        model_abbr = abbreviations.get(model_choice)
        if model_abbr is None:
            raise ValueError("Model choice: " + str(model_choice) + " is not supported")

        def build_model(model_args):
            # The class names are resolved lazily, so only the chosen model's
            # class has to be available.
            if model_abbr == "V":
                return VanillaLSTMTransModel(model_args)
            if model_abbr == "AV":
                return AttenVanillaLSTMTransModel(model_args)
            if model_abbr == "H":
                return HierLSTMTransModel(model_args)
            return AttenHierLSTMTransModel(model_args)

        # Directory to save things (the same for test and normal runs)
        self.directory = "../RUN_" + model_abbr
        args_path = os.path.join(self.directory, 'args.pkl')

        # Instantiate a model
        build_start_time = time.time()
        if not args.continue_training:
            # First time training
            vprint(True, "Trainer is called. First time training.")
            vprint(True, "\033[1;m" + "Building computation graph for the model..." + "\033[0;m", color="CYAN")
            self.model = build_model(args)
            # Fails if the directory already exists, which protects an
            # earlier run from being overwritten.
            os.mkdir(self.directory)
        else:
            # Continuing training
            vprint(True, "Trainer is called. Continuing training.")
            try:
                # NOTE: pickle.load can execute arbitrary code; only continue
                # from run directories you trust.
                with open(args_path, 'rb') as f:
                    saved_args = pickle.load(f)
                self.args = saved_args
                # Don't forget this line below
                self.args.continue_training = True
            except Exception:
                raise ValueError("The specified model is either not trained, damaged, "
                                 "or in a wrong path. It should be ../RUN_" + model_abbr + "/args.pkl")
            vprint(True, "\033[1;m" + "Rebuilding computation graph for the model..." + "\033[0;m", color="CYAN")
            self.model = build_model(saved_args)

        build_end_time = time.time()
        vprint(True, "\033[1;m" + "Graph built. Time used: " + str(build_end_time - build_start_time) + " seconds" + "\033[0;m", color="CYAN")

        # Save the arguments on a first-time training. When continuing from
        # previous training they are not written again.
        if not args.continue_training:
            # Pickle data is binary; the directory was just created, so the
            # file is new.
            with open(args_path, 'wb') as f:
                pickle.dump(args, f)
            vprint(True, "Arguments saved to file: " + self.directory + "/args.pkl")

        # Append a header line to both log files. Create them if not found.
        for log_name in ('log.txt', 'reduced_log.txt'):
            with open(os.path.join(self.directory, log_name), 'a') as log_file:
                log_file.write("Log file: " + self.directory + '\n')
示例#7
0
    def train(self, num_epochs=100, save_every_batch=400):
        '''
        Train the model.
        Params:
            num_epochs: number of epochs, defualt to 100
            save_every_batch: period of saving, epoch * data_loader.get_num_batches() + batch_index, defualt to 400.
                NOTE in the current implementation, this argument is unused. I opted to save after each epoch.
        Returns:
            None
        '''
        args = self.args
        decay_rate = 0.95 # You may modify it yourself. decay_rate in (0,1]
        log = open(os.path.join(self.directory, 'log.txt'), 'a')
        reduced_log = open(os.path.join(self.directory, 'reduced_log.txt'), 'a')

        data_loader = Dataloader(batch_size=args.batch_size, 
                                 seq_lengths=[args.input_seq_length, args.target_seq_length], 
                                 token_sizes=[args.input_embedding_size, args.target_token_size],
                                 if_testing=self.if_testing)
        num_batches = data_loader.get_num_batches()
        
        # Tic
        train_start_time = time.time()
        vprint(True, "")
        with tf.Session() as sess:
            if args.continue_training == False:
                # Initialize all varaibles in the computational graph
                # r0.11 or earlier: sess.run(tf.initialize_all_variables())
                sess.run(tf.global_variables_initializer())
                # Add all the variables to the registration list of variables to be saved
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=50)
            else:
                # Access the checkpoint file
                ckpt = tf.train.get_checkpoint_state(checkpoint_dir=self.directory, latest_filename=None)
                saver = tf.train.Saver(tf.global_variables(), max_to_keep=50)
                saver.restore(sess=sess, save_path=ckpt.model_checkpoint_path)
                print ckpt.model_checkpoint_path

            train_loss = 0.0
            # For each epoch
            for e in range(num_epochs):
                # Reset data loader so that it reads from the beginning.
                data_loader.reset()
                print args.continue_training
                vprint(args.continue_training, "\033[1;mContinued training\033[0;m", color="MAG")
                vprint(True, "\033[1;mStepped in epoch e = " + str(e+1) + "\033[0;m", color="MAG")
                # Assign the learning rate (decayed acceleration to the epoch number)
                sess.run(tf.assign(self.model.lr, args.learning_rate * (decay_rate ** e)))
                #Get the initial state of the encoder
                state = sess.run(self.model.initial_state)

                # For each batch in this epoch
                for b in range(num_batches):
                    vprint(True, "Stepped in epoch = " + str(e+1) + ", batch b = " + str(b+1), color="MAG")
                    # Tic
                    batch_start_time = time.time()

                    # Get the input (x) and target (y) data of the current batch
                    vprint(True, "Getting batch.. b = " + str(b+1), color="MAG")
                    # x: input batch. It is a list of length batch_size, each element of which is of size input_seq_length x input_embedding_size
                    # y: target batch. It is a list of length batch_size, each element of which is of size target_seq_length x target_token_size (=1)
                    # yl: target sequences' lengths. It is a list of length batch_size, each element of which is an integer.
                    x, y, yl = data_loader.next_batch()
                    vprint(True, "Got batch. Run the session...", color="MAG")

                    # Feed the input and target data and the initial cell state
                    feed = {self.model.input_data: x, self.model.target_data: y, self.model.target_lens_list: yl, self.model.initial_state: state}

                    # Fetch the loss of the self.model on this batch
                    # output_data is softmaxed. It is a list of length target_seq_length, each element is batch_size x output_vocab_size
                    try:
                        _, train_loss = sess.run([self.model.train_op, self.model.cost], feed_dict=feed)
                        #print output_data[0]
                    except Exception as exception_msg:
                        vprint(True, "sess.run() runtime error.", color="RED")
                        print exception_msg

                    # Toc
                    batch_end_time = time.time()

                    # Print something and write to log
                    log_entry = "epoch {}/{}, global step number {}/{}, \n\
                                 train_loss = {:.5f}, \n\
                                 time/batch = {:.3f} s \n".format(e + 1,
                                                                  num_epochs,
                                                                  e * num_batches + b + 1, 
                                                                  num_epochs * num_batches,
                                                                  train_loss,
                                                                  batch_end_time - batch_start_time)
                    reduced_log_entry = "{} {} {} {} {:.5f}\n".format(e + 1, 
                                                                      num_epochs, 
                                                                      e * num_batches + b + 1, 
                                                                      num_epochs * num_batches, 
                                                                      train_loss)
                    # Print on screen
                    vprint(True, log_entry, color=None)
                    # Append to log.txt and reduced_log.text.
                    log.write(log_entry)
                    reduced_log.write(reduced_log_entry)
                    
                # Save the model after each epoch
                checkpoint_path = os.path.join(self.directory, 'model.ckpt')
                time_stamp_integer = int(time.time())
                saver.save(sess, checkpoint_path, global_step=time_stamp_integer)
                print("Saved to {}".format(checkpoint_path + "-" + str(time_stamp_integer)))
                log.write("Saved to {}".format(checkpoint_path + "-" + str(time_stamp_integer)))
            
            train_end_time = time.time()
            vprint(True, "\033[1;m" + "\nTraining finished. Time used: " + str(train_end_time - train_start_time) + " seconds" + "\033[0;m", color="CYAN")
            log.write("Training finished.\n")
            log.close()
            reduced_log.close()
示例#8
0
    def run(self,
            inputs,
            chain_length,
            initial_state,
            cell_input_size=None,
            feed_previous=False,
            loop_func=None,
            verbose=False):
        '''
        Unroll this chain's cell step by step inside its variable scope.
        Params:
            inputs: list of Tensors, one per step, each element is of size
                batch_size x input_vector_size. Must not be None. If
                feed_previous is True its contents are ignored: the chain is
                driven by the previous step's converted output instead.
            chain_length: number of steps to unroll when feed_previous is
                True (required in that case). Ignored when feed_previous is
                False, where the chain has one step per element of inputs.
            initial_state: the initial state, of size batch_size x hidden_size
            cell_input_size: a 2-integer list [batch_size, input_vector_size]
                giving the size of the all-zero tensor fed to the first cell.
                Required when feed_previous is True, ignored otherwise.
            feed_previous: if True, then a cell's input is the previous cell's
                output processed by loop_func, except the first cell, whose
                input is a zero tensor of size cell_input_size.
            loop_func: a function that converts a cell's output from
                batch_size x hidden_size to batch_size x output_vocab_size.
                Required when feed_previous is True.
            verbose: verbosity flag
        Returns:
            outputs: list of tensors, one per step, each element is of size
                batch_size x hidden_size. NOTE NOT converted to yhat
            final_state: the cell state after the last step (initial_state
                itself if the chain has no steps)
        Raises:
            ValueError: if inputs is None, or if feed_previous is True and
                loop_func, chain_length or cell_input_size is missing/invalid.
        '''
        # Validate everything up front, before anything is added to the graph.
        if inputs is None:
            raise ValueError("RNNChain::run()'s inputs should not be None")
        if feed_previous:
            if loop_func is None:
                raise ValueError(
                    "feed_previous is True, but loop_func is not given")
            if chain_length is None:
                raise ValueError(
                    "feed_previous is True, but chain_length is not given")
            if (not isinstance(cell_input_size, list)
                    or len(cell_input_size) != 2):
                raise ValueError(
                    "cell_input_size should be a two-integer list, [batch_size, input_vector_size]"
                )
            # This is a hack: only the number of steps matters in this mode,
            # so replace inputs by placeholder step indices.
            inputs = range(chain_length)

        cell = self.cell
        scope = self.scope

        vprint(
            verbose,
            "\n[INFO] an rnn_segement.run() is linked into the computational graph in scope "
            + scope,
            color="g")

        with tf.variable_scope(scope):
            cell_state = initial_state
            # Bound before the loop so that a chain with no steps returns the
            # initial state instead of failing with a NameError.
            final_state = initial_state
            outputs = []
            vprint(verbose, self.get_info_str(cell_state))

            if feed_previous:
                # TODO: (improve) assume the input to the first cell is a tensor of zero, for now
                prev_cell_output_yhat = tf.zeros(list(cell_input_size))

            for i, cell_input in enumerate(inputs):
                if feed_previous:
                    # cell_input is only a step index here; feed the converted
                    # output of the previous step instead
                    cell_input = prev_cell_output_yhat
                if i > 0:
                    # the same cell is applied at every step, so its variables
                    # must be declared reusable from the second step onwards
                    tf.get_variable_scope().reuse_variables()
                vprint(verbose,
                       "\n[INFO] BEFORE CELL " + scope + "\n" + str(i) +
                       " cell_input: Tensor " +
                       str(cell_input.get_shape().as_list()),
                       color="b")
                vprint(verbose, self.get_info_str(cell_state), color="b")

                # cell_output: batch_size x hidden_size, cell_state's dimension depends on cell's type
                cell_output, cell_state = cell(cell_input, cell_state)
                vprint(verbose,
                       "[INFO] AFTER CELL " + scope + "\n" + str(i) +
                       " cell_output: Tensor " +
                       str(cell_output.get_shape().as_list()),
                       color="cyan")
                vprint(verbose, self.get_info_str(cell_state), color="cyan")

                # append the cell's output to the output sequence
                outputs.append(cell_output)

                if feed_previous:
                    prev_cell_output_yhat = loop_func(cell_output)
                    vprint(verbose,
                           "\n[INFO] PREV_CELL_OUTPUT " + scope + "\n" +
                           str(i) + " prev_cell_output_yhat: " +
                           str(prev_cell_output_yhat.get_shape().as_list()),
                           color="YELLOW")
                # the last cell's state is the final state
                final_state = cell_state

        # NOTE "outputs" are the cells' immediate outputs, not converted to yhat.
        self.outputs = outputs
        self.final_state = final_state
        return outputs, final_state
示例#9
0
    '-v',
    '--verbose',
    action='store_true',
    help='only set to true when you want verbosity')  # Do NOT Touch
# Continue-training flag; a store_true switch, so it is False unless -c is given
parser.add_argument(
    '-c',
    '--continue_training',
    action='store_true',
    help='if set, then continue training from the previous checkpoint'
)  # Do NOT Touch
# Parse the command-line arguments
args = parser.parse_args()

# You may perform testing without actually loading data by calling: python run.py -t
vprint(True, "run.py -- arg.test = " + str(args.test), color="CYAN")
if args.test == True:
    # Test mode: overwrite the parsed values with small artificial settings,
    # so that the code can be exercised without actually loading the data.
    ############################## RNN Configuration ##############################
    args.hidden_size = 16
    args.num_layers = 2
    ################################## Data Info ##################################
    args.input_seq_length = 21
    args.target_seq_length = 23
    args.input_embedding_size = 16
    args.output_vocab_size = 150
    ############################### Trainer Settings ##############################
    args.epochs = 5
    args.batch_size = 8
    args.grad_clip = 15
    args.learning_rate = 0.05
示例#10
0
    def __init__(self, args, training=True):
        '''
        Create the LSTM cells, the RNN chains, the placeholders and the
        initial states of the model.
        Params:
          args: contains arguments required for the Model creation --
            args.hidden_size (i.e. the size of hidden state)
            args.num_layers (default = 1, i.e. no stacking),
            args.input_seq_length,
            args.target_seq_length,
            args.input_num_sent,
            args.input_embedding_size,
            args.output_vocab_size,
            args.target_token_size (=1, target token is target word's index)
            args.batch_size (i.e. the number of sequences in each batch),
            args.optimizer_choice (default = "rms", also could be "adam", "grad_desc"),
            args.learning_rate,
            args.grad_clip
            args.test
            args.verbose
          training: indicates whether this is a training session. If it is
            not, args.batch_size is overwritten with 1 (on the caller's
            object as well, since args is stored, not copied).
        Returns:
            None
        Raises:
            ValueError: if args.num_layers is not a positive integer.
        NOTE Each cell's input is batch_size x 1 x input_embedding_size
        NOTE Each cell's output is batch_size x 1 x hidden_size (needs to be converted)
        '''
        if not training:
            args.batch_size = 1

        # Store the arguments, and print the important argument values
        self.args = args
        # NOTE(review): not used in this part of the initializer.
        verbose = self.args.verbose
        # The class name is looked up instead of hard-coded so that the
        # banner always names the model actually being built.
        print(self.__class__.__name__ + " initializer is called..\n"
              + "Time: " + time.ctime() + "\n"
              + "  args.hidden_size (H) = " + str(self.args.hidden_size) + "\n"
              + "  args.input_embedding_size (Di) = " + str(self.args.input_embedding_size) + "\n"
              + "  args.output_vocab_size (Vo) = " + str(self.args.output_vocab_size) + "\n"
              + "  args.num_layers = " + str(self.args.num_layers) + "\n"
              + "  args.optimizer_choice = " + self.args.optimizer_choice + "\n"
              + "  args.learning_rate = " + str(self.args.learning_rate) + "\n"
              + "  args.grad_clip = " + str(self.args.grad_clip) + "\n")

        if training:
            print("This is a training session..")
        else:
            print("This is a session other than training..")
        print("Input batch size = " + str(self.args.batch_size) + "\n")

        # Reject a bad layer count before any cell is built.
        num_layers = self.args.num_layers
        if not isinstance(num_layers, int) or num_layers <= 0:
            raise ValueError("Specified number of layers is non-positive or is not an integer.")
        if num_layers >= 2:
            vprint(True, "Stacked RNN: number of layers = " + str(num_layers))

        def make_layer():
            # One LSTM layer; hidden_size is the dimension of the hidden state.
            # LSTMCell is used because BasicLSTMCell takes no initializer.
            return tf.nn.rnn_cell.LSTMCell(
                num_units=self.args.hidden_size,
                initializer=tf.contrib.layers.xavier_initializer(),
                state_is_tuple=True)

        def make_cell():
            # Single layer, or a stack in which every layer is its own cell
            # object (MultiRNNCell must not be given one object repeated).
            if num_layers == 1:
                return make_layer()
            return tf.nn.rnn_cell.MultiRNNCell(
                [make_layer() for _ in range(num_layers)],
                state_is_tuple=True)

        # TODO: (improve) Dropout layer can be added here
        # Store the recurrent units
        self.word_encoder_cell = make_cell()
        self.sent_encoder_cell = make_cell()
        self.sent_decoder_cell = make_cell()
        self.word_decoder_cell = make_cell()

        # convert cell's outputs (batch_size x hidden_size for each cell) to batch_size x output_vocab_size
        # y_hat = softmax(tf.matmul(cell_output, output_ws)); there is no bias term, for now
        # NOTE(review): the scope is still named after the vanilla model - confirm before renaming.
        with tf.variable_scope("vanLSTM_decoder/decoder_accessory"):
            self.output_ws = tf.get_variable("output_ws", [self.args.hidden_size, self.args.output_vocab_size])
            output_affine_map_lambda = lambda cell_output_: tf.matmul(cell_output_, self.output_ws)
            output_converter_lambda = lambda cell_output_: tf.nn.softmax(logits=output_affine_map_lambda(cell_output_), dim=-1) # -1: last
            self.output_affine_map_lambda = output_affine_map_lambda
            self.output_converter_lambda = output_converter_lambda

        # Create encoder and decoder RNNChain instances
        self.word_encoder = RNNChain(self.word_encoder_cell, name="hierLSTM_word_encoder", scope="hierLSTM_word_encoder")
        self.sent_encoder = RNNChain(self.sent_encoder_cell, name="hierLSTM_sent_encoder", scope="hierLSTM_sent_encoder")
        self.sent_decoder = RNNChain(self.sent_decoder_cell, name="hierLSTM_sent_decoder", scope="hierLSTM_sent_decoder")
        self.word_decoder = RNNChain(self.word_decoder_cell, name="hierLSTM_word_decoder", scope="hierLSTM_word_decoder")

        # Input data contains sequences of input tokens of input_embedding_size dimension
        self.input_data = tf.placeholder(tf.float32, [None, self.args.input_seq_length, self.args.input_embedding_size])
        # Target data contains sequences of output tokens of target_token_size dimension (=1)
        self.target_data = tf.placeholder(tf.int32, [None, self.args.target_seq_length, self.args.target_token_size])
        # Target lengths list contains numbers of non-padding input tokens in each sequence in this batch,
        # each i-th element is a list of integers, indicating the number of non-padding tokens in each sentence,
        # and the list's length indicating the number of non-padding sentences in this i-th sequence
        # (which consists of one or more sentences).
        self.target_lens_list = tf.placeholder(tf.int32, [None, self.args.input_num_sent])

        # Learning rate
        self.lr = tf.Variable(self.args.learning_rate, trainable=False, name="learning_rate")

        # Initial cell state of LSTM (initialized with zeros)
        # TODO: (improve) might use xavier initializer?
        self.initial_word_state = self.word_encoder_cell.zero_state(batch_size=self.args.batch_size, dtype=tf.float32)
        self.initial_sent_state = self.sent_encoder_cell.zero_state(batch_size=self.args.batch_size, dtype=tf.float32)

        # Preprocessing the information got from placeholders.
        # First, target_lens_list does not need any further actions.
        target_lens_list = self.target_lens_list
示例#11
0
    def __init__(self, args, model_choice="V", test_batch_size=1):
        '''
        Instantiate a tester: rebuild the chosen model from its saved
        arguments and restore its parameters into a new session.
        Params:
            args: contains arguments required for the model rebuilding.
                args.model_choice is set to model_choice on this object.
            model_choice: specify the choice of model, default to "VanillaLSTMTransModel".
                Either the full class name or its abbreviation:
                V: VanillaLSTMTransModel
                AV: AttenVanillaLSTMTransModel
                H: HierLSTMTransModel, i.e. Hierarchical LSTM Model
                AH: AttenHierLSTMTransModel, i.e. Hierarchical LSTM Model with Attention
            test_batch_size: the size of batch in testing (not necessarily the same as training batch size), default to 1.
        Returns:
            None.
        Raises:
            ValueError: if model_choice is not supported, if the saved
                arguments cannot be loaded, or if no checkpoint is found.
        '''
        self.args = args
        self.args.model_choice = model_choice
        self.test_batch_size = test_batch_size

        # Normalise the long and the short spelling to one abbreviation.
        if model_choice in ("VanillaLSTMTransModel", "V"):
            model_abbr = "V"
        elif model_choice in ("AttenVanillaLSTMTransModel", "AV"):
            model_abbr = "AV"
        elif model_choice in ("HierLSTMTransModel", "H"):
            model_abbr = "H"
        elif model_choice in ("AttenHierLSTMTransModel", "AH"):
            model_abbr = "AH"
        else:
            raise ValueError("Model choice: " + str(model_choice) +
                             " is not supported")

        self.directory = "../RUN_" + model_abbr
        args_path = os.path.join(self.directory, 'args.pkl')
        try:
            # Pickles are binary data and only need to be read here.
            # NOTE(review): assumes args.pkl was written in binary mode - confirm against the trainer.
            # SECURITY: pickle.load executes code embedded in the file; only
            # load run directories that come from a trusted source.
            with open(args_path, 'rb') as f:
                saved_args = pickle.load(f)
            saved_args.batch_size = self.test_batch_size
        except Exception as err:
            # Callers get one exception type for every load failure; the
            # underlying cause is kept in the message.
            raise ValueError(
                "This model is either not trained, damaged, or in a wrong path, which should be "
                + args_path + " (" + repr(err) + ")")

        # Instantiate a model with the saved args
        vprint(True,
               "\033[1;m" +
               "Testing. Rebuilding computation graph for the model..." +
               "\033[0;m",
               color="CYAN")
        if model_abbr == "V":
            model = VanillaLSTMTransModel(saved_args, training=False)
        elif model_abbr == "AV":
            model = AttenVanillaLSTMTransModel(saved_args, training=False)
        elif model_abbr == "H":
            model = HierLSTMTransModel(saved_args, training=False)
        else:  # "AH", the only abbreviation left after the validation above
            model = AttenHierLSTMTransModel(saved_args, training=False)
        self.model = model

        # Get the checkpoint file to load the model. This is done before a
        # session is opened so that a missing checkpoint leaks nothing.
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir=self.directory,
                                             latest_filename=None)
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise ValueError("No checkpoint found in " + self.directory)

        # Instantiate a TensorFlow interactive session
        sess = tf.InteractiveSession()

        # Initiate a TensorFlow saver
        saver = tf.train.Saver(tf.global_variables())
        # Load the model parameters into session
        vprint(True,
               "\033[1;m" + "Loading the model parameters..." + "\033[0;m",
               color="CYAN")
        saver.restore(sess, ckpt.model_checkpoint_path)

        # Link the session to Tester.
        self.sess = sess
示例#12
0
    def test(self, printout=False, if_testing=False):
        '''
        Run the restored model on test batches and record the results.

        Writes, inside self.directory:
            test_results.txt: predicted token indices, one line per sequence
                (rewritten on every call)
            test_true_label.txt: target token indices, one line per sequence
                (rewritten on every call)
            test_log.txt: average and per-batch losses (appended)
        Also sets self.total_loss, self.loss_on_each_seq and self.num_batches.
        Params:
            printout: currently unused.
            if_testing: currently unused; the data loader is configured from
                self.args.test instead.
        Returns:
            None
        '''
        # For this testing session
        self.total_loss = 0.0
        self.loss_on_each_seq = []

        # Acquire the session
        sess = self.sess

        results_path = os.path.join(self.directory, 'test_results.txt')
        true_label_path = os.path.join(self.directory, 'test_true_label.txt')
        log_path = os.path.join(self.directory, 'test_log.txt')

        data_loader = Dataloader(batch_size=self.test_batch_size,
                                 seq_lengths=[
                                     self.args.input_seq_length,
                                     self.args.target_seq_length
                                 ],
                                 token_sizes=[
                                     self.args.input_embedding_size,
                                     self.args.target_token_size
                                 ],
                                 usage="test",
                                 if_testing=self.args.test)
        # Reset the pointers in the data loader object
        data_loader.reset()

        # NOTE(review): only one batch is evaluated; data_loader.get_num_batches()
        # was disabled here - confirm whether that is still intended.
        self.num_batches = 1

        # Both result files are rewritten; "with" closes them even if a
        # batch fails half-way.
        with open(results_path, 'w') as rf, \
                open(true_label_path, 'w') as rtf:
            rf.write("Test log file: " + self.directory + '\n')

            for b in range(self.num_batches):
                test_start_time = time.time()

                # First, Make predictions
                vprint(True, "\nGetting batch.. b = " + str(b + 1), color="MAG")
                # x: input batch. It is a list of length test_batch_size, each element of which is of size input_seq_length x input_embedding_size
                # y: target batch. It is a list of length test_batch_size, each element of which is of size target_seq_length x target_token_size (=1)
                # yl: target sequences' lengths. It is a list of length test_batch_size, each element of which is an integer.
                x, y, yl = data_loader.next_batch()
                # Feed into the model and get out the prediction
                vprint(True,
                       "Got batch. Making a batch of prediction...",
                       color="MAG")
                feed = {
                    self.model.input_data: x,
                    self.model.target_data: y,
                    self.model.target_lens_list: yl
                }
                # output_data is softmaxed. It is a list of length target_seq_length, each element is test_batch_size x output_vocab_size
                test_loss, output_data = sess.run(
                    [self.model.cost, self.model.output_data], feed_dict=feed)

                # Record the true labels, one line per sequence of the batch
                for k in range(self.test_batch_size):
                    for target_token in y[k]:
                        rtf.write(str([int(target_token)]) + " ")
                    rtf.write("\n")
                print("test_loss = " + str(test_loss))
                self.loss_on_each_seq.append(test_loss)
                self.total_loss += test_loss

                # Second, document the predictions
                # For k-th sequence of the batch
                for k in range(self.test_batch_size):
                    # For i-th token position
                    for i, step_probs in enumerate(output_data):
                        # If too long - I do not care what the output is beyond a certain length limit
                        # Allow the output to exceed a little bit - maybe 1.2 times
                        if i > yl[k] * 1.2:
                            break
                        # step_probs[k] is the probability distribution over
                        # output_vocab_size for this sequence. sess.run already
                        # returned NumPy arrays, so take the arg-max directly
                        # rather than adding a new tf.argmax op to the graph
                        # for every token.
                        word_index = int(step_probs[k].argmax())
                        rf.write(str([word_index]) + " ")
                    rf.write("\n")
                rf.write("\n")
                test_end_time = time.time()
                vprint(
                    True,
                    "time/batch = {:.3f}".format(test_end_time - test_start_time) +
                    " s",
                    color=None)

        average_loss = self.total_loss / self.num_batches
        vprint(True, "Total loss of this model is " + str(average_loss))
        # append from EOF. Create file if not found.
        with open(log_path, 'a') as lf:
            lf.write("Total loss: " + str(average_loss) + "\n")
            lf.write("Loss on each sequence: \n")
            for batch_loss in self.loss_on_each_seq:
                lf.write(str(batch_loss) + "\n")