def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.b: Optimize the model and return the intermediate losses.

    Optimize the model using stochastic gradient descent by running the
    variable updates for self.iterations iterations. The descent is
    stochastic: the parameters are updated as the data is iterated over,
    one example (batch size 1) per iteration, instead of after a full pass
    over the data.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable, invoked as callback(model) once per
            iteration after that iteration's losses have been recorded

    Returns:
        A tuple of lists, where the first list contains the training loss of
        each iteration and the second list contains the validation loss of
        each iteration. The validation loss is computed using the same
        amount of data as the training loss, but using the validation data.

    N and M are the numbers of training and validation points, respectively,
    and R and S are the dimensions for each input and target data point,
    respectively.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    # Both generators must exist before the loop so that successive next()
    # calls keep walking through the data instead of restarting it.
    train_data_generator = MinibatchIndefinitelyGenerator(train_data, 1, self.shuffle)
    val_data_generator = MinibatchIndefinitelyGenerator(val_data, 1, self.shuffle)

    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph,
                                       model.get_param_vars(regularizable=True))
    # get_updates yields (variable, new value) pairs; each pair becomes one
    # assign op so a single session.run applies the whole update step.
    updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
    update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]

    train_losses = []
    val_losses = []
    for iter_ in range(self.iterations):
        # Draw each batch exactly once per iteration so that the recorded
        # training loss and the update are computed on the same data.
        train_feed = dict(zip(placeholders, train_data_generator.next()))
        val_feed = dict(zip(placeholders, val_data_generator.next()))

        # Fetch the loss together with the update ops; only the loss value
        # (first fetch) is kept.
        train_loss = session.run([loss_tensor] + update_ops, feed_dict=train_feed)[0]
        val_loss = session.run(loss_tensor, feed_dict=val_feed)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.c: Optimize the model and return the intermediate losses.

    Optimize the model using minibatch stochastic gradient descent by
    running the variable updates for self.iterations iterations. Each
    iteration consumes one minibatch of self.batch_size examples.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable, invoked as callback(model) once per
            iteration after that iteration's losses have been recorded

    Returns:
        A tuple of lists, where the first list contains the training loss of
        each iteration and the second list contains the validation loss of
        each iteration. The validation loss is computed using the same
        amount of data as the training loss, but using the validation data.

    N and M are the numbers of training and validation points, respectively,
    and R and S are the dimensions for each input and target data point,
    respectively.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph,
                                       model.get_param_vars(regularizable=True))
    # get_updates yields (variable, new value) pairs; each pair becomes one
    # assign op.
    updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
    update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]

    train_losses = []
    val_losses = []

    # Honor the solver's shuffle setting instead of forcing shuffling on;
    # the generators are created once, before the loop, so next() keeps
    # advancing through the data.
    train_generator = MinibatchIndefinitelyGenerator(train_data, self.batch_size, self.shuffle)
    val_generator = MinibatchIndefinitelyGenerator(val_data, self.batch_size, self.shuffle)

    for iter_ in range(self.iterations):
        # Each batch is drawn exactly once and paired positionally with the
        # placeholders (inputs first, targets second).
        feed_dict_train = dict(zip(placeholders, train_generator.next()))
        feed_dict_val = dict(zip(placeholders, val_generator.next()))

        # Training loss is measured on the batch *before* the parameters
        # are updated with that same batch.
        train_loss = session.run(loss_tensor, feed_dict_train)
        session.run(update_ops, feed_dict_train)

        # Validation loss uses the same amount of data, after the update.
        val_loss = session.run(loss_tensor, feed_dict_val)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.b: Optimize the model and return the intermediate losses.

    Optimize the model using stochastic gradient descent by running the
    variable updates for self.iterations iterations. The descent is
    stochastic: the parameters are updated as the data is iterated over,
    one example (batch size 1) per iteration, instead of after a full pass
    over the data.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable, invoked as callback(model) once per
            iteration after that iteration's losses have been recorded

    Returns:
        A tuple of lists, where the first list contains the training loss of
        each iteration and the second list contains the validation loss of
        each iteration. The validation loss is computed using the same
        amount of data as the training loss, but using the validation data.

    N and M are the numbers of training and validation points, respectively,
    and R and S are the dimensions for each input and target data point,
    respectively.
    """
    session = tfu.get_session()
    target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:])
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph,
                                       model.get_param_vars(regularizable=True))
    # get_updates yields (variable, new value) pairs; each pair becomes one
    # assign op.
    updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True))
    update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates]

    train_losses = []
    val_losses = []

    # Batch size 1 makes this plain SGD. The shuffle flag comes from the
    # solver configuration rather than being forced on.
    train_generator = MinibatchIndefinitelyGenerator(train_data, 1, self.shuffle)
    val_generator = MinibatchIndefinitelyGenerator(val_data, 1, self.shuffle)

    # self.iterations counts gradient updates, not passes over the data.
    for iter_ in range(self.iterations):
        # Call next() exactly once per generator per iteration: every call
        # advances the generator, so calling it again for each placeholder
        # or each session.run would feed mismatched inputs and targets.
        train_inputs, train_targets = train_generator.next()
        val_inputs, val_targets = val_generator.next()
        train_feed = {placeholders[0]: train_inputs, placeholders[1]: train_targets}
        val_feed = {placeholders[0]: val_inputs, placeholders[1]: val_targets}

        # Training loss is measured on the example *before* the parameters
        # are updated with that same example.
        train_loss = session.run(loss_tensor, feed_dict=train_feed)
        session.run(update_ops, feed_dict=train_feed)

        # Validation loss uses the same amount of data, after the update.
        val_loss = session.run(loss_tensor, feed_dict=val_feed)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)
    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.c: Optimize the model and return the intermediate losses.

    Runs minibatch stochastic gradient descent: self.iterations update
    steps, each one fed by a single minibatch of self.batch_size examples.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable, invoked as callback(model) at the end
            of every iteration, before progress is displayed

    Returns:
        A (train_losses, val_losses) tuple of lists with one entry per
        iteration. The validation loss of an iteration is computed on a
        validation minibatch of the same size as the training minibatch.

    N and M are the numbers of training and validation points, respectively,
    and R and S are the dimensions for each input and target data point,
    respectively.
    """
    session = tfu.get_session()
    target_shape = (None,) + target_train_data.shape[1:]
    target_ph = tf.placeholder(tf.float32, shape=target_shape)
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    regularizable_vars = model.get_param_vars(regularizable=True)
    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, regularizable_vars)
    trainable_vars = model.get_param_vars(trainable=True)
    updates = self.get_updates(loss_tensor, trainable_vars)

    # One assign op per (variable, new value) pair produced by get_updates.
    update_ops = []
    for variable, new_value in updates:
        update_ops.append(tf.assign(variable, new_value))

    train_losses, val_losses = [], []

    # The generators live outside the loop so that next() keeps advancing.
    train_data_generator = MinibatchIndefinitelyGenerator(train_data, self.batch_size, self.shuffle)
    val_data_generator = MinibatchIndefinitelyGenerator(val_data, self.batch_size, self.shuffle)

    for iter_ in range(self.iterations):
        # Pair every placeholder with the matching array of the minibatch.
        train_batch = train_data_generator.next()
        train_feed = {ph: values for ph, values in zip(placeholders, train_batch)}
        val_batch = val_data_generator.next()
        val_feed = {ph: values for ph, values in zip(placeholders, val_batch)}

        # Loss and parameter updates are fetched in one run; the second
        # fetch (the assign results) is not needed.
        fetched = session.run([loss_tensor, update_ops], feed_dict=train_feed)
        train_losses.append(fetched[0])
        val_losses.append(session.run(loss_tensor, feed_dict=val_feed))

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)

    return train_losses, val_losses
def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None):
    """
    Question 6.b: Optimize the model and return the intermediate losses.

    Runs stochastic gradient descent: self.iterations update steps, each
    one fed by a single example (batch size 1), so the parameters change as
    the data is iterated over rather than after a full pass.

    Args:
        input_train_data: a numpy.array with shape (N, R)
        target_train_data: a numpy.array with shape (N, S)
        input_val_data: a numpy.array with shape (M, R)
        target_val_data: a numpy.array with shape (M, S)
        model: the model from which the parameters are optimized
        callback: optional callable, invoked as callback(model) at the end
            of every iteration, before progress is displayed

    Returns:
        A (train_losses, val_losses) tuple of lists with one entry per
        iteration. The validation loss of an iteration is computed on the
        same amount of validation data as the training loss.

    N and M are the numbers of training and validation points, respectively,
    and R and S are the dimensions for each input and target data point,
    respectively.
    """
    session = tfu.get_session()
    target_shape = (None,) + target_train_data.shape[1:]
    target_ph = tf.placeholder(tf.float32, shape=target_shape)
    placeholders = [model.input_ph, target_ph]
    train_data = [input_train_data, target_train_data]
    val_data = [input_val_data, target_val_data]

    # State shared across iterations is set up below.
    "*** YOUR CODE HERE ***"
    train_data_generator = MinibatchIndefinitelyGenerator(train_data, 1, self.shuffle)
    val_data_generator = MinibatchIndefinitelyGenerator(val_data, 1, self.shuffle)

    regularizable_vars = model.get_param_vars(regularizable=True)
    loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, regularizable_vars)
    trainable_vars = model.get_param_vars(trainable=True)
    updates = self.get_updates(loss_tensor, trainable_vars)

    # One assign op per (variable, new value) pair produced by get_updates.
    update_ops = []
    for variable, new_value in updates:
        update_ops.append(tf.assign(variable, new_value))

    # The loss comes first in the fetch list so it can be read back at
    # index 0 after the combined loss-and-update run.
    train_fetches = [loss_tensor]
    train_fetches.extend(update_ops)

    def as_feed(batch):
        # Inputs are the first element of a batch, targets the second.
        return {model.input_ph: batch[0], target_ph: batch[1]}

    train_losses, val_losses = [], []

    for iter_ in range(self.iterations):
        "*** YOUR CODE HERE ***"
        train_batch = train_data_generator.next()
        val_batch = val_data_generator.next()

        # train_loss is the loss on exactly the data used for this update.
        train_results = session.run(train_fetches, feed_dict=as_feed(train_batch))
        train_loss = train_results[0]
        # val_loss uses the same amount of data, drawn from the validation set.
        val_loss = session.run(loss_tensor, feed_dict=as_feed(val_batch))

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if callback is not None:
            callback(model)
        self.display_progress(iter_, train_losses, val_losses)

    return train_losses, val_losses