# --- Gradient-inspection setup and joint pre-training (version 1) ---
# NOTE(review): this chunk's line breaks were lost in extraction; the
# indentation below is reconstructed — confirm against the original script.
# `ls`, `loss`, `params`, `network`, `out_var`, `prediction`, `train_fn`,
# `synthesize_data`, `train`, `dim`, `batch_size`, `tasks`, `seed` and
# `init_epochs` are all defined earlier in the file (not visible here).
# `T` is presumably the `theano` module and `TT` `theano.tensor` — verify
# against the file's import block.

# Symbolic gradient of `ls` w.r.t. the trainable parameters;
# 'warn' (rather than 'raise') tolerates parameters that are not on the
# computational path to `ls`.
grad = T.grad(ls, params, disconnected_inputs='warn')
# grad_fn(X, Y) -> list of per-parameter gradient arrays (no update applied).
grad_fn = T.function([network.input_layer.input_var, out_var], grad, allow_input_downcast=True)
# loss_fn(X, Y) -> loss value only (no parameter update).
loss_fn = T.function([network.input_layer.input_var, out_var], loss, allow_input_downcast=True)
# out(X) -> network prediction (forward pass only).
out = T.function([network.input_layer.input_var], prediction, allow_input_downcast=True)

# One dataset (Xs[i], Ys[i]) per task; concatenated for joint pre-training.
Xs, Ys = synthesize_data(dim, batch_size, tasks, seed=seed)
train(train_fn, np.concatenate(Xs), np.concatenate(Ys), init_epochs)
print('------- initial training complete ---------------')

# Snapshot of the jointly pre-trained weights (copied so that subsequent
# training cannot mutate the snapshot in place).
init_param_value = np.copy(network.get_param_values())

#Xs, Ys = synthesize_data(dim, 2000, tasks)

# Per-task accumulators: task_grads[i] / task_grad_samples[i] will collect
# gradient records for task i.
task_grads = []
task_grad_samples = []
for i in range(len(Xs)):
    task_grads.append([])
    task_grad_samples.append([])
total_grads = []
net_weight_values = []

#for i in range(epochs):
#    net_weight_values.append(network.get_param_values())
#    train(train_fn, np.concatenate(Xs), np.concatenate(Ys), 1)

epochs = 1
# NOTE(review): the body of this loop is not visible in this chunk — it
# continues past the end of the excerpt.
for i in range(epochs):
# --- Training setup with optional checkpoint loading (version 2) ---
# A later/alternative variant of the setup above: builds the Adam training
# graph, optionally restores a pre-trained network, and records (or reloads)
# the per-epoch weight trajectory used for gradient-splitting analysis.
# NOTE(review): line breaks were lost in extraction; the indentation below is
# reconstructed — confirm against the original script. `load_init_policy`,
# `load_split_data`, `append`, `epochs`, `trainingXs`, `trainingYs`,
# `network`, `out_var`, `prediction` and `loss` are defined earlier in the
# file (not visible here).

# Reduce the (presumably element-wise) loss expression to a scalar for Adam.
loss = loss.mean()
params = network.get_params(trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=0.0005)
# train_fn(X, Y) -> loss value, applying one Adam update as a side effect.
train_fn = T.function([network.input_layer.input_var, out_var], loss, updates=updates, allow_input_downcast=True)

# Separate MSE expression used only for gradient inspection, not for updates.
ls = TT.mean((prediction - out_var)**2)
# 'warn' tolerates parameters disconnected from `ls` instead of raising.
grad = T.grad(ls, params, disconnected_inputs='warn')
# grad_fn(X, Y) -> list of per-parameter gradient arrays (no update applied).
grad_fn = T.function([network.input_layer.input_var, out_var], grad, allow_input_downcast=True)
# loss_fn(X, Y) -> scalar loss only (no parameter update).
loss_fn = T.function([network.input_layer.input_var, out_var], loss, allow_input_downcast=True)
# out(X) -> network prediction (forward pass only).
out = T.function([network.input_layer.input_var], prediction, allow_input_downcast=True)

# Pre-train from scratch unless an initial policy was loaded earlier, then
# persist the pre-trained network for later runs.
if not load_init_policy:
    losses=train(train_fn, np.concatenate(trainingXs), np.concatenate(trainingYs), init_epochs)
    joblib.dump(network, 'data/trained/gradient_temp/supervised_split_' + append + '/init_network.pkl', compress=True)
print('------- initial training complete ---------------')

# Snapshot of the pre-trained weights (copied so later training does not
# mutate the snapshot in place).
init_param_value = np.copy(network.get_param_values())

#Xs, Ys = synthesize_data(dim, 2000, tasks)

# task_grads[i] accumulates gradient records for task i.
task_grads = []
for i in range(len(trainingXs)):
    task_grads.append([])

# Record the network weights at the START of each epoch (then train exactly
# one epoch), or reload a previously recorded weight trajectory from disk.
if not load_split_data:
    net_weight_values = []
    for i in range(epochs):
        net_weight_values.append(network.get_param_values())
        train(train_fn, np.concatenate(trainingXs), np.concatenate(trainingYs), 1)
    joblib.dump(net_weight_values, 'data/trained/gradient_temp/supervised_split_' + append + '/net_weight_values.pkl', compress=True)
else:
    net_weight_values = joblib.load('data/trained/gradient_temp/supervised_split_' + append + '/net_weight_values.pkl')

# Replay the recorded weight trajectory epoch by epoch.
# NOTE(review): the remainder of this loop body is not visible in this chunk —
# it continues past the end of the excerpt.
for i in range(epochs):
    network.set_param_values(net_weight_values[i])