def training(num_batches, batch_size, x_train, label_train, mask_train):
    losses, preds, norms = [], [], []
    for i in range(num_batches):
        idx = range(i * batch_size, (i + 1) * batch_size)
        x_batch = x_train[idx]
        y_batch = label_train[idx]
        mask_batch = mask_train[idx]
        loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
        norms.append(batch_norm)
        preds.append(out)
        losses.append(loss)
    predictions = np.concatenate(preds, axis=0)
    loss_train = np.mean(losses)
    all_losses_train.append(loss_train)
    # Only the first num_batches * batch_size sequences were used, so crop
    # the targets and masks accordingly before computing accuracy.
    acc_train = utils.proteins_acc(predictions,
                                   label_train[0:num_batches * batch_size],
                                   mask_train[0:num_batches * batch_size])
    all_accuracy_train.append(acc_train)
    mean_norm = np.mean(norms)
    all_mean_norm.append(mean_norm)
    print("  average training loss: %.5f" % loss_train)
    print("  average training accuracy: %.5f" % acc_train)
    print("  average norm: %.5f" % mean_norm)
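# For reference, a minimal sketch of the masked accuracy that
# utils.proteins_acc is assumed to compute, judging from how it is called
# here (the actual implementation may differ):
def masked_accuracy(predictions, targets, mask):
    # predictions: (N, L, C) class probabilities; targets: (N, L) int labels;
    # mask: (N, L) with 1 for real residues and 0 for padding.
    hard = np.argmax(predictions, axis=2)
    correct = (hard == targets) * mask
    return np.sum(correct) / np.sum(mask)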
def testing(num_batches, batch_size, X, y, mask, subset):
    losses, preds = [], []
    for i in range(num_batches):
        idx = range(i * batch_size, (i + 1) * batch_size)
        x_batch = X[idx]
        y_batch = y[idx]
        mask_batch = mask[idx]
        loss, out = evaluate(x_batch, y_batch, mask_batch)
        preds.append(out)
        losses.append(loss)
    predictions = np.concatenate(preds, axis=0)
    loss_eval = np.mean(losses)
    all_losses.append(loss_eval)
    # Crop targets and masks to the sequences actually evaluated.
    acc_eval = utils.proteins_acc(predictions,
                                  y[0:num_batches * batch_size],
                                  mask[0:num_batches * batch_size])
    all_accuracy.append(acc_eval)
    print("Average evaluation loss ({}): {:.5f}".format(subset, loss_eval))
    print("Average evaluation accuracy ({}): {:.5f}".format(subset, acc_eval))
    return i  # index of the last batch processed
def validate(sess):
    gen = data_gen.gen_valid
    valid_masks = []
    valid_outs = []
    valid_targets = []
    total = 0  # number of sequences seen so far
    for batch, i in gen():
        valid_fetches = [prediction]
        valid_feed_dict = {
            X_input: batch['X'],
            t_input: batch['t'],
            X_length: batch['length'],
            t_mask: batch['mask'],
            is_training_pl: False
        }
        valid_out = sess.run(fetches=valid_fetches,
                             feed_dict=valid_feed_dict)[0]
        # Pad every batch out to the maximum sequence length (700) so the
        # per-batch arrays can be concatenated afterwards.
        h_out = np.zeros((i, 700, 8), dtype="float32")
        h_out[:, :valid_out.shape[1], :] = valid_out
        h_mask = np.zeros((i, 700), dtype="float32")
        h_mask[:, :valid_out.shape[1]] = batch['mask']
        h_targets = np.zeros((i, 700), dtype="int32")
        h_targets[:, :valid_out.shape[1]] = batch['t']
        valid_masks.append(h_mask)
        valid_targets.append(h_targets)
        valid_outs.append(h_out)
        total += i
    valid_outs = np.concatenate(valid_outs, axis=0)[:total]
    valid_targets = np.concatenate(valid_targets, axis=0)[:total]
    valid_masks = np.concatenate(valid_masks, axis=0)[:total]
    valid_accs = utils.proteins_acc(valid_outs, valid_targets, valid_masks)
    print("  valid_accs: %.5f" % valid_accs)
    sum_fetches = [val_summaries, global_step]
    sum_feed_dict = {
        valid_accs_pl: valid_accs,
    }
    summaries, step = sess.run(sum_fetches, sum_feed_dict)
    summary_writer.add_summary(summaries, step)
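# The per-batch padding in validate() can be factored into a small helper;
# a sketch assuming the fixed maximum sequence length of 700 used above
# (pad_to_length is a hypothetical helper, not part of the original code):
def pad_to_length(arr, length=700, dtype="float32"):
    # Zero-pad arr (batch, time, ...) along the time axis up to `length`.
    out = np.zeros((arr.shape[0], length) + arr.shape[2:], dtype=dtype)
    out[:, :arr.shape[1]] = arr
    return out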
import glob
import sys

import numpy as np

import utils
import Data_Manipulator

if len(sys.argv) < 2:
    sys.exit("Usage: python eval_avrg.py <predictions_path> [subset=test]")

# Sum the predictions of every matching dump to form an ensemble average.
predictions_path_all = glob.glob(sys.argv[1] + "*")
predictions = None
for predictions_path in predictions_path_all:
    print(predictions_path)
    if predictions is None:
        predictions = np.load(predictions_path)
    else:
        predictions = predictions + np.load(predictions_path)

if len(sys.argv) == 3:
    subset = sys.argv[2]
    assert subset in ['train', 'valid', 'test', 'test_valid']
else:
    subset = 'test'

if subset == "test":
    _, mask, y, _ = Data_Manipulator.get_test()
    acc = utils.proteins_acc(predictions, y, mask)
    print("Accuracy (%s) is: %.5f" % (subset, acc))
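# Summing model outputs (above) rather than averaging them leaves the
# ensemble argmax unchanged, since argmax is invariant to a positive scale.
# To report properly averaged probabilities, one could divide by the count:
# predictions = predictions / len(predictions_path_all)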
if len(sys.argv) == 3:
    subset = sys.argv[2]
    assert subset in ['train', 'valid', 'test', 'train_valid']
else:
    subset = 'test'

if subset == "test":
    _, mask, y, _ = data.get_test()
elif subset == "train":
    y = data.labels_train
    mask = data.mask_train
elif subset == "train_valid":
    y = data.labels
    mask = data.mask
else:
    y = data.labels_valid
    mask = data.mask_valid

acc = utils.proteins_acc(predictions, y, mask)
print("Accuracy (%s) is: %.5f" % (subset, acc))

## Alternative model average: majority voting over per-model argmax ##
john = np.zeros((640, 700, 8))
for predictions_path in predictions_path_all:
    print(predictions_path)
    predictions = np.load(predictions_path)
    predictions = np.argmax(predictions, axis=2)
    for i in range(640):
        for j in range(700):
            num = predictions[i, j]
            john[i, j, num] += 1
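# The nested voting loops above tally one vote per residue per model in pure
# Python; the same tally vectorizes in NumPy. A sketch (vote_tally is a
# hypothetical helper, with the shapes (640, 700, 8) used above):
def vote_tally(prediction_paths, n_seq=640, seq_len=700, n_classes=8):
    # Accumulate per-residue argmax votes across an ensemble of models.
    votes = np.zeros((n_seq, seq_len, n_classes))
    for path in prediction_paths:
        hard = np.argmax(np.load(path), axis=2)   # (n_seq, seq_len) labels
        votes += np.eye(n_classes)[hard]          # scatter one-hot votes
    return votes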
nn.layers.set_all_param_values(l_out, metadata['param_values'])

print("Compiling functions")
predict = theano.function([sym_x, sym_mask], inference)

print("Predicting")
predictions = []
batch_size = config.batch_size
num_batches = np.size(X, axis=0) // batch_size
for i in range(num_batches):
    idx = range(i * batch_size, (i + 1) * batch_size)
    x_batch = X[idx]
    mask_batch = mask[idx]
    p = predict(x_batch, mask_batch)
    predictions.append(p)
predictions = np.concatenate(predictions, axis=0)

predictions_path = os.path.join(
    "predictions",
    os.path.basename(metadata_path).replace("dump_", "predictions_")
                                   .replace(".pkl", ".npy"))
print(utils.proteins_acc(predictions, data.labels_test, data.mask_test))
print("Storing predictions in %s" % predictions_path)
np.save(predictions_path, predictions)
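# Integer division above silently drops the final len(X) % batch_size
# sequences. A sketch of predicting the remainder too (it would need to run
# before np.save, and it assumes the compiled predict() accepts a smaller
# final batch):
rest = np.size(X, axis=0) % batch_size
if rest > 0:
    p = predict(X[-rest:], mask[-rest:])
    predictions = np.concatenate([predictions, p], axis=0)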
def main():
    sym_y = T.imatrix('target_output')
    sym_mask = T.matrix('mask')
    sym_x = T.tensor3()
    TOL = 1e-5
    num_epochs = config.epochs
    batch_size = config.batch_size

    print("Building network ...")
    l_in, l_out = config.build_model()
    all_layers = nn.layers.get_all_layers(l_out)
    num_params = nn.layers.count_params(l_out)
    print("  number of parameters: %d" % num_params)
    print("  layer output shapes:")
    for layer in all_layers:
        name = layer.__class__.__name__
        print("    %s %s" % (name, nn.layers.get_output_shape(layer)))

    print("Creating cost function")
    # lasagne.layers.get_output produces a variable for the output of the net
    out_train = nn.layers.get_output(l_out, sym_x, deterministic=False)
    print("Creating eval function")
    out_eval = nn.layers.get_output(l_out, sym_x, deterministic=True)
    probs_flat = out_train.reshape((-1, num_classes))

    lambda_reg = config.lambda_reg
    all_params = nn.layers.get_all_params(l_out)
    # Zero out and drop one half of every 3D (convolutional) filter tensor,
    # keeping only the chosen side; `side` is expected to be 'left' or 'right'.
    for i, p in enumerate(all_params):
        if p.ndim == 3:
            values = p.get_value()
            if side == 'right':
                values[..., int(values.shape[2] / 2.0 - 0.5):] = 0
                p.set_value(values)
                all_params[i] = p[..., :int(values.shape[2] / 2.0 - 0.5)]
            else:
                values[..., :int(values.shape[2] / 2.0 + 0.5)] = 0
                p.set_value(values)
                all_params[i] = p[..., int(values.shape[2] / 2.0 + 0.5):]
    params = [el for el in all_params if el.name == "W" or el.name == "gamma"]
    reg_term = sum(T.sum(p ** 2) for p in params)
    cost = T.nnet.categorical_crossentropy(
        T.clip(probs_flat, TOL, 1 - TOL), sym_y.flatten())
    cost = T.sum(cost * sym_mask.flatten()) / T.sum(sym_mask) \
        + lambda_reg * reg_term

    # Retrieve all trainable parameters from the network
    all_params = [el for el in all_params
                  if el.name == "W" or el.name == "gamma" or el.name == "beta"]

    # Setting the weights
    if hasattr(config, 'set_weights'):
        nn.layers.set_all_param_values(l_out, config.set_weights())

    # Compute SGD updates for training
    print("Computing updates ...")
    if hasattr(config, 'learning_rate_schedule'):
        # Import learning rate schedule
        learning_rate_schedule = config.learning_rate_schedule
    else:
        learning_rate_schedule = {0: config.learning_rate}
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
    all_grads = T.grad(cost, all_params)
    cut_norm = config.cut_grad
    updates, norm_calc = nn.updates.total_norm_constraint(
        all_grads, max_norm=cut_norm, return_norm=True)

    if optimizer == "rmsprop":
        updates = nn.updates.rmsprop(updates, all_params, learning_rate)
    elif optimizer == "adadelta":
        updates = nn.updates.adadelta(updates, all_params, learning_rate)
    elif optimizer == "adagrad":
        updates = nn.updates.adagrad(updates, all_params, learning_rate)
    elif optimizer == "nag":
        momentum_schedule = config.momentum_schedule
        momentum = theano.shared(np.float32(momentum_schedule[0]))
        updates = nn.updates.nesterov_momentum(
            updates, all_params, learning_rate, momentum)
    else:
        sys.exit("please choose either <rmsprop/adagrad/adadelta/nag> "
                 "in configfile")

    # Theano functions for training and computing cost
    print("config.batch_size %d" % batch_size)
    print("data.num_classes %d" % num_classes)
    if hasattr(config, 'build_model'):
        print("has build model")
    print("Compiling train ...")
    # Use this for training (see deterministic=False above)
    train = theano.function(
        [sym_x, sym_y, sym_mask], [cost, out_train, norm_calc],
        updates=updates)
    print("Compiling eval ...")
    # Use this for evaluation (deterministic=True and no updates)
    eval = theano.function([sym_x, sym_y, sym_mask], [cost, out_eval])

    # Start timers
    start_time = time.time()
    prev_time = start_time

    all_losses_train = []
    all_accuracy_train = []
    all_losses_eval_train = []
    all_losses_eval_valid = []
    all_losses_eval_test = []
    all_accuracy_eval_train = []
    all_accuracy_eval_valid = []
    all_accuracy_eval_test = []
    all_mean_norm = []

    import data
    X_train, X_valid, y_train, y_valid, mask_train, mask_valid, \
        num_seq_train = data.get_train()
    # Only train with PSSM scores (drop the first 21 one-hot residue features)
    X_train, X_valid = X_train[..., 21:], X_valid[..., 21:]
    print("y shape")
    print(y_valid.shape)
    print("X shape")
    print(X_valid.shape)

    # Collapse the eight-class labels to a binary "is class 5" target.
    y_train = (y_train == 5).astype(y_train.dtype)
    y_valid = (y_valid == 5).astype(y_valid.dtype)

    # Start training
    for epoch in range(num_epochs):
        if (epoch % 10) == 0:
            print("Epoch %d of %d" % (epoch + 1, num_epochs))
        if epoch in learning_rate_schedule:
            lr = np.float32(learning_rate_schedule[epoch])
            print("  setting learning rate to %.7f" % lr)
            learning_rate.set_value(lr)
        if optimizer == "nag":
            if epoch in momentum_schedule:
                mu = np.float32(momentum_schedule[epoch])
                print("  setting momentum to %.7f" % mu)
                momentum.set_value(mu)
        # Shuffle the training sequences each epoch.
        seq_names = np.arange(0, num_seq_train)
        np.random.shuffle(seq_names)
        X_train = X_train[seq_names]
        y_train = y_train[seq_names]
        mask_train = mask_train[seq_names]

        num_batches = num_seq_train // batch_size
        losses = []
        preds = []
        norms = []
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = X_train[idx]
            y_batch = y_train[idx]
            mask_batch = mask_train[idx]
            loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
            norms.append(batch_norm)
            preds.append(out)
            losses.append(loss)
        predictions = np.concatenate(preds, axis=0)
        loss_train = np.mean(losses)
        all_losses_train.append(loss_train)
        acc_train = utils.proteins_acc(
            predictions,
            y_train[0:num_batches * batch_size],
            mask_train[0:num_batches * batch_size])
        all_accuracy_train.append(acc_train)
        mean_norm = np.mean(norms)
        all_mean_norm.append(mean_norm)
        print("  average training loss: %.5f" % loss_train)
        print("  average training accuracy: %.5f" % acc_train)
        print("  average norm: %.5f" % mean_norm)
        sets = [('valid', X_valid, y_valid, mask_valid,
                 all_losses_eval_valid, all_accuracy_eval_valid)]
        for subset, X, y, mask, all_losses, all_accuracy in sets:
            print("  validating: %s loss" % subset)
            preds = []
            losses = []  # reset so evaluation losses are not mixed with training losses
            num_batches = np.size(X, axis=0) // config.batch_size
            for i in range(num_batches):
                idx = range(i * batch_size, (i + 1) * batch_size)
                x_batch = X[idx]
                y_batch = y[idx]
                mask_batch = mask[idx]
                loss, out = eval(x_batch, y_batch, mask_batch)
                preds.append(out)
                losses.append(loss)
            predictions = np.concatenate(preds, axis=0)
            loss_eval = np.mean(losses)
            all_losses.append(loss_eval)
            # Crop targets and masks to the sequences actually evaluated.
            acc_eval = utils.proteins_acc(predictions,
                                          y[0:num_batches * batch_size],
                                          mask[0:num_batches * batch_size])
            all_accuracy.append(acc_eval)
            print("  average evaluation loss (%s): %.5f" % (subset, loss_eval))
            print("  average evaluation accuracy (%s): %.5f"
                  % (subset, acc_eval))

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_prev * (num_epochs - epoch)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)"
              % (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)"
              % (utils.hms(est_time_left), eta_str))
        print()

        if (epoch >= config.start_saving_at) \
                and ((epoch % config.save_every) == 0):
            print("  saving parameters and metadata")
            with open(metadata_path + side + "-%d.pkl" % epoch, 'wb') as f:
                pickle.dump({
                    'config_name': config_name,
                    'param_values': nn.layers.get_all_param_values(l_out),
                    'losses_train': all_losses_train,
                    'accuracy_train': all_accuracy_train,
                    'losses_eval_train': all_losses_eval_train,
                    'losses_eval_valid': all_losses_eval_valid,
                    'losses_eval_test': all_losses_eval_test,
                    'accuracy_eval_valid': all_accuracy_eval_valid,
                    'accuracy_eval_train': all_accuracy_eval_train,
                    'accuracy_eval_test': all_accuracy_eval_test,
                    'mean_norm': all_mean_norm,
                    'time_since_start': time_since_start,
                    'i': i,
                }, f, pickle.HIGHEST_PROTOCOL)
            print("  stored in %s" % metadata_path)
            print()
def main():
    sym_y = T.imatrix('target_output')
    sym_mask = T.matrix('mask')
    sym_x = T.tensor3()
    TOL = 1e-5
    num_epochs = config.epochs
    batch_size = config.batch_size

    print("Building network ...")
    l_in, l_out = config.build_model()
    all_layers = nn.layers.get_all_layers(l_out)
    num_params = nn.layers.count_params(l_out)
    print("  number of parameters: %d" % num_params)
    print("  layer output shapes:")
    for layer in all_layers:
        name = layer.__class__.__name__.ljust(32)
        print("    %s %s" % (name, nn.layers.get_output_shape(layer)))

    print("Creating cost function")
    # lasagne.layers.get_output produces a variable for the output of the net
    out_train = nn.layers.get_output(
        l_out, sym_x, mask=sym_mask, deterministic=False)
    out_eval = nn.layers.get_output(
        l_out, sym_x, mask=sym_mask, deterministic=True)
    probs_flat = out_train.reshape((-1, num_classes))

    lambda_reg = config.lambda_reg
    params = nn.layers.get_all_params(l_out, regularizable=True)
    reg_term = sum(T.sum(p ** 2) for p in params)
    cost = T.nnet.categorical_crossentropy(
        T.clip(probs_flat, TOL, 1 - TOL), sym_y.flatten())
    cost = T.sum(cost * sym_mask.flatten()) / T.sum(sym_mask) \
        + lambda_reg * reg_term

    # Retrieve all trainable parameters from the network
    all_params = nn.layers.get_all_params(l_out, trainable=True)

    # Setting the weights
    if hasattr(config, 'set_weights'):
        nn.layers.set_all_param_values(l_out, config.set_weights())

    # Compute SGD updates for training
    print("Computing updates ...")
    if hasattr(config, 'learning_rate_schedule'):
        # Import learning rate schedule
        learning_rate_schedule = config.learning_rate_schedule
    else:
        learning_rate_schedule = {0: config.learning_rate}
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
    all_grads = T.grad(cost, all_params)
    cut_norm = config.cut_grad
    updates, norm_calc = nn.updates.total_norm_constraint(
        all_grads, max_norm=cut_norm, return_norm=True)

    if optimizer == "rmsprop":
        updates = nn.updates.rmsprop(updates, all_params, learning_rate)
    elif optimizer == "adadelta":
        updates = nn.updates.adadelta(updates, all_params, learning_rate)
    elif optimizer == "adagrad":
        updates = nn.updates.adagrad(updates, all_params, learning_rate)
    elif optimizer == "nag":
        momentum_schedule = config.momentum_schedule
        momentum = theano.shared(np.float32(momentum_schedule[0]))
        updates = nn.updates.nesterov_momentum(
            updates, all_params, learning_rate, momentum)
    else:
        sys.exit("please choose either <rmsprop/adagrad/adadelta/nag> "
                 "in configfile")

    # Theano functions for training and computing cost
    print("config.batch_size %d" % batch_size)
    print("data.num_classes %d" % num_classes)
    if hasattr(config, 'build_model'):
        print("has build model")
    print("Compiling functions ...")
    # Use this for training (see deterministic=False above)
    train = theano.function(
        [sym_x, sym_y, sym_mask], [cost, out_train, norm_calc],
        updates=updates)
    # Use this for evaluation (deterministic=True and no updates)
    eval = theano.function([sym_x, sym_y, sym_mask], [cost, out_eval])

    # Start timers
    start_time = time.time()
    prev_time = start_time

    all_losses_train = []
    all_accuracy_train = []
    all_losses_eval_train = []
    all_losses_eval_valid = []
    all_losses_eval_test = []
    all_accuracy_eval_train = []
    all_accuracy_eval_valid = []
    all_accuracy_eval_test = []
    all_mean_norm = []

    import data
    X_train = data.X_train
    X_valid = data.X_valid
    X_test = data.X_test
    y_train = data.labels_train
    y_valid = data.labels_valid
    y_test = data.labels_test
    mask_train = data.mask_train
    mask_valid = data.mask_valid
    mask_test = data.mask_test
    print("y shape")
    print(y_valid.shape)
    print("X shape")
    print(X_valid.shape)

    # Start training
    if config.batch_norm:
        # Extra forward pass that collects batch-norm statistics over the
        # full training set before evaluation.
        collect_out = nn.layers.get_output(
            l_out, sym_x, deterministic=True, collect=True)
        f_collect = theano.function([sym_x], [collect_out])

    for epoch in range(num_epochs):
        if (epoch % 10) == 0:
            print("Epoch %d of %d" % (epoch + 1, num_epochs))
        if epoch in learning_rate_schedule:
            lr = np.float32(learning_rate_schedule[epoch])
            print("  setting learning rate to %.7f" % lr)
            learning_rate.set_value(lr)
        if optimizer == "nag":
            if epoch in momentum_schedule:
                mu = np.float32(momentum_schedule[epoch])
                print("  setting momentum to %.7f" % mu)
                momentum.set_value(mu)
        print("Shuffling data")
        seq_names = np.arange(0, data.num_seq_train)
        np.random.shuffle(seq_names)
        X_train = X_train[seq_names]
        y_train = y_train[seq_names]
        mask_train = mask_train[seq_names]

        num_batches = data.num_seq_train // batch_size
        losses = []
        preds = []
        norms = []
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = X_train[idx]
            y_batch = y_train[idx]
            mask_batch = mask_train[idx]
            loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
            # print(batch_norm)  # per-batch gradient norm (debug)
            norms.append(batch_norm)
            preds.append(out)
            losses.append(loss)
        predictions = np.concatenate(preds, axis=0)
        loss_train = np.mean(losses)
        all_losses_train.append(loss_train)
        acc_train = utils.proteins_acc(
            predictions,
            y_train[0:num_batches * batch_size],
            mask_train[0:num_batches * batch_size])
        all_accuracy_train.append(acc_train)
        mean_norm = np.mean(norms)
        all_mean_norm.append(mean_norm)
        print("  average training loss: %.5f" % loss_train)
        print("  average training accuracy: %.5f" % acc_train)
        print("  average norm: %.5f" % mean_norm)

        # Validate every epoch.
        if config.batch_norm:
            _ = f_collect(X_train)
        sets = [('valid', X_valid, y_valid, mask_valid,
                 all_losses_eval_valid, all_accuracy_eval_valid),
                ('test', X_test, y_test, mask_test,
                 all_losses_eval_test, all_accuracy_eval_test)]
        for subset, X, y, mask, all_losses, all_accuracy in sets:
            print("  validating: %s loss" % subset)
            preds = []
            losses = []  # reset so evaluation losses are not mixed with training losses
            num_batches = np.size(X, axis=0) // config.batch_size
            for i in range(num_batches):
                idx = range(i * batch_size, (i + 1) * batch_size)
                x_batch = X[idx]
                y_batch = y[idx]
                mask_batch = mask[idx]
                loss, out = eval(x_batch, y_batch, mask_batch)
                preds.append(out)
                losses.append(loss)
            predictions = np.concatenate(preds, axis=0)
            print("  pred")
            print(predictions.shape)
            print(predictions.dtype)
            loss_eval = np.mean(losses)
            all_losses.append(loss_eval)
            # Crop targets and masks to the sequences actually evaluated.
            acc_eval = utils.proteins_acc(predictions,
                                          y[0:num_batches * batch_size],
                                          mask[0:num_batches * batch_size])
            all_accuracy.append(acc_eval)
            # print("  average evaluation loss (%s): %.5f" % (subset, loss_eval))
            print("  average evaluation accuracy (%s): %.5f"
                  % (subset, acc_eval))

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        # Estimate remaining time from the duration of the last epoch.
        est_time_left = time_since_prev * (num_epochs - epoch)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)"
              % (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)"
              % (utils.hms(est_time_left), eta_str))
        print()

        if (epoch >= config.start_saving_at) \
                and ((epoch % config.save_every) == 0):
            print("  saving parameters and metadata")
            with open(metadata_path + "-%d.pkl" % epoch, 'wb') as f:
                pickle.dump({
                    'config_name': config_name,
                    'param_values': nn.layers.get_all_param_values(l_out),
                    'losses_train': all_losses_train,
                    'accuracy_train': all_accuracy_train,
                    'losses_eval_train': all_losses_eval_train,
                    'losses_eval_valid': all_losses_eval_valid,
                    'losses_eval_test': all_losses_eval_test,
                    'accuracy_eval_valid': all_accuracy_eval_valid,
                    'accuracy_eval_train': all_accuracy_eval_train,
                    'accuracy_eval_test': all_accuracy_eval_test,
                    'mean_norm': all_mean_norm,
                    'time_since_start': time_since_start,
                    'i': i,
                }, f, pickle.HIGHEST_PROTOCOL)
            print("  stored in %s" % metadata_path)
            print()
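# A minimal sketch of restoring a network from one of these dumps; the epoch
# suffix "-100" is a hypothetical example, and the file must have been
# written with the binary pickle protocol used above:
import pickle

with open(metadata_path + "-100.pkl", 'rb') as f:
    metadata = pickle.load(f)
nn.layers.set_all_param_values(l_out, metadata['param_values'])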