def Logger(q):
    """Master logging process for the cross-validation run.

    Consumes messages from the shared queue ``q`` and acts on them:

      - ``'kill'``        -> close the CSV logger, print the AUC summary, exit.
      - ``'worker_<id>'`` -> a worker requests a compute device; reply with
                             ``'master_<id>_<device>'`` on the queue.
      - ``dict``          -> one finished fold's metrics; append to results.csv
                             and record its ``'original_auc'``.
      - anything else     -> not addressed to the master; re-queue it and back
                             off for a random interval.
    """
    import time
    from collections import Counter  # hoisted: was re-imported on every device request

    all_auc = []
    registered_gpus = {}  # process_id -> device string already assigned to it
    logger = lib.logger.CSVLogger('results.csv', output_dir, [
        'fold', 'seq_acc', 'gnn_nuc_acc', 'bilstm_nuc_acc', 'auc',
        'original_seq_acc', 'original_gnn_nuc_acc',
        'original_bilstm_nuc_acc', 'original_auc'
    ])
    while True:
        msg = q.get()
        print(msg)
        if isinstance(msg, str) and msg == 'kill':
            # end of the run: summarize mean +/- std of the collected AUCs
            logger.close()
            print('%s ROC AUC: %.3f\u00B1%.3f' % (TRAIN_RBP_ID, np.mean(all_auc), np.std(all_auc)))
            break
        elif isinstance(msg, str) and msg.startswith('worker'):
            process_id = int(msg.split('_')[-1])
            if process_id in registered_gpus:
                # worker already has a device; repeat the earlier assignment
                print(process_id, 'found, returning', registered_gpus[process_id])
                q.put('master_%d_' % (process_id) + registered_gpus[process_id])
            else:
                print(process_id, 'not found')
                all_registered_devices = list(registered_gpus.values())
                # multiset difference so duplicate entries in DEVICES each
                # count as a separately assignable slot
                c1 = Counter(DEVICES)
                c2 = Counter(all_registered_devices)
                free_devices = list((c1 - c2).elements())
                if free_devices:
                    _device = np.random.choice(free_devices)
                    print('free device', _device)
                    q.put('master_%d_' % (process_id) + _device)
                    registered_gpus[process_id] = _device
                else:
                    # every device taken: this worker falls back to CPU
                    print('no free device!')
                    print(registered_gpus)
                    q.put('master_%d_/cpu:0' % (process_id))
        elif isinstance(msg, dict):
            # metrics dict from a finished fold
            logger.update_with_dict(msg)
            all_auc.append(msg['original_auc'])
        else:
            # message not meant for the master; put it back and back off
            q.put(msg)
            time.sleep(np.random.rand() * 5)
# Fragment of a larger training/evaluation routine (enclosing function not
# visible here): computes the number of "active units" of a latent code,
# flushes epoch metrics to CSV, then reloads the best checkpoint and runs the
# final test-set evaluation.
# assumes ret_dict['all_means'] is a list of (batch, latent_dim) arrays — TODO confirm
all_means = np.concatenate(ret_dict['all_means'], axis=0)
au_mean = np.mean(all_means, axis=0, keepdims=True)
au_var = all_means - au_mean
ns = au_var.shape[0]
# unbiased per-dimension variance of the means (divide by n - 1)
au_var = (au_var**2).sum(axis=0) / (ns - 1)
delta = 0.01
# a unit is "active" when the variance of its mean exceeds delta
au = (au_var >= delta).sum().item()
lib.plot_utils.plot('Validation_active_units', au, index=1)
lib.plot_utils.plot('Beta', beta, index=1)
# collect every metric plotted exactly once this tick into one CSV row
tocsv = {'Epoch': epoch}
for name, val in lib.plot_utils._since_last_flush.items():
    if lib.plot_utils._ticker_registry[name] == 1:
        tocsv[name] = list(val.values())[0]
logger.update_with_dict(tocsv)
lib.plot_utils.set_xlabel_for_tick(index=1, label='epoch')
lib.plot_utils.flush()
lib.plot_utils.tick(index=1)
if best_valid_weight_path is not None:
    # restore the best validation checkpoint before final testing
    print('Loading best weights from: %s' % (best_valid_weight_path))
    model.load_state_dict(
        torch.load(best_valid_weight_path)['model_weights'])
model.eval()
test_loss, test_pearson_corr = evaluate_regressor(test_loader)
print('Test pearson corr:', test_pearson_corr)
ret_dict = evaluate_posterior_decoding(test_loader)
def run_one_rbp(fold_idx, q):
    """Worker process for one cross-validation fold.

    Redirects its own stdout/stderr to a per-fold file, obtains a compute
    device from the master ``Logger`` process via queue ``q``, trains a JMRT
    model on the fold's training split, evaluates on both the modified and
    the original held-out test sets, writes per-example predictions and
    integrated-gradients plots, and finally reports the fold metrics back
    on ``q`` as a dict.
    """
    fold_output = os.path.join(output_dir, 'fold%d' % (fold_idx))
    os.makedirs(fold_output)
    # capture all of this worker's console output in its own .out file
    outfile = open(os.path.join(fold_output, str(os.getpid())) + ".out", "w")
    sys.stdout = outfile
    sys.stderr = outfile
    import time
    # todo: replace _identity with pid and let logger check if pid still alive
    process_id = mp.current_process()._identity[0]
    print('sending process id', mp.current_process()._identity[0])
    # ask the master for a device assignment
    q.put('worker_%d' % (process_id))
    while True:
        msg = q.get()
        if type(msg) is str and msg.startswith('master'):
            print('worker %d received' % (process_id), msg, str(int(msg.split('_')[1])))
            if int(msg.split('_')[1]) == process_id:
                # 'master_<pid>_<device>' addressed to this worker
                device = msg.split('_')[-1]
                print('Process', mp.current_process(), 'received', device)
                break
        # not ours: put it back for the right consumer and back off briefly
        q.put(msg)
        time.sleep(np.random.rand() * 2)
    print('training fold', fold_idx)
    train_idx, test_idx = dataset['splits'][fold_idx]
    model = JMRT(dataset['VOCAB_VEC'].shape[1], dataset['VOCAB_VEC'], device, **hp)
    train_data = [dataset['seq'][train_idx], dataset['segment_size'][train_idx],
                  dataset['raw_seq'][train_idx]]
    model.fit(train_data, dataset['label'][train_idx], EPOCHS, BATCH_SIZE,
              fold_output, logging=True)
    # evaluation on the modified (masked) test split
    test_data = [dataset['seq'][test_idx], dataset['segment_size'][test_idx],
                 dataset['raw_seq'][test_idx]]
    cost, acc, auc = model.evaluate(test_data, dataset['label'][test_idx],
                                    BATCH_SIZE, random_crop=False)
    print('Evaluation (with masking) on modified held-out test set, acc: %s, auc: %.3f' % (acc, auc))
    # evaluation on the original (unmodified) test split
    original_test_data = [original_dataset['seq'][test_idx],
                          original_dataset['segment_size'][test_idx],
                          original_dataset['raw_seq'][test_idx]]
    original_cost, original_acc, original_auc = model.evaluate(
        original_test_data, original_dataset['label'][test_idx], BATCH_SIZE,
        random_crop=False)
    print('Evaluation (with masking) on original held-out test set, acc: %s, auc: %.3f' % (original_acc, original_auc))
    # get predictions
    logger = lib.logger.CSVLogger('predictions.csv', fold_output,
                                  ['id', 'label', 'pred_neg', 'pred_pos'])
    all_pos_preds = []
    all_idx = []
    for idx, (_id, _label, _pred) in enumerate(
            zip(original_dataset['id'][test_idx],
                original_dataset['label'][test_idx],
                model.predict(original_test_data, BATCH_SIZE))):
        logger.update_with_dict({
            'id': _id,
            'label': np.max(_label),
            'pred_neg': _pred[0],
            'pred_pos': _pred[1],
        })
        if np.max(_label) == 1:
            # remember positive examples so they can be ranked by score below
            all_pos_preds.append(_pred[1])
            all_idx.append(idx)
    logger.close()
    # plot some motifs
    graph_dir = os.path.join(fold_output, 'integrated_gradients')
    if not os.path.exists(graph_dir):
        os.makedirs(graph_dir)
    all_pos_preds = np.array(all_pos_preds)
    all_idx = np.array(all_idx)
    # top 10 strongly predicted examples, descending order
    idx = all_idx[np.argsort(all_pos_preds)[::-1][:min(10, len(all_pos_preds))]]
    model.integrated_gradients(model.indexing_iterable(original_test_data, idx),
                               original_dataset['label'][test_idx][idx],
                               original_dataset['id'][test_idx][idx],
                               save_path=graph_dir)
    # common ig plots
    # indices of test examples whose id appears in the shared ig_ids set
    idx = []
    for i, _id in enumerate(dataset['id'][test_idx]):
        if _id in ig_ids:
            idx.append(i)
    common_graph_path = os.path.join(output_dir, 'common_integrated_gradients')
    if not os.path.exists(common_graph_path):
        os.makedirs(common_graph_path)
    model.integrated_gradients(model.indexing_iterable(original_test_data, idx),
                               original_dataset['label'][test_idx][idx],
                               original_dataset['id'][test_idx][idx],
                               save_path=common_graph_path)
    model.delete()
    # reload to reset module-level plotting/logging state for the next fold
    reload(lib.plot)
    reload(lib.logger)
    # NOTE(review): keys 'nuc_acc'/'original_nuc_acc' here do not match the
    # master CSV fields ('gnn_nuc_acc', 'bilstm_nuc_acc', ...) declared in
    # Logger — confirm which schema is current before relying on results.csv.
    q.put({
        'fold': fold_idx,
        'seq_acc': acc[0],
        'nuc_acc': acc[1],
        'auc': auc,
        'original_seq_acc': original_acc[0],
        'original_nuc_acc': original_acc[1],
        'original_auc': original_auc
    })
# Fragment of a regressor training loop (enclosing function not visible here):
# plots/logs the epoch metrics, then rotates checkpoints, keeping only the two
# most recent improving epochs on disk.
lib.plot_utils.plot('train_pearson_corr', train_pearson_corr)
lib.plot_utils.plot('valid_loss', valid_loss)
lib.plot_utils.plot('valid_pearson_corr', valid_pearson_corr)
lib.plot_utils.set_xlabel_for_tick(index=0, label='epoch')
lib.plot_utils.flush()
lib.plot_utils.tick(index=0)
print(
    'Epoch %d, train_loss: %.2f, train_pearson_corr: %2f, '
    'valid_loss: %.2f, valid_pearson_corr: %.2f' %
    (epoch, train_loss, train_pearson_corr, valid_loss, valid_pearson_corr))
logger.update_with_dict({
    'epoch': epoch,
    'train_loss': train_loss,
    'valid_loss': valid_loss,
    'train_pearson_corr': train_pearson_corr,
    'valid_pearson_corr': valid_pearson_corr
})
if valid_loss < best_valid_loss:
    best_valid_loss = valid_loss
    # keep at most 2 saved epochs: drop the oldest before saving a new one
    if len(last_2_epochs) >= 2:
        to_remove_epoch = last_2_epochs.pop(0)
        os.remove(os.path.join(save_dir, "model.epoch-" + str(to_remove_epoch)))
    last_2_epochs.append(epoch)
    best_valid_weight_path = os.path.join(save_dir, "model.epoch-" + str(epoch))
    torch.save(
        {'model_weights': model.state_dict(),
         'opt_weights': optimizer.state_dict()},
        best_valid_weight_path)
    print('Validation loss improved, saving current weights to path:',
          best_valid_weight_path)
    last_improved = 0
# Fragment of a multi-output classifier training loop (enclosing function not
# visible; this chunk is truncated mid torch.save — the call continues outside
# this view): logs per-category ROC/AP scores and rotates the 5 most recent
# improving checkpoints.
save_dict = {
    'epoch': epoch,
    'train_loss': train_loss,
    'valid_loss': valid_loss
}
# one ROC-AUC and one average-precision column per output category
for cate_idx in range(output_size):
    save_dict['train_roc_score_%d' % (cate_idx)] = train_roc_auc[cate_idx]
    save_dict['valid_roc_score_%d' % (cate_idx)] = valid_roc_auc[cate_idx]
    save_dict['train_ap_score_%d' % (cate_idx)] = train_ap_score[cate_idx]
    save_dict['valid_ap_score_%d' % (cate_idx)] = valid_ap_score[cate_idx]
logger.update_with_dict(save_dict)
if valid_loss < best_valid_loss:
    best_valid_loss = valid_loss
    # keep at most 5 saved epochs: drop the oldest before saving a new one
    if len(last_5_epochs) >= 5:
        to_remove_epoch = last_5_epochs.pop(0)
        os.remove(
            os.path.join(save_dir, "model.epoch-" + str(to_remove_epoch)))
    last_5_epochs.append(epoch)
    best_valid_weight_path = os.path.join(
        save_dir, "model.epoch-" + str(epoch))
    torch.save(
        {
            'model_weights': model.state_dict(),
            'opt_weights': optimizer.state_dict()
def fit(self, X, y, epochs, batch_size, output_dir, logging=False, epoch_to_start=0):
    """Train the model, feeding batches from background generator threads.

    Splits 10% of positives and 10% of negatives off as a validation set,
    trains for ``epochs`` epochs through a TF session, checkpoints whenever
    validation cost improves (after a 10-epoch warm-up), restores the best
    checkpoint at the end, and shuts the generator threads down.

    Parameters: X - iterable of per-example inputs; y - label array;
    output_dir - where checkpoints, plots and run.csv go; logging - also
    write per-epoch metrics to run.csv; epoch_to_start - resume epoch index.
    """
    checkpoints_dir = os.path.join(output_dir, 'checkpoints/')
    if not os.path.exists(checkpoints_dir):
        os.makedirs(checkpoints_dir)
    # split validation set
    # an example is "positive" when its label vector has any nonzero entry
    row_sum = np.array(list(map(lambda label: np.sum(label), y)))
    pos_idx, neg_idx = np.where(row_sum > 0)[0], np.where(row_sum == 0)[0]
    # 10% of positives + 10% of negatives, sampled without replacement
    dev_idx = np.array(list(np.random.choice(pos_idx, int(len(pos_idx) * 0.1), False)) + \
                       list(np.random.choice(neg_idx, int(len(neg_idx) * 0.1), False)))
    train_idx = np.delete(np.arange(len(y)), dev_idx)
    dev_data = self.indexing_iterable(X, dev_idx)
    dev_targets = y[dev_idx]
    X = self.indexing_iterable(X, train_idx)
    train_targets = y[train_idx]
    best_dev_cost = np.inf
    # best_dev_auc = 0.
    lib.plot.set_output_dir(output_dir)
    if logging:
        logger = lib.logger.CSVLogger(
            'run.csv', output_dir,
            ['epoch', 'cost', 'graph_cost', 'gnn_cost', 'bilstm_cost',
             'seq_acc', 'gnn_acc', 'bilstm_acc', 'auc',
             'dev_cost', 'dev_graph_cost', 'dev_gnn_cost', 'dev_bilstm_cost',
             'dev_seq_acc', 'dev_gnn_acc', 'dev_bilstm_acc', 'dev_auc'])
    train_generator = BackgroundGenerator(X, train_targets, batch_size, random_crop=False)
    val_generator = BackgroundGenerator(dev_data, dev_targets, batch_size)
    iters_per_epoch = train_generator.iters_per_epoch
    for epoch in range(epoch_to_start, epochs):
        prepro_time = 0.
        training_time = 0.
        for i in range(iters_per_epoch):
            prepro_start = time.time()
            _node_tensor, _mask_offset, all_adj_mat, _labels = train_generator.next()
            feed_dict = {
                self.node_input_ph: _node_tensor,
                self.adj_mat_ph: all_adj_mat,
                self.labels: _labels,
                self.mask_offset: _mask_offset,
                # global step counts iterations across all epochs
                self.global_step: i + epoch * iters_per_epoch,
                self.hf_iters_per_epoch: iters_per_epoch // 2,
                self.is_training_ph: True,
            }
            prepro_end = time.time()
            prepro_time += (prepro_end - prepro_start)
            self.sess.run(self.train_op, feed_dict)
            training_time += (time.time() - prepro_end)
        # average per-iteration timings for this epoch
        print('preprocessing time: %.4f, training time: %.4f' % (prepro_time / (i + 1), training_time / (i + 1)))
        train_cost, train_acc, train_auc = self.evaluate_with_generator(train_generator)
        lib.plot.plot('train_cost', train_cost[0])
        lib.plot.plot('train_graph_cost', train_cost[1])
        lib.plot.plot('train_gnn_cost', train_cost[2])
        lib.plot.plot('train_bilstm_cost', train_cost[3])
        lib.plot.plot('train_seq_acc', train_acc[0])
        lib.plot.plot('train_gnn_acc', train_acc[1])
        lib.plot.plot('train_bilstm_acc', train_acc[2])
        lib.plot.plot('train_auc', train_auc)
        dev_cost, dev_acc, dev_auc = self.evaluate_with_generator(val_generator)
        lib.plot.plot('dev_cost', dev_cost[0])
        lib.plot.plot('dev_graph_cost', dev_cost[1])
        lib.plot.plot('dev_gnn_cost', dev_cost[2])
        lib.plot.plot('dev_bilstm_cost', dev_cost[3])
        lib.plot.plot('dev_seq_acc', dev_acc[0])
        lib.plot.plot('dev_gnn_acc', dev_acc[1])
        lib.plot.plot('dev_bilstm_acc', dev_acc[2])
        lib.plot.plot('dev_auc', dev_auc)
        # NOTE(review): logger is only bound when logging=True, yet this call
        # is unconditional — looks like a NameError with logging=False; confirm.
        logger.update_with_dict({
            'epoch': epoch,
            'cost': train_cost[0],
            'graph_cost': train_cost[1],
            'gnn_cost': train_cost[2],
            'bilstm_cost': train_cost[3],
            'seq_acc': train_acc[0],
            'gnn_acc': train_acc[1],
            'bilstm_acc': train_acc[2],
            'auc': train_auc,
            'dev_cost': dev_cost[0],
            'dev_graph_cost': dev_cost[1],
            'dev_gnn_cost': dev_cost[2],
            'dev_bilstm_cost': dev_cost[3],
            'dev_seq_acc': dev_acc[0],
            'dev_gnn_acc': dev_acc[1],
            'dev_bilstm_acc': dev_acc[2],
            'dev_auc': dev_auc,
        })
        lib.plot.flush()
        lib.plot.tick()
        if dev_cost[0] < best_dev_cost and epoch - epoch_to_start >= 10:  # unstable loss in the beginning
            best_dev_cost = dev_cost[0]
            save_path = self.saver.save(self.sess, checkpoints_dir, global_step=epoch)
            print('Validation sample cost improved. Saved to path %s\n' % (save_path), flush=True)
        else:
            print('\n', flush=True)
    # restore the best checkpoint saved during training
    print('Loading best weights %s' % (save_path), flush=True)
    self.saver.restore(self.sess, save_path)
    if logging:
        logger.close()
    # signal the background threads to stop, unblock one pending batch each,
    # then join them
    train_generator.kill.set()
    val_generator.kill.set()
    train_generator.next()
    val_generator.next()
    train_generator.join()
    val_generator.join()
def fit(self, X, y, epochs, batch_size, output_dir, logging=False, epoch_to_start=0, random_crop=False):
    """Train the model with in-process batching (optionally random-cropped).

    Splits 10% of positives and 10% of negatives off as a validation set,
    reshuffles the training data each epoch, pads labels per batch to the
    batch's max segment length, checkpoints whenever validation cost
    improves (after a 10-epoch warm-up), and restores the best checkpoint
    at the end.

    Parameters: X - iterable of (node_tensor, segment_length, raw_seq);
    y - label array; output_dir - destination for checkpoints/plots/run.csv;
    logging - also write per-epoch metrics to run.csv; epoch_to_start -
    resume epoch index; random_crop - apply crop augmentation per epoch.
    """
    checkpoints_dir = os.path.join(output_dir, 'checkpoints/')
    if not os.path.exists(checkpoints_dir):
        os.makedirs(checkpoints_dir)
    # split validation set
    # an example is "positive" when its label vector has any nonzero entry
    row_sum = np.array(list(map(lambda label: np.sum(label), y)))
    pos_idx, neg_idx = np.where(row_sum > 0)[0], np.where(row_sum == 0)[0]
    # 10% of positives + 10% of negatives, sampled without replacement
    dev_idx = np.array(list(np.random.choice(pos_idx, int(len(pos_idx) * 0.1), False)) + \
                       list(np.random.choice(neg_idx, int(len(neg_idx) * 0.1), False)))
    train_idx = np.delete(np.arange(len(y)), dev_idx)
    dev_data = self.indexing_iterable(X, dev_idx)
    dev_targets = y[dev_idx]
    X = self.indexing_iterable(X, train_idx)
    train_targets = y[train_idx]
    size_train = train_targets.shape[0]
    # ceil(size_train / batch_size): last partial batch still trains
    iters_per_epoch = size_train // batch_size + (0 if size_train % batch_size == 0 else 1)
    best_dev_cost = np.inf
    lib.plot.set_output_dir(output_dir)
    if logging:
        logger = lib.logger.CSVLogger('run.csv', output_dir, [
            'epoch', 'cost', 'graph_cost', 'nuc_cost', 'seq_acc', 'nuc_acc',
            'auc', 'dev_cost', 'dev_graph_cost', 'dev_nuc_cost',
            'dev_seq_acc', 'dev_nuc_acc', 'dev_auc'
        ])
    for epoch in range(epoch_to_start, epochs):
        # reshuffle training examples each epoch
        permute = np.random.permutation(size_train)
        node_tensor, segment_length, raw_seq = self.indexing_iterable(
            X, permute)
        y = train_targets[permute]
        if random_crop:  # augmentation
            node_tensor, segment_length, y = \
                self.random_crop(node_tensor, raw_seq, y)
        prepro_time = 0.
        training_time = 0.
        for i in range(iters_per_epoch):
            prepro_start = time.time()
            _node_tensor, _segment, _labels \
                = node_tensor[i * batch_size: (i + 1) * batch_size], \
                  segment_length[i * batch_size: (i + 1) * batch_size], \
                  y[i * batch_size: (i + 1) * batch_size]
            _max_len = max(_segment)
            # left-pad each label vector to the batch's max segment length
            _labels = np.array([
                np.pad(label, [_max_len - len(label), 0], mode='constant')
                for label in _labels
            ])
            feed_dict = {
                self.node_input_ph: np.concatenate(_node_tensor, axis=0),
                self.labels: _labels,
                self.max_len: _max_len,
                self.segment_length: _segment,
                self.global_step: i,
                self.hf_iters_per_epoch: iters_per_epoch // 2,
                self.is_training_ph: True
            }
            prepro_end = time.time()
            prepro_time += (prepro_end - prepro_start)
            self.sess.run(self.train_op, feed_dict)
            training_time += (time.time() - prepro_end)
        # average per-iteration timings for this epoch
        print('preprocessing time: %.4f, training time: %.4f' % (prepro_time / (i + 1), training_time / (i + 1)))
        train_cost, train_acc, train_auc = self.evaluate(
            X, train_targets, batch_size)
        lib.plot.plot('train_cost', train_cost[0])
        lib.plot.plot('train_graph_cost', train_cost[1])
        lib.plot.plot('train_nuc_cost', train_cost[2])
        lib.plot.plot('train_seq_acc', train_acc[0])
        lib.plot.plot('train_nuc_acc', train_acc[1])
        lib.plot.plot('train_auc', train_auc)
        dev_cost, dev_acc, dev_auc = self.evaluate(dev_data, dev_targets,
                                                   batch_size)
        lib.plot.plot('dev_cost', dev_cost[0])
        lib.plot.plot('dev_graph_cost', dev_cost[1])
        lib.plot.plot('dev_nuc_cost', dev_cost[2])
        lib.plot.plot('dev_seq_acc', dev_acc[0])
        lib.plot.plot('dev_nuc_acc', dev_acc[1])
        lib.plot.plot('dev_auc', dev_auc)
        # NOTE(review): logger is only bound when logging=True, yet this call
        # is unconditional — looks like a NameError with logging=False; confirm.
        logger.update_with_dict({
            'epoch': epoch,
            'cost': train_cost[0],
            'graph_cost': train_cost[1],
            'nuc_cost': train_cost[2],
            'seq_acc': train_acc[0],
            'nuc_acc': train_acc[1],
            'auc': train_auc,
            'dev_cost': dev_cost[0],
            'dev_graph_cost': dev_cost[1],
            'dev_nuc_cost': dev_cost[2],
            'dev_seq_acc': dev_acc[0],
            'dev_nuc_acc': dev_acc[1],
            'dev_auc': dev_auc,
        })
        lib.plot.flush()
        lib.plot.tick()
        if dev_cost[
            0] < best_dev_cost and epoch - epoch_to_start >= 10:  # unstable loss in the beginning
            best_dev_cost = dev_cost[0]
            save_path = self.saver.save(self.sess, checkpoints_dir, global_step=epoch)
            print('Validation sample cost improved. Saved to path %s\n' % (save_path), flush=True)
        else:
            print('\n', flush=True)
    # restore the best checkpoint saved during training
    print('Loading best weights %s' % (save_path), flush=True)
    self.saver.restore(self.sess, save_path)
    if logging:
        logger.close()
# Fragment of a classifier training loop (enclosing function not visible here;
# the chunk ends right after computing the new checkpoint path — the torch.save
# presumably follows outside this view): logs epoch metrics and rotates the
# 5 most recent improving checkpoints.
lib.plot_utils.set_xlabel_for_tick(index=0, label='epoch')
lib.plot_utils.flush()
lib.plot_utils.tick(index=0)
print(
    'Epoch %d, train_loss: %.2f, train_acc: %.2f, train_roc_auc: %.2f, '
    'valid_loss: %.2f, valid_acc: %.2f, valid_roc_auc: %.2f' %
    (epoch, train_loss, train_acc, train_roc_auc,
     valid_loss, valid_acc, valid_roc_auc))
logger.update_with_dict({
    'epoch': epoch,
    'train_loss': train_loss,
    'valid_loss': valid_loss,
    'train_acc': train_acc,
    'valid_acc': valid_acc,
    'train_roc_auc': train_roc_auc,
    'valid_roc_auc': valid_roc_auc
})
if valid_loss < best_valid_loss:
    best_valid_loss = valid_loss
    # keep at most 5 saved epochs: drop the oldest before saving a new one
    if len(last_5_epochs) >= 5:
        to_remove_epoch = last_5_epochs.pop(0)
        os.remove(
            os.path.join(save_dir, "model.epoch-" + str(to_remove_epoch)))
    last_5_epochs.append(epoch)
    best_valid_weight_path = os.path.join(
        save_dir, "model.epoch-" + str(epoch))