# --- Outlier search over the CVAE latent space, then write outlier PDBs ------
# NOTE(review): in the original file this chunk begins mid-call; the enclosing
# eps-tuning loop header and the start of the `outliers_from_cvae` call are
# outside this view and are reconstructed here — confirm against the full
# script before merging.
while True:
    # DBSCAN-style outlier detection in the latent space. The latent
    # dimension is read from character 11 of the weight-file name —
    # presumably the file-naming convention encodes it there; TODO confirm.
    outliers = np.squeeze(outliers_from_cvae(
        model_weight, cm_predict,
        hyper_dim=int(model_weight[11]), eps=eps))
    n_outlier = len(outliers)
    # Stop once the outlier count is manageable; otherwise loosen eps.
    if n_outlier <= 50:
        outlier_list.append(outliers)
        break
    eps += 0.05  # TODO confirm step size against the original loop header

# Persist every per-model outlier index array.
# NOTE(review): the sub-arrays may differ in length; np.array on ragged input
# raises on NumPy >= 1.24 — use dtype=object there if that applies.
np.save('outlier_list.npy', np.array(outlier_list))

# Keep only frame indices flagged as outliers by more than one model.
outlier_list_uni, outlier_count = np.unique(np.hstack(outlier_list),
                                            return_counts=True)
outlier_list_ulti = outlier_list_uni[outlier_count > 1]  # boolean mask idiom

print('Writing pdb files')

# write the pdb according the outlier indices
# The OpenMM scheduler log alternates "<traj_file> <n_frames>" tokens.
# Fix: context manager so the log file handle is closed deterministically
# (the original open(...).read() leaked the handle).
with open('./scheduler_logs/openmm_log.txt', 'r') as log_fh:
    traj_info = log_fh.read().split()
traj_dict = dict(zip(traj_info[::2], np.array(traj_info[1::2]).astype(int)))

# Write one PDB per selected outlier frame.
outliers_pdb = os.path.join(work_dir, 'outlier_pdbs')
make_dir_p(outliers_pdb)
for outlier in outlier_list_ulti:
    traj_file, frame_number = find_frame(traj_dict, outlier)
    outlier_pdb_file = os.path.join(
        outliers_pdb,
        '%d_%s_%d.pdb' % (outlier, traj_file[:18], frame_number))
    # Fix: drop the unused `outlier_pdb` binding — the return value was
    # never read.
    write_pdb_frame(traj_file, pdb_file, frame_number, outlier_pdb_file)

print('Finishing and cleaning up the jobs. ')
# Fix: subprocess.run waits for the cleanup script and reaps the child; the
# original Popen fired-and-forgot, risking a zombie and a race with exit.
subprocess.run(['bash', 'prerun_clean.sh'])
# --- Incremental outlier PDB bookkeeping -------------------------------------
# NOTE(review): this chunk begins mid-loop in the original (the eps-tuning
# loop header and the outlier-search call are outside this view); the
# enclosing `while` is reconstructed — confirm against the full script.
while True:
    outliers = np.squeeze(outliers_from_cvae(
        model_weight, cm_predict, eps=eps))  # TODO confirm exact call site
    n_outlier = len(outliers)
    # Stop once the outlier count is manageable; otherwise loosen eps.
    if n_outlier <= 50:
        outlier_list.append(outliers)
        break
    eps += 0.05  # TODO confirm step size against the original loop header

# Unique outlier frame indices across all models.
outlier_list_uni, outlier_count = np.unique(np.hstack(outlier_list),
                                            return_counts=True)

print('\nPreparing to write new pdb files')

# write the pdb according the outlier indices
# The OpenMM scheduler log alternates "<traj_file> <n_frames>" tokens.
# Fix: context manager so the log file handle is closed deterministically
# (the original open(...).read() leaked the handle).
with open('./scheduler_logs/openmm_log.txt', 'r') as log_fh:
    traj_info = log_fh.read().split()
traj_dict = dict(zip(traj_info[::2], np.array(traj_info[1::2]).astype(int)))

# Write the new outliers
n_outlier_iter = 0
new_outlier_list = []
for outlier in outlier_list_uni:
    traj_file, num_frame = find_frame(traj_dict, outlier)
    # Frame 0 is the starting structure, not a genuine discovery — skip it.
    if num_frame == 0:
        print('Detected initial point as outlier, skipping...')
        continue
    outlier_pdb_file = os.path.join(
        outliers_pdb_path,
        '{}_{:06d}.pdb'.format(traj_file[:18], num_frame))
    new_outlier_list.append(outlier_pdb_file)
    # Only write PDBs we have not produced in an earlier iteration.
    if outlier_pdb_file not in outlier_pdb_files:
        print('Found a new outlier# {} at frame {} of {}'.format(
            outlier, num_frame, traj_file))
        outlier_pdb = write_pdb_frame(traj_file, pdb_file, num_frame,
                                      outlier_pdb_file)
        print(' Written as {}'.format(outlier_pdb_file))
        outlier_pdb_files.append(outlier_pdb_file)
        n_outlier_iter += 1

# Report outliers that are no longer flagged this iteration.
# NOTE(review): the original chunk is truncated here — the actual removal of
# the stale entry (and any file deletion) happens past this view; if it
# mutates `outlier_pdb_files` while iterating it, iterate a copy instead.
for outlier_pdb_file in outlier_pdb_files:
    if outlier_pdb_file not in new_outlier_list:
        print('Old outlier {} is now connected to a cluster and removing it '
              'from the outlier list '.format(outlier_pdb_file[-29:]))
# --- Rank-ordered outlier selection and PDB writing --------------------------
# Rank outliers in the latent projection (local-outlier-factor style helper;
# exact semantics live in `outliers_from_latent_loc` — defined elsewhere).
outlier_list_ranked, _ = outliers_from_latent_loc(
    cm_predict, n_outliers=n_outliers, n_jobs=12)
print("Done outlier searching...")

# Write the outliers using MDAnalysis
outliers_pdb_path = os.path.abspath('./outlier_pdbs')
os.makedirs(outliers_pdb_path, exist_ok=True)
print('Writing outliers in %s' % outliers_pdb_path)

# identify new outliers
new_outliers_list = []
for outlier in outlier_list_ranked:
    # find the location of outlier
    traj_dir, num_frame = find_frame(traj_dict, outlier)
    traj_file = os.path.join(traj_dir, 'output.dcd')
    # get the outlier name - traj_label + frame number (zero-padded)
    run_name = os.path.basename(traj_dir)
    pdb_name = f"{run_name}_{num_frame:06d}.pdb"
    outlier_pdb_file = os.path.join(outliers_pdb_path, pdb_name)
    new_outliers_list.append(outlier_pdb_file)
    # Only write new pdbs to reduce I/O redundancy.
    if not os.path.exists(outlier_pdb_file):
        print(f'New outlier at frame {num_frame} of {run_name}')
        # Fix: drop the unused `outlier_pdb` binding — the return value was
        # never read.
        write_pdb_frame(traj_file, pdb_file, num_frame, outlier_pdb_file)

# Clean up outdated outliers (just for bookkeeping)
outliers_list = glob(os.path.join(outliers_pdb_path, 'omm_runs*.pdb'))