예제 #1
0
                               hyper_dim=int(model_weight[11]),
                               eps=eps))
        n_outlier = len(outliers)
        if n_outlier <= 50:
            outlier_list.append(outliers)
            break

# Persist the collected outlier index arrays before consolidating them.
# NOTE(review): np.array() on entries of unequal length is rejected by recent
# NumPy releases unless dtype=object is given -- confirm the entries always
# share a length, or pick an explicit dtype.
np.save('outlier_list.npy', np.array(outlier_list))
outlier_list_uni, outlier_count = np.unique(np.hstack(outlier_list),
                                            return_counts=True)
# Keep only the indices that were collected more than once.
outlier_list_ulti = outlier_list_uni[outlier_count > 1]

print('Writing pdb files')
# The log is read as whitespace-separated tokens that alternate between a
# trajectory name and an integer; the `with` block guarantees the handle is
# closed once the content has been read.
with open('./scheduler_logs/openmm_log.txt', 'r') as log_file:
    traj_info = log_file.read().split()

# Even-positioned tokens become the keys, odd-positioned tokens the integer
# values (presumably per-trajectory frame counts -- verify against the
# writer of openmm_log.txt).
traj_dict = dict(zip(traj_info[::2], np.array(traj_info[1::2]).astype(int)))

outliers_pdb = os.path.join(work_dir, 'outlier_pdbs')
make_dir_p(outliers_pdb)

# Write one pdb file per retained outlier index.
for outlier in outlier_list_ulti:
    traj_file, frame_number = find_frame(traj_dict, outlier)
    # File name: outlier index, first 18 characters of the trajectory name,
    # and the frame number within that trajectory.
    outlier_pdb_file = os.path.join(
        outliers_pdb, '%d_%s_%d.pdb' % (outlier, traj_file[:18], frame_number))
    write_pdb_frame(traj_file, pdb_file, frame_number, outlier_pdb_file)

print('Finishing and cleaning up the jobs. ')
# The cleanup script is launched without waiting for it to finish.
subprocess.Popen(['bash', 'prerun_clean.sh'])
예제 #2
0
            if n_outlier <= 50: 
                outlier_list.append(outliers)
                break
    
    # Flatten the collected outlier arrays into their unique indices (with
    # occurrence counts).
    outlier_list_uni, outlier_count = np.unique(np.hstack(outlier_list), return_counts=True)

    print('\nPreparing to write new pdb files')
    # The log is read as whitespace-separated tokens alternating between a
    # trajectory name and an integer; `with` closes the handle right after
    # the read instead of leaving it to the garbage collector.
    with open('./scheduler_logs/openmm_log.txt', 'r') as log_file:
        traj_info = log_file.read().split()
    # Even-positioned tokens are the keys, odd-positioned tokens the integer
    # values (presumably frame counts -- verify against the log writer).
    traj_dict = dict(zip(traj_info[::2], np.array(traj_info[1::2]).astype(int)))

    # Write the new outliers
    n_outlier_iter = 0      # number of pdb files written in this pass
    new_outlier_list = []   # pdb paths of every outlier kept in this pass
    for outlier in outlier_list_uni:
        traj_file, num_frame = find_frame(traj_dict, outlier)
        # Frame 0 is treated as the starting structure and is never exported.
        if num_frame == 0:
            print('Detected initial point as outlier, skipping...')
            continue
        # File name: first 18 characters of the trajectory name plus the
        # zero-padded frame number.
        outlier_pdb_file = os.path.join(outliers_pdb_path, '{}_{:06d}.pdb'.format(traj_file[:18], num_frame))
        new_outlier_list.append(outlier_pdb_file)
        # Only outliers not already tracked in outlier_pdb_files are written.
        if outlier_pdb_file not in outlier_pdb_files:
            print('Found a new outlier# {} at frame {} of {}'.format(outlier, num_frame, traj_file))
            outlier_pdb = write_pdb_frame(traj_file, pdb_file, num_frame, outlier_pdb_file)
            print('     Written as {}'.format(outlier_pdb_file))
            outlier_pdb_files.append(outlier_pdb_file)
            n_outlier_iter += 1

    # Report every tracked outlier file that the current pass did not select
    # again.
    # NOTE(review): the message announces a removal from the outlier list,
    # but no removal is visible in this excerpt -- confirm it happens in the
    # code that follows.
    for outlier_pdb_file in outlier_pdb_files: 
        if outlier_pdb_file not in new_outlier_list: 
            # Only the last 29 characters of the path are shown in the message.
            print('Old outlier {} is now connected to a cluster and removing it from the outlier list '.format(outlier_pdb_file[-29:]))
예제 #3
0
    # Run the outlier search on cm_predict; only the first of the two
    # returned values (the ranked outlier list) is used below.
    outlier_list_ranked, _ = outliers_from_latent_loc(
        cm_predict, n_outliers=n_outliers, n_jobs=12)
    print("Done outlier searching...")

    # Outlier structures live in ./outlier_pdbs, resolved to an absolute path
    # against the current working directory and created on demand.
    outliers_pdb_path = os.path.abspath('./outlier_pdbs')
    os.makedirs(outliers_pdb_path, exist_ok=True)
    print('Writing outliers in %s' % outliers_pdb_path)

    # Resolve each outlier to its pdb path; write only the missing files.
    new_outliers_list = []
    for outlier in outlier_list_ranked:
        # Locate the run directory and the frame index of this outlier.
        traj_dir, num_frame = find_frame(traj_dict, outlier)
        run_name = os.path.basename(traj_dir)
        traj_file = os.path.join(traj_dir, 'output.dcd')

        # Outlier file name: run label followed by the zero-padded frame.
        pdb_name = f"{run_name}_{num_frame:06}.pdb"
        outlier_pdb_file = os.path.join(outliers_pdb_path, pdb_name)
        new_outliers_list.append(outlier_pdb_file)

        # A pdb that already exists on disk is not rewritten.
        if os.path.exists(outlier_pdb_file):
            continue
        print(f'New outlier at frame {num_frame} of {run_name}')
        outlier_pdb = write_pdb_frame(
            traj_file, pdb_file, num_frame, outlier_pdb_file)

    # List the outlier pdbs currently on disk whose names start with
    # 'omm_runs' (bookkeeping of outdated outliers).
    outliers_list = glob(os.path.join(outliers_pdb_path, 'omm_runs*.pdb'))