def writeInitialStructures(field1,
                           field2,
                           crit1,
                           crit2,
                           centers_info,
                           filename_template,
                           traj,
                           topology=None,
                           use_pdb=False):
    """
    Write one structure file per entry of ``centers_info``.

    For every key of ``centers_info`` (iterated in lockstep with ``field1``
    and ``field2``) the ``'structure'`` entry is read as an
    ``(epoch, trajectory, snapshot)`` triple of integers, the matching
    trajectory file is located and the selected snapshot is written to a
    file named from ``filename_template``.

    :param field1: iterable of values paired with each cluster (first metric)
    :param field2: iterable of values paired with each cluster (second metric)
    :param crit1: label passed to ``filename_template`` before the
        ``field1`` value
    :param crit2: label passed to ``filename_template`` before the
        ``field2`` value
    :param centers_info: mapping whose values hold a ``'structure'`` entry
        with three int-convertible items
    :param filename_template: format string receiving
        ``(cluster_num, crit1, field1_value, crit2, field2_value)``
    :param traj: trajectory base name, prepended to the trajectory number
    :param topology: when truthy, trajectories are read as ``.xtc`` files and
        extraction is delegated to ``splitTrajectory.main``; otherwise
        ``.pdb`` trajectories are read directly
    :param use_pdb: forwarded unchanged to the trajectory readers
    """
    # Distinct loop names keep the ``field1``/``field2`` parameters intact.
    for cluster_num, value1, value2 in zip(centers_info, field1, field2):
        epoch_num, traj_num, snap_num = map(
            int, centers_info[cluster_num]['structure'])
        extension = "xtc" if topology else "pdb"
        trajectory = "{}/{}{}.{}".format(epoch_num, traj, traj_num, extension)
        filename = filename_template.format(cluster_num, crit1, value1, crit2,
                                            value2)
        if not topology:
            # Only the plain-PDB path needs the snapshots in memory; the
            # topology path lets splitTrajectory read the file itself.
            snapshots = utilities.getSnapshots(trajectory,
                                               topology=topology,
                                               use_pdb=use_pdb)
            with open(filename, "w") as fw:
                fw.write(snapshots[snap_num])
        else:
            # The snapshot number is passed 1-based (hence snap_num + 1).
            splitTrajectory.main("", [trajectory],
                                 topology, [snap_num + 1],
                                 template=filename,
                                 use_pdb=use_pdb)
def extract_snapshot_from_xtc(path, f_id, output, topology, step, out_freq, f_out):
    """
    Extract a single model from the trajectory that sits next to ``path``.

    The trajectory is searched in ``dirname(path)``: first as
    ``*trajectory*_<f_id>.xtc`` and, if nothing matches, with any extension.
    The first match is handed to ``splitTrajectory.main`` together with the
    1-based model number derived from ``step`` and ``out_freq``.

    :param path: path of a file located in the trajectory directory
    :param f_id: trajectory identifier used in the file-name pattern
    :param output: output directory forwarded to ``splitTrajectory.main``
    :param topology: topology forwarded to ``splitTrajectory.main``
    :param step: step of the wanted structure
    :param out_freq: number of steps between two consecutive models
    :param f_out: name template of the extracted file
    :raises SystemExit: when no trajectory file matches
    """
    traj_dir = os.path.dirname(path)
    f_in = glob.glob(os.path.join(traj_dir, "*trajectory*_{}.xtc".format(f_id)))
    if not f_in:
        f_in = glob.glob(os.path.join(traj_dir, "*trajectory*_{}.*".format(f_id)))
    if not f_in:
        sys.exit("Trajectory {} not found. Be aware that PELE trajectories must contain the label \'trajectory\' in their file name to be detected".format("*trajectory*_{}".format(f_id)))
    # Model numbers are integers: floor-divide and cast so that Python 3 true
    # division (or a float step) cannot yield an index such as 3.5 that would
    # never equal any model number.
    model = int(step // out_freq) + 1
    splitTrajectory.main(output, [f_in[0]], topology, [model], template=f_out)
    print("Model {} selected".format(f_out))
示例#3
0
def split_trajectory(paths):
    """
    Split every epoch trajectory and yield the directory it was split into.

    Files matching ``<paths.adap_ex_output>/*/traj*.*`` are considered; only
    those whose parent directory name is purely numeric are processed. Each
    one is passed to ``st.main`` with ``paths.topology`` and an output
    directory ``<paths.pele_dir>/ini_str/<trajectory name without extension>``.

    This is a generator: a trajectory is only split when its item is
    requested.
    """
    pattern = os.path.join(paths.adap_ex_output, "*/traj*.*")
    for candidate in glob.glob(pattern):
        parent_name = os.path.basename(os.path.dirname(candidate))
        if not parent_name.isdigit():
            # Not inside a numbered (epoch) directory: ignore it.
            continue
        stem, _ = os.path.splitext(os.path.basename(candidate))
        output_dir = os.path.join(paths.pele_dir, "ini_str", stem)
        st.main(output_dir, [candidate], paths.topology, None, template=None)
        yield output_dir
示例#4
0
 def extract_snapshots_from_xtc(self, min_values, steps):
     """
     Extract one model per row of ``min_values`` from its .xtc trajectory.

     For each row, the trajectory ``*trajectory*_<report>.xtc`` is searched
     in the directory of the row's ``DIR`` entry and the selected model is
     written through ``st.main`` under a name built from the epoch, report
     id, step and the two criteria values.

     :param min_values: table supporting ``[column]`` access and a
         ``report`` attribute, each returning an object with ``tolist()``
         (presumably a pandas DataFrame -- confirm against the caller)
     :param steps: name of the column of ``min_values`` holding the steps

     NOTE(review): ``output``, ``topology`` and ``out_freq`` are not defined
     in this method; they must be resolvable from the enclosing scope.
     Confirm they are module-level names and not meant to be ``self``
     attributes.
     """
     paths = min_values[DIR].tolist()
     # The epoch is the name of the directory that contains each file.
     epochs = [
         os.path.basename(os.path.normpath(os.path.dirname(path)))
         for path in paths
     ]
     values1 = min_values[self.crit1].tolist()
     values2 = min_values[self.crit2].tolist()
     file_ids = min_values.report.tolist()
     step_indexes = min_values[steps].tolist()
     label1 = self.crit1.replace(" ", "")
     label2 = self.crit2.replace(" ", "")
     files_out = [
         "epoch{}_trajectory_{}.{}_{}{:.2f}_{}{:.3f}.pdb".format(
             epoch, report, int(step), label1, value1, label2, value2)
         for epoch, step, report, value1, value2 in zip(
             epochs, step_indexes, file_ids, values1, values2)
     ]
     for f_id, f_out, step, path in zip(file_ids, files_out, step_indexes,
                                        paths):
         f_in = glob.glob(
             os.path.join(os.path.dirname(path),
                          "*trajectory*_{}.xtc".format(f_id)))
         # Model numbers are integers: floor-divide and cast so that true
         # division (or a float step) cannot produce a fractional index
         # that would never equal any model number.
         model = int(step % self.ad_steps // out_freq) + 1
         found = st.main(output,
                         f_in,
                         topology, [model],
                         template=f_out)
         if found:
             print("MODEL {} has been selected".format(f_out))
         else:
             print("MODEL {} not found. Check -f option".format(f_out))
def write_snapshot(snap_num,
                   trajectory,
                   filename,
                   topology=None,
                   use_pdb=False):
    """
    Write snapshot ``snap_num`` of ``trajectory`` to ``filename``.

    With a truthy ``topology`` the extraction is delegated to
    ``splitTrajectory.main`` (which receives the snapshot number 1-based).
    Otherwise the snapshots are loaded with ``utilities.getSnapshots`` and
    the one at index ``snap_num`` is written as text.

    :param snap_num: 0-based index of the snapshot to write
    :param trajectory: path of the trajectory file
    :param filename: path of the file to create
    :param topology: topology forwarded to the trajectory readers
    :param use_pdb: forwarded unchanged to the trajectory readers
    """
    if topology:
        splitTrajectory.main("", [trajectory],
                             topology, [snap_num + 1],
                             template=filename,
                             use_pdb=use_pdb)
        return

    frames = utilities.getSnapshots(trajectory,
                                    topology=topology,
                                    use_pdb=use_pdb)
    with open(filename, "w") as handle:
        handle.write(frames[snap_num])
示例#6
0
def cluster_with_dbscan(paths,
                        snapshots,
                        all_coordinates,
                        out_freq=1,
                        topology=None):
    """
    Cluster the chosen structures with HDBSCAN and write one
    representative per cluster into the ``Clusters`` directory.

    The representative of a cluster is the member with the highest
    silhouette value. It is written as ``Clusters/cluster_<label + 1>.pdb``.

    :param paths: trajectory file of each structure
    :param snapshots: snapshot of each structure inside its trajectory
    :param all_coordinates: coordinates handed to the clustering, one entry
        per structure
    :param out_freq: divisor turning a snapshot value into a model number
        (only used for PDB trajectories)
    :param topology: when truthy, structures are extracted through
        ``st.main``; otherwise the model is cut out of the PDB trajectory text
    :raises ValueError: if the clustering or the silhouette computation
        rejects the input
    :raises AttributeError: if the requested model is missing from a PDB
        trajectory
    """
    n_samples = len(snapshots)

    # Clusterize
    t0 = time.time()
    try:
        db = hdbscan.HDBSCAN(min_samples=int(n_samples * 0.10) +
                             1).fit(all_coordinates)
    except ValueError:
        raise ValueError(
            "Ligand not found check the option --resname. i.e python interactive.py 5 6 7 --resname LIG"
        )
    final_result = db.labels_
    t1 = time.time()
    print("time clustering")
    print(t1 - t0)

    # Score every structure within its cluster
    t0 = time.time()
    try:
        silhouettes = mt.silhouette_samples(all_coordinates, final_result)
    except ValueError:
        raise ValueError(
            "Clustering failed. Structures do not follow any pattern or they are not enough"
        )

    # Seed each cluster with one of its members ...
    max_clust = {
        label: [path, snap, sil]
        for (path, snap, label,
             sil) in zip(paths, snapshots, final_result, silhouettes)
    }
    # ... then keep the member with the highest silhouette as representative.
    for path, snapshot, label, sil in zip(paths, snapshots, final_result,
                                          silhouettes):
        if sil > max_clust[label][2]:
            max_clust[label] = [path, snapshot, sil]

    # The output directory does not depend on the cluster: create it once.
    output = "Clusters"
    if not os.path.exists(output):
        os.mkdir(output)

    # Get Structures
    # NOTE(review): no label is skipped, so a label of -1 (presumably the
    # HDBSCAN noise label -- confirm) is written as cluster_0.pdb.
    for label, (f_in, snapshot, _) in max_clust.items():
        f_out = "cluster_{}.pdb".format(label + 1)

        # XTC
        if topology:
            found = st.main(output, [f_in],
                            topology, [snapshot],
                            template=f_out)
            if found:
                print("MODEL {} has been selected as {}".format(f_in, f_out))
            else:
                print("MODEL {} not found. Check -f option".format(f_in))
        # PDB
        else:
            model = int(snapshot / out_freq + 1)

            with open(f_in, 'r') as input_file:
                file_content = input_file.read()
            # Raw string avoids the invalid "\s" escape; "\b" stops model 2
            # from matching the header of model 20.
            trajectory_selected = re.search(
                r'MODEL\s+%d\b(.*?)ENDMDL' % model, file_content, re.DOTALL)
            # Fail before touching the output so no empty file is left behind.
            if trajectory_selected is None:
                raise AttributeError(
                    "Model {} not found. Check the -f option.".format(f_in))
            traj = [
                "MODEL     %d" % model,
                trajectory_selected.group(1),
                "ENDMDL\n",
            ]
            with open(os.path.join(output, f_out), 'w') as f:
                f.write("\n".join(traj))
            print("MODEL {} has been selected as {}".format(f_in, f_out))
    t1 = time.time()
    print("Time post processing")
    print(t1 - t0)