Example #1
        print(
            "ERROR: {} is an unrecognized file type. \nThe mean structure must either be provided as an oxDNA configuration file with the extension .dat, .conf or .oxdna or as the .json file produced by compute_mean.py."
            .format(mean_file.split(".")[-1]),  # fills the {} placeholder; assumes a `mean_file` path variable from the surrounding (truncated) code
            file=stderr)
        exit(1)

    cms = np.mean(align_conf,
                  axis=0)  # all structures must have the same center of mass
    align_conf -= cms
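    # Illustrative check (not in the original): the reference structure is now
    # centered on the origin, so deviations are taken about a common frame.
    assert np.allclose(np.mean(align_conf, axis=0), 0)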

    # Compute the deviations and assemble the covariance matrix
    if not parallel:
        r = ErikReader(traj_file)
        covariation_matrix = get_cov(r, align_conf, num_confs)

    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, get_cov, num_confs, n_cpus, align_conf)
        covariation_matrix = np.sum([i for i in out], axis=0)

    covariation_matrix /= (num_confs - 1)
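    # Worked form of the estimate above (note, not in the original): with
    # flattened per-frame deviations d_i = (x_i - align_conf).flatten(),
    #   C = (1/(N-1)) * sum_i outer(d_i, d_i)
    # Summing the workers' partial outer-product sums is valid because every
    # chunk subtracts the same shared reference align_conf.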

    # Now that we have the covariance matrix, use eigendecomposition to get the principal components.
    #make_heatmap(covariance)
    print("INFO: calculating eigenvectors", file=stderr)
    evalues, evectors = np.linalg.eig(
        covariation_matrix)  # note: eig does not guarantee sorted eigenvalues; sorted below
    evectors = evectors.T  # vectors come out as the columns of the array
    print("INFO: eigenvectors calculated", file=stderr)

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
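    # The snippet is truncated here; the INFO message above implies the figure
    # is written out, e.g. (illustrative):
    plt.savefig("scree.png")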
Example #2
        centroid, centroid_a1s, centroid_a3s, centroid_rmsf, centroid_time = compute_centroid(
            r, mean_structure, num_confs)

    # If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
    # Each of those chunks is then calculated separately and the results are compiled.
    if parallel:
        print(
            "INFO: Computing centroid from the mean of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_centroid, num_confs, n_cpus, mean_structure)
        candidates = [i[0] for i in out]
        a1s = [i[1] for i in out]
        a3s = [i[2] for i in out]
        rmsfs = [i[3] for i in out]
        ts = [i[4] for i in out]
        min_id = rmsfs.index(min(rmsfs))
        centroid = candidates[min_id]
        centroid_a1s = a1s[min_id]
        centroid_a3s = a3s[min_id]
        centroid_time = ts[min_id]
        centroid_rmsf = rmsfs[min_id]
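        # Note (not in the original): each worker returns the lowest-RMSF
        # configuration within its chunk, so taking the minimum over the chunk
        # winners recovers the global minimum over the whole trajectory.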

    print(
        "INFO: Centroid configuration found at configuration t = {}, RMSF = {}"
        .format(centroid_time, centroid_rmsf),
        file=stderr)
Example #3
        align_conf_id, align_poses = pick_starting_configuration(traj_file, num_confs)
        n_nuc = len(align_poses.positions)
        # we are just interested in the nucleotide positions
        align_conf = align_poses.positions[indexes]

    # Actually compute the mean structure
    if not parallel:
        print("INFO: Computing mean of {} configurations with an alignment of {} particles using 1 core.".format(num_confs, len(align_conf)), file=stderr)
        r = ErikReader(traj_file)
        mean_pos_storage, mean_a1_storage, mean_a3_storage, intermediate_mean_structures, processed_frames = compute_mean(r, align_conf, num_confs)

    # If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
    # Each of those chunks is then calculated separately and the result is summed.
    if parallel:
        print("INFO: Computing mean of {} configurations with an alignment of {} particles using {} cores.".format(num_confs, len(align_conf), n_cpus), file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(traj_file, compute_mean, num_confs, n_cpus, align_conf)
        mean_pos_storage = np.sum(np.array([i[0] for i in out]), axis=0)
        mean_a1_storage = np.sum(np.array([i[1] for i in out]), axis=0)
        mean_a3_storage = np.sum(np.array([i[2] for i in out]), axis=0)
        intermediate_mean_structures = []
        for i in out:
            intermediate_mean_structures.extend(i[3])
        processed_frames = sum(i[4] for i in out)
    print("INFO: processed frames total: {}".format(processed_frames), file=stderr)

    # Convert the mean structure to a json file
    mean_file = dumps({
        "i_means": intermediate_mean_structures,
        "g_mean": prep_pos_for_json(
            mean_pos_storage / processed_frames
        ),
Example #4
        r = ErikReader(traj_file)
        deviations, RMSDs = compute_deviations(r, mean_structure,
                                               indexed_mean_structure,
                                               num_confs)

    # If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
    # Each of those chunks is then calculated separately and the results are compiled.
    if parallel:
        print(
            "INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using {} cores."
            .format(num_confs, len(indexed_mean_structure), n_cpus),
            file=stderr)
        deviations = []
        RMSDs = []
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_deviations, num_confs, n_cpus, mean_structure,
            indexed_mean_structure)
        for i in out:
            deviations.extend(i[0])
            RMSDs.extend(i[1])

    # compute_deviations() returns the deviation of every particle in every configuration;
    # the root-mean-square over configurations gives each particle's RMSF
    rmsfs = np.sqrt(np.mean(np.square(np.array(deviations)), axis=0)) * 0.8518  # oxDNA simulation units -> nm
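    # Worked form (illustrative): for particle j over N configurations,
    #   RMSF_j = 0.8518 * sqrt((1/N) * sum_i d_ij**2)
    # where d_ij is particle j's deviation in configuration i and 0.8518 nm is
    # the length of one oxDNA simulation unit.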

    # Write the per-particle RMSFs to a json file
    print("INFO: writing deviations to {}".format(outfile), file=stderr)
    with open(outfile, "w") as file:
        file.write(dumps({"RMSF (nm)": rmsfs.tolist()}))
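    # Illustrative read-back (not in the original): round-trip the file just
    # written, using only the standard json module.
    from json import load
    with open(outfile) as readback:
        rmsf_nm = load(readback)["RMSF (nm)"]
    assert len(rmsf_nm) == len(rmsfs)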

    # Plot RMSDs
    print("INFO: writing RMSD plot to {}".format(plot_name), file=stderr)