Пример #1
0
def main():
    """Write a size-reduced copy of a trajectory.

    Command line: ``trajectory outfile [-a] [-p DIGITS]``.

    Every configuration is read from ``trajectory`` with ``ErikReader`` and
    appended to ``outfile``.  With ``-p`` the positions and the a1/a3
    vectors are rounded to the requested number of digits; with ``-a`` the
    a1/a3 vectors are subtracted from themselves (i.e. zeroed) before
    writing.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Compress given configuration.")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('outfile', type=str, nargs=1, help='minified file')

    parser.add_argument('-a', action='store_true', help='Discard a vectors.')
    parser.add_argument(
        '-p',
        type=int,
        nargs=1,
        help=
        'Round positions and orientations to the specified number of digits.')

    args = parser.parse_args()

    traj_file = args.trajectory[0]
    out = args.outfile[0]
    # get the number of configurations
    n_confs = cal_confs(traj_file)

    # Configurations are appended below, so a stale output file must go.
    # Only filesystem errors (typically "no such file") are ignored here;
    # KeyboardInterrupt and programming errors are no longer swallowed.
    try:
        remove(out)
    except OSError:
        pass

    # args.p is either None or a one-element list (nargs=1)
    digits = args.p[0] if args.p else None

    with ErikReader(traj_file) as reader:
        for i in range(n_confs):
            print(i + 1, ":", n_confs)
            # Erik reader ignores velocities
            system = reader.read()
            if digits is not None:  # round positions and orientations
                system.positions = round(system.positions, digits)
                system.a1s = round(system.a1s, digits)
                system.a3s = round(system.a3s, digits)
            if args.a:  # discard a vectors
                system.a1s -= system.a1s
                system.a3s -= system.a3s
            # output conf
            system.write_append(out)
Пример #2
0
def main():
    """Compare the bonds found along a trajectory with the designed pairs.

    Command line:
    ``inputfile trajectory designed_pairs output_file [-p num_cpus]``.

    Runs ``bond_analysis`` over the trajectory (serially, or split across
    ``num_cpus`` workers), prints the bond / missing-bond totals averaged
    over the number of analyzed configurations, and writes the averaged
    occupancy array to ``output_file`` as ``{"occupancy" : [...]}``.
    Exits with status 1 if the analysis yields no usable result.
    """
    #read data from files
    parser = argparse.ArgumentParser(prog = path.basename(__file__), description="Compare the bonds found at each trajectory with the intended design")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help="The trajecotry file to compare against the designed pairs")
    parser.add_argument('designed_pairs', type=str, nargs=1, help="The file containing the desired nucleotides pairings in the format \n a b\nc d")
    parser.add_argument('output_file', type=str, nargs=1, help="name of the file to save the output json overlay to")
    parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel', help="(optional) How many cores to use")

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    args = parser.parse_args()
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    designfile = args.designed_pairs[0]
    outfile = args.output_file[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)

    with open(designfile, 'r') as file:
        pairs = file.readlines()

    if not parallel:
        print("INFO: Computing base pairs in {} configurations using 1 core.".format(num_confs), file=stderr)
        r = LorenzoReader2(traj_file,top_file)
        result = bond_analysis(r, pairs, inputfile, num_confs)
        # The unpack itself is what fails when the analysis did not return
        # its four values, so it has to live inside the guard.
        try:
            tot_bonds, tot_missbonds, out_array, confid = result
        except (TypeError, ValueError):
            print("ERROR: DNAnalysis encountered an error and could not analyze the trajectory", file=stderr)
            exit(1)

    if parallel:
        print("INFO: Computing base pairs in {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file, top_file, bond_analysis, num_confs, n_cpus, pairs, inputfile)

        # Accumulate the per-chunk totals, skipping chunks flagged invalid
        # (first element is None).
        tot_bonds = 0
        tot_missbonds = 0
        confid = 0
        for i in out:
            if i[0] is not None:
                tot_bonds += i[0]
                tot_missbonds += i[1]
                confid += i[3]
            else:
                print("WARNING: Some configurations were invalid and not included in the analysis.  Please check the logs", file=stderr)

        out_array = sum((i[2] for i in out if len(i[2]) > 0))

    n_valid = int(confid)
    if n_valid == 0:
        # Averaging over zero configurations would raise ZeroDivisionError.
        print("ERROR: No valid configurations were analyzed", file=stderr)
        exit(1)

    print("\nSummary:\navg bonds: {}\navg_missbonds: {}".format(tot_bonds/n_valid,tot_missbonds/n_valid))

    print("INFO: Writing bond occupancy data to {}".format(outfile))
    occupancy = ", ".join(str(n/n_valid) for n in out_array)
    with open(outfile, "w+") as file:
        file.write("{\n\"occupancy\" : [" + occupancy + "] \n}")
Пример #3
0
def main():
    """Compute the eRMSD matrix of a trajectory and cluster it with DBSCAN.

    Command line: ``inputfile trajectory [-p num_cpus]``.

    The matrix returned by ``get_eRMSDs`` (summed over worker chunks when
    running in parallel) is mirrored across the diagonal, the diagonal is
    set to zero, the result is pickled to ``tmp_eRMSDs`` in the working
    directory, and finally handed to ``perform_DBSCAN`` as a precomputed
    distance matrix.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculate differences between structures and automatically apply DBSCAN to retrieve clusters"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)
    import UTILS.base  #this needs to be imported after the model type is set

    r2 = LorenzoReader2(traj_file, top_file)

    #how do you want to get your eRMSDs?  Do you need to do the time-consuming calculation or is it done and you have a pickle?
    if not parallel:
        r1 = LorenzoReader2(traj_file, top_file)

        eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)
    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(traj_file,
                                                            top_file,
                                                            get_eRMSDs,
                                                            num_confs,
                                                            n_cpus,
                                                            r2,
                                                            inputfile,
                                                            traj_file,
                                                            top_file,
                                                            matrix=True)
        # np.sum() on a generator is deprecated: it ignores ``axis`` and
        # silently falls back to the builtin sum.  Call the builtin
        # directly to add the per-chunk matrices element-wise.
        eRMSDs = sum(out)

    #the eRMSD matrix is actually only half a matrix
    # NOTE: rows are modified while being iterated; the statement order is
    # intentional and must be kept.
    for ni, i in enumerate(eRMSDs):
        for nj, j in enumerate(i):
            eRMSDs[nj][ni] = j
            if ni == nj:
                eRMSDs[ni][nj] = 0

    #since calculating the eRMSDs are so time-consuming to calculate we're gonna pickle it to iterate the DBSCAN later.
    with open("tmp_eRMSDs", "wb") as file:
        pickle.dump(eRMSDs, file)

    ###############################################################################################################
    #Next, we're going to perform a DBSCAN on that matrix of eRMSDs to find clusters of similar structures
    perform_DBSCAN(eRMSDs, num_confs, traj_file, inputfile, "precomputed", 12,
                   8)
Пример #4
0
def main():
    """Run a principal component analysis of nucleotide deviations.

    Command line:
    ``inputfile trajectory meanfile outfile [-p num_cpus] [-c]``.

    Steps:
      1. Load the reference positions from ``meanfile`` (``.json`` written
         by compute_mean.py, or an oxDNA ``.dat``) and centre them.
      2. Build the deviations matrix with ``get_pca`` (serially, or
         concatenated from worker chunks when ``-p`` is given).
      3. Fit a 3-component PCA, save ``scree.png`` and ``coordinates.png``.
      4. Write the eigenvalue-weighted sum of the first ``SUM`` components
         to ``outfile`` as ``{"pca": [[x, y, z], ...]}``.
      5. With ``-c``, pass the projected configurations to
         ``perform_DBSCAN``.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculates a principal component analysis of nucleotide deviations over a trajectory"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='The mean structure .json file from compute_mean.py')
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-c',
        metavar='cluster',
        dest='cluster',
        action='store_const',
        const=True,
        default=False,
        help="Run the clusterer on each configuration's position in PCA space?"
    )
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    #-c makes it run the clusterer on the output
    cluster = args.cluster
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import UTILS.base  #this needs to be imported after the model type is set

    num_confs = cal_confs(traj_file)

    mean_ext = mean_file.split(".")[-1]
    if mean_ext == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']

    elif mean_ext == "dat":
        fetch_np = lambda conf: np.array([n.cm_pos for n in conf._nucleotides])
        with LorenzoReader2(mean_file, top_file) as reader:
            s = reader._get_system()
            align_conf = fetch_np(s)

    else:
        # Without this branch align_conf would be undefined below.
        print("ERROR: The mean structure must be a .json or .dat file",
              file=stderr)
        exit(1)

    cms = np.mean(align_conf,
                  axis=0)  #all structures must have the same center of mass
    align_conf -= cms

    #Compute the deviations
    if not parallel:
        r = LorenzoReader2(traj_file, top_file)
        deviations_matrix = get_pca(r, align_conf, num_confs)

    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
        deviations_matrix = np.concatenate([i for i in out])

    #now that we have the deviations matrix we're gonna get the covariance and PCA it
    #note that in the future we might want a switch for covariance vs correlation matrix because correlation (cov/stdev so all diagonals are 1) is better for really floppy structures
    pca = PCA(n_components=3)
    pca.fit(deviations_matrix)

    # The code below needs eigenvalues and (row-wise) eigenvectors, which
    # were previously never defined.
    # NOTE(review): assumes PCA is sklearn.decomposition.PCA, whose fitted
    # attributes are explained_variance_ (n_components,) and components_
    # (n_components, n_features) -- confirm against the file's imports.
    evalues = pca.explained_variance_
    evectors = pca.components_

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    print(
        "INFO: Creating coordinate plot from first three eigenvectors.  Saving to coordinates.png",
        file=stderr)

    #reconstruct configurations in component space
    # evectors holds one component per row, hence the transpose
    out = np.dot(deviations_matrix, evectors.T).astype(float)

    #make a quick plot from the first three components
    # imported for its side effect: registers the '3d' projection on older
    # matplotlib releases
    from mpl_toolkits.mplot3d import Axes3D
    fig = plt.figure()
    # fig.gca(projection=...) is no longer accepted by recent matplotlib
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(out[:, 0], out[:, 1], out[:, 2], c='g', s=25)
    plt.savefig("coordinates.png")

    #Create an oxView overlay showing the first SUM components
    SUM = 1
    print(
        "INFO: Change the number of eigenvalues to sum and display by modifying the SUM variable in the script.  Current value: {}"
        .format(SUM),
        file=stderr)
    weighted_sum = np.zeros_like(evectors[0])
    for i in range(0, SUM):  #how many eigenvalues do you want?
        weighted_sum += evalues[i] * evectors[i]

    prep_pos_for_json = lambda conf: list(list(p) for p in conf)
    with catch_warnings(
    ):  #this produces an annoying warning about casting complex values to real values that is not relevant
        simplefilter("ignore")
        # one (x, y, z) row per particle
        output_vectors = weighted_sum.reshape(int(weighted_sum.shape[0] / 3),
                                              3).astype(float)
    with open(outfile, "w+") as file:
        file.write(dumps({"pca": prep_pos_for_json(output_vectors)}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...",
              file=stderr)

        #If you want to cluster on only some of the components, uncomment this
        #out = out[:,0:3]

        from clustering import perform_DBSCAN
        labs = perform_DBSCAN(out, num_confs, traj_file, inputfile,
                              "euclidean", 12, 8)
def main():
    """Fit vectors to every duplex in a trajectory and write them to a file.

    Command line: ``inputfile trajectory [-p num_cpus] [-o output_file]``.

    ``find_angles`` is run over the trajectory (serially, or split across
    ``num_cpus`` workers) and one tab-separated line per duplex per
    configuration is written to the output file (default ``angles.txt``).
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description="Fit vectors to every duplex in the structure")
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help="The trajectory file from the simulation")
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        type=str,
                        nargs=1,
                        help='name of the file to write the angle list to')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #Process command line arguments:
    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        outfile = "angles.txt"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #Get relevant parameters from the input file
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    #Calculate the number of configurations.
    num_confs = cal_confs(traj_file)

    # NOTE(review): the first configuration is read here but the result is
    # not used in this function; kept in case the read is relied on as an
    # early sanity check of the input files -- confirm.
    r0 = LorenzoReader2(traj_file, top_file)
    r0._get_system()

    #launch find_angle using the appropriate number of threads to find all duplexes.
    if not parallel:
        print(
            "INFO: Fitting duplexes to {} configurations using 1 core.".format(
                num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        duplexes_at_step = find_angles(r, inputfile, num_confs)

    if parallel:
        print("INFO: Fitting duplexes to {} configurations using {} cores.".
              format(num_confs, n_cpus),
              file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, find_angles, num_confs, n_cpus, inputfile)
        # flatten the per-worker results into one list of steps
        duplexes_at_step = []
        for chunk in out:
            duplexes_at_step.extend(chunk)

    if [] in duplexes_at_step:
        print(
            "WARNING: Some configurations were invalid and not included in the analysis.  Please check the log to view the error",
            file=stderr)

    #print duplexes to a file
    print(
        "INFO: Writing duplex data to {}.  Use duplex_angle_plotter to graph data"
        .format(outfile),
        file=stderr)
    row_format = '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t[{},{},{}]\n'
    # the context manager guarantees the file is flushed and closed even if
    # a malformed duplex record raises while formatting
    with open(outfile, 'w') as output:
        output.write(
            "time\tduplex\tstart1\tend1\tstart2\tend2\taxisX\taxisY\taxisZ\thel_pos\n"
        )
        for step in duplexes_at_step:
            for duplex in step:
                output.write(
                    row_format.format(duplex.time, duplex.index,
                                      duplex.start1, duplex.end1,
                                      duplex.start2, duplex.end2,
                                      duplex.axis[0], duplex.axis[1],
                                      duplex.axis[2],
                                      duplex.final_hel_pos[0],
                                      duplex.final_hel_pos[1],
                                      duplex.final_hel_pos[2]))
def main():
    """Compute the mean structure of a trajectory.

    Command line: ``trajectory [-p num_cpus] [-o output_file]
    [-f json|oxDNA|both] [-d deviation_file] [-i index_file]
    [-a alignment_configuration]``.

    A reference configuration is chosen with
    ``pick_starting_configuration``; ``compute_mean`` is then run over the
    trajectory (serially or split across ``num_cpus`` workers) and the
    accumulated sums are divided by the number of processed frames.  The
    result is written as json and/or oxDNA ``.dat`` (oxDNA when ``-f`` is
    not given).  With ``-d`` the json is also fed to
    ``compute_deviations.main()`` by rewriting ``sys.argv``.

    Exits with status 1 on an unrecognized output format or an index file
    that is not a list of integers.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the mean structure of a trajectory file")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        nargs=1,
                        help='The filename to save the mean structure to')
    # The help text used to claim a json default; the code below defaults
    # to oxDNA, so the text now says so.
    parser.add_argument(
        '-f',
        '--format',
        metavar='<json/oxDNA/both>',
        nargs=1,
        help=
        'Output format for the mean file.  Defaults to oxDNA.  Options are \"json\", \"oxdna/oxDNA\", and \"both\"'
    )
    parser.add_argument(
        '-d',
        '--deviations',
        metavar='deviation_file',
        nargs=1,
        help='Immediatley run compute_deviations.py from the output')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Compute mean structure of a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument(
        '-a',
        '--align',
        metavar='alignment_configuration',
        nargs=1,
        help='The id of the configuration to align to, otherwise random')
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy"])

    #get file names
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        from oxDNA_analysis_tools.UTILS import parallelize_erik_onefile
        n_cpus = args.parallel[0]

    #-f defines the format of the output file
    outjson = False
    outoxdna = False
    if args.format:
        # args.format is a one-element list, so these are exact matches
        if "json" in args.format:
            outjson = True
        if "oxDNA" in args.format or "oxdna" in args.format:
            outoxdna = True
        if "both" in args.format:
            outjson = True
            outoxdna = True
        if not outjson and not outoxdna:
            print(
                "ERROR: unrecognized output format\nAccepted formats are \"json\", \"oxDNA/oxdna\", and \"both\"",
                file=stderr)
            exit(1)
    else:
        print("INFO: No output format specified, defaulting to oxDNA",
              file=stderr)
        outoxdna = True

    #-o names the output file
    if args.output:
        outfile = args.output[0]
    else:
        if outjson and not outoxdna:
            ext = ".json"
        elif outjson and outoxdna:
            ext = ".json/.dat"
        elif outoxdna and not outjson:
            ext = ".dat"
        outfile = "mean{}".format(ext)
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #-d will run compute_deviations.py when this script is completed.
    dev_file = None
    if args.deviations:
        dev_file = args.deviations[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            # Carrying on with string indexes would only fail later with a
            # far less helpful error, so stop here.
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #calculate the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    # Pick the reference configuration which is used to define alignment
    align = None
    if args.align:
        align = args.align[0]
    align_conf_id, align_poses = pick_starting_configuration(
        traj_file, num_confs, align)
    # we are just interested in the nucleotide positions
    align_conf = align_poses.positions[indexes]

    #Actually compute mean structure
    if not parallel:
        print(
            "INFO: Computing mean of {} configurations with an alignment of {} particles using 1 core."
            .format(num_confs, len(align_conf)),
            file=stderr)
        r = ErikReader(traj_file)
        mean_pos_storage, mean_a1_storage, mean_a3_storage, intermediate_mean_structures, processed_frames = compute_mean(
            r, align_conf, indexes, num_confs)

    #If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
    #Each of those chunks is then calculated separately and the result is summed.
    if parallel:
        print(
            "INFO: Computing mean of {} configurations with an alignment of {} particles using {} cores."
            .format(num_confs, len(align_conf), n_cpus),
            file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_mean, num_confs, n_cpus, align_conf, indexes)
        mean_pos_storage = np.sum(np.array([i[0] for i in out]), axis=0)
        mean_a1_storage = np.sum(np.array([i[1] for i in out]), axis=0)
        mean_a3_storage = np.sum(np.array([i[2] for i in out]), axis=0)
        intermediate_mean_structures = []
        for chunk in out:
            intermediate_mean_structures.extend(chunk[3])
        processed_frames = sum((i[4] for i in out))
    # finished task entry
    print("INFO: processed frames total: {}".format(processed_frames),
          file=stderr)

    #Convert mean structure to a json file
    mean_file = dumps({
        "i_means":
        intermediate_mean_structures,
        "g_mean":
        prep_pos_for_json(mean_pos_storage / processed_frames),
        "a1_mean":
        prep_pos_for_json(
            [normalize(v) for v in (mean_a1_storage / processed_frames)]),
        "a3_mean":
        prep_pos_for_json(
            [normalize(v) for v in (mean_a3_storage / processed_frames)]),
        "p_frames":
        processed_frames,
        "ini_conf": {
            "conf": prep_pos_for_json(align_conf),
            "id": align_conf_id
        }
    })

    #Save the mean structure to the specified output file.
    # the json is also needed as input for compute_deviations (-d)
    if outjson or dev_file:
        #save output as json format
        if outoxdna:
            #if making both outputs, automatically set file extensions.
            jsonfile = outfile.split(".")[0] + ".json"
        else:
            jsonfile = outfile
        print("INFO: Writing mean configuration to", jsonfile, file=stderr)
        with open(jsonfile, "w") as file:
            file.write(mean_file)

    if outoxdna:
        #save output as oxDNA .dat format
        if outjson:
            #if making both outputs, automatically set file extensions.
            outname = outfile.split(".")[0] + ".dat"
        else:
            outname = outfile
        from oxDNA_analysis_tools.mean2dat import make_dat
        make_dat(loads(mean_file), outname)

    #If requested, run compute_deviations.py using the output from this script.
    if dev_file:
        print("INFO: launching compute_deviations.py", file=stderr)

        #this is probably horrible practice, but to maintain the ability to call things from the command line, I cannot pass arguments between main() calls.
        #so instead we're gonna spoof a global variable to make it look like compute_deviations was called explicitly
        argv.clear()
        argv.extend([
            'compute_deviations.py', '-o', dev_file, "-r",
            dev_file.split('.')[0] + "_rmsd.png", "-d",
            dev_file.split('.')[0] + "_rmsd_data.json"
        ])
        if args.index_file:
            argv.append("-i")
            argv.append(index_file)
        if parallel:
            argv.append("-p")
            argv.append(str(n_cpus))
        argv.append(jsonfile)
        argv.append(traj_file)

        from oxDNA_analysis_tools import compute_deviations
        from sys import executable
        print(executable)
        print(argv)

        compute_deviations.main()

        #compute_deviations needs the json meanfile, but its not useful for visualization
        #so we dump it
        if not outjson:
            print("INFO: deleting {}".format(jsonfile), file=stderr)
            from os import remove
            remove(jsonfile)

    print(time.time() - start_t)
Пример #7
0
def main():
    """Find the trajectory configuration closest to the mean structure.

    Command line:
    ``mean_structure trajectory [-p num_cpus] [-o output_file]
    [-i index_file]``.

    ``mean_structure`` is the json written by compute_mean.py or an oxDNA
    ``.dat`` file.  ``compute_centroid`` is run over the trajectory
    (serially, or once per worker chunk, keeping the candidate with the
    lowest RMSF) and the winning configuration is written with
    ``make_dat`` to the output file (default ``centroid.dat``).

    Exits with status 1 on an index file that is not a list of integers or
    on an unsupported mean-structure file extension.
    """
    import numpy as np  # local import: used to index the json mean below

    #handle commandline arguments
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "Find the configuration in a trajectory that is closest to the mean structure produced by compute_mean.py"
    )
    parser.add_argument(
        'mean_structure',
        type=str,
        nargs=1,
        help="The mean structure .json file from compute_mean.py")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument('-o',
                        '--output',
                        metavar='output_file',
                        nargs=1,
                        help='The filename to save the centroid to')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Compute mean structure of a subset of particles from a space-separated list in the provided file'
    )
    args = parser.parse_args()

    #system check
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy"])

    #-o names the output file
    if args.output:
        outfile = args.output[0].strip()
    else:
        outfile = "centroid.dat"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #prepare the data files and calculate how many configurations there are to run
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    num_confs = cal_confs(traj_file)

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            # String indexes would only fail later with a confusing error.
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            exit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    # load mean structure
    mean_file = args.mean_structure[0]
    mean_ext = mean_file.split(".")[-1]
    if mean_ext == "json":
        with open(mean_file) as file:
            # json gives nested lists; a plain list cannot be indexed with
            # a list of ints, so convert to an array first
            mean_structure = np.array(load(file)['g_mean'])[indexes]

    elif mean_ext == "dat":
        with ErikReader(mean_file) as reader:
            s = reader.read()
            mean_structure = s.positions[indexes]

    else:
        # Without this branch mean_structure would be undefined below.
        print("ERROR: The mean structure must be a .json or .dat file",
              file=stderr)
        exit(1)
    print("INFO: mean structure loaded", file=stderr)

    #Calculate centroid, in parallel if available
    if not parallel:
        print(
            "INFO: Computing centroid from the mean of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = ErikReader(traj_file)
        centroid, centroid_a1s, centroid_a3s, centroid_rmsf, centroid_time = compute_centroid(
            r, mean_structure, indexes, num_confs)

    #If parallel, the trajectory is split into a number of chunks equal to the number of CPUs available.
    #Each of those chunks is then calculated separately and the results are compiled.
    if parallel:
        print(
            "INFO: Computing centroid from the mean of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_centroid, num_confs, n_cpus, mean_structure,
            indexes)
        # Each chunk yields (centroid, a1s, a3s, rmsf, time); keep the
        # first candidate with the lowest RMSF.
        best = min(out, key=lambda candidate: candidate[3])
        centroid = best[0]
        centroid_a1s = best[1]
        centroid_a3s = best[2]
        centroid_rmsf = best[3]
        centroid_time = best[4]

    print(
        "INFO: Centroid configuration found at configuration t = {}, RMSF = {}"
        .format(centroid_time, centroid_rmsf),
        file=stderr)

    from oxDNA_analysis_tools.mean2dat import make_dat

    make_dat(
        {
            'g_mean': centroid,
            'a1_mean': centroid_a1s,
            'a3_mean': centroid_a3s
        }, outfile)
def main():
    """Compute per-particle RMSF and per-configuration RMSD from a mean structure.

    Command-line entry point.  Loads the mean structure (the ``g_mean`` entry
    of the JSON file given as first positional argument) and runs
    ``compute_deviations`` over the trajectory, either serially or split over
    several cores when ``-p`` is given.  Outputs:

    * ``-o``: JSON file with the RMSF of every particle (default ``devs.json``;
      a ``.json`` suffix is appended when missing),
    * ``-r``: plot of the RMSD of every configuration (default ``rmsd.png``),
    * ``-d``: optional JSON file with the raw RMSD values.

    ``-i`` restricts the particles used from the mean structure to the IDs
    listed (space-separated, first line only) in the given index file.

    Exits with status 1 when the index file does not contain integers.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "Compute the RMSD of each nucleotide from the mean structure produced by compute_mean.py"
    )
    parser.add_argument(
        'mean_structure',
        type=str,
        nargs=1,
        help="The mean structure .json file from compute_mean.py")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-o',
        '--output',
        metavar='output_file',
        nargs=1,
        help='The filename to save the deviations json file to')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Compute mean structure of a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument('-r',
                        metavar='rmsd_plot',
                        dest='rmsd_plot',
                        nargs=1,
                        help='The name of the file to save the RMSD plot to.')
    parser.add_argument(
        '-d',
        metavar='rmsd_data',
        dest='rmsd_data',
        nargs=1,
        help='The name of the file to save the RMSD data in json format.')
    args = parser.parse_args()

    #system check
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "Bio", "numpy", "matplotlib"])

    #-o names the output file
    if args.output:
        outfile = args.output[0].strip()
        if outfile.split(".")[-1] != 'json':
            outfile += ".json"
    else:
        outfile = "devs.json"
        print("INFO: No outfile name provided, defaulting to \"{}\"".format(
            outfile),
              file=stderr)

    #prepare the data files and calculate how many configurations there are to run
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        from oxDNA_analysis_tools.UTILS import parallelize_erik_onefile
        n_cpus = args.parallel[0]
    num_confs = cal_confs(traj_file)

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            tokens = f.readline().split()
        try:
            indexes = [int(i) for i in tokens]
        except ValueError:
            #continuing with string indexes would only fail later with a
            #confusing indexing error, so stop here
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            raise SystemExit(1)
    else:
        #no subset requested: use every particle of the first configuration
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r names the file to print the RMSD plot to
    plot_name = args.rmsd_plot[0] if args.rmsd_plot else 'rmsd.png'

    # -d names the file to print the RMSD data to (None: do not write it)
    data_file = args.rmsd_data[0] if args.rmsd_data else None

    # load mean structure
    mean_structure_file = args.mean_structure[0]
    with open(mean_structure_file) as file:
        mean_data = loads(file.read())
    mean_structure = np.array(mean_data["g_mean"])
    indexed_mean_structure = mean_structure[indexes]
    print("INFO: mean structure loaded", file=stderr)

    #Calculate deviations, in parallel if available
    if parallel:
        #The trajectory is split into chunks which are processed separately;
        #the per-chunk results are then concatenated.
        print(
            "INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using {} cores."
            .format(num_confs, len(indexed_mean_structure), n_cpus),
            file=stderr)
        deviations = []
        RMSDs = []
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, compute_deviations, num_confs, n_cpus, mean_structure,
            indexed_mean_structure, indexes)
        for chunk in out:
            deviations.extend(chunk[0])
            RMSDs.extend(chunk[1])
    else:
        print(
            "INFO: Computing deviations from the mean of {} configurations with an alignment of {} particles using 1 core."
            .format(num_confs, len(indexed_mean_structure)),
            file=stderr)
        #context manager so the trajectory is released once processed
        with ErikReader(traj_file) as r:
            deviations, RMSDs = compute_deviations(r, mean_structure,
                                                   indexed_mean_structure,
                                                   indexes, num_confs)

    #compute_deviations() returns the deviation of every particle in every configuration
    #take the root mean square over configurations to get the RMSF;
    #0.8518 rescales the result to the nm reported in the output key
    rmsfs = np.sqrt(np.mean(np.square(np.array(deviations)), axis=0)) * 0.8518

    #write the deviations to a json file
    print("INFO: writing deviations to {}".format(outfile), file=stderr)
    with open(outfile, "w") as file:
        file.write(dumps({"RMSF (nm)": rmsfs.tolist()}))

    #plot RMSDs
    print("INFO: writing RMSD plot to {}".format(plot_name), file=stderr)
    plt.plot(RMSDs)
    plt.axhline(np.mean(RMSDs), color='red')
    plt.xlabel('Configuration')
    plt.ylabel('RMSD (nm)')
    plt.savefig(plot_name)

    #print RMSDs, only when -d was requested
    if data_file:
        print("INFO: writing RMSD data to {}".format(data_file), file=stderr)
        with open(data_file, 'w') as f:
            f.write(dumps({"RMSD (nm)": RMSDs}))
Пример #9
0
def main():
    """Compute mean backbone torsion and dihedral angles over a trajectory.

    Command-line entry point.  Runs ``get_internal_coords`` over the
    trajectory (serially, or per chunk with ``-p``), averages the returned
    torsions and dihedrals along axis 1, shows a scatter plot of the two and
    writes the mean torsions to the requested JSON file under the key
    ``torsion``.
    """
    cli = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Computes the deviations in the backbone torsion angles")
    cli.add_argument('trajectory',
                     type=str,
                     nargs=1,
                     help='the trajectory file you wish to analyze')
    cli.add_argument(
        'topology',
        type=str,
        nargs=1,
        help="The topology file associated with the trajectory file")
    cli.add_argument('outfile',
                     type=str,
                     nargs=1,
                     help='The file name for the output .json file.')
    cli.add_argument('-p',
                     metavar='num_cpus',
                     nargs=1,
                     type=int,
                     dest='parallel',
                     help="(optional) How many cores to use")
    args = cli.parse_args()

    # make sure the required packages are available
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    top_file = args.topology[0]
    traj_file = args.trajectory[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    num_confs = cal_confs(traj_file)

    # the reader is created regardless of mode; only the serial path uses it
    r = LorenzoReader2(traj_file, top_file)

    if parallel:
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_internal_coords, num_confs, n_cpus)
        # each entry of out is one chunk: (torsions, dihedrals);
        # the chunks are joined along axis 1
        torsion_chunks = []
        dihedral_chunks = []
        for cpu in range(n_cpus):
            torsion_chunks.append(out[cpu][0])
        for cpu in range(n_cpus):
            dihedral_chunks.append(out[cpu][1])
        torsions = np.concatenate(torsion_chunks, axis=1)
        dihedrals = np.concatenate(dihedral_chunks, axis=1)
    else:
        torsions, dihedrals = get_internal_coords(r, num_confs)

    torsion_mean = np.mean(torsions, axis=1).tolist()
    dihedral_mean = np.mean(dihedrals, axis=1).tolist()

    # something akin to a Ramachandran plot for DNA origami
    import matplotlib.pyplot as plt
    plt.scatter(torsion_mean[1:], dihedral_mean)
    plt.xlabel("torsion_angle")
    plt.ylabel("dihedral_angle")
    plt.show()

    # pad the front of the list with two copies of the first mean torsion
    torsion_mean = [torsion_mean[0]] * 2 + torsion_mean
    payload = dumps({"torsion": torsion_mean})
    with open(args.outfile[0], "w") as file:
        file.write(payload)
def main():
    """Build a mean structure from averaged inter-particle distances via MDS.

    Command-line entry point.  The distance maps returned by ``get_mean`` are
    averaged over the trajectory, every entry that is not below
    ``cutoff_distance`` is masked, and the remaining local distances are
    embedded in 3D with multidimensional scaling.  The embedded coordinates
    are written as ``<meanfile>.dat`` / ``<meanfile>.top``.  A second pass
    over the trajectory (``get_devs``) yields the contact deviations, which
    are written to ``<devfile>.json`` under the key ``contact deviation``.

    Both passes run serially, or split over cores when ``-p`` is given.
    """
    #at 2.5 you start to see the hard edges caused by end-loops and see some loop interactions
    cutoff_distance = 2.5

    #get commandline arguments
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculate molecular contacts, and assembles an average set of contacts based on MDS"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='the name of the .dat file where the mean will be written')
    parser.add_argument(
        'devfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the devs will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")

    #process commandline arguments
    args = parser.parse_args()
    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    meanfile = args.meanfile[0]
    devfile = args.devfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    top_file = get_input_parameter(inputfile, "topology")
    #flag RNA simulations through the OXRNA environment variable
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    #get the number of configurations in the trajectory
    num_confs = cal_confs(traj_file)

    #Get the summed distance to all other particles
    if parallel:
        print(
            "INFO: Computing interparticle distances of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_mean, num_confs, n_cpus, inputfile)
        #every chunk returns its own sum; add them up
        cartesian_distances = np.sum(np.array(list(out)), axis=0)
    else:
        print(
            "INFO: Computing interparticle distances of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        cartesian_distances = get_mean(r, inputfile, num_confs)

    mean_distance_map = cartesian_distances * (1 / (num_confs))

    #Making a new configuration file from scratch is hard, so we're just going to read in one and then overwrite the positional information
    r = LorenzoReader2(traj_file, top_file)
    output_system = r._get_system()

    #only distances below the cutoff take part in the embedding
    masked_mean = np.ma.masked_array(mean_distance_map,
                                     ~(mean_distance_map < cutoff_distance))

    print("INFO: fitting local distance data", file=stderr)

    #Notes from earlier experiments: solving this analytically with DGSOL did
    #not scale to origami-sized systems, and locally linear embedding
    #(sklearn / megaman) was also tried.  MDS worked best.

    #Run multidimensional scaling on the average distances to find average positions
    from sklearn.manifold import MDS
    mds = MDS(n_components=3,
              metric=True,
              max_iter=3000,
              eps=1e-12,
              dissimilarity="precomputed",
              n_jobs=1,
              n_init=1)
    out_coords = mds.fit_transform(masked_mean)

    #Overwrite the system we made earlier with the coordinates calculated via MDS
    for i, n in enumerate(output_system._nucleotides):
        n.cm_pos = out_coords[i]
        #since the orientation vectors are all 0, this cannot be used in a simulation, but the viewer will handle it
        n._a1 = np.array([0, 0, 0])
        n._a3 = np.array([0, 0, 0])

    #Write the mean structure out as a new .dat and .top pair
    output_system.print_lorenzo_output("{}.dat".format(meanfile),
                                       "{}.top".format(meanfile))
    print("INFO: wrote output files: {}.dat, {}.top".format(
        meanfile, meanfile),
          file=stderr)

    #Loop through the trajectory again and calculate deviations from the average distances
    if parallel:
        print(
            "INFO: Computing distance deviations of {} configurations using {} cores."
            .format(num_confs, n_cpus),
            file=stderr)
        out = parallelize_lorenzo_onefile.fire_multiprocess(
            traj_file, top_file, get_devs, num_confs, n_cpus, masked_mean,
            inputfile, cutoff_distance)
        devs = np.sum(np.array(list(out)), axis=0)
    else:
        #only announce the single-core run when it is actually single-core
        print(
            "INFO: Computing distance deviations of {} configurations using 1 core."
            .format(num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        devs = get_devs(r, masked_mean, inputfile, cutoff_distance, num_confs)

    #Dump the deviations to an oxView overlay file
    devs = np.ma.masked_array(
        devs,
        ~(devs != 0.0))  #mask all the 0s so they don't contribute to the mean
    devs *= (1 / num_confs)
    devs = np.mean(devs, axis=0)
    devs = np.sqrt(devs)
    with open(devfile + ".json", "w") as file:
        file.write(dumps({"contact deviation": list(devs)}))
    print("INFO: wrote file {}.json".format(devfile), file=stderr)
Пример #11
0
def main():
    """Run a principal component analysis of particle positions over a trajectory.

    Command-line entry point.  The reference structure is read either from
    the ``g_mean`` entry of a ``.json`` file or from an oxDNA configuration
    (``.dat``, ``.conf``, ``.oxdna``) and centred on its mean position.  The
    covariance accumulated by ``get_cov`` is eigendecomposed, then:

    * a scree plot is saved to ``scree.png``,
    * every configuration is projected onto the components
      (``change_basis``) and the first three coordinates are plotted to
      ``coordinates.png``,
    * the first ``N`` components are written as oxView overlays named
      ``<outfile stem><i>.json``,
    * with ``-c`` the projected coordinates are passed to ``perform_DBSCAN``.

    Exits with status 1 when the mean structure file type is not recognised.
    """
    parser = argparse.ArgumentParser(
        prog=path.basename(__file__),
        description=
        "Calculates a principal component analysis of nucleotide deviations over a trajectory"
    )
    parser.add_argument('inputfile',
                        type=str,
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        'meanfile',
        type=str,
        nargs=1,
        help='The mean structure .json file from compute_mean.py')
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='the name of the .json file where the PCA will be written')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    parser.add_argument(
        '-c',
        metavar='cluster',
        dest='cluster',
        action='store_const',
        const=True,
        default=False,
        help="Run the clusterer on each configuration's position in PCA space?"
    )
    args = parser.parse_args()

    check_dependencies(["python", "numpy", "Bio"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    mean_file = args.meanfile[0]
    outfile = args.outfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]
    #-c makes it run the clusterer on the output
    cluster = args.cluster

    num_confs = cal_confs(traj_file)

    #load the reference structure according to the file extension
    extension = mean_file.split(".")[-1]
    if extension == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']
    elif extension in ("dat", "conf", "oxdna"):
        with ErikReader(mean_file) as reader:
            align_conf = reader.read().positions
    else:
        print(
            "ERROR: {} is an unrecognized file type. \nThe mean structure must either be provided as an oxDNA configuration file with the extension .dat, .conf or .oxdna or as the .json file produced by compute_mean.py."
            .format(mean_file),
            file=stderr)
        exit(1)

    #the json branch yields nested lists; work on an array in both cases
    align_conf = np.array(align_conf)
    cms = np.mean(align_conf,
                  axis=0)  #all structures must have the same center of mass
    align_conf -= cms

    #Compute the deviations
    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, get_cov, num_confs, n_cpus, align_conf)
        covariation_matrix = np.sum(list(out), axis=0)
    else:
        with ErikReader(traj_file) as r:
            covariation_matrix = get_cov(r, align_conf, num_confs)

    covariation_matrix /= (num_confs - 1)

    #now that we have the covariation matrix we're going to use eigendecomposition to get the principal components.
    print("INFO: calculating eigenvectors", file=stderr)
    evalues, evectors = np.linalg.eig(covariation_matrix)
    #np.linalg.eig does not guarantee any ordering, so sort by decreasing
    #eigenvalue (stable, to keep the returned order for equal values)
    order = np.argsort(-evalues.real, kind="stable")
    evalues = evalues[order]
    evectors = evectors[:, order].T  #vectors come out as the columns of the array
    print("INFO: eigenvectors calculated", file=stderr)

    import matplotlib.pyplot as plt
    print("INFO: Saving scree plot to scree.png", file=stderr)
    plt.scatter(range(0, len(evalues)), evalues, s=25)
    plt.xlabel("component")
    plt.ylabel("eigenvalue")
    plt.savefig("scree.png")

    #count the components needed to reach 90% of the variance; real parts are
    #used because eig may return a complex dtype, which cannot be ordered
    variance = evalues.real
    total = variance.sum()
    running = 0.0
    n_components = 0
    while running < 0.9 and n_components < len(variance):
        running += variance[n_components] / total
        n_components += 1

    print("90% of the variance is found in the first {} components".format(
        n_components))

    #if you want to weight the components by their eigenvalues
    #mul = np.einsum('ij,i->ij',evectors, evalues)
    mul = evectors

    #reconstruct configurations in component space
    #because we don't save the difference matrix, this involves running through the whole trajectory again
    if parallel:
        out = parallelize_erik_onefile.fire_multiprocess(
            traj_file, change_basis, num_confs, n_cpus, align_conf, mul)
        coordinates = np.concatenate(list(out))
    else:
        with ErikReader(traj_file) as r:
            coordinates = change_basis(r, align_conf, mul, num_confs)

    #make a quick plot from the first three components
    print(
        "INFO: Creating coordinate plot from first three eigenvectors.  Saving to coordinates.png",
        file=stderr)
    #the import registers the 3d projection on older matplotlib versions
    from mpl_toolkits.mplot3d import Axes3D
    fig = plt.figure()
    #fig.gca(projection=...) was removed in matplotlib 3.6
    ax = fig.add_subplot(projection='3d')
    ax.scatter(coordinates[:, 0],
               coordinates[:, 1],
               coordinates[:, 2],
               c='g',
               s=25)
    plt.savefig("coordinates.png")

    #Create oxView overlays for the first N components
    N = 3
    print(
        "INFO: Change the number of eigenvalues to sum and display by modifying the N variable in the script.  Current value: {}"
        .format(N),
        file=stderr)
    #splitext copes with dots elsewhere in the path (e.g. "./pca.json")
    overlay_root, overlay_ext = path.splitext(outfile)
    if overlay_ext != ".json":
        print(
            "ERROR: oxView overlays must have a '.json' extension.  No overlays will be produced",
            file=stderr)
    else:
        for i in range(min(N, len(evalues))):
            overlay_file = overlay_root + str(i) + overlay_ext
            component = np.sqrt(evalues[i]) * evectors[i]

            with catch_warnings(
            ):  #this produces an annoying warning about casting complex values to real values that is not relevant
                simplefilter("ignore")
                output_vectors = component.reshape(
                    int(component.shape[0] / 3), 3).astype(float)

            with open(overlay_file, "w") as file:
                file.write(dumps({"pca": output_vectors.tolist()}))

    #If we're running clustering, feed the linear terms into the clusterer
    if cluster:
        print("INFO: Mapping configurations to component space...",
              file=stderr)

        #If you want to cluster on only some of the components, slice
        #coordinates here, e.g. coordinates[:, 0:3]

        from oxDNA_analysis_tools.clustering import perform_DBSCAN
        labs = perform_DBSCAN(coordinates, num_confs, traj_file, inputfile,
                              "euclidean", 12, 8)