def pick_starting_configuration(traj_file, top_file, max_bound):
    """
    Load one configuration from the trajectory to serve as the alignment reference.

    If the module-level ``args.align`` is set, its first entry (converted to int)
    selects the configuration. Otherwise an index is drawn with
    ``randint(0, max_bound-1)``. The choice of reference is reported on stderr.
    According to the original author's experience the choice has very little
    impact on the resulting mean structure.

    Parameters:
        traj_file (string): The name of the trajectory file
        top_file (string): The name of the topology file associated with the trajectory file
        max_bound (int): Upper limit used for the random draw; the reference is picked from the first max_bound configurations

    Returns:
        stop_at (int): The configuration ID of the reference configuration
        initial_structure (base.System): The oxDNA system representing the reference configuration (already inboxed).

    The process exits with status 1 if the chosen configuration cannot be read.
    """
    with LorenzoReader2(traj_file, top_file) as reader:
        # An explicit selection always wins over the random draw.
        stop_at = int(args.align[0]) if args.align else randint(0, max_bound-1)
        print("INFO: We chose {} as reference".format(stop_at), file=stderr)

        # Skipping directly to the configuration is way faster than using next(),
        # but the system is not inboxed automatically (done explicitly below).
        initial_structure = reader._get_system(N_skip=stop_at)
        if not initial_structure:
            print("ERROR: Couldn't read structure at conf num {0}.  Something has gone weird".format(stop_at), file=stderr)
            exit(1)

        print("INFO: reference structure loaded", file=stderr)
        initial_structure.inbox()

    return stop_at, initial_structure
Пример #2
0
def get_centroid(points, metric_name, num_confs, labs, traj_file, inputfile):
    """
    Takes the output from DBSCAN and writes out the centroid configuration of each cluster.

    For every distinct label in labs, the rows of points belonging to that label are
    summed and the member with the smallest row sum is taken as the centroid
    (presumably points is a pairwise distance matrix in the "precomputed" case).
    The centroid configuration is read from the trajectory, written to
    centroid<label>.dat / centroid<label>.top and passed on to make_heatmap.

    Parameters:
        points (numpy.array): The points fed to the clustering algorithm.
        metric_name (str): The type of data the points represent. Only "precomputed" is handled.
        num_confs (int): Not used by this function.
        labs (numpy.array): The cluster each point belongs to.
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.

    Raises:
        ValueError: If there is at least one cluster and metric_name is not "precomputed".
    """

    print("INFO: splitting clusters...", file=stderr)
    print("INFO: Will write cluster trajectories to traj_<cluster_number>.dat", file=stderr)
    print ("cluster\tn\tavg_E\tE_dev\tavg_H\tH_dev\tcentroid_t")
    for cluster in set(labs):
        # Centroid selection is only defined for precomputed input. Fail with an
        # explicit message instead of an unbound-variable error further down.
        # (Checked inside the loop so that an empty labs remains a no-op.)
        if metric_name != "precomputed":
            raise ValueError(
                "get_centroid only supports metric_name 'precomputed', got {!r}".format(metric_name))

        # Row index, within this cluster's rows, of the member with the smallest row sum.
        masked = points[labs == cluster]
        in_cluster_id = np.sum(masked, axis=1).argmin()

        # Translate the within-cluster index to an index into the trajectory.
        centroid_id = find_element(in_cluster_id, cluster, labs)
        top_file = get_input_parameter(inputfile, "topology")

        # The context manager releases the reader once the configuration is loaded.
        with LorenzoReader2(traj_file, top_file) as r:
            output = r._get_system(N_skip=centroid_id)
        filename = "centroid"+str(cluster)

        output.print_lorenzo_output(filename+".dat", filename+".top")

        make_heatmap(inputfile, output, filename)
Пример #3
0
def split_trajectory(traj_file, top_file, num_confs, n_cpus,
                     confs_per_processor):
    """
    Splits a trajectory file into temporary files and attaches a reader to each file.

    Configuration boundaries are located by searching the raw bytes for b"t"
    (this assumes the byte only occurs at the start of a configuration header).

    Parameters:
        traj_file (str): Name of the trajectory file to split.
        top_file (str): Name of the topology file associated with the trajectory.
        num_confs (int): The number of configurations in the trajectory.
        n_cpus (int): The number of chunks to split the trajectory into.
        confs_per_processor (int): The number of configurations per chunk (equivalent to floor(num_confs/n_cpus))

    Returns:
        (readers, files): readers is a list of LorenzoReader2s, each one on a unique chunk of the file.
            files is the list of the still-open temporary files backing them. They are created with
            delete=False, so closing and removing them is left to the caller.
    """
    readers = []
    files = []
    rem = num_confs % n_cpus

    with open(traj_file, "rb") as f:
        it = blocks(f)
        chunk = next(it)  # iterator producing 1 MB chunks of the trajectory
        last_conf_byte = 0
        # Byte 0 of the first chunk opens the first configuration, so the search for
        # the *next* boundary starts one byte later.
        search_from = 1

        #create a number of temporary file equal to the number of CPUs
        for n_files in range(n_cpus):
            out = NamedTemporaryFile(mode='w+b', delete=False)
            conf_count = 0

            #If there is a remainder after dividing the number of configurations by the number of CPUs
            #Add one extra configuration to the first rem files
            target = confs_per_processor + (1 if n_files < rem else 0)

            #Find successive configuration start points and write them out to the tempfiles
            while conf_count < target:
                next_conf_byte = chunk.find(b"t", search_from)
                if next_conf_byte == -1:
                    out.write(chunk[last_conf_byte:])
                    try:
                        chunk = next(it)
                    except StopIteration:
                        # Trajectory exhausted. Drop the consumed chunk so that any
                        # remaining temp files do not receive its tail a second time.
                        chunk = b""
                        last_conf_byte = 0
                        search_from = 0
                        break
                    last_conf_byte = 0
                    # Nothing of the new chunk has been examined yet: a configuration
                    # may start exactly at its first byte.
                    search_from = 0
                else:
                    out.write(chunk[last_conf_byte:next_conf_byte])
                    conf_count += 1
                    last_conf_byte = next_conf_byte
                    search_from = next_conf_byte + 1

            # Push buffered bytes to disk before the file is reopened by name.
            out.flush()

            #create a reader from the newly created trajectory chunk
            readers.append(LorenzoReader2(out.name, top_file))
            files.append(out)

    return (readers, files)
def fire_multiprocess(traj_file, top_file, function, num_confs, n_cpus, *args,
                      **kwargs):
    """
    Runs function over chunks of a trajectory using a pool of n_cpus worker processes.

    The trajectory is divided into consecutive chunks. Every chunk gets its own
    LorenzoReader2 and is submitted as one task, called as
    function(reader, *args, num_confs, chunk_start, chunk_size).

    Parameters:
        traj_file (str): Name of the trajectory file.
        top_file (str): Name of the topology file associated with the trajectory.
        function (callable): The worker function applied to each chunk.
        num_confs (int): The number of configurations in the trajectory.
        n_cpus (int): The number of worker processes.
        *args: Extra positional arguments inserted after the reader in every call.
        **kwargs: Only matrix=True is inspected. It balances chunks by the number of
            pairwise comparisons (row i costs num_confs - i) rather than by configuration count.

    Returns:
        results (list): One entry per chunk, in chunk order. Without matrix=True there are
            exactly n_cpus chunks; with matrix=True the count may differ from n_cpus.
    """
    confs_per_processor = int(np.floor(num_confs / n_cpus))

    processor_pool = pp.Pool(n_cpus)
    try:
        #for calculations on symmetric matricies (eRMSD)
        #can't just hand each line to the parallelizer
        if ("matrix", True) in kwargs.items():
            # closed form of sum(num_confs - i for i in range(1, num_confs))
            total_calculations = num_confs * (num_confs - 1) // 2
            calcs_per_cpus = total_calculations / n_cpus
            split_ends = []
            i = 0
            while i < num_confs:
                e = 0
                calcs = 0
                # Every chunk takes at least one configuration; this guarantees
                # progress even when calcs_per_cpus is 0.
                while True:
                    calcs += num_confs - i
                    e += 1
                    i += 1
                    if i >= num_confs or calcs >= calcs_per_cpus:
                        break
                split_ends.append(e)

        #define sizes of trajectory chunks
        else:
            split_ends = [confs_per_processor for _ in range(n_cpus)]
            split_ends[-1] += num_confs % n_cpus  #last chunk gets all the leftovers

        #now figure out which configuration each chunk starts on
        # Starts and readers are built per chunk (not per CPU) so that no chunk is
        # dropped when the matrix split yields a different number of chunks than n_cpus.
        split_starts = []
        reader_pool = []
        next_start = 0
        for chunk_size in split_ends:
            split_starts.append(next_start)
            reader_pool.append(LorenzoReader2(traj_file, top_file))
            next_start += chunk_size

        #staple everything together, send it out to the workers, and collect the results as a list
        lst = [(r, *args, num_confs, s, e)
               for r, s, e in zip(reader_pool, split_starts, split_ends)]
        results = processor_pool.starmap_async(function, lst).get()
    finally:
        # Shut the workers down even if a task raised.
        processor_pool.close()
        processor_pool.join()

    return (results)
Пример #5
0
def get_eRMSDs(r1,
               r2,
               inputfile,
               traj_file,
               top_file,
               num_confs,
               start=None,
               stop=None):
    """
    Computes eRMSDs between configurations of a trajectory, filling the upper triangle row by row.

    Row i is compared against every later configuration j > i. The rows processed are
    start, start+1, ... until either stop rows have been handled or r1 runs out of configurations.

    Parameters:
        r1 (LorenzoReader2): Reader supplying the row configurations.
        r2 (LorenzoReader2): Reader supplying the column configurations for the first row.
            For later rows a fresh reader on traj_file is opened internally.
        inputfile (str): The input file used to run the analyzed simulation.
        traj_file (str): The analyzed trajectory file.
        top_file (str): The topology file associated with the trajectory.
        num_confs (int): The number of configurations in the trajectory.
        start (int): Index of the first row configuration. Defaults to 0.
        stop (int): Number of rows to process (a count, not an end index). Defaults to num_confs.

    Returns:
        eRMSDs (numpy.array): A num_confs x num_confs array; only entries [i][j] with j > i
            for the processed rows are filled, everything else is 0.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    system1 = r1._get_system(N_skip=start)
    system2 = r2._get_system(N_skip=start + 1)
    eRMSDs = np.zeros((num_confs, num_confs))
    i = start
    confid = 0  # number of rows finished so far
    while system1 != False and confid < stop:
        print("working on configuration", i, "t =", system1._time)
        system1.inbox_system()
        mat1 = calc_matrix(system1, inputfile)

        j = i + 1
        while system2:
            print("working on configuration", i, "compared to", j)
            system2.inbox_system()
            mat2 = calc_matrix(system2, inputfile)
            eRMSDs[i][j] = calc_eRMSD(mat1, mat2)
            system2 = r2._get_system()
            j += 1

        i += 1
        confid += 1
        system1 = r1._get_system()

        # Rewind the column reader for the next row. Only do so when another row
        # will actually be processed, otherwise the trajectory would be opened and
        # skipped through once more for nothing.
        # NOTE(review): the previous r2 is not closed explicitly here - confirm
        # whether LorenzoReader2 releases its file handle on garbage collection.
        if system1 != False and confid < stop:
            r2 = LorenzoReader2(traj_file, top_file)
            system2 = r2._get_system(N_skip=i + 1)

    return (eRMSDs)
Пример #6
0
def main():
    """
    Command-line entry point: run all_vectors on every configuration of a trajectory.

    Takes two positional arguments (the simulation input file and the trajectory),
    sets the OXRNA environment variable from the input file's interaction_type,
    then iterates over the trajectory. The per-configuration result is discarded,
    so, as the original author put it, this doesn't actually do anything.
    """
    import argparse
    from UTILS.readers import LorenzoReader2, get_input_parameter

    parser = argparse.ArgumentParser(
        description="A python wrapper for getting all vectors between nucleotides from a simulation")
    parser.add_argument('inputfile', type=str, nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1,
                        help="The file containing the configurations of which the contact map is needed")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    inputfile, traj_file = args.inputfile[0], args.trajectory[0]

    top_file = get_input_parameter(inputfile, "topology")
    uses_rna = "RNA" in get_input_parameter(inputfile, "interaction_type")
    environ["OXRNA"] = "1" if uses_rna else "0"

    import UTILS.base  #this needs to be imported after the model type is set

    reader = LorenzoReader2(traj_file, top_file)
    system = reader._get_system()
    while system:
        # The return value is intentionally not kept.
        all_vectors(inputfile, system, True)
        system = reader._get_system()

    print("well, it finished...")
Пример #7
0
    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)
    import UTILS.base  #this needs to be imported after the model type is set

    r2 = LorenzoReader2(traj_file, top_file)

    #how do you want to get your eRMSDs?  Do you need to do the time-consuming calculation or is it done and you have a pickle?
    if not parallel:
        r1 = LorenzoReader2(traj_file, top_file)

        eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)
    if parallel:
        out = parallelize.fire_multiprocess(traj_file,
                                            top_file,
                                            get_eRMSDs,
                                            num_confs,
                                            n_cpus,
                                            r2,
                                            inputfile,
                                            traj_file,
Пример #8
0
else:
    outfile = "forces.txt"
    print(
        "INFO: No outfile name provided, defaulting to \"{}\"".format(outfile),
        file=stderr)

if args.pairs:
    pairsfile = args.pairs[0]
else:
    pairsfile = False

#Get relevant parameters from the input file
top_file = get_input_parameter(inputfile, "topology")

#get base pairs
r = LorenzoReader2(conf_file, top_file)
mysystem = r._get_system()
out = output_bonds(inputfile, mysystem)
out = out.split('\n')

#Find out the forming bonds series
print("INFO: Analyze the output...", file=stderr)
Bonded = {}
for i in out:
    if i[0] == '#':
        continue
    splitline = i.split(' ')
    try:
        HB = float(splitline[6])
    except:
        continue
Пример #9
0
    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    try:
        outfile = args.outfile[0]
        visualize = True
    except:
        visualize = False

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import UTILS.base  #this needs to be imported after the model type is set

    myreader = LorenzoReader2(traj_file, top_file)
    mysystem = myreader._get_system()

    energies = np.zeros(mysystem.N)
    count = 0

    while mysystem != False:
        out = output_bonds(inputfile, mysystem)
        if visualize:
            for line in out.split('\n'):
                if not line.startswith('#'):
                    line = [float(l) for l in line.split(' ')]
                    energies[int(line[0])] += sum(line[2:])
                    energies[int(line[1])] += sum(line[2:])
        else:
            print(out)
Пример #10
0
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import UTILS.base #this needs to be imported after the model type is set
    
    num_confs = cal_confs(traj_file)
    
    if mean_file.split(".")[-1] == "json":
        with open(mean_file) as file:
            align_conf = load(file)['g_mean']

    elif mean_file.split(".")[-1] == "dat":
        fetch_np = lambda conf: np.array([
            n.cm_pos for n in conf._nucleotides
        ])
        with LorenzoReader2(mean_file, top_file) as reader:
            s = reader._get_system()
            align_conf = fetch_np(s)

    cms = compute_cms(align_conf) #all structures must have the same center of mass
    align_conf -= cms 
        
    #Compute the deviations
    if not parallel:
        r = LorenzoReader2(traj_file,top_file)
        deviations_matrix = get_pca(r, align_conf, num_confs)
    
    if parallel:
        out = parallelize.fire_multiprocess(traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
        deviations_matrix = np.concatenate([i for i in out])
    
Пример #11
0
def split_trajectory(traj_file, inputfile, labs, n_clusters):
    """
    Splits the trajectory into the clustered trajectories

    Prints a "cluster / members" table to stdout, removes any existing
    cluster_<label>.dat files, then writes every configuration of the trajectory
    to the cluster_<label>.dat file of the cluster it belongs to.

    Parameters:
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
        labs (numpy.array): The cluster each point belongs to. Indexed by configuration number.
        n_clusters (int): Not used by this function.
    """
    top_file = get_input_parameter(inputfile, "topology")

    print ("cluster\tmembers")

    for cluster in (set(labs)):
        in_cluster = list(labs).count(cluster)

        print ("{}\t{}".format(cluster, in_cluster))

        #for making trajectories of each cluster
        # Clear output left over from an earlier run (presumably print_traj_output
        # adds to an existing file - TODO confirm). A missing file is the normal
        # case; any other failure is reported instead of silently ignored.
        cluster_file = "cluster_"+str(cluster)+".dat"
        try:
            remove(cluster_file)
        except FileNotFoundError:
            pass
        except OSError as err:
            print("WARNING: could not remove existing file {}: {}".format(cluster_file, err), file=stderr)

    confid = 0
    r1 = LorenzoReader2(traj_file, top_file)
    system = r1._get_system()

    print ("INFO: splitting trajectory...", file=stderr)
    print ("INFO: Will write cluster trajectories to cluster_<cluster number>.dat", file=stderr)

    while system != False:
        system.print_traj_output("cluster_"+str(labs[confid])+".dat", "/dev/null")

        # If you want to get additional information about a cluster (for example
        # average energy or hydrogen bond counts from output_bonds), collect it
        # here, per configuration, keyed by labs[confid].

        confid += 1
        system = r1._get_system()

    #This is where you print the information about each cluster
    '''    
Пример #12
0
                    type=str,
                    nargs=1,
                    help="The reference configuration to superimpose to")
parser.add_argument('victims',
                    type=str,
                    nargs='+',
                    help="The configuraitons to superimpose on the reference")
args = parser.parse_args()

#Get the reference files
top_file = args.topology[0]
ref_dat = args.reference[0]

#Create list of configurations to superimpose
to_sup = []
r = LorenzoReader2(ref_dat, top_file)
ref = r._get_system()
ref.inbox()
ref_conf = fetch_np(ref)
for i in args.victims:
    r = LorenzoReader2(i, top_file)
    sys = r._get_system()
    to_sup.append(sys)

sup = SVDSuperimposer()

#Run the biopython superimposer on each configuration and rewrite its configuration file
for i, sys in enumerate(to_sup):
    cur_conf = fetch_np(sys)
    sup.set(ref_conf, cur_conf)
    sup.run()
            outfile),
              file=stderr)

    #Get relevant parameters from the input file
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    import UTILS.base  #this needs to be imported after the model type is set

    #Calculate the number of configurations.
    num_confs = cal_confs(traj_file)

    r0 = LorenzoReader2(traj_file, top_file)
    r0._get_system()

    #launch find_angle using the appropriate number of threads to find all duplexes.
    if not parallel:
        print(
            "INFO: Fitting duplexes to {} configurations using 1 core.".format(
                num_confs),
            file=stderr)
        r = LorenzoReader2(traj_file, top_file)
        duplexes_at_step = find_angles(r, num_confs)

    if parallel:
        print("INFO: Fitting duplexes to {} configurations using {} cores.".
              format(num_confs, n_cpus),
              file=stderr)