Пример #1
3
def rnaplot(seq, struct=None, path='rnaplots', name='temp'):
    """Draw the secondary structure of ``seq`` into a PostScript file.

    Args:
        seq: Sequence to plot.
        struct: Dot-bracket structure. When ``None``, the first element of
            ``RNA.fold(seq)`` is used instead.
        path: Directory the file is written to (it is not created here).
        name: File name without extension; ``.ps`` is appended.

    Returns:
        The path that was handed to ``RNA.PS_rna_plot_a``.
    """
    import RNA

    # Identity check: ``== None`` would dispatch to a custom ``__eq__``.
    if struct is None:
        struct = RNA.fold(seq)[0]
    filename = os.path.join(path, name + '.ps')
    colors = [" 1. 0. .2", " 0. .9 .5"]
    # Annotation macro built by the file's helper for indices 0-9 using the
    # first colour (presumably a colour-mark macro -- see the helper).
    macro = format_cmark_values(range(0, 10), rgb=colors[0])
    RNA.PS_rna_plot_a(seq, struct, filename, '', macro)
    return filename
def local_search(start_seq_, target_structs_, seq_constraint_,
                 context_front=None, context_back=None):
    """Run the configured local search and return ``(seq, cost, steps)``.

    The start sequence, the two target structures and the sequence
    constraint are published through the module-level globals
    ``start_seq``, ``target_structs`` and ``seq_constraint`` before the
    search routine selected by ``SEARCH_STRATEGY`` is called.
    ``context_front`` and ``context_back`` are accepted but not used here.

    Raises:
        ValueError: If ``SEARCH_STRATEGY`` is none of the known strategies.
    """
    global start_seq
    global seq_constraint
    global target_structs

    # Both target structures have to fit the start sequence.
    for idx in (0, 1):
        rna.check_struct_seq_match(target_structs_[idx], start_seq_)

    start_seq = start_seq_
    target_structs = target_structs_
    seq_constraint = seq_constraint_

    # TODO (carried over, has to be checked): remember RNA.dangles, force it
    # to 1 when it is non-zero, and restore it before returning.

    walk_based = (
        SEARCH_STRATEGY == SearchStrategy.adaptive_walk
        or SEARCH_STRATEGY == SearchStrategy.stochastic_local_search
    )
    if walk_based:
        outcome = local_search_sls_pf()
    elif SEARCH_STRATEGY == SearchStrategy.full_local_search:
        outcome = local_search_fls_pf()
    else:
        raise ValueError("Specified search strategy not valid.")
    seq, cost, steps = outcome

    # Drop cached evaluations and release the folding arrays.
    eval_seq_container.reset()
    vienna_rna.free_pf_arrays()
    vienna_rna.free_arrays()

    return seq, cost, steps
Пример #3
0
def main():
    """ for sequence string, calculate mfe structure, mfe, pf, base pair probability matrix, plot structures and bppms, calculate accessibilities"""
    print 'name mfe_low mfe_high pf_low pf_high RRS_acces_low RRS_acces_high AUG_acces_low AUG_acces_high'
    for seq_file in SeqIO.parse(sys.stdin, 'fasta'):
         sequ = str(seq_file.seq)
         fc_low = RNA.fold_compound(sequ, MODEL_LOW_TEMPERATURE)
         fc_high = RNA.fold_compound(sequ, MODEL_HIGH_TEMPERATURE)
         struct_low, mfe_low = fc_low.mfe()
         struct_high, mfe_high = fc_high.mfe()
         pfstruct_low, pf_low = fc_low.pf()
         pfstruct_high, pf_high = fc_high.pf()
         bppm_low = fc_low.bpp()
         bppm_high = fc_high.bpp()
         
         plot_2bppms(bppm_low, bppm_high, seq_file.id)
         RNA.PS_rna_plot(sequ, struct_low, '{:s}_low_ss.ps'.format(str2filename(seq_file.id)))    
         RNA.PS_rna_plot(sequ, struct_high, '{:s}_high_ss.ps'.format(str2filename(seq_file.id)))
    
         constr1, constr2 = seqconstraints(sequ,RRS,START,SPACER)
         RRS_acces_low = accessibility(sequ,MODEL_LOW_TEMPERATURE,constr1,pf_low)
         RRS_acces_high = accessibility(sequ,MODEL_HIGH_TEMPERATURE,constr1,pf_high)
         AUG_acces_low = accessibility(sequ,MODEL_LOW_TEMPERATURE,constr2,pf_low)
         AUG_acces_high = accessibility(sequ,MODEL_HIGH_TEMPERATURE,constr2,pf_high)

         print seq_file.id, mfe_low, mfe_high, pf_low, pf_high, RRS_acces_low, RRS_acces_high, AUG_acces_low, AUG_acces_high
    print versions_used()
Пример #4
0
def getBPPM(sequence, structure = "", bppm_cutoff = 0.00001):
    """
        Requires the ViennaRNA Python module (``RNA``).
        Computes base pair probabilities with pf_fold / get_pr and releases
        the partition function arrays afterwards.

        Returns a dict keyed by "i_j" (1-based positions, i < j). Pairs whose
        probability exceeds ``bppm_cutoff`` keep that probability; all other
        pairs are stored with the value 0.
    """
    # Constrained folding is switched on only when a constraint was given.
    RNA.cvar.fold_constrained = 1 if structure != "" else 0
    RNA.pf_fold(sequence, structure)

    bppm = {}
    upper = len(sequence) + 1
    for i in xrange(1, upper):
        # Only the upper triangle (j > i) is stored.
        for j in xrange(i + 1, upper):
            probability = RNA.get_pr(i, j)
            key = str(i) + "_" + str(j)
            bppm[key] = probability if probability > bppm_cutoff else 0
    RNA.free_pf_arrays()
    return bppm
Пример #5
0
def mfe_bp_distance(S, G, masked=None):
    """Return the base pair distance between the MFE structure of S and G.

    S is folded with ``RNA.fold`` and the resulting structure is compared
    with the secondary structure G through ``RNA.bp_distance``.
    The ``masked`` argument is accepted but not used by this implementation.
    """
    fold_result = RNA.fold(S)
    mfe_structure = fold_result[0]
    return RNA.bp_distance(mfe_structure, G)
Пример #6
0
def prep_sec2(seq_five, seq_three, seq_apta, shift, rand):
    """Build a dot-bracket string with the folded aptamer inside a stem.

    The flanks are folded around an artificial G100/C100 hairpin spacer; the
    fold of each flank only contributes its length (as dots). Between them go
    ``rand`` opening brackets, the fold of ``seq_apta``, ``rand`` closing
    brackets and ``shift`` extra dots.
    """
    spacer_sequence = "N" + "G" * 100 + "N" * 4 + "C" * 100 + "N"
    spacer_structure = "." + "(" * 100 + "." * 4 + ")" * 100 + "."

    context_fold, _energy = RNA.fold(seq_five + spacer_sequence + seq_three)
    aptamer_fold, _energy = RNA.fold(seq_apta)

    # Everything before / after the spacer hairpin in the folded construct.
    flanks = context_fold.split(spacer_structure, 1)
    pieces = [
        "." * len(flanks[0]),
        "(" * rand,
        aptamer_fold,
        ")" * rand,
        "." * shift,
        "." * len(flanks[1]),
    ]
    return "".join(pieces)
Пример #7
0
def fold_probability(S, G=None):
    """Compute the partition function of S with G passed as constraint.

    G is forwarded unchanged to ``RNA.pf_fold`` (``None`` by default).

    Returns a triple ``(A, B, C)``: A and B are the two values returned by
    ``RNA.pf_fold`` (annotated ensemble structure and ensemble energy), C is
    a dictionary mapping each 0-based position pair ``(left, right)`` with
    ``left < right`` to the value of ``RNA.get_pr(left + 1, right + 1)``.
    """
    struct, energy = RNA.pf_fold(S, G)
    length = len(S)
    pair_probabilities = {}
    for left in range(length):
        for right in range(left + 1, length):
            # RNA.get_pr is queried with 1-based positions.
            pair_probabilities[left, right] = RNA.get_pr(left + 1, right + 1)
    return (struct, energy, pair_probabilities)
Пример #8
0
def temperature_reactivity( sequence, structure, temperature1, temperature2 ):
    """Evaluate ``structure`` on ``sequence`` at two temperatures.

    Returns ``(delta_energy, energy1, energy2)`` where ``delta_energy`` is the
    absolute energy difference normalised by ``energy2 + 0.001``.
    """
    models = []
    for _ in range(2):
        models.append(RNA.md())
    for model, temperature in zip(models, (temperature1, temperature2)):
        model.temperature = temperature

    compounds = [RNA.fold_compound(sequence, model) for model in models]
    energy_of_struct1, energy_of_struct2 = [
        compound.eval_structure(structure) for compound in compounds
    ]

    # The 0.001 offset keeps the denominator away from 0 for a zero energy.
    difference = energy_of_struct2 - energy_of_struct1
    delta_energy = abs(difference / (energy_of_struct2 + 0.001))

    return (delta_energy, energy_of_struct1, energy_of_struct2)
Пример #9
0
    def Rewards(self,k):
        """Score the sequence encoded in ``self.position`` against ``s``.

        The first ``len(str_uindex)`` entries of ``self.position`` form the
        base list; the remaining entries up to ``self.n`` are flattened and
        inserted into it at the indices given by the module-level lists
        ``b`` (insert position) and ``c`` (index into the flattened list).
        The resulting sequence is folded with ``RNA.fold`` and compared
        position by position with the module-level structure ``s``.

        Args:
            k: Step index. The original code had one branch for
                ``k > len(str_uindex)-1`` and one for the complement, but
                both ran exactly the same statements, so ``k`` does not
                influence the result.

        Returns:
            The fraction of positions at which the folded structure equals
            ``s``. When it is 1.0 the sequence is appended to the
            module-level ``solution`` list.
        """
        split_at = len(str_uindex)
        tail_entries = self.position[split_at:self.n]
        bases = self.position[0:split_at]
        flat_tail = list(itertools.chain(*tail_entries))
        for idx in range(len(a)):
            bases.insert(b[idx], flat_tail[c[idx]])
        mutated_s = ''.join(map(str, bases))
        mutated_str = RNA.fold(mutated_s)[0]

        n = len(s)
        mismatches = sum(1 for pos in range(n) if mutated_str[pos] != s[pos])
        # Keep the arithmetic in floats so Python 2 does not floor-divide.
        g = (n - float(mismatches)) / n
        if g == 1.0:
            solution.append(mutated_s)
        return g
Пример #10
0
 def test_centroid(self):
     print "test_centroid\n"
     fc=RNA.fold_compound(align)
     fc.pf()
     (sc,dist) = fc.centroid()
     print  sc,"\tDistance of :  %6.2f" %dist ,"\n"
     self.assertTrue(sc and dist)
Пример #11
0
    def test_eval_structure_pt(self):
        print "test_eval_structure_pt\n"
        fc=RNA.fold_compound(seq1)
        energy= fc.eval_structure_pt(struct1_pt) /100; #/100 for dcal

        self.assertEqual("%6.2f" % energy, "%6.2f" % -5.60)
        print  struct1, "[%6.2f" % energy,"]\n"
Пример #12
0
def bt(i,j,k,l,d,data=None):
    """
    Backtracking callback: returns a list of base pairs for hairpin
    decompositions (``RNA.DECOMP_PAIR_HP``) and ``None`` for anything else.

    A base pair may be expressed in one of three ways; all three are built
    below for illustration, and only the last one is actually returned.
    """
    if d != RNA.DECOMP_PAIR_HP:
        return None

    start, end = i + 1, j - 1

    # 1. A dictionary with 'i' and 'j' keys holding the pair coordinates.
    bp = {'i': start, 'j': end}

    # 2. A plain (i, j) tuple.
    bp = (start, end)

    # 3. An RNA::basepair object.
    bp = RNA.basepair()
    bp.i = start
    bp.j = end

    return [bp]
Пример #13
0
    def __init__(self, fullseq, vrna_md):
        """Set up the landscape for ``fullseq`` using the model ``vrna_md``."""
        super(TrafoLandscape, self).__init__()

        self._full_sequence = fullseq
        self._model_details = vrna_md
        self._fold_compound = RNA.fold_compound(fullseq, vrna_md)

        # RT constant for 37 degrees Celsius (310.15 K); it is rescaled
        # linearly in Kelvin for any other model temperature.
        rt_at_37 = 0.61632077549999997
        temperature = vrna_md.temperature
        if temperature == 37.0:
            self._RT = rt_at_37
        else:
            self._RT = (rt_at_37 / 310.15) * (273.15 + temperature)

        # Private bookkeeping, all starting at zero.
        self._transcript_length = 0
        self._total_time = 0
        self._nodeid = 0

        # Defaults for the search parameters.
        self._p_min = 0.01  # probability threshold
        self._fpath = 20    # findpath_search_width

        self._k0 = 2e5    # set directly
        self._dG_max = 0  # set using t_slow
        self._dG_min = 0  # set using t_fast
Пример #14
0
def main():
    for monster in monsters:
        # calculate 1) foldcompound 2) partition function 3) base pair probability matrix in that order (!)
        foldmonster = RNA.fold_compound(str(monster))
        pfstruct, pf = foldmonster.pf()
        bppm = foldmonster.bpp()
        plot_bppm(bppm, monster.id)
    print versions_used()
def score_match(query):
    """Co-fold ``query`` with the fixed motif and return the second result.

    The two strands are joined with '&' and passed to ``RNA.cofold``; element
    1 of its return value is handed back to the caller.
    """
    motif = 'CCTCCT'
    cofold_result = RNA.cofold(query + '&' + motif)
    return cofold_result[1]
Пример #16
0
 def test_E_int_loop(self):
     print "test_E_int_loop"
     #    "123456789012"
     seq1 =  "AGACAAAAGACA"
     struct1=".(.(....).)."
     fc=RNA.fold_compound(seq1,None,RNA.OPTION_MFE)
     e = fc.E_int_loop(2,11)
     print seq1, " 2,7  = [ %6.2f" %e ,"] \n"
     self.assertEqual("%6.2f" %e,"%6.2f" % +80)
Пример #17
0
 def test_pf(self):
     print "test_pf"
     fc= RNA.fold_compound(seq1)
     (ss,gfe) = fc.pf()
     print ss, "[ %6.2f" %gfe ,"]\n"
     self.assertTrue(ss)
     bp_dis = fc.mean_bp_distance()
     print seq1 ,"\t meanBPDistance : ", bp_dis,"\n"
     self.assertTrue(bp_dis)
Пример #18
0
def accessibility ( sequence, md, constr, pf_noconstr ):
    """Return the accessibility implied by the constraint string ``constr``.

    The partition function is recomputed with ``constr`` added as a
    dot-bracket constraint. If ``constr`` contains an 'x', the result is
    ``exp((pf_noconstr - pf_constr) / (BOLTZMANN_K * T))`` with T being the
    model temperature plus 273.15; otherwise 0 is returned.
    """
    fc_constr = RNA.fold_compound(sequence, md)
    fc_constr.constraints_add(constr, RNA.CONSTRAINT_DB_DEFAULT)
    _constrained_structure, pf_constr = fc_constr.pf()

    if not re.search('x', constr):
        return 0
    thermal_energy = BOLTZMANN_K * (md.temperature + 273.15)
    return exp((pf_noconstr - pf_constr) / thermal_energy)
Пример #19
0
def prep_sec1(seq_five, seq_three, seq_apta, shift):
    """Build a dot-bracket string with an unpaired aptamer inside a stem.

    The flanks are folded around an artificial G100/C100 hairpin spacer; the
    fold of each flank only contributes its length (as dots). Between them go
    ``shift`` opening brackets, one dot per aptamer nucleotide and ``shift``
    closing brackets.
    """
    spacer_sequence = "N" + "G" * 100 + "N" * 4 + "C" * 100 + "N"
    spacer_structure = "." + "(" * 100 + "." * 4 + ")" * 100 + "."

    context_fold, _energy = RNA.fold(seq_five + spacer_sequence + seq_three)

    # Everything before / after the spacer hairpin in the folded construct.
    flanks = context_fold.split(spacer_structure, 1)
    pieces = [
        "." * len(flanks[0]),
        "(" * shift,
        "." * len(seq_apta),
        ")" * shift,
        "." * len(flanks[1]),
    ]
    return "".join(pieces)
Пример #20
0
def rnafold(seq, name=None):
    """Fold ``seq`` with RNA.fold and return its result.

    Any exception raised while folding is printed and ``None`` is returned
    instead. ``name`` is accepted but not used.
    """
    import RNA

    try:
        folded = RNA.fold(seq)
    except Exception as err:
        # Best effort: report the problem and signal failure with None.
        print (err)
        return None
    else:
        return folded
Пример #21
0
def prep_sec2_comp(seq_five, seq_three, seq_apta, comp, rand):
    """Build a dot-bracket string for a stem that reaches into both flanks.

    The result only depends on the lengths of the inputs, so no folding is
    needed. (The original folded a 200+ nt spacer construct here and then
    discarded the result.)

    Args:
        seq_five: 5' flank; its last ``comp`` positions join the stem.
        seq_three: 3' flank, left fully unpaired.
        seq_apta: Aptamer; its first ``comp`` positions are counted as part
            of the stem, the remainder stays unpaired.
        comp: Number of positions shifted from the flank/aptamer into the stem.
        rand: Number of additional stem pairs.

    Returns:
        The dot-bracket string.
    """
    stem = rand + comp
    pieces = [
        "." * (len(seq_five) - comp),
        "(" * stem,
        "." * (len(seq_apta) - comp),
        ")" * stem,
        "." * len(seq_three),
    ]
    return "".join(pieces)
Пример #22
0
    def test_eval_hp_loop(self):
        print "test_eval_hp_loop"
        seq1  =      "GCAAAAGG"
        struct1=    ".(....)."

        fc=RNA.fold_compound(seq1)
        #ehair = fc.eval_hp_loop(2,7)
        ehair = fc.E_hp_loop(2,7)
        print seq1, " 2,7  = [ %6.2f" %ehair ,"] \n"
        self.assertEqual("%6.2f" %ehair,"%6.2f" % +410)
Пример #23
0
def prep_sec2_left(seq_five, seq_three, seq_apta, shift, rand):
    """Build a dot-bracket string for a stem shifted into the 5' flank.

    The result only depends on the lengths of the inputs, so no folding is
    needed. (The original folded a 200+ nt spacer construct here and then
    discarded the result.)

    Args:
        seq_five: 5' flank; its last ``shift`` positions join the stem.
        seq_three: 3' flank, left fully unpaired.
        seq_apta: Aptamer; ``len(seq_apta) - shift`` positions stay unpaired.
        shift: Number of positions moved from the 5' flank into the stem.
        rand: Number of additional stem pairs.

    Returns:
        The dot-bracket string.
    """
    pieces = [
        "." * (len(seq_five) - shift),
        "(" * (rand + shift),
        "." * (len(seq_apta) - shift),
        ")" * rand,
        ")" * shift,
        "." * len(seq_three),
    ]
    return "".join(pieces)
    def test_file_SHAPE_read(self):
        print "test_file_SHAPE_read"
        reactivities = getShapeDataFromFile("data/TPP_riboswitch_E.coli.shape_2rows")

        (a,b,c) = RNA.file_SHAPE_read("data/TPP_riboswitch_E.coli.shape_2rows", 79, -1)
        print "read file:"
        print a
        print b
        print c

        print reactivities
        print a
Пример #25
0
    def test_eval_covar_structure(self):
        print "test_eval_covar_structure\n"
        s1="CCCCAAAACGGG"
        s2="CCCGAAAAGGGG"
        s3="CCCCAAAAGGGG"
        ali = [s1,s2,s3]
        covarStructure = "((((....))))"

        fc = RNA.fold_compound(ali)
        pseudoEScore=fc.eval_covar_structure2(covarStructure)
        print covarStructure, "[ %6.2f" %pseudoEScore ,"]\n"
        self.assertTrue(pseudoEScore)
    def add(self, seq):
        """Evaluate ``seq`` once and cache the results in ``self.container``.

        Nothing happens when ``str(seq)`` is already a key of the container.
        Otherwise the entry gets the keys "pos_bp_pr", "energy_ensemble",
        "energy_mfe", "struct_mfe" and "seq_pr". The two target structures
        are read from the module-level ``target_structs``.
        """
        def bp_pr(i, j):
            # Look up the pair value for the 0-based positions i and j in the
            # flat ``bppm`` array; the arguments are ordered so that i <= j
            # and shifted to 1-based before indexing through ``iindx``.
            # ``bppm`` and ``iindx`` are bound further down in ``add``.
            if i > j:
                i, j = j, i
            return bppm[iindx[i + 1] - (j + 1)]

        def pos_bp_pr_iter():
            """Yield probability for base i being paired in each structure."""
            struct_0, struct_1 = target_structs
            for i in xrange(len(seq)):
                # Partner of position i in each target structure; ``None``
                # is treated as "no partner".
                a = struct_0.basepairs[i]
                b = struct_1.basepairs[i]
                if a is not None:
                    if b is not None:
                        yield bp_pr(i, a), bp_pr(i, b)
                    else:
                        # No partner in structure 1: use one minus the sum of
                        # all pair values of position i instead.
                        sum_ = sum((bp_pr(i, j) for j in xrange(size)))
                        yield bp_pr(i, a), 1 - sum_
                elif b is not None:
                    sum_ = sum((bp_pr(i, j) for j in xrange(size)))
                    yield 1 - sum_, bp_pr(i, b)
                else:
                    sum_ = 1 - sum((bp_pr(i, j) for j in xrange(size)))
                    yield sum_, sum_

        seq_str = str(seq)
        # Already evaluated: keep the cached entry.
        if seq_str in self.container:
            return
        size = len(seq)
        self.container[seq_str] = {}

        # with LOCK_VIENNA_RNA:
        energy_ensemble = vienna_rna.pf_fold_par(seq_str, None, None, 1, 0,
                                                 0)
        # Wrap the pointers exported by the library so that bp_pr can index
        # them like Python sequences.
        bppm = vienna_rna.doubleArray_frompointer(vienna_rna.export_bppm())
        i = vienna_rna.get_iindx(len(seq_str))
        iindx = vienna_rna.intArray_frompointer(i)
        energy_structs = (
            vienna_rna.energy_of_struct(seq_str, str(target_structs[0])),
            vienna_rna.energy_of_struct(seq_str, str(target_structs[1])))
        # Blank buffer of sequence length handed to fold_par -- presumably
        # filled in place with the MFE structure by the library; TODO confirm.
        struct_mfe = " " * len(seq_str)
        energy_mfe = vienna_rna.fold_par(seq_str, struct_mfe, None, 0, 0)
        # The generator is consumed here, after bppm, iindx and size exist.
        self.container[seq_str]["pos_bp_pr"] = list(pos_bp_pr_iter())
        self.container[seq_str]["energy_ensemble"] = energy_ensemble
        self.container[seq_str]["energy_mfe"] = energy_mfe
        self.container[seq_str]["struct_mfe"] = struct_mfe
        # exp((ensemble energy - structure energy) / kT) per target structure.
        self.container[seq_str]["seq_pr"] = (
            math.exp((energy_ensemble - energy_structs[0]) / kT),
            math.exp((energy_ensemble - energy_structs[1]) / kT))
Пример #27
0
def full_hairpin(seq_five, seq_three, aptamer, shift):
    """Embed ``aptamer`` between the flanks with ``shift`` random bases each side.

    Args:
        seq_five: 5' flank.
        seq_three: 3' flank.
        aptamer: Aptamer sequence; it is folded with ``RNA.fold``.
        shift: Number of random DNA letters inserted on each side.

    Returns:
        ``(final_seq, active_seq)``. ``final_seq`` is the full sequence;
        ``active_seq`` is the same string with the aptamer replaced by its
        dot-bracket fold in which every '.' is rewritten to 'N'.
    """
    DNAbet = ["a", "c", "g", "t"]
    take, free_E = RNA.fold(aptamer)

    # Draw the left spacer first, then the right one, as before.
    left_flank = seq_five + "".join(random.choice(DNAbet) for _ in range(shift))
    right_flank = "".join(random.choice(DNAbet) for _ in range(shift)) + seq_three

    final_seq = left_flank + aptamer + right_flank
    # Assemble from the known flanks instead of splitting final_seq on the
    # aptamer text: a split picks the wrong pieces whenever the aptamer
    # string also occurs inside a flank or a random spacer.
    active_seq = left_flank + take.replace(".", "N") + right_flank
    return final_seq, active_seq
Пример #28
0
def tenfold(fasta):
    """Fold tabbed FASTA input (gene;chr:start-stop \\t sequence) and
    print a binary string for the structure returned by RNA.fold:

    1 = double-stranded (bracket)
    0 = single-stranded (dot)

    One line ``label<TAB>binary`` is printed per input line. Every line must
    contain exactly one tab; anything else raises ``ValueError``.
    """
    # The context manager closes the input file, which the bare
    # ``for line in open(fasta)`` never did.
    with open(fasta) as handle:
        for line in handle:
            label, sequence = line.strip().split("\t")
            dotplot, fe = RNA.fold(sequence)
            paired_marked = re.sub(r'\(|\)', r'1', dotplot)
            binary = re.sub(r'\.', r'0', paired_marked)
            # Single-argument form prints the same under Python 2 and 3.
            print(label + "\t" + binary)
Пример #29
0
def main(argv):
    ArgumentDic = CmdParser(argv)
    if ArgumentDic["all"]:
        # necessary to allow the Traceback to finish without reaching the recursionlimit
        # only needed for calculating all possible structures
        resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
        sys.setrecursionlimit(10**6)    
    s, m = InputParser(ArgumentDic["FastaFile"])
    m = Nussinov(s, m)
    # this catches sequences with no secondary structure
    if m[0][len(s) - 1] == 0:
        print 
        print "  Your Sequence has no computable secondary structure."
        print 
    else:
        if ArgumentDic["all"]:
            p, n = cTraceBack(s, m)
        else:
            p, n = TraceBack(s, m)
        print 
        print "  This sequence has a maximum of " + str(n) + " base pairs."
        print     
        print s
        R = PosSecStruc(p, n)
        for res in R:
            basepairseq = ""
            for _ in range(len(s)):
                basepairseq +="."
            for pair in res:
                bps = basepairseq[:pair[0]] + "(" + basepairseq[pair[0] + 1: pair[1]] + ")" + basepairseq[pair[1] + 1:]
                basepairseq = bps
            print basepairseq
        print
        if ArgumentDic["graphic"] == True:
            RNA.gmlRNA(s, basepairseq, "Nussinov.gml", "A")
            g = igraph.read("Nussinov.gml")            
            layout = g.layout("kk")
            igraph.plot(g, layout = layout)            
Пример #30
0
    def test_eval_structure_verbose(self):
        print "test_eval_structure_verbose"
        fc = RNA.fold_compound(seq1)
        filename= "test-RNA-mfe_eval.py.out"
        try:
            f = open(filename, "w")
            print filename ," is opened for writing\n"
            energy = fc.eval_structure_verbose(struct1,f)
            energy2 = fc.eval_structure_verbose(struct1,None)

            self.assertEqual("%6.2f" % energy, "%6.2f" % -5.60)
            print  struct1, "[%6.2f" % energy,"]\n"
        except IOError:
            print "Could not open ",filename
Пример #31
0
#FoldingFreeEnergy
'''Given an mRNA sequence, RNAfold can calculate the folding free energy of the mRNA secondary structure.
   Please download the library for your system from 'https://www.tbi.univie.ac.at/RNA/'. '''

import RNA

mrna = 'ATGC'  # Input sequence.
# RNA.fold returns a pair; element 1 (used below) is the folding free energy.
mfe = RNA.fold(str(mrna))
print(float('%6.2f' % mfe[1]))  # Print the energy rounded to two decimal places.
Пример #32
0
 def test_mfe_window(self):
     print "test_mfe_window"
     fc = RNA.fold_compound(seq1, None, RNA.OPTION_MFE | RNA.OPTION_WINDOW)
     (mfe) = fc.mfe_window()
     print "[ %6.2f ]" % mfe
     self.assertEqual("%6.2f" % mfe, "%6.2f" % -5.60)
Пример #33
0
    # Three example inputs (sequence plus two structures each). Every
    # assignment overwrites the previous one, so only the last triple
    # (the 80 nt example) is actually used below.
    sequence = "GAAGUGUGGCUGGCAAGGAGAAUUAUGUGUGAAAAUUUGUCGGUAGAUAGGCAGUGGUGGCGAAGGGAGGGGGAAACGAUUUUGCCUCCGACGUCCAUCAUCGCCAGACAGGACGGUCUCCCUUCCUACAGGUCUCUGGCACAUAUCCUC"
    s1 = "....(((.....))).(((....(((((((.(.....((((....))))(((.((((((((....(((((.(((......))).)))))...).))))))).)))((((((((.((....)).))))....)))).).)))))))))).."
    s2 = "((.(((((((.((.(((((((....(.(((.......((((....))))(((.(((((((((...(((((.(((......))).)))))..)).))))))).)))..))).)....)))).))))).....)))....))))...))..."

    sequence = "GGAUAUUUCUUGUUGGCGCUCGGGCCGUCACUCUCCUCCCAACGAAACCCCAGGAGAGACAUCACAUAAGCAAACCUUUUGAUUUGAUGUAACCGUGGAGAAAACAAGUUCCUGUUACUUGGACACGUCUUUAGAAAAAACAGGAACGGU"
    s1 = "......((((.((((((......)))....(((((((..............)))))))((((((((.............))...)))))))))...))))...((..(((((((((....(((....)))........))))))))).))"
    s2 = "........(((((((((......)))....(((((((..............)))))))((((((.((((.........)).)).)))))).....((((((...(((((.......)))))......)))))).....))))))......"

    sequence = "AACGGGUGGGUACUCCCUGGUAAAGCCCGAGUCGAGACAUUGUCAUAUGUAUGAGAUUCCUUUGUUGUUGGUCGGCUGGG"
    s1 = "..((((((((....))).......)))))((((((.....(.((((....)))).)...((........))))))))..."
    s2 = "...((((...((((....))))..)))).(((((((((....((..........)).......))).....))))))..."

    # Debug output is switched on for merge_check.
    Debug = True
    # Debug = False

    sections = merge_check(sequence, s1, s2, Debug=Debug)

    print("input")
    print(sequence)
    print(s1)
    print(s2)

    # fc is created but not used in the lines visible here; the pair tables
    # are only passed to the table printer.
    fc = RNA.fold_compound(sequence)
    pt1 = RNA.ptable_from_string(s1)
    pt2 = RNA.ptable_from_string(s2)

    helper.print_tables(s1, s2, pt1, pt2)
    # res = merge_recursive.recursive_merge(sequence, s1, s2, sections=sections, search_width=500, Debug=False, Verbose=True, new=True, plot_graph=False)

    print(sections)
Пример #34
0
 def test_subopt3(self):
     print "test_subopt_cb (as fold_compound method)\n"
     a = RNA.fold_compound(sequence)
     a.subopt_cb(500, print_subopt_result)
Пример #35
0
def merge_check(sequence, s1, s2, Debug=False):

    if Debug:
        coloredlogs.DEFAULT_LOG_FORMAT = '%(levelname)s %(message)s'
        coloredlogs.install(level='DEBUG')

    # logging.info("It works!")
    # logging.debug("This is a log message.")
    # print_d("This is a log message.", "test")
    # logging.error("this is an error message")

    ptables_s1 = RNA.ptable_from_string(s1)
    ptables_s2 = RNA.ptable_from_string(s2)
    ltables_s1 = RNA.loopidx_from_ptable(ptables_s1)
    ltables_s2 = RNA.loopidx_from_ptable(ptables_s2)

    def next_int_loops(min_pos, max_pos):
        '''
        Find sections enclosed by a base pair that s1 and s2 share.

        Every shared opening pair between min_pos and max_pos that passes the
        compatibility checks becomes a candidate. Candidates are ranked by
        how close the inner/outer size ratio is to 0.6 and then marked
        greedily in a position mask, skipping candidates whose interior is
        already taken.

        Returns a list of [start, end] index pairs, one per run of marked
        positions in that mask.
        '''
        print_d("start next int loops", min_pos, max_pos)

        # Only used in the debug output below; never modified.
        curr_lvl = 0
        diff = 0

        # candidates and c2 are created but never filled or read.
        candidates = defaultdict(lambda: [float('inf'), float('inf'), 0, 0, 0])
        c2 = defaultdict(list)
        c3 = []

        # The pair tables are 1-based and enumerate starts at 0, so p1/p2 are
        # the partners of the 1-based position i + 1.
        for i, (p1, p2) in enumerate(zip(ptables_s1[1:], ptables_s2[1:])):
            # if i==0: continue
            if i < min_pos or i > max_pos: continue

            # Unpaired in both structures: nothing to do.
            if p1 == 0 and p2 == 0:
                continue
            # if p1==p2 and p1 > i:
            #     curr_lvl += 1
            # elif p1==p2 and p1 < i:
            #     curr_lvl -= 1

            # check which compatible sections have the highest potential for recursion
            if p1 == p2 and i < p1:

                j = p1
                # check compatibility
                last_i = i - 1
                next_j = j + 1

                curr_i = i + 1
                next_i = i + 2
                last_j = j - 1

                # out of bounds
                if next_j > len(s1) or last_i < 0:
                    continue

                # inner/outer section not compatible
                # if ptables_s1[last_i] != ptables_s2[last_i] or ptables_s1[next_j] != ptables_s2[next_j]:
                #     print_d(last_i,i,j,next_j)
                #     print_d("fail1",ptables_s1[last_i], ptables_s2[last_i] )
                #     print_d("fail2",ptables_s1[next_j], ptables_s2[next_j] )
                #     continue

                print_d(curr_i, next_i, last_j, j)
                # print_d("fail1",ptables_s1[curr_i], ptables_s2[curr_i] )
                # print_d("fail2",ptables_s1[i], ptables_s2[i] )

                # The table entry at index i (the position before the shared
                # pair) has to agree as well.
                # NOTE(review): the "fail2" message prints the entries at
                # last_j although the condition tests index i.
                if ptables_s1[curr_i] != ptables_s2[curr_i] or ptables_s1[
                        i] != ptables_s2[i]:
                    print_d(curr_i, next_i, last_j, j)
                    print_d("fail1", ptables_s1[curr_i], ptables_s2[curr_i])
                    print_d("fail2", ptables_s1[last_j], ptables_s2[last_j])
                    continue

                # debatable - extra ( & ) at end/start pos
                # if ptables_s1[last_i] == 0 or ptables_s1[next_j] == 0:
                #     continue

                # if start and end are unpaired, both i and j have to be unpaired,
                # otherwise energies don't add up properly / various errors

                # From here on last_i refers to index i, not i - 1.
                last_i = i

                if ptables_s1[last_i] == 0 and ptables_s2[last_i] != 0:
                    continue
                if ptables_s1[next_j] == 0 and ptables_s2[next_j] != 0:
                    continue

                # if ptables_s1[i+1] == 0 or ptables_s1[j] == 0:
                #     continue#
                # Both neighbouring table entries must be paired in s1 ...
                if ptables_s1[i] == 0 or ptables_s1[j + 1] == 0:
                    continue

                # ... and they must pair with each other.
                # if ptables_s1[i] < ptables_s1[j+1]: # last i,j: )( instead of ()
                if ptables_s1[ptables_s1[i]] != ptables_s1[
                        j + 1]:  # last i,j: )( instead of ()
                    continue

                # Outer structures: the section with its inner part replaced
                # by dots.
                outer_s1 = s1[min_pos:i] + "." * (p1 - i) + s1[p1:max_pos + 1]
                outer_s2 = s2[min_pos:i] + "." * (p2 - i) + s2[p2:max_pos + 1]

                print_d(outer_s1, s1[i - 1], ptables_s1[i], s1[p1],
                        ptables_s1[j + 1], "//", ptables_s1[ptables_s1[i]])
                print_d(outer_s2)

                inner_s1 = s1[i:p1]
                inner_s2 = s2[i:p1]

                print_d(inner_s1)
                print_d(inner_s2)

                inner_size = p1 - i
                outer_size = max_pos - min_pos - inner_size
                # print (outer_s1)
                # print (outer_s2)
                # print (inner_s1)
                # print (inner_s2)
                bp_dist_inner = RNA.bp_distance(inner_s1, inner_s2)
                bp_dist_outer = RNA.bp_distance(outer_s1, outer_s2)
                # bp_dist = max(bp_dist_outer, bp_dist_inner) - min(bp_dist_outer, bp_dist_inner)

                # Ranking score: distance of the inner/outer ratio from 0.6.
                # The first assignment is overwritten immediately.
                # NOTE(review): raises ZeroDivisionError when outer_size is 0.
                # optimize = (inner_size/outer_size)*bp_dist_inner
                optimize = (inner_size / outer_size)
                optimize = abs(0.6 - (inner_size / outer_size))

                # if the step is too small
                # if bp_dist_outer < 3:
                #     continue

                # if min(bp_dist_outer, bp_dist_inner) < 10: # 300_min10
                #     continue

                # Both parts need a base pair distance of at least 3.
                if min(bp_dist_outer, bp_dist_inner) < 3:  # 300_r
                    continue

                # this is the standard.. ?
                # if min(bp_dist_outer, bp_dist_inner) < 1:
                #     continue

                # overwrite with better candidate
                print_d("candidate", i, p1, p2, curr_lvl, diff, "inner size",
                        inner_size, "outer size", outer_size, "opt:", optimize)

                c3.append((i, j, optimize, inner_size, outer_size,
                           bp_dist_inner, bp_dist_outer))
                """
                recursion conditions: inner section > 20 bp
                outer / inner section in the region of 0.25 to 0.75

                maximize inner section size which has at least a 3 bp distance to the outer section

                """

            # print ("---")
        print_d("found candidates:")

        # Ascending by score: the ratio closest to 0.6 comes first.
        c3 = sorted(c3, key=lambda item: item[2],
                    reverse=False)  # smallest score first
        # Mask over the pair table indices; 1 marks a position that belongs
        # to an accepted candidate.
        available = [0 for i in ptables_s1]

        for key in c3:
            # NOTE(review): this prints the previous i and j, the new values
            # are only assigned on the next two lines.
            print_d("c3", i, j)
            i = key[0]
            j = key[1]

            # all nucleotides between i and j need to be available
            if all(i == 0 for i in available[i + 1:j]):
                print_d('add', i, j)
                available[i:j] = [1] * (j - i)

        # Turn the mask into [start, end] pairs; mode is 1 while inside a
        # run of marked positions.
        # NOTE(review): a run that reaches the end of the mask is never
        # appended, because a pair is only emitted on the next 0.
        mode = 0
        indices = []
        for i in range(len(available)):
            if available[i] == 1:
                if mode == 0:
                    start_pos = i
                mode = 1
            else:
                if mode == 1:
                    indices.append([start_pos, i])
                mode = 0
        print_d("final indices", indices)
        return indices

    def ignore_unpaired_nt(i, j):
        """Return the first position in [i, j) paired in both s1 and s2.

        Falls back to ``j`` when there is no such position.
        """
        pos = i
        while pos < j:
            unpaired_somewhere = ptables_s1[pos] == 0 or ptables_s2[pos] == 0
            if not unpaired_somewhere:
                return pos
            pos += 1
        # no position is paired in both structures
        return j

    def ignore_non_aligned(i, j):
        """Return the first position in [i, j) closing a pair shared by s1 and s2.

        A position qualifies when both pair tables name the same non-zero
        partner and that partner lies before the position. Falls back to
        ``j`` when there is no such position.
        """
        pos = i
        while pos < j:
            partner = ptables_s1[pos]
            if partner == ptables_s2[pos] and partner != 0 and pos > partner:
                return pos
            pos += 1
        # no shared closing pair found
        return j

    def bp_dist_section(i, j):
        """Base pair distance between s1 and s2 for the 1-based section i..j."""
        section = slice(i - 1, j)
        return RNA.bp_distance(s1[section], s2[section])

    def new_exterior_loops(i, j):
        """Return merged (start, end) sections of s1 and s2 within i..j.

        The sections are collected per structure with the help of the loop
        index tables, combined where they overlap, and finally merged with
        their successor where the two structures disagree at the section
        border or where neighbouring sections overlap.
        """
        # The pair tables are 1-based.
        if i == 0:
            i = 1

        def exterior_loops_per_s(ptable, ltable, i, j):
            # Collect (start, end) tuples for one structure: start is the
            # next paired position, end is the next paired position that
            # carries the same loop index (or j when none is found).
            i_s1 = i
            j_s1 = j
            s1_list = []
            while True:
                # print ("s", i_s1)
                # ignore unpair nt
                while i_s1 < j:
                    if ptable[i_s1] != 0:
                        break
                    i_s1 += 1
                if i_s1 >= j:
                    break
                # NOTE(review): j_s1 starts at j and is not changed before
                # this point, so this loop never runs; j_s1 is unused
                # afterwards.
                while j_s1 < j:
                    if ptable[j_s1] != 0:
                        break
                    j_s1 -= 1

                i_s1_start = i_s1
                end_loop = ltable[i_s1]

                i_s1 += 1

                while i_s1 < j:
                    if ltable[i_s1] == end_loop and ptable[i_s1] != 0:
                        break
                    i_s1 += 1
                # print (i_s1_start,j_s1, i_s1)
                s1_list.append((i_s1_start, i_s1))
                # break
                i_s1 += 1

            return s1_list

        s1_list = exterior_loops_per_s(ptables_s1, ltables_s1, i, j)
        s2_list = exterior_loops_per_s(ptables_s2, ltables_s2, i, j)
        # print (s1_list)
        # print (s2_list)
        # From here on i and j are cursors into s1_list and s2_list.
        i = 0
        j = 0
        # Smallest start seen since the last emitted section.
        p_min = None
        exterior_loops = []
        # go over the list of exterior loops of s1 and s2 and see if
        # there are overlapping sections, which have to be merged here

        while (len(s1_list) != i and len(s2_list) != j):

            # print ("start", i, j, s1_list[i])

            if p_min == None:
                p_min = min(s1_list[i][0], s2_list[j][0])

            t1 = s1_list[i]
            t2 = s2_list[j]
            min_t = min(min(t1[0], t2[0]), p_min)
            max_t = max(t1[1], t2[1])
            print_d('current min/max:', min_t, max_t)

            # If the next section of either structure starts before the
            # current end, advance that cursor and extend the section.
            if i + 1 < len(s1_list):
                next_i_min = s1_list[i + 1][0]
                print_d('s1 list:', next_i_min, max_t)
                if next_i_min < max_t:
                    i += 1
                    continue
            if j + 1 < len(s2_list):
                next_j_min = s2_list[j + 1][0]
                print_d('s2 list:', next_j_min, max_t)
                if next_j_min < max_t:
                    j += 1
                    continue
            exterior_loops.append((min_t, max_t))

            i += 1
            j += 1
            p_min = None
            # print ("end", i, j)
        # check here for )(  /   ).  /   .(
        #                )(  /   .(  /   ).
        loop_id = 0
        while len(exterior_loops) > 1 and loop_id + 1 < len(exterior_loops):
            loop = exterior_loops[loop_id][1]
            next_loop = exterior_loops[loop_id + 1][0]
            print_d("check", loop, next_loop)
            print_d(s1[loop - 1], s1[next_loop - 1])
            print_d(s2[loop - 1], s2[next_loop - 1])
            # check for compatibility if end and start for the next loop is next to each other
            # if loop+1 == next_loop and (s1[loop-1] != s2[loop-1] or s1[next_loop-1] != s2[next_loop-1]):
            # if loop+1 == next_loop and (ptables_s1[loop] != ptables_s2[loop] or ptables_s1[next_loop] != ptables_s2[next_loop]):
            if loop + 1 <= next_loop and (
                    ptables_s1[loop] != ptables_s2[loop]
                    or ptables_s1[next_loop] != ptables_s2[next_loop]):

                print_d('merge', loop_id, loop_id + 1)
                exterior_loops[loop_id] = (min(exterior_loops[loop_id][0], exterior_loops[loop_id+1][0]),\
                                            max(exterior_loops[loop_id][1], exterior_loops[loop_id+1][1]) )
                exterior_loops.pop(loop_id + 1)
                loop_id = 0
            # overlapping
            if loop >= next_loop:
                print_d('merge', loop_id, loop_id + 1)
                exterior_loops[loop_id] = (min(exterior_loops[loop_id][0], exterior_loops[loop_id+1][0]),\
                                            max(exterior_loops[loop_id][1], exterior_loops[loop_id+1][1]) )
                exterior_loops.pop(loop_id + 1)
                loop_id = 0

            # NOTE(review): after a merge loop_id was reset to 0, so the scan
            # continues at index 1 rather than re-checking index 0.
            loop_id += 1
        return exterior_loops

    def recursive_walk(i, j, r_depth=0):
        """Recursively collect nested section boundaries between i and j.

        Every loop ``(start, end)`` reported by ``next_int_loops(i, j)``
        becomes one entry of the form ``[start, <nested entries...>, end]``,
        where the nested entries come from walking ``(start, end - 1)``.

        :param i: lower bound handed to ``next_int_loops``
        :param j: upper bound handed to ``next_int_loops``
        :param r_depth: current recursion depth (0 for the outermost call)
        :return: list of entries for this level; on the outermost level the
            list is additionally framed by a leading ``0`` and a trailing ``j``
        """
        sections = []

        for start, end in next_int_loops(i, j):
            # descend into the section; the upper bound is shifted down by one
            nested = recursive_walk(start, end - 1, r_depth=r_depth + 1)
            sections.append([start] + nested + [end])

        if r_depth == 0:
            # only the top level carries the overall start (0) and end (j)
            sections = [0] + sections + [j]

        return sections

    return recursive_walk(0, len(s1))
Пример #36
0
    def next_int_loops(min_pos, max_pos):
        """Pick non-overlapping sections enclosed by pairs shared by s1 and s2.

        The scan walks the pair tables in lockstep. Loop index ``i`` is
        0-based, so ``p1``/``p2`` are the pair-table entries at ``i + 1``.
        A position is a candidate when

        * it opens a base pair that is identical in both structures
          (``p1 == p2`` and ``i < p1``), with ``j = p1``,
        * pair-table entry ``i`` agrees between both structures,
        * entries ``i`` and ``j + 1`` are both paired in s1 and the
          partner-of-partner of ``i`` equals the partner of ``j + 1``,
        * the base-pair distance of both the inner part (``s1[i:p1]``) and the
          outer part (everything else in ``min_pos..max_pos``, inner part
          blanked with dots) is at least 3.

        Candidates are scored by ``abs(0.6 - inner_size / outer_size)`` and
        accepted greedily, best (smallest) score first, as long as the span
        ``i + 1 .. j - 1`` is still free.

        :param min_pos: smallest loop index considered (inclusive)
        :param max_pos: largest loop index considered (inclusive)
        :return: list of ``[start, end]`` lists, one per contiguous run of
            accepted positions; ``end`` is the first index after the run
        """
        print_d("start next int loops", min_pos, max_pos)

        # Never modified in this function; kept only so the "candidate" debug
        # line below keeps its column layout.
        curr_lvl = 0
        diff = 0

        # (i, j, score, inner_size, outer_size, bp_dist_inner, bp_dist_outer)
        found = []

        for i, (p1, p2) in enumerate(zip(ptables_s1[1:], ptables_s2[1:])):
            if i < min_pos or i > max_pos:
                continue

            # Only the opening side of a pair shared by both structures is of
            # interest; this also skips positions unpaired in both (p1 == p2 == 0).
            if p1 != p2 or i >= p1:
                continue

            j = p1

            # entries i - 1 and j + 1 must stay inside the pair table
            if j + 1 > len(s1) or i < 1:
                continue

            print_d(i + 1, i + 2, j - 1, j)

            # The entry just outside the shared pair has to agree as well.
            # (Entry i + 1 is p1/p2 and is already known to be identical.)
            if ptables_s1[i] != ptables_s2[i]:
                print_d(i + 1, i + 2, j - 1, j)
                print_d("fail1", ptables_s1[i + 1], ptables_s2[i + 1])
                print_d("fail2", ptables_s1[i], ptables_s2[i])
                continue

            # Both flanking entries must be paired in s1. Because entry i is
            # identical in s1 and s2 at this point, the mixed
            # "unpaired in s1 but paired in s2" cases need no separate test.
            if ptables_s1[i] == 0 or ptables_s1[j + 1] == 0:
                continue

            # reject ")(" arrangements: partner-of-partner of i must match
            # the partner of j + 1
            if ptables_s1[ptables_s1[i]] != ptables_s1[j + 1]:
                continue

            # outer part: the section with the inner region replaced by dots
            outer_s1 = s1[min_pos:i] + "." * (p1 - i) + s1[p1:max_pos + 1]
            outer_s2 = s2[min_pos:i] + "." * (p2 - i) + s2[p2:max_pos + 1]

            print_d(outer_s1, s1[i - 1], ptables_s1[i], s1[p1],
                    ptables_s1[j + 1], "//", ptables_s1[ptables_s1[i]])
            print_d(outer_s2)

            inner_s1 = s1[i:p1]
            inner_s2 = s2[i:p1]

            print_d(inner_s1)
            print_d(inner_s2)

            inner_size = p1 - i
            outer_size = max_pos - min_pos - inner_size

            bp_dist_inner = RNA.bp_distance(inner_s1, inner_s2)
            bp_dist_outer = RNA.bp_distance(outer_s1, outer_s2)

            # Score: distance of the inner/outer size ratio from 0.6.
            # NOTE(review): raises ZeroDivisionError when outer_size is 0 and
            # is an integer division on Python 2 without true division --
            # confirm the intended interpreter and input range.
            optimize = abs(0.6 - (inner_size / outer_size))

            # a split is only worthwhile if both parts differ by >= 3 pairs
            if min(bp_dist_outer, bp_dist_inner) < 3:  # 300_r
                continue

            print_d("candidate", i, p1, p2, curr_lvl, diff, "inner size",
                    inner_size, "outer size", outer_size, "opt:", optimize)

            found.append((i, j, optimize, inner_size, outer_size,
                          bp_dist_inner, bp_dist_outer))

        print_d("found candidates:")

        # ascending sort: the score closest to the 0.6 target comes first
        found.sort(key=lambda item: item[2])
        available = [0 for _ in ptables_s1]

        for candidate in found:
            start, end = candidate[0], candidate[1]
            print_d("c3", start, end)

            # all nucleotides strictly between start and end must be free
            if all(flag == 0 for flag in available[start + 1:end]):
                print_d('add', start, end)
                available[start:end] = [1] * (end - start)

        # turn the occupancy mask into [start, end) runs
        indices = []
        run_start = None
        for pos, flag in enumerate(available):
            if flag == 1:
                if run_start is None:
                    run_start = pos
            elif run_start is not None:
                indices.append([run_start, pos])
                run_start = None

        print_d("final indices", indices)
        return indices
Пример #37
0
def _connect_neighbor(CG, saddles, args, fseq, ni, nbr):
    """Connect node *ni* to structure *nbr*; activate or initialise *nbr*.

    Does nothing when both structures are identical, when the edge already
    exists, or when ``add_transition_edges`` returns a false value. On
    success an already known *nbr* is flagged active again, while a new *nbr*
    receives its energy, a zero occupancy and the next free identity.

    :param CG: Conformation Graph (NetworkX)
    :param saddles: dictionary of all previous findpath runs
    :param args: commandline arguments, forwarded to ``add_transition_edges``
    :param fseq: full sequence used for the energy evaluation
    :param ni: structure of an existing node
    :param nbr: structure of the candidate neighbor
    """
    if ni == nbr or CG.has_edge(ni, nbr):
        return

    # Has to be checked first: the attribute assignments below rely on
    # add_transition_edges having put nbr into the graph on success.
    known = CG.has_node(nbr)
    if not add_transition_edges(CG, saddles, args, ni, nbr):
        return

    if known:
        # in case it was there but inactive
        CG.node[nbr]['active'] = True
        return

    en = round(RNA.energy_of_structure(fseq, nbr, 0), 2)
    CG.node[nbr]['active'] = True
    CG.node[nbr]['energy'] = en
    CG.node[nbr]['occupancy'] = 0.0
    CG.node[nbr]['identity'] = CG.graph['seqid']
    CG.graph['seqid'] += 1


def expand_graph(CG, saddles, args, mode='default'):
    """ Find new neighbors and add them to the Conformation Graph

  The function is divided into two parts. 1) The current mfe structure
  is connected to all present structures, 2) The conformation graph is
  expanded using helix-breathing.

  :param CG: Conformation Graph (NetworkX)
  :param saddles: dictionary of all previous findpath runs
  :param args: commandline arguments and other global variables
    (read here: occupancy_cutoff, min_breathing; the object is also passed
    on to add_transition_edges)
  :param mode: choose from (1) mfe-only: only use current mfe as potential new
    neighbor (2) breathing-only: only use breathing neighborhood, (3) default:
    do both mfe and breathing.

  :raises ValueError: if mode is not one of the three names above

  :return: Number of new nodes

  """
    cutoff = args.occupancy_cutoff
    mfree = args.min_breathing

    csid = CG.graph['seqid']
    fseq = CG.graph['full_sequence']
    tlen = CG.graph['transcript_length']
    seq = fseq[0:tlen]

    if mode not in ['default', 'mfe-only', 'breathing-only']:
        raise ValueError('unknown expansion mode')

    # MFE structure of the transcribed part, padded with dots to full length
    ss, mfe = RNA.fold(seq)
    ss = ss + '.' * (len(fseq) - tlen)

    regular_mode = True  # NOTE: HACK! this is only here to produce any possible graph

    # If there is no node because we are at the beginning, add the node;
    # otherwise go through all nodes and try to add transition edges.
    if nx.number_of_nodes(CG) == 0:
        en = round(RNA.energy_of_structure(fseq, ss, 0), 2)
        CG.add_node(ss,
                    energy=en,
                    occupancy=1.0,
                    identity=CG.graph['seqid'],
                    active=True)
        CG.graph['seqid'] += 1
    elif mode == 'default' or mode == 'mfe-only':
        for ni in CG.nodes():
            if not CG.node[ni]['active']:
                continue
            _connect_neighbor(CG, saddles, args, fseq, ni, ss)

    if mode == 'default' or mode == 'breathing-only':
        # helix breathing graph expansion
        for ni, data in CG.nodes_iter(data=True):
            if not data['active']:
                continue
            # sparsely populated nodes are not expanded
            if regular_mode and data['occupancy'] < cutoff:
                continue

            # only the transcribed part of the structure can breathe
            sss = ni[0:len(seq)]

            for onbr in open_breathing_helices(seq, sss, free=mfree):
                nbr = fold_exterior_loop(seq, onbr)
                # pad to the length of the node structures
                nbr += '.' * (len(ni) - len(nbr))
                # a neighbor that cannot be connected is silently dropped
                _connect_neighbor(CG, saddles, args, fseq, ni, nbr)

    if not CG.has_node(ss) or not CG.node[ss]['active']:
        # single-argument form: identical output with the Python 2 print
        # statement and the Python 3 print function
        print("# WARNING:  {} [mfe secondary structure not connected]".format(ss))

    return CG.graph['seqid'] - csid
Пример #38
0
    """
    representatives are the structures and their free energies.
    """
    f = open(pathToFile,"w")
    f.write(" "+seq+"\n")
    for i in range(len(representatives)):
        f.write(" "+str(i+1)+" "+representatives[i][0]+" "+str(representatives[i][1])+"\n")
    f.close()

# Script entry point: fold one hard-coded example sequence, run the main loop
# on it and reduce the resulting structure set to cluster representatives.
if __name__ == "__main__":
    # Reading the input from a file is currently disabled:
    #inputfile = sys.argv[1]
    #records = readFasta(inputfile)
    
    #records = readRNAxplorerFile(inputfile)
    seq = "GGGAAUUAUUGUUCCCUGAGAGCGGUAGUUCUC"
    # RNA.fold yields the structure together with its energy
    (mfe_struct, mfe) = RNA.fold(seq)
    print mfe
    print mfe_struct
    # presumably collects a set of secondary structures starting from the
    # mfe structure -- TODO confirm against mainloop
    sss = mainloop(seq, mfe_struct, mfe, 1)
    # presumably removes duplicate structures -- verify against uniq_list
    sss = uniq_list(sss)
    
    # cluster the secondary-structure set
    clusters = generateClusters(sss)   
    # extract cluster representatives (could be centroid or mfe; here we chose mfe)
    representatives = extractClusterRepresentatives(seq, clusters)
    
    # Adding the open chain as an extra representative is currently disabled:
    #openchain = "." * len(seq)
    #energy = RNA.energy_of_struct(seq,openchain)
    #representatives.insert(0, (openchain,energy))
    
    # sort according to the free energy and, if it is equal, lexicographically.
    # NOTE(review): no sorting code follows this comment in the visible source.
Пример #39
0
 def bp_dist_section(i, j):
     """Return the base-pair distance between s1 and s2 on one section.

     The section covers the 1-based positions ``i`` through ``j`` inclusive,
     i.e. the string slice ``[i - 1:j]`` of both structures.
     """
     section = slice(i - 1, j)
     return RNA.bp_distance(s1[section], s2[section])