示例#1
0
    def test_angle_stat_get_angle_from_cg(self):
        """
        Check the angle reported by the angle stats of interior loop ``i0``.

        The elements ``s0``, ``s1`` and ``h0`` receive hand-picked
        coordinates; both stats returned for ``i0`` must report the angle
        between the reversed axis of ``s0`` and the axis of ``s1``, which
        is 135 degrees for this geometry.
        """
        fa_text = """>1
        AAACCGGGCCCCCCAAUUU
        (((..(((...)))..)))
        """

        (cg,) = ftmc.CoarseGrainRNA.from_fasta_text(fa_text)

        # (element, (start, end), (twist at start, twist at end))
        stem_geometry = (
            ("s0",
             ([0., 0., 0.], [0., 0., 1.]),
             ([0., -1., 0], [0., 1., 0.])),
            ("s1",
             ([0., 0., 2.], [0., 1., 3.]),
             ([-1., 0., 0.], [1., 0., 0.])),
        )
        for elem, ends, twist_pair in stem_geometry:
            cg.coords[elem] = np.array(ends[0]), np.array(ends[1])
            cg.twists[elem] = np.array(twist_pair[0]), np.array(twist_pair[1])

        cg.coords["h0"] = np.array([0, 1, 3]), np.array([0, 2, 4])
        cg.add_bulge_coords_from_stems()

        print(cg.coords["i0"])
        print(cg.twists)

        stat_a, stat_b = cg.get_bulge_angle_stats("i0")

        # s0 is traversed end -> start, s1 start -> end.
        s0_backward = cg.coords["s0"][0] - cg.coords["s0"][1]
        s1_forward = cg.coords["s1"][1] - cg.coords["s1"][0]

        self.assertAlmostEqual(stat_a.get_angle(),
                               ftuv.vec_angle(s0_backward, s1_forward))
        self.assertAlmostEqual(stat_b.get_angle(),
                               ftuv.vec_angle(s1_forward, s0_backward))

        expected = math.radians(135)
        for stat in (stat_a, stat_b):
            self.assertAlmostEqual(stat.get_angle(), expected)
示例#2
0
    def test_angle_stat_get_angle_from_cg(self):
        """
        Angle stats extracted for ``i0`` must match the stem geometry.

        Coordinates and twists of ``s0``/``s1`` (and the coordinates of
        ``h0``) are set by hand. The angle of each returned stat is compared
        with ``ftuv.vec_angle`` of the stem axes and with the fixed value of
        135 degrees.
        """
        fa_text = """>1
        AAACCGGGCCCCCCAAUUU
        (((..(((...)))..)))
        """

        (cg,) = ftmc.CoarseGrainRNA.from_fasta_text(fa_text)

        def vec(*components):
            # Shorthand for building a coordinate/twist vector.
            return np.array(list(components))

        cg.coords["s0"] = vec(0., 0., 0.), vec(0., 0., 1.)
        cg.twists["s0"] = vec(0., -1., 0), vec(0., 1., 0.)

        cg.coords["s1"] = vec(0., 0., 2.), vec(0., 1., 3.)
        cg.twists["s1"] = vec(-1., 0., 0.), vec(1., 0., 0.)

        cg.coords["h0"] = vec(0, 1, 3), vec(0, 2, 4)
        cg.add_bulge_coords_from_stems()

        print(cg.coords["i0"])
        print(cg.twists)

        first_stat, second_stat = cg.get_bulge_angle_stats("i0")

        # Axis of s0 pointing from its end to its start, axis of s1 pointing
        # from its start to its end.
        into_s0 = cg.coords["s0"][0] - cg.coords["s0"][1]
        along_s1 = cg.coords["s1"][1] - cg.coords["s1"][0]

        self.assertAlmostEqual(first_stat.get_angle(),
                               ftuv.vec_angle(into_s0, along_s1))
        self.assertAlmostEqual(second_stat.get_angle(),
                               ftuv.vec_angle(along_s1, into_s0))
        self.assertAlmostEqual(first_stat.get_angle(), math.radians(135))
        self.assertAlmostEqual(second_stat.get_angle(), math.radians(135))
示例#3
0
def main():
    """
    Run ccd() on a random chain and assert that the chain geometry survives.

    The bond lengths and the angles between consecutive bonds of the moving
    segment are recorded before and after the ccd() call and must agree
    (``allclose``). An optional first command line argument gives the number
    of iterations; without it, 10 iterations are run with printing enabled.
    """
    def bond_angles(chain):
        # Angle between every pair of consecutive bond vectors.
        return [
            vec_angle(chain[k - 1] - chain[k - 2], chain[k] - chain[k - 1])
            for k in range(2, len(chain))
        ]

    def bond_lengths(chain):
        # Length of every bond between neighbouring points.
        return [magnitude(chain[k] - chain[k - 1])
                for k in range(1, len(chain))]

    # Moving segment
    moving = make_random_chain(20)
    # Fixed segment: last three residues of the moving segment after
    # applying a random rotation/translation
    fixed = rotate_last_three(moving)

    angles_before = bond_angles(moving)
    lengths_before = bond_lengths(moving)

    cli_args = sys.argv[1:]
    if not cli_args:
        moving = ccd(moving, fixed, 10, True)
    else:
        moving = ccd(moving, fixed, iterations=int(cli_args[0]),
                     print_d=False)

    angles_after = bond_angles(moving)
    lengths_after = bond_lengths(moving)

    assert allclose(lengths_before, lengths_after)
    assert allclose(angles_before, angles_after)
示例#4
0
文件: ccd.py 项目: pkerpedjiev/ernwin
def main():
    """
    Self-check for ccd(): closing the chain must not distort it.

    A random 20-point chain is generated, ccd() is run against the target
    produced by rotate_last_three(), and the bond lengths and inter-bond
    angles measured before and after are asserted to be equal within
    ``allclose`` tolerance. ``sys.argv[1]``, if present, is the iteration
    count; otherwise 10 iterations are run with printing enabled.
    """
    def measure(chain):
        # Returns (angles between consecutive bonds, bond lengths).
        size = len(chain)
        angles = [
            vec_angle(chain[k - 1] - chain[k - 2], chain[k] - chain[k - 1])
            for k in range(2, size)
        ]
        lengths = [magnitude(chain[k] - chain[k - 1])
                   for k in range(1, size)]
        return angles, lengths

    # Moving segment
    moving = make_random_chain(20)
    # Fixed segment: last three residues of the moving segment
    # after applying a random rotation/translation
    fixed = rotate_last_three(moving)

    angles1, distances1 = measure(moving)

    if len(sys.argv) >= 2:
        moving = ccd(moving, fixed, iterations=int(sys.argv[1]),
                     print_d=False)
    else:
        moving = ccd(moving, fixed, 10, True)

    angles2, distances2 = measure(moving)

    assert allclose(distances1, distances2)
    assert allclose(angles1, angles2)
示例#5
0
    def verify_virtual_twist_angles(self, cg, s):
        """
        Assert that consecutive virtual-residue vectors of stem *s* are
        separated by an angle strictly between 0.53 and 0.73.

        The second value returned by ``ftug.virtual_res_3d_pos_core`` is
        compared with the one from the previous position, starting at
        position 2.
        """
        stem_len = cg.stem_length(s)
        previous = None

        for idx in range(stem_len):
            _, current, _, _ = ftug.virtual_res_3d_pos_core(
                cg.coords[s], cg.twists[s], idx, stem_len)

            if idx > 1:
                step = ftuv.vec_angle(current, previous)
                self.assertGreater(step, 0.53)
                self.assertLess(step, 0.73)

            previous = current
示例#6
0
    def verify_virtual_twist_angles(self, cg, s):
        """
        Assert that consecutive virtual-residue vectors of stem *s* are
        separated by an angle strictly between 0.1 and 0.95.

        The second value returned by ``ftug.virtual_res_3d_pos_core`` is
        compared with the one from the previous position, starting at
        position 2.
        """
        stem_len = cg.stem_length(s)
        previous = None

        for idx in range(stem_len):
            _, current, _, _ = ftug.virtual_res_3d_pos_core(
                cg.coords[s], cg.twists[s], idx, stem_len)

            if idx > 1:
                step = ftuv.vec_angle(current, previous)
                self.assertGreater(step, 0.1)
                self.assertLess(step, 0.95)

            previous = current
示例#7
0
def main():
    """
    Print the inter-stem angle for the residues of every interior loop and
    multiloop segment of a structure.

    The first positional command line argument is passed to
    ``ftmc.from_pdb``. For each loop the angle between its two connected
    stems is computed and attached to every residue number yielded by
    ``define_residue_num_iterator(loop, adjacent=True)``. The output is one
    ``residue:angle`` line per entry, in sorted order.

    Exits with status 1 (after printing the help text) when no file
    argument is given.
    """
    usage = """
    usage
    """
    # One positional argument (the structure file) is required; args[0] is
    # read unconditionally below.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(args[0])

    angles = []
    for loop in it.chain(cg.iloop_iterator(), cg.mloop_iterator()):
        conn = cg.connections(loop)

        (s1b, s1e) = cg.get_sides(conn[0], loop)
        (s2b, s2e) = cg.get_sides(conn[1], loop)

        # The first stem is taken from its s1e end towards s1b, the second
        # from s2b towards s2e.
        angle = ftuv.vec_angle(
            cg.coords[conn[0]][s1b] - cg.coords[conn[0]][s1e],
            cg.coords[conn[1]][s2e] - cg.coords[conn[1]][s2b])

        angles.extend(
            (rn, angle)
            for rn in cg.define_residue_num_iterator(loop, adjacent=True))

    for rn, angle in sorted(angles):
        # Function-call form works on both Python 2 and Python 3.
        print("{}:{}".format(rn, angle))
示例#8
0
 def get_angle(self):
     '''
     Return the angle between the two connected stems.

     The spherical coordinates (1, self.u, self.v) are converted to a
     cartesian vector, whose angle to the fixed direction (-1, 0, 0) is
     returned.
     '''
     reference = np.array([-1., 0., 0.])
     direction = ftuv.spherical_polar_to_cartesian([1, self.u, self.v])
     return ftuv.vec_angle(reference, direction)
示例#9
0
def main():
    """
    Report, for each interior loop of a PDB structure, the angle between the
    axes of its two adjacent stems.

    The first positional command line argument is the PDB file (``~`` is
    expanded). Each loop/angle pair is emitted through ``fud.pv``.

    Exits with status 1 (after printing the help text) when no file
    argument is given.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much of a kink
    they introduce between the two adjacent stems.
    """
    # The PDB file is mandatory: args[0] is read unconditionally below.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(op.expanduser(args[0]))
    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        angle = ftuv.vec_angle(cg.coords[conn[0]][1] - cg.coords[conn[0]][0],
                               cg.coords[conn[1]][1] - cg.coords[conn[1]][0])

        # fud.pv receives the expression as text, so the local names
        # `iloop` and `angle` must not be renamed.
        fud.pv('iloop, angle')
示例#10
0
def main():
    """
    Print the inter-stem angle for the residues of every interior loop and
    multiloop segment of a structure.

    The first positional command line argument is passed to
    ``ftmc.from_pdb``. For each loop the angle between its two connected
    stems is computed and attached to every residue number yielded by
    ``define_residue_num_iterator(loop, adjacent=True)``. The output is one
    ``residue:angle`` line per entry, in sorted order.

    Exits with status 1 (after printing the help text) when no file
    argument is given.
    """
    usage = """
    usage
    """
    # One positional argument (the structure file) is required; args[0] is
    # read unconditionally below.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(args[0])

    angles = []
    for loop in it.chain(cg.iloop_iterator(), cg.mloop_iterator()):
        conn = cg.connections(loop)

        (s1b, s1e) = cg.get_sides(conn[0], loop)
        (s2b, s2e) = cg.get_sides(conn[1], loop)

        # The first stem is taken from its s1e end towards s1b, the second
        # from s2b towards s2e.
        angle = ftuv.vec_angle(
            cg.coords[conn[0]][s1b] - cg.coords[conn[0]][s1e],
            cg.coords[conn[1]][s2e] - cg.coords[conn[1]][s2b])

        for rn in cg.define_residue_num_iterator(loop, adjacent=True):
            angles.append((rn, angle))

    for rn, angle in sorted(angles):
        # Function-call form works on both Python 2 and Python 3.
        print("{}:{}".format(rn, angle))
示例#11
0
def main():
    """
    Report, for each interior loop of a PDB structure, the angle between the
    axes of its two adjacent stems.

    The first positional command line argument is the PDB file (``~`` is
    expanded). Each loop/angle pair is emitted through ``fud.pv``.

    Exits with status 1 (after printing the help text) when no file
    argument is given.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much of a kink
    they introduce between the two adjacent stems.
    """
    # The PDB file is mandatory: args[0] is read unconditionally below.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(op.expanduser(args[0]))
    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        first_axis = cg.coords[conn[0]][1] - cg.coords[conn[0]][0]
        second_axis = cg.coords[conn[1]][1] - cg.coords[conn[1]][0]
        angle = ftuv.vec_angle(first_axis, second_axis)

        # fud.pv receives the expression as text, so the local names
        # `iloop` and `angle` must not be renamed.
        fud.pv('iloop, angle')
示例#12
0
def output_all_distances(bg):
    """
    Print one line of distance information for every ordered pair of
    unconnected elements of *bg*.

    For each pair the two points returned by ``cuv.line_segment_distance``
    for the elements' coordinate segments are used. Pairs whose points
    coincide (distance below 1e-6) and pairs for which a ``KeyError`` is
    raised while computing them are skipped. Each output line contains:
    name, type letter and length of the first element, the same three
    values for the second element, the distance, two sequence placeholders
    (always ``x``), a ``Y``/``N`` long-range flag and the angle between the
    axis of the first element and the connecting vector.

    :param bg: graph object providing ``defines``, ``coords``,
               ``longrange``, ``has_connection`` and ``get_length``.
    """
    for (key1, key2) in it.permutations(bg.defines.keys(), 2):
        if bg.has_connection(key1, key2):
            continue

        longrange = "Y" if key2 in bg.longrange[key1] else "N"

        try:
            (i1, i2) = cuv.line_segment_distance(bg.coords[key1][0],
                                                 bg.coords[key1][1],
                                                 bg.coords[key2][0],
                                                 bg.coords[key2][1])

            if abs(cuv.magnitude(i2 - i1)) < 0.000001:
                continue

            vec1 = bg.coords[key1][1] - bg.coords[key1][0]
            v = cuv.vec_angle(vec1, i2 - i1)
        except KeyError:
            # Skip the pair when a KeyError is raised while computing it.
            continue

        # Sequence columns are currently not filled in.
        seq1 = 'x'
        seq2 = 'x'

        # Function-call form works on both Python 2 and Python 3.
        print("%s %s %d %s %s %d %f %s %s %s %f" % (key1,
                                                    key1[0],
                                                    bg.get_length(key1),
                                                    key2,
                                                    key2[0],
                                                    bg.get_length(key2),
                                                    cuv.magnitude(i2 - i1),
                                                    seq1, seq2, longrange, v))
示例#13
0
def get_relative_orientation(cg, l1, l2):
    '''
    Describe how element l1 is placed relative to element l2. l2 should
    be the receptor of a potential A-Minor interaction, whereas l1 should
    be the donor.

    Returns the tuple (dist, angle1, angle2):

        1. dist: ftug.element_distance(cg, l1, l2)
        2. angle1: the angle between the axis of l2 and the axis of l1
        3. angle2: the angle between a twist vector of l2 and the vector
           connecting the two points returned by
           ftuv.line_segment_distance for l1 and l2. For non-stems the
           mean of the two twist vectors is used; for stems the vector is
           taken from ftug.virtual_res_3d_pos_core at the position of the
           l2-side point along the stem.
    '''
    ends1 = cg.coords[l1]
    ends2 = cg.coords[l2]

    (i1, i2) = ftuv.line_segment_distance(ends1[0], ends1[1],
                                          ends2[0], ends2[1])

    axis2 = ends2[1] - ends2[0]
    angle1 = ftuv.vec_angle(axis2, ends1[1] - ends1[0])

    tw = cg.get_twists(l2)

    if l2[0] == 's':
        stem_len = cg.stem_length(l2)

        # Fractional position of i2 along the stem, scaled to its length.
        pos = ftuv.magnitude(i2 - ends2[0]) / ftuv.magnitude(axis2) * stem_len
        groove_vec = ftug.virtual_res_3d_pos_core(cg.coords[l2],
                                                  cg.twists[l2],
                                                  pos, stem_len)[1]
    else:
        groove_vec = (tw[0] + tw[1]) / 2.

    angle2 = ftuv.vec_angle(groove_vec, i2 - i1)

    dist = ftug.element_distance(cg, l1, l2)

    return (dist, angle1, angle2)
示例#14
0
def main():
    """
    Report, for each interior loop of a PDB structure, the angle between the
    axes of its two adjacent stems, optionally saving the values to a file.

    The first positional command line argument is the PDB file (``~`` is
    expanded). Each loop/angle pair is emitted through ``fud.pv``. With
    ``-o/--output FILE`` and at least one interior loop, the pairs are also
    written to FILE as tab-separated rows below an ``iloop``/``angle``
    header.

    Exits with status 1 (after printing the help text) when no file
    argument is given.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much of a kink
    they introduce between the two adjacent stems.
    """
    # The PDB file is mandatory: args[0] is read unconditionally below.
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option("-o",
                      "--output",
                      action="store",
                      help="Store data in csv with this filename")

    (options, args) = parser.parse_args()

    data = []

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg, = ftmc.CoarseGrainRNA.from_pdb(op.expanduser(args[0]))
    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        angle = ftuv.vec_angle(cg.coords[conn[0]][1] - cg.coords[conn[0]][0],
                               cg.coords[conn[1]][1] - cg.coords[conn[1]][0])
        data.append([iloop, angle])

        # fud.pv receives the expression as text, so the local names
        # `iloop` and `angle` must not be renamed.
        fud.pv('iloop, angle')

    if options.output and data:
        with open(options.output, 'w') as out_file:
            writer = csv.writer(out_file, delimiter="\t", lineterminator="\n")
            writer.writerow(["iloop", "angle"])
            writer.writerows(data)
示例#15
0
 def get_angle(self):
     '''
     Return the angle between the two connected stems.

     Computed as the angle between the fixed direction (-1, 0, 0) and the
     cartesian form of the spherical coordinates (1, self.u, self.v).
     '''
     fixed_direction = np.array([-1., 0., 0.])
     stem_direction = ftuv.spherical_polar_to_cartesian(
         [1, self.u, self.v])
     return ftuv.vec_angle(fixed_direction, stem_direction)
示例#16
0
def describe_ml_segments(cg):
    """
    Collect per-segment descriptors for all multiloop ("m") and interior
    loop ("i") elements of *cg*.

    Every loop returned by ``cg.find_mlonly_multiloops()`` and every
    interior loop is processed; for each of its "m"/"i" segments one value
    is appended to each column of the result. Each processed loop is also
    printed to stdout.

    :param cg: The coarse-grained RNA object to describe.
    :return: A ``defaultdict(list)`` mapping column names to lists of equal
             length (one entry per segment). If any segment was recorded,
             the columns "pk_number", "loop_number" and
             "reguler_multiloop_number" (sic) are added via ``number_by``.
    """
    data = defaultdict(list)
    loops = cg.find_mlonly_multiloops()
    # Interior loops are wrapped in single-element lists so that they can be
    # processed with the same code as multiloops.
    for loop in it.chain(loops, [[i] for i in cg.iloop_iterator()]):
        print(loop)
        if loop[0][0] == "i":
            description = ["interior_loop"]
        else:
            description = cg.describe_multiloop(loop)
        try:
            j3_roles = cg._assign_loop_roles(loop)
        except ValueError:
            j3_roles = None
        # NOTE(review): this branch tests truthiness while the per-segment
        # code below tests `is not None`; an empty-but-not-None result would
        # be treated differently in the two places -- confirm what
        # _assign_loop_roles can return.
        if j3_roles:
            j3_familyFlat = cg._junction_family_westhof1(j3_roles)
            j3_family3D = cg._junction_family_3d(j3_roles)
            j3_familyPerp = cg._junction_family_is_perpenticular(j3_roles)
            j3_Delta = cg.get_length(j3_roles["J23"]) - cg.get_length(
                j3_roles["J31"])
        else:
            j3_family3D = None
            j3_familyFlat = None
            j3_familyPerp = None
            j3_Delta = None

        # Smallest first value of define_a() over all segments of the loop.
        loop_start = float("inf")
        for segment in loop:
            if cg.define_a(segment)[0] < loop_start:
                loop_start = cg.define_a(segment)[0]
        for segment in loop:
            # Only multiloop and interior loop segments get a row.
            if segment[0] not in "mi":
                continue
            data["loop_start_after"].append(loop_start)
            data["segment_start_after"].append(cg.define_a(segment)[0])
            data["segment"].append(segment)
            data["junction_length"].append(len(loop))
            data["segment_length"].append(cg.get_length(segment))
            if segment[0] == "i":
                dims = list(sorted(cg.get_bulge_dimensions(segment)))
            else:
                # Placeholder for segments that are not interior loops.
                dims = [-1, -1]
            data["iloop_length_1"].append(dims[0])
            data["iloop_length_2"].append(dims[1])
            data["loops_largest_segment_length"].append(
                max(cg.get_length(x) for x in loop))
            data["loops_shortest_segment_length"].append(
                min(cg.get_length(x) for x in loop))
            data["sum_of_loops_segment_lengths"].append(
                sum(cg.get_length(x) for x in loop))
            data["loop_segment_lengths"].append(",".join(
                map(str, sorted(cg.get_length(x) for x in loop))))

            data["angle_type"].append(
                abs(cg.get_angle_type(segment, allow_broken=True)))
            s1, s2 = cg.connections(segment)

            # The direction of a stem is negated when get_sides() reports
            # (1, 0) for it; any value other than (1, 0) or (0, 1) trips
            # the assertion.
            vec1 = cg.coords.get_direction(s1)
            if cg.get_sides(s1, segment) == (1, 0):
                vec1 = -vec1
            else:
                assert cg.get_sides(s1, segment) == (0, 1)
            vec2 = cg.coords.get_direction(s2)
            if cg.get_sides(s2, segment) == (1, 0):
                vec2 = -vec2
            else:
                assert cg.get_sides(s2, segment) == (0, 1)
            data["angle_between_stems"].append(ftuv.vec_angle(vec1, vec2))
            # offset1/offset2: distance of one stem's get_sides()[0] end
            # from the line through the other stem.
            data["offset1"].append(
                ftuv.point_line_distance(
                    cg.coords[s1][cg.get_sides(s1, segment)[0]],
                    cg.coords[s2][0], cg.coords.get_direction(s2)))
            data["offset2"].append(
                ftuv.point_line_distance(
                    cg.coords[s2][cg.get_sides(s2, segment)[0]],
                    cg.coords[s1][0], cg.coords.get_direction(s1)))
            closer1, far1 = cg.coords[s1][cg.get_sides(
                s1, segment)[0]], cg.coords[s1][cg.get_sides(s1, segment)[1]]
            closer2, far2 = cg.coords[s2][cg.get_sides(
                s2, segment)[0]], cg.coords[s2][cg.get_sides(s2, segment)[1]]

            # Distance between two very long segments, each starting at the
            # "closer" end of a stem and extending away from its "far" end.
            data["offset"].append(
                ftuv.vec_distance(*ftuv.line_segment_distance(
                    closer1, closer1 +
                    (closer1 - far1) * 100000, closer2, closer2 +
                    (closer2 - far2) * 100000)))
            data["junction_va_distance"].append(
                ftug.junction_virtual_atom_distance(cg, segment))
            data["is_external_multiloop"].append("open" in description)
            data["is_pseudoknotted_multiloop"].append(
                "pseudoknot" in description)
            data["is_regular_multiloop"].append(
                "regular_multiloop" in description)
            data["is_interior_loop"].append("interior_loop" in description)
            if j3_roles is not None:
                # Exactly one role must map to this segment, otherwise the
                # single-element unpacking raises ValueError.
                elem_role, = [
                    x[0] for x in j3_roles.items() if x[1] == segment
                ]
            else:
                elem_role = "?"
            data["j3_role"].append(elem_role)
            data["j3_familyFlat"].append(j3_familyFlat)
            data["j3_family3D"].append(j3_family3D)
            data["j3_familyPerp"].append(j3_familyPerp)
            data["j3_Delta_j23_j31"].append(j3_Delta)
            dssr_stacking = False
            if "dssr_stacks" in cg.infos:
                if segment in cg.infos["dssr_stacks"]:
                    dssr_stacking = True
            data["dssr_stacking"].append(dssr_stacking)

            # kh_stem_angle stays NaN unless this segment and the next
            # multiloop segment both have angle type 5 (absolute value).
            kh_stem_angle = float("nan")
            if abs(cg.get_angle_type(segment, allow_broken=True)) == 5:
                next_ml = cg.get_next_ml_segment(segment)
                if isinstance(next_ml, str) and next_ml[0] == "m" and abs(
                        cg.get_angle_type(next_ml, allow_broken=True)) == 5:
                    stems1 = cg.edges[segment]
                    stems2 = cg.edges[next_ml]
                    try:
                        # Stems attached to exactly one of the two segments
                        # (symmetric difference); s1/s2 are rebound here.
                        s1, s2 = (stems1 | stems2) - (stems1 & stems2)
                    except ValueError:
                        # Not exactly two such stems: leave the value NaN.
                        pass
                    else:
                        vec1 = cg.coords.get_direction(s1)
                        vec2 = cg.coords.get_direction(s2)
                        angle = ftuv.vec_angle(vec1, vec2)
                        # Fold the angle into the range [0, pi/2].
                        if angle > math.pi / 2:
                            angle = math.pi - angle
                        kh_stem_angle = angle
            data["kh_stem_angle"].append(kh_stem_angle)
    if data:
        # Numbering columns are derived by number_by() from the loop start
        # and, where given, a boolean column -- see number_by for details.
        data["pk_number"] = number_by(data, "loop_start_after",
                                      "is_pseudoknotted_multiloop")
        data["loop_number"] = number_by(data, "loop_start_after", None)
        data["reguler_multiloop_number"] = number_by(data, "loop_start_after",
                                                     "is_regular_multiloop")
    return data
示例#17
0
def _ordinal_suffix(number):
    """Return the English ordinal suffix ("st", "nd", "rd", "th") of *number*."""
    return {
        1: "st",
        2: "nd",
        3: "rd"
    }.get(number % 10 * (number % 100 not in [11, 12, 13]), "th")


def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """
    Collect whole-molecule descriptors of the coarse-grained RNA *cg*.

    :param cg: The coarse-grained RNA object to describe.
    :param file_num: Running number of the file, used only in log messages.
    :param dist_pais: Iterable of (from_nt, to_nt) pairs for which the
                      distance between the virtual residues is reported.
                      (The misspelt name is kept so existing callers keep
                      working.)
    :param angle_pairs: Iterable of (elem1, elem2) pairs for which the angle
                        between the element directions is reported.
    :return: A dict mapping descriptor names to values. Distances and
             angles that cannot be calculated are stored as NaN and a
             warning is logged; the 3D shape descriptors are NaN if
             ``radius_of_gyration`` raises ``RnaMissing3dError`` or
             ``AttributeError``.
    """
    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    for letter in "smifth":
        data["num_" + letter] = len([x for x in cg.defines if x[0] == letter])

    # Count the multiloops by category.
    multiloops = cg.find_mlonly_multiloops()
    junct3 = 0
    junct4 = 0
    reg = 0
    pk = 0
    op = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            reg += 1
        if "pseudoknot" in descriptors:
            pk += 1
        if "open" in descriptors:
            op += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = op
    data["pseudoknots"] = pk
    data["regular_mls"] = reg
    data["total_mls"] = len(multiloops)
    try:
        data["longest_ml"] = max(len(x) for x in multiloops)
    except ValueError:
        # max() of an empty sequence: there are no multiloops.
        data["longest_ml"] = 0

    # Shape descriptors; all NaN if the first 3D computation fails.
    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        data["rog_fast"] = float("nan")
        data["rog_vres"] = float("nan")
        data["anisotropy_fast"] = float("nan")
        data["anisotropy_vres"] = float("nan")
        data["asphericity_fast"] = float("nan")
        data["asphericity_vres"] = float("nan")
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    # Iterate over the parameter itself; the former reference to
    # `dist_pairs` was an undefined name inside this function.
    for from_nt, to_nt in dist_pais:
        try:
            dist = ftuv.vec_distance(
                cg.get_virtual_residue(int(from_nt), True),
                cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Best effort: report NaN for this pair and keep going.
            dist = float("nan")
            log.warning(
                "%d%s File %s: Could not calculate distance between "
                "%d and %d: %s occurred: %s", file_num,
                _ordinal_suffix(file_num), cg.name, from_nt, to_nt,
                type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist
    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            # Best effort: report NaN for this pair and keep going.
            angle = float("nan")
            log.warning(
                "%d%s File %s: Could not calculate angle between "
                "%s and %s: %s occurred: %s", file_num,
                _ordinal_suffix(file_num), cg.name, elem1, elem2,
                type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle

    # Missing residues, counted relative to the whole sequence.
    data["missing_residues_5prime"] = (len(cg.seq.with_missing[:1]) - 1)
    data["missing_residues_3prime"] = (
        len(cg.seq.with_missing[cg.seq_length:]) - 1)
    data["missing_residues_middle"] = (
        len(cg.seq.with_missing[1:cg.seq_length]) -
        len(cg.seq[1:cg.seq_length]))
    data["missing_residues_total"] = (len(cg.seq.with_missing[:]) -
                                      len(cg.seq[:]))

    # Missing residues, counted per chain: the slice across a backbone break
    # is split at '&' into the part before and the part after the break.
    fp = len(cg.seq.with_missing[:1]) - 1
    tp = 0
    for bp in cg.backbone_breaks_after:
        fp += len(cg.seq.with_missing[bp:bp + 1].split('&')[1]) - 1
        tp += len(cg.seq.with_missing[bp:bp + 1].split('&')[0]) - 1
    tp += len(cg.seq.with_missing[cg.seq_length:]) - 1
    data["missing_residues_5prime_chain"] = (fp)
    data["missing_residues_3prime_chain"] = (tp)
    data["missing_residues_middle_chain"] = (data["missing_residues_total"] -
                                             fp - tp)

    incomplete_elem_types = Counter(x[0] for x in cg.incomplete_elements)
    data["s_with_missing"] = incomplete_elem_types["s"]
    data["i_with_missing"] = incomplete_elem_types["i"]
    data["m_with_missing"] = incomplete_elem_types["m"]
    data["h_with_missing"] = incomplete_elem_types["h"]

    # Collect "left&right;" entries for incomplete stems where both the
    # slice between two neighbouring stem positions and the slice between
    # their pairing partners are longer than 2.
    mp = ""
    if incomplete_elem_types["s"]:
        for elem in cg.incomplete_elements:
            if elem[0] != "s":
                continue
            for i in range(cg.defines[elem][0], cg.defines[elem][1]):
                left_s = cg.seq.with_missing[i:i + 1]
                if len(left_s) > 2:
                    right_s = cg.seq.with_missing[cg.pairing_partner(i + 1):cg.
                                                  pairing_partner(i)]
                    if len(right_s) > 2:
                        mp += "{}&{};".format(left_s, right_s)
    data["missing_basepairs"] = mp
    return data
示例#18
0
 def update(self, sm, step):
     angle = ftuv.vec_angle(sm.bg.coords.get_direction(self._elem1),
                            sm.bg.coords.get_direction(self._elem2))
     self.history[0].append(angle)
     return "{:6.2f}".format(math.degrees(angle))
示例#19
0
def describe_ml_segments(cg):
    """
    Collect per-segment descriptors for all multiloop ("m") and interior
    loop ("i") elements of *cg*.

    Every loop returned by ``cg.find_mlonly_multiloops()`` and every
    interior loop is processed; for each of its "m"/"i" segments one value
    is appended to each column of the result. Each processed loop is also
    printed to stdout.

    :param cg: The coarse-grained RNA object to describe.
    :return: A ``defaultdict(list)`` mapping column names to lists of equal
             length (one entry per segment). If any segment was recorded,
             the columns "pk_number", "loop_number" and
             "reguler_multiloop_number" (sic) are added via ``number_by``.
    """
    data = defaultdict(list)
    loops = cg.find_mlonly_multiloops()
    # Interior loops are wrapped in single-element lists so that they can be
    # processed with the same code as multiloops.
    for loop in it.chain(loops, [[i] for i in cg.iloop_iterator()]):
        print(loop)
        if loop[0][0] == "i":
            description = ["interior_loop"]
        else:
            description = cg.describe_multiloop(loop)
        try:
            j3_roles = cg._assign_loop_roles(loop)
        except ValueError:
            j3_roles = None
        # NOTE(review): this branch tests truthiness while the per-segment
        # code below tests `is not None`; an empty-but-not-None result would
        # be treated differently in the two places -- confirm what
        # _assign_loop_roles can return.
        if j3_roles:
            j3_familyFlat = cg._junction_family_westhof1(j3_roles)
            j3_family3D = cg._junction_family_3d(j3_roles)
            j3_familyPerp = cg._junction_family_is_perpenticular(j3_roles)
            j3_Delta = cg.get_length(
                j3_roles["J23"]) - cg.get_length(j3_roles["J31"])
        else:
            j3_family3D = None
            j3_familyFlat = None
            j3_familyPerp = None
            j3_Delta = None

        # Smallest first value of define_a() over all segments of the loop.
        loop_start = float("inf")
        for segment in loop:
            if cg.define_a(segment)[0] < loop_start:
                loop_start = cg.define_a(segment)[0]
        for segment in loop:
            # Only multiloop and interior loop segments get a row.
            if segment[0] not in "mi":
                continue
            data["loop_start_after"].append(loop_start)
            data["segment_start_after"].append(cg.define_a(segment)[0])
            data["segment"].append(segment)
            data["junction_length"].append(len(loop))
            data["segment_length"].append(cg.get_length(segment))
            if segment[0] == "i":
                dims = list(sorted(cg.get_bulge_dimensions(segment)))
            else:
                # Placeholder for segments that are not interior loops.
                dims = [-1, -1]
            data["iloop_length_1"].append(dims[0])
            data["iloop_length_2"].append(dims[1])
            data["loops_largest_segment_length"].append(
                max(cg.get_length(x) for x in loop))
            data["loops_shortest_segment_length"].append(
                min(cg.get_length(x) for x in loop))
            data["sum_of_loops_segment_lengths"].append(
                sum(cg.get_length(x) for x in loop))
            data["loop_segment_lengths"].append(
                ",".join(map(str, sorted(cg.get_length(x) for x in loop))))

            data["angle_type"].append(
                abs(cg.get_angle_type(segment, allow_broken=True)))
            s1, s2 = cg.connections(segment)

            # The direction of a stem is negated when get_sides() reports
            # (1, 0) for it; any value other than (1, 0) or (0, 1) trips
            # the assertion.
            vec1 = cg.coords.get_direction(s1)
            if cg.get_sides(s1, segment) == (1, 0):
                vec1 = -vec1
            else:
                assert cg.get_sides(s1, segment) == (0, 1)
            vec2 = cg.coords.get_direction(s2)
            if cg.get_sides(s2, segment) == (1, 0):
                vec2 = -vec2
            else:
                assert cg.get_sides(s2, segment) == (0, 1)
            data["angle_between_stems"].append(ftuv.vec_angle(vec1, vec2))
            # offset1/offset2: distance of one stem's get_sides()[0] end
            # from the line through the other stem.
            data["offset1"].append(ftuv.point_line_distance(cg.coords[s1][cg.get_sides(s1, segment)[0]],
                                                            cg.coords[s2][0], cg.coords.get_direction(
                                                                s2)
                                                            ))
            data["offset2"].append(ftuv.point_line_distance(cg.coords[s2][cg.get_sides(s2, segment)[0]],
                                                            cg.coords[s1][0], cg.coords.get_direction(
                                                                s1)
                                                            ))
            closer1, far1 = cg.coords[s1][cg.get_sides(
                s1, segment)[0]], cg.coords[s1][cg.get_sides(s1, segment)[1]]
            closer2, far2 = cg.coords[s2][cg.get_sides(
                s2, segment)[0]], cg.coords[s2][cg.get_sides(s2, segment)[1]]

            # Distance between two very long segments, each starting at the
            # "closer" end of a stem and extending away from its "far" end.
            data["offset"].append(ftuv.vec_distance(*ftuv.line_segment_distance(closer1, closer1 + (closer1 - far1) * 100000,
                                                                                closer2, closer2 + (closer2 - far2) * 100000)))
            data["junction_va_distance"].append(
                ftug.junction_virtual_atom_distance(cg, segment))
            data["is_external_multiloop"].append("open" in description)
            data["is_pseudoknotted_multiloop"].append(
                "pseudoknot" in description)
            data["is_regular_multiloop"].append(
                "regular_multiloop" in description)
            data["is_interior_loop"].append("interior_loop" in description)
            if j3_roles is not None:
                # Exactly one role must map to this segment, otherwise the
                # single-element unpacking raises ValueError.
                elem_role, = [x[0]
                              for x in j3_roles.items() if x[1] == segment]
            else:
                elem_role = "?"
            data["j3_role"].append(elem_role)
            data["j3_familyFlat"].append(j3_familyFlat)
            data["j3_family3D"].append(j3_family3D)
            data["j3_familyPerp"].append(j3_familyPerp)
            data["j3_Delta_j23_j31"].append(j3_Delta)
            dssr_stacking = False
            if "dssr_stacks" in cg.infos:
                if segment in cg.infos["dssr_stacks"]:
                    dssr_stacking = True
            data["dssr_stacking"].append(dssr_stacking)

            # kh_stem_angle stays NaN unless this segment and the next
            # multiloop segment both have angle type 5 (absolute value).
            kh_stem_angle = float("nan")
            if abs(cg.get_angle_type(segment, allow_broken=True)) == 5:
                next_ml = cg.get_next_ml_segment(segment)
                if isinstance(next_ml, str) and next_ml[0] == "m" and abs(cg.get_angle_type(next_ml, allow_broken=True)) == 5:
                    stems1 = cg.edges[segment]
                    stems2 = cg.edges[next_ml]
                    try:
                        # Stems attached to exactly one of the two segments
                        # (symmetric difference); s1/s2 are rebound here.
                        s1, s2 = (stems1 | stems2) - (stems1 & stems2)
                    except ValueError:
                        # Not exactly two such stems: leave the value NaN.
                        pass
                    else:
                        vec1 = cg.coords.get_direction(s1)
                        vec2 = cg.coords.get_direction(s2)
                        angle = ftuv.vec_angle(vec1, vec2)
                        # Fold the angle into the range [0, pi/2].
                        if angle > math.pi / 2:
                            angle = math.pi - angle
                        kh_stem_angle = angle
            data["kh_stem_angle"].append(kh_stem_angle)
    if data:
        # Numbering columns are derived by number_by() from the loop start
        # and, where given, a boolean column -- see number_by for details.
        data["pk_number"] = number_by(data, "loop_start_after",
                                      "is_pseudoknotted_multiloop")
        data["loop_number"] = number_by(data, "loop_start_after", None)
        data["reguler_multiloop_number"] = number_by(data, "loop_start_after",
                                                     "is_regular_multiloop")
    return data
示例#20
0
def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """
    Summarise a coarse-grained RNA as a flat dictionary of statistics.

    The result holds element counts, multiloop counts, radius of gyration,
    anisotropy and asphericity, plus one entry per requested distance and
    one per requested angle.

    If the "fast" radius of gyration raises `ftmc.RnaMissing3dError` or
    `AttributeError`, all six 3D shape descriptors are stored as NaN.
    A distance or angle that fails for any reason is stored as NaN and a
    warning is logged.

    :param cg: The coarse-grained RNA object to describe.
    :param file_num: Running number of the input file. It is only used to
                     build the "1st/2nd/3rd File" prefix of warnings.
    :param dist_pais: Iterable of `(from_nt, to_nt)` pairs for which the
                      distance between the virtual residues is reported.
                      The misspelled name is kept so that existing keyword
                      callers continue to work.
    :param angle_pairs: Iterable of `(elem1, elem2)` element names for which
                        the angle between the element directions is
                        reported.
    :return: A dictionary mapping column names to values.
    """

    def ordinal_suffix():
        # English ordinal suffix of file_num ("st", "nd", "rd" or "th").
        # The boolean factor maps 11, 12 and 13 to key 0, which is not in
        # the table, so they fall back to "th".
        return {
            1: "st",
            2: "nd",
            3: "rd"
        }.get(file_num % 10 * (file_num % 100 not in [11, 12, 13]), "th")

    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    # One counter per element-name prefix.
    for letter in "smifth":
        data["num_" + letter] = sum(1 for x in cg.defines if x[0] == letter)

    multiloops = cg.find_mlonly_multiloops()
    junct3 = 0
    junct4 = 0
    reg = 0
    pk = 0
    op = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            # Only regular multiloops are counted as 3-/4-way junctions.
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            reg += 1
        if "pseudoknot" in descriptors:
            pk += 1
        if "open" in descriptors:
            op += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = op
    data["pseudoknots"] = pk
    data["regular_mls"] = reg
    data["total_mls"] = len(multiloops)
    # default=0 covers structures without any multiloop.
    data["longest_ml"] = max((len(x) for x in multiloops), default=0)

    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        data["rog_fast"] = float("nan")
        data["rog_vres"] = float("nan")
        data["anisotropy_fast"] = float("nan")
        data["anisotropy_vres"] = float("nan")
        data["asphericity_fast"] = float("nan")
        data["asphericity_vres"] = float("nan")
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    # Bug fix: iterate over the parameter. The body used to read
    # `dist_pairs`, which is not a name of this function, so the argument was
    # ignored and a NameError was raised unless a module-level variable of
    # that name happened to exist.
    for from_nt, to_nt in dist_pais:
        try:
            dist = ftuv.vec_distance(
                cg.get_virtual_residue(int(from_nt), True),
                cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Best effort: report NaN and keep describing the structure.
            dist = float("nan")
            log.warning(
                "%d%s File %s: Could not calculate distance between "
                "%d and %d: %s occurred: %s", file_num, ordinal_suffix(),
                cg.name, from_nt, to_nt,
                type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist

    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            # Best effort: report NaN and keep describing the structure.
            angle = float("nan")
            log.warning(
                "%d%s File %s: Could not calculate angle between "
                "%s and %s: %s occurred: %s", file_num, ordinal_suffix(),
                cg.name, elem1, elem2,
                type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle
    return data
示例#21
0
def get_relative_orientation(cg, loop, stem):
    '''
    Describe the position of a loop relative to a stem by three numbers.

    The stem acts as the receptor of a potential A-Minor interaction and
    the loop as the donor.

    :param cg: The coarse-grained RNA holding coordinates and twists.
    :param loop: Name of the donor element.
    :param stem: Name of the receptor element; must start with "s".
    :return: A triple `(dist, angle1, angle2)`:

        1.  Distance between the closest points of the two elements
        2.  The angle between the stem axis and the vector connecting those
            closest points, folded into the range 0 to pi/2
        3.  The signed angle between the minor-groove vector of the stem and
            the connection vector projected onto the plane normal to the
            stem axis. NaN if the distance is 0.
    :raises ValueError: If the distance is non-zero and `stem` is not a stem.
    '''
    closest_on_stem, closest_on_loop = ftuv.line_segment_distance(
        cg.coords[stem][0], cg.coords[stem][1],
        cg.coords[loop][0], cg.coords[loop][1])
    connection = closest_on_loop - closest_on_stem
    distance = ftuv.magnitude(connection)

    # A stem axis has no preferred direction, so of the two possible
    # angles between the lines the smaller one is reported.
    unfolded = ftuv.vec_angle(cg.coords.get_direction(stem), connection)
    axis_angle = np.pi - unfolded if unfolded > np.pi / 2 else unfolded

    # The result is not used; the call is kept so that any error it raises
    # still surfaces at this point.
    cg.get_twists(stem)

    if distance == 0:
        # Without a connection vector there is nothing to project.
        return distance, axis_angle, float("nan")
    if stem[0] != 's':
        raise ValueError(
            "The receptor needs to be a stem, not {}".format(stem))

    n_basepairs = cg.stem_length(stem)
    # Position along the helix in floating point basepair coordinates:
    # 0.0 is the first basepair, 1.5 is halfway between the second and the
    # third basepair.
    offset_from_start = ftuv.magnitude(closest_on_stem - cg.coords[stem][0])
    axis_length = ftuv.magnitude(cg.coords.get_direction(stem))
    helix_pos = offset_from_start / axis_length * (n_basepairs - 1)

    # Vector towards the minor groove at that (possibly fractional) position.
    groove_vec = ftug.virtual_res_3d_pos_core(
        cg.coords[stem], cg.twists[stem], helix_pos, n_basepairs)[1]
    # Connection vector projected onto the plane normal to the stem axis.
    projected = ftuv.vector_rejection(connection,
                                      cg.coords.get_direction(stem))
    try:
        # Both vectors have a well defined direction here, so angles above
        # 90 degrees are meaningful and are not folded.
        groove_angle = ftuv.vec_angle(groove_vec, projected)
    except ValueError:
        if not np.all(groove_vec == 0):
            raise
        groove_angle = float("nan")

    # The angle is signed: it is multiplied by the result of comparing the
    # cross product of the two vectors with the stem axis.
    normal = np.cross(groove_vec, projected)
    groove_angle *= ftuv.is_almost_parallel(normal,
                                            cg.coords.get_direction(stem))
    return distance, axis_angle, groove_angle
示例#22
0
def get_relative_orientation(cg, loop, stem):
    '''
    Return three parameters relating a donor loop to a receptor stem.

    The stem is the receptor of a potential A-Minor interaction, the loop
    is the donor.

    :param cg: The coarse-grained RNA holding coordinates and twists.
    :param loop: Name of the donor element.
    :param stem: Name of the receptor element; must start with "s".
    :return: `(dist, angle1, angle2)` where

        1.  `dist` is the distance between the closest points of the two
            elements,
        2.  `angle1` is the angle between the stem axis and the vector
            joining those points (at most pi/2),
        3.  `angle2` is the signed angle between the stem's minor-groove
            vector and the joining vector projected onto the plane normal
            to the stem axis; NaN if `dist` is 0.
    :raises ValueError: If `dist` is non-zero and `stem` is not a stem.
    '''

    def stem_axis():
        # Looked up on every use, exactly as often as before.
        return cg.coords.get_direction(stem)

    stem_point, loop_point = ftuv.line_segment_distance(
        cg.coords[stem][0], cg.coords[stem][1],
        cg.coords[loop][0], cg.coords[loop][1])
    joining = loop_point - stem_point
    dist = ftuv.magnitude(joining)

    angle1 = ftuv.vec_angle(stem_axis(), joining)
    # The orientation of the stem axis is irrelevant: use the smaller of
    # the two angles between the lines.
    if angle1 > np.pi / 2:
        angle1 = np.pi - angle1

    # Return value unused; the call is kept so that any error it raises is
    # still raised here.
    cg.get_twists(stem)

    if dist == 0:
        angle2 = float("nan")
        return dist, angle1, angle2

    if stem[0] != 's':
        raise ValueError(
            "The receptor needs to be a stem, not {}".format(stem))

    length_in_bp = cg.stem_length(stem)
    # Fractional basepair index of the closest point on the stem: 0.0 is
    # the first basepair, 1.5 lies between the second and third basepair.
    along_stem = ftuv.magnitude(stem_point - cg.coords[stem][0])
    pos = along_stem / ftuv.magnitude(stem_axis()) * (length_in_bp - 1)

    # Minor-groove vector at `pos`, which need not be a virtual residue.
    minor_groove = ftug.virtual_res_3d_pos_core(
        cg.coords[stem], cg.twists[stem], pos, length_in_bp)[1]
    # Part of the joining vector that is perpendicular to the stem axis.
    in_plane = ftuv.vector_rejection(joining, stem_axis())

    try:
        # Directions are well defined here, so angles >90 degrees are kept.
        angle2 = ftuv.vec_angle(minor_groove, in_plane)
    except ValueError:
        if np.all(minor_groove == 0):
            angle2 = float("nan")
        else:
            raise

    # Sign the angle using the comparison of the cross product with the
    # stem axis.
    cross = np.cross(minor_groove, in_plane)
    angle2 *= ftuv.is_almost_parallel(cross, stem_axis())
    return dist, angle1, angle2
示例#23
0
def describe_ml_segments(cg):
    """
    Tabulate one row of descriptors per multiloop segment of *cg*.

    Every multiloop returned by `cg.find_mlonly_multiloops()` is visited and
    each of its elements whose name starts with "m" contributes one entry
    to every column. If any row was produced, three numbering columns are
    added at the end via `number_by`.

    :param cg: The coarse-grained RNA object to describe.
    :return: A `defaultdict(list)` mapping column names to lists of values.
    """
    table = defaultdict(list)

    def facing_direction(stem, ml_segment):
        # Direction of `stem`, negated if get_sides reports (1, 0).
        # The only other accepted value is (0, 1).
        direction = cg.coords.get_direction(stem)
        if cg.get_sides(stem, ml_segment) == (1, 0):
            return -direction
        assert cg.get_sides(stem, ml_segment) == (0, 1)
        return direction

    for junction in cg.find_mlonly_multiloops():
        description = cg.describe_multiloop(junction)
        try:
            roles = cg.assign_loop_roles(junction)
        except ValueError:
            roles = None

        # Junction family descriptors are only available when roles could
        # be assigned; otherwise they are reported as None.
        family_flat = family_3d = family_perp = delta = None
        if roles:
            family_flat = cg.junction_family_westhof1(roles)
            family_3d = cg.junction_family_3d(roles)
            family_perp = cg.junction_family_is_perpenticular(roles)
            delta = (cg.get_length(roles["J23"])
                     - cg.get_length(roles["J31"]))

        # Smallest first define over all elements of the junction.
        first_nt = min((cg.define_a(member)[0] for member in junction),
                       default=float("inf"))

        for segment in junction:
            if segment[0] != "m":
                continue
            table["loop_start_after"].append(first_nt)
            table["segment_start_after"].append(cg.define_a(segment)[0])
            table["segment"].append(segment)
            table["junction_length"].append(len(junction))
            table["segment_length"].append(cg.get_length(segment))

            member_lengths = [cg.get_length(x) for x in junction]
            table["loops_largest_segment_length"].append(max(member_lengths))
            table["loops_shortest_segment_length"].append(
                min(member_lengths))
            table["sum_of_loops_segment_lengths"].append(sum(member_lengths))
            table["loop_segment_lengths"].append(
                ",".join(str(n) for n in sorted(member_lengths)))

            table["angle_type"].append(
                abs(cg.get_angle_type(segment, allow_broken=True)))

            stem_a, stem_b = cg.connections(segment)
            dir_a = facing_direction(stem_a, segment)
            dir_b = facing_direction(stem_b, segment)
            table["angle_between_stems"].append(ftuv.vec_angle(dir_a, dir_b))
            table["junction_va_distance"].append(
                ftug.junction_virtual_atom_distance(cg, segment))

            table["is_external_multiloop"].append("open" in description)
            table["is_pseudoknotted_multiloop"].append(
                "pseudoknot" in description)
            table["is_regular_multiloop"].append(
                "regular_multiloop" in description)

            if roles is None:
                role_name = "?"
            else:
                # Exactly one role must map to this segment.
                (role_name,) = [
                    role for role, elem in roles.items() if elem == segment
                ]
            table["j3_role"].append(role_name)
            table["j3_familyFlat"].append(family_flat)
            table["j3_family3D"].append(family_3d)
            table["j3_familyPerp"].append(family_perp)
            table["j3_Delta_j23_j31"].append(delta)

    if table:
        numbering = (
            ("pk_number", "is_pseudoknotted_multiloop"),
            ("loop_number", None),
            ("reguler_multiloop_number", "is_regular_multiloop"),
        )
        for column, flag_column in numbering:
            table[column] = number_by(table, "loop_start_after", flag_column)
    return table
示例#24
0
def extend_pk_description(dataset, filename, pk_type, rna, pk, pk_number):
    """
    Append an extended description of the current pseudoknot to `dataset`,
    e.g. angles and distances between its stems and DSSR stacking data.

    One row is appended per entry of `pk.residue_numbers`. The function
    has no return statement; its result is the in-place update of `dataset`.

    :param dataset:  Current dataset that will be updated. Its values must
                     support `.append` (presumably a `defaultdict(list)` --
                     confirm against the caller).
    :param filename: Filename of the current structure
    :param pk_type: Class of the pseudoknot
    :param rna: A forgi CoarseGrainRNA object
    :param pk: Structure of the pseudoknot, a NumberedDotbracket object,
               in a condensed (shadow-like) representation.
               This representation always contains the most 5' basepair.
    :param pk_number: consecutive number of the pseudoknot
    """
    domains = rna.get_domains()
    helices = domains["rods"]  # A list of elements, e.g. ["s0", "i0", "s1"]
    # `helices` is only used for this debug message.
    log.debug("Helices: %s", helices)
    #rna.log(logging.WARNING)
    # stems_5p[i]: element containing the i-th pseudoknot residue.
    # stems_3p[i]: element containing the matching entry of pk.helix_ends.
    stems_5p = []
    stems_3p = []

    # Integer positions of pk.residue_numbers, in the same order.
    nums = []
    log.debug("pk Residue numbers %s", pk.residue_numbers)
    log.debug("pk helix ends %s", pk.helix_ends)

    for i, resnum in enumerate(pk.residue_numbers):
        num = rna.seq.to_integer(resnum)
        nums.append(num)
        element_5p = rna.get_node_from_residue_num(num)
        stems_5p.append(element_5p)

        num2 = rna.seq.to_integer(pk.helix_ends[i])
        log.debug("num %s nums2 %s", num, num2)
        element_3p = rna.get_node_from_residue_num(num2)
        stems_3p.append(element_3p)
    log.debug("nums %s", nums)
    # One dataset row per pseudoknot position: stem1 is taken from position
    # i, stem2 from position i + 1 (wrapping around to position 0 at the end).
    for i, stem1_5p in enumerate(stems_5p):
        dataset["Filename"].append(filename)
        # NOTE(review): this assigns a scalar while every other column is
        # appended to -- looks unintended, confirm with the consumer of
        # `dataset`.
        dataset["rnaname"] = rna.name
        dataset["pk_type"].append(pk_type)
        dataset["pk_id"].append(pk_number)
        dataset["angle_nr"].append(i)
        # The dot-bracket structure is only stored for the "other" class.
        if pk_type == "other":
            dataset["pk_structure"].append(str(pk))
        else:
            dataset["pk_structure"].append("")
        # Is this the first occurrence of the stem in stems_5p?
        if stems_5p.index(stem1_5p) == i:
            # First occurrence. Strand 0, look at the 3' end of the helix.
            stem1 = stems_3p[i]
            strand = 0
        else:
            # Repeated occurrence: use the element itself, on strand 1.
            assert i > stems_5p.index(stem1_5p)
            stem1 = stem1_5p
            strand = 1
        # The partner is the next position; past the last position we wrap
        # around to the first one and flag the row as outside the pseudoknot.
        try:
            stem2_5p = stems_5p[i + 1]
        except IndexError:
            stem2_5p = stems_5p[0]
            outside_pk = True
        else:
            outside_pk = False
        if outside_pk or stems_5p.index(stem2_5p) == i + 1:
            # First occurrence (or wrap-around)
            stem2 = stem2_5p
            strand2 = 0
        else:
            strand2 = 1
            # NOTE(review): outside_pk is always False in this branch
            # because of the enclosing condition, so the first alternative
            # below is unreachable.
            if outside_pk:
                stem2 = stems_3p[0]
            else:
                stem2 = stems_3p[i + 1]
        log.debug("Stem 5' %s, 3' %s, stem1 %s stem2 %s", stems_5p, stems_3p,
                  stem1, stem2)
        # Enable stacking analysis via DSSR.
        # Differentiate between stacking (True), no stacking (False) and
        # breaks within/around the pseudoknot (-1), incl. 'virtual' angles,
        # e.g. H-Type angle_type3.
        # ml_stack stays empty when no DSSR data is attached to the RNA.
        ml_stack = []
        if rna.dssr:
            # Index the non-canonical pairs by both partners.
            nc_bps = list(rna.dssr.noncanonical_pairs())
            nc_dict = defaultdict(list)
            for nt1, nt2, typ in nc_bps:
                nc_dict[nt1].append((nt2, typ))
                nc_dict[nt2].append((nt1, typ))
            stacking_loops = rna.dssr.stacking_loops()
            # State of the backbone walk below:
            #   start_found: how often stem1 was seen before tracking began
            #   connection:  non-stem elements collected after stem1
            #   stacking:    None (undecided), True, False or -1 (break)
            #   branch:      stem of a side branch that is being skipped
            start_found = 0
            connection = []
            stacking = None
            branch = None
            log.debug("Checking %s and %s for stacking, strand %s", stem1,
                      stem2, strand)
            for elem in rna.iter_elements_along_backbone(
            ):  # walk along the backbone
                # Tracking starts after stem1 was seen strand + 1 times,
                # i.e. once for strand 0 and twice for strand 1.
                if start_found == strand + 1:
                    if branch:
                        # Inside a side branch: ignore everything until the
                        # branch stem shows up again.
                        log.debug("in branch: elem %s, branch %s, stacking %s",
                                  elem, branch, stacking)
                        if elem == branch:
                            log.debug("End branch at %s", elem)
                            branch = None
                            log.debug("Branch end")
                        continue
                    if elem[0] != "s":
                        connection.append(elem)
                        # A backbone break after the last define of the
                        # element takes precedence over everything else.
                        if rna.defines[elem] and rna.defines[elem][
                                -1] in rna.backbone_breaks_after:
                            stacking = -1
                        if elem not in stacking_loops and stacking != -1:
                            stacking = False
                    elif elem == stem2:
                        # Reached the partner stem without any objection.
                        if stacking is None:
                            stacking = True
                        log.debug("Found second stem, elem %s, stacking %s",
                                  elem, stacking)
                        break
                    elif elem[0] == "s" and connection:
                        # A different stem after at least one connecting
                        # element starts a side branch.
                        branch = elem
                        if rna.defines[elem][-1] in rna.backbone_breaks_after:
                            stacking = -1
                    log.debug("elem %s, stacking %s, branch %s", elem,
                              stacking, branch)
                elif elem == stem1:
                    start_found += 1
                    # Index 1 (strand 0) or 3 (strand 1) of the stem's
                    # defines is checked for a backbone break.
                    if rna.defines[elem][strand * 2 +
                                         1] in rna.backbone_breaks_after:
                        stacking = -1
                    log.debug("First stem, elem %s, stacking %s", elem,
                              stacking)
            else:
                # The walk ended without a break statement, i.e. stem2 was
                # never reached while tracking.
                log.debug("End iteration, stacking->-1")
                stacking = -1
            log.debug("Finally, stacking = %s", stacking)
            # More detailed stacking (including backbone breaks within and
            # around the pseudoknot)
            dataset["this_loop_stacking_dssr"].append(stacking)
            dataset["connecting_loops"].append(",".join(connection))

            # More general stacking information: stacking loops among the
            # elements adjacent to both stems.
            connecting_loops = rna.edges[stem1] & rna.edges[stem2]
            for loop in connecting_loops:
                if loop in stacking_loops:
                    ml_stack.append(loop)
            stacks = rna.dssr.coaxial_stacks()
            log.info("Stacks: %s", stacks)
            for stack in stacks:
                if stem1 in stack and stem2 in stack:
                    # the two stems stack, but we do not specify along which
                    # multiloop segment they stack.
                    dataset["is_stacking_dssr"].append(True)
                    break
            else:
                # No coaxial stack contains both stems.
                dataset["is_stacking_dssr"].append(False)

            # Does the connection form base-triples with the stem?
            stem1_triples = 0
            stem2_triples = 0
            aminors1 = 0
            aminors2 = 0
            aminors = list(rna.dssr.aminor_interactions())
            for elem in connection:
                for nt in rna.define_residue_num_iterator(elem, seq_ids=True):
                    # A nucleotide is counted as an A-minor interaction with
                    # stem1, else with stem2; only otherwise are its
                    # non-canonical partners inspected.
                    if (nt, stem1) in aminors:
                        aminors1 += 1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem1)
                    elif (nt, stem2) in aminors:
                        aminors2 += 1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem2)
                    else:
                        for partner, typ in nc_dict[nt]:
                            if rna.get_elem(partner) == stem1:
                                log.debug("base_triple %s, %s: %s-%s (%s)",
                                          elem, stem1, nt, partner, typ)
                                stem1_triples += 1
                            elif rna.get_elem(partner) == stem2:
                                log.debug("base_triple %s, %s: %s-%s (%s)",
                                          elem, stem2, nt, partner, typ)
                                stem2_triples += 1
            log.debug("%s has a length of %s and %s triples", stem1,
                      rna.stem_length(stem1), stem1_triples)
            log.debug("%s has a length of %s and %s triples", stem2,
                      rna.stem_length(stem2), stem2_triples)
            # Counts are normalised by the length of the respective stem.
            dataset["stem1_basetripleperc_dssr"].append(stem1_triples /
                                                        rna.stem_length(stem1))
            dataset["stem2_basetripleperc_dssr"].append(stem2_triples /
                                                        rna.stem_length(stem2))
            dataset["stem1_aminorperc_dssr"].append(aminors1 /
                                                    rna.stem_length(stem1))
            dataset["stem2_aminorperc_dssr"].append(aminors2 /
                                                    rna.stem_length(stem2))

        else:
            # No DSSR annotation: fill the DSSR columns with placeholders so
            # that every column receives one entry for this row.
            dataset["is_stacking_dssr"].append(float("nan"))
            dataset["this_loop_stacking_dssr"].append(float("nan"))
            dataset["connecting_loops"].append("")
            dataset["stem1_basetripleperc_dssr"].append(float("nan"))
            dataset["stem2_basetripleperc_dssr"].append(float("nan"))
            dataset["stem1_aminorperc_dssr"].append(float("nan"))
            dataset["stem2_aminorperc_dssr"].append(float("nan"))

        dataset["stacking_loops"].append(",".join(ml_stack))

        # stem_parameters is defined elsewhere in this file; it presumably
        # returns a position and a direction for the requested stem side --
        # TODO confirm.
        pos1, dir1 = stem_parameters(stem1, rna, not strand)
        pos2, dir2 = stem_parameters(stem2, rna, strand2)
        dataset["stem1"].append(stem1)
        dataset["stem2"].append(stem2)

        dataset["angle_between_stems"].append(ftuv.vec_angle(dir1, dir2))
        dataset["distance_between"].append(ftuv.vec_distance(pos1, pos2))

        # Relation to the stem following the next multiloop. It is only
        # looked up inside the pseudoknot and dropped if it is stem2 itself.
        next_stem = None
        if not outside_pk:
            next_stem = stem_after_next_ml(rna, nums[i], before=stem2)
            if next_stem == stem2:
                next_stem = None
        if next_stem:
            posN, dirN = stem_parameters(next_stem, rna, 0)
            dataset["angle_to_next"].append(ftuv.vec_angle(dir1, dirN))
            dataset["distance_to_next"].append(ftuv.vec_distance(pos1, posN))
            dataset["next_stem"].append(next_stem)
        else:
            # Empty strings are the placeholders for "no next stem".
            dataset["angle_to_next"].append("")
            dataset["distance_to_next"].append("")
            dataset["next_stem"].append("")
        dataset["outside_pk"].append(outside_pk)
示例#25
0
def describe_rna(cg, file_num, dist_pais, angle_pairs):
    """
    Summarise a coarse-grained RNA as a flat dictionary of statistics.

    The result holds element counts, multiloop counts, radius of gyration,
    anisotropy and asphericity, one entry per requested distance and angle,
    and counts of missing residues and incomplete elements.

    If the "fast" radius of gyration raises `ftmc.RnaMissing3dError` or
    `AttributeError`, all six 3D shape descriptors are stored as NaN.
    A distance or angle that fails for any reason is stored as NaN and a
    warning is logged.

    :param cg: The coarse-grained RNA object to describe.
    :param file_num: Running number of the input file. It is only used to
                     build the "1st/2nd/3rd File" prefix of warnings.
    :param dist_pais: Iterable of `(from_nt, to_nt)` pairs for which the
                      distance between the virtual residues is reported.
                      The misspelled name is kept so that existing keyword
                      callers continue to work.
    :param angle_pairs: Iterable of `(elem1, elem2)` element names for which
                        the angle between the element directions is
                        reported.
    :return: A dictionary mapping column names to values.
    """

    def ordinal_suffix():
        # English ordinal suffix of file_num ("st", "nd", "rd" or "th").
        # The boolean factor maps 11, 12 and 13 to key 0, which is not in
        # the table, so they fall back to "th".
        return {1: "st", 2: "nd", 3: "rd"}.get(
            file_num % 10 * (file_num % 100 not in [11, 12, 13]), "th")

    data = {}
    data["nt_length"] = cg.seq_length
    data["num_cg_elems"] = len(cg.defines)
    # One counter per element-name prefix.
    for letter in "smifth":
        data["num_" + letter] = sum(1 for x in cg.defines if x[0] == letter)

    multiloops = cg.find_mlonly_multiloops()
    junct3 = 0
    junct4 = 0
    reg = 0
    pk = 0
    op = 0
    for ml in multiloops:
        descriptors = cg.describe_multiloop(ml)
        if "regular_multiloop" in descriptors:
            # Only regular multiloops are counted as 3-/4-way junctions.
            if len(ml) == 3:
                junct3 += 1
            elif len(ml) == 4:
                junct4 += 1
            reg += 1
        if "pseudoknot" in descriptors:
            pk += 1
        if "open" in descriptors:
            op += 1
    data["3-way-junctions"] = junct3
    data["4-way-junctions"] = junct4
    data["open_mls"] = op
    data["pseudoknots"] = pk
    data["regular_mls"] = reg
    data["total_mls"] = len(multiloops)
    # default=0 covers structures without any multiloop.
    data["longest_ml"] = max((len(x) for x in multiloops), default=0)

    try:
        data["rog_fast"] = cg.radius_of_gyration("fast")
    except (ftmc.RnaMissing3dError, AttributeError):
        data["rog_fast"] = float("nan")
        data["rog_vres"] = float("nan")
        data["anisotropy_fast"] = float("nan")
        data["anisotropy_vres"] = float("nan")
        data["asphericity_fast"] = float("nan")
        data["asphericity_vres"] = float("nan")
    else:
        data["rog_vres"] = cg.radius_of_gyration("vres")
        data["anisotropy_fast"] = ftmd.anisotropy(cg.get_ordered_stem_poss())
        data["anisotropy_vres"] = ftmd.anisotropy(
            cg.get_ordered_virtual_residue_poss())
        data["asphericity_fast"] = ftmd.asphericity(cg.get_ordered_stem_poss())
        data["asphericity_vres"] = ftmd.asphericity(
            cg.get_ordered_virtual_residue_poss())

    # Bug fix: iterate over the parameter. The body used to read
    # `dist_pairs`, which is not a name of this function, so the argument was
    # ignored and a NameError was raised unless a module-level variable of
    # that name happened to exist.
    for from_nt, to_nt in dist_pais:
        try:
            dist = ftuv.vec_distance(cg.get_virtual_residue(int(from_nt), True),
                                     cg.get_virtual_residue(int(to_nt), True))
        except Exception as e:
            # Best effort: report NaN and keep describing the structure.
            dist = float("nan")
            log.warning("%d%s File %s: Could not calculate distance between "
                        "%d and %d: %s occurred: %s", file_num,
                        ordinal_suffix(),
                        cg.name, from_nt, to_nt, type(e).__name__, e)
        data["distance_{}_{}".format(from_nt, to_nt)] = dist

    for elem1, elem2 in angle_pairs:
        try:
            angle = ftuv.vec_angle(cg.coords.get_direction(elem1),
                                   cg.coords.get_direction(elem2))
        except Exception as e:
            # Best effort: report NaN and keep describing the structure.
            angle = float("nan")
            log.warning("%d%s File %s: Could not calculate angle between "
                        "%s and %s: %s occurred: %s", file_num,
                        ordinal_suffix(),
                        cg.name, elem1, elem2, type(e).__name__, e)
        data["angle_{}_{}".format(elem1, elem2)] = angle

    # Missing residues, derived from the length difference between slices
    # of the sequence with and without missing residues.
    # NOTE(review): the "- 1" presumably removes the one resolved residue
    # contained in each slice -- confirm against the sequence class.
    data["missing_residues_5prime"] = (len(cg.seq.with_missing[:1]) - 1)
    data["missing_residues_3prime"] = (
        len(cg.seq.with_missing[cg.seq_length:]) - 1)
    data["missing_residues_middle"] = (
        len(cg.seq.with_missing[1:cg.seq_length]) - len(cg.seq[1:cg.seq_length]))
    data["missing_residues_total"] = (
        len(cg.seq.with_missing[:]) - len(cg.seq[:]))

    # Per-chain variant: at every backbone break the slice is split at '&';
    # the part after it counts towards the 5' total, the part before it
    # towards the 3' total.
    fp = data["missing_residues_5prime"]
    tp = 0
    for bp in cg.backbone_breaks_after:
        around_break = cg.seq.with_missing[bp:bp + 1].split('&')
        fp += len(around_break[1]) - 1
        tp += len(around_break[0]) - 1
    tp += len(cg.seq.with_missing[cg.seq_length:]) - 1
    data["missing_residues_5prime_chain"] = (fp)
    data["missing_residues_3prime_chain"] = (tp)
    data["missing_residues_middle_chain"] = (
        data["missing_residues_total"] - fp - tp)

    # Number of incomplete elements per element type.
    incomplete_elem_types = Counter(x[0] for x in cg.incomplete_elements)
    data["s_with_missing"] = incomplete_elem_types["s"]
    data["i_with_missing"] = incomplete_elem_types["i"]
    data["m_with_missing"] = incomplete_elem_types["m"]
    data["h_with_missing"] = incomplete_elem_types["h"]

    # For incomplete stems, record every step where both the slice between
    # neighbouring positions on one strand and the slice between their
    # pairing partners are longer than two characters.
    missing_pairs = []
    if incomplete_elem_types["s"]:
        for elem in cg.incomplete_elements:
            if elem[0] != "s":
                continue
            for i in range(cg.defines[elem][0], cg.defines[elem][1]):
                left_s = cg.seq.with_missing[i:i + 1]
                if len(left_s) > 2:
                    right_s = cg.seq.with_missing[cg.pairing_partner(
                        i + 1):cg.pairing_partner(i)]
                    if len(right_s) > 2:
                        missing_pairs.append(
                            "{}&{};".format(left_s, right_s))
    data["missing_basepairs"] = "".join(missing_pairs)
    return data
示例#26
0
def extend_pk_description(dataset, filename, pk_type, rna, pk, pk_number):
    """
    Extend the dataset with a detailed description of the current pseudoknot
    in the current file, e.g. angles between stems.

    For every entry of ``pk.residue_numbers`` a pair of stems (``stem1`` and
    the following ``stem2``, wrapping around to the first entry for the last
    one) is determined, and one value per pair is appended to the columns of
    ``dataset``: bookkeeping columns (filename, pseudoknot type/id, index of
    the angle), DSSR-based stacking/A-minor/base-triple information (NaN or
    empty placeholders if ``rna.dssr`` is falsy), the angle and distance
    between the two stems, and the angle/distance to the stem returned by
    ``stem_after_next_ml``.

    :param dataset:  Current dataset that will be updated. ``.append`` is
                     called on its values, so it presumably maps column names
                     to lists (e.g. a ``defaultdict(list)``) -- TODO confirm
                     against the caller.
    :param filename: Filename of the current structure
    :param pk_type: Class of the pseudoknot
    :param rna: A forgi CoarseGrainRNA object
    :param pk: Structure of the pseudoknot, a NumberedDotbracket object,
               in a condensed (shadow-like) representation.
               This representation always contains the most 5' basepair.
    :param pk_number: consecutive number of the pseudoknot
    """
    domains = rna.get_domains()
    helices = domains["rods"] # A list of elements, e.g. ["s0", "i0", "s1"]
    # `helices` is only used for the debug output below.
    log.debug("Helices: %s", helices)
    #rna.log(logging.WARNING)
    # For every position i of the condensed pseudoknot structure:
    # stems_5p[i] is the element containing pk.residue_numbers[i],
    # stems_3p[i] is the element containing pk.helix_ends[i].
    stems_5p = []
    stems_3p = []

    # Integer positions (as returned by rna.seq.to_integer) of pk.residue_numbers
    nums = []
    log.debug("pk Residue numbers %s", pk.residue_numbers)
    log.debug("pk helix ends %s", pk.helix_ends)

    for i, resnum in enumerate(pk.residue_numbers):
        num = rna.seq.to_integer(resnum)
        nums.append(num)
        element_5p = rna.get_node_from_residue_num(num)
        stems_5p.append(element_5p)

        num2 = rna.seq.to_integer(pk.helix_ends[i])
        log.debug("num %s nums2 %s", num, num2)
        element_3p =rna.get_node_from_residue_num(num2)
        stems_3p.append(element_3p)
    log.debug("nums %s", nums)
    # One dataset row per entry of stems_5p: describes the pair (stem1, stem2),
    # where stem2 belongs to the following entry (wrapping around at the end).
    for i, stem1_5p in enumerate(stems_5p):
        dataset["Filename"].append(filename)
        # NOTE(review): unlike all other columns, this one is assigned instead
        # of appended to, so it holds only the name of the most recent rna.
        # Looks unintentional -- confirm how the caller consumes "rnaname".
        dataset["rnaname"] = rna.name
        dataset["pk_type"].append(pk_type)
        dataset["pk_id"].append(pk_number)
        dataset["angle_nr"].append(i)
        # The full structure string is only stored for unclassified pseudoknots.
        if pk_type == "other":
            dataset["pk_structure"].append(str(pk))
        else:
            dataset["pk_structure"].append("")
        #is this the first occurrence of stem in stems?
        if stems_5p.index(stem1_5p)==i:
            #first occurrence. Strand 0, look at 3' end of helix
            stem1 = stems_3p[i]
            strand = 0
        else:
            # Repeated occurrence: the element was already seen at a lower
            # index, so we are on its second strand.
            assert i>stems_5p.index(stem1_5p)
            stem1 = stem1_5p
            strand = 1
        # stem2 is taken from the next entry. For the last entry there is no
        # next one: wrap around to the first entry and flag the pair as
        # outside_pk.
        try:
            stem2_5p = stems_5p[i+1]
        except IndexError:
            stem2_5p = stems_5p[0]
            outside_pk = True
        else:
            outside_pk = False
        if outside_pk or stems_5p.index(stem2_5p)==i+1:
            #first occurrence
            stem2 = stem2_5p
            strand2 = 0
        else:
            strand2 = 1
            # NOTE(review): this branch is only entered when outside_pk is
            # False (see the condition above), so the `if outside_pk` case
            # below can never be taken.
            if outside_pk:
                stem2 = stems_3p[0]
            else:
                stem2 = stems_3p[i+1]
        log.debug("Stem 5' %s, 3' %s, stem1 %s stem2 %s", stems_5p, stems_3p, stem1, stem2)
        # enable stacking analysis via DSSR
        # differentiate between stacking (True), no stacking (False) and breaks
        # within/around the pseudoknot (-1) incl. 'virtual' angles e.g. H-Type angle_type3
        ml_stack=[]
        if rna.dssr:
            # Map every nucleotide to its non-canonical pairing partners
            # (both directions), as a list of (partner, type) tuples.
            nc_bps = list(rna.dssr.noncanonical_pairs())
            nc_dict = defaultdict(list)
            for nt1, nt2, typ in nc_bps:
                nc_dict[nt1].append((nt2, typ))
                nc_dict[nt2].append((nt1, typ))
            stacking_loops = rna.dssr.stacking_loops()
            # State of the backbone walk:
            #   start_found: how often stem1 was encountered before the walk
            #                became active; active once it equals strand+1.
            #   connection:  non-stem elements collected between stem1 and stem2.
            #   stacking:    None (undecided), True, False or -1 (see above).
            #   branch:      a stem other than stem2 that was entered after at
            #                least one connecting element; everything up to its
            #                next occurrence is skipped.
            start_found = 0
            connection = []
            stacking = None
            branch = None
            log.debug("Checking %s and %s for stacking, strand %s", stem1, stem2, strand)
            for elem in rna.iter_elements_along_backbone(): #walk along the backbone
                if start_found == strand+1:
                    # We are past the relevant strand of stem1.
                    if branch:
                        # Inside a branching-off stem: skip all elements until
                        # the same stem is encountered again.
                        log.debug("in branch: elem %s, branch %s, stacking %s", elem, branch, stacking)
                        if elem == branch:
                            log.debug("End branch at %s", elem)
                            branch = None
                            log.debug("Branch end")
                        continue
                    if elem[0] != "s":
                        # A non-stem element on the way from stem1 to stem2.
                        connection.append(elem)
                        # A backbone break after the element's last define
                        # value marks the connection as broken (-1).
                        if rna.defines[elem] and rna.defines[elem][-1] in rna.backbone_breaks_after:
                            stacking = -1
                        # A single element not in DSSR's stacking loops makes
                        # the connection non-stacking, unless already broken.
                        if elem not in stacking_loops and stacking != -1:
                            stacking = False
                    elif elem == stem2:
                        # Reached the target stem: if nothing contradicted
                        # stacking so far, the connection counts as stacking.
                        if stacking is None:
                            stacking = True
                        log.debug("Found second stem, elem %s, stacking %s", elem, stacking)
                        break
                    elif elem[0] == "s" and connection:
                        # Another stem after at least one connecting element:
                        # start skipping this branch.
                        branch = elem
                        if rna.defines[elem][-1] in rna.backbone_breaks_after:
                            stacking = -1
                    log.debug("elem %s, stacking %s, branch %s", elem, stacking, branch)
                elif elem == stem1:
                    start_found += 1
                    # Backbone break after the define value at index
                    # strand*2+1 of stem1 (presumably the end of the strand we
                    # leave stem1 from -- TODO confirm the defines layout).
                    if rna.defines[elem][strand*2+1] in rna.backbone_breaks_after:
                        stacking = -1
                    log.debug("First stem, elem %s, stacking %s", elem, stacking)
            else:
                # The walk finished without reaching stem2 (no `break`).
                log.debug("End iteration, stacking->-1")
                stacking = -1
            log.debug("Finally, stacking = %s", stacking)
            # more detailed stacking (including backbone breaks within and around the pseudoknot)
            dataset["this_loop_stacking_dssr"].append(stacking)
            dataset["connecting_loops"].append(",".join(connection))

            # more general stacking information
            # Elements adjacent to both stems that are listed in DSSR's stacking loops.
            connecting_loops = rna.edges[stem1]&rna.edges[stem2]
            for loop in connecting_loops:
                if loop in stacking_loops:
                    ml_stack.append(loop)
            stacks = rna.dssr.coaxial_stacks()
            log.info("Stacks: %s", stacks)
            for stack in stacks:
                if stem1 in stack and stem2 in stack:
                    # the two stems stack, but we do not specify along which
                    # multiloop segment they stack.
                    dataset["is_stacking_dssr"].append(True)
                    break
            else:
                # No coaxial stack contains both stems.
                dataset["is_stacking_dssr"].append(False)

            # Does the connection form base-triples with the stem?
            stem1_triples=0
            stem2_triples=0
            aminors1 = 0
            aminors2 = 0
            aminors = list(rna.dssr.aminor_interactions())
            for elem in connection:
                for nt in rna.define_residue_num_iterator(elem,seq_ids=True):
                    # Each nucleotide of the connection is counted as A-minor
                    # with stem1, else as A-minor with stem2, else its
                    # non-canonical partners inside stem1/stem2 are counted
                    # as base triples.
                    if (nt, stem1) in aminors:
                        aminors1+=1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem1)
                    elif (nt, stem2) in aminors:
                        aminors2+=1
                        log.debug("AMinor %s (%s), %s", nt, elem, stem2)
                    else:
                        for partner, typ in nc_dict[nt]:
                            if rna.get_elem(partner)==stem1:
                                log.debug("base_triple %s, %s: %s-%s (%s)", elem, stem1, nt,partner,typ)
                                stem1_triples+=1
                            elif rna.get_elem(partner)==stem2:
                                log.debug("base_triple %s, %s: %s-%s (%s)", elem, stem2, nt,partner,typ)
                                stem2_triples+=1
            log.debug("%s has a length of %s and %s triples", stem1, rna.stem_length(stem1),stem1_triples)
            log.debug("%s has a length of %s and %s triples", stem2, rna.stem_length(stem2),stem2_triples)
            # Despite the "perc" in the column names, the stored values are
            # counts divided by the stem length (not multiplied by 100).
            dataset["stem1_basetripleperc_dssr"].append(stem1_triples/rna.stem_length(stem1))
            dataset["stem2_basetripleperc_dssr"].append(stem2_triples/rna.stem_length(stem2))
            dataset["stem1_aminorperc_dssr"].append(aminors1/rna.stem_length(stem1))
            dataset["stem2_aminorperc_dssr"].append(aminors2/rna.stem_length(stem2))

        else:
            # No DSSR annotation available: fill the DSSR columns with
            # placeholders so that all columns keep the same length.
            dataset["is_stacking_dssr"].append(float("nan"))
            dataset["this_loop_stacking_dssr"].append(float("nan"))
            dataset["connecting_loops"].append("")
            dataset["stem1_basetripleperc_dssr"].append(float("nan"))
            dataset["stem2_basetripleperc_dssr"].append(float("nan"))
            dataset["stem1_aminorperc_dssr"].append(float("nan"))
            dataset["stem2_aminorperc_dssr"].append(float("nan"))

        # Empty string if there is no DSSR annotation or no stacking loop
        # connects the two stems.
        dataset["stacking_loops"].append(",".join(ml_stack))

        # stem_parameters is defined elsewhere; it is used here as returning a
        # (position, direction) pair. The third argument is `not strand` for
        # stem1 and `strand2` for stem2 -- presumably selecting the stem end
        # to look at; verify against stem_parameters.
        pos1, dir1 = stem_parameters(stem1, rna, not strand)
        pos2, dir2 = stem_parameters(stem2, rna, strand2)
        dataset["stem1"].append(stem1)
        dataset["stem2"].append(stem2)

        dataset["angle_between_stems"].append(ftuv.vec_angle(dir1, dir2))
        dataset["distance_between"].append(ftuv.vec_distance(pos1, pos2))

        # Geometry relative to the stem reported by stem_after_next_ml. It is
        # skipped for the wrap-around pair and when that stem is stem2 itself.
        next_stem = None
        if not outside_pk:
            next_stem = stem_after_next_ml(rna, nums[i], before=stem2)
            if next_stem==stem2:
                next_stem  = None
        if next_stem:
            posN, dirN = stem_parameters(next_stem, rna, 0)
            dataset["angle_to_next"].append(ftuv.vec_angle(dir1, dirN))
            dataset["distance_to_next"].append(ftuv.vec_distance(pos1, posN))
            dataset["next_stem"].append(next_stem)
        else:
            # Empty-string placeholders keep the columns aligned.
            dataset["angle_to_next"].append("")
            dataset["distance_to_next"].append("")
            dataset["next_stem"].append("")
        dataset["outside_pk"].append(outside_pk)