Exemplo n.º 1
0
def get_edge_weight(from_node, to_node, children_coords, parent, args):
    """Return the gap penalty charged for moving from one node to the next.

    The unaligned query range starts one base after ``from_node``'s query
    block and ends one base before ``to_node``'s query block, shifted by
    the value returned by ``get_node_overlap(from_node, to_node)``. Each
    child interval is converted with
    ``liftoff_utils.get_relative_child_coord`` and compared against that
    range with ``liftoff_utils.count_overlap``.

    Args:
        from_node: Node the edge leaves. Its ``query_block_end``,
            ``reference_name``, ``reference_block_end`` and ``is_reverse``
            attributes are read.
        to_node: Node the edge enters. Its ``query_block_start``,
            ``reference_name``, ``reference_block_start`` and
            ``is_reverse`` attributes are read.
        children_coords: Iterable of indexable ``(start, end)`` pairs.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.
        args: Object providing ``gap_open`` and ``gap_extend``.

    Returns:
        The accumulated penalty: ``gap_extend`` per counted base, plus
        ``gap_open - gap_extend`` once if anything was counted at all.
    """
    shared = get_node_overlap(from_node, to_node)
    gap_first = from_node.query_block_end + 1
    gap_last = to_node.query_block_start + shared - 1
    # The two bounds may be inverted, so order them before measuring overlap.
    gap_low = min(gap_first, gap_last)
    gap_high = max(gap_first, gap_last)

    penalty = 0
    for interval in children_coords:
        # The synthetic "start" node carries no strand of its own, so the
        # strand is taken from the node being entered instead.
        # NOTE(review): presumably "start" marks a graph source node -
        # confirm against the graph construction code.
        if from_node.reference_name == "start":
            is_reverse = to_node.is_reverse
        else:
            is_reverse = from_node.is_reverse

        coord_a = liftoff_utils.get_relative_child_coord(
            parent, interval[0], is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(
            parent, interval[1], is_reverse)
        overlap = liftoff_utils.count_overlap(
            min(coord_a, coord_b), max(coord_a, coord_b), gap_low, gap_high)

        # The query-side range is empty (its start is one past its end) and
        # both nodes lie on the same reference: charge the distance between
        # the reference blocks instead of the query overlap.
        query_gap_is_empty = (
            overlap == 1
            and gap_first == gap_last + 1
            and from_node.reference_name == to_node.reference_name
        )
        if query_gap_is_empty:
            bases = ((to_node.reference_block_start + shared)
                     - from_node.reference_block_end - 1)
        else:
            # count_overlap may report a non-positive value; never reward it.
            bases = max(0, overlap)
        penalty += bases * args.gap_extend

    if penalty > 0:
        # The first gap base costs gap_open rather than gap_extend.
        penalty += args.gap_open - args.gap_extend
    return penalty
Exemplo n.º 2
0
def find_nearest_aligned_start_and_end(child_start, child_end, shortest_path_nodes, parent):
    """Return the nearest aligned start and end for a child interval.

    Both child coordinates are converted with
    ``liftoff_utils.get_relative_child_coord`` using the strand of the
    first node in ``shortest_path_nodes``, ordered so the smaller comes
    first, and then handed to ``find_nearest_aligned_start`` and
    ``find_nearest_aligned_end``.

    Args:
        child_start: First coordinate of the child interval.
        child_end: Second coordinate of the child interval.
        shortest_path_nodes: Non-empty sequence of nodes; the first node's
            ``is_reverse`` attribute selects the strand.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.

    Returns:
        A ``(nearest_start, nearest_end)`` tuple holding the two helpers'
        results.
    """
    is_reverse = shortest_path_nodes[0].is_reverse
    coord_a = liftoff_utils.get_relative_child_coord(parent, child_start, is_reverse)
    coord_b = liftoff_utils.get_relative_child_coord(parent, child_end, is_reverse)
    low = min(coord_a, coord_b)
    high = max(coord_a, coord_b)
    # The two helpers take their arguments in different orders.
    return (
        find_nearest_aligned_start(low, high, shortest_path_nodes),
        find_nearest_aligned_end(shortest_path_nodes, high, low),
    )
Exemplo n.º 3
0
def find_nearest_start_and_end(child_start, child_end, shortest_path_nodes,
                               parent):
    """Clip a child interval to the query blocks of the path nodes.

    The child coordinates are converted with
    ``liftoff_utils.get_relative_child_coord`` using the strand of the
    first node and ordered into ``relative_start <= relative_end``. The
    nodes are then scanned in order to find the nearest start and end
    positions that fall on a query block.

    Args:
        child_start: First coordinate of the child interval.
        child_end: Second coordinate of the child interval.
        shortest_path_nodes: Non-empty sequence of nodes exposing
            ``query_block_start``, ``query_block_end`` and ``is_reverse``.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.

    Returns:
        A ``(nearest_start, nearest_end)`` tuple; either element is ``-1``
        when no suitable position was found.
    """
    is_reverse = shortest_path_nodes[0].is_reverse
    coord_a = liftoff_utils.get_relative_child_coord(
        parent, child_start, is_reverse)
    coord_b = liftoff_utils.get_relative_child_coord(
        parent, child_end, is_reverse)
    relative_start, relative_end = min(coord_a, coord_b), max(coord_a, coord_b)

    # Start: only the first node whose block ends at or after the start is
    # considered.
    nearest_start = -1
    for node in shortest_path_nodes:
        if relative_start > node.query_block_end:
            continue
        if node.query_block_start <= relative_start:
            # The start lies inside this block.
            nearest_start = relative_start
        elif node.query_block_start < relative_end:
            # The start lies before this block: snap forward to the block
            # start, provided that is still before the interval's end.
            nearest_start = node.query_block_start
        break

    # End: only the first node whose block ends at or after the end is
    # considered.
    nearest_end = -1
    for index, node in enumerate(shortest_path_nodes):
        if relative_end > node.query_block_end:
            continue
        if node.query_block_start <= relative_end:
            # The end lies inside this block.
            nearest_end = relative_end
        elif index > 0:
            # The end lies before this block: snap back to the previous
            # block's end, provided that is still after the interval's start.
            previous_end = shortest_path_nodes[index - 1].query_block_end
            if previous_end > relative_start:
                nearest_end = previous_end
        break

    # `node` is whichever node the loop above stopped on. The test can only
    # succeed when that block ends before relative_end, which a node that
    # triggered the break never satisfies.
    stop_end = node.query_block_end
    if nearest_end == -1 and relative_start < stop_end < relative_end:
        nearest_end = stop_end
    return nearest_start, nearest_end
Exemplo n.º 4
0
def get_node_weight(aln, children_coords, parent):
    """Count the alignment's mismatches that fall inside the child intervals.

    Args:
        aln: Alignment exposing ``is_reverse`` and ``mismatches``. The
            latter is compared element-wise and indexed with a boolean
            mask, so it is presumably a NumPy array - confirm against the
            caller.
        children_coords: Iterable of indexable ``(start, end)`` pairs.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.

    Returns:
        The sum, over all child intervals, of the number of mismatch
        positions within the interval's converted bounds (inclusive).
        ``0`` when ``children_coords`` is empty.
    """
    def mismatches_within(interval):
        coord_a = liftoff_utils.get_relative_child_coord(parent, interval[0], aln.is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(parent, interval[1], aln.is_reverse)
        low, high = min(coord_a, coord_b), max(coord_a, coord_b)
        in_range = (aln.mismatches >= low) & (aln.mismatches <= high)
        return len(aln.mismatches[in_range])

    return sum(mismatches_within(interval) for interval in children_coords)
Exemplo n.º 5
0
def contains_child(aln, children_coords, parent):
    """Tell whether the alignment's query block overlaps any child interval.

    Args:
        aln: Alignment exposing ``is_reverse``, ``query_block_start`` and
            ``query_block_end``.
        children_coords: Iterable of indexable ``(start, end)`` pairs.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.

    Returns:
        ``True`` as soon as one child interval yields a positive
        ``liftoff_utils.count_overlap`` against the query block, otherwise
        ``False`` (including when ``children_coords`` is empty).
    """
    def overlaps_query_block(interval):
        coord_a = liftoff_utils.get_relative_child_coord(parent, interval[0], aln.is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(parent, interval[1], aln.is_reverse)
        shared = liftoff_utils.count_overlap(
            min(coord_a, coord_b), max(coord_a, coord_b),
            aln.query_block_start, aln.query_block_end)
        return shared > 0

    # any() stops at the first overlapping child, like an early return.
    return any(overlaps_query_block(interval) for interval in children_coords)
Exemplo n.º 6
0
def find_mismatched_bases(start, end, shortest_path_nodes, parent):
    """Count the mismatch positions of all path nodes inside an interval.

    Args:
        start: First coordinate of the interval.
        end: Second coordinate of the interval.
        shortest_path_nodes: Non-empty sequence of nodes exposing
            ``mismatches`` and ``is_reverse``; the first node's strand is
            used for the coordinate conversion.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.

    Returns:
        The total number of mismatch positions, summed over every node,
        that lie within the converted interval (bounds inclusive).
    """
    is_reverse = shortest_path_nodes[0].is_reverse
    coord_a = liftoff_utils.get_relative_child_coord(parent, start, is_reverse)
    coord_b = liftoff_utils.get_relative_child_coord(parent, end, is_reverse)
    low, high = min(coord_a, coord_b), max(coord_a, coord_b)

    def count_in_range(node):
        # assumes node.mismatches is a flat sequence of positions - TODO confirm
        positions = np.array(node.mismatches)
        in_range = (positions >= low) & (positions <= high)
        return len(positions[in_range])

    return sum(count_in_range(node) for node in shortest_path_nodes)
Exemplo n.º 7
0
def find_overlapping_children(aln, children_coords, parent):
    """Collect the converted bounds of children overlapping the alignment.

    Args:
        aln: Alignment exposing ``is_reverse``, ``query_block_start`` and
            ``query_block_end``.
        children_coords: Iterable of indexable ``(start, end)`` pairs.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.

    Returns:
        A flat list ``[start_1, end_1, start_2, end_2, ...]`` with the
        ordered, converted bounds of every child whose
        ``liftoff_utils.count_overlap`` against the query block is
        positive. Empty when nothing overlaps.
    """
    flat_bounds = []
    for interval in children_coords:
        coord_a = liftoff_utils.get_relative_child_coord(
            parent, interval[0], aln.is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(
            parent, interval[1], aln.is_reverse)
        low, high = min(coord_a, coord_b), max(coord_a, coord_b)
        shared = liftoff_utils.count_overlap(
            low, high, aln.query_block_start, aln.query_block_end)
        if not shared > 0:
            continue
        # Bounds are stored as two consecutive entries, not as a pair.
        flat_bounds.extend((low, high))
    return flat_bounds
Exemplo n.º 8
0
def find_mismatched_bases(start, end, shortest_path_nodes, parent):
    """List the mismatch positions of all path nodes inside an interval.

    Args:
        start: First coordinate of the interval.
        end: Second coordinate of the interval.
        shortest_path_nodes: Non-empty sequence of nodes exposing
            ``mismatches`` and ``is_reverse``; the first node's strand is
            used for the coordinate conversion.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.

    Returns:
        A list with every mismatch position, in node order, that lies
        within the converted interval (bounds inclusive).
    """
    is_reverse = shortest_path_nodes[0].is_reverse
    coord_a = liftoff_utils.get_relative_child_coord(parent, start, is_reverse)
    coord_b = liftoff_utils.get_relative_child_coord(parent, end, is_reverse)
    low, high = min(coord_a, coord_b), max(coord_a, coord_b)

    collected = []
    for node in shortest_path_nodes:
        # assumes node.mismatches is a flat sequence of positions - TODO confirm
        positions = np.array(node.mismatches)
        in_range = (positions >= low) & (positions <= high)
        # Extending with an empty selection is a no-op, so no guard is needed.
        collected.extend(positions[in_range].tolist())
    return collected
Exemplo n.º 9
0
def get_edge_weight(from_node, to_node, children_coords, parent):
    """Count child bases falling in the unaligned range between two nodes.

    The unaligned query range runs from one base after ``from_node``'s
    query block to one base before ``to_node``'s query block. Each child
    interval is converted with ``liftoff_utils.get_relative_child_coord``
    and compared against that range with ``liftoff_utils.count_overlap``.

    Args:
        from_node: Node the edge leaves. Its ``query_block_end``,
            ``reference_name``, ``reference_block_end`` and ``is_reverse``
            attributes are read.
        to_node: Node the edge enters. Its ``query_block_start``,
            ``reference_block_start`` and ``is_reverse`` attributes are
            read.
        children_coords: Iterable of indexable ``(start, end)`` pairs.
        parent: Passed through unchanged to
            ``liftoff_utils.get_relative_child_coord``.

    Returns:
        The accumulated base count; ``0`` when ``children_coords`` is
        empty.
    """
    gap_first = from_node.query_block_end + 1
    gap_last = to_node.query_block_start - 1

    total = 0
    for interval in children_coords:
        # A from_node named "start" does not supply the strand; use the
        # node being entered instead.
        # NOTE(review): presumably "start" marks a graph source node -
        # confirm against the graph construction code.
        is_reverse = (to_node.is_reverse
                      if from_node.reference_name == "start"
                      else from_node.is_reverse)
        coord_a = liftoff_utils.get_relative_child_coord(parent, interval[0], is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(parent, interval[1], is_reverse)
        overlap = liftoff_utils.count_overlap(
            min(coord_a, coord_b), max(coord_a, coord_b), gap_first, gap_last)

        if overlap == 1 and gap_first == gap_last:
            # Single-base query range: count the span between the two
            # reference blocks instead of the query overlap.
            total += to_node.reference_block_start - from_node.reference_block_end + 1
        else:
            # count_overlap may report a non-positive value; clamp at zero.
            total += max(0, overlap)
    return total