def get_edge_weight(from_node, to_node, children_coords, parent, args):
    """Return the gap penalty for the edge joining ``from_node`` to ``to_node``.

    The unaligned query range runs from ``from_node.query_block_end + 1`` to
    ``to_node.query_block_start + node_overlap - 1``, where ``node_overlap``
    comes from ``get_node_overlap``. Every child interval is converted to
    parent-relative coordinates and its overlap with that range is charged at
    ``args.gap_extend`` per base. If anything was charged,
    ``args.gap_open - args.gap_extend`` is added once.

    Args:
        from_node: Source node; reads ``query_block_end``,
            ``reference_block_end``, ``reference_name`` and ``is_reverse``.
        to_node: Target node; reads ``query_block_start``,
            ``reference_block_start``, ``reference_name`` and ``is_reverse``.
        children_coords: Iterable of intervals indexable as ``[0]`` and ``[1]``.
        parent: Passed through to ``liftoff_utils.get_relative_child_coord``.
        args: Object providing ``gap_open`` and ``gap_extend``.

    Returns:
        The accumulated penalty (0 when no child overlaps the unaligned range).
    """
    node_overlap = get_node_overlap(from_node, to_node)
    gap_first = from_node.query_block_end + 1
    gap_last = to_node.query_block_start + node_overlap - 1

    # Everything below is constant for the whole edge, so compute it once
    # instead of once per child interval.
    gap_low, gap_high = min(gap_first, gap_last), max(gap_first, gap_last)
    query_gap_is_empty = gap_first == gap_last + 1
    # A node whose reference_name is "start" takes its strand from to_node.
    if from_node.reference_name == "start":
        is_reverse = to_node.is_reverse
    else:
        is_reverse = from_node.is_reverse

    penalty = 0
    for child_interval in children_coords:
        coord_a = liftoff_utils.get_relative_child_coord(
            parent, child_interval[0], is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(
            parent, child_interval[1], is_reverse)
        child_start, child_end = min(coord_a, coord_b), max(coord_a, coord_b)
        overlap = liftoff_utils.count_overlap(
            child_start, child_end, gap_low, gap_high)

        if (overlap == 1 and query_gap_is_empty
                and from_node.reference_name == to_node.reference_name):
            # The blocks are adjacent in the query but on the same reference
            # sequence: charge the distance between them on the reference.
            reference_gap = ((to_node.reference_block_start + node_overlap)
                             - from_node.reference_block_end - 1)
            penalty += reference_gap * args.gap_extend
        else:
            penalty += max(0, overlap) * args.gap_extend

    # NOTE(review): the gap-open adjustment is applied once per edge, after all
    # children are summed -- confirm against the original indentation.
    if penalty > 0:
        penalty += args.gap_open - args.gap_extend  # gap open penalty
    return penalty
def find_nearest_aligned_start_and_end(child_start, child_end, shortest_path_nodes, parent):
    """Locate the nearest aligned start and end for one child feature.

    Both child coordinates are converted to parent-relative coordinates using
    the strand of the first node in ``shortest_path_nodes``, put in ascending
    order, and handed to ``find_nearest_aligned_start`` and
    ``find_nearest_aligned_end``.

    Returns:
        ``(nearest_start, nearest_end)`` exactly as produced by those helpers.
    """
    strand_is_reverse = shortest_path_nodes[0].is_reverse
    coord_a = liftoff_utils.get_relative_child_coord(
        parent, child_start, strand_is_reverse)
    coord_b = liftoff_utils.get_relative_child_coord(
        parent, child_end, strand_is_reverse)
    low, high = sorted((coord_a, coord_b))

    # The two helpers take their arguments in different orders.
    return (
        find_nearest_aligned_start(low, high, shortest_path_nodes),
        find_nearest_aligned_end(shortest_path_nodes, high, low),
    )
def find_nearest_start_and_end(child_start, child_end, shortest_path_nodes, parent):
    """Snap a child feature's endpoints onto the blocks of a node path.

    The child coordinates are converted to parent-relative coordinates using
    the strand of ``shortest_path_nodes[0]`` and ordered as
    ``relative_start <= relative_end``.

    Start: the first node whose ``query_block_end`` is not before
    ``relative_start`` decides the result. If ``relative_start`` lies inside
    that node it is kept; otherwise the node's ``query_block_start`` is used,
    provided it is before ``relative_end``.

    End: the first node whose ``query_block_end`` is not before
    ``relative_end`` decides the result. If ``relative_end`` lies inside that
    node it is kept; otherwise the previous node's ``query_block_end`` is used,
    provided it is after ``relative_start``. If no node reaches
    ``relative_end``, the last node's ``query_block_end`` is used when it lies
    strictly between ``relative_start`` and ``relative_end``.

    Args:
        child_start: First child coordinate.
        child_end: Second child coordinate.
        shortest_path_nodes: Non-empty sequence of nodes exposing
            ``query_block_start``, ``query_block_end`` and ``is_reverse``.
            NOTE(review): presumably sorted by query position -- confirm.
        parent: Passed through to ``liftoff_utils.get_relative_child_coord``.

    Returns:
        ``(nearest_start, nearest_end)``; either value is ``-1`` when no
        suitable position was found.
    """
    is_reverse = shortest_path_nodes[0].is_reverse
    coord_a = liftoff_utils.get_relative_child_coord(
        parent, child_start, is_reverse)
    coord_b = liftoff_utils.get_relative_child_coord(
        parent, child_end, is_reverse)
    relative_start = min(coord_a, coord_b)
    relative_end = max(coord_a, coord_b)

    nearest_start = -1
    for node in shortest_path_nodes:
        if relative_start > node.query_block_end:
            continue  # this block ends before the child begins
        if relative_start >= node.query_block_start:
            nearest_start = relative_start
        elif node.query_block_start < relative_end:
            nearest_start = node.query_block_start
        break  # only the first candidate block is considered

    nearest_end = -1
    previous = None
    for node in shortest_path_nodes:
        if relative_end > node.query_block_end:
            previous = node
            continue  # this block ends before the child does
        if relative_end >= node.query_block_start:
            nearest_end = relative_end
        elif (previous is not None
                and previous.query_block_end > relative_start):
            # The child ends in the gap before this block: fall back to the
            # end of the preceding block.
            nearest_end = previous.query_block_end
        break
    else:
        # No block reaches relative_end; ``previous`` is the last node here
        # (the sequence is non-empty because index 0 was read above).
        if relative_start < previous.query_block_end < relative_end:
            nearest_end = previous.query_block_end

    return nearest_start, nearest_end
def get_node_weight(aln, children_coords, parent):
    """Sum, over all child intervals, the mismatches of ``aln`` inside each.

    Each interval is converted to parent-relative coordinates using
    ``aln.is_reverse``. A mismatch position is counted once for every interval
    whose inclusive bounds contain it.

    Args:
        aln: Alignment exposing ``is_reverse`` and ``mismatches``; the latter
            must support element-wise comparison and boolean-mask indexing.
        children_coords: Iterable of intervals indexable as ``[0]`` and ``[1]``.
        parent: Passed through to ``liftoff_utils.get_relative_child_coord``.

    Returns:
        The total count as an ``int``.
    """
    total = 0
    for child_interval in children_coords:
        coord_a = liftoff_utils.get_relative_child_coord(
            parent, child_interval[0], aln.is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(
            parent, child_interval[1], aln.is_reverse)
        low, high = min(coord_a, coord_b), max(coord_a, coord_b)

        inside_child = (aln.mismatches >= low) & (aln.mismatches <= high)
        total += len(aln.mismatches[inside_child])
    return total
def contains_child(aln, children_coords, parent):
    """Report whether any child interval overlaps the alignment block.

    Each interval is converted to parent-relative coordinates using
    ``aln.is_reverse`` and compared with
    ``[aln.query_block_start, aln.query_block_end]`` via
    ``liftoff_utils.count_overlap``. Evaluation stops at the first overlap.

    Args:
        aln: Alignment exposing ``is_reverse``, ``query_block_start`` and
            ``query_block_end``.
        children_coords: Iterable of intervals indexable as ``[0]`` and ``[1]``.
        parent: Passed through to ``liftoff_utils.get_relative_child_coord``.

    Returns:
        ``True`` if at least one interval has a positive overlap, else
        ``False`` (including when ``children_coords`` is empty).
    """

    def _overlaps_block(child_interval):
        # Positive overlap between one child interval and the block.
        coord_a = liftoff_utils.get_relative_child_coord(
            parent, child_interval[0], aln.is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(
            parent, child_interval[1], aln.is_reverse)
        child_start, child_end = min(coord_a, coord_b), max(coord_a, coord_b)
        overlap = liftoff_utils.count_overlap(
            child_start, child_end, aln.query_block_start, aln.query_block_end)
        return overlap > 0

    return any(_overlaps_block(interval) for interval in children_coords)
def find_mismatched_bases(start, end, shortest_path_nodes, parent):
    """Count mismatch positions on a node path that fall inside a feature.

    ``start`` and ``end`` are converted to parent-relative coordinates using
    the strand of ``shortest_path_nodes[0]`` and ordered. For every node, the
    entries of ``node.mismatches`` inside the inclusive range are counted.

    Args:
        start: First feature coordinate.
        end: Second feature coordinate.
        shortest_path_nodes: Non-empty sequence of nodes exposing
            ``is_reverse`` and ``mismatches`` (anything ``np.asarray`` accepts).
        parent: Passed through to ``liftoff_utils.get_relative_child_coord``.

    Returns:
        The total number of matching mismatch positions as an ``int``.
    """
    is_reverse = shortest_path_nodes[0].is_reverse
    coord_a = liftoff_utils.get_relative_child_coord(parent, start, is_reverse)
    coord_b = liftoff_utils.get_relative_child_coord(parent, end, is_reverse)
    relative_start = min(coord_a, coord_b)
    relative_end = max(coord_a, coord_b)

    total_mismatches = 0
    for node in shortest_path_nodes:
        # asarray avoids copying when mismatches is already an ndarray.
        node_mismatches = np.asarray(node.mismatches)
        in_range = ((node_mismatches >= relative_start)
                    & (node_mismatches <= relative_end))
        # Counting True entries directly avoids building an index array and a
        # filtered copy just to measure its length.
        total_mismatches += int(np.count_nonzero(in_range))
    return total_mismatches
def find_overlapping_children(aln, children_coords, parent):
    """Collect the bounds of every child interval overlapping the alignment.

    Each interval is converted to parent-relative coordinates using
    ``aln.is_reverse``. When ``liftoff_utils.count_overlap`` reports a positive
    overlap with ``[aln.query_block_start, aln.query_block_end]``, the
    interval's relative start and end are appended to the result.

    Returns:
        A flat list ``[start_0, end_0, start_1, end_1, ...]`` in input order;
        empty when nothing overlaps.
    """
    flat_bounds = []
    for child_interval in children_coords:
        coord_a = liftoff_utils.get_relative_child_coord(
            parent, child_interval[0], aln.is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(
            parent, child_interval[1], aln.is_reverse)
        low, high = min(coord_a, coord_b), max(coord_a, coord_b)

        shared = liftoff_utils.count_overlap(
            low, high, aln.query_block_start, aln.query_block_end)
        if shared > 0:
            flat_bounds.extend((low, high))
    return flat_bounds
def find_mismatched_bases(start, end, shortest_path_nodes, parent):
    """List the mismatch positions on a node path that fall inside a feature.

    ``start`` and ``end`` are converted to parent-relative coordinates using
    the strand of ``shortest_path_nodes[0]`` and ordered. For every node, the
    entries of ``node.mismatches`` inside the inclusive range are collected.

    Returns:
        A plain list of the selected positions, in node order.
    """
    strand_is_reverse = shortest_path_nodes[0].is_reverse
    coord_a = liftoff_utils.get_relative_child_coord(
        parent, start, strand_is_reverse)
    coord_b = liftoff_utils.get_relative_child_coord(
        parent, end, strand_is_reverse)
    low = min(coord_a, coord_b)
    high = max(coord_a, coord_b)

    collected = []
    for node in shortest_path_nodes:
        positions = np.array(node.mismatches)
        hit_indices = np.nonzero((positions >= low) & (positions <= high))[0]
        # Extending with an empty list is a no-op, so no emptiness check.
        collected.extend(positions[hit_indices].tolist())
    return collected
def get_edge_weight(from_node, to_node, children_coords, parent):
    """Return the number of child bases left unaligned between two nodes.

    The unaligned query range runs from ``from_node.query_block_end + 1`` to
    ``to_node.query_block_start - 1``. Every child interval is converted to
    parent-relative coordinates and its overlap with that range (as reported
    by ``liftoff_utils.count_overlap``) is added to the total.

    Args:
        from_node: Source node; reads ``query_block_end``,
            ``reference_block_end``, ``reference_name`` and ``is_reverse``.
        to_node: Target node; reads ``query_block_start``,
            ``reference_block_start`` and ``is_reverse``.
        children_coords: Iterable of intervals indexable as ``[0]`` and ``[1]``.
        parent: Passed through to ``liftoff_utils.get_relative_child_coord``.

    Returns:
        The accumulated count (0 when ``children_coords`` is empty).
    """
    gap_start = from_node.query_block_end + 1
    gap_end = to_node.query_block_start - 1

    # The strand is the same for every child, so pick it once. A node whose
    # reference_name is "start" takes its strand from to_node.
    if from_node.reference_name == "start":
        is_reverse = to_node.is_reverse
    else:
        is_reverse = from_node.is_reverse

    unaligned_exon_bases = 0
    for child_interval in children_coords:
        coord_a = liftoff_utils.get_relative_child_coord(
            parent, child_interval[0], is_reverse)
        coord_b = liftoff_utils.get_relative_child_coord(
            parent, child_interval[1], is_reverse)
        child_start, child_end = min(coord_a, coord_b), max(coord_a, coord_b)
        overlap = liftoff_utils.count_overlap(
            child_start, child_end, gap_start, gap_end)

        if overlap == 1 and gap_start == gap_end:
            # Single-base query gap: use the reference-side distance instead.
            unaligned_exon_bases += (to_node.reference_block_start
                                     - from_node.reference_block_end + 1)
        else:
            unaligned_exon_bases += max(0, overlap)
    return unaligned_exon_bases