def contains_child(aln, children_coords, parent):
    """Report whether the alignment's query block overlaps any child interval.

    Each entry of ``children_coords`` is read through indices 0 and 1 as a
    pair of endpoints. Both endpoints are passed through
    ``liftoff_utils.get_relative_child_coord`` together with ``parent`` and
    ``aln.is_reverse``, put into ascending order, and compared against the
    span ``aln.query_block_start`` .. ``aln.query_block_end`` using
    ``liftoff_utils.count_overlap``.

    Args:
        aln: Alignment object; ``is_reverse``, ``query_block_start`` and
            ``query_block_end`` are read from it.
        children_coords: Iterable of indexable endpoint pairs.
        parent: Forwarded unchanged to ``get_relative_child_coord``.

    Returns:
        True as soon as one interval yields a positive overlap, otherwise
        False (which is also the result for an empty ``children_coords``).
    """
    for interval in children_coords:
        first_edge = liftoff_utils.get_relative_child_coord(
            parent, interval[0], aln.is_reverse)
        second_edge = liftoff_utils.get_relative_child_coord(
            parent, interval[1], aln.is_reverse)
        # The converted endpoints are not assumed to arrive in ascending
        # order, so order them before measuring the overlap.
        low, high = sorted((first_edge, second_edge))
        shared = liftoff_utils.count_overlap(
            low, high, aln.query_block_start, aln.query_block_end)
        if shared > 0:
            return True
    return False
def get_node_weight(aln, children_coords, parent):
    """Count the alignment's mismatches that lie inside the child intervals.

    For every entry of ``children_coords`` the two endpoints (indices 0 and
    1) are converted with ``liftoff_utils.get_relative_child_coord`` using
    ``parent`` and ``aln.is_reverse``, ordered low-to-high, and the number of
    values in ``aln.mismatches`` within that closed range is added to the
    total. Counts are summed per interval, so a mismatch covered by several
    intervals contributes once for each of them.

    Args:
        aln: Alignment object; ``is_reverse`` and ``mismatches`` are read.
            ``mismatches`` is compared elementwise and indexed with the
            resulting mask -- presumably a NumPy array; confirm with caller.
        children_coords: Iterable of indexable endpoint pairs.
        parent: Forwarded unchanged to ``get_relative_child_coord``.

    Returns:
        The summed mismatch count (0 when ``children_coords`` is empty).
    """
    total = 0
    for interval in children_coords:
        first_edge = liftoff_utils.get_relative_child_coord(
            parent, interval[0], aln.is_reverse)
        second_edge = liftoff_utils.get_relative_child_coord(
            parent, interval[1], aln.is_reverse)
        low, high = sorted((first_edge, second_edge))
        inside_child = (aln.mismatches >= low) & (aln.mismatches <= high)
        total += len(aln.mismatches[inside_child])
    return total
def find_mismatched_bases(start, end, shortest_path_nodes, parent):
    """Collect the mismatch positions of all path nodes within a feature span.

    ``start`` and ``end`` are converted with
    ``liftoff_utils.get_relative_child_coord`` using ``parent`` and the
    ``is_reverse`` flag of the first path node. Every value in each node's
    ``mismatches`` that lies inside the resulting closed range is gathered.

    Args:
        start: First endpoint of the span, in the coordinate system that
            ``get_relative_child_coord`` expects.
        end: Second endpoint of the span.
        shortest_path_nodes: Sequence of alignment nodes; ``is_reverse`` is
            read from the first one and ``mismatches`` from each.
        parent: Forwarded unchanged to ``get_relative_child_coord``.

    Returns:
        A plain list of mismatch positions, in node order. Empty when the
        path is empty or no mismatch falls inside the span.
    """
    # An empty path carries no mismatches; avoid indexing element 0.
    if len(shortest_path_nodes) == 0:
        return []

    is_reverse = shortest_path_nodes[0].is_reverse
    relative_start = liftoff_utils.get_relative_child_coord(
        parent, start, is_reverse)
    relative_end = liftoff_utils.get_relative_child_coord(
        parent, end, is_reverse)
    # The other helpers in this file order the converted endpoints before
    # using them as a range. Without that, a conversion that returns them
    # in descending order (presumably the reverse-strand case -- confirm)
    # makes the filter below match nothing.
    range_start = min(relative_start, relative_end)
    range_end = max(relative_start, relative_end)

    all_mismatches = []
    for node in shortest_path_nodes:
        node_mismatches = np.array(node.mismatches)
        in_range = ((node_mismatches >= range_start)
                    & (node_mismatches <= range_end))
        all_mismatches.extend(node_mismatches[in_range].tolist())
    return all_mismatches
def convert_coord(coord, parent, shortest_path_nodes):
    """Map one coordinate onto the reference using the alignment path.

    ``coord`` is converted with ``liftoff_utils.get_relative_child_coord``
    using ``parent`` and the ``is_reverse`` flag of the node at index 1.
    Only the nodes between the first and last path entries are examined
    (the two end entries are presumably path sentinels -- confirm).

    NOTE(review): another ``convert_coord`` with a different signature is
    visible later in this source; if both live in the same module, the later
    definition replaces this one.

    Args:
        coord: Coordinate to convert.
        parent: Forwarded unchanged to ``get_relative_child_coord``.
        shortest_path_nodes: Sequence of path nodes with at least two
            entries.

    Returns:
        * 0 when the path has exactly two entries;
        * the reference position at the same offset into the block when the
          coordinate lies inside an examined node's query block;
        * that node's ``reference_block_end`` when the coordinate lies after
          the block but before the next node's query block start;
        * otherwise the ``reference_block_start`` of the node at index 1.
    """
    relative_coord = liftoff_utils.get_relative_child_coord(
        parent, coord, shortest_path_nodes[1].is_reverse)
    node_count = len(shortest_path_nodes)
    if node_count == 2:
        return 0
    for index in range(1, node_count - 1):
        node = shortest_path_nodes[index]
        if not relative_coord >= node.query_block_start:
            continue
        if relative_coord <= node.query_block_end:
            offset = relative_coord - node.query_block_start
            return node.reference_block_start + offset
        # Past this block: if still short of the next block, the coordinate
        # sits in the gap between them and is pinned to this block's end.
        following = shortest_path_nodes[index + 1]
        if relative_coord < following.query_block_start:
            return node.reference_block_end
    return shortest_path_nodes[1].reference_block_start
def get_edge_weight(from_node, to_node, children_coords, parent):
    """Weigh an edge by the child bases left unaligned between two nodes.

    The unaligned query range runs from ``from_node.query_block_end + 1`` to
    ``to_node.query_block_start - 1``. Each entry of ``children_coords`` is
    converted with ``liftoff_utils.get_relative_child_coord`` and its overlap
    with that range is measured with ``liftoff_utils.count_overlap``.

    Args:
        from_node: Source node; ``query_block_end``, ``reference_name``,
            ``is_reverse`` and ``reference_block_end`` are read.
        to_node: Target node; ``query_block_start``, ``is_reverse`` and
            ``reference_block_start`` are read.
        children_coords: Iterable of indexable endpoint pairs.
        parent: Forwarded unchanged to ``get_relative_child_coord``.

    Returns:
        One plus the accumulated unaligned child bases, so the result is 1
        when ``children_coords`` is empty.
    """
    gap_start = from_node.query_block_end + 1
    gap_end = to_node.query_block_start - 1
    penalty = 0
    for interval in children_coords:
        # When the edge leaves the node named "start", the strand is taken
        # from the target node instead of the source node.
        strand_source = (to_node if from_node.reference_name == "start"
                         else from_node)
        is_reverse = strand_source.is_reverse
        first_edge = liftoff_utils.get_relative_child_coord(
            parent, interval[0], is_reverse)
        second_edge = liftoff_utils.get_relative_child_coord(
            parent, interval[1], is_reverse)
        low, high = sorted((first_edge, second_edge))
        overlap = liftoff_utils.count_overlap(low, high, gap_start, gap_end)
        if overlap == 1 and gap_start == gap_end:
            # Single-base query gap: charge the reference-side distance
            # between the two blocks instead of the one overlapping base.
            penalty += (to_node.reference_block_start
                        - from_node.reference_block_end + 1)
        else:
            penalty += max(0, overlap)
    return penalty + 1
def convert_coord(coord_start, coord_end, parent, shortest_path_nodes):
    """Map a start/end coordinate pair onto the reference via the path.

    Each coordinate is converted with
    ``liftoff_utils.get_relative_child_coord`` using ``parent`` and the
    ``is_reverse`` flag of the first path node, then checked against every
    node's closed query block. A coordinate inside a block maps to the
    reference position at the same offset into that block. All nodes are
    scanned without stopping, so the last containing node decides the value.

    NOTE(review): another ``convert_coord`` with a different signature is
    visible earlier in this source; if both live in the same module, this
    later definition is the one that takes effect.

    Args:
        coord_start: First coordinate to convert.
        coord_end: Second coordinate to convert.
        parent: Forwarded unchanged to ``get_relative_child_coord``.
        shortest_path_nodes: Non-empty sequence of path nodes.

    Returns:
        A ``(lifted_start, lifted_end)`` tuple; an entry is 0 when its
        coordinate lies in no node's query block.
    """
    lifted = []
    for coord in (coord_start, coord_end):
        relative_coord = liftoff_utils.get_relative_child_coord(
            parent, coord, shortest_path_nodes[0].is_reverse)
        mapped = 0  # stays 0 when no node's block contains the coordinate
        for node in shortest_path_nodes:
            if (relative_coord >= node.query_block_start
                    and relative_coord <= node.query_block_end):
                offset = relative_coord - node.query_block_start
                mapped = node.reference_block_start + offset
        lifted.append(mapped)
    lifted_start, lifted_end = lifted
    return lifted_start, lifted_end