Exemplo n.º 1
0
def get_aligned_blocks(alignment, aln_id, feature_hierarchy, search_type):
    """Split one alignment into gap-free blocks by walking its CIGAR.

    Args:
        alignment: Alignment record; this function reads its ``cigar``,
            ``query_name`` and ``reference_start`` attributes.
        aln_id: Identifier forwarded unchanged to ``add_block``.
        feature_hierarchy: Object whose ``children`` and ``parents`` mappings
            are indexed by the original (converted) query name.
        search_type: When equal to ``"copies"``, alignments for which
            ``is_end_to_end_alignment`` returns ``False`` are rejected.

    Returns:
        The ``new_blocks`` list (presumably filled in place by ``add_block``,
        which receives it as an argument -- confirm against that helper), or
        an empty list when the alignment is rejected.
    """
    cigar_operations = get_cigar_operations()
    cigar = alignment.cigar
    query_start, query_end = get_query_start_and_end(alignment, cigar, cigar_operations)
    children = feature_hierarchy.children[liftoff_utils.convert_id_to_original(alignment.query_name)]
    parent = feature_hierarchy.parents[liftoff_utils.convert_id_to_original(alignment.query_name)]
    # In "copies" mode only alignments accepted by is_end_to_end_alignment are kept.
    if search_type == "copies" and is_end_to_end_alignment(parent, query_start, query_end) is False:
        return []
    # Each block is tracked by its start and a running position, on both the
    # reference and the query; all four begin at the start of the alignment.
    reference_block_start, reference_block_pos = alignment.reference_start, alignment.reference_start
    query_block_start, query_block_pos = query_start, query_start
    new_blocks, mismatches = [], []
    merged_children_coords = liftoff_utils.merge_children_intervals(children)
    for operation, length in cigar:
        if base_is_aligned(operation, cigar_operations):
            # Advance both running positions; `mismatches` is passed in, so the
            # helper presumably records mismatch positions in it (confirm).
            query_block_pos, reference_block_pos = add_aligned_base(operation, query_block_pos, reference_block_pos,
                                                                    length, cigar_operations, mismatches)
            # Reaching the end of the aligned query closes the final block.
            if query_block_pos == query_end:
                add_block(query_block_pos, reference_block_pos, aln_id, alignment, query_block_start,
                          reference_block_start, mismatches, new_blocks, merged_children_coords, parent)
        elif is_alignment_gap(operation, cigar_operations):
            # A gap closes the current block; the helper then supplies a fresh
            # mismatch list plus the start/position values for the next block.
            add_block(query_block_pos, reference_block_pos, aln_id, alignment, query_block_start, reference_block_start,
                      mismatches, new_blocks, merged_children_coords, parent)
            mismatches, query_block_start, reference_block_start, query_block_pos, reference_block_pos = \
                end_block_at_gap(
                operation, query_block_pos, reference_block_pos, length)
    return new_blocks
Exemplo n.º 2
0
def sort_alignments(parent_dict, alignments):
    """Return the alignment groups ordered by their parent feature's location.

    Args:
        parent_dict: Mapping from original feature id to a parent feature
            exposing ``seqid``, ``start`` and ``id``.
        alignments: Mapping whose values are non-empty sequences of alignment
            records; the first record's ``query_name`` identifies the parent.

    Returns:
        A new list with the values of ``alignments``, sorted so that their
        parents are in ``(seqid, start)`` order.
    """
    def original_id(group):
        # The first alignment of a group identifies the feature it belongs to.
        return liftoff_utils.convert_id_to_original(group[0].query_name)

    groups = alignments.values()
    parents_in_order = sorted(
        (parent_dict[original_id(group)] for group in groups),
        key=lambda feature: (feature.seqid, feature.start))
    # Rank of each parent id in genomic order; a repeated id keeps its last rank.
    rank_by_id = {feature.id: rank for rank, feature in enumerate(parents_in_order)}
    return sorted(groups, key=lambda group: rank_by_id[original_id(group)])
Exemplo n.º 3
0
def lift_single_feature(
    threshold,
    feature_order,
    features_to_lift,
    feature_hierarchy,
    previous_feature_start,
    previous_feature_ref_start,
    previous_gene_seq,
    unmapped_features,
    aligned_feature,
    seq_id_threshold,
    feature_locations,
    lifted_features_list,
    distance_factor,
):
    """Lift one parent feature using its aligned blocks.

    The parent is looked up in ``features_to_lift`` by the original id derived
    from the first alignment's ``query_name``. The best mapping is chosen by
    ``find_best_mapping.find_best_mapping`` and its result is merged by
    ``merge_lifted_features.merge_lifted_features``. All other arguments are
    forwarded unchanged to those two helpers.

    Args:
        aligned_feature: Sequence of aligned blocks for one feature. It must
            contain at least one element, because the feature name is read
            from ``aligned_feature[0]``; an empty list raises ``IndexError``.
        unmapped_features: List that receives the parent when nothing could
            be lifted on the fallback path.

    Returns:
        A tuple ``(lifted_features, query_name)``: whatever
        ``merge_lifted_features`` returned (or an empty list on the fallback
        path) and the query name of the first alignment.
    """
    new_parent_name = aligned_feature[0].query_name
    original_parent_name = liftoff_utils.convert_id_to_original(
        new_parent_name)
    parent = features_to_lift[original_parent_name]
    if len(aligned_feature) > 0:
        lifted_children, alignment_coverage, seq_id = find_best_mapping.find_best_mapping(
            aligned_feature, parent.end - parent.start + 1, parent,
            feature_hierarchy, previous_feature_start,
            previous_feature_ref_start, previous_gene_seq, feature_locations,
            lifted_features_list, distance_factor)

        lifted_features = merge_lifted_features.merge_lifted_features(
            lifted_children, parent, unmapped_features, threshold,
            new_parent_name, feature_order, feature_hierarchy,
            alignment_coverage, seq_id, seq_id_threshold)
    else:
        unmapped_features.append(parent)
        # Previously this name was left unbound here, so the return statement
        # raised UnboundLocalError. An empty list stands for "nothing lifted"
        # (assumes callers treat an empty result as unmapped -- TODO confirm).
        lifted_features = []
    return lifted_features, aligned_feature[0].query_name
Exemplo n.º 4
0
def lift_features_subset(all_overlapping_features, threshold, feature_order,
                         parent_dict, children_dict, intermediate_dict,
                         previous_gene_start, unmapped_features,
                         aligned_feature, seq_id_threshold):
    """Lift one parent feature, taking its known overlapping features into account.

    The parent is looked up in ``parent_dict`` by the original id derived from
    the first alignment's ``query_name``. The best mapping is chosen by
    ``find_best_mapping.find_best_mapping`` and its result is merged by
    ``merge_lifted_features.merge_lifted_features``. Remaining arguments are
    forwarded unchanged to those helpers.

    Args:
        all_overlapping_features: Mapping from (new) parent name to the
            overlapping features recorded for it; a missing name means none.
        aligned_feature: Sequence of aligned blocks for one feature. It must
            contain at least one element, because the feature name is read
            from ``aligned_feature[0]``; an empty list raises ``IndexError``.
        unmapped_features: List that receives the parent on the fallback path.

    Returns:
        A tuple ``(lifted_feature_list, query_name, feature_start)``. On the
        fallback path the first item is an empty list and the last is ``0``.
    """
    new_parent_name = aligned_feature[0].query_name
    original_parent_name = liftoff_utils.convert_id_to_original(
        new_parent_name)
    copy_tag = liftoff_utils.get_copy_tag(new_parent_name)
    overlapping_features = all_overlapping_features.get(new_parent_name, [])
    parent = parent_dict[original_parent_name]
    if len(aligned_feature) > 0:
        lifted_children, shortest_path_weight, alignment_coverage, seq_id = find_best_mapping.find_best_mapping(
            aligned_feature, parent.end - parent.start + 1, parent,
            overlapping_features, children_dict, previous_gene_start, copy_tag)
        lifted_feature_list, feature_start = merge_lifted_features.merge_lifted_features(
            lifted_children, parent, unmapped_features, threshold,
            new_parent_name, feature_order, parent_dict, intermediate_dict,
            alignment_coverage, seq_id, seq_id_threshold)
    else:
        unmapped_features.append(parent)
        # Previously only feature_start was set here, so returning
        # lifted_feature_list raised UnboundLocalError. An empty list stands
        # for "nothing lifted" (assumes callers accept that -- TODO confirm).
        lifted_feature_list = []
        feature_start = 0

    return lifted_feature_list, aligned_feature[0].query_name, feature_start
Exemplo n.º 5
0
def remove_alignments_without_children(all_aligned_blocks, unmapped_features, feature_hierarchy):
    """Drop every entry whose block list is empty and record its parent as unmapped.

    Args:
        all_aligned_blocks: Mapping from sequence name to a list of aligned
            blocks; it is modified in place.
        unmapped_features: List extended with the parent feature of each
            removed entry.
        feature_hierarchy: Object whose ``parents`` mapping is indexed by the
            original (converted) sequence name.

    Returns:
        The same ``all_aligned_blocks`` mapping, after removal.
    """
    # Collect the names first: the mapping must not change size while iterated.
    empty_names = [name for name, blocks in all_aligned_blocks.items() if blocks == []]
    for name in empty_names:
        original_name = liftoff_utils.convert_id_to_original(name)
        unmapped_features.append(feature_hierarchy.parents[original_name])
    for name in empty_names:
        del all_aligned_blocks[name]
    return all_aligned_blocks
Exemplo n.º 6
0
def get_aligned_blocks(alignment, aln_id, children_dict, parent_dict,
                       search_type):
    """Split one alignment into gap-free blocks by walking its CIGAR tuples.

    NOTE(review): the numeric operation codes below are interpreted using the
    SAM/BAM CIGAR numbering (1 = insertion, 2 = deletion, 5 = hard clip,
    7 = sequence match, 8 = sequence mismatch). This assumes ``alignment`` is
    a pysam aligned segment -- confirm against the caller.

    Args:
        alignment: Alignment record; this function reads its ``cigar``,
            ``query_alignment_start``, ``query_alignment_end``, ``query_name``
            and ``reference_start`` attributes.
        aln_id: Identifier forwarded unchanged to ``add_block``.
        children_dict: Mapping from original feature id to its children.
        parent_dict: Mapping from original feature id to its parent feature.
        search_type: When equal to ``"copies"``, alignments whose aligned
            query span differs from the parent length are rejected.

    Returns:
        The ``new_blocks`` list (presumably filled in place by ``add_block``,
        which receives it as an argument -- confirm against that helper), or
        an empty list when the alignment is rejected.
    """
    cigar = alignment.cigar
    query_start = alignment.query_alignment_start
    query_end = alignment.query_alignment_end
    children = children_dict[liftoff_utils.convert_id_to_original(
        alignment.query_name)]
    parent = parent_dict[liftoff_utils.convert_id_to_original(
        alignment.query_name)]
    # In "copies" mode the aligned query span must equal the parent's length.
    # The comparison is made before the hard-clip shift applied below.
    if parent.end - parent.start + 1 != query_end - query_start and search_type == "copies":
        return []
    reference_block_start = alignment.reference_start
    reference_block_pos = reference_block_start
    # A leading operation with code 5 shifts both query bounds by its length.
    if cigar[0][0] == 5:
        query_start += cigar[0][1]
        query_end += cigar[0][1]
    query_block_start = query_start
    query_block_pos = query_block_start
    new_blocks = []
    mismatches = []
    merged_children_coords = liftoff_utils.merge_children_intervals(children)
    for operation, length in cigar:
        # Codes 7 and 8 extend the current block.
        if operation == 7 or operation == 8:
            query_block_pos, reference_block_pos = adjust_position(
                operation, query_block_pos, reference_block_pos, length)
            if operation == 8:
                # One position is recorded per code-8 operation, taken after
                # the position has been advanced, regardless of `length`.
                mismatches.append(query_block_pos)
            # Reaching the end of the aligned query closes the final block.
            if query_block_pos == query_end:
                add_block(query_block_pos, reference_block_pos, aln_id,
                          alignment, query_block_start, reference_block_start,
                          mismatches, new_blocks, merged_children_coords,
                          parent)
                mismatches = []
        # Codes 1 and 2 close the current block and start a new one after the
        # gap.
        elif operation == 1 or operation == 2:
            add_block(query_block_pos, reference_block_pos, aln_id, alignment,
                      query_block_start, reference_block_start, mismatches,
                      new_blocks, merged_children_coords, parent)
            mismatches = []
            query_block_pos, reference_block_pos = adjust_position(
                operation, query_block_pos, reference_block_pos, length)
            query_block_start = query_block_pos
            reference_block_start = reference_block_pos
    return new_blocks
Exemplo n.º 7
0
def find_neighbor_location(ref_parents, alignment, lifted_feature_list,
                           ref_parent_order):
    """Locate the already-lifted upstream neighbour of the aligned feature.

    Args:
        ref_parents: Mapping from original feature id to reference feature.
        alignment: Non-empty sequence of alignment records; the first record's
            ``query_name`` identifies the feature.
        lifted_feature_list: Mapping from ``"<feature id>_0"`` keys to
            sequences of lifted features.
        ref_parent_order: Ordering passed through to
            ``liftoff_utils.find_nonoverlapping_upstream_neighbor``.

    Returns:
        ``(lifted start, lifted seqid, reference start)`` of the neighbour, or
        ``(0, "", 0)`` when there is no neighbour or it has not been lifted.
    """
    feature_id = liftoff_utils.convert_id_to_original(alignment[0].query_name)
    ref_feature = ref_parents[feature_id]
    neighbor_name = liftoff_utils.find_nonoverlapping_upstream_neighbor(
        ref_parent_order, ref_feature.id)
    if neighbor_name is None:
        return 0, "", 0
    lifted_key = neighbor_name + "_0"
    if lifted_key not in lifted_feature_list:
        return 0, "", 0
    lifted_neighbor = lifted_feature_list[lifted_key][0]
    return (lifted_neighbor.start, lifted_neighbor.seqid,
            ref_parents[neighbor_name].start)
Exemplo n.º 8
0
def parse_alignment(file, parent_dict, children_dict, unmapped_features,
                    search_type, max_alns):
    """Read an alignment file and collect the aligned blocks of every feature.

    Args:
        file: Path (or handle) given to ``pysam.AlignmentFile``.
        parent_dict: Mapping from original feature id to parent feature.
        children_dict: Mapping forwarded to ``get_aligned_blocks``.
        unmapped_features: List extended with the parent of every unmapped
            record and of every feature that ends up with no blocks.
        search_type: Forwarded to ``edit_name`` and ``get_aligned_blocks``.
        max_alns: Maximum number of alignments processed per query name;
            later ones are counted but skipped.

    Returns:
        Dict mapping each (edited) query name to its list of aligned blocks.
        Names whose list is empty are removed before returning.
    """
    all_aligned_blocks = {}
    aln_id = 0
    name_dict = {}
    align_count_dict = {}
    # The context manager closes the file deterministically, including when an
    # exception is raised; previously the handle was never closed explicitly.
    with pysam.AlignmentFile(file,
                             'r',
                             check_sq=False,
                             check_header=False) as sam_file:
        for ref_seq in sam_file.fetch():
            if ref_seq.is_unmapped is False:
                ref_seq.query_name = edit_name(search_type, ref_seq, name_dict)
                aln_id += 1
                # Zero-based count of the alignments seen so far for this name.
                align_count = align_count_dict.get(ref_seq.query_name, -1) + 1
                align_count_dict[ref_seq.query_name] = align_count
                if align_count < max_alns:
                    aligned_blocks = get_aligned_blocks(ref_seq, aln_id,
                                                        children_dict,
                                                        parent_dict,
                                                        search_type)
                    if ref_seq.query_name in all_aligned_blocks:
                        all_aligned_blocks[ref_seq.query_name].extend(
                            aligned_blocks)
                    else:
                        all_aligned_blocks[ref_seq.query_name] = aligned_blocks
            else:
                unmapped_features.append(parent_dict[ref_seq.query_name])
    # Features that produced no usable block are reported as unmapped. The
    # names are collected first so the dict is not resized while iterated.
    unaligned_exons = [seq for seq, blocks in all_aligned_blocks.items()
                       if blocks == []]
    for seq in unaligned_exons:
        unmapped_features.append(
            parent_dict[liftoff_utils.convert_id_to_original(seq)])
    for seq in unaligned_exons:
        del all_aligned_blocks[seq]
    return all_aligned_blocks
Exemplo n.º 9
0
def resolve_overlapping_homologues(all_aligned_segs, lifted_feature_list,
                                   features_to_remap, unmapped_features,
                                   threshold, parent_dict, intermediate_dict,
                                   children_dict, feature_db,
                                   original_parent_order, seq_id_threshold):
    """Repeatedly re-lift conflicting features until none remain or a cap is hit.

    Each round removes the features to remap from ``lifted_feature_list``,
    lifts them again through ``lift_features.lift_all_features``, and asks
    ``check_homologues`` which of them still need remapping. The number of
    rounds is capped at ten times the initial number of features to remap.
    Anything still unresolved afterwards is appended to ``unmapped_features``
    and deleted from ``lifted_feature_list``.

    Returns:
        The same ``lifted_feature_list`` mapping, modified in place.
    """
    overlaps_by_feature = {}
    max_rounds = 10 * len(features_to_remap)
    rounds_done = 0
    while len(features_to_remap) > 0 and rounds_done < max_rounds:
        rounds_done += 1
        segs_this_round = {}
        for name in features_to_remap:
            del lifted_feature_list[name]
            segs_this_round[name] = all_aligned_segs[name]
            add_overlapping_feature(features_to_remap, name,
                                    overlaps_by_feature)
        lift_features.lift_all_features(
            segs_this_round, overlaps_by_feature, threshold, feature_db,
            parent_dict, children_dict, intermediate_dict, unmapped_features,
            lifted_feature_list, seq_id_threshold)
        clean_overlapping_features(
            lifted_feature_list, overlaps_by_feature, parent_dict,
            features_to_remap, unmapped_features)
        # Only features that were successfully lifted again need rechecking.
        relifted = {
            name: lifted_feature_list[name]
            for name in features_to_remap
            if name in lifted_feature_list
        }
        features_to_remap = check_homologues(
            lifted_feature_list, relifted, parent_dict, original_parent_order)
    # Whatever is still unresolved once the loop ends is given up on.
    for name in features_to_remap:
        original_name = liftoff_utils.convert_id_to_original(name)
        unmapped_features.append(parent_dict[original_name])
        del lifted_feature_list[name]
    return lifted_feature_list
Exemplo n.º 10
0
def lift_single_feature(all_overlapping_features, threshold, feature_order,
                        features_to_lift, feature_hierarchy,
                        previous_gene_start, unmapped_features,
                        aligned_feature, seq_id_threshold):
    """Lift one parent feature, taking its known overlapping features into account.

    The parent is looked up in ``features_to_lift`` by the original id derived
    from the first alignment's ``query_name``. The best mapping is chosen by
    ``find_best_mapping.find_best_mapping`` and its result is merged by
    ``merge_lifted_features.merge_lifted_features``. Remaining arguments are
    forwarded unchanged to those helpers.

    Args:
        aligned_feature: Sequence of aligned blocks for one feature. It must
            contain at least one element, because the feature name is read
            from ``aligned_feature[0]``; an empty list raises ``IndexError``.
        unmapped_features: List that receives the parent on the fallback path.

    Returns:
        A tuple ``(lifted_feature_list, query_name, feature_start)``. On the
        fallback path the first item is an empty list and the last is ``0``.
    """
    new_parent_name = aligned_feature[0].query_name
    original_parent_name = liftoff_utils.convert_id_to_original(
        new_parent_name)
    overlapping_features = get_overlapping_features_list(
        new_parent_name, all_overlapping_features)
    parent = features_to_lift[original_parent_name]
    if len(aligned_feature) > 0:
        lifted_children, alignment_coverage, seq_id = find_best_mapping.find_best_mapping(
            aligned_feature, parent.end - parent.start + 1, parent,
            overlapping_features, feature_hierarchy.children,
            previous_gene_start)
        lifted_feature_list, feature_start = merge_lifted_features.merge_lifted_features(
            lifted_children, parent, unmapped_features, threshold,
            new_parent_name, feature_order, feature_hierarchy,
            alignment_coverage, seq_id, seq_id_threshold)
    else:
        unmapped_features.append(parent)
        # Previously only feature_start was set here, so returning
        # lifted_feature_list raised UnboundLocalError. An empty list stands
        # for "nothing lifted" (assumes callers accept that -- TODO confirm).
        lifted_feature_list = []
        feature_start = 0
    return lifted_feature_list, aligned_feature[0].query_name, feature_start
Exemplo n.º 11
0
def remove_unresolved_features(features_to_remap, parent_dict,
                               lifted_feature_list, unmapped_features):
    """Give up on the remaining features: mark them unmapped and drop their lifts.

    Args:
        features_to_remap: Iterable of feature names that could not be resolved.
        parent_dict: Mapping from original feature id to parent feature.
        lifted_feature_list: Mapping of lifted features; each name in
            ``features_to_remap`` is deleted from it in place.
        unmapped_features: List extended with the parent of each name.
    """
    for unresolved_name in features_to_remap:
        original_id = liftoff_utils.convert_id_to_original(unresolved_name)
        unmapped_features.append(parent_dict[original_id])
        del lifted_feature_list[unresolved_name]