示例#1
0
def map_extra_copies(target_fasta, reference_fasta, ref_chroms, target_chroms,
                     processes, lifted_feature_list, parent_dict,
                     children_dict, feature_db, intermediate_dict,
                     parent_order, seq_threshold, minimap2_path, inter_files,
                     remap):
    """Run the "copies" pass over every feature in ``parent_dict``.

    In order, this clears scores via ``liftoff_utils.clear_scores``, extracts
    gene sequences, aligns them to the target, lifts the aligned segments and
    finally fixes incorrectly overlapping features. The lift and overlap-fix
    steps receive a threshold of 0.0 in the third/sixth position and the
    caller's ``seq_threshold`` in the last position.

    All arguments are forwarded unchanged to the helper modules; ``remap`` is
    the final argument of the alignment call. Nothing is returned: results
    are whatever the helpers write into ``lifted_feature_list`` (presumably
    updated in place -- confirm against ``lift_features``).
    """
    liftoff_utils.clear_scores(lifted_feature_list, parent_dict)

    pass_name = "copies"
    cov_threshold = 0.0
    # Features that fail during this pass go into a local list that is not
    # handed back to the caller.
    still_unmapped = []

    extract_features.get_gene_sequences(
        parent_dict, ref_chroms, reference_fasta, processes, inter_files,
        pass_name)
    segments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, parent_dict,
        children_dict, pass_name, still_unmapped, reference_fasta,
        minimap2_path, inter_files, remap)

    print("lifting features")
    lift_features.lift_all_features(
        segments, {}, cov_threshold, feature_db, parent_dict, children_dict,
        intermediate_dict, still_unmapped, lifted_feature_list,
        seq_threshold)
    # The same mapping is deliberately passed as both the first and the
    # second argument, exactly as the other passes do.
    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict, segments,
        still_unmapped, cov_threshold, intermediate_dict, children_dict,
        feature_db, parent_order, seq_threshold)
示例#2
0
def lift_original_annotation(gff, target_fasta, reference_fasta, ref_chroms,
                             target_chroms, processes, db, lifted_feature_list,
                             unmapped_features, infer_transcripts, infer_genes,
                             cov_threshold, seq_threshold, minimap2_path,
                             inter_files):
    """Run the initial "chrm_by_chrm" lift-over of the annotation.

    Features are extracted with ``extract_features.extract_features_to_lift``,
    aligned to the target, lifted, and then passed through the
    overlapping-feature fix-up.

    When the first entry of ``target_chroms`` equals ``target_fasta``, both
    ``cov_threshold`` and ``seq_threshold`` are replaced by 0 before use.

    Returns:
        The tuple ``(feature_db, parent_dict, intermediate_dict,
        children_dict, original_parent_order)`` built from the extraction
        step's result.
    """
    pass_name = "chrm_by_chrm"

    first_target_is_fasta = target_chroms[0] == target_fasta
    if first_target_is_fasta:
        cov_threshold = seq_threshold = 0

    extracted = extract_features.extract_features_to_lift(
        gff, db, ref_chroms, reference_fasta, processes, infer_transcripts,
        infer_genes, inter_files, pass_name)
    (parent_dict, children_dict, intermediate_dict, feature_db,
     original_parent_order) = extracted

    segments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, parent_dict,
        children_dict, pass_name, unmapped_features, reference_fasta,
        minimap2_path, inter_files, True)

    print("lifting features")
    lift_features.lift_all_features(
        segments, {}, cov_threshold, feature_db, parent_dict, children_dict,
        intermediate_dict, unmapped_features, lifted_feature_list,
        seq_threshold)
    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict, segments,
        unmapped_features, cov_threshold, intermediate_dict, children_dict,
        feature_db, original_parent_order, seq_threshold)

    return (feature_db, parent_dict, intermediate_dict, children_dict,
            original_parent_order)
示例#3
0
def map_unplaced_genes(unmapped_features, target_fasta, reference_fasta,
                       ref_chroms, target_chroms, processes,
                       lifted_feature_list, feature_db, parent_dict,
                       intermediate_dict, children_dict, parent_order,
                       minimap2_path, inter_files):
    """Run the "unplaced" pass for parents whose ``seqid`` is in ``ref_chroms``.

    After clearing scores, the subset of ``parent_dict`` values whose
    ``seqid`` is contained in ``ref_chroms`` is collected (keyed by each
    feature's ``id``). Only that subset is sequence-extracted, aligned and
    lifted; both thresholds passed to the lift step are 0.0. The final
    overlap fix-up is given the full ``parent_dict``, not the subset.

    Nothing is returned; the helpers receive ``lifted_feature_list`` and
    ``unmapped_features`` directly (presumably updating them in place --
    confirm against the helper modules).
    """
    liftoff_utils.clear_scores(lifted_feature_list, parent_dict)

    pass_name = "unplaced"
    no_threshold = 0.0
    unplaced_dict = {
        feature.id: feature
        for feature in parent_dict.values()
        if feature.seqid in ref_chroms
    }

    extract_features.get_gene_sequences(
        unplaced_dict, ref_chroms, reference_fasta, processes, inter_files,
        pass_name)
    segments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, unplaced_dict,
        children_dict, pass_name, unmapped_features, reference_fasta,
        minimap2_path, inter_files, True)

    print("lifting features")
    lift_features.lift_all_features(
        segments, {}, no_threshold, feature_db, unplaced_dict, children_dict,
        intermediate_dict, unmapped_features, lifted_feature_list,
        no_threshold)

    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict, segments,
        unmapped_features, no_threshold, intermediate_dict, children_dict,
        feature_db, parent_order, no_threshold)
示例#4
0
def resolve_overlapping_homologues(all_aligned_segs, lifted_feature_list,
                                   features_to_remap, unmapped_features,
                                   threshold, parent_dict, intermediate_dict,
                                   children_dict, feature_db,
                                   original_parent_order):
    """Re-lift features repeatedly until none are left to remap.

    Each round removes every pending feature from ``lifted_feature_list``,
    records it through ``add_overlapping_feature``, re-lifts it from its
    entry in ``all_aligned_segs``, runs ``clean_overlapping_features`` and
    then asks ``check_homologues`` which features form the next round. The
    overlap record is shared across all rounds.

    Returns:
        ``lifted_feature_list`` (the same object that was passed in). If
        ``features_to_remap`` is empty on entry it is returned untouched.

    Raises:
        KeyError: if a pending feature is missing from ``lifted_feature_list``
            or ``all_aligned_segs`` when plain dicts are used.
    """
    overlaps_by_feature = {}
    pending = features_to_remap

    while len(pending) > 0:
        segs_this_round = {}
        for name in pending:
            # Drop the current placement before re-lifting this feature.
            del lifted_feature_list[name]
            segs_this_round[name] = all_aligned_segs[name]
            add_overlapping_feature(pending, name, overlaps_by_feature)

        lift_features.lift_all_features(
            segs_this_round, overlaps_by_feature, threshold, feature_db,
            parent_dict, children_dict, intermediate_dict, unmapped_features,
            lifted_feature_list)
        clean_overlapping_features(
            lifted_feature_list, overlaps_by_feature, parent_dict)

        # Only features that were placed again are re-checked.
        relifted = {
            name: lifted_feature_list[name]
            for name in pending
            if name in lifted_feature_list
        }
        pending = check_homologues(
            lifted_feature_list, relifted, parent_dict,
            original_parent_order)

    return lifted_feature_list
示例#5
0
def map_unmapped_genes_agaisnt_all(unmapped_features, target_fasta, reference_fasta, ref_chroms, target_chroms,
                                       processes, lifted_feature_list, feature_db, parent_dict, intermediate_dict,
                                       children_dict, parent_order,minimap2_path,inter_files):
    """Run the "missing" pass over the features in ``unmapped_features``.

    The incoming features are indexed by their ``id`` and that mapping is
    used for sequence extraction, alignment and lifting (both thresholds
    passed to the lift step are 0.0). The overlap fix-up is given the full
    ``parent_dict`` plus the extra trailing argument ``"missing"``.

    Returns:
        A new list created inside this function and handed to the helpers
        in the unmapped-features position; the list passed in as
        ``unmapped_features`` is only read, never rebound for the caller.
    """
    liftoff_utils.clear_scores(lifted_feature_list, parent_dict)

    pass_name = "missing"
    no_threshold = 0.0
    unmapped_dict = {feature.id: feature for feature in unmapped_features}

    extract_features.get_gene_sequences(
        unmapped_dict, ref_chroms, reference_fasta, processes, inter_files)

    # Start a fresh list for this pass; this is what gets returned.
    still_unmapped = []
    segments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, unmapped_dict,
        children_dict, pass_name, still_unmapped, reference_fasta,
        minimap2_path, inter_files, True)

    print("lifting features")
    lift_features.lift_all_features(
        segments, {}, no_threshold, feature_db, unmapped_dict, children_dict,
        intermediate_dict, still_unmapped, lifted_feature_list, no_threshold)
    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict, segments,
        still_unmapped, no_threshold, intermediate_dict, children_dict,
        feature_db, parent_order, no_threshold, pass_name)

    return still_unmapped
示例#6
0
def lift_original_annotation(gff, target_fasta, reference_fasta, ref_chroms,
                             target_chroms, processes, db, lifted_feature_list,
                             unmapped_features, infer_transcripts):
    """Run the initial "chrm_by_chrm" lift-over of the annotation.

    A single threshold is used for both the lift and the overlap fix-up:
    0.5 when the first entry of ``target_chroms`` differs from
    ``target_fasta``, otherwise 0.

    Returns:
        The tuple ``(feature_db, parent_dict, intermediate_dict,
        children_dict, original_parent_order)`` built from the result of
        ``extract_features.extract_features_to_lift``.
    """
    threshold = 0.5 if target_chroms[0] != target_fasta else 0

    extracted = extract_features.extract_features_to_lift(
        gff, db, ref_chroms, reference_fasta, processes, infer_transcripts)
    (parent_dict, children_dict, intermediate_dict, feature_db,
     original_parent_order) = extracted

    segments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, parent_dict,
        children_dict, "chrm_by_chrm", unmapped_features)

    print("lifting features")
    lift_features.lift_all_features(
        segments, {}, threshold, feature_db, parent_dict, children_dict,
        intermediate_dict, unmapped_features, lifted_feature_list)
    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict, segments,
        unmapped_features, threshold, intermediate_dict, children_dict,
        feature_db, original_parent_order)

    return (feature_db, parent_dict, intermediate_dict, children_dict,
            original_parent_order)