def merge_gffs(rna_gff_files, cds_gff_files, contigs_fasta_file, output_file_prefix):
    """
    Merge RNA and CDS annotations and write them to three output files.

    Hits are loaded from every RNA GFF file with get_gff_hits(). CDS hits
    are read contig by contig (sorted by score) and kept only when
    checkForOverlap() against that contig's RNA regions reports None in
    position 1 of its result, i.e. the CDS does not overlap an RNA region.

    The combined annotations are handed to write_annotations_to_files()
    along with handles to <output_file_prefix>.gff, .fna and .faa.
    """
    # one {contig: hits} table per RNA GFF file, keyed by file
    rna_hits = {}
    for rna_gff_file in rna_gff_files:
        rna_hits[rna_gff_file] = get_gff_hits(rna_gff_file)

    # CDS hits pooled over all CDS files, keyed by contig
    cds_hits = {}
    for cds_gff_file in cds_gff_files:
        contig_hits = generate_hits(cds_gff_file, format=GFF, sort='score')
        for contig, hits in contig_hits:
            # regions with rRNAs for this contig
            rna_regions = get_rna_regions(rna_hits, contig)

            # keep only the CDSs that don't overlap any of those regions
            kept_for_contig = cds_hits.setdefault(contig, [])
            non_overlapping = [
                hit for hit in hits
                if hit.checkForOverlap(rna_regions)[1] is None
            ]
            kept_for_contig.extend(non_overlapping)

    # the source data: all RNA hit tables followed by the CDS table
    hit_list_dicts = list(rna_hits.values()) + [cds_hits]

    # output files (opened in the order gff, fna, faa)
    with open(output_file_prefix + ".gff", 'w') as gff_out, \
            open(output_file_prefix + ".fna", 'w') as fna_out, \
            open(output_file_prefix + ".faa", 'w') as faa_out:
        write_annotations_to_files(hit_list_dicts,
                                   contigs_fasta_file,
                                   gff_out,
                                   fna_out,
                                   faa_out)
def get_gff_hits(hit_table_gff, **filter_args):
    """
    Load the hits from a GFF hit table into a dict keyed by contig.

    Any keyword arguments are forwarded to generate_hits(). Unless the
    caller supplies them, sort='score' and nonoverlapping=True are used.

    Returns a dict mapping each contig to the list of its hits.
    """
    # fill in defaults without overriding anything the caller passed
    fallback_options = (('sort', 'score'), ('nonoverlapping', True))
    for option, fallback in fallback_options:
        filter_args.setdefault(option, fallback)

    hits_by_contig = {}
    for contig, hits in generate_hits(hit_table_gff, format=GFF, **filter_args):
        # materialize each contig's hits into a list
        hits_by_contig[contig] = list(hits)
    return hits_by_contig
def get_gff_hits(hit_table_gff, **filter_args):
    """
    Read a GFF hit table and return its hits grouped by contig.

    Extra keyword arguments are passed through to generate_hits(). When
    not given by the caller, 'sort' defaults to 'score' and
    'nonoverlapping' defaults to True.

    Returns a dict of {contig: [hit, ...]}.
    """
    # apply defaults only for options the caller left unspecified
    if 'sort' not in filter_args:
        filter_args['sort'] = 'score'
    if 'nonoverlapping' not in filter_args:
        filter_args['nonoverlapping'] = True

    grouped_hits = generate_hits(hit_table_gff, format=GFF, **filter_args)
    return dict((contig, list(hits)) for contig, hits in grouped_hits)
def get_gff_hits(hit_table_gff, **filter_args):
    """
    Collect the hits of a GFF hit table into a dict keyed by contig.

    All keyword arguments are forwarded unchanged to generate_hits();
    this function adds no filter defaults of its own.

    Returns a dict mapping each contig to the list of its hits.
    """
    # NOTE(review): other definitions of get_gff_hits in this file default
    # to sort='score' and nonoverlapping=True; this one does not. Confirm
    # which behaviour is intended.
    hits_by_contig = {}
    grouped_hits = generate_hits(hit_table_gff, format=GFF, **filter_args)
    for contig, hits in grouped_hits:
        hits_by_contig[contig] = list(hits)
    return hits_by_contig