def parse_alignments(contigs_fpaths, contig_report_fpath_pattern):
    """Collect the aligned blocks of every assembly from its contig report.

    Args:
        contigs_fpaths: iterable of assembly file paths.
        contig_report_fpath_pattern: %-format string that is filled with
            ``qutils.label_from_fpath_for_fname(contigs_fpath)`` to obtain
            the report path.  When falsy, no report is parsed.

    Returns:
        ``(assemblies, max_contigs)`` where ``assemblies`` is the
        ``.assemblies`` attribute of the ``get_assemblies`` result and
        ``max_contigs`` is the largest number of aligned blocks found for a
        single assembly.  ``(None, None)`` when no report produced blocks.
    """
    # The pattern is the same for every assembly, so test it once up front
    # instead of on each loop iteration.
    if not contig_report_fpath_pattern:
        return None, None

    lists_of_aligned_blocks = []
    for contigs_fpath in contigs_fpaths:
        report_fpath = contig_report_fpath_pattern % qutils.label_from_fpath_for_fname(contigs_fpath)
        aligned_blocks, misassembled_id_to_structure = parse_aligner_contig_report(report_fpath)
        if aligned_blocks is None:
            # Nothing usable in this report; skip the assembly.
            continue
        lists_of_aligned_blocks.append(
            check_misassembled_blocks(aligned_blocks, misassembled_id_to_structure, filter_local=True))

    if not lists_of_aligned_blocks:
        return None, None

    max_contigs = max(len(blocks) for blocks in lists_of_aligned_blocks)
    return get_assemblies(contigs_fpaths, lists_of_aligned_blocks).assemblies, max_contigs
def parse_alignments(contigs_fpaths, contig_report_fpath_pattern):
    """Collect the aligned blocks of every assembly from its contig report.

    Args:
        contigs_fpaths: iterable of assembly file paths.
        contig_report_fpath_pattern: %-format string that is filled with
            ``qutils.label_from_fpath_for_fname(contigs_fpath)`` to obtain
            the report path.  When falsy, no report is parsed.

    Returns:
        ``(assemblies, max_contigs)`` where ``assemblies`` is the
        ``.assemblies`` attribute of the ``get_assemblies`` result and
        ``max_contigs`` is the largest number of aligned blocks found for a
        single assembly.  ``(None, None)`` when no report produced blocks.
    """
    # The pattern is the same for every assembly, so test it once up front
    # instead of on each loop iteration.
    if not contig_report_fpath_pattern:
        return None, None

    lists_of_aligned_blocks = []
    for contigs_fpath in contigs_fpaths:
        report_fpath = contig_report_fpath_pattern % qutils.label_from_fpath_for_fname(contigs_fpath)
        aligned_blocks, misassembled_id_to_structure = parse_nucmer_contig_report(report_fpath)
        if aligned_blocks is None:
            # Nothing usable in this report; skip the assembly.
            continue
        lists_of_aligned_blocks.append(
            check_misassembled_blocks(aligned_blocks, misassembled_id_to_structure, filter_local=True))

    if not lists_of_aligned_blocks:
        return None, None

    max_contigs = max(len(blocks) for blocks in lists_of_aligned_blocks)
    return get_assemblies(contigs_fpaths, lists_of_aligned_blocks).assemblies, max_contigs
def do(contigs_fpaths, contig_report_fpath_pattern, output_dirpath, ref_fpath,
       cov_fpath=None, physical_cov_fpath=None, gc_fpath=None,
       stdout_pattern=None, find_similar=True, features=None,
       json_output_dir=None, genes_by_labels=None):
    """Build the Icarus outputs (SVG alignment plot and HTML viewer data).

    The function reads the reference (when ``ref_fpath`` is given), parses
    each assembly either from its contig report or from the contigs file,
    and then, depending on ``qconfig`` switches, draws the alignment plot
    and generates the Icarus HTML through ``js_data_gen``.

    Args:
        contigs_fpaths: assembly file paths; each must be a key of
            ``qconfig.assembly_labels_by_fpath``.
        contig_report_fpath_pattern: %-format string filled with
            ``qutils.label_from_fpath_for_fname(contigs_fpath)``; when falsy
            the contigs are read with ``parse_contigs_fpath`` instead.
        output_dirpath: output directory, created via ``make_output_dir``.
        ref_fpath: reference FASTA path, or a falsy value for no reference.
        cov_fpath, physical_cov_fpath, gc_fpath: forwarded unchanged to
            ``js_data_gen``.
        stdout_pattern: forwarded unchanged to ``js_data_gen``.
        find_similar: forwarded to ``get_assemblies``.
        features: forwarded to ``parse_features_data`` when a reference is
            given.
        json_output_dir: forwarded unchanged to ``js_data_gen``.
        genes_by_labels: forwarded to ``parse_genes_data`` when
            ``qconfig.gene_finding`` is set.

    Returns:
        ``(icarus_html_fpath, plot_fpath)``; either element may be ``None``.
        A bare ``None`` is returned when a contig report yields
        ``aligned_blocks is None``.
    """
    make_output_dir(output_dirpath)

    lists_of_aligned_blocks = []
    contigs_by_assemblies = OrderedDict()
    structures_by_labels = {}
    ambiguity_alignments_by_labels = {}

    reference_chromosomes = OrderedDict()
    contig_names_by_refs = None
    assemblies = None
    chr_names = []
    features_data = None
    plot_fpath = None
    # Bound up front so the report parser below always receives a defined
    # value, even when no reference is supplied.
    cumulative_ref_lengths = [0]

    if ref_fpath:
        for name, seq in fastaparser.read_fasta(ref_fpath):
            # Only the first whitespace-separated token of the header is used.
            chr_name = name.split()[0]
            chr_names.append(chr_name)
            reference_chromosomes[chr_name] = len(seq)

        virtual_genome_shift = 100
        sorted_ref_names = sorted(reference_chromosomes, key=reference_chromosomes.get, reverse=True)
        sorted_ref_lengths = sorted(reference_chromosomes.values(), reverse=True)
        total_ref_length = sum(reference_chromosomes.values())

        # Decide which viewer page every chromosome belongs to.
        if ref_labels_by_chromosomes:
            contig_names_by_refs = ref_labels_by_chromosomes
        elif total_ref_length > qconfig.MAX_SIZE_FOR_COMB_PLOT:
            contig_names_by_refs = dict()
            if len(chr_names) > qconfig.ICARUS_MAX_CHROMOSOMES:
                # Pack consecutive chromosomes into numbered parts; a new part
                # is started once the running length reaches the size limit.
                summary_len = 0
                num_parts = 1
                html_name = qconfig.alignment_viewer_part_name + str(num_parts)
                for chr_name, chr_len in reference_chromosomes.items():
                    summary_len += chr_len
                    contig_names_by_refs[chr_name] = html_name
                    if summary_len >= qconfig.MAX_SIZE_FOR_COMB_PLOT:
                        summary_len = 0
                        num_parts += 1
                        html_name = qconfig.alignment_viewer_part_name + str(num_parts)
            else:
                # Few chromosomes: each one is mapped to its own name.
                for chr_name in chr_names:
                    contig_names_by_refs[chr_name] = chr_name

        last_index = len(chr_names) - 1
        for index, current_chr in enumerate(chr_names):
            len_to_append = cumulative_ref_lengths[-1] + reference_chromosomes[current_chr]
            # The running length is reset to 0 when the next chromosome is
            # mapped to a different page than the current one.
            if (contig_names_by_refs and index < last_index
                    and contig_names_by_refs[current_chr] != contig_names_by_refs[chr_names[index + 1]]):
                len_to_append = 0
            cumulative_ref_lengths.append(len_to_append)

        virtual_genome_size = total_ref_length + virtual_genome_shift * (len(reference_chromosomes) - 1)

    for contigs_fpath in contigs_fpaths:
        label = qconfig.assembly_labels_by_fpath[contigs_fpath]
        if not contig_report_fpath_pattern:
            contigs = parse_contigs_fpath(contigs_fpath)
        else:
            report_fpath = contig_report_fpath_pattern % qutils.label_from_fpath_for_fname(contigs_fpath)
            aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments = \
                parse_aligner_contig_report(report_fpath, list(reference_chromosomes.keys()),
                                            cumulative_ref_lengths)
            if not contigs:
                # The report gave no contigs; fall back to the contigs file.
                contigs = parse_contigs_fpath(contigs_fpath)
            if aligned_blocks is None:
                # NOTE(review): this is a bare None while the normal exit
                # returns a 2-tuple; callers that unpack the result would
                # fail here -- confirm the intended contract before changing.
                return None
            for block in aligned_blocks:
                block.label = label
            aligned_blocks = check_misassembled_blocks(aligned_blocks, misassembled_id_to_structure)
            lists_of_aligned_blocks.append(aligned_blocks)
            structures_by_labels[label] = misassembled_id_to_structure
            if qconfig.ambiguity_usage == 'all':
                ambiguity_alignments_by_labels[label] = ambiguity_alignments
        contigs_by_assemblies[label] = contigs

    if ref_fpath:
        features_data = parse_features_data(features, cumulative_ref_lengths, chr_names)
    if contigs_fpaths and qconfig.gene_finding:
        parse_genes_data(contigs_by_assemblies, genes_by_labels)

    # A non-empty reference_chromosomes implies the reference branch above
    # ran, so virtual_genome_size and the sorted_ref_* values are bound here.
    if reference_chromosomes and lists_of_aligned_blocks:
        assemblies = get_assemblies(contigs_fpaths, lists_of_aligned_blocks, virtual_genome_size, find_similar)
        if qconfig.draw_svg:
            plot_fpath = draw_alignment_plot(assemblies, virtual_genome_size, output_dirpath,
                                             sorted_ref_names, sorted_ref_lengths, virtual_genome_shift)

    icarus_html_fpath = None
    if (assemblies or contigs_by_assemblies) and qconfig.create_icarus_html:
        icarus_html_fpath = js_data_gen(
            assemblies, contigs_fpaths, reference_chromosomes, output_dirpath, structures_by_labels,
            contig_names_by_refs=contig_names_by_refs, ref_fpath=ref_fpath, stdout_pattern=stdout_pattern,
            ambiguity_alignments_by_labels=ambiguity_alignments_by_labels,
            contigs_by_assemblies=contigs_by_assemblies, features_data=features_data,
            gc_fpath=gc_fpath, cov_fpath=cov_fpath, physical_cov_fpath=physical_cov_fpath,
            json_output_dir=json_output_dir)

    return icarus_html_fpath, plot_fpath
def do(contigs_fpaths, contig_report_fpath_pattern, output_dirpath, ref_fpath,
       cov_fpath=None, physical_cov_fpath=None, stdout_pattern=None,
       find_similar=True, features=None, json_output_dir=None, genes_by_labels=None):
    """Build the Icarus outputs (SVG alignment plot and HTML viewer data).

    The function reads the reference (when ``ref_fpath`` is given), parses
    each assembly either from its contig report or from the contigs file,
    and then, depending on ``qconfig`` switches, draws the alignment plot
    and generates the Icarus HTML through ``js_data_gen``.

    Args:
        contigs_fpaths: assembly file paths; each must be a key of
            ``qconfig.assembly_labels_by_fpath``.
        contig_report_fpath_pattern: %-format string filled with
            ``qutils.label_from_fpath_for_fname(contigs_fpath)``; when falsy
            the contigs are read with ``parse_contigs_fpath`` instead.
        output_dirpath: output directory, created via ``make_output_dir``.
        ref_fpath: reference FASTA path, or a falsy value for no reference.
        cov_fpath, physical_cov_fpath: forwarded unchanged to
            ``js_data_gen``.
        stdout_pattern: forwarded unchanged to ``js_data_gen``.
        find_similar: forwarded to ``get_assemblies``.
        features: forwarded to ``parse_features_data`` when assemblies, a
            reference and features are all present.
        json_output_dir: forwarded unchanged to ``js_data_gen``.
        genes_by_labels: forwarded to ``parse_genes_data`` when
            ``qconfig.gene_finding`` is set.

    Returns:
        ``(icarus_html_fpath, plot_fpath)``; either element may be ``None``.
        A bare ``None`` is returned when a contig report yields
        ``aligned_blocks is None``.
    """
    make_output_dir(output_dirpath)

    lists_of_aligned_blocks = []
    contigs_by_assemblies = OrderedDict()
    structures_by_labels = {}
    ambiguity_alignments_by_labels = {}

    reference_chromosomes = OrderedDict()
    contig_names_by_refs = None
    assemblies = None
    chr_names = []
    features_data = None
    plot_fpath = None
    # Above this many chromosomes, they are packed into numbered parts
    # instead of getting one page each.
    max_small_chromosomes = 10
    # Bound up front so the report parser below always receives a defined
    # value, even when no reference is supplied.
    cumulative_ref_lengths = [0]

    if ref_fpath:
        for name, seq in fastaparser.read_fasta(ref_fpath):
            # Only the first whitespace-separated token of the header is used.
            chr_name = name.split()[0]
            chr_names.append(chr_name)
            reference_chromosomes[chr_name] = len(seq)

        virtual_genome_shift = 100
        sorted_ref_names = sorted(reference_chromosomes, key=reference_chromosomes.get, reverse=True)
        sorted_ref_lengths = sorted(reference_chromosomes.values(), reverse=True)
        total_ref_length = sum(reference_chromosomes.values())

        # Decide which viewer page every chromosome belongs to.
        if ref_labels_by_chromosomes:
            contig_names_by_refs = ref_labels_by_chromosomes
        elif total_ref_length > qconfig.MAX_SIZE_FOR_COMB_PLOT:
            contig_names_by_refs = dict()
            if len(chr_names) > max_small_chromosomes:
                # Pack consecutive chromosomes into numbered parts; a new part
                # is started once the running length reaches the size limit.
                summary_len = 0
                num_parts = 1
                html_name = qconfig.alignment_viewer_part_name + str(num_parts)
                for chr_name, chr_len in reference_chromosomes.items():
                    summary_len += chr_len
                    contig_names_by_refs[chr_name] = html_name
                    if summary_len >= qconfig.MAX_SIZE_FOR_COMB_PLOT:
                        summary_len = 0
                        num_parts += 1
                        html_name = qconfig.alignment_viewer_part_name + str(num_parts)
            else:
                # Few chromosomes: each one is mapped to its own name.
                for chr_name in chr_names:
                    contig_names_by_refs[chr_name] = chr_name

        last_index = len(chr_names) - 1
        for index, current_chr in enumerate(chr_names):
            len_to_append = cumulative_ref_lengths[-1] + reference_chromosomes[current_chr]
            # The running length is reset to 0 when the next chromosome is
            # mapped to a different page than the current one.
            if (contig_names_by_refs and index < last_index
                    and contig_names_by_refs[current_chr] != contig_names_by_refs[chr_names[index + 1]]):
                len_to_append = 0
            cumulative_ref_lengths.append(len_to_append)

        virtual_genome_size = total_ref_length + virtual_genome_shift * (len(reference_chromosomes) - 1)

    for contigs_fpath in contigs_fpaths:
        label = qconfig.assembly_labels_by_fpath[contigs_fpath]
        if not contig_report_fpath_pattern:
            contigs = parse_contigs_fpath(contigs_fpath)
        else:
            report_fpath = contig_report_fpath_pattern % qutils.label_from_fpath_for_fname(contigs_fpath)
            aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments = \
                parse_nucmer_contig_report(report_fpath, list(reference_chromosomes.keys()),
                                           cumulative_ref_lengths)
            if not contigs:
                # The report gave no contigs; fall back to the contigs file.
                contigs = parse_contigs_fpath(contigs_fpath)
            if aligned_blocks is None:
                # NOTE(review): this is a bare None while the normal exit
                # returns a 2-tuple; callers that unpack the result would
                # fail here -- confirm the intended contract before changing.
                return None
            for block in aligned_blocks:
                block.label = label
            aligned_blocks = check_misassembled_blocks(aligned_blocks, misassembled_id_to_structure)
            lists_of_aligned_blocks.append(aligned_blocks)
            structures_by_labels[label] = misassembled_id_to_structure
            if qconfig.ambiguity_usage == 'all':
                ambiguity_alignments_by_labels[label] = ambiguity_alignments
        contigs_by_assemblies[label] = contigs

    if contigs_fpaths and ref_fpath and features:
        features_data = parse_features_data(features, cumulative_ref_lengths, chr_names)
    if contigs_fpaths and qconfig.gene_finding:
        parse_genes_data(contigs_by_assemblies, genes_by_labels)

    # A non-empty reference_chromosomes implies the reference branch above
    # ran, so virtual_genome_size and the sorted_ref_* values are bound here.
    if reference_chromosomes and lists_of_aligned_blocks:
        # NOTE(review): parse_alignments in this file calls
        # get_assemblies(contigs_fpaths, lists_of_aligned_blocks) with the
        # block lists in second position, whereas here the second positional
        # argument is virtual_genome_size -- confirm the order against the
        # get_assemblies signature.
        assemblies = get_assemblies(contigs_fpaths, virtual_genome_size, lists_of_aligned_blocks, find_similar)
        if qconfig.draw_svg:
            plot_fpath = draw_alignment_plot(assemblies, virtual_genome_size, output_dirpath,
                                             sorted_ref_names, sorted_ref_lengths, virtual_genome_shift)

    icarus_html_fpath = None
    if (assemblies or contigs_by_assemblies) and qconfig.create_icarus_html:
        icarus_html_fpath = js_data_gen(
            assemblies, contigs_fpaths, reference_chromosomes, output_dirpath, structures_by_labels,
            contig_names_by_refs=contig_names_by_refs, ref_fpath=ref_fpath, stdout_pattern=stdout_pattern,
            ambiguity_alignments_by_labels=ambiguity_alignments_by_labels,
            contigs_by_assemblies=contigs_by_assemblies, features_data=features_data,
            cov_fpath=cov_fpath, physical_cov_fpath=physical_cov_fpath,
            json_output_dir=json_output_dir)

    return icarus_html_fpath, plot_fpath