% (args['site_ratio'], 100 * args['site_prev'])) if args['max_sites'] != float('Inf'): print(" keep <= %s sites" % (args['max_sites'])) print("Number of CPUs to use: %s" % args['threads']) print("===============================") print("") def run_program(program, args): """ Run program specified by user (species, genes, or snps) """ if program == 'species': from midas.merge import species species.run_pipeline(args) elif program == 'genes': from midas.merge import genes genes.run_pipeline(args) elif program == 'snps': from midas.merge import snps snps.run_pipeline(args) else: sys.exit("\nError: Unrecognized program: '%s'\n" % program) if __name__ == '__main__': program = get_program() args = get_arguments(program) check_arguments(program, args) utility.print_copyright() print_arguments(program, args) run_program(program, args)
count_c: count of C allele count_g: count of G allele count_t: count of T allele summary.txt species_id: species id genome_length: number of base pairs in representative genome covered_bases: number of reference sites with at least 1 mapped read fraction_covered: proportion of reference sites with at least 1 mapped read mean_coverage: average read-depth across reference sites with at least 1 mapped read aligned_reads: number of aligned reads BEFORE quality filtering mapped_reads: number of aligned reads AFTER quality filtering Additional information for each species can be found in the reference database: %s/rep_genomes """ % args['db']) outfile.close() if __name__ == '__main__': program = get_program() args = get_arguments(program) check_arguments(program, args) create_directories(program, args) open_log(program, args) utility.print_copyright(args['log']) print_arguments(program, args) run_program(program, args) write_readme(program, args)
# keep/exclude sample if sample.filter(args['sample_depth'], args['fract_cov']): sample.pass_qc = False if args['keep_samples'] and sample.id not in args['keep_samples']: sample.pass_qc = False if args['exclude_samples'] and sample.id in args['exclude_samples']: sample.pass_qc = False if sum([1 for s in samples if s.pass_qc]) >= args['max_samples']: sample.pass_qc = False # store sample samples.append(sample) # select random sample if args['rand_samples']: resample_samples(samples) return samples


if __name__ == '__main__':
    # Script entry point: the statements below run in this exact order.
    args = parse_arguments()
    check_args(args)
    print_copyright()
    print_args(args)

    # Each stage prints a progress message before it runs.
    print("\nSelecting subset of samples...")
    samples = fetch_samples(args)

    print("Estimating diversity metrics...")
    pi = compute_pi(args, samples)

    print("Writing results to output file...")
    write_pi(args, samples, pi)
'sites_NC', 'sites_1D', 'sites_2D', 'sites_3D', 'sites_4D', 'snps_NC', 'snps_1D', 'snps_2D', 'snps_3D', 'snps_4D', 'pi_NC', 'pi_1D', 'pi_2D', 'pi_3D', 'pi_4D'] outfile.write('\t'.join(fields)+'\n') gene_ids = set([]) for sample in samples.values(): for gene_id in sample.genes: gene_ids.add(gene_id) for sample in samples.values(): for gene_id in gene_ids: if gene_id not in sample.genes: gene = Gene(gene_id) else: gene = sample.genes[gene_id] record = gene.format_record(sample.id, fields) outfile.write(record) if __name__ == '__main__': args = parse_arguments() utility.print_copyright() print("Loading samples...") samples = st.init_samples(args) print("Estimating diversity metrics...") compute_diversity(args, samples) print("Writing results...") write_per_gene_results(args, samples)