Пример #1
0
            % (args['site_ratio'], 100 * args['site_prev']))
    if args['max_sites'] != float('Inf'):
        print("  keep <= %s sites" % (args['max_sites']))
    print("Number of CPUs to use: %s" % args['threads'])
    print("===============================")
    print("")


def run_program(program, args):
    """Dispatch to the merge pipeline selected by the user.

    Args:
        program: name of the pipeline to run; one of 'species', 'genes'
            or 'snps'.
        args: parsed arguments, handed unchanged to the selected module's
            run_pipeline().

    Exits through sys.exit() with an error message when `program` is not
    one of the recognized names.
    """
    # Guard clause: reject unknown names before importing anything.
    if program not in ('species', 'genes', 'snps'):
        sys.exit("\nError: Unrecognized program: '%s'\n" % program)

    # Import lazily so only the requested pipeline module gets loaded.
    if program == 'species':
        from midas.merge import species as pipeline
    elif program == 'genes':
        from midas.merge import genes as pipeline
    else:
        from midas.merge import snps as pipeline

    pipeline.run_pipeline(args)


if __name__ == '__main__':
    # Script entry point: runs only when the file is executed directly,
    # not when it is imported.
    # The program name is resolved first because every later step takes it.
    program = get_program()
    args = get_arguments(program)
    # NOTE(review): presumably aborts on invalid arguments before any output
    # is produced -- confirm against check_arguments.
    check_arguments(program, args)
    utility.print_copyright()
    print_arguments(program, args)
    # run_program hands args to the species, genes or snps merge pipeline.
    run_program(program, args)
Пример #2
0
  count_c: count of C allele
  count_g: count of G allele
  count_t: count of T allele

summary.txt
  species_id: species id
  genome_length: number of base pairs in representative genome
  covered_bases: number of reference sites with at least 1 mapped read
  fraction_covered: proportion of reference sites with at least 1 mapped read
  mean_coverage: average read-depth across reference sites with at least 1 mapped read
  aligned_reads: number of aligned reads BEFORE quality filtering
  mapped_reads: number of aligned reads AFTER quality filtering
  
Additional information for each species can be found in the reference database:
 %s/rep_genomes
""" % args['db'])
    outfile.close()


if __name__ == '__main__':

    # Script entry point: runs only when the file is executed directly,
    # not when it is imported.
    program = get_program()
    args = get_arguments(program)
    check_arguments(program, args)
    # NOTE(review): presumably the output directories and the log have to
    # exist before anything below writes to them -- confirm against
    # create_directories / open_log.
    create_directories(program, args)
    open_log(program, args)
    # args['log'] is passed to the banner printer here; presumably it is
    # populated by open_log above -- TODO confirm.
    utility.print_copyright(args['log'])
    print_arguments(program, args)
    run_program(program, args)
    # The README is written last, after the selected program has run.
    write_readme(program, args)
Пример #3
0
        # keep/exclude sample
        if sample.filter(args['sample_depth'], args['fract_cov']):
            sample.pass_qc = False
        if args['keep_samples'] and sample.id not in args['keep_samples']:
            sample.pass_qc = False
        if args['exclude_samples'] and sample.id in args['exclude_samples']:
            sample.pass_qc = False
        if sum([1 for s in samples if s.pass_qc]) >= args['max_samples']:
            sample.pass_qc = False
        # store sample
        samples.append(sample)
    # select random sample
    if args['rand_samples']: resample_samples(samples)
    return samples


if __name__ == '__main__':
    # Script entry point: runs only when the file is executed directly,
    # not when it is imported.
    args = parse_arguments()
    check_args(args)
    print_copyright()
    print_args(args)

    # Stage 1: build the sample collection; each stage announces itself on
    # stdout before it starts.
    print("\nSelecting subset of samples...")
    samples = fetch_samples(args)

    # Stage 2: compute the diversity result from the selected samples.
    print("Estimating diversity metrics...")
    pi = compute_pi(args, samples)

    # Stage 3: hand the samples and the computed result to the writer.
    print("Writing results to output file...")
    write_pi(args, samples, pi)
			  'sites_NC', 'sites_1D', 'sites_2D', 'sites_3D', 'sites_4D',
			  'snps_NC', 'snps_1D', 'snps_2D', 'snps_3D', 'snps_4D',
			  'pi_NC', 'pi_1D', 'pi_2D', 'pi_3D', 'pi_4D']
	outfile.write('\t'.join(fields)+'\n')
	gene_ids = set([])
	for sample in samples.values():
		for gene_id in sample.genes:
			gene_ids.add(gene_id)
	for sample in samples.values():
		for gene_id in gene_ids:
			if gene_id not in sample.genes:
				gene = Gene(gene_id)
			else:
				gene = sample.genes[gene_id]
			record = gene.format_record(sample.id, fields)
			outfile.write(record)

if __name__ == '__main__':
	# Script entry point: runs only when the file is executed directly,
	# not when it is imported.
	args = parse_arguments()
	utility.print_copyright()
	
	# Stage 1: build the sample collection from the parsed arguments.
	print("Loading samples...")
	samples = st.init_samples(args)
	
	# Stage 2: the return value is not used; presumably the results are
	# stored on the sample objects -- TODO confirm in compute_diversity.
	print("Estimating diversity metrics...")
	compute_diversity(args, samples)

	# Stage 3: write the per-gene records for every sample.
	print("Writing results...")
	write_per_gene_results(args, samples)