import sys
import argparse

from collections import defaultdict
from ddb import configuration


def process_sample(parse_functions, sample, samples, config, snp_list):
    """Collect per-SNP alternate allele frequency and depth for one sample.

    Returns a dict keyed by SNP, with 'freq' and 'depth' entries per SNP.
    The body of this function was missing from the source.
    """
    raise NotImplementedError


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--samples_file', help="Input configuration file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-l', '--list', help="List file of SNPs to process")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    # Read the SNP identifiers to process, one per line.
    with open(args.list) as snp_file:
        snps = [line.strip() for line in snp_file if line.strip()]

    # Renamed from sample_cov_data: the output loop below reads sample_snp_data.
    sample_snp_data = defaultdict(lambda: defaultdict())
    for sample in samples:
        sys.stdout.write("Processing sample {}\n".format(sample))
        # parse_functions is referenced here but never defined in the
        # recovered script; presumably it is supplied elsewhere.
        sample_snp_data[sample] = process_sample(parse_functions, sample, samples,
                                                 config, snps)

    sys.stdout.write("Writing out data\n")
    # Text mode, not 'wb': the loop below writes str objects.
    with open("glioma_snp_data.txt", 'w') as out:
        out.write("SNP\tChr\tPos")
        for sample in samples:
            out.write("\t{} - AAF\t{} - Depth".format(sample, sample))
        out.write("\n")
        for snp in snps:
            out.write("{}".format(snp))
            for sample in samples:
                if sample_snp_data[sample][snp]:
                    out.write("\t{}\t{}".format(sample_snp_data[sample][snp]['freq'],
                                                sample_snp_data[sample][snp]['depth']))
                else:
                    out.write("\t-\t-")
            out.write("\n")
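# A hedged sketch of what process_sample() might look like, since its body is
# absent above. The writer loop expects, per sample, a dict keyed by SNP whose
# values carry 'freq' and 'depth'. The samples[sample]['snp_data'] key and the
# three-column tab-delimited layout are illustrative assumptions, not the
# project's actual format.
def process_sample_sketch(sample, samples, snp_list):
    snp_data = dict()
    with open(samples[sample]['snp_data']) as infile:  # hypothetical config key
        for line in infile:
            # Assumed layout: "snp_id<TAB>alt_allele_freq<TAB>read_depth"
            snp_id, freq, depth = line.strip().split('\t')
            if snp_id in snp_list:
                snp_data[snp_id] = {'freq': float(freq), 'depth': int(depth)}
    return snp_data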
import sys
import getpass
import argparse
import argcomplete

from ddb import configuration
from cassandra.auth import PlainTextAuthProvider
from toil.job import Job

import pipeline  # assumed import path for the module providing spawn_batch_jobs

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--randseed', help="Seed number for reproducible sub-sampling")
    parser.add_argument('-n', '--number', help="Number of iterations per sample to perform", default=1)
    parser.add_argument('-s', '--samples_file', help="Input configuration file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-a', '--address', help="IP Address for Cassandra connection", default='127.0.0.1')
    parser.add_argument('-u', '--username', help="Cassandra username for login", default=None)
    argcomplete.autocomplete(parser)
    Job.Runner.addToilOptions(parser)
    args = parser.parse_args()

    # Sub-sampling levels to test per sample (presumably percentages of reads).
    fractions = [50, 33, 25]

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    # Workflow graph definition. The following workflow definition should
    # create a valid Directed Acyclic Graph (DAG).
    root_job = Job.wrapJobFn(pipeline.spawn_batch_jobs, cores=1)

    if args.username:
        password = getpass.getpass()
        auth_provider = PlainTextAuthProvider(username=args.username, password=password)
    else:
        auth_provider = None

    for sample in samples:
        for fraction in fractions:
            # Loop body missing from the source; a hedged sketch follows below.
            pass
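    # A hedged sketch of how the truncated loop above might continue: one Toil
    # child job per sample/fraction pair hung off root_job, then the DAG handed
    # to the runner. pipeline.subsample_sample is a hypothetical job-function
    # name; addChildJobFn and Job.Runner.startToil are real Toil APIs from the
    # same generation as the Job.Runner.addToilOptions call used above.
    for sample in samples:
        for fraction in fractions:
            root_job.addChildJobFn(pipeline.subsample_sample,  # hypothetical job fn
                                   config, sample, samples, fraction,
                                   cores=1)

    Job.Runner.startToil(root_job, args)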
import sys
import argparse

import HTSeq
from collections import defaultdict
from ddb import configuration

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help="Input config file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-o', '--output', help="Output file name for CSV file")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.input, config)

    transcript_counts = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))
    for sample in samples:
        sys.stderr.write("Processing sample {}\n".format(sample))
        gtf_file = HTSeq.GFF_Reader(samples[sample]['gtf'], end_included=True)
        for feature in gtf_file:
            # sys.stderr.write("Processing entry: {}\n".format(feature))
            # Compare with ==, not 'is': identity checks against string
            # literals are unreliable.
            if feature.type == 'transcript':
                transcript_counts[feature.attr['transcript_id']][sample][
                    'FPKM'] = feature.attr['FPKM']
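    # The -o/--output flag is parsed above but never used in the recovered
    # code. A minimal sketch of the implied CSV export, assuming one row per
    # transcript with one FPKM column per sample ('NA' where absent):
    import csv
    with open(args.output, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['transcript_id'] + list(samples))
        for transcript_id in transcript_counts:
            row = [transcript_id]
            for sample in samples:
                counts = transcript_counts[transcript_id]
                row.append(counts[sample]['FPKM'] if sample in counts else 'NA')
            writer.writerow(row)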