Example #1
import sys
import argparse
from collections import defaultdict

from ddb import configuration


def process_sample(parse_functions, sample, samples, config, amplicon_list):
    # Body omitted in the source. Judging from the output loop below, it
    # returns a dict mapping each SNP to {'freq': ..., 'depth': ...} (or a
    # falsy value when no data exists); see the sketch after this example.
    pass

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--samples_file',
                        help="Input configuration file for samples")
    parser.add_argument('-c', '--configuration',
                        help="Configuration file for various settings")
    parser.add_argument('-l', '--list',
                        help="List file of SNPs to process")

    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    sys.stdout.write("Parsing SNP list\n")
    # How the SNP list is parsed is not shown in the source; keeping each
    # stripped line whole (assumed tab-separated: SNP, Chr, Pos, matching the
    # output header below) is an assumption.
    with open(args.list) as snp_list:
        snps = [line.strip() for line in snp_list if line.strip()]

    # Mapping of parser callables consumed by process_sample; its
    # construction is omitted in the source.
    parse_functions = {}

    sample_snp_data = defaultdict(lambda: defaultdict())

    for sample in samples:
        sys.stdout.write("Processing sample {}\n".format(sample))
        sample_snp_data[sample] = process_sample(parse_functions, sample,
                                                 samples, config, snps)

    sys.stdout.write("Writing out data\n")
    with open("glioma_snp_data.txt", 'wb') as out:
        out.write("SNP\tChr\tPos")
        for sample in samples:
            out.write("\t{} - AAF\t{} - Depth".format(sample, sample))
        out.write("\n")
        for snp in snps:
            out.write("{}".format(snp))
            for sample in samples:
                if sample_snp_data[sample][snp]:
                    out.write("\t{}\t{}".format(sample_snp_data[sample][snp]['freq'],
                                                sample_snp_data[sample][snp]['depth']))
                else:
                    out.write("\t-\t-")
            out.write("\n")
Example #2
import sys
import getpass
import argparse

import argcomplete
from toil.job import Job
from cassandra.auth import PlainTextAuthProvider
from ddb import configuration

# Project-specific module providing the Toil job functions; not shown in the
# source.
import pipeline

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--randseed', type=int, help="Seed number for reproducible sub-sampling")
    parser.add_argument('-n', '--number', type=int, default=1, help="Number of iterations per sample to perform")
    parser.add_argument('-s', '--samples_file', help="Input configuration file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-a', '--address', help="IP Address for Cassandra connection", default='127.0.0.1')
    parser.add_argument('-u', '--username', help='Cassandra username for login', default=None)

    argcomplete.autocomplete(parser)
    Job.Runner.addToilOptions(parser)
    args = parser.parse_args()

    fractions = [50, 33, 25]

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    # Workflow Graph definition. The following workflow definition should create a valid Directed Acyclic Graph (DAG)
    root_job = Job.wrapJobFn(pipeline.spawn_batch_jobs, cores=1)

    if args.username:
        password = getpass.getpass()
        auth_provider = PlainTextAuthProvider(username=args.username, password=password)
    else:
        auth_provider = None

    for sample in samples:
        for fraction in fractions:
            # Loop body truncated in the source; a sketch of how child jobs
            # might be attached follows this example.
            pass
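
The example ends before the loop body. Below is a minimal sketch of how the DAG might be completed and launched, assuming a hypothetical pipeline.run_subsample job function (the real job function is not shown in the source):

    for sample in samples:
        for fraction in fractions:
            # Attach one child job per (sample, fraction) pair to the root
            # job. pipeline.run_subsample is a hypothetical name.
            root_job.addChildJobFn(pipeline.run_subsample, config, sample,
                                   samples, fraction, args.randseed,
                                   auth_provider, cores=1)

    # Start the Toil workflow with the options parsed above.
    Job.Runner.startToil(root_job, args)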
Example #3
import sys
import argparse
import HTSeq
from collections import defaultdict
from ddb import configuration

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help="Input config file for samples")
    parser.add_argument('-c',
                        '--configuration',
                        help="Configuration file for various settings")
    parser.add_argument('-o', '--output', help="Output file name for CSV file")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.input, config)

    transcript_counts = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))

    for sample in samples:
        sys.stderr.write("Processing sample {}\n".format(sample))
        gtf_file = HTSeq.GFF_Reader(samples[sample]['gtf'], end_included=True)
        for feature in gtf_file:
            # sys.stderr.write("Processing entry: {}\n".format(feature))
            if feature.type == 'transcript':
                transcript_counts[feature.attr['transcript_id']][sample][
                    'FPKM'] = feature.attr['FPKM']
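
The fragment collects per-sample FPKM values but ends before writing the CSV promised by the --output flag. A minimal sketch of that final step, assuming one row per transcript and one FPKM column per sample (the real column layout is not shown in the source):

    # Write the collected FPKM values to the CSV named by --output.
    with open(args.output, 'w') as out:
        out.write("Transcript,{}\n".format(",".join(samples)))
        for transcript in transcript_counts:
            fpkms = [str(transcript_counts[transcript][sample]['FPKM'])
                     for sample in samples]
            out.write("{},{}\n".format(transcript, ",".join(fpkms)))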