Example #1
import argparse
import sys
from collections import defaultdict

from ddb import configuration


def process_sample(parse_functions, sample, samples, config, amplicon_list):
    # Body not shown in this example; it is expected to return a per-SNP
    # mapping of {'freq': ..., 'depth': ...} records for the given sample.
    pass

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--samples_file',
                        help="Input configuration file for samples")
    parser.add_argument('-c', '--configuration',
                        help="Configuration file for various settings")
    parser.add_argument('-l', '--list',
                        help="List file of SNPs to process")

    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    # SNP identifiers come from the -l/--list file; its exact format is not
    # shown in this example, so one identifier per line is assumed here.
    with open(args.list) as list_file:
        snps = [line.strip() for line in list_file if line.strip()]

    sample_snp_data = defaultdict(lambda: defaultdict())

    for sample in samples:
        sys.stdout.write("Processing sample {}\n".format(sample))
        sample_snp_data[sample] = process_sample(parse_functions, sample,
                                                 samples, config, snps)

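    # Write a tab-delimited table: one row per SNP, with an AAF ('freq') and
    # depth column pair for each sample.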
    sys.stdout.write("Writing out data\n")
    with open("glioma_snp_data.txt", 'wb') as out:
        out.write("SNP\tChr\tPos")
        for sample in samples:
            out.write("\t{} - AAF\t{} - Depth".format(sample, sample))
        out.write("\n")
        for snp in snps:
            out.write("{}".format(snp))
            for sample in samples:
                if sample_snp_data[sample][snp]:
                    out.write("\t{}\t{}".format(sample_snp_data[sample][snp]['freq'],
                                                sample_snp_data[sample][snp]['depth']))
                else:
                    out.write("\t-\t-")
            out.write("\n")
Example #2
    parser.add_argument('-s', '--samples_file', help="Input configuration file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-a', '--address', help="IP Address for Cassandra connection", default='127.0.0.1')
    parser.add_argument('-u', '--username', help='Cassandra username for login', default=None)

    argcomplete.autocomplete(parser)
    Job.Runner.addToilOptions(parser)
    args = parser.parse_args()

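    # Fractions used by the subsampling jobs below (the values appear to be
    # percentages of the original reads to keep).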
    fractions = [50, 33, 25]

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    # Workflow Graph definition. The following workflow definition should create a valid Directed Acyclic Graph (DAG)
    root_job = Job.wrapJobFn(pipeline.spawn_batch_jobs, cores=1)

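    # Build a Cassandra authentication provider only when a username is given;
    # the password is prompted for interactively.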
    if args.username:
        password = getpass.getpass()
        auth_provider = PlainTextAuthProvider(username=args.username, password=password)
    else:
        auth_provider = None

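    # Queue one subsampling job per sample, per fraction, per iteration;
    # args.number (defined outside the shown snippet) sets the iteration count.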
    for sample in samples:
        for fraction in fractions:
            iteration = 0
            while iteration < int(args.number):
                job = Job.wrapJobFn(subsample_bam, [args.address], "coveragestore", auth_provider, sample, samples,
Example #3
import argparse
import sys
from collections import defaultdict

import HTSeq

from ddb import configuration

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help="Input config file for samples")
    parser.add_argument('-c',
                        '--configuration',
                        help="Configuration file for various settings")
    parser.add_argument('-o', '--output', help="Output file name for CSV file")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.input, config)

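    # Nested mapping: transcript_id -> sample -> metric ('FPKM'/'TPM') values
    # parsed from each sample's GTF file.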
    transcript_counts = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))

    for sample in samples:
        sys.stderr.write("Processing sample {}\n".format(sample))
        gtf_file = HTSeq.GFF_Reader(samples[sample]['gtf'], end_included=True)
        for feature in gtf_file:
            # sys.stderr.write("Processing entry: {}\n".format(feature))
            if feature.type == 'transcript':
                transcript_counts[feature.attr['transcript_id']][sample][
                    'FPKM'] = feature.attr['FPKM']
                transcript_counts[feature.attr['transcript_id']][sample][
                    'TPM'] = feature.attr['TPM']
Example #4
    parser = argparse.ArgumentParser()
    parser.add_argument('-s',
                        '--samples_file',
                        help="Input configuration file for samples")
    parser.add_argument('-c',
                        '--configuration',
                        help="Configuration file for various settings")
    Job.Runner.addToilOptions(parser)
    args = parser.parse_args()
    args.logLevel = "INFO"

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.samples_file, config)

    # Workflow Graph definition. The following workflow definition should create a valid Directed Acyclic Graph (DAG)
    root_job = Job.wrapJobFn(pipeline.spawn_batch_jobs, cores=1)
    transcripts_list = list()
    flags = ["keep_retained", "max_intron", "stranded"]

    # Per sample jobs
    for sample in samples:
        # Alignment and Refinement Stages
        align_job = Job.wrapJobFn(hisat.hisat_unpaired,
                                  config,
                                  sample,
                                  samples,
                                  flags,
                                  cores=int(config['hisat']['num_cores']),
Example #5
import argparse
import fnmatch
import os
import sys

from ddb import configuration

if __name__ == "__main__":
    type = "colorectal"
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--configuration',
                        help="Configuration file for various settings")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    # type_cases = defaultdict(defaultdict(list))
    # counts = defaultdict(int)
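    # Walk the working directory for per-run sample configuration files
    # matching the 1*_M0373?.config pattern and scan them for colorectal cases.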
    for root, dirs, files in os.walk("."):
        for samples_file in fnmatch.filter(files, "1*_M0373?.config"):
            sys.stderr.write("Reading file: {}\n".format(
                os.path.join(root, samples_file)))

            sys.stdout.write("Parsing sample data\n")
            libraries = configuration.configure_samples(
                os.path.join(root, samples_file), config)
            samples = configuration.merge_library_configs_samples(libraries)
            for sample in samples:
                for library in samples[sample]:
                    if samples[sample][library]['report'].startswith(type):
                        print "Colorectal case found: {}\n".format(sample)

    # sys.stderr.write("Type\tCount\n")
    # for report_type in counts:
    #     sys.stdout.write("{}\t{}\n".format(report_type, counts[report_type]))
Example #6
import argparse
import sys
from collections import defaultdict

import HTSeq

from ddb import configuration


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help="Input config file for samples")
    parser.add_argument('-c', '--configuration', help="Configuration file for various settings")
    parser.add_argument('-o', '--output', help="Output file name for CSV file")
    args = parser.parse_args()

    sys.stdout.write("Parsing configuration data\n")
    config = configuration.configure_runtime(args.configuration)

    sys.stdout.write("Parsing sample data\n")
    samples = configuration.configure_samples(args.input, config)

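    # Nested mapping: transcript_id -> sample -> FPKM/TPM values collected from
    # each sample's GTF file.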
    transcript_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    for sample in samples:
        sys.stderr.write("Processing sample {}\n".format(sample))
        gtf_file = HTSeq.GFF_Reader(samples[sample]['gtf'], end_included=True)
        for feature in gtf_file:
            # sys.stderr.write("Processing entry: {}\n".format(feature))
            if feature.type == 'transcript':
                transcript_counts[feature.attr['transcript_id']][sample]['FPKM'] = feature.attr['FPKM']
                transcript_counts[feature.attr['transcript_id']][sample]['TPM'] = feature.attr['TPM']

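    # Write the per-transcript metrics out as a delimited table; the header row
    # starts with "Transcript" followed by per-sample columns.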
    with open(args.output, 'w') as output:
        output.write("Transcript")
        for sample in samples: