#!/usr/bin/python # Copyright (c) 2017 The ACEseq workflow developers. # Distributed under the MIT License (license terms are at https://www.github.com/eilslabs/ACEseqWorkflow/LICENSE.txt). from python_modules import Options options = Options.parse( { "hap_file" : str, "vcf_file" : str, "outfile" : str } ) if options: hap_infile = open( options["hap_file"], "r" ) vcf_infile = open( options["vcf_file"], "r" ) outfile = open( options["outfile" ], "w" ) haplo_line = hap_infile.readline() if haplo_line: haplo_line = haplo_line.rstrip().split() for vcf_line in vcf_infile: if vcf_line[0] != "#": vcf_line = vcf_line.rstrip().split("\t") vcf_line[0] = vcf_line[0].replace('chr', '') vcf_line[0] = vcf_line[0].replace( 'X', '23' ) if len(vcf_line) >= 9 and haplo_line: while haplo_line and int(haplo_line[2]) < int(vcf_line[1]):
# This script generates a file name ( inputpath + chromosome + inputsuffix ) for # each chromosome. # For example "~/Data/patient.chr" and ".snp" result in # "~/Data/patient.chr1.snp", ... "~/Data/patient.chr22.snp", # "~/Data/patient.chrX.snp", "~/Data/patient.chrY.snp". # It then merges these files into one output file while filtering for coverage # and combining the 1k windows into 10k windows. import gzip from python_modules import Tabfile from python_modules import Options options = Options.parse({ "inputpath": str, "inputsuffix": str, "output": str, "coverage": int, "mappability": float, "NoOfWindows": int }) if options: outfile = gzip.open(options["output"], "wb") def process_accumulated_lines(lines): if len(lines) >= options["NoOfWindows"]: chromo = lines[0]["chr"] if chromo.startswith("chr"): chromo = chromo.replace("chr", "")
#!/usr/bin/python # Copyright (c) 2017 The ACEseq workflow developers. # Distributed under the MIT License (license terms are at https://www.github.com/eilslabs/ACEseqWorkflow/LICENSE.txt). # This script replaces segments_to_data.pl and segments_to_data_2.pl. # # usage: segments_to_data.py --pscbs [FILE] --input [FILE] --output [FILE] from python_modules import Tabfile from python_modules import Options import subprocess import gzip import sys options = Options.parse({"pscbs": str, "input": str, "output": str}) if options: pscbsfile = Tabfile.Input(gzip.open(options["pscbs"])) #SNPs could be gzipped or not try: outfile = subprocess.Popen("bgzip >%s" % options["output"], shell=True, stdin=subprocess.PIPE) except IOError as (errno, strerror): syst.stderr.write("I/O error (%i): %s\n" % (errno, strerror)) pscbs_line = pscbsfile.readline() while pscbs_line:
# "~/Data/patient.chrX.snp", "~/Data/patient.chrY.snp". # It then merges these files into one output file with some filtering for # coverage and a randomization of the A/B alleles. # # The functionality is described in the Bachelor thesis of Isabell Bludau at # page 17, lines 11-17. import gzip import subprocess from python_modules import Tabfile from python_modules import Options options = Options.parse({ "inputpath": str, "inputsuffix": str, "output": str, "coverage": int }) if options: outfile = subprocess.Popen("bgzip >%s" % options["output"], shell=True, stdin=subprocess.PIPE) outfile.stdin.write( "chr\tstartPos\tAnormal\tBnormal\tAtumor\tBtumor\thaplotype\n" ) #header for chromo in [str(n) for n in range(1, 22 + 1)] + ["X", "Y"]: infile = gzip.open(
# usage: merge_and_filter_cnv.py --inputpath [PATH] --inputsuffix [SUFFIX] --output [FILE] --coverage [INT] # # This script generates a file name ( inputpath + chromosome + inputsuffix ) for # each chromosome. # For example "~/Data/patient.chr" and ".snp" result in # "~/Data/patient.chr1.snp", ... "~/Data/patient.chr22.snp", # "~/Data/patient.chrX.snp", "~/Data/patient.chrY.snp". # It then merges these files into one output file while filtering for coverage # and combining the 1k windows into 10k windows. import gzip from python_modules import Tabfile from python_modules import Options options = Options.parse( { "inputfile" : str, "output" : str, "coverage" : int, "mappability" : float, "NoOfWindows" : int } ) if options: outfile = gzip.open( options["output"], "wb" ) def process_accumulated_lines( lines ): if len( lines ) >= options["NoOfWindows"]: chromo = lines[0]["chr"] if not chromo.startswith( "chr" ): chromo = "chr" + chromo chromo = chromo.replace( "chrX", "chr23" )
#!/usr/bin/python # Copyright (c) 2017 The ACEseq workflow developers. # Distributed under the MIT License (license terms are at https://www.github.com/eilslabs/ACEseqWorkflow/LICENSE.txt). # This script merges all segmentation approaches into a final segmentation. from python_modules import Tabfile from python_modules import Options options = Options.parse({ "crest_deldupinv": str, "crest_tx": str, "known_segments": str, "output": str, "crest_out": str, "DDI_length": int }) if options: crest_ddi_file = Tabfile.Input(open(options["crest_deldupinv"], "r")) crest_tx_file = Tabfile.Input(open(options["crest_tx"], "r")) crest_out = open(options["crest_out"], "w") file_out = open(options["output"], "w") breakpoints = [] for line in crest_ddi_file: line["LENGTH"] = str(int(line["END"]) - int(line["POS"]) + 1)