#!/usr/bin/python

# Copyright (c) 2017 The ACEseq workflow developers.
# Distributed under the MIT License (license terms are at https://www.github.com/eilslabs/ACEseqWorkflow/LICENSE.txt).

from python_modules import Options

options = Options.parse( { "hap_file" : str, "vcf_file" : str,
                           "outfile"  : str  } )
if options:

	# Open the haplotype and VCF inputs for reading and the output file for
	# writing, using the paths given on the command line.
	# NOTE(review): no matching close() calls are visible in this part of the
	# script - confirm the files are closed further down.
	hap_infile = open( options["hap_file"], "r" )
	vcf_infile = open( options["vcf_file"], "r" )
	outfile    = open( options["outfile" ], "w" )

	# Read the first haplotype record up front so the VCF loop below can
	# compare positions against it.
	haplo_line = hap_infile.readline()
	
	# readline() returns an empty string at end of file; in that case
	# haplo_line stays falsy, which the later `haplo_line` checks test for.
	# Otherwise strip trailing whitespace and split the record into
	# whitespace-separated fields.
	if haplo_line:
		haplo_line = haplo_line.rstrip().split()
	
	for vcf_line in vcf_infile:
		
		if vcf_line[0] != "#":
			
			vcf_line = vcf_line.rstrip().split("\t")
			vcf_line[0] = vcf_line[0].replace('chr', '')
			vcf_line[0] = vcf_line[0].replace( 'X', '23' )

			if len(vcf_line) >= 9 and haplo_line:
				
				while haplo_line and int(haplo_line[2]) < int(vcf_line[1]):
# This script generates a file name ( inputpath + chromosome + inputsuffix ) for
# each chromosome.
# For example "~/Data/patient.chr" and ".snp" result in
# "~/Data/patient.chr1.snp", ... "~/Data/patient.chr22.snp",
# "~/Data/patient.chrX.snp", "~/Data/patient.chrY.snp".
# It then merges these files into one output file while filtering for coverage
# and combining the 1k windows into 10k windows.

import gzip
from python_modules import Tabfile
from python_modules import Options

options = Options.parse({
    "inputpath": str,
    "inputsuffix": str,
    "output": str,
    "coverage": int,
    "mappability": float,
    "NoOfWindows": int
})
if options:

    outfile = gzip.open(options["output"], "wb")

    def process_accumulated_lines(lines):

        if len(lines) >= options["NoOfWindows"]:

            chromo = lines[0]["chr"]

            if chromo.startswith("chr"):
                chromo = chromo.replace("chr", "")
Пример #3
0
#!/usr/bin/python

# Copyright (c) 2017 The ACEseq workflow developers.
# Distributed under the MIT License (license terms are at https://www.github.com/eilslabs/ACEseqWorkflow/LICENSE.txt).

# This script replaces segments_to_data.pl and segments_to_data_2.pl.
#
# usage: segments_to_data.py --pscbs [FILE] --input [FILE] --output [FILE]

from python_modules import Tabfile
from python_modules import Options
import subprocess
import gzip
import sys

options = Options.parse({"pscbs": str, "input": str, "output": str})

if options:

    pscbsfile = Tabfile.Input(gzip.open(options["pscbs"]))

    #SNPs could be gzipped or not
    # Start `bgzip` through the shell with its stdout redirected to the
    # requested output path; whatever is written to outfile.stdin is the data
    # bgzip receives.
    try:
        outfile = subprocess.Popen("bgzip >%s" % options["output"],
                                   shell=True,
                                   stdin=subprocess.PIPE)
    except IOError as err:
        # Take errno/strerror from the exception object: unpacking a tuple in
        # the except clause is Python-2-only syntax, while this form works on
        # Python 2.6+ and Python 3 alike.
        # Report on sys.stderr; the previous code referenced the undefined
        # name `syst`, so the handler itself failed with a NameError.
        sys.stderr.write("I/O error (%i): %s\n" % (err.errno, err.strerror))
        # NOTE(review): `outfile` is left unbound on this path, so any later
        # use of it will fail - confirm whether the script should exit here.

    pscbs_line = pscbsfile.readline()
    while pscbs_line:
# "~/Data/patient.chrX.snp", "~/Data/patient.chrY.snp".
# It then merges these files into one output file with some filtering for
# coverage and a randomization of the A/B alleles.
#
# The functionality is described in the Bachelor thesis of Isabell Bludau at
# page 17, lines 11-17.

import gzip
import subprocess

from python_modules import Tabfile
from python_modules import Options

options = Options.parse({
    "inputpath": str,
    "inputsuffix": str,
    "output": str,
    "coverage": int
})
if options:

    # Start `bgzip` through the shell with its stdout redirected to the
    # requested output path; whatever is written to outfile.stdin is the data
    # bgzip receives.
    # NOTE(review): the output path is interpolated into the shell command
    # unquoted, so a path containing spaces or shell metacharacters would be
    # misinterpreted - confirm callers only pass plain paths.
    outfile = subprocess.Popen("bgzip >%s" % options["output"],
                               shell=True,
                               stdin=subprocess.PIPE)

    # Emit the tab-separated column header once, before any per-chromosome
    # data is written.
    outfile.stdin.write(
        "chr\tstartPos\tAnormal\tBnormal\tAtumor\tBtumor\thaplotype\n"
    )  # header line

    for chromo in [str(n) for n in range(1, 22 + 1)] + ["X", "Y"]:

        infile = gzip.open(
Пример #5
0
# usage: merge_and_filter_cnv.py --inputpath [PATH] --inputsuffix [SUFFIX] --output [FILE] --coverage [INT]
#
# This script generates a file name ( inputpath + chromosome + inputsuffix ) for
# each chromosome. 
# For example "~/Data/patient.chr" and ".snp" result in
# "~/Data/patient.chr1.snp", ... "~/Data/patient.chr22.snp",
# "~/Data/patient.chrX.snp", "~/Data/patient.chrY.snp".
# It then merges these files into one output file while filtering for coverage
# and combining the 1k windows into 10k windows.

import gzip
from python_modules import Tabfile
from python_modules import Options

options = Options.parse( { "inputfile"   : str,
                           "output"      : str,   "coverage"    : int,
                           "mappability" : float, "NoOfWindows" : int } )
if options:

	outfile = gzip.open( options["output"], "wb" )

	def  process_accumulated_lines( lines ):

		if len( lines ) >= options["NoOfWindows"]:

			chromo = lines[0]["chr"]
			
			if not chromo.startswith( "chr" ):
				chromo = "chr" + chromo
				
			chromo = chromo.replace( "chrX", "chr23" )
#!/usr/bin/python

# Copyright (c) 2017 The ACEseq workflow developers.
# Distributed under the MIT License (license terms are at https://www.github.com/eilslabs/ACEseqWorkflow/LICENSE.txt).

# This script merges all segmentation approaches into a final segmentation.

from python_modules import Tabfile
from python_modules import Options

options = Options.parse({
    "crest_deldupinv": str,
    "crest_tx": str,
    "known_segments": str,
    "output": str,
    "crest_out": str,
    "DDI_length": int
})
if options:

    # Wrap the two CREST input files in Tabfile.Input readers.
    # NOTE(review): presumably Tabfile.Input yields one record per row keyed
    # by column name (the loop below indexes rows with "END" / "POS") -
    # confirm against python_modules.Tabfile.
    crest_ddi_file = Tabfile.Input(open(options["crest_deldupinv"], "r"))
    crest_tx_file = Tabfile.Input(open(options["crest_tx"], "r"))
    # Plain text outputs: one for the CREST results, one for the main output.
    crest_out = open(options["crest_out"], "w")
    file_out = open(options["output"], "w")

    # Starts empty; presumably filled by the processing further down the
    # script (not fully visible here).
    breakpoints = []

    for line in crest_ddi_file:

        line["LENGTH"] = str(int(line["END"]) - int(line["POS"]) + 1)