# This function emulates the exact formatting that Perl uses for
            # float values. It is of no functional importance for this script.
            def perl_like_float_format(value):
                """Return ``value`` as text, mimicking Perl's float output.

                The number of decimals is 15 minus the length of
                ``str(int(value))`` (a minus sign counts as one character).
                Trailing zeros of the fractional part and a dangling decimal
                point are removed.

                When the integer part already uses 15 or more characters the
                value is written without decimals. This is a plain fallback,
                not Perl's exponent notation.
                """
                int_part = str(int(value))
                # Clamp at zero: a negative precision is an invalid format
                # spec and would raise ValueError.
                decimals = max(15 - len(int_part), 0)
                text = "{0:.{1}f}".format(value, decimals)
                # Strip only when a fractional part exists; otherwise
                # rstrip('0') would remove significant zeros of the integer
                # part (e.g. 100000000000000 would become "1").
                if '.' in text:
                    text = text.rstrip('0').rstrip('.')
                return text

            outfile.write(chromo + '\t' + str(position) + '\t' +
                          perl_like_float_format(normal) + '\t' +
                          perl_like_float_format(tumor) + '\n')

    # Walk over chromosomes 1-22 plus X and Y; each one has its own input file.
    for chromo in [str(n) for n in range(1, 22 + 1)] + ["X", "Y"]:
        print chromo
        # File name = inputpath + chromosome label + inputsuffix.
        input_filename = options["inputpath"] + chromo + options["inputsuffix"]

        infile = Tabfile.Input(gzip.open(input_filename, 'rb'))

        # Seed the accumulator with the first record. This record is taken
        # as-is; the coverage/mappability filter below is not applied to it.
        accum_lines = [infile.readline()]

        for line in infile:

            # Skip records whose "normal" value is below the coverage
            # threshold or whose "map" value is below the mappability
            # threshold.
            if (int(line["normal"]) < options["coverage"]
                    or float(line["map"]) < options["mappability"]):
                continue

            # Records belong together when they share the chromosome and the
            # 10000-wide position bin (pos // 10000) of the first accumulated
            # record.
            if (line["chr"] == accum_lines[0]["chr"]
                    and int(line["pos"]) // 10000
                    == int(accum_lines[0]["pos"]) // 10000):
                accum_lines.append(line)
            else:
                # A different bin starts: hand the finished group over.
                # NOTE(review): the part of this branch visible here does not
                # reset accum_lines afterwards -- confirm against the full file.
                process_accumulated_lines(accum_lines)
示例#2
0
# Input segment file. `file` is the Python 2 built-in type, so argparse opens
# the given path itself and args.file is an open file object when provided.
parser.add_argument('--file',
                    '-f',
                    type=file,
                    help="segment file with copy number information")
# Output destination: a path string, or the sys.stdout object when omitted.
parser.add_argument('--out',
                    '-o',
                    default=sys.stdout,
                    type=str,
                    help='outputfile')
# NOTE(review): the help text looks copy-pasted from --out, and the default is
# the int 900 while values given on the command line arrive as str -- confirm
# the intended meaning and type of --length.
parser.add_argument('--length', '-l', default=900, type=str, help='outputfile')

args = parser.parse_args()
out = args.out

# Wrap the input file for reading; an IOError ends the script with status 2.
try:
    infile = Tabfile.Input(args.file)
except IOError as (errno, strerr):
    sys.stderr.write("IOError %i: %s\n" % (errno, strerr))
    sys.exit(2)

# Open a real file only when an output path was given; otherwise `out` stays
# bound to sys.stdout.
if out != sys.stdout:
    try:
        out = open(args.out, 'w')
    except IOError as (errno, strerr):
        sys.exit("IOError %i: %s\n" % (errno, strerr))


def merge_lines_CN(prior_line, newline, next_line):
    start = ""
    end = ""
    #check whether segment could be merged
示例#3
0
# This script replaces segments_to_data.pl and segments_to_data_2.pl.
#
# usage: segments_to_data.py --pscbs [FILE] --input [FILE] --output [FILE]

from python_modules import Tabfile
from python_modules import Options
import subprocess
import gzip
import sys

options = Options.parse({"pscbs": str, "input": str, "output": str})

if options:

    pscbsfile = Tabfile.Input(gzip.open(options["pscbs"]))

    #SNPs could be gzipped or not
    try:
        outfile = subprocess.Popen("bgzip >%s" % options["output"],
                                   shell=True,
                                   stdin=subprocess.PIPE)
    except IOError as (errno, strerror):
        syst.stderr.write("I/O error (%i): %s\n" % (errno, strerror))

    pscbs_line = pscbsfile.readline()
    while pscbs_line:

        current_chromo = pscbs_line["chromosome"]
        print current_chromo
    )
    sys.exit(2)

# Both output paths are mandatory.
if not (args.sv_out and args.output):
    sys.stderr.write("Please specify all output files. "
                     "For more information use -h\n")
    sys.exit(2)

# A missing (or otherwise falsy) DDI length is rejected as well.
if not args.DDI_length:
    sys.stderr.write(
        "Please specify all minimum duplication deletion and inversion "
        "(DDI) lengths. For more information use -h\n")
    sys.exit(2)

# Open all inputs and outputs up front; the first IOError aborts the script.
try:
    sv_file = Tabfile.Input(open(args.variants, "r"))
    sv_out = open(args.sv_out, "w")
    knownseg_file = Tabfile.Input(open(args.known_segments, "r"))
    file_out = open(args.output, "w")
    # Inputs that the record loop further down iterates over.
    files = [sv_file]
except IOError as (errno, strerr):
    sys.stderr.write("IOError %i: %s\n" % (errno, strerr))
    # The errno of the failure doubles as the process exit status.
    sys.exit(errno)

breakpoints = []
# Chromosome labels "1" .. "24" as strings.
chromosomes = [str(a) for a in range(1, 24 + 1)]

for f in files:
    for line in f:

        if line['svtype'] == 'INV' or line['svtype'] == 'DUP' or line[
示例#5
0
# Normalise a missing --snps value to an empty string.
if not args.snps:
    args.snps = ""

out = args.out

# When an output target other than sys.stdout was given, write through an
# external bgzip process whose stdout the shell redirects to that target.
# NOTE(review): `out` is then a Popen object rather than a file object;
# confirm that the writers further down handle both cases.
if args.out != sys.stdout:
    try:
        out = subprocess.Popen("bgzip >%s" % args.out,
                               shell=True,
                               stdin=subprocess.PIPE)
    except IOError as (errno, strerr):
        sys.stderr.write("IOError %i: %s\n" % (errno, strerr))
        sys.exit(errno)

# Try the --snps file (opened via gzip) first; if that raises IOError, fall
# back to reading from stdin.
try:
    snpFile = Tabfile.Input(gzip.open(args.snps))
except IOError as (errno, strerr):
    try:
        snpFile = Tabfile.Input(sys.stdin)
    except IOError:
        # The message reports the error of the first (gzip) attempt.
        sys.stderr.write("IOError %i: %s\n" % (errno, strerr))
        sys.exit(errno)

curr_chrom = ''

for line in snpFile:
    chrom = line['chr']
    # Names starting with "chr" are stripped of it, both in the local copy and
    # in the record. str.replace removes every occurrence of "chr", not only
    # the leading one.
    if chrom.startswith('chr'):
        chrom = chrom.replace('chr', '')
        line['chr'] = line['chr'].replace('chr', '')
    pos = int(line['startPos'])
if options:

    # The output is compressed by an external bgzip process: rows are written
    # to its stdin and the shell redirects its stdout to the output path.
    outfile = subprocess.Popen("bgzip >%s" % options["output"],
                               shell=True,
                               stdin=subprocess.PIPE)

    outfile.stdin.write(
        "chr\tstartPos\tAnormal\tBnormal\tAtumor\tBtumor\thaplotype\n"
    )  #header

    # One input file per chromosome: 1-22, X and Y.
    for chromo in [str(n) for n in range(1, 22 + 1)] + ["X", "Y"]:

        infile = gzip.open(
            options["inputpath"] + chromo + options["inputsuffix"], 'rb')

        # Release each per-chromosome file as soon as it has been consumed,
        # even if a record fails to parse. Previously none of these handles
        # was ever closed.
        try:
            for line in Tabfile.Input(infile):

                # Keep only positions whose An + Bn reaches the coverage
                # threshold.
                if (int(line["An"]) + int(line["Bn"]) >= options["coverage"]):

                    # The haplotype column is always written as 0 here.
                    line['haplotype'] = 0

                    if line["chr"].startswith("chr"):
                        line["chr"] = line["chr"].replace("chr", "")

                    # X and Y are written as 23 and 24.
                    line["chr"] = line["chr"].replace("X", "23")
                    line["chr"] = line["chr"].replace("Y", "24")

                    outfile.stdin.write(line["chr"] + '\t' + line["pos"] + '\t' +
                                        line["An"] + '\t' + line["Bn"] + '\t' +
                                        line["At"] + '\t' + line["Bt"] + '\t' +
                                        str(line['haplotype']) + '\n')
        finally:
            infile.close()
# This script merges all segmentation approaches into a final segmentation.

from python_modules import Tabfile
from python_modules import Options

# Command-line options: five string-valued options plus the integer
# DDI_length.
options = Options.parse({
    "crest_deldupinv": str,
    "crest_tx": str,
    "known_segments": str,
    "output": str,
    "crest_out": str,
    "DDI_length": int
})
if options:

    # The two crest_* inputs are read through Tabfile; both outputs are opened
    # for writing as plain files.
    crest_ddi_file = Tabfile.Input(open(options["crest_deldupinv"], "r"))
    crest_tx_file = Tabfile.Input(open(options["crest_tx"], "r"))
    crest_out = open(options["crest_out"], "w")
    file_out = open(options["output"], "w")

    breakpoints = []

    for line in crest_ddi_file:

        line["LENGTH"] = str(int(line["END"]) - int(line["POS"]) + 1)

        if (line["SOMATIC_GERMLINE_CLASSIFICATION"] == "somatic"
                and int(line["LENGTH"]) >= options["DDI_length"]
                and "CHROM" in line):

            line["CHROM"] = line["CHROM"].replace("chr", "").replace(
示例#8
0
            coverage = sum([float(line["coverage"])
                            for line in lines]) * 10 / len(lines)

            # This function emulates the exact formatting that Perl uses for
            # float values. It is of no functional importance for this script.
            def perl_like_float_format(value):
                """Return ``value`` as text, mimicking Perl's float output.

                The number of decimals is 15 minus the length of
                ``str(int(value))`` (a minus sign counts as one character).
                Trailing zeros of the fractional part and a dangling decimal
                point are removed.

                When the integer part already uses 15 or more characters the
                value is written without decimals. This is a plain fallback,
                not Perl's exponent notation.
                """
                int_part = str(int(value))
                # Clamp at zero: a negative precision is an invalid format
                # spec and would raise ValueError.
                decimals = max(15 - len(int_part), 0)
                text = "{0:.{1}f}".format(value, decimals)
                # Strip only when a fractional part exists; otherwise
                # rstrip('0') would remove significant zeros of the integer
                # part (e.g. 100000000000000 would become "1").
                if '.' in text:
                    text = text.rstrip('0').rstrip('.')
                return text

            outfile.write(chromo + '\t' + str(position) + '\t' +
                          perl_like_float_format(coverage) + '\t' + '\n')

#	input_filename = options["inputpath"] + chromo + options["inputsuffix"]

    # Read the gzip-compressed input and seed the accumulator with its first
    # record (taken without applying the filters below).
    infile = Tabfile.Input(gzip.open(options['inputfile'], 'rb'))
    accum_lines = [infile.readline()]

    for line in infile:
        # Drop records below the coverage threshold ...
        if int(line["coverage"]) < options["coverage"]:
            continue
        # ... or below the mappability threshold.
        if float(line["map"]) < options["mappability"]:
            continue

        # A record that leaves the chromosome or the 10000-wide position bin
        # of the first accumulated record closes the current group and opens
        # a new one; any other record joins the current group.
        if not (line["chr"] == accum_lines[0]["chr"]
                and int(line["pos"]) // 10000
                == int(accum_lines[0]["pos"]) // 10000):
            process_accumulated_lines(accum_lines)
            accum_lines = [line]
        else:
            accum_lines.append(line)
				distances.append(float(fields[3]) )
				ploidies.append(float(fields[1]) )
				entries.append(fields)
			contin=1
	except:
			sys.stderr.write( "FILE for %s does not exist\n"% pid)
			sys.exit(2)

	m = min( [ abs( j-2.0 ) for j in ploidies ] )
	index = [i for i,j in enumerate(ploidies) if abs(j-2.0)==m ]

	solutions={1: "%s_%s"% (entries[index[0]][1], entries[index[0]][2] ) }
	count=2
	for i,j  in enumerate(entries):
		if i==index[0]: continue
		solutions[count]="%s_%s"% ( entries[i][1], entries[i][2] )
		count+=1
	
	jsonMain={}
	for key in solutions.keys():	
		try:
			infile="%s/%s_cnv_parameter_%s.txt"% (path, pid, solutions[key])
			tabfile = Tabfile.Input( open(infile) )
		except IOError as (errno, strerr ):
			sys.exit("IOError %i:%s\n" % (errno, strerr))

		#convert simple tab seperated file wit header 
		jsonMain[key]=tabfile.readline()

	out.write( json.dumps(jsonMain, indent=2, separators=(",",":")) )