示例#1
0
def main():
    #parse command line arguments
    parser = argparse.ArgumentParser(description='Performs fetal CNV analysis from maternal plasma and phased parental data.')
    parser.add_argument('input', type=str, nargs=1, help='path to input file with allele counts in plasma and parental haplotypes')
    parser.add_argument('target', type=str, nargs=1, help='path to file with background truth - "target"')
    parser.add_argument('plasma', type=str, nargs=1, help='path to file with plasma sequencing DOC for all chromosomal positions')
    parser.add_argument('ref', type=str, nargs=1, help='path to file with reference plasma sequencing DOC for all chromosomal positions')
    parser.add_argument('seq', type=str, nargs=1, help='path to ref. genomic sequence in fasta format')
    parser.add_argument('param', type=str, nargs=1, help='path to file with method parameters')
    parser.add_argument('--ff', type=float, help='fetal mixture ratio', default=-1.)
    parser.add_argument('--useCvrg', help='use coverage flag', action="store_true")
    parser.add_argument('--trainGrad', type=str, help='train by maxll gradient and output new params to given file', default="")
    parser.add_argument('--trainMargin', type=str, help='train by max margin and output new params to given file', default="")
    parser.add_argument('--getObsCounts', help='get observed allele counts', action="store_true")
    args = parser.parse_args()
    
    in_file_name = args.input[0]
    target_file_name = args.target[0]
    plasma_doc_file = open(args.plasma[0], "r")
    ref_doc_file = open(args.ref[0], "r")
    seq_file = open(args.seq[0], "r")
    param_file = open(args.param[0], "r")
    if args.ff > 0: mix = args.ff
    
    runGradTraining = False
    if args.trainGrad != "":
         runGradTraining = True
         res_param_file_name = args.trainGrad
    runMarginTraining = False
    if args.trainMargin != "":
         runMarginTraining = True
         res_param_file_name = args.trainMargin
    
    #print input info
    print "------------------------------------------"
    print "Running fCNV, input parameters:"
    print "input:", in_file_name
    print "target:", target_file_name
    print "plasma:", plasma_doc_file
    print "refDOC:", ref_doc_file
    print "seq:", seq_file
    print "param:", param_file
    print "--ff:", args.ff
    print "--useCvrg:", args.useCvrg
    print "--trainGrad:", args.trainGrad
    print "------------------------------------------"
    os.system("hostname")
    
    #read the pre-processed input
    snp_positions, samples, M, P, MSC, PSC = readInput(in_file_name)
    
    #fetch the method parameters from file
    crfParams = readParams(param_file)
    print "============ CRF PARAMETERS =============="
    for p in sorted(crfParams):
        print p, "=", crfParams[p]
    print "=========================================="
    
    #get genomic positions on the last lines of the pileup files to estimate the length of the chromosome
    if args.useCvrg:
        with open(args.plasma[0], 'rb') as fh:
            fh.seek(-256, 2)
            last_pos_plasma = int(fh.readlines()[-1].decode().split(' ')[0])
            fh.close()
#        with open(args.ref[0], 'rb') as fh:
#            fh.seek(-256, 2)
#            last_pos_ref = int(fh.readlines()[-1].decode().split(' ')[0])
#            fh.close()
        chr_length = last_pos_plasma + 4742
        
        gc_sum = [0] * chr_length
        prefix_sum_plasma = [0] * chr_length
        prefix_count_plasma = [0] * chr_length
        prefix_sum_ref = [0] * chr_length
        prefix_count_ref = [0] * chr_length
    
        #get GC content prefix sums from the reference
        gen_pos = 0
        keep_reading = True
        while keep_reading:
            line = seq_file.readline().strip().upper()
            if len(line) == 0: break
            if line[0] == '>': continue
            for i in range(len(line)):
                gc_sum[gen_pos] = gc_sum[max(gen_pos - 1, 0)]
                if line[i] in 'GC': gc_sum[gen_pos] += 1
                gen_pos += 1
                if gen_pos >= chr_length:
                    keep_reading = False
                    break
        seq_file.close()
        
        last = 0
        while True: 
            line = plasma_doc_file.readline()
            if not line: break
            row = map(int, line.split(' '))
            if row[0] >= chr_length: break
            prefix_sum_plasma[row[0]] = prefix_sum_plasma[last] + row[1]
            prefix_count_plasma[row[0]] = prefix_count_plasma[last] + 1
            last = row[0]
        plasma_doc_file.close()
        
        last = 0
        while True: 
            line = ref_doc_file.readline()
            if not line: break
            row = map(int, line.split(' '))
            if row[0] >= chr_length: break
            prefix_sum_ref[row[0]] = prefix_sum_ref[last] + row[1]
            prefix_count_ref[row[0]] = prefix_count_ref[last] + 1
            last = row[0]
        ref_doc_file.close()
    #ENDIF 
    
    #snp_positions = []
    ground_truth = []
    target_file = open(target_file_name, 'r')
    for line in target_file.readlines():
        line = line.rstrip("\n").split("\t")
        #snp_positions.append(int(line[0]))
        ground_truth.append(int(line[-1]))
    target_file.close()
    
    #fcnv = fcnvCRF.FCNV(crfParams, None, None, False)
    #mix, mix_median, ct = fcnv.estimateMixture(samples, M, P)
    #del fcnv
    #print "Est. Mixture: ", mix, mix_median, '(', ct ,')',
    mix = 0.13 #proportion of fetal genome in plasma
    if args.ff > 0: mix = args.ff
    print "used:", mix
    
    
    cnv_prior = None
    if args.useCvrg:
        cvrg = cvrgHMM.coverageFCNV(snp_positions, prefix_sum_plasma, prefix_count_plasma, prefix_sum_ref, prefix_count_ref, gc_sum)
        cvrg_posterior = cvrg.posteriorDecoding(mix)
        del prefix_sum_plasma, prefix_count_plasma, prefix_sum_ref, prefix_count_ref, gc_sum
        
        cnv_prior = [ [0., 0., 0.] for x in range(len(snp_positions)) ]
        for pos in range(len(cvrg_posterior)):
            for cp_num_posterior in cvrg_posterior[pos]:
                cnv_prior[pos][cp_num_posterior[1]] = cp_num_posterior[0]
        del cvrg, cvrg_posterior
    
    fcnv = fcnvCRF.FCNV(crfParams, snp_positions, cnv_prior, args.useCvrg)
    
    parameterStats = dict()
    #get get observed allele counts
    if args.getObsCounts:
        parameterStats = fcnv.computeCountsTable(ground_truth, samples, M, P, MSC, PSC, mix, parameterStats)
        varFileName = target_file_name.split('/')[-1].split('.')[0].replace(':', '-')+'.observedCounts.txt'
        varFile = open(varFileName, 'w')
        print >>varFile, len(parameterStats)
        for expP in parameterStats.keys():
            print >>varFile, expP
            print >>varFile, " ".join(map( lambda x: '/'.join(map(str, x)), parameterStats[expP]) )
        varFile.close()
        return 0
    
    #run gradient training
    if runGradTraining:
        #run the training iterations
        for iterNum in range(1):
            #print "iterNum: ", iterNum
            ll, params = fcnv.computeLLandGradient(ground_truth, samples, M, P, MSC, PSC, mix)
            print ll, params
            print "------------------------------------------------------------------"
        
        #save the trained parameters to the file
        res_param_file = open(res_param_file_name, "w")    
        for p in sorted(params):
            if isinstance(params[p], list):
                print >>res_param_file, p, "=", " ".join(map(str, params[p]))
            else:
                print >>res_param_file, p, "=", params[p]
        res_param_file.close()
        
        return 0
    
    #run max margin training
    if runMarginTraining:
        #run the training iterations
        for iterNum in range(5): #change
            print "iterNum: ", iterNum
            compute_postloss = True #change + the const C
            pregts, preps, preloss, params, postgts, postps, postloss = fcnv.computeLLandMaxMarginUpdate(ground_truth, samples, M, P, MSC, PSC, mix, 0.0001, compute_postloss)
            print preloss, params
            print "{0} !>= {1}".format(pregts - preps, preloss)
            if compute_postloss: print "{0} >= {1}".format(postgts - postps, postloss)
            print "------------------------------------------------------------------\n\n\n"
        
        #save the trained parameters to the file
        res_param_file = open(res_param_file_name, "w")    
        for p in sorted(params):
            if isinstance(params[p], list):
                print >>res_param_file, p, "=", " ".join(map(str, params[p]))
            else:
                print >>res_param_file, p, "=", params[p]
        res_param_file.close()
        
        return 0
    
    #res_file = open(out_file_name, 'w')
    file_name_prefix = target_file_name.split('/')[-1].split('.')[0].replace(':', '-')
    print "------------------ w/o TRAINING -------------------"
    test(fcnv, snp_positions, samples, M, P, MSC, PSC, mix, ground_truth, file_name_prefix)
示例#2
0
def main():
    #parse command line arguments
    parser = argparse.ArgumentParser(description='Performs fetal CNV analysis from maternal plasma and phased parental data.')
    parser.add_argument('target', type=str, nargs=1, help='path to file with background truth - "target"')
    parser.add_argument('plasma', type=str, nargs=1, help='path to file with plasma sequencing DOC for all chromosomal positions')
    parser.add_argument('ref', type=str, nargs=1, help='path to file with reference plasma sequencing DOC for all chromosomal positions')
    parser.add_argument('seq', type=str, nargs=1, help='path to ref. genomic sequence in fasta format')
    args = parser.parse_args()
    
    target_file_name = args.target[0]
    plasma_doc_file = open(args.plasma[0], "r")
    ref_doc_file = open(args.ref[0], "r")
    seq_file = open(args.seq[0], "r")
    
    #get genomic positions on the last lines of the pileup files to estimate the length of the chromosome
    with open(args.plasma[0], 'rb') as fh:
        fh.seek(-256, 2)
        last_pos_plasma = int(fh.readlines()[-1].decode().split(' ')[0])
        fh.close()
    with open(args.ref[0], 'rb') as fh:
        fh.seek(-256, 2)
        last_pos_ref = int(fh.readlines()[-1].decode().split(' ')[0])
        fh.close()    
    
    chr_length = max(last_pos_plasma, last_pos_ref) + 4742
    
    gc_sum = [0] * chr_length
    prefix_sum_plasma = [0] * chr_length
    prefix_count_plasma = [0] * chr_length
    prefix_sum_ref = [0] * chr_length
    prefix_count_ref = [0] * chr_length
    
    #get GC content prefix sums from the reference
    gen_pos = 0
    keep_reading = True
    while keep_reading:
        line = seq_file.readline().strip().upper()
        if len(line) == 0: break
        if line[0] == '>': continue
        for i in range(len(line)):
            gc_sum[gen_pos] = gc_sum[max(gen_pos - 1, 0)]
            if line[i] in 'GC': gc_sum[gen_pos] += 1
            gen_pos += 1
            if gen_pos >= chr_length:
                keep_reading = False
                break
    seq_file.close()
    
    last = 0
    for line in plasma_doc_file:
        row = map(int, line.split(' '))
        prefix_sum_plasma[row[0]] = prefix_sum_plasma[last] + row[1]
        prefix_count_plasma[row[0]] = prefix_count_plasma[last] + 1
        last = row[0]
    plasma_doc_file.close()
    
    last = 0
    for line in ref_doc_file:
        row = map(int, line.split(' '))
        prefix_sum_ref[row[0]] = prefix_sum_ref[last] + row[1]
        prefix_count_ref[row[0]] = prefix_count_ref[last] + 1
        last = row[0]
    ref_doc_file.close()
    
    snp_positions = []
    ground_truth = []
    target_file = open(target_file_name, 'r')
    for line in target_file.readlines():
        line = line.rstrip("\n").split("\t")
        snp_positions.append(int(line[0]))
        ground_truth.append(int(line[-1]))
    target_file.close()
    
    fcnv = cvrgHMM.coverageFCNV(snp_positions, prefix_sum_plasma, prefix_count_plasma, prefix_sum_ref, prefix_count_ref, gc_sum)
    
    mix = 0.13 #proportion of fetal genome in plasma
    #mix = fcnv.estimateMixture(samples, M, P)
    
    print "Est. Mixture: ", mix
    
    #res_file = open(out_file_name, 'w')
    file_name_prefix = target_file_name.split('/')[-1].split('.')[0].replace(':', '-')
    print "------------------ w/o TRAINING -------------------"
    test(fcnv, snp_positions, mix, ground_truth, file_name_prefix)
示例#3
0
def main():
    #parse command line arguments
    parser = argparse.ArgumentParser(
        description=
        'Performs fetal CNV analysis from maternal plasma and phased parental data.'
    )
    parser.add_argument(
        'input',
        type=str,
        nargs=1,
        help=
        'path to input file with allele counts in plasma and parental haplotypes'
    )
    parser.add_argument('target',
                        type=str,
                        nargs=1,
                        help='path to file with background truth - "target"')
    parser.add_argument(
        'plasma',
        type=str,
        nargs=1,
        help=
        'path to file with plasma sequencing DOC for all chromosomal positions'
    )
    parser.add_argument(
        'ref',
        type=str,
        nargs=1,
        help=
        'path to file with reference plasma sequencing DOC for all chromosomal positions'
    )
    parser.add_argument('seq',
                        type=str,
                        nargs=1,
                        help='path to ref. genomic sequence in fasta format')
    parser.add_argument('--ff',
                        type=float,
                        help='fetal mixture ratio',
                        default=-1.)
    parser.add_argument('--useCvrg',
                        help='use coverage flag',
                        action="store_true")
    args = parser.parse_args()

    in_file_name = args.input[0]
    target_file_name = args.target[0]
    plasma_doc_file = open(args.plasma[0], "r")
    ref_doc_file = open(args.ref[0], "r")
    seq_file = open(args.seq[0], "r")
    if args.ff > 0: mix = args.ff

    #print input info
    print "------------------------------------------"
    print "Running fCNV, input parameters:"
    print "input:", in_file_name
    print "target:", target_file_name
    print "plasma:", plasma_doc_file
    print "refDOC:", ref_doc_file
    print "seq:", seq_file
    print "--ff:", args.ff
    print "--useCvrg:", args.useCvrg
    print "------------------------------------------"
    os.system("hostname")

    #read the pre-processed input
    snp_positions, samples, M, P, MSC, PSC = readInput(in_file_name)

    #get genomic positions on the last lines of the pileup files to estimate the length of the chromosome
    with open(args.plasma[0], 'rb') as fh:
        fh.seek(-256, 2)
        last_pos_plasma = int(fh.readlines()[-1].decode().split(' ')[0])
        fh.close()
#    with open(args.ref[0], 'rb') as fh:
#        fh.seek(-256, 2)
#        last_pos_ref = int(fh.readlines()[-1].decode().split(' ')[0])
#        fh.close()
    chr_length = last_pos_plasma + 4742

    gc_sum = [0] * chr_length
    prefix_sum_plasma = [0] * chr_length
    prefix_count_plasma = [0] * chr_length
    prefix_sum_ref = [0] * chr_length
    prefix_count_ref = [0] * chr_length

    #get GC content prefix sums from the reference
    gen_pos = 0
    keep_reading = True
    while keep_reading:
        line = seq_file.readline().strip().upper()
        if len(line) == 0: break
        if line[0] == '>': continue
        for i in range(len(line)):
            gc_sum[gen_pos] = gc_sum[max(gen_pos - 1, 0)]
            if line[i] in 'GC': gc_sum[gen_pos] += 1
            gen_pos += 1
            if gen_pos >= chr_length:
                keep_reading = False
                break
    seq_file.close()

    last = 0
    while True:
        line = plasma_doc_file.readline()
        if not line: break
        row = map(int, line.split(' '))
        if row[0] >= chr_length: break
        prefix_sum_plasma[row[0]] = prefix_sum_plasma[last] + row[1]
        prefix_count_plasma[row[0]] = prefix_count_plasma[last] + 1
        last = row[0]
    plasma_doc_file.close()

    last = 0
    while True:
        line = ref_doc_file.readline()
        if not line: break
        row = map(int, line.split(' '))
        if row[0] >= chr_length: break
        prefix_sum_ref[row[0]] = prefix_sum_ref[last] + row[1]
        prefix_count_ref[row[0]] = prefix_count_ref[last] + 1
        last = row[0]
    ref_doc_file.close()

    #snp_positions = []
    ground_truth = []
    target_file = open(target_file_name, 'r')
    while True:
        line = target_file.readline()
        if not line: break
        line = line.rstrip("\n").split("\t")
        #snp_positions.append(int(line[0]))
        ground_truth.append(int(line[-1]))
    target_file.close()

    fcnv = fcnvHMM.FCNV(None, None, False)
    mix, mix_median, ct = fcnv.estimateMixture(samples, M, P)
    print "Est. Mixture: ", mix, mix_median, '(', ct, ')',
    #mix = 0.13 #proportion of fetal genome in plasma
    if args.ff > 0: mix = args.ff
    print "used:", mix

    cnv_prior = None
    if args.useCvrg:
        cvrg = cvrgHMM.coverageFCNV(snp_positions, prefix_sum_plasma,
                                    prefix_count_plasma, prefix_sum_ref,
                                    prefix_count_ref, gc_sum)
        cvrg_posterior = cvrg.posteriorDecoding(mix)
        #       byLL = cvrg.likelihoodDecoding(mix)
        del prefix_sum_plasma, prefix_count_plasma, prefix_sum_ref, prefix_count_ref, gc_sum

        cnv_prior = [[0., 0., 0.] for x in range(len(snp_positions))]
        for pos in range(len(cvrg_posterior)):
            for cp_num_posterior in cvrg_posterior[pos]:
                cnv_prior[pos][cp_num_posterior[1]] = cp_num_posterior[0]
        del cvrg, cvrg_posterior


#        ll_state = []
#        ll_value = []
#        for x in byLL[pos]:
#            ll_state.append(x[1])
#            ll_value.append(x[0])
#
#        ll_str = ''
#        for j in range(len(ll_state)):
#            cvrg.logNormalize(ll_value)
#            ll_str += "%.8f"%math.exp(ll_value[j])+' '+str(ll_state[j])+' | '
#
#        posterior_str = ''
#        for x in cvrg_posterior[pos]:
#            posterior_str += "%.8f"%math.exp(x[0])+' '+str(x[1])+' | '
#        posterior_str+='\t'

#        print snp_positions[pos], 'PP:', posterior_str, 'LL:', ll_str
#    del cvrg, cvrg_posterior, byLL

    del fcnv
    fcnv = fcnvHMM.FCNV(snp_positions, cnv_prior, args.useCvrg)

    ground_truth = []
    target_file = open(target_file_name, 'r')
    for line in target_file.readlines():
        line = line.rstrip("\n").split("\t")
        ground_truth.append(int(line[-1]))
    target_file.close()

    #res_file = open(out_file_name, 'w')
    file_name_prefix = target_file_name.split('/')[-1].split('.')[0].replace(
        ':', '-')
    print "------------------ w/o TRAINING -------------------"
    test(fcnv, snp_positions, samples, M, P, MSC, PSC, mix, ground_truth,
         file_name_prefix)
    #test(fcnv, snp_positions[:1000], samples[:1000], M[:1000], P[:1000], MSC[:1000], PSC[:1000], mix, ground_truth[:1000], file_name_prefix)
    '''
示例#4
0
def main():
    #parse command line arguments
    parser = argparse.ArgumentParser(description='Performs fetal CNV analysis from maternal plasma and phased parental data.')
    parser.add_argument('input', type=str, nargs=1, help='path to input file with allele counts in plasma and parental haplotypes')
    parser.add_argument('target', type=str, nargs=1, help='path to file with background truth - "target"')
    parser.add_argument('plasma', type=str, nargs=1, help='path to file with plasma sequencing DOC for all chromosomal positions')
    parser.add_argument('ref', type=str, nargs=1, help='path to file with reference plasma sequencing DOC for all chromosomal positions')
    parser.add_argument('seq', type=str, nargs=1, help='path to ref. genomic sequence in fasta format')
    parser.add_argument('--ff', type=float, help='fetal mixture ratio', default=-1.)
    parser.add_argument('--useCvrg', help='use coverage flag', action="store_true")
    args = parser.parse_args()
    
    in_file_name = args.input[0]
    target_file_name = args.target[0]
    plasma_doc_file = open(args.plasma[0], "r")
    ref_doc_file = open(args.ref[0], "r")
    seq_file = open(args.seq[0], "r")
    if args.ff > 0: mix = args.ff
    
    #print input info
    print "------------------------------------------"
    print "Running fCNV, input parameters:"
    print "input:", in_file_name
    print "target:", target_file_name
    print "plasma:", plasma_doc_file
    print "refDOC:", ref_doc_file
    print "seq:", seq_file
    print "--ff:", args.ff
    print "--useCvrg:", args.useCvrg
    print "------------------------------------------"
    os.system("hostname")
    
    #read the pre-processed input
    snp_positions, samples, M, P, MSC, PSC = readInput(in_file_name)
    
    #get genomic positions on the last lines of the pileup files to estimate the length of the chromosome
    with open(args.plasma[0], 'rb') as fh:
        fh.seek(-256, 2)
        last_pos_plasma = int(fh.readlines()[-1].decode().split(' ')[0])
        fh.close()
#    with open(args.ref[0], 'rb') as fh:
#        fh.seek(-256, 2)
#        last_pos_ref = int(fh.readlines()[-1].decode().split(' ')[0])
#        fh.close()
    chr_length = last_pos_plasma + 4742
    
    gc_sum = [0] * chr_length
    prefix_sum_plasma = [0] * chr_length
    prefix_count_plasma = [0] * chr_length
    prefix_sum_ref = [0] * chr_length
    prefix_count_ref = [0] * chr_length
    
    #get GC content prefix sums from the reference
    gen_pos = 0
    keep_reading = True
    while keep_reading:
        line = seq_file.readline().strip().upper()
        if len(line) == 0: break
        if line[0] == '>': continue
        for i in range(len(line)):
            gc_sum[gen_pos] = gc_sum[max(gen_pos - 1, 0)]
            if line[i] in 'GC': gc_sum[gen_pos] += 1
            gen_pos += 1
            if gen_pos >= chr_length:
                keep_reading = False
                break
    seq_file.close()
    
    last = 0
    while True: 
        line = plasma_doc_file.readline()
        if not line: break
        row = map(int, line.split(' '))
        if row[0] >= chr_length: break
        prefix_sum_plasma[row[0]] = prefix_sum_plasma[last] + row[1]
        prefix_count_plasma[row[0]] = prefix_count_plasma[last] + 1
        last = row[0]
    plasma_doc_file.close()
    
    last = 0
    while True: 
        line = ref_doc_file.readline()
        if not line: break
        row = map(int, line.split(' '))
        if row[0] >= chr_length: break
        prefix_sum_ref[row[0]] = prefix_sum_ref[last] + row[1]
        prefix_count_ref[row[0]] = prefix_count_ref[last] + 1
        last = row[0]
    ref_doc_file.close()
    
    #snp_positions = []
    ground_truth = []
    target_file = open(target_file_name, 'r')
    while True: 
        line = target_file.readline()
        if not line: break
        line = line.rstrip("\n").split("\t")
        #snp_positions.append(int(line[0]))
        ground_truth.append(int(line[-1]))
    target_file.close()
    
    fcnv = fcnvHMM.FCNV(None, None, False)
    mix, mix_median, ct = fcnv.estimateMixture(samples, M, P)
    print "Est. Mixture: ", mix, mix_median, '(', ct ,')',
    #mix = 0.13 #proportion of fetal genome in plasma
    if args.ff > 0: mix = args.ff
    print "used:", mix
    
    cnv_prior = None
    if args.useCvrg:
        cvrg = cvrgHMM.coverageFCNV(snp_positions, prefix_sum_plasma, prefix_count_plasma, prefix_sum_ref, prefix_count_ref, gc_sum)
        cvrg_posterior = cvrg.posteriorDecoding(mix)
#       byLL = cvrg.likelihoodDecoding(mix)
        del prefix_sum_plasma, prefix_count_plasma, prefix_sum_ref, prefix_count_ref, gc_sum
        
        cnv_prior = [ [0., 0., 0.] for x in range(len(snp_positions)) ]
        for pos in range(len(cvrg_posterior)):
            for cp_num_posterior in cvrg_posterior[pos]:
                cnv_prior[pos][cp_num_posterior[1]] = cp_num_posterior[0]
        del cvrg, cvrg_posterior
    
#        ll_state = []
#        ll_value = []
#        for x in byLL[pos]:   
#            ll_state.append(x[1])
#            ll_value.append(x[0])
#        
#        ll_str = ''
#        for j in range(len(ll_state)):
#            cvrg.logNormalize(ll_value)
#            ll_str += "%.8f"%math.exp(ll_value[j])+' '+str(ll_state[j])+' | '
#            
#        posterior_str = ''
#        for x in cvrg_posterior[pos]:
#            posterior_str += "%.8f"%math.exp(x[0])+' '+str(x[1])+' | '
#        posterior_str+='\t'

#        print snp_positions[pos], 'PP:', posterior_str, 'LL:', ll_str
#    del cvrg, cvrg_posterior, byLL
    
    del fcnv
    fcnv = fcnvHMM.FCNV(snp_positions, cnv_prior, args.useCvrg)
    
    ground_truth = []
    target_file = open(target_file_name, 'r')
    for line in target_file.readlines():
        line = line.rstrip("\n").split("\t")
        ground_truth.append(int(line[-1]))
    target_file.close()
    
    #res_file = open(out_file_name, 'w')
    file_name_prefix = target_file_name.split('/')[-1].split('.')[0].replace(':', '-')
    print "------------------ w/o TRAINING -------------------"
    test(fcnv, snp_positions, samples, M, P, MSC, PSC, mix, ground_truth, file_name_prefix)
    #test(fcnv, snp_positions[:1000], samples[:1000], M[:1000], P[:1000], MSC[:1000], PSC[:1000], mix, ground_truth[:1000], file_name_prefix)
    
    '''