示例#1
0
        
        vcf_out.write( line_i + '\n' )
        line_i = vcf_in.readline().rstrip()
    
    vcf_out.write( line_i + '\n' )

    # This line will be #CHROM:
    header = line_i.split('\t')
    sample_index = header.index(sample) - 9
    
    # This will be the first variant line:
    line_i = vcf_in.readline().rstrip()
    
    while line_i:
        
        vcf_i = genome.Vcf_line( line_i )
                
        if vcf_i.filters == 'PASS':
        
            refMQ = float( vcf_i.get_sample_value('refMQ', sample_index) )
            altMQ = float( vcf_i.get_sample_value('altMQ', sample_index) )
            refBQ = float( vcf_i.get_sample_value('refBQ', sample_index) )
            altBQ = float( vcf_i.get_sample_value('altBQ', sample_index) )
            refNM = float( vcf_i.get_sample_value('refNM', sample_index) )
            altNM = float( vcf_i.get_sample_value('altNM', sample_index) )
            fetSB = float( vcf_i.get_sample_value('fetSB', sample_index) )
            fetCD = float( vcf_i.get_sample_value('fetCD', sample_index) )
            zMQ   = float( vcf_i.get_sample_value('zMQ',   sample_index) )
            zBQ   = float( vcf_i.get_sample_value('zBQ',   sample_index) )
            MQ0   = int(   vcf_i.get_sample_value('MQ0',   sample_index) )
            VAF   = float( vcf_i.get_sample_value('VAF',   sample_index) )
示例#2
0
### The input is either a single file, or several files processed in order (chr1, chr2, ..., or in the order they were listed on input):
for chr_i_vcf in right_files:

    with genome.open_textfile(chr_i_vcf) as vcf:

        line_i = vcf.readline().rstrip()

        # Skip headers from now on:
        while line_i.startswith('#'):
            line_i = vcf.readline().rstrip()

        # Doing the work here:
        while line_i:

            vcf_i = genome.Vcf_line(line_i)

            num_samples = len(vcf_i.samples)
            if num_samples == 1:
                paired = False

            elif num_samples == 2:
                paired = True

            elif num_samples > 2:
                sys.stderr.write(
                    'We found more than 2 sammples in this VCF file. It may be messed up, but I\'ll just assume the first 2 samples mean anything at all'
                )
                paired = True

            elif num_samples == 0:
示例#3
0
def _append_id_tag(fields, tag):
    """Append *tag* to the ID column of a split VCF record and re-join it.

    ``fields`` is the tab-split record; its ID column (``idx_id``) is updated
    in place. A leading ``.`` placeholder (the "missing ID" marker) is dropped
    so that ``.`` + tag yields just the tag rather than ``.;tag``.

    Returns the record as a single tab-joined string (no trailing newline).
    """
    # The dot must be escaped: an unescaped '.' would also strip a genuine
    # one-character ID (e.g. 'A;tag' -> 'tag').
    fields[idx_id] = re.sub(r'^\.;', '', fields[idx_id] + ';' + tag)
    return '\t'.join(fields)


def catch_up(line_1, line_2, file_1, file_2, output_vcf, id_1, id_2, id_12):
    """Advance two VCF streams until they reach the same coordinate.

    Starting from the current lines ``line_1`` / ``line_2``, repeatedly asks
    ``whoisbehind`` which stream is behind, writes the lagging record to
    ``output_vcf`` tagged with that stream's ID, and reads the next line from
    the lagging file. The loop ends when ``whoisbehind`` returns 10 (per the
    original comments: coordinates are equal, or both files are finished).

    Args:
        line_1, line_2: Current variant lines from ``file_1`` / ``file_2``.
        file_1, file_2: Open file handles positioned after those lines.
        output_vcf: Writable handle receiving the tagged records.
        id_1: Tag appended to the ID column of records only in file 1.
        id_2: Tag appended to the ID column of records only in file 2.
        id_12: Tag appended to the ID column of the record found in both.

    Returns:
        ``42`` when both current chromosomes are empty strings (presumably
        both files are exhausted -- confirm against ``genome.Vcf_line``);
        otherwise the tuple ``(line_1, line_2)``, where ``line_1`` is the
        shared record already tagged with ``id_12`` and written to
        ``output_vcf`` (without trailing newline) and ``line_2`` is the
        untouched current line of file 2.
    """
    vcf_1 = genome.Vcf_line(line_1)
    vcf_2 = genome.Vcf_line(line_2)

    coord_1 = [vcf_1.chromosome, vcf_1.position]
    coord_2 = [vcf_2.chromosome, vcf_2.position]

    # NOTE(review): this looks like leftover debug output on stdout; kept
    # because callers may rely on the existing output.
    print(coord_1, coord_2)

    is_behind = whoisbehind(coord_1, coord_2)

    # As long as the coordinates are not the same, and both files are not
    # finished:
    while is_behind != 10:

        # 1st VCF is behind: emit its record (unless filtered), then advance.
        if is_behind == 0:

            item_1 = line_1.rstrip('\n').split('\t')

            # Records whose FILTER is 'PrintEmALL' are skipped, not written.
            if item_1[idx_filter] != 'PrintEmALL':
                output_vcf.write(_append_id_tag(item_1, id_1) + '\n')

            line_1 = file_1.readline()
            vcf_1 = genome.Vcf_line(line_1)
            coord_1 = [vcf_1.chromosome, vcf_1.position]

        # 2nd VCF is behind: always emit its record, then advance.
        elif is_behind == 1:

            item_2 = line_2.rstrip('\n').split('\t')
            output_vcf.write(_append_id_tag(item_2, id_2) + '\n')

            line_2 = file_2.readline()
            vcf_2 = genome.Vcf_line(line_2)
            coord_2 = [vcf_2.chromosome, vcf_2.position]

        is_behind = whoisbehind(coord_1, coord_2)

    # Both chromosomes empty: nothing left to merge.
    if coord_1[0] == coord_2[0] == '':
        return 42

    # Same coordinate in both files: write file 1's record tagged as shared.
    line_1 = _append_id_tag(line_1.rstrip('\n').split('\t'), id_12)
    output_vcf.write(line_1 + '\n')

    return (
        line_1,
        line_2,
    )
示例#4
0
        while strelka_line.startswith('#'):
            strelka_line = strelka.readline().rstrip()

    # Get through all the headers:
    while my_line.startswith('#') or my_line.startswith('track='):
        my_line = my_sites.readline().rstrip()

    # First line:
    outhandle.write(out_header.replace('{', '').replace('}', '') + '\n')

    while my_line:

        # If VCF, get all the variants with the same coordinate into a list:
        if is_vcf:

            my_vcf = genome.Vcf_line(my_line)
            my_coordinates = [(my_vcf.chromosome, my_vcf.position)]

            variants_at_my_coordinate = []

            alt_bases = my_vcf.altbase.split(',')
            for alt_i in alt_bases:
                vcf_i = copy(my_vcf)
                vcf_i.altbase = alt_i
                variants_at_my_coordinate.append(vcf_i)

            # As long as the "coordinate" stays the same, it will keep reading until it's different.
            while my_coordinates[0] == (my_vcf.chromosome, my_vcf.position):

                my_line = my_sites.readline().rstrip()
                my_vcf = genome.Vcf_line(my_line)
示例#5
0
        '##FORMAT=<ID=SCORE,Number=1,Type=Float,Description="SomaticSeq Probability (either fraction or Phred)">\n'
    )

    tumor_column = line_in.split('\t').index(tumor)
    tumor_idx = tumor_column - 9
    (normal_column, normal_idx) = (9, 0) if tumor_idx == 1 else (None, None)

    # This is the #CHROM line
    vcfout.write(line_in + '\n')

    line_in = vcfin.readline().rstrip('\n')

    # Move COMBO and NUM_TOOLS from INFO to Tumor Sample, and move QUAL to the Tumor Sample as well
    while line_in:

        vcf_line_in = genome.Vcf_line(line_in)

        # New INFO
        new_info = []
        for info_item in vcf_line_in.get_info_items():
            if not (info_item.startswith('NUM_TOOLS=')
                    or info_item.startswith(caller_string)):
                new_info.append(info_item)

        if new_info == []:
            new_info_line = '.'
        else:
            new_info_line = ';'.join(new_info)

        # FORMAT:
        if somaticseq_trained: