Exemplo n.º 1
0
import sys
from vcfDict import vcfDict


if __name__ == "__main__":
    # Usage: <script> <vcf file> <bp file> <output file>
    #
    # Copies the bp file to the output file. Lines whose first character is
    # "p" are kept only when their first comma-separated field is found in the
    # vcf dictionary; every other line is copied through unchanged.
    vcfLoc, bpFile, bpOut = sys.argv[1], sys.argv[2], sys.argv[3]

    # The context manager guarantees the output file is flushed and closed
    # even if loading the vcf or reading the bp file raises part-way through.
    with open(bpOut, "w") as outFile:
        vcf = vcfDict(vcfLoc)
        vcf.loadDict()

        with open(bpFile) as f:
            for line in f:
                if line[0] != "p":
                    # Not a "p" line: pass it through untouched.
                    outFile.write(line)
                    continue

                # The first comma-separated field is the lookup key.
                windowID = line.split(",")[0]
                if vcf.getLine(windowID) != "":
                    # Site is present
                    outFile.write(line)


Exemplo n.º 2
0
def isDataLine(line):
    """
    Tell whether a line holds site data.

    A line counts as data when it is longer than one character and does
    not begin with "#".
    """
    return len(line) > 1 and line[0] != "#"


if __name__ == "__main__":
    # Usage: <script> <anc file> <desc file> <merge output file>
    # NOTE(review): this snippet is cut off inside the second loop; only the
    # header-copy step is fully visible here.
    """

    """
    anc_loc, desc_loc, merge_loc = sys.argv[1], sys.argv[2], sys.argv[3]
    desc = vcfDict(desc_loc)
    desc.loadDict()
    merge_file = open(merge_loc, "w")

    # Write header information to merge file: copy every leading non-data
    # line of the desc file (per isDataLine) and stop at the first data line.
    with open(desc_loc) as desc_file:
        for desc_line in desc_file:
            if not isDataLine(desc_line):
                merge_file.write(desc_line)
            else:
                # Data lines start here, so the header is finished.
                break

    # Walk the anc file and act on its data lines only.
    with open(anc_loc) as anc_file:
        for anc_line in anc_file:
            if isDataLine(anc_line):
                # NOTE(review): the body of this branch is not visible in this view.
def isDataLine(line):
    """
    Report whether the given line contains site data.

    Lines of at most one character, and lines starting with "#", are not
    data lines.
    """
    if len(line) <= 1:
        return False
    return line[0] != "#"


if __name__ == "__main__":
    # Usage: <script> <locations file> <vcf file> <output file>
    # NOTE(review): this snippet ends after the first statement of the
    # locations loop; the lookup/write step is not visible here.
    """
    Given a .txt file of locations("CHROM:POS") and a vcf file, 
     creates a new vcf file of sites that match the given locations.
    """
    locations_dir, vcf_dir, out_loc = sys.argv[1], sys.argv[2], sys.argv[3]
    vcf_dict = vcfDict(vcf_dir)
    vcf_dict.loadDict()
    # NOTE(review): out_file is not closed anywhere in the visible part of
    # this block -- confirm it is closed further down.
    out_file = open(out_loc, "w")

    # Write header information to the output file: copy every leading
    # non-data line of the vcf file and stop at the first data line.
    with open(vcf_dir) as vcf_file:
        for vcf_line in vcf_file:
            if not isDataLine(vcf_line):
                out_file.write(vcf_line)
            else:
                # Data lines start here, so the header is finished.
                break

    with open(locations_dir) as loc_file:
        for loc in loc_file:
            # Strip all spaces from the location entry (the newline is kept).
            loc = loc.replace(" ", "")
def isDataLine(line):
    """
    Decide whether a line carries site data.

    Returns False for lines of one character or fewer and for lines whose
    first character is "#"; True otherwise.
    """
    long_enough = len(line) > 1
    return long_enough and line[0] != "#"


if __name__ == "__main__":
    # Usage: <script> <CC3 file> <GP2 file> <common sites output file>
    #
    # For every data line of the CC3 file, builds a key from its first two
    # whitespace-separated columns ("col0:col1") and looks it up in the
    # dictionary loaded from the GP2 file. Each non-empty lookup result is
    # written to the output file.
    CC3_loc, GP2_loc, common_sites_loc = sys.argv[1], sys.argv[2], sys.argv[3]
    desc = vcfDict(GP2_loc)
    desc.loadDict()

    # The output handle was previously never closed, so buffered writes could
    # be lost; the context manager flushes and closes it on every exit path.
    with open(common_sites_loc, "w") as merge_file:
        with open(CC3_loc) as anc_file:
            for anc_line in anc_file:
                if not isDataLine(anc_line):
                    continue

                anc_line_col = anc_line.split()
                anc_line_key = anc_line_col[0] + ":" + anc_line_col[1]
                desc_line = desc.getLine(anc_line_key)

                # getLine signals "not found" with an empty string.
                if desc_line != "":
                    merge_file.write(desc_line)
Exemplo n.º 5
0

def sameAltBase(line_a, line_b):
    """
    Compare the alternate-base column of two site lines.

    Both lines are split on whitespace; the result is True when their
    fifth field (index 4) is identical.
    """
    ALT_COLUMN = 4
    fields_a, fields_b = line_a.split(), line_b.split()
    return fields_a[ALT_COLUMN] == fields_b[ALT_COLUMN]


if __name__ == "__main__":
    # Usage: <script> <vcf file a> <vcf file b>
    # NOTE(review): this snippet is cut off at the start of the comparison
    # loop; the loop body is not visible here.

    # Get location of both .vcf files and load vcfDicts for each
    file_a, file_b = sys.argv[1], sys.argv[2]
    dict_a = vcfDict(file_a)
    dict_b = vcfDict(file_b)
    dict_a.loadDict()
    dict_b.loadDict()

    # Size information for each file before processing
    aSize = dict_a.getSize()
    bSize = dict_b.getSize()

    a_keys = dict_a.getKeys()
    # Counters; presumably sites present in both files and sites sharing the
    # same alternate base -- TODO confirm against the loop body.
    bAnda = 0
    same_alt = 0

    # Check every key in dict_a against dict_b.
    # If a site is present, .getLine() removes the site afterwards.
    for key in a_keys:
    """
    Determines in line contains site data
    """
    if len(line) > 1:
        return line[0] != "#"
    return False




if __name__ == "__main__":
    # Usage: <script> <known sites file> <ccm file>
    #
    # For every data line of the ccm file, builds a key from its first two
    # whitespace-separated columns ("col0:col1") and prints the matching line
    # from the known-sites dictionary when the lookup returns one.
    known_sites, ccm = sys.argv[1], sys.argv[2]
    sites = vcfDict(known_sites)
    sites.loadDict()

    with open(ccm) as ccm_vcf:
        for line in ccm_vcf:
            if not isDataLine(line):
                continue

            # Split once and reuse the columns for the key.
            columns = line.split()
            key = columns[0] + ":" + columns[1]
            siteLine = sites.getLine(key)

            # getLine signals "not found" with an empty string.
            if siteLine != "":
                # Parenthesised single-argument form prints the same text
                # under both Python 2 and Python 3.
                print(siteLine)


Exemplo n.º 7
0
import sys
import traceback
from vcfDict import vcfDict
from vcfLine import vcfLine
from vcfSample import vcfSample
from mpileLine import mpileLine

if __name__ == "__main__":
    # Usage: <script> <mpileup input> <vcf input> <csv output>
    mpile_in, vcf_in, csv_out = sys.argv[1], sys.argv[2], sys.argv[3]

    # Load dictionary of sites in vcf_in file
    vcfSites = vcfDict(vcf_in)
    vcfSites.loadDict()

    # Create output file for csv_out. Nothing is written to it yet in this
    # block, but it is still created/truncated as before; the context manager
    # makes sure it is closed even if reading the pileup raises.
    with open(csv_out, "w") as outFile:
        with open(mpile_in) as pileFile:
            for line in pileFile:
                try:
                    pile_line = mpileLine(line)
                    print(pile_line.repr())
                    if vcfSites.siteExists(pile_line.siteID):
                        # Result is currently unused; the construction is kept
                        # so any parsing error is still reported below.
                        vcf_line = vcfLine(vcfSites.getLine(pile_line.siteID))

                except Exception:
                    # Deliberate best-effort: report the failure and the
                    # offending line, then carry on with the next line.
                    print(sys.exc_info()[0])
                    print(traceback.format_exc())
                    print(line)
def isDataLine(line):
    """
    Check whether a line contains site data.

    Anything shorter than two characters is rejected; otherwise the line
    is data unless it starts with "#".
    """
    return False if len(line) < 2 else line[0] != "#"


if __name__ == "__main__":
    # Usage: <script> <common sites file> <target file> <output file>
    # NOTE(review): this snippet is cut off inside the innermost branch; what
    # is eventually written to outFile is not visible here.
    """

    """
    common_sites_loc,target_file,output_file = sys.argv[1],sys.argv[2],sys.argv[3]
    desc = vcfDict(common_sites_loc)
    desc.loadDict()

    outFile = open(output_file,"w")
   
    with open(target_file) as t_file:
        for t_line in t_file:
            if isDataLine(t_line):

                # Key is the first two whitespace-separated columns joined by ":".
                t_line_col = str.split(t_line)
                t_line_key = t_line_col[0] + ":" + t_line_col[1]
                desc_line = desc.getLine(t_line_key)

                # A non-empty result means the key was found in the common sites.
                if desc_line != "":
                    #outFile.write(desc_line)
                    # Do not write the line.
Exemplo n.º 9
0
from vcfDict import vcfDict
import sys


if __name__ == "__main__":
    # Usage: <script> <mpileup input> <vcf sites file> <mpileup output>
    #
    # Copies to the output only those input lines whose first two
    # tab-separated columns, joined as "col0:col1", exist in the vcf sites
    # dictionary.
    mpile_in, vcf_sites_loc, mpile_out_loc = sys.argv[1], sys.argv[2], sys.argv[3]

    vcfSites = vcfDict(vcf_sites_loc)
    vcfSites.loadDict()

    # Both handles are managed by context managers: the output file is closed
    # even if reading fails, and no explicit close() is needed afterwards.
    with open(mpile_out_loc, "w") as mpile_out:
        with open(mpile_in) as mpile_file:
            for line in mpile_file:
                sline = line.split("\t")
                lineID = sline[0] + ":" + sline[1]

                if vcfSites.siteExists(lineID):
                    mpile_out.write(line)


Exemplo n.º 10
0
import sys
from vcfDict import vcfDict

if __name__ == "__main__":
    # Usage: <script> <vcf file> <bp csv file> <output file>
    #
    # Writes the vcf header lines (those starting with "#") to the output,
    # then appends the vcf line of every site listed in the bp file.
    vcfFile, bpFile, outFile_loc = sys.argv[1], sys.argv[2], sys.argv[3]

    vcfSites = vcfDict(vcfFile)
    vcfSites.loadDict()

    # The context manager closes the output file on every exit path; the
    # input handles are closed by their own `with` blocks.
    with open(outFile_loc, "w") as outFile:
        with open(vcfFile) as vcf_f:
            for line in vcf_f:
                if line[0] == "#":
                    outFile.write(line)

        with open(bpFile) as bp:
            for line in bp:
                # Skip lines whose first field is an empty quoted string
                # (presumably the csv header row -- TODO confirm). The old
                # test `line[0] != '""'` compared one character with a
                # two-character string and was therefore always true.
                if line.startswith('""'):
                    continue

                sline = line.split(",")
                # Second field with its first and last characters removed
                # (the surrounding quotes, judging by the slice).
                site_id = sline[1][1:-1]

                # getLine signals "not found" with an empty string.
                resp = vcfSites.getLine(site_id)
                if resp != "":
                    outFile.write(resp)
Exemplo n.º 11
0
    #Removes sites from intAB present in other samples except for sample B and sample A
    for i in range(0, len(samples)):
        if i != bSetIdx and (samples[i].getFileName() !=
                             targetDict.getFileName()):
            for ele in samples[i].getKeys():
                try:
                    intAB.remove(ele)
                except:
                    pass
    return intAB


if __name__ == "__main__":
    # Builds one vcfDict per sample file using hard-coded relative paths.
    # NOTE(review): this snippet is cut off after the first CC sample; no
    # loadDict() call or further processing is visible here.
    """
    """
    # GP2-3 samples B through O.
    GPB = vcfDict("../data/compareSNPs/tmp/GP2-3_B_m0.vcf")
    GPC = vcfDict("../data/compareSNPs/tmp/GP2-3_C_m0.vcf")
    GPD = vcfDict("../data/compareSNPs/tmp/GP2-3_D_m0.vcf")
    GPE = vcfDict("../data/compareSNPs/tmp/GP2-3_E_m0.vcf")
    GPF = vcfDict("../data/compareSNPs/tmp/GP2-3_F_m0.vcf")
    GPG = vcfDict("../data/compareSNPs/tmp/GP2-3_G_m0.vcf")
    GPH = vcfDict("../data/compareSNPs/tmp/GP2-3_H_m0.vcf")
    GPI = vcfDict("../data/compareSNPs/tmp/GP2-3_I_m0.vcf")
    GPJ = vcfDict("../data/compareSNPs/tmp/GP2-3_J_m0.vcf")
    GPK = vcfDict("../data/compareSNPs/tmp/GP2-3_K_m0.vcf")
    GPL = vcfDict("../data/compareSNPs/tmp/GP2-3_L_m0.vcf")
    GPM = vcfDict("../data/compareSNPs/tmp/GP2-3_M_m0.vcf")
    GPN = vcfDict("../data/compareSNPs/tmp/GP2-3_N_m0.vcf")
    GPO = vcfDict("../data/compareSNPs/tmp/GP2-3_O_m0.vcf")

    # CC samples.
    CCB = vcfDict("../data/compareSNPs/tmp/CC_B_m0.vcf")
Exemplo n.º 12
0
    Return whether the given site is one of the target sites
    """
    lineID = lineVcf.chrom + ":" + lineVcf.pos
    return targetSitesDict.getLine(lineID) != ""


if __name__ == "__main__":
    # Usage: <script> <unfiltered vcf> <target sites vcf> <csv output>
    # NOTE(review): this snippet is cut off inside the main loop; the code
    # that emits the csv rows is not visible here.
    vcf_unfilt, vcf_targetSites, out_loc = sys.argv[1], sys.argv[2], sys.argv[
        3]

    # Open the output file and write the csv column header first.
    outFile = open(out_loc, "w")
    outFile.write(
        "siteID,start_chrom,start_pos,stop_chrom,stop_pos,altSiteCount,refSiteCount,avgDepth,avgRefReads,avgAltReads,avgOtherReads"
        + "\n")

    targetSitesDict = vcfDict(vcf_targetSites)
    targetSitesDict.loadDict()

    # Sliding window of the most recently seen data lines, oldest first.
    window = []

    with open(vcf_unfilt) as unfilt:
        for line in unfilt:
            lineVcf = vcfLine(line)

            # NOTE(review): isDataLine is read as an attribute, not called; if
            # vcfLine defines it as a method this test is always true -- confirm.
            if lineVcf.isDataLine:  #Add data lines from unfiltered file to window
                window.append(lineVcf)

                # WINDOW_SIZE is defined outside this view.
                if len(
                        window
                ) == WINDOW_SIZE + 1:  #Remove oldest site seen keep size == WINDOW_SIZE
                    window.pop(0)