示例#1
0
class VcfFile(object):
    """A VCF file: its ## meta lines, its #CHROM header line and its data lines.

    Only ``filename`` is stored; nothing in this class opens it.  Every
    parsing/yielding method takes an iterable of lines (``fh``, typically an
    open file handle) and consumes lines from it.
    """

    def __init__(self, filename):
        """Create the meta/header/misc line holders and remember ``filename``."""
        self.metaline = MetaLines()
        self.headerline = HeaderLines()
        self.miscline = MiscLines()
        self.filename = filename

    def _dispatchHeaderLine(self, line):
        """Route one header-section line to the holder that owns it.

        Returns True when ``line`` is the #CHROM column header (i.e. the
        header section is finished), False otherwise.
        """
        stripped = line.strip()
        # Prefix tests: a tag that merely appears somewhere inside another
        # line (e.g. in a quoted description) must not be mis-routed.
        if stripped.startswith('##fileformat'):
            self.metaline.setFileFormat(stripped)
        elif stripped.startswith('##INFO'):
            self.metaline.parseMetaInfo(line)
        elif stripped.startswith('##FILTER'):
            self.metaline.parseMetaFilter(line)
        elif stripped.startswith('##FORMAT'):
            self.metaline.parseMetaFormat(line)
        elif stripped.startswith('#CHROM'):
            self.headerline.add_format_column(stripped)
            self.headerline.append_samplelist(stripped)
            return True
        else:
            # Anything else seen before #CHROM is kept as a misc line.  It is
            # passed without its newline because returnHeader() re-joins the
            # header with "\n".
            self.miscline.parseMiscInfo(stripped)
        return False

    def parseMetaAndHeaderLines(self, fh):
        """Parse the ## meta lines and the #CHROM header line from ``fh``.

        Reading stops right after the #CHROM line, so ``fh`` is left
        positioned at the first data line.
        """
        for line in fh:
            if self._dispatchHeaderLine(line):
                break

    def parseMetaLines(self, fh):
        """Parse the header section like parseMetaAndHeaderLines, then print
        the #CHROM header line once it has been reached."""
        for line in fh:
            if self._dispatchHeaderLine(line):
                self.printHeaderLine()
                break

    def hasFormatHeader(self, headerline):
        """Forward ``headerline`` to HeaderLines.add_format_column.

        Despite the name, this method returns nothing.
        """
        self.headerline.add_format_column(headerline)

    def parseHeaderLine(self, fh):
        """Consume exactly one line from ``fh`` and, if it is the #CHROM
        line, record it on the header holder.  An empty ``fh`` is a no-op."""
        for line in fh:
            stripped = line.strip()
            if stripped.startswith('#CHROM'):
                self.headerline.add_format_column(stripped)
                self.headerline.append_samplelist(stripped)
            # Only the first line is ever examined.
            break

    def printHeaderLine(self):
        """Print the string form of the #CHROM header line."""
        print(self.headerline.toString())

    def getSampleList(self):
        """Return the sample list held by the header line object."""
        return self.headerline.getSampleList()

    def setSampleList(self, newlist):
        """Replace the sample list held by the header line object."""
        self.headerline.setSampleList(newlist)

    def getMetaInfoNames(self):
        """Return the IDs of the INFO metalines."""
        return self.metaline.getMetaInfoNames()

    def getMetaInfoDescription(self):
        """Return the descriptions of the INFO metalines."""
        return self.metaline.getMetaInfoDescription()

    def addMetaInfoHeader(self, id, type, number, description):
        """Add an ##INFO header to the VCF file."""
        # Parameter names shadow builtins but are part of the public
        # signature, so they are kept as they are.
        self.metaline.addMetaInfo(id, type, number, description)

    def addMetaFilterHeader(self, id, description):
        """Add a ##FILTER header to the VCF file."""
        self.metaline.addMetaFilter(id, description)

    def getMetaFilterNames(self):
        """Return the IDs of the FILTER metalines."""
        return self.metaline.getMetaFilterNames()

    def getMetaFilterDescription(self):
        """Return a list of (id, description) tuples for the FILTER metalines."""
        return self.metaline.getMetaFilterDescription()

    def getMetaFormatNames(self):
        """Return the IDs of the FORMAT metalines."""
        return self.metaline.getMetaFormatNames()

    def addMetaFormatHeader(self, formatObj):
        """Add a ##FORMAT header."""
        self.metaline.addMetaFormat(formatObj)

    def getMetaFormatDescription(self):
        """Return a list of (id, description) tuples for the FORMAT metalines."""
        return self.metaline.getMetaFormatDescription()

    def yieldMetaInfoLines(self):
        """Yield the string representation of each ##INFO metaline."""
        return self.metaline.yieldPrintMetaInfoLines()

    def yieldMetaFormatLines(self):
        """Yield the string representation of each ##FORMAT metaline."""
        return self.metaline.yieldPrintMetaFormatLines()

    def yieldMetaFilterLines(self):
        """Yield the string representation of each ##FILTER metaline."""
        return self.metaline.yieldPrintMetaFilterLines()

    def printMetaInfoLines(self):
        """Print the ##INFO metalines, one per line."""
        for text in self.yieldMetaInfoLines():
            print(text)

    def printMetaFormatLines(self):
        """Print the ##FORMAT metalines, one per line."""
        for text in self.yieldMetaFormatLines():
            print(text)

    def printMetaFilterLines(self):
        """Print the ##FILTER metalines, one per line."""
        for text in self.yieldMetaFilterLines():
            print(text)

    def printMetaLines(self):
        """Print the INFO, then FORMAT, then FILTER metalines."""
        self.printMetaInfoLines()
        self.printMetaFormatLines()
        self.printMetaFilterLines()

    def returnHeader(self):
        """Collect all lines that start with ## plus the #CHROM line and
        return them as one newline-joined string.

        Order: fileformat, INFO, FORMAT, FILTER, misc lines, #CHROM line.
        """
        headerlines = [self.metaline.getFileFormat()]
        headerlines.extend(self.yieldMetaInfoLines())
        headerlines.extend(self.yieldMetaFormatLines())
        headerlines.extend(self.yieldMetaFilterLines())
        headerlines.extend(self.miscline.yieldPrintMiscInfo())
        headerlines.append(self.headerline.toString())
        return "\n".join(headerlines)

    def printMetaAndHeaderLines(self):
        """Print the full header as built by returnHeader()."""
        print(self.returnHeader())

    def _recordFromFields(self, fields):
        """Build a VcfRecord from the first eight tab-separated columns.

        Raises ValueError when fewer than eight columns are present.
        """
        # Explicit unpacking (rather than *fields) keeps the ValueError that
        # short lines have always produced.
        (chrom, pos, vid, ref, alt, qual, filt, info) = fields[0:8]
        return VcfRecord(chrom, pos, vid, ref, alt, qual, filt, info)

    def yieldVcfRecord(self, fh):
        """Yield a VcfRecord for each data line read from ``fh``.

        Header lines (starting with '#') and blank lines are skipped.  A data
        line that merely contains '#' in one of its fields is NOT skipped.
        """
        for line in fh:
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue
            yield self._recordFromFields(stripped.split('\t'))

    def yieldVcfDataLine(self, fh):
        """Yield each data line of ``fh`` with surrounding whitespace removed.

        Only lines that start with '#' are skipped; a '#' inside a field
        does not make a line a header line.
        """
        for line in fh:
            stripped = line.strip()
            if stripped.startswith('#'):
                continue
            yield stripped

    def yieldVcfRecordwithGenotypes(self, fh):
        """Yield a VcfRecord with its genotypes list populated.

        Header lines (starting with '#') and blank lines are skipped so the
        #CHROM line can never be turned into a record.  Column 9 is the
        FORMAT string and every later column is one sample's genotype string.

        Raises IndexError when a data line has no FORMAT column.
        """
        for line in fh:
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue
            fields = stripped.split('\t')
            vrec = self._recordFromFields(fields)

            formatstring = fields[8]
            for gstring in fields[9:]:
                vrec.addGenotype(VcfGenotype(formatstring, gstring))
            yield vrec

    def printDataLineWithGenotypes(self, vcfh):
        """Print every record of ``vcfh`` including its genotype columns."""
        for vrec in self.yieldVcfRecordwithGenotypes(vcfh):
            print(vrec.toStringwithGenotypes())
示例#2
0
 def __init__(self, filename):
     """Set up the meta, header and misc line holders for one VCF file.

     ``filename`` is only stored on the instance; it is not opened here.
     """
     # Holders are created in this fixed order: meta, header, misc.
     self.metaline = MetaLines()
     self.headerline = HeaderLines()
     self.miscline = MiscLines()
     self.filename = filename