示例#1
0
def pickOutFamilies(orig_filename, outdir, family_groups,
                    callToString=lambda x: x,
                    lineFilter=lambda x: True,
                    cols_to_use=range(len(COLUMN_MAP))):
    """Split one tab-delimited input file into one output file per group.

    For every key in ``family_groups`` a file ``<outdir>/<name>.vcf`` is
    written, where ``<name>`` is ``sanitizePatientName(key)``.  Each output
    file contains, in order:

    * the header lines returned by ``getColumnsAndHeaders``, joined by
      newlines;
    * one tab-separated column-title line: the titles of ``cols_to_use``
      followed by the titles of the columns belonging to that group;
    * one line per accepted data row, with the same column layout.

    Parameters:
        orig_filename: path of the input file to read.
        outdir: output directory; created with ``os.mkdir`` when missing.
        family_groups: mapping of group key -> container of column names.
            A column belongs to a group when ``name in family_groups[key]``.
        callToString: applied to every cell taken from a group's own
            columns before it is written (identity by default).
        lineFilter: called with the list of tab-split fields of a data
            row; the row is written only when the result is truthy.
        cols_to_use: indices of the columns copied unchanged into every
            output file.  Any iterable of ints is accepted.

    Returns:
        None.  All results are written to the files in ``outdir``.

    Raises:
        Whatever the underlying file operations or helpers raise; an
        IndexError is raised when a data row has fewer fields than a
        requested column index.  All opened files are closed either way.
    """
    # Work on a private list: this makes `shared_ixs + groupIXs[...]` valid
    # for tuples/ranges too and never aliases the shared default argument.
    shared_ixs = list(cols_to_use)

    fin = open(orig_filename, "rb")
    fouts = {}
    try:
        if not os.path.isdir(outdir):
            os.mkdir(outdir)

        # open one output handle per group
        for group in family_groups:
            safe_group = sanitizePatientName(group)
            out_path = os.path.join(outdir, "%s.vcf" % safe_group)
            fouts[group] = open(out_path, 'wb')

        (columns, headers) = getColumnsAndHeaders(fin)
        header_string = "\n".join(headers)

        # for each group, the (ascending) indices of the columns it owns
        groupIXs = {}
        for group in family_groups:
            family_names = family_groups[group]
            groupIXs[group] = [i for i, name in enumerate(columns)
                               if name in family_names]

        # write the header block and the column-title line of every file
        for group in family_groups:
            out_header = '\t'.join([columns[i]
                                    for i in shared_ixs + groupIXs[group]])
            fouts[group].write("%s\n" % header_string)
            fouts[group].write("%s\n" % out_header)

        # COLUMN_MAP maps a header string to its column index in the input
        globes.printColumnWarning(orig_filename, COLUMN_MAP)

        # Stream the remaining lines instead of loading the whole file.
        for dataline in fin:
            splt = dataline.strip().split('\t')
            if not lineFilter(splt):
                continue
            # The shared columns are identical for every group: join once.
            shared = '\t'.join([splt[ix] for ix in shared_ixs])
            for group in family_groups:
                calls = [callToString(splt[ix]) for ix in groupIXs[group]]
                fouts[group].write("%s\t%s\n" % (shared, '\t'.join(calls)))
    finally:
        # Release every handle even when something above raised.
        try:
            for fout in fouts.values():
                fout.close()
        finally:
            fin.close()
示例#2
0
 def __init__(self, vcf_file, fast_forward=0) :
     """Open `vcf_file` and prepare to iterate over its rows.

     Parameters:
         vcf_file: path of the file to open (opened in "rb" mode).
         fast_forward: forwarded as `burn` to globes.splitIterator;
             presumably the number of rows to skip -- TODO confirm.

     Sets self.indexOf, self.fin, self.patients, self.allow_absent,
     self.group_repeats and self.iterator.  If reading the patients or
     building the iterator fails, the file is closed before the
     exception propagates.
     """
     self.indexOf = COLUMN_MAP
     globes.printColumnWarning( vcf_file, self.indexOf )
     self.allow_absent = False
     self.group_repeats = False
     self.fin = open( vcf_file, "rb" )
     try :
         # getPatients reads from the handle; the iterator then continues
         # on the same handle.
         self.patients = getPatients( self.fin )
         self.iterator = globes.splitIterator( self.fin, burn=fast_forward )
     except Exception :
         # do not leak the open handle of a half-constructed object
         self.fin.close()
         raise
示例#3
0
    def __init__(self, vcf_file, fast_forward=0) :
        """Open `vcf_file` and prepare to iterate over its rows.

        Parameters:
            vcf_file: path of the file to open (opened in "rb" mode).
            fast_forward: forwarded to self.iterate; presumably the
                number of rows to skip -- TODO confirm.

        Sets self.indexOf, self.fin, self.patients, self.allow_absent,
        self.group_repeats and self.iterator.  If reading the patients or
        building the iterator fails, the file is closed before the
        exception propagates.
        """
        self.indexOf = broad.COLUMN_MAP
        globes.printColumnWarning( vcf_file, self.indexOf )
        self.allow_absent = False
        self.group_repeats = False
        self.fin = open( vcf_file, "rb" )
        try :
            # broad.getPatients reads from the handle before iteration
            # starts.
            self.patients = broad.getPatients( self.fin )
            self.iterator = self.iterate(fast_forward)
        except Exception :
            # do not leak the open handle of a half-constructed object
            self.fin.close()
            raise