Пример #1
0
 def next(self):
     """Return the next record in sort order, merged across all readers.

     Every reader in ``self.vcf_list`` owns one look-ahead slot in
     ``self.next_records``.  Empty slots are refilled first, then the
     smallest pending record is selected: by the records' own ``>``
     comparison when ``self.ascii_sort`` is true, otherwise by
     ``(chrnumber, POS)``.  The comparison is strict, so on a tie the
     record of the earliest reader wins.

     Pending records of later readers that share the selected record's
     ``(CHROM, POS)`` are then passed to ``merge``.  The slot of the
     selected record and the slot of every record whose ``merge`` call
     returned a true value are emptied so they get refilled next time.

     Merging happens only after the final selection is known.  Merging
     while still scanning would fold a record into a provisional choice
     that can be displaced later, leaving both records pending and
     repeating the same merge on a subsequent call.
     NOTE(review): this presumes ``merge`` modifies the record it is
     called on -- confirm against its implementation.

     Finally, each name in ``self.samples`` for which ``get_sample``
     returns ``None`` receives a placeholder ``DP:FREQ`` call holding only
     ``None`` values, and the calls are ordered with ``_sort_samples``.

     Raises:
         StopIteration: when no reader has a pending record left.
     """
     pending = self.next_records

     # Refill empty look-ahead slots; an exhausted reader keeps a None slot.
     for idx, record in enumerate(pending):
         if record is None:
             try:
                 pending[idx] = self.vcf_list[idx].next()
             except StopIteration:
                 pass

     # Pass 1: pick the smallest pending record without touching any record.
     next_record = None
     chosen = None
     for idx, record in enumerate(pending):
         if record is None:
             continue
         if next_record is None:
             earlier = True
         elif self.ascii_sort:
             earlier = next_record > record
         else:
             earlier = ((next_record.chrnumber, next_record.POS) >
                        (record.chrnumber, record.POS))
         if earlier:
             next_record = record
             chosen = idx

     if next_record is None:
         raise StopIteration

     # Pass 2: fold in same-site records from the readers after the chosen
     # one.  Earlier readers cannot hold an equal record that lost the
     # strict comparison above, so they are not considered.
     consumed = [chosen]
     for idx in range(chosen + 1, len(pending)):
         record = pending[idx]
         if record is None:
             continue
         same_site = ((next_record.CHROM, next_record.POS) ==
                      (record.CHROM, record.POS))
         if same_site and next_record.merge(record):
             consumed.append(idx)

     # Free the slots that were used up so they are refilled on the next call.
     for idx in consumed:
         pending[idx] = None

     # Give every expected sample a call so the record is rectangular.
     for sample in self.samples:
         if next_record.get_sample(sample) is None:
             samp_fmt = self._parse_sample_format('DP:FREQ')
             # One frequency placeholder plus one per ALT entry.
             my_freqs = [None] + [None for _ in next_record.ALT]
             samp_data = [None, my_freqs]
             next_record.add_call(
                 _Call(next_record, sample, samp_fmt(*samp_data)))
     next_record._sort_samples(sort=self.samples)
     return next_record
Пример #2
0
    def _parse_samples(self, samples, samp_fmt, site):
        '''Turn the per-sample columns of one record into ``_Call`` objects.

        ``samp_fmt`` is the FORMAT string of the record.  It is compiled by
        ``_parse_sample_format`` the first time it is seen and looked up in
        ``self._format_cache`` afterwards.  ``samples`` holds one
        colon-separated string for each name in ``self.samples``, and
        ``site`` is handed unchanged to every ``_Call``.

        Returns a list with one ``_Call`` per (name, sample string) pair.
        When ``cparse`` is truthy the whole job is delegated to
        ``cparse.parse_samples`` instead.

        NOTE: this method has a cython equivalent and care must be taken
        to keep the two methods equivalent
        '''
        # Compile each distinct FORMAT string only once.
        cache = self._format_cache
        if samp_fmt not in cache:
            cache[samp_fmt] = self._parse_sample_format(samp_fmt)
        fmt = cache[samp_fmt]

        if cparse:
            return cparse.parse_samples(
                self.samples, samples, fmt, fmt._types, fmt._nums, site)

        _map = self._map
        field_names = fmt._fields
        nfields = len(field_names)
        calls = []

        for name, sample in itertools.izip(self.samples, samples):
            # Fields missing from the end of the sample string stay None.
            sampdat = [None] * nfields

            for i, raw in enumerate(sample.split(':')):
                if field_names[i] == 'GT':
                    # Genotypes are by far the most common field and are
                    # stored as the untouched string.
                    sampdat[i] = raw
                elif raw == ".":
                    sampdat[i] = None
                else:
                    entry_num = fmt._nums[i]
                    entry_type = fmt._types[i]

                    if entry_num == 1 or ',' not in raw:
                        # A lone value needs no splitting.  It is stored
                        # unwrapped even when the declared count is not 1.
                        if entry_type == 'Integer':
                            try:
                                value = int(raw)
                            except ValueError:
                                # Not a plain integer literal; try float.
                                value = float(raw)
                        elif entry_type == 'Float':
                            value = float(raw)
                        else:
                            value = raw
                        sampdat[i] = value
                    else:
                        parts = raw.split(',')
                        if entry_type == 'Integer':
                            try:
                                sampdat[i] = _map(int, parts)
                            except ValueError:
                                sampdat[i] = _map(float, parts)
                        elif entry_type in ('Float', 'Numeric'):
                            sampdat[i] = _map(float, parts)
                        else:
                            sampdat[i] = parts

            # Wrap the parsed values in a call object for this sample.
            calls.append(_Call(site, name, fmt(*sampdat)))

        return calls
Пример #3
0
    def _parse_samples(self, samples, samp_fmt, site):
        '''Convert the sample columns of a record into a list of ``_Call``.

        The FORMAT string ``samp_fmt`` is compiled with
        ``_parse_sample_format`` on first use and cached in
        ``self._format_cache``.  Each entry of ``samples`` is a
        colon-separated string paired, in order, with a name from
        ``self.samples``; ``site`` is forwarded to every ``_Call``.

        If ``cparse`` is truthy, ``cparse.parse_samples`` produces the
        result instead of the pure-Python loop below.

        NOTE: this method has a cython equivalent and care must be taken
        to keep the two methods equivalent
        '''
        # Reuse the compiled format when this FORMAT string was seen before.
        if samp_fmt not in self._format_cache:
            compiled = self._parse_sample_format(samp_fmt)
            self._format_cache[samp_fmt] = compiled
        fmt = self._format_cache[samp_fmt]

        if cparse:
            return cparse.parse_samples(
                self.samples, samples, fmt, fmt._types, fmt._nums, site)

        _map = self._map
        nfields = len(fmt._fields)
        missing_markers = ('.', './.')
        result = []

        for name, sample in itertools.izip(self.samples, samples):
            # Start from "all missing"; only fields present get overwritten.
            sampdat = [None] * nfields

            for i, text in enumerate(sample.split(':')):
                # Missing-value markers are the most common case.
                if text in missing_markers:
                    sampdat[i] = None
                    continue

                entry_num = fmt._nums[i]
                entry_type = fmt._types[i]

                if entry_num != 1 and ',' in text:
                    # Several comma-separated values: convert element-wise.
                    pieces = text.split(',')
                    if entry_type == 'Integer':
                        try:
                            sampdat[i] = _map(int, pieces)
                        except ValueError:
                            sampdat[i] = _map(float, pieces)
                    elif entry_type in ('Float', 'Numeric'):
                        sampdat[i] = _map(float, pieces)
                    else:
                        sampdat[i] = pieces
                    continue

                # Single value: stored unwrapped, whatever the declared count.
                if entry_type == 'Integer':
                    try:
                        sampdat[i] = int(text)
                    except ValueError:
                        # Not a plain integer literal; fall back to float.
                        sampdat[i] = float(text)
                elif entry_type == 'Float':
                    sampdat[i] = float(text)
                else:
                    sampdat[i] = text

            # One call object per sample, built from the parsed fields.
            result.append(_Call(site, name, fmt(*sampdat)))

        return result
Пример #4
0
    def _parse_samples(self, samples, samp_fmt, site, EntryDbID):
        '''Parse the sample columns of a record and push each field to the DB.

        ``samp_fmt`` is the colon-separated FORMAT string.  For every key in
        it a writer is chosen from ``self.db``: ``create<KEY>`` for the keys
        listed below, ``createIndividualDefault`` for anything else.  For
        each sample, ``self.db.createIndividualEntry(EntryDbID, index)`` is
        called, and every raw field string is then passed to its writer
        together with the returned id.  Independently of the DB calls the
        fields are converted according to the compiled format and wrapped
        in ``_Call`` objects, which are returned as a list.

        NOTE(review): when ``cparse`` is truthy the method returns the
        result of ``cparse.parse_samples`` before any per-sample ``self.db``
        call is made -- confirm this is intended.

        NOTE: this method has a cython equivalent and care must be taken
        to keep the two methods equivalent
        '''
        # TODO: remove this debugging output once the DB upload is ready.
        print(samp_fmt)

        # FORMAT keys that have a dedicated ``self.db.create<KEY>`` writer.
        dedicated_keys = ('AD', 'DP', 'EC', 'FT', 'GL', 'GLE', 'GP', 'GQ',
                          'GT', 'HQ', 'PL', 'PQ', 'PS')
        writers = []
        custom_keys = []
        for key in samp_fmt.split(":"):
            if key in dedicated_keys:
                writers.append(getattr(self.db, 'create' + key))
            else:
                # Unknown keys go through the generic writer, which also
                # needs the key name; remember it in encounter order.
                custom_keys.append(key)
                writers.append(self.db.createIndividualDefault)

        # Compile each distinct FORMAT string only once.
        cache = self._format_cache
        if samp_fmt not in cache:
            cache[samp_fmt] = self._parse_sample_format(samp_fmt)
        fmt = cache[samp_fmt]

        if cparse:
            return cparse.parse_samples(
                self.samples, samples, fmt, fmt._types, fmt._nums, site)

        _map = self._map
        nfields = len(fmt._fields)
        calls = []

        pairs = itertools.izip(self.samples, samples)
        for ind_number, (name, sample) in enumerate(pairs):
            ind_id = self.db.createIndividualEntry(EntryDbID, ind_number)
            if ind_id == -1:
                # NOTE(review): the failure is only reported; the field
                # writers below are still called with this id.
                print("Failed to create individual entry")

            # Position inside custom_keys of the next generic field.
            custom_pos = 0
            sampdat = [None] * nfields

            for i, raw in enumerate(sample.split(':')):
                # The DB always receives the raw string, including
                # missing-value markers.
                writer = writers[i]
                if writer == self.db.createIndividualDefault:
                    writer(custom_keys[custom_pos], ind_id, raw)
                    custom_pos += 1
                else:
                    writer(ind_id, raw)

                if raw in ('.', './.'):
                    sampdat[i] = None
                    continue

                entry_num = fmt._nums[i]
                entry_type = fmt._types[i]

                if entry_num != 1 and ',' in raw:
                    # Several comma-separated values: convert element-wise.
                    pieces = raw.split(',')
                    if entry_type == 'Integer':
                        sampdat[i] = _map(int, pieces)
                    elif entry_type in ('Float', 'Numeric'):
                        sampdat[i] = _map(float, pieces)
                    else:
                        sampdat[i] = pieces
                    continue

                # TODO: add DB upload and subroutines for converted values.
                # Single value: stored unwrapped, whatever the declared count.
                if entry_type == 'Integer':
                    sampdat[i] = int(raw)
                elif entry_type == 'Float':
                    sampdat[i] = float(raw)
                else:
                    sampdat[i] = raw

            # Wrap the parsed values in a call object for this sample.
            calls.append(_Call(site, name, fmt(*sampdat)))

        return calls