示例#1
0
def create_maf_distribution(
    seqs, distrib_fhand=None, plot_fhand=None, summary_fhand=None, groups=None, group_kind=None
):
    """Create the distribution of the maf of every snv found in the sequences.

    The maf of each snv is whatever ``calculate_maf_frequency`` returns for it
    with the given ``groups`` and ``group_kind`` (per the original note, the
    reference allele is not taken into account -- TODO confirm in that helper).
    Falsy mafs (``None`` or 0) are skipped.  When no maf is collected nothing
    is written and ``create_distribution`` is not called.

    Args:
        seqs: iterable of sequences; each one must provide
            ``get_features("snv")``.
        distrib_fhand: passed through to ``create_distribution``.
        plot_fhand: passed through to ``create_distribution``.
        summary_fhand: passed through to ``create_distribution``.
        groups: optional group names, used for the maf calculation and, joined
            with commas, for the plot title.
        group_kind: optional kind of the given groups, used for the maf
            calculation and for the plot title.

    Returns:
        None.
    """
    title = "maf"
    if groups and group_kind:
        title = "maf (%s: %s)" % (group_kind, ",".join(groups))

    mafs = CachedArray("f")
    # Remember whether anything was stored while filling the array, so the
    # whole array does not have to be materialized as a list just to find out
    # if it is empty.
    has_mafs = False
    for seq in seqs:
        for snv in seq.get_features("snv"):
            maf = calculate_maf_frequency(snv, groups=groups, group_kind=group_kind)
            if maf:
                mafs.append(maf)
                has_mafs = True

    if not has_mafs:
        return

    create_distribution(
        mafs,
        labels={"title": title},
        distrib_fhand=distrib_fhand,
        bins=None,
        plot_fhand=plot_fhand,
        range_=None,
        summary_fhand=summary_fhand,
        calculate_freqs=False,
        remove_outliers=False,
    )
示例#2
0
    def write(self, sequence, selected_snv_location):
        """Write a window of the sequence centered on the selected snv.

        The window spans ``self._length`` positions to each side of
        ``selected_snv_location`` (clipped to the sequence limits).  The
        selected snv is printed as ``[A/B/...]`` with its alleles; every other
        snv inside the window is printed as:

        - its major allele, when ``self._maf`` is not None and the snv maf is
          greater than that threshold,
        - otherwise the result of ``_snp_to_iupac`` for SNPs, the first
          reference base followed by Ns for DELETION/COMPLEX/INDEL snvs, and a
          single ``N`` for any other kind.

        The result is written to ``self.fhand`` as ``>name\\nsequence\\n`` and
        the file handle is flushed.

        Args:
            sequence: sequence object; it must support ``len``, slicing,
                ``get_features(kind='snv')`` and have ``name`` and ``seq``.
            selected_snv_location: position of the snv to highlight, in the
                coordinates of the given (unsliced) sequence.

        Raises:
            NotImplementedError: if the selected snv is not a SNP.
        """
        start = max(selected_snv_location - self._length, 0)
        end = min(selected_snv_location + self._length + 1, len(sequence))
        sequence = sequence[start:end]

        selected_snv_location -= start
        maf_threshold = self._maf
        prev_seq_end = 0
        # The pieces are collected in a list and joined once at the end.
        pieces = []
        for snv in sequence.get_features(kind='snv'):
            # snv start and end are [start, end[.
            # Correcting for the previous sequence slice.
            # NOTE(review): the features come from the already sliced sequence
            # and `start` is subtracted here again; confirm that slicing does
            # not shift the feature locations by itself.
            snv_start = snv.location.start.position - start
            snv_end = snv.location.end.position - start
            # The plain sequence between the previous snv and this one.
            pieces.append(str(sequence[prev_seq_end:snv_start].seq))
            prev_seq_end = snv_end

            if snv_start == selected_snv_location:
                # The selected snv is replaced by its alleles.
                if calculate_snv_kind(snv) != SNP:
                    msg = "We don't know how to print anything but SNPs"
                    raise NotImplementedError(msg)
                alleles = '/'.join([a[0] for a in snv.qualifiers['alleles'].keys()])
                to_print = '[{0:s}]'.format(alleles)
            else:
                snv_maf = None
                if maf_threshold is not None:
                    snv_maf = calculate_maf_frequency(snv)
                # The maf can be None; comparing None with a number raises a
                # TypeError in Python 3, so such snvs are simply masked.
                if snv_maf is not None and snv_maf > maf_threshold:
                    # most abundant allele
                    to_print = _get_major_allele(snv)
                else:
                    # Ns
                    snv_kind = calculate_snv_kind(snv)
                    if snv_kind == SNP:
                        to_print = _snp_to_iupac(snv, sequence)
                    elif snv_kind in (DELETION, COMPLEX, INDEL):
                        ref_allele = snv.qualifiers['reference_allele']
                        to_print = ref_allele[0] + 'N' * (len(ref_allele) - 1)
                    else:
                        to_print = 'N'

            pieces.append(to_print)

        # The tail after the last snv.  `end` is expressed in the coordinates
        # of the unsliced sequence, so `end + 1` is always past the end of the
        # window and this slice runs up to the window end (assumes the
        # sequence slicing clamps out of range bounds like str does).
        pieces.append(str(sequence[prev_seq_end:end + 1].seq))

        # NOTE(review): the name uses the location relative to the window,
        # not the original one; confirm this is the intended name.
        name = sequence.name + '_' + str(selected_snv_location + 1)
        self.fhand.write('>%s\n%s\n' % (name, ''.join(pieces)))
        self.fhand.flush()
def calculate_mafs_group(seqs, groups=None, group_kind=None):
    """Collect the maf of every snv, grouped by sequence name.

    For each snv of each sequence the maf is calculated with
    ``calculate_maf_frequency`` using the given ``groups`` and ``group_kind``.
    The snvs for which the maf is None are ignored.

    Returns:
        A dict that maps each sequence name to a list of
        ``(snv start position, maf)`` tuples, in the order in which the snvs
        were found.  Sequences without any usable snv are not in the dict.
    """
    mafs_by_seq = {}
    for seq in seqs:
        for snv in seq.get_features('snv'):
            maf = calculate_maf_frequency(snv, group_kind=group_kind, groups=groups)
            if maf is None:
                continue
            position = snv.location.start.position
            mafs_by_seq.setdefault(seq.name, []).append((position, maf))
    return mafs_by_seq
    def major_allele_freq_filter(sequence):
        """Annotate every snv of the sequence with the result of the 'maf' filter.

        The snvs that already have a 'maf' filter result for the same
        ``parameters`` are left untouched.  For the others the maf is
        calculated for the enclosing ``groups`` and ``group_kind``, and the
        stored result is True when the maf is None or greater than the
        enclosing ``frequency``, False otherwise.

        Returns:
            The same sequence that was given, or None if the sequence is None.
        """
        if sequence is None:
            return None
        for snv in sequence.get_features(kind='snv'):
            previous_result = _get_filter_result(snv, 'maf',
                                                 threshold=parameters)
            if previous_result is not None:
                continue
            maf = calculate_maf_frequency(snv, groups=groups,
                                          group_kind=group_kind)

            # The None check has to go first: comparing None with a number
            # raises a TypeError in Python 3, so with the comparison first the
            # None case would never be reached.
            result = maf is None or bool(maf > frequency)
            _add_filter_result(snv, 'maf', result, threshold=parameters)
        return sequence
示例#5
0
def _snv_to_n(snv, sequence, position, maf=None):
    'It returns the n for each snp'
    genotype = []
    for allele, kind in snv.qualifiers['alleles'].keys():
        if kind == SNP and not genotype:
            snv_maf = calculate_maf_frequency(snv)
            if maf and snv_maf > maf:
                genotype = [_get_major_allele(snv)]
            else:
                snp_iupac = _snp_to_iupac(snv, sequence)
                genotype = [snp_iupac]

        elif kind == DELETION:
            len_del = len(allele)
            genotype.extend(['N'] * (len_del - len(genotype)))
        elif kind == INSERTION:
            geno = sequence[position] + len(allele) * 'N'
            if genotype:
                genotype[0] = geno
            else:
                genotype.append(geno)
    return genotype