def create_maf_distribution(
    seqs,
    distrib_fhand=None,
    plot_fhand=None,
    summary_fhand=None,
    groups=None,
    group_kind=None,
):
    """Create the distribution of the maf of every snv found in seqs.

    As the original note on this function states, the reference allele is
    not taken into account.

    Args:
        seqs: iterable of sequences; each one must provide
            ``get_features("snv")``.
        distrib_fhand: forwarded unchanged to ``create_distribution``.
        plot_fhand: forwarded unchanged to ``create_distribution``.
        summary_fhand: forwarded unchanged to ``create_distribution``.
        groups: forwarded to ``calculate_maf_frequency``; when given
            together with ``group_kind`` they are also shown in the title.
        group_kind: forwarded to ``calculate_maf_frequency``.

    Returns:
        None. ``create_distribution`` is only called when at least one
        value was collected.
    """
    if groups and group_kind:
        title = "maf (%s: %s)" % (group_kind, ",".join(groups))
    else:
        title = "maf"

    collected = CachedArray("f")
    all_snvs = (snv for seq in seqs for snv in seq.get_features("snv"))
    for snv in all_snvs:
        value = calculate_maf_frequency(snv, groups=groups, group_kind=group_kind)
        # Falsy results (None, but also 0) are left out of the distribution.
        if not value:
            continue
        collected.append(value)

    # The array is materialised only to find out whether it holds anything.
    if not list(collected):
        return

    create_distribution(
        collected,
        labels={"title": title},
        distrib_fhand=distrib_fhand,
        bins=None,
        plot_fhand=plot_fhand,
        range_=None,
        summary_fhand=summary_fhand,
        calculate_freqs=False,
        remove_outliers=False,
    )
def write(self, sequence, selected_snv_location):
    """Write a window of the sequence centred on one selected snv.

    The window spans ``self._length`` positions on each side of
    ``selected_snv_location`` (clipped to the sequence limits). The selected
    snv is written as ``[A/B/...]`` with its alleles; every other snv in
    the window is written either as its major allele (when ``self._maf`` is
    set and the snv maf is above it) or as an ambiguous representation
    (IUPAC code for SNPs, first reference base followed by Ns for
    deletions/complex/indels, a single N otherwise).

    The record is written to ``self.fhand`` as ``>name\\nsequence\\n`` and
    the handle is flushed.

    Args:
        sequence: the sequence holding the snv features. It must support
            ``len``, slicing, ``get_features(kind='snv')``, ``.seq`` and
            ``.name``.
        selected_snv_location: start position of the snv to highlight.

    Raises:
        NotImplementedError: if the selected snv is not a SNP.
    """
    start = max(selected_snv_location - self._length, 0)
    end = min(selected_snv_location + self._length + 1, len(sequence))

    sequence = sequence[start:end]
    selected_snv_location -= start
    maf_threshold = self._maf

    prev_seq_end = 0
    pieces = []
    for snv in sequence.get_features(kind='snv'):
        # snv start and end are [start, end[.
        # The window offset is subtracted from the feature coordinates.
        # NOTE(review): this assumes the features of the sliced sequence
        # still carry the coordinates of the original one -- confirm.
        snv_start = snv.location.start.position - start
        snv_end = snv.location.end.position - start

        # Plain sequence between the previous snv and this one.
        pieces.append(str(sequence[prev_seq_end:snv_start].seq))
        prev_seq_end = snv_end

        if snv_start == selected_snv_location:
            # Replace the position with the list of alleles.
            if calculate_snv_kind(snv) != SNP:
                msg = "We don't know how to print anything but SNPs"
                raise NotImplementedError(msg)
            alleles = '/'.join(
                [key[0] for key in snv.qualifiers['alleles'].keys()]
            )
            to_print = '[{0:s}]'.format(alleles)
        else:
            write_abundant_allele = False
            if maf_threshold is not None:
                snv_maf = calculate_maf_frequency(snv)
                # The maf may be None; comparing None with a number raises
                # TypeError on Python 3, so it is treated as "not above".
                write_abundant_allele = bool(
                    snv_maf is not None and snv_maf > maf_threshold
                )

            if write_abundant_allele:
                # Most abundant allele.
                to_print = _get_major_allele(snv)
            else:
                # Ambiguous representation (Ns / IUPAC).
                snv_kind = calculate_snv_kind(snv)
                if snv_kind == SNP:
                    to_print = _snp_to_iupac(snv, sequence)
                elif snv_kind in (DELETION, COMPLEX, INDEL):
                    ref_allele = snv.qualifiers['reference_allele']
                    to_print = ref_allele[0] + 'N' * (len(ref_allele) - 1)
                else:
                    to_print = 'N'
        pieces.append(to_print)

    # Tail after the last snv. The loop never breaks, so this always runs.
    # ``end`` is still expressed in the coordinates of the unsliced
    # sequence, so the upper bound lies past the end of the window.
    pieces.append(str(sequence[prev_seq_end:end + 1].seq))
    seq_to_print = ''.join(pieces)

    # The position in the name is 1-based and relative to the window start.
    # NOTE(review): confirm that the original coordinate was not intended.
    name = sequence.name + '_' + str(selected_snv_location + 1)
    self.fhand.write('>%s\n%s\n' % (name, seq_to_print))
    self.fhand.flush()
def calculate_mafs_group(seqs, groups=None, group_kind=None):
    """Collect the maf of every snv, grouped by sequence name.

    Args:
        seqs: iterable of sequences; each one must provide
            ``get_features('snv')`` and ``name``.
        groups: forwarded to ``calculate_maf_frequency``.
        group_kind: forwarded to ``calculate_maf_frequency``.

    Returns:
        A dict that maps each sequence name to a list of
        ``(snv start position, maf)`` tuples. Snvs whose maf is None are
        left out, and sequences without any reported snv do not appear.
    """
    profile = {}
    for seq in seqs:
        for snv in seq.get_features('snv'):
            freq = calculate_maf_frequency(
                snv, group_kind=group_kind, groups=groups
            )
            if freq is None:
                continue
            position = snv.location.start.position
            profile.setdefault(seq.name, []).append((position, freq))
    return profile
def major_allele_freq_filter(sequence):
    """Annotate every snv of the sequence with the result of the maf filter.

    ``parameters``, ``groups``, ``group_kind`` and ``frequency`` are not
    defined in this function; they are taken from the enclosing scope.

    Args:
        sequence: a sequence providing ``get_features(kind='snv')``, or
            None.

    Returns:
        The same sequence object (its snvs annotated in place through
        ``_add_filter_result``), or None when None was given.
    """
    if sequence is None:
        return None

    for snv in sequence.get_features(kind='snv'):
        # Skip snvs that already hold a result for this filter/threshold.
        if _get_filter_result(snv, 'maf', threshold=parameters) is not None:
            continue

        maf = calculate_maf_frequency(snv, groups=groups,
                                      group_kind=group_kind)
        # The None check must come first: comparing None with a number
        # raises TypeError on Python 3. A missing maf marks the snv as True,
        # the same as a maf above the threshold.
        result = bool(maf is None or maf > frequency)
        _add_filter_result(snv, 'maf', result, threshold=parameters)
    return sequence
def _snv_to_n(snv, sequence, position, maf=None): 'It returns the n for each snp' genotype = [] for allele, kind in snv.qualifiers['alleles'].keys(): if kind == SNP and not genotype: snv_maf = calculate_maf_frequency(snv) if maf and snv_maf > maf: genotype = [_get_major_allele(snv)] else: snp_iupac = _snp_to_iupac(snv, sequence) genotype = [snp_iupac] elif kind == DELETION: len_del = len(allele) genotype.extend(['N'] * (len_del - len(genotype))) elif kind == INSERTION: geno = sequence[position] + len(allele) * 'N' if genotype: genotype[0] = geno else: genotype.append(geno) return genotype