예제 #1
0
    def set_vdj_combo(
        self, vdj_combo_label, cyst_positions, tryp_positions, all_seqs, debug=False, dont_mimic_data_read_length=False
    ):
        """
        Store a gene/length choice on this event and unpack its constituent parts.

        vdj_combo_label: sequence indexed through utils.index_keys (gene names, cdr3 length,
            insertion lengths and deletion lengths are all read from it).
        cyst_positions: indexed by v gene name, each entry providing a "cysteine-position" item.
        tryp_positions: indexed by j gene name, each entry convertible to int.
        all_seqs: indexed as all_seqs[region][gene name], giving the germline sequence.
        debug: if set, call self.print_gene_choice() once everything has been filled in.
        dont_mimic_data_read_length: if set, the effective erosions are forced to zero instead
            of being read from the label.
        """

        def _label_int(key):
            # integer value stored in the label at the index registered for <key>
            return int(vdj_combo_label[utils.index_keys[key]])

        self.vdj_combo_label = vdj_combo_label

        for rgn in utils.regions:
            gene_name = vdj_combo_label[utils.index_keys[rgn + "_gene"]]
            self.genes[rgn] = gene_name
            germline_seq = all_seqs[rgn][gene_name]
            # one random draw per region: every N in a given sequence gets the same replacement nucleotide
            filler = utils.int_to_nucleotide(random.randint(0, 3))
            self.original_seqs[rgn] = germline_seq.replace("N", filler)

        # conserved codon positions: cysteine in the uneroded v, tryptophan within the j only
        v_entry = cyst_positions[self.genes["v"]]
        self.local_cyst_position = v_entry["cysteine-position"]
        self.local_tryp_position = int(tryp_positions[self.genes["j"]])
        self.cdr3_length = _label_int("cdr3_length")

        for bound in utils.boundaries:
            self.insertion_lengths[bound] = _label_int(bound + "_insertion")
        for side in utils.real_erosions:
            self.erosions[side] = _label_int(side + "_del")
        for side in utils.effective_erosions:
            # either keep the entire v and j genes (zero), or take the v left / j right erosions from the label
            self.effective_erosions[side] = 0 if dont_mimic_data_read_length else _label_int(side + "_del")

        # remember the original conserved codon words, so they can be reverted if they get mutated
        cyst_start = self.local_cyst_position
        tryp_start = self.local_tryp_position
        self.original_cyst_word = str(self.original_seqs["v"][cyst_start : cyst_start + 3])
        self.original_tryp_word = str(self.original_seqs["j"][tryp_start : tryp_start + 3])

        if debug:
            self.print_gene_choice()
예제 #2
0
파일: event.py 프로젝트: wangdi2014/partis
    def set_vdj_combo(self,
                      vdj_combo_label,
                      glfo,
                      debug=False,
                      mimic_data_read_length=False):
        """
        Store a gene/length choice on this event and unpack its constituent parts.

        vdj_combo_label: sequence indexed through utils.index_keys (gene names, insertion
            lengths and deletion lengths are all read from it).
        glfo: germline info; this method reads glfo['seqs'][region][gene], glfo['locus'] and
            glfo[<codon> + '-positions'][gene].
        debug: if set, call self.print_gene_choice() once everything has been filled in.
        mimic_data_read_length: if set, the effective erosions are read from the label;
            otherwise they are set to zero.
        """

        def _label_int(key):
            # integer value stored in the label at the index registered for <key>
            return int(vdj_combo_label[utils.index_keys[key]])

        self.vdj_combo_label = vdj_combo_label

        for rgn in utils.regions:
            gene_name = vdj_combo_label[utils.index_keys[rgn + '_gene']]
            self.genes[rgn] = gene_name
            germline_seq = glfo['seqs'][rgn][gene_name]
            # one random draw per region: every N in a given sequence gets the same replacement nucleotide
            filler = utils.int_to_nucleotide(random.randint(0, 3))
            self.original_seqs[rgn] = germline_seq.replace('N', filler)

        # conserved codon positions, expressed in the uneroded germline gene
        codons_by_region = utils.conserved_codons[glfo['locus']]
        for rgn, codon_name in codons_by_region.items():
            position_table = glfo[codon_name + '-positions']
            self.pre_erosion_codon_positions[rgn] = position_table[self.genes[rgn]]

        for bound in utils.boundaries:
            self.insertion_lengths[bound] = _label_int(bound + '_insertion')
        for side in utils.real_erosions:
            self.erosions[side] = _label_int(side + '_del')
        for side in utils.effective_erosions:
            # either take the v left / j right erosions from the label, or keep the entire v and j genes (zero)
            self.effective_erosions[side] = _label_int(side + '_del') if mimic_data_read_length else 0

        if debug:
            self.print_gene_choice()
예제 #3
0
    def set_vdj_combo(self,
                      vdj_combo_label,
                      cyst_positions,
                      tryp_positions,
                      all_seqs,
                      debug=False,
                      dont_mimic_data_read_length=False):
        """
        Store a gene/length choice on this event and unpack its constituent parts.

        vdj_combo_label: sequence indexed through utils.index_keys (gene names, cdr3 length,
            insertion lengths and deletion lengths are all read from it).
        cyst_positions: indexed by v gene name, each entry providing a 'cysteine-position' item.
        tryp_positions: indexed by j gene name, each entry convertible to int.
        all_seqs: indexed as all_seqs[region][gene name], giving the germline sequence.
        debug: if set, call self.print_gene_choice() once everything has been filled in.
        dont_mimic_data_read_length: if set, the effective erosions are forced to zero instead
            of being read from the label.
        """

        def _label_int(key):
            # integer value stored in the label at the index registered for <key>
            return int(vdj_combo_label[utils.index_keys[key]])

        self.vdj_combo_label = vdj_combo_label

        for rgn in utils.regions:
            gene_name = vdj_combo_label[utils.index_keys[rgn + '_gene']]
            self.genes[rgn] = gene_name
            germline_seq = all_seqs[rgn][gene_name]
            # one random draw per region: every N in a given sequence gets the same replacement nucleotide
            filler = utils.int_to_nucleotide(random.randint(0, 3))
            self.original_seqs[rgn] = germline_seq.replace('N', filler)

        # conserved codon positions: cysteine in the uneroded v, tryptophan within the j only
        v_entry = cyst_positions[self.genes['v']]
        self.local_cyst_position = v_entry['cysteine-position']
        self.local_tryp_position = int(tryp_positions[self.genes['j']])
        self.cdr3_length = _label_int('cdr3_length')

        for bound in utils.boundaries:
            self.insertion_lengths[bound] = _label_int(bound + '_insertion')
        for side in utils.real_erosions:
            self.erosions[side] = _label_int(side + '_del')
        for side in utils.effective_erosions:
            # either keep the entire v and j genes (zero), or take the v left / j right erosions from the label
            self.effective_erosions[side] = (
                0 if dont_mimic_data_read_length else _label_int(side + '_del'))

        # remember the original conserved codon words, so they can be reverted if they get mutated
        cyst_start = self.local_cyst_position
        tryp_start = self.local_tryp_position
        self.original_cyst_word = str(
            self.original_seqs['v'][cyst_start:cyst_start + 3])
        self.original_tryp_word = str(
            self.original_seqs['j'][tryp_start:tryp_start + 3])

        if debug:
            self.print_gene_choice()
예제 #4
0
파일: event.py 프로젝트: psathyrella/partis
    def set_vdj_combo(self, vdj_combo_label, glfo, debug=False, mimic_data_read_length=False):
        """
        Store a gene/length choice on this event and unpack its constituent parts.

        vdj_combo_label: sequence indexed through utils.index_keys (gene names, insertion lengths and deletion lengths are all read from it).
        glfo: germline info; this method reads glfo['seqs'][region][gene], glfo['chain'] and glfo[<codon> + '-positions'][gene].
        debug: if set, call self.print_gene_choice() once everything has been filled in.
        mimic_data_read_length: if set, the effective erosions are read from the label; otherwise they are set to zero.
        """

        def _label_int(key):
            # integer value stored in the label at the index registered for <key>
            return int(vdj_combo_label[utils.index_keys[key]])

        self.vdj_combo_label = vdj_combo_label

        for rgn in utils.regions:
            gene_name = vdj_combo_label[utils.index_keys[rgn + '_gene']]
            self.genes[rgn] = gene_name
            germline_seq = glfo['seqs'][rgn][gene_name]
            # one random draw per region: every N in a given sequence gets the same replacement nucleotide
            filler = utils.int_to_nucleotide(random.randint(0, 3))
            self.original_seqs[rgn] = germline_seq.replace('N', filler)

        # conserved codon positions, expressed in the uneroded germline gene
        codons_by_region = utils.conserved_codons[glfo['chain']]
        for rgn, codon_name in codons_by_region.items():
            position_table = glfo[codon_name + '-positions']
            self.local_codon_positions[rgn] = position_table[self.genes[rgn]]

        for bound in utils.boundaries:
            self.insertion_lengths[bound] = _label_int(bound + '_insertion')
        for side in utils.real_erosions:
            self.erosions[side] = _label_int(side + '_del')
        for side in utils.effective_erosions:
            # either take the v left / j right erosions from the label, or keep the entire v and j genes (zero)
            self.effective_erosions[side] = _label_int(side + '_del') if mimic_data_read_length else 0

        if debug:
            self.print_gene_choice()