def set_vdj_combo(
    self, vdj_combo_label, cyst_positions, tryp_positions, all_seqs, debug=False, dont_mimic_data_read_length=False
):
    """
    Store the gene/length choice label and unpack it into its constituent parts.

    vdj_combo_label is read through the positions listed in utils.index_keys.
    Germline sequences are taken from all_seqs[region][gene], the cyst position
    from cyst_positions[v_gene]["cysteine-position"] and the tryp position from
    tryp_positions[j_gene]. Effective erosions are copied from the label unless
    dont_mimic_data_read_length is set, in which case they are zero. If debug is
    set, print_gene_choice() is called once everything has been filled in.
    """

    def label_value(key):
        # integer stored in the label at the position utils.index_keys assigns to <key>
        return int(vdj_combo_label[utils.index_keys[key]])

    self.vdj_combo_label = vdj_combo_label

    for region in utils.regions:
        gene = vdj_combo_label[utils.index_keys[region + "_gene"]]
        self.genes[region] = gene
        germline = all_seqs[region][gene]
        # one random nuke is drawn per sequence and substituted for every N in it (a.t.m.)
        fill_nuke = utils.int_to_nucleotide(random.randint(0, 3))
        self.original_seqs[region] = germline.replace("N", fill_nuke)

    cyst_pos = cyst_positions[self.genes["v"]]["cysteine-position"]  # position in the uneroded v
    self.local_cyst_position = cyst_pos
    tryp_pos = int(tryp_positions[self.genes["j"]])  # position within the j only
    self.local_tryp_position = tryp_pos
    self.cdr3_length = label_value("cdr3_length")

    for boundary in utils.boundaries:
        self.insertion_lengths[boundary] = label_value(boundary + "_insertion")
    for erosion in utils.real_erosions:
        self.erosions[erosion] = label_value(erosion + "_del")
    for erosion in utils.effective_erosions:
        # either take the v left / j right erosions from data, or ignore data and keep the entire v and j genes
        self.effective_erosions[erosion] = 0 if dont_mimic_data_read_length else label_value(erosion + "_del")

    # remember the conserved codon words, so they can be reverted if they get mutated
    self.original_cyst_word = str(self.original_seqs["v"][cyst_pos : cyst_pos + 3])
    self.original_tryp_word = str(self.original_seqs["j"][tryp_pos : tryp_pos + 3])

    if debug:
        self.print_gene_choice()
def set_vdj_combo(self, vdj_combo_label, glfo, debug=False, mimic_data_read_length=False):
    """
    Store the gene/length choice label and unpack it into its constituent parts.

    vdj_combo_label is read through the positions listed in utils.index_keys.
    glfo is accessed through its 'seqs', 'locus' and '<codon>-positions'
    entries. Effective erosions are copied from the label only when
    mimic_data_read_length is set; otherwise they are zero. If debug is set,
    print_gene_choice() is called once everything has been filled in.
    """

    def label_field(key):
        # raw label entry at the position utils.index_keys assigns to <key>
        return vdj_combo_label[utils.index_keys[key]]

    self.vdj_combo_label = vdj_combo_label

    for region in utils.regions:
        gene = label_field(region + '_gene')
        self.genes[region] = gene
        germline = glfo['seqs'][region][gene]
        # one random nuke is drawn per sequence and substituted for every N in it (a.t.m.)
        fill_nuke = utils.int_to_nucleotide(random.randint(0, 3))
        self.original_seqs[region] = germline.replace('N', fill_nuke)

    conserved = utils.conserved_codons[glfo['locus']]
    for region, codon in conserved.items():
        # position in the uneroded germline gene
        self.pre_erosion_codon_positions[region] = glfo[codon + '-positions'][self.genes[region]]

    for boundary in utils.boundaries:
        self.insertion_lengths[boundary] = int(label_field(boundary + '_insertion'))
    for erosion in utils.real_erosions:
        self.erosions[erosion] = int(label_field(erosion + '_del'))
    for erosion in utils.effective_erosions:
        # either take the v left / j right erosions from data, or ignore data and keep the entire v and j genes
        self.effective_erosions[erosion] = int(label_field(erosion + '_del')) if mimic_data_read_length else 0

    if debug:
        self.print_gene_choice()
def set_vdj_combo(self, vdj_combo_label, cyst_positions, tryp_positions, all_seqs,
                  debug=False, dont_mimic_data_read_length=False):
    """
    Record the gene/length choice label, then fill in each piece it encodes.

    Entries of vdj_combo_label are located via utils.index_keys. Per-region
    germline sequences come from all_seqs, the cyst position from the
    'cysteine-position' entry of cyst_positions for the chosen v gene, and the
    tryp position from tryp_positions for the chosen j gene. When
    dont_mimic_data_read_length is set the effective erosions are all zero,
    otherwise they are read from the label. print_gene_choice() is called at the
    end if debug is set.
    """
    index_of = utils.index_keys

    def as_int(name):
        # integer value of the label entry called <name>
        return int(vdj_combo_label[index_of[name]])

    self.vdj_combo_label = vdj_combo_label

    for region in utils.regions:
        chosen_gene = vdj_combo_label[index_of[region + '_gene']]
        self.genes[region] = chosen_gene
        uneroded_seq = all_seqs[region][chosen_gene]
        # replace any Ns with a random nuke (a.t.m. the same nuke is used for all Ns in a given seq)
        random_nuke = utils.int_to_nucleotide(random.randint(0, 3))
        self.original_seqs[region] = uneroded_seq.replace('N', random_nuke)

    # cyst position is relative to the uneroded v
    self.local_cyst_position = cyst_positions[self.genes['v']]['cysteine-position']
    # tryp position is relative to the j only
    self.local_tryp_position = int(tryp_positions[self.genes['j']])
    self.cdr3_length = as_int('cdr3_length')

    for boundary in utils.boundaries:
        self.insertion_lengths[boundary] = as_int(boundary + '_insertion')

    for erosion in utils.real_erosions:
        self.erosions[erosion] = as_int(erosion + '_del')

    for erosion in utils.effective_erosions:
        if dont_mimic_data_read_length:
            # ignore data, and keep the entire v and j genes
            self.effective_erosions[erosion] = 0
        else:
            # use the v left and j right erosions from data
            self.effective_erosions[erosion] = as_int(erosion + '_del')

    # keep the original conserved codon words around, so they can be restored if they get mutated
    cyst_start = self.local_cyst_position
    tryp_start = self.local_tryp_position
    self.original_cyst_word = str(self.original_seqs['v'][cyst_start:cyst_start + 3])
    self.original_tryp_word = str(self.original_seqs['j'][tryp_start:tryp_start + 3])

    if debug:
        self.print_gene_choice()
def set_vdj_combo(self, vdj_combo_label, glfo, debug=False, mimic_data_read_length=False):
    """
    Record the gene/length choice label, then fill in each piece it encodes.

    Entries of vdj_combo_label are located via utils.index_keys. glfo is
    accessed through its 'seqs', 'chain' and '<codon>-positions' entries.
    Effective erosions are read from the label only when mimic_data_read_length
    is set, and are zero otherwise. print_gene_choice() is called at the end if
    debug is set.
    """
    index_of = utils.index_keys

    def as_int(name):
        # integer value of the label entry called <name>
        return int(vdj_combo_label[index_of[name]])

    self.vdj_combo_label = vdj_combo_label

    for region in utils.regions:
        chosen_gene = vdj_combo_label[index_of[region + '_gene']]
        self.genes[region] = chosen_gene
        uneroded_seq = glfo['seqs'][region][chosen_gene]
        # replace any Ns with a random nuke (a.t.m. the same nuke is used for all Ns in a given seq)
        random_nuke = utils.int_to_nucleotide(random.randint(0, 3))
        self.original_seqs[region] = uneroded_seq.replace('N', random_nuke)

    for region, codon in utils.conserved_codons[glfo['chain']].items():
        positions = glfo[codon + '-positions']
        self.local_codon_positions[region] = positions[self.genes[region]]  # position in uneroded germline gene

    for boundary in utils.boundaries:
        self.insertion_lengths[boundary] = as_int(boundary + '_insertion')

    for erosion in utils.real_erosions:
        self.erosions[erosion] = as_int(erosion + '_del')

    for erosion in utils.effective_erosions:
        if not mimic_data_read_length:
            # ignore data, and keep the entire v and j genes
            self.effective_erosions[erosion] = 0
        else:
            # use the v left and j right erosions from data
            self.effective_erosions[erosion] = as_int(erosion + '_del')

    if debug:
        self.print_gene_choice()