def __init__(self, generative_model, genomic_data):
    """Initialize SequenceGenerationVJ

    This initialization computes all of the cumulative probability
    distributions that will be needed for efficient Monte Carlo sequence
    generation out of a GenerativeModelVJ.

    Note that the deletion profiles ``PdelV_given_V`` and ``PdelJ_given_J``
    of ``generative_model`` are normalized IN PLACE (column by column), so
    the model object passed in is modified by this call.

    Parameters
    ----------
    generative_model : GenerativeModelVJ
        VJ generative model class containing the model parameters.
    genomic_data : GenomicDataVJ
        VJ genomic data class containing the V and J germline
        sequences and info.

    """
    #Cumulative distributions of the (flattened) joint V/J usage and of the
    #VJ insertion length, each normalized by its total weight first.
    PVJ = generative_model.PVJ
    self.CPVJ = (PVJ / np.sum(PVJ)).flatten().cumsum()

    PinsVJ = generative_model.PinsVJ
    self.CPinsVJ = (PinsVJ / np.sum(PinsVJ)).cumsum()

    #Normalize each V column of the deletion profile (in place), skipping
    #columns with no weight so they stay all-zero instead of becoming NaN.
    PdelV_given_V = generative_model.PdelV_given_V
    for V in range(PdelV_given_V.shape[1]):
        V_weight = np.sum(PdelV_given_V[:, V])
        if V_weight > 0:
            PdelV_given_V[:, V] = PdelV_given_V[:, V] / V_weight
    self.given_V_CPdelV = PdelV_given_V.T.cumsum(axis=1)

    #Same treatment for the J deletion profile.
    PdelJ_given_J = generative_model.PdelJ_given_J
    for J in range(PdelJ_given_J.shape[1]):
        J_weight = np.sum(PdelJ_given_J[:, J])
        if J_weight > 0:
            PdelJ_given_J[:, J] = PdelJ_given_J[:, J] / J_weight
    self.given_J_CPdelJ = PdelJ_given_J.T.cumsum(axis=1)

    #Cumulative form of the transposed dinucleotide transition matrix.
    self.C_Rvj = generative_model.Rvj.T.cumsum(axis=1)

    #Identity test (not '== None'): comparing a numpy array to None with
    #'==' is elementwise, and using that result in an 'if' raises
    #ValueError whenever an explicit first nt bias array is supplied.
    if generative_model.first_nt_bias_insVJ is None:
        first_nt_bias_insVJ = calc_steady_state_dist(generative_model.Rvj)
    else:
        first_nt_bias_insVJ = generative_model.first_nt_bias_insVJ
    self.C_first_nt_bias_insVJ = first_nt_bias_insVJ.cumsum()

    self.num_J_genes = PVJ.shape[1]

    self.cutV_genomic_CDR3_segs = genomic_data.cutV_genomic_CDR3_segs
    self.cutJ_genomic_CDR3_segs = genomic_data.cutJ_genomic_CDR3_segs
def __init__(self, generative_model, genomic_data, alphabet_file = None):
    """Initialize PreprocessedParametersVJ

    This initialization computes all of the attributes that will be needed
    for Pgen computation of CDR3 sequences generated by VJ recombination.

    Parameters
    ----------
    generative_model : GenerativeModelVJ
        VJ generative model class containing the model parameters.
    genomic_data : GenomicDataVJ
        VJ genomic data class containing the V and J germline
        sequences and info.
    alphabet_file : str, optional
        File name (full pathing from current directory) for a custom alphabet
        definition. If no file is provided, the default alphabet is used, i.e.
        standard amino acids, undetermined amino acids (B, J, X, and Z), and
        single codon symbols.

    Raises
    ------
    ValueError
        If the class name of generative_model or genomic_data does not end
        with 'VJ'.

    """
    PreprocessedParameters.__init__(self, generative_model, genomic_data, alphabet_file)

    #Check that the generative_model and genomic_data are VJ
    if not all([generative_model.__class__.__name__.endswith('VJ'),
                genomic_data.__class__.__name__.endswith('VJ')]):
        raise ValueError('generative_model and genomic_data must both be VJ classes') #Need VJ model and data

    self.PVJ = generative_model.PVJ

    #Set the V genomic Pi arrays
    self.PVdelV_nt_pos_vec = None
    self.PVdelV_2nd_nt_pos_per_aa_vec = None
    self.generate_PVdelV_nt_pos_vecs(generative_model, genomic_data) #Computes and sets the above attributes

    #Set the J genomic Pi arrays
    self.PJdelJ_nt_pos_vec = None
    self.PJdelJ_2nd_nt_pos_per_aa_vec = None
    self.generate_PJdelJ_nt_pos_vecs(generative_model, genomic_data) #Computes and sets the above attributes

    #Trim, then zeropad PinsVJ
    self.PinsVJ = np.append(np.trim_zeros(generative_model.PinsVJ, 'b'), [0., 0., 0., 0.])

    #insVJ Dinucleotide bias transition matrix
    #Note, this used to be called 'RnucleotideVJ_per_nucleotideVJ_5prime'
    self.Rvj = generative_model.Rvj

    #Check if first insertion nt probability distribution is given.
    #Note, this is not normally inferred by IGoR. If the nt bias dist isn't
    #present, use the steady-state distributions defined from Rvj.
    #When not using the steady-state distributions we need to compute the
    #nt bias for the position previous to first_nt_bias (R^{-1}p_0)
    #
    #The test must be 'is None': '== None' on a numpy array is elementwise
    #and its truth value is ambiguous (ValueError) when a bias is supplied.
    if generative_model.first_nt_bias_insVJ is None:
        self.first_nt_bias_insVJ = calc_steady_state_dist(self.Rvj)
        #In steady-state zero_nt_bias_insVJ = first_nt_bias_insVJ so no need to recalculate
        self.zero_nt_bias_insVJ = self.first_nt_bias_insVJ
    else:
        self.first_nt_bias_insVJ = generative_model.first_nt_bias_insVJ
        #Require Rvj*zero_nt_bias_insVJ = first_nt_bias_insVJ --- can use pseudo-inverse
        self.zero_nt_bias_insVJ = np.dot(np.linalg.pinv(self.Rvj), generative_model.first_nt_bias_insVJ)

    #Compute VJ insertion transfer matrices
    self.Tvj = None
    self.Svj = None
    self.Dvj = None
    self.lTvj = None
    self.lDvj = None
    self.generate_VJ_junction_transfer_matrices() #Computes and sets the transfer matrices
def __init__(self, generative_model, genomic_data, alphabet_file = None):
    """Initialize PreprocessedParametersVDJ

    This initialization computes all of the attributes that will be needed
    for Pgen computation of CDR3 sequences generated by VDJ recombination.

    Parameters
    ----------
    generative_model : GenerativeModelVDJ
        VDJ generative model class containing the model parameters.
    genomic_data : GenomicDataVDJ
        VDJ genomic data class containing the V, D, and J germline
        sequences and info.
    alphabet_file : str, optional
        File name (full pathing from current directory) for a custom alphabet
        definition. If no file is provided, the default alphabet is used, i.e.
        standard amino acids, undetermined amino acids (B, J, X, and Z), and
        single codon symbols.

    Raises
    ------
    ValueError
        If the class name of generative_model or genomic_data does not end
        with 'VDJ'.

    """
    PreprocessedParameters.__init__(self, generative_model, genomic_data, alphabet_file)

    #Check that the generative_model and genomic_data are VDJ
    if not all([generative_model.__class__.__name__.endswith('VDJ'),
                genomic_data.__class__.__name__.endswith('VDJ')]):
        raise ValueError('generative_model and genomic_data must both be VDJ classes') #Need VDJ model and data

    self.cutD_genomic_CDR3_segs = genomic_data.cutD_genomic_CDR3_segs
    self.D_allele_names = [D[0] for D in genomic_data.genD]

    #Set the V genomic Pi arrays
    self.PVdelV_nt_pos_vec = None
    self.PVdelV_2nd_nt_pos_per_aa_vec = None
    self.generate_PVdelV_nt_pos_vecs(generative_model, genomic_data) #Computes and sets the above attributes

    #Set the D genomic Pi arrays and info
    self.PD_nt_pos_vec = None
    self.PD_2nd_nt_pos_per_aa_vec = None
    self.min_delDl_given_DdelDr = None
    self.max_delDl_given_DdelDr = None
    self.zeroD_given_D = None
    self.preprocess_D_segs(generative_model, genomic_data) #Computes and sets the above attributes
    self.PdelDldelDr_given_D = generative_model.PdelDldelDr_given_D

    #Set the J genomic Pi arrays and info
    self.PJdelJ_nt_pos_vec = None
    self.PJdelJ_2nd_nt_pos_per_aa_vec = None
    self.generate_PJdelJ_nt_pos_vecs(generative_model, genomic_data) #Computes and sets the above attributes

    #allow for 0 prob J genes
    #The J marginal and its mask are computed once, and only the nonzero
    #entries are inverted, so no division by zero is ever evaluated.
    PDJ = generative_model.PDJ
    J_marginal = np.sum(PDJ, axis = 0)
    J_has_weight = J_marginal > 0
    self.PD_given_J = np.zeros(PDJ.shape)
    self.PD_given_J[:, J_has_weight] = np.multiply(PDJ[:, J_has_weight], 1/J_marginal[J_has_weight])

    #Trim, then zeropad PinsVD and PinsDJ
    self.PinsVD = np.append(np.trim_zeros(generative_model.PinsVD, 'b'), [0., 0., 0., 0.])
    self.PinsDJ = np.append(np.trim_zeros(generative_model.PinsDJ, 'b'), [0., 0., 0., 0.])

    #insVD and insDJ Dinucleotide bias transition matrices
    #Note, these used to be called 'RnucleotideVD_per_nucleotideVD_5prime'
    #and 'RnucleotideDJ_per_nucleotideDJ_3prime'
    self.Rvd = generative_model.Rvd
    self.Rdj = generative_model.Rdj

    #Check if first insertion nt probability distributions are given.
    #Note, this is not normally inferred by IGoR. If these nt bias dists
    #aren't present, use the steady-state distributions defined from Rvd
    #and Rdj. When not using the steady-state distributions we need to
    #compute the nt bias for the position previous to first_nt_bias
    #(R^{-1}p_0)
    #
    #The tests must be 'is None': '== None' on a numpy array is elementwise
    #and its truth value is ambiguous (ValueError) when a bias is supplied.
    if generative_model.first_nt_bias_insVD is None:
        self.first_nt_bias_insVD = calc_steady_state_dist(self.Rvd)
        #In steady-state zero_nt_bias_insVD = first_nt_bias_insVD so no need to recalculate
        self.zero_nt_bias_insVD = self.first_nt_bias_insVD
    else:
        self.first_nt_bias_insVD = generative_model.first_nt_bias_insVD
        #Require Rvd*zero_nt_bias_insVD = first_nt_bias_insVD --- can use pseudo-inverse
        self.zero_nt_bias_insVD = np.dot(np.linalg.pinv(self.Rvd), generative_model.first_nt_bias_insVD)

    if generative_model.first_nt_bias_insDJ is None:
        self.first_nt_bias_insDJ = calc_steady_state_dist(self.Rdj)
        #In steady-state zero_nt_bias_insDJ = first_nt_bias_insDJ so no need to recalculate
        self.zero_nt_bias_insDJ = self.first_nt_bias_insDJ
    else:
        self.first_nt_bias_insDJ = generative_model.first_nt_bias_insDJ
        #Require Rdj*zero_nt_bias_insDJ = first_nt_bias_insDJ --- can use pseudo-inverse
        self.zero_nt_bias_insDJ = np.dot(np.linalg.pinv(self.Rdj), generative_model.first_nt_bias_insDJ)

    #Compute VD insertion transfer matrices
    self.Tvd = None
    self.Svd = None
    self.Dvd = None
    self.lTvd = None
    self.lDvd = None
    self.generate_VD_junction_transfer_matrices() #Computes and sets the transfer matrices

    #Compute DJ insertion transfer matrices
    self.Tdj = None
    self.Sdj = None
    self.Ddj = None
    self.rTdj = None
    self.rDdj = None
    self.generate_DJ_junction_transfer_matrices() #Computes and sets the transfer matrices
def __init__(self, generative_model, genomic_data):
    """Initialize SequenceGenerationVDJ

    Precomputes every cumulative probability distribution used for Monte
    Carlo sequence generation from a GenerativeModelVDJ.

    The deletion profiles ``PdelV_given_V``, ``PdelJ_given_J`` and
    ``PdelDldelDr_given_D`` of ``generative_model`` are normalized in place
    (per gene), so the model object passed in is modified by this call.

    Parameters
    ----------
    generative_model : GenerativeModelVDJ
        VDJ generative model class containing the model parameters.
    genomic_data : GenomicDataVDJ
        VDJ genomic data class containing the V, D, and J germline
        sequences and info.

    """
    model = generative_model

    #Gene usage and insertion length distributions: normalize, then
    #accumulate (the joint D/J array is flattened first).
    self.CPV = np.cumsum(model.PV / np.sum(model.PV))
    self.CPDJ = np.cumsum((model.PDJ / np.sum(model.PDJ)).flatten())
    self.CinsVD = np.cumsum(model.PinsVD / np.sum(model.PinsVD))
    self.CinsDJ = np.cumsum(model.PinsDJ / np.sum(model.PinsDJ))

    #V deletions: rescale every column carrying weight; all-zero columns
    #are left untouched.
    delV = model.PdelV_given_V
    for v_idx in range(delV.shape[1]):
        col_total = np.sum(delV[:, v_idx])
        if col_total > 0:
            delV[:, v_idx] = delV[:, v_idx] / col_total
    self.given_V_CPdelV = np.cumsum(delV.T, axis=1)

    #J deletions, same procedure.
    delJ = model.PdelJ_given_J
    for j_idx in range(delJ.shape[1]):
        col_total = np.sum(delJ[:, j_idx])
        if col_total > 0:
            delJ[:, j_idx] = delJ[:, j_idx] / col_total
    self.given_J_CPdelJ = np.cumsum(delJ.T, axis=1)

    #D deletions: each D gene owns a 2D (first axis x second axis) slice,
    #which is rescaled as a whole.
    delD = model.PdelDldelDr_given_D
    num_D = delD.shape[2]
    for d_idx in range(num_D):
        slice_total = np.sum(delD[:, :, d_idx])
        if slice_total > 0:
            delD[:, :, d_idx] = delD[:, :, d_idx] / slice_total
    #One flattened cumulative distribution per D gene.
    per_D_cumulative = []
    for d_idx in range(num_D):
        per_D_cumulative.append(delD[:, :, d_idx].flatten().cumsum())
    self.given_D_CPdelDldelDr = np.array(per_D_cumulative)

    #Cumulative forms of the transposed dinucleotide transition matrices.
    self.C_Rvd = np.cumsum(model.Rvd.T, axis=1)
    self.C_Rdj = np.cumsum(model.Rdj.T, axis=1)

    #Use the supplied first nt bias when there is one, otherwise fall back
    #to the distribution returned by calc_steady_state_dist.
    bias_VD = model.first_nt_bias_insVD
    if bias_VD is None:
        bias_VD = calc_steady_state_dist(model.Rvd)
    bias_DJ = model.first_nt_bias_insDJ
    if bias_DJ is None:
        bias_DJ = calc_steady_state_dist(model.Rdj)
    self.C_first_nt_bias_insVD = bias_VD.cumsum()
    self.C_first_nt_bias_insDJ = bias_DJ.cumsum()

    self.num_J_genes = model.PDJ.shape[1]
    self.num_delDr_poss = delD.shape[1]

    self.cutV_genomic_CDR3_segs = genomic_data.cutV_genomic_CDR3_segs
    self.cutD_genomic_CDR3_segs = genomic_data.cutD_genomic_CDR3_segs
    self.cutJ_genomic_CDR3_segs = genomic_data.cutJ_genomic_CDR3_segs