Exemplo n.º 1
0
    def __init__(self, generative_model, genomic_data):
        """Initialize SequenceGenerationVJ

        This initialization computes all of the cumulative probability
        distributions that will be needed for efficient Monte Carlo sequence
        generation out of a GenerativeModelVJ.

        Note that generative_model.PdelV_given_V and
        generative_model.PdelJ_given_J are renormalized IN PLACE: every
        column with a positive total is rescaled to sum to 1, while all-zero
        columns are left untouched.

        Parameters
        ----------
        generative_model : GenerativeModelVJ
            VJ generative model class containing the model parameters.
        genomic_data : GenomicDataVJ
            VJ genomic data class containing the V and J germline
            sequences and info.

        """

        # Cumulative joint V/J distribution (flattened) and cumulative
        # insertion-length distribution, each normalized to total 1.
        PVJ = generative_model.PVJ
        PinsVJ = generative_model.PinsVJ
        self.CPVJ = (PVJ / np.sum(PVJ)).flatten().cumsum()
        self.CPinsVJ = (PinsVJ / np.sum(PinsVJ)).cumsum()

        # Normalize each V column of the deletion profile in place. Columns
        # with zero total are skipped to avoid dividing by zero.
        PdelV_given_V = generative_model.PdelV_given_V
        for V in range(PdelV_given_V.shape[1]):
            total = np.sum(PdelV_given_V[:, V])
            if total > 0:
                PdelV_given_V[:, V] = PdelV_given_V[:, V] / total

        # After the transpose each row corresponds to one V column above.
        self.given_V_CPdelV = PdelV_given_V.T.cumsum(axis=1)

        # Same normalization for the J deletion profile.
        PdelJ_given_J = generative_model.PdelJ_given_J
        for J in range(PdelJ_given_J.shape[1]):
            total = np.sum(PdelJ_given_J[:, J])
            if total > 0:
                PdelJ_given_J[:, J] = PdelJ_given_J[:, J] / total

        self.given_J_CPdelJ = PdelJ_given_J.T.cumsum(axis=1)

        # Row-wise cumulative sums of the transposed Rvj matrix.
        self.C_Rvj = generative_model.Rvj.T.cumsum(axis=1)

        # Use an identity test here: `== None` on a numpy array compares
        # element-wise and the resulting array cannot be used as an `if`
        # condition.
        if generative_model.first_nt_bias_insVJ is None:
            first_nt_bias_insVJ = calc_steady_state_dist(generative_model.Rvj)
        else:
            first_nt_bias_insVJ = generative_model.first_nt_bias_insVJ

        self.C_first_nt_bias_insVJ = first_nt_bias_insVJ.cumsum()

        self.num_J_genes = generative_model.PVJ.shape[1]

        self.cutV_genomic_CDR3_segs = genomic_data.cutV_genomic_CDR3_segs
        self.cutJ_genomic_CDR3_segs = genomic_data.cutJ_genomic_CDR3_segs
    def __init__(self, generative_model, genomic_data, alphabet_file = None):
        """Initialize PreprocessedParametersVJ

        This initialization computes all of the attributes that will be needed
        for Pgen computation of CDR3 sequences generated by VJ recombination.

        Parameters
        ----------
        generative_model : GenerativeModelVJ
            VJ generative model class containing the model parameters.
        genomic_data : GenomicDataVJ
            VJ genomic data class containing the V and J germline
            sequences and info.
        alphabet_file : str, optional
            File name (full pathing from current directory) for a custom alphabet
            definition. If no file is provided, the default alphabet is used, i.e.
            standard amino acids, undetermined amino acids (B, J, X, and Z), and
            single codon symbols.

        Raises
        ------
        ValueError
            If the class name of generative_model or of genomic_data does not
            end with 'VJ'.

        """
        PreprocessedParameters.__init__(self, generative_model, genomic_data, alphabet_file)

        #Check that the generative_model and genomic_data are VJ
        #(the check is done on the class names of the two arguments)
        model_is_VJ = generative_model.__class__.__name__.endswith('VJ')
        data_is_VJ = genomic_data.__class__.__name__.endswith('VJ')
        if not (model_is_VJ and data_is_VJ):
            raise ValueError('PreprocessedParametersVJ needs a VJ generative model and VJ genomic data')

        self.PVJ = generative_model.PVJ

        #Set the V genomic Pi arrays
        self.PVdelV_nt_pos_vec = None
        self.PVdelV_2nd_nt_pos_per_aa_vec = None
        self.generate_PVdelV_nt_pos_vecs(generative_model, genomic_data) #Computes and sets the above attributes

        #Set the J genomic Pi arrays
        self.PJdelJ_nt_pos_vec = None
        self.PJdelJ_2nd_nt_pos_per_aa_vec = None
        self.generate_PJdelJ_nt_pos_vecs(generative_model, genomic_data) #Computes and sets the above attributes

        #Trim trailing zeros, then zeropad PinsVJ with four zeros
        self.PinsVJ = np.append(np.trim_zeros(generative_model.PinsVJ, 'b'),  [0., 0., 0., 0.])

        #insVJ Dinucleotide bias transition matrix
        #Note, this used to be called 'RnucleotideVJ_per_nucleotideVJ_5prime'
        self.Rvj = generative_model.Rvj

        #Check if first insertion nt probability distribution is given.
        #Note, this is not normally inferred by IGoR. If the nt bias dist isn't
        #present, use the steady-state distributions defined from Rvj.
        #When not using the steady-state distributions we need to compute the
        #nt bias for the position previous to first_nt_bias (R^{-1}p_0)
        #
        #The test must be `is None`: `== None` on a numpy array is an
        #element-wise comparison whose result cannot be used as a condition.
        if generative_model.first_nt_bias_insVJ is None:
            self.first_nt_bias_insVJ = calc_steady_state_dist(self.Rvj)
            #In steady-state zero_nt_bias_insVJ = first_nt_bias_insVJ so no need to recalculate
            self.zero_nt_bias_insVJ = self.first_nt_bias_insVJ
        else:
            self.first_nt_bias_insVJ = generative_model.first_nt_bias_insVJ
            #Require Rvj*zero_nt_bias_insVJ = first_nt_bias_insVJ  --- can use pseudo-inverse
            self.zero_nt_bias_insVJ = np.dot(np.linalg.pinv(self.Rvj), generative_model.first_nt_bias_insVJ)

        #Compute VJ insertion transfer matrices
        self.Tvj = None
        self.Svj = None
        self.Dvj = None
        self.lTvj = None
        self.lDvj = None
        self.generate_VJ_junction_transfer_matrices() #Computes and sets the transfer matrices
    def __init__(self, generative_model, genomic_data, alphabet_file = None):
        """Initialize PreprocessedParametersVDJ

        This initialization computes all of the attributes that will be needed
        for Pgen computation of CDR3 sequences generated by VDJ recombination.

        Parameters
        ----------
        generative_model : GenerativeModelVDJ
            VDJ generative model class containing the model parameters.
        genomic_data : GenomicDataVDJ
            VDJ genomic data class containing the V, D, and J germline
            sequences and info.
        alphabet_file : str, optional
            File name (full pathing from current directory) for a custom alphabet
            definition. If no file is provided, the default alphabet is used, i.e.
            standard amino acids, undetermined amino acids (B, J, X, and Z), and
            single codon symbols.

        Raises
        ------
        ValueError
            If the class name of generative_model or of genomic_data does not
            end with 'VDJ'.

        """
        PreprocessedParameters.__init__(self, generative_model, genomic_data, alphabet_file)

        #Check that the generative_model and genomic_data are VDJ
        #(the check is done on the class names of the two arguments)
        model_is_VDJ = generative_model.__class__.__name__.endswith('VDJ')
        data_is_VDJ = genomic_data.__class__.__name__.endswith('VDJ')
        if not (model_is_VDJ and data_is_VDJ):
            raise ValueError('PreprocessedParametersVDJ needs a VDJ generative model and VDJ genomic data')

        self.cutD_genomic_CDR3_segs = genomic_data.cutD_genomic_CDR3_segs
        #First entry of each genD record is kept as the D allele name
        self.D_allele_names = [D[0] for D in genomic_data.genD]

        #Set the V genomic Pi arrays
        self.PVdelV_nt_pos_vec = None
        self.PVdelV_2nd_nt_pos_per_aa_vec = None
        self.generate_PVdelV_nt_pos_vecs(generative_model, genomic_data) #Computes and sets the above attributes

        #Set the D genomic Pi arrays and info
        self.PD_nt_pos_vec = None
        self.PD_2nd_nt_pos_per_aa_vec = None
        self.min_delDl_given_DdelDr = None
        self.max_delDl_given_DdelDr = None
        self.zeroD_given_D = None
        self.preprocess_D_segs(generative_model, genomic_data) #Computes and sets the above attributes

        self.PdelDldelDr_given_D = generative_model.PdelDldelDr_given_D

        #Set the J genomic Pi arrays and info
        self.PJdelJ_nt_pos_vec = None
        self.PJdelJ_2nd_nt_pos_per_aa_vec = None
        self.generate_PJdelJ_nt_pos_vecs(generative_model, genomic_data) #Computes and sets the above attributes

        #Condition PDJ on J: scale each J column by the reciprocal of its
        #total. Columns whose total is 0 stay all-zero (allow for 0 prob J
        #genes) so no division by zero occurs.
        PDJ = generative_model.PDJ
        PJ = np.sum(PDJ, axis = 0)
        J_has_weight = PJ > 0
        self.PD_given_J = np.zeros(PDJ.shape)
        self.PD_given_J[:, J_has_weight] = np.multiply(PDJ[:, J_has_weight], 1/PJ[J_has_weight])

        #Trim trailing zeros, then zeropad PinsVD and PinsDJ with four zeros
        self.PinsVD = np.append(np.trim_zeros(generative_model.PinsVD, 'b'),  [0., 0., 0., 0.])
        self.PinsDJ = np.append(np.trim_zeros(generative_model.PinsDJ, 'b'),  [0., 0., 0., 0.])

        #insVD and insDJ Dinucleotide bias transition matrices
        #Note, these used to be called 'RnucleotideVD_per_nucleotideVD_5prime'
        #and 'RnucleotideDJ_per_nucleotideDJ_3prime'
        self.Rvd = generative_model.Rvd
        self.Rdj = generative_model.Rdj

        #Check if first insertion nt probability distributions are given.
        #Note, this is not normally inferred by IGoR. If these nt bias dists
        #aren't present, use the steady-state distributions defined from Rvd
        #and Rdj. When not using the steady-state distributions we need to
        #compute the nt bias for the position previous to first_nt_bias
        #(R^{-1}p_0)
        #
        #The tests must be `is None`: `== None` on a numpy array is an
        #element-wise comparison whose result cannot be used as a condition.
        if generative_model.first_nt_bias_insVD is None:
            self.first_nt_bias_insVD = calc_steady_state_dist(self.Rvd)
            #In steady-state zero_nt_bias_insVD = first_nt_bias_insVD so no need to recalculate
            self.zero_nt_bias_insVD = self.first_nt_bias_insVD
        else:
            self.first_nt_bias_insVD = generative_model.first_nt_bias_insVD
            #Require Rvd*zero_nt_bias_insVD = first_nt_bias_insVD  --- can use pseudo-inverse
            self.zero_nt_bias_insVD = np.dot(np.linalg.pinv(self.Rvd), generative_model.first_nt_bias_insVD)

        if generative_model.first_nt_bias_insDJ is None:
            self.first_nt_bias_insDJ = calc_steady_state_dist(self.Rdj)
            #In steady-state zero_nt_bias_insDJ = first_nt_bias_insDJ so no need to recalculate
            self.zero_nt_bias_insDJ = self.first_nt_bias_insDJ
        else:
            self.first_nt_bias_insDJ = generative_model.first_nt_bias_insDJ
            #Require Rdj*zero_nt_bias_insDJ = first_nt_bias_insDJ  --- can use pseudo-inverse
            self.zero_nt_bias_insDJ = np.dot(np.linalg.pinv(self.Rdj), generative_model.first_nt_bias_insDJ)

        #Compute VD insertion transfer matrices
        self.Tvd = None
        self.Svd = None
        self.Dvd = None
        self.lTvd = None
        self.lDvd = None
        self.generate_VD_junction_transfer_matrices() #Computes and sets the transfer matrices

        #Compute DJ insertion transfer matrices
        self.Tdj = None
        self.Sdj = None
        self.Ddj = None
        self.rTdj = None
        self.rDdj = None
        self.generate_DJ_junction_transfer_matrices() #Computes and sets the transfer matrices
Exemplo n.º 4
0
    def __init__(self, generative_model, genomic_data):
        """Initialize SequenceGenerationVDJ

        Precompute the cumulative probability tables used for Monte Carlo
        sequence generation out of a GenerativeModelVDJ.

        As a side effect, the PdelV_given_V, PdelJ_given_J and
        PdelDldelDr_given_D arrays of generative_model are renormalized in
        place: every slice with a positive total is rescaled to sum to 1 and
        all-zero slices are left as they are.

        Parameters
        ----------
        generative_model : GenerativeModelVDJ
            VDJ generative model class containing the model parameters.
        genomic_data : GenomicDataVDJ
            VDJ genomic data class containing the V, D, and J germline
            sequences and info.

        """
        model = generative_model

        # Cumulative gene-usage and insertion-length distributions.
        PV, PDJ = model.PV, model.PDJ
        PinsVD, PinsDJ = model.PinsVD, model.PinsDJ
        self.CPV = (PV / np.sum(PV)).cumsum()
        self.CPDJ = (PDJ / np.sum(PDJ)).flatten().cumsum()
        self.CinsVD = (PinsVD / np.sum(PinsVD)).cumsum()
        self.CinsDJ = (PinsDJ / np.sum(PinsDJ)).cumsum()

        # V deletions: normalize column by column, skipping empty columns.
        delV_table = model.PdelV_given_V
        for v_idx in range(delV_table.shape[1]):
            weight = np.sum(delV_table[:, v_idx])
            if weight > 0:
                delV_table[:, v_idx] = delV_table[:, v_idx] / weight
        self.given_V_CPdelV = delV_table.T.cumsum(axis=1)

        # J deletions: same treatment.
        delJ_table = model.PdelJ_given_J
        for j_idx in range(delJ_table.shape[1]):
            weight = np.sum(delJ_table[:, j_idx])
            if weight > 0:
                delJ_table[:, j_idx] = delJ_table[:, j_idx] / weight
        self.given_J_CPdelJ = delJ_table.T.cumsum(axis=1)

        # D deletions: one 2D slice per index along the third axis.
        delD_table = model.PdelDldelDr_given_D
        num_D = delD_table.shape[2]
        for d_idx in range(num_D):
            weight = np.sum(delD_table[:, :, d_idx])
            if weight > 0:
                delD_table[:, :, d_idx] = delD_table[:, :, d_idx] / weight

        # One flattened cumulative distribution per D slice.
        per_D_cumulative = []
        for d_idx in range(num_D):
            per_D_cumulative.append(delD_table[:, :, d_idx].flatten().cumsum())
        self.given_D_CPdelDldelDr = np.array(per_D_cumulative)

        # Row-wise cumulative sums of the transposed Rvd / Rdj matrices.
        self.C_Rvd = model.Rvd.T.cumsum(axis=1)
        self.C_Rdj = model.Rdj.T.cumsum(axis=1)

        # Fall back to calc_steady_state_dist when no first-nucleotide bias
        # is supplied with the model.
        bias_VD = (calc_steady_state_dist(model.Rvd)
                   if model.first_nt_bias_insVD is None
                   else model.first_nt_bias_insVD)
        bias_DJ = (calc_steady_state_dist(model.Rdj)
                   if model.first_nt_bias_insDJ is None
                   else model.first_nt_bias_insDJ)
        self.C_first_nt_bias_insVD = bias_VD.cumsum()
        self.C_first_nt_bias_insDJ = bias_DJ.cumsum()

        # Sizes taken from the second axis of PDJ and of the D deletion table.
        self.num_J_genes = model.PDJ.shape[1]
        self.num_delDr_poss = delD_table.shape[1]

        # Germline CDR3 segments copied from the genomic data.
        self.cutV_genomic_CDR3_segs = genomic_data.cutV_genomic_CDR3_segs
        self.cutD_genomic_CDR3_segs = genomic_data.cutD_genomic_CDR3_segs
        self.cutJ_genomic_CDR3_segs = genomic_data.cutJ_genomic_CDR3_segs