def score_counts(counts, state, EmissionParameters):
    '''
    Score the counts under every mixture component of one state.

    Row ``k`` of the returned array holds the value of
    ``diag_event_model.pred_log_lik`` evaluated with ``single_mix=k`` plus the
    log of ``EmissionParameters['Diag_event_params']['mix_comp'][state][k]``.

    Returns an array with ``nr_mix_comp`` rows and ``counts.shape[1]`` columns.
    '''
    diag_params = EmissionParameters['Diag_event_params']
    n_components = diag_params['nr_mix_comp']

    # One row per mixture component, one column per column of counts
    result = np.zeros((n_components, counts.shape[1]))

    for k in range(n_components):
        component_log_lik = diag_event_model.pred_log_lik(
            counts, state, EmissionParameters, single_mix=k)
        log_weight = np.log(diag_params['mix_comp'][state][k])
        # Store first, then add in place, so that the casting into the
        # float64 result row happens before the addition
        result[k, :] = component_log_lik
        result[k, :] += log_weight

    return result
def _plot_gene_log_odds(ScoreMatrix, fg_state, ix_bg):
    '''
    Return the foreground row of ScoreMatrix minus the log-sum-exp of the
    rows listed in ix_bg.

    Positions whose normaliser is inf or NaN are dropped from the result. If
    every position is affected, 'Score problematic' is printed and the raw
    foreground row is returned instead.
    '''
    FGScore = ScoreMatrix[fg_state, :]
    norm = logsumexp(ScoreMatrix[ix_bg, :], axis=0)
    ix_bad = np.isinf(norm) + np.isnan(norm)
    if np.sum(ix_bad) < norm.shape[0]:
        return FGScore[ix_bad == 0] - norm[ix_bad == 0]
    print('Score problematic')
    return FGScore


def PlotGene(Sequences, Background, gene, IterParameters, TransitionTypeFirst='nonhomo', no_plot=False, Start=0, Stop=-1, figsize=(6, 8), dir_ylim=(), out_name=None):
    '''
    Plot the coverage, the model probabilities and the decoded path of a gene.

    Parameters:
        Sequences, Background: containers indexed by gene name (they are
            passed to PreloadSequencesForGene and tools.StackData).
        gene: key of the gene to plot.
        IterParameters: indexable; element 0 is used as EmissionParameters,
            element 1 as TransitionParameters.
        TransitionTypeFirst: 'nonhomo' predicts the transitions with
            trans.PredictTransistions; any other value tiles
            log(TransitionParameters[0]) over all positions.
        no_plot: if True, return right after the Viterbi decoding.
        Start, Stop: plotted position range; Stop == -1 means "to the end".
        figsize: forwarded to plt.subplots.
        dir_ylim: if non-empty, used as the y-limits of the DMM panel.
        out_name: if not None, the figure is saved under this name.

    Returns:
        (MostLikelyPath, TransistionProbabilities, EmmisionProbGene) if
        no_plot is True, otherwise
        (MostLikelyPath, TransistionProbabilities, EmmisionProbGeneNB_fg).
    '''
    # reload is a builtin on Python 2 only; on Python 3 it lives in importlib
    try:
        from importlib import reload as _reload
    except ImportError:
        _reload = reload

    _reload(diag_event_model)
    _reload(emission)

    set2 = brewer2mpl.get_map('Dark2', 'qualitative', 8).mpl_colors
    EmissionParameters = IterParameters[0]
    TransitionParameters = IterParameters[1]
    TransitionType = EmissionParameters['TransitionType']
    PriorMatrix = EmissionParameters['PriorMatrix']
    NrOfStates = EmissionParameters['NrOfStates']

    Sequences_per_gene = PreloadSequencesForGene(Sequences, gene)
    Background_per_gene = PreloadSequencesForGene(Background, gene)

    if EmissionParameters['FilterSNPs']:
        Ix = tools.GetModelIx(Sequences_per_gene, Type='no_snps_conv',
                              snps_thresh=EmissionParameters['SnpRatio'],
                              snps_min_cov=EmissionParameters['SnpAbs'],
                              Background=Background_per_gene)
    else:
        Ix = tools.GetModelIx(Sequences_per_gene)

    #2) Compute the probabilities for all states; every matrix starts out as
    #   log(1 / NrOfStates)
    uniform_log_prob = np.log(np.ones((NrOfStates, Ix.shape[0])) * (1 / np.float64(NrOfStates)))
    EmmisionProbGene = uniform_log_prob.copy()
    EmmisionProbGene_Dir = uniform_log_prob.copy()
    EmmisionProbGeneNB_fg = uniform_log_prob.copy()
    EmmisionProbGeneNB_bg = uniform_log_prob.copy()

    CurrStackSum = tools.StackData(Sequences_per_gene)
    CurrStackVar = tools.StackData(Sequences_per_gene, add='no')

    nr_of_genes = len(Sequences.keys())
    gene_nr_dict = {curr_gene: i for i, curr_gene in enumerate(Sequences.keys())}

    #Compute the emission probability
    # NOTE(review): each likelihood is evaluated once and reused for both the
    # combined matrix and the per-model matrix. The original evaluated the
    # same call twice; this assumes the predictors are deterministic.
    for State in range(NrOfStates):
        if EmissionParameters['ExpressionParameters'][0] is not None:
            fg_log_lik = emission.predict_expression_log_likelihood_for_gene(
                CurrStackSum, State, nr_of_genes, gene_nr_dict[gene], EmissionParameters)
            EmmisionProbGene[State, :] = fg_log_lik
            EmmisionProbGeneNB_fg[State, :] = fg_log_lik

            # 'Coverage' and 'Coverage_bck' were handled by identical code
            if EmissionParameters['BckType'] in ('Coverage', 'Coverage_bck'):
                bg_log_lik = emission.predict_expression_log_likelihood_for_gene(
                    tools.StackData(Background, gene, add='only_cov') + 0,
                    State, nr_of_genes, gene_nr_dict[gene], EmissionParameters,
                    curr_type='bg')
                EmmisionProbGene[State, :] += bg_log_lik
                EmmisionProbGeneNB_bg[State, :] = bg_log_lik

        if not EmissionParameters['ign_diag']:
            dir_log_lik = diag_event_model.pred_log_lik(
                CurrStackVar[:, Ix], State, EmissionParameters)
            EmmisionProbGene[State, Ix] += dir_log_lik
            EmmisionProbGene_Dir[State, Ix] = dir_log_lik

    #Get the transition probabilities
    if TransitionTypeFirst == 'nonhomo':
        if TransitionType == 'unif_bck' or TransitionType == 'binary_bck':
            CountsSeq = tools.StackData(Sequences_per_gene, add='all')
            CountsBck = tools.StackData(Background_per_gene, add='only_cov')
            Counts = np.vstack((CountsSeq, CountsBck))
        else:
            Counts = tools.StackData(Sequences_per_gene, add='all')
        TransistionProbabilities = np.float64(
            trans.PredictTransistions(Counts, TransitionParameters, NrOfStates, TransitionType))
    else:
        TransistionProbabilities = np.float64(
            np.tile(np.log(TransitionParameters[0]), (EmmisionProbGene.shape[1], 1, 1)).T)

    MostLikelyPath, LogLik = viterbi.viterbi(np.float64(EmmisionProbGene),
                                             TransistionProbabilities,
                                             np.float64(np.log(PriorMatrix)))

    # Report how many positions were assigned to each state
    for j in range(NrOfStates):
        print(str(np.sum(MostLikelyPath == j)))

    if no_plot:
        return MostLikelyPath, TransistionProbabilities, EmmisionProbGene

    fig, axes = plt.subplots(nrows=9, figsize=figsize)
    fig.subplots_adjust(hspace=1.001)

    Counts = tools.StackData(Sequences_per_gene, gene, add='no')
    if Stop == -1:
        Stop = Counts.shape[1]
    plt_rng = np.array(range(Start, Stop))

    def decorate(ax, ylabel, title):
        # Shared axis cosmetics of all panels
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Position')
        ax.set_title(title)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

    # Panel 0: summed count tracks. Each entry lists the track numbers that
    # are handed to repl_track_nr together with the legend label.
    track_groups = (
        ([2, 16], 'TC'),
        ([0, 1, 3, 5, 6, 7, 8, 10, 11, 12, 13, 15, 17, 18], 'NonTC'),
        ([20], 'Read-ends'),
        ([4, 9, 14, 19], 'Deletions'),
        ([21], 'Coverage'),
    )
    nr_of_rep_fg = len(Sequences[gene]['Coverage'].keys())
    for colour_ix, (tracks, label) in enumerate(track_groups):
        rows = repl_track_nr(tracks, 22, nr_of_rep_fg)
        ppl.plot(axes[0], plt_rng, (np.sum(Counts[rows, :], axis=0))[Start:Stop],
                 label=label, linewidth=2, color=set2[colour_ix])
    decorate(axes[0], 'Counts', 'Coverage and Conversions')

    # NOTE(review): the first replicate is read with the integer key 0 while
    # the remaining ones use str(i), and += accumulates in place into that
    # first entry. Kept as in the original; confirm the key type.
    BckCov = Background_per_gene['Coverage'][0]
    for rep in range(1, len(Background_per_gene['Coverage'].keys())):
        BckCov += Background_per_gene['Coverage'][str(rep)]
    ppl.plot(axes[0], plt_rng, (BckCov.T)[Start:Stop], ls='-', label='Bck',
             linewidth=2, color=set2[len(track_groups)])
    ppl.legend(axes[0])

    # Panel 1: self-transition probabilities
    for j in range(NrOfStates):
        ppl.plot(axes[1], plt_rng, (TransistionProbabilities[j, j, :])[Start:Stop],
                 label='Transition ' + str(j) + ' ' + str(j), linewidth=2, color=set2[j])
    ppl.legend(axes[1])
    decorate(axes[1], 'log-transition probability', 'Transition probability')

    # Panel 2: combined emission probabilities
    for j in range(NrOfStates):
        ppl.plot(axes[2], plt_rng, (EmmisionProbGene[j, :][Start:Stop]),
                 label='Emission ' + str(j), linewidth=2, color=set2[j])
    if EmissionParameters['BckType'] == 'Coverage_bck':
        axes[2].set_ylim(
            (np.min(np.min(EmmisionProbGene[0:2, :][:, Start:Stop])), 1))
    ppl.legend(axes[2])
    decorate(axes[2], 'log-GLM probability', 'Emission probability')

    # Panel 3: decoded path
    ppl.plot(axes[3], plt_rng, MostLikelyPath[Start:Stop])
    decorate(axes[3], 'State', 'Most likely path')

    # Panel 4: diagnostic event model only
    for j in range(NrOfStates):
        ppl.plot(axes[4], plt_rng, EmmisionProbGene_Dir[j, :][Start:Stop],
                 label='Dir State ' + str(j), linewidth=2, color=set2[j])
    if len(dir_ylim) > 0:
        axes[4].set_ylim(dir_ylim)
    ppl.legend(axes[4])
    decorate(axes[4], 'log-DMM probability', 'DMM probability')

    # Panel 5: expression model on the foreground
    for j in range(NrOfStates):
        ppl.plot(axes[5], plt_rng, EmmisionProbGeneNB_fg[j, :][Start:Stop],
                 label='NB fg ' + str(j), linewidth=2, color=set2[j])
    if EmissionParameters['BckType'] == 'Coverage_bck':
        axes[5].set_ylim(
            [np.min(np.min(EmmisionProbGeneNB_fg[0:2, :][:, Start:Stop])), 1])
    ppl.legend(axes[5])
    decorate(axes[5], 'prob', 'prob-fg')

    # Panel 6: expression model on the background
    for j in range(NrOfStates):
        ppl.plot(axes[6], plt_rng, EmmisionProbGeneNB_bg[j, :][Start:Stop],
                 label='NB bg ' + str(j), linewidth=2, color=set2[j])
    if EmissionParameters['BckType'] == 'Coverage_bck':
        axes[6].set_ylim(
            [np.min(np.min(EmmisionProbGeneNB_bg[0:3, :][:, Start:Stop])), 1])
    ppl.legend(axes[6])
    decorate(axes[6], 'prob', 'prob-bg')

    # Panels 7 and 8: log-odds of the foreground state against all others
    fg_state, bg_state = emission.get_fg_and_bck_state(EmissionParameters, final_pred=True)
    # range() has no remove() on Python 3, so build a real list first
    ix_bg = list(range(EmmisionProbGene.shape[0]))
    ix_bg.remove(fg_state)

    # NOTE(review): the score can be shorter than the gene if positions were
    # dropped, in which case it no longer lines up with plt_rng - confirm.
    SiteScore = _plot_gene_log_odds(EmmisionProbGene, fg_state, ix_bg)
    ppl.plot(axes[7], plt_rng, SiteScore[Start:Stop])
    decorate(axes[7], 'log-odd score', 'log-odd score')

    SiteScore = _plot_gene_log_odds(EmmisionProbGene_Dir, fg_state, ix_bg)
    ppl.plot(axes[8], plt_rng, SiteScore[Start:Stop])
    decorate(axes[8], 'DMM log-odd score', 'DMM log-odd score')

    if out_name is not None:
        print('Saving result')
        fig.savefig(out_name)

    plt.show()

    return MostLikelyPath, TransistionProbabilities, EmmisionProbGeneNB_fg
def GetMostLikelyPath(MostLikelyPaths, Sequences, Background, EmissionParameters, TransitionParameters, TransitionTypeFirst, RandomNoise = False, verbosity=1):
    '''
    Compute the most likely state path (Viterbi) for every gene in Sequences.

    Parameters:
        MostLikelyPaths: ignored; a fresh dictionary is built and returned.
        Sequences, Background: containers indexed by gene name.
        EmissionParameters: dictionary with the emission model settings.
        TransitionParameters: element 0 is used as the transition matrix; its
            log is tiled over all positions of a gene.
        TransitionTypeFirst: accepted for interface compatibility; it is not
            read by this function.
        RandomNoise: if True, noise drawn uniformly below the smallest finite
            emission value is mixed in with np.logaddexp.
        verbosity: if > 0, the elapsed time is printed at the end.

    Returns:
        (MostLikelyPaths, LogLikelihood): the path per gene and the sum of
        the per-gene values returned by viterbi.viterbi.
    '''
    MostLikelyPaths = {}
    PriorMatrix = EmissionParameters['PriorMatrix']
    NrOfStates = EmissionParameters['NrOfStates']
    LogLikelihood = 0

    #Number the genes in iteration order
    gene_names = list(Sequences.keys())
    nr_of_genes = len(gene_names)
    gene_nr_dict = {curr_gene: i for i, curr_gene in enumerate(gene_names)}

    t = time.time()
    for i, gene in enumerate(gene_names):
        #Progress indicator
        if i % 1000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()

        #Score the state sequences
        #1) Determine the positions where an observation is possible
        Sequences_per_gene = PreloadSequencesForGene(Sequences, gene)
        Background_per_gene = PreloadSequencesForGene(Background, gene)

        Ix = GetModelIx(Sequences_per_gene, Type='all')
        if np.sum(Ix) == 0:
            # np.int was removed in NumPy 1.24; the builtin int is equivalent
            MostLikelyPaths[gene] = 2 * np.ones((0, Ix.shape[0]), dtype=int)
            continue

        if EmissionParameters['FilterSNPs']:
            Ix = GetModelIx(Sequences_per_gene, Type='no_snps_conv',
                            snps_thresh=EmissionParameters['SnpRatio'],
                            snps_min_cov=EmissionParameters['SnpAbs'],
                            Background=Background_per_gene)
        else:
            Ix = GetModelIx(Sequences_per_gene)

        #2) Compute the probabilities for all states
        # NOTE(review): the initial fill is 1 / NrOfStates and not its log,
        # unlike GetSitesForGene - kept as in the original; confirm intent.
        EmmisionProbGene = np.ones((NrOfStates, Ix.shape[0])) * (1 / np.float64(NrOfStates))

        CurrStackSum = StackData(Sequences_per_gene)
        CurrStackVar = StackData(Sequences_per_gene, add = 'no')

        for State in range(NrOfStates):
            # 'is not None' instead of '== None': the latter compares
            # element-wise if the entry is an array
            if EmissionParameters['ExpressionParameters'][0] is not None:
                EmmisionProbGene[State, :] = emission_prob.predict_expression_log_likelihood_for_gene(CurrStackSum, State, nr_of_genes, gene_nr_dict[gene], EmissionParameters)
                # 'Coverage' and 'Coverage_bck' were handled by identical code
                if EmissionParameters['BckType'] in ('Coverage', 'Coverage_bck'):
                    EmmisionProbGene[State, :] += emission_prob.predict_expression_log_likelihood_for_gene(StackData(Background, gene, add = 'only_cov'), State, nr_of_genes, gene_nr_dict[gene], EmissionParameters, 'bg')
            EmmisionProbGene[State, Ix] += diag_event_model.pred_log_lik(CurrStackVar[:, Ix], State, EmissionParameters)

        if RandomNoise:
            #Add some random noise below the smallest finite entry
            min_finite = np.min(EmmisionProbGene[np.isfinite(EmmisionProbGene)])
            EmmisionProbGene = np.logaddexp(EmmisionProbGene, np.random.uniform(min_finite - 4, min_finite - 1, EmmisionProbGene.shape))

        #Get the transition probabilities
        TransistionProbabilities = np.float64(np.tile(np.log(TransitionParameters[0]), (EmmisionProbGene.shape[1], 1, 1)).T)

        #Perform Viterbi algorithm and store the path
        CurrPath, Currloglik = viterbi.viterbi(np.float64(EmmisionProbGene), TransistionProbabilities, np.float64(np.log(PriorMatrix)))
        MostLikelyPaths[gene] = CurrPath

        #Accumulate the log-likelihood of the gene
        LogLikelihood += Currloglik

        # Drop the large per-gene arrays before the next gene is loaded
        del TransistionProbabilities, EmmisionProbGene, CurrStackSum, CurrStackVar

    if verbosity > 0:
        print('\nDone: Elapsed time: ' + str(time.time() - t))

    return MostLikelyPaths, LogLikelihood
def _collect_sites_for_gene(Sequences, Background, Sites, gene, nr_of_genes, gene_nr, EmissionParameters, fg_state):
    ''' Score the sites of one gene using the already opened data files and return the kept sites as a list. '''
    NrOfStates = EmissionParameters['NrOfStates']

    Sequences_per_gene = PreloadSequencesForGene(Sequences, gene)
    Background_per_gene = PreloadSequencesForGene(Background, gene)

    Ix = GetModelIx(Sequences_per_gene, Type='all')
    if np.sum(Ix) == 0:
        return []

    if EmissionParameters['FilterSNPs']:
        Ix = GetModelIx(Sequences_per_gene, Type='no_snps_conv',
                        snps_thresh=EmissionParameters['SnpRatio'],
                        snps_min_cov=EmissionParameters['SnpAbs'],
                        Background=Background_per_gene)
    else:
        Ix = GetModelIx(Sequences_per_gene, Type='Conv')

    #Only compute the emission probability for regions where a site is
    #(each site is extended by one position on both sides)
    ix_sites = np.zeros_like(Ix)
    ix_sites_len = Ix.shape[0]
    for currsite in Sites:
        ix_sites[max(0, currsite[0] - 1) : min(ix_sites_len, currsite[1] + 1)] = 1
    ix_sites = ix_sites == 1

    #2) Compute the probabilities for all states
    EmmisionProbGene = np.log(np.ones((NrOfStates, Ix.shape[0])) * (1 / np.float64(NrOfStates)))
    CurrStackSum = StackData(Sequences_per_gene)
    CurrStackVar = StackData(Sequences_per_gene, add = 'no')
    CurrStackSumBck = StackData(Background_per_gene, add = 'only_cov')
    CurrStackVarSumm = StackData(Sequences_per_gene, add = 'only_var_summed')
    EmmisionProbGeneDir = np.zeros_like(EmmisionProbGene)

    #Weights of the expression (weight1) and the diagnostic event (weight2)
    #model. A negative glm_weight switches the weighting off; the border
    #values 0 and 1 are nudged so that the log of both weights stays finite.
    glm_weight = EmissionParameters['glm_weight']
    if glm_weight < 0.0:
        weight1 = 1.0
        weight2 = 1.0
    elif glm_weight == 0.0:
        weight1 = 0.0000001
        weight2 = 1.0 - weight1
    elif glm_weight == 1.0:
        weight1 = 0.9999999
        weight2 = 1.0 - weight1
    else:
        weight1 = glm_weight
        weight2 = (1.0 - glm_weight)

    for State in range(NrOfStates):
        EmmisionProbGene[State, ix_sites] = np.log(weight1) + emission_prob.predict_expression_log_likelihood_for_gene(CurrStackSum[:, ix_sites], State, nr_of_genes, gene_nr, EmissionParameters)
        # 'Coverage' and 'Coverage_bck' were handled by identical code
        if EmissionParameters['BckType'] in ('Coverage', 'Coverage_bck'):
            EmmisionProbGene[State, ix_sites] += np.log(weight1) + emission_prob.predict_expression_log_likelihood_for_gene(CurrStackSumBck[:, ix_sites], State, nr_of_genes, gene_nr, EmissionParameters, 'bg')
        EmmisionProbGeneDir[State, Ix] = np.log(weight2) + diag_event_model.pred_log_lik(CurrStackVar[:, Ix], State, EmissionParameters)
        # NOTE(review): EmmisionProbGeneDir already contains log(weight2), so
        # the weight enters twice here - kept as in the original.
        EmmisionProbGene[State, Ix] += np.log(weight2) + EmmisionProbGeneDir[State, Ix]

    Counts = StackData(Sequences_per_gene, add = 'all')
    Score = EmmisionProbGene

    #A strand value of 0 is reported as -1
    strand = Sequences_per_gene['strand']
    if strand == 0:
        strand = -1

    #Get the coverages for the foreground and background
    CountsSeq = StackData(Sequences_per_gene, add = 'only_cov')
    CountsBck = StackData(Background_per_gene, add = 'only_cov')

    #this list contains the returned sites
    sites = []
    for currsite in Sites:
        #Sites with a negative score are dropped, so test this first and
        #skip the remaining statistics for them
        site_score = EvaluateSite(Score, currsite, fg_state)
        if site_score < 0.0:
            continue

        mean_mat_fg, var_mat_fg, mean_mat_bg, var_mat_bg, counts_fg, counts_bg = ComputeStatsForSite(CountsSeq, CountsBck, currsite, fg_state, nr_of_genes, gene_nr, EmissionParameters)

        start, stop = currsite[0], currsite[1]
        #Negative binomial parameters derived from mean and variance
        p = mean_mat_fg / var_mat_fg
        n = (mean_mat_fg ** 2) / (var_mat_fg - mean_mat_fg)

        sites.append({
            'Start': start,
            'Stop': stop,
            'Strand': strand,
            'SiteScore': site_score,
            'Coverage': np.sum(np.sum(Counts[:, start : stop], axis=0)),
            'Variants': np.sum(CurrStackVarSumm[:, start : stop], axis=1),
            'mean_mat_fg': mean_mat_fg,
            'var_mat_fg': var_mat_fg,
            'mean_mat_bg': mean_mat_bg,
            'var_mat_bg': var_mat_bg,
            'counts_fg': counts_fg,
            'counts_bg': counts_bg,
            'pv': nbinom.logsf(counts_fg, n, p),
            'max_pos': get_max_position(Score, currsite, fg_state, strand),
            'dir_score': EvaluateSite(EmmisionProbGeneDir, currsite, fg_state),
        })

    return sites


def GetSitesForGene(data):
    '''
    Determine the score and the statistics of the sites of one gene.

    Parameters:
        data: tuple of (Sites, gene, nr_of_genes, gene_nr, seq_file,
            bck_file, EmissionParameters, TransitionParameters,
            TransitionTypeFirst, fg_state, merge_neighbouring_sites,
            minimal_site_length). The data files are opened from
            EmissionParameters['DataOutFile_seq'] and
            EmissionParameters['DataOutFile_bck']; seq_file, bck_file,
            TransitionParameters, TransitionTypeFirst,
            merge_neighbouring_sites and minimal_site_length are unpacked
            but not read.

    Returns:
        (gene, sites): sites is a list with one dictionary per kept site
        (keys 'Start', 'Stop', 'Strand', 'SiteScore', 'Coverage', 'Variants',
        'mean_mat_fg', 'var_mat_fg', 'mean_mat_bg', 'var_mat_bg',
        'counts_fg', 'counts_bg', 'pv', 'max_pos', 'dir_score'). Sites with a
        negative 'SiteScore' are dropped. The list is empty if there are no
        sites or if GetModelIx(..., Type='all') sums to zero.
    '''
    Sites, gene, nr_of_genes, gene_nr, seq_file, bck_file, EmissionParameters, TransitionParameters, TransitionTypeFirst, fg_state, merge_neighbouring_sites, minimal_site_length = data

    #Nothing to score, so do not even open the data files
    if len(Sites) == 0:
        return gene, []

    #The with-block closes both files on every exit path, including the
    #early return for genes without observations and any exception
    with h5py.File(EmissionParameters['DataOutFile_seq'], 'r') as Sequences, \
            h5py.File(EmissionParameters['DataOutFile_bck'], 'r') as Background:
        sites = _collect_sites_for_gene(Sequences, Background, Sites, gene,
                                        nr_of_genes, gene_nr,
                                        EmissionParameters, fg_state)

    return gene, sites
def ParallelGetMostLikelyPathForGene(data):
    '''
    Compute the most likely state path (Viterbi) for a single gene.

    Parameters:
        data: tuple of (gene, nr_of_genes, gene_nr, EmissionParameters,
            TransitionParameters, TransitionTypeFirst, RandomNoise). The data
            files are opened from EmissionParameters['DataOutFile_seq'] and
            EmissionParameters['DataOutFile_bck']; TransitionTypeFirst is
            unpacked but not read.

    Returns:
        [gene, CurrPath, Currloglik]. If GetModelIx(..., Type='all') sums to
        zero, CurrPath is an empty integer array and the log-likelihood is 0;
        otherwise CurrPath is the Viterbi path converted to int8.
    '''
    gene, nr_of_genes, gene_nr, EmissionParameters, TransitionParameters, TransitionTypeFirst, RandomNoise = data

    #The with-block closes both files on every exit path, including the
    #early return for genes without observations and any exception
    with h5py.File(EmissionParameters['DataOutFile_seq'], 'r') as Sequences, \
            h5py.File(EmissionParameters['DataOutFile_bck'], 'r') as Background:

        #Parse the parameters
        PriorMatrix = EmissionParameters['PriorMatrix']
        NrOfStates = EmissionParameters['NrOfStates']
        fg_state, _ = emission_prob.get_fg_and_bck_state(EmissionParameters, final_pred=True)
        fg_pen = EmissionParameters['fg_pen']

        #Score the state sequences
        #1) Determine the positions where an observation is possible
        Sequences_per_gene = PreloadSequencesForGene(Sequences, gene)
        Background_per_gene = PreloadSequencesForGene(Background, gene)

        Ix = GetModelIx(Sequences_per_gene, Type='all')
        if np.sum(Ix) == 0:
            # np.int was removed in NumPy 1.24; the builtin int is equivalent
            CurrPath = 2 * np.ones((0, Ix.shape[0]), dtype=int)
            return [gene, CurrPath, 0]

        if EmissionParameters['FilterSNPs']:
            Ix = GetModelIx(Sequences_per_gene, Type='no_snps_conv',
                            snps_thresh=EmissionParameters['SnpRatio'],
                            snps_min_cov=EmissionParameters['SnpAbs'],
                            Background=Background_per_gene)
        else:
            Ix = GetModelIx(Sequences_per_gene)

        #2) Compute the probabilities for all states
        # NOTE(review): the initial fill is 1 / NrOfStates and not its log,
        # unlike GetSitesForGene - kept as in the original; confirm intent.
        EmmisionProbGene = np.ones((NrOfStates, Ix.shape[0])) * (1 / np.float64(NrOfStates))

        CurrStackSum = StackData(Sequences_per_gene)
        CurrStackVar = StackData(Sequences_per_gene, add = 'no')
        CurrStackSumBck = StackData(Background_per_gene, add = 'only_cov')

        #Weights of the expression (weight1) and the diagnostic event
        #(weight2) model. A negative glm_weight switches the weighting off;
        #the border values 0 and 1 are nudged so that the log of both
        #weights stays finite.
        glm_weight = EmissionParameters['glm_weight']
        if glm_weight < 0.0:
            weight1 = 1.0
            weight2 = 1.0
        elif glm_weight == 0.0:
            weight1 = 0.0000001
            weight2 = 1.0 - weight1
        elif glm_weight == 1.0:
            weight1 = 0.9999999
            weight2 = 1.0 - weight1
        else:
            weight1 = glm_weight
            weight2 = (1.0 - glm_weight)

        for State in range(NrOfStates):
            if not EmissionParameters['ign_GLM']:
                if isinstance(EmissionParameters['ExpressionParameters'][0], np.ndarray):
                    EmmisionProbGene[State, :] = np.log(weight1) + emission_prob.predict_expression_log_likelihood_for_gene(CurrStackSum, State, nr_of_genes, gene_nr, EmissionParameters)
                    # 'Coverage' and 'Coverage_bck' were handled by identical code
                    if EmissionParameters['BckType'] in ('Coverage', 'Coverage_bck'):
                        EmmisionProbGene[State, :] += np.log(weight1) + emission_prob.predict_expression_log_likelihood_for_gene(CurrStackSumBck, State, nr_of_genes, gene_nr, EmissionParameters, 'bg')
            if not EmissionParameters['ign_diag']:
                EmmisionProbGene[State, Ix] += np.log(weight2) + diag_event_model.pred_log_lik(CurrStackVar[:, Ix], State, EmissionParameters)
            #Penalise the foreground state in the last iteration
            if State == fg_state and EmissionParameters['LastIter']:
                EmmisionProbGene[State, :] -= fg_pen

        if RandomNoise:
            #Add some random noise below the smallest finite entry
            min_finite = np.min(EmmisionProbGene[np.isfinite(EmmisionProbGene)])
            EmmisionProbGene = np.logaddexp(EmmisionProbGene, np.random.uniform(min_finite - 4, min_finite - 0.1, EmmisionProbGene.shape))

        #Get the transition probabilities
        TransistionProbabilities = np.float64(np.tile(np.log(TransitionParameters[0]), (EmmisionProbGene.shape[1], 1, 1)).T)

        CurrPath, Currloglik = viterbi.viterbi(np.float64(EmmisionProbGene), TransistionProbabilities, np.float64(np.log(PriorMatrix)))
        CurrPath = np.int8(CurrPath)

        return [gene, CurrPath, Currloglik]