def pvalues(self, pBackgroundModel, pDataList, pIndexReferencePoint):
    """Compute per-position p-values against a distance-keyed background model.

    Parameters
    ----------
    pBackgroundModel : dict
        Maps relative genomic distance -> indexable pair where [0] and [1]
        are the distribution parameters passed to ``cnb.cdf``.
    pDataList : sequence of float
        Observed values, one per position.
    pIndexReferencePoint : int
        Index of the reference (viewpoint) position inside ``pDataList``;
        used to convert absolute indices to relative distances.

    Returns
    -------
    numpy.ndarray (float64)
        p-values computed as ``1 - cdf``; NaN and inf entries are replaced
        by 1.0 (not significant).
    """
    p_value_list = []
    for i, data_element in enumerate(pDataList):
        relative_distance = i - pIndexReferencePoint
        # Distances outside the fitted model are clamped to the closest
        # modeled distance: min key for upstream, max key for downstream.
        if relative_distance not in pBackgroundModel:
            if relative_distance < 0:
                relative_distance = min(pBackgroundModel.keys())
            else:
                relative_distance = max(pBackgroundModel.keys())
        if data_element == 0.0:
            # cdf contribution of 0 observations is treated as 0; after
            # the 1 - x inversion below this yields a p-value of 1.
            p_value_list.append(0.0)
        else:
            p_value_list.append(
                cnb.cdf(data_element,
                        pBackgroundModel[relative_distance][0],
                        pBackgroundModel[relative_distance][1]))
    # Survival function: p = 1 - cdf.
    p_value_list = 1 - np.array(p_value_list, dtype=np.float64)
    # Replace NaN / inf (e.g. from degenerate model parameters) with a
    # non-significant p-value of 1.0; ~isfinite covers both cases that the
    # original code masked with separate isnan/isinf passes.
    p_value_list[~np.isfinite(p_value_list)] = 1.0
    return p_value_list
def compute_new_p_values(pData, pBackgroundModel, pPValue, pMergedLinesDict, pPeakInteractionsThreshold, pViewpointObj):
    """Recompute p-values from the background model and keep significant hits.

    ``pData[key][-3]`` is overwritten IN PLACE with the new p-value
    (``1 - cnb.cdf`` of the observed value ``pData[key][-1]``).

    Parameters
    ----------
    pData : dict
        key -> record list; [-1] is the observed value, [-3] the p-value slot.
    pBackgroundModel : dict
        key -> indexable pair of distribution parameters for ``cnb.cdf``.
    pPValue : float or dict
        Global significance threshold, or a per-key threshold dict. Any
        other type yields an empty result (as in the original code).
    pMergedLinesDict : dict
        key -> list of underlying BED-like lines used to build the output
        region for accepted hits.
    pPeakInteractionsThreshold : number
        Minimum observed value for a hit to be accepted.
    pViewpointObj : object
        Unused here; kept for interface compatibility.

    Returns
    -------
    (dict, list)
        Accepted records keyed as in ``pData``, and the merged region lines.
    """
    accepted = {}
    accepted_lines = []
    is_scalar_threshold = isinstance(pPValue, float)
    # Only float (global) and dict (per-key) thresholds are supported;
    # anything else accepts nothing, matching the original behavior.
    if not is_scalar_threshold and not isinstance(pPValue, dict):
        return accepted, accepted_lines
    for key in pData:
        if key not in pBackgroundModel:
            log.debug('key not in background')
            continue
        log.debug('Recompute p-values. Old: {}'.format(pData[key][-3]))
        # p-value = survival function of the background distribution at
        # the observed interaction value.
        pData[key][-3] = 1 - cnb.cdf(float(pData[key][-1]),
                                     float(pBackgroundModel[key][0]),
                                     float(pBackgroundModel[key][1]))
        log.debug('new {}\n\n'.format(pData[key][-3]))
        threshold = pPValue if is_scalar_threshold else pPValue[key]
        if pData[key][-3] <= threshold and float(pData[key][-1]) >= pPeakInteractionsThreshold:
            accepted[key] = pData[key]
            # Merged region: chrom/start from the first underlying line,
            # end coordinate from the last one.
            target_content = pMergedLinesDict[key][0][:3]
            target_content[2] = pMergedLinesDict[key][-1][2]
            accepted_lines.append(target_content)
    return accepted, accepted_lines
def compute_p_values_mask(pGenomicDistanceDistributionsObsExp, pGenomicDistanceDistributionsKeyList,
                          pPValuePreselection, pGenomicDistanceDistributionPosition, pResolution,
                          pMinimumInteractionsThreshold, pObsExpThreshold, pQueue):
    """Multiprocessing worker: preselect significant positions per distance.

    For every distance ``key`` a negative binomial is fitted to the obs/exp
    values; positions whose obs/exp value passes ``pObsExpThreshold`` and
    whose p-value passes the preselection threshold are collected.

    Parameters
    ----------
    pGenomicDistanceDistributionsObsExp : dict
        key (distance in bins) -> array-like of obs/exp values.
    pGenomicDistanceDistributionsKeyList : list
        Distance keys this worker should process.
    pPValuePreselection : float or dict
        Global p-value threshold, or per-distance dict keyed by
        ``int(key * pResolution)`` (distance in base pairs).
    pGenomicDistanceDistributionPosition : dict
        key -> positions aligned index-for-index with the obs/exp values.
    pResolution : int
        Matrix resolution in base pairs.
    pMinimumInteractionsThreshold : number
        NOTE(review): unused in this function body; the obs/exp filter
        below uses ``pObsExpThreshold`` — confirm this is intended.
    pObsExpThreshold : number
        Minimum obs/exp value for a position to be considered at all.
    pQueue : queue-like
        Receives the list of accepted positions, or a ``'Fail: ...'``
        string with a traceback on error.
    """
    try:
        true_values = []
        if len(pGenomicDistanceDistributionsKeyList) == 0:
            pQueue.put(true_values)
            return
        scalar_p_value = isinstance(pPValuePreselection, float)
        for key in pGenomicDistanceDistributionsKeyList:
            data_obs_exp = np.array(pGenomicDistanceDistributionsObsExp[key])
            # p-values are only computed for values at/above the obs/exp
            # threshold; smaller values can never be accepted.
            mask = data_obs_exp >= pObsExpThreshold
            # NOTE(review): the distribution is fitted on ALL values of
            # this distance, not only the masked ones — confirm intended.
            nbinom_parameters = fit_nbinom.fit(data_obs_exp)
            p_value = 1 - cnb.cdf(data_obs_exp[mask],
                                  nbinom_parameters['size'],
                                  nbinom_parameters['prob'])
            if scalar_p_value:
                significant = p_value <= pPValuePreselection
            else:
                # Per-distance thresholds are keyed by genomic distance
                # in base pairs.
                significant = p_value <= pPValuePreselection[int(key * pResolution)]
            # Map indices of the masked p-value array back to positions in
            # the original, unmasked data (replaces the manual j counter).
            for local_idx, original_idx in enumerate(np.flatnonzero(mask)):
                if significant[local_idx]:
                    true_values.append(pGenomicDistanceDistributionPosition[key][original_idx])
    except Exception as exp:
        pQueue.put('Fail: ' + str(exp) + traceback.format_exc())
        return
    pQueue.put(true_values)
    return