def complete_mutexpairs(self, genepairs, p=0.05, maxOverlap=200, parallel_compute_number=0):
    """Select the gene pairs that pass the mutual-exclusivity permutation test.

    One ``Condition`` is built per pair (flagged ``'Mutex': True``), the
    conditions are handed to ``self.set_to_pvalue`` to obtain a p-value per
    pair, and the pairs whose p-value and overlap pass the thresholds are
    analyzed with ``mex.analyze_mutex_set_new``.

    :param genepairs: sized iterable of gene pairs. Every gene must be a key
        of ``self.geneToCases_orig``, whose values are intersected as sets.
    :param p: a pair is kept only if its p-value is strictly below this.
    :param maxOverlap: a pair is kept only if the ``'Overlap'`` reported by
        ``mex.analyze_mutex_set_new`` is at most this value.
    :param parallel_compute_number: when truthy, p-values are computed through
        ``pac.parallel_compute_new`` with this value passed as both ``number``
        and ``procnumber``; otherwise ``self.set_to_pvalue`` is called directly.
    :return: ``(mpairsdict, mgenedict)``. ``mpairsdict`` maps each accepted
        pair to its statistics dictionary, extended with a
        ``'PermutationProbability'`` entry holding the pair's p-value.
        ``mgenedict`` maps each gene of an accepted pair to the set of its
        partner genes.
    """
    # A single-argument print() emits the same text under both the Python 2
    # print statement and the Python 3 print function.
    print("Generating list of %d  mutually exclusive hypotheses to test "
          "on permutation matrices..." % len(genepairs))

    # One (pair, condition) entry per hypothesis to test on the permutation
    # matrices.
    mutex_set_condition_function_list = []
    for genepair in genepairs:
        condition = Condition(None)
        genes = tuple(genepair)
        # Overlap = number of elements shared by the case sets of all genes
        # in the pair, taken from the original (unpermuted) mapping.
        overlap = len(set.intersection(*[self.geneToCases_orig[gene] for gene in genes]))
        condition.set_params([{'Genes': genes, 'Overlap': overlap, 'Mutex': True}])
        mutex_set_condition_function_list.append((genepair, condition))

    print("Done. Now, calculating p-values of hypotheses...")

    # Compute a p-value per hypothesis, optionally in parallel.
    if parallel_compute_number:
        mutex_pair_to_pvalue = pac.parallel_compute_new(
            self.set_to_pvalue,
            [mutex_set_condition_function_list],
            mutex_set_condition_function_list,
            0,
            pac.partition_inputs,
            {0: pac.combine_dictionaries},
            number=parallel_compute_number,
            procnumber=parallel_compute_number)
    else:
        mutex_pair_to_pvalue = self.set_to_pvalue(mutex_set_condition_function_list)

    print("Done. Now, finding mutually exclusive pairs")

    # NOTE(review): the result is presumably keyed by gene pair - confirm
    # against set_to_pvalue.
    mpairsdict = {}
    mgenedict = {}
    for genepair in mutex_pair_to_pvalue:
        pvalue = mutex_pair_to_pvalue[genepair]
        if pvalue < p:
            genes = tuple(genepair)
            mstats = mex.analyze_mutex_set_new(
                self.numCases,
                self.geneToCases_orig,
                self.patientToGenes_orig,
                geneset=genes)
            if mstats['Overlap'] <= maxOverlap:
                mstats['PermutationProbability'] = pvalue
                mpairsdict[genepair] = mstats

                # Record the pair symmetrically in the gene -> partners map.
                gene1, gene2 = genes
                mgenedict.setdefault(gene1, set()).add(gene2)
                mgenedict.setdefault(gene2, set()).add(gene1)

    return mpairsdict, mgenedict
def complete_cooccurpairs(self, genepairs, p=0.05, minCooccur=1, min_cooccurrence_ratio=0.0, parallel_compute_number=0, compute_scores=True):
    """Select the gene pairs that pass the co-occurrence permutation test.

    One ``Condition`` is built per pair (flagged ``'Mutex': False``), the
    conditions are handed to ``self.set_to_pvalue`` to obtain a p-value per
    pair, and the pairs whose p-value, overlap and co-occurrence ratio pass
    the thresholds are analyzed with ``mex.analyze_cooccur_set_new``.

    :param genepairs: sized iterable of gene pairs. Every gene must be a key
        of ``self.geneToCases_orig``, whose values are intersected as sets.
    :param p: a pair is kept only if its p-value is strictly below this.
    :param minCooccur: a pair is kept only if the ``'Overlap'`` reported by
        ``mex.analyze_cooccur_set_new`` is at least this value.
    :param min_cooccurrence_ratio: a pair is kept only if the reported
        ``'CooccurrenceRatio'`` is at least this value.
    :param parallel_compute_number: when truthy, p-values are computed through
        ``pac.parallel_compute_new`` with this value passed as both ``number``
        and ``procnumber``; otherwise ``self.set_to_pvalue`` is called directly.
    :param compute_scores: forwarded unchanged to
        ``mex.analyze_cooccur_set_new``.
    :return: ``(cpairsdict, cgenedict)``. ``cpairsdict`` maps each accepted
        pair to its statistics dictionary, extended with a
        ``'PermutationProbability'`` entry holding the pair's p-value.
        ``cgenedict`` maps each gene of an accepted pair to the set of its
        partner genes.
    """
    # A single-argument print() emits the same text under both the Python 2
    # print statement and the Python 3 print function.
    print("Generating list of %d  co-occurring hypotheses to test "
          "on permutation matrices..." % len(genepairs))

    # One (pair, condition) entry per hypothesis to test on the permutation
    # matrices.
    cooccur_set_condition_function_list = []
    for genepair in genepairs:
        condition = Condition(None)
        genes = tuple(genepair)
        # Overlap = number of elements shared by the case sets of all genes
        # in the pair, taken from the original (unpermuted) mapping.
        overlap = len(set.intersection(*[self.geneToCases_orig[gene] for gene in genes]))
        condition.set_params([{'Genes': genes, 'Overlap': overlap, 'Mutex': False}])
        cooccur_set_condition_function_list.append((genepair, condition))

    print("Done. Now, calculating p-values of hypotheses...")

    # Compute a p-value per hypothesis, optionally in parallel.
    if parallel_compute_number:
        cooccur_pair_to_pvalue = pac.parallel_compute_new(
            self.set_to_pvalue,
            [cooccur_set_condition_function_list],
            cooccur_set_condition_function_list,
            0,
            pac.partition_inputs,
            {0: pac.combine_dictionaries},
            number=parallel_compute_number,
            procnumber=parallel_compute_number)
    else:
        cooccur_pair_to_pvalue = self.set_to_pvalue(cooccur_set_condition_function_list)

    print("Done. Now, finding co-occurring pairs")

    # NOTE(review): the result is presumably keyed by gene pair - confirm
    # against set_to_pvalue.
    cpairsdict = {}
    cgenedict = {}
    for genepair in cooccur_pair_to_pvalue:
        pvalue = cooccur_pair_to_pvalue[genepair]
        if pvalue < p:
            genes = tuple(genepair)
            cstats = mex.analyze_cooccur_set_new(
                self.numCases,
                self.geneToCases_orig,
                self.patientToGenes_orig,
                geneset=genes,
                compute_scores=compute_scores)
            if cstats['Overlap'] >= minCooccur and cstats['CooccurrenceRatio'] >= min_cooccurrence_ratio:
                cstats['PermutationProbability'] = pvalue
                cpairsdict[genepair] = cstats

                # Record the pair symmetrically in the gene -> partners map.
                gene1, gene2 = genes
                cgenedict.setdefault(gene1, set()).add(gene2)
                cgenedict.setdefault(gene2, set()).add(gene1)

    return cpairsdict, cgenedict