def get_mutation_statistics(self, disease_name, mutation_type):
    """Return per-gene mutation frequencies and effect fractions.

    Parameters
    ----------
    disease_name
        Passed to ``self._get_studies_from_disease_name`` to look up the
        study IDs to aggregate over.
    mutation_type
        Forwarded unchanged to ``cbio_client.get_mutations``.

    Returns
    -------
    dict
        Maps each gene symbol to a two-element list
        ``[frequency, effects]``. ``frequency`` is the number of mutation
        records seen for the gene divided by the total number of
        sequenced cases summed over all studies. ``effects`` is a dict
        with the keys ``'activate'``, ``'deactivate'`` and ``'other'``
        holding the fraction of the gene's mutation records assigned to
        each effect.

    Raises
    ------
    DiseaseNotFoundException
        If no studies are found for ``disease_name``.
    """
    study_ids = self._get_studies_from_disease_name(disease_name)
    if not study_ids:
        raise DiseaseNotFoundException
    gene_list_str = self._get_gene_list_str()

    mutation_dict = {}
    num_case = 0
    for study_id in study_ids:
        num_case += cbio_client.get_num_sequenced(study_id)
        mutations = cbio_client.get_mutations(study_id, gene_list_str,
                                              mutation_type)
        for gene, aa_change in zip(mutations['gene_symbol'],
                                   mutations['amino_acid_change']):
            effect = self.find_mutation_effect(gene, aa_change)
            # Mutations without a known effect are tallied as 'other'.
            effect_key = 'other' if effect is None else effect
            # Initialise explicitly instead of catching KeyError, so that
            # an unexpected effect key is not mistaken for a missing gene.
            if gene not in mutation_dict:
                mutation_dict[gene] = [
                    0.0,
                    {'activate': 0.0, 'deactivate': 0.0, 'other': 0.0},
                ]
            entry = mutation_dict[gene]
            entry[0] += 1.0
            entry[1][effect_key] += 1.0

    # Normalize entries. The builtin sum() is used because numpy.sum() on
    # a Python 3 dict view does not yield a number, and .values() works on
    # both Python 2 and Python 3 (unlike .iteritems()).
    for entry in mutation_dict.values():
        effect_counts = entry[1]
        effect_sum = sum(effect_counts.values())
        entry[0] /= num_case
        for effect_key in effect_counts:
            effect_counts[effect_key] /= effect_sum
    return mutation_dict
def get_mutation_statistics(self, disease_name, mutation_type):
    """Return per-gene mutation frequencies and effect fractions.

    Parameters
    ----------
    disease_name
        Passed to ``self._get_studies_from_disease_name`` to look up the
        study IDs to aggregate over.
    mutation_type
        Forwarded unchanged to ``cbio_client.get_mutations``.

    Returns
    -------
    dict
        Maps each gene symbol to a two-element list
        ``[frequency, effects]``. ``frequency`` is the number of mutation
        records seen for the gene divided by the total number of
        sequenced cases summed over all studies. ``effects`` is a dict
        with the keys ``'activate'``, ``'deactivate'`` and ``'other'``
        holding the fraction of the gene's mutation records assigned to
        each effect.

    Raises
    ------
    DiseaseNotFoundException
        If no studies are found for ``disease_name``.
    """
    study_ids = self._get_studies_from_disease_name(disease_name)
    if not study_ids:
        raise DiseaseNotFoundException
    gene_list_str = self._get_gene_list_str()

    mutation_dict = {}
    num_case = 0
    for study_id in study_ids:
        num_case += cbio_client.get_num_sequenced(study_id)
        mutations = cbio_client.get_mutations(study_id, gene_list_str,
                                              mutation_type)
        for gene, aa_change in zip(mutations['gene_symbol'],
                                   mutations['amino_acid_change']):
            effect = self.find_mutation_effect(gene, aa_change)
            # Mutations without a known effect are tallied as 'other'.
            effect_key = 'other' if effect is None else effect
            # Initialise explicitly instead of catching KeyError, so that
            # an unexpected effect key is not mistaken for a missing gene.
            if gene not in mutation_dict:
                mutation_dict[gene] = [
                    0.0,
                    {'activate': 0.0, 'deactivate': 0.0, 'other': 0.0},
                ]
            entry = mutation_dict[gene]
            entry[0] += 1.0
            entry[1][effect_key] += 1.0

    # Normalize entries. The builtin sum() is used because numpy.sum() on
    # a Python 3 dict view does not yield a number, and .values() works on
    # both Python 2 and Python 3 (unlike .iteritems()).
    for entry in mutation_dict.values():
        effect_counts = entry[1]
        effect_sum = sum(effect_counts.values())
        entry[0] /= num_case
        for effect_key in effect_counts:
            effect_counts[effect_key] /= effect_sum
    return mutation_dict
def test_get_num_sequenced():
    """The 'paad_tcga' study should report a positive sequenced-case count."""
    sequenced_count = cbio_client.get_num_sequenced('paad_tcga')
    assert sequenced_count > 0
def test_get_num_sequenced():
    """The 'paad_tcga' study should report a positive sequenced-case count."""
    sequenced_count = cbio_client.get_num_sequenced('paad_tcga')
    assert sequenced_count > 0