Пример #1
0
    def get_mutation_statistics(self, disease_name, mutation_type):
        """Return normalized per-gene mutation statistics for a disease.

        Every study found for `disease_name` is queried for mutations of
        `mutation_type` in the gene list, and each reported mutation is
        tallied under its gene together with its effect category.

        Parameters
        ----------
        disease_name :
            Forwarded to `self._get_studies_from_disease_name`.
        mutation_type :
            Forwarded to `cbio_client.get_mutations`.

        Returns
        -------
        dict
            Maps a gene symbol to a two-element list
            `[fraction, effect_dict]`. `fraction` is the number of mutations
            tallied for the gene divided by the summed number of sequenced
            cases over all studies. `effect_dict` has the keys 'activate',
            'deactivate' and 'other', each divided by the sum of the three.

        Raises
        ------
        DiseaseNotFoundException
            If no study is found for `disease_name`.
        """
        study_ids = self._get_studies_from_disease_name(disease_name)
        if not study_ids:
            raise DiseaseNotFoundException
        gene_list = self._get_gene_list()
        mutation_dict = {}
        num_case = 0
        for study_id in study_ids:
            num_case += cbio_client.get_num_sequenced(study_id)
            mutations = cbio_client.get_mutations(study_id, gene_list,
                                                  mutation_type)
            gene_change_pairs = zip(mutations['gene_symbol'],
                                    mutations['amino_acid_change'])
            for gene, aa_change in gene_change_pairs:
                effect = self.find_mutation_effect(gene, aa_change)
                # Mutations without a known effect are grouped as 'other'.
                effect_key = 'other' if effect is None else effect
                if gene in mutation_dict:
                    entry = mutation_dict[gene]
                    entry[0] += 1.0
                    entry[1][effect_key] += 1
                else:
                    # First mutation seen for this gene: start its tallies.
                    effect_counts = {'activate': 0.0, 'deactivate': 0.0,
                                     'other': 0.0}
                    effect_counts[effect_key] += 1.0
                    mutation_dict[gene] = [1.0, effect_counts]

        # Turn the raw tallies into fractions, in place.
        for entry in mutation_dict.values():
            entry[0] /= num_case
            effect_counts = entry[1]
            effect_sum = numpy.sum(list(effect_counts.values()))
            for effect_key in ('activate', 'deactivate', 'other'):
                effect_counts[effect_key] /= effect_sum

        return mutation_dict
Пример #2
0
def test_get_num_sequenced():
    """Check that the 'paad_tcga' study reports at least one sequenced case."""
    sequenced_count = cbio_client.get_num_sequenced('paad_tcga')
    assert sequenced_count > 0
Пример #3
0
    def get_mutation_statistics(self, disease_name, mutation_type):
        """Tally the most mutated gene of each study found for a disease.

        For every study found for `disease_name`, mutations of
        `mutation_type` in the gene list are fetched and converted into
        Agents carrying a MutCondition. Only the gene with the most such
        Agents in a study (its "top gene") is tallied for that study.

        Parameters
        ----------
        disease_name :
            Forwarded to `self._get_studies_from_disease_name`.
        mutation_type :
            Forwarded to `cbio_client.get_mutations`.

        Returns
        -------
        dict
            Maps each top gene to a dict with the keys:
            'count' (number of mutations tallied), 'effects' (counts under
            'activate', 'deactivate' and 'other', plus a matching
            '<effect>_percent' entry holding count-of-effect / 'count', i.e.
            a fraction rather than a percentage), 'total_effects'
            (initialized to 0 and not updated here), 'agents' (the tallied
            Agents) and 'fraction' ('count' divided by the summed number of
            sequenced cases). Empty if no study yielded a usable mutation.

        Raises
        ------
        DiseaseNotFoundException
            If no study is found for `disease_name`.
        """
        study_ids = self._get_studies_from_disease_name(disease_name)
        if not study_ids:
            raise DiseaseNotFoundException
        gene_list = self._get_gene_list()
        mutation_dict = {}
        num_case = 0
        logger.info("Found %d studies and a gene_list of %d elements."
                    % (len(study_ids), len(gene_list)))
        # Raw string: \d must reach the regex engine, not be parsed as an
        # (invalid) string escape.
        mut_patt = re.compile(r"([A-Z]+)(\d+)([A-Z]+)")
        for study_id in study_ids:
            try:
                num_case += cbio_client.get_num_sequenced(study_id)
            except Exception as e:
                # Best effort: a study whose case count cannot be obtained is
                # skipped, but the failure is recorded instead of hidden.
                logger.warning("Could not get number of sequenced cases "
                               "for %s: %s" % (study_id, e))
                continue

            mutations = cbio_client.get_mutations(study_id, gene_list,
                                                  mutation_type)

            if not mutations['gene_symbol']:
                logger.info("Found no genes for %s." % study_id)
                continue

            # Create agents from the results of the search.
            agent_dict = {}
            for g, a in zip(mutations['gene_symbol'],
                            mutations['amino_acid_change']):
                m = mut_patt.match(a)
                if m is None:
                    logger.warning("Unrecognized residue: %s" % a)
                    continue
                res_from, pos, res_to = m.groups()
                try:
                    mut = MutCondition(pos, res_from, res_to)
                except InvalidResidueError:
                    logger.warning("Invalid residue: %s or %s."
                                   % (res_from, res_to))
                    continue
                ag = Agent(g, db_refs={'HGNC': hgnc_client.get_hgnc_id(g)},
                           mutations=[mut])
                agent_dict.setdefault(g, []).append(ag)
            if not agent_dict:
                # No usable mutation in this study. Move on to the next one
                # rather than returning, so that statistics gathered from
                # earlier studies are not thrown away.
                logger.info("Found no usable mutations for %s." % study_id)
                continue

            # Get the most mutated gene.
            top_gene = max(agent_dict, key=lambda k: len(agent_dict[k]))
            logger.info("Found %d genes, with top hit %s for %s."
                        % (len(agent_dict), top_gene, study_id))

            if top_gene not in mutation_dict:
                effect_dict = {'activate': 0, 'deactivate': 0,
                               'other': 0}
                mutation_dict[top_gene] = {'count': 0, 'effects': effect_dict,
                                           'total_effects': 0, 'agents': []}
            gene_stats = mutation_dict[top_gene]
            for agent in agent_dict[top_gene]:
                # Get the mutations effects for that gene.
                mutation_effect = self.find_mutation_effect(agent)
                if mutation_effect is None:
                    mutation_effect_key = 'other'
                else:
                    mutation_effect_key = mutation_effect
                gene_stats['count'] += 1
                gene_stats['effects'][mutation_effect_key] += 1
                gene_stats['agents'].append(agent)

        # Calculate normalized entries
        for gene_stats in mutation_dict.values():
            gene_stats['fraction'] = gene_stats['count'] / num_case
            effects = gene_stats['effects']
            # Snapshot the keys: new '*_percent' keys are added while looping.
            for eff in list(effects):
                effects[eff + '_percent'] = effects[eff] / gene_stats['count']

        return mutation_dict
Пример #4
0
def test_get_num_sequenced():
    """Check that the 'paad_tcga' study reports at least one sequenced case."""
    assert cbio_client.get_num_sequenced('paad_tcga') > 0