Exemplo n.º 1
0
    def label_som_patient_genes(self, all_pw_map, patients):
        """Mark somatic-mutation labels on the pathways for every patient.

        When ``self.som_pathways_save_valid(all_pw_map)`` reports a valid
        save, the result of ``self.restore_som_pathways(all_pw_map)`` is
        returned and nothing is recomputed. Otherwise each patient is passed
        to ``label_mapper.mark_label_on_pathways`` under the ``'som'`` label
        and the pathways are then handed to ``self.save_som_pathways``.

        Parameters
        ----------
        all_pw_map
            collection of pathway graphs; forwarded unchanged to the label
            mapper and to the save/restore helpers
            (NOTE(review): container type is not visible here -- confirm
            whether this is a list or a dictionary of graphs)
        patients: :obj:`list`
            sized iterable of per-patient mappings; every entry must provide
            the keys ``'pat_id'`` and ``'mutated_nodes'``

        Returns
        -------
        The restored pathways when a valid save exists, otherwise
        ``all_pw_map`` after labeling.
        """
        # a valid save means the labels were already computed; reuse them
        if self.som_pathways_save_valid(all_pw_map):
            return self.restore_som_pathways(all_pw_map)

        total = len(patients)
        # no usable save: label every patient from scratch
        log('Somatic mutation patient pathway labeling')
        for position, entry in enumerate(patients, start=1):
            pid = entry['pat_id']
            # the mutated nodes are wrapped in an extra list before conversion
            mutated = np.array([entry['mutated_nodes']])
            log(f'Checking patient for somatic mutation {position:4}/{total} pid={pid}')
            label_mapper.mark_label_on_pathways('som', pid, all_pw_map, mutated, self.label)

        self.save_som_pathways(all_pw_map)
        return all_pw_map
Exemplo n.º 2
0
    def label_cnv_patient_genes(self, all_pw_map, patients, cnv_type):
        """Mark copy-number-variation labels on the pathways for every patient.

        When ``self.cnv_pathways_save_valid(all_pw_map, cnv_type)`` reports a
        valid save, the result of
        ``self.restore_cnv_pathways(all_pw_map, cnv_type)`` is returned and
        nothing is recomputed. Otherwise each patient is passed to
        ``label_mapper.mark_label_on_pathways`` under the label
        ``'cnv_<cnv_type>'`` and the pathways are then handed to
        ``self.save_cnv_pathways``.

        Parameters
        ----------
        all_pw_map
            collection of pathway graphs; forwarded unchanged to the label
            mapper and to the save/restore helpers
            (NOTE(review): container type is not visible here -- confirm
            whether this is a list or a dictionary of graphs)
        patients: :obj:`dict`
            mapping of patient id to that patient's data; each value must be
            subscriptable by ``cnv_type``
        cnv_type
            key selecting which entry of each patient is labeled; it is also
            embedded in the label name and in the log messages

        Returns
        -------
        The restored pathways when a valid save exists, otherwise
        ``all_pw_map`` after labeling.
        """
        # a valid save means the labels were already computed; reuse them
        if self.cnv_pathways_save_valid(all_pw_map, cnv_type):
            return self.restore_cnv_pathways(all_pw_map, cnv_type)

        num_pat = len(patients)
        # the label name does not depend on the patient, so build it once
        label_name = f'cnv_{cnv_type}'
        # no usable save: label every patient from scratch
        log(f'CNV {cnv_type} patient pathway labeling')
        # enumerate from 1 so the progress message is 1-based without a
        # hand-maintained counter
        for ind, (pid, patient) in enumerate(patients.items(), start=1):
            # the selected entry is wrapped in an extra list before conversion
            genes = np.array([patient[cnv_type]])
            logr(
                f'Checking patient for cnv {cnv_type} {ind:4}/{num_pat} pid={pid}'
            )
            label_mapper.mark_label_on_pathways(label_name, pid, all_pw_map,
                                                genes, self.label)
        # called with no arguments once the per-patient logr updates are done
        log()

        self.save_cnv_pathways(all_pw_map, cnv_type)
        return all_pw_map
Exemplo n.º 3
0
    def label_rppa_patient_genes(self, all_pw_map, pat_ids, GE, uni_ids):
        """Mark RPPA over/under-expression labels on the pathways per patient.

        When ``self.rppa_pathways_save_valid(all_pw_map)`` reports a valid
        save, the result of ``self.restore_rppa_pathways(all_pw_map)`` is
        returned and nothing is recomputed. Otherwise every patient is labeled
        and the pathways are handed to ``self.save_rppa_pathways``.

        Two modes are selected by ``self.args.continuous``:

        * continuous -- the patient's raw values are written with
          ``mark_cont_label_on_pathways`` and an extra ``'oe-<label>'`` /
          ``'ue-<label>'`` label is derived using ``self.threshold``;
        * discrete -- entries equal to ``1`` are labeled ``'oe'`` and entries
          equal to ``-1`` are labeled ``'ue'``.

        Parameters
        ----------
        all_pw_map
            collection of pathway graphs; forwarded unchanged to the label
            mapper and to the save/restore helpers
            (NOTE(review): container type is not visible here -- confirm
            whether this is a list or a dictionary of graphs)
        pat_ids: :obj:`numpy.ndarray`
            patient ids; must expose ``.shape`` and support elementwise
            ``==`` because it is used to build a mask over the last axis of
            ``GE`` (a plain Python list does not work here)
        GE: :obj:`numpy.ndarray`
            expression data indexed by patient along its last axis
            (genes by patients)
        uni_ids: :obj:`numpy.ndarray`
            uniprot ids, indexed by the flattened per-patient gene mask

        Returns
        -------
        The restored pathways when a valid save exists, otherwise
        ``all_pw_map`` after labeling.
        """
        # a valid save means the labels were already computed; reuse them
        if self.rppa_pathways_save_valid(all_pw_map):
            return self.restore_rppa_pathways(all_pw_map)

        num_pat = pat_ids.shape[0]
        # no usable save: label every patient from scratch
        log('RPPA Over and under expressed patient pathway labeling')
        for pos, pid in enumerate(pat_ids, start=1):
            if self.args.continuous:
                # raw values of this patient, one entry per gene
                gene_vals = (GE[..., pat_ids == pid]).flatten()
                # NOTE(review): 'oe' and 'ue' receive the same gene_vals and
                # the same threshold -- confirm the mapper tells the two
                # directions apart by label name.
                logr(f'RPPA Checking patient for over-expressed  {pos:4}/{num_pat} pid={pid}')
                label_mapper.mark_cont_label_on_pathways('oe', pid, all_pw_map, uni_ids, gene_vals)
                label_mapper.mark_extra_label_on_pathways(f'oe-{self.label}', pid, all_pw_map, 'oe', self.threshold)

                logr(f'RPPA Checking patient for under-expressed {pos:4}/{num_pat} pid={pid}')
                label_mapper.mark_cont_label_on_pathways('ue', pid, all_pw_map, uni_ids, gene_vals)
                label_mapper.mark_extra_label_on_pathways(f'ue-{self.label}', pid, all_pw_map, 'ue', self.threshold)
            else:
                logr(f'RPPA Checking patient for rppa over-expressed  {pos:4}/{num_pat} pid={pid}')
                # extract this patient's values once; both the over- and the
                # under-expressed masks are derived from the same selection
                pat_vals = GE[..., pat_ids == pid]
                over_mask = (pat_vals == 1).flatten()  # over expressed genes
                # boolean mask -> uniprot ids of the over expressed genes
                label_mapper.mark_label_on_pathways('oe', pid, all_pw_map, uni_ids[over_mask], self.label)

                logr(f'RPPA Checking patient for rppa under-expressed {pos:4}/{num_pat} pid={pid}')
                under_mask = (pat_vals == -1).flatten()  # under expressed genes
                # boolean mask -> uniprot ids of the under expressed genes
                label_mapper.mark_label_on_pathways('ue', pid, all_pw_map, uni_ids[under_mask], self.label)
        # called with no arguments once the per-patient logr updates are done
        log()

        self.save_rppa_pathways(all_pw_map)
        return all_pw_map
Exemplo n.º 4
0
    def label_patient_genes(self, all_pw_map, pat_ids, ge, uni_ids):
        """Mark over/under-expression labels on the pathways for every patient.

        When ``self.pathways_save_valid(all_pw_map)`` reports a valid save,
        the result of ``self.restore_pathways(all_pw_map)`` is returned and
        nothing is recomputed. Otherwise, for each patient, genes whose entry
        equals ``1`` are labeled ``'oe'`` and genes whose entry equals ``-1``
        are labeled ``'ue'``; the pathways are then handed to
        ``self.save_pathways``.

        Parameters
        ----------
        all_pw_map
            collection of pathway graphs; forwarded unchanged to the label
            mapper and to the save/restore helpers
            (NOTE(review): container type is not visible here -- confirm
            whether this is a list or a dictionary of graphs)
        pat_ids: :obj:`numpy.ndarray`
            patient ids; must expose ``.shape`` and support elementwise
            ``==`` because it is used as a mask over the last axis of ``ge``
        ge: :obj:`numpy.ndarray`
            gene expression data indexed by patient along its last axis
            (genes by patients)
        uni_ids: :obj:`numpy.ndarray`
            uniprot ids, indexed by the flattened per-patient gene mask

        Returns
        -------
        The restored pathways when a valid save exists, otherwise
        ``all_pw_map`` after labeling.
        """
        # a valid save means the labels were already computed; reuse them
        if self.pathways_save_valid(all_pw_map):
            return self.restore_pathways(all_pw_map)

        total = pat_ids.shape[0]
        # (label, expression level, wording used in the progress message);
        # the trailing blank keeps both messages aligned to the same width
        directions = (
            ('oe', 1, 'over-expressed '),
            ('ue', -1, 'under-expressed'),
        )
        # no usable save: label every patient from scratch
        log('Over and under expressed patient pathway labeling')
        for position, pid in enumerate(pat_ids, start=1):
            for tag, level, wording in directions:
                log(f'Checking patient for {wording} {position:4}/{total} pid={pid}')
                # flattened mask of this patient's genes at the given level
                selected = (ge[..., pat_ids == pid] == level).flatten()
                # boolean mask -> uniprot ids of the matching genes
                matching = uni_ids[selected]
                label_mapper.mark_label_on_pathways(tag, pid, all_pw_map, matching, self.label)

        self.save_pathways(all_pw_map)
        return all_pw_map