示例#1
0
文件: cimpl.py 项目: rajithbt/pyim
    def _extract_mapping(self, cimpl_obj, cis_sites):
        """Build an insertion -> CIS mapping from a cimpl result object.

        The CIS sites are converted to an R data frame, passed together
        with ``cimpl_obj`` to cimpl's ``getCISMatrix``, and the resulting
        matrix is reshaped into a lookup table.

        Args:
            cimpl_obj: Object handed unchanged to ``getCISMatrix``
                (presumably the R result of a cimpl run -- confirm
                against the caller).
            cis_sites: CIS sites accepted by ``CisSite.to_frame``. The
                resulting frame must provide the ``id``, ``chromosome``,
                ``scale``, ``start`` and ``end`` columns.

        Returns:
            dict: Maps each insertion id to the ``set`` of CIS ids it was
            assigned to. Insertions without any CIS assignment have no
            entry in the dict.
        """
        # Convert CIS sites to frame format.
        cis_frame = CisSite.to_frame(cis_sites)

        # Convert to R representation for cimpl.
        chr_with_prefix = add_prefix(cis_frame['chromosome'], prefix='chr')

        r_base = importr('base')

        # 'id' and 'chromosome' are wrapped in R's I() -- presumably so R
        # keeps them as plain character vectors instead of factors.
        cis_frame_r = RDataFrame({
            'id':
            r_base.I(StrVector(cis_frame['id'])),
            'chromosome':
            r_base.I(StrVector(chr_with_prefix)),
            'scale':
            StrVector(cis_frame['scale']),
            'start':
            IntVector(cis_frame['start']),
            'end':
            IntVector(cis_frame['end'])
        })
        cis_frame_r.rownames = StrVector(cis_frame['id'])

        # Retrieve cis matrix from cimpl.
        cis_matrix_r = self._cimpl.getCISMatrix(cimpl_obj, cis_frame_r)
        cis_matrix = dataframe_to_pandas(cis_matrix_r)

        # Extract scale information from cis matrix. The scale columns
        # are identified by their 'X' prefix.
        scale_cols = [c for c in cis_matrix.columns if c.startswith('X')]
        cis_matrix_scales = cis_matrix[['id'] + scale_cols]

        # Melt matrix into long format; only the insertion id and the
        # cell value are needed, the originating scale column is dropped.
        mapping = pd.melt(cis_matrix_scales, id_vars=['id'])
        mapping = mapping[['id', 'value']]
        mapping = mapping.rename(columns={
            'id': 'insertion_id',
            'value': 'cis_id'
        })

        # Drop empty rows first, as these correspond to empty cells in
        # the matrix. Then split the cis_id column into individual
        # entries (for cells that list multiple ids).
        #
        # NOTE: ``.loc`` replaces the former ``.ix`` indexer, which was
        # removed in pandas 1.0; for a boolean mask both select the same
        # rows.
        mapping = mapping.loc[mapping['cis_id'] != '']
        mapping = expand_column(mapping, col='cis_id', delimiter='|')

        mapping_dict = {
            ins_id: set(grp['cis_id'])
            for ins_id, grp in mapping.groupby('insertion_id')
        }

        return mapping_dict