def _extract_mapping(self, cimpl_obj, cis_sites):
    """Build a mapping from insertion ids to the CIS ids they belong to.

    The CIS sites are converted to an R data frame, handed to cimpl's
    ``getCISMatrix`` together with ``cimpl_obj``, and the returned matrix
    is reshaped into a dictionary.

    Args:
        cimpl_obj: Object passed through unchanged as the first argument
            of cimpl's ``getCISMatrix``.
        cis_sites: CIS sites accepted by ``CisSite.to_frame``. The
            resulting frame must provide the columns ``id``,
            ``chromosome``, ``scale``, ``start`` and ``end``.

    Returns:
        dict: Keys are the values of the cis matrix ``id`` column (used
        here as insertion ids); each value is the set of CIS ids found
        for that insertion across all scale columns.
    """
    # Convert CIS sites to frame format.
    cis_frame = CisSite.to_frame(cis_sites)

    # Convert to R representation for cimpl. Chromosome names are given
    # a 'chr' prefix before being handed to R.
    chr_with_prefix = add_prefix(cis_frame['chromosome'], prefix='chr')

    # 'id' and 'chromosome' are wrapped in R's I() so they are passed
    # "as is" (presumably to keep them as plain character vectors rather
    # than factors -- confirm against cimpl's expectations).
    r_base = importr('base')
    cis_frame_r = RDataFrame({
        'id': r_base.I(StrVector(cis_frame['id'])),
        'chromosome': r_base.I(StrVector(chr_with_prefix)),
        'scale': StrVector(cis_frame['scale']),
        'start': IntVector(cis_frame['start']),
        'end': IntVector(cis_frame['end']),
    })
    cis_frame_r.rownames = StrVector(cis_frame['id'])

    # Retrieve cis matrix from cimpl.
    cis_matrix_r = self._cimpl.getCISMatrix(cimpl_obj, cis_frame_r)
    cis_matrix = dataframe_to_pandas(cis_matrix_r)

    # Extract scale information from cis matrix. Scale columns are
    # identified by their 'X' name prefix.
    scale_cols = [c for c in cis_matrix.columns if c.startswith('X')]
    cis_matrix_scales = cis_matrix[['id'] + scale_cols]

    # Melt matrix into long format; the scale column name ('variable')
    # is not needed afterwards and is dropped.
    mapping = pd.melt(cis_matrix_scales, id_vars=['id'])
    mapping = mapping[['id', 'value']]
    mapping = mapping.rename(columns={
        'id': 'insertion_id',
        'value': 'cis_id',
    })

    # Drop any empty rows, as these entries are empty cells in the
    # matrix. Uses .loc with a boolean mask: the former .ix indexer was
    # removed from pandas in version 1.0.
    mapping = mapping.loc[mapping['cis_id'] != '']

    # Split cis_id column into individual entries (for entries with
    # multiple ids).
    mapping = expand_column(mapping, col='cis_id', delimiter='|')

    mapping_dict = {
        ins_id: set(grp['cis_id'])
        for ins_id, grp in mapping.groupby('insertion_id')
    }

    return mapping_dict