def read_dataframe(self, group: zarr.Group) -> Union[pd.DataFrame, np.ndarray]:
    """Load a tabular ``group`` as a DataFrame or a NumPy record array.

    Column names come from the ``'columns'`` attribute of the group. When
    that attribute is absent, every array key of the group except
    ``'_index'`` is used instead.

    Args:
        group: Group whose ``attrs`` hold ``'data_type'`` (and, for data
            frames, ``'index_name'``). Its columns are read with
            ``self.read_series``.

    Returns:
        A ``pd.DataFrame`` when ``attrs['data_type']`` is ``'data_frame'``;
        for any other value, a record array built with
        ``np.rec.fromarrays``.
    """
    column_names = group.attrs.get('columns', None)
    if column_names is None:
        # No explicit column list stored: use every array except the index.
        column_names = [key for key in group.array_keys() if key != '_index']

    if group.attrs['data_type'] != 'data_frame':
        # Anything that is not a data frame is treated as a record array.
        field_arrays = [self.read_series(group, name) for name in column_names]
        return np.rec.fromarrays(field_arrays, names=column_names)

    series_by_name = {}
    for name in column_names:
        series_by_name[name] = self.read_series(group, name)

    index_values = self.read_series(group, '_index')
    row_index = pd.Index(
        index_values,
        name=group.attrs['index_name'],
        dtype=index_values.dtype,
    )
    # Deliberately not passing ``columns=``: the original author noted that
    # doing so makes DataFrame construction slow.
    return pd.DataFrame(data=series_by_name, index=row_index)
def read_mapping(self, group: zarr.Group) -> dict:
    """Recursively load the contents of ``group`` into a plain dict.

    The result is filled from three sources, in this order; a later entry
    overwrites an earlier one that shares its key:

    1. the ``'scalar'`` entry of ``group.attrs``, if present, merged in
       unchanged;
    2. every array key of the group, read with ``self.read_array``;
    3. every sub-group, dispatched on its ``'data_type'`` attribute:
       ``'data_frame'`` / ``'record_array'`` -> ``self.read_dataframe``,
       ``'csr_matrix'`` -> ``self.read_csr``,
       ``'dict'`` -> ``self.read_mapping`` (recursion).

    Args:
        group: Group to read.

    Returns:
        A dict mapping each key to its loaded value.

    Raises:
        ValueError: If a sub-group carries a ``'data_type'`` that is not one
            of the values listed above.
    """
    res_dict = {}
    if 'scalar' in group.attrs:
        res_dict.update(group.attrs['scalar'])

    for key in group.array_keys():
        res_dict[key] = self.read_array(group, key)

    for key in group.group_keys():
        sub_group = group[key]
        data_type = sub_group.attrs['data_type']
        if data_type in ('data_frame', 'record_array'):
            res_dict[key] = self.read_dataframe(sub_group)
        elif data_type == 'csr_matrix':
            res_dict[key] = self.read_csr(sub_group)
        elif data_type == 'dict':
            res_dict[key] = self.read_mapping(sub_group)
        else:
            # Raise explicitly rather than ``assert``: asserts are removed
            # under ``python -O``, which would let an unknown type be read
            # as a nested mapping without any error.
            raise ValueError(
                'Unsupported data_type {!r} for sub-group {!r}'.format(data_type, key)
            )

    return res_dict