Exemplo n.º 1
0
    def write_series(self, group: zarr.Group, name: str, array: np.ndarray,
                     data_type: str) -> None:
        """Write a one-dimensional array into ``group`` as dataset ``name``.

        When ``data_type`` is ``'data_frame'``, a string-typed array (other
        than the one named ``'_index'``) whose number of distinct values is
        at most a tenth of its size is first converted to a
        ``pd.Categorical`` whose categories are ordered by ``natsorted``.

        Categorical data (only considered for ``'data_frame'``) is stored as
        two datasets: the category values under
        ``group['_categories'][name]`` and the integer codes under
        ``group[name]``; the codes dataset carries an ``'ordered'`` attribute
        taken from ``array.ordered``. Anything else is stored as a single
        dataset ``group[name]``. Existing datasets with the same name are
        overwritten.

        Args:
            group: Zarr group that receives the dataset(s).
            name: Name of the dataset to create.
            array: Values to write.
            data_type: Kind of container the series belongs to; only
                ``'data_frame'`` enables the categorical handling.
        """

        def _create(target, data):
            # Object-dtype data is written with ``dtype=str``; every other
            # dtype is passed through unchanged.
            return target.create_dataset(
                name,
                data=data,
                shape=data.shape,
                chunks=calc_chunk(data.shape),
                dtype=str if data.dtype.kind == 'O' else data.dtype,
                compressor=COMPRESSOR,
                overwrite=True)

        if data_type == 'data_frame':
            if not is_categorical_dtype(
                    array) and name != '_index' and is_string_dtype(array):
                keywords = set(array)
                # Convert only when it pays off: at least 10x reduction.
                if len(keywords) <= array.size / 10.0:
                    array = pd.Categorical(array,
                                           categories=natsorted(keywords))

            if is_categorical_dtype(array):
                # write category keys
                categories = group.require_group('_categories')
                values = array.categories.values
                # A categorical may have no categories at all (for example
                # one built from an empty column by the conversion above),
                # so only peek at the first category when there is one.
                if len(values) > 0 and isinstance(values[0], bytes):
                    values = np.array([x.decode() for x in values],
                                      dtype=object)
                _create(categories, values)

                # write codes
                codes_arr = _create(group, array.codes)
                codes_arr.attrs['ordered'] = array.ordered

                return None

        _create(group, array)
    def write_unimodal_data(self, group: zarr.Group, name: str, data: UnimodalData, overwrite: bool = True) -> None:
        """Store ``data`` as the child group ``name`` of ``group``.

        The child group is tagged with ``data_type='UnimodalData'`` and the
        value of ``data.current_matrix()`` (attribute ``'_cur_matrix'``).
        Barcode and feature metadata are always written. Each of the four
        mapping fields is written when ``overwrite`` is true or when the
        field reports ``is_dirty()``.

        Args:
            group: Parent Zarr group.
            name: Name of the child group to obtain or create.
            data: Object to serialize.
            overwrite: Forwarded to ``require_group`` and ``write_mapping``;
                when true every mapping field is written unconditionally.
        """
        target = group.require_group(name, overwrite=overwrite)
        target.attrs.update(data_type='UnimodalData',
                            _cur_matrix=data.current_matrix())

        # The two metadata tables are written regardless of dirty state.
        self.write_dataframe(target, 'barcode_metadata', data.barcode_metadata)
        self.write_dataframe(target, 'feature_metadata', data.feature_metadata)

        mapping_fields = ('matrices', 'metadata', 'barcode_multiarrays',
                          'feature_multiarrays')
        for field in mapping_fields:
            field_mapping = getattr(data, field)
            # Skip untouched mappings only on the incremental path.
            if not overwrite and not field_mapping.is_dirty():
                continue
            self.write_mapping(target, field, field_mapping,
                               overwrite=overwrite)
Exemplo n.º 3
0
    def write_mapping(self,
                      group: zarr.Group,
                      name: str,
                      mapping: dict,
                      overwrite=True) -> None:
        """Write a dict-like object as the child group ``name`` of ``group``.

        Each value is stored according to its kind:

        * scalars are collected into the group's ``'scalar'`` attribute;
        * ``np.ndarray`` values go through ``write_array``;
        * ``pd.DataFrame`` values go through ``write_dataframe``;
        * dict-like values are written recursively (always in full);
        * sparse matrices go through ``write_csr`` and must be
          ``csr_matrix`` instances;
        * anything else is converted with ``np.array`` (categorical values
          with ``astype(str)``) and written through ``write_array``.

        The group is finally tagged with ``data_type='dict'``.

        Args:
            group: Parent Zarr group.
            name: Name of the child group to obtain or create.
            mapping: Key/value pairs to write. When ``overwrite`` is false it
                must also expose ``deleted`` and ``modified`` iterables of
                keys, which drive the incremental update.
            overwrite: When true, write every pair of ``mapping``. When
                false, remove the keys listed in ``mapping.deleted`` and
                rewrite only those listed in ``mapping.modified``.

        Raises:
            AssertionError: If a sparse value is not a ``csr_matrix``.
        """
        sub_group = group.require_group(name, overwrite=overwrite)
        # if overwrite == True, there should be no 'scalar'
        scalar_dict = sub_group.attrs.pop('scalar', {})

        def _write_one_pair(key, value):
            if is_scalar(value):
                # NOTE(review): if this key previously held an array or a
                # group, that member is left in sub_group — confirm whether
                # readers tolerate a key present in both places.
                scalar_dict[key] = value
                return

            # The key now holds a non-scalar value: forget any scalar that
            # was stored under it earlier, otherwise the stale scalar would
            # be written back next to the new array/group.
            scalar_dict.pop(key, None)

            if isinstance(value, np.ndarray):
                self.write_array(sub_group, key, value)
            elif isinstance(value, pd.DataFrame):
                self.write_dataframe(sub_group, key, value)
            elif is_dict_like(value):
                self.write_mapping(sub_group, key, value)
            elif issparse(value):
                assert isinstance(value, csr_matrix)
                self.write_csr(sub_group, key, value)
            else:
                # assume value is either list or tuple, converting it to np.ndarray
                self.write_array(
                    sub_group, key,
                    value.astype(str)
                    if is_categorical_dtype(value) else np.array(value))

        if overwrite:
            for key, value in mapping.items():
                _write_one_pair(key, value)
        else:
            # Incremental update: drop removed keys from wherever they were
            # stored, then rewrite only what changed.
            for key in mapping.deleted:
                if key in scalar_dict:
                    del scalar_dict[key]
                else:
                    del sub_group[key]
            for key in mapping.modified:
                _write_one_pair(key, mapping[key])

        attrs_dict = {'data_type': 'dict'}
        if scalar_dict:
            attrs_dict['scalar'] = scalar_dict
        sub_group.attrs.update(**attrs_dict)