def store_shard(self, ds_in: Iterable, ds_out: Dataset, offset: int, token=None):
    """
    Compute one shard of ``ds_in`` and write the outcome into ``ds_out``.

    Every item of the shard is handed to ``self.call_func`` (items that are
    ``DatasetView`` or ``Dataset`` instances are converted with ``.numpy()``
    first). The mapped outputs are then passed through ``self._unwrap``,
    ``self._flatten_dict`` (with ``self.schema``) and
    ``self._split_list_to_dicts`` before being uploaded to
    ``ds_out[offset:offset + n]``.

    Before uploading, ``ds_out.append_shape`` is called with the shortfall
    between ``offset + n`` and ``ds_out.shape[0]`` (0 when there is none).

    Returns ``n``, the length of the first value of the dict produced by
    ``self._split_list_to_dicts``; returns 0 without uploading when that
    dict is empty or its first value is empty.
    """

    def _func_argd(item):
        if isinstance(item, (DatasetView, Dataset)):
            item = item.numpy()
        # NOTE(review): per the original remark, an item that happens to be
        # a list is passed on as a single argument rather than treated as a
        # list; the meaning of the leading 0 is not visible here - confirm
        # against call_func.
        return self.call_func(0, item)

    def _flatten(sample):
        return self._flatten_dict(sample, schema=self.schema)

    shard_items = list(ds_in)

    computed = self.map(_func_argd, shard_items)
    computed = self._unwrap(computed)
    flattened = list(self.map(_flatten, computed))
    per_key = self._split_list_to_dicts(flattened)

    columns = list(per_key.values())
    if not columns:
        return 0

    count = len(columns[0])
    if not count:
        return 0

    end = offset + count
    shortfall = max(end - ds_out.shape[0], 0)
    ds_out.append_shape(shortfall)

    self.upload(per_key, ds_out[offset:end], token=token)
    return count
def store_shard(self, ds_in: Iterable, ds_out: Dataset, offset: int, token=None):
    """
    Compute one shard of ``ds_in`` and write the outcome into ``ds_out``.

    Every item of the shard is handed to ``self._func`` together with
    ``self.kwargs``. The mapped outputs are then passed through
    ``self._unwrap``, ``self._flatten_dict`` (with ``self.schema``) and
    ``self._split_list_to_dicts`` before being uploaded to
    ``ds_out[offset:offset + n]``.

    Before uploading, ``ds_out.append_shape`` is called with the shortfall
    between ``offset + n`` and ``ds_out.shape[0]`` (0 when there is none).

    Returns ``n``, the length of the first value of the dict produced by
    ``self._split_list_to_dicts``; returns 0 without uploading when that
    dict is empty or its first value is empty.
    """

    def _func_argd(item):
        return self._func(item, **self.kwargs)

    def _flatten(sample):
        return self._flatten_dict(sample, schema=self.schema)

    shard_items = list(ds_in)

    computed = self.map(_func_argd, shard_items)
    computed = self._unwrap(computed)
    flattened = list(self.map(_flatten, computed))
    per_key = self._split_list_to_dicts(flattened)

    columns = list(per_key.values())
    if not columns:
        return 0

    count = len(columns[0])
    if not count:
        return 0

    end = offset + count
    shortfall = max(end - ds_out.shape[0], 0)
    ds_out.append_shape(shortfall)

    self.upload(per_key, ds_out[offset:end], token=token)
    return count