def tset_to_numpy(tset: TSet):
    """Cache *tset* and return all of its items as one flat list.

    The TSet is cached, its data object is fetched with ``get_data()``, and
    every value yielded by ``partition.consumer()`` is collected, walking the
    partitions in the order ``get_partitions()`` returns them.

    NOTE(review): despite the name, the result is a plain Python ``list``;
    no NumPy conversion happens in this function.
    """
    cached_data = tset.cache().get_data()
    return [
        item
        for partition in cached_data.get_partitions()
        for item in partition.consumer()
    ]
def create_source(self, source_function: SourceFunc, parallelism=0) -> TSet:
    """Create a source TSet from a user-supplied ``SourceFunc``.

    The function is wrapped in a ``SourceWrapper``, serialized with
    ``cp.dumps`` and passed to the entrypoint's ``createSource`` call; the
    reference that call returns is wrapped in a ``TSet`` bound to this object.

    Args:
        source_function: The source implementation; must be an instance of
            ``SourceFunc``.
        parallelism: Forwarded unchanged to ``createSource``.

    Returns:
        A ``TSet`` built from the returned source reference and ``self``.

    Raises:
        TypeError: If ``source_function`` is not a ``SourceFunc`` instance.
            ``TypeError`` derives from ``Exception``, so callers that catch
            ``Exception`` continue to work.
    """
    if not isinstance(source_function, SourceFunc):
        raise TypeError(
            'source_function should be an instance of {}'.format(SourceFunc)
        )

    serialized_source = cp.dumps(SourceWrapper(source_function))
    java_src_ref = self.__entrypoint.createSource(serialized_source, parallelism)
    return TSet(java_src_ref, self)
def parallelize_list(self, lst: list, parallelism=0) -> TSet:
    """Build a TSet from an in-memory list.

    ``lst`` and ``parallelism`` are forwarded unchanged to the entrypoint's
    ``parallelize`` call, and the reference it returns is wrapped in a
    ``TSet`` bound to this object.
    """
    list_src_ref = self.__entrypoint.parallelize(lst, parallelism)
    return TSet(list_src_ref, self)