def merge(
    datastreams_and_ns: Tuple[Union[
        Datastream[T],
        Tuple[Datastream[T], int]
    ], ...]
) -> Datastream[T]:
    '''
    Interleave several datastreams into a single merged datastream.

    Samples are drawn one at a time from each underlying datastream
    (also known as "interleave"). Every entry of ``datastreams_and_ns``
    is either a ``Datastream`` or a ``(Datastream, n)`` tuple, where
    ``n`` is the number of samples drawn from that datastream per turn.
    A bare ``Datastream`` is treated as ``(datastream, 1)``.

    The result wraps the concatenation of the underlying datasets
    together with a ``MergeSampler`` built from each datastream's
    sampler, dataset and ``n``.

    >>> datastream1 = Datastream(Dataset.from_subscriptable([1, 1]))
    >>> datastream2 = Datastream(Dataset.from_subscriptable([2, 2]))
    >>> datastream3 = Datastream(Dataset.from_subscriptable([3, 3, 3, 3]))
    >>> merged_datastream = Datastream.merge([
    ...     (datastream1, 1),
    ...     (datastream2, 1),
    ...     (datastream3, 2),
    ... ])
    >>> list(merged_datastream)
    [1, 2, 3, 3, 1, 2, 3, 3]
    '''
    # Normalise every entry to a (datastream, n) pair. The exact-type
    # check is intentional: only plain tuples are taken as explicit pairs.
    pairs = []
    for entry in datastreams_and_ns:
        if type(entry) is tuple:
            pairs.append(entry)
        else:
            pairs.append((entry, 1))

    merged_dataset = Dataset.concat([stream.dataset for stream, _ in pairs])

    # Transpose the per-stream triples into three parallel sequences
    # (samplers, datasets, ns), which MergeSampler takes positionally.
    sampler_args = zip(*[
        (stream.sampler, stream.dataset, n)
        for stream, n in pairs
    ])
    return Datastream(merged_dataset, MergeSampler(*sampler_args))
def merge(
    datastreams_and_ns: Tuple[Union[
        Datastream[T],
        Tuple[Datastream[T], int]
    ], ...]
) -> Datastream[T]:
    '''
    Merge multiple datastreams by interleaving them.

    Each entry is either a ``Datastream`` or a ``(Datastream, n)``
    tuple; a bare ``Datastream`` is handled as ``(datastream, 1)``.
    Optionally you can define different lengths per ``Datastream``
    through ``n``, which is forwarded to the ``MergeSampler`` together
    with that datastream's sampler and dataset.

    .. highlight:: python
    .. code-block:: python

        Datastream.merge([
            (datastream1, 2),
            (datastream2, 1),
            (datastream3, 1),
        ])
    '''
    def as_pair(item):
        # Only plain tuples count as explicit (datastream, n) pairs;
        # anything else is a lone datastream with the default n of 1.
        if type(item) is tuple:
            return item
        return (item, 1)

    normalized = [as_pair(item) for item in datastreams_and_ns]

    combined_dataset = Dataset.concat(
        [stream.dataset for stream, _ in normalized]
    )

    # One (sampler, dataset, n) triple per stream, transposed so that
    # MergeSampler receives samplers, datasets and ns as three arguments.
    triples = [
        (stream.sampler, stream.dataset, n)
        for stream, n in normalized
    ]
    merge_sampler = MergeSampler(*zip(*triples))

    return Datastream(combined_dataset, merge_sampler)
def test_concat_merge():
    '''
    Merge a datastream over a concatenated dataset with a datastream
    over a subset of that same dataset.

    Checks that the subset keeping the first three positions has length
    3 and that iterating the merged datastream yields 6 samples.
    '''
    def keep_first_three(df):
        # Boolean mask: True for positions 0, 1 and 2, False afterwards.
        return [index < 3 for index in range(len(df))]

    short_part = Dataset.from_subscriptable([1, 2])
    long_part = Dataset.from_subscriptable([1, 3, 5])
    dataset = Dataset.concat([short_part, long_part])

    full_stream = Datastream(dataset)
    subset_stream = Datastream(dataset.subset(keep_first_three))
    datastream = Datastream.merge([full_stream, subset_stream])

    assert len(dataset.subset(keep_first_three)) == 3
    assert len(list(datastream)) == 6
def test_combine_concat_merge():
    '''
    Merge a datastream over a concatenation of a zipped and a combined
    dataset with a datastream over a second zipped dataset.

    Checks that iterating the merged datastream yields 2 samples.
    '''
    zipped_ones_twos = Dataset.zip([
        Dataset.from_subscriptable([1]),
        Dataset.from_subscriptable([2]),
    ])
    combined_threes_fours = Dataset.combine([
        Dataset.from_subscriptable([3, 3]),
        Dataset.from_subscriptable([4, 4, 4]),
    ])
    dataset = Dataset.concat([zipped_ones_twos, combined_threes_fours])

    concat_stream = Datastream(dataset)
    zipped_fives_sixes = Dataset.zip([
        Dataset.from_subscriptable([5]),
        Dataset.from_subscriptable([6]),
    ])
    zip_stream = Datastream(zipped_fives_sixes)

    datastream = Datastream.merge([concat_stream, zip_stream])

    assert len(list(datastream)) == 2