def __init__(self):
    """Build a four-row mock dataset of sentence pairs and snapshot it.

    Sets ``self.dataset`` (created through ``Dataset.from_batch``) and
    ``self.original_dataset`` (a deep copy taken right after creation).
    """
    # Column-oriented fake data: each list holds one value per row.
    columns = {
        "text_a": [
            "Before the actor slept, the senator ran.",
            "The lawyer knew that the judges shouted.",
            "If the actor slept, the judge saw the artist.",
            "The lawyers resigned, or the artist slept.",
        ],
        "text_b": [
            "The actor slept.",
            "The judges shouted.",
            "The actor slept.",
            "The artist slept.",
        ],
        "label": [0, 0, 1, 1],
        "z": [1, 0, 1, 0],
        "fast": [False, True, True, False],
    }
    mock_identifier = Identifier(_name="MockDataset", version="2.0")
    self.dataset = Dataset.from_batch(columns, identifier=mock_identifier)

    # Untouched copy, so later changes to self.dataset can be compared
    # against the starting state.
    self.original_dataset = deepcopy(self.dataset)

    # Sanity check: one row per entry in the columns above.
    assert len(self.dataset) == 4
def __init__(self):
    """Build a six-row mock batch, wrap it in a dataset, and snapshot it.

    Sets ``self.batch`` (the raw column dict), ``self.dataset`` (created
    from that batch through ``Dataset.from_batch``) and
    ``self.original_dataset`` (a deep copy taken right after creation).
    """
    sentences = [
        "The man is walking.",
        "The man is running.",
        "The woman is sprinting.",
        "The woman is resting.",
        "The hobbit is flying.",
        "The hobbit is swimming.",
    ]
    # One separate metadata dict per row: four "real" then two "fictional".
    origins = ["real", "real", "real", "real", "fictional", "fictional"]
    row_metadata = [{"source": origin} for origin in origins]

    # Fake batch of data, stored column-wise.
    self.batch = {
        "text": sentences,
        "label": [0, 0, 1, 1, 0, 0],
        "z": [1, 0, 1, 0, 1, 0],
        "fast": [False, True, True, False, False, False],
        "metadata": row_metadata,
    }

    # Fake dataset backed by the batch above.
    mock_identifier = Identifier(_name="MockDataset", version="1.0")
    self.dataset = Dataset.from_batch(self.batch, identifier=mock_identifier)

    # Untouched copy, so later changes to self.dataset can be compared
    # against the starting state.
    self.original_dataset = deepcopy(self.dataset)

    # Sanity check: one row per sentence.
    assert len(self.dataset) == 6
def test_from_batch(self):
    # Checks that Dataset.from_batch exposes every input column plus an
    # "index" column, and reports one row per batch entry.
    batch = {
        "a": [1, 2, 3],
        "b": [True, False, True],
        "c": ["x", "y", "z"],
        "d": [{"e": 2}, {"e": 3}, {"e": 4}],
    }
    dataset = Dataset.from_batch(batch, identifier=Identifier(_name="MyDataset"))

    # Column order is irrelevant here, so compare as sets.
    expected_columns = {"a", "b", "c", "d", "index"}
    self.assertEqual(set(dataset.column_names), expected_columns)

    # Three values per column means three rows.
    self.assertEqual(len(dataset), 3)