def testReRecoWflow(self):
    """
    Build a Workflow out of a ReReco-like request dictionary and check
    what its getters return, before and after a parent dataset is set
    """
    parentName = "/rereco/parent-dataset/tier"
    requestDict = {
        "RequestType": "ReReco",
        "InputDataset": "/rereco/input-dataset/tier",
        "Campaign": "any-campaign",
        "RequestName": "whatever_name",
        "DbsUrl": "a_dbs_url",
        "SiteWhitelist": ["CERN", "FNAL", "DESY"],
        "SiteBlacklist": ["FNAL"],
    }
    requestName = requestDict['RequestName']
    wflow = Workflow(requestName, requestDict)

    # values that are expected to mirror the request dictionary
    self.assertEqual(wflow.getName(), requestName)
    self.assertEqual(wflow.getDbsUrl(), requestDict['DbsUrl'])
    # FNAL is both white and black listed, hence not expected in the site list
    self.assertItemsEqual(wflow.getSitelist(), ["CERN", "DESY"])
    self.assertItemsEqual(wflow.getCampaigns(), [requestDict["Campaign"]])
    self.assertEqual(wflow.getInputDataset(), requestDict["InputDataset"])

    # the request has neither pileup nor parent data
    self.assertItemsEqual(wflow.getPileupDatasets(), set())
    self.assertFalse(wflow.hasParents())
    self.assertEqual(wflow.getParentDataset(), "")

    # no block information has been provided so far
    for blockGetter in (wflow.getPrimaryBlocks,
                        wflow.getSecondarySummary,
                        wflow.getParentBlocks):
        self.assertEqual(blockGetter(), {})

    # a key that is absent from the request is expected to yield None
    self.assertEqual(wflow._getValue("NoKey"), None)

    # only the input dataset is mapped to a campaign at this point
    self.assertEqual(len(wflow.getDataCampaignMap()), 1)

    # setting the parent dataset is expected to add one more map entry
    wflow.setParentDataset(parentName)
    self.assertEqual(wflow.getParentDataset(), parentName)
    self.assertEqual(len(wflow.getDataCampaignMap()), 2)
def testGetChunkBlocks3(self):
    """
    Exercise `getChunkBlocks` with a child to parent block relationship
    in place, asking for one chunk and then for two chunks
    """
    primaryBlocks = {
        "block_A": {"blockSize": 1, "locations": ["Site_A"]},
        "block_B": {"blockSize": 2, "locations": ["Site_B"]},
    }
    parentBlocks = {
        "parent_A": {"blockSize": 11, "locations": ["Site_A"]},
        "parent_B": {"blockSize": 12, "locations": ["Site_B"]},
        "parent_C": {"blockSize": 13, "locations": ["Site_A", "Site_B"]},
    }
    childToParents = {
        "block_A": ["parent_B", "parent_D"],  # parent_D has no replicas!
        "block_B": ["parent_A", "parent_C"],
    }

    wflow = Workflow("workflow_1", {"RequestType": "TaskChain",
                                    "InputDataset": "Dataset_name_XXX"})
    # now set a parent
    wflow.setParentDataset("Parent_dataset_XXX")
    wflow.setPrimaryBlocks(primaryBlocks)
    wflow.setParentBlocks(parentBlocks)
    wflow.setChildToParentBlocks(childToParents)

    # (number of chunks requested, expected block names, expected sizes);
    # parent_D is not expected in any chunk
    scenarios = [
        (1,
         [{"block_A", "block_B", "parent_A", "parent_B", "parent_C"}],
         [39]),
        (2,
         [{"block_B", "parent_A", "parent_C"}, {"block_A", "parent_B"}],
         [26, 13]),
    ]
    for numChunks, expectedBlocks, expectedSizes in scenarios:
        blockChunks, sizeChunks = wflow.getChunkBlocks(numChunks)

        self.assertEqual(len(blockChunks), len(expectedBlocks))
        for idx, blockNames in enumerate(expectedBlocks):
            self.assertItemsEqual(blockChunks[idx], blockNames)

        self.assertEqual(len(sizeChunks), len(expectedSizes))
        for idx, chunkSize in enumerate(expectedSizes):
            self.assertEqual(sizeChunks[idx], chunkSize)
def testParentageRelationship(self):
    """
    Check the setters and getters dealing with the parent dataset, the
    primary/parent blocks and the child to parent block relationship
    """
    primaryBlocks = {
        "block_A": {"blockSize": 1, "locations": ["Site_A"]},
        "block_B": {"blockSize": 2, "locations": ["Site_B"]},
    }
    parentBlocks = {
        "parent_A": {"blockSize": 11, "locations": ["Site_A"]},
        "parent_B": {"blockSize": 12, "locations": ["Site_B"]},
        "parent_C": {"blockSize": 13, "locations": ["Site_A", "Site_B"]},
    }
    childToParents = {
        "block_A": ["parent_B", "parent_D"],  # parent_D has no replicas!
        "block_B": ["parent_A", "parent_C"],
    }
    requestDict = {
        "RequestType": "TaskChain",
        "InputDataset": "Dataset_name_XXX",
        "IncludeParents": True,
    }
    wflow = Workflow("workflow_1", requestDict)

    # parent dataset: empty string until explicitly set
    self.assertEqual(wflow.getParentDataset(), "")
    wflow.setParentDataset("Parent_dataset_XXX")
    self.assertEqual(wflow.getParentDataset(), "Parent_dataset_XXX")

    # primary blocks: empty dict until explicitly set
    self.assertEqual(wflow.getPrimaryBlocks(), {})
    wflow.setPrimaryBlocks(primaryBlocks)
    primaryNames = list(wflow.getPrimaryBlocks())
    self.assertItemsEqual(primaryNames, ["block_A", "block_B"])

    # parent blocks: empty dict until explicitly set
    self.assertEqual(wflow.getParentBlocks(), {})
    wflow.setParentBlocks(parentBlocks)
    parentNames = list(wflow.getParentBlocks())
    self.assertItemsEqual(parentNames, ["parent_A", "parent_B", "parent_C"])

    # child to parent relationship: empty dict until explicitly set
    self.assertEqual(wflow.getChildToParentBlocks(), {})
    wflow.setChildToParentBlocks(childToParents)
    # NOTE(review): if the getter returns a dict, this iterates over the
    # keys only, so the lists of parent blocks are not compared - confirm
    # whether that is the intention
    self.assertItemsEqual(wflow.getChildToParentBlocks(), childToParents)
def testGetChunkBlocks1(self):
    """
    Request a single chunk from `getChunkBlocks`, first with primary
    blocks only and then with a parent dataset and parent blocks as well
    """
    primaryBlocks = {
        "block_A": {"blockSize": 1, "locations": ["Site_A", "Site_B"]},
    }
    parentBlocks = {
        "parent_A": {"blockSize": 11, "locations": ["Site_A", "Site_B"]},
        "parent_B": {"blockSize": 8, "locations": []},
    }
    wflow = Workflow("workflow_1", {"RequestType": "TaskChain",
                                    "InputDataset": "Dataset_name_XXX"})

    def checkSingleChunk(expectedBlocks, expectedSize):
        """Ask for one chunk and compare its block names and total size"""
        blockChunks, sizeChunks = wflow.getChunkBlocks(1)
        self.assertEqual(len(blockChunks), 1)
        self.assertItemsEqual(blockChunks[0], expectedBlocks)
        self.assertEqual(len(sizeChunks), 1)
        self.assertEqual(sizeChunks[0], expectedSize)

    # primary blocks only
    wflow.setPrimaryBlocks(primaryBlocks)
    checkSingleChunk({"block_A"}, 1)

    # now set a parent; no child to parent relationship is provided and
    # both parent blocks (even the one without locations) are expected
    wflow.setParentDataset("Parent_dataset_XXX")
    wflow.setParentBlocks(parentBlocks)
    checkSingleChunk({"block_A", "parent_A", "parent_B"}, 20)
def testCampaignMap(self):
    """
    Test setting the data campaign map for a TaskChain-like request

    Every entry of the map is compared - keys and values - against the
    expected `type`, `campaign` and `name`, both before and after a
    parent dataset is set.
    """
    parentDset = "/any/parent-dataset/tier"
    tChainSpec = {
        "RequestType": "TaskChain",
        "TaskChain": 4,
        "Campaign": "top-campaign",
        "RequestName": "whatever_name",
        "Task1": {"InputDataset": "/task1/input-dataset/tier",
                  "Campaign": "task1-campaign",
                  "IncludeParents": True},
        "Task2": {"DataPileup": "/task2/data-pileup/tier"},
        "Task3": {"MCPileup": "/task3/mc-pileup/tier",
                  "Campaign": "task3-campaign"},
        "Task4": {"MCPileup": "/task3/mc-pileup/tier",
                  "Campaign": "task3-campaign"},
    }
    wflow = Workflow(tChainSpec['RequestName'], tChainSpec)

    # Task3 and Task4 use the very same pileup dataset, thus only
    # 3 entries are expected for the 4 tasks
    self.assertEqual(len(wflow.getDataCampaignMap()), 3)
    for dataIn in wflow.getDataCampaignMap():
        if dataIn['type'] == "primary":
            expected = {"type": "primary",
                        "campaign": tChainSpec['Task1']['Campaign'],
                        "name": tChainSpec['Task1']['InputDataset']}
        elif dataIn['name'] == tChainSpec['Task2']['DataPileup']:
            # Task2 has no campaign of its own, the top level one is expected
            expected = {"type": "secondary",
                        "campaign": tChainSpec['Campaign'],
                        "name": tChainSpec['Task2']['DataPileup']}
        else:
            expected = {"type": "secondary",
                        "campaign": tChainSpec['Task3']['Campaign'],
                        "name": tChainSpec['Task3']['MCPileup']}
        # assertItemsEqual on two dicts would compare their keys only,
        # so use assertEqual to validate the values as well
        self.assertEqual(dataIn, expected)

    wflow.setParentDataset(parentDset)
    self.assertEqual(wflow.getParentDataset(), parentDset)
    # the parent dataset is expected to show up as an extra map entry
    self.assertEqual(len(wflow.getDataCampaignMap()), 4)
    for dataIn in wflow.getDataCampaignMap():
        if dataIn['type'] == "parent":
            # the parent is expected under the same campaign as Task1
            self.assertEqual(dataIn, {"type": "parent",
                                      "campaign": tChainSpec['Task1']['Campaign'],
                                      "name": parentDset})