def test_model_correllation_2(self):
    """ Combine Constraints from two different DataSets.

    Exercises the merge_output keyword option of ProcessUnit: the
    'colour' values of the two inputs are merged into one output name.
    """
    file_list_a = ["/red_echidna"]
    file_list_b = ["/blue_echidna"]
    pattern = "/%colour%_%animal%"

    # Build the two mock DataSets, each globbing a single fake file.
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = file_list_a
        dataset_a = PatternDataSet(pattern)
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = file_list_b
        dataset_b = PatternDataSet(pattern)

    # A ProcessUnit which merges the Constraint on colour.
    process = ProcessUnit([dataset_a, dataset_b],
                          "/tmp/%animal%_%colour%.file",
                          "echo", merge_output=["colour"])
    result_ds = process.execute(simulate=False)

    result_files = [metafile for metafile in result_ds.files]
    self.assertEqual(len(result_files), 1)
    self.assertEqual(result_files[0].full_path, "/tmp/echidna_red-blue.file")
def test_alias_constraints(self):
    """ The PatternDataSet should be able to alias Constraints.

    After aliasing, a request for files by the alias name ('hue')
    returns the files matched under the original name ('colour').
    """
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = self.mock_file_list
        dataset = PatternDataSet(self.mock_file_pattern)

    # Apply the constraint alias - when asked for hue,
    # it will give you colour.
    dataset.alias_constraint("colour", "hue")

    matches = dataset.get_files({'hue': 'red', 'animal': 'kangaroo'})
    self.assertEqual(1, len(matches))
    self.assertEqual("/fake/red_kangaroo.txt", matches[0].full_path)
def test_change_mapping(self):
    """ Use two input datasets together, as when calculating a change
    between two dates. """
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        early_file = '/a/fake/file_1956_red.nc'
        late_file = '/a/fake/file_1981_red.nc'
        mock_glob.return_value = [early_file, late_file]
        early_ds = PatternDataSet("/a/fake/file_%date%_%colour%.nc",
                                  set([Constraint('date', ['1956'])]))
        late_ds = PatternDataSet("/a/fake/file_%date%_%colour%.nc",
                                 set([Constraint('date', ['1981'])]))

    # Overwrite the valid combinations for these mock datasets.
    early_ds.valid_combinations = set([frozenset([Constraint('colour', ['red']),
                                                  Constraint('date', ['1956'])])])
    late_ds.valid_combinations = set([frozenset([Constraint('colour', ['red']),
                                                 Constraint('date', ['1981'])])])

    # map_dict places the first input's date in %start_date% and the
    # second input's date in %end_date%.
    process = ProcessUnit([early_ds, late_ds],
                          "/a/final/output/file_%start_date%_%end_date%_%colour%.txt",
                          'echo',
                          map_dict={'start_date': ('date', 0), 'end_date': ('date', 1)})
    result = process.execute(simulate=True)

    out_files = [file_thing for file_thing in result.files]
    self.assertEqual(len(out_files), 1)

    expected_string = self.script_header + "mkdir -p /a/final/output\necho /a/fake/file_1956_red.nc /a/fake/file_1981_red.nc /a/final/output/file_1956_1981_red.txt\n"
    self.assertEqual(expected_string, process.scheduler.job.to_str())
def test_getfiles(self):
    """ Ensure that files are correctly returned using 'get_files'. """
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        # Add the mock fake glob function.
        mock_glob.return_value = self.mock_file_list
        test_patternds = PatternDataSet(self.mock_file_pattern)

        found_files = test_patternds.get_files({'colour': 'green',
                                                'animal': 'echidna'})

        # The expected MetaFile carries the attributes that were asked
        # for, not an empty metadata dict (matches the richer version
        # of this test elsewhere in the suite).
        expected_files = [MetaFile('green_echidna.txt', '/fake',
                                   {'colour': 'green', 'animal': 'echidna'})]
        self.assertEqual(found_files, expected_files)

        # The file system should only be globbed once.
        mock_glob.assert_called_once_with()
def compute(self):
    """ Build a PatternDataSet from the module's path pattern and
    constraints, and place it on the 'out_dataset' port.

    Raises ModuleError when no user constraints are supplied or when
    the resulting dataset matches no files.
    """
    # Determine file path
    search_patterns = self.get_filepath_patterns()
    logger.debug('Using pattern %s' % search_patterns)

    # One Constraint per attribute/value pair configured on the module.
    all_constraints = [Constraint(attribute, [values])
                       for attribute, values in self.constraints.iteritems()]

    # Add user contraints; refuse to run over the whole DataSet.
    user_constraints = self.getInputFromPort("added_constraints")
    if not user_constraints:
        raise ModuleError(
            self,
            "No constraints set on DataSet - you can not run a workflow on the entire DataSet")
    all_constraints.extend(user_constraints)

    # Create dataset based on file search path and contraints
    dataset = PatternDataSet(search_patterns, all_constraints)
    if not dataset.files:
        error_string = "No files found for this dataset with constraints: {}".format(
            all_constraints)
        error_string = error_string.replace('],', '],\n')
        logger.error(error_string)
        raise ModuleError(self, error_string)

    self.setResult('out_dataset', dataset)
def setUp(self):
    """ Set up some basic DataSets."""
    # First dataset: files keyed by food and animal.
    food_pattern = "/fake/%food%_%animal%.file"
    food_files = ["/fake/pizza_moose.file",
                  "/fake/pizza_rabbit.file",
                  "/fake/chocolate_bilby.file",
                  "/fake/chocolate_rabbit.file"]

    # Second dataset: files keyed by animal only.
    animal_pattern = "/fake/%animal%.file"
    animal_files = ["/fake/moose.file",
                    "/fake/rabbit.file",
                    "/fake/bilby.file"]

    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = food_files
        self.test_patternds_1 = PatternDataSet(food_pattern)

    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = animal_files
        self.test_patternds_2 = PatternDataSet(animal_pattern)
def test_build_glob_patterns(self):
    """ When constraints are given in the constructor, restrict
    the patterns on the fs to glob. """
    colour_cons = set([Constraint('colour', ['pink', 'green'])])
    dataset = PatternDataSet(self.mock_file_pattern, colour_cons)

    # One glob pattern per constrained colour value.
    self.assertItemsEqual(dataset.glob_patterns,
                          ['/fake/pink_*.txt', '/fake/green_*.txt'])
def test_noconstraints(self):
    ''' The PatternDataSet should glob the FS to find files. '''
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = self.mock_file_list
        dataset = PatternDataSet(self.mock_file_pattern)

        self.assertEqual(dataset.constraints, self.fake_constraints)
        # Check that we only try to glob the fs once.
        mock_glob.assert_called_once_with()
def test_cons_from_pattern(self):
    """ The PatternDataSet should build a complete set of constraints
    by globbing on the file system."""
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = self.mock_file_list
        pattern_ds = PatternDataSet(self.mock_file_pattern)

        # Use the named expected value in the assertion (previously it
        # was assigned but never used).
        expected_cons = self.fake_constraints
        self.assertEqual(pattern_ds.constraints, expected_cons)
def test_model_correllation(self):
    """ Combining an obs DataSet with a model DataSet should produce
    one output per obs/model pairing. """
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = self.mock_obs_files
        obs_ds = PatternDataSet(self.observational_pattern)
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = self.mock_model_files
        model_ds = PatternDataSet(self.model_pattern)

    out_pattern = "/%variable%_%obs_model%_%model%.nc"
    process = ProcessUnit([obs_ds, model_ds], out_pattern, "echo")
    result = process.execute()

    produced = [thing.full_path for thing in result.files]
    expected = ["/tas_HadISST_BadModel.nc", "/tas_AWAP_BadModel.nc",
                "/tas_HadISST_GoodModel.nc", "/tas_AWAP_GoodModel.nc"]
    self.assertItemsEqual(expected, produced)
def setUp(self):
    """ Makes a mock PatternDataSet. """
    self.mock_file_list = ['/a/fake_1/file_1/pattern_1']
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        # Add the mock fake glob function.
        mock_glob.return_value = self.mock_file_list
        self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%')

    # Constant header shared by the generated job scripts.
    self.script_header = ("#!/bin/sh\nset -e\n\nmodule purge\n"
                          "export CWSL_CTOOLS={}\n"
                          "export PYTHONPATH=$PYTHONPATH:{}/pythonlib\n"
                          .format(configuration.cwsl_ctools_path,
                                  configuration.cwsl_ctools_path))
def test_changefile_generation(self):
    """ Cover the common case of calculating changes.

    For example, comparing two datasets by time.
    """
    files_1986 = ["/model1_1986_rain", "/model2_1986_rain",
                  "/model3_1986_rain", "/model4_1986_rain",
                  "/model1_1986_temp"]
    files_2015 = ["/model1_2015_rain", "/model2_2015_rain",
                  "/model3_2015_rain", "/model4_2015_rain",
                  "/model1_2015_temp"]
    pattern = "/%model%_%date%_%variable%"

    # Create our mock DataSets.
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = files_1986
        ds_1986 = PatternDataSet(pattern)
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = files_2015
        ds_2015 = PatternDataSet(pattern)

    # A ProcessUnit which merges the Constraint on date.
    process = ProcessUnit([ds_1986, ds_2015],
                          "/tmp/%model%_%date%_%variable%",
                          "echo", merge_output=["date"])
    result = process.execute(simulate=True)

    produced = [metafile.full_path for metafile in result.files]
    expected = ["/tmp/model1_1986-2015_rain",
                "/tmp/model2_1986-2015_rain",
                "/tmp/model3_1986-2015_rain",
                "/tmp/model4_1986-2015_rain",
                "/tmp/model1_1986-2015_temp"]
    self.assertItemsEqual(expected, produced)
def compute(self):
    """ Build the COD rawfield-analog PatternDataSet from the module's
    input ports and place it on 'out_dataset'. """
    port_names = ['model', 'experiment', 'variable',
                  'season_number', 'region']

    # Only ports that actually carry a value become Constraints.
    in_cons = set([Constraint(name, [self.getInputFromPort(name)])
                   for name in port_names
                   if self.getInputFromPort(name)])

    file_pattern = "/g/data/ua6/CAWCR_CVC_processed/staging/users/CWSL/SDM/COD/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%"
    output_ds = PatternDataSet(file_pattern, in_cons)

    self.setResult('out_dataset', output_ds)
def test_alias_constraints(self):
    """ The PatternDataSet should be able to alias Constraints.

    When asked to get files for the aliased Constraint name, the
    dataset should answer using the original Constraint's files.
    """
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = self.mock_file_list
        dataset = PatternDataSet(self.mock_file_pattern)

    # Apply the constraint alias - when asked for hue,
    # it will give you colour.
    dataset.alias_constraint("colour", "hue")

    found = dataset.get_files({'hue': 'red', 'animal': 'kangaroo'})
    self.assertEqual(1, len(found))
    self.assertEqual("/fake/red_kangaroo.txt", found[0].full_path)
def compute(self):
    """ Build the user-area COD rawfield-analog PatternDataSet from the
    module's input ports and place it on 'out_dataset'. """
    port_names = ['model', 'experiment', 'variable',
                  'season_number', 'region']

    # Only ports that actually carry a value become Constraints.
    in_cons = set([Constraint(name, [self.getInputFromPort(name)])
                   for name in port_names
                   if self.getInputFromPort(name)])

    file_pattern = "/home/548/teb548/cod/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%"
    output_ds = PatternDataSet(file_pattern, in_cons)

    self.setResult('out_dataset', output_ds)
def test_getfiles(self):
    """ Ensure that files are correctly returned using 'get_files'. """
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        # Install the fake glob so no real filesystem access happens.
        mock_glob.return_value = self.mock_file_list
        dataset = PatternDataSet(self.mock_file_pattern)

        matches = dataset.get_files({'colour': 'green',
                                     'animal': 'echidna'})

        expected = [MetaFile('green_echidna.txt', '/fake',
                             {'colour': 'green', 'animal': 'echidna'})]
        self.assertItemsEqual(matches, expected)

        # The file system should only be globbed once.
        mock_glob.assert_called_once_with()
def test_model_correllation_3(self):
    """ Combine multiple DataSets, each with many files.

    Merging 'colour' across two three-file inputs yields the full
    cross product of colour pairs.
    """
    colours_a = ["/red_echidna", "/blue_echidna", "/green_echidna"]
    colours_b = ["/blue_echidna", "/red_echidna", "/green_echidna"]
    pattern = "/%colour%_%animal%"

    # Create our mock DataSets.
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = colours_a
        dataset_a = PatternDataSet(pattern)
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
        mock_glob.return_value = colours_b
        dataset_b = PatternDataSet(pattern)

    # A ProcessUnit which merges the Constraint on colour.
    process = ProcessUnit([dataset_a, dataset_b],
                          "/tmp/%animal%_%colour%.file",
                          "echo", merge_output=["colour"])
    result = process.execute(simulate=True)

    produced = [metafile.full_path for metafile in result.files]
    expected = ["/tmp/echidna_red-red.file",
                "/tmp/echidna_red-blue.file",
                "/tmp/echidna_red-green.file",
                "/tmp/echidna_blue-red.file",
                "/tmp/echidna_blue-blue.file",
                "/tmp/echidna_blue-green.file",
                "/tmp/echidna_green-red.file",
                "/tmp/echidna_green-blue.file",
                "/tmp/echidna_green-green.file"]
    self.assertItemsEqual(expected, produced)
def setUp(self):
    """ Create a mock pattern dataset that returns a single file. """
    test_cons = set([Constraint('fake', ['fake_1']),
                     Constraint('file', ['file_1']),
                     Constraint('pattern', ['pattern_1'])])
    self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%',
                                       constraint_set=test_cons)

    # Mock the get_files method - it returns a single fake file object.
    mock_file = mock.MagicMock()
    mock_file.full_path = 'test_file1'
    mock_file.__str__.return_value = 'test_file1'
    mock_file.all_atts = {"fake": "fake_1",
                          "file": "file_1",
                          "pattern": "pattern_1"}
    self.a_pattern_ds.get_files = mock.Mock(return_value=[mock_file])

    # Create a valid set of contraints for the mock.
    self.a_pattern_ds.valid_combinations = set([frozenset(test_cons)])

    # Constant header for the job scripts.
    self.script_header = ("#!/bin/sh\nset -e\n\nmodule purge\n"
                          "export CWSL_CTOOLS={}\n"
                          "export PYTHONPATH=$PYTHONPATH:{}/pythonlib\n"
                          .format(configuration.cwsl_ctools_path,
                                  configuration.cwsl_ctools_path))
def test_regex(self):
    ''' Given an input pattern, the PatternDataSet should create a
    regular expression. '''
    dataset = PatternDataSet(self.mock_file_pattern)
    self.assertEqual(dataset.regex_pattern, self.mock_regex)
class TestArgumentCreator(unittest.TestCase):
    """ Tests to ensure that looping and grouping works correctly."""

    def setUp(self):
        """ Set up some basic DataSets."""
        # Dataset 1: four files keyed by food AND animal (two attributes).
        mock_file_pattern_1 = "/fake/%food%_%animal%.file"
        mock_file_list_1 = ["/fake/pizza_moose.file", "/fake/pizza_rabbit.file",
                            "/fake/chocolate_bilby.file","/fake/chocolate_rabbit.file"]
        # Dataset 2: three files keyed by animal only (one attribute).
        mock_file_pattern_2 = "/fake/%animal%.file"
        mock_file_list_2 = ["/fake/moose.file", "/fake/rabbit.file", "/fake/bilby.file"]

        # Patch glob_fs so no real filesystem access happens during construction.
        with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
            mock_glob.return_value = mock_file_list_1
            self.test_patternds_1 = PatternDataSet(mock_file_pattern_1)
        with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs') as mock_glob:
            mock_glob.return_value = mock_file_list_2
            self.test_patternds_2 = PatternDataSet(mock_file_pattern_2)

    def test_simple_looping(self):
        """ Test that basic one-to-one looping works."""
        one_to_one_creator = FileCreator("/output/%animal%.output",
                                         [self.test_patternds_2.get_constraint("animal")])
        looper = ArgumentCreator([self.test_patternds_2], one_to_one_creator)
        all_outs = []
        for thing in looper:
            # One-to-one: each iteration pairs equal numbers of inputs and outputs.
            self.assertEqual(len(thing[0]), len(thing[1]))
            all_outs.append(thing)
        # Every file the creator produced corresponds to one loop iteration.
        all_files = [thing for thing in one_to_one_creator.files]
        self.assertEqual(len(all_outs), len(all_files))

    def test_many_looping(self):
        """ Test that simple many-to-one looping works."""
        many_to_one_creator = FileCreator("/output/%animal%.output",
                                          [self.test_patternds_1.get_constraint("animal")])
        looper = ArgumentCreator([self.test_patternds_1], many_to_one_creator)
        all_outs = []
        for thing in looper:
            all_outs.append(thing)
        module_logger.debug("All outs are: {}".format(all_outs))
        # There are three animals.
        self.assertEqual(3, len(all_outs))

    def test_multi_model(self):
        """ Test for the case when there are groups on Constraints.

        This seems to fail when people use FileCreators.
        """
        institute_model_pattern = "/fake/%variable%_%model%_%institute%.file"
        in_constraints = [Constraint('model', ['model_1', 'model_2']),
                          Constraint('variable', ['variable_1']),
                          Constraint('institute', ['institute_1', 'institute_2'])]
        test_filecreator = FileCreator(institute_model_pattern, in_constraints)

        # Set the valid combinations: only two of the four possible
        # model/institute pairings are marked valid via update=True.
        dummy_file_1 = test_filecreator.get_files({'model': 'model_1',
                                                   'institute': 'institute_1'},
                                                  update=True)
        dummy_file_2 = test_filecreator.get_files({'model': 'model_2',
                                                   'institute': 'institute_2'},
                                                  update=True)

        # Now create a FileCreator to use as output.
        output_pattern = "/an/output/fake/%variable%_%model%_%institute%.file"
        out_constraints = [Constraint('model', ['model_1', 'model_2']),
                           Constraint('variable', ['variable_1']),
                           Constraint('institute', ['institute_1', 'institute_2'])]
        test_output_filecreator = FileCreator(output_pattern, out_constraints)

        print("Valid input combinations are: {0}".format(test_filecreator.valid_combinations))
        self.assertEqual(2, len(test_filecreator.valid_hashes))

        test_argument_creator = ArgumentCreator([test_filecreator],
                                                test_output_filecreator)
        outputs = [combination for combination in test_argument_creator]
        print("Output is: {0}".format(outputs))

        # There should only be two outputs - not 4!
        self.assertEqual(len(outputs), 2)

    def test_two_inputs(self):
        """ Test that the ArgumentCreator works with multiple input datasets."""
        multi_ds_creator = FileCreator("/output/%animal%.output",
                                       [self.test_patternds_1.get_constraint("animal")])
        looper = ArgumentCreator([self.test_patternds_1, self.test_patternds_2],
                                 multi_ds_creator)
        all_outs = []
        for thing in looper:
            # With two input datasets, there are at least as many inputs
            # as outputs in each iteration.
            self.assertGreaterEqual(len(thing[0]), len(thing[1]))
            all_outs.append(thing)
        # There are three animals.
        self.assertEqual(3, len(all_outs))
        print(all_outs)
        # The order is moose, then rabbit
        # Moose: 2 ins, 1 out.
        module_logger.debug("All outs[0]: {}".format(all_outs[0]))
        self.assertEqual(len(all_outs[0][0]), 2)
        self.assertEqual(len(all_outs[0][1]), 1)
        # Rabbit: 3 in, 1 out
        module_logger.debug("All outs[1]: {}".format(all_outs[1]))
        self.assertEqual(len(all_outs[1][0]), 3)
        self.assertEqual(len(all_outs[1][1]), 1)