def test_model_correllation_2(self):
    """Merge the 'colour' Constraint across two single-file DataSets.

    Exercises the merge_output keyword of ProcessUnit: with one input file
    in each DataSet, exactly one output file is expected and its colour
    component is the merged value "red-blue".
    """
    glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
    input_pattern = "/%colour%_%animal%"

    # Build one mock DataSet per fake file listing.
    datasets = []
    for fake_listing in (["/red_echidna"], ["/blue_echidna"]):
        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = fake_listing
            datasets.append(PatternDataSet(input_pattern))

    # The ProcessUnit merges the two inputs on the colour Constraint.
    the_process = ProcessUnit(datasets,
                              "/tmp/%animal%_%colour%.file",
                              "echo",
                              merge_output=["colour"])

    # NOTE(review): simulate=False is passed here - confirm that this test
    # is really meant to execute the job rather than simulate it.
    output_ds = the_process.execute(simulate=False)

    outfiles = list(output_ds.files)
    self.assertEqual(len(outfiles), 1)
    self.assertEqual(outfiles[0].full_path, "/tmp/echidna_red-blue.file")
def test_alias_constraints(self):
    """Check that a PatternDataSet can alias one Constraint to another.

    After aliasing, asking for files by the alias name should return the
    files that match the Constraint it stands for.
    """
    patcher = mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs')
    with patcher as mock_glob:
        mock_glob.return_value = self.mock_file_list
        pattern_ds = PatternDataSet(self.mock_file_pattern)

        # Alias 'hue' onto 'colour': a query for hue is answered by colour.
        pattern_ds.alias_constraint("colour", "hue")

        query = {'hue': 'red', 'animal': 'kangaroo'}
        found_files = pattern_ds.get_files(query)

        self.assertEqual(1, len(found_files))
        self.assertEqual("/fake/red_kangaroo.txt", found_files[0].full_path)
def compute(self):
    """Build a PatternDataSet from this module's constraints and publish it.

    The file path patterns come from ``self.get_filepath_patterns()``.  The
    constraint list is made of one Constraint per entry in
    ``self.constraints`` plus whatever arrives on the "added_constraints"
    input port.  The resulting dataset is set on the 'out_dataset' port.

    Raises:
        ModuleError: if nothing is supplied on "added_constraints", or if
            the resulting dataset has no files.
    """
    # Determine file path
    patterns = self.get_filepath_patterns()
    # Hand the argument to the logger instead of pre-formatting with '%':
    # this defers the formatting and stays correct if patterns is a tuple.
    logger.debug('Using pattern %s', patterns)

    # Create constraints
    constraints = [Constraint(attribute, [values])
                   for attribute, values in self.constraints.iteritems()]

    # Add user constraints; refuse to run without any.
    user_constraints = self.getInputFromPort("added_constraints")
    if not user_constraints:
        raise ModuleError(
            self,
            "No constraints set on DataSet - you can not run a workflow on the entire DataSet"
        )
    constraints.extend(user_constraints)

    # Create dataset based on file search path and constraints
    dataset = PatternDataSet(patterns, constraints)

    if not dataset.files:
        error_string = "No files found for this dataset with constraints: {}".format(
            constraints)
        # Put each constraint on its own line to keep the message readable.
        error_string = error_string.replace('],', '],\n')
        logger.error(error_string)
        raise ModuleError(self, error_string)

    self.setResult('out_dataset', dataset)
def test_build_glob_patterns(self):
    """Constraints passed to the constructor should restrict the globs.

    With 'colour' limited to pink and green, one glob pattern per colour
    is expected.
    """
    colour_constraint = Constraint('colour', ['pink', 'green'])
    pattern_ds = PatternDataSet(self.mock_file_pattern, {colour_constraint})

    self.assertItemsEqual(pattern_ds.glob_patterns,
                          ['/fake/pink_*.txt', '/fake/green_*.txt'])
def test_noconstraints(self):
    """With no constraints given, the PatternDataSet globs the FS for files."""
    glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

    with mock.patch(glob_target) as mock_glob:
        mock_glob.return_value = self.mock_file_list
        dataset = PatternDataSet(self.mock_file_pattern)

        self.assertEqual(dataset.constraints, self.fake_constraints)

        # The file system must have been globbed exactly once.
        mock_glob.assert_called_once_with()
def test_cons_from_pattern(self):
    """The PatternDataSet should build its constraints by globbing the FS.

    glob_fs is patched to return the fixture file list; the constraints
    derived from it must equal the fixture's expected constraint set.
    """
    with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
                    ) as mock_glob:
        mock_glob.return_value = self.mock_file_list
        pattern_ds = PatternDataSet(self.mock_file_pattern)

        # Compare against the named expectation (previously assigned but
        # never used).
        expected_cons = self.fake_constraints
        self.assertEqual(pattern_ds.constraints, expected_cons)
def setUp(self):
    """Create a PatternDataSet backed by a mocked glob, plus the script header."""
    self.mock_file_list = ['/a/fake_1/file_1/pattern_1']

    patcher = mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs')
    with patcher as mock_glob:
        # Have the patched glob hand back the fake file list.
        mock_glob.return_value = self.mock_file_list
        self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%')

    # Header text that is built from the configured ctools path.
    header_template = "#!/bin/sh\nset -e\n\nmodule purge\nexport CWSL_CTOOLS={}\nexport PYTHONPATH=$PYTHONPATH:{}/pythonlib\n"
    self.script_header = header_template.format(configuration.cwsl_ctools_path,
                                                configuration.cwsl_ctools_path)
def test_changefile_generation(self):
    """Cover the common case of calculating changes between two DataSets.

    Two DataSets that differ only in their date are combined, merging on
    the 'date' Constraint, so every output name carries "1986-2015".
    """
    glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
    input_pattern = "/%model%_%date%_%variable%"

    listings = (
        ["/model1_1986_rain", "/model2_1986_rain",
         "/model3_1986_rain", "/model4_1986_rain",
         "/model1_1986_temp"],
        ["/model1_2015_rain", "/model2_2015_rain",
         "/model3_2015_rain", "/model4_2015_rain",
         "/model1_2015_temp"],
    )

    # Build one mock DataSet per fake file listing.
    datasets = []
    for fake_listing in listings:
        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = fake_listing
            datasets.append(PatternDataSet(input_pattern))

    # A ProcessUnit which merges the Constraint on date.
    the_process = ProcessUnit(datasets,
                              "/tmp/%model%_%date%_%variable%",
                              "echo",
                              merge_output=["date"])
    output_ds = the_process.execute(simulate=True)

    outfile_names = [metafile.full_path for metafile in output_ds.files]

    expected_files = ["/tmp/model1_1986-2015_rain",
                      "/tmp/model2_1986-2015_rain",
                      "/tmp/model3_1986-2015_rain",
                      "/tmp/model4_1986-2015_rain",
                      "/tmp/model1_1986-2015_temp"]

    self.assertItemsEqual(expected_files, outfile_names)
def test_model_correllation(self):
    """Combine an observational DataSet with a model DataSet.

    Both DataSets are built from the fixture's file lists and patterns,
    run through a ProcessUnit, and the output paths are compared with the
    four expected names.
    """
    glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

    with mock.patch(glob_target) as mock_glob:
        mock_glob.return_value = self.mock_obs_files
        obs_dataset = PatternDataSet(self.observational_pattern)

    with mock.patch(glob_target) as mock_glob:
        mock_glob.return_value = self.mock_model_files
        model_dataset = PatternDataSet(self.model_pattern)

    output_pattern = "/%variable%_%obs_model%_%model%.nc"
    our_process = ProcessUnit([obs_dataset, model_dataset],
                              output_pattern,
                              "echo")
    output = our_process.execute()

    produced_paths = [metafile.full_path for metafile in output.files]

    good_names = ["/tas_HadISST_BadModel.nc", "/tas_AWAP_BadModel.nc",
                  "/tas_HadISST_GoodModel.nc", "/tas_AWAP_GoodModel.nc"]

    self.assertItemsEqual(good_names, produced_paths)
def compute(self):
    """Build a PatternDataSet from the constraint input ports and publish it.

    One Constraint is created for every port in the fixed list below whose
    value is truthy; ports with a falsy value are skipped.  The dataset is
    set on the 'out_dataset' output port.
    """
    cons_list = ['model', 'experiment', 'variable', 'season_number', 'region']

    in_cons = set()
    for cons_name in cons_list:
        # Read each port only once and reuse the value for both the
        # truthiness check and the Constraint itself.
        port_value = self.getInputFromPort(cons_name)
        if port_value:
            in_cons.add(Constraint(cons_name, [port_value]))

    file_pattern = "/home/548/teb548/cod/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%"
    output_ds = PatternDataSet(file_pattern, in_cons)

    self.setResult('out_dataset', output_ds)
def compute(self):
    """Build a PatternDataSet from the constraint input ports and publish it.

    One Constraint is created for every port in the fixed list below whose
    value is truthy; ports with a falsy value are skipped.  The dataset is
    set on the 'out_dataset' output port.
    """
    cons_list = ['model', 'experiment', 'variable', 'season_number', 'region']

    in_cons = set()
    for cons_name in cons_list:
        # Read each port only once and reuse the value for both the
        # truthiness check and the Constraint itself.
        port_value = self.getInputFromPort(cons_name)
        if port_value:
            in_cons.add(Constraint(cons_name, [port_value]))

    file_pattern = "/g/data/ua6/CAWCR_CVC_processed/staging/users/CWSL/SDM/COD/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%"
    output_ds = PatternDataSet(file_pattern, in_cons)

    self.setResult('out_dataset', output_ds)
def test_model_correllation_3(self):
    """Combine two DataSets that each contain several files.

    Both DataSets hold a red, a blue and a green echidna; merging on
    'colour' is expected to give one output per colour pairing.
    """
    glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
    input_pattern = "/%colour%_%animal%"

    listings = (
        ["/red_echidna", "/blue_echidna", "/green_echidna"],
        ["/blue_echidna", "/red_echidna", "/green_echidna"],
    )

    # Build one mock DataSet per fake file listing.
    datasets = []
    for fake_listing in listings:
        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = fake_listing
            datasets.append(PatternDataSet(input_pattern))

    # The ProcessUnit merges the two inputs on the colour Constraint.
    the_process = ProcessUnit(datasets,
                              "/tmp/%animal%_%colour%.file",
                              "echo",
                              merge_output=["colour"])
    output_ds = the_process.execute(simulate=True)

    outfile_names = [metafile.full_path for metafile in output_ds.files]

    expected_outfiles = ["/tmp/echidna_red-red.file",
                         "/tmp/echidna_red-blue.file",
                         "/tmp/echidna_red-green.file",
                         "/tmp/echidna_blue-red.file",
                         "/tmp/echidna_blue-blue.file",
                         "/tmp/echidna_blue-green.file",
                         "/tmp/echidna_green-red.file",
                         "/tmp/echidna_green-blue.file",
                         "/tmp/echidna_green-green.file"]

    self.assertItemsEqual(expected_outfiles, outfile_names)
def setUp(self):
    """Create a PatternDataSet whose get_files is mocked to return one file."""
    fake_con = Constraint('fake', ['fake_1'])
    file_con = Constraint('file', ['file_1'])
    pattern_con = Constraint('pattern', ['pattern_1'])
    test_cons = {fake_con, file_con, pattern_con}

    self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%',
                                       constraint_set=test_cons)

    # A single stand-in file object is all get_files will ever return.
    mock_file = mock.MagicMock()
    mock_file.all_atts = {"fake": "fake_1",
                          "file": "file_1",
                          "pattern": "pattern_1"}
    mock_file.full_path = 'test_file1'
    mock_file.__str__.return_value = 'test_file1'
    self.a_pattern_ds.get_files = mock.Mock(return_value=[mock_file])

    # The only valid combination is the full set of constraints above.
    self.a_pattern_ds.valid_combinations = {frozenset(test_cons)}

    # Constant header for the job scripts.
    header_template = "#!/bin/sh\nset -e\n\nmodule purge\nexport CWSL_CTOOLS={}\nexport PYTHONPATH=$PYTHONPATH:{}/pythonlib\n"
    self.script_header = header_template.format(configuration.cwsl_ctools_path,
                                                configuration.cwsl_ctools_path)
def test_getfiles(self):
    """Check that 'get_files' returns the files matching the request."""
    glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
    wanted = {'colour': 'green', 'animal': 'echidna'}

    with mock.patch(glob_target) as mock_glob:
        # The patched glob hands back the fixture's file list.
        mock_glob.return_value = self.mock_file_list
        dataset = PatternDataSet(self.mock_file_pattern)

        found_files = dataset.get_files(wanted)

        expected_file = MetaFile('green_echidna.txt', '/fake',
                                 {'colour': 'green', 'animal': 'echidna'})
        self.assertItemsEqual(found_files, [expected_file])

        mock_glob.assert_called_once_with()
def test_regex(self):
    """A PatternDataSet should derive a regular expression from its pattern."""
    dataset = PatternDataSet(self.mock_file_pattern)
    built_regex = dataset.regex_pattern

    self.assertEqual(built_regex, self.mock_regex)