示例#1
0
    def test_model_correllation_2(self):
        """ Combine Constraints that come from two different DataSets.

        Two single-file DataSets sharing one input pattern are passed to a
        ProcessUnit created with the merge_output keyword set to ["colour"].
        Exactly one output file is expected, with both colours joined in
        its path.

        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        input_pattern = "/%colour%_%animal%"

        # Each mock DataSet sees its own one-file listing.
        with mock.patch(glob_target, return_value=["/red_echidna"]):
            red_ds = PatternDataSet(input_pattern)

        with mock.patch(glob_target, return_value=["/blue_echidna"]):
            blue_ds = PatternDataSet(input_pattern)

        # The ProcessUnit merges the colour Constraint in its output.
        the_process = ProcessUnit([red_ds, blue_ds],
                                  "/tmp/%animal%_%colour%.file",
                                  "echo", merge_output=["colour"])

        output_ds = the_process.execute(simulate=False)

        outfiles = list(output_ds.files)

        self.assertEqual(len(outfiles), 1)
        self.assertEqual(outfiles[0].full_path, "/tmp/echidna_red-blue.file")
示例#2
0
    def test_alias_constraints(self):
        """ A PatternDataSet can expose a Constraint under an alias.

        Once "colour" has been aliased as "hue", a get_files request keyed
        on "hue" should return the file found through "colour".

        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = self.mock_file_list

            pattern_ds = PatternDataSet(self.mock_file_pattern)

            # From here on, asking for hue is answered with colour.
            pattern_ds.alias_constraint("colour", "hue")

            request = {'hue': 'red', 'animal': 'kangaroo'}
            found_files = pattern_ds.get_files(request)

            self.assertEqual(1, len(found_files))
            first_found = found_files[0]
            self.assertEqual("/fake/red_kangaroo.txt", first_found.full_path)
示例#3
0
    def test_change_mapping(self):
        """ Use two input datasets at once, as when calculating a change.

        The 'date' attribute of the first and second input DataSets is mapped
        to 'start_date' and 'end_date' in the output pattern, and the
        simulated job script is compared against the expected text.
        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        file_pattern = "/a/fake/file_%date%_%colour%.nc"

        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = ['/a/fake/file_1956_red.nc',
                                      '/a/fake/file_1981_red.nc']

            early_ds = PatternDataSet(file_pattern,
                                      set([Constraint('date', ['1956'])]))

            late_ds = PatternDataSet(file_pattern,
                                     set([Constraint('date', ['1981'])]))

            # Replace the valid combinations on both mock datasets.
            early_combination = frozenset([Constraint('colour', ['red']),
                                           Constraint('date', ['1956'])])
            early_ds.valid_combinations = set([early_combination])

            late_combination = frozenset([Constraint('colour', ['red']),
                                          Constraint('date', ['1981'])])
            late_ds.valid_combinations = set([late_combination])

            # 'date' from input 0 becomes start_date, from input 1 end_date.
            date_mapping = {'start_date': ('date', 0),
                            'end_date': ('date', 1)}
            the_process_unit = ProcessUnit(
                [early_ds, late_ds],
                "/a/final/output/file_%start_date%_%end_date%_%colour%.txt",
                'echo', map_dict=date_mapping)

            ds_result = the_process_unit.execute(simulate=True)

            outfiles = list(ds_result.files)
            self.assertEqual(len(outfiles), 1)

            expected_string = (
                self.script_header
                + "mkdir -p /a/final/output\n"
                "echo /a/fake/file_1956_red.nc /a/fake/file_1981_red.nc /a/final/output/file_1956_1981_red.txt\n"
            )
            actual_string = the_process_unit.scheduler.job.to_str()
            self.assertEqual(expected_string, actual_string)
    def test_getfiles(self):
        """ Check that 'get_files' hands back the expected file. """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        with mock.patch(glob_target) as mock_glob:

            # Serve the fake listing in place of a real glob.
            mock_glob.return_value = self.mock_file_list

            test_patternds = PatternDataSet(self.mock_file_pattern)

            request = {'colour': 'green', 'animal': 'echidna'}
            found_files = test_patternds.get_files(request)

            green_echidna = MetaFile('green_echidna.txt', '/fake', {})
            expected_files = [green_echidna]

            self.assertEqual(found_files, expected_files)
            # The mocked glob must have been called exactly once, no args.
            mock_glob.assert_called_once_with()
示例#5
0
    def compute(self):
        """ Build a PatternDataSet from this module's constraints and publish it.

        The file path patterns come from self.get_filepath_patterns(). Every
        entry of self.constraints becomes a Constraint whose value is wrapped
        in a single-element list, and the Constraints read from the
        "added_constraints" input port are appended. The resulting DataSet is
        set as the 'out_dataset' result.

        Raises:
            ModuleError: if the "added_constraints" port yields nothing, or
                if the constructed DataSet contains no files.

        """

        # Determine file path
        patterns = self.get_filepath_patterns()
        # Pass patterns as a logger argument: '%s' % patterns would raise a
        # TypeError if patterns were a tuple with more than one element.
        logger.debug('Using pattern %s', patterns)

        # Create constraints
        constraints = [
            Constraint(attribute, [values])
            for attribute, values in self.constraints.iteritems()
        ]

        # Add user constraints; refuse to run without any.
        user_constraints = self.getInputFromPort("added_constraints")
        if not user_constraints:
            raise ModuleError(
                self,
                "No constraints set on DataSet - you can not run a workflow on the entire DataSet"
            )
        constraints.extend(user_constraints)

        # Create dataset based on file search path and constraints
        dataset = PatternDataSet(patterns, constraints)

        if not dataset.files:
            error_string = "No files found for this dataset with constraints: {}".format(
                constraints)
            # Put each constraint on its own line in the message.
            error_string = error_string.replace('],', '],\n')
            logger.error(error_string)
            raise ModuleError(self, error_string)

        self.setResult('out_dataset', dataset)
    def setUp(self):
        """ Create the two mock-backed DataSets used by the tests."""

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        # First DataSet: files described by a food and an animal.
        food_animal_files = ["/fake/pizza_moose.file",
                             "/fake/pizza_rabbit.file",
                             "/fake/chocolate_bilby.file",
                             "/fake/chocolate_rabbit.file"]
        with mock.patch(glob_target, return_value=food_animal_files):
            self.test_patternds_1 = PatternDataSet("/fake/%food%_%animal%.file")

        # Second DataSet: files described by an animal only.
        animal_files = ["/fake/moose.file",
                        "/fake/rabbit.file",
                        "/fake/bilby.file"]
        with mock.patch(glob_target, return_value=animal_files):
            self.test_patternds_2 = PatternDataSet("/fake/%animal%.file")
示例#7
0
    def test_build_glob_patterns(self):
        """ Constraints passed to the constructor should narrow the glob patterns. """

        colour_constraint = Constraint('colour', ['pink', 'green'])
        pattern_ds = PatternDataSet(self.mock_file_pattern,
                                    set([colour_constraint]))

        # One glob pattern per colour value; order is not significant.
        self.assertItemsEqual(pattern_ds.glob_patterns,
                              ['/fake/pink_*.txt', '/fake/green_*.txt'])
示例#8
0
    def test_noconstraints(self):
        ''' Without given constraints, the PatternDataSet globs the FS for files. '''

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = self.mock_file_list
            test_patternds = PatternDataSet(self.mock_file_pattern)

            found_constraints = test_patternds.constraints
            self.assertEqual(found_constraints, self.fake_constraints)
            # The file system should have been globbed exactly once.
            mock_glob.assert_called_once_with()
示例#9
0
    def test_cons_from_pattern(self):
        """ The PatternDataSet should build a complete set of constraints by globbing on the file system."""

        with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
                        ) as mock_glob:
            # The mocked glob returns the fixture's file listing.
            mock_glob.return_value = self.mock_file_list

            pattern_ds = PatternDataSet(self.mock_file_pattern)

            # Compare against the fixture directly; the former
            # 'expected_cons' local was assigned but never used.
            self.assertEqual(pattern_ds.constraints, self.fake_constraints)
示例#10
0
    def test_model_correllation(self):
        """ Run a ProcessUnit over an observational and a model DataSet.

        The four expected output names each pair one observational dataset
        with one model.
        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        with mock.patch(glob_target, return_value=self.mock_obs_files):
            test_obsds = PatternDataSet(self.observational_pattern)

        with mock.patch(glob_target, return_value=self.mock_model_files):
            test_model_ds = PatternDataSet(self.model_pattern)

        our_process = ProcessUnit([test_obsds, test_model_ds],
                                  "/%variable%_%obs_model%_%model%.nc",
                                  "echo")

        output = our_process.execute()

        all_outs = []
        for out_file in output.files:
            all_outs.append(out_file.full_path)

        good_names = ["/tas_HadISST_BadModel.nc",
                      "/tas_AWAP_BadModel.nc",
                      "/tas_HadISST_GoodModel.nc",
                      "/tas_AWAP_GoodModel.nc"]

        self.assertItemsEqual(good_names, all_outs)
示例#11
0
    def setUp(self):
        """ Build a mock PatternDataSet and the expected job script header. """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        self.mock_file_list = ['/a/fake_1/file_1/pattern_1']
        with mock.patch(glob_target) as mock_glob:
            # The fake glob returns the single mock file.
            mock_glob.return_value = self.mock_file_list

            self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%')

        # Both placeholders are filled with the configured ctools path.
        header_template = "#!/bin/sh\nset -e\n\nmodule purge\nexport CWSL_CTOOLS={}\nexport PYTHONPATH=$PYTHONPATH:{}/pythonlib\n"
        self.script_header = header_template.format(
            configuration.cwsl_ctools_path, configuration.cwsl_ctools_path)
示例#12
0
    def test_changefile_generation(self):
        """ Cover the common case of calculating a change between DataSets.

        Two DataSets whose files differ in date are merged on that attribute;
        every expected output path carries the joined date "1986-2015".
        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        input_pattern = "/%model%_%date%_%variable%"

        files_1986 = ["/model1_1986_rain", "/model2_1986_rain",
                      "/model3_1986_rain", "/model4_1986_rain",
                      "/model1_1986_temp"]
        files_2015 = ["/model1_2015_rain", "/model2_2015_rain",
                      "/model3_2015_rain", "/model4_2015_rain",
                      "/model1_2015_temp"]

        # Build one mock DataSet per year.
        with mock.patch(glob_target, return_value=files_1986):
            test_ds_1 = PatternDataSet(input_pattern)

        with mock.patch(glob_target, return_value=files_2015):
            test_ds_2 = PatternDataSet(input_pattern)

        # A ProcessUnit which merges the Constraint on date.
        the_process = ProcessUnit([test_ds_1, test_ds_2],
                                  "/tmp/%model%_%date%_%variable%",
                                  "echo", merge_output=["date"])

        output_ds = the_process.execute(simulate=True)

        outfile_names = []
        for metafile in output_ds.files:
            outfile_names.append(metafile.full_path)

        expected_files = ["/tmp/model1_1986-2015_rain",
                          "/tmp/model2_1986-2015_rain",
                          "/tmp/model3_1986-2015_rain",
                          "/tmp/model4_1986-2015_rain",
                          "/tmp/model1_1986-2015_temp"]

        self.assertItemsEqual(expected_files, outfile_names)
示例#13
0
    def compute(self):
        """ Build a PatternDataSet from the constraint input ports.

        A Constraint is created for each of the listed ports whose value is
        truthy; ports with a falsy value add no Constraint. The DataSet built
        from the fixed file pattern and those Constraints is set as the
        'out_dataset' result.
        """

        cons_list = [
            'model', 'experiment', 'variable', 'season_number', 'region'
        ]

        in_cons = set()
        for cons_name in cons_list:
            # Read each port once and reuse the value for both the
            # truthiness filter and the Constraint itself.
            port_value = self.getInputFromPort(cons_name)
            if port_value:
                in_cons.add(Constraint(cons_name, [port_value]))

        file_pattern = "/g/data/ua6/CAWCR_CVC_processed/staging/users/CWSL/SDM/COD/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%"
        output_ds = PatternDataSet(file_pattern, in_cons)

        self.setResult('out_dataset', output_ds)
    def test_alias_constraints(self):
        """ Constraints on a PatternDataSet can be given an alias.

        A get_files request that names the alias should return the files
        belonging to the Constraint it stands for.

        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = self.mock_file_list

            pattern_ds = PatternDataSet(self.mock_file_pattern)

            # Alias colour as hue: a request for hue yields colour.
            pattern_ds.alias_constraint("colour", "hue")

            wanted = {'hue': 'red', 'animal': 'kangaroo'}
            found_files = pattern_ds.get_files(wanted)

            self.assertEqual(1, len(found_files))
            only_path = found_files[0].full_path
            self.assertEqual("/fake/red_kangaroo.txt", only_path)
示例#15
0
    def compute(self):
        """ Build a PatternDataSet from the constraint input ports.

        A Constraint is created for each of the listed ports whose value is
        truthy; ports with a falsy value add no Constraint. The DataSet built
        from the fixed file pattern and those Constraints is set as the
        'out_dataset' result.
        """

        cons_list = [
            'model', 'experiment', 'variable', 'season_number', 'region'
        ]

        in_cons = set()
        for cons_name in cons_list:
            # Read each port once and reuse the value for both the
            # truthiness filter and the Constraint itself.
            port_value = self.getInputFromPort(cons_name)
            if port_value:
                in_cons.add(Constraint(cons_name, [port_value]))

        file_pattern = "/home/548/teb548/cod/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%"
        output_ds = PatternDataSet(file_pattern, in_cons)

        self.setResult('out_dataset', output_ds)
示例#16
0
    def test_getfiles(self):
        """ Check that 'get_files' hands back the expected file. """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        with mock.patch(glob_target) as mock_glob:

            # Serve the fake listing in place of a real glob.
            mock_glob.return_value = self.mock_file_list

            test_patternds = PatternDataSet(self.mock_file_pattern)

            request = {'colour': 'green', 'animal': 'echidna'}
            found_files = test_patternds.get_files(request)

            expected_attributes = {'colour': 'green', 'animal': 'echidna'}
            expected_files = [
                MetaFile('green_echidna.txt', '/fake', expected_attributes)
            ]

            self.assertItemsEqual(found_files, expected_files)
            # The mocked glob must have been called exactly once, no args.
            mock_glob.assert_called_once_with()
示例#17
0
    def test_model_correllation_3(self):
        """ Combine multiple DataSets that each hold several files.

        Both DataSets list the same three colours, and the expected outputs
        are all nine colour pairings.
        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        input_pattern = "/%colour%_%animal%"

        first_listing = ["/red_echidna", "/blue_echidna", "/green_echidna"]
        second_listing = ["/blue_echidna", "/red_echidna", "/green_echidna"]

        # Build the mock DataSets, one per listing.
        with mock.patch(glob_target, return_value=first_listing):
            test_ds_1 = PatternDataSet(input_pattern)

        with mock.patch(glob_target, return_value=second_listing):
            test_ds_2 = PatternDataSet(input_pattern)

        # The ProcessUnit merges the colour Constraint in its output.
        the_process = ProcessUnit([test_ds_1, test_ds_2],
                                  "/tmp/%animal%_%colour%.file",
                                  "echo", merge_output=["colour"])

        output_ds = the_process.execute(simulate=True)

        outfile_names = []
        for metafile in output_ds.files:
            outfile_names.append(metafile.full_path)

        expected_outfiles = [
            "/tmp/echidna_red-red.file",
            "/tmp/echidna_red-blue.file",
            "/tmp/echidna_red-green.file",
            "/tmp/echidna_blue-red.file",
            "/tmp/echidna_blue-blue.file",
            "/tmp/echidna_blue-green.file",
            "/tmp/echidna_green-red.file",
            "/tmp/echidna_green-blue.file",
            "/tmp/echidna_green-green.file",
        ]

        self.assertItemsEqual(expected_outfiles, outfile_names)
示例#18
0
    def setUp(self):
        """ Build a PatternDataSet whose get_files is mocked to return one file. """

        # One single-valued Constraint per attribute in the pattern.
        test_cons = set([Constraint('fake', ['fake_1']),
                         Constraint('file', ['file_1']),
                         Constraint('pattern', ['pattern_1'])])

        self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%',
                                           constraint_set=test_cons)

        # Stand-in file object: the only thing get_files will ever return.
        single_file = mock.MagicMock()
        single_file.full_path = 'test_file1'
        single_file.__str__.return_value = 'test_file1'
        single_file.all_atts = {
            "fake": "fake_1",
            "file": "file_1",
            "pattern": "pattern_1",
        }
        self.a_pattern_ds.get_files = mock.Mock(return_value=[single_file])

        # The only valid combination is the full set of Constraints.
        self.a_pattern_ds.valid_combinations = set([frozenset(test_cons)])

        # Constant header for the job scripts; both placeholders are filled
        # with the configured ctools path.
        header_template = "#!/bin/sh\nset -e\n\nmodule purge\nexport CWSL_CTOOLS={}\nexport PYTHONPATH=$PYTHONPATH:{}/pythonlib\n"
        self.script_header = header_template.format(
            configuration.cwsl_ctools_path, configuration.cwsl_ctools_path)
示例#19
0
    def test_regex(self):
        ''' The PatternDataSet should turn its input pattern into the expected regular expression. '''

        built_regex = PatternDataSet(self.mock_file_pattern).regex_pattern
        self.assertEqual(built_regex, self.mock_regex)
class TestArgumentCreator(unittest.TestCase):
    """ Checks that the ArgumentCreator loops and groups correctly."""

    def setUp(self):
        """ Create the two mock-backed DataSets used by the tests."""

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        # First DataSet: files described by a food and an animal.
        food_animal_files = ["/fake/pizza_moose.file",
                             "/fake/pizza_rabbit.file",
                             "/fake/chocolate_bilby.file",
                             "/fake/chocolate_rabbit.file"]
        with mock.patch(glob_target, return_value=food_animal_files):
            self.test_patternds_1 = PatternDataSet("/fake/%food%_%animal%.file")

        # Second DataSet: files described by an animal only.
        animal_files = ["/fake/moose.file",
                        "/fake/rabbit.file",
                        "/fake/bilby.file"]
        with mock.patch(glob_target, return_value=animal_files):
            self.test_patternds_2 = PatternDataSet("/fake/%animal%.file")

    def test_simple_looping(self):
        """ Basic one-to-one looping should work."""

        animal_constraint = self.test_patternds_2.get_constraint("animal")
        one_to_one_creator = FileCreator("/output/%animal%.output",
                                         [animal_constraint])

        looper = ArgumentCreator([self.test_patternds_2], one_to_one_creator)

        all_outs = []
        for thing in looper:
            # Each step pairs as many inputs as outputs.
            ins, outs = thing[0], thing[1]
            self.assertEqual(len(ins), len(outs))
            all_outs.append(thing)

        all_files = list(one_to_one_creator.files)
        self.assertEqual(len(all_outs), len(all_files))

    def test_many_looping(self):
        """ Simple many-to-one looping should work."""

        animal_constraint = self.test_patternds_1.get_constraint("animal")
        many_to_one_creator = FileCreator("/output/%animal%.output",
                                          [animal_constraint])

        looper = ArgumentCreator([self.test_patternds_1], many_to_one_creator)

        all_outs = list(looper)

        module_logger.debug("All outs are: {}".format(all_outs))

        # One loop step per animal, and there are three animals.
        self.assertEqual(3, len(all_outs))

    def test_multi_model(self):
        """ Cover the case where Constraints are grouped.

        This seems to fail when people use FileCreators.
        """

        in_constraints = [Constraint('model', ['model_1', 'model_2']),
                          Constraint('variable', ['variable_1']),
                          Constraint('institute', ['institute_1', 'institute_2'])]
        test_filecreator = FileCreator("/fake/%variable%_%model%_%institute%.file",
                                       in_constraints)

        # Register the two valid combinations; the returned files
        # themselves are not needed.
        test_filecreator.get_files({'model': 'model_1',
                                    'institute': 'institute_1'},
                                   update=True)
        test_filecreator.get_files({'model': 'model_2',
                                    'institute': 'institute_2'},
                                   update=True)

        # A second FileCreator acts as the output.
        out_constraints = [Constraint('model', ['model_1', 'model_2']),
                           Constraint('variable', ['variable_1']),
                           Constraint('institute', ['institute_1', 'institute_2'])]
        test_output_filecreator = FileCreator(
            "/an/output/fake/%variable%_%model%_%institute%.file",
            out_constraints)

        print("Valid input combinations are: {0}".format(test_filecreator.valid_combinations))
        self.assertEqual(2, len(test_filecreator.valid_hashes))

        test_argument_creator = ArgumentCreator([test_filecreator],
                                                test_output_filecreator)

        outputs = list(test_argument_creator)

        print("Output is: {0}".format(outputs))

        # Only the two registered combinations should appear - not 4!
        self.assertEqual(len(outputs), 2)

    def test_two_inputs(self):
        """ The ArgumentCreator should work with multiple input datasets."""

        animal_constraint = self.test_patternds_1.get_constraint("animal")
        multi_ds_creator = FileCreator("/output/%animal%.output",
                                       [animal_constraint])

        input_datasets = [self.test_patternds_1, self.test_patternds_2]
        looper = ArgumentCreator(input_datasets, multi_ds_creator)

        all_outs = []
        for thing in looper:
            # Never fewer inputs than outputs in a step.
            ins, outs = thing[0], thing[1]
            self.assertGreaterEqual(len(ins), len(outs))
            all_outs.append(thing)

        # There are three animals.
        self.assertEqual(3, len(all_outs))

        print(all_outs)

        # The order is moose, then rabbit
        # Moose: 2 ins, 1 out.
        moose_step = all_outs[0]
        module_logger.debug("All outs[0]: {}".format(moose_step))
        self.assertEqual(len(moose_step[0]), 2)
        self.assertEqual(len(moose_step[1]), 1)

        # Rabbit: 3 in, 1 out
        rabbit_step = all_outs[1]
        module_logger.debug("All outs[1]: {}".format(rabbit_step))
        self.assertEqual(len(rabbit_step[0]), 3)
        self.assertEqual(len(rabbit_step[1]), 1)