示例#1
0
    def test_model_correllation_2(self):
        """ Combine Constraints that come from two separate DataSets.

        Exercises the merge_output keyword option of ProcessUnit.

        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        input_pattern = "/%colour%_%animal%"

        # Build one mock DataSet for each single-file listing.
        datasets = []
        for listing in (["/red_echidna"], ["/blue_echidna"]):
            with mock.patch(glob_target) as mock_glob:
                mock_glob.return_value = listing
                datasets.append(PatternDataSet(input_pattern))

        # The ProcessUnit is asked to merge the colour Constraint in its output.
        the_process = ProcessUnit(datasets, "/tmp/%animal%_%colour%.file",
                                  "echo", merge_output=["colour"])

        output_ds = the_process.execute(simulate=False)
        outfiles = list(output_ds.files)

        # Both inputs are expected to collapse into one merged output file.
        self.assertEqual(len(outfiles), 1)
        self.assertEqual(outfiles[0].full_path, "/tmp/echidna_red-blue.file")
示例#2
0
    def test_alias_constraints(self):
        """ A PatternDataSet can expose a Constraint under an alias.

        After aliasing "colour" as "hue", a get_files query keyed on "hue"
        is expected to return the file matching that colour.

        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = self.mock_file_list

            pattern_ds = PatternDataSet(self.mock_file_pattern)

            # Register the alias first, so the query below can use 'hue'.
            pattern_ds.alias_constraint("colour", "hue")

            query = {'hue': 'red', 'animal': 'kangaroo'}
            found_files = pattern_ds.get_files(query)

            self.assertEqual(1, len(found_files))
            first_match = found_files[0]
            self.assertEqual("/fake/red_kangaroo.txt", first_match.full_path)
示例#3
0
    def compute(self):
        """ Build a PatternDataSet from this module's constraints and publish it.

        The entries of self.constraints are turned into Constraint objects,
        extended with the constraints read from the "added_constraints"
        input port, and the resulting DataSet is set as the 'out_dataset'
        result.

        Raises:
            ModuleError: if the "added_constraints" port yields nothing, or
                if the DataSet built from the constraints has no files.

        """

        # Determine file path
        patterns = self.get_filepath_patterns()
        # Pass `patterns` as a logger argument instead of pre-formatting with
        # `%`: eager formatting raises TypeError when `patterns` is a tuple
        # whose length is not 1, and does the work even when DEBUG is off.
        logger.debug('Using pattern %s', patterns)

        # Create constraints: each attribute is restricted to its one value.
        constraints = [
            Constraint(attribute, [values])
            for attribute, values in self.constraints.iteritems()
        ]

        # Add user constraints; refuse to run without any.
        user_constraints = self.getInputFromPort("added_constraints")
        if not user_constraints:
            raise ModuleError(
                self,
                "No constraints set on DataSet - you can not run a workflow on the entire DataSet"
            )
        constraints.extend(user_constraints)

        # Create dataset based on file search path and constraints
        dataset = PatternDataSet(patterns, constraints)

        if not dataset.files:
            error_string = "No files found for this dataset with constraints: {}".format(
                constraints)
            # Put each constraint on its own line to keep the message readable.
            error_string = error_string.replace('],', '],\n')
            logger.error(error_string)
            raise ModuleError(self, error_string)

        self.setResult('out_dataset', dataset)
示例#4
0
    def test_build_glob_patterns(self):
        """ Constraints passed to the constructor should restrict the patterns globbed on the fs. """

        colour_constraint = Constraint('colour', ['pink', 'green'])
        pattern_ds = PatternDataSet(self.mock_file_pattern, {colour_constraint})

        # One glob pattern is expected per allowed colour; order is ignored.
        self.assertItemsEqual(pattern_ds.glob_patterns,
                              ['/fake/pink_*.txt', '/fake/green_*.txt'])
示例#5
0
    def test_noconstraints(self):
        ''' Without explicit constraints, the PatternDataSet globs the FS to find files. '''

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = self.mock_file_list
            test_patternds = PatternDataSet(self.mock_file_pattern)

            found_constraints = test_patternds.constraints
            self.assertEqual(found_constraints, self.fake_constraints)
            # The file system must be globbed exactly once, with no arguments.
            mock_glob.assert_called_once_with()
示例#6
0
    def test_cons_from_pattern(self):
        """ The PatternDataSet should build a complete set of constraints by globbing on the file system."""

        with mock.patch('cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
                        ) as mock_glob:
            # The patched glob returns the fixture's fake file list.
            mock_glob.return_value = self.mock_file_list

            pattern_ds = PatternDataSet(self.mock_file_pattern)

            # Compare against the named expectation; it was previously
            # assigned but never used.
            expected_cons = self.fake_constraints

            self.assertEqual(pattern_ds.constraints, expected_cons)
示例#7
0
    def setUp(self):
        """ Build the mock PatternDataSet and the expected job script header. """

        self.mock_file_list = ['/a/fake_1/file_1/pattern_1']

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        with mock.patch(glob_target) as mock_glob:
            # The patched glob hands back the fake file list.
            mock_glob.return_value = self.mock_file_list
            self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%')

        # Both placeholders in the header take the configured ctools path.
        ctools_path = configuration.cwsl_ctools_path
        header_template = ("#!/bin/sh\nset -e\n\nmodule purge\n"
                           "export CWSL_CTOOLS={}\n"
                           "export PYTHONPATH=$PYTHONPATH:{}/pythonlib\n")
        self.script_header = header_template.format(ctools_path, ctools_path)
示例#8
0
    def test_changefile_generation(self):
        """ Cover the common case of calculating changes between two DataSets.

        For example, comparing two datasets by time.
        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        input_pattern = "/%model%_%date%_%variable%"

        # The two listings differ only in their date component.
        file_listings = (
            ["/model1_1986_rain", "/model2_1986_rain",
             "/model3_1986_rain", "/model4_1986_rain",
             "/model1_1986_temp"],
            ["/model1_2015_rain", "/model2_2015_rain",
             "/model3_2015_rain", "/model4_2015_rain",
             "/model1_2015_temp"],
        )

        # Create one mock DataSet per file listing.
        datasets = []
        for listing in file_listings:
            with mock.patch(glob_target) as mock_glob:
                mock_glob.return_value = listing
                datasets.append(PatternDataSet(input_pattern))

        # A ProcessUnit which merges the Constraint on date.
        the_process = ProcessUnit(datasets, "/tmp/%model%_%date%_%variable%",
                                  "echo", merge_output=["date"])

        output_ds = the_process.execute(simulate=True)

        outfile_names = []
        for metafile in output_ds.files:
            outfile_names.append(metafile.full_path)

        # Each model/variable pair should yield one file with both dates joined.
        expected_files = ["/tmp/model1_1986-2015_rain",
                          "/tmp/model2_1986-2015_rain",
                          "/tmp/model3_1986-2015_rain",
                          "/tmp/model4_1986-2015_rain",
                          "/tmp/model1_1986-2015_temp"]

        self.assertItemsEqual(expected_files, outfile_names)
示例#9
0
    def test_model_correllation(self):
        """ Feed an observational and a model DataSet into one ProcessUnit.

        The output is expected to hold one file per obs_model/model pairing.
        """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'

        # Each DataSet is built while the glob is patched with its own files.
        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = self.mock_obs_files
            test_obsds = PatternDataSet(self.observational_pattern)

        with mock.patch(glob_target) as mock_glob:
            mock_glob.return_value = self.mock_model_files
            test_model_ds = PatternDataSet(self.model_pattern)

        our_process = ProcessUnit([test_obsds, test_model_ds],
                                  "/%variable%_%obs_model%_%model%.nc",
                                  "echo")
        output = our_process.execute()

        all_outs = []
        for thing in output.files:
            all_outs.append(thing.full_path)

        good_names = ["/tas_HadISST_BadModel.nc", "/tas_AWAP_BadModel.nc",
                      "/tas_HadISST_GoodModel.nc", "/tas_AWAP_GoodModel.nc"]

        self.assertItemsEqual(good_names, all_outs)
示例#10
0
    def compute(self):
        """ Publish a PatternDataSet restricted by the constraint ports that are set.

        For every name in the fixed constraint list the matching input port
        is read; names whose port value is truthy become single-valued
        Constraints. The resulting DataSet is set as the 'out_dataset' result.

        """

        cons_list = [
            'model', 'experiment', 'variable', 'season_number', 'region'
        ]

        # Read each port exactly once; it was previously read twice per name
        # (once to filter, once to fetch the value).
        in_cons = set()
        for cons_name in cons_list:
            port_value = self.getInputFromPort(cons_name)
            if port_value:
                in_cons.add(Constraint(cons_name, [port_value]))

        # NOTE(review): hard-coded path under a user home directory - confirm
        # this location is still the intended data source.
        file_pattern = "/home/548/teb548/cod/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%"
        output_ds = PatternDataSet(file_pattern, in_cons)

        self.setResult('out_dataset', output_ds)
示例#11
0
    def compute(self):
        """ Publish a PatternDataSet restricted by the constraint ports that are set.

        For every name in the fixed constraint list the matching input port
        is read; names whose port value is truthy become single-valued
        Constraints. The resulting DataSet is set as the 'out_dataset' result.

        """

        cons_list = [
            'model', 'experiment', 'variable', 'season_number', 'region'
        ]

        # Read each port exactly once; it was previously read twice per name
        # (once to filter, once to fetch the value).
        in_cons = set()
        for cons_name in cons_list:
            port_value = self.getInputFromPort(cons_name)
            if port_value:
                in_cons.add(Constraint(cons_name, [port_value]))

        file_pattern = "/g/data/ua6/CAWCR_CVC_processed/staging/users/CWSL/SDM/COD/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%"
        output_ds = PatternDataSet(file_pattern, in_cons)

        self.setResult('out_dataset', output_ds)
示例#12
0
    def test_model_correllation_3(self):
        """ Combine multiple DataSets that each hold several files. """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        input_pattern = "/%colour%_%animal%"

        # Same three files in both listings, in a different order.
        file_listings = (
            ["/red_echidna", "/blue_echidna", "/green_echidna"],
            ["/blue_echidna", "/red_echidna", "/green_echidna"],
        )

        # Create one mock DataSet per file listing.
        datasets = []
        for listing in file_listings:
            with mock.patch(glob_target) as mock_glob:
                mock_glob.return_value = listing
                datasets.append(PatternDataSet(input_pattern))

        # The ProcessUnit is asked to merge the colour Constraint in its output.
        the_process = ProcessUnit(datasets, "/tmp/%animal%_%colour%.file",
                                  "echo", merge_output=["colour"])

        output_ds = the_process.execute(simulate=True)

        outfile_names = []
        for metafile in output_ds.files:
            outfile_names.append(metafile.full_path)

        # Every colour pairing across the two DataSets is expected: 3 x 3 files.
        expected_outfiles = ["/tmp/echidna_red-red.file",
                             "/tmp/echidna_red-blue.file",
                             "/tmp/echidna_red-green.file",
                             "/tmp/echidna_blue-red.file",
                             "/tmp/echidna_blue-blue.file",
                             "/tmp/echidna_blue-green.file",
                             "/tmp/echidna_green-red.file",
                             "/tmp/echidna_green-blue.file",
                             "/tmp/echidna_green-green.file"]

        self.assertItemsEqual(expected_outfiles, outfile_names)
示例#13
0
    def setUp(self):
        """ Build a PatternDataSet whose get_files is mocked to return one file. """

        test_cons = {
            Constraint('fake', ['fake_1']),
            Constraint('file', ['file_1']),
            Constraint('pattern', ['pattern_1']),
        }
        self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%',
                                           constraint_set=test_cons)

        # A stand-in file object carrying the attributes set below.
        mock_file = mock.MagicMock()
        mock_file.full_path = 'test_file1'
        mock_file.__str__.return_value = 'test_file1'
        mock_file.all_atts = {
            "fake": "fake_1",
            "file": "file_1",
            "pattern": "pattern_1",
        }

        # Replace get_files so it always returns the single mock file.
        self.a_pattern_ds.get_files = mock.Mock(return_value=[mock_file])

        # The only valid combination is the full constraint set itself.
        self.a_pattern_ds.valid_combinations = {frozenset(test_cons)}

        # Constant header for the job scripts; both placeholders take the
        # configured ctools path.
        ctools_path = configuration.cwsl_ctools_path
        header_template = ("#!/bin/sh\nset -e\n\nmodule purge\n"
                           "export CWSL_CTOOLS={}\n"
                           "export PYTHONPATH=$PYTHONPATH:{}/pythonlib\n")
        self.script_header = header_template.format(ctools_path, ctools_path)
示例#14
0
    def test_getfiles(self):
        """ 'get_files' should return exactly the files matching the query. """

        glob_target = 'cwsl.core.pattern_dataset.PatternDataSet.glob_fs'
        with mock.patch(glob_target) as mock_glob:

            # The patched glob hands back the fixture's fake file list.
            mock_glob.return_value = self.mock_file_list

            test_patternds = PatternDataSet(self.mock_file_pattern)

            query = {'colour': 'green', 'animal': 'echidna'}
            found_files = test_patternds.get_files(query)

            # The expected MetaFile gets its own attribute dict, independent
            # of the one passed to get_files.
            expected_atts = {'colour': 'green', 'animal': 'echidna'}
            expected_files = [
                MetaFile('green_echidna.txt', '/fake', expected_atts)
            ]

            self.assertItemsEqual(found_files, expected_files)
            mock_glob.assert_called_once_with()
示例#15
0
    def test_regex(self):
        ''' A PatternDataSet built from an input pattern should expose the expected regular expression. '''

        built_regex = PatternDataSet(self.mock_file_pattern).regex_pattern
        self.assertEqual(built_regex, self.mock_regex)