def test_files_method(self):
    """ A file creator .files method should only return files for
    combinations that were marked as valid. """
    cons_set = {Constraint('model', ['ACCESS1-0', 'ACCESS1-3']),
                Constraint('experiment', ['rcp45', 'rcp85'])}
    this_file_creator = FileCreator("/a/fake/pattern/%model%_%experiment%.nc",
                                    extra_constraints=cons_set)

    # Now tell the file creator which files are real!
    # ACCESS1-3 has no rcp85 experiment in this case.
    real_combinations = [{'model': 'ACCESS1-0', 'experiment': 'rcp45'},
                         {'model': 'ACCESS1-0', 'experiment': 'rcp85'},
                         {'model': 'ACCESS1-3', 'experiment': 'rcp45'}]
    found_files = [this_file_creator.get_files(combination,
                                               check=False, update=True)
                   for combination in real_combinations]

    # Ensure that the FileCreator has returned a file
    # for each combination.
    for file_thing in found_files:
        self.assertTrue(file_thing)

    # There should only be 3 valid file combinations returned.
    all_files = list(this_file_creator.files)
    self.assertEqual(len(all_files), 3)
def compute(self):
    """ Build and execute the sdmrun.py process over the input dataset,
    publishing the resulting FileCreator on 'out_dataset'. """
    dataset = self.getInputFromPort('cod_dataset')

    command = "${CWSL_CTOOLS}/sdm/sdmrun.py"
    sdm_config = configuration.cwsl_ctools_path + "/sdm/default.cfg"
    positional_args = [("dxt-gridded", 0, "raw"),
                       ("-c " + sdm_config, 0, "raw")]

    # The data is written out to the default location.
    output_pattern = os.path.join(
        configuration.user_basepath,
        FileCreator.default_pattern(dataset.constraints) + ".nc")

    process = ProcessUnit([dataset], output_pattern, command,
                          dataset.constraints,
                          execution_options=self._required_modules,
                          positional_args=positional_args)
    process.execute(simulate=configuration.simulate_execution)

    self.setResult('out_dataset', process.file_creator)
def compute(self):
    """ Run sdmrun.py on the incoming dataset and set the process
    output as this module's result. """
    in_dataset = self.getInputFromPort('cod_dataset')

    command = "${CWSL_CTOOLS}/sdm/sdmrun.py"
    sdm_config = configuration.cwsl_ctools_path + "/sdm/default.cfg"

    # Output goes to the default location.
    out_name = FileCreator.default_pattern(in_dataset.constraints) + ".nc"
    output_pattern = os.path.join(configuration.user_basepath, out_name)

    unit = ProcessUnit(
        [in_dataset], output_pattern, command, in_dataset.constraints,
        execution_options=self._required_modules,
        positional_args=[("dxt-gridded", 0, "raw"),
                         ("-c " + sdm_config, 0, "raw")])
    unit.execute(simulate=configuration.simulate_execution)

    process_output = unit.file_creator
    self.setResult('out_dataset', process_output)
def test_files_method(self):
    """ A file creator .files method should only return files whose
    attribute combinations were marked valid. """
    constraints = set([Constraint('model', ['ACCESS1-0', 'ACCESS1-3']),
                       Constraint('experiment', ['rcp45', 'rcp85'])])
    creator = FileCreator("/a/fake/pattern/%model%_%experiment%.nc",
                          extra_constraints=constraints)

    # Mark the combinations that actually exist -
    # ACCESS1-3 has no rcp85 experiment in this case.
    first = creator.get_files({'model': 'ACCESS1-0', 'experiment': 'rcp45'},
                              check=False, update=True)
    second = creator.get_files({'model': 'ACCESS1-0', 'experiment': 'rcp85'},
                               check=False, update=True)
    third = creator.get_files({'model': 'ACCESS1-3', 'experiment': 'rcp45'},
                              check=False, update=True)

    # Each requested combination must have produced a file.
    for result in (first, second, third):
        self.assertTrue(result)

    # Only the 3 valid combinations should be yielded by .files.
    self.assertEqual(len([f for f in creator.files]), 3)
def check_filename_pattern(self, glob_pattern, constraints):
    """ Check that added constraints are also found in the input pattern.

    Raises ConstraintNotFoundError when a constraint's key does not
    appear among the keys generated from the pattern.
    """
    pattern_keys = {cons.key for cons in
                    FileCreator.constraints_from_pattern(glob_pattern)}
    for cons in constraints:
        if cons.key not in pattern_keys:
            raise ConstraintNotFoundError("Constraint {} is not found in output pattern {}"
                                          .format(cons.key, glob_pattern))
def check_filename_pattern(self, glob_pattern, constraints):
    """ Ensure every supplied constraint appears in the filename pattern.

    The first constraint whose key is missing from the pattern-derived
    keys triggers a ConstraintNotFoundError.
    """
    known = [generated.key for generated in
             FileCreator.constraints_from_pattern(glob_pattern)]
    for cons in constraints:
        if cons.key in known:
            continue
        raise ConstraintNotFoundError(
            "Constraint {} is not found in output pattern {}".format(
                cons.key, glob_pattern))
def test_multi_model(self):
    """ Test for the case when there are groups on Constraints.

    This seems to fail when people use FileCreators.
    """
    in_pattern = "/fake/%variable%_%model%_%institute%.file"
    out_pattern = "/an/output/fake/%variable%_%model%_%institute%.file"

    def fresh_constraints():
        # Input and output creators share the same constraint structure.
        return [Constraint('model', ['model_1', 'model_2']),
                Constraint('variable', ['variable_1']),
                Constraint('institute', ['institute_1', 'institute_2'])]

    input_creator = FileCreator(in_pattern, fresh_constraints())

    # Set the valid combinations.
    dummy_file_1 = input_creator.get_files({'model': 'model_1',
                                            'institute': 'institute_1'},
                                           update=True)
    dummy_file_2 = input_creator.get_files({'model': 'model_2',
                                            'institute': 'institute_2'},
                                           update=True)

    # Now create a FileCreator to use as output.
    output_creator = FileCreator(out_pattern, fresh_constraints())

    print("Valid input combinations are: {0}".format(input_creator.valid_combinations))
    self.assertEqual(2, len(input_creator.valid_hashes))

    argument_creator = ArgumentCreator([input_creator], output_creator)
    outputs = list(argument_creator)
    print("Output is: {0}".format(outputs))

    # There should only be two outputs - not 4!
    self.assertEqual(len(outputs), 2)
def __init__(self, inputlist, output_pattern, shell_command,
             extra_constraints=None, map_dict=None, cons_keywords=None,
             positional_args=None, execution_options=None):
    """ Build a process around a shell command and its input DataSets.

    The class takes in a DataSet object, constraints to change and the
    path to an executable.  It has a self.execute() method that returns
    a FileCreator to be used as input to the next module.

    Extra constraints to be applied to the output are given by
    extra_constraints.

    map_dict is a dictionary linking constraint names in the input
    DataSets to new constraints in the output.  e.g. if
    map_dict = {"obs-model": ("model", 0)} then the "model" constraint
    in the input position 0 is renamed to be the "obs-model"
    Constraint in the output.
    """
    # FIX: the defaults were mutable objects (set([]), {}, [], {})
    # shared across every call, so state could leak between
    # ProcessUnit instances.  Use None sentinels instead, matching
    # the pattern used elsewhere in this project.
    if extra_constraints is None:
        extra_constraints = set()
    if map_dict is None:
        map_dict = {}
    if cons_keywords is None:
        cons_keywords = {}
    if positional_args is None:
        positional_args = []
    if execution_options is None:
        execution_options = {}

    self.inputlist = inputlist
    self.cons_keywords = cons_keywords
    self.positional_args = positional_args
    self.execution_options = execution_options
    self.extra_constraints = extra_constraints
    self.shell_command = shell_command

    # Make a list of mappings between renamed constraints of the
    # input and constraints in the output.
    self.map_dict = map_dict
    self.mapped_con_names = [cons_name for cons_name in map_dict]

    # The initial Constraints for the output are built from the
    # output file pattern.
    self.output_constraints = set(
        FileCreator.constraints_from_pattern(output_pattern))

    # Apply mappings to copy the required Constraints from the input
    # that will be used in the output to the final collection of
    # output constraints.
    self.apply_mappings()

    # Apply extra constraints given in constructor.
    self.fill_constraints_from_extras()

    # Fill the empty output constraints from the input DataSets.
    self.fill_from_input()

    module_logger.debug("Final output constraints are: {0}"
                        .format(self.output_constraints))

    # Make a file_creator from the new, fixed constraints.
    self.file_creator = FileCreator(output_pattern, self.output_constraints)
def compute(self):
    """ Execute a placeholder echo command over the input dataset and
    publish the resulting FileCreator. """
    dataset = self.getInputFromPort('cod_dataset')
    command = "echo This is the command to run."

    # The data is written out to the default
    # (temporary) location.
    pattern = FileCreator.default_pattern(dataset.constraints, temp=True)

    process = ProcessUnit([dataset], pattern, command,
                          dataset.constraints,
                          execution_options=self._required_modules)
    process.execute(simulate=configuration.simulate_execution)

    self.setResult('out_dataset', process.file_creator)
def compute(self):
    """ Run an echo process with x/y values supplied as positional
    arguments, writing a .json output in the default location. """
    dataset = self.getInputFromPort('in_dataset')
    x_value = self.getInputFromPort('x_value')
    y_value = self.getInputFromPort('y_value')

    command = "echo"

    # The data is written out to the default location.
    pattern = FileCreator.default_pattern(dataset.constraints,
                                          temp=True) + ".json"

    extra_args = [(x_value, 1, 'raw'), (y_value, 2, 'raw')]
    process = ProcessUnit([dataset], pattern, command,
                          dataset.constraints,
                          execution_options=self._required_modules,
                          positional_args=extra_args)
    process.execute(simulate=configuration.simulate_execution)

    process_output = process.file_creator
    self.setResult('out_dataset', process_output)
def __init__(self, inputlist, output_pattern, shell_command,
             extra_constraints=None, map_dict=None, cons_keywords=None,
             positional_args=None, execution_options=None,
             kw_string=None, merge_output=None):
    """ Build a process around a shell command and its input DataSets.

    Arguments:
        inputlist: A list of input DataSets to get files from.
        output_pattern: A filename pattern to use for data output.
        shell_command: The base shell command for the process to run.

    Optional:
        extra_constraints: Extra constraints to be applied to the output.
        map_dict: a dictionary linking constraint names in the input
            DataSets to new constraints in the output. e.g. if
            map_dict = {"obs-model": ("model", 0)} then the "model"
            constraint in the input position 0 is renamed to be the
            "obs-model" Constraint in the output.
        cons_keywords: Used in building the command to be run, if a
            constraint has to be used as a keyword argument.
        positional_args: Used in building the command to be run, if a
            constraint has to be used as a positional argument
            (a list of tuples).
        execution_options: A dictionary to pass options like required
            queues, walltime, required modules etc. to the process
            unit. Currently only required_modules is implemented.
        kw_string: A string used for composite constraint keyword
            arguments, i.e. using multiple attribute values in a
            single keyword argument.
            example - kw_string="--title $model_$variable"
        merge_output: stored on the instance unchanged; its semantics
            are defined by the consumer of this attribute.
    """
    # To avoid mutable-default problems the defaults are None; replace
    # falsy arguments with fresh empty containers (truthiness check
    # preserved from the original behaviour).
    self.map_dict = map_dict or {}
    self.execution_options = execution_options or {}
    self.cons_keywords = cons_keywords or {}
    # FIX: the empty fallback here used to be {} - positional_args is a
    # list of tuples everywhere else (see the call sites and the older
    # constructor), so an empty *list* is the correct neutral value.
    self.positional_args = positional_args or []
    self.kw_string = kw_string or None

    self.merge_output = merge_output
    self.mapped_con_names = [cons_name for cons_name in self.map_dict]
    self.inputlist = inputlist
    self.shell_command = shell_command

    # The initial Constraints are built from the output file pattern.
    pattern_constraints = set(
        FileCreator.constraints_from_pattern(output_pattern))
    mapped_constraints = self.apply_mappings(pattern_constraints)

    # Apply extra constraints given in the constructor.
    filled_constraints = self.fill_constraints_from_extras(
        mapped_constraints, extra_constraints)

    # Finally fill the empty output constraints from the input DataSets.
    self.final_constraints = self.fill_from_input(self.inputlist,
                                                  filled_constraints)

    module_logger.debug("Final output constraints are: {0}"
                        .format(self.final_constraints))
    for ds in inputlist:
        module_logger.debug("Input constraints are: {}"
                            .format(ds.constraints))

    # Make a file_creator from the new, fixed constraints.
    self.file_creator = FileCreator(output_pattern, self.final_constraints)