def test_collect_ip_multipe_process(self):
    """Inputs from two sibling aggregates are all collected by the parent."""
    agg1_inputs = [cwt.Variable('file:///file1', 'tas'),
                   cwt.Variable('file:///file2', 'tas')]
    agg2_inputs = [cwt.Variable('file:///file3', 'tas'),
                   cwt.Variable('file:///file4', 'tas')]

    agg1 = cwt.Process(identifier='CDAT.aggregate')
    agg1.add_inputs(*agg1_inputs)

    agg2 = cwt.Process(identifier='CDAT.aggregate')
    agg2.add_inputs(*agg2_inputs)

    root = cwt.Process(identifier='CDAT.max')
    root.add_inputs(agg1, agg2)

    processes, inputs = root.collect_input_processes()

    # All four distinct variables are discovered.
    self.assertEqual(len(inputs), 4)
    for var in agg1_inputs + agg2_inputs:
        self.assertIn(var.name, inputs)

    # Both aggregates and the root process are discovered.
    self.assertEqual(len(processes), 3)
    for proc in (agg1, agg2, root):
        self.assertIn(proc.name, processes)
def test_script_generator(self):
    """WPSScriptGenerator.generate returns script text for a simple subset.

    Fix: the original used ``domains.values()[0]``, which only works on
    Python 2 — on Python 3 ``dict.values()`` returns a non-subscriptable
    view. ``next(iter(...))`` works on both.
    """
    user = models.User.objects.first()

    variables = {
        'v0': cwt.Variable('file:///test.nc', 'tas', name='v0'),
        'v1': cwt.Variable('file:///test.nc', 'tas', name='v1'),
    }

    domains = {
        'd0': cwt.Domain([cwt.Dimension('time', 0, 200)], name='d0')
    }

    gridder = cwt.Gridder(grid='gaussian~32')

    op = cwt.Process(identifier='CDAT.subset')
    op.domain = next(iter(domains.values()))
    op.add_inputs(*variables.values())
    op.parameters['gridder'] = gridder
    op.parameters['axes'] = cwt.NamedParameter('axes', 'time')

    operations = {'subset': op}

    sg = wps_service.WPSScriptGenerator(variables, domains, operations, user)

    data = sg.generate()

    self.assertIsNotNone(data)
def test_collect_ip_share_inputs(self):
    """Variables shared by two processes are collected only once."""
    shared = [cwt.Variable('file:///file1', 'tas'),
              cwt.Variable('file:///file2', 'tas')]

    maximum = cwt.Process(identifier='CDAT.max')
    maximum.add_inputs(*shared)

    minimum = cwt.Process(identifier='CDAT.min')
    minimum.add_inputs(*shared)

    diff = cwt.Process(identifier='CDAT.subtract')
    diff.add_inputs(maximum, minimum)

    processes, inputs = diff.collect_input_processes()

    # The two shared variables are de-duplicated.
    self.assertEqual(len(inputs), 2)
    for var in shared:
        self.assertIn(var.name, inputs)

    self.assertEqual(len(processes), 3)
    for proc in (maximum, minimum, diff):
        self.assertIn(proc.name, processes)
def test_add(self):
    """Each add() call appends a dataset, even for identical variables."""
    collection = file_manager.DataSetCollection()

    for _ in range(3):
        collection.add(cwt.Variable('file:///test1.nc', 'tas'))

    self.assertEqual(len(collection.datasets), 3)
def setUp(self):
    """Build two process stubs and two input variables used by the tests."""
    def make_process(identifier, name):
        # The first positional argument only needs an ``identifier``
        # attribute, so a dynamically-built class stands in for a real
        # process description object.
        stub = type('Process', (object,), dict(identifier=identifier))
        return cwt.Process(stub, name=name)

    self.avg = make_process('CDAT.avg', 'avg')
    self.sum = make_process('CDAT.sum', 'sum')

    self.tas = cwt.Variable('file:///tas.nc', 'tas', name='tas')
    self.clt = cwt.Variable('file:///clt.nc', 'clt', name='clt')
def test_context_manager(self):
    """Entering a FileManager exposes its collections unchanged."""
    collections = [
        file_manager.DataSetCollection.from_variables(
            [cwt.Variable('file:///test1.nc', 'tas')]),
        file_manager.DataSetCollection.from_variables(
            [cwt.Variable('file:///test2.nc', 'tas')]),
    ]

    fm = file_manager.FileManager(collections)

    with fm as managed:
        self.assertEqual(len(managed.collections), 2)
        self.assertEqual(len(managed.collections[0].datasets), 1)
def test_context_manager(self, mock_open):
    """Entering/exiting a collection opens and closes every dataset."""
    collection = file_manager.DataSetCollection()

    for _ in range(3):
        collection.add(cwt.Variable('file:///test1.nc', 'tas'))

    with collection as managed:
        pass

    # One open and one matching close per dataset.
    self.assertEqual(3, mock_open.call_count)
    self.assertEqual(3, mock_open.return_value.close.call_count)
def test_collect_input_processes(self):
    """A nested process contributes its variables to the collection."""
    regrid = cwt.Process.from_identifier('CDAT.regrid')
    regrid.add_inputs(cwt.Variable('file:///test.nc', 'tas'))

    subset = cwt.Process.from_identifier('CDAT.subset')
    subset.add_inputs(regrid, cwt.Variable('file:///test1.nc', 'tas'))

    processes, variables = subset.collect_input_processes()

    # One upstream process and two distinct variables are discovered.
    self.assertEqual(len(processes), 1)
    self.assertEqual(len(variables), 2)
def setUp(self):
    """Create a WPSClient whose owslib backend is fully mocked."""
    self.client = cwt.WPSClient('https://0.0.0.0:10000/wps')

    # Mock owslib.WebProcessingService
    self.client.client = mock.MagicMock()

    def describe(identifier):
        # Build a mocked owslib process description exposing the usual
        # identifier/title/processVersion read-only properties.
        proc = mock.MagicMock()
        type(proc).identifier = mock.PropertyMock(return_value=identifier)
        type(proc).title = mock.PropertyMock(return_value=identifier)
        type(proc).processVersion = mock.PropertyMock(return_value='1.0.0')
        return proc

    mocked = [describe('CDAT.subset'), describe('CDAT.metrics')]

    type(self.client.client).processes = mock.PropertyMock(
        return_value=mocked)

    self.process = cwt.Process.from_dict({
        'name': 'CDAT.subset',
        'input': [],
        'domain': None,
        'result': 'p0',
    })

    self.domain = cwt.Domain(time=(0, 365), name='d0')

    self.variable = cwt.Variable('file:///test.nc', 'tas', name='v0')
def test_prepare_data_inputs_parameters(self):
    """Extra keyword parameters are serialized into the datainputs string."""
    source = cwt.Variable('file:///test.nc', 'tas', name='v0')

    region = cwt.Domain([
        cwt.Dimension('time', 0, 365),
    ], name='d0')

    process = cwt.Process('CDAT.subset', name='subset')
    process.description = mock.MagicMock()
    process.description.metadata.return_value = {}

    client = cwt.WPSClient('http://idontexist/wps')

    data_inputs = client.prepare_data_inputs_str(
        process,
        [source],
        region,
        axes=['lats', 'lons'],
        weightoptions='generated',
        test=cwt.NamedParameter('test', 'True'))

    # Lists are joined with '|'; plain values and NamedParameters pass
    # through as-is.
    self.assertIn('"axes": "lats|lons"', data_inputs)
    self.assertIn('"weightoptions": "generated"', data_inputs)
    self.assertIn('"test": "True"', data_inputs)
def test_execute_block(self, mock_request):
    """With block=True, execute() waits and returns the process output."""
    mock_request.return_value.status_code = 200
    mock_request.return_value.text = self.execute.toxml(bds=cwt.bds)

    client = cwt.WPSClient('http://idontexist/wps')

    process = cwt.Process.from_identifier('CDAT.subset')

    patcher = mock.patch('cwt.process.Process.output',
                         new_callable=mock.PropertyMock)

    with patcher as output:
        output.return_value = 'test output'

        process.wait = mock.MagicMock()
        process.description = mock.MagicMock()

        result = client.execute(
            process,
            [cwt.Variable('file:///test.nc', 'tas')],
            cwt.Domain([cwt.Dimension('time', 0, 365)]),
            block=True)

        # Blocking mode must wait for completion before returning.
        process.wait.assert_called()

        self.assertEqual(result, 'test output')
def run(self):
    """Submit a CDSpark.average request over a lat/lon box via GET."""
    d0 = cwt.Domain.from_dict({
        'id': 'd0',
        'lat': {'start': 70, 'end': 90, 'crs': 'values'},
        'lon': {'start': 5, 'end': 45, 'crs': 'values'},
    })

    source = cwt.Variable(
        "file:///dass/nobackup/tpmaxwel/.edas/cache/collections/NCML/MERRA_TAS1hr.ncml",
        "tas",
        domain="d0",
        axes="xy")

    op = cwt.Process.from_dict({'name': "CDSpark.average"})
    op.set_inputs(source)

    wps = cwt.WPS('http://localhost:9000/wps', log=True,
                  log_file=os.path.expanduser("~/esgf_api.log"))

    wps.execute(op, domain=d0, method="GET")
def cache_variable(self, parent_variables, variables, domains, operation, user_id, job_id):
    """Retrieve an operation's input data into a local NetCDF file.

    Args:
        parent_variables: Variables produced by upstream tasks.
        variables: A dict mapping variable name to cwt.Variable.
        domains: A dict mapping domain name to cwt.Domain.
        operation: The operation to retrieve inputs for.
        user_id: An integer user id.
        job_id: An integer job id.

    Returns:
        A dict mapping the operation name to a parameterized cwt.Variable
        pointing at the cached output file.

    Raises:
        AccessError: An error occurred accessing a NetCDF file.
        WPSError: An error occurred during retrieval.
    """
    self.PUBLISH = base.RETRY | base.FAILURE

    _, _, o = self.load(parent_variables, variables, domains, operation)

    proc = process.Process(self.request.id)

    proc.initialize(user_id, job_id)

    proc.job.started()

    output_name = '{}.nc'.format(str(uuid.uuid4()))

    output_path = os.path.join(settings.LOCAL_OUTPUT_PATH, output_name)

    try:
        with cdms2.open(output_path, 'w') as output_file:
            output_var_name = proc.retrieve(o, None, output_file)
    except cdms2.CDMSError as e:
        # Pass the exception object itself: exceptions have no ``message``
        # attribute on Python 3, and this matches process_base().
        raise base.AccessError(output_path, e)
    except WPSError:
        raise

    if settings.DAP:
        output_url = settings.DAP_URL.format(filename=output_name)
    else:
        output_url = settings.OUTPUT_URL.format(filename=output_name)

    output_variable = cwt.Variable(output_url, output_var_name).parameterize()

    return {o.name: output_variable}
def job_succeeded(self, context):
    """Mark the job as succeeded, publish its output and notify the user.

    Args:
        context: The operation context holding the job, user, inputs and
            output information.

    Returns:
        The same context, for task chaining.
    """
    if context.output_data is not None:
        # In-memory output: report the data directly.
        context.job.succeeded(context.output_data)

        send_success_email_data(context, context.output_data)
    else:
        # File output: publish the path through the DAP server URL.
        relpath = os.path.relpath(context.output_path, settings.WPS_PUBLIC_PATH)

        url = settings.WPS_DAP_URL.format(filename=relpath)

        # NOTE(review): assumes the output shares the first input's
        # variable name — confirm against the process implementations.
        output = cwt.Variable(url, context.inputs[0].variable.var_name)

        context.job.succeeded(json.dumps(output.parameterize()))

        send_success_email(context, [
            output,
        ])

    context.process.track(context.user)

    # Only track input files for final (non-intermediate) operations.
    if (context.operation is not None and
            context.operation.get_parameter('intermediate') is None):
        for input in context.inputs:
            models.File.track(context.user, input.variable)

            metrics.track_file(input.variable)

    return context
def test_execute(self):
    """Backend.execute returns a task for a simple CDSpark.max request."""
    process = models.Process.objects.create(identifier='CDSpark.max',
                                            backend='EDAS')

    job = models.Job.objects.create(server=self.server, user=self.user,
                                    process=process)

    domains = {'d0': cwt.Domain([cwt.Dimension('time', 0, 200)], name='d0')}

    variables = {'v0': cwt.Variable('file:///test.nc', 'tas', name='v0')}

    proc = cwt.Process(identifier='CDSpark.max', name='max')
    proc.domain = 'd0'
    proc.set_inputs('v0')

    operations = {'max': proc}

    task = self.backend.execute('CDSpark.max', variables, domains,
                                operations, user=self.user, job=job)

    self.assertIsNotNone(task)
def retrieve_axes(user, dataset_id, variable, urls):
    """ Retrieves the axes for a set of urls.

    Args:
        user: A wps.models.User object.
        dataset_id: A str dataset id.
        variable: A str variable name.
        urls: A list of str url paths.

    Returns:
        A list of dicts containing the axes of each file.
    """
    prefix_id = '{}|{}'.format(dataset_id, variable)

    # Process the files in a deterministic (sorted) order.
    return [
        process_url(user, prefix_id,
                    VariableContext(cwt.Variable(url, variable)))
        for url in sorted(urls)
    ]
def test_to_dict(self):
    """to_dict() includes the expected url/variable/domain/name entries."""
    variable = cwt.Variable('file:///tas.nc', 'tas',
                            domain=self.d0,
                            mime_type='application/netcdf',
                            name='tas1')

    self.assertDictContainsSubset(self.data, variable.to_dict())
def test_parameterize(self):
    """parameterize() includes the expected url/variable/domain/name entries."""
    variable = cwt.Variable('file:///tas.nc', 'tas',
                            domains=[self.d0],
                            mime_type='application/netcdf',
                            name='tas1')

    self.assertDictContainsSubset(self.data, variable.parameterize())
def process_base(self, process_func, num_inputs, operation, user_id, job_id):
    """ Configures and executes a process.

    Sets up the process by initializing it with the user_id and job_id,
    marks the job as started. The processes is then executed and a path
    to the output is returned in a cwt.Variable instance.

    The process_func is a method that will take in an list of data chunks,
    process them and return a single data chunk. This output data chunk
    will be written to the output file.

    Args:
        process_func: A function that will be passed the data to be processed.
        num_inputs: An integer value of the number of inputs to process.
        operation: A cwt.Process instance, complete with inputs and domain.
        user_id: An integer user id.
        job_id: An integer job id.

    Returns:
        A dict mapping operation name to a cwt.Variable instance.

        {'max': cwt.Variable('http://test.com/some/data', 'tas')}

    Raises:
        AccessError: An error occurred accessing a NetCDF file.
        WPSError: An error occurred processing the data.
    """
    # Publish all task lifecycle events for this run.
    self.PUBLISH = base.ALL

    proc = process.Process(self.request.id)

    proc.initialize(user_id, job_id)

    proc.job.started()

    # Write the result to a uniquely-named local NetCDF file.
    output_name = '{}.nc'.format(str(uuid.uuid4()))

    output_path = os.path.join(settings.LOCAL_OUTPUT_PATH, output_name)

    try:
        with cdms2.open(output_path, 'w') as output_file:
            output_var_name = proc.process(operation, num_inputs, output_file, process_func)
    except cdms2.CDMSError as e:
        logger.exception('CDMS ERROR')

        # File-level failures are surfaced as AccessError for retry logic.
        raise base.AccessError(output_path, e)
    except WPSError:
        logger.exception('WPS ERROR')

        raise

    # Prefer the DAP URL when a DAP server is configured.
    if settings.DAP:
        output_url = settings.DAP_URL.format(filename=output_name)
    else:
        output_url = settings.OUTPUT_URL.format(filename=output_name)

    output_variable = cwt.Variable(output_url, output_var_name).parameterize()

    return {operation.name: output_variable}
def test_prepare_data_inputs(self):
    """prepare_data_inputs serializes a process, its inputs and a domain."""
    # A stub class with only an ``identifier`` attribute stands in for a
    # real process description.
    description = type('Process', (object,), dict(identifier='CDAT.avg'))
    proc = cwt.Process(description, name='avg')

    tas = cwt.Variable('file:///data/tas_6h.nc', 'tas', name='tas1')

    d0 = cwt.Domain(name='d0')

    data_inputs = self.wps.prepare_data_inputs(proc, [tas], d0)

    self.assertEqual(self.data_inputs, data_inputs)
def run(self):
    """Average two sources, regrid each result, then average the regrids."""
    d0 = cwt.Domain([], name="d0")

    def regridded_average(sources):
        # Average the given sources, then regrid the result to a
        # gaussian~128 grid.
        average = cwt.Operation.from_dict({'name': "CDSpark.multiAverage"})
        for source in sources:
            average.add_input(source)

        regrid = cwt.Operation.from_dict({'name': 'CDSpark.regrid',
                                          'crs': 'gaussian~128'})
        regrid.add_input(average)

        return regrid

    merra = [cwt.Variable(
        "file:///dass/nobackup/tpmaxwel/.edas/cache/collections/NCML/MERRA_TAS1hr.ncml",
        "tas")]

    giss = [cwt.Variable('collection:/GISS-E2-R_r%di1p1' % (i), "tas")
            for i in range(1, 3)]

    merged = cwt.Operation.from_dict({'name': 'CDSpark.multiAverage'})
    merged.add_input(regridded_average(merra))
    merged.add_input(regridded_average(giss))

    wps = cwt.WPS('http://localhost:9001/wps', log=True,
                  log_file=os.path.expanduser("~/esgf_api.log"))
    wps.init()

    process = cwt.Process(wps, merged)
    process.execute(None, d0, [], True, True, "GET")
def test_parameterize(self):
    """parameterize() succeeds for a process with domain, parameter, input."""
    process = cwt.Process.from_identifier('CDAT.subset')

    process.set_domain(cwt.Domain([
        cwt.Dimension('time', 0, 365),
    ]))

    process.add_parameters(test=['value1'])

    process.add_inputs(cwt.Variable('file:///test.nc', 'tas'))

    result = process.parameterize()
def test_execute_failed(self, mock_request):
    """A failed WPS response surfaces as an exception from execute()."""
    mock_request.return_value.status_code = 200
    mock_request.return_value.text = self.execute_failed.toxml(bds=cwt.bds)

    client = cwt.WPSClient('http://idontexist/wps')

    process = cwt.Process.from_identifier('CDAT.subset')

    inputs = [cwt.Variable('file:///test.nc', 'tas')]
    domain = cwt.Domain([cwt.Dimension('time', 0, 365)])

    with self.assertRaises(Exception):
        client.execute(process, inputs, domain)
def _prepare_inputs(self, workflow_inputs):
    # type: (JSON) -> List[cwt.Variable]
    """Convert inputs from cwl inputs to ESGF format"""
    message = "Preparing execute request for remote ESGF provider."
    self.update_status(message, Percent.PREPARING, STATUS_RUNNING)

    urls = self._get_files_urls(workflow_inputs)
    variable_name = self._get_variable(workflow_inputs)

    # One cwt.Variable per input file, all sharing the same variable name.
    return [cwt.Variable(url, variable_name) for url in urls]
def test_check_cache(self, mock_open):
    """A cache miss creates a new entry and opens it for writing."""
    dataset = file_manager.DataSet(cwt.Variable('file:///test1.nc', 'tas'))

    collection = file_manager.DataSetCollection()
    collection.get_cache_entry = mock.MagicMock(return_value=None)

    # Creating the new cache entry costs exactly one query.
    with self.assertNumQueries(1):
        cache, cache_obj = collection.check_cache(dataset)

    mock_open.assert_called_with(cache.local_path, 'w')

    self.assertIsNotNone(cache_obj)
def test_check_cache_error_opening(self, mock_open):
    """A failure opening the cache file makes check_cache return None."""
    mock_open.side_effect = cdms2.CDMSError('some error')

    dataset = file_manager.DataSet(cwt.Variable('file:///test1.nc', 'tas'))

    collection = file_manager.DataSetCollection()
    collection.get_cache_entry = mock.MagicMock(return_value=None)

    with self.assertNumQueries(2):
        result = collection.check_cache(dataset)

    self.assertIsNone(result)
def execute(self, context, request, client, identifier, files, variable, domain, **kwargs):
    """Execute a remote WPS process over the given files and record it."""
    process = client.processes(identifier)[0]

    # A missing domain stays None; otherwise build a cwt.Domain from the
    # supplied dict.
    if domain is not None:
        domain = cwt.Domain(**domain)

    inputs = [cwt.Variable(path, variable) for path in files]

    client.execute(process, inputs, domain)

    context.set_data_inputs(request, inputs, domain, process)

    return process
def test_collect_ip_simple(self):
    """A two-level chain yields both processes and the leaf variables."""
    leaves = [cwt.Variable('file:///file1', 'tas'),
              cwt.Variable('file:///file2', 'tas')]

    aggregate = cwt.Process(identifier='CDAT.aggregate')
    aggregate.add_inputs(*leaves)

    subset = cwt.Process(identifier='CDAT.subset')
    subset.add_inputs(aggregate)

    processes, inputs = subset.collect_input_processes()

    self.assertEqual(len(inputs), 2)
    for var in leaves:
        self.assertIn(var.name, inputs)

    self.assertEqual(len(processes), 2)
    for proc in (aggregate, subset):
        self.assertIn(proc.name, processes)
def test_combine_inputs(self):
    """Inputs passed to execute() are merged with those set on the process."""
    tas0, tas1 = [cwt.Variable('file:///tas.nc', 'tas{}'.format(i))
                  for i in range(2)]

    description = type('Process', (object,), dict(identifier='CDAT.avg'))
    avg = cwt.Process(description)
    avg.set_inputs(tas0)

    with mock.patch.object(self.wps, '_WPS__request') as request:
        request.return_value = self.execute_data

        self.wps.execute(avg, inputs=[tas1])

        # Both the preset and the per-call inputs appear in the payload.
        sent = request.call_args_list[0][1]['data']
        self.assertIn('tas0|', sent)
        self.assertIn('tas1|', sent)
def test_api_key(self, mock_request):
    """execute() succeeds when the client is built with an api key."""
    mock_request.return_value.status_code = 200
    mock_request.return_value.text = self.execute.toxml(bds=cwt.bds)

    client = cwt.WPSClient('http://idontexist/wps', api_key='api_key_7')

    process = cwt.Process.from_identifier('CDAT.subset')
    process.description = mock.MagicMock()

    response = client.execute(
        process,
        [cwt.Variable('file:///test.nc', 'tas')],
        cwt.Domain([cwt.Dimension('time', 0, 365)]))