def _start(self, src, parameters):
    """
    Client specific start implementation.

    Args:
        src (str): Input data.
        parameters (dict): Parameters dict.

    Returns:
        dict: response.
    """
    # Get environment and remove it from parameters
    parameters = parameters.copy()
    env = parameters.pop('env', dict())

    # Checks Apyfal version
    self._checks_apyfal_version(env)

    # Initialize metering
    with self._accelerator_lock:
        self._init_metering(
            env, reload=parameters['app'].pop('reload', False))

    # Run and return response
    return self._run_executable(
        mode='0', input_file=src, input_json=str(_uuid()),
        output_json=str(_uuid()), parameters=parameters)
def as_tmp_file(self, url, mode):
    """
    Return temporary representation of a file.

    Args:
        url (str): apyfal.storage URL of the file.
        mode (str): Access mode. 'r' or 'w'.

    Returns:
        str or file-like object: temporary object.
    """
    # Generates randomized temporary filename
    local_path = _os_path.join(self._tmp_dir, str(_uuid()))

    # Gets input file
    if 'r' in mode:
        _srg.copy(url, local_path)

    # Yields local temporary path
    yield local_path

    # Sends output file
    if 'w' in mode:
        _srg.copy(local_path, url)

    # Clears temporary file
    _remove(local_path)
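# A minimal standalone sketch of the same yield-then-cleanup pattern, assuming
# the real method is wrapped with contextlib.contextmanager (the bare yield
# suggests this) and using a no-op stand-in for apyfal.storage copies.
import os
import tempfile
from contextlib import contextmanager
from uuid import uuid4 as _uuid


@contextmanager
def as_tmp_file(url, mode, tmp_dir=tempfile.gettempdir()):
    # Randomized temporary path, as in the method above
    local_path = os.path.join(tmp_dir, str(_uuid()))
    if 'r' in mode:
        # Stand-in for the download step (_srg.copy(url, local_path)):
        # just create an empty file
        open(local_path, 'wb').close()
    yield local_path
    if 'w' in mode:
        # Stand-in for the upload step (_srg.copy(local_path, url))
        pass
    os.remove(local_path)


with as_tmp_file('host://input.bin', 'r') as path:
    print(path)  # temporary local copy, deleted on exit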
def _get_id():
    """
    Return a unique ID.

    Returns:
        str: id
    """
    return 'pycosio%s' % (str(_uuid()).replace('-', ''))
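# For illustration, the returned string is simply 'pycosio' followed by the
# 32 hex digits of a UUID4 with the hyphens stripped; a quick check using the
# standard uuid module directly:
from uuid import uuid4

uid = 'pycosio%s' % str(uuid4()).replace('-', '')
print(uid)                        # e.g. 'pycosio3f2a4c1e...'
print(len(uid) - len('pycosio'))  # 32 hex characters after the prefix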
def state(self, id=None):
    if not id:
        # adds a leading dot to make use of stateconf's namespace feature.
        id = '.{0}'.format(_uuid())
    try:
        return self.get_all_decls()[id]
    except KeyError:
        self.get_all_decls()[id] = s = StateDeclaration(id)
        self.decls.append(s)
        return s
def run(self, inputs):
    wf = self.__model
    tk_id = str(_uuid())
    token = models.WorkflowToken(id=tk_id, wf=wf)
    wf = getWorkflowImplementation(wf.wf_implem)
    token.put()
    wf.initTokens(token, inputs)
    return WorkflowToken(token)
def state(self, id=None):
    if not id:
        # adds a leading dot to make use of stateconf's namespace feature.
        id = '.' + str(_uuid())
    try:
        return self.all_decls[id]
    except KeyError:
        self.all_decls[id] = s = StateDeclaration(id, self)
        self.decls.append(s)
        return s
def _stop(self):
    """
    Client specific stop implementation.

    Returns:
        dict: response.
    """
    try:
        return self._run_executable(mode='2', output_json=str(_uuid()))
    finally:
        _systemctl('stop', 'meteringsession', 'meteringclient')
def _process(self, src, dst, parameters):
    """
    Client specific process implementation.

    Args:
        src (str): Input data.
        dst (str): Output data.
        parameters (dict): Parameters dict.

    Returns:
        dict: response dict.
    """
    return self._run_executable(
        mode='1', input_file=src, output_file=dst,
        input_json=str(_uuid()), output_json=str(_uuid()),
        parameters=parameters,
        # Reduces verbosity to minimum by default
        extra_args=['-v4'])
def __init__(self, role=None, policy=None, acs_client_kwargs=None,
             acs_create_instance_kwargs=None, **kwargs):
    _CSPHost.__init__(self, **kwargs)

    # Initializes attributes
    self._security_group_id = None
    self._role, self._policy = self._get_role_and_policy(role, policy)

    section = self._config[self._config_section]
    self._acs_client_kwargs = (
        acs_client_kwargs or
        section.get_literal('acs_client_kwargs') or dict())
    self._acs_create_instance_kwargs = (
        acs_create_instance_kwargs or
        section.get_literal('acs_create_instance_kwargs') or dict())

    # ClientToken guarantees idempotence of requests
    self._client_token = str(_uuid())
def __init__(self, accelerator=None, client_type=None,
             accelize_client_id=None, accelize_secret_id=None,
             config=None, **_):
    self._name = accelerator
    self._client_type = client_type
    self._url = None
    self._stopped = False

    # Define a session UUID
    self._session_uuid = str(_uuid())

    # Dict to cache values
    self._cache = {}

    # Read configuration
    self._config = config = _cfg.create_configuration(config)

    # Get Start parameters
    self._configuration_parameters = self._load_configuration(
        self.DEFAULT_CONFIGURATION_PARAMETERS, 'configuration')

    # Add credential information if available
    client_id = config['accelize'].set('client_id', accelize_client_id)
    if client_id:
        self._configuration_parameters['env']['client_id'] = client_id
    secret_id = config['accelize'].set('secret_id', accelize_secret_id)
    if secret_id:
        self._configuration_parameters['env']['client_secret'] = secret_id

    #: Directories that can be processed remotely on host
    self._authorized_host_dirs = [
        '%s/' % _os_path.abspath(_os_path.expanduser(path))
        for path in (config['security'].get_list('authorized_host_dirs') or
                     self.DEFAULT_AUTHORIZED_HOST_DIRS)]

    # Get process parameters
    self._process_parameters = self._load_configuration(
        self.DEFAULT_PROCESS_PARAMETERS, 'process')
def layout2yaml(layout, site, name, filename):
    """Write a layout for a given site to a YAML file.

    layout
        layout from a wflopg.Owflop object
    site : str
        site name
    name : str
        layout name
    filename : str
        file to write to

    """
    output = {}
    output['name'] = name
    output['uuid'] = str(_uuid())
    output['site'] = site
    output['layout'] = layout.values.tolist()
    with open(filename, 'w') as f:
        _yaml(typ='safe').dump(output, f)
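# A hedged sketch of the document this writes, with a plain nested list
# standing in for layout.values.tolist() and assuming the _yaml alias refers
# to ruamel.yaml.YAML (suggested by the typ='safe' call); names below are
# hypothetical.
from uuid import uuid4 as _uuid
from ruamel.yaml import YAML as _yaml

output = {
    'name': 'example layout',                             # hypothetical layout name
    'uuid': str(_uuid()),
    'site': 'example site',                               # hypothetical site name
    'layout': [[0.0, 0.0], [500.0, 0.0], [0.0, 500.0]],   # stand-in coordinates
}
with open('layout.yaml', 'w') as f:
    _yaml(typ='safe').dump(output, f)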
def start(self):
    if self.is_running():
        raise RuntimeError(
            'DML Execution Engine already started, use stop() to stop it.')

    environment = self.environment
    cluster_name = 'cluster_%s' % str(_uuid())
    self.cluster_working_dir = environment._create_job_home_dir(cluster_name)

    # put initial status file and metadata files
    self._prepare_cluster_init_files()

    # Wait for the application to start and then retrieve commander port
    try:
        _log.info('Step 1/4: submitting the engine application...')

        # submit actual application
        self.app_id = environment._submit_job(
            job_working_dir=self.cluster_working_dir,
            num_workers=environment.get_num_workers(),
            silent=True)

        _log.info('Step 2/4: waiting for the engine(%s) to run ...'
                  % self.app_id)
        self._wait_for_application_start()

        # Read commander init file to get commander listening URI
        _log.info('Step 3/4: waiting for commander to be ready...')
        commander_url = self._wait_for_commander_ready()
        self.cluster_controller = commander_url

        # Wait for all workers to be ready
        _log.info('Step 4/4: waiting for all %s workers to be ready...'
                  % environment.get_num_workers())
        self._wait_for_all_workers_ready()

        _log.info('Cluster is listening at: %s' % commander_url)
    except Exception as e:
        _log.error('Error encountered when waiting for DML Execution Engine '
                   'to start: %s' % e)
        if self.app_id:
            environment._cancel_job(self.app_id, silent=True)
        raise
def _session(self):
    """
    Requests session

    Returns:
        requests.sessions.Session: Session
    """
    session_kwargs = dict(max_retries=self._REQUEST_RETRIES)

    # Gets SSL certificate
    if self._ssl_cert_crt is None and _os_path.exists(_cfg.APYFAL_CERT_CRT):
        # Uses default certificate if not provided and not explicitly
        # disabled (False)
        self._ssl_cert_crt = _cfg.APYFAL_CERT_CRT

    elif (self._ssl_cert_crt and
          (hasattr(self._ssl_cert_crt, 'read') or
           not _os_path.exists(self._ssl_cert_crt))):
        # Copies certificate locally if not reachable by local path
        ssl_cert_crt = _os_path.join(self._tmp_dir, str(_uuid()))
        _srg_copy(self._ssl_cert_crt, ssl_cert_crt)
        self._ssl_cert_crt = ssl_cert_crt

    # Enables certificates verification
    if self._ssl_cert_crt:
        session_kwargs['verify'] = self._ssl_cert_crt

        # Disables hostname verification if wildcard certificate
        from apyfal._certificates import get_host_names_from_certificate
        with open(self._ssl_cert_crt, 'rb') as crt_file:
            if get_host_names_from_certificate(crt_file.read()) == ['*']:
                session_kwargs['assert_hostname'] = False

    # Force url to use HTTPS
    self._url = _utl.format_url(
        self._url, force_secure=bool(self._ssl_cert_crt))

    # Initializes session
    return _utl.http_session(**session_kwargs)
def put_object(self, locator, path, content=None, headers=None,
               data_range=None, new_file=False):
    """
    Put object.

    Args:
        locator (str): locator name
        path (str): Object path.
        content (bytes like-object): File content.
        headers (dict): Header to put with the file.
        data_range (tuple of int): Range of position of content.
        new_file (bool): If True, force new file creation.

    Returns:
        dict: File header.
    """
    with self._put_lock:
        if new_file:
            self.delete_object(locator, path, not_exists_ok=True)

        try:
            # Existing file
            file = self._get_locator_content(locator)[path]
        except KeyError:
            # New file
            self._get_locator_content(locator)[path] = file = {
                'Accept-Ranges': 'bytes',
                'ETag': str(_uuid()),
                '_content': bytearray(),
                '_lock': _Lock()
            }
            if self._header_size:
                file[self._header_size] = 0
            if self._header_ctime:
                file[self._header_ctime] = self._format_date(_time())

    # Update file
    with file['_lock']:
        if content:
            file_content = file['_content']

            # Write full content
            if not data_range or (
                    data_range[0] is None and data_range[1] is None):
                file_content[:] = content

            # Write content range
            else:
                # Define range
                start, end = data_range
                if start is None:
                    start = 0
                if end is None:
                    end = start + len(content)

                # Add padding if missing data
                if start > len(file_content):
                    file_content[len(file_content):start] = (
                        start - len(file_content)) * b'\0'

                # Flush new content
                file_content[start:end] = content

        if headers:
            file.update(headers)

        if self._header_size:
            file[self._header_size] = len(file['_content'])

        if self._header_mtime:
            file[self._header_mtime] = self._format_date(_time())

        # Return Header
        header = file.copy()
        del header['_content']
        return header
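# Standalone illustration of the range-write branch above: writing past the
# current end of the buffer first pads the gap with zero bytes, then splices
# in the new content.
file_content = bytearray(b'abc')   # existing content
content = b'XYZ'                   # new content to write at offset 6
start, end = 6, 9

# Add padding if missing data (same logic as the method above)
if start > len(file_content):
    file_content[len(file_content):start] = (start - len(file_content)) * b'\0'

# Flush new content
file_content[start:end] = content
print(file_content)  # bytearray(b'abc\x00\x00\x00XYZ')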
def create_job_home_dir(environment, job_name):
    '''
    Given a job name, create a home directory for the job in the EC2 cluster.
    '''
    return environment.s3_state_path + '/' + job_name + '-' + str(_uuid())
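# For illustration, the resulting directory is just the environment's S3 state
# prefix plus the job name and a UUID suffix (bucket and prefix below are
# hypothetical).
from uuid import uuid4 as _uuid

s3_state_path = 's3://my-bucket/state'   # hypothetical prefix
job_name = 'training-job'
print(s3_state_path + '/' + job_name + '-' + str(_uuid()))
# e.g. s3://my-bucket/state/training-job-3fa85f64-5717-4562-b3fc-2c963f66afa6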
def __init__(self, name, stages=[[]], final_stage=None, environment=None,
             _exec_dir=None, _task_output_paths=None, _job_type='PIPELINE'):
    """
    Construct a job.

    Parameters
    ----------
    name : str
        Name of this Job, must be unique.

    stages : list[list[Task]]
        Collection of task(s) to be executed.

    final_stage : list[Task] | Task
        Collection of task(s) whose outputs are to be returned.

    environment : Environment, optional
        Environment used for this execution. See
        :py:class:`~graphlab.deploy.environment.LocalAsync` for an example
        environment.
    """
    _raise_error_if_not_of_type(name, [str], 'name')
    _raise_error_if_not_of_type(stages, [list], 'stages')
    _raise_error_if_not_of_type(final_stage, [list, _Task, type(None)],
                                'final_stage')

    self.name = name
    self.environment = environment
    self._stages = stages
    self._num_tasks = 0
    self._status = 'Pending'
    self._start_time = None
    self._end_time = None
    self._error = None
    self._job_type = _job_type

    # Set the packages
    self._packages = set()
    for task in self._stages:
        for t in task:
            self._num_tasks += 1
            self._packages.update(t.get_required_packages())

    self._final_stage = final_stage
    self._task_status = {}
    self._session = _gl.deploy._default_session

    if not _exec_dir:
        relative_path = "job-results-%s" % str(_uuid())
        self._exec_dir = self.get_path_join_method()(
            self._session.results_dir, relative_path)
    else:
        self._exec_dir = _exec_dir

    # Location where all the outputs for the tasks are saved.
    if not _task_output_paths:
        Job._update_exec_dir(self, self._exec_dir)
    else:
        self._task_output_paths = _task_output_paths
def __init__(self, command, args=None, kwargs=None, name=None, qtype=None,
             profile=None, **kwds):
    """Initialization function arguments.

    Args:
        command (function/str): The command or function to execute.
        args (tuple/dict): Optional arguments to add to command,
            particularly useful for functions.
        kwargs (dict): Optional keyword arguments to pass to the command,
            only used for functions.
        name (str): Optional name of the job. If not defined, guessed.
            If a job of the same name is already queued, an integer job
            number (not the queue number) will be added, i.e. <name>.1
        qtype (str): Override the default queue type
        profile (str): The name of a profile saved in the conf

        *All other keywords are parsed into cluster keywords by the options
        system. For available keywords see `fyrd.option_help()`*
    """
    ########################
    #  Sanitize arguments  #
    ########################
    _logme.log('Args pre-check: {}'.format(kwds), 'debug')
    kwds = _options.check_arguments(kwds)
    _logme.log('Args post-check: {}'.format(kwds), 'debug')

    # Override autoclean state (set in config file)
    if 'clean_files' in kwds:
        self.clean_files = kwds.pop('clean_files')
    if 'clean_outputs' in kwds:
        self.clean_outputs = kwds.pop('clean_outputs')

    # Path handling
    [kwds, self.runpath,
     self.outpath, self.scriptpath] = _conf.get_job_paths(kwds)

    # Save command
    self.command = command
    self.args = args

    # Merge in profile, this includes all args from the DEFAULT profile
    # as well, ensuring that those are always set at a minimum.
    profile = profile if profile else 'DEFAULT'
    prof = _conf.get_profile(profile)
    if not prof:
        raise _ClusterError('No profile found for {}'.format(profile))
    for k, v in prof.args.items():
        if k not in kwds:
            kwds[k] = v

    # Use the default profile as a backup if any arguments missing
    default_args = _conf.DEFAULT_PROFILES['DEFAULT']
    default_args.update(_conf.get_profile('DEFAULT').args)
    for opt, arg in default_args.items():
        if opt not in kwds:
            _logme.log('{} not in kwds, adding from default: {}:{}'
                       .format(opt, opt, arg), 'debug')
            kwds[opt] = arg

    # Get environment
    if not _queue.MODE:
        _queue.MODE = _queue.get_cluster_environment()
    self.qtype = qtype if qtype else _queue.MODE
    self.queue = _queue.Queue(user='******', qtype=self.qtype)
    self.state = 'Not_Submitted'

    # Set name
    if not name:
        if callable(command):
            strcmd = str(command).strip('<>')
            parts = strcmd.split(' ')
            if parts[0] == 'bound':
                name = '_'.join(parts[2:3])
            else:
                parts.remove('function')
                try:
                    parts.remove('built-in')
                except ValueError:
                    pass
                name = parts[0]
        else:
            name = command.split(' ')[0].split('/')[-1]

    # Make sure name not in queue
    self.uuid = str(_uuid()).split('-')[0]
    names = [i.name.split('.')[0] for i in self.queue]
    namecnt = len([i for i in names if i == name])
    name = '{}.{}.{}'.format(name, namecnt, self.uuid)
    self.name = name

    # Set modules
    self.modules = kwds.pop('modules') if 'modules' in kwds else None
    if self.modules:
        self.modules = _run.opt_split(self.modules, (',', ';'))

    # Make sure args are a tuple or dictionary
    if args:
        if isinstance(args, str):
            args = tuple(args)
        if not isinstance(args, (tuple, dict)):
            try:
                args = tuple(args)
            except TypeError:
                args = (args,)

    # In case cores are passed as None
    if 'nodes' not in kwds:
        kwds['nodes'] = default_args['nodes']
    if 'cores' not in kwds:
        kwds['cores'] = default_args['cores']
    self.nodes = kwds['nodes']
    self.cores = kwds['cores']

    # Set output files
    suffix = kwds.pop('suffix') if 'suffix' in kwds \
        else _conf.get_option('jobs', 'suffix')
    if 'outfile' in kwds:
        pth, fle = _os.path.split(kwds['outfile'])
        if not pth:
            pth = self.outpath
        kwds['outfile'] = _os.path.join(pth, fle)
    else:
        kwds['outfile'] = _os.path.join(
            self.outpath, '.'.join([name, suffix, 'out']))
    if 'errfile' in kwds:
        pth, fle = _os.path.split(kwds['errfile'])
        if not pth:
            pth = self.outpath
        kwds['errfile'] = _os.path.join(pth, fle)
    else:
        kwds['errfile'] = _os.path.join(
            self.outpath, '.'.join([name, suffix, 'err']))
    self.outfile = kwds['outfile']
    self.errfile = kwds['errfile']

    # Check and set dependencies
    if 'depends' in kwds:
        dependencies = _run.listify(kwds.pop('depends'))
        self.dependencies = []
        errmsg = 'Dependencies must be number or list'
        for dependency in dependencies:
            if isinstance(dependency, str):
                if not dependency.isdigit():
                    raise _ClusterError(errmsg)
                dependency = int(dependency)
            if not isinstance(dependency, (int, Job)):
                raise _ClusterError(errmsg)
            self.dependencies.append(dependency)

    ######################################
    #  Command and Function Preparation  #
    ######################################

    # Get imports
    imports = kwds.pop('imports') if 'imports' in kwds else None

    # Get syspaths
    syspaths = kwds.pop('syspaths') if 'syspaths' in kwds else None

    # Split out sys.paths from imports and set imports in self
    if imports:
        self.imports = []
        syspaths = syspaths if syspaths else []
        for i in imports:
            if i.startswith('sys.path.append') \
                    or i.startswith('sys.path.insert'):
                syspaths.append(i)
            else:
                self.imports.append(i)

    # Function specific initialization
    if callable(command):
        self.kind = 'function'
        script_file = _os.path.join(
            self.scriptpath, '{}_func.{}.py'.format(name, suffix)
        )
        self.poutfile = self.outfile + '.func.pickle'
        self.function = _Function(
            file_name=script_file, function=command, args=args,
            kwargs=kwargs, imports=self.imports, syspaths=syspaths,
            outfile=self.poutfile
        )

        # Collapse the _command into a python call to the function
        executable = '#!/usr/bin/env python{}'.format(
            _sys.version_info.major) if _conf.get_option(
                'jobs', 'generic_python') else _sys.executable
        command = '{} {}'.format(executable, self.function.file_name)
        args = None
    else:
        self.kind = 'script'
        self.poutfile = None

        # Collapse args into command
        command = command + ' '.join(args) if args else command

    #####################
    #  Script Creation  #
    #####################

    # Build execution wrapper with modules
    precmd = ''
    if self.modules:
        for module in self.modules:
            precmd += 'module load {}\n'.format(module)

    # Create queue-dependent scripts
    sub_script = ''
    if self.qtype == 'slurm':
        scrpt = _os.path.join(
            self.scriptpath, '{}.{}.sbatch'.format(name, suffix)
        )

        # We use a separate script and a single srun command to avoid
        # issues with multiple threads running at once
        exec_script = _os.path.join(self.scriptpath,
                                    '{}.{}.script'.format(name, suffix))
        exe_script = _scrpts.CMND_RUNNER_TRACK.format(
            precmd=precmd, usedir=self.runpath, name=name, command=command)

        # Create the exec_script Script object
        self.exec_script = _Script(script=exe_script, file_name=exec_script)

        # Add all of the keyword arguments at once
        precmd = _options.options_to_string(kwds, self.qtype) + precmd

        ecmnd = 'srun bash {}'.format(exec_script)
        sub_script = _scrpts.SCRP_RUNNER.format(precmd=precmd,
                                                script=exec_script,
                                                command=ecmnd)

    elif self.qtype == 'torque':
        scrpt = _os.path.join(self.scriptpath,
                              '{}.cluster.qsub'.format(name))

        # Add all of the keyword arguments at once
        precmd = _options.options_to_string(kwds, self.qtype) + precmd

        sub_script = _scrpts.CMND_RUNNER_TRACK.format(
            precmd=precmd, usedir=self.runpath, name=name, command=command)

    elif self.qtype == 'local':
        # Create the pool
        if not _local.JQUEUE or not _local.JQUEUE.runner.is_alive():
            threads = kwds['threads'] if 'threads' in kwds \
                else _local.THREADS
            _local.JQUEUE = _local.JobQueue(cores=threads)

        scrpt = _os.path.join(self.scriptpath, '{}.cluster'.format(name))
        sub_script = _scrpts.CMND_RUNNER_TRACK.format(
            precmd=precmd, usedir=self.runpath, name=name, command=command)

    else:
        raise _ClusterError('Invalid queue type')

    # Create the submission Script object
    self.submission = _Script(script=sub_script, file_name=scrpt)

    # Save the keyword arguments for posterity
    self.kwargs = kwds
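# Standalone sketch of the name-uniqueness scheme used above: the final job
# name combines the base name, a count of same-named jobs already queued, and
# the first block of a UUID4 (the queue contents here are hypothetical
# stand-ins for the live queue).
from uuid import uuid4 as _uuid

name = 'align_reads'
queued_names = ['align_reads.0.ab12cd34', 'call_variants.0.99ffee11']

uuid_part = str(_uuid()).split('-')[0]
namecnt = len([n for n in queued_names if n.split('.')[0] == name])
print('{}.{}.{}'.format(name, namecnt, uuid_part))  # e.g. 'align_reads.1.3fa85f64'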
def put_object(self, locator, path, content=None, headers=None,
               data_range=None, new_file=False):
    """
    Put object.

    Args:
        locator (str): locator name
        path (str): Object path.
        content (bytes like-object): File content.
        headers (dict): Header to put with the file.
        data_range (tuple of int): Range of position of content.
        new_file (bool): If True, force new file creation.

    Returns:
        dict: File header.
    """
    with self._put_lock:
        if new_file:
            self.delete_object(locator, path, not_exists_ok=True)

        try:
            file = self._get_locator_content(locator)[path]
        except KeyError:
            self._get_locator_content(locator)[path] = file = {
                "Accept-Ranges": "bytes",
                "ETag": str(_uuid()),
                "_content": bytearray(),
                "_lock": _Lock(),
            }
            if self._header_size:
                file[self._header_size] = 0
            if self._header_ctime:
                file[self._header_ctime] = self._format_date(_time())

    # Update file
    with file["_lock"]:
        if content:
            file_content = file["_content"]

            if not data_range or (
                    data_range[0] is None and data_range[1] is None):
                file_content[:] = content
            else:
                start, end = data_range
                if start is None:
                    start = 0
                if end is None:
                    end = start + len(content)

                if start > len(file_content):
                    file_content[len(file_content):start] = (
                        start - len(file_content)) * b"\0"

                file_content[start:end] = content

        if headers:
            file.update(headers)

        if self._header_size:
            file[self._header_size] = len(file["_content"])

        if self._header_mtime:
            file[self._header_mtime] = self._format_date(_time())

        header = file.copy()
        del header["_content"]
        return header
def __init__(self, path=None, sensor='SeaWiFS', resolution='9km',
             mask_file=None, xlim=None, ylim=None):
    # Initializes the variables to default values. The indices 'n', 'k',
    # 'j' and 'i' refer to the temporal, height, meridional and zonal
    # coordinates respectively. If one of these indices is set to 'None',
    # then it is assumed infinite size, which is relevant for the 'time'
    # coordinate.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict()
    self.data = dict()
    self.stencil_coeffs = dict()
    self.stencil_params = dict()

    # Sets global parameters for grid.
    if path is None:
        path = '/academia/data/raw/oceancolor'
    self.params['path'] = '%s/%s' % (path, sensor)
    self.params['mask_file'] = mask_file
    self.params['uuid'] = str(_uuid())
    self.params['var_list'] = ['chla']

    # Generates list of files, tries to match them to the pattern and to
    # extract the time. To help understand the naming convention and
    # pattern, see the following example:
    #   A20131612013168.L3m_8D_CHL_chlor_a_9km.bz2
    #
    resolution = '[0-9]+km'
    if sensor == 'SeaWiFS':
        sensor_prefix = 'S'
    elif sensor == 'MODISA':
        sensor_prefix = 'A'
    else:
        sensor = '.*'
    file_pattern = ('(%s)([0-9]{4})([0-9]{3})([0-9]{4})([0-9]{3}).(L3m)_'
                    '(8D)_(CHL)_(chlor_a)_(%s).bz2') % (sensor_prefix,
                                                        resolution)
    flist = listdir(self.params['path'])
    flist, match = _reglist(flist, file_pattern)
    self.params['file_list'] = flist

    # Reads first file in dataset to determine array geometry and
    # dimensions (lon, lat).
    HDF = self._open_HDF('%s/%s' % (self.params['path'],
                                    self.params['file_list'][0]))
    HDF_att = HDF.attributes()
    lon = arange(HDF_att['Westernmost Longitude'],
                 HDF_att['Easternmost Longitude'],
                 HDF_att['Longitude Step'])
    lat = arange(HDF_att['Northernmost Latitude'],
                 HDF_att['Southernmost Latitude'],
                 -HDF_att['Latitude Step'])

    # If lon_0 is set, calculate how many indices have to be moved in
    # order for the latitude array to start at lon_0.
    lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(
        lon, lat, xlim, ylim)
    self.params['xlim'], self.params['ylim'] = xlim, ylim
    self.params['lon_i'], self.params['lat_j'] = ii, jj

    # Creates a structured array for start year, start day, end year and
    # end day. Afterwards, the dates are converted from julian day to
    # matplotlib format, i.e. days since 0001-01-01 UTC.
    time_list = array([('%s-01-01' % (item[1]), atof(item[2]),
                        '%s-01-01' % (item[3]), atof(item[4]))
                       for item in match],
                      dtype=[('start_year', 'a10'), ('start_day', 'f2'),
                             ('end_year', 'a10'), ('end_day', 'f2')])
    time_start = (dates.datestr2num(time_list['start_year']) +
                  time_list['start_day'] - 1)
    time_end = (dates.datestr2num(time_list['end_year']) +
                time_list['end_day'] - 1)
    time_middle = 0.5 * (time_start + time_end)

    # Initializes the grid attributes, dimensions, coordinates and
    # variables.
    self.name = 'mass_concentration_of_chlorophyll_a_in_sea_water'
    self.description = ('Chlorophyll-a pigment concentration '
                        'inferred from satellite visible light radiance '
                        'measurements.')
    self.attributes['institution'] = HDF_att['Data Center']
    self.attributes['sensor name'] = HDF_att['Sensor Name']
    self.dimensions = dict(n=time_middle.size, k=0, j=lat.size, i=lon.size)
    self.coordinates = dict(n='time', k='height', j='latitude',
                            i='longitude')
    self.variables = dict(
        time=atlantis.data.Variable(),
        height=atlantis.data.get_standard_variable('height'),
        latitude=atlantis.data.get_standard_variable('latitude'),
        longitude=atlantis.data.get_standard_variable('longitude'),
        chla=atlantis.data.get_standard_variable(
            'mass_concentration_of_chlorophyll_a_in_sea_water'
        ),
        xm=atlantis.data.Variable(),
        ym=atlantis.data.Variable(),
    )
    self.variables['time'].data = time_middle
    self.variables['time'].canonical_units = 'days since 0001-01-01 UTC'
    #
    self.variables['height'].data = 0.
    self.variables['latitude'].data = lat
    self.variables['longitude'].data = lon
    self.variables['chla'].canonical_units = 'mg m-3'
    #
    self.variables['xm'].canonical_units = 'km'
    self.variables['xm'].description = 'Zonal distance.'
    self.variables['ym'].canonical_units = 'km'
    self.variables['ym'].description = 'Meridional distance.'
    self.variables['xm'].data, self.variables['ym'].data = (
        metergrid(self.variables['longitude'].data,
                  self.variables['latitude'].data, units='km')
    )
    return
def __init__(self, path=None, sensor='SeaWiFS', resolution='9km',
             mask_file=None, xlim=None, ylim=None):
    # Initializes the variables to default values. The indices 'n', 'k',
    # 'j' and 'i' refer to the temporal, height, meridional and zonal
    # coordinates respectively. If one of these indices is set to 'None',
    # then it is assumed infinite size, which is relevant for the 'time'
    # coordinate.
    self.attributes = dict()
    self.dimensions = dict(n=0, k=0, j=0, i=0)
    self.coordinates = dict(n=None, k=None, j=None, i=None)
    self.variables = dict()
    self.params = dict()
    self.data = dict()
    self.stencil_coeffs = dict()
    self.stencil_params = dict()

    # Sets global parameters for grid.
    if path is None:
        path = '/academia/data/raw/oceancolor'
    self.params['path'] = '%s/%s' % (path, sensor)
    self.params['mask_file'] = mask_file
    self.params['uuid'] = str(_uuid())
    self.params['var_list'] = ['chla']

    # Generates list of files, tries to match them to the pattern and to
    # extract the time. To help understand the naming convention and
    # pattern, see the following example:
    #   A20131612013168.L3m_8D_CHL_chlor_a_9km.bz2
    #
    resolution = '[0-9]+km'
    if sensor == 'SeaWiFS':
        sensor_prefix = 'S'
    elif sensor == 'MODISA':
        sensor_prefix = 'A'
    else:
        sensor = '.*'
    file_pattern = ('(%s)([0-9]{4})([0-9]{3})([0-9]{4})([0-9]{3}).(L3m)_'
                    '(8D)_(CHL)_(chlor_a)_(%s).bz2') % (sensor_prefix,
                                                        resolution)
    flist = listdir(self.params['path'])
    flist, match = _reglist(flist, file_pattern)
    self.params['file_list'] = flist

    # Reads first file in dataset to determine array geometry and
    # dimensions (lon, lat).
    HDF = self._open_HDF(
        '%s/%s' % (self.params['path'], self.params['file_list'][0]))
    HDF_att = HDF.attributes()
    lon = arange(HDF_att['Westernmost Longitude'],
                 HDF_att['Easternmost Longitude'],
                 HDF_att['Longitude Step'])
    lat = arange(HDF_att['Northernmost Latitude'],
                 HDF_att['Southernmost Latitude'],
                 -HDF_att['Latitude Step'])

    # If lon_0 is set, calculate how many indices have to be moved in
    # order for the latitude array to start at lon_0.
    lon, lat, xlim, ylim, ii, jj = self.getLongitudeLatitudeLimits(
        lon, lat, xlim, ylim)
    self.params['xlim'], self.params['ylim'] = xlim, ylim
    self.params['lon_i'], self.params['lat_j'] = ii, jj

    # Creates a structured array for start year, start day, end year and
    # end day. Afterwards, the dates are converted from julian day to
    # matplotlib format, i.e. days since 0001-01-01 UTC.
    time_list = array([('%s-01-01' % (item[1]), atof(item[2]),
                        '%s-01-01' % (item[3]), atof(item[4]))
                       for item in match],
                      dtype=[('start_year', 'a10'), ('start_day', 'f2'),
                             ('end_year', 'a10'), ('end_day', 'f2')])
    time_start = (dates.datestr2num(time_list['start_year']) +
                  time_list['start_day'] - 1)
    time_end = (dates.datestr2num(time_list['end_year']) +
                time_list['end_day'] - 1)
    time_middle = 0.5 * (time_start + time_end)

    # Initializes the grid attributes, dimensions, coordinates and
    # variables.
    self.name = 'mass_concentration_of_chlorophyll_a_in_sea_water'
    self.description = (
        'Chlorophyll-a pigment concentration '
        'inferred from satellite visible light radiance measurements.')
    self.attributes['institution'] = HDF_att['Data Center']
    self.attributes['sensor name'] = HDF_att['Sensor Name']
    self.dimensions = dict(n=time_middle.size, k=0, j=lat.size, i=lon.size)
    self.coordinates = dict(n='time', k='height', j='latitude',
                            i='longitude')
    self.variables = dict(
        time=atlantis.data.Variable(),
        height=atlantis.data.get_standard_variable('height'),
        latitude=atlantis.data.get_standard_variable('latitude'),
        longitude=atlantis.data.get_standard_variable('longitude'),
        chla=atlantis.data.get_standard_variable(
            'mass_concentration_of_chlorophyll_a_in_sea_water'),
        xm=atlantis.data.Variable(),
        ym=atlantis.data.Variable(),
    )
    self.variables['time'].data = time_middle
    self.variables['time'].canonical_units = 'days since 0001-01-01 UTC'
    #
    self.variables['height'].data = 0.
    self.variables['latitude'].data = lat
    self.variables['longitude'].data = lon
    self.variables['chla'].canonical_units = 'mg m-3'
    #
    self.variables['xm'].canonical_units = 'km'
    self.variables['xm'].description = 'Zonal distance.'
    self.variables['ym'].canonical_units = 'km'
    self.variables['ym'].description = 'Meridional distance.'
    self.variables['xm'].data, self.variables['ym'].data = (metergrid(
        self.variables['longitude'].data, self.variables['latitude'].data,
        units='km'))
    return