def __init__(self, nthreads=0, nprocs=1, nohost=False, remote_python=None, memory_limit=None, worker_port=None, nanny_port=None):
    """Prepare (but do not launch) an SSH-based Dask worker cluster thread.

    Worker hosts come from self.getHosts(); the scheduler endpoint from
    getHost() plus the EDAS config.  SSH access defaults to the current
    user on port 22 with the standard private key.

    :param nthreads: threads per worker (0 presumably means "default" — TODO confirm)
    :param nprocs: worker processes per host
    :param nohost: flag stored for later use by the launch logic
    :param remote_python: python executable to use on remote hosts, if any
    :param memory_limit: per-worker memory limit, passed through unchanged
    :param worker_port: fixed worker port, or None for auto-assignment
    :param nanny_port: fixed nanny port, or None for auto-assignment
    """
    Thread.__init__(self)
    self.logger = EDASLogger.getLogger()
    self.nthreads = nthreads
    self.nprocs = nprocs
    self.worker_addrs = self.getHosts()
    self.ssh_username = os.environ.get('USER', None)
    self.ssh_port = 22
    self.ssh_private_key = get_private_key()
    self.scheduler_addr = getHost()
    self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
    self.logdir = os.path.expanduser("~/.edas/logs")
    self.active = False
    self.nohost = nohost
    self.remote_python = remote_python
    self.memory_limit = memory_limit
    self.worker_port = worker_port
    self.nanny_port = nanny_port
    # Keep track of all running threads
    self.threads = []
def __init__(self, _agg: "Aggregation", *args):
    """One file entry of an aggregation.

    :param _agg: owning Aggregation
    :param args: three string fields — start time, size, relative path
    """
    self.logger = EDASLogger.getLogger()
    self.agg = _agg
    self.start_time = float(args[0].strip())
    self.size = int(args[1].strip())
    self.relpath = args[2].strip()
    # start_time appears to be minutes since the epoch (it is scaled by 60
    # to seconds here) — TODO confirm against the aggregation file format.
    self.date = datetime.fromtimestamp(self.start_time * 60, tz=timezone.utc)
def __init__(self, _name, _spec_file):
    """Named collection spec: resolve the spec-file path and parse it immediately."""
    self.name = _name
    self.spec = os.path.expanduser(_spec_file)
    self.logger = EDASLogger.getLogger()
    self.aggs, self.parms = {}, {}
    self._parseSpecFile()
def __init__(self, serverConfiguration: Dict[str, str]):
    """Connect to (or spin up) the Dask cluster used to execute EDAS tasks.

    If "scheduler.address" is configured, connects to that remote scheduler;
    otherwise starts a LocalCluster with "dask.nworkers" workers (default:
    CPU count).  Optionally starts a metrics-logging thread when
    "log.scheduler.metrics" is set to a true-ish value.

    :param serverConfiguration: EDAS server config key/value map
    """
    self.config = serverConfiguration
    self.logger = EDASLogger.getLogger()
    self.num_wps_requests = 0
    self.scheduler_address = serverConfiguration.get("scheduler.address", None)
    self.submitters = []
    self.active = True
    # Fix: nWorkers was previously assigned only in the local-cluster branch
    # but used unconditionally in client.submit() below, raising NameError
    # whenever a remote scheduler address was configured.
    nWorkers = int(self.config.get("dask.nworkers", multiprocessing.cpu_count()))
    if self.scheduler_address is not None:
        self.logger.info("Initializing Dask-distributed cluster with scheduler address: " + self.scheduler_address)
        self.client = Client(self.scheduler_address, timeout=60)
    else:
        self.client = Client(LocalCluster(n_workers=nWorkers))
        self.scheduler_address = self.client.scheduler.address
        self.logger.info(
            f"Initializing Local Dask cluster with {nWorkers} workers, scheduler address = {self.scheduler_address}")
    # Fire-and-forget warm-up: build operation indices on a worker.
    self.client.submit(lambda x: edasOpManager.buildIndices(x), nWorkers)
    self.ncores = self.client.ncores()
    self.logger.info(f" ncores: {self.ncores}")
    self.scheduler_info = self.client.scheduler_info()
    self.workers: Dict = self.scheduler_info.pop("workers")
    self.logger.info(f" workers: {self.workers}")
    # Config values are strings, so any non-empty value (including "false")
    # was previously truthy; parse the flag explicitly.
    log_metrics = str(serverConfiguration.get("log.scheduler.metrics", False)).strip().lower() in ("1", "true", "yes")
    if log_metrics:
        self.metricsThread = Thread(target=self.trackMetrics)
        self.metricsThread.start()
def __init__(self, host, request_port, response_port):
    """Daemon thread wrapping an external process bound to the given host/ports.

    :param host: host the process communicates on
    :param request_port: port for incoming requests
    :param response_port: port for outgoing responses
    """
    Thread.__init__(self)
    self.logger = EDASLogger.getLogger()
    self._response_port = response_port
    self._request_port = request_port
    self._host = host
    self.process = None
    # Daemon thread so it won't block interpreter shutdown.
    # (setDaemon() is deprecated since Python 3.10; assign the property.)
    self.daemon = True
def __init__(self, name: Optional[str], _domId: str, data: Union[xa.DataArray, DataArrayGroupBy]):
    """Wrap an xarray array (or group-by) with a name and an owning domain id."""
    self.logger = EDASLogger.getLogger()
    self.alwaysPersist = False
    self.loaded_data = None
    self.domId = "" if _domId is None else _domId
    self.name = name
    self._data = data
    self.addDomain(_domId)
def __init__(self, spec: KernelSpec):
    """Kernel instance: wraps its spec and mints a unique id from the spec name
    plus a 5-character random alphanumeric suffix."""
    self.logger = EDASLogger.getLogger()
    self._spec: KernelSpec = spec
    self.parent: Optional[str] = None
    self._minInputs = 1
    self._maxInputs = 100000
    self.requiredOptions = []
    alphabet = string.ascii_letters + string.digits
    suffix = ''.join(random.choice(alphabet) for _ in range(5))
    self._id: str = self._spec.name + "-" + suffix
def __init__(self, **kwargs):
    """EDAS endpoint: starts with no handlers or process manager; supported
    EPA patterns are "edas*" and "xarray*".

    :param kwargs: passed through to the superclass only implicitly (unused here)
    """
    super(EDASEndpoint, self).__init__()
    self.logger = EDASLogger.getLogger()
    self.process = "edas"
    self.handlers = {}
    self.processManager = None
    self._epas = ["edas*", "xarray*"]
    # Ensure cleanup runs at interpreter exit; the string argument is
    # forwarded to shutdown().
    atexit.register(self.shutdown, "ShutdownHook Called")
def __init__(self):
    """Start a scheduler process, wait for it to come up, then launch the
    worker cluster thread."""
    Cluster.__init__(self)
    self.logger = EDASLogger.getLogger()
    self.scheduler_host = getHost()
    self.scheduler_port = int(EdasEnv.get("scheduler.port", 8786))
    self.schedulerProcess = self.startup_scheduler()
    # Fixed delay to give the scheduler time to bind its port before workers
    # connect — presumably empirical; TODO confirm / replace with a readiness check.
    time.sleep(14)
    self.clusterThread = self.startup_cluster()
def __init__(self, clientId: str, jobId: str, **kwargs):
    """Per-request result context: records identity, timing, and the target
    NetCDF cache file path.

    Recognized kwargs: "cache" (cache directory, default "/tmp") and
    "workers" (worker count, default 1).
    """
    self.logger = EDASLogger.getLogger()
    self.clientId = clientId
    self.jobId = jobId
    self.start_time = time.time()
    self.cacheDir = kwargs.get("cache", "/tmp")
    self.workers = kwargs.get("workers", 1)
    self.filePath = f"{self.cacheDir}/{Job.randomStr(6)}.nc"
def __init__(self, _name, _agg_file):
    """Aggregation: parse the agg file into per-file, per-axis, per-dim,
    and per-variable tables."""
    self.logger = EDASLogger.getLogger()
    self.name, self.spec = _name, _agg_file
    self.parms = {}
    self.files: Dict[str, File] = OrderedDict()
    self.axes: Dict[str, Axis] = {}
    self.dims, self.vars = {}, {}
    self._parseAggFile()
def __init__(self, _context: zmq.Context, _client_address: str, _response_port: int):
    """Responder: tracks in-flight jobs, status reports, and known clients,
    then opens its response socket."""
    super(Responder, self).__init__()
    self.logger = EDASLogger.getLogger()
    self.context: zmq.Context = _context
    self.client_address = _client_address
    self.response_port = _response_port
    self.executing_jobs: Dict[str, Response] = {}
    self.status_reports: Dict[str, str] = {}
    self.clients: Set[str] = set()
    self.initSocket()
def __init__(self):
    """Locate and load the EDAS app configuration, then ensure the transient
    and collection directories exist.

    Raises AssertionError if the config directory or app.conf is missing.
    """
    self.logger = EDASLogger.getLogger()
    self.EDAS_CONFIG_DIR = os.environ.get('EDAS_CONFIG_DIR', os.path.expanduser("~/.edas/conf"))
    assert os.path.isdir(self.EDAS_CONFIG_DIR), f"Error, the EDAS configuration directory '{self.EDAS_CONFIG_DIR}' does not exist"
    self.path = os.path.expanduser(os.path.join(self.EDAS_CONFIG_DIR, "app.conf"))
    assert os.path.isfile(self.path), f"Error, the EDAS configuration file '{self.path}' does not exist. Copy edas/resourses/app.conf.template to '{self.path}' and edit."
    # Legacy config keys fall back to their newer aliases.
    aliases = {"wps.server.address": "client.address", "scheduler.address": "dask.scheduler"}
    self._parms: Dict[str, str] = self.getAppConfiguration(aliases)
    self.TRANSIENTS_DIR = os.path.expanduser(
        os.environ.get('EDAS_CACHE_DIR',
                       self._parms.get("edas.transients.dir", self._parms.get("edas.cache.dir", "/tmp"))))
    # Fix: the default "~/.edas" was previously handed to os.makedirs verbatim,
    # creating a literal "~" directory under the CWD; expand it first.
    self.COLLECTIONS_DIR = os.path.expanduser(self._parms.get("edas.coll.dir", "~/.edas"))
    for cpath in (self.TRANSIENTS_DIR, self.COLLECTIONS_DIR):
        # exist_ok avoids the exists()/makedirs() race of the original.
        os.makedirs(cpath, exist_ok=True)
def init(cls, project: str, experiment: str, requestId: str, identifier: str, dataInputs: Dict[str, List[Dict[str, Any]]]):
    """Build a TaskRequest from raw data inputs.

    Assembles domain, variable, and operation managers from the "domain",
    "variable", and "operation" entries of dataInputs and returns a new
    TaskRequest with empty run args.
    """
    logger = EDASLogger.getLogger()
    logger.info(
        "TaskRequest>-> process_name: {}, requestId: {}, datainputs: {}".
        format(identifier, requestId, str(dataInputs)))
    uid = UID(requestId)
    domainManager = DomainManager.new(dataInputs.get("domain"))
    variableManager = VariableManager.new(dataInputs.get("variable"))
    operationManager = OperationManager.new(dataInputs.get("operation"), domainManager, variableManager)
    rv = TaskRequest(uid, project, experiment, identifier, operationManager, {})
    return rv
def __init__(self, cid: str, _job: Job, **kwargs):
    """Execution handler for a submitted job: tracks status, timing, results,
    and any raised exception."""
    super(TaskExecHandler, self).__init__(**{"rid": _job.requestId, "cid": cid, **kwargs})
    self.logger = EDASLogger.getLogger()
    self.job = _job
    self.sthread = None
    self._processResults = True
    self._exception = None
    self._status = Status.IDLE
    self.results = queue.Queue()
    self.start_time = time.time()
def __init__(self, requestId: str, project: str, experiment: str, process: str, datainputs: Dict[str, List[Dict[str, Any]]], inputs: List[TaskResult], runargs: Dict[str, str], priority: float):
    """A unit of work: request identity, data inputs, upstream results,
    run arguments, and scheduling priority."""
    self.logger = EDASLogger.getLogger()
    self.requestId, self.process = requestId, process
    self.project, self.experiment = project, experiment
    self.dataInputs = datainputs
    self.inputs = inputs
    self.runargs = runargs
    self.priority = priority
    self.workerIndex = 0
    self.logger.info(f"Create job, runargs = {runargs}")
def new(cls, dataset: xa.Dataset, varMap: Optional[Dict[str, str]] = None, idMap: Optional[Dict[str, str]] = None):
    """Wrap a dataset's variables as EDASArrays and return an EDASDataset.

    idMap renames dataset ids before wrapping.  If varMap is given, only its
    (variable id -> domain id) entries are wrapped; otherwise every variable
    is wrapped with no domain.
    """
    logger = EDASLogger.getLogger()
    if varMap is None: varMap = {}
    if idMap is None: idMap = {}
    # logger.info( f"$$$$$$$$$$ rename: idMap = {idMap}, initial coords = {list(dataset.coords.keys())}")
    dataset = cls.rename( dataset, idMap )
    # logger.info(f"$$$$$$$$$$ rename: Result = {list(dataset.coords.keys())}")
    result = OrderedDict()
    if varMap:
        for ( vid, domId ) in varMap.items():
            result[vid] = EDASArray( vid, domId, dataset[vid] )
    else:
        # Note: iterates all variables, which in xarray includes coordinate
        # variables — presumably intended; TODO confirm.
        for ( vid ) in dataset.variables.keys():
            result[vid] = EDASArray( vid, None, dataset[vid] )
    return EDASDataset( result, dataset.attrs )
def new(cls, job: Job):
    """Build a TaskRequest from a Job.

    Assembles domain, variable, and operation managers from the job's data
    inputs and returns a new TaskRequest carrying the job's project,
    experiment, process, and run args.
    """
    logger = EDASLogger.getLogger()
    logger.info(
        "TaskRequest--> process_name: {}, requestId: {}, datainputs: {}".
        format(job.process, job.requestId, str(job.dataInputs)))
    uid = UID(job.requestId)
    domainManager = DomainManager.new(job.dataInputs.get("domain"))
    # Falls back to the "input" key when "variable" is absent.
    variableManager = VariableManager.new(
        job.dataInputs.get("variable", job.dataInputs.get("input")), job.inputs)
    operationManager = OperationManager.new(
        job.dataInputs.get("operation"), domainManager, variableManager)
    rv = TaskRequest(uid, job.project, job.experiment, job.process,
                     operationManager, job.runargs)
    return rv
def __init__(self, _name: str, _start: Union[float, int, str], _end: Union[float, int, str], _step: Union[float, int, str], _system: str, _metadata: Dict, timeDelta: Optional[relativedelta] = None):
    """Axis bounds spec: the axis type is derived from the name via
    Axis.parse; start/end/step are kept in the given coordinate system."""
    self.logger = EDASLogger.getLogger()
    self.name = _name
    self.type = Axis.parse(_name)
    self.system = _system
    self.start, self.end, self.step = _start, _end, _step
    self._timeDelta = timeDelta
    self.metadata = _metadata
def __init__(self, client_address: str, request_port: int, response_port: int):
    """Server-side portal: set up the ZMQ REP request socket and a Responder
    for pushing results back to the client.

    NOTE(review): the broad except logs the failure but does not re-raise,
    so a half-initialized portal can be returned — confirm callers check state.
    """
    self.logger = EDASLogger.getLogger()
    self.active = True
    try:
        self.request_port = request_port
        self.zmqContext: zmq.Context = zmq.Context()
        self.request_socket: zmq.Socket = self.zmqContext.socket(zmq.REP)
        self.responder = Responder(self.zmqContext, client_address, response_port)
        self.handlers = {}
        self.initSocket(client_address, request_port)
    except Exception as err:
        self.logger.error(
            "@@Portal: ------------------------------- EDAS Init error: {} ------------------------------- "
            .format(err))
def __init__(self, context: zmq.Context, clientId: str, host: str, port: int, **kwargs):
    """Response-collection daemon thread for one EDAS client connection.

    :param context: shared ZMQ context
    :param clientId: id of the client whose responses this thread manages
    :param host: server host to receive responses from
    :param port: response port
    :param kwargs: "diag" (bool-ish) enables diagnostic mode
    """
    from edas.config import EdasEnv
    Thread.__init__(self)
    self.context = context
    self.logger = EDASLogger.getLogger()
    self.host = host
    self.port = port
    self.clientId = clientId
    self.active = True
    self.mstate = MessageState.RESULT
    # setName() is deprecated since Python 3.10; assign the property.
    self.name = 'EDAS Response Thread'
    self.cached_results = {}
    self.cached_arrays = {}
    self.filePaths = {}
    self.diag = bool(kwargs.get("diag", False))
    # setDaemon() is deprecated since Python 3.10; assign the property.
    self.daemon = True
    self.cacheDir = EdasEnv.CACHE_DIR
    self.log("Created RM, cache dir = " + self.cacheDir)
def __init__(self, host: str = "127.0.0.1", request_port: int = 4556, response_port: int = 4557, **kwargs):
    """Client-side portal: connect a ZMQ REQ socket to the EDAS server and
    start a ResponseManager thread for asynchronous results.

    On any failure the error is logged and printed and shutdown() is called;
    NOTE(review): the exception is swallowed, so callers get a dead instance —
    confirm this best-effort behavior is intended.
    """
    try:
        self.active = True
        self.app_host = host
        self.application_thread = None
        self.clientID = UID.randomId(6)
        self.logger = EDASLogger.getLogger()
        self.context = zmq.Context()
        self.request_socket = self.context.socket(zmq.REQ)
        # if( connectionMode == ConnectionMode.BIND ):
        # self.request_port = ConnectionMode.bindSocket( self.request_socket, self.app_host, request_port )
        # self.response_port = ConnectionMode.bindSocket( self.response_socket, self.app_host, response_port )
        # self.logger.info( "Binding request socket to port: {0} (Requested {1})".format( self.request_port, request_port ) )
        # self.logger.info( "Binding response socket to port: {0} (Requested {1}".format( self.response_port, response_port ) )
        # else:
        self.request_port = ConnectionMode.connectSocket(self.request_socket, self.app_host, request_port)
        self.log("[1]Connected request socket to server {0} on port: {1}".
                 format(self.app_host, self.request_port))
        self.response_manager = ResponseManager(self.context, self.clientID, host, response_port, **kwargs)
        self.response_manager.start()
    except Exception as err:
        err_msg = "\n-------------------------------\nWorker Init error: {0}\n{1}-------------------------------\n".format(
            err, traceback.format_exc())
        self.logger.error(err_msg)
        print(err_msg)
        self.shutdown()
import dask from dask.distributed import Client from typing import List, Dict, Sequence, Mapping, Any import xarray as xa import time, traceback, logging import numpy as np from edas.workflow.modules.edas import * from edas.util.logging import EDASLogger from edas.workflow.module import edasOpManager from edas.portal.parsers import WpsCwtParser if __name__ == '__main__': logger = EDASLogger.getLogger() logger.info("STARTUP") dataset_path = '/Users/tpmaxwel/Dropbox/Tom/Data/GISS/CMIP5/E2H/r1i1p1/*.nc' dataset_ncml = '/Users/tpmaxwel/.edas/cache/collection/agg/giss_r1i1p1-tas_Amon_GISS-E2-H_historical_r1i1p1_1.ncml' testStr = '[ domain=[ {"name":"d0", \n "lat":{"start":0.0, "end":20.0, "system":"values" }, "lon":{ "start":0.0,"end":20.0, "system":"values" }, "time":{ "start":0,"end":20, "system":"indices" } } ], ' \ 'variable=[{ "collection":"cip_merra2_mon_1980-2015", "name":"tas:v0", "domain":"d0" } ], ' \ 'operation=[{ "name":"edas.ave", "input":"v0", "domain":"d0","axes":"xy"}] ]' try: tstart = time.time() client = Client() tdefine = time.time() logger.info("Defining workflow") def get_results() -> List[xa.Dataset]: dataInputs = WpsCwtParser.parseDatainputs(testStr) request: TaskRequest = TaskRequest.new("requestId", "jobId",
def __init__(self, _project: str, _experiment: str, appConfiguration: Dict[str, str]):
    """Bind a project/experiment pair to a freshly constructed ProcessManager."""
    self.logger = EDASLogger.getLogger()
    self.project, self.experiment = _project, _experiment
    self.processManager = ProcessManager(appConfiguration)
def __init__(self, job: Job, processResults, processFailure):
    """Submission thread bundling a job with its success and failure callbacks."""
    Thread.__init__(self)
    self.logger = EDASLogger.getLogger()
    self.job = job
    self.processResults = processResults
    self.processFailure = processFailure
def __init__(self, name, kernels: Dict[str, Callable[[], Kernel]]):
    """Kernel module: a registry of kernel factories, instantiated lazily by name.

    NOTE(review): the parameter is annotated Callable[[], Kernel] but the
    attribute below is annotated Callable[[str], Kernel] — one of the two is
    wrong; confirm the factory signature before relying on either.
    """
    self.logger = EDASLogger.getLogger()
    self._kernels: Dict[str, Callable[[str], Kernel]] = kernels
    self._instances: Dict[str, Kernel] = {}
    OperationModule.__init__(self, name)
class WpsCwtParser:
    """Pyparsing-based parser for WPS/CWT 'datainputs' strings and operation
    connection specs, plus small string utilities."""

    logger = EDASLogger.getLogger()
    # Grammar atoms (class-level, built once).
    integer = (Optional("-") + Word(nums)).setParseAction(str2int)
    float = (Optional("-") + Word(nums + ".") +
             Optional(CaselessLiteral("E") + Optional("-") + Word(nums))).setParseAction(str2float)
    numval = integer ^ float
    key = QuotedString('"')
    name = Word(alphanums + "-")
    token = key ^ numval
    delim = Word(",") ^ Word(";")
    vsep = Word("|") ^ Word(":")

    @classmethod
    def getDatainputsParser(cls):
        """Parser for '[ key=[ {...}, ... ], ... ]' datainputs declarations."""
        kv_parser = cls.keymap(cls.key, cls.token)  # renamed: 'dict' shadowed the builtin
        spec = cls.keymap(cls.key, kv_parser ^ cls.token ^ cls.list(cls.token))
        return cls.keymap(cls.name, cls.list(spec), "[]", "=")

    @classmethod
    def getOpConnectionsParser(cls):
        """Parser for operation connection specs: inputs optionally piped to an output."""
        output = Suppress(cls.vsep) + cls.name
        inputs = cls.seq(cls.name)  # renamed: 'input' shadowed the builtin
        item = inputs + Optional(Group(output))
        return cls.seq(Group(item))

    @classmethod
    def strToDatetime(cls, dTime: str) -> datetime:
        """Parse a date/time string (UTC) into a datetime."""
        import dateparser
        dt: datetime = dateparser.parse(dTime, settings={'TIMEZONE': 'UTC'})
        # Fix: previously returned the datetime *class* instead of the parsed value.
        return dt

    @classmethod
    def isoDateStr(cls, dTime: str) -> str:
        """Parse a date/time string and return it as a seconds-precision ISO-8601
        string with any UTC-offset suffix stripped."""
        import dateparser
        dt: datetime = dateparser.parse(dTime)  # , settings={'TIMEZONE': 'UTC'} )
        return dt.replace(microsecond=0).isoformat(
            "T", timespec="seconds").split("+")[0]

    @classmethod
    def parseDatainputs(cls, datainputs) -> Dict[str, List[Dict[str, Any]]]:
        """Parse a datainputs string; logs and re-raises on a parse failure."""
        try:
            return cls.getDatainputsParser().parseString(datainputs)[0]
        except ParseException as err:
            cls.logger.error(
                "\n\n -----> Error parsing input at col {}: '{}'\n".format(
                    err.col, err.line))
            raise err

    @classmethod
    def parseOpConnections(cls, opConnections) -> List[List[List[str]]]:
        """Parse operation connections; a non-string iterable is joined with ','
        first.  Logs and re-raises on a parse failure."""
        try:
            opCon = ",".join(opConnections) if hasattr(
                opConnections, '__iter__') and not isinstance(
                    opConnections, str) else opConnections
            return cls.getOpConnectionsParser().parseString(str(opCon))[0]
        except ParseException as err:
            cls.logger.error(
                "\n\n -----> Error parsing input at col {}: '{}'\n".format(
                    err.col, err.line))
            raise err

    @classmethod
    def keymap(cls, key: Token, value: Token, enclosing: str = "{}", sep=":"):
        """Grammar for a delimited '{ key<sep>value, ... }' map, collected via list2dict."""
        elem = (key + Suppress(sep) + value + Suppress(ZeroOrMore(cls.delim)))
        return (Suppress(enclosing[0]) + OneOrMore(Group(elem)) +
                Suppress(enclosing[1])).setParseAction(list2dict)

    @classmethod
    def list(cls, item, enclosing: str = "[]"):
        """Grammar for a delimited '[ item, ... ]' list."""
        elem = item + Suppress(ZeroOrMore(cls.delim))
        return (Suppress(enclosing[0]) + Group(OneOrMore(elem)) +
                Suppress(enclosing[1]))

    @classmethod
    def seq(cls, item):
        """Grammar for a bare delimited sequence of items."""
        elem = item + Suppress(ZeroOrMore(cls.delim))
        return Group(OneOrMore(elem))

    @classmethod
    def postProcessResult(
            cls, result: Dict[str, List[Dict[str, Any]]]
    ) -> Dict[str, List[Dict[str, Any]]]:
        """Hook for post-processing a parse result; currently the identity."""
        return result

    @staticmethod
    def get(altKeys: List[str], spec: Dict[str, Any]) -> Any:
        """Return the first non-None value found in spec under any of altKeys."""
        for key in altKeys:
            value = spec.get(key, None)
            if value is not None: return value
        return None

    @staticmethod
    def split(sepKeys: List[str], value: str) -> List[str]:
        """Split value on the first separator from sepKeys that it contains;
        otherwise return [value]."""
        for sep in sepKeys:
            if sep in value: return value.split(sep)
        return [value]

    @staticmethod
    def randomStr(length) -> str:
        """Return a random alphanumeric string of the given length."""
        tokens = string.ascii_uppercase + string.ascii_lowercase + string.digits
        return ''.join(random.SystemRandom().choice(tokens) for _ in range(length))
def __init__(self):
    """Operation manager: build the module registry at construction time."""
    self.logger = EDASLogger.getLogger()
    self.utilNodes = {"edas.metrics"}
    self.operation_modules: Dict[str, KernelModule] = {}
    self.build()
def __init__(self):
    """Plot-test driver backed by a local test manager."""
    self.mgr = LocalTestManager("PlotTESTS", "demo")
    self.logger = EDASLogger.getLogger()
def __init__(self, name: str):
    """Named component: stores its name privately and acquires the EDAS logger."""
    self._name = name
    self.logger = EDASLogger.getLogger()