def load_data(groupname=None):
    """Load CRU data for *groupname* / location from HDF5 storage.

    Fills the module-level ``timestamps`` list and ``datasets`` dict with the
    first 120 samples of every configured dataset, optionally normalizing
    each series (the original is then kept under ``<name>_original``).

    :param groupname: HDF5 group to read; falls back to ``conf['groupname']``
        when not given.  (The old code unconditionally overwrote the
        parameter with the configured value.)
    :return: tuple ``(timestamps, datasets)`` — the module-level containers.
    """
    if groupname is None:
        groupname = conf['groupname']
    storage_name = conf['hdf5storage']
    with h5py.File(storage_name, "r") as data_file:
        # Convert raw epoch timestamps to dates; keep the first 120 samples.
        time_x = [date.fromtimestamp(t) for t in data_file['timestamps']]
        global timestamps
        timestamps = time_x[:120]
        for ds_name in list(datasets.keys()):
            # Slice the HDF5 dataset directly instead of materializing the
            # whole dataset as a Python list first.
            data = numpy.array(data_file[f'{groupname}/{ds_name}'][:120])
            if conf.get('normalize'):
                # Normalization is expected behaviour — log at debug, not error.
                log.debug("Normalizing %s", ds_name)
                datasets[f"{ds_name}_original"] = data.copy()
                data = normalized_dataset(data)
            datasets[ds_name] = data
    return timestamps, datasets
def calculate_moving_mean():
    """Compute a moving average over the configured prediction dataset.

    Reads ``conf['prediction_option']`` and ``conf['moving_average_months']``
    and stores the averaged series in
    ``datasets['<var>_moving_avg_<months>']``.  The first ``months - 1``
    positions are zero-padded so the result aligns index-for-index with the
    source series.

    :return: None; the module-level ``datasets`` dict is updated in place
        (no plot is produced, contrary to the old docstring).
    """
    from collections import deque

    ds_var = conf['prediction_option']
    x_months = conf.get('moving_average_months', 0)
    if not x_months:
        log.debug('No moving average defined')
        return

    # deque(maxlen=…) drops the oldest value automatically — O(1) instead of
    # the previous list.pop(0).
    window = deque(maxlen=x_months)
    # Zero-pad so the output has the same length as the input series.
    moving_average_result = (x_months - 1) * [0]
    for value in datasets[ds_var]:
        window.append(value)
        if len(window) == x_months:
            moving_average_result.append(sum(window) / x_months)

    dataset_label = f'{ds_var}_moving_avg_{x_months}'
    assert len(moving_average_result) == len(datasets[ds_var])
    datasets[dataset_label] = moving_average_result
def __init__(self, slotCount=8, environment=None):
    """Initialize the worker: task-module path, signal handlers, task slots,
    execution environment, logging, and the node-info dict used to join the
    worker pool.

    :param slotCount: maximum task slot number
    :param environment: worker environment, 'production' or 'development';
        any other value falls back to workerConfig 'main'/'environment'
    """
    # Ensure the task-module directory exists and is importable.
    modulePath = os.path.join(os.getcwd(), DIR_TASK_MODULE)
    if not os.path.exists(modulePath):
        os.mkdir(modulePath)
    # Shut down cleanly on SIGTERM / SIGINT.
    signal.signal(signal.SIGTERM, self.__signalHandler)
    signal.signal(signal.SIGINT, self.__signalHandler)
    sys.path.append(modulePath)
    sys.path.append(os.getcwd())
    sys.path.append(os.path.join(os.getcwd(), DIR_DEV_MODULE))
    self.slotCount = slotCount
    self.taskSlots = []
    self.devTaskSlots = []
    self.taskList = dict()
    self.msgQueue = Queue.Queue()
    # Set execution environment; invalid values fall back to configuration.
    if environment in ('production', 'development'):
        self.environment = environment
    else:
        self.environment = workerConfig.get('main', 'environment')
    # Set up logging; non-production always logs at DEBUG level.
    logPath, tmp, logFile = workerConfig.get('log', 'file').rpartition('/')
    logLevel = workerConfig.get('log', 'level') if self.environment == 'production' else 'DEBUG'
    log.setupLogger('%s/%s_%s' % (logPath, os.getpid(), logFile), logLevel, 'oceanworker')
    # Build the node-info dict used to join the worker pool
    # (registration with the meta server happens after this chunk).
    worker = dict()
    worker['environment'] = self.environment
    worker['maxSlot'] = slotCount
    worker['tasks'] = []
    try:
        worker['ip'] = socket.gethostbyname(socket.gethostname())
    except Exception, e:
        # Hostname not resolvable — fall back to a placeholder address.
        worker['ip'] = '0.0.0.0'
def es_instance_tap():
    """Return the shared Elasticsearch client for the 'tap' cluster.

    Lazily creates the client on first use and caches it in the module-level
    ``_ES_INSTANCE_TAP`` singleton.

    :return: a connected :class:`Elasticsearch` instance.
    """
    global _ES_INSTANCE_TAP
    if _ES_INSTANCE_TAP is None:
        # NOTE(review): the previous code also built an 'es_host' list from
        # conf['es'] but never used it — the client has always connected to
        # conf['tap_es']['host'].  The dead computation was removed; confirm
        # conf['es'] was not meant to be the connection target.
        _ES_INSTANCE_TAP = Elasticsearch(
            conf["tap_es"]["host"],
            sniffer_timeout=60,
            timeout=60,
        )
    return _ES_INSTANCE_TAP
def __init__(
        self,
        url=None,
        save_number=None,
        save_number_file_name=None
):
    """Set up the receive-number client.

    Defaults are resolved at call time.  The old signature evaluated
    ``conf.get("server_url") + ReceiveNumberConstant.RECEIVE_NUMBER_API``
    at import time, which raised TypeError at import when 'server_url' was
    missing and froze any later configuration change.

    :param url: endpoint of the receive-number API; defaults to
        ``conf['server_url'] + ReceiveNumberConstant.RECEIVE_NUMBER_API``.
    :param save_number: whether received numbers are logged to a file;
        defaults to ``ReceiveNumberConstant.SAVE_NUMBERS_LOG_FLAG``.
    :param save_number_file_name: file name for the numbers log; defaults to
        ``ReceiveNumberConstant.SAVE_NUMBERS_FILE_NAME``.
    """
    if url is None:
        url = conf.get("server_url") + ReceiveNumberConstant.RECEIVE_NUMBER_API
    if save_number is None:
        save_number = ReceiveNumberConstant.SAVE_NUMBERS_LOG_FLAG
    if save_number_file_name is None:
        save_number_file_name = ReceiveNumberConstant.SAVE_NUMBERS_FILE_NAME
    self.__url = url
    self.__save_numbers_log_flag = save_number
    self.__save_numbers_file_name = save_number_file_name
    self.__periods = Periods()
    self.__httper = Httper()
    # Save path is stamped with today's date at construction time.
    self.__save_path = get_time('%Y%m%d')
def get_dataset(datasets, ds_var):
    """Pick the series to make the prediction with.

    If 'moving_average_months' is set in the configuration, the
    pre-computed moving-average series for *ds_var* is used instead of
    the raw data.

    :param datasets: mapping of dataset name -> data sequence
    :param ds_var: dataset variable name (e.g. 'tmp')
    :return: dataset to make prediction with.
    """
    # NOTE(review): the fallback is hard-coded to datasets['tmp'] rather
    # than datasets[ds_var] — looks like a bug whenever ds_var != 'tmp';
    # confirm against the callers before changing.
    ds_tmp = datasets['tmp']
    if conf.get('moving_average_months'):
        months = conf['moving_average_months']
        ds_tmp = datasets[f'{ds_var}_moving_avg_{months}']
    return ds_tmp
def __setupMetaServerConnection(self):
    """Parse the configured zk:// meta URL and build a Kazoo client.

    Validates the scheme and service root path, stores the root path on
    the instance, and returns a KazooClient wired to the connection
    listener (the client is not started here).

    :raises ValueError: when the URL scheme is not 'zk' or no root path
        is present.
    """
    metaUrl = urlparse(config.get('server', 'meta'))
    if metaUrl.scheme != 'zk':
        raise ValueError("Meta URL must start with zk://")
    if metaUrl.path in ('/', ''):
        raise ValueError("Service root path not found.")
    self.rootpath = metaUrl.path.rstrip('/')
    # NOTE: currently, auth_data is not supported — any credentials before
    # '@' in the netloc are dropped.
    hostList = metaUrl.netloc.split('@')[-1]
    client = KazooClient(hosts=hostList, handler=SequentialGeventHandler())
    client.add_listener(self.__connection)
    return client
def open(self):
    """Look up the AMQP broker URL in workerConfig and create the producer.

    :raises EnvironmentError: when no 'server'/'broker' setting exists.
    """
    brokerConfigured = (workerConfig.has_section('server')
                        and workerConfig.has_option('server', 'broker'))
    if not brokerConfigured:
        raise EnvironmentError('Cannot find taskmodule url')
    self.urlAMQP = workerConfig.get('server', 'broker')
    self.amqpProducer = producer.AMQPProducer(url=self.urlAMQP)
self.__checkMetaInfo() except Exception, e: log.error(e) raise e try: svrConfig = json.loads(self.metaClient.get(self.rootpath + '/henem/config')[0]) log.info("Config - %s" % json.dumps(svrConfig)) config.update(svrConfig, keepOrgValue=True) except Exception, e: log.error(e) log.info("Service Configuration from local and Zookeeper") log.info(config.dumps()) self.urlQueue = config.get('source', 'mgqueue') try: self.mongoClient = pymongo.MongoClient(self.urlQueue) self.db = self.mongoClient.get_default_database() except Exception, e: log.error(self.urlQueue) log.error(e) raise e self.keeperInfo = dict(name='henem') self.keeperInfo['host'] = "%s:%s" % (socket.gethostbyname(socket.gethostname()), config.get('main', 'port')) self.feeders.append(MessageFeeder(self.db['APATLogQueue'], ordering=False)) self.running = True
def getConfig(section, name):
    """Return the worker configuration value for *section* / *name*."""
    value = workerConfig.get(section, name)
    return value
self.metaClient = meta.MetaHandler(hosts=ZK_URL, nodeInfo=worker, eventCallback=self.__metaEventHandler) # update global (server-side) configuration try: workerConfig.update(self.metaClient.getConfig(), keepOrgValue=True) except Exception, e: log.error("Meta Configuration error - %s" % e) log.debug(dict(workerConfig.items('db'))) if workerConfig.has_section('db'): dbmanager.initDbParams(dict(workerConfig.items('db'))) if workerConfig.has_section('server') and workerConfig.has_option('server', 'task_module'): self.moduleClient = module.TaskModule(workerConfig.get('server', 'task_module')) if self.environment == 'production': self.downloadModules() self.taskClient = task.TaskManager(self.metaClient) self.slotPool = slot.SlotPool() self.slotPool.open() def __signalHandler(self, signum, frame): self.stop() def __metaEventHandler(self, event, evtObj=None): log.debug("Receive Meta Event - %s, %s" % (event, evtObj)) if self.environment != 'production':
class ManagementInterface(object):
    """Management endpoint wrapping the running server with control operations."""

    def __init__(self, server):
        # IControl exposes the control operations for *server*.
        self.control = IControl(server)


if __name__ == "__main__":
    server = HenemServer()
    # Autoreload is disabled in every environment; production additionally
    # switches CherryPy into its 'production' config preset.
    if settings.environment == "production":
        cherrypy.config.update({"environment": "production", "engine.autoreload_on": False})
    else:
        cherrypy.config.update({"engine.autoreload_on": False})
    # Write a PID file and route shutdown signals to the server instance.
    PIDFile(cherrypy.engine, config.get("main", "pid-file")).subscribe()
    signal.signal(signal.SIGTERM, partial(signalHandler, server=server))
    signal.signal(signal.SIGHUP, partial(signalHandler, server=server))
    signal.signal(signal.SIGINT, partial(signalHandler, server=server))
    log.info("Start Henem Server, environment - %s" % settings.environment)
    bindAddr = config.get("main", "bind")
    bindPort = config.getint("main", "port")
    log.info("bind server - %s:%s" % (bindAddr, bindPort))
    # Restrict HTTP methods before binding the listen address.
    cherrypy.tools.allow = cherrypy.Tool("on_start_resource", http_methods_allowed)
    cherrypy.server.bind_addr = (bindAddr, bindPort)
    # NOTE(review): this chunk appears truncated — the engine start/serve
    # call presumably follows outside this view.