# Example 1
# 0
def load_data(groupname=None, limit=120):
    """Load CRU data for *groupname* from the configured HDF5 storage.

    :param groupname: HDF5 group to read datasets from; defaults to
        ``conf['groupname']`` when not given.
    :param limit: maximum number of leading samples to keep per dataset
        (also applied to the timestamp axis; defaults to 120).
    :return: tuple ``(timestamps, datasets)`` — the module-level containers
        filled from the storage file.
    """
    # BUGFIX: the original unconditionally overwrote the *groupname*
    # argument with the configured value, making the parameter dead.
    if groupname is None:
        groupname = conf['groupname']
    storage_name = conf['hdf5storage']

    with h5py.File(storage_name, "r") as data_file:
        # Convert raw epoch timestamps to date objects, truncated to *limit*.
        x_time = data_file['timestamps']
        time_x = [date.fromtimestamp(t) for t in x_time]

        global timestamps
        timestamps = time_x[:limit]

        # Snapshot the keys: the loop may add "<name>_original" entries.
        for ds_name in list(datasets.keys()):
            data = numpy.array(list(data_file[f'{groupname}/{ds_name}'])[:limit])

            if conf.get('normalize'):
                # Informational message, not a failure (was logged at ERROR).
                log.info("Normalizing %s", ds_name)
                datasets[f"{ds_name}_original"] = data.copy()
                data = normalized_dataset(data)
            datasets[ds_name] = data

    return timestamps, datasets
# Example 2
# 0
def calculate_moving_mean():
    """Compute a trailing moving average over the configured prediction series.

    Stores the result under ``datasets['<var>_moving_avg_<n>']``, left-padded
    with ``n - 1`` zeros so it aligns index-for-index with the source series.

    :return: ``None``; returns early when no window size is configured.
    """
    from collections import deque

    ds_var = conf['prediction_option']
    x_months = conf.get('moving_average_months', 0)

    if not x_months:
        log.debug('No moving average defined')
        return

    # Pad so the output lines up with the input series.
    moving_average_result = (x_months - 1) * [0]

    # deque(maxlen=...) drops the oldest value automatically in O(1),
    # replacing the original list.pop(0) which is O(n) per step.
    window = deque(maxlen=x_months)
    for value in datasets[ds_var]:
        window.append(value)

        if len(window) == x_months:
            moving_average_result.append(sum(window) / x_months)

    dataset_label = f'{ds_var}_moving_avg_{x_months}'
    assert len(moving_average_result) == len(datasets[ds_var])
    datasets[dataset_label] = moving_average_result
# Example 3
# 0
    def __init__(self, slotCount=8, environment=None):
        """
        Initialize the worker: task-module path, signal handlers, task
        slots, logging, and the node-info dict used to join the cluster.

        NOTE: Python 2 code (``except Exception, e`` syntax below).

        :param slotCount: maximum task slot number
        :param environment: worker environment. 'production' or 'development'
        """

        # Ensure the directory holding downloadable task modules exists.
        modulePath = os.path.join(os.getcwd(), DIR_TASK_MODULE)

        if not os.path.exists(modulePath):
            os.mkdir(modulePath)

        # Graceful shutdown on SIGTERM/SIGINT (see __signalHandler).
        signal.signal(signal.SIGTERM, self.__signalHandler)
        signal.signal(signal.SIGINT, self.__signalHandler)

        # Make task modules (and dev modules) importable.
        sys.path.append(modulePath)
        sys.path.append(os.getcwd())
        sys.path.append(os.path.join(os.getcwd(), DIR_DEV_MODULE))

        self.slotCount = slotCount
        self.taskSlots = []
        self.devTaskSlots = []
        self.taskList = dict()
        self.msgQueue = Queue.Queue()  # Py2 stdlib Queue

        # set execution environment
        # Any value other than the two known names falls back to config.
        if environment in ('production', 'development'):
            self.environment = environment
        else:
            self.environment = workerConfig.get('main', 'environment')

        # setup logging
        # Log file name is prefixed with the worker PID; DEBUG outside prod.
        logPath, tmp, logFile = workerConfig.get('log', 'file').rpartition('/')
        logLevel = workerConfig.get('log', 'level') if self.environment == 'production' else 'DEBUG'
        log.setupLogger('%s/%s_%s' % (logPath, os.getpid(), logFile), logLevel, 'oceanworker')

        # join worker node
        # Node info advertised to the meta server; IP falls back to 0.0.0.0
        # when the local hostname cannot be resolved.
        worker = dict()
        worker['environment'] = self.environment
        worker['maxSlot'] = slotCount
        worker['tasks'] = []
        try:
            worker['ip'] = socket.gethostbyname(socket.gethostname())
        except Exception, e:
            worker['ip'] = '0.0.0.0'
# Example 4
# 0
def es_instance_tap():
    """Return the lazily-created Elasticsearch client for the "tap" cluster.

    The client is built once from ``conf["tap_es"]["host"]`` and cached in
    the module-level ``_ES_INSTANCE_TAP`` singleton.

    :return: the shared ``Elasticsearch`` instance.
    """
    global _ES_INSTANCE_TAP

    if _ES_INSTANCE_TAP is None:
        # BUGFIX(dead code): the original also built an ``es_host`` list from
        # conf['es']['host'] but never used it — the client has always been
        # created from conf["tap_es"]["host"]. The dead computation is removed.
        _ES_INSTANCE_TAP = Elasticsearch(conf["tap_es"]["host"],
                                         sniffer_timeout=60,
                                         timeout=60)
    return _ES_INSTANCE_TAP
# Example 5
# 0
 def __init__(
         self,
         url=conf.get("server_url") +
     ReceiveNumberConstant.RECEIVE_NUMBER_API,
         save_number=ReceiveNumberConstant.SAVE_NUMBERS_LOG_FLAG,
         save_number_file_name=ReceiveNumberConstant.SAVE_NUMBERS_FILE_NAME
 ):
     """Set up the receive-number client.

     NOTE(review): the ``url`` default is evaluated once at class-definition
     time (conf.get("server_url") + API path), so later config changes will
     not affect it — confirm this is intended.

     :param url: endpoint used to receive numbers.
     :param save_number: flag controlling whether received numbers are saved.
     :param save_number_file_name: file name used when saving numbers.
     """
     self.__url = url
     self.__save_numbers_log_flag = save_number
     self.__save_numbers_file_name = save_number_file_name
     self.__periods = Periods()
     self.__httper = Httper()
     # Date-stamped save path built from the current time ('%Y%m%d').
     self.__save_path = get_time('%Y%m%d')
# Example 6
# 0
def get_dataset(datasets, ds_var):
    """Pick the series to make the prediction with.

    When ``moving_average_months`` is configured, the smoothed variant of
    *ds_var* is returned; otherwise a raw series is used.

    :param datasets: mapping of dataset name -> data sequence.
    :param ds_var: variable name whose moving-average series is preferred.
    :return: dataset to make prediction with.
    """
    # NOTE(review): the fallback is hard-coded to datasets['tmp'] rather
    # than datasets[ds_var] — preserved as-is, but worth confirming.
    ds_tmp = datasets['tmp']

    # Single config lookup (the original read the key twice).
    months = conf.get('moving_average_months')
    if months:
        ds_tmp = datasets[f'{ds_var}_moving_avg_{months}']

    return ds_tmp
# Example 7
# 0
    def __setupMetaServerConnection(self):
        """Build the Kazoo client for the meta server.

        Parses the ``zk://`` URL from the 'server/meta' config entry,
        records the service root path on ``self.rootpath``, and returns a
        gevent-backed KazooClient with the connection listener attached.
        """
        metaUrl = urlparse(config.get('server', 'meta'))

        if metaUrl.scheme != 'zk':
            raise ValueError("Meta URL must start with zk://")
        if metaUrl.path in ('/', ''):
            raise ValueError("Service root path not found.")

        # Keep the root path without a trailing slash.
        self.rootpath = metaUrl.path.rstrip('/')

        # NOTE: currently, auth_data is not supported — drop any user@ part.
        hostList = metaUrl.netloc.split('@')[-1]

        client = KazooClient(hosts=hostList, handler=SequentialGeventHandler())
        client.add_listener(self.__connection)
        return client
# Example 8
# 0
 def open(self):
     """Create the AMQP producer from the configured broker URL.

     Raises EnvironmentError when the 'server/broker' option is missing.
     """
     hasBroker = (workerConfig.has_section('server')
                  and workerConfig.has_option('server', 'broker'))
     if not hasBroker:
         raise EnvironmentError('Cannot find taskmodule url')
     self.urlAMQP = workerConfig.get('server', 'broker')
     self.amqpProducer = producer.AMQPProducer(url=self.urlAMQP)
# Example 9
# 0
            # (excerpt starts inside a try whose opening is above this view)
            self.__checkMetaInfo()
        except Exception, e:
            # Meta-info validation is fatal: log and propagate.
            log.error(e)
            raise e

        # Merge server-side config from Zookeeper; local values win
        # (keepOrgValue=True). Failure here is logged but non-fatal.
        try:
            svrConfig = json.loads(self.metaClient.get(self.rootpath + '/henem/config')[0])
            log.info("Config - %s" % json.dumps(svrConfig))
            config.update(svrConfig, keepOrgValue=True)
        except Exception, e:
            log.error(e)

        log.info("Service Configuration from local and Zookeeper")
        log.info(config.dumps())

        # Connect to the MongoDB message queue; connection failure is fatal.
        self.urlQueue = config.get('source', 'mgqueue')
        try:
            self.mongoClient = pymongo.MongoClient(self.urlQueue)
            self.db = self.mongoClient.get_default_database()
        except Exception, e:
            log.error(self.urlQueue)
            log.error(e)
            raise e

        # Info advertised to the keeper: service name plus resolved host:port.
        self.keeperInfo = dict(name='henem')
        self.keeperInfo['host'] = "%s:%s" % (socket.gethostbyname(socket.gethostname()), config.get('main', 'port'))

        # Feeder over the APAT log queue collection (ordering disabled).
        self.feeders.append(MessageFeeder(self.db['APATLogQueue'], ordering=False))

        self.running = True
# Example 10
# 0
def getConfig(section, name):
    """Look up *name* in *section* of the worker configuration."""
    value = workerConfig.get(section, name)
    return value
# Example 11
# 0
        # Register with the meta server, advertising this node's info dict.
        self.metaClient = meta.MetaHandler(hosts=ZK_URL, nodeInfo=worker, eventCallback=self.__metaEventHandler)

        # update global (server-side) configuration
        # Best-effort: local values win (keepOrgValue=True); failure logged only.
        try:
            workerConfig.update(self.metaClient.getConfig(), keepOrgValue=True)
        except Exception, e:
            log.error("Meta Configuration error - %s" % e)

        # NOTE(review): this debug log reads the 'db' section unconditionally,
        # BEFORE the has_section guard below — it would raise if 'db' is absent.
        log.debug(dict(workerConfig.items('db')))

        if workerConfig.has_section('db'):
            dbmanager.initDbParams(dict(workerConfig.items('db')))

        # Production workers pre-download task modules from the module server.
        if workerConfig.has_section('server') and workerConfig.has_option('server', 'task_module'):
            self.moduleClient = module.TaskModule(workerConfig.get('server', 'task_module'))
            if self.environment == 'production':
                self.downloadModules()

        self.taskClient = task.TaskManager(self.metaClient)

        # Open the pool of task execution slots.
        self.slotPool = slot.SlotPool()
        self.slotPool.open()


    def __signalHandler(self, signum, frame):
        # SIGTERM/SIGINT handler: trigger a graceful stop of the worker.
        self.stop()

    # Callback for meta-server events (wired up via MetaHandler above).
    # NOTE(review): body truncated in this excerpt — the branch below is incomplete.
    def __metaEventHandler(self, event, evtObj=None):
        log.debug("Receive Meta Event - %s, %s" % (event, evtObj))
        if self.environment != 'production':
# Example 12
# 0

class ManagementInterface(object):
    """Management object wrapping the server's control interface (IControl)."""
    def __init__(self, server):
        # :param server: server instance handed to the IControl wrapper.
        self.control = IControl(server)


if __name__ == "__main__":
    server = HenemServer()

    # Autoreload is disabled in every environment; full production config
    # is applied only when running in production.
    if settings.environment == "production":
        cherrypy.config.update({"environment": "production", "engine.autoreload_on": False})
    else:
        cherrypy.config.update({"engine.autoreload_on": False})

    # Write the PID file via a CherryPy engine plugin.
    PIDFile(cherrypy.engine, config.get("main", "pid-file")).subscribe()

    # Route TERM/HUP/INT to the shared handler with the server bound in.
    signal.signal(signal.SIGTERM, partial(signalHandler, server=server))
    signal.signal(signal.SIGHUP, partial(signalHandler, server=server))
    signal.signal(signal.SIGINT, partial(signalHandler, server=server))

    log.info("Start Henem Server, environment - %s" % settings.environment)

    # Listen address/port come from the 'main' config section.
    bindAddr = config.get("main", "bind")
    bindPort = config.getint("main", "port")

    log.info("bind server - %s:%s" % (bindAddr, bindPort))

    # Install the HTTP-method allow tool, then bind the listen address.
    cherrypy.tools.allow = cherrypy.Tool("on_start_resource", http_methods_allowed)
    cherrypy.server.bind_addr = (bindAddr, bindPort)