def __init__(self):
    self.pool = redis.ConnectionPool(
        host=ConfManage.getString("REDIS_HOST"),
        port=ConfManage.getInt("REDIS_PORT"),
        db=ConfManage.getInt("REDIS_DB"),
        password=ConfManage.getString("REDIS_PASSWORD"),
        max_connections=ConfManage.getInt("REDIS_MAX_CONNECTIONS"),
        decode_responses=True,
        socket_keepalive=True)
    self.conn = redis.StrictRedis(connection_pool=self.pool, socket_connect_timeout=5)
    self.logger = Logger.get_instance(ConfManage.getString("LOG_REQ_NAME"))
def __init__(self, host=ConfManage.getString("HBASE_HOST"), port=ConfManage.getInt("HBASE_PORT")):
    self.timezone = ConfManage.getString("ARROW_TIMEZONE")
    self.host = host
    self.port = port
    # self.connection = Connection(host=self.host, port=self.port,
    #                              table_prefix=ConfManage.getString("HBASE_PREFIX"))
    self.connPool = RConnectionPool(
        size=ConfManage.getInt("HBASE_CONN_SIZE"),
        host=self.host,
        port=self.port,
        # timeout=10,
        table_prefix=ConfManage.getString("HBASE_PREFIX"))
def get_run_time(args_date, shift_days=0, floored=True):
    run_time = None
    if args_date is not None and len(args_date) > 0:
        run_time = arrow.get(args_date).replace(
            tzinfo=ConfManage.getString("ARROW_TIMEZONE"))  # .shift(hours=ENV_ARROW_TZSHIFT)
    else:
        run_time = arrow.now(
            tz=ConfManage.getString("ARROW_TIMEZONE"))  # .shift(hours=ENV_ARROW_TZSHIFT)
    if floored:
        run_time = run_time.floor('day').floor('hour').floor('minute').floor('second')
    run_time = run_time.shift(days=shift_days) if shift_days != 0 else run_time
    return run_time
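# Usage sketch (not part of the original module; the example date is illustrative):
# get_run_time resolves an optional date string into a timezone-aware arrow object
# in the configured ARROW_TIMEZONE, optionally floored to midnight and shifted by
# whole days.
#
#   run_time = get_run_time('2021-06-01')          # 2021-06-01T00:00:00 local time
#   yesterday = get_run_time(None, shift_days=-1)  # start of yesterday (floored)
#   now_exact = get_run_time(None, floored=False)  # current local time, not floored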
def init_pickle_cache():
    RELOAD_PICKLE_CACHE_KEY = ConfManage.getString("RELOAD_PICKLE_CACHE_KEY")
    logger.info('start init_pickle_cache RELOAD_PICKLE_CACHE_KEY={}'.format(RELOAD_PICKLE_CACHE_KEY))
    if RELOAD_PICKLE_CACHE_KEY is not None:
        result = reload_pickle_cache(RELOAD_PICKLE_CACHE_KEY)
        logger.info('init_pickle_cache result={}'.format(result))
def __init__(self, client_class, host=None, port=None, timeout=None):
    host = host if host is not None else ConfManage.getString("THRIFT_HOST")
    port = port if port is not None else ConfManage.getInt("THRIFT_PORT")
    timeout = timeout if timeout is not None else ConfManage.getInt("THRIFT_TIMEOUT")
    socket = TSocket.TSocket(host, port)
    socket.setTimeout(timeout)
    self.transport = TTransport.TBufferedTransport(socket)
    protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
    # Create a client
    self.client = client_class.Client(protocol)
def save_mode(self, realtime=None, postfix=None):
    if self.model is None:
        raise Exception("1602:No model in Model, please use the train member to produce a model")
    else:
        if realtime is not None:
            estimator_key = '%s_%s_%s' % (self.estimator_name, realtime, self.target)
        else:
            estimator_key = '%s_%s' % (self.estimator_name, self.target)
        try:
            if self.estimator_name == "tf":
                self.model.save("pickles/{app_mode}-{zone}-{estimator_key}".format(
                    app_mode=ConfManage.getString("APP_MODE"),
                    zone=ConfManage.getString("ZONE"),
                    estimator_key=estimator_key))
            else:
                self.model.save_model("pickles/{app_mode}-{zone}-{estimator_key}".format(
                    app_mode=ConfManage.getString("APP_MODE"),
                    zone=ConfManage.getString("ZONE"),
                    estimator_key=estimator_key))
        except AttributeError:
            # Models without save/save_model (i.e. not XGBoost/TF) are saved as .pkl files;
            # guard against the default postfix=None.
            save_pickle(self.model, estimator_key + (postfix or ''), using_joblib=True)
        logger.info('Estimator Key: {}'.format(estimator_key))
def preprocess(date, pickle, estimator, predict_target, holdout, mode, shift_days):
    data = load_pickle(pickle)
    try:
        run_time = get_run_time(date)
        logger.info('Run-Time: %s' % run_time.format(loggable))
        run_time = run_time.shift(days=shift_days).ceil('day').ceil('hour').ceil('minute').ceil('second')
        start_time = run_time.shift(days=-ConfManage.getInt("TRAINING_INTERVAL")).floor('day').floor('hour').floor('minute').floor('second')
        logger.info('Targeted Training Interval %d [%s - %s]' %
                    (ConfManage.getInt("TRAINING_INTERVAL"), start_time.format(loggable), run_time.format(loggable)))
        logger.info('Preprocessing with Estimator %s (%s)' % (estimator, mode))
        # Import the ETA estimator class:
        module_tmp = importlib.import_module('tools.eta.{}_{}'.format(estimator, predict_target))
        class_tmp = getattr(module_tmp, '{}{}'.format(estimator.capitalize(), predict_target.capitalize()))
        estimator_obj = class_tmp()
        # ETL the raw data
        data = estimator_obj.etl(data)
        # Remove outliers
        data = estimator_obj.filter_data(data)
        if data is not None and 'time' in data.columns:
            # Select the data inside the training window
            data = data.loc[(data.order_time > start_time) & (data.order_time < run_time)]
            order_times = data.order_time
            interval_count = len(order_times.apply(lambda order_time: order_time.date()).unique())
            logger.info('Available Training Interval %d/%d [%s - %s]' %
                        (interval_count, ConfManage.getInt("TRAINING_INTERVAL"),
                         order_times.min().format(loggable), order_times.max().format(loggable)))
            # Train the model:
            estimator_obj.preprocess(data, mode, holdout)
            Logger.resource_checkpoint('post-preprocess')
        else:
            raise Exception("Data not yet obtained. Please run `python collect.py` first!")
    except (AttributeError, ValueError) as err:
        logger.error(err)
        logger.error('Trace: {}'.format(traceback.format_exc()))
    except KeyboardInterrupt:
        logger.info('Process manually interrupted at %s' %
                    arrow.now(tz=ConfManage.getString("ARROW_TIMEZONE")).format(loggable))
        logger.info('Releasing Logger...')
        # Logger.release_instance()
    return 0
def main():
    logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME"))
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--date', help='Date', type=str)
    parser.add_argument('-p', '--pickle', type=str, help='Dataset pickle name', default='data')
    parser.add_argument('estimator', help='Estimator to use', nargs='?', type=str, default='xgb')
    parser.add_argument('predict_target', help='Prediction target', nargs='?', type=str, default='accept')
    parser.add_argument('-f', '--feature-selected', help='Use feature selection', action='store_true')
    parser.add_argument('-w', '--withhold', help='Whether to save data to the BI database', action='store_true')
    parser.add_argument('-s', '--shift_days', help='The last few days', type=int, default=-1)
    args = parser.parse_args()
    logger.info('Arguments: estimator=%s, predict-target=%s, feature-selected=%r, withhold-bi-insertion=%r' %
                (args.estimator, args.predict_target, args.feature_selected, args.withhold))
    try:
        process(logger, args.pickle, args.estimator, args.predict_target, args.withhold, args.date, args.shift_days)
    except TestDataEmpty:
        logger.error('Test Data Empty!')
    except (AttributeError, ValueError) as err:
        logger.error(err)
        logger.error('Trace: {}'.format(traceback.format_exc()))
    except KeyboardInterrupt:
        logger.info('Process manually interrupted at %s' %
                    arrow.now(tz=ConfManage.getString("ARROW_TIMEZONE")).format(loggable))
        logger.info('Releasing Logger...')
def load_model_cache(self, name='undefined', using_joblib=False):
    cache_key = 'pickle_cache_{}'.format(name)
    ret = self.cache.get(cache_key)
    if ret is None:
        logger.debug('load_pickle_cache, fetch from raw pickle')
        path = "pickles/{app_mode}-{zone}-{estimator_key}".format(
            app_mode=ConfManage.getString("APP_MODE"),
            zone=ConfManage.getString("ZONE"),
            estimator_key=name)
        if name[:3] == "xgb":
            ret = xgb.Booster(model_file=path)
        elif name[:2] == "tf":
            ret = tf.keras.models.load_model(path, compile=False)
            ret.compile(optimizer=self.estimator().get_optimizer(),
                        loss=self.estimator().loss_class,  # TODO: self.estimator() is not initialised/configured here
                        metrics=['mae', 'mse'])
        else:
            ret = load_pickle(name, using_joblib)
        if ret is not None:
            cached = self.cache.set(cache_key, ret, ConfManage.getInt("PICKLE_CACHE_EXPIRE"))
            logger.debug('load_pickle_cache, set cache, cache_key={}, status={}'.format(cache_key, cached))
    else:
        logger.debug('load_pickle_cache, fetch from cache, cache_key={}'.format(cache_key))
    return ret
def data_url(self, table, topic, start_time=None, end_time=None, columns=None,
             record_path=None, meta=None, timeout=10., **kwargs):
    """
    Call the data API. A table_name value can be appended to the route;
    this cannot be used for multi-condition queries.
    :param table: string
    :param topic: string
    :param start_time: int, e.g. 20190711000000, query start time
    :param end_time: query end time
    :param kwargs: table_name values appended to the route
    :return: pd.DataFrame
    """
    route = ConfManage.getString("DATA_API_PREFIX") + '/' + table + '/' + topic
    for k, v in kwargs.items():
        if isinstance(v, list):
            object_id = ','.join([str(i) for i in v])
            route += '/' + object_id
        elif isinstance(v, int):
            object_id = str(v)
            route += '/' + object_id
    if start_time is not None and end_time is not None:
        params = remove_none(dict(time_start=dataApiTimeFmt(start_time),
                                  time_end=dataApiTimeFmt(end_time)))
    else:
        params = None
    response = self.get(route=route, queries=params, timeout=timeout)
    response = json.loads(response) if isinstance(response, (str, bytes)) else response
    if response['error'] == 0:
        j2df = json_normalize(response['data'], record_path=record_path, meta=meta)
        if columns:
            try:
                j2df = j2df.loc[:, columns]
            except KeyError:
                return j2df
        return j2df
    else:
        raise DataApiException(response['err_msg'])
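# Usage sketch (not part of the original module; the table, topic, column names
# and ids below are assumptions, not values from the source): extra kwargs are
# appended to the route path, while start_time/end_time become query parameters.
#
#   client = SimpleHttpClient()
#   df = client.data_url(
#       table='order', topic='detail',
#       start_time=20190711000000, end_time=20190711235959,
#       columns=['order_id', 'order_time'],
#       order_id=[1001, 1002])   # list kwargs are joined with commas on the route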
def get_distance(self, traffic, starting_coordinates, destination_coordinates, timeout=None, zone="", **kwargs):
    if self.client is None:
        raise ConnectionNotEstablished('OsrmApiClient is not yet initiated.')
    if not isinstance(starting_coordinates, tuple) or len(starting_coordinates) != 2:
        raise ValueError('Given param: `starting_coordinates` is of wrong type or not paired.')
    if not isinstance(destination_coordinates, tuple) or len(destination_coordinates) != 2:
        raise ValueError('Given param: `destination_coordinates` is of wrong type or not paired.')
    route = zone + ConfManage.getString("OSRM_API_%s_ROUTE" % traffic.upper())
    route += '%.6f,%.6f;%.6f,%.6f' % (
        starting_coordinates[0], starting_coordinates[1],
        destination_coordinates[0], destination_coordinates[1])
    params = kwargs if len(kwargs) > 0 else None
    try:
        result = self.client.get(route, params, timeout=timeout)
    except Exception as err:
        logger.error('OsrmapiError link={}, Msg:{}'.format(
            ConfManage.getString("OSRM_API_ENDPOINT") + route, err))
        result = {"routes": [{"distance": -1}]}
    return result['routes'][0]["distance"]
class EtaMetricAll(Base):
    """Object Relational Model class used to establish connection to the eta_metrics_all table"""
    db_name = ConfManage.getString("BI_MYSQL_DBNAME")
    table_name = 'eta_metrics_all'
    columns_msg = {
        'id': ['integer unsigned', 'auto_increment primary key'],
        'prediction_date': ['date', 'not null'],
        'model': ['varchar(30)', 'not null'],
        'mae': ['float unsigned', 'not null'],
        'mse': ['float unsigned', 'not null'],
        'r2': ['float unsigned', 'not null'],
        'limit_N_percent': ['float unsigned', 'not null'],
        'valid_count': ['integer unsigned', 'not null'],
        'total_count': ['integer unsigned', 'not null']
    }

    def checkBidata(self, model, prediction_date):
        sql = "SELECT * FROM {table_name} WHERE prediction_date='{prediction_date}' AND model='{model}'" \
            .format(table_name=EtaMetricAll.table_name, prediction_date=prediction_date, model=model)
        df = read_sql(sql=sql, con=self.sql_conn)
        return len(df) != 0
from tools.caster import chunks
from tools.pickler import delete_pickle, load_pickle, save_pickle
from tools.modeler import get_model
from tools.timer import get_run_time, LOGGABLE_ARROW_FORMAT as loggable
from tools.parallel import multi_thread

if sys.version_info[:2] in [(2, 6), (2, 7)]:
    reload(sys)
    sys.setdefaultencoding('utf-8')
elif sys.version_info[:2] in [(3, 6), (3, 7)]:
    # pylint: disable=E0401, E0611, E1101
    import importlib
    importlib.reload(sys)

ENV_ARROW_TIMEZONE = ConfManage.getString("ARROW_TIMEZONE")
ENV_ARROW_TZSHIFT = ConfManage.getInt("ARROW_TZSHIFT")
ENV_DATA_API_TIMERANGE = ConfManage.getInt("DATA_API_TIMERANGE")
ENV_ZONE = ConfManage.getString("ZONE")

logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME"))
client = ApiClient()
osrm_api_client = OsrmApi()


def fetch_model_info(id_list, model_name='order', col=None, chunk_size=500):
    model = get_model(model_name)
    result = None
    id_chunks = chunks(id_list, chunk_size)
    for id_chunk in id_chunks:
        if result is None:
            result = model.fetch_in(
class Cache:
    """Cache factory class; the CACHE_TYPE environment variable selects which cache backend is used."""
    cache_type = ConfManage.getString("CACHE_TYPE")
    _instance = {}
    CACHE_TYPE_LOCAL = 'Local'
    CACHE_TYPE_REDIS = 'Redis'

    def __init__(self, cache_type=None):
        if cache_type is None:
            if Cache.cache_type == "redis":
                self.client = RedisCache()
            elif Cache.cache_type == "local":
                self.client = LocalCache()
            else:
                raise Exception('1303:CACHE_TYPE set error, must be "local" or "redis".')
        else:
            self.client = cache_type

    @staticmethod
    def get_instance(cache_type=None):
        """Get Singleton instance of Cache"""
        cache_type = cache_type if cache_type is not None else Cache.CACHE_TYPE_LOCAL
        if cache_type not in Cache._instance:
            if cache_type.capitalize() == Cache.CACHE_TYPE_LOCAL:
                instance = LocalCache()
            elif cache_type.capitalize() == Cache.CACHE_TYPE_REDIS:
                instance = RedisCache()
            else:
                raise Exception('1303:cache type error.')
            Cache._instance[cache_type] = instance
        return Cache._instance[cache_type]

    def toCache(self, cacheKey=None, age=ConfManage.getInt("CACHE_AGE")):
        def getData(func):
            def save(*args, **kwargs):
                defineKey = None
                if "cache_key" in kwargs:
                    defineKey = kwargs["cache_key"]
                actual_key = defineKey if defineKey else cacheKey
                retry = 4
                while True:
                    data = self.client.get(actual_key)
                    if data is None:
                        if self.client.set_mutex(actual_key, 2):
                            try:
                                data = func(*args, **kwargs).to_json()
                                self.client.set(actual_key, data, age)
                                self.client.delete(actual_key + "_mutex")
                            except Exception:
                                self.client.delete(actual_key + "_mutex")
                                raise
                            break
                        else:
                            time.sleep(0.5)
                            retry -= 1
                            if retry == 0:
                                logger.error("Cache msg: Get cache data fail while retry 4 times")
                                raise Exception("1302:Get cache data fail while retry 4 times")
                    else:
                        extime = self.client.ttl(actual_key)
                        if extime <= 8:
                            if self.client.set_mutex(actual_key, 2):
                                try:
                                    data = func(*args, **kwargs).to_json()
                                    self.client.set(actual_key, data, age)
                                    self.client.delete(actual_key + "_mutex")
                                except Exception:
                                    logger.error("Cache msg: get {} failed, return old data".format(kwargs["topic"]))
                                    self.client.delete(actual_key + "_mutex")
                                    return data
                                break
                            else:
                                break
                        else:
                            break
                return pandas.read_json(data)
            return save
        return getData
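# Usage sketch of the toCache decorator (not part of the original module; the
# function, key and data-source names below are hypothetical). The wrapped
# function must return a DataFrame, which is cached as JSON under `cache_key`
# (or the decorator-level cacheKey) for `age` seconds and re-read with
# pandas.read_json on a cache hit.
#
#   cache = Cache()
#
#   @cache.toCache(cacheKey='orders_today', age=300)
#   def fetch_orders(topic=None, cache_key=None):
#       return some_dataframe_source(topic)   # hypothetical data source
#
#   df = fetch_orders(topic='order', cache_key='orders_today')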
def main(): """Obtain Information from Data-API and MySQL Database""" logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME")) Logger.resource_checkpoint('init') parser = argparse.ArgumentParser() parser.add_argument('-c', '--clear', help='Clear previously saved pickles.', action='store_true') parser.add_argument('-r', '--reverse', help='whether clear previously data.', action='store_true') parser.add_argument('-d', '--date', help='Date used for calculation.', type=str) parser.add_argument('-p', '--pickle', type=str, default='stream_data', help='Pickle name for saving latest data-collection.') parser.add_argument('-u', '--updata', type=bool, help='Merge data with new feature to data.pkl.', default=False) parser.add_argument('-f', '--funtion', help='Update new feature from funtion.') parser.add_argument( '-m', '--merge_on', help='Field names to join on. Must be found in both DataFrames.') args = parser.parse_args() # 更新新的数据: if args.updata: data = load_pickle(args.pickle) if args.funtion is not None and args.merge_on is not None: update_data(logger, args.funtion, data, args.merge_on) else: logger.error( 'Funtion and Merge_on is None, Please provide corresponding parameters' ) return # 清除所有子pkl: if args.clear: clear_pickles(logger) return is_reverse = False if args.reverse is None else True # 向前或向后收集数据 # 处理时间段 run_time = get_run_time(None, 0, False) logger.info('Run-Time: %s' % run_time) collect_date = None if args.date is None else get_run_time(args.date) logger.info('Collect-Date: %s' % collect_date) end_time = run_time.shift(days=-1).ceil('day') logger.info('End-Time: %s' % end_time) # 读取数据 pickled = load_pickle(args.pickle) collected_count = 0 if pickled is not None and isinstance( pickled, pd.DataFrame) and 'order_time' in pickled.columns: order_times = pickled['order_time'] del pickled # Release pickle collected_count = len( order_times.apply(lambda order_time: order_time.date()).unique()) collected_start_time = order_times.min() logger.info('Min collected order_time Date: %s' % collected_start_time.format(loggable)) collected_end_time = order_times.max() logger.info('Max collected order_time Date: %s' % collected_end_time.format(loggable)) if collect_date is not None: if collect_date > end_time: logger.warning( 'collect_date can not greater then end_time {} > {}'. format(collect_date.format(loggable), end_time.format(loggable))) return if collect_date < collected_start_time.floor('day'): start_time = collect_date.floor('day') end_time = collected_start_time.shift(days=-1).ceil('day') elif collected_start_time.floor( 'day') <= collect_date <= collected_end_time.ceil('day'): trim_data(logger, collect_date, is_reverse) if is_reverse: start_time = collected_start_time.floor('day') end_time = collect_date.ceil('day') else: start_time = collect_date.floor('day') end_time = collected_end_time.ceil('day') elif collect_date > collected_end_time.ceil('day'): start_time = collected_end_time.shift(days=1).floor('day') end_time = collect_date.ceil('day') else: logger.warning('collect_data invalid. 
{}'.format(collect_date)) return else: if collected_end_time >= end_time: logger.info('Targeted Run-Time already in Collection-Interval') return else: start_time = collected_end_time.shift(days=1).floor('day') gap = start_time.shift(days=-1).date() - end_time.date() gap_days = gap.days else: logger.info('Data empty!') gap_days = -ConfManage.getInt("COLLECTION_GAP") start_time = end_time.shift(days=gap_days + 1).floor('day') logger.info('Total Collection Interval: %d/%d [%s - %s]' % (collected_count, ConfManage.getInt("COLLECTION_INTERVAL"), start_time.format(loggable), end_time.format(loggable))) if gap_days >= 0: logger.info('Targeted Run-Time already in Collection-Interval') return logger.info('Gap: %d' % gap_days) logger.info( 'Gap Interval: %d [%s - %s]' % (gap_days, start_time.format(loggable), end_time.format(loggable))) try: # 这部分代码是针对缺失1天以上的数据进行每日收集 for i in range(-gap_days, 0, -1): end_time = start_time.ceil('day') logger.info('Collecting data in [{} - {}]'.format( start_time.format(loggable), end_time.format(loggable))) collect(logger, start_time, end_time, args.pickle) logger.info('Success collect data in [{} - {}] \n\n'.format( start_time.format(loggable), end_time.format(loggable))) start_time = start_time.shift(days=1) trim_outdated(logger, run_time, args.pickle) # 没有环境变量下,默认截取最近30天的数据 except (AttributeError, ValueError) as err: logger.error(err) logger.error('Trace: {}'.format(traceback.format_exc())) except KeyboardInterrupt: logger.info('Process manually interupted at {}'.format(arrow.utcnow())) logger.info('Releasing Logger...') return 0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Collect real-time data
import argparse
import arrow
import pandas as pd
import traceback
from client.api_client import ApiClient
from configs.ConfManage import ConfManage
from tools.logger import Logger
from tools.pickler import delete_pickle, load_pickle, save_pickle
from tools.timer import get_run_time, LOGGABLE_ARROW_FORMAT as loggable

client = ApiClient()
ENV_ARROW_TIMEZONE = ConfManage.getString("ARROW_TIMEZONE")
ENV_ARROW_TZSHIFT = ConfManage.getInt("ARROW_TZSHIFT")
ENV_DATA_API_TIMERANGE = ConfManage.getInt("DATA_API_TIMERANGE")
ENV_ZONE = ConfManage.getString("ZONE")


def collect_batch_data(start_time, end_time, table, topic, columns=None):
    st = start_time
    et = end_time
    data_df = pd.DataFrame()
    hours_interval = int(24 / ENV_DATA_API_TIMERANGE) if 24 % ENV_DATA_API_TIMERANGE == 0 else 12
    while st < et:
        snt = st.shift(hours=hours_interval)
        data_df = data_df.append(
class ApiClient(object):
    """Decides, based on configuration, whether data is fetched via HBase or via the data API URL."""

    def __init__(self):
        self.conf = ConfManage()
        if self.conf.getString("API_TYPE") == "URL":
            self.client = self.dataapi_client()
        else:
            self.client = self.hbase_client()

    @staticmethod
    def client():
        if ConfManage.getString("API_TYPE") == "URL":
            client = ApiClient.dataapi_client()
        else:
            client = ApiClient.hbase_client()
        return client

    @staticmethod
    def hbase_client():
        return HbaseClient()

    @staticmethod
    def dataapi_client():
        return SimpleHttpClient()

    def set_client(self, client):
        self.client = client
        return self.client

    def get_client(self):
        return self.client

    def get_data(self, **kwargs):
        data = self.client.get_data(**kwargs)
        return data

    def get_cache_data(self, key, **kwargs):
        """
        Fetch cached data.
        Args:
            key: cache key
            **kwargs:
        Returns:
            pd.DataFrame
        """
        update_cache_time = self.conf.getInt("CACHE_UPDATE")
        retry = 3
        while True:
            data = cache.get(key)
            if data is None:
                if cache.set_mutex(key, 2):
                    try:
                        data = self.get_data(**kwargs).to_json()
                        cache.set(key, data)
                        cache.delete(key + "_mutex")
                    except Exception:
                        cache.delete(key + "_mutex")
                        raise
                    break
                else:
                    time.sleep(1)
                    retry -= 1
                    if retry == 0:
                        logger.error("Cache msg: Get cache data fail while retry three times, key: {}".format(key))
                        raise Exception("1302:Get cache data fail while retry three times, key: {}".format(key))
            else:
                extime = cache.ttl(key)
                if extime <= update_cache_time:
                    if cache.set_mutex(key, 2):
                        try:
                            data = self.get_data(**kwargs).to_json()
                            cache.set(key, data)
                            cache.delete(key + "_mutex")
                        except Exception:
                            logger.error("Cache msg: get {} failed, return old data".format(key))
                            cache.delete(key + "_mutex")
                            return data
                        break
                    else:
                        break
                else:
                    break
        return read_json(data)

    def collect_batch_data(self, start_time, end_time, table, topic, columns=None, record_path=None, meta=None):
        st = start_time
        et = end_time
        data_df = pd.DataFrame()
        hours_interval = int(24 / ENV_DATA_API_TIMERANGE) if 24 % ENV_DATA_API_TIMERANGE == 0 else 12
        while st < et:
            snt = st.shift(hours=hours_interval)
            data_df = data_df.append(
                self.get_data(table=table, topic=topic, start_time=st.shift(seconds=1), end_time=snt,
                              columns=columns, record_path=record_path, meta=meta))
            st = snt
        return data_df

    def fetch_data(self, start_time, end_time, table, topic, columns=None, record_path=None, meta=None,
                   save_file_prefix="order_accept_"):
        df = load_pickle(save_file_prefix + topic)
        if df is None:
            df = self.collect_batch_data(start_time=start_time, end_time=end_time, table=table, topic=topic,
                                         columns=columns, record_path=record_path, meta=meta)
            save_pickle(df, save_file_prefix + topic)
        else:
            if len(df) == 0:
                df = self.collect_batch_data(start_time=start_time, end_time=end_time, table=table, topic=topic,
                                             columns=columns, record_path=record_path, meta=meta)
                save_pickle(df, save_file_prefix + topic)
        logger.info('Fetch %s (Count): %d' % (topic, len(df)))
        return df
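# Usage sketch (not part of the original module; the table, topic and column
# names are assumptions): fetch_data first tries the "order_accept_<topic>"
# pickle and only calls the API, in hourly batches sized by
# DATA_API_TIMERANGE, when no pickle exists or it is empty.
#
#   api = ApiClient()
#   start_time = get_run_time(None, shift_days=-7)
#   end_time = get_run_time(None, shift_days=-1).ceil('day')
#   orders = api.fetch_data(start_time, end_time, table='order', topic='detail',
#                           columns=['order_id', 'order_time'])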
# coding=utf-8
from thrift import Thrift
from thrift.protocol import TBinaryProtocol
from thrift.Thrift import TApplicationException
from thrift.transport import TSocket, TTransport
from configs.ConfManage import ConfManage
from tools.logger import Logger

logger = Logger.get_instance(ConfManage.getString("LOG_BASE_NAME"))


class ThriftClient:
    """Thrift client"""

    def __init__(self, client_class, host=None, port=None, timeout=None):
        host = host if host is not None else ConfManage.getString("THRIFT_HOST")
        port = port if port is not None else ConfManage.getInt("THRIFT_PORT")
        timeout = timeout if timeout is not None else ConfManage.getInt("THRIFT_TIMEOUT")
        socket = TSocket.TSocket(host, port)
        socket.setTimeout(timeout)
        self.transport = TTransport.TBufferedTransport(socket)
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        # Create a client
        self.client = client_class.Client(protocol)

    def close(self):
        if self.transport.isOpen():
            self.transport.close()
            # logger.info('thrift transport IS CLOSED!')
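# Usage sketch (not part of the original module; the generated EtaService module
# and the predict RPC are hypothetical — any Thrift-generated service with the
# usual `Client` attribute would work the same way):
#
#   from thrift_gen.eta_service import EtaService   # hypothetical generated module
#
#   thrift_client = ThriftClient(EtaService)
#   try:
#       thrift_client.transport.open()
#       result = thrift_client.client.predict(payload)   # hypothetical RPC method
#   finally:
#       thrift_client.close()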
def __init__(self, log_name):
    dictConfig({
        'version': 1,
        'disable_existing_loggers': True,
        # Log formatters
        'formatters': {
            'verbose': {
                'format': "[%(asctime)s][%(filename)s][%(levelname)s]: %(message)s",
                'datefmt': "%Y-%m-%d %H:%M:%S"
            },
            'simple': {
                'format': '%(levelname)s %(message)s'
            },
        },
        'handlers': {
            'null': {
                'level': ConfManage.getInt("LOG_LEVEL"),
                'class': 'logging.FileHandler',
                'filename': 'logs/{}.log'.format(log_name),
            },
            'console': {
                'level': ConfManage.getInt("LOG_LEVEL"),
                'class': 'logging.StreamHandler',
                'formatter': 'verbose',
            },
            'thread': {
                'level': ConfManage.getInt("LOG_LEVEL"),
                'class': 'logging.handlers.TimedRotatingFileHandler',
                'when': "D",
                # Keep at most 10 backup files
                'backupCount': 10,
                # If delay is true,
                # then file opening is deferred until the first call to emit().
                'delay': True,
                'formatter': 'verbose',
                'filename': 'logs/{}.log'.format(log_name),
            },
            'process': {
                'level': ConfManage.getInt("LOG_LEVEL"),
                # Without a concurrency-safe handler, log records can be lost
                # when multiple instances write to the same file.
                'class': 'cloghandler.ConcurrentRotatingFileHandler',
                # Rotate the log once it reaches maxBytes (1 GB here)
                'maxBytes': 1024 * 1024 * 1024,
                # Keep at most 10 backup files
                'backupCount': 10,
                'delay': True,
                'formatter': 'verbose',
                'filename': 'logs/{}.log'.format(log_name),
            },
        },
        'loggers': {
            ConfManage.getString("LOG_BASE_NAME"): {
                'handlers': ['process'],
                'level': ConfManage.getInt("LOG_LEVEL"),
            },
            ConfManage.getString("LOG_CRON_NAME"): {
                'handlers': ['thread'],
                'level': ConfManage.getInt("LOG_LEVEL"),
            },
            ConfManage.getString("LOG_REQ_NAME"): {
                'handlers': ['process'],
                'level': ConfManage.getInt("LOG_LEVEL"),
            },
        }
    })
    self.logger = logging.getLogger(log_name)
    streamHandler = StreamHandler(sys.stdout)
    self.logger.addHandler(streamHandler)
# AUTHOR: [email protected]
# DESCRIPTION:
#
# HISTORY:
# *************************************************************
"""Module defining useful pickle-related methods"""
import os
import sys
from pickle import dump, load, PicklingError, UnpicklingError, HIGHEST_PROTOCOL
from configs.ConfManage import ConfManage
from tools.logger import Logger
from tools.cache import Cache
from sklearn.externals import joblib
from tools.po_cache_ret import POCacheRet

logger = Logger.get_instance(ConfManage.getString("LOG_BASE_NAME"))
PICKLE_FOLDER = ConfManage.getString("PICKLE_FOLDER")
cache = Cache.get_instance()
pickle_prefix = '%s-%s-' % (ConfManage.getString("APP_MODE"), ConfManage.getString("ZONE"))

if sys.version_info[:2] in [(2, 6), (2, 7)]:
    reload(sys)
    sys.setdefaultencoding('utf-8')
elif sys.version_info[:2] in [(3, 6), (3, 7)]:
    # pylint: disable=E0401, E0611, E1101
    import importlib
    importlib.reload(sys)
def __init__(self):
    self.logger = Logger.get_instance(ConfManage.getString("LOG_BASE_NAME"))
    self.cache = Cache().client
    self.cache_expire = ConfManage.getInt("OSRM_CACHE_EXPIRE")
def client():
    if ConfManage.getString("API_TYPE") == "URL":
        client = ApiClient.dataapi_client()
    else:
        client = ApiClient.hbase_client()
    return client
from models_bi.eta_metric_all import EtaMetricAll
from configs import conf
from tools.logger import Logger
from configs.ConfManage import ConfManage
import os
import arrow
from core.process import process
from argparse import ArgumentParser
from core.stream_collect import main

logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME"))
ENV_APP_MODE = ConfManage.getString("APP_MODE")
ENV_ZONE = ConfManage.getString("ZONE")


def check_collect():
    """Check whether collect has already gathered today's data."""
    pickle_prefix = os.getenv('ENV_PICKLE_PREFIX', '')
    if pickle_prefix == '' and ENV_APP_MODE != 'release':
        pickle_prefix = '%s-%s-' % (ENV_APP_MODE, ENV_ZONE)
    filename = '%sdata.pkl' % (pickle_prefix)
    file = './pickles/%s' % (filename)
    collect_ctimes = 3
    while collect_ctimes > 0:
        try:
            datacreattime = arrow.get(os.path.getctime(file)).date()
        except FileNotFoundError as err:
            logger.info(err)
            main()
            datacreattime = arrow.get(os.path.getctime(file)).date()
        today = arrow.now().date()
        if datacreattime != today:
def __init__(self):
    self.client = SimpleHttpClient("http://" + ConfManage.getString("OSRM_API_ENDPOINT"))
    self.S2_cache = S2()