# Example #1 (score: 0)
    def __init__(self):
        """Build a shared Redis connection pool, a client bound to it, and a logger.

        All connection parameters come from ConfManage-managed configuration.
        """
        pool_options = dict(
            host=ConfManage.getString("REDIS_HOST"),
            port=ConfManage.getInt("REDIS_PORT"),
            db=ConfManage.getInt("REDIS_DB"),
            password=ConfManage.getString("REDIS_PASSWORD"),
            max_connections=ConfManage.getInt("REDIS_MAX_CONNECTIONS"),
            decode_responses=True,
            socket_keepalive=True,
        )
        self.pool = redis.ConnectionPool(**pool_options)

        # NOTE(review): socket_connect_timeout passed alongside connection_pool is
        # likely ignored, since the pool owns the connection settings — confirm intent.
        self.conn = redis.StrictRedis(connection_pool=self.pool,
                                      socket_connect_timeout=5)
        self.logger = Logger.get_instance(ConfManage.getString("LOG_REQ_NAME"))
# Example #2 (score: 0)
def main():
    """CLI entry point: parse arguments and run one prediction pass.

    Expected failures (empty test data, bad attributes/values, manual
    interrupt) are caught and logged rather than propagated.
    """
    logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME"))

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--date', help='日期', type=str)
    parser.add_argument('-p', '--pickle', type=str, help='数据集', default='data')
    parser.add_argument('estimator', help='算法选择', nargs='?', type=str, default='xgb')
    parser.add_argument('predict_target', help='目标值', nargs='?', type=str, default='accept')
    parser.add_argument('-f', '--feature-selected', help='特征值选择', action='store_true')
    parser.add_argument('-w', '--withhold', help='是否保存数据到bi数据库', action='store_true')
    parser.add_argument("-s", "--shift_days", help="The last few days", type=int, default=-1)
    args = parser.parse_args()

    logger.info(
        'Arguments: estimator=%s, predict-target=%s, feature-selected=%r, withhold-bi-insertion=%r'
        % (args.estimator, args.predict_target, args.feature_selected, args.withhold))

    try:
        process(logger, args.pickle, args.estimator, args.predict_target,
                args.withhold, args.date, args.shift_days)
    except TestDataEmpty:
        logger.error('Test Data Empty!')
    except (AttributeError, ValueError) as err:
        logger.error(err)
        logger.error('Trace: {}'.format(traceback.format_exc()))
    except KeyboardInterrupt:
        interrupt_time = arrow.now(tz=ConfManage.getString("ARROW_TIMEZONE"))
        logger.info('Process manually interupted at %s' % interrupt_time.format(loggable))
    logger.info('Releasing Logger...')
from core.predictor import *
from tools.cache import Cache
from tools.pickler import reload_pickle_cache, init_pickle_cache
from tools.po_cache_ret import POCacheRet
from tools.logger import Logger

# Python 2.6/2.7 needs reload(sys) before setdefaultencoding is visible;
# on 3.6/3.7 the equivalent reload lives in importlib (and
# setdefaultencoding no longer exists, so it is not called there).
if sys.version_info[:2] in [(2, 6), (2, 7)]:
    reload(sys)
    sys.setdefaultencoding('utf-8')
elif sys.version_info[:2] in [(3, 6), (3, 7)]:
    # pylint: disable=E0401, E0611, E1101
    import importlib
    importlib.reload(sys)

logger = Logger.get_instance(ConfManage.getString("LOG_BASE_NAME"))
# Import Thrift generated classes
sys.path.append('gen-py')
# BUG FIX: the original indexed the glob *pattern* ('...'[0] == '.') instead
# of the glob *result*, so sys.path was given the whole list returned by
# glob.glob('.'). Take the first matching build directory, if one exists.
_thrift_lib_dirs = glob.glob('../../lib/py/build/lib*')
if _thrift_lib_dirs:
    sys.path.insert(0, _thrift_lib_dirs[0])
try:
    from eta import EstimateTaskDurations
    from eta.ttypes import Duration, Range, InvalidInput, ModelMissing, Prediction, Params, \
        Prediction_showup, Prediction_quote, Prediction_delivery, PredictionResult, Data
except ImportError:
    logger.error('Cannot find thrift classes.')
    # Fixed stray '(' in the original message.
    logger.error('Have you run `thrift --gen py eta.thrift`?')
    raise


def __validate_coordinates__(query):
    if query.showup_distance is None:
# Example #4 (score: 0)
def main():
    """Obtain Information from Data-API and MySQL Database.

    Determines the date interval still missing from the saved pickle
    (forwards or backwards from the collected range, or from scratch),
    collects data day by day to fill the gap, then trims outdated rows.
    Returns 0 on normal completion, or None on an early exit.
    """
    logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME"))
    Logger.resource_checkpoint('init')
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--clear',
                        help='Clear previously saved pickles.',
                        action='store_true')
    parser.add_argument('-r',
                        '--reverse',
                        help='whether clear previously data.',
                        action='store_true')
    parser.add_argument('-d',
                        '--date',
                        help='Date used for calculation.',
                        type=str)
    parser.add_argument('-p',
                        '--pickle',
                        type=str,
                        default='stream_data',
                        help='Pickle name for saving latest data-collection.')
    parser.add_argument('-u',
                        '--updata',
                        type=bool,
                        help='Merge data with new feature to data.pkl.',
                        default=False)
    # NOTE(review): flag name 'funtion' is a typo but is the public CLI
    # interface — renaming would break existing cron invocations.
    parser.add_argument('-f',
                        '--funtion',
                        help='Update new feature from funtion.')
    parser.add_argument(
        '-m',
        '--merge_on',
        help='Field names to join on. Must be found in both DataFrames.')
    args = parser.parse_args()

    # Merge a new feature column into the existing pickle, then exit.
    if args.updata:
        data = load_pickle(args.pickle)
        if args.funtion is not None and args.merge_on is not None:
            update_data(logger, args.funtion, data, args.merge_on)
        else:
            logger.error(
                'Funtion and Merge_on is None, Please provide corresponding parameters'
            )
        return
    # Clear all child pickles, then exit.
    if args.clear:
        clear_pickles(logger)
        return

    # BUG FIX: args.reverse comes from action='store_true', so it is always a
    # bool and never None; the old `False if args.reverse is None else True`
    # evaluated to True even when -r was absent.
    is_reverse = args.reverse  # collect backwards (True) or forwards (False)

    # Work out the time window.
    run_time = get_run_time(None, 0, False)
    logger.info('Run-Time: %s' % run_time)
    collect_date = None if args.date is None else get_run_time(args.date)
    logger.info('Collect-Date: %s' % collect_date)
    end_time = run_time.shift(days=-1).ceil('day')
    logger.info('End-Time: %s' % end_time)

    # Load previously collected data to find what is already covered.
    pickled = load_pickle(args.pickle)
    collected_count = 0
    if pickled is not None and isinstance(
            pickled, pd.DataFrame) and 'order_time' in pickled.columns:
        order_times = pickled['order_time']
        del pickled  # Release pickle
        collected_count = len(
            order_times.apply(lambda order_time: order_time.date()).unique())
        collected_start_time = order_times.min()
        logger.info('Min collected order_time Date: %s' %
                    collected_start_time.format(loggable))
        collected_end_time = order_times.max()
        logger.info('Max collected order_time Date: %s' %
                    collected_end_time.format(loggable))

        if collect_date is not None:
            if collect_date > end_time:
                logger.warning(
                    'collect_date can not greater then end_time {} > {}'.
                    format(collect_date.format(loggable),
                           end_time.format(loggable)))
                return
            if collect_date < collected_start_time.floor('day'):
                # Requested date is before the collected range: fill backwards.
                start_time = collect_date.floor('day')
                end_time = collected_start_time.shift(days=-1).ceil('day')
            elif collected_start_time.floor(
                    'day') <= collect_date <= collected_end_time.ceil('day'):
                # Requested date falls inside the collected range: trim and re-collect.
                trim_data(logger, collect_date, is_reverse)
                if is_reverse:
                    start_time = collected_start_time.floor('day')
                    end_time = collect_date.ceil('day')
                else:
                    start_time = collect_date.floor('day')
                    end_time = collected_end_time.ceil('day')
            elif collect_date > collected_end_time.ceil('day'):
                # Requested date is after the collected range: fill forwards.
                start_time = collected_end_time.shift(days=1).floor('day')
                end_time = collect_date.ceil('day')
            else:
                logger.warning('collect_data invalid. {}'.format(collect_date))
                return
        else:
            if collected_end_time >= end_time:
                logger.info('Targeted Run-Time already in Collection-Interval')
                return
            else:
                start_time = collected_end_time.shift(days=1).floor('day')

        # Negative gap_days == number of days still missing.
        gap = start_time.shift(days=-1).date() - end_time.date()
        gap_days = gap.days

    else:
        logger.info('Data empty!')
        gap_days = -ConfManage.getInt("COLLECTION_GAP")
        start_time = end_time.shift(days=gap_days + 1).floor('day')

    logger.info('Total Collection Interval: %d/%d [%s - %s]' %
                (collected_count, ConfManage.getInt("COLLECTION_INTERVAL"),
                 start_time.format(loggable), end_time.format(loggable)))

    if gap_days >= 0:
        logger.info('Targeted Run-Time already in Collection-Interval')
        return

    logger.info('Gap: %d' % gap_days)
    logger.info(
        'Gap Interval: %d [%s - %s]' %
        (gap_days, start_time.format(loggable), end_time.format(loggable)))
    try:
        # Collect day by day when more than one day of data is missing.
        for i in range(-gap_days, 0, -1):
            end_time = start_time.ceil('day')
            logger.info('Collecting data in [{} - {}]'.format(
                start_time.format(loggable), end_time.format(loggable)))
            collect(logger, start_time, end_time, args.pickle)
            logger.info('Success collect data in [{} - {}] \n\n'.format(
                start_time.format(loggable), end_time.format(loggable)))
            start_time = start_time.shift(days=1)
        # Without an env override, keep only the most recent 30 days.
        trim_outdated(logger, run_time, args.pickle)
    except (AttributeError, ValueError) as err:
        logger.error(err)
        logger.error('Trace: {}'.format(traceback.format_exc()))
    except KeyboardInterrupt:
        logger.info('Process manually interupted at {}'.format(arrow.utcnow()))
    logger.info('Releasing Logger...')
    return 0
# Example #5 (score: 0)
 def __init__(self):
     """Initialise logging, cache access, and the OSRM cache expiry (seconds?).

     All values are sourced from ConfManage configuration.
     """
     log_name = ConfManage.getString("LOG_BASE_NAME")
     self.logger = Logger.get_instance(log_name)
     self.cache = Cache().client
     self.cache_expire = ConfManage.getInt("OSRM_CACHE_EXPIRE")
# Example #6 (score: 0)
from models_bi.eta_metric_all import EtaMetricAll
from configs import conf
from tools.logger import Logger
from configs.ConfManage import ConfManage
import os
import arrow
from core.process import process
from argparse import ArgumentParser
from core.stream_collect import main
# Module-level logger for the cron job.
logger = Logger.get_instance(ConfManage.getString("LOG_CRON_NAME"))
ENV_APP_MODE = ConfManage.getString("APP_MODE")  # e.g. 'release'; non-release modes get a pickle-name prefix
ENV_ZONE = ConfManage.getString("ZONE")  # deployment zone; combined with APP_MODE in the pickle prefix

def check_collect():
    """检查collect是否采集"""
    pickle_prefix = os.getenv('ENV_PICKLE_PREFIX', '')
    if pickle_prefix == '' and ENV_APP_MODE != 'release':
        pickle_prefix = '%s-%s-' % (ENV_APP_MODE, ENV_ZONE)
    filename = '%sdata.pkl' % (pickle_prefix)
    file = './pickles/%s' % (filename)
    collect_ctimes = 3
    while collect_ctimes > 0:
        try:
            datacreattime = arrow.get(os.path.getctime(file)).date()
        except FileNotFoundError as err:
            logger.info(err)
            main()
            datacreattime = arrow.get(os.path.getctime(file)).date()
        today = arrow.now().date()
        if datacreattime != today: