示例#1
0
def interrupt_runner():
    """Listen on the ``slave_interrupt`` Redis channel and raise ``INTERRUPT``.

    Runs forever. Each pass of the outer loop builds a Redis client from the
    ``redis`` section of ``slave_config['default']``, subscribes to the
    ``slave_interrupt`` channel and waits for events. The first event of
    type ``message`` sets the module-level ``INTERRUPT`` flag to ``True``
    and ends the current subscription; the outer loop then subscribes again.

    Any failure resets ``INTERRUPT`` to ``False`` and is logged. A failure
    while connecting or subscribing additionally waits 10 seconds before
    the next attempt.

    NOTE(review): connection settings always come from the ``default``
    entry of ``slave_config``, even when a per-IP entry exists -- confirm
    that this is intended.
    """
    global INTERRUPT
    while 1:
        try:
            conn_config = slave_config.get('default')
            connection_pool = redis.ConnectionPool(**conn_config.get('redis'))
            client = redis.Redis(connection_pool=connection_pool)
            pubsub = client.pubsub()
            pubsub.subscribe('slave_interrupt')
            listener = pubsub.listen()
            while 1:
                try:
                    # next() works on Python 2.6+ and Python 3 alike.
                    msg = next(listener)
                    if msg['type'] == 'message':
                        INTERRUPT = True
                        log.info('slave interrupt accepted')
                        break
                except StopIteration:
                    # The listener is exhausted: resubscribe from scratch.
                    INTERRUPT = False
                    break
                except Exception:
                    # Exception (not a bare except) so that SystemExit and
                    # KeyboardInterrupt are not swallowed by this loop.
                    INTERRUPT = False
                    log.error(traceback.format_exc())
        except Exception:
            INTERRUPT = False
            log.error(traceback.format_exc())
            # Back off before trying to reconnect.
            time.sleep(10)
示例#2
0
def main():
    """Initialise the slave identity, then open and release its connections.

    Sets the module-level ``SLAVEID`` and ``IP`` globals, builds a MySQL
    connection pool and a Redis connection from the ``default`` entry of
    ``slave_config``, reports progress through ``_slave_info`` and finally
    releases both resources again.

    NOTE(review): connection settings always come from the ``default``
    entry of ``slave_config``, even when a per-IP entry exists -- confirm
    that this is intended.
    """
    global SLAVEID, IP

    _slave_info('initing...')

    # get slave ID
    SLAVEID = getMacId()

    # get slave IP
    IP = getMyIpInfo()['data']['ip']

    # connection pool
    conn_config = slave_config.get('default')
    dbConnPool = ConnectionPool(max_connections=1000,
                                **conn_config.get('mysql'))
    try:
        reConn = getRedisConn2(conn_config.get('redis'))
        _slave_info('init... complete')
        _slave_info('slave start to work')
        del reConn
    finally:
        # Release the MySQL pool even when the Redis connection or the
        # status reporting above fails.
        dbConnPool.disconnect()
示例#3
0
def prepar():
    """Initialise the module-level state the slave needs before working.

    Sets these globals:

    * ``SLAVEID`` -- result of ``getMacId()``.
    * ``IP`` -- the ``['data']['ip']`` field of ``getMyIpInfo()``.
    * ``log`` -- logger built by ``ibbdlib.ibbdlog.get_logger``; its output
      target is the ``slave_info_log`` setting of the per-IP entry of
      ``slave_config`` when one exists, otherwise of the ``default`` entry.
    * ``DB_POOLS[<mysql host>]`` -- MySQL connection pool built from the
      ``mysql`` section of the ``default`` entry.
    * ``RE_CONN`` -- Redis connection built from the ``redis`` section of
      the ``default`` entry.

    NOTE(review): the MySQL/Redis settings always come from the ``default``
    entry, even when a per-IP entry exists -- confirm that this is intended.
    """
    global SLAVEID, IP, log, DB_POOLS, RE_CONN

    # get slave ID
    SLAVEID = getMacId()

    # get slave IP
    IP = getMyIpInfo()['data']['ip']

    # Per-IP settings take precedence over the defaults for the log target.
    ip_config = slave_config.get(IP) or slave_config.get('default')
    log = ibbdlib.ibbdlog.get_logger(
        log_path='../log/',
        log_file='slave %s %s.log' %
        (PID, time.strftime('%Y%m%d', time.localtime())),
        log_name='spider.slave',
        msg_format=
        '%(asctime)s\t%(name)s\t%(process)d\t%(levelname)s\t%(message)s',
        output=ip_config['slave_info_log'])

    _slave_info('initing...')

    # connection pool
    conn_config = slave_config.get('default')
    mysql_config = conn_config.get('mysql')
    # Nested dict values in the mysql section are not pool keyword
    # arguments, so they are filtered out before the call.
    pool_kwargs = dict((key, value) for key, value in mysql_config.items()
                       if not isinstance(value, dict))
    dbConnPool = ConnectionPool(max_connections=1000, **pool_kwargs)
    reConn = getRedisConn2(conn_config.get('redis'))
    _slave_info('init... complete')
    _slave_info('slave start to work')

    DB_POOLS[mysql_config['host']] = dbConnPool
    RE_CONN = reConn
示例#4
0
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
import json

import ibbdlib

from slave_config import slave_config


# One-off maintenance script: merge the MySQL settings for the
# ``Schedule_topItemFullInfo`` schedule into the JSON object stored under
# the ``slave_db_server`` key in Redis.
re_conn = ibbdlib.get_redis_conn(**slave_config.get('default').get('redis'))

# NOTE(review): database credentials are hard-coded in source here; consider
# loading them from configuration instead.
schedule_db_mapping = {
    'Schedule_topItemFullInfo': {
        'host': '223.4.155.152',
        'user': '******',
        'passwd': 'spider',
        'db': 'topspider',
        'charset': 'utf8'
    }
}

# Presumably ``re_conn`` is a redis-py client, whose GET returns None for a
# missing key; json.loads(None) would raise TypeError, so start from an
# empty mapping when nothing is stored yet.
slave_db_server = json.loads(re_conn.get('slave_db_server') or '{}')
slave_db_server.update(schedule_db_mapping)
re_conn.set('slave_db_server', json.dumps(slave_db_server))
示例#5
0
def main():
    """Initialise the slave and run the schedule dispatch loop forever.

    Sets the module-level ``log``, ``SLAVEID`` and ``IP`` globals, opens one
    MySQL connection pool per entry of ``db_server`` plus a ``master`` pool
    from ``db_server_master``, and opens a Redis connection using the
    per-IP entry of ``slave_config`` (falling back to ``default``).

    Each pass of the main loop first calls ``_slave_activity`` and
    ``_slave_interval`` until both succeed, then walks the priorities of
    ``SCHEDULE_PRIORITY`` in ascending key order and starts a worker thread
    for every schedule for which all of the following hold:

    1. the schedule key exists in Redis;
    2. no live worker thread already carries the schedule's name;
    3. every live worker thread is named after another schedule of the
       same priority group.

    The worker target is looked up by name through ``THREAD_TARGET`` and
    receives ``(reConn, <dict of connection pools>)``. The loop then sleeps
    10 seconds. This function never returns.
    """
    global log, SLAVEID, IP

    # get log
    log = _get_log()

    # slave info
    _slave_info('initing...')

    # get slave ID
    SLAVEID = getMacId()

    # get slave IP
    IP = getMyIpInfo()['data']['ip']

    # Per-IP settings take precedence; fall back to the shared defaults.
    # (The lookup key must be the IP value, not the literal string 'IP'.)
    conn_config = slave_config.get(IP) or slave_config.get('default')

    # connection pools, keyed by database server (plus 'master')
    db_pools = dict()
    for db_server_ip, db_server_config in db_server.items():
        db_pools[db_server_ip] = ConnectionPool(max_connections=1000,
                                                **db_server_config)
    db_pools['master'] = ConnectionPool(max_connections=1000,
                                        **db_server_master)
    reConn = getRedisConn2(conn_config.get('redis'))
    _slave_info('init... complete')
    _slave_info('slave start to work')

    poll_seconds = 10

    # thread pool
    threadPool = []
    while 1:
        # Retry until both status calls succeed, but log the failure and
        # pause briefly instead of spinning silently.
        while 1:
            try:
                _slave_activity(reConn)
                _slave_interval(reConn, 10, 10)
                break
            except Exception as e:
                log.error('slave activity report failed, retrying: %s', e)
                time.sleep(1)

        for priority in sorted(SCHEDULE_PRIORITY):
            # clear inactive threads
            threadPool = [worker for worker in threadPool
                          if worker.is_alive()]
            group = SCHEDULE_PRIORITY[priority]
            for schedule in group:
                log.debug('schedule %s', schedule)

                # Recomputed per schedule because threadPool grows as
                # workers are started inside this loop.
                active_names = [worker.name for worker in threadPool]
                peers = [other for other in group if other != schedule]
                if (reConn.exists(schedule)
                        and schedule not in active_names
                        and all(name in peers for name in active_names)):
                    worker = threading.Thread(
                        target=globals()[THREAD_TARGET[schedule]],
                        name=schedule,
                        args=(reConn, db_pools))
                    threadPool.append(worker)
                    worker.start()

        log.debug('thread waiting for %d', poll_seconds)
        time.sleep(poll_seconds)
示例#6
0
def executeItemRateSchedule(
    schedule_name='Schedule_itemRate2',
    schedule_error_name='Schedule_itemRate2Error',
    schedule_process_name='Schedule_itemRate2_process',
    schedule_update_name='TopItemRateUpdateTime',
):
    """Process up to ten item ids from the item-rate schedule.

    For each round: take one item id from ``schedule_name`` with SPOP, mark
    it in the ``schedule_process_name`` hash, fetch its rates with
    ``getRate3`` from the stored update date (default ``'1900-1-1'``) up to
    today, and either queue the rows for saving on ``bll_queue`` or, when
    nothing was fetched, log success and clear the in-process mark.

    The loop ends early when the module-level ``INTERRUPT`` flag is set
    (the flag is reset) or when the schedule yields no item id.

    Args:
        schedule_name: Redis key the item ids are popped from.
        schedule_error_name: Redis hash that receives per-item error info.
        schedule_process_name: Redis hash of item ids being processed.
        schedule_update_name: Redis hash of per-item last update dates.

    Errors raised while fetching or queueing are printed, reported through
    ``_slave_error`` and recorded in ``schedule_error_name``; they do not
    stop the loop.
    """
    global RE_CONN, INTERRUPT
    dbConnPool = _reload_slave_db_pool(schedule_name)
    n = 0
    # Per-IP settings take precedence over the defaults.
    reqinterval = (slave_config.get(IP)
                   or slave_config.get('default'))['reqinterval']

    for _ in range(10):
        if INTERRUPT:
            INTERRUPT = False
            break
        # slave config
        _slave_interval(n, 1)

        itemId = None
        while 1:
            try:
                if itemId is None:
                    itemId = RE_CONN.spop(schedule_name)
                # Only mark a real id as in-process: an empty schedule
                # yields no id and must not create a bogus hash field.
                if itemId:
                    RE_CONN.hset(schedule_process_name, itemId, 1)
                n = (n + 1) % 100
                break
            except Exception:
                # Redis failure: rebuild the connection and retry. An id
                # that was already popped is kept so it is not lost.
                RE_CONN = getRedisConn2()
        if not itemId:
            break

        toDate = datetime.today().strftime('%Y-%m-%d')
        fromDate = RE_CONN.hget(schedule_update_name,
                                str(itemId)) or '1900-1-1'
        _slave_info(schedule_name, 'Start', itemId, fromDate, toDate)
        # Both timestamps are set up front so the error report below can
        # always compute its durations.
        t1 = t2 = time.time()
        try:
            data = getRate3('http://item.taobao.com/item.htm?id=%s' % itemId,
                            fromDate,
                            toDate,
                            reqinterval=reqinterval)
            t2 = time.time()
            if len(data) > 0:
                # Redis command passed along with the save job; it records
                # toDate as the item's new update date.
                re_command = ['HSET', schedule_update_name, itemId, toDate]
                bll_queue.put(
                    (saveTopRate, data, dbConnPool, None, schedule_name,
                     itemId, schedule_process_name, schedule_error_name,
                     t1, t2, re_command))
            else:
                _slave_info(
                    schedule_name,
                    'Success',
                    itemId,
                    'len',
                    0,
                    'req',
                    '%.1f' % (t2 - t1),
                )
                RE_CONN.hdel(schedule_process_name, str(itemId))
        except Exception as e:
            traceback.print_exc()
            _slave_error(
                schedule_name,
                'Error',
                itemId,
                str(e),
                'req',
                '%.1f' % (t2 - t1),
                'save',
                '%.1f' % (time.time() - t2),
            )
            RE_CONN.hset(schedule_error_name, itemId,
                         json.dumps({
                             'SlaveID': SLAVEID,
                             'msg': str(e)
                         }))
示例#7
0
def executeItemTradeSchedule(
    schedule_name='Schedule_itemTrade2',
    schedule_error_name='Schedule_itemTradeError',
    schedule_process_name='Schedule_itemTrade_process',
    schedule_update_name='TopItemTradeUpdateTime',
    schedule_config_uri_name='TopItemTradeUri',
    schedule_config_id_name='TopItemShopId',
):
    """Process up to ten item ids from the item-trade schedule.

    For each round: take one item id from ``schedule_name`` with SPOP, mark
    it in the ``schedule_process_name`` hash, read its stored update date
    (default ``'1900/1/1'``), request-URI data and config data from Redis,
    and fetch its trades with ``getTrades3`` up to today. Fetched rows are
    queued for saving on ``bll_queue``; URI/config data returned by the
    fetch is stored when none was stored before. When nothing was fetched,
    success is logged and the in-process mark is cleared.

    The loop ends early when the module-level ``INTERRUPT`` flag is set
    (the flag is reset) or when the schedule yields no item id.

    Args:
        schedule_name: Redis key the item ids are popped from.
        schedule_error_name: Redis hash that receives per-item error info.
        schedule_process_name: Redis hash of item ids being processed.
        schedule_update_name: Redis hash of per-item last update dates.
        schedule_config_uri_name: Redis hash of per-item URL-quoted JSON
            passed to ``getTrades3`` as ``reqUri``.
        schedule_config_id_name: Redis hash of per-item JSON passed to
            ``getTrades3`` as ``config``.

    Errors raised while fetching or queueing are printed, reported through
    ``_slave_error`` and recorded in ``schedule_error_name``; they do not
    stop the loop.
    """
    global RE_CONN, INTERRUPT
    dbConnPool = _reload_slave_db_pool(schedule_name)
    n = 0
    # Per-IP settings take precedence over the defaults.
    reqinterval = (slave_config.get(IP)
                   or slave_config.get('default'))['reqinterval']

    for _ in range(10):
        if INTERRUPT:
            INTERRUPT = False
            break
        # slave config
        _slave_interval(n, 1)

        # keep connection with Redis Server
        itemId = None
        while 1:
            try:
                if itemId is None:
                    itemId = RE_CONN.spop(schedule_name)
                # Only mark a real id as in-process: an empty schedule
                # yields no id and must not create a bogus hash field.
                if itemId:
                    RE_CONN.hset(schedule_process_name, itemId, 1)
                n = (n + 1) % 100
                break
            except Exception:
                # Redis failure: rebuild the connection and retry. An id
                # that was already popped is kept so it is not lost.
                RE_CONN = getRedisConn2()
        if not itemId:
            break

        toDate = datetime.today().strftime('%Y/%m/%d')
        item_key = str(itemId)
        (fromDate, param_uri_meta,
         config_id_meta) = ibbdlib.redislib.redis_pipe(
             RE_CONN, [['hget', schedule_update_name, item_key],
                       ['hget', schedule_config_uri_name, item_key],
                       ['hget', schedule_config_id_name, item_key]])
        fromDate = fromDate or '1900/1/1'
        param_uri_meta = (json.loads(unquote(param_uri_meta))
                          if param_uri_meta else {})
        config_id_meta = (json.loads(config_id_meta)
                          if config_id_meta else {})
        _slave_info(schedule_name, 'Start', itemId, fromDate, toDate)
        # Both timestamps are set up front so the error report below can
        # always compute its durations.
        t1 = t2 = time.time()
        try:
            (data, param_uri, config_id) = getTrades3(
                'http://item.taobao.com/item.htm?id=%s' % itemId,
                fromDate,
                toDate,
                reqUri=param_uri_meta,
                config=config_id_meta,
                reqinterval=reqinterval,
            )
            t2 = time.time()

            if len(data) > 0:
                # Redis command passed along with the save job; the new
                # update date is one day after the date parsed from
                # column 10 of the first fetched row.
                re_command = [
                    'HSET', schedule_update_name, itemId,
                    (datetimeparse(data[0][10]) +
                     timedelta(1)).strftime('%Y/%m/%d')
                ]

                bll_queue.put(
                    (saveTopTrade, data, dbConnPool, None, schedule_name,
                     itemId, schedule_process_name, schedule_error_name,
                     t1, t2, re_command))
                # Store the fetch parameters only when none were stored.
                if not param_uri_meta:
                    RE_CONN.hset(schedule_config_uri_name, item_key,
                                 quote(json.dumps(param_uri)))
                if not config_id_meta:
                    RE_CONN.hset(schedule_config_id_name, item_key,
                                 json.dumps(config_id))
            else:
                _slave_info(
                    schedule_name,
                    'Success',
                    itemId,
                    'len',
                    0,
                    'req',
                    '%.1f' % (t2 - t1),
                )
                RE_CONN.hdel(schedule_process_name, item_key)
        except Exception as e:
            traceback.print_exc()
            _slave_error(
                schedule_name,
                'Error',
                itemId,
                str(e),
                'req',
                '%.1f' % (t2 - t1),
                'save',
                '%.1f' % (time.time() - t2),
            )
            RE_CONN.hset(schedule_error_name, itemId,
                         json.dumps({
                             'SlaveID': SLAVEID,
                             'msg': str(e)
                         }))