def interrupt_runner():
    """Listen for slave interrupt requests on Redis pub/sub.

    Subscribes to the ``slave_interrupt`` channel and sets the module-level
    ``INTERRUPT`` flag to ``True`` as soon as a message is published on it
    (when accepted, the slave is expected to quit its current work and
    restart).  The flag is reset to ``False`` whenever the message stream
    ends or an error occurs.

    This function never returns: after each subscription round it sleeps
    10 seconds and then connects and subscribes again.
    """
    global INTERRUPT
    while 1:
        try:
            # NOTE(review): only the 'default' profile is used.  A per-IP
            # lookup used to precede this line but its result was always
            # overwritten -- confirm whether per-IP config should apply.
            conn_config = slave_config.get('default')
            connection_pool = redis.ConnectionPool(**conn_config.get('redis'))
            client = redis.Redis(connection_pool=connection_pool)
            pubsub = client.pubsub()
            pubsub.subscribe('slave_interrupt')
            listener = pubsub.listen()
            while 1:
                try:
                    msg = next(listener)
                    # Only published payloads count; other message types
                    # (e.g. the subscribe confirmation) are ignored.
                    if msg['type'] == 'message':
                        INTERRUPT = True
                        log.info('slave interrupt accepted')
                        break
                except StopIteration:
                    # Message stream is exhausted: leave the inner loop so
                    # the outer loop can re-subscribe.
                    INTERRUPT = False
                    break
                except Exception:
                    # `Exception` (not a bare except) so KeyboardInterrupt
                    # and SystemExit can still terminate the process.
                    INTERRUPT = False
                    log.error(traceback.format_exc())
        except Exception:
            INTERRUPT = False
            log.error(traceback.format_exc())
        # Back off before (re)connecting.
        time.sleep(10)
def main():
    """Run the slave start-up sequence once and release its resources.

    Sets the module-level ``SLAVEID`` and ``IP`` globals, builds a MySQL
    connection pool and a Redis connection from the 'default' profile of
    ``slave_config``, emits the start-up log lines and finally disconnects
    the MySQL pool.
    """
    global SLAVEID, IP

    _slave_info('initing...')

    # slave ID
    SLAVEID = getMacId()
    # slave IP
    IP = getMyIpInfo()['data']['ip']

    # NOTE(review): only the 'default' profile is used.  A lookup with the
    # literal key 'IP' used to precede this line but its result was always
    # overwritten -- confirm whether per-IP config should apply.
    conn_config = slave_config.get('default')

    # connection pool
    dbConnPool = ConnectionPool(max_connections=1000,
                                **conn_config.get('mysql'))
    try:
        # The Redis connection is only created here as part of start-up; it
        # is dropped when the function returns.
        reConn = getRedisConn2(conn_config.get('redis'))
        _slave_info('init... complete')
        _slave_info('slave start to work')
    finally:
        # Always release the MySQL pool, even if the Redis connection or
        # logging fails.
        dbConnPool.disconnect()
def prepar():
    """Prepare the module-level state used by the slave.

    Sets the globals ``SLAVEID``, ``IP``, ``log`` and ``RE_CONN`` and
    registers a MySQL connection pool in ``DB_POOLS`` under the configured
    MySQL host.
    """
    global SLAVEID, IP, log, DB_POOLS, RE_CONN

    # slave ID
    SLAVEID = getMacId()
    # slave IP
    IP = getMyIpInfo()['data']['ip']

    # Logger output target: the per-IP profile when one exists, otherwise
    # the 'default' profile.
    log_profile = slave_config.get(IP) or slave_config.get('default')
    log = ibbdlib.ibbdlog.get_logger(
        log_path='../log/',
        log_file='slave %s %s.log' % (
            PID, time.strftime('%Y%m%d', time.localtime())),
        log_name='spider.slave',
        msg_format='%(asctime)s\t%(name)s\t%(process)d\t%(levelname)s\t%(message)s',
        output=log_profile['slave_info_log'])

    _slave_info('initing...')

    # NOTE(review): connections always use the 'default' profile.  A per-IP
    # lookup used to precede this line but its result was always
    # overwritten -- confirm whether per-IP config should apply.
    conn_config = slave_config.get('default')
    mysql_config = conn_config.get('mysql')

    # connection pool; nested dict entries of the mysql section are not
    # passed on as pool keyword arguments
    pool_kwargs = {key: value for key, value in mysql_config.items()
                   if not isinstance(value, dict)}
    dbConnPool = ConnectionPool(max_connections=1000, **pool_kwargs)
    reConn = getRedisConn2(conn_config.get('redis'))

    _slave_info('init... complete')
    _slave_info('slave start to work')

    DB_POOLS[mysql_config['host']] = dbConnPool
    RE_CONN = reConn
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
# Merge additional schedule -> MySQL server settings into the JSON document
# stored under the Redis key 'slave_db_server'.
import json

import ibbdlib
from slave_config import slave_config

re_conn = ibbdlib.get_redis_conn(**slave_config.get('default').get('redis'))

# NOTE(review): database host and credentials are hard-coded here -- consider
# moving them into configuration.
schedule_db_mapping = {
    'Schedule_topItemFullInfo': {
        'host': '223.4.155.152',
        'user': '******',
        'passwd': 'spider',
        'db': 'topspider',
        'charset': 'utf8',
    },
}

# The key may not exist yet (GET then returns None); start from an empty
# mapping instead of failing inside json.loads.
raw_mapping = re_conn.get('slave_db_server')
slave_db_server = json.loads(raw_mapping) if raw_mapping else {}
slave_db_server.update(schedule_db_mapping)
re_conn.set('slave_db_server', json.dumps(slave_db_server))
def main():
    """Initialise the slave and run the schedule dispatch loop forever.

    Sets the globals ``log``, ``SLAVEID`` and ``IP``, builds one MySQL
    connection pool per entry of ``db_server`` plus a 'master' pool from
    ``db_server_master``, connects to Redis, and then repeatedly starts one
    worker thread per runnable schedule.  This function does not return.
    """
    global log, SLAVEID, IP

    log = _get_log()
    _slave_info('initing...')

    # slave ID
    SLAVEID = getMacId()
    # slave IP
    IP = getMyIpInfo()['data']['ip']

    # Per-IP profile if one exists, otherwise the default one.  The lookup
    # key is the detected address held in IP, not the literal string 'IP'.
    conn_config = slave_config.get(IP) or slave_config.get('default')

    # connection pools, keyed by DB server plus the special 'master' entry
    dbConnPoolList = {
        db_server_ip: ConnectionPool(max_connections=1000, **db_server_config)
        for (db_server_ip, db_server_config) in db_server.items()
    }
    dbConnPoolList['master'] = ConnectionPool(max_connections=1000,
                                              **db_server_master)
    reConn = getRedisConn2(conn_config.get('redis'))

    _slave_info('init... complete')
    _slave_info('slave start to work')

    poll_interval = 10  # seconds between dispatch rounds
    threadPool = []
    while 1:
        # Retry the slave activity/interval calls until both succeed.
        while 1:
            try:
                _slave_activity(reConn)
                _slave_interval(reConn, 10, 10)
                break
            except Exception as exc:
                # Report the failure and pause briefly instead of spinning
                # silently.
                log.error('slave activity update failed, retrying: %s', exc)
                time.sleep(1)

        for priority in sorted(SCHEDULE_PRIORITY):
            # drop threads that have finished
            threadPool = [worker for worker in threadPool
                          if worker.is_alive()]
            for schedule in SCHEDULE_PRIORITY[priority]:
                log.debug('schedule %s', schedule)
                running = [worker.name for worker in threadPool]
                peers = [other for other in SCHEDULE_PRIORITY[priority]
                         if other != schedule]
                # A schedule is started when:
                # 1. its key exists in Redis,
                # 2. it has no live thread of its own, and
                # 3. every live thread belongs to another schedule of this
                #    same priority level.
                # The target function is looked up by name via THREAD_TARGET.
                if (reConn.exists(schedule)
                        and schedule not in running
                        and all(name in peers for name in running)):
                    worker = threading.Thread(
                        target=globals()[THREAD_TARGET[schedule]],
                        name=schedule,
                        args=(reConn, dbConnPoolList))
                    threadPool.append(worker)
                    worker.start()

        # Log the interval that is actually slept.
        log.debug('thread waiting for %d', poll_interval)
        time.sleep(poll_interval)
def executeItemRateSchedule(
        schedule_name='Schedule_itemRate2',
        schedule_error_name='Schedule_itemRate2Error',
        schedule_process_name='Schedule_itemRate2_process',
        schedule_update_name='TopItemRateUpdateTime',
):
    """Process up to 10 items from the item-rate schedule.

    For each item popped from the Redis set ``schedule_name`` the rate data
    is fetched with ``getRate3`` and, when non-empty, queued on
    ``bll_queue`` for ``saveTopRate``.  The batch stops early when the
    ``INTERRUPT`` flag is set (the flag is cleared) or the set is empty.

    :param schedule_name: Redis set the item ids are popped from.
    :param schedule_error_name: Redis hash that receives a JSON error record
        (slave id and message) for items whose processing raised.
    :param schedule_process_name: Redis hash in which a popped item is marked
        with value 1; the mark is removed here when no data was returned.
    :param schedule_update_name: Redis hash read for the item's start date
        (``'1900-1-1'`` when missing); an HSET command with today's date is
        handed over together with the queued data.
    """
    global RE_CONN, INTERRUPT

    dbConnPool = _reload_slave_db_pool(schedule_name)
    n = 0
    # per-IP profile when present, otherwise the default one
    profile = slave_config.get(IP) or slave_config.get('default')
    reqinterval = profile['reqinterval']

    for _ in range(10):
        if INTERRUPT:
            INTERRUPT = False
            break

        # slave config
        _slave_interval(n, 1)

        # Pop the next item, reconnecting to Redis on failure.
        itemId = None
        while 1:
            try:
                # Pop only once: if marking the item fails below, the retry
                # must not pop (and thereby drop) another item.
                if itemId is None:
                    itemId = RE_CONN.spop(schedule_name)
                # An empty set yields None; do not record a bogus field.
                if itemId:
                    RE_CONN.hset(schedule_process_name, itemId, 1)
                n = (n + 1) % 100
                break
            except Exception:
                traceback.print_exc()
                time.sleep(1)  # avoid a hot loop while Redis is unreachable
                RE_CONN = getRedisConn2()

        if not itemId:
            # nothing left to do
            break

        toDate = datetime.today().strftime('%Y-%m-%d')
        fromDate = (RE_CONN.hget(schedule_update_name, str(itemId))
                    or '1900-1-1')
        _slave_info(schedule_name, 'Start', itemId, fromDate, toDate)
        try:
            # t1/t2 are initialised together so the error handler can always
            # format both durations.
            t1 = t2 = time.time()
            data = getRate3(
                'http://item.taobao.com/item.htm?id=%s' % itemId,
                fromDate, toDate, reqinterval=reqinterval)
            t2 = time.time()
            if len(data) > 0:
                re_command = ['HSET', schedule_update_name, itemId, toDate]
                bll_queue.put(
                    (saveTopRate, data, dbConnPool, None, schedule_name,
                     itemId, schedule_process_name, schedule_error_name,
                     t1, t2, re_command))
            else:
                _slave_info(schedule_name, 'Success', itemId, 'len', 0,
                            'req', '%.1f' % (t2 - t1))
                RE_CONN.hdel(schedule_process_name, str(itemId))
        except Exception as e:
            traceback.print_exc()
            _slave_error(
                schedule_name, 'Error', itemId, str(e),
                'req', '%.1f' % (t2 - t1),
                'save', '%.1f' % (time.time() - t2),
            )
            RE_CONN.hset(schedule_error_name, itemId,
                         json.dumps({'SlaveID': SLAVEID, 'msg': str(e)}))
def executeItemTradeSchedule(
        schedule_name='Schedule_itemTrade2',
        schedule_error_name='Schedule_itemTradeError',
        schedule_process_name='Schedule_itemTrade_process',
        schedule_update_name='TopItemTradeUpdateTime',
        schedule_config_uri_name='TopItemTradeUri',
        schedule_config_id_name='TopItemShopId',
):
    """Process up to 10 items from the item-trade schedule.

    For each item popped from the Redis set ``schedule_name`` the trade data
    is fetched with ``getTrades3`` and, when non-empty, queued on
    ``bll_queue`` for ``saveTopTrade``.  The batch stops early when the
    ``INTERRUPT`` flag is set (the flag is cleared) or the set is empty.

    :param schedule_name: Redis set the item ids are popped from.
    :param schedule_error_name: Redis hash that receives a JSON error record
        (slave id and message) for items whose processing raised.
    :param schedule_process_name: Redis hash in which a popped item is marked
        with value 1; the mark is removed here when no data was returned.
    :param schedule_update_name: Redis hash read for the item's start date
        (``'1900/1/1'`` when missing); an HSET command with the date of
        ``data[0][10]`` plus one day is handed over with the queued data.
    :param schedule_config_uri_name: Redis hash caching the URL-quoted JSON
        request-URI parameters per item.
    :param schedule_config_id_name: Redis hash caching the JSON id config per
        item.
    """
    global RE_CONN, INTERRUPT

    dbConnPool = _reload_slave_db_pool(schedule_name)
    n = 0
    # per-IP profile when present, otherwise the default one
    profile = slave_config.get(IP) or slave_config.get('default')
    reqinterval = profile['reqinterval']

    for _ in range(10):
        if INTERRUPT:
            INTERRUPT = False
            break

        # slave config
        _slave_interval(n, 1)

        # Pop the next item, reconnecting to Redis on failure.
        itemId = None
        while 1:
            try:
                # Pop only once: if marking the item fails below, the retry
                # must not pop (and thereby drop) another item.
                if itemId is None:
                    itemId = RE_CONN.spop(schedule_name)
                # An empty set yields None; do not record a bogus field.
                if itemId:
                    RE_CONN.hset(schedule_process_name, itemId, 1)
                n = (n + 1) % 100
                break
            except Exception:
                traceback.print_exc()
                time.sleep(1)  # avoid a hot loop while Redis is unreachable
                RE_CONN = getRedisConn2()

        if not itemId:
            # nothing left to do
            break

        toDate = datetime.today().strftime('%Y/%m/%d')
        item_key = str(itemId)
        (fromDate, param_uri_meta, config_id_meta) = \
            ibbdlib.redislib.redis_pipe(
                RE_CONN,
                [['hget', schedule_update_name, item_key],
                 ['hget', schedule_config_uri_name, item_key],
                 ['hget', schedule_config_id_name, item_key]])
        fromDate = fromDate or '1900/1/1'
        # Cached request parameters are stored URL-quoted JSON / plain JSON.
        param_uri_meta = (json.loads(unquote(param_uri_meta))
                          if param_uri_meta else {})
        config_id_meta = json.loads(config_id_meta) if config_id_meta else {}

        _slave_info(schedule_name, 'Start', itemId, fromDate, toDate)
        try:
            # t1/t2 are initialised together so the error handler can always
            # format both durations.
            t1 = t2 = time.time()
            (data, param_uri, config_id) = getTrades3(
                'http://item.taobao.com/item.htm?id=%s' % itemId,
                fromDate,
                toDate,
                reqUri=param_uri_meta,
                config=config_id_meta,
                reqinterval=reqinterval,
            )
            t2 = time.time()
            if len(data) > 0:
                next_from = (datetimeparse(data[0][10])
                             + timedelta(1)).strftime('%Y/%m/%d')
                re_command = ['HSET', schedule_update_name, itemId, next_from]
                bll_queue.put(
                    (saveTopTrade, data, dbConnPool, None, schedule_name,
                     itemId, schedule_process_name, schedule_error_name,
                     t1, t2, re_command))
                # Cache the request parameters returned by getTrades3 when
                # none were stored for this item yet.
                if not param_uri_meta:
                    RE_CONN.hset(schedule_config_uri_name, item_key,
                                 quote(json.dumps(param_uri)))
                if not config_id_meta:
                    RE_CONN.hset(schedule_config_id_name, item_key,
                                 json.dumps(config_id))
            else:
                _slave_info(schedule_name, 'Success', itemId, 'len', 0,
                            'req', '%.1f' % (t2 - t1))
                RE_CONN.hdel(schedule_process_name, item_key)
        except Exception as e:
            traceback.print_exc()
            _slave_error(
                schedule_name, 'Error', itemId, str(e),
                'req', '%.1f' % (t2 - t1),
                'save', '%.1f' % (time.time() - t2),
            )
            RE_CONN.hset(schedule_error_name, itemId,
                         json.dumps({'SlaveID': SLAVEID, 'msg': str(e)}))