def _getS_shop_items(url):
    """Collect every item of a shop into a single dictionary.

    Args:
        url: Shop URL, handed both to ``TopShop`` and to
            ``getShopItemsOverview``.

    Returns:
        dict: Maps element 3 of each overview record to element 4 of the
        same record.  According to the original docstring these are the
        item id and the item name respectively.  When two records share a
        key, the later record wins.
    """
    # NOTE(review): the resolved shop link is looked up but never used.  The
    # lookup is kept so that any side effect or exception it produces is
    # preserved -- confirm whether getShopItemsOverview() was meant to
    # receive this link instead of the raw url.
    TopShop(url).getBasicInfo()['shopLink']

    items = {}
    for record in getShopItemsOverview(url):
        key = record[3]
        value = record[4]
        items[key] = value
    return items
def runSchedule():
    """Re-run the item crawl for every shop listed in the error hash.

    Each field name of the ``Schedule_shopAllitemInfoError`` Redis hash is
    treated as a shop user id.  The shop's items are fetched with
    ``getShopItemsOverview`` and handed to ``saveTopAllitemInfo``.  A
    failure for one shop only prints that shop's id; the remaining shops
    are still processed.  This function does not remove entries from the
    error hash.
    """
    redis_conn = getRedisConn2()
    failed_uids = redis_conn.hkeys('Schedule_shopAllitemInfoError')
    for uid in failed_uids:
        url = 'http://store.taobao.com/shop/view_shop.htm?user_number_id=%s' % uid
        try:
            saveTopAllitemInfo(getShopItemsOverview(url))
        except Exception:
            # Best effort: report which shop failed and keep going.
            print(uid)
def main():
    """Crawl a hard-coded list of shops and print every item record.

    Manual entry point: for each shop URL the item overview is fetched with
    ``getShopItemsOverview`` and every record is printed as indented JSON,
    prefixed with its 1-based position.  Nothing is saved.
    """
    # Shops used for earlier manual runs, kept for reference only:
    #   http://conba.tmall.com       http://hangxian.taobao.com
    #   http://glorylife.taobao.com  http://cafetown.tmall.com
    # The previous code assigned the first two to ``urls`` and then
    # immediately overwrote the list, so they were never crawled.
    urls = ['http://gerun.tmall.com/']

    for url in urls:
        # NOTE(review): the very large page/count values presumably mean
        # "fetch everything" -- confirm against getShopItemsOverview.
        metadata = getShopItemsOverview(
            url, page=1000000, count=1000000, reqinterval=1)
        # Persisting is switched off here; call saveTopAllitemInfo(metadata)
        # to store the result.
        for position, tdata in enumerate(metadata, 1):
            # A single pre-formatted argument prints identically whether
            # ``print`` is the Python 2 statement or the function.
            print('%d %s' % (
                position,
                json.dumps(tdata, indent=4, ensure_ascii=False),
            ))
def main():
    """Fetch and print the item overview of each hard-coded shop.

    Intended for manual runs: every record returned by
    ``getShopItemsOverview`` is dumped as indented, non-ASCII-escaped JSON
    preceded by its 1-based index.  The result is only printed, not saved.
    """
    # Only this shop is crawled.  The old code first assigned
    # ['http://conba.tmall.com', 'http://hangxian.taobao.com'] and then
    # overwrote it straight away (a dead store), with
    # 'http://glorylife.taobao.com' and 'http://cafetown.tmall.com'
    # commented out; the effective list is unchanged.
    urls = [
        'http://gerun.tmall.com/',
    ]

    for url in urls:
        # NOTE(review): page/count look like "no limit" sentinels -- verify
        # with the getShopItemsOverview implementation.
        metadata = getShopItemsOverview(
            url, page=1000000, count=1000000, reqinterval=1)
        # saveTopAllitemInfo(metadata) is intentionally not called here.
        for i, tdata in enumerate(metadata):
            dumped = json.dumps(tdata, indent=4, ensure_ascii=False)
            # One formatted argument keeps the output identical under both
            # the Python 2 print statement and the print function.
            print('%d %s' % (i + 1, dumped))
def executeShopAllitemInfoSchedule(
        reConn,
        dbConnPool,
        schedule_name='Schedule_shopAllitemInfo',
        schedule_error_name='Schedule_shopAllitemInfoError',
        schedule_process_name='Schedule_shopAllitemInfo_process',
):
    """Process up to ten shop ids taken from the Redis schedule set.

    Each round pops one shop user id from ``schedule_name`` and flags it in
    ``schedule_process_name``.  It then resolves the shop link, fetches the
    item overview and starts a thread running ``_thread_top_bll`` to save
    the data.  The function returns as soon as the pop yields no id.

    Args:
        reConn: Redis connection.  It is replaced locally by
            ``getRedisConn2()`` whenever popping/flagging an id fails.
        dbConnPool: Mapping whose ``'master'`` entry is passed to the save
            thread.
        schedule_name: Redis set holding the pending shop user ids.
        schedule_error_name: Redis hash that receives a JSON error record
            (``SlaveID`` and ``msg``) for every id whose crawl failed.
        schedule_process_name: Redis hash in which each popped id is
            flagged with ``1``.

    Raises:
        KeyboardInterrupt, SystemExit: These now propagate out of the Redis
            retry loop instead of being swallowed by it.
    """
    n = 0
    for _ in range(10):
        # slave config
        _slave_interval(reConn, n, 3)
        _busy_interval()

        # keep connection with Redis Server
        shopUserId = None
        while 1:
            try:
                # Pop only if no id is held yet.  If the hset below fails
                # after a successful pop, the retry re-flags the same id
                # instead of popping (and losing) another one.
                if shopUserId is None:
                    shopUserId = reConn.spop(schedule_name)
                # An empty set yields None; do not write a bogus "None"
                # field into the process hash in that case.
                if shopUserId is not None:
                    reConn.hset(schedule_process_name, shopUserId, 1)
                n = (n + 1) % 100
                break
            except Exception:
                # Not a bare ``except``: the process must stay
                # interruptible while Redis is unreachable.
                reConn = getRedisConn2()

        if not shopUserId:
            # Nothing left to do.
            break

        _slave_info(schedule_name, 'Start', shopUserId)
        # t1 marks the start of the request phase.  t2 is moved forward once
        # the data is fetched, so the error report can split the elapsed
        # time into "req" and "save".
        t1 = t2 = time.time()
        try:
            shop = TopShop(
                'http://store.taobao.com/shop/view_shop.htm?user_number_id=%s'
                % shopUserId).getBasicInfo()
            shopLink = shop['shopLink']
            data = getShopItemsOverview(shopLink)
            t2 = time.time()
            threading.Thread(target=_thread_top_bll, args=(
                saveTopAllitemInfo,
                data,
                dbConnPool['master'],
                reConn,
                schedule_name,
                shopUserId,
                schedule_process_name,
                schedule_error_name,
                t1,
                t2,
            )).start()
        except Exception as e:
            _slave_error(
                schedule_name,
                'Error',
                shopUserId,
                str(e),
                'req',
                '%.1f' % (t2 - t1),
                'save',
                '%.1f' % (time.time() - t2),
            )
            reConn.hset(
                schedule_error_name,
                shopUserId,
                json.dumps({'SlaveID': SLAVEID, 'msg': str(e)}),
            )
def executeShopAllitemInfoSchedule(
        schedule_name='Schedule_shopAllitemInfo',
        schedule_error_name='Schedule_shopAllitemInfoError',
        schedule_process_name='Schedule_shopAllitemInfo_process',
):
    """Process up to ten shop ids taken from the Redis schedule set.

    Uses the module-level ``RE_CONN`` Redis connection.  Each round pops one
    shop user id from ``schedule_name`` and flags it in
    ``schedule_process_name``.  It then resolves the shop link, fetches the
    item overview and puts a save job on ``bll_queue``.  The function
    returns early when ``INTERRUPT`` is set (the flag is reset) or when the
    pop yields no id.

    Args:
        schedule_name: Redis set holding the pending shop user ids.  It is
            also passed to ``_reload_slave_db_pool`` to obtain the DB pool.
        schedule_error_name: Redis hash that receives a JSON error record
            (``SlaveID`` and ``msg``) for every id whose crawl failed.
        schedule_process_name: Redis hash in which each popped id is
            flagged with ``1``.

    Raises:
        KeyboardInterrupt, SystemExit: These now propagate out of the Redis
            retry loop instead of being swallowed by it.
    """
    global RE_CONN, INTERRUPT

    dbConnPool = _reload_slave_db_pool(schedule_name)
    n = 0
    for _ in range(10):
        if INTERRUPT:
            # Consume the request so the next call starts normally.
            INTERRUPT = False
            break

        # slave config
        _slave_interval(n, 3)

        # keep connection with Redis Server
        shopUserId = None
        while 1:
            try:
                # Pop only if no id is held yet.  If the hset below fails
                # after a successful pop, the retry re-flags the same id
                # instead of popping (and losing) another one.
                if shopUserId is None:
                    shopUserId = RE_CONN.spop(schedule_name)
                # An empty set yields None; do not write a bogus "None"
                # field into the process hash in that case.
                if shopUserId is not None:
                    RE_CONN.hset(schedule_process_name, shopUserId, 1)
                n = (n + 1) % 100
                break
            except Exception:
                # Not a bare ``except``: the process must stay
                # interruptible while Redis is unreachable.
                RE_CONN = getRedisConn2()

        if not shopUserId:
            # Nothing left to do.
            break

        _slave_info(schedule_name, 'Start', shopUserId)
        # t1 marks the start of the request phase.  t2 is moved forward once
        # the data is fetched, so the error report can split the elapsed
        # time into "req" and "save".
        t1 = t2 = time.time()
        try:
            shop = TopShop(
                'http://store.taobao.com/shop/view_shop.htm?user_number_id=%s'
                % shopUserId).getBasicInfo()
            shopLink = shop['shopLink']
            data = getShopItemsOverview(shopLink)
            t2 = time.time()
            bll_queue.put((
                saveTopAllitemInfo,
                data,
                dbConnPool,
                None,
                schedule_name,
                shopUserId,
                schedule_process_name,
                schedule_error_name,
                t1,
                t2,
            ))
        except Exception as e:
            _slave_error(
                schedule_name,
                'Error',
                shopUserId,
                str(e),
                'req',
                '%.1f' % (t2 - t1),
                'save',
                '%.1f' % (time.time() - t2),
            )
            RE_CONN.hset(
                schedule_error_name,
                shopUserId,
                json.dumps({
                    'SlaveID': SLAVEID,
                    'msg': str(e)
                }),
            )