def __init__(self):
    """Open the Redis, MongoDB and MySQL handles used by this object.

    Besides the connections, this selects the MongoDB collection named
    ``m_<date>`` (``<date>`` being whatever ``date()`` returns) and makes
    sure it carries a unique index on ``item_data``.
    """
    self.redis = db_redis()
    self.mongodb = db_mongodb()
    self.mongodb_remote = None  # no remote MongoDB handle is opened here
    self.date = date()
    self.collection = self.mongodb['m_{0}'.format(self.date)]
    # NOTE(review): assumes db_mongodb() returns a PyMongo database - confirm.
    # create_index is idempotent and replaces ensure_index, which PyMongo
    # deprecated in 3.0 and removed in 4.0.
    self.collection.create_index('item_data', unique=True)
    self.mysql = db_mysql()
    self.mysql_cursor = self.mysql.cursor()
def save_all_top_category():
    """Record the top-level parent of every known category id in Redis.

    For each id yielded by ``all_category_in_redis()`` the value returned by
    ``get_top_parent`` is written to the ``ebay:top_category_id_us`` hash
    (field: category id, value: top id).  The set of distinct top ids and its
    size are printed once all ids have been processed.
    """
    client = db_redis()
    distinct_tops = set()
    # TODO: fetch the ids from the redis leaf_category set instead of mysql
    for category_id in all_category_in_redis():
        top_id = get_top_parent(category_id)
        distinct_tops.add(top_id)
        client.hset('ebay:top_category_id_us', int(category_id), int(top_id))
    print(distinct_tops)
    print(len(distinct_tops))
def __init__(self, redis=None, mongodb=None, mysql=None, datetime=None):
    """Wire up the data-store handles and start the statistician.

    Args:
        redis: Redis client; ``db_redis()`` is used when falsy.
        mongodb: MongoDB handle; ``db_mongodb('mongodb_remote')`` is used
            when falsy.
        mysql: MySQL connection; ``db_mysql()`` is used when falsy.
        datetime: Value stored as ``self.date``; ``date()`` is used when
            falsy.
    """
    self.redis = redis or db_redis()
    self.mongodb = mongodb or db_mongodb('mongodb_remote')
    self.mysql = mysql or db_mysql()
    self.mysql_cursor = self.mysql.cursor()
    self.date = datetime or date()
    # The local MySQL connection is best effort: on failure, whichever of
    # ``mysql_local`` / ``mysql_cursor_local`` was not yet assigned is simply
    # left unset.  Catch Exception rather than using a bare ``except`` so
    # that KeyboardInterrupt and SystemExit still propagate.
    try:
        self.mysql_local = db_mysql('mysql_local')
        self.mysql_cursor_local = self.mysql_local.cursor()
    except Exception:
        pass
    self.start_statistician()
def main():
    """Run the site-wide goods statistics, then the site-wide shop statistics.

    Both statisticians are given the same Redis, MongoDB and MySQL handles
    and the same date value.
    """
    shared = {
        'redis': db_redis(),
        'mongodb': db_mongodb('mongodb_remote'),
        'mysql': db_mysql('mysql_remote'),
        'datetime': date(),
    }

    # Site-wide goods statistics
    goods = GoodsStatistician(**shared)
    goods.save()

    # Site-wide shop statistics
    shops = ShopStatistician(**shared)
    shops.save(process=32)
def shop_values(shop_list, redis=None):
    """Yield one statistics row (a dict) per shop in *shop_list*.

    Each item of *shop_list* is a UTF-8 encoded JSON document describing a
    shop.  The per-shop counters are looked up by shop name in the
    ``ebay:shop:*`` Redis hashes; a missing counter counts as 0.

    Args:
        shop_list: Iterable of UTF-8 encoded JSON documents.
        redis: Redis client; ``db_redis()`` is used when falsy.

    Yields:
        dict: The row values, including ``weeks_inc_ratio``, computed as
        ``(week_sold - last_week_sold) / (last_week_sold + 1)``.
    """
    client = redis or db_redis()
    for raw in shop_list:
        info = json.loads(str(raw, 'utf8'))
        goods_total = int(client.hget('ebay:shop:count', info['shop_name']) or 0)
        sold_this_week = int(
            client.hget('ebay:shop:week_sold', info['shop_name']) or 0)
        sold_last_week = int(
            client.hget('ebay:shop:last_week_sold', info['shop_name']) or 0)
        goods_sold = int(
            client.hget('ebay:shop:has_sold_count', info['shop_name']) or 0)
        sold_total = int(
            client.hget('ebay:shop:total_sold', info['shop_name']) or 0)
        revenue = round(
            float(client.hget('ebay:shop:amount', info['shop_name']) or 0), 2)
        yield {
            'shop_name': info['shop_name'],
            'shop_feedback_score': info['shop_feedback_score'],
            'shop_feedback_percentage': info['shop_feedback_percent'],
            'sold_goods_count': goods_sold,
            'total_goods_count': goods_total,
            'total_sold': sold_total,
            'weeks_sold': sold_this_week,
            'last_weeks_sold': sold_last_week,
            'amount': revenue,
            'shop_open_time': info['shop_open_time'],
            # +1 in the denominator avoids dividing by zero when nothing
            # was sold last week.
            'weeks_inc_ratio': (sold_this_week - sold_last_week)
            / (sold_last_week + 1),
        }
def save_leaf_category(file):
    """Store the ids of all leaf categories found in ``<file>.xml`` in Redis.

    The document is parsed with ``etree.HTML`` and every ``category``
    element whose ``leafcategory`` text is ``'true'`` has its ``categoryid``
    added (as an int) to the ``ebay:leaf_category_ids_us`` set.  Afterwards a
    blank line, *file*, the number of leaf categories and the total number
    of categories are printed.

    Args:
        file: Path of the XML file without the ``.xml`` extension.
    """
    client = db_redis()
    # Read raw bytes: the parser is handed bytes anyway, and this avoids
    # first decoding the file with the platform's default text encoding.
    with open('{0}.xml'.format(file), 'rb') as f:
        xml = f.read()
    tree = etree.HTML(xml)
    categories = tree.xpath(
        '//html/body/getcategoriesresponse/categoryarray/category')
    leaf_count = 0
    for category in categories:
        leaf_flag = category.xpath('leafcategory/text()')
        if leaf_flag and leaf_flag[0] == 'true':
            leaf_count += 1
            category_id = category.xpath('categoryid/text()')[0]
            client.sadd('ebay:leaf_category_ids_us', int(category_id))
    print()
    print(file)
    print(leaf_count)
    print(len(categories))
def all_category_in_redis(key='ebay:leaf_category_ids_us'):
    """Yield every member of the Redis set *key* as an int.

    Args:
        key: Name of the Redis set to read.  Previously this argument was
            ignored and the default set was always read; it is now honoured.

    Yields:
        int: One set member per iteration.
    """
    client = db_redis()
    for member in client.smembers(key):
        yield int(member)
def insert_to_mysql(shop_list, mysql=None, redis=None):
    """Insert one statistics row per shop into ``shop_statistics_<date>``.

    ``<date>`` is whatever ``date()`` returns.  All rows are sent with a
    single ``executemany`` call followed by a commit.  A
    ``pymysql.err.IntegrityError`` raised while doing so is logged as a
    warning and not re-raised.

    Args:
        shop_list: Iterable of UTF-8 encoded JSON documents, one per shop.
        mysql: MySQL connection; ``db_mysql()`` is used when falsy.
        redis: Redis client; ``db_redis()`` is used when falsy.
    """
    redis = redis or db_redis()
    mysql = mysql or db_mysql()

    def shop_values(shop_list, redis=None):
        """Yield the row parameters for each shop, counters read from Redis."""
        r = redis or db_redis()
        for raw in shop_list:
            shop = json.loads(str(raw, encoding='utf8'))
            name = shop['shop_name']
            count = int(r.hget('ebay:shop:count', name) or 0)
            week_sold = int(r.hget('ebay:shop:week_sold', name) or 0)
            last_week_sold = int(
                r.hget('ebay:shop:last_week_sold', name) or 0)
            has_sold_count = int(
                r.hget('ebay:shop:has_sold_count', name) or 0)
            total_sold = int(r.hget('ebay:shop:total_sold', name) or 0)
            amount = round(float(r.hget('ebay:shop:amount', name) or 0), 2)
            yield {
                'shop_name': name,
                'shop_feedback_score': shop['shop_feedback_score'],
                'shop_feedback_percentage': shop['shop_feedback_percent'],
                'sold_goods_count': has_sold_count,
                'total_goods_count': count,
                'total_sold': total_sold,
                'weeks_sold': week_sold,
                'last_weeks_sold': last_week_sold,
                'amount': amount,
                'shop_open_time': shop['shop_open_time'],
                # +1 in the denominator avoids dividing by zero when
                # nothing was sold last week.
                'weeks_inc_ratio': (week_sold - last_week_sold)
                / (last_week_sold + 1),
            }

    sql = """
        INSERT INTO erp_spider.shop_statistics_{0}
            (shop_name, shop_feedback_score, shop_feedback_percentage,
             sold_goods_count, total_goods_count, total_sold, weeks_sold,
             last_weeks_sold, amount, shop_open_time, weeks_inc_ratio )
        VALUES
            (%(shop_name)s, %(shop_feedback_score)s,
             %(shop_feedback_percentage)s, %(sold_goods_count)s,
             %(total_goods_count)s, %(total_sold)s, %(weeks_sold)s,
             %(last_weeks_sold)s, %(amount)s, %(shop_open_time)s,
             %(weeks_inc_ratio)s)
        """.format(date())
    data = list(shop_values(shop_list, redis))

    cursor = mysql.cursor()
    try:
        cursor.executemany(sql, data)
        mysql.commit()
    except pymysql.err.IntegrityError:
        logger.warning('Duplicate. sqls:\n{0}'.format(data[0]))
    finally:
        # The cursor is created here, so it is released here as well; the
        # connection itself may belong to the caller and is left open.
        cursor.close()