def collenction_all_reset(app_id):
    """Zero the sale-history quantities of merged products before a full crawl.

    Selects the ids of every ``product`` row for ``app_id`` that has
    ``is_merge=1`` and sets ``qty=0`` (refreshing ``updated``) on the
    ``sale_history`` rows that reference those products.

    :param app_id: application id used to filter the ``product`` table.
    :return: None
    """
    sql = """select id from product where app_id=%s and is_merge=1 """ % app_id
    rows = pool.find(sql)
    product_ids = [int(row.get("id")) for row in rows or []]
    if not product_ids:
        # An empty "in ()" list is rejected as a syntax error by MySQL-style
        # SQL, and with no merged products there is nothing to reset anyway.
        return
    # Render the id list by hand: formatting a Python tuple produces "(5,)"
    # for a single id, which is not valid SQL either.
    id_list = ", ".join(str(product_id) for product_id in product_ids)
    sql = """update sale_history set qty=0, updated="%s" WHERE product_id in (%s)""" % (
        datetime.datetime.now(), id_list)
    pool.commit(sql)
def save_web_product(web_site, product_id, web_site_product_id, app_id):
    """Store the link between a marketplace listing and a local product, once.

    Looks for an existing ``web_site_product`` row with the same four values
    and inserts a new row (stamped with the current time) only when none is
    found.

    :param web_site: identifier of the marketplace.
    :param product_id: id of the local ``product`` row.
    :param web_site_product_id: the marketplace's own id for the listing.
    :param app_id: application id the listing belongs to.
    :return: None
    """
    lookup_sql = (
        "select id from web_site_product where web_site=%s "
        "and web_site_product_id=%s and app_id=%s and product_id=%s"
    )
    if pool.find(lookup_sql, (web_site, web_site_product_id, app_id, product_id)):
        return
    # Placeholders are bound by pool.commit, so they must not be wrapped in
    # quotes: a quoted placeholder stores the value with literal quote
    # characters and the lookup above would never match it again.
    insert_sql = (
        "insert into web_site_product"
        "(web_site, product_id, web_site_product_id, created, app_id) "
        "values(%s, %s, %s, %s, %s)"
    )
    pool.commit(
        insert_sql,
        (web_site, product_id, web_site_product_id, datetime.datetime.now(), app_id),
    )
def set_last_execute_time(func_name):
    """Set a spider's ``last_execute`` to 00:00:00 of the current day.

    Also refreshes ``updated`` with the current time for the ``spider_conf``
    row whose ``function_name`` equals ``func_name``.

    :param func_name: value matched against ``spider_conf.function_name``.
    :return: None
    """
    # Truncate "now" to the start of the day.
    start_of_day = datetime.datetime.now().replace(
        hour=0, minute=0, second=0, microsecond=0)
    statement = """update spider_conf set last_execute=%s, updated=%s where function_name=%s"""
    pool.commit(statement, [start_of_day, datetime.datetime.now(), func_name])
def collection_xxskins_product(app_id):
    """Crawl the xxskins goods listing and register each product locally.

    Reads ``page``, ``start_page`` and ``total_page`` for this function from
    ``spider_conf``, then requests listing pages one by one. Responses whose
    ``code`` is 99999 are processed: on the start page the reported
    ``totalPage`` is written back to ``spider_conf``, and every listed product
    is passed to ``save_steam_product`` and, when a product row is available,
    linked through ``save_web_product``. The current page is handed to
    ``thread_error_stop`` when the loop ends or an error interrupts it.

    :param app_id: application id used in the request URL and stored rows.
    :return: None
    """
    func_name = collection_xxskins_product.__name__
    sql = """select page, start_page, total_page from spider_conf where function_name="%s" """ % func_name
    conf = pool.find_one(sql)
    page = conf.get("page")
    start_page = conf.get("start_page")
    total_page = conf.get("total_page")
    web_id = WEB_SITE.get("xxskins")
    try:
        while True:
            url = "https://apis.xxskins.com/goods/%s/0?_=1522230191000&page=%s" % (app_id, page)
            response = send_request(url)
            resp_data = json.loads(response.read())
            print("xxskins:product:page:%s" % page)
            if resp_data and int(resp_data.get("code")) == 99999:
                data = resp_data.get("data")
                if page == start_page:
                    # Total number of pages, as reported by the site.
                    total_page = data.get("totalPage")
                    sql = """update spider_conf set total_page=%s where function_name="%s" """ % (
                        total_page, func_name)
                    pool.commit(sql)
                for product in data.get("list") or []:
                    try:
                        icon_url = "https://steamcommunity-a.akamaihd.net/economy/image/class/%s/%s" % (
                            product.get("app_id"), product.get("class_id"))
                        market_hash_name = product.get("market_hash_name")
                        saved = save_steam_product(
                            product.get("market_name"), icon_url, app_id,
                            product.get("category_rarity_color"),
                            market_hash_name=market_hash_name)
                        # save_steam_product returns a result mapping with
                        # "code" and "data"; the product row is in "data",
                        # not on the result itself.
                        if saved["code"] == ResultCode.Success and saved["data"]:
                            save_web_product(
                                web_id, saved["data"].get("id"),
                                product.get("goods_item_id"), app_id)
                    except Exception as e:
                        # One bad product must not stop the rest of the page.
                        print("xxskins:product:error:%s" % e)
                        continue
            page = page + 1
            if page > total_page:
                break
        thread_error_stop(page, func_name)
    except BaseException as e:
        # Kept broad, as in the original, so the page reached is handed to
        # thread_error_stop whatever interrupts the crawl.
        print("xxskins:product:error:%s" % e)
        thread_error_stop(page, func_name)
def save_steam_product(market_name, icon_url, app_id, color, market_hash_name=None):
    """Return the stored Steam product, inserting it first when missing.

    The product is looked up by ``market_hash_name`` when given, otherwise by
    ``market_name``. When it is not stored yet and no hash name was supplied,
    the hash name and colour are fetched with ``get_steam_market_hash_name``
    (replacing the ``color`` argument). A new ``product`` row is then inserted
    and read back.

    :param market_name: display name of the item.
    :param icon_url: icon URL stored with a new row.
    :param app_id: application id of the item.
    :param color: colour stored with a new row.
    :param market_hash_name: optional market hash name of the item.
    :return: the ``base_result()`` mapping with ``code`` set to
        ``ResultCode.Success`` and ``data`` set to the product on success, or
        ``code`` set to ``ResultCode.NoneProduct`` when no hash name could be
        determined.
    """
    result = base_result()
    if market_hash_name:
        steam_product = get_steam_product(app_id, market_hash_name=market_hash_name)
    else:
        steam_product = get_steam_product(app_id, market_name=market_name)
    if steam_product:
        result["code"] = ResultCode.Success
        result["data"] = steam_product
        return result

    if not market_hash_name:
        market_hash_name, color = get_steam_market_hash_name(app_id, market_name)
    # Items of app 730 whose name contains the character below are flagged
    # is_merge=1.
    is_merge = 0
    if u"箱" in market_name and app_id == 730:
        is_merge = 1
    if not market_hash_name:
        result["code"] = ResultCode.NoneProduct
        return result

    # Placeholders are bound by pool.commit and must not be quoted here:
    # quoting them stores the text with literal quote characters, so the
    # read-back below would not find the row that was just inserted.
    sql = (
        "insert into product"
        "(market_name, market_hash_name, icon_url, created, app_id, color, is_merge) "
        "values(%s, %s, %s, %s, %s, %s, %s)"
    )
    param = (market_name, market_hash_name, icon_url, datetime.datetime.now(),
             app_id, color, is_merge)
    pool.commit(sql, param)
    result["data"] = get_steam_product(app_id, market_hash_name=market_hash_name)
    result["code"] = ResultCode.Success
    return result
def _store_xx_sale_record(web_id, app_id, site_product, history):
    """Insert one xxskins sale record, or add its count to the stored row."""
    # NOTE(review): the site key is spelled "xxshinks" here. It is kept
    # verbatim because it feeds get_feature_id; changing it would presumably
    # change the ids of rows already stored -- confirm before correcting.
    feature_id = get_feature_id(
        "xxshinks", app_id, site_product.get("product_id"), history.get("sell_time"))
    sql = """select id, qty from sale_history where feature_id="%s" """ % feature_id
    existing = pool.find_one(sql)
    if not existing:
        sticker_json = history.get("sticker_json")
        sticker_json = json.dumps(sticker_json) if sticker_json else ""
        wear = history.get("worn") or ""
        # Values come from a remote API, so they are bound as parameters
        # rather than formatted into the statement.
        sql = (
            "insert into sale_history(web_site, qty, price, pay_time, market_name, "
            "product_id, web_site_product_id, created, app_id, description, wear, "
            "feature_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )
        pool.commit(sql, (
            web_id, history.get("count"), history.get("sell_price"),
            history.get("sell_time"), site_product.get("market_name"),
            site_product.get("product_id"), site_product.get("web_site_product_id"),
            datetime.datetime.now(), app_id, sticker_json, wear, feature_id))
    elif site_product.get("is_merge") == 1:
        total_qty = existing.get("qty") + history.get("count")
        sql = """update sale_history set qty=%s, updated=%s where id=%s"""
        pool.commit(sql, (total_qty, datetime.datetime.now(), existing.get("id")))


def collection_xx_sale_history(app_id):
    """Crawl xxskins sale records for merged products into ``sale_history``.

    Writes the page count (xxskins ``web_site_product`` rows / 100 + 1) to
    ``spider_conf``, reads this function's checkpoint from it, and then walks
    the merged products 100 at a time. For each product the site's sale-record
    pages are requested until a response is not code 99999, a page has no
    records, or (when ``is_first == 1`` and ``last_execute`` is set) a record
    older than ``last_execute`` is reached. When ``is_first == 0``,
    ``collenction_all_reset`` is called first. The page reached is handed to
    ``thread_error_stop`` when the loop ends or an error interrupts it.

    :param app_id: application id used to filter products and stored rows.
    :return: None
    """
    func_name = collection_xx_sale_history.__name__
    db_page = None  # not known until the checkpoint has been read
    try:
        web_id = WEB_SITE.get("xxskins")
        sql = """select count(id) from web_site_product where web_site=%s""" % web_id
        total_page = pool.find_one(sql).get("count(id)") // 100 + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % (
            total_page, datetime.datetime.now(), func_name)
        pool.commit(sql)
        sql = """select page, last_execute, is_first, start_page from spider_conf where function_name="%s" limit 1""" % func_name
        conf = pool.find_one(sql)
        db_page = conf.get("page")
        last_execute = conf.get("last_execute")
        is_first = conf.get("is_first")
        start_page = conf.get("start_page")
        if is_first == 0:
            collenction_all_reset(app_id)
        if db_page == start_page and is_first == 1:
            set_last_execute_time(func_name)
        db_rows = 100
        while True:
            db_start = db_page * db_rows
            sql = """select product_id, web_site_product_id, market_name,is_merge from web_site_product, product where web_site_product.app_id=%s and web_site=%s and web_site_product.product_id=product.id and is_merge = 1 limit %s, %s """ % (
                app_id, web_id, db_start, db_rows)
            web_p_list = pool.find(sql)
            print("xxskins:sale_history:db_page:%s" % db_page)
            for site_product in web_p_list:
                web_page = 1
                is_before = False
                print("xxskins:sale_history:product:%s" % site_product.get("product_id"))
                while True:
                    url = "https://apis.xxskins.com/goods/saleRecord?_=1522660905000&goodsItemId=%s&page=%s&limit=100" % (
                        site_product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    resp_data = json.loads(response.read())
                    print(url)
                    print("%s" % web_page)
                    if not (resp_data and int(resp_data.get("code")) == 99999):
                        break
                    history_list = resp_data.get("data").get("list")
                    if not history_list:
                        break
                    for history in history_list:
                        try:
                            if last_execute and is_before_time(
                                    history.get("sell_time"), last_execute) and is_first == 1:
                                # Reached records older than the last run:
                                # stop crawling this product.
                                is_before = True
                                break
                            _store_xx_sale_record(web_id, app_id, site_product, history)
                        except Exception as e:
                            # One bad record must not stop the rest of the page.
                            print("xxskins:sale_history:error:%s" % e)
                            continue
                    if is_before:
                        break
                    web_page = web_page + 1
            db_page = db_page + 1
            if db_page >= total_page:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("xxskins:sale_history:error:%s" % e)
        if db_page is None:
            # Setup failed before the checkpoint was read, so there is no page
            # to persist; surface the real error.
            raise
        thread_error_stop(db_page, func_name)
def update_total_count(function_name, total_count, total_page):
    """Store a spider's total item count and page count in ``spider_conf``.

    Also refreshes ``updated`` with the current time.

    :param function_name: value matched against ``spider_conf.function_name``.
    :param total_count: value written to ``total_count``.
    :param total_page: value written to ``total_page``.
    :return: whatever ``pool.commit`` returns.
    """
    # function_name is bound by pool.commit like the other placeholders, so it
    # must not be wrapped in quotes (a quoted placeholder would compare
    # against the value with literal quote characters and match no row).
    sql = """update spider_conf set total_count=%s, total_page=%s, updated=%s where function_name=%s """
    param = (total_count, total_page, datetime.datetime.now(), function_name)
    return pool.commit(sql, param)
def update_last_execute(function_name):
    """Set a spider's ``last_execute`` in ``spider_conf`` to the current time.

    :param function_name: value matched against ``spider_conf.function_name``.
    :return: whatever ``pool.commit`` returns.
    """
    return pool.commit(
        """update spider_conf set last_execute=%s where function_name=%s """,
        [datetime.datetime.now(), function_name]
    )
def save_trade_stop_log(function_name, page):
    """Append a row to ``thread_stop_log`` stamped with the current time.

    :param function_name: name stored in the ``function_name`` column.
    :param page: page stored in the ``page`` column.
    :return: None
    """
    log_row = [datetime.datetime.now(), function_name, page]
    pool.commit(
        """insert into thread_stop_log(created, function_name, page) values (%s, %s, %s)""",
        log_row
    )
def collection_stmbuy_sale_history(app_id):
    """Crawl stmbuy sale-history pages into ``sale_history``.

    Writes the page count (stmbuy ``web_site_product`` rows / 100 + 1) to
    ``spider_conf``, reads this function's checkpoint from it, and then walks
    the stmbuy products 100 at a time. For each product the history pages are
    requested until a non-200 response, a page containing the ``def-none``
    marker, or (when ``is_first == 1`` and ``last_execute`` is set) a record
    older than ``last_execute``. Records whose feature id is not stored yet
    are inserted. The page reached is handed to ``thread_error_stop`` when the
    loop ends or an error interrupts it.

    :param app_id: application id stored with each row.
    :return: None
    """
    func_name = collection_stmbuy_sale_history.__name__
    web_id = WEB_SITE.get("stmbuy")
    db_rows = 100
    sql = """select count(id) from web_site_product where web_site=%s""" % web_id
    result = pool.find_one(sql)
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % (
        result.get("count(id)") // db_rows + 1, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, last_execute, is_first, start_page from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    start_page = result.get("start_page")
    if start_page == db_page and is_first == 1:
        set_last_execute_time(func_name)
    # Scraped values are bound as parameters rather than formatted into SQL.
    insert_sql = (
        "insert into sale_history(web_site, qty, price, pay_time, market_name, "
        "product_id, web_site_product_id, created, app_id, description, wear, "
        "feature_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    try:
        while True:
            if db_page > total_page:
                break
            # The offset is in rows, not pages.
            sql = """select web_site_product_id, product_id, id from web_site_product where web_site=%s limit %s, %s""" % (
                web_id, db_page * db_rows, db_rows)
            product_list = pool.find(sql)
            print("stmbuy:sale_history:db_page:%s" % db_page)
            for product in product_list:
                print("stmbuy:sale_history:product:%s" % product.get("id"))
                web_page = 1
                is_before = False
                while True:
                    url = "http://www.stmbuy.com/item/history.html?class_id=%s&game_name=csgo&sort[_id]=-1&page=%s" % (
                        product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    if response.code != 200:
                        # Without this the loop would request further pages
                        # forever on a failing response.
                        break
                    soup = BeautifulSoup(response.read(), "html.parser")
                    if soup.find("div", attrs={"class": "def-none"}):
                        break
                    ul = soup.find("ul", attrs={"class": "goods-list"})
                    for li_item in ul.find_all("li"):
                        try:
                            # u"%s" % ... turns bs4 string nodes into plain
                            # text before they reach the DB driver.
                            qty = u"%s" % li_item.find(
                                "div", attrs={"class": "amount"}).find("span").string
                            price_div = li_item.find("div", attrs={"class": "price"})
                            price = price_div.contents[1] + price_div.contents[2].string
                            pay_time = li_item.find_all(
                                "div", attrs={"class": "time fr"})[0].contents[2].strip()
                            if last_execute and is_before_time(
                                    pay_time, last_execute) and is_first == 1:
                                # Reached records older than the last run:
                                # stop crawling this product.
                                is_before = True
                                break
                            sell_info = li_item.find("div", attrs={"class": "goods-sellinfo"})
                            wear_p = sell_info.find("p", attrs={"class": "mosundu-num"})
                            if wear_p:
                                wear = u"%s" % wear_p.find("span").string
                                market_name = u"%s" % sell_info.find_all("p")[1].string
                            else:
                                wear = ""
                                market_name = sell_info.find("p").string.strip()
                            feature_id = get_feature_id(
                                "stmbuy", app_id, product.get("product_id"), pay_time)
                            if not get_sale_history(feature_id):
                                pool.commit(insert_sql, (
                                    web_id, qty, price, pay_time, market_name,
                                    product.get("product_id"), product.get("id"),
                                    datetime.datetime.now(), app_id, "", wear, feature_id))
                        except Exception as e:
                            # One bad row must not stop the rest of the page.
                            print("stmbuy:sale_history:error:%s" % e)
                            continue
                    if is_before:
                        break
                    web_page += 1
            # Advance to the next batch of products; without this the same
            # batch would be crawled forever.
            db_page += 1
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("stmbuy:sale_history:error:%s" % e)
        thread_error_stop(db_page, func_name)
def thread_error_stop(page, func_name):
    """Persist a spider's current page, then put its thread to sleep.

    Writes ``page`` and the current time to the ``spider_conf`` row of
    ``func_name`` and calls ``thread_list.sleep(func_name)``.

    :param page: page number to store as the checkpoint.
    :param func_name: value matched against ``spider_conf.function_name``.
    :return: None
    """
    values = (page, datetime.datetime.now(), func_name)
    checkpoint_sql = (
        """update spider_conf set page=%s, updated='%s' where function_name='%s'"""
        % values
    )
    pool.commit(checkpoint_sql)
    thread_list.sleep(func_name)
def collection_steam_sale_history(app_id):
    """Crawl Steam market listing pages and store their price history.

    Writes the page count (steam ``web_site_product`` rows / 10 + 1) to
    ``spider_conf``, reads this function's checkpoint from it, and then walks
    the steam products 10 at a time, pausing one second before each request.
    The ``line1`` array embedded in a listing page's script is parsed, and
    every entry whose feature id is not stored yet is inserted. Entries older
    than ``last_execute`` are skipped when ``is_first == 1``. A non-200
    response ends the current batch. The page reached is handed to
    ``thread_error_stop`` when the loop ends or an error interrupts it.

    :param app_id: application id used in the listing URL and stored rows.
    :return: None
    """
    func_name = collection_steam_sale_history.__name__
    db_page = None  # not known until the checkpoint has been read
    try:
        rows = 10
        web_id = WEB_SITE.get("steam")
        sql = """select count(id) from web_site_product where web_site=%s""" % web_id
        total_count = pool.find_one(sql).get("count(id)") // rows + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % (
            total_count, datetime.datetime.now(), func_name)
        pool.commit(sql)
        sql = """select page, is_first, last_execute, start_page from spider_conf where function_name="%s" """ % func_name
        spider_result = pool.find_one(sql)
        db_page = spider_result.get("page")
        is_first = spider_result.get("is_first")
        last_execute = spider_result.get("last_execute")
        start_page = spider_result.get("start_page")
        headers = {"Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6"}
        if start_page == db_page and is_first == 1:
            set_last_execute_time(func_name)
        # Compiled once instead of once per product.
        line1_pattern = re.compile(r"line1")
        # Market names can contain quote characters, so values are bound as
        # parameters rather than formatted into the statement.
        insert_sql = (
            "insert into sale_history(web_site, qty, price, pay_time, market_name, "
            "product_id, web_site_product_id, created, app_id, description, wear, "
            "feature_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        )
        while True:
            start = db_page * rows
            sql = """select market_hash_name, product.market_name, product.id from product, web_site_product where product.id=web_site_product.product_id and web_site=%s limit %s, %s""" % (
                web_id, start, rows)
            product_list = pool.find(sql)
            print("steam:sale_history:db_page:%s" % db_page)
            for product in product_list:
                # NOTE(review): rows are read with the keys "product.id" and
                # "product.market_name"; whether pool.find returns
                # table-qualified keys is not visible here -- confirm.
                product_id = product.get("product.id")
                print("steam:sale_history:product:%s" % product_id)
                market_hash_name = str(product.get("market_hash_name").encode("utf-8"))
                time.sleep(1)
                url = "https://steamcommunity.com/market/listings/%s/%s" % (
                    app_id, urllib.quote(market_hash_name))
                response = send_request(url, headers)
                if response.code != 200:
                    break
                soup = BeautifulSoup(response.read(), "html.parser")
                script = soup.find("script", text=line1_pattern)
                if not script:
                    continue
                # Cut the "line1=[...];" array literal out of the script text.
                history_list = json.loads(
                    script.text.split("line1=")[1].split("];")[0] + "]")
                for history in history_list:
                    # history[0] is split on spaces into month name, day,
                    # year and an "HH:..." field.
                    sell_time = history[0].split(" ")
                    pay_time = datetime.datetime(
                        year=int(sell_time[2]),
                        month=Time_dict.get(sell_time[0]),
                        day=int(sell_time[1]),
                        hour=int(sell_time[3].split(":")[0]))
                    if last_execute and is_first == 1 and is_before_time(
                            pay_time, last_execute):
                        continue
                    price = history[1]
                    qty = history[2]
                    feature_id = get_feature_id(
                        "steam", app_id, product_id,
                        pay_time.strftime("%Y-%m-%d %H:%M:%S"))
                    if not get_sale_history(feature_id):
                        try:
                            pool.commit(insert_sql, (
                                web_id, qty, price, pay_time,
                                product.get("product.market_name"),
                                product_id, product_id,
                                datetime.datetime.now(), app_id, "", "", feature_id))
                        except Exception as e2:
                            # One failed insert must not stop the others.
                            print("steam:sale_history:error:%s" % e2)
                            continue
            db_page = db_page + 1
            if db_page >= total_count:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("steam:sale_history:error:%s" % e)
        if db_page is None:
            # Setup failed before the checkpoint was read, so there is no page
            # to persist; surface the real error.
            raise
        thread_error_stop(db_page, func_name)
def collection_c5_sale_history(app_id):
    """Crawl c5game item history pages into ``sale_history``.

    Writes the page count (c5game ``web_site_product`` rows / 100 + 1) to
    ``spider_conf``, reads this function's checkpoint from it, and then walks
    the c5game products of ``app_id`` 100 at a time. Each product's history
    table is parsed row by row until the "empty" placeholder cell or (when
    ``is_first == 1`` and ``last_execute`` is set) a record older than
    ``last_execute``; rows whose feature id is not stored yet are inserted
    with quantity 1. A non-200 response ends the current batch. The page
    reached is handed to ``thread_error_stop`` when the loop ends or an error
    interrupts it.

    :param app_id: application id used to filter products and stored rows.
    :return: None
    """
    func_name = collection_c5_sale_history.__name__
    web_id = WEB_SITE.get("c5game")
    db_rows = 100
    # Counted from web_site_product, the table that is paged through below
    # and the one the sibling collectors count.
    sql = """select count(id) from web_site_product where web_site=%s""" % web_id
    result = pool.find_one(sql)
    total_page = result.get("count(id)") // db_rows + 1
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % (
        total_page, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, is_first, last_execute from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    # Scraped values are bound as parameters rather than formatted into SQL.
    insert_sql = (
        "insert into sale_history(web_site, qty, price, pay_time, market_name, "
        "product_id, web_site_product_id, created, app_id, description, wear, "
        "feature_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    try:
        while True:
            start = db_page * db_rows
            sql = """select web_site_product_id, product_id from web_site_product where web_site=%s and app_id=%s limit %s, %s""" % (
                web_id, app_id, start, db_rows)
            site_product_list = pool.find(sql)
            for site_product in site_product_list:
                web_site_product_id = site_product.get("web_site_product_id")
                url = "https://www.c5game.com/csgo/item/history/%s.html" % web_site_product_id
                response = send_request(url)
                # Compare the status code, not the response object itself.
                if response.code != 200:
                    break
                soup = BeautifulSoup(response.read(), "html.parser")
                tr_list = soup.find("div", attrs={"id": "history"}).find(
                    "table").find_all("tbody")[2].find_all("tr")
                for tr_item in tr_list:
                    try:
                        # The "empty" cell is the placeholder shown when there
                        # are no records; stop when it IS present.
                        if tr_item.find("td", attrs={"class": "text-center empty"}):
                            break
                        market_name = tr_item.find(
                            "div", attrs={"class": "img csgo-img-bg ml-0"}
                        ).find("img").get("alt")
                        # Unicode literal: the scraped text is unicode, and a
                        # non-ASCII byte string cannot be split against it.
                        price = tr_item.find(
                            "span", attrs={"class": "ft-gold"}).string.split(u"¥")[1]
                        # The cell text is prefixed with "20" to form the
                        # stored pay_time.
                        pay_time = "20" + tr_item.find_all("td")[4].string
                        if last_execute and is_first == 1 and is_before_time(
                                pay_time, last_execute):
                            break
                        feature_id = get_feature_id(
                            "c5game", app_id, site_product.get("product_id"), pay_time)
                        if not get_sale_history(feature_id):
                            pool.commit(insert_sql, (
                                web_id, 1, price, pay_time, market_name,
                                site_product.get("product_id"), web_site_product_id,
                                datetime.datetime.now(), app_id, "", "", feature_id))
                    except Exception as e:
                        # One bad row must not stop the rest of the table.
                        print("c5game:sale_history:error:%s" % e)
                        continue
            # Advance to the next batch; without this the loop never ends.
            db_page += 1
            if db_page >= total_page:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("c5game:sale_history:error:%s" % e)
        thread_error_stop(db_page, func_name)