def getRecommend(uid):
    """Return the movies recommended for user ``uid``.

    Recommendations are computed by ``getRecommendations`` from the data
    returned by ``findAllJson2``.  At most the first five entries are used and
    each one is resolved through ``getMovieByName``.  When there is no usable
    recommendation, the movies with ``id < 6`` are returned instead.

    Args:
        uid: identifier of the user, passed through to ``getRecommendations``.

    Returns:
        list: movie entries.  Fallback entries are dicts with the keys
        ``id``, ``movieName``, ``src``, ``averating``, ``playPage`` and
        ``fileName``; recommended entries are whatever ``getMovieByName``
        returns.
    """
    result = findAllJson2()
    recomm = getRecommendations(result, uid)
    dbutils = DBUtils(host, user, password, db, port)
    movie_list = []
    try:
        # Guard against an empty result before looking at the first entry.
        if len(recomm) > 0 and recomm[0][1]:
            print(recomm[0][1])
            # Slicing (rather than indexing 0..4) tolerates fewer than five
            # recommendations instead of raising IndexError.
            for entry in recomm[:5]:
                movie_list.append(getMovieByName(dbutils, entry[1]))
        else:
            sql = 'select * from movie where id<6'
            for item in dbutils.query(sql):
                movie_list.append({
                    'id': item[0],
                    'movieName': item[1].strip(),
                    'src': item[7],
                    'averating': item[9].strip(),
                    'playPage': item[8],
                    'fileName': item[10],
                })
    finally:
        # Release the connection even when a query or lookup fails.
        dbutils.close()
    print(movie_list)
    return movie_list
def save_bond_data_info(kezhuanzhai_code='sh110003'):
    """Fetch the daily quotes of one convertible bond and insert them row by row.

    Args:
        kezhuanzhai_code: symbol passed to ``ak.bond_zh_hs_cov_daily``.  The
            first two characters are dropped to form the stored ``bond_code``.
    """
    print(kezhuanzhai_code)
    # Query the quotes of this convertible bond and store them in the database.
    daily_quotes = ak.bond_zh_hs_cov_daily(symbol=kezhuanzhai_code)
    insert_template = (
        "insert into kezhuanzhai_data(date,open,high,low,close,volume,bond_code) values "
        "('{date}',{open},{high},{low},{close},{volume},{bond_code})"
    )
    for trade_date, quote in daily_quotes.iterrows():
        print(trade_date, quote['open'], quote['high'], quote['low'],
              quote['close'], quote['volume'])
        statement = insert_template.format(
            date=trade_date,
            **dict(quote),
            bond_code=kezhuanzhai_code[2:],
        )
        DBUtils.execute(statement)
def save_bond_info():
    """Fetch the list of convertible bonds and insert every row into ``kezhuanzhai``.

    Each row returned by ``ak.bond_zh_cov`` is formatted into one INSERT
    statement, using the row's column names as format fields.
    """
    insert_template = (
        "insert into kezhuanzhai(`债券代码`,`交易场所`,`债券简称`,`申购日期`,`申购代码`,"
        "`正股代码`,`正股简称`,`债券面值`,`发行价格`,`转股价`,`中签号发布日`,`中签率`,`上市时间`"
        ",`备忘录`,`正股价`,`市场类型`,`股权登记日`,`申购上限`,`转股价值`,`债现价`,`转股溢价率`"
        ",`每股配售额`,`发行规模`) "
        "values('{债券代码}','{交易场所}','{债券简称}','{申购日期}','{申购代码}','{正股代码}',"
        "'{正股简称}','{债券面值}','{发行价格}','{转股价}','{中签号发布日}','{中签率}','{上市时间}',"
        "'{备忘录}','{正股价}','{市场类型}','{股权登记日}','{申购上限}','{转股价值}','{债现价}','"
        "{转股溢价率}','{每股配售额}','{发行规模}')"
    )
    # Query and save the information of all convertible bonds.
    bonds = ak.bond_zh_cov()
    for _, bond in bonds.iterrows():
        fields = dict(bond)
        DBUtils.execute(insert_template.format(**fields))
def getMovieList():
    """Return every row of the ``movie`` table as a list of summary dicts.

    Returns:
        list[dict]: one dict per row with the keys ``id`` (column 0),
        ``movieName`` (column 1, stripped), ``src`` (column 7) and
        ``averating`` (column 9, stripped).
    """
    dbutils = DBUtils(host, user, password, db, port)
    try:
        result = dbutils.query('select * from movie')
        return [
            {
                'id': item[0],
                'movieName': item[1].strip(),
                'src': item[7],
                'averating': item[9].strip(),
            }
            for item in result
        ]
    finally:
        # Close the connection even if the query or row mapping raises.
        dbutils.close()
def get_feedback():
    """Return all feedback rows joined with the author's full name.

    Every row fetched from the database is passed through
    ``DBHandler.feedbackDateParser`` before being returned.

    Returns:
        list: the parsed feedback rows.
    """
    db = DBUtils.get_instance()
    query = "SELECT F.*, U.fullname AS userName FROM feedback F JOIN user U ON F.loginId = U.loginId"
    rows = db.get(query)
    return [DBHandler.feedbackDateParser(row) for row in rows]
def login(user):
    """Look up the ``login`` rows matching the supplied credentials.

    Args:
        user: mapping with the keys ``'username'`` and ``'password'``.

    Returns:
        Whatever ``db.get`` returns for the query.
    """
    db = DBUtils.get_instance()
    # The template must contain ``{}`` fields: without them ``str.format``
    # silently ignores its arguments and the query always searched for the
    # literal text '******' instead of the supplied credentials.
    # NOTE(review): the values are interpolated directly into the SQL text,
    # which is open to SQL injection; switch to a parameterized query if
    # ``db.get`` accepts bound arguments.
    sql = "SELECT * FROM login WHERE username = '{}' AND password = '{}'".format(
        user['username'], user['password'])
    return db.get(sql)
def update_recipe(recipe):
    """Update a recipe row and replace its ingredients and directions.

    The ``recipe`` row is updated first; afterwards all existing ingredient
    and direction rows for the recipe are deleted and re-inserted from the
    supplied data.  Directions are numbered from 1 in the given order.

    Args:
        recipe: mapping with the keys ``recipeId``, ``name``, ``category``,
            ``description``, ``image``, ``featured``, ``ingredients`` (items
            with an ``'ingredient'`` key) and ``directions`` (items with a
            ``'direction'`` key).

    Returns:
        The ``recipeId`` of the updated recipe.
    """
    db = DBUtils.get_instance()
    recipeId = recipe['recipeId']

    # Placeholders are left unquoted, like the INSERT statements below, so the
    # bound values are quoted once by the database layer.  A space before
    # WHERE keeps the two string pieces from running together.
    sql = "UPDATE recipe SET name = %s, category = %s, description = %s, image = %s, featured = %s " \
          "WHERE recipeId = %s"
    db.update(sql, (recipe['name'], recipe['category'], recipe['description'],
                    recipe['image'], recipe['featured'], recipeId))

    # ``str.format`` only fills ``{}`` fields; the previous ``%s`` templates
    # were sent to the database unchanged and without any bound value.
    db.delete("DELETE FROM recipeingredient WHERE recipeId = '{}'".format(recipeId))
    db.delete("DELETE FROM recipedirection WHERE recipeId = '{}'".format(recipeId))

    ingredient_sql = "INSERT INTO recipeingredient (recipeId, ingredient) VALUES (%s, %s)"
    for ingredient in recipe['ingredients']:
        db.insert(ingredient_sql, (recipeId, ingredient['ingredient']))

    direction_sql = "INSERT INTO recipedirection (recipeId, `order`, direction) VALUES (%s, %s, %s)"
    for order, direction in enumerate(recipe['directions'], start=1):
        db.insert(direction_sql, (recipeId, order, direction['direction']))

    return recipeId
def getHistoryList(uid):
    """Return the distinct movies found in the history of user ``uid``.

    Args:
        uid: user identifier; it is converted with ``int()`` before being
            placed in the query.

    Returns:
        list[dict]: one dict per movie with the keys ``id``, ``movieName``,
        ``src`` (the ``coverUrl`` column) and ``averating`` (the
        ``averageScore`` column).
    """
    dbutils = DBUtils(host, user, password, db, port)
    try:
        sql = 'select distinct m.id,m.movieName,m.movieType,m.director,m.star,m.release,m.language,m.coverUrl,m.playPage,m.averageScore,' \
              'm.fileName,m.year from movie as m join history ' \
              'as h where h.uid=%d and m.id = h.mid;' % int(uid)
        result = dbutils.query(sql)
        return [
            {
                'id': item[0],
                'movieName': item[1],
                'src': item[7],
                'averating': item[9],
            }
            for item in result
        ]
    finally:
        # Close the connection even when the conversion or the query fails.
        dbutils.close()
def save_stock_price_info(stock_price=None):
    """Insert one daily price record into ``stock_day_qfq``.

    Args:
        stock_price: mapping providing the format fields ``date``, ``close``,
            ``high``, ``low``, ``open``, ``volume``, ``outstanding_share``,
            ``turnover`` and ``stock_code``.  Omitting it behaves like passing
            an empty mapping.

    Raises:
        KeyError: if a required field is missing from ``stock_price``.
    """
    # ``None`` replaces the former mutable ``{}`` default, which is shared
    # between calls.
    if stock_price is None:
        stock_price = {}
    result = DBUtils.execute(
        "insert into stock_day_qfq(date,close,high,low,open,volume,outstanding_share,turnover,stock_code) "
        " values ('{date}',{close},{high},{low},{open},{volume},{outstanding_share},{turnover},'{stock_code}')"
        .format(**stock_price))
    # Print the record when the insert did not report a positive result.
    if result is None or result <= 0:
        print(stock_price)
def add_comment(comment):
    """Store a recipe comment stamped with the current time.

    Args:
        comment: mapping with the keys ``recipeId``, ``loginId`` and
            ``comment``.

    Returns:
        The value returned by ``db.insert`` for the new row.
    """
    db = DBUtils.get_instance()
    insert_sql = "INSERT INTO recipecomments (recipeId, loginId, comment, commentDate) VALUES (%s, %s, %s, %s)"
    values = (
        comment['recipeId'],
        comment['loginId'],
        comment['comment'],
        datetime.now(),
    )
    return db.insert(insert_sql, values)
def save_feedback(feedback):
    """Store a feedback entry stamped with the current time.

    Args:
        feedback: mapping with the keys ``loginId``, ``title`` and
            ``feedback``.

    Returns:
        The value returned by ``db.insert`` for the new row.
    """
    db = DBUtils.get_instance()
    insert_sql = "INSERT INTO feedback (loginId, title, feedback, feedbackDate) VALUES (%s, %s, %s, %s)"
    values = (
        feedback['loginId'],
        feedback['title'],
        feedback['feedback'],
        datetime.now(),
    )
    return db.insert(insert_sql, values)
def get_comment(recipeId):
    """Return the comments of one recipe together with each author's full name.

    ``recipeId`` is formatted directly into the SQL text.  Every fetched row is
    passed through ``DBHandler.commentDateParser``.

    Args:
        recipeId: identifier of the recipe whose comments are requested.

    Returns:
        list: the parsed comment rows.
    """
    db = DBUtils.get_instance()
    query = (
        "SELECT C.commentId, C.comment, C.commentDate, U.fullname "
        "FROM recipecomments C JOIN user U ON C.loginId = U.loginId WHERE C.recipeId = {}"
    ).format(recipeId)
    rows = db.get(query)
    return [DBHandler.commentDateParser(row) for row in rows]
def query_kezhuanzhai_info():
    """Download daily quotes for the convertible bonds that have none stored yet.

    Selects the bonds listed before 2020-11-30 whose code does not appear in
    ``kezhuanzhai_data`` and calls ``save_bond_data_info`` for each, using the
    lower-cased last two characters of the exchange column as symbol prefix.
    """
    # Query the information of all convertible bonds.
    pending_bonds = DBUtils.execute(
        "select * from kezhuanzhai where 上市时间 <'2020-11-30T00:00:00' and "
        " 债券代码 not in (select bond_code from kezhuanzhai_data group by bond_code) and length(上市时间)>5 "
    )
    for bond in pending_bonds:
        exchange_prefix = str(bond["交易场所"][-2:]).lower()
        symbol = exchange_prefix + bond['债券代码']
        save_bond_data_info(symbol)
def get_recipe(category):
    """Return the recipes of one category.

    ``category`` is formatted directly into the SQL text.  The fetched rows
    are handed to ``DBHandler.build_recipe`` before being returned.

    Args:
        category: category value to match.

    Returns:
        The rows returned by ``db.get``, after ``DBHandler.build_recipe`` has
        been called on them.
    """
    db = DBUtils.get_instance()
    recipes = db.get("SELECT * FROM recipe WHERE category = '{}'".format(category))
    DBHandler.build_recipe(db, recipes)
    return recipes
def query_RT_stock():
    """Fetch the current A-share spot list and insert each stock into ``stock_2020_1218``.

    A row is printed when its insert does not report a positive result.
    """
    insert_template = (
        "INSERT INTO `stock_2020_1218`(`symbol`,`code`,`name`)"
        "VALUES('{symbol}','{code}','{name}')"
    )
    spot_quotes = ak.stock_zh_a_spot()
    for _, stock_data_info in spot_quotes.iterrows():
        statement = insert_template.format(**dict(stock_data_info))
        affected = DBUtils.execute(statement)
        if affected is None or affected <= 0:
            print(stock_data_info)
def getMovieById(id):
    """Return the details of the movie with the given id.

    Args:
        id: movie identifier; it is converted with ``int()`` so that numeric
            strings are accepted as well as integers.

    Returns:
        dict: the keys ``id``, ``name``, ``src``, ``averating``, ``year``,
        ``director``, ``star``, ``type``, ``language``, ``release``,
        ``playPage`` and ``fileName``; an empty dict when no row matches.
    """
    dbutils = DBUtils(host, user, password, db, port)
    try:
        # ``%d`` rejects strings, so coerce first (as getHistoryList does).
        sql = 'select * from movie where id=%d' % int(id)
        result = dbutils.query(sql)
        movie = {}
        for item in result:
            movie['id'] = item[0]
            movie['name'] = item[1].strip()
            movie['src'] = item[7].strip()
            movie['averating'] = item[9].strip()
            movie['year'] = item[11]
            movie['director'] = item[3].strip()
            movie['star'] = item[4].strip()
            movie['type'] = item[2].strip()
            movie['language'] = item[6].strip()
            movie['release'] = item[5].strip()
            movie['playPage'] = item[8]
            movie['fileName'] = item[10]
        return movie
    finally:
        # Close the connection even when the query or row mapping raises.
        dbutils.close()
def delete_recipe(recipeId):
    """Delete a recipe together with its ingredient and direction rows.

    The three DELETE statements run in order: ``recipe``,
    ``recipeingredient``, ``recipedirection``.  ``recipeId`` is formatted
    directly into each statement.

    Args:
        recipeId: identifier of the recipe to remove.
    """
    db = DBUtils.get_instance()
    statements = (
        "DELETE FROM recipe WHERE recipeId = {}",
        "DELETE FROM recipeingredient WHERE recipeId = '{}'",
        "DELETE FROM recipedirection WHERE recipeId = '{}'",
    )
    for template in statements:
        db.delete(template.format(recipeId))
def query_stock_codes_save():
    """Download the adjusted daily prices of every stored stock and save them.

    For each row of ``stock_2020_1218`` the daily history is fetched with
    ``ak.stock_zh_a_daily(adjust="qfq")`` and every record is passed to
    ``save_stock_price_info``.  Records whose ``open`` or ``volume`` value is
    missing are skipped.
    """
    import pandas as pd

    stock_infos = DBUtils.execute("select * from stock_2020_1218")
    for data in stock_infos:
        print(data['symbol'])
        daily_prices = ak.stock_zh_a_daily(symbol=data['symbol'], adjust="qfq")
        for index, stock_price in daily_prices.iterrows():
            # A missing value is a float NaN, which never compares equal to
            # the string 'nan'; ``pd.isna`` detects it reliably.
            if pd.isna(stock_price["open"]) or pd.isna(stock_price["volume"]):
                continue
            save_stock_price_info({
                "date": index,
                **dict(stock_price),
                "stock_code": data['symbol']
            })
def query_bond_stock_codes_save():
    """Download the adjusted daily prices of the stocks underlying stored bonds.

    Selects the bonds subscribed between 2015-01-01 and 2020-11-01, builds
    each underlying stock symbol from the lower-cased last two characters of
    the exchange column plus the stock code, fetches its daily history with
    ``ak.stock_zh_a_daily(adjust="qfq")`` and passes every record to
    ``save_stock_price_info``.  Records whose ``open`` or ``volume`` value is
    missing are skipped.
    """
    import pandas as pd

    bond_infos = DBUtils.execute(
        "select 正股代码, 申购日期, 交易场所 from kezhuanzhai "
        "where 申购日期>'2015-01-01' and 申购日期< '2020-11-01' order by 正股代码")
    for data in bond_infos:
        stock_code = str(data["交易场所"][-2:]).lower() + data['正股代码']
        print(data['正股代码'])
        daily_prices = ak.stock_zh_a_daily(symbol=stock_code, adjust="qfq")
        for index, stock_price in daily_prices.iterrows():
            # A missing value is a float NaN, which never compares equal to
            # the string 'nan'; ``pd.isna`` detects it reliably.
            if pd.isna(stock_price["open"]) or pd.isna(stock_price["volume"]):
                continue
            save_stock_price_info({
                "date": index,
                **dict(stock_price),
                "stock_code": data['正股代码']
            })
def search_recipe(ingredients):
    """Return the recipes that use the given ingredients.

    For every ingredient the first matching ``recipeId`` is collected; the
    recipes with those ids are then loaded and handed to
    ``DBHandler.build_recipe``.

    Args:
        ingredients: iterable of ingredient names.  Each name is formatted
            directly into the SQL text.

    Returns:
        The recipe rows returned by ``db.get``, or an empty list when no
        ingredient matched any recipe.
    """
    db = DBUtils.get_instance()
    recipe_ids = []
    for ingredient in ingredients:
        sql = "SELECT recipeId FROM recipeingredient WHERE ingredient = '{}'".format(
            ingredient)
        rows = db.get(sql)
        if rows:
            recipe_ids.append(str(rows[0]['recipeId']))

    if not recipe_ids:
        # An empty ``IN ()`` list is a syntax error in MySQL and most other
        # SQL dialects, so skip the query when nothing matched.
        return []

    sql = "SELECT * FROM recipe WHERE recipeId IN ({})".format(','.join(recipe_ids))
    result = db.get(sql)
    DBHandler.build_recipe(db, result)
    return result
def register(user):
    """Create a login and its user profile unless the username is taken.

    Args:
        user: mapping with the keys ``username``, ``password``, ``fullname``
            and ``email``.

    Returns:
        ``-1`` when a login with the same username already exists, otherwise
        the value returned by ``db.insert`` for the new ``login`` row.
    """
    db = DBUtils.get_instance()
    username = user['username']
    # The template must contain a ``{}`` field: without it ``str.format``
    # ignores its argument and the duplicate check always looked for the
    # literal text '******' instead of the requested username.
    # NOTE(review): the value is interpolated directly into the SQL text,
    # which is open to SQL injection; switch to a parameterized query if
    # ``db.get`` accepts bound arguments.
    sql = "SELECT * FROM login WHERE username = '{}'".format(username)
    if db.get(sql):
        return -1

    sql = "INSERT INTO login (username, password, userType, active) VALUES (%s, %s, %s, %s)"
    login_id = db.insert(sql=sql, args=(username, user['password'], 'user', 0))
    print(login_id)

    sql = "INSERT INTO user (loginId, fullname, email) VALUES (%s, %s, %s)"
    db.insert(sql=sql, args=(int(login_id), user['fullname'], user['email']))
    return login_id
def save_recipe(recipe):
    """Insert a recipe with its ingredients and numbered directions.

    Directions are stored with an ``order`` value counting up from 1 in the
    sequence they are given.

    Args:
        recipe: mapping with the keys ``name``, ``category``, ``description``,
            ``image``, ``featured``, ``ingredients`` (items with an
            ``'ingredient'`` key) and ``directions`` (items with a
            ``'direction'`` key).

    Returns:
        The value returned by ``db.insert`` for the new ``recipe`` row.
    """
    db = DBUtils.get_instance()

    recipe_sql = "INSERT INTO recipe (name, category, description, image, featured) VALUES (%s, %s, %s, %s, %s)"
    recipe_values = (
        recipe['name'],
        recipe['category'],
        recipe['description'],
        recipe['image'],
        recipe['featured'],
    )
    recipeId = db.insert(recipe_sql, recipe_values)

    ingredient_sql = "INSERT INTO recipeingredient (recipeId, ingredient) VALUES (%s, %s)"
    for entry in recipe['ingredients']:
        db.insert(ingredient_sql, (recipeId, entry['ingredient']))

    direction_sql = "INSERT INTO recipedirection (recipeId, `order`, direction) VALUES (%s, %s, %s)"
    for position, step in enumerate(recipe['directions'], start=1):
        db.insert(direction_sql, (recipeId, position, step['direction']))

    return recipeId
def recordScore(uid, score, movieId):
    """Record a user's score for a movie in the ``history`` table.

    The row is stamped with the database's ``now()``.

    Args:
        uid: user identifier, converted with ``int()``.
        score: score given by the user, converted with ``int()``.
        movieId: movie identifier, converted with ``int()``.
    """
    dbutils = DBUtils(host, user, password, db, port)
    try:
        sql = 'insert into history (uid,mid,score,dt) values (%d,%d,%d,now())' % (
            int(uid), int(movieId), int(score))
        dbutils.insert(sql)
    finally:
        # Close the connection even when the conversion or the insert fails.
        dbutils.close()
'''
@Time : 2017-2-28 21:20
@Author : Moses
@File : GetLeagueListing.py
'''
import requests
from utils.DBUtils import DBUtils

# Fetches the Dota 2 league listing from the Steam Web API and stores every
# league through DBUtils.insertLeagues.
# API reference: https://wiki.teamfortress.com/wiki/WebAPI/GetLeagueListing
url = "http://api.steampowered.com/IDOTA2Match_570/GetLeagueListing/v1"
# Generic form: url ="http://api.steampowered.com/IDOTA2Match_<ID>/GetLeagueListing/v1/"
key = "?key=<key>"
params = "&language=zh_cn"

r = requests.get(url + key + params)
rj = r.json()
leagues = rj['result']['leagues']

# Order in which the league fields are handed to insertLeagues.
league_fields = ('name', 'leagueid', 'tournament_url', 'itemdef', 'description')

dbutils = DBUtils()
for league in leagues:
    dbutils.insertLeagues([league[field] for field in league_fields])
dbutils.close()
def spiderDazhong(shop_id, now_page):
    """Scrape one page of reviews of a Dianping shop and store each review.

    The page is downloaded, the obfuscated review text is decoded with the
    ``CrawlerUtils`` helpers, and every review with non-empty text is printed
    and saved through ``DBUtils.save_to_db(Comment, ...)``.

    Args:
        shop_id: shop identifier used to build the review URL.
        now_page: page number appended to the review URL.

    Returns:
        bool: ``True`` when the page contains a ``div#log`` element (the
        global ``proxies`` is refreshed and the global ``page`` counter is
        decremented first) or when a ``.NextPage`` element is present;
        ``False`` when there is no ``.NextPage`` element.
    """
    global page
    global proxies

    url = "http://www.dianping.com/shop/" + str(
        shop_id) + "/review_all/p" + str(now_page)
    print("url==========", url)
    req = requests.get(url, headers=headers)  # , proxies=proxies

    if CrawlerUtils.get_bs(req.text, 'div#log'):
        # NOTE(review): presumably the anti-crawler page; confirm.
        proxies = CrawlerUtils.get_proxies(api_url)
        page -= 1
        return True

    html_text = req.content.decode('utf-8')

    # Mapping between the svg thresholds and the character set.
    css_url = CrawlerUtils.get_css(html_text)
    str_re_compile = '<span class=\"(.*?)\"></span>'
    addr_tag = CrawlerUtils.get_bs(req.text, 'div.review-truncated-words')
    if addr_tag != []:
        addr_tag = str(addr_tag[0])
    else:
        addr_tag = str(CrawlerUtils.get_bs(req.text, 'div.review-words')[0])
    _tag = CrawlerUtils.get_tag(re.findall(str_re_compile, addr_tag))
    css_and_px_dict = CrawlerUtils.get_css_and_px_dict(css_url)

    doc = pq(req.text)
    if doc:
        # Parse the page once instead of once (or twice) per review.
        tree = etree.HTML(html_text)

        # Parse the reviews.  ``enumerate`` keeps the XPath position in step
        # with the review being processed even when a review is skipped; a
        # manually incremented counter fell behind after every ``continue``.
        review_items = doc("div.reviews-items > ul > li").items()
        for li_num, review in enumerate(review_items, start=1):
            userName = review(
                "div.main-review > div.dper-info > a").text()  # user name
            shopName = review(
                "div.main-review > div.misc-info.clearfix > span.shop").text()

            userID = ''
            user_id_link = review(
                "div.main-review > div.dper-info > a").attr("href")
            if user_id_link is not None:
                userID = "http://www.dianping.com" + user_id_link

            start_sapn = review(
                "div.review-rank > span.sml-rank-stars.sml-str10.star")
            if start_sapn:
                startShop = str(
                    start_sapn.attr("class")).split(" ")[1].replace(
                        "sml-str", "")
            else:
                startShop = 0
            describeShop = review("div.review-rank > span.score").text()

            # Prefer the "review-words Hide" text and fall back to the plain
            # "review-words" block when the former is absent.
            li_path = '//div[@class="reviews-items"]/ul/li[' + str(li_num) + ']'
            msg_values = tree.xpath(
                li_path +
                '/div[@class="main-review"]/div[@class="review-words Hide"]/node()'
            )
            if msg_values == []:
                msg_values = tree.xpath(
                    li_path +
                    '/div[@class="main-review"]/div[@class="review-words"]/node()'
                )
            pinglunShop = CrawlerUtils.get_last_value(css_url,
                                                      msg_values,
                                                      css_and_px_dict,
                                                      _tag,
                                                      is_num=False)
            if pinglunShop == '':
                continue

            timeShop = review(
                "div.main-review > div.misc-info.clearfix > span.time").text()
            review_shop = review(
                "div.main-review > div.misc-info.clearfix > span.shop").text()
            imgList = [
                "http://www.dianping.com" + img.attr("href")
                for img in review(
                    "div > div.review-pictures > ul > li> a").items()
            ]
            vip_span = review("div.main-review > div.dper-info > span")
            is_vip = 1 if vip_span.attr("class") == "vip" else 0
            recommend = review(
                "div.main-review > div.review-recommend > a").text()
            print("===========", pinglunShop)
            review_uuid = CrawlerUtils.get_md5_value(
                str(shop_id) + pinglunShop + str(timeShop))

            print(", 用户名:userName: {}\n, "
                  "商铺名称:shoName: {}\n, "
                  "用户ID链接:userID: {}\n, "
                  "评定星级:startShop: {}\n, "
                  "评论描述:describeShop : {}\n, "
                  "星级评分:startShop: {}\n, "
                  "评论详情:pinglunShop:{}\n, "
                  "评论时间:timeShop :{}\n, "
                  "评论商铺:Shop:{}\n, "
                  "评论图片:imgList:{}\n, "
                  "推荐菜品:recommend:{}\n, "
                  "是否vip:is_vip:{} "
                  "\n ".format(userName, shopName, userID, startShop,
                               describeShop, startShop, pinglunShop, timeShop,
                               review_shop, imgList, recommend, is_vip))

            record = {
                'uuid': review_uuid,
                'shop_id': shop_id,
                'username': userName,
                'shop_name': shopName,
                'is_vip': is_vip,
                'comment': pinglunShop,
                'recommend': recommend,
                'stars': startShop,
                'comment_create_time': timeShop,
            }
            DBUtils.save_to_db(Comment, record)

        # Pause once per page before the caller requests the next one.
        sleep(1)

    if not CrawlerUtils.get_tags(req.text, '.NextPage'):
        return False
    return True
def get_data(url):
    """Parse one Dianping shop-list page and save every shop found on it.

    The digits and address characters of the page are obfuscated through CSS
    classes; they are decoded with ``get_last_value`` using the class prefixes
    and the css-name-to-pixel mapping extracted from the page.  Any exception
    is logged and swallowed, so the function never raises.

    Args:
        url: URL of the shop-list page to fetch.

    Returns:
        None.
    """
    try:
        # Fetch the HTML text of the page.
        con = CrawlerUtils.get_html(url,
                                    headers=CrawlerUtils.get_headers(COOKIES),
                                    proxies=proxies)

        # Digits and text use different class prefixes, so each gets its own
        # pattern.
        num_re_compile = "<b><span class=\"(.*?)\"></span>"  # digit classes
        str_re_compile = '<span class="addr"><span class=\"(.*?)\"></span>'  # address classes

        css_url = get_css(con)
        num_svg_tag = get_tag(re.findall(num_re_compile, con))
        str_svg_tag = get_tag(re.findall(str_re_compile, con))
        # Mapping from css class name to pixel offsets.
        css_and_px_dict = get_css_and_px_dict(css_url)

        doc = etree.HTML(con)
        shops = doc.xpath('//div[@id="shop-all-list"]/ul/li')
        # The first list item is skipped.
        for shop in shops[1:]:
            title_link = shop.xpath('.//div[@class="tit"]/a')[0]
            shop_id = title_link.attrib["data-shopid"]
            # The value needs a format field; passing it as a bare extra
            # argument makes logging report a formatting error instead.
            logger.info("shop_id %s", shop_id)

            shop_name = shop.xpath('.//div[@class="tit"]/a')[0].attrib["title"]
            shop_name = bytes(shop_name.encode('utf-8')).decode('utf-8')

            comment_num = 0
            per_capita_price = taste = service = environment = 0

            # Star rating: the number after "sml-str" in the span's class.
            star_num = int(
                str(
                    shop.xpath('.//div[@class="comment"]/span')
                    [0].attrib["class"]).split("sml-str")[-1])

            # Total number of reviews and average price per person.
            for comment_block in shop.xpath('.//div[@class="comment"]'):
                _comment_data = comment_block.xpath(
                    'a[@class="review-num"]/b/node()')
                comment_num = get_last_value(css_url, _comment_data,
                                             css_and_px_dict, num_svg_tag)
                comment_num = int(comment_num) if comment_num else 0
                if comment_num == 0:
                    # NOTE(review): this leaves the whole function, so the
                    # remaining shops of the page are not processed; confirm
                    # that this is intended.
                    return

                _price = comment_block.xpath('a[@class="mean-price"]/b/node()')
                per_capita_price = get_last_value(css_url, _price,
                                                  css_and_px_dict, num_svg_tag)
                # The first character of the decoded price is dropped before
                # converting to int.
                per_capita_price = int(
                    per_capita_price[1:]) if per_capita_price else 0

            # Taste, service and environment scores.
            for score_node in shop.xpath('.//span[@class="comment-list"]/span'):
                label_text = score_node.xpath('text()')
                label = label_text[0] if label_text else None
                if label not in (u"口味", u"服务", u"环境"):
                    continue
                raw_score = get_last_value(css_url,
                                           score_node.xpath('b/node()'),
                                           css_and_px_dict, num_svg_tag)
                score = float(raw_score) if raw_score else 0
                if label == u"口味":
                    taste = score
                elif label == u"服务":
                    service = score
                else:
                    environment = score

            # Address.
            _ress = shop.xpath(
                './/div[@class="tag-addr"]/span[@class="addr"]/node()')
            address = get_last_value(css_url,
                                     _ress,
                                     css_and_px_dict,
                                     str_svg_tag,
                                     is_num=False)

            data = {
                'shop_id': shop_id,
                'shop_name': shop_name,
                'address': address,
                'per_capita_price': per_capita_price,
                'total_number_comments': comment_num,
                'stars': int(star_num),
                'taste': taste,
                'surroundings': environment,
                'serve': service,
            }
            logger.info('开始保存数据: %s', data)
            DBUtils.save_to_db(Shop, data)
    except Exception as e:
        # Best effort: log the failure (with traceback) and carry on.
        logger.error(e, exc_info=True)
'''
@Time : 2017-2-28 21:18
@Author : Moses
@File : GetHeroes.py
'''
import requests
from utils.DBUtils import DBUtils

# Fetches the Dota 2 hero list from the Steam Web API and stores every hero
# through DBUtils.insertHeros.
# API reference: https://wiki.teamfortress.com/wiki/WebAPI/GetHeroes
url = "http://api.steampowered.com/IEconDOTA2_570/GetHeroes/v1"
key = "?key=<key>"
params = "&language=zh_cn"

r = requests.get(url + key + params)
rj = r.json()
heros = rj['result']['heroes']

# Order in which the hero fields are handed to insertHeros.
hero_fields = ('localized_name', 'name', 'id')

dbutils = DBUtils()
for hero in heros:
    dbutils.insertHeros([hero[field] for field in hero_fields])
dbutils.close()