def _handle_request(self):
    page = int(self.get_argument('page', "1")) - 1
    step = 3
    startIndex = page * step
    dbHelper = MysqlHelper()
    mysql = "select * from msapp_need limit " + str(startIndex) + "," + str(step)
    print(mysql)
    result = dbHelper.get_all(mysql)
    print(result)
    result = json_success("success need get list")
    self.write(result)
    self.finish()
def _handle_request(self):
    self.logs("_handle_request")
    # str = "good man"
    dbHelper = MysqlHelper()
    # step1: read the request parameters
    good = self.get_good()
    # step2: invalid parameters, return an error
    if not good:
        self.logs("response_error_para error!")
        result = json_error(Code.ERROR_PARA, Code.ERROR_PARA_DESC)
        self.write(result)
        self.finish()
        return
    # step3: store the record in the database
    result = self.insert_good(dbHelper, good)
    if not result:
        result = json_error(Code.ERROR_GODD_INSERT, Code.ERROR_PARA_DESC)
        self.write(result)
        self.finish()
        return
    # step4: write to redis
    good_id = self.get_good_id(dbHelper, good)
    good.id = good_id
    publish_good(good, False)
    result = json_success("success")
    self.write(result)
    self.finish()
def _handle_request(self):
    self.logs("_handle_request")
    # str = "good man"
    dbHelper = MysqlHelper()
    # step1: read the request parameters
    need_info = self.get_need_info()
    # step2: invalid parameters, return an error
    if not need_info:
        self.logs("response_error_para error!")
        result = json_error(Code.ERROR_PARA, Code.ERROR_PARA_DESC)
        self.write(result)
        self.finish()
        return
    # step3: store the record in the database
    result = self.insert_need(dbHelper, need_info)
    if not result:
        result = json_error(Code.ERROR_GODD_INSERT, Code.ERROR_PARA_DESC)
        self.write(result)
        self.finish()
        return
    result = json_success("success")
    self.write(result)
    self.finish()
def start(self):
    # Register the dispatch queue and the finished-task queue on the network
    BaseManager.register('get_dispatched_task_queue',
                         callable=self.get_dispatched_task_queue)
    BaseManager.register('get_finished_task_queue',
                         callable=self.get_finished_task_queue)
    task = MysqlHelper.excuteFindOne(
        "select * from tb_task where Fid={}".format(self.taskId))
    Fip = task["FserverIp"]
    Fport = task["Fport"]
    Fauthkey = task["Fauthkey"]
    # Listen and start the service. Fauthkey is an arbitrary token that slaves use
    # to authenticate when they fetch tasks; FserverIp is the master's IP and
    # Fport the listening port (8888 by default).
    manager = BaseManager(address=(Fip, Fport), authkey=Fauthkey)
    manager.start()
    # Fetch the queues through the methods registered above
    dispatched_tasks = manager.get_dispatched_task_queue()  # dispatch queue
    finished_tasks = manager.get_finished_task_queue()      # result queue
    while True:
        try:
            task_2 = MysqlHelper.excuteFindOne(
                "select * from tb_task where Fid={}".format(self.taskId))
            num = int(task_2["Fnum"])
            url_list = self.url_list
            for index, url in enumerate(url_list):
                print url
                num += 1
                dispatched_spider = self.spider
                dispatched_spider.pageUrl = url
                dispatched_tasks.put(dispatched_spider)
                print "dispatching task: " + str(index + 1)
                MysqlHelper.excuteUpdate(self.table_name,
                                         {"Fnum": str(num + 1), "Fstate": 1},
                                         "Fid={}".format(self.taskId))
                time.sleep(1)
            print "------------ finished one round of task dispatching ----------"
            # After a full round of dispatching, mark the task as sleeping
            MysqlHelper.excuteUpdate(self.table_name, {"Fstate": 2},
                                     "Fid={}".format(self.taskId))
            # Drain the result queue
            while not dispatched_tasks.empty():
                # each result reports how many items were crawled
                result_spider = finished_tasks.get()
                print "task result:"
            # Pause before the next round of dispatching
            # time.sleep(int(task_2["FtimeInterval"]))
            manager.shutdown()
        except Exception as ex:
            MysqlHelper.excuteUpdate(self.table_name, {"Fstate": 0},
                                     "Fid={}".format(self.taskId))
            continue
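# The snippet above covers only the master side of the BaseManager task queue.
# Below is a minimal worker-side sketch, assuming the worker knows the same
# FserverIp/Fport/Fauthkey values and that the dispatched spider object exposes
# a hypothetical crawl() method (not shown in the source).
from multiprocessing.managers import BaseManager


def start_worker(ip, port, authkey):
    # Register the same queue names; no callable is needed on the client side.
    BaseManager.register('get_dispatched_task_queue')
    BaseManager.register('get_finished_task_queue')
    manager = BaseManager(address=(ip, port), authkey=authkey)
    manager.connect()

    dispatched_tasks = manager.get_dispatched_task_queue()
    finished_tasks = manager.get_finished_task_queue()

    while True:
        spider = dispatched_tasks.get()  # blocks until the master dispatches a task
        result = spider.crawl()          # hypothetical: crawl spider.pageUrl
        finished_tasks.put(result)       # report the result back to the master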
def show_tables(args):
    m = MysqlHelper(args.host, args.user, args.psw, args.database, args.port)
    res = m.fetchall("show tables")
    for o in res:
        tab = list(o.values())[0]
        print("-- %s --" % tab)
        res = m.fetchall("show create table %s" % tab)
        create_sql = res[0]['Create Table']
        print(create_sql)
        arr = create_sql.split("\n")
        for i, a in enumerate(arr):
            if i == 0:
                pass
            elif i == len(arr) - 1:
                pass
            else:
                q = '''ALTER TABLE `%s` ADD %s;''' % (tab, a.split(",")[0])
                print(q)
class AnjukeHousePipeline(object):

    def __init__(self):
        self.mysql = MysqlHelper()

    def insert(self, total_price, avg_price, title, house_type, building_area,
               floor, building_time, community, city, area, address, advantage,
               salesman, url, url_md5):
        sql = """
            insert into anjuke_secondhand_house
                (total_price, total_price_unit, avg_price, avg_price_unit, title,
                 house_type, building_area, building_area_unit, floor, building_time, community,
                 city, area, address, advantage, salesman, url, url_md5)
            values (%s,'%s',%s,'%s','%s','%s',%s,'%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')
        """ % (total_price, '万', avg_price, '元/m²', title, house_type,
               building_area, 'm²', floor, building_time, community, city, area,
               address, advantage, salesman, url, url_md5)
        self.mysql.execute(sql)

    def query_md5(self):
        sql = "select url_md5 from anjuke_secondhand_house"
        return self.mysql.query(sql)
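# A parameterized sketch of the same insert: the %-formatted SQL above breaks as
# soon as a title or address contains a quote. This variant assumes a plain
# pymysql connection rather than the project's MysqlHelper, whose execute()
# signature may not accept bound parameters; credentials below are placeholders.
import pymysql


def insert_house_parameterized(conn, total_price, avg_price, title, house_type,
                               building_area, floor, building_time, community,
                               city, area, address, advantage, salesman, url,
                               url_md5):
    sql = (
        "insert into anjuke_secondhand_house "
        "(total_price, total_price_unit, avg_price, avg_price_unit, title, "
        "house_type, building_area, building_area_unit, floor, building_time, community, "
        "city, area, address, advantage, salesman, url, url_md5) "
        "values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    params = (total_price, '万', avg_price, '元/m²', title, house_type,
              building_area, 'm²', floor, building_time, community, city, area,
              address, advantage, salesman, url, url_md5)
    with conn.cursor() as cursor:
        # The driver escapes every value, so quoting problems and injection go away.
        cursor.execute(sql, params)
    conn.commit()


# Example usage with placeholder credentials:
# conn = pymysql.connect(host='127.0.0.1', user='root', password='***',
#                        database='anjuke', charset='utf8mb4')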
def _handle_request(self):
    self.logs("_handle_request")
    # str = "author man"
    dbHelper = MysqlHelper()
    # step1: read the request parameters
    author = self.get_author_info()
    # step2: invalid parameters, return an error before setting defaults
    if not author:
        self.logs("response_error_para error!")
        result = json_error(Code.ERROR_PARA, Code.ERROR_PARA_DESC)
        self.write(result)
        self.finish()
        return
    author.point = 0
    author.status = 1
    # step3: store the author in the database, approved immediately
    result = self.insert_author(dbHelper, author)
    if not result:
        result = json_error(Code.ERROR_GODD_INSERT, Code.ERROR_PARA_DESC)
        self.write(result)
        self.finish()
        return
    # step4: write to redis
    author.id = self.get_db_author(dbHelper, author.openid)
    publish_author(author)
    data = {
        'id': author.id,
        'nickname': author.nickname,
        'point': author.point,
        'status': author.status,
        'town': author.town,
        'address': author.address,
        'phone': author.phone,
        'time': getTime()
    }
    result = json_success_data("success", data)
    self.write(result)
    self.finish()
def save_to_mysql():
    # Create the database helper object
    my_sql_helper = MysqlHelper(user='******', passwd='225669',
                                host='192.168.1.229', db='xcar')
    # Find the largest id currently in the cars table
    max_id = my_sql_helper.search('select id from cars order by id desc limit 1')
    car_id = max_id[0][0] + 1 if max_id and max_id[0] else 0  # primary key for cars
    my_sql_helper.connect()  # connect to the database
    with open('result.txt', 'r') as f:
        for line in f:  # iterate over the file to keep memory usage low
            dic = json.loads(line)
            sql_car = 'insert into cars(id,name,brand,level,star) values({},"{}","{}","{}",{})'. \
                format(car_id, dic['name'], dic['brand'], dic['level'], dic['star'])
            my_sql_helper.insert(sql_car)  # insert into cars
            for model in dic['models']:
                sql_models = ('insert into models(model, gearbox, motor, power, guiding_price, '
                              'seater, oil_consumption, car_id) '
                              'values("{}","{}","{}",{},{},{},{},{})').format(
                    model['model'], model['gearbox'], model['motor'],
                    model['power'], model['guiding_price'], model['seater'],
                    model['oil_consumption'], car_id)
                my_sql_helper.insert(sql_models)  # insert into models
            car_id += 1
    my_sql_helper.close()  # close the database connection
# Add a pre-ping (pessimistic connection check) so stale connections are reclaimed
# Add a pool recycle time (previously -1, i.e. never recycle)
# The pool defaults to QueuePool; it can also be configured just by setting pool_size
# Maximum concurrency of the pool = pool_size + max_overflow
# A query on a single connection is dropped after a 60s timeout
return records.Database(DB_URL,
                        poolclass=QueuePool,
                        pool_size=30,
                        max_overflow=300,
                        pool_timeout=30,
                        pool_recycle=1200)


try:
    root_db = sql_connection_by_records(rootdb_config)
    root_object = MysqlHelper(root_db)
except Exception as e:
    root_object = None
    server_logger.error(e)

"""
sql = "select * from nvwa"
rows = root_object.query(sql)

print rows[0]  # first row
>>> <Record {"user": "******", "name": "Hehe"}>

for r in rows:
    print r.user, r.name
    print r[0], r[1]
    print r['user'], r['name']

# to df
user_df = rows.export('df')
# the result set can also be written straight to a file
"""
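# The comments above mention a pre-ping, but the call shown never enables it.
# A minimal sketch of the same pool settings with the pre-ping switched on,
# assuming records forwards keyword arguments to SQLAlchemy's create_engine and
# that SQLAlchemy 1.2+ is installed; the DB_URL value here is a placeholder.
import records
from sqlalchemy.pool import QueuePool

DB_URL = "mysql+pymysql://user:***@127.0.0.1:3306/dbname?charset=utf8mb4"  # placeholder

db = records.Database(DB_URL,
                      poolclass=QueuePool,
                      pool_size=30,         # baseline pooled connections
                      max_overflow=300,     # max concurrency = pool_size + max_overflow
                      pool_timeout=30,      # wait at most 30s for a free connection
                      pool_recycle=1200,    # recycle connections older than 20 minutes
                      pool_pre_ping=True)   # test each connection before handing it out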
def get_shop_detail(redis_helper, page_index, param, location_point, count, flag):
    """
    Fetch data from the list page through to the detail page.
    :param page:
    :return:
    """
    wm_longitude_before = location_point[0].encode("utf-8")
    wm_latitude_before = location_point[1].encode('utf-8')
    wm_longitude = wm_longitude_before.replace('.', '')
    wm_latitude = wm_latitude_before.replace('.', '')
    print wm_longitude_before, wm_latitude_before, wm_longitude, wm_latitude
    page_index = int(page_index)
    print('current page: %s' % page_index)
    # list-page URL
    shop_list_url = param[0].encode('utf-8')
    # request headers
    headers = json.loads(param[1])
    # request body
    data = json.loads(param[2])
    data['page_index'] = str(page_index)
    data['wm_longitude'] = str(wm_longitude)
    data["wm_latitude"] = str(wm_latitude)
    data['sort_type'] = '5'
    # data['wm_longitude'] = "121396406"
    # data["wm_latitude"] = "31169214"
    # detail-page URL
    shop_detail_url = shop_list_url.replace('v2/poi/channelpage', 'v1/poi/food')
    # shop base-info page URL
    shop_base_info_url = shop_list_url.replace('v2/poi/channelpage', 'v1/poi/info')
    # response data of the list page
    try:
        item = {}
        # proxy = proxy_helper.judge_ip()
        proxy = proxy_helper_copy.get_ip()
        print proxy
        response_list = ""
        for tmp in range(3):
            response_list = request_url_response(shop_list_url, method="POST",
                                                 headers=headers, data=data,
                                                 proxy=proxy, verify=False)
            print response_list
            if not response_list:
                proxy = proxy_helper_copy.get_ip()
                continue
            else:
                break
        if not response_list:
            # the request failed: record the crawl count for this token
            print("request failed; recording the crawl count for this token")
            MysqlHelper.update_params_count(count, shop_list_url)
            print("switching to a new token")
            param = get_new_token(redis_helper)
            if param != None:
                count = 0
                get_shop_detail(redis_helper, page_index, param, location_point,
                                count, flag)
            else:
                keys = 'lgt_lat_page'
                values = (wm_longitude_before, wm_latitude_before, page_index)
                redis_helper.lpush_data(keys, values)
                return
        elif response_list.status_code == 200:
            json_data_list = json.loads(response_list.text)
            if not json_data_list.get('data'):
                # record how many requests this token has made
                MysqlHelper.update_params_count(count, shop_list_url)
                count = 0
                param = get_new_token(redis_helper)
                if param != None:
                    get_shop_detail(redis_helper, page_index, eval(param),
                                    location_point, count, flag)
                else:
                    print('recording the page count for the current point')
                    MysqlHelper.update_location_state_page(
                        wm_longitude_before, wm_latitude_before, page_index)
                    print('token is null')
                    keys = 'lgt_lat_page'
                    values = (wm_longitude_before, wm_latitude_before, page_index)
                    redis_helper.lpush_data(keys, values)
                    return
            # shop data on the list page
            else:
                response_list_datas = json.loads(
                    response_list.text)['data']['poilist']
                # If the list page is empty, change the state of this point.
                if response_list_datas == []:
                    print('list is None')
                    print('this point has 0 shops; changing its state')
                    MysqlHelper.update_location_state(wm_longitude_before,
                                                      wm_latitude_before,
                                                      page_index)
                    print('fetching a new longitude/latitude point')
                    location_point = redis_helper.spop_data('lgt_lat_page')
                    location_point = eval(location_point)
                    page_index = location_point[2]
                    get_shop_detail(redis_helper, page_index, param,
                                    location_point, count, flag)
                else:
                    get_detail_content(
                        item, response_list, response_list_datas, data, param,
                        shop_base_info_url, shop_detail_url, headers,
                        location_point, count, shop_list_url, page_index, flag,
                        wm_longitude_before, wm_latitude_before, proxy)
    except Exception as e:
        print e
def get_detail_content(item, response_list, response_list_datas, data, param,
                       shop_base_info_url, shop_detail_url, headers,
                       location_point, count, shop_list_url, page_index, flag,
                       wm_longitude_before, wm_latitude_before, proxy):
    """
    Process the detail-page data.
    :param response_list_datas:
    :return:
    """
    shop_distances_list = []
    for list_data in response_list_datas:
        # collect the distance of each shop
        distance = list_data['distance']
        name = list_data['name']
        print distance, name
        shop_distances_list.append(distance)
    if sort_helper.handle_sort_distace(shop_distances_list):
        print len(shop_distances_list)
        if len(shop_distances_list) == 20:
            count += len(shop_distances_list)
            print count
            for shop_data in response_list_datas:
                shop_name = shop_data['name']
                shop_id = shop_data['id']
                print shop_name, shop_id
                data['wm_poi_id'] = str(shop_id)
                data['wmpoiid'] = str(shop_id)
                # request the detail page
                detail_response = request_url_response(shop_detail_url,
                                                       method="POST",
                                                       headers=headers,
                                                       data=data,
                                                       proxy=proxy,
                                                       verify=False)
                # extract the products from the detail page
                response_detail_data = json.loads(detail_response.text).get('data')
                # request the shop base info
                time.sleep(3)
                base_info_response = request_url_response(shop_base_info_url,
                                                          method="POST",
                                                          headers=headers,
                                                          data=data,
                                                          proxy=proxy,
                                                          verify=False)
                # shop base-info data
                base_info_data = json.loads(base_info_response.text).get("data")
                item['shop_info'] = shop_data
                item['city'] = 'xiamenshi'
                item['base_info'] = base_info_data
                item['detail'] = response_detail_data
                kafka = Kafka()
                kafka.process_item(item)
            # If a next page exists keep paging; otherwise switch to a new point
            # (data stays unchanged).
            print json.loads(response_list.text)['data']['poi_has_next_page']
            if json.loads(response_list.text)['data']['poi_has_next_page']:
                page_index += 1
                get_shop_detail(redis_helper, page_index, param, location_point,
                                count, flag)
            else:
                # poi_has_next_page is false: no more pages, switch to a new point
                print('no next-page flag; switching point')
                location_point = redis_helper.spop_data('lgt_lat_page')
                location_point = eval(location_point)
                page_index = location_point[2].encode('utf-8')
                # change the state of the point
                print('changing the state of the point')
                MysqlHelper.update_location_state(wm_longitude_before,
                                                  wm_latitude_before, page_index)
                get_shop_detail(redis_helper, page_index, param, location_point,
                                count, flag)
        # Ascending order: if the list has fewer than 20 shops, check whether the
        # distance already exceeds roughly 4-5 km; if so, switch the token.
        if len(response_list_datas) < 20:
            length = len(response_list_datas)
            count += length
            print count
            list = sort_helper.handle_sort_distace_2(shop_distances_list)
            for distance in list[0]:
                print distance
                if distance > 5000:
                    print('fewer than 20 shops at this point, ascending order, first '
                          'distance over 5000; recording the page count for this point')
                    MysqlHelper.update_location_state_page(
                        wm_longitude_before, wm_latitude_before, page_index)
                    print('recording the request count for this token')
                    MysqlHelper.update_params_count(count, shop_list_url)
                    count = 0
                    param = get_new_token(redis_helper)
                    if param != None:
                        get_shop_detail(redis_helper, page_index, eval(param),
                                        location_point, count, flag)
                    else:
                        keys = 'lgt_lat_page'
                        values = (wm_longitude_before, wm_latitude_before, page_index)
                        redis_helper.lpush_data(keys, values)
                        return
                else:
                    for data in response_list_datas:
                        shop_name = data['name']
                        shop_id = data['id']
                        print shop_name, shop_id
                        data['wm_poi_id'] = str(shop_id)
                        data['wmpoiid'] = str(shop_id)
                        # request the detail page
                        detail_response = request_url_response(shop_detail_url,
                                                               method="POST",
                                                               headers=headers,
                                                               data=data,
                                                               verify=False)
                        # extract the products from the detail page
                        response_detail_data = json.loads(detail_response.text)['data']
                        # request the shop base info
                        base_info_response = request_url_response(
                            shop_base_info_url, method="POST", headers=headers,
                            data=data, proxy=proxy, verify=False)
                        # shop base-info data
                        base_info_data = json.loads(base_info_response.text)['data']
                        item['shop_info'] = data
                        item['city'] = 'xiamenshi'
                        item['base_info'] = base_info_data
                        item['detail'] = response_detail_data
                        # Table design idea: a single table with an auto-increment id,
                        # a JSON string, create_time, update_time, is_detail.
                        # Or two tables: a shop table and a shop-data table.
                        # Shop table: id, shop name, shop id, longitude/latitude,
                        # create_time, update_time, is_detail.
                        # Shop-detail table: id, shop_id (references the shop table),
                        # shop base info, create_time, update_time, is_detail.
                        kafka = Kafka()
                        kafka.process_item(item)
    else:
        # record how many requests this token has made
        # print len(shop_distances_list)
        # if json.loads(response_list.text)['data']['poi_has_next_page']:
        print('this point is not in ascending order; changing its state')
        MysqlHelper.update_location_state_page(wm_longitude_before,
                                               wm_latitude_before, page_index)
        print('not ascending order; switching token')
        for shop_data in response_list_datas:
            shop_name = shop_data['name']
            shop_id = shop_data['id']
            print shop_name, shop_id
            data['wm_poi_id'] = str(shop_id)
            data['wmpoiid'] = str(shop_id)
            # request the detail page
            detail_response = request_url_response(shop_detail_url, method="POST",
                                                   headers=headers, data=data,
                                                   verify=False)
            # extract the products from the detail page
            response_detail_data = json.loads(detail_response.text).get('data')
            # request the shop base info
            base_info_response = request_url_response(shop_base_info_url,
                                                      method="POST",
                                                      headers=headers,
                                                      data=data,
                                                      verify=False)
            # shop base-info data
            base_info_data = json.loads(base_info_response.text).get("data")
            item['shop_info'] = shop_data
            item['city'] = 'xiamenshi'
            item['base_info'] = base_info_data
            item['detail'] = response_detail_data
            kafka = Kafka()
            kafka.process_item(item)
        print('switching the token')
        MysqlHelper.update_params_count(count, shop_list_url)
        param = get_new_token(redis_helper)
        if param != None:
            count = 0
            get_shop_detail(redis_helper, page_index, eval(param),
                            location_point, count, flag)
        else:
            keys = 'lgt_lat_page'
            values = (wm_longitude_before, wm_latitude_before, page_index)
            redis_helper.lpush_data(keys, values)
            return
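# A sketch of the two-table layout described in the comments above (a shop table
# plus a shop-detail table keyed by the shop id). Table and column names are
# illustrative only; the source never defines the actual schema. create_tables()
# expects an already-open pymysql connection.
SHOP_TABLE_DDL = """
create table if not exists shop (
    id          bigint unsigned auto_increment primary key,
    shop_name   varchar(255) not null,
    shop_id     varchar(64)  not null,   -- the platform's own shop id
    longitude   varchar(32)  not null,
    latitude    varchar(32)  not null,
    create_time datetime default current_timestamp,
    update_time datetime default current_timestamp on update current_timestamp,
    is_detail   tinyint(1) default 0,    -- whether the detail row has been fetched
    unique key uniq_shop_id (shop_id)
)
"""

SHOP_DETAIL_DDL = """
create table if not exists shop_detail (
    id          bigint unsigned auto_increment primary key,
    shop_id     varchar(64) not null,    -- references shop.shop_id
    base_info   json,                    -- raw base-info payload
    detail      json,                    -- raw detail payload
    create_time datetime default current_timestamp,
    update_time datetime default current_timestamp on update current_timestamp,
    is_detail   tinyint(1) default 1,
    key idx_shop_id (shop_id)
)
"""


def create_tables(conn):
    # Execute both DDL statements on an open pymysql connection.
    with conn.cursor() as cursor:
        cursor.execute(SHOP_TABLE_DDL)
        cursor.execute(SHOP_DETAIL_DDL)
    conn.commit()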
#!/usr/bin/env python
# coding:utf-8
import logging
import os

from push_token_to_redis import CityTokenToRedis
from mysql_helper import MysqlHelper
from redis_helper import RedisHelper
from kafka_helper import Kafka
import time
import requests
import json
import sort_helper
import proxy_helper
import proxy_helper_copy

path = os.getcwd()
# Module-level helper instance: this rebinds the name MysqlHelper from the class
# to an instance, which is how get_shop_detail and get_detail_content call it.
MysqlHelper = MysqlHelper()
count = 0


def request_url_response(url, method="GET", headers=None, params=None,
                         proxy=None, data=None, verify=False):
    try:
        if method == "GET":
            response = requests.get(url, headers=headers, params=params,
def __init__(self):
    self.mysql = MysqlHelper()
def insert_txt(file_name, customer_tel, employee_id, date, text):
    dbutil = MysqlHelper()
    sql_key = "insert ignore into audio2text (`file_name`, `customer_tel`, `employee_id`, `date`, `text`) value " \
              "(%s, %s, %s, %s, %s)"
    dbutil.inset(sql_key, (file_name, customer_tel, employee_id, date, text))