Пример #1
0
 def _handle_request(self):
     page = int(self.get_argument('page', "1")) - 1
     step = 3
     startIndex = page * step
     dbHelper = MysqlHelper()
     mysql = "select * from msapp_need limit " + str(
         startIndex) + "," + str(step)
     print(mysql)
     result = dbHelper.get_all(mysql)
     print(result)
     result = json_success("success need get list")
     self.write(result)
     self.finish()
Пример #2
0
    def _handle_request(self):

        self.logs("_handle_request")
        # str = "good man"
        dbHelper = MysqlHelper()

        # step1: 获取参数值
        good = self.get_good()

        # step2: 参数不合法,返回错误
        if not good:
            self.logs("response_error_para error!")
            result = json_error(Code.ERROR_PARA, Code.ERROR_PARA_DESC)
            self.write(result)
            self.finish()
            return

        # step3: 储存用户到数据库
        result = self.insert_good(dbHelper, good)

        if not result:
            result = json_error(Code.ERROR_GODD_INSERT, Code.ERROR_PARA_DESC)
            self.write(result)
            self.finish()
            return

        # step4: 写到redis agl
        good_id = self.get_good_id(dbHelper, good)
        good.id = good_id

        publish_good(good, False)

        result = json_success("success")
        self.write(result)
        self.finish()
Пример #3
0
    def _handle_request(self):

        self.logs("_handle_request")
        # str = "good man"
        dbHelper = MysqlHelper()

        # step1: 获取参数值
        need_info = self.get_need_info()

        # step2: 参数不合法,返回错误
        if not need_info:
            self.logs("response_error_para error!")
            result = json_error(Code.ERROR_PARA, Code.ERROR_PARA_DESC)
            self.write(result)
            self.finish()
            return

        # step3: 储存用户到数据库
        result = self.insert_need(dbHelper, need_info)

        if not result:
            result = json_error(Code.ERROR_GODD_INSERT, Code.ERROR_PARA_DESC)
            self.write(result)
            self.finish()
            return

        result = json_success("success")
        self.write(result)
        self.finish()
Пример #4
0
    def start(self):
        # 把派发作业队列和完成作业队列注册到网络上
        BaseManager.register('get_dispatched_task_queue',
                             callable=self.get_dispatched_task_queue)
        BaseManager.register('get_finished_task_queue',
                             callable=self.get_finished_task_queue)

        task = MysqlHelper.excuteFindOne(
            "select * from tb_task where Fid={}".format(self.taskId))
        Fip = task["FserverIp"]
        Fport = task["Fport"]
        Fauthkey = task["Fauthkey"]
        # 监听端口和启动服务,Fauthkey为验证码,自己随便取的,slave连接获取任务时用于验证身份,FserverIp为主机的ip,Fport连接端口号,一般默认是8888端口
        manager = BaseManager(address=(Fip, Fport), authkey=Fauthkey)
        manager.start()

        # 使用上面注册的方法获取队列
        dispatched_tasks = manager.get_dispatched_task_queue()  #获取派发队列
        finished_tasks = manager.get_finished_task_queue()  #获取返回队列

        while True:
            try:
                task_2 = MysqlHelper.excuteFindOne(
                    "select * from tb_task where Fid={}".format(self.taskId))
                num = int(task_2["Fnum"])
                url_list = self.url_list
                for index, url in enumerate(url_list):
                    print url
                    num += 1
                    dispatched_spider = self.spider
                    dispatched_spider.pageUrl = url
                    dispatched_tasks.put(dispatched_spider)
                    print "派发任务: " + str(index + 1)
                    MysqlHelper.excuteUpdate(self.table_name, {
                        "Fnum": str(num + 1),
                        "Fstate": 1
                    }, "Fid={}".format(self.taskId))
                    time.sleep(1)
                print "------------已完成一轮任务派发----------"
                #完成一轮任务派发就将状态改为休眠中
                MysqlHelper.excuteUpdate(self.table_name, {"Fstate": 2},
                                         "Fid={}".format(self.taskId))
                #返回队列执行结果
                while not dispatched_tasks.empty():
                    #返回一个爬取的数量结果
                    result_spider = finished_tasks.get()
                    print "任务返回结果:"
                #暂停时间继续进行下一轮的任务派发
                # time.sleep(int(task_2["FtimeInterval"]))
                manager.shutdown()
            except Exception as ex:
                MysqlHelper.excuteUpdate(self.table_name, {"Fstate": 0},
                                         "Fid={}".format(self.taskId))
                continue
Пример #5
0
def show_tables(args):
    m = MysqlHelper(args.host, args.user, args.psw, args.database, args.port)
    res = m.fetchall("show tables")
    for o in res:
        tab = list(o.values())[0]
        print("-- %s --" % tab)
        res = m.fetchall("show create table %s" % tab)
        create_sql = res[0]['Create Table']
        print(create_sql)
        arr = create_sql.split("\n")

        for i, a in enumerate(arr):
            if i == 0:
                pass
            elif i == len(arr) - 1:
                pass
            else:
                q = '''ALTER TABLE `%s` ADD %s;''' % (tab, a.split(",")[0])
                print(q)
Пример #6
0
class AnjukeHousePipeline(object):
    def __init__(self):
        self.mysql = MysqlHelper()

    def insert(self, total_price, avg_price, title, house_type, building_area,
               floor, building_time, community, city, area, address, advantage,
               salesman, url, url_md5):
        sql = """
            insert into anjuke_secondhand_house (total_price,total_price_unit,avg_price,avg_price_unit,title,
            house_type,building_area,building_area_unit,floor,building_time,community,
            city,area,address,advantage,salesman,url,url_md5)
            values (%s,'%s',%s,'%s','%s','%s',%s,'%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')
        """ % (total_price, '万', avg_price, '元/m²', title, house_type,
               building_area, 'm²', floor, building_time, community, city,
               area, address, advantage, salesman, url, url_md5)
        self.mysql.execute(sql)

    def query_md5(self):
        sql = "select url_md5 from anjuke_secondhand_house"
        return self.mysql.query(sql)
Пример #7
0
    def _handle_request(self):

        self.logs("_handle_request")
        # str = "author man"
        dbHelper = MysqlHelper()

        # step1: 获取参数值
        author = self.get_author_info()
        author.point = 0
        author.status = 1

        # step2: 参数不合法,返回错误
        if not author:
            self.logs("response_error_para error!")
            result = json_error(Code.ERROR_PARA, Code.ERROR_PARA_DESC)
            self.write(result)
            self.finish()
            return

        # step3: 储存用户到数据库,直接为审核通过状态
        result = self.insert_author(dbHelper, author)

        if not result:
            result = json_error(Code.ERROR_GODD_INSERT, Code.ERROR_PARA_DESC)
            self.write(result)
            self.finish()
            return

        # step4: 写到redis
        author.id = self.get_db_author(dbHelper, author.openid)

        publish_author(author)

        data = {
            'id': author.id,
            'nickname': author.nickname,
            'point': author.point,
            'status': author.status,
            'town': author.town,
            'address': author.address,
            'phone': author.phone,
            'time': getTime()
        }

        result = json_success_data("success", data)
        self.write(result)
        self.finish()
Пример #8
0
def save_to_mysql():
    my_sql_helper = MysqlHelper(user='******',
                                passwd='225669',
                                host='192.168.1.229',
                                db='xcar')  # 创建数据库工具对象

    max_id = my_sql_helper.search(
        'select id from cars order by id desc limit 1')  # 查找 表cars中最大的id
    car_id = max_id[0][0] + 1 if max_id and max_id[0] else 0  # car的主键值

    my_sql_helper.connect()  # 连接数据库
    with open('result.txt', 'r') as f:
        for line in f:  # 迭代读取文件,减小内存开销。
            dic = json.loads(line)
            sql_car = 'insert into cars(id,name,brand,level,star) values({},"{}","{}","{}",{})'. \
                format(car_id, dic['name'], dic['brand'], dic['level'], dic['star'])
            my_sql_helper.insert(sql_car)  # cars中插入数据

            for model in dic['models']:
                sql_models = 'insert into models(model, gearbox, motor, power, guiding_price, seater, oil_consumption, car_id) ' 'values("{}","{}","{}",{},{},{},{},{})'.format(
                    model['model'], model['gearbox'], model['motor'],
                    model['power'], model['guiding_price'], model['seater'],
                    model['oil_consumption'], car_id)
                my_sql_helper.insert(sql_models)  # models 中插入数据

            car_id += 1

    my_sql_helper.close()  # 关闭数据库连接
Пример #9
0
    # 增加预ping,做悲观连接检测,失效则收回连接
    # 增加连接池回收时间,原来为-1,
    # 预设连接池为QueuePool,也可以只通过设定poolsize的方式指定
    # 连接池的最大并发 = pool_size + max_overflow
    # 单连接查询60s超时丢弃
    return records.Database(DB_URL,
                            poolclass=QueuePool,
                            pool_size=30,
                            max_overflow=300,
                            pool_timeout=30,
                            pool_recycle=1200)


try:
    root_db = sql_connection_by_records(rootdb_config)
    root_object = MysqlHelper(root_db)
except Exception as e:
    root_object = None
    server_logger.error(e)
"""
sql = "select * from nvwa"
rows = root_object.query(sql)
print rows[0]
>>> 第一行 <Record {"user": "******", "name": "Hehe"}>
for r in rows:
    print r.user, r.name
    print r[0], r[1]
    print r['user'], r['name']
# to df
user_df = rows.export('df')
# 也可以直接输出到文件
Пример #10
0
def get_shop_detail(redis_helper, page_index, param, location_point, count,
                    flag):
    """
    获取列表页到详情页的数据
    :param page:
    :return:
    """
    wm_longitude_before = location_point[0].encode("utf-8")
    wm_latitude_before = location_point[1].encode('utf-8')
    wm_longitude = wm_longitude_before.replace('.', '')
    wm_latitude = wm_latitude_before.replace('.', '')
    print wm_longitude_before, wm_latitude_before, wm_longitude, wm_latitude
    page_index = int(page_index)
    print('当前页数为:%s' % page_index)
    # 列表页的url
    shop_list_url = param[0].encode('utf-8')
    # 请求头
    headers = json.loads(param[1])
    # 请求体
    data = json.loads(param[2])
    data['page_index'] = str(page_index)
    data['wm_longitude'] = str(wm_longitude)
    data["wm_latitude"] = str(wm_latitude)
    data['sort_type'] = '5'
    # data['wm_longitude'] = "121396406"
    # data["wm_latitude"] = "31169214"
    # 详情页的url
    shop_detail_url = shop_list_url.replace('v2/poi/channelpage',
                                            'v1/poi/food')
    # 商家基本信息页面url:
    shop_base_info_url = shop_list_url.replace('v2/poi/channelpage',
                                               'v1/poi/info')
    # 列表页的响应数据
    try:
        item = {}
        # proxy = proxy_helper.judge_ip()
        proxy = proxy_helper_copy.get_ip()
        print proxy
        response_list = ""
        for tmp in range(3):
            response_list = request_url_response(shop_list_url,
                                                 method="POST",
                                                 headers=headers,
                                                 data=data,
                                                 proxy=proxy,
                                                 verify=False)
            print response_list
            if not response_list:
                proxy = proxy_helper_copy.get_ip()
                continue
            else:
                break
        if not response_list:
            # 请求失败怎么处理,返回False
            print("请求失败记录token的爬取数量")
            MysqlHelper.update_params_count(count, shop_list_url)
            print("切换token值")
            param = get_new_token(redis_helper)
            if param != None:
                count = 0
                get_shop_detail(redis_helper, page_index, param,
                                location_point, count, flag)
            else:
                keys = 'lgt_lat_page'
                values = (wm_latitude_before, wm_latitude_before, page_index)
                redis_helper.lpush_data(keys, values)
                return
        elif response_list.status_code == 200:
            json_data_list = json.loads(response_list.text)
            if not json_data_list.get('data'):
                # 记录token的访问数量
                MysqlHelper.update_params_count(count, shop_list_url)
                count = 0
                param = get_new_token(redis_helper)
                if param != None:
                    get_shop_detail(redis_helper, page_index, eval(param),
                                    location_point, count, flag)
                else:
                    print('记录当前点的翻页数量')
                    MysqlHelper.update_location_state_page(
                        wm_longitude_before, wm_latitude_before, page_index)
                    print('token is null')
                    keys = 'lgt_lat_page'
                    values = (wm_latitude_before, wm_latitude_before,
                              page_index)
                    redis_helper.lpush_data(keys, values)

                    return
            # 列表页的店铺数据
            else:
                response_list_datas = json.loads(
                    response_list.text)['data']['poilist']
                # 判断列表页是不是为空列表,如果为空列表改变点的状态。
                if response_list_datas == []:
                    print('list is None')
                    print('该点的店铺数量为0,改变的状态')
                    MysqlHelper.update_location_state(wm_longitude_before,
                                                      wm_latitude_before,
                                                      page_index)
                    print('获取新的经纬度点')
                    location_point = redis_helper.spop_data('lgt_lat_page')
                    location_point = eval(location_point)
                    page_index = location_point[2]
                    get_shop_detail(redis_helper, page_index, param,
                                    location_point, count, flag)
                else:
                    get_detail_content(
                        item, response_list, response_list_datas, data, param,
                        shop_base_info_url, shop_detail_url, headers,
                        location_point, count, shop_list_url, page_index, flag,
                        wm_longitude_before, wm_latitude_before, proxy)
    except Exception as e:
        print e
Пример #11
0
def get_detail_content(item, response_list, response_list_datas, data, param,
                       shop_base_info_url, shop_detail_url, headers,
                       location_point, count, shop_list_url, page_index, flag,
                       wm_longitude_before, wm_latitude_before, proxy):
    """
    处理详情页的数据
    :param response_list_datas:
    :return:
    """
    shop_distances_list = []
    for list_data in response_list_datas:
        # 获取距离
        distance = list_data['distance']
        name = list_data['name']
        print distance, name
        shop_distances_list.append(distance)
    if sort_helper.handle_sort_distace(shop_distances_list):
        print len(shop_distances_list)
        if len(shop_distances_list) == 20:
            count += len(shop_distances_list)
            print count
            for shop_data in response_list_datas:
                shop_name = shop_data['name']
                shop_id = shop_data['id']
                print shop_name, shop_id
                data['wm_poi_id'] = str(shop_id)
                data['wmpoiid'] = str(shop_id)
                # 发起详情页的请求
                detail_response = request_url_response(shop_detail_url,
                                                       method="POST",
                                                       headers=headers,
                                                       data=data,
                                                       proxy=proxy,
                                                       verify=False)
                # 提取详情页的产品
                response_detail_data = json.loads(
                    detail_response.text).get('data')
                # 发起商家基本信息请求
                time.sleep(3)
                base_info_response = request_url_response(shop_base_info_url,
                                                          method="POST",
                                                          headers=headers,
                                                          data=data,
                                                          proxy=proxy,
                                                          verify=False)
                # 获取商家基本信息数据
                base_info_data = json.loads(
                    base_info_response.text).get("data")
                item['shop_info'] = shop_data
                item['city'] = 'xiamenshi'
                item['base_info'] = base_info_data
                item['detail'] = response_detail_data
                kafka = Kafka()
                kafka.process_item(item)
            # 判断下一页的参数是否存在,如果存在继续访问下一页,如果不存在,切换点,data值不变
            print json.loads(response_list.text)['data']['poi_has_next_page']
            if json.loads(response_list.text)['data']['poi_has_next_page']:
                page_index += 1
                get_shop_detail(redis_helper, page_index, param,
                                location_point, count, flag)
            else:
                # 下一页的参数为false,没有翻页数据,换点
                print('没有翻页参数换点')
                location_point = redis_helper.spop_data('lgt_lat_page')
                location_point = eval(location_point)
                page_index = location_point[2].encode('utf-8')
                # 改变点的状态
                print('改变点的状态')
                MysqlHelper.update_location_state(wm_longitude_before,
                                                  wm_latitude_before,
                                                  page_index)
                get_shop_detail(redis_helper, page_index, param,
                                eval(location_point), count, flag)
        # 如果是升序,判断列表是不是小于20,如果小于20,判断距离是不是大于4公里,如果大于四公里,切换token值
        if len(response_list_datas) < 20:
            length = len(response_list_datas)
            count += length
            print count
            list = sort_helper.handle_sort_distace_2(shop_distances_list)
            for distance in list[0]:
                print distance
                if distance > 5000:
                    print('该点的店铺数量小于20,升序,第一个点大于5000,经纬度的点,记录经纬度的翻页数量')
                    MysqlHelper.update_location_state_page(
                        wm_longitude_before, wm_latitude_before, page_index)
                    print('记录token的请求数量')
                    MysqlHelper.update_params_count(count, shop_list_url)
                    count = 0
                    param = get_new_token(redis_helper)
                    if param != None:
                        get_shop_detail(redis_helper, page_index, eval(param),
                                        location_point, count, flag)
                    else:
                        keys = 'lgt_lat_page'
                        values = (wm_latitude_before, wm_latitude_before,
                                  page_index)
                        redis_helper.lpush_data(keys, values)
                        return
                else:
                    for data in response_list_datas:
                        shop_name = data['name']
                        shop_id = data['id']
                        print shop_name, shop_id
                        data['wm_poi_id'] = str(shop_id)
                        data['wmpoiid'] = str(shop_id)
                        # 发起详情页的请求
                        detail_response = request_url_response(shop_detail_url,
                                                               method="POST",
                                                               headers=headers,
                                                               data=data,
                                                               verify=False)
                        # 提取详情页的产品
                        response_detail_data = json.loads(
                            detail_response.text)['data']
                        # 发起商家基本信息请求
                        base_info_response = request_url_response(
                            shop_base_info_url,
                            method="POST",
                            headers=headers,
                            data=data,
                            proxy=proxy,
                            verify=False)
                        # 获取商家基本信息数据
                        base_info_data = json.loads(
                            base_info_response.text)['data']
                        item['shop_info'] = data
                        item['city'] = 'xiamenshi'
                        item['base_info'] = base_info_data
                        item['detail'] = response_detail_data
                        # 设计一张表 id 自增 json: string creat_time update_time  is_detial
                        # 设计表两张表 一张店铺表  一张店铺中数据表
                        # id 店铺的名称 店铺的ID 经纬度 create_time update_time  is_detial
                        # 店铺详情表 ID shop_id (关联店铺的ID) 店铺的基本信息 create_time update_time  is_detial
                        kafka = Kafka()
                        kafka.process_item(item)
    else:
        # 记录token的请求个数
        # print len(shop_distances_list)
        # if json.loads(response_list.text)['data']['poi_has_next_page']:
        print('该点不是升序,改变点的状态')
        MysqlHelper.update_location_state_page(wm_longitude_before,
                                               wm_latitude_before, page_index)
        print('该点不是升序排序,切换token')
        for shop_data in response_list_datas:
            shop_name = shop_data['name']
            shop_id = shop_data['id']
            print shop_name, shop_id
            data['wm_poi_id'] = str(shop_id)
            data['wmpoiid'] = str(shop_id)
            # 发起详情页的请求
            detail_response = request_url_response(shop_detail_url,
                                                   method="POST",
                                                   headers=headers,
                                                   data=data,
                                                   verify=False)
            # 提取详情页的产品
            response_detail_data = json.loads(detail_response.text).get('data')
            # 发起商家基本信息请求
            base_info_response = request_url_response(shop_base_info_url,
                                                      method="POST",
                                                      headers=headers,
                                                      data=data,
                                                      verify=False)
            # 获取商家基本信息数据
            base_info_data = json.loads(base_info_response.text).get("data")
            item['shop_info'] = shop_data
            item['city'] = 'xiamenshi'
            item['base_info'] = base_info_data
            item['detail'] = response_detail_data
            kafka = Kafka()
            kafka.process_item(item)
        print('切换token的值')
        MysqlHelper.update_params_count(count, shop_list_url)
        param = get_new_token(redis_helper)
        if param != None:
            count = 0
            get_shop_detail(redis_helper, page_index, eval(param),
                            location_point, count, flag)
        else:
            keys = 'lgt_lat_page'
            values = (wm_latitude_before, wm_latitude_before, page_index)
            redis_helper.lpush_data(keys, values)
            return
Пример #12
0
#!/usr/bin/env python
# coding:utf-8
import logging
import os
from push_token_to_redis import CityTokenToRedis
from mysql_helper import MysqlHelper
from redis_helper import RedisHelper
from kafka_helper import Kafka
import time
import requests
import json
import sort_helper
import proxy_helper
import proxy_helper_copy
path = os.getcwd()
MysqlHelper = MysqlHelper()
count = 0


def request_url_response(url,
                         method="GET",
                         headers=None,
                         params=None,
                         proxy=None,
                         data=None,
                         verify=False):
    try:
        if method == "GET":
            response = requests.get(url,
                                    headers=headers,
                                    params=params,
Пример #13
0
 def __init__(self):
     self.mysql = MysqlHelper()
Пример #14
0
def insert_txt(file_name, customer_tel, employee_id, date, text):
    dbutil = MysqlHelper()
    sql_key = "insert ignore into audio2text (`file_name`, `customer_tel`, `employee_id`, `date`, `text`) value " \
              "(%s, %s, %s, %s, %s)"
    dbutil.inset(sql_key, (file_name, customer_tel, employee_id, date, text))