Пример #1
0
def get_db_city():
    """
    Load the code-to-name lookup tables for areas and cities.

    Runs one SELECT against each of ``area_city_ke``, ``ershou_area_ke`` and
    ``provice_city_ke`` and converts every two-column result set to a dict
    keyed by the first column (the code).

    :return: dict with the keys ``result_fang``, ``result_ershou`` and
        ``result_city``; an empty dict when a query or a conversion fails
        (the error is logged).
    """
    queries = (
        ('result_fang', """select a.area_code, a.area from area_city_ke a """),
        ('result_ershou', """select a.area_code, a.area from ershou_area_ke a """),
        ('result_city', """select a.city_code, a.city from provice_city_ke a """),
    )

    # Borrow a MySQL connection from the pool.
    conn = POOL.connection()
    cursor = conn.cursor()
    try:
        fetched = []
        for key, statement in queries:
            cursor.execute(statement)
            fetched.append((key, cursor.fetchall()))
        # Build the dicts only once every query has been executed.
        result = {key: dict(rows) for key, rows in fetched}
    except Exception as exc:
        logger.error(repr(exc))
        result = {}
    finally:
        cursor.close()
        conn.close()
    return result
Пример #2
0
def get_area_city():
    """
    Load every area together with its city and province, nested by province.

    Two queries are run: ``area_city_ke`` joined with ``provice_city_ke``
    (stored under ``result_fang``) and ``ershou_area_ke`` joined with
    ``provice_city_ke`` (stored under ``result_ershou``).

    :return: dict with the keys ``result_fang`` and ``result_ershou``. Each
        value is a list of ``{'name': <province>, 'city': [...]}`` entries,
        where every city is ``{'city_name', 'city_code', 'area', 'provice'}``
        and ``area`` is a list of ``{'area_name', 'area_code'}`` dicts.
        An empty dict is returned when anything fails (the error is logged).
    """
    # Borrow a MySQL connection from the pool.
    coon = POOL.connection()
    cur = coon.cursor()
    sql_fang = """
        select a.area,a.area_code, b.city, b.city_code, b.provice from area_city_ke a 
        join provice_city_ke b on a.city_id=b.id
    """
    sql_ershou = """
        select a.area,a.area_code, b.city, b.city_code, b.provice from ershou_area_ke a 
        join provice_city_ke b on a.city_id=b.id
    """
    try:
        cur.execute(sql_fang)
        result_fang = cur.fetchall()
        cur.execute(sql_ershou)
        result_ershou = cur.fetchall()

        result = {
            'result_fang': _group_areas_by_province(result_fang),
            'result_ershou': _group_areas_by_province(result_ershou),
        }
    except Exception as e:
        logger.error(repr(e))
        result = dict()
    finally:
        cur.close()
        coon.close()
    return result


def _group_areas_by_province(rows):
    """Nest flat (area, area_code, city, city_code, provice) rows as province -> city -> area."""
    cities = {}     # city_code -> city entry, in first-seen order
    provinces = {}  # province name -> province entry, in first-seen order
    for area_name, area_code, city_name, city_code, province in rows:
        provinces.setdefault(province, {'name': province, 'city': []})
        # Exact key lookup: a substring test would also match every city
        # whose code merely contains this code.
        entry = cities.get(city_code)
        if entry is None:
            entry = cities[city_code] = {
                'city_name': city_name,
                'city_code': city_code,
                'area': [],
            }
        # The last row seen for a city decides its name and province.
        entry['city_name'] = city_name
        entry['provice'] = province
        entry['area'].append({'area_name': area_name, 'area_code': area_code})

    for entry in cities.values():
        provinces[entry['provice']]['city'].append(entry)
    return list(provinces.values())
Пример #3
0
def get_xiaou_detail(xiaoqu_name):
    """
    Fetch the stored detail row of one xiaoqu by its title.

    :param xiaoqu_name: value compared with ``xiaoqu_detail_ke.xiaoqu_title``.
    :return: the first matching row as returned by ``cursor.fetchone()``, or
        ``None`` when nothing matches or the query fails (the error is logged).
    """
    # Borrow a MySQL connection from the pool.
    coon = POOL.connection()
    cur = coon.cursor()
    sql = """select * from xiaoqu_detail_ke where xiaoqu_title=%s"""
    try:
        # DB-API parameters must be a sequence; a bare string is itself a
        # sequence of characters and is mis-bound by drivers that iterate it.
        cur.execute(sql, (xiaoqu_name,))
        result = cur.fetchone()
    except Exception as e:
        logger.error(repr(e))
        result = None
    finally:
        cur.close()
        coon.close()
    return result
Пример #4
0
def get_ch_city(city):
    """
    Look up the city name stored for a city code.

    :param city: value compared with ``provice_city_ke.city_code``.
    :return: a one-column row holding the ``city`` value, or ``None`` when
        the code is unknown or the query fails (the error is logged).
    """
    # Borrow a MySQL connection from the pool.
    coon = POOL.connection()
    cur = coon.cursor()
    sql = """select city from provice_city_ke where city_code=%s"""
    try:
        # DB-API parameters must be a sequence; a bare string is itself a
        # sequence of characters and is mis-bound by drivers that iterate it.
        cur.execute(sql, (city,))
        result = cur.fetchone()
    except Exception as e:
        logger.error(repr(e))
        result = None
    finally:
        cur.close()
        coon.close()
    return result
Пример #5
0
def get_ch_area(area, houses_type):
    """
    Look up the area name stored for an area code.

    ``loupan`` codes are resolved against ``area_city_ke``; ``ershou`` and
    ``xiaoqu`` codes are both resolved against ``ershou_area_ke``.

    :param area: value compared with the ``area_code`` column.
    :param houses_type: one of ``'loupan'``, ``'ershou'`` or ``'xiaoqu'``.
    :return: a one-column row holding the ``area`` value, or ``None`` when
        the code is unknown, ``houses_type`` is not one of the three keys, or
        the query fails (the error is logged).
    """
    # Borrow a MySQL connection from the pool.
    coon = POOL.connection()
    cur = coon.cursor()
    sql_fang = """select a.area from area_city_ke a where a.area_code=%s"""
    sql_ershou = """select a.area from ershou_area_ke a where a.area_code=%s"""
    sql_dict = {
        'loupan': sql_fang,
        'ershou': sql_ershou,
        'xiaoqu': sql_ershou,
    }
    try:
        # An unknown houses_type raises KeyError here and is handled below.
        # DB-API parameters must be a sequence; a bare string is itself a
        # sequence of characters and is mis-bound by drivers that iterate it.
        cur.execute(sql_dict[houses_type], (area,))
        result = cur.fetchone()
    except Exception as e:
        logger.error(repr(e))
        result = None
    finally:
        cur.close()
        coon.close()
    return result
Пример #6
0
def insert_crawl_task(house_type, city, area, crawl_time):
    """
    Record one crawl in the ``crawl_task`` table.

    :param house_type: value stored in the ``house_type`` column.
    :param city: value stored in the ``city`` column.
    :param area: value stored in the ``area`` column.
    :param crawl_time: value stored in the ``crawl_time`` column.
    :return: ``True`` once the insert has been committed; ``False`` when it
        fails (the error is logged and the transaction is rolled back).
    """
    statement = """insert into crawl_task(house_type, city, area, crawl_time) VALUES (%s, %s, %s, %s)"""
    values = (house_type, city, area, crawl_time)

    # Borrow a MySQL connection from the pool.
    conn = POOL.connection()
    cursor = conn.cursor()
    try:
        cursor.execute(statement, values)
        conn.commit()
    except Exception as exc:
        logger.error(repr(exc))
        conn.rollback()
        return False
    else:
        return True
    finally:
        # Runs on both return paths above.
        cursor.close()
        conn.close()
Пример #7
0
def get_crawl_task(house_type, city, area):
    """
    Fetch the crawl tasks created today for one house type, city and area.

    Only rows whose ``crawl_time`` is not null and whose ``createtime`` falls
    on the database's current day are selected.

    :param house_type: value compared with the ``house_type`` column.
    :param city: value compared with the ``city`` column.
    :param area: value compared with the ``area`` column.
    :return: all matching rows as returned by ``cursor.fetchall()``, or
        ``None`` when the query fails (the error is logged).
    """
    query = """
        select * from crawl_task where house_type=%s and city=%s and area=%s and crawl_time is not null 
        and to_days(createtime)=to_days(now())
    """
    # Borrow a MySQL connection from the pool.
    conn = POOL.connection()
    cursor = conn.cursor()
    rows = None  # stays None unless both execute and fetch succeed
    try:
        cursor.execute(query, (house_type, city, area))
        rows = cursor.fetchall()
    except Exception as exc:
        logger.error(repr(exc))
    finally:
        cursor.close()
        conn.close()
    return rows
Пример #8
0
def get_info(house_type, city, area):
    """
    Fetch the listing rows of one city/area for the given house type.

    The query joins ``houses_city_ke`` with the city and area name tables and
    with the detail table that belongs to ``house_type``
    (``xiaoqu_info_ke``, ``ershou_info_ke`` or ``houses_info_ke``).

    :param house_type: one of ``'loupan'``, ``'ershou'`` or ``'xiaoqu'``.
    :param city: value compared with ``houses_city_ke.city_code``.
    :param area: value compared with ``houses_city_ke.area_code``.
    :return: for ``'ershou'`` and ``'xiaoqu'`` the rows exactly as returned
        by ``cursor.fetchall()``; for ``'loupan'`` a list of lists in which
        the numeric type (column 4) and status (column 5) codes are replaced
        by their labels. ``None`` when ``house_type`` is unknown or the query
        fails (the error is logged).
    """
    # Borrow a MySQL connection from the pool.
    coon = POOL.connection()
    cur = coon.cursor()
    sql_xiaoqu = """
    select a.id, b.city, ack.area, xiao.xiaoqu_name, xiao.houseinfo, xiao.positioninfo, xiao.taglist, xiao.price, xiao.on_sale
        from houses_city_ke a
        left join provice_city_ke b on a.city_code=b.city_code
        left join ershou_area_ke ack on a.area_code = ack.area_code
        join xiaoqu_info_ke xiao on xiao.houses_id = a.id
        where a.city_code=%s and a.area_code=%s
    """
    sql_ershou = """
    select a.id, b.city, ack.area, ershou.ershou_name, ershou.positioninfo, ershou.houseinfo, ershou.followinfo, ershou.tag, ershou.priceinfo
        from houses_city_ke a
        left join provice_city_ke b on a.city_code=b.city_code
        left join ershou_area_ke ack on a.area_code = ack.area_code
        join ershou_info_ke ershou on ershou.houses_id = a.id
        where a.city_code=%s and a.area_code=%s
    """
    sql_loupan = """
    select a.id, b.city, ack.area, houses.houses_title, houses.houses_type, houses.houses_status, houses.houses_location,
            houses.houses_room, houses.houses_tag, houses.houses_price
        from houses_city_ke a
        left join provice_city_ke b on a.city_code=b.city_code
        left join area_city_ke ack on a.area_code = ack.area_code
        join houses_info_ke houses on houses.houses_id = a.id
        where a.city_code=%s and a.area_code=%s
    """
    sql_dict = {
        'loupan': sql_loupan,
        'ershou': sql_ershou,
        'xiaoqu': sql_xiaoqu,
    }

    try:
        # An unknown house_type raises KeyError here and is handled below.
        cur.execute(sql_dict[house_type], (city, area))
        result = cur.fetchall()
    except Exception as e:
        logger.error(repr(e))
        result = None
    finally:
        cur.close()
        coon.close()

    # Only loupan rows need clean-up, and a failed query has nothing to clean.
    if house_type != 'loupan' or result is None:
        return result

    # Loupan type: 1 residential, 2 villa, 3 commercial, 4 office, 5 ground-floor shop.
    loupan_type = {'1': '住宅', '2': '别墅', '3': '商业', '4': '写字楼', '5': '底商'}
    # Loupan status: 1 on sale, 2 next phase pending, 3 not yet on sale.
    loupan_status = {'1': '在售', '2': '下期待开', '3': '未开盘'}
    # Each column is translated with its own table; codes without a label
    # are left as they are.
    column_labels = ((4, loupan_type), (5, loupan_status))

    cleaned = [list(row) for row in result if row]
    for row in cleaned:
        for index, labels in column_labels:
            if index < len(row):
                row[index] = labels.get(str(row[index]), row[index])
    return cleaned
def get_info_spider(url, area, city, houses_type):
    """
    Scrape one xiaoqu detail page and make sure it is stored in ``xiaoqu_detail_ke``.

    The page is downloaded and parsed, the city and area codes are resolved
    to names, the address is geocoded with ``getlnglat``, and a row is
    inserted unless one with the same title already exists.

    :param url: URL of the detail page to download.
    :param area: area code, resolved to its name with ``get_ch_area``.
    :param city: city code, resolved to its name with ``get_ch_city``.
    :param houses_type: listing type passed on to ``get_ch_area``.
    :return: the first column of the stored row (presumably its id), or
        ``None`` when the download or parsing fails, the title is empty, the
        city or area cannot be resolved, or the insert fails.
    """
    headers = create_headers()
    try:
        BaseSpider.random_delay()
        response = requests.get(url, timeout=10, headers=headers)
    except Exception as e:
        logger.error("Have a Error {}".format(repr(e)))
        return None

    soup = BeautifulSoup(response.text, "lxml")
    try:
        xiaoqu_title = soup.find('h1', class_='main')['title']
        info_items = soup.find_all('div', class_='xiaoquInfoItem')
        # The page is expected to carry at least seven info items, in this
        # order; a shorter list raises IndexError and is handled below.
        (building_type, property_expenses, property_company, developer,
         total_number_of_buildings, total_number_of_houses,
         nearby_stores) = [info_items[i].text for i in range(7)]
    except Exception as e:
        logger.error(repr(e))
        return None
    if not xiaoqu_title:
        return None

    # Collapse the line breaks of each scraped value into commas.
    building_type = str(building_type).strip().replace("\n", ",")
    property_expenses = str(property_expenses).strip().replace(
        "\n\n", ",").replace(" ", "")
    property_company = str(property_company).strip().replace("\n", ",")
    developer = str(developer).strip().replace("\n", ",")
    total_number_of_buildings = str(
        total_number_of_buildings).strip().replace("\n", ",")
    total_number_of_houses = str(total_number_of_houses).strip().replace(
        "\n", ",")
    nearby_stores = str(nearby_stores).strip().replace("\n\n", ",")

    # Both names are needed to build the address that is geocoded.
    city_row = get_ch_city(city)
    area_row = get_ch_area(area, houses_type)
    if not (city_row and area_row):
        return None
    address = city_row[0] + area_row[0] + xiaoqu_title
    # NOTE(review): the helper is named "lnglat" but its result is unpacked
    # as (lat, lng) -- confirm the order it actually returns.
    lat, lng = getlnglat(address, city_row[0])

    select_sql = """select * from xiaoqu_detail_ke where xiaoqu_title=%s"""
    insert_sql = """
                        insert into xiaoqu_detail_ke(
                        xiaoqu_title,
                        building_type,
                        property_expenses,
                        property_company,
                        developer,
                        total_number_of_buildings,
                        total_number_of_houses,
                        nearby_stores,
                        lat,
                        lng) 
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        """

    # Borrow a MySQL connection from the pool; the finally block hands it
    # back on every path, including an exception raised by a select.
    coon = POOL.connection()
    cur = coon.cursor()
    try:
        cur.execute(select_sql, (xiaoqu_title,))
        row = cur.fetchone()
        if row:
            return row[0]

        try:
            cur.execute(insert_sql, [
                xiaoqu_title,
                # Falsy values are stored as empty strings.
                building_type or '',
                property_expenses or '',
                property_company or '',
                developer or '',
                total_number_of_buildings or '',
                total_number_of_houses or '',
                nearby_stores or '',
                lat or '',
                lng or '',
            ])
            coon.commit()
        except Exception as e:
            logger.error(repr(e))
            coon.rollback()
            return None

        # Read the row back to obtain the value of its first column.
        cur.execute(select_sql, (xiaoqu_title,))
        row = cur.fetchone()
        if not row:
            logger.error(
                "xiaoqu_detail_ke row for {!r} not found after insert".format(
                    xiaoqu_title))
            return None
        return row[0]
    finally:
        cur.close()
        coon.close()