예제 #1
0
async def add_compare_prices(compare, uid, hotel_prices, start_time, end_time):
    """Merge a competitor's stored prices into ``hotel_prices``.

    ``uid`` is a ``;``-separated list of ``quoter_id::hotel_id`` pairs.  For
    each pair the document with ``cms_id == "hotel_id::quoter_id"`` is read
    from ``statics.<compare>.prices``; its ``min_price`` list (falling back to
    ``prices`` when that is missing or empty) is indexed by check-in date,
    keeping only dict entries whose ``checkin`` lies within
    ``[start_time, end_time]``.

    Every check-in key of ``hotel_prices`` then receives ``<compare>`` (the
    price, or ``''`` when no entry matches) and, on a match,
    ``<compare>_room`` (``room_type``, falling back to ``room_type_cn``).

    NOTE(review): all pairs write to the same keys, so with several pairs a
    later one overwrites what an earlier one stored -- confirm this is the
    intended behaviour.

    Returns:
        The same ``hotel_prices`` mapping, mutated in place.
    """
    db = databases("scripture")
    collection_name = f"statics.{compare}.prices"
    room_key = f"{compare}_room"
    for pair in uid.split(";"):
        quoter_id, hotel_id = pair.split('::')
        record = await db[collection_name].find_one(
            {"cms_id": f"{hotel_id}::{quoter_id}"})
        record = record or {}
        candidates = record.get("min_price", []) or record.get('prices', [])

        # Index the usable price entries by their check-in date string.
        by_checkin = {}
        for entry in candidates:
            if not (entry and isinstance(entry, dict)):
                continue
            checkin_day = datetime.strptime(entry["checkin"], "%Y-%m-%d")
            if start_time <= checkin_day <= end_time:
                by_checkin[entry["checkin"]] = entry

        for checkin, slot in hotel_prices.items():
            matched = by_checkin.get(checkin)
            if matched is None:
                slot[compare] = ''
                continue
            slot[compare] = matched.get('price', '')
            slot[room_key] = (matched.get('room_type', '')
                              or matched.get('room_type_cn', ''))
    return hotel_prices
예제 #2
0
async def _booking(request):
    """Queue booking price crawls for every hotel named in the request.

    The request is checked with ``validate_request``; a non-empty ``errmsg``
    yields a 400 response.  For each hotel id that has a ``poi_items``
    document with a ``bk_url`` crawl entry, the crawl URL is handed to
    ``get_booking_prices`` and a ``calendar_one`` task is dispatched.  For
    every hotel whose position in the list is a multiple of ten, a delayed
    ``_check_prices`` task is scheduled as well.
    """
    valid = await validate_request(request)
    if valid["errmsg"]:
        return rest_result(request, {"status": 400, "errmsg": valid["errmsg"]})

    cms_ids = valid["hotels"]
    db = databases("hub")
    start_time = valid["start_time"]
    end_time = valid["end_time"]
    days = valid["days"]
    # Avoid pointless queries when the requested start time is earlier than
    # the current date.
    # TODO: extract the date check into its own validation helper.
    for position, hid in enumerate(cms_ids):
        poi = await db["poi_items"].find_one(
            {"_id": ObjectId(hid), "crawl_info.crawl_website": "bk_url"},
            {"crawl_info.$": "1"},
        )
        if not poi:
            continue

        crawl_url = poi["crawl_info"][0]["crawl_url"]
        await get_booking_prices(crawl_url, hid, start_time, days)
        calendar_one.delay(hid, start_time, end_time)

        if position % 10 != 0:
            continue
        check_kwargs = {
            "base_url": crawl_url,
            "cms_id": hid,
            "start_time": start_time,
            "days": days,
        }
        _check_prices.apply_async(
            kwargs=check_kwargs,
            countdown=settings.CHECK_PRICE_DELAY_TIME,
        )
    return rest_result(request, {"status": 200, "data": "ok"})
예제 #3
0
async def crawl_booking(cid, url):
    """Store booking data parsed from ``url`` and link it to a capture URL.

    The ``capture_urls`` document with ``_id == ObjectId(cid)`` is loaded and
    the payload built by ``paras_booking_payload(url)`` is upserted into
    ``bookings``: keyed by ``quoter``/``hid`` when the capture document has
    both, otherwise by ``capture_urls_id``.  The capture document is then
    re-pointed at the booking record; its previous ``hotels_cn_id`` and
    ``jset_id`` values are kept under ``_hotels_cn_id`` / ``_jset_id``.

    Returns:
        ``{"status": 200, "data": payload}`` on success,
        ``{"status": 404, "errmsg": ...}`` when no capture document exists,
        or ``{"status": 500, "errmsg": ...}`` when the upsert returns a falsy
        result.
    """
    logger = logging.getLogger(__name__)
    scripture = databases("scripture")
    statics_data = await scripture["capture_urls"].find_one(
        {"_id": ObjectId(cid)})
    if not statics_data:
        # find_one yields None when nothing matches; bail out instead of
        # failing with AttributeError on the .get() calls below.
        logger.error(f'cid:{cid},url:{url},capture url not found')
        return {"status": 404, "errmsg": "capture url not found!"}

    quoter = statics_data.get("quoter")
    hid = statics_data.get("hid")
    payload = paras_booking_payload(url)
    if quoter and hid:
        where_clause = {"quoter": quoter, "hid": hid}
        bookings_id = f"{quoter}::{hid}"
    else:
        where_clause = {"capture_urls_id": cid}
        bookings_id = cid
        payload["capture_urls_id"] = cid

    res = await scripture.bookings.update_one(where_clause, {"$set": payload},
                                              upsert=True)
    # NOTE(review): a driver result object is normally truthy, so this
    # failure branch may never run -- confirm what signals a failed upsert.
    if not res:
        logger.error(f'cid:{cid},url:{url},update statics data faild')
        return {"status": 500, "errmsg": f"update statics data faild!"}

    await scripture.capture_urls.update_one(
        {"_id": statics_data["_id"]},
        {
            "$set": {
                "_hotels_cn_id": statics_data.get("hotels_cn_id"),
                "hotels_cn_id": "",
                "bookings_id": bookings_id,
                "jset_id": "",
                "_jset_id": statics_data.get("jset_id"),
            }
        },
    )
    logger.info(f'cid:{cid},url:{url},update statics data success')
    return {"status": 200, "data": payload}
예제 #4
0
async def get_city_name(city_id):
    """Return ``name_en`` of the ``meta_cities`` document with this ``_id``.

    An empty string is returned when no such document exists.
    """
    city = await databases("hub")["meta_cities"].find_one(
        {"_id": city_id}, {"name_en": "1"})
    return city["name_en"] if city else ""
async def refresh_datas(request):
    """Dispatch ``hotel_online_check`` unless a refresh is already flagged.

    The flag document (``__t == "flag"``) of ``hotel.online.check`` is read;
    the task is dispatched when the document is missing or its ``refreshing``
    field is falsy.  The same HTML notice is returned in either case.
    """
    logger = logging.getLogger(__name__)
    flag = await databases("scripture")["hotel.online.check"].find_one(
        {"__t": "flag"}, {"refreshing": "1"})
    already_refreshing = flag and flag.get("refreshing")
    if not already_refreshing:
        hotel_online_check.delay()
    logger.info(f'数据更新')
    return html("""</a>数据更新中,请稍后刷新页面下载excel""")
예제 #6
0
async def orders(request, email):
    """List the orders stored for ``email``.

    Query-string parameters:
        sort: ``DESC``/``ASC``/``-1``/``1`` (case-insensitive); anything else
            falls back to descending.
        limit: page size, default 20.
        page_number: zero-based page index, default 0.
        filters: may be repeated; restricts ``type`` to the given values.

    Returns a 403 response when the user is unknown, 401 when the user is not
    flagged ``authenticated``, otherwise the matching orders sorted by
    ``created_at``.
    """
    # Only upper-case keys are needed because the value is upper-cased first.
    sort_directions = {'-1': -1, '1': 1, 'DESC': -1, 'ASC': 1}

    scripture = databases('scripture')

    user = await scripture.g_users.find_one({'email': email})
    if not user:
        return json(
            {
                'error':
                'User is not authenticated, please authenticate first',
                'status': 403
            },
            status=403)
    if not user.get('authenticated'):
        return json({
            'error': 'User authenticate is expired!',
            'status': 401
        },
                    status=401)

    # An unrecognised sort value used to hand None to .sort(); default to
    # descending instead.
    sort = sort_directions.get(
        request.args.get('sort', 'DESC').upper(), -1)
    limit = int(request.args.get('limit', 20))
    page_number = int(request.args.get('page_number', 0))

    query = {'email': email}
    # ``$in`` requires an array; ``args.get`` only yields the first value as
    # a plain string, so collect every ``filters`` value with ``getlist``.
    filters = request.args.getlist('filters')
    if filters:
        query['type'] = {'$in': filters}

    cursor = scripture.g_orders \
        .find(query) \
        .sort('created_at', sort) \
        .skip(limit * page_number) \
        .limit(limit)

    # NOTE(review): the list is serialised by jsonutil.dumps and the resulting
    # string is passed to json() again -- confirm clients expect that shape.
    return json(
        jsonutil.dumps([order async for order in cursor],
                       ensure_ascii=False,
                       default=str))
예제 #7
0
async def _hotels_details(request):
    """Return one document from the ``hotels`` or ``bookings`` collection.

    The collection name comes from the ``db`` query argument and the document
    id from ``_id``.  Any other ``db`` value produces a 400-style payload
    instead of a lookup.
    """
    db = request.args.get("db")
    _id = request.args.get("_id")
    scripture = databases("scripture")
    if db in ("hotels", "bookings"):
        hotels_info = await scripture[db].find_one({"_id": ObjectId(_id)})
    else:
        hotels_info = {"status": 400, "errmsg": "params invalid"}
    logger.info(f'{request}_id :{_id },response_dict{hotels_info}')
    return rest_result(request, hotels_info)
예제 #8
0
async def start_compair(websites, compair):
    """Push a ``compair`` spider job onto the ``distributed_spider`` list.

    Returns ``True`` when the push succeeded and ``False`` (after logging a
    warning) when it raised.
    """
    rds = databases(settings.REDIS)
    job = {
        'spider_name': 'compair',
        'websites': websites,
        'compair': compair
    }
    message = json.dumps(job)
    try:
        await rds.lpush('distributed_spider', message)
    except Exception as exc:
        logger.warning(f"redis 异常", exc_info=exc)
        return False
    return True
예제 #9
0
async def hotel(hotel_ids):
    """Index the given hotels in the ``hotels`` Solr core.

    Each item of ``hotel_ids`` is a ``provider:_id`` string.  Ids are grouped
    per provider, the documents are fetched from the collection named after
    the provider in the ``agent`` database, wrapped in ``HotelDoc`` and passed
    to ``solr_add``, whose result is returned.
    """
    ids_by_provider = {}
    for item in hotel_ids:
        provider, raw_id = item.split(":")
        if provider not in ids_by_provider:
            ids_by_provider[provider] = []
        ids_by_provider[provider].append(ObjectId(raw_id))

    docs = []
    for provider, ids in ids_by_provider.items():
        collection = databases("agent").get_collection(provider)
        cursor = collection.find({"_id": {"$in": ids}})
        async for record in cursor:
            docs.append(HotelDoc(record, provider))
    return await solr_add(docs, "hotels")
예제 #10
0
async def search(request):
    """Render an HTML table of hotelbeds hotels matching a text phrase.

    The lower-cased ``partial`` form field is used as a quoted ``$text``
    search phrase; every hit is passed through ``formatted`` and the list is
    converted to a table with ``json2html``.
    """
    logger = logging.getLogger(__name__)
    p = request.form.get('partial', '').lower()
    collection = databases('scripture').statics.hotels.hotelbeds
    text_query = {"$text": {"$search": f'"{p}"'}}

    hotels = []
    async for record in collection.find(text_query):
        hotels.append(formatted(record))

    table = json2html.convert(
        hotels,
        table_attributes='id="info-table" '
                         'class="table table-bordered table-hover"')
    logger.info(f'partial:{p} response sucess')
    return html(HTML.format(p, table))
async def start_crawl_hcom(request):
    """Export today's ``hotel.online.check`` rows as an ``.xls`` download.

    Documents whose ``updated_at`` is at or after today's midnight are written
    to a one-sheet workbook (URL, Chinese/English name, Chinese/English
    address).  When there are none, the flag document is marked
    ``refreshing``, ``hotel_online_check`` is dispatched and an HTML notice is
    returned instead of a file.
    """
    logger = logging.getLogger(__name__)
    db = databases("scripture")
    wb = xlwt.Workbook(encoding="utf-8")
    st = wb.add_sheet("sheet1")
    headers = ("酒店网址", "酒店中文名称", "酒店英文名称", "酒店中文地址", "酒店英文地址")
    for col, title in enumerate(headers):
        st.write(0, col, title)

    # Midnight of the current day (naive local time).
    base_day = datetime.now().replace(hour=0, minute=0, second=0,
                                      microsecond=0)

    # The flag document used to be fetched here but its value was never read,
    # so that extra round-trip has been removed.
    hotels = [
        record async for record in db["hotel.online.check"].find(
            {"updated_at": {
                "$gte": base_day
            }})
    ]
    if not hotels:
        await db["hotel.online.check"].update_one(
            {"__t": "flag"}, {"$set": {
                "refreshing": True
            }})
        hotel_online_check.delay()
        logger.info(f'base_day:{base_day},数据更新')
        return html("""</a>数据更新中,请稍后刷新页面下载excel""")

    # Row 0 holds the headers, so data rows start at 1.
    for row, online in enumerate(hotels, start=1):
        st.write(row, 0,
                 f"https://flashtrip.cn/hotels/{online.get('_id', '')}")
        st.write(row, 1, online.get("name", ""))
        st.write(row, 2, online.get("name_en", ""))
        st.write(row, 3, online.get('address', ""))
        st.write(row, 4, online.get('en', {}).get('address', ''))

    excel = BytesIO()
    wb.save(excel)
    excel.seek(0)
    logger.info(f'下载酒店信息成功,filename={base_day}上线状态酒店.xls')
    return raw(
        excel.getvalue(),
        headers={
            "Content-Disposition": f"attachment;filename={base_day}上线状态酒店.xls"
        },
        content_type="application/vnd.ms-excel",
    )
예제 #12
0
async def save_user_record(cms_id, stage, price, checkin, room_type, checkout,
                           meal_type, is_package, user_id, source,
                           cancel_policy, user_ip, source_type, voucher,
                           deal_check_code):
    """Insert one user-action record into the ``compair`` collection.

    The payload starts from the arguments as given and is then adjusted by
    ``stage``:

    * ``"availability"``: ``price`` is treated as a list of offers; the first
      offer supplies price, meal type, cancel policy and room type, and the
      whole list is stored as ``weego_availability``.  With no offers a
      placeholder string is stored as the price.
    * ``"booking"``: ``deal_check_code`` and ``voucher`` are added.
    * anything else: the stage is only logged.

    A float ``weego_price`` is multiplied by the number of nights between
    ``checkin`` and ``checkout`` (``%Y-%m-%d`` strings) unless the stage is
    ``booking`` or ``cancellation``.

    Returns:
        ``(inserted_id, payload)``.
    """
    db = databases("scripture")
    payload = {
        "cms_id": cms_id,
        "stage": stage,
        "checkin": checkin,
        "checkout": checkout,
        "is_package": is_package,
        "user_id": user_id,
        "query_time": datetime.now(),
        'source': source,
        'source_type': source_type,
        'user_ip': user_ip,
        'created_at': datetime.now(),
        'weego_price': price,
        'meal_type': meal_type,
        'room_type': room_type,
        'cancel_policy': cancel_policy,
    }
    if stage == "availability":
        if price:
            first_offer = price[0]
            payload['weego_price'] = float(first_offer['price'])
            payload['meal_type'] = first_offer.get('meal_type', '')
            payload['cancel_policy'] = first_offer.get('cancel_policy', '')
            payload['room_type'] = first_offer.get('room_type', '')
            payload['weego_availability'] = price
        else:
            payload['weego_price'] = '当日无报价'
            payload['meal_type'] = ''
            payload['cancel_policy'] = ''
            payload['weego_availability'] = []
            payload['room_type'] = ''
    elif stage == "booking":
        payload['deal_check_code'] = deal_check_code
        payload['voucher'] = voucher
    else:
        # preparation or cancellation
        logger.debug(f"stage: {stage}")
    # Except for booking and cancellation, the price passed in is a per-night
    # price; convert it to the total for the stay here.
    if isinstance(payload['weego_price'],
                  float) and stage not in ['booking', 'cancellation']:
        book_day = (datetime.strptime(payload['checkout'], '%Y-%m-%d') -
                    datetime.strptime(payload['checkin'], '%Y-%m-%d')).days
        payload['weego_price'] *= book_day
    res = await db["compair"].insert_one(payload)
    # Use the public property rather than the name-mangled private attribute.
    return res.inserted_id, payload
예제 #13
0
async def supplier_data(supplier, hotel_id):
    """Fetch and format one supplier's static data for ``hotel_id``.

    The supplier id is mapped to a name and a collection through ``settings``.
    The hotel is matched on ``code`` or ``hotel_id``, both as given / as a
    string and, when ``hotel_id`` converts to ``int``, as an integer.

    Returns:
        The result of ``formatter_statics_data`` for the matched document, or
        ``{}`` when nothing matches.
    """
    logger = logging.getLogger(__name__)
    supplier_name = settings.SUPPLIER_ID_2_NAME[supplier]
    coll = settings.SUPPLIER_NAME_2_COLL[supplier_name]
    if coll == 'wg_hotel':
        db = databases('whotel')
    else:
        db = databases("scripture")

    # Work on a copy: writing "code" into the dict held by settings would
    # mutate shared configuration for every other user of it.
    projection = dict(settings.SUPPLIER_QUERY[supplier_name])
    projection["code"] = "1"

    condition = [{'code': hotel_id}, {'hotel_id': str(hotel_id)}]
    try:
        int_hotel_id = int(hotel_id)
    except (TypeError, ValueError):
        # Non-numeric ids simply have no integer variants to match on.
        pass
    else:
        condition.append({"code": int_hotel_id})
        condition.append({"hotel_id": int_hotel_id})

    s_data = await db[coll].find_one({'$or': condition}, projection)
    if not s_data:
        logger.info(f'supplier:{supplier},hotel_id:{hotel_id} can not query s_data')
        return {}
    logger.info(f'supplier:{supplier},hotel_id:{hotel_id} query s_data success')
    return formatter_statics_data(supplier_name, s_data)
async def update() -> None:
    """Refresh the ``facilities`` field of every online, edited/audited hotel.

    For each matching ``poi_items`` document the stored facilities are
    normalised with ``ensure_json``, passed to ``fetch_links``, converted back
    with ``ensure_bson`` and written to the document.  A failure in any step
    is logged and that document is skipped.

    The writes run concurrently; the coroutine waits for all of them before
    returning, so none is dropped when the caller's event loop stops.
    """
    logger = logging.getLogger(f'{__name__}.update')
    db = databases("hub")
    query = {
        "__t": "Hotel",
        "edit_status": {
            "$in": ["edited", "audited"]
        },
        "publish_status": "online"
    }
    count = await db.poi_items.count_documents(query)
    # Guard the progress ratio against a zero count (documents may appear
    # between the count and the scan).
    total = max(count, 1)
    progress = 0
    pending = []

    async for doc in db.poi_items.find(query, {'facilities': 1}):
        logger.info('progress: {:.2%}'.format(progress / total))
        progress += 1
        facilities = doc.get('facilities')
        if not facilities:
            logger.warning(f'No facilities: {doc["_id"]}')
            continue
        try:
            facilities = ensure_json(facilities)
        except ValueError as exc:
            logger.error(f'{exc}\n _id: {doc["_id"]}')
            continue
        except Exception as exc:
            logger.critical(f'Unknown_error, _id: {doc["_id"]}', exc_info=exc)
            continue
        try:
            data = await fetch_links(facilities)
        except AssertionError as exc:
            logger.error(f'Status unexpected: {exc}, _id: {doc["_id"]}')
            continue
        except Exception as exc:
            logger.critical(f'Unknown_error, _id: {doc["_id"]}', exc_info=exc)
            continue
        try:
            updated_facilities = ensure_bson(data, {"_id": ObjectId})
        except Exception as exc:
            logger.critical(f'Unknown_error, _id: {doc["_id"]}', exc_info=exc)
            continue
        future = asyncio.ensure_future(
            db.poi_items.update_one(
                {'_id': doc['_id']},
                {'$set': {
                    'facilities': updated_facilities
                }}))
        future.add_done_callback(partial(callback, oid=doc["_id"]))
        # Keep a reference: an unreferenced task may be garbage-collected
        # before it finishes.
        pending.append(future)

    if pending:
        # Outcomes are reported by the done-callbacks; collect exceptions
        # here instead of raising so one failed write does not hide others.
        await asyncio.gather(*pending, return_exceptions=True)
예제 #15
0
async def start_ta_spider(city, country, allow_num, lost_city, filter_or_not):
    """Push a ``tripadvisor`` spider job onto the ``distributed_spider`` list.

    Returns ``True`` when the push succeeded and ``False`` (after logging a
    warning) when it raised.
    """
    import logging

    rds = databases(settings.REDIS)
    to_redis = json.dumps({
        'city': city,
        'country': country,
        'allow_num': allow_num,
        'lost_city': lost_city,
        'filter_or_not': filter_or_not,
        'spider_name': 'tripadvisor'
    })
    try:
        await rds.lpush('distributed_spider', to_redis)
    except Exception as exc:
        # Best-effort push: report the failure instead of swallowing it
        # silently, as the sibling queue helpers in this file do.
        logging.getLogger(__name__).warning("redis 异常", exc_info=exc)
        return False
    return True
async def get_supplier():
    """Build one row per online, edited/audited hotel with its supplier ids.

    Each row is a dict keyed by column index: 0 hotel id, 1 name, 2 English
    name, 3 address, 4 city name, 14 country name, plus one column per quote
    (index taken from ``settings.SUPPLIER_ID_2_INDEX``) holding that
    supplier's hotel id.  City and country names are looked up once per city
    and cached for the duration of the call; a missing city or country is
    shown with the placeholder ``'已删除'`` ("deleted").
    """
    hub = databases('hub')
    missing = '已删除'
    hotels = []
    city_map = {}
    country_map = {}
    async for hotel in hub['poi_items'].find(
        {
            "__t": "Hotel",
            "edit_status": {
                "$in": ["edited", "audited"]
            },
            "publish_status": "online"
        }, {
            'name': '1',
            'address': '1',
            'name_en': '1',
            'quote_ids': '1',
            'city': '1'
        }):
        city_id = hotel['city']
        if city_id not in city_map:
            city = await hub['meta_cities'].find_one(
                {'_id': city_id}, {
                    'name': '1',
                    'country': '1'
                })
            if not city:
                city_map[city_id] = missing
                country_map[city_id] = missing
            else:
                city_map[city_id] = city['name']
                country = await hub['meta_countries'].find_one(
                    {"_id": city.get('country')}, {'name': '1'})
                # The country document may be gone as well; fall back to the
                # same placeholder rather than failing on None.
                country_map[city_id] = country['name'] if country else missing
        line = {
            0: str(hotel['_id']),
            1: hotel['name'],
            2: hotel['name_en'],
            3: hotel['address'],
            4: city_map[city_id],
            14: country_map[city_id]
        }
        # Hotels without any quotes have no ``quote_ids`` to iterate.
        for quote in hotel.get('quote_ids') or []:
            column = settings.SUPPLIER_ID_2_INDEX[str(quote['quoter'])]
            line[column] = str(quote['hotel_id'])
        hotels.append(line)
    return hotels
예제 #17
0
async def refresh_access_token(unused_request, email):
    """Refresh the stored Google access token of the user ``email``.

    On success the refreshed token data merged with the user info is stored
    with ``authenticated = True`` and returned as JSON.  On any failure the
    user is marked ``authenticated = False`` and a 401 response is returned.
    An unknown user yields a 403 response.
    """

    scripture = databases('scripture')

    user = await scripture.g_users.find_one({'email': email})
    if not user:
        # No stored tokens to refresh; answer cleanly instead of failing on
        # None below.
        return json(
            {
                'error':
                'User is not authenticated, please authenticate first',
                'status': 403
            },
            status=403)

    g = GMailClient(  # pylint: disable=C0103
        client_id=settings.GOOGLE_OAUTH_CLIENT_ID,
        client_secret=settings.GOOGLE_OAUTH_CLIENT_SECRET,
        access_token=user['access_token'],
        request_params=request_params)

    try:
        _, data = await g.refresh_access_token(user['refresh_token'])

        unused_user, u_info = await g.user_info()

        data.update(u_info)
        data['authenticated'] = True

        await scripture.g_users.update_one({'email': email}, {
            '$set': data,
            '$currentDate': {
                'updated_at': True,
                'token_refreshed_at': True
            }
        })
    except Exception as e:  # pylint: disable=W0703,C0103
        logger.exception(e)
        await scripture.g_users.update_one({'email': email}, {
            '$set': {
                'authenticated': False
            },
            '$currentDate': {
                'updated_at': True,
                'token_refreshed_at': True
            }
        })
        return json({'text': False}, status=401)

    return json(data)
예제 #18
0
async def hotel_filter(extra_condition, selected):
    """Return the ids of online, edited/audited hotels matching the filters.

    ``extra_condition`` is merged into the base query; when ``selected`` is
    non-empty the query is additionally restricted to those ids.

    Returns:
        The set of matching ids as strings.  When ``selected`` is non-empty
        but nothing matched, ``selected`` itself is returned unchanged.

    NOTE(review): returning the unfiltered ``selected`` when none of its
    hotels match looks surprising -- confirm this fallback is intended.
    """
    hub = databases("hub")
    condition = {
        "__t": "Hotel",
        "edit_status": {
            "$in": ["edited", "audited"]
        },
        "publish_status": "online",
    }
    condition.update(extra_condition)
    if selected:
        condition["_id"] = {"$in": [ObjectId(_id) for _id in selected]}

    cursor = hub["poi_items"].find(condition, {"_id": "1"})
    matched = {str(record["_id"]) async for record in cursor}

    if selected and not matched:
        return selected
    return matched
예제 #19
0
async def refresh_price_calendar(request):
    """Publish a price-refresh task for the hotel named in the JSON body.

    Body fields: ``hotel_id`` (required), ``bug_price`` (dispatch
    ``bug_price_one`` instead of ``calendar_one``) and ``days`` (optional
    argument for ``bug_price_one``).  After dispatching, the hotel's
    ``statics.hotels.prices`` document is flagged ``selecting``.

    A 400 response is returned for an empty body or missing ``hotel_id``.
    When the stored status is ``selecting`` and its ``updated_at`` is older
    than one day, nothing is dispatched and ``"Refreshing..."`` is returned.

    NOTE(review): treating a *stale* ``updated_at`` as "still refreshing"
    looks inverted, and a missing ``updated_at`` defaults to a timestamp taken
    just before the comparison value -- confirm the intended condition.
    """
    logger = logging.getLogger(__name__)
    body = request.json
    if not body:
        logger.warning("request has not body")
        return rest_result(request, {
            "status": 400,
            "errmsg": "body is empty!"
        })

    hotel_id = body.get("hotel_id", "")
    if not hotel_id:
        logger.warning("hotel_id cannot be None")
        return rest_result(request, {
            "status": 400,
            "errmsg": "hotel_id cannot be None"
        })

    prices = databases("scripture")["statics.hotels.prices"]
    status = await prices.find_one({"hotel_id": hotel_id},
                                   {"selecting", "updated_at"})
    if status and status.get("selecting"):
        # Both timestamps are deliberately computed separately and in this
        # order, exactly as the comparison has always been evaluated.
        last_update = status.get("updated_at",
                                 datetime.now() + timedelta(days=-1))
        if last_update < datetime.now() + timedelta(days=-1):
            logger.info(f"hotel_id:{hotel_id},Refreshing")
            return rest_result(request,
                               {"status": 200, "data": "Refreshing..."})

    if not body.get("bug_price", False):
        calendar_one.delay(hotel_id)
    else:
        if body.get("days"):
            bug_price_one.delay(hotel_id, int(body["days"]))
        else:
            bug_price_one.delay(hotel_id)
        logger.info(f"bug price publish")

    await prices.update_one({"hotel_id": hotel_id},
                            {"$set": {"selecting": True}})
    logger.info(f"hotel_id:{hotel_id}, publish task succeed")
    return rest_result(request, {"status": 200, "data": "succeed"})
예제 #20
0
async def record_quotes(request):
    """Persist one pull-down quote record posted as JSON.

    The body must carry ``time_stamp`` (seconds since the epoch, stored as
    ``query_time``) and ``cms_hotel_id``.  The hotel's city name is resolved
    through the module-level ``city_maps`` cache, falling back to
    ``meta_cities`` and finally to ``'Unknown'``.  The remaining body fields
    are passed to ``pulldown.create``.

    Returns a 400 response for an empty body, an invalid ``time_stamp`` or a
    missing ``cms_hotel_id``, a 404 response when the hotel is unknown or has
    no city, and otherwise a 200 response with the created record.
    """
    request.headers['accept'] = 'application/json'
    hub = databases('hub')
    body = request.json
    if not body:
        return rest_result(request, {
            'status': 400,
            'errmsg': 'body must be non-empty!'
        })
    try:
        body['query_time'] = datetime.utcfromtimestamp(
            int(body['time_stamp']))
    except (KeyError, TypeError, ValueError, OverflowError, OSError):
        logger.error(f"invalid request without valid time_stamp\n{body}")
        return rest_result(request, {
            'status': 400,
            'errmsg': 'time_stamp illegal.'
        })

    cms_id = body.get('cms_hotel_id')
    if not cms_id:
        logger.error(f"invalid request without cms_id\n{body}")
        # The message was missing its f-prefix and showed literal braces.
        return rest_result(request, {
            'status': 400,
            'errmsg': f'cms_id: {cms_id} illegal.'
        })
    logger.info(f"save pull_down data with {body}")

    cms = await hub['poi_items'].find_one({"_id": ObjectId(cms_id)},
                                          {'city': '1'})
    city_id = cms.get('city') if cms else None
    if not city_id:
        logger.error(f"invalid cms id : {cms_id} without city")
        # Always answer the client; a bare ``return`` produced no response.
        return rest_result(request, {
            'status': 404,
            'errmsg': f'cms_id: {cms_id} without city.'
        })

    if city_id not in city_maps:
        city = await hub['meta_cities'].find_one({"_id": city_id},
                                                 {'name': '1'})
        if city:
            city_maps[city_id] = city['name']
        else:
            logger.error(f"city {city_id} of cms id {cms_id} not found")
    # Use the cached name when there is one; the fallback used to be
    # overwritten by a lookup that raised KeyError.
    body['city'] = city_maps[city_id] if city_id in city_maps else 'Unknown'

    body.pop('time_stamp', '')

    resp = await pulldown.create(**body)
    return rest_result(request, {'status': 200, 'data': resp.to_dict()})
예제 #21
0
async def skyscanner(request):
    """Dispatch ``get_skyscanner`` tasks for the requested hotels.

    Hotels returned by ``validate_request`` are dispatched by id.  The
    optional ``provider`` mapping of the JSON body (hotel id -> hotel name)
    is resolved against ``statics.hotels.skyscanner`` by a case-insensitive
    name match; a task carrying the found ``sid`` is dispatched for each hit.

    A validation error yields a 400 response unless ``provider`` is given.
    """
    import re

    valid = await validate_request(request)
    # Tolerate a missing body or an explicit null ``provider``.
    providers = (request.json or {}).get("provider") or {}
    if valid["errmsg"] and not providers:
        return rest_result(request, {"status": 400, "errmsg": valid["errmsg"]})
    start_time = valid["start_time"]
    days = valid["days"]
    for hotel in valid["hotels"]:
        get_skyscanner.delay(start_time, days, hotel_id=hotel)
    db = databases("scripture")
    for hotel_id, hotel_name in providers.items():
        sid = await db["statics.hotels.skyscanner"].find_one(
            {"name": {
                # Escape the name so characters such as "(" or "." are
                # matched literally instead of being parsed as regex syntax.
                "$regex": re.escape(hotel_name.lower()),
                "$options": "i"
            }},
            {"sid": "1"},
        )
        if sid:
            get_skyscanner.delay(start_time,
                                 days,
                                 sid=sid["sid"],
                                 hotel_id=hotel_id)
    return rest_result(request, {"status": 200, "data": "ok"})
예제 #22
0
async def update_premium(data, fir_query):
    """Wait for the comparison result stored under ``fir_query`` and use it.

    The key is polled every five seconds for a bounded number of attempts.
    Once a value appears it is decoded as JSON, merged into ``data`` and
    handed to ``async_price_compare_check``.  When the value never appears
    the function logs a warning and gives up.
    """
    import asyncio

    logger = logging.getLogger(__name__)
    db = databases(settings.REDIS)
    max_polls = 20
    poll_interval = 5

    # The old ``while True or i > 20`` condition never became false, so a
    # missing key looped forever.
    for _ in range(max_polls):
        compare_msg = await db.get(fir_query)
        if compare_msg:
            break
        # Yield to the event loop while waiting; time.sleep() blocked it.
        await asyncio.sleep(poll_interval)
    else:
        logger.warning(f"{fir_query} no compare data after {max_polls} polls")
        return

    data.update(json.loads(compare_msg))
    await async_price_compare_check(data)
    # TODO: post ``data`` to the premium system for dynamic premium pricing.
    logger.info(f"{fir_query} send data to premium succeed!")
    return
예제 #23
0
async def get_booking_prices(url,
                             cms_id,
                             start_time,
                             days,
                             spider_name='booking_prices',
                             **kwargs):
    """Queue a price-crawl job on the ``distributed_spider`` list.

    ``url`` must point at ``www.booking.com`` or ``m.ctrip.com``; only its
    scheme, host and path are forwarded.  ``start_time`` must parse as
    ``%Y-%m-%d`` and ``days`` must convert to an integer from 1 to 90.  Extra
    keyword arguments are merged into the job payload.

    Returns ``True`` when the job was pushed, ``False`` for invalid input or
    when the push raised.
    """
    rds = databases(settings.REDIS)
    parsed = URL(url)
    if parsed.host not in ['www.booking.com', 'm.ctrip.com']:
        logger.info(f"invalid params : {url}")
        return False
    base_url = f"{parsed.scheme}://{parsed.host}{parsed.path}"

    try:
        days = int(days)
        # Round-trip through datetime to validate the date format.
        checkin = datetime.strptime(start_time,
                                    '%Y-%m-%d').strftime('%Y-%m-%d')
    except Exception as exc:
        logger.info(f"invalid params", exc_info=exc)
        return False
    if days <= 0 or days >= 91:
        logger.info(f"invalid params : {days}")
        return False

    job = {
        'spider_name': spider_name,
        'base_url': base_url,
        'cms_id': cms_id,
        'start_time': checkin,
        'days': days,
    }
    job.update(kwargs)
    message = json.dumps(job)
    try:
        await rds.lpush('distributed_spider', message)
    except Exception as exc:
        logger.warning(f"redis 异常", exc_info=exc)
        return False
    return True
예제 #24
0
async def destination(destnation_ids, type_name="destinations"):
    """Index destination-like documents in the ``destinations`` Solr core.

    ``type_name`` selects both the source collection
    (``statics.<type_name>`` in the ``agent`` database) and the ``type`` /
    ``type_code`` written to each Solr document; it must be one of
    ``destinations``, ``cities``, ``provinces`` or ``countries``.

    For each document the names of its country, province and city are
    resolved through ``get_country_by_id`` / ``get_province_by_id`` /
    ``get_city_by_id`` when the corresponding id is present.

    Returns the result of ``solr_add``.
    """
    type_mapping = {
        "destinations": {
            "type_name": "destination",
            "type_code": 8
        },
        "cities": {
            "type_name": "city",
            "type_code": 16
        },
        "provinces": {
            "type_name": "province",
            "type_code": 32
        },
        "countries": {
            "type_name": "country",
            "type_code": 64
        },
    }
    # (output prefix, id field on the source document, lookup coroutine)
    related_lookups = (
        ("country", "country_id", get_country_by_id),
        ("province", "province_id", get_province_by_id),
        ("city", "city_id", get_city_by_id),
    )
    docs = []
    cursor = (databases("agent").get_collection(
        "statics.{}".format(type_name)).find(
            {"_id": {
                "$in": [ObjectId(_id) for _id in destnation_ids]
            }},
            {
                "country_id": 1,
                "province_id": 1,
                "city_id": 1,
                "name_cn": 1,
                "name_en": 1,
                "name_alts": 1,
                "weight": 1,
                # Read below; without it in the projection the value was
                # never returned and hotel_count was always 0.
                "hotel_count": 1,
            },
        ))
    async for dest in cursor:
        doc = {
            "name_en": dest["name_en"],
            "name_cn": dest.get("name_cn", ""),
            "type": type_mapping[type_name]["type_name"],
            "type_code": type_mapping[type_name]["type_code"],
            "id": str(dest["_id"]),
            "weight": dest["weight"],
            "hotel_count": dest.get("hotel_count", 0),
        }

        if "name_alts" in dest:
            name_alts = dest["name_alts"].split(",")
        else:
            # No explicit alternatives: fall back to the non-empty names.
            name_alts = [
                name for name in (doc["name_en"], doc["name_cn"]) if name
            ]
        doc["name_alts"] = name_alts

        for prefix, id_field, fetch in related_lookups:
            related_id = dest.get(id_field)
            if not related_id:
                continue
            related = await fetch(related_id) or {
                "name_cn": "",
                "name_en": "",
            }
            doc[f"{prefix}_name_cn"] = related.get("name_cn", "")
            doc[f"{prefix}_name_en"] = related["name_en"]
        docs.append(doc)
    return await solr_add(docs, "destinations")
예제 #25
0
async def index(request, email):
    """Store the user's Google tokens and trigger Gmail parsing.

    Form fields: ``access_token`` and ``refresh_token`` (both required),
    ``id_token``, ``expires_in`` (default 3599) and ``token_type``.  The
    tokens are upserted on the ``g_users`` document for ``email``, the user
    info is fetched from Google, a message-listing task chained to the
    dispatcher is queued, and a token refresh is scheduled ``expires_in``
    seconds later.

    Returns a 400 response when a required token is missing, otherwise the
    user info as JSON.
    """

    access_token = request.form.get('access_token')
    if not access_token:
        return json({
            'status': 400,
            'error': 'access_token must be provided!'
        },
                    status=400)
    # Default to '' so a missing field reaches the check below instead of
    # failing on None.strip().
    refresh_token = (request.form.get('refresh_token')
                     or '').strip('"').strip("'")
    if not refresh_token:
        return json({
            'status': 400,
            'error': 'refresh_token must be provided!'
        },
                    status=400)

    id_token = request.form.get('id_token')
    expires_in = int(request.form.get('expires_in', 3599))
    token_type = request.form.get('token_type')

    scripture = databases('scripture')

    # Await the write so failures surface here and the user document exists
    # before any follow-up task reads it.
    await scripture.g_users.update_one(
        {'email': email},
        {
            '$set': {
                'access_token': access_token,
                'refresh_token': refresh_token,
                'email': email
            },
            '$currentDate': {
                'updated_at': True
            },
            '$setOnInsert': {
                'created_at': datetime.now()
            },
        },
        upsert=True)

    g = GMailClient(  # pylint: disable=C0103
        client_id=settings.GOOGLE_OAUTH_CLIENT_ID,
        client_secret=settings.GOOGLE_OAUTH_CLIENT_SECRET,
        access_token=access_token,
        request_params=request_params)

    _, data = await g.user_info()

    uri = f'users/{email}/messages'

    token = {
        'access_token': access_token,
        'refresh_token': refresh_token,
        'id_token': id_token,
        'expires_in': expires_in,
        'token_type': token_type
    }

    gmail.do_request.apply_async((uri, token),
                                 link=gmail.dispatcher.s(email=email,
                                                         uid=data['id'],
                                                         token=token))

    gmail.refresh_access_token.apply_async((email, ), countdown=expires_in)

    return json(data)
예제 #26
0
async def update_hotel(request):
    """Apply edited room plans from the request body to a stored hotel.

    The JSON body must be a dict with ``_id`` and ``rooms_cn``.  The stored
    ``rooms_cn`` of the matching ``statics.hotels.relux.rooms`` document is
    paired position by position with the submitted one; for every room that
    differs, an ``update_one`` replacing that room's ``plans`` is scheduled.

    Raises:
        exceptions.InvalidUsage: invalid body, missing ``_id`` or
            ``rooms_cn``, or no update was scheduled.
        exceptions.NotFound: the hotel or its stored ``rooms_cn`` is missing.

    Returns:
        A JSON response with the number of scheduled updates.  The updates
        are scheduled with ``ensure_future`` and are not awaited here.
    """
    logger = logging.getLogger(__name__)
    try:
        body = request.json
    except exceptions.InvalidUsage:
        # An unparsable body is handled like a missing one.
        body = None
    if not body or not isinstance(body, dict):
        logger.warning(f"Invalid request body: {request.body}")
        raise exceptions.InvalidUsage({
            "status": 400,
            "errmsg": "Invalid request body"
        })

    oid = body.get("_id")
    if not oid:
        logger.warning('_id is required!')
        raise exceptions.InvalidUsage({
            "status": 400,
            "errmsg": "_id is required!"
        })
    db = databases("scripture")
    doc = await db.statics.hotels.relux.rooms.find_one({"_id": ObjectId(oid)},
                                                       {"rooms_cn": 1})
    if not doc:
        logger.warning(f'oid:{oid} Corresponding Hotel not found')
        raise exceptions.NotFound({
            "status": 404,
            "errmsg": "Hotel not found!"
        })
    ori_rooms_cn = doc.get("rooms_cn")
    if not ori_rooms_cn:
        logger.warning(f'oid:{oid} Corresponding Hotel rooms_cn not found')
        raise exceptions.NotFound({
            "status": 404,
            "errmsg": "Hotel rooms_cn not found!"
        })
    rooms_cn = body.get("rooms_cn")
    if not rooms_cn:
        logger.warning(f'oid:{oid} Corresponding rooms_cn is required')
        raise exceptions.InvalidUsage({
            "status": 400,
            "errmsg": "rooms_cn is required!"
        })

    futures = []
    # Rooms are compared pairwise by position; zip stops at the shorter list.
    for room, ori_room in zip(rooms_cn, ori_rooms_cn):
        room_id = room["id"]
        if room_id != ori_room["id"]:
            logger.error(f"The order of the rooms is out of order: "
                         f"room({room}), ori_doc({doc})")
            continue
        if room == ori_room:
            # Nothing changed for this room.
            continue

        # Record the stored name/feature on each changed plan of the
        # submitted room; this mutates the request data that is written below.
        for plan, ori_plan in zip(room.get("plans", []),
                                  ori_room.get("plans", [])):
            if plan["id"] != ori_plan["id"]:
                logger.error(f"The order of the plans is out of order: "
                             f"plan({plan}), ori doc({doc})")
                continue
            if plan == ori_plan:
                continue
            if plan["name"] != ori_plan["name"]:
                plan["ori_name"] = ori_plan["name"]
            if plan["feature"] != ori_plan["feature"]:
                plan["ori_feature"] = ori_plan["feature"]
        # The filter also matches on the stored plans, so the write only
        # applies while the document still holds the plans that were read.
        # NOTE(review): "plans" is indexed directly here although .get() with
        # a default is used above -- a room without "plans" would raise
        # KeyError; confirm the key is always present.
        future = asyncio.ensure_future(
            db.statics.hotels.relux.rooms.update_one(
                {
                    "_id": ObjectId(oid),
                    "rooms_cn.id": room_id,
                    "rooms_cn.plans": ori_room["plans"]
                }, {"$set": {
                    "rooms_cn.$.plans": room["plans"]
                }}))
        extra = {
            "oid": oid,
            "ori_plan": ori_room["plans"],
            "new_plan": room["plans"]
        }
        future.add_done_callback(partial(callback, extra=extra))
        futures.append(future)

    if not futures:
        # Logs the plans of the last compared pair of rooms only.
        logger.warning(
            f'ori_room["plans"]:{ori_room["plans"]},room["plans"]:{room["plans"]},'
            f' No difference or rooms order is wrong')
        raise exceptions.InvalidUsage({
            "status":
            400,
            "errmsg":
            "No difference or rooms order is wrong"
        })
    logger.info(f'oid:{oid} update_relux_plan success')
    return response.json({"status": 200, "data": {"count": len(futures)}})
예제 #27
0
# Stages that every per-user counter is pre-seeded with.
_USER_COUNT_STAGES = ("availability", "preparation", "booking", "cancellation")


def _parse_time_bound(value):
    """Convert a time-range bound to ``datetime``; non-strings pass through."""
    if not isinstance(value, str):
        return value
    # The minute-precision "T" form is accepted for scripture-views
    # compatibility.
    fmt = "%Y-%m-%dT%H:%M" if "T" in value else "%Y-%m-%d %H:%M:%S"
    return datetime.strptime(value, fmt)


def _format_query_time(value):
    """Render a stored query time as a string, dropping fractional seconds
    when the stored value is itself a string."""
    if not isinstance(value, str):
        return str(value)
    if "." in value:
        parsed = datetime.strptime(value, "%Y-%m-%d %H:%M:%S.%f")
        return parsed.strftime("%Y-%m-%d %H:%M:%S")
    return str(datetime.strptime(value, "%Y-%m-%d %H:%M:%S"))


async def extract_data(start_time, end_time, stages=None):
    """Aggregate ``compair`` records by hotel, city and user.

    Args:
        start_time: Lower bound for ``created_at`` (inclusive). Either a
            ``datetime`` or a string in ``%Y-%m-%dT%H:%M`` or
            ``%Y-%m-%d %H:%M:%S`` format.
        end_time: Upper bound for ``created_at`` (inclusive); same formats.
        stages: Iterable of stage names to include. Names not present in
            ``settings.STAGES`` are logged and ignored. ``None`` (the
            default) means no stages, exactly like the former ``[]`` default.

    Returns:
        A dict with three mappings:

        * ``hotel_count``: ``"<cms_id>:<stage>"`` -> ``{"count", "data",
          "stage"}`` where ``data`` is the first matching record, enriched
          with city/hotel names and address.
        * ``city_count``: ``"<city_name>:<stage>"`` -> ``{"count", "stage"}``.
        * ``user_count``: ``"<user_id>"`` -> total ``count`` plus, per stage,
          ``{"count", "hotel": {cms_id: n}}``.

    Raises:
        ValueError: If a string time bound or a stored string ``query_time``
            does not match the expected formats.
    """
    logger = logging.getLogger(__name__)
    db = databases("scripture")
    hub = databases("hub")
    condition = {"stage": {"$in": []}, "created_at": {}}
    for stage in stages or ():
        if stage in settings.STAGES:
            condition["stage"]["$in"].append(stage)
        else:
            logger.error(f"invalid stage: {stage}!")

    condition["created_at"]["$gte"] = _parse_time_bound(start_time)
    # NOTE(review): the original comment says the server runs on UTC (8h
    # behind Beijing time) and incoming times need -8h, but no shift is
    # applied here -- confirm that callers pre-adjust.
    condition["created_at"]["$lte"] = _parse_time_bound(end_time)

    result = {"hotel_count": {}, "city_count": {}, "user_count": {}}
    cms_msgs = {}  # per-call cache: cms_id -> poi_items document (or None)
    async for data in db["compair"].find(condition):
        cms_id = data["cms_id"]
        if cms_id not in cms_msgs:
            cms_msgs[cms_id] = await hub["poi_items"].find_one(
                {"_id": ObjectId(cms_id)},
                {
                    "name": "1",
                    "name_en": "1",
                    "city": "1",
                    "address": "1"
                },
            )
        cms_msg = cms_msgs[cms_id]
        if not cms_msg:
            logger.error(f"{cms_id} not find!")
            continue

        # ``city_map`` is a cache defined outside this function; it is filled
        # lazily with city names.
        city_id = cms_msg["city"]
        if city_id not in city_map:
            city_msg = await hub["meta_cities"].find_one({"_id": city_id},
                                                         {"name": "1"})
            city_map[city_id] = (city_msg["name"]
                                 if city_msg else "该城市已被删除")
        city_name = city_map[city_id]

        stage = data["stage"]
        city_count_key = f"{city_name}:{stage}"
        if city_count_key not in result["city_count"]:
            result["city_count"][city_count_key] = {
                "count": 0,
                "stage": stage,
            }
        result["city_count"][city_count_key]["count"] += 1

        data["city_name"] = city_name
        data["name_cn"] = cms_msg["name"]
        data["name_en"] = cms_msg["name_en"]
        data["address"] = cms_msg["address"]

        # NOTE(review): the original comment says display times need +8h
        # (server is UTC); only the string format is normalised here, no
        # offset is applied -- confirm.
        data["query_time"] = _format_query_time(data["query_time"])

        del data['_id']
        del data['created_at']

        hotel_count_key = f"{cms_id}:{stage}"
        if hotel_count_key not in result["hotel_count"]:
            result["hotel_count"][hotel_count_key] = {
                "count": 0,
                "data": data,
                "stage": stage,
            }
        result["hotel_count"][hotel_count_key]["count"] += 1

        # The per-record "info" list is no longer returned: it was too large
        # and currently has no consumer.

        user_count_key = f"{data['user_id']}"
        if not user_count_key.strip():
            continue
        if user_count_key not in result['user_count']:
            user_entry = {"count": 0}
            for known_stage in _USER_COUNT_STAGES:
                user_entry[known_stage] = {"count": 0, "hotel": {}}
            result['user_count'][user_count_key] = user_entry
        user_entry = result['user_count'][user_count_key]
        # setdefault keeps a stage outside the pre-seeded four from raising
        # KeyError; it gets its own counter instead.
        stage_entry = user_entry.setdefault(stage, {"count": 0, "hotel": {}})
        stage_entry["hotel"][cms_id] = stage_entry["hotel"].get(cms_id, 0) + 1
        user_entry["count"] += 1
        stage_entry["count"] += 1

    return result
예제 #28
0
async def list_website_crawled_by_id(request, crawled_id):
    """Return the source websites (or filtered hotel fields) for a capture.

    The capture document is looked up in ``capture_urls`` by ``_id`` when
    ``crawled_id`` is 24 characters long, otherwise by ``hotel_id``.

    If the request carries a ``filters`` query argument (comma-separated
    column names), the response holds the matching hotel fields built through
    ``hub_hotel.HubHotel``; the special name ``all`` returns the whole hotel
    dict. Without ``filters`` the response lists the URLs/ids the capture was
    crawled from.

    Args:
        request: Incoming request; ``request.args`` and ``request.app.loop``
            are used.
        crawled_id: Capture ``_id`` (24 chars) or ``hotel_id``.

    Returns:
        The value of ``rest_result`` for one of: 400 (unknown capture),
        200 with ``hotel``, 200 with ``urls``, or 404 when nothing is found.
    """

    logger = logging.getLogger(__name__)
    scripture = databases("scripture")

    if len(crawled_id) == 24:
        crawled = await scripture.capture_urls.find_one(
            {"_id": ObjectId(crawled_id)})
    else:
        crawled = await scripture.capture_urls.find_one(
            {"hotel_id": crawled_id})
    if not crawled:
        logger.warning("Bad crawled id %s", crawled_id)
        return rest_result(request, {
            "status": 400,
            "err_msg": "Invalid capture_id"
        })

    filters = request.args.get("filters")
    if filters and isinstance(filters, str):
        kwargs = {
            "hotels_cn_id": crawled.get("hotels_cn_id"),
            "jset_id": crawled.get("jset_id"),
            "bookings_id": crawled.get("bookings_id"),
            "capture_id": crawled_id,
        }
        logger.debug(f"kwargs : {kwargs}")
        # HubHotel(...).to_dict is synchronous, so it runs in the default
        # executor instead of on the event loop.
        hotel = await request.app.loop.run_in_executor(
            None,
            lambda: hub_hotel.HubHotel(**kwargs).to_dict(columns=filters.split(
                ",")),
        )
        logger.info(f"crawled_id:{crawled_id} ,api hotel : {hotel}")
        keys = [f.strip() for f in filters.split(",")]
        if "all" in keys:
            logger.info(
                f'crawled_id:{crawled_id} crawled success and "all" in keys ')
            return rest_result(request, {"hotel": hotel, "status": 200})
        hotel = {key: filed_formatter(key, hotel.get(key)) for key in keys}
        logger.info(f'crawled_id:{crawled_id} crawled success ')
        return rest_result(request, {"hotel": hotel, "status": 200})

    urls = {}
    # The underscore-prefixed fields are the fallback ids; they live on the
    # capture document, not on the id string passed in.
    hotels_cn_id = crawled.get("hotels_cn_id") or crawled.get("_hotels_cn_id")
    if hotels_cn_id:
        urls["hotels.cn"] = f"https://www.hotels.cn/ho{hotels_cn_id}"

    jset_id = crawled.get("jset_id") or crawled.get("_jset_id")
    if jset_id:
        jset = await scripture.jsets.find_one({"jset_id": jset_id})
        if not jset:
            logger.warning("Jset not found %s", jset_id)
        else:
            urls["jetsetter.com"] = jset["url"]

    if crawled.get("hotel_id") and not crawled.get("bookings_id"):
        urls["roomsxml id"] = crawled.get("hotel_id")

    if crawled.get("bookings_id") or crawled.get("_bookings_id"):
        urls["bookings"] = crawled.get("bk_url")

    if urls:
        logger.info(f'crawled_id:{crawled_id} , found url:{urls}')
        return rest_result(request, {"urls": urls, "status": 200})

    logger.info(f'crawled_id:{crawled_id} , not found url')
    return rest_result(request, {"err_msg": "Not Found", "status": 404})
예제 #29
0
async def crawl_hcom(capture_id, url):
    """Crawl a hotel's static data from hotels.cn / hotels.com and store it.

    Args:
        capture_id: ``capture_urls`` ``_id`` (24 chars) or ``hotel_id``.
        url: Either a string starting with the numeric hotel id, or a
            hotels.cn / hotels.com URL whose first path segment is
            ``ho<id>``.

    Returns:
        ``{"status": 200, "data": payload}`` on success, otherwise a dict
        with ``status`` 400 (unknown capture / bad url) or 500 (scrape or
        store failed) and an ``errmsg``.
    """
    # Function-scope import so the block does not rely on module-level imports.
    import asyncio

    logger = logging.getLogger(__name__)
    scripture = databases("scripture")
    if len(capture_id) == 24:
        crawled = await scripture.capture_urls.find_one(
            {"_id": ObjectId(capture_id)})
    else:
        crawled = await scripture.capture_urls.find_one(
            {"hotel_id": capture_id})
    if not crawled:
        logger.warning("Bad crawled id %s", capture_id)
        return {"status": 400, "errmsg": "Invalid capture_id"}

    targets = ["www.hotels.cn", "www.hotels.com"]
    hid = re.match(r"\d+", url)
    if hid:
        hid = hid.group()
    else:
        _url = URL(url)
        if _url.host not in targets:
            logger.warning("Bad url %s", url)
            return {"status": 400, "errmsg": "Invalid url"}
        # First path segment looks like "ho<id>"; drop the two-char prefix.
        hid = _url.path.strip("/").split("/")[0][2:]
    cn_url = f"https://www.hotels.cn/ho{hid}"
    en_url = f"https://www.hotels.com/ho{hid}/?pos=HCOM_US&locale=en_US"

    # requests is blocking; run it in the default executor so the event loop
    # keeps serving other coroutines while the pages download.
    loop = asyncio.get_event_loop()
    cn_resp = await loop.run_in_executor(None, requests.get, cn_url)
    cn_et = etree.HTML(cn_resp.content.decode("utf-8"))
    title = get_log(cn_et,
                    field="title",
                    rule=hotels_xp.TITLE,
                    choice="take_first")
    # The site's generic landing-page title means the hotel id was not found.
    if title == "好订网酒店预订 国际酒店预订 特价国外酒店预订 – 网上订酒店就到Hotels.cn":
        logger.warning("Bad hotel id %s", hid)
        return {"status": 400, "errmsg": "Invalid url"}
    payload = hcom_parse(hid, cn_url, cn_et)

    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    en_resp = await loop.run_in_executor(
        None, lambda: requests.get(en_url, headers=headers))
    en_et = etree.HTML(en_resp.content.decode("utf-8"))
    en_payload = hcom_parse(hid, en_url, en_et)
    payload["url"] = cn_url
    payload["us_url"] = en_url
    payload["en"] = en_payload

    if not (payload["address_text"] and payload["name"]):
        # Both fields are read from the CN page payload, so log the CN url.
        if not payload["name"]:
            logger.error(f"酒店名称抓取失败,url:{cn_url},xpath:{hotels_xp.NAME}")
        if not payload["address_text"]:
            logger.error(
                f"酒店地址抓取失败,url:{cn_url},xpath:{hotels_xp.ADDRESS}")
        return {"status": 500, "errmsg": "酒店静态数据更新失败!"}

    res = await scripture.hotels.update_one({"hotels_id": hid},
                                            {"$set": payload},
                                            upsert=True)
    # With upsert=True a newly inserted document reports modified_count == 0
    # and sets upserted_id instead, so both must count as success.
    if not (res.modified_count or res.upserted_id is not None):
        logger.info(f'hotels_cn_id:{hid} upload fail')
        return {"status": 500, "errmsg": "酒店静态数据更新失败!"}

    # Make hotels.cn the active source for this capture; the previous
    # bookings/jset ids are kept under underscore-prefixed keys.
    await scripture.capture_urls.update_one(
        {"_id": crawled["_id"]},
        {
            "$set": {
                "_hotels_cn_id": "",
                "hotels_cn_id": hid,
                "bookings_id": "",
                "_bookings_id": crawled.get("bookings_id"),
                "jset_id": "",
                "_jset_id": crawled.get("jset_id"),
            }
        },
    )
    logger.info(f'hotels_cn_id:{hid} upload success')
    return {"status": 200, "data": payload}
예제 #30
0
async def crawl_statics_data(request, crawled_id):
    """Crawl static hotel data for a capture from the requested website.

    Query arguments:
        url: Page to crawl; only scheme, host and path are kept.
        website: Source key; must be in ``crawling`` and be one of
            ``bk_url`` or ``hcom_id``.

    Args:
        request: Incoming request carrying the query arguments.
        crawled_id: ``capture_urls`` ``_id`` (24 chars) or ``hotel_id``.

    Returns:
        The value of ``rest_result`` with status 400 for bad input, 200 when
        the site was already crawled or the crawl succeeded, and 500 when the
        crawler failed.
    """
    logger = logging.getLogger(__name__)
    url = request.args.get("url")
    if not url:
        # URL(None) would raise; answer with a client error instead.
        logger.warning("Bad url : url is missing")
        return rest_result(request, {"status": 400, "errmsg": "Invalid url!"})
    _url = URL(url)
    # Normalise: drop query string and fragment.
    url = f"{_url.scheme}://{_url.host}{_url.path}"
    website = request.args.get("website")
    scripture = databases("scripture")
    if not website or website not in crawling:
        logger.warning("Bad website : not website or website not in crawling")
        return rest_result(request, {
            "status": 400,
            "err_msg": "Invalid website!"
        })

    if len(crawled_id) == 24:
        exists = await scripture.capture_urls.find_one(
            {"_id": ObjectId(crawled_id)})
    else:
        exists = await scripture.capture_urls.find_one(
            {"hotel_id": crawled_id})
    if not exists:
        logger.info(f"invalid crawled_id!: {crawled_id}")
        return rest_result(request, {
            "status": 400,
            "errmsg": f"invalid crawled_id!: {crawled_id}"
        })
    if website in exists and exists[website] == url:
        logger.info(f'{crawled_id} 此网站已经抓取')
        return rest_result(request, {"status": 200, "data": "此网站已经抓取"})

    if website not in ("bk_url", "hcom_id"):
        logger.warning(f'Invalid website:{website}')
        return rest_result(request, {
            "status": 400,
            "err_msg": "Invalid website!"
        })

    # Crawlers hit external sites; any failure becomes a 500 payload rather
    # than an unhandled exception.
    try:
        if website == "bk_url":
            res = await crawl_booking(str(exists["_id"]), url)
        else:
            res = await crawl_hcom(crawled_id, url)
    except Exception as exc:
        logger.warning("", exc_info=exc)
        res = {
            "status": 500,
            "errmsg": f"网站抓取失败,请联系刘博文同学([email protected])\n{exc}",
        }

    if res and res.get('status') == 200:
        # Await the write so failures surface and the record is stored
        # before the response is sent.
        await scripture.capture_urls.update_one({"_id": exists["_id"]},
                                                {"$set": {
                                                    website: url
                                                }})
        logger.info(f'url:{url},酒店静态数据抓取完成')
        return rest_result(request, {"status": 200, "data": "酒店静态数据抓取完成!"})

    # A crawler may return nothing or omit "errmsg"; do not crash on that.
    errmsg = res.get("errmsg", "") if res else ""
    logger.warning(f'{crawled_id}抓取失败.errmsg: {errmsg}')
    return rest_result(request, {
        "status": 500,
        "errmsg": f"{errmsg}"
    })