Пример #1
0
    def _store_item(self, item):
        """Insert or update the rthotel_ctrip_hotel row for a hotel item.

            The row is looked up by the url built from ``item.hotel_code``.
            If a row with that url exists it is updated, otherwise a new row
            is inserted. The converted item is stored as a JSON string in the
            ``info`` column and ``add_time`` is set to the current local time.

            Args:
                item: Item, read for ``hotel_code`` and ``city_code``
            Raises:
                Exception: any error raised while converting the item or
                    running the statements is logged and re-raised unchanged
        """
        try:
            clone_dict = convert_hotel_info_item_2_dict(item)
            encodestr = json.dumps(clone_dict, ensure_ascii=False)
            # Build the url once and reuse it for the lookup and the write.
            # NOTE(review): assumes build_hotel_url is a pure function of the
            # hotel code - confirm.
            hotel_url = build_hotel_url(item.hotel_code)

            selectsql = "SELECT * FROM rthotel_ctrip_hotel " \
                        "WHERE url=%(url)s LIMIT 1"
            existing_rows = self.item_db.execute_query(
                selectsql, {'url': hotel_url})

            if len(existing_rows) >= 1:
                storesql = "UPDATE rthotel_ctrip_hotel \
                            SET city_code=%(city_code)s, hotel_id=%(hotel_id)s, \
                            url=%(url)s, info=%(info)s, \
                            add_time=%(add_time)s  \
                            WHERE url=%(url)s"
            else:
                storesql = "INSERT INTO rthotel_ctrip_hotel \
                (city_code, hotel_id, url, info, add_time) \
                VALUES(%(city_code)s, %(hotel_id)s, %(url)s, %(info)s, \
                %(add_time)s)"

            # Both statements take the same named parameters.
            self.item_db.execute_update(
                storesql, {
                    'city_code': item.city_code,
                    'hotel_id': item.hotel_code,
                    'url': hotel_url,
                    'info': encodestr,
                    'add_time': datetime.datetime.now()
                })
        except Exception as e:
            self.logger.warn("sql error:%s" % e)
            # Bare raise keeps the original traceback (``raise e`` resets it
            # to this line under Python 2).
            raise
Пример #2
0
    def _store_item(self, item):
        """Insert or update the rthotel_ctrip_hotel row for a hotel item.

            The row is looked up by the url built from ``item.hotel_code``.
            If a row with that url exists it is updated, otherwise a new row
            is inserted. The converted item is stored as a JSON string in the
            ``info`` column and ``add_time`` is set to the current local time.

            Args:
                item: Item, read for ``hotel_code`` and ``city_code``
            Raises:
                Exception: any error raised while converting the item or
                    running the statements is logged and re-raised unchanged
        """
        try:
            clone_dict = convert_hotel_info_item_2_dict(item)
            encodestr = json.dumps(clone_dict, ensure_ascii=False)
            # Build the url once and reuse it for the lookup and the write.
            # NOTE(review): assumes build_hotel_url is a pure function of the
            # hotel code - confirm.
            hotel_url = build_hotel_url(item.hotel_code)

            selectsql = "SELECT * FROM rthotel_ctrip_hotel " \
                        "WHERE url=%(url)s LIMIT 1"
            existing_rows = self.item_db.execute_query(
                selectsql, {'url': hotel_url})

            if len(existing_rows) >= 1:
                storesql = "UPDATE rthotel_ctrip_hotel \
                            SET city_code=%(city_code)s, hotel_id=%(hotel_id)s, \
                            url=%(url)s, info=%(info)s, \
                            add_time=%(add_time)s  \
                            WHERE url=%(url)s"
            else:
                storesql = "INSERT INTO rthotel_ctrip_hotel \
                (city_code, hotel_id, url, info, add_time) \
                VALUES(%(city_code)s, %(hotel_id)s, %(url)s, %(info)s, \
                %(add_time)s)"

            # Both statements take the same named parameters.
            self.item_db.execute_update(
                storesql, {
                    'city_code': item.city_code,
                    'hotel_id': item.hotel_code,
                    'url': hotel_url,
                    'info': encodestr,
                    'add_time': datetime.datetime.now()
                })
        except Exception as e:
            self.logger.warn("sql error:%s" % e)
            # Bare raise keeps the original traceback (``raise e`` resets it
            # to this line under Python 2).
            raise
Пример #3
0
    def parse(self, task, input_file):
        """Parse a hotel-search SOAP response into items and follow-up tasks.

            The body of ``input_file`` is parsed as XML; the text of its
            RequestResult element is then parsed as a second XML document.
            When that inner document's Header has ResultCode "Success", each
            Property element that has both a HotelCode and a HotelCityCode
            attribute yields a HotelCodeItem, and hotel codes are grouped
            into rooms tasks of up to ``self.batch_count`` hotels each.

            Args:
                task: FileTask or HttpTask; ``task.kwargs`` is read for
                    'citycode' and 'chinesename'
                input_file: file or StringIO
            Yields:
                item: Item, result of parse
                task: Task, new task
            Raises:
                ParserError: raised when the outer XML cannot be parsed or
                    the header lacks ResultCode "Success". Both raises sit
                    inside the outer ``try`` whose handler is not visible in
                    this excerpt, so whether they reach the caller cannot be
                    told from here.
        """
        self.logger.info("hotel parser begin to parse")
        # NOTE(review): the except/finally clause for this outer try is not
        # part of this excerpt.
        try:
            try:
                soap_tree = etree.fromstring(input_file.read())
            except Exception, e:
                self.logger.error("not complete xml:%s" % e)
                raise ParserError("not complete xml")

            # The real payload is XML text embedded inside the SOAP
            # RequestResult element, so it is parsed a second time below.
            soap_elems = xpath_namespace(
                soap_tree, "/soap:Envelope/soap:Body/"
                "RequestResponse/RequestResult")
            # Indexing [0] here and on elems below assumes at least one match.
            xml_str = soap_elems[0].text
            tree = etree.fromstring(xml_str)
            elems = tree.xpath("/Response/Header")
            header = elems[0]
            if "ResultCode" not in header.attrib or \
                            header.attrib['ResultCode'] != "Success":
                self.logger.error("not has resultcode or "
                                  "resultcode is not success")
                raise ParserError("ResultCode error")
            else:
                # success
                property_elems = xpath_namespace(
                    tree, "/Response/HotelResponse/OTA_HotelSearchRS/"
                    "Properties/Property")
                city_code = task.kwargs.get('citycode')
                chinese_name = task.kwargs.get('chinesename')

                # Pending batch: hotel codes plus their addresses keyed by
                # hotel code.
                hotel_requests = list()
                hotel_addresses = dict()
                for property_elem in property_elems:
                    # Missing attributes become None so the element can be
                    # skipped by the check further down.
                    hotel_code = str(property_elem.attrib['HotelCode']) \
                        if "HotelCode" in property_elem.attrib \
                        else None
                    hotel_ctrip_city_code = str(
                        property_elem.attrib['HotelCityCode']) \
                        if "HotelCityCode" in property_elem.attrib else None
                    # presumably flist returns the first match of the xpath
                    # result - verify against its definition
                    hotel_address = flist(
                        property_elem.xpath(
                            "*[local-name()='Address']/"
                            "*[local-name()='AddressLine']/text()"))
                    # Encode unicode to UTF-8 bytes first, then coerce to str.
                    if isinstance(hotel_address, unicode):
                        hotel_address = hotel_address.encode("utf-8")
                    hotel_address = str(hotel_address)

                    # HotelCityCode is only used as a presence check; the
                    # item carries the city code taken from the task.
                    if hotel_code and hotel_ctrip_city_code:
                        hotel_url = build_hotel_url(hotel_code)
                        yield HotelCodeItem(hotel_code, city_code, hotel_url)

                        hotel_requests.append(hotel_code)
                        hotel_addresses[hotel_code] = hotel_address
                        # Emit a rooms task once the batch is full, then
                        # empty the containers in place for the next batch.
                        # NOTE(review): the same list/dict objects are passed
                        # to build_rooms_task_for_hotel and cleared right
                        # after - presumably the helper copies what it needs;
                        # confirm.
                        if len(hotel_requests) >= self.batch_count:
                            yield build_rooms_task_for_hotel(
                                hotel_requests, city_code, chinese_name,
                                hotel_addresses)
                            hotel_addresses.clear()
                            del hotel_requests[:]

                # send left requests
                if len(hotel_requests) > 0:
                    yield build_rooms_task_for_hotel(hotel_requests, city_code,
                                                     chinese_name,
                                                     hotel_addresses)
                    hotel_addresses.clear()
                    del hotel_requests[:]
Пример #4
0
    def parse(self, task, input_file):
        """parse response result

            Args:
                task: FileTask or HttpTask
                input_file: file or StringIO
            Yields:
                item: Item, result of parse
                task: Task, new task
        """
        self.logger.info("hotel parser begin to parse")
        try:
            try:
                soap_tree = etree.fromstring(input_file.read())
            except Exception, e:
                self.logger.error("not complete xml:%s" % e)
                raise ParserError("not complete xml")

            soap_elems = xpath_namespace(soap_tree,
                                         "/soap:Envelope/soap:Body/"
                                         "RequestResponse/RequestResult")
            xml_str = soap_elems[0].text
            tree = etree.fromstring(xml_str)
            elems = tree.xpath("/Response/Header")
            header = elems[0]
            if "ResultCode" not in header.attrib or \
                            header.attrib['ResultCode'] != "Success":
                self.logger.error("not has resultcode or "
                                  "resultcode is not success")
                raise ParserError("ResultCode error")
            else:
                # success
                property_elems = xpath_namespace(
                    tree, "/Response/HotelResponse/OTA_HotelSearchRS/"
                          "Properties/Property")
                city_code = task.kwargs.get('citycode')
                chinese_name = task.kwargs.get('chinesename')

                hotel_requests = list()
                hotel_addresses = dict()
                for property_elem in property_elems:
                    hotel_code = str(property_elem.attrib['HotelCode']) \
                        if "HotelCode" in property_elem.attrib \
                        else None
                    hotel_ctrip_city_code = str(
                        property_elem.attrib['HotelCityCode']) \
                        if "HotelCityCode" in property_elem.attrib else None
                    hotel_address = flist(property_elem.xpath(
                        "*[local-name()='Address']/"
                        "*[local-name()='AddressLine']/text()"))
                    if isinstance(hotel_address, unicode):
                        hotel_address = hotel_address.encode("utf-8")
                    hotel_address = str(hotel_address)

                    if hotel_code and hotel_ctrip_city_code:
                        hotel_url = build_hotel_url(hotel_code)
                        yield HotelCodeItem(hotel_code, city_code, hotel_url)

                        hotel_requests.append(hotel_code)
                        hotel_addresses[hotel_code] = hotel_address
                        if len(hotel_requests) >= self.batch_count:
                            yield build_rooms_task_for_hotel(hotel_requests,
                                                             city_code,
                                                             chinese_name,
                                                             hotel_addresses)
                            hotel_addresses.clear()
                            del hotel_requests[:]

                # send left requests
                if len(hotel_requests) > 0:
                    yield build_rooms_task_for_hotel(hotel_requests, city_code,
                                                     chinese_name,
                                                     hotel_addresses)
                    hotel_addresses.clear()
                    del hotel_requests[:]