def _store_item(self, item):
    """Insert or update the database row for one hotel item.

    The item is converted with ``convert_hotel_info_item_2_dict`` and
    serialized as JSON (``ensure_ascii=False``) for the ``info`` column of
    ``rthotel_ctrip_hotel``.  Rows are keyed by the URL returned from
    ``build_hotel_url(item.hotel_code)``: if a row with that URL already
    exists it is updated, otherwise a new row is inserted.

    Args:
        item: Item; its ``city_code`` and ``hotel_code`` attributes are
            read here.

    Raises:
        Exception: any error raised while converting, serializing or
            running the SQL is logged and re-raised unchanged.
    """
    try:
        info_json = json.dumps(convert_hotel_info_item_2_dict(item),
                               ensure_ascii=False)
        # Built once: the same URL is the lookup key and the stored value.
        hotel_url = build_hotel_url(item.hotel_code)

        select_sql = "SELECT * FROM rthotel_ctrip_hotel " \
                     "WHERE url=%(url)s LIMIT 1"
        existing_rows = self.item_db.execute_query(
            select_sql, {'url': hotel_url})

        # Both statements bind exactly the same named parameters, so only
        # the SQL text depends on whether the row already exists.
        if len(existing_rows) >= 1:
            write_sql = "UPDATE rthotel_ctrip_hotel \
                SET city_code=%(city_code)s, hotel_id=%(hotel_id)s, \
                url=%(url)s, info=%(info)s, \
                add_time=%(add_time)s \
                WHERE url=%(url)s"
        else:
            write_sql = "INSERT INTO rthotel_ctrip_hotel \
                (city_code, hotel_id, url, info, add_time) \
                VALUES(%(city_code)s, %(hotel_id)s, %(url)s, %(info)s, \
                %(add_time)s)"

        self.item_db.execute_update(write_sql, {
            'city_code': item.city_code,
            'hotel_id': item.hotel_code,
            'url': hotel_url,
            'info': info_json,
            'add_time': datetime.datetime.now(),
        })
    except Exception as e:
        # Let the logger do the formatting so a message that cannot be
        # formatted does not raise here and hide the original error.
        self.logger.warn("sql error:%s", e)
        # Bare raise keeps the original traceback; ``raise e`` would
        # restart it at this line on Python 2.
        raise
def _store_item(self, item):
    """Store one hotel item, updating its row if the URL is already known.

    The item is turned into a dict by ``convert_hotel_info_item_2_dict``
    and dumped to JSON (``ensure_ascii=False``); that text goes into the
    ``info`` column of ``rthotel_ctrip_hotel``.  The row is looked up by
    ``build_hotel_url(item.hotel_code)``; a match leads to an UPDATE, no
    match leads to an INSERT.

    Args:
        item: Item; ``item.city_code`` and ``item.hotel_code`` are read.

    Raises:
        Exception: whatever the conversion, JSON encoding or database
            calls raise; it is logged first and then re-raised as is.
    """
    try:
        clone_dict = convert_hotel_info_item_2_dict(item)
        encodestr = json.dumps(clone_dict, ensure_ascii=False)
        # Computed once and reused as both lookup key and stored column.
        url = build_hotel_url(item.hotel_code)

        selectsql = "SELECT * FROM rthotel_ctrip_hotel " \
                    "WHERE url=%(url)s LIMIT 1"
        found = self.item_db.execute_query(selectsql, {'url': url})

        # UPDATE and INSERT take identical named parameters; choose only
        # the statement text here and bind the values in one place below.
        if len(found) >= 1:
            writesql = "UPDATE rthotel_ctrip_hotel \
                SET city_code=%(city_code)s, hotel_id=%(hotel_id)s, \
                url=%(url)s, info=%(info)s, \
                add_time=%(add_time)s \
                WHERE url=%(url)s"
        else:
            writesql = "INSERT INTO rthotel_ctrip_hotel \
                (city_code, hotel_id, url, info, add_time) \
                VALUES(%(city_code)s, %(hotel_id)s, %(url)s, %(info)s, \
                %(add_time)s)"

        params = {
            'city_code': item.city_code,
            'hotel_id': item.hotel_code,
            'url': url,
            'info': encodestr,
            'add_time': datetime.datetime.now(),
        }
        self.item_db.execute_update(writesql, params)
    except Exception as e:
        # Pass the exception as a logging argument instead of formatting
        # it eagerly, so a formatting failure cannot mask the real error.
        self.logger.warn("sql error:%s", e)
        # Re-raise with the original traceback intact (``raise e`` would
        # replace it with this line on Python 2).
        raise
def parse(self, task, input_file): """parse response result Args: task: FileTask or HttpTask input_file: file or StringIO Yields: item: Item, result of parse task: Task, new task """ self.logger.info("hotel parser begin to parse") try: try: soap_tree = etree.fromstring(input_file.read()) except Exception, e: self.logger.error("not complete xml:%s" % e) raise ParserError("not complete xml") soap_elems = xpath_namespace( soap_tree, "/soap:Envelope/soap:Body/" "RequestResponse/RequestResult") xml_str = soap_elems[0].text tree = etree.fromstring(xml_str) elems = tree.xpath("/Response/Header") header = elems[0] if "ResultCode" not in header.attrib or \ header.attrib['ResultCode'] != "Success": self.logger.error("not has resultcode or " "resultcode is not success") raise ParserError("ResultCode error") else: # success property_elems = xpath_namespace( tree, "/Response/HotelResponse/OTA_HotelSearchRS/" "Properties/Property") city_code = task.kwargs.get('citycode') chinese_name = task.kwargs.get('chinesename') hotel_requests = list() hotel_addresses = dict() for property_elem in property_elems: hotel_code = str(property_elem.attrib['HotelCode']) \ if "HotelCode" in property_elem.attrib \ else None hotel_ctrip_city_code = str( property_elem.attrib['HotelCityCode']) \ if "HotelCityCode" in property_elem.attrib else None hotel_address = flist( property_elem.xpath( "*[local-name()='Address']/" "*[local-name()='AddressLine']/text()")) if isinstance(hotel_address, unicode): hotel_address = hotel_address.encode("utf-8") hotel_address = str(hotel_address) if hotel_code and hotel_ctrip_city_code: hotel_url = build_hotel_url(hotel_code) yield HotelCodeItem(hotel_code, city_code, hotel_url) hotel_requests.append(hotel_code) hotel_addresses[hotel_code] = hotel_address if len(hotel_requests) >= self.batch_count: yield build_rooms_task_for_hotel( hotel_requests, city_code, chinese_name, hotel_addresses) hotel_addresses.clear() del hotel_requests[:] # send left requests if 
len(hotel_requests) > 0: yield build_rooms_task_for_hotel(hotel_requests, city_code, chinese_name, hotel_addresses) hotel_addresses.clear() del hotel_requests[:]
def parse(self, task, input_file): """parse response result Args: task: FileTask or HttpTask input_file: file or StringIO Yields: item: Item, result of parse task: Task, new task """ self.logger.info("hotel parser begin to parse") try: try: soap_tree = etree.fromstring(input_file.read()) except Exception, e: self.logger.error("not complete xml:%s" % e) raise ParserError("not complete xml") soap_elems = xpath_namespace(soap_tree, "/soap:Envelope/soap:Body/" "RequestResponse/RequestResult") xml_str = soap_elems[0].text tree = etree.fromstring(xml_str) elems = tree.xpath("/Response/Header") header = elems[0] if "ResultCode" not in header.attrib or \ header.attrib['ResultCode'] != "Success": self.logger.error("not has resultcode or " "resultcode is not success") raise ParserError("ResultCode error") else: # success property_elems = xpath_namespace( tree, "/Response/HotelResponse/OTA_HotelSearchRS/" "Properties/Property") city_code = task.kwargs.get('citycode') chinese_name = task.kwargs.get('chinesename') hotel_requests = list() hotel_addresses = dict() for property_elem in property_elems: hotel_code = str(property_elem.attrib['HotelCode']) \ if "HotelCode" in property_elem.attrib \ else None hotel_ctrip_city_code = str( property_elem.attrib['HotelCityCode']) \ if "HotelCityCode" in property_elem.attrib else None hotel_address = flist(property_elem.xpath( "*[local-name()='Address']/" "*[local-name()='AddressLine']/text()")) if isinstance(hotel_address, unicode): hotel_address = hotel_address.encode("utf-8") hotel_address = str(hotel_address) if hotel_code and hotel_ctrip_city_code: hotel_url = build_hotel_url(hotel_code) yield HotelCodeItem(hotel_code, city_code, hotel_url) hotel_requests.append(hotel_code) hotel_addresses[hotel_code] = hotel_address if len(hotel_requests) >= self.batch_count: yield build_rooms_task_for_hotel(hotel_requests, city_code, chinese_name, hotel_addresses) hotel_addresses.clear() del hotel_requests[:] # send left requests if 
len(hotel_requests) > 0: yield build_rooms_task_for_hotel(hotel_requests, city_code, chinese_name, hotel_addresses) hotel_addresses.clear() del hotel_requests[:]