def parse(self, response):
    """Yield one loaded EventItem per ``//item/event`` node in the feed.

    The ``redrover`` namespace is registered on the document selector and
    again on every per-event loader selector.  Plain event fields and the
    nested ``place/*`` fields are read as text nodes; the latter are stored
    under ``place_``-prefixed item fields.  ``end_freq`` is overwritten with
    whatever values were collected for ``start_date``.

    ``response.meta`` must contain ``pub_code`` (looked up by subscript, so
    a missing key raises ``KeyError``).
    """
    event_fields = (
        u"name", u"description", u"start_date", u"start_time",
        u"end_time", u"phone", u"link",
    )
    place_fields = (
        u"name", u"street_addr", u"state", u"zip", u"city", u"lat", u"long",
    )

    document = Selector(response)
    document.register_namespace("redrover", "https://www.redroverapp.com/redrover")

    for event_node in document.xpath(u"//item/event"):
        loader = EventLoader(item=EventItem(), selector=event_node)
        loader.selector.register_namespace(u"redrover", u"https://www.redroverapp.com/redrover")

        # Direct children of the event element map 1:1 onto item fields.
        for field_name in event_fields:
            loader.add_xpath(field_name, u"%s/text()" % field_name)

        # Children of <place> are flattened into "place_<name>" item fields.
        for field_name in place_fields:
            loader.add_xpath(u"place_" + field_name, u"place/%s/text()" % field_name)

        # end_freq mirrors the collected start_date values.
        loader.replace_value(u"end_freq", loader.get_collected_values(u"start_date"))

        loader.add_value(u"rr_identifier", response.meta[u"pub_code"])
        loader.add_value(u"rr_publisher_market", u"NYC")
        # TODO add ages and categories
        yield loader.load_item()
def parse_event(self, response):
    """Yield a single EventItem scraped from an event detail page.

    The identifier and publisher market are hard-coded (``CTPARK`` /
    ``NYC``).  The event name and place name come from the ``eventtitle``
    and ``locationtitle`` spans, and ``link`` is the URL of the response.

    If the page contains an image matching ``//span/*/img/@src``, the first
    non-empty match is stored in ``image_urls`` and its basename in
    ``image_name``; pages without such an image are still yielded, just
    without image fields.
    """
    sel = Selector(response)
    loader = EventLoader(item=EventItem(), response=response)
    loader.add_value(u"rr_identifier", u"CTPARK")
    loader.add_value(u"rr_publisher_market", u"NYC")
    loader.add_xpath(u"name", "//span[@class='eventtitle']/text()")
    loader.add_xpath(u"place_name", "//span[@class='locationtitle']/text()")

    # Use the first non-empty src only: concatenating every match (as a
    # plain "".join would) produces an invalid URL when several images
    # are present on the page.
    image_srcs = [src for src in sel.xpath("//span/*/img/@src").extract() if src]
    if image_srcs:
        image_url = image_srcs[0]
        loader.replace_value(u"image_name", os.path.basename(image_url))
        loader.replace_value(u"image_urls", [image_url])

    loader.add_value(u"link", response.url)
    yield loader.load_item()