def events(self):
    """
    Generate one streetsblog.Event per entry found on the RA calendar page.

    The page at ``self.calendar_url`` is fetched and parsed once. Each
    ".event-details" element is then turned into an event and yielded
    lazily. Malformed entries are not skipped: a missing tag or an
    unparsable date/time raises (IndexError, ValueError, ...) to the caller.
    """
    with contextlib.closing(urllib2.urlopen(self.calendar_url)) as response:
        page = html.fromstring(response.read())

    for details in page.cssselect(".event-details"):
        # Title and link are the only fields with usable selectors.
        title = cssselect_text(details, "h3")
        link = details.cssselect(".more-link")[0].attrib["href"]
        event = streetsblog.Event(link, title)

        # Everything else is located purely by the position of the
        # <strong> and <br> tags inside the ".event" element.
        body = details.cssselect(".event")[0]
        bold_tags = body.cssselect("strong")
        line_breaks = body.cssselect("br")

        # Second <strong>: the calendar date in "%b %d, %Y" form.
        day = datetime.strptime(bold_tags[1].text, "%b %d, %Y").date()

        # Text trailing the first <br>: "<start> - <end>" clock times.
        clock_times = []
        for chunk in line_breaks[0].tail.split(" - "):
            clock_times.append(datetime.strptime(chunk, "%I:%M %p").time())
        event.start = datetime.combine(day, clock_times[0])
        event.end = datetime.combine(day, clock_times[1])

        # Text trailing the third and fourth <br>: the two address lines.
        first_line = line_breaks[2].tail
        second_line = line_breaks[3].tail
        event.address = "\n".join((first_line, second_line))

        # A third <strong>, when present, carries the venue name.
        if len(bold_tags) >= 3:
            event.venue = bold_tags[2].text

        # The RSVP link is simply the event's own url.
        event.rsvp_url = event.url
        yield event
def test_cssselect_text(self):
    """Check utils.cssselect_text for zero, one and several matches."""
    markup = """ <div> <div class="one">1</div> <div class="two">2.1</div> <div class="two">2.2</div> </div> """
    tree = html.fromstring(markup)

    # (selector, expected text): no match, a single match, and two matches
    # whose texts come back separated by a blank line.
    expectations = (
        (".none", ""),
        (".one", "1"),
        (".two", "2.1\n\n2.2"),
    )
    for selector, expected in expectations:
        self.assertEqual(utils.cssselect_text(tree, selector), expected)
def create_event(self, url, name):
    """
    Build a streetsblog.Event by scraping the TA event page at ``url``.

    NOTE: this is tightly coupled to the TA website; the css selectors
    below mirror the layout of the TA event pages.

    The start time is always parsed, so a page without one raises
    ValueError. The end time and the RSVP link are set only when the page
    provides them.
    """
    date_only = "%B %d, %Y"
    date_and_time = "%B %d, %Y - %I:%M%p"

    def parse_when(text):
        # A " - " separator means a time of day follows the date.
        layout = date_and_time if " - " in text else date_only
        return datetime.strptime(text, layout)

    event = streetsblog.Event(url, name)

    with contextlib.closing(self._open_url(url)) as response:
        page = html.fromstring(response.read())

    event.description = utils.cssselect_text(page, ".main-content > p")

    start_text = utils.cssselect_text(
        page, ".field-field-taevent-starttime .date-display-single")
    event.start = parse_when(start_text)

    end_text = utils.cssselect_text(
        page, ".field-field-taevent-endtime .date-display-single")
    if end_text:
        # Leave event.end untouched when the page has no end time.
        event.end = parse_when(end_text)

    event.venue = utils.cssselect_text(page, ".field-field-calendar-location p")
    event.address = utils.cssselect_text(page, ".field-field-address p")

    rsvp_links = page.cssselect(".field-field-taevent-link a")
    if rsvp_links:
        event.rsvp_url = rsvp_links[0].attrib["href"]

    return event