def events(self):
        """
        Generate a streetsblog.Event for every ".event-details" element on the RA calendar.

        The page at ``self.calendar_url`` is downloaded and parsed completely before
        the first event is produced. For each event the following are filled in:

        * url / name -- href of the first ".more-link" and the text of "h3"
        * start / end -- date from the second <strong> ("%b %d, %Y") combined with
          the two "%I:%M %p" times found after the first <br>, separated by " - "
        * address -- text following the third and fourth <br>, joined by a newline
        * venue -- text of the third <strong>, only when one exists
        * rsvp_url -- same value as the event url
        """

        with contextlib.closing(urllib2.urlopen(self.calendar_url)) as response:
            page = html.fromstring(response.read())

        for details in page.cssselect(".event-details"):

            # The RA markup carries no semantic hooks for the individual fields,
            # so everything below is positional.

            title = cssselect_text(details, "h3")
            link = details.cssselect(".more-link")[0].attrib["href"]
            event = streetsblog.Event(link, title)

            # Fields are told apart only by their position among the <strong>
            # and <br> tags inside the ".event" container.
            container = details.cssselect(".event")[0]
            bold_tags = container.cssselect("strong")
            line_breaks = container.cssselect("br")

            day = datetime.strptime(bold_tags[1].text, "%b %d, %Y").date()

            # Text after the first <br> looks like "<start> - <end>".
            clock_times = []
            for clock_text in line_breaks[0].tail.split(" - "):
                clock_times.append(datetime.strptime(clock_text, "%I:%M %p").time())
            event.start = datetime.combine(day, clock_times[0])
            event.end = datetime.combine(day, clock_times[1])

            address_lines = (line_breaks[2].tail, line_breaks[3].tail)
            event.address = "\n".join(address_lines)

            # A third <strong> is optional; when present it holds the venue.
            if len(bold_tags) >= 3:
                event.venue = bold_tags[2].text

            event.rsvp_url = event.url

            yield event
    def test_cssselect_text(self):
        """Check utils.cssselect_text for selectors matching zero, one and two elements."""

        markup = """
              <div>
                <div class="one">1</div>
                <div class="two">2.1</div>
                <div class="two">2.2</div>
              </div>
            """
        tree = html.fromstring(markup)

        # (selector, expected text): no match gives an empty string, and the
        # two ".two" matches are expected back separated by a blank line.
        expectations = (
            (".none", ""),
            (".one", "1"),
            (".two", "2.1\n\n2.2"),
        )
        for selector, expected in expectations:
            self.assertEqual(utils.cssselect_text(tree, selector), expected)
Пример #3
0
    def create_event(self, url, name):
        """
        Build a streetsblog.Event for the TA event page found at ``url``.

        The page is fetched through ``self._open_url`` and its fields are pulled
        out with css selectors, so this method is tightly coupled to the layout
        of the TA pages.

        ``start`` is always parsed (and therefore raises if the page has no
        parseable start text); ``end`` and ``rsvp_url`` are only assigned when
        the page provides them.

        Returns the populated event.
        """

        def parse_when(text):
            # A " - " separator means a time of day follows the date;
            # without it the text is a bare date.
            if " - " in text:
                layout = "%B %d, %Y - %I:%M%p"
            else:
                layout = "%B %d, %Y"
            return datetime.strptime(text, layout)

        event = streetsblog.Event(url, name)

        with contextlib.closing(self._open_url(url)) as page:
            dom = html.fromstring(page.read())

        event.description = utils.cssselect_text(dom, ".main-content > p")

        start_text = utils.cssselect_text(
            dom, ".field-field-taevent-starttime .date-display-single"
        )
        event.start = parse_when(start_text)

        end_text = utils.cssselect_text(
            dom, ".field-field-taevent-endtime .date-display-single"
        )
        # The end time is optional: leave event.end alone when the text is empty.
        if end_text:
            event.end = parse_when(end_text)

        event.venue = utils.cssselect_text(dom, ".field-field-calendar-location p")

        event.address = utils.cssselect_text(dom, ".field-field-address p")

        # Only the first RSVP link is used, when there is one.
        rsvp_links = dom.cssselect(".field-field-taevent-link a")
        if rsvp_links:
            event.rsvp_url = rsvp_links[0].attrib["href"]

        return event