Пример #1
0
    def _get_page(self, place):
        """Fetch the Weather Underground mobile page for *place*.

        Known aliases in self.places are expanded first.  Raises
        Weather.WeatherException when the city is unknown, or
        Weather.TooManyPlacesException when the search is ambiguous.
        """
        # Expand a user-configured alias (case-insensitive) if present.
        if place.lower() in self.places:
            place = self.places[place.lower()]

        soup = get_html_parse_tree(
            'http://m.wund.com/cgi-bin/findweather/getForecast?' +
            urlencode({
                'brand': 'mobile_metric',
                'query': place.encode('utf-8')
            }))

        if soup.body.center and soup.body.center.b.string == 'Search not found:':
            raise Weather.WeatherException(u'City not found')

        # An ambiguous search returns a disambiguation table of places.
        if soup.table.tr.th and soup.table.tr.th.string == 'Place: Temperature':
            places = []
            for td in soup.table.findAll('td'):
                places.append(td.find('a', href=re.compile('.*html$')).string)

            # Cities with more than one airport give duplicate entries. We can take the first
            if len([x for x in places if x == places[0]]) == len(places):
                url = urljoin(
                    'http://m.wund.com/cgi-bin/findweather/getForecast',
                    soup.table.find('td').find(
                        'a', href=re.compile('.*html$'))['href'])
                soup = get_html_parse_tree(url)
            else:
                raise Weather.TooManyPlacesException(places)

        return soup
Пример #2
0
    def _get_page(self, place):
        """Fetch and return the parsed wunderground mobile page for *place*.

        Raises Weather.WeatherException for unknown cities and
        Weather.TooManyPlacesException for ambiguous searches.
        """
        key = place.lower()
        if key in self.places:
            place = self.places[key]

        query = urlencode({'brand': 'mobile_metric',
                           'query': place.encode('utf-8')})
        soup = get_html_parse_tree(
            'http://m.wund.com/cgi-bin/findweather/getForecast?' + query)

        centre = soup.body.center
        if centre and centre.b.string == 'Search not found:':
            raise Weather.WeatherException(u'City not found')

        header = soup.table.tr.th
        if header and header.string == 'Place: Temperature':
            link_re = re.compile('.*html$')
            places = [td.find('a', href=link_re).string
                      for td in soup.table.findAll('td')]

            # Cities with more than one airport give duplicate entries. We can take the first
            if any(name != places[0] for name in places):
                raise Weather.TooManyPlacesException(places)

            href = soup.table.find('td').find('a', href=link_re)['href']
            soup = get_html_parse_tree(urljoin(
                'http://m.wund.com/cgi-bin/findweather/getForecast', href))

        return soup
Пример #3
0
 def scrape_status(self, stream):
     """Return a dict of status fields scraped from the stream's status page."""
     status_url = self.streams[stream]['url'] + 'status.xsl'
     tree = get_html_parse_tree(status_url, treetype='etree')
     # The third table on the page holds the key/value status rows.
     rows = tree.findall('.//table')[2].findall('.//tr')
     status = {}
     for row in rows:
         label, value = [cell.text for cell in row.findall('td')]
         # Labels carry a trailing character (a colon); drop it.
         status[label[:-1]] = value
     return status
Пример #4
0
 def scrape_status(self, stream):
     """Scrape the status.xsl page of *stream* into a dict.

     Keys are row labels with their trailing character (a colon)
     removed; values are the raw cell text.  Assumes exactly two <td>
     cells per row -- TODO confirm against the status page layout.
     """
     tree = get_html_parse_tree(self.streams[stream]['url'] + 'status.xsl',
             treetype='etree')
     # The status rows live in the third table on the page.
     main_table = tree.findall('.//table')[2]
     status = {}
     for row in main_table.findall('.//tr'):
         key, value = [x.text for x in row.findall('td')]
         status[key[:-1]] = value
     return status
Пример #5
0
    def _google_scrape_search(self, query, country=None):
        """Fetch the Google results page for *query* as an etree.

        country, if given, restricts results via Google's cr= parameter.
        """
        params = {'q': query.encode('utf-8')}
        if country:
            params['cr'] = u'country' + country.upper()

        search_url = 'http://www.google.com/search?' + urlencode(params)
        return get_html_parse_tree(search_url,
                                   headers={'user-agent': self.user_agent},
                                   treetype='etree')
Пример #6
0
 def _login(self, user, password):
     """Log in to the USACO training gateway.

     Returns the parsed result page as an etree, or None if USACO
     rejected the credentials.  Raises UsacoException if the site is
     unreachable.
     """
     params = urlencode({'NAME': user.encode('utf-8'), 'PASSWORD': password.encode('utf-8')})
     try:
         etree = get_html_parse_tree(u'http://ace.delos.com/usacogate', data=params, treetype=u'etree')
     except URLError:
         raise UsacoException(u'Sorry, USACO (or my connection?) is down')
     # A 'Please try again' notice in any <font> element means the login failed.
     for font in etree.getiterator(u'font'):
         if font.text and u'Please try again' in font.text:
             return None
     return etree
Пример #7
0
 def _get_title(self, url):
     """Get the title of a page, falling back to the URL itself.

     Any failure (network error, unparseable page, missing <title>) is
     logged at debug level and *url* is returned instead.
     """
     try:
         headers = {'User-Agent': 'Mozilla/5.0'}
         etree = get_html_parse_tree(url, None, headers, 'etree')
         title = etree.findtext('head/title')
         return title or url
     except Exception as e:
         # PEP 3110 'as' syntax: works on Python 2.6+ and 3.x, unlike
         # the old 'except Exception, e' comma form.
         log.debug(u"Error determining title for %s: %s", url, unicode(e))
         return url
Пример #8
0
 def _get_title(self, url):
     """Gets the title of a page.

     Falls back to returning *url* itself when the page cannot be
     fetched or has no <title>; errors are only logged at debug level.
     """
     try:
         headers = {'User-Agent': 'Mozilla/5.0'}
         etree = get_html_parse_tree(url, None, headers, 'etree')
         title = etree.findtext('head/title')
         # An empty or missing title falls back to the URL.
         return title or url
     except Exception, e:
         log.debug(u"Error determining title for %s: %s", url, unicode(e))
         return url
Пример #9
0
 def _login(self, user, password):
     """Authenticate against the USACO gate; return the page etree or None."""
     credentials = {'NAME': user.encode('utf-8'),
                    'PASSWORD': password.encode('utf-8')}
     try:
         etree = get_html_parse_tree(u'http://ace.delos.com/usacogate',
                                     data=urlencode(credentials),
                                     treetype=u'etree')
     except URLError:
         raise UsacoException(u'Sorry, USACO (or my connection?) is down')
     # USACO reports bad credentials with a 'Please try again' notice.
     if any(font.text and u'Please try again' in font.text
            for font in etree.getiterator(u'font')):
         return None
     return etree
Пример #10
0
    def _google_scrape_search(self, query, country=None):
        """Scrape a Google web search for *query*.

        country, if given, restricts results via Google's cr=
        parameter (upper-cased, e.g. 'za' -> 'countryZA').  Returns the
        result page as an etree.
        """
        params = {"q": query.encode("utf-8")}
        if country:
            params["cr"] = u"country" + country.upper()

        return get_html_parse_tree(
            "http://www.google.com/search?" + urlencode(params),
            headers={"user-agent": self.user_agent},
            treetype="etree",
        )
Пример #11
0
 def _login(self, user, password):
     """Log in to USACO; return the response etree, or None on bad credentials.

     Raises UsacoException when the site (or the connection) is down.
     """
     params = urlencode({"NAME": user.encode("utf-8"), "PASSWORD": password.encode("utf-8")})
     try:
         etree = get_html_parse_tree(u"http://ace.delos.com/usacogate", data=params, treetype=u"etree")
     except URLError:
         raise UsacoException(u"Sorry, USACO (or my connection?) is down")
     # A failed login is reported in a <font> element on the result page.
     for font in etree.getiterator(u"font"):
         if font.text and u"Please try again" in font.text:
             return None
     return etree
Пример #12
0
 def _add_user(self, monitor_url, user):
     """Add *user* to the USACO monitor page at *monitor_url*.

     Raises UsacoException when USACO is unreachable, the monitor URL
     carries no auth token, or the user does not exist.
     """
     matches = re.search(r'a=(.+)&', monitor_url)
     if matches is None:
         # Previously this crashed with AttributeError on .group();
         # without the auth token the POST below can never succeed.
         raise UsacoException(u'Sorry, USACO monitor URL is malformed')
     auth = matches.group(1)
     params = urlencode({'STUDENTID': user.encode('utf-8'), 'ADD': 'ADD STUDENT',
         'a': auth.encode('utf-8'), 'monitor': '1'})
     try:
         etree = get_html_parse_tree(monitor_url, treetype=u'etree', data=params)
     except URLError:
         raise UsacoException(u'Sorry, USACO (or my connection?) is down')
     # USACO reports unknown users inside a <font> element.
     for font in etree.getiterator(u'font'):
         if font.text and u'No STATUS file for' in font.text:
             raise UsacoException(u'Sorry, user %s not found' % user)
Пример #13
0
 def _add_user(self, monitor_url, user):
     """Register *user* on the USACO monitor page.

     The auth token is pulled from the a= query parameter of
     monitor_url.  Raises UsacoException if USACO is down or the user
     has no STATUS file (i.e. does not exist).
     """
     matches = re.search(r'a=(.+)&', monitor_url)
     auth = matches.group(1)
     params = urlencode({'STUDENTID': user.encode('utf-8'), 'ADD': 'ADD STUDENT',
         'a': auth.encode('utf-8'), 'monitor': '1'})
     try:
         etree = get_html_parse_tree(monitor_url, treetype=u'etree', data=params)
     except URLError:
         raise UsacoException(u'Sorry, USACO (or my connection?) is down')
     # USACO reports unknown users inside a <font> element.
     for font in etree.getiterator(u'font'):
         if font.text and u'No STATUS file for' in font.text:
             raise UsacoException(u'Sorry, user %s not found' % user)
Пример #14
0
 def _add_user(self, monitor_url, user):
     """POST *user* onto the USACO monitor page; raise if USACO rejects it."""
     auth = re.search(r"a=(.+)&", monitor_url).group(1)
     form = {
         "STUDENTID": user.encode("utf-8"),
         "ADD": "ADD STUDENT",
         "a": auth.encode("utf-8"),
         "monitor": "1",
     }
     try:
         etree = get_html_parse_tree(monitor_url, treetype=u"etree",
                                     data=urlencode(form))
     except URLError:
         raise UsacoException(u"Sorry, USACO (or my connection?) is down")
     # An unknown user is reported as 'No STATUS file for ...'.
     for font in etree.getiterator(u"font"):
         if font.text and u"No STATUS file for" in font.text:
             raise UsacoException(u"Sorry, user %s not found" % user)
Пример #15
0
    def __init__(self, char):
        """Look up *char* in the Unicode Consortium's Unihan database.

        The scraped property tables are flattened into self.data, a
        defaultdict mapping property names to values (missing keys give
        u'').
        """
        self.char = char
        url = 'http://www.unicode.org/cgi-bin/GetUnihanData.pl?'
        # 'useuft8' [sic] is the parameter name passed to the CGI.
        params = {'codepoint': self.char.encode('utf8'), 'useuft8': 'true'}
        soup = get_html_parse_tree(url + urlencode(params),
                                   treetype='html5lib-beautifulsoup')

        tables = soup('table', border="1")

        # Each data row's <td> cells form a key/value pair (presumably
        # name, value -- the first table row is skipped as a header).
        self.data = defaultdict(unicode, ((html_flatten(td).strip()
                                           for td in row('td'))
                                          for table in tables
                                          for row in table('tr')[1:]))
Пример #16
0
    def __init__(self, char):
        """Scrape Unihan data for *char* into self.data.

        self.data maps property names to values, defaulting to u'' for
        unknown keys.
        """
        self.char = char
        url = 'http://www.unicode.org/cgi-bin/GetUnihanData.pl?'
        # 'useuft8' [sic] is the spelling the CGI script expects.
        params = {'codepoint': self.char.encode('utf8'),
                  'useuft8': 'true'}
        soup = get_html_parse_tree(url + urlencode(params),
                                            treetype='html5lib-beautifulsoup')

        tables = soup('table', border="1")

        # Rows (minus each table's first, header row) are flattened into
        # key/value pairs for the defaultdict.
        self.data = defaultdict(unicode,
                                ((html_flatten(td).strip() for td in row('td'))
                                 for table in tables for row in table('tr')[1:]))
Пример #17
0
    def find_stories(self, url):
        """Yield (id, body) pairs for stories found at *url*.

        url may be a URL string or an already-parsed etree.  Only
        stories whose anchor text after the first character is numeric
        are yielded.
        """
        if isinstance(url, basestring):
            tree = get_html_parse_tree(url, treetype='etree')
        else:
            tree = url

        stories = [div for div in tree.findall('.//div')
                       if div.get(u'class') == u'story s']

        for story in stories:
            body = story.findtext('div').strip()
            id = story.findtext('.//a')
            # The anchor text is expected to be '#<id>'; strip the
            # leading character and keep the numeric part.
            if isinstance(id, basestring) and id[1:].isdigit():
                id = int(id[1:])
                yield id, body
Пример #18
0
    def mlia(self, event, query):
        """Respond with a MyLifeIsAverage story.

        query may be None/'random' for a random story, a numeric story
        id, or a search term (best-of search).
        """
        query = query is None and u'random' or query.lower()

        # Random browsing can be noisy; keep it to private messages
        # unless public browsing is explicitly enabled.
        if query == u'random' and event.public and not self.public_browse:
            event.addresponse(u'Sorry, not in public. PM me')
            return

        url = 'http://mylifeisaverage.com/'

        if query == u'random' or query is None:
            # Refill the shuffled story pool from a random page, and
            # learn the current page count from the pagination links.
            if not self.random_pool:
                purl = url + str(randint(1, self.pages))
                tree = get_html_parse_tree(purl, treetype='etree')
                self.random_pool = list(self.find_stories(tree))
                shuffle(self.random_pool)

                # The 'last' pagination link's href holds the page count.
                pagination = [
                    ul for ul in tree.findall('.//ul')
                    if ul.get(u'class') == u'pages'
                ][0]
                self.pages = int([
                    li for li in pagination.findall('li')
                    if li.get(u'class') == u'last'
                ][0].find(u'a').get(u'href'))

            story = self.random_pool.pop()

        else:
            try:
                if query.isdigit():
                    surl = url + '/s/' + query
                else:
                    surl = url + '/best/' + query

                story = self.find_stories(surl).next()

            except StopIteration:
                event.addresponse(u'No such quote')
                return

        id, body = story
        url += 's/%i' % id
        event.addresponse(u'%(body)s\n- %(url)s', {
            'url': url,
            'body': body,
        })
Пример #19
0
    def find_stories(self, url):
        """Generate (id, body) story tuples scraped from *url*.

        Accepts either a URL string or a pre-parsed etree.
        """
        tree = get_html_parse_tree(url, treetype='etree') \
            if isinstance(url, basestring) else url

        for div in tree.findall('.//div'):
            if div.get(u'class') != u'story s':
                continue
            body = div.findtext('div').strip()
            anchor_text = div.findtext('.//a')
            # Story anchors read '#<digits>'; anything else is skipped.
            if isinstance(anchor_text, basestring) and anchor_text[1:].isdigit():
                yield int(anchor_text[1:]), body
Пример #20
0
    def _load_currencies(self):
        """Scrape the ISO 4217 currency table from xe.com into
        self.currencies.

        self.currencies maps a currency code to [places, name], where
        places is a list of countries using it (primary issuer first)
        and name is the currency's name.
        """
        etree = get_html_parse_tree('http://www.xe.com/iso4217.php',
                                    headers={
                                        'User-Agent': 'Mozilla/5.0',
                                        'Referer': 'http://www.xe.com/',
                                    },
                                    treetype='etree')

        tbl_main = [
            x for x in etree.getiterator('table')
            if x.get('class') == 'tbl_main'
        ][0]

        self.currencies = {}
        for tbl_sub in tbl_main.getiterator('table'):
            if tbl_sub.get('class') == 'tbl_sub':
                for tr in tbl_sub.getiterator('tr'):
                    code, place = [x.text for x in tr.getchildren()]
                    name = u''
                    if not place:
                        place = u''
                    # Split 'Place, Currency Name' cells; the [1:-1]
                    # slice ignores commas at the very ends of the text.
                    if u',' in place[1:-1]:
                        place, name = place.split(u',', 1)
                    place = place.strip()
                    if code in self.currencies:
                        currency = self.currencies[code]
                        # Are we using another country's currency?
                        if place != u'' and name != u'' and (
                                currency[1] == u''
                                or currency[1].rsplit(None, 1)[0] in place or
                            (u'(also called' in currency[1]
                             and currency[1].split(u'(', 1)[0].rsplit(
                                 None, 1)[0] in place)):
                            # Promote this place to primary issuer.
                            currency[0].insert(0, place)
                            currency[1] = name.strip()
                        else:
                            currency[0].append(place)
                    else:
                        self.currencies[code] = [[place], name.strip()]

        # Special cases for shared currencies:
        self.currencies['EUR'][0].insert(0, u'Euro Member Countries')
        self.currencies['XOF'][0].insert(
            0, u'Communaut\xe9 Financi\xe8re Africaine')
        self.currencies['XOF'][1] = u'Francs'
Пример #21
0
 def get_tfln(self, section):
     """Yield (id, message) pairs scraped from textsfromlastnight.com.

     section selects which listing page to fetch.  id is 0 for entries
     without a Text-Replies link.
     """
     tree = get_html_parse_tree('http://textsfromlastnight.com/%s' % section,
                                treetype='etree')
     ul = [x for x in tree.findall('.//ul')
           if x.get('id') == 'texts-list'][0]
     # Raw string: '\d' and '\.' are regex escapes, not string escapes
     # (the old non-raw literal triggers invalid-escape warnings).
     id_re = re.compile(r'^/Text-Replies-(\d+)\.html$')
     for li in ul.findall('li'):
         id = 0
         message = ''
         div = [x for x in li.findall('div') if x.get('class') == 'text'][0]
         for a in div.findall('.//a'):
             href = a.get('href')
             # Area-code links carry message text; replies links carry
             # the entry's numeric id.
             if href.startswith('/Texts-From-Areacode-'):
                 message += u'\n' + a.text
             elif href.startswith('/Text-Replies-'):
                 id = int(id_re.match(href).group(1))
                 message += a.text
         yield id, message.strip()
Пример #22
0
    def mlia(self, event, query):
        """Fetch and respond with a story from mylifeisaverage.com.

        query is None/'random' for a random story, a numeric story id,
        or a search term.
        """
        query = query is None and u'random' or query.lower()

        # Random browsing in public channels is opt-in.
        if query == u'random' and event.public and not self.public_browse:
            event.addresponse(u'Sorry, not in public. PM me')
            return

        url = 'http://mylifeisaverage.com/'

        if query == u'random' or query is None:
            if not self.random_pool:
                # Refill and shuffle the pool from a random listing page.
                purl = url + str(randint(1, self.pages))
                tree = get_html_parse_tree(purl, treetype='etree')
                self.random_pool = list(self.find_stories(tree))
                shuffle(self.random_pool)

                # The 'last' pagination link's href is the page count.
                pagination = [ul for ul in tree.findall('.//ul')
                                       if ul.get(u'class') == u'pages'][0]
                self.pages = int(
                    [li for li in pagination.findall('li')
                        if li.get(u'class') == u'last'][0]
                    .find(u'a').get(u'href'))

            story = self.random_pool.pop()

        else:
            try:
                if query.isdigit():
                    surl = url + '/s/' + query
                else:
                    surl = url + '/best/' + query

                story = self.find_stories(surl).next()

            except StopIteration:
                event.addresponse(u'No such quote')
                return

        id, body = story
        url += 's/%i' % id
        event.addresponse(u'%(body)s\n- %(url)s', {
            'url': url,
            'body': body,
        })
Пример #23
0
    def add(self, event, url, name):
        """Subscribe to a feed, autodiscovering the feed URL if needed.

        If url isn't itself a valid feed, the page is scanned for
        <link rel="alternate"> Atom/RSS alternates and the first valid
        one is used.
        """
        feed = event.session.query(Feed).filter_by(name=name).first()

        if feed:
            event.addresponse(u"I already have the %s feed", name)
            return

        valid = bool(feedparser.parse(url)["version"])

        if not valid:
            # Best-effort autodiscovery: any fetch/parse failure simply
            # leaves the feed invalid.  Narrowed from a bare except so
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                soup = get_html_parse_tree(url)
                for alternate in soup.findAll(
                        'link',
                    {
                        'rel': 'alternate',
                        'type': re.compile(r'^application/(atom|rss)\+xml$'),
                        'href': re.compile(r'.+')
                    }):
                    newurl = urljoin(url, alternate["href"])
                    valid = bool(feedparser.parse(newurl)["version"])

                    if valid:
                        url = newurl
                        break
            except Exception:
                pass

        if not valid:
            event.addresponse(
                u'Sorry, I could not add the %(name)s feed. '
                u'%(url)s is not a valid feed', {
                    'name': name,
                    'url': url,
                })
            return

        feed = Feed(unicode(name), unicode(url), event.identity)
        event.session.save(feed)
        event.session.commit()
        event.addresponse(True)
        log.info(u"Added feed '%s' by %s/%s (%s): %s (Found %s entries)", name,
                 event.account, event.identity, event.sender['connection'],
                 url, len(feed.entries))
Пример #24
0
 def get_tfln(self, section):
     """Yield (id, message) tuples from a textsfromlastnight.com listing.

     id is 0 for entries without a Text-Replies link.
     """
     tree = get_html_parse_tree('http://textsfromlastnight.com/%s' %
                                section,
                                treetype='etree')
     ul = [x for x in tree.findall('.//ul')
           if x.get('id') == 'texts-list'][0]
     # NOTE(review): the pattern should ideally be a raw string (r'...')
     # since it uses \d and \. regex escapes.
     id_re = re.compile('^/Text-Replies-(\d+)\.html$')
     for li in ul.findall('li'):
         id = 0
         message = ''
         div = [x for x in li.findall('div') if x.get('class') == 'text'][0]
         for a in div.findall('.//a'):
             href = a.get('href')
             # Area-code links carry message text; replies links carry
             # the entry's numeric id.
             if href.startswith('/Texts-From-Areacode-'):
                 message += u'\n' + a.text
             elif href.startswith('/Text-Replies-'):
                 id = int(id_re.match(href).group(1))
                 message += a.text
         yield id, message.strip()
Пример #25
0
    def dinner(self, event, who, veg):
        """Suggest a recipe from whatthefuckshouldimakefordinner.com."""
        url = 'http://www.whatthefuckshouldimakefordinner.com/'
        if veg:
            url += 'veg.php'

        soup = get_html_parse_tree(url, headers={'Cache-Control': 'max-age=0'})
        link = soup.find('a')
        recipe = u''.join(link.contents)

        # Mirror the caller's tone: swear back only if they swore first.
        raw = event.message['raw'].lower()
        if 'f**k' in raw or 'wtf' in raw:
            template = u"Try some f*****g %(recipe)s. If you're too thick " \
                       u"to work it out for yourself, there's a recipe at " \
                       u"%(link)s"
        else:
            template = u"Try some %(recipe)s. If you can't " \
                       u"work it out for yourself, there's a recipe at " \
                       u"%(link)s"
        event.addresponse(template, {'recipe': recipe, 'link': link['href']})
Пример #26
0
    def dinner (self, event, who, veg):
        """Respond with a dinner suggestion scraped from
        whatthefuckshouldimakefordinner.com (vegetarian page if *veg*).
        """
        url = 'http://www.whatthefuckshouldimakefordinner.com/'
        if veg:
            url += 'veg.php'

        # max-age=0 asks for a fresh suggestion instead of a cached page.
        soup = get_html_parse_tree(url, headers={'Cache-Control': 'max-age=0'})
        link = soup.find('a')
        recipe = u''.join(link.contents)

        # Match the user's tone: rude prompt, rude reply.
        if ('f**k' in event.message['raw'].lower() or
                'wtf' in event.message['raw'].lower()):
            template = u"Try some f*****g %(recipe)s. If you're too thick " \
                       u"to work it out for yourself, there's a recipe at " \
                       u"%(link)s"
        else:
            template = u"Try some %(recipe)s. If you can't " \
                       u"work it out for yourself, there's a recipe at " \
                       u"%(link)s"
        event.addresponse(template, {'recipe': recipe, 'link': link['href']})
Пример #27
0
    def bash(self, event, id):
        """Quote bash.org quote *id* (or a random one if id is None).

        Random quotes are limited to private messages unless
        self.public_browse is set.
        """
        id = id is None and u'random' or id.lower()

        if id == u'random' and event.public and not self.public_browse:
            event.addresponse(u'Sorry, not in public. PM me')
            return

        soup = get_html_parse_tree('http://bash.org/?%s' % id)

        # Check for the quote body BEFORE touching the number header:
        # on an unknown id neither element exists, and the old code
        # crashed on soup.find('p', 'quote') returning None.
        body = soup.find('p', 'qt')
        if not body:
            event.addresponse(u"There's no such quote, but if you keep talking like that maybe there will be")
        else:
            number = u"".join(soup.find('p', 'quote').find('b').contents)
            output = [u'%s:' % number]
            # Quote lines are separated by <br/>; drop the separators.
            for line in body.contents:
                line = unicode(line).strip()
                if line != u'<br />':
                    output.append(line)
            event.addresponse(u'\n'.join(output), conflate=False)
Пример #28
0
    def add(self, event, url, name):
        """Add a named feed, trying feed autodiscovery on HTML pages.

        Responds with an error if the name is taken or no valid feed
        can be found at (or linked from) *url*.
        """
        feed = event.session.query(Feed).filter_by(name=name).first()

        if feed:
            event.addresponse(u"I already have the %s feed", name)
            return

        valid = bool(feedparser.parse(url)["version"])

        if not valid:
            # Autodiscovery: look for <link rel="alternate"> Atom/RSS
            # alternates on the page and take the first valid one.
            try:
                soup = get_html_parse_tree(url)
                for alternate in soup.findAll('link', {'rel': 'alternate',
                        'type': re.compile(r'^application/(atom|rss)\+xml$'),
                        'href': re.compile(r'.+')}):
                    newurl = urljoin(url, alternate["href"])
                    valid = bool(feedparser.parse(newurl)["version"])

                    if valid:
                        url = newurl
                        break
            # NOTE(review): bare except silently swallows everything,
            # including KeyboardInterrupt; deliberate best-effort, but
            # 'except Exception' would be safer.
            except:
                pass

        if not valid:
            event.addresponse(u'Sorry, I could not add the %(name)s feed. '
                u'%(url)s is not a valid feed', {
                    'name': name,
                    'url': url,
                })
            return

        feed = Feed(unicode(name), unicode(url), event.identity)
        event.session.add(feed)
        event.session.commit()
        event.addresponse(True)
        log.info(u"Added feed '%s' by %s/%s (%s): %s (Found %s entries)",
                name, event.account, event.identity,
                event.sender['connection'], url, len(feed.entries))
Пример #29
0
    def bash(self, event, id):
        """Fetch and display bash.org quote *id* ('random' if None)."""
        id = id is None and u'random' or id.lower()

        # Random browsing is kept out of public channels unless enabled.
        if id == u'random' and event.public and not self.public_browse:
            event.addresponse(u'Sorry, not in public. PM me')
            return

        soup = get_html_parse_tree('http://bash.org/?%s' % id)

        # NOTE(review): this runs before the missing-quote check below;
        # if the page has no <p class="quote"> it raises AttributeError.
        number = u"".join(soup.find('p', 'quote').find('b').contents)
        output = [u'%s:' % number]

        body = soup.find('p', 'qt')
        if not body:
            event.addresponse(
                u"There's no such quote, but if you keep talking like that maybe there will be"
            )
        else:
            # Quote lines are separated by <br/>; drop the separators.
            for line in body.contents:
                line = unicode(line).strip()
                if line != u'<br />':
                    output.append(line)
            event.addresponse(u'\n'.join(output), conflate=False)
Пример #30
0
    def _get_section(self, monitor_url, usaco_user, user):
        """Describe which USACO training section *usaco_user* is on.

        Returns a formatted unicode string, or None if the user is not
        listed on the monitor page.  Raises UsacoException when USACO
        is unreachable.
        """
        try:
            etree = get_html_parse_tree(monitor_url, treetype=u'etree')
        except URLError:
            raise UsacoException(u'Sorry, USACO (or my connection?) is down')
        usaco_user = usaco_user.lower()
        header = True
        for tr in etree.getiterator(u'tr'):
            # Skip the table's first (header) row.
            if header:
                header = False
                continue
            # Cell 0 is the username, cell 3 days since last login,
            # cell 5 the section.
            tds = [t.text for t in tr.getiterator(u'td')]
            section = u'is on section %s' % tds[5]
            if tds[5] == u'DONE':
                section = u'has completed USACO training'
            if tds[0] and tds[0].lower() == usaco_user:
                return u'%(user)s (%(usaco_user)s on USACO) %(section)s and last logged in %(days)s ago' % {
                    'user': user,
                    'usaco_user': usaco_user,
                    'days': tds[3],
                    'section': section,
                }

        return None
Пример #31
0
    def _get_section(self, monitor_url, usaco_user, user):
        """Return a sentence describing the user's USACO section, or None."""
        try:
            etree = get_html_parse_tree(monitor_url, treetype=u'etree')
        except URLError:
            raise UsacoException(u'Sorry, USACO (or my connection?) is down')
        usaco_user = usaco_user.lower()
        rows = iter(etree.getiterator(u'tr'))
        # The first row is the table header; drop it.
        next(rows, None)
        for row in rows:
            cells = [cell.text for cell in row.getiterator(u'td')]
            if cells[5] == u'DONE':
                section = u'has completed USACO training'
            else:
                section = u'is on section %s' % cells[5]
            if cells[0] and cells[0].lower() == usaco_user:
                return u'%(user)s (%(usaco_user)s on USACO) %(section)s and last logged in %(days)s ago' % {
                    'user': user,
                    'usaco_user': usaco_user,
                    'days': cells[3],
                    'section': section,
                }

        return None
Пример #32
0
    def _load_currencies(self):
        """Populate self.currencies from xe.com's ISO 4217 table.

        Each entry maps a currency code to [places, name]; places is a
        list of countries using the currency, primary issuer first.
        """
        etree = get_html_parse_tree(
                'http://www.xe.com/iso4217.php', headers = {
                    'User-Agent': 'Mozilla/5.0',
                    'Referer': 'http://www.xe.com/',
                }, treetype='etree')

        tbl_main = [x for x in etree.getiterator('table') if x.get('class') == 'tbl_main'][0]

        self.currencies = {}
        for tbl_sub in tbl_main.getiterator('table'):
            if tbl_sub.get('class') == 'tbl_sub':
                for tr in tbl_sub.getiterator('tr'):
                    code, place = [x.text for x in tr.getchildren()]
                    name = u''
                    if not place:
                        place = u''
                    # Split 'Place, Currency Name' cells; the slice
                    # ignores commas at the very ends of the string.
                    if u',' in place[1:-1]:
                        place, name = place.split(u',', 1)
                    place = place.strip()
                    if code in self.currencies:
                        currency = self.currencies[code]
                        # Are we using another country's currency?
                        if place != u'' and name != u'' and (currency[1] == u'' or currency[1].rsplit(None, 1)[0] in place
                                or (u'(also called' in currency[1] and currency[1].split(u'(', 1)[0].rsplit(None, 1)[0] in place)):
                            # Promote this place to primary issuer.
                            currency[0].insert(0, place)
                            currency[1] = name.strip()
                        else:
                            currency[0].append(place)
                    else:
                        self.currencies[code] = [[place], name.strip()]

        # Special cases for shared currencies:
        self.currencies['EUR'][0].insert(0, u'Euro Member Countries')
        self.currencies['XOF'][0].insert(0, u'Communaut\xe9 Financi\xe8re Africaine')
        self.currencies['XOF'][1] = u'Francs'
Пример #33
0
    def _get_section(self, monitor_url, usaco_user, user):
        """Look up *usaco_user* on the monitor page and report their section.

        Returns a formatted unicode message, or None when the user is
        absent.  Raises UsacoException if USACO cannot be reached.
        """
        try:
            etree = get_html_parse_tree(monitor_url, treetype=u"etree")
        except URLError:
            raise UsacoException(u"Sorry, USACO (or my connection?) is down")
        usaco_user = usaco_user.lower()
        header = True
        for tr in etree.getiterator(u"tr"):
            # The first <tr> is the header row; skip it.
            if header:
                header = False
                continue
            # Cell 0: username, cell 3: days since login, cell 5: section.
            tds = [t.text for t in tr.getiterator(u"td")]
            section = u"is on section %s" % tds[5]
            if tds[5] == u"DONE":
                section = u"has completed USACO training"
            if tds[0] and tds[0].lower() == usaco_user:
                return u"%(user)s (%(usaco_user)s on USACO) %(section)s and last logged in %(days)s ago" % {
                    "user": user,
                    "usaco_user": usaco_user,
                    "days": tds[3],
                    "section": section,
                }

        return None
Пример #34
0
            event.addresponse(self._get_section(monitor_url, usaco_user, user))
        except UsacoException, e:
            event.addresponse(e)
            return

    @match(r"^usaco\s+division\s+(?:for\s+)?(.+)$")
    def get_division(self, event, user):
        try:
            usaco_user = self._get_usaco_user(event, user)
        except UsacoException, e:
            event.addresponse(e)
            return

        params = urlencode({"id": usaco_user.encode("utf-8"), "search": "SEARCH"})
        try:
            etree = get_html_parse_tree(u"http://ace.delos.com/showdiv", data=params, treetype=u"etree")
        except URLError:
            event.addresponse(u"Sorry, USACO (or my connection?) is down")
        division = [b.text for b in etree.getiterator(u"b") if b.text and usaco_user in b.text][0]
        if division.find(u"would compete") != -1:
            event.addresponse(
                u"%(user)s (%(usaco_user)s on USACO) has not competed in a USACO before",
                {u"user": user, u"usaco_user": usaco_user},
            )
        matches = re.search(r"(\w+) Division", division)
        division = matches.group(1).lower()
        event.addresponse(
            u"%(user)s (%(usaco_user)s on USACO) is in the %(division)s division",
            {u"user": user, u"usaco_user": usaco_user, u"division": division},
        )
Пример #35
0
            event.addresponse(self._get_section(monitor_url, usaco_user, user))
        except UsacoException, e:
            event.addresponse(e)
            return

    @match(r'^usaco\s+division\s+(?:for\s+)?(.+)$')
    def get_division(self, event, user):
        try:
            usaco_user = self._get_usaco_user(event, user)
        except UsacoException, e:
            event.addresponse(e)
            return

        params = urlencode({'id': usaco_user.encode('utf-8'), 'search': 'SEARCH'})
        try:
            etree = get_html_parse_tree(u'http://ace.delos.com/showdiv', data=params, treetype=u'etree')
        except URLError:
            event.addresponse(u'Sorry, USACO (or my connection?) is down')
        division = [b.text for b in etree.getiterator(u'b') if b.text and usaco_user in b.text][0]
        if division.find(u'would compete') != -1:
            event.addresponse(u'%(user)s (%(usaco_user)s on USACO) has not competed in a USACO before',
                    {u'user': user, u'usaco_user': usaco_user})
        matches = re.search(r'(\w+) Division', division)
        division = matches.group(1).lower()
        event.addresponse(u'%(user)s (%(usaco_user)s on USACO) is in the %(division)s division',
                {u'user': user, u'usaco_user': usaco_user, u'division': division})

    def _redact(self, event, term):
        for type in ['raw', 'deaddressed', 'clean', 'stripped']:
            event['message'][type] = re.sub(r'(.*)(%s)' % re.escape(term), r'\1[redacted]', event['message'][type])
Пример #36
0
    def _flight_search(self, event, dpt, to, dep_date, ret_date):
        """Search Travelocity for a return trip between two airports.

        dpt/to are free-text airport queries; dep_date/ret_date are
        free-text dates optionally containing 'anytime', 'morning',
        'afternoon' or 'evening'.  Returns (departing_flights,
        returning_flights, url) where url is the departing-leg results
        page, or returns None after telling the user why the search
        could not be made.  Raises FlightException for unparseable
        dates or when Travelocity reports an error.
        """
        airport_dpt = self._airport_search(dpt)
        airport_to = self._airport_search(to)
        if len(airport_dpt) == 0:
            event.addresponse(u"Sorry, I don't know the airport you want to leave from")
            return
        if len(airport_to) == 0:
            event.addresponse(u"Sorry, I don't know the airport you want to fly to")
            return
        if len(airport_dpt) > 1:
            event.addresponse(u'The following airports match the departure: %s', human_join(self.repr_airport(id) for id in airport_dpt)[:480])
            return
        if len(airport_to) > 1:
            event.addresponse(u'The following airports match the destination: %s', human_join(self.repr_airport(id) for id in airport_to)[:480])
            return

        dpt = airport_dpt[0]
        to = airport_to[0]

        def to_travelocity_date(date):
            """Convert free text into Travelocity's (MM/DD/YYYY, time) pair."""
            date = date.lower()
            time = None
            for period in [u'anytime', u'morning', u'afternoon', u'evening']:
                if period in date:
                    time = period.title()
                    date = date.replace(period, u'')
                    break
            try:
                date = parse(date)
            except ValueError:
                raise FlightException(u"Sorry, I can't understand the date %s" % date)
            if time is None:
                if date.hour == 0 and date.minute == 0:
                    time = u'Anytime'
                else:
                    # Travelocity wants e.g. '8:00am' without a leading zero
                    time = date.strftime('%I:00')
                    if time[0] == u'0':
                        time = time[1:]
                    if date.hour < 12:
                        time += u'am'
                    else:
                        time += u'pm'
            date = date.strftime('%m/%d/%Y')
            return (date, time)

        (dep_date, dep_time) = to_travelocity_date(dep_date)
        (ret_date, ret_time) = to_travelocity_date(ret_date)

        params = {
            u'leavingFrom': self.airports[dpt][3],
            u'goingTo': self.airports[to][3],
            u'leavingDate': dep_date,
            u'dateLeavingTime': dep_time,
            u'returningDate': ret_date,
            u'dateReturningTime': ret_time,
        }
        # BUG FIX: track the last fetched URL from the start.  Previously
        # 'url' was only bound inside the redirect loop, so a response with
        # no javascript redirect caused a NameError further down.
        url = u'http://travel.travelocity.com/flights/InitialSearch.do'
        etree = get_html_parse_tree(url, data=urlencode(params), treetype='etree')
        # Travelocity bounces us through interstitial pages via a
        # javascript 'var finurl' redirect; follow until none remains.
        while True:
            scripts = [script for script in etree.getiterator(u'script')]
            if len(scripts) < 2:
                # BUG FIX: guard against pages with fewer than two scripts,
                # which used to raise IndexError
                break
            matches = scripts[1].text and re.search(r'var finurl = "(.*)"', scripts[1].text)
            if matches:
                url = u'http://travel.travelocity.com/flights/%s' % matches.group(1)
                etree = get_html_parse_tree(url, treetype=u'etree')
            else:
                break

        # Travelocity reports search errors in a div of class 'e_content'
        div = [d for d in etree.getiterator(u'div') if d.get(u'class') == u'e_content']
        if len(div):
            error = div[0].find(u'h3').text
            raise FlightException(error)

        departing_flights = self._parse_travelocity(etree)

        # The returning-leg results URL is embedded in the onclick of the
        # price cells, relative to the directory of the current page.
        return_url = None
        table = [t for t in etree.getiterator(u'table') if t.get(u'id') == u'tfGrid'][0]
        match = re.search(r'^(.*?)[^/]*$', url)
        url_base = match.group(1)
        for tr in table.getiterator(u'tr'):
            for td in tr.getiterator(u'td'):
                # BUG FIX: td.get(u'class') can be None, which used to
                # crash on .strip()
                if (td.get(u'class') or u'').strip() in [u'tfPrice', u'tfPriceOrButton']:
                    onclick = td.find(u'div/button').get(u'onclick')
                    match = re.search(r"location.href='\.\./flights/(.+)'", onclick)
                    url_page = match.group(1)
                    return_url = url_base + url_page

        if return_url is None:
            # BUG FIX: previously None was passed straight to the fetcher
            raise FlightException(u"Sorry, I couldn't find the return flights")

        etree = get_html_parse_tree(return_url, treetype=u'etree')
        returning_flights = self._parse_travelocity(etree)

        return (departing_flights, returning_flights, url)
Пример #37
0
    def _flight_search(self, event, dpt, to, dep_date, ret_date):
        """Search Travelocity for a return trip between two airports.

        dpt/to are free-text airport queries; dep_date/ret_date are
        free-text dates optionally containing 'anytime', 'morning',
        'afternoon' or 'evening'.  Returns (departing_flights,
        returning_flights, url) where url is the departing-leg results
        page, or returns None after telling the user why the search
        could not be made.  Raises FlightException for unparseable
        dates or when Travelocity reports an error.
        """
        airport_dpt = self._airport_search(dpt)
        airport_to = self._airport_search(to)
        if len(airport_dpt) == 0:
            event.addresponse(u"Sorry, I don't know the airport you want to leave from")
            return
        if len(airport_to) == 0:
            event.addresponse(u"Sorry, I don't know the airport you want to fly to")
            return
        if len(airport_dpt) > 1:
            event.addresponse(u'The following airports match the departure: %s', human_join(self.repr_airport(id) for id in airport_dpt)[:480])
            return
        if len(airport_to) > 1:
            event.addresponse(u'The following airports match the destination: %s', human_join(self.repr_airport(id) for id in airport_to)[:480])
            return

        dpt = airport_dpt[0]
        to = airport_to[0]

        def to_travelocity_date(date):
            """Convert free text into Travelocity's (MM/DD/YYYY, time) pair."""
            date = date.lower()
            time = None
            for period in [u'anytime', u'morning', u'afternoon', u'evening']:
                if period in date:
                    time = period.title()
                    date = date.replace(period, u'')
                    break
            try:
                date = parse(date)
            except ValueError:
                raise FlightException(u"Sorry, I can't understand the date %s" % date)
            if time is None:
                if date.hour == 0 and date.minute == 0:
                    time = u'Anytime'
                else:
                    # Travelocity wants e.g. '8:00am' without a leading zero
                    time = date.strftime('%I:00')
                    if time[0] == u'0':
                        time = time[1:]
                    if date.hour < 12:
                        time += u'am'
                    else:
                        time += u'pm'
            date = date.strftime('%m/%d/%Y')
            return (date, time)

        (dep_date, dep_time) = to_travelocity_date(dep_date)
        (ret_date, ret_time) = to_travelocity_date(ret_date)

        params = {
            u'leavingFrom': self.airports[dpt][3],
            u'goingTo': self.airports[to][3],
            u'leavingDate': dep_date,
            u'dateLeavingTime': dep_time,
            u'returningDate': ret_date,
            u'dateReturningTime': ret_time,
        }
        # BUG FIX: track the last fetched URL from the start.  Previously
        # 'url' was only bound inside the redirect loop, so a response with
        # no javascript redirect caused a NameError further down.
        url = u'http://travel.travelocity.com/flights/InitialSearch.do'
        etree = get_html_parse_tree(url, data=urlencode(params), treetype='etree')
        # Travelocity bounces us through interstitial pages via a
        # javascript 'var finurl' redirect; follow until none remains.
        while True:
            scripts = [script for script in etree.getiterator(u'script')]
            if len(scripts) < 2:
                # BUG FIX: guard against pages with fewer than two scripts,
                # which used to raise IndexError
                break
            matches = scripts[1].text and re.search(r'var finurl = "(.*)"', scripts[1].text)
            if matches:
                url = u'http://travel.travelocity.com/flights/%s' % matches.group(1)
                etree = get_html_parse_tree(url, treetype=u'etree')
            else:
                break

        # Travelocity reports search errors in a div of class 'e_content'
        div = [d for d in etree.getiterator(u'div') if d.get(u'class') == u'e_content']
        if len(div):
            error = div[0].find(u'h3').text
            raise FlightException(error)

        departing_flights = self._parse_travelocity(etree)

        # The returning-leg results URL is embedded in the onclick of the
        # price cells, relative to the directory of the current page.
        return_url = None
        table = [t for t in etree.getiterator(u'table') if t.get(u'id') == u'tfGrid'][0]
        match = re.search(r'^(.*?)[^/]*$', url)
        url_base = match.group(1)
        for tr in table.getiterator(u'tr'):
            for td in tr.getiterator(u'td'):
                # BUG FIX: td.get(u'class') can be None, which used to
                # crash on .strip()
                if (td.get(u'class') or u'').strip() in [u'tfPrice', u'tfPriceOrButton']:
                    onclick = td.find(u'div/button').get(u'onclick')
                    match = re.search(r"location.href='\.\./flights/(.+)'", onclick)
                    url_page = match.group(1)
                    return_url = url_base + url_page

        if return_url is None:
            # BUG FIX: previously None was passed straight to the fetcher
            raise FlightException(u"Sorry, I couldn't find the return flights")

        etree = get_html_parse_tree(return_url, treetype=u'etree')
        returning_flights = self._parse_travelocity(etree)

        return (departing_flights, returning_flights, url)
Пример #38
0
            event.addresponse(self._get_section(monitor_url, usaco_user, user))
        except UsacoException, e:
            event.addresponse(e)
            return

    @match(r'^usaco\s+division\s+(?:for\s+)?(.+)$')
    def get_division(self, event, user):
        try:
            usaco_user = self._get_usaco_user(event, user)
        except UsacoException, e:
            event.addresponse(e)
            return

        params = urlencode({'id': usaco_user.encode('utf-8'), 'search': 'SEARCH'})
        try:
            etree = get_html_parse_tree(u'http://ace.delos.com/showdiv', data=params, treetype=u'etree')
        except URLError:
            event.addresponse(u'Sorry, USACO (or my connection?) is down')
        division = [b.text for b in etree.getiterator(u'b') if b.text and usaco_user in b.text][0]
        if division.find(u'would compete') != -1:
            event.addresponse(u'%(user)s (%(usaco_user)s on USACO) has not competed in a USACO before',
                    {u'user': user, u'usaco_user': usaco_user})
        matches = re.search(r'(\w+) Division', division)
        division = matches.group(1).lower()
        event.addresponse(u'%(user)s (%(usaco_user)s on USACO) is in the %(division)s division',
                {u'user': user, u'usaco_user': usaco_user, u'division': division})

    def _redact(self, event, term):
        for type in ['raw', 'deaddressed', 'clean', 'stripped']:
            event['message'][type] = re.sub(r'(.*)(%s)' % re.escape(term), r'\1[redacted]', event['message'][type])