Пример #1
0
    def api_request(self, command, **kwargs):
        """Call the ``/api/<command>`` endpoint and return the decoded JSON reply.

        An optional ``data`` keyword is popped from ``kwargs``, converted with
        ``to_unicode`` and UTF-8 encoded (with 'replace') to form the request
        body. The remaining keyword arguments are handed to ``self.buildurl``.

        Commands that do not start with 'applications' are sent with the
        Authorization, X-Platform, X-Client-Version and X-AUM-Token headers.

        Raises ValueError carrying the raw response when it is not valid JSON.
        """
        payload = None
        if 'data' in kwargs:
            payload = to_unicode(kwargs.pop('data')).encode('utf-8', 'replace')

        headers = {}
        if not command.startswith('applications'):
            # The token is derived from the username, the API token and the
            # current UTC day.
            day = local2utc(datetime.now()).strftime('%Y-%m-%d')
            token = sha256(self.username + self.APITOKEN + day).hexdigest()
            credentials = b64encode('%s:%s' % (self.username, self.password))
            headers = {
                'Authorization': 'Basic %s' % credentials,
                'X-Platform': 'android',
                'X-Client-Version': self.APIVERSION,
                'X-AUM-Token': token,
            }

        target = self.buildurl(self.absurl('/api/%s' % command), **kwargs)
        if isinstance(target, unicode):
            target = target.encode('utf-8')
        response = self.openurl(self.request_class(target, payload, headers))
        raw = response.read()

        try:
            return json.loads(raw)
        except ValueError:
            # Surface the undecodable body to the caller.
            raise ValueError(raw)
Пример #2
0
    def __init__(self, browser, url, tree):
        """Build the article from its URL and, when given, its parsed element.

        ``url`` is stored and turned into ``self.id`` through ``url2id``. When
        ``tree`` is None nothing else is filled in. Otherwise the title, author,
        username, body, date, relevance (vote) URL/token and score are read
        from ``tree``.
        """
        Content.__init__(self, browser)
        self.url = url
        self.id = url2id(self.url)

        if tree is None:
            return

        header = tree.find("header")
        title_parts = [link.text for link in header.find("h1").xpath(".//a")]
        self.title = u" — ".join(title_parts)

        parser = self.browser.parser
        try:
            author_link = parser.select(header, "a[rel=author]", 1)
        except BrokenPageError:
            # No author link could be selected: fall back to an anonymous author.
            self.author = "Anonyme"
            self.username = None
        else:
            self.author = unicode(author_link.text)
            self.username = unicode(author_link.attrib["href"].split("/")[2])

        content_node = parser.select(tree, "div.content", 1)
        self.body = parser.tostring(content_node)

        try:
            time_node = parser.select(header, "time", 1)
            # Drop the "+HH:MM" offset before parsing the timestamp.
            stamp = time_node.attrib["datetime"].split("+")[0]
            self.date = datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
            self.date = local2utc(self.date)
        except BrokenPageError:
            # The date is left untouched when no <time> element is selected.
            pass

        for form in parser.select(tree.find("footer"), "form.button_to"):
            action = form.attrib["action"]
            if not action.endswith("/for"):
                continue
            # rstrip() strips trailing characters belonging to the given set,
            # not a literal suffix; the action ends in "/for" here, so the
            # stripping stops at the "/".
            self.relevance_url = action.rstrip("for").rstrip("against")
            token_input = parser.select(form, "input[name=authenticity_token]", 1)
            self.relevance_token = token_input.attrib["value"]

        score_node = parser.select(tree, "div.figures figure.score", 1)
        self.score = int(score_node.text)
Пример #3
0
    def parse(self):
        """Fill this comment's fields from the element stored in ``self.div``.

        Sets url, title, author, username, date, body and score. The signature
        and the relevance (vote) URL/token are set only when the corresponding
        elements are present.
        """
        self.url = "%s#%s" % (self.preurl, self.div.attrib["id"])

        parser = self.browser.parser
        title_link = parser.select(self.div.find("h2"), "a.title", 1)
        self.title = unicode(title_link.text)

        try:
            author_link = parser.select(self.div.find("p"), "a[rel=author]", 1)
        except BrokenPageError:
            # No author link could be selected: fall back to an anonymous author.
            self.author = "Anonyme"
            self.username = None
        else:
            self.author = unicode(author_link.text)
            self.username = unicode(author_link.attrib["href"].split("/")[2])

        time_node = parser.select(self.div.find("p"), "time", 1)
        # Drop the "+HH:MM" offset before parsing the timestamp.
        stamp = time_node.attrib["datetime"].split("+")[0]
        self.date = datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
        self.date = local2utc(self.date)

        content = self.div.find("div")
        try:
            signature = parser.select(content, "p.signature", 1)
        except BrokenPageError:
            # No signature.
            pass
        else:
            # Detach the signature so it is not repeated in the body.
            content.remove(signature)
            self.signature = parser.tostring(signature)
        self.body = parser.tostring(content)

        score_node = parser.select(self.div.find("p"), "span.score", 1)
        self.score = int(score_node.text)

        forms = parser.select(self.div.find("footer"), "form.button_to")
        if len(forms) == 0:
            return
        vote_form = forms[0]
        # rstrip() strips trailing characters belonging to the given set, not a
        # literal suffix.
        self.relevance_url = vote_form.attrib["action"].rstrip("for").rstrip("against")
        token_input = parser.select(vote_form, "input[name=authenticity_token]", 1)
        self.relevance_token = token_input.attrib["value"]
Пример #4
0
    def __init__(self, browser, url, tree):
        """Build the article from its URL and, when given, its parsed element.

        ``url`` is stored and turned into ``self.id`` through ``url2id``. When
        ``tree`` is None nothing else is filled in. Otherwise the title, author,
        username, body, date, relevance (vote) URL/token and score are read
        from ``tree``.
        """
        Content.__init__(self, browser)
        self.url = url
        self.id = url2id(self.url)

        if tree is None:
            return

        header = tree.find('header')
        title_parts = [link.text for link in header.find('h1').xpath('.//a')]
        self.title = u' — '.join(title_parts)

        parser = self.browser.parser
        try:
            author_link = parser.select(header, 'a[rel=author]', 1)
        except BrokenPageError:
            # No author link could be selected: fall back to an anonymous author.
            self.author = 'Anonyme'
            self.username = None
        else:
            self.author = unicode(author_link.text)
            self.username = unicode(author_link.attrib['href'].split('/')[2])

        content_node = parser.select(tree, 'div.content', 1)
        self.body = parser.tostring(content_node)

        try:
            time_node = parser.select(header, 'time', 1)
            # Drop the "+HH:MM" offset before parsing the timestamp.
            stamp = time_node.attrib['datetime'].split('+')[0]
            self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
            self.date = local2utc(self.date)
        except BrokenPageError:
            # The date is left untouched when no <time> element is selected.
            pass

        for form in parser.select(tree.find('footer'), 'form.button_to'):
            action = form.attrib['action']
            if not action.endswith('/for'):
                continue
            # rstrip() strips trailing characters belonging to the given set,
            # not a literal suffix; the action ends in '/for' here, so the
            # stripping stops at the '/'.
            self.relevance_url = action.rstrip('for').rstrip('against')
            token_input = parser.select(form, 'input[name=authenticity_token]', 1)
            self.relevance_token = token_input.attrib['value']

        score_node = parser.select(tree, 'div.figures figure.score', 1)
        self.score = int(score_node.text)
Пример #5
0
    def api_request(self, command, **kwargs):
        """Call the ``/api/<command>`` endpoint and return the decoded JSON reply.

        An optional ``data`` keyword is popped from ``kwargs``, converted with
        ``to_unicode`` and UTF-8 encoded (with 'replace') to form the request
        body. The remaining keyword arguments are handed to ``self.buildurl``.

        Commands that do not start with 'applications' are sent with the
        Authorization, X-Platform, X-Client-Version and X-AUM-Token headers.

        Raises ValueError carrying the raw response when it is not valid JSON.
        """
        payload = None
        if 'data' in kwargs:
            payload = to_unicode(kwargs.pop('data')).encode('utf-8', 'replace')

        headers = {}
        if not command.startswith('applications'):
            # The token is derived from the username, the API token and the
            # current UTC day.
            day = local2utc(datetime.now()).strftime('%Y-%m-%d')
            token = sha256(self.username + self.APITOKEN + day).hexdigest()
            credentials = b64encode('%s:%s' % (self.username, self.password))
            headers = {
                'Authorization': 'Basic %s' % credentials,
                'X-Platform': 'android',
                'X-Client-Version': self.APIVERSION,
                'X-AUM-Token': token,
            }

        target = self.buildurl(self.absurl('/api/%s' % command), **kwargs)
        if isinstance(target, unicode):
            target = target.encode('utf-8')
        response = self.openurl(self.request_class(target, payload, headers))
        raw = response.read()

        try:
            return json.loads(raw)
        except ValueError:
            # Surface the undecodable body to the caller.
            raise ValueError(raw)
Пример #6
0
    def __init__(self, browser, url, tree):
        """Build the article from its URL and, when given, its parsed element.

        ``url`` is stored and turned into ``self.id`` through ``url2id``. When
        ``tree`` is None nothing else is filled in. Otherwise the title, author,
        username, body, date, relevance (vote) URL/token and score are read
        from ``tree``.

        NOTE(review): the XPath expressions rely on a ``has-class`` function
        that is not defined in this block -- presumably registered elsewhere.
        """
        super(Article, self).__init__(browser)
        self.url = url
        self.id = url2id(self.url)

        if tree is None:
            return

        header = tree.find('header')
        self.title = u' — '.join([a.text for a in header.find('h1').xpath('.//a')])
        try:
            a = header.xpath('.//a[@rel="author"]')[0]
        except IndexError:
            # No author link in the header: fall back to an anonymous author.
            self.author = 'Anonyme'
            self.username = None
        else:
            self.author = unicode(a.text)
            self.username = unicode(a.attrib['href'].split('/')[2])
        self.body = lxml.html.tostring(tree.xpath('.//div[has-class("content")]')[0]).decode('utf-8')
        try:
            # Drop the "+HH:MM" offset before parsing the timestamp.
            self.date = datetime.strptime(header.xpath('.//time')[0].attrib['datetime'].split('+')[0],
                                          '%Y-%m-%dT%H:%M:%S')
            self.date = local2utc(self.date)
        except IndexError:
            # The date is left untouched when the header has no <time> element.
            pass
        # The path must be relative ('.//'): an absolute '//form' is evaluated
        # from the document root and would also match forms outside this
        # article's footer, so the wrong vote URL/token could be picked up.
        for form in tree.find('footer').xpath('.//form[has-class("button_to")]'):
            if form.attrib['action'].endswith('/for'):
                # rstrip() strips trailing characters belonging to the given
                # set, not a literal suffix; the action ends in '/for' here,
                # so the stripping stops at the '/'.
                self.relevance_url = form.attrib['action'].rstrip('for').rstrip('against')
                self.relevance_token = form.xpath('.//input[@name="authenticity_token"]')[0].attrib['value']

        self.score = int(tree.xpath('.//div[has-class("figures")]//figure[has-class("score")]')[0].text)
Пример #7
0
 def iter_threads(self):
     """Yield a discussion ``Thread`` for each entry of ``self.browser.get_threads()``.

     Each thread gets the entry's id, the IS_DISCUSSION flag, a title built
     from the entry's name and a date taken from its last message timestamp.
     """
     for entry in self.browser.get_threads():
         discussion = Thread(entry['id'])
         discussion.flags = Thread.IS_DISCUSSION
         discussion.title = u'Discussion with %s' % entry['name']
         # fromtimestamp() yields local time, which local2utc then converts.
         last_activity = datetime.datetime.fromtimestamp(entry['last_message']['utc_timestamp'])
         discussion.date = local2utc(last_activity)
         yield discussion
Пример #8
0
    def parse(self):
        """Fill this comment's fields from the element stored in ``self.div``.

        Sets url, title, author, username, date, body and score. The signature
        and the relevance (vote) URL/token are set only when the corresponding
        elements are present.
        """
        self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])

        parser = self.browser.parser
        title_link = parser.select(self.div.find('h2'), 'a.title', 1)
        self.title = unicode(title_link.text)

        try:
            author_link = parser.select(self.div.find('p'), 'a[rel=author]', 1)
        except BrokenPageError:
            # No author link could be selected: fall back to an anonymous author.
            self.author = 'Anonyme'
            self.username = None
        else:
            self.author = unicode(author_link.text)
            self.username = unicode(author_link.attrib['href'].split('/')[2])

        time_node = parser.select(self.div.find('p'), 'time', 1)
        # Drop the "+HH:MM" offset before parsing the timestamp.
        stamp = time_node.attrib['datetime'].split('+')[0]
        self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)

        content = self.div.find('div')
        try:
            signature = parser.select(content, 'p.signature', 1)
        except BrokenPageError:
            # No signature.
            pass
        else:
            # Detach the signature so it is not repeated in the body.
            content.remove(signature)
            self.signature = parser.tostring(signature)
        self.body = parser.tostring(content)

        score_node = parser.select(self.div.find('p'), 'span.score', 1)
        self.score = int(score_node.text)

        forms = parser.select(self.div.find('footer'), 'form.button_to')
        if len(forms) == 0:
            return
        vote_form = forms[0]
        # rstrip() strips trailing characters belonging to the given set, not a
        # literal suffix.
        self.relevance_url = vote_form.attrib['action'].rstrip('for').rstrip('against')
        token_input = parser.select(vote_form, 'input[name=authenticity_token]', 1)
        self.relevance_token = token_input.attrib['value']
Пример #9
0
 def iter_threads(self):
     """Yield a discussion ``Thread`` for each entry of ``self.browser.get_threads()``.

     Each thread gets the entry's id, the IS_DISCUSSION flag, a title built
     from the entry's name and a date taken from its last message timestamp.
     """
     for entry in self.browser.get_threads():
         discussion = Thread(entry['id'])
         discussion.flags = Thread.IS_DISCUSSION
         discussion.title = u'Discussion with %s' % entry['name']
         # fromtimestamp() yields local time, which local2utc then converts.
         last_activity = datetime.datetime.fromtimestamp(entry['last_message']['utc_timestamp'])
         discussion.date = local2utc(last_activity)
         yield discussion
Пример #10
0
    def parse(self):
        """Fill this comment's fields from the element stored in ``self.div``.

        Sets url, title, author, username, date, body and score. The signature
        and the relevance (vote) URL/token are set only when the corresponding
        elements are present.
        """
        self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])

        parser = self.browser.parser
        title_link = parser.select(self.div.find('h2'), 'a.title', 1)
        self.title = unicode(title_link.text)

        try:
            author_link = parser.select(self.div.find('p'), 'a[rel=author]', 1)
        except BrokenPageError:
            # No author link could be selected: fall back to an anonymous author.
            self.author = 'Anonyme'
            self.username = None
        else:
            self.author = unicode(author_link.text)
            self.username = unicode(author_link.attrib['href'].split('/')[2])

        time_node = parser.select(self.div.find('p'), 'time', 1)
        # Drop the "+HH:MM" offset before parsing the timestamp.
        stamp = time_node.attrib['datetime'].split('+')[0]
        self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)

        content = self.div.find('div')
        try:
            signature = parser.select(content, 'p.signature', 1)
        except BrokenPageError:
            # No signature.
            pass
        else:
            # Detach the signature so it is not repeated in the body.
            content.remove(signature)
            self.signature = parser.tostring(signature)
        self.body = parser.tostring(content)

        score_node = parser.select(self.div.find('p'), 'span.score', 1)
        self.score = int(score_node.text)

        forms = parser.select(self.div.find('footer'), 'form.button_to')
        if len(forms) == 0:
            return
        vote_form = forms[0]
        # rstrip() strips trailing characters belonging to the given set, not a
        # literal suffix.
        self.relevance_url = vote_form.attrib['action'].rstrip('for').rstrip('against')
        token_input = parser.select(vote_form, 'input[name=authenticity_token]', 1)
        self.relevance_token = token_input.attrib['value']
Пример #11
0
    def __init__(self, browser, url, tree):
        """Build the article from its URL and, when given, its parsed element.

        ``url`` is stored and turned into ``self.id`` through ``url2id``. When
        ``tree`` is None nothing else is filled in. Otherwise the title, author,
        username, body, date, relevance (vote) URL/token and score are read
        from ``tree``.
        """
        Content.__init__(self, browser)
        self.url = url
        self.id = url2id(self.url)

        if tree is None:
            return

        header = tree.find('header')
        title_parts = [link.text for link in header.find('h1').xpath('.//a')]
        self.title = u' — '.join(title_parts)

        parser = self.browser.parser
        try:
            author_link = parser.select(header, 'a[rel=author]', 1)
        except BrokenPageError:
            # No author link could be selected: fall back to an anonymous author.
            self.author = 'Anonyme'
            self.username = None
        else:
            self.author = unicode(author_link.text)
            self.username = unicode(author_link.attrib['href'].split('/')[2])

        content_node = parser.select(tree, 'div.content', 1)
        self.body = parser.tostring(content_node)

        try:
            time_node = parser.select(header, 'time', 1)
            # Drop the "+HH:MM" offset before parsing the timestamp.
            stamp = time_node.attrib['datetime'].split('+')[0]
            self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
            self.date = local2utc(self.date)
        except BrokenPageError:
            # The date is left untouched when no <time> element is selected.
            pass

        for form in parser.select(tree.find('footer'), 'form.button_to'):
            action = form.attrib['action']
            if not action.endswith('/for'):
                continue
            # rstrip() strips trailing characters belonging to the given set,
            # not a literal suffix; the action ends in '/for' here, so the
            # stripping stops at the '/'.
            self.relevance_url = action.rstrip('for').rstrip('against')
            token_input = parser.select(form, 'input[name=authenticity_token]', 1)
            self.relevance_token = token_input.attrib['value']

        score_node = parser.select(tree, 'div.figures figure.score', 1)
        self.score = int(score_node.text)
Пример #12
0
    def get_thread_mails(self):
        """Extract the correspondent's name and the messages of the thread page.

        Returns a dict with two keys: 'member', a dict holding the 'pseudo'
        ('Unknown' when the heading is missing), and 'messages', a list of
        dicts with 'date', 'message' and 'id_from', built by walking the
        page's message items in reverse document order.
        """
        try:
            name_node = self.document.getroot().cssselect('div#message_heading div.username span.name')[0]
            pseudo = self.parser.tocleanstring(name_node)
        except IndexError:
            pseudo = 'Unknown'

        messages = []
        items = self.document.xpath('//ul[@id="thread"]//li[contains(@id, "message_")]')
        for item in reversed(items):
            try:
                markup = self.parser.tostring(item.xpath('.//div[@class="message_body"]')[0])
            except IndexError:
                continue # 'Match' message
            text = html2text(markup).strip()

            # The timestamp is the first "<digits>, " run inside the script
            # element of the timestamp span.
            script = item.xpath('.//span[@class="timestamp"]//script')[0]
            match = re.search(r'(\d+), ', script.text)
            assert match
            when = local2utc(datetime.fromtimestamp(int(match.group(1))))

            # Last path segment of the first link, without its query string.
            sender_id = item.find('a').attrib['href'].split('/')[-1].split('?')[0]

            messages.append({
                'date' : when,
                'message' : unicode(text),
                'id_from' : unicode(sender_id),
            })

        return {
            'member' : {'pseudo': pseudo},
            'messages' : messages,
        }
Пример #13
0
    def parse(self):
        """Fill this comment's fields from the element stored in ``self.div``.

        Sets url, title, author, username, date, body and score. The signature
        and the relevance (vote) URL/token are set only when the corresponding
        elements are present.

        NOTE(review): the XPath expressions rely on a ``has-class`` function
        that is not defined in this block -- presumably registered elsewhere.
        """
        self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])

        title_links = self.div.find('h2').xpath('.//a[has-class("title")]')
        self.title = unicode(title_links[0].text)

        author_links = self.div.find('p').xpath('.//a[@rel="author"]')
        if author_links:
            author_link = author_links[0]
            self.author = unicode(author_link.text)
            self.username = unicode(author_link.attrib['href'].split('/')[2])
        else:
            # No author link: fall back to an anonymous author.
            self.author = 'Anonyme'
            self.username = None

        time_node = self.div.find('p').xpath('.//time')[0]
        # Drop the "+HH:MM" offset before parsing the timestamp.
        stamp = time_node.attrib['datetime'].split('+')[0]
        self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)

        content = self.div.find('div')
        signatures = content.xpath('.//p[has-class("signature")]')
        if signatures:
            signature = signatures[0]
            # Detach the signature so it is not repeated in the body.
            content.remove(signature)
            self.signature = lxml.html.tostring(signature).decode('utf-8')
        self.body = lxml.html.tostring(content).decode('utf-8')

        score_node = self.div.find('p').xpath('.//span[has-class("score")]')[0]
        self.score = int(score_node.text)

        forms = self.div.find('footer').xpath('.//form[has-class("button_to")]')
        if forms:
            vote_form = forms[0]
            # rstrip() strips trailing characters belonging to the given set,
            # not a literal suffix.
            self.relevance_url = vote_form.attrib['action'].rstrip('for').rstrip('against')
            token_input = vote_form.xpath('.//input[@name="authenticity_token"]')[0]
            self.relevance_token = token_input.attrib['value']
Пример #14
0
 def iter_threads(self):
     """Yield a discussion ``Thread`` for each entry of ``self.browser.get_threads()``.

     The title names a participant whose id differs from
     ``self.browser.my_id`` (the last such participant wins); the date comes
     from the entry's modification date.
     """
     for entry in self.browser.get_threads():
         discussion = Thread(entry['id'])
         discussion.flags = Thread.IS_DISCUSSION
         for participant in entry['participants']:
             profile = participant['user']
             if profile['id'] == self.browser.my_id:
                 # Skip ourselves when looking for the correspondent.
                 continue
             discussion.title = u'Discussion with %s' % profile['display_name']
         modified = parse_date(entry['modification_date'])
         discussion.date = local2utc(modified)
         yield discussion
Пример #15
0
def parse_date(s):
    """Parse a date string that may contain French month abbreviations.

    The French abbreviations listed below are replaced by their English
    equivalents, then the string is handed to ``_parse_dt`` and the result is
    passed through ``local2utc``.

    :param s: date string (unicode)
    :return: whatever ``local2utc`` returns for the parsed date
    """
    # Applied in order. The full u'Août' must come before its prefix u'Aoû':
    # with the prefix first, u'Août' became 'Augt' and the full-word rule
    # could never match.
    replacements = (
        (u'Fév', 'Feb'),
        (u'Avr', 'Apr'),
        (u'Mai', 'May'),
        (u'Juin', 'Jun'),
        (u'Juil', 'Jul'),
        (u'Ao\xfbt', 'Aug'),
        (u'Aoû', 'Aug'),
        (u'Déc', 'Dec'),
    )
    for french, english in replacements:
        s = s.replace(french, english)
    return local2utc(_parse_dt(s))
Пример #16
0
 def iter_threads(self):
     """Yield a discussion ``Thread`` for each entry of ``self.browser.get_threads()``.

     The title names a participant whose id differs from
     ``self.browser.my_id`` (the last such participant wins); the date comes
     from the entry's modification date.
     """
     for entry in self.browser.get_threads():
         discussion = Thread(entry['id'])
         discussion.flags = Thread.IS_DISCUSSION
         for participant in entry['participants']:
             profile = participant['user']
             if profile['id'] == self.browser.my_id:
                 # Skip ourselves when looking for the correspondent.
                 continue
             discussion.title = u'Discussion with %s' % profile['display_name']
         modified = parse_date(entry['modification_date'])
         discussion.date = local2utc(modified)
         yield discussion
Пример #17
0
    def build_request(self, url, *args, **kwargs):
        """Add the API authentication headers, then defer to the parent class.

        The headers dict is taken from (or created in) ``kwargs['headers']``.
        URLs containing 'applications' are forwarded without the extra
        headers.
        """
        headers = kwargs.setdefault('headers', {})
        if 'applications' not in url:
            # The token is derived from the username, the API token and the
            # current UTC day.
            day = local2utc(datetime.now()).strftime('%Y-%m-%d')
            seed = (self.username + self.APITOKEN + day).encode('utf-8')
            token = sha256(seed).hexdigest()

            login = self.username.encode('utf-8')
            secret = self.password.encode('utf-8')
            basic = b64encode(b'%s:%s' % (login, secret)).decode('utf-8')

            headers['Authorization'] = 'Basic %s' % basic
            headers['X-Platform'] = 'android'
            headers['X-Client-Version'] = self.APIVERSION
            headers['X-AUM-Token'] = token

        return super(AuMBrowser, self).build_request(url, *args, **kwargs)
Пример #18
0
    def get_thread(self, thread):
        """Return ``thread`` populated with its chain of messages.

        ``thread`` may be a ``Thread`` or a value used to build one (flagged
        as a discussion). Messages from
        ``self.browser.get_thread_messages`` are linked one after the other:
        each new message becomes the parent of the previous one, and the last
        one processed is stored as ``thread.root``.
        """
        if not isinstance(thread, Thread):
            thread = Thread(thread)
            thread.flags = Thread.IS_DISCUSSION

        user = self.browser.get_user(thread.id)
        thread.title = u'Discussion with %s' % user['name']

        contact = self.storage.get('contacts', thread.id, default={'lastmsg': 0})

        # Signature attached to the messages written by the other user.
        signature = u'Age: %s' % user['age']
        signature += u'\nLast online: %s' % user['last_online']
        signature += u'\nPhotos:\n\t%s' % '\n\t'.join([user['photo_host'] + photo['large'] for photo in user['photos']])

        previous = None

        for raw in self.browser.get_thread_messages(thread.id):
            # Unread when newer than the last message recorded for the contact.
            flags = Message.IS_UNREAD if int(contact['lastmsg']) < raw['utc_timestamp'] else 0

            kind = raw['type']
            if kind == 'msg':
                content = unicode(raw['msg'])
            elif kind == 'new_challenge':
                content = u'A new challenge has been proposed!'
            elif kind == 'serie':
                content = u"I've played"
            elif kind == 'end_game':
                score = raw['score']
                winner = self.browser.my_name if score['w'] == self.browser.my_id else user['name']
                content = u'%s is the winner! (%s VS %s)' % (winner, score['s'][0], score['s'][1])
            else:
                content = u'Unknown action: %s' % kind

            if raw['from'] == self.browser.my_id:
                sender, receiver, footer = self.browser.my_name, user['name'], u''
            else:
                sender, receiver, footer = user['name'], self.browser.my_name, signature

            message = Message(thread=thread,
                              id=raw['utc_timestamp'],
                              title=thread.title,
                              sender=unicode(sender),
                              receivers=[unicode(receiver)],
                              date=local2utc(datetime.datetime.fromtimestamp(raw['utc_timestamp'])),
                              content=content,
                              children=[],
                              parent=None,
                              signature=footer,
                              flags=flags)

            if previous:
                message.children.append(previous)
                previous.parent = message
            previous = message
        thread.root = previous

        return thread
Пример #19
0
    def get_thread(self, thread):
        """Return ``thread`` populated with its chain of messages.

        ``thread`` may be a ``Thread`` or a value used to build one (flagged
        as a discussion). Messages from
        ``self.browser.get_thread_messages`` are linked one after the other:
        each new message becomes the parent of the previous one, and the last
        one processed is stored as ``thread.root``.
        """
        if not isinstance(thread, Thread):
            thread = Thread(thread)
            thread.flags = Thread.IS_DISCUSSION

        user = self.browser.get_user(thread.id)
        thread.title = u'Discussion with %s' % user['name']

        contact = self.storage.get('contacts', thread.id, default={'lastmsg': 0})

        # Signature attached to the messages written by the other user.
        signature = u'Age: %s' % user['age']
        signature += u'\nLast online: %s' % user['last_online']
        signature += u'\nPhotos:\n\t%s' % '\n\t'.join([user['photo_host'] + photo['large'] for photo in user['photos']])

        previous = None

        for raw in self.browser.get_thread_messages(thread.id):
            # Unread when newer than the last message recorded for the contact.
            flags = Message.IS_UNREAD if int(contact['lastmsg']) < raw['utc_timestamp'] else 0

            kind = raw['type']
            if kind == 'msg':
                content = unicode(raw['msg'])
            elif kind == 'new_challenge':
                content = u'A new challenge has been proposed!'
            elif kind == 'serie':
                content = u"I've played"
            elif kind == 'end_game':
                score = raw['score']
                winner = self.browser.my_name if score['w'] == self.browser.my_id else user['name']
                content = u'%s is the winner! (%s VS %s)' % (winner, score['s'][0], score['s'][1])
            else:
                content = u'Unknown action: %s' % kind

            if raw['from'] == self.browser.my_id:
                sender, receiver, footer = self.browser.my_name, user['name'], u''
            else:
                sender, receiver, footer = user['name'], self.browser.my_name, signature

            message = Message(thread=thread,
                              id=raw['utc_timestamp'],
                              title=thread.title,
                              sender=unicode(sender),
                              receivers=[unicode(receiver)],
                              date=local2utc(datetime.datetime.fromtimestamp(raw['utc_timestamp'])),
                              content=content,
                              children=[],
                              parent=None,
                              signature=footer,
                              flags=flags)

            if previous:
                message.children.append(previous)
                previous.parent = message
            previous = message
        thread.root = previous

        return thread
Пример #20
0
    def get_thread(self, thread):
        """Return ``thread`` populated with its chain of messages.

        ``thread`` may be a ``Thread`` or a value used to build one (flagged
        as a discussion). The participants of ``self.browser.get_thread`` are
        split into "me" (id equal to ``self.browser.my_id``) and the other
        contact. Each new message becomes the parent of the previous one, and
        the last one processed is stored as ``thread.root``.
        """
        if not isinstance(thread, Thread):
            thread = Thread(thread)
            thread.flags = Thread.IS_DISCUSSION

        info = self.browser.get_thread(thread.id)
        for participant in info['participants']:
            profile = participant['user']
            if profile['id'] == self.browser.my_id:
                me = HappnContact(profile)
            else:
                other = HappnContact(profile)

        thread.title = u'Discussion with %s' % other.name

        contact = self.storage.get('contacts', thread.id, default={'lastmsg': 0})

        previous = None

        for raw in info['messages']:
            # Unread when its id is above the last one recorded for the contact.
            flags = Message.IS_UNREAD if int(contact['lastmsg']) < int(raw['id']) else 0

            if raw['sender']['id'] == me.id:
                sender, receiver = me, other
            else:
                sender, receiver = other, me

            message = Message(thread=thread,
                              id=raw['id'],
                              title=thread.title,
                              sender=sender.name,
                              receivers=[receiver.name],
                              date=local2utc(parse_date(raw['creation_date'])),
                              content=raw['message'],
                              children=[],
                              parent=None,
                              signature=sender.get_text(),
                              flags=flags)

            if previous:
                message.children.append(previous)
                previous.parent = message
            previous = message
        thread.root = previous

        return thread
Пример #21
0
def parse_dt(s):
    """Convert a human-readable date string into a datetime via ``local2utc``.

    Handled inputs:

    * ``None``: the current time is used;
    * strings containing 'minutes ago': the leading integer is subtracted,
      in minutes, from the current time;
    * "<day> – <time>" where <day> is 'Yesterday' or 'Today': the time part
      is parsed by ``_parse_dt`` and combined with the matching date;
    * anything else: handed as is to ``_parse_dt``.

    :raises ValueError: when the day label of a "<day> – <time>" string is
        neither 'Yesterday' nor 'Today' (previously an UnboundLocalError),
        or when the leading number of an "N minutes ago" string is invalid.
    """
    now = datetime.datetime.now()
    if s is None:
        return local2utc(now)
    if 'minutes ago' in s:
        m = int(s.split()[0])
        d = now - datetime.timedelta(minutes=m)
    elif u'–' in s:
        # Date in form : "Yesterday – 20:45"
        day, hour = s.split(u'–')
        day = day.strip()
        hour = hour.strip()
        if day == 'Yesterday':
            d = now - datetime.timedelta(days=1)
        elif day == 'Today':
            d = now
        else:
            # Without this branch `d` stayed unbound and the code below
            # failed with an unrelated UnboundLocalError.
            raise ValueError('Unsupported day label in date: %r' % s)
        hour = _parse_dt(hour)
        # Keep the date of `d`, take hours and minutes from the parsed time.
        d = datetime.datetime(d.year, d.month, d.day, hour.hour, hour.minute)
    else:
        # Date in form : "Dec 28, 2011"
        d = _parse_dt(s)
    return local2utc(d)
Пример #22
0
def parse_dt(s):
    """Convert a human-readable date string into a datetime via ``local2utc``.

    Handled inputs:

    * ``None``: the current time is used;
    * strings containing 'minutes ago': the leading integer is subtracted,
      in minutes, from the current time;
    * "<day> – <time>" where <day> is 'Yesterday' or 'Today': the time part
      is parsed by ``_parse_dt`` and combined with the matching date;
    * anything else: handed as is to ``_parse_dt``.

    :raises ValueError: when the day label of a "<day> – <time>" string is
        neither 'Yesterday' nor 'Today' (previously an UnboundLocalError),
        or when the leading number of an "N minutes ago" string is invalid.
    """
    now = datetime.datetime.now()
    if s is None:
        return local2utc(now)
    if 'minutes ago' in s:
        m = int(s.split()[0])
        d = now - datetime.timedelta(minutes=m)
    elif u'–' in s:
        # Date in form : "Yesterday – 20:45"
        day, hour = s.split(u'–')
        day = day.strip()
        hour = hour.strip()
        if day == 'Yesterday':
            d = now - datetime.timedelta(days=1)
        elif day == 'Today':
            d = now
        else:
            # Without this branch `d` stayed unbound and the code below
            # failed with an unrelated UnboundLocalError.
            raise ValueError('Unsupported day label in date: %r' % s)
        hour = _parse_dt(hour)
        # Keep the date of `d`, take hours and minutes from the parsed time.
        d = datetime.datetime(d.year, d.month, d.day, hour.hour, hour.minute)
    else:
        # Date in form : "Dec 28, 2011"
        d = _parse_dt(s)
    return local2utc(d)
Пример #23
0
    def get_thread(self, thread):
        """Return ``thread`` populated with its chain of messages.

        ``thread`` may be a ``Thread`` or a value used to build one (flagged
        as a discussion). The participants of ``self.browser.get_thread`` are
        split into "me" (id equal to ``self.browser.my_id``) and the other
        contact. Each new message becomes the parent of the previous one, and
        the last one processed is stored as ``thread.root``.
        """
        if not isinstance(thread, Thread):
            thread = Thread(thread)
            thread.flags = Thread.IS_DISCUSSION

        info = self.browser.get_thread(thread.id)
        for participant in info['participants']:
            profile = participant['user']
            if profile['id'] == self.browser.my_id:
                me = HappnContact(profile)
            else:
                other = HappnContact(profile)

        thread.title = u'Discussion with %s' % other.name

        contact = self.storage.get('contacts', thread.id, default={'lastmsg': 0})

        previous = None

        for raw in info['messages']:
            # Unread when its id is above the last one recorded for the contact.
            flags = Message.IS_UNREAD if int(contact['lastmsg']) < int(raw['id']) else 0

            if raw['sender']['id'] == me.id:
                sender, receiver = me, other
            else:
                sender, receiver = other, me

            message = Message(thread=thread,
                              id=raw['id'],
                              title=thread.title,
                              sender=sender.name,
                              receivers=[receiver.name],
                              date=local2utc(parse_date(raw['creation_date'])),
                              content=raw['message'],
                              children=[],
                              parent=None,
                              signature=sender.get_text(),
                              flags=flags)

            if previous:
                message.children.append(previous)
                previous.parent = message
            previous = message
        thread.root = previous

        return thread
Пример #24
0
    def __init__(self, browser, url, tree):
        """Build the article from its URL and, when given, its parsed element.

        ``url`` is stored and turned into ``self.id`` through ``url2id``. When
        ``tree`` is None nothing else is filled in. Otherwise the title, author,
        username, body, date, relevance (vote) URL/token and score are read
        from ``tree``.

        NOTE(review): the XPath expressions rely on a ``has-class`` function
        that is not defined in this block -- presumably registered elsewhere.
        """
        super(Article, self).__init__(browser)
        self.url = url
        self.id = url2id(self.url)

        if tree is None:
            return

        header = tree.find('header')
        self.title = u' — '.join(
            [a.text for a in header.find('h1').xpath('.//a')])
        try:
            a = header.xpath('.//a[@rel="author"]')[0]
        except IndexError:
            # No author link in the header: fall back to an anonymous author.
            self.author = 'Anonyme'
            self.username = None
        else:
            self.author = unicode(a.text)
            self.username = unicode(a.attrib['href'].split('/')[2])
        self.body = lxml.html.tostring(
            tree.xpath('.//div[has-class("content")]')[0]).decode('utf-8')
        try:
            # Drop the "+HH:MM" offset before parsing the timestamp.
            self.date = datetime.strptime(
                header.xpath('.//time')[0].attrib['datetime'].split('+')[0],
                '%Y-%m-%dT%H:%M:%S')
            self.date = local2utc(self.date)
        except IndexError:
            # The date is left untouched when the header has no <time> element.
            pass
        # The path must be relative ('.//'): an absolute '//form' is evaluated
        # from the document root and would also match forms outside this
        # article's footer, so the wrong vote URL/token could be picked up.
        for form in tree.find('footer').xpath(
                './/form[has-class("button_to")]'):
            if form.attrib['action'].endswith('/for'):
                # rstrip() strips trailing characters belonging to the given
                # set, not a literal suffix; the action ends in '/for' here,
                # so the stripping stops at the '/'.
                self.relevance_url = form.attrib['action'].rstrip(
                    'for').rstrip('against')
                self.relevance_token = form.xpath(
                    './/input[@name="authenticity_token"]')[0].attrib['value']

        self.score = int(
            tree.xpath(
                './/div[has-class("figures")]//figure[has-class("score")]')
            [0].text)
Пример #25
0
    def get_thread_mails(self):
        """Extract the correspondent's name and the messages of the thread page.

        Returns a dict with two keys: 'member', a dict holding the 'pseudo'
        ('Unknown' when the heading is missing), and 'messages', a list of
        dicts with 'date', 'message' and 'id_from', built by walking the
        page's message items in reverse document order.
        """
        try:
            name_node = self.document.getroot().cssselect(
                '#message_heading div.username span.name')[0]
            pseudo = self.parser.tocleanstring(name_node)
        except IndexError:
            pseudo = 'Unknown'

        messages = []
        items = self.document.xpath(
            '//ul[@id="thread"]//li[contains(@id, "message_")]')
        for item in reversed(items):
            try:
                markup = self.parser.tostring(
                    item.xpath('.//div[@class="message_body"]')[0])
            except IndexError:
                continue  # 'Match' message
            text = html2text(markup).strip()

            # The timestamp is the first "<digits>, " run inside the script
            # element of the timestamp span.
            script = item.xpath('.//span[@class="timestamp"]//script')[0]
            match = re.search(r'(\d+), ', script.text)
            assert match
            when = local2utc(datetime.fromtimestamp(int(match.group(1))))

            # Last path segment of the first link, without its query string.
            href = item.find('a').attrib['href']
            sender_id = href.split('/')[-1].split('?')[0]

            messages.append({
                'date': when,
                'message': unicode(text),
                'id_from': unicode(sender_id),
            })

        return {
            'member': {'pseudo': pseudo},
            'messages': messages,
        }
Пример #26
0
    def parse(self):
        """Fill this comment's fields from the element stored in ``self.div``.

        Sets url, title, author, username, date, body and score. The signature
        and the relevance (vote) URL/token are set only when the corresponding
        elements are present.

        NOTE(review): the XPath expressions rely on a ``has-class`` function
        that is not defined in this block -- presumably registered elsewhere.
        """
        self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])

        title_links = self.div.find('h2').xpath('.//a[has-class("title")]')
        self.title = unicode(title_links[0].text)

        author_links = self.div.find('p').xpath('.//a[@rel="author"]')
        if author_links:
            author_link = author_links[0]
            self.author = unicode(author_link.text)
            self.username = unicode(author_link.attrib['href'].split('/')[2])
        else:
            # No author link: fall back to an anonymous author.
            self.author = 'Anonyme'
            self.username = None

        time_node = self.div.find('p').xpath('.//time')[0]
        # Drop the "+HH:MM" offset before parsing the timestamp.
        stamp = time_node.attrib['datetime'].split('+')[0]
        self.date = datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)

        content = self.div.find('div')
        signatures = content.xpath('.//p[has-class("signature")]')
        if signatures:
            signature = signatures[0]
            # Detach the signature so it is not repeated in the body.
            content.remove(signature)
            self.signature = lxml.html.tostring(signature).decode('utf-8')
        self.body = lxml.html.tostring(content).decode('utf-8')

        score_node = self.div.find('p').xpath(
            './/span[has-class("score")]')[0]
        self.score = int(score_node.text)

        forms = self.div.find('footer').xpath(
            './/form[has-class("button_to")]')
        if forms:
            vote_form = forms[0]
            # rstrip() strips trailing characters belonging to the given set,
            # not a literal suffix.
            action = vote_form.attrib['action']
            self.relevance_url = action.rstrip('for').rstrip('against')
            token_input = vote_form.xpath(
                './/input[@name="authenticity_token"]')[0]
            self.relevance_token = token_input.attrib['value']
Пример #27
0
def parse_dt(s):
    """Parse ``s`` with ``_parse_dt`` and pass the result through ``local2utc``."""
    return local2utc(_parse_dt(s))
Пример #28
0
def parse_dt(s):
    """Parse ``s`` with ``_parse_dt`` and pass the result through ``local2utc``."""
    return local2utc(_parse_dt(s))