Python unescape_html示例，xml.sax.saxutils.unescape_html Python示例

示例#1

0

显示文件

文件： reddittweeter.py 项目： DigDug101/reddittweeter

def comment_tokens(data):
    """Build the link and the text fragments that describe a comment.

    ``data`` is indexed by 'link_id', 'body' and 'author'.

    Returns a ``(link, tokens)`` pair.  ``link`` is "on " followed by a
    redd.it URL built from the second '_'-separated field of ``link_id``.
    ``tokens`` is a two-item list: the HTML-unescaped body wrapped in double
    quotes, then the author attribution.
    """
    parent_id36 = data['link_id'].split('_')[1]
    quoted_body = '"%s"' % unescape_html(data['body'])
    byline = ', commented by %s' % data['author']
    return 'on http://redd.it/%s' % parent_id36, [quoted_body, byline]

示例#2

0

显示文件

文件： reddittweeter.py 项目： carriercomm/reddittweeter

def link_tokens(data):
    """Build the short URL and the text fragments that describe a link.

    ``data`` is indexed by 'id', 'title', 'subreddit', 'author' and
    'domain'.

    Returns a ``(link, tokens)`` pair.  ``link`` is the redd.it URL for the
    id.  ``tokens`` lists, in order: the HTML-unescaped title, the subreddit
    in brackets, the submitter, and the domain in brackets.
    """
    short_url = 'http://redd.it/%s' % data['id']

    title = unescape_html(data['title'])
    subreddit_tag = ' [%s]' % data['subreddit']
    # No score token (' %d points') is emitted.
    byline = ' submitted by %s' % data['author']
    domain_tag = ' [%s]' % data['domain']

    return short_url, [title, subreddit_tag, byline, domain_tag]

示例#3

0

显示文件

文件： reddittweeter.py 项目： DigDug101/reddittweeter

def link_tokens(data):
    """Return ``(link, tokens)`` describing a submitted link.

    ``link`` is the redd.it short URL built from ``data['id']``.  ``tokens``
    is a list starting with the HTML-unescaped title, followed by the
    bracketed subreddit, the submitter and the bracketed domain.
    """
    link = 'http://redd.it/%s' % data['id']

    # (format, key) pairs appended after the title, in output order.
    # There is no entry for the score (' %d points').
    suffixes = (
        (' [%s]', 'subreddit'),
        (' submitted by %s', 'author'),
        (' [%s]', 'domain'),
    )

    tokens = [unescape_html(data['title'])]
    for fmt, key in suffixes:
        tokens.append(fmt % data[key])
    return link, tokens

示例#4

0

显示文件

文件： redditexporter.py 项目： kdeenanauth/redditexporter

def main(sourceurl, username=None, password=None):
    """
    Given a reddit JSON url, yield the strings that make up the exported
    HTML.

    The header comes first, then one rendered fragment per item returned by
    ``get_links`` (kind "t3" items are rendered with ``link_template``, kind
    "t1" items with ``comment_template``), then the footer.  ``login`` is
    called only when both ``username`` and ``password`` are truthy; its
    result is handed to ``get_links``.

    Raises TypeError for an item whose kind is neither "t3" nor "t1".
    """
    yield header_template % {"exported_url": escape_html(sourceurl)}

    cookie = login(username, password) if username and password else None

    for link in get_links(sourceurl, cookie):
        data = link["data"]
        kind = link["kind"]

        if kind == "t3":
            # A submitted link.
            fields = {
                "id": escape_html(data["id"]),
                "url": escape_html(data["url"]),
                "title": escape_html(data["title"]),
                "domain": escape_html(data["domain"]),
                "author": escape_html(data["author"]),
                "subreddit": escape_html(data["subreddit"]),
                "score": int(data["score"]),
                "num_comments": int(data["num_comments"]),
                "date": time.ctime(data["created_utc"]),
                "selftext_template": "",
            }

            # This is the only way to tell if it's a self-post from the API.
            if data.get("is_self", False):
                domain_link = "http://www.reddit.com/r/%(subreddit)s" % {
                    "subreddit": escape_html(data["subreddit"]),
                }
            else:
                domain_link = "http://www.reddit.com/domain/%(domain)s" % {
                    "domain": escape_html(data["domain"]),
                }
            fields["domain_link"] = domain_link

            selftext_html = data.get("selftext_html")
            if selftext_html:
                fields["selftext_template"] = (
                    selftext_template % unescape_html(selftext_html)
                )

            yield link_template % fields
            continue

        if kind == "t1":
            # A comment.
            fields = {
                "body": unescape_html(data["body_html"]),
                "author": escape_html(data["author"]),
                "subreddit": escape_html(data["subreddit"]),
                "id": escape_html(data["id"]),
                "score": int(data["ups"]) - int(data["downs"]),
                "date": time.ctime(data["created_utc"]),
                "link_id36": escape_html(data["link_id"].split("_")[1]),
            }

            yield comment_template % fields
            continue

        raise TypeError("I don't know how to decode %r" % link)

    yield footer_template

示例#5

0

显示文件

文件： reddittweeter.py 项目： carriercomm/reddittweeter

def comment_tokens(data):
    """Return ``(link, tokens)`` describing a comment.

    ``link`` is "on " plus the redd.it URL of the second '_'-separated field
    of ``data['link_id']``.  ``tokens`` is a list with the HTML-unescaped
    body in double quotes, followed by the author attribution.
    """
    link_id36 = data['link_id'].split('_')[1]
    link = 'on http://redd.it/%s' % link_id36

    tokens = []
    tokens.append('"%s"' % unescape_html(data['body']))
    tokens.append(', commented by %s' % data['author'])
    return link, tokens

示例#6

0

显示文件

文件： twitter.py 项目： cboylan/karmabot

 def get_last_tweet(self):
     """Return the HTML-unescaped "text" of the first item from get_info()."""
     first_entry = self.get_info()[0]
     return unescape_html(first_entry["text"])

示例#7

0

显示文件

文件： session_command.py 项目： kubicek/fred-client

    def create_eppdoc(self, command):
        """Dispatch a command line typed by the user.

        The line is handled as exactly one of: a help request, a session
        command, a hidden (test-only) command, or an EPP command for which
        an EPP document is created.

        Args:
            command: The raw command line.  It is passed through
                ``unescape_html`` first when ``self._session[ESCAPED_INPUT]``
                is set.

        Returns:
            A ``(command_name, raw_command, stop)`` tuple.  ``raw_command``
            is ``self._raw_cmd`` and ``stop`` is the value returned by
            ``self.epp_command`` (0 when no EPP command was run).
        """
        command_name = ''
        command_params = ''
        stop = 0
        self.reset_round()
        if self._session[ESCAPED_INPUT]:
            command = unescape_html(command)
        cmd = EPP_command = session_command = command.strip()
        # Remove the whitespace after the leading exclamation mark(s); a run
        # of several '!' is reduced to a single one.
        match = re.match(r'!+\s+(.+)', cmd)
        if match:
            cmd = '!%s' % match.group(1)
        # Split into the first word (candidate session command) and the rest.
        # Converting hyphens in the first word to underscores is obsolete.
        m = re.match(r'(\S+)(.*)', cmd)
        if m:
            EPP_command = '%s%s' % (m.group(1), m.group(2))
            session_command = m.group(1)
            command_params = m.group(2)
        if cmd == '!':
            self.append_note(_T('ERROR: Missing command to start interactive input'))
            self.append_note(_T("Type 'help !' for more information."))
            return command_name, self._raw_cmd, 0
        # Help is requested with 'h', 'help' or '?', optionally followed by
        # an item name.
        help_cmd, help_item = None, None
        m = re.match(r'(h(?:elp)?)(?:$|\s+(\S+))', cmd)
        if m:
            help_cmd, help_item = m.groups()
        else:
            m = re.match(r'(\?)(?:$|\s*(\S+))', cmd)
            if m:
                help_cmd, help_item = m.groups()
        # Resolve three command types: 1. help; 2. session command; 3. EPP command
        if help_cmd:
            # 1. help
            self.display_help(help_item, command)
        elif session_command in self._pattern_session_commands:
            # 2. Session commands
            for names, func, _p, _e, _x in self._session_commands:
                if session_command in names and func:
                    name = func(command_params.strip())
                    if name is not None:
                        command_name = name
        elif session_command in self._hidden_commands:
            # undocumented hidden commands for TEST purpose
            if session_command == 'answer-cols':
                self.__session_translate_column_names__(command_params.strip())
            elif session_command == 'hidden':
                self.append_note('%s: ${BOLD}%s${NORMAL}' % (_T('Hidden commands are'), ', '.join(self._hidden_commands)))
        else:
            # 3. EPP commands
            cmd = EPP_command
            if not isinstance(cmd, unicode):
                try:
                    cmd = unicode(cmd, translate.encoding)
                except UnicodeDecodeError as msg:
                    # Report the problem and fall back to the repr() of the
                    # input so that processing can continue.
                    self.append_error('UnicodeDecodeError: %s' % msg)
                    cmd = unicode(repr(cmd), translate.encoding)
            command_name, stop = self.epp_command(cmd, command)
            if command_name != 'q':  # 'q': user pressed Ctrl+C or Ctrl+D in interactive mode.
                self._raw_cmd = self._epp_cmd.get_xml()
                # Collect any problems found on the command line.
                self.append_error(self._epp_cmd.fetch_errors())
        # Every path returns the same 3-tuple shape as the early exit above.
        return command_name, self._raw_cmd, stop

示例#8

0

显示文件

文件： session_command.py 项目： CZ-NIC/fred-client

 def create_eppdoc(self, command):
     """Dispatch a command line typed by the user.

     The line is handled as exactly one of: a help request, a session
     command, a hidden (test-only) command, or an EPP command for which an
     EPP document is created.

     Args:
         command: The raw command line.  It is passed through
             ``unescape_html`` first when ``self._session[ESCAPED_INPUT]``
             is set.

     Returns:
         A ``(command_name, raw_command, stop)`` tuple.  ``raw_command`` is
         ``self._raw_cmd`` and ``stop`` is the value returned by
         ``self.epp_command`` (0 when no EPP command was run).
     """
     command_name = ''
     command_params = ''
     stop = 0
     self.reset_round()
     if self._session[ESCAPED_INPUT]:
         command = unescape_html(command)
     cmd = EPP_command = session_command = command.strip()
     # Remove the whitespace after the leading exclamation mark(s); a run of
     # several '!' is reduced to a single one.
     match = re.match(r'!+\s+(.+)', cmd)
     if match:
         cmd = '!%s' % match.group(1)
     # Split into the first word (candidate session command) and the rest.
     # Converting hyphens in the first word to underscores is obsolete.
     m = re.match(r'(\S+)(.*)', cmd)
     if m:
         EPP_command = '%s%s' % (m.group(1), m.group(2))
         session_command = m.group(1)
         command_params = m.group(2)
     if cmd == '!':
         self.append_note(
             _T('ERROR: Missing command to start interactive input'))
         self.append_note(_T("Type 'help !' for more information."))
         return command_name, self._raw_cmd, 0
     # Help is requested with 'h', 'help' or '?', optionally followed by an
     # item name.
     help_cmd, help_item = None, None
     m = re.match(r'(h(?:elp)?)(?:$|\s+(\S+))', cmd)
     if m:
         help_cmd, help_item = m.groups()
     else:
         m = re.match(r'(\?)(?:$|\s*(\S+))', cmd)
         if m:
             help_cmd, help_item = m.groups()
     # Resolve three command types: 1. help; 2. session command; 3. EPP command
     if help_cmd:
         # 1. help
         self.display_help(help_item, command)
     elif session_command in self._pattern_session_commands:
         # 2. Session commands
         for names, func, _p, _e, _x in self._session_commands:
             if session_command in names and func:
                 name = func(command_params.strip())
                 if name is not None:
                     command_name = name
     elif session_command in self._hidden_commands:
         # undocumented hidden commands for TEST purpose
         if session_command == 'answer-cols':
             self.__session_translate_column_names__(command_params.strip())
         elif session_command == 'hidden':
             self.append_note('%s: ${BOLD}%s${NORMAL}' %
                              (_T('Hidden commands are'),
                               ', '.join(self._hidden_commands)))
     else:
         # 3. EPP commands
         cmd = EPP_command
         command_name, stop = self.epp_command(cmd, command)
         if command_name != 'q':  # 'q': user pressed Ctrl+C or Ctrl+D in interactive mode.
             self._raw_cmd = self._epp_cmd.get_xml()
             # Collect any problems found on the command line.
             self.append_error(self._epp_cmd.fetch_errors())
     return command_name, self._raw_cmd, stop

示例#9

0

显示文件

def main(sourceurl, username=None, password=None):
    '''
    Given a reddit JSON url, yield the strings that make up the exported
    HTML: the header, one rendered fragment per item from ``get_links``,
    and the footer.

    Kind 't3' items are rendered with ``link_template`` and kind 't1' items
    with ``comment_template``; any other kind raises TypeError.  ``login``
    is called only when both ``username`` and ``password`` are truthy.
    '''
    yield header_template % {'exported_url': escape_html(sourceurl)}

    cookie = None
    if username and password:
        cookie = login(username, password)

    # Keys of a 't3' item that are copied into the template after escaping.
    escaped_link_keys = ('id', 'url', 'title', 'domain', 'author', 'subreddit')

    for link in get_links(sourceurl, cookie):
        data = link['data']
        kind = link['kind']

        if kind == 't3':
            # links
            template_data = {}
            for key in escaped_link_keys:
                template_data[key] = escape_html(data[key])
            template_data['score'] = int(data['score'])
            template_data['num_comments'] = int(data['num_comments'])
            template_data['date'] = time.ctime(data['created_utc'])
            template_data['selftext_template'] = ''

            # This is the only way to tell if it's a self-post from the API.
            is_self_post = data.get('is_self', False)
            if is_self_post:
                subreddit = escape_html(data['subreddit'])
                template_data['domain_link'] = (
                    'http://www.reddit.com/r/%(subreddit)s'
                    % {'subreddit': subreddit})
            else:
                domain = escape_html(data['domain'])
                template_data['domain_link'] = (
                    'http://www.reddit.com/domain/%(domain)s'
                    % {'domain': domain})

            if data.get('selftext_html'):
                raw_selftext = unescape_html(data['selftext_html'])
                template_data['selftext_template'] = (
                    selftext_template % raw_selftext)

            yield link_template % template_data

        elif kind == 't1':
            # comments
            template_data = {
                'body': unescape_html(data['body_html']),
                'author': escape_html(data['author']),
                'subreddit': escape_html(data['subreddit']),
                'id': escape_html(data['id']),
                'score': int(data['ups']) - int(data['downs']),
                'date': time.ctime(data['created_utc']),
                'link_id36': escape_html(data['link_id'].split('_')[1]),
            }

            yield comment_template % template_data

        else:
            raise TypeError("I don't know how to decode %r" % link)

    yield footer_template

示例#10

0

显示文件

 def get_last_tweet(self):
     """Return the first get_info() entry's "text" value, HTML-unescaped."""
     info = self.get_info()
     newest_text = info[0]["text"]
     return unescape_html(newest_text)

示例#11

0

显示文件

文件： redditexporter.py 项目： Hmaal/redditexporter

def main(sourceurl, username = None, password = None):
    '''
    Given a reddit JSON url, yield the strings that make up the exported
    HTML.

    Output order: the header, one fragment per item returned by
    ``get_links`` ('t3' items via ``link_template``, 't1' items via
    ``comment_template``), then the footer.  ``login`` is called, and its
    result passed to ``get_links``, only when both ``username`` and
    ``password`` are truthy.  An item of any other kind raises TypeError.
    '''
    yield header_template % {'exported_url': escape_html(sourceurl)}

    cookie = login(username, password) if (username and password) else None

    for link in get_links(sourceurl, cookie):
        data = link['data']
        kind = link['kind']

        if kind == 't3':
            # links
            template_data = {
                'id': escape_html(data['id']),
                'url': escape_html(data['url']),
                'title': escape_html(data['title']),
                'domain': escape_html(data['domain']),
                'author': escape_html(data['author']),
                'subreddit': escape_html(data['subreddit']),
                'score': int(data['score']),
                'num_comments': int(data['num_comments']),
                'date': time.ctime(data['created_utc']),
                'selftext_template': '',
            }

            # This is the only way to tell if it's a self-post from the API.
            if data.get('is_self', False):
                pattern = 'http://www.reddit.com/r/%(subreddit)s'
                key = 'subreddit'
            else:
                pattern = 'http://www.reddit.com/domain/%(domain)s'
                key = 'domain'
            template_data['domain_link'] = pattern % {key: escape_html(data[key])}

            if data.get('selftext_html'):
                template_data['selftext_template'] = (
                    selftext_template % unescape_html(data['selftext_html']))

            yield link_template % template_data

        elif kind == 't1':
            # comments
            link_id36 = None  # filled in last, matching the field order below
            template_data = {}
            template_data['body'] = unescape_html(data['body_html'])
            template_data['author'] = escape_html(data['author'])
            template_data['subreddit'] = escape_html(data['subreddit'])
            template_data['id'] = escape_html(data['id'])
            template_data['score'] = int(data['ups']) - int(data['downs'])
            template_data['date'] = time.ctime(data['created_utc'])
            link_id36 = data['link_id'].split('_')[1]
            template_data['link_id36'] = escape_html(link_id36)

            yield comment_template % template_data

        else:
            raise TypeError("I don't know how to decode %r" % link)

    yield footer_template