async def on_message(self, message):
        """Handle one private-chat websocket message.

        Echoes the message back to the sender, forwards it to the receiver if
        they have this conversation open, persists it, and pushes an updated
        unread count to the receiver on whichever socket they hold.
        """
        data = json_decode(message)
        # Echo to the sender's own private-chat socket.
        send_private_chat_message(message=message, receiver=data['user'])
        receiver_connection = private_chat_ws_connections.get(self.receiver)

        # Forward only when the receiver currently has this chat open with
        # the sender. (The original duplicated this condition in a redundant
        # nested `if`; collapsed into a single check.)
        if receiver_connection and receiver_connection.receiver == data['user']:
            send_private_chat_message(message=message, receiver=data['send_to'])

        await self.insert(
            table='chat',
            params=dict(
                sender=data['user'],
                reciever=data['send_to'],  # NOTE: misspelled column name matches the DB schema
                message=data['message'],
                date_created=datetime.datetime.now())
        )
        unread_private_message = await self.get_unread_messages(xhtml_unescape(data['send_to']))
        # Push the unread counter to whichever socket(s) the receiver holds.
        if data['send_to'] in group_chat_ws_connections.keys():
            send_group_chat_message(message=unread_private_message[0]['count'],
                                    message_type='unreaded',
                                    receiver=xhtml_unescape(data['send_to'])
                                    )
        if data['send_to'] in private_chat_ws_connections.keys():
            send_private_chat_message(message=unread_private_message[0]['count'],
                                      message_type='unreaded',
                                      receiver=xhtml_unescape(data['send_to'])
                                      )
Exemplo n.º 2
0
def replace_post(post_data):
    """Map a scraped post record onto a Ghost-style post row dict.

    The template below documents the expected shape; id, title, slug and
    markdown are then overwritten from *post_data*.
    """
    d = {
        "id": 5,  # placeholder — replaced below from source_url
        "title":        "my blog post title",
        "slug":         "my-blog-post-title",
        "markdown":     "the *markdown* formatted post body",
        #"html":         "the <i>html</i> formatted post body",
        "image":        None,
        "featured":     0,
        "page":         0,
        "status":       "published",
        "language":     "zh_CN",
        "meta_title":   None,
        "meta_description": None,
        "author_id":    1,
        "created_at":   cur_timestamp(),
        "created_by":   1,
        "updated_at":   cur_timestamp(),
        "updated_by":   1,
        "published_at": cur_timestamp(),
        "published_by": 1
    }
    # id is the numeric filename stem of the source URL, e.g. ".../123.html" -> 123
    d['id'] = int(post_data['source_url'].rsplit('/', 1)[1].split('.')[0])
    d['title'] = post_data['title'].strip()
    # BUG FIX: slugs must be URL-safe (see "my-blog-post-title" template);
    # the original only lowercased, leaving spaces in the slug.
    d['slug'] = post_data['title'].strip().replace(' ', '-').lower()
    d['markdown'] = xhtml_unescape(post_data['content'].strip())    # unescape
    return d
Exemplo n.º 3
0
def parse_cases(filename):
  """Parses the fogbugz data in the file.

  Returns a list of (subject, assigned_to, body) tuples.
  """
  tree = ElementTree.parse(filename)
  parsed = []

  for case in tree.find('cases').findall('case'):
    assignee = case.findtext('sPersonAssignedTo')
    subject = 'FB%s: %s' % (case.get('ixBug'), case.findtext('sTitle'))
    # Header section of the body: case metadata, then a blank separator line.
    lines = [
        'Assigned to: %s' % assignee,
        'Project: %s' % case.findtext('sProject'),
        'Area: %s' % case.findtext('sArea'),
        'Priority: %s (%s)' % (case.findtext('ixPriority'),
                               case.findtext('sPriority')),
        'Category: %s' % case.findtext('sCategory'),
        '',
    ]
    # One paragraph per event, with its free-text and attachments if any.
    for event in case.find('events').findall('event'):
      lines.append('%s at %s' % (event.findtext('evtDescription'),
                                 event.findtext('dt')))
      if event.findtext('s'):
        lines.extend(['', event.findtext('s'), ''])
      attachments = event.find('rgAttachments')
      if attachments is not None:
        for attachment in attachments.findall('attachment'):
          lines.append('Attachment: %s'
                       % escape.xhtml_unescape(attachment.findtext('sURL')))
    parsed.append((subject, USER_MAP[assignee], '\n'.join(lines)))
  return parsed
Exemplo n.º 4
0
def parse_cases(filename):
    """Parses the fogbugz data in the file.

  Returns a list of (subject, assigned_to, body) tuples.
  """
    results = []

    tree = ElementTree.parse(filename)

    for case in tree.find('cases').findall('case'):
        subject = 'FB%s: %s' % (case.get('ixBug'), case.findtext('sTitle'))
        body = []
        assigned_to = case.findtext('sPersonAssignedTo')
        body.append('Assigned to: %s' % assigned_to)
        body.append('Project: %s' % case.findtext('sProject'))
        body.append('Area: %s' % case.findtext('sArea'))
        body.append('Priority: %s (%s)' %
                    (case.findtext('ixPriority'), case.findtext('sPriority')))
        body.append('Category: %s' % case.findtext('sCategory'))
        body.append('')
        for event in case.find('events').findall('event'):
            body.append(
                '%s at %s' %
                (event.findtext('evtDescription'), event.findtext('dt')))
            if event.findtext('s'):
                body.append('')
                body.append(event.findtext('s'))
                body.append('')
            if event.find('rgAttachments') is not None:
                for attachment in event.find('rgAttachments').findall(
                        'attachment'):
                    body.append(
                        'Attachment: %s' %
                        escape.xhtml_unescape(attachment.findtext('sURL')))
        results.append((subject, USER_MAP[assigned_to], '\n'.join(body)))
    # BUG FIX: the function built `results` but never returned it, so every
    # caller got None. The sibling implementation of parse_cases returns it.
    return results
Exemplo n.º 5
0
 def replace_ascii(match):
     """Regex callback: map an escaped-HTML ascii token to its replacement.

     NOTE(review): closure fragment — `text`, `cls` and `ascii_replace`
     come from an enclosing scope not visible here.
     """
     # Slice the matched span out of the surrounding text.
     ascii = text[match.start():match.end()]
     ascii = xhtml_unescape(ascii).encode('ascii', 'ignore').strip(
     )  # convert escaped HTML entities back to original chars
     # Unknown (or empty) tokens pass through unchanged.
     if not ascii or ascii not in ascii_replace:
         return ascii
     return cls.convert(ascii_replace[ascii])
Exemplo n.º 6
0
def replace_post(post_data):
    """Map a scraped post record onto a Ghost-style post row dict."""
    #"html":         "the <i>html</i> formatted post body",
    row = {
        "id": 5,
        "title":        "my blog post title",
        "slug":         "my-blog-post-title",
        "markdown":     "the *markdown* formatted post body",
        "image":        None,
        "featured":     0,
        "page":         0,
        "status":       "published",
        "language":     "zh_CN",
        "meta_title":   None,
        "meta_description": None,
        "author_id":    1,
        "created_at":   cur_timestamp(),
        "created_by":   1,
        "updated_at":   cur_timestamp(),
        "updated_by":   1,
        "published_at": cur_timestamp(),
        "published_by": 1
    }
    # id is the numeric filename stem of the source URL.
    row['id'] = int(post_data['source_url'].rsplit('/', 1)[1].split('.')[0])
    title = post_data['title'].strip()
    row['title'] = title
    row['slug'] = title.replace(' ', '-').lower()
    row['markdown'] = xhtml_unescape(post_data['content'].strip())    # unescape
    return row
Exemplo n.º 7
0
    def get_permanent_wechat_article_url(self, sougou_url):
        """ 从搜狗的临时url获取永久url

        Fetch the Sogou temporary URL and extract the permanent WeChat
        article link from the page; fall back to the input URL on failure.

        Args:
            sougou_url (str): "http://mp.weixin.qq.com/s?timestamp=1473815432&src=3&ver=1&signature=puOtJfG0mefG5o6Ls-bqDmML9ZjS5S6oDIhdUReNRm6*bIF9yINfCoXvB3btXzPEeUZvV8bdlSRTgKPx5Nsd6ZfzLK4Gv4X6z7te1EEo2azG3llx*rw*fxqXrKnwP2oqTTrNYxaRzM8cARFIbjPHVLpWdZGqNhyxsKoK5ozlXSk="

        Returns:
            msg_link (str): "http://mp.weixin.qq.com/s?__biz=MzI1OTAwNDc1OA==&amp;mid=2652831837&amp;idx=1&amp;sn=3a93c0b6dfeef85e9b85bdac39f47bce&amp;chksm=f1942064c6e3a9728f0bdc4d9bab481b7079c7c1d9ed32397295b45d0b02af839dafcc4b093e#rd";

        """
        # Random delay to avoid hammering the endpoint and looking like a bot.
        time.sleep(random.randint(1, 10))
        curl_str = """
        curl 'http://mp.weixin.qq.com/s?timestamp=1473815432&src=3&ver=1&signature=puOtJfG0mefG5o6Ls-bqDmML9ZjS5S6oDIhdUReNRm6*bIF9yINfCoXvB3btXzPEeUZvV8bdlSRTgKPx5Nsd6ZfzLK4Gv4X6z7te1EEo2azG3llx*rw*fxqXrKnwP2oqTTrNYxaRzM8cARFIbjPHVLpWdZGqNhyxsKoK5ozlXSk=' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' -H 'Connection: keep-alive' -H 'Accept-Encoding: gzip, deflate, sdch' -H 'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36' --compressed
        """
        _, headers, _ = parse_curl_str(curl_str)
        headers['User-Agent'] = random_ua()
        # BUG FIX: the browser-like headers (with the randomized UA) were
        # built but never sent; pass them so the request is not fingerprinted
        # as a bare requests client.
        r = requests.get(sougou_url, headers=headers)
        html = r.text
        try:
            # The permanent link is embedded as: msg_link = "...";
            msg_link = xhtml_unescape(extract('msg_link = "', '";', html))
        except Exception:
            self.logger.exception(html)
            msg_link = sougou_url
        self.logger.info('get permanent url: %s', msg_link)
        return msg_link
Exemplo n.º 8
0
def normalize(sentence):
    """Normalize a comment string.

    HTML-unescapes, lowercases, removes all whitespace, converts combining
    marks, strips configured patterns and de-duplicates cyclic words.
    Returns "" when the text matches the rejection pattern.

    :param str sentence: comment text to normalize
    :return: normalized comment ("" when rejected)
    :rtype: str
    """

    dst = escape.xhtml_unescape(sentence)

    # Reject outright if the blacklist pattern matches.
    if _re_escape.findall(dst):
        return ""

    # か゛(u'\u304b\u309b')" -> が(u'\u304b \u3099')
    #  -> が(u'\u304b\u3099') -> が(u'\u304c')
    # dst = unicodedata.normalize("NFKC", "".join(unicodedata.normalize("NFKC", dst).split()))
    dst = dst.lower()
    dst = "".join(dst.split())
    try:
        dst = _convert_marks(dst)
    except Exception:
        # BUG FIX: bare `except:` swallowed even SystemExit/KeyboardInterrupt;
        # narrowed to Exception. print kept as a best-effort diagnostic
        # (function-call form works on both Python 2 and 3).
        print("convertError")
    dst = _re_remove.sub("", dst)
    dst = _delete_cyclic_word(dst)

    return dst
Exemplo n.º 9
0
    def searchIndexersForShowName(self, search_term, lang=None, indexer=None):
        """Search the configured indexers for shows matching *search_term*.

        Returns a JSON string: {'results': [...], 'langid': int, 'success': bool}.
        Each result tuple is (indexer name, indexer id, show_url, show id,
        series name, first-aired date, already-in-showList flag).
        """
        self.set_header('Cache-Control', 'max-age=0,no-cache,no-store')
        self.set_header('Content-Type', 'application/json')
        if not lang or lang == 'null':
            lang = settings.INDEXER_DEFAULT_LANGUAGE

        search_term = xhtml_unescape(search_term)

        searchTerms = [search_term]

        # If search term ends with what looks like a year, enclose it in ()
        matches = re.match(r'^(.+ |)([12][0-9]{3})$', search_term)
        if matches:
            searchTerms.append("{0}({1})".format(matches.group(1),
                                                 matches.group(2)))

        # NOTE: appends to the list being iterated; newly added terms are also
        # visited, so article-stripped variants of variants get generated too.
        for searchTerm in searchTerms:
            # If search term begins with an article, let's also search for it without
            matches = re.match(r'^(?:a|an|the) (.+)$', searchTerm, re.I)
            if matches:
                searchTerms.append(matches.group(1))

        # results: indexer id -> accumulated raw search hits
        results = {}
        final_results = []

        # Query Indexers for each search term and build the list of results
        for i, j in sickchill.indexer if not int(indexer) else [(int(indexer),
                                                                 None)]:
            # NOTE(review): the log label is hard-coded to 'theTVDB' even when
            # iterating other indexers — looks wrong but log-only; confirm.
            logger.debug(
                "Searching for Show with searchterm(s): {0} on Indexer: {1}".
                format(searchTerms, 'theTVDB'))
            for searchTerm in searchTerms:
                # noinspection PyBroadException
                try:
                    indexerResults = sickchill.indexer[i].search(searchTerm,
                                                                 language=lang)
                except Exception:
                    logger.exception(traceback.format_exc())
                    continue

                # add search results
                results.setdefault(i, []).extend(indexerResults)

        # Dedupe per indexer via a set comprehension before flattening.
        for i, shows in results.items():
            # noinspection PyUnresolvedReferences
            final_results.extend({
                (sickchill.indexer.name(i), i, sickchill.indexer[i].show_url,
                 show['id'], show['seriesName'], show['firstAired'],
                 sickchill.tv.Show.find(settings.showList,
                                        show['id']) is not None)
                for show in shows
            })

        lang_id = sickchill.indexer.lang_dict()[lang]
        return json.dumps({
            'results': final_results,
            'langid': lang_id,
            'success': len(final_results) > 0
        })
Exemplo n.º 10
0
    def index(self, path='', includeFiles=False, fileTypes=''):  # pylint: disable=arguments-differ
        """Return a JSON listing of folders (and optionally files) at *path*."""
        self.set_header(b'Cache-Control', 'max-age=0,no-cache,no-store')
        self.set_header(b'Content-Type', 'application/json')

        entries = foldersAtPath(xhtml_unescape(path), True,
                                bool(int(includeFiles)), fileTypes.split(','))
        return json.dumps(entries)
Exemplo n.º 11
0
    def complete(self, term, includeFiles=False, fileTypes=''):
        """Return a JSON list of path strings that complete *term*."""
        self.set_header('Cache-Control', 'max-age=0,no-cache,no-store')
        self.set_header('Content-Type', 'application/json')
        entries = foldersAtPath(os.path.dirname(xhtml_unescape(term)),
                                includeFiles=bool(int(includeFiles)),
                                fileTypes=fileTypes.split(','))
        # Only entries that actually carry a 'path' key contribute.
        paths = [entry['path'] for entry in entries if 'path' in entry]
        return json.dumps(paths)
Exemplo n.º 12
0
    def edit_card(self, message):
        """Broadcast an edited card to the room and persist the new text.

        The broadcast payload carries the unescaped text; storage keeps the
        escaped form.
        """
        # Renamed from `id`, which shadowed the builtin.
        card_id = message['data']['id']
        text = xhtml_unescape(message['data']['value'].strip())
        clean_data = {'value': text, 'id': card_id}

        message_out = self.generate_message('editCard', clean_data)
        self.broadcast(message_out)
        room_id = self.rooms.get_room_id(self)
        self.cards.update_text(room_id, card_id=card_id, text=xhtml_escape(text))
Exemplo n.º 13
0
    def edit_card(self, message):
        """Broadcast an edited card to the room and persist the new text.

        The broadcast payload carries the unescaped text; storage keeps the
        escaped form.
        """
        # Renamed from `id`, which shadowed the builtin.
        card_id = message['data']['id']
        text = xhtml_unescape(message['data']['value'].strip())
        clean_data = {'value': text, 'id': card_id}

        message_out = self.generate_message('editCard', clean_data)
        self.broadcast(message_out)
        room_id = self.rooms.get_room_id(self)
        self.cards.update_text(room_id, card_id=card_id, text=xhtml_escape(text))
Exemplo n.º 14
0
    def processEpisode(
        self,
        proc_dir=None,
        nzbName=None,
        quiet=None,
        process_method=None,
        force=None,
        is_priority=None,
        delete_on="0",
        failed="0",
        proc_type="manual",
        force_next=False,
        *args_,
        **kwargs,
    ):
        """Queue a post-processing task for a download directory.

        Redirects to the post-process page when no directory is given;
        otherwise schedules the task and either returns the raw result
        (quiet) or renders it as an HTML message page.
        """

        # kwargs may override proc_type/proc_dir under the legacy names
        # "type" and "dir" used by older callers.
        mode = kwargs.get("type", proc_type)
        process_path = xhtml_unescape(kwargs.get("dir", proc_dir or "") or "")
        if not process_path:
            return self.redirect("/home/postprocess/")

        # Unescape the release name only when one was supplied.
        release_name = xhtml_unescape(nzbName) if nzbName else nzbName

        result = settings.postProcessorTaskScheduler.action.add_item(
            process_path,
            release_name,
            method=process_method,
            force=force,
            is_priority=is_priority,
            delete=delete_on,
            failed=failed,
            mode=mode,
            force_next=force_next,
        )

        # quiet callers (e.g. API) get the raw scheduler result back.
        if config.checkbox_to_value(quiet):
            return result

        # Convert newlines for HTML display.
        if result:
            result = result.replace("\n", "<br>\n")

        return self._genericMessage("Postprocessing results", result)
Exemplo n.º 15
0
 async def on_message(self, message):
     """Fan out an incoming group-chat message and persist it."""
     send_group_chat_message(message=message)
     payload = json_decode(message)
     record = dict(
         sender=xhtml_unescape(payload['user']),
         message=payload['message'],
         date_created=datetime.datetime.now(),
     )
     await self.insert(table='chat', params=record)
Exemplo n.º 16
0
    def complete(self, term, includeFiles=False, fileTypes=""):
        """Return a JSON list of path strings that complete *term*."""
        self.set_header("Cache-Control", "max-age=0,no-cache,no-store")
        self.set_header("Content-Type", "application/json")
        entries = foldersAtPath(
            os.path.dirname(xhtml_unescape(term)),
            includeFiles=bool(int(includeFiles)),
            fileTypes=fileTypes.split(","),
        )
        # Only entries that actually carry a "path" key contribute.
        return json.dumps([entry["path"] for entry in entries if "path" in entry])
Exemplo n.º 17
0
    def ascii_to_unicode(cls, text):
        """Replace known escaped-ascii art sequences in *text* via cls.convert."""
        text = xhtml_unescape(text)

        def _swap(match):
            token = text[match.start():match.end()]
            # Drop non-ascii bytes; surrounding whitespace is not significant.
            token = token.encode('ascii', 'ignore').strip()
            if token and token in ascii_replace:
                return cls.convert(ascii_replace[token])
            return token

        return re.sub(cls.ascii_compiled, _swap, text)
Exemplo n.º 18
0
 def test_xhtml_escape(self):
     """Round-trip check: escape produces the entity form, unescape restores it."""
     cases = [
         ("<foo>", "&lt;foo&gt;"),
         (u("<foo>"), u("&lt;foo&gt;")),
         (b"<foo>", b"&lt;foo&gt;"),
         ("<>&\"", "&lt;&gt;&amp;&quot;"),
         ("&amp;", "&amp;amp;"),
     ]
     for plain, encoded in cases:
         self.assertEqual(utf8(xhtml_escape(plain)), utf8(encoded))
         self.assertEqual(utf8(plain), utf8(xhtml_unescape(encoded)))
Exemplo n.º 19
0
 def test_xhtml_unescape_numeric(self):
     """Numeric character references (decimal and hex) must decode; malformed ones pass through."""
     cases = [
         ('foo&#32;bar', 'foo bar'),
         ('foo&#x20;bar', 'foo bar'),
         ('foo&#X20;bar', 'foo bar'),
         ('foo&#xabc;bar', u'foo\u0abcbar'),
         ('foo&#xyz;bar', 'foo&#xyz;bar'),  # invalid encoding
         ('foo&#;bar', 'foo&#;bar'),  # invalid encoding
         ('foo&#x;bar', 'foo&#x;bar'),  # invalid encoding
     ]
     for raw, expected in cases:
         self.assertEqual(expected, xhtml_unescape(raw))
Exemplo n.º 20
0
 def test_xhtml_unescape_numeric(self):
     """Numeric character references (decimal and hex) must decode; malformed ones pass through."""
     cases = [
         ('foo&#32;bar', 'foo bar'),
         ('foo&#x20;bar', 'foo bar'),
         ('foo&#X20;bar', 'foo bar'),
         ('foo&#xabc;bar', u'foo\u0abcbar'),
         ('foo&#xyz;bar', 'foo&#xyz;bar'),        # invalid encoding
         ('foo&#;bar', 'foo&#;bar'),              # invalid encoding
         ('foo&#x;bar', 'foo&#x;bar'),            # invalid encoding
     ]
     for raw, expected in cases:
         self.assertEqual(expected, xhtml_unescape(raw))
Exemplo n.º 21
0
    def test_xhtml_escape(self):
        """Round-trip check: escape produces the entity form, unescape restores it."""
        cases = [
            ("<foo>", "&lt;foo&gt;"),
            ("<foo>", "&lt;foo&gt;"),
            (b("<foo>"), b("&lt;foo&gt;")),
            ("<>&\"", "&lt;&gt;&amp;&quot;"),
            ("&amp;", "&amp;amp;"),
        ]
        for plain, encoded in cases:
            self.assertEqual(utf8(xhtml_escape(plain)), utf8(encoded))
            self.assertEqual(utf8(plain), utf8(xhtml_unescape(encoded)))
 def __init__(self, profiles):
     """Aggregate one person's LinkedIn/Twitter/Meetup profiles.

     NOTE(review): assumes each entry in *profiles* is a dict with a
     'cluster' field (used to identify the source network) and an
     'entity' dict holding the profile payload — confirm against caller.
     Also assumes a LinkedIn profile is always present: the attribute
     reads below would raise TypeError otherwise.
     """
     # Pick the first profile of each kind; later non-matching profiles
     # cannot overwrite an already-found one (guarded by the None checks).
     linkedin_profile,twitter_profile,meetup_profile = None,None,None
     for a_profile in profiles:
         if linkedin_profile is None:
             linkedin_profile = a_profile if LINKEDIN in a_profile['cluster'] else None
         if twitter_profile is None:
             twitter_profile = a_profile if TWITTER in a_profile['cluster'] else None
         if meetup_profile is None:
             meetup_profile = a_profile if MEETUP in a_profile['cluster'] else None

     # Core identity comes from LinkedIn.
     self.name = linkedin_profile['entity']['firstName']+' '+linkedin_profile['entity']['lastName']
     self.title = linkedin_profile['entity']['title']
     self.jobprofilesummary = linkedin_profile['entity']['profilesummary']
     self.work_interests = linkedin_profile['entity']['interests']

     # Stored summaries are HTML-escaped; decode for display.
     if self.jobprofilesummary:
         self.jobprofilesummary = xhtml_unescape(self.jobprofilesummary)
     self.currentjob = linkedin_profile['entity']['current']
     self.currentjob = self.currentjob[0] if self.currentjob else None
     self.previous_jobs = linkedin_profile['entity']['previous'] or []
     self.education = linkedin_profile['entity']['education'] or []
     self.region = linkedin_profile['entity']['region']
     # Twitter adds hobbies and recent tweets (empty defaults otherwise).
     if twitter_profile:
         self.interests_and_hobbies = twitter_profile['entity']['profilesummary']
         if self.interests_and_hobbies:
             self.interests_and_hobbies = xhtml_unescape(self.interests_and_hobbies)
         else:
             self.interests_and_hobbies = ""
         self.current_tweets = [xhtml_unescape(x) for x in twitter_profile['entity']['tweets'] if x]
     else:
         self.interests_and_hobbies = ""
         self.current_tweets = []
     # Meetup groups: organizer groups listed before member groups.
     if meetup_profile:
         org_groups = [xhtml_unescape(x[0]) for x in meetup_profile['entity']['groups'] if x[1] == 'Organizer']
         memb_groups = [xhtml_unescape(x[0]) for x in meetup_profile['entity']['groups'] if x[1] == 'Member']
         self.currentgroups = org_groups + memb_groups
     else:
         self.currentgroups = []
Exemplo n.º 23
0
 def test_xhtml_escape(self):
     """Round-trip check covering str, unicode-escape and utf-8 byte inputs."""
     cases = [
         ("<foo>", "&lt;foo&gt;"),
         (u"<foo>", u"&lt;foo&gt;"),
         (b"<foo>", b"&lt;foo&gt;"),
         ("<>&\"'", "&lt;&gt;&amp;&quot;&#39;"),
         ("&amp;", "&amp;amp;"),
         (u"<\u00e9>", u"&lt;\u00e9&gt;"),
         (b"<\xc3\xa9>", b"&lt;\xc3\xa9&gt;"),
     ]  # type: List[Tuple[Union[str, bytes], Union[str, bytes]]]
     for plain, encoded in cases:
         self.assertEqual(utf8(xhtml_escape(plain)), utf8(encoded))
         self.assertEqual(utf8(plain), utf8(xhtml_unescape(encoded)))
Exemplo n.º 24
0
 def test_xhtml_escape(self):
     """Round-trip check covering str, unicode-escape and utf-8 byte inputs."""
     cases = [
         ("<foo>", "&lt;foo&gt;"),
         (u"<foo>", u"&lt;foo&gt;"),
         (b"<foo>", b"&lt;foo&gt;"),
         ("<>&\"'", "&lt;&gt;&amp;&quot;&#39;"),
         ("&amp;", "&amp;amp;"),
         (u"<\u00e9>", u"&lt;\u00e9&gt;"),
         (b"<\xc3\xa9>", b"&lt;\xc3\xa9&gt;"),
     ]
     for plain, encoded in cases:
         self.assertEqual(utf8(xhtml_escape(plain)), utf8(encoded))
         self.assertEqual(utf8(plain), utf8(xhtml_unescape(encoded)))
Exemplo n.º 25
0
 def _get_articel_info(self, article_info, nick_name, ori_create_time):
     """Build the normalised article dict.

     NOTE: HTML-unescapes every string field of *article_info* in place
     (the caller's dict is mutated), then maps selected fields.
     """
     for key, value in article_info.items():
         if isinstance(value, str):
             article_info[key] = xhtml_unescape(value)
     link = ('http://mp.weixin.qq.com' +
             article_info['content_url'].replace('\\', ''))
     return {
         'cdn_url': article_info['cover'].replace('\\', ''),
         'title': article_info['title'],
         'nick_name': nick_name,
         'link': link,
         'ori_create_time': ori_create_time,
         'desc': article_info['digest'],
     }
Exemplo n.º 26
0
Arquivo: auth.py Projeto: henter/PBB
 def post(self):
     """Save the current user's profile fields, stripping HTML fragments."""
     user = self.current_user
     # BUG FIX: the original reused `x` for both the field loop and the
     # html_killer match loop, so `user[x]` inside the inner loop indexed
     # the user dict with the matched HTML text (KeyError / wrong field).
     for field in ('location','twitter','github','css','words'):
         user[field] = xhtml_unescape(self.get_argument(field,''))
         for tag in set(html_killer.findall(user[field])):
             user[field] = user[field].replace(tag,'')
     # Accept the website only when it parses with both a scheme and a host.
     website = self.get_argument('website','')
     w = urlparse(website)
     if w[0] and w[1]:
         user['website'] = website
     else:
         user['website'] = ''
     self.db.users.save(user)
     self.redirect('/user/%s' % user['username'] )
Exemplo n.º 27
0
 def on_message(self, message_json):
   """Route an incoming websocket message to the matching game action."""
   message = json.loads(message_json)
   kind = message['type']
   if kind == 'start':
     self.game.start_game()
   elif kind == 'update':
     self.game.request_update(self)
   elif kind == 'chat':
     self.game.add_chat(xhtml_unescape(message['name']), message['message'], "chat")
   elif kind == 'pause':
     self.game.pause(message['pause'])
   elif kind == 'submit':
     self.game.submit_tau(self, message['cards'])
   elif kind == 'training_option':
     self.game.set_training_option(message['option'], message['value'])
Exemplo n.º 28
0
def fun_article_new_src(user,
                        article_id='-1',
                        article_type='blog',
                        src_type='code',
                        title='',
                        body='',
                        source='',
                        code_type='python',
                        math_type='inline',
                        father_id='-1',
                        group_id='-1'):
    # Create a new source item (code/math/reference/...) attached to an
    # article. Returns [1, <chinese error message>] on validation failure.
    # NOTE(review): Python 2 syntax (`except Exception, err`); the visible
    # portion ends inside the article_type dispatch — the remainder of the
    # function is outside this view.
    if article_type not in Article_Type:
        return [1, '不支持当前文章类型!']
    if src_type not in Agree_Src:
        return [1, '不支持当前类型的资源!']

    if title is None:
        return [1, '名称不能为空!']

    if body is None:
        # Only 'reference' items may omit a body, and then only with a
        # real http/https/ftp link in `source`.
        if src_type != 'reference':
            return [1, '内容不能为空!']
        else:
            if re.search(r'^(http|https|ftp):\/\/.+$', source) is None:
                return [1, '请填写链接地址或者引用真实内容!']
            body = ''
    else:
        if src_type == 'math':
            # Math bodies are stored encoded after HTML-unescaping.
            body = math_encode(escape.xhtml_unescape(body))
        elif src_type == 'code':
            if code_type not in Agree_Code:
                return [1, '请选择代码种类!']

    # Resolve the target object the source is attached to.
    if article_type == "about":
        AF_Object = user.about
        article_id = str(user.about._id)
        isnew = False
    elif article_type == "book-about":
        isnew = False
        try:
            book = Catalog(_id=group_id)
            AF_Object = book.about
            # Writing a book abstract requires write permission on the book.
            limit = book.authority_verify(user)
            if test_auth(limit, A_WRITE) is False:
                return [1, '您无权修改摘要!']
        except Exception, err:
            logging.error(traceback.format_exc())
            logging.error('Catalog not exist, id %s' % group_id)
            return [1, '未找到知识谱!']
Exemplo n.º 29
0
    def test_xhtml_escape(self):
        """Round-trip check covering str, u()-wrapped and byte inputs."""
        cases = [
            ("<foo>", "&lt;foo&gt;"),
            (u("<foo>"), u("&lt;foo&gt;")),
            (b"<foo>", b"&lt;foo&gt;"),
            ("<>&\"'", "&lt;&gt;&amp;&quot;&#39;"),
            ("&amp;", "&amp;amp;"),
            (u("<\u00e9>"), u("&lt;\u00e9&gt;")),
            (b"<\xc3\xa9>", b"&lt;\xc3\xa9&gt;"),
        ]
        for plain, encoded in cases:
            self.assertEqual(utf8(xhtml_escape(plain)), utf8(encoded))
            self.assertEqual(utf8(plain), utf8(xhtml_unescape(encoded)))
Exemplo n.º 30
0
    def edit_card(self, message):
        """Broadcast a card edit, persist it, then stream writing advice.

        Generator method: yields gen.sleep between advice messages.
        """
        card_id = message['data']['id']
        text = xhtml_unescape(message['data']['value'].strip())

        self.broadcast_to_all_room_user(
            self,
            self.generate_message('editCard', {'value': text, 'id': card_id}))
        self.cards.update_text(self.rooms.get_room_id(self),
                               card_id=card_id, text=xhtml_escape(text))

        # Offer generated follow-up sentences one at a time, pausing between each.
        for sent in SentenceGenerator().generate_sentence(text):
            self.send_message(self.generate_message('advice', {'sent': sent}))
            yield gen.sleep(2.5)
Exemplo n.º 31
0
    def edit_card(self, message):
        """Broadcast a card edit, persist it, then stream writing advice.

        Generator method: yields gen.sleep between advice messages.
        """
        card_id = message['data']['id']
        text = xhtml_unescape(message['data']['value'].strip())

        self.broadcast_to_all_room_user(
            self,
            self.generate_message('editCard', {'value': text, 'id': card_id}))
        self.cards.update_text(self.rooms.get_room_id(self),
                               card_id=card_id, text=xhtml_escape(text))

        # Offer generated follow-up sentences one at a time, pausing between each.
        for sent in SentenceGenerator().generate_sentence(text):
            self.send_message(self.generate_message('advice', {'sent': sent}))
            yield gen.sleep(2.5)
Exemplo n.º 32
0
 def _get_articel_info(self, article_info, nick_name, ori_create_time):
     """Build the normalised article dict.

     NOTE: HTML-unescapes every string field of *article_info* in place
     (the caller's dict is mutated), then maps selected fields.
     """
     for key, value in article_info.items():
         if isinstance(value, str):
             article_info[key] = xhtml_unescape(value)
     link = ('http://mp.weixin.qq.com' +
             article_info['content_url'].replace('\\', ''))
     return {
         'cdn_url': article_info['cover'].replace('\\', ''),
         'title': article_info['title'],
         'nick_name': nick_name,
         'link': link,
         'ori_create_time': ori_create_time,
         'desc': article_info['digest'],
     }
Exemplo n.º 33
0
def fun_article_new_src(user, article_id='-1', article_type='blog', src_type='code',
            title='', body='', source='', code_type='python', math_type='inline', father_id='-1', group_id='-1'):
    # Create a new source item (code/math/reference/...) attached to an
    # article. Returns [1, <chinese error message>] on validation failure.
    # NOTE(review): Python 2 syntax (`except Exception, err`); the visible
    # portion ends inside the article_type dispatch — the remainder of the
    # function is outside this view.
    if article_type not in Article_Type:
        return [1, '不支持当前文章类型!']
    if src_type not in Agree_Src:
        return [1, '不支持当前类型的资源!']

    if title is None:
        return [1, '名称不能为空!']

    if body is None:
        # Only 'reference' items may omit a body, and then only with a
        # real http/https/ftp link in `source`.
        if src_type != 'reference':
            return [1, '内容不能为空!']
        else:
            if re.search(r'^(http|https|ftp):\/\/.+$', source) is None:
                return [1, '请填写链接地址或者引用真实内容!']
            body = ''
    else:
        if src_type == 'math':
            # Math bodies are stored encoded after HTML-unescaping.
            body = math_encode(escape.xhtml_unescape(body))
        elif src_type == 'code':
            if code_type not in Agree_Code:
                return [1, '请选择代码种类!']

    # Resolve the target object the source is attached to.
    if article_type == "about":
        AF_Object = user.about
        article_id = str(user.about._id)
        isnew = False
    elif article_type == "book-about":
        isnew = False
        try:
            book = Catalog(_id=group_id)
            AF_Object = book.about
            # Writing a book abstract requires write permission on the book.
            limit = book.authority_verify(user)
            if test_auth(limit, A_WRITE) is False:
                return [1, '您无权修改摘要!']
        except Exception, err:
            logging.error(traceback.format_exc())
            logging.error('Catalog not exist, id %s' % group_id)
            return [1, '未找到知识谱!']
Exemplo n.º 34
0
def clean_url(url):
    """
    Returns an cleaned url starting with a scheme and folder with trailing /
    or an empty string
    """
    if not (url and url.strip()):
        return ''

    url = xhtml_unescape(url.strip())
    # Force a scheme-relative form so urlsplit sees a netloc.
    if '://' not in url:
        url = '//' + url

    scheme, netloc, path, query, fragment = parse.urlsplit(url, 'http')
    # An empty path becomes the root folder.
    path = path or '/'
    return parse.urlunsplit((scheme, netloc, path, query, fragment))
Exemplo n.º 35
0
    def searchIndexersForShowName(self,
                                  search_term,
                                  lang=None,
                                  indexer=None,
                                  exact=False):
        """Search the configured indexers for shows matching *search_term*.

        Returns a JSON string: {'results': [...], 'langid': int,
        'success': bool}. Each result tuple is (indexer name, indexer id,
        show_url, show id, series name, first-aired date,
        already-in-showList flag). With exact=True, results are filtered to
        substring matches and ranked by match position / exact equality.
        """
        self.set_header("Cache-Control", "max-age=0,no-cache,no-store")
        self.set_header("Content-Type", "application/json")
        if not lang or lang == "null":
            lang = settings.INDEXER_DEFAULT_LANGUAGE

        search_term = xhtml_unescape(search_term)

        search_terms = [search_term]

        # If search term ends with what looks like a year, enclose it in ()
        matches = re.match(r"^(.+ |)([12][0-9]{3})$", search_term)
        if matches:
            search_terms.append("{0}({1})".format(matches.group(1),
                                                  matches.group(2)))

        # NOTE: appends to the list being iterated; newly added terms are
        # also visited, so article-stripped variants of variants appear too.
        for term in search_terms:
            # If search term begins with an article, let's also search for it without
            matches = re.match(r"^(?:a|an|the) (.+)$", term, re.I)
            if matches:
                search_terms.append(matches.group(1))

        # results: indexer id -> accumulated raw search hits
        results = {}
        final_results = []

        # Query Indexers for each search term and build the list of results
        for i, j in sickchill.indexer if not int(indexer) else [(int(indexer),
                                                                 None)]:
            logger.debug(
                _(f"Searching for Show with search term(s): {search_terms} on Indexer: {sickchill.indexer[i].name} (exact: {exact})"
                  ))
            for term in search_terms:
                # noinspection PyBroadException
                try:
                    indexerResults = sickchill.indexer[i].search(term,
                                                                 language=lang,
                                                                 exact=exact)
                except Exception:
                    logger.exception(traceback.format_exc())
                    continue

                # add search results
                results.setdefault(i, []).extend(indexerResults)

        # Dedupe per indexer via a set comprehension before flattening.
        for i, shows in results.items():
            # noinspection PyUnresolvedReferences
            final_results.extend({(
                sickchill.indexer.name(i),
                i,
                sickchill.indexer[i].show_url,
                show["id"],
                show["seriesName"],
                show["firstAired"],
                sickchill.tv.Show.find(settings.showList, show["id"])
                is not None,
            )
                                  for show in shows})

        if exact:
            logger.debug(
                _("Filtering and sorting out excess results because exact match was checked"
                  ))
            # Keep only substring matches on the series name (index 4), then
            # sort: alphabetical, then by match position, then exact matches
            # first (stable sorts compose from least to most significant key).
            final_results = [
                item for item in final_results
                if search_term.lower() in item[4].lower()
            ]
            final_results.sort(key=itemgetter(4))
            final_results.sort(
                key=lambda x: x[4].lower().index(search_term.lower()))
            final_results.sort(
                key=lambda x: x[4].lower() == search_term.lower(),
                reverse=True)

        lang_id = sickchill.indexer.lang_dict()[lang]
        return json.dumps({
            "results": final_results,
            "langid": lang_id,
            "success": len(final_results) > 0
        })
Exemplo n.º 36
0
 def output_message(self, message, message_hash):
     """Write *message* as plain text, converting <br/> tags back to newlines."""
     self.set_header("Content-Type", "text/plain")
     body = xhtml_unescape(message).replace("<br/>", "\n")
     self.write(body)
Exemplo n.º 37
0
 def output_message(self, message, message_hash):
     """Write *message* as plain text with its hash header, <br/> back to newlines."""
     self.set_header('Content-Type', 'text/plain')
     self.set_header('X-Message-Hash', message_hash)
     body = xhtml_unescape(message).replace('<br/>', '\n')
     self.write(body)
Exemplo n.º 38
0
    def index(self, path="", includeFiles=False, fileTypes=""):
        """Return a JSON listing of folders (and optionally files) at *path*."""
        self.set_header("Cache-Control", "max-age=0,no-cache,no-store")
        self.set_header("Content-Type", "application/json")

        entries = foldersAtPath(xhtml_unescape(path), True,
                                bool(int(includeFiles)), fileTypes.split(","))
        return json.dumps(entries)
Exemplo n.º 39
0
from article.blog import Blog
from article.about import About
from article.comment import Comment
from article.reference import Reference
from article.tableform import Tableform
from article.langcode import Langcode

from tornado.escape import xhtml_unescape

# Load every stored document, then decode the HTML entities in its content
# fields so templates can render the raw markup directly.
blogs_all = [Blog(doc) for doc in Blog.datatype.find()]
comments_all = [Comment(doc) for doc in Comment.datatype.find()]
about_all = About.find()
ref_all = Reference.find()
table_all = Tableform.find()
code_all = Langcode.find()


for article in blogs_all + comments_all + about_all:
    article.abstract = xhtml_unescape(article.abstract)
    article.body = xhtml_unescape(article.body)

for reference in ref_all:
    reference.body = xhtml_unescape(reference.body)

for snippet in code_all:
    snippet.code = xhtml_unescape(snippet.code)

for table in table_all:
    table.tableform = xhtml_unescape(table.tableform)
Exemplo n.º 40
0
def convert_text_html(message):
    """Linkify URLs and turn newlines into <br/> for HTML"""
    linkified = tornado_linkify(message)
    return xhtml_unescape(linkified).replace('\n', '<br/>')
Exemplo n.º 41
0
 def getBasicInfo(self):
     """Scrape basic item and shop fields from a Taobao/Tmall product page.

     Python 2 code: ``self.content`` is a byte string; captured fields are
     decoded with the response encoding (``self.res.encoding``) and
     re-encoded as UTF-8.  Fills ``self.info`` in place and returns it.
     """
     if self.info['noitem']:
         # An earlier stage flagged the page as itemless -- nothing to parse.
         return self.info
     pq_obj = PyQuery(self.content.decode('GBK'))
     detail = pq_obj.find('div#detail')
     # "宝贝类型" == "item type"; pulled from the detail block's visible text.
     detail = re.search(r'宝贝类型:\s*(\S+)',
                        detail.text().encode('utf-8'), re.S)
     self.info['itemType'] = detail.group(1) if detail else None
     # siteId '2' selects the Tmall layout (see the '-tmall.com天猫' title
     # suffix handling below); other ids fall into the second branch.
     if self.info['siteId'] == '2':
         # Bulk-capture fields with the REGX_B pattern table; every value is
         # a (possibly empty) findall list until normalized below.
         self.info['charset'] = re.findall(REGX_B['charset'], self.content)
         self.info['keywords'] = re.findall(REGX_B['keywords'],
                                            self.content)
         self.info['desc'] = re.findall(REGX_B['desc'], self.content)
         self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
         self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
         self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
         self.info['userId'] = re.findall(REGX_B['userId'], self.content)
         self.info['shopName'] = re.findall(REGX_B['shopName'],
                                            self.content, re.S)
         self.info['shopUrl'] = re.findall(REGX_B['shopUrl'], self.content,
                                           re.S)
         self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content,
                                           re.S)
         self.info['itemTitle'] = re.findall(REGX_B['itemTitle'],
                                             self.content, re.S)
         self.info['initPrice'] = re.findall(REGX_B['initPrice'],
                                             self.content, re.S)
         self.info['bonus'] = re.findall(REGX_B['bonus'], self.content)
         self.info['totalSoldOut'] = re.findall(REGX_B['totalSoldOut'],
                                                self.content)
         self.info['attrList'] = re.findall(REGX_B['attrList'],
                                            self.content, re.S)
         self.info['starts'] = re.findall(REGX_B['starts'], self.content,
                                          re.S)
         self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
         self.info['userTag'] = re.findall(REGX_B['userTag'], self.content,
                                           re.S)
         self.info[r'cid'] = re.findall(REGX_B[r'cid'], self.content, re.S)
         self.info['location'] = re.findall(REGX_B['location'],
                                            self.content)
         self.info['brand'] = re.findall(REGX_B['brand'], self.content)
         # Normalize: unwrap the first findall match, re-encode to UTF-8,
         # strip HTML comments/whitespace runs from attrList, and map
         # empty results to None.
         for (k, v) in self.info.items():
             if v:
                 if isinstance(v, list) and len(v) > 0:
                     self.info[k] = str(v[0])
                     self.info[k] = self.info[k].decode(
                         self.res.encoding, 'ignore').encode('utf-8')
                     if k == 'attrList':
                         for t in re.findall(r'\<\!\-\-.+?\-\-\>',
                                             self.info[k]) + re.findall(
                                                 r'\s+', self.info[k]):
                             self.info[k] = self.info[k].replace(t, ' ')
                 elif isinstance(v, list) and len(v) == 0:
                     self.info[k] = None
                 # else:
                 #     self.info[k] = self.info[k] or None
             else:
                 self.info[k] = None
         # Fallback extraction for the fields the primary regexes missed.
         if not self.info['itemTitle']:
             itemTitle = re.findall(r'title"\s*:\s*"(.+?)"', self.content)
             if itemTitle:
                 self.info['itemTitle'] = itemTitle[0].decode(
                     self.res.encoding, 'ignore').encode('utf-8')
         if not self.info['itemTitle']:
             # Last resort: the <title> tag, minus the Tmall suffix.
             itemTitle = re.search(r'<title>(.+?)</title>', self.content)
             self.info['itemTitle'] = itemTitle.group(1).decode(
                 self.res.encoding, 'ignore').encode('utf-8').rstrip(
                     '-tmall.com天猫') if itemTitle else ''
         if not self.info['initPrice'] or self.info['initPrice'] == '0':
             # "price" JSON field is in cents; re-insert the decimal point.
             price = re.findall(r'"price"\s*:\s*"(\d+)"', self.content)
             if price:
                 self.info['initPrice'] = '%s.%s' % (price[0][:-2],
                                                     price[0][-2:])
         if not self.info['initPrice'] or self.info['initPrice'] == '0':
             price = re.search(
                 r'defaultItemPrice\'\s*\:\s*\'(\d+\.*\d*).+?\'',
                 self.content, re.S)
             if price:
                 self.info['initPrice'] = price.group(1)
         if not self.info['itemImg']:
             img = re.search(r'url\((\S+?item_pic\.jpg\S+?)\)',
                             self.content, re.S) or re.search(
                                 r'J_UlThumb.+?url\((.+?)\)', self.content,
                                 re.S) or re.search(
                                     r'J_ImgBooth"\s+src="(.+?)"',
                                     self.content, re.S)
             if img:
                 self.info['itemImg'] = img.group(1)
         if not self.info['cid']:
             cid = re.search(r'\'categoryId\'\s*\:\s*\'(\d+)\'',
                             self.content, re.S)
             if cid:
                 self.info['cid'] = cid.group(1)
         if not self.info['spuId']:
             spuId = re.search(r'\'spuId\'\s*\:\s*\'(\d+)\'', self.content,
                               re.S)
             self.info['spuId'] = spuId.group(1) if spuId else None
         # Average the "em.count" ratings into a one-decimal shop score.
         shopGoodRate = [
             float(t.text) for t in pq_obj.find('em.count') if t
         ]
         if shopGoodRate:
             self.info['shopGoodRate'] = '%.1f' % (sum(shopGoodRate) /
                                                   len(shopGoodRate))
     else:
         # Non-Tmall layout: same bulk capture, but several fields use the
         # REGX_C pattern table instead.
         self.info['charset'] = re.findall(REGX_B['charset'], self.content)
         self.info['keywords'] = re.findall(REGX_B['keywords'],
                                            self.content)
         self.info['desc'] = re.findall(REGX_B['desc'], self.content)
         self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
         self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
         self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
         self.info['userId'] = re.findall(REGX_B['userId'], self.content)
         self.info['shopName'] = re.findall(REGX_C['shopName'],
                                            self.content, re.S)
         self.info['shopUrl'] = re.findall(REGX_C['shopUrl'], self.content,
                                           re.S)
         self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content,
                                           re.S)
         self.info['itemTitle'] = re.findall(REGX_C['itemTitle'],
                                             self.content, re.S)
         self.info['initPrice'] = re.findall(REGX_C['initPrice'],
                                             self.content, re.S)
         self.info[r'totalSoldOut'] = re.findall(REGX_C[r'totalSoldOut'],
                                                 self.content)
         self.info['attrList'] = re.findall(REGX_C['attrList'],
                                            self.content, re.S)
         self.info['starts'] = re.findall(REGX_B['starts'], self.content,
                                          re.S)
         self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
         self.info['userTag'] = re.findall(REGX_B['userTag'], self.content,
                                           re.S)
         self.info[r'cid'] = re.findall(REGX_C[r'cid'], self.content, re.S)
         self.info['location'] = re.findall(REGX_C['location'],
                                            self.content)
         # Average all "count" ratings; stored as a one-element list here
         # (unwrapped to a scalar by the normalization loop below).
         self.info['gradeAvg'] = [
             float(row) for row in re.findall(
                 r'\<em\sclass="count".+?\>(\d+\.*\d*)\<\/em\>',
                 self.content, re.S)
         ]
         self.info['gradeAvg'] = [
             sum(self.info['gradeAvg']) / len(self.info['gradeAvg'])
         ] if self.info['gradeAvg'] else None
         shopRank = pq_obj.find('a#shop-rank img')
         # Reduce the rank badge URL to its "s_<name>_<n>" identifier.
         self.info['shopRank'] = re.sub(
             r'.+?(s\_\w+\_\d)\.gif', r'\1',
             shopRank.attr['src']) if shopRank else None
         self.info['shopGoodRate'] = pq_obj.find(
             'em#J_PositiveRating').text()
         # Same normalization as the Tmall branch, but only str/unicode
         # values are re-encoded (gradeAvg holds a float).
         for (k, v) in self.info.items():
             if v:
                 if isinstance(v, list) and len(v) > 0:
                     self.info[k] = v[0]
                     self.info[k] = self.info[k].decode(
                         self.res.encoding, 'ignore'
                     ).encode('utf-8') \
                         if isinstance(self.info[k], (str, unicode))\
                         else self.info[k]
                     if k == 'attrList':
                         for t in re.findall(r'\<\!\-\-.+?\-\-\>',
                                             self.info[k]) + re.findall(
                                                 r'\s+', self.info[k]):
                             self.info[k] = self.info[k].replace(t, ' ')
                 elif isinstance(v, list) and len(v) == 0:
                     self.info[k] = None
             else:
                 self.info[k] = None
         if self.info['location']:
             # Location arrives percent-encoded; unquote then re-encode.
             self.info['location'] = unquote(self.info['location']).decode(
                 self.res.encoding, 'ignore').encode('utf-8')
         if not self.info['itemImg']:
             img = re.search(r'id="J_ImgBooth" data-src="(.+?)"',
                             self.content, re.S)
             if img:
                 self.info['itemImg'] = img.group(1)
         # NOTE(review): eval() on scraped text -- the '%'->'\x' rewrite
         # turns %-escapes into byte escapes, but eval of page-derived data
         # is risky; consider a codecs-based decode instead.
         self.info['shopName'] = eval(
             "'%s'" % (self.info['shopName'] or '').replace('%', '\\x'))
         created = re.search(r'dbst\s*:\s*(\d+)', self.content, re.S)
         self.info['created'] = created.group(1) if created else None
     # Final attrList cleanup: decode HTML entities, collapse whitespace,
     # then split "<li>key: value</li>" rows (ASCII or fullwidth colon)
     # into [key, value] pairs.
     self.info['attrList'] = (xhtml_unescape(
         self.info['attrList']).encode('utf-8')
                              if self.info['attrList'] else None)
     self.info['attrList'] = re.sub(r'\s+', r' ', self.info['attrList']
                                    or '')
     self.info['attrs'] = re.findall(
         r'<li.+?>(.+?)[::]\s*(.*?)</li>'.decode('utf-8'),
         (self.info['attrList'] or '').decode('utf-8'), re.S)
     self.info['attrs'] = [[
         t[0].strip().encode('utf-8'), t[1].strip('\t\r ').encode('utf-8')
     ] for t in self.info['attrs']]
     # "已下架" == "removed from sale".
     self.info['offSale'] = True \
         if self.content.decode(self.res.encoding).encode('utf-8').find('已下架') > -1 \
         else False
     self.info['location'] = self.info['location'] or None
     self.deal_taobao_meal_basic()
     return self.info
Exemplo n.º 42
0
 def render(self, text, hl, **kwargs):
     """Wrap each word of *hl* found in *text* in a red <span>, then linkify.

     Extra keyword arguments are forwarded to ``escape.linkify``.
     """
     for wd in hl:
         # BUG FIX: re.escape guards against regex metacharacters in the
         # highlight word (e.g. "c++" previously raised/misbehaved).
         text = re.sub(r'\b({})\b'.format(re.escape(wd)),
                       '<span style="color:red">{}</span>'.format(wd), text)
     return escape.xhtml_unescape(escape.linkify(text, **kwargs))
Exemplo n.º 43
0
    def addExistingShows(self, shows_to_add, promptForSettings, **kwargs):
        """
        Receives a dir list and add them. Adds the ones with given TVDB IDs first, then forwards
        along to the newShow page.
        """

        # grab a list of other shows to add, if provided
        if not shows_to_add:
            shows_to_add = []
        elif not isinstance(shows_to_add, list):
            shows_to_add = [shows_to_add]

        shows_to_add = [unquote_plus(xhtml_unescape(x)) for x in shows_to_add]

        indexer_id_given = []
        dirs_only = []
        # separate all the ones with Indexer IDs
        for cur_dir in shows_to_add:
            if "|" in cur_dir:
                split_vals = cur_dir.split("|")
                if len(split_vals) < 3:
                    # BUG FIX: a malformed "a|b" entry must be treated as a
                    # bare directory ONLY -- without this `continue` it was
                    # also fed through split_extra_show below.
                    dirs_only.append(cur_dir)
                    continue
            if "|" not in cur_dir:
                dirs_only.append(cur_dir)
            else:
                indexer, show_dir, indexer_id, show_name = self.split_extra_show(
                    cur_dir)

                # incomplete metadata -> cannot auto-add; drop it silently
                if not show_dir or not indexer_id or not show_name:
                    continue

                indexer_id_given.append(
                    (int(indexer), show_dir, int(indexer_id), show_name))

        # if they want me to prompt for settings then I will just carry on to the newShow page
        if shows_to_add and config.checkbox_to_value(promptForSettings):
            return self.newShow(shows_to_add[0], shows_to_add[1:])

        # if they don't want me to prompt for settings then I can just add all the nfo shows now
        num_added = 0
        for cur_show in indexer_id_given:
            indexer, show_dir, indexer_id, show_name = cur_show

            if indexer is not None and indexer_id is not None:
                # add the show with the configured defaults
                settings.showQueueScheduler.action.add_show(
                    indexer,
                    indexer_id,
                    show_dir,
                    default_status=settings.STATUS_DEFAULT,
                    quality=settings.QUALITY_DEFAULT,
                    season_folders=settings.SEASON_FOLDERS_DEFAULT,
                    subtitles=settings.SUBTITLES_DEFAULT,
                    anime=settings.ANIME_DEFAULT,
                    scene=settings.SCENE_DEFAULT,
                    default_status_after=settings.STATUS_DEFAULT_AFTER,
                )
                num_added += 1

        if num_added:
            ui.notifications.message(
                _("Shows Added"),
                _("Automatically added {num_shows} from their existing metadata files"
                  ).format(num_shows=str(num_added)))

        # if we're done then go home
        if not dirs_only:
            return self.redirect("/home/")

        # for the remaining shows we need to prompt for each one, so forward this on to the newShow page
        return self.newShow(dirs_only[0], dirs_only[1:])
Exemplo n.º 44
0
 def getBasicInfo(self):
     """Scrape basic item and shop fields from a Taobao/Tmall product page.

     Python 2 code: ``self.content`` is a byte string; captured fields are
     decoded with the response encoding (``self.res.encoding``) and
     re-encoded as UTF-8.  Fills ``self.info`` in place and returns it.
     """
     # NOTE(review): siteId '2' appears to select one of two page layouts
     # (REGX_B-only vs. mixed REGX_B/REGX_C patterns) -- confirm which
     # marketplace each id maps to.
     if self.info['siteId'] == '2':
         # Bulk-capture fields via the REGX_B pattern table; each value is
         # a (possibly empty) findall list until normalized below.
         self.info['charset'] = re.findall(REGX_B['charset'], self.content)
         self.info['keywords'] = re.findall(
             REGX_B['keywords'], self.content)
         self.info['desc'] = re.findall(REGX_B['desc'], self.content)
         self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
         self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
         self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
         self.info['userId'] = re.findall(REGX_B['userId'], self.content)
         self.info['shopName'] = re.findall(REGX_B['shopName'],
                                            self.content, re.S)
         self.info['shopUrl'] = re.findall(REGX_B['shopUrl'], self.content,
                                           re.S)
         self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content,
                                           re.S)
         self.info['itemTitle'] = re.findall(REGX_B['itemTitle'],
                                             self.content, re.S)
         self.info['initPrice'] = re.findall(REGX_B['initPrice'],
                                             self.content, re.S)
         self.info['bonus'] = re.findall(REGX_B['bonus'], self.content)
         self.info['totalSoldOut'] = re.findall(REGX_B['totalSoldOut'],
                                                self.content)
         self.info['attrList'] = re.findall(REGX_B['attrList'],
                                            self.content, re.S)
         self.info['starts'] = re.findall(REGX_B['starts'], self.content,
                                          re.S)
         self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
         self.info['userTag'] = re.findall(REGX_B['userTag'], self.content,
                                           re.S)
         self.info[r'cid'] = re.findall(REGX_B[r'cid'], self.content, re.S)
         self.info['location'] = re.findall(
             REGX_B['location'], self.content)
         self.info['brand'] = re.findall(REGX_B['brand'], self.content)
         # Normalize: unwrap the first findall match, re-encode to UTF-8,
         # strip HTML comments/whitespace runs from attrList, and map
         # empty results to None.
         for (k, v) in self.info.items():
             if v:
                 if len(v) > 0:
                     self.info[k] = v[0]
                     self.info[k] = self.info[k].decode(self.res.encoding,
                                                        'ignore').encode('utf-8')
                     if k == 'attrList':
                         for t in re.findall(r'\<\!\-\-.+?\-\-\>',
                                             self.info[k]) + re.findall(r'\s+',
                                                                        self.info[k]):
                             self.info[k] = self.info[k].replace(t, ' ')
                 else:
                     self.info[k] = None
             else:
                 self.info[k] = None
         # Fallback extraction for fields the primary regexes missed.
         if not self.info['itemTitle']:
             itemTitle = re.findall(r'title"\s*:\s*"(.+?)"', self.content)
             if itemTitle:
                 self.info['itemTitle'] = itemTitle[0].decode(
                     self.res.encoding, 'ignore').encode('utf-8')
         if not self.info['initPrice'] or self.info['initPrice'] == '0':
             # The "price" JSON field is in cents; re-insert the decimal.
             price = re.findall(r'"price"\s*:\s*"(\d+)"', self.content)
             if price:
                 self.info['initPrice'] = '%s.%s' % (
                     price[0][:-2], price[0][-2:])
         if not self.info['initPrice'] or self.info['initPrice'] == '0':
             price = re.search(
                 r'defaultItemPrice\'\s*\:\s*\'(\d+\.*\d*).+?\'', self.content, re.S)
             if price:
                 self.info['initPrice'] = price.group(1)
         if not self.info['itemImg']:
             img = re.search(r'url\((\S+?item_pic\.jpg\S+?)\)', self.content, re.S) or re.search(
                 r'J_UlThumb.+?url\((.+?)\)', self.content, re.S)
             if img:
                 self.info['itemImg'] = img.group(1)
         if not self.info['cid']:
             cid = re.search(
                 r'\'categoryId\'\s*\:\s*\'(\d+)\'', self.content, re.S)
             if cid:
                 self.info['cid'] = cid.group(1)
         if not self.info['spuId']:
             spuId = re.search(
                 r'\'spuId\'\s*\:\s*\'(\d+)\'', self.content, re.S)
             self.info['spuId'] = spuId.group(1) if spuId else None
     else:
         # Alternate layout: same bulk capture, but several fields use the
         # REGX_C pattern table instead.
         self.info['charset'] = re.findall(REGX_B['charset'], self.content)
         self.info['keywords'] = re.findall(
             REGX_B['keywords'], self.content)
         self.info['desc'] = re.findall(REGX_B['desc'], self.content)
         self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
         self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
         self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
         self.info['userId'] = re.findall(REGX_B['userId'], self.content)
         self.info['shopName'] = re.findall(REGX_C['shopName'],
                                            self.content, re.S)
         self.info['shopUrl'] = re.findall(REGX_C['shopUrl'], self.content,
                                           re.S)
         self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content,
                                           re.S)
         self.info['itemTitle'] = re.findall(REGX_C['itemTitle'],
                                             self.content, re.S)
         self.info['initPrice'] = re.findall(REGX_C['initPrice'],
                                             self.content, re.S)
         self.info[r'totalSoldOut'] = re.findall(REGX_C[r'totalSoldOut'],
                                                 self.content)
         self.info['attrList'] = re.findall(REGX_C['attrList'],
                                            self.content, re.S)
         self.info['starts'] = re.findall(REGX_B['starts'], self.content,
                                          re.S)
         self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
         self.info['userTag'] = re.findall(REGX_B['userTag'], self.content,
                                           re.S)
         self.info[r'cid'] = re.findall(REGX_C[r'cid'], self.content, re.S)
         self.info['location'] = re.findall(
             REGX_C['location'], self.content)
         # Same normalization as the other branch.
         for (k, v) in self.info.items():
             if v:
                 if len(v) > 0:
                     self.info[k] = v[0]
                     self.info[k] = self.info[k].decode(self.res.encoding,
                                                        'ignore').encode('utf-8')
                     if k == 'attrList':
                         for t in re.findall(
                             r'\<\!\-\-.+?\-\-\>',
                             self.info[k]) + re.findall(
                                 r'\s+', self.info[k]):
                             self.info[k] = self.info[k].replace(t, ' ')
                 else:
                     self.info[k] = None
             else:
                 self.info[k] = None
         if self.info['location']:
             # Location arrives percent-encoded; unquote then re-encode.
             self.info['location'] = unquote(
                 self.info['location']
             ).decode(self.res.encoding, 'ignore').encode('utf-8')
     # Final pass: decode HTML entities in the attribute list (or None).
     self.info['attrList'] = (xhtml_unescape(
         self.info['attrList']).encode('utf-8')
         if self.info['attrList'] else None)
     return self.info
Exemplo n.º 45
0
 def output_message(self, message, message_hash):
     """Write the message as plain text with <br/> tags converted to newlines."""
     self.set_header('Content-Type', 'text/plain')
     text = xhtml_unescape(message)
     self.write(text.replace('<br/>', '\n'))
Exemplo n.º 46
0
    def make_mobi(user, feeds, data_dir, kindle_format='book',
                  mobi_templates=None, **other_services):
        """Render feed templates and convert them into a Kindle .mobi file.

        Returns the path of the generated .mobi file, or None when no feed
        has new items or the conversion failed.
        """
        # Skip the (expensive) generation entirely when nothing changed.
        is_updated = any(len(feed.items) > 0 for feed in feeds)
        if not is_updated:
            logging.info("no feed update.")
            return None

        if kindle_format not in ['book', 'periodical']:
            kindle_format = 'book'
        logging.info("generate .mobi file start... ")

        if not mobi_templates:
            from kindletemplate import TEMPLATES
            mobi_templates = TEMPLATES
        for tpl in mobi_templates:
            # BUG FIX: original used "tpl is 'book.html'" -- identity, not
            # equality; string interning makes that comparison unreliable.
            if tpl == 'book.html':
                continue

            t = template.Template(mobi_templates[tpl])
            content = t.generate(
                user=user,
                feeds=feeds,
                uuid=uuid.uuid1(),
                format=kindle_format,
                **other_services
            )

            content = content.decode('utf-8', 'ignore').encode('utf-8')
            # Context manager guarantees the file closes even if write fails.
            with open(os.path.join(data_dir, tpl), 'wb') as fp:
                fp.write(escape.xhtml_unescape(content))

        pre_mobi_file = "TheOldReader_%s" % time.strftime('%m-%dT%Hh%Mm')
        opf_file = os.path.join(data_dir, "content.opf")
        os.environ["PATH"] = os.environ["PATH"] + ":./"
        # NOTE(review): shell=True with interpolated paths is injection-prone
        # if data_dir is ever user-controlled -- confirm, or switch to a list
        # argv with shell=False.
        subprocess.call('%s %s -o "%s" > log.txt' %
                        (Kindle.kindle_gen_prog, opf_file, pre_mobi_file),
                        shell=True)
        pre_mobi_file = os.path.join(data_dir, pre_mobi_file)
        mobi_file = pre_mobi_file + ".mobi"
        status = subprocess.call(
            'kindlestrip.py "%s" "%s" >> log.txt' %
            (pre_mobi_file, mobi_file), shell=True)

        if 0 != status:
            # kindlestrip failed -- fall back to the unstripped file.
            import shutil
            shutil.move(pre_mobi_file, mobi_file)

        if not os.path.isfile(mobi_file):
            logging.error("failed!")
            return None
        else:
            fsize = os.path.getsize(mobi_file)
            logging.info(".mobi save as: %s(%.2fMB)" %
                         (mobi_file, float(fsize) / (1024 * 1024)))
            return mobi_file
Exemplo n.º 47
0
    def addNewShow(
        self,
        whichSeries=None,
        indexerLang=None,
        rootDir=None,
        defaultStatus=None,
        quality_preset=None,
        anyQualities=None,
        bestQualities=None,
        season_folders=None,
        subtitles=None,
        subtitles_sr_metadata=None,
        fullShowPath=None,
        other_shows=None,
        skipShow=None,
        providedIndexer=None,
        anime=None,
        scene=None,
        blacklist=None,
        whitelist=None,
        defaultStatusAfter=None,
    ):
        """
        Receive tvdb id, dir, and other options and create a show from them. If extra show dirs are
        provided then it forwards back to newShow, if not it goes to /home.

        Queues the show for addition via showQueueScheduler; most parameters
        are HTML-form values (checkbox strings, pipe-delimited selections).
        """

        if not indexerLang:
            indexerLang = settings.INDEXER_DEFAULT_LANGUAGE

        # grab our list of other dirs if given
        if not other_shows:
            other_shows = []
        elif not isinstance(other_shows, list):
            other_shows = [other_shows]

        def finishAddShow():
            # Continue the add-show wizard with the next queued directory,
            # or go home when the queue is empty.
            # if there are no extra shows then go home
            if not other_shows:
                return self.redirect("/home/")

            # peel off the next one
            next_show_dir = other_shows[0]
            rest_of_show_dirs = other_shows[1:]

            # go to add the next show
            return self.newShow(next_show_dir, rest_of_show_dirs)

        # if we're skipping then behave accordingly
        if skipShow:
            return finishAddShow()

        # sanity check on our inputs
        if (not rootDir and not fullShowPath) or not whichSeries:
            return _(
                "Missing params, no Indexer ID or folder: {show_to_add} and {root_dir}/{show_path}"
            ).format(show_to_add=whichSeries,
                     root_dir=rootDir,
                     show_path=fullShowPath)

        # figure out what show we're adding and where
        # whichSeries is pipe-delimited: fields 1/3/4 are indexer, indexer
        # id, and show name (6 fields expected in total).
        series_pieces = whichSeries.split("|")
        if (whichSeries and rootDir) or (whichSeries and fullShowPath
                                         and len(series_pieces) > 1):
            if len(series_pieces) < 6:
                logger.error(
                    "Unable to add show due to show selection. Not enough arguments: {0}"
                    .format((repr(series_pieces))))
                ui.notifications.error(
                    _("Unknown error. Unable to add show due to problem with show selection."
                      ))
                return self.redirect("/addShows/existingShows/")

            indexer = int(series_pieces[1])
            indexer_id = int(series_pieces[3])
            # Show name was sent in UTF-8 in the form
            show_name = xhtml_unescape(series_pieces[4])
        else:
            # if no indexer was provided use the default indexer set in General settings
            if not providedIndexer:
                providedIndexer = settings.INDEXER_DEFAULT

            indexer = int(providedIndexer)
            indexer_id = int(whichSeries)
            show_name = os.path.basename(
                os.path.normpath(xhtml_unescape(fullShowPath)))

        # use the whole path if it's given, or else append the show name to the root dir to get the full show path
        if fullShowPath:
            show_dir = os.path.normpath(xhtml_unescape(fullShowPath))
            extra_check_dir = show_dir
        else:
            folder_name = show_name
            s = sickchill.indexer.series_by_id(indexerid=indexer_id,
                                               indexer=indexer,
                                               language=indexerLang)
            # Optionally suffix the folder with the first-aired year,
            # e.g. "Show Name (2005)".
            if settings.ADD_SHOWS_WITH_YEAR and s.firstAired:
                try:
                    year = "({0})".format(
                        dateutil.parser.parse(s.firstAired).year)
                    if year not in folder_name:
                        folder_name = "{0} {1}".format(s.seriesName, year)
                except (TypeError, ValueError):
                    logger.info(
                        _("Could not append the show year folder for the show: {0}"
                          ).format(folder_name))

            show_dir = os.path.join(
                rootDir, sanitize_filename(xhtml_unescape(folder_name)))
            extra_check_dir = os.path.join(
                rootDir, sanitize_filename(xhtml_unescape(show_name)))

        # blanket policy - if the dir exists you should have used "add existing show" numbnuts
        if (os.path.isdir(show_dir)
                or os.path.isdir(extra_check_dir)) and not fullShowPath:
            ui.notifications.error(
                _("Unable to add show"),
                _("Folder {show_dir} exists already").format(
                    show_dir=show_dir))
            return self.redirect("/addShows/existingShows/")

        # don't create show dir if config says not to
        if settings.ADD_SHOWS_WO_DIR:
            logger.info("Skipping initial creation of " + show_dir +
                        " due to config.ini setting")
        else:
            dir_exists = helpers.makeDir(show_dir)
            if not dir_exists:
                logger.exception("Unable to create the folder " + show_dir +
                                 ", can't add the show")
                ui.notifications.error(
                    _("Unable to add show"),
                    _("Unable to create the folder {show_dir}, can't add the show"
                      ).format(show_dir=show_dir))
                # Don't redirect to default page because user wants to see the new show
                return self.redirect("/home/")
            else:
                helpers.chmodAsParent(show_dir)

        # prepare the inputs for passing along
        # Checkbox form fields arrive as strings; convert to 0/1 flags.
        scene = config.checkbox_to_value(scene)
        anime = config.checkbox_to_value(anime)
        season_folders = config.checkbox_to_value(season_folders)
        subtitles = config.checkbox_to_value(subtitles)
        subtitles_sr_metadata = config.checkbox_to_value(subtitles_sr_metadata)

        if whitelist:
            whitelist = short_group_names(whitelist)
        if blacklist:
            blacklist = short_group_names(blacklist)

        # A quality preset overrides any explicit "best" quality selection.
        if not anyQualities:
            anyQualities = []
        if not bestQualities or try_int(quality_preset, None):
            bestQualities = []
        if not isinstance(anyQualities, list):
            anyQualities = [anyQualities]
        if not isinstance(bestQualities, list):
            bestQualities = [bestQualities]
        newQuality = Quality.combineQualities([int(q) for q in anyQualities],
                                              [int(q) for q in bestQualities])

        # add the show
        settings.showQueueScheduler.action.add_show(
            indexer,
            indexer_id,
            showDir=show_dir,
            default_status=int(defaultStatus),
            quality=newQuality,
            season_folders=season_folders,
            lang=indexerLang,
            subtitles=subtitles,
            subtitles_sr_metadata=subtitles_sr_metadata,
            anime=anime,
            scene=scene,
            paused=None,
            blacklist=blacklist,
            whitelist=whitelist,
            default_status_after=int(defaultStatusAfter),
            root_dir=rootDir,
        )
        ui.notifications.message(
            _("Show added"),
            _("Adding the specified show into {show_dir}").format(
                show_dir=show_dir))

        return finishAddShow()
Exemplo n.º 48
0
 def get_current_user(self):
     """Return the unescaped username from the secure cookie, or None."""
     user = self.get_secure_cookie('user')
     if not user:
         return None
     return escape.xhtml_unescape(user)
Exemplo n.º 49
0
Arquivo: util.py Projeto: tanaes/qiita
def convert_text_html(message):
    """Linkify URLs and turn newlines into <br/> for HTML"""
    html = xhtml_unescape(tornado_linkify(message))
    return html.replace('\n', '<br/>')
Exemplo n.º 50
0
    def massAddTable(self, rootDir=None):
        t = PageTemplate(rh=self, filename="home_massAddTable.mako")

        if not rootDir:
            return _("No folders selected.")
        elif not isinstance(rootDir, list):
            root_dirs = [rootDir]
        else:
            root_dirs = rootDir

        root_dirs = [unquote_plus(xhtml_unescape(x)) for x in root_dirs]

        if settings.ROOT_DIRS:
            default_index = int(settings.ROOT_DIRS.split("|")[0])
        else:
            default_index = 0

        if len(root_dirs) > default_index:
            tmp = root_dirs[default_index]
            if tmp in root_dirs:
                root_dirs.remove(tmp)
                root_dirs.insert(0, tmp)

        dir_list = []

        main_db_con = db.DBConnection()
        for root_dir in root_dirs:
            # noinspection PyBroadException
            try:
                file_list = os.listdir(root_dir)
            except Exception:
                continue

            for cur_file in file_list:
                # noinspection PyBroadException
                try:
                    cur_path = os.path.normpath(
                        os.path.join(root_dir, cur_file))
                    if not os.path.isdir(cur_path):
                        continue
                    # ignore Synology folders
                    if cur_file.lower() in ["#recycle", "@eadir"]:
                        continue
                except Exception:
                    continue

                cur_dir = {
                    "dir":
                    cur_path,
                    "existing_info": (None, None, None),
                    "display_dir":
                    "<b>" + os.path.dirname(cur_path) + os.sep + "</b>" +
                    os.path.basename(cur_path),
                }

                # see if the folder is in KODI already
                dirResults = main_db_con.select(
                    "SELECT indexer_id FROM tv_shows WHERE location = ? LIMIT 1",
                    [cur_path])

                if dirResults:
                    cur_dir["added_already"] = True
                else:
                    cur_dir["added_already"] = False

                dir_list.append(cur_dir)

                indexer_id = show_name = indexer = None
                for cur_provider in settings.metadata_provider_dict.values():
                    if not (indexer_id and show_name):
                        (indexer_id, show_name,
                         indexer) = cur_provider.retrieveShowMetadata(cur_path)
                        if all((indexer_id, show_name, indexer)):
                            break

                if all((indexer_id, show_name, indexer)):
                    cur_dir["existing_info"] = (indexer_id, show_name, indexer)

                if indexer_id and Show.find(settings.showList, indexer_id):
                    cur_dir["added_already"] = True
        return t.render(dirList=dir_list)
Exemplo n.º 51
0
 def getBasicInfo(self):
     if self.info['noitem']:
         return self.info
     pq_obj = PyQuery(self.content.decode('GBK'))
     detail = pq_obj.find('div#detail')
     detail = re.search(
         r'宝贝类型:\s*(\S+)', detail.text().encode('utf-8'), re.S)
     self.info['itemType'] = detail.group(1) if detail else None
     if self.info['siteId'] == '2':
         self.info['charset'] = re.findall(REGX_B['charset'], self.content)
         self.info['keywords'] = re.findall(
             REGX_B['keywords'], self.content)
         self.info['desc'] = re.findall(REGX_B['desc'], self.content)
         self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
         self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
         self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
         self.info['userId'] = re.findall(REGX_B['userId'], self.content)
         self.info['shopName'] = re.findall(REGX_B['shopName'],
                                            self.content, re.S)
         self.info['shopUrl'] = re.findall(REGX_B['shopUrl'], self.content,
                                           re.S)
         self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content,
                                           re.S)
         self.info['itemTitle'] = re.findall(REGX_B['itemTitle'],
                                             self.content, re.S)
         self.info['initPrice'] = re.findall(REGX_B['initPrice'],
                                             self.content, re.S)
         self.info['bonus'] = re.findall(REGX_B['bonus'], self.content)
         self.info['totalSoldOut'] = re.findall(REGX_B['totalSoldOut'],
                                                self.content)
         self.info['attrList'] = re.findall(REGX_B['attrList'],
                                            self.content, re.S)
         self.info['starts'] = re.findall(REGX_B['starts'], self.content,
                                          re.S)
         self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
         self.info['userTag'] = re.findall(REGX_B['userTag'], self.content,
                                           re.S)
         self.info[r'cid'] = re.findall(REGX_B[r'cid'], self.content, re.S)
         self.info['location'] = re.findall(
             REGX_B['location'], self.content)
         self.info['brand'] = re.findall(REGX_B['brand'], self.content)
         for (k, v) in self.info.items():
             if v:
                 if isinstance(v, list) and len(v) > 0:
                     self.info[k] = str(v[0])
                     self.info[k] = self.info[k].decode(self.res.encoding,
                                                        'ignore').encode('utf-8')
                     if k == 'attrList':
                         for t in re.findall(
                             r'\<\!\-\-.+?\-\-\>',
                             self.info[k]) + re.findall(
                                 r'\s+',
                                 self.info[k]):
                             self.info[k] = self.info[k].replace(t, ' ')
                 elif isinstance(v, list) and len(v) == 0:
                     self.info[k] = None
                 # else:
                 #     self.info[k] = self.info[k] or None
             else:
                 self.info[k] = None
         if not self.info['itemTitle']:
             itemTitle = re.findall(r'title"\s*:\s*"(.+?)"', self.content)
             if itemTitle:
                 self.info['itemTitle'] = itemTitle[0].decode(
                     self.res.encoding, 'ignore').encode('utf-8')
         if not self.info['itemTitle']:
             itemTitle = re.search(r'<title>(.+?)</title>', self.content)
             self.info['itemTitle'] = itemTitle.group(1).decode(
                 self.res.encoding, 'ignore'
             ).encode('utf-8').rstrip('-tmall.com天猫') if itemTitle else ''
         if not self.info['initPrice'] or self.info['initPrice'] == '0':
             price = re.findall(r'"price"\s*:\s*"(\d+)"', self.content)
             if price:
                 self.info['initPrice'] = '%s.%s' % (
                     price[0][:-2], price[0][-2:])
         if not self.info['initPrice'] or self.info['initPrice'] == '0':
             price = re.search(
                 r'defaultItemPrice\'\s*\:\s*\'(\d+\.*\d*).+?\'', self.content, re.S)
             if price:
                 self.info['initPrice'] = price.group(1)
         if not self.info['itemImg']:
             img = re.search(r'url\((\S+?item_pic\.jpg\S+?)\)', self.content, re.S) or re.search(
                 r'J_UlThumb.+?url\((.+?)\)', self.content, re.S) or re.search(
                     r'J_ImgBooth"\s+src="(.+?)"', self.content, re.S)
             if img:
                 self.info['itemImg'] = img.group(1)
         if not self.info['cid']:
             cid = re.search(
                 r'\'categoryId\'\s*\:\s*\'(\d+)\'', self.content, re.S)
             if cid:
                 self.info['cid'] = cid.group(1)
         if not self.info['spuId']:
             spuId = re.search(
                 r'\'spuId\'\s*\:\s*\'(\d+)\'', self.content, re.S)
             self.info['spuId'] = spuId.group(1) if spuId else None
         shopGoodRate = [float(t.text)
                         for t in pq_obj.find('em.count') if t]
         if shopGoodRate:
             self.info['shopGoodRate'] = '%.1f' % (
                 sum(shopGoodRate) / len(shopGoodRate))
     else:
         self.info['charset'] = re.findall(REGX_B['charset'], self.content)
         self.info['keywords'] = re.findall(
             REGX_B['keywords'], self.content)
         self.info['desc'] = re.findall(REGX_B['desc'], self.content)
         self.info['itemId'] = re.findall(REGX_B['itemId'], self.content)
         self.info['pageId'] = re.findall(REGX_B['pageId'], self.content)
         self.info['shopId'] = re.findall(REGX_B['shopId'], self.content)
         self.info['userId'] = re.findall(REGX_B['userId'], self.content)
         self.info['shopName'] = re.findall(REGX_C['shopName'],
                                            self.content, re.S)
         self.info['shopUrl'] = re.findall(REGX_C['shopUrl'], self.content,
                                           re.S)
         self.info['itemImg'] = re.findall(REGX_B['itemImg'], self.content,
                                           re.S)
         self.info['itemTitle'] = re.findall(REGX_C['itemTitle'],
                                             self.content, re.S)
         self.info['initPrice'] = re.findall(REGX_C['initPrice'],
                                             self.content, re.S)
         self.info[r'totalSoldOut'] = re.findall(REGX_C[r'totalSoldOut'],
                                                 self.content)
         self.info['attrList'] = re.findall(REGX_C['attrList'],
                                            self.content, re.S)
         self.info['starts'] = re.findall(REGX_B['starts'], self.content,
                                          re.S)
         self.info['ends'] = re.findall(REGX_B['ends'], self.content, re.S)
         self.info['userTag'] = re.findall(REGX_B['userTag'], self.content,
                                           re.S)
         self.info[r'cid'] = re.findall(REGX_C[r'cid'], self.content, re.S)
         self.info['location'] = re.findall(
             REGX_C['location'], self.content)
         self.info['gradeAvg'] = [
             float(row)
             for row in re.findall(
                 r'\<em\sclass="count".+?\>(\d+\.*\d*)\<\/em\>', self.content, re.S)
         ]
         self.info['gradeAvg'] = [sum(
             self.info['gradeAvg']) / len(self.info['gradeAvg'])] if self.info['gradeAvg'] else None
         shopRank = pq_obj.find('a#shop-rank img')
         self.info['shopRank'] = re.sub(
             r'.+?(s\_\w+\_\d)\.gif', r'\1', shopRank.attr['src']) if shopRank else None
         self.info['shopGoodRate'] = pq_obj.find(
             'em#J_PositiveRating').text()
         for (k, v) in self.info.items():
             if v:
                 if isinstance(v, list) and len(v) > 0:
                     self.info[k] = v[0]
                     self.info[k] = self.info[k].decode(
                         self.res.encoding, 'ignore'
                     ).encode('utf-8') \
                         if isinstance(self.info[k], (str, unicode))\
                         else self.info[k]
                     if k == 'attrList':
                         for t in re.findall(
                             r'\<\!\-\-.+?\-\-\>',
                             self.info[k]) + re.findall(
                                 r'\s+', self.info[k]):
                             self.info[k] = self.info[k].replace(t, ' ')
                 elif isinstance(v, list) and len(v) == 0:
                     self.info[k] = None
             else:
                 self.info[k] = None
         if self.info['location']:
             self.info['location'] = unquote(
                 self.info['location']
             ).decode(self.res.encoding, 'ignore').encode('utf-8')
         if not self.info['itemImg']:
             img = re.search(
                 r'id="J_ImgBooth" data-src="(.+?)"', self.content, re.S)
             if img:
                 self.info['itemImg'] = img.group(1)
         self.info['shopName'] = eval(
             "'%s'" % (self.info['shopName'] or '').replace('%', '\\x'))
         created = re.search(r'dbst\s*:\s*(\d+)', self.content, re.S)
         self.info['created'] = created.group(1) if created else None
     self.info['attrList'] = (xhtml_unescape(
         self.info['attrList']).encode('utf-8')
         if self.info['attrList'] else None)
     self.info['attrList'] = re.sub(
         r'\s+', r' ', self.info['attrList'] or '')
     self.info['attrs'] = re.findall(
         r'<li.+?>(.+?)[::]\s*(.*?)</li>'.decode('utf-8'),
         (self.info['attrList'] or '').decode('utf-8'),
         re.S)
     self.info['attrs'] = [[t[0].strip().encode('utf-8'), t[1].strip('\t\r ').encode('utf-8')]
                           for t in self.info['attrs']]
     self.info['offSale'] = True \
         if self.content.decode(self.res.encoding).encode('utf-8').find('已下架') > -1 \
         else False
     self.info['location'] = self.info['location'] or None
     self.deal_taobao_meal_basic()
     return self.info
Exemplo n.º 52
0
Arquivo: utils.py Projeto: btwo/hanger
def escape(raw):
    '''Html escape.'''
    return xhtml_escape(xhtml_unescape(raw))