Example #1
File: po2csv.py Project: sleepyjames/ppb
def csv2po(csv_file):
    """Convert a file-like object `csv_file` to a polib.POFile object"""
    po = polib.POFile()

    # Reset to reading from the beginning of the file
    csv_file.seek(0)
    csv_reader = csv.reader(csv_file)

    for count, row in enumerate(csv_reader):
        # Skip the first two header rows
        if count < len(csv_header_rows):
            continue

        msgid = unescape(row[0])
        msgid_plural = unescape(row[1])
        msgctxt = row[2]
        msgstr, msgstr_plural = undo_plurals(msgid_plural, row[3])

        entry = polib.POEntry()
        entry.msgid = msgid

        if msgid_plural:
            entry.msgid_plural = msgid_plural
        if msgctxt:
            entry.msgctxt = msgctxt
        if msgstr:
            entry.msgstr = msgstr
        if msgstr_plural:
            entry.msgstr_plural = msgstr_plural

        po.append(entry)

    return po
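Most examples on this page assume a project-local `unescape` helper that decodes HTML entities back to characters. A minimal sketch of such a helper, using only the standard library (the helpers in the individual projects may add their own behavior):

# Minimal sketch of the HTML-entity unescape helper these examples assume.
# Python 3.4+ ships html.unescape(); Python 2 code of this era typically
# wrapped HTMLParser's unescape method.
try:
    from html import unescape          # Python 3.4+
except ImportError:
    from HTMLParser import HTMLParser  # Python 2
    unescape = HTMLParser().unescape

print(unescape('Fish &amp; Chips &lt;extra salt&gt;'))
# Fish & Chips <extra salt>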
Example #2
File: api.py Project: androa/vgtv-xbmc
    def parse_video_response(self, response):
        data = simplejson.loads(response.read())
        items = list()
        count = 0
        for video in data['videos']:
            vid_url, thumb_url, category_id, dur = self.get_video_urls(video)
            count += 1

            if vid_url is None:
                continue

            meta = video.get('meta')
            items.append({
                'label': unescape(meta.get('title')),
                'thumbnail': thumb_url,
                'info': {
                    'plot': unescape(meta.get('preamble') or ''),
                    'originaltitle': unescape(meta.get('title') or '???'),
                    'tagline': unescape(meta.get('preamble') or ''),
                    'aired': self.get_date(meta.get('timePublished')),
                    'duration': self.get_duration(meta.get('duration'))
                },
                'stream_info': {
                    'video': {
                        'duration': meta.get('duration', 0)
                    }
                },
                'path': vid_url,
                'is_playable': True,
            })

        return items, (count < self.PER_PAGE)
Example #3
def AddDir(name, mode, url=None, image=None, fanart=None, isFolder=False, isPlayable=False, desc='', plot='', contextMenu=None, replaceItems=False, infoLabels=None):
    try:    
        name = name.encode('utf-8')       
        url = utils.fixUnicode(utils.unescape(url))
    except:
        pass

    try:
        if not validateMode(mode, name):
            return
    
        if not fanart:
            fanart = FANART

        name = name.replace('_', ' ')

        infoLabels = {'title':name, 'fanart':fanart, 'description':desc, 'plot':plot}    
        
        image = utils.patchImage(mode, image, url, infoLabels)
 
        u  = ''
        u += '?mode='  + str(mode)
        u += '&title=' + urllib.quote_plus(name)

        if image:
            u += '&image=' + urllib.quote_plus(image)            

        if url:
            u += '&url=' + urllib.quote_plus(url).replace('%25LB%25', '%')
            
        APPLICATION.addDir(utils.unescape(name), mode, u, image, isFolder, isPlayable, contextMenu=contextMenu, replaceItems=replaceItems, infoLabels=infoLabels)
    except Exception, e:
        raise
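The query string above is assembled by hand with repeated `quote_plus` calls; a sketch of the same construction with `urllib.urlencode` (Python 2, matching the example), which quotes each value itself:

import urllib

# Sketch only: build the same '?mode=...&title=...' string with urlencode.
params = [('mode', str(mode)), ('title', name)]
if image:
    params.append(('image', image))
u = '?' + urllib.urlencode(params)
if url:
    # the original additionally restores '%' from a '%LB%' placeholder
    u += '&url=' + urllib.quote_plus(url).replace('%25LB%25', '%')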
Example #4
File: views.py Project: dnet/omnom
def load(request):
    if not request.user.is_authenticated():
        return HttpResponseRedirect("/accounts/login")
    if request.method == 'POST':
        form = ImportDeliciousForm(request.POST,request.FILES)
        if form.is_valid():
            db = get_database()[Bookmark.collection_name]
            html=request.FILES['exported'].read().decode('utf8')
            soup=BeautifulSoup(html)
            for item in soup.findAll('dt'):
                desc=''
                next=item.findNextSiblings()
                if next:
                    next=next[0]
                    if 'name' in dir(next) and next.name=='dd':
                        desc=unescape(u''.join(imap(unicode, next.contents)))
                db.Bookmark({'url': urlSanitize(item.a['href']),
                             'seq': getNextVal('seq'),
                             'tags': item.a['tags'].split(','),
                             'user': unicode(request.user),
                             'created': datetime.fromtimestamp(float(item.a['add_date'])),
                             'private': item.a['private']=='1',
                             'title': unescape(unicode(item.a.string)),
                             'notes': unicode(desc)}).save()
            return HttpResponseRedirect('/u/%s/' % request.user)
    else:
        form = ImportDeliciousForm()
    return render_to_response('import.html', { 'form': form, }, context_instance=RequestContext(request) )
Example #5
 def __reload(self, values):
     self.__raw.__dict__.update(values)
     self.firstname = unescape(self.__raw.firstname)
     self.lastname = unescape(self.__raw.lastname)
     self.company = unescape(self.__raw.company)
     self.colleagues = self.__raw.colleagues
     self.id = int(self.__raw.id_user)
     self.lang = LANG_ID[int(self.__raw.lang) + 1]
Example #7
    def from_text(cls, text):
        match = cls.token_re.match(text)
        assert match, 'cannot parse Token from {}'.format(text)
        groups = match.groupdict()

        word = unescape(groups['word'])
        lemma = unescape(groups['lemma'])
        pos = unescape(groups['pos'])

        return cls(word, lemma, pos)
Example #8
    def from_text(cls, text):
        match = cls.pred_re.match(text)
        assert match, 'cannot parse Predicate from {}'.format(text)
        groups = match.groupdict()

        word = unescape(groups['word'])
        lemma = unescape(groups['lemma'])
        pos = unescape(groups['pos'])
        neg = True if groups['neg'] is not None else False
        prt = unescape(groups['prt']) if groups['prt'] is not None else ''

        return cls(word, lemma, pos, neg, prt)
Example #9
    def from_text(cls, text):
        match = cls.arg_re.match(text)
        assert match, 'cannot parse Argument from {}'.format(text)
        groups = match.groupdict()

        word = unescape(groups['word'])
        lemma = unescape(groups['lemma'])
        pos = unescape(groups['pos'])
        ner = groups['ner'] if groups['ner'] != 'NONE' else ''
        entity_idx = int(groups['entity_idx']) if groups['entity_idx'] else -1
        mention_idx = \
            int(groups['mention_idx']) if groups['mention_idx'] else -1

        return cls(word, lemma, pos, ner, entity_idx, mention_idx)
Example #10
def collect_album_info(album_soup):
    url = 'http://tut-audio.su'
    album_dict = {}
    album_dict['name'] = unescape(
        album_soup.find(id="titlealb").get_text()[:-14])
    album_dict['year'] = album_soup.find(
        id="dopinfoalb").find('p').find('b').get_text()
    if album_dict['year']:
        album_dict['year'] = int(album_dict['year'])
    album_dict['cover_url'] = url + album_soup.find(id="imagesalb").get('src')
    t = album_soup.find_all("div", "player")[0]
    artist, _ = t['data-title'].split(' — ')
    artist = unescape(artist)
    album_dict['url'] = url + album_url
    return album_dict, artist
Example #11
    def __init__(
        self,
        uid,
        summary,
        dtstamp=None,
        created=None,
        last_modified=None,
        related_to=None,
        completed=None,
        percent_complete=None,
        x_kde_ktimetracker_totalsessiontime=None,
        x_kde_ktimetracker_totaltasktime=None,
        x_kde_ktimetracker_bctype=None,
    ):
        self.uid = uid
        self.summary = unescape(summary)
        self.dtstamp = dtstamp
        self.created = created
        self.last_modified = last_modified
        self.related_to = related_to
        self.completed = completed
        self.percent_complete = percent_complete
        self.x_kde_ktimetracker_totalsessiontime = x_kde_ktimetracker_totalsessiontime
        self.x_kde_ktimetracker_totaltasktime = x_kde_ktimetracker_totaltasktime
        self.x_kde_ktimetracker_bctype = x_kde_ktimetracker_bctype

        self.todos = {}
Example #12
def gen_solution(cur, td, num, p_id):
#	import pdb
#	pdb.set_trace()
	global testcase_id
	global testcase_crawled

	if num == 0:
		column_name = 'java'
	elif num == 1:
		column_name = 'cpp'
	elif num == 2:
		column_name = 'csharp'
	else:
		column_name = 'VB'
	cur.execute('select %s from problem where id = %d' % (column_name, p_id))
	if cur.fetchall()[0][0] != None:
		return
	p = compile('"/stat\?c=problem_solution.*?"')
	l = p.findall(td)
	if len(l) == 1:
		url = topcoder_site_url + unescape(l[0][1:-1])
		try:
			page = topcoder.get_page(url)
		except Exception, e:
			print url, e
			return
		p = compile('<TD CLASS="problemText" COLSPAN="8" VALIGN="middle" ALIGN="left">[\d\D]*?</TD>')
		try:
			code = escape_string(p.findall(page)[0])
		except Exception, e:
			print 'No code found:',url,e
			return
Example #13
def fetch_bioguide_page(bioguide, force):
    url = "http://bioguide.congress.gov/scripts/biodisplay.pl?index=%s" % bioguide
    cache = "legislators/bioguide/%s.html" % bioguide
    try:
        body = download(url, cache, force)

        # Fix a problem?
        body = body.replace("&Aacute;\xc2\x81", "&Aacute;")

        # Entities like &#146; are in Windows-1252 encoding. Normally lxml
        # handles that for us, but we're also parsing HTML. The lxml.html.HTMLParser
        # doesn't support specifying an encoding, and the lxml.etree.HTMLParser doesn't
        # provide a cssselect method on element objects. So we'll just decode ourselves.
        body = utils.unescape(body, "Windows-1252")

        dom = lxml.html.parse(io.StringIO(body)).getroot()
    except lxml.etree.XMLSyntaxError:
        raise Exception("Error parsing: " + url)

    # Sanity check.

    if len(dom.cssselect("title")) == 0:
        raise Exception("No page for bioguide %s!" % bioguide)

    return dom
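The two-argument `utils.unescape(body, "Windows-1252")` call is project-specific; a sketch of what an encoding-aware unescape plausibly does here (an assumption, not the project's code): numeric references such as &#146; name Windows-1252 bytes rather than Unicode code points, so each one is decoded through that codec.

import re

def unescape_windows1252(text, encoding="Windows-1252"):
    # Sketch (Python 2): decode numeric character references <= 255
    # through the given single-byte codec; &#146; -> u'\u2019'.
    def repl(match):
        codepoint = int(match.group(1))
        if codepoint <= 255:
            return chr(codepoint).decode(encoding)
        return unichr(codepoint)
    return re.sub(r'&#(\d+);', repl, text)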
Example #14
    def insertPicDetail(self, picDetailModel):

        cur = self.con.cursor()
        try:

            sql = '''INSERT INTO admin_picdetail 
            (`pid`, `pic_path`, `height`, `width`, `pic_desc`, `categoary_id`, `albunm_name`, `albunm_id`, `user_id`, 
            `time`, `taoke_num_iid`, `taoke_title`, `taoke_price`) 
            VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')''' % (
                picDetailModel.pId, picDetailModel.picPath,
                picDetailModel.height, picDetailModel.width,
                picDetailModel.desc, picDetailModel.cateId,
                picDetailModel.albunmName, picDetailModel.albunmId,
                picDetailModel.userId, picDetailModel.time,
                picDetailModel.taokeNumIID, picDetailModel.title,
                picDetailModel.price)

            sql = utils.unescape(sql).encode('utf-8')

            cur.execute(sql)

            self.con.commit()
        except Exception, what:
            print '========-------=======', what
            #            print sql
            pass
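Building the INSERT by %-interpolation and then unescaping the whole SQL string is fragile (quoting breaks, and it invites injection). A sketch of the same insert as a parameterized query, assuming a DB-API driver with the %s paramstyle such as MySQLdb:

# Sketch: let the driver quote the values; no manual escaping needed.
sql = '''INSERT INTO admin_picdetail
    (pid, pic_path, height, width, pic_desc, categoary_id, albunm_name,
     albunm_id, user_id, time, taoke_num_iid, taoke_title, taoke_price)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
cur.execute(sql, (picDetailModel.pId, picDetailModel.picPath,
                  picDetailModel.height, picDetailModel.width,
                  picDetailModel.desc, picDetailModel.cateId,
                  picDetailModel.albunmName, picDetailModel.albunmId,
                  picDetailModel.userId, picDetailModel.time,
                  picDetailModel.taokeNumIID, picDetailModel.title,
                  picDetailModel.price))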
Example #15
def gen_solution(cur, td, num, p_id):
    #	import pdb
    #	pdb.set_trace()
    global testcase_id
    global testcase_crawled

    if num == 0:
        column_name = 'java'
    elif num == 1:
        column_name = 'cpp'
    elif num == 2:
        column_name = 'csharp'
    else:
        column_name = 'VB'
    cur.execute('select %s from problem where id = %d' % (column_name, p_id))
    if cur.fetchall()[0][0] != None:
        return
    p = compile('"/stat\?c=problem_solution.*?"')
    l = p.findall(td)
    if len(l) == 1:
        url = topcoder_site_url + unescape(l[0][1:-1])
        try:
            page = topcoder.get_page(url)
        except Exception, e:
            print url, e
            return
        p = compile(
            '<TD CLASS="problemText" COLSPAN="8" VALIGN="middle" ALIGN="left">[\d\D]*?</TD>'
        )
        try:
            code = escape_string(p.findall(page)[0])
        except Exception, e:
            print 'No code found:', url, e
            return
Example #16
File: api.py Project: rexxars/vgtv-xbmc
    def get_categories(self, root_id=0, only_series=False):
        categories = self.get_category_tree()
        root = int(root_id)

        matches = []
        for category in categories:
            id = category.get('id')
            
            if category.get('showCategory') is False:
                continue

            if only_series is True and category.get('isSeries') is not True:
                continue

            if only_series is False and category.get('parentId') != root:
                continue
            
            matches.append({
                'label': unescape(category.get('title')),
                'path':  self.plugin.url_for(
                    'show_category',
                    id=str(id),
                    mode='all'
                ),
                'id':    id
            })

        return matches
Example #17
def view(request, shurl):
    item = getItemByUrl(shurl)
    item['shurl'] = base62.from_decimal(item['seq'])

    if request.GET.get('format', '') == 'json':
        del item['user']
        res = {
            'url': unicode(item['url']),
            'title': unicode(item['title']),
            'created': tuple(item['created'].timetuple()),
            'private': item['private'],
            'notes': unicode(unescape(item['notes'])),
            'tags': item['tags'],
        }
        return HttpResponse(json.dumps(res), mimetype="application/json")
    else:
        item['snapshot'] = '' if not item.get('snapshot') else item.get(
            'snapshot')[0]
        tpl = 'view.html'
        if request.GET.get('raw', None):
            tpl = 'view-bare.html'
        return render_to_response(tpl, {
            'item': item,
        },
                                  context_instance=RequestContext(request))
Example #18
def fetch_albums(url):
    html = urlopen(url)

    found = re.findall(r'<td class="Title".*?<a href="/music/url\?q=(/music/album\?id%3D.*?)".*?>(.*?)</a>', html)
    print '# albums:', len(found), urllib.unquote(url)
    for link, title in found:
        link = 'http://www.google.cn'+link.split('&')[0]
        title = unescape(title)
        print urllib.unquote(link), '|', title

    found = re.findall(r'<td>.*?<a class="imglink" href="/music/url\?q=(.*?)"', html)
    pages = [ 'http://www.google.cn'+urllib.unquote(i.split('&amp;')[0]) for i in found ]

    cache[url] = True
    for page in pages:
        if page not in cache:
            cache[page] = False

    another_page = None
    for page, done in cache.iteritems():
        if not done:
            another_page = page
            break

    if another_page:
        fetch_albums(another_page)
Example #19
def fetch_bioguide_page(bioguide, force):
  url = "http://bioguide.congress.gov/scripts/biodisplay.pl?index=%s" % bioguide
  cache = "legislators/bioguide/%s.html" % bioguide
  try:
    body = download(url, cache, force)

    # Fix a problem?
    body = body.replace("&Aacute;\xc2\x81", "&Aacute;")

    # Entities like &#146; are in Windows-1252 encoding. Normally lxml
    # handles that for us, but we're also parsing HTML. The lxml.html.HTMLParser
    # doesn't support specifying an encoding, and the lxml.etree.HTMLParser doesn't
    # provide a cssselect method on element objects. So we'll just decode ourselves.
    body = utils.unescape(body, "Windows-1252")

    dom = lxml.html.parse(io.StringIO(body)).getroot()
  except lxml.etree.XMLSyntaxError:
    raise Exception("Error parsing: " + url)

  # Sanity check.

  if len(dom.cssselect("title")) == 0:
    raise Exception("No page for bioguide %s!" % bioguide)

  return dom
Example #20
 def _parse_result(self, buf, properties={}):
     count = 0
     result = ""
     try:
         self.redis_conn.send(buf)
     except Exception as e:
         if e.args[0] == errno.EPIPE and count < 3:
             self.setup()
             count += 1
             time.sleep(1)
         else:
             raise
     while True:
         recv = self.redis_conn.recv(1024000)
         if recv:
             result += recv
         if not recv or recv.endswith("\r\n\r\n"):
             break
     a = result.split("#-*-#")
     code, info, data = a
     data = data[:-4]
     if code == "200":
         return handle_safely(properties.get("recv",
                                             default_recv))(unescape(data))
     elif code == "502":
         return properties.get("result", data)
     else:
         raise RedisError("%s:%s, data: %s" % (code, info, data))
Example #21
    def insertPicDetail(self,picDetailModel):
        
        cur = self.con.cursor()
        try:
            
            sql = '''INSERT INTO admin_picdetail 
            (`pid`, `pic_path`, `height`, `width`, `pic_desc`, `categoary_id`, `albunm_name`, `albunm_id`, `user_id`, 
            `time`, `taoke_num_iid`, `taoke_title`, `taoke_price`) 
            VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')'''%(picDetailModel.pId
            ,picDetailModel.picPath
            ,picDetailModel.height
            ,picDetailModel.width
            ,picDetailModel.desc
            ,picDetailModel.cateId
            ,picDetailModel.albunmName
            ,picDetailModel.albunmId
            ,picDetailModel.userId
            ,picDetailModel.time
            ,picDetailModel.taokeNumIID
            ,picDetailModel.title
            ,picDetailModel.price)
            
            sql = utils.unescape(sql).encode('utf-8')
            
            cur.execute(sql)

            self.con.commit()
        except Exception,what:
            print '========-------=======',what
#            print sql
            pass
Example #22
 def settings_to_log( self ):
     try:
         utils.log( "Settings" )
         setting_values = self.read_settings_xml()
         for k, v in sorted( setting_values.items() ):
             utils.log( "%30s: %s" % ( k, str( utils.unescape( v.decode('utf-8', 'ignore') ) ) ) )
     except:
         traceback.print_exc()
Example #23
def get_musicbrainz_artist_id(artist_search, limit=1, alias=False):
    name = ""
    id = ""
    sortname = ""
    artist_name = smart_unicode(
        (artist_search.replace('"', '?').replace('&', 'and')))
    if not alias:
        url = artist_url % (server, quote_plus(
            artist_name.encode("utf-8")), limit)
    else:
        url = alias_url % (server, quote_plus(
            artist_name.encode("utf-8")), limit)
    htmlsource = get_html_source(url, "", save_file=False)
    match = re.search('''<artist(.*?)</artist>''', htmlsource)
    if match:
        score_match = re.search('''score="(.*?)"''', htmlsource)
        name_match = re.search('''<name>(.*?)</name>''', htmlsource)
        id_match = re.search('''<artist id="(.*?)"(?:.*?)>''', htmlsource)
        if not id_match:
            id_match = re.search('''<artist (?:.*?)id="(.*?)">''', htmlsource)
        sort_name_match = re.search('''<sort-name>(.*?)</sort-name>''',
                                    htmlsource)

        if score_match:
            score = score_match.group(1)
        if name_match:
            name = unescape(smart_unicode(name_match.group(1)))
        if id_match:
            id = id_match.group(1)
        if sort_name_match:
            sortname = unescape(smart_unicode(sort_name_match.group(1)))
        log("Score     : %s" % score, xbmc.LOGDEBUG)
        log("Id        : %s" % id, xbmc.LOGDEBUG)
        log("Name      : %s" % name, xbmc.LOGDEBUG)
        log("Sort Name : %s" % sortname, xbmc.LOGDEBUG)
    else:
        if not alias:
            log("No Artist ID found trying aliases: %s" % artist_search,
                xbmc.LOGDEBUG)
            name, id, sortname = get_musicbrainz_artist_id(
                artist_search, limit, True)
        else:
            log("No Artist ID found for Artist: %s" % artist_search,
                xbmc.LOGDEBUG)
    xbmc.sleep(mb_delay)
    return name, id, sortname
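Extracting fields from the MusicBrainz XML with regular expressions works but is brittle; a sketch of the same extraction with xml.etree.ElementTree (assumes the response parses as XML; the real web-service payload is namespaced, which would need handling):

import xml.etree.ElementTree as ET

def parse_first_artist(htmlsource):
    # Sketch only: pull score, name, id and sort-name from the first
    # <artist> element instead of regex-scanning the raw source.
    root = ET.fromstring(htmlsource)
    artist = root.find('.//artist')
    if artist is None:
        return None
    return {
        'score': artist.get('score', ''),
        'id': artist.get('id', ''),
        'name': artist.findtext('name', ''),
        'sortname': artist.findtext('sort-name', ''),
    }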
Example #24
def get_musicbrainz_artists(artist_search, limit=1):
    log("Artist: %s" % artist_search, xbmc.LOGDEBUG)
    score = ""
    name = ""
    id = ""
    sortname = ""
    artists = []
    artist_name = smart_unicode(
        (artist_search.replace('"', '?').replace('&', 'and')))
    url = artist_url % (server, quote_plus(artist_name.encode("utf-8")), limit)
    htmlsource = get_html_source(url, "", save_file=False, overwrite=False)
    match = re.findall('''<artist(.*?)</artist>''', htmlsource)
    if match:
        for item in match:
            artist = {}
            artist["score"] = ""
            artist["name"] = ""
            artist["id"] = ""
            artist["sortname"] = ""
            score_match = re.search('''score="(.*?)"''', item)
            name_match = re.search('''<name>(.*?)</name>''', item)
            id_match = re.search('''id="(.*?)"(?:.*?)>''', item)
            if not id_match:
                id_match = re.search('''id="(.*?)">''', item)
            sort_name_match = re.search('''<sort-name>(.*?)</sort-name>''',
                                        item)
            if score_match:
                artist["score"] = score_match.group(1)
            if name_match:
                artist["name"] = unescape(smart_unicode(name_match.group(1)))
            if id_match:
                artist["id"] = id_match.group(1)
            if sort_name_match:
                artist["sortname"] = unescape(
                    smart_unicode(sort_name_match.group(1)))
            log("Score     : %s" % artist["score"], xbmc.LOGDEBUG)
            log("Id        : %s" % artist["id"], xbmc.LOGDEBUG)
            log("Name      : %s" % artist["name"], xbmc.LOGDEBUG)
            log("Sort Name : %s" % artist["sortname"], xbmc.LOGDEBUG)
            artists.append(artist)
    else:
        log("No Artist ID found for Artist: %s" % repr(artist_search),
            xbmc.LOGDEBUG)
    xbmc.sleep(mb_delay)
    return artists
Example #25
 def _locate(self, town_name):
     town_name = utils.unescape(town_name.strip())
     if town_name not in self.location_cache:
         try:
             self.location_cache[town_name] = self.geo_locator.geocode(town_name)
         except geopy.exc.GeocoderTimedOut:
             print "Geocoder Timeout."
             return None
     return self.location_cache[town_name]
Example #26
def getFavourites(file, limit=10000, validate=True, superSearch=False):
    import xbmcgui

    file = xbmc.translatePath(file)
    xml  = '<favourites></favourites>'
    if os.path.exists(file):  
        fav = open(file , 'r')
        xml = fav.read()
        fav.close()

    items = []

    faves = re.compile('<favourite(.+?)</favourite>').findall(xml)

    for fave in faves:
        fave = fave.replace('&quot;', '&_quot_;')
        fave = fave.replace('\'', '"')
        fave = utils.unescape(fave)

        fave = fave.replace('name=""', '')
        try:    name = re.compile('name="(.+?)"').findall(fave)[0]
        except: name = ''

        try:    thumb = re.compile('thumb="(.+?)"').findall(fave)[0]
        except: thumb = ''

        try:    cmd   = fave.split('>', 1)[-1]
        except: cmd = ''

        #name  = utils.Clean(name.replace( '&_quot_;', '"'))
        name  = name.replace( '&_quot_;', '"')
        thumb = thumb.replace('&_quot_;', '"')
        cmd   = cmd.replace(  '&_quot_;', '"')

        add = False

        if superSearch:
            add = isValid(cmd)
        elif (SHOWUNAVAIL) or (not validate) or isValid(cmd):
            add = True

        if add:
            cmd = upgradeCmd(cmd)

            if cmd.startswith('PlayMedia'):
                option = 'mode'
                try:                        
                    mode = int(favourite.getOption(cmd, option))
                except:
                    win  = xbmcgui.getCurrentWindowId()
                    cmd  = updateSFOption(cmd, 'winID', win)

            items.append([name, thumb, cmd])
            if len(items) > limit:
                return items

    return items
Example #27
def check_url(url, geturl=False):
    send = []
    try:
        o = urllib.urlopen(url)
        ctype, clength = o.info().get("Content-Type"), o.info().get(
            "Content-Length")
        if o.info().gettype() == "text/html":
            title = 'Pas de titre'
            html = o.read(1000000)
            try:
                SoupList = BeautifulSoup(utils.unescape(html),
                                         parseOnlyThese=SoupStrainer('title'))
            except UnicodeDecodeError:
                SoupList = BeautifulSoup(utils.unescape(
                    html.decode("latin1", "ignore")),
                                         parseOnlyThese=SoupStrainer('title'))
            try:
                titles = [title for title in SoupList]
                title = utils.xhtml2text(titles[0].renderContents())
            except IndexError:
                title = "Pas de titre"
            except HTMLParseError:
                pass
            if geturl:
                send.append("%s : [Lien] Titre : %s" %
                            (o.geturl(), " ".join(title.split())))
            else:
                send.append("[Lien] Titre : %s" % " ".join(title.split()))
        else:
            send.append("[Lien] Type: %s, Taille : %s octets" %
                        (ctype, clength))
        o.close()
    except IOError as error:
        if error[1] == 401:
            send.append("Je ne peux pas m'authentifier sur %s :'(" % url)
        elif error[1] == 404:
            send.append("%s n'existe pas !" % url)
        elif error[1] == 403:
            send.append("Il est interdit d'accéder à %s !" % url)
        else:
            send.append("Erreur %s sur %s" % (error[1], url))
    except httplib.InvalidURL:
        send.append("L'URL %s n'est pas valide !" % url)
    return send
Example #28
 def settings_to_log(self):
     try:
         utils.log("Settings")
         setting_values = self.read_settings_xml()
         for k, v in sorted(setting_values.items()):
             utils.log(
                 "%30s: %s" %
                 (k, str(utils.unescape(v.decode('utf-8', 'ignore')))))
     except:
         traceback.print_exc()
Example #29
 def to_str(self, i=1):
     s = "{}:\n".format(type(self).__name__)
     for field in list(self._fields) + self.extra_fields:
         value = self.__getattribute__(field)
         if isinstance(value, Tree):
             s += "{}{}:{}".format(self.unit * i, field,
                                   value.to_str(i + 1))
         elif isinstance(value, list):
             s += "{}{}:\n".format(self.unit * i, field)
             for v in value:
                 if isinstance(v, Tree):
                     s += "{}- {}".format(self.unit * i, v.to_str(i + 1))
                 else:
                     s += "{}- {}\n".format(self.unit * i,
                                            utils.unescape(v))
         else:
             s += "{}{}: {}\n".format(self.unit * i, field,
                                      utils.unescape(value))
     return s
Example #30
 def post(self):
     text = self.request.get('text')
     if text:
         conver_url = utils.unescape(self.request.get('url'))
         conver = Conver.get_for_url(conver_url)
         message = Message(author=PermaUser.get_current_permauser(), text=text, conver=conver)
         message.put()
         self.distribute_message(message)
         
     else:
         logging.error("No message '%s' saved for %s", text, conver_url)
Example #31
 def parse_starting_page(self, response):
     ranking = 0
     for sel in response.xpath('//div[@class="content"]/table/tr'):
         team_link = sel.xpath('td/a/@href').extract_first()
         if team_link is not None:
             team_name = sel.xpath('td/a/text()').extract_first()
             data = sel.xpath('td/text()').extract()
             ranking_item = JtrTeamRankingItem()
             ranking_item['team_name'] = utils.unescape(team_name)
             if len(data) == 4:
                 ranking, city, tournaments, points = data
             else:
                 city, tournaments, points = data
             ranking_item['ranking'] = int(ranking.split("/")[0].strip().strip("."))
             ranking_item['hometown'] = utils.unescape(city)
             ranking_item['points'] = float(points)
             ranking_item['number_of_tournaments'] = utils.unescape(tournaments)
             ranking_item['crawl_date'] = datetime.datetime.now()
             yield  ranking_item
             yield scrapy.Request(response.urljoin(team_link), callback=self.parse_team_site)
Example #32
def addtoKodiFavorites(command, name, thumbnail):
    import xml.etree.ElementTree
    from utils import unescape

#adding to favorites involves 3 steps:
#  1.) add the favorite via jsonrpc (script params not included)
#  2.) modify the favourites.xml to include script params <-- (kodi18 leia alpha1) there may be another favourites file, or this file is cached until another favorite is added
#  3.) ??? <-- adding another favorite will delete the first one (until kodi is restarted); need to find a way for kodi to reload the modified favourites.xml

    #http://kodi.wiki/view/JSON-RPC_API/v8#Favourites
    #schema=xbmc.executeJSONRPC('{"jsonrpc": "2.0", "method": "JSONRPC.Introspect", "id": 1}')
    #log(repr(schema))
    favorite_was_found=False
    #add_dummy_favorite()
    temp_command='script.reddit.reader' #can't add script favorites with parameter using jsonrpc
    saved_command='RunScript("script.reddit.reader")'

    json_rpc_command={"jsonrpc": "2.0",
                      "method": "Favourites.AddFavourite",
                      'params': {
                                 'title': name,
                                 'type': 'script',
                                 'path': temp_command,
                                 'thumbnail':thumbnail,
                                 },
                      'id': '1'
                      }
    a=xbmc.executeJSONRPC(json.dumps(json_rpc_command))
    #log(repr(a))
    a=json.loads(a)
    if a.get('result','')=="OK":
        log('Favourite added')
        #now that we've created the favorite, we edit it to add parameters
        favorites_xml       = xbmc.translatePath(os.path.join(addon.getAddonInfo('profile'), '..','..','favourites.xml'))
        if os.path.exists(favorites_xml):
            #log('{0} exists'.format(favorites_xml) )
            et = xml.etree.ElementTree.parse(favorites_xml)
            root=et.getroot()

            for f in root.findall('favourite'):
                #the name attribute is escape encoded the xml file.
                fav_name=unescape( f.get('name') ) #replaces &amp; to & etc.
                fav_cmd=f.text
                #log('*a*'+repr(name) + '  ' + saved_command)
                #log('*b*'+repr(fav_name) + '  ' + fav_cmd )
                #log('---')
                if (fav_name==name) and (fav_cmd==saved_command):
                    log('Favourite entry found {0}'.format(fav_name) )
                    favorite_was_found=True
                    f.text=command

            if favorite_was_found:
                et.write(favorites_xml)
                xbmc_notify(translation(32028), fav_name, icon=thumbnail)
Example #33
def extract_tweets(tweets, cmd_line=False):
    """ prints the tweets from tweets: list of tweet dicts """
    tweet_texts = []
    for tweet in tweets:
        text = get_tweet(tweet)
        if cmd_line:
            text = text.encode('unicode-escape')
            text = ununicode(text)
            text = unescape(text)
        tweet_texts.append(parser(text))
    return tweet_texts
Example #34
File: utils.py Project: alub/pipobot
def check_url(url, geturl=False):
    send = []
    try:
        o = urllib.urlopen(url)
        ctype, clength = o.info().get("Content-Type"), o.info().get("Content-Length")
        if  o.info().gettype() == "text/html":
            title = 'Pas de titre'
            html = o.read(1000000)
            try:
                SoupList = BeautifulSoup(utils.unescape(html),
                                         parseOnlyThese=SoupStrainer('title'))
            except UnicodeDecodeError:
                SoupList = BeautifulSoup(utils.unescape(html.decode("latin1", "ignore")),
                                         parseOnlyThese=SoupStrainer('title'))
            try:
                titles = [title for title in SoupList]
                title = utils.xhtml2text(titles[0].renderContents())
            except IndexError:
                title = "Pas de titre"
            except HTMLParseError:
                pass
            if geturl:
                send.append("%s : [Lien] Titre : %s" %
                            (o.geturl(), " ".join(title.split())))
            else:
                send.append("[Lien] Titre : %s" % " ".join(title.split()))
        else:
            send.append("[Lien] Type: %s, Taille : %s octets" % (ctype, clength))
        o.close()
    except IOError as error:
        if error[1] == 401:
            send.append("Je ne peux pas m'authentifier sur %s :'(" % url)
        elif error[1] == 404:
            send.append("%s n'existe pas !" % url)
        elif error[1] == 403:
            send.append("Il est interdit d'accéder à %s !" % url)
        else:
            send.append("Erreur %s sur %s" % (error[1], url))
    except httplib.InvalidURL:
        send.append("L'URL %s n'est pas valide !" % url)
    return send
Example #35
def load(request):
    if not request.user.is_authenticated():
        return HttpResponseRedirect("/accounts/login")
    if request.method == 'POST':
        form = ImportDeliciousForm(request.POST, request.FILES)
        if form.is_valid():
            db = get_database()[Bookmark.collection_name]
            html = request.FILES['exported'].read().decode('utf8')
            soup = BeautifulSoup(html)
            for item in soup.findAll('dt'):
                desc = ''
                next = item.findNextSiblings()
                if next:
                    next = next[0]
                    if 'name' in dir(next) and next.name == 'dd':
                        desc = unescape(u''.join(imap(unicode, next.contents)))
                db.Bookmark({
                    'url':
                    urlSanitize(item.a['href']),
                    'seq':
                    getNextVal('seq'),
                    'tags':
                    item.a['tags'].split(','),
                    'user':
                    unicode(request.user),
                    'created':
                    datetime.fromtimestamp(float(item.a['add_date'])),
                    'private':
                    item.a['private'] == '1',
                    'title':
                    unescape(unicode(item.a.string)),
                    'notes':
                    unicode(desc)
                }).save()
            return HttpResponseRedirect('/u/%s/' % request.user)
    else:
        form = ImportDeliciousForm()
    return render_to_response('import.html', {
        'form': form,
    },
                              context_instance=RequestContext(request))
Example #36
    def _build_show_summary(
        self,
        data,
        show_status=False,
        pre_rating='',
        post_rating='',
        ratings_pos='front',
        preserve_rating=False,
    ):
        out = []

        star = unescape("&#9733;")
        sep = " | "
        status = _get(data, 'status')
        plot = _get(data, 'plot')
        alt_ratings = _get(data, 'alt_ratings')
        rating = _get(data, 'rating', 0.0)

        if show_status and status:
            out.append('Status: {}'.format(status))

        if plot:
            out.append(plot)

        if alt_ratings:
            buf = []
            for source, _rating in alt_ratings:
                buf.append("{}: {}".format(source, _rating))
            piece = sep.join(buf)

            if ratings_pos == 'front':
                out.insert(0, star + " " + piece + " " + star + "\n\n")
            else:
                out.append("\n\n" + star + " " + piece + " " + star)

        if preserve_rating:
            tmp = unescape("{}{:.1f}{}".format(pre_rating, rating,
                                               post_rating))
            out.insert(0, tmp)

        return sep.join(out)
Example #37
def get_musicbrainz_artists( artist_search, limit=1 ):
    log( "Artist: %s" % artist_search, xbmc.LOGDEBUG )
    score = ""
    name = ""
    id = ""
    sortname = ""
    artists = []
    artist_name = smart_unicode( artist_search.replace( '"', '?' ) )
    url = artist_url % ( server, quote_plus( artist_name.encode("utf-8") ), limit )
    htmlsource = get_html_source( url, "", save_file = False, overwrite = False )
    match = re.findall( '''<artist(.*?)</artist>''', htmlsource )
    if match:
        for item in match:
            artist = {}
            artist["score"] = ""
            artist["name"] = ""
            artist["id"] = ""
            artist["sortname"] = ""
            score_match = re.search( '''score="(.*?)"''', item )
            name_match = re.search( '''<name>(.*?)</name>''', item )
            id_match = re.search( '''id="(.*?)"(?:.*?)>''', item )
            if not id_match:
                id_match = re.search( '''id="(.*?)">''', item )
            sort_name_match = re.search( '''<sort-name>(.*?)</sort-name>''', item )
            if score_match:
                artist["score"] = score_match.group(1)
            if name_match:
                artist["name"] = unescape( smart_unicode( name_match.group(1) ) )
            if id_match:
                artist["id"] = id_match.group(1)
            if sort_name_match:
                artist["sortname"] = unescape( smart_unicode( sort_name_match.group(1) ) )
            log( "Score     : %s" % artist["score"], xbmc.LOGDEBUG )
            log( "Id        : %s" % artist["id"], xbmc.LOGDEBUG )
            log( "Name      : %s" % artist["name"], xbmc.LOGDEBUG )
            log( "Sort Name : %s" % artist["sortname"], xbmc.LOGDEBUG )
            artists.append(artist)
    else:
        log( "No Artist ID found for Artist: %s" % repr( artist_search ), xbmc.LOGDEBUG )
    xbmc.sleep( mb_delay )
    return artists
Example #38
File: greedy.py Project: huanghao/muse
def artist(url):
    html = urlopen(url)
    
    found = re.findall(r'<a href="/music/url\?q=(/music/album\?.*?)&.*?>(.*?)</a>',
                       html.split('所有专辑', 1)[1])
    albums = dict(found)
    artist = trim_title(html)
    print artist, 'albums', len(albums)
    
    for href, title in sorted(albums.items(), lambda i,j: cmp(i[1],j[1])):
        url = 'http://www.google.cn%s' % urllib.unquote(href)
        print '%s |%s' % (url, unescape(title))
Example #39
File: tasks.py Project: thetwoj/gvobot
def _message_handler(message):
    mention = '@gvobot'
    print(message)
    # Strip mention if it's at the beginning
    if message == mention:
        message = ''
    elif message.startswith(mention):
        # Remove the extra space added after the mention, too
        message = message[len(mention) + 1:]

    # Unescape message (skype encodes &, <, >, ', and ")
    message = unescape(message)

    # Be snarky when no message is sent; otherwise, S.C.I.E.N.C.E.
    if len(message) == 0:
        response = 'Has anyone really been far even as decided to use ' \
            'even go want to do look more like?'
    elif FIXED_RESPONSES.get(message.lower(), None) is not None:
        response = FIXED_RESPONSES[message.lower()]
    elif message.startswith('!number'):
        usage = 'Usage: !number [<start num> <end num>]'
        args = message.split()
        if len(args) == 1:
            response = str(random.randint(1, 6))
        elif len(args) == 3:
            try:
                start = int(args[1])
                end = int(args[2])
                response = str(random.randint(start, end))
            except ValueError:
                response = usage
        else:
            response = usage
    elif message.startswith('!song'):
        args = message.split()
        if len(args) == 1:
            song = get_random_song()
            response = song.to_message()
        else:
            response = 'Usage: !song'
    else:
        response = sciencify(message)

        # Allow bot to do actions with /me
        if response.startswith('/M.E. '):
            response = response.replace('/M.E.', '/me', 1)

        # The bot's name is unscienceable.
        response = response.replace('@G.V.O.B.O.T.', '@gvobot')

    print(response)
    return response
Example #40
File: feds.py Project: pshc/imprint
def twitter_status(twitter_username):
    status = cache.get('feds-%s-status' % twitter_username)
    if status is None:
        try:
            import twitter
            user = twitter.Api().GetUser(twitter_username)
            status = user.status
            text = unescape(status.text)
            status = render_to_string('feds/twitter_status.html', locals())
        except:
            status = ''
        cache.set('feds-%s-status' % twitter_username, status)
    return status
Example #41
def get_musicbrainz_artist_id( artist, limit=1, alias = False ):
    name = ""
    id = ""
    sortname = ""
    artist_name = smart_unicode( artist.replace( '"', '?' ) )
    if not alias:
        url = artist_url % ( server, quote_plus( artist_name.encode("utf-8") ), limit )
    else:
        url = alias_url % ( server, quote_plus( artist_name.encode("utf-8") ), limit )
    htmlsource = get_html_source( url, "", save_file = False)
    match = re.search( '''<artist(.*?)</artist>''', htmlsource )
    if match:
        score_match = re.search( '''score="(.*?)"''', htmlsource )
        name_match = re.search( '''<name>(.*?)</name>''', htmlsource )
        id_match = re.search( '''<artist id="(.*?)"(?:.*?)>''', htmlsource )
        if not id_match:
            id_match = re.search( '''<artist (?:.*?)id="(.*?)">''', htmlsource )
        sort_name_match = re.search( '''<sort-name>(.*?)</sort-name>''', htmlsource )
        
        if score_match:
            score = score_match.group(1)
        if name_match:
            name = unescape( smart_unicode( name_match.group(1) ) )
        if id_match:
            id = id_match.group(1)
        if sort_name_match:
            sortname = unescape( smart_unicode( sort_name_match.group(1) ) )
        log( "Score     : %s" % score, xbmc.LOGDEBUG )
        log( "Id        : %s" % id, xbmc.LOGDEBUG )
        log( "Name      : %s" % name, xbmc.LOGDEBUG )
        log( "Sort Name : %s" % sortname, xbmc.LOGDEBUG )
    else:
        if not alias:
            log( "No Artist ID found trying aliases: %s" % artist, xbmc.LOGDEBUG )
            name, id, sortname = get_musicbrainz_artist_id( artist, limit, True )
        else:
            log( "No Artist ID found for Artist: %s" % artist, xbmc.LOGDEBUG )
    xbmc.sleep( mb_delay )
    return name, id, sortname
Example #42
def article_list(request, blog_id=None):
    ret = {'status': 'error', "data": []}
    if blog_id:
        try:
            blog = BlogModel.objects.get(pk=blog_id)
            articles = ArticleModel.objects.filter(niche=blog.niche)
            for article in articles:
                ret["data"].append({'id': article.id, 'title': article.title, 
                                    'text': mark_safe( unescape(article.text))})
            ret['status'] = 'ok';
        except Exception, e:
            print(e)
            pass
Example #43
File: po2csv.py Project: sleepyjames/ppb
def undo_plurals(has_plural, plurals):
    """Undo what `force_plurals` does in order to figure out if just `msgstr`
    or `msgstr[x]` should be set. Returns `(singular_msgstr, plural_msgstr_map)`
    """
    plurals_list = plurals.split(PLURAL_SEPARATOR)
    plurals_dict = {}

    for i, p in enumerate(plurals_list):
        plurals_dict[unicode(i)] = unescape(p)

    if has_plural:
        return '', plurals_dict
    return plurals_dict.get('0', ''), {}
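A hypothetical round-trip to illustrate the return shape, assuming PLURAL_SEPARATOR is '|' (its real value is defined elsewhere in po2csv.py):

# Hypothetical values; PLURAL_SEPARATOR = '|' is an assumption.
undo_plurals('houses', 'house|houses')
# -> ('', {u'0': u'house', u'1': u'houses'})
undo_plurals('', 'house')
# -> (u'house', {})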
Example #44
def strip_tags(text):
    # Preserve paragraph breaks. Convert closing p tags (and surrounding whitespace) into two newlines. Strip trailing whitespace
    text = re.sub("\s*</\s*p\s*>\s*", "\n\n", text).strip()

    # naive stripping of tags, should work okay in this limited context
    text = re.sub("<[^>]+>", "", text)

    # compress and strip whitespace artifacts, except for the paragraph breaks
    text = re.sub("[ \t\r\f\v]{2,}", " ", text).strip()

    # Replace HTML entities with characters.
    text = utils.unescape(text)

    return text
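A hypothetical call showing the intended behavior (assuming utils.unescape decodes HTML entities):

# Hypothetical usage sketch.
html = "<p>Ben &amp; Jerry</p>  <p>ice   cream</p>"
print(strip_tags(html))
# Ben & Jerry
#
# ice cream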
Example #45
 def parse_team_site(self, response):
     team = response.xpath('//div[@class="title"]/text()').extract_first()
     for sel in response.xpath('//div[@class="content"]/table/tr'):
         tournament_link = sel.xpath('td/a/@href').extract_first()
         if tournament_link is not None:
             data = sel.xpath('td/text()').extract()
             tournament_name = sel.xpath('td/a/text()').extract_first()
             if len(data) == 6:
                 date, tournament_town, ranking, zf, tw, points = data
                 item = JtrTournamentPartition()
                 item['tournament_date'] = date
                 item['crawl_date'] = datetime.datetime.now()
                 item['ranking'] = int(ranking.split("/")[0].strip().strip("."))
                 home_town, team_name = team.split("-", 1)
                 item['team_name'] = utils.unescape(team_name.strip())
                 item['team_hometown'] = utils.unescape(home_town.strip())
                 item['tournament_town'] = utils.unescape(tournament_town)
                 item['tournament_name'] = utils.unescape(tournament_name)
                 home_town = self._locate(home_town)
                 tournament_town = self._locate(tournament_town)
                 item["team_hometown_position"] = self._get_geohash(home_town)
                 item["tournament_town_position"] = self._get_geohash(tournament_town)
                 item["distance"] = self._get_distance(home_town, tournament_town)
                 yield item
Example #46
def get_tracks(album, links):
    track_num = 1
    for link in links:
        track = {}
        _, track['name'] = link['data-title'].split(' — ')
        track['year'] = album.year
        track['album'] = album
        track['name'] = unescape(track['name'])
        track['number'] = track_num
        track['url'] = url + link['data-mp3url']
        new_track = Track(**track)
        session.add(new_track)
        track_num += 1
    session.commit()
    return track_num
Example #47
def article_list(request, blog_id=None):
    ret = {'status': 'error', "data": []}
    if blog_id:
        try:
            blog = BlogModel.objects.get(pk=blog_id)
            articles = ArticleModel.objects.filter(niche=blog.niche)
            for article in articles:
                ret["data"].append({
                    'id': article.id,
                    'title': article.title,
                    'text': mark_safe(unescape(article.text))
                })
            ret['status'] = 'ok'
        except Exception, e:
            print(e)
            pass
Example #48
 def get(self):
     permauser = PermaUser.get_current_permauser()
     conver_url = utils.unescape(self.request.get('url'))
     conver = Conver.get_for_url(conver_url)
     messages = Message.all().filter('conver =', conver).order('created').fetch(1000)
     self.response.out.write(template.render(
         os.path.join(os.path.dirname(__file__),
         'templates/conver.html'), 
         {
             'token': channel.create_channel(permauser.user_id() + str(conver.key().id_or_name())),
             'conver_url': conver_url,
             'messages': [ {'author': message.author.display_name(), 'text': message.text} for message in messages],
             'loginorout_text': 'Log out',
             'loginorout_url': users.create_logout_url(self.request.uri)
         }
     ))
Example #49
File: api.py Project: androa/vgtv-xbmc
    def get_categories(self, root_id=0):
        categories = self.get_category_tree()

        matches = []
        for id in categories:

            if int(id) < 0:
                continue

            category = categories.get(id)
            if (int(category.get('parentId')) == int(root_id)):
                matches.append({
                    'label': unescape(category.get('name')),
                    'path':  self.plugin.url_for('show_category', id=str(id)),
                    'id':    id
                })

        return matches
Example #50
def getTVGuide(tvchannel):
    url = getChannelGuideUrl(tvchannel)
    
    if not url:
        return None
    
    try:
        req = urllib2.Request(url)
        req.add_header('User-Agent', common.HEADERS['User-Agent'])
        conn = urllib2.urlopen(req, timeout=5)
        html = conn.read()
        conn.close()
        
        soup = BeautifulSoup(html, 'html5lib')
        tds = soup.findAll('td', attrs={'class': 'container_events'})
        tds = [tds[i] for i in xrange(len(tds)) if divmod(i, 4)[1] == 0]
        
        hours = []
        titles = []
        
        for td in tds:
            hours.extend(td.findAll('td', attrs={'class': 'ora'}))
            titles.extend(td.findAll('div', attrs={'class': 'title'}))
        
        if not hours or not titles or len(hours) != len(titles):
            return None
        
        items = []
        
        for i in xrange(len(titles)):
            current = 'current' in str(hours[i])
            hour = re.search(r'<div>(\d+:\d+)<\/div>', str(hours[i])).group(1)
            title = titles[i].getText().strip()
            title = ' '.join(title.split())
            title = utils.unescape(title, True)
            item = (hour, title, current)
            items.append(item)
        
        return items
    
    except:
        log_utils.log(traceback.print_exc())
    
    return None
Example #52
def find_links(doc_id):
    if doc_id is None:
        return

    doc = Page.load(settings.db, doc_id)

    if doc.content is None:
        print "Got None for the content of %s -> %s." % (doc_id, doc.url)
        return

    raw_links = []
    for match in link_single_re.finditer(doc.content):
        raw_links.append(match.group(1))

    for match in link_double_re.finditer(doc.content):
        raw_links.append(match.group(1))

    doc.links = []
    for link in raw_links:
        if link.startswith("#"):
            continue
        elif link.startswith("http://") or link.startswith("https://"):
            pass
        elif link.startswith("/"):
            parse = urlparse(doc["url"])
            link = parse.scheme + "://" + parse.netloc + link
        else:
            link = "/".join(doc["url"].split("/")[:-1]) + "/" + link

        doc.links.append(unescape(link.split("#")[0]))

    print "find_links %s -> %i" % (doc.url, len(doc.links))
    doc.store(settings.db)

    calculate_rank.delay(doc.id)

    for link in doc.links:
        p = Page.get_id_by_url(link, update=False)
        if p is not None:
            calculate_rank.delay(p)
        else:
            retrieve_page.delay(link)
Example #53
    res = db.find(query, sort=order)
    total = res.count()
    paginator = Paginator(res, limit)
    try:
        res = paginator.page(page)
    except (EmptyPage, InvalidPage):
        res = paginator.page(paginator.num_pages)

    if request.GET.get('format', '') == 'json':
        res = [{
            'url': unicode(obj['url']),
            'title': unicode(obj['title']),
            'created': tuple(obj['created'].timetuple()),
            'private': obj['private'],
            'notes': unicode(unescape(obj['notes'])),
            'tags': obj['tags']
        } for obj in res.object_list]
        if request.GET.get('j') == None:
            return HttpResponse(json.dumps(res), mimetype="application/json")
        return HttpResponse("var omnom_posts = " + json.dumps(res) + ";",
                            mimetype="text/javascript")

    if request.GET.get('format', '') == 'atom':
        tpl = 'atom.xml'
    else:
        tpl = 'list.html'

    res.object_list = [{
        'seq':
        obj['seq'],
Example #54
def latex2png(picture_element, preamble, return_eps=False, page_width_px=None,
              dpi=150, included_files={}, pdflatexpath=None):
    """
    Create a PNG image from latex.

    Inputs:

      picture_element - etree.Element

      preamble - which preamble to use, one of PsPicture_preamble, tikzpicture_preamble
      or equation_preamble

      return_eps - whether to also return the intermediate EPS file

      page_width_px - page width in pixels, used to scale the
        style:width attribute in the element.

      dpi - Will be used only if the width of the figure relative to
        the page width was not set (or the page width in pixels was not
        passed as an argument).

    Outputs:

    One or two paths, the first to the PNG, the second to the EPS.
    """
    temp_dir = tempfile.mkdtemp()
    latex_path = os.path.join(temp_dir, 'figure.tex')
    png_path = os.path.join(temp_dir, 'figure.png')
    pdf_path = os.path.join(temp_dir, 'figure.pdf')

    # can send the raw string code or a <pre> element with <code> child
    if isinstance(picture_element, (str, unicode)):
        code = picture_element
        code = cleanup_code(code)
    else:
        code = picture_element.find('.//code').text.encode('utf-8')
    code = code.replace(r'&amp;', '&').replace(r'&gt;', '>').replace(r'&lt;', '<')

    if not code:
        raise ValueError("Code cannot be empty.")

    with open(latex_path, 'wt') as fp:
        temp = unescape(preamble.replace('__CODE__', code.strip()))
        try:
            fp.write(temp)
        except UnicodeEncodeError:
            fp.write(temp.encode('utf-8'))

    for path, path_file in included_files.iteritems():
        try:
            os.makedirs(os.path.join(temp_dir, os.path.dirname(path)))
        except OSError:
            # Catch exception if path already exists
            pass
        with open(os.path.join(temp_dir, path), 'wb') as fp:
            fp.write(path_file.read())

    if not pdflatexpath:
        raise ValueError("pdflatexpath cannot be None")

    errorLog, temp = execute([pdflatexpath,
                              "-shell-escape", "-halt-on-error",
                              "-output-directory", temp_dir, latex_path])
    try:
        open(pdf_path, "rb")
    except IOError:
        raise LatexPictureError(
            "LaTeX failed to compile the image. %s \n%s" % (
                latex_path, preamble.replace('__CODE__', code.strip())))

    # crop the pdf image too
    # execute(['pdfcrop', '--margins', '1', pdfPath, pdfPath])

    execute(['convert', '-density', '%i' % dpi, pdf_path, png_path])

    return png_path
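A hypothetical invocation; tikzpicture_preamble (mentioned in the docstring) and the pdflatex path are assumptions about the surrounding module:

# Hypothetical usage sketch.
png = latex2png(r'\begin{tikzpicture}\draw (0,0) -- (1,1);\end{tikzpicture}',
                tikzpicture_preamble,
                pdflatexpath='/usr/bin/pdflatex')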
Example #55
def listLinksInComment(url, name, type_):
    from domains import parse_reddit_link, build_DirectoryItem_url_based_on_media_type
    from utils import markdown_to_bbcode, unescape
    from guis import progressBG
    #from resources.domains import make_addon_url_from
    #called from context menu
    log('listLinksInComment:%s:%s' % (type_, url))

    #does not work for list comments coz key is the playable url (not reddit comments url)
    #msg=WINDOW.getProperty(url)
    #WINDOW.clearProperty( url )
    #log( '   msg=' + msg )

    directory_items = []
    author = ""
    ShowOnlyCommentsWithlink = False

    if type_ == 'linksOnly':
        ShowOnlyCommentsWithlink = True

    #url='https://www.reddit.com/r/Music/comments/4k02t1/bonnie_tyler_total_eclipse_of_the_heart_80s_pop/' + '.json'
    #only get up to "https://www.reddit.com/r/Music/comments/4k02t1".
    #   do not include                                            "/bonnie_tyler_total_eclipse_of_the_heart_80s_pop/"
    #   because we'll have a problem when it looks like this: "https://www.reddit.com/r/Overwatch/comments/4nx91h/ever_get_that_feeling_déjà_vu/"

    #url=re.findall(r'(.*/comments/[A-Za-z0-9]+)',url)[0]

    #use safe='' argument in quoteplus to encode only the weird chars part
    url = urllib.quote_plus(url, safe=':/?&')
    if '?' in url:
        url = url.split('?', 1)[0] + '.json?' + url.split('?', 1)[1]
    else:
        url += '.json'

    loading_indicator = progressBG(translation(30024))
    loading_indicator.update(0, 'Retrieving comments')

    content = reddit_request(url)
    if not content:
        loading_indicator.end()
        return

    loading_indicator.update(10, 'Parsing')
    content = json.loads(content)

    del harvest[:]
    #harvest links in the post text (just 1)
    r_linkHunter(content[0]['data']['children'])

    try:
        submitter = content[0]['data']['children'][0]['data']['author']
    except:
        submitter = ''

    #the post title is provided in json, we'll just use that instead of messages from addLink()
    try:
        post_title = content[0]['data']['children'][0]['data']['title']
    except:
        post_title = ''
    #for i, h in enumerate(harvest):
    #    log("aaaaa first harvest "+h[2])

    #harvest links in the comments (content[1] is the comment tree)
    r_linkHunter(content[1]['data']['children'])

    comment_score = 0

    loading_indicator.set_tick_total(len(harvest))

    for i, h in enumerate(harvest):
        try:
            #log(str(i)+"  score:"+ str(h[0]).zfill(5)+" "+ h[1] +'|'+ h[3] )
            comment_score = h[0]
            #log("score %d < %d (%s)" %(comment_score,int_CommentTreshold, CommentTreshold) )
            link_url = h[2]
            desc100 = h[3].replace('\n', ' ')[0:100]  #first 100 characters of description

            kind = h[6]  #reddit uses t1 for user comments, t3 for the OP's own text of the post
            d = h[5]  #depth of the comment

            tab = " " * d if d > 0 else "-"

            from urlparse import urlparse
            domain = '{uri.netloc}'.format(uri=urlparse(link_url))

            author = h[7]
            DirectoryItem_url = ''

            if comment_score < int_CommentTreshold:
                continue

            #hoster, DirectoryItem_url, videoID, mode_type, thumb_url,poster_url, isFolder,setInfo_type, setProperty_IsPlayable =make_addon_url_from(h[2])
            #if link_url:
            #    log( '  comment %s TITLE:%s... link[%s]' % ( str(d).zfill(3), desc100.ljust(20)[:20],link_url ) )

            ld = parse_reddit_link(link_url=link_url,
                                   assume_is_video=False,
                                   needs_preview=True,
                                   get_playable_url=True)

            if kind == 't1':
                list_title = r"[COLOR cadetblue]%3d[/COLOR] %s" % (h[0], tab)
            elif kind == 't3':
                list_title = r"[COLOR cadetblue]Title [/COLOR] %s" % (tab)

            #helps the textbox control treat [url description] and (url) as separate words, so they can be split onto 2 lines
            plot = h[3].replace('](', '] (')
            plot = markdown_to_bbcode(plot)
            plot = unescape(plot)  #convert html entities e.g.:(&#39;)

            liz = xbmcgui.ListItem(label=list_title + ': ' + desc100)

            liz.setInfo(type="Video",
                        infoLabels={
                            "Title": h[1],
                            "plot": plot,
                            "studio": domain,
                            "votes": str(comment_score),
                            "director": author
                        })
            isFolder = False

            #force all links to ytdl to see if it can be played
            if link_url:
                DirectoryItem_url, setProperty_IsPlayable, isFolder, title_prefix = build_DirectoryItem_url_based_on_media_type(
                    ld, link_url)

                liz.setProperty('IsPlayable', setProperty_IsPlayable)
                liz.setProperty('url', DirectoryItem_url)  #<-- needed by the xml gui skin
                liz.setPath(DirectoryItem_url)

                if domain:
                    plot = "  [COLOR greenyellow][%s] %s[/COLOR]" % (domain, plot)
                else:
                    plot = "  [COLOR greenyellow][%s][/COLOR]" % (plot)
                liz.setLabel(list_title + plot)

                if ld:
                    liz.setArt({
                        "thumb": ld.poster,
                        "poster": ld.poster,
                        "banner": ld.poster,
                        "fanart": ld.poster,
                        "landscape": ld.poster
                    })

            if DirectoryItem_url:
                #log( 'IsPlayable:'+setProperty_IsPlayable )
                directory_items.append((
                    DirectoryItem_url,
                    liz,
                    isFolder,
                ))
                #xbmcplugin.addDirectoryItem(handle=pluginhandle,url=DirectoryItem_url,listitem=liz,isFolder=isFolder)
            else:
                #this section is for comments that have no links
                if not ShowOnlyCommentsWithlink:
                    result = h[3].replace('](', '] (')
                    result = markdown_to_bbcode(result)
                    liz = xbmcgui.ListItem(label=list_title + desc100)
                    liz.setInfo(type="Video",
                                infoLabels={
                                    "Title": h[1],
                                    "plot": result,
                                    "studio": domain,
                                    "votes": str(h[0]),
                                    "director": author
                                })
                    liz.setProperty('IsPlayable', 'false')

                    directory_items.append((
                        "",
                        liz,
                        False,
                    ))
                    #xbmcplugin.addDirectoryItem(handle=pluginhandle,url="",listitem=liz,isFolder=False)

                #END of section for comments that have no links or unsupported links
        except Exception as e:
            log('  EXCEPTION:' + str(e))

        #for di in directory_items:
        #    log( str(di) )

        loading_indicator.tick(1, desc100)
    loading_indicator.end()

    #log('  comments_view id=%s' %comments_viewMode)

    #xbmcplugin.setContent(pluginhandle, "mixed")  #in estuary, mixed have limited view id's available. it has widelist which is nice for comments but we'll just stick with 'movies'
    xbmcplugin.setContent(pluginhandle, "episodes")  #files, songs, artists, albums, movies, tvshows, episodes, musicvideos
    xbmcplugin.setPluginCategory(pluginhandle, 'Comments')

    xbmcplugin.addDirectoryItems(handle=pluginhandle, items=directory_items)
    xbmcplugin.endOfDirectory(pluginhandle)

    if comments_viewMode:
        xbmc.executebuiltin('Container.SetViewMode(%s)' % comments_viewMode)
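
A sketch of how this handler might be dispatched from the addon's context menu; the permalink is made up and the call only works inside a Kodi session:

# hypothetical dispatch; the post url is invented
post_url = 'https://www.reddit.com/r/videos/comments/abc123/example_post/'
listLinksInComment(post_url, name='', type_='linksOnly')  # only comments carrying links
listLinksInComment(post_url, name='', type_='')           # every comment
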
示例#56
0
def getFavourites(file,
                  limit=10000,
                  validate=True,
                  superSearch=False,
                  chooser=False):
    import xbmcgui

    prefix = ''
    if not chooser:
        prefix = 'HOME:' if xbmcgui.getCurrentWindowId() == 10000 else ''

    xml = '<favourites></favourites>'
    if sfile.exists(file):
        xml = sfile.read(file)

    items = []

    faves = re.compile('<favourite(.+?)</favourite>').findall(xml)

    for fave in faves:
        fave = fave.replace('&quot;', '&_quot_;')
        fave = fave.replace('\'', '"')
        fave = utils.unescape(fave)

        fave = fave.replace('name=""', '')
        try:
            name = re.compile('name="(.+?)"').findall(fave)[0]
        except:
            name = ''

        try:
            thumb = re.compile('thumb="(.+?)"').findall(fave)[0]
        except:
            thumb = ''

        try:
            cmd = fave.split('>', 1)[-1]
        except:
            cmd = ''

        #name  = utils.Clean(name.replace( '&_quot_;', '"'))
        name = name.replace('&_quot_;', '"')
        thumb = thumb.replace('&_quot_;', '"')
        cmd = cmd.replace('&_quot_;', '"')

        add = False

        if superSearch:
            add = isValid(cmd)
        elif (SHOWUNAVAIL) or (not validate) or isValid(cmd):
            add = True

        if add:
            cmd = upgradeCmd(cmd)

            if cmd.startswith('PlayMedia'):
                option = 'mode'
                try:
                    mode = int(favourite.getOption(cmd, option))
                except:
                    win = xbmcgui.getCurrentWindowId()
                    cmd = updateSFOption(cmd, 'winID', win)

            name = resolve(name)
            cmd = patch(cmd)
            cmd = resolve(cmd)
            cmd = prefix + cmd

            items.append([name, thumb, cmd])
            if len(items) >= limit:  # stop once the limit is reached
                return items

    return items
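
A sketch of calling getFavourites() against Kodi's standard favourites file; the translatePath lookup is an assumption about how a caller would locate it:

# hypothetical caller, runnable only inside Kodi
import xbmc

fave_file = xbmc.translatePath('special://profile/favourites.xml')
for name, thumb, cmd in getFavourites(fave_file, limit=50, validate=False):
    print('%s -> %s' % (name, cmd))
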
示例#57
0
         elif _command.startswith("movie_title="):
             titles = re.split("=", _command, maxsplit=1)[1]
         movie_titles = titles.split(";")
         if not movie_titles == "":
             _build_playlist(movie_titles)
             exit = Script().start_script("oldway")
         else:
             exit = False
     elif _command.startswith(
             "open_settings"):  # Open Settings
         __addon__.openSettings()
         exit = False
 elif sys.argv[1].startswith(
         "jsonquery="):  # JSON RPC Query
     _clear_playlists()
     jsonquery = utils.unescape(
         re.split("=", sys.argv[1], maxsplit=1)[1])
     jsonquery = (jsonquery.replace("<li>", ":")).replace(
         "<lic>", ",")
     #print jsonquery
     movie_ids = Script()._jsonrpc_query(jsonquery)
     if movie_ids:
         _build_playlist(movie_ids)
         exit = Script().start_script("oldway")
     else:
         exit = False
 elif sys.argv[1].startswith("movieid="):
     _clear_playlists()
     movie_id = sys.argv[1].split("=")[1]
     movie_ids = movie_id.split(";")
     if movie_ids:
         _build_playlist(movie_ids, mode="movie_ids")
示例#58
0
def run():
    def update_birthday(bioguide, person, main):

        birthday = birthday_for(main)
        if not birthday:
            print("[%s] NO BIRTHDAY :(\n\n%s" %
                  (bioguide, main.encode("utf8")))
            warnings.append(bioguide)
            return
        if birthday == "UNKNOWN":
            return

        try:
            birthday = datetime.datetime.strptime(birthday.replace(",", ""),
                                                  "%B %d %Y")
        except ValueError:
            print("[%s] BAD BIRTHDAY :(\n\n%s" %
                  (bioguide, main.encode("utf8")))
            warnings.append(bioguide)
            return

        birthday = "%04d-%02d-%02d" % (birthday.year, birthday.month,
                                       birthday.day)
        person.setdefault("bio", {})["birthday"] = birthday

    def birthday_for(string):
        # exceptions for not-nicely-placed semicolons
        string = string.replace(
            "born in Cresskill, Bergen County, N. J.; April", "born April")
        string = string.replace(
            "FOSTER, A. Lawrence, a Representative from New York; September 17, 1802;",
            "born September 17, 1802")
        string = string.replace(
            "CAO, Anh (Joseph), a Representative from Louisiana; born in Ho Chi Minh City, Vietnam; March 13, 1967",
            "born March 13, 1967")
        string = string.replace(
            "CRITZ, Mark S., a Representative from Pennsylvania; born in Irwin, Westmoreland County, Pa.; January 5, 1962;",
            "born January 5, 1962")
        string = string.replace(
            "SCHIFF, Steven Harvey, a Representative from New Mexico; born in Chicago, Ill.; March 18, 1947",
            "born March 18, 1947")
        string = string.replace(
            'KRATOVIL, Frank, M. Jr., a Representative from Maryland; born in Lanham, Prince George\u2019s County, Md.; May 29, 1968',
            "born May 29, 1968")

        # look for a date
        pattern = r"born [^;]*?((?:January|February|March|April|May|June|July|August|September|October|November|December),? \d{1,2},? \d{4})"
        match = re.search(pattern, string, re.I)
        if not match or not match.group(1):
            # specifically detect cases that we can't handle to avoid unnecessary warnings
            if re.search("birth dates? unknown|date of birth is unknown",
                         string, re.I):
                return "UNKNOWN"
            if re.search(
                    r"born [^;]*?(?:in|about|before )?(?:(?:January|February|March|April|May|June|July|August|September|October|November|December) )?\d{4}",
                    string, re.I):
                return "UNKNOWN"
            return None
        return match.group(1).strip()

    def relationships_of(string):
        # relationship data is stored in a parenthetical immediately after the end of the </font> tag in the bio
        # e.g. "(son of Joseph Patrick Kennedy, II, and great-nephew of Edward Moore Kennedy and John Fitzgerald Kennedy)"
        pattern = "^\((.*?)\)"
        match = re.search(pattern, string, re.I)

        relationships = []

        if match and len(match.groups()) > 0:
            relationship_text = match.group(1).encode("ascii", "replace")

            # since some relationships refer to multiple people--great-nephew of Edward Moore Kennedy AND John Fitzgerald Kennedy--we need a special grammar
            from nltk import tree, pos_tag, RegexpParser
            tokens = re.split("[ ,;]+|-(?![0-9])", relationship_text)
            pos = pos_tag(tokens)

            grammar = r"""
        NAME: {<NNP>+}
        NAMES: { <IN><NAME>(?:<CC><NAME>)* }
        RELATIONSHIP: { <JJ|NN|RB|VB|VBD|VBN|IN|PRP\$>+ }
        MATCH: { <RELATIONSHIP><NAMES> }
        """
            cp = RegexpParser(grammar)
            chunks = cp.parse(pos)

            # iterate through the Relationship/Names pairs
            # (Tree.node is the pre-NLTK-3 API; NLTK >= 3 renamed it to Tree.label())
            for n in chunks:
                if isinstance(n, tree.Tree) and n.node == "MATCH":
                    people = []
                    relationship = None
                    for piece in n:
                        if piece.node == "RELATIONSHIP":
                            relationship = " ".join([x[0] for x in piece])
                        elif piece.node == "NAMES":
                            for name in [
                                    x for x in piece
                                    if isinstance(x, tree.Tree)
                            ]:
                                people.append(" ".join([x[0] for x in name]))
                    for person in people:
                        relationships.append({
                            "relation": relationship,
                            "name": person
                        })
        return relationships

    # default to caching
    cache = utils.flags().get('cache', True)
    force = not cache

    # pick either current or historical
    # order is important here, since current defaults to true
    if utils.flags().get('historical', False):
        filename = "legislators-historical.yaml"
    elif utils.flags().get('current', True):
        filename = "legislators-current.yaml"
    else:
        print("No legislators selected.")
        exit(0)

    print("Loading %s..." % filename)
    legislators = load_data(filename)

    # reoriented cache to access by bioguide ID
    by_bioguide = {}
    for m in legislators:
        if "bioguide" in m["id"]:
            by_bioguide[m["id"]["bioguide"]] = m

    # optionally focus on one legislator

    bioguide = utils.flags().get('bioguide', None)
    if bioguide:
        bioguides = [bioguide]
    else:
        bioguides = list(by_bioguide.keys())

    warnings = []
    missing = []
    count = 0
    families = 0

    for bioguide in bioguides:
        # Download & parse the HTML of the bioguide page.

        url = "http://bioguide.congress.gov/scripts/biodisplay.pl?index=%s" % bioguide
        cache = "legislators/bioguide/%s.html" % bioguide
        try:
            body = download(url, cache, force)

            # Fix a problem?
            body = body.replace("&Aacute;\xc2\x81", "&Aacute;")

            # Entities like &#146; are in Windows-1252 encoding. Normally lxml
            # handles that for us, but we're also parsing HTML. The lxml.html.HTMLParser
            # doesn't support specifying an encoding, and the lxml.etree.HTMLParser doesn't
            # provide a cssselect method on element objects. So we'll just decode ourselves.
            body = utils.unescape(body, "Windows-1252")

            dom = lxml.html.parse(io.StringIO(body)).getroot()
        except lxml.etree.XMLSyntaxError:
            print("Error parsing: ", url)
            continue

        # Sanity check.

        if len(dom.cssselect("title")) == 0:
            print("[%s] No page for this bioguide!" % bioguide)
            missing.append(bioguide)
            continue

        # Extract the member's name and the biography paragraph (main).

        try:
            name = dom.cssselect("p font")[0]
            main = dom.cssselect("p")[0]
        except IndexError:
            print("[%s] Missing name or content!" % bioguide)
            exit(0)

        name = name.text_content().strip()
        main = main.text_content().strip().replace("\n", " ").replace("\r", " ")
        main = re.sub(r"\s+", " ", main)

        # Extract the member's birthday.

        update_birthday(bioguide, by_bioguide[bioguide], main)

        # Extract relationships with other Members of Congress.

        if utils.flags().get("relationships", False):
            #relationship information, if present, is in a parenthetical immediately after the name.
            #should always be present if we passed the IndexError catch above
            after_name = dom.cssselect("p font")[0].tail.strip()
            relationships = relationships_of(after_name)
            if len(relationships):
                families = families + 1
                by_bioguide[bioguide]["family"] = relationships

        count = count + 1

    print()
    if warnings:
        print("Missed %d birthdays: %s" %
              (len(warnings), str.join(", ", warnings)))

    if missing:
        print("Missing a page for %d bioguides: %s" %
              (len(missing), str.join(", ", missing)))

    print("Saving data to %s..." % filename)
    save_data(legislators, filename)

    print("Saved %d legislators to %s" % (count, filename))

    if utils.flags().get("relationships", False):
        print("Found family members for %d of those legislators" % families)
示例#59
0
def fget(self):
    return unescape(self.__raw.title)
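
A getter like this is normally wired up through property(); a sketch of the surrounding class, where the class name and the raw entry object are assumptions:

# hypothetical wrapper; only unescape() comes from the snippet above
class FeedEntry(object):
    def __init__(self, raw):
        self.__raw = raw  # parsed entry exposing a .title attribute

    def fget(self):
        return unescape(self.__raw.title)

    title = property(fget)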