Example #1
File: po2csv.py Project: sleepyjames/ppb
def csv2po(csv_file):
    """Convert a file-like object `csv_file` to a polib.POFile object"""
    po = polib.POFile()

    # Reset to reading from the beginning of the file
    csv_file.seek(0)
    csv_reader = csv.reader(csv_file)

    for count, row in enumerate(csv_reader):
        # Skip the first two header rows
        if count < len(csv_header_rows):
            continue

        msgid = unescape(row[0])
        msgid_plural = unescape(row[1])
        msgctxt = row[2]
        msgstr, msgstr_plural = undo_plurals(msgid_plural, row[3])

        entry = polib.POEntry()
        entry.msgid = msgid

        if msgid_plural:
            entry.msgid_plural = msgid_plural
        if msgctxt:
            entry.msgctxt = msgctxt
        if msgstr:
            entry.msgstr = msgstr
        if msgstr_plural:
            entry.msgstr_plural = msgstr_plural

        po.append(entry)

    return po
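Most examples on this page assume a project-local `unescape` helper that decodes HTML entities back to characters. A minimal sketch of such a helper, using only the standard library (the helpers in the individual projects may add their own behavior):

# Minimal sketch of the HTML-entity unescape helper these examples assume.
# Python 3.4+ ships html.unescape(); Python 2 code of this era typically
# wrapped HTMLParser's unescape method.
try:
    from html import unescape          # Python 3.4+
except ImportError:
    from HTMLParser import HTMLParser  # Python 2
    unescape = HTMLParser().unescape

print(unescape('Fish &amp; Chips &lt;extra salt&gt;'))
# Fish & Chips <extra salt>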
Example #2
File: api.py Project: androa/vgtv-xbmc
    def parse_video_response(self, response):
        data = simplejson.loads(response.read())
        items = list()
        count = 0
        for video in data['videos']:
            vid_url, thumb_url, category_id, dur = self.get_video_urls(video)
            count += 1

            if vid_url is None:
                continue

            meta = video.get('meta')
            items.append({
                'label': unescape(meta.get('title')),
                'thumbnail': thumb_url,
                'info': {
                    'plot': unescape(meta.get('preamble') or ''),
                    'originaltitle': unescape(meta.get('title') or '???'),
                    'tagline': unescape(meta.get('preamble') or ''),
                    'aired': self.get_date(meta.get('timePublished')),
                    'duration': self.get_duration(meta.get('duration'))
                },
                'stream_info': {
                    'video': {
                        'duration': meta.get('duration', 0)
                    }
                },
                'path': vid_url,
                'is_playable': True,
            })

        return items, (count < self.PER_PAGE)
Example #3
def AddDir(name, mode, url=None, image=None, fanart=None, isFolder=False, isPlayable=False, desc='', plot='', contextMenu=None, replaceItems=False, infoLabels=None):
    try:    
        name = name.encode('utf-8')       
        url = utils.fixUnicode(utils.unescape(url))
    except:
        pass

    try:
        if not validateMode(mode, name):
            return
    
        if not fanart:
            fanart = FANART

        name = name.replace('_', ' ')

        infoLabels = {'title':name, 'fanart':fanart, 'description':desc, 'plot':plot}    
        
        image = utils.patchImage(mode, image, url, infoLabels)
 
        u  = ''
        u += '?mode='  + str(mode)
        u += '&title=' + urllib.quote_plus(name)

        if image:
            u += '&image=' + urllib.quote_plus(image)            

        if url:
            u += '&url=' + urllib.quote_plus(url).replace('%25LB%25', '%')
            
        APPLICATION.addDir(utils.unescape(name), mode, u, image, isFolder, isPlayable, contextMenu=contextMenu, replaceItems=replaceItems, infoLabels=infoLabels)
    except Exception, e:
        raise
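The query string above is assembled by hand with repeated `quote_plus` calls; a sketch of the same construction with `urllib.urlencode` (Python 2, matching the example), which quotes each value itself:

import urllib

# Sketch only: build the same '?mode=...&title=...' string with urlencode.
params = [('mode', str(mode)), ('title', name)]
if image:
    params.append(('image', image))
u = '?' + urllib.urlencode(params)
if url:
    # the original additionally restores '%' from a '%LB%' placeholder
    u += '&url=' + urllib.quote_plus(url).replace('%25LB%25', '%')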
Example #4
File: views.py Project: dnet/omnom
def load(request):
    if not request.user.is_authenticated():
        return HttpResponseRedirect("/accounts/login")
    if request.method == 'POST':
        form = ImportDeliciousForm(request.POST,request.FILES)
        if form.is_valid():
            db = get_database()[Bookmark.collection_name]
            html=request.FILES['exported'].read().decode('utf8')
            soup=BeautifulSoup(html)
            for item in soup.findAll('dt'):
                desc=''
                next=item.findNextSiblings()
                if next:
                    next=next[0]
                    if 'name' in dir(next) and next.name=='dd':
                        desc=unescape(u''.join(imap(unicode, next.contents)))
                db.Bookmark({'url': urlSanitize(item.a['href']),
                             'seq': getNextVal('seq'),
                             'tags': item.a['tags'].split(','),
                             'user': unicode(request.user),
                             'created': datetime.fromtimestamp(float(item.a['add_date'])),
                             'private': item.a['private']=='1',
                             'title': unescape(unicode(item.a.string)),
                             'notes': unicode(desc)}).save()
            return HttpResponseRedirect('/u/%s/' % request.user)
    else:
        form = ImportDeliciousForm()
    return render_to_response('import.html', { 'form': form, }, context_instance=RequestContext(request) )
Example #5
 def __reload(self, values):
     self.__raw.__dict__.update(values)
     self.firstname = unescape(self.__raw.firstname)
     self.lastname = unescape(self.__raw.lastname)
     self.company = unescape(self.__raw.company)
     self.colleagues = self.__raw.colleagues
     self.id = int(self.__raw.id_user)
     self.lang = LANG_ID[int(self.__raw.lang) + 1]
Example #7
    def from_text(cls, text):
        match = cls.token_re.match(text)
        assert match, 'cannot parse Token from {}'.format(text)
        groups = match.groupdict()

        word = unescape(groups['word'])
        lemma = unescape(groups['lemma'])
        pos = unescape(groups['pos'])

        return cls(word, lemma, pos)
Example #8
    def from_text(cls, text):
        match = cls.pred_re.match(text)
        assert match, 'cannot parse Predicate from {}'.format(text)
        groups = match.groupdict()

        word = unescape(groups['word'])
        lemma = unescape(groups['lemma'])
        pos = unescape(groups['pos'])
        neg = True if groups['neg'] is not None else False
        prt = unescape(groups['prt']) if groups['prt'] is not None else ''

        return cls(word, lemma, pos, neg, prt)
Example #9
    def from_text(cls, text):
        match = cls.arg_re.match(text)
        assert match, 'cannot parse Argument from {}'.format(text)
        groups = match.groupdict()

        word = unescape(groups['word'])
        lemma = unescape(groups['lemma'])
        pos = unescape(groups['pos'])
        ner = groups['ner'] if groups['ner'] != 'NONE' else ''
        entity_idx = int(groups['entity_idx']) if groups['entity_idx'] else -1
        mention_idx = \
            int(groups['mention_idx']) if groups['mention_idx'] else -1

        return cls(word, lemma, pos, ner, entity_idx, mention_idx)
Example #10
def collect_album_info(album_soup):
    url = 'http://tut-audio.su'
    album_dict = {}
    album_dict['name'] = unescape(
        album_soup.find(id="titlealb").get_text()[:-14])
    album_dict['year'] = album_soup.find(
        id="dopinfoalb").find('p').find('b').get_text()
    if album_dict['year']:
        album_dict['year'] = int(album_dict['year'])
    album_dict['cover_url'] = url + album_soup.find(id="imagesalb").get('src')
    t = album_soup.find_all("div", "player")[0]
    artist, _ = t['data-title'].split(' — ')
    artist = unescape(artist)
    album_dict['url'] = url + album_url
    return album_dict, artist
Example #11
    def __init__(
        self,
        uid,
        summary,
        dtstamp=None,
        created=None,
        last_modified=None,
        related_to=None,
        completed=None,
        percent_complete=None,
        x_kde_ktimetracker_totalsessiontime=None,
        x_kde_ktimetracker_totaltasktime=None,
        x_kde_ktimetracker_bctype=None,
    ):
        self.uid = uid
        self.summary = unescape(summary)
        self.dtstamp = dtstamp
        self.created = created
        self.last_modified = last_modified
        self.related_to = related_to
        self.completed = completed
        self.percent_complete = percent_complete
        self.x_kde_ktimetracker_totalsessiontime = x_kde_ktimetracker_totalsessiontime
        self.x_kde_ktimetracker_totaltasktime = x_kde_ktimetracker_totaltasktime
        self.x_kde_ktimetracker_bctype = x_kde_ktimetracker_bctype

        self.todos = {}
Example #12
def gen_solution(cur, td, num, p_id):
#	import pdb
#	pdb.set_trace()
	global testcase_id
	global testcase_crawled

	if num == 0:
		column_name = 'java'
	elif num == 1:
		column_name = 'cpp'
	elif num == 2:
		column_name = 'csharp'
	else:
		column_name = 'VB'
	cur.execute('select %s from problem where id = %d' % (column_name, p_id))
	if cur.fetchall()[0][0] != None:
		return
	p = compile('"/stat\?c=problem_solution.*?"')
	l = p.findall(td)
	if len(l) == 1:
		url = topcoder_site_url + unescape(l[0][1:-1])
		try:
			page = topcoder.get_page(url)
		except Exception, e:
			print url, e
			return
		p = compile('<TD CLASS="problemText" COLSPAN="8" VALIGN="middle" ALIGN="left">[\d\D]*?</TD>')
		try:
			code = escape_string(p.findall(page)[0])
		except Exception, e:
			print 'No code found:',url,e
			return
Example #13
def fetch_bioguide_page(bioguide, force):
    url = "http://bioguide.congress.gov/scripts/biodisplay.pl?index=%s" % bioguide
    cache = "legislators/bioguide/%s.html" % bioguide
    try:
        body = download(url, cache, force)

        # Fix a problem?
        body = body.replace("&Aacute;\xc2\x81", "&Aacute;")

        # Entities like &#146; are in Windows-1252 encoding. Normally lxml
        # handles that for us, but we're also parsing HTML. The lxml.html.HTMLParser
        # doesn't support specifying an encoding, and the lxml.etree.HTMLParser doesn't
        # provide a cssselect method on element objects. So we'll just decode ourselves.
        body = utils.unescape(body, "Windows-1252")

        dom = lxml.html.parse(io.StringIO(body)).getroot()
    except lxml.etree.XMLSyntaxError:
        raise Exception("Error parsing: " + url)

    # Sanity check.

    if len(dom.cssselect("title")) == 0:
        raise Exception("No page for bioguide %s!" % bioguide)

    return dom
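The two-argument `utils.unescape(body, "Windows-1252")` call is project-specific; a sketch of what an encoding-aware unescape plausibly does here (an assumption, not the project's code): numeric references such as &#146; name Windows-1252 bytes rather than Unicode code points, so each one is decoded through that codec.

import re

def unescape_windows1252(text, encoding="Windows-1252"):
    # Sketch (Python 2): decode numeric character references <= 255
    # through the given single-byte codec; &#146; -> u'\u2019'.
    def repl(match):
        codepoint = int(match.group(1))
        if codepoint <= 255:
            return chr(codepoint).decode(encoding)
        return unichr(codepoint)
    return re.sub(r'&#(\d+);', repl, text)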
Example #14
    def insertPicDetail(self, picDetailModel):

        cur = self.con.cursor()
        try:

            sql = '''INSERT INTO admin_picdetail 
            (`pid`, `pic_path`, `height`, `width`, `pic_desc`, `categoary_id`, `albunm_name`, `albunm_id`, `user_id`, 
            `time`, `taoke_num_iid`, `taoke_title`, `taoke_price`) 
            VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')''' % (
                picDetailModel.pId, picDetailModel.picPath,
                picDetailModel.height, picDetailModel.width,
                picDetailModel.desc, picDetailModel.cateId,
                picDetailModel.albunmName, picDetailModel.albunmId,
                picDetailModel.userId, picDetailModel.time,
                picDetailModel.taokeNumIID, picDetailModel.title,
                picDetailModel.price)

            sql = utils.unescape(sql).encode('utf-8')

            cur.execute(sql)

            self.con.commit()
        except Exception, what:
            print '========-------=======', what
            #            print sql
            pass
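Building the INSERT by %-interpolation and then unescaping the whole SQL string is fragile (quoting breaks, and it invites injection). A sketch of the same insert as a parameterized query, assuming a DB-API driver with the %s paramstyle such as MySQLdb:

# Sketch: let the driver quote the values; no manual escaping needed.
sql = '''INSERT INTO admin_picdetail
    (pid, pic_path, height, width, pic_desc, categoary_id, albunm_name,
     albunm_id, user_id, time, taoke_num_iid, taoke_title, taoke_price)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
cur.execute(sql, (picDetailModel.pId, picDetailModel.picPath,
                  picDetailModel.height, picDetailModel.width,
                  picDetailModel.desc, picDetailModel.cateId,
                  picDetailModel.albunmName, picDetailModel.albunmId,
                  picDetailModel.userId, picDetailModel.time,
                  picDetailModel.taokeNumIID, picDetailModel.title,
                  picDetailModel.price))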
Example #15
def gen_solution(cur, td, num, p_id):
    #	import pdb
    #	pdb.set_trace()
    global testcase_id
    global testcase_crawled

    if num == 0:
        column_name = 'java'
    elif num == 1:
        column_name = 'cpp'
    elif num == 2:
        column_name = 'csharp'
    else:
        column_name = 'VB'
    cur.execute('select %s from problem where id = %d' % (column_name, p_id))
    if cur.fetchall()[0][0] != None:
        return
    p = compile('"/stat\?c=problem_solution.*?"')
    l = p.findall(td)
    if len(l) == 1:
        url = topcoder_site_url + unescape(l[0][1:-1])
        try:
            page = topcoder.get_page(url)
        except Exception, e:
            print url, e
            return
        p = compile(
            '<TD CLASS="problemText" COLSPAN="8" VALIGN="middle" ALIGN="left">[\d\D]*?</TD>'
        )
        try:
            code = escape_string(p.findall(page)[0])
        except Exception, e:
            print 'No code found:', url, e
            return
Example #16
File: api.py Project: rexxars/vgtv-xbmc
    def get_categories(self, root_id=0, only_series=False):
        categories = self.get_category_tree()
        root = int(root_id)

        matches = []
        for category in categories:
            id = category.get('id')
            
            if category.get('showCategory') is False:
                continue

            if only_series is True and category.get('isSeries') is not True:
                continue

            if only_series is False and category.get('parentId') != root:
                continue
            
            matches.append({
                'label': unescape(category.get('title')),
                'path':  self.plugin.url_for(
                    'show_category',
                    id=str(id),
                    mode='all'
                ),
                'id':    id
            })

        return matches
Example #17
def view(request, shurl):
    item = getItemByUrl(shurl)
    item['shurl'] = base62.from_decimal(item['seq'])

    if request.GET.get('format', '') == 'json':
        del item['user']
        res = {
            'url': unicode(item['url']),
            'title': unicode(item['title']),
            'created': tuple(item['created'].timetuple()),
            'private': item['private'],
            'notes': unicode(unescape(item['notes'])),
            'tags': item['tags'],
        }
        return HttpResponse(json.dumps(res), mimetype="application/json")
    else:
        item['snapshot'] = '' if not item.get('snapshot') else item.get(
            'snapshot')[0]
        tpl = 'view.html'
        if request.GET.get('raw', None):
            tpl = 'view-bare.html'
        return render_to_response(tpl, {
            'item': item,
        },
                                  context_instance=RequestContext(request))
Example #18
def fetch_albums(url):
    html = urlopen(url)

    found = re.findall(r'<td class="Title".*?<a href="/music/url\?q=(/music/album\?id%3D.*?)".*?>(.*?)</a>', html)
    print '# albums:', len(found), urllib.unquote(url)
    for link, title in found:
        link = 'http://www.google.cn'+link.split('&')[0]
        title = unescape(title)
        print urllib.unquote(link), '|', title

    found = re.findall(r'<td>.*?<a class="imglink" href="/music/url\?q=(.*?)"', html)
    pages = [ 'http://www.google.cn'+urllib.unquote(i.split('&amp;')[0]) for i in found ]

    cache[url] = True
    for page in pages:
        if page not in cache:
            cache[page] = False

    another_page = None
    for page, done in cache.iteritems():
        if not done:
            another_page = page
            break

    if another_page:
        fetch_albums(another_page)
Example #19
def fetch_bioguide_page(bioguide, force):
  url = "http://bioguide.congress.gov/scripts/biodisplay.pl?index=%s" % bioguide
  cache = "legislators/bioguide/%s.html" % bioguide
  try:
    body = download(url, cache, force)

    # Fix a problem?
    body = body.replace("&Aacute;\xc2\x81", "&Aacute;")

    # Entities like &#146; are in Windows-1252 encoding. Normally lxml
    # handles that for us, but we're also parsing HTML. The lxml.html.HTMLParser
    # doesn't support specifying an encoding, and the lxml.etree.HTMLParser doesn't
    # provide a cssselect method on element objects. So we'll just decode ourselves.
    body = utils.unescape(body, "Windows-1252")

    dom = lxml.html.parse(io.StringIO(body)).getroot()
  except lxml.etree.XMLSyntaxError:
    raise Exception("Error parsing: " + url)

  # Sanity check.

  if len(dom.cssselect("title")) == 0:
    raise Exception("No page for bioguide %s!" % bioguide)

  return dom
Example #20
 def _parse_result(self, buf, properties={}):
     count = 0
     result = ""
     try:
         self.redis_conn.send(buf)
     except Exception as e:
         if e.args[0] == errno.EPIPE and count < 3:
             self.setup()
             count += 1
             time.sleep(1)
         else:
             raise
     while True:
         recv = self.redis_conn.recv(1024000)
         if recv:
             result += recv
         if not recv or recv.endswith("\r\n\r\n"):
             break
     a = result.split("#-*-#")
     code, info, data = a
     data = data[:-4]
     if code == "200":
         return handle_safely(properties.get("recv",
                                             default_recv))(unescape(data))
     elif code == "502":
         return properties.get("result", data)
     else:
         raise RedisError("%s:%s, data: %s" % (code, info, data))
Example #21
    def insertPicDetail(self,picDetailModel):
        
        cur = self.con.cursor()
        try:
            
            sql = '''INSERT INTO admin_picdetail 
            (`pid`, `pic_path`, `height`, `width`, `pic_desc`, `categoary_id`, `albunm_name`, `albunm_id`, `user_id`, 
            `time`, `taoke_num_iid`, `taoke_title`, `taoke_price`) 
            VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')'''%(picDetailModel.pId
            ,picDetailModel.picPath
            ,picDetailModel.height
            ,picDetailModel.width
            ,picDetailModel.desc
            ,picDetailModel.cateId
            ,picDetailModel.albunmName
            ,picDetailModel.albunmId
            ,picDetailModel.userId
            ,picDetailModel.time
            ,picDetailModel.taokeNumIID
            ,picDetailModel.title
            ,picDetailModel.price)
            
            sql = utils.unescape(sql).encode('utf-8')
            
            cur.execute(sql)

            self.con.commit()
        except Exception,what:
            print '========-------=======',what
#            print sql
            pass
Example #22
 def settings_to_log( self ):
     try:
         utils.log( "Settings" )
         setting_values = self.read_settings_xml()
         for k, v in sorted( setting_values.items() ):
             utils.log( "%30s: %s" % ( k, str( utils.unescape( v.decode('utf-8', 'ignore') ) ) ) )
     except:
         traceback.print_exc()
Example #23
def get_musicbrainz_artist_id(artist_search, limit=1, alias=False):
    name = ""
    id = ""
    sortname = ""
    artist_name = smart_unicode(
        (artist_search.replace('"', '?').replace('&', 'and')))
    if not alias:
        url = artist_url % (server, quote_plus(
            artist_name.encode("utf-8")), limit)
    else:
        url = alias_url % (server, quote_plus(
            artist_name.encode("utf-8")), limit)
    htmlsource = get_html_source(url, "", save_file=False)
    match = re.search('''<artist(.*?)</artist>''', htmlsource)
    if match:
        score_match = re.search('''score="(.*?)"''', htmlsource)
        name_match = re.search('''<name>(.*?)</name>''', htmlsource)
        id_match = re.search('''<artist id="(.*?)"(?:.*?)>''', htmlsource)
        if not id_match:
            id_match = re.search('''<artist (?:.*?)id="(.*?)">''', htmlsource)
        sort_name_match = re.search('''<sort-name>(.*?)</sort-name>''',
                                    htmlsource)

        if score_match:
            score = score_match.group(1)
        if name_match:
            name = unescape(smart_unicode(name_match.group(1)))
        if id_match:
            id = id_match.group(1)
        if sort_name_match:
            sortname = unescape(smart_unicode(sort_name_match.group(1)))
        log("Score     : %s" % score, xbmc.LOGDEBUG)
        log("Id        : %s" % id, xbmc.LOGDEBUG)
        log("Name      : %s" % name, xbmc.LOGDEBUG)
        log("Sort Name : %s" % sortname, xbmc.LOGDEBUG)
    else:
        if not alias:
            log("No Artist ID found trying aliases: %s" % artist_search,
                xbmc.LOGDEBUG)
            name, id, sortname = get_musicbrainz_artist_id(
                artist_search, limit, True)
        else:
            log("No Artist ID found for Artist: %s" % artist_search,
                xbmc.LOGDEBUG)
    xbmc.sleep(mb_delay)
    return name, id, sortname
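Extracting fields from the MusicBrainz XML with regular expressions works but is brittle; a sketch of the same extraction with xml.etree.ElementTree (assumes the response parses as XML; the real web-service payload is namespaced, which would need handling):

import xml.etree.ElementTree as ET

def parse_first_artist(htmlsource):
    # Sketch only: pull score, name, id and sort-name from the first
    # <artist> element instead of regex-scanning the raw source.
    root = ET.fromstring(htmlsource)
    artist = root.find('.//artist')
    if artist is None:
        return None
    return {
        'score': artist.get('score', ''),
        'id': artist.get('id', ''),
        'name': artist.findtext('name', ''),
        'sortname': artist.findtext('sort-name', ''),
    }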
Example #24
def get_musicbrainz_artists(artist_search, limit=1):
    log("Artist: %s" % artist_search, xbmc.LOGDEBUG)
    score = ""
    name = ""
    id = ""
    sortname = ""
    artists = []
    artist_name = smart_unicode(
        (artist_search.replace('"', '?').replace('&', 'and')))
    url = artist_url % (server, quote_plus(artist_name.encode("utf-8")), limit)
    htmlsource = get_html_source(url, "", save_file=False, overwrite=False)
    match = re.findall('''<artist(.*?)</artist>''', htmlsource)
    if match:
        for item in match:
            artist = {}
            artist["score"] = ""
            artist["name"] = ""
            artist["id"] = ""
            artist["sortname"] = ""
            score_match = re.search('''score="(.*?)"''', item)
            name_match = re.search('''<name>(.*?)</name>''', item)
            id_match = re.search('''id="(.*?)"(?:.*?)>''', item)
            if not id_match:
                id_match = re.search('''id="(.*?)">''', item)
            sort_name_match = re.search('''<sort-name>(.*?)</sort-name>''',
                                        item)
            if score_match:
                artist["score"] = score_match.group(1)
            if name_match:
                artist["name"] = unescape(smart_unicode(name_match.group(1)))
            if id_match:
                artist["id"] = id_match.group(1)
            if sort_name_match:
                artist["sortname"] = unescape(
                    smart_unicode(sort_name_match.group(1)))
            log("Score     : %s" % artist["score"], xbmc.LOGDEBUG)
            log("Id        : %s" % artist["id"], xbmc.LOGDEBUG)
            log("Name      : %s" % artist["name"], xbmc.LOGDEBUG)
            log("Sort Name : %s" % artist["sortname"], xbmc.LOGDEBUG)
            artists.append(artist)
    else:
        log("No Artist ID found for Artist: %s" % repr(artist_search),
            xbmc.LOGDEBUG)
    xbmc.sleep(mb_delay)
    return artists
Example #25
 def _locate(self, town_name):
     town_name = utils.unescape(town_name.strip())
     if town_name not in self.location_cache:
         try:
             self.location_cache[town_name] = self.geo_locator.geocode(town_name)
         except geopy.exc.GeocoderTimedOut:
             print "Geocoder Timeout."
             return None
     return self.location_cache[town_name]
Example #26
def getFavourites(file, limit=10000, validate=True, superSearch=False):
    import xbmcgui

    file = xbmc.translatePath(file)
    xml  = '<favourites></favourites>'
    if os.path.exists(file):  
        fav = open(file , 'r')
        xml = fav.read()
        fav.close()

    items = []

    faves = re.compile('<favourite(.+?)</favourite>').findall(xml)

    for fave in faves:
        fave = fave.replace('&quot;', '&_quot_;')
        fave = fave.replace('\'', '"')
        fave = utils.unescape(fave)

        fave = fave.replace('name=""', '')
        try:    name = re.compile('name="(.+?)"').findall(fave)[0]
        except: name = ''

        try:    thumb = re.compile('thumb="(.+?)"').findall(fave)[0]
        except: thumb = ''

        try:    cmd   = fave.split('>', 1)[-1]
        except: cmd = ''

        #name  = utils.Clean(name.replace( '&_quot_;', '"'))
        name  = name.replace( '&_quot_;', '"')
        thumb = thumb.replace('&_quot_;', '"')
        cmd   = cmd.replace(  '&_quot_;', '"')

        add = False

        if superSearch:
            add = isValid(cmd)
        elif (SHOWUNAVAIL) or (not validate) or isValid(cmd):
            add = True

        if add:
            cmd = upgradeCmd(cmd)

            if cmd.startswith('PlayMedia'):
                option = 'mode'
                try:                        
                    mode = int(favourite.getOption(cmd, option))
                except:
                    win  = xbmcgui.getCurrentWindowId()
                    cmd  = updateSFOption(cmd, 'winID', win)

            items.append([name, thumb, cmd])
            if len(items) > limit:
                return items

    return items
Example #27
def check_url(url, geturl=False):
    send = []
    try:
        o = urllib.urlopen(url)
        ctype, clength = o.info().get("Content-Type"), o.info().get(
            "Content-Length")
        if o.info().gettype() == "text/html":
            title = 'Pas de titre'
            html = o.read(1000000)
            try:
                SoupList = BeautifulSoup(utils.unescape(html),
                                         parseOnlyThese=SoupStrainer('title'))
            except UnicodeDecodeError:
                SoupList = BeautifulSoup(utils.unescape(
                    html.decode("latin1", "ignore")),
                                         parseOnlyThese=SoupStrainer('title'))
            try:
                titles = [title for title in SoupList]
                title = utils.xhtml2text(titles[0].renderContents())
            except IndexError:
                title = "Pas de titre"
            except HTMLParseError:
                pass
            if geturl:
                send.append("%s : [Lien] Titre : %s" %
                            (o.geturl(), " ".join(title.split())))
            else:
                send.append("[Lien] Titre : %s" % " ".join(title.split()))
        else:
            send.append("[Lien] Type: %s, Taille : %s octets" %
                        (ctype, clength))
        o.close()
    except IOError as error:
        if error[1] == 401:
            send.append("Je ne peux pas m'authentifier sur %s :'(" % url)
        elif error[1] == 404:
            send.append("%s n'existe pas !" % url)
        elif error[1] == 403:
            send.append("Il est interdit d'accéder à %s !" % url)
        else:
            send.append("Erreur %s sur %s" % (error[1], url))
    except httplib.InvalidURL:
        send.append("L'URL %s n'est pas valide !" % url)
    return send
Example #28
 def settings_to_log(self):
     try:
         utils.log("Settings")
         setting_values = self.read_settings_xml()
         for k, v in sorted(setting_values.items()):
             utils.log(
                 "%30s: %s" %
                 (k, str(utils.unescape(v.decode('utf-8', 'ignore')))))
     except:
         traceback.print_exc()
Example #29
 def to_str(self, i=1):
     s = "{}:\n".format(type(self).__name__)
     for field in list(self._fields) + self.extra_fields:
         value = self.__getattribute__(field)
         if isinstance(value, Tree):
             s += "{}{}:{}".format(self.unit * i, field,
                                   value.to_str(i + 1))
         elif isinstance(value, list):
             s += "{}{}:\n".format(self.unit * i, field)
             for v in value:
                 if isinstance(v, Tree):
                     s += "{}- {}".format(self.unit * i, v.to_str(i + 1))
                 else:
                     s += "{}- {}\n".format(self.unit * i,
                                            utils.unescape(v))
         else:
             s += "{}{}: {}\n".format(self.unit * i, field,
                                      utils.unescape(value))
     return s
Example #30
 def post(self):
     text = self.request.get('text')
     if text:
         conver_url = utils.unescape(self.request.get('url'))
         conver = Conver.get_for_url(conver_url)
         message = Message(author=PermaUser.get_current_permauser(), text=text, conver=conver)
         message.put()
         self.distribute_message(message)
         
     else:
         logging.error("No message '%s' saved for %s", text, conver_url)
Example #31
 def parse_starting_page(self, response):
     ranking = 0
     for sel in response.xpath('//div[@class="content"]/table/tr'):
         team_link = sel.xpath('td/a/@href').extract_first()
         if team_link is not None:
             team_name = sel.xpath('td/a/text()').extract_first()
             data = sel.xpath('td/text()').extract()
             ranking_item = JtrTeamRankingItem()
             ranking_item['team_name'] = utils.unescape(team_name)
             if len(data) == 4:
                 ranking, city, tournaments, points = data
             else:
                 city, tournaments, points = data
             ranking_item['ranking'] = int(ranking.split("/")[0].strip().strip("."))
             ranking_item['hometown'] = utils.unescape(city)
             ranking_item['points'] = float(points)
             ranking_item['number_of_tournaments'] = utils.unescape(tournaments)
             ranking_item['crawl_date'] = datetime.datetime.now()
             yield  ranking_item
             yield scrapy.Request(response.urljoin(team_link), callback=self.parse_team_site)
Example #32
def addtoKodiFavorites(command, name, thumbnail):
    import xml.etree.ElementTree
    from utils import unescape

#adding to favorites involves 3 steps:
#  1.) add the favorite via jsonrpc (script params not included)
#  2.) modify the favourites.xml to include script params <-- (kodi18 leia alpha1) there may be another favourites file, or this file is cached until another favorite is added
#  3.) ??? <-- adding another favorite will delete the first one (until kodi is restarted); need to find a way for kodi to reload the modified favourites.xml

    #http://kodi.wiki/view/JSON-RPC_API/v8#Favourites
    #schema=xbmc.executeJSONRPC('{"jsonrpc": "2.0", "method": "JSONRPC.Introspect", "id": 1}')
    #log(repr(schema))
    favorite_was_found=False
    #add_dummy_favorite()
    temp_command='script.reddit.reader' #can't add script favorites with parameter using jsonrpc
    saved_command='RunScript("script.reddit.reader")'

    json_rpc_command={"jsonrpc": "2.0",
                      "method": "Favourites.AddFavourite",
                      'params': {
                                 'title': name,
                                 'type': 'script',
                                 'path': temp_command,
                                 'thumbnail':thumbnail,
                                 },
                      'id': '1'
                      }
    a=xbmc.executeJSONRPC(json.dumps(json_rpc_command))
    #log(repr(a))
    a=json.loads(a)
    if a.get('result','')=="OK":
        log('Favourite added')
        #now that we've created the favorite, we edit it to add parameters
        favorites_xml       = xbmc.translatePath(os.path.join(addon.getAddonInfo('profile'), '..','..','favourites.xml'))
        if os.path.exists(favorites_xml):
            #log('{0} exists'.format(favorites_xml) )
            et = xml.etree.ElementTree.parse(favorites_xml)
            root=et.getroot()

            for f in root.findall('favourite'):
                #the name attribute is escape encoded the xml file.
                fav_name=unescape( f.get('name') ) #replaces &amp; to & etc.
                fav_cmd=f.text
                #log('*a*'+repr(name) + '  ' + saved_command)
                #log('*b*'+repr(fav_name) + '  ' + fav_cmd )
                #log('---')
                if (fav_name==name) and (fav_cmd==saved_command):
                    log('Favourite entry found {0}'.format(fav_name) )
                    favorite_was_found=True
                    f.text=command

            if favorite_was_found:
                et.write(favorites_xml)
                xbmc_notify(translation(32028), fav_name, icon=thumbnail)
Example #33
def extract_tweets(tweets, cmd_line=False):
    """ prints the tweets from tweets: list of tweet dicts """
    tweet_texts = []
    for tweet in tweets:
        text = get_tweet(tweet)
        if cmd_line:
            text = text.encode('unicode-escape')
            text = ununicode(text)
            text = unescape(text)
        tweet_texts.append(parser(text))
    return tweet_texts
Example #34
File: utils.py Project: alub/pipobot
def check_url(url, geturl=False):
    send = []
    try:
        o = urllib.urlopen(url)
        ctype, clength = o.info().get("Content-Type"), o.info().get("Content-Length")
        if  o.info().gettype() == "text/html":
            title = 'Pas de titre'
            html = o.read(1000000)
            try:
                SoupList = BeautifulSoup(utils.unescape(html),
                                         parseOnlyThese=SoupStrainer('title'))
            except UnicodeDecodeError:
                SoupList = BeautifulSoup(utils.unescape(html.decode("latin1", "ignore")),
                                         parseOnlyThese=SoupStrainer('title'))
            try:
                titles = [title for title in SoupList]
                title = utils.xhtml2text(titles[0].renderContents())
            except IndexError:
                title = "Pas de titre"
            except HTMLParseError:
                pass
            if geturl:
                send.append("%s : [Lien] Titre : %s" %
                            (o.geturl(), " ".join(title.split())))
            else:
                send.append("[Lien] Titre : %s" % " ".join(title.split()))
        else:
            send.append("[Lien] Type: %s, Taille : %s octets" % (ctype, clength))
        o.close()
    except IOError as error:
        if error[1] == 401:
            send.append("Je ne peux pas m'authentifier sur %s :'(" % url)
        elif error[1] == 404:
            send.append("%s n'existe pas !" % url)
        elif error[1] == 403:
            send.append("Il est interdit d'accéder à %s !" % url)
        else:
            send.append("Erreur %s sur %s" % (error[1], url))
    except httplib.InvalidURL:
        send.append("L'URL %s n'est pas valide !" % url)
    return send
Example #35
def load(request):
    if not request.user.is_authenticated():
        return HttpResponseRedirect("/accounts/login")
    if request.method == 'POST':
        form = ImportDeliciousForm(request.POST, request.FILES)
        if form.is_valid():
            db = get_database()[Bookmark.collection_name]
            html = request.FILES['exported'].read().decode('utf8')
            soup = BeautifulSoup(html)
            for item in soup.findAll('dt'):
                desc = ''
                next = item.findNextSiblings()
                if next:
                    next = next[0]
                    if 'name' in dir(next) and next.name == 'dd':
                        desc = unescape(u''.join(imap(unicode, next.contents)))
                db.Bookmark({
                    'url':
                    urlSanitize(item.a['href']),
                    'seq':
                    getNextVal('seq'),
                    'tags':
                    item.a['tags'].split(','),
                    'user':
                    unicode(request.user),
                    'created':
                    datetime.fromtimestamp(float(item.a['add_date'])),
                    'private':
                    item.a['private'] == '1',
                    'title':
                    unescape(unicode(item.a.string)),
                    'notes':
                    unicode(desc)
                }).save()
            return HttpResponseRedirect('/u/%s/' % request.user)
    else:
        form = ImportDeliciousForm()
    return render_to_response('import.html', {
        'form': form,
    },
                              context_instance=RequestContext(request))
Example #36
    def _build_show_summary(
        self,
        data,
        show_status=False,
        pre_rating='',
        post_rating='',
        ratings_pos='front',
        preserve_rating=False,
    ):
        out = []

        star = unescape("&#9733;")
        sep = " | "
        status = _get(data, 'status')
        plot = _get(data, 'plot')
        alt_ratings = _get(data, 'alt_ratings')
        rating = _get(data, 'rating', 0.0)

        if show_status and status:
            out.append('Status: {}'.format(status))

        if plot:
            out.append(plot)

        if alt_ratings:
            buf = []
            for source, _rating in alt_ratings:
                buf.append("{}: {}".format(source, _rating))
            piece = sep.join(buf)

            if ratings_pos == 'front':
                out.insert(0, star + " " + piece + " " + star + "\n\n")
            else:
                out.append("\n\n" + star + " " + piece + " " + star)

        if preserve_rating:
            tmp = unescape("{}{:.1f}{}".format(pre_rating, rating,
                                               post_rating))
            out.insert(0, tmp)

        return sep.join(out)
Example #37
def get_musicbrainz_artists( artist_search, limit=1 ):
    log( "Artist: %s" % artist_search, xbmc.LOGDEBUG )
    score = ""
    name = ""
    id = ""
    sortname = ""
    artists = []
    artist_name = smart_unicode( artist_search.replace( '"', '?' ) )
    url = artist_url % ( server, quote_plus( artist_name.encode("utf-8") ), limit )
    htmlsource = get_html_source( url, "", save_file = False, overwrite = False )
    match = re.findall( '''<artist(.*?)</artist>''', htmlsource )
    if match:
        for item in match:
            artist = {}
            artist["score"] = ""
            artist["name"] = ""
            artist["id"] = ""
            artist["sortname"] = ""
            score_match = re.search( '''score="(.*?)"''', item )
            name_match = re.search( '''<name>(.*?)</name>''', item )
            id_match = re.search( '''id="(.*?)"(?:.*?)>''', item )
            if not id_match:
                id_match = re.search( '''id="(.*?)">''', item )
            sort_name_match = re.search( '''<sort-name>(.*?)</sort-name>''', item )
            if score_match:
                artist["score"] = score_match.group(1)
            if name_match:
                artist["name"] = unescape( smart_unicode( name_match.group(1) ) )
            if id_match:
                artist["id"] = id_match.group(1)
            if sort_name_match:
                artist["sortname"] = unescape( smart_unicode( sort_name_match.group(1) ) )
            log( "Score     : %s" % artist["score"], xbmc.LOGDEBUG )
            log( "Id        : %s" % artist["id"], xbmc.LOGDEBUG )
            log( "Name      : %s" % artist["name"], xbmc.LOGDEBUG )
            log( "Sort Name : %s" % artist["sortname"], xbmc.LOGDEBUG )
            artists.append(artist)
    else:
        log( "No Artist ID found for Artist: %s" % repr( artist_search ), xbmc.LOGDEBUG )
    xbmc.sleep( mb_delay )
    return artists
Example #38
File: greedy.py Project: huanghao/muse
def artist(url):
    html = urlopen(url)
    
    found = re.findall(r'<a href="/music/url\?q=(/music/album\?.*?)&.*?>(.*?)</a>',
                       html.split('所有专辑', 1)[1])
    albums = dict(found)
    artist = trim_title(html)
    print artist, 'albums', len(albums)
    
    for href, title in sorted(albums.items(), lambda i,j: cmp(i[1],j[1])):
        url = 'http://www.google.cn%s' % urllib.unquote(href)
        print '%s |%s' % (url, unescape(title))
Example #39
File: tasks.py Project: thetwoj/gvobot
def _message_handler(message):
    mention = '@gvobot'
    print(message)
    # Strip mention if it's at the beginning
    if message == mention:
        message = ''
    elif message.startswith(mention):
        # Remove the extra space added after the mention, too
        message = message[len(mention) + 1:]

    # Unescape message (skype encodes &, <, >, ', and ")
    message = unescape(message)

    # Be snarky when no message is sent; otherwise, S.C.I.E.N.C.E.
    if len(message) == 0:
        response = 'Has anyone really been far even as decided to use ' \
            'even go want to do look more like?'
    elif FIXED_RESPONSES.get(message.lower(), None) is not None:
        response = FIXED_RESPONSES[message.lower()]
    elif message.startswith('!number'):
        usage = 'Usage: !number [<start num> <end num>]'
        args = message.split()
        if len(args) == 1:
            response = str(random.randint(1, 6))
        elif len(args) == 3:
            try:
                start = int(args[1])
                end = int(args[2])
                response = str(random.randint(start, end))
            except ValueError:
                response = usage
        else:
            response = usage
    elif message.startswith('!song'):
        args = message.split()
        if len(args) == 1:
            song = get_random_song()
            response = song.to_message()
        else:
            response = 'Usage: !song'
    else:
        response = sciencify(message)

        # Allow bot to do actions with /me
        if response.startswith('/M.E. '):
            response = response.replace('/M.E.', '/me', 1)

        # The bot's name is unscienceable.
        response = response.replace('@G.V.O.B.O.T.', '@gvobot')

    print(response)
    return response
Example #40
File: feds.py Project: pshc/imprint
def twitter_status(twitter_username):
    status = cache.get('feds-%s-status' % twitter_username)
    if status is None:
        try:
            import twitter
            user = twitter.Api().GetUser(twitter_username)
            status = user.status
            text = unescape(status.text)
            status = render_to_string('feds/twitter_status.html', locals())
        except:
            status = ''
        cache.set('feds-%s-status' % twitter_username, status)
    return status
Example #41
def get_musicbrainz_artist_id( artist, limit=1, alias = False ):
    name = ""
    id = ""
    sortname = ""
    artist_name = smart_unicode( artist.replace( '"', '?' ) )
    if not alias:
        url = artist_url % ( server, quote_plus( artist_name.encode("utf-8") ), limit )
    else:
        url = alias_url % ( server, quote_plus( artist_name.encode("utf-8") ), limit )
    htmlsource = get_html_source( url, "", save_file = False)
    match = re.search( '''<artist(.*?)</artist>''', htmlsource )
    if match:
        score_match = re.search( '''score="(.*?)"''', htmlsource )
        name_match = re.search( '''<name>(.*?)</name>''', htmlsource )
        id_match = re.search( '''<artist id="(.*?)"(?:.*?)>''', htmlsource )
        if not id_match:
            id_match = re.search( '''<artist (?:.*?)id="(.*?)">''', htmlsource )
        sort_name_match = re.search( '''<sort-name>(.*?)</sort-name>''', htmlsource )
        
        if score_match:
            score = score_match.group(1)
        if name_match:
            name = unescape( smart_unicode( name_match.group(1) ) )
        if id_match:
            id = id_match.group(1)
        if sort_name_match:
            sortname = unescape( smart_unicode( sort_name_match.group(1) ) )
        log( "Score     : %s" % score, xbmc.LOGDEBUG )
        log( "Id        : %s" % id, xbmc.LOGDEBUG )
        log( "Name      : %s" % name, xbmc.LOGDEBUG )
        log( "Sort Name : %s" % sortname, xbmc.LOGDEBUG )
    else:
        if not alias:
            log( "No Artist ID found trying aliases: %s" % artist, xbmc.LOGDEBUG )
            name, id, sortname = get_musicbrainz_artist_id( artist, limit, True )
        else:
            log( "No Artist ID found for Artist: %s" % artist, xbmc.LOGDEBUG )
    xbmc.sleep( mb_delay )
    return name, id, sortname
Example #42
def article_list(request, blog_id=None):
    ret = {'status': 'error', "data": []}
    if blog_id:
        try:
            blog = BlogModel.objects.get(pk=blog_id)
            articles = ArticleModel.objects.filter(niche=blog.niche)
            for article in articles:
                ret["data"].append({'id': article.id, 'title': article.title, 
                                    'text': mark_safe( unescape(article.text))})
            ret['status'] = 'ok';
        except Exception, e:
            print(e)
            pass
Example #43
File: po2csv.py Project: sleepyjames/ppb
def undo_plurals(has_plural, plurals):
    """Undo what `force_plurals` does in order to figure out if just `msgstr`
    or `msgstr[x]` should be set. Returns `(singular_msgstr, plural_msgstr_map)`
    """
    plurals_list = plurals.split(PLURAL_SEPARATOR)
    plurals_dict = {}

    for i, p in enumerate(plurals_list):
        plurals_dict[unicode(i)] = unescape(p)

    if has_plural:
        return '', plurals_dict
    return plurals_dict.get('0', ''), {}
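A hypothetical round-trip to illustrate the return shape, assuming PLURAL_SEPARATOR is '|' (its real value is defined elsewhere in po2csv.py):

# Hypothetical values; PLURAL_SEPARATOR = '|' is an assumption.
undo_plurals('houses', 'house|houses')
# -> ('', {u'0': u'house', u'1': u'houses'})
undo_plurals('', 'house')
# -> (u'house', {})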
Example #44
def strip_tags(text):
    # Preserve paragraph breaks. Convert closing p tags (and surrounding whitespace) into two newlines. Strip trailing whitespace
    text = re.sub("\s*</\s*p\s*>\s*", "\n\n", text).strip()

    # naive stripping of tags, should work okay in this limited context
    text = re.sub("<[^>]+>", "", text)

    # compress and strip whitespace artifacts, except for the paragraph breaks
    text = re.sub("[ \t\r\f\v]{2,}", " ", text).strip()

    # Replace HTML entities with characters.
    text = utils.unescape(text)

    return text
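A hypothetical call showing the intended behavior (assuming utils.unescape decodes HTML entities):

# Hypothetical usage sketch.
html = "<p>Ben &amp; Jerry</p>  <p>ice   cream</p>"
print(strip_tags(html))
# Ben & Jerry
#
# ice cream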
Example #45
 def parse_team_site(self, response):
     team = response.xpath('//div[@class="title"]/text()').extract_first()
     for sel in response.xpath('//div[@class="content"]/table/tr'):
         tournament_link = sel.xpath('td/a/@href').extract_first()
         if tournament_link is not None:
             data = sel.xpath('td/text()').extract()
             tournament_name = sel.xpath('td/a/text()').extract_first()
             if len(data) == 6:
                 date, tournament_town, ranking, zf, tw, points = data
                 item = JtrTournamentPartition()
                 item['tournament_date'] = date
                 item['crawl_date'] = datetime.datetime.now()
                 item['ranking'] = int(ranking.split("/")[0].strip().strip("."))
                 home_town, team_name = team.split("-", 1)
                 item['team_name'] = utils.unescape(team_name.strip())
                 item['team_hometown'] = utils.unescape(home_town.strip())
                 item['tournament_town'] = utils.unescape(tournament_town)
                 item['tournament_name'] = utils.unescape(tournament_name)
                 home_town = self._locate(home_town)
                 tournament_town = self._locate(tournament_town)
                 item["team_hometown_position"] = self._get_geohash(home_town)
                 item["tournament_town_position"] = self._get_geohash(tournament_town)
                 item["distance"] = self._get_distance(home_town, tournament_town)
                 yield item
Example #46
def get_tracks(album, links):
    track_num = 1
    for link in links:
        track = {}
        _, track['name'] = link['data-title'].split(' — ')
        track['year'] = album.year
        track['album'] = album
        track['name'] = unescape(track['name'])
        track['number'] = track_num
        track['url'] = url + link['data-mp3url']
        new_track = Track(**track)
        session.add(new_track)
        track_num += 1
    session.commit()
    return track_num
Example #47
def article_list(request, blog_id=None):
    ret = {'status': 'error', "data": []}
    if blog_id:
        try:
            blog = BlogModel.objects.get(pk=blog_id)
            articles = ArticleModel.objects.filter(niche=blog.niche)
            for article in articles:
                ret["data"].append({
                    'id': article.id,
                    'title': article.title,
                    'text': mark_safe(unescape(article.text))
                })
            ret['status'] = 'ok'
        except Exception, e:
            print(e)
            pass
Example #48
 def get(self):
     permauser = PermaUser.get_current_permauser()
     conver_url = utils.unescape(self.request.get('url'))
     conver = Conver.get_for_url(conver_url)
     messages = Message.all().filter('conver =', conver).order('created').fetch(1000)
     self.response.out.write(template.render(
         os.path.join(os.path.dirname(__file__),
         'templates/conver.html'), 
         {
             'token': channel.create_channel(permauser.user_id() + str(conver.key().id_or_name())),
             'conver_url': conver_url,
             'messages': [ {'author': message.author.display_name(), 'text': message.text} for message in messages],
             'loginorout_text': 'Log out',
             'loginorout_url': users.create_logout_url(self.request.uri)
         }
     ))
Example #49
File: api.py Project: androa/vgtv-xbmc
    def get_categories(self, root_id=0):
        categories = self.get_category_tree()

        matches = []
        for id in categories:

            if int(id) < 0:
                continue

            category = categories.get(id)
            if (int(category.get('parentId')) == int(root_id)):
                matches.append({
                    'label': unescape(category.get('name')),
                    'path':  self.plugin.url_for('show_category', id=str(id)),
                    'id':    id
                })

        return matches
Example #50
def getTVGuide(tvchannel):
    url = getChannelGuideUrl(tvchannel)
    
    if not url:
        return None
    
    try:
        req = urllib2.Request(url)
        req.add_header('User-Agent', common.HEADERS['User-Agent'])
        conn = urllib2.urlopen(req, timeout=5)
        html = conn.read()
        conn.close()
        
        soup = BeautifulSoup(html, 'html5lib')
        tds = soup.findAll('td', attrs={'class': 'container_events'})
        tds = [tds[i] for i in xrange(len(tds)) if divmod(i, 4)[1] == 0]
        
        hours = []
        titles = []
        
        for td in tds:
            hours.extend(td.findAll('td', attrs={'class': 'ora'}))
            titles.extend(td.findAll('div', attrs={'class': 'title'}))
        
        if not hours or not titles or len(hours) != len(titles):
            return None
        
        items = []
        
        for i in xrange(len(titles)):
            current = 'current' in str(hours[i])
            hour = re.search(r'<div>(\d+:\d+)<\/div>', str(hours[i])).group(1)
            title = titles[i].getText().strip()
            title = ' '.join(title.split())
            title = utils.unescape(title, True)
            item = (hour, title, current)
            items.append(item)
        
        return items
    
    except:
        log_utils.log(traceback.print_exc())
    
    return None
Example #52
def find_links(doc_id):
    if doc_id is None:
        return

    doc = Page.load(settings.db, doc_id)

    if doc.content is None:
        print "Got None for the content of %s -> %s." % (doc_id, doc.url)
        return

    raw_links = []
    for match in link_single_re.finditer(doc.content):
        raw_links.append(match.group(1))

    for match in link_double_re.finditer(doc.content):
        raw_links.append(match.group(1))

    doc.links = []
    for link in raw_links:
        if link.startswith("#"):
            continue
        elif link.startswith("http://") or link.startswith("https://"):
            pass
        elif link.startswith("/"):
            parse = urlparse(doc["url"])
            link = parse.scheme + "://" + parse.netloc + link
        else:
            link = "/".join(doc["url"].split("/")[:-1]) + "/" + link

        doc.links.append(unescape(link.split("#")[0]))

    print "find_links %s -> %i" % (doc.url, len(doc.links))
    doc.store(settings.db)

    calculate_rank.delay(doc.id)

    for link in doc.links:
        p = Page.get_id_by_url(link, update=False)
        if p is not None:
            calculate_rank.delay(p)
        else:
            retrieve_page.delay(link)
Example #53
    res = db.find(query, sort=order)
    total = res.count()
    paginator = Paginator(res, limit)
    try:
        res = paginator.page(page)
    except (EmptyPage, InvalidPage):
        res = paginator.page(paginator.num_pages)

    if request.GET.get('format', '') == 'json':
        res = [{
            'url': unicode(obj['url']),
            'title': unicode(obj['title']),
            'created': tuple(obj['created'].timetuple()),
            'private': obj['private'],
            'notes': unicode(unescape(obj['notes'])),
            'tags': obj['tags']
        } for obj in res.object_list]
        if request.GET.get('j') == None:
            return HttpResponse(json.dumps(res), mimetype="application/json")
        return HttpResponse("var omnom_posts = " + json.dumps(res) + ";",
                            mimetype="text/javascript")

    if request.GET.get('format', '') == 'atom':
        tpl = 'atom.xml'
    else:
        tpl = 'list.html'

    res.object_list = [{
        'seq':
        obj['seq'],
Example #54
def latex2png(picture_element, preamble, return_eps=False, page_width_px=None,
              dpi=150, included_files={}, pdflatexpath=None):
    """
    Create a PNG image from latex.

    Inputs:

      picture_element - etree.Element

      preamble - which preamble to use, one of PsPicture_preamble, tikzpicture_preamble
      or equation_preamble

      return_eps - whether to also return the intermediate EPS file

      page_width_px - page width in pixels, used to scale the
        style:width attribute in the element.

      dpi - Will be used only if the width of the figure relative to
        the page width was not set (or the page width in pixels was not
        passed as an argument).

    Outputs:

    One or two paths, the first to the PNG, the second to the EPS.
    """
    temp_dir = tempfile.mkdtemp()
    latex_path = os.path.join(temp_dir, 'figure.tex')
    png_path = os.path.join(temp_dir, 'figure.png')
    pdf_path = os.path.join(temp_dir, 'figure.pdf')

    # can send the raw string code or a <pre> element with <code> child
    if isinstance(picture_element, (str, unicode)):
        code = picture_element
        code = cleanup_code(code)
    else:
        code = picture_element.find('.//code').text.encode('utf-8')
    code = code.replace(r'&amp;', '&').replace(r'&gt;', '>').replace(r'&lt;', '<')

    if not code:
        raise ValueError("Code cannot be empty.")

    with open(latex_path, 'wt') as fp:
        temp = unescape(preamble.replace('__CODE__', code.strip()))
        try:
            fp.write(temp)
        except UnicodeEncodeError:
            fp.write(temp.encode('utf-8'))

    for path, path_file in included_files.iteritems():
        try:
            os.makedirs(os.path.join(temp_dir, os.path.dirname(path)))
        except OSError:
            # Catch exception if path already exists
            pass
        with open(os.path.join(temp_dir, path), 'wb') as fp:
            fp.write(path_file.read())

    if not pdflatexpath:
        raise ValueError("pdflatexpath cannot be None")

    errorLog, temp = execute([pdflatexpath,
                              "-shell-escape", "-halt-on-error",
                              "-output-directory", temp_dir, latex_path])
    try:
        open(pdf_path, "rb")
    except IOError:
        raise LatexPictureError(
            "LaTeX failed to compile the image. %s \n%s" % (
                latex_path, preamble.replace('__CODE__', code.strip())))

    # crop the pdf image too
    # execute(['pdfcrop', '--margins', '1', pdfPath, pdfPath])

    execute(['convert', '-density', '%i' % dpi, pdf_path, png_path])

    return png_path
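A hypothetical invocation; tikzpicture_preamble (mentioned in the docstring) and the pdflatex path are assumptions about the surrounding module:

# Hypothetical usage sketch.
png = latex2png(r'\begin{tikzpicture}\draw (0,0) -- (1,1);\end{tikzpicture}',
                tikzpicture_preamble,
                pdflatexpath='/usr/bin/pdflatex')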
Example #55
def listLinksInComment(url, name, type_):
    from domains import parse_reddit_link, build_DirectoryItem_url_based_on_media_type
    from utils import markdown_to_bbcode, unescape
    from guis import progressBG
    #from resources.domains import make_addon_url_from
    #called from context menu
    log('listLinksInComment:%s:%s' % (type_, url))

    #does not work for list comments coz key is the playable url (not reddit comments url)
    #msg=WINDOW.getProperty(url)
    #WINDOW.clearProperty( url )
    #log( '   msg=' + msg )

    directory_items = []
    author = ""
    ShowOnlyCommentsWithlink = False

    if type_ == 'linksOnly':
        ShowOnlyCommentsWithlink = True

    #url='https://www.reddit.com/r/Music/comments/4k02t1/bonnie_tyler_total_eclipse_of_the_heart_80s_pop/' + '.json'
    #only get up to "https://www.reddit.com/r/Music/comments/4k02t1".
    #   do not include                                            "/bonnie_tyler_total_eclipse_of_the_heart_80s_pop/"
    #   because we'll have a problem when it looks like this: "https://www.reddit.com/r/Overwatch/comments/4nx91h/ever_get_that_feeling_déjà_vu/"

    #url=re.findall(r'(.*/comments/[A-Za-z0-9]+)',url)[0]

    #use safe='' argument in quoteplus to encode only the weird chars part
    url = urllib.quote_plus(url, safe=':/?&')
    if '?' in url:
        url = url.split('?', 1)[0] + '.json?' + url.split('?', 1)[1]
    else:
        url += '.json'

    loading_indicator = progressBG(translation(30024))
    loading_indicator.update(0, 'Retrieving comments')

    content = reddit_request(url)
    if not content:
        loading_indicator.end()
        return

    loading_indicator.update(10, 'Parsing')
    content = json.loads(content)

    del harvest[:]
    #harvest links in the post text (just 1)
    r_linkHunter(content[0]['data']['children'])

    try:
        submitter = content[0]['data']['children'][0]['data']['author']
    except:
        submitter = ''

    #the post title is provided in json, we'll just use that instead of messages from addLink()
    try:
        post_title = content[0]['data']['children'][0]['data']['title']
    except:
        post_title = ''
    #for i, h in enumerate(harvest):
    #    log("aaaaa first harvest "+h[2])

    #harvest links in the comments (content[1] is the comment tree)
    r_linkHunter(content[1]['data']['children'])

    comment_score = 0

    loading_indicator.set_tick_total(len(harvest))

    for i, h in enumerate(harvest):
        try:
            #log(str(i)+"  score:"+ str(h[0]).zfill(5)+" "+ h[1] +'|'+ h[3] )
            comment_score = h[0]
            #log("score %d < %d (%s)" %(comment_score,int_CommentTreshold, CommentTreshold) )
            link_url = h[2]
            desc100 = h[3].replace('\n', ' ')[0:100]  #first 100 characters of description

            kind = h[6]  #reddit uses t1 for user comments, t3 for the OP's own text of the post
            d = h[5]  #depth of the comment

            tab = " " * d if d > 0 else "-"

            from urlparse import urlparse
            domain = '{uri.netloc}'.format(uri=urlparse(link_url))

            author = h[7]
            DirectoryItem_url = ''

            if comment_score < int_CommentTreshold:
                continue

            #hoster, DirectoryItem_url, videoID, mode_type, thumb_url,poster_url, isFolder,setInfo_type, setProperty_IsPlayable =make_addon_url_from(h[2])
            #if link_url:
            #    log( '  comment %s TITLE:%s... link[%s]' % ( str(d).zfill(3), desc100.ljust(20)[:20],link_url ) )

            ld = parse_reddit_link(link_url=link_url,
                                   assume_is_video=False,
                                   needs_preview=True,
                                   get_playable_url=True)

            if kind == 't1':
                list_title = r"[COLOR cadetblue]%3d[/COLOR] %s" % (h[0], tab)
            elif kind == 't3':
                list_title = r"[COLOR cadetblue]Title [/COLOR] %s" % (tab)

            #helps the textbox control treat [url description] and (url) as separate words, so they can be split onto 2 lines
            plot = h[3].replace('](', '] (')
            plot = markdown_to_bbcode(plot)
            plot = unescape(plot)  #convert html entities e.g.:(&#39;)

            liz = xbmcgui.ListItem(label=list_title + ': ' + desc100)

            liz.setInfo(type="Video",
                        infoLabels={
                            "Title": h[1],
                            "plot": plot,
                            "studio": domain,
                            "votes": str(comment_score),
                            "director": author
                        })
            isFolder = False

            #force all links to ytdl to see if it can be played
            if link_url:
                DirectoryItem_url, setProperty_IsPlayable, isFolder, title_prefix = build_DirectoryItem_url_based_on_media_type(
                    ld, link_url)

                liz.setProperty('IsPlayable', setProperty_IsPlayable)
                liz.setProperty('url', DirectoryItem_url)  #<-- needed by the xml gui skin
                liz.setPath(DirectoryItem_url)

                if domain:
                    plot = "  [COLOR greenyellow][%s] %s[/COLOR]" % (domain, plot)
                else:
                    plot = "  [COLOR greenyellow][%s][/COLOR]" % (plot)
                liz.setLabel(list_title + plot)

                if ld:
                    liz.setArt({
                        "thumb": ld.poster,
                        "poster": ld.poster,
                        "banner": ld.poster,
                        "fanart": ld.poster,
                        "landscape": ld.poster
                    })

            if DirectoryItem_url:
                #log( 'IsPlayable:'+setProperty_IsPlayable )
                directory_items.append((
                    DirectoryItem_url,
                    liz,
                    isFolder,
                ))
                #xbmcplugin.addDirectoryItem(handle=pluginhandle,url=DirectoryItem_url,listitem=liz,isFolder=isFolder)
            else:
                #this section is for comments that have no links
                if not ShowOnlyCommentsWithlink:
                    result = h[3].replace('](', '] (')
                    result = markdown_to_bbcode(result)
                    liz = xbmcgui.ListItem(label=list_title + desc100)
                    liz.setInfo(type="Video",
                                infoLabels={
                                    "Title": h[1],
                                    "plot": result,
                                    "studio": domain,
                                    "votes": str(h[0]),
                                    "director": author
                                })
                    liz.setProperty('IsPlayable', 'false')

                    directory_items.append((
                        "",
                        liz,
                        False,
                    ))
                    #xbmcplugin.addDirectoryItem(handle=pluginhandle,url="",listitem=liz,isFolder=False)

                #END of section for comments that have no links or unsupported links
        except Exception as e:
            log('  EXCEPTION:' + str(e))

        #for di in directory_items:
        #    log( str(di) )

        loading_indicator.tick(1, desc100)
    loading_indicator.end()

    #log('  comments_view id=%s' %comments_viewMode)

    #xbmcplugin.setContent(pluginhandle, "mixed")  #in estuary, mixed have limited view id's available. it has widelist which is nice for comments but we'll just stick with 'movies'
    xbmcplugin.setContent(pluginhandle, "episodes")  #files, songs, artists, albums, movies, tvshows, episodes, musicvideos
    xbmcplugin.setPluginCategory(pluginhandle, 'Comments')

    xbmcplugin.addDirectoryItems(handle=pluginhandle, items=directory_items)
    xbmcplugin.endOfDirectory(pluginhandle)

    if comments_viewMode:
        xbmc.executebuiltin('Container.SetViewMode(%s)' % comments_viewMode)
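
A sketch of how this handler might be dispatched from the addon's context menu; the permalink is made up and the call only works inside a Kodi session:

# hypothetical dispatch; the post url is invented
post_url = 'https://www.reddit.com/r/videos/comments/abc123/example_post/'
listLinksInComment(post_url, name='', type_='linksOnly')  # only comments carrying links
listLinksInComment(post_url, name='', type_='')           # every comment
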
示例#56
0
def getFavourites(file,
                  limit=10000,
                  validate=True,
                  superSearch=False,
                  chooser=False):
    import xbmcgui

    prefix = ''
    if not chooser:
        prefix = 'HOME:' if xbmcgui.getCurrentWindowId() == 10000 else ''

    xml = '<favourites></favourites>'
    if sfile.exists(file):
        xml = sfile.read(file)

    items = []

    faves = re.compile('<favourite(.+?)</favourite>').findall(xml)

    for fave in faves:
        fave = fave.replace('&quot;', '&_quot_;')
        fave = fave.replace('\'', '"')
        fave = utils.unescape(fave)

        fave = fave.replace('name=""', '')
        try:
            name = re.compile('name="(.+?)"').findall(fave)[0]
        except:
            name = ''

        try:
            thumb = re.compile('thumb="(.+?)"').findall(fave)[0]
        except:
            thumb = ''

        try:
            cmd = fave.split('>', 1)[-1]
        except:
            cmd = ''

        #name  = utils.Clean(name.replace( '&_quot_;', '"'))
        name = name.replace('&_quot_;', '"')
        thumb = thumb.replace('&_quot_;', '"')
        cmd = cmd.replace('&_quot_;', '"')

        add = False

        if superSearch:
            add = isValid(cmd)
        elif (SHOWUNAVAIL) or (not validate) or isValid(cmd):
            add = True

        if add:
            cmd = upgradeCmd(cmd)

            if cmd.startswith('PlayMedia'):
                option = 'mode'
                try:
                    mode = int(favourite.getOption(cmd, option))
                except:
                    win = xbmcgui.getCurrentWindowId()
                    cmd = updateSFOption(cmd, 'winID', win)

            name = resolve(name)
            cmd = patch(cmd)
            cmd = resolve(cmd)
            cmd = prefix + cmd

            items.append([name, thumb, cmd])
            if len(items) >= limit:  # stop once the limit is reached
                return items

    return items
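
A sketch of calling getFavourites() against Kodi's standard favourites file; the translatePath lookup is an assumption about how a caller would locate it:

# hypothetical caller, runnable only inside Kodi
import xbmc

fave_file = xbmc.translatePath('special://profile/favourites.xml')
for name, thumb, cmd in getFavourites(fave_file, limit=50, validate=False):
    print('%s -> %s' % (name, cmd))
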
示例#57
0
         elif _command.startswith("movie_title="):
             titles = re.split("=", _command, maxsplit=1)[1]
         movie_titles = titles.split(";")
         if not movie_titles == "":
             _build_playlist(movie_titles)
             exit = Script().start_script("oldway")
         else:
             exit = False
     elif _command.startswith(
             "open_settings"):  # Open Settings
         __addon__.openSettings()
         exit = False
 elif sys.argv[1].startswith(
         "jsonquery="):  # JSON RPC Query
     _clear_playlists()
     jsonquery = utils.unescape(
         re.split("=", sys.argv[1], maxsplit=1)[1])
     jsonquery = (jsonquery.replace("<li>", ":")).replace(
         "<lic>", ",")
     #print jsonquery
     movie_ids = Script()._jsonrpc_query(jsonquery)
     if movie_ids:
         _build_playlist(movie_ids)
         exit = Script().start_script("oldway")
     else:
         exit = False
 elif sys.argv[1].startswith("movieid="):
     _clear_playlists()
     movie_id = sys.argv[1].split("=")[1]
     movie_ids = movie_id.split(";")
     if movie_ids:
         _build_playlist(movie_ids, mode="movie_ids")
示例#58
0
def run():
    def update_birthday(bioguide, person, main):

        birthday = birthday_for(main)
        if not birthday:
            print("[%s] NO BIRTHDAY :(\n\n%s" %
                  (bioguide, main.encode("utf8")))
            warnings.append(bioguide)
            return
        if birthday == "UNKNOWN":
            return

        try:
            birthday = datetime.datetime.strptime(birthday.replace(",", ""),
                                                  "%B %d %Y")
        except ValueError:
            print("[%s] BAD BIRTHDAY :(\n\n%s" %
                  (bioguide, main.encode("utf8")))
            warnings.append(bioguide)
            return

        birthday = "%04d-%02d-%02d" % (birthday.year, birthday.month,
                                       birthday.day)
        person.setdefault("bio", {})["birthday"] = birthday

    def birthday_for(string):
        # exceptions for not-nicely-placed semicolons
        string = string.replace(
            "born in Cresskill, Bergen County, N. J.; April", "born April")
        string = string.replace(
            "FOSTER, A. Lawrence, a Representative from New York; September 17, 1802;",
            "born September 17, 1802")
        string = string.replace(
            "CAO, Anh (Joseph), a Representative from Louisiana; born in Ho Chi Minh City, Vietnam; March 13, 1967",
            "born March 13, 1967")
        string = string.replace(
            "CRITZ, Mark S., a Representative from Pennsylvania; born in Irwin, Westmoreland County, Pa.; January 5, 1962;",
            "born January 5, 1962")
        string = string.replace(
            "SCHIFF, Steven Harvey, a Representative from New Mexico; born in Chicago, Ill.; March 18, 1947",
            "born March 18, 1947")
        string = string.replace(
            'KRATOVIL, Frank, M. Jr., a Representative from Maryland; born in Lanham, Prince George\u2019s County, Md.; May 29, 1968',
            "born May 29, 1968")

        # look for a date
        pattern = r"born [^;]*?((?:January|February|March|April|May|June|July|August|September|October|November|December),? \d{1,2},? \d{4})"
        match = re.search(pattern, string, re.I)
        if not match or not match.group(1):
            # specifically detect cases that we can't handle to avoid unnecessary warnings
            if re.search("birth dates? unknown|date of birth is unknown",
                         string, re.I):
                return "UNKNOWN"
            if re.search(
                    r"born [^;]*?(?:in|about|before )?(?:(?:January|February|March|April|May|June|July|August|September|October|November|December) )?\d{4}",
                    string, re.I):
                return "UNKNOWN"
            return None
        return match.group(1).strip()

    def relationships_of(string):
        # relationship data is stored in a parenthetical immediately after the end of the </font> tag in the bio
        # e.g. "(son of Joseph Patrick Kennedy, II, and great-nephew of Edward Moore Kennedy and John Fitzgerald Kennedy)"
        pattern = "^\((.*?)\)"
        match = re.search(pattern, string, re.I)

        relationships = []

        if match and len(match.groups()) > 0:
            relationship_text = match.group(1).encode("ascii", "replace")

            # since some relationships refer to multiple people--great-nephew of Edward Moore Kennedy AND John Fitzgerald Kennedy--we need a special grammar
            from nltk import tree, pos_tag, RegexpParser
            tokens = re.split("[ ,;]+|-(?![0-9])", relationship_text)
            pos = pos_tag(tokens)

            grammar = r"""
        NAME: {<NNP>+}
        NAMES: { <IN><NAME>(?:<CC><NAME>)* }
        RELATIONSHIP: { <JJ|NN|RB|VB|VBD|VBN|IN|PRP\$>+ }
        MATCH: { <RELATIONSHIP><NAMES> }
        """
            cp = RegexpParser(grammar)
            chunks = cp.parse(pos)

            # iterate through the Relationship/Names pairs
            # (Tree.node is the pre-NLTK-3 API; NLTK >= 3 renamed it to Tree.label())
            for n in chunks:
                if isinstance(n, tree.Tree) and n.node == "MATCH":
                    people = []
                    relationship = None
                    for piece in n:
                        if piece.node == "RELATIONSHIP":
                            relationship = " ".join([x[0] for x in piece])
                        elif piece.node == "NAMES":
                            for name in [
                                    x for x in piece
                                    if isinstance(x, tree.Tree)
                            ]:
                                people.append(" ".join([x[0] for x in name]))
                    for person in people:
                        relationships.append({
                            "relation": relationship,
                            "name": person
                        })
        return relationships

    # default to caching
    cache = utils.flags().get('cache', True)
    force = not cache

    # pick either current or historical
    # order is important here, since current defaults to true
    if utils.flags().get('historical', False):
        filename = "legislators-historical.yaml"
    elif utils.flags().get('current', True):
        filename = "legislators-current.yaml"
    else:
        print("No legislators selected.")
        exit(0)

    print("Loading %s..." % filename)
    legislators = load_data(filename)

    # reoriented cache to access by bioguide ID
    by_bioguide = {}
    for m in legislators:
        if "bioguide" in m["id"]:
            by_bioguide[m["id"]["bioguide"]] = m

    # optionally focus on one legislator

    bioguide = utils.flags().get('bioguide', None)
    if bioguide:
        bioguides = [bioguide]
    else:
        bioguides = list(by_bioguide.keys())

    warnings = []
    missing = []
    count = 0
    families = 0

    for bioguide in bioguides:
        # Download & parse the HTML of the bioguide page.

        url = "http://bioguide.congress.gov/scripts/biodisplay.pl?index=%s" % bioguide
        cache = "legislators/bioguide/%s.html" % bioguide
        try:
            body = download(url, cache, force)

            # Fix a problem?
            body = body.replace("&Aacute;\xc2\x81", "&Aacute;")

            # Entities like &#146; are in Windows-1252 encoding. Normally lxml
            # handles that for us, but we're also parsing HTML. The lxml.html.HTMLParser
            # doesn't support specifying an encoding, and the lxml.etree.HTMLParser doesn't
            # provide a cssselect method on element objects. So we'll just decode ourselves.
            body = utils.unescape(body, "Windows-1252")

            dom = lxml.html.parse(io.StringIO(body)).getroot()
        except lxml.etree.XMLSyntaxError:
            print("Error parsing: ", url)
            continue

        # Sanity check.

        if len(dom.cssselect("title")) == 0:
            print("[%s] No page for this bioguide!" % bioguide)
            missing.append(bioguide)
            continue

        # Extract the member's name and the biography paragraph (main).

        try:
            name = dom.cssselect("p font")[0]
            main = dom.cssselect("p")[0]
        except IndexError:
            print("[%s] Missing name or content!" % bioguide)
            exit(0)

        name = name.text_content().strip()
        main = main.text_content().strip().replace("\n", " ").replace("\r", " ")
        main = re.sub(r"\s+", " ", main)

        # Extract the member's birthday.

        update_birthday(bioguide, by_bioguide[bioguide], main)

        # Extract relationships with other Members of Congress.

        if utils.flags().get("relationships", False):
            #relationship information, if present, is in a parenthetical immediately after the name.
            #should always be present if we passed the IndexError catch above
            after_name = dom.cssselect("p font")[0].tail.strip()
            relationships = relationships_of(after_name)
            if len(relationships):
                families = families + 1
                by_bioguide[bioguide]["family"] = relationships

        count = count + 1

    print()
    if warnings:
        print("Missed %d birthdays: %s" %
              (len(warnings), str.join(", ", warnings)))

    if missing:
        print("Missing a page for %d bioguides: %s" %
              (len(missing), str.join(", ", missing)))

    print("Saving data to %s..." % filename)
    save_data(legislators, filename)

    print("Saved %d legislators to %s" % (count, filename))

    if utils.flags().get("relationships", False):
        print("Found family members for %d of those legislators" % families)
示例#59
0
def fget(self):
    return unescape(self.__raw.title)
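
A getter like this is normally wired up through property(); a sketch of the surrounding class, where the class name and the raw entry object are assumptions:

# hypothetical wrapper; only unescape() comes from the snippet above
class FeedEntry(object):
    def __init__(self, raw):
        self.__raw = raw  # parsed entry exposing a .title attribute

    def fget(self):
        return unescape(self.__raw.title)

    title = property(fget)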