def getreviews(logindata, gamedata, page=1):
    """Record the user's review verdict for every game on the profile's review pages.

    Fetches ``<profile url>/recommended/?p=<page>``, walks every
    ``div.review_box`` on it and stores the verdict under
    ``gamedata[appid]["reviewed"]``.  The appid is the first run of digits in
    the href of the first link in the box's ``leftcol``; the verdict is the
    lower-cased text between ``thumbs`` and the final ``.`` of the thumb
    image's ``src``.  The function then recurses onto the next page until the
    ``workshopBrowsePagingInfo`` text reports "<n> of <n>".

    Args:
        logindata: Passed unchanged to ``utils.getprofileurl`` to build the
            profile URL.
        gamedata: Mapping of appid string to a per-game dict; mutated in
            place.
        page: Page number to fetch.  When it is 1, every existing
            ``"reviewed"`` entry is cleared before scraping.

    Returns:
        None.  Scraping is best effort: any failure while fetching or parsing
        a page is logged and ends the walk, leaving the verdicts collected so
        far in ``gamedata``.
    """
    import logging

    if page == 1:
        # Fresh scrape: drop verdicts left over from an earlier run.  pop()
        # with a default removes the same key that is written below and never
        # raises when the key is absent.
        for game in gamedata:
            gamedata[game].pop("reviewed", None)

    try:
        url = utils.getprofileurl(logindata) + "/recommended/?p=" + str(page)
        appinfofile = urllib2.urlopen(url)
        try:
            # NOTE(review): assumes bs (presumably BeautifulSoup) consumes the
            # response while building the document, so the connection can be
            # released immediately afterwards -- confirm.
            doc = bs(appinfofile)
        finally:
            appinfofile.close()

        for el in doc.findAll("div", {"class": "review_box"}):
            href = el.find("div", {"class": "leftcol"}).find("a")["href"]
            appid = re.search(r'\d+', href).group()
            thumb = el.find("div", {"class": "rightcol"}).find("div", {"class": "thumb"})
            src = thumb.find("a").find("img")["src"]
            review = re.search(r'(?<=thumbs).*(?=\.)', src).group()
            try:
                gamedata[appid]["reviewed"] = review.lower()
            except KeyError:
                # Review for a game that is not in gamedata: skip it instead
                # of abandoning the rest of this page and all later pages.
                continue

        paging = doc.find("div", {"class": "workshopBrowsePagingInfo"})
        if paging is None:
            # No paging block on the page, so there is nothing more to fetch.
            return
        revpos = re.search(r'([0-9]+) of ([0-9]+)', paging.text)
        if revpos is None:
            return
        shown, total = revpos.groups()
        if shown != total:
            getreviews(logindata, gamedata, page + 1)
    except Exception:
        # Deliberately best effort, but report the failure rather than hide
        # it, and let KeyboardInterrupt / SystemExit propagate.
        logging.getLogger(__name__).warning(
            "getreviews: failed to process review page %s", page, exc_info=True)
def GetBaiyRealUrl(playurl, times=0):
    """Resolve the entries behind a play page, retrying when an attempt fails.

    One attempt fetches ``playurl``, takes the first ``<ul id="playlist">``
    element, follows the ``src`` of its ``<script>`` child on ``BAIY_HOST``,
    pulls the argument of ``unescape('...')`` out of that script,
    URL-unquotes it and splits it into entries.

    Args:
        playurl: URL of the play page, handed to ``fetch``.
        times: Number of failed attempts already made.  Callers normally
            leave it at 0; attempts continue while ``times <= MAX_TRY``.

    Returns:
        A list of lists of strings: the unquoted payload split on ``'$$$'``,
        each piece split again on ``'$'``.  The list is empty when the page
        has no playlist or script ``src``, or when every attempt failed.
    """
    while times <= MAX_TRY:
        try:
            res = []
            # fetch() yields a 4-tuple; only its last item (the body) is used.
            _, _, _, response = fetch(playurl)
            soup = bs(response)
            playlist = soup.findAll('ul', id="playlist")
            if playlist:
                newplayurl = playlist[0].script['src']
                if newplayurl:
                    url = 'http://' + BAIY_HOST + newplayurl
                    _, _, _, response = fetch(url)
                    # Raw string: same pattern as before, without relying on
                    # unrecognised "\(" / "\)" escapes in a normal literal.
                    uri = re.findall(r"(?<=unescape\(').*.(?='\);)", response)[0]
                    info = urllib.unquote(uri)
                    for s in info.split('$$$'):
                        res.append(s.split('$'))
            return res
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit are allowed to propagate.
            t, v, tb = sys.exc_info()
            log.error("GetBaiyRealUrl playurl: %s, %s,%s,%s" % (playurl, t, v, traceback.format_tb(tb)))
            times += 1
    return []