def _search_person_cb(what):
    print "searching for " + what
    page = util.request(BASE_URL + "hledat/complete-films/?q=" + urllib.quote(what))
    results = []
    data = util.substr(page, '<div id="search-creators', '<div class="footer')
    for m in re.finditer(
        '<h3 class="subject"[^<]+<a href="(?P<url>[^"]+)[^>]+>(?P<name>[^<]+).+?<p>(?P<info>[^<]+)',
        data,
        re.DOTALL | re.IGNORECASE,
    ):
        results.append((m.group("url"), m.group("name") + " (" + m.group("info") + ")"))
    for m in re.finditer(
        "<li(?P<item>.+?)</li>",
        util.substr(data, '<ul class="creators others', "</div"),
        re.DOTALL | re.IGNORECASE,
    ):
        base = re.search('<a href="(?P<url>[^"]+)[^>]+>(?P<name>[^<]+)', m.group("item"))
        if base:
            name = base.group("name")
            for n in re.finditer("<span[^>]*>(?P<data>[^<]+)", m.group("item")):
                name = "%s %s" % (name, n.group("data"))
            results.append((base.group("url"), name))
    for url, name in results:
        info = scrapper._empty_info()
        info["url"] = url
        add_person(name, info)
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def list_show_page(self, url, page, seasons=False, episodes=False):
    result = []
    if "/p/epizody" in url or "/p/epiz%C3%B3dy" in url or "p/archiv" in url:
        if seasons:
            season_data = util.substr(page, SERIES_START2, SERIES_END2)
            for m in re.finditer(SERIES_ITER_RE2, season_data, re.DOTALL | re.IGNORECASE):
                item = self.dir_item()
                item['title'] = m.group('title')
                item['url'] = url + '#post=%s' % (m.group('id'))
                self._filter(result, item)
        if episodes:
            for m in re.finditer(EPISODE_ITER_RE2, page, re.DOTALL | re.IGNORECASE):
                item = self.video_item()
                item['title'] = "%s (%s)" % (m.group('title'), m.group('date'))
                item['url'] = m.group('url')
                self._filter(result, item)
    else:
        if seasons:
            season_data = util.substr(page, SERIES_START, SERIES_END)
            for m in re.finditer(SERIES_ITER_RE, season_data, re.DOTALL | re.IGNORECASE):
                item = self.dir_item()
                item['title'] = m.group('title')
                item['url'] = 'http://' + urlparse.urlparse(url).netloc + '/ajax.json?' + m.group('url')
                self._filter(result, item)
        if episodes:
            episodes_data = util.substr(page, EPISODE_START, EPISODE_END)
            for m in re.finditer(EPISODE_ITER_RE, episodes_data, re.DOTALL | re.IGNORECASE):
                item = self.video_item()
                item['title'] = "%s. %s (%s)" % (m.group('episode'), m.group('title'), m.group('date'))
                item['url'] = m.group('url')
                self._filter(result, item)
    return result
def _search_movie_cb(what):
    print "searching for movie " + what
    url = BASE_URL + "hledat/complete-films/?q=" + urllib.quote(what)
    page = util.request(url)
    results = []
    data = util.substr(page, '<div id="search-films', '<div class="footer')
    for m in re.finditer(
        '<h3 class="subject"[^<]+<a href="(?P<url>[^"]+)[^>]+>(?P<name>[^<]+).+?<p>(?P<info>[^<]+)',
        data,
        re.DOTALL | re.IGNORECASE,
    ):
        results.append((m.group("url"), "%s (%s)" % (m.group("name"), m.group("info"))))
    for m in re.finditer(
        "<li(?P<item>.+?)</li>",
        util.substr(data, '<ul class="films others', "</div"),
        re.DOTALL | re.IGNORECASE,
    ):
        base = re.search('<a href="(?P<url>[^"]+)[^>]+>(?P<name>[^<]+)', m.group("item"))
        if base:
            name = base.group("name")
            for n in re.finditer("<span[^>]*>(?P<data>[^<]+)", m.group("item")):
                name = "%s %s" % (name, n.group("data"))
            results.append((base.group("url"), name))
    if preload():
        return preload_items(results)
    add_items(results)
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def list_episodes(self, url, page=0):
    result = []
    if url.find('ajax.json') != -1:
        headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': util.substr(url, url, url.split('/')[-1])
        }
        httpdata = util.request(url, headers)
        httpdata = util.json.loads(httpdata)['content']
    else:
        httpdata = util.request(url)
        httpdata = util.substr(httpdata, EPISODE_START, EPISODE_END)
    entries = 0
    skip_entries = MAX_PAGE_ENTRIES * page
    for m in re.finditer(EPISODE_ITER_RE, httpdata, re.DOTALL | re.IGNORECASE):
        entries += 1
        if entries < skip_entries:
            continue
        item = self.video_item()
        item['title'] = "%s. %s (%s)" % (m.group('episode'), m.group('title'), m.group('date'))
        item['url'] = m.group('url')
        self._filter(result, item)
        if entries >= (skip_entries + MAX_PAGE_ENTRIES):
            page += 1
            item = self.dir_item()
            item['type'] = 'next'
            item['url'] = "#episodes##%d#" % (page) + url
            result.append(item)
            break
    return result
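# NOTE: a hedged sketch (not from the original codebase) of how a dispatcher
# might unpack the '#episodes##<page>#<url>' paging token built above before
# calling list_episodes() again; `dispatch` and `provider` are illustrative.
import re

def dispatch(provider, url):
    m = re.match(r'#episodes##(\d+)#(.+)', url)
    if m:
        # the token carries the page number and the real listing URL
        return provider.list_episodes(m.group(2), page=int(m.group(1)))
    return provider.list_episodes(url)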
def show(self, page):
    result = []
    data = util.substr(page, '<div id="search', "<hr>")
    for m in re.finditer(
        '<div.+?class="scene[^>]*>.+?<img src="(?P<img>[^"]+)" alt="(?P<name>[^"]+).+?<div class="sc-name">(?P<author>[^<]+).+?<a href="(?P<url>[^"]+)',
        data,
        re.IGNORECASE | re.DOTALL,
    ):
        name = "%s (%s)" % (m.group("name"), m.group("author"))
        item = self.video_item()
        item["title"] = name
        item["url"] = m.group("url")
        item["img"] = m.group("img")
        self._filter(result, item)
    data = util.substr(page, 'class="pages">', "</div>")
    next = re.search('<a href="(?P<url>[^"]+)"[^<]+<img src="/images/page-right.gif', data)
    prev = re.search('<a href="(?P<url>[^"]+)"[^<]+<img src="/images/page-left.gif', data)
    if prev:
        item = self.dir_item()
        item["type"] = "prev"
        item["url"] = prev.group("url")
        result.append(item)
    if next:
        item = self.dir_item()
        item["type"] = "next"
        item["url"] = next.group("url")
        result.append(item)
    return result
def list(self, url):
    result = []
    url = self._url(url)
    page = util.request(url)
    q = url.find('?')
    if q > 0:
        url = url[:q]
    data = util.substr(page, '<div id="videolist', '<div class="paging-adfox">')
    pattern = '<div class="img-wrapper"><a href="(?P<url>[^"]+)" title="(?P<name>[^"]+)(.+?)<img(.+?)src="(?P<img>[^"]+)(.+?)<p class="dsc">(?P<plot>[^<]+)'
    for m in re.finditer(pattern, data, re.IGNORECASE | re.DOTALL):
        item = self.video_item()
        item['title'] = m.group('name')
        item['url'] = m.group('url')
        item['img'] = m.group('img')
        item['plot'] = m.group('plot')
        self._filter(result, item)
    data = util.substr(page, '<div class="jumpto"', '</div>')
    data = util.substr(page, '<p class="paging', '</p>')
    next = re.search('<a href="(?P<url>[^"]+)"><img(.+?)next\.png', data, re.IGNORECASE | re.DOTALL)
    if next:
        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = url + next.group('url')
        result.append(item)
    return result
def list_archive_page(self, show_page, showon=False, showoff=False):
    showonlist = []
    if showon:
        page = util.substr(show_page, VYSIELANE_START, NEVYSIELANE_START)
        for m in re.finditer(VYSIELANE_ITER_RE, page, re.DOTALL | re.IGNORECASE):
            item = self.dir_item()
            item['title'] = m.group('title')
            item['plot'] = m.group('desc')
            item['url'] = m.group('url') + "#season_episode"
            if m.group('itime') is not None:
                item['type'] = "showon7d"
            else:
                item['type'] = "showon"
            showonlist.append(item)
        showonlist.sort(key=lambda x: x['title'].lower())
    showofflist = []
    if showoff:
        page = util.substr(show_page, NEVYSIELANE_START, NEVYSIELANE_END)
        for m in re.finditer(NEVYSIELANE_ITER_RE, page, re.DOTALL | re.IGNORECASE):
            item = self.dir_item()
            item['title'] = m.group('title')
            item['url'] = m.group('url') + "#season_episode"
            item['type'] = "showoff"
            showofflist.append(item)
        showofflist.sort(key=lambda x: x['title'].lower())
    result = showonlist + showofflist
    return result
def list_by_letter(self, url):
    result = []
    page = self.get_data_cached(url)
    data = util.substr(page, '<ul class="content', '</ul>')
    subs = self.get_subs()
    for m in re.finditer('<a class="title" href="(?P<url>[^"]+)[^>]+>(?P<name>[^<]+)',
                         data, re.IGNORECASE | re.DOTALL):
        item = {'url': m.group('url'), 'title': m.group('name')}
        if item['url'] in subs:
            item['menu'] = {"[B][COLOR red]Remove from subscription[/COLOR][/B]": {
                'url': m.group('url'),
                'action': 'remove-subscription',
                'name': m.group('name')}}
        else:
            item['menu'] = {"[B][COLOR red]Add to library[/COLOR][/B]": {
                'url': m.group('url'),
                'action': 'add-to-library',
                'name': m.group('name')}}
        self._filter(result, item)
    paging = util.substr(page, '<div class="pagination"', '</div')
    next = re.search('<li class="next[^<]+<a href="\?page=(?P<page>\d+)', paging, re.IGNORECASE | re.DOTALL)
    if next:
        next_page = int(next.group('page'))
        current = re.search('\?page=(?P<page>\d+)', url)
        current_page = 0
        if self.is_most_popular(url) and next_page > 10:
            return result
        if current:
            current_page = int(current.group('page'))
        if current_page < next_page:
            url = re.sub('\?.+?$', '', url) + '?page=' + str(next_page)
            result += self.list_by_letter(url)
    return result
def list_content(self, page, url=None):
    result = []
    if not url:
        url = self.base_url
    data = util.substr(page, '<div class="contentArea', '<div class="pagination">')
    pattern = '<h\d class="postTitle"><a href="(?P<url>[^"]+)(.+?)<span>(?P<title>[^<]+)</span></a></h\d>(.+?)<span class="postDate">(?P<date>[^\<]+)</span>(.+?)<div class="postContent">[^<]+<a[^>]+[^<]+<img src="(?P<img>[^"]+)(.+?)<div class="obs">(?P<plot>.+?)</div>'
    for m in re.finditer(pattern, data, re.IGNORECASE | re.DOTALL):
        plot = re.sub('<br[^>]*>', '', m.group('plot'))
        item = self.video_item()
        item['title'] = m.group('title')
        item['img'] = m.group('img')
        item['plot'] = plot
        item['url'] = m.group('url')
        self._filter(result, item)
    data = util.substr(page, '<div class="pagination">', '</div>')
    m = re.search('<li class="info"><span>([^<]+)', data)
    n = re.search('<li class="prev"[^<]+<a href="(?P<url>[^"]+)[^<]+<span>(?P<name>[^<]+)', data)
    k = re.search('<li class="next"[^<]+<a href="(?P<url>[^"]+)[^<]+<span>(?P<name>[^<]+)', data)
    # replace last / + everything till the end
    #myurl = re.sub('\/[\w\-]+$', '/', url)
    if m is not None:
        if n is not None:
            item = self.dir_item()
            item['type'] = 'prev'
            #item['title'] = '%s - %s' % (m.group(1), n.group('name'))
            item['url'] = n.group('url')
            result.append(item)
        if k is not None:
            item = self.dir_item()
            item['type'] = 'next'
            #item['title'] = '%s - %s' % (m.group(1), k.group('name'))
            item['url'] = k.group('url')
            result.append(item)
    return result
def list_content(self, page, url=None):
    result = []
    if not url:
        url = self.base_url
    data = util.substr(page, '<div class="items no-wrapper no-padder', '<div class="my-pagination>')
    pattern = '<article class="video".+?<a href="(?P<url>[^"]+)" *title="(?P<title>[^"]+)"(.+?)<img src="(?P<img>[^"]+)".+?<p>(?P<plot>[^<]+?)<\/p>.+?<li class="i-published".+?title="(?P<date>[^"]+)"'
    for m in re.finditer(pattern, data, re.IGNORECASE | re.DOTALL):
        item = self.video_item()
        item['title'] = m.group('title')
        item['img'] = m.group('img').strip()
        item['plot'] = self.decode_plot(m.group('plot'))
        item['url'] = self.base_url[:-1] + m.group('url')
        item['menu'] = {'$30060': {'list': '#related#' + item['url'], 'action-type': 'list'}}
        print item
        self._filter(result, item)
    data = util.substr(page, '<ul class="my-pagination', '</div>')
    n = re.search('<li class="paginate_button previous *"[^<]+<a href="(?P<url>[^"]+)">(?P<name>[^<]+)<', data)
    k = re.search('<li class="paginate_button next *"[^<]+<a href="(?P<url>[^"]+)">(?P<name>[^<]+)<', data)
    if n is not None:
        item = self.dir_item()
        item['type'] = 'prev'
        # item['title'] = '%s - %s' % (m.group(1), n.group('name'))
        item['url'] = n.group('url')
        result.append(item)
    if k is not None:
        item = self.dir_item()
        item['type'] = 'next'
        # item['title'] = '%s - %s' % (m.group(1), k.group('name'))
        item['url'] = k.group('url')
        result.append(item)
    return result
def list_videos(self, url):
    result = []
    page = util.request(url)
    data = util.substr(page, '<div class="articles">', '<div class="paginator">')
    listing_iter_re = r"""
        <article\ class=\"row\">.+?
        <a\ href=\"(?P<url>[^\"]+)\"><i\ class=\"ta3-icon-video\"[^>]+>[^>]+>(?P<title>[^<]+)</a>.+?
        </article>
    """
    for m in re.finditer(listing_iter_re, data, re.DOTALL | re.VERBOSE):
        item = self.video_item()
        item['title'] = m.group('title').strip()
        #item['title'] = "%s (%s)" % (m.group('title').strip(), m.group('date').strip())
        item['url'] = m.group('url')
        self._filter(result, item)
    pager_data = util.substr(page, '<div class="paginator">', '</div>')
    next_page_match = re.search(r'<li class="next"><a href="(?P<url>[^"]+)', pager_data)
    if next_page_match:
        item = self.dir_item()
        item['type'] = 'next'
        next_url = next_page_match.group('url').replace('&amp;', '&')
        # ta3.com gives invalid page urls for publicistika
        if "publicistika.html" in url:
            purl = urlparse.urlparse(url)
            pnext_url = urlparse.urlparse(next_url)
            next_url = "http://" + purl.netloc + purl.path + "?" + pnext_url.query
        item['url'] = next_url
        self._filter(result, item)
    return result
def list_content(self, page):
    result = []
    data = util.substr(page, LISTING_START, LISTING_END)
    for m in re.finditer(LISTING_ITER_RE, data, re.DOTALL | re.VERBOSE):
        item = self.video_item()
        item['title'] = "%s - (%s)" % (m.group('title').strip(), m.group('date').strip())
        item['img'] = m.group('img')
        item['url'] = m.group('url')
        if 'section_title' in m.groupdict() and 'section_url' in m.groupdict():
            item['menu'] = {"Sekcia - " + m.group('section_title'): {'list': m.group('section_url'), 'action-type': 'list'}}
        self._filter(result, item)
    pager_data = util.substr(page, PAGER_START, PAGER_END)
    for m in re.finditer("<a.+?</a>", pager_data, re.DOTALL):
        p = re.search(PAGE_PREV_RE, m.group(), re.DOTALL)
        n = re.search(PAGE_NEXT_RE, m.group(), re.DOTALL)
        if p:
            item = self.dir_item()
            item['type'] = 'prev'
            item['url'] = p.group('url')
            result.append(item)
        if n:
            item = self.dir_item()
            item['type'] = 'next'
            item['url'] = n.group('url')
            result.append(item)
    return result
def list(self, url):
    result = []
    url = self._url(url)
    print url
    page = util.request(url)
    data = util.substr(page, '<div id="archive', 'end #archive')
    pattern = '<div class="cover"><a href="(?P<url>[^"]+)(.+?)title="(?P<name>[^"]+)(.+?)<img src="(?P<logo>[^"]+)(.+?)<p class="postmetadata">(?P<fired>[^<]+)(.+?)<p>(?P<plot>[^<]+)'
    for m in re.finditer(pattern, data, re.IGNORECASE | re.DOTALL):
        name = ('%s - %s' % (m.group('name'), m.group('fired').replace('/', ''))).strip()
        item = self.video_item()
        item['title'] = name
        item['img'] = m.group('logo')
        item['plot'] = m.group('plot')
        item['url'] = m.group('url')
        self._filter(result, item)
    data = util.substr(page, '<div class="navigation">', '</div>')
    type = 'next'
    for m in re.finditer('<a href="(.+?)(?P<page>/page/[\d]+)', data, re.IGNORECASE | re.DOTALL):
        item = self.dir_item()
        item['type'] = type
        if type == 'next':
            type = 'prev'
        item['url'] = m.group('page')
        result.append(item)
    return result
def list(self, url):
    if url.find("#fm#") == 0:
        return self.list_folder(url[5:])
    url = self._url(url)
    page = util.request(
        url, headers={"X-Requested-With": "XMLHttpRequest", "Referer": url, "Cookie": "uloz-to-id=1561277170;"}
    )
    script = util.substr(page, "var kn", "</script>")
    keymap = None
    key = None
    k = re.search('{([^\;]+)"', script, re.IGNORECASE | re.DOTALL)
    if k:
        keymap = json.loads("{" + k.group(1) + '"}')
    j = re.search('kapp\(kn\["([^"]+)"', script, re.IGNORECASE | re.DOTALL)
    if j:
        key = j.group(1)
    if not (j and k):
        self.error("error parsing page - unable to locate keys")
        return []
    burl = b64decode("I2h0dHA6Ly9kZWNyLWNlY2gucmhjbG91ZC5jb20vZGVjcnlwdC8/a2V5PSVzJnZhbHVlPSVz")
    murl = b64decode("aHR0cDovL2RlY3ItY2VjaC5yaGNsb3VkLmNvbS9kZWNyeXB0Lw==")
    data = util.substr(page, '<ul class="chessFiles', "</ul>")
    result = []
    req = {"seed": keymap[key], "values": keymap}
    decr = json.loads(util.post_json(murl, req))
    for li in re.finditer('<li data-icon="(?P<key>[^"]+)', data, re.IGNORECASE | re.DOTALL):
        body = urllib.unquote(b64decode(decr[li.group("key")]))
        m = re.search(
            '<li>.+?<div data-icon="(?P<key>[^"]+)[^<]+<img(.+?)src="(?P<logo>[^"]+)(.+?)<div class="fileInfo(?P<info>.+?)</h4>',
            body,
            re.IGNORECASE | re.DOTALL,
        )
        if not m:
            continue
        value = keymap[m.group("key")]
        info = m.group("info")
        iurl = burl % (keymap[key], value)
        item = self.video_item()
        item["title"] = ".. title not found.."
        title = re.search('<div class="fileName.+?<a[^>]+>(?P<title>[^<]+)', info, re.IGNORECASE | re.DOTALL)
        if title:
            item["title"] = title.group("title")
        size = re.search('<span class="fileSize[^>]+>(?P<size>[^<]+)', info, re.IGNORECASE | re.DOTALL)
        if size:
            item["size"] = size.group("size").strip()
        time = re.search('<span class="fileTime[^>]+>(?P<time>[^<]+)', info, re.IGNORECASE | re.DOTALL)
        if time:
            item["length"] = time.group("time")
        item["url"] = iurl
        item["img"] = m.group("logo")
        self._filter(result, item)
    # page navigation
    data = util.substr(page, '<div class="paginator', "</div")
    mnext = re.search('<a href="(?P<url>[^"]+)" class="next', data)
    if mnext:
        item = self.dir_item()
        item["type"] = "next"
        item["url"] = mnext.group("url")
        result.append(item)
    return result
def filmoteka(p):
    if p["filmoteka"] == "":
        data = login()
        if data:
            userid = get_userid(data)
            if userid:
                page = util.request(furl(userid + "filmoteka"))
                data = util.substr(page, '<select name="filter', "</select>")
                for m in re.finditer(
                    '<option value="(?P<value>[^"]+)[^>]+>(?P<name>[^<]+)', data, re.DOTALL | re.IGNORECASE
                ):
                    p["filmoteka"] = userid + "filmoteka/filtr-" + m.group("value")
                    xbmcutil.add_dir(m.group("name"), p)
        xbmcplugin.endOfDirectory(int(sys.argv[1]))
    else:
        page = login(p["filmoteka"])
        data = util.substr(page, '<table class="ui-table-list', "</table")
        results = []
        for m in re.finditer(
            '<tr[^<]+<td>(?P<added>[^<]+)</td[^<]+<td[^<]+<a href="(?P<url>[^"]+)[^>]+>(?P<name>[^<]+)',
            data,
            re.DOTALL | re.IGNORECASE,
        ):
            results.append((m.group("url"), m.group("name")))
        if preload():
            return preload_items(results)
        add_items(results)
        xbmcplugin.endOfDirectory(int(sys.argv[1]))
def list_movie_recently_added(self, url):
    result = []
    page = self.get_data_cached(url)
    data = util.substr(page, '<div class="content"', '</ul>')
    for m in re.finditer(
            '<a class="content-block" href="(?P<url>[^"]+)" title="(?P<name>[^"]+)',
            data, re.IGNORECASE | re.DOTALL):
        item = self.video_item()
        item['url'] = m.group('url')
        item['title'] = m.group('name')
        item['menu'] = {"[B][COLOR red]Add to library[/COLOR][/B]": {
            'url': m.group('url'),
            'action': 'add-to-library',
            'name': m.group('name')}}
        self._filter(result, item)
    paging = util.substr(page, '<div class="pagination"', '</div')
    next = re.search('<li class="next[^<]+<a href="\?page=(?P<page>\d+)', paging, re.IGNORECASE | re.DOTALL)
    if next:
        next_page = int(next.group('page'))
        current = re.search('\?page=(?P<page>\d+)', url)
        current_page = 0
        if next_page > 5:
            return result
        if current:
            current_page = int(current.group('page'))
        if current_page < next_page:
            url = re.sub('\?.+?$', '', url) + '?page=' + str(next_page)
            result += self.list_movie_recently_added(url)
    return result
def film(self, page):
    result = []
    data = util.substr(page, self.od, self.do)
    pattern = 'background: url\((?P<img>.+?)\)[\s|\S]*?<h2><a href="(?P<url>.+?)".[^>]+>(?P<name>.+?)</a></h2>'
    for m in re.finditer(pattern, data, re.IGNORECASE | re.DOTALL):
        item = self.video_item()
        item['url'] = m.group('url')
        item['title'] = m.group('name')
        item['img'] = m.group('img')
        result.append(item)
    data = util.substr(page, "<div class='wp-pagenavi'>", '<div id="sidebar">')
    pattern = "<span class='pages'>(?P<pg>.+?)</span>"
    m = re.search(pattern, data, re.IGNORECASE | re.DOTALL)
    last_page = ''
    if m is not None:
        last_page = m.group('pg').split(' ')
        next_page = int(last_page[1]) + 1
        last_page = last_page[-1]
    pattern = 'href="(?P<url>.+?)".?><div class="next"></div>'
    m = re.search(pattern, data, re.IGNORECASE | re.DOTALL)
    next_url = ''
    if m is not None:
        next_url = m.group('url')
        try:
            item = self.dir_item()
            item['title'] = 'Přejít na stranu ' + str(next_page) + ' z ' + str(last_page)
            item['url'] = '#film#' + next_url
            result.append(item)
        except:
            pass
    return result
def resolve(self, item, captcha_cb=None, select_cb=None):
    item = item.copy()
    url = self._url(item['url']).replace('×', '%c3%97')
    data = util.substr(util.request(url), '<div id="content"', '#content')
    visionone_resolved, onevision_resolved, scz_resolved = [], [], []
    onevision = re.search('(?P<url>http://onevision\.ucoz\.ua/[^<]+)', data, re.IGNORECASE)
    if onevision:
        onevision_data = util.substr(util.request(onevision.group('url')),
                                     '<td class="eText"', '<td class="rightColumn"')
        onevision_resolved = resolver.findstreams(onevision_data, [
            '<embed( )src="(?P<url>[^"]+)',
            '<object(.+?)data="(?P<url>[^"]+)',
            '<iframe(.+?)src=["\'](?P<url>.+?)[\'"]',
            '<object.*?data=(?P<url>.+?)</object>'])
    visionone = re.search('(?P<url>http://visionone\.ucoz\.ru/[^<]+)', data, re.IGNORECASE)
    if visionone:
        visionone_data = util.substr(util.request(visionone.group('url')),
                                     '<td class="eText"', '<td class="rightColumn"')
        visionone_resolved = resolver.findstreams(visionone_data, [
            '<embed( )src="(?P<url>[^"]+)',
            '<object(.+?)data="(?P<url>[^"]+)',
            '<iframe(.+?)src=["\'](?P<url>.+?)[\'"]',
            '<object.*?data=(?P<url>.+?)</object>'])
    scz = re.search('(?P<url>http://scz\.uvadi\.cz/\?p=[\d]+)', data, re.IGNORECASE)
    if scz:
        scz_data = util.substr(util.request(scz.group('url')), '<div id="content"', '#content')
        scz_resolved = resolver.findstreams(scz_data, [
            '<embed( )src="(?P<url>[^"]+)',
            '<object(.+?)data="(?P<url>[^"]+)',
            '<iframe(.+?)src=["\'](?P<url>.+?)[\'"]',
            '<object.*?data=(?P<url>.+?)</object>'])
    serialy_resolved = resolver.findstreams(data, [
        '<embed( )src="(?P<url>[^"]+)',
        '<object(.+?)data="(?P<url>[^"]+)',
        '<iframe(.+?)src=["\'](?P<url>.+?)[\'"]',
        '<object.*?data=(?P<url>.+?)</object>',
        '<p><code><strong>(?P<url>http.+?)</strong></code></p>',
        '<p><code><strong><big>(?P<url>.+?)</big></strong></code></p>'])
    resolved = []
    resolved += serialy_resolved or []
    resolved += visionone_resolved or []
    resolved += onevision_resolved or []
    resolved += scz_resolved or []
    result = []
    for i in resolved:
        item = self.video_item()
        item['title'] = i['name']
        item['url'] = i['url']
        item['quality'] = i['quality']
        item['surl'] = i['surl']
        item['headers'] = i['headers']
        result.append(item)
    if len(result) == 1:
        return result[0]
    elif len(result) > 1 and select_cb:
        return select_cb(result)
def list(self, url):
    if url.find('#fm#') == 0:
        return self.list_folder(url[5:])
    url = self._url(url)
    page = util.request(url, headers={'X-Requested-With': 'XMLHttpRequest', 'Referer': url, 'Cookie': 'uloz-to-id=1561277170;'})
    script = util.substr(page, 'var kn', '</script>')
    keymap = None
    key = None
    k = re.search('{([^\;]+)"', script, re.IGNORECASE | re.DOTALL)
    if k:
        keymap = json.loads("{" + k.group(1) + '"}')
    j = re.search('kapp\(kn\["([^"]+)"', script, re.IGNORECASE | re.DOTALL)
    if j:
        key = j.group(1)
    if not (j and k):
        self.error('error parsing page - unable to locate keys')
        return []
    burl = b64decode('I2h0dHA6Ly9jcnlwdG8uamV6em92by5uZXQvZGVjcnlwdC8/a2V5PSVzJnZhbHVlPSVzCg==')
    murl = b64decode('aHR0cDovL2NyeXB0by5qZXp6b3ZvLm5ldC9kZWNyeXB0Lwo=')
    murl = 'http://crypto.jezzovo.net/decrypt/'
    data = util.substr(page, '<ul class="chessFiles', '</ul>')
    result = []
    req = {'seed': keymap[key], 'values': keymap}
    decr = json.loads(util.post_json(murl, req))
    for li in re.finditer('<li data-icon="(?P<key>[^"]+)', data, re.IGNORECASE | re.DOTALL):
        body = urllib.unquote(b64decode(decr[li.group('key')]))
        m = re.search(
            '<li>.+?<div data-icon="(?P<key>[^"]+)[^<]+<img(.+?)src="(?P<logo>[^"]+)(.+?)<div class="fileInfo(?P<info>.+?)</h4>',
            body, re.IGNORECASE | re.DOTALL)
        if not m:
            continue
        value = keymap[m.group('key')]
        info = m.group('info')
        iurl = burl % (keymap[key], value)
        item = self.video_item()
        item['title'] = '.. title not found..'
        title = re.search('<div class="fileName.+?<a[^>]+>(?P<title>[^<]+)', info, re.IGNORECASE | re.DOTALL)
        if title:
            item['title'] = title.group('title')
        size = re.search('<span class="fileSize[^>]+>(?P<size>[^<]+)', info, re.IGNORECASE | re.DOTALL)
        if size:
            item['size'] = size.group('size').strip()
        time = re.search('<span class="fileTime[^>]+>(?P<time>[^<]+)', info, re.IGNORECASE | re.DOTALL)
        if time:
            item['length'] = time.group('time')
        item['url'] = iurl
        item['img'] = m.group('logo')
        self._filter(result, item)
    # page navigation
    data = util.substr(page, '<div class="paginator', '</div')
    mnext = re.search('<a href="(?P<url>[^"]+)" class="next', data)
    if mnext:
        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = mnext.group('url')
        result.append(item)
    return result
def resolve(self, item, captcha_cb=None, select_cb=None):
    item = item.copy()
    url = self._url(item['url']).replace('×', '%c3%97')
    data = util.substr(util.request(url), '<div id="content"', '#content')
    for script in re.finditer('<script.+?src="([^"]+)', data, re.IGNORECASE | re.DOTALL):
        try:
            data += util.request(script.group(1)).replace('\\"', '"')
        except:
            pass
    util.init_urllib()  # need to reinitialize urllib, because anyfiles could have left some cookies
    visionone_resolved, onevision_resolved, scz_resolved = [], [], []
    onevision = re.search('(?P<url>http://onevision\.ucoz\.ua/[^<]+)', data, re.IGNORECASE)
    if onevision:
        onevision_data = util.substr(util.request(onevision.group('url')),
                                     '<td class="eText"', '<td class="rightColumn"')
        onevision_resolved = self.findstreams(onevision_data, [
            '<embed( )src="(?P<url>[^"]+)',
            '<object(.+?)data="(?P<url>[^"]+)',
            '<iframe(.+?)src=["\'](?P<url>.+?)[\'"]',
            '<object.*?data=(?P<url>.+?)</object>'])
    visionone = re.search('(?P<url>http://visionone\.ucoz\.ru/[^<]+)', data, re.IGNORECASE)
    if visionone:
        visionone_data = util.substr(util.request(visionone.group('url')),
                                     '<td class="eText"', '<td class="rightColumn"')
        visionone_resolved = self.findstreams(visionone_data, [
            '<embed( )src="(?P<url>[^"]+)',
            '<object(.+?)data="(?P<url>[^"]+)',
            '<iframe(.+?)src=["\'](?P<url>.+?)[\'"]',
            '<object.*?data=(?P<url>.+?)</object>'])
    scz = re.search('(?P<url>http://scz\.uvadi\.cz/\?p=[\d]+)', data, re.IGNORECASE)
    if scz:
        scz_data = util.substr(util.request(scz.group('url')), '<div id="content"', '#content')
        scz_resolved = self.findstreams(scz_data, [
            '<embed( )src="(?P<url>[^"]+)',
            '<object(.+?)data="(?P<url>[^"]+)',
            '<iframe(.+?)src=["\'](?P<url>.+?)[\'"]',
            '<object.*?data=(?P<url>.+?)</object>'])
    serialy_resolved = self.findstreams(data, [
        '<embed( )src="(?P<url>[^"]+)',
        '<object(.+?)data="(?P<url>[^"]+)',
        '<iframe(.+?)src=["\'](?P<url>.+?)[\'"]',
        '<object.*?data=(?P<url>.+?)</object>',
        '<p><code><strong>(?P<url>http.+?)</strong></code></p>',
        '<p><code><strong><big>(?P<url>.+?)</big></strong></code></p>'])
    resolved = []
    resolved += serialy_resolved or []
    resolved += visionone_resolved or []
    resolved += onevision_resolved or []
    resolved += scz_resolved or []
    if len(resolved) == 1:
        return resolved[0]
    elif len(resolved) > 1 and select_cb:
        return select_cb(resolved)
def list(self, url):
    if url.find('#fm#') == 0:
        return self.list_folder(url[5:])
    url = self._url(url)
    page = util.request(url, headers={'X-Requested-With': 'XMLHttpRequest', 'Referer': url, 'Cookie': 'uloz-to-id=1561277170;'}).decode('string-escape')
    script = util.substr(page, 'var kn', '</script>')
    keymap = None
    key = None
    k = re.search(r'({.+?})', script)
    if k:
        keymap = util.json.loads(k.group(1))
    j = re.search(r'ad.push\(\[kn, kn\["([^"]+)', script)
    if j:
        key = j.group(1)
    if not (j and k):
        self.error('error parsing page - unable to locate keys')
        return []
    burl = b64decode('I2h0dHA6Ly9kZWNyLWNlY2gucmhjbG91ZC5jb20vZGVjcnlwdC8/a2V5PSVzJnZhbHVlPSVz')
    murl = b64decode('aHR0cDovL2RlY3ItY2VjaC5yaGNsb3VkLmNvbS9kZWNyeXB0Lw==')
    result = []
    req = {'seed': keymap[key], 'values': keymap}
    decr = json.loads(util.post_json(murl, req))
    for li in re.finditer('<div data-icon="(?P<key>[^"]+)', page, re.IGNORECASE | re.DOTALL):
        body = urllib.unquote(b64decode(decr[li.group('key')]))
        div_name = util.substr(body, '<div class="name"', '</div>')
        title_url_match = re.search(r'<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)', div_name)
        if not title_url_match:
            continue
        item = self.video_item()
        item['title'] = title_url_match.group('title')
        item['url'] = title_url_match.group('url')
        div_media = util.substr(body, 'div class="media"', '<div class="tools">')
        img_match = re.search(r'img src="([^"]+)', div_media)
        if img_match:
            item['img'] = "http:" + img_match.group(1)
        time_match = re.search(r'<span>Čas</span>(.+)', div_media)
        if time_match:
            item['length'] = time_match.group(1).strip()
        size_match = re.search(r'<span>Velikost</span>([^<]+)', div_media)
        if size_match:
            item['size'] = size_match.group(1).strip()
        self._filter(result, item)
    # page navigation
    data = util.substr(page, '<div class="paginator', '</div')
    mnext = re.search('<a href="(?P<url>[^"]+)" class="next', data)
    if mnext:
        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = util.decode_html(mnext.group('url'))
        result.append(item)
    return result
def list_subcategories(self, page, category_name):
    result = []
    data = util.substr(page, CATEGORIES_START, CATEGORIES_END)
    data = util.substr(data, category_name, CATEGORIES_END)
    for m in re.finditer(CATEGORIES_ITER_RE, data, re.IGNORECASE | re.DOTALL):
        if not m.group('type').strip().startswith('child'):
            break
        item = self.dir_item()
        item['title'] = m.group('title')
        item['url'] = m.group('url')
        self._filter(result, item)
    return result
def list(self, url):
    if url.find('#fm#') == 0:
        return self.list_folder(url[5:])
    url = self._url(url)
    page = util.request(url, headers={'X-Requested-With': 'XMLHttpRequest', 'Referer': url, 'Cookie': 'uloz-to-id=1561277170;'})
    script = util.substr(page, 'var kn', '</script>')
    keymap = None
    key = None
    k = re.search('{([^\;]+)"', script, re.IGNORECASE | re.DOTALL)
    if k:
        keymap = json.loads("{" + k.group(1) + '"}')
    j = re.search('kapp\(kn\["([^"]+)"', script, re.IGNORECASE | re.DOTALL)
    if j:
        key = j.group(1)
    if not (j and k):
        self.error('error parsing page - unable to locate keys')
        return []
    burl = b64decode('I2h0dHA6Ly9jcnlwdG8tenNlcnYucmhjbG91ZC5jb20vYXBpL3YyL2RlY3J5cHQvP2tleT0lcyZ2YWx1ZT0lcwo=')
    data = util.substr(page, '<ul class="chessFiles', '</ul>')
    result = []
    for m in re.finditer(
            '<li>.+?<div data-icon="(?P<key>[^"]+)[^<]+<img(.+?)src="(?P<logo>[^"]+)(.+?)alt="(?P<name>[^"]+)(.+?)<div class="fileInfo(?P<info>.+?)</div>',
            data, re.IGNORECASE | re.DOTALL):
        info = m.group('info')
        value = keymap[m.group('key')]
        iurl = burl % (keymap[key], value)
        item = self.video_item()
        item['title'] = m.group('name')
        size = re.search('<span class="fileSize[^>]+>(?P<size>[^<]+)', info, re.IGNORECASE | re.DOTALL)
        if size:
            item['size'] = size.group('size').strip()
        time = re.search('<span class="fileTime[^>]+>(?P<time>[^<]+)', info, re.IGNORECASE | re.DOTALL)
        if time:
            item['length'] = time.group('time')
        item['url'] = iurl
        item['img'] = m.group('logo')
        self._filter(result, item)
    # page navigation
    data = util.substr(page, '<div class="paginator', '</div')
    mprev = re.search('<a href="(?P<url>[^"]+)" class="prev', data)
    if mprev:
        item = self.dir_item()
        item['type'] = 'prev'
        item['url'] = mprev.group('url')
        result.append(item)
    mnext = re.search('<a href="(?P<url>[^"]+)" class="next', data)
    if mnext:
        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = mnext.group('url')
        result.append(item)
    return result
def categories(self):
    result = []
    item = self.dir_item()
    item['type'] = 'new'
    item['url'] = 'category/new-episode'
    result.append(item)
    data = util.substr(util.request(self.base_url), '<div id="primary"', '<div id="secondary')
    pattern = '<a href="(?P<url>[^"]+)[^>]+>(?P<name>[^<]+)</a>'
    for m in re.finditer(pattern, util.substr(data, 'Seriály</a>', '</ul>'), re.IGNORECASE | re.DOTALL):
        item = self.dir_item()
        item['title'] = m.group('name')
        item['url'] = m.group('url')
        result.append(item)
    return result
def login(self):
    if self.username and self.password and len(self.username) > 0 and len(self.password) > 0:
        self.info("Login user=%s, pass=*****" % self.username)
        self.rh.throw = False
        page = util.request(self.base_url + "login?key=logreg")
        if page.find('href="/?do=web-logout') > 0:
            self.info("Already logged in")
            return True
        data = util.substr(page, '<li class="menu-username', "</li")
        m = re.search('key=(?P<key>[^"]+)"', data, re.IGNORECASE | re.DOTALL)
        token = re.search('<input type="hidden" name="_token_".+?value="([^"]+)"', page, re.IGNORECASE | re.DOTALL)
        if m and token:
            login_url = self.base_url + "login?key=" + m.group("key") + "&do=loginForm-submit"
            data = util.post(
                login_url,
                {
                    "username": self.username,
                    "password": self.password,
                    "remember": "on",
                    "login": "******",
                    "_token_": token.group(1),
                },
            )
            if data.find('href="/?do=web-logout') > 0:
                self.info("Login successful")
                return True
    self.info("Login failed")
    return False
def list_tv_show(self, url):
    result = []
    page = util.request(url)
    data = util.substr(page, '<div class="content">', '<script')
    for s in re.finditer('<strong.+?</ul>', data, re.IGNORECASE | re.DOTALL):
        serie = s.group(0)
        serie_name = re.search('<strong>([^<]+)', serie).group(1)
        for e in re.finditer('<li.+?</li>', serie, re.IGNORECASE | re.DOTALL):
            episode = e.group(0)
            item = self.video_item()
            ep_name = re.search('<a href="#[^<]+<span>(?P<id>[^<]+)</span>(?P<name>[^<]+)', episode)
            if ep_name:
                item['title'] = '%s %s %s' % (serie_name, ep_name.group('id'), ep_name.group('name'))
                item['epname'] = ep_name.group('name')
                item['ep'] = ep_name
            i = re.search('<div class="inner-item[^<]+<img src="(?P<img>[^"]+).+?<a href="'
                          '(?P<url>[^"]+)', episode, re.IGNORECASE | re.DOTALL)
            if i:
                item['img'] = self._url(i.group('img'))
                item['url'] = i.group('url')
            if i and ep_name:
                self._filter(result, item)
    if self.reverse_eps:
        result.reverse()
    return result
def list_episodes(self, page):
    result = []
    episodes = []
    page = util.substr(page, START_LISTING, END_LISTING)
    current_date = to_unicode(re.search(LISTING_DATE_RE, page, re.IGNORECASE | re.DOTALL).group('date'))
    self.info("<list_episodes> current_date: %s" % current_date)
    prev_url = re.search(LISTING_PAGER_RE, page, re.IGNORECASE | re.DOTALL).group('prevurl')
    prev_url = re.sub('&amp;', '&', prev_url)
    #self.info("<list_episodes> prev_url: %s" % prev_url)
    for m in re.finditer(LISTING_ITER_RE, page, re.IGNORECASE | re.DOTALL):
        episodes.append([self._fix_url(re.sub('&amp;', '&', m.group('url'))), m])
    self.info("<list_episodes> found %d episodes" % len(episodes))
    res = self._request_parallel(episodes)
    for p, m in res:
        m = m[0]
        dnum = to_unicode(m.group('daynum'))
        item = self.list_episode(p)
        item['title'] = "%s (%s. %s)" % (item['title'], dnum, current_date)
        item['date'] = dnum
        item['url'] = re.sub('&amp;', '&', m.group('url'))
        self._filter(result, item)
    result.sort(key=lambda x: int(x['date']), reverse=True)
    item = self.dir_item()
    item['type'] = 'prev'
    item['url'] = prev_url
    self._filter(result, item)
    return result
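# Hedged sketch of the _request_parallel() helper assumed above: fetch each
# [url, match] pair on its own thread and collect (page, [match]) tuples,
# the shape unpacked by list_episodes(). The real helper may differ in pool
# size, result ordering, and error handling.
import threading

def _request_parallel(self, requests):
    results = []
    lock = threading.Lock()

    def fetch(url, extra):
        page = util.request(url)
        with lock:
            results.append((page, extra))

    threads = [threading.Thread(target=fetch, args=(r[0], r[1:])) for r in requests]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return results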
def resolve(self, item, captcha_cb=None, select_cb=None):
    result = []
    item = item.copy()
    url = self._url(item['url'])
    data = util.substr(util.request(url), '<embed id', '>')
    yurl_m = re.search('file=.*?(http[^&]+)&', data, re.DOTALL)
    yurl = yurl_m and re.sub('youtu.be/', 'www.youtube.com/watch?v=', yurl_m.group(1)) or ''
    resolved = resolver.findstreams(yurl, ['(?P<url>[^&]+)'])
    subs = re.search('captions\.file=([^&]+)', data, re.DOTALL)
    if resolved and subs:
        for i in resolved:
            i['subs'] = self._url(subs.group(1))
    if not resolved:
        raise ResolveException('Video nenalezeno')
    for i in resolved:
        item = self.video_item()
        item['title'] = i['title']
        item['url'] = i['url']
        item['quality'] = i['quality']
        item['surl'] = i['surl']
        item['subs'] = i['subs']
        item['headers'] = i['headers']
        try:
            item['fmt'] = i['fmt']
        except KeyError:
            pass
        print item
        result.append(item)
    if len(result) == 1:
        return result[0]
    return select_cb(result)
def list(self, url):
    result = []
    page = util.request(self._url(url))
    data = util.substr(page, '<div class="search', '<footer')
    for m in re.finditer('<div class="search-result-box(.+?)</a>', data, re.IGNORECASE | re.DOTALL):
        it = m.group(1)
        link = re.search('<a href=([^ ]+)', it, re.IGNORECASE | re.DOTALL)
        name = re.search('title="([^"]+)', it, re.IGNORECASE | re.DOTALL)
        img = re.search('<img src="([^"]+)', it, re.IGNORECASE | re.DOTALL)
        size = re.search('<div class="fs">([^<]+)', it, re.IGNORECASE | re.DOTALL)
        time = re.search('<div class="vd">([^<]+)', it, re.IGNORECASE | re.DOTALL)
        if name and link:
            item = self.video_item()
            item['title'] = name.group(1)
            if size:
                item['size'] = size.group(1).strip()
            if time:
                item['length'] = time.group(1).strip()
            item['url'] = self._url(link.group(1))
            if img:
                item['img'] = self._url(img.group(1))
            self._filter(result, item)
    next = re.search('<a href="(?P<url>[^"]+)[^>]+>dal', data, re.IGNORECASE | re.DOTALL)
    if next:
        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = next.group('url')
        result.append(item)
    return result
def resolve(self, item, captcha_cb=None, select_cb=None):
    item = item.copy()
    url = self._url(item['url'])
    data = util.request(self._url(item['url']))
    data = util.substr(data, '<div class="video', '</div')
    sosac = re.search('"(http\://[\w]+\.sosac\.ph[^"]+)', data, re.DOTALL)
    if sosac:
        data = util.request(sosac.group(1))
    resolved = resolver.findstreams(data, [
        '<embed( )*flashvars="file=(?P<url>[^"]+)',
        '<embed( )src="(?P<url>[^"]+)',
        '<object(.+?)data="(?P<url>[^"]+)',
        '<iframe(.+?)src=["\' ](?P<url>.+?)[\'" ]',
    ])
    result = []
    if not resolved:
        self.error('Nothing resolved')
        return None
    for i in resolved:
        item = self.video_item()
        item['title'] = i['name']
        item['url'] = i['url']
        item['quality'] = i['quality']
        item['surl'] = i['surl']
        item['subs'] = i['subs']
        result.append(item)
    if len(result) == 1:
        return result[0]
    elif len(result) > 1 and select_cb:
        return select_cb(result)
def resolve(self, item, captcha_cb=None, select_cb=None):
    item = item.copy()
    url = self._url(item['url'])
    data = util.request(self._url(item['url']))
    data = util.substr(data, '<div class="video', '</div')
    sosac = re.search('"(http\://[\w]+\.sosac\.[^"]+)', data, re.DOTALL)
    if sosac:
        sosac = HTMLParser.HTMLParser().unescape(sosac.group(1))
        self.info("Reading sosac URL " + sosac)
        data = util.request(sosac, headers=HDRS)
    result = self.findstreams(data, [
        '<embed( )*flashvars="file=(?P<url>[^"]+)',
        '<embed( )src="(?P<url>[^"]+)',
        '<object(.+?)data="(?P<url>[^"]+)',
        '<iframe(.+?)src=["\' ](?P<url>.+?)[\'" ]',
        '<object.*?data=(?P<url>.+?)</object>'
    ])
    if len(result) == 1:
        return result[0]
    elif len(result) > 1 and select_cb:
        return select_cb(result)
def _categories(self, page, url):
    data = util.substr(page, '<ul id="menu_kategorie', '</ul')
    prefix = ''
    mask = '[B]%s[/B]'
    if url.find('serialy') >= 0:
        prefix = '#show#'
        mask = '%s'
    result = []
    for m in re.finditer('<a href="(?P<url>[^"]+)[^<]+<span[^>]*>(?P<name>[^<]+)',
                         data, re.IGNORECASE | re.DOTALL):
        item = self.dir_item()
        item['title'] = mask % m.group('name')
        item['url'] = prefix + m.group('url')
        result.append(item)
    # sort this
    result = sorted(result, key=lambda i: i['title'])
    if prefix == '':
        # when listing movie categories, we also list movies on the 'main' page
        return result + self.list_page(page, '<!-- Movies', '</section')
    return result
def __init__(self, master, row, group):
    self.row = row
    self.group = group
    self._value = tk.IntVar(value=1 if group.valid else 0)
    self.visible = True
    bgcolor = BGCOLORS[row % 2] if group.valid else 'red'
    ft = font.Font(size=11)
    sticky = tk.N + tk.E + tk.S + tk.W
    logger.debug("%s,%s,%s,%s,%s,%s", group.bdz, group.zxl, group.fzxl,
                 group.tq, group.name, substr(group.name, 18))
    self.ckb = tk.Checkbutton(
        master, variable=self._value, bg=bgcolor, pady=3,
        state=tk.NORMAL if group.valid else tk.DISABLED)
    self.lb_bdz = tk.Label(master, text=substr(group.bdz, 11), width=13, bg=bgcolor, font=ft)
    self.lb_zxl = tk.Label(master, text=substr(group.zxl, 11), width=13, bg=bgcolor, font=ft)
    self.lb_fzxl = tk.Label(master, text=substr(group.fzxl, 11), width=13, bg=bgcolor, font=ft)
    self.lb_tq = tk.Label(master, text=substr(group.tq, 11), width=13, bg=bgcolor, font=ft)
    self.lb_name = tk.Label(master, text=substr(group.name, 22), width=25, bg=bgcolor, font=ft)
    self.columns = [
        self.ckb, self.lb_bdz, self.lb_zxl, self.lb_fzxl, self.lb_tq, self.lb_name
    ]
    for i, c in enumerate(self.columns):
        c.grid(row=row, column=i, sticky=sticky)
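# The labels above rely on a truncating substr(text, limit) helper, distinct
# from the util.substr(page, start, end) slicer used by the scrapers in this
# file. A minimal sketch under that assumption (the real helper may measure
# wide CJK characters differently):
def substr(text, limit):
    # shorten text to at most `limit` characters, marking the cut with an ellipsis
    if text is None:
        return ''
    return text if len(text) <= limit else text[:limit - 1] + u'…'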
def list_az(self, page):
    result = []
    images = []
    page = util.substr(page, START_AZ, END_AZ)
    for m in re.finditer(AZ_ITER_RE, page, re.IGNORECASE | re.DOTALL):
        img = {
            'remote': self._fix_url(m.group('img')),
            'local': self._get_image_path(self._fix_url(m.group('img')))
        }
        item = self.dir_item()
        semicolon = m.group('title').find(':')
        if semicolon != -1:
            item['title'] = m.group('title')[:semicolon].strip()
        else:
            item['title'] = m.group('title')
        item['img'] = img['local']
        item['url'] = m.group('url')
        self._filter(result, item)
        images.append(img)
    self._get_images(images)
    return result
def categories(self):
    result = []
    item = self.dir_item()
    item['type'] = 'new'
    item['url'] = "?orderby=post_date"
    result.append(item)
    item = self.dir_item()
    item['title'] = 'Top of the month'
    item['url'] = "zebricky/mesic/vse"
    result.append(item)
    data = util.request(self.base_url)
    data = util.substr(data, '<ul class="nav categories m-b">', '</div>')
    pattern = '<a href="(?P<url>[^"]+)(.+?)>(?P<name>[^<]+)'
    for m in re.finditer(pattern, data, re.IGNORECASE | re.DOTALL):
        if m.group('url') == '/':
            continue
        item = self.dir_item()
        item['title'] = m.group('name')
        item['url'] = m.group('url')
        result.append(item)
    return result
def getSavedPlaylists(pathCookies):
    session = initSession(pathCookies)
    loginInfo = google.getLoginInfo(session)
    result = []
    content = session.get(
        youtubeUrl + 'channel/' + loginInfo['channel'] + '/playlists' + '?' +
        urllib.urlencode({'sort': 'dd', 'view_as': 'subscriber', 'view': '52', 'shelf_id': '0'})).text
    dummy, i = util.substr('[{"gridRenderer":{"items"', ':', content)
    data = json.loads(util.parseBrackets(content, i, ['[', ']']))
    for item in data:
        try:
            count = item['gridPlaylistRenderer']['videoCountShortText']['simpleText']
        except:
            count = ''
        result.append({
            'id': item['gridPlaylistRenderer']['playlistId'],
            'name': item['gridPlaylistRenderer']['title']['runs'][0]['text'],
            'thumb': item['gridPlaylistRenderer']['thumbnail']['thumbnails'][0]['url'],
            'count': count,
            'privacy': 'Public',
            'user': '******'
        })
    return result
def url(url):
    m = _regex(url)
    if m is not None:
        data = util.substr(util.request(url), 'class="proform"', '</form>')
        #print data
        form_values = {}
        pattern = '<input.+?name="(?P<name>.*?)".+?value="(?P<value>.*?)"'
        for n in re.finditer(pattern, data, re.IGNORECASE | re.DOTALL):
            form_values[n.group('name')] = n.group('value')
        #print form_values
        try:
            #time.sleep(10)
            resp = util.post(url, form_values)
        except:
            util.error('streamcloud: got http error fetching %s' % (url))
            return False
        r = re.search('file: "(.+?)",', resp)
        if r:
            return [r.group(1)]
def list_serie(self, url):
    result = []
    page = util.request(self._url(url))
    data = util.substr(page, '<div class="content">', '<script')
    for s in re.finditer('<strong.+?</ul>', data, re.IGNORECASE | re.DOTALL):
        serie = s.group(0)
        serie_name = re.search('<strong>([^<]+)', serie).group(1)
        for e in re.finditer('<li.+?</li>', serie, re.IGNORECASE | re.DOTALL):
            episode = e.group(0)
            item = self.video_item()
            ep_name = re.search('<a href="#[^<]+<span>(?P<id>[^<]+)</span>(?P<name>[^<]+)', episode)
            if ep_name:
                item['title'] = '%s %s %s' % (serie_name, ep_name.group('id'), ep_name.group('name'))
            i = re.search('<div class="inner-item[^<]+<img src="(?P<img>[^"]+).+?<a href="(?P<url>[^"]+)',
                          episode, re.IGNORECASE | re.DOTALL)
            if i:
                item['img'] = self._url(i.group('img'))
                item['url'] = i.group('url')
            if i and ep_name:
                self._filter(result, item)
    if self.reverse_eps:
        result.reverse()
    return result
def process_category(title, url):
    page = util.request(url)
    img_m = re.search(CATEGORY_IMG_RE, page, re.IGNORECASE)
    img = img_m and self._url(img_m.group(1))
    page = util.substr(page, LISTING_START, LISTING_END)
    finditer = False
    for m in re.finditer(LISTING_ITER_RE, page, re.DOTALL | re.IGNORECASE):
        finditer = True
        # paid content
        if m.group('playimg').find(YELLOW_IMG) != -1:
            return
        break
    # no links
    if not finditer:
        return
    item = self.dir_item()
    item['title'] = title
    item['url'] = url
    item['img'] = img
    with lock:
        self._filter(list, item)
def list_favourites(self, url):
    url = self._url(url)
    page = util.request(url)
    data = util.substr(page, '<div class="file-list file-list-vertical', '<div id="layout-push')
    result = []
    for m in re.finditer('<div class="file-entry.+?<div class="preview.+?<div class="data.+?</div>',
                         data, re.IGNORECASE | re.DOTALL):
        entry = m.group(0)
        item = self.video_item()
        murl = re.search('<[hH]3><a href="(?P<url>[^"]+)[^>]+>(?P<name>[^<]+)', entry)
        item['url'] = murl.group('url')
        item['title'] = murl.group('name')
        mimg = re.search('<img src="(?P<img>[^"]+)', entry)
        if mimg:
            item['img'] = mimg.group('img')
        msize = re.search('<span class="file-size[^>]+>(?P<size>[^<]+)', entry)
        if msize:
            item['size'] = msize.group('size').strip()
        mtime = re.search('<span class="duration[^>]+>(?P<time>[^<]+)', entry)
        if mtime:
            item['length'] = mtime.group('time').strip()
        self._filter(result, item)
    return result
def categories(self):
    result = []
    item = self.dir_item()
    item['type'] = 'new'
    item['url'] = "#new#"
    result.append(item)
    item = self.dir_item()
    item['type'] = 'top'
    item['url'] = "#top#"
    result.append(item)
    categories = []
    page = util.request(self._url('/video'))
    page = util.substr(page, CATEGORIES_START, CATEGORIES_END)
    for item in re.finditer(CATEGORIES_ITER_RE, page, re.DOTALL | re.IGNORECASE):
        title = item.group('title')
        url = self._url(item.group('url'))
        categories.append((title, url))
    self._fill_categories_parallel(result, categories)
    result.sort(key=lambda x: x['title'])
    return result
def getMyAlbums(pathCookies):
    session = initSession(pathCookies)
    loginInfo = google.getLoginInfo(session)
    result = []
    content = session.get("https://photos.google.com/albums").text
    dummy, i = util.substr("key: 'ds:2', isError: false , hash:", "return", content)
    data = json.loads(util.parseBrackets(content, i, ['[', ']']))
    for row in data[0]:
        metadata = row[12]['72930366']  ## ==9
        sharedKey = metadata[5]
        if sharedKey is not None:
            if len(metadata) != 9:
                continue
        result.append({
            'id': row[0],
            'sharedKey': sharedKey,
            'name': metadata[1],
            'thumb': row[1][0],
            'tsStart': datetime.datetime.fromtimestamp(metadata[2][0] / 1000),
            'tsEnd': datetime.datetime.fromtimestamp(metadata[2][1] / 1000),
            'owner': loginInfo['name'],
            'ownerFlag': True,
            'photosCount': metadata[3],
        })
    # result = sorted(result, key=lambda k: k['name'], reverse=True)
    return result
def list_date(self, page):
    result = []
    images = []
    page = util.substr(page, START_DATE, END_DATE)
    for m in re.finditer(DATE_ITER_RE, page, re.IGNORECASE | re.DOTALL):
        img = {
            'remote': self._fix_url(m.group('img')),
            'local': self._get_image_path(self._fix_url(m.group('img')))
        }
        item = self.video_item()
        item['title'] = "%s (%s)" % (m.group('title'), m.group('time'))
        item['img'] = img['local']
        item['url'] = m.group('url')
        item['menu'] = {
            '$30070': {
                'list': item['url'],
                'action-type': 'list'
            }
        }
        self._filter(result, item)
        images.append(img)
    self._get_images(images)
    return result
def categories(self):
    result = []
    if os.path.isfile(self.clips_path):
        os.remove(self.clips_path)
    data = util.substr(util.request(self.base_url), CATEGORIES_START, CATEGORIES_END)
    for m in re.finditer(CATEGORIES_ITER_RE, data, re.DOTALL):
        item = self.dir_item()
        item['title'] = m.group('title')
        item['url'] = self.plurl.format(m.group('id'), 0)
        item['menu'] = {
            u'Playlist': {
                'play': self.plurl.format(m.group('id'), 0),
                'title': m.group('title'),
                'action-type': 'play'
            }
        }
        result.append(item)
    item = self.dir_item()
    item['title'] = u'Historie tématických playlistů'
    item['url'] = 'historie'
    result.append(item)
    return result
def login(self):
    if self.username and self.password and len(self.username) > 0 and len(self.password) > 0:
        page = util.request(self.base_url + '?do=loginBox-loginpopup')
        if page.find('href="/?do=loginBox-logout') > 0:
            self.info('Already logged in')
            return True
        data = util.substr(page, '<td class="popup-lef', '</form')
        m = re.search('<form action="(?P<url>[^"]+)', data, re.IGNORECASE | re.DOTALL)
        if m:
            login_url = self._url(m.group('url')).replace('&amp;', '&')
            data = util.post(
                login_url, {
                    'username': self.username,
                    'password': self.password,
                    'pernament_login': '******',
                    'login': '******',
                    'redir_url': self.base_url + '?do=loginBox-login'
                })
            if data.find('href="/?do=loginBox-logout') > 0:
                return True
    return False
def getOtherAlbums(pathCookies):
    session = initSession(pathCookies)
    loginInfo = google.getLoginInfo(session)
    result = []
    content = session.get("https://photos.google.com/sharing").text
    dummy, i = util.substr("key: 'ds:1'", "return", content)
    data = json.loads(util.parseBrackets(content, i, ['[', ']']))
    util.objToFile(str(data[0][0]), pathCookies.replace('/cookies', '/data.txt'))
    for row in data[0]:
        owner = row[10][0][11][0]
        if owner == loginInfo['name']:
            continue
        result.append({
            'id': row[6],
            'sharedKey': row[7],
            'name': row[1],
            'thumb': row[2][0],
            'tsStart': None,
            'tsEnd': None,
            'photosCount': row[3],
            'tsCreated': datetime.datetime.fromtimestamp(row[4] / 1000),
            'owner': row[10][0][11][0],
            'ownerFlag': False
        })
    return result
def searchVideos(searchStr, pathCookies):
    session = initSession(pathCookies)
    result = []
    content = session.get(
        youtubeUrl + 'results' + '?' +
        urllib.urlencode({'search_query': searchStr, 'sp': 'EgIQAVAU'})).text
    dummy, i = util.substr('"itemSectionRenderer":{"contents"', ':', content)
    data = json.loads(util.parseBrackets(content, i, ['[', ']']))
    for item in data:
        if 'videoRenderer' not in item.keys():
            continue
        content = {
            'id': item['videoRenderer']['videoId'],
            'name': item['videoRenderer']['title']['simpleText'],
            'thumb': videoImage(item['videoRenderer']['videoId']),
            'duration': '',
            'publishedTime': '',
            'viewCount': '',
            'owner': '',
            'privacy': 'Public',
        }
        try:
            content['duration'] = item['videoRenderer']['lengthText']['simpleText']
        except:
            continue
        try:
            content['publishedTime'] = item['videoRenderer']['publishedTimeText']['simpleText']
        except:
            pass
        try:
            content['viewCount'] = item['videoRenderer']['viewCountText']['simpleText'].replace(' views', '')
        except:
            pass
        try:
            content['owner'] = item['videoRenderer']['ownerText']['runs'][0]['text']
        except:
            pass
        result.append(content)
    return result
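# Hedged sketch of a bracket-balancing extractor in the spirit of the
# util.parseBrackets(content, i, ['[', ']']) helper used above: starting at
# index i, return the first balanced bracket span. Assumption only; the real
# helper may also handle brackets inside JSON string literals.
def parse_brackets(content, i, pair):
    open_ch, close_ch = pair
    start = content.index(open_ch, i)
    depth = 0
    for j in range(start, len(content)):
        if content[j] == open_ch:
            depth += 1
        elif content[j] == close_ch:
            depth -= 1
            if depth == 0:
                return content[start:j + 1]
    raise ValueError('unbalanced brackets')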
def list(self, url):
    result = []
    data = util.request(self._url(url))
    for m in re.finditer(
            '<div class=mosaic-overlay[^<]+<a.+?href="(?P<url>[^"]+)[^<]+<div[^>]+>(?P<name>[^<]+)(.+?)src="(?P<logo>[^"]+)',
            data, re.IGNORECASE | re.DOTALL):
        item = self.video_item()
        item['title'] = m.group('name')
        item['img'] = self._url(m.group('logo'))
        item['url'] = m.group('url')
        self._filter(result, item)
    navurl = url
    index = url.find('?')
    if index > 0:
        navurl = url[:index]
    data = util.substr(data, '<div class=strana', '</div')
    next = re.search('<a href="(?P<url>[^"]+)">\(>\)<', data, re.IGNORECASE | re.DOTALL)
    if next:
        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = navurl + next.group('url')
        result.append(item)
    return result
def list_page(self, page, start, end):
    next = re.search('<a href="(?P<url>[^"]+)" class="ajax">Další', page)
    page = util.substr(page, start, end)
    result = []
    for m in re.finditer('<article[^>]+(.+?)</article>', page, re.IGNORECASE | re.DOTALL):
        data = m.group(1)
        url = re.search('<a href="([^"]+)', data)
        img = re.search('<div class="img[^<]+<img src="(?P<img>[^"]+).+?alt="(?P<name>[^"]+)', data)
        if img and url:
            item = self.dir_item()
            item['url'] = '#movie#' + url.group(1)
            item['img'] = self._url(img.group('img'))
            item['title'] = img.group('name')
            self._filter(result, item)
    if next:
        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = next.group('url').replace('&amp;', '&')
        result.append(item)
    return result
def categories(self):
    result = []
    item = self.dir_item()
    item['type'] = 'new'
    item['url'] = '#newest#'
    result.append(item)
    data = util.request(self.base_url + '/kategorie/')
    data = util.substr(data, '<ul id="cat"', '</div>')
    pattern = '<a href="(?P<url>[^"]+)" title="(?P<name>[^"]+)">'
    for m in re.finditer(pattern, data, re.IGNORECASE | re.DOTALL):
        if m.group('url') == '#':
            break
        item = self.dir_item()
        item['title'] = m.group('name')
        item['url'] = m.group('url')
        result.append(item)
    over18 = __addon__.getSetting('over18')
    if over18 == 'true':
        item = self.dir_item()
        item['title'] = 'erotika'
        item['url'] = self.base_url + '/erotika'
        result.append(item)
    return result
def getMyVideos(session):
    result = []
    content = session.get(youtubeUrl + 'my_videos' + '?' + urllib.urlencode({'o': 'U'})).text
    dummy, i = util.substr('"VIDEO_LIST_DISPLAY_OBJECT"', ':', content)
    data = json.loads(util.parseBrackets(content, i, ['[', ']']))
    for item in data:
        soup = BeautifulSoup(
            util.unescape(item['html'].decode('unicode_escape')), "html.parser")
        ptag = soup.find(class_="vm-video-indicators")
        privacy = 'Public'
        if not ptag.find(class_='vm-unlisted').parent.has_attr('aria-hidden'):
            privacy = 'Private'
        if not ptag.find(class_='vm-private').parent.has_attr('aria-hidden'):
            privacy = 'Private'
        try:
            duration = util.timeStrToSeconds(soup.find(class_="video-time").get_text())
        except:
            duration = ''
        result.append({
            'id': item['id'],
            'name': soup.find(class_="vm-video-title-content").get_text(),
            'thumb': videoImage(item['id']),
            'duration': duration,
            'privacy': privacy,
            'user': '******'
        })
    return result
def resolve(self, item, captcha_cb=None, select_cb=None):
    result = []
    item = item.copy()
    url = item['url']
    if url.endswith('live.html'):
        url = url.replace('www.', '')
        channel = urlparse.urlparse(url).netloc.split('.')[0]
        if channel == 'plus':
            channel = 'jojplus'
        channel_quality_map = {
            'joj': ('360', '540', '720'),
            'jojplus': ('360', '540'),
            'wau': ('360', '540')
        }
        for quality in channel_quality_map[channel]:
            item = self.video_item()
            item['quality'] = quality + 'p'
            item['url'] = 'https://nn.geo.joj.sk/live/hls/' + channel + '-' + quality + '.m3u8'
            result.append(item)
    else:
        data = util.request(url)
        data = util.substr(data, '<section class="s-section py-0 s-video-detail">', '</section>')
        iframe_url = re.search('<iframe src="([^"]+)"', data).group(1)
        #print 'iframe_url = ', iframe_url
        player_str = urllib2.urlopen(iframe_url).read()
        #print player_str
        labels_str = re.search(r'var labels = {(.+?)};', player_str, re.DOTALL).group(1)
        #print 'labels:', labels_str
        renditions = re.search(r'renditions: \[(.+?)\]', labels_str).group(1).replace("'", "").split(',')
        #print 'renditions: ', renditions
        settings_str = re.search(r'var settings = {(.+?)};', player_str, re.DOTALL).group(1)
        #print 'settings:', settings_str
        poster_url = re.search(r'poster: \"(.+?)\"', settings_str).group(1)
        #print 'poster_url:', poster_url
        bitrates_str = re.search(r'var src = {(.+?)};', player_str, re.DOTALL).group(1)
        #print 'bitrates:', bitrates_str
        bitrates_url = re.search(r'"mp4": \[(.+?)\]', bitrates_str, re.DOTALL).group(1)
        bitrates_url = bitrates_url.replace("'", "").replace('\n', '').replace(' ', '').split(',')
        for idx, url in enumerate(bitrates_url):
            item = self.video_item()
            item['img'] = poster_url
            item['quality'] = renditions[idx]
            item['url'] = url
            result.append(item)
        result.reverse()
    if select_cb:
        return select_cb(result)
    return result
def list_show(self, url, list_series=False, list_episodes=False):
    result = []
    self.info("list_show %s" % (url))
    data = util.request(url)
    if list_series:
        series_data = util.substr(
            data, r'<select onchange="return selectSeason(this.value);">', '</select>')
        for serie_match in re.finditer(
                r'<option value="(?P<season_id>\d+)?"\s(selected="selected")?>\s+(?P<title>[^<]+)\n',
                series_data):
            item = self.dir_item()
            season_id = serie_match.group('season_id')
            if not season_id:
                season_id = ""
            item['title'] = serie_match.group('title')
            item['url'] = "%s?seasonId=%s" % (url.split('#')[0], season_id)
            result.append(item)
    if list_episodes:
        episodes_data = util.substr(data, r'<section>', '</section>')
        for article_match in re.finditer(
                r'<article class="b-article title-xs article-lp">(.+?)</article>',
                episodes_data, re.DOTALL):
            article_dict = self._list_article(article_match.group(1))
            if article_dict is not None:
                item = self.video_item()
                item.update(article_dict)
                item['title'] += ' ' + item.get('subtitle', '')
                result.append(item)
        title_to_key = {
            'Dátum': 'date',
            'Názov epizódy': 'title',
            'Sledovanosť': 'seen',
            'Séria': 'season',
            'Epizóda': 'episode'
        }
        headers_match = re.search(
            '<div class="i head e-video-categories">(.+?)</div>',
            episodes_data, re.DOTALL)
        if headers_match is not None:
            headers = []
            for span_match in re.finditer('<span[^>]*>([^<]+)</span>', headers_match.group(1)):
                key = title_to_key.get(span_match.group(1))
                if key is None:
                    print "undefined key", span_match.group(1)
                    headers.append("")
                else:
                    headers.append(key)
            archive_list_pattern = r'<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)[^>]+>\s+'
            for key in headers:
                if key in ("", "title"):
                    archive_list_pattern += r'^.+?$\s+'
                else:
                    archive_list_pattern += r'<span>(?P<%s>[^<]*)</span>\s+' % key
            for archive_list_match in re.finditer(archive_list_pattern, episodes_data, re.MULTILINE):
                item = self.video_item()
                groupdict = archive_list_match.groupdict()
                if 'season' in groupdict and 'episode' in groupdict:
                    # joj sometimes doesn't provide season/episode numbers
                    # for the latest episodes, so mark them as 0.
                    try:
                        season = int(archive_list_match.group('season'))
                    except Exception:
                        season = 0
                    try:
                        episode = int(archive_list_match.group('episode'))
                    except Exception:
                        episode = 0
                    item['title'] = "(S%02d E%02d) - %s" % (
                        season, episode, archive_list_match.group('title'))
                else:
                    item['title'] = "(%s) - %s" % (
                        archive_list_match.group('date'),
                        archive_list_match.group('title'))
                item['url'] = self._fix_url(archive_list_match.group('url'))
                result.append(item)
    pagination_data = util.substr(data, '<nav>', '</nav>')
    next_match = re.search(r'a href="(?P<url>[^"]+)" aria-label="Ďalej"', pagination_data, re.DOTALL)
    if next_match:
        item = self.dir_item()
        item['type'] = 'next'
        item['url'] = self._fix_url(next_match.group(1))
        result.append(item)
    return result
def resolve(self, item, captcha_cb=None, select_cb=None):
    item = item.copy()
    util.init_urllib()
    url = self._url(item['url'])
    page = ''
    try:
        # plain opener without redirect handling, so we can inspect the response ourselves
        opener = OpenerDirector()
        opener.add_handler(HTTPHandler())
        opener.add_handler(UnknownHandler())
        install_opener(opener)
        request = Request(url)
        request.add_header('User-Agent', util.UA)
        response = urlopen(request)
        page = response.read()
        response.close()
    except HTTPError:
        traceback.print_exc()
        return
    data = util.substr(page, '<form method=post target=\"iframe_dwn\"', '</form>')
    action = re.search('action=(?P<url>[^>]+)', data, re.IGNORECASE | re.DOTALL)
    img = re.search('<img src=\"(?P<url>[^\"]+)', data, re.IGNORECASE | re.DOTALL)
    if img and action:
        sessid = []
        for cookie in re.finditer('(PHPSESSID=[^\;]+)', response.headers.get('Set-Cookie'), re.IGNORECASE | re.DOTALL):
            sessid.append(cookie.group(1))
        # we have to download the captcha image ourselves
        image = util.request(self._url(img.group('url')), headers={'Referer': url, 'Cookie': sessid[-1]})
        img_file = os.path.join(self.tmp_dir, 'captcha.png')
        util.save_data_to_file(image, img_file)
        code = None
        if captcha_cb:
            code = captcha_cb({'id': '0', 'img': img_file})
        if not code:
            self.info('No captcha received, exit')
            return
        request = urllib.urlencode({'code': code})
        req = Request(self._url(action.group('url')), request)
        req.add_header('User-Agent', util.UA)
        req.add_header('Referer', url)
        req.add_header('Cookie', sessid[-1])
        try:
            resp = urlopen(req)
            if resp.code == 302:
                file_url = resp.headers.get('location')
            else:
                file_url = resp.geturl()
            if file_url.find(action.group('url')) > 0:
                # still on the captcha form - extract the error message
                msg = resp.read()
                resp.close()
                js_msg = re.search('alert\(\'(?P<msg>[^\']+)', msg, re.IGNORECASE | re.DOTALL)
                if js_msg:
                    raise ResolveException(js_msg.group('msg'))
                self.error(msg)
                raise ResolveException('Nelze ziskat soubor, zkuste to znovu')
            resp.close()
            if file_url.find('data') >= 0 or file_url.find('download_free') > 0:
                item['url'] = file_url
                return item
            self.error('wrong captcha, retrying')
            return self.resolve(item, captcha_cb, select_cb)
        except HTTPError:
            traceback.print_exc()
            return
def resolve(self, item, captcha_cb=None, select_cb=None):
    original_yt = self.original_yt
    self.info('original_yt ' + str(original_yt) + ' ' + str(type(original_yt)))
    result = []
    resolved = []
    item = item.copy()
    url = self._url(item['url'])
    data = util.substr(util.request(url), 'async type', '</script>')
    playlist = re.search('''new mfJWPlayer.+?(?P<jsondata>playlist:.+?)events:''', data, re.MULTILINE | re.DOTALL)
    # the player config is a JavaScript object literal - quote its bare keys
    # and drop the trailing comma so the lenient JSON parser can read it
    jsondata = re.sub(
        ' +', ' ',
        '{%s' % playlist.group('jsondata').replace('file:', '"file":').replace(
            'label:', '"label":').replace('kind:', '"kind":').replace(
                'default:', '"default":').replace('true', '"true"').replace('],', ']')) + '}'
    jsondata = demjson.decode(jsondata)
    for playlist_item in jsondata['playlist']:
        playlist_item['file'] = playlist_item['file'].replace('time_continue=1&', '')
        if original_yt:
            # extract the youtube video id from the watch url
            e = 'watch?v='
            edx = playlist_item['file'].find(e)
            video_id = playlist_item['file'][edx + len(e):]
        # extraction runs for every entry, since the loop below always reads
        # the ytdl fields; quality is 0=SD, 1=720p, 2=1080p, 3=Highest Available
        vid = YDStreamExtractor.getVideoInfo(playlist_item['file'], quality=3)
        video_url = [vid.streams()[0]]
        subs = playlist_item['tracks']
        if video_url and subs:
            for i in video_url:
                i['subs'] = self.base_url[:-1] + subs[0]['file']
        resolved += video_url[:]
    if not resolved:
        raise ResolveException('Video nenalezeno')
    for i in resolved:
        item = self.video_item()
        try:
            item['title'] = i['title']
        except KeyError:
            pass
        item['url'] = i['xbmc_url']
        if original_yt:
            item['url'] = "plugin://plugin.video.youtube/?action=play_video&videoid=" + video_id
        item['quality'] = i['ytdl_format']['height']
        item['surl'] = i['ytdl_format']['webpage_url']
        item['subs'] = i['subs']
        item['headers'] = {}
        try:
            item['fmt'] = i['fmt']
        except KeyError:
            pass
        result.append(item)
    if len(result) > 0 and select_cb:
        return select_cb(result)
    return result
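To make the quoting step above concrete, here is a small self-contained sketch of the same JS-object-to-JSON normalization applied to a toy player config; the input string is invented for illustration:

import demjson  # lenient JSON parser, as used by the resolver above

# toy player config in JavaScript object-literal syntax (invented for illustration)
raw = "playlist: [{file: 'http://example.com/v.mp4', label: '720p', default: true}],"
fixed = '{%s' % raw.replace('file:', '"file":').replace('label:', '"label":').replace(
    'default:', '"default":').replace('true', '"true"').replace('],', ']') + '}'
# demjson tolerates the remaining bare key and the single-quoted strings
print demjson.decode(fixed)['playlist'][0]['file']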
class FastshareContentProvider(ContentProvider):
    def __init__(self, username=None, password=None, filter=None, tmp_dir='.'):
        ContentProvider.__init__(self, 'fastshare.cz', 'http://www.fastshare.cz/', username, password, filter, tmp_dir)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.LWPCookieJar()))
        urllib2.install_opener(opener)

    def capabilities(self):
        return ['search', 'resolve']

    def search(self, keyword):
        return self.list('?term=' + urllib.quote(keyword))

    def list(self, url):
        result = []
        page = util.request(self._url(url))
        data = util.substr(page, '<div class=\"search', '<footer')
        for m in re.finditer('<div class=\"search-result-box(.+?)</a>', data, re.IGNORECASE | re.DOTALL):
            it = m.group(1)
            link = re.search('<a href=([^ ]+)', it, re.IGNORECASE | re.DOTALL)
            name = re.search('title=\"([^\"]+)', it, re.IGNORECASE | re.DOTALL)
            img = re.search('<img src=\"([^\"]+)', it, re.IGNORECASE | re.DOTALL)
            size = re.search('<div class=\"fs\">([^<]+)', it, re.IGNORECASE | re.DOTALL)
            time = re.search('<div class=\"vd\">([^<]+)', it, re.IGNORECASE | re.DOTALL)
            if name and link:
                item = self.video_item()
                item['title'] = name.group(1)
                if size:
                    item['size'] = size.group(1).strip()
                if time:
                    item['length'] = time.group(1).strip()
                item['url'] = self._url(link.group(1))
                if img:
                    item['img'] = self._url(img.group(1))
                self._filter(result, item)
        next = re.search('<a href=\"(?P<url>[^\"]+)[^>]+>dal', data, re.IGNORECASE | re.DOTALL)
        if next:
            item = self.dir_item()
            item['type'] = 'next'
            item['url'] = next.group('url')
            result.append(item)
        return result

    def resolve(self, item, captcha_cb=None, select_cb=None):
        item = item.copy()
        util.init_urllib()
        url = self._url(item['url'])
        page = ''
        try:
            # plain opener without redirect handling, so we can inspect the response ourselves
            opener = urllib2.OpenerDirector()
            opener.add_handler(urllib2.HTTPHandler())
            opener.add_handler(urllib2.UnknownHandler())
            urllib2.install_opener(opener)
            request = urllib2.Request(url)
            request.add_header('User-Agent', util.UA)
            response = urllib2.urlopen(request)
            page = response.read()
            response.close()
        except urllib2.HTTPError:
            traceback.print_exc()
            return
        data = util.substr(page, '<form method=post target=\"iframe_dwn\"', '</form>')
        action = re.search('action=(?P<url>[^>]+)', data, re.IGNORECASE | re.DOTALL)
        img = re.search('<img src=\"(?P<url>[^\"]+)', data, re.IGNORECASE | re.DOTALL)
        if img and action:
            sessid = []
            for cookie in re.finditer('(PHPSESSID=[^\;]+)', response.headers.get('Set-Cookie'), re.IGNORECASE | re.DOTALL):
                sessid.append(cookie.group(1))
            # we have to download the captcha image ourselves
            image = util.request(self._url(img.group('url')), headers={'Referer': url, 'Cookie': sessid[-1]})
            img_file = os.path.join(self.tmp_dir, 'captcha.png')
            util.save_data_to_file(image, img_file)
            code = None
            if captcha_cb:
                code = captcha_cb({'id': '0', 'img': img_file})
            if not code:
                self.info('No captcha received, exit')
                return
            request = urllib.urlencode({'code': code})
            req = urllib2.Request(self._url(action.group('url')), request)
            req.add_header('User-Agent', util.UA)
            req.add_header('Referer', url)
            req.add_header('Cookie', sessid[-1])
            try:
                resp = urllib2.urlopen(req)
                if resp.code == 302:
                    file_url = resp.headers.get('location')
                else:
                    file_url = resp.geturl()
                if file_url.find(action.group('url')) > 0:
                    # still on the captcha form - extract the error message
                    msg = resp.read()
                    resp.close()
                    js_msg = re.search('alert\(\'(?P<msg>[^\']+)', msg, re.IGNORECASE | re.DOTALL)
                    if js_msg:
                        raise ResolveException(js_msg.group('msg'))
                    self.error(msg)
                    raise ResolveException('Nelze ziskat soubor, zkuste to znovu')
                resp.close()
                if file_url.find('data') >= 0 or file_url.find('download_free') > 0:
                    item['url'] = file_url
                    return item
                self.error('wrong captcha, retrying')
                return self.resolve(item, captcha_cb, select_cb)
            except urllib2.HTTPError:
                traceback.print_exc()
                return
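A brief usage sketch for the provider above, assuming the surrounding plugin framework (ContentProvider, util and friends) is importable; the keyword and output format are illustrative only:

# hypothetical usage of FastshareContentProvider
provider = FastshareContentProvider(tmp_dir='/tmp')
for entry in provider.search('some keyword'):
    # each search hit is a video_item dict; 'size' may be missing
    print entry['title'], entry.get('size', '?')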
def login(pathCookies, email, password):
    session = initSession()

    # Step 1 - fetch the login page and extract the two anti-XSRF tokens
    content = session.get('https://accounts.google.com/Login').text
    dummy, i = util.substr('window.WIZ_global_data', '= ', content)
    data = json.loads(util.parseBrackets(content, i, ['{', '}']))
    data = data['OewCAd']
    p1 = data.split(',')[3].replace('"', '').replace(']', '')
    soup = BeautifulSoup(content, "html.parser")
    data = soup.find("div", {"id": "view_container"})['data-initial-setup-data'].replace('%.@.', '[')
    data = json.loads(data)
    p2 = json.dumps(data[13]).replace('"', '')

    # Step 2 - submit the e-mail address (account lookup)
    session.headers.update({
        'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
        'Google-Accounts-XSRF': '1',
        'Referer': 'https://accounts.google.com/',
        'X-Same-Domain': '1'
    })
    data = {
        'continue': 'https://accounts.google.com/ManageAccount',
        'f.req': '["' + email + '","' + p2 + '",[],null,"IL",null,null,2,false,true,[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?requestPath=%2FLogin&Page=PasswordSeparationSignIn",null,[],4],1,[null,null,[]],null,null,null,true],"' + email + '"]',
        'azt': p1,
        'deviceinfo': '[null,null,null,[],null,"IL",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
        'gmscoreversion': 'undefined',
        'checkConnection': 'youtube:841:0',
        'checkedDomains': 'youtube',
        'pstMsg': '1'
    }
    content = session.post('https://accounts.google.com/_/signin/sl/lookup?hl=en&_reqid=73079&rt=j', data=data).text
    data = json.loads(content.replace(")]}'", "").replace("\n", ""))
    p3 = json.dumps(data[0][0][2]).replace('"', '')

    # Step 3 - submit the password (challenge)
    data = {
        'continue': 'https://accounts.google.com/ManageAccount',
        'f.req': '["' + p3 + '",null,1,null,[1,null,null,null,["' + password + '",null,true]],[null,null,[2,1,null,1,"https://accounts.google.com/ServiceLogin?requestPath=%2FLogin&Page=PasswordSeparationSignIn",null,[],4],1,[null,null,[]],null,null,null,true]]',
        'azt': p1,
        'deviceinfo': '[null,null,null,[],null,"IL",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
        'gmscoreversion': 'undefined',
        'checkConnection': 'youtube:841:0',
        'checkedDomains': 'youtube',
        'pstMsg': '1'
    }
    content = session.post('https://accounts.google.com/_/signin/sl/challenge?hl=en&_reqid=173079&rt=j', data=data).text
    if 'SID' not in str(session.cookies):
        raise Exception('Unable to login: Invalid credentials')

    # Step 4 - fetch the channel id and user name from the YouTube page
    util.resetHeaders(session)
    content = session.get('https://www.youtube.com/my_videos?o=U').text
    dummy, i = util.substr("yt.setConfig('GOOGLE_HELP_PRODUCT_DATA'", ', ', content)
    data = json.loads(util.parseBrackets(content, i, ['{', '}']))
    channel = data['channel_external_id']
    soup = BeautifulSoup(content, "html.parser")
    name = soup.find("div", class_='yt-masthead-picker-name').get_text()

    # save login data into the cookie jar
    loginInfo = email + '&' + channel + '&' + name + '&' + str(time.time())
    util.setCookie(session, 'www.google.com', 'MyLoginInfo', loginInfo)
    util.saveCookies(session, pathCookies)
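A minimal sketch of driving this login flow, assuming initSession and the util helpers used above are importable; the cookie path and credentials are placeholders:

# hypothetical usage - path and credentials are placeholders
try:
    login('/tmp/google-cookies.txt', 'user@example.com', 'secret')
    print "logged in, cookies saved"
except Exception as e:
    print "login failed:", e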
def list_show(self, url, categories=True, episodes=True):
    result = []
    data = util.request(url)
    if categories:
        categories_data = util.substr(data, '<section class="col-md-12 videoarchiv_navi">', '</section>')
        categories_data = util.substr(categories_data, '<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-2">', '</div>')
        for i in re.findall(r'<li>(.+?)</li>', categories_data, re.DOTALL):
            item = self.dir_item()
            item['url'] = self._url(re.search(r'<a href="([^"]+)', i).group(1))
            item['title'] = re.search(r'title="([^"]+)', i).group(1)
            result.append(item)
    if episodes:
        row_list = []
        row_pattern = re.compile(r'<div class="item row ">(.+?)</div>\s+</div>', re.DOTALL)
        purl = urlparse(url)
        if 'page=' not in purl.query:
            # first page - the latest episode is listed in its own section
            episodes_data = util.substr(data, '<section class="col-md-12 info_new row">', '</section>')
            row_match = row_pattern.search(episodes_data)
            if row_match:
                row_list.append(row_match.group(1))
        # other episodes
        episodes_data = util.substr(data, '<section class="col-md-12 article-view homepage">', '</section>')
        row_list += row_pattern.findall(episodes_data)
        for row in row_list:
            title_and_url_match = re.search(r'<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)', row)
            if not title_and_url_match:
                self.error('list_show - cannot get video item from "%s"' % row.strip())
                continue
            item = self.video_item()
            item['url'] = self._url(title_and_url_match.group('url'))
            item['title'] = title_and_url_match.group('title')
            img_match = re.search(r'<img.+?src="([^"]+)', row)
            if img_match:
                item['img'] = self._url(img_match.group(1))
            countdown_match = re.search(r'<span class="archiv-countdown">.+?</i>([^<]+)', row)
            if countdown_match:
                item['countdown'] = countdown_match.group(1).strip()
            time_match = re.search(r'<div class="time">([^<]+)', row)
            if time_match:
                length_str, date_str = time_match.group(1).split('•')
                item['length'] = length_str.strip()
                item['date'] = date_str.strip()
            result.append(item)
    next_match = re.search(r'<li class="pager-next"><a href="([^"]+)', data)
    if next_match:
        result.append(self.dir_item(url=self._url(next_match.group(1)), type='next'))
    return result
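As a usage illustration, a caller can follow the 'next' items this method appends to walk the archive pages; the provider variable, starting url and page cap below are assumptions:

# hypothetical pagination loop over list_show results
url = 'http://example.com/videoarchiv'  # placeholder archive url
for _ in range(3):  # cap the number of pages for the example
    items = provider.list_show(url)
    for it in items:
        if it.get('type') == 'next':
            url = it['url']
            break
    else:
        break  # no further pages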
class UloztoContentProvider(ContentProvider):
    def __init__(self, username=None, password=None, filter=None):
        ContentProvider.__init__(self, 'ulozto.cz', 'https://www.ulozto.cz/', username, password, filter)
        self.search_type = ''
        self.cp = urllib2.HTTPCookieProcessor(cookielib.LWPCookieJar())
        self.rh = UloztoHTTPRedirectHandler()
        self.rh.throw = False
        self.rh.location = None
        self.init_urllib()

    def init_urllib(self):
        opener = urllib2.build_opener(self.cp, self.rh)
        urllib2.install_opener(opener)

    def capabilities(self):
        return ['login', 'search', 'resolve', 'categories']

    def categories(self):
        result = []
        if not self.login():
            return result
        data = util.request(self.base_url + 'm/' + self.username)
        fav = re.search('<li id=\"fmFavoritesFolder.+?href=\"(?P<url>[^\"]+)[^>]*>(?P<title>[^<]+)', data, re.IGNORECASE | re.DOTALL)
        if fav:
            item = self.dir_item()
            item['url'] = '#fm#' + fav.group('url')
            item['title'] = fav.group('title')
            result.append(item)
        myfiles = re.search('<a class=\"fmHomeFolder.+?href=\"(?P<url>[^\"]+)[^>]*>(?P<title>[^<]+)', data, re.IGNORECASE | re.DOTALL)
        if myfiles:
            item = self.dir_item()
            item['url'] = '#fm#' + myfiles.group('url')
            item['title'] = myfiles.group('title')
            result.append(item)
        return result

    def search(self, keyword):
        return self.list(self.base_url + 'hledej/?' + self.search_type + 'q=' + urllib.quote(keyword))

    def login(self):
        if self.username and self.password and len(self.username) > 0 and len(self.password) > 0:
            self.info('Login user=%s, pass=*****' % self.username)
            self.rh.throw = False
            page = util.request(self.base_url + 'login?key=logreg')
            if page.find('href="/?do=web-logout') > 0:
                self.info('Already logged in')
                return True
            data = util.substr(page, '<li class=\"menu-username', '</li')
            m = re.search('key=(?P<key>[^\"]+)\"', data, re.IGNORECASE | re.DOTALL)
            token = re.search('<input type=\"hidden\" name=\"_token_\".+?value=\"([^\"]+)"', page, re.IGNORECASE | re.DOTALL)
            if m and token:
                login_url = self.base_url + 'login?key=' + m.group('key') + '&do=loginForm-submit'
                data = util.post(login_url, {
                    'username': self.username,
                    'password': self.password,
                    'remember': 'on',
                    'login': '******',
                    '_token_': token.group(1)
                })
                if data.find('href="/?do=web-logout') > 0:
                    self.info('Login successful')
                    return True
        self.info('Login failed')
        return False

    def list_folder(self, url):
        self.login()
        result = []
        page = util.request(self._url(url))
        page = util.substr(page, '<div id=\"fmItems', '</ul')
        for m in re.finditer('<div class=\"fmFolder(.+?)</em', page, re.IGNORECASE | re.DOTALL):
            data = m.group(1)
            item = self.dir_item()
            item['url'] = '#fm#' + re.search('data-href=\"([^\"]+)', data).group(1)
            item['title'] = re.search('data-name=\"([^\"]+)', data).group(1)
            item['img'] = re.search('<img src=\"([^\"]+)', data).group(1)
            result.append(item)
        for m in re.finditer('<div class=\"fmFile(.+?)</em>', page, re.IGNORECASE | re.DOTALL):
            data = m.group(1)
            item = self.video_item()
            item['url'] = re.search('data-href=\"([^\"]+)', data).group(1)
            item['title'] = '%s.%s' % (re.search('data-name=\"([^\"]+)', data).group(1),
                                       re.search('data-ext=\"([^\"]+)', data).group(1))
            item['img'] = re.search('<img src=\"([^\"]+)', data).group(1)
            result.append(item)
        return result

    @cached(1)
    def list(self, url):
        if url.find('#fm#') == 0:
            return self.list_folder(url[5:])
        url = self._url(url)
        page = util.request(url, headers={
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': url,
            'Cookie': 'uloz-to-id=1561277170;'
        })
        # the file list is encrypted - extract the key map from the page script
        script = util.substr(page, 'var kn', '</script>')
        keymap = None
        key = None
        k = re.search('{([^\;]+)"', script, re.IGNORECASE | re.DOTALL)
        if k:
            keymap = json.loads("{" + k.group(1) + "\"}")
        j = re.search('kapp\(kn\[\"([^\"]+)"', script, re.IGNORECASE | re.DOTALL)
        if j:
            key = j.group(1)
        if not (j and k):
            self.error('error parsing page - unable to locate keys')
            return []
        burl = b64decode('I2h0dHA6Ly9kZWNyLWNlY2gucmhjbG91ZC5jb20vZGVjcnlwdC8/a2V5PSVzJnZhbHVlPSVz')
        murl = b64decode('aHR0cDovL2RlY3ItY2VjaC5yaGNsb3VkLmNvbS9kZWNyeXB0Lw==')
        data = util.substr(page, '<ul class=\"chessFiles', 'var kn =')
        result = []
        req = {'seed': keymap[key], 'values': keymap}
        decr = json.loads(util.post_json(murl, req))
        for li in re.finditer('<li data-icon=\"(?P<key>[^\"]+)', data, re.IGNORECASE | re.DOTALL):
            body = urllib.unquote(b64decode(decr[li.group('key')]))
            m = re.search('<li.+?<div data-icon=\"(?P<key>[^\"]+)[^<]+<img(.+?)src=\"(?P<logo>[^\"]+)(.+?)<i class=\"fa fa-download(?P<info>.+?)class="fileReset"', body, re.IGNORECASE | re.DOTALL)
            if not m:
                continue
            value = keymap[m.group('key')]
            info = m.group('info')
            iurl = burl % (keymap[key], value)
            item = self.video_item()
            item['title'] = '.. title not found..'
            title = re.search('<div class=\"fileName.+?<a[^>]+>(?P<title>[^<]+)', info, re.IGNORECASE | re.DOTALL)
            if title:
                item['title'] = title.group('title')
            size = re.search('<span class=\"fileSize[^>]+>(?P<size>[^<]+)', info, re.IGNORECASE | re.DOTALL)
            if size:
                item['size'] = size.group('size').strip()
            time = re.search('<span class=\"fileTime[^>]+>(?P<time>[^<]+)', info, re.IGNORECASE | re.DOTALL)
            if time:
                item['length'] = time.group('time')
            item['url'] = iurl
            item['img'] = m.group('logo')
            self._filter(result, item)
        # page navigation
        data = util.substr(page, '<div class=\"paginator', '</div')
        mnext = re.search('<a href=\"(?P<url>[^\"]+)\" class="next', data)
        if mnext:
            item = self.dir_item()
            item['type'] = 'next'
            item['url'] = mnext.group('url')
            result.append(item)
        return result

    @cached(48)
    def decr_url(self, url):
        if url.startswith('#'):
            ret = json.loads(util.request(url[1:]))
            if 'result' in ret:
                url = b64decode(ret['result'])
                url = self._url(url)
        return url

    def resolve(self, item, captcha_cb=None):
        item = item.copy()
        url = item['url']
        if url.startswith('http://www.ulozto.sk'):
            url = self.base_url + url[20:]
        url = self.decr_url(url)
        url = self._url(url)
        if url.startswith('#'):
            util.error('[uloz.to] - url was not correctly decoded')
            return
        self.init_urllib()
        self.login()
        self.info('Resolving %s' % url)
        if 'vip' not in item:
            item['vip'] = False
        vip = item['vip']
        if vip:
            page = util.request(url)
        else:
            try:
                request = urllib2.Request(url)
                response = urllib2.urlopen(request)
                page = response.read()
                response.close()
            except urllib2.HTTPError:
                traceback.print_exc()
                return
        if page.find('Stránka nenalezena!') > 0:
            self.error('page with movie was not found on server')
            return
        if vip:
            data = util.substr(page, '<h3>Neomezené stahování</h3>', '</div')
            m = re.search('<a(.+?)href=\"(?P<url>[^\"#]+)\"', data, re.IGNORECASE | re.DOTALL)
            if m:
                try:
                    self.rh.throw = True
                    resp = urllib2.urlopen(urllib2.Request(self._url(m.group('url'))))
                except RedirectionException:
                    # this is what we need, our redirect handler raises this
                    pass
                except urllib2.HTTPError:
                    # this is not OK, something went wrong
                    traceback.print_exc()
                    self.error('Cannot resolve stream url, server did not redirect us')
                    return
                stream = self.rh.location
                item['url'] = self._fix_stream_url(stream)
                item['surl'] = url
                return item
        else:
            data = util.substr(page, '<h3>Omezené stahování</h3>', '<script')
            m = re.search('<form(.+?)action=\"(?P<action>[^\"]+)\"', data, re.IGNORECASE | re.DOTALL)
            if m:
                self.rh.throw = True
                stream_url = self._get_file_url_anonymous(page, self._url(m.group('action')), response.headers, captcha_cb)
                if stream_url:
                    item['url'] = stream_url
                    item['surl'] = url
                    return item
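The resolvers above treat captcha_cb as a callback that receives a dict with 'id' and 'img' (a path to the downloaded captcha image, as built by the Fastshare resolver) and returns the solved text; the Ulozto resolver is assumed to share the same contract via _get_file_url_anonymous. A minimal console-based sketch of such a callback:

def console_captcha_cb(captcha):
    # 'captcha' is {'id': ..., 'img': path-to-image} as built by the resolvers above
    print "captcha image saved to: " + captcha['img']
    return raw_input('enter captcha text: ').strip()

# hypothetical usage with one of the providers above:
# resolved = provider.resolve({'url': some_url}, captcha_cb=console_captcha_cb)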