def findvideos(item):
    """Build the list of playable items for a pelisadicto movie page.

    Extracts the plot from the synopsis block, then one entry per
    language/quality/server row of the links table.
    """
    logger.info("[pelisadicto.py] findvideos")
    itemlist = []

    # Collapse newlines/double-spaces so the synopsis regex can match.
    data = re.sub(r"\n|\s{2}", "", scrapertools.cache_page(item.url))

    patron = "<!-- SINOPSIS --> "
    patron += "<h2>[^<]+</h2> "
    patron += "<p>([^<]+)</p>"
    matches = re.compile(patron, re.DOTALL).findall(data)
    # FIX: guarded — the original raised IndexError when no synopsis matched.
    plot = matches[0] if matches else ""

    # Descarga la pagina (raw page, without whitespace collapsing)
    data = scrapertools.cache_page(item.url)
    patron = '<tr>.*?'
    patron += '<td><img src="(.*?)".*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<a href="(.*?)".*?</tr>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedidioma, scrapedcalidad, scrapedserver, scrapedurl in matches:
        # The language is encoded as a flag image (1=es, 2=lat, 3=sub).
        idioma = ""
        if "/img/1.png" in scrapedidioma:
            idioma = "Castellano"
        if "/img/2.png" in scrapedidioma:
            idioma = "Latino"
        if "/img/3.png" in scrapedidioma:
            idioma = "Subtitulado"
        title = item.title + " [" + scrapedcalidad + "][" + idioma + "][" + scrapedserver + "]"
        itemlist.append(
            Item(channel=__channel__, action="play", title=title, fulltitle=title,
                 url=scrapedurl, thumbnail="", plot=plot, show=item.show))

    return itemlist
def play(item):
    """Resolve the final video links for a cineblog01-style page."""
    logger.info("streamondemand.streamingfilmit play")
    page = scrapertools.cache_page(item.url, headers=headers)
    page = scrapertools.decodeHtmlentities(page).replace('http://cineblog01.pw', 'http://k4pp4.pw')
    url = scrapertools.find_single_match(page, r'<a\s*href="([^"]+)"><h1')
    page = scrapertools.cache_page(url, headers=headers)

    if "go.php" in url:
        # Redirect page: the target is in a JS location assignment.
        page = scrapertools.get_match(page, 'window.location.href = "([^"]+)";')
    elif "/link/" in url:
        # The link may be hidden in a packed (p,a,c,k,e,d) script.
        from lib.jsbeautifier.unpackers import packer
        try:
            packed = scrapertools.get_match(page, "(eval.function.p,a,c,k,e,.*?)</script>")
            page = packer.unpack(packed)
        except IndexError:
            # No packed block: the link variable sits in the plain page.
            pass
        page = scrapertools.get_match(page, 'var link(?:\s)?=(?:\s)?"([^"]+)";')
    else:
        page = url

    itemlist = servertools.find_video_items(data=page)
    for videoitem in itemlist:
        videoitem.title = item.show
        videoitem.fulltitle = item.fulltitle
        videoitem.thumbnail = item.thumbnail
        videoitem.channel = __channel__
    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Return the direct mp4 URL published by an hdplay page."""
    logger.info("[hdplay.py] get_video_url(page_url='%s')" % page_url)
    video_urls = []

    # First request primes the session (page with Download/Watch buttons);
    # the second one simulates pressing the "Ver" (watch) button.
    scrapertools.cache_page(page_url)
    data = scrapertools.cache_page(page_url, post="agree=")

    # var movieURL = "http://.../JOUhxy.mp4?key=..."; var fileName = "JOUhxy.mp4";
    patron = 'var movieURL \= "([^"]+)"\;\W+'
    patron += 'var fileName \= "([^"]+)"\;'
    found = re.compile(patron, re.DOTALL).findall(data)
    if found:
        video_urls.append(["[hdplay]", found[0][0]])

    for video_url in video_urls:
        logger.info("[hdplay.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def play(item):
    """Solve the captcha-protected link page and return video items.

    Downloads the captcha image referenced next to the hidden id field,
    OCRs it with get_captcha() and posts both back to anonim.php.
    """
    logger.info("pelisalacarta.channels.seriesdanko play (url=" + item.url + ", server=" + item.server + ")")
    data = scrapertools.cache_page(item.url)
    patron = '<input type="hidden" name="id" value="([^"]+)" />.*?'
    patron += '<img src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    id = matches[0][0]
    captcha = matches[0][1]

    image = os.path.join(config.get_data_path(), 'captcha.png')
    imgurl = "http://seriesdanko.com/" + captcha
    req = urllib2.Request(imgurl)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0')
    req.add_header('Accept-Encoding', 'gzip, deflate')
    # FIX: the HTTP response handle was never closed (resource leak);
    # close both the response and the local file deterministically.
    f = urllib2.urlopen(req)
    try:
        img = open(image, 'wb')
        try:
            img.write(f.read())
        finally:
            img.close()
    finally:
        f.close()

    spc = get_captcha(image)
    post = "id=%s&spc=%s" % (id, spc)
    data = scrapertools.cache_page("http://seriesdanko.com/anonim.php", post=post)
    return servertools.find_video_items(data=data)
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a 180upload link: submits the download2 form, then looks
    for a direct download anchor or the online player's play() call."""
    logger.info("[one80upload.py] get_video_url(page_url='%s')" % page_url)
    video_urls = []
    data = scrapertools.cache_page(page_url)

    # Hidden form fields needed for the "download2" POST.
    file_id = scrapertools.get_match(data, '<input type="hidden" name="id" value="([^"]+)">[^<]+')
    rand_token = scrapertools.get_match(data, '<input type="hidden" name="rand" value="([^"]+)">')
    post = "op=download2&id=" + file_id + "&rand=" + rand_token + "&referer=&method_free=&method_premium=&down_direct=1"
    request_headers = [
        ['User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14'],
        ['Referer', page_url],
    ]
    data = scrapertools.cache_page(page_url, post=post, headers=request_headers)

    # Either a direct download anchor...
    download_matches = re.compile('href="([^"]+)" target="_parent"><span class="style1">Download', re.DOTALL).findall(data)
    if download_matches:
        logger.info("[180upload.py] encuentra archivo de descarga=" + download_matches[0])
    else:
        # ...or the embedded player's play() argument.
        logger.info("[180upload.py] buscando video para ver online")
        play_matches = re.compile("this\.play\('([^']+)'", re.DOTALL).findall(data)
        if play_matches:
            video_urls.append(["." + play_matches[0].rsplit('.', 1)[1] + " [180upload]", play_matches[0]])

    for video_url in video_urls:
        logger.info("[180upload.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a filesmonster premium download link.

    Logs in through the public API, then requests a premium download URL
    for the file referenced by page_url.  On failure, the returned
    "title" slot carries the error message instead of the server label.
    """
    logger.info("[filesmonster.py] get_video_url( page_url='%s')")
    video_urls = []
    alerta = '[filesmonster premium]'

    # NOTE(review): the credentials in this copy were redacted ("******");
    # rebuilt from the function's own user/password parameters — confirm
    # against the original connector.
    post2 = "username=" + user + "&password=" + password
    login_url = "http://filesmonster.com/api/public/login"
    data1 = scrapertools.cache_page(login_url, post=post2)
    # The API answers JSON; field 3 of a '"'-split is the status value.
    partes1 = data1.split('"')
    estado = partes1[3]
    if estado != 'success':
        alerta = "[error de filesmonster premium]: " + estado

    # The download id is the query string of the download.php URL.
    id = page_url.replace("http://filesmonster.com/download.php", "")
    post = id.replace("?", "")
    url = 'http://filesmonster.com/api/public/premiumDownload'
    data2 = scrapertools.cache_page(url, post=post)
    partes = data2.split('"')
    url = partes[7]
    if "http" not in url:
        alerta = "[error de filesmonster premium]: " + url

    video_urls.append([alerta, url])
    return video_urls
def update_servers():
    """Synchronise the local server connectors with the remote list.

    Downloads the remote serverlist.xml, compares per-server versions
    against the local copy, fetches any connector that is new or
    outdated, then persists the remote list locally.
    """
    xml = scrapertools.cache_page(remote_url + "serverlist.xml")
    remote_dict = read_servers_list(xml)

    with open(os.path.join(local_folder, "serverlist.xml"), "rb") as f:
        data = f.read()
    local_dict = read_servers_list(data)

    import xbmcgui
    progress = xbmcgui.DialogProgressBG()
    progress.create("Update servers list")
    # FIX: close the background dialog even if a download raises —
    # previously an exception left the progress bar on screen forever.
    try:
        for index, server_id in enumerate(remote_dict.iterkeys()):
            percentage = index * 100 / len(remote_dict)
            # Fetch when the server is new or the remote version is higher.
            if server_id not in local_dict or remote_dict[server_id][VERSION_IDX] > local_dict[server_id][VERSION_IDX]:
                data = scrapertools.cache_page(remote_dict[server_id][UPDATE_URL_IDX])
                with open(os.path.join(local_folder, server_id + ".py"), "wb") as f:
                    f.write(data)
            progress.update(percentage, " Update server: " + server_id)

        # Persist the remote list only after all connectors were handled.
        with open(os.path.join(local_folder, "serverlist.xml"), "wb") as f:
            f.write(xml)
    finally:
        progress.close()
def findvid(item):
    """Collect video items from an animesubita episode page via the
    gnarty_player AJAX endpoint, falling back to the raw page."""
    logger.info("streamondemand.channels.animesubita findvideos")
    headers.append(['Referer', item.url])

    # Descarga la pagina
    page = scrapertools.cache_page(item.url, headers=headers)
    player_ids = re.compile('return\s*gnarty_player\((\d+)\);', re.DOTALL).findall(page)

    ajax_url = host + 'wp-admin/admin-ajax.php'
    fragments = [
        scrapertools.cache_page(ajax_url, post='action=loadPlayer&id=' + vid, headers=headers)
        for vid in player_ids
    ]
    merged = ''.join(fragments)

    itemlist = servertools.find_video_items(data=merged)
    if not itemlist:
        # No player fragments resolved — scan the page itself.
        itemlist = servertools.find_video_items(data=page)

    for videoitem in itemlist:
        videoitem.title = item.title + videoitem.title
        videoitem.fulltitle = item.fulltitle
        videoitem.thumbnail = item.thumbnail
        videoitem.show = item.show
        videoitem.plot = item.plot
        videoitem.channel = __channel__
    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a hugefiles link: submit the free-download form, unpack
    the player script and extract the media URL."""
    logger.info("(page_url='%s')" % page_url)
    data = scrapertools.cache_page(page_url)

    # Submit: replay every hidden field plus the free-download button.
    post = {}
    r = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', data)
    for name, value in r:
        post[name] = value
    post.update({'method_free': 'Free Download'})
    data = scrapertools.cache_page(page_url, post=urllib.urlencode(post))

    # Get link from the packed player script.
    sPattern = '''<div id="player_code">.*?<script type='text/javascript'>(eval.+?)</script>'''
    r = re.findall(sPattern, data, re.DOTALL | re.I)
    mediaurl = ""
    if r:
        sUnpacked = jsunpack.unpack(r[0])
        sUnpacked = sUnpacked.replace("\\'", "")
        r = re.findall('file,(.+?)\)\;s1', sUnpacked)
        if not r:
            r = re.findall('"src"value="(.+?)"/><embed', sUnpacked)
        # FIX: guard — the original indexed r[0] even when both patterns
        # failed (IndexError), and appended an entry with an empty URL.
        if r:
            mediaurl = r[0]

    video_urls = []
    if mediaurl:
        video_urls.append([scrapertools.get_filename_from_url(mediaurl)[-4:] + " [hugefiles]", mediaurl])
    for video_url in video_urls:
        logger.info("%s - %s" % (video_url[0], video_url[1]))
    return video_urls
def play(item):
    """Resolve a documoo page to a playable item (YouTube or RAI m3u8)."""
    logger.info("[documoo.py] play")
    itemlist = []
    video_url = ""
    server = None

    page = scrapertools.cache_page(item.url)
    url = scrapertools.find_single_match(page, '<iframe\s+(?:width="[^"]*"\s*height="[^"]*"\s*)?src="([^"]+)"')

    if 'youtu' in url:
        embed = scrapertools.cache_page(url)
        vid = scrapertools.find_single_match(embed, '\'VIDEO_ID\'\s*:\s*"([^"]+)')
        if vid != "":
            video_url = "http://www.youtube.com/watch?v=%s" % vid
            server = 'youtube'
    elif 'rai.tv' in url:
        embed = scrapertools.cache_page(url)
        video_url = scrapertools.find_single_match(embed, '<meta\s+name="videourl_m3u8"\s*content="([^"]+)"')

    if video_url != "":
        item.url = video_url
        item.server = server
        itemlist.append(item)
    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Walk speedvid's human-check form, unpack the packed player JS and
    return the video file URL."""
    logger.info("[speedvid.py] url=" + page_url)
    video_urls = []
    data = scrapertools.cache_page(page_url, headers=headers)
    # The site rejects immediate submits; wait like a human would.
    time.sleep(5)

    post_url = re.findall('Form method="POST" action=\'(.*)\'', data)[0]
    form_html = re.findall('Form method="POST" action=(.*)</Form>', data, re.DOTALL)[0]

    def hidden(field):
        # Value of one hidden input inside the selected form block.
        return re.findall('input type="hidden" name="' + field + '" value="(.*)"', form_html)[0]

    post_data = 'op=%s&usr_login=%s&id=%s&fname=%s&referer=%s&hash=%s&imhuman=Proceed+to+video' % (
        hidden('op'), hidden('usr_login'), hidden('id'),
        hidden('fname'), hidden('referer'), hidden('hash'))

    headers.append(['Referer', page_url])
    data = scrapertools.cache_page(post_url, post=post_data, headers=headers)
    packed = scrapertools.find_single_match(data, "(eval.function.p,a,c,k,e,.*?)\s*</script>")
    if packed != "":
        from core import jsunpack
        unpacked = jsunpack.unpack(packed)
        # URL del vídeo
        url = re.findall('file:\s*"([^"]+)"', unpacked)[0]
        video_urls.append([scrapertools.get_filename_from_url(url)[-4:] + " [speedvid]", url])
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a videoweed link through the player.api.php endpoint."""
    logger.info("[videoweed.py] get_video_url(page_url='%s')" % page_url)
    data = scrapertools.cache_page(page_url)

    # FIX: the originals were left undefined (NameError) when the page
    # did not match; start with empty defaults instead.
    flashvarsfile = ""
    flashvarsfilekey = ""
    for match in re.compile('flashvars.file="(.*?)";').findall(data):
        logger.info("File = " + match)
        flashvarsfile = match
    for match in re.compile('flashvars.filekey="(.*?)";').findall(data):
        logger.info("Key = " + match)
        flashvarsfilekey = match

    post = "key=" + flashvarsfilekey + "&user=undefined&codes=1&pass=undefined&file=" + flashvarsfile
    url = "http://www.videoweed.es/api/player.api.php?" + post
    data = scrapertools.cache_page(url, post=post)
    logger.info(data)

    matches = re.compile('url=(.*?)&title=').findall(data)
    scrapertools.printMatches(matches)
    video_urls = []
    # FIX: guard matches[0] — an empty API answer previously raised IndexError.
    if matches:
        logger.info(matches[0])
        video_urls.append([".flv [videoweed]", matches[0]])
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Submit movreel's embed form and extract the file URL from the
    unpacked player script."""
    logger.info("[movreel.py] get_video_url(page_url='%s')" % page_url)
    video_urls = []
    data = scrapertools.cache_page(page_url)

    # Collect the hidden form fields for the embed request.
    # op=video_embed&file_code=...&w=600&h=400&method_free=Close+Ad+and+Watch+as+Free+User
    form = {
        "op": scrapertools.get_match(data, '<input type="hidden" name="op" value="([^"]+)">'),
        "file_code": scrapertools.get_match(data, '<input type="hidden" name="file_code" value="([^"]+)">'),
        "w": scrapertools.get_match(data, '<input type="hidden" name="w" value="([^"]+)">'),
        "h": scrapertools.get_match(data, '<input type="hidden" name="h" value="([^"]+)">'),
        "method_free": scrapertools.get_match(data, '<input type="submit" name="method_free" value="([^"]+)">'),
    }
    data = scrapertools.cache_page(page_url, post=urllib.urlencode(form))
    data = jsunpack.unpack(data)
    logger.info("data=" + data)

    media_url = scrapertools.get_match(data, 'file\:"([^"]+)"')
    video_urls.append([scrapertools.get_filename_from_url(media_url)[-4:] + " [movreel]", media_url])
    for video_url in video_urls:
        logger.info("[movreel.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Decode videomega's escaped player script and return the file URL."""
    logger.info("pelisalacarta.videomega get_video_url(page_url='%s')" % page_url)
    video_urls = []

    # FIX: the page was downloaded twice and the first (header-less)
    # response was discarded; one request with the browser UA suffices.
    headers = [['User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3']]
    data = scrapertools.cache_page(page_url, headers=headers)

    # document.write(unescape("%3c%73%63...")) hides the player config.
    location = scrapertools.get_match(data, 'document.write\(unescape\("([^"]+)"\)\)')
    logger.info("pelisalacarta.videomega location=" + location)
    location = urllib.unquote(location)
    logger.info("pelisalacarta.videomega location=" + location)
    location = scrapertools.get_match(location, 'file\: "([^"]+)"')
    logger.info("pelisalacarta.videomega location=" + location)
    # The real player appends start=0 when playback begins.
    location = location + "&start=0"
    logger.info("pelisalacarta.videomega location=" + location)

    video_urls.append([scrapertools.get_filename_from_url(location)[-4:] + " [videomega]", location])
    for video_url in video_urls:
        logger.info("pelisalacarta.videomega %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Premium-only fileserve resolver: log in, then follow the redirect
    Location of the file page."""
    logger.info("[fileserve.py] get_video_url(page_url='%s')" % page_url)
    video_urls = []
    if premium:
        # Hit the home page first so the session cookie is set, then log in.
        scrapertools.cache_page("http://fileserve.com/index.php")
        login_post = "loginUserName=%s&loginUserPassword=%s&autoLogin=on&ppp=102&loginFormSubmit=Login" % (user, password)
        scrapertools.cache_page("http://fileserve.com/login.php", post=login_post)

        # A logged-in request redirects straight to the file.
        location = scrapertools.get_header_from_response(page_url, header_to_get="location")
        logger.info("location=" + location)
        if location.startswith("http"):
            video_urls.append(["%s (Premium) [fileserve]" % location[-4:], location])

    for video_url in video_urls:
        logger.info("[fileserve.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Extract mirror URLs from a vidgg page, using the player API as a
    fallback when no <source> tags are present."""
    logger.info("streamondemand.servers.vidgg get_video_url(page_url='%s')" % page_url)
    video_urls = []
    data = scrapertools.cache_page(page_url)

    mediaurls = scrapertools.find_multiple_matches(data, '<source src="([^"]+)"')
    if not mediaurls:
        id_file = page_url.rsplit("/", 1)[1]
        key = scrapertools.find_single_match(data, 'flashvars\.filekey\s*=\s*"([^"]+)"')
        if not key:
            # The key may be stored in a separate JS variable.
            varkey = scrapertools.find_single_match(data, 'flashvars\.filekey\s*=\s*([^;]+);')
            key = scrapertools.find_single_match(data, varkey + '\s*=\s*"([^"]+)"')

        # The first API call fails on purpose: its (wrong) URL is needed
        # as errorUrl for the retry that yields the real links.
        api = "http://www.vidgg.to//api/player.api.php?cid2=undefined&cid=undefined&numOfErrors=0&user=undefined&cid3=undefined&key=%s&file=%s&pass=undefined" % (key, id_file)
        url_error = scrapertools.find_single_match(scrapertools.cache_page(api), 'url=([^&]+)&')
        api = "http://www.vidgg.to//api/player.api.php?cid2=undefined&cid=undefined&numOfErrors=1&errorUrl=%s&errorCode=404&user=undefined&cid3=undefined&key=%s&file=%s&pass=undefined" % (url_error, key, id_file)
        mediaurls = scrapertools.find_multiple_matches(scrapertools.cache_page(api), 'url=([^&]+)&')

    for i, mediaurl in enumerate(mediaurls):
        title = scrapertools.get_filename_from_url(mediaurl)[-4:] + " Mirror %s [vidgg]" % str(i + 1)
        video_urls.append([title, mediaurl])
    for video_url in video_urls:
        logger.info("[vidgg.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def peliculasx(item):
    """List movie entries, fetching each detail page for its plot text."""
    logger.info("streamondemand.filmstreampw peliculas")
    itemlist = []

    # Descarga la pagina
    data = scrapertools.cache_page(item.url, headers=headers)

    # Extrae las entradas (carpetas)
    patron = '<div class="news2 float">.*?<div class="boxgrid2 caption2">.*?<a href="([^"]+)">.*?<img.*?src="([^"]+)"/>.*?<div class="cover2 boxcaption2">.*?<div class="boxgridtext">(.*?)</div>.*?<br>'
    for scrapedurl, scrapedthumbnail, scrapedtitle in re.compile(patron, re.DOTALL).findall(data):
        # Pull the plot paragraph out of the detail page and strip markup.
        detail = scrapertools.cache_page(scrapedurl, headers=headers)
        start = detail.find("<li class=\"current\" style=\"font-size: 15px; line-height: 18px;\">")
        end = detail.find("</div></li>", start)
        scrapedplot = detail[start:end]
        scrapedplot = re.sub(r'<[^>]*>', '', scrapedplot)
        scrapedplot = scrapertools.decodeHtmlentities(scrapedplot)
        scrapedtitle = scrapedtitle.strip()
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(infoSod(
            Item(channel=__channel__,
                 extra=item.extra,
                 action="episodios" if item.extra == "serie" else "findvideos",
                 fulltitle=scrapedtitle,
                 show=scrapedtitle,
                 title=scrapedtitle,
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot,
                 folder=True), tipo='movie'))
    return itemlist
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ): logger.info("fusionse.servers.uptobox get_video_url(page_url='%s')" % page_url) #Si el enlace es directo de upstream if "uptobox" not in page_url: data = scrapertools.cache_page(page_url) if "Video not found" in data: page_url = page_url.replace("uptostream.com/iframe/","uptobox.com/") data = scrapertools.cache_page(page_url) video_urls = uptobox(page_url, data) else: video_urls = uptostream(data) else: data = scrapertools.cache_page(page_url) #Si el archivo tiene enlace de streaming se redirige a upstream if "Streaming link:" in data: page_url = "http://uptostream.com/"+scrapertools.find_single_match(page_url,'uptobox.com/([a-z0-9]+)') data = scrapertools.cache_page(page_url) video_urls = uptostream(page_url) else: #Si no lo tiene se utiliza la descarga normal video_urls = uptobox(page_url, data) for video_url in video_urls: logger.info("fusionse.servers.uptobox %s - %s" % (video_url[0],video_url[1])) return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a documentary.es page.

    Asks the site's AJAX endpoint for the embedded video URL, then
    delegates to the matching server connector for the final media URLs.
    """
    logger.info("documentary get_video_url(page_url='%s')" % page_url)
    video_urls = []
    data = scrapertools.cache_page(page_url)

    # var videoVars = {"videoNonceVar":"94767795ce","post_id":"2835"};
    videoNonceVar = scrapertools.get_match(data, 'var\s*videoVars\s*\=\s*\{"videoNonceVar"\:"([^"]+)","post_id"\:"\d+"')
    post_id = scrapertools.get_match(data, 'var\s*videoVars\s*\=\s*\{"videoNonceVar"\:"[^"]+","post_id"\:"(\d+)"')

    # Cache-buster query parameter, like the site's own JS does.
    import random
    url = "http://documentary.es/wp-admin/admin-ajax.php?postId=" + post_id + "&videoNonce=" + videoNonceVar + "&action=getVideo&_=" + str(random.randint(10000000000, 9999999999999))
    data = scrapertools.cache_page(url)

    # {"videoUrl":"http:\/\/www.dailymotion.com\/embed\/video\/..."}
    data = data.replace("\\", "")

    import servertools
    real_urls = servertools.find_video_items(data=data)
    if len(real_urls) > 0:
        item = real_urls[-1]
        # FIX: import the connector module by name instead of building
        # exec strings (same effect, no dynamic code execution).
        servermodule = __import__(item.server)
        video_urls = servermodule.get_video_url(item.url)

    for video_url in video_urls:
        logger.info("documentary %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a streamcloud link, re-submitting the wait form when the
    file URL is not on the first page."""
    logger.info("url=" + page_url)
    # Lo pide una vez
    headers = [['User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14']]
    data = scrapertools.cache_page(page_url, headers=headers)
    try:
        media_url = scrapertools.get_match(data, 'file\: "([^"]+)"')
    # FIX: narrowed from a bare "except:" which also swallowed
    # SystemExit/KeyboardInterrupt.
    except Exception:
        # Rebuild the wait form fields and submit them as "download2".
        post = ""
        matches = scrapertools.find_multiple_matches(data, '<input.*?name="([^"]+)".*?value="([^"]*)">')
        for inputname, inputvalue in matches:
            post += inputname + "=" + inputvalue + "&"
        post = post.replace("op=download1", "op=download2")
        data = scrapertools.cache_page(page_url, post=post)

        if 'id="justanotice"' in data:
            logger.info("data=" + data)
            logger.info("Ha saltado el detector de adblock")
            return []

        # Extrae la URL
        media_url = scrapertools.get_match(data, 'file\: "([^"]+)"')

    video_urls = []
    video_urls.append([scrapertools.get_filename_from_url(media_url)[-4:] + " [streamcloud]", media_url])
    for video_url in video_urls:
        logger.info("%s - %s" % (video_url[0], video_url[1]))
    return video_urls
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Follow clicknupload's two download forms (with the mandatory
    5-second wait between them) and return the final media URL."""
    logger.info("pelisalacarta.servers.clicknupload url=" + page_url)
    data = scrapertools.cache_page(page_url).replace("\n", "").replace("\t", "")

    def serialize_form(block_pattern, input_pattern):
        # Serialise the matched form's inputs into a POST string.
        block = scrapertools.find_single_match(data, block_pattern)
        pairs = scrapertools.find_multiple_matches(block, input_pattern)
        return "".join(name + "=" + value + "&" for name, value in pairs)

    # First POST.
    post = serialize_form('<Form method="POST"(.*?)</Form>', 'input.*?name="([^"]+)".*?value="([^"]*)"')
    data = scrapertools.cache_page(page_url, post=post).replace("\n", "").replace("\t", "")

    import time
    time.sleep(5)

    # Second POST after the mandatory 5-second wait.
    post = serialize_form('<Form name="F1" method="POST"(.*?)</Form>', '<input.*?name="([^"]+)".*?value="([^"]*)">')
    data = scrapertools.cache_page(page_url, post=post)

    video_urls = []
    media = scrapertools.find_single_match(data, "onClick=\"window.open\('([^']+)'")
    # Only the file-name part of the URL needs quoting.
    base, name = media.rsplit('/', 1)
    media_url = base + "/" + urllib.quote(name)
    video_urls.append([scrapertools.get_filename_from_url(media_url)[-4:] + " [clicknupload]", media_url])
    for video_url in video_urls:
        logger.info("pelisalacarta.servers.clicknupload %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def episodios(item):
    """List the episodes of one season, downloading each episode page
    for its fanart and plot."""
    logger.info("pelisalacarta.channels.pelisplus episodios")
    itemlist = []
    data = scrapertools.cache_page(item.url)

    patron = '<span class="ico season_play"><\/span>([^<]+)<\/a>.<a href="([^"]+)" class="season-online enabled">'
    temporada = 'temporada/' + item.extra.strip(' ')
    matches = re.compile(patron, re.DOTALL).findall(data)
    # Season number comes from the item title (e.g. "Temporada 2").
    contentSeasonNumber = re.findall(r'\d+', item.title)
    # FIX: hoisted out of the loop — the thumbnail comes from the season
    # page and is identical for every episode (it was re-scanned per item).
    thumbnail = scrapertools.find_single_match(data, '<img src="([^"]+)" alt="" class="picture-movie">')

    for scrapedtitle, scrapedurl in matches:
        if temporada in scrapedurl:
            url = scrapedurl
            capitulo = re.findall(r'Capitulo \d+', scrapedtitle)
            contentEpisodeNumber = re.findall(r'\d+', capitulo[0])
            title = contentSeasonNumber[0] + 'x' + contentEpisodeNumber[0] + ' - ' + scrapedtitle
            plot = ''
            datab = scrapertools.cache_page(scrapedurl)
            fanart = scrapertools.find_single_match(datab, '<img src="([^"]+)" alt=".*?" class="picture-movie">')
            plot = scrapertools.find_single_match(datab, '<span>Sinopsis:<\/span>.([^<]+)<span class="text-detail-hide"><\/span>.<\/p>')
            if (DEBUG):
                logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "])")
            itemlist.append(
                Item(channel=item.channel, action="findvideos", title=title,
                     fulltitle=item.title, url=url, thumbnail=thumbnail,
                     plot=plot, fanart=fanart, extra=scrapedtitle))
    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a vidxden link.

    Completes the URL when needed, replays the two page requests the
    site expects, unpacks the obfuscated player block and extracts the
    video URL.  Always returns a list of [title, url] pairs.
    """
    logger.info("[vidxden.py] url=" + page_url)

    if ".html" not in page_url:
        # Incomplete URL: fetch the page to find the file name suffix.
        logger.info("[vidxden.py] URL incompleta")
        data = scrapertools.cache_page(page_url)
        patron = '<input name="fname" type="hidden" value="([^"]+)">'
        matches = re.compile(patron, re.DOTALL).findall(data)
        page_url = page_url + "/" + matches[0] + ".html"

    # Lo pide una vez (primes the session).
    scrapertools.cache_page(page_url, headers=[['User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14']])

    # Lo pide una segunda vez, como si hubieras hecho click en el banner.
    patron = 'http\:\/\/www\.vidxden\.com/([^\/]+)/(.*?)\.html'
    matches = re.compile(patron, re.DOTALL).findall(page_url)
    logger.info("[vidxden.py] fragmentos de la URL")
    scrapertools.printMatches(matches)
    codigo = ""
    nombre = ""
    if len(matches) > 0:
        codigo = matches[0][0]
        nombre = matches[0][1]
    post = "op=download1&usr_login=&id=" + codigo + "&fname=" + nombre + "&referer=&method_free=Free+Stream"
    data = scrapertools.cache_page(page_url, post=post, headers=[['User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14'], ['Referer', page_url]])

    # Extrae el trozo cifrado
    patron = '<div id="embedcontmvshre"[^>]+>(.*?)</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    data = ""
    if len(matches) > 0:
        data = matches[0]
        logger.info("[vidxden.py] bloque packed=" + data)
    else:
        logger.info("[vidxden.py] no encuentra bloque packed=" + data)
        # FIX: was "return ''" — callers iterate a list of [title, url]
        # pairs, so the empty result must be a list, not a string.
        return []

    # Lo descifra
    descifrado = unpackerjs.unpackjs(data)
    logger.info("descifrado=" + descifrado)

    # Extrae la URL del vídeo
    patron = '<param name="src"value="([^"]+)"/>'
    matches = re.compile(patron, re.DOTALL).findall(descifrado)
    scrapertools.printMatches(matches)
    video_urls = []
    if len(matches) > 0:
        video_urls.append(["." + matches[0].rsplit('.', 1)[1] + " [vidxden]", matches[0]])
    for video_url in video_urls:
        logger.info("[vidxden.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def login():
    """Log in to megahd.me (an SMF forum) so later requests carry an
    authenticated session cookie.  Always returns True.
    """
    import hashlib
    logger.info("channels.megahd login")

    # Averigua el id de sesión
    data = scrapertools.cache_page("http://megahd.me/login/", headers=MAIN_HEADERS)
    # <form ... onsubmit="hashLoginPassword(this, 'd3c3...');">
    cur_session_id = scrapertools.get_match(data, 'onsubmit\="hashLoginPassword\(this, \'([a-z0-9]+)\'')
    logger.info("channels.megahd cur_session_id=" + cur_session_id)

    # Calcula el hash del password
    LOGIN = config.get_setting("megahduser")
    PASSWORD = config.get_setting("megahdpassword")
    # NOTE(review): the hash computation and credential logging in this
    # copy were redacted ("******").  Rebuilt per SMF's JS
    # hashLoginPassword(): sha1(sha1(lower(user) + password) + session_id).
    # Credentials are deliberately no longer written to the log.
    hash_passwrd = hashlib.sha1(
        hashlib.sha1(LOGIN.lower() + PASSWORD).hexdigest() + cur_session_id).hexdigest()
    logger.info("channels.megahd hash_passwrd=" + hash_passwrd)

    # Hace el submit del login
    post = "user=" + LOGIN + "&passwrd=&cookieneverexp=on&hash_passwrd=" + hash_passwrd
    data = scrapertools.cache_page("http://megahd.me/login2/", post=post, headers=MAIN_HEADERS)
    return True
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    # Resolve a vimple.ru video.  The player SWF embeds a base64+zlib
    # packed XML descriptor that carries the media URL; the final URL
    # must be requested with the UniversalUserID cookie attached.
    logger.info("streamondemand.servers.vimpleru page_url="+page_url)
    mobj = re.match(_VALID_URL, page_url)
    video_id = mobj.group('id')
    logger.info("streamondemand.servers.vimpleru video_id="+video_id)
    data = scrapertools.cache_page( page_url )
    logger.info("streamondemand.servers.vimpleru data="+data)
    cookie_data = config.get_cookie_data()
    #logger.info("streamondemand.servers.vimpleru cookie_data="+cookie_data)
    universalid = scrapertools.get_match(cookie_data,'UniversalUserID\s*([a-f0-9]+)')
    logger.info("universalid="+universalid)
    player_url = scrapertools.find_single_match(data,'"swfplayer"\:"([^"]+)"')
    player_url = player_url.replace("\\","")
    logger.info("streamondemand.servers.vimpleru player_url="+player_url)
    player = scrapertools.cache_page( player_url)
    #logger.info("streamondemand.servers.vimpleru player="+repr(player))
    # Skip the 8-byte SWF header and inflate the compressed body.
    player = zlib.decompress(player[8:])
    #logger.info("streamondemand.servers.vimpleru player="+repr(player))
    # The XML descriptor is embedded as long base64-looking runs in the SWF.
    xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player)
    logger.info("streamondemand.servers.vimpleru xml_pieces="+repr(xml_pieces))
    # Drop the first and last character of each run — presumably
    # delimiters captured by the regex; TODO confirm against the player.
    xml_pieces = [piece[1:-1] for piece in xml_pieces]
    logger.info("streamondemand.servers.vimpleru xml_pieces="+repr(xml_pieces))
    xml_data = b''.join(xml_pieces)
    logger.info("streamondemand.servers.vimpleru xml_data="+repr(xml_data))
    xml_data = base64.b64decode(xml_data)
    logger.info("streamondemand.servers.vimpleru xml_data="+repr(xml_data))
    xml_data = xml.etree.ElementTree.fromstring(xml_data)
    video = xml_data.find('Video')
    quality = video.get('quality')
    # The element named after the quality value holds the media URL.
    q_tag = video.find(quality.capitalize())
    '''
    logger.info("streamondemand.servers.vimpleru url: " + q_tag.get('url'))
    logger.info("streamondemand.servers.vimpleru tbr: " + q_tag.get('bitrate'))
    logger.info("streamondemand.servers.vimpleru filesize: " + q_tag.get('filesize'))
    logger.info("streamondemand.servers.vimpleru format_id: " + quality)
    logger.info("streamondemand.servers.vimpleru id: " + video_id)
    logger.info("streamondemand.servers.vimpleru title: " + video.find('Title').text)
    logger.info("streamondemand.servers.vimpleru thumbnail: " + video.find('Poster').get('url'))
    logger.info("streamondemand.servers.vimpleru duration: " + video.get('duration'))
    logger.info("streamondemand.servers.vimpleru webpage_url: " + video.find('Share').get('videoPageUrl'))
    '''
    media_url = q_tag.get('url')+"|Cookie=UniversalUserID="+universalid
    video_urls = []
    video_urls.append( [ scrapertools.get_filename_from_url(media_url)[-4:]+" [vimple.ru]",media_url])
    for video_url in video_urls:
        logger.info("streamondemand.servers.vimpleru %s - %s" % (video_url[0],video_url[1]))
    return video_urls
def login():
    """Log in to the mocosoftx forum (SMF) so later requests carry an
    authenticated session cookie.  Always returns True.
    """
    import hashlib
    # Averigua el id de sesión
    data = scrapertools.cache_page("http://www.mocosoftx.com/foro/index.php")
    cur_session_id = scrapertools.get_match(
        data,
        'form action="[^"]+" method="post" accept-charset="ISO-8859-1" onsubmit="hashLoginPassword\(this, \'([a-z0-9]+)\'',
    )
    logger.info("cur_session_id=" + cur_session_id)

    # Calcula el hash del password
    LOGIN = config.get_setting("mocosoftxuser")
    PASSWORD = config.get_setting("mocosoftxpassword")
    # NOTE(review): the hash computation and credential logging in this
    # copy were redacted ("******").  Rebuilt per SMF's JS
    # hashLoginPassword(): sha1(sha1(lower(user) + password) + session_id).
    # Credentials are deliberately no longer written to the log.
    hash_passwrd = hashlib.sha1(
        hashlib.sha1(LOGIN.lower() + PASSWORD).hexdigest() + cur_session_id).hexdigest()
    logger.info("hash_passwrd=" + hash_passwrd)

    # Hace el submit del login
    post = "user=" + LOGIN + "&passwrd=&cookielength=-1&hash_passwrd=" + hash_passwrd
    logger.info("post=" + post)
    data = scrapertools.cache_page("http://mocosoftx.com/foro/login2/", post=post, headers=MAIN_HEADERS)
    return True
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a modovideo link through its mobile frame page."""
    logger.info("[modovideo.py] get_video_url(page_url='%s')" % page_url)

    # The iPad user agent makes the site serve a plain <video> tag.
    request_headers = [["User-Agent", "Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10"]]
    scrapertools.cache_page(page_url, headers=request_headers)

    # The frame request needs the original page as referer.
    request_headers.append(["Referer", page_url])
    code = scrapertools.get_match(page_url, "http://www.modovideo.com/video\?v\=([a-zA-Z0-9]+)")
    # http://www.modovideo.com/frame.php?v=teml3hpu3141n0lam2a04iufcsz7q7pt
    data = scrapertools.cache_page("http://www.modovideo.com/frame.php?v=" + code, headers=request_headers)

    # <video id='player' src=http://...mp4 style='...' ...></video>
    found = re.compile("<video id='player' src=(.*?) ", re.DOTALL).findall(data)
    scrapertools.printMatches(found)
    video_urls = [["[modovideo]", match] for match in found]
    for video_url in video_urls:
        logger.info("[modovideo.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    # Resolve the final direct-download URL for a nowdownload.co page.
    # Returns a single-element list holding the URL string.
    logger.info("[nowdownload.py] get_video_url (page_url='%s')" % page_url)
    '''
    <a href="http://f02.nowdownload.co/dl/91efaa9ec507ef4de023cd62bb9a0fe2/50ab76ac/6711c9c90ebf3_family.guy.s11e02.italian.subbed.hdtv.xvid_gannico.avi" class="btn btn-danger"><i class="icon-white icon-download"></i> Download Now</a>
    '''
    data = scrapertools.cache_page( page_url )
    logger.debug("[nowdownload.py] data:" + data)
    try:
        # Direct case: the intermediate link is already on the page.
        url = scrapertools.get_match(data,'<a href="([^"]*)" class="btn btn-danger"><i class="icon-white icon-download"></i> Download Now</a>')
    except:
        # Free-user flow: the page fires a token request first, e.g.
        #$.get("/api/token.php?token=7e1ab09df2775dbea02506e1a2651883");
        token = scrapertools.get_match(data,'(/api/token.php\?token=[^"]*)')
        logger.debug("[nowdownload.py] token:" + token)
        # Token call made for its server-side effect. NOTE(review): the
        # response `d` is never used — the next match still scans the
        # original page `data`; confirm this is intended.
        d= scrapertools.cache_page( "http://www.nowdownload.co"+ token )
        url = scrapertools.get_match(data,'expiryText: \'<a class="btn btn-danger" href="([^"]*)')
    logger.debug("[nowdownload.py] url_1:" + url)
    # Follow the intermediate link to the page holding the final button.
    data = scrapertools.cache_page("http://www.nowdownload.co" + url )
    logger.debug("[nowdownload.py] data:" + data)
    #<a href="http://f03.nowdownload.co/dl/8ec5470153bb7a2177847ca7e1638389/50ab71b3/f92882f4d33a5_squadra.antimafia_palermo.oggi.4x01.episodio.01.ita.satrip.xvid_upz.avi" class="btn btn-success">Click here to download !</a>
    url = scrapertools.get_match(data,'<a href="([^"]*)" class="btn btn-success">Click here to download !</a>')
    logger.debug("[nowdownload.py] url_final:" + url)
    # NOTE(review): other resolvers in this file return [label, url] pairs;
    # this one returns a bare URL string in a list — confirm callers.
    video_urls = [url]
    return video_urls
def get_video_url( page_url , premium = False , user="" , password="", video_password="" ):
    """Resolve the direct video URL for a tu.tv page.

    Returns a list of [label, url] pairs; empty when no video is found.
    """
    logger.info("[tutv.py] get_video_url(page_url='%s')" % page_url)

    # The video id is normally embedded in the URL itself.
    id = extract_id(page_url)

    # Otherwise, pull it from the <link rel="video_src"> tag on the page.
    if id == "":
        data = scrapertools.cache_page(page_url)
        patron = '<link rel="video_src" href="([^"]+)"/>'
        matches = re.compile(patron, re.DOTALL).findall(data)
        if len(matches) > 0:
            id = extract_id(matches[0])
        else:
            id = ""

    # Download the external-player descriptor for that id.
    url = "http://tu.tv/visualizacionExterna2.php?web=undefined&codVideo=" + id
    data = scrapertools.cache_page(url)

    # The descriptor carries the URL-encoded address as urlVideo0=...&
    patronvideos = 'urlVideo0=([^\&]+)\&'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if len(matches) == 0:
        # Robustness fix: the original raised IndexError here when the
        # descriptor contained no video URL.
        return []
    url = urllib.unquote_plus(matches[0])

    video_urls = [["[tu.tv]", url]]
    for video_url in video_urls:
        logger.info("[tutv.py] %s - %s" % (video_url[0], video_url[1]))

    return video_urls
def pelis_top100(item):
    """Build the Top-100 movie listing, fetching plot and poster from each detail page."""
    logger.info("streamondemand.guardarefilm peliculas")
    itemlist = []

    page = scrapertools.cache_page(item.url, headers=headers)

    # Each ranked entry is a link whose text is "Title (year)".
    entry_re = re.compile(r'<span class="top100_title"><a href="([^"]+)">(.*?\(\d+\))</a>')
    for url, raw_title in entry_re.findall(page):
        detail = scrapertools.cache_page(url, headers=headers)

        # Plot: slice from the description wrapper, then strip markup/entities.
        desc_open = detail.find('<div class="textwrap" itemprop="description">')
        desc_close = detail.find("</div>", desc_open)
        plot = detail[desc_open:desc_close]
        plot = re.sub(r'<[^>]*>', '', plot)
        plot = scrapertools.decodeHtmlentities(plot)

        title = scrapertools.decodeHtmlentities(raw_title)
        poster = scrapertools.find_single_match(
            detail, r'class="poster-wrapp"><a href="([^"]+)"')

        if (DEBUG):
            logger.info(
                "title=[" + title + "], url=[" + url + "], thumbnail=[" + poster + "]")

        itemlist.append(
            Item(channel=__channel__,
                 action="episodios" if item.extra == "serie" else "findvideos",
                 fulltitle=title,
                 show=title,
                 title="[COLOR azure]" + title + "[/COLOR]",
                 url=url,
                 thumbnail=urlparse.urljoin(host, poster),
                 plot=plot,
                 folder=True,
                 fanart=host + poster))

    return itemlist
def peliculas(item):
    """List yaske movies: one Item per film plus a next-page link."""
    logger.info("pelisalacarta.yaske listado")

    data = scrapertools.cache_page(item.url, headers=HEADER)
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)

    # One <li class="item-movies ..."> per film: link/title, poster,
    # language-flag icons and a quality badge.
    film_patron = '<li class="item-movies[^"]+">'
    film_patron += '<a class="image-block" href="([^"]+)" title="([^"]+)">'
    film_patron += '<img src="([^"]+)"[^/]+/></a>'
    film_patron += '<ul class="bottombox">.*?<li>(<img.*?)</li>.*?</ul>'
    film_patron += '<div class="quality">([^<]+)</div>'

    itemlist = []
    for url, titulo, thumb, flags_html, calidad in re.compile(
            film_patron, re.DOTALL).findall(data):
        # Languages are the title attributes of the flag icons; render
        # them as "[a/b/c]" when present.
        langs = re.compile("<img src='[^']+' title='([^']+)'",
                           re.DOTALL).findall(flags_html)
        disponibles = "/".join(lang.strip() for lang in langs)
        if disponibles:
            disponibles = "[" + disponibles + "]"

        title = scrapertools.htmlclean(
            titulo.strip() + " " + disponibles + "[" + calidad + "]")

        itemlist.append(
            Item(channel=__channel__,
                 action="findvideos",
                 title=title,
                 url=url,
                 thumbnail=thumb,
                 plot="",
                 fulltitle=scrapertools.htmlclean(titulo.strip()),
                 viewmode="movie",
                 folder=True))

    # Pagination arrow at the bottom of the listing.
    next_matches = re.compile("<a href='([^']+)'>\»\;</a>",
                              re.DOTALL).findall(data)
    if len(next_matches) > 0:
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title=">> Página siguiente",
                 url=urlparse.urljoin(item.url, next_matches[0]),
                 folder=True))

    return itemlist
def findvideos(item):
    # Build the playable Items for a newpct1 detail page: a torrent link
    # first, then streaming ("ver online") and direct-download server
    # links scraped from the tabbed link boxes.
    logger.info("[newpct1.py] findvideos")
    itemlist = []

    ## Any of the three path rewrites below would be a valid entry point:
    #item.url = item.url.replace("1.com/","1.com/ver-online/")
    #item.url = item.url.replace("1.com/","1.com/descarga-directa/")
    item.url = item.url.replace("1.com/", "1.com/descarga-torrent/")

    # Download the page; strip newlines/tabs/comments so the regexes can
    # match across originally multi-line markup.
    data = re.sub(r"\n|\r|\t|\s{2}|(<!--.*?-->)", "",
                  scrapertools.cache_page(item.url))
    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")

    # The title is split between <strong> and the tail text of the <h1>.
    title = scrapertools.find_single_match(
        data, "<h1><strong>([^<]+)</strong>[^<]+</h1>")
    title += scrapertools.find_single_match(
        data, "<h1><strong>[^<]+</strong>([^<]+)</h1>")
    caratula = scrapertools.find_single_match(
        data, '<div class="entry-left">.*?src="([^"]+)"')

    #<a href="http://tumejorjuego.com/download/index.php?link=descargar-torrent/058310_yo-frankenstein-blurayrip-ac3-51.html" title="Descargar torrent de Yo Frankenstein " class="btn-torrent" target="_blank">Descarga tu Archivo torrent!</a>
    patron = '<a href="([^"]+)" title="[^"]+" class="btn-torrent" target="_blank">'

    # scraped torrent link (may be absent)
    url = scrapertools.find_single_match(data, patron)
    if url != "":
        itemlist.append(
            Item(channel=__channel__,
                 action="play",
                 server="torrent",
                 title=title + " [torrent]",
                 fulltitle=title,
                 url=url,
                 thumbnail=caratula,
                 plot=item.plot,
                 folder=False))

    # scraped streaming / single-link and multi-link downloads.
    # Normalize quoting and strip the popup/redirect wrappers so only the
    # raw server URLs are left for the box regex below.
    data = data.replace("'", '"')
    data = data.replace(
        'javascript:;" onClick="popup("http://www.newpct1.com/pct1/library/include/ajax/get_modallinks.php?links=',
        "")
    data = data.replace(
        "http://tumejorserie.com/descargar/url_encript.php?link=", "")
    data = data.replace("$!", "#!")

    # tab2 = download links, tab3 = streaming links.
    patron_descargar = '<div id="tab2"[^>]+>.*?</ul>'
    patron_ver = '<div id="tab3"[^>]+>.*?</ul>'
    match_ver = scrapertools.find_single_match(data, patron_ver)
    match_descargar = scrapertools.find_single_match(data, patron_descargar)

    patron = '<div class="box1"><img src="([^"]+)".*?'   # logo
    patron += '<div class="box2">([^<]+)</div>'          # server
    patron += '<div class="box3">([^<]+)</div>'          # language
    patron += '<div class="box4">([^<]+)</div>'          # quality
    patron += '<div class="box5"><a href="([^"]+)".*?'   # link
    patron += '<div class="box6">([^<]+)</div>'          # title

    enlaces_ver = re.compile(patron, re.DOTALL).findall(match_ver)
    enlaces_descargar = re.compile(patron, re.DOTALL).findall(match_descargar)

    for logo, servidor, idioma, calidad, enlace, titulo in enlaces_ver:
        # The site labels "played" what the server list knows as "playedto".
        servidor = servidor.replace("played", "playedto")
        titulo = titulo + " [" + servidor + "]"
        mostrar_server = True
        if config.get_setting("hidepremium") == "true":
            mostrar_server = servertools.is_server_enabled(servidor)
        if mostrar_server:
            itemlist.append(
                Item(channel=__channel__,
                     action="play",
                     server=servidor,
                     title=titulo,
                     fulltitle=item.title,
                     url=enlace,
                     thumbnail=logo,
                     plot=item.plot,
                     folder=False))

    for logo, servidor, idioma, calidad, enlace, titulo in enlaces_descargar:
        servidor = servidor.replace("uploaded", "uploadedto")
        # Multi-part downloads arrive as a space-separated list of links;
        # emit one Item per part, numbered (p/total).
        partes = enlace.split(" ")
        p = 1
        for enlace in partes:
            parte_titulo = titulo + " (%s/%s)" % (
                p, len(partes)) + " [" + servidor + "]"
            p += 1
            mostrar_server = True
            if config.get_setting("hidepremium") == "true":
                mostrar_server = servertools.is_server_enabled(servidor)
            if mostrar_server:
                itemlist.append(
                    Item(channel=__channel__,
                         action="play",
                         server=servidor,
                         title=parte_titulo,
                         fulltitle=item.title,
                         url=enlace,
                         thumbnail=logo,
                         plot=item.plot,
                         folder=False))

    return itemlist
def get_episodios(item):
    # Parse a newpct1 episode listing into per-episode Items, decoding
    # season/episode numbers from several inconsistent <h2> title formats,
    # plus a next-page Item when the listing is paginated.
    logger.info("[newpct1.py] get_episodios")
    itemlist = []
    logger.info("[newpct1.py] get_episodios url=" + item.url)

    # Strip whitespace/comments/icon tags so the regexes match one line.
    data = re.sub(r'\n|\r|\t|\s{2}|<!--.*?-->|<i class="icon[^>]+"></i>', "",
                  scrapertools.cache_page(item.url))
    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
    logger.info("[newpct1.py] data=" + data)

    patron = '<ul class="buscar-list">(.*?)</ul>'
    #logger.info("[newpct1.py] patron=" + patron)
    fichas = scrapertools.get_match(data, patron)
    #logger.info("[newpct1.py] matches=" + str(len(fichas)))

    #<li><a href="http://www.newpct1.com/serie/forever/capitulo-101/" title="Serie Forever 1x01"><img src="http://www.newpct1.com/pictures/c/minis/1880_forever.jpg" alt="Serie Forever 1x01"></a> <div class="info"> <a href="http://www.newpct1.com/serie/forever/capitulo-101/" title="Serie Forever 1x01"><h2 style="padding:0;">Serie <strong style="color:red;background:none;">Forever - Temporada 1 </strong> - Temporada<span style="color:red;background:none;">[ 1 ]</span>Capitulo<span style="color:red;background:none;">[ 01 ]</span><span style="color:red;background:none;padding:0px;">Espa�ol Castellano</span> Calidad <span style="color:red;background:none;">[ HDTV ]</span></h2></a> <span>27-10-2014</span> <span>450 MB</span> <span class="color"><ahref="http://www.newpct1.com/serie/forever/capitulo-101/" title="Serie Forever 1x01"> Descargar</a> </div></li>
    patron = '<li><a href="([^"]+).*?'          # url
    patron += '<img src="([^"]+)".*?'           # thumbnail
    patron += '<h2 style="padding(.*?)/h2>'     # title, language and quality
    matches = re.compile(patron, re.DOTALL).findall(fichas)

    for scrapedurl, scrapedthumbnail, scrapedinfo in matches:
        try:
            url = scrapedurl
            if '</span>' in scrapedinfo:
                # Span-decorated format: numbers come wrapped in "[ .. ]".
                patron = '<span style=".*?">\[\s*(.*?)\]</span>.*?'   # season
                patron += '<span style=".*?">\[\s*(.*?)\].*?'         # episode
                patron += ';([^/]+)'                                  # language
                info_extra = re.compile(patron, re.DOTALL).findall(scrapedinfo)
                (temporada, capitulo, idioma) = info_extra[0]
                if '">' in idioma:
                    idioma = " [" + scrapertools.find_single_match(
                        idioma, '">([^<]+)').strip() + "]"
                elif ' ' in idioma:
                    # NOTE(review): this branch tests for a plain space —
                    # it looks like a mangled "&nbsp;" literal; confirm.
                    idioma = " [" + scrapertools.find_single_match(
                        idioma, ' ([^<]+)').strip() + "]"
                else:
                    idioma = ""
                title = item.title + " (" + temporada.strip(
                ) + "x" + capitulo.strip() + ")" + idioma
            else:
                # Plain formats, e.g.:
                #<h2 style="padding:0;">The Big Bang Theory - Temporada 6 [HDTV][Cap.602][Español Castellano]</h2>
                #<h2 style="padding:0;">The Beast - Temporada 1 [HDTV] [Capítulo 13] [Español]</h2
                #<h2 style="padding:0;">The Beast - Temp.1 [DVD-DVB][Cap.103][Spanish]</h2>
                try:
                    temp, cap = scrapertools.get_season_and_episode(
                        scrapedinfo).split('x')
                except:
                    # Fallback: hand-parse "Cap.NNN", where a 3+ digit
                    # number fuses season and episode (103 -> 1x03).
                    patron = re.compile('Cap.*?\s*([\d]+)', re.IGNORECASE)
                    info_extra = patron.search(scrapedinfo)
                    if len(str(info_extra.group(1))) >= 3:
                        cap = info_extra.group(1)[-2:]
                        temp = info_extra.group(1)[:-2]
                    else:
                        cap = info_extra.group(1)
                        patron = 'Temp.*?\s*([\d]+)'
                        temp = re.compile(
                            patron,
                            re.IGNORECASE).search(scrapedinfo).group(1)
                title = item.title + " (" + temp + 'x' + cap + ")"
            #logger.info("[newpct1.py] get_episodios: fanart= " +item.fanart)
            itemlist.append(
                Item(channel=__channel__,
                     action="findvideos",
                     title=title,
                     url=url,
                     thumbnail=item.thumbnail,
                     show=item.show,
                     fanart=item.fanart))
        except:
            # NOTE(review): broad except skips malformed rows but also
            # hides real bugs — consider logging the traceback.
            logger.info("[newpct1.py] ERROR al añadir un episodio")

    if "pagination" in data:
        patron = '<ul class="pagination">(.*?)</ul>'
        paginacion = scrapertools.get_match(data, patron)
        #logger.info("[newpct1.py] get_episodios: paginacion= " + paginacion)
        if "Next" in paginacion:
            # The capture keeps the closing quote, hence the [:-1] trim.
            url_next_page = scrapertools.get_match(
                paginacion, '<a href="([^>]+)>Next</a>')[:-1]
            url_next_page = url_next_page.replace(" ", "%20")
            #logger.info("[newpct1.py] get_episodios: url_next_page= " + url_next_page)
            itemlist.append(
                Item(channel=__channel__,
                     action="get_episodios",
                     title=">> Página siguiente",
                     url=url_next_page))

    return itemlist
def listado(item):
    # Generic listing parser for newpct1 index pages. Movie entries link
    # straight to findvideos; series entries are rewritten into a site
    # search URL filtered by category id and quality, handled by "completo".
    logger.info("[newpct1.py] listado")
    #logger.info("[newpct1.py] listado url=" + item.url)
    itemlist = []

    data = re.sub(r"\n|\r|\t|\s{2}|(<!--.*?-->)", "",
                  scrapertools.cache_page(item.url))
    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")

    # item.extra selects which <ul> block of the page to parse.
    patron = '<ul class="' + item.extra + '">(.*?)</ul>'
    logger.info("[newpct1.py] patron=" + patron)
    fichas = scrapertools.get_match(data, patron)

    #<li><a href="http://www.newpct1.com/pelicula/x-men-dias-del-futuro-pasado/ts-screener/" title="Descargar XMen Dias Del Futuro gratis"><img src="http://www.newpct1.com/pictures/f/58066_x-men-dias-del-futuro--blurayrip-ac3-5.1.jpg" width="130" height="180" alt="Descargar XMen Dias Del Futuro gratis"><h2>XMen Dias Del Futuro </h2><span>BluRayRip AC3 5.1</span></a></li>
    patron = '<li><a href="([^"]+).*?'          # url
    patron += 'title="([^"]+).*?'               # title
    patron += '<img src="([^"]+)"[^>]+>.*?'     # thumbnail
    patron += '<span>([^<]*)</span>'            # quality
    matches = re.compile(patron, re.DOTALL).findall(fichas)

    for scrapedurl, scrapedtitle, scrapedthumbnail, calidad in matches:
        url = scrapedurl
        title = scrapedtitle
        thumbnail = scrapedthumbnail
        action = "findvideos"
        extra = ""
        if "1.com/series" in url:
            action = "completo"
            extra = "serie"
            # Keep only the show name (text before the first dash) and
            # drop the site's boilerplate prefixes.
            title = scrapertools.find_single_match(title, '([^-]+)')
            title = title.replace("Ver online", "", 1).replace(
                "Descarga Serie HD", "", 1).replace("Ver en linea", "",
                                                    1).strip()
            #logger.info("[newpct1.py] titulo="+title)
            if len(title) > 3:
                # Quote multi-word names (%22...%22) for an exact search.
                url_i = 'http://www.newpct1.com/index.php?page=buscar&url=&letter=&q=%22' + title.replace(
                    " ", "%20") + '%22'
            else:
                url_i = 'http://www.newpct1.com/index.php?page=buscar&url=&letter=&q=' + title
            # Hard-coded category ids per section; quality carries over
            # into the search filter.
            if "1.com/series-hd" in url:
                extra = "serie-hd"
                url = url_i + '&categoryID=&categoryIDR=1469&calidad=' + calidad.replace(
                    " ", "+")   #DTV+720p+AC3+5.1
            elif "1.com/series-vo" in url:
                extra = "serie-vo"
                url = url_i + '&categoryID=&categoryIDR=775&calidad=' + calidad.replace(
                    " ", "+")   #HDTV+720p+AC3+5.1
            elif "1.com/series/" in url:
                extra = "serie-tv"
                url = url_i + '&categoryID=&categoryIDR=767&calidad=' + calidad.replace(
                    " ", "+")
            url += '&idioma=&ordenar=Nombre&inon=Descendente'
        else:
            title = title.replace("Descargar", "", 1).strip()
            if title.endswith("gratis"):
                title = title[:-7]      # drop trailing " gratis"
        show = title
        if item.extra != "buscar-list":
            title = title + ' ' + calidad
        itemlist.append(
            Item(channel=__channel__,
                 action=action,
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 extra=extra,
                 show=show))

    if "pagination" in data:
        patron = '<ul class="pagination">(.*?)</ul>'
        paginacion = scrapertools.get_match(data, patron)
        if "Next" in paginacion:
            # The capture keeps the closing quote, hence the [:-1] trim.
            url_next_page = scrapertools.get_match(
                paginacion,
                '<a href="([^>]+)>Next</a>')[:-1].replace(" ", "%20")
            itemlist.append(
                Item(channel=__channel__,
                     action="listado",
                     title=">> Página siguiente",
                     url=url_next_page,
                     extra=item.extra))

    #logger.info("[newpct1.py] listado items:" + str(len(itemlist)))
    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    # Resolve streamplay video URLs. The page hides the real link behind a
    # jjencode'd script that post-processes a 40+ character hash embedded
    # in the source URLs (reverse / substring / splice transforms), so the
    # script is decoded first to learn which transforms to apply.
    logger.info("streamondemand.streamplay get_video_url(page_url='%s')" % page_url)
    data = scrapertools.cache_page(page_url)

    jj_encode = scrapertools.find_single_match(data, "(\w+=~\[\];.*?\)\(\)\)\(\);)")
    jj_decode = None
    jj_patron = None
    reverse = False
    substring = False
    splice = False
    if jj_encode:
        jj_decode = jjdecode(jj_encode)
    if jj_decode:
        # The decoded JS carries a regex (used later to rewrite the URL)
        # plus hex-escaped method names telling which transform it uses:
        # x72x65x76x65x72x73x65 = "reverse",
        # x73x75x62x73x74x72x69x6Ex67 = "substring",
        # x73x70x6Cx69x63x65 = "splice".
        jj_patron = scrapertools.find_single_match(jj_decode, "/([^/]+)/")
        if not "(" in jj_patron:
            jj_patron = "(" + jj_patron
        if not ")" in jj_patron:
            jj_patron += ")"
        if "x72x65x76x65x72x73x65" in jj_decode:
            reverse = True
        if "x73x75x62x73x74x72x69x6Ex67" in jj_decode:
            substring = True
        if "x73x70x6Cx69x63x65" in jj_decode:
            splice = True

    # NOTE(review): `matches` is overwritten below without being used, and
    # jsunpack.unpack() is fed the whole page rather than this capture —
    # confirm this is intended.
    matches = scrapertools.find_single_match(
        data, "<script type=[\"']text/javascript[\"']>(eval.*?)</script>")
    data = jsunpack.unpack(data).replace("\\", "")
    # Pull the player's sources array, then every src/file entry from it.
    data = scrapertools.find_single_match(data.replace('"', "'"),
                                          "sources\s*=[^\[]*\[([^\]]+)\]")
    matches = scrapertools.find_multiple_matches(data, "[src|file]:'([^']+)'")

    video_urls = []
    for video_url in matches:
        _hash = scrapertools.find_single_match(video_url, '\w{40,}')
        if splice:
            # splice(n, 1): drop one character of the hash at position n
            # (counted from the other end when the hash is also reversed).
            splice = int(
                scrapertools.find_single_match(jj_decode, "\((\d),\d\);"))
            if reverse:
                h = list(_hash)
                h.pop(-splice - 1)
                _hash = "".join(h)
            else:
                h = list(_hash)
                h.pop(splice)
                _hash = "".join(h)
        if substring:
            # substring(n): trim n characters off the relevant end.
            substring = int(
                scrapertools.find_single_match(jj_decode, "_\w+.\d...(\d)...;"))
            if reverse:
                _hash = _hash[:-substring]
            else:
                _hash = _hash[substring:]
        if reverse:
            video_url = re.sub(r'\w{40,}', _hash[::-1], video_url)
        filename = scrapertools.get_filename_from_url(video_url)[-4:]
        if video_url.startswith("rtmp"):
            rtmp, playpath = video_url.split("vod/", 1)
            video_url = "%s playpath=%s swfUrl=%splayer6/jwplayer.flash.swf pageUrl=%s" % (
                rtmp + "vod/", playpath, host, page_url)
            filename = "RTMP"
        elif video_url.endswith(".m3u8"):
            # HLS needs the User-Agent forwarded to the player.
            video_url += "|User-Agent=" + headers[0][1]
        elif video_url.endswith("/v.mp4"):
            # Offer the .flv variant alongside the .mp4 one.
            video_url_flv = re.sub(r'/v.mp4$', '/v.flv', video_url)
            video_urls.append([
                ".flv [streamplay]",
                re.sub(r'%s' % jj_patron, r'\1', video_url_flv)
            ])
        video_urls.append([
            filename + " [streamplay]",
            re.sub(r'%s' % jj_patron, r'\1', video_url)
        ])
    video_urls.sort(key=lambda x: x[0], reverse=True)
    for video_url in video_urls:
        logger.info("[streamplay.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def episodios(item):
    """Split a casacinema page into language sections and list their episodes."""

    def _carica_episodi(html, item, itemlist, lang_title):
        # One regex hit = one run of episode links; the text before the
        # first <a> tag is the episode label.
        blocco_re = re.compile('((?:.*?<a href="[^"]+"[^>]+[^>]+>[^<]+</a>)+)')
        for blocco in blocco_re.findall(html):
            etichetta = blocco.split('<a ')[0]
            etichetta = re.sub(r'<[^>]*>', '', etichetta).strip()
            if etichetta != 'Categorie':
                etichetta = etichetta.replace('×', 'x')
                etichetta = etichetta.replace('×', 'x')
                etichetta = etichetta.replace(';', '')
                itemlist.append(
                    Item(channel=__channel__,
                         action="findvideos",
                         contentType="episode",
                         title="[COLOR azure]%s[/COLOR]" % (etichetta + " (" + lang_title + ")"),
                         url=blocco,
                         thumbnail=item.thumbnail,
                         plot=item.plot,
                         extra=item.extra,
                         fulltitle=etichetta + " (" + lang_title + ")" + ' - ' + item.show,
                         show=item.show))

    logger.info("[streamondemand-pureita.casacinema] episodios")
    itemlist = []

    # Download and isolate the episode region of the page.
    data = scrapertools.cache_page(item.url)
    data = scrapertools.decodeHtmlentities(data)
    data = scrapertools.get_match(
        data, '<p>(?:<strong>|)(.*?)<div id="disqus_thread">')

    # Season headers mark where each language section begins.
    titoli = []
    inizi = []
    for m in re.compile(r"<p><strong>.*?(?:ITA|\d+)", re.IGNORECASE).finditer(data):
        if m.group() != '':
            titoli.append('SUB ITA' if 'SUB' in m.group().upper() else 'ITA')
            inizi.append(m.end())

    # Slice the page between consecutive section starts; the final section
    # ends at -1, mirroring the original "fine = -1" slicing.
    confini = inizi[1:] + [-1]
    for lang_title, inizio, fine in zip(titoli, inizi, confini):
        _carica_episodi(data[inizio:fine], item, itemlist, lang_title)

    return itemlist
def findvideos(item):
    # Build one playable Item per hosting option in the yaske mirrors
    # table: each <tr> carries server favicon, language flag, subtitle
    # note and quality.
    logger.info("pelisalacarta.yaske findvideos url=" + item.url)

    # Download the page
    data = scrapertools.cache_page(item.url, headers=HEADER)

    # Sample row:
    '''
    <tr bgcolor="">
    <td height="32" align="center"><a class="btn btn-mini enlace_link" style="text-decoration:none;" rel="nofollow" target="_blank" title="Ver..." href="http://www.yaske.net/es/reproductor/pelicula/2141/44446/"><i class="icon-play"></i><b> Opcion 04</b></a></td>
    <td align="left"><img src="http://www.google.com/s2/favicons?domain=played.to"/>played</td>
    <td align="center"><img src="http://www.yaske.net/theme/01/data/images/flags/la_la.png" width="21">Lat.</td>
    <td align="center" class="center"><span title="" style="text-transform:capitalize;">hd real 720</span></td>
    <td align="center"><div class="star_rating" title="HD REAL 720 ( 5 de 5 )"> <ul class="star"><li class="curr" style="width: 100%;"></li></ul> </div> </td>
    <td align="center" class="center">2553</td>
    </tr>
    '''
    patron = '<tr bgcolor=(.*?)</tr>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    itemlist = []
    #n = 1
    for tr in matches:
        logger.info("tr=" + tr)
        try:
            title = scrapertools.get_match(tr, '<b>([^<]+)</b>')
            # Server name is the domain passed to the Google favicon proxy.
            server = scrapertools.get_match(
                tr, '"http\://www.google.com/s2/favicons\?domain\=([^"]+)"')
            # <td align="center"><img src="http://www.yaske.net/theme/01/data/images/flags/la_la.png" width="19">Lat.</td>
            idioma = scrapertools.get_match(
                tr,
                '<img src="http://www.yaske.[a-z]+/theme/01/data/images/flags/([a-z_]+).png"[^>]+>[^<]*<'
            )
            subtitulos = scrapertools.get_match(
                tr,
                '<img src="http://www.yaske.[a-z]+/theme/01/data/images/flags/[^"]+"[^>]+>([^<]*)<'
            )
            calidad = scrapertools.get_match(
                tr,
                '<td align="center" class="center"[^<]+<span title="[^"]*" style="text-transform.capitalize.">([^<]+)</span></td>'
            )
            #<a [....] href="http://api.ysk.pe/noref/?u=< URL Vídeo >">
            # The target URL is the value of the "?u=" parameter.
            url = scrapertools.get_match(tr, '<a.*?href="([^"]+)"').split("=")[1]
            # Extracting netutv currently requires walking several extra
            # pages, which slows the listing a lot, so it is disabled;
            # those rows will show "nothing to play".
            '''
            if "/netu/tv/" in url:
                import base64
                ###################################################
                # Añadido 17-09-14
                ###################################################
                try:
                    data = scrapertools.cache_page(url,headers=getSetCookie(url1))
                except:
                    data = scrapertools.cache_page(url)
                ###################################################
                match_b64_1 = 'base64,([^"]+)"'
                b64_1 = scrapertools.get_match(data, match_b64_1)
                utf8_1 = base64.decodestring(b64_1)
                match_b64_inv = "='([^']+)';"
                b64_inv = scrapertools.get_match(utf8_1, match_b64_inv)
                b64_2 = b64_inv[::-1]
                utf8_2 = base64.decodestring(b64_2).replace("%","\\").decode('unicode-escape')
                id_video = scrapertools.get_match(utf8_2,'<input name="vid" id="text" value="([^"]+)">')
                url = "http://netu.tv/watch_video.php?v="+id_video
            '''
            thumbnail = ""
            plot = ""
            # NOTE(review): as written this strips every space from the
            # title; it looks like a mangled "&nbsp;" cleanup — confirm
            # the intended literal.
            title = title.replace(" ", "")
            if "es_es" in idioma:
                scrapedtitle = title + " en " + server.strip(
                ) + " [Español][" + calidad + "]"
            elif "la_la" in idioma:
                scrapedtitle = title + " en " + server.strip(
                ) + " [Latino][" + calidad + "]"
            elif "en_es" in idioma:
                scrapedtitle = title + " en " + server.strip(
                ) + " [Inglés SUB Español][" + calidad + "]"
            else:
                scrapedtitle = title + " en " + server.strip(
                ) + " [" + idioma + " / " + subtitulos + "][" + calidad + "]"
            scrapedtitle = scrapertools.entityunescape(scrapedtitle)
            scrapedtitle = scrapedtitle.strip()
            scrapedurl = url
            scrapedthumbnail = thumbnail
            scrapedplot = plot
            itemlist.append(
                Item(channel=__channel__,
                     action="play",
                     title=scrapedtitle,
                     url=scrapedurl,
                     thumbnail=scrapedthumbnail,
                     plot=scrapedplot,
                     fulltitle=item.fulltitle,
                     folder=False))
        except:
            # Log and skip malformed rows instead of aborting the listing.
            import traceback
            logger.info("Excepcion: " + traceback.format_exc())

    return itemlist
def search(item, texto):
    """Search vserie.com for `texto`.

    Movie results get a findvideos action, series an episodios action.
    On any failure an empty list is returned so the global search keeps
    working when one channel breaks.
    """
    logger.info()
    try:
        # Authenticate first when the user configured an account.
        if config.get_setting("zampaseriesaccount") == True:
            login()
        if item.url == "":
            item.url = "http://vserie.com/search"
        texto = texto.replace(" ", "+")

        # The site expects the query as POST field "s".
        post = "s=" + texto
        data = scrapertools.cache_page(item.url, post=post)
        data = scrapertools.find_single_match(
            data, '<div id="resultados">(.*?)<div id="cargando">')
        # Result entries look like:
        # <li title="007 Skyfall" id="id-1"><a href="http://vserie.com/pelicula/2-007-skyfall"><img src="http://vserie.com/images/p_p2_s.png" alt=""></a></li>
        patron = '<li title="([^"]+)"[^<]+<a href="([^"]+)"><img src="([^"]+)"'
        matches = re.compile(patron, re.DOTALL).findall(data)

        itemlist = []
        for scrapedtitle, scrapedurl, scrapedthumbnail in matches:
            # Movie links play directly; everything else lists episodes.
            # (The original duplicated the whole Item construction in two
            # branches that differed only in the action.)
            action = "findvideos" if "/pelicula/" in scrapedurl else "episodios"
            itemlist.append(
                Item(channel=item.channel,
                     action=action,
                     title=scrapedtitle,
                     url=scrapedurl,
                     thumbnail=scrapedthumbnail,
                     plot="",
                     show=scrapedtitle))
        return itemlist

    # Catch everything so a failing channel does not break the global search.
    except:
        import sys
        for line in sys.exc_info():
            logger.error("%s" % line)
        return []
def lista_2(item):
    """List cinetemagay Blogger entries.

    Pairs each thumbnail with its <link rel='replies'> URL, emits one
    "detail" Item per match, and appends an item that jumps ahead 100
    entries via the "index=" query parameter.
    """
    logger.info("[cinetemagay.py] lista")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)
    #logger.info(data)

    # Extract entries (folders)
    patronvideos = 'img.*?src="([^"]+)".*?'
    patronvideos += "<link rel='replies' type='text/html' href='([^']+)' title='([^']+)'/><link"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)

    for match in matches:
        scrapedtitle = "ver película"
        # Fix: these literals had been HTML-entity-decoded in the source,
        # turning them into no-ops (and one syntax error); the original
        # entity forms are restored here.
        scrapedtitle = scrapedtitle.replace("&#39;", "'")
        scrapedtitle = scrapedtitle.replace("@", "a")
        scrapedtitle = scrapedtitle.replace("&quot;", "'")
        scrapedtitle = scrapedtitle.replace("<h1>", "")
        scrapedtitle = scrapedtitle.replace("</h1>", "")
        scrapedurl = match[1]
        scrapedthumbnail = match[0]
        imagen = ""
        scrapedplot = match[1]
        tipo = match[0]
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                        "], thumbnail=[" + scrapedthumbnail + "]")
        # Build a readable plot out of the reply-link markup.
        scrapedplot = "<" + scrapedplot
        scrapedplot = scrapedplot.replace("&gt;", ">")
        scrapedplot = scrapedplot.replace("&lt;", "<")
        scrapedplot = scrapedplot.replace("</div>", "\n")
        scrapedplot = scrapedplot.replace("<br />", "\n")
        scrapedplot = scrapedplot.replace("&", "")
        scrapedplot = scrapedplot.replace("nbsp;", "")
        scrapedplot = strip_tags(scrapedplot)
        itemlist.append(
            Item(channel=__channel__,
                 action="detail",
                 title=scrapedtitle,
                 url=scrapedurl,
                 thumbnail=scrapedthumbnail,
                 plot=scrapedplot,
                 folder=True))

    # Pagination: bump the "index=" query parameter by 100.
    variable = item.url.split("index=")[1]
    variable = int(variable)
    variable += 100
    variable = str(variable)
    variable_url = item.url.split("index=")[0]
    url_nueva = variable_url + "index=" + variable
    # NOTE(review): the next-page item targets action "lista" while this
    # function is lista_2 — confirm which handler is intended.
    itemlist.append(
        Item(channel=__channel__,
             action="lista",
             title="Ir a la página siguiente (desde " + variable + ")",
             url=url_nueva,
             thumbnail="",
             plot="Pasar a la página siguiente (en grupos de 100)\n\n" + url_nueva))
    return itemlist
def foro(item):
    """List a mocosoftx forum page: sub-forums, then threads, then a next-page link."""
    logger.info("pelisalacarta.channels.mocosoftx foro")
    itemlist = []

    def _strip_session(url):
        # Session ids embedded in links would break caching and leak the
        # session; drop everything from "?PHPSESSID=" on.
        # (This logic was previously triplicated inline.)
        if "PHPSESSID" in url:
            url = scrapertools.get_match(url, "(.*?)\?PHPSESSID=")
        return url

    # Download the page
    data = scrapertools.cache_page(item.url, headers=MAIN_HEADERS)

    # Extract forums and sub-forums
    patron = '<h4><a href="([^"]+)"[^>]+>([^<]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle in matches:
        scrapedtitle = unicode(scrapedtitle, "iso-8859-1",
                               errors="replace").encode("utf-8")
        title = ">> Foro " + scrapedtitle
        #http://mocosoftx.com/foro/fotos-hentai/?PHPSESSID=nflddqf9nvbm2dd92
        url = _strip_session(urlparse.urljoin(item.url, scrapedurl))
        itemlist.append(
            Item(channel=__channel__,
                 title=title,
                 action="foro",
                 url=url,
                 plot="",
                 thumbnail="",
                 folder=True))

    # Extract individual threads
    patron = '<td class="icon2 windowbgb">[^<]+'
    patron += '<img src="([^"]+)"[^<]+'
    patron += '</td>[^<]+'
    patron += '<td class="subject windowbgb2">[^<]+'
    patron += '<div >[^<]+'
    patron += '<span id="msg_\d+"><a href="([^"]+)">([^>]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedthumbnail, scrapedurl, scrapedtitle in matches:
        url = _strip_session(urlparse.urljoin(item.url, scrapedurl))
        itemlist.append(
            Item(channel=__channel__,
                 title=scrapedtitle,
                 action="findvideos",
                 url=url,
                 plot="",
                 thumbnail=scrapedthumbnail,
                 folder=True))

    # Extract the next-page marker, e.g.:
    #<a class="navPages" href="http://mocosoftx.com/foro/peliculas-xxx-online-(completas)/20/?PHPSESSID=rpejdrj1trngh0sjdp08ds0ef7">2</a>
    patronvideos = '<strong>\d+</strong[^<]+<a class="navPages" href="([^"]+)">'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) > 0:
        scrapedurl = _strip_session(urlparse.urljoin(item.url, matches[0]))
        itemlist.append(
            Item(channel=__channel__,
                 title=">> Página siguiente",
                 action="foro",
                 url=scrapedurl,
                 plot="",
                 thumbnail="",
                 folder=True))

    return itemlist
def get_temporadas(item):
    """List the seasons of a show (pepecine).

    The page embeds a JSON blob in ``vars.title = {...};`` holding the show
    metadata (cast, crew, seasons, per-season links).  With a single season
    the episode list is returned directly; otherwise one entry per season is
    produced.  ``item.extra == "serie_add"`` is the library-import path and
    skips the metadata enrichment.
    """
    logger.info("[pepecine.py] get_temporadas")
    itemlist = []
    infoLabels = {}
    # Flatten the page so the JSON blob can be captured with one regex.
    data = re.sub(r"\n|\r|\t|\s{2}|(<!--.*?-->)", "",
                  scrapertools.cache_page(item.url))
    patron = 'vars.title =(.*?)};'
    try:
        # The capture drops the closing brace; restore it before parsing.
        data_dict = jsontools.load_json(
            scrapertools.get_match(data, patron) + '}')
    except:
        return itemlist  # Return an empty list
    if item.extra == "serie_add":
        # Library-import path: only the identifiers are needed.
        item.extra = str(data_dict['tmdb_id'])
        item.url = str(data_dict["link"])
        infoLabels['titleraw'] = data_dict["title"]
        infoLabels['tvshowtitle'] = data_dict["title"]
        infoLabels['title_id'] = data_dict['id']
        item.infoLabels = infoLabels
        itemlist = get_episodios(item)
    else:
        infoLabels = item.infoLabels
        # Enrich with cast / writers / directors when the JSON has them.
        if data_dict.has_key("actor"):
            cast = []
            rol = []
            for actor in data_dict["actor"]:
                cast.append(actor['name'])
                rol.append(actor['pivot']['char_name'])
            infoLabels['cast'] = cast
            infoLabels['castandrole'] = zip(cast, rol)
        if data_dict.has_key("writer"):
            writers_list = []
            for writer in data_dict["writer"]:
                writers_list.append(writer['name'])
            infoLabels['writer'] = ", ".join(writers_list)
        if data_dict.has_key("director"):
            director_list = []
            for director in data_dict["director"]:
                director_list.append(director['name'])
            infoLabels['director'] = ", ".join(director_list)
        if len(data_dict["season"]) == 1:
            # Only one season: go straight to the episode list.
            item.extra = str(data_dict['tmdb_id'])
            item.url = str(data_dict["link"])
            item.infoLabels = infoLabels
            itemlist = get_episodios(item)
        else:
            # ... or, with several seasons, build the per-season listing.
            item.extra = str(data_dict['tmdb_id'])
            data_dict["season"].sort(
                key=lambda x: (x['number']))  # sort by season number
            for season in data_dict["season"]:
                url = filter(
                    lambda l: l["season"] == season['number'],
                    data_dict["link"])  # filter links by season
                if url:
                    if season['overview']:
                        infoLabels['plot'] = season['overview']
                    if season['number']:
                        infoLabels['season'] = season['number']
                    if season["poster"]:
                        # Request the larger 500px poster variant.
                        item.thumbnail = re.compile("/w\d{3}/").sub(
                            "/w500/", season["poster"])
                    if season["release_date"]:
                        infoLabels['premiered'] = season['release_date']
                    item.infoLabels = infoLabels
                    title = item.title + ' ' + season["title"].lower().replace(
                        'season', 'temporada').capitalize()
                    itemlist.append(
                        Item(channel=item.channel,
                             action="get_episodios",
                             title=title,
                             url=str(url),
                             extra=item.extra,
                             fanart=item.fanart,
                             text_color="0xFFFFCE9C",
                             thumbnail=item.thumbnail,
                             viewmode="movie_with_plot",
                             infoLabels=item.infoLabels))
        # Offer the "add to library" shortcut when the feature is enabled.
        if config.get_library_support() and itemlist:
            url = urlparse.urljoin(__url_base__,
                                   "series-online/" + str(data_dict['id']))
            itemlist.append(
                Item(channel=item.channel,
                     title="Añadir esta serie a la biblioteca",
                     url=url,
                     action="add_serie_to_library",
                     extra='episodios###serie_add',
                     show=data_dict["title"],
                     text_color="0xFFe5ffcc",
                     thumbnail=
                     'https://d5.usercdn.com/dl/i/02360/a99fzwbqdaen.png'))
    return itemlist
def peliculas(item, data=""):
    """List the movies returned by the vserie JSON paginator API.

    ``data`` may carry a pre-fetched response; otherwise the page is
    downloaded (as a POST when ``item.extra`` holds the form payload).
    The API answers ``{"resultado": {"<n>": "<li>...</li>", ...}}``; the
    HTML fragments are concatenated and scraped, and a pagination entry is
    appended (the first page requests ``last=40``, later pages advance the
    ``last`` offset by 35).

    Fixes over the original: the fragment concatenation loop was quadratic
    (``data = data + rows[row]``) and is now a single ``join``; the
    ``== True`` setting comparison is now a plain truth test.
    """
    logger.info()
    if config.get_setting("zampaseriesaccount"):
        login()

    # Download the page unless the caller already supplied the body
    if data == "":
        if item.extra == "":
            data = scrapertools.cache_page(item.url)
        else:
            data = scrapertools.cache_page(item.url, post=item.extra)

    json_object = jsontools.load(data)
    rows = json_object["resultado"]
    # Concatenate every <li> fragment in one pass (same key order as the
    # original per-key += loop).
    data = "".join(rows[row] for row in rows)
    logger.info("data=" + repr(data))

    # Extract the entries (folders)
    patron = 'title="([^"]+)"[^<]+<a href="(http.//vserie.com/pelicula/[^"]+)"><img src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)

    itemlist = []
    for scrapedtitle, scrapedurl, scrapedthumbnail in matches:
        title = scrapertools.htmlclean(scrapedtitle)
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, scrapedthumbnail)
        plot = ""
        logger.debug("title=[" + title + "], url=[" + url +
                     "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos",
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 show=title))

    # Pagination: first call switches to the paginator endpoint; later
    # calls advance the "last" offset carried in item.extra.
    if "/paginador/" not in item.url:
        itemlist.append(
            Item(channel=item.channel,
                 action="peliculas",
                 title=">> Página siguiente",
                 url="http://vserie.com/api/paginador/",
                 extra="tipo=peliculas&last=40",
                 viewmode="movie"))
    else:
        actual = scrapertools.find_single_match(item.extra, "last\=(\d+)")
        siguiente = str(int(actual) + 35)
        itemlist.append(
            Item(channel=item.channel,
                 action="peliculas",
                 title=">> Página siguiente",
                 url="http://vserie.com/api/paginador/",
                 extra="tipo=peliculas&last=" + siguiente,
                 viewmode="movie"))
    return itemlist
def peliculas(item):
    """Scrape the italiaserie index page plus its "next page" link."""
    logger.info("streamondemand.italiaserie peliculas")
    itemlist = []

    # Fetch the index page
    data = scrapertools.cache_page(item.url)

    # One match per show card: (url, title, thumbnail)
    patron = '<div class="post-thumb">\s*<a href="([^"]+)" title="([^"]+)">\s*<img src="([^"]+)"[^>]+>'
    for scrapedurl, scrapedtitle, scrapedthumbnail in re.compile(patron, re.DOTALL).findall(data):
        # Pull the plot out of the detail page's entry-content block.
        html = scrapertools.cache_page(scrapedurl)
        start = html.find("<div class=\"entry-content\">")
        end = html.find("</p>", start)
        scrapedplot = scrapertools.decodeHtmlentities(
            re.sub(r'<[^>]*>', '', html[start:end]))
        scrapedtitle = scrapertools.decodeHtmlentities(scrapedtitle)

        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl +
                        "], thumbnail=[" + scrapedthumbnail + "]")

        # Strip "[...]"/"(...)" decorations before the TMDB lookup.
        tmdbtitle = scrapedtitle.split("[")[0].split("(")[0]
        try:
            plot, fanart, poster, extrameta = info_tv(tmdbtitle)
            itemlist.append(
                Item(channel=__channel__,
                     thumbnail=poster,
                     fanart=fanart if fanart != "" else poster,
                     extrameta=extrameta,
                     plot=str(plot),
                     action="findvid_serie",
                     title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                     url=scrapedurl,
                     fulltitle=scrapedtitle,
                     show=scrapedtitle,
                     folder=True))
        except:
            # TMDB lookup failed: fall back to the scraped artwork/plot.
            itemlist.append(
                Item(channel=__channel__,
                     action="findvideos",
                     fulltitle=scrapedtitle,
                     show=scrapedtitle,
                     title="[COLOR azure]" + scrapedtitle + "[/COLOR]",
                     url=scrapedurl,
                     thumbnail=scrapedthumbnail,
                     plot=scrapedplot,
                     folder=True))

    # Pager: add "back home" and "next page" entries when a next link exists.
    patronvideos = '<a class="next page-numbers" href="(.*?)">Next'
    next_pages = re.compile(patronvideos, re.DOTALL).findall(data)
    if next_pages:
        scrapedurl = urlparse.urljoin(item.url, next_pages[0])
        itemlist.append(
            Item(channel=__channel__,
                 action="HomePage",
                 title="[COLOR yellow]Torna Home[/COLOR]",
                 folder=True))
        itemlist.append(
            Item(channel=__channel__,
                 action="peliculas",
                 title="[COLOR orange]Successivo >>[/COLOR]",
                 url=scrapedurl,
                 thumbnail="http://2.bp.blogspot.com/-fE9tzwmjaeQ/UcM2apxDtjI/AAAAAAAAeeg/WKSGM2TADLM/s1600/pager+old.png",
                 folder=True))
    return itemlist
def get_only_episodio(item):
    """Resolve a single episode (pepecine): enrich ``item`` with metadata
    from the page's embedded JSON (plus TMDB, best-effort), select the links
    for the season/episode carried in ``item.extra`` ("SxE"), and delegate
    to ``findvideos``.
    """
    logger.info("[pepecine.py] get_only_episodio")
    itemlist = []
    plot = {}
    # Flatten the page so the JSON blob can be captured with one regex.
    data = re.sub(r"\n|\r|\t|\s{2}|(<!--.*?-->)", "",
                  scrapertools.cache_page(item.url))
    patron = 'vars.title =(.*?)};'
    try:
        # The capture drops the closing brace; restore it before parsing.
        data_dict = jsontools.load_json(
            scrapertools.get_match(data, patron) + '}')
    except:
        return itemlist  # Return an empty list
    try:
        # Best-effort TMDB handle; a failure here only skips the
        # episode-level enrichment below.
        from core.tmdb import Tmdb
        oTmdb = Tmdb(id_Tmdb=data_dict['tmdb_id'], tipo="tv")
    except:
        pass
    infoLabels = item.infoLabels
    # Cast / crew from the embedded JSON.
    cast = []
    rol = []
    for actor in data_dict["actor"]:
        cast.append(actor['name'])
        rol.append(actor['pivot']['char_name'])
    writers_list = []
    for writer in data_dict["writer"]:
        writers_list.append(writer['name'])
    director_list = []
    for director in data_dict["director"]:
        director_list.append(director['name'])
    infoLabels['cast'] = cast
    infoLabels['castandrole'] = zip(cast, rol)
    infoLabels['writer'] = ", ".join(writers_list)
    infoLabels['director'] = ", ".join(director_list)
    # item.extra carries "<season>x<episode>".
    infoLabels['season'], infoLabels['episode'] = item.extra.split('x')
    try:
        # Add the episode's synopsis and still frame from TMDB.
        datos_tmdb = oTmdb.get_episodio(temporada=infoLabels['season'],
                                        capitulo=infoLabels['episode'])
        if datos_tmdb["episodio_sinopsis"] != "":
            infoLabels['plot'] = datos_tmdb["episodio_sinopsis"]
        if datos_tmdb["episodio_imagen"] != "":
            item.thumbnail = datos_tmdb["episodio_imagen"]
        #if datos_tmdb["episodio_titulo"] !="": title = title + " [COLOR 0xFFFFE6CC]" + datos_tmdb["episodio_titulo"].replace('\t','') + "[/COLOR]"
    except:
        pass

    def cap(l):
        # Predicate: does this link entry match the requested episode?
        try:
            temporada_link = int(l["season"])
            capitulo_link = int(l['episode'])
        except:
            return False
        return True if temporada_link == int(
            infoLabels['season']) and capitulo_link == int(
                infoLabels['episode']) else False

    # Keep only the links for the requested episode.
    item.url = str(filter(cap, data_dict["link"]))
    item.infoLabels = infoLabels
    item.extra = str(data_dict['tmdb_id'])
    return findvideos(item)
def generos(item):
    """List the genre categories of locopelis, attaching per-genre artwork
    (the same image is used for thumbnail and fanart)."""
    # Genre name (lowercase) -> artwork URL.
    tgenero = {
        "comedia": "https://s32.postimg.org/q7g2qs90l/comedia.png",
        "suspenso": "https://s31.postimg.org/kb629gscb/suspenso.png",
        "drama": "https://s32.postimg.org/e6z83sqzp/drama.png",
        "accion": "https://s32.postimg.org/4hp7gwh9x/accion.png",
        "aventura": "https://s32.postimg.org/whwh56is5/aventura.png",
        "romance": "https://s31.postimg.org/y7vai8dln/romance.png",
        "animacion e infantil": "https://s32.postimg.org/rbo1kypj9/animacion.png",
        "ciencia ficcion": "https://s32.postimg.org/6hp3tsxsl/ciencia_ficcion.png",
        "terror": "https://s32.postimg.org/ca25xg0ed/terror.png",
        "anime": 'https://s31.postimg.org/lppob54d7/anime.png',
        "documentales": "https://s32.postimg.org/7opmvc5ut/documental.png",
        "intriga": "https://s32.postimg.org/xc2ovcqfp/intriga.png",
        "musical": "https://s31.postimg.org/7i32lca7f/musical.png",
        "western": "https://s31.postimg.org/nsksyt3hn/western.png",
        "fantasia": "https://s32.postimg.org/pklrf01id/fantasia.png",
        "asiaticas": "https://s32.postimg.org/ijqp3mt85/asiatica.png",
        "bélico (guerra)": "https://s32.postimg.org/kjbko3xhx/belica.png",
        "deporte": "https://s31.postimg.org/pdc8etc0r/deporte.png",
        "adolescente": "https://s31.postimg.org/xkz086q0r/adolescente.png",
        "artes marciales": "https://s32.postimg.org/5e80taodh/artes_marciales.png",
        "cine negro": "https://s32.postimg.org/b0882kt7p/cine_negro.png",
        "eroticas +18": "https://s31.postimg.org/6kcxutv3v/erotica.png",
        "hindu": "https://s31.postimg.org/495qn1i63/hindu.png",
        "religiosas": "https://s31.postimg.org/5tgjedlwb/religiosa.png",
        "vampiros": "https://s32.postimg.org/wt6f483j9/vampiros.png",
        "zombies": "https://s32.postimg.org/atd2jfw6t/zombies.png"
    }
    logger.info("pelisalacarta.channels.locopelis episodios")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    patron = '<li><a title.*?href="http:\/\/www.locopelis.com\/categoria\/([^"]+)">([^<]+)<\/a><\/li>.*?'
    for scrapedurl, scrapedtitle in re.compile(patron, re.DOTALL).findall(data):
        url = urlparse.urljoin(
            item.url, 'http://www.locopelis.com/categoria/' + scrapedurl)
        title = scrapedtitle.decode('cp1252').encode('utf-8')
        # Same artwork for thumbnail and fanart; empty for unknown genres.
        arte = tgenero.get(title.lower(), '')
        thumbnail = arte
        fanart = arte
        plot = ''
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(channel=item.channel,
                 action="todas",
                 title=title.lower(),
                 fulltitle=item.fulltitle,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=fanart))
    return itemlist
def listado(item):
    """Generic paginated JSON listing for pepecine.

    ``item.extra`` selects the flavour: "movie", "series", or (otherwise)
    newly-added episodes.  Builds one Item per result and a "next page"
    entry when the current page came back full.
    """
    #import json
    logger.info("[pepecine.py] listado")
    itemlist = []
    try:
        data_dict = jsontools.load_json(scrapertools.cache_page(item.url))
    except:
        return itemlist  # Return an empty list
    # offset/limit come from the listing URL itself and drive pagination.
    offset = scrapertools.get_match(item.url, 'offset=(\d*)')
    limit = scrapertools.get_match(item.url, 'limit=(\d*)')
    for i in data_dict["result"]:
        infoLabels = {}
        idioma = ''
        if item.extra == "movie":
            action = "get_movie"
            title = i["title"] + ' (' + i['year'] + ')'
            url = urlparse.urljoin(
                __url_base__, "peliculas-online/" +
                str(i["id"]))  #+"-"+i["title"]).lower().replace(" ","-")))
        elif item.extra == "series":
            action = "get_temporadas"
            title = i["title"]
            infoLabels['tvshowtitle'] = i["title"]
            url = urlparse.urljoin(
                __url_base__, "series-online/" +
                str(i["id"]))  #+"-"+i["title"]).lower().replace(" ","-")))
        else:  #item.extra=="series_novedades":
            action = "get_only_episodio"
            infoLabels['season'] = i['season']
            infoLabels['episode'] = i['episode'].zfill(2)
            item.extra = infoLabels["season"] + "x" + infoLabels["episode"]
            infoLabels['tvshowtitle'] = i["title"]
            # The language label embeds a flag <img>; strip it out.
            flag = scrapertools.find_single_match(i["label"],
                                                  '(\s*\<img src=.*\>)')
            idioma = i["label"].replace(flag, "")
            title = i["title"] + ' ' + item.extra + ' (' + idioma + ')'
            url = urlparse.urljoin(
                __url_base__, "series-online/" +
                str(i["id"]))  #+"-"+i["title"]).lower().replace(" ","-")))
        if i.has_key("poster") and i["poster"]:
            # Request the larger 500px poster variant.
            thumbnail = re.compile("/w\d{3}/").sub("/w500/", i["poster"])
        else:
            thumbnail = item.thumbnail
        if i.has_key("background") and i["background"]:
            fanart = i["background"]
        else:
            fanart = item.fanart
        # Fill in the infoLabels dict
        infoLabels['title_id'] = i[
            'id']  # title_id: identifier of the movie/show on pepecine.com
        infoLabels['titleraw'] = i[
            "title"]  # titleraw: title of the movie/show without formatting
        if i['genre']:
            infoLabels['genre'] = i['genre']
        if i['year']:
            infoLabels['year'] = i['year']
        if i['tagline']:
            infoLabels['plotoutline'] = i['tagline']
        if i['plot']:
            infoLabels['plot'] = i['plot']
        else:
            infoLabels['plot'] = ""
        if i['runtime']:
            # Minutes -> seconds.
            infoLabels['duration'] = int(i['runtime']) * 60
        if i['imdb_rating']:
            infoLabels['rating'] = i['imdb_rating']
        elif i['tmdb_rating']:
            infoLabels['rating'] = i['tmdb_rating']
        newItem = Item(channel=item.channel,
                       action=action,
                       title=title,
                       url=url,
                       extra=item.extra,
                       fanart=fanart,
                       thumbnail=thumbnail,
                       viewmode="movie_with_plot",
                       language=idioma,
                       text_color="0xFFFFCE9C",
                       infoLabels=infoLabels)
        newItem.year = i['year']
        newItem.contentTitle = i['title']
        if 'season' in infoLabels and infoLabels['season']:
            newItem.contentSeason = infoLabels['season']
        if 'episode' in infoLabels and infoLabels['episode']:
            newItem.contentEpisodeNumber = infoLabels['episode']
        itemlist.append(newItem)
    # Pagination: a full page implies more results; advance the offset.
    if int(data_dict["total_results"]) == int(limit):
        url = item.url.replace('offset=' + offset,
                               'offset=' + str(int(offset) + int(limit)))
        itemlist.append(
            Item(channel=item.channel,
                 action="listado",
                 text_color="0xFF994D00",
                 title=">> Pagina siguiente >>",
                 thumbnail=item.thumbnail,
                 url=url,
                 extra=item.extra,
                 fanart=fanart_host))
    return itemlist
def episodios(item):
    """List the episodes of a series, tagging each with ITA / SUB ITA
    according to the season header it appears under."""

    def extrae_episodios(html, lang_title):
        # Each <br /> chunk holds one episode line; the text before the
        # first <a> is the visible label (e.g. "1x01 ...").
        for chunk in scrapertools.decodeHtmlentities(html).split('<br />'):
            end = chunk.find('<a ')
            if end > 0:
                label = re.sub(r'<[^>]*>', '', chunk[:end]).strip()
            else:
                label = ''
            title = scrapertools.find_single_match(label, '\d+[^\d]+\d+')
            if title == '':
                title = label
            if title != '':
                itemlist.append(
                    Item(channel=__channel__,
                         action="findvid_serie",
                         title=title + " (" + lang_title + ")",
                         url=item.url,
                         thumbnail=item.thumbnail,
                         extra=chunk,
                         fulltitle=item.fulltitle,
                         show=item.show))

    logger.info("streamondemand.streamblog episodios")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Locate every "STAGIONE ... ITA" season header and remember where each
    # season's episode listing starts.
    lang_titles = []
    starts = []
    for match in re.compile(r"STAGIONE.*?ITA", re.IGNORECASE).finditer(data):
        season_title = match.group()
        if season_title != '':
            lang_titles.append('SUB ITA' if 'SUB' in season_title.upper() else 'ITA')
            starts.append(match.end())

    # Each season's HTML runs from its header to the next header; the last
    # segment stops one character short of the page end (as before).
    for idx, lang_title in enumerate(lang_titles):
        stop = starts[idx + 1] if idx + 1 < len(lang_titles) else -1
        extrae_episodios(data[starts[idx]:stop], lang_title)

    if config.get_library_support() and len(itemlist) != 0:
        itemlist.append(
            Item(channel=__channel__,
                 title=item.title,
                 url=item.url,
                 action="add_serie_to_library",
                 extra="episodios",
                 show=item.show))
        itemlist.append(
            Item(channel=item.channel,
                 title="Scarica tutti gli episodi della serie",
                 url=item.url,
                 action="download_all_episodes",
                 extra="episodios",
                 show=item.show))
    return itemlist
def letras(item):
    """Alphabetical (A-Z) index of locopelis, or a filtered listing when
    ``item.extra`` selects another view (e.g. 'Vista' for most-viewed)."""
    # Per-letter thumbnails for the A-Z menu.
    thumbletras = {
        '0-9': 'https://s32.postimg.org/drojt686d/image.png',
        '0 - 9': 'https://s32.postimg.org/drojt686d/image.png',
        '#': 'https://s32.postimg.org/drojt686d/image.png',
        'a': 'https://s32.postimg.org/llp5ekfz9/image.png',
        'b': 'https://s32.postimg.org/y1qgm1yp1/image.png',
        'c': 'https://s32.postimg.org/vlon87gmd/image.png',
        'd': 'https://s32.postimg.org/3zlvnix9h/image.png',
        'e': 'https://s32.postimg.org/bgv32qmsl/image.png',
        'f': 'https://s32.postimg.org/y6u7vq605/image.png',
        'g': 'https://s32.postimg.org/9237ib6jp/image.png',
        'h': 'https://s32.postimg.org/812yt6pk5/image.png',
        'i': 'https://s32.postimg.org/6nbbxvqat/image.png',
        'j': 'https://s32.postimg.org/axpztgvdx/image.png',
        'k': 'https://s32.postimg.org/976yrzdut/image.png',
        'l': 'https://s32.postimg.org/fmal2e9yd/image.png',
        'm': 'https://s32.postimg.org/m19lz2go5/image.png',
        'n': 'https://s32.postimg.org/b2ycgvs2t/image.png',
        'o': 'https://s32.postimg.org/c6igsucpx/image.png',
        'p': 'https://s32.postimg.org/jnro82291/image.png',
        'q': 'https://s32.postimg.org/ve5lpfv1h/image.png',
        'r': 'https://s32.postimg.org/nmovqvqw5/image.png',
        's': 'https://s32.postimg.org/zd2t89jol/image.png',
        't': 'https://s32.postimg.org/wk9lo8jc5/image.png',
        'u': 'https://s32.postimg.org/w8s5bh2w5/image.png',
        'v': 'https://s32.postimg.org/e7dlrey91/image.png',
        'w': 'https://s32.postimg.org/fnp49k15x/image.png',
        'x': 'https://s32.postimg.org/dkep1w1d1/image.png',
        'y': 'https://s32.postimg.org/um7j3zg85/image.png',
        'z': 'https://s32.postimg.org/jb4vfm9d1/image.png'
    }
    logger.info("pelisalacarta.channels.locopelis letras")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot = ''
    # Two patterns: the letter menu vs. a generic filtered listing whose
    # pattern interpolates item.extra.
    if item.extra == 'letras':
        patron = '<li><a href="([^"]+)" title="Letra.*?">([^<]+)</a></li>'
    else:
        patron = '<li><a.*?href="([^"]+)" title="([^v]+)' + item.extra + '.*?">'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedtitle in matches:
        url = urlparse.urljoin(item.url, scrapedurl)
        if item.extra != 'letras':
            # Non-letter listings link straight to a show: fetch its page
            # for artwork and plot.
            data = scrapertools.cache_page(scrapedurl)
            thumbnail = scrapertools.get_match(
                data, '<link rel="image_src" href="([^"]+)"/>')
            realplot = scrapertools.find_single_match(
                data, '<p itemprop="articleBody">([^<]+)<\/p> ')
            plot = scrapertools.remove_htmltags(realplot)
            action = 'temporadas'
        else:
            if scrapedtitle.lower() in thumbletras:
                thumbnail = thumbletras[scrapedtitle.lower()]
            else:
                thumbnail = ''
            plot = ''
            action = 'todas'
        title = scrapedtitle.replace(': ', '')
        title = scrapertools.decodeHtmlentities(title)
        # Fanart depends on which view produced this listing.
        if item.extra == 'letras':
            fanart = 'https://s31.postimg.org/c3bm9cnl7/a_z.png'
        elif item.extra == 'Vista':
            fanart = 'https://s32.postimg.org/466gt3ipx/vistas.png'
        else:
            fanart = ''
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(channel=item.channel,
                 action=action,
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=fanart))
    return itemlist
def findvideos(item):
    """Build the watch/download link list for one episode page
    (seriesblanco).

    The HTML tables are first normalised by re.sub into one flat
    ``<patron>url;lang;date;server;uploader;quality;type</patron>`` line per
    link, which a single regex then consumes.
    """
    logger.info("pelisalacarta.seriesblanco findvideos")
    itemlist = []

    # Download the page and aggressively strip noise so the table rows
    # become byte-exact matches for the rewrite patterns below.
    data = scrapertools.cache_page(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}| |<Br>|<BR>|<br>|<br/>|<br />|-\s", "",
                  data)
    data = re.sub(r"<!--.*?-->", "", data)
    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
    data = re.sub(r"<center>|</center>|</a>", "", data)
    # Empty quality cells default to "SD".
    data = re.sub(r"<td class='tam(\d+)'></td></tr>",
                  r"<td class='tam\1'>SD</td></tr>", data)
    '''
    <tr>
    <td class='tam*N*'><a href='(*URL*)'*ATTR*> <img src='*PATH*(*IDIOMA*).*EXT*'*ATTR*></td>
    <td class='tam*N*'>(*FECHA*)</td>
    <td class='tam*N*'><a href='*URL*'*ATTR*> <img src='*PATH*(*SERVIDOR*).*EXT*'*ATTR*></td>
    <td class='tam*N*'><a href='*URL*'*ATTR*>(*UPLOADER*)</td>
    <td class='tam*N*'>(*SUB|CALIDAD*)</td>
    </tr>
    '''
    # Streaming table vs. download table are delimited differently.
    online = scrapertools.get_match(
        data, "<thead><tbody>(.*?)<table class='zebra'>")
    download = scrapertools.get_match(
        data,
        "<caption class='tam16'>Descarga.*?<thead><tbody>(.*?)</tbody></table>"
    )
    # Rewrite each table row into a single "<patron>...</patron>" record,
    # tagged "Ver" (watch) or "Descargar" (download).
    online = re.sub(
        r"<tr>" + \
        "<td class='tam12'><a href='([^']+)'[^>]+>" + \
        "<img src='/banderas/([^\.]+)\.[^>]+></td>" + \
        "<td class='tam12'>([^<]+)</td>" + \
        "<td class='tam12'><[^>]+>" + \
        "<img src='/servidores/([^\.]+)\.[^>]+></td>" + \
        "<td class='tam12'><[^>]+>([^<]+)</td>" + \
        "<td class='tam12'>([^<]+)</td>" + \
        "</tr>",
        r"<patron>\1;\2;\3;\4;\5;\6;Ver</patron>",
        online
    )
    download = re.sub(
        r"<tr>" + \
        "<td class='tam12'><a href='([^']+)'[^>]+>" + \
        "<img src='/banderas/([^\.]+)\.[^>]+></td>" + \
        "<td class='tam12'>([^<]+)</td>" + \
        "<td class='tam12'><[^>]+>" + \
        "<img src='/servidores/([^\.]+)\.[^>]+></td>" + \
        "<td class='tam12'><[^>]+>([^<]+)</td>" + \
        "<td class='tam12'>([^<]+)</td>" + \
        "</tr>",
        r"<patron>\1;\2;\3;\4;\5;\6;Descargar</patron>",
        download
    )
    data = online + download
    '''
    <patron>*URL*;*IDIOMA*;*FECHA*;*SERVIDOR*;*UPLOADER*;*SUB|CALIDAD*;*TIPO*</patron>
    '''
    patron = '<patron>([^;]+);([^;]+);([^;]+);([^;]+);([^;]+);([^;]+);([^<]+)</patron>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedidioma, scrapedfecha, scrapedservidor, scrapeduploader, scrapedsubcalidad, scrapedtipo in matches:
        # "idiomas" is a module-level flag-code -> language-name map.
        title = scrapedtipo + " en " + scrapedservidor + " [" + idiomas[
            scrapedidioma] + "] [" + scrapedsubcalidad + "] (" + scrapeduploader + ": " + scrapedfecha + ")"
        itemlist.append(
            Item(channel=__channel__,
                 title=title,
                 url=urlparse.urljoin(host, scrapedurl),
                 action="play",
                 show=item.show))
    return itemlist
def todas(item):
    """List every movie on the current locopelis index page and append a
    "next page" entry when the pager advertises more pages.

    Fix over the original: the URL-trimming loop stripped one character at
    a time until the URL ended in '=' and would spin forever when the URL
    contained no '='; it is now guarded.
    """
    latino = 'limegreen'  # español = 'yellow'  # sub = 'white'
    logger.info("pelisalacarta.channels.locopelis todas")
    itemlist = []
    data = scrapertools.cache_page(item.url)

    # One match per movie card: (url, title, thumbnail, plot, language)
    patron = '<h2 class="titpeli bold ico_b">.*?<\/h2>.*?'
    patron += '<a href="([^"]+)" title="([^"]+)">.*?'
    patron += '<img src="([^"]+)" alt=.*?><\/a>.*?'
    patron += '<p>([^<]+)<\/p>.*?'
    patron += '<div class=.*?>Idioma<\/strong>:.<img src=.*?>([^<]+)<\/div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedplot, scrapedidioma in matches:
        idioma = scrapertools.decodeHtmlentities(scrapedidioma.strip())
        url = urlparse.urljoin(item.url, scrapedurl)
        title = scrapedtitle.decode('cp1252')
        title = title.encode('utf-8') + ' (' + idioma + ')'
        thumbnail = scrapedthumbnail
        plot = scrapedplot
        fanart = 'https://s31.postimg.org/5worjw2nv/locopelis.png'
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos",
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=fanart,
                 extra=idioma,
                 contentTitle=scrapedtitle.decode('cp1252')))

    # Pagination: "actual" is the highlighted page number, "ultima" the
    # last page advertised by the pager.
    siguiente = ''
    title = ''
    actual = scrapertools.find_single_match(
        data, '<li><a href=".*?"><span><b>([^<]+)<\/b><\/span><\/a><\/li>')
    ultima = scrapertools.find_single_match(
        data, '<li><a href=".*?page=([^"]+)">Ultima<\/a><\/li>')
    if 'page' in item.title:
        # Trim back to the "...page=" prefix — but only when there is a
        # '=' to stop at (the original loop never terminated otherwise).
        while '=' in item.url and not item.url.endswith('='):
            item.url = item.url[:-1]
    if actual:
        siguiente = int(actual) + 1
        if item.url.endswith('='):
            siguiente_url = item.url + str(siguiente)
        else:
            siguiente_url = item.url + '?&page=' + str(siguiente)
    if actual and ultima and siguiente <= int(ultima):
        titlen = 'Pagina Siguiente >>> ' + str(actual) + '/' + str(ultima)
        fanart = 'https://s31.postimg.org/5worjw2nv/locopelis.png'
        itemlist.append(
            Item(channel=item.channel,
                 action="todas",
                 title=titlen,
                 url=siguiente_url,
                 fanart=fanart))
    return itemlist
def findvideos(item):
    """Build the link list for a peliculasdk movie page.

    The page exposes one tab per hoster; each tab calls a JS function
    (defined in /Js/videod.js) with the video id.  The tab labels carry the
    language/quality, the JS file maps function name -> embed URL template.
    """
    logger.info("pelisalacarta.peliculasdk findvideos")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    data = re.sub(r"<!--.*?-->", "", data)
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    # Tab header block: maps each tab id to its language/quality labels.
    bloque_tab = scrapertools.find_single_match(
        data, '<div id="verpelicula">(.*?)<div class="tab_container">')
    patron = '<li><a href="#([^<]+)"><span class="re">\d<\/span><span class="([^<]+)"><\/span><span class=.*?>([^<]+)<\/span>'
    check = re.compile(patron, re.DOTALL).findall(bloque_tab)
    servers_data_list = []
    # Tab bodies: (tab id, JS function name, video id).  Some pages use
    # "text/rocketscript", others a plain <script> tag.
    patron = '<div id="(tab\d+)" class="tab_content"><script type="text/rocketscript">(\w+)\("([^"]+)"\)</script></div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) == 0:
        patron = '<div id="(tab\d+)" class="tab_content"><script>(\w+)\("([^"]+)"\)</script></div>'
        matches = re.compile(patron, re.DOTALL).findall(data)
    print matches
    for check_tab, server, id in matches:
        # Colourise the synopsis block (labels red, body white).
        scrapedplot = scrapertools.get_match(
            data, '<span class="clms">(.*?)</div></div>')
        plotformat = re.compile('(.*?:) </span>',
                                re.DOTALL).findall(scrapedplot)
        scrapedplot = scrapedplot.replace(
            scrapedplot,
            bbcode_kodi2html("[COLOR white]" + scrapedplot + "[/COLOR]"))
        for plot in plotformat:
            scrapedplot = scrapedplot.replace(
                plot,
                bbcode_kodi2html("[COLOR red][B]" + plot + "[/B][/COLOR]"))
        scrapedplot = scrapedplot.replace("</span>", "[CR]")
        scrapedplot = scrapedplot.replace(":", "")
        if check_tab in str(check):
            # Recover this tab's language/quality from the header matches.
            idioma, calidad = scrapertools.find_single_match(
                str(check), "" + check_tab + "', '(.*?)', '(.*?)'")
            servers_data_list.append([server, id, idioma, calidad])
    # The JS file maps each function name to its embed URL template.
    url = "http://www.peliculasdk.com/Js/videod.js"
    data = scrapertools.cachePage(url)
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    data = data.replace(
        '<iframe width="100%" height="400" scrolling="no" frameborder="0"',
        '')
    patron = 'function (\w+)\(id\).*?'
    patron += '"([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for server, url in matches:
        for enlace, id, idioma, calidad in servers_data_list:
            if server == enlace:
                # Turn the embed template into a direct URL for this id.
                video_url = re.sub(r"embed\-|\-630x400\.html", "", url)
                video_url = video_url.replace("'+codigo+'", id)
                if "goo.gl" in video_url:
                    # Resolve the shortener via the redirect Location header.
                    video_url = scrapertools.get_header_from_response(
                        "http://anonymouse.org/cgi-bin/anon-www.cgi/" +
                        video_url,
                        header_to_get="location")
                # Derive a display name from the resolved host.
                servertitle = scrapertools.get_match(video_url,
                                                     'http.*?://(.*?)/')
                servertitle = servertitle.replace(
                    servertitle,
                    bbcode_kodi2html("[COLOR red]" + servertitle +
                                     "[/COLOR]"))
                servertitle = servertitle.replace("embed.", "")
                servertitle = servertitle.replace("player.", "")
                servertitle = servertitle.replace("api.video.", "")
                servertitle = servertitle.replace("hqq.tv", "netu.tv")
                servertitle = servertitle.replace("anonymouse.org",
                                                  "netu.tv")
                title = bbcode_kodi2html(
                    "[COLOR orange]Ver en --[/COLOR]"
                ) + servertitle + " " + idioma + " " + calidad
                itemlist.append(
                    Item(channel=item.channel,
                         title=title,
                         url=video_url,
                         action="play",
                         thumbnail=item.category,
                         plot=scrapedplot,
                         fanart=item.show))
    return itemlist
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    """Resolve a moevideos.net / moevideo.net page into playable video URLs.

    Returns a list of [label, url] entries, or an empty list when the
    letitbit API reports the video as not found.  `premium`, `user`,
    `password` and `video_password` belong to the common server-connector
    signature and are unused here.
    """
    logger.info("[moevideos.py] get_video_url(page_url='%s')" % page_url)
    video_urls = []
    if page_url.startswith("http://www.moevideos.net/online"):
        headers = []
        headers.append([
            'User-Agent',
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14'
        ])
        data = scrapertools.cache_page(page_url, headers=headers)
        # Download the script (useless by itself, but it sets the cookies)
        headers.append(['Referer', page_url])
        post = "id=1&enviar2=ver+video"
        data = scrapertools.cache_page(page_url, post=post, headers=headers)
        ### Modified 12-6-2014
        #code = scrapertools.get_match(data,'flashvars\="file\=([^"]+)"')
        #<iframe width="860" height="440" src="http://moevideo.net/framevideo/16363.1856374b43bbd40c7f8d2b25b8e5?width=860&height=440" frameborder="0" allowfullscreen ></iframe>
        # The internal video code is embedded in the player iframe URL.
        code = scrapertools.get_match(
            data,
            '<iframe width="860" height="440" src="http://moevideo.net/framevideo/([^\?]+)\?width=860\&height=440" frameborder="0" allowfullscreen ></iframe>'
        )
        logger.info("code=" + code)
    else:
        # e.g. http://moevideo.net/?page=video&uid=81492.8c7b6086f4942341aa1b78fb92df
        code = scrapertools.get_match(page_url, "uid=([a-z0-9\.]+)")

    # letitbit API
    headers2 = []
    headers2.append([
        'User-Agent',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14'
    ])
    ### Modified 12-6-2014
    url = "http://api.letitbit.net"
    #url = "http://api.moevideo.net"
    #post = "r=%5B%22tVL0gjqo5%22%2C%5B%22preview%2Fflv%5Fimage%22%2C%7B%22uid%22%3A%2272871%2E71f6541e64b0eda8da727a79424d%22%7D%5D%2C%5B%22preview%2Fflv%5Flink%22%2C%7B%22uid%22%3A%2272871%2E71f6541e64b0eda8da727a79424d%22%7D%5D%5D"
    #post = "r=%5B%22tVL0gjqo5%22%2C%5B%22preview%2Fflv%5Fimage%22%2C%7B%22uid%22%3A%2212110%2E1424270cc192f8856e07d5ba179d%22%7D%5D%2C%5B%22preview%2Fflv%5Flink%22%2C%7B%22uid%22%3A%2212110%2E1424270cc192f8856e07d5ba179d%22%7D%5D%5D
    #post = "r=%5B%22tVL0gjqo5%22%2C%5B%22preview%2Fflv%5Fimage%22%2C%7B%22uid%22%3A%2268653%2E669cbb12a3b9ebee43ce14425d9e%22%7D%5D%2C%5B%22preview%2Fflv%5Flink%22%2C%7B%22uid%22%3A%2268653%2E669cbb12a3b9ebee43ce14425d9e%22%7D%5D%5D"
    # RPC-style payload asking the API for the preview image and flv link.
    post = 'r=["tVL0gjqo5",["preview/flv_image",{"uid":"' + code + '"}],["preview/flv_link",{"uid":"' + code + '"}]]'
    data = scrapertools.cache_page(url, headers=headers2, post=post)
    logger.info("data=" + data)
    if ',"not_found"' in data:
        return []
    data = data.replace("\\", "")
    logger.info("data=" + data)
    patron = '"link"\:"([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    # Append referer / User-Agent / byte-range hints for the player.
    video_url = matches[
        0] + "?ref=www.moevideos.net|User-Agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:15.0) Gecko/20100101 Firefox/15.0.1&Range=bytes:0-"
    logger.info("[moevideos.py] video_url=" + video_url)
    video_urls = []
    video_urls.append([
        scrapertools.get_filename_from_url(video_url)[-4:] + " [moevideos]",
        video_url
    ])
    for video_url in video_urls:
        logger.info("[moevideos.py] %s - %s" % (video_url[0], video_url[1]))
    return video_urls
def buscador(item):
    """Search-results listing for peliculasdk.

    Scrapes the search page pointed to by item.url and returns one Item
    per film (adult genres filtered out), plus a "next page" Item when a
    pagination link exists.
    """
    logger.info("pelisalacarta.peliculasdk buscador")
    itemlist = []
    # Download the page
    data = scrapertools.cache_page(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    patron = '<div class="karatula".*?'
    patron += 'src="([^"]+)".*?'
    patron += '<div class="tisearch"><a href="([^"]+)">'
    patron += '([^<]+)<.*?'
    patron += 'Audio:(.*?)</a>.*?'
    patron += 'Género:(.*?)</a>.*?'
    patron += 'Calidad:(.*?),'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    if len(matches) == 0:
        # No results: show a single informational entry.
        itemlist.append(
            Item(channel=item.channel,
                 title=bbcode_kodi2html(
                     "[COLOR gold][B]Sin resultados...[/B][/COLOR]"),
                 thumbnail="http://s6.postimg.org/t8gfes7rl/pdknoisethumb.png",
                 fanart="http://s6.postimg.org/oy1rj72oh/pdknoisefan.jpg",
                 folder=False))
    for scrapedthumbnail, scrapedurl, scrapedtitle, scrapedlenguaje, scrapedgenero, scrapedcalidad in matches:
        try:
            year = scrapertools.get_match(scrapedtitle, '\((\d+)\)')
        except:
            year = ""
        # Cleaned-up title used later by the fanart action.
        title_fan = re.sub(
            r"\[.*?\]|\(.*?\)|\d+x\d+.*?Final|-\d+|-|\d+x\d+|Temporada.*?Completa| ;",
            "", scrapedtitle).strip()
        # Strip residual markup from the language/quality captures.
        scrapedcalidad = re.sub(r"<a href.*?>|</a>|</span>", "",
                                scrapedcalidad).strip()
        scrapedlenguaje = re.sub(r"<a href.*?>|</a>|</span>", "",
                                 scrapedlenguaje).strip()
        # Skip adult content entirely.
        if not "Adultos" in scrapedgenero and not "Adultos" in scrapedlenguaje and not "Adultos" in scrapedcalidad:
            scrapedcalidad = scrapedcalidad.replace(
                scrapedcalidad,
                bbcode_kodi2html("[COLOR orange]" + scrapedcalidad + "[/COLOR]"))
            scrapedlenguaje = scrapedlenguaje.replace(
                scrapedlenguaje,
                bbcode_kodi2html("[COLOR orange]" + scrapedlenguaje + "[/COLOR]"))
            scrapedtitle = scrapedtitle + "-(Idioma: " + scrapedlenguaje + ")" + "-(Calidad: " + scrapedcalidad + ")"
            scrapedtitle = scrapedtitle.replace(
                scrapedtitle,
                bbcode_kodi2html("[COLOR white]" + scrapedtitle + "[/COLOR]"))
            extra = year + "|" + title_fan
            itemlist.append(
                Item(channel=item.channel,
                     title=scrapedtitle,
                     url=scrapedurl,
                     action="fanart",
                     thumbnail=scrapedthumbnail,
                     extra=extra,
                     fanart="http://s18.postimg.org/h9kb22mnt/pdkfanart.jpg",
                     folder=True))
    # The "Siguiente" link is absent on the last page: best effort only.
    try:
        next_page = scrapertools.get_match(
            data,
            '<span class="current">.*?<a href="(.*?)".*?>Siguiente »</a></div>'
        )
        title = "siguiente>>"
        title = title.replace(
            title, bbcode_kodi2html("[COLOR red]" + title + "[/COLOR]"))
        itemlist.append(
            Item(channel=item.channel,
                 action="buscador",
                 title=title,
                 url=next_page,
                 thumbnail="http://s6.postimg.org/uej03x4r5/bricoflecha.png",
                 fanart="http://s18.postimg.org/h9kb22mnt/pdkfanart.jpg",
                 folder=True))
    except:
        pass
    return itemlist
def info(item):
    """Build an enriched info window (infoplus) for a movie or series.

    Decodes the pipe-separated metadata packed into item.extra/item.show,
    colours the ratings/plot/tagline with Kodi bbcode, fetches the TMDB
    plot and "you may also like" recommendations, then hands everything
    to channels.infoplus.start().

    FIX: removed six leftover debug print statements ("eztoquee", "paco",
    "dios", ...); no other behavior changed.
    """
    logger.info("pelisalacarta.peliculasdk info")
    itemlist = []
    url = item.url
    id = item.extra
    # Rating source differs: TVDB field for series, TMDB field for movies.
    if "serie" in item.url:
        try:
            rating_tmdba_tvdb = item.extra.split("|")[6]
            if item.extra.split("|")[6] == "":
                rating_tmdba_tvdb = "Sin puntuación"
        except:
            rating_tmdba_tvdb = "Sin puntuación"
    else:
        rating_tmdba_tvdb = item.extra.split("|")[3]
    rating_filma = item.extra.split("|")[4]
    filma = "http://s6.postimg.org/6yhe5fgy9/filma.png"
    try:
        if "serie" in item.url:
            title = item.extra.split("|")[8]
        else:
            title = item.extra.split("|")[6]
        title = title.replace("%20", " ")
        title = "[COLOR yellow][B]" + title + "[/B][/COLOR]"
    except:
        title = item.title
    # Colour the TMDB/TVDB rating: green 5-7, yellow >=8, red otherwise
    # (non-numeric ratings fall through the except into red).
    try:
        if "." in rating_tmdba_tvdb:
            check_rat_tmdba = scrapertools.get_match(rating_tmdba_tvdb, '(\d+).')
        else:
            check_rat_tmdba = rating_tmdba_tvdb
        if int(check_rat_tmdba) >= 5 and int(check_rat_tmdba) < 8:
            rating = "[COLOR springgreen][B]" + rating_tmdba_tvdb + "[/B][/COLOR]"
        elif int(check_rat_tmdba) >= 8 or rating_tmdba_tvdb == 10:
            rating = "[COLOR yellow][B]" + rating_tmdba_tvdb + "[/B][/COLOR]"
        else:
            rating = "[COLOR crimson][B]" + rating_tmdba_tvdb + "[/B][/COLOR]"
    except:
        rating = "[COLOR crimson][B]" + rating_tmdba_tvdb + "[/B][/COLOR]"
    if "10." in rating:
        rating = re.sub(r'10\.\d+', '10', rating)
    # Same colour scheme for the Filmaffinity rating.
    try:
        check_rat_filma = scrapertools.get_match(rating_filma, '(\d)')
        if int(check_rat_filma) >= 5 and int(check_rat_filma) < 8:
            rating_filma = "[COLOR springgreen][B]" + rating_filma + "[/B][/COLOR]"
        elif int(check_rat_filma) >= 8:
            rating_filma = "[COLOR yellow][B]" + rating_filma + "[/B][/COLOR]"
        else:
            rating_filma = "[COLOR crimson][B]" + rating_filma + "[/B][/COLOR]"
    except:
        rating_filma = "[COLOR crimson][B]" + rating_filma + "[/B][/COLOR]"
    if not "serie" in item.url:
        # Movies: plot/tagline from TMDB, falling back to item.show.
        url_plot = "http://api.themoviedb.org/3/movie/" + item.extra.split(
            "|"
        )[1] + "?api_key=2e2160006592024ba87ccdf78c28f49f&append_to_response=credits&language=es"
        data_plot = scrapertools.cache_page(url_plot)
        plot = scrapertools.find_single_match(data_plot, '"overview":"(.*?)",')
        tagline = scrapertools.find_single_match(data_plot, '"tagline":(".*?")')
        if plot == "":
            plot = item.show.split("|")[2]
        plot = "[COLOR moccasin][B]" + plot + "[/B][/COLOR]"
        plot = re.sub(r"\\", "", plot)
    else:
        # Series: plot comes packed in item.show, tagline in item.extra.
        plot = item.show.split("|")[2]
        plot = "[COLOR moccasin][B]" + plot + "[/B][/COLOR]"
        plot = re.sub(r"\\", "", plot)
        if item.extra.split("|")[7] != "":
            tagline = item.extra.split("|")[7]
            #tagline= re.sub(r',','.',tagline)
        else:
            tagline = ""
    if "serie" in item.url:
        check2 = "serie"
        icon = "http://s6.postimg.org/hzcjag975/tvdb.png"
        foto = item.show.split("|")[1]
        if item.extra.split("|")[5] != "":
            critica = item.extra.split("|")[5]
        else:
            critica = "Esta serie no tiene críticas..."
        if not ".png" in item.extra.split("|")[0]:
            photo = "http://imgur.com/6uXGkrz.png"
        else:
            photo = item.extra.split("|")[0].replace(" ", "%20")
        try:
            tagline = "[COLOR aquamarine][B]" + tagline + "[/B][/COLOR]"
        except:
            tagline = ""
    else:
        critica = item.extra.split("|")[5]
        if "%20" in critica:
            critica = "No hay críticas"
        icon = "http://imgur.com/SenkyxF.png"
        photo = item.extra.split("|")[0].replace(" ", "%20")
        foto = item.show.split("|")[1]
        try:
            # TMDB returns the tagline quoted; an empty one is '""'.
            if tagline == "\"\"":
                tagline = " "
        except:
            tagline = " "
        tagline = "[COLOR aquamarine][B]" + tagline + "[/B][/COLOR]"
        check2 = "pelicula"
    # "You may also like" recommendations from TMDB.
    peliculas = []
    if "serie" in item.url:
        url_tpi = "http://api.themoviedb.org/3/tv/" + item.show.split(
            "|"
        )[5] + "/recommendations?api_key=2e2160006592024ba87ccdf78c28f49f&language=es"
        data_tpi = scrapertools.cachePage(url_tpi)
        tpi = scrapertools.find_multiple_matches(
            data_tpi,
            'id":(.*?),.*?"original_name":"(.*?)",.*?"poster_path":(.*?),"popularity"'
        )
    else:
        url_tpi = "http://api.themoviedb.org/3/movie/" + item.extra.split(
            "|"
        )[1] + "/recommendations?api_key=2e2160006592024ba87ccdf78c28f49f&language=es"
        data_tpi = scrapertools.cachePage(url_tpi)
        tpi = scrapertools.find_multiple_matches(
            data_tpi,
            'id":(.*?),.*?"original_title":"(.*?)",.*?"poster_path":(.*?),"popularity"'
        )
    for idp, peli, thumb in tpi:
        thumb = re.sub(r'"|}', '', thumb)
        if "null" in thumb:
            thumb = "http://s6.postimg.org/tw1vhymj5/noposter.png"
        else:
            thumb = "https://image.tmdb.org/t/p/original" + thumb
        peliculas.append([idp, peli, thumb])
    check2 = check2.replace("pelicula", "movie").replace("serie", "tvshow")
    infoLabels = {
        'title': title,
        'plot': plot,
        'thumbnail': photo,
        'fanart': foto,
        'tagline': tagline,
        'rating': rating
    }
    item_info = item.clone(info=infoLabels,
                           icon=icon,
                           extra=id,
                           rating=rating,
                           rating_filma=rating_filma,
                           critica=critica,
                           contentType=check2,
                           thumb_busqueda="http://imgur.com/kdfWEJ6.png")
    from channels import infoplus
    infoplus.start(item_info, peliculas)
def episodios(item):
    """Enumerate every episode of a jkanime series via its AJAX pager."""
    logger.info("pelisalacarta.channels.jkanime episodios")
    itemlist = []

    # Series page: grab synopsis, poster and the internal series id used
    # by the pagination endpoint.
    data = scrapertools.cache_page(item.url)
    scrapedplot = scrapertools.get_match(
        data, '<meta name="description" content="([^"]+)"/>')
    scrapedthumbnail = scrapertools.find_single_match(
        data, '<div class="separedescrip">.*?src="([^"]+)"')
    idserie = scrapertools.get_match(data, "ajax/pagination_episodes/(\d+)/")
    logger.info("idserie=" + idserie)

    if " Eps" in item.extra and not "Desc" in item.extra:
        # item.extra carries the episode count (e.g. "24 Eps"); the pager
        # serves ten episodes per page.
        capitulos = int(item.extra.replace(" Eps", ""))
        paginas = capitulos / 10 + (capitulos % 10 > 0)
    else:
        paginas, capitulos = getPagesAndEpisodes(data)
    logger.info("idserie=" + idserie)

    episodio_patron = '"number"\:"(\d+)","title"\:"([^"]+)"'
    for num_pagina in range(1, paginas + 1):
        # NOTE(review): these headers are built but never passed to
        # cache_page below — kept as-is to preserve behavior.
        headers = []
        headers.append([
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:16.0) Gecko/20100101 Firefox/16.0"
        ])
        headers.append(["Referer", item.url])
        data2 = scrapertools.cache_page(
            "http://jkanime.net/ajax/pagination_episodes/" + idserie + "/" +
            str(num_pagina) + "/")
        logger.info("data2=" + data2)
        # Response is JSON like: [{"number":"1","title":"..."}, ...]
        for numero, scrapedtitle in re.compile(episodio_patron,
                                               re.DOTALL).findall(data2):
            title = scrapedtitle.strip()
            url = urlparse.urljoin(item.url, numero)
            thumbnail = scrapedthumbnail
            plot = scrapedplot
            if (DEBUG):
                logger.info("title=[" + title + "], url=[" + url +
                            "], thumbnail=[" + thumbnail + "]")
            itemlist.append(
                Item(channel=item.channel,
                     action="findvideos",
                     title=title,
                     url=url,
                     thumbnail=thumbnail,
                     fanart=thumbnail,
                     plot=plot))

    if len(itemlist) == 0:
        # Nothing listed: the series may simply not have aired yet.
        try:
            scrapertools.get_match(
                data,
                '<div[^<]+<span class="labl">Estad[^<]+</span[^<]+<span[^>]+>Por estrenar</span>'
            )
            itemlist.append(
                Item(channel=item.channel,
                     action="findvideos",
                     title="Serie por estrenar",
                     url="",
                     thumbnail=scrapedthumbnail,
                     fanart=scrapedthumbnail,
                     plot=scrapedplot,
                     server="directo",
                     folder=False))
        except:
            pass
    return itemlist
def peliculas(item):
    """Movie listing for peliculasdk.

    Returns one Item per film on the page (adult genres filtered out) and
    a "next page" Item when a pagination link exists.

    FIX: the pagination get_match was unguarded, so on the last page
    (no "Siguiente" link) it raised and aborted the whole listing; it is
    now wrapped in try/except like buscador().
    """
    logger.info("pelisalacarta.peliculasdk peliculas")
    itemlist = []
    # Download the page
    data = scrapertools.cache_page(item.url)
    data = re.sub(r"\n|\r|\t|\s{2}| |&#.*?;", "", data)
    patron = 'style="position:relative;"> '
    patron += '<a href="([^"]+)" '
    patron += 'title="([^<]+)">'
    patron += '<img src="([^"]+)".*?'
    patron += 'Audio:(.*?)</br>.*?'
    patron += 'Calidad:(.*?)</br>.*?'
    patron += 'Género:.*?tag">(.*?)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)
    for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedlenguaje, scrapedcalidad, scrapedgenero in matches:
        try:
            year = scrapertools.get_match(scrapedtitle, '\((\d+)\)')
        except:
            year = ""
        # Cleaned-up title used later by the fanart action.
        title_fan = re.sub(
            r"\[.*?\]|\(.*?\)|\d+x\d+.*?Final|-\d+|-|\d+x\d+|Temporada.*?Completa| ;",
            "", scrapedtitle)
        scrapedtitle = re.sub(r"\(\d+\)", "", scrapedtitle).strip()
        scrapedcalidad = re.sub(r"<a href.*?>|</a>", "", scrapedcalidad).strip()
        scrapedlenguaje = re.sub(r"<a href.*?>|</a>", "",
                                 scrapedlenguaje).strip()
        scrapedcalidad = scrapedcalidad.replace(
            scrapedcalidad,
            bbcode_kodi2html("[COLOR orange]" + scrapedcalidad + "[/COLOR]"))
        # Skip adult content entirely.
        if not "Adultos" in scrapedgenero and not "Adultos" in scrapedlenguaje and not "Adultos" in scrapedcalidad:
            scrapedlenguaje = scrapedlenguaje.replace(
                scrapedlenguaje,
                bbcode_kodi2html("[COLOR orange]" + scrapedlenguaje + "[/COLOR]"))
            scrapedtitle = scrapedtitle + "-(Idioma: " + scrapedlenguaje + ")" + "-(Calidad: " + scrapedcalidad + ")"
            scrapedtitle = scrapedtitle.replace(
                scrapedtitle,
                bbcode_kodi2html("[COLOR white]" + scrapedtitle + "[/COLOR]"))
            extra = year + "|" + title_fan
            itemlist.append(
                Item(channel=item.channel,
                     title=scrapedtitle,
                     url=scrapedurl,
                     action="fanart",
                     thumbnail=scrapedthumbnail,
                     extra=extra,
                     fanart="http://s18.postimg.org/h9kb22mnt/pdkfanart.jpg",
                     folder=True))
    ## Pagination (best effort: the link is absent on the last page)
    try:
        next_page = scrapertools.get_match(
            data,
            '<span class="current">.*?<a href="(.*?)".*?>Siguiente »</a></div>'
        )
        title = "siguiente>>"
        title = title.replace(title,
                              bbcode_kodi2html("[COLOR red]" + title + "[/COLOR]"))
        itemlist.append(
            Item(channel=item.channel,
                 action="peliculas",
                 title=title,
                 url=next_page,
                 thumbnail="http://s6.postimg.org/uej03x4r5/bricoflecha.png",
                 fanart="http://s18.postimg.org/h9kb22mnt/pdkfanart.jpg",
                 folder=True))
    except:
        pass
    return itemlist
def listado(item):
    """Grid listing for peliserie (movies/series), 56 entries per page.

    The site returns one big grid; this slices out the entries for the
    page encoded in item.url ("page=N") and appends a "next page" Item
    while the end-of-list marker is absent.

    FIX: the page number was parsed with float(), so str(pag_actual)
    yielded e.g. "3.0", rfind() failed and the next-page URL was corrupted
    for every page >= 2; it is now parsed as int.
    """
    logger.info("[peliserie.py] listado")
    itemlist = []
    data = re.sub(r"\n|\r|\t|\s{2}|(<!--.*?-->)", "",
                  scrapertools.cache_page(item.url))
    data = data.replace('<div class="list-end">',
                        '</div></div><div class="list-end">')
    patron = '<div class="grid-list(.*?)</a></div></div></div>'
    try:
        fichas = scrapertools.get_match(data, patron) + '</a>'
    except:
        return itemlist  # No grid on the page: return an empty list
    # Each entry looks like:
    #   <a href="/movie/..." title="Big Eyes" data-label="Big Eyes - 2014" ...>
    #     <div class="poster"><div ... data-image="/images/posters/....png"></div>
    #     <div class="quality c4">DVD Rip</div>
    #     <div class="lang"><img src="./images/flags/lang/flag_0.png"/></div>
    #     ...</a>
    patron = '<a href="([^"]+).*?'  # url
    patron += 'title="([^"]+).*?'  # title
    patron += 'data-label=".*?(\d{4})".*?'  # year
    patron += '<div class="poster">(.*?)</a>'  # info
    matches = re.compile(patron, re.DOTALL).findall(fichas)
    logger.info("[peliserie.py] listado: matches " + str(len(matches)))
    pag_actual = 1
    i = 0
    if 'search?q=' not in item.url:
        # Prepare pagination state.
        if not 'page=' in item.url:  # http://www.peliserie.com/series
            item.url += '?page=1'
        else:  # http://www.peliserie.com/series?page=3
            pag_actual = int(scrapertools.get_match(item.url, 'page=(\d+)'))
    if item.extra == 'series':
        action = 'getEpisodios'
    else:
        action = "findvideos"
    for url, title, year, info in matches:
        i += 1
        # Skip entries belonging to previous pages (56 per page).
        if i > ((pag_actual - 1) * 56):
            thumbnail = __url_base__ + scrapertools.get_match(
                info, 'data-image="([^"]+)"></div>.*?')
            show = title
            show += '|' + year  # pass the year along for the fanart lookup
            url = __url_base__ + url
            if 'search?q=' in item.url:
                # Search results carry no language/quality decoration.
                itemlist.append(
                    Item(channel=__channel__, action=action, title=title,
                         url=url, thumbnail=thumbnail, extra=item.extra,
                         show=show))
            else:
                idiomas = ''
                try:
                    idiomas = scrapertools.get_match(
                        info, '<div class="lang">(.*?)</div>')
                    lang = []
                    if 'flag_0.png' in idiomas: lang.append('Es')
                    if 'flag_1.png' in idiomas: lang.append('Lat')
                    if 'flag_2.png' in idiomas: lang.append('VO')
                    if 'flag_3.png' in idiomas: lang.append('VOSE')
                    if len(lang) > 0:
                        idiomas = ' [' + "/".join(lang) + ']'
                except:
                    # No language flags on the card: not an error.
                    pass
                try:
                    logger.info("[peliserie.py] listado item.extra: " +
                                item.extra)
                    calidad = ' [' + scrapertools.get_match(
                        info,
                        '<div class="quality[^"]+">([^<]*)</div>.*?') + ']'
                    title = title + calidad + idiomas
                    itemlist.append(
                        Item(channel=__channel__, action=action, title=title,
                             url=url, thumbnail=thumbnail, extra=item.extra,
                             show=show))
                except:
                    # No quality tag means there are no links yet: skip.
                    pass
    # Pagination: the end-of-list marker only appears on the last page.
    if not '<div class="list-end">' in data:
        url_next_page = item.url[:item.url.rfind(str(pag_actual))] + str(
            pag_actual + 1)
        itemlist.append(
            Item(channel=__channel__, action="listado",
                 title=">> Página siguiente", url=url_next_page,
                 extra=item.extra))
    logger.info("[peliserie.py] listado: itemlist " + str(len(itemlist)))
    return itemlist
def findvideos(item):
    """Build playable links for a peliculasdk movie page.

    Variant that resolves embed templates from the data-src attribute in
    videod.js; also offers an "add to library" entry when library support
    is enabled.
    """
    logger.info("pelisalacarta.peliculasdk findvideos")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    # Strip HTML comments and collapse whitespace for the regexes below.
    data = re.sub(r"<!--.*?-->", "", data)
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    # Tab header block: maps each tab id to its language/quality labels.
    bloque_tab = scrapertools.find_single_match(
        data, '<div id="verpelicula">(.*?)<div class="tab_container">')
    patron = '<li><a href="#([^<]+)"><span class="re">\d<\/span><span class="([^<]+)"><\/span><span class=.*?>([^<]+)<\/span>'
    check = re.compile(patron, re.DOTALL).findall(bloque_tab)
    servers_data_list = []
    # Each tab body carries a JS call "<server>(<video id>)"; the site
    # serves it either through rocketscript or as a plain <script> tag.
    patron = '<div id="(tab\d+)" class="tab_content"><script type="text/rocketscript">(\w+)\("([^"]+)"\)</script></div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if len(matches) == 0:
        patron = '<div id="(tab\d+)" class="tab_content"><script>(\w+)\("([^"]+)"\)</script></div>'
        matches = re.compile(patron, re.DOTALL).findall(data)
    for check_tab, server, id in matches:
        # Re-format the synopsis block with Kodi colour bbcode.
        scrapedplot = scrapertools.get_match(
            data, '<span class="clms">(.*?)</div></div>')
        plotformat = re.compile('(.*?:) </span>', re.DOTALL).findall(scrapedplot)
        scrapedplot = scrapedplot.replace(
            scrapedplot,
            bbcode_kodi2html("[COLOR white]" + scrapedplot + "[/COLOR]"))
        for plot in plotformat:
            scrapedplot = scrapedplot.replace(
                plot, bbcode_kodi2html("[COLOR red][B]" + plot + "[/B][/COLOR]"))
        scrapedplot = scrapedplot.replace("</span>", "[CR]")
        scrapedplot = scrapedplot.replace(":", "")
        if check_tab in str(check):
            # Pull the language/quality labels that belong to this tab id.
            idioma, calidad = scrapertools.find_single_match(
                str(check), "" + check_tab + "', '(.*?)', '(.*?)'")
            servers_data_list.append([server, id, idioma, calidad])
    # videod.js maps each JS function name to its embed URL template.
    url = "http://www.peliculasdk.com/Js/videod.js"
    data = scrapertools.cachePage(url)
    data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
    data = data.replace(
        '<iframe width="100%" height="400" scrolling="no" frameborder="0"', '')
    patron = 'function (\w+)\(id\).*?'
    patron += 'data-src="([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    for server, url in matches:
        for enlace, id, idioma, calidad in servers_data_list:
            if server == enlace:
                # Rebuild the direct embed URL from the JS template.
                video_url = re.sub(r"embed\-|\-.*?x.*?\.html|u\'|\'\(", "",
                                   str(url))
                video_url = re.sub(r"'\+codigo\+'", "", video_url)
                video_url = video_url.replace('embed//', 'embed/')
                video_url = video_url + id
                if "goo.gl" in video_url:
                    # Expand the shortener; skip the link entirely on failure.
                    try:
                        from unshortenit import unshorten
                        url = unshorten(video_url)
                        video_url = scrapertools.get_match(
                            str(url), "u'([^']+)'")
                    except:
                        continue
                servertitle = scrapertools.get_match(video_url,
                                                     'http.*?://(.*?)/')
                servertitle = servertitle.replace(
                    servertitle,
                    bbcode_kodi2html("[COLOR red]" + servertitle + "[/COLOR]"))
                # Normalise host aliases to the canonical server name.
                servertitle = servertitle.replace("embed.", "")
                servertitle = servertitle.replace("player.", "")
                servertitle = servertitle.replace("api.video.", "")
                servertitle = servertitle.replace("hqq.tv", "netu.tv")
                servertitle = servertitle.replace("anonymouse.org", "netu.tv")
                title = bbcode_kodi2html(
                    "[COLOR orange]Ver en --[/COLOR]"
                ) + servertitle + " " + idioma + " " + calidad
                itemlist.append(
                    Item(channel=item.channel,
                         title=title,
                         url=video_url,
                         action="play",
                         thumbnail=item.category,
                         plot=scrapedplot,
                         fanart=item.show))
    # Optional "add to library" entry when library support is enabled.
    if item.library and config.get_library_support() and len(itemlist) > 0:
        infoLabels = {
            'tmdb_id': item.infoLabels['tmdb_id'],
            'title': item.fulltitle
        }
        itemlist.append(
            Item(channel=item.channel,
                 title="Añadir esta película a la biblioteca",
                 action="add_pelicula_to_library",
                 url=item.url,
                 infoLabels=infoLabels,
                 text_color="0xFFff6666",
                 thumbnail='http://imgur.com/0gyYvuC.png'))
    return itemlist
def findvideos(item):
    """List online-viewing links for a peliserie movie or episode page.

    FIX: removed a leftover debug "print item.plot" statement; no other
    behavior changed.
    """
    logger.info("[peliserie.py] findvideos extra: " + item.extra)
    itemlist = []
    if item.extra == 'peliculas':
        # Only online links are listed. To show all of them instead:
        # patron = 'id="contribution-view">(.*?)class="list-end"'
        patron = 'id="contribution-view">(.*?)</ul>'
        # Look up the fanart on TMDB (best effort).
        year = item.show.split('|')[1]
        item.show = item.show.split('|')[0]
        try:
            from core.tmdb import Tmdb
            oTmdb = Tmdb(texto_buscado=item.show, year=year)
            item.fanart = oTmdb.get_backdrop()
        except:
            pass
    else:
        # 'series' and 'play_from_library': only online links are listed.
        patron = 'id="view-list">(.*?)</ul>'
    # Download the page.
    data = re.sub(r"\n|\r|\t|\s{2}|(<!--.*?-->)", "",
                  scrapertools.cache_page(item.url))
    if item.plot == '':
        item.plot = scrapertools.entityunescape(
            scrapertools.get_match(data, '<p class="sinopsis">(.*?)</p>'))
    data = scrapertools.get_match(data, patron)
    patron = '<li data-id="(.*?)</li>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    # Each <li> holds: server name, language flag, quality, uploader and
    # an "/external?action=...&id=..." watch link.
    for i in matches:
        servidor = scrapertools.get_match(
            i, '<div class="column"><strong>([^<]+)</strong>')
        mostrar_server = True
        if config.get_setting("hidepremium") == "true":
            mostrar_server = servertools.is_server_enabled(servidor)
        if mostrar_server:
            idioma = scrapertools.get_match(i, '<img src="(.*?)"/>')
            if 'flag_0.png' in idioma:
                idioma = 'Es'
            elif 'flag_1.png' in idioma:
                idioma = 'Lat'
            elif 'flag_2.png' in idioma:
                idioma = 'VO'
            elif 'flag_3.png' in idioma:
                idioma = 'VOSE'
            calidad = scrapertools.get_match(
                i, '<div class="column">([^<]+)</div>')
            url = __url_base__ + scrapertools.get_match(i, '<a href="([^"]+)"')
            title = 'Ver en ' + servidor + ' [' + calidad + '] (' + idioma + ')'
            itemlist.append(
                Item(channel=__channel__, action="play",
                     viewmode="movie_with_plot", server=servidor, title=title,
                     plot=item.plot, thumbnail=item.thumbnail,
                     fanart=item.fanart, fulltitle=item.title, url=url,
                     extra=item.extra, folder=False))
    return itemlist
def series(item):
    """Parse a jkanime search/listing page into a list of series Items."""
    logger.info("pelisalacarta.channels.jkanime series")

    page = scrapertools.cache_page(item.url)

    # Each result is a <table class="search"> holding the poster link, the
    # title link, the type ("Serie"), the episode count and a synopsis row.
    patron = ('<table class="search[^<]+'
              '<tr[^<]+'
              '<td[^<]+'
              '<a href="([^"]+)"><img src="([^"]+)"[^<]+</a>[^<]+'
              '</td>[^<]+'
              '<td><a[^>]+>([^<]+)</a></td>[^<]+'
              '<td[^>]+>([^<]+)</td>[^<]+'
              '<td[^>]+>([^<]+)</td>[^<]+'
              '</tr>[^<]+'
              '<tr>[^<]+'
              '<td>(.*?)</td>')
    entries = re.findall(patron, page, re.DOTALL)

    itemlist = []
    for result_url, result_thumb, result_title, kind, eps, result_plot in entries:
        title = "%s (%s) (%s)" % (result_title.strip(), kind.strip(),
                                  eps.strip())
        extra = eps.strip()
        url = urlparse.urljoin(item.url, result_url)
        # The listing serves small thumbnails; swap in the full-size image.
        thumbnail = urlparse.urljoin(item.url,
                                     result_thumb).replace("thumbnail", "image")
        plot = scrapertools.htmlclean(result_plot)
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=item.channel, action="episodios", title=title,
                 url=url, thumbnail=thumbnail, fanart=thumbnail, plot=plot,
                 extra=extra))

    # The "next results" link is absent on the last page: best effort only.
    try:
        siguiente = scrapertools.get_match(
            page,
            '<a class="listsiguiente" href="([^"]+)" >Resultados Siguientes')
        itemlist.append(
            Item(channel=item.channel, action="series",
                 title=">> Pagina Siguiente",
                 url=urlparse.urljoin(item.url, siguiente), thumbnail="",
                 plot="", folder=True, viewmode="movie_with_plot"))
    except:
        pass
    return itemlist