def novedades_episodios(item):
    """List the latest episodes from the animeflv home page.

    Scrapes the "not" blocks, builds one findvideos Item per episode and,
    when the anchor title ends in an episode number, fills in the tracking
    metadata (contentTitle / contentSeason / contentEpisodeNumber).
    """
    logger.info("pelisalacarta.channels.animeflv novedades")

    data = scrapertools.anti_cloudflare(item.url, headers=CHANNEL_DEFAULT_HEADERS, host=CHANNEL_HOST)

    # Sample entry:
    # <div class="not">
    #   <a href="/ver/cyclops-shoujo-saipu-12.html" title="Cyclops Shoujo Saipu 12">
    #   <img class="imglstsr lazy" src="http://cdn.animeflv.net/img/mini/957.jpg" border="0">
    #   <span class="tit_ep"><span class="tit">Cyclops Shoujo Saipu 12</span></span>
    #   </a>
    # </div>
    patronvideos = '<div class="not"[^<]+<a href="([^"]+)" title="([^"]+)"[^<]+<img class="[^"]+" ' \
                   'src="([^"]+)"[^<]+<span class="tit_ep"><span class="tit">([^<]+)<'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    itemlist = []

    for match in matches:
        scrapedtitle = scrapertools.entityunescape(match[3])
        fulltitle = scrapedtitle
        scrapedurl = urlparse.urljoin(item.url, match[0])
        # "mini" thumbnails are low-res; "portada" is the full cover art
        scrapedthumbnail = urlparse.urljoin(item.url, match[2].replace("mini", "portada"))
        scrapedplot = ""

        new_item = Item(channel=item.channel, action="findvideos", title=scrapedtitle, url=scrapedurl,
                        thumbnail=scrapedthumbnail, plot=scrapedplot, fulltitle=fulltitle)

        content_title = scrapertools.entityunescape(match[1])
        if content_title:
            episode = scrapertools.get_match(content_title, '\s+(\d+)$')
            # BUGFIX: the old code used content_title.replace(episode, ''),
            # which removed EVERY occurrence of those digits (e.g.
            # "Slam Dunk 2 12" -> "Slam Dunk  "). Strip only the trailing
            # episode suffix instead.
            content_title = re.sub(r'\s+' + re.escape(episode) + r'$', '', content_title)
            season, episode = numbered_for_tratk(content_title, 1, episode)
            new_item.hasContentDetails = "true"
            new_item.contentTitle = content_title
            new_item.contentSeason = season
            new_item.contentEpisodeNumber = int(episode)
        itemlist.append(new_item)

    return itemlist
def generos(item):
    """Build the genre menu from the "generos_box" section of the page."""
    logger.info("pelisalacarta.channels.animeflv generos")

    page = scrapertools.cache_page(item.url)
    box = scrapertools.get_match(page, '<div class="generos_box"(.*?)</div>')

    itemlist = []
    link_re = re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL)
    for href, raw_title in link_re.findall(box):
        genre = scrapertools.entityunescape(raw_title)
        genre_url = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=[" + genre + "], url=[" + genre_url + "], thumbnail=[" + "" + "]")
        itemlist.append(Item(channel=__channel__, action="series", title=genre,
                             url=genre_url, thumbnail="", plot=""))
    return itemlist
def novedades_episodios(item):
    """Latest-episodes listing: one findvideos Item per "lastcap" entry."""
    logger.info()

    page = httptools.downloadpage(item.url).data
    section = scrapertools.find_single_match(page, '<section class="lastcap">(.*?)</section>')

    patronvideos = '(?s)<a href="([^"]+)">[^<]+<header>([^<]+).*?src="([^"]+)"[\s\S]+?<p>(.+?)</p>'
    entries = re.compile(patronvideos, re.DOTALL).findall(section)

    itemlist = []
    for href, raw_title, thumb, synopsis in entries:
        clean_title = scrapertools.entityunescape(raw_title)
        full_url = urlparse.urljoin(item.url, href)
        # the "#NN" suffix (if any) marks the episode; the base title precedes it
        episodio = scrapertools.find_single_match(clean_title, '\s+#(.*?)$')
        itemlist.append(Item(channel=item.channel, action="findvideos", title=clean_title,
                             url=full_url, thumbnail=thumb, plot=synopsis,
                             contentSeason=1,
                             contentTitle=clean_title.replace('#' + episodio, '')))
    return itemlist
def generos(item):
    """Genre menu (anti_cloudflare variant) shown with the movies_with_plot view."""
    logger.info("pelisalacarta.channels.animeflv generos")

    data = scrapertools.anti_cloudflare(item.url, headers=CHANNEL_DEFAULT_HEADERS, host=CHANNEL_HOST)
    box = scrapertools.get_match(data, '<div class="generos_box"(.*?)</div>')

    itemlist = []
    for href, raw in re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL).findall(box):
        genre = scrapertools.entityunescape(raw)
        genre_url = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(genre, genre_url, ""))
        itemlist.append(Item(channel=item.channel, action="series", title=genre, url=genre_url,
                             thumbnail="", plot="", viewmode="movies_with_plot"))
    return itemlist
def letras(item):
    """A-Z index menu; falls back to a single error entry when the page fails."""
    logger.info("pelisalacarta.channels.animeflv letras")

    status_code, data = get_page(item.url)
    logger.info("pelisalacarta.channels.animeflv **** {0}".format(status_code))

    # Guard clause: surface HTTP failures as one menu entry pointing home.
    if status_code != requests.codes.ok:
        return [Item(channel=__channel__, action="mainlist",
                     title="No se ha podido cargar la pagina ERROR:{0}".format(status_code),
                     url="", thumbnail="", plot="")]

    box = scrapertools.get_match(data, '<div class="alfabeto_box"(.*?)</div>')
    itemlist = []
    for href, raw in re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL).findall(box):
        letter = scrapertools.entityunescape(raw)
        letter_url = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(letter, letter_url, ""))
        itemlist.append(Item(channel=__channel__, action="series", title=letter, url=letter_url,
                             thumbnail="", plot=""))
    return itemlist
def novedades_episodios(item):
    """List the newest episodes from the animeid front page.

    Parses the <article> entries inside the "lastcap" section; each entry
    yields a findvideos Item carrying title, thumbnail and plot.
    """
    logger.info()

    # Sample markup (trimmed):
    # <article> <a href="/ver/uchuu-kyoudai-35"> <header>Uchuu Kyoudai #35</header>
    # <figure><img src="..." class="cover" .../></figure> ... <p>plot...</p> </article>
    data = httptools.downloadpage(item.url).data
    data = scrapertools.get_match(data, '<section class="lastcap">(.*?)</section>')

    patronvideos = '<a href="([^"]+)">[^<]+<header>([^<]+)</header>[^<]+<figure><img[^>]+src="([^"]+)"[\s\S]+?<p>(.+?)</p>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    itemlist = []

    for url, title, thumbnail, plot in matches:
        scrapedtitle = scrapertools.entityunescape(title)
        scrapedurl = urlparse.urljoin(item.url, url)
        scrapedthumbnail = thumbnail
        scrapedplot = plot

        logger.debug("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        # ROBUSTNESS FIX: get_match() raises when a title has no "#NN" suffix,
        # aborting the whole listing; find_single_match() simply returns "".
        episodio = scrapertools.find_single_match(scrapedtitle, '\s+#(.*?)$')
        if episodio:
            contentTitle = scrapedtitle.replace('#' + episodio, '')
        else:
            contentTitle = scrapedtitle

        itemlist.append(Item(channel=item.channel, action="findvideos", title=scrapedtitle,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             hasContentDetails=True, contentSeason=1, contentTitle=contentTitle))
    return itemlist
def buscador(item, paginacion=True):
    """Search-results listing; keeps only entries tagged Peliculas or Series."""
    logger.info("[shurweb.py] peliculas")

    # Download the page
    data = scrapertools.cachePage(item.url)

    patronvideos = '<a href="([^"]+)" style="display:none;" rel="nofollow"><img src="([^"]+)" width="100" height="144" border="0" alt="" /><br/><br/>[^<]+<b>([^<]+)</b></a>[^<]+<a href="([^"]+)">([^#]+)#888"><b>([^<]+)</b>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        # match[5] carries the section label; skip anything else
        if match[5] not in ("Peliculas", "Series"):
            continue
        name = scrapertools.entityunescape(match[2])
        if DEBUG:
            logger.info("title=[" + name + "], url=[" + match[3] + "], thumbnail=[" + match[1] + "]")
        itemlist.append(Item(channel=__channel__, action='findvideos', title=name, fulltitle=name,
                             url=match[3], thumbnail=match[1], plot="", extra=name, context="4|5"))
    return itemlist
def novedades(item):
    """List the newest entries of capitancinema's home page."""
    logger.info("[capitancinema.py] novedades")

    # Download the page
    data = scrapertools.cachePage(item.url)

    # One entry per table cell: url, thumbnail and title from the <a>/<img> pair
    patronvideos = '<td width="23\%"><a href="([^"]+)"[^>]+><img style="[^"]+" src="([^"]+)" border="0" alt="([^"]+)"[^>]+></a></td>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    itemlist = []
    for match in matches:
        scrapedtitle = match[2]
        # BUGFIX: the original `replace(""","")` was a garbled, unterminated
        # string literal (an HTML-escaped `&quot;`). Drop the entity before
        # unescaping the rest of the title.
        scrapedtitle = scrapedtitle.replace("&quot;", "")
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = urlparse.urljoin(item.url, match[1])
        scrapedplot = ""
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(Item(channel=CHANNELNAME, action="mirrors", title=scrapedtitle,
                             url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                             folder=True))
    return itemlist
def paises(item):
    """Country filter menu parsed from the <h2>País</h2> list."""
    logger.info("[animeflv.py] paises")

    data = scrapertools.cache_page(item.url)
    data = scrapertools.get_match(data, '<h2>País</h2>(.*?)</ul')

    itemlist = []
    for href, raw in re.compile("<a href='([^']+)'>([^<]+)</a>", re.DOTALL).findall(data):
        country = scrapertools.entityunescape(raw)
        country_url = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=[" + country + "], url=[" + country_url + "], thumbnail=[" + "" + "]")
        itemlist.append(Item(channel=__channel__, action="peliculas", title=country,
                             url=country_url, thumbnail="", plot=""))
    return itemlist
def letras(item):
    """A-Z menu (anti_cloudflare variant) with the movies_with_plot view."""
    logger.info()

    data = scrapertools.anti_cloudflare(item.url, headers=CHANNEL_DEFAULT_HEADERS, host=CHANNEL_HOST)
    # collapse whitespace/markup noise before matching
    data = re.sub(r"\n|\r|\t|\s{2}| |<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)
    data = scrapertools.get_match(data, '<div class="alphabet">(.+?)</div>')

    itemlist = []
    for href, raw in re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL).findall(data):
        letter = scrapertools.entityunescape(raw)
        letter_url = urlparse.urljoin(item.url, href)
        logger.debug("title=[{0}], url=[{1}], thumbnail=[{2}]".format(letter, letter_url, ""))
        itemlist.append(Item(channel=item.channel, action="series", title=letter, url=letter_url,
                             thumbnail="", plot="", viewmode="movies_with_plot"))
    return itemlist
def peliculas(item, paginacion=True, data=None):
    """Movie listing for shurweb; optionally reuses pre-fetched page HTML."""
    logger.info("[shurweb.py] peliculas")

    # Download the page unless the caller already supplied it
    if data is None:
        data = scrapertools.cachePage(item.url)

    patronvideos = '<a class="video_thumb" href="([^"]+)" rel="bookmark" title="([^"]+)">.*?<img.*?src="([^"]+)"'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for href, raw_title, thumb in matches:
        name = scrapertools.entityunescape(raw_title)
        if DEBUG:
            logger.info("title=[" + name + "], url=[" + href + "], thumbnail=[" + thumb + "]")
        itemlist.append(Item(channel=__channel__, action='findvideos', title=name, fulltitle=name,
                             url=href, thumbnail=thumb, plot="", extra=name, viewmode="movie",
                             context="4|5",
                             fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))

    # <span class="i_next fr" ><a href="http://www.shurweb.es/.../page/2/" >Ver Más Videos</a> </span>
    try:
        next_page_url = scrapertools.get_match(data, '<span class="i_next fr" ><a href="([^"]+)" >Ver M')
        itemlist.append(Item(channel=__channel__, title=">> Página siguiente", action="peliculas",
                             url=urlparse.urljoin(item.url, next_page_url),
                             fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))
    except:
        # no pager on the last page
        pass
    return itemlist
def novedades(item):
    """New-episodes listing for animeflv (plain cache_page variant)."""
    logger.info("pelisalacarta.channels.animeflv novedades")

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Sample entry:
    # <div class="not"> <a href="/ver/cyclops-shoujo-saipu-12.html" title="...">
    # <img class="imglstsr lazy" src="http://cdn.animeflv.net/img/mini/957.jpg" border="0">
    # <span class="tit_ep"><span class="tit">Cyclops Shoujo Saipu 12</span></span> </a>
    patronvideos = ('<div class="not"[^<]+<a href="([^"]+)" title="([^"]+)"[^<]+<img class="[^"]+" src="([^"]+)"[^<]+'
                    '<span class="tit_ep"><span class="tit">([^<]+)<')

    itemlist = []
    for href, _alt, thumb, raw_title in re.compile(patronvideos, re.DOTALL).findall(data):
        name = scrapertools.entityunescape(raw_title)
        full_url = urlparse.urljoin(item.url, href)
        # swap the low-res "mini" image for the full "portada" cover
        cover = urlparse.urljoin(item.url, thumb.replace("mini", "portada"))
        if DEBUG:
            logger.info("title=[" + name + "], url=[" + full_url + "], thumbnail=[" + cover + "]")
        itemlist.append(Item(channel=__channel__, action="findvideos", title=name, url=full_url,
                             thumbnail=cover, plot="", fulltitle=name, viewmode="movie"))
    return itemlist
def generos(item):
    """Genre menu via the requests-based get_page helper, with error fallback."""
    logger.info("pelisalacarta.channels.animeflv generos")

    status_code, data = get_page(item.url)
    logger.info("pelisalacarta.channels.animeflv **** {0}".format(item.url))
    logger.info("pelisalacarta.channels.animeflv **** {0}".format(status_code))

    # Guard clause: surface HTTP failures as a single entry pointing home.
    if status_code != requests.codes.ok:
        return [Item(channel=__channel__, action="mainlist",
                     title="No se ha podido cargar la pagina ERROR:{0}".format(status_code),
                     url="", thumbnail="", plot="")]

    box = scrapertools.get_match(data, '<div class="generos_box"(.*?)</div>')
    itemlist = []
    for href, raw in re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL).findall(box):
        genre = scrapertools.entityunescape(raw)
        genre_url = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(genre, genre_url, ""))
        itemlist.append(Item(channel=__channel__, action="series", title=genre, url=genre_url,
                             thumbnail="", plot=""))
    return itemlist
def ultimas_series(item):
    """Latest series posts on peliculamos ("associated-post" blocks)."""
    logger.info("[peliculamos.py] ultimas_series")

    data = scrapertools.cache_page(item.url)
    logger.info("data=" + data)

    patron = ('<div class="associated-post"><h3 class="post-title"><a href="([^"]+)" title="[^"]+">([^"]+)</a></h3><div class="post-excerpt">'
              '<div[^<]+<iframe[^<]+</iframe></div><p>([^<]+)<')

    itemlist = []
    for post_url, post_title, post_plot in re.compile(patron, re.DOTALL).findall(data):
        # unescape entities first, then strip any remaining markup
        post_title = scrapertools.htmlclean(scrapertools.entityunescape(post_title))
        if DEBUG:
            logger.info("title=[" + post_title + "], url=[" + post_url + "], thumbnail=[" + "" + "]")
        itemlist.append(Item(channel=__channel__, action="findvideos", title=post_title,
                             url=post_url, thumbnail="", plot=post_plot))
    return itemlist
def peliculas(item, paginacion=True):
    """Movie listing variant that appends the video duration to the title."""
    logger.info("[shurweb.py] peliculas")

    # Download the page
    data = scrapertools.cachePage(item.url)

    patronvideos = '<a class="video_thumb" href="([^"]+)" rel="bookmark" title="([^"]+)">[^<]+<img width="123" height="100" src="([^"]+)"[^<]+<span class="time">([^<]+)</span>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for href, raw_title, thumb, duration in matches:
        # "<title> (<duration>)" is the display title
        name = scrapertools.entityunescape(raw_title + " (" + duration + ")")
        if DEBUG:
            logger.info("title=[" + name + "], url=[" + href + "], thumbnail=[" + thumb + "]")
        itemlist.append(Item(channel=__channel__, action='findvideos', title=name, fulltitle=name,
                             url=href, thumbnail=thumb, plot="", extra=name, viewmode="movie",
                             context="4|5",
                             fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))
    return itemlist
def detalle_programa(item):
    """Scrape thumbnail, plot and title from an RTVE series detail page.

    Handles URLs like:
      http://www.rtve.es/infantil/series/monsuno/videos/
      http://www.rtve.es/infantil/series/hay-nuevo-scooby-doo/
    Returns the same ``item`` with whatever fields could be extracted;
    each field is best-effort and left untouched on failure.
    """
    url = item.url
    # the "/videos" listing URL maps back to the series root page
    if url.endswith("/videos"):
        url = url.replace("/videos", "")

    # Download the page (Python 2 print statements kept as-is)
    print "url=" + url
    data = scrapertools.cache_page(url)
    data = scrapertools.get_match(data, '<div class="contenido-serie">(.*?</div>)')
    print "data=" + data

    # Thumbnail: first <img> inside the series block, if present.
    try:
        item.thumbnail = scrapertools.get_match(data, '<img.*?src="([^"]+)"')
    except:
        pass

    # Plot: first <div> body, stripped of markup.
    try:
        item.plot = scrapertools.htmlclean(
            scrapertools.get_match(data, '<div>(.*?)</div>')).strip()
    except:
        pass

    # Title: the anchor text inside the <h3> header.
    try:
        title = scrapertools.get_match(
            data, '<h3>[^<]+<a[^>]+>([^<]+)</a>[^<]+</h3>').strip()
        item.title = scrapertools.entityunescape(title)
    except:
        pass

    return item
def letras(item):
    """A-Z index menu; each entry opens pagina_ on its /lista- URL."""
    logger.info()

    headers = DEFAULT_HEADERS[:]
    data = scrapertools.cache_page(item.url, headers=headers)

    itemlist = []
    for href, raw in re.compile(' <a href="(\/lista-.+?)">(.+?)<', re.DOTALL).findall(data):
        letter = scrapertools.entityunescape(raw)
        letter_url = urlparse.urljoin(host, href)
        logger.debug("title=[{0}], url=[{1}], thumbnail=[{2}]".format(letter, letter_url, ""))
        itemlist.append(Item(channel=item.channel, action="pagina_", title=letter,
                             url=letter_url, thumbnail="", plot=""))
    return itemlist
def letras(item):
    """A-Z menu for animeflv behind the anti_cloudflare helper."""
    logger.info("pelisalacarta.channels.animeflv letras")

    data = anti_cloudflare(item.url)
    box = scrapertools.get_match(data, '<div class="alfabeto_box"(.*?)</div>')

    itemlist = []
    for href, raw in re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL).findall(box):
        letter = scrapertools.entityunescape(raw)
        letter_url = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(letter, letter_url, ""))
        itemlist.append(Item(channel=__channel__, action="series", title=letter,
                             url=letter_url, thumbnail="", plot=""))
    return itemlist
def generos(item):
    """Genre menu parsed from the "Lista de Generos" comment block."""
    logger.info()

    headers = DEFAULT_HEADERS[:]
    data = scrapertools.cache_page(item.url, headers=headers)
    # flatten the page so the block regex can span lines
    data = data.replace('\n', '').replace('\r', '')
    data = scrapertools.get_match(data, '<!-- Lista de Generos -->(.+?)<\/div>')

    itemlist = []
    for href, raw in re.compile('<a href="(.+?)".+?>(.+?)<', re.DOTALL).findall(data):
        genre = scrapertools.entityunescape(raw)
        genre_url = urlparse.urljoin(host, href)
        logger.debug("title=[{0}], url=[{1}], thumbnail=[{2}]".format(genre, genre_url, ""))
        itemlist.append(Item(channel=item.channel, action="pagina_", title=genre,
                             url=genre_url, thumbnail="", plot=""))
    return itemlist
def novedades_episodios(item):
    """List the newest episodes from the animeid front page (cache_page variant).

    Parses the <article> entries inside the "lastcap" section; each entry
    yields a findvideos Item. When the title carries a "#NN" suffix the
    tracking metadata (contentTitle/contentSeason/contentEpisodeNumber)
    is filled in as well.
    """
    logger.info("pelisalacarta.channels.animeid novedades_episodios")

    # Sample markup (trimmed):
    # <article> <a href="/ver/uchuu-kyoudai-35"> <header>Uchuu Kyoudai #35</header>
    # <figure><img src="..." class="cover" .../></figure> ... <p>plot...</p> </article>
    data = scrapertools.cache_page(item.url)
    data = scrapertools.get_match(data, '<section class="lastcap">(.*?)</section>')

    patronvideos = '<a href="([^"]+)">[^<]+<header>([^<]+)</header>[^<]+<figure><img[^>]+src="([^"]+)"[\s\S]+?<p>(.+?)</p>'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    itemlist = []

    for url, title, thumbnail, plot in matches:
        scrapedtitle = scrapertools.entityunescape(title)
        scrapedurl = urlparse.urljoin(item.url, url)
        scrapedthumbnail = thumbnail
        scrapedplot = plot
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")

        new_item = Item(channel=item.channel, action="findvideos", title=scrapedtitle,
                        url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot)

        # ROBUSTNESS FIX: get_match() raises (and int('') would too) when the
        # title has no "#NN" suffix, aborting the whole listing; use
        # find_single_match() and only set episode metadata when present.
        episodio = scrapertools.find_single_match(scrapedtitle, '\s+#(\d+)$')
        if episodio:
            new_item.hasContentDetails = "true"
            new_item.contentSeason = 1
            new_item.contentTitle = scrapedtitle.replace('#' + episodio, '')
            new_item.contentEpisodeNumber = int(episodio)
        itemlist.append(new_item)

    return itemlist
def letras(item):
    """A-Z menu for animeflv using the channel request headers."""
    logger.info("[animeflv.py] letras")

    data = scrapertools.cache_page(item.url, headers=ANIMEFLV_REQUEST_HEADERS)
    box = scrapertools.get_match(data, '<div class="alfabeto_box"(.*?)</div>')

    itemlist = []
    for href, raw in re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL).findall(box):
        letter = scrapertools.entityunescape(raw)
        letter_url = urlparse.urljoin(item.url, href)
        if DEBUG:
            logger.info("title=[" + letter + "], url=[" + letter_url + "], thumbnail=[" + "" + "]")
        itemlist.append(Item(channel=__channel__, action="series", title=letter,
                             url=letter_url, thumbnail="", plot=""))
    return itemlist
def ultimos_episodios(item):
    """Latest episode posts on peliculamos (centered <p><a> entries)."""
    logger.info("[peliculamos.py] ultimos_episodios")

    data = scrapertools.cache_page(item.url)
    logger.info("data=" + data)

    # <p style="text-align: center;"><a href="..."><strong>The Vampire Diaries 4x18 Sub ITA</strong></a></p>
    patron = '<p style="text-align\: center\;"><a href="([^"]+)">(.*?)</a></p>'

    itemlist = []
    for episode_url, episode_title in re.compile(patron, re.DOTALL).findall(data):
        # unescape entities first, then strip the <strong> wrapper
        episode_title = scrapertools.htmlclean(scrapertools.entityunescape(episode_title))
        if DEBUG:
            logger.info("title=[" + episode_title + "], url=[" + episode_url + "], thumbnail=[" + "" + "]")
        itemlist.append(Item(channel=__channel__, action="findvideos", title=episode_title,
                             url=episode_url, thumbnail="", plot=""))
    return itemlist
def findvideos(item):
    """List the playable mirrors of a yaske movie page.

    Each <tr> of the options table carries the option label, hosting server
    (taken from the favicon domain), audio-language flag, subtitle text and
    quality, which are combined into the displayed title.
    """
    logger.info("pelisalacarta.yaske findvideos url=" + item.url)

    # Download the page
    data = scrapertools.cache_page(item.url)

    # Sample row (trimmed):
    # <tr bgcolor=""> <td ...><a ... href="http://www.yaske.net/es/reproductor/pelicula/2141/44446/">
    #   <i class="icon-play"></i><b> Opcion 04</b></a></td>
    # <td align="left"><img src="http://www.google.com/s2/favicons?domain=played.to"/>played</td>
    # <td align="center"><img src=".../flags/la_la.png" width="21">Lat.</td>
    # <td align="center" class="center"><span ...>hd real 720</span></td> ... </tr>
    patron = '<tr bgcolor=(.*?)</tr>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    itemlist = []

    for tr in matches:
        logger.info("tr=" + tr)
        try:
            title = scrapertools.get_match(tr, '<b>([^<]+)</b>')
            server = scrapertools.get_match(tr, '"http\://www.google.com/s2/favicons\?domain\=([^"]+)"')
            idioma = scrapertools.get_match(tr, '<img src="http://www.yaske.[a-z]+/theme/01/data/images/flags/([a-z_]+).png"[^>]+>[^<]*<')
            subtitulos = scrapertools.get_match(tr, '<img src="http://www.yaske.[a-z]+/theme/01/data/images/flags/[^"]+"[^>]+>([^<]*)<')
            calidad = scrapertools.get_match(tr, '<td align="center" class="center"[^<]+<span title="[^"]*" style="text-transform.capitalize.">([^<]+)</span></td>')
            url = scrapertools.get_match(tr, '<a.*?href="([^"]+)"')
            thumbnail = ""
            plot = ""

            # BUGFIX: the option label is padded with "&nbsp;" entities; the
            # old replace pattern had been mangled into a bare space (which
            # would strip ALL spaces from the label). Remove the entity only.
            title = title.replace("&nbsp;", "")

            # Map the flag image name to a human-readable language tag.
            if "es_es" in idioma:
                scrapedtitle = title + " en " + server.strip() + " [Español][" + calidad + "]"
            elif "la_la" in idioma:
                scrapedtitle = title + " en " + server.strip() + " [Latino][" + calidad + "]"
            elif "en_es" in idioma:
                scrapedtitle = title + " en " + server.strip() + " [Inglés SUB Español][" + calidad + "]"
            else:
                scrapedtitle = title + " en " + server.strip() + " [" + idioma + " / " + subtitulos + "][" + calidad + "]"
            scrapedtitle = scrapertools.entityunescape(scrapedtitle)
            scrapedtitle = scrapedtitle.strip()

            scrapedurl = url
            scrapedthumbnail = thumbnail
            scrapedplot = plot
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(Item(channel=__channel__, action="play", title=scrapedtitle,
                                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                                 fulltitle=item.fulltitle, folder=False))
        except Exception:
            # A malformed row must not abort the whole listing; log and move on.
            import traceback
            logger.info("Excepcion: " + traceback.format_exc())

    return itemlist
def detalle_programa(item):
    """Scrape thumbnail, plot and title from an RTVE series detail page.

    Handles URLs like:
      http://www.rtve.es/infantil/series/monsuno/videos/
      http://www.rtve.es/infantil/series/hay-nuevo-scooby-doo/
    Returns the same ``item`` with whatever fields could be extracted;
    each field is best-effort and left untouched on failure.
    """
    url = item.url
    # the "/videos" listing URL maps back to the series root page
    if url.endswith("/videos"):
        url = url.replace("/videos","")

    # Download the page (Python 2 print statements kept as-is)
    print "url="+url
    data = scrapertools.cache_page(url)
    data = scrapertools.get_match(data,'<div class="contenido-serie">(.*?</div>)')
    print "data="+data

    # Thumbnail: first <img> inside the series block, if present.
    try:
        item.thumbnail = scrapertools.get_match(data,'<img.*?src="([^"]+)"')
    except:
        pass

    # Plot: first <div> body, stripped of markup.
    try:
        item.plot = scrapertools.htmlclean( scrapertools.get_match(data,'<div>(.*?)</div>') ).strip()
    except:
        pass

    # Title: the anchor text inside the <h3> header.
    try:
        title = scrapertools.get_match(data,'<h3>[^<]+<a[^>]+>([^<]+)</a>[^<]+</h3>').strip()
        item.title = scrapertools.entityunescape(title)
    except:
        pass

    return item
def novedades(item):
    """List the newest documentaries on documaniatv, plus a next-page link.

    Each "pm-li-video" list entry yields a directly playable Item; entries
    whose markup does not match are skipped, and a ">> Pagina siguiente"
    Item is appended when the pager exposes a next page.
    """
    logger.info("[documaniatv.py] novedades")
    itemlist = []

    # Download the page
    data = scrapertools.cache_page(item.url)

    matches = re.compile('<li[^<]+<div class="pm-li-video"(.*?)</li>', re.DOTALL).findall(data)
    for match in matches:
        try:
            scrapedtitle = scrapertools.get_match(match, '<h3 dir="ltr"><a[^>]+>([^<]+)</a></h3>')
            scrapedurl = scrapertools.get_match(match, '<a href="([^"]+)" class="pm-title-link')
            scrapedthumbnail = scrapertools.get_match(match, '<img src="([^"]+)"')
            scrapedplot = scrapertools.get_match(match, '<p class="pm-video-attr-desc">([^<]+)</p>')
            # scrapedplot = scrapertools.htmlclean(scrapedplot)
            scrapedplot = scrapertools.entityunescape(scrapedplot)
            if (DEBUG):
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(Item(channel=__channel__, action="play", title=scrapedtitle,
                                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                                 fanart=scrapedthumbnail, folder=False))
        except:
            # best-effort: skip entries whose markup does not match
            logger.info("documaniatv.novedades Error al añadir entrada " + match)

    # Look for the "next page" link in the pager (the <li> after the active one).
    try:
        next_page_url = scrapertools.get_match(data, '<li class="active"[^<]+<a[^<]+</a[^<]+</li[^<]+<li[^<]+<a href="([^"]+)">')
        next_page_url = urlparse.urljoin(item.url, next_page_url)
        itemlist.append(Item(channel=__channel__, action="novedades", title=">> Pagina siguiente",
                             url=next_page_url, thumbnail="", plot="", folder=True))
    except:
        logger.info("documaniatv.novedades Siguiente pagina no encontrada")

    return itemlist
def playlist_play(item):
    """Turn a goear playlist JSON blob into directly playable audio Items."""
    logger.info("[goear.py] playlist_play")

    data = scrapertools.cachePage(item.url)

    # {"id":"053eee9","title":"DINASTIA","artist":"series tv","mp3path":"http:\/\/...mp3","imgpath":"...","songtime":"1:17"}
    patron = '"id":"[^"]+","title":"(.*?),"mp3path":"([^"]+)","imgpath"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for raw_title, raw_url in matches:
        song_url = raw_url.replace("\\", "")  # un-escape the JSON slashes
        # the capture spans into the "artist" field; fold it into the title
        song_title = raw_title.replace('","artist":"', ' - ').replace('"', ' ')
        song_title = scrapertools.entityunescape(scrapertools.htmlclean(song_title))
        if DEBUG:
            logger.info("title=[" + song_title + "], url=[" + song_url + "], thumbnail=[" + "" + "]")
        itemlist.append(Item(channel=__channel__, action="play", server="directo",
                             title=song_title, url=song_url, thumbnail=item.thumbnail,
                             plot=item.plot, folder=False))
    return itemlist
def completo(item):
    """Walk every page of the series index and return the complete list."""
    logger.info("[cinetube.py] completo()")

    itemlist = []
    url = item.url
    data = scrapertools.cachePage(url)

    patronpag = '<li class="navs"><a class="pag_next" href="([^"]+)"></a></li>'
    patron = '<!--SERIE-->.*?<a href="([^"]+)" .*?>([^<]+)</a></span></li>.*?<!--FIN SERIE-->'

    while True:
        for href, raw_title in re.compile(patron, re.DOTALL).findall(data):
            # decode from iso-8859-1 and strip HTML entities (Python 2 unicode)
            name = unicode(raw_title, "iso-8859-1", errors="replace").encode("utf-8")
            name = scrapertools.entityunescape(name)
            itemlist.append(Item(channel=__channel__, action="temporadas", title=name,
                                 fulltitle=name, url=urlparse.urljoin(url, href),
                                 thumbnail="", plot="", extra=name, show=name))
        # follow the pager until there is no "next" link
        pages = re.compile(patronpag, re.DOTALL).findall(data)
        if not pages:
            break
        data = scrapertools.cachePage(urlparse.urljoin(url, pages[0]))

    return itemlist
def novedades(item):
    """List the latest episodes published on the animeflv front page."""
    logger.info("pelisalacarta.channels.animeflv novedades")
    data = scrapertools.cache_page(item.url, headers=ANIMEFLV_REQUEST_HEADERS)
    # Each entry looks like:
    # <div class="not"><a href="/ver/...-12.html" title="...">
    #   <img class="imglstsr lazy" src=".../img/mini/957.jpg" border="0">
    #   <span class="tit_ep"><span class="tit">Cyclops Shoujo Saipu 12</span></span></a>
    patronvideos = '<div class="not"[^<]+<a href="([^"]+)" title="([^"]+)"[^<]+<img class="[^"]+" ' \
                   'src="([^"]+)"[^<]+<span class="tit_ep"><span class="tit">([^<]+)<'
    itemlist = []
    for link, _title_attr, thumb, raw_title in re.compile(patronvideos, re.DOTALL).findall(data):
        title = scrapertools.entityunescape(raw_title)
        episode_url = urlparse.urljoin(item.url, link)
        # Swap the small preview image for the full-size poster.
        thumbnail = urlparse.urljoin(item.url, thumb.replace("mini", "portada"))
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + episode_url + "], thumbnail=[" + thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=title,
                 url=episode_url, thumbnail=thumbnail, plot="",
                 fulltitle=title, viewmode="movie"))
    return itemlist
def letras(item):
    """Return one navigation Item per letter in the site's A-Z index."""
    logger.info()
    itemlist = []
    html = scrapertools.cache_page(item.url)
    index_block = scrapertools.find_single_match(html, '<div class="alfabeto">(.*?)</div>')
    for href, raw_title in re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL).findall(index_block):
        letter = scrapertools.entityunescape(raw_title)
        letter_url = urlparse.urljoin(item.url, href)
        logger.debug("title=[" + letter + "], url=[" + letter_url + "], thumbnail=[" + "" + "]")
        itemlist.append(
            Item(channel=item.channel, action="series", title=letter, url=letter_url,
                 thumbnail="", plot="", viewmode="movie_with_plot"))
    return itemlist
def generos(item):
    """Return one navigation Item per genre in the genre listing block."""
    logger.info()
    itemlist = []
    html = scrapertools.cache_page(item.url)
    genres_block = scrapertools.get_match(html, '<div class="lista-hoja-genero-2"(.*?)</div>')
    logger.info("data=" + genres_block)
    for href, raw_title in re.compile('<a href="([^"]+)">([^<]+)</a>', re.DOTALL).findall(genres_block):
        genre = scrapertools.entityunescape(raw_title)
        genre_url = urlparse.urljoin(item.url, href)
        logger.debug("title=[" + genre + "], url=[" + genre_url + "], thumbnail=[" + "" + "]")
        itemlist.append(
            Item(channel=item.channel, action="series", title=genre, url=genre_url,
                 thumbnail="", plot="", viewmode="movie_with_plot"))
    return itemlist
def series(item):
    """List every series on the current yotix page plus a 'next page' entry."""
    logger.info("[yotix.py] videolist")
    data = scrapertools.cachePage(item.url)

    # <h2><a href=URL rel="bookmark">TITLE</a></h2><div class="entry">...
    # <img class="imagen" src=THUMB border="0" />...<p>PLOT</p>
    serie_pattern = ('<h2><a href="([^"]+)" rel="bookmark">([^<]+)</a></h2>'
                     '<div class="entry">.*?'
                     '<img class="imagen" src="([^"]+)" border="0" />.*?'
                     '<p>(.*?)</p>')
    series_found = re.compile(serie_pattern, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(series_found)

    itemlist = []
    for url, raw_title, thumb, plot in series_found:
        title = scrapertools.entityunescape(raw_title.replace("Ver Serie ", ""))
        thumb = thumb.replace(" ", "%20")  # thumbnail URLs may contain raw spaces
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumb + "]")
        itemlist.append(
            Item(channel=__channel__, action="episodios", title=title, url=url,
                 thumbnail=thumb, fanart=thumb, plot=plot))

    # Paginator: <a href='...' class='nextpostslink'>
    next_links = re.compile("<a href='([^']+)' class='nextpostslink'>", re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(next_links)
    for next_url in next_links:
        if DEBUG:
            logger.info("title=[!Pagina siguiente >>], url=[" + next_url + "], thumbnail=[]")
        itemlist.append(
            Item(channel=__channel__, action="series", title="!Pagina siguiente >>",
                 url=next_url, thumbnail="", plot=""))
    return itemlist
def search_results(item):
    """List goear search results and, for paged searches, a 'next page' link.

    Fixes: the trailing debug log reused the last loop variables and raised
    NameError when the result list was empty — it now logs the pagination
    URL instead; ``repr(int)`` replaced with the idiomatic ``str``.
    """
    logger.info("[goear.py] search_results")
    data = scrapertools.cachePage(item.url)
    # NOTE: the first group spans '<title>","artist":"<artist>'; the
    # separator is rewritten to " - " inside the loop below.
    patron = '"id":"[^"]+","title":"(.*?),"mp3path":"([^"]+)","imgpath".*?songtime":"([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for scrapedtitle, scrapedurl, scrapedtime in matches:
        scrapedurl = scrapedurl.replace("\\", "")  # un-escape the JSON slashes
        scrapedtitle = scrapedtitle.replace('","artist":"', ' - ')
        scrapedtitle = scrapedtitle.replace('"', ' ')
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        scrapedtitle = scrapedtitle + " / Duración: " + scrapedtime
        if DEBUG:
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + "" + "]")
        itemlist.append(
            Item(channel=__channel__, action="play", title=scrapedtitle, url=scrapedurl,
                 thumbnail=item.thumbnail, plot=item.plot, server="directo", folder=False))

    if 'playlist_songs_json' in item.url:
        url_pag_sig = ""
    else:
        # Extract the current page number from a fixed slice near the end of
        # the URL, strip the surrounding "p", "&" and "=" characters, then
        # build the next-page URL by substituting the incremented number.
        # NOTE(review): the [-17:-13] slice assumes a fixed URL layout — confirm.
        pag_actual = item.url[-17:-13]
        pag_actual = pag_actual.replace("p", "").replace("&", "").replace("=", "")
        pag_sig = int(pag_actual) + 1
        url_pag_sig = item.url.replace(pag_actual, str(pag_sig))
        if DEBUG:
            logger.info("next page url=[" + url_pag_sig + "]")
        itemlist.append(
            Item(channel=__channel__, action="search_results",
                 title=">> Pagina Siguiente", url=url_pag_sig, plot=item.plot))
    return itemlist
def novedades(item):
    """List the newest videos on documaniatv plus a 'next page' entry.

    Fix: the two bare ``except:`` clauses are narrowed to
    ``except Exception:`` so SystemExit/KeyboardInterrupt are not swallowed.
    """
    logger.info("[documaniatv.py] novedades")
    itemlist = []

    # Download the listing page; each video lives in its own <li> block.
    data = scrapertools.cache_page(item.url)
    matches = re.compile('<li[^<]+<div class="pm-li-video"(.*?)</li>', re.DOTALL).findall(data)
    for match in matches:
        try:
            scrapedtitle = scrapertools.get_match(
                match, '<h3 dir="ltr"><a[^>]+>([^<]+)</a></h3>')
            scrapedurl = scrapertools.get_match(
                match, '<a href="([^"]+)" class="pm-title-link')
            scrapedthumbnail = scrapertools.get_match(match, '<img src="([^"]+)"')
            scrapedplot = scrapertools.get_match(
                match, '<p class="pm-video-attr-desc">([^<]+)</p>')
            scrapedplot = scrapertools.entityunescape(scrapedplot)
            if DEBUG:
                logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
            itemlist.append(
                Item(channel=__channel__, action="play", title=scrapedtitle,
                     url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                     fanart=scrapedthumbnail, folder=False))
        except Exception:
            # Skip entries whose block does not match every sub-pattern.
            logger.info("documaniatv.novedades Error al añadir entrada " + match)

    # Pagination: the link immediately after the currently-active page marker.
    try:
        next_page_url = scrapertools.get_match(
            data,
            '<li class="active"[^<]+<a[^<]+</a[^<]+</li[^<]+<li[^<]+<a href="([^"]+)">'
        )
        next_page_url = urlparse.urljoin(item.url, next_page_url)
        itemlist.append(
            Item(channel=__channel__, action="novedades", title=">> Pagina siguiente",
                 url=next_page_url, thumbnail="", plot="", folder=True))
    except Exception:
        logger.info("documaniatv.novedades Siguiente pagina no encontrada")
    return itemlist
def series(item):
    """Build the list of distinct series found in the turbonick video feed.

    Every <content contenttype="video"> node carries full show metadata;
    this scans them all and emits exactly one Item per distinct show name.
    """
    logger.info("[turbonick.py] series")
    itemlist = []
    # --------------------------------------------------------
    # Download the page
    # --------------------------------------------------------
    data = scrapertools.cachePage(item.url)
    #logger.info(data)
    # --------------------------------------------------------
    # Extract the videos
    # --------------------------------------------------------
    patron = '<content\s+cmsid="([^"]+)"\s+type="content"\s+contenttype="video"[^>]+>[^<]+<meta(.*?)</meta'
    bloques = re.compile(patron, re.DOTALL).findall(data)
    #if DEBUG: scrapertools.printMatches(bloques)

    # Tracks show ids already emitted (dict used as a set).
    dictionaryseries = {}

    for bloque in bloques:
        data = bloque[1]
        # Per-video metadata fields, in feed order; group 6 is <showname>.
        patron = '<title>([^<]+)</title>[^<]+'
        patron += '<shorttitle>([^<]+)</shorttitle>[^<]+'
        patron += '<description>([^<]+)</description>.*?'
        patron += '<iconurl>([^<]+)</iconurl>[^<]+'
        patron += '<iconurljpg>([^<]+)</iconurljpg>.*?'
        patron += '<date>([^<]+)</date>.*?'
        patron += '<showname>([^<]+)</showname>[^<]+'
        patron += '<shortshowname>([^<]+)</shortshowname>[^<]+'
        patron += '<showid>([^<]+)</showid>[^<]+'
        matches = re.compile(patron, re.DOTALL).findall(data)
        #if DEBUG: scrapertools.printMatches(matches)

        idserie = matches[0][6]
        #logger.info("[turbonick.py] idserie="+idserie)

        # Emit each show only once.
        if not dictionaryseries.has_key(idserie):
            logger.info("Nueva serie %s" % idserie)
            scrapedtitle = scrapertools.entityunescape(idserie)
            # "false" appears to mark an unset field in the feed -- TODO confirm
            if scrapedtitle == "false":
                scrapedtitle = "Otros"
            itemlist.append(
                Item(channel=CHANNELNAME,
                     title=scrapedtitle,
                     extra=idserie,
                     action="episodios",
                     url=item.url,
                     thumbnail="",
                     plot="",
                     show=scrapedtitle,
                     category=item.category,
                     folder=True))
            dictionaryseries[idserie] = True
    return itemlist
def list_all(item):
    """List movies and series from one page of the catalogue.

    Links whose URL contains "serie" become `seasons` entries; anything
    else becomes a `findvideos` movie entry. A pagination Item is appended
    when the page references the next page number.
    """
    logger.info()
    itemlist = []
    url = item.url
    # First call: initialise the pagination counter.
    if not item.page:
        item.page = 1
    if not item.extra:
        url += "?page=%s" % item.page
    data = httptools.downloadpage(url, encoding=encoding, canonical=canonical).data
    # Each card: link, scheme-less thumbnail URL (hence the "https://" +
    # prefix below) and title.
    patron = '(?is)TPost C.*?<a href="([^"]+)'
    patron += '.*?data-src="([^"]+)'
    patron += '.*?"Title">([^<]+)'
    matches = scrapertools.find_multiple_matches(data, patron)
    for scrapedurl, scrapedthumbnail, scrapedtitle in matches:
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        # Year unknown from the listing; the tmdb lookup below fills real data.
        item.infoLabels['year'] = "-"
        if "serie" in scrapedurl:
            itemlist.append(
                item.clone(
                    action="seasons",
                    channel=item.channel,
                    contentSerieName=scrapedtitle,
                    thumbnail="https://" + scrapedthumbnail,
                    title=scrapedtitle,
                    url=urlparse.urljoin(host, scrapedurl),
                ))
        else:
            itemlist.append(
                item.clone(
                    action="findvideos",
                    channel=item.channel,
                    contentType="movie",
                    contentTitle=scrapedtitle,
                    thumbnail="https://" + scrapedthumbnail,
                    title=scrapedtitle,
                    url=urlparse.urljoin(host, scrapedurl),
                ))
    tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
    item.page += 1
    # Only offer "next page" when the page actually mentions that number.
    url_pagina = scrapertools.find_single_match(data, 'page=%s' % item.page)
    if url_pagina != "":
        pagina = "Pagina: %s" % item.page
        itemlist.append(
            Item(action="list_all",
                 channel=item.channel,
                 page=item.page,
                 title=pagina,
                 url=item.url))
    return itemlist
def novedades_episodios(item):
    """List newly published animeflv episodes with season/episode metadata.

    Fix: the trailing episode number is now removed with an anchored
    ``re.sub`` — the old ``str.replace(episode, '')`` stripped the FIRST
    occurrence of the digits, corrupting titles that contain the same
    number earlier in the name.
    """
    logger.info("pelisalacarta.channels.animeflv novedades")
    data = scrapertools.anti_cloudflare(item.url, headers=CHANNEL_DEFAULT_HEADERS, host=CHANNEL_HOST)
    # <div class="not"><a href="/ver/...-12.html" title="Cyclops Shoujo Saipu 12">
    #   <img class="imglstsr lazy" src="http://cdn.animeflv.net/img/mini/957.jpg" border="0">
    #   <span class="tit_ep"><span class="tit">Cyclops Shoujo Saipu 12</span></span></a></div>
    patronvideos = '<div class="not"[^<]+<a href="([^"]+)" title="([^"]+)"[^<]+<img class="[^"]+" ' \
                   'src="([^"]+)"[^<]+<span class="tit_ep"><span class="tit">([^<]+)<'
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    itemlist = []
    for match in matches:
        scrapedtitle = scrapertools.entityunescape(match[3])
        fulltitle = scrapedtitle
        scrapedurl = urlparse.urljoin(item.url, match[0])
        # Swap the small preview image for the full-size poster.
        scrapedthumbnail = urlparse.urljoin(item.url, match[2].replace("mini", "portada"))
        new_item = Item(channel=item.channel, action="findvideos", title=scrapedtitle,
                        url=scrapedurl, thumbnail=scrapedthumbnail, plot="",
                        fulltitle=fulltitle)
        content_title = scrapertools.entityunescape(match[1])
        if content_title:
            episode = scrapertools.get_match(content_title, '\s+(\d+)$')
            # Strip only the TRAILING digits (keeps the separator space,
            # matching what numbered_for_tratk previously received).
            content_title = re.sub(r'\d+$', '', content_title)
            season, episode = numbered_for_tratk(content_title, 1, episode)
            new_item.hasContentDetails = "true"
            new_item.contentTitle = content_title
            new_item.contentSeason = season
            new_item.contentEpisodeNumber = int(episode)
        itemlist.append(new_item)
    return itemlist
def episodios(item):
    """List every episode of a seriespepito show.

    Also appends the "add to library" / "download all" helper entries when
    library support is enabled and at least one episode was found.
    """
    logger.info("[seriespepito.py] list")
    data = scrapertools.cache_page(item.url)
    # Fill in plot/thumbnail for the show itself.
    item = detalle_programa(item, data)
    data = scrapertools.get_match(data, '<div class="accordion"(.*?)<div class="subtitulo">')
    logger.info(data)

    # One <tr> per episode: link, SxE code, title text, then language flags.
    patron = ('<tr>'
              '<td>'
              '<a.*?href="([^"]+)"[^<]+'
              '<i[^<]+</i[^<]+'
              '<strong>([^<]+)</strong>'
              '([^<]+)<(.*?)<button')
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    language_tags = (("flag_0", " (Español)"), ("flag_1", " (Latino)"),
                     ("flag_2", " (VO)"), ("flag_3", " (VOS)"))
    itemlist = []
    for url, episode_code, raw_title, flags in matches:
        title = scrapertools.entityunescape(episode_code + " " + raw_title.strip())
        for flag, suffix in language_tags:
            if flag in flags:
                title = title + suffix
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + item.thumbnail + "]")
        itemlist.append(
            Item(channel=__channel__, action="findvideos", title=title, url=url,
                 thumbnail=item.thumbnail, plot=item.plot, show=item.show,
                 viewmode="movie_with_plot"))

    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(
            Item(channel=item.channel, title="Añadir esta serie a la biblioteca de XBMC",
                 url=item.url, action="add_serie_to_library", extra="episodios",
                 show=item.show,
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/seriespepito.jpg"))
        itemlist.append(
            Item(channel=item.channel, title="Descargar todos los episodios de la serie",
                 url=item.url, action="download_all_episodes", extra="episodios",
                 show=item.show,
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/seriespepito.jpg"))
    return itemlist
def findvideos(item):
    """List the mirrors available for one seriespepito episode."""
    logger.info("[seriespepito.py] findvideos")
    itemlist = []
    data = scrapertools.cachePage(item.url)

    # One <tr> per mirror: language flag, server logo + name, then the link.
    patron = ('<tr[^<]+'
              '<td class="tdidioma"><span class="([^"]+)".*?'
              '<td class="tdservidor"><img src="([^"]+)"[^>]+>([^<]+)</td[^<]+'
              '<td class="tdenlace"><a class="btn btn-mini enlace_link" data-servidor="([^"]+)" '
              'rel="nofollow" target="_blank" title="[^"]+" href="([^"]+)"')
    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    language_tags = (("flag_0", " (Español)"), ("flag_1", " (Latino)"),
                     ("flag_2", " (VO)"), ("flag_3", " (VOS)"))
    for flags, _logo, server_name, _server_id, link in matches:
        url = urlparse.urljoin(item.url, link)
        title = "Ver en " + scrapertools.entityunescape(server_name).strip()
        for flag, suffix in language_tags:
            if flag in flags:
                title = title + suffix
        itemlist.append(
            Item(channel=__channel__, action="play", title=title, url=url,
                 thumbnail=item.thumbnail, plot=item.plot, show=item.show, folder=False,
                 fanart="http://pelisalacarta.mimediacenter.info/fanart/seriespepito.jpg"))
    return itemlist
def search_results(item):
    """List yotix search results plus a 'next page' entry when present."""
    logger.info("[yotix.py] search_results")
    data = scrapertools.cachePage(item.url)

    # <h2><a href=URL rel="bookmark">TITLE</a></h2><div class="entry">PLOT</div>
    result_pattern = ('<h2><a href="([^"]+)" rel="bookmark">([^<]+)</a></h2>'
                      '<div class="entry">(.*?)</div>')
    results = re.compile(result_pattern, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(results)

    itemlist = []
    for url, raw_title, plot in results:
        title = scrapertools.entityunescape(raw_title.replace("Ver Serie ", ""))
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[]")
        itemlist.append(
            Item(channel=__channel__, action="episodios", title=title, url=url, plot=plot))

    # Paginator: <a href='...' class='nextpostslink'>
    next_links = re.compile("<a href='([^']+)' class='nextpostslink'>", re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(next_links)
    for next_url in next_links:
        if DEBUG:
            logger.info("title=[!Pagina siguiente >>], url=[" + next_url + "], thumbnail=[]")
        itemlist.append(
            Item(channel=__channel__, action="series", title="!Pagina siguiente >>",
                 url=next_url, thumbnail="", plot=""))
    return itemlist
def episodios(item):
    """List the episodes of one turbonick series (selected via item.extra).

    Fix: replaced a leftover ``print item.tostring()`` debug statement
    (raw stdout output) with a logger call.
    """
    logger.info("[turbonick.py] episodios")
    logger.info(item.tostring())
    itemlist = []
    # --------------------------------------------------------
    # Download the page
    # --------------------------------------------------------
    data = scrapertools.cachePage(item.url)
    #logger.info(data)
    # --------------------------------------------------------
    # Extract the videos
    # --------------------------------------------------------
    patron = '<content\s+cmsid="([^"]+)"\s+type="content"\s+contenttype="video"[^>]+>[^<]+<meta(.*?)</meta'
    bloques = re.compile(patron, re.DOTALL).findall(data)
    for bloque in bloques:
        data = bloque[1]
        # Per-video metadata fields, in feed order; group 6 is <showname>.
        patron = '<title>([^<]+)</title>[^<]+'
        patron += '<shorttitle>([^<]+)</shorttitle>[^<]+'
        patron += '<description>([^<]+)</description>.*?'
        patron += '<iconurl>([^<]+)</iconurl>[^<]+'
        patron += '<iconurljpg>([^<]+)</iconurljpg>.*?'
        patron += '<date>([^<]+)</date>.*?'
        patron += '<showname>([^<]+)</showname>[^<]+'
        patron += '<shortshowname>([^<]+)</shortshowname>[^<]+'
        patron += '<showid>([^<]+)</showid>[^<]+'
        matches = re.compile(patron, re.DOTALL).findall(data)
        match = matches[0]
        idserie = match[6]
        # Prefer the shorttitle; "false" appears to mark an unset feed field.
        if match[1] != "false":
            scrapedtitle = match[1] + " - " + match[2]
        else:
            scrapedtitle = idserie + " - " + match[2]
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        if scrapedtitle.startswith("DRAKE AND JOSH"):
            scrapedtitle = scrapedtitle.replace("DRAKE AND JOSH", "DRAKE & JOSH")
        scrapedthumbnail = match[3]
        if scrapedthumbnail == "false":
            scrapedthumbnail = ""
        scrapedplot = match[5]
        scrapedurl = 'http://es.turbonick.nick.com/dynamo/turbonick/xml/dyn/flvgenPT.jhtml?vid=' + bloque[0] + '&hiLoPref=hi'
        # Only keep the episodes belonging to the requested series.
        if idserie == item.extra:
            itemlist.append(
                Item(channel=CHANNELNAME, title=scrapedtitle, action="play",
                     url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                     show=item.show, category=item.category, folder=False))
    return itemlist
def serie(item):
    """Build the episode list of a youanimehd series page.

    Parses the horizontal scroll menu and returns one playable Item per
    chapter, renumbered as "1xNN" when a chapter number can be extracted.
    """
    logger.info("pelisalacarta.channels.youanimehd serie")
    # Download the page and flatten it to one line for the regexes below.
    data = scrapertools.cache_page(item.url)
    data = data.replace('\n', "")
    data = scrapertools.get_match(data, '<div class="sc_menu"[^<]+<ul class="sc_menu">(.*?)</ul[^<]+</div[^<]+</li>')
    # Plot extraction was never finished; kept for reference.
    """patronplot = 'Descripción</strong><br /><br />([^"]+)<br />'
    matches = re.compile(patronplot,re.DOTALL).findall(data)
    if len(matches)>0:"""
    scrapedplot = ""
    # Episode links look like:
    # <li><a target="vides" href="..."><img src="..." .../>
    #   <span style="color:red">Capitulo 1</span></a></li>
    patronvideos = ' <li><a target="vides" href="([^"]+)"[^<]+<img\s+src="([^"]+)"[^<]+<span style="color:red">([^"]+)</span>'
    itemlist = []
    matches = re.compile(patronvideos, re.DOTALL).findall(data)
    for match in matches:
        scrapedtitle = match[2]
        # Page is latin-1; normalise to UTF-8 before unescaping entities.
        scrapedtitle = unicode(scrapedtitle, "iso-8859-1", errors="replace").encode("utf-8")
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        try:
            # Rewrite "Capitulo N" as a zero-padded "1xNN" season/episode code.
            episodio = scrapertools.get_match(scrapedtitle, "(\d+)")
            if len(episodio) == 1:
                scrapedtitle = "1x0" + episodio
            else:
                scrapedtitle = "1x" + episodio
        except:
            # No number in the title: keep the original chapter text.
            pass
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = match[1]
        #if (DEBUG): logger.info("title=["+scrapedtitle+"], url=["+scrapedurl+"], thumbnail=["+scrapedthumbnail+"]")
        itemlist.append(
            Item(channel=__channel__, action="play", title=scrapedtitle,
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 show=item.show, fulltitle="a", folder=False))
    if config.get_library_support():
        itemlist.append(
            Item(channel=item.channel, title="Añadir esta serie a la biblioteca de XBMC",
                 url=item.url, action="add_serie_to_library", extra="serie",
                 show=item.show))
    return itemlist
def series(item, data=""):
    """List seriesdanko shows from an index page (or from pre-fetched ``data``).

    Fix: the charset-sniffing regex was applied to the undefined name
    ``matches`` (a guaranteed NameError), so the except branch always ran.
    It is now applied to the downloaded page. The result is still unused by
    the decoding below — TODO: decode titles with the detected charset.
    """
    logger.info("pelisalacarta.channels.seriesdanko series")
    itemlist = []

    # Download the page unless the caller already supplied it.
    if data == "":
        data = scrapertools.cache_page(item.url)

    # Sniff the declared charset of the page (best effort, default utf-8).
    try:
        charset = re.compile("charset=(.+?)'", re.DOTALL).findall(data)[0]
    except Exception:
        logger.info("charset desconocido")
        charset = "utf-8"

    # <div ...><a href='serie.php?serie=748' title='Capitulos de: X'><img class='ict' src='...'>
    patronvideos = "<div[^<]+<a href='(serie.php[^']+)' title='Capitulos de\: ([^']+)'><img class='ict' src='([^']+)'"
    matches = re.compile(patronvideos, re.DOTALL).findall(data)

    if len(matches) == 0:
        # Some listings use relative '../serie.php' links instead.
        patronvideos = (
            "<div[^<]+<a href='(../serie.php[^']+)' title='Capitulos de\: ([^']+)'><img class='ict' src='([^']+)'"
        )
        matches = re.compile(patronvideos, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
        scrapedtitle = unicode(scrapedtitle, "utf-8", errors="replace").encode("utf-8")
        # Repair common mojibake (UTF-8 bytes decoded as latin-1) in titles.
        scrapedtitle = scrapedtitle.replace("á", "á")
        scrapedtitle = scrapedtitle.replace("é", "é")
        scrapedtitle = scrapedtitle.replace("í", "í")
        scrapedtitle = scrapedtitle.replace("ó", "ó")
        scrapedtitle = scrapedtitle.replace("ú", "ú")
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        scrapedurl = urlparse.urljoin(item.url, scrapedurl)
        itemlist.append(
            Item(
                channel=__channel__,
                action="episodios",
                title=scrapedtitle,
                url=scrapedurl,
                thumbnail=scrapedthumbnail,
                plot="",
                show=scrapedtitle,
                folder=True,
            )
        )
    return itemlist
def videolist(item):
    """List the videos of a publico.tv section plus a 'next page' entry."""
    logger.info("[publicotv.py] videolist")
    itemlist = []
    # --------------------------------------------------------
    # Download the page
    # --------------------------------------------------------
    data = scrapertools.cachePage(item.url)
    #logger.info(data)
    # Extract the videos: link, thumbnail, tooltip text, title and two info fields.
    patron = '<div class="video-overview a1">[^<]+'
    patron += '<a href="([^"]+)" title="Play">'
    patron += '<img.*?src="(.*?)".*?title="([^"]+)"[^>]+></a>\W*<h4></h4>\W*<p class="title">(.*?)</p>\W*<div class="video-info-line">\W*<p>(.*?)</p>\W*<p>(.*?)</p>\W*</div>\W*</div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)
    for match in matches:
        # Title is "<name> (<second info field>) (<first info field>)".
        scrapedtitle = match[3] + " (" + match[5] + ") (" + match[4] + ")"
        scrapedurl = urlparse.urljoin(item.url, match[0])
        scrapedthumbnail = urlparse.urljoin(item.url, match[1])
        scrapedplot = scrapertools.entityunescape(match[2])
        # The tooltip starts with a "--"-separated prefix; keep only the tail.
        seppos = scrapedplot.find("--")
        scrapedplot = scrapedplot[seppos + 2:]
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        itemlist.append(
            Item(channel=CHANNELNAME, title=scrapedtitle, action="play",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 folder=False))
    # Next page link
    patron = '<a href="([^"]+)" title="Ir a la siguiente[^"]+">Siguiente \»\;</a></div>'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)
    if len(matches) > 0:
        match = matches[0]
        scrapedtitle = "Página siguiente"
        scrapedurl = urlparse.urljoin(item.url, match)
        scrapedthumbnail = ""
        scrapedplot = ""
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + scrapedthumbnail + "]")
        # Add the pagination entry to the XBMC listing.
        itemlist.append(
            Item(channel=CHANNELNAME, title=scrapedtitle, action="videolist",
                 url=scrapedurl, thumbnail=scrapedthumbnail, plot=scrapedplot,
                 folder=True))
    return itemlist
def series(item):
    """List the distinct shows found in a Turbo Nick video feed.

    Each <content type="content" contenttype="video"> block carries a
    <meta> section; one Item (action="episodios") is emitted per unique
    show name found there.
    """
    logger.info("[turbonick.py] series")
    itemlist = []

    # --------------------------------------------------------
    # Download the page
    # --------------------------------------------------------
    data = scrapertools.cachePage(item.url)
    #logger.info(data)

    # --------------------------------------------------------
    # Extract the videos
    # --------------------------------------------------------
    patron = '<content\s+cmsid="([^"]+)"\s+type="content"\s+contenttype="video"[^>]+>[^<]+<meta(.*?)</meta'
    bloques = re.compile(patron, re.DOTALL).findall(data)

    dictionaryseries = {}

    for bloque in bloques:
        data = bloque[1]
        # Metadata groups: title, shorttitle, description, iconurl,
        # iconurljpg, date, showname, shortshowname, showid
        patron = '<title>([^<]+)</title>[^<]+'
        patron += '<shorttitle>([^<]+)</shorttitle>[^<]+'
        patron += '<description>([^<]+)</description>.*?'
        patron += '<iconurl>([^<]+)</iconurl>[^<]+'
        patron += '<iconurljpg>([^<]+)</iconurljpg>.*?'
        patron += '<date>([^<]+)</date>.*?'
        patron += '<showname>([^<]+)</showname>[^<]+'
        patron += '<shortshowname>([^<]+)</shortshowname>[^<]+'
        patron += '<showid>([^<]+)</showid>[^<]+'
        matches = re.compile(patron, re.DOTALL).findall(data)
        # FIX: skip meta blocks that don't match the pattern — the original
        # code indexed matches[0] unconditionally and crashed with an
        # IndexError on malformed entries.
        if not matches:
            continue

        idserie = matches[0][6]  # <showname>
        #logger.info("[turbonick.py] idserie="+idserie)

        # FIX: dict.has_key() is deprecated; use the "in" operator.
        if idserie not in dictionaryseries:
            logger.info("Nueva serie %s" % idserie)
            scrapedtitle = scrapertools.entityunescape(idserie)
            # The feed marks uncategorised videos with showname "false"
            if scrapedtitle == "false":
                scrapedtitle = "Otros"
            itemlist.append(Item(channel=CHANNELNAME, title=scrapedtitle, extra=idserie, action="episodios", url=item.url, thumbnail="", plot="", show=scrapedtitle, category=item.category, folder=True))
            dictionaryseries[idserie] = True

    return itemlist
def episodios(item):
    """Scrape the episode links of a yotix series page.

    Every <a class="azul"> anchor becomes one Item with
    action="findvideos"; thumbnail and plot are inherited from *item*.
    """
    logger.info("[yotix.py] episodios")

    page = scrapertools.cachePage(item.url)

    # Example anchors:
    # <a class="azul" href="http://yotixanime.com/rt/.../" target="_blank">Capitulo 04 - ...</a>
    link_pattern = '<a class="azul"\s+href="([^"]+)" target="_blank">(.*?)</a>'
    episode_links = re.compile(link_pattern, re.DOTALL).findall(page)
    scrapertools.printMatches(episode_links)

    results = []
    for ep_url, raw_title in episode_links:
        clean_title = scrapertools.entityunescape(scrapertools.htmlclean(raw_title))
        results.append(Item(channel=__channel__, action="findvideos", title=clean_title, url=ep_url, thumbnail=item.thumbnail, plot=item.plot))

    return results
def novedades(item):
    """List the latest episodes published on animeflv.

    Fetches the page through get_page(); on HTTP success each
    <div class="not"> entry becomes an Item (action="findvideos"),
    otherwise a single error Item pointing back to mainlist is returned.
    """
    logger.info("pelisalacarta.channels.animeflv novedades")

    itemlist = []
    status_code, data = get_page(item.url)

    logger.info("pelisalacarta.channels.animeflv **** {0}".format(item.url))
    logger.info("pelisalacarta.channels.animeflv **** {0}".format(status_code))

    if status_code != requests.codes.ok:
        itemlist.append(Item(channel=__channel__, action="mainlist", title="No se ha podido cargar la pagina ERROR:{0}".format(status_code), url="", thumbnail="", plot=""))
        return itemlist

    '''
    <div class="not">
        <a href="/ver/cyclops-shoujo-saipu-12.html" title="Cyclops Shoujo Saipu 12">
            <img class="imglstsr lazy" src="http://cdn.animeflv.net/img/mini/957.jpg" border="0">
            <span class="tit_ep"><span class="tit">Cyclops Shoujo Saipu 12</span></span>
        </a>
    </div>
    '''
    entry_pattern = '<div class="not"[^<]+<a href="([^"]+)" title="([^"]+)"[^<]+<img class="[^"]+" ' \
                    'src="([^"]+)"[^<]+<span class="tit_ep"><span class="tit">([^<]+)<'

    for href, _title_attr, thumb_src, ep_title in re.compile(entry_pattern, re.DOTALL).findall(data):
        title = scrapertools.entityunescape(ep_title)
        url = urlparse.urljoin(item.url, href)
        # The list page carries "mini" thumbnails; swap in the full poster
        thumbnail = urlparse.urljoin(item.url, thumb_src.replace("mini", "portada"))

        if DEBUG:
            logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(title, url, thumbnail))

        itemlist.append(Item(channel=__channel__, action="findvideos", title=title, url=url, thumbnail=thumbnail, plot="", fulltitle=title, viewmode="movie"))

    return itemlist
def letras(item):
    """Build the A-Z index of reyanime from the page's "alfabeto" block.

    Each letter link becomes an Item with action="series".
    """
    logger.info("pelisalacarta.channels.reyanime letras")

    results = []
    page = scrapertools.cache_page(item.url)
    # Only the alphabet strip is relevant
    page = scrapertools.find_single_match(page, '<div class="alfabeto">(.*?)</div>')

    anchor_re = re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL)
    for href, label in anchor_re.findall(page):
        letter = scrapertools.entityunescape(label)
        letter_url = urlparse.urljoin(item.url, href)
        if (DEBUG):
            logger.info("title=[" + letter + "], url=[" + letter_url + "], thumbnail=[" + "" + "]")
        results.append(Item(channel=item.channel, action="series", title=letter, url=letter_url, thumbnail="", plot="", viewmode="movie_with_plot"))

    return results
def letras(item):
    """Build the A-Z index of animeflv from the page's "alfabeto_box" block.

    Each letter link becomes an Item with action="series".
    """
    logger.info("pelisalacarta.channels.animeflv letras")

    results = []
    page = scrapertools.cache_page(item.url, headers=ANIMEFLV_REQUEST_HEADERS)
    # Only the alphabet strip is relevant
    page = scrapertools.get_match(page, '<div class="alfabeto_box"(.*?)</div>')

    anchor_re = re.compile('<a href="([^"]+)[^>]+>([^<]+)</a>', re.DOTALL)
    for href, label in anchor_re.findall(page):
        letter = scrapertools.entityunescape(label)
        letter_url = urlparse.urljoin(item.url, href)
        if (DEBUG):
            logger.info("title=[" + letter + "], url=[" + letter_url + "], thumbnail=[" + "" + "]")
        results.append(Item(channel=__channel__, action="series", title=letter, url=letter_url, thumbnail="", plot=""))

    return results
def search_results(item):
    """Parse a goear JSON search response into playable Items.

    Adds one Item per song (action="play", direct server) and, for plain
    searches (not playlists), a ">> Pagina Siguiente" Item built by
    bumping the page number embedded in the current URL.
    """
    logger.info("[goear.py] search_results")
    data = scrapertools.cachePage(item.url)

    patron = '"id":"[^"]+","title":"(.*?),"mp3path":"([^"]+)","imgpath".*?songtime":"([^"]+)"'
    matches = re.compile(patron, re.DOTALL).findall(data)
    if DEBUG:
        scrapertools.printMatches(matches)

    itemlist = []
    for scrapedtitle, scrapedurl, scrapedtime in matches:
        scrapedurl = scrapedurl.replace("\\", "")
        # The raw title still contains the JSON '","artist":"' separator;
        # turn it into "title - artist" and drop stray quotes.
        scrapedtitle = scrapedtitle.replace('","artist":"', ' - ')
        scrapedtitle = scrapedtitle.replace('"', ' ')
        scrapedtitle = scrapertools.htmlclean(scrapedtitle)
        scrapedtitle = scrapertools.entityunescape(scrapedtitle)
        scrapedtitle = scrapedtitle + " / Duración: " + scrapedtime
        scrapedplot = ""
        if (DEBUG):
            logger.info("title=[" + scrapedtitle + "], url=[" + scrapedurl + "], thumbnail=[" + "" + "]")
        itemlist.append(Item(channel=__channel__, action="play", title=scrapedtitle, url=scrapedurl, thumbnail=item.thumbnail, plot=item.plot, server="directo", folder=False))

    if 'playlist_songs_json' in item.url:
        # Playlists are single-page: no pagination link
        url_pag_sig = ""
    else:
        # Extract the current page number from its fixed position in the
        # URL and strip the surrounding "p", "&" and "=" characters.
        pag_actual = item.url[-17:-13]
        pag_actual = pag_actual.replace("p", "")
        pag_actual = pag_actual.replace("&", "")
        pag_actual = pag_actual.replace("=", "")
        # Increment the page number by one
        pag_sig = int(pag_actual) + 1
        # Build the next results page URL
        url_pag_sig = item.url
        url_pag_sig = url_pag_sig.replace(pag_actual, repr(pag_sig))

    # FIX: the original code appended the next-page entry unconditionally
    # (a dead item with url="" for playlists) and its DEBUG trace read
    # scrapedtitle/scrapedurl from the loop above, raising a NameError when
    # the search returned no songs.
    if url_pag_sig:
        if (DEBUG):
            logger.info("title=[>> Pagina Siguiente], url=[" + url_pag_sig + "], thumbnail=[" + "" + "]")
        itemlist.append(Item(channel=__channel__, action="search_results", title=">> Pagina Siguiente", url=url_pag_sig, plot=item.plot))

    return itemlist
def paises(item):
    """List the country filters offered by the animeflv "País" box.

    Each country link becomes an Item with action="peliculas".
    """
    logger.info("[animeflv.py] paises")

    results = []
    page = scrapertools.cache_page(item.url)
    # Keep only the country list
    page = scrapertools.get_match(page, '<h2>País</h2>(.*?)</ul')

    anchor_re = re.compile("<a href='([^']+)'>([^<]+)</a>", re.DOTALL)
    for href, label in anchor_re.findall(page):
        country = scrapertools.entityunescape(label)
        country_url = urlparse.urljoin(item.url, href)
        if (DEBUG):
            logger.info("title=[" + country + "], url=[" + country_url + "], thumbnail=[" + "" + "]")
        results.append(Item(channel=__channel__, action="peliculas", title=country, url=country_url, thumbnail="", plot=""))

    return results
def generos(item):
    """List the genres found in the page's "cccon" block.

    Whitespace runs are squashed first so the genre pattern matches across
    the minified markup; each link becomes an Item with action="series".
    """
    logger.info()

    results = []
    page = scrapertools.cache_page(item.url)
    # Collapse newlines/tabs/double spaces so the regex works on one line
    page = re.sub(r"\n|\r|\t|\s{2}", "", page)
    page = scrapertools.get_match(page, "<div class='cccon'>(.*?)</div><div id=\"myslides\">")

    genre_re = re.compile("<a.+? href='/([^']+)'>(.*?)</a>", re.DOTALL)
    for href, label in genre_re.findall(page):
        genre = scrapertools.entityunescape(label)
        genre_url = urlparse.urljoin(item.url, href)
        logger.debug("title=[{0}], url=[{1}]".format(genre, genre_url))
        results.append(Item(channel=item.channel, action="series", title=genre, url=genre_url))

    return results
def series(item):
    """Scrape the series listed on a yotix index page.

    Each <h2><a rel="bookmark"> entry (title, cover image and synopsis
    paragraph) becomes an Item with action="episodios"; a trailing
    'nextpostslink' anchor, when present, adds a next-page Item that
    re-enters this action.
    """
    logger.info("[yotix.py] videolist")

    # Download the page
    page = scrapertools.cachePage(item.url)
    #logger.info(page)

    # Each series looks like:
    # <h2><a href="..." rel="bookmark">Title</a></h2><div class="entry">
    #   ... <img class="imagen" src="..." border="0" /> ... <p>synopsis</p>
    series_pattern = '<h2><a href="([^"]+)" rel="bookmark">([^<]+)</a></h2>'
    series_pattern += '<div class="entry">.*?'
    series_pattern += '<img class="imagen" src="([^"]+)" border="0" />.*?'
    series_pattern += '<p>(.*?)</p>'
    entries = re.compile(series_pattern, re.DOTALL).findall(page)
    if DEBUG:
        scrapertools.printMatches(entries)

    results = []
    for serie_url, raw_title, cover, synopsis in entries:
        title = scrapertools.entityunescape(raw_title.replace("Ver Serie ", ""))
        # Cover filenames may contain spaces; URL-encode them
        cover = cover.replace(" ", "%20")
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + serie_url + "], thumbnail=[" + cover + "]")
        results.append(Item(channel=__channel__, action="episodios", title=title, url=serie_url, thumbnail=cover, fanart=cover, plot=synopsis))

    # Next page, e.g. <a href='http://yotixanime.com/pagina/2/' class='nextpostslink'>
    next_links = re.compile("<a href='([^']+)' class='nextpostslink'>", re.DOTALL).findall(page)
    if DEBUG:
        scrapertools.printMatches(next_links)

    for next_url in next_links:
        if (DEBUG):
            logger.info("title=[" + "!Pagina siguiente >>" + "], url=[" + next_url + "], thumbnail=[" + "" + "]")
        results.append(Item(channel=__channel__, action="series", title="!Pagina siguiente >>", url=next_url, thumbnail="", plot=""))

    return results
def generos(item):
    """List the genres shown in reyanime's "top-list-anim" box.

    Each genre link becomes an Item with action="series".
    """
    logger.info("pelisalacarta.channels.reyanime generos")

    results = []
    page = scrapertools.cache_page(item.url)
    # Keep only the genre box (header starts with "G...")
    page = scrapertools.get_match(page, '<div class="top-list-anim">G[^<]+</div>(.*?)</div>\s')
    logger.info("data=" + page)

    genre_re = re.compile('<a href="([^"]+)"><div[^<]+</div>([^<]+)</a>', re.DOTALL)
    for href, label in genre_re.findall(page):
        genre = scrapertools.entityunescape(label)
        genre_url = urlparse.urljoin(item.url, href)
        if (DEBUG):
            logger.info("title=[" + genre + "], url=[" + genre_url + "], thumbnail=[" + "" + "]")
        results.append(Item(channel=__channel__, action="series", title=genre, url=genre_url, thumbnail="", plot=""))

    return results
def peliculas(item, paginacion=True):
    """Scrape the movie thumbnails of a shurweb listing page.

    Each <a class="video_thumb"> entry becomes an Item with
    action="findvideos". The *paginacion* flag is kept for interface
    compatibility (it is not used here).
    """
    logger.info("[shurweb.py] peliculas")

    # Download the page
    page = scrapertools.cachePage(item.url)

    # Entries: (url, title, thumbnail, duration)
    entry_pattern = '<a class="video_thumb" href="([^"]+)" rel="bookmark" title="([^"]+)">[^<]+<img width="123" height="100" src="([^"]+)"[^<]+<span class="time">([^<]+)</span>'
    entries = re.compile(entry_pattern, re.DOTALL).findall(page)
    if DEBUG:
        scrapertools.printMatches(entries)

    results = []
    for movie_url, raw_title, thumb, duration in entries:
        title = scrapertools.entityunescape(raw_title + " (" + duration + ")")
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + movie_url + "], thumbnail=[" + thumb + "]")
        results.append(Item(channel=__channel__, action='findvideos', title=title, fulltitle=title, url=movie_url, thumbnail=thumb, plot="", extra=title, viewmode="movie", context="4|5", fanart="http://pelisalacarta.mimediacenter.info/fanart/shurweb.jpg"))

    return results
def letras(item):
    """Build the A-Z index of daramatv from its "/lista-*" links.

    Each letter link becomes an Item with action="pagina_"; URLs are
    resolved against the module-level *host*.
    """
    logger.info("pelisalacarta.channels.daramatv letras")

    results = []
    request_headers = DEFAULT_HEADERS[:]
    page = scrapertools.cache_page(item.url, headers=request_headers)

    letter_re = re.compile(' <a href="(\/lista-.+?)">(.+?)<', re.DOTALL)
    for href, label in letter_re.findall(page):
        letter = scrapertools.entityunescape(label)
        letter_url = urlparse.urljoin(host, href)
        if DEBUG:
            logger.info("title=[{0}], url=[{1}], thumbnail=[{2}]".format(letter, letter_url, ""))
        results.append(Item(channel=__channel__, action="pagina_", title=letter, url=letter_url, thumbnail="", plot=""))

    return results
def generos(item):
    """List the genres shown in reyanime's "lista-hoja-genero-2" box.

    Each genre link becomes an Item with action="series".
    """
    logger.info("pelisalacarta.channels.reyanime generos")

    results = []
    #results.append( Item(channel=item.channel, action="series" , title="acción" , url="http://reyanime.com/ver/genero/accion", viewmode="movie_with_plot"))
    page = scrapertools.cache_page(item.url)
    page = scrapertools.get_match(page, '<div class="lista-hoja-genero-2"(.*?)</div>')
    logger.info("data=" + page)

    genre_re = re.compile('<a href="([^"]+)">([^<]+)</a>', re.DOTALL)
    for href, label in genre_re.findall(page):
        genre = scrapertools.entityunescape(label)
        genre_url = urlparse.urljoin(item.url, href)
        if (DEBUG):
            logger.info("title=[" + genre + "], url=[" + genre_url + "], thumbnail=[" + "" + "]")
        results.append(Item(channel=item.channel, action="series", title=genre, url=genre_url, thumbnail="", plot="", viewmode="movie_with_plot"))

    return results
def ultimas_series(item):
    """List the most recent series posted on peliculamos.

    Each "associated-post" block (link, title, excerpt paragraph) becomes
    an Item with action="findvideos".
    """
    logger.info("[peliculamos.py] ultimas_series")

    results = []
    page = scrapertools.cache_page(item.url)
    logger.info("data=" + page)

    # Blocks look like:
    # <div class="associated-post"><h3 class="post-title"><a href="..."
    # title="...">Title</a></h3><div class="post-excerpt"><div ...>
    # <iframe ...></iframe></div><p>excerpt</p>
    post_pattern = '<div class="associated-post"><h3 class="post-title"><a href="([^"]+)" title="[^"]+">([^"]+)</a></h3><div class="post-excerpt">'
    post_pattern += '<div[^<]+<iframe[^<]+</iframe></div><p>([^<]+)<'

    for post_url, raw_title, excerpt in re.compile(post_pattern, re.DOTALL).findall(page):
        title = scrapertools.htmlclean(scrapertools.entityunescape(raw_title))
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + post_url + "], thumbnail=[" + "" + "]")
        results.append(Item(channel=__channel__, action="findvideos", title=title, url=post_url, thumbnail="", plot=excerpt))

    return results