def orderalf(item):
    """Build the alphabetical movie listing for the page at ``item.url``.

    The page is downloaded and every table row matched by the pattern is
    turned into an ``Item`` whose action is ``findvideos_film``.  The list is
    then passed to TMDB for metadata lookup and to ``support.nextPage`` for
    pagination handling.

    Args:
        item: Item being opened; ``item.url`` is fetched and ``item.channel``
            is copied onto every result.

    Returns:
        list: the ``Item`` objects built from the page.
    """
    logger.info("%s mainlist orderalf log: %s" % (__channel__, item))

    # Download the page.
    data = httptools.downloadpage(item.url, headers=headers).data

    # Capture groups, in order: link, title, thumbnail path, year and the
    # quality/language cell.
    patron = '<td class="mlnh-thumb"><a href="(.*?)".title="(.*?)".*?src="(.*?)".*?mlnh-3">(.*?)<.*?"mlnh-5">.<(.*?)<td'
    matches = scrapertools.find_multiple_matches(data, patron)

    itemlist = []
    for scrapedurl, scrapedtitle, scrapedimg, scrapedyear, scrapedqualang in matches:
        # Anything not explicitly flagged as subtitled is labelled as Italian.
        scrapedlang = 'Sub-ita' if 'sub ita' in scrapedqualang.lower() else 'ITA'

        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos_film",
                 contentTitle=scrapedtitle,
                 fulltitle=scrapedtitle,
                 url=scrapedurl,
                 infoLabels={'year': scrapedyear},
                 # Was misspelled ``contenType``, which left the content type
                 # stored under an attribute name nothing else spells that way.
                 contentType="movie",
                 # The scraped image path is prefixed with the site host.
                 thumbnail=host + scrapedimg,
                 title="%s [%s]" % (scrapedtitle, scrapedlang),
                 language=scrapedlang,
                 context="buscar_trailer"))

    # The year is scraped from the page, so let TMDB look up the metadata.
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)

    # Pagination.
    support.nextPage(itemlist, item, data, '<span>[^<]+</span>[^<]+<a href="(.*?)">')

    return itemlist
def peliculas_latest_ep(item):
    """List the latest-episode entries found on the page at ``item.url``.

    Each ``<article>`` matched by the pattern yields one ``Item`` with action
    ``findvideos``.  Content type, ``extra`` and ``infoLabels`` are inherited
    from the incoming item.  The list is finally handed to
    ``support.nextPage`` for pagination handling.

    Args:
        item: Item being opened; supplies ``url``, ``channel``,
            ``contentType``, ``extra`` and ``infoLabels``.

    Returns:
        list: the ``Item`` objects built from the page.
    """
    page = httptools.downloadpage(item.url).data

    # Capture groups, in order: link, thumbnail, number badge, episode text
    # and title.
    entry_re = (
        r'<article.*?"TPost C".*?href="([^"]+)".*?img.*?src="([^"]+)"'
        r'.*?class="ClB">([^<]+)<\/span>([^<]+).*?<h3.*?>([^<]+)'
    )

    itemlist = []
    for link, thumb, number, episode, name in re.findall(entry_re, page, re.DOTALL):
        # The same "episode title" label is used for fulltitle and show.
        label = episode + " " + name
        entry = Item(channel=item.channel,
                     action="findvideos",
                     contentType=item.contentType,
                     title="[B]" + number + "[/B]" + episode + " - " + name,
                     fulltitle=label,
                     show=label,
                     url=link,
                     extra=item.extra,
                     # A scheme is prepended to the scraped thumbnail address.
                     thumbnail="http:" + thumb,
                     infoLabels=item.infoLabels)
        itemlist.append(entry)

    support.nextPage(itemlist, item, page, r'page-numbers current.*?href="([^"]+)"')
    return itemlist
def peliculas(item):
    """Build the listing of titles found on the page described by ``item``.

    Every matched anchor becomes an ``Item``.  Titles containing "movie" or
    "ova" (case-insensitive) are treated as movies opened with
    ``findvideos``; everything else is treated as a TV show opened with
    ``episodios``.  Entries whose processed title is in the blacklist are
    skipped.

    Args:
        item: Item being opened; passed to ``support.match`` and its
            ``channel`` is copied onto every result.

    Returns:
        list: the ``Item`` objects built from the page.
    """
    log()
    itemlist = []
    blacklist = ['top 10 anime da vedere']

    matches, data = support.match(
        item,
        r'<a class="[^"]+" href="([^"]+)" title="([^"]+)"><img[^s]+src="([^"]+)"[^>]+')

    # Fragments stripped from the display title, applied in this order after
    # the language tag has been removed.
    noise = ('(Streaming & Download)', '( Streaming & Download )',
             'OAV', 'OVA', 'MOVIE')

    for url, raw_title, thumb in matches:
        title = scrapertoolsV2.decodeHtmlentities(raw_title.strip())
        title = title.replace("streaming", "")
        lowered = title.lower()

        lang = scrapertoolsV2.find_single_match(title, r"((?:SUB ITA|ITA))")

        # "ova" takes precedence over "movie" when both appear in the title.
        if 'ova' in lowered:
            videoType = ' - (OAV)'
        elif 'movie' in lowered:
            videoType = ' - (MOVIE)'
        else:
            videoType = ''

        cleantitle = title.replace(lang, "")
        for fragment in noise:
            cleantitle = cleantitle.replace(fragment, '')
        cleantitle = cleantitle.strip()

        if videoType:
            contentType, action = "movie", "findvideos"
        else:
            contentType, action = "tvshow", "episodios"

        if lowered in blacklist:
            continue

        label = support.typo(cleantitle + videoType, 'bold') + support.typo(lang, '_ [] color kod')
        itemlist.append(
            Item(channel=item.channel,
                 action=action,
                 contentType=contentType,
                 title=label,
                 fulltitle=cleantitle,
                 show=cleantitle,
                 url=url,
                 thumbnail=thumb))

    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    autorenumber.renumber(itemlist)
    support.nextPage(itemlist, item, data,
                     r'<a class="next page-numbers" href="([^"]+)">')

    return itemlist
def peliculas(item):
    """Build the movie listing for the page at ``item.url``.

    The page is downloaded and every entry matched by the pattern becomes an
    ``Item`` with action ``findvideos``.  The list is then passed to TMDB for
    metadata lookup and to ``support.nextPage`` for pagination handling.

    NOTE(review): this function used to extract a page sub-section
    (``bloque``) chosen from ``item.args`` but never read it; matching always
    ran on the full page.  That dead extraction was removed, and matching
    still runs on the full page for every ``item.args`` value.  Please confirm
    that scraping was not meant to be skipped when
    ``item.args == 'orderalf'``.

    Args:
        item: Item being opened; ``item.url`` is fetched and ``item.channel``
            is copied onto every result.

    Returns:
        list: the ``Item`` objects built from the page.
    """
    logger.info("%s mainlist peliculas log: %s" % (__channel__, item))

    # Download the page.
    data = httptools.downloadpage(item.url, headers=headers).data

    # Capture groups, in order: link, thumbnail path, quality/language block,
    # title and year.
    patron = '<h2>.<a href="(.*?)".*?src="(.*?)".*?class="trdublaj">(.*?)<div class="ml-item-hiden".*?class="h4">(.*?)<.*?label">(.*?)</span'
    matches = scrapertools.find_multiple_matches(data, patron)

    itemlist = []
    for scrapedurl, scrapedimg, scrapedqualang, scrapedtitle, scrapedyear in matches:
        # Anything not explicitly flagged as subtitled is labelled as Italian.
        scrapedlang = 'Sub-Ita' if 'sub ita' in scrapedqualang.lower() else 'ITA'

        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos",
                 contentTitle=scrapedtitle,
                 fulltitle=scrapedtitle,
                 url=scrapedurl,
                 infoLabels={'year': scrapedyear},
                 # Was misspelled ``contenType``, which left the content type
                 # stored under an attribute name nothing else spells that way.
                 contentType="movie",
                 # The scraped image path is prefixed with the site host.
                 thumbnail=host + scrapedimg,
                 title="%s [%s]" % (scrapedtitle, scrapedlang),
                 language=scrapedlang))

    # The site provides the release year, so the TMDB title+year lookup is
    # precise; scraping posters and plots from the site itself (which is
    # sometimes wrong) is therefore unnecessary.
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)

    # Pagination.
    support.nextPage(itemlist, item, data, '<span>[^<]+</span>[^<]+<a href="(.*?)">')

    return itemlist
def peliculas(item):
    """Build the movie listing for the page at ``item.url``.

    Two passes are made over the downloaded page: one collects the visible
    entries (link, title, thumbnail, quality, optional subtitle marker) and
    one collects the detail records (year, duration, plot) keyed by their
    "watch" link.  Each entry is joined with the detail record that has the
    same URL.

    Entries without a matching detail record are still listed, with an empty
    plot and no year/duration.  A duration that is not a plain number of
    minutes is ignored instead of aborting the listing.

    Args:
        item: Item being opened; ``item.url`` is fetched and ``item.channel``
            is copied onto every result.

    Returns:
        list: the ``Item`` objects built from the page.
    """
    support.log()
    itemlist = []

    data = httptools.downloadpage(item.url, headers=headers).data

    # Capture groups, in order: link, title, thumbnail path, quality, the
    # whole optional subtitle <div>, its text, and an empty alternative.
    patron = r'<div class="cover_kapsul ml-mask".*?<a href="(.*?)">(.*?)<\/a>.*?<img .*?src="(.*?)".*?<div class="trdublaj">(.*?)<\/div>.(<div class="sub_ita">(.*?)<\/div>|())'
    matches = scrapertoolsV2.find_multiple_matches(data, patron)

    # The detail records do not depend on the entry being processed, so they
    # are scanned once and indexed by URL.  ``setdefault`` keeps the first
    # record for a URL, matching the original first-match-wins lookup.
    info_patron = r'<span class="ml-label">([0-9]+)+<\/span>.*?<span class="ml-label">(.*?)<\/span>.*?<p class="ml-cat".*?<p>(.*?)<\/p>.*?<a href="(.*?)" class="ml-watch">'
    info_by_url = {}
    for year, duration, plot, watch_url in scrapertoolsV2.find_multiple_matches(data, info_patron):
        info_by_url.setdefault(watch_url, (year, duration, plot))

    # NOTE(review): ``contenType`` looks like a misspelling of
    # ``contentType``.  The misspelled attribute is still read first so that
    # callers setting it keep working; the correctly spelled one is the
    # fallback.
    content_type = item.contenType or item.contentType

    for scrapedurl, scrapedtitle, scrapedthumbnail, scrapedquality, subDiv, subText, _unused in matches:
        infoLabels = {}
        scrapedplot = ''

        details = info_by_url.get(scrapedurl)
        if details is not None:
            year, duration, scrapedplot = details
            infoLabels['year'] = year
            try:
                # Convert the duration from minutes to seconds.
                infoLabels['duration'] = int(duration.replace(' min', '')) * 60
            except ValueError:
                # Unparseable duration: leave it out rather than fail.
                pass

        # The scraped image path is prefixed with the site host.
        scrapedthumbnail = host + scrapedthumbnail
        scrapedtitle = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
        quality = scrapedquality.strip()

        fulltitle = scrapedtitle
        if subDiv:
            fulltitle += support.typo(subText + ' _ () color limegreen')
        fulltitle += support.typo(quality + ' _ [] color kod')

        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos",
                 contentType=content_type,
                 contentTitle=scrapedtitle,
                 contentQuality=quality,
                 plot=scrapedplot,
                 title=fulltitle,
                 fulltitle=scrapedtitle,
                 show=scrapedtitle,
                 url=scrapedurl,
                 infoLabels=infoLabels,
                 thumbnail=scrapedthumbnail))

    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
    support.nextPage(itemlist, item, data, '<span>[^<]+</span>[^<]+<a href="(.*?)">')

    return itemlist
def peliculas(item):
    """Build the movie listing for the page at ``item.url``.

    The page is downloaded and every entry matched by the pattern becomes an
    ``Item`` with action ``findvideos``.  The list is then passed to TMDB for
    metadata lookup and to ``support.nextPage`` for pagination handling.

    Args:
        item: Item being opened; ``item.url`` is fetched and ``item.channel``
            is copied onto every result.

    Returns:
        list: the ``Item`` objects built from the page.
    """
    logger.info("%s mainlist peliculas log: %s" % (__channel__, item))

    # Download the page.
    data = httptools.downloadpage(item.url, headers=headers).data

    # Capture groups, in order: link, image, title, the first
    # "ml-item-label" (used as the year) and the third "ml-item-label"
    # (used as the language); the second label is skipped.
    patron = ('class="innerImage">.*?href="([^"]+)".*?src="([^"]+)".*?'
              'class="ml-item-title">([^"]+)</.*?class="ml-item-label">'
              '(.*?)<.*?class="ml-item-label">.*?class="ml-item-label">(.*?)</')
    matches = scrapertools.find_multiple_matches(data, patron)

    itemlist = []
    for scrapedurl, scrapedimg, scrapedtitle, scrapedyear, scrapedlang in matches:
        # Anything not labelled "italiano" is treated as subtitled.
        scrapedlang = 'ITA' if 'italiano' in scrapedlang.lower() else 'Sub-Ita'

        itemlist.append(
            Item(channel=item.channel,
                 action="findvideos",
                 contentTitle=scrapedtitle,
                 fulltitle=scrapedtitle,
                 url=scrapedurl,
                 infoLabels={'year': scrapedyear},
                 # Was misspelled ``contenType``, which left the content type
                 # stored under an attribute name nothing else spells that way.
                 contentType="movie",
                 thumbnail=scrapedimg,
                 title="%s [%s]" % (scrapedtitle, scrapedlang),
                 language=scrapedlang,
                 context="buscar_trailer"))

    # The site provides the release year, so the TMDB title+year lookup is
    # precise; scraping posters and plots from the site itself (which is
    # sometimes wrong) is therefore unnecessary.
    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)

    # Pagination.  The current-page marker may have several digits; the
    # previous single ``\d`` stopped matching from page 10 onwards, which hid
    # the next-page link.  A raw string also avoids the invalid-escape
    # warning newer interpreters raise for ``\d`` in a plain literal.
    support.nextPage(itemlist, item, data, r'<span>\d+</span> <a href="([^"]+)">')

    return itemlist
def lista_serie(item):
    """Build the TV-show listing for the page at ``item.url``.

    Every matched block becomes an ``Item`` with action ``episodios``.  A
    four-digit year in parentheses inside the scraped title, when present, is
    stored in ``infoLabels['year']`` before the title is passed through
    ``cleantitle``.

    Args:
        item: Item being opened; ``item.url`` is fetched and ``item.channel``
            is copied onto every result.

    Returns:
        list: the ``Item`` objects built from the page.
    """
    support.log(item.channel + " lista_serie")

    page = httptools.downloadpage(item.url, headers=headers).data

    # Capture groups, in order: link, lazily loaded image and title text.
    row_re = (
        r'<div class="item">\s*<a href="([^"]+)" data-original="([^"]+)" class="lazy inner">'
        r'[^>]+>[^>]+>[^>]+>[^>]+>([^<]+)<'
    )

    itemlist = []
    for link, poster, raw_title in re.findall(row_re, page, re.DOTALL):
        # The year must be read before the title is cleaned.
        year = scrapertools.find_single_match(raw_title, r'\((\d{4})\)')
        labels = {'year': year} if year else {}
        name = cleantitle(raw_title)

        entry = Item(channel=item.channel,
                     action="episodios",
                     title=name,
                     fulltitle=name,
                     url=link,
                     thumbnail=poster,
                     show=name,
                     infoLabels=labels,
                     contentType='tvshow',
                     folder=True)
        itemlist.append(entry)

    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)

    # Pagination.
    support.nextPage(itemlist, item, page, '<li><a href="([^"]+)">Pagina successiva')

    return itemlist