Python remove_htmltags示例，core.scrapertools.remove_htmltags Python示例

示例#1

0

显示文件

def ultimas(item):
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    realplot = ''
    patron = '<li><A HREF="([^"]+)"> <.*?>Ver ([^<]+)<\/A><\/li>'

    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle in matches:
        url = urlparse.urljoin(item.url, scrapedurl)
        data = httptools.downloadpage(scrapedurl).data
        thumbnail = scrapertools.get_match(
            data, '<meta property="og:image" content="([^"]+)".*?>')
        realplot = scrapertools.find_single_match(data,
                                                  '\/><\/a>([^*]+)<p><\/p>.*')
        plot = scrapertools.remove_htmltags(realplot)
        plot = ""
        title = scrapedtitle.replace('online', '')
        title = scrapertools.decodeHtmlentities(title)
        fanart = item.fanart
        itemlist.append(
            Item(channel=item.channel,
                 action="idioma",
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=fanart,
                 contentSerieName=title,
                 context=autoplay.context))

    return itemlist

示例#2

0

显示文件

文件： mundoflv.py 项目： leonardop318/pelisalacarta

def temporadas(item):
    logger.info("pelisalacarta.channels.mundoflv temporadas")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot = ''
    patron = "<button class='classnamer' onclick='javascript: mostrarcapitulos.*?blank'>([^<]+)</button>"

    matches = re.compile(patron, re.DOTALL).findall(data)

    serieid = scrapertools.find_single_match(
        data,
        "<link rel='shortlink' href='http:\/\/mundoflv.com\/\?p=([^']+)' \/>")

    for scrapedtitle in matches:
        url = 'http://mundoflv.com/wp-content/themes/wpRafael/includes/capitulos.php?serie=' + serieid + '&sr=&temporada=' + scrapedtitle
        title = 'Temporada ' + scrapertools.decodeHtmlentities(scrapedtitle)
        thumbnail = item.thumbnail
        realplot = scrapertools.find_single_match(data,
                                                  '\/><\/a>([^*]+)<p><\/p>.*')
        plot = scrapertools.remove_htmltags(realplot)
        fanart = ''  #scrapertools.find_single_match(data,'<img src="([^"]+)"/>.*?</a>')
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(channel=item.channel,
                 action="episodios",
                 title=title,
                 fulltitle=item.title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=fanart))

    return itemlist

示例#3

0

显示文件

文件： mundoflv.py 项目： jurrKodi/pelisalacarta

def ultimas(item):
    logger.info("pelisalacarta.channels.mundoflv masvistas")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot = ""
    patron = '<li><A HREF="([^"]+)"> <.*?>Ver ([^<]+)<\/A><\/li>'
    # patron = '<li class=".*?="([^"]+)".*?>([^<]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle in matches:
        url = urlparse.urljoin(item.url, scrapedurl)
        data = scrapertools.cache_page(scrapedurl)
        thumbnail = scrapertools.get_match(data, '<meta property="og:image" content="([^"]+)" />')
        realplot = scrapertools.find_single_match(data, "\/><\/a>([^*]+)<p><\/p>.*")
        plot = scrapertools.remove_htmltags(realplot)
        plot = ""
        title = scrapedtitle.replace("online", "")
        title = scrapertools.decodeHtmlentities(title)
        # title = title.replace("&","x");
        fanart = item.fanart
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(
                channel=item.channel,
                action="temporadas",
                title=title,
                url=url,
                thumbnail=thumbnail,
                plot=plot,
                fanart=fanart,
            )
        )

    return itemlist

示例#4

0

显示文件

文件： mundoflv.py 项目： neno1978/pelisalacarta

def temporadas(item):
    logger.debug("pelisalacarta.channels.mundoflv temporadas")
    
    itemlist = []
    templist = []
    data = scrapertools.cache_page(item.url)
    realplot = ''
    patron = "<button class='classnamer' onclick='javascript: mostrarcapitulos.*?blank'>([^<]+)</button>"
    
    matches = re.compile(patron,re.DOTALL).findall(data)
    
    serieid = scrapertools.find_single_match(data,"<link rel='shortlink' href='http:\/\/mundoflv.com\/\?p=([^']+)' \/>")
    item.thumbnail = item.thumbvid
    for scrapedtitle in matches:
        url = 'http://mundoflv.com/wp-content/themes/wpRafael/includes/capitulos.php?serie='+serieid+'&sr=&temporada=' + scrapedtitle
        title = 'Temporada '+ scrapertools.decodeHtmlentities(scrapedtitle)
        thumbnail = item.thumbnail
        realplot = scrapertools.find_single_match(data, '\/><\/a>([^*]+)<p><\/p>.*')
        plot = scrapertools.remove_htmltags(realplot)
        fanart = ''#scrapertools.find_single_match(data,'<img src="([^"]+)"/>.*?</a>')
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"])")
        itemlist.append( Item(channel=item.channel, action="episodios" , title=title , fulltitle=item.title, url=url, thumbnail=thumbnail, plot=plot, fanart = fanart, extra1=item.extra1, contentSerieName=item.contentSerieName))
    
    if item.extra=='temporadas':
        for tempitem in itemlist:
    	    templist += episodios(tempitem)
        
    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=item.channel, title='[COLOR yellow]Añadir esta serie a la biblioteca[/COLOR]', url=item.url,
                             action="add_serie_to_library", extra="temporadas", contentSerieName=item.contentSerieName, extra1 = item.extra1))
    if item.extra=='temporadas':
    	return templist
    else:
    	return itemlist

示例#5

0

显示文件

def ultimas(item):
    logger.info("pelisalacarta.channels.qserie masvistas")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot = ''
    patron = '<li><a title="([^"]+)" href="([^"]+)"><strong>.*?</a></li>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedtitle, scrapedurl in matches:
        url = urlparse.urljoin(item.url, scrapedurl)
        data = scrapertools.cache_page(scrapedurl)
        thumbnail = scrapertools.get_match(
            data, '<link rel="image_src" href="([^"]+)"/>')
        realplot = scrapertools.find_single_match(
            data, '<p itemprop="articleBody">([^<]+)<\/p> ')
        plot = scrapertools.remove_htmltags(realplot)
        inutil = re.findall(r' Temporada \d', scrapedtitle)
        title = scrapedtitle
        title = scrapertools.decodeHtmlentities(title)
        realtitle = scrapedtitle.replace(inutil[0], '')
        fanart = 'https://s31.postimg.org/3ua9kwg23/ultimas.png'
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(channel=item.channel,
                 action="temporadas",
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=fanart,
                 contentSerieName=realtitle))

    return itemlist

示例#6

0

显示文件

文件： mundoflv.py 项目： koko200/pelisalacarta

def temporadas(item):
    logger.debug("pelisalacarta.channels.mundoflv temporadas")
    
    itemlist = []
    templist = []
    data = scrapertools.cache_page(item.url)
    realplot = ''
    patron = "<button class='classnamer' onclick='javascript: mostrarcapitulos.*?blank'>([^<]+)</button>"
    
    matches = re.compile(patron,re.DOTALL).findall(data)
    
    serieid = scrapertools.find_single_match(data,"<link rel='shortlink' href='http:\/\/mundoflv.com\/\?p=([^']+)' \/>")
    item.thumbnail = item.thumbvid
    for scrapedtitle in matches:
        #url = 'http://mundoflv.com/wp-content/themes/wpRafael/includes/capitulos.php?serie='+serieid+'&sr=&temporada=' + scrapedtitle
        url = 'http://mundoflv.com/wp-content/themes/wpRafael/includes/capitulos.php?serie='+serieid+'&temporada=' + scrapedtitle
        title = 'Temporada '+ scrapertools.decodeHtmlentities(scrapedtitle)
        contentSeasonNumber = scrapedtitle
        thumbnail = item.thumbnail
        realplot = scrapertools.find_single_match(data, '\/><\/a>([^*]+)<p><\/p>.*')
        plot = scrapertools.remove_htmltags(realplot)
        fanart = ''#scrapertools.find_single_match(data,'<img src="([^"]+)"/>.*?</a>')
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"])")
        itemlist.append( Item(channel=item.channel, action="episodiosxtemp" , title=title , fulltitle=item.title, url=url, thumbnail=thumbnail, plot=plot, fanart = fanart, extra1=item.extra1, contentSerieName=item.contentSerieName, contentSeasonNumber = contentSeasonNumber))
    
            
    if config.get_library_support() and len(itemlist) > 0:
        itemlist.append(Item(channel=item.channel, title='[COLOR yellow]Añadir esta serie a la biblioteca[/COLOR]', url=item.url,
                             action="add_serie_to_library", extra="episodios", contentSerieName=item.contentSerieName, extra1 = item.extra1))
    
    return itemlist

示例#7

0

显示文件

文件： mundoflv.py 项目： shorbete/pelisalacarta

def recomendadas(item):
    logger.info("pelisalacarta.channels.mundoflv masvistas")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot = ''
    patron = '<li><A HREF="([^"]+)"><.*?>Ver ([^<]+)<\/A><\/li>'
    # patron = '<li class=".*?="([^"]+)".*?>([^<]+)</a>'
    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle in matches:
        url = urlparse.urljoin(item.url, scrapedurl)
        data = scrapertools.cache_page(scrapedurl)
        thumbnail = scrapertools.get_match(
            data, '<meta property="og:image" content="([^"]+)" />')
        realplot = scrapertools.find_single_match(data,
                                                  '\/><\/a>([^*]+)<p><\/p>.*')
        plot = scrapertools.remove_htmltags(realplot)
        title = scrapedtitle.replace('online', '')
        title = title = scrapertools.decodeHtmlentities(title)
        # title = title.replace("&","x");
        fanart = item.fanart
        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(channel=item.channel,
                 action="temporadas",
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=fanart,
                 contentSerieName=title))

    return itemlist

示例#8

0

显示文件

文件： mejortorrent.py 项目： fcammed/addon

def buscador(item):
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    patron = "<a href='(/serie-descargar-torrent[^']+)'[^>]+>(.*?)</a>"
    patron += ".*?<span style='color:gray;'>([^']+)</span>"
    patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"

    matches = scrapertools.find_multiple_matches(data, patron)

    for scrapedurl, scrapedtitle, scrapedinfo in matches:
        title = scrapertools.remove_htmltags(scrapedtitle).decode('iso-8859-1').encode(
            'utf8') + ' ' + scrapedinfo.decode('iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")

        itemlist.append(
            Item(channel=item.channel, action="episodios", title=title, url=url, folder=True, extra="series",
                 viewmode="movie_with_plot"))

    # busca pelis
    patron = "<a href='(/peli-descargar-torrent-[^']+)'[^>]+>(.*?)</a>"
    patron_enlace = "/peli-descargar-torrent-\d+(.*?)\.html"
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedtitle in matches:
        title = scrapertools.remove_htmltags(scrapedtitle).decode('iso-8859-1').encode('utf-8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")
        itemlist.append(Item(channel=item.channel, action="play", title=title, url=url, folder=False, extra="pelicula"))

    # busca docu
    patron = "<a href='(/doc-descargar-torrent[^']+)' .*?"
    patron += "<font Color='darkblue'>(.*?)</font>.*?"
    patron += "<td align='right' width='20%'>(.*?)</td>"
    patron_enlace = "/doc-descargar-torrent-\d+-\d+-(.*?)\.html"
    matches = re.compile(patron, re.DOTALL).findall(data)
    for scrapedurl, scrapedtitle, scrapedinfo in matches:
        title = scrapedtitle.decode('iso-8859-1').encode('utf8') + " " + scrapedinfo.decode('iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")

        itemlist.append(Item(channel=item.channel, action="episodios",
                             title=title, url=url, folder=True, extra="docu",
                             viewmode="movie_with_plot"))

    return itemlist

示例#9

0

显示文件

文件： seriesflv.py 项目： Ibrahimxelil/plugin.video.balandro

def last_episodes(item):
    logger.info()
    itemlist = []

    perpage = 10  # para que no tarde tanto por las múltiples llamadas a tmdb (serie / temporada / episodio)
    if not item.page: item.page = 0
    if not item.lang: item.lang = 'es'

    data = httptools.downloadpage(host,
                                  headers=headers,
                                  use_cache=True,
                                  cache_duration=3600).data

    # ~ matches = scrapertools.find_multiple_matches(data, '<a href="([^"]+)" class="item-one" lang="%s"(.*?)</a>' % item.lang)
    matches = scrapertools.find_multiple_matches(
        data, '<a href="([^"]+)" class="item-one"(.*?)</a>')
    matches = filter(lambda x: 'language/%s.png"' % item.lang in x[1],
                     matches)  # seleccionar idioma pedido

    num_matches = len(matches)
    desde = item.page * perpage
    hasta = desde + perpage

    for url, resto in matches[desde:hasta]:
        try:
            title = scrapertools.remove_htmltags(
                scrapertools.find_single_match(
                    resto, '<div class="i-title">(.*?)</div>')).strip()
            title = re.sub('\(\d{4}\)$', '', title).strip()
            season, episode = scrapertools.find_single_match(
                url, '/(\d+)/(\d+)(?:/|)$')
        except:
            continue
        # ~ logger.info('%s %s %s' % (season, episode, title))
        if not title or not season or not episode: continue

        titulo = '%sx%s %s' % (season, episode, title)

        itemlist.append(
            item.clone(action='findvideos',
                       url=url,
                       title=titulo,
                       contentType='episode',
                       contentSerieName=title,
                       contentSeason=season,
                       contentEpisodeNumber=episode))

    tmdb.set_infoLabels(itemlist)

    if num_matches > hasta:  # subpaginación interna
        itemlist.append(
            item.clone(title='>> Página siguiente',
                       page=item.page + 1,
                       action='last_episodes'))

    return itemlist

示例#10

0

显示文件

文件： mundoflv.py 项目： enursha101/xbmc-addon

def fail_tmdb(itemlist):
    logger.info()
    realplot=''
    for item in itemlist:
        if item.infoLabels['plot'] =='':
            data = httptools.downloadpage(item.url).data
            if item.thumbnail == '':
                item.thumbnail= scrapertools.find_single_match(data,patrones[0])
            realplot = scrapertools.find_single_match(data, patrones[1])
            item.plot = scrapertools.remove_htmltags(realplot)
    return itemlist

示例#11

0

显示文件

文件： cineblog01.py 项目： sim1pan0/channels

 def load_vid_series(html, item, itemlist, blktxt):
     if len(blktxt) > 2:
         vtype = scrapertools.remove_htmltags(blktxt.strip()[:-1]) + " - "
     else:
         vtype = ''
     patron = r'<a href=([^\s]+)\starget=_blank.*?>(.*?)<'
     # Estrae i contenuti
     matches = re.compile(patron, re.DOTALL).finditer(html)
     for match in matches:
         scrapedurl = match.group(1)
         scrapedtitle = match.group(2)
         title = item.title + " [COLOR blue][" + vtype + scrapedtitle + "][/COLOR]"
         itemlist.append(
             Item(channel=__channel__,
                  action="play",
                  title=title,
                  url=scrapedurl,
                  fulltitle=item.fulltitle,
                  show=item.show,
                  folder=False))

示例#12

0

显示文件

文件： mundoflv.py 项目： CYBERxNUKE/xbmc-addon

def recomendadas(item):
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    realplot=''
    patron = '<li><A HREF="([^"]+)"><.*?>Ver ([^<]+)<\/A><\/li>'
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedurl,scrapedtitle in matches:
        url = urlparse.urljoin(item.url,scrapedurl)
        data = httptools.downloadpage(scrapedurl).data
        thumbnail= scrapertools.get_match(data,'<meta property="og:image" content="([^"]+)" />')
        realplot = scrapertools.find_single_match(data, '\/><\/a>([^*]+)<p><\/p>.*')
        plot = scrapertools.remove_htmltags(realplot)
        title = scrapedtitle.replace('online','')
        title = title = scrapertools.decodeHtmlentities(title) 
        fanart = item.fanart
        itemlist.append( Item(channel=item.channel, action="idioma" , title=title , url=url, thumbnail=thumbnail, plot=plot, fanart=fanart,contentSerieName=title))

    return itemlist

示例#13

0

显示文件

文件： mundoflv.py 项目： jurrKodi/pelisalacarta

def temporadas(item):
    logger.info("pelisalacarta.channels.mundoflv temporadas")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot = ""
    patron = "<button class='classnamer' onclick='javascript: mostrarcapitulos.*?blank'>([^<]+)</button>"

    matches = re.compile(patron, re.DOTALL).findall(data)

    serieid = scrapertools.find_single_match(
        data, "<link rel='shortlink' href='http:\/\/mundoflv.com\/\?p=([^']+)' \/>"
    )

    for scrapedtitle in matches:
        url = (
            "http://mundoflv.com/wp-content/themes/wpRafael/includes/capitulos.php?serie="
            + serieid
            + "&sr=&temporada="
            + scrapedtitle
        )
        title = "Temporada " + scrapertools.decodeHtmlentities(scrapedtitle)
        thumbnail = item.thumbnail
        realplot = scrapertools.find_single_match(data, "\/><\/a>([^*]+)<p><\/p>.*")
        plot = scrapertools.remove_htmltags(realplot)
        fanart = ""  # scrapertools.find_single_match(data,'<img src="([^"]+)"/>.*?</a>')
        if DEBUG:
            logger.info("title=[" + title + "], url=[" + url + "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(
                channel=item.channel,
                action="episodios",
                title=title,
                fulltitle=item.title,
                url=url,
                thumbnail=thumbnail,
                plot=plot,
                fanart=fanart,
            )
        )

    return itemlist

示例#14

0

显示文件

文件： qserie.py 项目： jurrKodi/pelisalacarta

def ultimas(item):
    logger.info("pelisalacarta.channels.qserie masvistas")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot=''
    patron ='<li><a title="([^"]+)" href="([^"]+)"><strong>.*?</a></li>' 
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedtitle, scrapedurl in matches:
        url = urlparse.urljoin(item.url,scrapedurl)
        data = scrapertools.cache_page(scrapedurl)
        thumbnail= scrapertools.get_match(data,'<link rel="image_src" href="([^"]+)"/>')
        realplot = scrapertools.find_single_match(data, '<p itemprop="articleBody">([^<]+)<\/p> ')
        plot = scrapertools.remove_htmltags(realplot)
        title = scrapedtitle
        title = scrapertools.decodeHtmlentities(title)
        fanart = 'https://s31.postimg.org/3ua9kwg23/ultimas.png'
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"])")
        itemlist.append( Item(channel=item.channel, action="temporadas" , title=title , url=url, thumbnail=thumbnail, plot=plot, fanart=fanart))

    return itemlist

示例#15

0

显示文件

文件： elitetorrent2.py 项目： d3v3l0p1n/mitvspain

def getsearchlist(item):

    logger.info()
    itemlist = []

    data = httptools.downloadpage(item.url).data

    # PATRON URL
    patron = '<div class="title"> <a href="(.*?)">'

    # PATRON IMAGE
    patron = '<img src="(.*?)"'

    # PATRON NAME
    patron = 'alt="(.*?)"'

    # PATRON DESCRIPTION
    patron += '<p>([^>]+)</p>'

    matches = scrapertools.find_multiple_matches(data, patron)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedplot in matches:
        title = scrapertools.remove_htmltags(scrapedtitle).decode(
            'iso-8859-1').encode('utf8') + ' ' + scrapedinfo.decode(
                'iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url, scrapedurl)
        thumbnail = urlparse.urljoin(item.url, urllib.quote(scrapedthumbnail))
        plot = scrapedplot
        logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" +
                     thumbnail + "]")
        itemlist.append(
            Item(channel=item.channel,
                 action="pelicula",
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 folder=False,
                 extra=extra))

示例#16

0

显示文件

文件： qserie.py 项目： CYBERxNUKE/xbmc-addon

def ultimas(item):
    logger.info()
    itemlist = []
    data = httptools.downloadpage(item.url).data
    realplot=''
    patron ='<li><a title="([^"]+)" href="([^"]+)"><strong>.*?</a></li>' 
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedtitle, scrapedurl in matches:
        url = urlparse.urljoin(item.url,scrapedurl)
        data = httptools.downloadpage(scrapedurl).data
        thumbnail= scrapertools.get_match(data,'<link rel="image_src" href="([^"]+)"/>')
        realplot = scrapertools.find_single_match(data, '<p itemprop="articleBody">([^<]+)<\/p> ')
        plot = scrapertools.remove_htmltags(realplot)
        inutil = re.findall(r' Temporada \d', scrapedtitle)
        title = scrapedtitle
        title = scrapertools.decodeHtmlentities(title)
        realtitle = scrapedtitle.replace(inutil[0],'')
        fanart = 'https://s31.postimg.org/3ua9kwg23/ultimas.png'
        itemlist.append( Item(channel=item.channel, action="temporadas" , title=title , url=url, thumbnail=thumbnail, plot=plot, fanart=fanart, contentSerieName = realtitle))

    return itemlist

示例#17

0

显示文件

def listado(item):                                                              # Listado principal y de búsquedas
    logger.info()
    
    itemlist = []
    item.category = categoria
    thumb_pelis = get_thumb("channels_movie.png")
    thumb_series = get_thumb("channels_tvshow.png")

    #logger.debug(item)
    
    curr_page = 1                                                               # Página inicial
    last_page = 99999                                                           # Última página inicial
    last_page_print = 1                                                         # Última página inicial, para píe de página
    page_factor = 1.0                                                           # Factor de conversión de pag. web a pag. Alfa
    if item.curr_page:
        curr_page = int(item.curr_page)                                         # Si viene de una pasada anterior, lo usamos
        del item.curr_page                                                      # ... y lo borramos
    if item.last_page:
        last_page = int(item.last_page)                                         # Si viene de una pasada anterior, lo usamos
        del item.last_page                                                      # ... y lo borramos
    if item.page_factor:
        page_factor = float(item.page_factor)                                   # Si viene de una pasada anterior, lo usamos
        del item.page_factor                                                    # ... y lo borramos
    if item.last_page_print:
        last_page_print = item.last_page_print                                  # Si viene de una pasada anterior, lo usamos
        del item.last_page_print                                                # ... y lo borramos
    
    cnt_tot = 30                                                                # Poner el num. máximo de items por página
    cnt_title = 0                                                               # Contador de líneas insertadas en Itemlist
    if item.cnt_tot_match:
        cnt_tot_match = float(item.cnt_tot_match)                               # restauramos el contador TOTAL de líneas procesadas de matches
        del item.cnt_tot_match
    else:
        cnt_tot_match = 0.0                                                     # Contador TOTAL de líneas procesadas de matches
    inicio = time.time()                                    # Controlaremos que el proceso no exceda de un tiempo razonable
    fin = inicio + 5                                                            # Después de este tiempo pintamos (segundos)
    timeout_search = timeout * 2                                                # Timeout para descargas
    if item.extra == 'search' and item.extra2 == 'episodios':                   # Si viene de episodio que quitan los límites
        cnt_tot = 999
        fin = inicio + 30

    #Sistema de paginado para evitar páginas vacías o semi-vacías en casos de búsquedas con series con muchos episodios
    title_lista = []                                        # Guarda la lista de series que ya están en Itemlist, para no duplicar lineas
    if item.title_lista:                                    # Si viene de una pasada anterior, la lista ya estará guardada
        title_lista.extend(item.title_lista)                                    # Se usa la lista de páginas anteriores en Item
        del item.title_lista                                                    # ... limpiamos
    matches = []
        
    if not item.extra2:                                                         # Si viene de Catálogo o de Alfabeto
        item.extra2 = ''
        
    post = None
    if item.post:                                                               # Rescatamos el Post, si lo hay
        post = item.post
    
    next_page_url = item.url
    # Máximo num. de líneas permitidas por TMDB. Máx de 5 segundos por Itemlist para no degradar el rendimiento
    while (cnt_title < cnt_tot and curr_page <= last_page and fin > time.time()) or item.matches:
    
        # Descarga la página
        data = ''
        cnt_match = 0                                                           # Contador de líneas procesadas de matches
        
        if not item.matches:                                                    # si no viene de una pasada anterior, descargamos
            data, success, code, item, itemlist = generictools.downloadpage(next_page_url, 
                                          timeout=timeout_search, post=post, s2=False, 
                                          item=item, itemlist=itemlist)         # Descargamos la página)
            
            # Verificamos si se ha cargado una página correcta
            curr_page += 1                                                      # Apunto ya a la página siguiente
            if not data or not success:                                         # Si la web está caída salimos sin dar error
                if len(itemlist) > 1:                                           # Si hay algo que pintar lo pintamos 
                    last_page = 0
                    break
                return itemlist                                                 # Si no hay nada más, salimos directamente
        
        #Patrón para búsquedas, pelis y series
        patron = '<div\s*class="[^"]+">\s*<div\s*class="card">\s*<a\s*href="([^"]+)"\s*'
        patron += 'class="card__cover">\s*<img\s*src="([^"]+)"\s*alt="[^"]*">\s*'
        patron += '<div\s*class="card__play">.*?<\/div>\s*<ul\s*class="card__list">\s*'
        patron += '<li>([^<]+)<\/li>\s*<\/ul>\s*<\/a>\s*<div\s*class="card__content">\s*'
        patron += '<h3\s*class="card__title"><a\s*href="[^"]+">([^<]+)<\/a><\/h3>'
        patron += '.*?<\/div>\s*<\/div>\s*<\/div>'

        if not item.matches:                                                    # De pasada anterior?
            matches = re.compile(patron, re.DOTALL).findall(data)
        else:
            matches = item.matches
            del item.matches
            
        #logger.debug("PATRON: " + patron)
        #logger.debug(matches)
        #logger.debug(data)

        if not matches and item.extra != 'search' and not item.extra2:          #error
            logger.error("ERROR 02: LISTADO: Ha cambiado la estructura de la Web " 
                        + " / PATRON: " + patron + " / DATA: " + data)
            itemlist.append(item.clone(action='', title=item.channel.capitalize() + 
                        ': ERROR 02: LISTADO: Ha cambiado la estructura de la Web.  ' 
                        + 'Reportar el error con el log'))
            break                                                               #si no hay más datos, algo no funciona, pintamos lo que tenemos
        
        if not matches and item.extra == 'search':                              #búsqueda vacía
            if len(itemlist) > 0:                                               # Si hay algo que pintar lo pintamos
                last_page = 0
                break
            return itemlist                                                     #Salimos

        #Buscamos la próxima página
        next_page_url = re.sub(r'page\/(\d+)', 'page/%s' % str(curr_page), item.url)
        #logger.debug('curr_page: ' + str(curr_page) + ' / last_page: ' + str(last_page))
        
        #Buscamos la última página
        if last_page == 99999:                                                  #Si es el valor inicial, buscamos
            patron_last = '<ul\s*class="pagination[^"]+">.*?<li>\s*<a\s*class="page-numbers"\s*href="[^"]+">'
            patron_last += '(\d+)<\/a><\/li>\s*<li>\s*<a\s*class="next page-numbers"\s*href="[^"]+">»<\/a><\/li>\s*<\/ul>'
            try:
                last_page = int(scrapertools.find_single_match(data, patron_last))
                page_factor = float(len(matches)) / float(cnt_tot)
            except:                                                             #Si no lo encuentra, lo ponemos a 999
                last_page = 1
                last_page_print = int((float(len(matches)) / float(cnt_tot)) + 0.999999)

            #logger.debug('curr_page: ' + str(curr_page) + ' / last_page: ' + str(last_page))
        
        #Empezamos el procesado de matches
        for scrapedurl, scrapedthumb, scrapedquality, scrapedtitle in matches:
            cnt_match += 1
            
            title = scrapedtitle

            title = scrapertools.remove_htmltags(title).rstrip('.')             # Removemos Tags del título
            url = scrapedurl
            
            title_subs = []                                                     #creamos una lista para guardar info importante
            
            title = title.replace("á", "a").replace("é", "e").replace("í", "i")\
                    .replace("ó", "o").replace("ú", "u").replace("ü", "u")\
                    .replace("ï¿½", "ñ").replace("Ã±", "ñ").replace("&#8217;", "'")\
                    .replace("&amp;", "&")

            # Se filtran las entradas para evitar duplicados de Temporadas
            url_list = url
            if url_list in title_lista:                                         #Si ya hemos procesado el título, lo ignoramos
                continue
            else:
                title_lista += [url_list]                                       #la añadimos a la lista de títulos

            # Si es una búsqueda por años, filtramos por tipo de contenido
            if 'anno' in item.extra2 and item.extra == 'series' and '/serie' not in url:
                continue
            elif 'anno' in item.extra2 and item.extra == 'peliculas' and '/serie' in url:
                continue
            
            cnt_title += 1                                                      # Incrementamos el contador de entradas válidas
            
            item_local = item.clone()                                           #Creamos copia de Item para trabajar
            if item_local.tipo:                                                 #... y limpiamos
                del item_local.tipo
            if item_local.totalItems:
                del item_local.totalItems
            if item_local.intervencion:
                del item_local.intervencion
            if item_local.viewmode:
                del item_local.viewmode
            item_local.extra2 = True
            del item_local.extra2
            item_local.text_bold = True
            del item_local.text_bold
            item_local.text_color = True
            del item_local.text_color
            
            # Después de un Search se restablecen las categorías
            if item_local.extra == 'search':
                if '/serie' in url:
                    item_local.extra = 'series'                                 # Serie búsqueda
                else:
                    item_local.extra = 'peliculas'                              # Película búsqueda

            # Procesamos idiomas
            item_local.language = []                                            #creamos lista para los idiomas
            if '[Subs. integrados]' in scrapedquality or '(Sub Forzados)' in scrapedquality \
                        or 'Sub' in scrapedquality:
                item_local.language = ['VOS']                                   # añadimos VOS
            if 'castellano' in scrapedquality.lower() or ('español' in scrapedquality.lower() and not 'latino' in scrapedquality.lower()): 
                item_local.language += ['CAST']                                 # añadimos CAST
            if '[Dual' in title or 'dual' in scrapedquality.lower():
                title = re.sub(r'(?i)\[dual.*?\]', '', title)
                item_local.language += ['DUAL']                                 # añadimos DUAL
            if not item_local.language:
                item_local.language = ['LAT']                                   # [LAT] por defecto
                
            # Procesamos Calidad
            if scrapedquality:
                item_local.quality = scrapertools.remove_htmltags(scrapedquality)   # iniciamos calidad
                if '[720p]' in scrapedquality.lower() or '720p' in scrapedquality.lower():
                    item_local.quality = '720p'
                if '[1080p]' in scrapedquality.lower() or '1080p' in scrapedquality.lower():
                    item_local.quality = '1080p'
                if '4k' in scrapedquality.lower():
                    item_local.quality = '4K'
                if '3d' in scrapedquality.lower() and not '3d' in item_local.quality.lower():
                    item_local.quality += ', 3D'
            if not item_local.quality or item_local.extra == 'series':
                item_local.quality = '720p'
                
            item_local.thumbnail = ''                                           #iniciamos thumbnail

            item_local.url = urlparse.urljoin(host, url)                        #guardamos la url final
            item_local.context = "['buscar_trailer']"                           #... y el contexto

            # Guardamos los formatos para series
            if item_local.extra == 'series' or '/serie' in item_local.url:
                item_local.contentType = "tvshow"
                item_local.action = "episodios"
                item_local.season_colapse = season_colapse                      #Muestra las series agrupadas por temporadas?
            else:
                # Guardamos los formatos para películas
                item_local.contentType = "movie"
                item_local.action = "findvideos"

            #Limpiamos el título de la basura innecesaria
            if item_local.contentType == "tvshow":
                title = scrapertools.find_single_match(title, '(^.*?)\s*(?:$|\(|\[|-)')

            title = re.sub(r'(?i)TV|Online|(4k-hdr)|(fullbluray)|4k| - 4k|(3d)|miniserie', '', title).strip()
            item_local.quality = re.sub(r'(?i)proper|unrated|directors|cut|repack|internal|real|extended|masted|docu|super|duper|amzn|uncensored|hulu', 
                        '', item_local.quality).strip()

            #Analizamos el año.  Si no está claro ponemos '-'
            item_local.infoLabels["year"] = '-'
            try:
                if 'anno' in item.extra2:
                    item_local.infoLabels["year"] = int(item.extra2.replace('anno', ''))
            except:
                pass
            
            #Terminamos de limpiar el título
            title = re.sub(r'[\(|\[]\s+[\)|\]]', '', title)
            title = title.replace('()', '').replace('[]', '').replace('[4K]', '').replace('(4K)', '').strip().lower().title()
            item_local.from_title = title.strip().lower().title()           #Guardamos esta etiqueta para posible desambiguación de título

            #Salvamos el título según el tipo de contenido
            if item_local.contentType == "movie":
                item_local.contentTitle = title
            else:
                item_local.contentSerieName = title.strip().lower().title()

            item_local.title = title.strip().lower().title()
            
            #Guarda la variable temporal que almacena la info adicional del título a ser restaurada después de TMDB
            item_local.title_subs = title_subs
                
            #Salvamos y borramos el número de temporadas porque TMDB a veces hace tonterias.  Lo pasamos como serie completa
            if item_local.contentSeason and (item_local.contentType == "season" \
                        or item_local.contentType == "tvshow"):
                item_local.contentSeason_save = item_local.contentSeason
                del item_local.infoLabels['season']

            #Ahora se filtra por idioma, si procede, y se pinta lo que vale
            if filter_languages > 0:                                            #Si hay idioma seleccionado, se filtra
                itemlist = filtertools.get_link(itemlist, item_local, list_language)
            else:
                itemlist.append(item_local.clone())                             #Si no, pintar pantalla
            
            cnt_title = len(itemlist)                                           # Recalculamos los items después del filtrado
            if cnt_title >= cnt_tot and (len(matches) - cnt_match) + cnt_title > cnt_tot * 1.3:     #Contador de líneas añadidas
                break
            
            #logger.debug(item_local)
    
        matches = matches[cnt_match:]                                           # Salvamos la entradas no procesadas
        cnt_tot_match += cnt_match                                              # Calcular el num. total de items mostrados
    
    #Pasamos a TMDB la lista completa Itemlist
    tmdb.set_infoLabels(itemlist, __modo_grafico__, idioma_busqueda='es')
    
    #Llamamos al método para el maquillaje de los títulos obtenidos desde TMDB
    item, itemlist = generictools.post_tmdb_listado(item, itemlist)

    # Si es necesario añadir paginacion
    if curr_page <= last_page or len(matches) > 0:
        curr_page_print = int(cnt_tot_match / float(cnt_tot))
        if curr_page_print < 1:
            curr_page_print = 1
        if last_page:
            if last_page > 1:
                last_page_print = int((last_page * page_factor) + 0.999999)
            title = '%s de %s' % (curr_page_print, last_page_print)
        else:
            title = '%s' % curr_page_print

        itemlist.append(Item(channel=item.channel, action="listado", title=">> Página siguiente " 
                        + title, title_lista=title_lista, url=next_page_url, extra=item.extra, 
                        extra2=item.extra2, last_page=str(last_page), curr_page=str(curr_page), 
                        page_factor=str(page_factor), cnt_tot_match=str(cnt_tot_match), matches=matches, 
                        last_page_print=last_page_print, post=post))

    return itemlist

示例#18

0

显示文件

def lasmas(item):

    thumbletras = {
        '0-9': 'https://s32.postimg.org/drojt686d/image.png',
        '0 - 9': 'https://s32.postimg.org/drojt686d/image.png',
        '#': 'https://s32.postimg.org/drojt686d/image.png',
        'a': 'https://s32.postimg.org/llp5ekfz9/image.png',
        'b': 'https://s32.postimg.org/y1qgm1yp1/image.png',
        'c': 'https://s32.postimg.org/vlon87gmd/image.png',
        'd': 'https://s32.postimg.org/3zlvnix9h/image.png',
        'e': 'https://s32.postimg.org/bgv32qmsl/image.png',
        'f': 'https://s32.postimg.org/y6u7vq605/image.png',
        'g': 'https://s32.postimg.org/9237ib6jp/image.png',
        'h': 'https://s32.postimg.org/812yt6pk5/image.png',
        'i': 'https://s32.postimg.org/6nbbxvqat/image.png',
        'j': 'https://s32.postimg.org/axpztgvdx/image.png',
        'k': 'https://s32.postimg.org/976yrzdut/image.png',
        'l': 'https://s32.postimg.org/fmal2e9yd/image.png',
        'm': 'https://s32.postimg.org/m19lz2go5/image.png',
        'n': 'https://s32.postimg.org/b2ycgvs2t/image.png',
        'o': 'https://s32.postimg.org/c6igsucpx/image.png',
        'p': 'https://s32.postimg.org/jnro82291/image.png',
        'q': 'https://s32.postimg.org/ve5lpfv1h/image.png',
        'r': 'https://s32.postimg.org/nmovqvqw5/image.png',
        's': 'https://s32.postimg.org/zd2t89jol/image.png',
        't': 'https://s32.postimg.org/wk9lo8jc5/image.png',
        'u': 'https://s32.postimg.org/w8s5bh2w5/image.png',
        'v': 'https://s32.postimg.org/e7dlrey91/image.png',
        'w': 'https://s32.postimg.org/fnp49k15x/image.png',
        'x': 'https://s32.postimg.org/dkep1w1d1/image.png',
        'y': 'https://s32.postimg.org/um7j3zg85/image.png',
        'z': 'https://s32.postimg.org/jb4vfm9d1/image.png'
    }

    logger.info("pelisalacarta.channels.qserie lasmas")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot = ''
    if item.extra == 'letras':
        patron = '<li><a href="([^"]+)" title="Series que comienzan con.*?">([^<]+)</a></li>'
    else:
        patron = '<a href="([^"]+)" title="([^V]+)' + item.extra + '.*?">'

    matches = re.compile(patron, re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle in matches:
        url = urlparse.urljoin(item.url, scrapedurl)
        if item.extra != 'letras':
            data = scrapertools.cache_page(scrapedurl)
            thumbnail = scrapertools.get_match(
                data, '<link rel="image_src" href="([^"]+)"/>')
            realplot = scrapertools.find_single_match(
                data, '<p itemprop="articleBody">([^<]+)<\/p> ')
            plot = scrapertools.remove_htmltags(realplot)
            action = 'temporadas'
        else:
            if scrapedtitle.lower() in thumbletras:
                thumbnail = thumbletras[scrapedtitle.lower()]
            else:
                thumbnail = ''
            plot = ''
            action = 'todas'
        title = scrapedtitle.replace(': ', '')
        title = scrapertools.decodeHtmlentities(title)
        if item.extra == 'letras':
            fanart = 'https://s31.postimg.org/c3bm9cnl7/a_z.png'
        elif item.extra == 'Vista':
            fanart = 'https://s32.postimg.org/466gt3ipx/vistas.png'
        else:
            fanart = ''

        if (DEBUG):
            logger.info("title=[" + title + "], url=[" + url +
                        "], thumbnail=[" + thumbnail + "])")
        itemlist.append(
            Item(channel=item.channel,
                 action=action,
                 title=title,
                 url=url,
                 thumbnail=thumbnail,
                 plot=plot,
                 fanart=fanart,
                 contentSerieName=scrapedtitle))

    return itemlist

示例#19

0

显示文件

文件： mejortorrent.py 项目： kampanita/pelisalacarta

def buscador(item):
    logger.info("pelisalacarta.mejortorrent buscador")
    itemlist = []

    data = scrapertools.cachePage(item.url)

    # pelis
    # <a href="/peli-descargar-torrent-9578-Presentimientos.html">
    # <img src="/uploads/imagenes/peliculas/Presentimientos.jpg" border="1"></a
    #
    # series
    #
    #<a href="/serie-descargar-torrents-11589-11590-Ahora-o-nunca-4-Temporada.html">
    #<img src="/uploads/imagenes/series/Ahora o nunca4.jpg" border="1"></a>
    #
    # docs
    #
    #<a href="/doc-descargar-torrent-1406-1407-El-sueno-de-todos.html">
    #<img border="1" src="/uploads/imagenes/documentales/El sueno de todos.jpg"></a>

    #busca series
    patron  = "<a href='(/serie-descargar-torrent[^']+)'[^>]+>(.*?)</a>"
    patron += ".*?<span style='color:gray;'>([^']+)</span>"
    patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"


    matches = scrapertools.find_multiple_matches(data, patron)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle, scrapedinfo in matches:
        title = scrapertools.remove_htmltags(scrapedtitle).decode('iso-8859-1').encode('utf8') + ' ' + scrapedinfo.decode('iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url,scrapedurl)
        logger.debug("title=["+title+"], url=["+url+"]")

        itemlist.append( Item(channel=item.channel, action="episodios", title=title , url=url , folder=True, extra="series", viewmode="movie_with_plot") )

    #busca pelis
    patron  = "<a href='(/peli-descargar-torrent-[^']+)'[^>]+>(.*?)</a>"
    patron_enlace = "/peli-descargar-torrent-\d+(.*?)\.html"

    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle in matches:
        title = scrapertools.remove_htmltags(scrapedtitle).decode('iso-8859-1').encode('utf-8')
        url = urlparse.urljoin(item.url,scrapedurl)
        logger.debug("title=["+title+"], url=["+url+"]")

        itemlist.append( Item(channel=item.channel, action="play", title=title , url=url , folder=False, extra="") )


    #busca docu
    patron  = "<a href='(/doc-descargar-torrent[^']+)' .*?"
    patron += "<font Color='darkblue'>(.*?)</font>.*?"
    patron += "<td align='right' width='20%'>(.*?)</td>"
    patron_enlace = "/doc-descargar-torrent-\d+-\d+-(.*?)\.html"

    matches = re.compile(patron,re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle, scrapedinfo in matches:
        title = scrapedtitle.decode('iso-8859-1').encode('utf8') + " " + scrapedinfo.decode('iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url,scrapedurl)
        logger.debug("title=["+title+"], url=["+url+"]")

        itemlist.append( Item(channel=item.channel, action="episodios", title=title , url=url , folder=True, extra="docu", viewmode="movie_with_plot") )

    if len(itemlist) == 0:
        itemlist.append( Item(channel=item.channel, action="mainlist", title="No se han encontrado nada con ese término" ) )


    return itemlist

示例#20

0

显示文件

def buscador(item):
    logger.info("pelisalacarta.mejortorrent buscador")
    itemlist = []

    data = scrapertools.cachePage(item.url)

    # pelis
    # <a href="/peli-descargar-torrent-9578-Presentimientos.html">
    # <img src="/uploads/imagenes/peliculas/Presentimientos.jpg" border="1"></a
    #
    # series
    #
    #<a href="/serie-descargar-torrents-11589-11590-Ahora-o-nunca-4-Temporada.html">
    #<img src="/uploads/imagenes/series/Ahora o nunca4.jpg" border="1"></a>
    #
    # docs
    #
    #<a href="/doc-descargar-torrent-1406-1407-El-sueno-de-todos.html">
    #<img border="1" src="/uploads/imagenes/documentales/El sueno de todos.jpg"></a>

    #busca series
    patron = "<a href='(/serie-descargar-torrent[^']+)'[^>]+>(.*?)</a>"
    patron += ".*?<span style='color:gray;'>([^']+)</span>"
    patron_enlace = "/serie-descargar-torrents-\d+-\d+-(.*?)\.html"

    matches = scrapertools.find_multiple_matches(data, patron)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle, scrapedinfo in matches:
        title = scrapertools.remove_htmltags(scrapedtitle).decode(
            'iso-8859-1').encode('utf8') + ' ' + scrapedinfo.decode(
                'iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")

        itemlist.append(
            Item(channel=item.channel,
                 action="episodios",
                 title=title,
                 url=url,
                 folder=True,
                 extra="series",
                 viewmode="movie_with_plot"))

    #busca pelis
    patron = "<a href='(/peli-descargar-torrent-[^']+)'[^>]+>(.*?)</a>"
    patron_enlace = "/peli-descargar-torrent-\d+(.*?)\.html"

    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle in matches:
        title = scrapertools.remove_htmltags(scrapedtitle).decode(
            'iso-8859-1').encode('utf-8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")

        itemlist.append(
            Item(channel=item.channel,
                 action="play",
                 title=title,
                 url=url,
                 folder=False,
                 extra=""))

    #busca docu
    patron = "<a href='(/doc-descargar-torrent[^']+)' .*?"
    patron += "<font Color='darkblue'>(.*?)</font>.*?"
    patron += "<td align='right' width='20%'>(.*?)</td>"
    patron_enlace = "/doc-descargar-torrent-\d+-\d+-(.*?)\.html"

    matches = re.compile(patron, re.DOTALL).findall(data)
    scrapertools.printMatches(matches)

    for scrapedurl, scrapedtitle, scrapedinfo in matches:
        title = scrapedtitle.decode('iso-8859-1').encode(
            'utf8') + " " + scrapedinfo.decode('iso-8859-1').encode('utf8')
        url = urlparse.urljoin(item.url, scrapedurl)
        logger.debug("title=[" + title + "], url=[" + url + "]")

        itemlist.append(
            Item(channel=item.channel,
                 action="episodios",
                 title=title,
                 url=url,
                 folder=True,
                 extra="docu",
                 viewmode="movie_with_plot"))

    if len(itemlist) == 0:
        itemlist.append(
            Item(channel=item.channel,
                 action="mainlist",
                 title="No se han encontrado nada con ese término"))

    return itemlist

示例#21

0

显示文件

文件： qserie.py 项目： jurrKodi/pelisalacarta

def lasmas(item):
    
    thumbletras = {'0-9':'https://s32.postimg.org/drojt686d/image.png',
    '0 - 9':'https://s32.postimg.org/drojt686d/image.png',
    '#':'https://s32.postimg.org/drojt686d/image.png',
    'a':'https://s32.postimg.org/llp5ekfz9/image.png',
    'b':'https://s32.postimg.org/y1qgm1yp1/image.png',
    'c':'https://s32.postimg.org/vlon87gmd/image.png',
    'd':'https://s32.postimg.org/3zlvnix9h/image.png',
    'e':'https://s32.postimg.org/bgv32qmsl/image.png',
    'f':'https://s32.postimg.org/y6u7vq605/image.png',
    'g':'https://s32.postimg.org/9237ib6jp/image.png',
    'h':'https://s32.postimg.org/812yt6pk5/image.png',
    'i':'https://s32.postimg.org/6nbbxvqat/image.png',
    'j':'https://s32.postimg.org/axpztgvdx/image.png',
    'k':'https://s32.postimg.org/976yrzdut/image.png',
    'l':'https://s32.postimg.org/fmal2e9yd/image.png',
    'm':'https://s32.postimg.org/m19lz2go5/image.png',
    'n':'https://s32.postimg.org/b2ycgvs2t/image.png',
    'o':'https://s32.postimg.org/c6igsucpx/image.png',
    'p':'https://s32.postimg.org/jnro82291/image.png',
    'q':'https://s32.postimg.org/ve5lpfv1h/image.png',
    'r':'https://s32.postimg.org/nmovqvqw5/image.png',
    's':'https://s32.postimg.org/zd2t89jol/image.png',
    't':'https://s32.postimg.org/wk9lo8jc5/image.png',
    'u':'https://s32.postimg.org/w8s5bh2w5/image.png',
    'v':'https://s32.postimg.org/e7dlrey91/image.png',
    'w':'https://s32.postimg.org/fnp49k15x/image.png',
    'x':'https://s32.postimg.org/dkep1w1d1/image.png',
    'y':'https://s32.postimg.org/um7j3zg85/image.png',
    'z':'https://s32.postimg.org/jb4vfm9d1/image.png'}

    logger.info("pelisalacarta.channels.qserie lasmas")
    itemlist = []
    data = scrapertools.cache_page(item.url)
    realplot=''
    if item.extra == 'letras':
         patron ='<li><a href="([^"]+)" title="Series que comienzan con.*?">([^<]+)</a></li>' 
    else:    
         patron ='<a href="([^"]+)" title="([^V]+)'+item.extra+'.*?">' 
    
    matches = re.compile(patron,re.DOTALL).findall(data)

    for scrapedurl, scrapedtitle in matches:
        url = urlparse.urljoin(item.url,scrapedurl)
        if item.extra != 'letras':
           data = scrapertools.cache_page(scrapedurl)
           thumbnail= scrapertools.get_match(data,'<link rel="image_src" href="([^"]+)"/>')
           realplot = scrapertools.find_single_match(data, '<p itemprop="articleBody">([^<]+)<\/p> ')
           plot = scrapertools.remove_htmltags(realplot)
           action='temporadas'
        else:
           if scrapedtitle.lower() in thumbletras:
              thumbnail = thumbletras[scrapedtitle.lower()]
           else:
              thumbnail = ''
           plot=''
           action='todas'
        title = scrapedtitle.replace(': ','')
        title = scrapertools.decodeHtmlentities(title)
        if item.extra == 'letras':
           fanart = 'https://s31.postimg.org/c3bm9cnl7/a_z.png'
        elif item.extra == 'Vista':
           fanart = 'https://s32.postimg.org/466gt3ipx/vistas.png' 
        else:
           fanart = ''  
   
        if (DEBUG): logger.info("title=["+title+"], url=["+url+"], thumbnail=["+thumbnail+"])")
        itemlist.append( Item(channel=item.channel, action=action, title=title , url=url, thumbnail=thumbnail, plot=plot, fanart=fanart))

    return itemlist

示例#22

0

显示文件

文件： torrentrapid.py 项目： x7r6xx/repo

def listado_busqueda(item):
    logger.info()
    itemlist = []
    data = re.sub(r"\n|\r|\t|\s{2,}", "",
                  httptools.downloadpage(item.url, post=item.post).data)
    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")

    list_chars = [["Ã±", "ñ"]]

    for el in list_chars:
        data = re.sub(r"%s" % el[0], el[1], data)

    try:
        get, post = scrapertools.find_single_match(
            data, '<ul class="pagination">.*?<a class="current" href.*?'
            '<a\s*href="([^"]+)"(?:\s*onClick=".*?\'([^"]+)\'.*?")')
    except:
        post = False

    if post:
        if "pg" in item.post:
            item.post = re.sub(r"pg=(\d+)", "pg=%s" % post, item.post)
        else:
            item.post += "&pg=%s" % post

    pattern = '<ul class="%s">(.*?)</ul>' % item.pattern
    data = scrapertools.get_match(data, pattern)
    pattern = '<li[^>]*><a href="(?P<url>[^"]+).*?<img.*?src="(?P<thumb>[^"]+)?".*?<h2.*?>(?P<title>.*?)?<\/h2>'
    matches = re.compile(pattern, re.DOTALL).findall(data)

    for url, thumb, title in matches:
        real_title = scrapertools.find_single_match(
            title, r'<strong.*?>(.*?)Temporada.*?<\/strong>')  #series
        if real_title == "":
            real_title = scrapertools.find_single_match(
                title, r'(.*?)\[.*?]')  #movies
        real_title = scrapertools.remove_htmltags(real_title).decode(
            'iso-8859-1').encode('utf-8')
        real_title = scrapertools.htmlclean(real_title)
        calidad = scrapertools.find_single_match(
            title,
            r'.*?\s*Calidad.*?<span[^>]+>[\[]\s*(?P<quality>.*?)\s*[\]]<\/span>'
        )  #series
        if calidad == "":
            calidad = scrapertools.find_single_match(
                title, r'..*?(\[.*?.*\])')  #movies
        year = scrapertools.find_single_match(thumb, r'-(\d{4})')

        # fix encoding for title
        title = scrapertools.htmlclean(title)
        title = title.replace("ï¿½", "ñ").replace("Temp", " Temp").replace(
            "Esp", " Esp").replace("Ing", " Ing").replace("Eng", " Eng")
        title = re.sub(r'(Calidad.*?\])', '', title)

        if real_title == "":
            real_title = title
        if calidad == "":
            calidad = title
        context = "movie"
        url_real = True

        # no mostramos lo que no sean videos
        if "juego/" in url:
            continue

        # Codigo para rescatar lo que se puede en pelisy.series.com de Series para la Videoteca.  la URL apunta al capítulo y no a la Serie.  Nombre de Serie frecuentemente en blanco. Se obtiene de Thumb, así como el id de la serie
        if ("/serie" in url or "-serie" in url) and "pelisyseries.com" in host:
            calidad_mps = "series/"
            if "seriehd" in url:
                calidad_mps = "series-hd/"
            if "serievo" in url:
                calidad_mps = "series-vo/"
            if "serie-vo" in url:
                calidad_mps = "series-vo/"

            real_title_mps = re.sub(r'.*?\/\d+_', '', thumb)
            real_title_mps = re.sub(r'\.\w+.*?', '', real_title_mps)

            if "/0_" not in thumb:
                serieid = scrapertools.find_single_match(
                    thumb, r'.*?\/\w\/(?P<serieid>\d+).*?.*')
                if len(serieid) > 5:
                    serieid = ""
            else:
                serieid = ""

            #detectar si la url creada de tvshow es válida o hay que volver atras
            url_tvshow = host + calidad_mps + real_title_mps + "/"
            url_id = host + calidad_mps + real_title_mps + "/" + serieid
            data_serie = data = re.sub(r"\n|\r|\t|\s{2,}", "",
                                       httptools.downloadpage(url_id).data)
            data_serie = unicode(data_serie, "iso-8859-1",
                                 errors="replace").encode("utf-8")
            data_serie = data_serie.replace("chapters", "buscar-list")
            pattern = '<ul class="%s">(.*?)</ul>' % "buscar-list"  # item.pattern
            if not scrapertools.find_single_match(data_serie, pattern):
                data_serie = data = re.sub(
                    r"\n|\r|\t|\s{2,}", "",
                    httptools.downloadpage(url_tvshow).data)
                data_serie = unicode(data_serie,
                                     "iso-8859-1",
                                     errors="replace").encode("utf-8")
                data_serie = data_serie.replace("chapters", "buscar-list")
                if not scrapertools.find_single_match(data_serie, pattern):
                    context = "movie"
                    url_real = False
                    if not config.get_setting(
                            "unify"
                    ):  #Si Titulos Inteligentes NO seleccionados:
                        if calidad:
                            title = title + '[' + calidad + "]"
                else:
                    url = url_tvshow
            else:
                url = url_id

            real_title_mps = real_title_mps.replace("-", " ")
            logger.debug("url: " + url + " / title: " + title +
                         " / real_title: " + real_title +
                         " / real_title_mps: " + real_title_mps +
                         " / calidad_mps : " + calidad_mps + " / context : " +
                         context)
            real_title = real_title_mps

        show = real_title

        if ".com/serie" in url and "/miniseries" not in url and url_real:
            if not config.get_setting(
                    "unify"):  #Si Titulos Inteligentes NO seleccionados:
                if calidad:
                    title = title + '[' + calidad + "]"
            context = "tvshow"

            itemlist.append(
                Item(channel=item.channel,
                     action="episodios",
                     title=title,
                     url=url,
                     thumbnail=thumb,
                     quality=calidad,
                     show=show,
                     extra="serie",
                     context=["buscar_trailer"],
                     contentType=context,
                     contentTitle=real_title,
                     contentSerieName=real_title,
                     infoLabels={'year': year}))
        else:
            if config.get_setting(
                    "unify"):  #Si Titulos Inteligentes SI seleccionados:
                title = real_title

            itemlist.append(
                Item(channel=item.channel,
                     action="findvideos",
                     title=title,
                     url=url,
                     thumbnail=thumb,
                     quality=calidad,
                     show=show,
                     context=["buscar_trailer"],
                     contentType=context,
                     contentTitle=real_title,
                     infoLabels={'year': year}))

        logger.debug("url: " + url + " / title: " + title + " / real_title: " +
                     real_title + " / show: " + show + " / calidad: " +
                     calidad)

    tmdb.set_infoLabels(itemlist, True)

    if post:
        itemlist.append(
            item.clone(channel=item.channel,
                       action="listado_busqueda",
                       title=">> Página siguiente",
                       text_color='yellow',
                       text_bold=True,
                       thumbnail=get_thumb("next.png")))

    return itemlist

示例#23

0

显示文件

def listado(item):  # Listado principal y de búsquedas
    logger.info()

    itemlist = []
    item.category = categoria
    thumb_pelis = get_thumb("channels_movie.png")
    thumb_series = get_thumb("channels_tvshow.png")

    #logger.debug(item)

    curr_page = 1  # Página inicial
    last_page = 99999  # Última página inicial
    last_page_print = 1  # Última página inicial, para píe de página
    page_factor = 1.0  # Factor de conversión de pag. web a pag. Alfa
    if item.curr_page:
        curr_page = int(
            item.curr_page)  # Si viene de una pasada anterior, lo usamos
        del item.curr_page  # ... y lo borramos
    if item.last_page:
        last_page = int(
            item.last_page)  # Si viene de una pasada anterior, lo usamos
        del item.last_page  # ... y lo borramos
    if item.page_factor:
        page_factor = float(
            item.page_factor)  # Si viene de una pasada anterior, lo usamos
        del item.page_factor  # ... y lo borramos
    if item.last_page_print:
        last_page_print = item.last_page_print  # Si viene de una pasada anterior, lo usamos
        del item.last_page_print  # ... y lo borramos

    cnt_tot = 30  # Poner el num. máximo de items por página
    cnt_title = 0  # Contador de líneas insertadas en Itemlist
    if item.cnt_tot_match:
        cnt_tot_match = float(
            item.cnt_tot_match
        )  # restauramos el contador TOTAL de líneas procesadas de matches
        del item.cnt_tot_match
    else:
        cnt_tot_match = 0.0  # Contador TOTAL de líneas procesadas de matches
    inicio = time.time(
    )  # Controlaremos que el proceso no exceda de un tiempo razonable
    fin = inicio + 5  # Después de este tiempo pintamos (segundos)
    timeout_search = timeout * 2  # Timeout para descargas
    if item.extra == 'search' and item.extra2 == 'episodios':  # Si viene de episodio que quitan los límites
        cnt_tot = 999
        fin = inicio + 30

    #Sistema de paginado para evitar páginas vacías o semi-vacías en casos de búsquedas con series con muchos episodios
    title_lista = [
    ]  # Guarda la lista de series que ya están en Itemlist, para no duplicar lineas
    if item.title_lista:  # Si viene de una pasada anterior, la lista ya estará guardada
        title_lista.extend(
            item.title_lista)  # Se usa la lista de páginas anteriores en Item
        del item.title_lista  # ... limpiamos
    matches = []

    if not item.extra2:  # Si viene de Catálogo o de Alfabeto
        item.extra2 = ''

    post = None
    headers = None
    forced_proxy_opt = None
    if item.post:
        forced_proxy_opt = None
    if item.post or item.post is None:  # Rescatamos el Post, si lo hay
        post = item.post
        del item.post
    if item.headers or item.headers is None:  # Rescatamos el Headers, si lo hay
        headers = item.headers
        del item.headers

    next_page_url = item.url
    # Máximo num. de líneas permitidas por TMDB. Máx de 5 segundos por Itemlist para no degradar el rendimiento
    while (cnt_title < cnt_tot and curr_page <= last_page
           and fin > time.time()) or item.matches:

        # Descarga la página
        data = ''
        cnt_match = 0  # Contador de líneas procesadas de matches

        if not item.matches:  # si no viene de una pasada anterior, descargamos
            data, response, item, itemlist = generictools.downloadpage(
                next_page_url,
                canonical=canonical,
                headers=headers,
                timeout=timeout_search,
                post=post,
                s2=False,
                item=item,
                itemlist=itemlist)  # Descargamos la página)
            # Verificamos si ha cambiado el Host
            if response.host:
                next_page_url = response.url_new

            # Verificamos si se ha cargado una página correcta
            curr_page += 1  # Apunto ya a la página siguiente
            if not data or not response.sucess:  # Si la web está caída salimos sin dar error
                if len(itemlist) > 1:  # Si hay algo que pintar lo pintamos
                    last_page = 0
                    break
                return itemlist  # Si no hay nada más, salimos directamente

            headers = {'referer': next_page_url}

        #Patrón para búsquedas, pelis y series
        patron = '<div\s*class="item hitem">\s*<a\s*href="([^"]+)">.*?'
        patron += '(?:<div\s*class="nota">\s*(.*?)\s*<\/div>)?\s*<img[^>]*src="([^"]+)"'
        patron += '.*?<div\s*class="titulo">\s*<span>\s*([^<]*)<\/span>'

        if not item.matches:  # De pasada anterior?
            matches = re.compile(patron, re.DOTALL).findall(data)
        else:
            matches = item.matches
            del item.matches

        #logger.debug("PATRON: " + patron)
        #logger.debug(matches)
        #logger.debug(data)

        if not matches and item.extra != 'search' and not item.extra2:  #error
            logger.error(
                "ERROR 02: LISTADO: Ha cambiado la estructura de la Web " +
                " / PATRON: " + patron + " / DATA: " + data)
            itemlist.append(
                item.clone(
                    action='',
                    title=item.channel.capitalize() +
                    ': ERROR 02: LISTADO: Ha cambiado la estructura de la Web.  '
                    + 'Reportar el error con el log'))
            break  #si no hay más datos, algo no funciona, pintamos lo que tenemos

        if not matches and item.extra == 'search':  #búsqueda vacía
            if len(itemlist) > 0:  # Si hay algo que pintar lo pintamos
                last_page = 0
                break
            return itemlist  #Salimos

        # Buscamos la próxima página
        next_page_url = re.sub(r'page\/(\d+)', 'page/%s' % str(curr_page),
                               item.url)
        #logger.debug('curr_page: ' + str(curr_page) + ' / last_page: ' + str(last_page))

        # Buscamos la última página
        if last_page == 99999:  #Si es el valor inicial, buscamos
            patron_last = 'Last\s*Page"\s*href="[^"]+\/(\d*)\/"'
            if not scrapertools.find_single_match(data, patron_last):
                patron_last = 'href="[^"]+">(\d+)<\/a>(?:<span[^<]*<\/span>)?\s*<a[^>]*aria-label="Next\s*Page"'
            try:
                last_page = int(
                    scrapertools.find_single_match(data, patron_last))
                page_factor = float(len(matches)) / float(cnt_tot)
            except:  #Si no lo encuentra, lo ponemos a 999
                last_page = 1
                last_page_print = int((float(len(matches)) / float(cnt_tot)) +
                                      0.999999)

            #logger.debug('curr_page: ' + str(curr_page) + ' / last_page: ' + str(last_page))

        #Empezamos el procesado de matches
        for scrapedurl, scrapedlang, scrapedthumb, scrapedtitle in matches:
            cnt_match += 1

            title = scrapedtitle
            title = scrapertools.remove_htmltags(title).rstrip(
                '.')  # Removemos Tags del título
            url = scrapedurl

            title_subs = []  #creamos una lista para guardar info importante

            # Slugify, pero más light
            title = title.replace("á", "a").replace("é", "e").replace("í", "i")\
                    .replace("ó", "o").replace("ú", "u").replace("ü", "u")\
                    .replace("ï¿½", "ñ").replace("Ã±", "ñ")
            title = scrapertools.decode_utf8_error(title)

            cnt_title += 1  # Incrementamos el contador de entradas válidas

            item_local = item.clone()  #Creamos copia de Item para trabajar
            if item_local.tipo:  #... y limpiamos
                del item_local.tipo
            if item_local.totalItems:
                del item_local.totalItems
            if item_local.intervencion:
                del item_local.intervencion
            if item_local.viewmode:
                del item_local.viewmode
            item_local.extra2 = True
            del item_local.extra2
            item_local.text_bold = True
            del item_local.text_bold
            item_local.text_color = True
            del item_local.text_color

            item_local.headers = headers

            # Tratamos los idiomas
            if scrapedlang:
                item_local.language = []
                langs = scrapedlang.split('>')
                logger.error(langs)
                for lang in langs:
                    if not lang: continue
                    if 'espanol' in lang:
                        item_local.language += ['CAST']
                    elif 'latin' in lang:
                        item_local.language += ['LAT']
                    elif 'english' in lang or 'ingles' in lang:
                        item_local.language += ['VO']
                    else:
                        item_local.language += ['OTHER']
            if not item_local.language:
                item_local.language = ['CAST']  # [CAST] por defecto

            if not item_local.quality:
                item_local.quality = 'DVDR'  # DVDR por defecto

            item_local.thumbnail = urlparse.urljoin(
                host, scrapedthumb)  # iniciamos thumbnail

            item_local.url = urlparse.urljoin(host,
                                              url)  # guardamos la url final
            item_local.context = "['buscar_trailer']"  # ... y el contexto

            # Guardamos los formatos para películas
            item_local.contentType = "movie"
            item_local.action = "findvideos"

            title = re.sub(
                r'(?i)TV|Online|(4k-hdr)|(fullbluray)|4k| - 4k|(3d)|miniserie|ciclo\s*[^-|–]+[-|–]\s*',
                '', title).strip()
            item_local.quality = re.sub(
                r'(?i)proper|unrated|directors|cut|repack|internal|real|extended|masted|docu|super|duper|amzn|uncensored|hulu',
                '', item_local.quality).strip()

            # Analizamos el año.  Si no está claro ponemos '-'
            item_local.infoLabels["year"] = '-'

            # Terminamos de limpiar el título
            title = re.sub(r'[\(|\[]\s+[\)|\]]', '', title)
            title = title.replace('()', '').replace('[]', '').replace(
                '[4K]', '').replace('(4K)', '').strip().lower().title()
            title = title.strip().lower().title()
            item_local.from_title = title  #Guardamos esta etiqueta para posible desambiguación de título
            item_local.contentTitle = title
            item_local.title = title

            #Guarda la variable temporal que almacena la info adicional del título a ser restaurada después de TMDB
            item_local.title_subs = title_subs

            #Ahora se filtra por idioma, si procede, y se pinta lo que vale
            if filter_languages > 0:  #Si hay idioma seleccionado, se filtra
                itemlist = filtertools.get_link(itemlist, item_local,
                                                list_language)
            else:
                itemlist.append(item_local.clone())  #Si no, pintar pantalla

            cnt_title = len(
                itemlist)  # Recalculamos los items después del filtrado
            if cnt_title >= cnt_tot and (
                    len(matches) - cnt_match
            ) + cnt_title > cnt_tot * 1.3:  #Contador de líneas añadidas
                break

            #logger.debug(item_local)

        matches = matches[cnt_match:]  # Salvamos la entradas no procesadas
        cnt_tot_match += cnt_match  # Calcular el num. total de items mostrados

    #Pasamos a TMDB la lista completa Itemlist
    tmdb.set_infoLabels(itemlist,
                        __modo_grafico__,
                        idioma_busqueda=idioma_busqueda)

    #Llamamos al método para el maquillaje de los títulos obtenidos desde TMDB
    item, itemlist = generictools.post_tmdb_listado(item, itemlist)

    # Si es necesario añadir paginacion
    if curr_page <= last_page or len(matches) > 0:
        curr_page_print = int(cnt_tot_match / float(cnt_tot))
        if curr_page_print < 1:
            curr_page_print = 1
        if last_page:
            if last_page > 1:
                last_page_print = int((last_page * page_factor) + 0.999999)
            title = '%s de %s' % (curr_page_print, last_page_print)
        else:
            title = '%s' % curr_page_print

        itemlist.append(
            Item(channel=item.channel,
                 action="listado",
                 title=">> Página siguiente " + title,
                 title_lista=title_lista,
                 url=next_page_url,
                 extra=item.extra,
                 extra2=item.extra2,
                 last_page=str(last_page),
                 curr_page=str(curr_page),
                 page_factor=str(page_factor),
                 cnt_tot_match=str(cnt_tot_match),
                 matches=matches,
                 last_page_print=last_page_print,
                 post=post,
                 headers=headers))

    return itemlist

示例#24

0

显示文件

文件： dascer.py 项目： shlibidon/addon

def listado(item):                                                              # Listado principal
    logger.info()
    
    itemlist = []
    matches = []
    logos = {'Fórmula 1': '/f1-logo', 'Fórmula 2': '/f2-', 'Fórmula E': '/fe-', 'MotoGP': '/moto-gp', 'Moto2': '/moto2', 'Moto3': '/moto3'}
    item.category = categoria

    #logger.debug(item)
    
    cnt_tot = 30                                                                # Poner el num. máximo de items por página
    cnt_title = 0                                                               # Contador de líneas insertadas en Itemlist
    cnt_offset = 0                                                              # offset para cnt_title en searchs
    curr_page = 1
    last_page = 999
    
    inicio = time.time()                                    # Controlaremos que el proceso no exceda de un tiempo razonable
    fin = inicio + 5                                                            # Después de este tiempo pintamos (segundos)
    timeout_search = timeout * 2                                                # Timeout para descargas

    next_page_url = item.url
    # Máximo num. de líneas permitidas por TMDB. Máx de 5 segundos por Itemlist para no degradar el rendimiento
    while (next_page_url and cnt_title < cnt_tot and curr_page <= last_page and fin > time.time()) or item.matches:
    
        # Descarga la página
        data = ''
        cnt_match = 0                                                           # Contador de líneas procesadas de matches
        
        if item.extra2 == 'novedades':
            patron = '<div\s*class="elementor-widget-container">\s*<div\s*class='
            patron += '"elementor-image">\s*<a\s*href="([^"]+)">\s*<img\s*(?:width="[^"]+"\s*'
            patron += 'height="[^"]+"\s*)?src="([^"]+)"\s*class="attachment-medium_large\s*'
            patron += 'size-medium_large"\s*alt="[^"]*"\s*(?:loading="[^"]*"\s*)?srcset=.*?<h4\s*class='
            patron += '"elementor-heading[^>]+>\s*<a\s*href="[^"]+">\s*(.*?)\s*<\/a>\s*<\/h4>'
        else:
            patron = '<article\s*class="[^>]+>\s*<div\s*class="elementor-post__card">\s*'
            patron += '<a\s*class="[^"]+"\s*href="([^"]+)"\s*>\s*<div\s*class='
            patron += '"elementor-post__thumbnail"\s*>\s*<\s*img.*?src="([^"]+)".*?'
            patron += '<h3\s*class="elementor-post__title"\s*>\s*<a href="[^"]+"\s*>'
            patron += '\s*(.*?)\s*<\/a>\s*<\/h3>\s*<\/div>\s*<\/div>\s*<\/article>'
        
        if not item.matches:                                                    # si no viene de una pasada anterior, descargamos
            data, success, code, item, itemlist = generictools.downloadpage(next_page_url, 
                                          timeout=timeout_search, s2=False, patron=patron, 
                                          item=item, itemlist=itemlist)         # Descargamos la página)
            
            # Verificamos si se ha cargado una página correcta
            curr_page += 1                                                      # Apunto ya a la página siguiente
            if not data or not success:                                         # Si la web está caída salimos sin dar error
                if len(itemlist) > 1:                                           # Si hay algo que pintar lo pintamos 
                    last_page = 0
                    break
                return itemlist                                                 # Si no hay nada más, salimos directamente
        
        # Comprobar si hay más páginas
        patron_page = '<a\s*class="page-numbers[^"]+"\s*href="([^"]+)">Siguiente'
        if scrapertools.find_single_match(data, patron_page):
            next_page_url = scrapertools.find_single_match(data, patron_page)
        else:
            next_page_url = ''

        if not item.matches:                                                    # De pasada anterior?
            matches = re.compile(patron, re.DOTALL).findall(data)
        else:
            matches = item.matches
            del item.matches
            
        #logger.debug("PATRON: " + patron)
        #logger.debug(matches)
        #logger.debug(data)
        
        # Iniciamos el Plot y marcamos TMDB como no usable
        item.contentPlot = '[COLOR gold][B]%s[/B][/COLOR]%s\n\n' % (item.title, item.channel_sufix)     #iniciamos el Plot con el Deporte
        item.from_title = '%s - ' % item.title
        item.infoLabels['tmdb_id'] = null

        #Empezamos el procesado de matches
        for scrapedurl, scrapedthumb, scrapedtitle in matches:
            #Generamos una copia de Item para trabajar sobre ella
            item_local = item.clone()
            
            cnt_match += 1
            
            title = scrapedtitle
            title = scrapertools.remove_htmltags(title).rstrip('.')             # Removemos Tags del título
            title = title.replace("á", "a").replace("é", "e").replace("í", "i")\
                    .replace("ó", "o").replace("ú", "u").replace("ü", "u")\
                    .replace("ï¿½", "ñ").replace("Ã±", "ñ").replace("&#8217;", "'")\
                    .replace("&amp;", "&")
            
            cnt_title += 1                                                      # Incrementamos el contador de entradas válidas
           
            # Procesamos idiomas
            item_local.language = []                                            #creamos lista para los idiomas
            if not item_local.language:
                item_local.language = ['CAST']                                  # [CAST] por defecto
                
            # Procesamos Calidad
            if not item_local.quality:
                item_local.quality = 'HDTV'
                
            item_local.thumbnail = scrapedthumb                                 #iniciamos thumbnail
            item_local.contentThumbnail = scrapedthumb                          #iniciamos thumbnail
            item_local.infoLabels['fanart'] = scrapedthumb                      #iniciamos Fanart
            
            # Para Novedades buscamos el Deporte en los thumbs
            if item_local.extra2 == 'novedades':
                for sport, logo in list(logos.items()):
                    if logo in item_local.thumbnail.lower():
                        item_local.contentPlot = '[COLOR gold][B]%s[/B][/COLOR]%s\n\n' % (sport, item.channel_sufix)
                        item_local.from_title = '%s - ' % sport
                        break
            
            item_local.url = scrapedurl                                         #iniciamos la url
            item_local.url_tvshow = item_local.url

            # Guardamos los formatos para películas
            item_local.contentType = event
            item_local.action = "episodios"

            year = scrapertools.find_single_match(title, '\d{4}')
            if year:
                item_local.infoLabels['year'] = int(year)
                item_local.infoLabels['aired'] = str(year)

            title = re.sub(r'(?i)TV|Online|(4k-hdr)|(fullbluray)|4k| - 4k|(3d)|miniserie|\d{4}', '', title).strip()
            item_local.title = title.strip().lower().title()
            item_local.contentTitle = item_local.title
            item_local.from_title += item_local.title

            #Ahora se filtra por idioma, si procede, y se pinta lo que vale
            if filter_languages > 0:                                            #Si hay idioma seleccionado, se filtra
                itemlist = filtertools.get_link(itemlist, item_local, list_language)
            else:
                itemlist.append(item_local.clone())                             #Si no, pintar pantalla
            
            cnt_title = len(itemlist) - cnt_offset                              # Recalculamos los items después del filtrado
            if cnt_title >= cnt_tot and (len(matches) - cnt_match) + cnt_title > cnt_tot * 1.3:     #Contador de líneas añadidas
                break
            
            #logger.debug(item_local)
    
        matches = matches[cnt_match:]                                           # Salvamos la entradas no procesadas

    # Enlace a Temporadas anteriores
    if item.extra2 != 'novedades':
        patron = '<h2\s*class=".*?<a\s*href="([^"]+)"\s*class="'
        url_temp_anteriores = scrapertools.find_single_match(data, patron)
        itemlist.insert(0,item.clone(title="Temporadas anteriores", url=url_temp_anteriores, 
                        action="temporadas_anteriores", thumbnail=thumb_sports, url_tvshow = url_temp_anteriores, 
                        language=['CAST'], quality = 'HDTV'))

    return itemlist

示例#25

0

显示文件

def listado(item):  # Listado principal y de búsquedas
    logger.info()

    itemlist = []
    item.category = categoria
    thumb_pelis = get_thumb("channels_movie.png")
    thumb_series = get_thumb("channels_tvshow.png")

    #logger.debug(item)

    curr_page = 1  # Página inicial
    last_page = 99999  # Última página inicial
    last_page_print = 1  # Última página inicial, para píe de página
    page_factor = 1.0  # Factor de conversión de pag. web a pag. Alfa
    if item.curr_page:
        curr_page = int(
            item.curr_page)  # Si viene de una pasada anterior, lo usamos
        del item.curr_page  # ... y lo borramos
    if item.last_page:
        last_page = int(
            item.last_page)  # Si viene de una pasada anterior, lo usamos
        del item.last_page  # ... y lo borramos
    if item.page_factor:
        page_factor = float(
            item.page_factor)  # Si viene de una pasada anterior, lo usamos
        del item.page_factor  # ... y lo borramos
    if item.last_page_print:
        last_page_print = item.last_page_print  # Si viene de una pasada anterior, lo usamos
        del item.last_page_print  # ... y lo borramos

    cnt_tot = 30  # Poner el num. máximo de items por página
    cnt_title = 0  # Contador de líneas insertadas en Itemlist
    if item.cnt_tot_match:
        cnt_tot_match = float(
            item.cnt_tot_match
        )  # restauramos el contador TOTAL de líneas procesadas de matches
        del item.cnt_tot_match
    else:
        cnt_tot_match = 0.0  # Contador TOTAL de líneas procesadas de matches
    inicio = time.time(
    )  # Controlaremos que el proceso no exceda de un tiempo razonable
    fin = inicio + 5  # Después de este tiempo pintamos (segundos)
    timeout_search = timeout * 2  # Timeout para descargas
    if item.extra == 'search' and item.extra2 == 'episodios':  # Si viene de episodio que quitan los límites
        cnt_tot = 999
        fin = inicio + 30

    #Sistema de paginado para evitar páginas vacías o semi-vacías en casos de búsquedas con series con muchos episodios
    title_lista = [
    ]  # Guarda la lista de series que ya están en Itemlist, para no duplicar lineas
    if item.title_lista:  # Si viene de una pasada anterior, la lista ya estará guardada
        title_lista.extend(
            item.title_lista)  # Se usa la lista de páginas anteriores en Item
        del item.title_lista  # ... limpiamos
    matches = []

    if not item.extra2:  # Si viene de Catálogo o de Alfabeto
        item.extra2 = ''

    post = None
    if item.post:  # Rescatamos el Post, si lo hay
        post = item.post

    next_page_url = item.url
    # Máximo num. de líneas permitidas por TMDB. Máx de 5 segundos por Itemlist para no degradar el rendimiento
    while (cnt_title < cnt_tot and curr_page <= last_page
           and fin > time.time()) or item.matches:

        # Descarga la página
        data = ''
        cnt_match = 0  # Contador de líneas procesadas de matches

        if not item.matches:  # si no viene de una pasada anterior, descargamos
            data, response, item, itemlist = generictools.downloadpage(
                next_page_url,
                timeout=timeout_search,
                post=post,
                s2=False,
                item=item,
                itemlist=itemlist)  # Descargamos la página)

            # Verificamos si ha cambiado el Host
            global host, canonical
            if response.canonical and response.canonical != host:
                host, canonical = generictools.check_host(
                    channel, [response.canonical] + host_alt,
                    host_black_list,
                    host='',
                    CF=True,
                    alfa_s=True,
                    canonical=True)

            # Verificamos si se ha cargado una página correcta
            curr_page += 1  # Apunto ya a la página siguiente
            if not data or not response.sucess:  # Si la web está caída salimos sin dar error
                if len(itemlist) > 1:  # Si hay algo que pintar lo pintamos
                    last_page = 0
                    break
                return itemlist  # Si no hay nada más, salimos directamente

        if item.extra2 == 'PELICULA':
            #Patrón para seleccionar el bloque
            patron = '(?i)<header\s*class="archive_post">\s*<h2>\s*A.adido\s*recientemente\s*<\/h2>'
            patron += '\s*<span>\s*[^<]*\s*<\/span>\s*<\/header>\s*<div\s*id="archive-content"'
            patron += '\s*class="animation[^"]*">(.*?<\/article>\s*<\/div>'
            patron += '(?:\s*<div\s*class="pagination">.*?<\/div>))'
            data = scrapertools.find_single_match(data, patron)

            #logger.debug(data)

        #Patrón para búsquedas, pelis y series
        if item.extra == 'search':
            patron = '<article><div\s*class="image">.*?<img\s*src="([^"]+)".*?<span\s*'
            patron += 'class="movies">()[^<]*<\/span>.*?<a\s*href="([^"]+)">\s*([^<]*)'
            patron += '<\/a>\s*<\/div>\s*<div[^<]*<span[^<]*<\/span>\s*<span\s*'
            patron += 'class="year">\s*(?:(\d{4}))?\s*<\/span>'
        else:
            patron = '<article\s*id="post[^>]+>.*?<img\s*src="([^"]+)".*?<span\s*'
            patron += 'class="quality">\s*([^<]*)<\/span>.*?<h3>\s*<a\s*href="([^"]+)">'
            patron += '\s*([^<]*)<\/a>\s*<\/h3>\s*<span>(?:.*?(\d{4}))?\s*<\/span>'

        if not item.matches:  # De pasada anterior?
            matches = re.compile(patron, re.DOTALL).findall(data)
        else:
            matches = item.matches
            del item.matches

        #logger.debug("PATRON: " + patron)
        #logger.debug(matches)
        #logger.debug(data)

        if not matches and item.extra != 'search' and not item.extra2:  #error
            logger.error(
                "ERROR 02: LISTADO: Ha cambiado la estructura de la Web " +
                " / PATRON: " + patron + " / DATA: " + data)
            itemlist.append(
                item.clone(
                    action='',
                    title=item.channel.capitalize() +
                    ': ERROR 02: LISTADO: Ha cambiado la estructura de la Web.  '
                    + 'Reportar el error con el log'))
            break  #si no hay más datos, algo no funciona, pintamos lo que tenemos

        if not matches and item.extra == 'search':  #búsqueda vacía
            if len(itemlist) > 0:  # Si hay algo que pintar lo pintamos
                last_page = 0
                break
            return itemlist  #Salimos

        # Actualizado la url por si ha habido redirecciones
        if last_page == 99999:  #Si es la primera pasada...
            patron_canonical = '<link\s*rel="canonical"\s*href="([^"]+)"\s*\/*>'
            if scrapertools.find_single_match(data, patron_canonical):
                canonical = scrapertools.find_single_match(
                    data, patron_canonical)
                if not canonical.endswith('/'): canonical += '/'
                item.url = canonical + 'page/1/'

        # Buscamos la próxima página
        next_page_url = re.sub(r'page\/(\d+)', 'page/%s' % str(curr_page),
                               item.url)
        #logger.debug('curr_page: ' + str(curr_page) + ' / last_page: ' + str(last_page))

        # Buscamos la última página
        if last_page == 99999:  #Si es el valor inicial, buscamos
            patron_last = '<div\s*class="pagination">\s*<span>P.gina\s*\d+\s*de\s*(\d+)\s*<\/span>'
            try:
                last_page = int(
                    scrapertools.find_single_match(data, patron_last))
                page_factor = float(len(matches)) / float(cnt_tot)
            except:  #Si no lo encuentra, lo ponemos a 999
                last_page = 1
                last_page_print = int((float(len(matches)) / float(cnt_tot)) +
                                      0.999999)

            #logger.debug('curr_page: ' + str(curr_page) + ' / last_page: ' + str(last_page))

        #Empezamos el procesado de matches
        for scrapedthumb, scrapedquality, scrapedurl, scrapedtitle, scrapedyear in matches:
            scrapedlanguage = ''
            cnt_match += 1

            title = scrapedtitle

            title = scrapertools.remove_htmltags(title).rstrip(
                '.')  # Removemos Tags del título
            url = scrapedurl

            title_subs = []  #creamos una lista para guardar info importante

            # Slugify, pero más light
            title = title.replace("á", "a").replace("é", "e").replace("í", "i")\
                    .replace("ó", "o").replace("ú", "u").replace("ü", "u")\
                    .replace("ï¿½", "ñ").replace("Ã±", "ñ")
            title = scrapertools.decode_utf8_error(title)

            cnt_title += 1  # Incrementamos el contador de entradas válidas

            item_local = item.clone()  #Creamos copia de Item para trabajar
            if item_local.tipo:  #... y limpiamos
                del item_local.tipo
            if item_local.totalItems:
                del item_local.totalItems
            if item_local.intervencion:
                del item_local.intervencion
            if item_local.viewmode:
                del item_local.viewmode
            item_local.extra2 = True
            del item_local.extra2
            item_local.text_bold = True
            del item_local.text_bold
            item_local.text_color = True
            del item_local.text_color

            # Procesamos idiomas
            item_local.language = []  #creamos lista para los idiomas
            if '[Subs. integrados]' in scrapedquality or '(Sub Forzados)' in scrapedquality \
                        or 'Sub' in scrapedquality or 'ing' in scrapedlanguage.lower():
                item_local.language = ['VOS']  # añadimos VOS
            if 'lat' in scrapedlanguage.lower():
                item_local.language += ['LAT']  # añadimos LAT
            if 'castellano' in scrapedquality.lower() or ('español' in scrapedquality.lower() \
                        and not 'latino' in scrapedquality.lower()) or 'cas' in scrapedlanguage.lower():
                item_local.language += ['CAST']  # añadimos CAST
            if '[Dual' in title or 'dual' in scrapedquality.lower(
            ) or 'dual' in scrapedlanguage.lower():
                title = re.sub(r'(?i)\[dual.*?\]', '', title)
                item_local.language += ['DUAL']  # añadimos DUAL
            if not item_local.language:
                item_local.language = ['LAT']  # [LAT] por defecto

            # Procesamos Calidad
            if scrapedquality:
                item_local.quality = scrapertools.remove_htmltags(
                    scrapedquality)  # iniciamos calidad
                if '[720p]' in scrapedquality.lower(
                ) or '720p' in scrapedquality.lower():
                    item_local.quality = '720p'
                if '[1080p]' in scrapedquality.lower(
                ) or '1080p' in scrapedquality.lower():
                    item_local.quality = '1080p'
                if '4k' in scrapedquality.lower():
                    item_local.quality = '4K'
                if '3d' in scrapedquality.lower(
                ) and not '3d' in item_local.quality.lower():
                    item_local.quality += ', 3D'
            if not item_local.quality:
                item_local.quality = '1080p'

            item_local.thumbnail = urlparse.urljoin(
                host, scrapedthumb)  #iniciamos thumbnail

            item_local.url = urlparse.urljoin(host,
                                              url)  #guardamos la url final
            item_local.context = "['buscar_trailer']"  #... y el contexto

            # Guardamos los formatos para películas
            item_local.contentType = "movie"
            item_local.action = "findvideos"

            title = re.sub(
                r'(?i)TV|Online|(4k-hdr)|(fullbluray)|4k| - 4k|(3d)|miniserie',
                '', title).strip()
            item_local.quality = re.sub(
                r'(?i)proper|unrated|directors|cut|repack|internal|real|extended|masted|docu|super|duper|amzn|uncensored|hulu',
                '', item_local.quality).strip()

            #Analizamos el año.  Si no está claro ponemos '-'
            item_local.infoLabels["year"] = '-'
            try:
                if scrapedyear:
                    item_local.infoLabels["year"] = int(scrapedyear)
            except:
                pass

            #Terminamos de limpiar el título
            title = re.sub(r'[\(|\[]\s+[\)|\]]', '', title)
            title = title.replace('()', '').replace('[]', '').replace(
                '[4K]', '').replace('(4K)', '').strip().lower().title()
            title = title.strip().lower().title()
            item_local.from_title = title  #Guardamos esta etiqueta para posible desambiguación de título
            item_local.contentTitle = title
            item_local.title = title

            #Guarda la variable temporal que almacena la info adicional del título a ser restaurada después de TMDB
            item_local.title_subs = title_subs

            #Ahora se filtra por idioma, si procede, y se pinta lo que vale
            if filter_languages > 0:  #Si hay idioma seleccionado, se filtra
                itemlist = filtertools.get_link(itemlist, item_local,
                                                list_language)
            else:
                itemlist.append(item_local.clone())  #Si no, pintar pantalla

            cnt_title = len(
                itemlist)  # Recalculamos los items después del filtrado
            if cnt_title >= cnt_tot and (
                    len(matches) - cnt_match
            ) + cnt_title > cnt_tot * 1.3:  #Contador de líneas añadidas
                break

            #logger.debug(item_local)

        matches = matches[cnt_match:]  # Salvamos la entradas no procesadas
        cnt_tot_match += cnt_match  # Calcular el num. total de items mostrados

    #Pasamos a TMDB la lista completa Itemlist
    tmdb.set_infoLabels(itemlist,
                        __modo_grafico__,
                        idioma_busqueda=idioma_busqueda)

    #Llamamos al método para el maquillaje de los títulos obtenidos desde TMDB
    item, itemlist = generictools.post_tmdb_listado(item, itemlist)

    # Si es necesario añadir paginacion
    if curr_page <= last_page or len(matches) > 0:
        curr_page_print = int(cnt_tot_match / float(cnt_tot))
        if curr_page_print < 1:
            curr_page_print = 1
        if last_page:
            if last_page > 1:
                last_page_print = int((last_page * page_factor) + 0.999999)
            title = '%s de %s' % (curr_page_print, last_page_print)
        else:
            title = '%s' % curr_page_print

        itemlist.append(
            Item(channel=item.channel,
                 action="listado",
                 title=">> Página siguiente " + title,
                 title_lista=title_lista,
                 url=next_page_url,
                 extra=item.extra,
                 extra2=item.extra2,
                 last_page=str(last_page),
                 curr_page=str(curr_page),
                 page_factor=str(page_factor),
                 cnt_tot_match=str(cnt_tot_match),
                 matches=matches,
                 last_page_print=last_page_print,
                 post=post))

    return itemlist

示例#26

0

显示文件

def listado(item):  # Listado principal
    logger.info()

    itemlist = []
    matches = []
    item.category = categoria

    #logger.debug(item)

    cnt_tot = 30  # Poner el num. máximo de items por página
    cnt_title = 0  # Contador de líneas insertadas en Itemlist
    cnt_offset = 0  # offset para cnt_title en searchs
    curr_page = 1
    last_page = 999

    inicio = time.time(
    )  # Controlaremos que el proceso no exceda de un tiempo razonable
    fin = inicio + 5  # Después de este tiempo pintamos (segundos)
    timeout_search = timeout * 2  # Timeout para descargas

    next_page_url = item.url
    # Máximo num. de líneas permitidas por TMDB. Máx de 5 segundos por Itemlist para no degradar el rendimiento
    while (next_page_url and cnt_title < cnt_tot and curr_page <= last_page
           and fin > time.time()) or item.matches:

        # Descarga la página
        data = ''
        cnt_match = 0  # Contador de líneas procesadas de matches

        patron = 'data-column-clickable="([^"]+")[^>]*data-id="([^"]+)".*?<h4\s*'
        patron += 'class="elementor-heading-title[^>]+>\s*<span1[^>]*>\s*([^<]*)'
        patron += '<\/span1>([^<]*)<\/h4>.*?<h5\s*class="elementor-image-box-title">\s*'
        patron += '([^<]+)<\/h5>\s*(?:<p\s*class="elementor-image-box-description">\s*([^<]+)<\/p>)?'

        if not item.matches:  # si no viene de una pasada anterior, descargamos
            data, success, code, item, itemlist = generictools.downloadpage(
                next_page_url,
                timeout=timeout_search,
                s2=False,
                item=item,
                itemlist=itemlist)  # Descargamos la página)

            # Verificamos si se ha cargado una página correcta
            curr_page += 1  # Apunto ya a la página siguiente
            if not data or not success:  # Si la web está caída salimos sin dar error
                if len(itemlist) > 1:  # Si hay algo que pintar lo pintamos
                    last_page = 0
                    break
                return itemlist  # Si no hay nada más, salimos directamente

        # Comprobar si hay más páginas
        patron_page = '<a\s*class="page-numbers[^"]+"\s*href="([^"]+)">Siguiente'
        if scrapertools.find_single_match(data, patron_page):
            next_page_url = scrapertools.find_single_match(data, patron_page)
        else:
            next_page_url = ''

        if not item.matches:  # De pasada anterior?
            matches = re.compile(patron, re.DOTALL).findall(data)
        else:
            matches = item.matches
            del item.matches

        #logger.debug("PATRON: " + patron)
        #logger.debug(matches)
        #logger.debug(data)

        # Iniciamos el Plot y marcamos TMDB como no usable
        item.contentPlot = '[COLOR gold][B]%s[/B][/COLOR]%s\n\n' % (
            item.title, item.channel_sufix)  #iniciamos el Plot con el Deporte
        item.from_title = '%s - ' % item.title
        item.infoLabels['tmdb_id'] = null

        #Empezamos el procesado de matches
        for scrapedurl, scrapedthumb, scrapedtitle1, scrapedtitle2, date1, date2 in matches:
            #Generamos una copia de Item para trabajar sobre ella
            item_local = item.clone()

            cnt_match += 1

            title = '%s%s: %s %s' % (scrapedtitle1, scrapedtitle2, date1,
                                     date2)
            title = scrapertools.remove_htmltags(title).rstrip(
                '.')  # Removemos Tags del título
            title = title.replace("á", "a").replace("é", "e").replace("í", "i")\
                    .replace("ó", "o").replace("ú", "u").replace("ü", "u")\
                    .replace("ï¿½", "ñ").replace("Ã±", "ñ").replace("&#8217;", "'")\
                    .replace("&amp;", "&")

            cnt_title += 1  # Incrementamos el contador de entradas válidas

            # Procesamos idiomas
            item_local.language = []  #creamos lista para los idiomas
            if not item_local.language:
                item_local.language = ['CAST']  # [CAST] por defecto

            # Procesamos Calidad
            if not item_local.quality:
                item_local.quality = 'HDTV'

            patron_thumb = '\.elementor-element\.elementor-element-%s\s*>.*?url\(([^\)]+)\)' % scrapedthumb
            item_local.thumbnail = scrapertools.find_single_match(
                data, patron_thumb)  #iniciamos thumbnail
            item_local.contentThumbnail = item_local.thumbnail  #iniciamos thumbnail
            item_local.infoLabels[
                'fanart'] = item_local.thumbnail  #iniciamos Fanart

            item_local.url = urlparse.urljoin(host,
                                              scrapedurl)  #iniciamos la url
            item_local.url_tvshow = item_local.url

            # Guardamos los formatos para películas
            item_local.contentType = event
            item_local.action = "episodios"

            year = scrapertools.find_single_match(title, '\d{4}')
            if not year:
                import datetime
                year = datetime.datetime.now().year
            if year:
                item_local.infoLabels['year'] = int(year)
                item_local.infoLabels['aired'] = str(year)

            title = re.sub(
                r'(?i)TV|Online|(4k-hdr)|(fullbluray)|4k| - 4k|(3d)|miniserie|\d{4}',
                '', title).strip()
            item_local.title = title.strip().lower().title()
            item_local.contentTitle = item_local.title
            item_local.from_title += item_local.title

            #Ahora se filtra por idioma, si procede, y se pinta lo que vale
            if filter_languages > 0:  #Si hay idioma seleccionado, se filtra
                itemlist = filtertools.get_link(itemlist, item_local,
                                                list_language)
            else:
                itemlist.append(item_local.clone())  #Si no, pintar pantalla

            cnt_title = len(
                itemlist
            ) - cnt_offset  # Recalculamos los items después del filtrado
            if cnt_title >= cnt_tot and (
                    len(matches) - cnt_match
            ) + cnt_title > cnt_tot * 1.3:  #Contador de líneas añadidas
                break

            #logger.debug(item_local)

        matches = matches[cnt_match:]  # Salvamos la entradas no procesadas

    return itemlist

示例#27

0

显示文件

文件： dascer.py 项目： shlibidon/addon

def episodios(item):
    logger.info()
    
    itemlist = []
    item.url_enlaces = []
    season = 1
    episode = 1
    download_connector = config.get_setting('download_connector', channel, default=0)
    download_connectors = [
                           "Torrent",
                           "Ok",
                           "Mega",
                           "UpStream"
                          ]
    try:
        download_preference = download_connectors[download_connector]
    except:
        download_preference = download_connectors[0]
        logger.error(traceback.format_exc(1))
    
    #logger.debug(item)

    # Descargamos evento
    data, success, code, item, itemlist = generictools.downloadpage(item.url, timeout=timeout,  s2=False, 
                                          item=item, itemlist=[])               # Descargamos la página

    if not success:
        return itemlist

    item.contentPlot = '%s[B]%s %s[/B]\n\n' % (item.contentPlot, item.title, item.infoLabels['aired'])
    
    # Obtenemos el thumb del evento
    patron = '<div class="elementor-image">\s*<img\s*width="[^"]*"\s*height="[^"]*"\s*src="([^"]+)"'
    if scrapertools.find_single_match(data, patron):
        item.thumbnail = scrapertools.find_single_match(data, patron)
        item.contentThumbnail = item.thumbnail
    
    # Localizamos los datos del Evento
    patron = '(?i)<h2\s*class="elementor-heading-title[^>]+>\s*CIRCUITO\s*DE\s*(.*?)\s*<\/h2>'
    if scrapertools.find_single_match(data, patron):                            # Circuito
        item.contentPlot += '[COLOR yellow][B]%s:[/B][/COLOR]\n' % scrapertools.find_single_match(data, patron).capitalize()
    patron = '<tr><th\s*scope="row">(.*?)<\/th><td colspan[^<]+>(.*?)<\/td><\/tr>'  # Detalles del Circuito
    datos_evento = re.compile(patron, re.DOTALL).findall(data)
    for clave, valor in datos_evento:
        if 'Vuelta' not in clave:
            item.contentPlot += '- %s: %s\n' % (clave, valor)
        else:
            patron = '(.*?)(?:<br\s*\/>\s*)?<span.*?<\/span>(.*?)(?:<br\s*\/>\s*)?<span.*?<\/span>(.*?)(?:<br\s*\/>\s*)?$'
            if scrapertools.find_single_match(valor, patron):
                tiempo, deportista, equipo = scrapertools.find_single_match(valor, patron)
                tiempo = scrapertools.remove_htmltags(tiempo)                   # Removemos Tags del título
                item.contentPlot += '- %s: %s, %s, %s\n' % (clave, tiempo, deportista, equipo)

    # Procesamos los eventos
    patron = '<div[^>]+>(?:\s*<p>)?\s*<iframe\s*[^>]+>\s*<\/iframe>(?:\s*<\/p>)?\s*'
    patron += '<p\s*style[^>]+>\s*<a\s*href="([^"]+)"\s*(?:target[^>]+)?>\s*'
    patron +='(?:Descargar \w+)?\s*(.*?)\s*<\/a><\/p><\/div>'                   # Una sola sesión
    if scrapertools.find_single_match(data, patron):
        item_local = item.clone()
        item_local.url_enlaces = [(scrapertools.find_single_match(data, patron))]
        item_local.action = "findvideos"
        item_local.contentType = session
        return findvideos(item_local)

    patron = '<div\s*id="elementor-tab-title[^>]+>.*?<\/span>\s*<a\s*href="[^"]*"\s*(.*?)\s*<\/div>\s*<\/div>'      # Varias sesiones
    eventos = re.compile(patron, re.DOTALL).findall(data)
    
    #logger.debug("PATRON: " + patron)
    #logger.debug(eventos)
    #logger.debug(data)
    
    for actividad in eventos:
        item_local = item.clone()
        item_local.action = "findvideos"
        item_local.contentType = session
        item_local.url_enlaces = []

        patron = 'class="elementor-toggle-title">(.*?)<\/a>'
        item_local.title = scrapertools.find_single_match(actividad, patron)
        item_local.contentTitle = item_local.title
        
        if not item_local.title:
            return itemlist

        # Enlace por defecto de Okru
        patron = '(?:<p>)?<iframe.*?src="([^"]+)"[^<]+><\/iframe>'
        enlace = scrapertools.find_single_match(actividad, patron)
        if enlace and 'ok.ru' in enlace:
            if not enlace.startswith('http'):
                enlace = 'https:' + enlace
            item_local.url_enlaces.append((enlace, 'Okru'))
        
        # Otros enlaces
        patron = '(?:<p>)?<a\s*href="([^"]+)"[^>]*>(?:\s*Descargar\s*\w+)?\s*(.*?)\s*<\/a>'
        enlaces_descargas = re.compile(patron, re.DOTALL).findall(actividad)
        
        for enlace, server in enlaces_descargas:
            if enlace and server:
                if not enlace.startswith('http'):
                    enlace = 'https:' + enlace
                if server == download_preference:
                    item_local.url_enlaces.insert(0, (enlace, server))
                else:
                    item_local.url_enlaces.append((enlace, server))
        
        item_local.contentSeason = season
        item_local.contentEpisodeNumber = episode
        episode += 1
        itemlist.append(item_local.clone())

    return itemlist

示例#28

0

显示文件

def episodios(item):
    logger.info()
    
    itemlist = []
    item.category = categoria
    
    #logger.debug(item)

    if item.from_title:
        item.title = item.from_title
    
    #Limpiamos num. Temporada y Episodio que ha podido quedar por Novedades
    season_display = 0
    if item.contentSeason:
        if item.season_colapse:                                                 #Si viene del menú de Temporadas...
            season_display = item.contentSeason                                 #... salvamos el num de sesión a pintar
            item.from_num_season_colapse = season_display
            del item.season_colapse
            item.contentType = "tvshow"
            if item.from_title_season_colapse:
                item.title = item.from_title_season_colapse
                del item.from_title_season_colapse
                if item.infoLabels['title']:
                    del item.infoLabels['title']
        del item.infoLabels['season']
    if item.contentEpisodeNumber:
        del item.infoLabels['episode']
    if season_display == 0 and item.from_num_season_colapse:
        season_display = item.from_num_season_colapse

    # Obtener la información actualizada de la Serie.  TMDB es imprescindible para Videoteca
    try:
        tmdb.set_infoLabels(item, True, idioma_busqueda='es,en')
    except:
        pass
        
    modo_ultima_temp_alt = modo_ultima_temp
    if item.ow_force == "1":                                                    #Si hay una migración de canal o url, se actualiza todo 
        modo_ultima_temp_alt = False
    
    # Vemos la última temporada de TMDB y del .nfo
    max_temp = 1
    if item.infoLabels['number_of_seasons']:
        max_temp = item.infoLabels['number_of_seasons']
    y = []
    if modo_ultima_temp_alt and item.library_playcounts:                        #Averiguar cuantas temporadas hay en Videoteca
        patron = 'season (\d+)'
        matches = re.compile(patron, re.DOTALL).findall(str(item.library_playcounts))
        for x in matches:
            y += [int(x)]
        max_temp = max(y)

    # Si la series tiene solo una temporada, o se lista solo una temporada, guardamos la url y seguimos normalmente
    list_temp = []
    list_temp.append(item.url)

    # Descarga las páginas
    for url in list_temp:                                                       # Recorre todas las temporadas encontradas
        
        data, success, code, item, itemlist = generictools.downloadpage(url, timeout=timeout, s2=False, 
                                          item=item, itemlist=itemlist)         # Descargamos la página
        
        #Verificamos si se ha cargado una página, y si además tiene la estructura correcta
        if not success:                                                         # Si ERROR o lista de errores ...
            return itemlist                                                     # ... Salimos

        patron = '<tr>(?:\s*<td>[^<]+<\/td>)?\s*<td>([^<]+)<\/td>\s*<td>([^<]+)<\/td>\s*'
        patron += '<td\s*class=[^<]+<\/td>(?:<td>([^<]+)<\/td>)?\s*<td\s*class=[^<]+<\/td>\s*'
        patron += '<td>\s*<a\s*class="[^"]+"\s*(?:data-row="[^"]+"\s*data-post="[^"]+"\s*)?'
        patron += 'href="([^"]+)"\s*(?:data-season="([^"]+)"\s*data-serie="([^"]+)")?'
        
        matches = re.compile(patron, re.DOTALL).findall(data)

        #logger.debug("PATRON: " + patron)
        #logger.debug(matches)
        #logger.debug(data)

        # Recorremos todos los episodios generando un Item local por cada uno en Itemlist
        x = 0
        for scrapedquality, scrapedlanguage, scrapedsize, scrapedurl, season_num, episode_num  in matches:
            item_local = item.clone()
            item_local.action = "findvideos"
            item_local.contentType = "episode"
            if item_local.library_playcounts:
                del item_local.library_playcounts
            if item_local.library_urls:
                del item_local.library_urls
            if item_local.path:
                del item_local.path
            if item_local.update_last:
                del item_local.update_last
            if item_local.update_next:
                del item_local.update_next
            if item_local.channel_host:
                del item_local.channel_host
            if item_local.active:
                del item_local.active
            if item_local.contentTitle:
                del item_local.infoLabels['title']
            if item_local.season_colapse:
                del item_local.season_colapse

            item_local.url = url                                                # Usamos las url de la temporada, no hay de episodio
            item_local.matches = []
            item_local.matches.append(matches[x])                               # Salvado Matches de cada episodio
            x += 1
            item_local.context = "['buscar_trailer']"
            if not item_local.infoLabels['poster_path']:
                item_local.thumbnail = item_local.infoLabels['thumbnail']
            
            # Extraemos números de temporada y episodio
            try:
                item_local.contentSeason = int(season_num)
            except:
                item_local.contentSeason = 1
            try:
                item_local.contentEpisodeNumber = int(episode_num)
            except:
                item_local.contentEpisodeNumber = 0

            item_local.title = '%sx%s - ' % (str(item_local.contentSeason), 
                        str(item_local.contentEpisodeNumber).zfill(2))

            # Procesamos Calidad
            if scrapedquality:
                item_local.quality = scrapertools.remove_htmltags(scrapedquality)   # iniciamos calidad
                if '[720p]' in scrapedquality.lower() or '720p' in scrapedquality.lower():
                    item_local.quality = '720p'
                if '[1080p]' in scrapedquality.lower() or '1080p' in scrapedquality.lower():
                    item_local.quality = '1080p'
                if '4k' in scrapedquality.lower():
                    item_local.quality = '4K'
                if '3d' in scrapedquality.lower() and not '3d' in item_local.quality.lower():
                    item_local.quality += ', 3D'
            if not item_local.quality:
                item_local.quality = '720p'
            
            # Comprobamos si hay más de un enlace por episodio, entonces los agrupamos
            if len(itemlist) > 0 and item_local.contentSeason == itemlist[-1].contentSeason \
                        and item_local.contentEpisodeNumber == itemlist[-1].contentEpisodeNumber \
                        and itemlist[-1].contentEpisodeNumber != 0:             # solo guardamos un episodio ...
                if itemlist[-1].quality:
                    if item_local.quality not in itemlist[-1].quality:
                        itemlist[-1].quality += ", " + item_local.quality       # ... pero acumulamos las calidades
                else:
                    itemlist[-1].quality = item_local.quality
                itemlist[-1].matches.append(item_local.matches[0])              # Salvado Matches en el episodio anterior
                continue                                                        # ignoramos el episodio duplicado
            
            if season_display > 0:                                              # Son de la temporada estos episodios?
                if item_local.contentSeason > season_display:
                    break
                elif item_local.contentSeason < season_display:
                    continue
                    
            if modo_ultima_temp_alt and item.library_playcounts:                # Si solo se actualiza la última temporada de Videoteca
                if item_local.contentSeason < max_temp:
                    continue                                                    # Sale del bucle actual del FOR

            itemlist.append(item_local.clone())

            #logger.debug(item_local)
            
    if len(itemlist) > 1:
        itemlist = sorted(itemlist, key=lambda it: (int(it.contentSeason), int(it.contentEpisodeNumber)))       #clasificamos
        
    if item.season_colapse and not item.add_videolibrary:                       #Si viene de listado, mostramos solo Temporadas
        item, itemlist = generictools.post_tmdb_seasons(item, itemlist)

    if not item.season_colapse:                                                 #Si no es pantalla de Temporadas, pintamos todo
        # Pasada por TMDB y clasificación de lista por temporada y episodio
        tmdb.set_infoLabels(itemlist, True, idioma_busqueda='es,en')

        #Llamamos al método para el maquillaje de los títulos obtenidos desde TMDB
        item, itemlist = generictools.post_tmdb_episodios(item, itemlist)
    
    #logger.debug(item)

    return itemlist