def _get_episode_url(self, show_url, video):
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        html = self._http_get(page_url[0], require_debrid=True, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url:
            page_url = [page_url[0].attrs['href']]

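# Several of the pagination loops in this listing call a private __too_old(post)
# helper that is not included here. The sketch below is a guess at its contract --
# return True once a post is older than some cutoff so paging can stop. The date
# regex, date format, and FILTER_DAYS constant are assumptions, not the real values.
import datetime
import re

FILTER_DAYS = 30  # assumed cutoff; the real scrapers likely read a setting

def __too_old(self, post):
    # Hypothetical: pull a date out of the post markup and compare it to today.
    match = re.search('datetime="(\d{4}-\d{2}-\d{2})', post)
    if not match:
        return False
    post_date = datetime.datetime.strptime(match.group(1), '%Y-%m-%d').date()
    return (datetime.date.today() - post_date).days > FILTER_DAYS
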
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post:
                continue
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls:
                    continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post):
                continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results

def _get_episode_url(self, show_url, video):
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    result = self._default_get_episode_url(show_url, video, episode_pattern)
    if result:
        return result
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    fragment = dom_parser.parse_dom(html, "ul", {"class": "episode_list"})
    if fragment:
        ep_urls = dom_parser.parse_dom(fragment[0], "a", ret="href")
        ep_dates = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_air_d"})
        ep_titles = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_name"})
        force_title = scraper_utils.force_title(video)
        if not force_title and kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate:
            for ep_url, ep_date in zip(ep_urls, ep_dates):
                log_utils.log("Quikr Ep Airdate Matching: %s - %s - %s" % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
                if video.ep_airdate == scraper_utils.to_datetime(ep_date, "%Y-%m-%d").date():
                    return scraper_utils.pathify_url(ep_url)
        if force_title or kodi.get_setting("title-fallback") == "true":
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for ep_url, ep_title in zip(ep_urls, ep_titles):
                ep_title = re.sub("<span>.*?</span>\s*", "", ep_title)
                log_utils.log("Quikr Ep Title Matching: %s - %s - %s" % (ep_url, norm_title, video.ep_title), log_utils.LOGDEBUG)
                if norm_title == scraper_utils.normalize_title(ep_title):
                    return scraper_utils.pathify_url(ep_url)

def search(self, video_type, title, year, season=""): results = [] norm_title = scraper_utils.normalize_title(title) if video_type == VIDEO_TYPES.MOVIE: if year: base_url = urlparse.urljoin(self.base_url, "/Film/") html = self._http_get(base_url, cache_limit=48) for link in self.__parse_directory(html): if year == link["title"]: url = urlparse.urljoin(base_url, link["link"]) for movie in self.__get_files(url, cache_limit=24): match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie["link"]) if ( not movie["directory"] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year) ): result = {"url": scraper_utils.pathify_url(url), "title": match_title, "year": year} results.append(result) else: base_url = urlparse.urljoin(self.base_url, "/Serial/") html = self._http_get(base_url, cache_limit=48) for link in self.__parse_directory(html): if link["directory"] and norm_title in scraper_utils.normalize_title(link["title"]): url = urlparse.urljoin(base_url, link["link"]) result = {"url": scraper_utils.pathify_url(url), "title": link["title"], "year": ""} results.append(result) return results
def _get_episode_url(self, show_url, video):
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')

def _get_episode_url(self, show_url, video):
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url:
            page_url = [page_url[0].attrs['href']]

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if match:
                post_url, post_title = match.groups()
                if '/tv-show/' in post or self.__too_old(post):
                    continue
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if title:
        html = self._http_get(self.base_url, cache_limit=48)
        norm_title = scraper_utils.normalize_title(title)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'container seo'})
        if fragment:
            match_year = ''
            for attrs, match_title in dom_parser2.parse_dom(fragment[0].content, 'a', {'class': 'link'}, req='href'):
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(attrs['href']), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        for _attrs, table in dom_parser2.parse_dom(html, 'table'):
            for _attrs, td in dom_parser2.parse_dom(table, 'td'):
                match_url = dom_parser2.parse_dom(td, 'a', req='href')
                match_title = dom_parser2.parse_dom(td, 'div', {'class': 'searchTVname'})
                match_year = dom_parser2.parse_dom(td, 'span', {'class': 'right'})
                if match_url and match_title:
                    match_url = match_url[0].attrs['href']
                    match_title = match_title[0].content
                    match_year = match_year[0].content if match_year else ''
                    if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                        result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                        results.append(result)
    return results

def _get_episode_url(self, show_url, video):
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        episodes = dom_parser.parse_dom(html, 'div', {'class': '\s*el-item\s*'})
        if not force_title:
            episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))
            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                    ep_airdate = dom_parser.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0])
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                ep_title = dom_parser.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0]):
                    return scraper_utils.pathify_url(ep_url[0])

def _get_episode_url(self, show_url, video):
    query = scraper_utils.parse_query(show_url)
    if 'id' in query:
        url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
        js_data = self._http_get(url, cache_limit=.5)
        if 'episodes' in js_data:
            force_title = scraper_utils.force_title(video)
            if not force_title:
                for episode in js_data['episodes']:
                    if int(video.season) == int(episode['season']) and int(video.episode) == int(episode['number']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    for episode in js_data['episodes']:
                        if 'airdate' in episode:
                            ep_airdate = scraper_utils.to_datetime(episode['airdate'], "%Y-%m-%d").date()
                            if video.ep_airdate == (ep_airdate - datetime.timedelta(days=1)):
                                return scraper_utils.pathify_url('?id=%s' % (episode['id']))
            else:
                logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in js_data['episodes']:
                    if 'name' in episode and norm_title in scraper_utils.normalize_title(episode['name']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))

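# For orientation, a guess at the /api/v2/shows/<id> payload the loop above walks.
# Only the key names (episodes, season, number, id, airdate, name) come from the
# code; the values are invented for illustration.
sample_js_data = {
    'episodes': [
        {'id': 12345, 'season': 2, 'number': 3,
         'airdate': '2016-03-09', 'name': 'Example Episode'},
    ]
}
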
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html:
        return
    try:
        html = html[0].content
    except AttributeError:
        pass
    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])

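# A hedged usage sketch for _default_get_episode_url: callers pass regexes whose
# {...} placeholders are substituted from the episode airdate, and a title pattern
# whose named groups must be 'url' and 'title' (see the groupdict() lookup above).
# These pattern strings are illustrative only, not taken from any real scraper.
episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
title_pattern = 'href="(?P<url>[^"]+)"[^>]*>(?P<title>[^<]+)</a>'
airdate_pattern = 'href="([^"]+{year}-{p_month}-{p_day}[^"]*)"'
result = self._default_get_episode_url(html, video, episode_pattern, title_pattern, airdate_pattern)
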
def search(self, video_type, title, year):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for link in self.__parse_directory(html):
                if year == link['title']:
                    url = urlparse.urljoin(base_url, link['link'])
                    for movie in self.__get_files(url, cache_limit=24):
                        match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                        if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                            results.append(result)
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for link in self.__parse_directory(html):
            if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                url = urlparse.urljoin(base_url, link['link'])
                result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                results.append(result)
    return results

def _get_episode_url(self, show_url, video):
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except:
        airdate_pattern = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')

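# Note: for an episode that aired 2016-03-09, the strftime call above expands to
# the regex (\.|_| )2016(\.|_| )03(\.|_| )09(\.|_| ), so the airdate fallback
# matches release names like Show.2016.03.09.720p.HDTV as well as underscore- or
# space-separated variants.
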
def _get_episode_url(self, show_url, video):
    sxe = '.S%02dE%02d.' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        ep_airdate = video.ep_airdate.strftime('.%Y.%m.%d.')
    except:
        ep_airdate = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if (sxe in title) or (ep_airdate and ep_airdate in title):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')

def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result:
        return result
    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)
    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)

def _get_episode_url(self, season_url, video):
    url = urlparse.urljoin(self.base_url, season_url)
    html = self._http_get(url, cache_limit=2)
    if int(video.episode) == 1:
        return scraper_utils.pathify_url(url)
    else:
        pattern = 'location\.href="([^&]*season-%s[^/]*/%s)"' % (video.season, video.episode)
        match = re.search(pattern, html)
        if match:
            return scraper_utils.pathify_url(match.group(1))

def _get_episode_url(self, show_url, video):
    season_url = show_url + '-season-%s/' % (video.season)
    url = urlparse.urljoin(self.base_url, season_url)
    html = self._http_get(url, allow_redirect=False, cache_limit=.5)
    if html != '/':
        if int(video.episode) == 1:
            return scraper_utils.pathify_url(url)
        else:
            pattern = 'location\.href="([^&]*season-%s/%s)"' % (video.season, video.episode)
            match = re.search(pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))

def search(self, video_type, title, year, season=''):
    # urljoin returns an absolute second argument unchanged, so this pins the ororo.tv domain
    url = urlparse.urljoin(self.base_url, 'http://ororo.tv/en')
    if video_type == VIDEO_TYPES.MOVIE:
        url += '/movies'
    html = self._http_get(url, cache_limit=.25)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    include_paid = kodi.get_setting('%s-include_premium' % (self.get_name())) == 'true'
    for match in re.finditer('''<span class='value'>(\d{4})(.*?)href="([^"]+)[^>]+>([^<]+)''', html, re.DOTALL):
        match_year, middle, url, match_title = match.groups()
        if not include_paid and video_type == VIDEO_TYPES.MOVIE and 'paid accounts' in middle:
            continue
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year):
    search_url = urlparse.urljoin(self.base_url, '/index.php?menu=search&query=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    sections = {VIDEO_TYPES.MOVIE: 'movies', VIDEO_TYPES.TVSHOW: 'series'}
    fragment = dom_parser.parse_dom(html, 'div', {'id': sections[video_type]})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'figcaption'):
            match = re.search('title="([^"]+)[^>]+href="([^"]+)', item)
            if match:
                match_title_year, url = match.groups()
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if match_title.startswith('Watch '):
                    match_title = match_title.replace('Watch ', '')
                if match_title.endswith(' Online'):
                    match_title = match_title.replace(' Online', '')
                if not year or not match_year or year == match_year:
                    result = {'title': match_title, 'url': scraper_utils.pathify_url(url), 'year': match_year}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/movies.php?list=search&search=')
    search_url += urllib.quote_plus(title)
    cookies = {'onlylanguage': 'en', 'lang': 'en'}
    html = self._http_get(search_url, cookies=cookies, cache_limit=.25)
    results = []
    pattern = 'id="tdmovies">\s*<a\s+href="([^"]+)">([^<]+).*?id="f7">(.*?)</TD>'
    for match in re.finditer(pattern, html, re.DOTALL):
        url, title, extra = match.groups('')
        if (video_type == VIDEO_TYPES.MOVIE and '(TVshow)' in title) or (video_type == VIDEO_TYPES.TVSHOW and '(TVshow)' not in title):
            continue
        title = title.replace('(TVshow)', '')
        title = title.strip()
        r = re.search('>(\d{4})<', extra)
        if r:
            match_year = r.group(1)
        else:
            match_year = ''
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html' % urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'list-movie'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'movie'}):
            match = re.search('class="movie-name".*?href="([^"]+)[^>]+>([^<]+)', item)
            if match:
                url, match_title = match.groups()
                is_season = re.search('\s+-\s+[Ss](\d+)$', match_title)
                # fixed: original tested the truthy constant VIDEO_TYPES.SEASON instead of comparing video_type
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                    match_year = ''
                    if video_type == VIDEO_TYPES.MOVIE:
                        for info_frag in dom_parser.parse_dom(item, 'p', {'class': 'info'}):
                            match = re.search('(\d{4})', info_frag)
                            if match:
                                match_year = match.group(1)
                                break
                        if not match_year:
                            match = re.search('(\d{4})$', url)
                            if match:
                                match_year = match.group(1)
                    else:
                        if season and int(is_season.group(1)) != int(season):
                            continue
                    if not year or not match_year or year == match_year:
                        result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                        results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    if video_type == VIDEO_TYPES.MOVIE:
        is_series = 1
    else:
        is_series = 2
    search_url = urlparse.urljoin(self.base_url, '/advanced-search/?q[title]=%s&q[is_series]=%s&q[year_from]=%s&q[year_to]=%s')
    search_url = search_url % (urllib.quote_plus(title), is_series, year, year)
    results = []
    html = self._http_get(search_url, cache_limit=.25)
    if not re.search('Nothing was found', html):
        for match in re.finditer('class="name">\s*<a\s+title="([^"]+)\s+\((\d{4})\)"\s+href="([^"]+)', html):
            title, year, url = match.groups('')
            if re.search('/season-\d+/episode-\d+', url):
                continue  # exclude episodes
            result = {'url': scraper_utils.pathify_url(url), 'title': title, 'year': year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    url = urlparse.urljoin(self.base_url, '/tv-series-a-z-list')
    html = self._http_get(url, cache_limit=8)
    results = []
    pattern = '<li>\s*<a.*?href="([^"]+)[^>]*>([^<]+)'
    norm_title = scraper_utils.normalize_title(title)
    for match in re.finditer(pattern, html, re.DOTALL):
        url, match_title_year = match.groups()
        r = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
        if r:
            match_title, match_year = r.groups()
        else:
            match_title = match_title_year
            match_year = ''
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=8)
    title_strip = [word.decode('utf-8') for word in TITLE_STRIP]
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match_url = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        if match_url and match_title:
            item_type = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
            if item_type and item_type[0] in SEARCH_EXCLUDE:
                continue
            match_url = match_url.group(1)
            match_title = match_title[0]
            if 'SEZON' in match_title.upper():
                continue
            year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
            if year_frag:
                match_year = year_frag[0]
            else:
                match_year = ''
            match_title = ' '.join([word for word in match_title.split() if word.upper() not in title_strip])
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'listing-videos'})
    if not fragment:
        return results
    for attrs, match_title_year in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
        match_url = attrs['href']
        match_title_year = re.sub('</?[^>]*>', '', match_title_year)
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not year or not match_year or year == match_year:
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results

def search(self, video_type, title, year):
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus('%s %s' % (title, year))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+).*?alt="([^"]+)', item, re.DOTALL)
        if match:
            url, match_title_year = match.groups()
            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                year_fragment = dom_parser.parse_dom(item, 'span', {'class': 'year'})
                if year_fragment:
                    match_year = year_fragment[0]
                else:
                    match_year = ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/results?q=%s' % urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    for result in dom_parser.parse_dom(html, 'div', {'class': 'cell'}):
        match = re.search('class="video_title".*?href="([^"]+)"[^>]*>\s*([^<]+)', result, re.DOTALL)
        if match:
            url, match_title_year = match.groups()
            match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match = re.search('class="video_quality".*?Year\s*(?:</b>)?\s*:\s*(\d{4})', result, re.DOTALL)
                if match:
                    match_year = match.group(1)
                else:
                    match_year = ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus('%s %s' % (title, year))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    if not re.search('Sorry, but nothing matched', html):
        norm_title = scraper_utils.normalize_title(title)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*box-shadow[^"]*'}):
            match = re.search('href="([^"]+)"\s+title="([^"]+)', item)
            if match:
                url, match_title_year = match.groups()
                if re.search('S\d{2}E\d{2}', match_title_year):
                    continue  # skip episodes
                if re.search('TV\s*SERIES', match_title_year, re.I):
                    continue  # skip shows
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/index.php')
    data = {'subaction': 'search', 'story': title, 'do': 'search'}
    headers = {'Referer': search_url}
    html = self._http_get(search_url, params={'do': 'search'}, data=data, headers=headers, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'dle-content'})
    if not fragment:
        return results
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'div', {'class': 'short-film'}):
        match = re.search('<h5><a\s+href="([^"]+)[^>]+title="([^"]+)', item)
        if not match:
            continue
        url, match_title = match.groups('')
        result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
        results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/bestmatch-search-%s.html')
    search_title = title.replace(' ', '-')
    search_title = re.sub('[^A-Za-z0-9-]', '', search_title).lower()
    search_url = search_url % (search_title)
    html = self._http_get(search_url, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
        match = re.search('href="([^"]+)[^>]*>(.*?)</a>', item)
        if match:
            url, match_title_year = match.groups('')
            match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': '[^"]*listing-videos[^"]*'})
    if fragment:
        for match in re.finditer('href="([^"]+)[^>]*>(.*?)</a>', fragment[0]):
            url, match_title_year = match.groups('')
            match_title_year = re.sub('<span>|</span>', '', match_title_year)
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip episodes
            match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            match_title = match_title.replace('–', '-')
            match_title = match_title.replace('’', "'")
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    results = []
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if match and match_title:
            url = match.group(1)
            match_title = match_title[0]
            if re.search('\d+\s*x\s*\d+', match_title):
                continue  # exclude episodes
            match = re.search('(.*?)\s+\((\d{4})\)', match_title)
            if match:
                match_title, match_year = match.groups()
            else:
                match_year = ''
            if year_frag:
                match_year = year_frag[0]
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results

def search(self, video_type, title, year):
    search_url = urlparse.urljoin(self.base_url, '/movie/search/')
    search_url += title
    html = self._http_get(search_url, cache_limit=1)
    results = []
    for item in dom_parser.parse_dom(html, 'div', {'class': 'ml-item'}):
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'mli-info'})
        match_url = re.search('href="([^"]+)', item, re.DOTALL)
        match_year = re.search('class="jt-info">(\d{4})<', item)
        is_episodes = dom_parser.parse_dom(item, 'span', {'class': 'mli-eps'})
        if match_title and match_url and not is_episodes:
            match_title = match_title[0]
            match_title = re.sub('</?h2>', '', match_title)
            match_title = re.sub('\s+\d{4}$', '', match_title)
            url = urlparse.urljoin(match_url.group(1), 'watching.html')
            match_year = match_year.group(1) if match_year else ''
            if not year or not match_year or year == match_year:
                result = {'title': match_title, 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/index.php?search_keywords=')
    search_url += urllib.quote_plus(title)
    search_url += '&year=' + urllib.quote_plus(str(year))
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        search_url += '&search_section=2'
    else:
        search_url += '&search_section=1'
    results = []
    html = self._http_get(self.base_url, cache_limit=0)
    match = re.search('input type="hidden" name="key" value="([0-9a-f]*)"', html)
    if match:
        key = match.group(1)
        search_url += '&key=' + key
        html = self._http_get(search_url, cache_limit=.25)
        pattern = r'class="index_item.+?href="(.+?)" title="Watch (.+?)"?\(?([0-9]{4})?\)?"?>'
        for match in re.finditer(pattern, html):
            url, title, year = match.groups('')
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(title), 'year': year}
            results.append(result)
    else:
        log_utils.log('Unable to locate PW search key', log_utils.LOGWARNING)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/ajax/search.php')
    timestamp = int(time.time() * 1000)
    query = {'q': title, 'limit': '100', 'timestamp': timestamp, 'verifiedCheck': ''}
    html = self._http_get(search_url, data=query, headers=XHR, cache_limit=1)
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        media_type = 'TV SHOW'
    else:
        media_type = 'MOVIE'
    js_data = scraper_utils.parse_json(html, search_url)
    for item in js_data:
        if item['meta'].upper().startswith(media_type):
            result = {'title': scraper_utils.cleanse_title(item['title']), 'url': scraper_utils.pathify_url(item['permalink']), 'year': ''}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/search?keyword=%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    results = []
    match_year = ''
    fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*movie-list[^"]*'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'item'}):
            links = dom_parser.parse_dom(item, 'a', {'class': 'name'}, ret='href')
            titles = dom_parser.parse_dom(item, 'a', {'class': 'name'})
            is_season = dom_parser.parse_dom(item, 'div', {'class': 'status'})
            for match_url, match_title in zip(links, titles):
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                    if video_type == VIDEO_TYPES.SEASON:
                        if season and not re.search('\s+%s$' % (season), match_title):
                            continue
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': '', 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/search-movies/%s.html')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    results = []
    for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}):
        match_title = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='title')
        url = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='href')
        if match_title and url:
            match_title, url = match_title[0], url[0]
            is_season = re.search('Season\s+(\d+)$', match_title, re.I)
            # fixed: original tested the truthy constant VIDEO_TYPES.SEASON instead of comparing video_type
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_year = ''
                if video_type == VIDEO_TYPES.MOVIE:
                    match_year = dom_parser.parse_dom(thumb, 'div', {'class': '[^"]*status-year[^"]*'})
                    if match_year:
                        match_year = match_year[0]
                else:
                    if season and int(is_season.group(1)) != int(season):
                        continue
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/?s=%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    results = []
    match_year = ''
    for entry in dom_parser.parse_dom(html, 'header', {'class': 'entry-header'}):
        match = re.search('href="([^"]+)[^>]+>([^<]+)', entry)
        if match:
            match_url, match_title_year = match.groups()
            match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if not year or not match_year or year == match_year:
                result = {'title': match_title, 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    url = urlparse.urljoin(self.base_url, '/search.html')
    data = {'search': title}
    headers = {'Referer': self.base_url}
    html = self._http_get(url, data=data, headers=headers, cache_limit=2)
    if video_type == VIDEO_TYPES.MOVIE:
        query_type = 'watch-movie-'
    else:
        query_type = 'watch-tvshow-'
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'a', {'href': '#'}):
        match = re.search('href="(%s[^"]+)' % (query_type), item)
        if match:
            link = match.group(1)
            match_title = self.__make_title(link, query_type)
            match_year = ''
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or int(year) == int(match_year)):
                result = {'url': scraper_utils.pathify_url(link), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    referer = scraper_utils.urljoin(self.base_url, '/search/?q=%s')
    referer = referer % (urllib.quote_plus(title))
    headers = {'Referer': referer}
    headers.update(XHR)
    params = {'searchTerm': title, 'type': SEARCH_TYPES[video_type], 'limit': 500}
    html = self._http_get(search_url, params=params, headers=headers, auth=False, cache_limit=2)
    js_data = scraper_utils.parse_json(html, search_url)
    if 'results' in js_data:
        for result in js_data['results']:
            match_year = str(result.get('year', ''))
            match_url = result.get('permalink', '')
            match_title = result.get('title', '')
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    if not self.include_paid and video_type != VIDEO_TYPES.MOVIE:
        return []
    search_url = urlparse.urljoin(self.base_url, '/search.php?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        pattern = '<i>\s*Movies\s*</i>(.*)'
    else:
        pattern = '<i>\s*TV Series\s*</i>(.*)'
    match = re.search(pattern, html)
    if match:
        container = match.group(1)
        pattern = "href='([^']+)'>([^<]+)\s*</a>\s*(?:\((\d{4})\))?"
        for match in re.finditer(pattern, container):
            url, match_title, match_year = match.groups('')
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/suggest.php')
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    params = {'ajax': 1, 's': title, 'type': 'TVShows'}
    html = self._http_get(search_url, params=params, cache_limit=8)
    for attrs, match_title in dom_parser2.parse_dom(html, 'a', req='href'):
        match_url = attrs['href']
        match_title = re.sub('</?[^>]*>', '', match_title)
        match = re.search('\((\d{4})\)$', match_url)
        if match:
            match_year = match.group(1)
        else:
            match_year = ''
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*items[^"]*'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'item'}):
            match_url = dom_parser.parse_dom(item, 'a', {'class': 'header'}, ret='href')
            match_title_year = dom_parser.parse_dom(item, 'a', {'class': 'header'})
            if match_url and match_title_year:
                match_url = match_url[0]
                match_title_year = match_title_year[0]
                r = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
                if r:
                    match_title, match_year = r.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': match_title, 'year': match_year}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/?s=%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=8)
    for movie in dom_parser.parse_dom(html, 'div', {'class': 'movie'}):
        match = re.search('href="([^"]+)', movie)
        if match:
            match_url = match.group(1)
            match_title_year = dom_parser.parse_dom(movie, 'img', ret='alt')
            if match_title_year:
                match_title_year = match_title_year[0]
                match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = dom_parser.parse_dom(movie, 'div', {'class': 'year'})
                    try:
                        match_year = match_year[0]
                    except:
                        match_year = ''
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    if video_type == VIDEO_TYPES.TVSHOW:
        url = urlparse.urljoin(self.base_url, '/series/all/')
        html = self._http_get(url, cache_limit=8)
        links = dom_parser.parse_dom(html, 'a', {'class': 'underilne'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'underilne'})
        items = zip(links, titles)
    else:
        url = urlparse.urljoin(self.base_url, '/search?=%s' % urllib.quote_plus(title))
        data = {'q': title, 'go': 'Search'}
        html = self._http_get(url, data=data, cache_limit=8)
        match = re.search('you can search again in (\d+) seconds', html, re.I)
        if match:
            wait = int(match.group(1))
            if wait > self.timeout:
                wait = self.timeout
            time.sleep(wait)
            html = self._http_get(url, data=data, cache_limit=0)
        pattern = 'class="movie_box.*?href="([^"]+).*?<h1>([^<]+)'
        items = re.findall(pattern, html, re.DOTALL)
    norm_title = scraper_utils.normalize_title(title)
    for item in items:
        url, match_title = item
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    xml_url = urlparse.urljoin(self.base_url, '/series.xml')
    xml = self._http_get(xml_url, cache_limit=24)
    if xml:
        norm_title = scraper_utils.normalize_title(title)
        match_year = ''
        try:
            for element in ET.fromstring(xml).findall('.//dizi'):
                name = element.find('adi')
                if name is not None and norm_title in scraper_utils.normalize_title(name.text):
                    url = element.find('url')
                    if url is not None and (not year or not match_year or year == match_year):
                        result = {'url': scraper_utils.pathify_url(url.text), 'title': name.text, 'year': ''}
                        results.append(result)
        except (ParseError, ExpatError) as e:
            log_utils.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)
    return results

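# A guess at the /series.xml layout the ElementTree loop above expects, inferred
# from the findall('.//dizi'), find('adi'), and find('url') calls ('dizi'/'adi'
# are Turkish for series/name). The surrounding structure is an assumption.
sample_xml = '''
<diziler>
  <dizi>
    <adi>Example Show</adi>
    <url>/dizi/example-show</url>
  </dizi>
</diziler>
'''
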
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': '[^"]*listing-videos[^"]*'})
    if fragment:
        for match in re.finditer('href="([^"]+)[^>]*>(.*?)</a>', fragment[0]):
            url, match_title_year = match.groups('')
            match_title_year = re.sub('<span>|</span>', '', match_title_year)
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip episodes
            match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            match_title = match_title.replace('–', '-')
            match_title = match_title.replace('’', "'")
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    media_type = 'series' if video_type == VIDEO_TYPES.TVSHOW else 'movie'
    search_url = scraper_utils.urljoin(self.base_url, '/typeahead/%s' % (urllib.quote(title)))
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    html = self._http_get(search_url, headers=headers, require_debrid=True, cache_limit=.5)
    for item in scraper_utils.parse_json(html, search_url):
        match_title = item.get('title')
        match_url = item.get('link')
        match_year = ''
        if item.get('type') == media_type and match_title and match_url:
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results

def __tv_search(self, title, year):
    results = []
    if title:
        norm_title = scraper_utils.normalize_title(title)
        url = '/series/letra/%s/' % (title[0])
        url = urlparse.urljoin(self.base_url, url)
        html = self._http_get(url, cache_limit=48)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*bpM12[^"]*'}):
            title_frag = dom_parser.parse_dom(item, 'h2')
            year_frag = dom_parser.parse_dom(item, 'div', {'class': '[^"]*sectionDetail[^"]*'})
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            if title_frag and match_url:
                match_url = match_url[0]
                match = re.search('(.*?)<br>', title_frag[0])
                if match:
                    match_title = match.group(1)
                else:
                    match_title = title_frag[0]
                match_year = ''
                if year_frag:
                    match = re.search('(\d{4})', year_frag[0])
                    if match:
                        match_year = match.group(1)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/?s=%s')
    search_url = search_url % (urllib.quote(title))
    html = self._http_get(search_url, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'h3', {'class': 'post-box-title'}):
        match = re.search('href="([^"]+)[^>]*>([^<]+)', item)
        if match:
            match_url, match_title_year = match.groups()
            is_season = re.search('Season\s+(\d+)$', match_title_year, re.I)
            # fixed: original tested the truthy constant VIDEO_TYPES.SEASON instead of comparing video_type
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    match_title = match_title_year
                    if season and int(is_season.group(1)) != int(season):
                        continue
                else:
                    match = re.search('(.*?)\s+(\d{4})$', match_title_year)
                    if match:
                        match_title, match_year = match.groups()
                    else:
                        match_title = match_title_year
                        match_year = ''
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    search_url = self.base_url
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        search_url += '/tvshow'
    search_url += '/advanced-search.php?search='
    search_url += urllib.quote_plus(title)
    search_url += '&year=' + urllib.quote_plus(str(year))
    search_url += '&advanced_search=Search'
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    for element in dom_parser.parse_dom(html, 'div', {'class': 'list_box_title'}):
        match = re.search('href="([^"]+)"\s+title="(?:Watch )?([^"]+)', element)
        if match:
            url, match_title_year = match.groups()
            match = re.search('(.*?)(?:\s+\(?\s*(\d{4})\s*\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/results')
    params = {'q': title}
    referer = search_url + '?' + urllib.urlencode(params)
    headers = {'Referer': referer}
    headers.update(XHR)
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
    cookies = {'begin_referer': referer, 'prounder': 1}
    html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
    # a page still carrying a jquery.js script tag is likely a cached interstitial; retry uncached
    if any('jquery.js' in match.attrs['src'] for match in dom_parser2.parse_dom(html, 'script', req='src')):
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=0)
    for _attrs, result in dom_parser2.parse_dom(html, 'div', {'class': 'cell'}):
        title_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_title'})
        year_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_quality'})
        if not title_frag:
            continue
        match = dom_parser2.parse_dom(title_frag[0].content, 'a', req='href')
        if not match:
            continue
        match_url = match[0].attrs['href']
        match_title = match[0].content
        try:
            match = re.search('\s+(\d{4})\s+', year_frag[0].content)
            match_year = match.group(1)
        except:
            match_year = ''
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def __movie_search(self, title, year):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'img', ret='alt')
        match_year = ''  # no year is available in this markup; dead re-check of match_year removed
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/bestmatch-fund-movies-%s.html')
    search_title = title.replace(' ', '-')
    search_title = re.sub('[^A-Za-z0-9-]', '', search_title).lower()
    search_url = search_url % (search_title)
    html = self._http_get(search_url, cache_limit=1)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if not match:
            continue
        match_url, match_title_year = match[0].attrs['href'], match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/advanced-search/')
    headers = {'Referer': self.base_url}
    params = {'search_query': title, 'orderby': '', 'order': '', 'wpas': 1}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'datos'}):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if not match:
            continue
        match_url = match[0].attrs['href']
        is_tvshow = '/tvshows/' in match_url
        if (is_tvshow and video_type == VIDEO_TYPES.MOVIE) or (not is_tvshow and video_type == VIDEO_TYPES.TVSHOW):
            continue
        match_title = match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title)
        if scraper_utils.normalize_title(match_title) in norm_title and (not year or not match_year or year == match_year):
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    self.__get_token()
    results = []
    search_url = urlparse.urljoin(self.base_url, '/api/v1/caut')
    timestamp = int(time.time() * 1000)
    query = {'q': title, 'limit': '100', 'timestamp': timestamp, 'verifiedCheck': self.__token}
    html = self._http_get(search_url, data=query, headers=XHR, cache_limit=1)
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        media_type = 'TV SHOW'
    else:
        media_type = 'MOVIE'
    for item in scraper_utils.parse_json(html, search_url):
        if item['meta'].upper().startswith(media_type):
            match_year = str(item['year']) if 'year' in item and item['year'] else ''
            if not year or not match_year or year == match_year:
                result = {'title': item['title'], 'url': scraper_utils.pathify_url(item['permalink']), 'year': match_year}
                results.append(result)
    return results

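# A sketch of the JSON the /api/v1/caut loop above appears to expect. The field
# names (meta, title, permalink, year) come straight from the lookups; the values
# are invented for illustration.
sample_response = [
    {'meta': 'TV SHOW / Drama', 'title': 'Example Show',
     'permalink': '/tv-show/example-show/', 'year': 2014},
]
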
def search(self, video_type, title, year, season=""): if not self.include_paid and video_type != VIDEO_TYPES.MOVIE: return [] search_url = urlparse.urljoin(self.base_url, "/search.php") html = self._http_get(search_url, params={"q": title}, cache_limit=0.25) results = [] if video_type == VIDEO_TYPES.MOVIE: pattern = "<i>\s*Movies\s*</i>(.*)" else: pattern = "<i>\s*TV Series\s*</i>(.*)" match = re.search(pattern, html) if match: container = match.group(1) pattern = "href='([^']+)'>([^<]+)\s*</a>\s*(?:\((\d{4})\))?" for match in re.finditer(pattern, container): url, match_title, match_year = match.groups("") if not year or not match_year or year == match_year: result = { "url": scraper_utils.pathify_url(url), "title": scraper_utils.cleanse_title(match_title), "year": match_year, } results.append(result) return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s' % (urllib.quote(title)))
    html = self._http_get(search_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'who-likes'})
    if not fragment:
        return results
    fragment = fragment[0].content
    match_url = dom_parser2.parse_dom(fragment, 'a', req='href')
    match_title_year = dom_parser2.parse_dom(fragment, 'img', req='alt')
    if match_url and match_title_year:
        match_url = match_url[0].attrs['href']
        match_title_year = match_title_year[0].attrs['alt']
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not year or not match_year or year == match_year:
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    headers = {'Referer': self.base_url}
    params = {'search': title}
    html = self._http_get(self.base_url, params=params, headers=headers, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'listCard'}):
        match_title = dom_parser2.parse_dom(item, 'p', {'class': 'extraTitle'})
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_year = dom_parser2.parse_dom(item, 'p', {'class': 'cardYear'})
        if match_url and match_title:
            match_url = match_url[0].attrs['href']
            match_title = match_title[0].content
            match_year = match_year[0].content if match_year else ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, '/arsiv')
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'ts-list-content'})
    if not fragment:
        return results
    items = dom_parser2.parse_dom(fragment[0].content, 'h1', {'class': 'ts-list-name'})
    details = dom_parser2.parse_dom(fragment[0].content, 'ul')
    for item, detail in zip(items, details):
        match = dom_parser2.parse_dom(item.content, 'a', req='href')
        match_year = re.search('<span>(\d{4})</span>', detail.content)
        if not match:
            continue
        match_url = match[0].attrs['href']
        match_title = match[0].content
        match_year = match_year.group(1) if match_year else ''
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results