Python to_slug示例，salts_lib.scraper_utils.to_slug Python示例

示例#1

0

显示文件

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     if video_type == VIDEO_TYPES.TVSHOW and title:
         test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
         test_url = scraper_utils.urljoin(self.base_url, test_url)
         html = self._http_get(test_url, require_debrid=True, cache_limit=24)
         posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
         if posts:
             result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
             results.append(result)
     elif video_type == VIDEO_TYPES.MOVIE:
         search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
         html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
         norm_title = scraper_utils.normalize_title(title)
         for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
             if match:
                 post_url, post_title = match.groups()
                 if '/tv-show/' in post or self.__too_old(post): continue
                 post_title = re.sub('<[^>]*>', '', post_title)
                 meta = scraper_utils.parse_movie_link(post_title)
                 full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                 match_year = meta['year']
                 
                 match_norm_title = scraper_utils.normalize_title(meta['title'])
                 if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                     result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                     results.append(result)
         
     return results

示例#2

0

显示文件

文件： myddl_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     if video_type == VIDEO_TYPES.TVSHOW and title:
         test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
         test_url = scraper_utils.urljoin(self.base_url, test_url)
         html = self._http_get(test_url, require_debrid=True, cache_limit=24)
         posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
         if posts:
             result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
             results.append(result)
     elif video_type == VIDEO_TYPES.MOVIE:
         search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
         html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
         norm_title = scraper_utils.normalize_title(title)
         for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
             if match:
                 post_url, post_title = match.groups()
                 if '/tv-show/' in post or self.__too_old(post): continue
                 post_title = re.sub('<[^>]*>', '', post_title)
                 meta = scraper_utils.parse_movie_link(post_title)
                 full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                 match_year = meta['year']
                 
                 match_norm_title = scraper_utils.normalize_title(meta['title'])
                 if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                     result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                     results.append(result)
         
     return results

示例#3

0

显示文件

文件： emoviespro_scraper.py 项目： CYBERxNUKE/xbmc-addon

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     slug = scraper_utils.to_slug(title)
     test_url = scraper_utils.urljoin(self.base_url, slug)
     test_url += '-%s' % (year)
     html = self._http_get(test_url, cache_limit=8)
     if html:
         result = {'title': scraper_utils.cleanse_title(title), 'year': year, 'url': scraper_utils.pathify_url(test_url)}
         results.append(result)
     return results

示例#4

0

显示文件

文件： emoviespro_scraper.py 项目： normico21/repository.xvbmc

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     slug = scraper_utils.to_slug(title)
     test_url = scraper_utils.urljoin(self.base_url, slug)
     test_url += '-%s' % (year)
     html = self._http_get(test_url, cache_limit=8)
     if html:
         result = {'title': scraper_utils.cleanse_title(title), 'year': year, 'url': scraper_utils.pathify_url(test_url)}
         results.append(result)
     return results

示例#5

0

显示文件

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html')
        search_url = search_url % (scraper_utils.to_slug(title))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'slideposter'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(item, 'img', req='alt')
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                match_title_year = match_title_year[0].attrs['alt']
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results

示例#6

0

显示文件

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     if video_type == VIDEO_TYPES.TVSHOW and title:
         test_url = '/show/%s/' % (scraper_utils.to_slug(title))
         test_url = scraper_utils.urljoin(self.base_url, test_url)
         html = self._http_get(test_url, require_debrid=True, cache_limit=24)
         posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
         if posts and CATEGORIES[video_type] in posts[0].content:
             match = re.search('<div[^>]*>\s*show\s+name:.*?<a\s+href="([^"]+)[^>]+>(?!Season\s+\d+)([^<]+)', posts[0].content, re.I)
             if match:
                 show_url, match_title = match.groups()
                 result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                 results.append(result)
     elif video_type == VIDEO_TYPES.MOVIE:
         search_url = scraper_utils.urljoin(self.base_url, '/search/%s/')
         search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
         search_url = search_url % (urllib.quote_plus(search_title))
         headers = {'User-Agent': LOCAL_UA}
         html = self._http_get(search_url, headers=headers, require_debrid=True, cache_limit=1)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+).*?">(.*?)</a>', html)
         posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
         norm_title = scraper_utils.normalize_title(title)
         for heading, post in zip(headings, posts):
             if not re.search('[._ -]S\d+E\d+[._ -]', heading[1], re.I) and not self.__too_old(post):
                 post_url, post_title = heading
                 post_title = re.sub('<[^>]*>', '', post_title)
                 meta = scraper_utils.parse_movie_link(post_title)
                 full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                 match_year = meta['year']
                 
                 match_norm_title = scraper_utils.normalize_title(meta['title'])
                 if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                     result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                     results.append(result)
     
     return results