Пример #1
0
def check_title(title, aliases, release_title, hdlr, year, years=None):
    """Return True when ``release_title`` matches ``title`` or one of its aliases.

    The leading part of the formatted release title (everything before
    ``hdlr``, the year(s) and a resolution tag) is compared, via
    ``cleantitle.get``, against the cleaned title and every cleaned alias.
    The release must additionally contain one of ``years`` (when given) or
    ``hdlr`` (when ``years`` is falsy).

    :param title: primary title to match.
    :param aliases: value handed to ``jsloads`` and ``aliases_to_array``;
        if decoding fails the aliases are simply ignored.
    :param release_title: raw release name; normalised with
        ``release_title_format`` before comparison.
    :param hdlr: marker string; lower-cased and used to cut the release title.
    :param year: year string removed from aliases and from the release stem.
    :param years: iterable of year strings for movies; None for episodes.
    :return: bool. If an error occurs mid-way, the verdict reached so far
        is returned (True unless a check had already failed).
    """
    try:
        aliases = aliases_to_array(jsloads(aliases))
    except:
        aliases = None

    candidates = []
    for entry in aliases or ():
        try:
            cleaned = entry
            for old, new in (('!', ''), ('(', ''), (')', ''), ('&', 'and'),
                             (year, '')):
                cleaned = cleaned.replace(old, new)
            # movies only: strip every acceptable year from the alias
            for extra_year in years or ():
                cleaned = cleaned.replace(extra_year, '')
            if cleaned not in candidates:
                candidates.append(cleaned)
        except:
            from fenomscrapers.modules import log_utils
            log_utils.error()

    match = True
    try:
        for old, new in (('!', ''), ('(', ''), (')', ''), ('&', 'and')):
            title = title.replace(old, new)
        candidates.append(title)

        release_title = release_title_format(release_title)
        marker = hdlr.lower()

        # Keep only what precedes the marker, then strip year and punctuation.
        stem = release_title.split(marker)[0]
        for old, new in ((year, ''), ('(', ''), (')', ''), ('&', 'and')):
            stem = stem.replace(old, new)
        for extra_year in years or ():
            stem = stem.split(extra_year)[0]
        for resolution in ('2160p', '4k', '1080p', '720p'):
            stem = stem.split(resolution)[0]

        wanted = cleantitle.get(stem)
        if not any(cleantitle.get(name) == wanted for name in candidates):
            match = False

        if years:
            # movies: one of the acceptable years must appear in the release
            if not any(value in release_title for value in years):
                match = False
        elif marker not in release_title:
            # episodes: the marker itself must appear in the release
            match = False
        return match
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return match
Пример #2
0
 def search(self, title, year):
     """Return the URL of the first search result matching ``title`` and ``year``.

     Requests ``self.search_link`` (filled with the URL-quoted title) relative
     to ``self.base_link`` through ``self.scraper`` and scans the ``<li>``
     entries of the first ``div.list_items`` element. A result matches when
     the cleaned ``title`` is contained in the cleaned link text and ``year``
     occurs in the link text.

     :param title: title to search for.
     :param year: year string that must appear in the result's link text.
     :return: absolute URL of the first match, or None when nothing matches,
         the response is empty, or any error occurs (best-effort lookup).
     """
     try:
         url = urljoin(self.base_link, self.search_link % quote_plus(title))
         html = py_tools.ensure_str(
             self.scraper.get(url, headers=self.headers).content,
             errors='replace')
         if not html:
             return None

         # TODO: switch to client.parseDOM() to rid import
         listing = dom_parser.parse_dom(html, 'div', {'class': 'list_items'})[0]
         wanted = cleantitle.get(title)
         for item in dom_parser.parse_dom(listing.content, 'li'):
             anchors = dom_parser.parse_dom(item, 'a', {'class': 'title'})
             if not anchors:
                 # A list item without a title link must not abort the
                 # whole search; just move on to the next entry.
                 continue
             link = anchors[0]
             if wanted in cleantitle.get(link.content) and year in link.content:
                 return urljoin(self.base_link, link.attrs['href'])
         return None
     except Exception:
         return None
Пример #3
0
def _last_season_in_range(candidates, text):
    """Return the last season of the widest season range found in ``text``.

    ``candidates`` is a list of ``(range_string, last_season)`` pairs in
    ascending ``last_season`` order. The final hit is used because a short
    range string is a substring of a longer one ("1.to.2" occurs inside
    "1.to.20"), so the first hit would under-report the range.
    Returns None when no candidate occurs in ``text``.
    """
    hits = [last for key, last in candidates if key in text]
    return hits[-1] if hits else None


def filter_show_pack(show_title, aliases, imdb, year, season, release_title,
                     total_seasons):
    """Decide whether ``release_title`` is a multi-season pack of the show.

    The release is rejected when its leading part does not match the show
    title or an alias, or when it looks like a single episode, a single
    season, or a season range that does not start at season 1. Otherwise
    the last season covered by the pack is worked out from range notations
    such as "1.2.3", "1.to.9", "01-09", "s1~s9" or "s01.s09".

    :param show_title: primary show title.
    :param aliases: value handed to ``jsloads`` and ``aliases_to_array``;
        if decoding fails the aliases are simply ignored.
    :param imdb: not used by this function.
    :param year: year string removed from aliases and used as a cut marker.
    :param season: not used by this function.
    :param release_title: raw release name; normalised with
        ``release_title_format`` before matching.
    :param total_seasons: number of seasons of the show (anything ``int()``
        accepts); upper bound for the ranges that are looked for.
    :return: ``(True, last_season)`` for an accepted pack, where
        ``last_season`` is an int taken from a recognised range or
        ``total_seasons`` itself when no range is present;
        ``(False, 0)`` for a rejected release or when an error occurs.
    """
    try:
        aliases = aliases_to_array(jsloads(aliases))
    except Exception:
        aliases = None

    title_list = []
    if aliases:
        for item in aliases:
            try:
                alias = item.replace('!', '').replace('(', '').replace(
                    ')', '').replace('&', 'and').replace(year, '')
                if alias not in title_list:
                    title_list.append(alias)
            except Exception:
                from fenomscrapers.modules import log_utils
                log_utils.error()

    try:
        show_title = show_title.replace('!', '').replace('(', '').replace(
            ')', '').replace('&', 'and')
        title_list.append(show_title)

        # s1 or s01 used so only show packs that begin with season 1 are kept
        split_list = [
            '.all.seasons', 'seasons', 'season', 'the.complete', 'complete',
            'all.torrent', 'total.series', 'tv.series', 'series', 'edited',
            's1', 's01', year
        ]
        release_title = release_title_format(release_title)
        dot_release_title = release_title.replace('-', '.')

        # Cut the release name down to its leading (title) part.
        t = dot_release_title
        for marker in split_list:
            t = t.split(marker)[0]
        wanted = cleantitle.get(t)
        if all(cleantitle.get(x) != wanted for x in title_list):
            return False, 0

        # remove single episodes (returned in single episode scrape)
        episode_regex = [
            r's\d{1,3}e\d{1,3}', r's[0-3]{1}[0-9]{1}[.-]e\d{1,2}',
            r's\d{1,3}[.-]\d{1,3}e\d{1,3}',
            r'season[.-]?\d{1,3}[.-]?ep[.-]?\d{1,3}',
            r'season[.-]?\d{1,3}[.-]?episode[.-]?\d{1,3}'
        ]
        if any(re.search(item, release_title) for item in episode_regex):
            return False, 0

        # remove season ranges that do not begin at 1, ex. seasons.5-6,
        # seasons5.to.6, seasons.5.thru.6, season.2-9.s02-s09.1080p
        season_range_regex = [
            r'(?:season|seasons|s)[.-]?(?:0?[2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]?to[.-]?|[.-]?thru[.-]?|[.-])(?:season|seasons|s|)[.-]?(?:0?[3-9]{1}(?!\d{2}p)|[1-3]{1}[0-9]{1}(?!\d{2}p))'
        ]
        if any(re.search(item, release_title) for item in season_range_regex):
            return False, 0

        # remove single seasons - returned in seasonPack scrape
        season_regex = [
            # "season.1.01.complete": 2nd number repeats the first group with a leading 0
            r'season[.-]?([1-9]{1})[.-]0{1}\1[.-]?complete',
            # "season.9.10.complete": first number is >1 followed by another number
            r'season[.-]?([2-9]{1})[.-](?:[0-9]+)[.-]?complete',
            # "season.02.s02"
            r'season[.-]?\d{1,2}[.-]s\d{1,2}',
            # "season.02.complete"
            r'season[.-]?\d{1,2}[.-]complete',
            # "season.02.1080p" and, without separator, "season02.1080p"
            r'season[.-]?\d{1,2}[.-]\d{3,4}p{0,1}',
            # "season.02." not followed by "to", "thru", or another 1-2 digit
            # number and a separator (which would be a range)
            r'season[.-]?\d{1,2}[.-](?!thru|to|\d{1,2}[.-])',
            # end of line, ex. "season.1", "season.01", "season01", optional
            # trailing dot (a trailing dash would be a range)
            r'season[.-]?\d{1,2}[.]?$',
            # single season followed by a 4 digit year, ex. "season.1.1971"
            r'season[.-]?\d{1,2}[.-](?:19|20)[0-9]{2}',
            # single season, 3 digits, then 4 digit year, ex.
            # "season.1.004.1971" (comic book format)
            r'season[.-]?\d{1,2}[.-]\d{3}[.-]{1,2}(?:19|20)[0-9]{2}',
            # ".s01.complete" not preceded by "thru", "to", or a 2 digit number
            r'(?<!thru)(?<!to)(?<!\d{2})[.-]s\d{2}[.-]complete',
            # ".s02" not preceded by "thru", "to", or "s01" and not followed
            # by ".thru", ".to", ".s02", "-s02", ".02" or "-02"
            r'(?<!thru)(?<!to)(?<!s\d{2})[.-]s\d{2}(?![.-]thru)(?![.-]to)(?![.-]s\d{2})(?![.-]\d{2})'
        ]
        if any(re.search(item, release_title) for item in season_regex):
            return False, 0

        # remove spelled out single seasons
        spelled_regex = (
            [r'(complete[.-]%s[.-]season)' % x for x in season_ordinal_list] +
            [r'(complete[.-]%s[.-]season)' % x for x in season_ordinal2_list] +
            [r'(season[.-]%s)' % x for x in season_list]
        )
        if any(re.search(item, release_title) for item in spelled_regex):
            return False, 0

        # From here down nothing is filtered out. "last_season" is worked
        # out and passed back so the add-on can filter on it and the db
        # holds fully valid show packs.
        seasons = range(2, int(total_seasons) + 1)

        # enumerated ranges, ex. "1.2.3.4" or "1.2.3.and.4" (dots or dashes)
        enumerated = []
        all_seasons = '1'
        for n in seasons:
            enumerated.append(('%s.and.%d' % (all_seasons, n), n))
            all_seasons += '.%d' % n
            enumerated.append((all_seasons, n))
        last_season = _last_season_in_range(enumerated, dot_release_title)
        if last_season is not None:
            return True, last_season

        # "<start><joiner><end>" ranges for each numbering style, tried in
        # this order: "1..9", "01..09", "s1..s9", "s01..s09". The ".to." and
        # ".thru." forms accept dots or dashes as separators (dotted title);
        # the "-" and "~" forms are looked up in the unmodified title.
        styles = (('1', '%d'), ('01', '%02d'), ('s1', 's%d'), ('s01', 's%02d'))
        joiners = (
            ('.to.', dot_release_title),
            ('.thru.', dot_release_title),
            ('-', release_title),
            ('~', release_title),
        )
        for start, end_fmt in styles:
            for joiner, text in joiners:
                candidates = [(start + joiner + end_fmt % n, n)
                              for n in seasons]
                last_season = _last_season_in_range(candidates, text)
                if last_season is not None:
                    return True, last_season

        # "s01.s09" 2 digit range (dots only)
        dot_ranges = [('s01.s%02d' % n, n) for n in seasons]
        last_season = _last_season_in_range(dot_ranges, release_title)
        if last_season is not None:
            return True, last_season

        return True, total_seasons
    except Exception:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        # Always hand back a 2-tuple so callers can unpack the result; a
        # release that could not be analysed is treated as rejected.
        return False, 0
Пример #4
0
def filter_season_pack(show_title, aliases, year, season, release_title):
    """Decide whether ``release_title`` is a pack of exactly one ``season``.

    The release is rejected when its leading part does not match the show
    title or an alias, when it looks like a single episode, when it carries
    no tag for the requested season, or when the season tag is part of a
    season range or another non-conforming pattern.

    :param show_title: primary show title.
    :param aliases: value handed to ``jsloads`` and ``aliases_to_array``;
        if decoding fails the aliases are simply ignored.
    :param year: year string removed from aliases and used as a cut marker.
    :param season: season number as a string (``zfill`` is called on it).
    :param release_title: raw release name; normalised with
        ``release_title_format`` before matching.
    :return: bool; True is also returned when an error occurs.
    """
    try:
        aliases = aliases_to_array(jsloads(aliases))
    except:
        aliases = None

    known_titles = []
    for entry in aliases or ():
        try:
            cleaned = entry
            for old, new in (('!', ''), ('(', ''), (')', ''), ('&', 'and'),
                             (year, '')):
                cleaned = cleaned.replace(old, new)
            if cleaned not in known_titles:
                known_titles.append(cleaned)
        except:
            from fenomscrapers.modules import log_utils
            log_utils.error()

    try:
        for old, new in (('!', ''), ('(', ''), (')', ''), ('&', 'and')):
            show_title = show_title.replace(old, new)
        known_titles.append(show_title)

        padded = season.zfill(2)
        short_tag = '.s%s.' % season
        short_tag_padded = '.s%s.' % padded
        long_tag = '.season.%s.' % season
        long_tag_padded = '.season.%s.' % padded
        season_tags = (
            short_tag, short_tag_padded, long_tag, '.season%s.' % season,
            long_tag_padded, '.season%s.' % padded
        )

        release_title = release_title_format(release_title)
        dotted = release_title.replace('-', '.')

        # Cut the release name down to its leading (title) part.
        stem = dotted
        for marker in (short_tag, short_tag_padded, '.' + season + '.season',
                       'total.season', 'season', 'the.complete', 'complete',
                       year):
            stem = stem.split(marker)[0]
        wanted = cleantitle.get(stem)
        if not any(cleantitle.get(name) == wanted for name in known_titles):
            return False

        # remove single episodes (returned in single episode scrape)
        episode_patterns = (
            r's\d{1,3}e\d{1,3}',
            r's[0-3]{1}[0-9]{1}[.-]e\d{1,2}',
            r's\d{1,3}[.-]\d{1,3}e\d{1,3}',
            r'season[.-]?\d{1,3}[.-]?ep[.-]?\d{1,3}',
            r'season[.-]?\d{1,3}[.-]?episode[.-]?\d{1,3}',
        )
        if any(re.search(pattern, release_title)
               for pattern in episode_patterns):
            return False

        # The requested season has to be tagged somewhere in the release.
        if not any(tag in dotted for tag in season_tags):
            return False

        # remove season ranges (returned in showPack scrape), plus
        # non-conforming season notations
        short_stem = short_tag.rstrip('.')
        short_stem_padded = short_tag_padded.rstrip('.')
        long_stem = long_tag.rstrip('.')
        long_stem_padded = long_tag_padded.rstrip('.')
        range_patterns = (
            # ".s1-s9.", ".s1-s39."
            short_stem + r'[.-]s([2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]|$)',
            # ".s01-s09.", ".s01-s39."
            short_stem_padded + r'[.-]s\d{2}(?:[.-]|$)',
            # ".s01.09."
            short_stem_padded + r'[.-]\d{2}(?:[.-]|$)',
            # reverse range (may need more of these)
            r'\Ws\d{2}\W%s' % short_tag_padded.lstrip('.'),
            # ".season.1.to.9.", ".season.1.to.39"
            long_stem + r'[.-]to[.-]([2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]|$)',
            # ".season.1.season.9.", ".season.1.season.39"
            long_stem + r'[.-]season[.-]([2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]|$)',
            # "season.1.9.", "season.1.39."
            long_stem + r'[.-]([2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]|$)',
            # "season.1.9.09."
            long_stem + r'[.-]\d{1}[.-]\d{1,2}(?:[.-]|$)',
            # single season, 3 digits, then 4 digit year, ex. "season.1.004.1971"
            long_stem + r'[.-]\d{3}[.-](?:19|20)[0-9]{2}(?:[.-]|$)',
            # 2 digit season followed by 3 digit range, ex. "season.10.001-025."
            long_stem_padded + r'[.-]\d{3}[.-]\d{3}(?:[.-]|$)',
            # 2 digit season followed by another, ex. "season.01-season.09."
            long_stem_padded + r'[.-]season[.-]\d{2}(?:[.-]|$)',
        )
        return not any(re.search(pattern, release_title)
                       for pattern in range_patterns)
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return True