Пример #1
0
    def correctWords(self, rel_name, media):
        """Check a release name against required, ignored and adult-content words.

        Args:
            rel_name: Release name to validate.
            media: Media item; passed to the 'searcher.get_search_title'
                event and to ``self.containsWords``.

        Returns:
            False when a required word is missing, an ignored word is present,
            or the name contains an adult tag that is not part of the media
            title; True otherwise.
        """
        media_title = fire_event('searcher.get_search_title', media, single=True)
        media_words = re.split(r'\W+', simplify_string(media_title))

        rel_name = simplify_string(rel_name)
        rel_words = re.split(r'\W+', rel_name)

        required_words, contains_required = self.containsWords(rel_name, rel_words, 'required', media)
        if len(required_words) > 0 and not contains_required:
            log.info2('Wrong: Required word missing: %s', rel_name)
            return False

        ignored_words, contains_ignored = self.containsWords(rel_name, rel_words, 'ignored', media)
        if len(ignored_words) > 0 and contains_ignored:
            log.info2("Wrong: '%s' contains 'ignored words'", rel_name)
            return False

        # Reject adult-content releases, unless the tag is part of the title itself.
        # NOTE(review): the entries containing '*' can never equal a token
        # produced by splitting on \W+ - confirm the intended tag spellings.
        pron_tags = ['xxx', 'sex', 'anal', 't**s', 'f**k', 'p**n', 'orgy', 'milf', 'boobs', 'erotica', 'erotic', 'c**k', 'dick']
        pron_words = (set(rel_words) & set(pron_tags)) - set(media_words)
        if pron_words:
            log.info('Wrong: %s, probably pr0n', rel_name)
            return False

        return True
Пример #2
0
    def createStringIdentifier(self,
                               file_path,
                               folder='',
                               exclude_filename=False):
        """Build a normalised identifier string for a file path.

        Args:
            file_path: Path of the file to identify.
            folder: Root folder text that is removed from ``file_path`` first.
            exclude_filename: When True the file-name part is cut off before
                further processing (f.e. for DVD files).

        Returns:
            The simplified identifier, with the year appended when
            ``self.findYear`` finds one that is not already at the start.
        """
        identifier = file_path.replace(folder,
                                       '').lstrip(os.path.sep)  # root folder
        identifier = os.path.splitext(identifier)[0]  # ext

        # Exclude file name path if needed (f.e. for DVD files)
        if exclude_filename:
            file_name = os.path.split(identifier)[-1]
            identifier = identifier[:len(identifier) - len(file_name)]

        # Make sure the identifier is lower case as all regex is with lower case tags
        identifier = identifier.lower()

        try:
            path_split = split_string(identifier, os.path.sep)
            # Use the parent folder name when it is longer than the last part,
            # otherwise only the last part (the file name).
            parent_is_longer = (len(path_split) > 1
                                and len(path_split[-2]) > len(path_split[-1]))
            identifier = path_split[-2] if parent_is_longer else path_split[-1]
        except Exception:
            # Best effort: keep the unsplit identifier when splitting fails.
            pass

        # multipart
        identifier = self.removeMultipart(identifier)

        # remove cptag
        identifier = self.removeCPTag(identifier)

        # simplify the string
        identifier = simplify_string(identifier)

        year = self.findYear(file_path)

        # groups, release tags, scenename cleaner
        identifier = re.sub(self.clean, '::', identifier).strip(':')

        # Year
        if year and identifier[:4] != year:
            split_by = ':::' if ':::' in identifier else year
            identifier = '%s %s' % (identifier.split(split_by)[0].strip(),
                                    year)
        else:
            identifier = identifier.split('::')[0]

        # Remove duplicate words while keeping the first occurrence order
        out = []
        for word in identifier.split():
            if word not in out:
                out.append(word)

        identifier = ' '.join(out)

        return simplify_string(identifier)
Пример #3
0
def namePositionScore(nzb_name, movie_name):
    """Score a release name by where the movie name, year and quality appear.

    Args:
        nzb_name: Release name to score.
        movie_name: Movie title the release is compared against.

    Returns:
        An integer score: +10 when the name starts with the movie title,
        +10 when the year follows directly, -10 for every word between the
        year and the quality that is not listed in ``name_scores``, or the
        score so far minus 20 when no known quality is found after the title.
    """
    score = 0

    nzb_words = re.split(r'\W+', simplify_string(nzb_name))
    qualities = fire_event('quality.all', single=True)

    # Prefer the first quoted part of the name when there is one. A
    # backreference cannot be used inside a character class ("[^\1]" means
    # "anything but chr(1)"), so a negative lookahead stops at the closing quote.
    try:
        quoted = re.search(r'([\'"])(?:(?!\1).)*\1', nzb_name, re.DOTALL)
    except TypeError:
        # Non-string names have no quoted part; keep the name as given.
        quoted = None
    if quoted:
        nzb_name = quoted.group(0)

    name_year = fire_event('scanner.name_year', nzb_name, single=True)

    # Give points for movies beginning with the correct name
    split_by = simplify_string(movie_name)
    name_split = []
    if len(split_by) > 0:
        name_split = simplify_string(nzb_name).split(split_by)
        if name_split[0].strip() == '':
            score += 10

    # If year is second in line, give more points
    if len(name_split) > 1 and name_year:
        after_name = name_split[1].strip()
        if try_int(after_name[:4]) == name_year.get('year', None):
            score += 10
            after_name = after_name[4:]

        # Give -point to crap between year and quality
        found_quality = None
        for quality in qualities:
            # Main in words
            if quality['identifier'] in nzb_words:
                found_quality = quality['identifier']

            # Alt in words
            for alt in quality['alternative']:
                if alt in nzb_words:
                    found_quality = alt
                    break

        if not found_quality:
            return score - 20

        # Words that carry their own score are allowed between year and quality
        allowed = set()
        for value in name_scores:
            name, _ = value.split(':')
            allowed.add(name)

        # NOTE(review): an empty in-between part splits to [''], which is
        # counted as one disallowed word - confirm this penalty is intended.
        inbetween = re.split(r'\W+', after_name.split(found_quality)[0].strip())

        score -= (10 * len(set(inbetween) - allowed))

    return score
Пример #4
0
def nameScore(name, year, preferred_words):
    """Calculate score for words in the NZB name.

    Args:
        name: Release name; compared lower-cased.
        year: Expected year; 5 points are added when ``str(year)`` occurs
            in the name.
        preferred_words: Iterable of words worth 100 points each when they
            appear as a word of the simplified name.

    Returns:
        The summed score, or 0 when scoring fails (the failure is logged).
    """

    try:
        score = 0
        name = name.lower()

        # give points for the cool stuff; entries look like 'word:points'
        for value in name_scores:
            parts = value.split(':')
            add = int(parts.pop())
            if parts.pop() in name:
                score += add

        # points if the year is correct
        if str(year) in name:
            score += 5

        # Contains preferred word
        nzb_words = re.split(r'\W+', simplify_string(name))
        score += 100 * len(set(nzb_words) & set(preferred_words))

        return score
    except Exception:
        log.error('Failed doing nameScore: %s', traceback.format_exc())

    return 0
Пример #5
0
def duplicateScore(nzb_name, movie_name):
    """Penalise words that occur more than once in the release name.

    Args:
        nzb_name: Release name to inspect.
        movie_name: Movie title; words of the title are not penalised.

    Returns:
        -4 for every distinct repeated word that is not part of the movie
        title, or 0 when scoring fails (the failure is logged).
    """

    try:
        nzb_words = re.split(r'\W+', simplify_string(nzb_name))
        movie_words = re.split(r'\W+', simplify_string(movie_name))

        # minus for duplicates: collect every word seen at least twice in a
        # single pass instead of re-counting the tail of the list per word
        seen = set()
        duplicates = set()
        for word in nzb_words:
            if word in seen:
                duplicates.add(word)
            seen.add(word)

        return len(duplicates - set(movie_words)) * -4
    except Exception:
        log.error('Failed doing duplicateScore: %s', traceback.format_exc())

    return 0
Пример #6
0
    def correctName(self, check_name, movie_name):
        """Check whether a release name consists only of words of the movie title.

        Besides the full name, the first quoted part and the longest part
        between square brackets are tried as candidate names.

        Args:
            check_name: Release name to check.
            movie_name: Movie title to compare against.

        Returns:
            True when, for any candidate, the name reported by the
            'scanner.name_year' event is non-empty and contains no word
            outside the simplified movie title; False otherwise.
        """

        check_names = [check_name]

        try:
            # Match names between quotes. A backreference cannot be used
            # inside a character class ("[^\1]" means "anything but chr(1)"),
            # so a negative lookahead stops at the matching closing quote.
            quoted = re.search(r'([\'"])(?:(?!\1).)*\1', check_name, re.DOTALL)
            if quoted:
                check_names.append(quoted.group(0))

            # Match longest name between []
            bracketed = re.findall(r'[^\[]*\[([^\]]*)\]', check_name)
            if bracketed:
                check_names.append(max(bracketed, key=len).strip())
        except TypeError:
            # Non-string input: only the value as given is tried below.
            pass

        for candidate in remove_duplicate(check_names):
            check_movie = fire_event('scanner.name_year', candidate, single=True)

            try:
                check_words = remove_empty(re.split(r'\W+', check_movie.get('name', '')))
                movie_words = remove_empty(re.split(r'\W+', simplify_string(movie_name)))

                if len(check_words) > 0 and len(movie_words) > 0 and len(set(check_words) - set(movie_words)) == 0:
                    return True
            except Exception:
                # Best effort: a candidate that cannot be parsed is skipped.
                pass

        return False
Пример #7
0
    def simplify_value(self, value):
        """Simplify a string, or every item of a list, recursively.

        Falsy values are returned unchanged. Strings go through
        ``simplify_string``; lists are simplified item by item.

        Raises:
            ValueError: for any other truthy value.
        """
        if value:
            if isinstance(value, str):
                return simplify_string(value)

            if isinstance(value, list):
                return list(map(self.simplify_value, value))

            raise ValueError("Unsupported value type")

        return value
Пример #8
0
    def first(self, title):
        """Return the lower-cased first letter of a title, or '#'.

        The title is simplified and one leading article ('the ', 'an ',
        'a ') is dropped before the first character is inspected. '#' is
        returned when nothing is left or the first character is not an
        ASCII letter.
        """
        cleaned = simplify_string(to_unicode(title))

        # Only the first matching article is removed.
        article = next(
            (a for a in ('the ', 'an ', 'a ') if cleaned[:len(a)] == a), '')
        cleaned = cleaned[len(article):]

        if cleaned and cleaned[0] in ascii_letters:
            initial = cleaned[0]
        else:
            initial = '#'

        return str(initial).lower()
Пример #9
0
def nameRatioScore(nzb_name, movie_name):
    """Score a release by how few words it has besides the movie title.

    Args:
        nzb_name: Release name; turned into an identifier through the
            'scanner.create_file_identifier' event.
        movie_name: Movie title to compare against.

    Returns:
        10 minus the number of distinct identifier words that are not part
        of the simplified movie title, or 0 when scoring fails (the failure
        is logged).
    """
    try:
        identifier = fire_event('scanner.create_file_identifier', nzb_name,
                                single=True)
        nzb_words = re.split(r'\W+', identifier)
        movie_words = re.split(r'\W+', simplify_string(movie_name))

        left_over = set(nzb_words) - set(movie_words)
        return 10 - len(left_over)
    except Exception:
        log.error('Failed doing nameRatioScore: %s', traceback.format_exc())

    return 0
Пример #10
0
    def simplify(self, title):
        """Return a fixed-width (32 character) simplified form of a title.

        A '#' is prepended when the unsimplified title does not start with an
        ASCII letter. One leading article ('the ', 'an ', 'a ') is removed
        from the simplified title, and the result is right-padded with spaces
        and cut to 32 characters.
        """
        raw = to_unicode(title)

        # The marker is decided on the raw title, before simplification.
        if raw and len(raw) > 0 and raw[0] in ascii_letters:
            marker = ''
        else:
            marker = '#'

        cleaned = simplify_string(raw)

        # Only the first matching article is removed.
        article = next(
            (a for a in ('the ', 'an ', 'a ') if cleaned[:len(a)] == a), '')
        cleaned = cleaned[len(article):]

        padded = str(marker + cleaned).ljust(32, ' ')
        return padded[:32]
Пример #11
0
    def make_key_value(self, data):
        """Build the set of index keys for a media document's title.

        Only documents with ``_t == 'media'`` and a non-empty title produce
        keys; for anything else None is returned. For every word position the
        remaining title (cut to 32 characters) is added, together with each
        shortened form of it that is still longer than ``self.__l``. All keys
        are left-padded with '_' to 32 characters.

        Returns:
            A ``(keys, None)`` tuple, or None when the document is skipped.
        """
        if data.get('_t') != 'media' or not len(data.get('title', '')) > 0:
            return None

        keys = set()
        words = str(simplify_string(data.get('title').lower())).split()
        min_len = self.__l

        for start, _ in enumerate(words):
            phrase = ' '.join(words[start:])[:32].strip()
            keys.add(phrase.rjust(32, '_'))

            upper = max(min_len, min(len(phrase), 32))

            # Drop one more trailing character per step
            for cut in range(1, upper):
                shortened = phrase[:-cut].strip()
                if len(shortened) > min_len:
                    keys.add(shortened.rjust(32, '_'))

        return keys, None
Пример #12
0
    def correctRelease(self, nzb=None, media=None, quality=None, **kwargs):
        """Decide whether a found release is an acceptable match for a movie.

        Args:
            nzb: Release dict; the keys read here are 'name', 'size',
                'seeders', 'age', 'description' and the optional callables
                'get_more_info' and 'extra_check'.
            media: Media dict; only handled when its 'type' is 'movie'.
                ``media['info']['year']`` and ``media['info']['titles']``
                are read.
            quality: Quality dict used as the preferred quality.
            **kwargs: ``imdb_results`` (default False) accepts the release
                without the name/year check once all other checks passed.

        Returns:
            None when the media is not a movie, otherwise True when the
            release passes every check and False when it is rejected.
        """

        if media.get('type') != 'movie': return

        media_title = fire_event('searcher.get_search_title',
                                 media,
                                 single=True)

        imdb_results = kwargs.get('imdb_results', False)
        retention = Env.setting('retention', section='nzb')

        # Retention only applies to releases without seeders information
        if nzb.get('seeders') is None and 0 < retention < nzb.get('age', 0):
            log.info2(
                'Wrong: Outside retention, age is %s, needs %s or lower: %s',
                (nzb['age'], retention, nzb['name']))
            return False

        # Check for required and ignored words
        if not fire_event(
                'searcher.correct_words', nzb['name'], media, single=True):
            return False

        # NOTE(review): when `quality` is falsy the fallback subscripts
        # `quality` itself and raises - confirm which identifier should be
        # looked up before changing this.
        preferred_quality = quality if quality else fire_event(
            'quality.single', identifier=quality['identifier'], single=True)

        # Contains lower quality string
        contains_other = fire_event('searcher.contains_other_quality',
                                    nzb,
                                    movie_year=media['info']['year'],
                                    preferred_quality=preferred_quality,
                                    single=True)
        if contains_other and isinstance(contains_other, dict):
            log.info2(
                'Wrong: %s, looking for %s, found %s',
                (nzb['name'], quality['label'], list(contains_other)))
            return False

        # 3D state does not match the preferred quality
        if not fire_event('searcher.correct_3d',
                          nzb,
                          preferred_quality=preferred_quality,
                          single=True):
            log.info2(
                'Wrong: %s, %slooking for %s in 3D',
                (nzb['name'],
                 ('' if preferred_quality['custom'].get('3d') else 'NOT '),
                 quality['label']))
            return False

        # File too small
        if nzb['size'] and try_int(preferred_quality['size_min']) > try_int(
                nzb['size']):
            log.info2(
                'Wrong: "%s" is too small to be %s. %sMB instead of the minimal of %sMB.',
                (nzb['name'], preferred_quality['label'], nzb['size'],
                 preferred_quality['size_min']))
            return False

        # File too large
        if nzb['size'] and try_int(preferred_quality['size_max']) < try_int(
                nzb['size']):
            log.info2(
                'Wrong: "%s" is too large to be %s. %sMB instead of the maximum of %sMB.',
                (nzb['name'], preferred_quality['label'], nzb['size'],
                 preferred_quality['size_max']))
            return False

        # Provider specific functions
        get_more = nzb.get('get_more_info')
        if get_more:
            get_more(nzb)

        extra_check = nzb.get('extra_check')
        if extra_check and not extra_check(nzb):
            return False

        if imdb_results:
            return True

        # Check if nzb contains imdb link
        if get_imdb(nzb.get('description', '')) == get_identifier(media):
            return True

        for raw_title in media['info']['titles']:
            for movie_title in possible_titles(raw_title):
                movie_words = re.split(r'\W+', simplify_string(movie_title))

                if not fire_event('searcher.correct_name',
                                  nzb['name'],
                                  movie_title,
                                  single=True):
                    continue

                # if no IMDB link, at least check the year: year range 1 for
                # titles of more than two words, exact year (range 0) otherwise
                year_range = 1 if len(movie_words) > 2 else 0
                if fire_event('searcher.correct_year',
                              nzb['name'],
                              media['info']['year'],
                              year_range,
                              single=True):
                    return True

        log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'",
                 (nzb['name'], media_title, media['info']['year']))
        return False
Пример #13
0
    def getReleaseNameYear(self, release_name, file_name=None):
        """Guess the movie name and year from a release name.

        Two guesses are made: one by ``guess_movie_info`` on ``file_name``
        (only kept when it yields both a title and a year) and one by
        splitting the cleaned release name on the year found by
        ``self.findYear``.

        Args:
            release_name: Release name; only its base name is used.
            file_name: Optional file name for the guessit-based guess.

        Returns:
            A dict with the chosen guess ('name' and 'year' when available)
            and the other guess under 'other'. The split-based guess is
            chosen when there is no guessit result, or when both agree on the
            year and the split-based name is longer.
        """

        release_name = release_name.strip(' .-_')

        # Use guessit first
        guess = {}
        if file_name:
            try:
                guessit = guess_movie_info(to_unicode(file_name))
                if guessit.get('title') and guessit.get('year'):
                    guess = {
                        'name': guessit.get('title'),
                        'year': guessit.get('year'),
                    }
            except Exception:
                log.debug('Could not detect via guessit "%s": %s',
                          (file_name, traceback.format_exc()))

        # Backup to simple
        release_name = os.path.basename(release_name.replace('\\', '/'))
        cleaned = ' '.join(re.split(r'\W+', simplify_string(release_name)))
        cleaned = re.sub(self.clean, ' ', cleaned)

        # First source that yields a year wins
        year = None
        for year_str in [file_name, release_name, cleaned]:
            if not year_str: continue
            year = self.findYear(year_str)
            if year:
                break

        cp_guess = {}

        if year:  # Split name on year
            try:
                movie_name = cleaned.rsplit(year, 1).pop(0).strip()
                if movie_name:
                    cp_guess = {
                        'name': movie_name,
                        'year': int(year),
                    }
            except Exception:
                # Best effort: fall through to the next strategy.
                pass

        if not cp_guess:  # Split name on multiple spaces
            try:
                movie_name = cleaned.split('  ').pop(0).strip()
                # NOTE(review): when no year was found, int(year) raises and
                # no guess is produced at all - confirm this is intended.
                cp_guess = {
                    'name': movie_name,
                    'year': int(year) if movie_name[:4] != year else 0,
                }
            except Exception:
                pass

        same_year = cp_guess.get('year') == guess.get('year')
        longer_name = len(cp_guess.get('name', '')) > len(guess.get('name', ''))
        if (same_year and longer_name) or not guess:
            cp_guess['other'] = guess
            return cp_guess

        guess['other'] = cp_guess
        return guess
Пример #14
0
 def getUrl(self, url):
     """Return ``self.getCache`` for *url*, keyed by the md5 of its simplified form."""
     cache_key = md5(simplify_string(url))
     return self.getCache(cache_key, url=url)