Python simplifyString示例，whatpotato.core.helpers.encoding.simplifyString Python示例

示例#1

0

显示文件

文件： main.py 项目： michaelsexton/WhatPotato

    def correctWords(self, rel_name, media):
        media_title = fireEvent('searcher.get_search_title', media, single = True)
        media_words = re.split('\W+', simplifyString(media_title))

        rel_name = simplifyString(rel_name)
        rel_words = re.split('\W+', rel_name)

        required_words, contains_required = self.containsWords(rel_name, rel_words, 'required', media)
        if len(required_words) > 0 and not contains_required:
            log.info2('Wrong: Required word missing: %s', rel_name)
            return False

        ignored_words, contains_ignored = self.containsWords(rel_name, rel_words, 'ignored', media)
        if len(ignored_words) > 0 and contains_ignored:
            log.info2("Wrong: '%s' contains 'ignored words'", rel_name)
            return False

        # Ignore p**n stuff
        pron_tags = ['xxx', 'sex', 'anal', 't**s', 'f**k', 'p**n', 'orgy', 'milf', 'boobs', 'erotica', 'erotic', 'c**k', 'dick']
        pron_words = list(set(rel_words) & set(pron_tags) - set(media_words))
        if pron_words:
            log.info('Wrong: %s, probably pr0n', rel_name)
            return False

        return True

示例#2

0

显示文件

文件： scores.py 项目： michaelsexton/WhatPotato

def namePositionScore(nzb_name, movie_name):
    score = 0

    nzb_words = re.split('\W+', simplifyString(nzb_name))
    qualities = fireEvent('quality.all', single = True)

    try:
        nzb_name = re.search(r'([\'"])[^\1]*\1', nzb_name).group(0)
    except:
        pass

    name_year = fireEvent('scanner.name_year', nzb_name, single = True)

    # Give points for movies beginning with the correct name
    split_by = simplifyString(movie_name)
    name_split = []
    if len(split_by) > 0:
        name_split = simplifyString(nzb_name).split(split_by)
        if name_split[0].strip() == '':
            score += 10

    # If year is second in line, give more points
    if len(name_split) > 1 and name_year:
        after_name = name_split[1].strip()
        if tryInt(after_name[:4]) == name_year.get('year', None):
            score += 10
            after_name = after_name[4:]

        # Give -point to crap between year and quality
        found_quality = None
        for quality in qualities:
            # Main in words
            if quality['identifier'] in nzb_words:
                found_quality = quality['identifier']

            # Alt in words
            for alt in quality['alternative']:
                if alt in nzb_words:
                    found_quality = alt
                    break

        if not found_quality:
            return score - 20

        allowed = []
        for value in name_scores:
            name, sc = value.split(':')
            allowed.append(name)

        inbetween = re.split('\W+', after_name.split(found_quality)[0].strip())

        score -= (10 * len(set(inbetween) - set(allowed)))

    return score

示例#3

0

显示文件

文件： variable.py 项目： michaelsexton/WhatPotato

def possibleTitles(raw_title):

    titles = [
        toSafeString(raw_title).lower(),
        raw_title.lower(),
        simplifyString(raw_title)
    ]

    # replace some chars
    new_title = raw_title.replace('&', 'and')
    titles.append(simplifyString(new_title))

    return removeDuplicate(titles)

示例#4

0

显示文件

文件： scores.py 项目： michaelsexton/WhatPotato

def duplicateScore(nzb_name, movie_name):

    try:
        nzb_words = re.split('\W+', simplifyString(nzb_name))
        movie_words = re.split('\W+', simplifyString(movie_name))

        # minus for duplicates
        duplicates = [x for i, x in enumerate(nzb_words) if nzb_words[i:].count(x) > 1]

        return len(list(set(duplicates) - set(movie_words))) * -4
    except:
        log.error('Failed doing duplicateScore: %s', traceback.format_exc())

    return 0

示例#5

0

显示文件

文件： main.py 项目： michaelsexton/WhatPotato

    def correctName(self, check_name, movie_name):

        check_names = [check_name]

        # Match names between "
        try: check_names.append(re.search(r'([\'"])[^\1]*\1', check_name).group(0))
        except: pass

        # Match longest name between []
        try: check_names.append(max(re.findall(r'[^[]*\[([^]]*)\]', check_name), key = len).strip())
        except: pass

        for check_name in removeDuplicate(check_names):
            check_movie = fireEvent('scanner.name_year', check_name, single = True)

            try:
                check_words = removeEmpty(re.split('\W+', check_movie.get('name', '')))
                movie_words = removeEmpty(re.split('\W+', simplifyString(movie_name)))

                if len(check_words) > 0 and len(movie_words) > 0 and len(list(set(check_words) - set(movie_words))) == 0:
                    return True
            except:
                pass

        return False

示例#6

0

显示文件

文件： scores.py 项目： michaelsexton/WhatPotato

def nameScore(name, year, preferred_words):
    """ Calculate score for words in the NZB name """

    try:
        score = 0
        name = name.lower()

        # give points for the cool stuff
        for value in name_scores:
            v = value.split(':')
            add = int(v.pop())
            if v.pop() in name:
                score += add

        # points if the year is correct
        if str(year) in name:
            score += 5

        # Contains preferred word
        nzb_words = re.split('\W+', simplifyString(name))
        score += 100 * len(list(set(nzb_words) & set(preferred_words)))

        return score
    except:
        log.error('Failed doing nameScore: %s', traceback.format_exc())

    return 0

示例#7

0

显示文件

文件： scanner.py 项目： michaelsexton/WhatPotato

    def createStringIdentifier(self, file_path, folder = '', exclude_filename = False):

        identifier = file_path.replace(folder, '').lstrip(os.path.sep) # root folder
        identifier = os.path.splitext(identifier)[0] # ext

        # Exclude file name path if needed (f.e. for DVD files)
        if exclude_filename:
            identifier = identifier[:len(identifier) - len(os.path.split(identifier)[-1])]

        # Make sure the identifier is lower case as all regex is with lower case tags
        identifier = identifier.lower()

        try:
            path_split = splitString(identifier, os.path.sep)
            identifier = path_split[-2] if len(path_split) > 1 and len(path_split[-2]) > len(path_split[-1]) else path_split[-1] # Only get filename
        except: pass

        # multipart
        identifier = self.removeMultipart(identifier)

        # remove cptag
        identifier = self.removeCPTag(identifier)

        # simplify the string
        identifier = simplifyString(identifier)

        year = self.findYear(file_path)

        # groups, release tags, scenename cleaner
        identifier = re.sub(self.clean, '::', identifier).strip(':')

        # Year
        if year and identifier[:4] != year:
            split_by = ':::' if ':::' in identifier else year
            identifier = '%s %s' % (identifier.split(split_by)[0].strip(), year)
        else:
            identifier = identifier.split('::')[0]

        # Remove duplicates
        out = []
        for word in identifier.split():
            if not word in out:
                out.append(word)

        identifier = ' '.join(out)

        return simplifyString(identifier)

示例#8

0

显示文件

文件： index.py 项目： michaelsexton/WhatPotato

    def first(self, title):
        title = toUnicode(title)
        title = simplifyString(title)

        for prefix in ['the ', 'an ', 'a ']:
            if prefix == title[:len(prefix)]:
                title = title[len(prefix):]
                break

        return str(title[0] if title and len(title) > 0 and title[0] in ascii_letters else '#').lower()

示例#9

0

显示文件

文件： scores.py 项目： michaelsexton/WhatPotato

def nameRatioScore(nzb_name, movie_name):
    try:
        nzb_words = re.split('\W+', fireEvent('scanner.create_file_identifier', nzb_name, single = True))
        movie_words = re.split('\W+', simplifyString(movie_name))

        left_over = set(nzb_words) - set(movie_words)
        return 10 - len(left_over)
    except:
        log.error('Failed doing nameRatioScore: %s', traceback.format_exc())

    return 0

示例#10

0

显示文件

文件： base.py 项目： michaelsexton/WhatPotato

    def simplifyValue(self, value):
        if not value:
            return value

        if isinstance(value, basestring):
            return simplifyString(value)

        if isinstance(value, list):
            return [self.simplifyValue(x) for x in value]

        raise ValueError("Unsupported value type")

示例#11

0

显示文件

文件： index.py 项目： michaelsexton/WhatPotato

    def simplify(self, title):

        title = toUnicode(title)

        nr_prefix = '' if title and len(title) > 0 and title[0] in ascii_letters else '#'
        title = simplifyString(title)

        for prefix in ['the ', 'an ', 'a ']:
            if prefix == title[:len(prefix)]:
                title = title[len(prefix):]
                break

        return str(nr_prefix + title).ljust(32, ' ')[:32]

示例#12

0

显示文件

文件： index.py 项目： michaelsexton/WhatPotato

    def make_key_value(self, data):

        if data.get('_t') == 'media' and len(data.get('title', '')) > 0:

            out = set()
            title = str(simplifyString(data.get('title').lower()))
            l = self.__l
            title_split = title.split()

            for x in range(len(title_split)):
                combo = ' '.join(title_split[x:])[:32].strip()
                out.add(combo.rjust(32, '_'))
                combo_range = max(l, min(len(combo), 32))

                for cx in range(1, combo_range):
                    ccombo = combo[:-cx].strip()
                    if len(ccombo) > l:
                        out.add(ccombo.rjust(32, '_'))

            return out, None

示例#13

0

显示文件

文件： variable.py 项目： michaelsexton/WhatPotato

def getImdb(txt, check_inside = False, multiple = False):

    if not check_inside:
        txt = simplifyString(txt)
    else:
        txt = ss(txt)

    if check_inside and os.path.isfile(txt):
        output = open(txt, 'r')
        txt = output.read()
        output.close()

    try:
        ids = re.findall('(tt\d{4,7})', txt)

        if multiple:
            return removeDuplicate(['tt%07d' % tryInt(x[2:]) for x in ids]) if len(ids) > 0 else []

        return 'tt%07d' % tryInt(ids[0][2:])
    except IndexError:
        pass

    return False

示例#14

0

显示文件

文件： base.py 项目： michaelsexton/WhatPotato

 def getUrl(self, url):
     return self.getCache(md5(simplifyString(url)), url = url)

示例#15

0

显示文件

文件： scanner.py 项目： michaelsexton/WhatPotato

    def getReleaseNameYear(self, release_name, file_name = None):

        release_name = release_name.strip(' .-_')

        # Use guessit first
        guess = {}
        if file_name:
            try:
                guessit = guess_movie_info(toUnicode(file_name))
                if guessit.get('title') and guessit.get('year'):
                    guess = {
                        'name': guessit.get('title'),
                        'year': guessit.get('year'),
                    }
            except:
                log.debug('Could not detect via guessit "%s": %s', (file_name, traceback.format_exc()))

        # Backup to simple
        release_name = os.path.basename(release_name.replace('\\', '/'))
        cleaned = ' '.join(re.split('\W+', simplifyString(release_name)))
        cleaned = re.sub(self.clean, ' ', cleaned)

        year = None
        for year_str in [file_name, release_name, cleaned]:
            if not year_str: continue
            year = self.findYear(year_str)
            if year:
                break

        cp_guess = {}

        if year:  # Split name on year
            try:
                movie_name = cleaned.rsplit(year, 1).pop(0).strip()
                if movie_name:
                    cp_guess = {
                        'name': movie_name,
                        'year': int(year),
                    }
            except:
                pass

        if not cp_guess:  # Split name on multiple spaces
            try:
                movie_name = cleaned.split('  ').pop(0).strip()
                cp_guess = {
                    'name': movie_name,
                    'year': int(year) if movie_name[:4] != year else 0,
                }
            except:
                pass

        if cp_guess.get('year') == guess.get('year') and len(cp_guess.get('name', '')) > len(guess.get('name', '')):
            cp_guess['other'] = guess
            return cp_guess
        elif guess == {}:
            cp_guess['other'] = guess
            return cp_guess

        guess['other'] = cp_guess
        return guess

示例#16

0

显示文件

文件： searcher.py 项目： michaelsexton/WhatPotato

    def correctRelease(self, nzb = None, media = None, quality = None, **kwargs):

        if media.get('type') != 'movie': return

        media_title = fireEvent('searcher.get_search_title', media, single = True)

        imdb_results = kwargs.get('imdb_results', False)
        retention = Env.setting('retention', section = 'nzb')

        if nzb.get('seeders') is None and 0 < retention < nzb.get('age', 0):
            log.info2('Wrong: Outside retention, age is %s, needs %s or lower: %s', (nzb['age'], retention, nzb['name']))
            return False

        # Check for required and ignored words
        if not fireEvent('searcher.correct_words', nzb['name'], media, single = True):
            return False

        preferred_quality = quality if quality else fireEvent('quality.single', identifier = quality['identifier'], single = True)

        # Contains lower quality string
        contains_other = fireEvent('searcher.contains_other_quality', nzb, movie_year = media['info']['year'], preferred_quality = preferred_quality, single = True)
        if contains_other and isinstance(contains_other, dict):
            log.info2('Wrong: %s, looking for %s, found %s', (nzb['name'], quality['label'], [x for x in contains_other] if contains_other else 'no quality'))
            return False

        # Contains lower quality string
        if not fireEvent('searcher.correct_3d', nzb, preferred_quality = preferred_quality, single = True):
            log.info2('Wrong: %s, %slooking for %s in 3D', (nzb['name'], ('' if preferred_quality['custom'].get('3d') else 'NOT '), quality['label']))
            return False

        # File to small
        if nzb['size'] and tryInt(preferred_quality['size_min']) > tryInt(nzb['size']):
            log.info2('Wrong: "%s" is too small to be %s. %sMB instead of the minimal of %sMB.', (nzb['name'], preferred_quality['label'], nzb['size'], preferred_quality['size_min']))
            return False

        # File to large
        if nzb['size'] and tryInt(preferred_quality['size_max']) < tryInt(nzb['size']):
            log.info2('Wrong: "%s" is too large to be %s. %sMB instead of the maximum of %sMB.', (nzb['name'], preferred_quality['label'], nzb['size'], preferred_quality['size_max']))
            return False

        # Provider specific functions
        get_more = nzb.get('get_more_info')
        if get_more:
            get_more(nzb)

        extra_check = nzb.get('extra_check')
        if extra_check and not extra_check(nzb):
            return False


        if imdb_results:
            return True

        # Check if nzb contains imdb link
        if getImdb(nzb.get('description', '')) == getIdentifier(media):
            return True

        for raw_title in media['info']['titles']:
            for movie_title in possibleTitles(raw_title):
                movie_words = re.split('\W+', simplifyString(movie_title))

                if fireEvent('searcher.correct_name', nzb['name'], movie_title, single = True):
                    # if no IMDB link, at least check year range 1
                    if len(movie_words) > 2 and fireEvent('searcher.correct_year', nzb['name'], media['info']['year'], 1, single = True):
                        return True

                    # if no IMDB link, at least check year
                    if len(movie_words) <= 2 and fireEvent('searcher.correct_year', nzb['name'], media['info']['year'], 0, single = True):
                        return True

        log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'", (nzb['name'], media_title, media['info']['year']))
        return False