Пример #1
0
 def process(self, lst):
     """Split every input string into a flat list of word tokens

     Each string is passed through ``translate_string`` (keeping apostrophes and
     hyphens), its apostrophes are then replaced with spaces, and the resulting
     text is tokenized by NLTK using ``self.language``.

     :param lst: iterable of input strings
     :return: list of all tokens longer than one character, in input order
     """
     tokens = []
     for text in lst:
         cleaned = translate_string(text, keep_chars="'-")
         words = nltk.word_tokenize(cleaned.replace("'", ' '), self.language)
         # empty and single-character tokens are discarded
         tokens.extend(word for word in words if len(word) > 1)
     return tokens
Пример #2
0
def FileView(request):  # pylint: disable=invalid-name
    """Default file view

    Build the response used to serve the file stored in ``request.context``.

    The view handles:

    - conditional requests: when the Dublin Core modification date of the context is
      not more recent than the request ``if_modified_since`` value, an empty
      ``NOT_MODIFIED`` response is returned;
    - downloads: when a ``dl`` request parameter is present, an ``attachment``
      content disposition is set from the context file name (``noname.txt`` when the
      context has no file name);
    - range requests: only the requested slice of the content is returned, with a
      ``PARTIAL_CONTENT`` status and the matching ``Content-Range`` header.

    :param request: current request; its ``context`` must provide ``content_type``,
        ``filename`` and ``get_blob``
    :return: the response object
    """
    context = request.context

    # set content type
    content_type = context.content_type
    if isinstance(content_type, bytes):
        content_type = content_type.decode('utf-8')

    # check for last modification date
    response = Response(content_type=content_type)
    zdc = IZopeDublinCore(context, None)
    if zdc is not None:
        modified = zdc.modified
        if modified is not None:
            if_modified_since = request.if_modified_since
            # pylint: disable=no-member
            # both timestamps are truncated to whole seconds before comparison
            if if_modified_since and \
                    (int(modified.timestamp()) <= int(if_modified_since.timestamp())):
                return Response(content_type=content_type, status=NOT_MODIFIED)
            response.last_modified = modified

    # the download header is set before opening the blob, so that a failure here
    # can't leave an opened file behind
    if request.params.get('dl') is not None:
        filename = context.filename or 'noname.txt'
        response.content_disposition = 'attachment; filename="{0}"'.format(
            translate_string(filename, force_lower=False))

    body_file = context.get_blob(mode='c')

    # check for range request
    if request.range is not None:
        try:
            body = body_file.read()
        finally:
            body_file.close()
        body_length = len(body)
        range_start = request.range.start or 0
        if range_start < 0:
            # NOTE(review): assuming a WebOb request, a suffix range ("bytes=-N") is
            # exposed as a negative start; convert it to an absolute offset so that
            # the Content-Range header is valid
            range_start = max(body_length + range_start, 0)
        # the User-Agent header may be missing, in which case the attribute is None
        user_agent = request.user_agent or ''
        if 'Firefox' in user_agent:  # avoid partial range for Firefox videos
            range_end = body_length
        else:
            # open-ended ranges are capped to MAX_RANGE_LENGTH bytes
            range_end = request.range.end or min(
                body_length, range_start + MAX_RANGE_LENGTH)
        ranged_body = body[range_start:range_end]
        response.status = PARTIAL_CONTENT
        response.headers['Content-Range'] = 'bytes {first}-{last}/{len}'.format(
            first=range_start,
            last=range_start + len(ranged_body) - 1,
            len=body_length)
        response.body = ranged_body
    else:
        # the response takes over the opened file and streams it
        response.body_file = body_file

    return response
Пример #3
0
 def process(self, lst):
     """Convert every input string into a flat list of word stems

     Each string is passed through ``translate_string`` (keeping apostrophes and
     hyphens), its apostrophes are replaced with spaces, and the text is tokenized
     by NLTK using ``self.language``. Tokens which are not stopwords are then
     stemmed with ``self.stemmer``.

     :param lst: iterable of input strings
     :return: list of stems longer than one character which are not themselves
         stopwords, in input order
     """
     stems = []
     for text in lst:
         cleaned = translate_string(text, keep_chars="'-").replace("'", ' ')
         for word in nltk.word_tokenize(cleaned, self.language):
             if word in self.stemmer.stopwords:
                 continue
             stem = self.stemmer.stem(word)
             # stemming may produce an empty, too short or stopword result
             if stem and (len(stem) > 1) and (
                     stem not in self.stemmer.stopwords):
                 stems.append(stem)
     return stems
Пример #4
0
def generate_url(title, min_word_length=2):
    """Generate an SEO-friendly content URL from its title

    The original title is translated to remove accents and converted to lowercase;
    words shorter than `min_word_length` characters (two by default) are removed,
    and the remaining terms are joined by hyphens.

    :param title: the input text
    :param min_word_length: minimum length of words to keep
    :return: the generated URL part

    >>> from pyams_utils.url import generate_url
    >>> generate_url('This is my test')
    'this-is-my-test'

    Single letters are removed from generated URLs:

    >>> generate_url('This word has a single a')
    'this-word-has-single'

    But you can define the minimum length of words:

    >>> generate_url('This word has a single a', min_word_length=4)
    'this-word-single'

    If input text contains slashes, they are replaced with hyphens:

    >>> generate_url('This string contains/slash')
    'this-string-contains-slash'

    Punctuation and special characters are completely removed:

    >>> generate_url('This is a string with a point. And why not?')
    'this-is-string-with-point-and-why-not'
    """
    # slashes become hyphens first, so that they act as word separators
    slug = translate_string(title.replace('/', '-'),
                            escape_slashes=False,
                            force_lower=True,
                            spaces='-',
                            remove_punctuation=True,
                            keep_chars='-')
    return '-'.join(word
                    for word in slug.split('-')
                    if len(word) >= min_word_length)