def process(self, lst):
    """Tokenize every string of *lst* and return the combined token list.

    Each string is transliterated (keeping apostrophes and hyphens),
    apostrophes are then turned into spaces, and the result is split with
    NLTK's word tokenizer; empty and single-character tokens are dropped.
    """
    tokens = []
    for text in lst:  # pylint: disable=invalid-name
        cleaned = translate_string(text, keep_chars="'-").replace("'", ' ')
        for word in nltk.word_tokenize(cleaned, self.language):
            # keep only meaningful tokens (length > 1)
            if word and len(word) > 1:
                tokens.append(word)
    return tokens
def FileView(request):  # pylint: disable=invalid-name
    """Default file view

    Streams the blob attached to the request context, honoring:

    - conditional GET (``If-Modified-Since`` against the Dublin Core
      modification date, answering ``304 Not Modified``);
    - forced download via the ``dl`` request parameter (sets a
      ``Content-Disposition: attachment`` header with a transliterated
      filename);
    - HTTP range requests (answering ``206 Partial Content``), with a
      Firefox-specific workaround returning the full remaining body.
    """
    context = request.context
    # set content type; stored value may be bytes (legacy ZODB content)
    content_type = context.content_type
    if isinstance(content_type, bytes):
        content_type = content_type.decode('utf-8')
    # check for last modification date
    response = Response(content_type=content_type)
    zdc = IZopeDublinCore(context, None)
    if zdc is not None:
        modified = zdc.modified
        if modified is not None:
            if_modified_since = request.if_modified_since  # pylint: disable=no-member
            # compare at whole-second precision: HTTP dates have no sub-second part
            if if_modified_since and \
                    (int(modified.timestamp()) <= int(if_modified_since.timestamp())):
                return Response(content_type=content_type, status=NOT_MODIFIED)
            response.last_modified = modified
    body_file = context.get_blob(mode='c')
    if request.params.get('dl') is not None:
        filename = context.filename or 'noname.txt'
        # transliterate the filename so the header stays pure ASCII
        response.content_disposition = 'attachment; filename="{0}"'.format(
            translate_string(filename, force_lower=False))
    # check for range request
    if request.range is not None:
        try:
            body = body_file.read()
            body_length = len(body)
            range_start = request.range.start or 0
            # user_agent is None when the client sent no User-Agent header;
            # guard before the substring test to avoid a TypeError
            if request.user_agent and ('Firefox' in request.user_agent):
                # avoid partial range for Firefox videos
                range_end = body_length
            else:
                # cap the slice so a single answer never exceeds MAX_RANGE_LENGTH
                range_end = request.range.end or min(
                    body_length, range_start + MAX_RANGE_LENGTH)
            ranged_body = body[range_start:range_end]
            response.status = PARTIAL_CONTENT
            response.headers[
                'Content-Range'] = 'bytes {first}-{last}/{len}'.format(
                    first=range_start,
                    last=range_start + len(ranged_body) - 1,
                    len=body_length)
            response.body = ranged_body
        finally:
            # the whole blob was read into memory, so release the file now
            body_file.close()
    else:
        # no range: hand the open file to the response, which owns it from here
        response.body_file = body_file
    return response
def process(self, lst):
    """Stem the tokens of every string of *lst* and return the stem list.

    Each string is transliterated (keeping apostrophes and hyphens),
    apostrophes are turned into spaces, and the result is tokenized with
    NLTK; stopwords are skipped before stemming, and stems which are
    empty, single-character or themselves stopwords are discarded.
    """
    stopwords = self.stemmer.stopwords
    stems = []
    for text in lst:  # pylint: disable=invalid-name
        cleaned = translate_string(text, keep_chars="'-").replace("'", ' ')
        for token in nltk.word_tokenize(cleaned, self.language):
            if token in stopwords:
                continue
            stem = self.stemmer.stem(token)
            # a stem can itself be a stopword or degenerate to one character
            if stem and len(stem) > 1 and stem not in stopwords:
                stems.append(stem)
    return stems
def generate_url(title, min_word_length=2):
    """Generate an SEO-friendly content URL from its title

    The original title is translated to remove accents, converted to
    lowercase, and words shorter than two characters (by default) are
    removed; terms are joined by hyphens.

    :param title: the input text
    :param min_word_length: minimum length of words to keep

    >>> from pyams_utils.url import generate_url
    >>> generate_url('This is my test')
    'this-is-my-test'

    Single letters are removed from generated URLs:

    >>> generate_url('This word has a single a')
    'this-word-has-single'

    But you can define the minimum length of word:

    >>> generate_url('This word has a single a', min_word_length=4)
    'this-word-single'

    If input text contains slashes, they are replaced with hyphens:

    >>> generate_url('This string contains/slash')
    'this-string-contains-slash'

    Punctuation and special characters are completely removed:

    >>> generate_url('This is a string with a point. And why not?')
    'this-is-string-with-point-and-why-not'
    """
    return '-'.join(
        filter(
            lambda x: len(x) >= min_word_length,
            translate_string(title.replace('/', '-'),
                             escape_slashes=False,
                             force_lower=True,
                             spaces='-',
                             remove_punctuation=True,
                             keep_chars='-').split('-')))