def process_django_models(app, what, name, obj, options, lines):
    """Extend a model's docstring lines with one entry per model field.

    Anything that is not a class derived from ``django.db.models.Model`` is
    passed through untouched.  For a model, every field in
    ``obj._meta.fields`` contributes a ``:param`` line (quoted verbose name,
    followed by the tag-stripped help text when there is any) and a
    ``:type`` line.  When the module-level name ``enchant`` is not ``None``,
    a ``.. spelling::`` block built from the verbose names is appended after
    the field entries.

    ``inspect``, ``ugettext`` and ``enchant`` are not defined here; they are
    taken from the enclosing module.  The signature looks like a Sphinx
    ``autodoc-process-docstring`` handler -- NOTE(review): confirm where it
    is connected.

    :param app: unused
    :param what: unused
    :param name: unused
    :param obj: the object whose documentation is being processed
    :param options: unused
    :param lines: list of docstring lines; extended in place
    :returns: the ``lines`` object that was passed in
    """
    from django.utils.html import strip_tags
    from django.db import models

    # Guard clause: only Django model classes get extra documentation.
    if not (inspect.isclass(obj) and issubclass(obj, models.Model)):
        return lines

    whitelist = ['', '.. spelling::']

    for model_field in obj._meta.fields:
        description = strip_tags(ugettext(model_field.help_text))
        label = ugettext(model_field.verbose_name)

        if description:
            entry = ':param %s: "%s" - %s' % (
                model_field.attname, label, description)
        else:
            entry = ':param %s: "%s"' % (model_field.attname, label)
        lines.append(entry)

        if enchant is not None:
            from enchant.tokenize import basic_tokenize

            # Treat '-', '_' and '.' as equivalent separators.
            dotted = label.replace('-', '.').replace('_', '.')
            for fragment in dotted.split('.'):
                if fragment == '':
                    continue
                # The fragment with digits removed, then each of its tokens.
                whitelist.append(
                    " %s" % ''.join(ch for ch in fragment if not ch.isdigit())
                )
                whitelist.extend(
                    " %s" % token[0] for token in basic_tokenize(fragment)
                )

        kind = type(model_field)
        module_path = kind.__module__
        if 'django.db.models' in module_path:
            # scope with django.db.models * imports
            module_path = 'django.db.models'
        lines.append(
            ':type %s: %s.%s' % (model_field.attname, module_path, kind.__name__)
        )

    if enchant is not None:
        lines += whitelist
    return lines
def test_tokenize_strip():
    """Test special-char-stripping edge-cases in basic_tokenize.

    Checks that ``basic_tokenize`` yields exactly the expected
    ``(word, offset)`` pairs for a string full of brackets and quotes.
    ``basic_tokenize`` is taken from the enclosing module.
    """
    text = "((' <this> \"\" 'text' has (lots) of (special chars} >>]"
    expected = [
        ("<this>", 4),
        ("text", 15),
        ("has", 21),
        ("lots", 26),
        ("of", 32),
        ("special", 36),
        ("chars}", 44),
        (">>", 51),
    ]
    assert expected == list(basic_tokenize(text))
    for want, got in zip(expected, basic_tokenize(text)):
        # Must be a comparison: ``assert want, got`` would only check that
        # ``want`` is truthy and use ``got`` as the failure message.
        assert want == got
def test_basic_tokenize():
    """Simple regression test for basic white-space tokenization.

    Checks that ``basic_tokenize`` yields exactly the expected
    ``(word, offset)`` pairs for a short paragraph.  ``basic_tokenize`` is
    taken from the enclosing module.
    """
    # The expected offsets depend on the exact whitespace in this text, so it
    # is spelled out piece by piece: two spaces follow "paragraph." ("It's"
    # starts at 22), and a single separator follows "designed" and "need".
    # Those single separators are written as newlines -- presumably the
    # original line breaks; a single space would give the same offsets.
    text = (
        "This is a paragraph.  It's not very special, but it's designed\n"
        "2 show how the splitter works with many-different combos of words."
        " Also need\n"
        "to \"test\" the (handling) of 'quoted' words."
    )
    expected = [
        ("This", 0),
        ("is", 5),
        ("a", 8),
        ("paragraph", 10),
        ("It's", 22),
        ("not", 27),
        ("very", 31),
        ("special", 36),
        ("but", 45),
        ("it's", 49),
        ("designed", 54),
        ("2", 63),
        ("show", 65),
        ("how", 70),
        ("the", 74),
        ("splitter", 78),
        ("works", 87),
        ("with", 93),
        ("many-different", 98),
        ("combos", 113),
        ("of", 120),
        ("words", 123),
        ("Also", 130),
        ("need", 135),
        ("to", 140),
        ("test", 144),
        ("the", 150),
        ("handling", 155),
        ("of", 165),
        ("quoted", 169),
        ("words", 177),
    ]
    assert expected == list(basic_tokenize(text))
    for want, got in zip(expected, basic_tokenize(text)):
        assert want == got
def process_text(self, text):
    """
    Tokenize ``text`` after dropping mention-like and 'RT' words.

    ``self.result`` is emptied in place first.  The text is split with
    ``basic_tokenize``; every word containing ``'@'`` or the substring
    ``'RT'`` is discarded.  The remaining words are re-joined with single
    spaces and run through an ``en_US`` tokenizer configured with
    ``URLFilter``.

    accepts: [String] text input
    returns: [List] the words produced by the filtered tokenizer, or
             ``None`` when a ``UnicodeEncodeError`` is raised along the way
    """
    try:
        del self.result[:]
        kept = [
            token
            for token, _offset in basic_tokenize(text)
            if '@' not in token and 'RT' not in token
        ]
        tokenizer = get_tokenizer("en_US", filters=[URLFilter])
        cleaned = ' '.join(kept)
        return [token for token, _offset in tokenizer(cleaned)]
    except UnicodeEncodeError:
        # Best effort: text that cannot be encoded yields no result.
        return None
def process_modules(app, what, name, obj, options, lines):
    """Whitelist the components of a module name for the spell checker.

    Only acts when ``what`` equals ``'module'``; otherwise ``lines`` is
    returned unchanged and ``enchant`` is never imported.  For a module,
    ``name`` is split on ``'.'``, ``'-'`` and ``'_'``; each non-empty part
    is added to a ``.. spelling::`` block twice over: once with its digits
    removed and once per token reported by ``basic_tokenize``.

    :param app: unused
    :param what: kind of object being documented
    :param name: dotted name of the object
    :param obj: unused
    :param options: unused
    :param lines: list of docstring lines; extended in place
    :returns: the ``lines`` object that was passed in
    """
    if what == 'module':
        from enchant.tokenize import basic_tokenize

        whitelist = ['', '.. spelling::']
        dotted = name.replace('-', '.').replace('_', '.')
        for part in dotted.split('.'):
            if part == '':
                continue
            whitelist.append(
                " %s" % ''.join(ch for ch in part if not ch.isdigit())
            )
            whitelist.extend(
                " %s" % token[0] for token in basic_tokenize(part)
            )
        lines += whitelist
    return lines
def process_django_models(app, what, name, obj, options, lines):
    """Append params from fields to model documentation.

    ``activate('en-gb')`` is called on every invocation, before ``obj`` is
    inspected.  Objects that are not classes derived from
    ``django.db.models.Model`` are otherwise passed through untouched.  For
    a model, every field in ``obj._meta.fields`` contributes a ``:param``
    line (quoted verbose name, followed by the tag-stripped help text when
    there is any) and a ``:type`` line.  When the module-level name
    ``enchant`` is not ``None``, a ``.. spelling::`` block built from the
    verbose names is appended after the field entries.

    ``inspect``, ``ugettext``, ``activate`` and ``enchant`` are not defined
    here; they are taken from the enclosing module.

    :param app: unused
    :param what: unused
    :param name: unused
    :param obj: the object whose documentation is being processed
    :param options: unused
    :param lines: list of docstring lines; extended in place
    :returns: the ``lines`` object that was passed in
    """
    from django.utils.html import strip_tags
    from django.db import models

    spelling_white_list = ['', '.. spelling::']

    # NOTE(review): presumably django.utils.translation.activate, used to pin
    # the language of the generated text -- confirm against the file imports.
    activate('en-gb')

    if inspect.isclass(obj) and issubclass(obj, models.Model):
        if enchant is not None:
            # Loop-invariant, so imported once per model rather than per field.
            from enchant.tokenize import basic_tokenize

        for field in obj._meta.fields:
            help_text = strip_tags(ugettext(field.help_text))
            verbose_name = ugettext(field.verbose_name)

            if help_text:
                lines.append(':param {0}: "{1}" - {2}'.format(
                    field.attname, verbose_name, help_text))
            else:
                lines.append(':param {0}: "{1}"'.format(
                    field.attname, verbose_name))

            if enchant is not None:
                # Treat '-', '_' and '.' as equivalent separators.
                words = verbose_name.replace('-', '.').replace('_', '.').split('.')
                for word in words:
                    if word == '':
                        continue
                    # The word with digits removed, then each of its tokens.
                    spelling_white_list.append(
                        " %s" % ''.join(i for i in word if not i.isdigit())
                    )
                    spelling_white_list.extend(
                        " %s" % w[0] for w in basic_tokenize(word)
                    )

            field_type = type(field)
            module = field_type.__module__
            if 'django.db.models' in module:
                # scope with django.db.models * imports
                module = 'django.db.models'
            lines.append(':type %s: %s.%s' % (field.attname, module,
                                              field_type.__name__))
        if enchant is not None:
            lines += spelling_white_list
    return lines
def process_modules(app, what, name, obj, options, lines):
    """Add the pieces of a module's name to the spelling white list.

    Non-module objects (``what != 'module'``) are returned untouched before
    ``enchant`` is imported.  For modules, ``name`` is broken up on ``'.'``,
    ``'-'`` and ``'_'``; a ``.. spelling::`` block is appended to ``lines``
    containing, for each non-empty piece, the piece without its digits and
    every token ``basic_tokenize`` reports for it.

    :param app: unused
    :param what: kind of object being documented
    :param name: dotted name of the object
    :param obj: unused
    :param options: unused
    :param lines: list of docstring lines; extended in place
    :returns: the ``lines`` object that was passed in
    """
    if what != 'module':
        return lines

    from enchant.tokenize import basic_tokenize

    normalised = name.replace('-', '.').replace('_', '.')
    pieces = [piece for piece in normalised.split('.') if piece != '']

    block = ['', '.. spelling::']
    for piece in pieces:
        without_digits = ''.join(char for char in piece if not char.isdigit())
        block.append(" %s" % without_digits)
        for token in basic_tokenize(piece):
            block.append(" %s" % token[0])

    lines += block
    return lines
def process_django_models(app, what, name, obj, options, lines):
    """Append params from fields to model documentation.

    Objects that are not classes derived from ``django.db.models.Model`` are
    passed through untouched.  For a model, every field in
    ``obj._meta.fields`` contributes a ``:param`` line (capitalized verbose
    name, followed by the tag-stripped help text when there is any) and a
    ``:type`` line.  When the module-level name ``enchant`` is not ``None``,
    a ``.. spelling::`` block built from the verbose names is appended after
    the field entries.

    ``inspect`` and ``enchant`` are not defined here; they are taken from
    the enclosing module.

    :param app: unused
    :param what: unused
    :param name: unused
    :param obj: the object whose documentation is being processed
    :param options: unused
    :param lines: list of docstring lines; extended in place
    :returns: the ``lines`` object that was passed in
    """
    try:
        from django.utils.encoding import force_text
    except ImportError:
        # Django 4.0 removed force_text; force_str is its replacement there.
        # force_text stays the first choice so older installs keep their
        # current behavior.
        from django.utils.encoding import force_str as force_text
    from django.utils.html import strip_tags
    from django.db import models

    spelling_white_list = ['', '.. spelling::']

    if inspect.isclass(obj) and issubclass(obj, models.Model):
        if enchant is not None:
            # Loop-invariant, so imported once per model rather than per field.
            from enchant.tokenize import basic_tokenize

        for field in obj._meta.fields:
            help_text = strip_tags(force_text(field.help_text))
            verbose_name = force_text(field.verbose_name).capitalize()

            if help_text:
                lines.append(':param %s: %s - %s' % (field.attname,
                                                     verbose_name, help_text))
            else:
                lines.append(':param %s: %s' % (field.attname, verbose_name))

            if enchant is not None:
                # Treat '-', '_' and '.' as equivalent separators.
                words = verbose_name.replace('-', '.').replace('_', '.').split('.')
                for word in words:
                    if word == '':
                        continue
                    # The word with digits removed, then each of its tokens.
                    spelling_white_list.append(
                        " %s" % ''.join(i for i in word if not i.isdigit())
                    )
                    spelling_white_list.extend(
                        " %s" % w[0] for w in basic_tokenize(word)
                    )

            field_type = type(field)
            module = field_type.__module__
            if 'django.db.models' in module:
                # scope with django.db.models * imports
                module = 'django.db.models'
            lines.append(':type %s: %s.%s' % (field.attname, module,
                                              field_type.__name__))
        if enchant is not None:
            lines += spelling_white_list
    return lines