Python Histogram.inc примеры использования

Язык программирования: Python

Пространство имен/Пакет: rolodexer.histogram

Класс/Тип: Histogram

Метод/Функция: inc

Примеров на hotexamples.com: 3

Python Histogram.inc — найдено 3 примера. Это лучшие примеры кода на Python для rolodexer.histogram.Histogram.inc, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

inc(3)

iterkeys(1)

max(1)

min(1)

val(1)

Основные методы

inc (3)

iterkeys (1)

max (1)

min (1)

val (1)

Пример #1

Показать файл

Файл: test_rolodexer.py Проект: fish2000/rolodexer

 def test_file_read(self):
     """Parse the ``data/data.in`` fixture and check the color histogram.

     Each line of the fixture is tokenized and classified.  Lines that
     raise ``rolodexer.RolodexerError`` have their zero-based line index
     recorded in ``errors``; every other line is stored in ``entries``
     and its ``'color'`` value (``'CLEAR'`` when the key is absent) is
     counted in a ``Histogram``.
     """
     from os.path import join, dirname
     from rolodexer.histogram import Histogram

     entries = []
     errors = []
     colors = Histogram()
     inpth = join(dirname(dirname(__file__)), 'data', 'data.in')

     with open(inpth, 'rb') as fh:
         # Iterating the file yields the same lines as repeated
         # readline() calls; enumerate() supplies the line index.
         for idx, linen in enumerate(fh):
             tokens = rolodexer.tokenize(linen.strip())
             try:
                 terms = rolodexer.classify(tokens)
             except rolodexer.RolodexerError:
                 errors.append(idx)
             else:
                 entries.append(terms)
                 colors.inc(terms.get('color', 'CLEAR'))

     output_dict = { u"entries": entries, u"errors": errors }
     output_json = json.dumps(output_dict, indent=2, sort_keys=True)
     print(output_json)
     print(colors)

     # all classified lines have colors, so the 'CLEAR' fallback
     # bucket must never have been incremented:
     # (assertEqual replaces the deprecated assertEquals alias)
     self.assertEqual(colors.min(), 3)
     self.assertEqual(colors.max(), 10)
     self.assertEqual(colors.val('CLEAR'), 0)

Пример #2

Показать файл

Файл: cli.py Проект: fish2000/rolodexer

def cli(argv=None):
    """Command-line entry point: classify each line of INFILE and emit JSON.

    Arguments are parsed with docopt from the module docstring.  Each
    line of the ``INFILE`` argument is tokenized and classified; lines
    that raise ``rolodexer.RolodexerError`` have their zero-based line
    index recorded under ``"errors"``, all others are collected under
    ``"entries"``.  Colors found in the entries are tallied in a
    ``Histogram`` that is only reported when ``--verbose`` is set.

    The resulting dict is serialized with ``JSON_ARGS`` either to stdout
    (when ``--output`` equals ``'stdout'``) or to the ``--output`` path,
    which is only written if it does not exist yet and its parent
    directory does.

    Args:
        argv: full argument vector including the program name at index
            0; falls back to ``sys.argv`` when empty or ``None``.
    """
    if not argv:
        argv = sys.argv

    arguments = docopt(__doc__, argv=argv[1:],
                                help=True,
                                version='0.1.3')

    entries = []
    errors  = []
    colors  = Histogram()

    ipth = arguments.get('INFILE')
    opth = arguments.get('--output')
    verbose = bool(arguments.get('--verbose'))

    with open(ipth, 'rb') as fh:
        # Iterating the file yields the same lines as repeated
        # readline() calls; enumerate() supplies the line index.
        for idx, linen in enumerate(fh):
            tokens = rolodexer.tokenize(linen.strip())
            try:
                terms = rolodexer.classify(tokens)
            except rolodexer.RolodexerError:
                errors.append(idx)
            else:
                entries.append(terms)
                if 'color' in terms:
                    colors.inc(terms.get('color'))

    output_dict = { u"entries": entries, u"errors": errors }

    if verbose:
        print("Entries parsed: %s" % len(entries), file=sys.stderr)
        print("Errors encountered: %s" % len(errors), file=sys.stderr)
        print_colors(colors)

    if opth == 'stdout':
        output_json = json.dumps(output_dict, **JSON_ARGS)
        print(output_json, file=sys.stdout)
    elif not exists(opth) and isdir(dirname(opth) or '.'):
        # dirname() of a bare filename is '', which isdir() rejects;
        # treat that case as the current directory so that relative
        # output paths such as "out.json" are honored.
        if verbose:
            print("rolodexer: saving output to %s" % opth, file=sys.stderr)
        with open(opth, 'wb') as fp:
            json.dump(output_dict, fp, **JSON_ARGS)
    else:
        # Never overwrite an existing file or create directories, but
        # do not discard the results without telling the user.
        print("rolodexer: not saving output to %s "
              "(file already exists or its directory is missing)" % opth,
              file=sys.stderr)

Пример #3

Показать файл

Файл: __init__.py Проект: fish2000/rolodexer

def classify(orig_terms):
    """Sort a list of tokenized terms into named record fields.

    Args:
        orig_terms: list of string terms from one input line.  The list
            is copied first and is not modified.

    Returns:
        A dict with ``u'phonenumber'`` and ``u'zipcode'`` always present,
        ``u'color'`` when a color term was found, and ``u'firstname'`` /
        ``u'lastname'`` when exactly one or two terms are left over.
        With more than two leftover terms no name keys are set.

    Raises:
        RDAmbiguousTerms: a single term passes more than one of the
            zip / phone / color tests.
        RDPhoneNumberError: no term passes the phone test.
        RDZipCodeError: no term passes the zip test.
        RDAmbiguousNames: no name terms remain, or the only remaining
            term does not split into at least two names.
    """
    out = dict()
    terms = copy(orig_terms)

    # first, sanity-check the digified terms --
    # if more than one can pass for a phone number, a color,
    # or a zip code (that is to say, the input is ambiguous),
    # we bail:
    for term in terms:
        # check each term against all test funcs --
        # if more than one bucket is nonzero, it's a problem
        h = Histogram()
        if is_zip(term):
            h.inc('zip')
        if is_phone(term):
            h.inc('phone')
        if is_color(term):
            h.inc('color')
        if len(h) > 1:
            # ERROR: couldn't distinguish one thing
            # from another... BAIL
            raise RDAmbiguousTerms("Term '%s' parsed ambiguously\n"
                                   "Passed multiple tests: %s" % (
                                       term, SEP_WS.join(h.iterkeys())
                                   ))

    # next, pull out the phone number, color and zip code
    # ... they are the easiest to find.  Iterate over a copy
    # because matched terms are removed from `terms` as we go:
    for term in copy(terms):
        if is_phone(term):
            out[u'phonenumber'] = u"%s" % phone_format(term)
        elif is_color(term):
            out[u'color'] = u"%s" % term
        elif is_zip(term):
            out[u'zipcode'] = u"%s" % term
        else:
            # not a recognized field: leave it for the name handling
            continue
        terms.remove(term)

    if u'phonenumber' not in out:
        # ERROR: NO PHONE / BAD PHONE!
        raise RDPhoneNumberError("No valid phone number in %d-term list\n"
                                 "Reconstructed original line:\n"
                                 "\t%s" % (len(terms), reconstruct(orig_terms)))

    if u'zipcode' not in out:
        # ERROR: NO ZIPCODE / BAD ZIPCODE!
        raise RDZipCodeError("No valid zip code in %d-term list\n"
                             "Reconstructed original line:\n"
                             "\t%s" % (len(terms), reconstruct(orig_terms)))

    # a missing color is deliberately tolerated: the u'color'
    # key is simply left out of the result.

    # what is left "should" be the pieces of the name,
    # e.g. ['Washington', 'Booker T.'], ['James Murphy'], &c
    if len(terms) > 2:
        # too many leftover terms: the record is returned
        # without any name keys rather than raising
        pass
    elif len(terms) == 2:
        # two terms are read as "last, first"
        out[u'firstname'] = u"%s" % terms[-1]
        out[u'lastname'] = u"%s" % terms[0]
    elif len(terms) == 1:
        names = terms[0].split()
        if len(names) > 1:
            out[u'firstname'] = u"%s" % names[0]
            out[u'lastname'] = u"%s" % names[-1]
        else:
            # ERROR: only one name.  An empty or whitespace-only
            # term splits to [], so fall back to the raw term
            # instead of failing with an IndexError here.
            lone_name = names[0] if names else terms[0]
            raise RDAmbiguousNames("Only one name present: '%s'\n"
                                   "Reconstructed original line:\n"
                                   "\t%s" % (lone_name, reconstruct(orig_terms)))
    else:
        # no leftover terms at all, hence no names
        raise RDAmbiguousNames("No names present!\n"
                               "Reconstructed original line:\n"
                               "\t%s" % reconstruct(orig_terms))

    return out