def __unicode__(self):
    """Return a one-line, human-readable summary of this object.

    The text has the form ``"<count> pizza(s) to <client> @ <HH:MM>"``:

    * ``<count>`` is the number of items returned by
      ``self.pizza_set.all()``, passed through ``num2eng.num2eng``
      (presumably the English spelling of the number -- confirm against
      that module).
    * ``<client>`` is ``self.client`` formatted with ``%s``.
    * ``<HH:MM>`` is ``self.create_date`` formatted as hours and minutes.

    Returns:
        The summary as a unicode string.
    """
    pizza_count = len(self.pizza_set.all())

    # Only exactly one pizza is singular. The previous ``> 1`` test left
    # a count of zero without the plural "s".
    plural_suffix = "" if pizza_count == 1 else "s"

    return u"%s pizza%s to %s @ %s" % (
        num2eng.num2eng(pizza_count),
        plural_suffix,
        self.client,
        self.create_date.strftime("%H:%M"),
    )
def normalize_post(all_facts):
    """Post-process parsed facts into finished, tagged sentences.

    For every number that has a non-empty fact list, each fact's text is
    passed through ``capitalize_head``, given a trailing period if it lacks
    one, and the fact is tagged with:

    * ``'self'``   -- True when the lower-cased text contains the number,
      either as the key string itself or as the lower-cased result of
      ``num2eng.num2eng(int(number))``.
    * ``'manual'`` -- always False.

    Facts whose text is empty, or contains ``:`` or a newline, are dropped
    (the latter two mark the element as too complex). A fact that raises
    while being processed is reported on stdout with its traceback and
    skipped, so one malformed fact does not abort the whole run.

    Args:
        all_facts: mapping of number to a list of fact dicts, each expected
            to carry a ``'text'`` entry. The number key is used both with
            ``int()`` and as a substring, so it is expected to be a numeric
            string. Fact dicts that are kept are updated in place.

    Returns:
        A new dict mapping each number that had a non-empty fact list to
        the list of facts that were kept (possibly empty).
    """
    all_normalized_facts = {}
    for number, facts in all_facts.items():
        if not facts:
            continue

        kept = all_normalized_facts[number] = []
        for fact in facts:
            try:
                text = fact['text']

                # Nothing to normalize; skip quietly instead of tripping an
                # IndexError on the last-character check further down.
                if not text:
                    continue

                # Ignore the element if it contains ':' or a newline, as
                # these indicate the element is complex.
                if ':' in text or '\n' in text:
                    continue

                # Capitalize the beginning of the sentence.
                text = capitalize_head(text)

                # Add an ending period if necessary.
                if not text.endswith('.'):
                    text += '.'

                # See if the number itself appears in the fact.
                # TODO: should use regexp for this matching to handle word
                # boundary
                text_lc = text.lower()
                word_number_lc = num2eng.num2eng(int(number)).lower()
                mentions_number = bool(
                    number in text_lc
                    or (word_number_lc and word_number_lc in text_lc)
                )

                # Mutate the fact only after everything that can fail has
                # succeeded.
                fact['self'] = mentions_number
                fact['manual'] = False
                fact['text'] = text
                kept.append(fact)
            except Exception:
                # Best-effort per fact: report and move on. The text is
                # read defensively because a missing 'text' key (or a fact
                # that is not a dict) may be the very reason we are here.
                raw_text = fact.get('text') if isinstance(fact, dict) else fact
                print('Error post-parsing [{0}: {1}]'.format(number, raw_text))
                traceback.print_exc(file=sys.stdout)
    return all_normalized_facts
def normalize_number(all_facts):
    """Trim the leading subject (and verb) from each fact about a number.

    Each fact's text is capitalized with ``capitalize_head`` and tagged with
    ``get_words_tags``; the tag of a token is read from index 1 of its
    entry. The number of leading whitespace-delimited words to drop is then
    chosen as follows:

    * first token tagged ``'DET'``: drop nothing;
    * text starts with the capitalized ``num2eng.num2eng(int(number))``
      spelling and the token right after that spelling is tagged ``'V'``:
      drop the spelling plus that token;
    * text starts with the number string, or the first token is tagged
      ``'PRO'``, and the second token is tagged ``'V'``: drop two words;
    * anything else: the fact is discarded.

    Kept facts receive ``'pos'`` (the tag of the first remaining token) and
    their ``'text'`` is replaced by the trimmed, re-capitalized text. A fact
    that raises while being processed is reported on stdout and skipped.

    Args:
        all_facts: mapping of number to a list of fact dicts, each expected
            to carry a ``'text'`` entry. The number key is used both with
            ``int()`` and ``str.startswith``, so it is expected to be a
            numeric string. A falsy fact list is treated as empty. Fact
            dicts that are kept are updated in place.

    Returns:
        A new dict mapping every number in ``all_facts`` to the list of
        facts that were kept (possibly empty).
    """
    # TODO: only keep if it does not contain number further in the sentence
    # TODO: handle "In ..., # is ..."
    all_normalized_facts = {}
    for number, facts in all_facts.items():
        kept = all_normalized_facts[number] = []
        # Tolerate None/empty fact lists instead of raising on iteration.
        for fact in facts or ():
            try:
                text = fact['text']

                # Normalize the sentence start.
                word_number = capitalize_head(num2eng.num2eng(int(number)))
                text = capitalize_head(text)
                words_tags = get_words_tags(text)
                word_number_len = len(get_words_tags(word_number))

                if words_tags[0][1] == 'DET':
                    offset = 0
                elif (word_number and text.startswith(word_number)
                        and words_tags[word_number_len][1] == 'V'):
                    offset = word_number_len + 1
                elif ((text.startswith(number) or words_tags[0][1] == 'PRO')
                        and words_tags[1][1] == 'V'):
                    offset = 2
                else:
                    continue

                # One lazy "anything up to the next whitespace" chunk per
                # word to drop, anchored at the start of the text.
                leading_words = r'^' + r'.*?\s' * offset
                trimmed = capitalize_head(re.sub(leading_words, '', text))
                pos = words_tags[offset][1]

                # Mutate the fact only after everything that can fail has
                # succeeded, so a failure never leaves it half-updated.
                fact['pos'] = pos
                fact['text'] = trimmed
                kept.append(fact)
            except Exception:
                # Best-effort per fact: report and move on. The text is
                # read defensively because a missing 'text' key (or a fact
                # that is not a dict) may be the very reason we are here.
                raw_text = fact.get('text') if isinstance(fact, dict) else fact
                print('Error parsing number [{0}: {1}]'.format(number, raw_text))
    return all_normalized_facts
__author__ = 'flaviocaetano'

from datetime import datetime

from num2eng import num2eng

if __name__ == '__main__':
    # Convert every integer from 1 to 1000 with num2eng, echo each result,
    # and total the number of non-space characters across all of them.
    started = datetime.now()

    letter_total = 0
    for n in range(1, 1001):
        spelled = num2eng(n)
        print(spelled)
        # Spaces do not count towards the total.
        letter_total += len(spelled) - spelled.count(' ')

    # Elapsed wall-clock time covers both the conversion and the printing.
    elapsed = datetime.now() - started
    print('result: %s (%ss)' % (letter_total, elapsed))