示例#1
0
    def _lookup_words(self, words, lang, max_per_language):
        """Look up and store a definition for each word in ``words``.

        For the English variants ('en-us', 'en-gb') the definition is first
        requested from ``_get_word_definition`` (wordnet); when that yields
        nothing, or for any other language, ``_get_word_definition_scrape``
        is used.  Definitions longer than 250 characters are cut down to 250
        with a trailing '...'.

        The outcome is saved with ``add_word_definition``.  When nothing was
        found an empty string is saved: '' is different from null and tells
        us not to try again.

        Processing stops as soon as the number of handled words reaches
        ``max_per_language`` (checked after each word).

        Returns a ``(count_success, count_failure)`` tuple.

        Raises ``Word.DoesNotExist`` when the word is missing for a
        non-English language; for 'en-us'/'en-gb' the error is ignored.
        """
        max_length = 250
        is_english = lang in ('en-us', 'en-gb')
        found = missed = 0

        for word in words:
            # wordnet is only consulted for the English variants
            text = None
            if is_english:
                text = _get_word_definition(word, language=lang)
            if not text:
                text = _get_word_definition_scrape(word, language=lang)

            if text:
                if len(text) > max_length:
                    text = text[:max_length - 3] + '...'
                to_store = text
            else:
                # '' (not null) marks the word as "already tried"
                to_store = ''

            try:
                add_word_definition(word, to_store, language=lang)
            except Word.DoesNotExist:
                # for example, "centre" in en-us doesn't exist
                if not is_english:
                    raise

            if text:
                print("%s %s" % ("FOUND!", repr(word)))
                found += 1
            else:
                missed += 1
                print("%s %s" % ("Failed :(", repr(word)))

            if found + missed >= max_per_language:
                break

        return found, missed
        
示例#2
0
    def handle_noargs(self, **options):
        """Interactively add words (and their definitions) per language.

        Prompts on stdin for one or more words, separated by commas and/or
        whitespace, then asks for every entry in ``ALL_LANGUAGE_OPTIONS``
        whether the words should be added for that language.

        For each word and each selected language:

        * the ``Word`` row is fetched or created with ``get_or_create``,
        * the ``_find_alternatives_<word>_<lang>`` cache entry is deleted,
        * a definition is looked up with ``_get_word_definition`` and, if
          that finds nothing, ``_get_word_definition_scrape``; when one is
          found it is saved with ``add_word_definition``.
        """
        from search.views import ALL_LANGUAGE_OPTIONS, _get_word_definition, \
          _get_word_definition_scrape, add_word_definition

        words = unicode(raw_input("Word(s): ").strip(), 'utf8')
        # Split on runs of commas/whitespace.  The pattern cannot match the
        # empty string, so the result does not depend on how re.split treats
        # empty matches.
        words = [x.strip() for x in re.split(r'[,\s]+', words) if x.strip()]

        langs = []
        for option in ALL_LANGUAGE_OPTIONS:
            answer = raw_input('\t%s [y/N] ' % option['label'].encode('latin1'))
            if answer.strip().lower() in ('y', 'yes'):
                langs.append(option['code'].lower())

        # insert them finally!
        from search.models import Word

        for word in words:
            length = len(word)
            print("%s %s" % (repr(word), length))

            for lang in langs:
                word_object, created = Word.objects.get_or_create(word=word, language=lang, length=length)
                # get_or_create reports created=True for a brand new row, so
                # the word was already present only when created is False.
                if not created:
                    print("%s %s" % ("Already had", repr(word)))
                cache_key = '_find_alternatives_%s_%s' % (word, lang)
                cache.delete(cache_key)

                # Start from scratch for every word/language pair so that a
                # definition found earlier is never attached to another word
                # or another language.
                definition = ''
                try:
                    definition = _get_word_definition(word, language=lang)
                except AttributeError:
                    # sometimes you get a weird AttributeError in nltk;
                    # fall through to the scraper below
                    pass

                if not definition:
                    # keep the scraped result; it is the fallback definition
                    definition = _get_word_definition_scrape(word, language=lang)

                if definition:
                    add_word_definition(word, definition, language=lang)