Пример #1
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build a word group from the space-separated phrase ``src``.

        At least one noun (class u'С') of the phrase must carry the u'им'
        case tag together with either the u'ед' number tag or the u'мн'
        gender tag; the last such noun provides the gender of the group.
        Other nouns are kept unchanged in every form, all remaining words
        are inflected.

        Known limitation noted by the original author: plural nouns such as
        "рога" are problematic.

        Returns ``cls(normalized=src)`` without forms as soon as any word
        raises NoGrammarFound. Raises NormalFormNeeded when no main noun is
        found.
        '''
        head_properties = None

        # items are (grammar class, upper-cased word, keep-unchanged flag)
        parts = []

        for token in src.split(' '):
            if not token:
                # repeated spaces produce empty tokens
                continue

            try:
                class_, properties = get_gram_info(
                    morph, efication(token.upper()), tech_vocabulary)
            except NoGrammarFound:
                return cls(normalized=src)

            is_noun = class_ == u'С'
            is_head = is_noun and u'им' == properties.case and (
                u'ед' == properties.number or properties.gender == u'мн')

            if is_head:
                head_properties = properties

            # only the nouns that are not the head stay frozen
            parts.append((class_,
                          efication(token).upper(),
                          is_noun and not is_head))

        if head_properties is None:
            raise NormalFormNeeded('no main noun found in phrase "%s"' % src)

        forms = []

        for number in PROPERTIES.NUMBERS:
            for case in PROPERTIES.CASES:
                grammemes = u','.join([case, number])
                rendered = []
                for class_, word, constant in parts:
                    if not constant:
                        word = morph.inflect_ru(word, grammemes, class_)
                    rendered.append(word.lower())
                forms.append(' '.join(rendered))

        return cls(normalized=src,
                   forms=forms,
                   properties=[head_properties.gender])
Пример #2
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Create a word group out of a phrase whose words are separated by
        single spaces.

        The phrase needs a main noun: a word of class u'С' tagged with case
        u'им' and with either number u'ед' or gender u'мн'. If several words
        qualify, the last one wins and its gender becomes the group property.
        Nouns that do not qualify are copied as is into every form.

        The original author warns about problems with plural nouns ("рога").

        Falls back to ``cls(normalized=src)`` when get_gram_info raises
        NoGrammarFound for a word; raises NormalFormNeeded if the phrase has
        no main noun.
        '''
        main_properties = None
        words = []

        for raw_word in (chunk for chunk in src.split(' ') if chunk):
            try:
                class_, properties = get_gram_info(
                    morph, efication(raw_word.upper()), tech_vocabulary)
            except NoGrammarFound:
                return cls(normalized=src)

            frozen = False

            if class_ == u'С':
                if u'им' == properties.case and (
                        u'ед' == properties.number
                        or properties.gender == u'мн'):
                    main_properties = properties
                else:
                    frozen = True

            words.append((class_, efication(raw_word).upper(), frozen))

        if main_properties is None:
            raise NormalFormNeeded('no main noun found in phrase "%s"' % src)

        forms = []

        for number in PROPERTIES.NUMBERS:
            for case in PROPERTIES.CASES:
                tags = u','.join([case, number])
                forms.append(' '.join([
                    word.lower() if frozen
                    else morph.inflect_ru(word, tags, class_).lower()
                    for class_, word, frozen in words
                ]))

        return cls(normalized=src,
                   forms=forms,
                   properties=[main_properties.gender])
Пример #3
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build the word from its base form ``src``.

        The base form must be tagged time u'прш', number u'ед' and gender
        u'мр', otherwise NormalFormNeeded is raised. When get_gram_info
        raises NoGrammarFound, a form-less ``cls(normalized=src)`` is
        returned.

        Forms are generated for the times u'прш' and u'нст': for each time,
        one singular form per gender of PROPERTIES.GENDERS followed by one
        u'мн' form.
        '''
        normalized = efication(src.upper())

        try:
            class_, properties = get_gram_info(morph, normalized,
                                               tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        in_normal_form = (u'прш' == properties.time
                          and u'ед' == properties.number
                          and u'мр' == properties.gender)

        if not in_normal_form:
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' %
                                   (src, properties))

        grammemes = []

        for time in (u'прш', u'нст'):
            grammemes.extend(u'%s,%s,ед' % (time, gender)
                             for gender in PROPERTIES.GENDERS)
            grammemes.append(u'%s,мн' % (time, ))

        forms = [morph.inflect_ru(normalized, tags, class_).lower()
                 for tags in grammemes]

        return cls(normalized=src, forms=forms, properties=[])
Пример #4
0
    def _preprocess_externals(self, dictionary, externals):
        '''
        Convert raw external values into ``(word, arguments)`` pairs.

        Each value of ``externals`` is either a bare value or a 2-tuple
        ``(value, additional_args)``, where ``additional_args`` is a
        comma-separated string or any other iterable of arguments.

        The value itself may be:
        - a number: wrapped into Numeral, with empty Args;
        - a WordBase instance: used as is, Args built from its properties;
        - anything else: looked up with ``dictionary.get_word`` after
          efication, Args built from the properties of the found word.

        Returns a new dict with the same keys as ``externals``.
        '''
        result = {}

        for key, value in externals.items():
            if isinstance(value, tuple):
                base, extra = value
                if isinstance(extra, basestring):
                    extra = extra.split(u',')
            else:
                base, extra = value, ()

            if isinstance(base, numbers.Number):
                word = Numeral(base)
                arguments = Args()
            else:
                if isinstance(base, WordBase):
                    word = base
                else:
                    word = dictionary.get_word(efication(base))
                arguments = Args(*word.properties)

            # explicitly passed arguments are applied on top of the word's own
            arguments.update(*extra)

            result[key] = (word, arguments)

        return result
Пример #5
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build the verb from its base form ``src``.

        The base form must be tagged time u'прш', number u'ед' and gender
        u'мр', otherwise NormalFormNeeded is raised. When get_gram_info
        raises NoGrammarFound, a form-less ``cls(normalized=src)`` is
        returned.

        Sixteen forms are produced, in this order: four u'прш' forms (three
        genders in singular, then u'мн'), then six u'нст' forms and six
        u'буд' forms (persons 1-3, each in u'ед' and u'мн').
        '''
        normalized = efication(src.upper())

        try:
            _class, properties = get_gram_info(morph, normalized,
                                               tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        in_normal_form = (u'прш' == properties.time
                          and u'ед' == properties.number
                          and u'мр' == properties.gender)

        if not in_normal_form:
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' %
                                   (src, properties))

        # every form is inflected from this intermediate word, not from src
        base = morph.inflect_ru(normalized, u'ед,мр', u'Г')

        grammemes = (
            u'прш,мр,ед', u'прш,жр,ед', u'прш,ср,ед', u'прш,мн',
            u'нст,1л,ед', u'нст,1л,мн',
            u'нст,2л,ед', u'нст,2л,мн',
            u'нст,3л,ед', u'нст,3л,мн',
            u'буд,1л,ед', u'буд,1л,мн',
            u'буд,2л,ед', u'буд,2л,мн',
            u'буд,3л,ед', u'буд,3л,мн',
        )

        forms = [morph.inflect_ru(base, tags).lower() for tags in grammemes]

        return cls(normalized=src, forms=forms, properties=[])
Пример #6
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build the word from its base form ``src``.

        The base form must be tagged case u'им' and number u'ед', otherwise
        NormalFormNeeded is raised. When get_gram_info raises
        NoGrammarFound, a form-less ``cls(normalized=src)`` is returned.

        Forms are ordered as: for every gender of PROPERTIES.GENDERS all
        cases of PROPERTIES.CASES in singular, then all cases in u'мн'.
        '''
        normalized = efication(src.upper())

        try:
            class_, properties = get_gram_info(morph, normalized,
                                               tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        if not (u'им' == properties.case and u'ед' == properties.number):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' %
                                   (src, properties))

        # singular: one block of cases per gender
        forms = [
            morph.inflect_ru(normalized, u'%s,%s,ед' % (case, gender),
                             class_).lower()
            for gender in PROPERTIES.GENDERS
            for case in PROPERTIES.CASES
        ]

        # plural: a single block of cases
        forms.extend(
            morph.inflect_ru(normalized, u'%s,%s' % (case, u'мн'),
                             class_).lower()
            for case in PROPERTIES.CASES)

        return cls(normalized=src, forms=forms, properties=[])
Пример #7
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Create the verb from the base form ``src``.

        Raises NormalFormNeeded unless the base form is tagged time u'прш',
        number u'ед' and gender u'мр'. Returns ``cls(normalized=src)``
        without forms when get_gram_info raises NoGrammarFound.

        The resulting sixteen forms are: u'прш' for three genders in
        singular and for u'мн', followed by u'нст' and then u'буд' for
        persons 1-3, each person in u'ед' and u'мн'.
        '''
        normalized = efication(src.upper())

        try:
            _class, properties = get_gram_info(morph, normalized, tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        if not (u'прш' == properties.time
                and u'ед' == properties.number
                and u'мр' == properties.gender):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

        # all forms are derived from this intermediate word
        base = morph.inflect_ru(normalized, u'ед,мр', u'Г')

        forms = []

        for tags in (u'прш,мр,ед', u'прш,жр,ед', u'прш,ср,ед', u'прш,мн',
                     u'нст,1л,ед', u'нст,1л,мн',
                     u'нст,2л,ед', u'нст,2л,мн',
                     u'нст,3л,ед', u'нст,3л,мн',
                     u'буд,1л,ед', u'буд,1л,мн',
                     u'буд,2л,ед', u'буд,2л,мн',
                     u'буд,3л,ед', u'буд,3л,мн'):
            forms.append(morph.inflect_ru(base, tags).lower())

        return cls(normalized=src, forms=forms, properties=[])
Пример #8
0
    def _preprocess_externals(self, dictionary, externals):
        '''
        Turn every external value into a ``(word, arguments)`` pair.

        A value of ``externals`` may be given alone or as a 2-tuple
        ``(value, additional_args)``; ``additional_args`` is split on commas
        when it is a string and otherwise used as an iterable as is.

        Numbers become Numeral words with empty Args. WordBase instances are
        used directly. Any other value is resolved through
        ``dictionary.get_word`` after efication. In the last two cases Args
        starts from the word's properties.

        Returns a new dict keyed like ``externals``.
        '''
        prepared = {}

        for external_id, external in externals.items():
            extra_args = ()
            source = external

            if isinstance(external, tuple):
                source, extra_args = external
                if isinstance(extra_args, basestring):
                    extra_args = extra_args.split(u',')

            if isinstance(source, numbers.Number):
                word, arguments = Numeral(source), Args()
            else:
                word = (source if isinstance(source, WordBase)
                        else dictionary.get_word(efication(source)))
                arguments = Args(*word.properties)

            # caller-provided arguments are applied after the word's own
            arguments.update(*extra_args)

            prepared[external_id] = (word, arguments)

        return prepared
Пример #9
0
    def create_from_string(morph, string, tech_vocabulary={}):
        '''
        Choose the word constructor that fits ``string`` and build the word.

        A string containing a space is always treated as a noun group.
        Otherwise the grammar class reported by get_gram_info selects the
        constructor; the classes u'П', u'КР_ПРИЛ' and u'МС-П' all map to the
        adjective constructor.

        Raises TextgenException for an unsupported grammar class. Exceptions
        of get_gram_info are not caught here.
        '''
        normalized = efication(string.upper())

        if ' ' in string:
            word_type = WORD_TYPE.NOUN_GROUP
        else:
            class_, _properties = get_gram_info(morph, normalized, tech_vocabulary)

            if class_ == u'С':
                word_type = WORD_TYPE.NOUN
            elif class_ in (u'П', u'КР_ПРИЛ', u'МС-П'):
                word_type = WORD_TYPE.ADJECTIVE
            elif class_ == u'Г':
                word_type = WORD_TYPE.VERB
            elif class_ == u'ПРИЧАСТИЕ':
                word_type = WORD_TYPE.PARTICIPLE
            elif class_ == u'КР_ПРИЧАСТИЕ':
                word_type = WORD_TYPE.SHORT_PARTICIPLE
            elif class_ == u'МС':
                word_type = WORD_TYPE.PRONOUN
            else:
                raise TextgenException(u'unknown word type: %s of word: %s' % (class_, string) )

        # the constructor receives the original string, not the normalized one
        return WORD_CONSTRUCTORS[word_type].create_from_baseword(morph, string, tech_vocabulary)
Пример #10
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build the noun from its base form ``src``.

        The base form must be tagged case u'им'. It must also be tagged
        number u'ед' unless its gender is something other than u'мр', u'ср'
        or u'жр'. Otherwise NormalFormNeeded is raised.

        When get_gram_info raises NoGrammarFound, a form-less
        ``cls(normalized=src)`` is returned.

        Forms cover every number of PROPERTIES.NUMBERS, each with all cases
        of PROPERTIES.CASES; the gender of the base form becomes the only
        property of the word.
        '''
        normalized = efication(src.upper())

        try:
            class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        acceptable = u'им' == properties.case and (
            u'ед' == properties.number
            or properties.gender not in (u'мр', u'ср', u'жр'))

        if not acceptable:
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

        forms = [
            morph.inflect_ru(normalized, u'%s,%s' % (case, number), class_).lower()
            for number in PROPERTIES.NUMBERS
            for case in PROPERTIES.CASES
        ]

        return cls(normalized=src, forms=forms, properties=[properties.gender])
Пример #11
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Create the word from the base form ``src``.

        Raises NormalFormNeeded unless the base form is tagged time u'прш',
        number u'ед' and gender u'мр'. Returns ``cls(normalized=src)``
        without forms when get_gram_info raises NoGrammarFound.

        For each of the times u'прш' and u'нст' the forms contain one
        singular form per gender of PROPERTIES.GENDERS and then the u'мн'
        form.
        '''
        normalized = efication(src.upper())

        try:
            class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        if not (u'прш' == properties.time
                and u'ед' == properties.number
                and u'мр' == properties.gender):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

        def inflected(tags):
            # every form is derived from the normalized base word
            return morph.inflect_ru(normalized, tags, class_).lower()

        forms = []

        for time in (u'прш', u'нст'):
            forms.extend(inflected(u'%s,%s,ед' % (time, gender))
                         for gender in PROPERTIES.GENDERS)
            forms.append(inflected(u'%s,мн' % (time, )))

        return cls(normalized=src, forms=forms, properties=[])
Пример #12
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build the pronoun from its base form ``src``.

        The pronouns u'ОН' and u'Я' (compared after upper-casing and
        efication) get hard-coded tables of 24 forms, laid out as four
        groups of six forms. Any other word is delegated to
        ``Adjective.create_from_baseword``.

        The tables use the spelling without "ё" throughout (u'ее', u'нем'),
        so that the same form is spelled the same way in every group.
        '''
        normalized = efication(src.upper())

        # pymorphy does not change the gender of pronouns, and some of them
        # are always needed, so their forms are hard-coded

        if normalized == u'ОН':
            return cls(normalized=src,
                       forms=(u'он', u'его', u'ему', u'его', u'им', u'нем',
                              u'она', u'ее', u'ей', u'ее', u'ей', u'ней',
                              # was u'нём', inconsistent with the first group
                              u'оно', u'его', u'ему', u'его', u'им', u'нем',
                              u'они', u'их', u'им', u'их',  u'ими', u'них'),
                       properties=[])

        if normalized == u'Я':
            # the same six forms are repeated for every group
            return cls(normalized=src,
                       forms=(u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                              u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                              u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                              u'я', u'меня', u'мне', u'меня', u'мной', u'мне'),
                       properties=[])

        return Adjective.create_from_baseword(morph, src, tech_vocabulary)
Пример #13
0
    def create_from_string(morph, string, tech_vocabulary={}):
        '''
        Build a word object from ``string`` using the matching constructor.

        Strings with a space go to the noun-group constructor without any
        grammar lookup. For single words the grammar class returned by
        get_gram_info picks the constructor from WORD_CONSTRUCTORS; u'П',
        u'КР_ПРИЛ' and u'МС-П' share the adjective constructor.

        Raises TextgenException when the grammar class is not supported.
        Exceptions raised by get_gram_info propagate to the caller.
        '''
        normalized = efication(string.upper())

        if ' ' in string:
            word_type = WORD_TYPE.NOUN_GROUP
        else:
            class_, _properties = get_gram_info(morph, normalized,
                                                tech_vocabulary)

            if class_ == u'С':
                word_type = WORD_TYPE.NOUN
            elif class_ in (u'П', u'КР_ПРИЛ', u'МС-П'):
                word_type = WORD_TYPE.ADJECTIVE
            elif class_ == u'Г':
                word_type = WORD_TYPE.VERB
            elif class_ == u'ПРИЧАСТИЕ':
                word_type = WORD_TYPE.PARTICIPLE
            elif class_ == u'КР_ПРИЧАСТИЕ':
                word_type = WORD_TYPE.SHORT_PARTICIPLE
            elif class_ == u'МС':
                word_type = WORD_TYPE.PRONOUN
            else:
                raise TextgenException(u'unknown word type: %s of word: %s' %
                                       (class_, string))

        # constructors receive the untouched string and normalize it themselves
        constructor = WORD_CONSTRUCTORS[word_type]
        return constructor.create_from_baseword(morph, string,
                                                tech_vocabulary)
Пример #14
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Create the word from the base form ``src``.

        Raises NormalFormNeeded unless the base form is tagged case u'им'
        and number u'ед'. Returns ``cls(normalized=src)`` without forms when
        get_gram_info raises NoGrammarFound.

        The forms list holds, per gender of PROPERTIES.GENDERS, all cases of
        PROPERTIES.CASES in singular, followed by all cases in u'мн'.
        '''
        normalized = efication(src.upper())

        try:
            class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        if not (u'им' == properties.case and u'ед' == properties.number):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

        def inflected(tags):
            # every form is derived from the normalized base word
            return morph.inflect_ru(normalized, tags, class_).lower()

        # singular forms, gender by gender
        forms = [inflected(u'%s,%s,ед' % (case, gender))
                 for gender in PROPERTIES.GENDERS
                 for case in PROPERTIES.CASES]

        # plural forms
        forms += [inflected(u'%s,%s' % (case, u'мн'))
                  for case in PROPERTIES.CASES]

        return cls(normalized=src, forms=forms, properties=[])
Пример #15
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Create the pronoun from the base form ``src``.

        After upper-casing and efication, u'ОН' and u'Я' are served from
        hard-coded tables of 24 forms (four groups of six forms each). Every
        other word is built by ``Adjective.create_from_baseword``.

        All hard-coded forms are spelled without "ё" (u'ее', u'нем'), which
        keeps identical forms identical across groups.
        '''
        normalized = efication(src.upper())

        # pymorphy does not change the gender of pronouns, and some of them
        # are always needed, so their forms are hard-coded

        if normalized == u'ОН':
            return cls(normalized=src,
                       forms=(u'он', u'его', u'ему', u'его', u'им', u'нем',
                              u'она', u'ее', u'ей', u'ее', u'ей', u'ней',
                              # was u'нём', inconsistent with the first group
                              u'оно', u'его', u'ему', u'его', u'им', u'нем',
                              u'они', u'их', u'им', u'их', u'ими', u'них'),
                       properties=[])

        if normalized == u'Я':
            # the same six forms are repeated for every group
            return cls(normalized=src,
                       forms=(u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                              u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                              u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                              u'я', u'меня', u'мне', u'меня', u'мной', u'мне'),
                       properties=[])

        return Adjective.create_from_baseword(morph, src, tech_vocabulary)
Пример #16
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Create the noun from the base form ``src``.

        NormalFormNeeded is raised when the base form is not tagged case
        u'им', or when it is not tagged number u'ед' while its gender is one
        of u'мр', u'ср', u'жр'. When get_gram_info raises NoGrammarFound, a
        form-less ``cls(normalized=src)`` is returned.

        The forms list holds all cases of PROPERTIES.CASES for every number
        of PROPERTIES.NUMBERS; the word's single property is the gender of
        the base form.
        '''
        normalized = efication(src.upper())

        try:
            class_, properties = get_gram_info(morph, normalized,
                                               tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        in_normal_form = u'им' == properties.case and (
            u'ед' == properties.number
            or properties.gender not in (u'мр', u'ср', u'жр'))

        if not in_normal_form:
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' %
                                   (src, properties))

        forms = []

        for number in PROPERTIES.NUMBERS:
            forms.extend(
                morph.inflect_ru(normalized, u'%s,%s' % (case, number),
                                 class_).lower()
                for case in PROPERTIES.CASES)

        return cls(normalized=src, forms=forms, properties=[properties.gender])
Пример #17
0
 def get_word(self, normalized):
     '''
     Return the word stored in ``self.data`` for ``normalized``.

     The key is passed through efication before the lookup. When nothing
     is stored under it, a Fake word with a "word not found" text is
     returned instead of raising.
     '''
     key = efication(normalized)
     if key not in self.data:
         return Fake(u'<word not found: %s>' % key)
     return self.data[key]
Пример #18
0
 def get_word(self, normalized):
     '''
     Look up a word in ``self.data`` by its efication-processed key.

     Missing words do not raise: a Fake word carrying a "word not found"
     message is returned in their place.
     '''
     lookup_key = efication(normalized)
     found = lookup_key in self.data
     return self.data[lookup_key] if found else Fake(u'<word not found: %s>' % lookup_key)
Пример #19
0
 def add_word(self, word, overwrite=False):
     '''
     Store ``word`` in ``self.data`` under its efication-processed
     ``normalized`` attribute.

     An already stored word is kept unless ``overwrite`` is true.
     '''
     key = efication(word.normalized)
     # TODO: add test for the "already stored, no overwrite" case
     if overwrite or key not in self.data:
         self.data[key] = word
Пример #20
0
 def add_word(self, word, overwrite=False):
     '''
     Register ``word`` in ``self.data``; the key is ``word.normalized``
     passed through efication.

     With ``overwrite`` false (the default) an existing entry under the
     same key is left untouched.
     '''
     storage_key = efication(word.normalized)
     already_known = storage_key in self.data
     if already_known and not overwrite:
         # TODO: add test
         return
     self.data[storage_key] = word