Пример #1
0
    def __init__(self):
        """
        Construct the managers, the shared SayState and TransformState, the
        text -> parse -> surface -> deep converters, and the Joiner.
        """
        verbs = VerbManager.from_files(
            'panoptes/ling/verb/conjugations.csv', 'data/verbs.json')

        comparatives = ComparativeManager.default(SyllableCounter.default())
        det_pronouns = DetPronounManager()
        pro_adverbs = ProAdverbManager()
        times_of_day = TimeOfDayManager()

        inflections = InflectionManager()
        personals = PersonalManager(inflections)
        plurals = PluralManager.default()

        self.say_state = SayState(
            comparatives, det_pronouns, inflections, personals, plurals,
            pro_adverbs, times_of_day, verbs)

        # Conjugation requires a SayContext, but none of its fields affect
        # conjugation for any object, so these values are arbitrary.
        placeholder_context = SayContext(
            prep=None, has_left=False, has_right=False, is_possessive=False)
        purposes = PurposeManager()
        relations = RelationManager()
        self.transform_state = TransformState(
            placeholder_context, purposes, relations)

        # Recognition pipeline: text -> surface structure -> deep structure.
        self.text_to_parse = TextToParse()
        self.parse_to_surface = ParseToSurface(
            comparatives, det_pronouns, personals, plurals, pro_adverbs,
            self.say_state, times_of_day, verbs)
        self.surface_to_deep = SurfaceToDeep(purposes, relations)

        self.joiner = Joiner()
Пример #2
0
    def __init__(self):
        """
        Build every manager once and share them between the SayState, the
        TransformState and the recognition converters stored on this object.
        """
        conjugations_path = 'panoptes/ling/verb/conjugations.csv'
        verbs_path = 'data/verbs.json'
        verb_manager = VerbManager.from_files(conjugations_path, verbs_path)

        counter = SyllableCounter.default()
        comparative_manager = ComparativeManager.default(counter)
        det_pronoun_manager = DetPronounManager()
        pro_adverb_manager = ProAdverbManager()
        time_of_day_manager = TimeOfDayManager()

        inflection_manager = InflectionManager()
        personal_manager = PersonalManager(inflection_manager)
        plural_manager = PluralManager.default()

        self.say_state = SayState(comparative_manager,
                                  det_pronoun_manager,
                                  inflection_manager,
                                  personal_manager,
                                  plural_manager,
                                  pro_adverb_manager,
                                  time_of_day_manager,
                                  verb_manager)

        # A SayContext has to be supplied for conjugation, although none of
        # its fields affect conjugation for any object.
        dummy_context = SayContext(prep=None,
                                   has_left=False,
                                   has_right=False,
                                   is_possessive=False)
        purpose_manager = PurposeManager()
        relation_manager = RelationManager()
        self.transform_state = TransformState(dummy_context,
                                              purpose_manager,
                                              relation_manager)

        # Text -> surface structure -> deep structure.
        self.text_to_parse = TextToParse()
        self.parse_to_surface = ParseToSurface(comparative_manager,
                                               det_pronoun_manager,
                                               personal_manager,
                                               plural_manager,
                                               pro_adverb_manager,
                                               self.say_state,
                                               time_of_day_manager,
                                               verb_manager)
        self.surface_to_deep = SurfaceToDeep(purpose_manager,
                                             relation_manager)

        self.joiner = Joiner()
Пример #3
0
class English(object):
    """
    Converts between text and sentence structures.

    recognize() goes text -> parses -> surface sentences -> deep sentences.
    say() goes deep sentence -> surface sentence -> tokens -> text.
    """

    def __init__(self):
        """
        Construct the managers, the shared SayState and TransformState, the
        recognition converters, and the Joiner.

        NOTE(review): the verb data paths are relative; presumably they are
        resolved against the working directory -- confirm in
        VerbManager.from_files.
        """
        conj_f = 'panoptes/ling/verb/conjugations.csv'
        verb_f = 'data/verbs.json'
        verb_mgr = VerbManager.from_files(conj_f, verb_f)

        syllable_counter = SyllableCounter.default()
        comparative_mgr = ComparativeManager.default(syllable_counter)
        det_pronoun_mgr = DetPronounManager()
        pro_adverb_mgr = ProAdverbManager()
        time_of_day_mgr = TimeOfDayManager()

        inflection_mgr = InflectionManager()
        personal_mgr = PersonalManager(inflection_mgr)
        plural_mgr = PluralManager.default()

        self.say_state = SayState(
            comparative_mgr, det_pronoun_mgr, inflection_mgr, personal_mgr,
            plural_mgr, pro_adverb_mgr, time_of_day_mgr, verb_mgr)

        # The SayContext is needed for conjugation.  None of its fields affect
        # conjugation for any object.
        arbitrary_say_context = SayContext(
            prep=None, has_left=False, has_right=False, is_possessive=False)
        purpose_mgr = PurposeManager()
        relation_mgr = RelationManager()
        self.transform_state = \
            TransformState(arbitrary_say_context, purpose_mgr, relation_mgr)

        # Text -> surface structure -> deep structure.
        self.text_to_parse = TextToParse()
        self.parse_to_surface = ParseToSurface(
            comparative_mgr, det_pronoun_mgr, personal_mgr, plural_mgr,
            pro_adverb_mgr, self.say_state, time_of_day_mgr, verb_mgr)
        self.surface_to_deep = SurfaceToDeep(purpose_mgr, relation_mgr)

        self.joiner = Joiner()

    @staticmethod
    def _unique_sorted(sentences):
        """
        Key each sentence by the JSON form of its dump(), drop sentences
        whose key was already seen, and sort the survivors by key.

        Returns a list of (key, sentence) pairs.
        """
        seen = set()
        pairs = []
        for sentence in sentences:
            key = json.dumps(sentence.dump(), indent=4, sort_keys=True)
            if key in seen:
                continue
            # Remember the key; without this the membership test above can
            # never succeed and duplicates are kept.
            seen.add(key)
            pairs.append((key, sentence))
        # Order by key only, so sentence objects never need to be comparable.
        pairs.sort(key=lambda pair: pair[0])
        return pairs

    def recognize(self, text, verbose=False):
        """
        text -> Recognition

        Parses the text, turns every parse into surface sentences, and every
        surface sentence into deep sentences.  Surface and deep sentences are
        each deduplicated by the JSON form of their dump() and ordered by
        that form.

        If verbose is true, the results of each stage are printed.
        """
        parses = list(self.text_to_parse.parse(text))

        if verbose:
            print('-- %d parses' % len(parses))
            for parse in parses:
                print('-- PARSE')
                parse.dump()

        keys_ssens = self._unique_sorted(
            ssen
            for parse in parses
            for ssen in self.parse_to_surface.recog(parse))

        if verbose:
            print('-- %d ssens' % len(keys_ssens))
            for i, (key, ssen) in enumerate(keys_ssens):
                print('-- SSEN %d' % i)
                print(key)

        keys_dsens = self._unique_sorted(
            dsen
            for _, ssen in keys_ssens
            for dsen in self.surface_to_deep.recog(ssen))

        if verbose:
            print('-- %d dsens' % len(keys_dsens))
            for i, (key, dsen) in enumerate(keys_dsens):
                print('-- DSEN %d' % i)
                print(key)

        ssens = [ssen for _, ssen in keys_ssens]
        dsens = [dsen for _, dsen in keys_dsens]
        return Recognition(parses, ssens, dsens)

    def say(self, dsen, idiolect):
        """
        DeepSentence, Idiolect -> text

        Converts the deep sentence to a surface sentence, asks that for its
        tokens, and joins the tokens using the idiolect's contractions
        setting.
        """
        ssen = dsen.to_surface(self.transform_state, self.say_state, idiolect)
        tokens = ssen.say(self.say_state, idiolect)
        text = self.joiner.join(tokens, idiolect.contractions)
        return text
Пример #4
0
class English(object):
    """
    Converts between text and sentence structures.

    recognize() goes text -> parses -> surface sentences -> deep sentences.
    say() goes deep sentence -> surface sentence -> tokens -> text.
    """

    def __init__(self):
        """
        Construct the managers, the shared SayState and TransformState, the
        recognition converters, and the Joiner.

        NOTE(review): the verb data paths are relative; presumably they are
        resolved against the working directory -- confirm in
        VerbManager.from_files.
        """
        conj_f = 'panoptes/ling/verb/conjugations.csv'
        verb_f = 'data/verbs.json'
        verb_mgr = VerbManager.from_files(conj_f, verb_f)

        syllable_counter = SyllableCounter.default()
        comparative_mgr = ComparativeManager.default(syllable_counter)
        det_pronoun_mgr = DetPronounManager()
        pro_adverb_mgr = ProAdverbManager()
        time_of_day_mgr = TimeOfDayManager()

        inflection_mgr = InflectionManager()
        personal_mgr = PersonalManager(inflection_mgr)
        plural_mgr = PluralManager.default()

        self.say_state = SayState(comparative_mgr, det_pronoun_mgr,
                                  inflection_mgr, personal_mgr, plural_mgr,
                                  pro_adverb_mgr, time_of_day_mgr, verb_mgr)

        # The SayContext is needed for conjugation.  None of its fields affect
        # conjugation for any object.
        arbitrary_say_context = SayContext(prep=None,
                                           has_left=False,
                                           has_right=False,
                                           is_possessive=False)
        purpose_mgr = PurposeManager()
        relation_mgr = RelationManager()
        self.transform_state = \
            TransformState(arbitrary_say_context, purpose_mgr, relation_mgr)

        # Text -> surface structure -> deep structure.
        self.text_to_parse = TextToParse()
        self.parse_to_surface = ParseToSurface(comparative_mgr,
                                               det_pronoun_mgr, personal_mgr,
                                               plural_mgr, pro_adverb_mgr,
                                               self.say_state, time_of_day_mgr,
                                               verb_mgr)
        self.surface_to_deep = SurfaceToDeep(purpose_mgr, relation_mgr)

        self.joiner = Joiner()

    @staticmethod
    def _dedup_by_dump(sentences):
        """
        Pair each sentence with the JSON form of its dump(), keeping only the
        first sentence for each distinct JSON form, sorted by that form.

        Returns a list of (key, sentence) pairs.
        """
        seen_keys = set()
        keyed = []
        for sentence in sentences:
            key = json.dumps(sentence.dump(), indent=4, sort_keys=True)
            if key in seen_keys:
                continue
            # Record the key so that later duplicates are actually skipped.
            seen_keys.add(key)
            keyed.append((key, sentence))
        # Compare keys only; the sentence objects need not be orderable.
        keyed.sort(key=lambda pair: pair[0])
        return keyed

    def recognize(self, text, verbose=False):
        """
        text -> Recognition

        Runs the text through the parser, converts the parses to surface
        sentences and those to deep sentences.  At both the surface and deep
        stage, sentences with identical JSON dumps are collapsed to one and
        the results are ordered by their JSON dump.

        If verbose is true, each stage's results are printed.
        """
        parses = list(self.text_to_parse.parse(text))

        if verbose:
            print('-- %d parses' % len(parses))
            for parse in parses:
                print('-- PARSE')
                parse.dump()

        keys_ssens = self._dedup_by_dump(
            ssen
            for parse in parses
            for ssen in self.parse_to_surface.recog(parse))

        if verbose:
            print('-- %d ssens' % len(keys_ssens))
            for i, (key, ssen) in enumerate(keys_ssens):
                print('-- SSEN %d' % i)
                print(key)

        keys_dsens = self._dedup_by_dump(
            dsen
            for _, ssen in keys_ssens
            for dsen in self.surface_to_deep.recog(ssen))

        if verbose:
            print('-- %d dsens' % len(keys_dsens))
            for i, (key, dsen) in enumerate(keys_dsens):
                print('-- DSEN %d' % i)
                print(key)

        ssens = [ssen for _, ssen in keys_ssens]
        dsens = [dsen for _, dsen in keys_dsens]
        return Recognition(parses, ssens, dsens)

    def say(self, dsen, idiolect):
        """
        DeepSentence, Idiolect -> text

        Converts the deep sentence to a surface sentence, asks that for its
        tokens, and joins the tokens using the idiolect's contractions
        setting.
        """
        ssen = dsen.to_surface(self.transform_state, self.say_state, idiolect)
        tokens = ssen.say(self.say_state, idiolect)
        text = self.joiner.join(tokens, idiolect.contractions)
        return text