Exemplo n.º 1
0
    def __init__(self, language):
        """Prepare the generation resources and load the language specification.

        The specification is read from ``languages/<language>.json`` next to
        this file (or ``languages/<language>_v2.json`` when ``util.v2()`` is
        truthy and that file exists). Every ``dmrs`` entry found in it is
        turned into an object via ``Dmrs.parse`` and stored in a lookup table,
        together with a reverse ``*_by_key`` index.

        Args:
            language: Name used to locate ``<language>.dat`` and the JSON
                specification under ``languages/``; also forwarded to the
                superclass constructor and to ``prepare_grammar``.

        Raises:
            KeyError: If the specification lacks one of the sections that are
                indexed unconditionally (``propositions``, ``hierarchy``,
                ``post-processing``) or an entry lacks ``dmrs``/``key``.
            AssertionError: If a ``key`` is used twice within one section, or
                a ``sortinfos`` entry is malformed.
        """
        super(DmrsRealizer, self).__init__(language)
        prepare_ace()
        prepare_grammar(language=language)
        directory = os.path.dirname(os.path.realpath(__file__))
        languages_dir = os.path.join(directory, 'languages')
        self.ace_path = os.path.join(directory, 'resources', 'ace')
        self.erg_path = os.path.join(languages_dir, language + '.dat')

        # Literal dots are escaped so they no longer match arbitrary characters.
        chart_note = (
            r'NOTE: [0-9]+ passive, [0-9]+ active edges in final generation chart; '
            r'built [0-9]+ passives total\. '
        )
        self.successful_regex = re.compile(r'^' + chart_note + r'\[1 results\]$')
        self.unsuccessful_regex = re.compile(r'^' + chart_note + r'\[0 results\]$')
        # '|' binds looser than '^'/'$', so the alternatives are wrapped in a
        # non-capturing group to anchor BOTH of them at both ends. The two
        # capturing groups keep their original numbering (1 and 2).
        self.final_regex = re.compile(
            r'^(?:'
            r'(NOTE: generated [0-9]+ / [0-9]+ sentences, avg [0-9]+k, time [0-9]+\.[0-9]+s)'
            r'|(NOTE: transfer did [0-9]+ successful unifies and [0-9]+ failed ones)'
            r')$'
        )

        # Prefer the "_v2" specification when requested and available.
        v2_path = os.path.join(languages_dir, language + '_v2.json')
        if util.v2() and os.path.isfile(v2_path):
            spec_path = v2_path
        else:
            spec_path = os.path.join(languages_dir, language + '.json')
        with open(spec_path, 'r') as filehandle:
            # Kept separate from the `language` argument (a name), which the
            # original code overwrote with the loaded dictionary.
            spec = json.load(fp=filehandle)

        # NOTE(review): the two sortinfo tables are not read again within this
        # method; they are kept because create_sortinfo may have side effects
        # that are not visible from here.
        if 'sortinfos' in spec:
            sortinfo_classes = dict()
            sortinfo_shortforms = dict()
            for cvarsort, sortinfo in spec['sortinfos'].items():
                assert 'features' in sortinfo
                sortinfo_class = create_sortinfo(cvarsort, tuple(sortinfo['features']))
                sortinfo_classes[cvarsort] = sortinfo_class
                if 'shortform' in sortinfo:
                    shortform = sortinfo['shortform']
                    assert all(feature in sortinfo_class.features for feature in shortform)
                    assert all(len(key) == 1 and key not in '_?' for feature, kvs in shortform.items() for key in kvs)
                    sortinfo_shortforms[cvarsort] = shortform
        else:
            sortinfo_classes = None
            sortinfo_shortforms = None

        # attributes: predtype -> value -> Dmrs; the 'relation' entry is special.
        self.attributes = dict()
        self.attribute_by_key = dict()
        self.relation_attribute = None
        for predtype, values in spec.get('attributes', dict()).items():
            predtype = parse_string(predtype)
            if predtype == 'relation':
                self.relation_attribute = Dmrs.parse(values['dmrs'])
                continue
            by_value = self.attributes.setdefault(predtype, dict())
            for value, attribute in values.items():
                value = parse_string(value)
                by_value[value] = Dmrs.parse(attribute['dmrs'])
                assert attribute['key'] not in self.attribute_by_key, \
                    'duplicate attribute key: %r' % (attribute['key'],)
                self.attribute_by_key[attribute['key']] = (predtype, value)

        self.entity_type = None
        if 'type' in spec:
            self.entity_type = Dmrs.parse(spec['type']['dmrs'])

        # relations: predtype -> value -> Dmrs; 'attribute' and 'type' are special.
        self.relations = dict()
        self.relation_by_key = dict()
        self.attribute_relation = None
        self.type_relation = None
        for predtype, values in spec.get('relations', dict()).items():
            predtype = parse_string(predtype)
            if predtype == 'attribute':
                self.attribute_relation = Dmrs.parse(values['dmrs'])
                continue
            if predtype == 'type':
                self.type_relation = Dmrs.parse(values['dmrs'])
                continue
            by_value = self.relations.setdefault(predtype, dict())
            for value, relation in values.items():
                value = parse_string(value)
                by_value[value] = Dmrs.parse(relation['dmrs'])
                assert relation['key'] not in self.relation_by_key, \
                    'duplicate relation key: %r' % (relation['key'],)
                self.relation_by_key[relation['key']] = (predtype, value)

        self.existential = None
        if 'existential' in spec:
            self.existential = Dmrs.parse(spec['existential']['dmrs'])

        # Both quantifier sections share one layout and one loader.
        self.quantifiers, self.quantifier_by_key = self._load_quantifier_section(
            spec.get('quantifiers', dict()), 'quantifier'
        )

        self.number_bounds = dict()
        self.number_bound_by_key = dict()
        for bound, number_bound in spec.get('number-bounds', dict()).items():
            bound = parse_string(bound)
            self.number_bounds[bound] = Dmrs.parse(number_bound['dmrs'])
            assert number_bound['key'] not in self.number_bound_by_key, \
                'duplicate number-bound key: %r' % (number_bound['key'],)
            self.number_bound_by_key[number_bound['key']] = (bound,)

        self.comparative_quantifiers, self.comparative_quantifier_by_key = self._load_quantifier_section(
            spec.get('comparative-quantifiers', dict()), 'comparative-quantifier'
        )

        # propositions: a connective maps to one Dmrs, or to a tuple of them
        # when the specification gives a list.
        self.propositions = dict()
        self.proposition_by_key = dict()
        for connective, proposition in spec['propositions'].items():
            connective = parse_string(connective)
            if isinstance(proposition['dmrs'], list):
                self.propositions[connective] = tuple(Dmrs.parse(dmrs) for dmrs in proposition['dmrs'])
            else:
                self.propositions[connective] = Dmrs.parse(proposition['dmrs'])
            assert proposition['key'] not in self.proposition_by_key, \
                'duplicate proposition key: %r' % (proposition['key'],)
            self.proposition_by_key[proposition['key']] = connective

        self.hierarchy = spec['hierarchy']

        # post-processing: ordered (search, replace, disable_hierarchy) triples;
        # the by-key index stores each rule's position in that list.
        self.post_processing = list()
        self.post_processing_by_key = dict()
        for n, paraphrase in enumerate(spec['post-processing']):
            search = Dmrs.parse(paraphrase['search'])
            replace = Dmrs.parse(paraphrase['replace'])
            disable_hierarchy = paraphrase.get('disable_hierarchy', False)
            self.post_processing.append((search, replace, disable_hierarchy))
            assert paraphrase['key'] not in self.post_processing_by_key, \
                'duplicate post-processing key: %r' % (paraphrase['key'],)
            self.post_processing_by_key[paraphrase['key']] = n

    @staticmethod
    def _load_quantifier_section(section, label):
        """Build the (table, by_key) pair for one quantifier-style section.

        Args:
            section: Mapping ``qtype -> qrange -> quantity -> entry`` where each
                entry has ``dmrs`` and ``key``; under the ``composed`` qtype the
                mapping is ``identifier -> entry`` and each entry additionally
                has a ``definition`` list of (qtype, qrange, quantity) triples.
            label: Section name used only in duplicate-key assertion messages.

        Returns:
            ``(table, by_key)``: ``table`` maps qtype -> qrange -> quantity ->
            Dmrs (for ``composed``: qtype -> identifier -> {definition: Dmrs});
            ``by_key`` maps each key (for ``composed``: the identifier) to the
            tuple locating it in ``table``.
        """
        table = dict()
        by_key = dict()
        for qtype, qranges in section.items():
            qtype = parse_string(qtype)
            by_range = table.setdefault(qtype, dict())
            if qtype == 'composed':
                for identifier, quantifier in qranges.items():
                    identifier = parse_string(identifier)
                    # 'definition' is removed from the loaded entry, as before.
                    definition = tuple(
                        (str(part_qtype), str(part_qrange), part_quantity)
                        for part_qtype, part_qrange, part_quantity in quantifier.pop('definition')
                    )
                    by_range[identifier] = {definition: Dmrs.parse(quantifier['dmrs'])}
                    assert identifier not in by_key, \
                        'duplicate %s key: %r' % (label, identifier)
                    by_key[identifier] = (qtype, identifier, definition)
                continue
            for qrange, quantities in qranges.items():
                qrange = parse_string(qrange)
                by_quantity = by_range.setdefault(qrange, dict())
                for quantity, quantifier in quantities.items():
                    quantity = parse_string(quantity)
                    by_quantity[quantity] = Dmrs.parse(quantifier['dmrs'])
                    assert quantifier['key'] not in by_key, \
                        'duplicate %s key: %r' % (label, quantifier['key'])
                    by_key[quantifier['key']] = (qtype, qrange, quantity)
        return table, by_key
Exemplo n.º 2
0
    def __init__(self, language):
        """Prepare the grammar and load the language specification.

        The specification is read from ``languages/<language>.json`` next to
        this file. Each ``dmrs`` entry in it is replaced in place by the result
        of ``Dmrs.parse``, and the (mutated) entry dictionaries are stored in
        the lookup tables, alongside reverse ``*_by_name`` indexes keyed by
        each entry's ``key``.

        Args:
            language: Name used to locate ``<language>.dat`` and
                ``<language>.json`` under ``languages/``; also forwarded to the
                superclass constructor and to ``prepare_grammar``.

        Raises:
            KeyError: If the specification lacks ``attributes``, ``relations``,
                ``quantifiers``, ``propositions``, ``hierarchy`` or
                ``post-processing``, or an entry lacks a field read here.
        """
        super(DmrsRealizer, self).__init__(language)
        prepare_grammar(language=language)
        directory = os.path.dirname(os.path.realpath(__file__))
        self.ace_path = os.path.join(directory, 'resources', 'ace')
        self.erg_path = os.path.join(directory, 'languages', language + '.dat')
        # '|' binds looser than '^'/'$': without the enclosing group only the
        # first alternative was anchored at the start and only the last at the
        # end. The three capturing groups keep their original numbering, and
        # literal dots are now escaped.
        self.regex = re.compile(
            r'^(?:'
            r'(NOTE: [0-9]+ passive, [0-9]+ active edges in final generation chart; built [0-9]+ passives total\. \[1 results\])'
            r'|(NOTE: generated [0-9]+ / [0-9]+ sentences, avg [0-9]+k, time [0-9]+\.[0-9]+s)'
            r'|(NOTE: transfer did [0-9]+ successful unifies and [0-9]+ failed ones)'
            r')$'
        )

        with open(os.path.join(directory, 'languages', language + '.json'), 'r') as filehandle:
            # Kept separate from the `language` argument (a name), which the
            # original code overwrote with the loaded dictionary.
            spec = json.load(fp=filehandle)

        # NOTE(review): the two sortinfo tables are not read again within this
        # method; they are kept because create_sortinfo may have side effects
        # that are not visible from here.
        if 'sortinfos' in spec:
            sortinfo_classes = dict()
            sortinfo_shortforms = dict()
            for cvarsort, sortinfo in spec['sortinfos'].items():
                assert 'features' in sortinfo
                sortinfo_class = create_sortinfo(cvarsort, tuple(sortinfo['features']))
                sortinfo_classes[cvarsort] = sortinfo_class
                if 'shortform' in sortinfo:
                    shortform = sortinfo['shortform']
                    assert all(feature in sortinfo_class.features for feature in shortform)
                    assert all(len(key) == 1 and key not in '_?' for feature, kvs in shortform.items() for key in kvs)
                    sortinfo_shortforms[cvarsort] = shortform
        else:
            sortinfo_classes = None
            sortinfo_shortforms = None

        # attributes: attrtype -> value -> entry; 'empty' and 'relation' hold a
        # single entry directly instead of a value table.
        self.attributes = {attrtype: dict() for attrtype in spec['attributes']}
        self.attribute_by_name = dict()
        for attrtype, values in spec['attributes'].items():
            if attrtype in ('empty', 'relation'):
                values['dmrs'] = Dmrs.parse(values['dmrs'])
                self.attributes[attrtype] = values
                continue
            by_value = self.attributes[attrtype]
            for value, attribute in values.items():
                value = parse_string(value)
                attribute['dmrs'] = Dmrs.parse(attribute['dmrs'])
                by_value[value] = attribute
                self.attribute_by_name[attribute['key']] = (attrtype, value)

        # relations: reltype -> value -> entry; 'attribute' and 'type' hold a
        # single entry directly.
        self.relations = {reltype: dict() for reltype in spec['relations']}
        self.relation_by_name = dict()
        for reltype, values in spec['relations'].items():
            if reltype in ('attribute', 'type'):
                values['dmrs'] = Dmrs.parse(values['dmrs'])
                self.relations[reltype] = values
                continue
            by_value = self.relations[reltype]
            for value, relation in values.items():
                value = parse_string(value)
                relation['dmrs'] = Dmrs.parse(relation['dmrs'])
                by_value[value] = relation
                self.relation_by_name[relation['key']] = (reltype, value)

        # quantifiers: qtype -> qrange -> quantity -> entry; 'existential' holds
        # a single entry directly. The inner tables are created under the
        # PARSED qrange: the original pre-created them under the raw JSON key
        # and then indexed with parse_string(qrange), which fails with KeyError
        # whenever parse_string returns something other than its input
        # (NOTE(review): parse_string is not visible here - confirm).
        self.quantifiers = dict()
        self.quantifier_by_name = dict()
        for qtype, qranges in spec['quantifiers'].items():
            if qtype == 'existential':
                qranges['dmrs'] = Dmrs.parse(qranges['dmrs'])
                self.quantifiers[qtype] = qranges
                continue
            by_range = self.quantifiers[qtype] = dict()
            for qrange, quantities in qranges.items():
                qrange = parse_string(qrange)
                by_quantity = by_range.setdefault(qrange, dict())
                for quantity, quantifier in quantities.items():
                    quantity = parse_string(quantity)
                    quantifier['dmrs'] = Dmrs.parse(quantifier['dmrs'])
                    by_quantity[quantity] = quantifier
                    self.quantifier_by_name[quantifier['key']] = (qtype, qrange, quantity)

        # propositions: the five structural entries are stored without a
        # by-name index; for the remaining connectives 'dmrs' may be a list,
        # which becomes a tuple of parsed objects.
        self.propositions = dict()
        self.proposition_by_name = dict()
        for connective, proposition in spec['propositions'].items():
            if connective in ('attribute', 'type', 'relation', 'existential', 'quantifier'):
                proposition['dmrs'] = Dmrs.parse(proposition['dmrs'])
                self.propositions[connective] = proposition
                continue
            if isinstance(proposition['dmrs'], list):
                proposition['dmrs'] = tuple(Dmrs.parse(dmrs) for dmrs in proposition['dmrs'])
            else:
                proposition['dmrs'] = Dmrs.parse(proposition['dmrs'])
            self.propositions[connective] = proposition
            self.proposition_by_name[proposition['key']] = connective

        self.hierarchy = spec['hierarchy']

        # post-processing: key -> (search, replace) pair of parsed objects.
        self.post_processing = dict()
        for key, paraphrase in spec['post-processing'].items():
            search = Dmrs.parse(paraphrase['search'])
            replace = Dmrs.parse(paraphrase['replace'])
            self.post_processing[key] = (search, replace)