Exemplo n.º 1
0
def get_aligned_frames_xml(tokenized, frame_instances, root):
    """Add a <frameinstance> subtree to *root* for every mapped frame and serialize it.

    The DRG for *tokenized* is obtained with ``get_drg`` and parsed with
    ``drg.DRGParser``; surface forms are generated from it via ``unboxer``.

    Args:
        tokenized: input handed unchanged to ``get_drg``.
        frame_instances: mapping of instance id -> dict with the keys
            ``'roles'``, ``'frame'``, ``'synset'`` and ``'variable'``.
            ``'roles'`` maps a role name to a ``(variable, filler)`` pair.
        root: element the ``frameinstance`` children are appended to
            (presumably an lxml ``objectify`` element -- confirm with caller).

    Returns:
        The value of ``etree.tostring(root, pretty_print=True)``.

    Frame instances are skipped (with an info log entry) when they have no
    roles, when their frame is ``"Unmapped"``, or when their synset has no
    entry in ``mapping_net``.
    """
    # read DRG
    tuples = get_drg(tokenized)
    drgparser = drg.DRGParser()
    d = drgparser.parse_tup_lines(tuples)

    # .items() instead of .iteritems(): available on both Python 2 and 3.
    for instance_id, frame_instance in frame_instances.items():
        if not frame_instance['roles']:
            continue

        frame = frame_instance['frame']
        synset = frame_instance['synset']
        if frame == "Unmapped":
            log.info('No mapping found for synset {0}'.format(synset))
            continue
        try:
            mapped = mapping_net[synset]
        except KeyError:
            # A synset missing from the mapping table only affects this frame
            # instance; skip it instead of aborting the whole document.
            log.info('No mapping found for synset {0}'.format(synset))
            continue
        framebase_id = "{0}-{1}".format(
            frame, mapped.split("#")[0].replace('-', '.'))

        tag_frameinstance = objectify.SubElement(root, "frameinstance")
        tag_frameinstance.attrib['id'] = instance_id
        tag_frameinstance.attrib['type'] = framebase_id
        tag_frameinstance.attrib['internalvariable'] = frame_instance['variable']

        for reificated_frame_var in d.reificated[frame_instance['variable']]:
            # Surface form of the frame itself (complete=False) ...
            tag_framelexicalization = objectify.SubElement(tag_frameinstance, "framelexicalization")
            frame_words = []
            unboxer.generate_from_referent(d, reificated_frame_var, frame_words, complete=False)
            tag_framelexicalization[0] = ' '.join(frame_words)

            # ... and of the whole instance (complete=True).
            tag_instancelexicalization = objectify.SubElement(tag_frameinstance, "instancelexicalization")
            instance_words = []
            unboxer.generate_from_referent(d, reificated_frame_var, instance_words, complete=True)
            tag_instancelexicalization[0] = ' '.join(instance_words)

            tag_frameelements = objectify.SubElement(tag_frameinstance, "frameelements")
            for role, (variable, filler) in frame_instance['roles'].items():
                tag_frameelement = objectify.SubElement(tag_frameelements, "frameelement")
                tag_frameelement.attrib['role'] = role
                tag_frameelement.attrib['internalvariable'] = variable
                tag_concept = objectify.SubElement(tag_frameelement, "concept")
                tag_concept[0] = filler
                try:
                    for reificated_role_var in d.reificated[variable]:
                        # composed lexicalization
                        role_surface = unboxer.generate_from_relation(d, reificated_frame_var, reificated_role_var)
                        if role_surface is None:
                            continue
                        tag_rolelexicalization = objectify.SubElement(tag_frameelement, "rolelexicalization")
                        tag_rolelexicalization[0] = role_surface

                        # complete surface forms
                        concept_words = []
                        unboxer.generate_from_referent(d, reificated_role_var, concept_words, complete=True)
                        tag_conceptlexicalization = objectify.SubElement(tag_frameelement, "conceptlexicalization")
                        tag_conceptlexicalization[0] = ' '.join(concept_words)
                except Exception:
                    # Best effort: a role that cannot be lexicalized keeps its
                    # <concept> but gets no (further) lexicalization tags.
                    # Exception (not a bare except) so that KeyboardInterrupt
                    # and SystemExit still propagate.
                    log.error("error with DRG reification: {0}".format(variable))

    # Strip objectify's type annotations and unused namespace declarations
    # before serializing (assuming objectify/etree are lxml's).
    objectify.deannotate(root, xsi_nil=True)
    etree.cleanup_namespaces(root)
    return etree.tostring(root, pretty_print=True)
def get_aligned_frames_xml(tokenized, frame_instances, root):
    """Add a <frameinstance> subtree to *root* for every mapped frame and serialize it.

    The DRG for *tokenized* is obtained with ``get_drg`` and parsed with
    ``drg.DRGParser``; surface forms are generated from it via ``unboxer``.

    Args:
        tokenized: input handed unchanged to ``get_drg``.
        frame_instances: mapping of instance id -> dict with the keys
            ``'roles'``, ``'frame'``, ``'synset'`` and ``'variable'``.
            ``'roles'`` maps a role name to a ``(variable, filler)`` pair.
        root: element the ``frameinstance`` children are appended to
            (presumably an lxml ``objectify`` element -- confirm with caller).

    Returns:
        The value of ``etree.tostring(root, pretty_print=True)``.

    Frame instances are skipped when they have no roles, or (with an info
    log entry) when building their type id from ``offset2wn`` fails.
    """
    # read DRG
    tuples = get_drg(tokenized)
    drgparser = drg.DRGParser()
    d = drgparser.parse_tup_lines(tuples)

    # .items() instead of .iteritems(): available on both Python 2 and 3.
    for instance_id, frame_instance in frame_instances.items():
        if not frame_instance['roles']:
            continue

        try:
            framebase_id = "{0}-{1}".format(
                frame_instance['frame'],
                offset2wn[frame_instance['synset']].split("#")[0].replace('-', '.'))
        except Exception:
            # Any failure while building the id is treated as "no mapping";
            # Exception (not a bare except) keeps KeyboardInterrupt and
            # SystemExit propagating.
            log.info('No mapping found for synset {0}'.format(frame_instance['synset']))
            continue

        tag_frameinstance = objectify.SubElement(root, "frameinstance")
        tag_frameinstance.attrib['id'] = instance_id
        tag_frameinstance.attrib['type'] = framebase_id
        tag_frameinstance.attrib['internalvariable'] = frame_instance['variable']

        for reificated_frame_var in d.reificated[frame_instance['variable']]:
            # Surface form of the frame itself (complete=False) ...
            tag_framelexicalization = objectify.SubElement(tag_frameinstance, "framelexicalization")
            frame_words = []
            unboxer.generate_from_referent(d, reificated_frame_var, frame_words, complete=False)
            tag_framelexicalization[0] = ' '.join(frame_words)

            # ... and of the whole instance (complete=True).
            tag_instancelexicalization = objectify.SubElement(tag_frameinstance, "instancelexicalization")
            instance_words = []
            unboxer.generate_from_referent(d, reificated_frame_var, instance_words, complete=True)
            tag_instancelexicalization[0] = ' '.join(instance_words)

            tag_frameelements = objectify.SubElement(tag_frameinstance, "frameelements")
            for role, (variable, filler) in frame_instance['roles'].items():
                tag_frameelement = objectify.SubElement(tag_frameelements, "frameelement")
                tag_frameelement.attrib['role'] = role
                tag_frameelement.attrib['internalvariable'] = variable
                tag_concept = objectify.SubElement(tag_frameelement, "concept")
                tag_concept[0] = filler
                try:
                    for reificated_role_var in d.reificated[variable]:
                        # composed lexicalization
                        role_surface = unboxer.generate_from_relation(d, reificated_frame_var, reificated_role_var)
                        if role_surface is None:
                            continue
                        tag_rolelexicalization = objectify.SubElement(tag_frameelement, "rolelexicalization")
                        tag_rolelexicalization[0] = role_surface

                        # complete surface forms
                        concept_words = []
                        unboxer.generate_from_referent(d, reificated_role_var, concept_words, complete=True)
                        tag_conceptlexicalization = objectify.SubElement(tag_frameelement, "conceptlexicalization")
                        tag_conceptlexicalization[0] = ' '.join(concept_words)
                except Exception:
                    # Best effort: a role that cannot be lexicalized keeps its
                    # <concept> but gets no (further) lexicalization tags.
                    log.error("error with DRG reification: {0}".format(variable))

    # Strip objectify's type annotations and unused namespace declarations
    # before serializing (assuming objectify/etree are lxml's).
    objectify.deannotate(root, xsi_nil=True)
    etree.cleanup_namespaces(root)
    return etree.tostring(root, pretty_print=True)