Exemplo n.º 1
0
 def test_GPred_from_string(self):
     """
     Check that GPred.from_string builds GPred instances.

     A name given with or without the trailing '_rel' must compare equal
     to GPred(name), both for names without inner underscores and for
     names with them.  A string starting with an underscore must raise
     ValueError, and a non-string argument must raise TypeError.
     """
     # (plain name, same name with the '_rel' suffix)
     cases = (
         ('pron', 'pron_rel'),      # no intermediate underscores
         ('udef_q', 'udef_q_rel'),  # intermediate underscores
     )
     for short_form, rel_form in cases:
         from_rel = GPred.from_string(rel_form)
         from_short = GPred.from_string(short_form)
         for parsed in (from_rel, from_short):
             self.assertEqual(GPred(short_form), parsed)
         for parsed in (from_rel, from_short):
             self.assertIsInstance(parsed, GPred)
     # Rejected input: leading underscore, then a non-string
     self.assertRaises(ValueError, GPred.from_string, "_the_q_rel")
     self.assertRaises(TypeError, GPred.from_string, 1)
Exemplo n.º 2
0
 def test_GPred_from_string(self):
     """
     Check that GPred.from_string builds GPred instances.

     A name given with or without the trailing '_rel' must compare equal
     to GPred(name), both for names without inner underscores and for
     names with them.  A string starting with an underscore must raise
     ValueError, and a non-string argument must raise TypeError.
     """
     # (plain name, same name with the '_rel' suffix)
     cases = (
         ('pron', 'pron_rel'),      # no intermediate underscores
         ('udef_q', 'udef_q_rel'),  # intermediate underscores
     )
     for short_form, rel_form in cases:
         from_rel = GPred.from_string(rel_form)
         from_short = GPred.from_string(short_form)
         for parsed in (from_rel, from_short):
             self.assertEqual(GPred(short_form), parsed)
         for parsed in (from_rel, from_short):
             self.assertIsInstance(parsed, GPred)
     # Rejected input: leading underscore, then a non-string
     self.assertRaises(ValueError, GPred.from_string, "_the_q_rel")
     self.assertRaises(TypeError, GPred.from_string, 1)
Exemplo n.º 3
0
# Pick the configuration: when run as a script an explicit -c/--config file
# may be given on the command line; otherwise (and on import) the default
# configuration file is loaded.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='DMRS simplification tool')
    # NOTE(review): 'simplifaction' below is a typo in the user-facing help
    # text; kept unchanged here.
    parser.add_argument(
        '-c', '--config',
        default=None,
        help='Path to simplifaction configuration file. '
             'By default, configuration in __config__/default_simplification.conf is used.')
    parser.add_argument('input_dmrs', help='Specify input DMRS file')
    parser.add_argument('output_dmrs', help='Specify output dmrs file.')
    args = parser.parse_args()
    if args.config is None:
        config = load_config(DEFAULT_CONFIG_FILE)
    else:
        # Load the given file
        config = load_config(args.config, default=False)
else:
    config = load_config(DEFAULT_CONFIG_FILE)

# Defaults for gpred_filtering, taken from the
# 'General Predicate Filtering' section of the loaded configuration.
DEFAULT_FILTER = frozenset(map(
    GPred.from_string,
    get_config_option(config, 'General Predicate Filtering', 'filter', opt_type=list),
))
DEFAULT_ALLOW_DISC = get_config_option(
    config, 'General Predicate Filtering', 'allow_disconnected_dmrs')

def gpred_filtering(dmrs, gpred_filter=DEFAULT_FILTER, allow_disconnected_dmrs=DEFAULT_ALLOW_DISC):
    """
    Remove general predicate nodes on the filter list from the DMRS.
    :param dmrs: Input DMRS object
    :param gpred_filter: Collection of general predicates to filter. Defaults to DEFAULT_FILTER,
     a frozenset of GPred objects built from the configuration.
     NOTE(review): this was previously documented as "a list ... (as strings)" - confirm what callers pass.
    :param allow_disconnected_dmrs: Remove gpred nodes even if their removal would result in a disconnected DMRS.
     If DMRS was already disconnected, gpred nodes are removed regardless.
     Defaults to DEFAULT_ALLOW_DISC, read from the configuration.
    :return: Output DMRS object
    """

    # Node ids selected for removal
    filterable_nodeids = set()

    # Find general predicate nodes to filter
Exemplo n.º 4
0
 def test_GPred_str(self):
     """
     str() of a GPred built from a string must give that string back
     (the 'informal' string representation).
     """
     source = 'pron'
     gpred = GPred.from_string(source)
     self.assertEqual(str(gpred), source)
Exemplo n.º 5
0
 def test_GPred_str(self):
     """
     str() of a GPred built from a string must give that string back
     (the 'informal' string representation).
     """
     source = 'pron'
     gpred = GPred.from_string(source)
     self.assertEqual(str(gpred), source)
Exemplo n.º 6
0
def loads_xml(bytestring, encoding=None, cls=ListDmrs, **kwargs):
    """
    Build a DMRS object from a single "<dmrs>...</dmrs>" XML element.
    ("<dmrslist>...</dmrslist>" input is not handled yet.)

    :param bytestring: The XML source as a bytestring; a string may be given
     instead if encoding is specified, in which case it is encoded first
    :param encoding: Encoding used to convert a string input to bytes
    :param cls: DMRS class to instantiate (ListDmrs by default)
    :param kwargs: Passed on to the cls constructor
    :return: The populated cls instance
    :raises PydmrsValueError: If an unexpected tag is found directly under
     <dmrs>, under a <node>, or under a <link> that does not start at 0
    """
    def optional_int(element, attribute):
        # Integer value of an XML attribute, or None if the attribute is absent
        if attribute in element.attrib:
            return int(element.get(attribute))
        return None

    if encoding:
        bytestring = bytestring.encode(encoding)
    root = ET.XML(bytestring)

    dmrs = cls(**kwargs)

    # Graph-level attributes
    dmrs.cfrom = optional_int(root, 'cfrom')
    dmrs.cto = optional_int(root, 'cto')
    dmrs.surface = root.get('surface')
    dmrs.ident = optional_int(root, 'ident')
    index_id = optional_int(root, 'index')
    top_id = None

    for child in root:
        if child.tag == 'node':
            nodeid = optional_int(child, 'nodeid')
            cfrom = optional_int(child, 'cfrom')
            cto = optional_int(child, 'cto')
            surface = child.get('surface')
            base = child.get('base')
            carg = child.get('carg')

            pred = None
            sortinfo = None
            for part in child:
                kind = part.tag
                if kind == 'sortinfo':
                    sortinfo = part.attrib
                elif kind == 'realpred':
                    lemma = part.get('lemma')
                    try:
                        pred = RealPred(lemma, part.get('pos'), part.get('sense'))
                    except PydmrsValueError:
                        # The whole pred name is stored under 'lemma' instead of
                        # being split between 'lemma', 'pos' and 'sense'
                        pred = RealPred.from_string(part.get('lemma'))
                        warn("RealPred given as string rather than lemma, pos, sense", PydmrsWarning)
                elif kind == 'gpred':
                    try:
                        pred = GPred.from_string(part.text)
                    except PydmrsValueError:
                        # The string is actually for a RealPred, not a GPred
                        pred = RealPred.from_string(part.text)
                        warn("RealPred string found in a <gpred> tag", PydmrsWarning)
                else:
                    raise PydmrsValueError(kind)

            node = cls.Node(nodeid=nodeid, pred=pred, carg=carg, sortinfo=sortinfo,
                            cfrom=cfrom, cto=cto, surface=surface, base=base)
            dmrs.add_node(node)

        elif child.tag == 'link':
            start = int(child.get('from'))
            end = int(child.get('to'))

            if start == 0:
                # A link starting at 0 only records which node is the top;
                # its sub-elements are not inspected
                top_id = end
                continue

            labels = {'rargname': None, 'post': None}
            for part in child:
                if part.tag not in labels:
                    raise PydmrsValueError(part.tag)
                labels[part.tag] = part.text
            dmrs.add_link(Link(start, end, labels['rargname'], labels['post']))

        else:
            raise PydmrsValueError(child.tag)

    # Resolve top and index only once all nodes have been added
    if top_id:
        dmrs.top = dmrs[top_id]
    if index_id:
        dmrs.index = dmrs[index_id]

    return dmrs
Exemplo n.º 7
0
        default=None,
        help=
        'Path to simplifaction configuration file. By default, configuration in __config__/default_simplification.conf is used.'
    )
    parser.add_argument('input_dmrs', help='Specify input DMRS file')
    parser.add_argument('output_dmrs', help='Specify output dmrs file.')
    args = parser.parse_args()
    if args.config is not None:  # Load the given file
        config = load_config(args.config, default=False)
    else:
        config = load_config(DEFAULT_CONFIG_FILE)
else:
    config = load_config(DEFAULT_CONFIG_FILE)

# General predicates filtered by default: each entry of the 'filter' option
# in the loaded configuration, parsed into a GPred.
DEFAULT_FILTER = frozenset(map(
    GPred.from_string,
    get_config_option(config, 'General Predicate Filtering', 'filter', opt_type=list),
))
# Default for the allow_disconnected_dmrs argument of gpred_filtering.
DEFAULT_ALLOW_DISC = get_config_option(
    config, 'General Predicate Filtering', 'allow_disconnected_dmrs')


def gpred_filtering(dmrs,
                    gpred_filter=DEFAULT_FILTER,
                    allow_disconnected_dmrs=DEFAULT_ALLOW_DISC):
    """
    Remove general predicate nodes on the filter list from the DMRS.
    :param dmrs_xml: Input DMRS object
    :param gpred_filter: A list of general predicates to filter (as strings)
    :param allow_disconnected_dmrs: Remove gpred nodes even if their removal would result in a disconnected DMRS.
     If DMRS was already disconnected, gpred nodes are removed regardless.
    :return: Output DMRS object