示例#1
0
def test_start(ace_mismatch, tmp_path, monkeypatch):
    """Check that a failing ACE start-up surfaces as ``ACEProcessError``.

    ``ace.Popen`` is swapped for a mock process that exits with return code
    255 and whose stderr stream contains the ``ace_mismatch`` text.  Both the
    ``ACEParser`` constructor and the ``ace.parse`` convenience function are
    expected to raise.
    """
    # An empty file stands in for the grammar image; only its path is used.
    grammar_file = tmp_path / 'grm.dat'
    grammar_file.write_text('')
    grammar_path = str(grammar_file)

    failing_popen = mock_popen(
        pid=10, returncode=255,
        stdout=io.StringIO(), stderr=io.StringIO(ace_mismatch),
    )

    with monkeypatch.context() as patcher:
        patcher.setattr(ace, 'Popen', failing_popen)
        # Pin the reported ACE version so no real binary is consulted.
        patcher.setattr(ace, '_ace_version', lambda x: (0, 9, 29))

        with pytest.raises(ace.ACEProcessError):
            ace.ACEParser(grammar_path)

        with pytest.raises(ace.ACEProcessError):
            ace.parse(grammar_path, 'Dogs sleep.')
示例#2
0
    def full_parse(sent, selected_grammar, max_parses):
        """
        Parse ``sent`` with ACE and collect the structures of every reading.

        Arguments:
            sent: the sentence handed to ``parser.interact``.
            selected_grammar: file name of the grammar image, resolved
                relative to ``ROOT/delphin/``.
            max_parses: either the string ``'max'`` or a number (``int`` or
                numeric ``str``) of readings to request. Requests are capped
                at 50.

        Returns:
            A mapping from reading index to a dict with the keys
            ``'deriv_json'``, ``'mrs_json'``, ``'mrs_simplemrs'``,
            ``'dmrs_json'`` (``False`` when the MRS is not well formed) and
            ``'errors'``. The mapping is empty when ACE returns no results.

        Raises:
            ValueError: if ``max_parses`` is neither ``'max'`` nor numeric.
        """

        results = dd(lambda: dd())

        #######################################################################
        # ACE cmdargs (currently only for the number of parses)
        #######################################################################
        # Make sure shenanigans can't happen with the HTML/JS source: the
        # requested number is capped, and it is always converted to ``str``
        # because the argument list ends up in a subprocess call.
        parse_cap = 50
        if max_parses == 'max':
            n_requested = parse_cap
        else:
            n_requested = min(int(max_parses), parse_cap)

        ace_cmdargs = [
            '-n', str(n_requested), '--timeout=20', '--rooted-derivations',
            '--udx', '--max-chart-megabytes=3000',
            '--max-unpack-megabytes=3000'
        ]

        # NOTE(review): ``selected_grammar`` is concatenated into the path
        # as-is; if it can come from a web form it should be validated
        # against the known grammar names by the caller -- confirm.
        grammar_path = path.join(ROOT, 'delphin/' + selected_grammar)

        #######################################################################
        # To silence ACE we need to give it a file to stream its own stderr.
        #######################################################################
        with ace.ACEParser(grammar_path,
                           executable=path.join(ROOT, ACE),
                           cmdargs=ace_cmdargs,
                           stderr=ace_stderr) as parser:

            erg_parse = parser.interact(sent)

        if not erg_parse['results']:
            return results

        for n in range(len(erg_parse['results'])):

            # Build the result object once and reuse it for both views.
            reading = erg_parse.result(n)

            deriv = reading.derivation()
            mrs = reading.mrs()

            ####################################################################
            # This was breaking too often, throwing keyErrors for handles.
            # We need to check if it's well formed before conversion.
            ####################################################################
            if delphin.mrs.is_well_formed(mrs):
                dmrs_json = dmrsjson.encode(delphin.dmrs.from_mrs(mrs))
            else:
                dmrs_json = False

            sent_struct = sent_leaf_ids(deriv)

            results[n]['deriv_json'] = json.dumps(deriv.to_dict())
            results[n]['mrs_json'] = mrsjson.encode(mrs)
            results[n]['mrs_simplemrs'] = simplemrs.encode(mrs)
            results[n]['dmrs_json'] = dmrs_json
            results[n]['errors'] = check_nodes(deriv, [], sent_struct)

        return results
示例#3
0
    def check_sents(sent_list):
        """
        Given a list of sentences, return the error codes found for each one.

        When ``DOUBLE_GRAMMAR_PARSE`` is enabled, each sentence is first
        parsed with the default ERG and, only if that fails, with the ERG
        enhanced with mal-rules. The double parsing method is currently
        switched off, which is the same as always assuming that the ERG
        produces no parse: every sentence goes to the mal-rule grammar.

        The result keeps the input order and pairs each sentence with its
        list of ``(tag, string)`` error codes:

        [(sent1, [error1.1, error1.2]), (sent2, [error2.1, error2.2])]

        * mal-grammar parse: the tags ``check_nodes`` reports for the first
          reading's derivation;
        * no parse at all: ``[('NoParse', '')]``;
        * ERG parse (double mode only): ``[]`` for a proposition, otherwise
          ``[(sf, '')]``, with ``sf == 'noSF-MRSerror'`` when the sentence
          force cannot be read from the MRS.
        """

        #######################################################################
        # We are eliminating the double parsing method for now. This decision
        # is reversible by setting DOUBLE_GRAMMAR_PARSE = True.
        #######################################################################
        DOUBLE_GRAMMAR_PARSE = False

        erg_results = []
        with ace.ACEParser(path.join(ROOT, ERG),
                           executable=path.join(ROOT, ACE),
                           cmdargs=['-1', '--timeout=20',
                                    '--max-chart-megabytes=3000',
                                    '--max-unpack-megabytes=3000']) as parser,\
             ace.ACEParser(path.join(ROOT, MAL_ERG),
                           executable=path.join(ROOT, ACE),
                           cmdargs=['-1', '--timeout=20', '--udx',
                                    '--max-chart-megabytes=3000',
                                    '--max-unpack-megabytes=3000']) as mal:

            for sent in sent_list:

                if DOUBLE_GRAMMAR_PARSE:
                    erg_parse = parser.interact(sent)
                else:
                    erg_parse = {'results': None}

                if not erg_parse['results']:  # if there were no parses

                    mal_result = mal.interact(sent)

                    if mal_result['results']:  # If the mal-grammar got a parse
                        # Build the derivation once so the leaf ids and the
                        # node check refer to the same tree object.
                        deriv = mal_result.result(0).derivation()
                        sent_struct = sent_leaf_ids(deriv)
                        error_tags = check_nodes(deriv, [], sent_struct)

                        # NOTE: error tags are passed on exactly as
                        # check_nodes returns them (list-valued tags are
                        # not joined into strings here).
                        erg_results.append((sent, error_tags))

                    else:  # only a general NoParse tag can be given
                        erg_results.append((sent, [('NoParse', '')]))

                else:

                    # Check for Mood (Imperative and Interrogative)
                    try:
                        mrs = erg_parse.result(0).mrs()
                        sf = mrs.properties(mrs.index)['SF']
                    except Exception:
                        # Best effort: any failure reading the MRS is
                        # reported as a tag, but Ctrl-C / SystemExit are
                        # no longer swallowed.
                        print("MRS ERROR: " + sent, file=sys.stderr)
                        sf = 'noSF-MRSerror'

                    if sf != 'prop':
                        erg_results.append((sent, [(sf, '')]))

                    else:  # Propositions are good
                        erg_results.append((sent, []))

        return erg_results
示例#4
0
    def check_sents(sent_list):
        """
        Given a list of sentences, this function tries to parse each one with
        the default ERG and, if it fails, it uses the ERG enhanced with mal-rules
        to parse the same input. It returns a list with the same list of
        sentences and a list of error codes found for each sentence.

        [(sent1, [error1.1, error1.2]), (sent2, [error2.1, error2.2])]

        The tags per sentence are:

        * ERG parse: ``[]`` for a proposition (``SF == 'prop'``), otherwise
          ``[(sf, '')]``;
        * mal-grammar parse only: whatever ``check_nodes`` returns for the
          first reading's derivation;
        * no parse from either grammar: ``[('NoParse', '')]``.
        """

        erg_results = []
        with ace.ACEParser(path.join(ROOT, ERG),
                           executable=path.join(ROOT, ACE),
                           cmdargs=['-1', '--timeout=10']) as parser, \
             ace.ACEParser(path.join(ROOT, MAL_ERG),
                           executable=path.join(ROOT, ACE),
                           cmdargs=['-1', '--timeout=10', '--udx']) as mal:

            for sent in sent_list:

                erg_parse = parser.interact(sent)

                if not erg_parse['results']:  # if there were no parses

                    mal_result = mal.interact(sent)

                    if mal_result['results']:  # If the mal-grammar got a parse
                        error_tags = check_nodes(
                            mal_result.result(0).derivation(), [])

                        # A loop used to walk over error_tags here and join
                        # list-valued tags with ':' -- but it only rebound
                        # its own loop variable and never changed
                        # error_tags, so it has been removed.  The tags are
                        # passed on exactly as check_nodes returns them.
                        erg_results.append((sent, error_tags))

                    else:  # only a general NoParse tag can be given
                        erg_results.append((sent, [('NoParse', '')]))

                else:

                    # Check for Mood (Imperative and Interrogative)
                    try:
                        mrs = erg_parse.result(0).mrs()
                        sf = mrs.properties(mrs.index)['SF']
                    except Exception:
                        # Best effort: any failure reading the MRS is
                        # logged, but Ctrl-C / SystemExit are no longer
                        # swallowed.
                        print("MRS ERROR: " + sent, file=sys.stderr)
                        # NOTE(review): this yields the tag ([], '') --
                        # a list where every other tag is a string; kept
                        # as-is so callers see the same value. Confirm
                        # whether a string marker would be preferable.
                        sf = []

                    if sf != 'prop':
                        erg_results.append((sent, [(sf, '')]))

                    else:  # Propositions are good
                        erg_results.append((sent, []))

        return erg_results
示例#5
0
from delphin import ace
from delphin import tsdb
from delphin import itsdb

# Open the test suite stored under 'sample-200-py'.
ts = itsdb.TestSuite('sample-200-py')

# Extra command-line options handed to the ACE process.
ace_options = ['--disable-generalization']

# Start ACE on the 'terg-mac.dat' grammar image with full-forest output and
# let the test suite process itself with it; the parser is shut down when the
# ``with`` block exits.
with ace.ACEParser('terg-mac.dat', cmdargs=ace_options,
                   full_forest=True) as cpu:
    ts.process(cpu)