Exemplo n.º 1
0
def evaluate(gold_file, pred_file):
    """Evaluate using official CoNLL-X evaluation script (Yuval Krymolowski)

    Args:
      gold_file(str): The gold conllx file
      pred_file(str): The pred conllx file

    Returns:
      tuple: ``(uas, las)``, each computed as the ratio of two integers parsed
      from the evaluation script's output.

    Raises:
      RuntimeError: If the evaluation script exits with a non-zero code.

    """
    import os  # local import so the block does not rely on a file-level `import os`

    gold_file = get_resource(gold_file)
    # Intermediate files live in a private directory that is removed when the
    # block exits. Taking `.name` from a discarded NamedTemporaryFile would hand
    # out a path whose file is already gone (a race) and never clean up the
    # files later written there.
    with tempfile.TemporaryDirectory() as work_dir:
        fixed_pred_file = os.path.join(work_dir, 'pred.conllx')
        # NOTE(review): copy_cols presumably aligns the prediction's columns
        # with the gold file and drops comment lines -- confirm against helper.
        copy_cols(gold_file, pred_file, fixed_pred_file, keep_comments=False)
        if gold_file.endswith('.conllu'):
            # A .conllu gold file is passed through copy_cols as well, and the
            # rewritten copy is what gets handed to the evaluation script.
            fixed_gold_file = os.path.join(work_dir, 'gold.conllx')
            copy_cols(gold_file, gold_file, fixed_gold_file, keep_comments=False)
            gold_file = fixed_gold_file

        exitcode, out, err = get_exitcode_stdout_stderr(
            f'perl {CONLLX_EVAL} -q -b -g {gold_file} -s {fixed_pred_file}')
    if exitcode:
        raise RuntimeError(
            f'eval.pl exited with error code {exitcode} and error message {err} and output {out}.'
        )
    # Only the last four lines of the output are considered; the first of them
    # carries the LAS counts and the second the UAS counts.
    # NOTE(review): presumably each line reads
    # "<name> attachment score: <correct> / <total> ..." -- confirm with eval.pl.
    lines = out.split('\n')[-4:]
    las_fields = lines[0].split()
    uas_fields = lines[1].split()
    las = int(las_fields[3]) / int(las_fields[5])
    uas = int(uas_fields[3]) / int(uas_fields[5])
    return uas, las
Exemplo n.º 2
0
def ensure_python_points_to_python2():
    """Check that the ``python`` command reports a Python 2 version.

    Runs ``python --version`` and inspects the reported version string.

    Raises:
      EnvironmentError: If the reported version does not start with
        ``Python 2``.
    """
    _, stdout_text, stderr_text = get_exitcode_stdout_stderr('python --version')
    # The version banner is taken from stderr; stdout is used when stderr is empty.
    version = stderr_text or stdout_text
    if version.startswith('Python 2'):
        return
    raise EnvironmentError(
        f'Your python command needs to be Python2, not {version.strip()}. Try:\n\n\t'
        'ln -sf "$(which python2)" "$(which python)"')
Exemplo n.º 3
0
def run_perl(script, src, dst=None):
    """Run a Perl script on ``src`` and save its standard output to a file.

    Args:
      script(str): Path of the Perl script to execute.
      src(str): Argument passed to the script after its path.
      dst(str, optional): File the captured stdout is written to. When omitted
        (or empty) a new temporary file is created and used instead.

    Returns:
      str: The path the output was written to.

    Raises:
      RuntimeError: If perl exits with a non-zero code; the message is the
        captured stderr.
    """
    import tempfile  # local import: the original block did not use tempfile

    # Clears PERL5LIB for this process (and therefore for the perl child), so
    # only the explicit -I include path below is in effect.
    os.environ['PERL5LIB'] = ''
    exitcode, out, err = get_exitcode_stdout_stderr(
        f'perl -I{os.path.expanduser("~/.local/lib/perl5")} {script} {src}')
    if exitcode:
        # A failure here may be caused by missing Perl modules; they can be
        # installed into ~/.local with:
        # cpanm -l ~/.local namespace::autoclean
        # cpanm -l ~/.local Moose
        # cpanm -l ~/.local MooseX::SemiAffordanceAccessor module
        raise RuntimeError(err)
    if not dst:
        # The signature advertises dst=None, but open(None, 'w') would raise
        # TypeError. Create the fallback file only after the script succeeded
        # so a failed run leaves nothing behind.
        fd, dst = tempfile.mkstemp()
        os.close(fd)
    with open(dst, 'w') as ofile:
        ofile.write(out)
    return dst
Exemplo n.º 4
0
def run_perl(script, src, dst=None):
    """Run a Perl script on ``src`` and save its standard output to a file.

    Args:
      script(str): Path of the Perl script to execute.
      src(str): Argument passed to the script after its path.
      dst(str, optional): File the captured stdout is written to. When omitted
        (or empty) a new temporary file is created and used instead.

    Returns:
      str: The path the output was written to.

    Raises:
      RuntimeError: If perl exits with a non-zero code; the message is the
        captured stderr.
    """
    exitcode, out, err = get_exitcode_stdout_stderr(
        f'perl -I{os.path.expanduser("~/.local/lib/perl5")} {script} {src}')
    if exitcode:
        # A failure here may be caused by missing Perl modules; they can be
        # installed into ~/.local with:
        # cpanm -l ~/.local namespace::autoclean
        # cpanm -l ~/.local Moose
        # cpanm -l ~/.local MooseX::SemiAffordanceAccessor module
        raise RuntimeError(err)
    if not dst:
        # mkstemp creates the file atomically and leaves it in place for the
        # caller. Taking `.name` from a discarded NamedTemporaryFile yields a
        # path whose file has already been deleted, which another process
        # could claim before we write to it.
        fd, dst = tempfile.mkstemp()
        os.close(fd)
    with open(dst, 'w') as ofile:
        ofile.write(out)
    return dst
Exemplo n.º 5
0
def convert_to_stanford_dependency_330(src, dst):
    """Convert a treebank file to dependencies with Stanford Parser 3.3.0.

    Runs the Chinese grammatical structure converter from the Stanford Parser
    distribution on ``src`` with ``-basic -keepPunct -conllx`` and writes the
    converter's standard output to ``dst``.

    Args:
      src(str): Path of the tree file passed to ``-treeFile``.
      dst(str): Path the converted output is written to. It is only written
        when the conversion succeeds.

    Raises:
      RuntimeError: If the java process exits with a non-zero code.
    """
    cprint(
        f'Converting {os.path.basename(src)} to {os.path.basename(dst)} using Stanford Parser Version 3.3.0. '
        f'It might take a while [blink][yellow]...[/yellow][/blink]')
    sp_home = 'https://nlp.stanford.edu/software/stanford-parser-full-2013-11-12.zip'
    # NOTE(review): get_resource presumably fetches/unpacks the archive and
    # returns a local directory -- confirm against the helper.
    sp_home = get_resource(sp_home)
    code, out, err = get_exitcode_stdout_stderr(
        f'java -cp {sp_home}/* edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure '
        f'-basic -keepPunct -conllx '
        f'-treeFile {src}')
    # Check the exit code before touching dst: writing first would leave a
    # truncated or empty file behind that looks like a finished conversion.
    if code:
        raise RuntimeError(
            f'Conversion failed with code {code} for {src}. The err message is:\n {err}\n'
            f'Do you have java installed? Do you have enough memory?')
    with open(dst, 'w') as f:
        f.write(out)
Exemplo n.º 6
0
def convert_to_dependency(src,
                          dst,
                          language='zh',
                          version='3.3.0',
                          conllx=True,
                          ud=False):
    """Convert a treebank file to dependencies with the Stanford Parser.

    Args:
      src(str): Path of the tree file passed to ``-treeFile``.
      dst(str): Path the converter's standard output is written to. It is only
        written when the conversion succeeds.
      language(str): ``'zh'`` selects the Chinese converter classes; any other
        value selects the English (or, with ``ud``, the generic Universal
        Dependencies) converter class.
      version(str): Stanford Parser release to use, ``'3.3.0'`` or ``'4.2.0'``.
      conllx(bool): Whether to pass ``-conllx`` to the converter.
      ud(bool): Whether to use the Universal Dependencies converter classes.
        When false, ``-basic -keepPunct`` is passed as well.

    Raises:
      ValueError: If ``version`` is not one of the supported releases.
      RuntimeError: If the java process exits with a non-zero code.
    """
    cprint(
        f'Converting {os.path.basename(src)} to {os.path.basename(dst)} using Stanford Parser Version {version}. '
        f'It might take a while [blink][yellow]...[/yellow][/blink]')
    if version == '3.3.0':
        sp_home = 'https://nlp.stanford.edu/software/stanford-parser-full-2013-11-12.zip'
    elif version == '4.2.0':
        sp_home = 'https://nlp.stanford.edu/software/stanford-parser-4.2.0.zip'
    else:
        raise ValueError(f'Unsupported version {version}')
    # NOTE(review): get_resource presumably fetches/unpacks the archive and
    # returns a local directory -- confirm against the helper.
    sp_home = get_resource(sp_home)

    # Pick the converter's main class from the (ud, language) combination.
    is_chinese = language == 'zh'
    if ud:
        if is_chinese:
            jclass = 'edu.stanford.nlp.trees.international.pennchinese.UniversalChineseGrammaticalStructure'
        else:
            jclass = 'edu.stanford.nlp.trees.ud.UniversalDependenciesConverter'
    else:
        if is_chinese:
            jclass = 'edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure'
        else:
            jclass = 'edu.stanford.nlp.trees.EnglishGrammaticalStructure'

    cmd = f'java -cp {sp_home}/* {jclass} ' \
          f'-treeFile {src}'
    if conllx:
        cmd += ' -conllx'
    if not ud:
        cmd += ' -basic -keepPunct'
    code, out, err = get_exitcode_stdout_stderr(cmd)
    # Check the exit code before touching dst: writing first would leave a
    # truncated or empty file behind that looks like a finished conversion.
    if code:
        raise RuntimeError(
            f'Conversion failed with code {code} for {src}. The err message is:\n {err}\n'
            f'Do you have java installed? Do you have enough memory?')
    with open(dst, 'w') as f:
        f.write(out)