示例#1
0
def run_baseline(treebank_name, outdir=None, trainfile=None, testfile=None):
    """Train the baseline parser for a treebank and report its accuracy.

    The parser owned by a ``TreebankTransformer`` for ``treebank_name`` is
    trained on the transformer's training file and used to parse the
    transformer's test file; the parse is then scored with ``Malteval``.

    Args:
        treebank_name: Name handed to ``TreebankTransformer``; also the first
            field of the returned line.
        outdir: Directory prefix for the parsed output. When falsy it
            defaults to ``config.exp + treebank_name`` (plain concatenation,
            no separator is inserted). The directory is not created here.
        trainfile: Accepted but not used by this function.
        testfile: Accepted but not used by this function.

    Returns:
        A single line ``"<treebank_name>;<las>;<uas>\\n"`` (LAS before UAS).
    """
    outdir = outdir or (config.exp + treebank_name)
    evaluator = Malteval()
    transformer = TreebankTransformer(treebank_name=treebank_name)

    # Train and parse, using the files the transformer itself points at.
    gold_train_path = transformer.trainfile
    gold_test_path = transformer.testfile
    baseline_parse_path = outdir + '/dev_parsed_baseline.conll'

    transformer._parser.train(gold_train_path)
    transformer._parser.parse(gold_test_path, baseline_parse_path)

    # Results: accuracy() yields (uas, las); the output line swaps the order.
    uas, las = evaluator.accuracy(gold_test_path, baseline_parse_path)
    return "%s;%s;%s\n" % (treebank_name, las, uas)
示例#2
0
def _malt_number(cell, cast):
    """Convert one evaluation-table cell with ``cast``; the "-" placeholder maps to zero."""
    # Equality, not identity: `is not "-"` only works when the interpreter
    # happens to intern the string, and is a SyntaxWarning on Python 3.8+.
    return cast(cell) if cell != "-" else cast(0)


def _f_score(precision, recall):
    """Return the harmonic mean of precision and recall, or 0.0 if either is zero."""
    if precision == 0.0 or recall == 0.0:
        return 0.0
    return 2 * precision * recall / (precision + recall)


def _margin_of_error(count):
    """Return ``0.98 / sqrt(count)``, or 0.0 for a zero count."""
    # presumably 0.98 = 1.96 * 0.5, i.e. a worst-case 95% margin -- TODO confirm
    return 0.98 / sqrt(count) if count != 0 else 0.0


def _row_scores(row):
    """Return ``(f_score, margin_of_error)`` for one evaluation-table row."""
    # assumes columns 0, 1, 2 are precision, recall and count (inferred from
    # how the values are combined below) -- TODO confirm against Malteval
    precision = _malt_number(row[0], float)
    recall = _malt_number(row[1], float)
    count = _malt_number(row[2], int)
    return _f_score(precision, recall), _margin_of_error(count)


def plot_dep_scores(indir, treebank_number, dep, fig=None, outfile="Figures/figure.png"):
    """Draw baseline vs. transformed score bars for one dependency relation.

    Two ``Malteval.deprel_matrix`` tables are read: gold vs. the baseline
    parse and gold vs. the transformed parse. Rows whose label (column 6 of
    the baseline table, trailing newline stripped) equals ``dep`` are kept,
    and for each a pair of bars with error bars is added to the axes. Rows of
    the two tables are paired by position.

    Args:
        indir: Path prefix; file names are appended by plain concatenation,
            so it must already end with a separator.
        treebank_number: Position of this treebank in the plot; offsets the
            bars horizontally. Labels and legend are only added for 0.
        dep: Dependency relation label to select.
        fig: Existing figure to draw on. When falsy a new figure is created;
            otherwise ``fig.add_subplot(1, 1, 1)`` is used.
        outfile: Accepted but not used by this function.

    Returns:
        ``(fig, ax)``. If no row matches ``dep`` nothing is drawn.
    """
    gold = indir + "test_gold.conll"
    baseline = indir + "dev_parsed_baseline.conll"
    transf = indir + "dev_parsed.ud.conll"

    malteval = Malteval()
    baseline_rows = malteval.deprel_matrix(gold, baseline)
    transformed_rows = malteval.deprel_matrix(gold, transf)

    # Parse every row of both tables, then keep the ones labelled `dep`.
    labels = [row[6].strip("\n") for row in baseline_rows]
    baseline_scores = [_row_scores(row) for row in baseline_rows]
    transformed_scores = [_row_scores(row) for row in transformed_rows]
    selected = [
        (base, transformed)
        for label, base, transformed in zip(labels, baseline_scores, transformed_scores)
        if label == dep
    ]
    f1 = [base[0] for base, _ in selected]
    f2 = [transformed[0] for _, transformed in selected]
    me1 = [base[1] for base, _ in selected]
    me2 = [transformed[1] for _, transformed in selected]

    if not fig:
        fig, ax = plt.subplots()
    else:
        # NOTE(review): depending on the matplotlib version this may return
        # the existing axes or create a new overlapping one -- confirm.
        ax = fig.add_subplot(1, 1, 1)

    if not selected:
        return fig, ax

    bar_width = 0.45
    # Each treebank gets a slot three bar-widths wide.
    last_index = treebank_number * 3 * bar_width
    index = last_index + np.arange(len(selected))
    error_config = {"ecolor": "0.3"}

    ax.bar(
        index,
        f1,
        bar_width,
        color="darkblue",
        error_kw=error_config,
        yerr=me1,
        # Only the first treebank contributes legend entries.
        label="baseline" if treebank_number == 0 else "",
    )

    ax.bar(
        index + bar_width,
        f2,
        bar_width,
        color="skyblue",
        error_kw=error_config,
        yerr=me2,
        label="transformed" if treebank_number == 0 else "",
    )

    if treebank_number == 0:
        # Use the axes/figure being drawn on rather than pyplot's "current"
        # ones, so a caller-supplied `fig` is labelled even if not current.
        ax.set_xlabel("%s deprel" % dep)
        ax.set_ylabel("Attachment score")

        ax.legend(bbox_to_anchor=(0.95, 0.92), bbox_transform=fig.transFigure)
    return (fig, ax)