Пример #1
0
    def findSourceDoc(self):
        """Pick a document, run source detection on it and display the scores.

        Opens a file dialog, stores the chosen path on ``root.filename``,
        runs ``FindSourceDoc.run`` on it, and compares the result against the
        entry for that document in ``output/annotation.csv`` using
        ``g.allmeasure``. The three returned measures are printed and shown
        in a new ``Toplevel`` window as Precision, Recall and F1 Score.

        Returns:
            None. If the dialog is cancelled, or no annotation entry exists
            for the chosen document, a message is printed and no result
            window is opened.
        """
        root.filename = tkinter.filedialog.askopenfilename(
            initialdir=os.path.dirname(os.path.realpath(__file__)),
            title="Select file to search",
            filetypes=(("txt files", "*.txt"), ("all files", "*.*")))

        # A falsy result (documented as an empty string) means the dialog
        # was dismissed without choosing a file.
        if not root.filename:
            print("cancelled")
            return

        srcCandidate, dupl = FindSourceDoc.run(root.filename,
                                               root,
                                               filter=False,
                                               filamt=0)
        try:
            ParseXML.run(root.filename)
        except Exception:
            # Best effort: the original treats any failure here as "the
            # document is already annotated". Only KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print("already in annotation")

        dicc = g.openResult('output/annotation.csv')

        # The key is the base name with its last four characters removed
        # (".txt" for the default file type).
        # NOTE(review): assumes the annotation file is keyed the same way;
        # confirm for files whose extension is not three letters long.
        docKey = ntpath.basename(root.filename[:-4])
        annotation = dicc.get(docKey)
        if annotation is None:
            # Without this guard the subscripts below fail with an opaque
            # "'NoneType' object is not subscriptable".
            print("no annotation found for " + docKey)
            return
        annoSource = annotation[0]
        annoDup = annotation[1]

        p, r, f = g.allmeasure(srcCandidate, dupl, annoSource, annoDup)
        print(p)
        print(r)
        print(f)

        result = tkinter.Toplevel(root)
        result.minsize(200, 200)
        result.title("Evaluation " + ntpath.basename(root.filename))

        # One grid row per measure: caption in column 0, value in column 1.
        measures = (("Precision ", p), ("Recall ", r), ("F1 Score ", f))
        for row, (caption, score) in enumerate(measures):
            tkinter.Label(result, text=caption).grid(row=row,
                                                     column=0,
                                                     padx=3)
            tkinter.Label(result, text=round(score, 3)).grid(row=row,
                                                             column=1,
                                                             padx=3)
Пример #2
0
def simul(filteramt, file):
    """Evaluate source detection over a test corpus and plot the scores.

    Every ``*.txt`` file of the selected corpus is passed to
    ``FindSourceDoc.run``. The result is compared against that document's
    entry in ``output/annotation.csv`` using ``g.allmeasure``. The
    per-document precision, recall and F1 values are drawn as three bar
    charts, each titled with the mean of its values.

    Args:
        filteramt: Forwarded to ``FindSourceDoc.run`` as ``filamt``. When it
            equals 0, ``filter=False`` is passed as well.
        file: Corpus selector. ``"Simulated"`` selects
            ``testdoc/simulatedFiles/*.txt``; any other value selects
            ``testdoc/artificialFile/*.txt``.

    Returns:
        None. If no document could be evaluated, a message is printed and
        nothing is plotted.
    """
    if file == "Simulated":
        paths = glob.glob("testdoc/simulatedFiles/*.txt")
    else:
        paths = glob.glob("testdoc/artificialFile/*.txt")

    precision = []
    recall = []
    F1 = []
    docname = []
    for number, fi in enumerate(paths, start=1):
        if fi == '':
            print('cancelled')
            continue

        if filteramt == 0:
            runOptions = {"show": False, "filter": False, "filamt": 0}
        else:
            runOptions = {"show": False, "filamt": filteramt}
        srcCandidate, dupl = FindSourceDoc.run(fi, root, **runOptions)

        try:
            ParseXML.run(fi)
        except Exception:
            # Best effort: any failure here is treated as "the document is
            # already annotated", as in the original code.
            print("already in annotation")

        # Reloaded on every iteration because it is read right after
        # ParseXML.run for this document.
        # NOTE(review): presumably ParseXML.run updates this file; confirm.
        dicc = g.openResult('output/annotation.csv')

        # Key: base name with the ".txt" suffix (four characters) removed.
        docKey = ntpath.basename(fi[:-4])
        annotation = dicc.get(docKey)
        if annotation is None:
            # Skip rather than fail on subscripting None; the remaining
            # documents can still be evaluated.
            print("no annotation found for " + docKey)
            continue

        p, r, f = g.allmeasure(srcCandidate, dupl, annotation[0],
                               annotation[1])
        # The label is recorded together with the scores so the x values
        # and bar heights always have the same length.
        docname.append(number)
        precision.append(p)
        recall.append(r)
        F1.append(f)

    if not docname:
        # Avoids dividing by zero when computing the means below.
        print("no documents evaluated")
        return

    # Each panel plots its own series; the F1 panel previously drew the
    # recall values under an "F1 Score" title.
    panels = (('Precision', precision), ('Recall', recall), ('F1 Score', F1))
    for position, (label, scores) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        plt.bar(docname, scores)
        plt.title(label + ' = ' + str(round(sum(scores) / len(scores), 3)))
        plt.ylim(top=1.05)

    plt.subplots_adjust(hspace=0.3)
    plt.show()