Пример #1
0
def essay_jaccard_similarity(kunci_jawaban,
                             jawaban,
                             char=False,
                             fitur=True,
                             praproses=True):
    """Score an answer against one or more answer keys with Jaccard.

    Every (processed) answer key is weighted together with the answer, the
    two resulting rows are compared with ``simi.jaccard`` and the
    best-scoring key is reported.

    Args:
        kunci_jawaban: One answer key or a list of answer keys. Anything
            that is not a list is wrapped in a one-element list. The keys
            are then passed through ``cek_negasi_list`` and
            ``stopword_list``; each resulting key must support ``.split()``.
        jawaban: The answer to score.
        char: Boolean flag forwarded to ``w.tf_idf`` as ``char``.
        fitur: Boolean flag forwarded to ``w.tf_idf`` as ``fitur``. When
            both ``char`` and ``fitur`` are false, ``w2.tf_idf`` is used
            instead of ``w.tf_idf``.
        praproses: When true, ``jawaban`` is first passed through
            ``praproses_`` together with the space-joined output of
            ``get_unik`` for the keys.

    Returns:
        list: ``[max_sim, max_idx, bobot, jawaban]`` where ``max_sim`` is
        the highest value returned by ``simi.jaccard``, ``max_idx`` is the
        position of the first key reaching it (in the processed key list),
        ``bobot`` holds the weight rows computed for every key, and
        ``jawaban`` is the answer as it was actually scored.

    Raises:
        ValueError: If there is no answer key left to compare against.
    """
    if not isinstance(kunci_jawaban, list):
        kunci_jawaban = [kunci_jawaban]
    kunci_jawaban = cek_negasi_list(kunci_jawaban)
    kunci_jawaban = stopword_list(kunci_jawaban)
    kunci_jawaban_unik = " ".join(get_unik(kunci_jawaban))
    if praproses:
        jawaban = praproses_(jawaban, kunci_jawaban_unik)

    simm = []
    bobot = []
    for kj in kunci_jawaban:
        # The vocabulary is restricted to the distinct tokens of this key.
        fitur_ = list(set(kj.split()))
        if char or fitur:
            # Element 1 of the w.tf_idf result exposes .transform(); it is
            # presumably the fitted vectorizer -- confirm against w.tf_idf.
            t = w.tf_idf(kj, jawaban, vocab=fitur_, fitur=fitur, char=char)[1]
            t_x = t.transform([kj, jawaban]).toarray()
        else:
            t_x = w2.tf_idf([kj, jawaban], vocab=fitur_)
        bobot.append(t_x)
        # Row 0 belongs to the key, row 1 to the answer.
        simm.append(simi.jaccard(t_x[0], t_x[1]))

    if not simm:
        # Same exception type max() would raise, with an actionable message.
        raise ValueError("kunci_jawaban must contain at least one answer key")
    max_sim = max(simm)
    max_idx = simm.index(max_sim)
    return [max_sim, max_idx, bobot, jawaban]
Пример #2
0
def essay_dice_similarity(kunci_jawaban, jawaban, bychar=False, fixed=False):
    """Print the Dice similarity between an answer and each answer key.

    Args:
        kunci_jawaban: One answer key or a list of answer keys. Anything
            that is not a list is wrapped in a one-element list. The keys
            are passed through ``cek_negasi_list``; each resulting key must
            support ``.split()``.
        jawaban: The answer to score. It is passed through ``praproses``
            together with the space-joined output of ``get_unik`` for the
            keys before scoring.
        bychar: Not used by this function; kept so existing calls still work.
        fixed: Not used by this function; kept so existing calls still work.

    Returns:
        None. The value returned by ``simi.dice_similarity`` for every key
        is written to standard output, one per line.
    """
    if not isinstance(kunci_jawaban, list):
        kunci_jawaban = [kunci_jawaban]
    kunci_jawaban = cek_negasi_list(kunci_jawaban)
    kunci_jawaban_unik = " ".join(get_unik(kunci_jawaban))
    jawaban = praproses(jawaban, kunci_jawaban_unik)
    for kj in kunci_jawaban:
        # `fitur` was previously read without ever being assigned in this
        # function; build the vocabulary from the key's distinct tokens.
        # NOTE(review): if a module-level `fitur` was intended, confirm.
        fitur = list(set(kj.split()))
        t = w.tf_idf(kj, jawaban, fitur)
        # Row 0 of t.A is compared with row 1 (key first, answer second in
        # the call above).
        dice = simi.dice_similarity(t.A[0], t.A[1])
        print(dice)
Пример #3
0
def essay_jaccard_similarity(kunci_jawaban, jawaban, bychar=False, fixed=False):
    """Return the best Jaccard similarity of an answer over the answer keys.

    Args:
        kunci_jawaban: One answer key or a list of answer keys. Anything
            that is not a list is wrapped in a one-element list. The keys
            are passed through ``cek_negasi_list``; each resulting key must
            support ``.split()``.
        jawaban: The answer to score. It is passed through ``praproses``
            together with the space-joined output of ``get_unik`` for the
            keys before scoring.
        bychar: Not used by this function; kept so existing calls still work.
        fixed: Not used by this function; kept so existing calls still work.

    Returns:
        list: ``[best, transform(best)]`` where ``best`` is the highest
        value returned by ``simi.jaccard`` across all keys.

    Raises:
        ValueError: If there is no answer key left to compare against.
    """
    if not isinstance(kunci_jawaban, list):
        kunci_jawaban = [kunci_jawaban]
    kunci_jawaban = cek_negasi_list(kunci_jawaban)
    kunci_jawaban_unik = " ".join(get_unik(kunci_jawaban))
    jawaban = praproses(jawaban, kunci_jawaban_unik)

    simm = []
    for kj in kunci_jawaban:
        # The vocabulary is restricted to the distinct tokens of this key.
        fitur = list(set(kj.split()))
        t = w.tf_idf(kj, jawaban, fitur)
        # Row 0 of t.A is compared with row 1 (key first, answer second in
        # the call above).
        simm.append(simi.jaccard(t.A[0], t.A[1]))

    if not simm:
        # Same exception type max() would raise, with an actionable message.
        raise ValueError("kunci_jawaban must contain at least one answer key")
    best = max(simm)
    return [best, transform(best)]
Пример #4
0
def essay_dice_similarity(kunci_jawaban, jawaban, char=False, fitur=False):
    """Return the best Dice similarity of an answer over the answer keys.

    Args:
        kunci_jawaban: One answer key or a list of answer keys. Anything
            that is not a list is wrapped in a one-element list. The keys
            are passed through ``cek_negasi_list``; each resulting key must
            support ``.split()``.
        jawaban: The answer to score. It is passed through ``praproses``
            together with the space-joined output of ``get_unik`` for the
            keys before scoring.
        char: Forwarded unchanged to ``w.tf_idf`` as ``char``.
        fitur: Forwarded unchanged to ``w.tf_idf`` as ``fitur``.

    Returns:
        list: ``[round(best, 10), transform(best)]`` where ``best`` is the
        highest value returned by ``simi.dice_similarity`` across all keys.
        Note that ``transform`` receives the unrounded score.

    Raises:
        ValueError: If there is no answer key left to compare against.
    """
    if not isinstance(kunci_jawaban, list):
        kunci_jawaban = [kunci_jawaban]
    kunci_jawaban = cek_negasi_list(kunci_jawaban)
    kunci_jawaban_unik = " ".join(get_unik(kunci_jawaban))
    jawaban = praproses(jawaban, kunci_jawaban_unik)

    simm = []
    for kj in kunci_jawaban:
        # The vocabulary is restricted to the distinct tokens of this key.
        fitur_ = list(set(kj.split()))
        t = w.tf_idf(kj, jawaban, vocab=fitur_, fitur=fitur, char=char)
        # Row 0 of t.A is compared with row 1 (key first, answer second in
        # the call above).
        simm.append(simi.dice_similarity(t.A[0], t.A[1]))

    if not simm:
        # Same exception type max() would raise, with an actionable message.
        raise ValueError("kunci_jawaban must contain at least one answer key")
    best = max(simm)
    return [round(best, 10), transform(best)]
Пример #5
0
def essay_cosine_similarity(kunci_jawaban, jawaban, char=False, fitur=True):
    """Return the best cosine similarity of an answer over the answer keys.

    Besides returning the score, this function writes debug output to
    standard output for every key: a two-letter tag describing the
    ``char``/``fitur`` flags (no newline), followed by the weight matrix.

    Args:
        kunci_jawaban: One answer key or a list of answer keys. Anything
            that is not a list is wrapped in a one-element list. The keys
            are passed through ``cek_negasi_list``; each resulting key must
            support ``.split()``.
        jawaban: The answer to score. It is passed through ``praproses``
            together with the space-joined output of ``get_unik`` for the
            keys before scoring.
        char: Forwarded unchanged to ``w.tf_idf`` as ``char``.
        fitur: Forwarded unchanged to ``w.tf_idf`` as ``fitur``.

    Returns:
        list: ``[round(best, 10), transform(best)]`` where ``best`` is the
        highest value returned by ``simi.cosine_similarity`` across all
        keys. Note that ``transform`` receives the unrounded score.

    Raises:
        ValueError: If there is no answer key left to compare against.
    """
    if not isinstance(kunci_jawaban, list):
        kunci_jawaban = [kunci_jawaban]
    kunci_jawaban = cek_negasi_list(kunci_jawaban)
    kunci_jawaban_unik = " ".join(get_unik(kunci_jawaban))
    jawaban = praproses(jawaban, kunci_jawaban_unik)

    # The debug tag depends only on the flags, so pick it once up front.
    # First letter reflects `char`, second reflects `fitur`; a flag that
    # equals neither True nor False yields no tag at all.
    tag = None
    for char_value, fitur_value, label in (
        (True, True, "tt"),
        (True, False, "tf"),
        (False, True, "ft"),
        (False, False, "ff"),
    ):
        if char == char_value and fitur == fitur_value:
            tag = label
            break

    simm = []
    for kj in kunci_jawaban:
        if tag is not None:
            print(tag, end="")
        # The vocabulary is restricted to the distinct tokens of this key.
        fitur_ = list(set(kj.split()))
        t = w.tf_idf(kj, jawaban, vocab=fitur_, fitur=fitur, char=char)
        print(t.A)
        # Row 0 of t.A is compared with row 1 (key first, answer second in
        # the call above).
        simm.append(simi.cosine_similarity(t.A[0], t.A[1]))

    if not simm:
        # Same exception type max() would raise, with an actionable message.
        raise ValueError("kunci_jawaban must contain at least one answer key")
    best = max(simm)
    return [round(best, 10), transform(best)]