def essay_jaccard_similarity(kunci_jawaban, jawaban, char=False, fitur=True, praproses=True):
    """Score an answer against one or more answer keys with ``simi.jaccard``.

    The keys are passed through ``cek_negasi_list`` and ``stopword_list``.
    The answer is optionally passed through ``praproses_`` together with the
    space-joined result of ``get_unik`` over the keys. For every key, the key
    and the answer are turned into two weight rows restricted to that key's
    own unique whitespace-separated tokens, and the rows are compared.

    NOTE(review): another ``essay_jaccard_similarity`` with a different
    signature appears later in the visible source; if both live in the same
    module, the later definition replaces this one. Confirm which is intended.

    Args:
        kunci_jawaban: One answer key or a list of answer keys. A non-list
            value is wrapped in a one-element list. Each key must support
            ``.split()`` (presumably ``str`` -- confirm against callers).
        jawaban: The answer to score.
        char: Forwarded to ``w.tf_idf`` as ``char``. When either ``char`` or
            ``fitur`` is truthy the ``w`` weighting path is used, otherwise
            ``w2.tf_idf`` is used.
        fitur: Forwarded to ``w.tf_idf`` as ``fitur``; see ``char``.
        praproses: When truthy, ``jawaban`` is replaced by the result of
            ``praproses_`` before scoring.

    Returns:
        list: ``[max_sim, max_idx, bobot, jawaban]`` where ``max_sim`` is the
        largest value returned by ``simi.jaccard``, ``max_idx`` is the index
        of the first key that produced it, ``bobot`` holds the per-key weight
        rows in key order, and ``jawaban`` is the answer as it was scored.

    Raises:
        ValueError: If there is no answer key left to score against.
    """
    if not isinstance(kunci_jawaban, list):
        kunci_jawaban = [kunci_jawaban]
    kunci_jawaban = cek_negasi_list(kunci_jawaban)
    kunci_jawaban = stopword_list(kunci_jawaban)
    kunci_jawaban_unik = " ".join(get_unik(kunci_jawaban))

    if praproses:
        jawaban = praproses_(jawaban, kunci_jawaban_unik)

    simm = []
    bobot = []
    for kj in kunci_jawaban:
        # The vocabulary is limited to the unique tokens of this key.
        fitur_ = list(set(kj.split()))
        if char or fitur:
            # Element [1] of the w.tf_idf result is the object whose
            # .transform(...) yields the weight rows for [key, answer].
            t = w.tf_idf(kj, jawaban, vocab=fitur_, fitur=fitur, char=char)[1]
            t_x = t.transform([kj, jawaban]).toarray()
        else:
            t_x = w2.tf_idf([kj, jawaban], vocab=fitur_)
        bobot.append(t_x)
        # Row 0 belongs to the key, row 1 to the answer (input order above).
        simm.append(simi.jaccard(t_x[0], t_x[1]))

    if not simm:
        # Same exception type max() would raise, with an actionable message.
        raise ValueError("kunci_jawaban must contain at least one answer key")

    max_sim = max(simm)
    max_idx = simm.index(max_sim)
    return [max_sim, max_idx, bobot, jawaban]
def essay_dice_similarity(kunci_jawaban, jawaban, bychar=False, fixed=False):
    """Print the ``simi.dice_similarity`` score of an answer against each key.

    The keys are passed through ``cek_negasi_list``; the answer is passed
    through ``praproses`` together with the space-joined result of
    ``get_unik`` over the keys. For every key, ``w.tf_idf`` is called with the
    key, the answer and the key's unique whitespace-separated tokens, and the
    first two rows of the result's ``.A`` attribute are compared.

    NOTE(review): another ``essay_dice_similarity`` with a different signature
    appears later in the visible source; if both live in the same module, the
    later definition replaces this one. Confirm which is intended.

    Args:
        kunci_jawaban: One answer key or a list of answer keys. A non-list
            value is wrapped in a one-element list. Each key must support
            ``.split()`` (presumably ``str`` -- confirm against callers).
        jawaban: The answer to score.
        bychar: Unused by this function.
        fixed: Unused by this function.

    Returns:
        None. One score per key is written to standard output, in key order.
    """
    if not isinstance(kunci_jawaban, list):
        kunci_jawaban = [kunci_jawaban]
    kunci_jawaban = cek_negasi_list(kunci_jawaban)
    kunci_jawaban_unik = " ".join(get_unik(kunci_jawaban))
    jawaban = praproses(jawaban, kunci_jawaban_unik)

    for kj in kunci_jawaban:
        # Build the vocabulary locally from this key's unique tokens so the
        # call does not depend on a module-level name `fitur` being defined.
        fitur = list(set(kj.split()))
        t = w.tf_idf(kj, jawaban, fitur)
        dice = simi.dice_similarity(t.A[0], t.A[1])
        print(dice)
def essay_jaccard_similarity(kunci_jawaban, jawaban, bychar=False, fixed=False):
    """Return the best ``simi.jaccard`` score of an answer over all answer keys.

    The keys go through ``cek_negasi_list``; the answer goes through
    ``praproses`` together with the space-joined output of ``get_unik`` over
    the keys. Each key is then weighted against the answer with ``w.tf_idf``
    using the key's own unique whitespace-separated tokens as vocabulary.

    Args:
        kunci_jawaban: One answer key or a list of answer keys; a value whose
            type is not exactly ``list`` is wrapped in a one-element list.
        jawaban: The answer to score.
        bychar: Unused by this function.
        fixed: Unused by this function.

    Returns:
        list: ``[best, transform(best)]`` where ``best`` is the maximum score
        over all keys.
    """
    keys = kunci_jawaban if type(kunci_jawaban) is list else [kunci_jawaban]
    keys = cek_negasi_list(keys)
    answer = praproses(jawaban, " ".join(get_unik(keys)))

    def _score(key):
        # Vocabulary is restricted to the distinct tokens of this key.
        vocab = list(set(key.split()))
        weights = w.tf_idf(key, answer, vocab).A
        return simi.jaccard(weights[0], weights[1])

    best = max([_score(key) for key in keys])
    return [best, transform(best)]
def essay_dice_similarity(kunci_jawaban, jawaban, char=False, fitur=False):
    """Return the best ``simi.dice_similarity`` score of an answer over all keys.

    The keys go through ``cek_negasi_list``; the answer goes through
    ``praproses`` together with the space-joined output of ``get_unik`` over
    the keys. Each key is weighted against the answer with ``w.tf_idf``,
    using the key's unique whitespace-separated tokens as ``vocab``.

    Args:
        kunci_jawaban: One answer key or a list of answer keys; a value whose
            type is not exactly ``list`` is wrapped in a one-element list.
        jawaban: The answer to score.
        char: Forwarded to ``w.tf_idf`` as ``char``.
        fitur: Forwarded to ``w.tf_idf`` as ``fitur``.

    Returns:
        list: ``[round(best, 10), transform(best)]`` where ``best`` is the
        maximum score over all keys. ``transform`` receives the unrounded
        value.
    """
    if type(kunci_jawaban) is list:
        answer_keys = kunci_jawaban
    else:
        answer_keys = [kunci_jawaban]
    answer_keys = cek_negasi_list(answer_keys)
    unique_terms = get_unik(answer_keys)
    answer = praproses(jawaban, " ".join(unique_terms))

    scores = []
    for key in answer_keys:
        vocab = list(set(key.split()))
        weights = w.tf_idf(key, answer, vocab=vocab, fitur=fitur, char=char).A
        # Row 0 is compared against row 1 of the weight matrix.
        scores.append(simi.dice_similarity(weights[0], weights[1]))

    best = max(scores)
    return [round(best, 10), transform(best)]
def essay_cosine_similarity(kunci_jawaban, jawaban, char=False, fitur=True):
    """Return the best ``simi.cosine_similarity`` score of an answer over all keys.

    The keys go through ``cek_negasi_list``; the answer goes through
    ``praproses`` together with the space-joined output of ``get_unik`` over
    the keys. Each key is weighted against the answer with ``w.tf_idf``,
    using the key's unique whitespace-separated tokens as ``vocab``.

    For every key this function also writes to standard output: a
    two-letter tag for the ``char``/``fitur`` flags (without a newline),
    followed by the weight matrix.

    Args:
        kunci_jawaban: One answer key or a list of answer keys; a value whose
            type is not exactly ``list`` is wrapped in a one-element list.
        jawaban: The answer to score.
        char: Forwarded to ``w.tf_idf`` as ``char``.
        fitur: Forwarded to ``w.tf_idf`` as ``fitur``.

    Returns:
        list: ``[round(best, 10), transform(best)]`` where ``best`` is the
        maximum score over all keys. ``transform`` receives the unrounded
        value.
    """

    def _flag_letter(value):
        # Equality (not truthiness) is deliberate: a value equal to neither
        # True nor False yields no letter, and then no tag is printed.
        if value == True:
            return "t"
        if value == False:
            return "f"
        return None

    keys = kunci_jawaban if type(kunci_jawaban) is list else [kunci_jawaban]
    keys = cek_negasi_list(keys)
    answer = praproses(jawaban, " ".join(get_unik(keys)))

    char_letter = _flag_letter(char)
    fitur_letter = _flag_letter(fitur)
    tag = None
    if char_letter is not None and fitur_letter is not None:
        tag = char_letter + fitur_letter

    scores = []
    for key in keys:
        if tag is not None:
            print(tag, end="")
        vocab = list(set(key.split()))
        weights = w.tf_idf(key, answer, vocab=vocab, fitur=fitur, char=char).A
        print(weights)
        scores.append(simi.cosine_similarity(weights[0], weights[1]))

    best = max(scores)
    return [round(best, 10), transform(best)]