def calculate_score(phrases: Phrases, worda: str, wordb: str) -> str:
    """
    Print the score of the word pair (worda, wordb) computed from the counts in ``phrases``.

    Only for Phrases, not applicable for Phraser.

    :param phrases: object whose ``vocab``, ``min_count`` and ``corpus_word_count`` are read
    :param worda: first word; looked up in ``phrases.vocab`` under its UTF-8 encoding
    :param wordb: second word; looked up the same way, and also as ``worda + '_' + wordb``
    :return: nothing is returned explicitly; the score is only printed.
        NOTE(review): this disagrees with the ``-> str`` annotation -- confirm the intent.
    """
    # -1 is what gets printed when scoring fails for any reason.
    result = -1
    try:
        counts = phrases.vocab
        count_a = counts[worda.encode('utf-8')]
        count_b = counts[wordb.encode('utf-8')]
        count_pair = counts[(worda + '_' + wordb).encode('utf-8')]
        result = original_scorer(
            count_a,
            count_b,
            count_pair,
            len(counts),
            phrases.min_count,
            phrases.corpus_word_count,
        )
    except Exception as err:
        # Best effort: report the problem and fall through with the -1 placeholder.
        print(err)
    print("[%s, %s]: %f" % (worda, wordb, result))
# Fragment of a larger routine: its header is outside this view and the original
# line breaks / indentation were lost, so the statements below appear on one line.
#
# Bigram pass: len(vocab2), bigram.min_count and bigram.corpus_word_count are
# converted to float, a table header is printed, and bi_vocab's keys are walked in
# descending order. For each entry `val` under key `c` that contains at least one
# alphabetic character once decoded with `decode_format`, `val` is split on its
# first b'_' into worda/wordb, both are looked up in vocab2, and when both counts
# are positive original_scorer is called with `c` as bigram_count. A table row is
# printed when c > 100, and (c, score) is stored in `grams` under the decoded bigram.
# NOTE(review): presumably bi_vocab maps count -> list of byte-string bigrams, built
# the same way tri_vocab is built below -- confirm against the code above this view.
# NOTE(review): with indentation lost, it cannot be determined here whether the
# `grams[...]` write sits inside the `c > 100` check or only inside the
# `s1 > 0 and s2 > 0` check.
#
# Trigram setup: vocab3 keys containing exactly two b'_' are grouped by their count
# into tri_vocab, and the three float parameters are recomputed from `trigram`.
len_vocab = float(len(vocab2)) min_count = float(bigram.min_count) corpus_word_count = float(bigram.corpus_word_count) print('-----> Table of bigrams') for c in sorted(bi_vocab.keys(), reverse=True): for val in bi_vocab[c]: if any((v.isalpha() for v in val.decode(decode_format))): [worda, wordb] = re.split(b'_', val, 1) s1 = vocab2[worda] s2 = vocab2[wordb] if s1 > 0 and s2 > 0: score = original_scorer(worda_count=float(s1), wordb_count=float(s2), bigram_count=float(c), len_vocab=len_vocab, min_count=min_count, corpus_word_count=corpus_word_count) if c > 100: print(f"{val.decode(decode_format):10} {c:5} \t{score:.4}") grams[val.decode(decode_format)] = (c, score) vocab3 = trigram.vocab tri_vocab = defaultdict(list) for p, c in vocab3.items(): if len(re.findall(b'_', p)) == 2: tri_vocab[c].append(p) len_vocab = float(len(vocab3)) min_count = float(trigram.min_count) corpus_word_count = float(trigram.corpus_word_count)
def default_scorer(worda_count, wordb_count, bigram_count, min_count):
    """
    Score a word pair via ``original_scorer`` with fixed trailing arguments.

    :param worda_count: forwarded unchanged as the first argument
    :param wordb_count: forwarded unchanged as the second argument
    :param bigram_count: forwarded unchanged as the third argument
    :param min_count: accepted but not used; the literal 15 is passed instead.
        NOTE(review): confirm that ignoring this argument is intentional.
    :return: whatever ``original_scorer`` returns
    """
    # Hard-coded values for the 4th, 5th and 6th positional arguments. Going by the
    # other original_scorer calls in this file, these slots are presumably
    # len_vocab, min_count and corpus_word_count -- verify.
    fixed_fourth = 59543584
    fixed_fifth = 15
    fixed_sixth = 0
    return original_scorer(
        worda_count,
        wordb_count,
        bigram_count,
        fixed_fourth,
        fixed_fifth,
        fixed_sixth,
    )