Example #1
def extract_sentences_from_ngram(docs, target_n_gram):
    # Map each target n-gram to the list of sentences that contain it.
    output = {}
    for doc in docs:
        for sent in doc:
            # Re-join subword pieces so n-grams are formed over whole words.
            sent = merge_subword(sent)
            for ngram in ngrams(sent, 3):
                if ngram in target_n_gram:
                    assign_list_if_not_exists(output, ngram)
                    output[ngram].append(sent)
    return output
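The helper `assign_list_if_not_exists` is not defined in this listing; it is presumably a small dict utility equivalent to `dict.setdefault` with an empty list. A minimal sketch of the assumed behavior:

def assign_list_if_not_exists(d, key):
    # Assumed behavior: make sure d[key] exists as a list before appending to it.
    if key not in d:
        d[key] = []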
Example #2
def display():
    topic = "abortion"
    token_dict = dev_pretend_ukp_load_tokens_for_topic(topic)

    terms = ["should"]
    for doc in token_dict.values():
        for sent in doc:
            # Print every sentence containing the search term, followed by a blank line.
            if terms[0] in sent:
                sent = merge_subword(sent)
                print(" ".join(sent))
                print("")
Example #3
def display():
    target_n_gram = get_top_cont_ngram()
    for doc, preds in enum_docs_and_stance():
        assert len(preds) == len(doc)
        for sent, pred in zip(doc, preds):
            # Convert the raw stance prediction into probabilities.
            probs = softmax(pred)

            sent = merge_subword(sent)
            # Collect the target trigrams that occur in this sentence.
            found_ngram = []
            for ngram in ngrams(sent, 3):
                if ngram in target_n_gram:
                    found_ngram.append(ngram)

            # Only print sentences that contain at least one target trigram.
            if found_ngram:
                print(probs)
                print(found_ngram)
                print(" ".join(sent))
Example #4
from collections import Counter


def count_controversy(topic):
    token_controversial = ["controversy", "controversial"]

    def contain_controversy(tf):
        # A document counts as "controversial" if its term-frequency table
        # contains either marker token.
        for t in token_controversial:
            if t in tf:
                return True
        return False

    tokens_dict = ukp_load_tokens_for_topic(topic)
    c_tf = Counter()
    nc_tf = Counter()
    for doc_id, doc in tokens_dict.items():
        doc = [merge_subword(s) for s in doc]
        tf = build_uni_lm_from_tokens_list(doc)
        if contain_controversy(tf):
            c_tf.update(tf)
        else:
            nc_tf.update(tf)
    # Aggregate term frequencies separately for controversial and
    # non-controversial documents.
    return c_tf, nc_tf
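A possible way to call `count_controversy` and inspect the two counters it returns (the topic string is reused from Example #2; this usage is illustrative, not taken from the original code):

c_tf, nc_tf = count_controversy("abortion")
# Terms most frequent in documents that mention "controversy"/"controversial".
print(c_tf.most_common(20))
# Terms most frequent in the remaining documents.
print(nc_tf.most_common(20))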
Example #5
def build_ngram_lm_in_word_level(doc, n):
    # Merge subword pieces first so the n-gram LM is built over whole words.
    doc = [merge_subword(s) for s in doc]
    return build_ngram_lm_from_tokens_list(doc, n)
Example #6
def merge_subword_in_doc(doc):
    # Flatten a document into one token list, merging subwords segment by segment.
    r = []
    for segment in doc:
        r += merge_subword(segment)
    return r
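Every example above relies on `merge_subword`, whose definition is not part of this listing. A minimal sketch of the assumed behavior, re-joining BERT-style WordPiece continuations marked with "##":

def merge_subword(tokens):
    # Assumed behavior: ["contro", "##versial"] -> ["controversial"].
    merged = []
    for tok in tokens:
        if tok.startswith("##") and merged:
            merged[-1] += tok[2:]
        else:
            merged.append(tok)
    return merged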