import glob

import nltk
from nltk.corpus import wordnet as wn

# `source` (exposing an `nlp` parsing pipeline), `_questRewr`, `art_dirs_pre`,
# and `art_dirs` are project-local and assumed to be in scope.


def temp():
    art_files = [glob.glob(art_dirs_pre + art_dir + "*.txt") for art_dir in art_dirs]

    with open(art_files[1][134], "r") as f:
        art_text = f.read()

    # Split the article into sentences and parse one with the pipeline.
    art_sents = nltk.sent_tokenize(art_text)
    sent_ = art_sents[7]
    sent_doc = source.nlp(str(sent_))  # the original used Python 2's unicode()
    _questRewr.is_sentence(sent_doc)

    # Antonyms are recorded on Lemma objects, not on the Synset itself.
    good = wn.synset("good.a.01")
    good.lemmas()[0].antonyms()
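For reference, the WordNet antonym lookup in isolation; this sketch assumes only that nltk and its wordnet corpus are installed:

import nltk
from nltk.corpus import wordnet as wn

nltk.download("wordnet", quiet=True)  # fetch the corpus if it is missing

good = wn.synset("good.a.01")
# Walk from the synset down to its lemmas, where antonymy is defined.
for lemma in good.lemmas():
    print(lemma.name(), "->", [a.name() for a in lemma.antonyms()])
# prints: good -> ['bad']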
import nltk
import numpy as np

# `_questRewr.is_compl_sentence` is a project-local predicate over parsed docs.


def select_sents(art_text, nlp, min_sent_n=1, max_sent_n=5, max_sent_l=30,
                 rng=np.random.RandomState(156), max_iter=20):
    # Caution: the default RandomState is created once and shared across calls.
    sent_offs = 3  # skip this many sentences at each end of the article
    sents = [str(s) for s in nltk.sent_tokenize(art_text)]  # Py2 used unicode()
    sents_docs = [nlp(s) for s in sents]
    sents_n = len(sents)
    all_sents = False
    i_ = 0
    try:
        while not all_sents and i_ < max_iter:
            # Draw a window length and a start index for a contiguous run.
            sent_n = rng.randint(min_sent_n, max_sent_n + 1)
            sent_i0 = rng.randint(sent_offs, sents_n - sent_offs - sent_n)
            sent_in = sent_i0 + sent_n - 1  # index of the last sentence
            if len(nltk.word_tokenize(sents[sent_in])) < max_sent_l:
                # Accept the window only if every sentence in it is complete.
                all_sents = np.all([_questRewr.is_compl_sentence(s)
                                    for s in sents_docs[sent_i0:sent_in + 1]])
            i_ += 1
    except ValueError:  # raised by randint when the article is too short
        return None
    if all_sents:
        # Inclusive of the last sentence; the original slice(sent_i0, sent_in)
        # dropped it and returned [] whenever sent_n == 1.
        return sents[sent_i0:sent_in + 1]
    else:
        return None
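A minimal, self-contained sketch of the same windowed sampling scheme, runnable without the project modules; `pick_window` and its `is_complete` predicate are hypothetical stand-ins for select_sents and `_questRewr.is_compl_sentence`:

import numpy as np


def pick_window(sents, min_n=1, max_n=5, offs=3, max_iter=20,
                rng=None, is_complete=lambda s: True):
    """Sample a contiguous run of min_n..max_n sentences, skipping the
    first and last `offs` sentences, in the spirit of select_sents."""
    rng = rng or np.random.RandomState(156)
    for _ in range(max_iter):
        n = rng.randint(min_n, max_n + 1)
        try:
            i0 = rng.randint(offs, len(sents) - offs - n)
        except ValueError:  # article too short for this window size
            return None
        window = sents[i0:i0 + n]
        # Keep the window only if every sentence passes the predicate.
        if all(is_complete(s) for s in window):
            return window
    return None


print(pick_window(["Sentence %d." % i for i in range(20)]))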