def temp():
    """Scratch routine that exercises the article-parsing pipeline once.

    Globs the ``*.txt`` files of every directory in ``art_dirs``, reads one
    hard-coded article, sentence-tokenizes it, runs ``source.nlp`` on one
    hard-coded sentence and passes the result to ``_questRewr.is_sentence``.
    It finishes with a WordNet lookup. Every result is discarded and nothing
    is returned.
    """
    files_per_dir = []
    for art_dir in art_dirs:
        files_per_dir.append(glob.glob(art_dirs_pre + art_dir + "*.txt"))

    # Hard-coded sample: file at index 134 of the directory at index 1.
    sample_path = files_per_dir[1][134]
    with open(sample_path, "r") as handle:
        raw_text = handle.read()

    sentences = nltk.sent_tokenize(raw_text)
    # Hard-coded sample sentence at index 7.
    sample_sentence = sentences[7]
    parsed_sentence = source.nlp(unicode(sample_sentence))
    _questRewr.is_sentence(parsed_sentence)

    good_synset = wn.synset("good.a.01")
    # NOTE(review): in NLTK's WordNet interface antonyms() is normally a
    # Lemma method rather than a Synset method -- confirm this call works.
    good_synset.antonyms()
def temp():
    """Scratch routine that exercises the article-parsing pipeline once.

    Collects the ``*.txt`` files of each directory in ``art_dirs``, loads one
    hard-coded article, splits it into sentences, parses one hard-coded
    sentence with ``source.nlp`` and hands it to ``_questRewr.is_sentence``.
    A WordNet lookup follows. No result is kept and nothing is returned.
    """
    txt_pattern_suffix = '*.txt'
    dir_listings = [
        glob.glob(art_dirs_pre + art_dir + txt_pattern_suffix)
        for art_dir in art_dirs
    ]

    # Hard-coded sample: directory index 1, file index 134.
    with open(dir_listings[1][134], 'r') as article_file:
        article_body = article_file.read()

    # Hard-coded sample sentence at index 7.
    chosen = nltk.sent_tokenize(article_body)[7]
    chosen_doc = source.nlp(unicode(chosen))
    _questRewr.is_sentence(chosen_doc)

    synset_good = wn.synset('good.a.01')
    # NOTE(review): in NLTK's WordNet interface antonyms() is normally a
    # Lemma method rather than a Synset method -- confirm this call works.
    synset_good.antonyms()
def select_sents(art_text, nlp, min_sent_n=1, max_sent_n=5, max_sent_l=30, rng=np.random.RandomState(156), max_iter=20):
    """Pick a random run of consecutive sentences from an article.

    The text is split with ``nltk.sent_tokenize`` and every sentence is
    passed through ``nlp``. Up to ``max_iter`` candidate runs are drawn; the
    first accepted one is returned. A run is accepted when its last sentence
    has fewer than ``max_sent_l`` tokens (``nltk.word_tokenize``) and every
    sentence in it satisfies ``_questRewr.is_compl_sentence``.

    Args:
        art_text: Article text to sample from.
        nlp: Callable applied to each (unicode) sentence; its result is what
            ``_questRewr.is_compl_sentence`` receives.
        min_sent_n: Smallest run length to draw (inclusive).
        max_sent_n: Largest run length to draw (inclusive).
        max_sent_l: Exclusive upper bound on the token count of the run's
            last sentence.
        rng: Object providing ``randint(low, high)`` with an exclusive upper
            bound. The default instance is created once, when the function is
            defined, so its state is shared by all calls that rely on it.
        max_iter: Maximum number of candidate runs to try.

    Returns:
        A list of ``sent_n`` consecutive unicode sentences, or ``None`` when
        no candidate was accepted within ``max_iter`` attempts or a
        ``ValueError`` was raised while sampling (for example, ``randint``
        is given an empty range because the article has too few sentences).
    """
    # Number of leading sentences never used as a run start; the same value
    # is subtracted from the upper bound to keep runs away from the end.
    sent_offs = 3
    sents = [unicode(s) for s in nltk.sent_tokenize(art_text)]
    sents_docs = [nlp(s) for s in sents]
    sents_n = len(sents)

    attempts = 0
    try:
        while attempts < max_iter:
            attempts += 1
            sent_n = rng.randint(min_sent_n, max_sent_n + 1)
            sent_i0 = rng.randint(sent_offs, sents_n - sent_offs - sent_n)
            # Index of the LAST sentence in the run (inclusive).
            sent_in = sent_i0 + sent_n - 1
            if len(nltk.word_tokenize(sents[sent_in])) >= max_sent_l:
                continue
            # Slice ends are exclusive, so go one past sent_in; otherwise the
            # sentence just length-checked is dropped and a one-sentence run
            # becomes an empty (always accepted) window.
            window = slice(sent_i0, sent_in + 1)
            if all(_questRewr.is_compl_sentence(doc) for doc in sents_docs[window]):
                return sents[window]
    except ValueError:
        # Best-effort: any ValueError during sampling means "no selection".
        return None
    return None
def select_sents(art_text, nlp, min_sent_n=1, max_sent_n=5, max_sent_l=30, rng=np.random.RandomState(156), max_iter=20):
    """Pick a random run of consecutive sentences from an article.

    The text is split with ``nltk.sent_tokenize`` and every sentence is
    passed through ``nlp``. Up to ``max_iter`` candidate runs are drawn; the
    first accepted one is returned. A run is accepted when its last sentence
    has fewer than ``max_sent_l`` tokens (``nltk.word_tokenize``) and every
    sentence in it satisfies ``_questRewr.is_compl_sentence``.

    Args:
        art_text: Article text to sample from.
        nlp: Callable applied to each (unicode) sentence; its result is what
            ``_questRewr.is_compl_sentence`` receives.
        min_sent_n: Smallest run length to draw (inclusive).
        max_sent_n: Largest run length to draw (inclusive).
        max_sent_l: Exclusive upper bound on the token count of the run's
            last sentence.
        rng: Object providing ``randint(low, high)`` with an exclusive upper
            bound. The default instance is created once, when the function is
            defined, so its state is shared by all calls that rely on it.
        max_iter: Maximum number of candidate runs to try.

    Returns:
        A list of ``sent_n`` consecutive unicode sentences, or ``None`` when
        no candidate was accepted within ``max_iter`` attempts or a
        ``ValueError`` was raised while sampling (for example, ``randint``
        is given an empty range because the article has too few sentences).
    """
    # Number of leading sentences never used as a run start; the same value
    # is subtracted from the upper bound to keep runs away from the end.
    sent_offs = 3
    sents = [unicode(s) for s in nltk.sent_tokenize(art_text)]
    sents_docs = [nlp(s) for s in sents]
    sents_n = len(sents)

    attempts = 0
    try:
        while attempts < max_iter:
            attempts += 1
            sent_n = rng.randint(min_sent_n, max_sent_n + 1)
            sent_i0 = rng.randint(sent_offs, sents_n - sent_offs - sent_n)
            # Index of the LAST sentence in the run (inclusive).
            sent_in = sent_i0 + sent_n - 1
            if len(nltk.word_tokenize(sents[sent_in])) >= max_sent_l:
                continue
            # Slice ends are exclusive, so go one past sent_in; otherwise the
            # sentence just length-checked is dropped and a one-sentence run
            # becomes an empty (always accepted) window.
            window = slice(sent_i0, sent_in + 1)
            if all(_questRewr.is_compl_sentence(doc) for doc in sents_docs[window]):
                return sents[window]
    except ValueError:
        # Best-effort: any ValueError during sampling means "no selection".
        return None
    return None