def sentence_db(nlp, fh):
    """Build the sentence database from a tab-separated source.

    Every non-blank line of ``fh`` is expected to look like
    ``<src><TAB><text>`` where ``<src>`` parses as an integer.  The text is
    turned into a span with ``first_s(nlp, text)`` and stored as one record.
    When ``clauses()`` returns more than one clause for that span, each
    clause for which a subject can be determined is stored as an additional
    record; all clause records come after all whole-sentence records.

    Args:
        nlp: parser object, handed to ``first_s`` unchanged.
        fh: iterable of lines (e.g. an open file).  Lines may be ``bytes``
            (decoded as UTF-8) or already-decoded text.

    Returns:
        A list of dicts with the keys ``'src'`` (int), ``'text'``,
        ``'span'``, ``'nsubj'``, ``'agree'``, ``'plural'`` and ``'pps'``.
        For whole-sentence records ``'nsubj'`` and ``'plural'`` are ``None``
        when ``get_nsubj``/``nsubj_is_plural`` raise ``ValueError``.  Clause
        records always have a subject and an empty ``'pps'`` list.

    Raises:
        ValueError: if a non-blank line contains no tab separator, or its
            source id is not an integer.
    """
    sentences = []
    clause_list = []

    for raw_line in fh:
        # Accept binary-mode input as well as already-decoded text.
        if isinstance(raw_line, bytes):
            raw_line = raw_line.decode('utf8')
        line = raw_line.strip()
        if not line:
            # Blank lines carry no record; skipping them avoids an
            # unpacking error on the split below.
            continue

        # Split on the first tab only, so a tab inside the text is kept
        # as part of the text instead of breaking the unpacking.
        src, text = line.split("\t", 1)
        src_id = int(src)

        span_obj = first_s(nlp, text)
        ccs = clauses(span_obj)
        pps = prep_phrases(span_obj.root)
        agree = requires_past_tense_agreement(span_obj)

        # keep sentences with no recognizable subject
        try:
            nsubj = get_nsubj(span_obj)
            plural = nsubj_is_plural(nsubj)
        except ValueError:
            nsubj = None
            plural = None

        sentences.append({
            'src': src_id,
            'text': text,
            'span': span_obj,
            'nsubj': nsubj,
            'agree': agree,
            'plural': plural,
            'pps': pps,
        })

        # A single clause would just duplicate the whole sentence.
        if len(ccs) > 1:
            clause_list.extend((src_id, clause) for clause in ccs)

    for src_id, clause in clause_list:
        agree = requires_past_tense_agreement(clause)
        # Unlike whole sentences, clauses without a subject are dropped.
        try:
            nsubj = get_nsubj(clause)
            plural = nsubj_is_plural(nsubj)
        except ValueError:
            continue

        # Clause records carry an empty 'pps' list, so prep_phrases() is
        # not called for them.
        sentences.append({
            'src': src_id,
            'text': clause.text,
            'span': clause,
            'nsubj': nsubj,
            'agree': agree,
            'plural': plural,
            'pps': [],
        })

    return sentences
def random_sentence_for_nsubj(sdb, nsubj):
    """Pick a random sentence record that is compatible with ``nsubj``.

    Only records whose ``'nsubj'`` is not ``None`` are candidates.  Records
    with a false ``'agree'`` flag are always eligible; records with a true
    ``'agree'`` flag are eligible only when their ``'plural'`` flag matches
    the number of ``nsubj``.  A ``None`` subject is treated like a singular
    one.

    Args:
        sdb: list of sentence records (dicts with at least the keys
            ``'nsubj'``, ``'agree'`` and ``'plural'``).
        nsubj: the subject to match, or ``None``.

    Returns:
        A ``(pronoun, record)`` tuple; the pronoun is ``"they"`` when
        ``nsubj_is_plural(nsubj)`` is true and ``"it"`` otherwise.

    Raises:
        IndexError: from ``random.choice`` when no record is eligible.
    """
    unconstrained = []
    singular_only = []
    plural_only = []

    # One pass that sorts every record with a subject into its bucket.
    for record in sdb:
        if record['nsubj'] is None:
            continue
        needs_agreement = record['agree']
        is_plural = record['plural']
        if not needs_agreement:
            unconstrained.append(record)
        elif is_plural:
            plural_only.append(record)
        else:
            singular_only.append(record)

    # Unconstrained records always come first in the pool, followed by the
    # ones whose number matches the subject.
    if nsubj is not None and nsubj_is_plural(nsubj):
        return "they", random.choice(unconstrained + plural_only)
    return "it", random.choice(unconstrained + singular_only)
def reminded(sdb, state):
    """Compose a "<subject> <verb phrase> <noun phrase>" fragment.

    NOTE: this file defines ``reminded`` a second time further down with
    the same logic; the later definition is the one bound after import.

    The subject is ``'they'`` when the most recent topic in
    ``state.topics`` exists and ``nsubj_is_plural`` says it is plural,
    otherwise ``'it'``.  If the previously used subject (last entry of
    ``state.subj_orth``) was already ``it``/``they`` and a topic is
    available, ``"the " + <topic root>`` is used instead.  The chosen
    subject is appended to ``state.subj_orth``.

    Args:
        sdb: list of sentence records; the ``'nsubj'`` values that are not
            ``None`` supply the noun phrases (their ``.text`` is used).
        state: object with list attributes ``topics`` and ``subj_orth``.

    Returns:
        The composed string, prefixed with a random adverb in roughly one
        call out of six.
    """
    verbs = ['reminded me of', 'reminded you of', 'reminded us of',
             'recalled', 'brought to mind', 'evoked', 'suggested',
             'seemed like', 'resembled', 'had the quality of']
    adverbs = ['somehow', 'at the time', 'sometimes', 'at first', 'maybe']

    has_topic = len(state.topics) > 0 and state.topics[-1] is not None
    subj = 'they' if has_topic and nsubj_is_plural(state.topics[-1]) else 'it'

    # Name the topic outright rather than repeating a pronoun.
    previous_was_pronoun = (len(state.subj_orth) > 0
                            and state.subj_orth[-1].lower() in ('it', 'they'))
    if previous_was_pronoun and has_topic:
        subj = "the " + state.topics[-1].root.orth_
    state.subj_orth.append(subj)

    nps = [entry['nsubj'] for entry in sdb if entry['nsubj'] is not None]
    # The verb is drawn before the noun phrase; the adverb roll comes last.
    parts = [subj, random.choice(verbs), random.choice(nps).text]
    if random.randrange(6) == 0:
        parts.insert(0, random.choice(adverbs))
    return " ".join(parts)
def reminded(sdb, state):
    """Build a short "X reminded me of Y"-style fragment.

    NOTE: an earlier definition of ``reminded`` with the same logic appears
    above in this file; this later one is the binding that survives import.

    Subject selection:
      * ``'they'`` if the latest entry of ``state.topics`` is not ``None``
        and ``nsubj_is_plural`` reports it as plural, else ``'it'``;
      * but if the last subject recorded in ``state.subj_orth`` was already
        ``it``/``they`` (case-insensitive) and a topic exists, the subject
        becomes ``"the "`` plus the topic root's ``orth_``.
    The subject that ends up being used is appended to ``state.subj_orth``.

    Args:
        sdb: list of sentence records; every non-``None`` ``'nsubj'`` is a
            candidate noun phrase and must expose ``.text``.
        state: object carrying the ``topics`` and ``subj_orth`` lists.

    Returns:
        ``"<subject> <verb phrase> <noun phrase>"``, with a random adverb
        put in front when ``random.randrange(6)`` comes up ``0``.
    """
    verbs = [
        'reminded me of',
        'reminded you of',
        'reminded us of',
        'recalled',
        'brought to mind',
        'evoked',
        'suggested',
        'seemed like',
        'resembled',
        'had the quality of',
    ]
    adverbs = ['somehow', 'at the time', 'sometimes', 'at first', 'maybe']

    # Most recent topic, if there is one at all.
    topic = state.topics[-1] if len(state.topics) > 0 else None

    if topic is not None and nsubj_is_plural(topic):
        subj = 'they'
    else:
        subj = 'it'

    history = state.subj_orth
    if len(history) > 0 and history[-1].lower() in ('it', 'they') \
            and topic is not None:
        # Avoid two pronoun subjects in a row by naming the topic.
        subj = "the " + topic.root.orth_
    history.append(subj)

    candidates = [row['nsubj'] for row in sdb if row['nsubj'] is not None]
    verb = random.choice(verbs)
    noun_phrase = random.choice(candidates)
    sentence = subj + " " + verb + " " + noun_phrase.text

    if random.randrange(6) != 0:
        return sentence
    return random.choice(adverbs) + " " + sentence