示例#1
0
def sentence_db(nlp, fh):
    """Build the list of sentence records from a tab-separated file.

    Every non-blank line of ``fh`` must have the form ``<src><TAB><text>``
    where ``<src>`` parses as an integer.  Blank lines are skipped.  Only the
    first tab separates the two fields, so ``<text>`` may itself contain tabs.

    For each line one record is stored for the span returned by
    ``first_s(nlp, text)``.  When ``clauses(span)`` yields more than one
    clause, every clause additionally gets its own record, appended after
    all whole-sentence records.

    Args:
        nlp: Passed straight through to ``first_s``; presumably the loaded
            NLP pipeline (confirm against ``first_s``).
        fh: Iterable of lines, either bytes (decoded as UTF-8) or text.

    Returns:
        A list of dicts with the keys ``src``, ``text``, ``span``, ``nsubj``,
        ``agree``, ``plural`` and ``pps``.

    Raises:
        ValueError: If a non-blank line has no tab separator or its source
            id is not an integer.
    """
    sentences = []
    clause_list = []
    # Iterate the handle lazily rather than loading every line up front.
    for raw in fh:
        # Binary handles yield bytes; text handles already yield str.
        if isinstance(raw, bytes):
            raw = raw.decode('utf8')
        line = raw.strip()
        if not line:
            # A blank line (typically a trailing newline) holds no record.
            continue
        src, sep, text = line.partition("\t")
        if not sep:
            raise ValueError(
                "expected '<src><TAB><text>', got %r" % (line,))
        # Fail on a bad source id before doing any parsing work.
        src = int(src)
        span_obj = first_s(nlp, text)
        ccs = clauses(span_obj)
        pps = prep_phrases(span_obj.root)
        agree = requires_past_tense_agreement(span_obj)
        # keep sentences with no recognizable subject
        try:
            nsubj = get_nsubj(span_obj)
            plural = nsubj_is_plural(nsubj)
        except ValueError:
            nsubj = None
            plural = None
        sentences.append({
            'src': src,
            'text': text,
            'span': span_obj,
            'nsubj': nsubj,
            'agree': agree,
            'plural': plural,
            'pps': pps,
        })
        if len(ccs) > 1:
            clause_list.extend((src, c) for c in ccs)
    for src, clause in clause_list:
        agree = requires_past_tense_agreement(clause)
        # Unlike whole sentences, a clause without a usable subject is dropped.
        try:
            nsubj = get_nsubj(clause)
            plural = nsubj_is_plural(nsubj)
        except ValueError:
            continue
        sentences.append({
            'src': src,
            'text': clause.text,
            'span': clause,
            'nsubj': nsubj,
            'agree': agree,
            'plural': plural,
            # Clause records always carry an empty list here; the previous
            # code computed prep phrases for the clause but never used them.
            'pps': [],
        })
    return sentences
示例#2
0
def sentence_db(nlp, fh):
    """Load ``<src><TAB><text>`` lines into a list of sentence records.

    Blank lines are ignored, and the split happens on the first tab only, so
    the text field may contain further tabs.  Lines may be bytes (decoded as
    UTF-8) or already-decoded text.

    One record is produced per line for the span returned by
    ``first_s(nlp, text)``.  If ``clauses(span)`` returns more than one
    clause, each clause gets an extra record; those clause records follow
    all of the whole-sentence records in the returned list.

    Args:
        nlp: Forwarded unchanged to ``first_s``; presumably the NLP pipeline
            object (confirm against ``first_s``).
        fh: Iterable yielding the input lines.

    Returns:
        A list of dicts keyed by ``src``, ``text``, ``span``, ``nsubj``,
        ``agree``, ``plural`` and ``pps``.

    Raises:
        ValueError: For a non-blank line that lacks a tab or whose source id
            is not an integer.
    """
    sentences = []
    clause_list = []
    # Stream the handle; there is no need to hold every raw line in memory.
    for raw_line in fh:
        # Support both binary and text-mode handles.
        if isinstance(raw_line, bytes):
            text_line = raw_line.decode('utf8')
        else:
            text_line = raw_line
        text_line = text_line.strip()
        if not text_line:
            # Nothing to parse on an empty line.
            continue
        src, tab, text = text_line.partition("\t")
        if not tab:
            raise ValueError(
                "expected '<src><TAB><text>', got %r" % (text_line,))
        # Convert once, up front, so a bad id fails before parsing starts.
        src = int(src)
        span_obj = first_s(nlp, text)
        ccs = clauses(span_obj)
        pps = prep_phrases(span_obj.root)
        agree = requires_past_tense_agreement(span_obj)
        # keep sentences with no recognizable subject
        try:
            nsubj = get_nsubj(span_obj)
            plural = nsubj_is_plural(nsubj)
        except ValueError:
            nsubj = None
            plural = None
        sentences.append({
            'src': src,
            'text': text,
            'span': span_obj,
            'nsubj': nsubj,
            'agree': agree,
            'plural': plural,
            'pps': pps,
        })
        if len(ccs) > 1:
            clause_list.extend((src, c) for c in ccs)
    for src, clause in clause_list:
        agree = requires_past_tense_agreement(clause)
        # Clauses with no usable subject are skipped rather than stored.
        try:
            nsubj = get_nsubj(clause)
            plural = nsubj_is_plural(nsubj)
        except ValueError:
            continue
        sentences.append({
            'src': src,
            'text': clause.text,
            'span': clause,
            'nsubj': nsubj,
            'agree': agree,
            'plural': plural,
            # Always empty for clause records; the earlier code computed
            # prep phrases for the clause and then discarded the result.
            'pps': [],
        })
    return sentences
示例#3
0
def random_sentence_for_nsubj(sdb, nsubj):
    """Pick a random record from ``sdb`` that fits the number of ``nsubj``.

    Only records whose ``'nsubj'`` is not None are eligible.  Records with a
    falsy ``'agree'`` flag are always candidates; records with a truthy
    ``'agree'`` flag are candidates only when their ``'plural'`` flag matches
    the requested number.

    Args:
        sdb: Sequence of record dicts with ``'nsubj'``, ``'agree'`` and
            ``'plural'`` keys.
        nsubj: Subject to match, or None.  The plural pool is used only when
            ``nsubj`` is not None and ``nsubj_is_plural(nsubj)`` is truthy.

    Returns:
        A ``(pronoun, record)`` tuple; the pronoun is ``"they"`` for the
        plural pool and ``"it"`` otherwise.

    Raises:
        IndexError: From ``random.choice`` when the candidate pool is empty.
    """
    free = []      # 'agree' is falsy: usable with either number
    singular = []  # 'agree' truthy, 'plural' falsy
    plural = []    # 'agree' truthy, 'plural' truthy
    for entry in sdb:
        if entry['nsubj'] is None:
            continue
        agrees, is_plural = entry['agree'], entry['plural']
        if not agrees:
            free.append(entry)
        elif is_plural:
            plural.append(entry)
        else:
            singular.append(entry)
    if nsubj is not None and nsubj_is_plural(nsubj):
        return "they", random.choice(free + plural)
    # Both "no subject" and "singular subject" draw from the singular pool.
    return "it", random.choice(free + singular)
示例#4
0
def random_sentence_for_nsubj(sdb, nsubj):
    """Choose a random ``sdb`` record whose agreement suits ``nsubj``.

    Records with ``'nsubj'`` equal to None are never chosen.  The candidate
    pool is every record with a falsy ``'agree'`` flag plus the records with
    a truthy ``'agree'`` flag whose ``'plural'`` flag matches the wanted
    number.

    Args:
        sdb: Sequence of record dicts carrying ``'nsubj'``, ``'agree'`` and
            ``'plural'``.
        nsubj: Subject to agree with, or None (treated like a singular
            subject).

    Returns:
        ``(pronoun, record)`` where the pronoun is ``"they"`` when
        ``nsubj_is_plural(nsubj)`` is truthy and ``"it"`` in every other
        case.

    Raises:
        IndexError: From ``random.choice`` if no record qualifies.
    """
    candidates = [rec for rec in sdb if rec['nsubj'] is not None]
    unconstrained = [rec for rec in candidates if not rec['agree']]
    singular_only = [rec for rec in candidates
                     if not rec['plural'] and rec['agree']]
    plural_only = [rec for rec in candidates
                   if rec['plural'] and rec['agree']]

    wants_plural = nsubj is not None and nsubj_is_plural(nsubj)
    if wants_plural:
        pool, pronoun = unconstrained + plural_only, "they"
    else:
        pool, pronoun = unconstrained + singular_only, "it"
    d = random.choice(pool)
    return pronoun, d
示例#5
0
def reminded(sdb, state):
    """Compose a "<subject> <verb phrase> <noun phrase>" sentence fragment.

    The subject starts as the pronoun ``'they'`` when the latest topic is not
    None and ``nsubj_is_plural`` is truthy for it, otherwise ``'it'``.  If
    the previously recorded subject was already ``it``/``they`` (compared
    case-insensitively) and a latest topic exists, the subject becomes
    ``"the "`` plus that topic's ``root.orth_`` instead, so the same pronoun
    is not used twice in a row.

    Args:
        sdb: Sequence of record dicts; the ``'nsubj'`` values that are not
            None supply the noun phrases (their ``.text`` is used).
        state: Object exposing the list-like attributes ``topics`` and
            ``subj_orth``.  The chosen subject is appended to
            ``state.subj_orth``.

    Returns:
        The generated string, prefixed by a random adverb in one call out of
        six on average.

    Raises:
        IndexError: From ``random.choice`` when ``sdb`` has no record with a
            subject.
    """
    verbs = [
        'reminded me of', 'reminded you of', 'reminded us of',
        'recalled', 'brought to mind', 'evoked', 'suggested',
        'seemed like', 'resembled', 'had the quality of',
    ]
    adverbs = ['somehow', 'at the time', 'sometimes', 'at first', 'maybe']

    topic = state.topics[-1] if len(state.topics) > 0 else None
    subj = 'they' if topic is not None and nsubj_is_plural(topic) else 'it'

    history = state.subj_orth
    if len(history) > 0 and history[-1].lower() in ('it', 'they') \
            and topic is not None:
        subj = "the " + topic.root.orth_
    history.append(subj)

    noun_phrases = [rec['nsubj'] for rec in sdb if rec['nsubj'] is not None]
    # Draw the verb before the noun phrase to keep the random sequence fixed.
    verb = random.choice(verbs)
    noun_phrase = random.choice(noun_phrases)
    sentence = " ".join([subj, verb, noun_phrase.text])
    if random.randrange(6) == 0:
        sentence = random.choice(adverbs) + " " + sentence
    return sentence
示例#6
0
def reminded(sdb, state):
    """Generate a sentence fragment likening the current topic to a noun phrase.

    The subject defaults to ``'it'`` and is ``'they'`` when there is a latest
    topic that is not None and ``nsubj_is_plural`` is truthy for it.  When
    the last entry of ``state.subj_orth`` is ``it``/``they`` in any casing
    and a latest topic exists, the subject is replaced by ``"the "`` followed
    by the topic's ``root.orth_``.

    Args:
        sdb: Sequence of record dicts; noun phrases come from the ``'nsubj'``
            values that are not None, via their ``.text`` attribute.
        state: Object with list-like ``topics`` and ``subj_orth`` attributes.
            ``subj_orth`` is mutated: the chosen subject is appended.

    Returns:
        ``"<subject> <verb phrase> <noun phrase>"``, with a random adverb
        prepended when ``random.randrange(6)`` returns 0.

    Raises:
        IndexError: From ``random.choice`` if no record in ``sdb`` has a
            subject.
    """
    verbs = ['reminded me of', 'reminded you of', 'reminded us of',
             'recalled', 'brought to mind', 'evoked', 'suggested',
             'seemed like', 'resembled', 'had the quality of']
    adverbs = ['somehow', 'at the time', 'sometimes', 'at first', 'maybe']
    pronouns = ('it', 'they')

    has_topic = bool(state.topics) and state.topics[-1] is not None
    if has_topic and nsubj_is_plural(state.topics[-1]):
        subj = 'they'
    else:
        subj = 'it'

    last_was_pronoun = bool(state.subj_orth) and \
        state.subj_orth[-1].lower() in pronouns
    if last_was_pronoun and has_topic:
        # Name the topic outright instead of repeating a pronoun.
        subj = "the " + state.topics[-1].root.orth_
    state.subj_orth.append(subj)

    nps = [x['nsubj'] for x in sdb if x['nsubj'] is not None]
    # List items are evaluated left to right: verb first, then noun phrase.
    parts = [subj, random.choice(verbs), random.choice(nps).text]
    if random.randrange(6) == 0:
        parts.insert(0, random.choice(adverbs))
    return " ".join(parts)