def optimise_determiner(phrase, np_phrases, context):
    """Select the appropriate determiner for a noun phrase.

    The phrase is modified in place (its ``spec`` is replaced) and returned.
    Anything that is not a ``NounPhrase`` is returned untouched.

    The specifier is chosen as follows:

    * head is marked ``PROPER`` or is a pronoun: empty ``Element``;
    * the phrase is the only phrase in ``np_phrases`` sharing its head:
      definite article ``the``;
    * the phrase is not plural: indefinite article ``a``/``an``;
    * otherwise the specifier is left as it was.

    :param phrase: the element whose determiner should be chosen
    :param np_phrases: phrases to compare against; those whose head equals
        the head of ``phrase`` are treated as distractors
    :param context: not used by this function
    :returns: ``phrase`` itself

    """
    log = get_log()
    log.debug('Fixing determiners: {}'.format(phrase))
    if not isinstance(phrase, NounPhrase):
        log.debug('...not an NP')
        return phrase

    log.debug('NPs: {}'.format(' '.join([str(x) for x in np_phrases])))

    head = phrase.head
    # FIXME: this should look at all modifiers, not only at the head
    distractors = [x for x in np_phrases
                   if hasattr(x, 'head') and head == x.head]
    log.debug('distractors: {}'
              .format(' '.join([str(x) for x in distractors])))

    if (head.has_feature('PROPER', 'true') or
            head.has_feature('cat', 'PRONOUN')):
        log.debug('...proper or pronoun')
        phrase.spec = Element()
    # From here on the head is known not to be a pronoun, so the check
    # does not have to be repeated in the remaining branches.
    elif len(distractors) == 1 and phrase in distractors:
        log.debug('...unpronominalised phrase that is last mentioned')
        phrase.spec = Word('the', 'DETERMINER')
    elif lexicon.guess_phrase_number(phrase) != Number.plural:
        log.debug('...indefinite')
        word = head.string
        # 'y' is deliberately not treated as a vowel: nouns starting with
        # 'y' almost always start with a consonant sound ("a year").
        # The comparison is case-insensitive so "Apple" also gets "an".
        # NOTE: this is a spelling heuristic; it does not model
        # pronunciation ("an hour", "a university").
        starts_with_vowel = bool(word) and word[0].lower() in 'aeiou'
        article = 'an' if starts_with_vowel else 'a'
        phrase.spec = Word(article, 'DETERMINER')
    return phrase
def optimise_ref_exp(phrase, context):
    """Swap anaphoric noun phrases for pronouns wherever that is possible.

    A shallow copy of ``phrase`` is created.  Every noun phrase or
    coordination among the constituents of ``phrase`` is visited; when it
    is the most salient phrase of its gender it is replaced by a pronoun
    in the copy, otherwise it is handed to ``optimise_determiner``.
    Finally ``phrase`` is registered with ``context.add_sentence``.

    :param phrase: the element to process
    :param context: discourse context; ``np_stack`` and
        ``is_last_speaker`` are read and ``add_sentence`` is called
    :returns: the copy of ``phrase`` with the pronouns substituted

    """
    # TODO: include Number in the decision process (it vs they)
    # FIXME: Coordinated elements need some special attention
    result = copy(phrase)

    get_log().debug('-=' * 40)
    get_log().debug('constituents:')
    for element in phrase.constituents():
        get_log().debug('\t {}'.format(' '.join(repr(element).split())))

    # Only noun phrases and coordinations can be processed.  They are
    # visited in the order in which constituents() yields them.
    candidates = [element for element in phrase.constituents()
                  if isinstance(element, (NounPhrase, Coordination))]

    uttered = []
    processed_ids = set()
    for noun_phrase in candidates:
        get_log().debug('current NP:\n{}'.format(noun_phrase))
        gender = lexicon.guess_phrase_gender(noun_phrase)
        get_log().debug('gender of NP: {}'.format(gender))
        number = lexicon.guess_phrase_number(noun_phrase)
        get_log().debug('number of NP: {}'.format(number))

        if noun_phrase.has_feature('PERSON'):
            person = ('PERSON', noun_phrase.get_feature('PERSON'))
        elif context.is_last_speaker(noun_phrase):
            person = Person.first
        else:
            person = Person.third

        # earlier phrases (from the context and from this sentence) that
        # have the same gender as the current one
        same_gender = [earlier for earlier in (context.np_stack + uttered)
                       if lexicon.guess_phrase_gender(earlier) == gender]

        if id(noun_phrase) in processed_ids:
            get_log().debug('current NP: {} was already processed'
                            .format(noun_phrase))
            continue

        # the phrase is the most salient one when it equals the last
        # phrase of the same gender
        is_most_salient = noun_phrase in same_gender[-1:]
        if is_most_salient:
            if not isinstance(phrase, Clause):
                use = PronounUse.subjective
            elif noun_phrase is phrase.subj:
                use = PronounUse.subjective
            elif (noun_phrase in phrase.subj.constituents() and
                  noun_phrase in phrase.vp.constituents()):
                use = PronounUse.reflexive
            # TODO: possessive use (NP that is a complement of another
            #       NP?) is not implemented yet
            elif noun_phrase in phrase.vp.constituents():
                use = PronounUse.objective
            else:
                use = PronounUse.subjective

            pronoun = pronominalise(noun_phrase, gender, use, person)
            get_log().debug('replacing {}:{} with {}'
                            .format(id(noun_phrase), noun_phrase, pronoun))
            replace_element_with_id(result, id(noun_phrase), pronoun)

            # once an element is replaced, none of its sub-phrases should
            # be processed any more
            subphrases = list(noun_phrase.constituents())
            processed_ids.update(map(id, subphrases))

        # remember the phrase (without its specifier) as uttered
        unspecified = deepcopy(noun_phrase)
        unspecified.spec = Element()
        uttered.append(unspecified)

        if not is_most_salient:
            # not pronominalised, so fix the determiner instead
            optimise_determiner(noun_phrase, same_gender, context)

    context.add_sentence(phrase)
    return result