"a", "an", "the", "of", "at", "on", "upon", "in", "to", "from", "out", "as", "so", "such", "or", "and", "those", "this", "these", "that", "for", ",", "is", "was", "am", "are", "'s", "been", "were")) extended_stop_words = list_checker(( "the", "this", "mr.", "miss", "mrs.", "dr.", "ms.", "inc.", "ltd.", "corp.", "'s", ",", ".")) # , "..", "..", "-", "''", '"', "-")) # all pronouns are added to stop_word common_NE_subfixes = list_checker(("corp", "co", "inc", "ltd")) non_words = list_checker(("mm", "hmm", "ahem", "um")) _invalid = list_checker(("u.s.", "u.k", "u.s.s.r.", "there", "ltd.")) _invalid_start_word_a = matcher("'s.*") _invalid_start_word_b = matcher("etc.*") _invalid_end_a = matcher(".*etc.") invalid_words = lambda x: _invalid(x) or _invalid_end_a(x) or _invalid_start_word_a(x) or _invalid_start_word_b(x) \ or non_words(x) location_modifiers = list_checker(("east", "west", "north", "south", "eastern", "western", "northern", "southern", "upper", "lower")) unreliable = list_checker(("this",)) speaking_begin = list_checker(("``",)) speaking_end = list_checker(("''",)) speaking_ambiguous = list_checker(('"',))
# coding=utf-8
from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

# Root constituent: any of the listed tag spellings.
root = list_checker(("root", "top", "ROOT", "TOP"))

# Clause: checked with the pattern "^S".
clause = matcher("^S")

# Noun phrase.
noun_phrase = equality_checker("NP")

# Prepositional phrase check, left disabled by the original author.
#prepositional_phrase = equality_checker("SP")

# Verb phrase.
verb_phrase = equality_checker("VP")

# Particle constituent.
particle_constituent = equality_checker("PRT")

# Past participle verb (tag "VBN").
past_participle_verb = equality_checker("VBN")

# Interjection constituent.
interjection = equality_checker("INTJ")

# Is a simple or subordinated clause
# Verb phrase.
verb_phrase = equality_checker("VP")

# Adverbial phrase. Built with fail(); presumably a checker that never
# accepts -- confirm in corefgraph.resources.lambdas.
adverbial_phrase = fail()

# Direct complement.
complement_direct = list_checker(("CD",))

# Particle constituent.
particle_constituent = fail()

# Past participle verb constituent.
past_participle_verb = equality_checker("VBN")

# Interjection constituent.
interjection = fail()

# NER annotated into the semantic tree.
ner_constituent = fail()

preposition = fail()

# Both names reuse the noun phrase checker object.
enumerable = noun_phrase
head_rules = noun_phrase

# Constituents that are plausible mentions: pattern "NP.*".
mention_constituents = matcher("NP.*")
# Indefinite pronouns, from the Freeling dictionary (PI.* entries).
# 'mateixes' and 'mateix' used to be written with surrounding spaces
# (' mateixes ', ' mateix '), which no bare token can equal; the padding
# is removed. Repeated entries are listed once.
indefinite = list_checker((
    'quelcom', 'algú', 'alguna', 'algunes', 'algun', 'alguns',
    'ambdues', 'ambdós', 'bastant', 'bastants',
    'qualssevol', 'qualsevol', 'altres', 'massa', 'mitja', 'mitjans',
    'mateixa', 'mateixes', 'mateix', 'mateixos',
    'molta', 'moltes', 'molt', 'molts',
    'res', 'ningú', 'cap', 'gens', 'ninguns', 'altre',
    'poca', 'poques', 'poc', 'pocs',
    'tantes', 'tanta', 'tants', 'tant',
    'totes', 'tota', 'tots', 'tot',
    'unes', 'una', 'uns', 'un',
    'diverses', 'diversos',
))

# Relative pronouns, from the Freeling dictionary (PR.* entries).
relative = list_checker((
    'on', 'com', 'qui', 'quins', 'quan',
    'quanta', 'quantes', 'quants', 'que',
))

# NOTE(review): this pattern matches "<word> mismo/misma/mismos/mismas",
# which are Spanish forms, while the word lists here are Catalan
# ("mateix", "mateixa", ...). Left unchanged -- confirm the intended forms.
reflexive = matcher(r'^[^\s]* mism(o|a)s?$')

no_organization = fail()

# Personal and possessive forms grouped by grammatical person.
first_person = list_checker((
    "em", "meva", "meves", "me", "meu", "meus", "meues",
    "ens", "nosaltres", "nostra", "nostres", "nostre", "jo",
))

second_person = list_checker((
    "us", "et", "tu", "teves", "teva", "teus", "teu", "teues",
    "vostès", "vostè", "vosaltres", "vós",
    "vostres", "vostra", "vostre",
))

third_person = list_checker((
    "ell", "ella", "elles", "ells", "la", "les", "li", "els", "el",
    "seves", "seva", "seus", "seu", "seues",
))
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, matcher, fail

__author__ = ''

# Features questions
female = matcher(".*FEM.*")
# The pattern used to be ".*MASC*": the missing dot made "C*" mean
# "zero or more C", so any tag containing "MAS" was accepted. It now
# mirrors the feminine pattern.
male = matcher(".*MASC.*")
neutral = fail()
singular = matcher(".*SING.*")
plural = matcher(".*PLUR.*")
animate = fail()
inanimate = fail()

# Adjectives
adjective = matcher("^ADJ.*")

# Pronouns
pronoun = matcher("^PRON.*")
personal_pronoun = matcher("^PRON.*PRS.*")
relative_pronoun = matcher("^PRON.*REL.*")
interrogative_pronoun = matcher("^PRON.*INT.*")
# A mention pronoun is a relative or a personal pronoun; the result of the
# first truthy check is returned as-is.
mention_pronoun = lambda x: relative_pronoun(x) or personal_pronoun(x)

# Nouns
singular_common_noun = matcher("^NOUN.*SING.*")
plural_common_noun = matcher("^NOUN.*PLUR.*")
from corefgraph.resources.lambdas import equality_checker, fail, matcher __author__ = 'Valeria Quochi <*****@*****.**>' __date__ = '5/16/2013' # features questions female = fail() male = fail() neutral = fail() singular = equality_checker("^NOU_CS") plural = equality_checker("^NOU_CP") animate = fail() inanimate = fail() # Adjectives adjective = matcher("^ADJ.*") # pronouns personal_pronoun = matcher("^PRO~PE") relative_pronoun = matcher("^PRO~RE") pronoun = matcher("^PRO") mention_pronoun = lambda x: relative_pronoun(x) or personal_pronoun(x) singular_common_noun = equality_checker("^NOU_CS") plural_common_noun = equality_checker("^NOU_CP") proper_noun = matcher("^NOU~PR") noun = matcher("^NOU.*") verbs = matcher("^V.*") modals = equality_checker("^VMO.*") mod_forms = lambda x: singular_common_noun(x) or plural_common_noun(
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

# Feature checkers. The patterns read fixed character positions of the tag:
# the leading letter selects the alternative and "." skips one position.
# NOTE(review): "male", "singular" and "plural" accept a leading "S" where
# "female" and "neutral" do not -- confirm whether that is intended.
female = matcher(r"^[ADP]..F|^N.F|^V.....F")
male = matcher(r"^[ADPS]..M|^N.M|^V.....M")
neutral = matcher(r"^[ADP]..N")
singular = matcher(r"^[ADPS]...S|^N..S|^V....S")
plural = matcher(r"^[ADPS]...P|^N..P|^V....P")
animate = fail()
inanimate = fail()

# Adjective tags start with "A".
adjective = matcher(r"^A")

# Pronoun tags: "P", optionally preceded by "D".
pronoun = matcher(r"^D?P")
personal_pronoun = matcher(r"^PP")
relative_pronoun = matcher(r"^PR")
interrogative_pronoun = matcher(r"^PT")
mention_pronoun = matcher(r"P[PXRL]|^DP")

# Common nouns, split by the number position of the tag.
singular_common_noun = matcher(r"^NC.S")
plural_common_noun = matcher(r"^NC.P")
# Pronoun checkers, built from the private tag constants of this module.
personal_pronoun = list_checker((_personal_pronoun, _possessive_pronoun))
relative_pronoun = list_checker((_wh_pronoun, _wh_possessive_pronoun))
pronoun = list_checker((
    _personal_pronoun,
    _possessive_pronoun,
    _wh_pronoun,
    _wh_possessive_pronoun,
))


def mention_pronoun(x):
    """Return the first truthy result of the relative, then personal, check."""
    return relative_pronoun(x) or personal_pronoun(x)


# Noun checkers.
singular_common_noun = equality_checker(_noun)
plural_common_noun = equality_checker(_noun_plural)
proper_noun = list_checker((_proper_noun, _proper_noun_plural))


def noun(x):
    """Accept singular common, plural common or proper nouns, in that order."""
    return singular_common_noun(x) or plural_common_noun(x) or proper_noun(x)


verbs = list_checker(_verbs_list)
modals = equality_checker(_modal)


def mod_forms(x):
    """Accept common nouns, adjectives, verbs or cardinals, checked in order.

    ``adjective`` and ``cardinal`` are looked up when the function is called,
    so they only need to exist at module level by then.
    """
    return (
        singular_common_noun(x)
        or plural_common_noun(x)
        or adjective(x)
        or verbs(x)
        or cardinal(x)
    )


indefinite = fail()

# enumerations
# NOTE(review): the same tag is listed twice -- confirm whether a second,
# different tag was intended.
enumerable_mention_words = list_checker(("NOU~PR", "NOU~PR"))

conjunction = equality_checker(_conjunctions)
interjections = equality_checker(_interjection)
cardinal = equality_checker("CD")
wh_words = list_checker((
    _wh_pronoun,
    _wh_possessive_pronoun,
    _wh_determiner,
    _wh_adverb,
))

# NOTE(review): the pattern has a literal "#" before the "^" anchor; if
# matcher() uses a plain regular expression this can never match. Confirm
# whether that is a deliberate way of disabling the rule.
head_rules = matcher("#^NOU.*")
determinant = fail()
# TODO
# coding=utf-8
""" Ancora POS tag checkers.

Each element of this module is a function that checks a POS tag.

Elements whose name starts with _ are for internal use only.
"""
from corefgraph.resources.lambdas import fail, matcher

__author__ = 'Josu Bermudez <*****@*****.**>'

# Internal helpers keyed on the "pos=" and "postype=" attributes of the tag.
_pronouns = matcher(r"pos=p")
_possessive = matcher(r".*postype=possessive")
_relative = matcher(r".*postype=relative")
_personal = matcher(r".*postype=personal")


def _personal_pronouns(x):
    """Accept tags that pass both the pronoun and the personal check.

    Returns the pronoun result when it is falsy, otherwise the result of the
    personal check.
    """
    return _pronouns(x) and _personal(x)


# Feature checkers keyed on the "gen=" and "num=" attributes.
male = matcher(r".*gen=m")
female = matcher(r".*gen=f")
neutral = matcher(r".*gen=n")
singular = matcher(r".*num=s")
plural = matcher(r".*num=p")
animate = fail()
inanimate = fail()

# Adjectives
adjectives = matcher(r"pos=a")
# Private tag constants.
_proper_noun_plural = "NNPS"
_adjective = "JJ"
_adjective_comparative = "JJR"
_adjective_superlative = "JJS"
_conjunction = ("CC",)

# Comma check, left disabled by the original author.
# comma = equality_checker(",")

# Feature checkers.
female = fail()
male = fail()
neutral = fail()
# Number comes from the noun tag: "NN"/"NNP" versus "NNS"/"NNPS".
singular = matcher("^NNP?$")
plural = matcher("^NNP?S$")
animate = fail()
inanimate = fail()

# Adjectives: only the base adjective tag is listed.
adjective = list_checker((_adjective,))

# Pronouns
pronoun = list_checker((
    _personal_pronoun,
    _possessive_pronoun,
    _wh_pronoun,
    _wh_possessive_pronoun,
))
relative_pronoun = list_checker((_wh_pronoun, _wh_possessive_pronoun))
mention_pronoun = matcher("^PRP")

# Nouns