Python list_checker примеры, corefgraph.resources.lambdas.list_checker Python примеры использования

Пример #1

0

Показать файл

Файл: temporals.py Проект: josubg/CorefGraph

# coding=utf-8
from corefgraph.resources.lambdas import list_checker

__author__ = 'Josu Bermudez <*****@*****.**>'

#temporals = list_checker(("segundo", "minuto", "hora", "día", "semana", "mes", "año", "década", "siglo", "milenio",
#             "lunes", "martes", "miércoles", "jueves", "viernes", "sábado", "domingo", "ahora",
#             "ayer", "mañana", "edad", "tiempo", "era", "época", "noche", "mediodía", "tarde",
#             "semestre", "trimestre", "cuatrimestre", "término", "invierno", "primavera", "verano", "otoño", "estación",
#             "enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre",
#             "noviembre", "diciembre"))

temporals = list_checker(
    ("segon", "minut", "hora", "dia", "setmana", "mes", "any", "dècada",
     "segle", "mil·lenni", "dilluns", "dimarts", "dimecres", "dijous",
     "divendres", "dissabte", "diumenge", "ara", "ahir", "demà", "edat",
     "temps", "era", "època", "nit", "migdia", "tard", "semestre", "trimestre",
     "quadrimestre", "terme", "hivern", "primavera", "estiu", "tardor",
     "estació", "gener", "febrer", "març", "abril", "maig", "juny", "juliol",
     "agost", "setembre", "octubre", "novembre", "desembre"))

Пример #2

0

Показать файл

# coding=utf-8

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

# Constituent-tag checkers: each name below is a callable built by one of the
# corefgraph.resources.lambdas factories from the tag string(s) given here.

# Is a root constituent
root = list_checker(("root", "top", "ROOT", "TOP"))

# Is a clause
# NOTE(review): "^S" presumably accepts every tag that starts with "S"
# (S, SBAR, SINV, ...) -- confirm how matcher() applies the pattern.
clause = matcher("^S")

# Is a Noun phrase
noun_phrase = equality_checker("NP")

# Is a prepositional phrase (currently disabled)
#prepositional_phrase = equality_checker("SP")

# Is a Verb phrase
verb_phrase = equality_checker("VP")

# Is a particle constituent
particle_constituent = equality_checker("PRT")

# Is a past participle verb constituent
past_participle_verb = equality_checker("VBN")

# Is an interjection constituent
interjection = equality_checker("INTJ")

# Is a simple or subordinated clause

Пример #3

0

Показать файл

# coding=utf-8
""" List of Stopwords  and other meaningless or confusing words of english
"""
from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'


# List extracted from Stanford CoreNLP

stop_words = list_checker((
    "a", "an", "the", "of", "at", "on", "upon", "in", "to", "from", "out", "as", "so",
    "such", "or", "and", "those", "this", "these", "that", "for", ",", "is", "was",
    "am", "are", "'s", "been", "were"))

extended_stop_words = list_checker((
    "the", "this", "mr.", "miss", "mrs.", "dr.", "ms.", "inc.", "ltd.", "corp.", "'s", ",", "."))
    # , "..", "..", "-", "''", '"', "-"))

# all pronouns are added to stop_word

common_NE_subfixes = list_checker(("corp", "co", "inc", "ltd"))

non_words = list_checker(("mm", "hmm", "ahem", "um"))


_invalid = list_checker(("u.s.", "u.k", "u.s.s.r.", "there", "ltd."))
_invalid_start_word_a = matcher("'s.*")
_invalid_start_word_b = matcher("etc.*")
_invalid_end_a = matcher(".*etc.")
invalid_words = lambda x: _invalid(x) or _invalid_end_a(x) or _invalid_start_word_a(x) or _invalid_start_word_b(x) \

Пример #4

0

Показать файл

Файл: pronouns.py Проект: josubg/CorefGraph

Notes:
 + Mark internal use elements with a initial "_".
 + Use tuples or sets
"""
from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

__author__ = 'Rodrigo Agerri <*****@*****.**>'
__date__ = '2013-05-03'

# from Freeling dict (P[PDXITR][123][MFCN]P.*')
# plural = list_checker(('ellas', 'ellos', 'las', 'les', 'los', 'mías', 'míos', 'nos', 'nosotras', 'nosotros', 'nuestras',
#                        'nuestros', 'os', 'suyas', 'suyos', 'tuyas', 'tuyos', 'ustedes', 'vosotras', 'vosotros',
#                        'vuestras', 'vuestros'))
plural = list_checker(
    ('elles', 'ells', 'les', 'els', 'meves', 'meus', 'ens', 'nosaltres',
     'nostres', 'seves', 'seus', 'teves', 'teus', 'vostès', 'vosaltres',
     'vostres', 'nostros', 'vostros'))

# from Freeling dict P[PDXITR][123][MFCN]S.*'
# singular = list_checker(('conmigo', 'contigo', 'él', 'ella', 'la', 'le', 'lo', 'me', 'mía', 'mí', 'mío', 'nuestra',
#                          'nuestro', 'nuestro', 'suya', 'suyo', 'suyo', 'te', 'ti', 'tú', 'tuya', 'tuyo', 'tuyo',
#                          'usted', 'vos', 'vuestra', 'vuestro', 'vuestro', 'yo'))
singular = list_checker(
    ('ell', 'ella', 'la', 'li', 'meva', 'meu', 'seva', 'seu', 'seua', 'vostè',
     'tu', 'teua', 'teu', 'vós', 'vostra', 'vostre', 'jo'))

# from Freeling dict P[PDXITR][123]F.*'
# female = list_checker(('ella', 'ellas', 'la', 'las', 'mía', 'mías', 'nosotras', 'nuestra', 'nuestras', 'suyas', 'suya',
#                        'tuyas', 'tuya', 'vosotras', 'vuestras', 'vuestra'))
female = list_checker(('ella', 'elles', 'la', 'les', 'nostra', 'meva', 'teva',
                       'seva', 'meua', 'teua', 'seua'))

Пример #5

0

Показать файл

Файл: temporals.py Проект: josubg/CorefGraph

# coding=utf-8
from corefgraph.resources.lambdas import list_checker

__author__ = 'Valeria Quochi <*****@*****.**>'
__date__ = '5/13/13'

temporals = list_checker(
    ("secondo", "secondi", "minuto", "minuti", "ora", "ore", "giorno",
     "giorni", "settimana", "settimane", "mese", "mesi", "anno", "anni",
     "decade", "decadi", "secolo", "secoli", "millennio", "millenni", "lunedì",
     "martedì", "mercoledì", "giovedì", "venerdì", "sabato", "sabati",
     "domenica", "domeniche", "adesso", "ieri", "domani", "dopodomani", "età",
     "tempo", "tempi", "periodo", "periodi", "era", "ere", "epoca", "epoche",
     "mattino", "mattini", "mattine", "sera", "sere", "giornata", "giornate",
     "notte", "notti", "mezzogiorno", "mezzogiorni", "pomeriggio", "pomeriggi",
     "semestre", "semestri", "trimestre", "trimestri", "quadrimestre",
     "quadrimestri", "semestre", "semestri", "inverno", "inverni", "primavera",
     "primavere", "estate", "estati", "autunno", "autunni", "stagione",
     "stagioni", "gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno",
     "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"))

Пример #6

0

Показать файл

Файл: verbs.py Проект: josubg/CorefGraph

_ser = list_checker((
    #    'ser', 'erais', 'éramos', 'eran', 'era', 'era', 'eras', 'eres', 'es', 'fuerais', 'fuéramos', 'fueran', 'fuera',
    #    'fuera', 'fueras', 'fuereis', 'fuéremos', 'fueren', 'fuere', 'fuere', 'fueres', 'fueron', 'fueseis', 'fuésemos',
    #    'fuesen', 'fue', 'fuese', 'fuese', 'fueses', 'fuimos', 'fui', 'fuisteis', 'fuiste', 'seréis', 'seamos', 'seamos',
    #    'sean', 'sean', 'sea', 'sea', 'sea', 'seas', 'sed', 'serían', 'sería', 'serías', 'seriáis', 'seremos', 'ser',
    #    'seráis', 'seríamos', 'serán', 'será', 'seré', 'serás', 'ser', 'sé', 'sido', 'siendo', 'sois', 'somos', 'son',
    #    'soy'))
    'ser',
    'éreu',
    'érem',
    'eren',
    'eran',
    'era',
    'eres',
    'ets',
    'és',
    'fóssiu',
    'fóssim',
    'fossin',
    'fou',
    'fossis',
    'fos',
    'fórem',
    'siguin',
    'sigui',
    'siguis',
    'siguem',
    'sigueu',
    'serieu',
    'serem',
    'sereu',
    'seria',
    'serian',
    'seríem',
    'seré',
    'seràs',
    'ser',
    'sé',
    'estat',
    'ets',
    'es',
    'sent',
    'sou',
    'som',
    'són',
    'sóc'))

Пример #7

0

Показать файл

Файл: constituent.py Проект: josubg/CorefGraph

# coding=utf-8

__author__ = 'Josu Bermudez <*****@*****.**>'


from corefgraph.resources.lambdas import list_checker, equality_checker, fail

# Is a root constituent
root = list_checker(("root", "top", "ROOT", "TOP"))

# Is a clause
clause = list_checker(("S", "SENTENCE"))

# Is a Noun phrase
noun_phrase = list_checker(("SN", "GRUP.NOM", "SUJ"))

# Is a Verb phrase
verb_phrase = list_checker(("GRUP.VERB",))

# Is a complement direct
complement_direct = list_checker(("CD",))

# Is a particle constituent
particle_constituent = fail()

# Is a past_participle verb constituent
past_participle_verb = fail()

# Is an interjection constituent
interjection = equality_checker("INTERJECCIÓ")

Пример #8

0

Показать файл

# coding=utf-8


from corefgraph.resources.lambdas import list_checker, fail


__author__ = 'Josu Bermudez <*****@*****.**>'

# Label value that the checks below treat as "no named entity".
_no_ner = "O"

# True for any usable NE label: not the "O" marker, not None and not empty.
# NOTE: this name shadows the builtin all() for the rest of the module.
all = lambda x: x != _no_ner and x is not None and x != ""

# Classic 3 types useful in some cases
# NOTE(review): list_checker presumably builds a membership test over the
# given tuple -- confirm in corefgraph.resources.lambdas.
person = list_checker(("PERSON", "PER"))
organization = list_checker(("ORG", "ORGANIZATION"))
location = list_checker(("LOCATION", "LOC"))
other = list_checker(("MISC", "OTHER"))


# Any usable NE label that is not an organization counts as singular; `all`
# here is the module-level lambda defined above, not the builtin.
singular = lambda x: all(x) and not organization(x)
# NOTE(review): fail() presumably yields a checker that never matches --
# confirm in corefgraph.resources.lambdas.
plural = fail()

# Same labels as `person` above.
animate = list_checker(("PERSON", "PER"))
# Every other label listed here is classified as inanimate.
inanimate = list_checker(("FACILITY", "FAC", "NORP", "LOCATION", "LOC",
                          "PRODUCT", "EVENT", "ORGANIZATION", "ORG",
                          "WORK OF ART", "LAW", "LANGUAGE", "DATE", "TIME",
                          "PERCENT", "MONEY", "NUMBER", "QUANTITY",
                          "ORDINAL", "CARDINAL", "MISC", "GPE", "WEA", "NML"))

# NE types that denotes mention
mention_ner = lambda x: (x is not None) and (

Пример #9

0

Показать файл

Файл: temporals.py Проект: josubg/CorefGraph

# coding=utf-8
from corefgraph.resources.lambdas import list_checker

__author__ = 'Josu Bermudez <*****@*****.**>'

temporals = list_checker(
    ("segundo", "minuto", "hora", "día", "semana", "mes", "año", "década",
     "siglo", "milenio", "lunes", "martes", "miércoles", "jueves", "viernes",
     "sábado", "domingo", "ahora", "ayer", "mañana", "edad", "tiempo", "era",
     "época", "noche", "mediodía", "tarde", "semestre", "trimestre",
     "cuatrimestre", "término", "invierno", "primavera", "verano", "otoño",
     "estación", "enero", "febrero", "marzo", "abril", "mayo", "junio",
     "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre"))

Пример #10

0

Показать файл

Файл: verbs.py Проект: josubg/CorefGraph

# coding=utf-8
""" List of verbs used in sieves and mention detection.
"""

from corefgraph.resources.lambdas import list_checker, equality_checker

__author__ = 'Josu Bermudez <*****@*****.**>'

# Source: based on the lists found in Stanford CoreNLP; they may have been modified.
_ser = list_checker(
    ('ser', 'erais', 'éramos', 'eran', 'era', 'era', 'eras', 'eres', 'es',
     'fuerais', 'fuéramos', 'fueran', 'fuera', 'fuera', 'fueras', 'fuereis',
     'fuéremos', 'fueren', 'fuere', 'fuere', 'fueres', 'fueron', 'fueseis',
     'fuésemos', 'fuesen', 'fue', 'fuese', 'fuese', 'fueses', 'fuimos', 'fui',
     'fuisteis', 'fuiste', 'seréis', 'seamos', 'seamos', 'sean', 'sean', 'sea',
     'sea', 'sea', 'seas', 'sed', 'serían', 'sería', 'serías', 'seriáis',
     'seremos', 'ser', 'seráis', 'seríamos', 'serán', 'será', 'seré', 'serás',
     'ser', 'sé', 'sido', 'siendo', 'sois', 'somos', 'son', 'soy'))

_estar = list_checker(
    ('estoy', 'estás', 'está', 'estamos', 'estáis', 'están', 'estaba',
     'estabas', 'estaba', 'estábamos', 'estabais', 'estaban', 'estaré',
     'estarás', 'estará', 'estaremos', 'estaréis', 'estarán', 'estaría',
     'estarías', 'estaría', 'estaríamos', 'estaríais', 'estarían', 'estuve',
     'estuviste', 'estuvo', 'estuvimos', 'estuvisteis', 'estuvieron', 'esté',
     'estés', 'esté', 'estemos', 'estéis', 'estén', 'estuviera'
     'estuvieras', 'estuviera', 'estuviéramos', 'estuvierais', 'estuvieran'
     'estuviese', 'estuvieses', 'estuviese', 'estuviésemos', 'estuvieseis',
     'estuviesen'
     'estuviere', 'estuvieres', 'estuviere', 'estuviéremos', 'estuviereis',
     'estuvieren'

Пример #11

0

Показать файл

Файл: pronouns.py Проект: josubg/CorefGraph

Notes:
 + Mark internal use elements with a initial "_".
 + Use tuples or sets
"""
from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

__author__ = 'Rodrigo Agerri <*****@*****.**>'
__date__ = '2013-05-03'


# from Freeling dict (P[PDXITR][123][MFCN]P.*')
# plural = list_checker(('ellas', 'ellos', 'las', 'les', 'los', 'mías', 'míos', 'nos', 'nosotras', 'nosotros', 'nuestras',
#                        'nuestros', 'os', 'suyas', 'suyos', 'tuyas', 'tuyos', 'ustedes', 'vosotras', 'vosotros',
#                        'vuestras', 'vuestros'))
plural = list_checker(('ellas', 'ellos', 'las', 'les', 'los', 'nos', 'nosotras', 'nosotros', 'nuestras',
                        'nuestros', 'os', 'ustedes', 'vosotras', 'vosotros',
                        'vuestras', 'vuestros', 'nuestra', 'nuestro', 'nuestro', 'vuestra', 'vuestro', 'vuestro',))
# from Freeling dict P[PDXITR][123][MFCN]S.*'
# singular = list_checker(('conmigo', 'contigo', 'él', 'ella', 'la', 'le', 'lo', 'me', 'mía', 'mí', 'mío', 'nuestra',
#                          'nuestro', 'nuestro', 'suya', 'suyo', 'suyo', 'te', 'ti', 'tú', 'tuya', 'tuyo', 'tuyo',
#                          'usted', 'vos', 'vuestra', 'vuestro', 'vuestro', 'yo'))
singular = list_checker(('conmigo', 'contigo', 'él', 'ella', 'la', 'le', 'lo', 'me', 'mía', 'mí', 'mío',
                         'te', 'ti', 'tú', 'mías', 'míos', "tus", 'tuya', 'tuyo', 'tuyas', 'tuyos',
                         'usted', 'vos',  'yo', 'mías', 'míos', 'ello'))
# from Freeling dict P[PDXITR][123]F.*'
# female = list_checker(('ella', 'ellas', 'la', 'las', 'mía', 'mías', 'nosotras', 'nuestra', 'nuestras', 'suyas', 'suya',
#                        'tuyas', 'tuya', 'vosotras', 'vuestras', 'vuestra'))
female = list_checker(('ella', 'ellas', 'la', 'las', 'nosotras', 'vosotras',))
# from Freeling dict P[PDXITR][123]M.*
# male = list_checker(('él', 'ellos', 'lo', 'los', 'mío', 'míos', 'nosotros', 'nuestro', 'nuestros', 'suyos', 'suyo',
#                      'tuyos', 'tuyo', 'vosotros', 'vuestros', 'vuestro'))

Пример #12

0

Показать файл

# coding=utf-8
""" List of Stopwords 
"""
from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'


# List extracted from Stanford CoreNLP

stop_words = list_checker(('unes', 'una', 'uns', 'un', 'del', 'al', 'el', 'la', 'les', 'lo', 'de', 'en',
                           'sobre', 'per', 'dins', 'fins', 'desde', 'fora', 'com', 'així', 'tal', 'o', 'i', 'a',
                           'aquest', 'aquesta', 'aquelles', 'aquells', ',', 'es',
                           'era', 'sóc', 'eres', 'sido', 'eras'))

extended_stop_words = list_checker(("el", "la",  "sr", "sra", "srta", "dr", "ms.", "s.", "s.l.",
                                    "s.a", ",", "."))  # , "..", "..", "-", "''", '"', "-"))
# all pronouns are added to stop_word

common_NE_subfixes = list_checker(("s.a.", "s.l.", "s.a.l.", "s.l.l.", "s.c.", "s.com", "s.coop"))

non_words = list_checker(('ejem', 'ajá', 'hm', 'jo'))


invalid_words = list_checker(("sa", "sl", "etc", "dólars", "pesetes", "euros"))

location_modifiers = list_checker(("nord", "sud", "est", "oest", "adalt", "abaix"))

unreliable = list_checker(("_", "que", "su", "sus"))

speaking_begin = list_checker(("``",))

Пример #13

0

Показать файл

Файл: constituent.py Проект: josubg/CorefGraph

# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, list_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

# Is a root constituent
root = list_checker(("root", "top", "ROOT", "TOP"))

# Is a clause
clause = list_checker(("S", "SENTENCE"))

# Is a simple or subordinated clause
simple_or_sub_phrase = clause

# Is a Noun phrase
noun_phrase = list_checker(("SN", "SUJ", "GRUP.NOM"))

# Is a Verb phrase
verb_phrase = equality_checker("GRUP.VERB")

# Is a Adverbial phrase
adverbial_phrase = equality_checker("SADV")

# Is a complement direct
complement_direct = equality_checker("CD")

# Is a particle constituent
particle_constituent = fail()

# Is a past_participle verb constituent
past_participle_verb = fail()

Пример #14

0

Показать файл

# -*- coding: utf-8 -*-
from corefgraph.resources.lambdas import list_checker
__author__ = 'Valeria Quochi, [email protected]'
__date__ = '5/15/13'

# Only intrinsic copulative verbs added: essere, sembrare, apparire, parere, divenire, diventare, restare, rimanere, risultare, stare
# TODO, see if other verb forms are useful/needed

# Inflected forms of the intrinsic copulative verbs listed above (essere,
# sembrare, apparire, divenire, diventare, parere, restare, rimanere,
# risultare, stare).
# Every form must be its own tuple element: adjacent string literals without
# a comma are silently concatenated by Python, which previously merged
# "fui" and "fosti" into the single bogus entry "fuifosti".
copulative = list_checker((
    # essere
    "essere", "sono", "sei", "è", "e'", "siamo", "siete", "ero", "eri",
    "era", "eravamo", "eravate", "erano", "fui", "fosti", "fu", "fummo",
    "foste", "furono", "sarò", "saro'", "sarai", "sarà", "sara'", "saremo",
    "sarete", "saranno", "sia", "siate", "siano", "fossi", "fosse",
    "fossimo", "fossero", "sarei", "saresti", "sarebbe", "saremmo",
    "sareste", "sarebbero", "essendo", "stato", "stata", "stati", "state",
    # sembrare
    "sembrare", "sembro", "sembri", "sembra", "sembriamo", "sembrate",
    "sembrano", "sembravo", "sembravi", "sembrava", "sembravamo",
    "sembravate", "sembravano", "sembrai", "sembrasti", "sembrò", "sembro'",
    "sembrammo", "sembraste", "sembrarono", "sembrerò", "sembrero'",
    "sembrerai", "sembrerà", "sembrera'", "sembreremo", "sembrerete",
    "sembreranno", "sembriate", "sembrino", "sembrassi", "sembrasse",
    "sembrassimo", "sembrassero", "sembrando", "sembrato", "sembrata",
    "sembrati",
    # apparire
    "appaia", "appaiano", "appaio", "appaiono", "appare", "apparendo",
    "appari", "appariamo", "appariate", "apparimmo", "apparira'",
    "apparirai", "appariranno", "apparire", "apparirebbe", "apparirebbero",
    "apparirei", "appariremmo", "appariremo", "apparireste", "appariresti",
    "apparirete", "appariro'", "apparirà", "apparirò", "apparisse",
    "apparissero", "apparissi", "apparissimo", "appariste", "apparisti",
    "apparite", "appariva", "apparivamo", "apparivano", "apparivate",
    "apparivi", "apparivo", "apparsa", "apparse", "apparsi", "apparso",
    "apparve", "apparvero", "apparvi",
    # divenire / diventare
    "divenendo", "divenga", "divengano", "divengo", "divengono",
    "diveniamo", "diveniate", "divenimmo", "divenire", "divenisse",
    "divenissero", "divenissi", "divenissimo", "diveniste", "divenisti",
    "divenite", "diveniva", "divenivamo", "divenivano", "divenivate",
    "divenivi", "divenivo", "divenne", "divennero", "divenni", "diventa",
    "diventai", "diventammo", "diventando", "diventano", "diventare",
    "diventarono", "diventasse", "diventassero", "diventassi",
    "diventassimo", "diventaste", "diventasti", "diventata", "diventate",
    "diventati", "diventato", "diventava", "diventavamo", "diventavano",
    "diventavate", "diventavi", "diventavo", "diventera'", "diventerai",
    "diventeranno", "diventerebbe", "diventerebbero", "diventerei",
    "diventeremmo", "diventeremo", "diventereste", "diventeresti",
    "diventerete", "diventero'", "diventerà", "diventerò", "diventi",
    "diventiamo", "diventiate", "diventino", "divento", "divento'",
    "diventò", "divenuta", "divenute", "divenuti", "divenuto", "diverra'",
    "diverrai", "diverranno", "diverrebbe", "diverrebbero", "diverrei",
    "diverremmo", "diverremo", "diverreste", "diverresti", "diverrete",
    "diverro'", "diverrà", "diverrò", "diviene", "divieni",
    # parere
    "paia", "paiamo", "paiano", "paiate", "paio", "paiono", "pare",
    "paremmo", "parendo", "parere", "paresse", "paressero", "paressi",
    "paressimo", "pareste", "paresti", "parete", "pareva", "parevamo",
    "parevano", "parevate", "parevi", "parevo", "pari", "parra'", "parrai",
    "parranno", "parrebbe", "parrebbero", "parrei", "parremmo", "parremo",
    "parreste", "parresti", "parrete", "parro'", "parrà", "parrò", "parsa",
    "parse", "parsi", "parso", "parve", "parvero", "parvi",
    # restare
    "resta", "restai", "restammo", "restando", "restano", "restare",
    "restarono", "restasse", "restassero", "restassi", "restassimo",
    "restaste", "restasti", "restata", "restate", "restati", "restato",
    "restava", "restavamo", "restavano", "restavate", "restavi", "restavo",
    "restera'", "resterai", "resteranno", "resterebbe", "resterebbero",
    "resterei", "resteremmo", "resteremo", "restereste", "resteresti",
    "resterete", "restero'", "resterà", "resterò", "resti", "restiamo",
    "restiate", "restino", "resto", "resto'", "restò",
    # rimanere
    "rimane", "rimanemmo", "rimanendo", "rimanere", "rimanesse",
    "rimanessero", "rimanessi", "rimanessimo", "rimaneste", "rimanesti",
    "rimanete", "rimaneva", "rimanevamo", "rimanevano", "rimanevate",
    "rimanevi", "rimanevo", "rimanga", "rimangano", "rimango", "rimangono",
    "rimani", "rimaniamo", "rimaniate", "rimarra'", "rimarrai",
    "rimarranno", "rimarrebbe", "rimarrebbero", "rimarrei", "rimarremmo",
    "rimarremo", "rimarreste", "rimarresti", "rimarrete", "rimarro'",
    "rimarrà", "rimarrò", "rimase", "rimasero", "rimasi", "rimasta",
    "rimaste", "rimasti", "rimasto",
    # risultare
    "risulta", "risultai", "risultammo", "risultando", "risultano",
    "risultare", "risultarono", "risultasse", "risultassero", "risultassi",
    "risultassimo", "risultaste", "risultasti", "risultata", "risultate",
    "risultati", "risultato", "risultava", "risultavamo", "risultavano",
    "risultavate", "risultavi", "risultavo", "risultera'", "risulterai",
    "risulteranno", "risulterebbe", "risulterebbero", "risulterei",
    "risulteremmo", "risulteremo", "risultereste", "risulteresti",
    "risulterete", "risultero'", "risulterà", "risulterò", "risulti",
    "risultiamo", "risultiate", "risultino", "risulto", "risulto'",
    "risultò",
    # stare
    "sta", "stai", "stando", "stanno", "stara'", "starai", "staranno",
    "stare", "starebbe", "starebbero", "starei", "staremmo", "staremo",
    "stareste", "staresti", "starete", "staro'", "starà", "starò", "state",
    "stava", "stavamo", "stavano", "stavate", "stavi", "stavo", "stemmo",
    "stesse", "stessero", "stessi", "stessimo", "steste", "stesti",
    "stette", "stettero", "stetti", "stia", "stiamo", "stiano", "stiate",
    "sto",
))

# From StanfordCoreNLP
reporting = list_checker(("accuse", "acknowledge", "add", "admit", "advise", "agree", "alert", "allege", "announce", "answer", "apologize", "argue", "ask", "assert", "assure", "beg", "blame", "boast", "caution", "charge", "cite", "claim", "clarify", "command", "comment", "compare", "complain", "concede", "conclude", "confirm", "confront", "congratulate", "contend", "contradict", "convey", "counter", "criticize", "debate", "decide", "declare", "defend", "demand", "demonstrate", "deny", "describe", "determine", "disagree", "disclose", "discount", "discover", "discuss", "dismiss", "dispute", "disregard", "doubt", "emphasize", "encourage", "endorse", "equate", "estimate", "expect", "explain", "express", "extoll", "fear", "feel", "find", "forbid", "forecast", "foretell", "forget", "gather", "guarantee", "guess", "hear", "hint", "hope", "illustrate", "imagine", "imply", "indicate", "inform", "insert", "insist", "instruct", "interpret", "interview", "invite", "issue", "justify", "learn", "maintain", "mean", "mention", "negotiate", "note", "observe", "offer", "oppose", "order", "persuade", "pledge", "point", "point out", "praise", "pray", "predict", "prefer", "present", "promise", "prompt", "propose", "protest", "prove", "provoke", "question", "quote", "raise", "rally", "read", "reaffirm", "realise", "realize", "rebut", "recall", "reckon", "recommend", "refer", "reflect", "refuse", "refute", "reiterate", "reject", "relate", "remark", "remember", "remind", "repeat", "reply", "add_report", "request", "respond", "restate", "reveal", "rule", "say", "see", "show", "signal", "sing", "slam", "speculate", "spoke", "spread", "state", "stipulate", "stress", "suggest", "support", "suppose", "surmise", "suspect", "swear", "teach", "tell", "testify", "think", "threaten", "told", "uncover", "underline", "underscore", "urge", "voice", "vow", "warn", "welcome", "wish", "wonder", "worry", "write"))
# One-element tuple: the trailing comma is required. Without it, ("know") is
# just the string "know", not a collection containing that single word.
generics_you_verbs = list_checker(("know",))
pleonastic_verbs = list_checker(("is", "was", "became", "become"))
alternative_a_pleonastic_verbs = list_checker(("seems", "appears", "means", "follows"))
alternative_b_pleonastic_verbs = list_checker(("turns",))

Пример #15

0

Показать файл

Файл: determiners.py Проект: josubg/CorefGraph

# -*- coding: utf-8 -*-
__author__ = 'Valeria Quochi <*****@*****.**>'
__date__ = '5/13/13'

from corefgraph.resources.lambdas import list_checker

indefinite_articles = list_checker(("un", "una", "uno", "un'"))

quantifiers = list_checker((
    "niente",
    "nessun",
    "nessuno",
    "nessuna",
    "tutto",
    "tutti",
    "tutte",
    "tutta",
    "alcun",
    "alcuno",
    "alcuna",
    "alcuni",
    "alcune",
    "ogni",
    "ognun",
    "ognuno",
    "ognuna",
    "ciascuno",
    "ciascuna",
    "abbastanza",
    "qualche",
    "qualunque",

Пример #16

0

Показать файл

# coding=utf-8

__author__ = 'Josu Bermudez <*****@*****.**>'

from corefgraph.resources.lambdas import list_checker

object = list_checker(("cd", "ci"))
subject = list_checker(("suj", ))

Пример #17

0

Показать файл

# English copulative (linking) verb forms, grouped by verb.
# Every form must be its own tuple element: adjacent string literals without
# a comma are silently concatenated by Python. Missing commas previously
# produced the bogus entries "'re's", "camecome out" and "gottengo", which
# dropped "'re", "'s", "came", "come out", "gotten" and "go" from the list.
# Stray whitespace inside " keeps" and "remain " has also been removed, and
# exact duplicate entries were dropped.
copulative = list_checker((
    # act
    "act", "acts", "acting", "acted",
    # appear
    "appear", "appears", "appearing", "appeared",
    # be
    "be", "am", "are", "is", "was", "were", "being", "been",
    "isn't", "aren't", "wasn't", "weren't",
    "'m", "'re", "'s",
    # become
    "become", "becomes", "becoming", "became",
    # come / come out
    "come", "comes", "coming", "came",
    "come out", "comes out", "coming out", "came out",
    # end up
    "end up", "ends up", "ending up", "ended up",
    # get
    "get", "gets", "getting", "got", "gotten",
    # go
    "go", "goes", "going", "went", "gone",
    # grow
    "grow", "grows", "growing", "grew", "grown",
    # fall
    "fall", "falls", "falling", "fell", "fallen",
    # feel
    "feel", "feels", "feeling", "felt",
    # keep
    "keep", "keeps", "keeping", "kept",
    # leave
    "leave", "leaves", "leaving", "left",
    # look
    "look", "looks", "looking", "looked",
    # prove
    "prove", "proves", "proving", "proved", "proven",
    # remain
    "remain", "remains", "remaining", "remained",
    # seem
    "seem", "seems", "seeming", "seemed",
    # smell
    "smell", "smells", "smelling", "smelled", "smelt",
    # sound
    "sound", "sounds", "sounding", "sounded",
    # stay
    "stay", "stays", "staying", "stayed",
    # taste
    "taste", "tastes", "tasting", "tasted",
    # turn / turn up
    "turn", "turns", "turning", "turned",
    "turn up", "turns up", "turning up", "turned up",
))

Пример #18

0

Показать файл

# coding=utf-8

__author__ = 'Josu Bermudez <*****@*****.**>'

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

temporals = list_checker(
    ("second", "minute", "hour", "day", "week", "month", "year", "decade",
     "century", "millennium", "monday", "tuesday", "wednesday", "thursday",
     "friday", "saturday", "sunday", "now", "yesterday", "tomorrow", "age",
     "time", "era", "epoch", "morning", "evening", "day", "night", "noon",
     "afternoon", "semester", "trimester", "quarter", "term", "winter",
     "spring", "summer", "fall", "autumn", "season", "january", "february",
     "march", "april", "may", "june", "july", "august", "september", "october",
     "november", "december"))

Пример #19

0

Показать файл

Файл: dependency.py Проект: josubg/CorefGraph

# coding=utf-8

__author__ = 'Josu Bermudez <*****@*****.**>'

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

object = list_checker(("iobj", "pobj", "dobj"))
subject = list_checker(("nsubj", "csubj"))

Пример #20

0

Показать файл

Файл: partofspeech_old.py Проект: josubg/CorefGraph

_conjunctions = equality_checker("CONJ", )

# Usable functions

# features questions
female = fail()
male = fail()
neutral = fail()
singular = fail()
plural = fail()
animate = fail()
inanimate = fail()

# Adjectives
adjective = list_checker(
    (_adjective, _adjective_comparative, _adjective_superlative))

# pronouns
personal_pronoun = list_checker((_personal_pronoun, _possessive_pronoun))
relative_pronoun = list_checker((_wh_pronoun, _wh_possessive_pronoun))
pronoun = list_checker((_personal_pronoun, _possessive_pronoun, _wh_pronoun,
                        _wh_possessive_pronoun))
mention_pronoun = lambda x: relative_pronoun(x) or personal_pronoun(x)

singular_common_noun = equality_checker(_noun)
plural_common_noun = equality_checker(_noun_plural)
proper_noun = list_checker((_proper_noun, _proper_noun_plural))
noun = lambda x: singular_common_noun(x) or plural_common_noun(
    x) or proper_noun(x)

verbs = list_checker(_verbs_list)

Пример #21

0

Показать файл

# coding=utf-8

__author__ = 'Josu Bermudez <*****@*****.**>'

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

_no_ner = "O"

no_ner = lambda x: x == _no_ner or x is None or x == ""
all = lambda x: x != _no_ner and x is not None and x != ""

# Classic 3 types useful in some cases
person = list_checker(("person", "per"))
organization = list_checker(("org", "organization"))
location = list_checker(("location", "loc"))
other = list_checker(("misc", "other"))

singular = fail()  # lambda x: all(x) and not organization(x)
plural = fail()  # organization

animate = fail()  #list_checker(("PERSON", "PER"))
inanimate = fail(
)  #list_checker(("FACILITY", "FAC", "NORP", "LOCATION", "LOC", "PRODUCT", "EVENT", "ORGANIZATION", "ORG",
#        "WORK OF ART", "LAW", "LANGUAGE", "DATE", "TIME", "PERCENT", "MONEY", "NUMBER", "QUANTITY",
#        "ORDINAL", "CARDINAL", "MISC", "GPE", "WEA", "NML"))

# NE types that denotes mention
# An earlier definition accepted every non-empty label other than "O":
#   lambda x: x != _no_ner and x is not None and x != ""
# It was overwritten by the assignment below on the very next statement, so
# it never took effect; only the explicit label list is kept as live code.
mention_ner = list_checker(
    ("person", "norp", "facility", "organization", "gpe", "nml", "location",
     "product", "event", "work of art", "law", "language", "date", "time"))

Пример #22

0

Показать файл

Файл: determiners.py Проект: josubg/CorefGraph

indefinite_articles: Used to determine if the mention is indefinite.
quantifiers: Used to filter mention that starts with quantifiers.
partitives: Used to filter mentions that are inside a partitive expression.
partitive_particle: Used among partitives.

"""

__author__ = 'Rodrigo Agerri <*****@*****.**>'

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

# Used to determine if the mention is an indefinite mention Un hombre
indefinite_articles = list_checker(
    ('alguna', 'algun', 'algunes', 'algú', 'alguns', 'ambdues', 'ambdòs',
     'bastant', 'bastants', 'cada', 'qualssevol', 'qualsevol', 'quant',
     'quants', 'massa', 'molta', 'moltes', 'molt', 'molts', 'cap', 'gens',
     'ninguns', 'ningú', 'altra', 'altres', 'altre', 'poca', 'poques', 'poc',
     'pocs', 'sengles', 'tantes', 'tanta', 'tants', 'tant', 'totes', 'tota',
     'tots', 'tot', 'unes', 'una', 'uns', 'un', 'diverses', 'diversos'))

# Used to determine a quantified mention:  Bastante Queso
# cuantificadores; they overlap with indefinite_articles in Spanish
quantifiers = list_checker(
    ('no', 'res', 'suficientment', 'suficient', 'harto', 'alguna', 'algun',
     'algunes', 'alguns', 'ambdues', 'ambdòs', 'prou', 'cada', 'qualssevol',
     'qualsevol', 'quantes', 'quants', 'altres', 'massa', 'molta', 'moltes',
     'molt', 'molts', 'cap', 'gens', 'ninguns', 'altra', 'altres', 'altre',
     'altres', 'poca', 'poques', 'poc', 'pocs', 'sengles', 'tantes', 'tanta',
     'tants', 'tant', 'totes', 'tota', 'tots', 'tot', 'unes', 'una', 'uns',
     'un', 'diverses', 'diversos', 'divers'))

Пример #23

0

Показать файл

Файл: constituent.py Проект: josubg/CorefGraph

# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, list_checker, matcher, fail

__author__ = 'Valeria Quochi <*****@*****.**>'
__date__ = '5/16/2013'

# Is a root constituent
root = list_checker(("root", "top", "ROOT", "TOP"))

# Is a clause
# "SBAR" is a clause introduced by a (possibly empty) subordinating
# conjunction. This description used to sit as a bare string literal right
# after `root`, where it read as documentation for the wrong name.
clause = list_checker((
    "S",
    "SBAR",
))

# Is a Noun phrase
noun_phrase = equality_checker("NP")

# Is a Verb phrase
verb_phrase = equality_checker("VP")

particle_constituents = fail()
past_participle_verb = equality_checker("VBN")

interjections = fail()
simple_or_sub_phrase = list_checker(("S", "SBAR"))
ner_constituent = fail()
mention_constituents = matcher("NP.*")
head_rules = list_checker(("SN", "SUJ", "GRUP.NOM"))

Пример #24

0

Показать файл

Файл: stopwords.py Проект: josubg/CorefGraph

# coding=utf-8
""" List of Stopwords 
"""
from corefgraph.resources.lambdas import list_checker, fail

__author__ = 'Valeria Quochi <*****@*****.**>'

# Very basic stop-word list. Apostrophe variants such as "cosi'" and "qua'"
# are listed next to their accented spellings on purpose.
stop_words = list_checker((
    "a", "ad", "ai", "al", "alla", "allo",
    "con", "cosi'", "così",
    "da", "del", "della", "dello", "dentro", "di",
    "e", "ecco", "ed",
    "fra", "fuori",
    "ha", "hai", "hanno", "ho",
    "il", "in",
    "nei", "nella",
    "o",
    "per",
    "qua'", "quello", "questo", "qui", "quindi", "quà",
    "sopra", "sotto", "su", "sul", "sulla",
    "tra",
    "un", "una", "uno",
))

# The original tuple repeated "ad"; a single entry matches the same words.
# NOTE(review): this looks like a placeholder -- confirm the intended content.
extended_stop_words = list_checker(("ad",))

# Filled pauses / hesitation sounds.
non_words = list_checker(
    ("mm", "hmm", "ahm", "uhm", "ehm", "ah", "eh", "oh", "uh", "ih"))

# NOTE(review): fail() presumably builds a checker that never matches --
# confirm in corefgraph.resources.lambdas.
unreliable = fail()

# TODO. Re-Check. not sure of what should go here
invalid_stop_words = list_checker(
    ("c'è", "c'e'", "spa", "s.p.a.", "s.r.l.", "ecc", "etc"))
invalid_start_words = list_checker(("'s", "etc"))
invalid_end_words = list_checker(("etc",))

Пример #25

0

Показать файл

# coding=utf-8
"""

"""

__author__ = 'Josu Bermudez <*****@*****.**>'
__date__ = '3/13' # DD/MM/YY

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

# Word lists extracted from CoreNLP.

# Indefinite articles.
indefinite_articles = list_checker((
    "a",
    "an",
))

# Quantifier words.
quantifiers = list_checker((
    "not", "every", "any", "none",
    "everything", "anything", "nothing",
    "all", "enough",
))

# Head words of partitive expressions ("half of ...", "a group of ...").
partitives = list_checker((
    # fraction and cardinal numbers
    "half", "one", "two", "three", "four", "five", "six", "seven", "eight",
    "nine", "ten",
    # magnitudes
    "hundred", "thousand", "million", "billion",
    "tens", "dozens", "hundreds", "thousands", "millions", "billions",
    # collective and measure nouns
    "group", "groups", "bunch", "number", "numbers", "pinch",
    # "amount" used to be listed twice; like its neighbours it is now a
    # singular/plural pair.
    "amount", "amounts",
    "total", "all", "mile", "miles", "pounds",
))

# Particle that follows a partitive head word.
partitive_particle = equality_checker("of")

Пример #26

0

Показать файл

Файл: namedentities.py Проект: josubg/CorefGraph

# coding=utf-8
""" Named entity labels used in Semeval 2010.

"""
from corefgraph.resources.lambdas import list_checker, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

def all(x):
    """Return True unless *x* is the "O" tag, None, or the empty string.

    NOTE: the name shadows the builtin ``all`` inside this module; it is kept
    because other names in the module are bound to it.
    """
    if x == "O":
        return False
    if x is None:
        return False
    return not (x == "")

# The three classic entity types, each with the label spellings accepted.
person = list_checker((
    "PERSON",
    "PER",
    "person",
))
organization = list_checker((
    "ORG",
    "ORGANIZATION",
    "org",
))
location = list_checker((
    "LOCATION",
    "LOC",
    "loc",
))
# Disabled: other = list_checker(("MISC", "OTHER", "other"))


def singular(x):
    """Return True for any real NE tag that is not an organization."""
    if not all(x):
        return False
    return not organization(x)


# NOTE(review): fail() presumably builds a checker that never matches --
# confirm in corefgraph.resources.lambdas.
plural = fail()

# Animacy reuses the type checkers: persons are animate, locations are not.
animate = person
inanimate = location

# Every NE type accepted by the module-level `all` denotes a mention.
mention_ner = all

# NE types that must be filtered from mention candidates
def no_mention_ner(x=None):
    """Return False: no NE type is filtered from the mention candidates.

    The argument is accepted and ignored. The other checkers in this module
    take one NE tag, so this one presumably gets called the same way; a
    zero-argument callable would raise TypeError there. The default keeps
    zero-argument calls working too.
    """
    return False

Пример #27

0

Показать файл

# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, list_checker, matcher, fail

__author__ = 'Valeria Quochi <*****@*****.**>'

# Constituent-label checkers. Each name is bound to whatever the imported
# factory returns (presumably a one-argument predicate over a constituent
# label -- confirm in corefgraph.resources.lambdas).

# Root of the parse tree.
root = list_checker((
    "root",
    "top",
    "ROOT",
    "TOP",
))

# Clause.
clause = list_checker(("S", "SBAR"))

# Simple or subordinated clause: shares the clause checker.
simple_or_sub_phrase = clause

# Noun phrase.
noun_phrase = equality_checker("NP")

# Verb phrase.
verb_phrase = equality_checker("VP")

# Adverbial phrase.
# NOTE(review): fail() presumably builds a checker that never matches --
# confirm.
adverbial_phrase = fail()

# Direct complement.
complement_direct = list_checker(("CD",))

# Particle constituent (see the fail() note above).
particle_constituent = fail()

Пример #28

0

Показать файл

Файл: determiners.py Проект: josubg/CorefGraph

quantifiers: Used to filter mentions that start with a quantifier.
partitives: Used to filter mentions that are inside a partitive expression.
partitive_particle: Used among partitives.

"""

__author__ = 'Rodrigo Agerri <*****@*****.**>'

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

# Words that mark an indefinite mention, e.g. "Un hombre".
# One row per word family; inflected forms are listed explicitly.
indefinite_articles = list_checker((
    "alguna", "algún", "algunas", "alguno", "algunos",
    "ambas", "ambos",
    "bastante", "bastantes",
    "cada",
    "cualesquier", "cualquier",
    "cuantas", "cuantos",
    "demás",
    "demasiada", "demasiadas", "demasiado", "demasiados",
    "mucha", "muchas", "mucho", "muchos",
    "ninguna", "ningunas", "ningún", "ninguno", "ningunos",
    "otra", "otras", "otro", "otros",
    "poca", "pocas", "poco", "pocos",
    "sendas", "sendos",
    "tantas", "tanta", "tantos", "tanto",
    "todas", "toda", "todos", "todo",
    "unas", "una", "unos", "un",
    "varias", "varios",
))

# Used to determine a quantified mention:  Bastante Queso
# cuantificadores; they overlap with indefinite_articles in Spanish
quantifiers = list_checker(
    ("no", "nada", "suficientemente", "suficiente", "harto", 'alguna', 'algún',
     'algunas', 'alguno', 'algunos', 'ambas', 'ambos', 'bastante', 'bastantes',
     'cada', 'cualesquier', 'cualquier', 'cuantas', 'cuantos', 'demás',
     'demasiada', 'demasiadas', 'demasiado', 'demasiados', 'mucha', 'muchas',
     'mucho', 'muchos', 'ninguna', 'ningunas', 'ningún', 'ninguno', 'ningunos',
     'otra', 'otras', 'otro', 'otros', 'poca', 'pocas', 'poco', 'pocos',
     'sendas', 'sendos', 'tantas', 'tanta', 'tantos', 'tanto', 'todas', 'toda',

Пример #29

0

Показать файл

Файл: pronouns.py Проект: josubg/CorefGraph

Additional or language specific elements:

(Put here any additional list added)

Notes:
 + Mark internal use elements with an initial "_".
 + Use tuples or sets
"""
from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'
__date__ = '14/3/13'  # DD/MM/YY

# Grammatical number of pronouns (lower-case forms).
plural = list_checker((
    "we", "us", "ourself", "ourselves", "ours", "our",
    "yourselves",
    "they", "them", "themselves", "theirs", "their",
))
singular = list_checker((
    "i", "me", "myself", "mine", "my",
    "yourself",
    "he", "him", "himself", "his",
    "she", "herself", "hers", "her",
    "it", "itself", "its",
    "one", "oneself", "one's",
))

# Grammatical gender of pronouns.
female = list_checker((
    "her",
    "hers",
    "herself",
    "she",
))
male = list_checker((
    "he",
    "him",
    "himself",
    "his",
))
neutral = list_checker((
    "it", "its", "itself",
    "where", "here", "there", "which",
))

animate = list_checker(
    ("i", "me", "myself", "mine", "my", "we", "us", "yourself", "ourselves",
     "ours", "our", "you", "yourself", "yours", "your", "yourselves", "he",
     "him", "himself", "his", "she", "her", "herself", "hers", "her", "one",

Пример #30

0

Показать файл

Each element in this module is a function that checks a POS tag.

Elements starting with _ are for internal use only.
"""
from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

# Internal POS tag constants (not part of the public interface).

# Pronouns
_personal_pronoun = "PRP"
_possessive_pronoun = "PRP$"

# Wh- words
_wh_pronoun = "WP"
_wh_possessive_pronoun = "WP$"
_wh_determiner = "WDT"
_wh_adverb = "WRB"

# Checker built from the four wh- tags above.
_wh_words = list_checker((
    _wh_pronoun,
    _wh_possessive_pronoun,
    _wh_determiner,
    _wh_adverb,
))
# Verb tags. "VBP" previously carried a trailing space ("VBP "), so it could
# never be equal to the actual tag.
_verbs_list = ("VB", "VBD", "VBG", "VBN", "VBP", "VBZ")
_modal = "MD"

# Noun tags
_noun = "NN"
_noun_plural = "NNS"
_interjection = "UH"
_proper_noun = "NNP"
_proper_noun_plural = "NNPS"

# Adjective tags
_adjective = "JJ"
_adjective_comparative = "JJR"
_adjective_superlative = "JJS"

# Kept as a one-element tuple, unlike the plain-string constants above.
_conjunction = ("CC", )

# comma = equality_checker(",")

Python list_checker примеры использования