Exemplo n.º 1
0
                               Tk)

from nltk.corpus import (cess_cat, brown, nps_chat, treebank, sinica_treebank,
                         alpino, indian, floresta, mac_morpho, machado,
                         cess_esp)
from nltk.util import in_idle
from nltk.probability import FreqDist

# Event-name strings written in Tk's `<<...>>` virtual-event form. Judging by
# the identifiers, one marks a finished corpus load and the other a failed
# load -- NOTE(review): confirm at the sites that generate/bind them.
CORPUS_LOADED_EVENT = '<<CL_EVENT>>'
ERROR_LOADING_CORPUS_EVENT = '<<ELC_EVENT>>'
# Polling period; presumably milliseconds for a Tk `after` callback -- TODO confirm.
POLL_INTERVAL = 100

# Corpus label used as the default; it is one of the keys of _CORPORA.
_DEFAULT = 'English: Brown Corpus (Humor)'
_CORPORA = {
    'Catalan: CESS-CAT Corpus':
    lambda: cess_cat.words(),
    'English: Brown Corpus':
    lambda: brown.words(),
    'English: Brown Corpus (Press)':
    lambda: brown.words(categories=['news', 'editorial', 'reviews']),
    'English: Brown Corpus (Religion)':
    lambda: brown.words(categories='religion'),
    'English: Brown Corpus (Learned)':
    lambda: brown.words(categories='learned'),
    'English: Brown Corpus (Science Fiction)':
    lambda: brown.words(categories='science_fiction'),
    'English: Brown Corpus (Romance)':
    lambda: brown.words(categories='romance'),
    'English: Brown Corpus (Humor)':
    lambda: brown.words(categories='humor'),
    'English: NPS Chat Corpus':
Exemplo n.º 2
0
 def test_catalan(self):
     """Check the opening tokens and one tagged item of the CESS-CAT corpus.

     The first 15 items of ``cess_cat.words()`` must equal the
     whitespace-split reference sentence, and element 0 of the item at
     index 34 in the first tagged sentence must be "càrrecs".
     """
     expected_tokens = (
         "El Tribunal_Suprem -Fpa- TS -Fpt- ha confirmat la condemna a quatre anys d' inhabilitació especial"
     ).split()
     leading_tokens = cess_cat.words()[:15]
     self.assertEqual(leading_tokens, expected_tokens)

     first_tagged_sentence = cess_cat.tagged_sents()[0]
     self.assertEqual(first_tagged_sentence[34][0], "càrrecs")
 def test_catalan(self):
     """Verify the start of the CESS-CAT word list and one tagged entry.

     Compares ``cess_cat.words()[:15]`` with the reference sentence split
     on whitespace, then checks element 0 of entry 34 of the first item
     returned by ``cess_cat.tagged_sents()``.
     """
     reference = "El Tribunal_Suprem -Fpa- TS -Fpt- ha confirmat la condemna a quatre anys d' inhabilitació especial"
     head = cess_cat.words()[:15]
     self.assertEqual(head, reference.split())

     entry = cess_cat.tagged_sents()[0][34]
     self.assertEqual(entry[0], "càrrecs")
Exemplo n.º 4
0
                     Text, Tk)

from nltk.corpus import (cess_cat, brown, nps_chat, treebank, sinica_treebank, alpino,
                         indian, floresta, mac_morpho, machado, cess_esp)
from nltk.util import in_idle
from nltk.probability import FreqDist


# Event-name strings written in Tk's `<<...>>` virtual-event form. Judging by
# the identifiers, one marks a finished corpus load and the other a failed
# load -- NOTE(review): confirm at the sites that generate/bind them.
CORPUS_LOADED_EVENT = '<<CL_EVENT>>'
ERROR_LOADING_CORPUS_EVENT = '<<ELC_EVENT>>'
# Polling period; presumably milliseconds for a Tk `after` callback -- TODO confirm.
POLL_INTERVAL = 100

# Corpus label used as the default; it is one of the keys of _CORPORA.
_DEFAULT = 'English: Brown Corpus (Humor)'
_CORPORA = {
            'Catalan: CESS-CAT Corpus':
                lambda: cess_cat.words(),
            'English: Brown Corpus':
                lambda: brown.words(),
            'English: Brown Corpus (Press)':
                lambda: brown.words(categories=['news', 'editorial', 'reviews']),
            'English: Brown Corpus (Religion)':
                lambda: brown.words(categories='religion'),
            'English: Brown Corpus (Learned)':
                lambda: brown.words(categories='learned'),
            'English: Brown Corpus (Science Fiction)':
                lambda: brown.words(categories='science_fiction'),
            'English: Brown Corpus (Romance)':
                lambda: brown.words(categories='romance'),
            'English: Brown Corpus (Humor)':
                lambda: brown.words(categories='humor'),
            'English: NPS Chat Corpus':
Exemplo n.º 5
0
import nltk
from nltk.corpus import cess_cat
import pylab
# Token sequence returned by the CESS-CAT corpus reader.
paraules = cess_cat.words()
# Restrict the analysis to the first 1000 tokens.
paraules1 = paraules[:1000]
# Count how often each distinct token occurs in that sample.
freqdist = nltk.FreqDist(paraules1)
# Draw the frequency plot (see NLTK's FreqDist.plot for what is rendered).
freqdist.plot()
Exemplo n.º 6
0
    floresta,
    mac_morpho,
    machado,
    cess_esp,
)
from nltk.util import in_idle
from nltk.probability import FreqDist


# Event-name strings written in Tk's `<<...>>` virtual-event form. Judging by
# the identifiers, one marks a finished corpus load and the other a failed
# load -- NOTE(review): confirm at the sites that generate/bind them.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"
# Polling period; presumably milliseconds for a Tk `after` callback -- TODO confirm.
POLL_INTERVAL = 100

# Corpus label used as the default; it is one of the keys of _CORPORA.
_DEFAULT = "English: Brown Corpus (Humor)"
_CORPORA = {
    "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(),
    "English: Brown Corpus": lambda: brown.words(),
    "English: Brown Corpus (Press)": lambda: brown.words(categories=["news", "editorial", "reviews"]),
    "English: Brown Corpus (Religion)": lambda: brown.words(categories="religion"),
    "English: Brown Corpus (Learned)": lambda: brown.words(categories="learned"),
    "English: Brown Corpus (Science Fiction)": lambda: brown.words(categories="science_fiction"),
    "English: Brown Corpus (Romance)": lambda: brown.words(categories="romance"),
    "English: Brown Corpus (Humor)": lambda: brown.words(categories="humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.words(),
    "English: Wall Street Journal Corpus": lambda: treebank.words(),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.words(),
    "Dutch: Alpino Corpus": lambda: alpino.words(),
    "Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(),
    "Portuguese: Machado Corpus (Brazil)": lambda: machado.words(),
Exemplo n.º 7
0
    floresta,
    mac_morpho,
    machado,
    cess_esp,
)
from nltk.util import in_idle
from nltk.probability import FreqDist


# Event-name strings written in Tk's `<<...>>` virtual-event form. Judging by
# the identifiers, one marks a finished corpus load and the other a failed
# load -- NOTE(review): confirm at the sites that generate/bind them.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"
# Polling period; presumably milliseconds for a Tk `after` callback -- TODO confirm.
POLL_INTERVAL = 100

# Corpus label used as the default; it is one of the keys of _CORPORA.
_DEFAULT = "English: Brown Corpus (Humor)"
_CORPORA = {
    "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(),
    "English: Brown Corpus": lambda: brown.words(),
    "English: Brown Corpus (Press)": lambda: brown.words(
        categories=["news", "editorial", "reviews"]
    ),
    "English: Brown Corpus (Religion)": lambda: brown.words(categories="religion"),
    "English: Brown Corpus (Learned)": lambda: brown.words(categories="learned"),
    "English: Brown Corpus (Science Fiction)": lambda: brown.words(
        categories="science_fiction"
    ),
    "English: Brown Corpus (Romance)": lambda: brown.words(categories="romance"),
    "English: Brown Corpus (Humor)": lambda: brown.words(categories="humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.words(),
    "English: Wall Street Journal Corpus": lambda: treebank.words(),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.words(),
    "Dutch: Alpino Corpus": lambda: alpino.words(),