Exemplo n.º 1
0
def test_lemmatize():
    """Check that lemmatize returns the lemma stored for a known word."""
    lemmas = {"corriendo": "correr"}
    lemma = utils.lemmatize(lemmas, "corriendo")
    assert lemma == "correr"
Exemplo n.º 2
0
def test_lemmatize():
    """Check that lemmatize returns the lemma stored for a known word."""
    lookup = {'corriendo': 'correr'}
    expected = 'correr'
    assert utils.lemmatize(lookup, 'corriendo') == expected
Exemplo n.º 3
0
    def __init__(self, doc, lexico_semantic_norm_dict, lemmatizer=None):
        """Initialize lexico semantic norm object.

        Sum the six norm values of every token found in
        ``lexico_semantic_norm_dict`` and store each total divided by the
        number of matched tokens (not the number of tokens in ``doc``).

        A token is looked up by its lowercased text first. If that is absent
        and a lemmatizer was given, its lemma is looked up instead. Tokens
        that match neither way are ignored and do not count towards the
        average. When nothing matches, every stored value stays at 0.

        :param doc: Text to be processed
        :type doc: Spacy Doc
        :param lexico_semantic_norm_dict: Lexico semantic norms for words;
            each entry is read with ``.get`` for the six norm names
        :type lexico_semantic_norm_dict: dict
        :param lemmatizer: Lemmatizer, defaults to None
        :type lemmatizer: dict, optional
        """
        norm_names = (
            "valence",
            "arousal",
            "concreteness",
            "imageability",
            "context_availability",
            "familiarity",
        )
        totals = dict.fromkeys(norm_names, 0)
        matched = 0.0

        for token in doc:
            key = token.text.lower()
            if key not in lexico_semantic_norm_dict:
                # Fall back to the lemma; it is only computed when the
                # surface form itself has no entry.
                if not lemmatizer:
                    continue
                key = lemmatize(lemmatizer, key)
                if key not in lexico_semantic_norm_dict:
                    continue

            norms = lexico_semantic_norm_dict[key]
            for name in norm_names:
                # A norm missing from the entry yields None here, so the
                # addition raises TypeError rather than being skipped.
                totals[name] += norms.get(name)
            matched += 1

        if matched > 0:
            totals = {
                name: total / matched for name, total in totals.items()
            }

        self.__valence = totals["valence"]
        self.__arousal = totals["arousal"]
        self.__concreteness = totals["concreteness"]
        self.__imageability = totals["imageability"]
        # NOTE(review): attribute name is misspelt ("avilability"); kept
        # as-is because other methods of the class may read it - confirm
        # before renaming.
        self.__context_avilability = totals["context_availability"]
        self.__familiarity = totals["familiarity"]
Exemplo n.º 4
0
    def __init__(self, doc: Doc, lemmatizer: Optional[Dict[str, str]] = None):
        """Initialize emotions class.

        Sum the score of every token found in ``SPANISH_EMOTION_LEXICON``
        into the total of its emotion and store each total divided by the
        number of matched tokens (not the number of tokens in ``doc``).

        A token is looked up by its lowercased text first. If that is absent
        and a lemmatizer was given, its lemma is looked up instead. Tokens
        that match neither way are ignored and do not count towards the
        average. When nothing matches, every stored value stays at 0.

        :param doc: Text to be processed
        :type doc: Spacy Doc
        :param lemmatizer: Lemmatizer to use, defaults to None
        :type lemmatizer: Python dict, optional
        """
        fallback = "Tristeza"
        totals = {
            "Alegría": 0,
            "Enojo": 0,
            "Miedo": 0,
            "Repulsión": 0,
            "Sorpresa": 0,
            fallback: 0,
        }
        matched = 0.0

        for token in doc:
            key = token.text.lower()
            if key not in SPANISH_EMOTION_LEXICON:
                # Fall back to the lemma; it is only computed when the
                # surface form itself has no entry.
                if not lemmatizer:
                    continue
                key = lemmatize(lemmatizer, key)
                if key not in SPANISH_EMOTION_LEXICON:
                    continue

            pfa, emotion = SPANISH_EMOTION_LEXICON[key]
            # Any label other than the five listed above is counted as
            # tristeza.
            bucket = emotion if emotion in totals else fallback
            totals[bucket] += pfa
            matched += 1

        if matched > 0:
            totals = {
                label: total / matched for label, total in totals.items()
            }

        self.__alegria = totals["Alegría"]
        self.__enojo = totals["Enojo"]
        self.__miedo = totals["Miedo"]
        self.__repulsion = totals["Repulsión"]
        self.__sorpresa = totals["Sorpresa"]
        self.__tristeza = totals[fallback]