def preprocessing(conversations):
    """Tokenize, stem, and strip stop words from each conversation.

    Each conversation string is reduced to ASCII, word-tokenized,
    scrubbed of the characters ``?!.,( )`` (note: includes space and
    parentheses), lower-cased, Snowball-stemmed, and filtered against an
    English stop-word list extended with common contraction fragments.

    Parameters
    ----------
    conversations : iterable of str
        Raw conversation texts.

    Returns
    -------
    list of list of str
        One list of processed tokens per input conversation.
    """
    removal = set("?!.,( )")
    # Hoisted out of the loops: the original rebuilt the stemmer for
    # every token and the stop-word set for every conversation, which
    # dominated runtime. Behavior is unchanged.
    stemmer = SnowballStemmer("english")
    stop_words = set(stopwords.words('english'))
    stop_words.update(("'s", "n't", "'m", "'ve", "'re", "'d", "'"))

    pred_conversations = []
    for conversation in conversations:
        # Drop any non-ASCII characters before tokenizing.
        ascii_text = ''.join(ch for ch in conversation if ord(ch) < 128)
        sentence = []
        for word in word_tokenize(ascii_text):
            # Strip the removal characters from the token.
            pred_word = ''.join(ch for ch in word if ch not in removal)
            if not pred_word:
                continue
            pred_word = stemmer.stem(pred_word.lower())
            if pred_word in stop_words:
                continue
            sentence.append(pred_word)
        pred_conversations.append(sentence)
    return pred_conversations
def morphy_stem(word):
    """Stem *word* with the English Snowball stemmer.

    Returns the lower-cased stem; if stemming yields an empty/falsy
    result, returns the lower-cased original word instead.
    """
    stemmed = SnowballStemmer('english').stem(word)
    return stemmed.lower() if stemmed else word.lower()
def get_data(name_train, name_test_closed, name_test_open):
    """Load train/test CSVs and normalize their 'text' columns in place.

    Both files are read with columns ['autor', 'gender', 'age', 'text'];
    each text is split on sentence enders, stripped of punctuation
    (except ``-`` and ``'``), lower-cased, Snowball-stemmed, and numeric
    tokens are replaced with the literal "NUMBER".

    Parameters
    ----------
    name_train : str
        Path to the training CSV.
    name_test_closed : str
        Accepted but never read.
        NOTE(review): only `name_test_open` is loaded as the test set —
        confirm whether the closed test file was meant to be used.
    name_test_open : str
        Path to the (open) test CSV.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The cleaned train and test frames.
    """
    csv_kwargs = dict(header=0, sep=',', quotechar='"',
                      names=['autor', 'gender', 'age', 'text'])
    train_set = pd.read_csv(name_train, **csv_kwargs)
    test_set = pd.read_csv(name_test_open, **csv_kwargs)

    # Every punctuation character except hyphen and apostrophe.
    punct_cleaned = set(punctuation.replace("-", "").replace("'", ""))
    # Hoisted: the original built a new SnowballStemmer per word per row.
    stemmer = SnowballStemmer('english')

    # One pass per frame; assigning the whole column avoids the pandas
    # chained-assignment pitfall of `df["text"].iloc[i] = ...`, which can
    # silently write to a temporary copy.
    for frame in (train_set, test_set):
        frame["text"] = [_normalize_text(text, punct_cleaned, stemmer)
                         for text in frame["text"]]
    return train_set, test_set


def _normalize_text(text, punct_chars, stemmer):
    """Clean one raw text: sentence-split, de-punctuate, stem, map numbers.

    Mirrors the original per-row loop exactly; extracted because the
    train and test loops were copy-pasted duplicates.
    """
    tokens = []
    for sentence in re.split('[.!?]', text):
        for word in sentence.split():
            # Single-character punctuation tokens pass through unchanged
            # (they are members of punct_chars); everything else is
            # stripped of punctuation characters.
            if word not in punct_chars:
                word = ''.join(ch for ch in word if ch not in punct_chars)
            word = stemmer.stem(word.lower())
            if word.isnumeric():
                word = "NUMBER"
            tokens.append(word)
    return ' '.join(tokens)