def translate(text):
    blob = TextBlob(text)
    transl = blob.translate(to='en')
    # jsondetail is a helper defined elsewhere that records the analysis details
    result = jsondetail(blob.tags, blob.noun_phrases, blob.word_counts, blob.words,
                        blob.tokenize(), blob.sentiment_assessments, transl)
    return transl
def analisis(text):
    blob = TextBlob(text)
    #lang = blob.detect_language()
    transl = ''
    polarity = 0
    #sentences = blob.sentences
    #if (lang != 'en'):
    transl = blob.translate(to='en')
    enBlob = TextBlob(str(transl))
    blob = enBlob
    sentences = enBlob.sentences
    for sentence in sentences:
        polarity += sentence.sentiment.polarity
    percent = round(polarity * 100)
    print(percent)
    # jsonify comes from Flask; posneg, neg, and is_hoax are helpers defined elsewhere
    result = jsonify({
        "polarity": percent,
        "positive": posneg(percent),
        "negative": neg(percent),
        "isHoax": is_hoax(percent),
        #"language": lang,
        "tags": blob.tags,
        "noun_phrases": blob.noun_phrases,
        "word_counts": blob.word_counts,
        "words": blob.words,
        "tokenize": blob.tokenize(),
        "sentiment_assessments": blob.sentiment_assessments,
        "translation": transl
    })
    return result
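# The helpers posneg, neg, and is_hoax above are defined elsewhere in that
# codebase. A minimal sketch of what they might look like, purely an
# assumption inferred from how they are used, not the original code:
def posneg(percent):
    """Positive share of the polarity percentage (assumed behavior)."""
    return percent if percent > 0 else 0

def neg(percent):
    """Negative share of the polarity percentage (assumed behavior)."""
    return abs(percent) if percent < 0 else 0

def is_hoax(percent):
    """Flag strongly negative text as a possible hoax (assumed threshold)."""
    return percent < 0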
def process_order(self, order: str, source: sr.Microphone):
    """
    Processes a converted voice command using nltk, TextBlob, and the vectorizer.
    The command sentence is tokenized and filtered in order to catch "hot"
    words and decide whether the sentence maps to any implemented feature.

    :param order: str
        Voice command converted to text
    :param source: speech_recognition.Microphone
        Object representing a physical microphone on the computer
    :return: None
    """
    sentence_to_analyze = TextBlob(order)
    self.check_sentence_polarity(sentence_to_analyze)
    order_vector = self.vectorizer.transform([order]).toarray()
    command_category = self.classifier.predict(order_vector)[0]
    if self.check_command_category(source, command_category):
        tokenized_order = sentence_to_analyze.tokenize()
        preprocess_order = [
            word for word in tokenized_order
            if word not in stopwords.words('english')
        ]
        self.commands[command_category](source, preprocess_order)
    else:
        self.convert_text_to_speech(BasicPhrases.NO_COMMEND)
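# process_order assumes self.vectorizer and self.classifier were fitted
# beforehand. A minimal sketch of how such a pair might be trained with
# scikit-learn; the phrases, categories, and model choice are placeholder
# assumptions, not the original training code:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

phrases = ["play some music", "what is the weather", "set an alarm"]  # hypothetical commands
categories = ["music", "weather", "alarm"]                            # hypothetical labels

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(phrases).toarray()

classifier = MultinomialNB()
classifier.fit(X, categories)  # predict() then returns one of the category labels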
def match_syntagm_text_blob_multi(syntagms, text):
    from textblob import TextBlob
    from textblob_fr import PatternTagger, PatternAnalyzer
    blob = TextBlob(text, pos_tagger=PatternTagger(), analyzer=PatternAnalyzer())
    return match_sequences(syntagms, list(blob.tokenize()))
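# match_sequences is not defined in this snippet. A minimal sketch of what it
# might do, assuming it returns the syntagms whose token sequence appears
# contiguously in the token list (an assumption, not the original helper):
def match_sequences(syntagms, tokens):
    matches = []
    for syntagm in syntagms:
        parts = syntagm.split()
        n = len(parts)
        # scan for a contiguous window of tokens equal to the syntagm's words
        for i in range(len(tokens) - n + 1):
            if list(tokens[i:i + n]) == parts:
                matches.append(syntagm)
                break
    return matches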
from textblob.taggers import NLTKTagger

nltkTagger = NLTKTagger()  # tagger instance used below (created outside the function in the original)

def process_keywords(input_file):
    keywords = []  # renamed from `list`, which shadowed the builtin
    for line in input_file:
        blob = TextBlob(line, pos_tagger=nltkTagger)
        kwds = blob.tokenize()
        if len(kwds) > 2:  # for longer lines, keep only the noun phrases
            kwds = blob.noun_phrases
        keywords.append(kwds)
    return keywords
def translate(text):
    blob = TextBlob(text)
    transl = blob.translate(to='en')
    return {
        "tags": blob.tags,
        "noun_phrases": blob.noun_phrases,
        "word_counts": blob.word_counts,
        "words": blob.words,
        "tokenize": blob.tokenize(),
        "sentiment_assessments": blob.sentiment_assessments,
        "translation": transl
    }
def __call__(self, raw_data: str):
    """Transform raw_data into new, normalized data."""
    # correct the words, then stem each lowercased token
    t = TextBlob(raw_data).correct()
    word_list = [self.st.stem(w.lower()) for w in t.tokenize()]
    s = " ".join(word_list)
    tb = TextBlob(s).correct()
    return str(tb)
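# The class around __call__ is not shown; self.st is presumably a stemmer.
# A minimal usage sketch, assuming an NLTK SnowballStemmer and a made-up
# wrapper class name (TextNormalizer is not from the original code):
from textblob import TextBlob
from nltk.stem import SnowballStemmer

class TextNormalizer:
    def __init__(self):
        self.st = SnowballStemmer("english")

    def __call__(self, raw_data: str):
        t = TextBlob(raw_data).correct()
        word_list = [self.st.stem(w.lower()) for w in t.tokenize()]
        return str(TextBlob(" ".join(word_list)).correct())

normalize = TextNormalizer()
print(normalize("The strippped leaves were fallling"))  # spell-corrected, stemmed text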
def translate(text):
    # note: translate() and detect_language() call the Google Translate API
    # and are deprecated in newer TextBlob releases
    blob = TextBlob(text)
    lang = blob.detect_language()
    transl = ''
    p = 0
    sa = []
    if lang != 'en':  # anything other than English is handled here
        transl = blob.translate(to='en')
        pol = polarity(str(transl))
        p = pol[0]
        sa = pol[1]
    else:
        p = polarity(text)[0]
        sa = blob.sentiment_assessments
    return [
        transl, lang, blob.tags, blob.noun_phrases, blob.word_counts,
        blob.words, blob.tokenize(), sa, text, p
    ]
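# The function above returns its results positionally, which is easy to
# misread. A usage sketch showing the order of the returned fields (assumes
# the polarity helper used by translate() is defined elsewhere):
result = translate("Ceci est une phrase.")
(transl, lang, tags, noun_phrases, word_counts,
 words, tokens, assessments, original_text, polarity_score) = result
print(lang, polarity_score)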
    elif tag == "VB":
        entry = {'word': word, 'synonyms': []}
        for synset in word.get_synsets(pos='v'):  # 'v' for verb synsets (the original reused 'n')
            for syn in synset.lemmas():
                entry['synonyms'].append(syn.name().replace('_', ' '))
        verbs.append(entry)
    elif tag == "JJ":
        entry = {'word': word, 'synonyms': []}
        for synset in word.get_synsets(pos='a'):  # 'a' for adjective synsets (the original reused 'n')
            for syn in synset.lemmas():
                entry['synonyms'].append(syn.name().replace('_', ' '))
        adjectives.append(entry)

for token in processed_paragraph.tokenize():
    for entry in nouns:
        if token == entry['word']:
            if len(entry['synonyms']) != 0:
                synonym = random.choice(entry['synonyms'])
                paragraph = paragraph.replace(token, synonym)
    for entry in verbs:
        if token == entry['word']:
            if len(entry['synonyms']) != 0:
                synonym = random.choice(entry['synonyms'])
                paragraph = paragraph.replace(token, synonym)
from textblob import TextBlob

text = "What am i are you dooing here?"
blob = TextBlob(text)
ans = blob.tokenize()
print(ans)
'''
t = blob.correct()
print(t)
word = blob.words
print(word)
word = blob.word_counts
print(word)
for every in blob.sentiment_assessments:
    print(every)
'''
snt = blob.sentiment.polarity
print(snt)
def noun_phrases(text):
    blob = TextBlob(text)
    return blob.tokenize()  # note: despite the name, this returns tokens, not blob.noun_phrases
from textblob import TextBlob  # first, we need to import TextBlob, the package that will help us analyze text

source_text = "Don't tell me the moon is shining; show me the glint of light on broken glass."  # chekhov
processed_text = TextBlob(source_text)  # in order for us to process it, we pass it to TextBlob

nouns = []       # this is the list where we will store all our nouns
verbs = []       # this one is where we will store verbs
adjectives = []  # this is where we store adjectives

print("\n======================\n")

# TOKENIZING is the process by which you separate the sentence into individual
# tokens (essentially words, suffixes, and punctuation)
for word in processed_text.tokenize():
    print(word)

print("\n======================\n")

# PARTS OF SPEECH tagging lets you get the grammatical role of each word
for word, tag in processed_text.tags:
    print("word: %s || part-of-speech: %s" % (word, tag))
    if tag == "NN":    # here we are looking for nouns (NN)
        nouns.append(word)  # if we find one, we append it to our list
    elif tag == "VB":  # here we look for verbs (VB)
        verbs.append(word)
    elif tag == "JJ":  # and here for adjectives
        adjectives.append(word)

print("\n======================\n")

# SYNSETS are structures from WordNet through which you can get all the sets
# of related words for a given word
print(source)

custom_dictionary = []
for word, tag in processed.tags:
    if tag == 'NN':
        entry = {          # each of our entries in our dictionary
            'word': word,  # has the initial word
            'others': []   # as well as a list of other possibilities
        }
        for synset in word.get_synsets(pos="n"):
            for syn in synset.lemmas():  # loop through the lemmas related to the current noun
                # replace any '_' character with a ' ' space character when we
                # add it to our list of other possibilities
                entry['others'].append(syn.name().replace('_', ' '))
                if syn.antonyms():
                    entry['others'].append(syn.antonyms()[0].name().replace('_', ' '))
        custom_dictionary.append(entry)  # then we add the entry to our dictionary

# this is the part where we actually replace the source text
for token in processed.tokenize():       # tokenize to make sure we get each part of the sentences
    for entry in custom_dictionary:      # then, for each token, go through our custom dictionary
        if token == entry['word']:       # if we match the word
            if len(entry['others']) != 0:  # and if we actually do have a word to replace it with!
                other = random.choice(entry['others'])  # pick a random alternative
                source = source.replace(token, other)   # and replace it in the source text

print(source)
"""
Created on Mon Apr 9 17:25:19 2018

@author: miaoji
"""
import time

import nltk.tokenize as nt
from textblob import TextBlob

start_time = time.time()

in_file = open("/data/zhangbin/caozhaojun/true_procress_data/daodao_en.txt", 'r')
out_file = open("handle_daodao_en.txt", 'a+')
tokenizer = nt.TweetTokenizer()

line_id = 0
for line in in_file.readlines():
    line_id += 1
    if line_id % 1000 == 0:
        print(line_id)
    correct_line = TextBlob(line.lower().replace('...', ' ').strip())  # .correct()
    token_line = correct_line.tokenize(tokenizer)
    final_line = ' '.join([word for word in token_line])
    out_file.write(final_line + '\n')

in_file.close()
out_file.close()

end_time = time.time()
print(float(end_time - start_time))
# toNote: pluralize & singularize!
print(attack_blob.words.singularize())
print(attack_blob.words.pluralize())
print(attack_blob.word_counts['of'])
print(attack_blob.ngrams(n=2))
print(attack_blob.ngrams(n=4))

from textblob import Word
for word in attack_blob.words:
    print(Word(word).correct() == word)

#%% Example from https://www.analyticsvidhya.com/blog/2018/02/natural-language-processing-for-beginners-using-textblob/
av_blob = TextBlob("Analytics Vidhya is a great platform to learn data science. \n It helps community through blogs, hackathons, discussions,etc.")
print(av_blob.tokenize())
print(av_blob.sentences, av_blob.sentences[0])

for phrase in av_blob.noun_phrases:
    print(phrase)  # analytics vidhya; great platform; data science

# toNote: part-of-speech tagging
for words, tag in av_blob.tags:
    print(words, tag)

# INFLECTION is the process of word formation in which characters are added
# to the base form of a word to express grammatical meanings.
# word inflection and lemmatization
print(av_blob.sentences[1].words[1].singularize())  # helps -> help

# pluralize
w = Word('Platform')
print(w.pluralize())  # platforms
# Sentiment analyzer trained on movie reviews
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
blob = TextBlob("I love this library", analyzer=NaiveBayesAnalyzer())
blob.sentiment  # requires the NLTK movie_reviews corpus: nltk.download('movie_reviews')

# Tokenizer
from nltk.tokenize import TabTokenizer
tokenizer = TabTokenizer()
blob = TextBlob("This is\ta rather tabby\tblob.", tokenizer=tokenizer)
blob.tokens

# This is an alternative way: pass the tokenizer to tokenize() directly
from nltk.tokenize import BlanklineTokenizer
tokenizer = BlanklineTokenizer()
blob = TextBlob("A token\n\nof appreciation")
blob.tokenize(tokenizer)

# Noun phrase chunkers
from textblob.np_extractors import ConllExtractor
extractor = ConllExtractor()
blob = TextBlob("Python is a high-level programming language.", np_extractor=extractor)
blob.noun_phrases

# POS taggers
from textblob.taggers import NLTKTagger
nltk_tagger = NLTKTagger()
blob = TextBlob("Tag! You're It!", pos_tagger=nltk_tagger)
blob.pos_tags

# Parser
from textblob.parsers import PatternParser
blob = TextBlob("Parsing is fun.", parser=PatternParser())
blob.parse()