def save(self, *args, **kwargs):
    from goose import Goose
    from text.blob import TextBlob

    g = Goose()
    article = g.extract(url=self.url)
    try:
        b = TextBlob(article.title)
        lang = b.detect_language()
    except Exception:
        lang = 'en'
    # Extract again with the detected language so the cleaned text is language-aware.
    g = Goose({'use_meta_language': False,
               'target_language': lang,
               'paper_class': 'soup'})
    article = g.extract(url=self.url)
    if not self.title:
        self.title = article.title
    if not self.newspaper:
        self.newspaper = article.domain
    if not self.content:
        self.content = article.cleaned_text
    try:
        if article.top_image.src:
            layout = Photo()
            # layout.photo = "images/news/" + str(self.id) + ".jpg"
            layout.url = article.top_image.src
            layout.article = self
            layout.save()
    except Exception:
        pass
    super(Article, self).save(*args, **kwargs)
def save(self):
    import re

    if not self.content:
        # Concatenate the content of all related articles.
        content = ''
        for a in self.article.all():
            content += a.content + '\r'
        # Normalize typographic punctuation and strip digits
        # (str.replace() does not take a regex, so use re.sub for the digits).
        content = content.replace(u'–', '-')
        content = content.replace(u'“', '"')
        content = content.replace(u'”', '"')
        content = content.replace(u'’', "'")
        content = re.sub(r'\d', '', content)
        self.content = content
    if not self.summary:
        self.summary = get_summary(self.content, self.compression)
        # Detect the language from the first line, then prefer an OTS summary.
        from text.blob import TextBlob
        try:
            b = TextBlob(self.content.split('\n', 1)[0])
            lang = b.detect_language()
        except Exception:
            lang = 'en'
        o = ots.OTS(lang, self.compression)
        filename = u'text' + str(self.id) + '.txt'
        f = open(filename, 'w')
        f.write(self.content.encode("utf-8"))
        f.close()
        o.parse(filename, 60)
        try:
            os.remove(filename)
        except OSError:
            pass
        self.summary = str(o)
    if not self.spin_summary:
        self.spin_summary = get_text_synonymizer(self.summary)
    super(Syntesis, self).save()
def test_tag_blob(self):
    blob = TextBlob(self.text, pos_tagger=self.tagger)
    tags = blob.tags
    logging.debug("tags: {0}".format(tags))
    words = self.text.split()
    for i, word_tag in enumerate(tags):
        assert_equal(word_tag[0], words[i])
def sentences_sentiment():
    text = get_text(request)
    blob = TextBlob(text)
    sentences = [{"sentence": unicode(s), "sentiment": s.sentiment[0]}
                 for s in blob.sentences]
    return jsonify({"result": sentences})
def sentence_to_words(sentence):
    """Convert the passed sentence into a list of words.

    Returns all words except English stop words.
    """
    # Build the stop word set once, and compare lowercased words so that
    # capitalized stop words ("The", "A", ...) are filtered out too.
    stop = set(stopwords.words('english'))
    blob = TextBlob(sentence)
    return [word.lower() for word in blob.words if word.lower() not in stop]
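# Usage sketch for sentence_to_words() above (assumes the NLTK stopwords
# corpus has been downloaded, e.g. nltk.download('stopwords')):
print(sentence_to_words("The quick brown fox jumps over the lazy dog"))
# -> ['quick', 'brown', 'fox', 'jumps', 'lazy', 'dog']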
def cache_sentences(self):
    self.cached = True
    self.cache_list = []
    for key, value in self.__dict__.items():
        if key not in filterTag:
            try:
                blob = TextBlob(value)
                for sentence in blob.sentences:
                    self.cache_list.append(sentence)
            except Exception as e:
                logger.debug("textblob error| %s:%s" % (key, value))
def create_blob(self, request_json):
    options = {}
    if request_json.get('analyzer') == 'NaiveBayesAnalyzer':
        options['analyzer'] = self.naive_bayes_analyzer
    if request_json.get('np_extractor') == 'ConllExtractor':
        options['np_extractor'] = self.conll_extractor
    if request_json.get('pos_tagger') == 'NLTKTagger':
        options['pos_tagger'] = self.nltk_tagger
    elif request_json.get('pos_tagger') == 'PerceptronTagger':
        options['pos_tagger'] = self.perceptron_tagger
    return TextBlob(request_json['text'], **options)
def one_sentence_from(self, quote):
    """Reduce the given quote to a single sentence.

    The choice is biased against the first sentence, which is less
    likely to be the start of a real in-text sentence.
    """
    blob = TextBlob(quote)
    try:
        sentences = blob.sentences
    except Exception as e:
        # TextBlob can't parse this. Just return the whole string.
        return quote
    # (Completion sketch; the original snippet is truncated here.)
    # Bias against the first sentence, per the docstring.
    if len(sentences) < 2:
        return quote
    return unicode(random.choice(sentences[1:]))
def truncate_at_stopword(self, string):
    # Truncate a string at the last stopword not preceded by
    # another stopword.
    # print "%s =>" % string
    if type(string) == Sentence:
        words = string.words
    else:
        try:
            words = TextBlob(string).words
        except Exception as e:
            # TextBlob can't parse this. Just return the whole string.
            return string
    # (Completion sketch; the original snippet is truncated here, and
    # `self.stopwords` is assumed to be the instance's stop word list.)
    for i in range(len(words) - 1, 0, -1):
        if (words[i].lower() in self.stopwords
                and words[i - 1].lower() not in self.stopwords):
            return " ".join(words[:i])
    return string
def to_sentences(self):
    if self.cached:
        for sentence in self.cache_list:
            yield sentence
    else:
        for key, value in self.__dict__.items():
            if key not in filterTag:
                try:
                    blob = TextBlob(value)
                    for sentence in blob.sentences:
                        yield sentence
                except Exception as e:
                    logger.debug("textblob error| %s:%s" % (key, value))
def __init__(self):
    # Create custom components.
    self.naive_bayes_analyzer = NaiveBayesAnalyzer()
    self.conll_extractor = ConllExtractor()
    self.nltk_tagger = NLTKTagger()
    self.perceptron_tagger = PerceptronTagger()
    if DEV_ENV:
        return
    # Warm up all components (default and custom) by evaluating each lazy
    # property once, so the first real request doesn't pay the startup cost.
    text = 'TextBlob blobs great!'
    default_blob = TextBlob(text)
    default_blob.sentiment
    default_blob.noun_phrases
    default_blob.pos_tags
    custom_blob = TextBlob(text,
                           analyzer=self.naive_bayes_analyzer,
                           np_extractor=self.conll_extractor,
                           pos_tagger=self.nltk_tagger)
    custom_blob.sentiment
    custom_blob.noun_phrases
    custom_blob.pos_tags
    custom2_blob = TextBlob(text, pos_tagger=self.perceptron_tagger)
    custom2_blob.pos_tags
@classmethod
def rate(cls, s, base_score=1.0, frequencies=None, obscurity_cutoff=None):
    "Rate a string's suitability as an _ebook quote."
    s = s.strip()
    score = float(base_score)
    # print s
    # print " Starting rating: %.2f" % score

    # People like very short or very long quotes.
    # if len(s) < 40:
    #     score *= 2
    if len(s) > 128:
        score *= 2
    # print " Length bonus: %.2f" % score

    blob = TextBlob(s.decode("utf8"))
    try:
        words = blob.words
    except Exception as e:
        # TODO: I'm sick of trying to get TextBlob to parse
        # strings that include things like ". . . ". Just return
        # the current score.
        return score
    # (The original snippet is truncated here; presumably `words` was then
    # scored against `frequencies` and `obscurity_cutoff`. Returning the
    # score so far keeps the method runnable.)
    return score
def quotes_in(self, paragraph):
    para = textwrap.wrap(paragraph, self.wrap_at)
    if len(para) == 0:
        return
    probability = self.probability
    if para[0][0].upper() == para[0][0]:
        # We greatly prefer lines that start with capital letters.
        probability *= 5
    else:
        probability /= 4
    gathering = False
    in_progress = None
    last_yield = None
    for i in range(len(para)):
        line = para[i]
        if gathering:
            # We are currently putting together a quote.
            done = False
            if (random.random() < self.truncate_chance
                    and len(in_progress) >= self.minimum_quote_size):
                # Yield a truncated quote.
                done = True
            else:
                potential = in_progress + ' ' + line.strip()
                if len(potential) >= self.maximum_quote_size:
                    # That would be too long. We're done.
                    done = True
                else:
                    in_progress = potential
            if done:
                quote = in_progress
                in_progress = None
                gathering = done = False
                # Miscellaneous tweaks to increase the chance that
                # the quote will be funny.
                if random.random() < 0.6:
                    quote = self.one_sentence_from(quote)
                if random.random() < 0.4:
                    quote = self.truncate_at_stopword(quote)
                # Quotes that end with two consecutive stopwords
                # are not funny. It would be best to parse every
                # single quote and make sure it doesn't end with
                # two consecutive stopwords. But in practice it's
                # much faster to just check for the biggest
                # offenders, which all end in 'the', and then trim
                # the 'the'.
                low = quote.lower()
                for ending in ('of the', 'in the', 'and the',
                               'on the', 'for the'):
                    if low.endswith(ending):
                        quote = quote[:-len(" the")]
                        break
                quote = unicode(quote)
                quote = self.remove_ending_punctuation(quote)
                quote = self.remove_beginning_punctuation(quote)
                if random.random() > 0.75:
                    quote = self.truncate_to_common_word(quote)
                if (len(quote) >= self.minimum_quote_size
                        and len(quote) <= self.maximum_quote_size
                        and self.ONE_LETTER.search(quote)):
                    yield quote
                    last_yield = quote
                continue
        else:
            # We are not currently gathering a quote. Should we be?
            if random.random() < probability:
                # Run the regular expression and see if it matches.
                m = self.SEVERAL_CAPITALIZED_WORDS.search(line)
                if m is not None:
                    phrase = m.groups()[0]
                    if "Gutenberg" in phrase or "Proofreader" in phrase:
                        # Part of the meta, not part of the text.
                        continue
                    # Tag the text to see if it's a proper noun.
                    blob = TextBlob(phrase)
                    tags = blob.tags
                    proper_nouns = [x for x, tag in tags
                                    if tag.startswith('NNP')]
                    if len(proper_nouns) < len(tags) / 3.0:
                        # We're good.
                        yield phrase
                        continue
            matches = self._line_matches(line)
            if matches or random.random() < probability:
                gathering = True
                if matches:
                    # A keyword match! Start gathering a quote either
                    # at this line or some earlier line.
                    maximum_backtrack = (
                        self.maximum_quote_size / self.wrap_at) - 1
                    backtrack = random.randint(0, maximum_backtrack)
                    start_at = max(0, i - backtrack)
                    in_progress = " ".join(
                        [x.strip() for x in para[start_at:i + 1]])
                else:
                    in_progress = line.strip()
def test_upper(self):
    blob = TextBlob('lorem ipsum')
    assert_true(is_blob(blob.upper()))
    assert_equal(blob.upper(), TextBlob('LOREM IPSUM'))
def test_upper_and_words(self):
    blob = TextBlob('beautiful is better')
    assert_equal(blob.upper().words,
                 WordList(['BEAUTIFUL', 'IS', 'BETTER']))
def test_index(self):
    blob = TextBlob(self.text)
    assert_equal(blob.index('Namespaces'), self.text.index('Namespaces'))
def test_strip_and_words(self):
    blob = TextBlob('Beautiful is better! ')
    assert_equal(blob.strip().words,
                 WordList(['Beautiful', 'is', 'better']))
def test_lower(self):
    blob = TextBlob('Lorem Ipsum')
    assert_true(is_blob(blob.lower()))
    assert_equal(blob.lower(), TextBlob('lorem ipsum'))
def text(self, tweetObject):
    analysis = TextBlob(tweetObject)
    print("HELLO")
    print(analysis.sentiment)
def test_split(self):
    blob = TextBlob('Beautiful is better')
    assert_equal(blob.split(), WordList(['Beautiful', 'is', 'better']))
def test_endswith(self):
    blob = TextBlob(self.text)
    assert_true(blob.endswith('of those!'))
    assert_true(blob.ends_with('of those!'))
def test_startswith(self):
    blob = TextBlob(self.text)
    assert_true(blob.startswith('Beautiful'))
    assert_true(blob.starts_with('Beautiful'))
def test_rfind(self):
    text = 'Beautiful is better than ugly. '
    blob = TextBlob(text)
    assert_equal(blob.rfind('better'), text.rfind('better'))
def test_find(self):
    text = 'Beautiful is better than ugly.'
    blob = TextBlob(text)
    assert_equal(blob.find('better', 5, len(blob)),
                 text.find('better', 5, len(text)))
def classify(new_comment, bayes):
    """Take a comment string (to be classified) and a trained bayes
    classifier; return 'pos' (normal) or 'neg' (crazy)."""
    analyze = TextBlob(new_comment, classifier=bayes)
    return analyze.classify()
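# A minimal usage sketch for classify() above: train a small
# NaiveBayesClassifier (a real textblob API) on illustrative data that is
# not from the original project, then classify a new comment.
from textblob.classifiers import NaiveBayesClassifier

bayes = NaiveBayesClassifier([
    ('This makes perfect sense, thanks.', 'pos'),
    ('Great explanation, very helpful!', 'pos'),
    ('The lizard people control the weather!', 'neg'),
    ('They read my thoughts through the router.', 'neg'),
])
print(classify('I appreciate the detailed answer.', bayes))  # likely 'pos'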
def content_to_sentences(text):
    """Convert the passed text into a list of sentences."""
    blob = TextBlob(text)
    return [str(sentence) for sentence in blob.sentences]
def noun_phrases():
    text = get_text(request)
    noun_phrases = set(TextBlob(text).noun_phrases)
    # Strip punctuation from ends of noun phrases and exclude long phrases.
    stripped = [strip_punc(np) for np in noun_phrases if len(np.split()) <= 5]
    return jsonify({"result": stripped})
def test_blob_analyze(self):
    pos_blob = TextBlob(self.pos, analyzer=self.analyzer)
    assert_true(pos_blob.sentiment[0] > 0.0)
    neg_blob = TextBlob(self.neg, analyzer=self.analyzer)
    assert_true(neg_blob.sentiment[0] < 0.0)
# writer.writerow(columns)
global_row = 0  # xlsxwriter has no writerow() function, so we have to keep track of what row we're on
columns = ['unit', 'id_article', 'position (+1)', 'unit_content',
           'adjectives', 'verbs', 'article_title',
           'article_content_no_tags', 'article_url']

workbook = xlsxwriter.Workbook(publication_prefix + 'full_report.xlsx')  # Create new spreadsheet
worksheet = workbook.add_worksheet()  # Make new worksheet

for col in range(0, len(columns)):
    worksheet.write(global_row, col, columns[col])
global_row += 1

for row in ngo_mentions:  # Loop through all rows in the database results
    # Use TextBlob to parse the article.
    # blob.tags returns the following parts of speech (some are missing, like VBN, etc.):
    # noun (NN), adjective (JJ), determiner (DT), verb (VB), noun phrase (NP),
    # sentence subject (SBJ), and prepositional noun phrase (PNP)
    blob = TextBlob(row['article_content_no_tags'])

    # Split the article into paragraphs.
    paragraphs = re.split('(\n)+', row['article_content_no_tags'])
    paragraphs = [paragraph for paragraph in paragraphs if paragraph != "\n"]
    paragraphs_lower = [paragraph.lower() for paragraph in paragraphs]

    # Add line numbers.
    # enumerate(list, 1) yields (1, item1), (2, item2), etc.
    article_numbered = ['(' + str(paragraph[0]) + ') ' + paragraph[1]
                        for paragraph in enumerate(paragraphs, 1)]
    csv_article = '\n'.join(article_numbered)

    # Get a list of all the paragraphs that mention one of the organizations.
    paragraph_position = [i for i, x in enumerate(paragraphs_lower)
                          if any(org.lower() in x for org in organizations)]

    # Split the article into sentences
def test_title(self):
    blob = TextBlob('Beautiful is better')
    assert_equal(blob.title(), TextBlob('Beautiful Is Better'))
import copy
import math

import nltk
from nltk import bigrams, trigrams
from textblob import TextBlob

# StopwordsTokenizer is this project's custom tokenizer, defined elsewhere.


class WordGrapher(object):
    doc = ""
    blob = None
    docs = []
    bigrams = None
    trigrams = None
    tokens = None
    tokenizer = None
    tfidf = None

    stopwords = []
    try:
        stopwords.extend(nltk.corpus.stopwords.words('indonesian'))
        stopwords.extend(nltk.corpus.stopwords.words('english'))
    except IOError:
        pass

    def __init__(self, doc=None, docs=None):
        self.tokenizer = StopwordsTokenizer(stopwords=self.stopwords)
        if doc:
            self.set_document(doc=doc)
        if docs:
            self.set_documents(docs=docs)

    def set_document(self, doc, docs_list_mode=False):
        if doc:
            return self.initialize_document(doc=doc,
                                            docs_list_mode=docs_list_mode)
        else:
            raise ValueError("Document must not be None or empty")

    def set_documents(self, docs):
        if docs and isinstance(docs, list) and len(docs) > 0:
            self.docs = [self.set_document(doc=doc, docs_list_mode=True)
                         for doc in docs]
        else:
            raise ValueError("Documents must not be None or an empty list")

    def initialize_document(self, doc, docs_list_mode=False):
        if not docs_list_mode:
            self.doc = doc.lower()
            self.blob = TextBlob(text=self.doc, tokenizer=self.tokenizer)
            self.tokens = copy.deepcopy(self.blob.tokens)
            self.bigrams = self.bigramify(self.blob.tokens)
            self.tokens.extend(self.bigrams)
            self.trigrams = self.trigramify(self.blob.tokens)
            self.tokens.extend(self.trigrams)
        else:
            doc = doc.lower()
            blob = TextBlob(text=doc, tokenizer=self.tokenizer)
            tokens = copy.deepcopy(blob.tokens)
            # Build both n-gram lists from the raw token list before
            # extending, mirroring the single-document branch above.
            bigram = self.bigramify(tokens=blob.tokens)
            trigram = self.trigramify(tokens=blob.tokens)
            tokens.extend(bigram)
            tokens.extend(trigram)
            return tokens

    def bigramify(self, tokens, as_string=True):
        if as_string:
            return ["%s %s" % (words[0], words[1])
                    for words in bigrams(tokens)]
        else:
            return bigrams(tokens)

    def trigramify(self, tokens, as_string=True):
        if as_string:
            return ["%s %s %s" % (words[0], words[1], words[2])
                    for words in trigrams(tokens)]
        else:
            return trigrams(tokens)

    def ngrams(self, n):
        return self.blob.ngrams(n=n)

    def freq(self, word, docs=None):
        if docs is None:
            return self.tokens.count(word)
        else:
            if not isinstance(docs, str):
                docs = " ".join(str(item) for item in docs)
            blob = TextBlob(text=docs, tokenizer=self.tokenizer)
            # Pass the token list, not the blob itself, to the n-gram
            # helpers, and build both n-gram lists from the raw tokens.
            bigram = self.bigramify(tokens=blob.tokens)
            trigram = self.trigramify(tokens=blob.tokens)
            blob.tokens.extend(bigram)
            blob.tokens.extend(trigram)
            return blob.tokens.count(word)

    def tf(self, word):
        return self.freq(word=word) / float(self.doc_word_count())

    def doc_word_count(self):
        return len(self.tokens)

    def num_docs_containing(self, word):
        if self.docs is None:
            return 2
        else:
            count = 0
            for document in self.docs:
                if self.freq(word=word, docs=document) > 0:
                    count += 1
            return 1 + count

    def idf(self, word):
        if self.docs is None:
            docs_length = 1
        else:
            docs_length = len(self.docs)
        num_docs = self.num_docs_containing(word)
        return math.log(docs_length / float(num_docs))

    def tf_idf(self, word):
        return self.tf(word) * self.idf(word)

    def analyze(self, count=10, percentage=False):
        if not self.doc or not self.docs:
            raise ValueError("Document and its Documents Set must not be None or empty")
        score = {'freq': {}, 'tf': {}, 'idf': {}, 'tf-idf': {}}
        for token in self.tokens:
            score['freq'][token] = self.freq(token)
            score['tf'][token] = self.tf(token)
            score['idf'][token] = self.idf(token)
            score['tf-idf'][token] = math.fabs(self.tf_idf(token))
        # Keep the highest tf-idf score seen for each token.
        final = {}
        for token in score['tf-idf']:
            if token not in final or score['tf-idf'][token] > final[token]:
                final[token] = score['tf-idf'][token]
        if not percentage:
            self.tfidf = sorted(final.items(), key=lambda x: x[1],
                                reverse=True)[:count]
            return self.tfidf
        else:
            result = sorted(final.items(), key=lambda x: x[1],
                            reverse=True)[:count]
            # Express each score as a percentage of the top score.
            top = 0.0
            for item in result:
                if item[1] > top:
                    top = item[1]
            self.tfidf = [(item[0], "%.2f%%" % (item[1] / top * 100))
                          for item in result]
            return self.tfidf

    def graph(self, word):
        return self.graph_doc(word=word)

    def graph_doc(self, word):
        if not self.tfidf:
            raise ValueError("Please call analyze first before creating a graph")
        result = {}
        tris = self.trigramify(tokens=self.blob.tokens, as_string=False)
        matches = ["%s %s %s" % (tri[0], tri[1], tri[2])
                   for tri in tris if word in tri[1]]
        result['tris'] = [item for item in self.tfidf if item[0] in matches]
        bis = self.bigramify(tokens=self.blob.tokens, as_string=False)
        matches = ["%s %s" % (bi[0], bi[1])
                   for bi in bis if word in bi[0] or word in bi[1]]
        result['twos'] = [item for item in self.tfidf if item[0] in matches]
        return result
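# A usage sketch for WordGrapher (the documents below are made up for
# illustration, and the project's StopwordsTokenizer is assumed to be
# importable). analyze() needs both a target document and a comparison
# corpus, as its guard enforces.
wg = WordGrapher()
wg.set_document("the quick brown fox jumps over the lazy dog")
wg.set_documents([
    "the dog sleeps all day",
    "a fox is a small wild canine",
    "brown bears eat fish and berries",
])
print(wg.analyze(count=5))                    # [(token, tf-idf score), ...]
print(wg.analyze(count=5, percentage=True))   # scores relative to the top hit
print(wg.graph(word="fox"))                   # bigram/trigram neighborhoods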
def sentiment():
    text = get_text(request)
    sentiment = TextBlob(text).sentiment[0]  # Polarity score
    return jsonify({"result": sentiment})
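# A minimal wiring sketch for endpoints like sentiment() above. `app` and
# get_text() are assumptions standing in for this project's real setup;
# the Flask and TextBlob calls themselves are real APIs.
from flask import Flask, request, jsonify
from textblob import TextBlob

app = Flask(__name__)

def get_text(req):
    # Hypothetical helper: pull raw text from a JSON body or a form field.
    data = req.get_json(silent=True) or {}
    return data.get('text') or req.form.get('text', '')

@app.route('/sentiment', methods=['POST'])
def sentiment_endpoint():
    return jsonify({"result": TextBlob(get_text(request)).sentiment[0]})

# e.g. curl -X POST -H 'Content-Type: application/json' \
#      -d '{"text": "TextBlob is great!"}' http://localhost:5000/sentiment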
def test_format(self):
    blob = TextBlob('1 + 1 = {0}')
    assert_equal(blob.format(1 + 1), TextBlob('1 + 1 = 2'))
    assert_equal('1 + 1 = {0}'.format(TextBlob('2')), '1 + 1 = 2')
def test_replace(self):
    blob = TextBlob('textblob is a blobby blob')
    assert_equal(blob.replace('blob', 'bro'),
                 TextBlob('textbro is a broby bro'))
    assert_equal(blob.replace('blob', 'bro', 1),
                 TextBlob('textbro is a blobby blob'))
def test_strip(self):
    text = 'Beautiful is better than ugly. '
    blob = TextBlob(text)
    assert_true(is_blob(blob))
    assert_equal(blob.strip(), TextBlob(text.strip()))
from textblob import TextBlob
from textblob.classifiers import NaiveBayesClassifier

train = [  # (leading training examples truncated in the source snippet)
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg'),
]
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

cl = NaiveBayesClassifier(train)

# Classify some text
print(cl.classify("Their burgers are amazing."))  # "pos"
print(cl.classify("I don't like their pizza."))   # "neg"

# Classify a TextBlob
blob = TextBlob("The beer was amazing. But the hangover was horrible. "
                "My boss was not pleased.", classifier=cl)
print(blob)
print(blob.classify())
for sentence in blob.sentences:
    print(sentence)
    print(sentence.classify())

# Compute accuracy
print("Accuracy: {0}".format(cl.accuracy(test)))

# Show 5 most informative features
cl.show_informative_features(5)
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 4 09:44:50 2013

@author: ozdemircili
"""

from text.blob import TextBlob

text = TextBlob("Once upon a time there was a program called Pycheat. "
                "It was one of the cheats")

text.tags
text.noun_phrases
text.sentiment
text.words
text.sentences
text.title

text.words[-1].singularize()
text.words[3].pluralize()

from text.blob import Word
from text.blob import Verb

# Lemmatization
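# A hedged sketch of what the "Lemmatization" note above likely intended.
# Word.lemmatize() is the real API (it needs the WordNet corpus downloaded):
print(Word("octopi").lemmatize())    # -> 'octopus'
print(Word("went").lemmatize("v"))   # -> 'go' (lemmatize as a verb)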