Example #1
def gcp_analyze_entities(text, debug=0):
    """
    Analyzing Entities in a String

    Args:
      text_content The text content to analyze
    """
    if request.method == "POST":
        text = str(request.form)

    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)
    response = client.analyze_entities(document=document)
    output = []

    # Loop through entities returned from the API
    for entity in response.entities:
        item = {}
        item["name"] = entity.name
        item["type"] = language.Entity.Type(entity.type_).name
        item["Salience"] = entity.salience

        if debug:
            print(u"Representative name for the entity: {}".format(
                entity.name))

            # Get entity type, e.g. PERSON, LOCATION, ADDRESS, NUMBER, et al
            print(u"Entity type: {}".format(
                language.Entity.Type(entity.type_).name))

            # Get the salience score associated with the entity in the [0, 1.0] range
            print(u"Salience score: {}".format(entity.salience))

        # Loop over the metadata associated with entity. For many known entities,
        # the metadata is a Wikipedia URL (wikipedia_url) and Knowledge Graph MID (mid).
        # Some entity types may have additional metadata, e.g. ADDRESS entities
        # may have metadata for the address street_name, postal_code, et al.
        for metadata_name, metadata_value in entity.metadata.items():
            item[metadata_name] = metadata_value
            if debug:
                print(u"{}: {}".format(metadata_name, metadata_value))

        # Loop over the mentions of this entity in the input document.
        # The API currently supports proper noun mentions.
        if debug:
            for mention in entity.mentions:
                print(u"Mention text: {}".format(mention.text.content))
                # Get the mention type, e.g. PROPER for proper noun
                print(u"Mention type: {}".format(
                    language.EntityMention.Type(mention.type_).name))
        output.append(item)

    # Get the language of the text, which will be the same as
    # the language specified in the request or, if not specified,
    # the automatically-detected language.
    if debug:
        print(u"Language of the text: {}".format(response.language))

    return str(output)
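
Since the function reads Flask's request object, a minimal wiring sketch (the route name and app setup are illustrative, not from the original) would be:

from flask import Flask, request

app = Flask(__name__)

@app.route("/entities", methods=["POST"])
def entities():
    # The form payload overrides 'text' inside gcp_analyze_entities.
    return gcp_analyze_entities(text=None)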
Example #2
    def _analyze_handler(self, text, text_file, file_type, json_file, lang,
                         analyze_method):
        file_type = to_texttype(file_type)
        parameters = {"type_": file_type}
        if text:
            parameters["content"] = text
        elif text_file:
            with open(text_file, "r") as f:  # pylint: disable=unspecified-encoding
                parameters["content"] = f.read()
        else:
            raise AttributeError("Either 'text' or 'text_file' must be given")

        if lang is not None:
            parameters["language"] = lang

        document = language_v1.Document(**parameters)
        if analyze_method == "classify":
            response = self.service.classify_text(document=document)
        elif analyze_method == "sentiment":
            # Available encoding_type values: NONE, UTF8, UTF16, UTF32
            response = self.service.analyze_sentiment(document=document,
                                                      encoding_type="UTF8")
        else:
            raise ValueError(f"Unsupported analyze_method: {analyze_method}")
        self.write_json(json_file, response)
        return response
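
The helper to_texttype is not shown; a hypothetical implementation consistent with its use above, mapping file-type strings onto the Document.Type enum, could be:

from google.cloud import language_v1

def to_texttype(file_type):
    # Hypothetical mapping; the real helper may accept other spellings.
    mapping = {
        "text": language_v1.Document.Type.PLAIN_TEXT,
        "html": language_v1.Document.Type.HTML,
    }
    return mapping.get(file_type, language_v1.Document.Type.PLAIN_TEXT)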
Example #3
def classify(text, verbose=True):
    """Classify the input text into categories. """

    language_client = language_v1.LanguageServiceClient()

    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    response = language_client.classify_text(request={'document': document})
    categories = response.categories

    result = {}

    for category in categories:
        # Turn the categories into a dictionary of the form:
        # {category.name: category.confidence}, so that they can
        # be treated as a sparse vector.
        result[category.name] = category.confidence

    if verbose:
        print(text)
        for category in categories:
            print(u"=" * 20)
            print(u"{:<16}: {}".format("category", category.name))
            print(u"{:<16}: {}".format("confidence", category.confidence))

    return result
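
A usage sketch (classify_text rejects very short inputs, so the sample text is deliberately long; the printed categories are illustrative):

result = classify(
    "Google Cloud Natural Language lets developers derive insights from "
    "unstructured text using pretrained machine learning models.")
# e.g. {'/Internet & Telecom/Web Services': 0.7}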
Example #4
def classify_text(text):
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)

    response = client.classify_text(document=document)
    return response
Example #5
def analyze_text_sentiment(text):
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)

    response = client.analyze_sentiment(document=document)

    sentiment = response.document_sentiment
    results = dict(
        text=text,
        score=f"{sentiment.score:.1%}",
        magnitude=f"{sentiment.magnitude:.1%}",
    )
    for k, v in results.items():
        print(f"{k:10}: {v}")

    # Get sentiment for all sentences in the document
    sentence_sentiment = []
    for sentence in response.sentences:
        item = {}
        item["text"] = sentence.text.content
        item["sentiment score"] = sentence.sentiment.score
        item["sentiment magnitude"] = sentence.sentiment.magnitude
        sentence_sentiment.append(item)

    return sentence_sentiment
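
A quick usage sketch: the document-level results are printed, while the per-sentence dictionaries are returned to the caller:

for item in analyze_text_sentiment("I love the camera. The battery is a letdown."):
    print(item["text"], item["sentiment score"], item["sentiment magnitude"])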
Example #6
def get_sentiment_predictions(texts):
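    # Assumes module-level globals: 'client' (a LanguageServiceClient) and
    # 'keywords' (see the illustrative structure after this example).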
    predictions = []
    for text in texts:
        #text = preprocess_text(text)
        document = language_v1.Document(content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
        sentiments = client.analyze_sentiment(request={'document': document})
        score = 0
        for sentence in sentiments.sentences:
            for keyword in keywords:
                if keyword['phrase'].replace(' ', '') in sentence.text.content.lower().replace(' ', ''):
                    if sentence.sentiment.score <= -0.4:
                        if keyword['negative'] == 'left':
                            score -= keyword['priority']
                        else:
                            score += keyword['priority']
                    elif sentence.sentiment.score >= 0.4:
                        if keyword['positive'] == 'left':
                            score -= keyword['priority']
                        else:
                            score += keyword['priority']
                    break
        if score < 0:
            predictions.append('left')
        elif score > 0:
            predictions.append('right')
        else:
            predictions.append('non-political')
    return predictions
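
The keywords structure is not shown; from the lookups above each entry needs 'phrase', 'priority', 'negative', and 'positive' keys, so an illustrative (made-up) list would be:

keywords = [
    {'phrase': 'tax cuts', 'priority': 2, 'negative': 'left', 'positive': 'right'},
    {'phrase': 'climate policy', 'priority': 1, 'negative': 'right', 'positive': 'left'},
]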
Example #7
    def process_ideas_gcp(self, session_id):
        '''Send each raw message in a session to GCP for entity and syntax analysis.'''
        log.debug('process idea GCP: starting')

        messages = self.db.find(coll='raw_messages',
                                filtro={"session_id": session_id})

        for message in messages:
            document = language_v1.Document(
                content=message["content"],
                type_=language_v1.Document.Type.PLAIN_TEXT)
            try:
                entities = json.loads(
                    proto.Message.to_json(
                        self.gcp.analyze_entities(
                            request={'document': document})))
            except Exception as e:
                log.error(e)
                # Skip this message: 'entities' would be unbound below.
                continue
            syntax = json.loads(
                proto.Message.to_json(
                    self.gcp.analyze_syntax(request={'document': document})))
            log.debug(f'response {type(entities)}')
            self.db.insert_db(coll='gcp_response',
                              doc={
                                  "session_id": session_id,
                                  "message": message["content"],
                                  "entities": entities["entities"],
                                  "language_text": entities["language"],
                                  "syntax": syntax
                              })
Example #8
def analyze_text_sentiment(text):
    """
    This is modified from the Google NLP API documentation found here:
    https://cloud.google.com/natural-language/docs/analyzing-sentiment
    It makes a call to the Google NLP API to retrieve sentiment analysis.
    """
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)

    response = client.analyze_sentiment(document=document)

    # Format the results as a dictionary
    sentiment = response.document_sentiment
    results = dict(
        text=text,
        score=f"{sentiment.score:.1%}",
        magnitude=f"{sentiment.magnitude:.1%}",
    )

    # Print the results for observation
    for k, v in results.items():
        print(f"{k:10}: {v}")

    # Get sentiment for all sentences in the document
    sentence_sentiment = []
    for sentence in response.sentences:
        item = {}
        item["text"] = sentence.text.content
        item["sentiment score"] = sentence.sentiment.score
        item["sentiment magnitude"] = sentence.sentiment.magnitude
        sentence_sentiment.append(item)

    return sentence_sentiment
Example #9
def run_quickstart():
    # [START language_quickstart]
    # Imports the Google Cloud client library
    # [START language_python_migration_imports]
    from google.cloud import language_v1

    # [END language_python_migration_imports]
    # Instantiates a client
    # [START language_python_migration_client]
    client = language_v1.LanguageServiceClient()
    # [END language_python_migration_client]

    # The text to analyze
    text = "Hello, world!"
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT
    )

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(
        request={"document": document}
    ).document_sentiment

    print("Text: {}".format(text))
    print("Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude))
Example #10
def analyze_text(text):

    # os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= 'sentiment_analysis.json'
    # Instantiates a client
    client = language_v1.LanguageServiceClient()

    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.HTML)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(request={
        'document': document
    }).document_sentiment

    score = round(sentiment.score, 4)
    magnitude = round(sentiment.magnitude, 4)
    if score >= 0.6 and magnitude >= 0.8:
        label = 'Positive'
    elif score <= -0.6 and magnitude >= 0.8:
        label = 'Negative'
    elif magnitude < 0.8:
        label = 'Neutral'
    else:
        label = 'Mixed'

    short_text = text if len(text) < 500 else text[:500] + '...'

    return (label, score, magnitude, short_text)
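
A usage sketch; because the document type is HTML, markup in the input is interpreted by the API rather than analyzed as literal text:

label, score, magnitude, short_text = analyze_text(
    "<p>The service was outstanding and the food superb.</p>")
print(label, score, magnitude)  # e.g. Positive 0.8 1.6 (illustrative values)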
Example #11
def sentiment_analysis():
    client = language_v1.LanguageServiceClient()
    config = get_config()
    ret = []

    # get all keywords
    keywords = []
    for k, v in config["currencies"].items():
        keywords += v["keywords"]

    # analyze tweets
    d = fetch_tweets(config["twitter"]["accounts"], keywords)
    for user_id, tweets in d.items():
        for tweet in tweets:
            tweet_id = tweet["id"]
            text = u"{}".format(tweet["text"])

            doc = language_v1.Document(
                content=text, type_=language_v1.Document.Type.PLAIN_TEXT
            )

            sent = client.analyze_sentiment(
                request={"document": doc}
            ).document_sentiment

            """
            print("Text: {}".format(text))
            print("Sentiment: {:.2%}, {:.2%}".format(
                sent.score, sent.magnitude))
            """

            ret.append((tweet_id, text, sent.score, sent.magnitude))
    return ret
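
get_config and fetch_tweets are not shown; an illustrative config shape consistent with the lookups above would be:

config = {
    "currencies": {
        "BTC": {"keywords": ["bitcoin", "btc"]},
        "ETH": {"keywords": ["ethereum", "eth"]},
    },
    "twitter": {"accounts": ["example_account"]},
}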
Example #12
    def __get_sentiment(self, client, significant_data, today, results,
                        containers):

        """Helper called by get_news.

        significant_data is the string whose sentiment score we want. today is
        True if the article was published today, else False. results is the
        dict of values returned by get_news; containers holds the data
        structures used for intermediate calculations.
        """

        try:
            document = language_v1.Document(
                content=significant_data,
                type_=language_v1.Document.Type.PLAIN_TEXT)
            sentiment = client.analyze_sentiment(request={
                'document': document
            }).document_sentiment

            if today:
                results['articles_today'] += 1
                containers['today_total_magnitude'] += sentiment.magnitude
                containers['today_magnitude_scores'].append(
                    sentiment.magnitude)
                containers['today_sentiment_scores'].append(sentiment.score)

            else:
                containers['total_magnitude'] += sentiment.magnitude
                containers['magnitude_scores'].append(sentiment.magnitude)
                containers['sentiment_scores'].append(sentiment.score)

        except Exception as e:
            print(e)

        return results, containers
Example #13
def analyze_text(text):
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    return client.analyze_sentiment(request={
        'document': document
    }).document_sentiment
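
The return value is the document-level Sentiment message, so a caller reads its fields directly:

sentiment = analyze_text("The pizza was great but the service was slow.")
print(sentiment.score, sentiment.magnitude)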
Example #14
def store_sentiment_of_article(formatted_text, filename):

    # remove html tags
    text = remove_html_tags(formatted_text)

    if filename.exists():
        logger.info(f'File {filename} already exists')
    else:

        try:

            # set environment variable for authentication
            os.environ[
                "GOOGLE_APPLICATION_CREDENTIALS"] = r"C:\Users\di872\Desktop\renewrs_recsys\renewrs-e50f271b94e7.json"

            # run sentiment analysis request on text
            client = language_v1.LanguageServiceClient()

            document = language_v1.Document(
                content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
            annotations = client.analyze_sentiment(
                request={'document': document})

            json_data = json.loads(annotations.__class__.to_json(annotations))

            # create folder to save sentiment in
            filename.parent.mkdir(parents=True, exist_ok=True)

            # save sentiment
            with open(str(filename), "w") as file:
                json.dump(json_data, file, indent=4)

        except Exception as e:
            logger.error(f'Failure for {filename}: {e}')
Example #15
    def get_sentiment(self, text):
        document = language_v1.Document(
            content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
        sentiment = self.client.analyze_sentiment(request={
            'document': document
        }).document_sentiment
        return sentiment
Example #16
def get_twitter_text(keyword):
    global avg
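    # 'client', 'results', 'allscore', and 'count' are assumed to be
    # module-level globals; get_avg_sentimentscore presumably updates
    # 'allscore' and 'count' for the average computed below.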
    for tweet in tweepy.Cursor(api.search,
                               q=keyword,
                               lang='en',
                               include_entities=False,
                               tweet_mode='extended').items(100):
        if "RT" not in tweet.full_text:
            document = language_v1.Document(
                content=cleaning_text(tweet.full_text),
                type_=language_v1.Document.Type.PLAIN_TEXT)
            sentiment = client.analyze_sentiment(request={
                'document': document
            }).document_sentiment
            # result.append([cleaning_text(tweet.full_text), sentiment.score, sentiment.magnitude])
            result = {
                'text': cleaning_text(tweet.full_text),
                'sentimentscore': sentiment.score
            }

            if sentiment.score != 0:
                get_avg_sentimentscore(sentiment.score)

            results.append(result)
    avg = allscore / count
    return results
Example #17
def defineScore(text):
    # Instantiates a client
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(request={'document': document}).document_sentiment
    return sentiment.score
Example #18
def analyze_text_sentiment_rest(text):
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)

    response = client.analyze_sentiment(document=document)

    sentiment = response.document_sentiment
    results = dict(
        text=text,
        score=f"{sentiment.score:.1%}",
        magnitude=f"{sentiment.magnitude:.1%}",
    )
    for k, v in results.items():
        print(f"{k:10}: {v}")

    sentence_sentiment = []
    for index, sentence in enumerate(response.sentences, start=1):
        item = {}
        item["text"] = sentence.text.content
        item["score"] = sentence.sentiment.score
        item["magnitude"] = sentence.sentiment.magnitude
        item["index"] = index
        sentence_sentiment.append(item)

    return sentence_sentiment
Example #19
def getSentimentFromWeb(description):
    # Instantiates a client
    # data = request.get_json()
    # headers = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3'}
    # # URL = request.get_json()['url']
    # URL = url
    # page = requests.get(URL, headers=headers)
    # soup1 = soup(page.content, 'html.parser')
    client = language_v1.LanguageServiceClient(credentials=creds2)
    # data = soup1.find('body').text
    # wordlen = len(data)
    # half = int(wordlen / 2)
    # if half > 1000:
    #     half = wordlen - 100
    # data = data[half:]
    # print("Text: "+(data["data"]))
    # The text to analyze
    text = u"".join(description)
    document = language_v1.Document(content=text, type_=language_v1.Document.Type.PLAIN_TEXT)

    # # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(request={'document': document}).document_sentiment

    # print("Text: {}".format(text))
    # print("Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude))
    # return "Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude)
    return sentiment.score
Example #20
def classify(text, verbose=True):
    """Classify the input text into categories. """

    language_client = language_v1.LanguageServiceClient()

    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT
    )
    response = language_client.classify_text(request={'document': document})
    categories = response.categories

    categories_list = []

    for category in categories:
        # Collect just the category names.
        categories_list.append(category.name)

    # Available values: NONE, UTF8, UTF16, UTF32
    encoding_type = language_v1.EncodingType.UTF8

    response1 = language_client.analyze_entities(request={'document': document, 'encoding_type': encoding_type})

    # Loop through entities returned from the API
    entities_list = []
    for entity in response1.entities:
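        # The early break relies on entities coming back ordered by
        # decreasing salience (most salient first).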
        if entity.salience > 0.05:
            entities_list.append(entity.name)
        else:
            break

    return categories_list, entities_list
Example #21
def analyzeScore(text=None):
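    # Assumes a module-level 'client' (language_v1.LanguageServiceClient).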
    document = language_v1.Document(content=text,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(request={
        'document': document
    }).document_sentiment
    return response.score
Example #22
def analyze(filename):
    """Run a sentiment analysis request on text within a passed filename."""
    client = language_v1.LanguageServiceClient()

    with open(filename, "r") as review_file:
        # Instantiates a plain text document.
        content = review_file.read()

    document = language_v1.Document(content=content,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    annotations = client.analyze_sentiment(request={'document': document})

    # Print the results
    # print_result(annotations)

    # Write sentence scores to csv
    csv_filename = write_result_to_csv(annotations, filename)

    # text = document.content

    # read csv data into dataframe
    sentence_scores = pd.read_csv(csv_filename)
    print(sentence_scores)

    # describe the sentence sentiments
    sentence_sentiments = sentence_scores.SentenceSentiment.describe()
    print(sentence_sentiments)

    # plot sentence sentiment against sentence index
    sentence_scores.plot(kind='line', y='SentenceSentiment', x='SentenceIndex')
    plt.show()
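
write_result_to_csv is not shown; a hypothetical implementation consistent with the SentenceIndex/SentenceSentiment columns read back above might be:

import csv

def write_result_to_csv(annotations, filename):
    # One row per sentence, indexed in document order.
    csv_filename = filename + ".csv"
    with open(csv_filename, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["SentenceIndex", "SentenceSentiment"])
        for index, sentence in enumerate(annotations.sentences):
            writer.writerow([index, sentence.sentiment.score])
    return csv_filename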
Example #23
def tweet_sentiment(name):
    client = language_v1.LanguageServiceClient()

    tweet_list, df = twitter_user(name)

    score_list = ['\0' for i in range(len(tweet_list))]

    for i in range(len(tweet_list)):
        sentence_str = str(tweet_list[i])

        document = language_v1.Document(
            content=sentence_str, type_=language_v1.Document.Type.PLAIN_TEXT)

        sentiment = client.analyze_sentiment(request={
            'document': document
        }).document_sentiment
        score_list[i] = format(
            (sentiment.score + 1) * 25 + sentiment_predict(sentence_str) * 50,
            ".1f")
    #   print("Text: {}".format(sentence_str.strip("['']")))
    #    print("Sentiment: (score) {}   (magnitude) {}".format(sentiment.score, sentiment.magnitude))

#    return sentiment.score, sentence_str.strip("['']")
#     return score_list, sentiment.magnitude
    return score_list
Example #24
def get_sentiment_score_using_google(text_list):
    client = language_v1.LanguageServiceClient()

    texts = []
    text_sentiment = []
    text_score = []

    for text in tqdm(text_list):
        try:
            document = language_v1.Document(
                content=text,
                type_=language_v1.Document.Type.PLAIN_TEXT,
                language='en')
            sentiment = client.analyze_sentiment(request={
                'document': document
            }).document_sentiment

            texts.append(text)
            text_score.append(sentiment.score)
            sentiment_ = 'positive' if sentiment.score > 0 else 'negative'  # naive cutoff; a score of exactly 0 counts as negative
            text_sentiment.append(sentiment_)
        except Exception:
            # Skip texts the API rejects; they are simply omitted from the
            # returned lists.
            pass

    return texts, text_sentiment, text_score
Example #25
def get_sentiment_score(tweet):
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=tweet, type_=language_v1.Document.Type.PLAIN_TEXT)
    sentiment = client.analyze_sentiment(request={'document': document}).document_sentiment
    sentiment_score = sentiment.score
    sentiment_magnitude = sentiment.magnitude

    print(sentiment_score, sentiment_magnitude)
    return sentiment_score, sentiment_magnitude
Example #26
    def getSentiment(self, text):
        doc = language.Document(content=text,
                                language='en',
                                type_=language.Document.Type.PLAIN_TEXT)
        textSentiment = self.client.analyze_sentiment(document=doc,
                                                      encoding_type='UTF32')
        SentimentScore = textSentiment.document_sentiment.score
        return SentimentScore
Example #27
def get_sentiment(tweet):
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=tweet,
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    result = client.analyze_sentiment(request={
        'document': document
    }).document_sentiment.score
    return result
Example #28
def gcp_classify_text(text):
    client = language.LanguageServiceClient()
    document = language.Document(content=text,
                                 type_=language.Document.Type.PLAIN_TEXT)
    response = client.classify_text(document=document)
    # Return the name of the first category, if any (None when unclassified).
    for category in response.categories:
        return category.name
Example #29
def analyze(content, client):
    """Run a sentiment analysis request on text within a passed filename."""
    document = language_v1.Document(content=content,
                                    language='zh',
                                    type_=language_v1.Document.Type.PLAIN_TEXT)
    annotations = client.analyze_sentiment(request={'document': document})

    # Return the full annotations (document and per-sentence sentiment)
    return annotations
Example #30
    def getSentiment(text):
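        # Relies on a 'client' (language_v1.LanguageServiceClient) defined in
        # the enclosing scope.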
        document = language_v1.Document(
            content=text, type_=language_v1.Document.Type.PLAIN_TEXT)

        # Detects the sentiment of the text
        sentiment = client.analyze_sentiment(request={
            'document': document
        }).document_sentiment
        return sentiment.score