Example No. 1
def main(text):
    client = language.LanguageServiceClient()

    document = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT)

    entities = client.analyze_entities(document=document).entities

    # entity types from enums.Entity.Type
    entity_type = ('UNKNOWN', 'PERSON', 'LOCATION', 'ORGANIZATION',
                   'EVENT', 'WORK_OF_ART', 'CONSUMER_GOOD', 'OTHER')

    for entity in entities:
        print('=' * 20)
        print('         name: {0}'.format(entity.name))
        print('         type: {0}'.format(entity_type[entity.type]))
        print('     salience: {0}'.format(entity.salience))
        print('wikipedia_url: {0}'.format(entity.metadata.get('wikipedia_url', '-')))
Example No. 2
    def checkSentiment(self, text):
        text = text.strip()
        print('Text: {}'.format(text))
        document = types.Document(content=text,
                                  language="en",
                                  type=enums.Document.Type.PLAIN_TEXT)

        if (self.retries < 5):
            try:
                sentiment = self.client.analyze_sentiment(
                    document=document).document_sentiment
                return sentiment
            except Exception:
                print("Waiting...")
                time.sleep(1)
                self.retries += 1
                # retry the request with the incremented back-off counter
                return self.checkSentiment(text)
        else:
            print("Too many requests")
            return None
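checkSentiment is a method whose enclosing class is not shown. A minimal sketch of the assumed context, with the attribute names taken from the method body (the class name itself is hypothetical):

from google.cloud import language
from google.cloud.language import enums, types
import time

class SentimentChecker:  # hypothetical class name
    def __init__(self):
        # client and retry counter referenced by checkSentiment
        self.client = language.LanguageServiceClient()
        self.retries = 0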
Example No. 3
def sentimentConvos(ticker):
    FYConvos = getFY.convos(ticker)
    count = 0
    score = 0
    magnitude = 0
    for convo in FYConvos:
        count += 1
        try:
            document = types.Document(content=convo,
                                      type=enums.Document.Type.PLAIN_TEXT)
            annotations = client.analyze_sentiment(document=document)
            score += annotations.document_sentiment.score
            magnitude += annotations.document_sentiment.magnitude
        except Exception:
            pass
    try:
        return score / count, magnitude / count
    except ZeroDivisionError:
        return None, None
Example No. 4
def get_sentiment_scores(tweets, client):
    """
	Calculates sentiment scores for each of a series of Tweets (text inputs)
	using the Google Cloud Natural Language API. A Google API account must be
	set up in order to connect a client. Returns a list with the sentiment
	scores for each Tweet.

	Parameters
	----------
	tweets: list or Series of strings
		The input text dataset (one string for each document)
	client: google.cloud.language.LanguageServiceClient
		Client connection to the Google Cloud Natural Language API
		Initiate with google.oauth2.service_account.Credentials as needed

	Returns
	-------
	list
		Sentiment scores calculated for each of the input documents
	"""

    from google.api_core.exceptions import InvalidArgument
    from google.cloud.language import enums, types
    import numpy as np
    import time

    score_list = []

    for tweet in tweets:  # define document type
        document = types.Document(content=tweet,
                                  type=enums.Document.Type.PLAIN_TEXT)

        try:
            sentiment = client.analyze_sentiment(document=document)
        except InvalidArgument:  # Google API-specific error
            score_list.append(np.nan)
            continue

        score = sentiment.document_sentiment.score  # sentiment score
        score_list.append(score)
        time.sleep(0.1)  # the API allows 600 requests per minute

    return score_list
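A usage sketch for get_sentiment_scores, assuming credentials are loaded from a service-account key file as the docstring suggests; the file path, sample tweets, and printed values are illustrative only.

from google.cloud import language
from google.oauth2 import service_account

# Hypothetical path to a service-account key file.
credentials = service_account.Credentials.from_service_account_file(
    'service-account.json')
client = language.LanguageServiceClient(credentials=credentials)

tweets = ["I love this product!", "This was a terrible experience."]
scores = get_sentiment_scores(tweets, client)
print(scores)  # scores range from -1.0 (negative) to 1.0 (positive)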
Example No. 5
def NLP_analyze(text):
    # Instantiates a client
    client = language.LanguageServiceClient()

    # The text to analyze
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment

    print('Text: {}'.format(text))
    print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

    return sentiment
Example No. 6
    def tag(self, text):
        document = types.Document(
            content=text,
            type=enums.Document.Type.PLAIN_TEXT)

        tokens = self.client.analyze_syntax(document).tokens

        ind = enums.PartOfSpeech
        sent = {}
        #print(tokens[0])
        #print()
        for token in tokens:
            sent[token.text.content] = {"pos": ind.Tag(token.part_of_speech.tag).name,
                                        "tense": ind.Tense(token.part_of_speech.tense).name,
                                        "person": ind.Person(token.part_of_speech.person).name,
                                        "number": ind.Number(token.part_of_speech.number).name,
                                        "mood": ind.Mood(token.part_of_speech.mood).name}

        return sent
Example No. 7
def analyze_text_sentiment(text):
    client = language.LanguageServiceClient()
    # initialize the Document
    document = types.Document(
        content=text, type=enums.Document.Type.PLAIN_TEXT
    )  # this one is plain text, choice = PLAIN_TEXT, HTML

    response = client.analyze_sentiment(document=document)

    sentiment = response.document_sentiment
    results = [
        ('text', text),
        ('score', sentiment.score),
        ('magnitude', sentiment.magnitude),
    ]
    # for k, v in results:
    #     print('{:10}: {}'.format(k, v))

    return sentiment.score
Example No. 8
    def consume(self, text, **kwargs):
        """Classifies content categories of the provided text."""

        ## TODO: add concurrency in case of a list of objects
        client = language.LanguageServiceClient()

        if isinstance(text, six.binary_type):
            text = text.decode('utf-8')

        document = types.Document(content=text,
                                  type=enums.Document.Type.PLAIN_TEXT)

        categories = client.classify_text(document).categories

        ##TODO: Use formatters here as in MSsqlDatabaseTool
        return [{
            'label': x.name,
            'confidence': x.confidence
        } for x in categories]
Example No. 9
def hello():
    """Return a friendly HTTP greeting."""
    # Instantiates a client
    client = language.LanguageServiceClient()

    # This is our request data coming in. Currently passed as a query param, but it could be moved into the request body
    text = request.args.get('data')
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment

    # Constructing our response
    result = {}
    result['score'] = sentiment.score
    result['magnitude'] = sentiment.magnitude

    return json.dumps(result)
Example No. 10
def categorize(content):
    # Instantiates a client
    client = language.LanguageServiceClient()

    # The text to analyze
    text = content.encode('utf-8')
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    try:
        sentiment = client.analyze_sentiment(
            document=document).document_sentiment
        print(type(sentiment))
        print("{},{}".format(sentiment.score, sentiment.magnitude))
        return sentiment
    except Exception as e:
        print('error {}'.format(e))
        return None
Example No. 11
    def nlp(self, raw, from_json=False):
        if from_json is False:
            text = raw

            # Instantiates a client
            client = language.LanguageServiceClient()
            # Instantiates a plain text document.
            document = types.Document(content=text,
                                      language=self.lang,
                                      type=enums.Document.Type.PLAIN_TEXT)

            # Detects syntax in the document. You can also analyze HTML with:
            ret = client.analyze_syntax(document, enums.EncodingType.UTF32)
            data = ret
            return GSDoc(text, data, from_json=from_json)
        else:
            json = raw
            text = json["text"]
            return GSDoc(text, json, from_json=from_json)
Example No. 12
def gCloudAnalyzer():
    dfResult = df.copy()
    dfResult['score'] = 0.0
    dfResult['magnitude'] = 0.0

    client = language.LanguageServiceClient()

    for index, row in tqdm(dfResult.iterrows()):
        tweet = row['Text']
        document = types.Document(content=tweet,
                                  type=enums.Document.Type.PLAIN_TEXT)

        response = client.analyze_sentiment(document=document,
                                            encoding_type='UTF32')

        dfResult.at[index, 'score'] = response.document_sentiment.score
        dfResult.at[index, 'magnitude'] = response.document_sentiment.magnitude

    dfResult.to_csv('gcloud.csv', index=False)
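gCloudAnalyzer reads a module-level DataFrame df with a 'Text' column; a hedged sketch of the assumed setup (the sample rows are illustrative only):

import pandas as pd
from tqdm import tqdm
from google.cloud import language
from google.cloud.language import enums, types

# Hypothetical input: one tweet per row in a 'Text' column.
df = pd.DataFrame({'Text': ["Great flight, friendly crew!",
                            "Two hours late and no updates."]})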
Example No. 13
def get_label(inp_text):
    print(inp_text)
    text = inp_text
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment

    score = sentiment.score
    print(score)
    response_list = []
    if score > 0.25:
        response_list.append(["POSITIVE", score])
    elif score < -0.25:
        response_list.append(["NEGATIVE", score])
    else:
        response_list.append(["NEUTRAL", score])
    return response_list
Example No. 14
def get_parts_of_speech(text):
    client = language.LanguageServiceClient()
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')
    document = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT)
    tokens = client.analyze_syntax(document).tokens
    pos_tag = ('UNKNOWN', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
               'PRON', 'PRT', 'PUNCT', 'VERB', 'X', 'AFFIX')
    output = []
    for token in tokens:
        output.append(
            {
                'parts_of_speech': pos_tag[token.part_of_speech.tag],
                'token': token.text.content
            }
        )
    return output
Example No. 15
def sentiment():
    """do a sentiment analysis on the fragment."""

    # The text to analyze
    text = request.args.get('text')
    if text is None:
        return "Missing query"

    # Instantiates a client
    client = language.LanguageServiceClient(credentials=credentials)

    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment

    return '<p style="font-size:40px">Text: {}<br>\nSentiment {}, {}</p>'.format(
        text, sentiment.score, sentiment.magnitude)
Example No. 16
def main():
    # Open all speech-to-text files; these would have been created by the speech-to-text program
    with open('fname.txt', 'r+') as fname:
        First_name = fname.readline()

    with open('lname.txt', 'r+') as lname:
        Last_name = lname.readline()

    with open('age.txt', 'r+') as age:
        aged = age.readline()

    with open('address.txt', 'r+') as address:
        addresss = address.readline()

    with open('postal.txt', 'r+') as postal:
        postal_code = postal.readline()

    with open('need.txt', 'r+') as need:
        needs = need.readline()

    #store all speech to text files in a list
    senior_info = [First_name, Last_name, aged, addresss, postal_code, needs]

    #use a loop to refer to each instance and analyze the entities for keywords in senior_info

    for info in senior_info:
        document = types.Document(content=info,
                                  type=enums.Document.Type.PLAIN_TEXT)

        # Detects the entities of the text
        response_entities = client.analyze_entities(document=document,
                                                    encoding_type='UTF32')

        # the with block closes the file automatically
        with open('fireDB.txt', 'a+') as f:
            for entity in response_entities.entities:
                f.write("{0}\n".format(entity.name))

    #View what the text analysis will spit out
    with open('fireDB.txt', 'r') as Firebase_input:
        print(Firebase_input.read())
Example No. 17
def syntax_file(gcs_uri):
    """Detects syntax in the file located in Google Cloud Storage."""
    client = language.LanguageServiceClient()

    # Instantiates a plain text document.
    document = types.Document(gcs_content_uri=gcs_uri,
                              type=enums.Document.Type.PLAIN_TEXT)

    # Detects syntax in the document. You can also analyze HTML with:
    #   document.type == enums.Document.Type.HTML
    tokens = client.analyze_syntax(document).tokens

    # part-of-speech tags from enums.PartOfSpeech.Tag
    pos_tag = ('UNKNOWN', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
               'PRON', 'PRT', 'PUNCT', 'VERB', 'X', 'AFFIX')

    for token in tokens:
        print(u'{}: {}'.format(pos_tag[token.part_of_speech.tag],
                               token.text.content))
Example No. 18
def classify_file():
    # [START language_classify_gcs]
    from google.cloud import language
    from google.cloud.language import enums
    from google.cloud.language import types

    gcs_uri = 'gs://cloud-samples-data/language/android.txt'

    client = language.LanguageServiceClient()

    document = types.Document(gcs_content_uri=gcs_uri,
                              type=enums.Document.Type.PLAIN_TEXT)

    categories = client.classify_text(document).categories

    for category in categories:
        print(u'=' * 20)
        print(u'{:<16}: {}'.format('name', category.name))
        print(u'{:<16}: {}'.format('confidence', category.confidence))
Example No. 19
def finding_nouns_and_shit(dicti):
    client = language.LanguageServiceClient()

    doc = types.Document(content=dicti, type=enums.Document.Type.PLAIN_TEXT)
    nouns_n_shit = client.analyze_syntax(doc).tokens
    pos_tag = ('UNKNOWN', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
               'PRON', 'PRT', 'PUNCT', 'VERB', 'X', 'AFFIX')
    noun = []
    verb = []
    adj = []
    for token in nouns_n_shit:
        tag_name = pos_tag[token.part_of_speech.tag]
        if tag_name == 'NOUN':
            noun.append(token.text.content)
        elif tag_name == 'VERB':
            verb.append(token.text.content)
        elif tag_name == 'ADJ':
            adj.append(token.text.content)

    return noun, verb, adj
Example No. 20
def gnlp_sentiment(document, isFile=True) -> dict:
    client = language.LanguageServiceClient()

    if not isFile:
        text = document.lower()
    else:
        text = unmark(document["text"]).replace("\n", " ").lower()

    request = types.Document(content=text, type=enums.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(document=request,
                                        encoding_type="UTF32")

    sentiment = response.document_sentiment
    data = {
        "score": sentiment.score,
        "magnitude": sentiment.magnitude,
    }

    return data
Example No. 21
def syntax_text(text):
    """Detects syntax in the text."""
    client = language.LanguageServiceClient()

    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    # Instantiates a plain text document.
    # [START language_python_migration_syntax_text]
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)

    # Detects syntax in the document. You can also analyze HTML with:
    #   document.type == enums.Document.Type.HTML
    tokens = client.analyze_syntax(document).tokens

    for token in tokens:
        part_of_speech_tag = enums.PartOfSpeech.Tag(token.part_of_speech.tag)
        print(u'{}: {}'.format(part_of_speech_tag.name, token.text.content))
Example No. 22
def parse_tweets(tweets):
    '''Return a dictionary keyed by tweet id containing each tweet's text, author name, and sentiment score.'''
    tweets_dict = {}
    for status in tweets:
        tweet = {}
        id = str(status.id)
        text = status.full_text
        tweet['text'] = text
        tweet['id'] = id
        tweet['name'] = status.user.name
        document = types.Document(content=tweet['text'],
                                  type=enums.Document.Type.PLAIN_TEXT)
        tweet['sentiment'] = client.analyze_sentiment(
            document=document).document_sentiment.score
        tweet['technical'] = 0
        tweets_dict[id] = tweet
    return tweets_dict
Example No. 23
def GetSentiment(t):  # Returns sentiment of submitted text
    # Clean submitted text
    t1 = u(t)
    clean_t = clean_tweet(t1)
    # Convert cleaned text to unicode (required by API)
    text = u(clean_t)
    result = []
    # The text to analyze
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT,
                              language='en')
    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment
    # Build array to return original Tweet text, cleaned Tweet text, sentiment, and magnitude
    result.append(t)
    result.append(clean_t)
    result.append(sentiment.score)
    result.append(sentiment.magnitude)
    return result
Example No. 24
    def analyse_sentiment(self, text):
        """
        This function computes the sentiment
        analysis of the provided text.
        """
        print("")
        print("Start analysing...")
        document = types.Document(
            content=text.encode('utf-8'),
            type=enums.Document.Type.PLAIN_TEXT)
        try:
            result = self.client.analyze_sentiment(document, self.encoding)
        except Exception as e:
            print(e)
            exit(1)
        print("... done analysis.")
        print("")

        return result
Example No. 25
def syntax_analysis(text):
    """
	This method is responsible for the actual syntax analysis of text using google cloud natural language 
	client service and creating list using parts of speech from pos_tag declaration above and tokens 
	analyzed for text.
	param: text: user inputted text for syntax analys
	return: list of data consisting of parts of speech and tokens
	"""
    client = language.LanguageServiceClient()
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)
    syntax = client.analyze_syntax(document=document)
    data = dict()
    data['original'] = text
    data['analysis'] = [
        '{}:{}'.format(pos_tag[token.part_of_speech.tag], token.text.content)
        for token in syntax.tokens
    ]
    return data
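The pos_tag declaration referenced in the docstring is not shown in this example; judging by the identical tuples in the other examples above, it is presumably a module-level constant like this:

# Assumed part-of-speech labels, indexed by enums.PartOfSpeech.Tag values.
pos_tag = ('UNKNOWN', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
           'PRON', 'PRT', 'PUNCT', 'VERB', 'X', 'AFFIX')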
Example No. 26
def get_valence(excerpt, use="crr", treat_na="strict"):
    # The Google Natural Language API returns a score and magnitude. From the
    # docs
    # (https://cloud.google.com/natural-language/docs/basics#sentiment-analysis-values):
    #
    # score of the sentiment ranges between -1.0 (negative) and 1.0 (positive)
    #   and corresponds to the overall emotional leaning of the text.
    # magnitude indicates the overall strength of emotion (both positive and
    #   negative) within the given text, between 0.0 and +inf. Unlike score,
    #   magnitude is not normalized; each expression of emotion within the text
    #   (both positive and negative) contributes to the text's magnitude (so
    #   longer text blocks may have greater magnitudes).
    if use == "google":
        document = types.Document(content=excerpt,
                                  type=enums.Document.Type.PLAIN_TEXT)
        annotations = client.analyze_sentiment(document=document)
        return (annotations.document_sentiment.score,
                annotations.document_sentiment.magnitude)

    # The pattern API returns a polarity and subjectivity score. From the docs
    # (https://www.clips.uantwerpen.be/pages/pattern-en#sentiment):
    #
    # The sentiment() function returns a (polarity, subjectivity)-tuple for the
    # given sentence, based on the adjectives it contains, where polarity is a
    # value between -1.0 and +1.0 and subjectivity between 0.0 and 1.0.
    elif use == "pattern":
        return sentiment(excerpt)

    # The Center for Reading Research data contains scores for valence, arousal
    # and dominance. From Warriner et al. (2013):
    #
    # **[V]alence** (or pleasantness) of the emotions invoked by a word, going
    # from *unhappy* to *happy*
    #
    # [T]he degree of **arousal** evoked by a word
    #
    # [T]he **dominance**/power of the word--the extent to which the word
    # denotes something that is weak/submissive or strong/dominant
    elif use == "crr":
        # Not sure yet how to handle n>1-grams
        assert len(excerpt.split()) == 1
        return _get_crr_scores(excerpt, treat_na=treat_na)
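A hedged usage sketch covering the three backends handled above; the input strings and variable names are illustrative, and the CRR branch expects a single word:

# Google Natural Language API backend: returns (score, magnitude)
score, magnitude = get_valence("I had a wonderful day.", use="google")

# pattern backend: returns (polarity, subjectivity)
polarity, subjectivity = get_valence("I had a wonderful day.", use="pattern")

# CRR backend (default): single-word input only; shape depends on _get_crr_scores
crr_scores = get_valence("wonderful", use="crr")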
Example No. 27
def getSenimentScoreForTopic(topic):

    fetched_tweet = json.loads(
        Storage.load('data/twitterData/tweetState_{}.txt'.format(topic)))
    sentimentScore = {}
    numRequest = 0

    for state in fetched_tweet:
        totalSentiment, tweetCount = 0, 0
        tweetsByState = fetched_tweet[state]

        for tweets in tweetsByState:
            document = types.Document(content=tweets['text'],
                                      type=enums.Document.Type.PLAIN_TEXT)
            #use exponential back-off to adjust for resource exhaustion
            exp_retry = retry.Retry(predicate=if_transient_error,
                                    initial=1,
                                    maximum=240,
                                    multiplier=2,
                                    deadline=480)
            try:
                sentiment = client.analyze_sentiment(
                    document=document, retry=exp_retry).document_sentiment

            except InvalidArgument:
                logging.error('Invalid argument has been passed')
            else:
                if abs(sentiment.score) > 0.35:
                    totalSentiment += (sentiment.score * sentiment.magnitude)
                    tweetCount += 1
        logging.info(
            'Sentiment has been analyzed for {} on the topic of {}'.format(
                state, topic))

        if tweetCount == 0:
            sentimentScore[state] = 0
        else:
            sentimentScore[state] = totalSentiment / tweetCount

    fileName = 'data/sentiments/tweetSentiments_{}_score.json'.format(topic)
    Storage.upload(json.dumps(sentimentScore, indent=4), fileName)
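The retry helpers used above presumably come from google.api_core; a sketch of the assumed imports and module-level client (Storage and the tweet data files are project-specific and not reproduced here):

import json
import logging
from google.api_core import retry
from google.api_core.retry import if_transient_error
from google.api_core.exceptions import InvalidArgument
from google.cloud import language
from google.cloud.language import enums, types

client = language.LanguageServiceClient()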
Example No. 28
    def analyze_text_entities(text):
        if isinstance(text, six.binary_type):
            text = text.decode('utf-8')
        client = language.LanguageServiceClient()

        # Instantiates a plain text document.
        document = types.Document(content=text,
                                  type=enums.Document.Type.PLAIN_TEXT)

        entities = client.analyze_entities(document,
                                           encoding_type='UTF8').entities
        ent_obj_list = [None] * len(text.split(' '))

        consumed_ent_idx = []
        for entity in entities:
            for mention in entity.mentions:
                if entity.name != mention.text.content:
                    continue

                ent_location = mention.text.begin_offset
                words = text.encode('utf-8')[:ent_location].decode(
                    'utf-8').split(' ')
                ent_idx = len([w for w in words if w != ''])

                for i, word in enumerate(entity.name.split(' ')):
                    if i + ent_idx >= len(
                            ent_obj_list) or i + ent_idx in consumed_ent_idx:
                        break
                    consumed_ent_idx.append(i + ent_idx)
                    ent_type = entity.type
                    ent_salience = entity.salience
                    ent_content = entity.name
                    ent_wiki = entity.metadata.get('wikipedia_url')
                    ent_obj_list[ent_idx + i] = {
                        'type': ent_type,
                        'salience': ent_salience,
                        'content': ent_content,
                        'wiki': ent_wiki
                    }
        return ent_obj_list
Example No. 29
def run_language():
    # Create a Cloud Natural Language client
    client = language.LanguageServiceClient()

    # Retrieve inputted text from the form and create document object
    text = request.form['text']
    document = types.Document(content=text, type=enums.Document.Type.PLAIN_TEXT)

    # Retrieve response from Natural Language API's analyze_sentiment() method
    response = client.analyze_sentiment(document)
    sentiment = response.document_sentiment

    sedc_score = score(sentiment)

    # Return a Jinja2 HTML template of the homepage and pass the 'text',
    # 'sentiment', and 'score' variables to the frontend. These contain
    # information retrieved from the Natural Language API.
    return render_template('homepage.html', text=text, sentiment=sentiment,
                           score=sedc_score)
Example No. 30
def get_entities(text):
    #Documentation: https://cloud.google.com/natural-language/docs/analyzing-entities#language-entities-string-python
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    # Instantiates a plain text document.
    document = types.Document(
        content=re_clean(text),
        type=enums.Document.Type.PLAIN_TEXT)

    # Detects entities in the document. You can also analyze HTML with:
    #   document.type == enums.Document.Type.HTML
    try:
        entities = client.analyze_entities(document).entities
        response = parse_response(entities)
    except Exception as e:
        response = repr(e)
        print(e)
    
    return response
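re_clean and parse_response are not defined in this example. Under the assumption that re_clean normalizes whitespace and parse_response flattens the entity list (similar to Example No. 1), minimal hypothetical stand-ins might look like this:

import re

def re_clean(text):
    # Hypothetical helper: collapse whitespace and strip leading/trailing spaces.
    return re.sub(r'\s+', ' ', text).strip()

def parse_response(entities):
    # Hypothetical helper: flatten API entities into plain dicts.
    return [{'name': e.name,
             'salience': e.salience,
             'wikipedia_url': e.metadata.get('wikipedia_url', '-')}
            for e in entities]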