import urllib.request

from bs4 import BeautifulSoup
from watson_developer_cloud import NaturalLanguageUnderstandingV1 as NLU
from watson_developer_cloud.natural_language_understanding_v1 import (
    Features, EntitiesOptions, KeywordsOptions)


def posting_generator():
    jobs = []
    # job_title = "software engineer"
    # job_location = "Pittsburgh, PA"
    search_url = 'https://www.careerbuilder.com/jobs-software-engineer-in-pittsburgh,pa?keywords=Software+Engineer&location=Pittsburgh%2C+PA'
    base_url = 'https://www.careerbuilder.com'
    next_page = urllib.request.urlopen(search_url, None, None)
    nlu = NLU(
        iam_apikey='BU11gy3frJMRMKz4XQ_sPJ_HGF3p-qEr74xUlEVTWvsY',  # was _apikey, which the SDK does not accept
        version='2018-03-19')

    def nextPage(soup):
        print("BREAK 1")
        next_link = soup.find("a", class_="Next Page")
        if next_link is not None:
            print("BREAK 2")
            # next_link is already the <a> tag, so read its href directly;
            # find_parent("a") on an <a> returns None and would crash here.
            return next_link['href']
        else:
            print("BREAK 3")
            return 0

    while True:
        soup = BeautifulSoup(next_page, 'html.parser')
        # next_page = nextPage(soup)
        for job in soup.find_all('h2'):
            # BeautifulSoup exposes the class attribute as a list of tokens,
            # so join it before comparing against the full class string.
            if ' '.join(job.get('class', [])) == 'job-title show-for-medium-up':
                url = base_url + job.a['href']
                response = nlu.analyze(
                    url=url,
                    features=Features(
                        entities=EntitiesOptions(limit=1000),
                        keywords=KeywordsOptions(limit=1000),
                    )).get_result()
                jobs.append(response)
                # jsonprinter(response)
                yield response
        next_url = nextPage(soup)
        if next_url == 0:
            break
        else:
            next_page = urllib.request.urlopen(next_url, None, None)
    print("END OF PROGRAM!")
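# Usage sketch (not in the original source): drive posting_generator() and
# print the top keyword of each posting, assuming the Watson NLU response
# dict carries a top-level "keywords" list. Capped at five postings to
# limit API calls.
if __name__ == '__main__':
    for i, posting in enumerate(posting_generator()):
        keywords = posting.get('keywords', [])
        if keywords:
            print(i, keywords[0]['text'], keywords[0]['relevance'])
        if i >= 4:
            break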
def test_version_date(self):
    with pytest.raises(TypeError):
        NaturalLanguageUnderstandingV1()  # pylint: disable=E1120
    nlu = NaturalLanguageUnderstandingV1(version='2016-01-23',
                                         url='http://bogus.com',
                                         username='******',
                                         password='******')
    assert nlu
def main():
    load_dotenv(find_dotenv())
    nlu_username = os.environ.get('NLU_USERNAME')
    nlu_password = os.environ.get('NLU_PASSWORD')
    nlu = NLU(username=nlu_username,
              password=nlu_password,
              version='2017-02-27')
    result = nlu.analyze(text='I hate galvanize',
                         features=[features.Sentiment()])['sentiment']['document']
    print(result['label'], result['score'])
def test_version_date(self):
    with pytest.raises(TypeError):
        NaturalLanguageUnderstandingV1()
    with pytest.raises(WatsonException):
        NaturalLanguageUnderstandingV1(version='2016-01-23')
    with pytest.raises(WatsonException):
        NaturalLanguageUnderstandingV1(version='2016-01-23',
                                       url='https://bogus.com')
    nlu = NaturalLanguageUnderstandingV1(version='2016-01-23',
                                         url='http://bogus.com',
                                         username='******',
                                         password='******')
    assert nlu
def get_text_data(text, language):
    username = os.environ.get("BLUEMIX-NLU-USERNAME")
    password = os.environ.get("BLUEMIX-NLU-PASSWORD")
    natural_language_understanding = NaturalLanguageUnderstanding(
        version="2017-02-27",
        username=username,
        password=password)
    return natural_language_understanding.analyze(
        text=text,
        features=[features.Emotion(), features.Sentiment(), features.Keywords()],
        language=language)
def processText(fname):
    print('fname', fname)
    in_text = readfile(fname)
    in_text = str(in_text)
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username="******",
        password="******",
        version="2017-02-27")
    response = natural_language_understanding.analyze(
        text=in_text,
        features=[
            Features.Concepts(
                # Concepts options
                limit=50),
            # Features.Keywords(
            #     # Keywords options
            #     # sentiment=True,
            #     # emotion=True,
            #     limit=10
            # ),
            # Features.Entities(
            # )
        ])
    # return jsonify(response)
    return json.dumps(response, indent=2)
def get_sentiment(target_text):
    from watson_developer_cloud import NaturalLanguageUnderstandingV1
    from watson_developer_cloud.natural_language_understanding_v1 \
        import Features, EntitiesOptions, KeywordsOptions
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username="******",
        password="******",
        version="2018-03-16")
    """ Opening the file and reading the text contained """
    # Note: this overwrites the target_text argument passed in.
    file = open("messages_to_be_analyzed.txt", 'r')
    target_text = file.read()
    response = natural_language_understanding.analyze(
        text=target_text,
        features=Features(
            entities=EntitiesOptions(emotion=True, sentiment=True, limit=2),
            keywords=KeywordsOptions(emotion=True, sentiment=True, limit=2)))
    # Return the emotion scores of the first keyword; iterating directly over
    # the emotion dict replaces the original redundant double comprehension
    # without changing behavior.
    emotions = response.result.get("keywords")[0].get("emotion")
    return [emotions.get(emotion) for emotion in emotions]
def analyseEmotions(input):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username='******',
        password='******')
    # response = natural_language_understanding.analyze(
    #     text=input,
    #     features=Features(entities=EntitiesOptions(), keywords=KeywordsOptions(),
    #                       emotion=EmotionOptions()))
    response = natural_language_understanding.analyze(
        text=input,
        features=Features(emotion=EmotionOptions()))
    # print(json.dumps(response, indent=2))
    anger = response["emotion"]["document"]["emotion"]["anger"]
    joy = response["emotion"]["document"]["emotion"]["joy"]
    sadness = response["emotion"]["document"]["emotion"]["sadness"]
    fear = response["emotion"]["document"]["emotion"]["fear"]
    disgust = response["emotion"]["document"]["emotion"]["disgust"]
    total = anger + joy + sadness + fear + disgust
    # print("Anger: %f" % anger)
    # print("Joy: %f" % joy)
    # print("Sadness: %f" % sadness)
    # print("Fear: %f" % fear)
    # print("Disgust: %f" % disgust)
    # print("total: %f" % total)
    return total
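# For reference, a sketch of the document-level emotion payload that
# analyseEmotions() indexes into (field names follow the Watson NLU v1
# emotion feature; the numeric values are made-up examples):
#
# {"emotion": {"document": {"emotion": {"anger": 0.04, "joy": 0.62,
#                                       "sadness": 0.11, "fear": 0.02,
#                                       "disgust": 0.01}}}}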
def on_status(self, status):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username='******',
        password='******')
    twitts = status.text
    coordinates = status.coordinates
    language = status.user.lang
    if status.place and language == 'en':
        if coordinates is not None and len(coordinates) > 0:
            coordinates = status.coordinates['coordinates']
        print('coordinates: ', coordinates)
        print('twitts: ', twitts)
        try:
            response = natural_language_understanding.analyze(
                text=twitts,
                features=[features.Sentiment()])
            sentiment = response['sentiment']['document']['label']
        except Exception as e:
            sentiment = "neutral"
        print(sentiment)
        upload_data = {
            "twitts": twitts,
            "coordinates": coordinates,
            "sentiment": sentiment
        }
        print(requests.post(
            'https://search-trends-pnoxxtizp4zrbmwnvgsifem74y.us-east-1.es.amazonaws.com/twittmap/data',
            json=upload_data))
    return True
def nlu_data():
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username='******',
        password='******')
    r = open("C:\\Users\\kishan.sampat\\Desktop\\user_input.txt", "r")
    extracted = r.read().splitlines()
    # print(extracted)
    mainArray = []
    for elements in extracted:
        array = []
        if len(elements) > 0:
            # print(elements)
            response = natural_language_understanding.analyze(
                text=elements,
                features=Features(entities=EntitiesOptions(sentiment=True, limit=3),
                                  keywords=KeywordsOptions()))
            for each in response['keywords']:
                tex = each['text']
                lemet = porter_stemmer.stem(tex)
                array.append(lemet)
            mainArray.append(array)
        else:
            # Stop at the first blank line, as the original did.
            break
    # Write once after the loop; the original reopened the file with mode 'w'
    # on every iteration, truncating and rewriting it each time.
    with open('C:\\Users\\kishan.sampat\\Desktop\\user_input.csv', 'w',
              newline='') as outfile:
        csv.writer(outfile).writerows(mainArray)
def getAnalysis(review):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username='******',
        password='******',
        version='2018-03-16')
    response = natural_language_understanding.analyze(
        text=review,
        features=Features(
            entities=EntitiesOptions(emotion=True, sentiment=True, limit=2),
            keywords=KeywordsOptions(emotion=True, sentiment=True,
                                     limit=2))).get_result()
    keywords = response["keywords"]
    numKeywords = len(keywords)
    if numKeywords == 0:
        print("no keywords")
        return np.zeros(6)
    sentiments = np.array([
        keyword["sentiment"]["score"] * keyword["relevance"]
        for keyword in keywords
    ])
    totalSentiment = np.sum(sentiments) / numKeywords
    emotionNames = ['sadness', 'joy', 'fear', 'disgust', 'anger']
    emotions = np.array([
        np.array([keyword["emotion"][name] for name in emotionNames])
        for keyword in keywords
    ])
    totalEmotions = np.sum(emotions, 0) / numKeywords
    features = np.insert(totalEmotions, 0, totalSentiment, axis=0)
    return features
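# Usage sketch (not in the original source): stack the six-element vectors
# returned by getAnalysis() into a feature matrix; the review strings in the
# commented call are illustrative placeholders.
import numpy as np

def build_feature_matrix(reviews):
    # one row per review: [sentiment, sadness, joy, fear, disgust, anger]
    return np.vstack([getAnalysis(review) for review in reviews])

# X = build_feature_matrix(["Great product!", "Terrible support."])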
def IBMtran(df):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2019-07-12',
        iam_apikey='v8j7M76fx4hOFr35AhLUso35qgmsocV5_WM-Ag0IdKg5',
        url='https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/dbf791a6-366c-48d9-81ac-9a08ac7f130c')
    ibm = []
    i = 0
    x = 0.5

    def senti(x):
        # Vader sentiment as a fallback when the Watson call fails;
        # note this x parameter shadows the outer x = 0.5 weight.
        analyzer = SentimentIntensityAnalyzer()
        try:
            response = natural_language_understanding.analyze(
                text=str(x),
                features=Features(sentiment=SentimentOptions())).get_result()
            res = response.get('sentiment').get('document').get('score')
            # response1 = natural_language_understanding.analyze(
            #     text=str(df['review_title'][i]),
            #     features=Features(sentiment=SentimentOptions())).get_result()
            # res1 = response1.get('sentiment').get('document').get('score')
            # final = (res*x + res1*(1-x))
            return res
        except Exception:
            vs = analyzer.polarity_scores(str(x))
            return vs['compound']

    df['ibm2'] = df.apply(lambda x: senti(x['review_body_english']), axis=1)
    return df
def __init__(self):
    # Create the NLU client instance.
    self.natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-12-08',
        iam_apikey={},
        url={}
    )
def constructRow(text, isEarthquake):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey='34qzJpNfbmmav0ZFkGM9vM_enLCTAOuQsd5s4odeF19l',
        url='https://gateway-lon.watsonplatform.net/natural-language-understanding/api')
    keywords = {"quake", "shake", "tremble"}
    try:
        response = natural_language_understanding.analyze(
            html=text,
            features=Features(emotion=EmotionOptions())).get_result()
        dictionary = response["emotion"]["document"]["emotion"]
        # going to remove the ones with only 0's
    except Exception:
        dictionary = {
            "anger": 0,
            "disgust": 0,
            "fear": 0,
            "joy": 0,
            "sadness": 0
        }
    for keyword in keywords:
        columnName = "hasSubstring_" + keyword
        # find() returns the index of the keyword, or -1 if absent,
        # so this turns it into True/False
        dictionary[columnName] = text.lower().find(keyword) >= 0
    print(dictionary)
    dictionary["duringEarthquake"] = isEarthquake
    print(pd.Series(dictionary))
    return pd.Series(dictionary)
def calculateRelevance(text, regModel):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey='34qzJpNfbmmav0ZFkGM9vM_enLCTAOuQsd5s4odeF19l',
        url='https://gateway-lon.watsonplatform.net/natural-language-understanding/api')
    keywords = {"quake", "shake", "tremble"}
    try:
        response = natural_language_understanding.analyze(
            html=text,
            features=Features(emotion=EmotionOptions())).get_result()
        dictionary = response["emotion"]["document"]["emotion"]
    except Exception as e:
        print(e)
        return None
    for keyword in keywords:
        columnName = "hasSubstring_" + keyword
        dictionary[columnName] = text.lower().find(keyword) >= 0
    # Prepend a constant 1 (intercept term) to the feature vector before
    # handing it to the regression model.
    one = pd.Series([1])
    data = pd.DataFrame([list(one.append(pd.Series(dictionary)))])
    return regModel.predict(data)
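# Hedged sketch (not in the original source) of how constructRow() and
# calculateRelevance() might fit together: build a labeled frame from raw
# texts, fit a simple regression, then score new text with
# calculateRelevance(). The sample data and the choice of LinearRegression
# are illustrative assumptions, not the original pipeline.
import pandas as pd
from sklearn.linear_model import LinearRegression

def train_relevance_model(texts, labels):
    rows = pd.DataFrame([constructRow(t, l) for t, l in zip(texts, labels)])
    X = rows.drop(columns=["duringEarthquake"])
    # calculateRelevance() prepends a constant 1, so mirror that here.
    X.insert(0, "intercept", 1)
    model = LinearRegression()
    model.fit(X, rows["duringEarthquake"])
    return model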
def test_analyze_throws(self):
    nlu = NaturalLanguageUnderstandingV1(version='2016-01-23',
                                         url='http://bogus.com',
                                         username='******',
                                         password='******')
    with pytest.raises(ValueError):
        nlu.analyze(None, text="this will not work")
def watson_nlp_analysis(text):
    if text == '':
        return text
    max_limit_one = 10
    max_limit_two = 30
    naturalLanguageUnderstanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey=os.environ['WATSON'],
        url='https://gateway.watsonplatform.net/natural-language-understanding/api')
    response = naturalLanguageUnderstanding.analyze(
        text=text,
        features=Features(
            concepts=ConceptsOptions(limit=max_limit_one),
            categories=CategoriesOptions(limit=max_limit_one),
            sentiment=SentimentOptions(document=True),
            emotion=EmotionOptions(document=True),
            entities=EntitiesOptions(emotion=True, sentiment=True,
                                     limit=max_limit_two),
            keywords=KeywordsOptions(emotion=True, sentiment=True,
                                     limit=max_limit_two))).get_result()
    return response
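# Minimal usage sketch (not in the original source); assumes the WATSON
# environment variable holds a valid IAM API key and that the response
# carries document-level sentiment as indexed below. The sample sentence is
# illustrative only.
result = watson_nlp_analysis("IBM Watson analyzes text for sentiment and emotion.")
print(result['sentiment']['document']['label'],
      result['sentiment']['document']['score'])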
def analyze_emotions(read_path, write_path1, write_path2):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username='******',
        password='******',
        version='2018-03-16')
    # Open both outputs in text mode; the original used 'wb' for the first
    # file, which breaks csv.writer under Python 3.
    with open(write_path1, 'w', newline='') as outFile1, \
            open(write_path2, 'w', newline='') as outFile2:
        file_writer1 = csv.writer(outFile1)
        file_writer2 = csv.writer(outFile2)
        i = 1
        with open(read_path, 'r') as inFile:
            fileReader = csv.reader(inFile)
            # for i in range(417):
            #     next(fileReader)
            for row in fileReader:
                tweet = row[4]
                tweet = remove_words_with_numbers(tweet)
                print(i, tweet)
                # data = [row[0], row[1], row[4], row[5], row[10]]
                if isNotEmpty(tweet):
                    response = natural_language_understanding.analyze(
                        language="en",
                        text=tweet,
                        features=Features(emotion=EmotionOptions()))
                    jsonData = json.dumps(response, indent=2)
                    print(jsonData)
                    my_dict = json.loads(jsonData)
                    emotion = my_dict["emotion"]["document"]["emotion"]
                    highest_emotion = get_highest_emotion(
                        emotion["anger"], emotion["joy"], emotion["sadness"],
                        emotion["fear"], emotion["disgust"])
                    print(highest_emotion)
                    data = [
                        row[0], row[1], row[4], row[5], row[10],
                        emotion["anger"], emotion["joy"], emotion["sadness"],
                        emotion["fear"], emotion["disgust"], highest_emotion
                    ]
                    file_writer1.writerow(data)
                    i = i + 1
                else:
                    data = [row[0], row[1], row[4], row[5], row[10]]
                    file_writer2.writerow(data)
def IBMNonTran(df):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2019-07-12',
        iam_apikey='v8j7M76fx4hOFr35AhLUso35qgmsocV5_WM-Ag0IdKg5',
        url='https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/dbf791a6-366c-48d9-81ac-9a08ac7f130c')
    ibm = []
    for i in range(0, len(df)):
        try:
            response = natural_language_understanding.analyze(
                language=df['langCode'][i],
                text=str(df['review_body'][i]),
                features=Features(sentiment=SentimentOptions())).get_result()
            res = response.get('sentiment').get('document').get('score')
            ibm.append(res)
        except Exception:
            ibm.append('NA')
    df['ibm1'] = ibm
    return df
def requestWatsonSentence(sentences):
    nlu = NaturalLanguageUnderstandingV1(version='2017-02-27',
                                         username=credentials.nluKey,
                                         password=credentials.nluId)
    keywords = dict()
    entities = dict()
    for i in range(len(sentences)):
        tmp = nlu.analyze(
            text=sentences[i],
            features=Features(
                entities=EntitiesOptions(emotion=True, sentiment=True, limit=2),
                keywords=KeywordsOptions(emotion=True, sentiment=True, limit=2)))
        keywords[len(keywords)] = [
            item.get('text', {}) for item in tmp.get('keywords', {})
            if item.get("relevance") > 0.1
        ]
        entities[len(entities)] = [
            item.get('text', {}) for item in tmp.get('entities', {})
            if item.get("relevance") > 0.75
        ]
    # return keywords, tones  # 'rtype': dictionary
    return entities, keywords
def findKeywords(filename):
    file = open(filename, "r")
    outputfile = open("jsonOutput.json", "w")
    keywords = []
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey='m620e2y3lML5qG_oRJy9JERrlR0-159j3vJVrtPJkhJg',
        url='https://gateway-wdc.watsonplatform.net/natural-language-understanding/api')
    try:
        # Analyze the file's contents; the original passed an undefined name
        # `string` here instead of reading the file.
        response = natural_language_understanding.analyze(
            text=file.read(),
            features=Features(keywords=KeywordsOptions(
                sentiment=False, emotion=False))).get_result()
    except Exception:
        return []
    # print(json.dumps(response, indent=2))
    file.close()
    outputfile.write(json.dumps(response, indent=2))
    outputfile.close()
    with open("jsonOutput.json", "r") as read_file:
        data = json.load(read_file)
    keywordslist = data['keywords']
    my_dict = {}
    for i in range(len(keywordslist)):
        x = keywordslist[i]
        keywords.append(x['text'])
        my_dict.update({x['text']: x['relevance']})
    # for i in keywords:
    #     print(i)
    return createRelavantKeywordsList(my_dict, keywords)
def handleMoodLogging():
    # Display the form
    if request.method == 'GET':
        return render_template('index.html')
    else:
        # Validate using credentials
        natural_language_understanding = NaturalLanguageUnderstandingV1(
            username=secret_dictionary['username'],
            password=secret_dictionary['password'],
            version='2018-03-16')
        # Grab the text from the user
        journal_contents = request.form['journal_content']
        # print('journal contents: ', journal_contents.encode('ascii', 'ignore'))
        # Make a call to the API with the text passed in
        alchemy_results = natural_language_understanding.analyze(
            text=journal_contents.encode('ascii', 'ignore'),
            features=Features(emotion=EmotionOptions(),
                              sentiment=SentimentOptions()))
        # Write the results to a file
        fo = open('static/mockresponses/emotion_response.json', 'w+')
        fo.write(json.dumps(alchemy_results, indent=2))
        fo.close()
        return render_template('gauge.html')
def process(key, text):
    naturalLanguageUnderstanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey=key,
        url='https://gateway.watsonplatform.net/natural-language-understanding/api')
    # t = 'IBM is an American multinational technology company '
    #     'headquartered in Armonk, New York, United States, '
    #     'with operations in over 170 countries.'
    t = text
    try:
        response = naturalLanguageUnderstanding.analyze(
            text=t,
            features=Features(
                entities=EntitiesOptions(emotion=True, sentiment=True, limit=2),
                keywords=KeywordsOptions(emotion=True, sentiment=True,
                                         limit=2))).get_result()
    except Exception:
        return False
    print(json.dumps(response, indent=2))
    return json.dumps(response, indent=2)
def nlu_fact():
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2017-02-27',
        username='******',
        password='******')
    r = open("C:\\Users\\kishan.sampat\\Desktop\\fact.txt", "r")
    extracted = r.read()
    response = natural_language_understanding.analyze(
        text=extracted,
        features=Features(entities=EntitiesOptions(sentiment=True, limit=3),
                          keywords=KeywordsOptions()))
    # print(response)
    # print(json.dumps(response, indent=5))
    # json_parsed = json.dumps(response)
    # print(json_parsed)
    with open('C:\\Users\\kishan.sampat\\Desktop\\fact_data.csv', 'w') as outfile:
        # json.dump(response, outfile)
        for each in response['keywords']:
            tex = each['text']
            lemet = porter_stemmer.stem(tex)
            # print(lemet)
            json.dump(lemet, outfile)
            outfile.write('\n')
        for ent in response['entities']:
            ents = ent['text']
            lemet_ent = porter_stemmer.stem(ents)
            # print(lemet_ent)
            json.dump(lemet_ent, outfile)
            outfile.write('\n')
def topic_ibm(content):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey="MpIV_8-gUahq8R3WzNh7C0g1JgDZGueVrUzq8441rQVb",
        url="https://gateway-lon.watsonplatform.net/natural-language-understanding/api")
    response = natural_language_understanding.analyze(
        text=content,
        features=Features(categories=CategoriesOptions(limit=3))).get_result()
    fichier = json.dumps(response, indent=2)
    print(fichier)
    data_dict_02 = json.loads(fichier)
    # Category labels are hierarchical paths such as
    # "/technology and computing/software"; keep the top-level segment of the
    # two highest-ranked categories. Note the guard requires three categories
    # even though only the top two are used; otherwise the function returns None.
    if len(data_dict_02["categories"]) > 2:
        category1 = data_dict_02["categories"][0]["label"]
        category2 = data_dict_02["categories"][1]["label"]
        resultat1 = category1.split("/")
        resultat2 = category2.split("/")
        print('resultat1 ', resultat1)
        print('resultat2 ', resultat2)
        resultat = []
        resultat.append(resultat1[1])
        resultat.append(resultat2[1])
        print(resultat)
        return resultat
def detect_text(path):
    """Detects text in the file."""
    from google.cloud import vision
    client = vision.ImageAnnotatorClient()
    with io.open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey='ysuAh_Jc3ASnVq3mvfwjONT5dD5G2oqcTGLizYs7HXyC',
        url='https://gateway.watsonplatform.net/natural-language-understanding/api')
    strings = ""
    for text in texts:
        # The first annotation holds the full detected text, so stop there.
        strings += str(text.description)
        break
    response = natural_language_understanding.analyze(
        text=strings,
        features=Features(emotion=EmotionOptions(
            targets=strings.split('\n')))).get_result()
    print(json.dumps(response, indent=2))
def __init__(self):
    '''
    Class constructor or initialization method.
    '''
    # keys and tokens from the Twitter Dev Console
    consumer_key = "H3l8vFiB3UidU6uy5h53meohu"
    consumer_secret = "sViYwc5Md5scOyaV4Sqr6URjY88tLzHG7fJox0dj6lAoJaoZXa"
    access_token = "956641182603411457-NyNWQKaFCx73fz7sbiyFydBLQP8eQS0"
    # Renamed from access_secret; the original left access_token_secret
    # undefined where it is used below.
    access_token_secret = "Jh9USoFO3OG2keTHYeGN8wuimEuI3uGwBB2bDDc85Urka"
    self.natural_language_understanding = NaturalLanguageUnderstandingV1(
        username='******',
        password='******',
        version='2017-02-27')
    # attempt authentication
    try:
        # create OAuthHandler object
        self.auth = OAuthHandler(consumer_key, consumer_secret)
        print("lol1")
        # create tweepy API object to fetch tweets
        self.api = tweepy.API(self.auth)
        print("lol3")
        # set access token and secret
        self.auth.set_access_token(access_token, access_token_secret)
        print("lol2")
    except Exception:
        print("Error: Authentication Failed")
def watson_analyze_text_understanding(text):
    """
    Input: text to be analyzed
    Output: response from the watson API
    Taken from the watson API docs.
    """
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        username=settings.WATSON_UNDERSTANDING_USERNAME,
        password=settings.WATSON_UNDERSTANDING_PASSWORD,
        version="2017-02-27",
    )
    response = {}
    try:
        response = natural_language_understanding.analyze(
            text=text,
            features=Features(
                entities=EntitiesOptions(emotion=True, sentiment=True, limit=2),
                keywords=KeywordsOptions(emotion=True, sentiment=True, limit=2),
            ),
        )
    except Exception as e:
        print(e)
        print("Proceeding without the watson data")
    return response
def analyse(queue, sns_topic):
    messages = queue.receive_messages(MessageAttributeNames=['All'],
                                      VisibilityTimeout=30,
                                      MaxNumberOfMessages=10)
    for message in messages:
        if message.body is not None and message.message_attributes is not None:
            # print(message.body)
            sns_message = {}
            nlp = NaturalLanguageUnderstandingV1(
                version='2017-02-27',
                url="https://gateway.watsonplatform.net/natural-language-understanding/api",
                username=configure.ibm_username,
                password=configure.ibm_password)
            response = nlp.analyze(text=message.body,
                                   features=[features.Sentiment()])
            print(response["sentiment"]["document"]["label"])
            if response["sentiment"]["document"]["label"] is not None:
                username = message.message_attributes["username"]
                sentiment = response["sentiment"]["document"]["label"]
                lat = message.message_attributes["lat"]
                long = message.message_attributes["long"]
                timestamp_ms = message.message_attributes["timestamp_ms"]
                sns_message["username"] = username
                sns_message["content"] = message.body
                sns_message["lat"] = lat
                sns_message["long"] = long
                sns_message["sentiment"] = sentiment
                sns_message["timestamp_ms"] = timestamp_ms
                message_for_send = json.dumps(sns_message)
                response1 = sns_topic.publish(Message=message_for_send)
                print(response1)
            else:
                print("sentiment analyse error!")
def main():
    # The credentials file holds a Python dict literal, parsed with eval().
    credentials = eval("\n".join(open(CREDENTIALS, "r").readlines()))
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version="2017-02-27",
        username=credentials["username"],
        password=credentials["password"]
    )
    listOfFiles = [f for f in listdir(PATH) if isfile(join(PATH, f))]  # ["testData/354962.json"]
    for fileName in listOfFiles:
        fileIn = open(join(PATH, fileName), "r")
        data = json.load(fileIn)
        val = data["html_lawbox"]
        if any(word in val for word in ["convict", "acquit", "guilty", "innocen", "sentenc"]):
            print("Good file: ", fileName)
            for word in ["convict", "acquit", "guilty", "innocen", "sentenc"]:
                if word in val:
                    print("it has ", word)
            response = natural_language_understanding.analyze(
                text=data["html_lawbox"],
                features=[features.Entities(), features.Relations()]
            )
            print(json.dumps(response, indent=2))
        else:
            print("Bad file: ", fileName)