def search_for_keywords():
    """Interactively search a user's timeline for tweets containing keywords.

    Prompts for a screen name and a list of keywords, downloads the user's
    tweets in pages of 200 (optionally paging further back in time), strips
    URLs from every tweet's text, and prints the tweets that contain at
    least one of the keywords.

    Relies on module-level ``api`` (authenticated tweepy client), ``re``,
    ``TextBlob`` and ``print_tweets``.
    """
    # NOTE(review): the prompt section here was corrupted in the source (a
    # '******' redaction); reconstructed from the parallel code in
    # for_donald_trump() -- confirm against the original file.
    user = input("Enter the name of the user:")
    query = []    # keywords to be searched for
    tweets = []   # every tweet retrieved
    tweets2 = []  # tweets matching at least one keyword

    shows = True
    while shows:  # collect keywords until the user says stop
        qw = input("Enter the keywords ro be searched for :")
        query.append(qw)
        ask = input("Do you want to add more keywords?(Y/N)").upper()
        if ask == "Y":
            pass
        elif ask == "N":
            shows = False
        else:
            print("invalid input \n" "Taking default input as 'No' ")
            shows = False

    # First page: up to 200 of the user's most recent tweets.
    result = api.user_timeline(screen_name=user, count=200)
    for data in result:
        tweets.append(data)
    oldest = tweets[-1].id             # id of the last tweet retrieved
    oldest_at = tweets[-1].created_at  # timestamp of the last tweet retrieved

    qu = True
    while qu:  # optionally page further back in time
        qu2 = input("Do you want more tweets older than " + str(oldest_at) +
                    "(Y/N) :").upper()
        if qu2 == "Y":
            # BUGFIX: max_id is inclusive, so pass oldest - 1 to avoid
            # retrieving the boundary tweet a second time.
            result2 = api.user_timeline(screen_name=user, count=200,
                                        max_id=oldest - 1)
            for data in result2:
                tweets.append(data)
            oldest = tweets[-1].id
            oldest_at = tweets[-1].created_at
        elif qu2 == "N":
            qu = False
        else:
            print("invalid input \n" "Taking default input as 'No' ")
            qu = False

    # Strip URLs from every tweet's text in place.
    for tweet1 in tweets:
        tweet1.text = re.sub(r"http\S+", "", tweet1.text)

    # Keep every tweet whose text contains any of the keywords (a tweet
    # matching several keywords is appended once per keyword, as before).
    for keyword in query:
        for data in tweets:
            if TextBlob(data.text).find(keyword) != -1:
                tweets2.append(data)

    if len(tweets2) > 0:
        print_tweets(tweets2)
    else:
        print("No tweet found with the related keywords")
# TextBlob quick-start demo: noun-phrase counts, parsing, string-like
# operations, n-grams and sentence indices.  Relies on ``wiki`` and ``zen``
# TextBlobs defined earlier in the script.
# NOTE(review): the first statement was truncated in the source; it is
# reconstructed from the TextBlob quickstart tutorial -- confirm.
print(wiki.words.count('its', case_sensitive=True))  # specify case sensitivity
print(wiki.noun_phrases.count('python'))

# translation and language detection
# en_blob = TextBlob(u'Simple is better than complex.')
# print(en_blob.translate(to='es'))
# chinese_blob = TextBlob(u"美丽优于丑陋")
# print(chinese_blob.translate(from_lang="zh-CN", to='en'))
# b = TextBlob(u"بسيط هو أفضل من مجمع")
# print(b.detect_language())

# parsing
b = TextBlob("And now for something completely different.")
print(b.parse())

# textblobs are like python strings!
print(zen[0:19])
print(zen.upper())
print(zen.find("Simple"))

apple_blob = TextBlob('apples')
banana_blob = TextBlob('bananas')
print(apple_blob < banana_blob)
print(apple_blob == 'apples')

# concatenation works like str (these two results are unused in the demo)
apple_blob + ' and ' + banana_blob
TextBlob("apples and bananas")
print("{0} and {1}".format(apple_blob, banana_blob))

# n-grams
blob = TextBlob("Now is better than never.")
print(blob.ngrams(n=3))

# getting start and end indices of sentences
for s in zen.sentences:
    print(s)
    print("---- Starts at index {}, Ends at index {}".format(s.start, s.end))
def search(max_tweets=100, query="", from_mail=""):
    """Search recent tweets for *query* and collect the negative ones.

    Resets and fills the module-level result arrays (texts, @mentions,
    dates, times, "% negative" labels, table row colours) consumed
    elsewhere, and returns ``[array_of_stuff, array_of_names]``: the
    negative tweet texts and the @mention extracted from each.

    ``from_mail`` is accepted for interface compatibility; the mailing
    step that used it is disabled below.  Relies on module-level
    ``tweepy``, ``api`` and ``TextBlob``.
    """
    global array_of_ids
    global array_of_comments
    global array_of_time
    global array_of_date
    global array_of_polarity
    global array_of_colors
    global mentions
    global average_sentiment
    global array_of_names

    array_of_stuff = []
    array_of_dates = []
    array_of_names = []
    array_of_ids = []
    array_of_comments = []
    array_of_time = []
    array_of_date = []
    array_of_polarity = []
    array_of_colors = []
    polarities = []  # raw polarity of each kept tweet, for the average

    max_tweets = int(max_tweets)
    searched_tweets = [
        status
        for status in tweepy.Cursor(api.search, q=query).items(max_tweets)
    ]

    for z in searched_tweets:
        try:
            time_created = z.created_at
            time_created_2 = str(time_created.date().strftime("%a-%d-%m-%y"))
            wiki = TextBlob(z.text)
            # Keep only clearly negative tweets (polarity < 0; the original
            # `!= 0.0 and not > 0` test is equivalent).
            if wiki.sentiment.polarity < 0:
                # BUGFIX: the original end index came from the substring
                # starting at "@" but was applied to the full text; offset
                # it so the slice spans "@" up to the following space.
                at = wiki.find("@")
                space = wiki[at:].find(" ")
                if space == -1:
                    user_name = str(wiki[at:])
                else:
                    user_name = str(wiki[at:at + space])
                print(user_name)
                array_of_names.append(user_name)
                array_of_stuff.append(str(wiki))
                print(wiki)
                array_of_dates.append(time_created_2)
                array_of_comments.append(z.text)
                array_of_time.append(str(time_created.time()))
                array_of_date.append(str(time_created.date()))
                # Map polarity from [-1, 0) onto a 0-50 "% negative" scale.
                array_of_polarity.append(
                    str(float("{0:.2f}".format(
                        (1 + wiki.sentiment.polarity) * 50))) + " % negative")
                array_of_colors.append("table-danger")
                polarities.append(wiki.sentiment.polarity)
                # send_mail(toaddr=user_name, body=wiki)  # disabled in original
        except Exception:
            # Skip any tweet that fails to parse (mirrors the original
            # best-effort behaviour).
            continue

    # BUGFIX: the original computed these after an unconditional return
    # (dead code) and tried to sum the *strings* in array_of_stuff; use the
    # collected polarities instead, scaled to a percentage as intended.
    mentions = int(len(array_of_stuff))
    array_of_ids = [de for de in range(len(array_of_stuff))]
    if polarities:
        avg = sum(polarities) / len(polarities)
        average_sentiment = (str((avg + 1) / 2 * 100), "%")
    return [array_of_stuff, array_of_names]
# TextBlob word-inflection and spelling demo.
# Consistency fix: the original used Python 2 ``print`` statements while
# the rest of the file uses the print() function.  Every call below takes
# a single argument, so the converted form behaves identically under both
# Python 2 and Python 3.
sentence = TextBlob('Use 4 spaces per indentation level.')
print(sentence.words)
print(sentence.words[2].singularize())
print(sentence.words[-1].pluralize())

animals = TextBlob("cat dog octopus")
print(animals.words)
print(animals.words.pluralize())

# TextBlobs Are Like Python Strings
print(animals[0:10])
# You can use common string methods.
print(animals.upper())
print(animals.find('dog'))
print("---------------------------------------------------------------")

from textblob import Word  # kept in place, as in the original script

w = Word("octopi")
print(w.lemmatize())
w = Word("went")
print(w.lemmatize("v"))  # Pass in WordNet part of speech (verb)
print("---------------------------------------------------------------")

b = TextBlob("I havv goood speling!")
print(b.correct())
def for_donald_trump():
    """Download @realdonaldtrump's timeline, then filter it by keywords.

    Prints every retrieved tweet with its sentiment, asks the user for
    keywords (optionally paging further back in time), strips URLs from the
    tweet texts, and finally prints only the tweets that contain at least
    one keyword.

    Relies on module-level ``tweepy``, ``auth``, ``re`` and ``TextBlob``.
    """
    print("Retreving tweets")
    api = tweepy.API(auth)
    # First page: up to 200 tweets by the given screen name.
    Tweets = api.user_timeline(screen_name="@realdonaldtrump", count=200)
    for data in Tweets:
        tweet = TextBlob(data.text)
        print(tweet)
        print(tweet.sentiment)
        print("Done at :" + str(data.created_at))
        # NOTE(review): this print was redacted ('******') in the source;
        # reconstructed as the tweet author's screen name -- confirm.
        print("Username :" + str(data.user.screen_name))
        print("---------------------------------------------")

    query = []    # keywords to be searched for
    tweets = []   # all retrieved tweets
    tweets2 = []  # tweets matching at least one keyword

    shows = True
    while shows:  # collect keywords until the user says stop
        qw = input("Enter the keywords ro be searched for :")
        query.append(qw)
        ask = input("Do you want to add more keywords?(Y/N)").upper()
        if ask == "Y":
            pass
        elif ask == "N":
            shows = False
        else:
            print("invalid input \n" "Taking default input as 'No' ")
            shows = False

    for data in Tweets:
        tweets.append(data)
    oldest = tweets[-1].id             # id of the last tweet retrieved
    oldest_at = tweets[-1].created_at  # timestamp of the last tweet retrieved

    qu = True
    while qu:  # optionally page further back in time
        qu2 = input("Do you want more tweets older than " + str(oldest_at) +
                    "(Y/N) :").upper()
        if qu2 == "Y":
            # BUGFIX: max_id is inclusive; subtract 1 so the boundary tweet
            # is not fetched twice.
            result = api.user_timeline(screen_name="@realdonaldtrump",
                                       count=200, max_id=oldest - 1)
            for data in result:
                tweets.append(data)
            oldest = tweets[-1].id
            oldest_at = tweets[-1].created_at
        elif qu2 == "N":
            qu = False
        else:
            print("invalid input \n" "Taking default input as 'No' ")
            qu = False

    # Strip URLs from every tweet's text in place.
    for tweet1 in tweets:
        tweet1.text = re.sub(r"http\S+", "", tweet1.text)

    # BUGFIX: search the full (possibly extended) ``tweets`` list -- the
    # original searched ``Tweets``, i.e. only the first page, making the
    # extra paging above pointless.
    for keyword in query:
        for data in tweets:
            if TextBlob(data.text).find(keyword) != -1:
                tweets2.append(data)

    if len(tweets2) > 0:
        # BUGFIX: print only the matching tweets -- the original re-printed
        # the whole timeline regardless of the keyword filter.
        for data in tweets2:
            tweet = TextBlob(data.text)
            print(tweet)
            print(tweet.sentiment)
            print("Done at :" + str(data.created_at))
            print("Username :" + str(data.user.screen_name))
            print("---------------------------------------------")
    else:
        print("No tweet found with the related keywords")
def main(argv):
    """Read a document path, analyse its text with TextBlob, print tables.

    Converts the file to plain text via pandoc, then prints three tables:
    (1) word frequencies with each word's first position, (2) the positions
    of configured "attention words", and (3) "interesting" bigrams -- a
    repeated word, or a bigram whose first word is in the ngram list.

    Relies on module-level ``sys``, ``pypandoc``, ``TextBlob`` and
    ``tabulate``, plus data files under ``data/<language>/``.
    """
    from collections import Counter  # local: avoids touching file header

    if len(argv) > 1:
        # Input via arguments
        # TODO: Doesn't work yet
        filepath = str(argv[1:])
    else:
        # Input via console
        if sys.version_info[0] < 3:  # <- python 2
            filepath = str(raw_input("Filepath: "))
        else:
            filepath = str(input("Filepath: "))

    # Settings
    language = 'de'
    # Data paths
    attentionfile = 'data/' + language + '/attentionwords.txt'
    ngramfile = 'data/' + language + '/ngramlist.txt'
    # Test path
    # filepath = '../tests/test.docx'

    # Load textfile and convert to plain text
    text = pypandoc.convert_file(filepath, 'plain')

    # BUGFIX: close the word-list files instead of leaking the handles.
    with open(attentionfile, encoding='utf-8') as f:
        attentionwords = f.read().splitlines()
    with open(ngramfile, encoding='utf-8') as f:
        ngramlist = f.read().splitlines()

    # Create a textblob to work with
    blob = TextBlob(text)
    # Contains all the words
    wordlist = blob.words
    # Unordered set (unique words)
    wordset = list(set(wordlist))
    # Contains all the sentences (kept from the original; printing disabled)
    sentences = blob.sentences
    # Contains all the ngrams
    ngrams = blob.ngrams(n=2)

    ## Print all the sentences
    #for sentence in sentences:
    #    print(sentence)

    # Word-frequency table: word, count, first position in the text.
    # PERF: count all words in one pass instead of wordlist.count() per
    # unique word (was O(n^2)).
    counts = Counter(wordlist)
    wordtable = [[word, str(counts[word]), str(blob.find(word))]
                 for word in wordset]
    # sort by amount
    wordtable = sorted(wordtable, key=lambda row: int(row[1]), reverse=True)
    # print amount table
    print(tabulate(wordtable))
    print("\n")

    # print attentionswords list and position where found in text
    # (blob.find returns -1 when the word is absent, so missing words sort
    # first).
    attentiontable = [[word, str(blob.find(word))] for word in attentionwords]
    # sort by position
    attentiontable = sorted(attentiontable, key=lambda row: int(row[1]))
    print(tabulate(attentiontable))
    print("\n")

    # print the ngrams which are interesting
    # BUGFIX: use boolean ``or`` instead of bitwise ``|`` on the two tests.
    phrasetable = []
    for ngram in ngrams:
        if ngram[0].lower() == ngram[1].lower() or ngram[0].lower() in ngramlist:
            phrase = ' '.join(str(word) for word in ngram)
            phrasetable.append([phrase, str(blob.find(phrase))])
    # sort by position
    phrasetable = sorted(phrasetable, key=lambda row: int(row[1]))
    print(tabulate(phrasetable))
    print("\n")
# Twitter API credentials (tokens stripped from this copy of the source;
# consumer_key / consumer_secret are presumably defined earlier in the
# file -- TODO confirm).
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api=tweepy.API(auth)

# Search term comes from the command line: script.py <term>
TweetName = sys.argv[1]
public_tweets = api.search(TweetName)
ptweets =[]
for tweet in public_tweets:
    #print(tweet.text)
    tw = TextBlob(tweet.text)
    # NOTE: the untrimmed blob is stored; only the printed copy is trimmed.
    ptweets.append(tw)
    # Crude URL trimming: drop everything from the first '/' onward.
    if '/' in tw:
        tw=tw[:tw.find('/')]
    print(tw)
    print('*------------------------------------------------------**')
    print('')
print(TweetName)
sys.stdout.flush()
# TextBlob feature demo: spelling correction, word counts, translation,
# parsing, string-like methods and n-grams.  Results are only bound to
# variables; nothing is printed here (they may be inspected later).

# Spelling correction
sent = TextBlob("I haawve goood speling")
correct_sent = sent.correct()
w = Word("haave")
spellcheck = w.spellcheck()  # list of (candidate, confidence) pairs

#Get Word and Noun Phrase Frequencies
words = TextBlob('We are no longer together. We are enemies now.')
word_counts = words.word_counts
#You can specify whether or not the search should be case-sensitive (default is False).

#Translation and Language Detection
# NOTE(review): translate()/detect_language() were removed from newer
# TextBlob releases (they raise NotImplementedError from 0.16); this code
# assumes an older version -- confirm the pinned dependency.
en_blob = TextBlob("You are my best friend")
pl_blob = en_blob.translate(to='pl')
blob = TextBlob("Mam na imię Piotr")
detected_lang = blob.detect_language()

#Parsing
text = TextBlob('I know You')
text_parse = text.parse()

#string methods work as on str
text = TextBlob("Hello World")
upper_text = text.upper()
find_world = text.find("World")

#ngrams
blob = TextBlob("Now is better than never.")
ngram = blob.ngrams(n=3)
def DownloadFullHistoryDataSeries(self, Index, FinancialIndex=False,
                                  Commodity=False, Currency=False,
                                  Bond=False, Etf=False):
    """Scrape the full historical data table for *Index* from investing.com.

    Drives a live Firefox window with pynput mouse/keyboard automation:
    opens the instrument's historical-data page, sets the start year via
    the date picker, selects the whole page, copies it to the clipboard,
    parses the clipboard text and returns it as a pandas DataFrame with
    Date/Price/Open/Max/Min (plus Volume for ETFs).

    Exactly one category flag is expected to be True; it selects both the
    URL section and the hard-coded click coordinates.
    NOTE(review): all pixel coordinates assume a specific screen/window
    layout -- confirm before running on another machine.
    """
    # define functions to click and pause
    def pause(wait=2):
        sleep(wait)

    def click():
        m.press(Button.left)
        pause()
        m.release(Button.left)

    def rclick():
        m.press(Button.right)
        pause()
        m.release(Button.right)

    m = mouse.Controller()
    Button = mouse.Button
    k = keyboard.Controller()
    Key = keyboard.Key

    # open browser and get the page, click on space in order to move down and
    # visualize the object
    driver = webdriver.Firefox()
    if FinancialIndex == True:
        PageUrl = r'https://www.investing.com/indices/' + str(
            Index) + r'-historical-data'
    if Commodity == True:
        PageUrl = r'https://www.investing.com/currencies/' + str(
            Index) + r'-historical-data'
    if Currency == True:
        PageUrl = r'https://www.investing.com/quotes/' + str(
            Index) + r'-historical-data'
    if Bond == True:
        PageUrl = r'https://www.investing.com/rates-bonds/' + str(
            Index) + r'-historical-data'
    if Etf == True:
        PageUrl = r'https://www.investing.com/etfs/' + str(
            Index) + r'-historical-data'
    driver.get(PageUrl)
    pause(2)

    # move down
    k.press(Key.space)
    k.release(Key.space)
    pause(2)

    # click on the DatePicker table in order
    # to open the tables to select the dates
    # ClickPosition = (824, 267)
    if FinancialIndex == True:
        ClickPosition = (825, 317)
    if Commodity == True:
        ClickPosition = (824, 286)
    if Currency == True:
        ClickPosition = (824, 269)
    if Bond == True:
        ClickPosition = (825, 268)
    if Etf == True:
        ClickPosition = (824, 317)
    m.position = ClickPosition
    pause(1)
    click()

    # write the date into the start date DatePicker
    # do not need to write all the date, only the year that we want
    # in this case 2000 because all the time series will start from there
    pause(2)
    k.type('2001')
    pause()
    k.press(Key.enter)

    # click on Apply in order to made the changes
    #ApplyPosition = (806,470)
    if FinancialIndex == True:
        ApplyPosition = (802, 526)
    if Commodity == True:
        ApplyPosition = (806, 495)
    if Currency == True:
        ApplyPosition = (806, 479)
    if Bond == True:
        ApplyPosition = (806, 481)
    if Etf == True:
        ApplyPosition = (806, 527)
    m.position = ApplyPosition
    pause(2)
    click()

    # Select and Copy to Clipboard
    # Used this way instead of use Beautiful Soup
    # because it did not work properly (it requires too much time to write that correctly)
    # so I decided to simply select everything and right click and copy to clipboard
    # after I will get only the historical table copied to clipboard
    pause(10)
    k.press(Key.ctrl)
    k.press("a")
    k.release(Key.ctrl)
    k.release("a")
    pause()
    CenterPage = (562, 518)
    m.position = CenterPage
    rclick()
    pause()
    CopyPosition = (621, 536)
    m.position = CopyPosition
    click()
    pause()
    ClipboardData = self.GetClipboardData()

    # Convert the Data copied into the clipboard to TextBlob
    FXTimeSeriesText = TextBlob(ClipboardData)
    # find the position of the first word in the table
    # (+10 skips past the "Change %" header text and following whitespace --
    # presumably tuned by hand; verify against a real clipboard dump)
    FirstWord = FXTimeSeriesText.find("Change %") + 10
    # find the position of the last word
    LastWord = FXTimeSeriesText.find("Highest") - 2
    # select the table
    FXTable = FXTimeSeriesText[FirstWord:LastWord]
    # convert to a list (rows of 8 whitespace-separated fields)
    ListFXTable = []
    for i in range(0, len(FXTable.words), 8):
        ListFXTable.append(list(FXTable.words[i:(i + 8)]))
    # convert to a PandaDataframe
    # Currency/Etf tables have 9 columns and use '- ' for missing values;
    # rebuild the row list accordingly.
    if Currency == True:
        FXTable = re.sub('\t- ', '\tNA', str(FXTable))
        FXTable = TextBlob(FXTable)
        ListFXTable = []
        for i in range(0, len(FXTable.words), 9):
            ListFXTable.append(list(FXTable.words[i:(i + 9)]))
    if Etf == True:
        FXTable = re.sub('\t- ', '\tNA', str(FXTable))
        FXTable = TextBlob(FXTable)
        ListFXTable = []
        for i in range(0, len(FXTable.words), 9):
            ListFXTable.append(list(FXTable.words[i:(i + 9)]))
    FXTab = pd.DataFrame(ListFXTable)

    # close the browser
    driver.close()

    # format the dataset
    # Copy the Dataset for Currency (it's not beautiful but it's doing its work)
    FXTab2 = FXTab.copy()
    if Currency == True:
        FXTab2.columns = [
            'Date1', 'Date2', 'Date3', 'Price', 'Open', 'Max', 'Min',
            'Volume', 'Var'
        ]
        # Dates arrive split over three tokens (e.g. "Jan" "01," "2001");
        # glue them back together before parsing.
        FXTab2['Date'] = FXTab2['Date1'] + FXTab2['Date2'] + FXTab2['Date3']
        FXTab2['Date'] = pd.to_datetime(FXTab2['Date'],
                                        format="%b%d%Y",
                                        errors='coerce')
        FXTab2 = FXTab2[['Date', 'Price', 'Open', 'Max', 'Min']]
        # Strip thousands separators, coercing bad cells to NaN.
        FXTab2['Price'] = pd.to_numeric(FXTab2['Price'].str.replace(
            ',', ''), errors='coerce')
        FXTab2['Open'] = pd.to_numeric(FXTab2['Open'].str.replace(',', ''),
                                       errors='coerce')
        FXTab2['Max'] = pd.to_numeric(FXTab2['Max'].str.replace(',', ''),
                                      errors='coerce')
        FXTab2['Min'] = pd.to_numeric(FXTab2['Min'].str.replace(',', ''),
                                      errors='coerce')
        return FXTab2
    if Etf == True:
        FXTab2.columns = [
            'Date1', 'Date2', 'Date3', 'Price', 'Open', 'Max', 'Min',
            'Volume', 'Var'
        ]
        FXTab2['Date'] = FXTab2['Date1'] + FXTab2['Date2'] + FXTab2['Date3']
        FXTab2['Date'] = pd.to_datetime(FXTab2['Date'],
                                        format="%b%d%Y",
                                        errors='coerce')
        FXTab2 = FXTab2[['Date', 'Price', 'Open', 'Max', 'Volume', 'Min']]
        FXTab2['Price'] = pd.to_numeric(FXTab2['Price'].str.replace(
            ',', ''), errors='coerce')
        FXTab2['Open'] = pd.to_numeric(FXTab2['Open'].str.replace(',', ''),
                                       errors='coerce')
        FXTab2['Max'] = pd.to_numeric(FXTab2['Max'].str.replace(',', ''),
                                      errors='coerce')
        FXTab2['Min'] = pd.to_numeric(FXTab2['Min'].str.replace(',', ''),
                                      errors='coerce')
        return FXTab2

    # Default path (index / commodity / bond): 8-column table, no Volume.
    FXTab.columns = [
        'Date1', 'Date2', 'Date3', 'Price', 'Open', 'Max', 'Min', 'Var'
    ]
    FXTab['Date'] = FXTab['Date1'] + FXTab['Date2'] + FXTab['Date3']
    FXTab['Date'] = pd.to_datetime(FXTab['Date'],
                                   format="%b%d%Y",
                                   errors='coerce')
    FXTab = FXTab[['Date', 'Price', 'Open', 'Max', 'Min']]
    FXTab['Price'] = pd.to_numeric(FXTab['Price'].str.replace(',', ''),
                                   errors='coerce')
    FXTab['Open'] = pd.to_numeric(FXTab['Open'].str.replace(',', ''),
                                  errors='coerce')
    FXTab['Max'] = pd.to_numeric(FXTab['Max'].str.replace(',', ''),
                                 errors='coerce')
    FXTab['Min'] = pd.to_numeric(FXTab['Min'].str.replace(',', ''),
                                 errors='coerce')

    # Save into the Database ('Same collection')
    # if Update.lower() in ('true','yes','t','y'):
    #     Tab = self.UpdateTimeSeries(FXTab,'MarketData',str(BaseCurrency).upper() +str(SecondCurrency).upper())
    # elif Update.lower() in ('false','no','f','n'):
    #     Tab = self.UploadTimeSeries(FXTab,'MarketData',str(BaseCurrency).upper() +str(SecondCurrency).upper())
    # else:
    #     raise argparse.ArgumentTypeError('Boolean Value Expected with '' ')
    return FXTab
def DownloadFullHistoryFXDataSeries(self, BaseCurrency, SecondCurrency, Update):
    """Scrape the full FX history for BaseCurrency/SecondCurrency and store it.

    Drives a live Firefox window with pynput GUI automation against the
    Italian investing.com site (hard-coded screen coordinates -- layout
    dependent), copies the page to the clipboard, parses it into a pandas
    DataFrame, then updates or uploads the 'MarketData' series depending on
    *Update* ('true'/'yes'/'t'/'y' vs 'false'/'no'/'f'/'n',
    case-insensitive; anything else raises ArgumentTypeError).
    Returns the stored table.
    """
    # define functions to click and pause
    def pause(wait=2):
        sleep(wait)

    def click():
        m.press(Button.left)
        pause()
        m.release(Button.left)

    def rclick():
        m.press(Button.right)
        pause()
        m.release(Button.right)

    m = mouse.Controller()
    Button = mouse.Button
    k = keyboard.Controller()
    Key = keyboard.Key

    # open browser and get the page, click on space in order to move down and
    # visualize the object
    driver = webdriver.Firefox()
    PageUrl = r'https://it.investing.com/currencies/' + str(
        BaseCurrency) + r'-' + str(SecondCurrency) + r'-historical-data#datePickerIconWrap'
    driver.get(PageUrl)
    pause(2)

    # move down
    k.press(Key.space)
    k.release(Key.space)
    pause(2)

    # click on the DatePicker table in order
    # to open the tables to select the dates
    ClickPosition = (825, 341)
    m.position = ClickPosition
    pause(1)
    click()

    # write the date into the start date DatePicker
    # do not need to write all the date, only the year that we want
    # in this case 2000 because all the time series will start from there
    pause(2)
    k.type('2000')
    pause()
    k.press(Key.enter)

    # click on Apply in order to made the changes
    ApplyPosition = (800, 551)
    m.position = ApplyPosition
    pause(2)
    click()

    # Select and Copy to Clipboard
    # Used this way instead of use Beautiful Soup
    # because it did not work properly (it requires too much time to write that correctly)
    # so I decided to simply select everything and right click and copy to clipboard
    # after I will get only the historical table copied to clipboard
    pause(10)
    k.press(Key.ctrl)
    k.press("a")
    k.release(Key.ctrl)
    k.release("a")
    pause()
    CenterPage = (562, 518)
    m.position = CenterPage
    rclick()
    pause()
    CopyPosition = (621, 536)
    m.position = CopyPosition
    click()
    pause()
    ClipboardData = self.GetClipboardData()

    # Convert the Data copied into the clipboard to TextBlob
    FXTimeSeriesText = TextBlob(ClipboardData)
    # find the position of the first word in the table
    # ("Data" = "Date" on the Italian-language page)
    FirstWord = FXTimeSeriesText.find("Data")
    # find the position of the last word
    LastWord = FXTimeSeriesText.find("Media") - 55
    # select the table
    FXTable = FXTimeSeriesText[FirstWord:LastWord]
    # convert to a list
    # NOTE(review): rows advance by 6 words but only 5 are kept per row, so
    # the 6th field (presumably the daily % change) is dropped -- confirm
    # this is intended.
    ListFXTable = []
    for i in range(0, len(FXTable.words), 6):
        ListFXTable.append(list(FXTable.words[i:(i + 5)]))
    # convert to a PandaDataframe
    FXTab = pd.DataFrame(ListFXTable)
    # rename columns (the first parsed row holds the header text)
    FXTab.columns = FXTab.iloc[0]
    FXTab = FXTab[1:]

    # close the browser
    driver.close()

    # format the dataset (Italian locale: ',' decimal separator, d.m.Y dates)
    FXTab.columns = ['Date', 'Price', 'Open', 'Max', 'Min']
    FXTab['Date'] = pd.to_datetime(FXTab['Date'], format="%d.%m.%Y")
    FXTab['Price'] = pd.to_numeric(FXTab['Price'].str.replace(',', '.'))
    FXTab['Open'] = pd.to_numeric(FXTab['Open'].str.replace(',', '.'))
    FXTab['Max'] = pd.to_numeric(FXTab['Max'].str.replace(',', '.'))
    FXTab['Min'] = pd.to_numeric(FXTab['Min'].str.replace(',', '.'))

    # Save into the Database ('Same collection')
    if Update.lower() in ('true', 'yes', 't', 'y'):
        Tab = self.UpdateTimeSeries(FXTab, 'MarketData',
                                    str(BaseCurrency).upper() + str(SecondCurrency).upper())
    elif Update.lower() in ('false', 'no', 'f', 'n'):
        Tab = self.UploadTimeSeries(FXTab, 'MarketData',
                                    str(BaseCurrency).upper() + str(SecondCurrency).upper())
    else:
        raise argparse.ArgumentTypeError('Boolean Value Expected with '' ')
    return Tab