def return_train_tweets(self, filter=None):
    """Return the training tweets, optionally restricted to one target.

    The result is cached on disk with joblib. When the cache file for the
    requested filter already exists it is loaded and returned without
    querying the database. Otherwise the rows are read from the
    ``data_training`` table through ``self.cur``, turned into tweet objects
    with ``make_tweet`` and the resulting list is written to the cache file.

    Args:
        filter: Value the ``target`` column must equal. ``None`` (the
            default) selects every row.

    Returns:
        list: One object built by ``make_tweet`` per row, ordered by id.
    """
    # A missing filter used to raise TypeError on ``str + None``. It now maps
    # to an empty suffix; the file name for a real filter is unchanged, so
    # cache files written earlier are still found.
    cache_path = 'tweets_train' + ('' if filter is None else filter) + '.pkl'
    if os.path.isfile(cache_path):
        # NOTE(review): joblib.load unpickles the file, so the cache location
        # must only contain files written by this code.
        return joblib.load(cache_path)

    if filter is not None:
        # ``(filter)`` is just the string itself; DB-API drivers expect a
        # sequence of parameters, hence the explicit one-element tuple.
        self.cur.execute(
            "SELECT `ID`, `Target`, `Tweet`, `Stance`,`Opinion Towards`, `Sentiment` "
            "FROM `data_training` where target=%s order by id",
            (filter,),
        )
    else:
        self.cur.execute(
            "SELECT `ID`, `Target`, `Tweet`, `Stance`,`Opinion Towards`, `Sentiment` "
            "FROM `data_training` order by id"
        )

    tweets = []
    for row in self.cur.fetchall():
        # Column order: ID, Target, Tweet, Stance, Opinion Towards, Sentiment.
        sentiment = return_labeled_sentiment(row[5])
        opinion_towards = return_labeld_opinion_toward(row[4])
        tweets.append(
            make_tweet(row[0], row[2], row[1], opinion_towards, sentiment,
                       stance=row[3])
        )

    joblib.dump(tweets, cache_path)
    return tweets
def return_tweets_irony(self, filter=None):
    """Return the irony-annotated tweets, optionally restricted to one target.

    The result is cached on disk with joblib. When the cache file for the
    requested filter already exists it is loaded and returned without
    querying the database. Otherwise the rows are read from the
    ``data_ironic`` table through ``self.cur``, turned into tweet objects
    with ``make_tweet`` and the resulting list is written to the cache file.

    Args:
        filter: Value the ``target`` column must equal. ``None`` (the
            default) selects every row.

    Returns:
        list: One object built by ``make_tweet`` per row, ordered by id.
    """
    # A missing filter used to raise TypeError on ``str + None``. It now maps
    # to an empty suffix; the file name for a real filter is unchanged, so
    # cache files written earlier are still found.
    cache_path = 'tweets_irony' + ('' if filter is None else filter) + '.pkl'
    if os.path.isfile(cache_path):
        # NOTE(review): joblib.load unpickles the file, so the cache location
        # must only contain files written by this code.
        return joblib.load(cache_path)

    if filter is not None:
        # ``(filter)`` is just the string itself; DB-API drivers expect a
        # sequence of parameters, hence the explicit one-element tuple.
        self.cur.execute(
            "SELECT `ID`, `Target`, `Tweet`, `Irony` FROM `data_ironic` "
            "where target=%s order by id",
            (filter,),
        )
    else:
        self.cur.execute(
            "SELECT `ID`, `Target`, `Tweet`, `Irony` FROM `data_ironic` order by id"
        )

    tweets = []
    for row in self.cur.fetchall():
        # Column order: ID, Target, Tweet, Irony. Opinion and sentiment are
        # not available in this table, so None is passed for both.
        tweets.append(make_tweet(row[0], row[2], row[1], None, None, irony=row[3]))

    joblib.dump(tweets, cache_path)
    return tweets
def return_tweets_ids(self, language, target, ids, set=None):
    """Return the tweets of one target/language table whose id is in ``ids``.

    Args:
        language: Second half of the table name (``<target>_<language>``);
            also forwarded to ``make_tweet``.
        target: First half of the table name; also forwarded to
            ``make_tweet``.
        ids: Ids to select. Any iterable of values is bound as query
            parameters. A string is inserted into the query verbatim, as
            before, so callers passing a ready-made SQL list such as
            ``"(1, 2)"`` keep working.
        set: Optional value the ``set`` column must equal.

    Returns:
        list: One object built by ``make_tweet`` per row, ordered by id.
        An empty list when ``ids`` is an empty iterable.
    """
    params = []
    if isinstance(ids, str):
        id_list = ids
    else:
        params.extend(ids)
        if not params:
            # ``id in ()`` is not valid SQL; nothing can match anyway.
            return []
        # One placeholder per id: ``str(ids)`` produced ``(5,)`` for a single
        # id and ``[1, 2]`` for a list, neither of which the database accepts.
        id_list = "(" + ", ".join(["%s"] * len(params)) + ")"

    where = "WHERE id in " + id_list + " "
    if set is not None:
        # Bound as a parameter instead of being quoted by hand.
        where += " and `set`=%s"
        params.append(set)

    # NOTE(review): table names cannot be bound as parameters, so target and
    # language are still concatenated; they must come from trusted code.
    query = (" SELECT id, tweet, pos, stance from " + target + "_" + language
             + " " + where + " order by id")
    if params:
        self.cur.execute(query, tuple(params))
    else:
        self.cur.execute(query)

    tweets = []
    for row in self.cur.fetchall():
        # Column order: id, tweet, pos, stance.
        tweets.append(make_tweet(row[0], row[1], row[2], row[3], language, target))
    return tweets
def return_tweets_test(self, phase=None):
    """Return the tweets of ``corpus_automatic_stance``, optionally by phase.

    Rows whose stance is ``'disagreement'`` are always excluded by the query.

    Args:
        phase: When given, only rows whose ``phase`` column equals this
            value are selected. ``None`` (the default) selects all phases.

    Returns:
        list: One object built by ``make_tweet`` per row, in the order the
        cursor returns them.
    """
    query = (
        " SELECT `id_tweet`, `text_tweet`, `text_retweet`, `text_reply`, "
        "`text_reply_to`, `stance`, `phase`, `user_id` "
        "FROM `corpus_automatic_stance` where stance !='disagreement'"
    )
    if phase is None:
        self.cur.execute(query)
    else:
        # DB-API drivers expect a sequence of parameters; a bare scalar is
        # only tolerated by some of them.
        self.cur.execute(query + " and phase=%s", (phase,))

    tweets = []
    for row in self.cur.fetchall():
        # Column order: id_tweet, text_tweet, text_retweet, text_reply,
        # text_reply_to, stance, phase, user_id.
        tweet_id, text, retweet, reply, reply_to = row[0], row[1], row[2], row[3], row[4]
        stance, row_phase, user_id = row[5], row[6], row[7]
        # NOTE(review): the builtin ``object`` is passed as the first
        # argument exactly as before; make_tweet is defined outside this
        # block, so confirm that this is intended.
        tweets.append(
            make_tweet(object, tweet_id, user_id, text, retweet, reply,
                       reply_to, stance, row_phase)
        )
    return tweets
def return_tweets_test(self):
    """Fetch the test tweets whose target is 'Hillary Clinton'.

    Runs a fixed query against the ``test`` table through ``self.cur`` and
    wraps every returned row (ID, Text, Label) in a tweet object created by
    ``make_tweet``.

    Returns:
        list: The tweet objects, in the order the cursor returns the rows.
    """
    self.cur.execute("SELECT `ID`, `Text`,`Label` FROM `test` where target = 'Hillary Clinton'")
    rows = self.cur.fetchall()
    # Each row becomes one tweet object: make_tweet(id, text, label).
    return [make_tweet(row[0], row[1], row[2]) for row in rows]
def return_tweets_training(self):
    """Return the training tweets read from tab-separated files.

    The result is cached on disk with joblib. When the cache file exists it
    is loaded and returned. Otherwise every file matched by the glob pattern
    is read in sorted order, the first row of each file is skipped, and each
    remaining row is turned into a tweet object with
    ``make_tweet(id, text, label)`` taken from columns 0, 1 and 2. The list
    is then written to the cache file.

    NOTE(review): the cache path and the glob pattern are the literal
    strings found in the code; they look like placeholders to be filled in.

    Returns:
        list: The tweet objects of all files, in file and row order.
    """
    cache_path = '..........'
    if os.path.isfile(cache_path):
        # NOTE(review): joblib.load unpickles the file, so the cache location
        # must only contain files written by this code.
        return joblib.load(cache_path)

    tweets = []
    for path in sorted(glob.glob(".........")):
        # The context manager closes each file; the handles used to leak.
        with open(path, newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter='\t', quotechar='"')
            # The first row of every file is skipped, as before (presumably
            # a header line).
            next(reader, None)
            for row in reader:
                tweets.append(make_tweet(row[0], row[1], row[2]))

    joblib.dump(tweets, cache_path)
    return tweets
def return_tweets_training(self):
    """Return the training tweets read from ``../data/TRAIN.csv``.

    The file is parsed as comma-separated values with ``"`` as quote
    character. The first row is skipped as a header, and each remaining row
    is turned into a tweet object with
    ``make_tweet(id, user_id, text, label)`` taken from columns 0 to 3.

    Returns:
        list: The tweet objects in file order. An empty list when the file
        contains no data rows, including a completely empty file.
    """
    tweets = []
    # newline='' is what the csv module expects from its input file, and the
    # context manager closes the handle, which used to leak.
    with open("../data/TRAIN.csv", newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            tweets.append(make_tweet(row[0], row[1], row[2], row[3]))
    return tweets
def return_tweets(self, language, target, set=None):
    """Return the tweets of one target/language table, optionally one set.

    The result is cached on disk with joblib, one file per
    language/target/set combination. When that file exists it is loaded and
    returned without querying the database. Otherwise the rows are read
    from the ``<target>_<language>`` table through ``self.cur``, turned into
    tweet objects with ``make_tweet`` and the list is written to the cache.

    Args:
        language: Second half of the table name; also forwarded to
            ``make_tweet``.
        target: First half of the table name; also forwarded to
            ``make_tweet``.
        set: Optional value the ``set`` column must equal. ``None`` selects
            every row.

    Returns:
        list: One object built by ``make_tweet`` per row, ordered by id.
    """
    # ``str(set)`` keeps the historical file name, including the literal
    # 'None' when no set is requested.
    cache_path = 'tweets_' + language + '_' + target + '_' + str(set) + '.pkl'
    if os.path.isfile(cache_path):
        # NOTE(review): joblib.load unpickles the file, so the cache location
        # must only contain files written by this code.
        return joblib.load(cache_path)

    where = ""
    params = ()
    if set is not None:
        # Bound as a parameter instead of being quoted by hand.
        where = " WHERE `set`=%s"
        params = (set,)

    # NOTE(review): table names cannot be bound as parameters, so target and
    # language are still concatenated; they must come from trusted code.
    query = (" SELECT id, tweet, pos, stance from " + target + "_" + language
             + " " + where + " order by id")
    if params:
        self.cur.execute(query, params)
    else:
        self.cur.execute(query)

    tweets = []
    for row in self.cur.fetchall():
        # Column order: id, tweet, pos, stance.
        tweets.append(make_tweet(row[0], row[1], row[2], row[3], language, target))

    joblib.dump(tweets, cache_path)
    return tweets