Пример #1
0
    def return_train_tweets(self, filter=None):
        """Load the training tweets, optionally restricted to one target.

        The result is cached on disk with joblib. If the cache file for this
        filter already exists it is loaded and returned without querying the
        database. Otherwise the rows are read from ``data_training``, each row
        is turned into a tweet object with ``make_tweet``, and the resulting
        list is dumped to the cache file before being returned.

        Args:
            filter: Value compared against the ``target`` column, or ``None``
                (the default) to load every row.

        Returns:
            The list of objects produced by ``make_tweet``, in ``id`` order.
        """
        # Concatenating the default filter=None into the file name used to
        # raise TypeError; the unfiltered case now uses an empty suffix.
        cache_path = 'tweets_train' + ('' if filter is None else filter) + '.pkl'

        if os.path.isfile(cache_path):
            # NOTE(review): joblib.load unpickles the file, so the cache must
            # only ever come from a trusted location.
            return joblib.load(cache_path)

        select = "SELECT `ID`, `Target`, `Tweet`, `Stance`,`Opinion Towards`, `Sentiment` FROM `data_training`"
        if filter is not None:
            # DB-API parameters must be a sequence: "(filter)" is just the
            # string itself, "(filter,)" is the one-element tuple.
            self.cur.execute(select + " where target=%s  order by id", (filter,))
        else:
            self.cur.execute(select + "  order by id")

        tweets = []
        for row in self.cur.fetchall():
            # Column order: ID, Target, Tweet, Stance, Opinion Towards, Sentiment.
            sentiment = return_labeled_sentiment(row[5])
            opinion_towards = return_labeld_opinion_toward(row[4])
            tweets.append(
                make_tweet(row[0], row[2], row[1], opinion_towards, sentiment,
                           stance=row[3]))

        joblib.dump(tweets, cache_path)

        return tweets
Пример #2
0
    def return_tweets_irony(self, filter=None):
        """Load the irony-annotated tweets, optionally restricted to one target.

        The result is cached on disk with joblib. If the cache file for this
        filter already exists it is loaded and returned without querying the
        database. Otherwise the rows are read from ``data_ironic``, each row is
        turned into a tweet object with ``make_tweet``, and the resulting list
        is dumped to the cache file before being returned.

        Args:
            filter: Value compared against the ``target`` column, or ``None``
                (the default) to load every row.

        Returns:
            The list of objects produced by ``make_tweet``, in ``id`` order.
        """
        # Concatenating the default filter=None into the file name used to
        # raise TypeError; the unfiltered case now uses an empty suffix.
        cache_path = 'tweets_irony' + ('' if filter is None else filter) + '.pkl'

        if os.path.isfile(cache_path):
            # NOTE(review): joblib.load unpickles the file, so the cache must
            # only ever come from a trusted location.
            return joblib.load(cache_path)

        select = "SELECT `ID`, `Target`, `Tweet`, `Irony` FROM `data_ironic`"
        if filter is not None:
            # DB-API parameters must be a sequence: "(filter)" is just the
            # string itself, "(filter,)" is the one-element tuple.
            self.cur.execute(select + " where target=%s order by id", (filter,))
        else:
            self.cur.execute(select + " order by id")

        tweets = []
        for row in self.cur.fetchall():
            # Column order: ID, Target, Tweet, Irony. This table carries no
            # opinion or sentiment columns, so None is passed for both.
            tweets.append(
                make_tweet(row[0], row[2], row[1], None, None, irony=row[3]))

        joblib.dump(tweets, cache_path)

        return tweets
    def return_tweets_ids(self, language, target, ids, set=None):
        """Fetch the tweets whose id is in ``ids`` from ``<target>_<language>``.

        Args:
            language: Second half of the table name (``<target>_<language>``).
            target: First half of the table name.
            ids: Collection of ids to select. A string is still accepted and
                spliced into the query verbatim (e.g. ``"(1, 2)"``), matching
                the previous behaviour.
            set: Optional value for the ``set`` column; ``None`` disables the
                extra condition.

        Returns:
            The list of objects produced by ``make_tweet``, in ``id`` order.
            An empty ``ids`` collection yields an empty list without querying.
        """
        params = []
        if isinstance(ids, str):
            where = "WHERE id in " + ids + " "
        else:
            id_values = list(ids)
            if not id_values:
                # "id in ()" is not valid SQL; nothing can match anyway.
                return []
            # str(ids) rendered a one-element tuple as "(1,)", which is not
            # valid SQL; one placeholder per id avoids that and the quoting.
            where = "WHERE id in (" + ", ".join(["%s"] * len(id_values)) + ") "
            params.extend(id_values)

        if set is not None:
            # Bound as a parameter instead of being quoted by hand.
            where += " and  `set`=%s"
            params.append(set)

        # NOTE(review): table names cannot be bound as parameters, so target
        # and language are still concatenated; callers must pass trusted values.
        query = (" SELECT id, tweet, pos,  stance from " + target +
                 "_" + language + " " + where + "  order by id")
        if params:
            self.cur.execute(query, params)
        else:
            self.cur.execute(query)

        tweets = []
        for row in self.cur.fetchall():
            # Column order: id, tweet, pos, stance.
            tweets.append(
                make_tweet(row[0], row[1], row[2], row[3], language, target))

        return tweets
Пример #4
0
    def return_tweets_test(self, phase=None):
        """Fetch tweets from ``corpus_automatic_stance``, skipping 'disagreement'.

        No on-disk cache is used: every call queries the database.

        Args:
            phase: When given, only rows whose ``phase`` column equals this
                value are returned; ``None`` (the default) returns all phases.

        Returns:
            The list of objects produced by ``make_tweet``, in fetch order.
        """
        if phase is None:
            self.cur.execute(
                " SELECT  `id_tweet`, `text_tweet`, `text_retweet`, `text_reply`, `text_reply_to`, `stance`, `phase`, `user_id` FROM `corpus_automatic_stance` where stance !='disagreement'"
            )
        else:
            # DB-API parameters must be a sequence, so the single value is
            # wrapped in a one-element tuple rather than passed bare.
            self.cur.execute(
                " SELECT  `id_tweet`,  `text_tweet`, `text_retweet`, `text_reply`, `text_reply_to`, `stance`, `phase`, `user_id` FROM `corpus_automatic_stance` where stance !='disagreement' and phase=%s",
                (phase,))

        tweets = []
        for row in self.cur.fetchall():
            # Distinct local names keep the ``phase`` argument and the ``id``
            # builtin from being overwritten by per-row values.
            (tweet_id, text, retweet, reply, reply_to, stance, row_phase,
             user_id) = row

            # NOTE(review): the builtin ``object`` is passed as the first
            # positional argument, exactly as before; confirm make_tweet
            # expects it.
            tweets.append(
                make_tweet(object, tweet_id, user_id, text, retweet, reply,
                           reply_to, stance, row_phase))

        return tweets
Пример #5
0
    def return_tweets_test(self):
        """Fetch the test tweets and wrap every row in a tweet object.

        Runs a fixed query against the ``test`` table (rows whose target is
        'Hillary Clinton') and hands each row's ID, Text and Label to
        ``make_tweet``. The same data could equally be read from a csv file
        instead of the database.

        Returns:
            list: one ``make_tweet`` result per fetched row, in fetch order.
        """
        self.cur.execute("SELECT `ID`, `Text`,`Label`  FROM `test` where target = 'Hillary Clinton'")

        # Each row is (ID, Text, Label); build one tweet object per row.
        return [
            make_tweet(row[0], row[1], row[2])
            for row in self.cur.fetchall()
        ]
Пример #6
0
    def return_tweets_training(self):
        """Return the list of training tweets, built from tab-separated files.

        If the joblib cache file exists it is loaded and returned directly.
        Otherwise every file matched by the glob pattern is read in sorted
        order, its first row is skipped as a header, and the first three
        columns of each remaining row (id, text, label) are passed to
        ``make_tweet``. The resulting list is dumped to the cache file.

        The cache path and the glob pattern are dotted placeholders in this
        template and are meant to be replaced with real paths.

        Returns:
            list: one ``make_tweet`` result per data row, across all files.
        """
        if os.path.isfile('..........'):
            # NOTE(review): joblib.load unpickles the file, so the cache must
            # only ever come from a trusted location.
            return joblib.load('..........')

        tweets = []

        for path in sorted(glob.glob(".........")):
            # The context manager closes each file; previously every opened
            # file handle was leaked.
            with open(path, newline='') as csvfile:
                reader = csv.reader(csvfile, delimiter='\t', quotechar='"')
                next(reader, None)  # skip the header row (no-op on empty files)
                for row in reader:
                    # Columns: id, text, label.
                    tweets.append(make_tweet(row[0], row[1], row[2]))

        joblib.dump(tweets, '..........')

        return tweets
    def return_tweets_training(self):
        """Return the list of training tweets read from ``../data/TRAIN.csv``.

        The first line of the file is skipped as a header. Each remaining
        comma-separated row supplies id, user id, text and label (its first
        four columns), which are passed to ``make_tweet``.

        Returns:
            list: one ``make_tweet`` result per data row, in file order.
        """
        tweets = []
        # The context manager closes the file; previously the handle leaked.
        with open("../data/TRAIN.csv") as csvfile:
            # Skip the header line; the default avoids StopIteration when the
            # file is empty.
            next(csvfile, None)
            reader = csv.reader(csvfile, delimiter=',', quotechar='"')

            for row in reader:
                # Columns: id, user_id, text, label.
                tweets.append(make_tweet(row[0], row[1], row[2], row[3]))
        return tweets
    def return_tweets(self, language, target, set=None):
        """Load all tweets from ``<target>_<language>``, optionally for one set.

        The result is cached on disk with joblib under
        ``tweets_<language>_<target>_<set>.pkl``. If that file exists it is
        loaded and returned without querying the database; otherwise the rows
        are fetched, wrapped with ``make_tweet`` and dumped to the cache.

        Args:
            language: Second half of the table name; also part of the cache name.
            target: First half of the table name; also part of the cache name.
            set: Optional value for the ``set`` column; ``None`` selects every
                row.

        Returns:
            The list of objects produced by ``make_tweet``, in ``id`` order.
        """
        cache_path = ('tweets_' + language + '_' + target + '_' + str(set) +
                      '.pkl')
        if os.path.isfile(cache_path):
            # NOTE(review): joblib.load unpickles the file, so the cache must
            # only ever come from a trusted location.
            return joblib.load(cache_path)

        # NOTE(review): table names cannot be bound as parameters, so target
        # and language are still concatenated; callers must pass trusted values.
        select = " SELECT id, tweet, pos,  stance from " + target + "_" + language + " "
        if set is not None:
            # Bound as a parameter instead of being quoted by hand.
            self.cur.execute(select + " WHERE `set`=%s" + "  order by id",
                             (set,))
        else:
            self.cur.execute(select + "  order by id")

        tweets = []
        # not_founds is never incremented; it is kept only so the progress
        # output printed per row stays the same as before.
        not_founds = 0
        for i, row in enumerate(self.cur.fetchall(), start=1):
            print(i, not_founds)
            # Column order: id, tweet, pos, stance.
            tweets.append(
                make_tweet(row[0], row[1], row[2], row[3], language, target))

        joblib.dump(tweets, cache_path)

        return tweets