Exemplo n.º 1
0
def lexical_diversity(text, count=100):
    """Plot the lexical diversity of the tweets returned for a search text.

    The lexical diversity of a tweet is the number of distinct
    whitespace-separated words divided by the total number of words.
    Tweets that contain no words are skipped because the ratio is undefined
    for them.

    Args:
        text: Search text forwarded to ``get_tweets``.
        count: Number of tweets to request; coerced with ``int``.

    Returns:
        None. The values are shown as a scatter plot and, when the
        ``verbose`` setting is enabled, printed together with their average.
        A ``tweepy.TweepError`` is caught and its reason is printed.
    """
    try:
        tweets_text = get_tweets(text, int(count))   # Text of every resulting tweet
        settings = loadSettings()
        lexical_values = []                          # One diversity value per usable tweet

        for tweet_text in tweets_text:
            words = tweet_text.split()
            if not words:
                # An empty tweet has no defined ratio; dividing would raise
                # ZeroDivisionError, so leave it out of the statistics.
                continue
            lexical_value = len(set(words)) / float(len(words))
            lexical_values.append(lexical_value)
            if settings["verbose"] == True:
                print("Lexical Diversity value from 0 to 1 is " + str(lexical_value))

        # Only report an average when there is at least one value to average.
        if settings["verbose"] == True and lexical_values:
            average = sum(lexical_values) / len(lexical_values)
            print("The average lexical value is " + str(average))

        # Scatter plot: x is the 1-based position of the tweet, y its diversity.
        positions = list(range(1, len(lexical_values) + 1))
        plt.xlabel("Number of Tweets")
        plt.ylabel("Lexical Diversity")
        plt.scatter(positions, lexical_values)
        # The call parentheses matter: a bare ``plt.show`` only references
        # the function and never displays the figure.
        plt.show()

    except tweepy.TweepError as e:
        print(e.reason)
Exemplo n.º 2
0
def main(argv):
    """Run the interactive ANCLA prompt until the user asks to quit.

    Each iteration reloads and prints the current settings, reads one line
    from standard input and then either leaves the loop, prints the manual
    from ``help/manual.txt``, or parses the line with the ANCLA grammar and
    walks the resulting tree with ``CustomANCLAListener``.

    Args:
        argv: Command-line arguments; not used by this function.
    """
    quit_words = ("q", "quit", "exit")
    help_words = ("help", "manual", "man")

    print('ANCLA Command Line')

    while True:
        settings = loadSettings()
        print("\nCurrent settings:\n" + str(settings))
        user_input = input("-> ")
        command = user_input.lower()

        if command in quit_words:
            break

        if command in help_words:
            with open("help/manual.txt", 'r') as helpFile:
                for line in helpFile:
                    print(line)
            continue

        # Standard antlr4 pipeline: characters -> tokens -> parse tree.
        i_stream = InputStream(user_input)
        lexer = ANCLALexer(i_stream)
        stream = CommonTokenStream(lexer)
        parser = ANCLAParser(stream)
        parser.buildParseTrees = True
        # The whole input line is parsed with the ``exp`` rule.
        tree = parser.exp()

        # Input with syntax errors is not executed.
        if parser.getNumberOfSyntaxErrors() > 0:
            continue

        ANCLA = CustomANCLAListener()
        walker = ParseTreeWalker()
        walker.walk(ANCLA, tree)
Exemplo n.º 3
0
def writeToFile(data, folder):
    """Dump ``data`` as indented JSON into ``data/<folder>/`` if logging is on.

    Nothing is written unless the ``datalog`` setting is enabled. The file
    is named after the current local date and time
    (``DD-MM-YYYY_HHh-MMm-SSs.json``).

    Args:
        data: JSON-serialisable object to store.
        folder: Name of the sub-folder of ``data/`` that receives the file.
    """
    settings = loadSettings()
    if settings['datalog'] == True:
        target_dir = 'data/' + folder
        # exist_ok avoids the check-then-create race of testing for the
        # directory first and creating it afterwards.
        os.makedirs(target_dir, exist_ok=True)
        # A single clock read keeps the date and the time of day consistent;
        # two separate reads could straddle a second or midnight boundary.
        name = datetime.today().strftime('%d-%m-%Y_%Hh-%Mm-%Ss') + ".json"
        with open(target_dir + "/" + name, 'w') as outfile:
            json.dump(data, outfile, indent=4)
Exemplo n.º 4
0
def getTweets(keyword, tweetNumber=100, backup=10):
    """Search for tweets matching ``keyword`` and collect them in a dict.

    Args:
        keyword: Query passed to ``api.search``.
        tweetNumber: Maximum number of tweets to fetch; coerced with ``int``.
        backup: Write an intermediate backup after every ``backup`` collected
            tweets (only when the ``datalog`` setting is enabled). Coerced
            with ``int``; a value of zero is treated as 1.

    Returns:
        A dict of the form ``{'Tweets': [record, ...]}`` with one record per
        collected tweet.
    """
    tweetNumber = int(tweetNumber)
    # Accept both numbers and numeric strings; an interval of 0 would make
    # the modulo below raise ZeroDivisionError.
    backup = int(backup) or 1

    number = 1
    settings = loadSettings()
    tweetJSON = {'Tweets': []}

    for tweet in tweepy.Cursor(api.search, keyword, lang="en").items(tweetNumber):
        try:

            if settings["verbose"] == True:
                #Current count
                print("Tweet: " + str(number))
                #Tweet text
                print(tweet.text)

            tweetJSON['Tweets'].append({
                'tweet_id': tweet.id,
                'tweet_created': str(tweet.created_at),
                'text': tweet.text,
                'favorites': tweet.favorite_count,
                'retweets': tweet.retweet_count,
                'user_name': tweet.user.name,
                'user_handle': tweet.user.screen_name,
                'verified': tweet.user.verified,
                'followers': tweet.user.followers_count,
                'friends': tweet.user.friends_count,
                'user_likes': tweet.user.favourites_count,
                'user_tweets': tweet.user.statuses_count,
                'user_created': str(tweet.user.created_at)
            })

            number = number + 1

            # Back up after every ``backup`` collected tweets. The previous
            # test (number == number % backup) was only true while
            # number < backup, so it never acted as a periodic backup.
            collected = len(tweetJSON['Tweets'])
            if collected % backup == 0 and settings["datalog"] == True:
                writeToFile(tweetJSON, 'search-tweets')

        except tweepy.TweepError as e:
            print(e.reason)

        except StopIteration:
            break

    # Final write so the log always contains the complete result.
    if settings["datalog"] == True:
        writeToFile(tweetJSON, 'search-tweets')

    return tweetJSON
Exemplo n.º 5
0
    def on_data(self, data):
        """Score, record and plot one raw message received from the stream.

        The message is decoded from JSON, its ``text`` is reduced to runs of
        ASCII letters and scored sentence by sentence with TextBlob. The
        scores are added to the module-level running totals ``positive``,
        ``negative`` and ``compound``, a record is appended to the
        module-level ``tweetJSON`` and the totals are plotted against the
        elapsed time.

        Args:
            data: JSON text of one stream message.

        Returns:
            False when figure 1 no longer exists (after the first message),
            when ``self.maxTweets`` is non-zero and has been reached, or when
            ``self.maxTime`` is non-zero and has been reached. True when
            ``self.maxTime`` is non-zero and has not been reached yet.
            Otherwise the method falls off the end and returns None.
            NOTE(review): presumably the caller stops the stream on False
            and keeps going on True/None - confirm against the tweepy
            version in use.
        """
        all_data = json.loads(data)
        # NOTE(review): this indexing raises KeyError if a message has no
        # "text" key - confirm whether the stream can deliver such messages.
        tweet = all_data["text"]
        # Keep only runs of ASCII letters, joined by single spaces.
        tweet = " ".join(re.findall("[a-zA-Z]+", tweet))
        blob = TextBlob(tweet.strip())

        # All plotting state and running totals live at module level.
        global initime
        global positive
        global negative
        global compound
        global count
        global minusx
        global x
        global minusy
        global y
        global settings

        count = count + 1

        # First message of a run: start the clock, reload the settings and
        # set the initial axis limits used by plt.axis() below.
        if count == 1:
            initime = time.time()
            settings = loadSettings()
            x = 70
            minusy = -20
            y = 20

        # Whole seconds elapsed since the first message of this run.
        t = int(calctime(initime))

        # senti is the sum of the sentence polarities of this message; the
        # globals accumulate the non-negative and the negative polarities
        # separately across messages.
        senti = 0
        for sen in blob.sentences:
            senti = senti + sen.sentiment.polarity
            if sen.sentiment.polarity >= 0:
                positive = positive + sen.sentiment.polarity
            else:
                negative = negative + sen.sentiment.polarity

        compound = compound + senti

        if settings["verbose"] == True:
            #Time in seconds
            print("Time: " + str(t))
            #Current count
            print("Tweet: " + str(count))
            #Tweet text
            print(tweet)
            #Sentiment value
            print("Sentiment value: " + str(senti) + "\n")
            #Sentiment values
            #print(str(positive) + ' ' + str(negative) + ' ' + str(compound))

        # The record stores the original text, not the letters-only version
        # that was scored.
        tweetJSON['Tweets'].append({
            'tweet_id':
            all_data['id'],
            'tweet_created':
            all_data['created_at'],
            'text':
            all_data['text'],
            'sentiment':
            senti,
            'favorites':
            all_data['favorite_count'],
            'retweets':
            all_data['retweet_count'],
            'replies':
            all_data['reply_count'],
            'user_name':
            all_data['user']['name'],
            'user_handle':
            all_data['user']['screen_name'],
            'verified':
            all_data['user']['verified'],
            'followers':
            all_data['user']['followers_count'],
            'friends':
            all_data['user']['friends_count'],
            'user_likes':
            all_data['user']['favourites_count'],
            'user_tweets':
            all_data['user']['statuses_count'],
            'user_created':
            all_data['user']['created_at']
        })

        # Periodic backup after every self.backup messages.
        # NOTE(review): assumes self.backup is a non-zero int - confirm at
        # the call site that constructs the listener.
        if count % self.backup == 0 and settings["datalog"] == True:
            writeToFile(tweetJSON, 'live-sentiment')

        # Grow the axis limits whenever the data leaves the current window.
        if t > x:
            x = t + 5
        if negative < minusy:
            minusy = negative - 5
        if positive > y:
            y = positive + 5

        # The left edge of the time axis is the time of the first message.
        if count == 1:
            minusx = t

        # Figure 1 is gone after the first message (presumably the window
        # was closed by the user): reset the counter and return False.
        if not plt.fignum_exists(1) and count != 1:
            count = 0
            return False

        plt.axis([minusx, x, minusy, y])
        plt.title("Live Tweet Sentiment Analysis")
        plt.xlabel('Time')
        plt.ylabel('Sentiment')
        # One marker per running total at the current time: 'bo' for
        # positive, 'ro' for negative, 'mo' for compound.
        plt.plot([t], [positive], 'bo', [t], [negative], 'ro', [t], [compound],
                 'mo')
        plt.figure(num=1)
        plt.pause(0.0001)

        # A limit of 0 disables the corresponding stop condition.
        if self.maxTweets != 0:
            if count == int(self.maxTweets):
                count = 0
                return False

        if self.maxTime != 0:
            if t >= self.maxTime:
                count = 0
                return False
            else:
                return True
Exemplo n.º 6
0
from textblob import TextBlob
from tweepy import Stream
from tweepy import API
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
from settingHandler import loadSettings
from writeFile import writeToFile
from loadCreds import loadCreds

# Running sentiment totals shared with the stream listener.
positive = negative = compound = 0
# Start time of the current run and number of messages handled so far.
initime = 0
count = 0
# Interactive mode so the figure can be redrawn without blocking.
plt.ion()
settings = loadSettings()

# Every handled message is appended to tweetJSON['Tweets'].
tweetJSON = {'Tweets': []}


def calctime(a):
    """Return the number of seconds between timestamp ``a`` and now."""
    now = time.time()
    elapsed = now - a
    return elapsed


class listener(StreamListener):
    """Stream listener configured with stop limits and a backup interval."""

    def __init__(self, count, maxTweets, maxTime, backup):
        """Store the run configuration.

        Args:
            count: Initial message counter value.
            maxTweets: Tweet limit for the run.
            maxTime: Time limit for the run.
            backup: Backup interval, in messages.
        """
        # Let the base class initialise its own state before adding ours.
        super().__init__()
        self.count = count
        self.maxTweets = maxTweets
        self.maxTime = maxTime
        self.backup = backup
Exemplo n.º 7
0
def runSentimentAnalysis(keyword, tweetNumber=100, backup=10):
    """Search tweets for ``keyword``, score their polarity and plot it.

    Every tweet is scored with TextBlob, recorded in the result dict and
    plotted as one point of a scatter plot. The plot stays open until
    figure 1 is closed.

    Args:
        keyword: Query passed to ``api.search``.
        tweetNumber: Maximum number of tweets to fetch; coerced with ``int``.
        backup: Write an intermediate backup after every ``backup`` collected
            tweets (only when the ``datalog`` setting is enabled). Coerced
            with ``int``; a value of zero is treated as 1.

    Returns:
        A dict of the form ``{'Tweets': [record, ...]}`` with one record per
        collected tweet.
    """
    tweetNumber = int(tweetNumber)
    # Accept both numbers and numeric strings; an interval of 0 would make
    # the modulo below raise ZeroDivisionError.
    backup = int(backup) or 1

    polarity_list = []
    numbers_list = []
    number = 1
    settings = loadSettings()
    tweetJSON = {'Tweets': []}

    for tweet in tweepy.Cursor(api.search, keyword,
                               lang="en").items(tweetNumber):
        try:

            polarity = TextBlob(tweet.text).sentiment.polarity
            polarity_list.append(polarity)
            numbers_list.append(number)

            if settings["verbose"] == True:
                #Current count
                print("Tweet: " + str(number))
                #Tweet text
                print(tweet.text)
                #Sentiment value
                print("Sentiment value: " + str(polarity) + "\n")

            tweetJSON['Tweets'].append({
                'tweet_id': tweet.id,
                'tweet_created': str(tweet.created_at),
                'text': tweet.text,
                'polarity': polarity,
                'favorites': tweet.favorite_count,
                'retweets': tweet.retweet_count,
                'user_name': tweet.user.name,
                'user_handle': tweet.user.screen_name,
                'verified': tweet.user.verified,
                'followers': tweet.user.followers_count,
                'friends': tweet.user.friends_count,
                'user_likes': tweet.user.favourites_count,
                'user_tweets': tweet.user.statuses_count,
                'user_created': str(tweet.user.created_at)
            })

            number = number + 1

            # Back up after every ``backup`` collected tweets. The previous
            # test (number == number % backup) was only true while
            # number < backup, so it never acted as a periodic backup.
            collected = len(tweetJSON['Tweets'])
            if collected % backup == 0 and settings["datalog"] == True:
                writeToFile(tweetJSON, 'analyze-sentiment')

        except tweepy.TweepError as e:
            print(e.reason)

        except StopIteration:
            break

    # Final write so the log always contains the complete result.
    if settings["datalog"] == True:
        writeToFile(tweetJSON, 'analyze-sentiment')

    #Here we define axes
    plt.figure(num=1)
    plt.ion()
    axes = plt.gca()
    axes.set_ylim([-1, 2])

    plt.scatter(numbers_list, polarity_list)

    #Here we calculate important info to show on a plt box
    if polarity_list:
        averagePolarity = sum(polarity_list) / len(polarity_list)
        averagePolarity = "{0:.0f}%".format(averagePolarity * 100)
    else:
        # No tweets were collected: there is nothing to average, and
        # dividing by the empty length would raise ZeroDivisionError.
        averagePolarity = "N/A"
    # Named ``timestamp`` so the local does not shadow the ``time`` module.
    timestamp = datetime.now().strftime("At: %H:%M\nOn: %m-%d-%y")
    plt.text(1,
             1.25,
             "Average Polarity:  " + str(averagePolarity) + "\n" + timestamp,
             fontsize=12,
             bbox=dict(facecolor='none',
                       edgecolor='black',
                       boxstyle='square, pad = 1'))

    plt.title("Sentiment of " + keyword + " on Twitter")
    plt.xlabel("Number of Tweets")
    plt.ylabel("Sentiment")

    # Keep the event loop alive until the figure is closed.
    while plt.fignum_exists(1):
        plt.show()
        plt.pause(0.0001)

    return tweetJSON
Exemplo n.º 8
0
def _accountRecord(account):
    """Build the JSON-serialisable record stored for one account object."""
    return {
        'id': account.id,
        'created_at': str(account.created_at),
        'description': account.description,
        'user_name': account.name,
        'user_handle': account.screen_name,
        'verified': account.verified,
        'followers': account.followers_count,
        'friends': account.friends_count,
        'likes': account.favourites_count,
        'tweets': account.statuses_count,
    }


def runComptetitorAnalysis(usr, cmptr):
    """Print and record a side-by-side comparison of two accounts.

    Both accounts are fetched with ``api.get_user``. Their handle, follower,
    tweet, given-favourite and friend counts are printed one metric per
    line, and a record for each account is collected in the result.

    Args:
        usr: Screen name of the user's own account.
        cmptr: Screen name of the competitor's account.

    Returns:
        A dict of the form ``{'Tweets': [user_record, competitor_record]}``.
    """
    settings = loadSettings()

    user = api.get_user(screen_name=usr)
    competitor = api.get_user(screen_name=cmptr)

    userVals = [
        "@" + user.screen_name, user.followers_count, user.statuses_count,
        user.favourites_count, user.friends_count
    ]
    competitorVals = [
        "@" + competitor.screen_name, competitor.followers_count,
        competitor.statuses_count, competitor.favourites_count,
        competitor.friends_count
    ]
    cat = [
        "User Handle: ", "Followers: ", "Tweets: ", "FavesGiven: ", "Friends: "
    ]

    # One line per metric: "<label><user value> | <competitor value>".
    for userV, compV, c in zip(userVals, competitorVals, cat):
        print(c + str(userV) + " | " + str(compV))

    # TODO: optional bar chart comparing each account's percentage share
    # of followers, tweets, likes and friends.

    tweetJSON = {'Tweets': [_accountRecord(user), _accountRecord(competitor)]}

    # NOTE(review): the log folder is 'analyze-sentiment', the same one the
    # sentiment analysis uses - confirm this is intended for competitor data.
    if settings["datalog"] == True:
        writeToFile(tweetJSON, 'analyze-sentiment')

    return tweetJSON