Example #1
import tweepy

print('The consumer key must belong to a Twitter developer account, and the access key must belong to the account whose followers you want to remove (they can be the same account if you wish to remove your developer account\'s followers).')
consumerKey = input('Enter your consumer key: ')
consumerSecret = input('Enter your consumer secret key: ')
accessKey = input('Enter your access key: ')
accessSecret = input('Enter your access secret key: ')

auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessKey, accessSecret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, compression=True)


ids = []

print('Starting to remove followers. You\'ll see their Twitter user ID printed out when they have been blocked and unblocked.')
screenname = api.me().screen_name  # followers of the authenticated (access-key) account

for page in tweepy.Cursor(api.followers_ids, screen_name=screenname).pages():
    ids.extend(page)

for user in ids:
    try:
        api.create_block(user)
        print('Blocked', user)
    except tweepy.TweepError:
        print('There was an error blocking the user with ID', user)
        continue
     
    try:
        api.destroy_block(user)
        print('Unblocked', user)
    except tweepy.TweepError:
        print('There was an error unblocking the user with ID', user)
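# NOTE: all of these snippets target Tweepy 3.x. Tweepy 4 renamed several of
# the calls used here (api.search -> api.search_tweets, api.followers_ids ->
# api.get_follower_ids) and dropped wait_on_rate_limit_notify and compression,
# so a rough 4.x port of the pagination above would look like this sketch
# (verify the names against your installed version):
#
#   api = tweepy.API(auth, wait_on_rate_limit=True)
#   for page in tweepy.Cursor(api.get_follower_ids, screen_name=screenname).pages():
#       ids.extend(page)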
Example #2
def sd_friend_scraper(target):
    # Collect the IDs of every account the target user follows
    for i in tweepy.Cursor(api.friends, target).items():
        sd_friends.append(i.id)
        print("successfully scraped", i.id)
Example #3
# Flatten every row of the CSV into a single list of search terms
# (csvreader is assumed to be an open csv.reader from earlier in the script)
terms = [term for row in list(csvreader) for term in row]


# Define the labelling logic (a random stub for now).
def labelTweet(tweet):
    return np.random.randint(2)


#Scrape result from terms
clear = lambda: os.system('clear')
from time import sleep
#from IPython.display import clear_output

tweets = {}
filename = "tweets.csv"
f = csv.writer(open(filename, "a"))
count = 0
for term in terms:
    for tweet in tweepy.Cursor(api.search, q=term, count=100,
                               lang="en").items():
        if tweet.text not in tweets:
            count = count + 1
            label = labelTweet(tweet.text)  # label once so the dict and the CSV agree
            tweets[tweet.text] = label
            f.writerow(['"' + tweet.text + '"', label])
            print(term + " " + str(count))
            sleep(1)
            clear()
            #clear_output(wait = True)
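# labelTweet above is just a random stub. A slightly more meaningful
# placeholder (an assumption, not the original author's model) could key off
# TextBlob polarity, which later examples already rely on:
from textblob import TextBlob

def labelTweet(tweet):
    # Hypothetical replacement for the random stub: 1 = positive, 0 = otherwise
    return int(TextBlob(tweet).sentiment.polarity > 0)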
Example #4
def main():
    consumer_key = 'YOUR_CONSUMER_KEY'  # redacted; never commit real keys
    consumer_secret = 'YOUR_CONSUMER_SECRET'

    access_token = 'YOUR_ACCESS_TOKEN'
    access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    api = tweepy.API(auth)

    searchTerm = input('Enter the term/hashtag to search about: ')
    noOfSearchTerms = int(input('Enter how many Tweets to analyze: '))
    tweets = tweepy.Cursor(api.search, q=searchTerm, lang='en').items(noOfSearchTerms)

    positive = 0
    weaklyPositive = 0
    stronglyPositive = 0
    negative = 0
    weaklyNegative = 0
    stronglyNegative = 0
    neutral = 0
    polarity = 0

    for tweet in tweets:
        print(tweet)
        analysis = TextBlob(tweet.text)
        polarity += analysis.sentiment.polarity

        if (analysis.sentiment.polarity == 0):
            neutral += 1
        elif (analysis.sentiment.polarity > 0 and analysis.sentiment.polarity <= 0.3):
            weaklyPositive += 1
        elif (analysis.sentiment.polarity > 0.3 and analysis.sentiment.polarity <= 0.6):
            positive += 1
        elif (analysis.sentiment.polarity > 0.6 and analysis.sentiment.polarity <= 1):
            stronglyPositive += 1
        elif (analysis.sentiment.polarity > -0.3 and analysis.sentiment.polarity <= 0):
            weaklyNegative += 1
        elif (analysis.sentiment.polarity > -0.6 and analysis.sentiment.polarity <= -0.3):
            negative += 1
        elif (analysis.sentiment.polarity > -1 and analysis.sentiment.polarity <= -0.6):
            stronglyNegative += 1
    
    positive = percentage(positive, noOfSearchTerms)
    weaklyPositive = percentage(weaklyPositive, noOfSearchTerms)
    stronglyPositive = percentage(stronglyPositive, noOfSearchTerms)
    negative = percentage(negative, noOfSearchTerms)
    weaklyNegative = percentage(weaklyNegative, noOfSearchTerms)
    stronglyNegative = percentage(stronglyNegative, noOfSearchTerms)
    neutral = percentage(neutral, noOfSearchTerms)

    # Average reaction (keep polarity as a raw average; converting it to a
    # percentage first and then dividing again would scale it twice)
    polarity = polarity / noOfSearchTerms

    print('How people are reacting to ' + searchTerm + ', based on ' + str(noOfSearchTerms) + ' Tweets:')

    if (polarity == 0):
        print("Neutral")
    elif (polarity > 0 and polarity <= 0.3):
        print("Weakly Positive")
    elif (polarity > 0.3 and polarity <= 0.6):
        print("Positive")
    elif (polarity > 0.6 and polarity <= 1):
        print("Strongly Positive")
    elif (polarity > -0.3 and polarity <= 0):
        print("Weakly Negative")
    elif (polarity > -0.6 and polarity <= -0.3):
        print("Negative")
    elif (polarity > -1 and polarity <= -0.6):
        print("Strongly Negative")
    
    print("Detailed Report: ")
    print(str(positive) + "% of people thought it was positive")
    print(str(weaklyPositive) + "% of people thought it was weakly positive")
    print(str(stronglyPositive) + "% of people thought it was strongly positive")
    print(str(negative) + "% of people thought it was negative")
    print(str(weaklyNegative) + "% of people thought it was weakly negative")
    print(str(stronglyNegative) + "% of people thought it was strongly negative")
    print(str(neutral) + "% of people thought it was neutral")

    labels = ['Positive [' + str(positive) + '%]', 'Weakly Positive [' + str(weaklyPositive) + '%]', 'Strongly Positive [' + str(stronglyPositive) + '%]', 'Neutral [' + str(neutral) + '%]',
              'Negative [' + str(negative) + '%]', 'Weakly Negative [' + str(weaklyNegative) + '%]', 'Strongly Negative [' + str(stronglyNegative) + '%]']
    sizes = [positive, weaklyPositive, stronglyPositive,
             neutral, negative, weaklyNegative, stronglyNegative]
    colors = ['yellowgreen', 'lightgreen', 'darkgreen',
              'gold', 'red', 'lightsalmon', 'darkred']
    patches, texts = plt.pie(sizes, colors=colors, startangle=90)
    plt.legend(patches, labels, loc="best")
    plt.title('How people are reacting to ' + searchTerm +
              ', based on ' + str(noOfSearchTerms) + ' Tweets')
    plt.axis('equal')
    plt.tight_layout()
    plt.show()
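# Nothing above actually calls main(); the usual entry-point guard would be:
if __name__ == '__main__':
    main()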
Example #5
    def DownloadData(self):
        # authenticating
        consumerKey = ''
        consumerSecret = ''
        accessToken = ''
        accessTokenSecret = ''
        auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
        auth.set_access_token(accessToken, accessTokenSecret)
        api = tweepy.API(auth)

        # input for term to be searched and how many tweets to search
        searchTerm = input("Enter Keyword/Tag to search about: ")
        NoOfTerms = int(input("Enter how many tweets to search: "))

        # searching for tweets
        self.tweets = tweepy.Cursor(api.search, q=searchTerm, lang = "en").items(NoOfTerms)

        # Open/create a file to append data to
        csvFile = open('result.csv', 'a')

        # Use csv writer
        csvWriter = csv.writer(csvFile)


        # creating some variables to store info
        polarity = 0
        positive = 0
        weak_positive = 0
        strong_positive = 0
        negative = 0
        weak_negative = 0
        strong_negative = 0
        neutral = 0


        # iterating through tweets fetched
        for tweet in self.tweets:
            # Append the cleaned text so it can be written to the CSV later (UTF-8 encoded)
            self.tweetText.append(self.cleanTweet(tweet.text).encode('utf-8'))
            # print (tweet.text.translate(non_bmp_map))    #print tweet's text
            analysis = TextBlob(tweet.text)
            # print(analysis.sentiment)  # print tweet's polarity
            polarity += analysis.sentiment.polarity  # adding up polarities to find the average later

            if (analysis.sentiment.polarity == 0):  # adding reaction of how people are reacting to find average later
                neutral += 1
            elif (analysis.sentiment.polarity > 0 and analysis.sentiment.polarity <= 0.3):
                weak_positive += 1
            elif (analysis.sentiment.polarity > 0.3 and analysis.sentiment.polarity <= 0.6):
                positive += 1
            elif (analysis.sentiment.polarity > 0.6 and analysis.sentiment.polarity <= 1):
                strong_positive += 1
            elif (analysis.sentiment.polarity > -0.3 and analysis.sentiment.polarity <= 0):
                weak_negative += 1
            elif (analysis.sentiment.polarity > -0.6 and analysis.sentiment.polarity <= -0.3):
                negative += 1
            elif (analysis.sentiment.polarity > -1 and analysis.sentiment.polarity <= -0.6):
                strong_negative += 1


        # Write to csv and close csv file
        csvWriter.writerow(self.tweetText)
        csvFile.close()

        # Convert counts to percentages (reuse the same names so the report
        # and the pie chart below read the converted values)
        positive = self.percentage(positive, NoOfTerms)
        weak_positive = self.percentage(weak_positive, NoOfTerms)
        strong_positive = self.percentage(strong_positive, NoOfTerms)
        negative = self.percentage(negative, NoOfTerms)
        weak_negative = self.percentage(weak_negative, NoOfTerms)
        strong_negative = self.percentage(strong_negative, NoOfTerms)
        neutral = self.percentage(neutral, NoOfTerms)

        # finding average reaction
        polarity = polarity / NoOfTerms

        # printing out data
        print("How people are reacting on " + searchTerm + " by analyzing " + str(NoOfTerms) + " tweets.")
        print()
        print("General Report: ")

        if (polarity == 0):
            print("Neutral")
        elif (polarity > 0 and polarity <= 0.3):
            print("Weakly Positive")
        elif (polarity > 0.3 and polarity <= 0.6):
            print("Positive")
        elif (polarity > 0.6 and polarity <= 1):
            print("Strongly Positive")
        elif (polarity > -0.3 and polarity <= 0):
            print("Weakly Negative")
        elif (polarity > -0.6 and polarity <= -0.3):
            print("Negative")
        elif (polarity > -1 and polarity <= -0.6):
            print("Strongly Negative")

        print()
        print("Detailed Report: ")
        print(str(positive) + "% of people thought it was positive")
        print(str(weak_positive) + "% of people thought it was weakly positive")
        print(str(strong_positive) + "% of people thought it was strongly positive")
        print(str(negative) + "% of people thought it was negative")
        print(str(weak_negative) + "% of people thought it was weakly negative")
        print(str(strong_negative) + "% of people thought it was strongly negative")
        print(str(neutral) + "% of people thought it was neutral")

        self.plotPieChart(positive, weak_positive, strong_positive, negative, weak_negative, strong_negative, neutral, searchTerm, NoOfTerms)
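# Example #4 and Example #5 both call a percentage helper that isn't shown; a
# minimal sketch, assuming it only converts one count into a share of the
# total (in Example #5 the same body would live on the class as self.percentage):
def percentage(part, whole):
    # Assumed helper: part as a percentage of whole, rounded to 2 decimals
    return round(100 * float(part) / float(whole), 2)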
Example #6
consumer_key = 'on_twitter'
consumer_secret = 'on_twitter'
access_token = 'on_twitter'
access_token_secret = 'on_twitter'


auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)

# search_term = "#climate+change -filter:retweets"
yesterday = datetime.strftime(datetime.now() - timedelta(1), '%Y-%m-%d')
search_term = '(disease) -filter:retweets'
tweets = tw.Cursor(api.search, q=search_term, lang='en',
                   since=yesterday).items(1000)

all_tweets = [tweet.text for tweet in tweets]



def remove_url(txt):
    # Strip URLs and all non-alphanumeric characters, then collapse whitespace
    return ' '.join(re.sub(r"([^0-9A-Za-z \t])|(\w+://\S+)", '', txt).split())



all_tweets_no_urls = [remove_url(tweet) for tweet in all_tweets]

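# A quick check of what remove_url() keeps, given the regex above:
print(remove_url("New results! https://example.com #science"))
# -> "New results science"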
Example #7
    def delete(self):
        if not self.authenticated:
            return

        # First, run fetch
        click.secho("Before deleting anything, fetch", fg="cyan")
        self.fetch()

        # Unretweet and unlike tweets
        if self.common.settings.get("retweets_likes"):
            # Unretweet
            if self.common.settings.get("retweets_likes_delete_retweets"):
                datetime_threshold = datetime.datetime.utcnow() - datetime.timedelta(
                    days=self.common.settings.get("retweets_likes_retweets_threshold")
                )
                tweets = (
                    self.common.session.query(Tweet)
                    .filter(Tweet.user_id == int(self.common.settings.get("user_id")))
                    .filter(Tweet.is_deleted == 0)
                    .filter(Tweet.is_retweet == 1)
                    .filter(Tweet.created_at < datetime_threshold)
                    .order_by(Tweet.created_at)
                    .all()
                )

                click.secho(
                    "Deleting {} retweets, starting with the earliest".format(
                        len(tweets)
                    ),
                    fg="cyan",
                )

                count = 0
                for tweet in tweets:
                    try:
                        self.api.destroy_status(tweet.status_id)
                        tweet.unretweet_summarize()
                        tweet.is_deleted = True
                        self.common.session.add(tweet)
                    except tweepy.error.TweepError as e:
                        if e.api_code == 144:
                            click.echo(
                                "Error, retweet {} is already deleted, updating database".format(
                                    tweet.status_id
                                )
                            )
                            tweet.is_deleted = True
                            self.common.session.add(tweet)
                        else:
                            click.echo(
                                "Error for tweet {}: {}".format(tweet.status_id, e)
                            )

                    count += 1
                    if count % 20 == 0:
                        self.common.session.commit()

                self.common.session.commit()
                self.common.log("Deleted %s retweets" % count)

            # Unlike
            if self.common.settings.get("retweets_likes_delete_likes"):
                datetime_threshold = datetime.datetime.utcnow() - datetime.timedelta(
                    days=self.common.settings.get("retweets_likes_likes_threshold")
                )
                tweets = (
                    self.common.session.query(Tweet)
                    .filter(Tweet.user_id != int(self.common.settings.get("user_id")))
                    .filter(Tweet.is_unliked == False)
                    .filter(Tweet.favorited == True)
                    .filter(Tweet.created_at < datetime_threshold)
                    .order_by(Tweet.created_at)
                    .all()
                )

                click.secho(
                    "Unliking {} tweets, starting with the earliest".format(
                        len(tweets)
                    ),
                    fg="cyan",
                )

                count = 0
                for tweet in tweets:
                    try:
                        self.api.destroy_favorite(tweet.status_id)
                        tweet.unlike_summarize()
                        tweet.is_unliked = True
                        self.common.session.add(tweet)
                    except tweepy.error.TweepError as e:
                        if e.api_code == 144:
                            click.echo(
                                "Error, tweet {} is already unliked, updating database".format(
                                    tweet.status_id
                                )
                            )
                            tweet.is_unliked = True
                            self.common.session.add(tweet)
                        else:
                            click.echo(
                                "Error for tweet {}: {}".format(tweet.status_id, e)
                            )

                    count += 1
                    if count % 20 == 0:
                        self.common.session.commit()

                self.common.session.commit()
                self.common.log("Unliked %s tweets" % count)

        # Deleting tweets
        if self.common.settings.get("delete_tweets"):
            tweets_to_delete = self.common.get_tweets_to_delete()

            click.secho(
                "Deleting {} tweets, starting with the earliest".format(
                    len(tweets_to_delete)
                ),
                fg="cyan",
            )

            count = 0
            for tweet in tweets_to_delete:
                try:
                    self.api.destroy_status(tweet.status_id)
                    tweet.delete_summarize()
                    tweet.is_deleted = True
                    self.common.session.add(tweet)
                except tweepy.error.TweepError as e:
                    if e.api_code == 144:
                        click.echo(
                            "Error, tweet {} is already deleted, updating database".format(
                                tweet.status_id
                            )
                        )
                        tweet.is_deleted = True
                        self.common.session.add(tweet)
                    else:
                        click.echo("Error for tweet {}: {}".format(tweet.status_id, e))

                count += 1
                if count % 20 == 0:
                    self.common.session.commit()

            self.common.session.commit()
            self.common.log("Deleted %s tweets" % count)

        # Delete DMs
        if self.common.settings.get("delete_dms"):
            datetime_threshold = datetime.datetime.utcnow() - datetime.timedelta(
                days=self.common.settings.get("dms_days_threshold")
            )

            # Sadly, only the last 30 days worth
            # https://developer.twitter.com/en/docs/direct-messages/sending-and-receiving/api-reference/list-events
            click.secho(
                "Fetching direct message metadata for the last 30 days", fg="cyan"
            )

            # Fetch direct messages
            count = 0
            for page in tweepy.Cursor(self.api.list_direct_messages).pages():
                for dm in page:
                    created_timestamp = datetime.datetime.fromtimestamp(
                        int(dm.created_timestamp) / 1000
                    )
                    if created_timestamp <= datetime_threshold:
                        self.api.destroy_direct_message(dm.id)
                        click.echo(
                            "Deleted DM {}, id {}".format(
                                created_timestamp.strftime("%Y-%m-%d"), dm.id
                            )
                        )
                        count += 1
                    else:
                        click.secho(
                            "Skipping DM {}, id {}".format(
                                created_timestamp.strftime("%Y-%m-%d"), dm.id
                            ),
                            dim=True,
                        )

            self.common.log("Deleted %s DMs" % count)
Example #8
def follow_followers(api):
    logger.info("Retrieving and following followers")
    for follower in tweepy.Cursor(api.followers).items():
        if not follower.following:
            logger.info(f"Following {follower.name}")
            follower.follow()
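# follow_followers() assumes a module-level logger configured elsewhere; a
# minimal sketch of that setup with the standard logging module:
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)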
Example #9
import tweepy
import time
from tweepy import OAuthHandler

import twitter_credentials

auth = OAuthHandler(twitter_credentials.CONSUMER_KEY,
                    twitter_credentials.CONSUMER_SECRET)
auth.set_access_token(twitter_credentials.ACCESS_TOKEN,
                      twitter_credentials.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

user = api.me()
search = 'Javascript'
numberOfTweets = 500

for tweet in tweepy.Cursor(api.search, search).items(numberOfTweets):
    try:
        tweet.favorite()
        print('Tweet liked')
        time.sleep(10)
    except tweepy.TweepError as e:
        print(e.reason)
    except StopIteration:
        break
Example #10
def get_firefox_mentions(api):
    # Uses the standard search API, which can only access the last 7 days of data.
    # Uses since_id to ensure no duplicates are fetched.

    # If results from a specific ID onwards are required, set since_id to that ID;
    # else default to no lower limit and go as far back as the API allows.
    qry_max_id = """SELECT max(id_str) max_id FROM {0}""".format(
        dataset_name + ".twitter_mentions")
    query_job = bq_client.query(qry_max_id)
    max_id_result = query_job.to_dataframe()
    max_id = max_id_result['max_id'].values[0]
    print(max_id)

    #searchQuery = '#someHashtag'  # this is what we're searching for
    maxTweets = 10000000  # Some arbitrary large number

    tweetCount = 0
    print("Downloading max {0} tweets".format(maxTweets))

    # tweet_mode="extended" to include truncated tweets
    results = []

    try:
        if max_id is not None:
            new_tweets = tweepy.Cursor(api.search,
                                       q="@firefox",
                                       tweet_mode="extended",
                                       since_id=str(max_id)).items()
        else:
            new_tweets = tweepy.Cursor(api.search,
                                       q="@firefox",
                                       tweet_mode="extended").items()

        for tweet in new_tweets:
            tweet_row = get_tweet_data_row(tweet)
            #print(tweet.id_str)
            results.append(tweet_row)

            tweetCount = tweetCount + 1
            if (tweetCount > maxTweets):
                break

        df = pd.DataFrame.from_records(results,
                                       columns=[
                                           "id_str", "created_at", "full_text",
                                           "user_id",
                                           "in_reply_to_status_id_str"
                                       ])

        if df.shape[0] > 0:
            min_id_str = df['id_str'].min()
            max_id_str = df['id_str'].max()
            print('min: ' + min_id_str + ', max: ' + max_id_str)
            fn = 'twitter_data_mentions_' + str(min_id_str) + "_to_" + str(
                max_id_str) + '.csv'
            df.to_csv("/tmp/" + fn, index=False, encoding='utf-8')
            print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fn))

            blob = sumo_bucket.blob("twitter/" + fn)
            blob.upload_from_filename("/tmp/" + fn)

            s = [
                bigquery.SchemaField("id_str", "INTEGER"),
                bigquery.SchemaField("created_at", "TIMESTAMP"),
                bigquery.SchemaField("full_text", "STRING"),
                bigquery.SchemaField("user_id", "INTEGER"),
                bigquery.SchemaField("in_reply_to_status_id_str", "INTEGER"),
            ]
            update_bq_table("gs://{}/twitter/".format(bucket), fn,
                            'twitter_mentions', s)
        else:
            print("Downloaded {0} tweets, no mentions updates.".format(
                tweetCount))

    except tweepy.TweepError as e:
        # Just log and exit on any error
        print("Error while fetching mentions: " + str(e))
Example #11
                               twitter_config["consumer_secret"])
    auth.set_access_token(twitter_config["access_token"],
                          twitter_config["access_token_secret"])
    api = tweepy.API(auth)

    # retrieve last savepoint if available
    try:
        with open(last_id_file, "r") as file:
            savepoint = file.read()
    except IOError:
        savepoint = ""
        print("No savepoint found. Bot is now searching for results")

    # search query
    timelineIterator = tweepy.Cursor(api.search,
                                     q=search,
                                     since_id=savepoint,
                                     lang=tweetLanguage).items(100)

    # put everything into a list to be able to sort/filter
    timeline = []
    for status in timelineIterator:
        timeline.append(status)
    print(timeline)

    try:
        last_tweet_id = timeline[0].id
    except IndexError:
        last_tweet_id = savepoint

    # filter @replies/blacklisted words & users out and reverse the timeline
    #timeline = filter(lambda status: status.text[0] != "@", timeline)   # uncomment to drop all tweets starting with an @mention
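    # A working version of the filtering the comment above promises (a sketch;
    # the blacklist words are placeholders, not from the original script):
    blacklist = {"spamword1", "spamword2"}
    timeline = [
        status for status in timeline
        if not status.text.startswith("@")
        and not any(word in status.text.lower() for word in blacklist)
    ]
    timeline.reverse()  # oldest tweets first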
Example #12
def get_firefox_reviews(api):
    #get all tweets with id=firefox

    # If results from a specific ID onwards are required, set since_id to that ID;
    # else default to no lower limit and go as far back as the API allows.
    sinceId = None

    # If only results below a specific ID are required, set max_id to that ID;
    # else default to no upper limit and start from the most recent tweet matching the search query.
    qry_max_id = """SELECT max(id_str) max_id FROM {0}""".format(
        dataset_name + ".twitter_reviews")
    query_job = bq_client.query(qry_max_id)
    max_id_result = query_job.to_dataframe()
    max_id = max_id_result['max_id'].values[0]
    print(max_id)

    maxTweets = 10000000  # Some arbitrary large number

    tweetCount = 0
    print("Downloading max {0} tweets".format(maxTweets))

    # tweet_mode="extended" to include truncated tweets
    results = []

    try:
        if max_id is not None:
            new_tweets = tweepy.Cursor(
                api.user_timeline,
                screen_name='@firefox',
                tweet_mode="extended",
                since_id=str(max_id)
            ).items(
            )  # max_id-1 to exclude max_id since that will have already been added in previous pass
        else:
            new_tweets = tweepy.Cursor(api.user_timeline,
                                       screen_name='@firefox',
                                       tweet_mode="extended").items()

        for tweet in new_tweets:

            # If in_reply_to_status_id_str holds an ID, look up that status;
            # otherwise leave the reply_text, reply_created_at and reply_user_id
            # fields blank (we wouldn't know what share goes un-replied anyway).
            tweet_row = get_tweet_data_row(tweet)
            in_reply_to_status_id_str = tweet.in_reply_to_status_id_str
            #print(in_reply_to_status_id_str)
            if in_reply_to_status_id_str:
                try:
                    reply_tweet = api.get_status(in_reply_to_status_id_str)
                    tweet_row.extend([
                        reply_tweet.text.replace("\n", "\\n"),
                        reply_tweet.created_at, reply_tweet.user.id
                    ])
                except tweepy.TweepError as e:
                    print(
                        "Error trying to get in_reply_to_status_id_str={0}: {1}".format(
                            in_reply_to_status_id_str, str(e)))
                    tweet_row.extend(['', '', ''])
            else:
                tweet_row.extend(['', '', ''])

            results.append(tweet_row)

            tweetCount = tweetCount + 1

            if (tweetCount > maxTweets):
                break

        df = pd.DataFrame.from_records(
            results,
            columns=[
                "id_str", "created_at", "full_text", "user_id",
                "in_reply_to_status_id_str", "in_reply_to_status_text",
                "in_reply_to_status_created_at", "in_reply_to_status_user_id"
            ])
        #  df['ga_date'] = pd.to_datetime(df['ga_date'], format="%Y%m%d").dt.strftime("%Y-%m-%d")

        if df.shape[0] > 0:
            min_id_str = df['id_str'].min()
            max_id_str = df['id_str'].max()
            fn = 'twitter_data_' + str(min_id_str) + "_to_" + str(
                max_id_str) + '.csv'
            df.to_csv("/tmp/" + fn, index=False)
            print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fn))

            blob = sumo_bucket.blob("twitter/" + fn)
            blob.upload_from_filename("/tmp/" + fn)

            s = [
                bigquery.SchemaField("id_str", "INTEGER"),
                bigquery.SchemaField("created_at", "TIMESTAMP"),
                bigquery.SchemaField("full_text", "STRING"),
                bigquery.SchemaField("user_id", "INTEGER"),
                bigquery.SchemaField("in_reply_to_status_id_str", "INTEGER"),
                bigquery.SchemaField("in_reply_to_status_text", "STRING"),
                bigquery.SchemaField("in_reply_to_status_created_at",
                                     "TIMESTAMP"),
                bigquery.SchemaField("in_reply_to_status_user_id", "INTEGER"),
            ]
            update_bq_table("gs://{}/twitter/".format(bucket), fn,
                            'twitter_reviews', s)
        else:
            print("Downloaded {0} tweets, no reviews updates.".format(
                tweetCount))

    except tweepy.TweepError as e:
        # Just log and exit on any error
        print("Error while fetching reviews: " + str(e))
Example #13
def followbot():
    try:
        #name_to_find_ids_of = input("Name to find ids of? Example: twitter, Android, gameinformer, gamasutra\n>")
        name_to_find_ids_of = 'twitter'
        name_list = []
        info_list = []
        with open("twitter_accounts.txt", "r") as f:
            lines = f.readlines()

            print("Gathering status of all accounts.")
            with open("status.txt", "a") as f:
                f.write("\nGathering status of all accounts.\n")

            for line in lines:
                parts = line.split(':')  # avoid shadowing the built-in list type

                name = parts[0]
                CONSUMER_KEY = parts[1]
                CONSUMER_SECRET = parts[2]
                ACCESS_KEY = parts[3]
                ACCESS_SECRET = parts[4][:-1]  # strip the trailing newline
                #print(repr(CONSUMER_KEY))
                #print(repr(CONSUMER_SECRET))
                #print(repr(ACCESS_KEY))
                #print(repr(ACCESS_SECRET))

                auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
                auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
                api = tweepy.API(auth)

                text_file = open("unfollowlist.txt", "w")
                following_counter = 0
                try:
                    for page in tweepy.Cursor(api.friends_ids,
                                              screen_name=name).pages():
                        for line in page:
                            following_counter += 1
                            text_file.write(str(line))
                            text_file.write("\n")
                        #time.sleep(60)

                    text_file.close()
                    with open("unfollowlist.txt", "r") as f:
                        lines = f.readlines()
                        last_line = lines[-1]
                    write_actions = True
                    try:
                        api.destroy_friendship(int(last_line))

                    except tweepy.TweepError as e:
                        errorcode = e.args[0][0]['code']
                        if (errorcode == 261):
                            print(
                                "Application cannot do write actions. Name: " +
                                name)
                            with open("status.txt", "a") as f:
                                f.write(
                                    "\nApplication cannot do write actions. Name: "
                                    + name + "\n")
                            write_actions = False

                        elif (errorcode == 88):
                            print("Rate limit exceeded")

                        else:
                            print(e)
                            print("Error happened on account: " + name)
                            time.sleep(10)
                        pass

                    except ConnectionResetError:
                        print("Connection error, sleeping 10s and continuing")
                        time.sleep(10)
                        continue

                    new_list = [
                        name, CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY,
                        ACCESS_SECRET, following_counter, write_actions
                    ]
                    info_list.append(new_list)
                    name_list.append(new_list[0])
                except tweepy.TweepError as e:
                    print(e)
                    print("Error happened on account: " + name)
                    pass

        print("\nDONE.\n")
        with open("status.txt", "a") as f:
            f.write("\nDONE.\n")

        for acc in info_list:
            if (int(acc[5]) > 2126) and acc[6]:
                number_to_unfollow = int(acc[5]) - 2126
                unfollow(acc, number_to_unfollow)

        for acc in info_list:
            if acc[6]:
                follow(acc, name_to_find_ids_of)
    except (tweepy.TweepError, RuntimeError, TypeError, NameError,
            ConnectionError) as e:
        print("Fatal error, starting over.")
        print(e)
        with open("status.txt", "a") as f:
            f.write("Fatal error, starting over.")
            f.write(e)
        pass
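# followbot() hands off to follow(acc, name_to_find_ids_of), which isn't
# included. A minimal sketch, assuming it mirrors unfollow() and follows the
# followers of the named account:
def follow(acc, name_to_find_ids_of):
    auth = tweepy.OAuthHandler(acc[1], acc[2])
    auth.set_access_token(acc[3], acc[4])
    api = tweepy.API(auth)
    for user_id in tweepy.Cursor(api.followers_ids,
                                 screen_name=name_to_find_ids_of).items(100):
        try:
            api.create_friendship(user_id)
        except tweepy.TweepError as e:
            print(e)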
Example #14
def unfollow(acc, number_to_unfollow):

    name = acc[0]
    CONSUMER_KEY = acc[1]
    CONSUMER_SECRET = acc[2]
    ACCESS_KEY = acc[3]
    ACCESS_SECRET = acc[4]

    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
    api = tweepy.API(auth)

    text_file = open("unfollowlist.txt", "w")
    for page in tweepy.Cursor(api.friends_ids, screen_name=name).pages():
        for line in page:
            text_file.write(str(line))
            text_file.write("\n")
    print("Starting unfollowing on: " + name)
    with open("status.txt", "a") as f:
        f.write("\nStarting unfollowing on: " + name + "\n")
    text_file.close()

    #print("All current 'following now gathered', Starting to unfollow.\n")
    running = True
    counter = 0
    while running:
        try:
            with open("unfollowlist.txt", "r") as f:
                lines = f.readlines()
                last_line = lines[-1]
            try:
                api.destroy_friendship(int(last_line))
            except tweepy.TweepError as e:
                print(e)
                print("error happened on : " + name)
                time.sleep(20)
                pass
            except ConnectionResetError:
                print("Connection error")
                time.sleep(20)
                pass
            time.sleep(6)
            with open("unfollowlist.txt", "r") as fin:
                data = fin.read().splitlines(True)
            with open('unfollowlist.txt', 'w') as fout:
                fout.writelines(data[:-1])

                s = str(counter) + ' / ' + str(
                    number_to_unfollow) + ' unfollowed.'  # string for output
                print(s, end='')  # just print and flush
                #sys.stdout.flush()                    # needed for flush when using \x08
                backspace(len(s))  # back for n chars

            if counter >= number_to_unfollow:
                running = False
            counter = counter + 1

        except (tweepy.TweepError, ConnectionResetError):
            print("Some sort of error, waiting 20s")
            time.sleep(20)
            pass
    print("Unfollowing done on " + name + "!")
    with open("status.txt", "a") as f:
        f.write("\nUnfollowing done on " + name + "!\n")
Example #15
''' Importing libraries '''
import tweepy
import csv
import pandas as pd
''' Authenticating Twitter Access with Developer Account '''
consumer_key = 'YOUR_CONSUMER_KEY'  # redacted; never commit real keys
consumer_secret = 'YOUR_CONSUMER_SECRET'
access_token = 'YOUR_ACCESS_TOKEN'
access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'
''' Main Crawler Code'''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)
''' Searching Tweets based on keyword'''
# Open/Create a file to append data
csvFile = open('info.csv', 'a')
#Use csv Writer
csvWriter = csv.writer(csvFile)

for tweet in tweepy.Cursor(api.search,
                           q="#DevOpsatUPES",
                           count=100,
                           lang="en",
                           since="2020-02-02").items():
    print(tweet.created_at, tweet.text)
    csvWriter.writerow([tweet.created_at, tweet.text.encode('utf-8')])
Example #16
    def follow(self):
        for follower in tweepy.Cursor(self.api.followers).items():
            follower.follow()
Example #17
    def get_history(self, api, ntweets):
        cursor = tweepy.Cursor(api.user_timeline,
                               screen_name=self.tweeter,
                               count=100).items(ntweets)
        self.tweeter_history = [tweet._json for tweet in limit_cursor(cursor)]
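# get_history() wraps the cursor in limit_cursor, which isn't shown. A common
# pattern for it (an assumption here, mirroring the rate-limit handler in the
# Tweepy docs) sleeps through Tweepy 3.x rate-limit errors:
import time

def limit_cursor(cursor):
    while True:
        try:
            yield next(cursor)
        except tweepy.RateLimitError:
            time.sleep(15 * 60)  # wait out the 15-minute rate-limit window
        except StopIteration:
            return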
Example #18
import time
import tweepy
import sys

# Replace the "foo"/"bar" placeholders with your Twitter API keys from dev.twitter.com
auth = tweepy.auth.OAuthHandler(consumer_key="foo", consumer_secret="bar")
auth.set_access_token("foo", "bar")

# The following dictionaries etc. aren't strictly needed for this,
# but they're useful for your own more in-depth apps.

api = tweepy.API(auth_handler=auth, wait_on_rate_limit=True)

print("Loading followers..")
followers = []
for follower in tweepy.Cursor(api.followers).items():
    followers.append(follower)

print("Found %s followers, finding friends.." % len(followers))
friends = []
for friend in tweepy.Cursor(api.friends).items():
    friends.append(friend)

# creating dictionaries keyed by id is handy too

friend_dict = {}
for friend in friends:
    friend_dict[friend.id] = friend

follower_dict = {}
for follower in followers:
    follower_dict[follower.id] = follower
Example #19
from RefinedMOMTweetRetrieverCode import TweetCollector as TC

# Keys to initiate the connection to the Twitter API for the bot
# (assumes the four credential variables are already defined elsewhere)
consumer_key = consumer_key
consumer_secret = consumer_secret
access_token = access_token
access_token_secret = access_token_secret
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

tweetjsons = []
for tweet in tweepy.Cursor(api.search,
                           q="#makeovermonday ",
                           since='2018-04-24',
                           until='2018-05-03',
                           include_entities=True,
                           result_type='recent',
                           exclude_replies=True).items():
    if 'RT' not in tweet.text:  #remove retweets
        tweetjsons.append(tweet)

viz_info = []
for tweet in tweetjsons:
    ids = tweet.id_str
    name = tweet.author.name
    screen_name = tweet.author.screen_name
    try:
        location = tweet.author.location
    except AttributeError:
        location = None
Example #20
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 23 21:42:17 2018

@author: mathe
"""

import pandas as pd
import tweepy

consumer_key = 'YOUR_CONSUMER_KEY'  # redacted; never commit real keys
consumer_secret = 'YOUR_CONSUMER_SECRET'
access_token = 'YOUR_ACCESS_TOKEN'
access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
tweets = []
sentimentos = []

for tweet in tweepy.Cursor(api.search,q='#Lula  -filter:retweets',tweet_mode='extended',count=100,lang="pt").items(100):
    if (not tweet.retweeted) and ('RT @' not in tweet.full_text):
        tweets.append(tweet.full_text)
        
print('Collected tweets about Lula')
tweets_Dataframe = pd.DataFrame({'Text':tweets})
tweets_Dataframe.to_csv('../bases/tweets_lula_teste.csv', encoding='utf-8',index = False)

print(api.rate_limit_status()['resources']['search'])
Example #21
    def fetch(self):
        if not self.authenticated:
            return

        if self.common.settings.get("delete_tweets"):
            # We fetch tweets since the last fetch (or all tweets, if it's None)
            since_id = self.common.settings.get("since_id")
            if since_id:
                click.secho("Fetching all recent tweets", fg="cyan")
            else:
                click.secho(
                    "Fetching all tweets, this first run may take a long time",
                    fg="cyan",
                )

            # Fetch tweets from timeline a page at a time
            for page in tweepy.Cursor(
                self.api.user_timeline,
                id=self.common.settings.get("username"),
                since_id=since_id,
                tweet_mode="extended",
            ).pages():
                fetched_count = 0

                # Import these tweets, and all their threads
                for status in page:
                    fetched_count += self.import_tweet_and_thread(Tweet(status))

                    # Only commit every 20 tweets
                    if fetched_count % 20 == 0:
                        self.common.session.commit()

                # Commit the leftovers
                self.common.session.commit()

                # Now hunt for threads. This is a dict that maps the root status_id
                # to a list of status_ids in the thread
                threads = {}
                for status in page:
                    if status.in_reply_to_status_id:
                        status_ids = self.calculate_thread(status.id)
                        root_status_id = status_ids[0]
                        if root_status_id in threads:
                            for status_id in status_ids:
                                if status_id not in threads[root_status_id]:
                                    threads[root_status_id].append(status_id)
                        else:
                            threads[root_status_id] = status_ids

                # For each thread, does this thread already exist, or do we create a new one?
                for root_status_id in threads:
                    status_ids = threads[root_status_id]
                    thread = (
                        self.common.session.query(Thread)
                        .filter_by(root_status_id=root_status_id)
                        .first()
                    )
                    if not thread:
                        thread = Thread(root_status_id)
                        count = 0
                        for status_id in status_ids:
                            tweet = (
                                self.common.session.query(Tweet)
                                .filter_by(status_id=status_id)
                                .first()
                            )
                            if tweet:
                                thread.tweets.append(tweet)
                                count += 1
                        if count > 0:
                            click.echo(
                                "Added new thread with {} tweets (root id={})".format(
                                    count, root_status_id
                                )
                            )
                    else:
                        count = 0
                        for status_id in status_ids:
                            tweet = (
                                self.common.session.query(Tweet)
                                .filter_by(status_id=status_id)
                                .first()
                            )
                            if tweet and tweet not in thread.tweets:
                                thread.tweets.append(tweet)
                                count += 1
                        if count > 0:
                            click.echo(
                                "Added {} tweets to existing thread (root id={})".format(
                                    count, root_status_id
                                )
                            )
                    self.common.session.commit()

        if self.common.settings.get("retweets_likes") and self.common.settings.get(
            "retweets_likes_delete_likes"
        ):
            like_since_id = self.common.settings.get("since_id")

            # Fetch tweets that are liked
            click.secho("Fetching tweets that you liked", fg="cyan")
            for page in tweepy.Cursor(
                self.api.favorites,
                id=self.common.settings.get("username"),
                since_id=like_since_id,
                tweet_mode="extended",
            ).pages():
                # Import these tweets
                for status in page:
                    tweet = Tweet(status)
                    if not tweet.already_saved(self.common.session):
                        tweet.fetch_summarize()
                        self.common.session.add(tweet)
                # Commit a page of tweets at a time
                self.common.session.commit()

            # All done, update the since_id
            tweet = (
                self.common.session.query(Tweet)
                .order_by(Tweet.status_id.desc())
                .first()
            )
            if tweet:
                self.common.settings.set("since_id", tweet.status_id)
                self.common.settings.save()

        # Calculate which threads should be excluded from deletion
        self.calculate_excluded_threads()

        self.common.settings.set(
            "last_fetch", datetime.datetime.today().strftime(self.last_fetch_format)
        )
        self.common.settings.save()

        self.common.log(
            "last_fetch: %s"
            % datetime.datetime.today().strftime(self.last_fetch_format)
        )
Example #22
#QUESTION 1A

##Installing and loading the tweepy module
# pip install tweepy   (shell command; run it outside this script)
import tweepy
import imp  # needed for imp.load_source below

##Establishing the twitter API to be used by my program
twitter = imp.load_source('oswaldapp1','/Users/oswaldcodjoe/OneDrive - Washington University in St. Louis/Summer 19/Python/SecretAPIFolder/twitterAPI/start_twitter_oswald.py')
api = twitter.client

##Creating a user object called WashU
WashU = api.get_user('@WUSTL')

##Extracting a list of 20 followers of WashU
WashU_followers = []
for item in tweepy.Cursor(api.followers_ids, '@WUSTL').items(20):
    WashU_followers.append(item)

WashU_followers

##Determining who among the 20 WashU followers has the highest number of tweets
WashU_followersstatuses_count =[]
for i in WashU_followers:
    WashU_followersstatuses_count.append(api.get_user(i).statuses_count)
    
WashU_followersstatuses_count 

max_value = max(WashU_followersstatuses_count)
max_index = WashU_followersstatuses_count.index(max_value)

api.get_user(WashU_followers[max_index]).name
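##Equivalent one-liner for the max-then-index dance above (note it re-calls
##api.get_user per follower, so mind the rate limit):
most_active = max(WashU_followers, key=lambda uid: api.get_user(uid).statuses_count)
api.get_user(most_active).name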
Example #23
consumer_key = keys[0]  # keys is assumed to be a list read from a credentials file
consumer_secret = keys[1]
access_token = keys[2]
access_token_secret = keys[3]

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)
cliente = MongoClient("mongodb://localhost:27017")
bd = cliente['bigdata']
coleccion = bd['tweets']
ultimo = coleccion.find_one(sort=[('id', -1)])
if ultimo is not None:
    ultimo_tweet = ultimo['id']
else:
    ultimo_tweet = None

tweets = []
contador = 1
for tweet in tweepy.Cursor(api.user_timeline,
                           since_id=ultimo_tweet,
                           screen_name='alferdez',
                           tweet_mode='extended').items(2500):
    tweet_dic = tweet._json
    tweets.append(tweet_dic)
    print("tweet capturado", contador)
    contador += 1
if len(tweets) > 0:
    coleccion.insert_many(tweets)
    print("Subidos:", len(tweets), 'tweets')
else:
    print("No hay nuevos tweets para subir")
Example #24
consumer_key = 'Enter consumer key within single quotes'
consumer_secret = 'Enter consumer secret key within single quotes'
access_token = 'Enter access token key within single quotes'
access_token_secret = 'Enter secret access key within single quotes'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

csvFile = open('tweets.csv', 'w')
#Use csv Writer
csvWriter = csv.writer(csvFile)

for tweet in tweepy.Cursor(api.search,
                           q="#AyodhyaVerdict",
                           count=100,
                           lang="en",
                           since="2019-11-28").items():
    #print (tweet.created_at, tweet.text)
    csvWriter.writerow([tweet.user.screen_name, tweet.text.encode('utf-8')])

df = pd.read_csv('tweets.csv', names=["Username", "Tweet"])  # don't shadow the csv module
count = df['Username'].value_counts()[:]
df.head(10)

top2 = count.head(2)
top2

import matplotlib.pyplot as plt
colors = [
    "#E13F29", "#D69A80", "#D63B59", "#AE5552", "#CB5C3B", "#EB8076", "#96624E"
]
Example #25
def fd_friends_scraper(target):
    # Collect the IDs of every account the target follows
    for i in tweepy.Cursor(api.friends, target).items():
        CWLL_Tourney_friends.append(i.id)
Example #26
# if the geojson file is already present open it and append the new result. Otherwise create it
if (os.path.exists(filename)):
    gdf = geopandas.read_file(filename)
else:
    gdf = geopandas.GeoDataFrame(columns=[
        'id_str', 'user_id_str', 'user_screen_name', 'text', 'created_at',
        'user_lang', 'user_location', 'longitude', 'latitude', 'geometry'
    ])
geolocator = Nominatim(user_agent="my_application")

twitter_account_without_coordinate = 0
new_twitter_user_added = 0
# Collect tweets
for tweet in tweepy.Cursor(api.search,
                           q=search_words_no_retweet,
                           lang="en",
                           since=date_since).items(numberOfTweets):
    condition = tweet.id_str in gdf['id_str'].values  # the tweet is already present

    if not condition:
        if follow_user and tweet.user.id_str != user.id_str:
            api.create_friendship(tweet.user.id_str)
            print("friendship with " + tweet.user.screen_name)

        location = None  # reset each iteration so a stale value can't leak through
        if tweet.user.location:
            location = geolocator.geocode(tweet.user.location)

        if location is None:
            twitter_account_without_coordinate += 1
        else:
Example #27
def fd_follower_scraper(target):
    # Collect the IDs of every follower of the target account
    for i in tweepy.Cursor(api.followers, target).items():
        CWLL_Tourney_followers.append(i.id)
Example #28
tweetCount = 1

fileName = "All_Tweets_From_@" + screenName + "_Until_" + str(time) + ".txt"

#### Writing tweets to txt

with open(fileName, "w") as f:

    f.write("All Tweets From @" + screenName + " Until " + str(time))
    f.write("\n")
    f.write("---------------------")

    for status in tweepy.Cursor(api.user_timeline,
                                user_id=userId,
                                tweet_mode="extended",
                                exclude_replies=False,
                                include_rts=True).items():

        try:
            f.write("\n")
            f.write("Tweet No: " + str(tweetCount) + "\n")
            f.write("\n")
            f.write(name + " @" + screenName + " :\n")
            f.write(status.full_text + "\n")
            f.write("\n")
            f.write(str(status.created_at) + "\n")
            f.write("\n")
            f.write("---------------------")
            f.write("\n")
        except UnicodeEncodeError:
            pass  # skip characters the file's encoding can't represent

        tweetCount += 1
Example #29
class TweetAnalyzer():
    def tweet_to_dataframe(self, tweets):
        df = pd.DataFrame(data=[tweet.text for tweet in tweets],
                          columns=['tweets'])

        df['id'] = np.array([tweet.id for tweet in tweets])
        df['date'] = np.array([tweet.created_at for tweet in tweets])
        df['source'] = np.array([tweet.source for tweet in tweets])
        df['likes'] = np.array([tweet.favorite_count for tweet in tweets])
        df['retweets'] = np.array([tweet.retweet_count for tweet in tweets])
        df['location'] = np.array([tweet.geo for tweet in tweets])
        #print(tweets)
        return df


if __name__ == "__main__":  # entry point
    twitterclient = TwitterClient()
    api = twitterclient.get_twitter_api()
    tweetanalyzer = TweetAnalyzer()
    tweets = tw.Cursor(api.search, q=['#fire'], lang='en',
                       since='2020-03-10').items(1000)
    tweets_details = [[tweet.text, tweet.geo, tweet.user.location]
                      for tweet in tweets]
    df = pd.DataFrame(data=tweets_details, columns=['text', 'geo', 'location'])
    df.to_csv('fire.csv')
    #tweets = api.search(q = ['corona','pandemic'], lang = "en", count = 2000)    #bulit in function in api
    #print(tweets[0])
    #df = tweetanalyzer.tweet_to_dataframe(tweets)
    #df.to_csv('result.csv')
    #print(df.head(10))
Example #30
# Twitter search with keyword
target_num = 50
query = "olympics"

csvFile = open('TwitterResults.csv', 'w')
csvWriter = csv.writer(csvFile)
csvWriter.writerow([
    "username", "author id", "created", "text", "retwc", "hashtag",
    "followers", "friends", "polarity", "subjectivity"
])
counter = 0

for tweet in tweepy.Cursor(api.search,
                           q=query,
                           lang="en",
                           result_type="popular",
                           count=target_num).items():
    created = tweet.created_at
    text = tweet.text
    text = unidecode.unidecode(text)
    retwc = tweet.retweet_count
    try:
        hashtag = tweet.entities[u'hashtags'][0][u'text']  # first hashtag used
    except (IndexError, KeyError):
        hashtag = "None"
    username = tweet.author.name  #author/user name
    authorid = tweet.author.id  #author/user ID#
    followers = tweet.author.followers_count  #number of author/user followers (inlink)
    friends = tweet.author.friends_count  #number of author/user friends (outlink)
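    # --- Assumed continuation (the snippet is cut off here): score the tweet
    # with TextBlob, as the earlier sentiment examples do, and write the row.
    analysis = TextBlob(text)  # assumes `from textblob import TextBlob`
    polarity = analysis.sentiment.polarity
    subjectivity = analysis.sentiment.subjectivity
    csvWriter.writerow([username, authorid, created, text, retwc, hashtag,
                        followers, friends, polarity, subjectivity])
    counter += 1
    if counter >= target_num:
        break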