示例#1
0
def get_profile():
    """Fetch a short profile summary for the two configured Twitter accounts.

    Looks up ``PROFILE_A`` and ``PROFILE_B`` by screen name through a Twython
    client built from the credentials returned by ``Twitter.get_credentials``.

    Returns:
        dict: ``{'A': {...}, 'B': {...}}``; each inner dict holds the
        account's ``name``, ``description`` and ``photo`` (read from the
        ``profile_image_url`` field of the lookup response).
    """
    client = Twitter()
    keys = client.get_credentials()
    api = Twython(keys['consumer_key'], keys['consumer_secret'],
                  keys['access_token'], keys['access_token_secret'])

    # Both lookups are issued (A first, then B) before any field is read.
    fetched = {
        'A': api.show_user(screen_name=PROFILE_A),
        'B': api.show_user(screen_name=PROFILE_B),
    }

    # Pairs of (key in the returned summary, key in the lookup response).
    wanted = (
        ('name', 'name'),
        ('description', 'description'),
        ('photo', 'profile_image_url'),
    )

    return {
        label: {out_key: raw[api_key] for out_key, api_key in wanted}
        for label, raw in fetched.items()
    }
import time
from datetime import datetime

import pytz
from sqlalchemy.exc import IntegrityError
from twython import Twython
from twython.exceptions import TwythonError, TwythonRateLimitError

from db.database import db_session
from db.models import AllTweets
from scripts.d2l_collector.twitter import Twitter

# Parent of the directory that contains this file (symlinks resolved by
# realpath); used below as the base for the input CSV path.
parent_dir_name = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

# Project Twitter helper; supplies the API credentials for the client below.
t = Twitter()
credentials = t.get_credentials()

# Twython client built from the four credential values.
tw = Twython(credentials['consumer_key'], credentials['consumer_secret'],
             credentials['access_token'], credentials['access_token_secret'])

# NOTE(review): `result` and `count` are not used in the visible part of
# this script; presumably they are filled in further down, so confirm.
result = []
count = 0

# NOTE(review): presumably the context label and search tag recorded with
# each stored tweet; their use is not visible here, so confirm.
context = "bvs"
tag = "#teambatman OR #teamsuperman"

with open(parent_dir_name + '/files/tweets_ids.csv', 'rt',
          encoding="utf-8") as csv_file:
    reader = csv.DictReader(csv_file)
    all_tweets = []
    for row in reader:
示例#3
0
def profile_information():
    """Compare the two configured Twitter profiles and return the result as JSON.

    For ``PROFILE_A`` and ``PROFILE_B`` this fetches the profile and up to 200
    timeline tweets, runs the tweet texts through
    ``Processing.get_words_simple`` together with the topic names taken from
    ``TOPICS`` (comma separated, spaces removed), and summarises the outcome.

    Returns:
        The value of ``jsonify`` applied to a dict with the keys ``user_a``,
        ``user_b`` and ``topics_name``. Each user dict carries ``name``,
        ``description``, ``photo``, ``top_10`` (ten most frequent words),
        ``words`` (up to 250 words with counts, most frequent first),
        ``hashtags`` (three most common, with counts) and ``topics``
        (topic -> count).
    """
    topic_names = TOPICS.replace(" ", "").split(",")

    text_processor = Processing()

    client = Twitter()
    keys = client.get_credentials()
    api = Twython(keys['consumer_key'], keys['consumer_secret'],
                  keys['access_token'], keys['access_token_secret'])

    def summarize(raw_user):
        # Keep only the profile fields exposed in the response.
        return {
            'name': raw_user['name'],
            'description': raw_user['description'],
            'photo': raw_user['profile_image_url'],
        }

    def attach_stats(profile, words, hashtags, topics):
        # Add word, hashtag and topic frequencies to an existing profile dict.
        most_frequent = dict(Counter(words).most_common(250))
        ranked = dict(sorted(most_frequent.items(),
                             key=operator.itemgetter(1),
                             reverse=True))
        profile['top_10'] = list(ranked)[:10]
        profile['words'] = ranked
        profile['hashtags'] = Counter(hashtags).most_common(3)
        profile['topics'] = dict(Counter(topics))

    # Remote calls are issued in A-then-B order at every stage.
    raw_a = api.show_user(screen_name=PROFILE_A)
    raw_b = api.show_user(screen_name=PROFILE_B)

    user_a = summarize(raw_a)
    user_b = summarize(raw_b)

    timeline_a = api.get_user_timeline(screen_name=PROFILE_A, count=200)
    timeline_b = api.get_user_timeline(screen_name=PROFILE_B, count=200)

    all_texts_a = [client.get_tweet_data(status)['text'] for status in timeline_a]
    all_texts_b = [client.get_tweet_data(status)['text'] for status in timeline_b]

    words_a, hashtags_a, topics_a = text_processor.get_words_simple(all_texts_a, topic_names)
    words_b, hashtags_b, topics_b = text_processor.get_words_simple(all_texts_b, topic_names)

    attach_stats(user_a, words_a, hashtags_a, topics_a)
    attach_stats(user_b, words_b, hashtags_b, topics_b)

    return jsonify({
        'user_a': user_a,
        'user_b': user_b,
        'topics_name': topic_names,
    })
示例#4
0
class TwitterCollector():
    """Search Twitter for a query and store the tweets that can be geolocated.

    A tweet is kept when its author's profile location contains a comma and
    the geocoder returns at least one feature for it. Kept tweets are
    appended to ``raw.json`` (one JSON document per line, with a ``geo``
    entry added) and inserted into the database as ``AllTweets`` rows.
    """

    def __init__(self):
        """Create the project Twitter helper and a Twython client from its credentials."""
        self.twitter = Twitter()
        self.credentials = self.twitter.get_credentials()
        self.twython = Twython(self.credentials['consumer_key'],
                               self.credentials['consumer_secret'],
                               self.credentials['access_token'],
                               self.credentials['access_token_secret'])

    def collect(self, query, context, waiting_time, count, number_of_attempts):
        """Page backwards through search results for ``query`` and persist them.

        Each attempt issues one search (``lang='pt'``) and then sleeps for
        ``waiting_time`` seconds. Collection stops early when a search
        returns no statuses.

        Args:
            query: Search string; an empty string makes the call a no-op.
            context: Context label stored with every saved tweet.
            waiting_time: Seconds to sleep after each attempt.
            count: Number of results requested per search call.
            number_of_attempts: Maximum number of search calls.

        Raises:
            Exception: ``"Database Error: ..."`` wrapping (and chained to)
                any error other than an integrity violation raised while a
                single tweet is being geocoded or stored.
        """
        if query == '':
            return

        # NOTE(review): the geocoder access token is hard-coded in source;
        # consider loading it from configuration instead.
        geocoder = Geocoder(
            access_token=
            "pk.eyJ1IjoiZ3Jhbmhva2FpbiIsImEiOiJjam8wbXdyeDIwMXprM3Bubm04ZnUyYzRqIn0.XKzTzY7qAr44O8hU2M2agA"
        )

        # Id of the last tweet on the previous page; None until a page is seen.
        last_since = None

        for i in range(0, number_of_attempts):

            print("Collecting " + query + "... Attempt: " + str(i))

            if last_since is None:
                results = self.twython.search(q=query, count=count, lang='pt')
            else:
                # max_id is inclusive, so subtract one to avoid fetching,
                # geocoding and re-appending the boundary tweet again.
                results = self.twython.search(q=query,
                                              count=count,
                                              lang='pt',
                                              max_id=last_since - 1)

            tweets = results['statuses']
            if not tweets:
                # An empty page has no last element to continue from.
                break

            last_since = tweets[-1]['id']

            for t in tweets:

                tweet = self.twitter.get_tweet_data(t)

                try:
                    self._store_if_geolocated(t, tweet, geocoder, context)
                except exc.IntegrityError:
                    print("The tweet " + str(tweet['object_id']) +
                          " has already on database")
                    db_session.rollback()
                except Exception as e:
                    raise Exception("Database Error: " + str(e)) from e

            # waiting_time in seconds
            print("Waiting Sleep Time ...")
            time.sleep(waiting_time)

    def _store_if_geolocated(self, raw_tweet, tweet, geocoder, context):
        """Geocode the author's location and persist the tweet if it resolves."""
        location = raw_tweet['user']['location']
        # Skip authors with no location or one without a comma.
        if location is None or ',' not in location:
            return

        features = geocoder.forward(location).geojson()['features']
        if not features:
            return

        coords = features[0]['geometry']['coordinates']
        # Stored with the two values swapped relative to the geocoder's
        # order (presumably lon/lat -> lat/lon; confirm).
        raw_tweet['geo'] = {
            "type": "Point",
            "coordinates": (coords[1], coords[0])
        }

        # Open the file only when there is something to write, and let the
        # context manager close it even if serialisation fails.
        with open('raw.json', 'a') as save_file:
            save_file.write(json.dumps(raw_tweet) + '\n')

        # NOTE(review): `tag` is not defined in this class; it is resolved
        # as a module-level name, so confirm it is set where this class lives.
        tweet_instance = AllTweets(
            tweet['object_id'], tweet['user_name'],
            tweet['text'], tweet['date_formated'],
            tweet['user_rt'], tag, context)
        db_session.add(tweet_instance)
        db_session.commit()
        print("Saved " + str(tweet['object_id']))
示例#5
0
    if len(p) < 3:
        print("Number of arguments is not correct")
        exit(0)
    elif len(p) == 3:
        tag = str(p[1])
        context = str(p[2])
        language = None
    else:
        tag = str(p[1])
        context = str(p[2])
        language = str(p[3])

    twitter = Twitter()

    credentials = twitter.get_credentials()

    consumer_key = credentials['consumer_key']
    consumer_secret = credentials['consumer_secret']
    access_token = credentials['access_token']
    access_token_secret = credentials['access_token_secret']

    stream = MyStreamer(consumer_key, consumer_secret, access_token,
                        access_token_secret, twitter)
    stream.set_stringsearch(tag)
    stream.set_context(context)

    try:
        if language == None:
            stream.statuses.filter(track=tag)
        else:
class TwitterCollector():
    """Repeatedly search Twitter for a query and store every result in the database."""

    def __init__(self):
        """Create the project Twitter helper and a Twython client from its credentials."""
        self.twitter = Twitter()
        self.credentials = self.twitter.get_credentials()
        keys = self.credentials
        self.twython = Twython(keys['consumer_key'],
                               keys['consumer_secret'],
                               keys['access_token'],
                               keys['access_token_secret'])

    def collect(self, query, context, waiting_time, count, number_of_attempts):
        """Run up to ``number_of_attempts`` searches for ``query`` and save the tweets.

        The first search is unconstrained; every later one passes as
        ``since_id`` the ``object_id`` of the first tweet of the most recent
        non-empty result (``-1`` if there has been none). Each attempt is
        followed by a sleep of ``waiting_time`` seconds.

        Args:
            query: Search string; an empty string makes the call a no-op.
            context: Context label stored with every saved tweet.
            waiting_time: Seconds to sleep after each attempt.
            count: Number of results requested per search call.
            number_of_attempts: Number of search calls to make.

        Raises:
            Exception: ``"Database Error: ..."`` for any error other than an
                integrity violation raised while storing a tweet.
        """
        if query == '':
            return

        first_id = -1

        for attempt in range(0, number_of_attempts):

            print("Collecting " + query + "... Attempt: " + str(attempt))

            search_kwargs = {'q': query, 'count': count, 'lang': 'pt'}
            if attempt != 0:
                search_kwargs['since_id'] = first_id
            results = self.twython.search(**search_kwargs)

            for position, status in enumerate(results['statuses']):

                tweet = self.twitter.get_tweet_data(status)

                # Remember the first tweet of this page for the next search.
                if position == 0:
                    first_id = tweet['object_id']

                try:
                    row = AllTweets(tweet['object_id'],
                                    tweet['user_name'],
                                    tweet['text'],
                                    tweet['date_formated'],
                                    tweet['user_rt'], tag, context)
                    db_session.add(row)
                    db_session.commit()
                    print("Saved " + str(tweet['object_id']))
                except exc.IntegrityError as e:
                    # Duplicate row: report it and undo the failed insert.
                    print("The tweet " + str(tweet['object_id']) +
                          " has already on database")
                    db_session.rollback()
                except Exception as e:
                    raise Exception("Database Error: " + str(e))

            # waiting_time in seconds
            print("Waiting Sleep Time ...")
            time.sleep(waiting_time)