Пример #1
0
    def __init__(self, credentials, mode, direction):
        """Load credentials, probe every Twitter app, then start building the network.

        Note that constructing the object runs ``buildnetwork`` before
        returning, so the constructor only returns once that call finishes.

        Args:
            credentials: path of a JSON file; it is handed to ``TweetsPersister``
                and must contain a ``"twitter_apps"`` list.
            mode: forwarded to ``buildnetwork``.
            direction: forwarded to ``buildnetwork``; also stored on the
                instance because ``connect`` is called before ``buildnetwork``.
        """
        # Ids of the root accounts (presumably the Twitter user ids of the
        # news outlets named by the constants -- confirm).
        FOLHA = "14594813"
        ESTADAO = "9317502"
        UOLNOT = "14594698"
        G1 = "8802752"
        R7 = "65473559"
        self.source_ids = [int(FOLHA), int(ESTADAO), int(UOLNOT), int(G1), int(R7)]
        self.direction = direction
        self.mode = mode

        self.persister = TweetsPersister(credentials)  # connect to mysql database

        # Read the credentials inside a context manager so the file handle is
        # closed even when the JSON is malformed.
        with open(credentials, "r") as json_fp:
            cred = json.load(json_fp)

        # connect to mongodb database
        # self.relationsdb = MongoClient(cred['mongo']['host'], cred['mongo']['port']).twitter

        # get credentials, to connect with Twitter API
        self.apps = cred["twitter_apps"]

        n_apps = len(self.apps)

        self.apps_resources = [0] * n_apps  # number of requisitions left
        self.apps_timeout = [0] * n_apps  # time where requisitions will be restored

        # Connect once with every app so both tables above hold real values.
        for app in range(n_apps):
            self.connect(app)

        self.current_app = 0  # app currently connected to API

        # The probing loop left the last app connected; go back to the first.
        self.connect(self.current_app)
        self.api_waiter()
        self.buildnetwork(self.mode, self.direction)
Пример #2
0
import json
from persistencylayer import TweetsPersister
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

# Account ids as strings (presumably the Twitter user ids of the news
# outlets named by the constants -- confirm against their use further down).
FOLHA = '14594813'
ESTADAO = '9317502'
UOLNOT = '14594698'
G1 = '8802752'
R7 = '65473559'

# Load the credentials at import time. The context manager closes the file
# handle as soon as the JSON has been parsed instead of leaking it for the
# lifetime of the process; ``json_fp`` stays bound (to the closed file).
with open('credentials.json') as json_fp:
    cred = json.load(json_fp)

persister = TweetsPersister()


class StdOutListener(StreamListener):
    """ A listener handles tweets are the received from the stream.
    This is a basic listener that just prints received tweets to stdout.

    """
    def on_data(self, data):
        parsed = json.loads(data)    
        
        user_id = int(parsed['user']['id_str'])
        if user_id in userslist:
            print ">>>>>USUARIO REPETIDO:",  parsed['user']['name'], parsed['user']['screen_name']

        else:
Пример #3
0
class NetworkBuilder:
    """Collect follower/friend id lists of Twitter users through tweepy.

    Several Twitter apps (credential sets) are used in rotation: when the
    connected app has no requests left for the ``/<direction>/ids`` endpoint,
    ``api_waiter`` switches to another app or sleeps until the limit resets.

    Constructing an instance immediately runs ``buildnetwork``.
    """

    # Code carried by Twitter API errors when the rate limit is exceeded.
    RATE_LIMIT_ERROR_CODE = 88

    # Directory prefix of the JSON file written for each supported direction.
    _OUTPUT_DIRS = {"followers": "net/", "friends": "netr/"}

    def __init__(self, credentials, mode, direction):
        """Load credentials, probe every Twitter app, then start building the network.

        Args:
            credentials: path of a JSON file; it is handed to ``TweetsPersister``
                and must contain a ``"twitter_apps"`` list.
            mode: forwarded to ``buildnetwork``.
            direction: forwarded to ``buildnetwork``; also stored on the
                instance because ``connect`` reads it.
        """
        # Ids of the root accounts (presumably the Twitter user ids of the
        # news outlets named by the constants -- confirm).
        FOLHA = "14594813"
        ESTADAO = "9317502"
        UOLNOT = "14594698"
        G1 = "8802752"
        R7 = "65473559"
        self.source_ids = [int(FOLHA), int(ESTADAO), int(UOLNOT), int(G1), int(R7)]
        self.direction = direction
        self.mode = mode

        self.persister = TweetsPersister(credentials)  # connect to mysql database

        # Read the credentials inside a context manager so the file handle is
        # closed even when the JSON is malformed.
        with open(credentials, "r") as json_fp:
            cred = json.load(json_fp)

        # connect to mongodb database
        # self.relationsdb = MongoClient(cred['mongo']['host'], cred['mongo']['port']).twitter

        # get credentials, to connect with Twitter API
        self.apps = cred["twitter_apps"]

        n_apps = len(self.apps)

        self.apps_resources = [0] * n_apps  # number of requisitions left
        self.apps_timeout = [0] * n_apps  # time where requisitions will be restored

        # Connect once with every app so both tables above hold real values.
        for app in range(n_apps):
            self.connect(app)

        self.current_app = 0  # app currently connected to API

        # The probing loop left the last app connected; go back to the first.
        self.connect(self.current_app)
        self.api_waiter()
        self.buildnetwork(self.mode, self.direction)

    def _rate_limit_info(self):
        """Return the rate-limit entry of ``/<direction>/ids`` for the connected app.

        The entry is read for its ``"remaining"`` and ``"reset"`` keys.
        """
        status = self.api.rate_limit_status()
        return status["resources"][self.direction]["/" + self.direction + "/ids"]

    def connect(self, app_id=0):
        """Authenticate against the Twitter API with app ``app_id``.

        Replaces ``self.api``, makes ``app_id`` the current app, prints its
        rate-limit status and records it in the per-app tables.
        """
        app = self.apps[app_id]

        # OAuth authentication with Twitter API
        auth = tweepy.OAuthHandler(app["consumer_key"], app["consumer_secret"])
        auth.set_access_token(app["access_token"], app["access_token_secret"])

        self.api = tweepy.API(auth)
        self.current_app = app_id

        # show the resources available
        info = self._rate_limit_info()
        print("current app: %d" % self.current_app)
        print("remaining: %d" % info["remaining"])
        print("reset time: %s" % time.ctime(info["reset"]))
        self.apps_resources[app_id] = info["remaining"]
        self.apps_timeout[app_id] = info["reset"]

    def api_waiter(self, wait_anyway=True):
        """Make sure the connected app can issue another request, waiting if needed.

        Args:
            wait_anyway: when requests are still available, sleep 3 seconds
                before returning instead of returning immediately.
        """
        info = self._rate_limit_info()
        remaining_resources = info["remaining"]
        self.apps_resources[self.current_app] = remaining_resources
        self.apps_timeout[self.current_app] = info["reset"]

        if remaining_resources > 0:
            if wait_anyway:
                time.sleep(3)
            return

        # First choice: switch to any app recorded as still having requests.
        # The table may be stale; connect() refreshes it and the recursive
        # call re-evaluates the situation.
        for app_id, remaining in enumerate(self.apps_resources):
            if remaining > 0:
                self.connect(app_id)
                self.api_waiter()
                return

        # Second choice: no app has requests left, so move to the one whose
        # limit resets first and wait there.
        oldest_app = self.apps_timeout.index(min(self.apps_timeout))
        if oldest_app != self.current_app:
            self.connect(oldest_app)
            self.api_waiter()
            return

        while remaining_resources == 0:
            time_to_reset = info["reset"] - int(time.time()) + 1
            print("dormindo por %s segundos esperando o reset" % time_to_reset)
            # sleep at least 5 seconds when the reset time has already passed
            time.sleep(time_to_reset if time_to_reset > 0 else 5)

            # update rate limits
            info = self._rate_limit_info()
            remaining_resources = info["remaining"]
            self.apps_resources[self.current_app] = remaining_resources
            self.apps_timeout[self.current_app] = info["reset"]

    @staticmethod
    def _is_rate_limit_error(error):
        """Tell whether ``error`` carries the rate-limit code in its first entry.

        The payload is expected to be a list of dicts with a ``"code"`` key;
        any other shape (plain string, empty message, dict without a code)
        yields ``False`` instead of raising.
        """
        payload = getattr(error, "message", None)
        if payload is None and error.args:
            # Exceptions without a ``message`` attribute keep it in ``args``.
            payload = error.args[0]
        if not isinstance(payload, (list, tuple)) or not payload:
            return False
        first = payload[0]
        return isinstance(first, dict) and first.get("code") == NetworkBuilder.RATE_LIMIT_ERROR_CODE

    def _ids_request(self, direction):
        """Return the id-listing call for ``direction`` bound to the current API."""
        if direction == "followers":
            return self.api.followers_ids
        return self.api.friends_ids

    def _pop_unknown_queue_user(self):
        """Pop queue entries until one is not yet in the database (or the queue is empty)."""
        candidate = self.persister.popQueueUser()
        while candidate and self.persister.findUser(candidate):
            candidate = self.persister.popQueueUser()
        return candidate

    def _register_queue_user(self, user):
        """Fetch ``user`` from the API and insert it in the database.

        Returns:
            True on success, False when the API call failed (the error is printed).
        """
        try:
            userobject = self.api.get_user(user)
            # NOTE(review): meaning of origin=2 is defined by TweetsPersister -- confirm.
            self.persister.insertUserObject(userobject, origin=2)
        except tweepy.TweepError as e:
            print(e)
            return False
        return True

    def _fetch_connections(self, user, direction):
        """Page through the ids connected to ``user`` and return them as one list."""
        connections = []
        next_cursor = -1
        while next_cursor != 0:
            # Resolve the call on every page: api_waiter() may have replaced
            # self.api with another app since the previous request.
            request = self._ids_request(direction)
            ids, (_, next_cursor) = request(id=user, cursor=next_cursor)

            connections += ids
            print("%d connections" % len(connections))

            # wait if needed to do more requests
            self.api_waiter()
        return connections

    def _save_connections(self, user, direction, connections):
        """Dump ``connections`` as JSON to ``net/<user>`` or ``netr/<user>``."""
        path = self._OUTPUT_DIRS[direction] + str(user)
        with open(path, "w") as f:
            json.dump(connections, f)

    def buildnetwork(self, mode, direction):  # active or queue
        """Get users connections in order to build a graph of relationships

        Runs until no user is left to process, then prints a notice, sleeps
        10 seconds and returns.

        Args:
            mode: 'active' - build connections of users that twitted in the past, and already are in database
                  'queue' - do a breadth-first search in the network, starting from the root users
            direction: 'followers' - get users followers (out edges)
                       'friends' - get users friends (in edges)

        Raises:
            ValueError: if ``mode`` or ``direction`` is not one of the values above.
        """
        # Fail early: an unknown value used to surface later as an
        # UnboundLocalError in the middle of the loop.
        if direction not in self._OUTPUT_DIRS:
            raise ValueError("unknown direction: %r" % (direction,))
        if mode not in ("active", "queue"):
            raise ValueError("unknown mode: %r" % (mode,))

        while True:
            # try first to get unprocessed users from Users database
            new_user = self.persister.loadUnprocessedUser(direction, origin=mode)
            if mode == "active":
                print("new_user %s" % (new_user,))
            elif not new_user:
                # queue mode: fall back to the queue, skipping known users
                new_user = self._pop_unknown_queue_user()
                if new_user and not self._register_queue_user(new_user):
                    continue

            if not new_user:
                # Any falsy result means there is nothing to do; testing only
                # for None would spin forever on other empty values.
                print(">>>Network Builder desocupado!")
                time.sleep(10)
                break

            print("building network for user %s" % new_user)

            # TODO: update user info

            try:
                connections = self._fetch_connections(new_user, direction)
                self._save_connections(new_user, direction, connections)

                # register in database that user was processed
                self.persister.saveProcessedUser(new_user, direction)

                # Disabled step kept for reference:
                #   if self.persister.isUserProcessed(new_user): #check if everything was collected already (friends & followers)
                #       f = open('net/'+str(new_user))
                #       followers = set(json.load(f))
                #       f.close()
                #
                #       f = open('netr/'+str(new_user))
                #       friends = set(json.load(f))
                #       f.close()
                #
                #       true_friends = list(followers & friends)
                #
                #       self.persister.pushQueueUsers(true_friends)

            except tweepy.TweepError as e:
                print(e)

                if self._is_rate_limit_error(e):
                    print("skipping user error limit")
                    time.sleep(60)
                else:
                    # NOTE(review): called without a direction, unlike the
                    # success path -- confirm against TweetsPersister.
                    self.persister.saveProcessedUser(new_user, error=True)
                self.api_waiter()
                # TODO: treat specially lack of resources error