Exemplo n.º 1
0
    def get_profiles(self, ids=None, screen_names=None):
        """Stream user profiles in batches of at most 100 per request.

        IDs are processed first, then screen names. Every batch is handed to
        ``self._stream_profiles`` together with a ``by_id`` flag.

        :param ids: optional sized iterable of user IDs to look up
        :param screen_names: optional sized iterable of screen names
        :return: None
        """
        # TODO Deal with timeouts
        batch_size = 100  # limit 100 profiles per request

        for wanted, by_id in ((ids, True), (screen_names, False)):
            if not wanted:
                continue

            lookup = []
            remain = len(wanted)
            for item in wanted:
                lookup.append(item)
                if len(lookup) >= batch_size:
                    self._stream_profiles(lookup, by_id=by_id)
                    remain -= len(lookup)
                    l.INFO("Fetching {lookup} profiles. {remain} remain".format(
                        lookup=len(lookup), remain=remain))
                    # Start a fresh batch; otherwise every following item
                    # would re-send all previously requested entries.
                    lookup = []

            # Only issue the trailing request when something is left over.
            if lookup:
                l.INFO("Fetching remaining %s profile(s)" % (len(lookup)))
                self._stream_profiles(lookup, by_id=by_id)
Exemplo n.º 2
0
    def _stream_tweets_by_user_id(self, id_, **kwargs):
        """Page through a user's timeline, handing each tweet to ``on_tweet``.

        :param id_: user ID whose timeline is requested
        :param kwargs: an optional ``limit`` caps how many tweets are
            gathered; any other keyword is ignored
        :return: None
        """
        # TODO rework this to use min/max tweets instead of assuming < 200
        # means done
        limit = kwargs.get('limit')
        # The request lives in its own dict so the caller's options are no
        # longer overwritten (and ``limit`` is never sent to the API).
        request = dict(
            user_id=id_,
            count=200
        )
        tweets_gathered = 0

        # TODO consider breaking up/refactoring
        while True:
            try:
                l.INFO("Fetching 200 tweets %s" % (request))
                tweets = self.api.GetUserTimeline(**request)

            except Exception as e:
                # Best effort: log the failure and stop streaming this user.
                l.WARN("%s kwargs %s" % (e, request))
                return None

            l.INFO("Streaming tweets")
            for tweet in tweets:
                self.on_tweet(tweet)
            tweets_gathered += len(tweets)

            if limit is not None and tweets_gathered >= limit:
                l.INFO("Per user limit hit {} tweets gathered".format(limit))
                break

            if len(tweets) < 200:
                # TODO Fix - Using <200 as proxy for end of user timeline
                l.INFO("Stream ended < 200 tweets")
                break

            # Next request starts just below the oldest tweet of this page.
            # max_id is inclusive per the timeline API docs, so reusing the
            # oldest id itself would deliver that tweet a second time.
            next_max_id = min(tweet.id for tweet in tweets) - 1
            l.INFO("Setting max ID: {}".format(next_max_id))
            request['max_id'] = next_max_id
Exemplo n.º 3
0
 def get_tweets(self, ids=None, screen_names=None, limit=3200):
     """Stream tweets for every requested account.

     User IDs are handled first, then screen names; ``limit`` is forwarded
     unchanged to ``_stream_tweets`` for each account.
     """
     for user_id in ids or ():
         l.INFO("Gathering tweets for user ID {}".format(user_id))
         self._stream_tweets(id_=user_id, limit=limit)

     for name in screen_names or ():
         l.INFO("Gathering tweets for user {}".format(name))
         self._stream_tweets(screen_name=name, limit=limit)
Exemplo n.º 4
0
    def _stream_friends_by_screen_name(self, screen_name, request_limit=3):
        """Fetch the friend IDs of ``screen_name`` and report each connection.

        :param screen_name: screen name of the account being inspected
        :param request_limit: multiplied by 5000 to form ``total_count``
        :return: the friend IDs returned by ``self.api.GetFriendIDs``
        """
        kwargs = dict(
            screen_name=screen_name,
            cursor=-1,
            total_count=request_limit * 5000
        )

        l.INFO("Getting friends %s" % (kwargs))
        friends = self.api.GetFriendIDs(**kwargs)
        l.INFO("Streaming connections %s friends found" % (len(friends)))
        for friend in friends:
            # The previous code referenced an undefined ``user_id`` here and
            # passed the friend ID as the connection type.
            # NOTE(review): only the screen name is known in this method, so
            # it identifies the account -- confirm on_connection accepts it.
            self.on_connection(screen_name, friend, type_='friend')
        return friends
Exemplo n.º 5
0
    def _stream_followers_by_id(self, user_id, request_limit):
        """Fetch the follower IDs of ``user_id`` and report each connection.

        :param user_id: ID of the account being inspected
        :param request_limit: multiplied by 5000 to form ``total_count``
        :return: the follower IDs returned by ``self.api.GetFollowerIDs``
        """
        kwargs = dict(
            user_id=user_id,
            cursor=-1,
            total_count=request_limit * 5000
        )

        l.INFO("Getting followers %s" % (kwargs))
        followers = self.api.GetFollowerIDs(**kwargs)
        l.INFO("Streaming connections %s followers found" % (len(followers)))
        for follower in followers:
            # The connection type is a label, not the follower's ID.
            self.on_connection(user_id, follower, type_='follower')
        return followers
Exemplo n.º 6
0
 def _fetch_new(self, active_threads):
     """
     Adds new threads to thread cache
     :param active_threads: list of currently active thread ids
     :return: int number threads added
     """
     # A set gives constant-time membership tests for every candidate id.
     known_ids = set(t.id for t in self.thread_cache)
     processed = 0
     for thread_id in active_threads:
         if thread_id in known_ids:
             continue

         thread = self._fetch_one(thread_id)
         if thread is None:
             # NOTE(review): the board lookup presumably yields None when a
             # thread disappears between listing and fetching -- confirm.
             # Caching None would break the ``t.id`` access above next time.
             l.WARN("{} could not be fetched".format(thread_id))
             continue

         self.thread_cache.append(thread)
         # Remember the id so a repeated entry is not fetched twice.
         known_ids.add(thread_id)
         l.INFO("{} added to thread cache".format(thread_id))
         processed += 1
     l.INFO("Processed {} new threads".format(processed))
     return processed
Exemplo n.º 7
0
    def _poll_thread(self, thread):
        """Poll one cached thread for new posts or archival.

        Waits ``self.sleep_per_request`` seconds, calls ``thread.update()``
        and fires ``on_update`` when the result is truthy. A thread flagged
        ``archived`` is removed from the cache and passed to ``on_archive``.
        """
        time.sleep(self.sleep_per_request)

        new_count = thread.update()
        if not new_count:
            l.INFO("{} no updates".format(thread.id))
        else:
            l.INFO("{} has {} new updates".format(thread.id, new_count))
            self.on_update(thread)

        # Nothing more to do while the thread is still live.
        if not thread.archived:
            return

        self.thread_cache.remove(thread)
        self.on_archive(thread)
        l.INFO("{} has been archived".format(thread.id))
Exemplo n.º 8
0
    def get_tweets(self, ids=None, screen_names=None, limit=3200):
        """Stream tweets for the given user IDs and screen names.

        Screen names are resolved through ``self._screen_names_to_ids`` and
        appended to the supplied IDs before streaming.

        :param ids: optional iterable of user IDs
        :param screen_names: optional screen names to resolve to IDs
        :param limit: forwarded to ``_stream_tweets_by_user_id`` per user
        :return: None
        """
        # Work on a copy: the caller's list must not grow as a side effect,
        # and a tuple of ids can now be combined with resolved names.
        user_ids = list(ids) if ids is not None else []

        if screen_names is not None:
            user_ids += self._screen_names_to_ids(screen_names)

        l.INFO("Getting tweets for ids: %s" % (user_ids))
        for id_ in user_ids:
            self._stream_tweets_by_user_id(id_, limit=limit)
Exemplo n.º 9
0
 def _fetch_one(self, thread_id):
     """
     Retrieve a single thread from the board after the per-request pause
     :param thread_id: id of the thread to retrieve
     :return: whatever ``self.board.get_thread`` returns for that id
     """
     pause = self.sleep_per_request
     time.sleep(pause)

     fetched = self.board.get_thread(thread_id)
     message = "Fetching thread ID {}".format(thread_id)
     l.INFO(message)
     return fetched
Exemplo n.º 10
0
    def _stream_followers(self, user_id=None, screen_name=None, request_limit=3):
        """Fetch follower IDs for an account and report each connection.

        The request carries ``user_id`` and/or ``screen_name`` when they are
        truthy. When a screen name is given, the account's ID is looked up
        via ``_fetch_profile_by_screen_name`` and used for the connections.
        """
        kwargs = {
            'cursor': -1,
            'total_count': request_limit * 5000,
        }
        if user_id:
            kwargs['user_id'] = user_id
        if screen_name:
            kwargs['screen_name'] = screen_name

            # User ID needed for connection object
            profiles = self._fetch_profile_by_screen_name(
                screen_name=[screen_name])
            user_id = profiles[0].id

        l.INFO("Getting followers %s" % (kwargs))

        follower_ids = self.api.GetFollowerIDs(**kwargs)
        l.INFO("Streaming connections %s followers found" % (len(follower_ids)))
        for follower_id in follower_ids:
            self.on_connection(user_id, follower_id, type_='follower')
Exemplo n.º 11
0
def cli(ctx, config):
    # Load the S3 settings from the JSON file at ``config`` and run
    # ``convert`` over stdin with them. An empty configuration is used when
    # no usable file is available. (Kept as comments rather than a
    # docstring so any CLI help text derived from the docstring is unchanged.)
    s3_config = {}

    try:
        with open(config, 'r') as config_file:
            s3_config = json.load(config_file)
    except (TypeError, ValueError):
        # TypeError: no config path was supplied (open(None)).
        # ValueError: the file does not contain valid JSON.
        s3_config = {}
        l.WARN("Unable to parse s3 config")
    else:
        l.INFO("Using custom S3 configuration: %s" % (s3_config))

    input_ = click.get_text_stream('stdin')
    convert(input_, configuration=s3_config)
Exemplo n.º 12
0
    def get_profiles(self, ids=None, stream=True):
        """Fetch profiles for ``ids`` in batches of at most 100 per request.

        :param ids: optional sized iterable of user IDs
        :param stream: forwarded to ``self._fetch_users_by_id``
        :return: list with the results of every batch concatenated; empty
            when no IDs are given
        """
        # TODO profiles by screen_name
        # TODO Deal with timeouts

        profiles = []
        if not ids:
            # Nothing requested; the default ids=None used to fail on len().
            return profiles

        lookup = []
        remain = len(ids)
        for id_ in ids:
            lookup.append(id_)
            if len(lookup) >= 100:
                # limit 100 profiles per request
                chunk = self._fetch_users_by_id(ids=lookup, stream=stream)
                remain -= len(lookup)
                l.INFO("""
                       Fetching {lookup} profiles. {remain} remain
                       """.format(lookup=len(lookup), remain=remain))
                profiles += chunk
                # Start a fresh batch so fetched IDs are not requested again.
                lookup = []

        # Only issue the trailing request when something is left over.
        if lookup:
            l.INFO("Fetching remaining %s profile(s)" % (len(lookup)))
            profiles += self._fetch_users_by_id(ids=lookup, stream=stream)

        return profiles
Exemplo n.º 13
0
def cli(ctx, config):
    # Read CSV conversion options from the JSON file named by ``config`` and
    # run ``convert`` over stdin. A TypeError while loading leaves the
    # options empty and logs the default configuration instead.
    options = {}

    try:
        with open(config, 'r') as handle:
            options = json.load(handle)
            l.INFO("Using custom CSV configuration: %s" % (options))
    except TypeError:
        l.WARN("Using default CSV configuration: %s" % (CSV_DEFAULT_CONFIG))

    stdin_stream = click.get_text_stream('stdin')
    convert(stdin_stream, configuration=options)
Exemplo n.º 14
0
    def follow(self):
        """
        Build a thread cache of active threads. Loop over threads until they are archived
        Ends on first loop after stop_timer limit is hit. If stop_timer=false follow
        runs indefinitely
        """
        self.start = datetime.datetime.utcnow()
        self.thread_cache = self.board.get_all_threads()
        l.INFO("Thread cache initialized {} active threads".format(
            len(self.thread_cache)))
        l.INFO("Running for {} minutes".format(self.stop_timer))

        # A falsy stop_timer disables the time limit, so the loop only ends
        # once a timer is configured AND it has expired.
        while not (self.stop_timer and self._time_expired()):
            self.loop_start = datetime.datetime.utcnow()
            # Refresh the id list on every pass; a list captured once before
            # the loop could never reveal threads created afterwards.
            active_threads = self.board.get_all_thread_ids()
            self._fetch_new(active_threads)
            self.update()
            l.INFO("Thread cache loop complete time elapsed: {}".format(
                datetime.datetime.utcnow() - self.loop_start))

            # Announce the pause before it happens, not after.
            l.INFO("Sleeping {} seconds before restart".format(
                self.sleep_per_loop))
            time.sleep(self.sleep_per_loop)

        end = datetime.datetime.utcnow()
        elapsed = end - self.start
        l.INFO("Stopping /{} collection".format(self.board.name.upper()))
        l.INFO("Time Elapsed {}".format(elapsed))
Exemplo n.º 15
0
    def _stream_tweets(self, user_id=None, screen_name=None, limit=3200):
        """Page through an account's timeline, passing tweets to ``on_tweet``.

        :param user_id: optional ID of the account to read
        :param screen_name: optional screen name of the account to read
        :param limit: stop once at least this many tweets were gathered
        :return: None
        """
        # TODO rework this to use min/max tweets instead of assuming < 200
        # means done
        kwargs = dict(
            count=200
        )
        # Name the account in the request; previously neither identifier
        # was ever passed on to the API call.
        if user_id is not None:
            kwargs['user_id'] = user_id
        if screen_name is not None:
            kwargs['screen_name'] = screen_name
        tweets_gathered = 0

        while True:
            try:
                l.INFO("Fetching 200 tweets %s" % (kwargs))
                tweets = self.api.GetUserTimeline(**kwargs)
                tweets_gathered += len(tweets)

            except Exception as e:
                # Best effort: log the failure and stop streaming this user.
                l.WARN("%s kwargs %s" % (e, kwargs))
                return None

            l.INFO("Streaming tweets")
            for tweet in tweets:
                self.on_tweet(tweet)

            if tweets_gathered >= limit:
                l.INFO("Per user limit hit {} tweets gathered".format(limit))
                break

            if len(tweets) < 200:
                # TODO Fix - Using <200 as proxy for end of user timeline
                l.INFO("Stream ended < 200 tweets")
                break

            # Next request starts just below the oldest tweet of this page.
            # max_id is inclusive per the timeline API docs, so reusing the
            # oldest id itself would deliver that tweet a second time.
            next_max_id = min(tweet.id for tweet in tweets) - 1
            l.INFO("Setting max ID: {}".format(next_max_id))
            kwargs['max_id'] = next_max_id
Exemplo n.º 16
0
def cli(ctx, users, from_file, from_pipe):
    # Collect screen names from the comma-separated ``users`` value, the
    # first column of ``from_file`` and/or stdin (one name per line), then
    # fetch the friends of every collected account. (Kept as comments rather
    # than a docstring so any CLI help text derived from it is unchanged.)
    collector = GetFriendsLogger()
    screen_names = []

    if users is not None:
        screen_names = users.split(',')

    if from_file is not None:
        for row in csv.reader(from_file):
            # csv.reader yields an empty list for blank lines; indexing it
            # would raise IndexError.
            if row:
                screen_names.append(row[0])

    if from_pipe:
        try:
            piped = click.get_text_stream('stdin').read()
        except Exception as e:
            raise RuntimeError("Error while reading pipe: %s" % (e))
        # Skip blank lines (including entirely empty input) and trim stray
        # whitespace such as a trailing carriage return.
        for line in piped.splitlines():
            name = line.strip()
            if name:
                screen_names.append(name)

    l.INFO("Getting user relationship for users: %s" % (screen_names))
    collector.get_friends(screen_names=screen_names)
Exemplo n.º 17
0
 def on_profile(self, profile):
     """Log the received profile with a ``PROFILE:`` prefix."""
     message = "PROFILE: %s" % (profile)
     l.INFO(message)
Exemplo n.º 18
0
 def on_tweet(self, tweet):
     """Log the tweet's text after encoding it as UTF-8."""
     encoded_text = tweet.text.encode('utf-8')
     l.INFO(encoded_text)
Exemplo n.º 19
0
def main(**kwargs):
    # Entry point: logs the start-up message. Keyword arguments are accepted
    # but not used in this body.
    startup_message = "Starting SMTK"
    l.INFO(startup_message)
Exemplo n.º 20
0
 def on_connection(self, account, connection, type_):
     """Log a discovered connection as ``<type_> found <account> with <connection>``."""
     summary = "{} found {} with {}".format(type_, account, connection)
     l.INFO(summary)
Exemplo n.º 21
0
 def on_profile(self, profile):
     """Handle a discovered profile by passing it to the logger unchanged."""
     found = profile
     l.INFO(found)
Exemplo n.º 22
0
 def on_tweet(self, tweet):
     """Handle a discovered tweet by logging its text."""
     text = tweet.text
     l.INFO("TWEET FOUND: {}".format(text))
Exemplo n.º 23
0
def google():
    # Logs that the google command was invoked.
    notice = "Google Command Detected"
    l.INFO(notice)
Exemplo n.º 24
0
 def on_tweet(self, tweet):
     """Log the tweet's UTF-8 encoded text with a ``TWEET:`` prefix."""
     encoded_text = tweet.text.encode('utf-8')
     l.INFO("TWEET: %s" % (encoded_text))
Exemplo n.º 25
0
def target():
    # Logs that the target command was invoked.
    notice = "Target Command Detected"
    l.INFO(notice)
Exemplo n.º 26
0
def twitter():
    # Logs that the twitter command was invoked.
    notice = "Twitter Command Detected"
    l.INFO(notice)
Exemplo n.º 27
0
 def update(self):
     """Cycle through thread_cache polling for updates"""
     # Iterate over a snapshot: _poll_thread removes archived threads from
     # self.thread_cache, and removing from a list while iterating it makes
     # the loop skip the entry that follows each removed one.
     for thread in list(self.thread_cache):
         self._poll_thread(thread)
     l.INFO("Active threads {}".format(len(self.thread_cache)))
Exemplo n.º 28
0
 def on_start(self):
     """Announce the crawl keyword and write the stream's schema.

     ``singer.write_schema`` receives ``self.stream_name``, ``self.schema``
     and ``['image', 'link']`` as its third argument.
     """
     banner = """
            Starting GoogleImageCrawler for keyword: %s
            """ % (self.keyword)
     l.INFO(banner)

     key_fields = ['image', 'link']
     singer.write_schema(self.stream_name, self.schema, key_fields)