Пример #1
    def on_data(self, raw_data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        :param raw_data: one raw JSON message from the stream.
        :returns: False when the matching handler returned False,
            otherwise None.
        """
        # HTML entities in the payload are unescaped before JSON decoding.
        data = json.loads(HTMLParser().unescape(raw_data))

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'delete' in data:
            delete = data['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'direct_message' in data:
            status = Status.parse(self.api, data)
            if self.on_direct_message(status) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False
        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False
        else:
            # Only messages that matched none of the known keys are errors.
            logging.error("Unknown message type: " + str(raw_data))
 def process(self, tweet):
     """Print a one-line summary of a raw JSON tweet.

     The tweet text has every sequence listed in ``UNICODE_LINES`` replaced
     by a single space before it is printed.

     :param tweet: JSON-encoded tweet.
     """
     status = Status.parse(api, json.loads(tweet))
     # Chain the replacements on the running result. Restarting from
     # status.text on every pass kept only the last substitution, and an
     # empty UNICODE_LINES left `text` unbound.
     text = status.text
     for lf in UNICODE_LINES:
         text = text.replace(lf, ' ')
     # Single parenthesised argument: prints the same on Python 2 and 3.
     print("@%s (%s, %s, %s, %s): %s" % (
         status.user.screen_name,
         status.user.lang,
         status.user.statuses_count,
         status.user.friends_count,
         status.user.followers_count,
         text))
Пример #3
    def on_data(self, data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        :param data: one raw JSON message from the stream, as text.
        :returns: False when the matching handler returned False,
            otherwise None.
        """
        if '{"delete"' in data:
            # A delete notice refers either to a status or to a direct
            # message; decode once and route on the key that is present.
            deleted = json.loads(data)['delete']
            if 'status' in deleted:
                delete = deleted['status']
                if self.on_delete(delete['id'], delete['user_id']) is False:
                    return False
            else:
                delete = deleted['direct_message']
                if self.on_direct_message_delete(delete['id'], delete['user_id']) is False:
                    return False
        elif '{"direct_message"' in data:
            message = DirectMessage.parse(self.api, json.loads(data)['direct_message'])
            if self.on_direct_message(message) is False:
                return False
        elif '{"target"' in data:
            event = json.loads(data)
            if self.on_event(event) is False:
                return False
        elif '{"limit"' in data:
            if self.on_limit(json.loads(data)['limit']['track']) is False:
                return False
        elif '"in_reply_to_user_id_str"' in data:
            status = Status.parse(self.api, json.loads(data))
            if self.on_status(status) is False:
                return False
Пример #4
def test_end_to_end(filename, connections, expected, tmpdir):
    """Run a stored status through LessListener and check the reply and follows.

    :param filename: path of a JSON file holding the status to parse.
    :param connections: mapping passed to ``MockAPI`` as the initial
        connections.
    :param expected: expected reply text, or None when no reply is expected.
    :param tmpdir: directory used as the listener's state directory.
    """
    api = MockAPI(connections=connections)

    with open(filename, 'r') as f:
        status = Status.parse(api, json.load(fp=f))

    l = LessListener(api=api, post_replies=True, gather='tweets', state_dir=str(tmpdir))

    # 100% festivity for all of December
    l.december_greetings = ('It is cold outside.',)
    l.festive_probability = 1.
    assert l.get_festive_probability(dt.date(2016, 12, 5)) == 1.

    # NOTE(review): the calls that feed `status` to the listener appear to be
    # missing from this excerpt (before and after the id bump below);
    # restore them from the original test.

    # Never reply to the same toot twice

    # Rate-limit replies for same word
    status.id = status.id + 1

    if expected is None:
        assert api._updates == []
    else:
        # Without this `else` the two assertion groups contradicted each
        # other whenever `expected` was None.
        assert len(api._updates) == 1
        u = api._updates[0]
        assert u['status'] == expected

    for k, before in connections.items():
        after = api._connections[k]
        assert ('following' in after) == ('followed_by' in before), \
            (k, before, after)
Пример #5
 def on_data(self, data):
     """Classify incoming tweets by language until the sampling window ends.

     Once ``self.duration`` seconds have passed since ``self.started``, the
     collected counters are written to ``<started>-sample.stats`` and False
     is returned. Before that, each tweet is classified with ``langid`` and
     counted as above threshold, below threshold or excluded.

     :param data: one raw JSON message from the stream, as text.
     :returns: False when the window is over, True after counting a tweet,
         None for any other message.
     """
     if time.time() >= self.started + self.duration:
         stamp = '%Y-%m-%d %H:%M:%S'
         # The context manager flushes and closes the stats file even if a
         # write fails; the handle used to be left open.
         with open('{0}-sample.stats'.format(int(self.started)), 'w+') as stats:
             stats.write("================= STATISTICS =================" + "\n")
             stats.write("Start time: " + time.strftime(stamp, time.localtime(self.started)) + "\n")
             stats.write("End time: " + time.strftime(stamp, time.localtime(time.time())) + "\n")
             stats.write("First Tweet ID: " + self.first_tweet_id + "\n")
             stats.write("Last Tweet ID: " + self.last_tweet_id + "\n")
             stats.write("Language: " + self.lang + "\n")
             stats.write("Language classification threshold: " + str(self.lang_threshold) + "\n")
             stats.write("Above threshold: " + str(self.counter[self.lang + '-above']) + "\n")
             stats.write("Below threshold: " + str(self.counter[self.lang + '-below']) + "\n")
             stats.write("Exluded: " + str(self.counter['excluded']) + "\n")
         return False
     elif 'in_reply_to_status_id' in data:
         status = Status.parse(self.api, json.loads(data))
         langclass = langid.classify(status.text)
         # All counters still at zero means this is the first tweet seen.
         if self.counter == {self.lang + '-above': 0, self.lang + '-below': 0, 'excluded': 0}:
             self.first_tweet_id = str(status.id)
         self.last_tweet_id = str(status.id)
         # Exactly one counter is bumped per tweet; the missing `else`
         # branches used to increment all three.
         if langclass[0] == self.lang:
             if langclass[1] >= self.lang_threshold:
                 self.counter[self.lang + '-above'] += 1
             else:
                 self.counter[self.lang + '-below'] += 1
         else:
             self.counter['excluded'] += 1
         return True
Пример #6
 def on_data(self, data):
     """Parse messages whose user mentions include "testeMagazine".

     Messages that do not contain the text "entities" are ignored.
     """
     if "entities" not in data:
         return
     data = json.loads(data)
     mentioned = tuple(
         entry["screen_name"] for entry in data["entities"]["user_mentions"]
     )
     if "testeMagazine" in mentioned:
         status = Tweet.parse(self.api, data)
Пример #7
  def on_data(self, raw_data):
    """Called when raw data is received from connection.

    This is where all the data comes first. Normally we could use (inherit)
    the on_data() in tweepy.StreamListener, but it unnecessarily and naively
    reports unknown event types as errors (to simple log); also, we might want
    to tweak it further later on.

    But for now, this is basically taken from tweepy's on_data().

    Return False to stop stream and close connection.
    """
    self.processing_data = True
    try:
      data = json.loads(raw_data)

      if 'in_reply_to_status_id' in data:
        status = Status.parse(self.api, data)
        if self.on_status(status) is False:
          return False
      elif 'delete' in data:
        delete = data['delete']['status']
        if self.on_delete(delete['id'], delete['user_id']) is False:
          return False
      elif 'event' in data:
        status = Status.parse(self.api, data)
        if self.on_event(status) is False:
          return False
      elif 'direct_message' in data:
        status = Status.parse(self.api, data)
        if self.on_direct_message(status) is False:
          return False
      elif 'limit' in data:
        if self.on_limit(data['limit']['track']) is False:
          return False
      elif 'disconnect' in data:
        if self.on_disconnect(data['disconnect']) is False:
          return False
      else:
        # Unknown types are only worth a debug line, not an error.
        log.debug('TwitterBotStreamListener::on_data(): got event/stream data of'
            ' unknown type. Raw data follows:\n%s', data)
    finally:
      # Clear the flag on every exit path, including the early
      # `return False` ones and exceptions raised by a handler.
      self.processing_data = False
Пример #8
def test_sanitize(filename, expected):
    """Check the sanitized text of a stored status.

    The status is loaded from ``tests/<filename>``; its sanitized text must
    contain neither '&' nor 'http' and must equal ``expected``.
    """
    api = NonCallableMock()
    fixture_path = os.path.join('tests', filename)

    with open(fixture_path, 'r') as fixture:
        status = Status.parse(api, json.load(fixture))

    sanitized = get_sanitized_text(status)
    for forbidden in ('&', 'http'):
        assert forbidden not in sanitized
    assert sanitized == expected
Пример #9
 def _read_from_table(self):
     """Read every row of ``self.table_name`` and parse each into a Status.

     NOTE(review): the excerpt ends right after the ``self.running`` check
     in the loop, so what happens once the flag is cleared is not visible.
     """
     self.running = True
     conn = StatusSource.engine.connect()
     meta = MetaData()
     # The table definition is reflected from the database at call time.
     table = Table(self.table_name, meta, autoload=True, autoload_with=StatusSource.engine)
     cmd = select([table])
     results = conn.execute(cmd)
     for result in results:
         # No API object is attached to statuses rebuilt from stored rows.
         status = Status.parse(None, result)
         if self.running == False:
Пример #10
    def on_data(self, raw_data):
        """Decode a raw stream message and hand it to the matching handler.

        Returns False when the chosen handler returned False; otherwise
        returns None, including for message types not handled here.
        """
        payload = json.loads(raw_data)

        # Pick the handler by the first recognised key.
        if 'in_reply_to_status_id' in payload:
            verdict = self.on_status(Status.parse(self.api, payload))
        elif 'delete' in payload:
            removed = payload['delete']['status']
            verdict = self.on_delete(removed['id'], removed['user_id'])
        elif 'event' in payload:
            verdict = self.on_event(Status.parse(self.api, payload))
        elif 'direct_message' in payload:
            verdict = self.on_direct_message(Status.parse(self.api, payload))
        else:
            verdict = None

        if verdict is False:
            return False
Пример #11
def test_save_tweet(tmpdir, id_, expected_filename):
    """Check that a gathered status ends up in the expected file.

    :param tmpdir: directory used for both the gather target and the
        listener state.
    :param id_: status id, as a string.
    :param expected_filename: file name expected under ``<tmpdir>/foo``.
    """
    api = MockAPI(connections={})
    foo = tmpdir.join('foo')

    l = LessListener(api=api, gather=str(foo), state_dir=str(tmpdir))
    # NOTE(review): the status literal was unterminated in this excerpt. The
    # remaining fields, and the call that hands `s` to the listener, appear
    # to be missing; restore them from the original test.
    s = Status.parse(api=api, json={
        'id': int(id_),
        'id_str': id_,
    })

    j = tmpdir.join('foo', expected_filename)
    assert j.check()
Пример #12
    def test_patched_status(self):
        """Check that a parsed Status exposes its raw JSON text and its fields.

        NOTE(review): ``patch`` is imported but never called in the visible
        code; if the patch has to be applied explicitly, that call is missing.
        """
        from tweepy.models import Status
        from crawler.tweepy_patch import patch
        s = Status.parse('test_api', {'a': 1, 'b': 2})
        # pylint: disable=E1101,W0212
        self.assertEqual(s._raw, '{"a": 1, "b": 2}')
        self.assertEqual(s.a, 1)
        self.assertEqual(s.b, 2)
Пример #13
    def on_data(self, raw_data):
        """Decode a raw stream message and dispatch statuses and events.

        ``friends``, ``delete`` and ``user_suspend`` messages are ignored on
        purpose; anything else that is not recognised is logged as an error.

        :param raw_data: one raw JSON message from the stream.
        :returns: False when the matching handler returned False,
            otherwise None.
        """
        data = json.loads(raw_data)
        if self.verbose:
            # Single parenthesised argument: same output on Python 2 and 3.
            print(data)
            print('-' * 60)

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'friends' in data:
            pass  # ignore
        elif 'delete' in data:
            pass  # ignore
        elif 'user_suspend' in data:
            pass  # ignore
        else:
            # Without this `else` every ignored user_suspend message was
            # reported as an unknown type.
            logging.error("Unknown message type: " + str(raw_data))
Пример #14
 def on_data(self, data):
     """Route a raw stream message to the status, delete or limit handler.

     The message type is detected by substring match on the raw text.
     Returns False when the handler returned False, otherwise None.
     """
     if 'in_reply_to_status_id' in data:
         parsed = Status.parse(self.api, json.loads(data))
         verdict = self.on_status(parsed, data)
     elif 'delete' in data:
         removed = json.loads(data)['delete']['status']
         verdict = self.on_delete(removed['id'], removed['user_id'])
     elif 'limit' in data:
         verdict = self.on_limit(json.loads(data)['limit']['track'])
     else:
         return None
     return False if verdict is False else None
Пример #15
    def save_status(self, data):
        """Parse a raw tweet and look up or build its Author and Post records.

        NOTE(review): this excerpt has lost lines. The ``try:`` headers that
        pair with the ``except`` clauses, the body of the ``if not status.geo``
        guard, the body of the stargazer check and the arguments of
        ``Author(`` / ``Post(`` are not visible, so the control flow below
        cannot be fully confirmed.
        """
        status = Status.parse(self.api, json.loads(data))

        if not status.geo:
            # _datafile.write(data+'\n')

        if Author.objects.filter(owner__userprofile__twitter_id=status.user.id_str).exists():
            # this tweet's author is on stargazer

            author = Author.objects.filter(source=Author.T_TWITTER, external_id=status.user.id_str).get()
        except Author.DoesNotExist:
            author = Author(

            post = Post.objects.filter(source=Post.T_TWITTER, external_id=status.id_str).get()
        except Post.DoesNotExist:
            # Index 0 is read as latitude and index 1 as longitude.
            lat = float(status.geo["coordinates"][0])
            lng = float(status.geo["coordinates"][1])

                addr = self._latlng2addr.get(lat, lng)
            except (LatLng2Addr.ConnectionFailed, LatLng2Addr.GeocodingFailed) as e:
                # Geocoding failure falls back to an empty address.
                addr = ""

            # twitter api response in UTC
            created = status.created_at + timedelta(hours=8)

            post = Post(

Пример #16
    def on_data(self, raw_data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        :param raw_data: one raw JSON message from the stream.
        :returns: False when the matching handler returned False,
            otherwise None.
        """
        data = json.loads(raw_data)

        if "in_reply_to_status_id" in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif "delete" in data:
            delete = data["delete"]["status"]
            if self.on_delete(delete["id"], delete["user_id"]) is False:
                return False
        elif "event" in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif "direct_message" in data:
            status = Status.parse(self.api, data)
            if self.on_direct_message(status) is False:
                return False
        elif "friends" in data:
            if self.on_friends(data["friends"]) is False:
                return False
        elif "limit" in data:
            if self.on_limit(data["limit"]["track"]) is False:
                return False
        elif "disconnect" in data:
            if self.on_disconnect(data["disconnect"]) is False:
                return False
        elif "warning" in data:
            if self.on_warning(data["warning"]) is False:
                return False
        else:
            # Only messages that matched none of the known keys are errors.
            logging.error("Unknown message type: " + str(raw_data))
 def get_place(status: "Status") -> dict:
     """Return the place details of a status as a plain dict.

     :param status: object whose optional ``place`` attribute carries
         ``bounding_box``, ``country``, ``country_code`` and ``full_name``.
     :returns: dict with the keys ``coordinates``, ``country``,
         ``country_code`` and ``full_name``. The "N/A" / empty-list defaults
         are returned when the status has no place or the place cannot be
         read.
     """
     place: dict = {
         "coordinates": [],
         "country": "N/A",
         "country_code": "N/A",
         "full_name": "N/A",
     }
     try:
         status_place = getattr(status, "place", None)
         if status_place is not None:
             details: dict = vars(status_place)
             bounding_box = details.get("bounding_box")
             place = {
                 "coordinates": vars(bounding_box).get("coordinates", []),
                 "country": details.get("country", "N/A"),
                 "country_code": details.get("country_code", "N/A"),
                 "full_name": details.get("full_name", "N/A"),
             }
     except Exception:
         # Best effort: a malformed place (e.g. no bounding box) falls back
         # to the defaults instead of failing the caller.
         # NOTE(review): the original handler body is not visible; it may
         # have logged the error.
         pass
     return place
Пример #18
    def on_data(self, raw_data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        :param raw_data: one raw JSON message from the stream.
        :returns: False when the matching handler returned False,
            otherwise None.
        """
        from tweepy.models import Status
        data = json.loads(raw_data)

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'delete' in data:
            delete = data['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'direct_message' in data:
            status = Status.parse(self.api, data)
            if self.on_direct_message(status) is False:
                return False
        elif 'friends' in data:
            if self.on_friends(data['friends']) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False
        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False
        else:
            # Only messages that matched none of the known keys are reported.
            self.bot._log("Unknown message type: " + str(raw_data))
Пример #19
    def on_data(self, data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        The message type is detected by substring match on the raw text.
        """
        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, json.loads(data))
            if self.on_status(status) is False:
                return False
        elif 'delete' in data:
            delete = json.loads(data)['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(json.loads(data)['limit']['track']) is False:
                return False
Пример #20
 def __init__(self, tweetDict):
     """Build the object from a dict with "tweet", "keywords", "groups" and "tokens".

     NOTE(review): this excerpt has lost lines. The ``try:`` headers, the
     fallback assignments inside both ``except KeyError`` clauses and the
     body of the final ``if`` are not visible.
     """
     self.tweet = Status.parse(API(), tweetDict["tweet"])
         self.keywords = tweetDict["keywords"]
     except KeyError:
         self.groups = tweetDict["groups"]
     except KeyError:
     self.tokens = []
     self.filt_tokens = []
     for token in tweetDict["tokens"]:
         t = Token(token)
         if not t.filter_token():
Пример #21
    def on_data(self, data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        The message type is detected by substring match on the raw text.
        """

        if "in_reply_to_status_id" in data:
            status = Status.parse(self.api, json.loads(data))
            if self.on_status(status) is False:
                return False
        elif "delete" in data:
            delete = json.loads(data)["delete"]["status"]
            if self.on_delete(delete["id"], delete["user_id"]) is False:
                return False
        elif "limit" in data:
            if self.on_limit(json.loads(data)["limit"]["track"]) is False:
                return False
Пример #22
    def on_data(self, data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        The message type is detected by substring match on the raw text.
        """

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, json.loads(data))
            if self.on_status(status) is False:
                return False
        elif 'delete' in data:
            delete = json.loads(data)['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(json.loads(data)['limit']['track']) is False:
                return False
Пример #23
    def on_data(self, data):
        """Classify incoming tweets by language until the sampling window ends.

        Once ``self.duration`` seconds have passed since ``self.started``,
        the collected counters are written to ``<started>-sample.stats`` and
        False is returned. Before that, each tweet is classified with
        ``langid`` and counted as above threshold, below threshold or
        excluded.

        :param data: one raw JSON message from the stream, as text.
        :returns: False when the window is over, True after counting a
            tweet, None for any other message.
        """
        if time.time() >= self.started + self.duration:
            stamp = '%Y-%m-%d %H:%M:%S'
            # The context manager closes the stats file even if a write
            # fails; the handle used to be left open.
            with open('{0}-sample.stats'.format(int(self.started)), 'w+') as stats:
                stats.write("================= STATISTICS =================" +
                            "\n")
                stats.write("Start time: " + time.strftime(
                    stamp, time.localtime(self.started)) + "\n")
                stats.write("End time: " + time.strftime(
                    stamp, time.localtime(time.time())) + "\n")
                stats.write("First Tweet ID: " + self.first_tweet_id + "\n")
                stats.write("Last Tweet ID: " + self.last_tweet_id + "\n")
                stats.write("Language: " + self.lang + "\n")
                stats.write("Language classification threshold: " +
                            str(self.lang_threshold) + "\n")
                stats.write("Above threshold: " +
                            str(self.counter[self.lang + '-above']) + "\n")
                stats.write("Below threshold: " +
                            str(self.counter[self.lang + '-below']) + "\n")
                stats.write("Exluded: " + str(self.counter['excluded']) + "\n")
            return False
        elif 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, json.loads(data))
            langclass = langid.classify(status.text)

            # All counters still at zero means this is the first tweet seen.
            if self.counter == {
                    self.lang + '-above': 0,
                    self.lang + '-below': 0,
                    'excluded': 0,
            }:
                self.first_tweet_id = str(status.id)
            self.last_tweet_id = str(status.id)

            # Exactly one counter is bumped per tweet.
            if langclass[0] == self.lang:
                if langclass[1] >= self.lang_threshold:
                    self.counter[self.lang + '-above'] += 1
                else:
                    self.counter[self.lang + '-below'] += 1
            else:
                self.counter['excluded'] += 1

            return True
Пример #24
 def process(self, tweet):
     """Append a trimmed JSON record of a tweet to the current gzip bucket.

     The bucket file name embeds ``time.strftime(self.fmt)``; when that
     value changes, a new file is opened under ``self.path``.

     :param tweet: JSON-encoded tweet.
     """
     status = Status.parse(api, json.loads(tweet))
     out = {
         "screen_name": status.user.screen_name,
         "id": status.id,
         "lang": status.user.lang,
         "statuses_count": status.user.statuses_count,
         "friend_count": status.user.friends_count,
         "profile_image_url": status.user.profile_image_url,
         "text": status.text.encode('utf8'),
         "entities": status.entities,
         "created_at": status.created_at.strftime("%Y-%m-%d %H:%M:%S"),
     }
     now = time.strftime(self.fmt)
     if now != self.time:
         # Close the previous bucket before rotating; otherwise every
         # rotation leaves an open gzip handle behind.
         previous = getattr(self, 'fid', None)
         if previous is not None:
             previous.close()
         self.time = str(now)
         self.fid = gzip.open(os.path.join(self.path, self.base + '-' + self.time + '.txt.gz'), 'ab')
     self.fid.write(json.dumps(out) + '\n')
Пример #25
    def test_sending_images(self):
        """Exercise TweetS3Images.send_image with the S3 download and the tweet call stubbed out.

        NOTE(review): the assertions at the end of this excerpt are
        truncated; the remaining arguments of ``assert_called_with`` and the
        calls that the last two lines belong to are not visible.
        """
        # ensure there is an image as the mock object will not do anything
        shutil.copy('./image.jpg', '/tmp/image.jpg')
        client = boto3.client('s3')
        client.download_file = MagicMock(return_value=None)

        auth = tweepy.OAuthHandler('foo', 'bar')
        api = tweepy.API(auth)
        api.update_with_media = MagicMock(return_value=Status())

        tweet_images = TweetS3Images(api, client)
        tweet_images.send_image('test_bucket', 'image.jpg', cleanup=True)

        client.download_file.assert_called_with('test_bucket', 'image.jpg',
            status='New image image.jpg brought to you by lambda-tweet',
                         'The image was not cleaned up correctly.')
Пример #26
    def _get_status(self, data):
        """Parse a raw tweet and return the status that should be processed.

        Retweets are replaced by the retweeted status, and quote tweets by
        the quoted status when ``self.quoted`` is set.

        :param data: JSON-encoded tweet.
        :returns: the selected status object.
        :raises TweepError: when the author is in ``self.block_users``, when
            the tweet is an original and ``self.original`` is false, or when
            it is a quote and ``self.quoted`` is false.
        """
        status = Status.parse(self.api, self.json.loads(data))

        if status.user.screen_name in self.block_users:
            raise TweepError(">> User ignored: @%s" % status.user.screen_name)

        try:
            # Work on the original tweet when this one is a retweet.
            status = status.retweeted_status
        except AttributeError:
            # No retweeted_status attribute: this is an original tweet.
            if not self.original:
                text = self._proccess_status(status.text)
                trunc_text = (text[:72] + '...') if len(text) > 75 else text
                raise TweepError(">> Original tweet ignored: %s" % trunc_text)

        if status.is_quote_status:
            if self.quoted:
                status = status.quoted_status
            else:
                text = self._proccess_status(status.text)
                raise TweepError(">> Quoted tweet ignored: %s" % text)

        return status
Пример #27
    async def on_data(self, raw_data):
        """Dispatch a raw stream message to the matching handler.

        This is called when raw data is received from the stream.
        This method handles sending the data to other methods, depending on the
        message type.

        Parameters
        ----------
        raw_data : JSON
            The raw data from the stream

        Returns
        -------
        Whatever the selected handler returns, or None after logging a
        warning for an unknown message type.
        """
        data = json.loads(raw_data)

        if "in_reply_to_status_id" in data:
            status = Status.parse(None, data)
            return await self.on_status(status)
        if "delete" in data:
            delete = data["delete"]["status"]
            return await self.on_delete(delete["id"], delete["user_id"])
        if "disconnect" in data:
            return await self.on_disconnect_message(data["disconnect"])
        if "limit" in data:
            return await self.on_limit(data["limit"]["track"])
        if "scrub_geo" in data:
            return await self.on_scrub_geo(data["scrub_geo"])
        if "status_withheld" in data:
            return await self.on_status_withheld(data["status_withheld"])
        if "user_withheld" in data:
            return await self.on_user_withheld(data["user_withheld"])
        if "warning" in data:
            return await self.on_warning(data["warning"])

        log.warning("Received unknown message type: %s", raw_data)
Пример #28
  def on_data(self, data):
    """Called when raw data is received from connection.

    Override this method if you wish to manually handle
    the stream data. Return False to stop stream and close connection.

    The message type is detected by substring match on the raw text.
    Status, direct-message and follow handlers have their result returned
    as is; delete and limit handlers only stop the stream by returning False.
    """

    if 'in_reply_to_status_id' in data:
      status = Status.parse(self.api, json.loads(data))
      return self.on_status(status)
    elif 'delete' in data:
      delete = json.loads(data)['delete']['status']
      if self.on_delete(delete['id'], delete['user_id']) is False:
        return False
    elif 'limit' in data:
      if self.on_limit(json.loads(data)['limit']['track']) is False:
        return False
    elif 'sender_id' in data and 'recipient_id' in data:
      dm = DirectMessage.parse(self.api, json.loads(data))
      return self.on_dm(dm)
    elif 'event' in data and 'follow' in data:
      content = json.loads(data)
      # The substring test above is only a cheap pre-filter; confirm on the
      # decoded message.
      if 'event' in content and content['event'] == 'follow':
        return self.on_follow(content)
Пример #29
 # Feature flags for the tweet being examined; they start at 0 and are set
 # to 1 below when the matching pattern is found.
 # NOTE(review): presumably this runs once per tweet inside a loop that is
 # not visible here, and `username` is initialised before this excerpt.
 hashtag = 0
 url = 0
 question = 0
 exclamation = 0
 pos_term = 0
 neg_term = 0
 pos_emoticon = 0
 neg_emoticon = 0
 reply = 0
 moment_morning = 0
 moment_afternoon = 0
 moment_evening = 0
 moment_night = 0
 retweeted = 0
 status = Status.parse(api, json.loads(tweet[0]))
 if status.id in error_list_tweet_ids:
     tweets_discarded_error += 1
 elif status.text.startswith("RT @"):
     tweets_discarded_retweet += 1
 else:
     # Only tweets that are neither known-bad nor retweets are analysed;
     # the `else` header was missing, leaving this block orphaned.
     tweets_considered += 1
     if regex_username.search(status.text) is not None:
         tweets_username += 1
         username = 1
     if regex_hashtag.search(status.text) is not None:
         tweets_hashtag += 1
         hashtag = 1
     if regex_url.search(status.text) is not None:
         tweets_url += 1
Пример #30
def make_mock_statuses(json_text):
    """Decode a JSON array of tweets and return them as parsed statuses."""
    return Status.parse_list(None, json.loads(json_text))
Пример #31
    def on_data(self, data):
        """Decode a raw stream message and dispatch it to the matching handler.

        For statuses, the untruncated text (when the message carries one) is
        passed to ``on_status`` as a second argument.

        NOTE(review): the extraction of ``full_text`` was truncated in this
        excerpt and has been reconstructed from the surrounding key checks;
        the debug banner that followed each extraction is not restored.
        Confirm against the original.

        :param data: one raw JSON message from the stream, as text.
        :returns: False when the matching handler returned False,
            otherwise None.
        """
        # Decode once; `data` keeps the raw text for the error message.
        payload = json.loads(data)

        full_text = ""
        if 'extended_tweet' in payload:
            if 'full_text' in payload["extended_tweet"]:
                full_text = payload["extended_tweet"]["full_text"]
        if "retweeted_status" in payload:
            if 'full_text' in payload["retweeted_status"]:
                full_text = payload["retweeted_status"]["full_text"]

        if 'in_reply_to_status_id' in payload:
            status = Status.parse(self.api, payload)
            if self.on_status(status, full_text) is False:
                return False
        elif 'delete' in payload:
            delete = payload['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'event' in payload:
            status = Status.parse(self.api, payload)
            if self.on_event(status) is False:
                return False
        elif 'direct_message' in payload:
            status = Status.parse(self.api, payload)
            if self.on_direct_message(status) is False:
                return False
        elif 'friends' in payload:
            if self.on_friends(payload['friends']) is False:
                return False
        elif 'limit' in payload:
            if self.on_limit(payload['limit']['track']) is False:
                return False
        elif 'disconnect' in payload:
            if self.on_disconnect(payload['disconnect']) is False:
                return False
        elif 'warning' in payload:
            if self.on_warning(payload['warning']) is False:
                return False
        else:
            # The original referenced an undefined `raw_data` here.
            logging.error("Unknown message type: " + str(data))
Пример #32
 def on_data(self, data):
     # NOTE(review): the three prose lines below were the docstring; its
     # delimiters are missing in this excerpt. Also missing: the arguments
     # of the on_follow / on_unfollow / on_favorite / on_unfavorite calls,
     # the right-hand side of the retweeted-status comparison, and the
     # branch header before the final print.
     Generic class for site streams that just print each
     action that comes in - override these methods to actually
     process them
     # Only messages addressed to a specific user are handled.
     if 'for_user' in data:
         parsed_data = json.loads(data)
         user_id = parsed_data['for_user']
         if 'message' in data:
             message = parsed_data['message']
             if u'friends' in message:
                 if self.on_friends(user_id, message['friends']) is False:
                     return False
             elif u'event' in message:
                 # Route by event name.
                 if message[u'event'] == u'follow':
                     if self.on_follow(
                     ) is False:
                         return False
                 elif message[u'event'] == u'unfollow':
                     if self.on_unfollow(
                     ) is False:
                         return False
                 elif message[u'event'] == u'favorite':
                     if self.on_favorite(
                     ) is False:
                         return False
                 elif message[u'event'] == u'unfavorite':
                     if self.on_unfavorite(
                     ) is False:
                         return False
             # Need this second check - could be a retweet of
             # a tweet mentioning the user of interest
             elif (u'retweeted_status' in message and
                 int(message[u'retweeted_status'][u'user'][u'id']) ==
                 if self.on_retweet(user_id, message) is False:
                     return False
             elif u'text' in message:
                 status = Status.parse(self.api, message)
                 # tweet from the user of interest
                 if status.author.id == user_id:
                     if self.on_user_status(user_id, status) is False:
                         return False
                 else:   # tweet mentioning the user of interest
                     if self.on_user_mention(user_id, status) is False:
                         return False
             elif u'direct_message' in message:
                 if self.on_direct_message(
                     user_id, message[u'direct_message']
                 ) is False:
                     return False
                 print parsed_data
Пример #33
 def __init__(self, status: Status):
     """Copy the fields of interest from a tweepy status onto this object.

     NOTE(review): four calls were truncated in this excerpt (``created_at``,
     ``user_id``, ``profile_image_url``, ``screen_name``). Their arguments
     are reconstructed from the attribute names being assigned; confirm
     them against the original, in particular the exact profile-image
     attribute.

     :param status: parsed status to read from.
     """
     user = getattr(status, "user")
     entities = getattr(status, "entities")

     self.created_at: str = preprocess_date(getattr(status, "created_at"))
     self.id: int = getattr(status, "id")
     self.hashtags: list = entities.get("hashtags", [])
     self.user_mentions: list = entities.get("user_mentions", [])
     # self.urls: list = status.__getattribute__("entities").get("urls", [])
     # self.media: list = status.__getattribute__("entities").get("media", [])
     self.text: str = self.get_text(status=status)
     self.retweet_count: int = getattr(status, "retweet_count")
     self.retweeted: bool = getattr(status, "retweeted")
     self.user_id: int = getattr(user, "id")
     self.profile_image_url: str = getattr(user, "profile_image_url")
     self.screen_name: str = getattr(user, "screen_name")
     # Not every status carries this flag; treat a missing one as False.
     self.possibly_sensitive: bool = getattr(status, "possibly_sensitive", False)
     self.favorite_count: int = getattr(status, "favorite_count")
     self.favorited: bool = getattr(status, "favorited")
     self.lang: str = detect(self.text)
     self.sentiment_analysis: dict = {}
     self.source: str = getattr(status, "source")
     self.geolocation: str = self.get_geolocation(status=status)
     self.place: dict = self.get_place(status=status)
     self.url: str = f"https://twitter.com/user/status/{self.id}"
     self.uuid: str = self.get_128_uuid(data_str=str(self.id))
     self.user_uuid: str = self.get_128_uuid(data_str=str(self.user_id))
Пример #34
 def load_status():
     """Read the sample tweet cassette and return it parsed as a Status."""
     with open('./tests/cassettes/sample-tweet.json') as cassette:
         payload = load(cassette)
         return Status.parse(api=None, json=payload)
Пример #35
	def update_tweets(self):
		# Drain self.incoming, store each status and its hashtags in the
		# database, count session votes, and return a broadcast dict with
		# 'general' and 'channels' sections.
		#
		# NOTE(review): this excerpt has lost lines. The `try:` that pairs
		# with `except IndexError`, the handler body, the `.append({` calls
		# and several closing brackets of the dict/list literals are not
		# visible, so the exact structure cannot be confirmed.
		print "Updating tweets"

		statuses = []
			while True:
				item = self.incoming.pop() # It's gonna throw up someday!
				if "in_reply_to_status_id" in item:
					statuses.append(Status.parse(self.stream.api, json.loads(item)))
				# Ignore anything other than status updates for now
				#	statuses.append(json.loads(item))
		except IndexError:
		broadcast = {}
		broadcast['general'] = {}
		broadcast['channels'] = {}
		for s in statuses:
			tags = re.findall("#([\w]+)(?iu)", s.text) # Case-insensitive, Unicode matching
			print "Tags: "
			print tags
			# Values are passed as query parameters rather than formatted into the SQL text.
			self.db.execute("INSERT INTO tweets (id, user_id, screen_name, profile_image_url, created_at, text) VALUES (%s,%s,%s,%s,%s,%s)", s.id, s.user.id, s.user.screen_name, s.user.profile_image_url, s.created_at, s.text)

			# Establish HABTM relationships, tweets with tags
			for t in tags:
				t = t.lower() # Force all to lowercase
				print "Inserting tag: %s" % t
				self.db.execute('''INSERT INTO hashtags (tag) VALUES (%s) ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id), tag=%s; 
					INSERT INTO hashtags_tweets (hash_id, tweet_id) VALUES (LAST_INSERT_ID(), %s)''', t, t, s.id)
				# Count the votes while we're at it
				if t in campboard['sessions']:
					# Attach the tweet to the broadcast channel
					if not broadcast['channels'].has_key(t):
						broadcast['channels'][t] = {}
					broadcast['channels'][t]['recent_tweets'] = []
							'text': s.text, 'created_at': unicode(s.created_at), 'id': s.id,
							'user': {
								'id': s.user.id,
								'screen_name': s.user.screen_name,
								'profile_image_url': s.user.profile_image_url
					vote_type = None
					# A "+1" or "-1" in the tweet text counts as a vote for the session tag.
					if re.search('\+1', s.text):
						#vote_type = "positive"
						self.db.execute("INSERT INTO session_votes (`session`, positive) VALUES (%s, 1) ON DUPLICATE KEY UPDATE positive=positive+1", t)
					elif re.search('\-1', s.text):
						#vote_type = "negative"
						self.db.execute("INSERT INTO session_votes (`session`, negative) VALUES (%s, 1) ON DUPLICATE KEY UPDATE negative=negative+1", t)

		broadcast['general']['recent_tweets'] = [
				'text': s.text, 'created_at': unicode(s.created_at), 'id': s.id,
				'user': {
					'id': s.user.id,
					'screen_name': s.user.screen_name,
					'profile_image_url': s.user.profile_image_url
			for s in statuses
		return broadcast
Пример #36
def bulk_load(listkey, tweets):
    """Dump the text of every JSON-encoded tweet into a per-list file.

    The target file sits under /home/marcua/data/tweets/ and is named after
    ``listkey``; it is opened in write mode, so earlier content is replaced.
    The file name is echoed to stdout before any tweet is written, then the
    text of each tweet is written followed by a newline.
    """
    destination = "/home/marcua/data/tweets/%s" % (listkey)
    with open(destination, "w") as tmpfile:
        print("file %s" % (tmpfile.name))
        for raw in tweets:
            payload = json.loads(raw)
            status = Status.parse(api, payload)
            line = convert_to_utf8_str(status.text) + "\n"
            tmpfile.write(line)
import unittest
import logging
import sys
from tweepy.models import Status
from TwitterWatcher.tweet_tracker import TweetTracker
from tests.database.mock_database import MockDatabase

dummy_status = Status()
dummy_status._json = {
		'id': 1,
		'id_str': '1',
		'text': 'test',
		'user': {
				'screen_name': 'test_user'

dummy_reply = Status()
dummy_reply._json = {
		'id': 2,
		'id_str': '2',
		'text': 'test reply',
		'user': {
				'screen_name': 'test_reply_user'
		'in_reply_to_status_id': 1

class TwitterWatcherDatabaseTests(unittest.TestCase):
		def setUp(self):
    def on_data(self, raw_data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.
        self.count += 1
        data = json.loads(raw_data)
        if self.count >50000:
            self.count = 0
            ts = time.strftime("./data/%Y%m%d%H%M")
            self.statusf = open(ts+'_status.csv','w',newline='')
            self.statusw = csv.writer(self.statusf)
            self.statusw.writerow(['id', 'created_at', 'coordinates',\
                               'hashtags', 'user_mentions', 'symbols', 'urls', \
                               'media', \
                               'in_reply_to_screen_name', \
                               'in_reply_to_user_id_str', \
                               'in_reply_to_status_id_str', \
                               'place', 'retweeted_status_id', 'source', \
                               'text', 'user id' \
                               # some other attributes exsits, they are list below
                               #, status.withheld_copyright, \#optional
                               #status.withheld_in_countries, \#optional
                               #status.withheld_scope, \#optional
                               #status.truncated, \#default False
                               #status.retweeted, status.retweet_count, \#for no rt
                               #status.scopes, possibly_sensitive, \
                               #status.lang, status.fiter_level, \lang=en
                               #status.favorited, status.favorite_count, \
                               #status.current_user_retweet, \
                               #status.contributors, status.annotations \
            self.userf = open(ts+'_user.csv','w',newline='')
            self.userw = csv.writer(self.userf)
            self.userw.writerow(['created_at', 'default_profile', \
                             #user.default_profile_image, \
                             'description', \
                             #user.entities, \
                             'favourites_count', \
                             #user.follow_request_sent, user.following,\#relate to given user
                             'followers_count', 'friends_count', \
                             'geo_enabled', 'id_str', 'is_translator', \
                             'lang', 'listed_count', 'location', \
                             #user.notifications, \
                             'name', \
                             #user.profile_background_color, user.profile_background_image_url, \
                             #user.profile_background_image_url_https, user.profile_background_tile, \
                             #user.profile_banner_url, user.profile_image_url, \
                             #user.profile_image_url_https, user.profile_link_color, \
                             #user.profile_sidebar_border_color, user.profile_sidebar_fill_color, \
                             #user.profile_text_color, user.profile_use_background_image, \
                             'protected', 'screen_name', \
                             #user.show_all_inline_media, user.status, \
                             'statuses_count', 'time_zone', 'user.url', \
                             #user.utc_offset, \
                             #user.withheld_in_countries, user.withheld_scope, 
            self.deletef = open(ts+'_delete.csv','w',newline='')
            self.deletew = csv.writer(self.deletef)

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'delete' in data:
            delete = data['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False
        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False
        elif 'warning' in data:
            if self.on_warning(data['warning']) is False:
                return False
            logging.error("Unknown message type: " + str(raw_data))
            return False
        return True
Пример #39
    hashtag = 0
    url = 0
    question = 0
    exclamation = 0
    pos_term = 0
    neg_term = 0
    pos_emoticon = 0
    neg_emoticon = 0
    reply = 0
    moment_morning = 0
    moment_afternoon = 0
    moment_evening = 0
    moment_night = 0
    retweeted = 0

    status = Status.parse(api, tweet)

    if tweet['id'] in error_list_tweet_ids:
        tweets_discarded_error += 1
    elif tweet['text'].startswith("RT @"):
        tweets_discarded_retweet += 1
        tweets_considered += 1
        if regex_username.search(tweet['text']) != None:
            tweets_username += 1
            username = 1
        if regex_hashtag.search(tweet['text']) != None:
            tweets_hashtag += 1
            hashtag = 1
        if regex_url.search(tweet['text']) != None:
            tweets_url += 1
Пример #40
 hashtag = 0
 url = 0
 question = 0
 exclamation = 0
 pos_term = 0
 neg_term = 0
 pos_emoticon = 0
 neg_emoticon = 0
 reply = 0
 moment_morning = 0
 moment_afternoon = 0
 moment_evening = 0
 moment_night = 0
 retweeted = 0
 status = Status.parse(api, tweet)
 if tweet['id'] in error_list_tweet_ids:
     tweets_discarded_error += 1
 elif tweet['text'].startswith("RT @"):
     tweets_discarded_retweet += 1     
     tweets_considered += 1
     if regex_username.search(tweet['text']) != None:
         tweets_username += 1
         username = 1
     if regex_hashtag.search(tweet['text']) != None:
         tweets_hashtag += 1
         hashtag = 1
     if regex_url.search(tweet['text']) != None:
         tweets_url += 1
Пример #41
    def setUp(self):
        def load_status():
            with open('./tests/cassettes/sample-tweet.json') as infile:
                status = Status.parse(api=None, json=load(infile))
                return status

        self._status = Status.parse(
                'created_at': 'Fri Dec 01 01:53:45 +0000 2017',
                'id': 936412976520876032,
                'id_str': '936412976520876032',
                'text': '@realDonaldTrump https://t.co/0BW86RBIRH',
                'display_text_range': [17, 40],
                '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
                'truncated': False,
                'in_reply_to_status_id': 936395008139198464,
                'in_reply_to_status_id_str': '936395008139198464',
                'in_reply_to_user_id': 25073877,
                'in_reply_to_user_id_str': '25073877',
                'in_reply_to_screen_name': 'realDonaldTrump',
                'user': {
                    'id': 29363354,
                    'id_str': '29363354',
                    'name': 'Kate',
                    'screen_name': 'k8_doo',
                    'location': 'United States',
                    'url': None,
                    'Follow me if you want to know how far I walked, hiked or ran today for #charitymiles',
                    'translator_type': 'none',
                    'protected': False,
                    'verified': False,
                    'followers_count': 322,
                    'friends_count': 943,
                    'listed_count': 3,
                    'favourites_count': 26916,
                    'statuses_count': 3334,
                    'created_at': 'Tue Apr 07 02:56:52 +0000 2009',
                    'utc_offset': -18000,
                    'time_zone': 'Eastern Time (US & Canada)',
                    'geo_enabled': True,
                    'lang': 'en',
                    'contributors_enabled': False,
                    'is_translator': False,
                    'profile_background_color': 'EBEBEB',
                    'profile_background_tile': False,
                    'profile_link_color': '990000',
                    'profile_sidebar_border_color': 'DFDFDF',
                    'profile_sidebar_fill_color': 'F3F3F3',
                    'profile_text_color': '333333',
                    'profile_use_background_image': True,
                    'default_profile': False,
                    'default_profile_image': False,
                    'following': None,
                    'follow_request_sent': None,
                    'notifications': None
                'geo': None,
                'coordinates': None,
                'place': {
                    'bounding_box': {
                        'coordinates': [[1, 2], [3, 2, 1]]
                'contributors': None,
                'quoted_status_id': 936379603651883008,
                'quoted_status_id_str': '936379603651883008',
                'quoted_status': {
                    'created_at': 'Thu Nov 30 23:41:09 +0000 2017',
                    'id': 936379603651883008,
                    'id_str': '936379603651883008',
                    'On the left: @BarackObama’s National Tree Lighting\nOn the right: @realDonaldTrump’s National Tree Lighting… https://t.co/PcsatAL7Lu',
                    'display_text_range': [0, 140],
                    '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
                    'truncated': True,
                    'in_reply_to_status_id': None,
                    'in_reply_to_status_id_str': None,
                    'in_reply_to_user_id': None,
                    'in_reply_to_user_id_str': None,
                    'in_reply_to_screen_name': None,
                    'user': {
                        'id': 329433192,
                        'id_str': '329433192',
                        'name': 'Jeremy Dickey',
                        'screen_name': 'JeremyDDickey',
                        'location': 'Washington, D.C.',
                        'url': 'https://medium.com/@JeremyDDickey',
                        'City Government Media Specialist. Aspiring CJ Cregg. Graduate of @MercyhurstU & @LCCLondon. RTs = you got my attention. Tweets are my own. Sarcasm also my own.',
                        'translator_type': 'none',
                        'protected': False,
                        'verified': False,
                        'followers_count': 1860,
                        'friends_count': 2452,
                        'listed_count': 129,
                        'favourites_count': 5864,
                        'statuses_count': 64253,
                        'created_at': 'Tue Jul 05 02:20:11 +0000 2011',
                        'utc_offset': -18000,
                        'time_zone': 'Eastern Time (US & Canada)',
                        'geo_enabled': True,
                        'lang': 'en',
                        'contributors_enabled': False,
                        'is_translator': False,
                        'profile_background_color': '1A1B1F',
                        'profile_background_tile': False,
                        'profile_link_color': '3B94D9',
                        'profile_sidebar_border_color': 'FFFFFF',
                        'profile_sidebar_fill_color': '252429',
                        'profile_text_color': '666666',
                        'profile_use_background_image': False,
                        'default_profile': False,
                        'default_profile_image': False,
                        'following': None,
                        'follow_request_sent': None,
                        'notifications': None
                    'geo': None,
                    'coordinates': None,
                    'place': {
                        'id': '6417871953fa5e86',
                        'place_type': 'city',
                        'name': 'Silver Spring',
                        'full_name': 'Silver Spring, MD',
                        'country_code': 'US',
                        'country': 'United States',
                        'bounding_box': {
                            'coordinates': [[[-77.064086, 38.979735],
                                             [-77.064086, 39.036964],
                                             [-76.97162, 39.036964],
                                             [-76.97162, 38.979735]]]
                        'attributes': {}
                    'contributors': None,
                    'is_quote_status': False,
                    'extended_tweet': {
                        'On the left: @BarackObama’s National Tree Lighting\nOn the right: @realDonaldTrump’s National Tree Lighting #Christmas https://t.co/wYoLJRO2r6',
                        'display_text_range': [0, 117],
                        'entities': {
                            'hashtags': [{
                                'text': 'Christmas',
                                'indices': [107, 117]
                            'urls': [],
                            'user_mentions': [{
                                'screen_name': 'BarackObama',
                                'name': 'Barack Obama',
                                'id': 813286,
                                'id_str': '813286',
                                'indices': [13, 25]
                            }, {
                                'screen_name': 'realDonaldTrump',
                                'name': 'Donald J. Trump',
                                'id': 25073877,
                                'id_str': '25073877',
                                'indices': [65, 81]
                            'symbols': [],
                            'media': [{
                                'id': 936379576682450944,
                                'id_str': '936379576682450944',
                                'indices': [118, 141],
                                'url': 'https://t.co/wYoLJRO2r6',
                                'display_url': 'pic.twitter.com/wYoLJRO2r6',
                                'type': 'photo',
                                'sizes': {
                                    'medium': {
                                        'w': 1200,
                                        'h': 800,
                                        'resize': 'fit'
                                    'small': {
                                        'w': 680,
                                        'h': 453,
                                        'resize': 'fit'
                                    'thumb': {
                                        'w': 150,
                                        'h': 150,
                                        'resize': 'crop'
                                    'large': {
                                        'w': 1752,
                                        'h': 1168,
                                        'resize': 'fit'
                            }, {
                                'id': 936379575839358977,
                                'id_str': '936379575839358977',
                                'indices': [118, 141],
                                'url': 'https://t.co/wYoLJRO2r6',
                                'display_url': 'pic.twitter.com/wYoLJRO2r6',
                                'type': 'photo',
                                'sizes': {
                                    'small': {
                                        'w': 680,
                                        'h': 680,
                                        'resize': 'fit'
                                    'thumb': {
                                        'w': 150,
                                        'h': 150,
                                        'resize': 'crop'
                                    'medium': {
                                        'w': 1200,
                                        'h': 1200,
                                        'resize': 'fit'
                                    'large': {
                                        'w': 2048,
                                        'h': 2048,
                                        'resize': 'fit'
                        'extended_entities': {
                            'media': [{
                                'id': 936379576682450944,
                                'id_str': '936379576682450944',
                                'indices': [118, 141],
                                'url': 'https://t.co/wYoLJRO2r6',
                                'display_url': 'pic.twitter.com/wYoLJRO2r6',
                                'type': 'photo',
                                'sizes': {
                                    'medium': {
                                        'w': 1200,
                                        'h': 800,
                                        'resize': 'fit'
                                    'small': {
                                        'w': 680,
                                        'h': 453,
                                        'resize': 'fit'
                                    'thumb': {
                                        'w': 150,
                                        'h': 150,
                                        'resize': 'crop'
                                    'large': {
                                        'w': 1752,
                                        'h': 1168,
                                        'resize': 'fit'
                            }, {
                                'id': 936379575839358977,
                                'id_str': '936379575839358977',
                                'indices': [118, 141],
                                'url': 'https://t.co/wYoLJRO2r6',
                                'display_url': 'pic.twitter.com/wYoLJRO2r6',
                                'type': 'photo',
                                'sizes': {
                                    'small': {
                                        'w': 680,
                                        'h': 680,
                                        'resize': 'fit'
                                    'thumb': {
                                        'w': 150,
                                        'h': 150,
                                        'resize': 'crop'
                                    'medium': {
                                        'w': 1200,
                                        'h': 1200,
                                        'resize': 'fit'
                                    'large': {
                                        'w': 2048,
                                        'h': 2048,
                                        'resize': 'fit'
                    'quote_count': 56,
                    'reply_count': 44,
                    'retweet_count': 326,
                    'favorite_count': 385,
                    'entities': {
                        'hashtags': [],
                        'urls': [{
                            'url': 'https://t.co/PcsatAL7Lu',
                            'display_url': 'twitter.com/i/web/status/9…',
                            'indices': [108, 131]
                        'user_mentions': [{
                            'screen_name': 'BarackObama',
                            'name': 'Barack Obama',
                            'id': 813286,
                            'id_str': '813286',
                            'indices': [13, 25]
                        }, {
                            'screen_name': 'realDonaldTrump',
                            'name': 'Donald J. Trump',
                            'id': 25073877,
                            'id_str': '25073877',
                            'indices': [65, 81]
                        'symbols': []
                    'favorited': False,
                    'retweeted': False,
                    'possibly_sensitive': False,
                    'filter_level': 'low',
                    'lang': 'en'
                'is_quote_status': True,
                'quote_count': 0,
                'reply_count': 0,
                'retweet_count': 0,
                'favorite_count': 0,
                'entities': {
                    'hashtags': [],
                    'urls': [{
                        'url': 'https://t.co/0BW86RBIRH',
                        'display_url': 'twitter.com/jeremyddickey/…',
                        'indices': [17, 40]
                    'user_mentions': [{
                        'screen_name': 'realDonaldTrump',
                        'name': 'Donald J. Trump',
                        'id': 25073877,
                        'id_str': '25073877',
                        'indices': [0, 16]
                    'symbols': []
                'favorited': False,
                'retweeted': False,
                'possibly_sensitive': False,
                'filter_level': 'low',
                'lang': 'und',
                'timestamp_ms': '1512093225971'

        self._status_backup = deepcopy(self._status)
Пример #42
from tweepy.models import Status

from teebr.text.utils import normalize_text
from teebr.features import filter_status

# Not referenced in the visible part of the script; presumably a feature
# dimensionality used further down -- TODO confirm.
DIMS = 100

# Collection whose length is reported by the print statement below.
tweets = []

#tw_count = 0

# Read one JSON document per line, wrap it in a Status (no API object is
# attached) and normalize the text of every status accepted by
# filter_status.
with open("raw_tweets.jsons") as f:
    for line in f:
        j = loads(line)
        t = Status.parse(None, j)
        if filter_status(t):
            # NOTE(review): the normalized text is bound to `tweet` but is
            # never appended to `tweets` in the visible code, so the count
            # printed below would be 0 -- confirm whether an append is
            # missing here.
            tweet = normalize_text(t.text)
            #tw_count += 1
            #if tw_count >= 2000:
            #    break

# fewer tweets for the tests
#tweets = tweets[:10000]

print "tweets: %d" % len(tweets)

#hasher = HashingVectorizer(stop_words='english', non_negative=True, norm=None)
#vectorizer = make_pipeline(hasher, TfidfTransformer())
Пример #43
def gen_tuple(jsontweet):
    """Convert one JSON-encoded tweet into a flat tuple.

    Args:
        jsontweet: JSON text of a single tweet.

    Returns:
        A 4-tuple ``(author_id, created_at, text, retweeted)`` where ``text``
        is ``tweet.text`` passed through ``convert_to_utf8_str`` and
        ``retweeted`` is True when the parsed status has a
        ``retweeted_status`` attribute that is not None.
    """
    tweet = Status.parse(api, json.loads(jsontweet))
    # Identity test against None: an equality test may dispatch to the
    # comparison methods of the attribute's own class.
    retweeted = getattr(tweet, 'retweeted_status', None) is not None
    return (tweet.author.id, tweet.created_at, convert_to_utf8_str(tweet.text), retweeted)
Пример #44
    def on_data(self, data):                
        '''Parse raw data from twitter and pass the status object to on_status()
        Call when raw data is passed from twitter.        
        If this function return False, it stop listening to the streamining.
        gSave_raw_json: if true, write json raw text to the ../json/
                        Set it to true only if you would like to debug.
            self.on_data_running = True
            self.log("Get raw data from Twitter", screen_only=True)
            if gSave_raw_json:
                ### save the json into disk ###
                parsed_data = tweepy.utils.import_simplejson().loads(data)
                if "id" not in parsed_data.keys():  #may return {"limit":{"track":73}} or {delete...}, ignore this data
                    return True #chucheng, this line is equal to check if 'delete'/;limit' in data
                folder_name = parsed_data["id"]%1000
                    if not os.path.exists("../json/"+str(folder_name)):
                except OSError as ose:
                    self.log("OS ERROR")
                filename = "../json/"+str(folder_name) + "/" + str(parsed_data["id"]) + ".json" 
                #print filename # for debug
                output = open(filename,"w")
                ### done ###
            # Chucheng 4/25/2011:
            #   We must override the method, because the original one might             
            #   return false, cause a stop of the listerner.
            #   In short, you cannot simply call:
            #       tweepy.StreamListener.on_data(self, data) 
            if 'in_reply_to_status_id' in data:
                status = Status.parse(self.api, json.loads(data))
                if self.on_status(status) is False: #Trigger on_status now!!
                    self.log('in_reply_to_status_id in data: on_status() returns False. (this line should never be reached)')
                pass #do nothing, the data we get is not what we need.
            """ These lines should never be triggered in that we check :
            elif 'delete' in data:
                delete = json.loads(data)['delete']['status']
                if self.on_delete(delete['id'], delete['user_id']) is False:
                    self.log('delete in data: a delete notice arrives for a status')
            elif 'limit' in data:
                if self.on_limit(json.loads(data)['limit']['track']) is False:
                    self.log('limit in data: a limitation notice arrvies')       
            self.on_data_running = False # This variable signal whether 
                                         # we are in the middle of processing data.
            if self.running == False: # see: StreamingCrawler.stop_listner()
                return False #stop the listener while catching a SIGTERM
        except Exception as e:
            self.on_data_running = False            
            self.log("Error:" + str(e), sys.exc_traceback)

        return True
Пример #45
 def __init__(self, status: Status):
     # Copies fields of the given Status onto this instance, substituting a
     # default for the attributes that are checked with hasattr() first.
     #
     # NOTE(review): the call on the next line is left unclosed in the
     # visible source -- its argument line appears to be missing.
     self.created_at: datetime = get_datetime_from_date(
     self.id: int = status.__getattribute__("id")
     # The four lists below are read from status.entities and default to []
     # when the key is absent.
     self.hashtags: list = status.__getattribute__("entities").get(
         "hashtags", [])
     self.user_mentions: list = status.__getattribute__("entities").get(
         "user_mentions", [])
     self.urls: list = status.__getattribute__("entities").get("urls", [])
     self.media: list = status.__getattribute__("entities").get("media", [])
     self.is_quote_status: bool = status.__getattribute__("is_quote_status")
     # quote_count falls back to 0 when the status has no such attribute.
     self.quote_count: int = status.__getattribute__("quote_count") if\
         hasattr(status, 'quote_count') else 0
     self.text: str = self.get_text(status=status)
     self.retweet_count: int = status.__getattribute__("retweet_count")
     self.retweeted: bool = status.__getattribute__("retweeted")
     # NOTE(review): this nested attribute lookup is also left unclosed in
     # the visible source -- the attribute name line appears to be missing.
     self.user_id: int = status.__getattribute__("user").__getattribute__(
     self.possibly_sensitive: bool = status.__getattribute__("possibly_sensitive") if\
         hasattr(status, 'possibly_sensitive') else False
     self.favorite_count: int = status.__getattribute__("favorite_count")
     self.favorited: bool = status.__getattribute__("favorited")
     # Language comes from detect() applied to the extracted text rather
     # than from an attribute of the status.
     self.lang: str = detect(self.text)
     self.url: str = f"https://twitter.com/user/status/{status.__getattribute__('id')}"
     # Starts empty; nothing in this constructor fills it.
     self.sentiment_analysis: dict = {}
     self.source: str = status.__getattribute__("source")
     self.coordinates: dict = status.__getattribute__("coordinates") if\
         hasattr(status, 'coordinates') else {}
     self.place: dict = self.get_place(status=status)
     # NOTE(review): reply_count is read here, but the guard tests for
     # 'quote_count'; a status that has quote_count without reply_count
     # would raise AttributeError. Looks like a copy-paste slip -- confirm
     # whether the guard should test 'reply_count'.
     self.reply_count: int = status.__getattribute__("reply_count") if\
         hasattr(status, 'quote_count') else 0
     # Both UUIDs are derived from the string form of the ids stored above.
     self.uuid: str = self.get_128_uuid(data_str=str(self.id))
     self.user_uuid: str = self.get_128_uuid(data_str=str(self.user_id))
Пример #46
    hashtag = 0
    url = 0
    question = 0
    exclamation = 0
    pos_term = 0
    neg_term = 0
    pos_emoticon = 0
    neg_emoticon = 0
    reply = 0
    moment_morning = 0
    moment_afternoon = 0
    moment_evening = 0
    moment_night = 0
    retweeted = 0

    status = Status.parse(api, json.loads(tweet[0]))

    if status.id in error_list_tweet_ids:
        tweets_discarded_error += 1
    elif status.text.startswith("RT @"):
        tweets_discarded_retweet += 1
        tweets_considered += 1
        if regex_username.search(status.text) != None:
            tweets_username += 1
            username = 1
        if regex_hashtag.search(status.text) != None:
            tweets_hashtag += 1
            hashtag = 1
        if regex_url.search(status.text) != None:
            tweets_url += 1
Пример #47
def test_skip_check():
    """A filter built from an empty skip list returns True for a #nowplaying tweet."""
    check = skip_check([])
    status = Status()
    setattr(status, 'text', 'This is a test #nowplaying')
    outcome = check(status)
    assert outcome is True
Пример #48
def test_skip_check_custom(text, passed):
    """Check a filter built from two skip terms against one supplied case.

    ``text`` becomes the tweet text and ``passed`` is the exact object the
    filter is expected to return for it.
    """
    skip_terms = ['#nowplaying', '@boring']
    check = skip_check(skip_terms)
    status = Status()
    setattr(status, 'text', text)
    outcome = check(status)
    assert outcome is passed
Пример #49
def bulk_load(listkey, tweets):
    """Dump the text of each JSON-encoded tweet to a file named after `listkey`.

    Args:
        listkey: Name of the output file created under the hard-coded
            tweets directory.
        tweets: Iterable of JSON strings, each parsed into a Status.

    The file is opened in write mode, so an existing file with the same
    name is overwritten. One line is written per tweet.
    """
    with open('/home/marcua/data/tweets/%s' % (listkey), 'w') as tmpfile:
        # Parenthesised single-argument form: valid as the Python 2 print
        # statement and as the Python 3 print function, with the same output.
        print("file %s" % (tmpfile.name))
        for jsontweet in tweets:
            tweet = Status.parse(api, json.loads(jsontweet))
            tmpfile.write(convert_to_utf8_str(tweet.text) + "\n")
Пример #50
    def save_tweets(self):
        # Worker loop: repeatedly takes a raw JSON payload from self.q, and for
        # payloads that look like tweets matches them against the configured
        # keywords.
        # NOTE(review): this snippet is truncated -- several bracketed
        # expressions are never closed and the `else:` lines before the
        # fallback `text = ...` / `quoted_text = ...` assignments appear to be
        # missing. Restore from the full source before relying on it.
        while True:
            raw_data = self.q.get()

            data = json.loads(raw_data)

            # Only payloads carrying this key are parsed as a Status.
            if 'in_reply_to_status_id' in data:
                status = Status.parse(self.api, data)

                # Retweet bookkeeping; the defaults apply when the status has
                # no `retweeted_status` attribute.
                is_retweet = False
                retweeted_id = 0
                if hasattr(status, 'retweeted_status'):
                    is_retweet = True
                    retweeted_id = status.retweeted_status.id

                    if hasattr(status.retweeted_status, 'extended_tweet'):
                        text = status.retweeted_status.extended_tweet[
                        text = status.retweeted_status.text

                    if hasattr(status, 'extended_tweet'):
                        text = status.extended_tweet['full_text']
                        text = status.text

                # Quote bookkeeping; the defaults apply when there is no
                # `quoted_status` attribute.
                is_quote = hasattr(status, "quoted_status")
                quoted_text = ""
                quoted_id = 0
                if is_quote:
                    quoted_id = status.quoted_status.id

                    if hasattr(status.quoted_status, "extended_tweet"):
                        quoted_text = status.quoted_status.extended_tweet[
                        quoted_text = status.quoted_status.text

                # Case-insensitive substring match of every keyword against the
                # tweet text and the quoted text.
                for keyword_obj in self.keyword_obj_list:
                    keyword = keyword_obj.keyword

                    if keyword.lower() in text.lower() or keyword.lower(
                    ) in quoted_text.lower():
                        tweet_obj = Tweet.objects.create(

                        # The language is detected from the keyword, not from
                        # the tweet text.
                        lang = detect(keyword)
                        if lang == 'en':
                            text = text_utils.pre_process(text)

                        triple_list = knowledge_graph_extract.extract_entity(
                            text, lang=lang)
                        for triple in triple_list:

Пример #51
 def on_data(self, data):
     """Decode the raw JSON string and parse it into a Status via tweepy_api.

     The parsed object is only bound to a local name; nothing is returned.
     """
     status = Status.parse(
         tweepy_api,
         json.loads(data),
     )
Пример #52
 def parse_tweet(tweet):
     """Turn a JSON tweet into a tweepy object and fill in the missing author.

     `self` and `current_user` are not parameters; they are resolved from
     the surrounding scope.
     """
     status = Status.parse(self.api, tweet)
     status.author = current_user
     return status
Пример #53
    def on_data(self, raw_data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        data = json.loads(raw_data)

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False

        elif 'delete' in data:
            delete = data['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False

        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False

        elif 'direct_message' in data:
            status = Status.parse(self.api, data)
            if self.on_direct_message(status) is False:
                return False

        elif 'friends' in data:
            if self.on_friends(data['friends']) is False:
                return False

        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False

        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False

        elif 'warning' in data:
            if self.on_warning(data['warning']) is False:
                return False

            return False

        # If this tweet contains text.
        if "user" in list(data.keys()):

            # --------------------------------------------------------------- #
            # Stupid print for fun.
            uname = data["user"]["screen_name"]
            umsg = data["text"]
            nspc = (20 - len(uname))
            if nspc < 1:
                nspc = 1
            spc = " " * nspc
            if not umsg.startswith("RT"):
                print("<tweet>", uname, spc, umsg.replace("\n", ""))
            # --------------------------------------------------------------- #

            # Write the tweet to the buffer.

            # Running counter.
            self.count += 1

            # If the buffer is full, then cycle the buffer.
            if self.count % self.save_interval == 0:

            # If the counter is a check-in interval, do all the check-in tasks.
            if self.count % check_in_interval == 0:

                # Shutdown if the `runtime` `run` value is False.
                if checkin_killstream():
                    return False

                # pause if there are too many files in the new tweet directory.
                if not checkin_pausestream():
                    return False