Python Status.parse示例，tweepy.models.Status.parse Python示例

示例#1

0

显示文件

    def on_data(self, raw_data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        The payload is parsed as JSON first and HTML entities are decoded
        afterwards, one string at a time. Decoding the raw text before
        parsing could turn an entity such as ``&quot;`` into a bare quote
        inside a JSON string and make the whole message unparseable.

        :param raw_data: one JSON-encoded message from the stream
        :returns: False when the invoked handler returned False, else None
        """
        try:
            from html import unescape  # Python 3.4+
        except ImportError:
            # Python 2: fall back to the parser method this module relied on.
            unescape = HTMLParser().unescape

        def decode_entities(node):
            # Recursively decode HTML entities in every key and string value.
            if isinstance(node, dict):
                return dict((decode_entities(key), decode_entities(value))
                            for key, value in node.items())
            if isinstance(node, list):
                return [decode_entities(item) for item in node]
            if isinstance(node, type(u'')):
                return unescape(node)
            return node

        data = decode_entities(json.loads(raw_data))

        # The first recognised key decides which handler receives the message.
        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'delete' in data:
            delete = data['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'direct_message' in data:
            status = Status.parse(self.api, data)
            if self.on_direct_message(status) is False:
                return False
        elif 'friends' in data:
            if self.on_friends(data['friends']) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False
        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False
        else:
            logging.error("Unknown message type: " + str(raw_data))

示例#2

0

显示文件

文件： streaming.py 项目： Ignalion/tweepy

    def on_data(self, raw_data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        The payload is parsed as JSON first and HTML entities are decoded
        afterwards, one string at a time. Decoding the raw text before
        parsing could turn an entity such as ``&quot;`` into a bare quote
        inside a JSON string and make the whole message unparseable.

        :param raw_data: one JSON-encoded message from the stream
        :returns: False when the invoked handler returned False, else None
        """
        try:
            from html import unescape  # Python 3.4+
        except ImportError:
            # Python 2: fall back to the parser method this module relied on.
            unescape = HTMLParser().unescape

        def decode_entities(node):
            # Recursively decode HTML entities in every key and string value.
            if isinstance(node, dict):
                return dict((decode_entities(key), decode_entities(value))
                            for key, value in node.items())
            if isinstance(node, list):
                return [decode_entities(item) for item in node]
            if isinstance(node, type(u'')):
                return unescape(node)
            return node

        data = decode_entities(json.loads(raw_data))

        # The first recognised key decides which handler receives the message.
        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'delete' in data:
            delete = data['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'direct_message' in data:
            status = Status.parse(self.api, data)
            if self.on_direct_message(status) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False
        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False
        else:
            logging.error("Unknown message type: " + str(raw_data))

示例#3

0

显示文件

文件： stream.py 项目： suspended/althea

    def on_data(self, raw_data):
        """Route one raw stream message to the handler for its type.

        The message is decoded from JSON and the first recognised key
        (checked in a fixed order) selects the ``on_*`` callback to invoke.

        Returns False when the selected callback returned False, True after
        an ``IncompleteRead`` has been logged and a five second pause, and
        None in every other case.
        """
        try:
            message = json.loads(raw_data)
            verdict = None

            if 'in_reply_to_status_id' in message:
                status = Status.parse(self.api, message)
                verdict = self.on_status(status)
            elif 'delete' in message:
                removed = message['delete']['status']
                verdict = self.on_delete(removed['id'], removed['user_id'])
            elif 'event' in message:
                status = Status.parse(self.api, message)
                verdict = self.on_event(status)
            elif 'direct_message' in message:
                status = Status.parse(self.api, message)
                verdict = self.on_direct_message(status)
            elif 'friends' in message:
                verdict = self.on_friends(message['friends'])
            elif 'limit' in message:
                verdict = self.on_limit(message['limit']['track'])
            elif 'disconnect' in message:
                verdict = self.on_disconnect(message['disconnect'])
            elif 'warning' in message:
                verdict = self.on_warning(message['warning'])
            elif 'scrub_geo' in message:
                verdict = self.on_scrub_geo(message['scrub_geo'])
            elif 'status_withheld' in message:
                verdict = self.on_status_withheld(message['status_withheld'])
            elif 'user_withheld' in message:
                verdict = self.on_user_withheld(message['user_withheld'])
            else:
                insert_logger.error("Unknown message type: %s", raw_data)

            # Only an explicit False from a handler stops the stream.
            if verdict is False:
                return False
        except IncompleteRead as err:
            insert_logger.exception(str(err))
            time.sleep(5)
            return True

示例#4

0

显示文件

    def on_data(self, raw_data):
        """Dispatch one raw stream message to the matching handler.

        Messages authored by ``self.my_screen_name`` are skipped (returns
        True). Before dispatch, ``data['tweet_text']`` is set to the extended
        full text when present, else the plain ``text``, else an empty
        string.

        The author check only runs when the message carries a ``user``
        object, so control messages (delete, limit, disconnect, ...) reach
        their branches instead of failing with ``KeyError``.

        :param raw_data: one JSON-encoded message from the stream
        :returns: True for own tweets, False when the invoked handler
            returned False, else None
        """
        data = json.loads(raw_data)

        # Control messages have no 'user' key; only compare when it exists.
        if 'user' in data and self.my_screen_name == data['user']['screen_name']:
            return True

        try:
            data['tweet_text'] = data['extended_tweet']['full_text']
        except KeyError:
            data['tweet_text'] = data.get('text', u'')

        # 'retweeted_status' is tested first so retweets are flagged as such.
        if 'retweeted_status' in data:
            self.logger.info('retweet detected')
            status = Status.parse(self.api, data)
            if self.on_status(status, is_retweet=True) is False:
                return False
        elif 'in_reply_to_status_id' in data:
            self.logger.info('in_reply_to_status_id')
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'delete' in data:
            delete = data['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'direct_message' in data:
            status = Status.parse(self.api, data)
            if self.on_direct_message(status) is False:
                return False
        elif 'friends' in data:
            if self.on_friends(data['friends']) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False
        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False
        elif 'warning' in data:
            if self.on_warning(data['warning']) is False:
                return False
        else:
            self.logger.error('Unknown message type: %s', str(raw_data))

示例#5

0

显示文件

文件： streaming.py 项目： elstefani/tweepy

    def on_data(self, raw_data):
        """Decode a raw stream message and hand it to the matching handler.

        The first recognised key, checked in a fixed order, selects the
        ``on_*`` method; that method's return value is returned unchanged.
        Unrecognised messages are logged as errors and yield None.

        https://developer.twitter.com/en/docs/twitter-api/v1/tweets/filter-realtime/guides/streaming-message-types
        """
        message = json.loads(raw_data)

        if "in_reply_to_status_id" in message:
            status = Status.parse(None, message)
            return self.on_status(status)
        if "delete" in message:
            removed = message["delete"]["status"]
            return self.on_delete(removed["id"], removed["user_id"])

        # Remaining types: (message key, handler name, payload extractor).
        routes = (
            ("disconnect", "on_disconnect_message", lambda body: body),
            ("limit", "on_limit", lambda body: body["track"]),
            ("scrub_geo", "on_scrub_geo", lambda body: body),
            ("status_withheld", "on_status_withheld", lambda body: body),
            ("user_withheld", "on_user_withheld", lambda body: body),
            ("warning", "on_warning", lambda body: body),
        )
        for key, handler_name, extract in routes:
            if key in message:
                return getattr(self, handler_name)(extract(message[key]))

        log.error("Received unknown message type: %s", raw_data)

示例#6

0

显示文件

文件： SampleDownloader.py 项目： StijnPieper/wse

 def on_data(self, data):
     """Classify incoming tweets by language until the sampling window ends.

     Once ``self.duration`` seconds have passed since ``self.started``, a
     ``<started>-sample.stats`` summary is written to the current directory
     and False is returned. Before that, raw messages containing
     ``in_reply_to_status_id`` are parsed, classified with ``langid`` and
     appended to the above-threshold, below-threshold or excluded output;
     True is returned for those. Any other message yields None.

     :param data: raw message text as received from the stream
     """
     if time.time() >= self.started + self.duration:
         # The context manager flushes and closes the stats file even if
         # one of the writes raises; it used to be left open.
         with open('{0}-sample.stats'.format(int(self.started)), 'w+') as stats:
             stats.write("================= STATISTICS =================" + "\n")
             stats.write("Start time: " + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.started)) + "\n")
             stats.write("End time: " + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + "\n")
             stats.write("First Tweet ID: " + self.first_tweet_id + "\n")
             stats.write("Last Tweet ID: " + self.last_tweet_id + "\n")
             stats.write("Language: " + self.lang + "\n")
             stats.write("Language classification threshold: " + str(self.lang_threshold) + "\n")
             stats.write("Above threshold: " + str(self.counter[self.lang + '-above']) + "\n")
             stats.write("Below threshold: " + str(self.counter[self.lang + '-below']) + "\n")
             stats.write("Exluded: " + str(self.counter['excluded']) + "\n")
         return False
     elif 'in_reply_to_status_id' in data:
         status = Status.parse(self.api, json.loads(data))
         langclass = langid.classify(status.text)

         # All counters still at zero means this is the first tweet seen.
         if self.counter == {self.lang + '-above': 0, self.lang + '-below': 0, 'excluded': 0}:
             self.first_tweet_id = str(status.id)
         self.last_tweet_id = str(status.id)

         if langclass[0] == self.lang:
             if langclass[1] >= self.lang_threshold:
                 self.above_output.write(data)
                 self.counter[self.lang + '-above'] += 1
             else:
                 self.below_output.write(data)
                 self.counter[self.lang + '-below'] += 1
         else:
             self.excl_output.write(data)
             self.counter['excluded'] += 1

         return True

示例#7

0

显示文件

    def on_data(self, raw_data):
        """Decode a raw stream message and pass it to the matching handler.

        The first recognised key, checked in a fixed order, picks the
        ``on_*`` method, and whatever that method returns is returned to the
        caller. Messages of an unknown type are logged and yield None.
        """
        message = json.loads(raw_data)

        if 'in_reply_to_status_id' in message:
            status = Status.parse(self.api, message)
            outcome = self.on_status(status)
        elif 'delete' in message:
            removed = message['delete']['status']
            outcome = self.on_delete(removed['id'], removed['user_id'])
        elif 'limit' in message:
            outcome = self.on_limit(message['limit']['track'])
        elif 'disconnect' in message:
            outcome = self.on_disconnect(message['disconnect'])
        elif 'warning' in message:
            outcome = self.on_warning(message['warning'])
        elif 'scrub_geo' in message:
            outcome = self.on_scrub_geo(message['scrub_geo'])
        elif 'status_withheld' in message:
            outcome = self.on_status_withheld(message['status_withheld'])
        elif 'user_withheld' in message:
            outcome = self.on_user_withheld(message['user_withheld'])
        else:
            log.error("Unknown message type: %s", raw_data)
            outcome = None

        return outcome

示例#8

0

显示文件

文件： streaming.py 项目： Mezgrman/tweepy

    def on_data(self, data):
        """Called when raw data is received from connection.

        Override this method if you wish to manually handle
        the stream data. Return False to stop stream and close connection.

        The message type is recognised from a marker substring in the raw
        text. Delete notices are routed by the key present in the payload
        (``status`` or ``direct_message``), so an exception raised by
        ``on_delete`` propagates instead of being swallowed and retried as a
        direct-message deletion.

        :param data: raw message text as received from the stream
        :returns: False when the invoked handler returned False, else None
        """

        if '{"delete"' in data:
            deleted = json.loads(data)['delete']
            if 'status' in deleted:
                notice = deleted['status']
                if self.on_delete(notice['id'], notice['user_id']) is False:
                    return False
            else:
                notice = deleted['direct_message']
                if self.on_direct_message_delete(notice['id'], notice['user_id']) is False:
                    return False
        elif '{"direct_message"' in data:
            message = DirectMessage.parse(self.api, json.loads(data)['direct_message'])
            if self.on_direct_message(message) is False:
                return False
        elif '{"target"' in data:
            # Events are passed on as the plain decoded dict.
            event = json.loads(data)
            if self.on_event(event) is False:
                return False
        elif '{"limit"' in data:
            if self.on_limit(json.loads(data)['limit']['track']) is False:
                return False
        elif '"in_reply_to_user_id_str"' in data:
            status = Status.parse(self.api, json.loads(data))
            if self.on_status(status) is False:
                return False

示例#9

0

显示文件

文件： basicListener.py 项目： covcov/btweet

    def on_data(self, data):
        """Print the status contained in ``data``, then pause.

        The raw text is echoed first when ``self.print_data`` is set. The
        message is decoded with ``self.json``, parsed into a ``Status`` and
        passed to ``self._print_status``; afterwards the call sleeps for
        ``self.delay``.
        """
        if self.print_data:
            print(data)

        decoded = self.json.loads(data)
        parsed = Status.parse(self.api, decoded)
        self._print_status(parsed)

        sleep(self.delay)

示例#10

0

显示文件

文件： test_twitter.py 项目： wjt/fewerror

def test_end_to_end(filename, connections, expected, tmpdir):
    """Feed a recorded status through ``LessListener`` and check the reply.

    The same status is delivered three times (the third with a bumped id);
    at most one update equal to ``expected`` may be posted, or none when
    ``expected`` is None. Finally the follow state of every connection is
    checked against its initial state.
    """
    api = MockAPI(connections=connections)

    with open(filename, 'r') as fixture:
        status = Status.parse(api, json.load(fixture))

    listener = LessListener(api=api, post_replies=True, gather='tweets',
                            state_dir=str(tmpdir))

    # 100% festivity for all of December
    listener.december_greetings = ('It is cold outside.',)
    listener.festive_probability = 1.0
    assert listener.get_festive_probability(dt.date(2016, 12, 5)) == 1.0

    listener.on_status(status)

    # Never reply to the same toot twice
    listener.on_status(status)

    # Rate-limit replies for same word
    status.id = status.id + 1
    listener.on_status(status)

    if expected is not None:
        assert len(api._updates) == 1
        update = api._updates[0]
        assert update['status'] == expected
    else:
        assert api._updates == []

    for key, before in connections.items():
        after = api._connections[key]
        assert ('following' in after) == ('followed_by' in before), \
            (key, before, after)

示例#11

0

显示文件

文件： drink_the_hose.py 项目： edwardabraham/drink_the_hose

 def process(self, tweet):
     """Print a one-line summary of a JSON-encoded tweet.

     Every separator listed in ``UNICODE_LINES`` is replaced by a space in
     the tweet text. The output line has the form
     ``@screen_name (lang, statuses, friends, followers): text``.

     :param tweet: JSON text of a single status
     """
     status = Status.parse(api, json.loads(tweet))
     # Replace cumulatively: restarting from status.text on each pass kept
     # only the last replacement and left `text` unbound for an empty list.
     text = status.text
     for lf in UNICODE_LINES:
         text = text.replace(lf, ' ')
     # Single-argument parenthesised print works on Python 2 and 3 alike.
     print("@%s (%s, %s, %s, %s): %s" % (
         status.user.screen_name,
         status.user.lang,
         status.user.statuses_count,
         status.user.friends_count,
         status.user.followers_count,
         text,
     ))

示例#12

0

显示文件

文件： listen.py 项目： orygens/bot_crm

 def on_data(self, data):
     """Forward tweets that mention ``testeMagazine`` to ``on_mention``.

     Raw messages without the text ``entities`` are ignored. Otherwise the
     message is decoded, and when one of its user mentions has the screen
     name ``testeMagazine`` it is parsed with ``Tweet.parse`` and passed to
     ``self.on_mention``. Always returns None.
     """
     if "entities" not in data:
         return

     payload = json.loads(data)
     mentioned = [entry["screen_name"]
                  for entry in payload["entities"]["user_mentions"]]
     if "testeMagazine" not in mentioned:
         return

     tweet = Tweet.parse(self.api, payload)
     self.on_mention(tweet)

示例#13

0

显示文件

def get(name, mx=-1):
    """Load statuses from a file holding one JSON document per line.

    The path is built as ``_prefix + name + _suffix``. When ``mx`` is
    positive, reading stops at the first line whose zero-based index is
    greater than ``mx``; otherwise every line is parsed.

    NOTE(review): with a positive ``mx`` up to ``mx + 1`` statuses are
    returned -- confirm whether the cap was meant to be ``mx``.
    """
    path = "%s%s%s" % (_prefix, name, _suffix)
    capped = mx > 0
    statuses = []
    with open(path) as handle:
        for index, line in enumerate(handle):
            if capped and index > mx:
                break
            statuses.append(Status.parse(None, loads(line)))
    return statuses

示例#14

0

显示文件

文件： twitter_bot.py 项目： wfn/twidibot

  def on_data(self, raw_data):
    """Called when raw data is received from connection.

    This is where all the data comes first. Normally we could use (inherit)
    the on_data() in tweepy.StreamListener, but it unnecessarily and naively
    reports unknown event types as errors (to simple log); also, we might want
    to tweak it further later on.

    But for now, this is basically taken from tweepy's on_data().

    ``self.processing_data`` is True while a message is being handled and is
    reset to False on every exit path, including an early ``return False``
    and an exception raised by a handler.

    Return False to stop stream and close connection.
    """

    self.processing_data = True
    try:
      data = json.loads(raw_data)

      if 'in_reply_to_status_id' in data:
        status = Status.parse(self.api, data)
        if self.on_status(status) is False:
          return False
      elif 'delete' in data:
        delete = data['delete']['status']
        if self.on_delete(delete['id'], delete['user_id']) is False:
          return False
      elif 'event' in data:
        status = Status.parse(self.api, data)
        if self.on_event(status) is False:
          return False
      elif 'direct_message' in data:
        status = Status.parse(self.api, data)
        if self.on_direct_message(status) is False:
          return False
      elif 'limit' in data:
        if self.on_limit(data['limit']['track']) is False:
          return False
      elif 'disconnect' in data:
        if self.on_disconnect(data['disconnect']) is False:
          return False
      else:
        log.debug('TwitterBotStreamListener::on_data(): got event/stream data of'
            ' unknown type. Raw data follows:\n%s', data)
    finally:
      # Without this, the flag stayed True whenever a handler stopped the
      # stream or raised.
      self.processing_data = False

示例#15

0

显示文件

文件： test_twitter.py 项目： wjt/fewerror

def test_sanitize(filename, expected):
    """Check ``get_sanitized_text`` against a recorded status fixture.

    The sanitized text must equal ``expected`` and contain neither the
    ``&amp;`` entity nor the substring ``http``.
    """
    api = NonCallableMock()
    fixture_path = os.path.join('tests', filename)

    with open(fixture_path, 'r') as fixture:
        status = Status.parse(api, json.load(fixture))

    sanitized = get_sanitized_text(status)
    for forbidden in ('&amp;', 'http'):
        assert forbidden not in sanitized
    assert sanitized == expected

示例#16

0

显示文件

文件： stream_from_stream.py 项目： bh0085/tweeql

 def _read_from_table(self):
     """Replay every row of ``self.table_name`` through the listener.

     Each row selected from the table is parsed into a ``Status`` and passed
     to ``self.listener.on_status``. Iteration stops early once
     ``self.running`` has been cleared. The database connection is closed
     on every exit path, including an exception from a handler.
     """
     self.running = True
     conn = StatusSource.engine.connect()
     try:
         meta = MetaData()
         table = Table(self.table_name, meta, autoload=True, autoload_with=StatusSource.engine)
         results = conn.execute(select([table]))
         for result in results:
             status = Status.parse(None, result)
             self.listener.on_status(status)
             # The flag is re-read after every row, so clearing it from a
             # handler ends the replay.
             if not self.running:
                 break
     finally:
         # The connection used to stay open after the loop finished.
         conn.close()

示例#17

0

显示文件

文件： Twitter.py 项目： u-ahmed/HookedUp

    def on_data(self, raw_data):
        """Decode a raw stream message and invoke the matching handler.

        Handles statuses, delete notices, events and direct messages; any
        other message is ignored. Returns False when the invoked handler
        returned False, else None.
        """
        message = json.loads(raw_data)
        verdict = None

        # The first matching key wins.
        if 'in_reply_to_status_id' in message:
            status = Status.parse(self.api, message)
            verdict = self.on_status(status)
        elif 'delete' in message:
            removed = message['delete']['status']
            verdict = self.on_delete(removed['id'], removed['user_id'])
        elif 'event' in message:
            status = Status.parse(self.api, message)
            verdict = self.on_event(status)
        elif 'direct_message' in message:
            status = Status.parse(self.api, message)
            verdict = self.on_direct_message(status)

        if verdict is False:
            return False

示例#18

0

显示文件

文件： test_tweepy_patch.py 项目： spacelis/crawler.kka

    def test_patched_status(self):
        """Check ``Status.parse`` after ``patch()`` has been applied.

        The parsed object must expose the serialized input as ``_raw`` and
        each top-level key as an attribute.
        """
        from tweepy.models import Status
        from crawler.tweepy_patch import patch
        patch()
        payload = {'a': 1, 'b': 2}
        parsed = Status.parse('test_api', payload)
        # pylint: disable=E1101,W0212
        self.assertEqual(parsed._raw, '{"a": 1, "b": 2}')
        for attr, value in (('a', 1), ('b', 2)):
            self.assertEqual(getattr(parsed, attr), value)

示例#19

0

显示文件

文件： test_twitter.py 项目： wjt/fewerror

def test_save_tweet(tmpdir, id_, expected_filename):
    """Check that ``save_tweet`` creates ``expected_filename`` under ``foo``."""
    api = MockAPI(connections={})
    gather_dir = tmpdir.join('foo')
    listener = LessListener(api=api, gather=str(gather_dir), state_dir=str(tmpdir))

    payload = {'id': int(id_), 'id_str': id_}
    status = Status.parse(api=api, json=payload)
    listener.save_tweet(status)

    saved = tmpdir.join('foo', expected_filename)
    assert saved.check()

示例#20

0

显示文件

文件： Stream.py 项目： surajjana/pyTwitterCollector

    def on_data(self, data):
        """Dispatch a raw stream message based on a marker in its text.

        Statuses go to ``on_status`` together with the raw text, delete
        notices to ``on_delete`` and limit notices to ``on_limit``. Returns
        False when the invoked handler returned False, else None.
        """
        outcome = None

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, json.loads(data))
            outcome = self.on_status(status, data)
        elif 'delete' in data:
            removed = json.loads(data)['delete']['status']
            outcome = self.on_delete(removed['id'], removed['user_id'])
        elif 'limit' in data:
            outcome = self.on_limit(json.loads(data)['limit']['track'])

        if outcome is False:
            return False

示例#21

0

显示文件

文件： Streamer.py 项目： jaredmichaelsmith/PyMiner

 def on_data(self, data):
     """Dispatch a raw stream message based on a marker in its text.

     Statuses go to ``on_status`` together with the raw text, delete notices
     to ``on_delete`` and limit notices to ``on_limit``. Returns False when
     the invoked handler returned False, else None.
     """
     if 'in_reply_to_status_id' in data:
         status = Status.parse(self.api, json.loads(data))
         stop = self.on_status(status, data) is False
     elif 'delete' in data:
         removed = json.loads(data)['delete']['status']
         stop = self.on_delete(removed['id'], removed['user_id']) is False
     elif 'limit' in data:
         stop = self.on_limit(json.loads(data)['limit']['track']) is False
     else:
         stop = False

     if stop:
         return False

示例#22

0

显示文件

    def on_data(self, raw_data):
        """Decode a raw stream message and invoke the matching handler.

        With ``self.verbose`` set, the decoded message and a separator line
        are printed first. Statuses go to ``on_status`` and events to
        ``on_event``; friends lists, delete notices and user-suspend notices
        are ignored; anything else is logged as an error.

        :param raw_data: one JSON-encoded message from the stream
        :returns: False when the invoked handler returned False, else None
        """
        data = json.loads(raw_data)
        if self.verbose:
            # Single-argument parenthesised print produces the same output
            # on Python 2 and is valid syntax on Python 3.
            print(data)
            print('-' * 60)

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'friends' in data:
            pass  # ignore
        elif 'delete' in data:
            pass  # ignore
        elif 'user_suspend' in data:
            pass  # ignore
        else:
            logging.error("Unknown message type: " + str(raw_data))

示例#23

0

显示文件

文件： tweet.py 项目： aomoriringo/number_bot

    def on_data(self, raw_data):
        """Decode a raw stream message and invoke the matching handler.

        With ``self.verbose`` set, the decoded message and a separator line
        are printed first. Statuses go to ``on_status`` and events to
        ``on_event``; friends lists, delete notices and user-suspend notices
        are ignored; anything else is logged as an error.

        :param raw_data: one JSON-encoded message from the stream
        :returns: False when the invoked handler returned False, else None
        """
        data = json.loads(raw_data)
        if self.verbose:
            # Single-argument parenthesised print produces the same output
            # on Python 2 and is valid syntax on Python 3.
            print(data)
            print('-'*60)

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'friends' in data:
            pass # ignore
        elif 'delete' in data:
            pass # ignore
        elif 'user_suspend' in data:
            pass # ignore
        else:
            logging.error("Unknown message type: " + str(raw_data))

示例#24

0

显示文件

文件： streaming.py 项目： snow/stargazer

    def save_status(self, data):
        """Store a geotagged tweet as a ``Post``, creating its ``Author`` if needed.

        ``data`` is the raw JSON text of a status. Nothing is stored when the
        status has no ``geo`` value or when an ``Author`` linked to the
        tweet's user through ``owner__userprofile__twitter_id`` already
        exists. Otherwise the Twitter author and the post are looked up by
        their external ids and created when missing. Always returns None.
        """
        status = Status.parse(self.api, json.loads(data))

        # Only tweets carrying geo information are stored.
        if not status.geo:
            # _datafile.write(data+'\n')
            return

        if Author.objects.filter(owner__userprofile__twitter_id=status.user.id_str).exists():
            # this tweet's author is on stargazer
            return

        # Get-or-create the Author record for this Twitter user.
        try:
            author = Author.objects.filter(source=Author.T_TWITTER, external_id=status.user.id_str).get()
        except Author.DoesNotExist:
            author = Author(
                name=status.user.screen_name,
                avatar_uri=status.user.profile_image_url,
                source=Author.T_TWITTER,
                external_id=status.user.id_str,
            )
            author.save()

        # Create the Post only when this tweet has not been stored before.
        try:
            post = Post.objects.filter(source=Post.T_TWITTER, external_id=status.id_str).get()
        except Post.DoesNotExist:
            # presumably coordinates are ordered (latitude, longitude) -- TODO confirm
            lat = float(status.geo["coordinates"][0])
            lng = float(status.geo["coordinates"][1])

            # Reverse geocoding is best-effort: on failure the address is left empty.
            try:
                addr = self._latlng2addr.get(lat, lng)
            except (LatLng2Addr.ConnectionFailed, LatLng2Addr.GeocodingFailed) as e:
                addr = ""

            # twitter api response in UTC
            # NOTE(review): shifts the timestamp by a fixed +8 hours, presumably
            # to a UTC+8 local time -- confirm the intended zone.
            created = status.created_at + timedelta(hours=8)

            post = Post(
                content=status.text,
                author=author,
                latitude=lat,
                longitude=lng,
                address=addr,
                source=Post.T_TWITTER,
                external_id=status.id_str,
                external_data=data,
                created=created,
            )
            post.save()

        return

示例#25

0

显示文件

文件： streaming.py 项目： jrgrafton/tweet-debate

    def on_data(self, raw_data):
        """Decode a raw stream message and invoke the handler for its type.

        The first recognised key, checked in a fixed order, selects the
        ``on_*`` method. Returns False when that method returned False and
        None otherwise; messages of an unknown type are logged as errors.
        """
        message = json.loads(raw_data)

        def as_status(body):
            # Handlers for statuses, events and direct messages get a Status.
            return (Status.parse(self.api, body),)

        def deleted_ids(body):
            removed = body["delete"]["status"]
            return (removed["id"], removed["user_id"])

        # (message key, handler name, builder for the handler's arguments)
        routes = (
            ("in_reply_to_status_id", "on_status", as_status),
            ("delete", "on_delete", deleted_ids),
            ("event", "on_event", as_status),
            ("direct_message", "on_direct_message", as_status),
            ("friends", "on_friends", lambda body: (body["friends"],)),
            ("limit", "on_limit", lambda body: (body["limit"]["track"],)),
            ("disconnect", "on_disconnect", lambda body: (body["disconnect"],)),
            ("warning", "on_warning", lambda body: (body["warning"],)),
        )
        for key, handler_name, build_args in routes:
            if key in message:
                arguments = build_args(message)
                if getattr(self, handler_name)(*arguments) is False:
                    return False
                return None

        logging.error("Unknown message type: " + str(raw_data))

示例#26

0

显示文件

文件： video_tweet.py 项目： invinst/CPDBv2_backend

    def post_tweet(self, media_id, status, in_reply_to_status_id):
        """Post a tweet and return the response parsed as a ``Status``.

        Only the fields whose value is not None are sent to
        ``POST_TWEET_URL``.
        """
        fields = (
            ('status', status),
            ('media_ids', media_id),
            ('in_reply_to_status_id', in_reply_to_status_id),
        )
        payload = {name: value for name, value in fields if value is not None}

        response = self.post(url=POST_TWEET_URL, data=payload)
        return Status.parse(self.api, response.json())

示例#27

0

显示文件

 def __init__(self, tweetDict):
     """Build the wrapper from a dict describing one tweet.

     ``tweetDict["tweet"]`` is parsed into ``self.tweet``. ``keywords`` and
     ``groups`` are copied when present; the attribute is left unset when
     its key is missing. Every entry of ``tweetDict["tokens"]`` becomes a
     ``Token`` in ``self.tokens``, and those whose ``filter_token()`` is
     falsy are also collected in ``self.filt_tokens``.
     """
     self.tweet = Status.parse(API(), tweetDict["tweet"])

     # Optional fields: a missing key simply leaves the attribute unset.
     for optional in ("keywords", "groups"):
         try:
             setattr(self, optional, tweetDict[optional])
         except KeyError:
             pass

     self.tokens = []
     self.filt_tokens = []
     for raw_token in tweetDict["tokens"]:
         token = Token(raw_token)
         self.tokens.append(token)
         if token.filter_token():
             continue
         self.filt_tokens.append(token)

示例#28

0

显示文件

文件： streaming.py 项目： dtran320/tweepy

    def on_data(self, data):
        """Handle one raw message received from the connection.

        The message type is recognised from a marker in the raw text:
        statuses go to ``on_status``, delete notices to ``on_delete`` and
        limit notices to ``on_limit``. Returns False when the invoked handler
        returned False (which stops the stream), else None.
        """
        verdict = None

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, json.loads(data))
            verdict = self.on_status(status)
        elif 'delete' in data:
            removed = json.loads(data)['delete']['status']
            verdict = self.on_delete(removed['id'], removed['user_id'])
        elif 'limit' in data:
            verdict = self.on_limit(json.loads(data)['limit']['track'])

        if verdict is False:
            return False

示例#29

0

显示文件

文件： streaming.py 项目： artemrizhov/tweepy

    def on_data(self, data):
        """Handle one raw message received from the connection.

        The message type is recognised from a marker in the raw text:
        statuses go to ``on_status``, delete notices to ``on_delete`` and
        limit notices to ``on_limit``. Returns False when the invoked handler
        returned False (which stops the stream), else None.
        """
        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, json.loads(data))
            if self.on_status(status) is False:
                return False
            return None

        if 'delete' in data:
            removed = json.loads(data)['delete']['status']
            if self.on_delete(removed['id'], removed['user_id']) is False:
                return False
            return None

        if 'limit' in data:
            if self.on_limit(json.loads(data)['limit']['track']) is False:
                return False

        return None

示例#30

0

显示文件

文件： streaming.py 项目： nmay732/tweepy

    def on_data(self, data):
        """Handle one raw message received from the connection.

        The message type is recognised from a marker in the raw text:
        statuses go to ``on_status``, delete notices to ``on_delete`` and
        limit notices to ``on_limit``. Returns False when the invoked handler
        returned False (which stops the stream), else None.
        """
        if "in_reply_to_status_id" in data:
            status = Status.parse(self.api, json.loads(data))
            keep_open = self.on_status(status)
        elif "delete" in data:
            removed = json.loads(data)["delete"]["status"]
            keep_open = self.on_delete(removed["id"], removed["user_id"])
        elif "limit" in data:
            keep_open = self.on_limit(json.loads(data)["limit"]["track"])
        else:
            keep_open = None

        return False if keep_open is False else None

示例#31

0

显示文件

    def on_data(self, data):
        """Classify incoming tweets by language until the sampling window ends.

        Once ``self.duration`` seconds have passed since ``self.started``, a
        ``<started>-sample.stats`` summary is written to the current
        directory and False is returned. Before that, raw messages containing
        ``in_reply_to_status_id`` are parsed, classified with ``langid`` and
        appended to the above-threshold, below-threshold or excluded output;
        True is returned for those. Any other message yields None.

        :param data: raw message text as received from the stream
        """
        if time.time() >= self.started + self.duration:
            # The context manager flushes and closes the stats file even if
            # one of the writes raises; it used to be left open.
            stats_name = '{0}-sample.stats'.format(int(self.started))
            with open(stats_name, 'w+') as stats:
                stats.write("================= STATISTICS =================" +
                            "\n")
                stats.write("Start time: " + time.strftime(
                    '%Y-%m-%d %H:%M:%S', time.localtime(self.started)) + "\n")
                stats.write("End time: " + time.strftime(
                    '%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + "\n")
                stats.write("First Tweet ID: " + self.first_tweet_id + "\n")
                stats.write("Last Tweet ID: " + self.last_tweet_id + "\n")
                stats.write("Language: " + self.lang + "\n")
                stats.write("Language classification threshold: " +
                            str(self.lang_threshold) + "\n")
                stats.write("Above threshold: " +
                            str(self.counter[self.lang + '-above']) + "\n")
                stats.write("Below threshold: " +
                            str(self.counter[self.lang + '-below']) + "\n")
                stats.write("Exluded: " + str(self.counter['excluded']) + "\n")
            return False
        elif 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, json.loads(data))
            langclass = langid.classify(status.text)

            # All counters still at zero means this is the first tweet seen.
            if self.counter == {
                    self.lang + '-above': 0,
                    self.lang + '-below': 0,
                    'excluded': 0
            }:
                self.first_tweet_id = str(status.id)
            self.last_tweet_id = str(status.id)

            if langclass[0] == self.lang:
                if langclass[1] >= self.lang_threshold:
                    self.above_output.write(data)
                    self.counter[self.lang + '-above'] += 1
                else:
                    self.below_output.write(data)
                    self.counter[self.lang + '-below'] += 1
            else:
                self.excl_output.write(data)
                self.counter['excluded'] += 1

            return True

示例#32

0

显示文件

文件： drink_the_hose.py 项目： edwardabraham/drink_the_hose

 def process(self, tweet):
     """Append a compact JSON record of one tweet to the current gzip file.

     The tweet is parsed and reduced to a fixed set of fields. The output
     file name embeds ``time.strftime(self.fmt)``; whenever that value
     differs from ``self.time`` the open file is closed and a new one is
     opened in append mode before the record is written.
     """
     status = Status.parse(api, json.loads(tweet))
     author = status.user
     record = {
         "screen_name": author.screen_name,
         "id": status.id,
         "lang": author.lang,
         "statuses_count": author.statuses_count,
         "friend_count": author.friends_count,
         "followers_count": author.followers_count,
         "profile_image_url": author.profile_image_url,
         "text": status.text.encode('utf8'),
         "entities": status.entities,
         "created_at": status.created_at.strftime("%Y-%m-%d %H:%M:%S"),
         "geo": status.geo,
         "location": author.location,
         "timezone": author.time_zone,
     }

     stamp = time.strftime(self.fmt)
     if stamp != self.time:
         # The time bucket changed: roll over to a new output file.
         self.time = str(stamp)
         self.fid.close()
         archive_name = self.base + '-' + self.time + '.txt.gz'
         self.fid = gzip.open(os.path.join(self.path, archive_name), 'ab')

     self.fid.write(json.dumps(record) + '\n')

示例#33

0

显示文件

文件： giveawayBot.py 项目： covcov/btweet

    def _get_status(self, data):
        """Parse ``data`` and return the status that should be acted on.

        Raises ``TweepError`` when the author is listed in
        ``self.block_users``, when the tweet is not a retweet and
        ``self.original`` is falsy, or when the selected status is a quote
        and ``self.quoted`` is falsy. A retweet is replaced by its
        ``retweeted_status`` and an accepted quote by its ``quoted_status``.
        """
        status = Status.parse(self.api, self.json.loads(data))

        if status.user.screen_name in self.block_users:
            raise TweepError(">> User ignored: @%s" % status.user.screen_name)

        try:
            status = status.retweeted_status
        except AttributeError:
            # No retweeted_status attribute: this is an original tweet.
            if not self.original:
                text = self._proccess_status(status.text)
                if len(text) > 75:
                    shortened = text[:72] + '...'
                else:
                    shortened = text
                raise TweepError(">> Original tweet ignored: %s" % shortened)

        if status.is_quote_status:
            if not self.quoted:
                text = self._proccess_status(status.text)
                raise TweepError(">> Quoted tweet ignored: %s" % text)
            status = status.quoted_status

        return status

示例#34

0

显示文件

文件： nextroute.py 项目： vinodkone/NextRoute

  def on_data(self, data):
    """Handle one raw message received from the connection.

    The message type is recognised from markers in the raw text. Statuses,
    direct messages and follow events return whatever their handler
    returns; delete and limit notices return False only when their handler
    returned False. Everything else yields None.
    """
    if 'in_reply_to_status_id' in data:
      status = Status.parse(self.api, json.loads(data))
      return self.on_status(status)

    if 'delete' in data:
      removed = json.loads(data)['delete']['status']
      stop = self.on_delete(removed['id'], removed['user_id']) is False
      return False if stop else None

    if 'limit' in data:
      stop = self.on_limit(json.loads(data)['limit']['track']) is False
      return False if stop else None

    if 'sender_id' in data and 'recipient_id' in data:
      dm = DirectMessage.parse(self.api, json.loads(data))
      return self.on_dm(dm)

    if 'event' in data and 'follow' in data:
      # The substring test is only a hint; confirm on the decoded message.
      content = json.loads(data)
      if 'event' in content and content['event'] == 'follow':
        return self.on_follow(content)

    return None

示例#35

0

显示文件

def gen_tuple(jsontweet):
    """Turn a JSON-encoded tweet into a flat tuple.

    Args:
        jsontweet: JSON text of one tweet.

    Returns:
        ``(author id, created_at, text passed through convert_to_utf8_str,
        retweeted)`` where ``retweeted`` is True when the parsed status
        exposes a ``retweeted_status`` attribute that is not None.
    """
    tweet = Status.parse(api, json.loads(jsontweet))
    # Identity test against None instead of ``!= None``.
    retweeted = getattr(tweet, 'retweeted_status', None) is not None
    return (tweet.author.id, tweet.created_at, convert_to_utf8_str(tweet.text), retweeted)

示例#36

0

显示文件

文件： campboard.py 项目： ruiwen/CampBoard

	def update_tweets(self):
		"""Drain queued stream items, persist the tweets and build a broadcast.

		Items are popped from ``self.incoming`` until it raises IndexError.
		Items containing "in_reply_to_status_id" are parsed with
		``Status.parse``; everything else is dropped. Each status and its
		hashtags are written through ``self.db.execute``. For hashtags found
		in ``campboard['sessions']`` the tweet is added to that channel's
		``recent_tweets`` and a "+1" / "-1" in the text bumps the session's
		positive / negative vote counter.

		Returns:
			dict with ``'general'`` (``recent_tweets`` for every status of
			the batch) and ``'channels'`` (``recent_tweets`` per session tag).

		Note: this module targets Python 2 (it relies on ``unicode``).
		"""

		def summarize(tweet):
			# Plain-dict view of a status, as placed in the broadcast payload.
			return {
				'text': tweet.text, 'created_at': unicode(tweet.created_at), 'id': tweet.id,
				'user': {
					'id': tweet.user.id,
					'screen_name': tweet.user.screen_name,
					'profile_image_url': tweet.user.profile_image_url
				}
			}

		print("Updating tweets")

		statuses = []
		try:
			while True:
				item = self.incoming.pop()  # raises IndexError once drained
				if "in_reply_to_status_id" in item:
					statuses.append(Status.parse(self.stream.api, json.loads(item)))
				# Anything other than a status update is ignored for now.
		except IndexError:
			pass

		broadcast = {}
		broadcast['general'] = {}
		broadcast['channels'] = {}

		for s in statuses:
			# Flags are passed as an argument: global inline flags are only
			# accepted at the very start of a pattern on newer Pythons.
			tags = re.findall(r"#([\w]+)", s.text, re.IGNORECASE | re.UNICODE)
			print("Tags: ")
			print(tags)
			self.db.execute("INSERT INTO tweets (id, user_id, screen_name, profile_image_url, created_at, text) VALUES (%s,%s,%s,%s,%s,%s)", s.id, s.user.id, s.user.screen_name, s.user.profile_image_url, s.created_at, s.text)

			# Establish HABTM relationships, tweets with tags
			for t in tags:
				t = t.lower()  # Force all to lowercase
				print("Inserting tag: %s" % t)
				self.db.execute('''INSERT INTO hashtags (tag) VALUES (%s) ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id), tag=%s; 
					INSERT INTO hashtags_tweets (hash_id, tweet_id) VALUES (LAST_INSERT_ID(), %s)''', t, t, s.id)

				# Only session tags get a broadcast channel and vote counting.
				if t not in campboard['sessions']:
					continue

				# Append to the channel's list instead of resetting it, so
				# every matching tweet of the batch is kept, not only the last.
				channel = broadcast['channels'].setdefault(t, {})
				channel.setdefault('recent_tweets', []).append(summarize(s))

				# Count the votes while we're at it
				if re.search(r'\+1', s.text):
					self.db.execute("INSERT INTO session_votes (`session`, positive) VALUES (%s, 1) ON DUPLICATE KEY UPDATE positive=positive+1", t)
				elif re.search(r'\-1', s.text):
					self.db.execute("INSERT INTO session_votes (`session`, negative) VALUES (%s, 1) ON DUPLICATE KEY UPDATE negative=negative+1", t)

		broadcast['general']['recent_tweets'] = [summarize(s) for s in statuses]

		return broadcast

示例#37

0

显示文件

    # Per-tweet feature flags, all cleared before the tweet is inspected.
    hashtag = 0
    url = 0
    question = 0
    exclamation = 0
    pos_term = 0
    neg_term = 0
    pos_emoticon = 0
    neg_emoticon = 0
    reply = 0
    moment_morning = 0
    moment_afternoon = 0
    moment_evening = 0
    moment_night = 0
    retweeted = 0

    status = Status.parse(api, json.loads(tweet[0]))

    # Known-bad ids and "RT @" retweets are only counted, never analysed.
    if status.id in error_list_tweet_ids:
        tweets_discarded_error += 1
    elif status.text.startswith("RT @"):
        tweets_discarded_retweet += 1
    else:
        tweets_considered += 1
        # search() results are tested with an identity check against None.
        if regex_username.search(status.text) is not None:
            tweets_username += 1
            username = 1
        if regex_hashtag.search(status.text) is not None:
            tweets_hashtag += 1
            hashtag = 1
        if regex_url.search(status.text) is not None:
            tweets_url += 1

示例#38

0

显示文件

from tweepy.models import Status

from teebr.text.utils import normalize_text
from teebr.features import filter_status

CLUSTERS = 40
DIMS = 100

tweets = []

#tw_count = 0

# Collect the normalized text of every tweet in the dump that filter_status
# accepts; one JSON document is read per line.
with open("raw_tweets.jsons") as f:
    for line in f:
        j = loads(line)
        t = Status.parse(None, j)
        if filter_status(t):
            tweet = normalize_text(t.text)
            tweets.append(tweet)
            #tw_count += 1
            #if tw_count >= 2000:
            #    break

# less tweets for the tests
#tweets = tweets[:10000]

# Parenthesized so the line parses on Python 3 and prints the same on Python 2.
print("tweets: %d" % len(tweets))

#hasher = HashingVectorizer(stop_words='english', non_negative=True, norm=None)
#vectorizer = make_pipeline(hasher, TfidfTransformer())

示例#39

0

显示文件

    # Per-tweet feature flags, all cleared before the tweet is inspected.
    hashtag = 0
    url = 0
    question = 0
    exclamation = 0
    pos_term = 0
    neg_term = 0
    pos_emoticon = 0
    neg_emoticon = 0
    reply = 0
    moment_morning = 0
    moment_afternoon = 0
    moment_evening = 0
    moment_night = 0
    retweeted = 0

    status = Status.parse(api, tweet)

    # Known-bad ids and "RT @" retweets are only counted, never analysed.
    if tweet['id'] in error_list_tweet_ids:
        tweets_discarded_error += 1
    elif tweet['text'].startswith("RT @"):
        tweets_discarded_retweet += 1
    else:
        tweets_considered += 1
        # search() results are tested with an identity check against None.
        if regex_username.search(tweet['text']) is not None:
            tweets_username += 1
            username = 1
        if regex_hashtag.search(tweet['text']) is not None:
            tweets_hashtag += 1
            hashtag = 1
        if regex_url.search(tweet['text']) is not None:
            tweets_url += 1

示例#40

0

显示文件

文件： streaming.py 项目： dtran320/tweepy

 def on_data(self, data):
     """
     Dispatch one site-stream payload to the matching ``on_*`` callback.

     Payloads whose raw text lacks ``for_user`` or ``message`` are ignored.
     Otherwise the decoded ``message`` is routed, in this order, to:
     ``on_friends``; ``on_follow`` / ``on_unfollow`` / ``on_favorite`` /
     ``on_unfavorite`` (by event name; other events are ignored);
     ``on_retweet`` (a retweet of a tweet by the user of interest);
     ``on_user_status`` or ``on_user_mention`` (any other tweet);
     ``on_direct_message``. Unrecognized messages are printed.

     Returns False as soon as a callback returns False, None otherwise.
     """
     # Cheap substring checks on the raw text gate the JSON decoding.
     if 'for_user' not in data:
         return None
     parsed_data = json.loads(data)
     user_id = parsed_data['for_user']
     if 'message' not in data:
         return None
     message = parsed_data['message']

     outcome = None
     if u'friends' in message:
         outcome = self.on_friends(user_id, message['friends'])
     elif u'event' in message:
         event = message[u'event']
         if event == u'follow':
             outcome = self.on_follow(
                 user_id=user_id,
                 source=message[u'source'],
                 target=message[u'target'],
                 time=message[u'created_at']
             )
         elif event == u'unfollow':
             outcome = self.on_unfollow(
                 user_id,
                 source=message[u'source'],
                 target=message[u'target'],
                 time=message[u'created_at']
             )
         elif event == u'favorite':
             outcome = self.on_favorite(
                 user_id,
                 source=message[u'source'],
                 favorited=message[u'target_object'],
                 time=message[u'created_at']
             )
         elif event == u'unfavorite':
             outcome = self.on_unfavorite(
                 user_id,
                 source=message[u'source'],
                 favorited=message[u'target_object']
             )
     # Need this second check - could be a retweet of
     # a tweet mentioning the user of interest
     elif (u'retweeted_status' in message and
         int(message[u'retweeted_status'][u'user'][u'id']) ==
         int(user_id)
     ):
         outcome = self.on_retweet(user_id, message)
     elif u'text' in message:
         status = Status.parse(self.api, message)
         # Compare ids type-insensitively, as the retweet check above does:
         # a string ``for_user`` would never equal a numeric author id.
         if str(status.author.id) == str(user_id):
             # tweet from the user of interest
             outcome = self.on_user_status(user_id, status)
         else:
             # tweet mentioning the user of interest
             outcome = self.on_user_mention(user_id, status)
     elif u'direct_message' in message:
         outcome = self.on_direct_message(
             user_id, message[u'direct_message']
         )
     else:
         # Parenthesized so the line parses on Python 3 as well as Python 2.
         print(parsed_data)

     if outcome is False:
         return False
     return None

示例#41

0

显示文件

文件： twitter.py 项目： pixelead0/semiphemeral

 def parse_tweet(tweet):
     """Build a tweepy Status from *tweet* and set ``current_user`` as its author."""
     parsed = Status.parse(self.api, tweet)
     parsed.author = current_user
     return parsed

示例#42

0

显示文件

    def save_tweets(self):
        """Consume raw stream payloads from ``self.q`` forever and store matches.

        Each payload is JSON-decoded; payloads containing
        ``in_reply_to_status_id`` are parsed into a ``Status``. The text used
        for matching is the retweeted status' text when the status is a
        retweet, otherwise its own text, preferring
        ``extended_tweet['full_text']`` when available. For every entry of
        ``self.keyword_obj_list`` whose keyword occurs (case-insensitively)
        in that text or in the quoted text, a ``Tweet`` row is created,
        followed by one ``Knowledge`` row per extracted triple.

        ``self.q.task_done()`` is called once per payload. The loop never
        returns on its own.
        """

        def best_text(tweet):
            # Prefer the untruncated text when an extended_tweet is attached.
            if hasattr(tweet, 'extended_tweet'):
                return tweet.extended_tweet['full_text']
            return tweet.text

        while True:
            raw_data = self.q.get()

            data = json.loads(raw_data)

            if 'in_reply_to_status_id' in data:
                status = Status.parse(self.api, data)

                if hasattr(status, 'retweeted_status'):
                    retweeted_id = status.retweeted_status.id
                    text = best_text(status.retweeted_status)
                else:
                    retweeted_id = 0
                    text = best_text(status)

                if hasattr(status, "quoted_status"):
                    quoted_id = status.quoted_status.id
                    quoted_text = best_text(status.quoted_status)
                else:
                    quoted_id = 0
                    quoted_text = ""

                # Lower-cased once; the keyword loop only reads them.
                text_lower = text.lower()
                quoted_lower = quoted_text.lower()

                for keyword_obj in self.keyword_obj_list:
                    keyword = keyword_obj.keyword
                    needle = keyword.lower()

                    if needle not in text_lower and needle not in quoted_lower:
                        continue

                    tweet_obj = Tweet.objects.create(
                        keyword=keyword_obj,
                        tweet_id=status.id,
                        created_at=make_aware(status.created_at),
                        user_id=status.user.id,
                        retweeted_id=retweeted_id,
                        quoted_id=quoted_id,
                        text=text,
                        quoted_text=quoted_text)

                    lang = detect(keyword)
                    # Pre-process into a separate name so the tweet text used
                    # for matching and storing later keywords stays untouched.
                    if lang == 'en':
                        extraction_text = text_utils.pre_process(text)
                    else:
                        extraction_text = text

                    triple_list = knowledge_graph_extract.extract_entity(
                        extraction_text, lang=lang)
                    for triple in triple_list:
                        Knowledge.objects.create(tweet=tweet_obj,
                                                 k_subject=triple[0],
                                                 k_predicate=triple[1],
                                                 k_object=triple[2],
                                                 subject_type=triple[3],
                                                 object_type=triple[4])

            self.q.task_done()

示例#43

0

显示文件

文件： twitterlisten.py 项目： beckastar/neighborhood_tweets

 def on_data(self, data):
     """Parse the raw JSON payload into a Status and pass it to ``self.handler``."""
     parsed = Status.parse(tweepy_api, json.loads(data))
     self.handler(parsed)

示例#44

0

显示文件

文件： TwitterStreamDownload.py 项目： henryoier/Automatic-Rumor-Detection

    def on_data(self, raw_data):
        """Handle one raw stream message, rotating the CSV outputs when due.

        Every call increments ``self.count``. Once it exceeds 50000 the
        status, user and delete files are closed, the counter is reset and
        three new CSV files named after the current time are opened under
        ``./data/`` with their header rows written.

        The decoded message is then routed by the key it contains to
        ``on_status``, ``on_delete``, ``on_event``, ``on_limit``,
        ``on_disconnect`` or ``on_warning``.

        Returns:
            False when the selected handler returns False or when the message
            type is unknown (which is also logged); True otherwise.
        """
        self.count += 1

        data = json.loads(raw_data)

        if self.count > 50000:
            # Close the current outputs in status, user, delete order.
            for attr in ('statusf', 'userf', 'deletef'):
                getattr(self, attr).close()
            self.count = 0
            ts = time.strftime("./data/%Y%m%d%H%M")

            # Status attributes left out of the export: withheld_copyright,
            # withheld_in_countries, withheld_scope, truncated, retweeted,
            # retweet_count, scopes, possibly_sensitive, lang, filter_level,
            # favorited, favorite_count, current_user_retweet, contributors,
            # annotations.
            status_header = [
                'id', 'created_at', 'coordinates',
                'hashtags', 'user_mentions', 'symbols', 'urls',
                'media',
                'in_reply_to_screen_name',
                'in_reply_to_user_id_str',
                'in_reply_to_status_id_str',
                'place', 'retweeted_status_id', 'source',
                'text', 'user id',
            ]
            # User attributes left out of the export: default_profile_image,
            # entities, follow_request_sent, following, notifications, the
            # profile_* styling fields, show_all_inline_media, status,
            # utc_offset, withheld_in_countries, withheld_scope.
            user_header = [
                'created_at', 'default_profile',
                'description',
                'favourites_count',
                'followers_count', 'friends_count',
                'geo_enabled', 'id_str', 'is_translator',
                'lang', 'listed_count', 'location',
                'name',
                'protected', 'screen_name',
                'statuses_count', 'time_zone', 'user.url',
                'verified',
            ]

            self.statusf = open(ts + '_status.csv', 'w', newline='')
            self.statusw = csv.writer(self.statusf)
            self.statusw.writerow(status_header)

            self.userf = open(ts + '_user.csv', 'w', newline='')
            self.userw = csv.writer(self.userf)
            self.userw.writerow(user_header)

            self.deletef = open(ts + '_delete.csv', 'w', newline='')
            self.deletew = csv.writer(self.deletef)
            self.deletew.writerow(['status_id', 'user_id'])

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            verdict = self.on_status(status)
        elif 'delete' in data:
            removed = data['delete']['status']
            verdict = self.on_delete(removed['id'], removed['user_id'])
        elif 'event' in data:
            status = Status.parse(self.api, data)
            verdict = self.on_event(status)
        elif 'limit' in data:
            verdict = self.on_limit(data['limit']['track'])
        elif 'disconnect' in data:
            verdict = self.on_disconnect(data['disconnect'])
        elif 'warning' in data:
            verdict = self.on_warning(data['warning'])
        else:
            logging.error("Unknown message type: " + str(raw_data))
            return False

        # Only an explicit False from the handler stops the stream.
        return verdict is not False

示例#45

0

显示文件

    def on_data(self, data):
        """Decode one raw stream message and dispatch it to its handler.

        When the message carries ``extended_tweet.full_text`` or
        ``retweeted_status.full_text`` that text is printed and handed to
        ``on_status`` as its second argument (the retweeted status wins when
        both are present); otherwise an empty string is passed.

        The message is routed by the key it contains to ``on_status``,
        ``on_delete``, ``on_event``, ``on_direct_message``, ``on_friends``,
        ``on_limit``, ``on_disconnect`` or ``on_warning``. Unknown messages
        are logged.

        Returns False when the selected handler returns False, None otherwise.
        """
        # Keep the raw payload for the log message; decode it only once.
        raw_data = data
        data = json.loads(raw_data)

        full_text = ""
        for container in ("extended_tweet", "retweeted_status"):
            if container in data and 'full_text' in data[container]:
                full_text = str(data[container]["full_text"])
                print(
                    'FUL TEXT *******************************************************************************'
                )
                print(full_text)

        if 'in_reply_to_status_id' in data:
            status = Status.parse(self.api, data)
            if self.on_status(status, full_text) is False:
                return False
        elif 'delete' in data:
            delete = data['delete']['status']
            if self.on_delete(delete['id'], delete['user_id']) is False:
                return False
        elif 'event' in data:
            status = Status.parse(self.api, data)
            if self.on_event(status) is False:
                return False
        elif 'direct_message' in data:
            status = Status.parse(self.api, data)
            if self.on_direct_message(status) is False:
                return False
        elif 'friends' in data:
            if self.on_friends(data['friends']) is False:
                return False
        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False
        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False
        elif 'warning' in data:
            if self.on_warning(data['warning']) is False:
                return False
        else:
            # raw_data is bound above, so this branch no longer raises NameError.
            logging.error("Unknown message type: " + str(raw_data))

示例#46

0

显示文件

    def setUp(self):
        def load_status():
            with open('./tests/cassettes/sample-tweet.json') as infile:
                status = Status.parse(api=None, json=load(infile))
                return status

        self._status = Status.parse(
            api=None,
            json={
                'created_at': 'Fri Dec 01 01:53:45 +0000 2017',
                'id': 936412976520876032,
                'id_str': '936412976520876032',
                'text': '@realDonaldTrump https://t.co/0BW86RBIRH',
                'display_text_range': [17, 40],
                'source':
                '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
                'truncated': False,
                'in_reply_to_status_id': 936395008139198464,
                'in_reply_to_status_id_str': '936395008139198464',
                'in_reply_to_user_id': 25073877,
                'in_reply_to_user_id_str': '25073877',
                'in_reply_to_screen_name': 'realDonaldTrump',
                'user': {
                    'id': 29363354,
                    'id_str': '29363354',
                    'name': 'Kate',
                    'screen_name': 'k8_doo',
                    'location': 'United States',
                    'url': None,
                    'description':
                    'Follow me if you want to know how far I walked, hiked or ran today for #charitymiles',
                    'translator_type': 'none',
                    'protected': False,
                    'verified': False,
                    'followers_count': 322,
                    'friends_count': 943,
                    'listed_count': 3,
                    'favourites_count': 26916,
                    'statuses_count': 3334,
                    'created_at': 'Tue Apr 07 02:56:52 +0000 2009',
                    'utc_offset': -18000,
                    'time_zone': 'Eastern Time (US & Canada)',
                    'geo_enabled': True,
                    'lang': 'en',
                    'contributors_enabled': False,
                    'is_translator': False,
                    'profile_background_color': 'EBEBEB',
                    'profile_background_image_url':
                    'http://abs.twimg.com/images/themes/theme7/bg.gif',
                    'profile_background_image_url_https':
                    'https://abs.twimg.com/images/themes/theme7/bg.gif',
                    'profile_background_tile': False,
                    'profile_link_color': '990000',
                    'profile_sidebar_border_color': 'DFDFDF',
                    'profile_sidebar_fill_color': 'F3F3F3',
                    'profile_text_color': '333333',
                    'profile_use_background_image': True,
                    'profile_image_url':
                    'http://pbs.twimg.com/profile_images/823305825297006593/LhjPdILK_normal.jpg',
                    'profile_image_url_https':
                    'https://pbs.twimg.com/profile_images/823305825297006593/LhjPdILK_normal.jpg',
                    'profile_banner_url':
                    'https://pbs.twimg.com/profile_banners/29363354/1485126381',
                    'default_profile': False,
                    'default_profile_image': False,
                    'following': None,
                    'follow_request_sent': None,
                    'notifications': None
                },
                'geo': None,
                'coordinates': None,
                'place': {
                    'bounding_box': {
                        'coordinates': [[1, 2], [3, 2, 1]]
                    }
                },
                'contributors': None,
                'quoted_status_id': 936379603651883008,
                'quoted_status_id_str': '936379603651883008',
                'quoted_status': {
                    'created_at': 'Thu Nov 30 23:41:09 +0000 2017',
                    'id': 936379603651883008,
                    'id_str': '936379603651883008',
                    'text':
                    'On the left: @BarackObama’s National Tree Lighting\nOn the right: @realDonaldTrump’s National Tree Lighting… https://t.co/PcsatAL7Lu',
                    'display_text_range': [0, 140],
                    'source':
                    '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
                    'truncated': True,
                    'in_reply_to_status_id': None,
                    'in_reply_to_status_id_str': None,
                    'in_reply_to_user_id': None,
                    'in_reply_to_user_id_str': None,
                    'in_reply_to_screen_name': None,
                    'user': {
                        'id': 329433192,
                        'id_str': '329433192',
                        'name': 'Jeremy Dickey',
                        'screen_name': 'JeremyDDickey',
                        'location': 'Washington, D.C.',
                        'url': 'https://medium.com/@JeremyDDickey',
                        'description':
                        'City Government Media Specialist. Aspiring CJ Cregg. Graduate of @MercyhurstU & @LCCLondon. RTs = you got my attention. Tweets are my own. Sarcasm also my own.',
                        'translator_type': 'none',
                        'protected': False,
                        'verified': False,
                        'followers_count': 1860,
                        'friends_count': 2452,
                        'listed_count': 129,
                        'favourites_count': 5864,
                        'statuses_count': 64253,
                        'created_at': 'Tue Jul 05 02:20:11 +0000 2011',
                        'utc_offset': -18000,
                        'time_zone': 'Eastern Time (US & Canada)',
                        'geo_enabled': True,
                        'lang': 'en',
                        'contributors_enabled': False,
                        'is_translator': False,
                        'profile_background_color': '1A1B1F',
                        'profile_background_image_url':
                        'http://pbs.twimg.com/profile_background_images/474534472373649408/gaee5mbF.png',
                        'profile_background_image_url_https':
                        'https://pbs.twimg.com/profile_background_images/474534472373649408/gaee5mbF.png',
                        'profile_background_tile': False,
                        'profile_link_color': '3B94D9',
                        'profile_sidebar_border_color': 'FFFFFF',
                        'profile_sidebar_fill_color': '252429',
                        'profile_text_color': '666666',
                        'profile_use_background_image': False,
                        'profile_image_url':
                        'http://pbs.twimg.com/profile_images/932429063280627713/HnHFID4p_normal.jpg',
                        'profile_image_url_https':
                        'https://pbs.twimg.com/profile_images/932429063280627713/HnHFID4p_normal.jpg',
                        'profile_banner_url':
                        'https://pbs.twimg.com/profile_banners/329433192/1443752276',
                        'default_profile': False,
                        'default_profile_image': False,
                        'following': None,
                        'follow_request_sent': None,
                        'notifications': None
                    },
                    'geo': None,
                    'coordinates': None,
                    'place': {
                        'id': '6417871953fa5e86',
                        'url':
                        'https://api.twitter.com/1.1/geo/id/6417871953fa5e86.json',
                        'place_type': 'city',
                        'name': 'Silver Spring',
                        'full_name': 'Silver Spring, MD',
                        'country_code': 'US',
                        'country': 'United States',
                        'bounding_box': {
                            'type':
                            'Polygon',
                            'coordinates': [[[-77.064086, 38.979735],
                                             [-77.064086, 39.036964],
                                             [-76.97162, 39.036964],
                                             [-76.97162, 38.979735]]]
                        },
                        'attributes': {}
                    },
                    'contributors': None,
                    'is_quote_status': False,
                    'extended_tweet': {
                        'full_text':
                        'On the left: @BarackObama’s National Tree Lighting\nOn the right: @realDonaldTrump’s National Tree Lighting #Christmas https://t.co/wYoLJRO2r6',
                        'display_text_range': [0, 117],
                        'entities': {
                            'hashtags': [{
                                'text': 'Christmas',
                                'indices': [107, 117]
                            }],
                            'urls': [],
                            'user_mentions': [{
                                'screen_name': 'BarackObama',
                                'name': 'Barack Obama',
                                'id': 813286,
                                'id_str': '813286',
                                'indices': [13, 25]
                            }, {
                                'screen_name': 'realDonaldTrump',
                                'name': 'Donald J. Trump',
                                'id': 25073877,
                                'id_str': '25073877',
                                'indices': [65, 81]
                            }],
                            'symbols': [],
                            'media': [{
                                'id': 936379576682450944,
                                'id_str': '936379576682450944',
                                'indices': [118, 141],
                                'media_url':
                                'http://pbs.twimg.com/media/DP6wQ4sWkAAvTDD.jpg',
                                'media_url_https':
                                'https://pbs.twimg.com/media/DP6wQ4sWkAAvTDD.jpg',
                                'url': 'https://t.co/wYoLJRO2r6',
                                'display_url': 'pic.twitter.com/wYoLJRO2r6',
                                'expanded_url':
                                'https://twitter.com/JeremyDDickey/status/936379603651883008/photo/1',
                                'type': 'photo',
                                'sizes': {
                                    'medium': {
                                        'w': 1200,
                                        'h': 800,
                                        'resize': 'fit'
                                    },
                                    'small': {
                                        'w': 680,
                                        'h': 453,
                                        'resize': 'fit'
                                    },
                                    'thumb': {
                                        'w': 150,
                                        'h': 150,
                                        'resize': 'crop'
                                    },
                                    'large': {
                                        'w': 1752,
                                        'h': 1168,
                                        'resize': 'fit'
                                    }
                                }
                            }, {
                                'id': 936379575839358977,
                                'id_str': '936379575839358977',
                                'indices': [118, 141],
                                'media_url':
                                'http://pbs.twimg.com/media/DP6wQ1jWAAE7CdA.jpg',
                                'media_url_https':
                                'https://pbs.twimg.com/media/DP6wQ1jWAAE7CdA.jpg',
                                'url': 'https://t.co/wYoLJRO2r6',
                                'display_url': 'pic.twitter.com/wYoLJRO2r6',
                                'expanded_url':
                                'https://twitter.com/JeremyDDickey/status/936379603651883008/photo/1',
                                'type': 'photo',
                                'sizes': {
                                    'small': {
                                        'w': 680,
                                        'h': 680,
                                        'resize': 'fit'
                                    },
                                    'thumb': {
                                        'w': 150,
                                        'h': 150,
                                        'resize': 'crop'
                                    },
                                    'medium': {
                                        'w': 1200,
                                        'h': 1200,
                                        'resize': 'fit'
                                    },
                                    'large': {
                                        'w': 2048,
                                        'h': 2048,
                                        'resize': 'fit'
                                    }
                                }
                            }]
                        },
                        'extended_entities': {
                            'media': [{
                                'id': 936379576682450944,
                                'id_str': '936379576682450944',
                                'indices': [118, 141],
                                'media_url':
                                'http://pbs.twimg.com/media/DP6wQ4sWkAAvTDD.jpg',
                                'media_url_https':
                                'https://pbs.twimg.com/media/DP6wQ4sWkAAvTDD.jpg',
                                'url': 'https://t.co/wYoLJRO2r6',
                                'display_url': 'pic.twitter.com/wYoLJRO2r6',
                                'expanded_url':
                                'https://twitter.com/JeremyDDickey/status/936379603651883008/photo/1',
                                'type': 'photo',
                                'sizes': {
                                    'medium': {
                                        'w': 1200,
                                        'h': 800,
                                        'resize': 'fit'
                                    },
                                    'small': {
                                        'w': 680,
                                        'h': 453,
                                        'resize': 'fit'
                                    },
                                    'thumb': {
                                        'w': 150,
                                        'h': 150,
                                        'resize': 'crop'
                                    },
                                    'large': {
                                        'w': 1752,
                                        'h': 1168,
                                        'resize': 'fit'
                                    }
                                }
                            }, {
                                'id': 936379575839358977,
                                'id_str': '936379575839358977',
                                'indices': [118, 141],
                                'media_url':
                                'http://pbs.twimg.com/media/DP6wQ1jWAAE7CdA.jpg',
                                'media_url_https':
                                'https://pbs.twimg.com/media/DP6wQ1jWAAE7CdA.jpg',
                                'url': 'https://t.co/wYoLJRO2r6',
                                'display_url': 'pic.twitter.com/wYoLJRO2r6',
                                'expanded_url':
                                'https://twitter.com/JeremyDDickey/status/936379603651883008/photo/1',
                                'type': 'photo',
                                'sizes': {
                                    'small': {
                                        'w': 680,
                                        'h': 680,
                                        'resize': 'fit'
                                    },
                                    'thumb': {
                                        'w': 150,
                                        'h': 150,
                                        'resize': 'crop'
                                    },
                                    'medium': {
                                        'w': 1200,
                                        'h': 1200,
                                        'resize': 'fit'
                                    },
                                    'large': {
                                        'w': 2048,
                                        'h': 2048,
                                        'resize': 'fit'
                                    }
                                }
                            }]
                        }
                    },
                    'quote_count': 56,
                    'reply_count': 44,
                    'retweet_count': 326,
                    'favorite_count': 385,
                    'entities': {
                        'hashtags': [],
                        'urls': [{
                            'url': 'https://t.co/PcsatAL7Lu',
                            'expanded_url':
                            'https://twitter.com/i/web/status/936379603651883008',
                            'display_url': 'twitter.com/i/web/status/9…',
                            'indices': [108, 131]
                        }],
                        'user_mentions': [{
                            'screen_name': 'BarackObama',
                            'name': 'Barack Obama',
                            'id': 813286,
                            'id_str': '813286',
                            'indices': [13, 25]
                        }, {
                            'screen_name': 'realDonaldTrump',
                            'name': 'Donald J. Trump',
                            'id': 25073877,
                            'id_str': '25073877',
                            'indices': [65, 81]
                        }],
                        'symbols': []
                    },
                    'favorited': False,
                    'retweeted': False,
                    'possibly_sensitive': False,
                    'filter_level': 'low',
                    'lang': 'en'
                },
                'is_quote_status': True,
                'quote_count': 0,
                'reply_count': 0,
                'retweet_count': 0,
                'favorite_count': 0,
                'entities': {
                    'hashtags': [],
                    'urls': [{
                        'url': 'https://t.co/0BW86RBIRH',
                        'expanded_url':
                        'https://twitter.com/jeremyddickey/status/936379603651883008',
                        'display_url': 'twitter.com/jeremyddickey/…',
                        'indices': [17, 40]
                    }],
                    'user_mentions': [{
                        'screen_name': 'realDonaldTrump',
                        'name': 'Donald J. Trump',
                        'id': 25073877,
                        'id_str': '25073877',
                        'indices': [0, 16]
                    }],
                    'symbols': []
                },
                'favorited': False,
                'retweeted': False,
                'possibly_sensitive': False,
                'filter_level': 'low',
                'lang': 'und',
                'timestamp_ms': '1512093225971'
            })

        self._status_backup = deepcopy(self._status)

示例#47

0

显示文件

文件： streamfilewriter.py 项目： praneeth130/tweeql

def bulk_load(listkey, tweets):
    """Write the text of each JSON-encoded tweet to a per-list dump file.

    Args:
        listkey: Name of the output file created under
            /home/marcua/data/tweets/.
        tweets: Iterable of JSON strings; each one is decoded and handed to
            ``Status.parse`` together with the module-level ``api``.
    """
    with open("/home/marcua/data/tweets/%s" % (listkey), "w") as tmpfile:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is also valid syntax on Python 3.
        print("file %s" % (tmpfile.name))
        for jsontweet in tweets:
            tweet = Status.parse(api, json.loads(jsontweet))
            # One tweet text per line.
            tmpfile.write(convert_to_utf8_str(tweet.text) + "\n")

示例#48

0

显示文件

 def load_status():
     """Load the sample-tweet cassette and return it parsed as a ``Status``.

     The JSON file is decoded with ``load`` and passed to ``Status.parse``
     with no API object attached (``api=None``).
     """
     with open('./tests/cassettes/sample-tweet.json') as cassette:
         return Status.parse(api=None, json=load(cassette))

示例#49

0

显示文件

文件： SQLReformatter.py 项目： StijnPieper/wse

 # Feature flags, all cleared before the tweet is examined.
 # NOTE(review): presumably this runs once per tweet inside an enclosing
 # loop that is not shown here -- confirm against the full file.
 hashtag = 0
 url = 0
 question = 0
 exclamation = 0
 pos_term = 0
 neg_term = 0
 pos_emoticon = 0
 neg_emoticon = 0
 reply = 0
 moment_morning = 0
 moment_afternoon = 0
 moment_evening = 0
 moment_night = 0
 retweeted = 0

 # tweet[0] holds the raw JSON text of the tweet.
 status = Status.parse(api, json.loads(tweet[0]))

 # Tweets listed in error_list_tweet_ids and plain "RT @" retweets are only
 # counted; everything else is checked against the feature regexes.
 if status.id in error_list_tweet_ids:
     tweets_discarded_error += 1
 elif status.text.startswith("RT @"):
     tweets_discarded_retweet += 1
 else:
     tweets_considered += 1
     # re search() yields None when nothing matches; compare by identity.
     if regex_username.search(status.text) is not None:
         tweets_username += 1
         username = 1
     if regex_hashtag.search(status.text) is not None:
         tweets_hashtag += 1
         hashtag = 1
     if regex_url.search(status.text) is not None:
         tweets_url += 1

示例#50

0

显示文件

文件： SampleReformatter.py 项目： StijnPieper/wse

 # Feature flags, all cleared before the tweet is examined.
 # NOTE(review): presumably this runs once per tweet inside an enclosing
 # loop that is not shown here -- confirm against the full file.
 hashtag = 0
 url = 0
 question = 0
 exclamation = 0
 pos_term = 0
 neg_term = 0
 pos_emoticon = 0
 neg_emoticon = 0
 reply = 0
 moment_morning = 0
 moment_afternoon = 0
 moment_evening = 0
 moment_night = 0
 retweeted = 0

 # tweet is already a decoded dict here; the checks below read the dict
 # directly rather than the parsed Status object.
 status = Status.parse(api, tweet)

 # Tweets listed in error_list_tweet_ids and plain "RT @" retweets are only
 # counted; everything else is checked against the feature regexes.
 if tweet['id'] in error_list_tweet_ids:
     tweets_discarded_error += 1
 elif tweet['text'].startswith("RT @"):
     tweets_discarded_retweet += 1
 else:
     tweets_considered += 1
     # re search() yields None when nothing matches; compare by identity.
     if regex_username.search(tweet['text']) is not None:
         tweets_username += 1
         username = 1
     if regex_hashtag.search(tweet['text']) is not None:
         tweets_hashtag += 1
         hashtag = 1
     if regex_url.search(tweet['text']) is not None:
         tweets_url += 1

示例#51

0

显示文件

    def on_data(self, raw_data):
        """Dispatch one raw stream message, then buffer it if it is a tweet.

        The decoded message is routed to the matching ``on_*`` handler based
        on which key it contains. A handler returning False, or a message of
        an unrecognised type, stops the stream. Messages that carry a
        ``user`` key are additionally echoed to stdout (unless the text
        starts with "RT"), appended to ``self.buffer`` and counted.

        Args:
            raw_data: JSON text of a single stream message.

        Returns:
            False to stop the stream and close the connection; None
            otherwise.
        """
        data = json.loads(raw_data)

        if 'in_reply_to_status_id' in data:
            if self.on_status(Status.parse(self.api, data)) is False:
                return False

        elif 'delete' in data:
            deleted = data['delete']['status']
            if self.on_delete(deleted['id'], deleted['user_id']) is False:
                return False

        elif 'event' in data:
            if self.on_event(Status.parse(self.api, data)) is False:
                return False

        elif 'direct_message' in data:
            if self.on_direct_message(Status.parse(self.api, data)) is False:
                return False

        elif 'friends' in data:
            if self.on_friends(data['friends']) is False:
                return False

        elif 'limit' in data:
            if self.on_limit(data['limit']['track']) is False:
                return False

        elif 'disconnect' in data:
            if self.on_disconnect(data['disconnect']) is False:
                return False

        elif 'warning' in data:
            if self.on_warning(data['warning']) is False:
                return False

        else:
            # Unrecognised message type: stop the stream.
            return False

        # Only messages with a user are treated as tweets from here on.
        # Dict membership is tested directly; no need to copy the keys.
        if "user" not in data:
            return None

        # Console echo: screen name padded to a 20-character column, with
        # at least one space before the message text.
        uname = data["user"]["screen_name"]
        umsg = data["text"]
        spc = " " * max(1, 20 - len(uname))
        if not umsg.startswith("RT"):
            print("<tweet>", uname, spc, umsg.replace("\n", ""))

        # Write the tweet to the buffer and bump the running counter.
        self.buffer.write(raw_data)
        self.count += 1

        # Cycle the buffer every `save_interval` tweets.
        if self.count % self.save_interval == 0:
            self.swap_buffer()

        # Every `check_in_interval` tweets, run the check-in tasks.
        if self.count % check_in_interval == 0:
            # Stop when the kill check reports true.
            if checkin_killstream():
                return False
            # Stop when the pause check reports a falsy result.
            if not checkin_pausestream():
                return False

        return None

示例#52

0

显示文件

文件： streamfilewriter.py 项目： tcpavel/tweeql

def bulk_load(listkey, tweets):
    """Write the text of each JSON-encoded tweet to a per-list dump file.

    Args:
        listkey: Name of the output file created under
            /home/marcua/data/tweets/.
        tweets: Iterable of JSON strings; each one is decoded and handed to
            ``Status.parse`` together with the module-level ``api``.
    """
    with open('/home/marcua/data/tweets/%s' % (listkey), 'w') as tmpfile:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is also valid syntax on Python 3.
        print("file %s" % (tmpfile.name))
        for jsontweet in tweets:
            tweet = Status.parse(api, json.loads(jsontweet))
            # One tweet text per line.
            tmpfile.write(convert_to_utf8_str(tweet.text) + "\n")

示例#53

0

显示文件

文件： StreamingCrawler.py 项目： chucheng/EmhTwitterCrwaler

    def on_data(self, data):
        '''Handle one raw message from Twitter and forward statuses to on_status().

        Called when raw data is passed from Twitter. If this function
        returns False, listening to the stream stops.

        When the module-level ``gSave_raw_json`` flag is true, the raw JSON
        text is also written to ../json/<id % 1000>/<id>.json. Set it to
        true only for debugging.

        Args:
            data: Raw message text as received from the stream.

        Returns:
            False when ``self.running`` is False once the message has been
            handled; True in every other case, including after an error
            has been logged.
        '''
        try:
            self.on_data_running = True
            self.log("Get raw data from Twitter", screen_only=True)

            if gSave_raw_json:
                ### save the json into disk ###
                parsed_data = tweepy.utils.import_simplejson().loads(data)

                if "id" not in parsed_data:
                    # May be {"limit":{"track":73}} or a delete notice;
                    # ignore it. Clear the in-progress flag first so this
                    # early exit does not leave it stuck at True.
                    self.on_data_running = False
                    return True

                # Spread the files over sub-folders keyed by id % 1000.
                folder = "../json/" + str(parsed_data["id"] % 1000)

                try:
                    if not os.path.exists(folder):
                        os.makedirs(folder)
                except OSError:
                    self.log("OS ERROR")

                filename = folder + "/" + str(parsed_data["id"]) + ".json"
                # The context manager closes the file even if a write fails.
                with open(filename, "w") as output:
                    output.write(data)
                    output.write('\n')
                ### done ###

            # Chucheng 4/25/2011:
            #   We must override the method, because the original one might
            #   return False and so stop the listener.
            #   In short, you cannot simply call:
            #       tweepy.StreamListener.on_data(self, data)
            # This is a substring test on the raw text, not a key lookup.
            # Delete and limit notices are deliberately not dispatched.
            if 'in_reply_to_status_id' in data:
                status = Status.parse(self.api, json.loads(data))
                if self.on_status(status) is False:  # Trigger on_status now!!
                    self.log('in_reply_to_status_id in data: on_status() returns False. (this line should never be reached)')

            # Signals that we are no longer in the middle of processing data.
            self.on_data_running = False

            if self.running == False:  # see: StreamingCrawler.stop_listner()
                return False  # stop the listener while catching a SIGTERM

        except Exception as e:
            self.on_data_running = False
            # sys.exc_info()[2] is the traceback of the exception being
            # handled; sys.exc_traceback no longer exists on Python 3.
            self.log("Error:" + str(e), sys.exc_info()[2])

        return True