Пример #1
0
 def parse(cls, api, json):
     """Create a status instance from a decoded JSON mapping.

     The raw mapping is stored on ``_json`` and each of its keys becomes an
     attribute of the new object. ``user``, ``created_at``, ``source``,
     ``retweeted_status`` and non-null ``place`` values are converted before
     being stored; every other value is copied unchanged.
     """
     tweet = cls(api)
     tweet._json = json
     for key, value in json.items():
         if key == 'user':
             # Use the user model registered on the API's parser when an API
             # object is given; otherwise fall back to User.
             if api:
                 model = api.parser.model_factory.user
             else:
                 model = User
             parsed_user = model.parse(api, value)
             tweet.author = parsed_user
             tweet.user = parsed_user  # deprecated alias of ``author``
         elif key == 'created_at':
             tweet.created_at = parse_datetime(value)
         elif key == 'source':
             # Values containing '<' go through the HTML helpers; plain
             # strings are stored as-is with no source URL.
             is_markup = '<' in value
             tweet.source = parse_html_value(value) if is_markup else value
             tweet.source_url = parse_a_href(value) if is_markup else None
         elif key == 'retweeted_status':
             tweet.retweeted_status = Status.parse(api, value)
         elif key == 'place':
             tweet.place = None if value is None else Place.parse(api, value)
         else:
             setattr(tweet, key, value)
     return tweet
Пример #2
0
 def parse(cls, api, json):
     """Construct a status object out of a decoded JSON mapping.

     Keeps the raw mapping on ``_json`` and mirrors every key as an
     attribute. ``user`` is exposed as both ``author`` and ``user``;
     ``source`` additionally yields ``source_url``; ``created_at``,
     ``retweeted_status`` and non-null ``place`` are converted first.
     """
     parsed = cls(api)
     parsed._json = json
     for name, raw in json.items():
         if name == "user":
             # Parser-registered user model when an API object is given,
             # plain User otherwise.
             user_cls = api.parser.model_factory.user if api else User
             author = user_cls.parse(api, raw)
             parsed.author = author
             parsed.user = author  # deprecated alias of ``author``
             continue
         if name == "source":
             if "<" in raw:
                 parsed.source = parse_html_value(raw)
                 parsed.source_url = parse_a_href(raw)
             else:
                 parsed.source = raw
                 parsed.source_url = None
             continue
         # Remaining keys are stored under their own name, converted if needed.
         if name == "created_at":
             raw = parse_datetime(raw)
         elif name == "retweeted_status":
             raw = Status.parse(api, raw)
         elif name == "place" and raw is not None:
             raw = Place.parse(api, raw)
         setattr(parsed, name, raw)
     return parsed
Пример #3
0
 def parse(cls, api, json):
     """Build an instance of ``cls`` whose attributes mirror ``json``.

     ``created_at`` is passed through ``parse_datetime``; all other values
     are stored unchanged.
     """
     instance = cls(api)
     for name, raw in json.items():
         value = parse_datetime(raw) if name == 'created_at' else raw
         setattr(instance, name, value)
     return instance
Пример #4
0
 def parse(cls, api, json):
     """Return a new ``cls(api)`` populated from the ``json`` mapping.

     Each key becomes an attribute; the ``created_at`` value is converted
     with ``parse_datetime`` before being stored.
     """
     obj = cls(api)
     for attr, payload in json.items():
         if attr == 'created_at':
             payload = parse_datetime(payload)
         setattr(obj, attr, payload)
     return obj
Пример #5
0
 def parse(cls, api, json):
     """Populate a new ``List`` object from a decoded JSON mapping.

     The object is created with ``List(api)`` (``cls`` is not used).
     ``user`` is parsed with ``User.parse`` and ``created_at`` with
     ``parse_datetime``; other values are stored unchanged.
     """
     result = List(api)
     for field, raw in json.items():
         if field == 'user':
             converted = User.parse(api, raw)
         elif field == 'created_at':
             converted = parse_datetime(raw)
         else:
             converted = raw
         setattr(result, field, converted)
     return result
Пример #6
0
 def parse(cls, api, json):
     """Create a message object of type ``cls`` from a JSON mapping.

     ``sender`` and ``recipient`` are parsed into ``User`` objects and
     ``created_at`` is converted with ``parse_datetime``; every other key
     is copied onto the object unchanged.
     """
     message = cls(api)
     for field, raw in json.items():
         if field in ('sender', 'recipient'):
             raw = User.parse(api, raw)
         elif field == 'created_at':
             raw = parse_datetime(raw)
         setattr(message, field, raw)
     return message
Пример #7
0
 def parse(cls, api, json):
     """Return a ``List`` object filled in from the ``json`` mapping.

     Instantiates ``List(api)`` directly rather than ``cls``. The ``user``
     value is converted with ``User.parse``, ``created_at`` with
     ``parse_datetime``, and anything else is stored as received.
     """
     twitter_list = List(api)
     for name, payload in json.items():
         if name == 'user':
             payload = User.parse(api, payload)
         elif name == 'created_at':
             payload = parse_datetime(payload)
         setattr(twitter_list, name, payload)
     return twitter_list
Пример #8
0
 def parse(cls, api, json):
     """Build a ``cls`` instance from a direct-message style JSON mapping.

     The ``sender`` / ``recipient`` values become ``User`` objects, the
     ``created_at`` value goes through ``parse_datetime``, and all other
     keys are mirrored as attributes without conversion.
     """
     user_fields = ('sender', 'recipient')
     result = cls(api)
     for name, payload in json.items():
         if name in user_fields:
             converted = User.parse(api, payload)
         elif name == 'created_at':
             converted = parse_datetime(payload)
         else:
             converted = payload
         setattr(result, name, converted)
     return result
Пример #9
0
 def parse(cls, api, json):
     """Create a message object from the API's last response body.

     NOTE: the ``json`` argument is not read. The attributes come from
     decoding ``dm._api.last_response.text`` with ``pyjson.loads``, so the
     instance created by ``cls(api)`` must expose ``_api``.

     ``sender`` / ``recipient`` are parsed with ``User.parse`` and
     ``created_at`` with ``parse_datetime``; other keys are copied as-is.
     """
     dm = cls(api)
     decoded = pyjson.loads(dm._api.last_response.text)
     for field, raw in decoded.items():
         if field in ('sender', 'recipient'):
             raw = User.parse(api, raw)
         elif field == 'created_at':
             raw = parse_datetime(raw)
         setattr(dm, field, raw)
     return dm
Пример #10
0
 def parse(cls, api, json):
     """Create a message object from ``json`` and keep the raw mapping.

     The mapping is stored on ``_json``. ``sender`` and ``recipient`` are
     parsed with ``User.parse``, ``created_at`` with ``parse_datetime``;
     all remaining keys are set as attributes unchanged.
     """
     message = cls(api)
     message._json = json
     for field, raw in json.items():
         if field in ("sender", "recipient"):
             value = User.parse(api, raw)
         elif field == "created_at":
             value = parse_datetime(raw)
         else:
             value = raw
         setattr(message, field, value)
     return message
Пример #11
0
 def __init__(self, data):
     """Wrap a tweet dictionary and normalise its timestamp.

     ``data['created_at']`` is replaced in place with the result of
     ``parse_datetime``, so the caller's dictionary is modified. If a
     KeyError is raised while doing so, ``data`` is printed and the error
     is re-raised.
     """
     self._data = data
     self._sentiment = None
     self._user_keys = ('followers_count', 'friends_count', 'location')
     self._sentiment_keys = ('sentiment', 'pos', 'neu', 'neg')
     try:
         data['created_at'] = parse_datetime(data['created_at'])
     except KeyError:
         # Show the offending payload before letting the error propagate.
         print(data)
         raise
Пример #12
0
 def parse(cls, api, json):
     """Populate a new ``List`` object from ``json`` and keep the raw mapping.

     The object is created with ``List(api)`` (``cls`` is not used) and the
     mapping is stored on ``_json``. ``user`` is parsed with ``User.parse``
     and ``created_at`` with ``parse_datetime``; other keys are copied.
     """
     result = List(api)
     result._json = json
     for field, raw in json.items():
         if field == "user":
             raw = User.parse(api, raw)
         elif field == "created_at":
             raw = parse_datetime(raw)
         setattr(result, field, raw)
     return result
Пример #13
0
    def __init__(self, data):
        """Initialise the object from a response dictionary.

        ``id`` and ``options`` are required keys. ``duration_minutes``,
        ``end_datetime`` and ``voting_status`` are optional and default to
        None; a non-None ``end_datetime`` is converted with
        ``parse_datetime``.
        """
        self.data = data
        self.id = data["id"]
        self.options = data["options"]

        self.duration_minutes = data.get("duration_minutes")

        end_raw = data.get("end_datetime")
        self.end_datetime = end_raw if end_raw is None else parse_datetime(end_raw)

        self.voting_status = data.get("voting_status")
Пример #14
0
    def __init__(self, data):
        """Initialise the object from a response dictionary.

        ``id`` and ``state`` are required keys; every other field is
        optional. Timestamp fields that are present and not None are
        converted with ``parse_datetime``; the ``*_ids`` list fields default
        to a new empty list and the remaining fields default to None.
        """
        def timestamp(key):
            # Converted value of data[key], or None when missing/None.
            raw = data.get(key)
            return raw if raw is None else parse_datetime(raw)

        self.data = data
        self.id = data["id"]
        self.state = data["state"]

        self.created_at = timestamp("created_at")
        self.ended_at = timestamp("ended_at")

        self.host_ids = data.get("host_ids", [])
        self.lang = data.get("lang")
        self.is_ticketed = data.get("is_ticketed")
        self.invited_user_ids = data.get("invited_user_ids", [])
        self.participant_count = data.get("participant_count")

        self.scheduled_start = timestamp("scheduled_start")

        self.speaker_ids = data.get("speaker_ids", [])

        self.started_at = timestamp("started_at")

        # https://twittercommunity.com/t/missing-documentation-for-new-space-object-subscriber-count-field-on-space-object-page/166943
        self.subscriber_count = data.get("subscriber_count")

        self.title = data.get("title")

        self.topic_ids = data.get("topic_ids", [])

        self.updated_at = timestamp("updated_at")
Пример #15
0
    def __init__(self, data):
        """Initialise the object from a response dictionary.

        ``id`` and ``name`` are required keys. The remaining fields are
        optional and default to None; a non-None ``created_at`` is converted
        with ``parse_datetime``.
        """
        self.data = data
        self.id = data["id"]
        self.name = data["name"]

        created_raw = data.get("created_at")
        self.created_at = (
            created_raw if created_raw is None else parse_datetime(created_raw)
        )

        self.description = data.get("description")
        self.follower_count = data.get("follower_count")
        self.member_count = data.get("member_count")
        self.private = data.get("private")
        self.owner_id = data.get("owner_id")
Пример #16
0
    def __init__(self, data):
        """Initialise the object from a response dictionary.

        ``id`` and ``state`` must be present. All other fields are optional:
        timestamps that are present and not None go through
        ``parse_datetime``, the ``*_ids`` lists default to a new empty list,
        and everything else defaults to None.
        """
        def parsed_time(key):
            # data[key] converted with parse_datetime, or None if absent/None.
            raw = data.get(key)
            return raw if raw is None else parse_datetime(raw)

        self.data = data
        self.id = data["id"]
        self.state = data["state"]

        self.created_at = parsed_time("created_at")
        self.ended_at = parsed_time("ended_at")

        self.host_ids = data.get("host_ids", [])
        self.lang = data.get("lang")
        self.is_ticketed = data.get("is_ticketed")
        self.invited_user_ids = data.get("invited_user_ids", [])
        self.participant_count = data.get("participant_count")
        self.subscriber_count = data.get("subscriber_count")

        self.scheduled_start = parsed_time("scheduled_start")

        self.speaker_ids = data.get("speaker_ids", [])

        self.started_at = parsed_time("started_at")

        self.title = data.get("title")

        self.topic_ids = data.get("topic_ids", [])

        self.updated_at = parsed_time("updated_at")
Пример #17
0
 def parse(cls, api, json):
     """Create a user object of type ``cls`` from a JSON mapping.

     ``created_at`` is converted with ``parse_datetime`` and ``status`` with
     ``Status.parse``. ``following`` is normalised to a strict boolean;
     all other keys are copied unchanged.
     """
     account = cls(api)
     for field, raw in json.items():
         if field == 'created_at':
             raw = parse_datetime(raw)
         elif field == 'status':
             raw = Status.parse(api, raw)
         elif field == 'following':
             # twitter sends null instead of false, so only a literal True
             # counts as following
             raw = raw is True
         setattr(account, field, raw)
     return account
Пример #18
0
 def parse(cls, api, json):
     """Build a ``cls`` user instance whose attributes mirror ``json``.

     Conversions: ``created_at`` via ``parse_datetime``, ``status`` via
     ``Status.parse``, and ``following`` to True only when the value is
     exactly True (False otherwise).
     """
     result = cls(api)
     for name, payload in json.items():
         if name == "created_at":
             value = parse_datetime(payload)
         elif name == "status":
             value = Status.parse(api, payload)
         elif name == "following":
             # twitter reports "not following" as null rather than false
             value = payload is True
         else:
             value = payload
         setattr(result, name, value)
     return result
Пример #19
0
def save_tweet(tweetobj):
    """Store a tweet dictionary and its photo attachments.

    Saves a ``Tweet`` built from the id, author name, screen name, text and
    timezone-aware creation time. For every ``photo`` media entity the
    ``:large`` image is fetched, processed and saved as a ``TweetPic``, and
    its aliases are generated. The media URL is removed from the stored
    tweet text.
    """
    tweet_id = tweetobj["id"]
    author_name = tweetobj["user"]["name"]
    author_handle = tweetobj["user"]["screen_name"]
    body = tweetobj["text"]
    # tweets are stored in UTC
    posted_at = timezone.make_aware(
        parse_datetime(tweetobj["created_at"]), timezone=pytz.UTC
    )

    photo_urls = []
    try:
        for item in tweetobj["entities"]["media"]:
            if item["type"] != "photo":
                continue
            photo_urls.append(item["media_url"] + ":large")
            # cut the image url out of the tweet text
            body = body.replace(item["url"], "")
    except KeyError:
        # A missing key ends media collection; URLs gathered so far are kept.
        pass

    record = Tweet(
        twitter_id=tweet_id,
        username=author_name,
        screenname=author_handle,
        text=body,
        created_at=posted_at,
        from_twitter=True,
    )
    record.save()

    for photo_url in photo_urls:
        jpeg = process_image(retrieve_image(photo_url))  # returns jpg
        pic = TweetPic()
        pic.tweet = record
        # placeholder name; original note says the model's save renames it
        pic.picture.save("tmp.jpg", jpeg)
        pic.save()
        generate_all_aliases(pic.picture, include_global=True)  # create thumbnails

    print("saved tweet with id %s" % (str(tweet_id)))
Пример #20
0
 def parse(cls, api, json):
     """Create a user object from ``json`` and keep the raw mapping.

     The mapping is stored on ``_json``. ``created_at`` is converted with
     ``parse_datetime``, ``status`` with ``Status.parse``, and ``following``
     becomes True only for a literal True value; other keys are copied.
     """
     account = cls(api)
     account._json = json
     for field, raw in json.items():
         if field == 'created_at':
             raw = parse_datetime(raw)
         elif field == 'status':
             raw = Status.parse(api, raw)
         elif field == 'following':
             # twitter sets this to null if it is false
             raw = raw is True
         setattr(account, field, raw)
     return account
Пример #21
0
    def __init__(self, data):
        """Initialise the object from a response dictionary.

        ``id`` (converted to int) and ``text`` are required keys. Optional
        id fields are converted to int when present and not None,
        ``created_at`` is converted with ``parse_datetime``, and
        ``referenced_tweets`` entries are wrapped in ``ReferencedTweet``.
        ``context_annotations`` defaults to a new empty list; other missing
        fields default to None.
        """
        def optional(key, convert):
            # data[key] passed through ``convert``, or None if absent/None.
            raw = data.get(key)
            return raw if raw is None else convert(raw)

        self.data = data
        self.id = int(data["id"])
        self.text = data["text"]

        self.attachments = data.get("attachments")
        self.author_id = optional("author_id", int)
        self.context_annotations = data.get("context_annotations", [])
        self.conversation_id = optional("conversation_id", int)
        self.created_at = optional("created_at", parse_datetime)

        self.entities = data.get("entities")
        self.geo = data.get("geo")

        self.in_reply_to_user_id = optional("in_reply_to_user_id", int)

        self.lang = data.get("lang")
        self.non_public_metrics = data.get("non_public_metrics")
        self.organic_metrics = data.get("organic_metrics")
        self.possibly_sensitive = data.get("possibly_sensitive")
        self.promoted_metrics = data.get("promoted_metrics")
        self.public_metrics = data.get("public_metrics")

        if "referenced_tweets" in data:
            self.referenced_tweets = list(
                map(ReferencedTweet, data["referenced_tweets"])
            )
        else:
            self.referenced_tweets = None

        self.reply_settings = data.get("reply_settings")
        self.source = data.get("source")
        self.withheld = data.get("withheld")
Пример #22
0
 def parse_follow_unfollow_event(self, data):
     """
     Parse a raw stream message as a follow/unfollow event.

     Returns a dict with ``event_name``, ``created_at`` and ``target`` when
     ``data`` is such an event, otherwise False. A follow event whose target
     id equals ``self.my_id`` also yields False.
     """
     # Substring test first, so unrelated messages are never JSON-decoded.
     if not ('"event":"follow"' in data or '"event":"unfollow"' in data):
         return False
     payload = json.loads(data)
     required = ('event', 'created_at', 'target')
     if not all(field in payload for field in required):
         return False
     kind = payload['event']
     if kind not in ('follow', 'unfollow'):
         return False
     followed = User.parse(self.api, payload['target'])
     if kind == 'follow' and followed.id == self.my_id:
         # the follow is aimed at my_id itself
         return False
     return dict(
         event_name=kind,
         created_at=parse_datetime(payload['created_at']),
         target=followed,
     )
Пример #23
0
def save_tweet(tweetobj):
    """Store a tweet dictionary and its photos unless it already exists.

    Looks up or creates a ``Tweet`` from the id, author name, screen name,
    text and timezone-aware creation time. Only when the row was newly
    created are the ``:large`` photo images fetched, processed and saved as
    ``TweetPic`` objects. A status line is printed in both cases.
    """
    tweet_id = tweetobj['id']
    author_name = tweetobj['user']['name']
    author_handle = tweetobj['user']['screen_name']
    body = tweetobj['text']
    # tweets are stored in UTC
    posted_at = timezone.make_aware(
        parse_datetime(tweetobj['created_at']), timezone=pytz.UTC)

    photo_urls = []
    try:
        for item in tweetobj['entities']['media']:
            if item['type'] != 'photo':
                continue
            photo_urls.append(item['media_url'] + ":large")
            # cut the image url out of the tweet text
            body = body.replace(item['url'], "")
    except KeyError:
        # A missing key ends media collection; URLs gathered so far are kept.
        pass

    record, is_new = Tweet.objects.get_or_create(
        twitter_id=tweet_id,
        username=author_name,
        screenname=author_handle,
        text=body,
        created_at=posted_at,
        from_twitter=True)
    if not is_new:
        print(" tweet with id %s already exists" % (str(tweet_id)))
        return

    for photo_url in photo_urls:
        jpeg = process_image(retrieve_image(photo_url))  # returns jpg
        pic = TweetPic()
        pic.tweet = record
        # placeholder name; original note says the model's save renames it
        pic.picture.save("tmp.jpg", jpeg, save=False)
        pic.save()
    print("saved tweet with id %s" % (str(tweet_id)))
Пример #24
0
    def parse_status_event(self, data):
        """
        Parse a raw stream message as a status posted by a known user.

        Returns a dict with ``user`` and ``created_at`` when the message's
        user id is ``self.my_id`` or is contained in ``self.friends``;
        otherwise returns False.

        ** NOTE **
        this method treats tweet and RT,
        but other data also contains '"in_reply_to_status_id":', e.g. fav.
        """
        # Substring test first, so unrelated messages are never JSON-decoded.
        if '"in_reply_to_status_id":' not in data:
            return False
        payload = json.loads(data)
        if not ('user' in payload and 'created_at' in payload):
            return False
        author = User.parse(self.api, payload['user'])
        if not (author.id == self.my_id or author.id in self.friends):
            return False
        return dict(user=author,
                    created_at=parse_datetime(payload['created_at']))
Пример #25
0
 def parse(cls, api, json):
     """Create a status object of type ``cls`` from a JSON mapping.

     Every key of ``json`` becomes an attribute. ``user`` is parsed with
     ``User.parse`` and exposed as both ``author`` and ``user``;
     ``created_at`` is converted with ``parse_datetime``;
     ``retweeted_status`` is parsed with ``Status.parse``.

     ``source`` always produces two attributes: ``source`` and
     ``source_url``. For a plain-text source, ``source_url`` is None.
     """
     status = cls(api)
     for k, v in json.items():
         if k == 'user':
             user = User.parse(api, v)
             setattr(status, 'author', user)
             setattr(status, 'user', user)  # deprecated alias of ``author``
         elif k == 'created_at':
             setattr(status, k, parse_datetime(v))
         elif k == 'source':
             if '<' in v:
                 setattr(status, k, parse_html_value(v))
                 setattr(status, 'source_url', parse_a_href(v))
             else:
                 setattr(status, k, v)
                 # Define source_url here too, so reading it never raises
                 # AttributeError for plain-text sources.
                 setattr(status, 'source_url', None)
         elif k == 'retweeted_status':
             setattr(status, k, Status.parse(api, v))
         else:
             setattr(status, k, v)
     return status
Пример #26
0
 def parse(cls, api, json):
     """Create an event object of type ``cls`` from a JSON mapping.

     ``target`` and ``source`` are parsed with the user model registered on
     ``api.parser.model_factory``; ``created_at`` is converted with
     ``parse_datetime``. All other keys, including ``target_object`` and
     ``event``, are stored unchanged.
     """
     result = cls(api)
     for key, value in json.items():
         if key in ('target', 'source'):
             user_cls = api.parser.model_factory.user
             setattr(result, key, user_cls.parse(api, value))
         elif key == 'created_at':
             result.created_at = parse_datetime(value)
         else:
             setattr(result, key, value)
     return result
Пример #27
0
 def parse(cls, api, json):
     """Create a status object of type ``cls`` from a JSON mapping.

     Every key of ``json`` becomes an attribute. ``user`` is parsed with
     ``User.parse`` and exposed as both ``author`` and ``user``;
     ``created_at`` is converted with ``parse_datetime``.

     ``retweeted_status`` holds a nested status payload, so it is parsed
     recursively with ``cls.parse``. The original used ``User.parse`` here,
     which produced a user object and left the nested author unparsed.

     ``source`` always produces two attributes: ``source`` and
     ``source_url``. For a plain-text source, ``source_url`` is None.
     """
     status = cls(api)
     for k, v in json.items():
         if k == 'user':
             user = User.parse(api, v)
             setattr(status, 'author', user)
             setattr(status, 'user', user)  # deprecated alias of ``author``
         elif k == 'created_at':
             setattr(status, k, parse_datetime(v))
         elif k == 'source':
             if '<' in v:
                 setattr(status, k, parse_html_value(v))
                 setattr(status, 'source_url', parse_a_href(v))
             else:
                 setattr(status, k, v)
                 # Define source_url here too, so reading it never raises
                 # AttributeError for plain-text sources.
                 setattr(status, 'source_url', None)
         elif k == 'retweeted_status':
             # The retweeted tweet is a status, not a user.
             setattr(status, k, cls.parse(api, v))
         else:
             setattr(status, k, v)
     return status
Пример #28
0
 def first_parse(cls, api, json):
     """Create a status object of type ``cls`` from a JSON mapping.

     ``user`` is parsed with the user model registered on
     ``api.parser.model_factory`` and exposed as ``author`` and ``user``.
     ``created_at`` is converted with ``parse_datetime``, ``source`` also
     yields ``source_url`` (None for plain-text sources), and
     ``retweeted_status`` is parsed with ``Status.parse``. Other keys are
     stored unchanged.
     """
     tweet = cls(api)
     for key, value in json.items():
         if key == 'user':
             parsed_user = api.parser.model_factory.user.parse(api, value)
             tweet.author = parsed_user
             tweet.user = parsed_user  # deprecated alias of ``author``
         elif key == 'created_at':
             tweet.created_at = parse_datetime(value)
         elif key == 'source':
             # Values containing '<' go through the HTML helpers; plain
             # strings are stored as-is with no source URL.
             is_markup = '<' in value
             tweet.source = parse_html_value(value) if is_markup else value
             tweet.source_url = parse_a_href(value) if is_markup else None
         elif key == 'retweeted_status':
             tweet.retweeted_status = Status.parse(api, value)
         else:
             setattr(tweet, key, value)
     return tweet
Пример #29
0
def save_tweet(tweetobj):
    """Store a tweet dictionary and its photos unless it already exists.

    Looks up or creates a ``Tweet`` from the id, author name, screen name,
    text and timezone-aware creation time. Only for a newly created row are
    the ``:large`` photo images fetched, processed and saved as
    ``TweetPic`` objects. A status line is printed in both cases.
    """
    tweet_id = tweetobj['id']
    user_info = tweetobj['user']
    author_name = user_info['name']
    author_handle = user_info['screen_name']
    body = tweetobj['text']
    naive_created = parse_datetime(tweetobj['created_at'])
    # tweets are stored in UTC
    posted_at = timezone.make_aware(naive_created, timezone=pytz.UTC)

    photo_urls = []
    try:
        for item in tweetobj['entities']['media']:
            if item['type'] != 'photo':
                continue
            photo_urls.append(item['media_url'] + ":large")
            # cut the image url out of the tweet text
            body = body.replace(item['url'], "")
    except KeyError:
        # A missing key ends media collection; URLs gathered so far are kept.
        pass

    record, is_new = Tweet.objects.get_or_create(
        twitter_id=tweet_id,
        username=author_name,
        screenname=author_handle,
        text=body,
        created_at=posted_at,
        from_twitter=True,
    )
    if not is_new:
        print(" tweet with id %s already exists" % (str(tweet_id)))
        return

    for photo_url in photo_urls:
        jpeg = process_image(retrieve_image(photo_url))  # returns jpg
        pic = TweetPic()
        pic.tweet = record
        # placeholder name; original note says the model's save renames it
        pic.picture.save("tmp.jpg", jpeg, save=False)
        pic.save()
    print("saved tweet with id %s" % (str(tweet_id)))
Пример #30
0
    def __init__(self, data):
        """Initialise the object from a response dictionary.

        ``id`` (converted to int), ``name`` and ``username`` are required
        keys. The remaining fields are optional and default to None;
        ``created_at`` is converted with ``parse_datetime`` and
        ``pinned_tweet_id`` with ``int`` when present and not None.
        """
        def optional(key, convert):
            # data[key] passed through ``convert``, or None if absent/None.
            raw = data.get(key)
            return raw if raw is None else convert(raw)

        self.data = data
        self.id = int(data["id"])
        self.name = data["name"]
        self.username = data["username"]

        self.created_at = optional("created_at", parse_datetime)

        self.description = data.get("description")
        self.entities = data.get("entities")
        self.location = data.get("location")

        self.pinned_tweet_id = optional("pinned_tweet_id", int)

        self.profile_image_url = data.get("profile_image_url")
        self.protected = data.get("protected")
        self.public_metrics = data.get("public_metrics")
        self.url = data.get("url")
        self.verified = data.get("verified")
        self.withheld = data.get("withheld")
Пример #31
0
 def on_data(self, data):
     """Handle one raw stream message and record it as a TSV line.

     ``data`` is a JSON string. Messages containing a ``delete`` key are
     ignored. Messages without an ``id`` key are written to ``self.f_log``.
     For anything else a tab-separated record is written to ``self.f_out``;
     any error while building that record is written to ``self.f_log``
     instead of propagating.

     Always returns True so the stream stays alive.
     """
     statuse = json.loads(data)
     if 'delete' in statuse:
         return True  # keep stream alive
     if 'id' not in statuse:
         text_error = '---------------> message no expected  %s,  %s\n' % (
             datetime.datetime.now(), data)
         self.f_log.write(text_error)
         return True  # keep stream alive

     # Runs of CR/LF/TAB are collapsed to one space so a field can never
     # break the one-record-per-line, tab-separated output.
     whitespace = '[\r\n\t]+'

     statuse_quoted_text = None
     geoloc = None
     url_expanded = None
     url_media = None  # never filled in by this version; kept as a column
     type_media = None  # never filled in by this version; kept as a column
     location = None
     description = None
     name = None
     try:
         id_tweet = statuse['id']
         profile_user = statuse['user']
         if 'quoted_status_id' in statuse:
             print(statuse['quoted_status_id'])
             if 'quoted_status' in statuse:
                 statuse_quoted = statuse['quoted_status']
                 if 'text' in statuse_quoted:
                     statuse_quoted_text = re.sub(
                         whitespace, ' ', statuse_quoted['text'])
                     print('tweet nested %s' % statuse_quoted_text)
         coordinates = statuse.get('coordinates')
         if coordinates is not None:
             list_geoloc = coordinates['coordinates']
             geoloc = '%s, %s' % (list_geoloc[0], list_geoloc[1])
         if 'entities' in statuse:
             urls = statuse['entities']['urls']
             if urls:
                 url_expanded = urls[0]['expanded_url']
         text = re.sub(whitespace, ' ', statuse['text'])
         # re.UNICODE must be passed as ``flags``: the fourth positional
         # argument of re.sub is ``count``, which capped the number of
         # replacements at 32 in the original.
         if profile_user['location'] is not None:
             location = re.sub(whitespace, ' ', profile_user['location'],
                               flags=re.UNICODE)
         if profile_user['description'] is not None:
             description = re.sub(whitespace, ' ',
                                  profile_user['description'],
                                  flags=re.UNICODE)
         if profile_user['name'] is not None:
             name = re.sub(whitespace, ' ', profile_user['name'],
                           flags=re.UNICODE)
         date = parse_datetime(statuse['created_at'])
         app = parse_html_value(statuse['source'])
         tweet = '%s\t%s\t@%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
             id_tweet, date, profile_user['screen_name'], text, app,
             profile_user['id'], profile_user['followers_count'],
             profile_user['friends_count'],
             profile_user['statuses_count'], location, url_expanded,
             geoloc, name, description, url_media, type_media,
             statuse_quoted_text)
         self.f_out.write(tweet)
         print('---->collected tweet %s' % id_tweet)
     except Exception:
         # Best effort: one malformed message must not stop the stream.
         # Unlike a bare ``except``, KeyboardInterrupt/SystemExit pass through.
         text_error = '---------------> parser error  at %s, id-tweet %s\n' % (
             datetime.datetime.now(), statuse)
         self.f_log.write(text_error)
     return True  # keep stream alive
Пример #32
0
    def on_data(self, data):
        statuse = json.loads(data)
        if 'delete' in statuse:
            return True  # keep stream alive
        if 'id' in statuse:
            statuse_quoted_text = None
            geoloc = None
            url_expanded = None
            url_media = None
            type_media = None
            text = None
            location = None
            description = None
            name = None
            date = parse_datetime(statuse['created_at'])
            app = parse_html_value(statuse['source'])
            entities = None
            relation = None
            quoted_id = None
            replied_id = None
            retweeted_id = None
            user_replied = None
            user_quoted = None
            user_retweeted = None
            first_HT = None
            #get interactions Ids
            try:
                id_tweet = statuse['id_str']
                if statuse['in_reply_to_status_id_str'] != None:
                    relation = 'reply'
                    replied_id = statuse['in_reply_to_status_id_str']
                    user_replied = statuse['in_reply_to_screen_name']
                if 'quoted_status' in statuse:
                    relation = 'quote'
                    quoted_id = statuse['quoted_status_id_str']
                    user_quoted = statuse['quoted_status']['user'][
                        'screen_name']
                elif 'retweeted_status' in statuse:
                    relation = 'RT'
                    retweeted_id = statuse['retweeted_status']['id_str']
                    user_retweeted = statuse['retweeted_status']['user'][
                        'screen_name']
                    if 'quoted_status' in statuse['retweeted_status']:
                        quoted_id = statuse['retweeted_status'][
                            'quoted_status']['id_str']
                        user_quoted = statuse['retweeted_status'][
                            'quoted_status']['user']['screen_name']
            except:
                text_error = '---------------->bad interactions ids, id tweet %s at %s\n' % (
                    id_tweet, time.asctime())
                self.f_log.write(text_error)
#get geolocation
            if 'coordinates' in statuse:
                coordinates = statuse['coordinates']
                if coordinates != None:
                    try:
                        if 'coordinates' in coordinates:
                            list_geoloc = coordinates['coordinates']
                            print list_geoloc
                            geoloc = '%s, %s' % (list_geoloc[0],
                                                 list_geoloc[1])
                    except:
                        text_error = '---------------->bad coordinates, id tweet %s at %s\n' % (
                            id_tweet, datetime.datetime.now())
                        self.f_log.write(text_error)
#get entities
            if 'entities' in statuse:
                entities = statuse['entities']
            if 'extended_tweet' in statuse:
                entities = statuse['extended_tweet']['entities']
            if 'retweeted_status' in statuse:
                if 'entities' in statuse['retweeted_status']:
                    entities = statuse['retweeted_status']['entities']
                if 'extended_tweet' in statuse['retweeted_status']:
                    entities = statuse['retweeted_status']['extended_tweet'][
                        'entities']
            if entities != None:
                try:
                    urls = entities['urls']
                    if len(urls) > 0:
                        url_expanded = urls[0]['expanded_url']
                except:
                    text_error = '---------------->bad enttity urls, id tweet %s at %s\n' % (
                        id_tweet, datetime.datetime.now())
                    self.f_log.write(text_error)
                try:
                    if 'media' in entities:
                        list_media = entities['media']
                        if len(list_media) > 0:
                            url_media = list_media[0]['media_url']
                            type_media = list_media[0]['type']
                except:
                    text_error = '---------------->bad entity media, at %s id tweet %s \n' % (
                        datetime.datetime.now(), id_tweet)
                    self.f_log.write(text_error)
                try:
                    if 'hashtags' in entities:
                        HTs = entities['hashtags']
                        if len(HTs) > 0:
                            first_HT = HTs[0]['text']
                except:
                    text_error = '---------------->bad entity HT, id tweet %s at %s\n' % (
                        id_tweet, time.asctime())
                    self.f_log.write(text_error)
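# Get the tweet text with runs of CR/LF/tab collapsed to a single space.
# For retweets, keep the leading 'RT @user: ' prefix and append the text of
# the retweeted status (its extended full_text when present).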
            try:
                if 'text' in statuse:
                    text = re.sub('[\r\n\t]+', ' ', statuse['text'])
                if 'extended_tweet' in statuse:
                    text = re.sub('[\r\n\t]+', ' ',
                                  statuse['extended_tweet']['full_text'])
                if 'retweeted_status' in statuse:
                    statuse_RT = statuse['retweeted_status']
                    if 'text' in statuse_RT:
                        RT_expand = re.sub('[\r\n\t]+', ' ',
                                           statuse_RT['text'])
                    if 'extended_tweet' in statuse_RT:
                        extended_RT = statuse_RT['extended_tweet']
                        RT_expand = re.sub('[\r\n\t]+', ' ',
                                           extended_RT['full_text'])
                    RT = re.match(r'(^RT @\w+: )', text)
                    if RT:
                        text = RT.group(1) + RT_expand
            except:
                text_error = '---------------->bad tweet text,  at %s id tweet %s \n' % (
                    datetime.datetime.now(), id_tweet)
                self.f_log.write(text_error)
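# Get the quoted tweet's text, if one exists, either on the tweet itself or
# on its retweeted status; runs of CR/LF/tab are collapsed to a single space.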
            try:
                if 'quoted_status' in statuse:
                    if 'text' in statuse['quoted_status']:
                        statuse_quoted_text = statuse['quoted_status']['text']
                    if 'extended_tweet' in statuse['quoted_status']:
                        statuse_quoted_text = statuse['quoted_status'][
                            'extended_tweet']['full_text']
                    statuse_quoted_text = re.sub('[\r\n\t]+', ' ',
                                                 statuse_quoted_text)
                elif 'retweeted_status' in statuse:
                    if 'quoted_status' in statuse['retweeted_status']:
                        if 'text' in statuse['retweeted_status'][
                                'quoted_status']:
                            statuse_quoted_text = statuse['retweeted_status'][
                                'quoted_status']['text']
                        if 'extended_tweet' in statuse['retweeted_status'][
                                'quoted_status']:
                            statuse_quoted_text = statuse['retweeted_status'][
                                'quoted_status']['extended_tweet']['full_text']
                        statuse_quoted_text = re.sub('[\r\n\t]+', ' ',
                                                     statuse_quoted_text)
            except:
                text_error = '---------------->bad quoted,  at %s id tweet %s \n' % (
                    datetime.datetime.now(), id_tweet)
                self.f_log.write(text_error)


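# Get user profile fields (location, description, name), collapsing runs of
# CR/LF/tab to a single space.
# NOTE(review): re.UNICODE is passed as the 4th positional argument of re.sub,
# which is `count`, not `flags` -- it caps the number of substitutions instead
# of setting a regex flag. Confirm intent.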
            if 'user' in statuse:
                try:
                    if 'location' in statuse['user']:
                        if statuse['user']['location'] != None:
                            location = re.sub('[\r\n\t]+', ' ',
                                              statuse['user']['location'],
                                              re.UNICODE)
                except:
                    text_error = '---------------->bad user location:%s ,  at %s id tweet %s \n' % (
                        datetime.datetime.now(), statuse['user']['location'],
                        id_tweet)
                    self.f_log.write(text_error)
                try:
                    if 'description' in statuse['user']:
                        if statuse['user']['description'] != None:
                            description = re.sub(
                                '[\r\n\t]+', ' ',
                                statuse['user']['description'], re.UNICODE)
                except:
                    text_error = '---------------->bad user description,  at %s id tweet %s \n' % (
                        datetime.datetime.now(), id_tweet)
                    self.f_log.write(text_error)
                try:
                    if 'name' in statuse['user']:
                        if statuse['user']['name'] != None:
                            name = re.sub('[\r\n\t]+', ' ',
                                          statuse['user']['name'], re.UNICODE)
                except:
                    text_error = '---------------->bad user name,  at %s id tweet %s \n' % (
                        datetime.datetime.now(), id_tweet)
                    self.f_log.write(text_error)
            try:
                link_tweet = 'https://twitter.com/%s/status/%s' % (
                    statuse['user']['screen_name'], id_tweet)
                tweet = '%s\t%s\t@%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                    id_tweet, date, statuse['user']['screen_name'], text, app,
                    statuse['user']['id'], statuse['user']['followers_count'],
                    statuse['user']['friends_count'],
                    statuse['user']['statuses_count'], location, url_expanded,
                    geoloc, name, description, url_media, type_media,
                    statuse_quoted_text, relation, replied_id, user_replied,
                    retweeted_id, user_retweeted, quoted_id, user_quoted,
                    first_HT, statuse['lang'],
                    parse_datetime(statuse['user']['created_at']),
                    statuse['user']['verified'],
                    statuse['user']['profile_image_url_https'], link_tweet)
                self.f_out.write(tweet)
                print '---->collected tweet', id_tweet
            except:
                text_error = '---------------> format error  at %s, id-tweet %s\n' % (
                    datetime.datetime.now(), id_tweet)
                self.f_log.write(text_error)
                pass
        else:
            text_error = '---------------> message no expected  %s,  %s\n' % (
                datetime.datetime.now(), data)
            self.f_log.write(text_error)
        return True  # keep stream alive