示例#1
0
    def on_data(self, data):
        """Decode one raw JSON payload and store the user and tweet built from it.

        Payloads containing a ``retweeted_status`` key are stored through
        ``save_user_and_retweet_to_db``; all others go through
        ``save_user_and_tweet_to_db``. The module-level ``parsedTweets``
        counter is bumped after a successful save and printed every 100
        tweets.

        Any exception raised along the way is printed rather than propagated.

        :param data: JSON text of a single payload.
        :return: always ``True``.
        """
        global parsedTweets

        try:
            payload = json.loads(data)
            is_retweet = 'retweeted_status' in payload

            # The user object is built the same way for both kinds of tweet.
            author = TweetUser(payload)
            if is_retweet:
                # NOTE(review): `is_reweet` is spelled as in the original call;
                # presumably it matches Tweet's signature -- confirm.
                status = Tweet(payload, is_reweet=True)
                save_user_and_retweet_to_db(author, status)
            else:
                # Normal tweet.
                status = Tweet(payload)
                save_user_and_tweet_to_db(author, status)

            parsedTweets += 1
            if parsedTweets % 100 == 0:
                print("parsedTweets: ", parsedTweets)
        except Exception as err:
            print("exception:")
            print(err)

        return True
示例#2
0
文件: dbot.py 项目: articvox/dscraper
    def run(self) -> None:
        """Post one tweet for every comment returned by get_top_unposted_comments.

        The number of comments requested is ``self.tweet_limit``. Each tweet
        carries the comment's content, subject, article URL and id together
        with the count and emoji of the comment's top reaction.
        """
        for unposted in get_top_unposted_comments(self.tweet_limit):
            top_reaction = unposted.get_top_reaction()

            outgoing = Tweet(
                comment=unposted.content,
                author=unposted.subject,
                reaction_count=top_reaction.count,
                reaction_type_emoji=top_reaction.get_emoji(),
                article_url=unposted.article_url,
                comment_id=unposted.comment_id,
            )
            self.twitter_service.tweet(outgoing)
示例#3
0
def tweet(screen_name, text, followers=None, created=None):
    """Build a Tweet from a minimal status-like dictionary.

    :param screen_name: value stored under ``user.screen_name``.
    :param text: value stored under ``text``.
    :param followers: value stored under ``user.followers_count``; when
        omitted, a random integer in [1, 200] is used.
    :param created: value stored under ``created_at``; when omitted, the
        string form of the current local ``datetime.datetime.now()`` is used.
    :return: the ``Tweet`` constructed from that dictionary.
    """
    # Identity checks: `== None` would dispatch to the argument's __eq__,
    # which can misbehave for types that overload equality.
    if followers is None:
        followers = random.randint(1, 200)
    if created is None:
        created = str(datetime.datetime.now())
    return Tweet({
        'created_at': created,
        'text': text,
        'user': {
            'screen_name': screen_name,
            'followers_count': followers,
        },
    })
示例#4
0
def parse_tweet_list(tweet_list: str) -> List[Tweet]:
    """Convert a JSON array of tweet objects into a list of Tweet instances.

    Each element must provide ``id``, ``created_at``, ``author_id`` and
    ``text``. An empty input is logged and yields an empty list; a JSON
    ``null`` also yields an empty list. Any exception is logged, and the
    tweets parsed before the failure are still returned.

    :param tweet_list: JSON text holding the list of tweets.
    :return: the tweets that were parsed successfully.
    """
    parsed = []
    try:
        if len(tweet_list) == 0:
            log.error("empty tweet list")
            return parsed

        records = json.loads(tweet_list)
        if records is None:
            return parsed

        for record in records:
            parsed.append(
                Tweet(
                    record["id"],
                    record["created_at"],
                    record["author_id"],
                    record["text"],
                )
            )
    except Exception as e:
        log.error(f"cannot parse tweet list: {e}")
    return parsed
示例#5
0
    def on_data(self, data):
        """Decode one JSON payload, build a Tweet from it and print it.

        The payload must contain ``text``, ``user`` (with ``name``,
        ``screen_name``, ``description`` and ``location``) and
        ``entities.hashtags``. Nothing is caught here, so malformed JSON or
        a missing key propagates to the caller.

        :param data: JSON text of a single payload.
        :return: ``True`` once the tweet has been printed.
        """
        payload = json.loads(data)

        body = payload['text']
        profile = payload['user']
        display_name = profile['name']
        handle = profile['screen_name']
        bio = profile['description']
        tags = payload['entities']['hashtags']
        place = profile['location']

        status = Tweet(
            text=body,
            name=display_name,
            screenName=handle,
            description=bio,
            hashtags=tags,
            location=place,
        )

        print(status)
        return True
示例#6
0
    def tweet(self, tweet: Tweet) -> None:
        """Post a tweet through the Twitter API, truncating it first if enabled.

        When ``self.truncate_tweets`` is truthy and the tweet's length is 280
        or more, ``tweet.truncate_comment()`` is called before posting. A
        ``TweepError`` raised while posting is logged, not re-raised.

        :param tweet: the tweet to build and send.
        """
        logging.info(decorate('Posting tweet', tweet))

        needs_truncation = tweet.get_length() >= 280 and self.truncate_tweets
        if needs_truncation:
            before_msg = 'Truncating tweet, original length: {}'.format(tweet.get_length())
            logging.info(decorate(before_msg, tweet))
            tweet.truncate_comment()
            after_msg = 'Truncated length: {}'.format(tweet.get_length())
            logging.info(decorate(after_msg, tweet))

        try:
            self.twitterAPI.update_status(tweet.build())
        except TweepError as e:
            logging.error(decorate('Tweeting failed: {}'.format(e.reason), tweet))
        else:
            logging.info(decorate('Tweet successfully posted', tweet))

        # NOTE(review): the id is recorded even when posting failed --
        # confirm this is intended.
        save_sent_id(tweet.comment_id)
示例#7
0
 async def collect(self) -> None:
     """Read lines from ``self.response`` and buffer the tweets they carry.

     Every non-empty line is parsed as JSON and must contain a ``data``
     object with ``id``, ``created_at``, ``author_id`` and ``text``. A tweet
     is added to ``self.buffer`` when its date lies between its own date
     with the seconds replaced by ``self.second_start`` and that moment plus
     ``self.tweet_interval`` seconds (both ends inclusive).

     :raises TwitterCollectorException: when a line has no ``data`` key, or
         after ten consecutive empty lines.
     """
     blank_streak = 0
     for raw_line in self.response.iter_lines():
         if not raw_line:
             blank_streak += 1
             if blank_streak >= 10:
                 log.error("Empty line received from Twitter. Restarting...")
                 raise TwitterCollectorException(
                     "empty line received from twitter")
             continue

         # Any real payload resets the run of empty lines.
         blank_streak = 0
         message = json.loads(raw_line)
         if "data" not in message:
             raise TwitterCollectorException(f"{message}")

         fields = message["data"]
         tweet = Tweet(fields["id"], fields["created_at"],
                       fields["author_id"], fields["text"])

         window_start = tweet.date.replace(second=self.second_start)
         window_length = datetime.timedelta(seconds=self.tweet_interval)
         window_end = window_start + window_length
         if window_start <= tweet.date <= window_end:
             self.buffer.add(tweet)
示例#8
0
    def page(self, query):
        """Page through English search results for ``query`` and pickle each tweet.

        For every status on every page a ``Tweet`` is built from its text,
        author name and screen name, user description and location, and its
        hashtag entities; the tweet is printed and written with
        ``pickle.dump``. Running page and tweet totals are printed after each
        page.

        If a ``tweepy.TweepError`` is raised, a message is printed and the
        process exits with status 0.

        :param query: search query passed to the API as ``q``.
        """
        # Local import: the file's import block is not visible from here, and
        # `os.sys` only works because `os` happens to import `sys` internally.
        import sys

        tweetCount = 0
        try:
            cursor = tweepy.Cursor(self._api.search, q=query, lang='en')
            for pageCount, page in enumerate(cursor.pages(), start=1):
                for tweet in page:
                    tweetCount += 1

                    tweetObject = Tweet(
                        text=tweet.text,
                        name=tweet.author.name,
                        screenName=tweet.author.screen_name,
                        description=tweet.user.description,
                        hashtags=tweet.entities.get('hashtags'),
                        location=tweet.user.location,
                    )
                    print(tweetObject)
                    # NOTE(review): pickle.dump needs a writable binary file
                    # object; presumably self._filename holds one despite its
                    # name -- confirm.
                    pickle.dump(tweetObject, self._filename)

                print('page: ' + str(pageCount) + '.....................')
                print('pages: ' + str(pageCount) + ' tweet count: ' + str(tweetCount) + '\n')
        except tweepy.TweepError:
            # NOTE(review): every TweepError is reported as a rate-limit hit;
            # other failures would get the same message -- confirm intended.
            print('rate limit exceeded')
            sys.exit(0)
# Print the name, description and location of the `user` object.
print("User details:")
print(user.name)
print(user.description)
print(user.location)

# Build the TweetSet from the handle and the same three profile fields.
tweet_set = TweetSet(twitter_handle, user.name, user.description,
                     user.location)

# Module-level state initialised before the timeline is walked.
hashtags = []
mentions = []
tweet_count = 0
# Moment 90 days before the current UTC time (naive datetime).
# NOTE(review): presumably a cutoff for how far back to read -- confirm.
end_date = datetime.utcnow() - timedelta(days=90)
for status in tqdm(Cursor(api.user_timeline, id=twitter_handle).items()):
    tweet_count += 1
    tweet = Tweet(str(status.id), status.text, "",
                  status.created_at.strftime("%c"))
    if hasattr(status, "entities"):
        entities = status.entities
        if "hashtags" in entities:
            for ent in entities["hashtags"]:
                if ent is not None:
                    if "text" in ent:
                        hashtag = ent["text"]
                        if hashtag is not None:
                            tweet.add_hashtag(hashtag)
        if "user_mentions" in entities:
            for ent in entities["user_mentions"]:
                if ent is not None:
                    if "screen_name" in ent:
                        name = ent["screen_name"]
                        if name is not None: