Пример #1
0
    def _convert_row_to_post(self, row):
        """Build a Post and its claim connection from one row and buffer both.

        The row is read by key and is expected to provide the fields
        ``site``, ``social_id``, ``username_hash``, ``comment_time`` and
        ``comment_tokens``. The resulting Post is appended to
        ``self._posts`` and the matching Claim_Tweet_Connection to
        ``self._claim_tweet_connections``. Whenever the running row counter
        is a multiple of ``self._max_posts_without_save`` the buffered
        objects are handed to ``self._save_posts_and_connections()``.
        """
        # Progress is reported before the counter is advanced.
        progress_line = "\rInsert post to DataFrame {0}/{1}".format(
            self._current_row, len(self.posts_csv_df))
        print(progress_line, end="")
        self._current_row += 1

        # comment_time is passed straight to fromtimestamp
        # (presumably a Unix timestamp -- confirm against the CSV schema).
        posted_at = datetime.datetime.fromtimestamp(row['comment_time'])

        new_post = Post()
        claim_id = compute_author_guid_by_author_name(str(row['social_id']))

        # The post guid is derived from site + claim id, the user hash and
        # the formatted comment date.
        guid_seed = row['site'] + str(claim_id)
        username_hash = row['username_hash']
        new_post.post_id = str(
            compute_post_guid(guid_seed, username_hash,
                              date_to_str(posted_at)))

        new_post.content = str(row['comment_tokens'])
        # The user hash is used as both the author name and the author guid.
        new_post.author = str(username_hash)
        new_post.author_guid = str(username_hash)
        new_post.domain = str(row['site'])
        new_post.date = posted_at
        self._posts.append(new_post)

        link = Claim_Tweet_Connection()
        link.claim_id = str(claim_id)
        link.post_id = str(new_post.post_id)
        self._claim_tweet_connections.append(link)

        # Flush the buffers periodically.
        if self._current_row % self._max_posts_without_save == 0:
            self._save_posts_and_connections()
 def fill_claim_post_connection_using_keywords(self):
     """Link every claim to the posts matched by its 'hill_climbing' keywords.

     Calls ``build_word_post_dict_for_trec()`` first, then for each claim
     returned by the DB looks up its keyword entries, fetches the posts via
     ``get_posts_from_word_post_dict`` and creates one
     Claim_Tweet_Connection per matched post. Connections are written with
     ``add_claim_tweet_connections_fast`` in batches: the buffer is flushed
     after any claim that leaves it with more than 10000 entries, and once
     more at the end for whatever remains.
     """
     self.build_word_post_dict_for_trec()
     pending_connections = []
     all_claims = self._db.get_claims()
     keywords_by_claim_id = (
         self._db.get_claim_id_keywords_dict_by_connection_type(
             'hill_climbing'))

     for claim in all_claims:
         for keywords in keywords_by_claim_id[claim.claim_id]:
             matched_posts = self.get_posts_from_word_post_dict(
                 keywords, claim)
             for matched_post in matched_posts:
                 pending_connections.append(
                     Claim_Tweet_Connection(claim_id=claim.claim_id,
                                            post_id=matched_post.post_id))

         # The threshold is checked once per claim, not once per post.
         if len(pending_connections) > 10000:
             self._db.add_claim_tweet_connections_fast(pending_connections)
             # Start a fresh list rather than clearing the one just handed
             # to the DB layer.
             pending_connections = []

     # Final flush of the remainder (also called when nothing is pending).
     self._db.add_claim_tweet_connections_fast(pending_connections)
 def _add_claim_tweet_connection(self, claim_id, post_id):
     """Create one claim-to-post connection and pass it to the DB layer.

     The connection is wrapped in a single-element list for
     ``self._db.add_claim_connections``. Returns None.
     """
     link = Claim_Tweet_Connection()
     link.claim_id = claim_id
     link.post_id = post_id
     self._db.add_claim_connections([link])
Пример #4
0
 def get_claim_tweet_connection(self, claim_id, post_id):
     """Return a new Claim_Tweet_Connection for the given claim and post ids.

     The object is only built and returned; nothing is stored here.
     """
     link = Claim_Tweet_Connection()
     link.claim_id = claim_id
     link.post_id = post_id
     return link
 def _create_claim_post_connection(self, original_claim_id, post_id):
     """Return a new Claim_Tweet_Connection tying a post to a claim.

     ``original_claim_id`` is stored as the connection's ``claim_id``.
     The object is only built and returned; nothing is stored here.
     """
     link = Claim_Tweet_Connection()
     link.claim_id = original_claim_id
     link.post_id = post_id
     return link