示例#1
0
 def get_tweet_from_user(self, userlist, field='id'):
     """Return the tweets made by the users in ``userlist``.

     The lookup is delegated to ``dupefilter.DupInfo.get_tweet``, which is
     given the collection loaded from ``self.sourcefile``.

     Args:
         userlist: Users whose tweets are requested.
         field: Forwarded unchanged to ``get_tweet`` (default ``'id'``);
             presumably the user attribute the entries of ``userlist``
             are matched against -- confirm against ``DupInfo``.

     Returns:
         Whatever ``DupInfo.get_tweet`` returns (a list of tweets,
         according to the original comment on this method).
     """
     finder = dupefilter.DupInfo(prefix=self.prefix)
     return finder.get_tweet(
         collection=util.loadjson(self.sourcefile),
         userlist=userlist,
         field=field,
     )
示例#2
0
 def print_metadata(self):
     """Print metadata of the collection to stdout.

     The metadata is fetched with ``dupefilter.DupInfo.get_metadata()``
     and three lines are written, in this order:

     1. the total number of tweets (``num_tweet``),
     2. the span between ``end_time`` and ``start_time``,
     3. the ``start_time`` itself.

     Nothing is returned.
     """
     finder = dupefilter.DupInfo(prefix=self.prefix)
     metadata = finder.get_metadata()
     # A parenthesised single-argument print behaves exactly like the
     # Python 2 print statement and is also valid on Python 3.
     print(metadata['num_tweet'])
     print(metadata['end_time'] - metadata['start_time'])
     print(metadata['start_time'])
示例#3
0
 def get_suspicious_user_group(
         self, startover=False, filter_function=all_groups):
     """Find duplicate tweets and return the suspicious user groups.

     ``DupInfo.find_duplicate_tweet`` is first run over the collection
     loaded from ``self.sourcefile``; the same ``DupInfo`` object is then
     asked for its suspicious user groups.

     Args:
         startover: Forwarded to the ``DupInfo`` constructor; presumably
             makes it discard previously stored results -- confirm
             against ``DupInfo``.
         filter_function: Forwarded to
             ``DupInfo.get_suspicious_user_group``; defaults to
             ``all_groups``.

     Returns:
         The value returned by ``DupInfo.get_suspicious_user_group``.
     """
     finder = dupefilter.DupInfo(prefix=self.prefix, startover=startover)
     finder.find_duplicate_tweet(
         collection=util.loadjson(self.sourcefile),
         collect_url_only=self.collect_url_only,
     )
     return finder.get_suspicious_user_group(
         filter_function=filter_function,
         url_based=self.url_based,
     )
示例#4
0
    def get_percent_of_spam(self):
        """Print spam statistics for the collection to stdout.

        Writes, in order: the start time, the end time, the number of
        accounts, the number of spam accounts, the spam share of accounts,
        the number of tweets, the number of spam tweets and the spam share
        of tweets.  Nothing is returned.

        The "Percent of ..." lines report a plain ratio (spam / total), not
        a value multiplied by 100; that output format is left unchanged.
        When a total is zero the ratio is reported as 0.0 instead of
        raising ``ZeroDivisionError``.
        """
        spam_user = self.get_spam_user_info(variable='spam_user')
        finder = dupefilter.DupInfo(prefix=self.prefix)
        total_user = finder.get_metadata(variable='num_user')

        # '%s %s' writes exactly what the Python 2 statement
        # ``print label, value`` wrote (items separated by one space)
        # while remaining valid on Python 3.
        print('%s %s' % ('Start time is ',
                         finder.get_metadata(variable='start_time')))
        print('%s %s' % ('End time is ',
                         finder.get_metadata(variable='end_time')))
        print('Total number of account is %d' % (total_user))
        num_spam_user = len(spam_user)
        print('Total number of spam account is %d' % (num_spam_user))
        # Guard the division: the total may be 0 (e.g. an empty collection).
        if total_user:
            spam_user_share = float(num_spam_user) / float(total_user)
        else:
            spam_user_share = 0.0
        print('Percent of spam account is %f' % (spam_user_share))

        total_tweet = finder.get_metadata(variable='num_tweet')
        print('Total number of tweets is %d' % (total_tweet))
        # only_number=True asks get_tweet for a count rather than the
        # tweets themselves (the result is formatted with %d below).
        num_spam_tweet = finder.get_tweet(
            collection=util.loadjson(self.sourcefile),
            userlist=spam_user,
            only_number=True)
        print('Total number of spam tweets is %d' % (num_spam_tweet))
        # Same zero guard as for the account ratio above.
        if total_tweet:
            spam_tweet_share = float(num_spam_tweet) / float(total_tweet)
        else:
            spam_tweet_share = 0.0
        print('Percent of spam tweets is %f' % (spam_tweet_share))
示例#5
0
 def save_user_info(self):
     """Hand the collection to ``DupInfo.save_user_info``.

     The collection is loaded from ``self.sourcefile``.  The delegate's
     return value is discarded, so this method returns ``None``.
     """
     finder = dupefilter.DupInfo(prefix=self.prefix)
     finder.save_user_info(collection=util.loadjson(self.sourcefile))
示例#6
0
 def get_url_per_user(self):
     """Return the result of ``DupInfo.get_url_per_user``.

     The delegate is given the collection loaded from ``self.sourcefile``
     and its return value is passed back unchanged (presumably URL
     figures per user -- confirm against ``DupInfo``).
     """
     finder = dupefilter.DupInfo(prefix=self.prefix)
     collection = util.loadjson(self.sourcefile)
     return finder.get_url_per_user(collection=collection)
示例#7
0
 def get_num_tweet(self):
     """Return the total number of tweets in the collection.

     The value is read from the ``num_tweet`` entry of the metadata
     exposed by ``dupefilter.DupInfo.get_metadata``.
     """
     finder = dupefilter.DupInfo(prefix=self.prefix)
     return finder.get_metadata(variable='num_tweet')