示例#1
0
def insta_bot():
    """Run the interactive instaBot console menu.

    Each pass of the endless ``while True`` loop prints the ten numbered
    menu entries, reads the selection with ``raw_input`` and converts it
    with ``int`` (so a non-numeric answer raises ``ValueError`` here, which
    is not caught), then dispatches on the number.  Option 1 calls
    ``self_info()``.
    """
    while True:
        # Show the menu again on every pass through the loop.
        print "Welcome to instaBot!"
        print "Menu options:"
        print "1.Get your own details"
        print "2.Get details of a user by username"
        print "3.Get your own recent post"
        print "4.Get the recent post of a user by username"
        print "5.Like the recent post of a user"
        print "6.Make a comment on the recent post of a user"
        print "7.list of likes on recent post of a user"
        print "8.list of comments on a recnet post of a user"
        print "9.delete bad comment on recent post"
        print "10.Exit"

        choice = int(raw_input("Enter you choice: "))
        if choice == 1:
            self_info()
        elif choice == 2:
            # NOTE(review): the next line is damaged in this copy and is not
            # valid Python.  It appears that the remaining ``elif`` branches
            # (each prompting for a username) and the final ``else`` that
            # prints "wrong choice" were collapsed into the "******" runs.
            # Restore them from the original source before running.
            user_name = raw_input("Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"wrong choice"
示例#2
0
def main():
    """Run the letter-driven instaBot console menu.

    Each pass of the endless ``while True`` loop prints the menu, reads the
    selection as a raw string with ``raw_input`` and compares it against
    single letters.  ``"a"`` calls ``self_info()``; any selection that
    matches no branch prints "wrong choice".
    """
    while True:
        print '\n'
        print 'Hey! Welcome to instaBot!'
        print 'Here are your menu options:'
        print "a.Get your own details\n"
        print "b.Get details of a user by username\n"

        choice = raw_input("Enter you choice: ")
        if choice == "a":
            self_info()
        elif choice == "b":
            # NOTE(review): the next two lines are damaged in this copy and
            # are not valid Python.  The call made with the username and the
            # following ``elif`` headers appear to have been collapsed into
            # the "******" runs; the surviving fragments suggest branches for
            # "c" and "j", with "j" presumably leading to the ``exit()``
            # below.  Restore them from the original source before running.
            insta_username = raw_input("Enter the username of the user: "******"c":
            insta_username = raw_input("Enter the username of the user: "******"j":
            exit()
        else:
            print "wrong choice"
 def save_features(self):
     """Extract features for this level's followers and pickle them.

     Stages, in order:

     1. Pick the followers to process.  At level 0 that is every distinct
        id in ``self.followers``.  At deeper levels only ids that occur
        more than ``floor(0.3 * self.n)`` times are kept; when fewer than
        8% of ``len(self.followers)`` qualify, the list is topped up from
        the most frequent ids whose count is above 1.
     2. Read cached profile data from the ``followers`` table.  Followers
        flagged suspended/deleted/errored, or whose cached profile cannot
        be parsed, go to ``self.ignore_users``; followers with no usable
        cached profile are collected in ``self.users_to_query`` and
        ``get_user_info(self)`` is then called.
     3. Obtain each remaining follower's timeline (from file when the row
        has a truthy ``timeline`` column, otherwise via
        ``get_user_timeline``) and run ``GetFeatures`` on timelines with
        more than 50 items.
     4. Pickle the collected features to
        ``clique_expansion/all_features/<seed_user>_all_features.p``.

     Side effects: replaces ``self.users_to_query``; adds to
     ``self.ignore_users``, ``self.users`` and ``self.user_info``; appends
     to ``self.current_level_users``.
     """
     self.users_to_query = set()
     features = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
     followers_set = set(self.followers)
     if self.level > 0:
         print("Number of followers: " + str(len(self.followers)))
         # most_common() yields (id, count) pairs, highest count first.
         follower_counts = Counter(self.followers).most_common()
         # should fix this to be a more precise measure
         size_to_keep = int(.08 * len(self.followers))
         connectedness_threshold = floor(0.3 * self.n)
         print(size_to_keep)
         print(connectedness_threshold)
         tmp_followers = [
             user for user, count in follower_counts
             if count > connectedness_threshold
         ]
         print("NUmber of followers over threshold = " + str(
             len(tmp_followers)))
         if len(tmp_followers) < size_to_keep:
             # Too few well-connected ids: top up from the head of the
             # frequency ranking; duplicates vanish in the set() below.
             tmp_followers.extend(
                 user for user, count in follower_counts[:size_to_keep]
                 if count > 1)
         followers_set = set(tmp_followers)
         print("Number of connected followers: " + str(len(followers_set)))

     print("Getting all user info...")
     for follower in followers_set:
         follower = str(follower)
         if follower in self.users or follower in self.ignore_users:
             continue
         self.cur.execute(
             'SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s',
             (follower, ))
         record = self.cur.fetchone()
         if record:
             if record[0] or record[1] or record[2]:
                 # suspended / deleted / other_error
                 self.ignore_users.add(follower)
                 continue
             if record[3]:
                 # user_info is flagged as present: use the cached JSON
                 # column, or give up on the user if it is missing/broken.
                 if not record[4]:
                     self.ignore_users.add(follower)
                     continue
                 try:
                     self.user_info[follower] = ast.literal_eval(
                         record[4])
                 except Exception:
                     # Was a bare ``except:``, which also swallowed
                     # KeyboardInterrupt and SystemExit.
                     self.ignore_users.add(follower)
                 continue
         # No row, or no cached profile yet: needs to be queried.
         self.users_to_query.add(follower)
     get_user_info(self)

     print("Getting all timeline info and extracting features")
     for follower in followers_set:
         follower = str(follower)
         if follower in self.users or follower in self.ignore_users:
             continue
         self.users.add(follower)
         self.cur.execute(
             'SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s',
             (follower, ))
         record = self.cur.fetchone()
         if record and (record[0] or record[1] or record[2]):
             # User is suspended, deleted or errored.
             self.ignore_users.add(follower)
             continue
         if record and record[3]:
             # Timeline already stored; it has to be read back from file.
             timeline = get_timeline_from_file(self, follower)
         else:
             timeline = get_user_timeline(self, follower)
         if not (timeline and self.user_info.get(follower)
                 and len(timeline) > 50):
             continue
         gf = GetFeatures(follower, self.user_info[follower], timeline)
         try:
             gf.get_user_features()
             gf.collect_tweets()
             gf.get_content_features()
             gf.get_temporal_features()
             features['temporal'][follower] = gf.temporal_features
             features['content'][follower] = gf.content_features
             features['network'][follower] = gf.network_features
             features['user'][follower] = gf.user_features
             self.current_level_users.append(follower)
         except Exception as e:
             # Best effort: report the failure and move on to the next
             # follower.
             print("ERROR GETTING FEATURES")
             print(e)
             print(follower)
             print(self.user_info[follower])

     with open(
             'clique_expansion/all_features/' + self.seed_user +
             '_all_features.p', 'wb') as f:
         pickle.dump(features, f)
示例#4
0
    def find_bots(self, priors):
        """Run one expansion level: extract features, score, grow the clique.

        Args:
            priors: mapping with the keys ``'temporal'``, ``'content'``,
                ``'network'`` and ``'user'``, each holding a list
                (presumably of feature dicts accepted by ``self.vec`` --
                confirm against the caller).  NOTE: every list is extended
                IN PLACE with this level's features, so the caller's object
                grows on each call.

        Stages, in order:

        1. Pick the followers to process (all distinct ids at level 0;
           at deeper levels only ids occurring more than
           ``floor(0.3 * self.n)`` times, topped up from the most frequent
           ids when fewer than 8% of ``len(self.followers)`` qualify).
        2. Load cached profiles from the ``followers`` table, queue the
           rest in ``self.users_to_query`` and call ``get_user_info(self)``.
        3. Fetch timelines and run ``GetFeatures`` on those with more than
           50 items; pickle the result to
           ``clique_expansion/<seed_user>_all_features.p``.
        4. Vectorise and normalise priors + new features per group and
           pass them to ``self.perform_outlier_detection``.
        5. Increment ``self.level`` and add every returned outlier to
           ``self.clique`` / ``self.to_check``; ``self.clique_features`` is
           rebuilt from scratch and ``self.n`` is set to the clique size.
        """
        # Fixed processing order.  It fixes the insertion order of ``X``
        # and makes 'user' the last group ``self.vec`` is fitted on.
        groups = ('temporal', 'content', 'network', 'user')

        self.users_to_query = set()
        features = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
        followers_set = set(self.followers)
        if self.level > 0:
            print("Number of followers: " + str(len(self.followers)))
            # most_common() yields (id, count) pairs, highest count first.
            follower_counts = Counter(self.followers).most_common()
            # should fix this to be a more precise measure
            size_to_keep = int(.08*len(self.followers))
            connectedness_threshold = floor(0.3*self.n)
            print(size_to_keep)
            print(connectedness_threshold)
            tmp_followers = [user for user, count in follower_counts
                             if count > connectedness_threshold]
            print("NUmber of followers over threshold = " + str(len(tmp_followers)))
            if len(tmp_followers) < size_to_keep:
                # Top up from the head of the frequency ranking; duplicates
                # vanish in the set() below.
                tmp_followers.extend(
                    user for user, count in follower_counts[:size_to_keep]
                    if count > 1)
            followers_set = set(tmp_followers)
            print("Number of connected followers: " + str(len(followers_set)))

        print("Getting all user info...")
        for follower in followers_set:
            follower = str(follower)
            if follower in self.users or follower in self.ignore_users:
                continue
            self.cur.execute('SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s', (follower,))
            record = self.cur.fetchone()
            if record:
                if record[0] or record[1] or record[2]:
                    # suspended / deleted / other_error
                    self.ignore_users.add(follower)
                    continue
                if record[3]:
                    # Profile flagged as cached: use the JSON column, or
                    # give up on the user if it is missing or unparsable.
                    if not record[4]:
                        self.ignore_users.add(follower)
                        continue
                    try:
                        self.user_info[follower] = ast.literal_eval(record[4])
                    except Exception:
                        # Was a bare ``except:``, which also swallowed
                        # KeyboardInterrupt and SystemExit.
                        self.ignore_users.add(follower)
                    continue
            self.users_to_query.add(follower)
        get_user_info(self)

        print("Getting all timeline info and extracting features")
        for follower in followers_set:
            follower = str(follower)
            if follower in self.users or follower in self.ignore_users:
                continue
            self.users.add(follower)
            self.cur.execute('SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s', (follower,))
            record = self.cur.fetchone()
            if record and (record[0] or record[1] or record[2]):
                # User is suspended, deleted or errored.
                self.ignore_users.add(follower)
                continue
            if record and record[3]:
                # Timeline already stored; it has to be read back from file.
                timeline = get_timeline_from_file(self, follower)
            else:
                timeline = get_user_timeline(self, follower)
            if not (timeline and self.user_info.get(follower) and len(timeline) > 50):
                continue
            gf = GetFeatures(follower, self.user_info[follower], timeline)
            try:
                gf.get_user_features()
                gf.collect_tweets()
                gf.get_content_features()
                gf.get_temporal_features()
                features['temporal'][follower] = gf.temporal_features
                features['content'][follower] = gf.content_features
                features['network'][follower] = gf.network_features
                features['user'][follower] = gf.user_features
                self.current_level_users.append(follower)
            except Exception as e:
                # Best effort: report and move on to the next follower.
                print("ERROR GETTING FEATURES")
                print(e)
                print(follower)
                print(self.user_info[follower])
            # TODO: incorporate other network features, e.g. the number of
            # shared edges taken from follower_counts.

        # we can look at the out-degree of the collapsed ego network. We also calculate the average out degree,
        # which is the average number of followers per follower.
        # need to get the followers for all these
        with open('clique_expansion/' + self.seed_user + '_all_features.p', 'wb') as f:
            pickle.dump(features, f)

        len_priors = len(priors['temporal'])
        # Deliberately in place (see docstring): priors accumulates the
        # features of every processed level.
        for group in groups:
            priors[group].extend(features[group].values())

        print("Performing anomaly detection")
        X = dict()
        for group in groups:
            # self.vec is re-fitted for each group in turn.
            X[group] = self.vec.fit_transform(priors[group]).toarray()
        for group in groups:
            X[group] = normalize(X[group])

        # NOTE(review): the rows appended above follow dict ``values()``
        # order; confirm perform_outlier_detection maps row positions back
        # to follower ids in that same order.
        outliers = self.perform_outlier_detection(X, len_priors)

        self.level += 1
        self.clique_features = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
        for follower in outliers:
            self.clique.add((follower, self.level))
            self.to_check.add(follower)
            for group in groups:
                self.clique_features[group][follower] = features[group][follower]
        print(self.clique)
        self.n = float(len(self.clique))
        print("Current size of cluster: " + str(self.n))
示例#5
0
 def save_features(self):
     """Extract features for every follower and pickle them.

     First pass: for each distinct id in ``self.followers`` that is not in
     ``self.ignore_users``, read the cached profile from the ``followers``
     table.  Flagged or unparsable rows put the id in
     ``self.ignore_users``; a parsable profile is stored in
     ``self.user_info``; everything else is queued in
     ``self.users_to_query`` before ``get_user_info(self)`` is called.

     Second pass: obtain the timeline (from file when the ``timeline``
     column is truthy, otherwise via ``get_user_timeline``) and run
     ``GetFeatures`` on timelines with more than 150 items.

     The result is written to
     ``clique_expansion/all_features/<seed_user>_all_features.p``.
     """
     self.users_to_query = set()
     features = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
     unique_followers = set(self.followers)

     print("Getting all user info...")
     for raw_id in unique_followers:
         uid = str(raw_id)
         if uid in self.ignore_users:
             continue
         self.cur.execute(
             'SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s',
             (uid, ))
         row = self.cur.fetchone()
         if row:
             if row[0] or row[1] or row[2]:
                 # suspended / deleted / other_error
                 self.ignore_users.add(uid)
                 continue
             if row[3]:
                 if row[4]:
                     try:
                         self.user_info[uid] = ast.literal_eval(row[4])
                     except Exception as err:
                         print(err)
                         self.ignore_users.add(uid)
                 else:
                     # Profile flagged as present but no JSON stored.
                     self.ignore_users.add(uid)
                 continue
         # No row, or no cached profile yet.
         self.users_to_query.add(uid)
     get_user_info(self)

     print("Getting all timeline info and extracting features")
     for raw_id in unique_followers:
         uid = str(raw_id)
         if uid in self.ignore_users:
             continue
         self.cur.execute(
             'SELECT timeline FROM followers WHERE user_id = %s',
             (uid, ))
         row = self.cur.fetchone()
         # A stored timeline has to be read back from file; otherwise it
         # is fetched.
         timeline = (get_timeline_from_file(self, uid)
                     if row and row[0]
                     else get_user_timeline(self, uid))
         if not (timeline and self.user_info.get(uid)
                 and len(timeline) > 150):
             continue
         extractor = GetFeatures(uid, self.user_info[uid], timeline)
         try:
             extractor.get_user_features()
             extractor.collect_tweets()
             extractor.get_content_features()
             extractor.get_temporal_features()
             features['temporal'][uid] = extractor.temporal_features
             features['content'][uid] = extractor.content_features
             features['network'][uid] = extractor.network_features
             features['user'][uid] = extractor.user_features
         except Exception as err:
             print("ERROR GETTING FEATURES")
             print(err)
             print(uid)
             print(self.user_info[uid])

     out_path = ('clique_expansion/all_features/' + self.seed_user +
                 '_all_features.p')
     with open(out_path, 'wb') as out_file:
         pickle.dump(features, out_file)
示例#6
0
    def find_bots(self, priors):
        """Run one expansion level: extract features, score, grow the clique.

        Args:
            priors: list of feature records (presumably dicts accepted by
                ``self.vec`` -- confirm against the caller).  NOTE: the
                list is extended IN PLACE with this level's features, so
                the caller's object grows on each call.

        Stages, in order:

        1. Pick the followers to process (all distinct ids at level 0;
           at deeper levels only ids occurring more than
           ``floor(0.3 * self.n)`` times, topped up from the most frequent
           ids when fewer than 8% of ``len(self.followers)`` qualify).
        2. Load cached profiles from the ``followers`` table, queue the
           rest in ``self.users_to_query`` and call ``get_user_info(self)``.
        3. Fetch timelines and run ``GetFeatures`` on those with more than
           50 items.
        4. Vectorise priors + new features with ``self.vec`` and pass the
           matrix to ``self.perform_outlier_detection``.
        5. Increment ``self.level``, add every returned outlier to
           ``self.clique`` / ``self.to_check`` / ``self.clique_features``
           and set ``self.n`` to the clique size.
        """
        self.users_to_query = set()
        user_features = {}
        followers_set = set(self.followers)
        if self.level > 0:
            print("Number of followers: " + str(len(self.followers)))
            # most_common() yields (id, count) pairs, highest count first.
            follower_counts = Counter(self.followers).most_common()
            # should fix this to be a more precise measure
            size_to_keep = int(.08*len(self.followers))
            connectedness_threshold = floor(0.3*self.n)
            print(size_to_keep)
            print(connectedness_threshold)
            tmp_followers = [user for user, count in follower_counts
                             if count > connectedness_threshold]
            print("NUmber of followers over threshold = " + str(len(tmp_followers)))
            if len(tmp_followers) < size_to_keep:
                # Top up from the head of the frequency ranking; duplicates
                # vanish in the set() below.
                tmp_followers.extend(
                    user for user, count in follower_counts[:size_to_keep]
                    if count > 1)
            followers_set = set(tmp_followers)
            print("Number of connected followers: " + str(len(followers_set)))

        print("Getting all user info...")
        for follower in followers_set:
            follower = str(follower)
            if follower in self.users or follower in self.ignore_users:
                continue
            self.cur.execute('SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s', (follower,))
            record = self.cur.fetchone()
            if record:
                if record[0] or record[1] or record[2]:
                    # suspended / deleted / other_error
                    self.ignore_users.add(follower)
                    continue
                if record[3]:
                    # Profile flagged as cached: use the JSON column, or
                    # give up on the user if it is missing or unparsable.
                    if not record[4]:
                        self.ignore_users.add(follower)
                        continue
                    try:
                        self.user_info[follower] = ast.literal_eval(record[4])
                    except Exception:
                        # Was a bare ``except:``, which also swallowed
                        # KeyboardInterrupt and SystemExit.
                        self.ignore_users.add(follower)
                    continue
            self.users_to_query.add(follower)
        get_user_info(self)

        print("Getting all timeline info and extracting features")
        for follower in followers_set:
            follower = str(follower)
            if follower in self.users or follower in self.ignore_users:
                continue
            self.users.add(follower)
            self.cur.execute('SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s', (follower,))
            record = self.cur.fetchone()
            if record and (record[0] or record[1] or record[2]):
                # User is suspended, deleted or errored.
                self.ignore_users.add(follower)
                continue
            if record and record[3]:
                # Timeline already stored; it has to be read back from file.
                timeline = get_timeline_from_file(self, follower)
            else:
                timeline = get_user_timeline(self, follower)
            if not (timeline and self.user_info.get(follower) and len(timeline) > 50):
                continue
            gf = GetFeatures(follower, self.user_info[follower], timeline)
            try:
                gf.user_features()
                gf.collect_tweets()
                gf.content_features()
                gf.temporal_features()
            except Exception as e:
                print("ERROR GETTING FEATURES")
                print(e)
                print(follower)
                print(self.user_info[follower])
            # TODO: incorporate other network features, e.g. the number of
            # shared edges taken from follower_counts.
            # NOTE(review): this is reached on the error path as well, so a
            # follower whose extraction failed is still recorded with
            # whatever gf.features holds at that point -- confirm intended.
            user_features[follower] = gf.features
            self.current_level_users.append(follower)

        # we can look at the out-degree of the collapsed ego network. We also calculate the average out degree,
        # which is the average number of followers per follower.
        # need to get the followers for all these
        len_priors = len(priors)
        # Deliberately in place (see docstring).
        priors.extend(user_features.values())
        print("Performing anomaly detection")
        X = self.vec.fit_transform(priors).toarray()
        # NOTE(review): earlier code also computed ``normalize(X)`` here but
        # never used the result; the raw matrix is what has always been
        # scored.  Confirm whether normalised input was intended.
        outliers = self.perform_outlier_detection(X, len_priors)

        #How do I add back in the outliers to the anomaly detection? Mueen said not to so I will leave for now
        self.level += 1
        # Add the detected outliers to the clique and to_check.
        # NOTE(review): self.clique_features is not reset here, so entries
        # from earlier levels are kept (a local ``clique_features = {}``
        # that was never read has been dropped).
        for follower in outliers:
            self.clique.add((follower, self.level))
            self.to_check.add(follower)
            self.clique_features[follower] = user_features[follower]
        print(self.clique)
        self.n = float(len(self.clique))
        print("Current size of cluster: " + str(self.n))
示例#7
0
 def find_bots(self, priors):
     """Run one expansion level with an inline LocalOutlierFactor model.

     Args:
         priors: list of feature records (presumably dicts accepted by
             ``self.vec`` -- confirm against the caller).  NOTE: the list
             is extended IN PLACE with ``self.features_list``.

     Stages, in order:

     1. Keep follower ids occurring at least ``floor(0.3 * self.n)``
        times, topped up from the most frequent ids (count > 1) when
        fewer than 15% of ``len(self.followers)`` qualify.
     2. Load cached profiles from the ``followers`` table, queue the rest
        in ``self.users_to_query`` and call ``get_user_info(self)``.
     3. Fetch timelines and run ``GetFeatures`` on those with more than
        50 items, appending to ``self.current_level_users`` and
        ``self.features_list``.
     4. Vectorise priors + features, fit ``LocalOutlierFactor`` and treat
        every current-level user whose score is at or below
        ``clf.threshold_`` as connected.
     5. Increment ``self.level``, add those users to ``self.clique`` and
        ``self.to_check`` and set ``self.n`` to the clique size.
     """
     print("Getting all user info...")
     self.users_to_query = set()
     print("Number of followers: " + str(len(self.followers)))
     # most_common() yields (id, count) pairs, highest count first.
     follower_counts = Counter(self.followers).most_common()
     # should fix this to be a more precise measure
     size_to_keep = int(.15*len(self.followers))
     connectedness_threshold = floor(0.3*self.n)
     tmp_followers = [user for user, count in follower_counts
                      if count >= connectedness_threshold]
     if len(tmp_followers) < size_to_keep:
         # Top up from the head of the frequency ranking; duplicates vanish
         # in the set() below.
         tmp_followers.extend(
             user for user, count in follower_counts[:size_to_keep]
             if count > 1)
     followers_set = set(tmp_followers)
     print("Number of connected followers: " + str(len(followers_set)))

     for follower in followers_set:
         follower = str(follower)
         if follower in self.users or follower in self.ignore_users:
             continue
         self.cur.execute('SELECT suspended, deleted, other_error, user_info_json FROM followers WHERE user_id = %s', (follower,))
         record = self.cur.fetchone()
         if record:
             if record[0] or record[1] or record[2]:
                 # User is suspended, deleted or errored.
                 self.ignore_users.add(follower)
                 continue
             if record[3]:
                 # Profile information is already cached for this user.
                 try:
                     self.user_info[follower] = ast.literal_eval(record[3])
                 except Exception as e:
                     # One malformed cached profile used to abort the whole
                     # run; report it and skip the user instead.
                     print(e)
                     self.ignore_users.add(follower)
                 continue
         self.users_to_query.add(follower)
     get_user_info(self)

     print("Getting all timeline info and extracting features")
     for follower in followers_set:
         follower = str(follower)
         if follower in self.users or follower in self.ignore_users:
             continue
         self.users.add(follower)
         self.cur.execute('SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s', (follower,))
         record = self.cur.fetchone()
         if record and (record[0] or record[1] or record[2]):
             # User is suspended, deleted or errored.
             self.ignore_users.add(follower)
             continue
         if record and record[3]:
             # Timeline already stored; it has to be read back from file.
             timeline = get_timeline_from_file(self, follower)
         else:
             timeline = get_user_timeline(self, follower)
         if not (timeline and self.user_info.get(follower) and len(timeline) > 50):
             continue
         gf = GetFeatures(follower, self.user_info[follower], timeline)
         try:
             gf.user_features()
             gf.collect_tweets()
             gf.content_features()
             gf.temporal_features()
         except Exception as e:
             print("ERROR GETTING FEATURES")
             print(e)
             print(follower)
             print(self.user_info[follower])
         # TODO: incorporate other network features, e.g. the number of
         # shared edges taken from follower_counts.
         # NOTE(review): reached on the error path as well, so a follower
         # whose extraction failed is still recorded with whatever
         # gf.features holds at that point -- confirm intended.
         self.current_level_users.append(follower)
         self.features_list.append(gf.features)

     len_priors = len(priors)
     # Deliberately in place (see docstring).
     priors.extend(self.features_list)
     print("Performing anomaly detection")
     X = self.vec.fit_transform(priors).toarray()
     # NOTE(review): earlier code also computed ``normalize(X)`` here but
     # never used the result; the raw matrix is what has always been scored.
     # Confirm whether normalised input was intended.
     print(X.shape)
     # Every round will find outliers, how do we stop exploring?
     clf = LocalOutlierFactor(n_neighbors=20)
     clf.fit(X)
     # NOTE(review): ``threshold_`` and ``_decision_function`` look like
     # attributes of an older scikit-learn release -- verify against the
     # installed version before upgrading.
     check_is_fitted(clf, ["threshold_", "negative_outlier_factor_", "n_neighbors_", "_distances_fit_X_"])
     # X is always an array at this point, so the former ``X is None``
     # fallback to ``negative_outlier_factor_`` was unreachable.
     X = check_array(X, accept_sparse='csr')
     y_pred = clf._decision_function(X)
     # Rows before len_priors belong to the priors; the rest are scored
     # against self.current_level_users position by position.
     y_pred_new = y_pred[len_priors:]
     # Do anomaly detection and set connected followers to certain outliers
     # this line is a stand-in
     connected_followers = [
         user for user, score in zip(self.current_level_users, y_pred_new)
         if score <= clf.threshold_
     ]
     #How do I add back in the outliers to the anomaly detection? Mueen said not to so I will leave for now
     self.level += 1
     # Add highly connected followers to the clique and to_check
     for follower in connected_followers:
         self.clique.add((follower, self.level))
         self.to_check.add(follower)
     print(self.clique)
     self.n = float(len(self.clique))
     print("Current size of cluster: " + str(self.n))