# insta_bot: interactive menu loop. Each pass prints ten numbered options,
# reads the selection with int(raw_input(...)) and dispatches on it; choice 1
# calls self_info().
# NOTE(review): this definition is collapsed onto a single physical line, so
# everything after "#provide choices" is parsed as a comment, and the dispatch
# branches for choices 2-10 are masked by "******" runs. The original calls
# cannot be recovered from this text - restore the function from version
# control rather than reconstructing it by hand.
# NOTE(review): int(raw_input(...)) will raise ValueError on non-numeric input;
# handle that when the body is restored.
def insta_bot(): while True: #provide choices print "Welcome to instaBot!" print "Menu options:" print "1.Get your own details" print "2.Get details of a user by username" print "3.Get your own recent post" print "4.Get the recent post of a user by username" print "5.Like the recent post of a user" print "6.Make a comment on the recent post of a user" print "7.list of likes on recent post of a user" print "8.list of comments on a recnet post of a user" print "9.delete bad comment on recent post" print "10.Exit" choice = int(raw_input("Enter you choice: ")) if choice == 1: self_info() elif choice == 2: user_name = raw_input("Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"Enter the username of the user: "******"wrong choice"
# main: interactive menu loop keyed by letters. Each pass prints the menu,
# reads the selection with raw_input and dispatches on it: "a" calls
# self_info(), "j" calls exit(), anything unrecognised prints "wrong choice".
# NOTE(review): this definition is collapsed onto a single physical line and
# the branches between "b" and "j" are masked by "******" runs, so the calls
# they made cannot be recovered from this text - restore the function from
# version control rather than reconstructing it by hand.
def main(): while True: print '\n' print 'Hey! Welcome to instaBot!' print 'Here are your menu options:' print "a.Get your own details\n" print "b.Get details of a user by username\n" choice = raw_input("Enter you choice: ") if choice == "a": self_info() elif choice == "b": insta_username = raw_input("Enter the username of the user: "******"c": insta_username = raw_input("Enter the username of the user: "******"j": exit() else: print "wrong choice"
def save_features(self):
    """Extract features for the current followers and pickle them to disk.

    When ``self.level > 0`` the candidate set is narrowed first: followers
    occurring more than ``floor(0.3 * self.n)`` times in ``self.followers``
    are kept, and if fewer than 8% of the list survive, the most common
    followers that occur more than once are added.

    The method then makes two passes over the candidates that are in neither
    ``self.users`` nor ``self.ignore_users``:

    1. Profile pass - reads the cached row from the ``followers`` table.
       Rows flagged suspended / deleted / other_error, rows whose
       ``user_info`` flag is set without ``user_info_json``, and rows whose
       ``user_info_json`` cannot be parsed go to ``self.ignore_users``.
       Parsed profiles are stored in ``self.user_info``; everything else is
       queued in ``self.users_to_query`` before ``get_user_info(self)`` runs.
    2. Timeline pass - marks the follower as seen in ``self.users``, loads
       the timeline (from file when the row says one is cached, otherwise
       via ``get_user_timeline``) and, when a profile is known and the
       timeline has more than 50 entries, runs ``GetFeatures``.

    Successful extractions are stored per feature group, the follower is
    appended to ``self.current_level_users``, and the whole feature dict is
    pickled to ``clique_expansion/all_features/<seed_user>_all_features.p``.

    Returns:
        None.
    """
    self.users_to_query = set()
    features = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
    followers_set = set(self.followers)
    if self.level > 0:
        print("Number of followers: " + str(len(self.followers)))
        follower_counts = Counter(self.followers).most_common()
        # should fix this to be a more precise measure
        size_to_keep = int(.08 * len(self.followers))
        connectedness_threshold = floor(0.3 * self.n)
        print(size_to_keep)
        print(connectedness_threshold)
        tmp_followers = [
            f[0] for f in follower_counts if f[1] > connectedness_threshold
        ]
        print("NUmber of followers over threshold = " + str(len(tmp_followers)))
        if len(tmp_followers) < size_to_keep:
            tmp_followers.extend(
                [f[0] for f in follower_counts[:size_to_keep] if f[1] > 1])
        followers_set = set(tmp_followers)
        print("Number of connected followers: " + str(len(followers_set)))

    print("Getting all user info...")
    for follower in followers_set:
        follower = str(follower)
        if follower in self.users or follower in self.ignore_users:
            continue
        self.cur.execute(
            'SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s',
            (follower, ))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                self.ignore_users.add(follower)
                continue
            if record[3] and not record[4]:
                self.ignore_users.add(follower)
                continue
            if record[3] and record[4]:
                try:
                    self.user_info[follower] = ast.literal_eval(record[4])
                except Exception:
                    # Unparseable cached profile: skip the user. ``Exception``
                    # (not a bare except) so Ctrl-C / SystemExit still
                    # propagate.
                    self.ignore_users.add(follower)
                continue
        self.users_to_query.add(follower)
    get_user_info(self)

    print("Getting all timeline info and extracting features")
    for follower in followers_set:
        follower = str(follower)
        if follower in self.users or follower in self.ignore_users:
            continue
        self.users.add(follower)
        self.cur.execute(
            'SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s',
            (follower, ))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                # User is suspended or deleted
                self.ignore_users.add(follower)
                continue
            if record[3]:
                # Timeline already cached: have to read it back from file
                timeline = get_timeline_from_file(self, follower)
            else:
                timeline = get_user_timeline(self, follower)
        else:
            timeline = get_user_timeline(self, follower)

        if not (timeline and self.user_info.get(follower)
                and len(timeline) > 50):
            continue
        gf = GetFeatures(follower, self.user_info[follower], timeline)
        try:
            gf.get_user_features()
            gf.collect_tweets()
            gf.get_content_features()
            gf.get_temporal_features()
            # Read all four groups before storing any of them, so a failure
            # part-way through cannot leave the follower present in some
            # feature groups and missing from others.
            temporal = gf.temporal_features
            content = gf.content_features
            network = gf.network_features
            user = gf.user_features
            features['temporal'][follower] = temporal
            features['content'][follower] = content
            features['network'][follower] = network
            features['user'][follower] = user
            self.current_level_users.append(follower)
        except Exception as e:
            print("ERROR GETTING FEATURES")
            print(e)
            print(follower)
            print(self.user_info[follower])

    with open(
            'clique_expansion/all_features/' + self.seed_user +
            '_all_features.p', 'wb') as f:
        pickle.dump(features, f)
def find_bots(self, priors):
    """Extract follower features, run outlier detection and grow the clique.

    When ``self.level > 0`` the candidate set is narrowed first: followers
    occurring more than ``floor(0.3 * self.n)`` times in ``self.followers``
    are kept, and if fewer than 8% of the list survive, the most common
    followers that occur more than once are added.

    Candidates in neither ``self.users`` nor ``self.ignore_users`` then go
    through a profile pass (cached rows from the ``followers`` table, with
    unknown users queued in ``self.users_to_query`` before
    ``get_user_info(self)`` runs) and a timeline pass that runs
    ``GetFeatures`` for users with a known profile and more than 50 timeline
    entries. The extracted features are pickled to
    ``clique_expansion/<seed_user>_all_features.p``.

    The new feature rows are appended to ``priors``, each feature group is
    vectorised with ``self.vec`` and normalised, and
    ``self.perform_outlier_detection`` is called with the result. Every
    returned outlier is added to ``self.clique`` / ``self.to_check`` and its
    features are copied into a fresh ``self.clique_features``.

    Args:
        priors: dict with ``'temporal'``, ``'content'``, ``'network'`` and
            ``'user'`` lists of feature dicts. NOTE: the lists are extended
            in place with this level's rows, so the caller's object changes.

    Returns:
        None. Updates ``self.level``, ``self.n``, ``self.clique``,
        ``self.to_check`` and ``self.clique_features``.
    """
    self.users_to_query = set()
    features = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
    followers_set = set(self.followers)
    if self.level > 0:
        print("Number of followers: " + str(len(self.followers)))
        follower_counts = Counter(self.followers).most_common()
        # should fix this to be a more precise measure
        size_to_keep = int(.08 * len(self.followers))
        connectedness_threshold = floor(0.3 * self.n)
        print(size_to_keep)
        print(connectedness_threshold)
        tmp_followers = [f[0] for f in follower_counts if f[1] > connectedness_threshold]
        print("NUmber of followers over threshold = " + str(len(tmp_followers)))
        if len(tmp_followers) < size_to_keep:
            tmp_followers.extend([f[0] for f in follower_counts[:size_to_keep] if f[1] > 1])
        followers_set = set(tmp_followers)
        print("Number of connected followers: " + str(len(followers_set)))

    print("Getting all user info...")
    for follower in followers_set:
        follower = str(follower)
        if follower in self.users or follower in self.ignore_users:
            continue
        self.cur.execute('SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s', (follower,))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                self.ignore_users.add(follower)
                continue
            if record[3] and not record[4]:
                self.ignore_users.add(follower)
                continue
            if record[3] and record[4]:
                try:
                    self.user_info[follower] = ast.literal_eval(record[4])
                except Exception:
                    # Unparseable cached profile: skip the user. ``Exception``
                    # (not a bare except) so Ctrl-C / SystemExit still
                    # propagate.
                    self.ignore_users.add(follower)
                continue
        self.users_to_query.add(follower)
    get_user_info(self)

    print("Getting all timeline info and extracting features")
    # Followers whose features were extracted, in extraction order (the same
    # order in which they are appended to self.current_level_users).
    extracted_users = []
    for follower in followers_set:
        follower = str(follower)
        if follower in self.users or follower in self.ignore_users:
            continue
        self.users.add(follower)
        self.cur.execute('SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s', (follower,))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                # User is suspended or deleted
                self.ignore_users.add(follower)
                continue
            if record[3]:
                # Timeline already cached: have to read it back from file
                timeline = get_timeline_from_file(self, follower)
            else:
                timeline = get_user_timeline(self, follower)
        else:
            timeline = get_user_timeline(self, follower)

        if not (timeline and self.user_info.get(follower) and len(timeline) > 50):
            continue
        gf = GetFeatures(follower, self.user_info[follower], timeline)
        try:
            gf.get_user_features()
            gf.collect_tweets()
            gf.get_content_features()
            gf.get_temporal_features()
            # Read all four groups before storing any of them, so a failure
            # part-way through cannot leave the follower present in some
            # feature groups and missing from others (which would give the
            # per-group matrices different row counts).
            temporal = gf.temporal_features
            content = gf.content_features
            network = gf.network_features
            user = gf.user_features
            features['temporal'][follower] = temporal
            features['content'][follower] = content
            features['network'][follower] = network
            features['user'][follower] = user
            self.current_level_users.append(follower)
            extracted_users.append(follower)
        except Exception as e:
            print("ERROR GETTING FEATURES")
            print(e)
            print(follower)
            print(self.user_info[follower])
        # TODO: need to incorporate other network features, e.g. the number
        # of shared edges, the out-degree of the collapsed ego network and
        # the average number of followers per follower (needs the followers
        # of all these users).

    with open('clique_expansion/' + self.seed_user + '_all_features.p', 'wb') as f:
        pickle.dump(features, f)

    len_priors = len(priors['temporal'])
    current_features = priors
    # Append rows in extraction order rather than dict.values() order: on
    # Python 2 dict iteration order is arbitrary, so values() need not line
    # up with the order users were appended to self.current_level_users.
    # NOTE(review): presumably perform_outlier_detection maps rows back to
    # users through self.current_level_users - confirm.
    for group in ('temporal', 'content', 'network', 'user'):
        current_features[group].extend(
            [features[group][follower] for follower in extracted_users])

    print("Performing anomaly detection")
    X = dict()
    X['temporal'] = self.vec.fit_transform(current_features['temporal']).toarray()
    X['content'] = self.vec.fit_transform(current_features['content']).toarray()
    X['network'] = self.vec.fit_transform(current_features['network']).toarray()
    X['user'] = self.vec.fit_transform(current_features['user']).toarray()
    for key in list(X):
        X[key] = normalize(X[key])

    outliers = self.perform_outlier_detection(X, len_priors)

    self.level += 1
    self.clique_features = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
    for follower in outliers:
        self.clique.add((follower, self.level))
        self.to_check.add(follower)
        self.clique_features['content'][follower] = features['content'][follower]
        self.clique_features['network'][follower] = features['network'][follower]
        self.clique_features['user'][follower] = features['user'][follower]
        self.clique_features['temporal'][follower] = features['temporal'][follower]
    print(self.clique)
    self.n = float(len(self.clique))
    print("Current size of cluster: " + str(self.n))
def save_features(self):
    """Extract features for every follower and pickle them to disk.

    Two passes are made over ``set(self.followers)``, skipping anyone in
    ``self.ignore_users``:

    1. Profile pass - reads the cached row from the ``followers`` table.
       Rows flagged suspended / deleted / other_error, rows whose
       ``user_info`` flag is set without ``user_info_json``, and rows whose
       ``user_info_json`` cannot be parsed go to ``self.ignore_users``.
       Parsed profiles are stored in ``self.user_info``; everything else is
       queued in ``self.users_to_query`` before ``get_user_info(self)`` runs.
    2. Timeline pass - loads the timeline (from file when the row says one
       is cached, otherwise via ``get_user_timeline``) and, when a profile
       is known and the timeline has more than 150 entries, runs
       ``GetFeatures``.

    The resulting feature dict is pickled to
    ``clique_expansion/all_features/<seed_user>_all_features.p``.

    Returns:
        None.
    """
    self.users_to_query = set()
    features = {'temporal': {}, 'content': {}, 'user': {}, 'network': {}}
    followers_set = set(self.followers)

    print("Getting all user info...")
    for follower in followers_set:
        follower = str(follower)
        if follower in self.ignore_users:
            continue
        self.cur.execute(
            'SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s',
            (follower, ))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                self.ignore_users.add(follower)
                continue
            if record[3] and not record[4]:
                self.ignore_users.add(follower)
                continue
            if record[3] and record[4]:
                try:
                    self.user_info[follower] = ast.literal_eval(record[4])
                except Exception as e:
                    # Unparseable cached profile: report it and skip the user.
                    print(e)
                    self.ignore_users.add(follower)
                continue
        self.users_to_query.add(follower)
    get_user_info(self)

    print("Getting all timeline info and extracting features")
    for follower in followers_set:
        follower = str(follower)
        if follower in self.ignore_users:
            continue
        self.cur.execute(
            'SELECT timeline FROM followers WHERE user_id = %s',
            (follower, ))
        record = self.cur.fetchone()
        if record and record[0]:
            # Timeline already cached: have to read it back from file
            timeline = get_timeline_from_file(self, follower)
        else:
            timeline = get_user_timeline(self, follower)

        if not (timeline and self.user_info.get(follower)
                and len(timeline) > 150):
            continue
        gf = GetFeatures(follower, self.user_info[follower], timeline)
        try:
            gf.get_user_features()
            gf.collect_tweets()
            gf.get_content_features()
            gf.get_temporal_features()
            # Read all four groups before storing any of them, so a failure
            # part-way through cannot leave the follower present in some
            # feature groups and missing from others.
            temporal = gf.temporal_features
            content = gf.content_features
            network = gf.network_features
            user = gf.user_features
            features['temporal'][follower] = temporal
            features['content'][follower] = content
            features['network'][follower] = network
            features['user'][follower] = user
        except Exception as e:
            print("ERROR GETTING FEATURES")
            print(e)
            print(follower)
            print(self.user_info[follower])

    with open(
            'clique_expansion/all_features/' + self.seed_user +
            '_all_features.p', 'wb') as f:
        pickle.dump(features, f)
def find_bots(self, priors):
    """Extract follower features, run outlier detection and grow the clique.

    When ``self.level > 0`` the candidate set is narrowed first: followers
    occurring more than ``floor(0.3 * self.n)`` times in ``self.followers``
    are kept, and if fewer than 8% of the list survive, the most common
    followers that occur more than once are added.

    Candidates in neither ``self.users`` nor ``self.ignore_users`` then go
    through a profile pass (cached rows from the ``followers`` table, with
    unknown users queued in ``self.users_to_query`` before
    ``get_user_info(self)`` runs) and a timeline pass that runs
    ``GetFeatures`` for users with a known profile and more than 50 timeline
    entries.

    The new feature dicts are appended to ``priors``, vectorised with
    ``self.vec`` and passed to ``self.perform_outlier_detection``. Every
    returned outlier is added to ``self.clique`` / ``self.to_check`` and its
    features are stored in ``self.clique_features``.

    Args:
        priors: list of feature dicts from earlier levels. NOTE: it is
            extended in place with this level's rows, so the caller's list
            changes.

    Returns:
        None. Updates ``self.level``, ``self.n``, ``self.clique``,
        ``self.to_check`` and ``self.clique_features``.
    """
    self.users_to_query = set()
    user_features = {}
    followers_set = set(self.followers)
    if self.level > 0:
        print("Number of followers: " + str(len(self.followers)))
        follower_counts = Counter(self.followers).most_common()
        # should fix this to be a more precise measure
        size_to_keep = int(.08 * len(self.followers))
        connectedness_threshold = floor(0.3 * self.n)
        print(size_to_keep)
        print(connectedness_threshold)
        tmp_followers = [f[0] for f in follower_counts if f[1] > connectedness_threshold]
        print("NUmber of followers over threshold = " + str(len(tmp_followers)))
        if len(tmp_followers) < size_to_keep:
            tmp_followers.extend([f[0] for f in follower_counts[:size_to_keep] if f[1] > 1])
        followers_set = set(tmp_followers)
        print("Number of connected followers: " + str(len(followers_set)))

    print("Getting all user info...")
    for follower in followers_set:
        follower = str(follower)
        if follower in self.users or follower in self.ignore_users:
            continue
        self.cur.execute('SELECT suspended, deleted, other_error, user_info, user_info_json FROM followers WHERE user_id = %s', (follower,))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                self.ignore_users.add(follower)
                continue
            if record[3] and not record[4]:
                self.ignore_users.add(follower)
                continue
            if record[3] and record[4]:
                try:
                    self.user_info[follower] = ast.literal_eval(record[4])
                except Exception:
                    # Unparseable cached profile: skip the user. ``Exception``
                    # (not a bare except) so Ctrl-C / SystemExit still
                    # propagate.
                    self.ignore_users.add(follower)
                continue
        self.users_to_query.add(follower)
    get_user_info(self)

    print("Getting all timeline info and extracting features")
    # Followers whose features were extracted, in extraction order (the same
    # order in which they are appended to self.current_level_users).
    extracted_users = []
    for follower in followers_set:
        follower = str(follower)
        if follower in self.users or follower in self.ignore_users:
            continue
        self.users.add(follower)
        self.cur.execute('SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s', (follower,))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                # User is suspended or deleted
                self.ignore_users.add(follower)
                continue
            if record[3]:
                # Timeline already cached: have to read it back from file
                timeline = get_timeline_from_file(self, follower)
            else:
                timeline = get_user_timeline(self, follower)
        else:
            timeline = get_user_timeline(self, follower)

        if not (timeline and self.user_info.get(follower) and len(timeline) > 50):
            continue
        gf = GetFeatures(follower, self.user_info[follower], timeline)
        try:
            gf.user_features()
            gf.collect_tweets()
            gf.content_features()
            gf.temporal_features()
        except Exception as e:
            print("ERROR GETTING FEATURES")
            print(e)
            print(follower)
            print(self.user_info[follower])
        else:
            # Only record the follower when extraction completed; a failed
            # run would otherwise feed half-built features to the detector.
            # TODO: need to incorporate other network features, e.g. the
            # number of shared edges, the out-degree of the collapsed ego
            # network and the average number of followers per follower.
            user_features[follower] = gf.features
            self.current_level_users.append(follower)
            extracted_users.append(follower)

    len_priors = len(priors)
    current_features = priors
    # Append rows in extraction order rather than dict.values() order: on
    # Python 2 dict iteration order is arbitrary, so values() need not line
    # up with the order users were appended to self.current_level_users.
    # NOTE(review): presumably perform_outlier_detection maps rows back to
    # users through self.current_level_users - confirm.
    current_features.extend([user_features[follower] for follower in extracted_users])

    print("Performing anomaly detection")
    X = self.vec.fit_transform(current_features).toarray()
    # NOTE(review): X_norm is computed but the un-normalised X is what gets
    # scored below. Left as-is to keep results unchanged; confirm which
    # matrix was intended.
    X_norm = normalize(X)

    outliers = self.perform_outlier_detection(X, len_priors)
    # How do I add back in the outliers to the anomaly detection? Mueen said
    # not to so I will leave for now
    self.level += 1

    # Add highly connected followers to the clique and to_check.
    # NOTE(review): a local ``clique_features = {}`` used to be assigned here
    # and never read. If the intent was to reset self.clique_features for
    # each level, that reset still needs to be added.
    for follower in outliers:
        self.clique.add((follower, self.level))
        self.to_check.add(follower)
        self.clique_features[follower] = user_features[follower]
    print(self.clique)
    self.n = float(len(self.clique))
    print("Current size of cluster: " + str(self.n))
def find_bots(self, priors):
    """Extract follower features, score them with LOF and grow the clique.

    The candidate set is the followers occurring at least
    ``floor(0.3 * self.n)`` times in ``self.followers``; if fewer than 15%
    of the list qualify, the most common followers that occur more than once
    are added.

    Candidates in neither ``self.users`` nor ``self.ignore_users`` then go
    through a profile pass (cached ``user_info_json`` from the ``followers``
    table, with unknown users queued in ``self.users_to_query`` before
    ``get_user_info(self)`` runs) and a timeline pass that runs
    ``GetFeatures`` for users with a known profile and more than 50 timeline
    entries. Extracted features are appended to ``self.features_list`` and
    the user to ``self.current_level_users``.

    ``priors`` plus ``self.features_list`` are vectorised with ``self.vec``
    and scored by a ``LocalOutlierFactor(n_neighbors=20)``. Users whose score
    is at or below ``clf.threshold_`` are added to ``self.clique`` and
    ``self.to_check``.

    Args:
        priors: list of feature dicts from earlier levels. NOTE: it is
            extended in place with ``self.features_list``, so the caller's
            list changes.

    Returns:
        None. Updates ``self.level``, ``self.n``, ``self.clique`` and
        ``self.to_check``.
    """
    print("Getting all user info...")
    self.users_to_query = set()
    print("Number of followers: " + str(len(self.followers)))
    follower_counts = Counter(self.followers).most_common()
    # should fix this to be a more precise measure
    size_to_keep = int(.15 * len(self.followers))
    connectedness_threshold = floor(0.3 * self.n)
    tmp_followers = [f[0] for f in follower_counts if f[1] >= connectedness_threshold]
    if len(tmp_followers) < size_to_keep:
        tmp_followers.extend([f[0] for f in follower_counts[:size_to_keep] if f[1] > 1])
    followers_set = set(tmp_followers)
    print("Number of connected followers: " + str(len(followers_set)))

    for follower in followers_set:
        follower = str(follower)
        if follower in self.users or follower in self.ignore_users:
            continue
        self.cur.execute('SELECT suspended, deleted, other_error, user_info_json FROM followers WHERE user_id = %s', (follower,))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                # User is suspended or deleted
                self.ignore_users.add(follower)
                continue
            if record[3]:
                # Already have profile information for this user.
                try:
                    self.user_info[follower] = ast.literal_eval(record[3])
                except Exception as e:
                    # One unparseable cached profile must not abort the
                    # whole run: report it and skip the user.
                    print(e)
                    self.ignore_users.add(follower)
                continue
        self.users_to_query.add(follower)
    get_user_info(self)

    print("Getting all timeline info and extracting features")
    for follower in followers_set:
        follower = str(follower)
        if follower in self.users or follower in self.ignore_users:
            continue
        self.users.add(follower)
        self.cur.execute('SELECT suspended, deleted, other_error, timeline FROM followers WHERE user_id = %s', (follower,))
        record = self.cur.fetchone()
        if record:
            if record[0] or record[1] or record[2]:
                # User is suspended or deleted
                self.ignore_users.add(follower)
                continue
            if record[3]:
                # Timeline already cached: have to read it back from file
                timeline = get_timeline_from_file(self, follower)
            else:
                timeline = get_user_timeline(self, follower)
        else:
            timeline = get_user_timeline(self, follower)

        if not (timeline and self.user_info.get(follower) and len(timeline) > 50):
            continue
        gf = GetFeatures(follower, self.user_info[follower], timeline)
        try:
            gf.user_features()
            gf.collect_tweets()
            gf.content_features()
            gf.temporal_features()
        except Exception as e:
            print("ERROR GETTING FEATURES")
            print(e)
            print(follower)
            print(self.user_info[follower])
        else:
            # Only record the follower when extraction completed; a failed
            # run would otherwise feed half-built features to the detector.
            # TODO: need to incorporate other network features, e.g. the
            # number of shared edges.
            self.current_level_users.append(follower)
            self.features_list.append(gf.features)

    # Axis=0 should be vertical
    len_priors = len(priors)
    current_features = priors
    # NOTE(review): self.features_list is not reset in this method, so it
    # holds whatever earlier calls appended as well - confirm that is
    # intended together with the in-place growth of ``priors``.
    current_features.extend(self.features_list)

    print("Performing anomaly detection")
    X = self.vec.fit_transform(current_features).toarray()
    # NOTE(review): X_norm is computed but the un-normalised X is what gets
    # fitted and scored below. Left as-is to keep results unchanged; confirm
    # which matrix was intended.
    X_norm = normalize(X)
    print(X.shape)

    # Every round will find outliers, how do we stop exploring?
    clf = LocalOutlierFactor(n_neighbors=20)
    clf.fit(X)
    # NOTE(review): ``_decision_function`` is a private scikit-learn method,
    # and it, ``threshold_`` and this check_is_fitted attribute list
    # presumably match an older release - confirm against the installed
    # version.
    check_is_fitted(clf, ["threshold_", "negative_outlier_factor_", "n_neighbors_", "_distances_fit_X_"])
    # X always comes from .toarray() here, so it is never None; the old
    # fallback to clf.negative_outlier_factor_ was unreachable.
    X = check_array(X, accept_sparse='csr')
    y_pred = clf._decision_function(X)
    y_pred_new = y_pred[len_priors:]

    # Pair each user with their score and keep the ones at or below the
    # fitted threshold.
    users_scores = zip(self.current_level_users, y_pred_new)
    connected_followers = [user for user, score in users_scores if score <= clf.threshold_]
    # How do I add back in the outliers to the anomaly detection? Mueen said
    # not to so I will leave for now
    self.level += 1

    # Add highly connected followers to the clique and to_check
    for follower in connected_followers:
        self.clique.add((follower, self.level))
        self.to_check.add(follower)
    print(self.clique)
    self.n = float(len(self.clique))
    print("Current size of cluster: " + str(self.n))