def make_new_members_request(self):
    """Request the next page of recently joined members.

    Issues a GET against ``self.members_url`` using the cursor captured from
    the previous response, stores the response in ``self.current_result``,
    increments ``self.requests``, logs the outcome and finally sleeps for two
    seconds.
    """
    cursor_value = self.cursor.group(1)
    log('Doing the request with cursor {}'.format(cursor_value))

    # Listing parameters first, then the per-request/session parameters;
    # the insertion order is kept so the query string is built the same way.
    payload = {
        'gid': self.group_id,
        'order': 'date',
        'view': 'list',
        'limit': self.basket_size,
        'sectiontype': 'recently_joined',
        'memberstabexp': 1,
        'cursor': cursor_value,
        'start': 15,
        'dpr': 1,
    }
    payload.update({
        '__user': self.user_id,
        '__a': 1,
        '__dyn': self.dyn_rand(),
        '__req': 1,
        '__be': 1,
        '__pc': 'PHASED%3ADEFAULT',
        '__rev': self.next_req_params['__rev'],
        '__spin_r': self.next_req_params['__spin_r'],
        '__spin_b': 'trunk',
        '__spin_t': self.next_req_params['__spin_t'],
    })

    began_at = time.time()
    response = self.sess.get(self.members_url, params=payload)
    self.current_result = response
    log('Doing new request:')
    elapsed = round(time.time() - began_at, 2)
    self.requests += 1

    log('~~~~URL~~~~')
    log(response.url)
    log_html(response, 'members_last_req')
    summary = 'Request number {} finished with status code {} in {} seconds'
    log(summary.format(self.requests, response.status_code, elapsed))
    time.sleep(2)
def get_groups_members(self):
    """Create an ``FBGroup`` for every group of the user and collect its members.

    Each group object is stored in ``self.groups`` under its group id before
    ``get_members()`` is called on it.
    """
    owner_id = self.user['user_id']
    log('Getting group members for user {}'.format(owner_id))
    for group_id in self.user['groups']:
        group = FBGroup(
            group_id=group_id,
            user_id=owner_id,
            basket_size=15,
            sess=self.sess,
        )
        self.groups[group_id] = group
        group.get_members()
def get_cached_members(self):
    """Merge previously cached member ids into ``self.members``.

    Reads the pickle stored at ``self.cache_file_path`` and unions its
    content into ``self.members``. When the cache file does not exist,
    ``self.members`` is left untouched and the miss is only logged.
    """
    try:
        # NOTE: pickle must only be used on cache files this tool wrote itself;
        # unpickling an untrusted file can execute arbitrary code.
        # The context manager guarantees the handle is closed after loading.
        with open(self.cache_file_path, 'rb') as cache_file:
            cached_members = pickle.load(cache_file)
    except FileNotFoundError:
        log('No cache found for group id {}'.format(self.group_id))
        return

    log('Loaded {} cached members'.format(len(cached_members)))
    self.members |= cached_members
def __init__(self, user_id, sess, uid):
    """Fetch the profile page of ``uid`` and prepare access to its about section.

    :param user_id: the logged user id
    :param sess: requests.Session instance
    :param uid: the viewed user id
    """
    # Sections that were already loaded are kept here for reuse
    self.profile_sections = {}
    self.user_id = user_id
    self.uid = uid
    self.sess = sess

    # The request gets redirected to the vanity url if that exists, so the
    # final response url is what is stored as ``vanity_url`` below.
    profile_address = self.profile_url.format(uid)
    page = self.sess.get(profile_address)
    self.profile_page = page
    log('Retrieving profile page at url {}'.format(page.url))
    self.vanity_url = page.url

    self.secured_url = self.get_secured_url()
    self.about_page = self.get_about_section('overview')
def __init__(self, group_id, user_id, basket_size, sess):
    """Load the group's members page and collect the first batch of members.

    Stores the constructor arguments, derives the cache file path from
    ``self.cache_path`` and the group id, downloads the members page, then
    gathers members, the current cursor and the parameters for follow-up
    requests from that response.
    """
    self.group_id = group_id
    self.user_id = user_id
    self.basket_size = basket_size
    self.sess = sess
    self.cursor = ''
    self.members = set()

    cache_name = '{}.pkl'.format(self.group_id)
    self.cache_file_path = os.path.join(self.cache_path, cache_name)

    # The members page is the starting point for all later requests
    members_page_url = 'https://www.facebook.com/groups/{}/members/'.format(
        group_id)
    self.current_result = self.sess.get(members_page_url)
    log_html(self.current_result, 'group_members')

    log('Retrieving members from initial group page..')
    self.gather_members()
    self.get_current_cursor()
    self.next_req_params = self.get_next_req_params()
def get_secured_url(self):
    """Return the url of the viewed user's about section including its ``lst`` token.

    The profile page text is searched for a link to the about section that
    carries an ``lst=<logged user id>...`` query value. Two link shapes are
    tried in order:

    1. ``<vanity_url>/about?lst=...`` (profile reachable through a vanity url)
    2. ``<vanity_url>&lst=...`` (no vanity url, e.g. ``profile.php?id=...``)

    The url and user id are escaped before being placed in the pattern, so
    characters such as ``.`` or ``?`` only match themselves.

    :return: the about-section url as a string
    :raises CrucialFBDataNotFound: when neither link shape is present
    """
    page_text = self.profile_page.text
    viewer_id = re.escape(str(self.user_id))

    vanity_pat = r'{}\?lst={}(.+?)"'.format(
        re.escape(self.vanity_url + '/about'), viewer_id)
    vanity_match = re.search(vanity_pat, page_text)
    if vanity_match is not None:
        return self.vanity_url + '/about?lst={}{}'.format(
            self.user_id, vanity_match.group(1))

    log('Does not have vanity url')
    plain_pat = r'{}&lst={}(.+?)"'.format(
        re.escape(self.vanity_url), viewer_id)
    plain_match = re.search(plain_pat, page_text)
    if plain_match is None:
        raise CrucialFBDataNotFound(
            'No secure url to about section found on user\'s page')

    # Resulting shape:
    # https://www.facebook.com/profile.php?id=<uid>&lst=<id>%3A<uid>%3A<ts>&sk=about
    return self.vanity_url + '&lst={}{}&sk=about'.format(
        self.user_id, plain_match.group(1))
def login(self):
    """Submit the login form found on the start page with the user's credentials.

    The form with id ``login_form`` is parsed so that every named input is
    posted back with its existing value; ``email`` and ``pass`` are then
    filled from ``self.user``. The response to the post is stored in
    ``self.current_result``.
    """
    log('Getting to login page...')
    login_url = "https://www.facebook.com/"
    self.current_result = self.sess.get(login_url)

    # The post data is constructed from the inputs of the login form
    soup = BeautifulSoup(self.current_result.text, 'html.parser')
    form = soup.find_all('form', id='login_form')[0]
    action_url = form["action"]
    post_data = {
        field["name"]: field["value"] if field.has_attr("value") else ''
        for field in form.find_all('input')
        if field.has_attr("name")
    }
    post_data['email'] = self.user['username']
    post_data['pass'] = self.user['password']

    log('Sending login request to {}'.format(action_url))
    self.current_result = self.sess.post(action_url, post_data)
    log_html(self.current_result, 'login')
def get_user_data(self):
    """Collect the profile fields of the viewed user into one dict.

    Name, date of birth, location and gender are resolved in that order
    through the corresponding ``get_*`` methods, each value being logged as
    soon as it is known.

    :return: dict with keys ``id``, ``firstname``, ``lastname``, ``gender``,
        ``dob`` and ``country_state_city``
    """
    first, last = self.get_name()
    log('Firstname, Lastname - {}, {}'.format(first, last))

    birth_date = self.get_dob()
    log('DOB is {}'.format(birth_date))

    location = self.get_country_state_city()
    log('Country state city are {}'.format(location))

    sex = self.get_gender()
    log('Gender is {}'.format(sex))

    return dict(
        id=self.uid,  # temporary
        firstname=first,
        lastname=last,
        gender=sex,
        dob=birth_date,
        country_state_city=location,
    )
def main():
    """Call the demo functions in a fixed order, reload ``mod1`` and log ``mod1_global``.

    Every call below is bracketed by a "function start" / "function end"
    log entry. The functions are reached through different kinds of names
    (module attribute, bare name, package attribute), so the statement order
    is kept exactly as written.
    """
    log("function start")
    mod1.mod1func()  # called through the module object
    mod2func()  # called through a bare name
    myfunc()
    pack1.moda.modafunc()  # called through the package attribute chain
    modbfunc()
    modc.modcfunc()
    modd.moddfunc()
    # Re-executes mod1's module body
    importlib.reload(mod1)
    log("Imported global = {}".format(mod1_global))
    log("function end")
def get_members(self):
    """Collect all member ids of the group and persist them to the cache file.

    1. Loads previously cached members.
    2. Repeatedly requests the next page, gathers its member ids and reads
       the next cursor, until ``NoCursor`` or ``RepeatingMembers`` is raised
       by one of those steps.
    3. Serializes ``self.members`` to ``self.cache_file_path``.

    :raises NoUsersFound: when no member was collected at all
    """
    log('Retrieving members from group id {}'.format(self.group_id))
    self.get_cached_members()

    try:
        while True:
            self.make_new_members_request()
            self.gather_members()
            self.get_current_cursor()
    except NoCursor:
        log('No cursor found')
    except RepeatingMembers:
        log('Repeating members')

    if not self.members:
        log('No members found for group {}'.format(self.group_id))
        raise NoUsersFound

    # The context manager flushes and closes the cache file even if
    # pickling fails part way through.
    with open(self.cache_file_path, 'wb') as cache_file:
        pickle.dump(self.members, cache_file)
def get_current_cursor(self):
    """Extract the paging cursor from the current response.

    The regex match object is stored in ``self.cursor``; the cursor value
    itself is its first group.

    :raises NoCursor: when the response text contains no cursor (in that
        case ``self.cursor`` is left as ``None``)
    """
    log('Retrieving cursor from response ..')
    cursor_pattern = r'sectiontype=recently_joined&memberstabexp=1&cursor=(.*?)&'
    self.cursor = re.search(cursor_pattern, self.current_result.text)

    if self.cursor is not None:
        log('Current cursor is now {}'.format(self.cursor.group(1)))
        return

    log('No cursor found in current response!')
    log_html(self.current_result, 'cursor_error')
    raise NoCursor('No cursor found in current response!')
def export_group_members(self):
    '''
    Exports the members of every group in ``self.groups`` to csv.

    One file per group is written to ``self.results_csv`` formatted with the
    group id. Each row holds the profile data of one member; profile data is
    looked up in the profiles registry and only fetched through ``FBProfile``
    when the registry does not have it yet. Members whose profile raises
    ``CrucialFBDataNotFound`` are logged and skipped. A ``PermissionError``
    is logged instead of being propagated.

    :return: None
    '''
    import csv
    try:
        # Registry of already resolved profiles
        fbProfilesRegistry = FBProfilesRgistry()
        for group_id, group_obj in self.groups.items():
            with open(self.results_csv.format(group_id), 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
                # Header row
                writer.writerow([
                    'UID', 'Firstname', 'Lastname', 'Gender', 'Date of birth',
                    'Location'
                ])
                for member_id in group_obj.members:
                    log('Resolving member id {} ...'.format(member_id))
                    try:
                        # Only hit the network for members the registry does not know
                        if not fbProfilesRegistry.profile_exist(member_id):
                            fbuser = FBProfile(self.user['user_id'], self.sess, member_id)
                            fbProfilesRegistry.add_profile(
                                member_id, fbuser.get_user_data())
                        user_data = fbProfilesRegistry.retrieve_profile(
                            member_id)
                        writer.writerow([
                            user_data['id'], user_data['firstname'],
                            user_data['lastname'], user_data['gender'],
                            user_data['dob'], user_data['country_state_city']
                        ])
                    except CrucialFBDataNotFound as e:
                        # This member is left out of the csv; carry on with the next one
                        log(str(e))
                        continue
        # Save fb profile registry to cache
        fbProfilesRegistry.save_to_cache()
    except PermissionError as detail:
        log('PermissionError: {}'.format(detail))
def modbfunc():
    """Log that modbfunc is being executed."""
    message = "executing modbfunc"
    log(message)
from log_setup import log

# Logged when the module body starts executing; __name__ shows under which
# name the module is being run.
log('module __name__ = {} : start'.format(__name__))


def modbfunc():
    """Log that modbfunc is being executed."""
    log("executing modbfunc")


# Logged once the definitions above have been processed.
log('module __name__ = {} : end'.format(__name__))
def mod1func():
    """Module level demo function: writes a log entry on every call."""
    message = "executing mod1func"
    log(message)
def mod2func():
    """Log the invocation, then delegate to ``mod1.mod1func``."""
    message = "executing mod2func"
    log(message)
    mod1.mod1func()
import errno
import json
import os

from log_setup import log
from fb_crawler import FBCrawler

# Set up the working folders; a folder that already exists is not an error
for directory in ['results', 'logs', 'logs/responses', 'cache']:
    try:
        os.makedirs(directory)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

log('--------------------')
log('Starting a new session')

# Import the user config file; the context manager closes it after parsing
users_file = 'users.json'
with open(users_file) as users_fh:
    users = json.load(users_fh)
logs_count = 0

# Run the crawler once for every configured account
for user in users:
    fb_handle = FBCrawler(user)
    fb_handle.refresh_members()
    log('!!!!!!!!!! Done with user {} !!!!!!!!!!!!!!'.format(user['username']))

log('Done!')
from log_setup import log #log("init start") log('init __name__ = {} : start'.format(__name__)) __all__ = ['modc', 'modd'] log('init __name__ = {} : end'.format(__name__)) #log("init end")
def __init__(self):
    """Load the cached profiles from the pickle at ``self.profiles_cache``.

    On success the unpickled object is stored in ``self.profiles``. When the
    cache file does not exist, the miss is logged.
    """
    try:
        # NOTE: pickle must only be used on cache files this tool wrote itself.
        # The context manager guarantees the handle is closed after loading.
        with open(self.profiles_cache, 'rb') as cache_file:
            self.profiles = pickle.load(cache_file)
    except FileNotFoundError:
        # NOTE(review): ``self.profiles`` is not assigned on this path -
        # presumably a class-level default exists; confirm against the class.
        log('No cache found for fb profiles...')