def set_cursor_loc(self):
    """
    Re-establish a known mouse position and reset the scroll position.

    Opens a nearly empty page, stores the clientHeight of its html element
    in self.height, moves the pointer onto a paragraph of that page and
    records the paragraph's coordinates as the current mouse location.
    Finally the mouse is sent to a random point and self.scroll(0) is called.
    :return: None
    """
    # page that offers an html element with known coordinates
    blank_page_url = 'https://this-page-intentionally-left-blank.org'
    Util.connection_handler(self.driver, blank_page_url)

    # Fix height (maybe move this to a more reasonable location)
    html_root = self.driver.find_element_by_tag_name('html')
    self.height = int(html_root.get_attribute('clientHeight'))

    anchor = self.driver.find_element_by_xpath(
        '//*[@id="this-page-intentionally-left-blank.org"]/p')
    # NOTE(review): the stored coordinates come from anchor.location; whether
    # offset (0, 0) points at exactly that spot depends on the offset origin
    # used by the installed Selenium version - confirm.
    ActionChains(self.driver).move_to_element_with_offset(
        anchor, 0, 0).perform()

    anchor_pos = anchor.location
    self.x_mouse_loc = int(anchor_pos['x'])
    self.y_mouse_loc = int(anchor_pos['y'])

    # leave the anchor again: random mouse target, then scroll towards 0
    random_x = random.randint(0, self.width)
    random_y = random.randint(0, self.height)
    self.mouse_to(random_x, random_y)
    self.scroll(0)
def time_handler(self, t_elapsed, start_up=False):
    """
    Pause between rounds of searches, depending on the time of day.

    :param t_elapsed: accepted but not used by this method
    :param start_up: if True, the pause is drawn between 0 and the base
        delay; otherwise between 1x and 1.5x the base delay
    :return: truthy when the current time counts as office hours or when
        self.night_search is set

    The base delay is self.delay_min / self.nr_inst, multiplied by 60 before
    sleeping (delay_min is presumably given in minutes - confirm).
    """
    # Util.get_time presumably returns the current hour in the given
    # timezone - confirm. Values below 3 or above 22 are not office hours.
    too_early = 3 > Util.get_time(self.timezone)
    office_hour = not (too_early or 22 < Util.get_time(self.timezone))

    # Without night search, idle in 5 minute steps while the time lies
    # strictly between 3 and 7.
    while True:
        if self._night_search:
            break
        if not 3 < Util.get_time(self.timezone) < 7:
            break
        time.sleep(300)

    if not start_up:
        base_wait = 60 * self.delay_min / self.nr_inst
        wait = random.uniform(base_wait, 1.5 * base_wait)
    else:
        wait = 60 * random.uniform(0, self.delay_min / self.nr_inst)

    # the pause is only taken during office hours
    if office_hour and wait > 0:
        time.sleep(wait)
    return office_hour or self.night_search
def timezone(self, zone_name):
    """
    Validate a timezone name and store it in self._timezone.

    :param zone_name: name of the timezone; when None, the value returned
        by Util.get_timezone() is used instead
    :return: None
    :raises ValueError: if the name is not contained in
        Data/diverse/timezones.json
    """
    zone_name = Util.get_timezone() if zone_name is None else zone_name

    # the json file holds the collection of accepted timezone names
    with open('Data/diverse/timezones.json', 'r') as handle:
        known_zones = json.load(handle)

    if zone_name in known_zones:
        self._timezone = zone_name
        return
    raise ValueError(
        f'{zone_name} is not a valid timezone. See Data/diverse/timezones.json')
def scroll(self, y_goal):
    """
    Scroll towards y_goal with an accelerate-then-decelerate motion.

    :param y_goal: target y location
    :return: self.y_scroll_loc, the realized y location

    The movement stops once the goal has been passed or the speed has
    dropped to zero, so the page ends up around y_goal, not exactly on it.
    """
    distance = Util.dist(0, 0, y_goal, self.y_scroll_loc)
    heading = np.sign(y_goal - self.y_scroll_loc)

    # acceleration (first value) and deceleration (second value) magnitudes,
    # chosen by how far there is to go
    if distance > 2000:
        accel, decel = random.uniform(5, 8), 1
    elif distance > 400:
        accel, decel = random.uniform(2, 6), 1
    else:
        accel, decel = random.uniform(0.2, 2), random.uniform(0.5, 2)

    # number of steps spent accelerating: a lower bound plus 0 to 2 extra
    min_steps = math.ceil(
        np.sqrt(2 * distance * decel / (accel**2 * (decel + 1))))
    steps_left = random.randint(min_steps, min_steps + 2)

    push = heading * accel
    brake = -heading * decel
    velocity = 0
    while np.sign(y_goal - self.y_scroll_loc) == heading:
        y_next = self.y_scroll_loc + int(velocity)
        self.driver.execute_script(f'window.scrollTo(0,{y_next})')
        # read back where the page actually ended up
        reported = self.driver.execute_script('return window.pageYOffset;')
        self.y_scroll_loc = 0 if reported is None else reported

        if steps_left > 0:
            velocity += push
            steps_left -= 1
        else:
            velocity += brake
        # stop as soon as the speed no longer points towards the goal
        if heading * velocity <= 0:
            break
        time.sleep(0.02)
    return self.y_scroll_loc
def __init__(self,
             port,
             nr_inst,
             flag,
             nr_searches_creation,
             path_terms_creation,
             path_terms_benign,
             nr_searches_exp,
             path_terms_experiment,
             swarm_name,
             proxy=None,
             timezone=Util.get_timezone(),
             nr_results=1,
             delay_min=10,
             night_search=False,
             dir_results='Data/results/',
             dir_log='Data/log_files/swarms/',
             visual=False
             ):
    """
    Store the swarm configuration, derive result paths and read the log.

    :param port: string, access selenium docker image
    :param nr_inst: int, nr. of individual bots the swarm runs
    :param flag: string, flag indicates political orientation/nature of the bot
    :param nr_searches_creation: int, nr. of searches to run without storing
        results
    :param path_terms_creation: str, path to a json containing political
        creation terms
    :param path_terms_benign: str, path to a json containing non political
        terms
    :param nr_searches_exp: int, nr. of searches in experiment
    :param path_terms_experiment: str, path to a json containing 'neutral'
        political terms
    :param swarm_name: str, name of this swarm
    :param proxy: proxy settings; when None, {''} is stored instead
    :param timezone: timezone assigned to self.timezone; the default is the
        value Util.get_timezone() returned when the function was defined
    :param nr_results: int, nr of results to store
    :param delay_min: int, delay between rounds of searches
    :param night_search: boolean, if False, searches are only conducted
        during the day
    :param dir_results: str, directory where results are to be stored
    :param dir_log: str, directory where swarm creates its log
    :param visual: boolean, if True, instances are run in non dockerized,
        visual selenium
    """
    self.port = port
    self.nr_inst = nr_inst
    self.flag = flag
    self.nr_searches_creation = nr_searches_creation
    self.path_terms_creation = path_terms_creation
    self.visual = visual
    self.path_terms_benign = path_terms_benign
    self.nr_searches_exp = nr_searches_exp
    self.path_terms_experiment = path_terms_experiment
    self.swarm_name = swarm_name
    self.proxy = {''} if proxy is None else proxy
    self.timezone = timezone

    # result files are named after the swarm
    self.dir_results = dir_results
    self.path_results = f'{self.dir_results}{self.swarm_name}.csv'
    self.path_searches = f'{self.dir_results}{self.swarm_name}_searches.csv'

    # instances start out as an empty dictionary
    self.instances = {}

    self.nr_results = nr_results
    self.delay_min = delay_min
    self.night_search = night_search

    # placeholders for the term lists and the experiment progress
    self.create_terms = 'empty'
    self.benign_terms = 'empty'
    self.exp_terms = 'empty'
    self.exp_progress = 0

    self.path_log = dir_log
    self.log = None
    self.handle_log('r')

    # Profile directory as seen by the host and by selenium; the two only
    # differ when the instances are not run visually.
    host_profiles = f'/Users/johannes/Uni/HSG/googlebot/Data/profiles/swarm_{self.swarm_name}'
    self._profile_dir = {
        'Host': host_profiles,
        'Selenium': host_profiles if visual else '/home/profiles',
    }
def partial_mouse(self, x_goal, y_goal, fast=False):
    """
    Execute a mouse movement to (x_goal, y_goal)
    :param x_goal: numeric within (0, self.width) target x_val
    :param y_goal: numeric within (0, self.height) target y_val
    :param fast: when True and the distance is below 90, the number of
        intermediate jumps is halved
    :return: None

    The path is interpolated between the current mouse location, one
    deviation point and the goal, and executed as a chain of relative moves.
    self.x_mouse_loc / self.y_mouse_loc are updated to the last queued point.
    """
    # restrict x_goal and y_goal to valid range
    x_goal = Util.clamp(x_goal, self.width)
    y_goal = Util.clamp(y_goal, self.height)

    # calculate diagonal distance for mouse to travel
    d = Util.dist(self.x_mouse_loc, x_goal, self.y_mouse_loc, y_goal)
    significant_movement = d > 10

    # calculate speed of mouse, uses an s curve with an additional growth
    # factor
    a = random.uniform(7, 10)
    b = random.uniform(4, 6)
    speed = (a / (1 + math.exp(-0.1 * (d - 100))) + math.sqrt(d / 4) + b)

    # n_jumps is given by the inverse of speed times distance
    # uses 1 when distance is 0
    n_jumps = max(1, d) * (1 / speed)
    # speed up movement in case this is desired
    if fast and d < 90:
        n_jumps = n_jumps // 2
    # convert n_jumps to int and ensure > 0
    n_jumps = max(int(np.ceil(n_jumps)), 1)

    # Calculate splines for x and y; start and goal are listed twice and
    # (r_x, r_y) is the deviation point in between
    r_x = self.mouse_deviation(x_goal, 'x', significant_movement, d)
    r_y = self.mouse_deviation(y_goal, 'y', significant_movement, d)
    points = np.array([[self.x_mouse_loc, self.y_mouse_loc],
                       [self.x_mouse_loc, self.y_mouse_loc],
                       [r_x, r_y],
                       [x_goal, y_goal],
                       [x_goal, y_goal]])
    x = points[:, 0]
    y = points[:, 1]
    t = [0, 0.1, 0.5, 0.9, 1]
    if n_jumps > 1:
        ipl_t = np.linspace(0.0, 1, n_jumps)
    else:
        # np.linspace(0.0, 1, 1) only contains 0.0, i.e. the start of the
        # path, which would leave the mouse where it already is. A single
        # jump therefore has to be taken at the end of the path.
        ipl_t = np.array([1.0])
    x_i = Util.calc_spline(t, ipl_t, x)
    y_i = Util.calc_spline(t, ipl_t, y)

    # Execute Mouse movement: queue one relative move per interpolated
    # point and perform the whole chain at once
    action = ActionChains(self.driver)
    for mouse_x, mouse_y in zip(x_i, y_i):
        mouse_x = int(mouse_x)
        mouse_y = int(mouse_y)
        action.move_by_offset(mouse_x - self.x_mouse_loc,
                              mouse_y - self.y_mouse_loc)
        self.x_mouse_loc = mouse_x
        self.y_mouse_loc = mouse_y
    action.perform()
} } loc_0, lim = axis_dict[axis].values() if (dist := abs(loc_0 - loc_1)) > 4: min_loc = min(loc_1, loc_0) + dist / 4 max_loc = min(loc_1, loc_0) + 3 * dist / 4 r = int(random.uniform(min_loc, max_loc)) elif allways_deviate: max_offset = int(np.ceil(d**(3 / 4))) // 6 min_offset = max_offset // 5 offset = random.choice([-1, 1]) * random.randint( min_offset, max_offset) r = loc_0 + offset else: r = loc_0 r = Util.clamp(r, lim) return r def partial_mouse(self, x_goal, y_goal, fast=False): """ Execute a mouse movement to (x_goal, y_goal) :param x_goal: numeric within (0, self.width) target x_val :param y_goal: numeric within (0, self.height) :param fast: when dist<100 speed will be doubled when fast == True :return: None """ # restrict x_goal and y_goal to valid range x_goal = Util.clamp(x_goal, self.width) y_goal = Util.clamp(y_goal, self.height)
def create(swarm):
    """
    Launch a swarm with exist=False and hand it back.

    :param swarm: object providing launch(exist=...)
    :return: the same swarm object
    """
    swarm.launch(exist=False)
    return swarm


def launch_control(all_bots, process):
    """
    Decide whether a launch may take place.

    :param all_bots: passed on to searches_remaining
    :param process: process identifier; 'e' always permits the launch
    :return: True if searches_remaining('c', all_bots) equals 0 or process
        is 'e', otherwise False
    """
    creation_complete = searches_remaining('c', all_bots) == 0
    return bool(creation_complete or process == 'e')


# docker run -p 4445:4444 -d --shm-size=2g --name bot_2 selenium/standalone-firefox
if __name__ == "__main__":
    # interactive configuration of the run
    run_visualization = Util.speech_bool(
        input('Only run a visual example? (y/n): '))
    nr_inst = int(input('Nr. of instances per swarm: '))
    delay = int(input('Delay between searches: '))
    use_proxy = Util.speech_bool(input('Run Bots through a proxy? (y/n): '))

    # with a proxy the timezone comes from the proxy data, otherwise from
    # Util.get_timezone()
    if not use_proxy:
        proxy_data = {}
        timezone = Util.get_timezone()
    else:
        with open('Data/proxy_data/proxy_data.json', 'r') as raw_proxy_data:
            proxy_data = json.load(raw_proxy_data)
        timezone = proxy_data['main']['TZ']

    if run_visualization:
        nr_creation = int(
            input('Please input the desired nr. of creation searches: '))
for agent in profiles[0:500]: options = webdriver.FirefoxOptions() profile = webdriver.FirefoxProfile() profile.set_preference("general.useragent.override", agent) options.profile = profile driver = webdriver.Firefox(options=options) driver.get('https://www.google.com') term = r.choice([ 'USA', 'France', 'Greeece', 'Thailand', 'Germany', 'Denmark', 'Italy', 'EU', 'UN', 'Ghandi', 'swaziland', 'hero', 'covid', 'covid 19', 'new cases', 'new york', 'los anngeles', 'Nice', 'Jazz' ]) search_field = driver.find_element_by_name("q") search_field.clear() time.sleep(d0 := r.uniform(0.5, 1.5)) Util.natural_typing_in_field(search_field, term) time.sleep(d1 := r.uniform(0.15, 0.5)) search_field.send_keys(Keys.RETURN) try: WebDriverWait(driver, 2).until( ec.presence_of_element_located((By.CLASS_NAME, 'rc'))) driver.close() except: driver.close() continue time.sleep(r.uniform(3, 10)) valid_agents.append(agent) with open('/Users/johannes/Uni/HSG/googlebot/Data/diverse/agents_2.0.json', 'w') as fl: