def loop_video_elements_only(driver):
    """Visit every ``div.entry-method`` on the page and watch the video ones.

    For each entry that ``is_done`` does not yet report as done, the entry's
    label text is matched against ``watch_pattern`` and
    ``checkout_video_pattern``; when either matches and ``is_youtube`` accepts
    the entry, ``watch`` is invoked. Afterwards the entry's ``expandable``
    panel (looked up by the entry's position) is hidden through JavaScript.

    Returns:
        True when the number of entries reporting done equals the number of
        entries found (which includes the zero-entry case), False otherwise.
    """
    entries = driver.find_elements_by_css_selector("div.entry-method")
    finished = 0

    for position, element in enumerate(entries):
        # Hover over the entry before inspecting it.
        ActionChains(driver).move_to_element(element).perform()

        labels = element.find_elements_by_css_selector("a > span.text")
        txt = labels[0].text.lower().strip()

        # NOTE(review): is_done's return type is not visible here, so the
        # explicit comparisons against False/True are kept as-is.
        if is_done(element) == False:  # noqa: E712
            try:
                looks_like_video = (
                    watch_pattern.match(txt) is not None
                    or checkout_video_pattern.match(txt) is not None
                )
                if looks_like_video and is_youtube(element):
                    watch(driver, element)
            except Exception as e:
                myprint('Must be due to inner form case somwhere', e)

            hide_script = (
                f"document.getElementsByClassName('expandable')[{position}]"
                ".style.visibility='hidden';"
            )
            try:
                driver.execute_script(hide_script)
            except Exception as e:
                myprint('Seems there was no expandable here', e)

        if is_done(element) == True:  # noqa: E712
            finished += 1

    return finished == len(entries)
def crawl(d):
    """Open a Chrome session, restore saved cookies and process every video URL.

    Args:
        d: Option mapping parsed from the command line. When it contains the
            key ``'sleep'`` a randomized start-up delay is used instead of
            the fixed two seconds.

    The browser is always shut down, even when restoring cookies or any other
    step before/within the URL loop raises.
    """
    if 'sleep' in d:
        time.sleep(
            random.randint(100, constants.ANTIFRAUD_FUZZY_SLEEP_LIMIT * 10))
    else:
        time.sleep(2)

    cnt = 0
    cookie_paths = (
        '/tmp/gleam_fb_cookie',
        '/tmp/gleam_google_cookie',
        '/tmp/gleam_twitter_cookie',
        '/tmp/gleam_instagram_cookie',
        '/tmp/gleam_soundcloud_cookie',
        '/tmp/gleam_steampowered_cookie',
    )

    driver = webdriver.Chrome(constants.CHROME_DRIVER_PATH)
    try:
        # A page must be loaded before cookies can be added to the session.
        driver.get("https://google.com")
        for cookie_path in cookie_paths:
            load_cookie(driver, cookie_path)

        for href in video_url_arr:
            try:
                myprint("==", href.replace('\n', ''), "==")
                try:
                    open_url(driver, d, href, cnt)
                except Exception as e:
                    myprint(constants.CRED + "do:" + constants.CEND, e)
            except Exception:
                # Best effort: a failure while reporting on one URL must not
                # stop the remaining URLs from being processed.
                pass
    finally:
        # Previously skipped when anything above raised, leaking the browser.
        driver.quit()
def subscribe_channel(driver, url):
    """Open ``url`` and click the element matching ``#confirm-button > a``.

    Any exception raised along the way is reported through ``myprint`` and
    not propagated. After a successful click the function waits
    ``constants.SOCIAL_FOLLOW_LIMIT_WAIT`` plus 0-5 random seconds.
    """
    confirm_selector = "#confirm-button > a"
    try:
        driver.get(url)
        time.sleep(1)
        # Property access kept from the original; its result is unused.
        driver.switch_to.active_element
        confirm_link = driver.find_element_by_css_selector(confirm_selector)
        time.sleep(1)
        confirm_link.click()
        cooldown = constants.SOCIAL_FOLLOW_LIMIT_WAIT + random.randint(0, 5)
        time.sleep(cooldown)
    except Exception as e:
        myprint("Might be already subscribed", e)
def type5_click_continue(driver, element, patt=''):
    """Click the first "continue" button found under ``element``.

    Args:
        driver: WebDriver used to build the mouse action.
        element: Element whose descendants are searched for the button.
        patt: Label prepended to the log message.

    Returns:
        False when no button matches the selector, True after the click has
        been performed.
    """
    selector = (
        "div.body-widget > div > div > div > div > div:nth-child(1) > "
        "div:nth-child(6) > div:nth-child(2) > div:nth-child(2) > div > "
        "form > div > span:nth-child(1) > button"
    )
    matches = element.find_elements_by_css_selector(selector)
    if not matches:
        return False

    # Move onto the button, then click at the current pointer position.
    ActionChains(driver).move_to_element(matches[0]).click().perform()
    myprint(constants.CGREEN + patt,
            'type5_click_continue: continue clicked' + constants.CEND)
    return True
def type4_click_continue(driver, element, patt=''):
    """Click the form's continue button unless it is marked ``disabled``.

    When two or more buttons match, the first is used if its text is exactly
    ``'Continue'``; otherwise the second is used.

    Args:
        driver: WebDriver used to build the mouse action.
        element: Element whose descendants are searched for the button.
        patt: Label prepended to the log message.

    Returns:
        False when no button is found or the chosen button carries the
        ``disabled`` CSS class, True after the click has been performed.
    """
    continue_buttons = element.find_elements_by_css_selector(
        "div > div > form > div.form-actions.center > button")
    if not continue_buttons:
        myprint("Continue button not found")
        return False

    click_button = continue_buttons[0]
    try:
        if (len(continue_buttons) >= 2
                and continue_buttons[0].text != 'Continue'):
            click_button = continue_buttons[1]
        # get_attribute returns None when the attribute is absent.
        css_classes = (click_button.get_attribute("class") or "").split(' ')
        is_disabled = 'disabled' in css_classes
    except Exception:
        # Keep the original best-effort behaviour: if the button cannot be
        # inspected, still attempt the click. Unlike the former bare
        # ``except:``, KeyboardInterrupt/SystemExit are no longer swallowed.
        is_disabled = False

    if is_disabled:
        myprint("Still disabled")
        return False

    action = ActionChains(driver)
    action.move_to_element(click_button)
    action.click().perform()
    myprint(patt, 'type4_click_continue: continue clicked')
    return True
def _login_via_provider(driver, login_options, css_class, fallback_selector,
                        failure_msg):
    """Try to log in through the login option carrying ``css_class``.

    Returns True once ``is_logged_in`` confirms the login, False when both
    the primary click and the fallback click raised, and None when no click
    resulted in a logged-in session.
    """
    for option in login_options:
        for child in option.find_elements_by_css_selector("*"):
            # get_attribute returns None when the attribute is absent.
            classes = (child.get_attribute("class") or "").split(' ')
            if css_class not in classes:
                continue
            try:
                option.click()
                time.sleep(2)
            except Exception:
                # Primary click failed: fall back to the provider anchor
                # located through its full path in the popup.
                try:
                    fallback = driver.find_elements_by_css_selector(
                        fallback_selector)
                    fallback[0].click()
                    time.sleep(2)
                except Exception:
                    myprint(failure_msg)
                    return False
            if is_logged_in(driver):
                return True
    return None


def login(driver):
    """Log in through the popup's Facebook option, then its Twitter option.

    Returns:
        True as soon as ``is_logged_in(driver)`` holds after a click. False
        when both click strategies of a provider raise (later providers are
        then not attempted) or when no provider produced a login.
    """
    time.sleep(2)
    login_options = driver.find_elements_by_css_selector(
        "div > div.popup-blocks-container > div > div:nth-child(1) > "
        "div:nth-child(6) > div:nth-child(2) > div:nth-child(2) > div > "
        "form > fieldset.center > div > ul > li")

    fallback_prefix = (
        "body > div.body-widget > div > div > div.popup-blocks-container > "
        "div > div:nth-child(1) > div:nth-child(6) > div:nth-child(2) > "
        "div:nth-child(3) > div > span > div:nth-child(2) > ul > ")
    providers = (
        ('facebook-background',
         fallback_prefix + "li > a.facebook-background",
         "login: Exception:(suppressed)"),
        # The Twitter fallback used to read "li:nth > a...", which is not a
        # valid CSS pseudo-class, so that fallback could never succeed. It
        # now mirrors the Facebook selector.
        ('twitter-background',
         fallback_prefix + "li > a.twitter-background",
         "login: Exception:"),
    )

    for css_class, fallback_selector, failure_msg in providers:
        outcome = _login_via_provider(driver, login_options, css_class,
                                      fallback_selector, failure_msg)
        if outcome is not None:
            return outcome

    myprint(constants.CRED + "login: FAILED:" + constants.CEND)
    return False
def open_url(driver, d, url, cnt):
    """Load one competition page and work through its video entries.

    Args:
        driver: WebDriver session to drive.
        d: Option mapping; the key ``'sleep'`` selects a randomized delay
            before loading instead of the fixed two seconds.
        url: Page to open; appended to ``data/video_page_links_done.txt``
            when every entry ends up done.
        cnt: Unused in this function.

    Returns early (None) when the page is reported as unavailable or ended.
    """
    if 'sleep' in d:
        pause = random.randint(10, constants.ANTIFRAUD_FUZZY_SLEEP_LIMIT)
    else:
        pause = 2
    time.sleep(pause)

    driver.get(url)
    myprint(driver.current_url)

    # Guard clauses: nothing to do on unavailable or finished competitions.
    if is_not_abailable(driver):
        myprint("Is NOT AVAILABLE in your region")
        return
    if has_ended(driver):
        myprint("This Competition has ended")
        return

    # Fall back to the plain details form when social login did not work.
    if not login(driver):
        fill_global_form(driver)

    if not loop_video_elements_only(driver):
        return
    myprint("DONE:", url)
    with open("data/video_page_links_done.txt", "a") as done_log:
        done_log.write(url + '\n')
def follow_gplus_page(driver, url):
    """Open ``url`` and click its "Follow" button when one is shown.

    On a successful click ``url`` is appended to ``data/links_done.txt`` and
    the function waits ``constants.SOCIAL_FOLLOW_LIMIT_WAIT`` plus 1-3 random
    seconds. A failing click is reported through ``myprint`` and not
    propagated.

    NOTE(review): the previous body referred to a name ``line`` that is
    neither a parameter nor a local, so it only worked if a module-level
    ``line`` happened to exist. The ``url`` parameter is used instead —
    confirm callers pass the same value.
    """
    driver.get(url)
    time.sleep(1)
    gplus_follow_buttons = driver.find_elements_by_css_selector(
        "div > c-wiz> div > div > div > div > div > div > content > span > div > div"
    )
    if not gplus_follow_buttons:
        myprint(f'{url} No button might be on wrong page')
        return

    follow_button = gplus_follow_buttons[0]
    if follow_button.text.strip() != "Follow":
        myprint(f'{url} Seems already following')
        return

    time.sleep(2)
    try:
        follow_button.click()
        with open("data/links_done.txt", "a") as outfile:
            # Exactly one record per line, whether or not url already ends
            # with a newline.
            outfile.write(url.rstrip('\n') + '\n')
        time.sleep(constants.SOCIAL_FOLLOW_LIMIT_WAIT +
                   random.randint(1, 3))
    except Exception as e:
        myprint(url, e)
return datetime.datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d %H:%M:%S') def load_cookie(driver, path): with open(path, 'rb') as cookies_file: cookies = pickle.load(cookies_file) for cookie in cookies: driver.add_cookie(cookie) driver.get("https://gleamdb.ga/") load_cookie(driver, '/tmp/gleam_fb_cookie') load_cookie(driver, '/tmp/gleam_google_cookie') time.sleep(5) driver.get("https://gleamdb.ga/") time.sleep(5) close_button = driver.find_element_by_css_selector('#modalclose') close_button.click() time.sleep(5) driver.execute_script("window.scrollBy(0,250)", "") time.sleep(5) html = driver.page_source complete_name = os.path.join(os.path.expanduser('data/'), 'GleamioDB.html') file_object = codecs.open(complete_name, "w", "utf-8") file_object.write(html) driver.quit() myprint("Download of GleamioDB completed")
def fill_global_form(driver):
    """Fill in and submit the widget's name / e-mail / date-of-birth form.

    Does nothing when the form is not present. Otherwise each field that is
    found is cleared and filled from ``constants``, the first mandatory
    checkbox (if any) is clicked, the form is submitted through
    ``type5_click_continue`` and, after a pause, the first Facebook account
    link is clicked when one is shown.

    Any exception while doing so is reported through ``myprint`` and not
    propagated (it used to be discarded silently).
    """
    global_forms = driver.find_elements_by_css_selector(
        "body > div.body-widget > div > div > div > div > div:nth-child(1) > "
        "div:nth-child(6) > div:nth-child(2) > div:nth-child(2) > div > form"
    )
    if not global_forms:
        return
    form = global_forms[0]

    # Common ancestor path of the individual input rows.
    field_prefix = (
        "div.body-widget > div > div > div > div > div:nth-child(1) > "
        "div:nth-child(6) > div:nth-child(2) > div:nth-child(2) > div > "
        "form > fieldset.inputs > div.form-horizontal > div > div > "
    )

    try:
        name_boxes = form.find_elements_by_css_selector(
            field_prefix + "div:nth-child(1) > div > input")
        if name_boxes:
            name_boxes[0].clear()
            name_boxes[0].send_keys(constants.FULL_NAME)

        email_boxes = form.find_elements_by_css_selector(
            field_prefix + "div:nth-child(2) > div > input")
        if email_boxes:
            email_boxes[0].clear()
            email_boxes[0].send_keys(constants.GOOG_ID + "@gmail.com")

        dob_boxes = form.find_elements_by_css_selector(
            field_prefix + "div:nth-child(3) > div > div > input")
        dob_formats = form.find_elements_by_css_selector(
            field_prefix + "div:nth-child(3) > div > div > div")
        if dob_boxes:
            dob_boxes[0].clear()
            # The format hint may be absent; indexing it unguarded used to
            # raise IndexError and abandon the form before submitting it.
            wants_us_format = (
                bool(dob_formats)
                and dob_formats[0].text.strip() == 'MM/DD/YYYY'
            )
            if wants_us_format:
                dob_boxes[0].send_keys(constants.DOB_MMDDYY)
            else:
                dob_boxes[0].send_keys(constants.DOB)

        mandatory_checkboxes = form.find_elements_by_css_selector(
            "div > form > fieldset.inputs > div.form-horizontal > div > div > "
            "div:nth-child(3) > div > div > label > span.icon"
        )
        if mandatory_checkboxes:
            mandatory_checkboxes[0].click()

        if type5_click_continue(driver, form, "global_forms") == False:  # noqa: E712
            myprint("Form can not be saved")
        time.sleep(5)

        # Two known layouts for the account chooser; try them in order.
        fb_account_selection = driver.find_elements_by_css_selector(
            "div.body-widget > div > div > div > div > div:nth-child(1) > "
            "div:nth-child(2) > div > ul > li:nth-child(1) > a.facebook-border"
        )
        if not fb_account_selection:
            fb_account_selection = driver.find_elements_by_css_selector(
                "div.body-widget > div > div > div.popup-blocks-container > "
                "div > div:nth-child(1) > div > div > ul > li > "
                "a.facebook-border"
            )
        if fb_account_selection:
            fb_account_selection[0].click()
            time.sleep(5)
        else:
            myprint("FB account not found")
    except Exception as e:
        # Still best effort, but no longer silent.
        myprint("fill_global_form: aborted:", e)
load_cookie(driver, '/tmp/gleam_fb_cookie') load_cookie(driver, '/tmp/gleam_google_cookie') load_cookie(driver, '/tmp/gleam_twitter_cookie') load_cookie(driver, '/tmp/gleam_instagram_cookie') load_cookie(driver, '/tmp/gleam_soundcloud_cookie') load_cookie(driver, '/tmp/gleam_steampowered_cookie') for href in video_url_arr: try: myprint("==", href.replace('\n', ''), "==") try: open_url(driver, d, href, cnt) except Exception as e: myprint(constants.CRED + "do:" + constants.CEND, e) pass except Exception: pass driver.quit() if __name__ == "__main__": argv = sys.argv[1:] d = {} for i in range(0, len(argv), 2): d[argv[i].replace('-', '')] = argv[i + 1] myprint(d) crawl(d) myprint(constants.CGREEN + 'Main PEACEFULLY terminated' + constants.CEND)
email_element.send_keys(Keys.ENTER) time.sleep(5) pass_element = driver.find_element_by_id("ap_password") if pass_element: pass_element.send_keys(constants.AMZN_PASS) pass_element.send_keys(Keys.ENTER) time.sleep(5) if __name__ == "__main__": argv = sys.argv[1:] d = {} for i in range(0, len(argv), 2): d[argv[i].replace('-', '')] = argv[i + 1] myprint(d) chrome_options = webdriver.ChromeOptions() if 'headless' in d: chrome_options.add_argument("--headless") prefs = {"profile.default_content_setting_values.notifications": 2} chrome_options.add_experimental_option("prefs", prefs) driver = webdriver.Chrome(executable_path=constants.CHROME_DRIVER_PATH, chrome_options=chrome_options) driver.get("https://google.com") load_cookie(driver, '/tmp/gleam_fb_cookie') load_cookie(driver, '/tmp/gleam_google_cookie') load_cookie(driver, '/tmp/gleam_twitter_cookie')
def save_cookie(driver, path):
    """Pickle the session's cookies to ``path`` and log where they went.

    Args:
        driver: WebDriver whose ``get_cookies()`` result is stored.
        path: Destination file; opened in binary write mode (overwritten).
    """
    with open(path, 'wb') as cookie_file:
        session_cookies = driver.get_cookies()
        pickle.dump(session_cookies, cookie_file)
    myprint(f'cookie saved at {path}')