def data_extractor(input_driver):
    """Extract the name/value pairs shown on the currently loaded result page.

    HELPER FUNCTION. Reads the text of every ``RightAlignBlack`` element
    (field names) and every ``LeftAlignGreen`` element (field values, with
    the ``' : '`` separator entries dropped), pairs them by position, and
    finally adds the "Standard Seating Capacity" field, which is read
    separately through two fixed XPaths.

    Args:
        input_driver: Selenium driver whose current page is the result page.

    Returns:
        tuple: ``(True, dict)`` mapping field name to field value on success,
        or ``(False, None)`` when an expected element does not show up within
        5 seconds or the page yields fewer values than the pairing needs.
    """
    # Both special-case cells live in the same table row; only the column
    # differs. Whitespace inside the expressions is kept as in the original.
    capacity_row_xpath = (
        ' //*[@id="ctl00_UpdatePanel1"]/table/tbody/tr[5]/td/table/tbody/tr[6]'
        ' /td[2]/fieldset/table/tbody/tr[3]')
    capacity_namexpath = capacity_row_xpath + '/td[5]'
    capacity_dataxpath = capacity_row_xpath + '/td[7]'

    try:
        # Field names, preceded by a one-element placeholder.
        namefields = waiter(input_driver, 5).until(
            EC.presence_of_all_elements_located(
                (By.CLASS_NAME, "RightAlignBlack")))
        name_list = [' '] + [str(name.text) for name in namefields]

        # Field values; the ' : ' separator entries carry no data.
        datafields = waiter(input_driver, 5).until(
            EC.presence_of_all_elements_located(
                (By.CLASS_NAME, "LeftAlignGreen")))
        data_list = []
        for data in datafields:
            data_str = str(data.text)
            if data_str != ' : ':
                data_list.append(data_str)

        # Special case: Standard Seating Capacity name and value.
        capacity_namefield = waiter(input_driver, 5).until(
            EC.presence_of_element_located((By.XPATH, capacity_namexpath)))
        cs_namefield = str(capacity_namefield.text)
        capacity_datafield = waiter(input_driver, 5).until(
            EC.presence_of_element_located((By.XPATH, capacity_dataxpath)))
        cs_datafield = str(capacity_datafield.text)

        final_dict = {}
        offset = 0
        for pos, nameval in enumerate(name_list):
            # The first two positions (placeholder and first name) are
            # never paired with a value.
            if pos < 2:
                continue
            # The seating-capacity value is stored separately below, so it
            # is skipped in the positional pairing.
            if cs_datafield == data_list[pos + offset]:
                offset += 1
            final_dict[nameval] = data_list[pos + offset]

        final_dict[cs_namefield] = cs_datafield
        return (True, final_dict)
    except (TimeoutException, IndexError):
        # TimeoutException: an expected element never appeared.
        # IndexError: names and values did not line up (incomplete page).
        # Both mean "no usable data", which callers handle by skipping.
        return (False, None)
def get_videos_url(driver, playlist_url):
    """Return ``(url, title)`` pairs for the videos of a YouTube playlist.

    Opens ``playlist_url``, waits (up to 60 s per step) for the playlist
    panel and its entries to become visible, then reads the ``href`` of each
    video link and the ``title`` of each video-title span inside the panel.

    Args:
        driver: Selenium driver used to load and query the page.
        playlist_url: URL of the playlist page to open.

    Returns:
        list: ``(href, title)`` tuples, paired by position with ``zip``.
    """
    driver.get(playlist_url)

    # The playlist panel must be visible before it can be queried.
    waiter(driver, 60).until(visibility((By.ID, 'playlist')))
    panel = driver.find_element_by_id('playlist')

    # Video links.
    link_selector = 'a[class$="panel-video-renderer"]'
    waiter(driver, 60).until(visibility((By.CSS_SELECTOR, link_selector)))
    hrefs = []
    for anchor in panel.find_elements_by_css_selector(link_selector):
        hrefs.append(anchor.get_attribute('href'))

    # Video names.
    title_selector = 'span[id="video-title"]'
    waiter(driver, 60).until(visibility((By.CSS_SELECTOR, title_selector)))
    titles = []
    for span in panel.find_elements_by_css_selector(title_selector):
        titles.append(span.get_attribute('title'))

    return list(zip(hrefs, titles))
def download_mp3(driver, video_url, video_name, download_path):
    """Convert a video to MP3 through video2mp3.net and wait for the file.

    Args:
        driver: Selenium driver used to operate the converter site.
        video_url: URL of the video to convert.
        video_name: Name of the video; ``<video_name>.mp3`` inside
            ``download_path`` is the file whose presence is checked.
        download_path: Directory in which the MP3 is looked for.

    Returns:
        str: A status message: ``"Download ALREADY COMPLETED: ..."`` when
        the file exists up front (the site is not contacted),
        ``"Download COMPLETED: ..."`` once the file appears, or
        ``"Download TIMED OUT: ..."`` when it has not appeared after about
        five minutes.
    """
    mp3_path = os.path.join(download_path, video_name + '.mp3')
    if os.path.exists(mp3_path):
        return "Download ALREADY COMPLETED: " + video_name

    driver.get('https://video2mp3.net')
    driver.switch_to.default_content()
    # Switch into the 'inneriframe' frame twice: after the first switch the
    # same selector is resolved again inside the frame just entered.
    iframe_selector = 'iframe[id="inneriframe"]'
    for _ in range(2):
        waiter(driver, 60).until(
            visibility((By.CSS_SELECTOR, iframe_selector)))
        iframe = driver.find_element_by_css_selector(iframe_selector)
        driver.switch_to.frame(iframe)

    # Fill in the URL. It is passed as a script argument instead of being
    # formatted into the JavaScript source, so backticks, `${...}` or quotes
    # in the URL cannot break or alter the script.
    waiter(driver, 60).until(clickable((By.ID, 'input')))
    input_el = driver.find_element_by_id('input')
    driver.execute_script(
        'arguments[0].value = arguments[1];', input_el, video_url)

    waiter(driver, 60).until(clickable((By.ID, 'submit')))
    button = driver.find_element_by_id('submit')
    driver.execute_script("arguments[0].click();", button)

    # Start the download.
    xpath = "//a[contains(text(),'Download')]"
    waiter(driver, 60).until(clickable((By.XPATH, xpath)))
    button = driver.find_element_by_xpath(xpath)
    driver.execute_script("arguments[0].click();", button)

    # Poll once per second, for up to five minutes, for the file to appear.
    for _ in range(5 * 60):
        if os.path.exists(mp3_path):
            return "Download COMPLETED: " + video_name
        time.sleep(1)
    # Do not claim success when the file never showed up.
    return "Download TIMED OUT: " + video_name
def get_videos_url(driver, playlist_url):
    """Collect the URL and title of every video listed in a playlist page.

    Args:
        driver: Selenium driver used to load and query the page.
        playlist_url: URL of the YouTube playlist to open.

    Returns:
        list: ``(href, title)`` tuples, paired by position with ``zip``.
    """
    driver.get(playlist_url)

    # Wait for the playlist panel, then scope every lookup to it.
    waiter(driver, 60).until(visibility((By.ID, 'playlist')))
    container = driver.find_element_by_id('playlist')

    def attribute_values(selector, attribute):
        """Wait for `selector` to be visible, then read `attribute` of each match."""
        waiter(driver, 60).until(visibility((By.CSS_SELECTOR, selector)))
        matches = container.find_elements_by_css_selector(selector)
        return [match.get_attribute(attribute) for match in matches]

    urls = attribute_values('a[class$="panel-video-renderer"]', 'href')
    names = attribute_values('span[id="video-title"]', 'title')
    return list(zip(urls, names))
def details_scraper():
    """Scrape vehicle details for every registration id and format them.

    MAIN FUNCTION. For each id produced by ``idgen.regid_generator()`` the
    search form is filled in and submitted, the "show details" button of the
    first result is clicked, and the result page is handed to
    ``data_extractor``. An id whose details button does not appear within
    4 seconds is skipped; a timeout while waiting for the search form itself
    stops the scrape. A responsive connection is assumed; the wait times can
    be reduced for faster runs.

    Returns:
        The value of ``json_formatter`` applied to the list of scraped
        dictionaries.
    """
    input_url = "http://mis.mptransport.org/MPLogin/eSewa/VehicleSearch.aspx"
    xpathval = ('//*[@id="ctl00_ContentPlaceHolder1_grvSearchSummary"]'
                '/tbody/tr[2]/td[2]/input')

    # Instance of the registration-id generator.
    regid = idgen.regid_generator()
    # Scraped records, one dictionary per registration id.
    data_list = []

    # Headless display for Firefox to open in.
    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        fire_drive = webdriver.Firefox()
        try:
            fire_drive.get(input_url)
            while True:
                # Reset per iteration so the timeout handler never sees an
                # unbound name or a value left over from the previous id.
                show_details = None
                awaiting_details = False
                try:
                    # Enter the registration number in the form.
                    regno_input_field = waiter(fire_drive, 5).until(
                        EC.presence_of_element_located(
                            (By.NAME, "ctl00$ContentPlaceHolder1$txtRegNo")))
                    regno_input_field.clear()
                    regno = next(regid)
                    regno_input_field.send_keys(regno)

                    # Submit the form.
                    submit_button = waiter(fire_drive, 5).until(
                        EC.presence_of_element_located(
                            (By.ID, "ctl00_ContentPlaceHolder1_btnShow")))
                    submit_button.click()

                    # A timeout from here on is taken to mean "no result
                    # for this id", not a slow connection.
                    awaiting_details = True
                    show_details = waiter(fire_drive, 4).until(
                        EC.presence_of_element_located((By.XPATH, xpathval)))
                    show_details.click()

                    # Give the result page time to load (fixed delay; not
                    # ideal for run time).
                    time.sleep(2)

                    # Cycle through all window handles; the driver ends up
                    # on the last one.
                    for handle in fire_drive.window_handles:
                        fire_drive.switch_to_window(handle)

                    # Records that could not be extracted are ignored.
                    datafound, data_dict = data_extractor(fire_drive)
                    if datafound:
                        data_list.append(data_dict)

                    # Back to the search page for the next id.
                    fire_drive.back()
                except TimeoutException:
                    if awaiting_details and show_details is None:
                        print("GO to Next Reg-id as current id's details button not found")
                        continue
                    print("Timeout Error occurs")
                    break
                except StopIteration:
                    print("Scraping Process Completed")
                    break
        finally:
            # Always release the browser, even on unexpected errors.
            fire_drive.quit()
    finally:
        display.stop()

    return json_formatter(data_list)
async def codenames(self, ctx, createRoom=None):
    """Split the caller's voice channel into two Codenames teams.

    Requires the author to be in a voice channel with at least four non-bot
    members. One spymaster is drawn per team and the remaining members are
    split between blue and red operatives. When ``createRoom`` is one of the
    accepted keywords, a room is created on codenames.game with a headless
    Chrome and its link is posted; three minutes later the bot tries to hand
    the host role to another online player and leaves the room.

    Args:
        ctx: Command invocation context.
        createRoom: Optional keyword string; a room is created only when it
            matches one of the accepted spellings (case-insensitive).

    NOTE(review): the Selenium calls are synchronous and run inside this
    coroutine without being awaited.
    """
    await ctx.trigger_typing()
    print('\n [*] \'>codenames\' command called.')

    voiceChannel = ctx.author.voice.channel if ctx.author.voice else None
    # Computed once so the validated head count is the one the teams use.
    players = [member for member in voiceChannel.members if not member.bot] if voiceChannel else []

    if not voiceChannel or len(players) < 4:
        await reactToMessage(self.bot, ctx.message, ['🙅♂️', '❌', '🙅♀️'])
        response = await ctx.send('É necessário estar conectado em um canal de voz para utilizar esse comando.' if not voiceChannel else 'É necessário no mínimo 4 pessoas para jogar Codenames.')
        await reactToResponse(self.bot, response)
        return
    await reactToMessage(self.bot, ctx.message, ['🎲', '🎮', '🏏', '🕹️'])

    createRoom = bool(createRoom and createRoom.lower() in ['$createroom=true', '$createroom', 'true', 'createroom', 'criarsala', 'link', 'url', 'uri'])
    print(f' [**] A room will {"" if createRoom else "not"} be created.')

    people = [member.mention for member in players]
    print(f' [**] {len(people)} members are present.')

    # One spymaster per team, then half of the rest (rounded down) go blue
    # and whoever remains goes red.
    blueSpymaster = random.choice(people)
    people.remove(blueSpymaster)
    redSpymaster = random.choice(people)
    people.remove(redSpymaster)
    blueOperatives = []
    for _ in range(len(people) // 2):
        blueOperatives.append(random.choice(people))
        people.remove(blueOperatives[-1])
    redOperatives = people
    print(f' [**] The teams were created.')

    roomURL = None
    if createRoom:
        print(f' [**] A room is being created.')
        opt = webdriver.ChromeOptions()
        opt.add_argument('--headless')
        opt.add_argument("--disable-dev-shm-usage")
        opt.add_argument('--disable-gpu')
        opt.add_argument('--no-sandbox')
        opt.add_argument("--disable-extensions")
        print(f' [**] Opening the website...')
        driver = webdriver.Chrome(executable_path=self.CHROME_DRIVER, options=opt)
        try:
            driver.get('https://codenames.game/room/create')
            await ctx.trigger_typing()
            waiter(driver, 10, poll_frequency=0.1).until(presence((By.XPATH, '//h1[contains(text(), "Welcome to Codenames")]')))
            print(f' [**] Waiting for the website to load...')
            driver.find_element_by_xpath('//input[@id="nickname-input"]').send_keys('A Voz da SA-SEL')
            driver.find_element_by_xpath('//button[contains(text(), "Create Room") and contains(@type, "submit")]').click()
            print(f' [**] Setting up the game...')
            waiter(driver, 10, poll_frequency=0.1).until(presence((By.XPATH, '//span[contains(text(), "Set up a game")]')))
            driver.find_element_by_xpath('//div[contains(@class, "flag") and contains(@class, "pt")]').click()
            # Uncomment below to activate Brazil's expansion pack
            # driver.find_element_by_xpath('//input[contains(@id, "Expansão promocional: Brasil") and contains(@type, "checkbox") and contains(@name, "Expansão promocional: Brasil")]').click()
            driver.find_element_by_xpath('//button[contains(text(), "Start New Game")]').click()
            print(f' [**] Waiting for the room to be created...')
            waiter(driver, 10, poll_frequency=0.1).until(presence((By.XPATH, '//div[contains(text(), "A Voz da SA-SEL") and contains(@class, "button-inner")]')))
            roomURL = driver.current_url
        except Exception:
            # Room creation is best-effort: the teams are still announced,
            # just without a link.
            roomURL = None
        finally:
            # Also runs on cancellation, so the browser never outlives a
            # failed or aborted setup.
            if not roomURL:
                driver.quit()

    response = await ctx.send('`TÁ NA HORA DO CODENAMES GAROTADA`\n\n' + (f'**Link da sala**: {roomURL}\n\n' if roomURL else '') + f'**Time azul** 🔵:\n__*Spymaster*__: {blueSpymaster}\n__*Operatives*__: {", ".join(blueOperatives)}\n\n**Time vermelho** 🔴:\n__*Spymaster*__: {redSpymaster}\n__*Operatives*__: {", ".join(redOperatives)}\n\nQue vença o melhor time!')
    await reactToResponse(self.bot, response)

    if roomURL:
        try:
            # Stay in the room for 3 minutes so the players can join. The
            # wait is inside the try so the browser is closed even if the
            # command is cancelled meanwhile.
            await sleep(180)
            await ctx.trigger_typing()

            people = [blueSpymaster] + blueOperatives + [redSpymaster] + redOperatives
            if ctx.author.mention in people:
                people.remove(ctx.author.mention)

            try:
                # Hand the host role to the first online player that is not
                # the bot itself.
                driver.find_element_by_xpath('//button//span[text()="Players"]').click()
                newHost = driver.find_element_by_xpath('//div[@class="relative"]//span[contains(@class, "bg-green-online") and contains(@class, "rounded-full")]/following-sibling::span[not(contains(text(), "A Voz da SA-SEL"))]')
                newHost.click()
                waiter(driver, 5, poll_frequency=0.1).until(presence((By.XPATH, '//button[contains(text(), "Make a Host")]'))).click()
            except Exception:
                response = await ctx.send(f'Alô, {ctx.author.mention}! Eu vou sair da sala, mas como ninguém mais entrou, a sala vai ficar sem host. Se quiserem que crie outra sala depois, é só chamar.\n\ncc: {" ".join(people)}')
            else:
                response = await ctx.send(f'Alô, {ctx.author.mention}! Eu vou sair da sala, agora **o novo host é o `{newHost.text}`**.\n\ncc: {" ".join(people)}')
            finally:
                await reactToResponse(self.bot, response)

            # Leave the room.
            driver.find_element_by_xpath('//div[contains(text(), "A Voz da SA-SEL") and contains(@class, "button-inner")]').click()
            waiter(driver, 5, poll_frequency=0.1).until(presence((By.XPATH, '//div[text()="Leave the Room"]'))).click()
        finally:
            await sleep(3)
            driver.quit()
def comment(self, link: str, comments: list, total=500):
    """Like a raffle post, follow its owner and post comments on it.

    The comments in ``comments`` are cycled ``total // len(comments)``
    times. After each submission the page is checked for a "could not post"
    indicator. The first time one shows up for a comment the page is
    refreshed and the comment is retried. If it shows up again, the method
    sleeps (3, then 6, then 18 minutes), re-submitting after each sleep,
    and gives up if the indicator is still there after the third sleep.

    Args:
        link: URL of the Instagram post.
        comments: Comment texts to post, in order.
        total: Upper bound on the number of comments; also used to size the
            per-comment bookkeeping and to pad the progress counter.

    Returns:
        None, both when all comments were posted and when Instagram stops
        accepting comments ("try again later").

    Raises:
        Exception: When commenting or refreshing fails for an unknown
            reason. A screenshot is saved and the browser is closed first.
    """
    textareaXpath = '//textarea[@placeholder="Add a comment…"]'
    reportXpath = "//div[@role=\"dialog\"]//button[contains(text(),\"Report a Problem\")]"
    # One locator matching ANY of the three "could not post" indicators.
    # The original joined three locator tuples with `or`, which evaluates
    # to the first tuple only, so just the 'Retry' button was ever checked.
    # The toast text is double-quoted because it contains an apostrophe.
    timeoutLocator = (
        By.XPATH,
        "//button[contains(text(),'Retry')]"
        " | //p[@class=\"gxNyb\" and contains(text(),\"Couldn't post comment.\")]"
        " | //div[@class=\"CgFia \"]/div[@class=\"HGN2m XjicZ\"]")

    # Opens the raffle's Instagram post and waits for it to load
    self.driver.get(link)
    waiter(self.driver, 5).until(presence((By.XPATH, '//img[@alt="Instagram"]')))

    # Raffle's title (the owner's profile name), stripped of the
    # boilerplate Instagram puts in the page title
    title = self.driver.title
    if " on Instagram: " in title:
        title = sub(r"\son\sInstagram:.+$", "", title)
    elif search(r"^Instagram\sphoto\sby\s", title):
        title = sub(r"^Instagram\sphoto\sby\s|•.+$", "", title)

    prefix = f"{'@' + self.username} ::: {title} ::: "
    width = len(str(total))

    def log(message):
        """Print `message` only when console logging is enabled."""
        if self.displayConsoleLog:
            print(message)

    def abort(logMessage, errorMessage, cause):
        """Log, save a screenshot, close the browser and raise."""
        log(logMessage)
        screenshotName = "./screenshots/" + \
            ctime() + " ::: ERROR ::: COMMENTING ON RAFFLE.png"
        self.driver.save_screenshot(screenshotName)
        log(f"\n{prefix}Screenshot saved: \"{screenshotName}\".\n\n")
        self.driver.quit()
        raise Exception(errorMessage) from cause

    def isOnTimeout():
        """Return True if a "could not post" indicator appears within 5 s."""
        try:
            waiter(self.driver, 5, poll_frequency=0.1).until(
                presence(timeoutLocator))
        except Exception:
            return False
        return True

    log(f"\n{prefix}The raffle's post was successfully opened.")

    # Likes the post by double-clicking the picture
    picture = self.driver.find_element_by_xpath(
        "//main//article//div[@role=\"presentation\"]//div[@role=\"button\"]//img[@style=\"object-fit: cover;\"]"
    )
    ActionChains(self.driver).double_click(picture).perform()
    log(f"{prefix}The post was successfully liked.")

    # Follows the owner if it wasn't followed already
    followButton = self.driver.find_element_by_xpath(
        "//button[contains(text(),'Follow')]")
    followText = followButton.text
    if followText == "Follow":
        followButton.click()
        waiter(self.driver, 5).until(
            presence((By.XPATH, "//button[contains(text(),'Following')]")))
        log(f"{prefix}The profile was followed successfully.")
    elif followText == "Following":
        log(f"{prefix}The profile was already followed.")

    # hasRefreshed[k] is True while comment k+1 has already had its page
    # refreshed once to get around a posting timeout.
    hasRefreshed = [False] * total
    i = 1
    for _ in range(total // len(comments)):
        for text in comments:
            # Keeps trying to post this comment until it succeeds
            while True:
                try:
                    # Waits until the page is loaded enough to comment
                    waiter(self.driver, 15).until(
                        beClickable((By.XPATH, textareaXpath)))
                    # The previous comment is confirmed only now, when the
                    # box is usable again
                    if i > 1 and not hasRefreshed[i - 1]:
                        log(f"{prefix}Comment {i - 1:{width}}/{total} posted successfully.")
                    if i > 1 and hasRefreshed[i - 2]:
                        log(f"{prefix}Timeout avoided successfully.")
                    # Activates the comment box, writes and submits
                    self.driver.find_element_by_xpath(textareaXpath).click()
                    self.driver.find_element_by_xpath(
                        textareaXpath).send_keys(text)
                    log(f"{prefix}Comment {i:{width}}/{total} was successfully written.")
                    self.driver.find_element_by_xpath(
                        '//button[@type="submit"]').click()
                    log(f"{prefix}Comment {i:{width}}/{total} was successfully submitted.")
                except Exception as err:
                    # A "Report a Problem" dialog means Instagram is
                    # blocking further comments; anything else is an error.
                    try:
                        self.driver.find_element_by_xpath(reportXpath).click()
                    except Exception:
                        abort(f"\n{prefix}Error in comment {i:{width}}/{total}.",
                              "Error while commenting.", err)
                    log(f"{prefix}Can't comment anymore: try again later.")
                    return

                # No indicator: the comment was posted
                if not isOnTimeout():
                    break

                # First timeout for this comment: refresh and retry it
                if not hasRefreshed[i - 1]:
                    hasRefreshed[i - 1] = True
                    log(f"{prefix}Timeout. Refreshing page to try and avoid it.")
                    try:
                        self.driver.refresh()
                    except Exception as err:
                        abort(f"\n{prefix}Error in comment {i:{width}}/{total}, while refreshing page.",
                              "Error while refreshing page.", err)
                    log(f"{prefix}Page refreshed successfully.")
                    continue

                # Already refreshed once: wait it out with growing sleeps
                consecutiveTimeouts = 1
                sleepingTime = 3  # in minutes
                while True:
                    if self.displayConsoleLog:
                        # Sleeps while displaying a progress bar
                        print(f"{prefix}Timeout. Wait {sleepingTime}min.")
                        with IncrementalBar(
                                prefix,
                                max=60 * sleepingTime,
                                suffix='%(percent)d%%') as bar:
                            for _ in range(60 * sleepingTime):
                                sleep(1)
                                bar.next()
                    else:
                        sleep(60 * sleepingTime)

                    # Tries to submit the comment again
                    waiter(self.driver, 2).until(
                        presence((By.XPATH,
                                  '//button[@type="submit"]'))).click()
                    log(f"{prefix}Comment {i:{width}}/{total} submitted again.")

                    if not isOnTimeout():
                        log(f"{prefix}Timeout passed successfully.")
                        hasRefreshed[i - 1] = False
                        break
                    if consecutiveTimeouts > 2:
                        log(f"{prefix}Can't comment anymore: try again later.")
                        return
                    consecutiveTimeouts += 1
                    sleepingTime *= consecutiveTimeouts
                # The comment went through after waiting
                break
            i += 1
            sleep(2)

    log(f"{prefix}All comments were successfully posted.\n")
    return
def follow(self, profile: str):
    """Follow an Instagram profile unless it is already followed.

    Args:
        profile: Profile name; case, spaces and ``@`` are normalised away.

    Returns:
        ``None`` when ``profile`` is the logged-in account itself,
        ``False`` when Instagram shows its "broken link" page,
        ``"tryagainlater"`` when a "Report a Problem" dialog shows that
        following is currently blocked, and ``True`` otherwise (already
        following, already requested, now following, or request sent).

    Raises:
        Exception: When the profile page cannot be recognised, or when the
            follow could not be confirmed (a screenshot is saved and the
            browser is closed first).
    """
    profile = profile.lower().replace(" ", "").replace("@", "")
    if profile == self.username:
        return

    prefix = f"{'@' + self.username} ::: "
    followingXpath = "//header//span[@aria-label=\"Following\"]"
    requestedXpath = "//header//button[contains(text(),'Requested')]"

    def log(message):
        """Print `message` only when console logging is enabled."""
        if self.displayConsoleLog:
            print(message)

    def exists(xpath):
        """Return True if `xpath` matches an element right now."""
        try:
            self.driver.find_element_by_xpath(xpath)
        except Exception:
            return False
        return True

    def appears(xpath, seconds):
        """Return True if `xpath` matches an element within `seconds`."""
        try:
            waiter(self.driver, seconds).until(presence((By.XPATH, xpath)))
        except Exception:
            return False
        return True

    self.driver.get("https://www.instagram.com/" + profile + "/")
    # Waits for the page to fully load
    waiter(self.driver, 5).until(presence((By.XPATH, '//img[@alt="Instagram"]')))

    # Checks if the profile exists
    if not exists(f"//main//header//section//*[contains(text(),'{profile}')]"):
        if exists(
                "//p[contains(text(),'The link you followed may be broken, or the page may have been removed. ')]"
        ):
            return False
        raise Exception(f"Could not open the profile page of @{profile}.")

    log("\n" + prefix + "Follow @" + profile + ".")
    log(prefix + "Profile opened successfully.")

    # Nothing to do if the profile is followed or a request is pending
    if exists(followingXpath):
        log(prefix + "The profile is already being followed.")
        return True
    if exists(requestedXpath):
        log(prefix + "Profile is private and a follow request was already sent.")
        return True

    log(prefix + "The profile isn't followed yet.")
    self.driver.find_element_by_xpath(
        "//header//button[contains(text(),'Follow')]").click()
    log(prefix + "'Follow' button clicked successfully.")

    # Public profile: the "Following" marker shows up
    if appears(followingXpath, 6):
        log(prefix + "Profile successfully followed.")
        return True
    # Private profile: the button turns into "Requested"
    if appears(requestedXpath, 6):
        log(prefix + "Profile is private. A follow request was sent.")
        return True

    # Neither happened. A "Report a Problem" dialog means Instagram is
    # blocking follows for now; anything else is an unknown failure.
    try:
        self.driver.find_element_by_xpath(
            "//div[@role=\"dialog\"]//button[contains(text(),\"Report a Problem\")]"
        ).click()
    except Exception as err:
        # f-string so the file name carries the profile instead of the
        # literal text "{profile}"
        screenshotName = "./screenshots/" + \
            ctime() + f" ::: ERROR ::: FOLLOWING {profile}.png"
        self.driver.save_screenshot(screenshotName)
        log(f"\n{prefix}Screenshot saved: \"{screenshotName}\".\n\n")
        # Closes browser
        self.driver.quit()
        raise Exception(f"Could not follow @{profile}.") from err

    log(prefix + "Can't follow anymore: try again later.")
    return "tryagainlater"
def login(self):
    """Log in to Instagram with ``self.username`` and ``self.password``.

    Opens instagram.com, fills in the login form, submits it and waits up
    to 10 seconds for either the "Not Now" button or the "Instagram from
    Facebook" text to be present.

    Raises:
        Exception: When any step fails or times out. A screenshot is saved
            and the browser is closed before raising.
    """
    prefix = f"{'@' + self.username} ::: "

    def log(message):
        """Print `message` only when console logging is enabled."""
        if self.displayConsoleLog:
            print(message)

    # Opens Instagram login page
    self.driver.get("https://instagram.com")
    log("\n" + prefix + "instagram.com successfully opened.")

    try:
        # Waits until the page is loaded enough to enter username
        usernameInput = waiter(self.driver, 20).until(
            presence((By.XPATH, "//input[@name=\"username\"]")))
        log(prefix + "Page finished loading successfully.")

        usernameInput.send_keys(self.username)
        log(prefix + "Username entered successfully.")

        self.driver.find_element_by_xpath(
            "//input[@name=\"password\"]").send_keys(self.password)
        log(prefix + "Password entered successfully.")

        # Clicks button to submit login info
        self.driver.find_element_by_xpath(
            '//button[@type="submit"]').click()
        log(prefix + "Login submitted sucessfully.")

        # Either element counts as "logged in". A single XPath union is
        # used because `presence(a) or presence(b)` always evaluates to the
        # first condition, so the second element was never waited for.
        waiter(self.driver, 10, poll_frequency=0.1).until(
            presence((
                By.XPATH,
                "//button[contains(text(), 'Not Now')]"
                " | //span[contains(text(), 'Instagram from Facebook')]")))
    except Exception as err:
        log(prefix + "Error while loading page (t > 10s).")
        # Saves screenshot
        screenshotName = "./screenshots/" + \
            ctime() + " ::: ERROR ::: LOGGING IN TO INSTAGRAM.png"
        self.driver.save_screenshot(screenshotName)
        log(f"\n{prefix}Screenshot saved: \"{screenshotName}\".\n\n")
        # Closes browser
        self.driver.quit()
        raise Exception("The page took too long to load.") from err
    return