def init_browser_bot(url, preload=False):
    """Visit *url*, (re)creating the shared browser session when needed.

    Each time init_browser_bot runs, it'll reference the variables from the
    previous init. Functions that reference the browser, html, or soup
    variables without assignments will default to these module-level
    variables, otherwise said variables are limited to local scope and
    therefore require global keyword declarations.

    Parameters
    ----------
    url : str
        Address to load in the browser.
    preload : bool
        When True, scroll to the bottom three times (pausing between
        scrolls) so lazily-loaded content is present before parsing.
    """
    global browser, executable_path, html, soup
    try:
        browser.visit(url)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are not swallowed.  We land here when ``browser`` does not exist
        # yet (NameError) or the previous session is dead.
        executable_path = {'executable_path': 'chromedriver.exe'}
        browser = Browser('chrome', **executable_path, headless=True)
        browser.driver.set_window_size(1280, 720)
        browser.visit(url)
    if preload:
        time.sleep(1)
        for _ in range(3):
            browser.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(1)
    html = browser.html
    soup = bs(html, 'html.parser')
def test_0_http_browser_download(self):
    """Download a dataset over the THREDDS fileServer with Firefox,
    walking the ESGF OpenID login flow when the resource is protected."""
    path = self.get_endpoint_path("HTTPServer")
    url = "http://{0}/thredds/fileServer/{1}".format(self.data_node, path)
    OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_node, self.username)
    # Tell Firefox to save netCDF responses to disk instead of prompting.
    pf = {"browser.helperApps.neverAsk.saveToDisk": "application/x-netcdf, application/netcdf"}
    browser = Browser("firefox", profile_preferences=pf)
    browser.visit(url)
    if browser.status_code.is_success() is True:
        # Resource is publicly reachable -- no login round-trip needed.
        browser.quit()
        return
    # Authenticate through the OpenID relying-party form.
    browser.find_by_css("input.custom-combobox-input").fill(OpenID)
    browser.find_by_value("GO").click()
    browser.find_by_id("password").fill(self.password)
    browser.find_by_value("SUBMIT").click()
    # To Do only if user is not enrolled in a group
    if browser.is_text_present("Group Registration Request"):
        # Chosing First Registration Group
        browser.find_by_id("button_1").click()
        # Accepting License Agreement
        browser.execute_script("myForm.submit();")
        # Clicking on 'Download data button'
        browser.find_by_id("goButton").click()
    browser.quit()
def begin_login(bilkent_id, password, mail, mail_pass):
    """Log into Bilkent STARS/SRS and complete the e-mail verification step.

    Parameters
    ----------
    bilkent_id : str
        SRS user id.
    password : str
        SRS password.
    mail : str
        Mailbox address the verification code is sent to.
    mail_pass : str
        Password for that mailbox.
    """
    browser = Browser()
    browser.visit('https://stars.bilkent.edu.tr/srs/')
    # The password box is the third <input> on the page; force its type to
    # "password" so the form accepts the typed value.
    # (Removed unused locals ``input_elements`` indirection / ``pass_id``.)
    pass_field = browser.find_by_tag('input')[2]
    browser.execute_script(
        "var y = document.getElementById(\"LoginForm_password\").type = \"password\""
    )
    browser.fill('LoginForm[username]', bilkent_id)
    browser.type(pass_field['name'], password)
    browser.find_by_name('yt0').click()
    sleep(1)
    # SRS e-mails a one-time code; pull it from the mailbox and submit it.
    raw_email = get_mails(mail, mail_pass)
    verification_code = get_verification_code(raw_email.decode("utf-8"))
    browser.fill("EmailVerifyForm[verifyCode]", verification_code)
    browser.find_by_name('yt0').click()
    sleep(1)
def scrape_featured_mars_image():
    """Scrape the current JPL featured Mars image URL.

    Returns
    -------
    dict
        {"featured_image": <absolute image URL>}
    """
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=True)
    base_url = "https://www.jpl.nasa.gov"
    image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(image_url)
    # Open the fancybox overlay for the first thumbnail.
    browser.find_by_css('img.thumb').first.click()
    time.sleep(2)  # let the overlay render before scrolling inside it
    browser.execute_script(
        "document.getElementById('fancybox-lock').scrollTo(0, document.body.scrollHeight);"
    )
    browser.links.find_by_partial_text("more info").click()
    time.sleep(1)
    #get image src
    img_soup = bs(browser.html, "html.parser")
    img_src = img_soup.find("img", class_="main_image")["src"]
    img_src = base_url + img_src  # src is site-relative; prefix the host
    browser.quit()
    return {"featured_image": img_src}
def scrape(pokemon):
    """Return ``{'name': pokemon, 'url': <direct .png URL>}`` for the
    given Pokemon's artwork on Bulbapedia.

    Navigates from the species page through the image thumbnail to the
    bare .png file page and records that page's URL.
    """
    url = f'https://bulbapedia.bulbagarden.net/wiki/{pokemon}_(Pok%C3%A9mon)'
    # Bug fix: the original also created ``webdriver.Chrome()`` which was
    # never used and never quit -- one leaked browser process per call.
    # Also removed unused locals (html/soup and the click() return values).
    browser = Browser('chrome')
    browser.visit(url)
    # Find the picture of the Pokemon and click on it until it's just the
    # .png file.
    browser.execute_script("window.scrollTo(0, 400);")
    browser.find_link_by_partial_href(f'{pokemon}.png').click()
    time.sleep(2)
    browser.execute_script("window.scrollTo(0, 400);")
    browser.find_by_id('file').click()
    time.sleep(2)
    pokemon_url = browser.url
    # Store it in a dictionary
    pokemon_image = {'name': pokemon, 'url': pokemon_url}
    browser.quit()
    return pokemon_image


big_ol_pokemon_list = csv['Name']
def scroll_down(cls, browser: Browser, scrollnum: int):
    """
    This function scrolls to the bottom of the page. If this action results
    in getting more data then it scrolls again until no more data is loaded
    or the number of scrolls reaches an upper threshold.

    Parameters
    ----------
    :param browser : Browser
        The Splinter Browser object which controls the browser which
        executes the called actions.
    :param scrollnum : int
        Maximises the number of the scroll downs
    """
    prior = 0
    # Bug fix: the original nested two identical ``range(0, scrollnum)``
    # loops, issuing up to scrollnum**2 scrolls instead of the documented
    # upper threshold of scrollnum.  One scroll per iteration suffices.
    for _ in range(scrollnum):
        browser.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        current = len(browser.html)
        if current == prior:
            # No new content was loaded by the last scroll -- stop early.
            return
        prior = current
class BaseWebTestCase(LiveServerTestCase):
    """
    Abstract class to handle logic for web tests
    """
    # Credentials used by _login(); values are redacted in source control.
    username = "******"
    password = "******"
    # Fixed pause (seconds) applied after every page visit.
    wait_seconds = 3.0

    def setUp(self):
        # Run Chrome visibly with a fixed viewport so layout-dependent
        # assertions behave consistently across machines.
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--start-maximized")
        chrome_options.add_argument("--no-sandbox")
        self.browser = Browser("chrome", headless=False, wait_time=30,
                               options=chrome_options)
        self.browser.driver.set_page_load_timeout(240)
        super(BaseWebTestCase, self).setUp()

    def tearDown(self):
        self.browser.quit()
        super(BaseWebTestCase, self).tearDown()

    def _wait(self):
        # Blunt fixed sleep used in place of explicit element waits.
        time.sleep(self.wait_seconds)

    def _login(self):
        """Sign in through the login form and assert the nav is visible."""
        self._visit("")
        self.browser.fill("username", self.username)
        self.browser.fill("password", self.password)
        self.browser.find_by_text("Sign in").first.click()
        assert self.browser.is_text_present(
            "Admin")  # checks that the nav is visible
        assert not self.browser.is_text_present("Sign in")

    def _go_home(self):
        self._visit("/")

    def _setup_confirm(self):
        """
        First part of work-around to let phantomjs accept confirmation dialogs
        http://stackoverflow.com/questions/19903146/confirm-alert-window-in-phantom-js
        """
        js_confirm = "window.confirm = function() { return true }"
        self.browser.execute_script(js_confirm)

    def _accept_confirm(self):
        """
        Second part of work-around to let phantomjs accept confirmation dialogs
        MUST call self._setup_confirm() for this to work
        """
        self.browser.execute_script("return window.confirm")

    def _visit(self, path):
        """Visit *path* relative to the live server, then pause."""
        path = self.live_server_url + path
        self.browser.visit(path)
        self._wait()
def test_0_http_browser_download(self):
    """Download a dataset over the THREDDS fileServer with Firefox,
    walking the ESGF OpenID login flow when the resource is protected."""
    path = self.get_endpoint_path('HTTPServer')
    url = "http://{0}/thredds/fileServer/{1}".format(self.data_node, path)
    OpenID = "https://{0}/esgf-idp/openid/{1}".format(self.idp_node, self.username)
    # Tell Firefox to save netCDF responses to disk instead of prompting.
    pf={'browser.helperApps.neverAsk.saveToDisk':'application/x-netcdf, application/netcdf'}
    browser = Browser('firefox', profile_preferences=pf)
    browser.visit(url)
    if browser.status_code.is_success() is True:
        # Resource is publicly reachable -- no login round-trip needed.
        browser.quit()
        return
    # Authenticate through the OpenID relying-party form.
    browser.find_by_id('openid_identifier').fill(OpenID)
    browser.find_by_value('GO').click()
    browser.find_by_id('password').fill(self.password)
    browser.find_by_value('SUBMIT').click()
    # To Do only if user is not enrolled in a group
    if browser.is_text_present('Group Registration Request'):
        # Chosing First Registration Group
        browser.find_by_id('button_1').click()
        # Accepting License Agreement
        browser.execute_script('myForm.submit();')
        # Clicking on 'Download data button'
        browser.find_by_id('goButton').click()
    browser.quit()
class BaseWebTestCase(LiveServerTestCase):
    """
    Abstract class to handle logic for web tests
    """
    # Credentials used by _login(); values are redacted in source control.
    username = '******'
    password = '******'
    # Fixed pause (seconds) used after visits and home navigation.
    wait_seconds = 3.0

    def setUp(self):
        # Headless Chrome with a fixed viewport for consistent layout.
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--start-maximized")
        chrome_options.add_argument("--no-sandbox")
        self.browser = Browser('chrome', headless=True, wait_time=10,
                               options=chrome_options)
        super(BaseWebTestCase, self).setUp()

    def tearDown(self):
        self.browser.quit()
        try:
            super(BaseWebTestCase, self).tearDown()
        except IndexError:
            # Presumably raised by the live-server teardown when the server
            # thread already exited -- treated as benign.  TODO confirm.
            print("Ignoring IndexError in tearDown...")

    def _login(self):
        """Sign in via the login form and assert we landed on Home."""
        self._visit("")
        self.browser.fill('username', self.username)
        self.browser.fill('password', self.password)
        self.browser.find_by_text('Sign in').first.click()
        assert self.browser.is_text_present('Home')
        assert not self.browser.is_text_present('Sign in')

    def _go_home(self):
        self.browser.click_link_by_text('Home')
        time.sleep(self.wait_seconds)

    def _setup_confirm(self):
        """
        First part of work-around to let phantomjs accept confirmation dialogs
        http://stackoverflow.com/questions/19903146/confirm-alert-window-in-phantom-js
        """
        js_confirm = 'window.confirm = function() { return true }'
        self.browser.execute_script(js_confirm)

    def _accept_confirm(self):
        """
        Second part of work-around to let phantomjs accept confirmation dialogs
        MUST call self._setup_confirm() for this to work
        """
        self.browser.execute_script('return window.confirm')

    def _visit(self, path):
        """Visit *path* relative to the live server, then pause."""
        path = self.live_server_url + path
        self.browser.visit(path)
        time.sleep(self.wait_seconds)
def testBrowser(url):
    """Smoke-test a headless Firefox session: load *url*, raise a JS
    alert, capture a screenshot, then shut the browser down."""
    session = Browser('firefox', headless=True)
    # Open the target page and give it a moment to settle.
    session.visit(url)
    time.sleep(3)
    # Fire a trivial alert, then save a screenshot of the page.
    session.execute_script('alert("Hello World")')
    session.screenshot("D:/baidu.png")
    session.quit()
def hypemCrawl(): """ Function call to start crawling hypemachine popular feed; can easily be modified to crawl other feeds; but given the number of tracks I am working with, need to limit it to a number that I can keep track of at least for now """ # browser to inject javascript into; hate making this global object browser = Browser('chrome') # visit hype machine browser.visit('http://hypem.com/popular') #TODO: make a generic/proxy class to make it easier to crawl list of sites, but # challenging due to different xpath formats :/ # activate the infinite scroll to show more tracks browser.execute_script("window.scrollTo(0,document.body.scrollHeight);") # grab appropriate elements lst1 = getElements(browser, xpath1) lst2 = getElements(browser, xpath2) browser.visit('http://hypem.com/popular/2') browser.execute_script("window.scrollTo(0,document.body.scrollHeight);") time.sleep(5) #TODO: implicit system wait to check if elements have loaded; sometimes # browser takes time to load lst3 = getElements(browser, xpath3) # combine lists, make sure duplicates aren't there if not lst2 and not lst3: final = set(lst1) elif not lst3: final = set(lst1 + lst2) else: final = set(lst1 + lst2 + lst3) # for my own information print len(final), " were pulled" # text file of all the songs output_file = str(datetime.datetime.now().strftime( "%d_%m_%y | %H:%M")) + "_hypem" + ".txt" output_path = "../output/txt/" + output_file with open(output_path, 'w') as f: for each in final: f.write(str(each) + '\n') # general clean up browser.quit()
def getDataForEvent(event):
    """Fetch the expanded market data for one betmarathon event and write
    the formatted result to ./all_results/<filename>.

    Parameters
    ----------
    event : sequence
        event[0] is the event id string (last char is stripped to form the
        DOM id), event[1] is a human-readable label.
    """
    t = time.time()
    browser = Browser('chrome')  #chrome phantomjs
    try:
        browser.visit('http://www.betmarathon.com/en/popular/Football/')
    except Exception as error:
        throwError(1, "Browser.visit failed. info = " + str(error))
    res = ''
    res += '-------\n' + event[0] + '\n' + event[1] + '\n'
    strId = event[0][:-1]
    try:
        # Expand the "more" view for this event so the market rows render.
        script = ("Markets.applyView(document.getElementById('event-more-view-"
                  + strId + "'));return false;")
        browser.execute_script(script)
    except Exception as error:
        throwError(1, "Browser.execute_script failed. info = " + str(error))
    time.sleep(5)  # give the expanded markets time to load
    event2 = browser.find_by_id("event_" + strId).first
    res += getInfo(event2.html)
    res += 'a-------\n'
    try:
        res = formatDataForEvent(event, res)
    except Exception as error:
        throwError(2, "Format data failed. info = " + str(error))
    try:
        filename = getFileNameForEvent(event)
        # Bug fix: use a context manager so the file handle is closed even
        # if write() raises (the original leaked the handle on failure).
        with open('./all_results/' + filename, 'w') as f:
            f.write(res)
    except Exception as error:
        throwError(3, "Writing result to file failed. info = " + str(error))
    browser.quit()
    return
class JS_Diagram_Converter(object):
    """Drives a JS sequence-diagram web page through Splinter: types a
    diagram source into the page and triggers the SVG download."""

    def __init__(self):
        # The browser is created lazily on the first convert_diagram() call.
        self.browser = None

    def _init(self):
        """Create the browser and open the converter page."""
        self.browser = Browser(**options_for_browser)
        self.browser.visit(url)

    def convert_diagram(self, source):
        """Type *source* into the page's textarea, apply the "Simple"
        theme, and click "Download as SVG"."""
        if self.browser is None:
            self._init()
        s = self.browser.find_by_xpath('//select')[0]
        s.select_by_text("Simple")
        a = self.browser.find_by_text("Download as SVG")[0]
        # writing to the textarea is a bit clumsy
        ta = self.browser.find_by_xpath('//textarea')[0]
        # select all existing text so fill() replaces it
        # (dropped meaningless f-prefixes: the snippets have no placeholders)
        snippet1 = """document.querySelector("textarea").focus()"""
        snippet2 = """document.querySelector("textarea").select()"""
        self.browser.execute_script(snippet1)
        self.browser.execute_script(snippet2)
        # now insert the actual source (unfortunately this is slow to simulate typing)
        ta.fill(source)
        s.select_by_text("Simple")
        time.sleep(2)
        a.click()

    def quit(self):
        """Shut the browser down; safe to call repeatedly."""
        if self.browser is not None:  # idiom fix: was ``not ... is None``
            self.browser.quit()
            self.browser = None
def main():
    """Debug driver (Python 2): scrape William Hill in-play football
    markets with PhantomJS.  NOTE(review): execution stops at the exit()
    call below -- everything after it is an unreachable earlier variant
    kept for reference."""
    t = time.time()
    browser = Browser('phantomjs')
    print "init browser", (time.time() - t)
    browser.visit(
        'http://sports.williamhill.com/bet/en-gb/betting/y/5/tm/0/Football.html'
    )
    submitTimeZone(browser)
    #browser.visit('http://petroclima.ru/')
    #browser.find_by_id("yesBtn").click()
    #f = codecs.open('html_new.out', 'w', encoding='utf-8')
    #f.write (browser.html)
    #browser.quit()
    #exit(
    #f = codecs.open('html_new.out', 'w', encoding='utf-8')
    #f.write(browser.html)
    bts = browser.find_by_id("ip_mkt_grp_994186").first
    print bts.html
    # Expand the "Match Betting" group via the site's own AJAX helper.
    script = "document.site.ajax_unexpanded_type('ip', '6733', '0', 'Match Betting')"
    browser.execute_script(script)
    bts = browser.find_by_id("ip_row_6746483").first
    print "**********************************"
    print bts.html
    browser.quit()
    exit()
    # ---- unreachable from here on (see docstring) ----
    #browser.find_by_class('more-view').first.click()
    print "visit williamhill ", (time.time() - t)
    browser.execute_script(
        "document.site.ajax_unexpanded_type('ip', '5664', '1', 'Match Betting')"
    )
    #event = browser.find_by_css(".marketHolderCollapsed").first.click()
    #script = "document.site.ajax_unexpanded_type('ip', '5664', '1', 'Match Betting')"
    #browser.execute_script(script)
    time.sleep(5)
    event = browser.find_by_css(".marketHolderCollapsed").first
    #event = browser.find_by_id("ip_mkt_grp_tbl_5664_9d8a08d4b13c912153e27659829a27ad").first
    print "Printing result..."
    print event.html
def cookie_browser(cookies):
    """Open a Browser pre-loaded with *cookies* and return it.

    A ``scroll_to_bottom`` helper callable is attached to the returned
    instance for convenience.
    """
    b = Browser(BROWSER, headless=HEADLESS)
    # Fix Needed for setting cookies in chrome: a page must be loaded
    # before cookies can be added.
    if BROWSER == 'chrome':
        b.visit('https://fb.me')
    for cookie in cookies:
        payload = {
            'name': cookie.name,
            'value': cookie.value,
            'path': cookie.path,
            'expiry': cookie.expires,
        }
        b.cookies.driver.add_cookie(payload)
    b.scroll_to_bottom = (
        lambda: b.execute_script('window.scrollTo(0, document.body.scrollHeight);'))
    return b
def js_click_on(cls, elem, browser: Browser) -> bool:
    """
    Clicks on the element via executing a Javascript query and returns
    whether the click was successful or not.
    :param elem: The element to click on
    :param browser: The Splinter Browser instance which will execute the query
    :return: True if successful, False if not
    """
    try:
        clss = elem.get_attribute("class")
        # Bug fix: the original spliced the class *name* into the index
        # position -- getElementsByClassName('x')[x].click() -- which is
        # not a valid collection index.  Click the first match instead.
        # NOTE(review): index 0 assumed; confirm no caller relies on a
        # different match.  (Also removed debug print and a no-op join.)
        script = "document.getElementsByClassName('" + clss + "')[0].click()"
        browser.execute_script(script)
        return True
    except StaleElementReferenceException:
        # Element went stale between lookup and click.
        return False
def main():
    """Debug driver (Python 2): expand and print one William Hill in-play
    "Match Betting" market using PhantomJS."""
    t = time.time()
    browser = Browser('phantomjs')
    print "init browse ", (time.time() - t)
    browser.visit(
        'http://sports.williamhill.com/bet/en-gb/betting/y/5/tm/1/Football.html'
    )
    #browser.find_by_class('more-view').first.click()
    print "visit williamhill ", (time.time() - t)
    # Expand the market group via the site's own AJAX helper.
    browser.execute_script(
        "document.site.ajax_unexpanded_type('ip', '5664', '1', 'Match Betting')"
    )
    #event = browser.find_by_css(".marketHolderCollapsed").first.click()
    #script = "document.site.ajax_unexpanded_type('ip', '5664', '1', 'Match Betting')"
    #browser.execute_script(script)
    time.sleep(5)  # wait for the AJAX expansion to finish
    event = browser.find_by_css(".marketHolderCollapsed").first
    #event = browser.find_by_id("ip_mkt_grp_tbl_5664_9d8a08d4b13c912153e27659829a27ad").first
    print "Printing result..."
    print event.html
class PinterestCrawler(object):
    """Automates logging into Pinterest and mass-following the followers
    of an account.

    NOTE(review): credentials are hard-coded in main() -- they should be
    moved to configuration/secret storage.
    """

    def __init__(self, url):
        # One shared browser session drives the whole crawl.
        self.navigator = Browser()
        self.url = url

    def login_to_pinterest(self, login, password):
        """Fill and submit the login form.  Assumes the login page is
        already open in ``self.navigator``."""
        email_input = login
        password_input = password
        emaillogin = self.navigator.find_by_css(
            'body > div > div.appContent > div.mainContainer > div > div > div > form > ul > li.loginUsername > input')[0]
        emailpassword = self.navigator.find_by_css('body > div > div.appContent > div.mainContainer > div > div > div > form > ul > li.loginPassword > input')[0]
        loginbutton = self.navigator.find_by_css('body > div > div.appContent > div.mainContainer > div > div > div > form > div.formFooter > div > button')[0]
        emaillogin.fill(email_input)
        emailpassword.fill(password_input)
        time.sleep(random.randint(2, 6))  # human-ish pause before submit
        return loginbutton.click()

    def clickfollow(self, url):
        """Visit a followers page and click the follow button on up to 50
        profile cards, pausing randomly between clicks."""
        self.navigator.visit(url)
        time.sleep(6)
        self.navigator.execute_script(
            'window.scrollTo(50,document.body.scrollHeight);')
        # nth-child({0}) selects the card for follower number new_child.
        css_path = 'body > div.App.AppBase.Module.full > div.appContent > div.mainContainer > div.Module.UserProfilePage > div.Module.UserProfileContent > div > div > div:nth-child({0}) > div > button'
        new_child = 1
        while new_child <= 50:
            css_path2 = css_path.format(new_child)
            time.sleep(2)
            self.navigator.find_by_css(css_path2)[0].click()
            new_child += 1
            time.sleep(random.randint(3, 12))

    def main(self):
        """Entry point: open the start URL, log in, then follow."""
        self.navigator.visit(self.url)
        self.login_to_pinterest('*****@*****.**', 'blackhouse123')
        self.clickfollow('https://www.pinterest.com/PaleoLivingMag/followers/')
def navegacion(num): try: browser = Browser() browser.visit('http://ugraerospaceprogram.appspot.com/') print "Iniciando prueba " browser.fill('usuario', 'test') browser.fill('password', '12345') browser.find_by_name('init_sesion').click() browser.find_by_name('btn_editar').click() browser.find_by_name('btn_inicio').click() browser.find_by_name('btn_monitorizacion').click() browser.find_by_id('tipoGrafica-temp').find_by_id( 'element_line_temp').click() browser.find_by_id('botonGrafica-temp').click() browser.find_by_id('tipoGrafica-velocidadviento').find_by_id( 'element_line_v').click() browser.find_by_id('botonGrafica-velocidadviento').click() browser.find_by_id('tipoGrafica-humedad').find_by_id( 'element_line_h').click() browser.find_by_id('botonGrafica-humedad').click() browser.find_by_id('tipoGrafica-precipitacion').find_by_id( 'element_line_p').click() browser.find_by_id('botonGrafica-precipitacion').click() browser.find_by_name('btn_estadisticas').click() browser.find_by_name('btn_twitter').click() browser.find_by_id('boton-cerrar').click() browser.execute_script("alert('Navegacion acabada');") browser.quit() return True except: return False
def test_0_http_browser_download(self):
    """Download a dataset over the THREDDS fileServer with Firefox,
    walking the ESGF OpenID login flow when the resource is protected."""
    path = self.get_endpoint_path('HTTPServer')
    url = "http://{0}/thredds/fileServer/{1}".format(self.data_node, path)
    OpenID = "https://{0}/esgf-idp/openid/{1}".format(
        self.idp_node, self.username)
    # Tell Firefox to save netCDF responses to disk instead of prompting.
    pf = {
        'browser.helperApps.neverAsk.saveToDisk':
        'application/x-netcdf, application/netcdf'
    }
    browser = Browser('firefox', profile_preferences=pf)
    browser.visit(url)
    if browser.status_code.is_success() is True:
        # Resource is publicly reachable -- no login round-trip needed.
        browser.quit()
        return
    # Authenticate through the OpenID relying-party form.
    browser.find_by_id('openid_identifier').fill(OpenID)
    browser.find_by_value('GO').click()
    browser.find_by_id('password').fill(self.password)
    browser.find_by_value('SUBMIT').click()
    # To Do only if user is not enrolled in a group
    if browser.is_text_present('Group Registration Request'):
        # Chosing First Registration Group
        browser.find_by_id('button_1').click()
        # Accepting License Agreement
        browser.execute_script('myForm.submit();')
        # Clicking on 'Download data button'
        browser.find_by_id('goButton').click()
    browser.quit()
def lj():
    """Log in and click through nested frames to reach a selection dialog.

    Uses (and rebinds) the module-level browser ``b``.  The while/sleep
    loops poll until the target elements render inside each frame.
    """
    global b
    b = Browser(driver_name='firefox')
    b.visit(url)
    b.execute_script('alert("Begin input!~~~")')
    time.sleep(1)
    b.get_alert().dismiss()
    # Keep retrying the login form while it is still present.
    while b.is_element_present_by_id("cmdOK"):
        login()
        if b.url == init_url:
            break
    # Expand the full navigation tree.  (Literal text is UI label text.)
    b.find_by_text(u"展开全部").click()
    time.sleep(1)
    b.find_by_xpath(".//*[@id='ext-gen74']/li[1]/div/a/span").click()
    b.driver.switch_to_frame("dynamic_added_tabxnode1")
    # Poll until the toolbar icon renders inside the dynamic frame.
    while b.is_element_not_present_by_xpath(
            ".//*[@id='ext-gen45']/div[2]/table/tbody/tr/td[7]/div/a/img"):
        time.sleep(2)
        continue
    b.find_by_xpath(
        ".//*[@id='ext-gen45']/div[3]/table/tbody/tr/td[7]/div/a/img").click()
    b.driver.switch_to_default_content()
    b.driver.switch_to_frame("ext-gen107")
    while b.is_element_not_present_by_xpath(".//*[@id='t101003015']"):
        time.sleep(2)
        continue
    b.find_by_xpath(".//*[@id='t101003015']").click()
    b.find_by_xpath(".//*[@id='101003015']/div[4]").click()
    b.driver.switch_to_frame("ext-gen18")
    # Wait for the "re-select" button, then click it.
    while b.is_element_not_present_by_text(u"重新选择"):
        time.sleep(2)
        continue
    b.find_by_text(u"重新选择").click()
class HackFreeRice:
    """Automates answering multiplication questions on freerice.com
    (Python 2 code)."""

    def __init__(self, browserType='chrome'):
        # Running tallies of answered questions.
        self.correct = 0
        self.incorrect = 0
        self.readCredentials()
        # Initialize splinter (other possible values include 'phantomjs' and 'firefox')
        self.browser = Browser(browserType)

    def initialize(self, verbose=False):
        # Initialize until it is successful
        while True:
            if self.tryInitialize():
                if verbose:
                    print 'Successfully initialized.'
                break

    def readCredentials(self, verbose=False):
        # Read credentials from file ('config': line 1 username, line 2 password)
        with open('config') as f:
            username, password = f.readlines()
            self.username = username.strip()
            self.password = password.strip()
        if verbose:
            print 'Your username is %s' % self.username
            print 'Your password is %s' % self.password

    def tryInitialize(self, verbose=False):
        """Log in and switch the quiz category to the multiplication
        table.  Returns True on success, False otherwise."""
        # Open freerice
        self.browser.visit('http://freerice.com/user/login')
        # Close pop-up, if present
        if self.browser.is_element_present_by_id('wfp-ew-dialog-close'):
            if self.browser.find_by_id('wfp-ew-dialog-close').first.visible:
                # Closed popup if present and visible
                self.browser.find_by_id('wfp-ew-dialog-close').click()
        # Login -- done via jQuery injection rather than form interaction
        self.browser.execute_script("$('#edit-name').val('%s')" % self.username)
        self.browser.execute_script("$('#edit-pass').val('%s')" % self.password)
        self.browser.execute_script("$('#edit-submit').click()")
        already_logged_in = self.browser.is_text_present('Logout')
        login_check_string = '%s has earned' % self.username
        successful_login = self.browser.is_text_present(login_check_string)
        if already_logged_in or successful_login:
            if verbose:
                print 'Successfully logged in!'
        else:
            if verbose:
                print 'Login failed.'
            return False
        # Change subject to math
        self.browser.execute_script("window.location.href = 'http://freerice.com/frapi/category_selected/18492'")
        if self.browser.is_text_present('Multiplication Table'):
            if verbose:
                print 'Successfully navigated to Multiplication Table'
            return True
        else:
            return False

    def doQuestion(self, verbose=False):
        """Answer one multiplication question by evaluating it in the
        page's JS context and clicking the matching answer link."""
        # Close twitter solicitation, if present
        self.browser.execute_script("$('#twt-skip').click()")
        question_text = self.browser.evaluate_script("$('#question-title').text()")
        # Remove any embedded 'loader' substring from the question text.
        question_text = question_text.split('loader')
        question_text = ''.join(question_text)
        if verbose:
            print 'The question is: %s' % question_text
        question_text = string.replace(question_text, ' x ', '*').strip()
        if verbose:
            print 'The code representation of the question is: %s' % question_text
        # Evaluate the arithmetic expression in the browser's JS engine.
        question_answer = self.browser.evaluate_script(question_text)
        if verbose:
            print 'The answer is: %s' % question_answer
        # Tag the answer link containing the result so it can be clicked.
        add_id_script = "$('a:contains(\"%s\").answer-item').attr('id', 'clickthisone')" % question_answer
        if verbose:
            print 'Script to add id is:', add_id_script
        self.browser.execute_script(add_id_script)
        self.browser.find_by_id('clickthisone').click()
        if self.browser.is_text_present('Correct!'):
            print 'Got the answer right. Yeah!'
            self.correct += 1
        else:
            print 'Oops. Got that one wrong.'
            self.incorrect += 1
        print 'You have donated %s grains of rice!' % str(10 * self.correct)
import re, urllib.request, csv # Feedspot URL url = 'https://blog.feedspot.com/indian_news_websites/' # Chrome Driver required (download from internet) so that dynamically loaded/extended page data can be extracted # An executable driver has been added in Zipped Alexa websites folder in the "Websites Dataset" directory # State the location of your driver executable_path = {"executable_path": "C:/Desktop/chromedriver"} # Instantiating a browser object as follows... # Pass 'headless=False' to make Chrome launch a visible window browser = Browser("chrome", **executable_path, headless=True) browser.visit(url) html = BeautifulSoup(browser.html, 'html.parser') browser.execute_script("window.scrollTo(0, document.body.scrollHeight);") # Declare the JavaScript that scrolls to the end of the page... scrollJS = "window.scrollTo(0, document.body.scrollHeight);" # Declare number of attempts to make if extended page is not loaded in first attempt attempt = 2 text = "" while attempt > 0: # Scroll to the bottom of the page browser.execute_script(scrollJS) # Use BS4 to get the HTML soup = BeautifulSoup(browser.html, 'html.parser', from_encoding="utf-8")
def scrape():
    """Scrape Mars news, featured image, weather tweet, fact table and
    hemisphere images; return them all in one dictionary.

    Returns
    -------
    dict with keys: news_title, news_p, featured_image_url, mars_weather,
    pd_table_html, hemisphere_image_urls.
    """
    # Create dictionary to return at the end
    mars = {}
    print(f"""-------------------- Start Scraping...""")

    ### Nasa Mars News Scraping
    print("Nasa Mars News Scraping...")
    url = 'https://mars.nasa.gov/news/'
    response = requests.get(url)
    soup = bs(response.text, "html.parser")
    news_title = soup.find("div", class_="content_title").a.text.strip('\n')
    news_p = soup.find('div', class_='rollover_description_inner').text.strip('\n')
    mars["news_title"] = news_title
    mars["news_p"] = news_p

    ### JPL Mars Space Images - Featured Image
    print("JPL Mars Space Images - Featured Image scraping... ")
    executable_path = {"executable_path": ChromeDriverManager().install()}
    browser = Browser("chrome", **executable_path, headless=False)
    browser.visit(
        "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars")
    html = browser.html
    soup = bs(html, "html.parser")
    base_url = "https://www.jpl.nasa.gov"
    image_url = soup.find("article", class_="carousel_item")["style"]
    # Bug fix: str.lstrip/rstrip strip *character sets*, not literal
    # prefixes/suffixes, and could eat leading/trailing characters of the
    # path itself.  Extract the single-quoted URL from
    # "background-image: url('...');" instead.
    image_url = image_url.split("'")[1]
    featured_image_url = base_url + image_url
    mars["featured_image_url"] = featured_image_url

    ### Mars Weather
    print("Mars Weather scraping...")
    browser.visit("https://twitter.com/marswxreport?lang=en")
    # Scroll so more tweets are loaded before parsing.
    browser.execute_script("window.scrollTo(2, document.body.scrollHeight);")
    html = browser.html
    soup = bs(html, "html.parser")
    mars_weather = ""
    list_tweets = soup.find_all("div", class_="css-901oao")
    for tweet in list_tweets:
        if "InSight" in tweet.text:
            mars_weather = tweet.text
            break
    mars["mars_weather"] = mars_weather

    ### Mars Facts
    print("Mars Facts scraping...")
    url = "https://space-facts.com/mars/"
    tables = pd.read_html(url)
    # copy to make some changes in the header and index
    html_table = tables[0].copy()
    html_table = html_table.rename(columns={0: "Description", 1: "Value"})
    html_table = html_table.set_index("Description")
    # change it to Markup so it can be displayed as html
    pd_table_html = Markup(html_table.to_html())
    mars["pd_table_html"] = pd_table_html

    ### Mars Hemispheres
    print("Mars Hemispheres scraping...")
    browser.visit(
        "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    )
    html = browser.html
    soup = bs(html, "html.parser")
    # url type: https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
    base_url = "https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/"
    # Get the div class="description" where the link to each hemisphere is
    list_items = soup.find_all("div", class_="description")
    hemisphere_image_urls = []
    for item in list_items:
        aux_str = item.a['href'].split("/")[5]
        title = item.a.text
        # Bug fix: rstrip(" Enhanced") would strip any trailing run of the
        # characters " Enhacd..." -- remove the exact suffix instead.
        if title.endswith(" Enhanced"):
            title = title[:-len(" Enhanced")]
        hemisphere_image_urls.append({
            "title": title,
            "img_url": f"{base_url}{aux_str}.tif/full.jpg"
        })
    mars["hemisphere_image_urls"] = hemisphere_image_urls

    # Close browser
    browser.quit()
    print(f"""...Finish Scraping --------------------""")
    # RETURN the dictionary
    return mars
def scrape():
    """Scrape Mars news, featured image, weather tweet, fact table and
    hemisphere image links; return everything in one dictionary.

    Returns
    -------
    dict with keys: title, newsp, featured_image_url, mars_tweets, facts,
    hemisphereimg.
    """
    # Dictionary creation to retain scraped variables
    mars_dict = {}

    # Scrape 1 : Nasa Mars News
    url = 'https://mars.nasa.gov/news/'
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    title = soup.find("div", class_="content_title").a.text.strip('\n')
    news = soup.find('div', class_='rollover_description_inner').text.strip('\n')
    print("Nasa Mars News")
    print("-------------------------")
    print(title)
    print(news)
    mars_dict["title"] = title
    mars_dict["newsp"] = news

    # Scrape 2 : Image with Splinter
    executable_path = {"executable_path": ChromeDriverManager().install()}
    browser = Browser("chrome", **executable_path, headless=False)
    browser.visit(
        "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars")
    html2 = browser.html
    soup2 = BeautifulSoup(html2, "html.parser")
    # The fancybox anchor carries the relative image URL.
    featured_image = soup2.find(
        "a", class_="button fancybox").get("data-fancybox-href")
    featured_image_url = "https://www.jpl.nasa.gov" + featured_image
    print("Mars Nasa Featured Image")
    print("-------------------------")
    print(featured_image_url)
    mars_dict["featured_image_url"] = featured_image_url

    # Scrape 3 : Weather Tweets
    browser.visit("https://twitter.com/marswxreport?lang=en")
    # Scroll so more tweets are loaded before parsing.
    browser.execute_script("window.scrollTo(2, document.body.scrollHeight);")
    html3 = browser.html
    soup3 = BeautifulSoup(html3, "html.parser")
    mars_tweets = ""
    mars_weather_scrape = soup3.find_all("div", class_="css-901oao")
    for tweet in mars_weather_scrape:
        if "º" in tweet.text:
            mars_tweets = tweet.text
            break
    print("Mars Weather latest Tweet")
    print("-------------------------")
    print(mars_tweets)
    print()
    mars_dict["mars_tweets"] = mars_tweets

    # Scrape 4 : Mars Facts
    url4 = "https://space-facts.com/mars/"
    facts = pd.read_html(url4)[0]
    facts = facts.to_html(classes="table")
    print("Mars Facsts")
    print("-------------------------")
    print(facts)
    mars_dict["facts"] = facts

    # Scrape 5 : Mars Hemispheres Pictures
    url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    response = requests.get(url)
    soup5 = BeautifulSoup(response.text, "html.parser")
    hemi_text = soup5.find_all("div", class_="item")
    hemisphere_image_urls = []
    for i in hemi_text:
        string = i.a.img["src"]
        desc = i.div.h3.text
        # Bug fix: rstrip(" Enhanced") strips any trailing characters from
        # that set (would corrupt names ending in e/n/d/...); remove the
        # exact suffix instead.
        if desc.endswith(" Enhanced"):
            desc = desc[:-len(" Enhanced")]
        hemisphere_image_urls.append({
            "Desc": desc,
            "URL": f"https://astrogeology.usgs.gov/{string}"
        })
    #create Dictrionary
    print("Mars Hemispheres Pictures Links")
    print("-------------------------")
    print(hemisphere_image_urls)
    mars_dict["hemisphereimg"] = hemisphere_image_urls

    # Close browser
    browser.quit()
    return mars_dict
# NOTE(review): this is a fragment -- ``browser`` and ``time`` are bound in
# an enclosing scope not visible here.  It walks a cloud image-selection
# wizard and collects (ami, title) pairs for non-Windows 64-bit images.
browser.find_by_css('.gwt-Button').click()
time.sleep(6)
browser.find_by_css('#scenario').click()
browser.find_by_name('key-pair-method')[2].click()
#select no key, because for example ap-southeast-2 does not have the existing keys,
#and this asks for a key without letting you to proceed with the images
provider_images = []
# Starts at 1 -- presumably row 0 is a header/placeholder; confirm.
for i in range (1, len(browser.find_by_css('tr.scenario_description'))):
    # Check if next image is windows (to ignore it)
    if 'Windows' in browser.evaluate_script("$('tr.scenario_description').eq(%d).text()" % i):
        continue
    # Click image list item
    browser.execute_script("$('tr.scenario_description').eq(%d).click()" % i)
    # If 64-bit is available
    if not browser.evaluate_script("$('tr.scenario_description').eq(%d).find('input#x86_64').attr('disabled')" % i):
        # Click 64-bit radio button
        browser.execute_script("$('tr.scenario_description').eq(%d).find('input#x86_64').click()" % i)
    # Click continue
    browser.execute_script("$('#qs_continue_scenario .elasticbig-container').click()")
    # Get image ami -- review page shows "Title (ami-xxxx)"
    line = browser.evaluate_script("$('.wizard_review h1.ami_name').eq(0).text()")
    ami = 'ami' + line.split(' (ami')[1].replace(')', '')
    title = line.split(' (ami')[0] + ' 64bit'
    # Save ami
    new_pair = (ami, title)
    if new_pair not in provider_images:
        provider_images.append(new_pair)
class DataNodeTestCase(LiveServerTestCase):
    """
    A set of tests to test all interaction related to the creation and
    deletion of nodes and relationships. Also, we test export the data in
    two formats: gexf and csv.
    """

    def setUp(self):
        # Fresh browser session with a signed-up, signed-in user per test.
        self.browser = Browser()
        signup(self, 'bob', '*****@*****.**', 'bob_secret')
        signin(self, 'bob', 'bob_secret')

    def tearDown(self):
        logout(self)
        self.browser.quit()

    def test_data_node_addition(self):
        # Create graph/schema/type/data, delete the node through the UI and
        # verify the node counter returns to zero.
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        # Check the node name
        self.browser.find_by_xpath(
            "//td[@class='dataList']/a[@class='edit']").first.click()
        text = self.browser.find_by_id('propertiesTitle').first.value
        self.assertEqual(text, 'Properties')
        self.browser.find_by_xpath(
            "//span[@class='buttonLinkOption buttonLinkRight']/a").first.click(
        )
        self.browser.choose('confirm', '1')
        self.browser.find_by_value('Continue').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='indent']/div").first.value
        Graph.objects.get(name="Bob's graph").destroy()
        self.assertEqual(text, 'Nodes: 0')

    def test_data_node_addition_rel_add_del(self):
        # Create two nodes, link them through an allowed relationship, then
        # delete the relationship and verify the counters both ways.
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_node(self, "Bob")
        create_node(self, "Alice")
        # We create a allowed relation
        js_code = "$('a#schema-link')[0].click();"
        self.browser.execute_script(js_code)
        self.browser.find_by_id('allowedRelations').first.click()
        self.browser.select('source', '1')
        self.browser.find_by_name('name').fill('Bob\'s rel')
        self.browser.select('target', '1')
        self.browser.find_by_id('id_description').fill(
            'This the allowed relationship for Bob\'s graph')
        self.browser.find_by_value('Save Type').first.click()
        self.assertEqual(self.browser.title, "SylvaDB - Bob's graph")
        # We create the link between the nodes
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataActions']/a[@class='dataOption list']"
        ).first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath(
            "//li[@class='token-input-input-token']/input").first.fill('Alice')
        # NOTE(review): the 5 here is positional; sibling tests pass
        # wait_time=5 — confirm both forms are equivalent in this splinter
        # version.
        self.browser.is_element_present_by_id("id_user_wait", 5)
        self.browser.find_by_xpath(
            "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b"
        ).first.click()
        self.browser.find_by_value('Save Bob\'s type').first.click()
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='flags-block']/span[@class='graph-relationships']"
        ).first.value
        self.assertEqual(text, "1 relationships")
        # Delete the relationship
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataActions']/a[@class='dataOption list']"
        ).first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath(
            "//span[@class='all-relationships incoming-relationships i_bobs_rel1-relationships']//a[@class='delete-row initial-form floating']"
        ).first.click()
        self.browser.find_by_value('Save Bob\'s type').first.click()
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='flags-block']/span[@class='graph-relationships']"
        ).first.value
        self.assertEqual(text, "0 relationships")
        Graph.objects.get(name="Bob's graph").destroy()

    def test_node_type_deletion_keeping_nodes(self):
        # Delete a node type but choose to KEEP its nodes; the nodes must
        # survive while the schema type disappears.
        create_graph(self)
        create_schema(self)
        create_type(self)
        # Adding relationship to the type
        self.browser.find_by_id('allowedRelations').first.click()
        self.browser.select('source', '1')
        self.browser.find_by_name('name').fill("Bob's rel")
        self.browser.select('target', '1')
        self.browser.find_by_id('id_description').fill(
            'The loved relationship')
        self.browser.find_by_value('Save Type').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='form-row indent']/label").first.value
        self.assertNotEqual(text.find("Bob's rel"), -1)
        # Creating nodes
        create_node(self, 'Bob')
        create_node(self, 'Alice')
        # Creating relationship between nodes
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataActions']/a[@class='dataOption list']"
        ).first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath(
            "//li[@class='token-input-input-token']/input").first.fill('Alice')
        self.browser.is_element_present_by_id("id_user_wait", wait_time=5)
        self.browser.find_by_xpath(
            "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b"
        ).first.click()
        self.browser.find_by_value('Save Bob\'s type').first.click()
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='flags-block']/span[@class='graph-relationships']"
        ).first.value
        self.assertEqual(text, "1 relationships")
        # Deleting type
        js_code = "$('a#schema-link')[0].click();"
        self.browser.execute_script(js_code)
        self.browser.find_by_xpath(
            "//fieldset[@class='module aligned wide model']/h2/a").first.click(
        )
        self.browser.find_by_xpath(
            "//span[@class='buttonLinkOption buttonLinkRight']/a[@class='delete']"
        ).first.click()
        text = self.browser.find_by_xpath(
            "//p/label[@for='id_option_0']").first.value
        self.assertNotEqual(text.find("We found some elements of this type"),
                            -1)
        # Keeping nodes
        self.browser.choose('option', 'no')
        self.browser.find_by_value('Continue').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='body-inside']/p").first.value
        self.assertEqual(text, 'There are no types defined yet.')
        # Checking
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='flags-block']/span[@class='graph-nodes']"
        ).first.value
        self.assertEqual(text, "2 nodes")
        text = self.browser.find_by_xpath(
            "//div[@class='flags-block']/span[@class='graph-relationships']"
        ).first.value
        self.assertEqual(text, "1 relationships")
        # The sigma graph preview should render zero nodes (type removed).
        self.browser.is_element_present_by_id('wait_for_js', 3)
        js_code = '''
            var instanceId = '0';
            for (key in sigma.instances) {
                instanceId = key;
                break;
            }
            var instance = sigma.instances[instanceId];
            sigma.test_node_count = instance.getNodesCount();
            '''
        self.browser.execute_script(js_code)
        text = self.browser.evaluate_script('sigma.test_node_count')
        self.assertEqual(text, 0)
        Graph.objects.get(name="Bob's graph").destroy()

    def test_node_type_deletion_deleting_nodes(self):
        # Delete a node type and choose to DELETE its nodes; both node and
        # relationship counters must drop to zero.
        create_graph(self)
        create_schema(self)
        create_type(self)
        # Adding relationship to the type
        self.browser.find_by_id('allowedRelations').first.click()
        self.browser.select('source', '1')
        self.browser.find_by_name('name').fill("Bob's rel")
        self.browser.select('target', '1')
        self.browser.find_by_id('id_description').fill(
            'The loved relationship')
        self.browser.find_by_value('Save Type').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='form-row indent']/label").first.value
        self.assertNotEqual(text.find("Bob's rel"), -1)
        # Creating nodes
        create_node(self, 'Bob')
        create_node(self, 'Alice')
        # Creating relationship between nodes
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataActions']/a[@class='dataOption list']"
        ).first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath(
            "//li[@class='token-input-input-token']/input").first.fill('Alice')
        self.browser.is_element_present_by_id("id_user_wait", wait_time=5)
        self.browser.find_by_xpath(
            "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b"
        ).first.click()
        self.browser.find_by_value('Save Bob\'s type').first.click()
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='flags-block']/span[@class='graph-relationships']"
        ).first.value
        self.assertEqual(text, "1 relationships")
        # Deleting type
        js_code = "$('a#schema-link')[0].click();"
        self.browser.execute_script(js_code)
        self.browser.find_by_xpath(
            "//fieldset[@class='module aligned wide model']/h2/a").first.click(
        )
        self.browser.find_by_xpath(
            "//span[@class='buttonLinkOption buttonLinkRight']/a[@class='delete']"
        ).first.click()
        text = self.browser.find_by_xpath(
            "//p/label[@for='id_option_0']").first.value
        self.assertNotEqual(text.find("We found some elements of this type"),
                            -1)
        # Deleting nodes ('de' = delete option in the confirmation form)
        self.browser.choose('option', 'de')
        self.browser.find_by_value('Continue').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='body-inside']/p").first.value
        self.assertEqual(text, 'There are no types defined yet.')
        # Checking
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='flags-block']/span[@class='graph-nodes']"
        ).first.value
        self.assertEqual(text, "0 nodes")
        text = self.browser.find_by_xpath(
            "//div[@class='flags-block']/span[@class='graph-relationships']"
        ).first.value
        self.assertEqual(text, "0 relationships")
        Graph.objects.get(name="Bob's graph").destroy()

    def test_data_node_clone(self):
        # Clone a node via the "save as new" flow and verify both the
        # original and the clone are listed.
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        original_name = self.browser.find_by_xpath(
            "//table[@id='content_table']/tbody/tr/td")[1].value
        # Clone the node
        self.browser.find_by_xpath(
            "//table[@id='content_table']/tbody/tr/td/a[@class='edit']"
        ).first.click()
        self.browser.find_by_name('Name').first.fill(original_name + " clone")
        self.browser.find_by_name("as-new").first.click()
        # Check that two nodes exist
        original_name = self.browser.find_by_xpath(
            "//table[@id='content_table']/tbody/tr/td")[1].value
        clone_name = self.browser.find_by_xpath(
            "//table[@id='content_table']/tbody/tr/td")[4].value
        self.assertEqual(original_name, "Bob's node")
        self.assertEqual(clone_name, "Bob's node clone")
        Graph.objects.get(name="Bob's graph").destroy()

    def test_sigma_visualization_in_node_view(self):
        # After linking two nodes, the node view's sigma preview must
        # report both of them.
        create_graph(self)
        create_schema(self)
        create_type(self)
        # Adding relationship to the type
        self.browser.find_by_id('allowedRelations').first.click()
        self.browser.select('source', '1')
        self.browser.find_by_name('name').fill("Bob's rel")
        self.browser.select('target', '1')
        self.browser.find_by_id('id_description').fill(
            'The loved relationship')
        self.browser.find_by_value('Save Type').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='form-row indent']/label").first.value
        self.assertNotEqual(text.find("Bob's rel"), -1)
        # Creating nodes
        create_node(self, 'Bob')
        create_node(self, 'Alice')
        # Creating relationship between nodes
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataActions']/a[@class='dataOption list']"
        ).first.click()
        self.browser.find_by_xpath(
            "//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath(
            "//li[@class='token-input-input-token']/input").first.fill('Alice')
        self.browser.is_element_present_by_id("id_user_wait", wait_time=5)
        self.browser.find_by_xpath(
            "//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b"
        ).first.click()
        self.browser.find_by_value('Save Bob\'s type').first.click()
        # Checking
        self.browser.find_by_xpath(
            "//table[@id='content_table']/tbody/tr/td/p/a[@title='View node' and text()='Alice']"
        ).first.click()
        self.browser.is_element_present_by_id('wait_for_js', 3)
        js_code = '''
            var instanceId = '0';
            for (key in sigma.instances) {
                instanceId = key;
                break;
            }
            var instance = sigma.instances[instanceId];
            sigma.test_node_count = instance.getNodesCount();
            '''
        self.browser.execute_script(js_code)
        text = self.browser.evaluate_script('sigma.test_node_count')
        self.assertEqual(text, 2)
        Graph.objects.get(name="Bob's graph").destroy()

    def test_graph_export_gexf(self):
        # Export the graph as GEXF via an authenticated request (session
        # cookies copied from the browser) and round-trip the file.
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        self.browser.find_by_id('toolsMenu').first.click()
        cookies = {
            self.browser.cookies.all()[0]["name"]:
                self.browser.cookies.all()[0]["value"],
            self.browser.cookies.all()[1]["name"]:
                self.browser.cookies.all()[1]["value"]
        }
        result = requests.get(self.live_server_url +
                              '/tools/bobs-graph/export/gexf/',
                              cookies=cookies)
        self.assertEqual(result.headers['content-type'], 'application/xml')
        self.assertEqual(self.browser.status_code.is_success(), True)
        fw = open('sylva/base/tests/files/bobs-graph.gexf', 'w')
        fw.write(result.content)
        fw.close()
        f = open('sylva/base/tests/files/bobs-graph.gexf')
        xmlFile = ""
        for line in f:
            xmlFile += line
        f.close()
        self.assertEqual(xmlFile, result.content)
        Graph.objects.get(name="Bob's graph").destroy()

    def test_graph_export_csv(self):
        # Export the graph as a ZIP of CSVs and verify each member file
        # round-trips through disk unchanged.
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        self.browser.find_by_id('toolsMenu').first.click()
        cookies = {
            self.browser.cookies.all()[0]["name"]:
                self.browser.cookies.all()[0]["value"],
            self.browser.cookies.all()[1]["name"]:
                self.browser.cookies.all()[1]["value"]
        }
        result = requests.get(self.live_server_url +
                              '/tools/bobs-graph/export/csv/',
                              cookies=cookies)
        self.assertEqual(result.headers['content-type'], 'application/zip')
        self.assertEqual(self.browser.status_code.is_success(), True)
        test_file = StringIO(result.content)
        csv_zip = ZipFile(test_file)
        for name in csv_zip.namelist():
            fw = open('sylva/base/tests/files/' + name, 'w')
            fw.write(csv_zip.read(name))
            fw.close()
        for name in csv_zip.namelist():
            f = open('sylva/base/tests/files/' + name)
            csvFile = ""
            for line in f:
                csvFile += line
            f.close()
            self.assertEqual(csv_zip.read(name), csvFile)
        Graph.objects.get(name="Bob's graph").destroy()
def get_flights(origin_airport_code, destination_airport_code,
                departure_date_str):
    """Scrape Southwest.com for one-way flights on a given route and day.

    Args:
        origin_airport_code: IATA code of the departure airport.
        destination_airport_code: IATA code of the arrival airport.
        departure_date_str: departure date formatted "%m/%d/%y".

    Returns:
        list of tuples ``(origin, destination, departure_datetime,
        arrival_datetime, flight_numbers, routing, price)``; ``price`` is
        the "Wanna Get Away" fare string, a sold-out/unavailable message,
        or None if no fare cell could be read.
    """
    browser = Browser('phantomjs')
    browser.visit('https://www.southwest.com/')
    booking_button = browser.find_by_id('booking-form--flight-tab')[0]
    booking_button.click()
    #if return_date:
    #    browser.choose('twoWayTrip','true')
    #else:
    browser.choose('twoWayTrip', 'false')
    #departure_date_str = departure_date.strftime("%m/%d/%y")
    # works better with the date selected first... no idea why.
    browser.execute_script("document.getElementsByName('outboundDateString')[0].type = 'visible'")
    time.sleep(2)
    browser.fill('originAirport', origin_airport_code)
    browser.fill('destinationAirport', destination_airport_code)
    browser.execute_script("document.getElementsByName('outboundDateString')[0].type = 'visible'")
    browser.fill('outboundDateString', departure_date_str)
    submit_button = browser.find_by_id('jb-booking-form-submit-button')[0]
    submit_button.click()
    flights_DOM_table = browser.find_by_css('.bugTableRow')
    flights_table = []
    for flight_DOM in flights_DOM_table:
        depart_time = flight_DOM.find_by_css('.depart_column .time').text
        # zfill pads e.g. "9:55" -> "09:55" so strptime's %I matches.
        depart_time = depart_time.zfill(5)
        depart_am_pm = flight_DOM.find_by_css('.depart_column .indicator').text
        duration = parse_duration(flight_DOM.find_by_css('.duration').text)
        depart_str = departure_date_str + ", " + depart_time + depart_am_pm
        departure = datetime.datetime.strptime(depart_str, "%m/%d/%y, %I:%M%p")
        arrival = departure + duration
        #arrive_time = flight_DOM.find_by_css('.arrive_column .time').text
        #arrive_am_pm = flight_DOM.find_by_css('.arrive_column .indicator').text
        flight_nums = flight_DOM.find_by_css('.bugLinkText')  # could be a few of these
        f = []
        for num in flight_nums:
            f.append(num.text[0:-14])
        routing = flight_DOM.find_by_css('.bugLinkRouting').text[0:-14]
        if len(f) > 1:
            routing += " - " + flight_DOM.find_by_css('.search-results--flight-stops').text
        box = flight_DOM.find_by_css('.price_column')[2]  # only the wanna get away
        # check if sold out, unavailable or available
        price = None
        try:
            price = box.find_by_css('label.product_price')[0].text[1:]  # strips the currency symbol
        except splinter.exceptions.ElementDoesNotExist:
            pass
        try:
            price = box.find_by_css('.insufficientInventory')[0].text.strip()
        except splinter.exceptions.ElementDoesNotExist:
            pass
        try:
            price = box.find_by_css('.unqualifiedForAnyFare')[0].text.strip()
        # BUG FIX: was a bare `except:` that swallowed every error (even
        # KeyboardInterrupt); catch the same specific exception as the two
        # sibling fare branches above.
        except splinter.exceptions.ElementDoesNotExist:
            pass
        flight = (origin_airport_code, destination_airport_code, departure,
                  arrival, tuple(f), routing, price)
        flights_table.append(flight)
    return flights_table
class GPlusEventManager(object):
    """Automate creating and updating Google Plus events via a Firefox
    browser session. (Python 2 code: note `except Exception, e` below.)
    """

    def __init__(self, email, passwd, otp):
        # Credentials for the Google account; `otp` is presumably a
        # one-time-password source used by login() — TODO confirm, login()
        # is defined outside this chunk.
        self.email = email
        self.passwd = passwd
        self.br = Browser('firefox')
        # Make sure the browser is closed even on abnormal exit.
        atexit.register(self.force_br_quit)
        # To dynamically load jQuery into the HTML head
        self.loadjq = """var head = document.getElementsByTagName('head')[0];
        var script = document.createElement('script');
        script.type = 'text/javascript';
        script.src = '//ajax.googleapis.com/ajax/libs/jquery/1.10.1/jquery.min.js';
        head.appendChild(script);"""
        self.otp = otp
        self.logged_in = self.login()

    def force_br_quit(self):
        # Best effort: the browser may already be closed at exit time.
        try:
            self.br.quit()
        except:
            pass

    def create(self, title, desc, date, time):
        """ Create a new Google Plus event """
        if not self.logged_in:
            self.logged_in = self.login()
        create_btn = 'div[guidedhelpid="events_create_event_button"]'
        self.br.find_by_css(create_btn)[0].click()
        return self.complete_form(title, desc, date, time, update=False)

    def update(self, id, title=None, desc=None, date=None, time=None):
        """ Update a Google Plus event """
        if not self.logged_in:
            self.logged_in = self.login()
        # `id` is the event's URL — visiting it opens the event page.
        self.br.visit(id)
        dropdown = 'div[class="A7kfHd q3sPdd"]'
        # Busy-wait until the options dropdown is rendered.
        while self.br.is_element_not_present_by_css(dropdown):
            pass
        self.br.find_by_css(dropdown).click()
        self.br.find_by_xpath('//*[@id=":o"]/div').click()
        return self.complete_form(title, desc, date, time, update=True)

    def complete_form(self, title, desc, date, time, update):
        '''Fill event create/edit form, the CSS selectors are valid in both
        types of form'''
        title_input = 'input[placeholder="Event title"]'
        # Busy-wait for the form to appear.
        while self.br.is_element_not_present_by_css(title_input):
            pass
        if title:
            title_placeholder = self.br.find_by_css(title_input)
            title_placeholder.fill(title)
        if date:
            self.br.find_by_css('input[class="g-A-G T4 lUa"]').click()
            # Clear the pre-filled date before typing the new one.
            rm_date = '''document.body.getElementsByClassName("g-A-G T4 lUa")
                      [0].value = ""'''
            self.br.execute_script(rm_date)
            date_field = 'input[class="g-A-G T4 lUa"]'
            self.br.find_by_css(date_field).type('{}\t'.format(date))
        if time:
            # Inject jQuery, then poll until it has loaded by retrying a
            # jQuery call that fails while $ is undefined.
            self.br.execute_script(self.loadjq)
            loaded = False
            rm_time = '$(".EKa")[0].value = ""'
            while not loaded:
                try:
                    self.br.execute_script(rm_time)
                except Exception, e:
                    pass
                else:
                    loaded = True
            time_field = 'input[class="g-A-G T4 EKa"]'
            self.br.find_by_css(time_field)[0].type('{}'.format(time))
        if desc:
            set_desc = '''document.body.getElementsByClassName("yd editable")
                       [1].innerHTML = "{}"'''.format(desc)
            self.br.execute_script(set_desc)
        invite_btn = self.br.find_by_css('div[guidedhelpid="sharebutton"]')
        invite_inp = self.br.find_by_css('input[class="i-j-h-G-G"]')
        invite_btn.click()
        if not update:
            # If new entry, invite Public group by default
            invite_inp.click()
            invite_inp.type('Public\n')
            invite_btn.click()
        # Wait until the event page shows attendance, i.e. it was saved.
        while not self.br.is_text_present('Going ('):
            pass  # wait on page load for new event
        url = self.br.url
        self.br.quit()
        return url  # return event url
class ToolsTestCaseCsv(LiveServerTestCase):
    """
    A master test to check the behaviour of the new 'auto' fields.
    Actually only works with gephi format.
    """

    def setUp(self):
        self.browser = Browser()
        socket.setdefaulttimeout(30)
        signup(self, 'bob', '*****@*****.**', 'bob_secret')
        signin(self, 'bob', 'bob_secret')
        self.firstGraphName = "bobgraph"
        self.secondGraphName = "alicegraph"

    def tearDown(self):
        logout(self)
        self.browser.quit()

    @classmethod
    def tearDownClass(cls):
        sleep(10)  # It needs some time for close the LiverServerTestCase
        super(ToolsTestCaseCsv, cls).tearDownClass()

    def test_graph_export_csv(self):
        # Build a source graph, import its CSV dump into a second graph,
        # then verify counts and that adding a node updates them.
        # Create a graph with a auto_user property
        create_graph(self, self.firstGraphName)
        create_advanced_schema(self, self.firstGraphName)
        create_advanced_type(self, self.firstGraphName, "e")
        create_advanced_data(self)
        # Create new graph for import the data
        import_advanced_schema_csv(self, self.firstGraphName,
                                   self.secondGraphName)
        # Data import
        self.browser.find_by_id('toolsMenu').first.click()
        self.browser.find_link_by_href('/tools/' + self.secondGraphName +
                                       '/import/').first.click()
        self.browser.find_by_id('csv-radio').first.click()
        # Change the display field of input to attach the file
        script = """
            $('#files').css('display', '');
            """
        self.browser.execute_script(script)
        self.browser.is_text_present('Drop your nodes files here',
                                     wait_time=10)
        # Import the nodes
        file_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            'files/csv/bobs-type.csv'
        )
        self.browser.attach_file('file', file_path)
        self.browser.is_text_present('Nodes files loaded. '
                                     'Loading edges files...', wait_time=10)
        # Wait until the data is imported
        self.browser.is_text_present('Now drop your edges files',
                                     wait_time=10)
        # Change the display field of input to attach the file
        script = """
            $('#files2').css('display', '');
            """
        self.browser.execute_script(script)
        # Import the relationships
        file_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            'files/csv/bobs-rels.csv'
        )
        self.browser.attach_file('file2', file_path)
        self.browser.is_text_present('Data loaded. Uploading to the server...',
                                     wait_time=10)
        # Wait until the data is imported
        self.browser.is_text_present('Data uploaded.', wait_time=10)
        # Check that nodes and relationships are ok
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//a[@class='dataOption list']").first.click()
        alicegraph = Graph.objects.get(name=self.secondGraphName)
        alicegraphNodes = alicegraph.nodes.count()
        spin_assert(lambda: self.assertEqual(3, alicegraph.nodes.count()))
        spin_assert(lambda: self.assertEqual(
            1, alicegraph.relationships.count()))
        # Add new nodes and relationships and check all is correct
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//a[@class='dataOption new']").first.click()
        text = self.browser.find_by_id('propertiesTitle').first.value
        spin_assert(lambda: self.assertEqual(text, 'Properties'))
        self.browser.find_by_value("Save Bob's type").first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='pagination']/span[@class='pagination-info']"
        ).first.value
        spin_assert(lambda: self.assertNotEqual(
            text.find(" elements Bob's type."), -1))
        spin_assert(lambda: self.assertEqual(
            alicegraphNodes + 1, alicegraph.nodes.count()))
        # Destroy the databases
        Graph.objects.get(name=self.firstGraphName).destroy()
        Graph.objects.get(name=self.secondGraphName).destroy()
sei_kana = table.cell(1, 8).value mei_kana = table.cell(1, 9).value tel = table.cell(1, 10).value email_password = table.cell(1, 11).value popaddress = table.cell(1, 12).value buy_time = table.cell(1, 13).value # 查看邮箱邮件数量 email_num1 = get_email_num(email, email_password, popaddress) print fid, u'邮箱内邮件数量:', email_num1 while 1: if now() >= '2015-07-15 15:00:00': b = Browser() # 第一步,点击确定预约 print fid, u'点击预约' b.visit('http://www.baidu.com') b.execute_script("document.location.href='%s'" % url) b.find_by_name('sbmt').first.click() # 第二步,填写email # if b.is_element_present_by_css() print fid, u'填写email' b.find_by_name('mail1').first.fill(email) b.find_by_name('mail2').first.fill(email) b.find_by_name('sbmt').first.click() # 第三步, 确认邮箱,点击确认 b.find_by_name('sbmt').first.click() print u'确认邮箱' while 1: email_num2 = get_email_num(email, email_password, popaddress) print u'邮箱内邮件数量:', email_num2 if email_num2 > email_num1: email_url = get_last_email_url(
class GameInteraction:
    """Drive a local copy of the 2048 game through a Chrome browser and
    expose its board, score and moves. (Python 2 code: xrange, print
    statements.)"""

    def __init__(self):
        # Open the bundled 2048 page from disk, relative to this file.
        self.browser = Browser('chrome')
        currentFolderPath = os.path.dirname(os.path.abspath(__file__))
        self.browser.visit("file:///" + currentFolderPath +
                           "/2048-master/index.html")

    def getGameState(self):
        """Return the 4x4 board as a numpy array of tile values (0 = empty)."""
        gameState = [[0, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 0, 0, 0]]  # empty gameState
        tileContainer = self.browser.find_by_css(".tile-container")
        allTiles = tileContainer.find_by_css(".tile")
        sortedTiles = {}
        classString = ".tile-position-"
        legalPositions = {}
        # Build the CSS selector for every legal "col-row" position.
        for col in xrange(1, 5):
            for row in xrange(1, 5):
                positionString = classString + str(col) + "-" + str(row)
                legalPositions[str(col) + "-" + str(row)] = positionString
                sortedTiles[str(col) + "-" + str(row)] = 0
        # fill the sortedTiles map with all the tiles at their legal
        # position (col-row)
        for pos, classPos in legalPositions.items():
            #time.sleep(0.3) #let the web browser catch up
            currentTilesInPos = tileContainer.find_by_css(classPos)
            if len(currentTilesInPos) == 1:
                # Single tile at this position: read its value directly.
                valueOfPos = currentTilesInPos[0].find_by_css(
                    ".tile-inner")[0].value.encode("utf8")
                try:
                    valueOfPos = int(valueOfPos)
                    sortedTiles[pos] = valueOfPos
                except:
                    pdb.set_trace()
                    print "[-]: valueOfPos (1 tile):", valueOfPos
            elif len(currentTilesInPos) == 3:
                # Three elements at one position: a merge just happened;
                # read the merged tile's value.
                #valueOfPos = currentTilesInPos.find_by_css(".tile-merged")[0].find_by_css(".tile-inner")[0].value.encode("utf8")
                mergedTiles = tileContainer.find_by_css(classPos +
                                                        ".tile-merged")
                innerTile = mergedTiles[0].find_by_css(
                    ".tile-inner")[0].value.encode("utf8")
                try:
                    valueOfPos = int(innerTile)
                    sortedTiles[pos] = valueOfPos
                except:
                    pdb.set_trace()
                    print "[-] mergedTiles:", mergedTiles
                    print "[-] innerTile:", innerTile
                    print "[-] valueOfPos (merged):", valueOfPos
            else:
                sortedTiles[pos] = 0
        # Transpose the "col-row" map into row-major gameState.
        try:
            for row in range(0, len(gameState)):
                for col in range(0, len(gameState[row])):
                    tileLocation = str(col + 1) + "-" + str(row + 1)
                    gameState[row][col] = sortedTiles[tileLocation]
        except:
            pdb.set_trace()
        return np.array(gameState)

    def getScore(self):
        """Return the current score as an int (drops any '+N' animation
        suffix shown in the score container)."""
        score = self.browser.find_by_css(".score-container").value
        print "[+] score:", score
        splitScore = score.split("+")
        return int(splitScore[0])

    def move(self, action):
        """Perform one move; `action` is 'up', 'down', 'left' or 'right'."""
        if action == "up":
            self.browser.execute_script("KeyboardInputManager.moveUp()")
        elif action == "down":
            self.browser.execute_script("KeyboardInputManager.moveDown()")
        elif action == "left":
            self.browser.execute_script("KeyboardInputManager.moveLeft()")
        elif action == "right":
            self.browser.execute_script("KeyboardInputManager.moveRight()")
        else:
            print "[!] invalid action:", action
url0 = 'https://passport.weibo.cn/signin/login' browser.visit(url0) browser.driver.set_window_size(400, 800) time.sleep(2) username_input = browser.find_by_xpath('//p[@class="input-box"]/input')[0] username_input.type('*****@*****.**') passwd_input = browser.find_by_xpath('//p[@class="input-box"]/input')[1] passwd_input.type('wb199371') btn_login = browser.find_by_id('loginAction') btn_login.click() time.sleep(1) # js = "window.open('https://weibo.cn/search/');" js = "window.open('https://weibo.cn/');" browser.execute_script(js) time.sleep(2) browser.windows[0].close() print("当前操作网页:" + browser.title) """ # 搜索微博关键字 text_serch = browser.find_by_name('keyword') text_input = input('输入查找的关键字') text_serch.fill(text_input) btn_serch = browser.find_by_name('smblog') btn_serch.click() """ # wb_text = input('请输入想要发布的微博')
class DashboardTestCase(LiveServerTestCase):
    """
    These tests check basic functions of Sylva's dashboard.
    """

    def setUp(self):
        # Sign up only; individual tests sign in themselves.
        self.browser = Browser()
        socket.setdefaulttimeout(30)
        signup(self, "bob", "*****@*****.**", "bob_secret")

    def tearDown(self):
        logout(self)
        self.browser.quit()

    @classmethod
    def tearDownClass(cls):
        sleep(10)  # It needs some time for close the LiverServerTestCase
        super(DashboardTestCase, cls).tearDownClass()

    def test_dashboard(self):
        # The dashboard page title and header must match after sign-in.
        signin(self, "bob", "bob_secret")
        spin_assert(lambda: self.assertEquals(self.browser.title,
                                              "SylvaDB - Dashboard"))
        text = self.browser.find_by_xpath(
            "//header[@class='global']/h1").first.value
        spin_assert(lambda: self.assertEqual(text, "Dashboard"))

    def test_dashboard_new_graph(self):
        signin(self, "bob", "bob_secret")
        create_graph(self)
        Graph.objects.get(name="Bob's graph").destroy()

    def test_dashboard_graph_preview(self):
        """
        This test, after create a graph with data, checks the Sigma
        visualization running a simple JavaScript code. This code gets the
        current instance of Sigma and checks the data with Sylva JavaScript
        object.
        """
        signin(self, "bob", "bob_secret")
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        self.browser.find_link_by_href("/graphs/bobs-graph/").first.click()
        self.browser.is_element_present_by_id("wait_for_js", 3)
        js_code = """
            var instance = sigma.instances(0);
            var node = instance.graph.nodes()[0];
            sylva.test_node_name = node.properties.Name;
            """
        self.browser.execute_script(js_code)
        text = self.browser.evaluate_script("sylva.test_node_name")
        Graph.objects.get(name="Bob's graph").destroy()
        spin_assert(lambda: self.assertNotEqual(text.find("Bob's node"), -1))

    def test_automatic_tour(self):
        """
        Thist test checks that the tour starts automatically after signup,
        only once.
        """
        self.browser.is_element_present_by_id("wait_for_cookie_tour", 3)
        signin(self, "bob", "bob_secret")
        # Tour wrapper appears on first visit after signup...
        exist = self.browser.is_element_present_by_xpath(
            "//div[@class='joyride-content-wrapper']")
        spin_assert(lambda: self.assertEqual(exist, True))
        self.browser.visit(self.live_server_url + "/dashboard/")
        # ...but not on subsequent visits.
        exist = self.browser.is_element_present_by_xpath(
            "//div[@class='joyride-content-wrapper']")
        spin_assert(lambda: self.assertNotEqual(exist, True))
class Session:
    """Automate searching and reserving Student Agency bus tickets through
    a splinter browser session for one logged-in user."""

    def __init__(self, browser, user):
        # `browser` is a splinter driver name (e.g. 'firefox'); `user` is a
        # dict with login/password/first/last/email/phone keys.
        self.browser = Browser(browser)
        self.browser.visit('http://jizdenky.studentagency.cz/')
        self.browser.fill_form({'passwordAccountCode': user['login'],
                                'password': user['password']})
        self.browser.execute_script('window.scrollTo(0, 100)')
        button = self.browser.find_by_value('Přihlásit').first
        button.click()
        self.user = user
        self.log = logging.getLogger(__name__)

    def go_search(self):
        # Return to the search start page.
        self.browser.visit('http://jizdenky.studentagency.cz/')

    def search(self, task, date_return=None, is_open=False):
        """Fill the itinerary form for `task` and return the Connection
        list found for the requested date."""
        # Radio index 1 = return/open ticket, 0 = one-way.
        self.browser.find_by_id('hp_form_itinerar').first \
            .find_by_xpath('div/input[@type="radio"]'
                           )[1 if date_return or is_open else 0].check()
        # Fill origin (tabindex 1) and destination (tabindex 2), picking
        # the matching autocomplete entry.
        for city, i in [(task.from_city, 1), (task.to_city, 2)]:
            self.browser.find_by_css('input[tabindex="{}"]'.format(i)) \
                .first.fill(city)
            for item in self.browser.find_by_css('.ui-menu-item'):
                link = item.find_by_tag('a')
                if link.value.lower() == city.lower():
                    link.click()
                    break
        self.browser.fill('departure:dateField', task.date)
        if date_return:
            self.browser.fill('returnDeparture:dateField', date_return)
        if is_open:
            self.browser.check('returnTicketOpen')
        self.browser.find_option_by_text('ISIC').first.check()
        self.browser.find_by_value('Vyhledat').first.click()
        # Busy-wait until the results column renders.
        while self.browser.is_element_not_present_by_css('.left_column',
                                                         wait_time=1):
            pass
        items = self.browser.find_by_css('.left_column') \
            .find_by_xpath('div/div/*')
        connections = []
        # Results are grouped under h2 date headers; collect only rows
        # belonging to the requested date.
        for item in items:
            if item.tag_name == 'h2':
                date_local = item.text.split(' ')[1]
            elif item.tag_name == 'div' and item.has_class('routeSummary'):
                assert date_local
                if date_local != task.date:
                    break
                connections.append(Connection(item))
        return connections

    def order_time(self, connection):
        """Open the ordering page for `connection`, dismissing any modal
        dialog, until the summary section appears."""
        while True:
            if connection.click():
                # NOTE(review): bare `self.browser` expression is a no-op —
                # looks like a leftover/truncated statement; confirm intent.
                self.browser
            dialog = self.browser.find_by_css('[id^=_wicket_window]')
            if dialog:
                dialog.first.find_by_tag('button').click()
            if self.browser.is_element_present_by_id('sumary_lines',
                                                     wait_time=1):
                break
        self.browser.find_by_id('sumary_lines') \
            .first.find_by_tag('button') \
            .first.click()
        # Collect selectable seats, keyed by seat number. Two different
        # seat-map layouts exist (.seatsContainer vs .vehicle).
        seats = {}
        bus = self.browser.find_by_css('.seatsContainer')
        if bus:
            for seat in bus.first.find_by_css(
                    '.seatContainer:not([style*=blocked])'):
                seats[int(seat.find_by_tag('div').first.html[:-1])] = seat
        else:
            bus = self.browser.find_by_css('.vehicle')
            for seat in bus.first.find_by_css('.free, .selected'):
                seats[int(seat.text[:-1])] = seat
        return seats

    def order_seat(self, seat):
        """Select `seat`, decline insurance, fill passenger data, reserve,
        and email the resulting ticket page to the user."""
        if not seat.has_class('selected'):
            seat.click()
        # Decline the insurance add-on if the fieldset is present.
        for fs in self.browser.find_by_css('fieldset.topRoute'):
            legend = fs.find_by_css('legend')
            if legend and 'Pojištění' in legend[0].text:
                for package in fs.find_by_css('.insurancePackageType'):
                    if 'nechci' in package.find_by_tag('label').text:
                        package.find_by_tag('input').click()
        time.sleep(1)
        submit = self.browser.find_by_css('[name^=buttonContainer]').first
        interaction_type = submit.text
        reserved = 'Rezervovat' in interaction_type
        if not reserved:
            # One more step before reservation: fill passenger details.
            submit.click()
            time.sleep(1)
            data = (self.user['first'], self.user['last'],
                    self.user['email'], self.user['phone'])
            for item, value in zip(self.browser.find_by_id('passengerInfo')
                                   .first.find_by_tag('input'), data):
                item.fill(value)
            submit = self.browser.find_by_css(
                '[name^=buttonContainer]').first
            interaction_type = submit.text
            assert 'Rezervovat' in interaction_type
        agreement = self.browser.find_by_css(
            '[name="bottomComponent:termsAgreementCont:termsAgreementCB"]')
        if agreement:
            agreement[0].check()
            time.sleep(1)
        submit.click()
        # Mail the rendered ticket page if SMTP settings are configured.
        with open('conf.yaml') as f:
            conf = yaml.load(f)
        if 'email' in conf:
            email = conf['email']
            while self.browser.is_element_not_present_by_id('ticketPage',
                                                            wait_time=1):
                pass
            msg = MIMEText(self.browser.find_by_id('ticketPage').first.html,
                           'html')
            msg['Subject'] = 'SA reservation'
            msg['From'] = email['from']
            msg['To'] = self.user['email']
            username = email['username']
            password = email['password']
            server = smtplib.SMTP(email['server'])
            server.starttls()
            server.login(username, b64decode(password).decode())
            server.sendmail(msg['From'], msg['To'], msg.as_string())
            server.quit()
from splinter import Browser browser = Browser('chrome') #connect to database import pymongo from pymongo import MongoClient client = MongoClient('localhost', 27017) db = client.Singapore_TCM_Database tcm_detail = db['tcm_detail'] tcm_certs = db['tcm_certs'] tcm_work = db['tcm_work'] browser.visit('https://prs.moh.gov.sg/prs/internet/profSearch/showSearchSummaryByName.action?hpe=TCM') browser.execute_script("resubmit()") # the search page is launched def getProfDetail(userid): #find section frist title_type = "Type of Register" title_place = "Primary/Principal Place of Practice" #insert student information count = tcm_detail.count() if(userid<=count): return name = browser.find_by_css('.table-head')[0].text reg_number = browser.find_by_xpath('//table/tbody/tr[1]/td[2]')[0].text qulification = browser.find_by_xpath('//table/tbody/tr[2]/td[2]')[0].text detail = {"_id":userid, "name":name, "qualification":qulification, "reg_number":reg_number} tcm_detail.insert_one(detail) #insert career info
# -*- coding: utf-8 -*- from splinter import Browser import time executable_path = {'executable_path': r'D:\webDriver\chromedriver.exe'} b = Browser('chrome', **executable_path) b.visit('https://www.zhihu.com/people/si-kao-de-yu-di/answers') # time.sleep(15) b.execute_script('window.scrollTo(0,document.body.scrollHeight)') time.sleep(1) b.execute_script('window.scrollTo(0,document.body.scrollHeight)') time.sleep(1) b.execute_script('window.scrollTo(0,document.body.scrollHeight)') htmlCode = b.html print(htmlCode) index = 0 # works! # for element in b.find_by_css(".ContentItem-more"): # try: # element.click() # time.sleep(1) # except Exception: # print(element.value) # the vote button can't be clicked. # b.find_by_css(".VoteButton--up").first.click() # b.find_by_css(".AppHeader-profileAvatar").click() # htmlCode1 = b.html # print(htmlCode1)
# Visit URL browser.visit(URL) browser.fill(username_textbox_name, my_username) browser.fill(password_textbox_name, my_password) # Find and click the 'search' button button = browser.find_by_id(login_button_id) # Interact with elements button.click() # DOM garbage is not my fault if browser.is_text_present(LOGIN_PROOF): element = browser.find_by_css(number_input_field)[0] element.fill(DESTINATION_NUMBER) browser.fill(sms_textarea_field, SMS_MESSAGE) # Enviar button browser.execute_script(modal_javascript_open) # waiting for javascript time.sleep(2) browser.execute_script(modal_javascript_confirm) # waiting for javascript time.sleep(2) # Success message print print "Message \"%s\" sent to %s" %(SMS_MESSAGE, DESTINATION_NUMBER)
def line_login(browser, user_name, password, code): """ lineに自動ログインして、パラメータのカードコードを入力し、チャージする。 チャージした結果を返す。 :param browser:ブラウザインスタンス :param user_name:ログインユーザネーム :param password:ログインパスワード :param code:ギフトカードコード :return:チャージ結果 """ # ログインページを開く browser = Browser('firefox') url = 'https://store.line.me/home/' browser.visit(url) # ログインする login_submit = browser.find_link_by_partial_href('login') if login_submit: login_submit.click() else: html_code = browser.html return { 'code': 4, 'message': "サイト上に問題が発生しました。(サイトがアクセスできない、またはネットが遅すぎる可能性があります。)", 'htmlcode': html_code } username_input_field = browser.find_by_id('id') password_input_field = browser.find_by_id('passwd') login_submit = browser.find_by_value('Login') if username_input_field and password_input_field and login_submit: username_input_field.fill(user_name) password_input_field.fill(password) login_submit.click() else: html_code = browser.html return { 'code': 4, 'message': "サイト上に問題が発生しました。(サイトがアクセスできない、またはネットが遅すぎる可能性があります。)", 'htmlcode': html_code } # ログイン画像認識があるかどうかチェックする #captcha_image_field = browser.find_by_css('img.FnCaptchaImg') #メールアドレスまたパスワードをチェックする login_alert_field = browser.find_by_css('p.mdMN02Txt') if browser.is_element_present_by_css('p.mdMN02Txt'): result = login_alert_field.value if result.find(unicode('The password you have entered is invalid, or you have not registered your email address with LINE.')) != -1: html_code = browser.html return { 'code': 2, 'message': 'メールアドレスまたはパスワードが正しくありません。', 'htmlcode': html_code } # チャージ画面に移動する browser.find_by_text('Charge').click() browser.windows.current = browser.windows[1] browser.find_by_id('70002').click() browser.execute_script("charge(this); return false;") # チャージする code_input_field = browser.find_by_id('FnSerialNumber') code_input_field.fill(code) time.sleep(9000) browser.execute_script("javascript:doCharge(this);return false;") result = browser.find_by_css('p.mdLYR11Txt01').value browser.quit() return result
browser.find_by_id('password').fill( ATD_ETL_CONFIG["ATD_CRIS_REQUEST_PASSWORD"]) browser.find_by_name('_eventId_proceed').click() # Let's begin our data extract request print("Selecting data extract request") browser.find_by_text("Create Data Extract Request").click() wait(10) browser.find_by_text("Continue").click() wait(10) print("Selecting Counties to be Included in the Extract") browser.find_by_css( 'input[ng-value="shareConstants.LOCATION_TYPE_IDS.COUNTY"]').click() browser.execute_script("$(\"div[data-value='105']\").click()") # Travis browser.execute_script("$(\"div[data-value='227']\").click()") # Williamson browser.execute_script("$(\"div[data-value='246']\").click()") # Hays wait(3) browser.find_by_text("Continue").click() wait(10) print("Selecting type IDS PROCESS") browser.find_by_css( 'input[ng-value="shareConstants.DATE_TYPE_IDS.PROCESS"]').click() browser.find_by_id('requestDateProcessBegin').fill(CRIS_EXTRACT_DATE_START) browser.find_by_id('requestDateProcessEnd').fill(CRIS_EXTRACT_DATE_END) browser.find_by_text("Continue").click() wait(10) print("Submit Request")
class DataNodeTestCase(LiveServerTestCase):
    """
    A set of tests covering all interaction related to the creation and
    deletion of nodes and relationships. Also tests exporting the data
    in two formats: gexf and csv.
    """

    def setUp(self):
        # Fresh browser session plus a signed-in user for every test.
        self.browser = Browser()
        socket.setdefaulttimeout(30)
        signup(self, 'bob', '*****@*****.**', 'bob_secret')
        signin(self, 'bob', 'bob_secret')

    def tearDown(self):
        logout(self)
        self.browser.quit()

    @classmethod
    def tearDownClass(cls):
        sleep(10)  # It needs some time to close the LiveServerTestCase
        super(DataNodeTestCase, cls).tearDownClass()

    def test_data_node_addition(self):
        """Create a node, then delete it and verify the node count."""
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        # Check the node name
        self.browser.find_by_xpath("//td[@class='dataList']/a[@class='edit']").first.click()
        text = self.browser.find_by_id('propertiesTitle').first.value
        spin_assert(lambda: self.assertEqual(text, 'Properties'))
        self.browser.find_by_xpath("//span[@class='buttonLinkOption buttonLinkRight']/a").first.click()
        self.browser.choose('confirm', '1')
        self.browser.find_by_value('Continue').first.click()
        text = self.browser.find_by_xpath("//div[@class='indent']/div").first.value
        Graph.objects.get(name="Bob's graph").destroy()
        spin_assert(lambda: self.assertEqual(text, 'Nodes: 0'))

    def test_data_node_addition_rel_add_del(self):
        """Create two nodes, link them, delete the link, verify count 0."""
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_node(self, "Bob")
        create_node(self, "Alice")
        # We create an allowed relation
        js_code = "$('a#schema-link')[0].click();"
        self.browser.execute_script(js_code)
        self.browser.find_by_id('allowedRelations').first.click()
        self.browser.select('source', '1')
        self.browser.find_by_name('name').fill("Bob's rel")
        self.browser.select('target', '1')
        self.browser.find_by_id('id_description').fill("This the allowed relationship for Bob's graph")
        self.browser.find_by_value('Save Type').first.click()
        spin_assert(lambda: self.assertEqual(
            self.browser.title, "SylvaDB - Bob's graph"))
        # We create the link between the nodes
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath("//td[@class='dataActions']/a[@class='dataOption list']").first.click()
        self.browser.find_by_xpath("//td[@class='dataList']/a[@class='edit']").first.click()
        # Type-ahead token input: fill, wait for the dropdown, pick it.
        self.browser.find_by_xpath("//li[@class='token-input-input-token']/input").first.fill('Alice')
        self.browser.is_element_present_by_id("id_user_wait", 5)
        self.browser.find_by_xpath("//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b").first.click()
        self.browser.find_by_value("Save Bob's type").first.click()
        # Delete the relationship
        self.browser.find_by_xpath("//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath("//span[@class='all-relationships incoming-relationships i_bobs_rel1-relationships']//a[@class='delete-row initial-form floating']").first.click()
        self.browser.find_by_value("Save Bob's type").first.click()
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath("//div[@class='flags-block']/span[@class='graph-relationships']").first.value
        spin_assert(lambda: self.assertEqual(text, "0 relationships"))
        Graph.objects.get(name="Bob's graph").destroy()

    def test_node_type_deletion_keeping_nodes(self):
        """Delete a node type but keep its nodes; counts must survive."""
        create_graph(self)
        create_schema(self)
        create_type(self)
        # Adding relationship to the type
        self.browser.find_by_id('allowedRelations').first.click()
        self.browser.select('source', '1')
        self.browser.find_by_name('name').fill("Bob's rel")
        self.browser.select('target', '1')
        self.browser.find_by_id('id_description').fill(
            'The loved relationship')
        self.browser.find_by_value('Save Type').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='form-row indent']/label").first.value
        spin_assert(lambda: self.assertNotEqual(text.find("Bob's rel"), -1))
        # Creating nodes
        create_node(self, 'Bob')
        create_node(self, 'Alice')
        # Creating relationship between nodes
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath("//td[@class='dataActions']/a[@class='dataOption list']").first.click()
        self.browser.find_by_xpath("//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath("//li[@class='token-input-input-token']/input").first.fill('Alice')
        self.browser.is_element_present_by_id("id_user_wait", wait_time=5)
        self.browser.find_by_xpath("//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b").first.click()
        self.browser.find_by_value("Save Bob's type").first.click()
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath("//div[@class='flags-block']/span[@class='graph-relationships']").first.value
        spin_assert(lambda: self.assertEqual(text, "1 relationships"))
        # Deleting type
        js_code = "$('a#schema-link')[0].click();"
        self.browser.execute_script(js_code)
        self.browser.find_by_xpath("//fieldset[@class='module aligned wide model']/h2/a").first.click()
        self.browser.find_by_xpath("//span[@class='buttonLinkOption buttonLinkRight']/a[@class='delete']").first.click()
        text = self.browser.find_by_xpath(
            "//p/label[@for='id_option_0']").first.value
        spin_assert(lambda: self.assertNotEqual(text.find(
            "We found some elements of this type"), -1))
        # Keeping nodes
        self.browser.choose('option', 'no')
        self.browser.find_by_value('Continue').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='body-inside']/p").first.value
        spin_assert(lambda: self.assertEqual(
            text, 'There are no types defined yet.'))
        # Checking
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath("//div[@class='flags-block']/span[@class='graph-nodes']").first.value
        spin_assert(lambda: self.assertEqual(text, "2 nodes"))
        text = self.browser.find_by_xpath("//div[@class='flags-block']/span[@class='graph-relationships']").first.value
        spin_assert(lambda: self.assertEqual(text, "1 relationships"))
        text = self.browser.find_by_xpath(
            "//div[@class='graph-empty-message']").first.value
        spin_assert(lambda: self.assertNotEqual(
            text.find("Your Schema is empty."), -1))
        Graph.objects.get(name="Bob's graph").destroy()

    def test_node_type_deletion_deleting_nodes(self):
        """Delete a node type together with its nodes; counts drop to 0."""
        create_graph(self)
        create_schema(self)
        create_type(self)
        # Adding relationship to the type
        self.browser.find_by_id('allowedRelations').first.click()
        self.browser.select('source', '1')
        self.browser.find_by_name('name').fill("Bob's rel")
        self.browser.select('target', '1')
        self.browser.find_by_id('id_description').fill(
            'The loved relationship')
        self.browser.find_by_value('Save Type').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='form-row indent']/label").first.value
        spin_assert(lambda: self.assertNotEqual(text.find("Bob's rel"), -1))
        # Creating nodes
        create_node(self, 'Bob')
        create_node(self, 'Alice')
        # Creating relationship between nodes
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath("//td[@class='dataActions']/a[@class='dataOption list']").first.click()
        self.browser.find_by_xpath("//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath("//li[@class='token-input-input-token']/input").first.fill('Alice')
        self.browser.is_element_present_by_id("id_user_wait", wait_time=5)
        self.browser.find_by_xpath("//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b").first.click()
        self.browser.find_by_value("Save Bob's type").first.click()
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath("//div[@class='flags-block']/span[@class='graph-relationships']").first.value
        spin_assert(lambda: self.assertEqual(text, "1 relationships"))
        # Deleting type
        js_code = "$('a#schema-link')[0].click();"
        self.browser.execute_script(js_code)
        self.browser.find_by_xpath("//fieldset[@class='module aligned wide model']/h2/a").first.click()
        self.browser.find_by_xpath("//span[@class='buttonLinkOption buttonLinkRight']/a[@class='delete']").first.click()
        text = self.browser.find_by_xpath(
            "//p/label[@for='id_option_0']").first.value
        spin_assert(lambda: self.assertNotEqual(text.find(
            "We found some elements of this type"), -1))
        # Deleting nodes
        self.browser.choose('option', 'de')
        self.browser.find_by_value('Continue').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='body-inside']/p").first.value
        spin_assert(lambda: self.assertEqual(
            text, 'There are no types defined yet.'))
        # Checking
        self.browser.find_link_by_href('/graphs/bobs-graph/').first.click()
        text = self.browser.find_by_xpath("//div[@class='flags-block']/span[@class='graph-nodes']").first.value
        spin_assert(lambda: self.assertEqual(text, "0 nodes"))
        text = self.browser.find_by_xpath("//div[@class='flags-block']/span[@class='graph-relationships']").first.value
        spin_assert(lambda: self.assertEqual(text, "0 relationships"))
        Graph.objects.get(name="Bob's graph").destroy()

    def test_data_node_clone(self):
        """Clone a node via the 'save as new' action and verify both exist."""
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        original_name = self.browser.find_by_xpath("//table[@id='content_table']/tbody/tr/td")[1].value
        # Clone the node
        self.browser.find_by_xpath("//table[@id='content_table']/tbody/tr/td/a[@class='edit']").first.click()
        self.browser.find_by_name('Name').first.fill(original_name + " clone")
        self.browser.find_by_name("as-new").first.click()
        # Check that two nodes exist
        original_name = self.browser.find_by_xpath("//table[@id='content_table']/tbody/tr/td")[1].value
        clone_name = self.browser.find_by_xpath("//table[@id='content_table']/tbody/tr/td")[4].value
        spin_assert(lambda: self.assertEqual(original_name, "Bob's node"))
        spin_assert(lambda: self.assertEqual(clone_name, "Bob's node clone"))
        Graph.objects.get(name="Bob's graph").destroy()

    def test_sigma_visualization_in_node_view(self):
        """Check the sigma.js view of a node renders both linked nodes."""
        create_graph(self)
        create_schema(self)
        create_type(self)
        # Adding relationship to the type
        self.browser.find_by_id('allowedRelations').first.click()
        self.browser.select('source', '1')
        self.browser.find_by_name('name').fill("Bob's rel")
        self.browser.select('target', '1')
        self.browser.find_by_id('id_description').fill(
            'The loved relationship')
        self.browser.find_by_value('Save Type').first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='form-row indent']/label").first.value
        spin_assert(lambda: self.assertNotEqual(text.find("Bob's rel"), -1))
        # Creating nodes
        create_node(self, 'Bob')
        create_node(self, 'Alice')
        # Creating relationship between nodes
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath("//td[@class='dataActions']/a[@class='dataOption list']").first.click()
        self.browser.find_by_xpath("//td[@class='dataList']/a[@class='edit']").first.click()
        self.browser.find_by_xpath("//li[@class='token-input-input-token']/input").first.fill('Alice')
        self.browser.is_element_present_by_id("id_user_wait", wait_time=5)
        self.browser.find_by_xpath("//div[@class='token-input-dropdown']//li[@class='token-input-dropdown-item2 token-input-selected-dropdown-item']/b").first.click()
        self.browser.find_by_value("Save Bob's type").first.click()
        # Checking: read the node count straight out of the sigma instance.
        self.browser.find_by_xpath("//table[@id='content_table']/tbody/tr/td/a[@title='View node']/p[text()='Alice']").first.click()
        self.browser.is_element_present_by_id('wait_for_js', 3)
        js_code = '''
            var instance = sigma.instances(0);
            sylva.test_node_count = instance.graph.nodes().length;
            '''
        self.browser.execute_script(js_code)
        text = self.browser.evaluate_script('sylva.test_node_count')
        spin_assert(lambda: self.assertEqual(text, 2))
        Graph.objects.get(name="Bob's graph").destroy()

    def test_graph_export_gexf(self):
        """Export as gexf through the HTTP endpoint and round-trip the file."""
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        self.browser.find_by_id('toolsMenu').first.click()
        # Replay the browser's session cookies so requests can hit the
        # export URL as the signed-in user.
        cookies = {self.browser.cookies.all()[0]["name"]:
                   self.browser.cookies.all()[0]["value"],
                   self.browser.cookies.all()[1]["name"]:
                   self.browser.cookies.all()[1]["value"]}
        result = requests.get(self.live_server_url +
                              '/tools/bobs-graph/export/gexf/',
                              cookies=cookies)
        spin_assert(lambda: self.assertEqual(
            result.headers['content-type'], 'application/xml'))
        spin_assert(lambda: self.assertEqual(
            self.browser.status_code.is_success(), True))
        fw = open('sylva/sylva/tests/files/bobs-graph.gexf', 'w')
        fw.write(result.content)
        fw.close()
        f = open('sylva/sylva/tests/files/bobs-graph.gexf')
        xmlFile = ""
        for line in f:
            xmlFile += line
        f.close()
        spin_assert(lambda: self.assertEqual(xmlFile, result.content))
        Graph.objects.get(name="Bob's graph").destroy()

    def test_graph_export_csv(self):
        """Export as a zip of csv files and round-trip every member."""
        create_graph(self)
        create_schema(self)
        create_type(self)
        create_data(self)
        self.browser.find_by_id('toolsMenu').first.click()
        # Same cookie replay as in the gexf export test.
        cookies = {self.browser.cookies.all()[0]["name"]:
                   self.browser.cookies.all()[0]["value"],
                   self.browser.cookies.all()[1]["name"]:
                   self.browser.cookies.all()[1]["value"]}
        result = requests.get(self.live_server_url +
                              '/tools/bobs-graph/export/csv/',
                              cookies=cookies)
        spin_assert(lambda: self.assertEqual(
            result.headers['content-type'], 'application/zip'))
        spin_assert(lambda: self.assertEqual(
            self.browser.status_code.is_success(), True))
        test_file = StringIO(result.content)
        csv_zip = ZipFile(test_file)
        for name in csv_zip.namelist():
            fw = open('sylva/sylva/tests/files/' + name, 'w')
            fw.write(csv_zip.read(name))
            fw.close()
        for name in csv_zip.namelist():
            f = open('sylva/sylva/tests/files/' + name)
            csvFile = ""
            for line in f:
                csvFile += line
            f.close()
            spin_assert(lambda: self.assertEqual(csv_zip.read(name), csvFile))
        Graph.objects.get(name="Bob's graph").destroy()
class BaseWebTestCase(LiveServerTestCase): """ Abstract class to handle logic for web tests """ username = '******' password = '******' wait_seconds = 3.0 def setUp(self): chrome_options = webdriver.ChromeOptions() chrome_options.add_argument("--window-size=1920,1080") chrome_options.add_argument("--start-maximized") chrome_options.add_argument("--no-sandbox") self.browser = Browser('chrome', headless=False, wait_time=30, options=chrome_options) self.browser.driver.set_page_load_timeout(240) super(BaseWebTestCase, self).setUp() def tearDown(self): self.browser.quit() super(BaseWebTestCase, self).tearDown() def _post_teardown(self): try: super(BaseWebTestCase, self)._post_teardown() except Exception as e: import traceback traceback.print_exc() print("Ignoring exception in post-teardown") def _wait(self): time.sleep(self.wait_seconds) def _login(self): self._visit("") self.browser.fill('username', self.username) self.browser.fill('password', self.password) self.browser.find_by_text('Sign in').first.click() assert self.browser.is_text_present('Admin') # checks that the nav is visible assert not self.browser.is_text_present('Sign in') def _go_home(self): self._visit("/") def _setup_confirm(self): """ First part of work-around to let phantomjs accept confirmation dialogs http://stackoverflow.com/questions/19903146/confirm-alert-window-in-phantom-js """ js_confirm = 'window.confirm = function() { return true }' self.browser.execute_script(js_confirm) def _accept_confirm(self): """ Second part of work-around to let phantomjs accept confirmation dialogs MUST call self._setup_confirm() for this to work """ self.browser.execute_script('return window.confirm') def _visit(self, path): path = self.live_server_url + path self.browser.visit(path) self._wait()
break root = Tk() root.withdraw() dealFilePath = filedialog.askopenfilename() if dealFilePath == '': break dealFileName = ntpath.basename(dealFilePath).replace('.xml','') file = open(dealFilePath, 'r') dealHtml = file.read() file.close() browser.execute_script("""var div = document.getElementsByTagName("form")[0]; div.innerHTML = '""" + dealHtml + """';""") # # bkcenter & counterPartyTypeId # nodeBkcenter = browser.find_by_name('bkCenter') # nodeCounterPartyTypeId = browser.find_by_name('counterPartyTypeId') # if businessType == "GBM": # browser.execute_script("""var div = document.getElementsByName("counterPartyTypeId")[0]; # div.innerHTML = '""" + """<option value="1" selected="selected">Not Applicable</option> # <option value="2">Bank/Bk Equ - Gen/Tier 1</option> # <option value="3">Sovereign - Gen/Tiers 1/2</option> # <option value="4">Bank/Bk Equ - Tier 2</option> # <option value="5">Bank/Bk Equ - Tiers 3/4/5</option> # <option value="6">Sovereign - Tier 3</option> # <option value="7">Sovereign - Tiers 4/5</option>""".replace('\n','\t') + """';""") # browser.execute_script("""var div = document.getElementsByName("bkCenter")[0]; # div.innerHTML = '""" + """<option value="BRAZIL">BRAZIL</option>
args = parser.parse_args() input("Press enter when you are ready with the whatsapp scanner on your phone!\nYou will have 10 secs to scan the QR code.\n") browser = Browser() if(args.n): browser.visit("about:config") script = """ var prefs = Components.classes["@mozilla.org/preferences-service;1"] .getService(Components.interfaces.nsIPrefBranch); prefs.setIntPref("network.proxy.type", 0); """ browser.execute_script(script.format("ProxyIP", "PORT")) url = "https://web.whatsapp.com/" browser.visit(url) # time to login to whatsapp using QR code sleep(args.waittime) # find the chat chat_search = browser.find_by_xpath("/html/body/div[1]/div/div/div[3]/div/div[1]/div/label/div/div[2]").first chat_search.type(args.chat_name) # press the chat browser.find_by_xpath('//*[@title="'+args.chat_name+'"]').click()
def scrape_genre(genre_column, genre_query):
    """Scrape SoundCloud's US weekly top chart for one genre.

    Parameters
    ----------
    genre_column : str
        Human-readable genre label stored in the returned frame's
        'Genre' column.
    genre_query : str
        Genre slug interpolated into the SoundCloud charts URL.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Chart Position', with song name, artist, weekly and
        all-time play counts plus the genre label. Play counts are the
        scraped strings, or 0 when the hidden-score element is missing.
    """
    browser = Browser('chrome', executable_path="chromedriver", headless=False)
    top_songs_url = f"https://soundcloud.com/charts/top?genre={genre_query}&country=US"
    browser.visit(top_songs_url)

    # The chart lazy-loads; each scroll reveals ~10 more items
    # (30 -> 40 -> 50). Replaces three copy-pasted scroll blocks.
    for _ in range(3):
        time.sleep(2)
        browser.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);"
            "var lenOfPage=document.body.scrollHeight;return lenOfPage;")

    html = browser.html
    browser.quit()  # release the Chrome session (previously leaked)

    soup = BeautifulSoup(html, 'html.parser')
    topChart = soup.find('div', class_='chartTracks lazyLoadingList')
    songs = topChart.find_all('li', class_='chartTracks__item')

    songNames = []
    chartPosition = []
    artists = []
    weeklyListenCount = []
    allTimeListenCount = []

    for song in songs:
        position = song.find('div', class_='chartTrack__position sc-font-tabular')
        chartPosition.append(position.text.strip())

        trackDetails = song.find('div', class_='chartTrack__details')
        artistName = trackDetails.find(class_='sc-link-light')
        # Strip the stray DEL control character embedded in some names.
        artists.append(artistName.text.strip().replace('\x7f', ""))

        weeklyPlayHiddenScore = song.find(class_='sc-visuallyhidden')
        if weeklyPlayHiddenScore:
            weeklyListenCount.append(weeklyPlayHiddenScore.text.strip())
        else:
            weeklyListenCount.append(0)

        songName = song.find(class_="sc-link-dark")
        songNames.append(songName.text.strip())

        allTimePlayHiddenScore = song.find(class_='chartTrack__scoreAllPlays')
        if allTimePlayHiddenScore:
            allPlayHiddenScore = allTimePlayHiddenScore.find(class_='sc-visuallyhidden')
            allTimeListenCount.append(allPlayHiddenScore.text.strip())
        else:
            allTimeListenCount.append(0)

    MusicDataFrame = pd.DataFrame(
        list(zip(chartPosition, songNames, artists,
                 weeklyListenCount, allTimeListenCount)),
        columns=['Chart Position', 'Song Name', 'Artist',
                 'Weekly Play Count', 'All Time Play Count'])
    MusicDataFrame.set_index('Chart Position', inplace=True)
    MusicDataFrame['Genre'] = genre_column
    return MusicDataFrame
class ToolsTestCaseCsv(LiveServerTestCase):
    """
    A master test to check the behaviour of the new 'auto' fields.
    Actually only works with gephi format.
    """

    def setUp(self):
        # Fresh browser session and signed-in user; two graph names are
        # used: one to export from, one to import into.
        self.browser = Browser()
        socket.setdefaulttimeout(30)
        signup(self, 'bob', '*****@*****.**', 'bob_secret')
        signin(self, 'bob', 'bob_secret')
        self.firstGraphName = "bobgraph"
        self.secondGraphName = "alicegraph"

    def tearDown(self):
        logout(self)
        self.browser.quit()

    @classmethod
    def tearDownClass(cls):
        sleep(10)  # It needs some time to close the LiveServerTestCase
        super(ToolsTestCaseCsv, cls).tearDownClass()

    def test_graph_export_csv(self):
        """Round-trip csv import into a second graph and verify counts."""
        # Create a graph with a auto_user property
        create_graph(self, self.firstGraphName)
        create_advanced_schema(self, self.firstGraphName)
        create_advanced_type(self, self.firstGraphName, "e")
        create_advanced_data(self)
        # Create new graph for import the data
        import_advanced_schema_csv(self, self.firstGraphName,
                                   self.secondGraphName)
        # Data import
        self.browser.find_by_id('toolsMenu').first.click()
        self.browser.find_link_by_href('/tools/' + self.secondGraphName + '/import/').first.click()
        self.browser.find_by_id('csv-radio').first.click()
        # Change the display field of input to attach the file
        # (the file input is hidden; un-hide it so attach_file works).
        script = """
            $('#files').css('display', '');
            """
        self.browser.execute_script(script)
        self.browser.is_text_present('Drop your nodes files here',
                                     wait_time=10)
        # Import the nodes
        file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                 'files/csv/bobs-type.csv')
        self.browser.attach_file('file', file_path)
        self.browser.is_text_present(
            'Nodes files loaded. Loading edges files...', wait_time=10)
        # Wait until the data is imported
        self.browser.is_text_present('Now drop your edges files',
                                     wait_time=10)
        # Change the display field of input to attach the file
        script = """
            $('#files2').css('display', '');
            """
        self.browser.execute_script(script)
        # Import the relationships
        file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                 'files/csv/bobs-rels.csv')
        self.browser.attach_file('file2', file_path)
        self.browser.is_text_present('Data loaded. Uploading to the server...',
                                     wait_time=10)
        # Wait until the data is imported
        self.browser.is_text_present('Data uploaded.', wait_time=10)
        # Check that nodes and relationships are ok
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//a[@class='dataOption list']").first.click()
        alicegraph = Graph.objects.get(name=self.secondGraphName)
        alicegraphNodes = alicegraph.nodes.count()
        spin_assert(lambda: self.assertEqual(3, alicegraph.nodes.count()))
        spin_assert(
            lambda: self.assertEqual(1, alicegraph.relationships.count()))
        # Add new nodes and relationships and check all is correct
        self.browser.find_by_id('dataMenu').first.click()
        self.browser.find_by_xpath(
            "//a[@class='dataOption new']").first.click()
        text = self.browser.find_by_id('propertiesTitle').first.value
        spin_assert(lambda: self.assertEqual(text, 'Properties'))
        self.browser.find_by_value("Save Bob's type").first.click()
        text = self.browser.find_by_xpath(
            "//div[@class='pagination']/span[@class='pagination-info']"
        ).first.value
        spin_assert(lambda: self.assertNotEqual(
            text.find(" elements Bob's type."), -1))
        spin_assert(lambda: self.assertEqual(alicegraphNodes + 1,
                                             alicegraph.nodes.count()))
        # Destroy the databases
        Graph.objects.get(name=self.firstGraphName).destroy()
        Graph.objects.get(name=self.secondGraphName).destroy()
class Compass:
    """Scraper for the UK Scouts 'Compass' membership portal.

    Drives a splinter ``Browser`` through the Compass web UI to look up
    members, scrape HTML tables into pandas DataFrames, download section
    CSV exports, and query the downloaded records.
    """

    def __init__(self, username='', password='', outdir=''):
        # `outdir` doubles as the browser download directory and the
        # directory load_from_dir() reads per-section CSV exports from.
        self._username = username
        self._password = password
        self._outdir = outdir
        self._browser = None
        # FIX: was `self._record = None` — every other method uses
        # `self._records` (set by load_from_dir, read by find_by_name,
        # sections, ...), so the old attribute was never read.
        self._records = None

    def quit(self):
        """Close the browser session; safe to call more than once."""
        if self._browser:
            self._browser.quit()
            self._browser = None

    def loggin(self):
        """Log in to Compass and select the Group Admin role.

        (Method name kept as-is for backward compatibility with callers.)
        """
        prefs = {
            "browser.download.folderList": 2,
            "browser.download.manager.showWhenStarting": False,
            "browser.download.dir": self._outdir,
            "browser.helperApps.neverAsk.saveToDisk":
                "application/octet-stream,application/msexcel,application/csv"}
        self._browser = Browser('chrome')  # , profile_preferences=prefs)
        self._browser.visit('https://compass.scouts.org.uk/login/User/Login')
        self._browser.fill('EM', self._username)
        self._browser.fill('PW', self._password)
        time.sleep(1)
        self._browser.find_by_text('Log in').first.click()
        # Look for the Role selection menu and select my Group Admin role.
        self._browser.is_element_present_by_name(
            'ctl00$UserTitleMenu$cboUCRoles', wait_time=30)
        self._browser.select('ctl00$UserTitleMenu$cboUCRoles', '1253644')
        time.sleep(1)

    def wait_then_click_xpath(self, xpath, wait_time=30, frame=None):
        """Wait until `xpath` is present (in `frame`, defaulting to the
        main browser window) and click it; retries until it succeeds."""
        frame = self._browser if frame is None else frame
        while True:
            try:
                if frame.is_element_present_by_xpath(xpath, wait_time=wait_time):
                    frame.find_by_xpath(xpath).click()
                    break
                else:
                    log.warning("Timeout expired waiting for {}".format(xpath))
                    time.sleep(1)
            except Exception:  # FIX: was a bare `except:` (also caught SystemExit)
                log.warning("Caught exception: ", exc_info=True)

    def wait_then_click_text(self, text, wait_time=30, frame=None):
        """Wait until link text `text` is present and click it."""
        frame = self._browser if frame is None else frame
        while True:
            if frame.is_text_present(text, wait_time=wait_time):
                frame.click_link_by_text(text)
                break
            else:
                log.warning("Timeout expired waiting for {}".format(text))

    def adult_training(self):
        """Navigate to the Adult Training page and show all records."""
        self.home()
        self.wait_then_click_text('Training')
        time.sleep(1)
        self.wait_then_click_text('Adult Training')
        time.sleep(1)
        self.wait_then_click_xpath('//*[@id="bn_p1_search"]')

    def home(self):
        """Return to the top-level page by clicking the Compass logo."""
        self.wait_then_click_xpath('//*[@alt="Compass Logo"]')
        time.sleep(1)

    def search(self):
        """Open the 'Find Member(s)' search and submit it."""
        self.home()
        # Click search button
        self.wait_then_click_xpath('//*[@id="mn_SB"]')
        time.sleep(1)
        # Click "Find Member(s)"
        self.wait_then_click_xpath('//*[@id="mn_MS"]')
        time.sleep(1)
        # The member-search form lives inside a popup iframe.
        with self._browser.get_iframe('popup_iframe') as i:
            self.wait_then_click_xpath('//*[@id="LBTN2"]', frame=i)
            time.sleep(1)
            self.wait_then_click_xpath(
                '//*[@class="popup_footer_right_div"]/a', frame=i)
            time.sleep(1)

    def lookup_member(self, member_number):
        """Run a quick-search for a single membership number."""
        self.home()
        # Click search button
        self.wait_then_click_xpath('//*[@id="mn_SB"]')
        time.sleep(1)
        xpath = '//*[@id="CNLookup2"]'
        while True:
            try:
                if self._browser.is_element_present_by_xpath(xpath, wait_time=30):
                    self._browser.find_by_xpath(xpath).fill(member_number)
                    break
                else:
                    log.warning("Timeout expired waiting for {}".format(xpath))
                    time.sleep(1)
            except Exception:  # FIX: was a bare `except:`
                log.warning("Caught exception: ", exc_info=True)
        self.wait_then_click_xpath('//*[@id="mn_QS"]')

    def fetch_table(self, table_id):
        """Scrape the HTML table with DOM id `table_id` into a DataFrame.

        Rows shorter than the header are padded with None; extra columns
        get synthetic "dummyN" headings. Returns None if the table is not
        present (after a short wait).
        """
        parser = etree.HTMLParser()

        def cells(row):
            # Text content of each <td> of a rendered splinter row.
            # (The original defined identical `columns` and `headers`
            # helpers; merged into one.)
            return ["".join(_.itertext())
                    for _ in etree.parse(StringIO(row.html),
                                         parser).findall('/*/td')]

        headers_xpath = '//*[@id ="{}"]/thead/*'.format(table_id)
        table_xpath = ('//*[@id ="{}"]/tbody/tr'
                       '[not(@style="display: none;")]'.format(table_id))
        if self._browser.is_element_present_by_xpath(table_xpath, wait_time=5):
            headings = [cells(row) for row in
                        self._browser.find_by_xpath(headers_xpath)][0]
            records = [cells(row) for row in
                       self._browser.find_by_xpath(table_xpath)]
            # Extend the length of each row to the same length as the columns
            records = [row + ([None] * (len(headings) - len(row)))
                       for row in records]
            # And add dummy columns if we do not have enough headings
            headings = headings + ["dummy{}".format(_) for _ in
                                   range(0, len(records[0]) - len(headings))]
            return pd.DataFrame.from_records(records, columns=headings)
        log.warning("Failed to find table {}".format(table_id))
        return None

    def member_training_record(self, member_number, member_name):
        """Return (personal_learning_plans, training_record,
        mandatory_learning) DataFrames for one member, each tagged with
        the member number and name."""
        self.lookup_member(member_number)
        # Select Training record
        self.wait_then_click_xpath('//*[@id="LBTN5"]')
        personal_learning_plans = self.fetch_table('tbl_p5_TrainModules')
        personal_learning_plans['member'] = member_number
        personal_learning_plans['name'] = member_name
        training_record = self.fetch_table('tbl_p5_AllTrainModules')
        training_record['member'] = member_number
        training_record['name'] = member_name
        mandatory_learning = self.fetch_table('tbl_p5_TrainOGL')
        mandatory_learning['member'] = member_number
        mandatory_learning['name'] = member_name
        # FIX: previously returned personal_learning_plans twice and
        # silently dropped training_record.
        return personal_learning_plans, training_record, mandatory_learning

    def member_permits(self, member_number, member_name):
        """Return the permits table for one member (or None if absent)."""
        self.lookup_member(member_number)
        # Select Permits
        self.wait_then_click_xpath('//*[@id="LBTN4"]')
        permits = self.fetch_table('tbl_p4_permits')
        if permits is not None:
            permits['member'] = member_number
            permits['name'] = member_name
        return permits

    # NOTE(review): lru_cache on instance methods keys on `self` and keeps
    # the instance alive for the cache's lifetime (ruff B019). Kept as-is
    # to preserve the existing one-fetch-per-session behaviour.
    @lru_cache()
    def get_all_adult_trainers(self):
        """Fetch (once per instance) the adult-training results table."""
        self.adult_training()
        return self.fetch_table('tbl_p1_results')

    @lru_cache()
    def get_all_group_members(self):
        """Fetch (once per instance) the full member-search table."""
        self.search()
        self._browser.is_element_present_by_xpath(
            '//*[@id = "MemberSearch"]/tbody', wait_time=10)
        time.sleep(1)
        # Hack to ensure that all of the search results loaded.
        for i in range(0, 5):
            self._browser.execute_script(
                'document.getElementById("ctl00_main_working_panel'
                '_scrollarea").scrollTop = 100000')
            time.sleep(1)
        return self.fetch_table('MemberSearch')

    def export(self, section):
        """Download the CSV member export for `section` into outdir and
        rename it to '<section>.csv'."""
        # Select the My Scouting link.
        self._browser.is_text_present('My Scouting', wait_time=30)
        self._browser.click_link_by_text('My Scouting')
        # Click the "Group Sections" hotspot.
        self.wait_then_click_xpath('//*[@id="TR_HIER7"]/h2')
        # Clink the link that shows the number of members in the section.
        # This is the one bit that is section specific.
        # We might be able to match on the Section name in the list,
        # which would make it more robust but at present we just hard
        # the location in the list.
        section_map = {
            'garrick': 2,
            'paget': 3,
            'swinfen': 4,
            'brown': 4,
            'maclean': 5,
            'rowallan': 6,
            'somers': 7,
            'boswell': 8,
            'erasmus': 9,
            'johnson': 10
        }
        self.wait_then_click_xpath(
            '//*[@id="TR_HIER7_TBL"]/tbody/tr[{}]/td[4]/a'.format(
                section_map[section.lower()]
            ))
        # Click on the Export button.
        self.wait_then_click_xpath('//*[@id="bnExport"]')
        # Click to say that we want a CSV output.
        self.wait_then_click_xpath(
            '//*[@id="tbl_hdv"]/div/table/tbody/tr[2]/td[2]/input')
        time.sleep(2)
        # Click to say that we want all fields.
        self.wait_then_click_xpath('//*[@id="bnOK"]')
        download_path = os.path.join(self._outdir, 'CompassExport.csv')
        if os.path.exists(download_path):
            # FIX: log.warn is a deprecated alias of log.warning.
            log.warning("Removing stale download file.")
            os.remove(download_path)
        # Click the warning.
        self.wait_then_click_xpath('//*[@id="bnAlertOK"]')
        # Browser will now download the csv file into outdir. It will be
        # called CompassExport. Wait for the file to appear.
        timeout = 30
        while not os.path.exists(download_path):
            time.sleep(1)
            timeout -= 1
            if timeout <= 0:
                # FIX: was ".fomat(" — raised AttributeError instead of
                # logging on the timeout path.
                log.warning(
                    "Timeout waiting for {} export to download.".format(
                        section))
                break
        # rename download file.
        # NOTE(review): if the download timed out above, this rename will
        # raise FileNotFoundError — pre-existing behaviour, kept.
        os.rename(download_path,
                  os.path.join(self._outdir, '{}.csv'.format(section)))
        log.info("Completed download for {}.".format(section))
        # Draw breath
        time.sleep(1)

    def load_from_dir(self):
        """Load the records from the set of CSV files in outdir into
        self._records (one file per section)."""
        log.debug('Loading from {}'.format(self._outdir))

        def get_section(path, section):
            df = pd.read_csv(path, dtype=object, sep=',')
            df['section'] = section
            # Pre-computed lower-cased name columns for find_by_name().
            df['forenames_l'] = [_.lower().strip() for _ in df['forenames']]
            df['surname_l'] = [_.lower().strip() for _ in df['surname']]
            return df

        # NOTE(review): DataFrame.append was removed in pandas 2.x;
        # pd.concat is the forward-compatible spelling. Kept unchanged to
        # avoid behaviour drift on the pinned pandas version.
        self._records = pd.DataFrame().append(
            [get_section(os.path.join(self._outdir, section),
                         os.path.splitext(section)[0])
             for section in os.listdir(self._outdir)],
            ignore_index=True)

    def find_by_name(self, firstname, lastname, section_wanted=None,
                     ignore_second_name=True):
        """Return list of matching records.

        With ignore_second_name=True only the first initial of
        `firstname` is matched (as a prefix regex); otherwise the full
        forenames must match exactly (case-insensitive).
        """
        recs = self._records
        if ignore_second_name:
            df = recs[
                (recs.forenames_l.str.lower().str.match(
                    '^{}.*$'.format(
                        firstname.strip(' ')[0].lower().strip()))) &
                (recs.surname_l == lastname.lower().strip())]
        else:
            df = recs[(recs.forenames_l == firstname.lower().strip()) &
                      (recs.surname_l == lastname.lower().strip())]
        if section_wanted is not None:
            df = df[(df['section'] == section_wanted)]
        return df

    def sections(self):
        "Return a list of the sections for which we have data."
        return self._records['section'].unique()

    def all_yp_members_dict(self):
        """Return {section_name: DataFrame of that section's members}."""
        return {s: members for s, members in
                self._records.groupby('section')}

    def section_all_members(self, section):
        """Return every loaded record (as Series) for `section`."""
        return [m for i, m in self._records[
            self._records['section'] == section].iterrows()]

    def section_yp_members_without_leaders(self, section):
        """Return the young-person records for `section` (leaders
        excluded by role)."""
        return [m for i, m in self._records[
            (self._records['section'] == section) &
            (self._records['role'].isin(
                ['Beaver Scout', 'Cub Scout', 'Scout']))].iterrows()]

    def members_with_multiple_membership_numbers(self):
        """Return groups of records for people (forenames+surname) that
        appear under more than one membership number."""
        return [member for s, member in self._records.groupby(
            ['forenames', 'surname']).filter(
                lambda x: len(x['membership_number'].unique()) > 1).groupby(
                    ['forenames', 'surname', 'membership_number'])]
def get_flights(origin_airport_code, destination_airport_code, departure_date_str):
    """Scrape southwest.com for one-way flights on a given date.

    Args:
        origin_airport_code / destination_airport_code: IATA codes.
        departure_date_str: date in "%m/%d/%y" form (used both to fill
            the form and to build departure datetimes).

    Returns:
        list of tuples (origin, destination, departure_datetime,
        arrival_datetime, flight_numbers_tuple, routing, price) where
        price is a string ("sold out" style text included) or None.
    """
    browser = Browser('phantomjs')
    browser.visit('https://www.southwest.com/')
    booking_button = browser.find_by_id('booking-form--flight-tab')[0]
    booking_button.click()
    #if return_date:
    #    browser.choose('twoWayTrip','true')
    #else:
    browser.choose('twoWayTrip', 'false')
    #departure_date_str = departure_date.strftime("%m/%d/%y")
    # works better with the date selected first... no idea why.
    browser.execute_script(
        "document.getElementsByName('outboundDateString')[0].type = 'visible'")
    time.sleep(2)
    browser.fill('originAirport', origin_airport_code)
    browser.fill('destinationAirport', destination_airport_code)
    browser.execute_script(
        "document.getElementsByName('outboundDateString')[0].type = 'visible'")
    browser.fill('outboundDateString', departure_date_str)
    submit_button = browser.find_by_id('jb-booking-form-submit-button')[0]
    submit_button.click()
    flights_DOM_table = browser.find_by_css('.bugTableRow')
    flights_table = []
    for flight_DOM in flights_DOM_table:
        depart_time = flight_DOM.find_by_css('.depart_column .time').text
        depart_time = depart_time.zfill(5)  # pad "9:15" -> "09:15" for strptime
        depart_am_pm = flight_DOM.find_by_css('.depart_column .indicator').text
        duration = parse_duration(flight_DOM.find_by_css('.duration').text)
        depart_str = departure_date_str + ", " + depart_time + depart_am_pm
        departure = datetime.datetime.strptime(depart_str, "%m/%d/%y, %I:%M%p")
        arrival = departure + duration
        #arrive_time = flight_DOM.find_by_css('.arrive_column .time').text
        #arrive_am_pm = flight_DOM.find_by_css('.arrive_column .indicator').text
        flight_nums = flight_DOM.find_by_css(
            '.bugLinkText')  # could be a few of these
        f = []
        for num in flight_nums:
            f.append(num.text[0:-14])  # drop trailing site boilerplate
        routing = flight_DOM.find_by_css('.bugLinkRouting').text[0:-14]
        if len(f) > 1:
            routing += " - " + flight_DOM.find_by_css(
                '.search-results--flight-stops').text
        box = flight_DOM.find_by_css('.price_column')[
            2]  # only the wanna get away
        # check if sold out, unavailable or available
        price = None
        try:
            price = box.find_by_css('label.product_price')[0].text[
                1:]  # strips the currency symbol
        except splinter.exceptions.ElementDoesNotExist:
            pass
        try:
            price = box.find_by_css('.insufficientInventory')[0].text.strip()
        except splinter.exceptions.ElementDoesNotExist:
            pass
        try:
            price = box.find_by_css('.unqualifiedForAnyFare')[0].text.strip()
        # FIX: was a bare `except:`; narrowed to the same exception the
        # two sibling handlers above catch, so real errors propagate.
        except splinter.exceptions.ElementDoesNotExist:
            pass
        flight = (origin_airport_code, destination_airport_code, departure,
                  arrival, tuple(f), routing, price)
        flights_table.append(flight)
    return flights_table
with open('options.dump', 'rb') as dmp: lastoptvalues = pickle.load(dmp) lastoptkeys = ["county", "municipality", "workarea", "profession", "detailedprof"] lastoptions = dict(zip(lastoptkeys, lastoptvalues)) print "Restoring last used options..." browser = Browser('phantomjs', user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/600.5.15 (KHTML, like Gecko) Version/8.0.5 Safari/600.5.15)" browser.driver.set_window_size(1366, 768) url = "http://www.arbetsformedlingen.se" browser.visit(url) browser.find_by_css('.sidhuvud .btn-login, .sidhuvud-globalmeny-sok .btn-group > .btn, .sidhuvud-globalmeny-sok .btn-group > .btn:first-child, .sidhuvud-globalmeny-sok .btn-group > .btn:last-child').click() browser.find_by_css('.inloggnings-tabbar .nav-tabs li:nth-child(2) a ').click() browser.fill("user", "*****@*****.**") browser.fill("password",'Ejc2Tc6') browser.execute_script('document.forms["konto"].submit()') # locating the searchform time.sleep(5) # waiing for the page to load try: browser.find_by_css('#mainTabMenu ul li:nth-child(1) a').click() browser.find_by_css('.sv-text-portlet-content p.flikmeny a').click() browser.find_by_css('.linkBold:nth-child(1)').click() searchurl = browser.driver.current_url.replace("https", "http") # saving the url to the searchform for later except AttributeError: time.sleep(5) # wait a little longer countyoptions = [c.value for c in browser.find_by_name('selectlistLan').find_by_tag('option')[1:]]
class TwitterLiker(): # constructor def __init__(self): self.mUrl = "https://www.twitter.com/" self.cycles = 2 self.browser = Browser() self.username = "******" self.pw = 'XXXXXXXXXX\r' self.totalLikes = 0 self.userNameField = 'session[username_or_email]' self.passwordField = 'session[password]' self.loginButtonId = 'submit btn primary-btn js-submit' # scroll the page and # do the liking def launchPage(self): self.browser.visit(self.mUrl) self.login() # self.scrollBy() for i in range(0, self.cycles): self.likePosts() print(str(self.totalLikes) + " total likes this session...Yay!") def login(self): print("login") print("logging in as " + self.username) self.browser.click_link_by_text('Log in') # time.sleep(1) assert self.browser.find_by_name(self.userNameField) self.browser.fill(self.userNameField, self.username) self.browser.fill(self.passwordField, self.pw) inputs = self.browser.find_by_tag('input') for foo in inputs: if foo['class'] == self.loginButtonId: foo.click() print('clicked the log in button') # need to sleep a few seconds here time.sleep(3) def likePosts(self): print("liking posts") buttonList = self.browser.find_by_tag('button') time.sleep(2) buttonList = self.browser.find_by_tag('button') likeList = 0 time.sleep(1) for b in buttonList: if 'title="Like"' in b['innerHTML']: #check if it's visible, if not move on if b.visible: b.click() self.totalLikes += 1 likeList += 1 print("just liked " + str(likeList) + " tweets.") self.scrollBy() time.sleep(1) def scrollBy(self): print("scrolling down.") # print( self.browser.execute_script( "window.scrollY" )) self.browser.execute_script( "window.scrollBy(0,30000);" ) time.sleep(2) def boneyard(self): print('boneyard')
def spider():
    """Poll a live-score page (bf.310v.com) forever and persist matches.

    Each cycle: hot-reload `config`, parse the score table, update
    existing News rows with middle/final scores, and create a News row
    (with an audible alert) whenever a match's handicap odds match one
    of the configured patterns.  Sleeps config.SPIDER_SEP_TIME between
    cycles; never returns.
    """
    browser = Browser()
    browser.visit('http://www.baidu.com')
    # Navigate via JS rather than visit() — presumably to carry the
    # referrer; confirm whether a plain visit() would also work.
    browser.execute_script(
        "window.location.href = 'http://bf.310v.com/3.html'")
    time.sleep(10)
    while True:
        # Re-import and reload so edits to config take effect without a
        # restart (Python 2 `reload`).
        import config
        reload(config)
        soup = BS(browser.html, 'html5lib')
        table = soup.select('table#idt')[0]
        a3_trs = table.find_all('tr', class_='a3')
        a4_trs = table.find_all('tr', class_='a4')
        a3_trs.extend(a4_trs)
        for tr in a3_trs:
            # Skip hidden rows (no style='display: none') and ad rows.
            if (not tr.has_attr('style')) and tr['id'].find('ad') == -1:
                # td holding the match time / status text
                time_td_text = tr.find_all('td')[3].get_text()
                match_id = tr['id']
                end_score = tr.find_all('td')[5].get_text()
                middle_score = tr.find_all('td')[7].get_text()
                match_news = News.objects.filter(match_id=match_id)
                if match_news:
                    # u'完' = finished: record final and half-time scores.
                    if time_td_text.find(u'完') > -1:
                        for match_new in match_news:
                            match_new.end_score = end_score
                            match_new.middle_score = middle_score
                            match_new.save()
                    # u'中' = half-time: record the half-time score.
                    if time_td_text.find(u'中') > -1:
                        for match_new in match_news:
                            match_new.middle_score = middle_score
                            match_new.save()
                # Numeric status = minutes played; only consider matches
                # earlier than the configured cutoff.
                if re.match(r'\d+', time_td_text) and int(time_td_text) < config.STATUS_TIME:
                    num1_td = tr.find_all('td')[9]
                    num2_td = tr.find_all('td')[11]
                    # Asian handicap (yapan) and over/under (daxiaopan)
                    # odds, before/after values.
                    yapan1 = num1_td.find_all('div')[0].get_text()
                    yapan2 = num2_td.find_all('div')[0].get_text()
                    daxiaopan1 = num1_td.find_all('div')[1].get_text()
                    daxiaopan2 = num2_td.find_all('div')[1].get_text()
                    tds = tr.find_all('td')
                    ftype = tds[1].find('font').get_text()  # match type
                    gamestarttime = tds[2].get_text()
                    gamestatus = time_td_text
                    team1 = tds[4].find_all('font')[2].get_text()
                    score = tds[5].get_text()
                    team2 = tds[6].find_all('font')[0].get_text()
                    halfscore = tds[7].get_text()
                    yapanSB = re.sub(r'\s', '',
                                     tds[10].find_all('div')[0].text)
                    daxiaopanSB = tds[10].find_all('div')[1].text
                    # Window for treating rows as the "same match" to
                    # avoid duplicate News entries.
                    same_match_sep = datetime.datetime.now(
                    ) - datetime.timedelta(seconds=config.SAME_MATCH_SEP_TIME)
                    matchs = News.objects.filter(score=score).filter(team1=team1).filter(
                        team2=team2).filter(create_time__gte=same_match_sep)
                    # print team1, team2, score, halfscore
                    for each in config.YAPAN:
                        if yapan1 == each.split('-')[0] and yapan2 == each.split('-')[1]:
                            # print each, yapan1, yapan2
                            if score != '0-0' and halfscore != '0-0' and len(matchs.filter(findex=each)) == 0:
                                try:
                                    # Audible alert; best-effort (fails
                                    # off-Windows or without the wav).
                                    winsound.PlaySound(
                                        'nokia.wav', winsound.SND_PURGE)
                                except:
                                    pass
                                news = News.objects.create(
                                    match_type=ftype,
                                    game_start_time=gamestarttime,
                                    status=gamestatus,
                                    team1=team1,
                                    team2=team2,
                                    half_score=halfscore,
                                    score=score,
                                    yapan=yapan1 + '-' + yapan2,
                                    daxiaopan=daxiaopan1 + '-' + daxiaopan2,
                                    findex=each,
                                    match_id=match_id,
                                    yapanSB=yapanSB,
                                    daxiaopanSB=daxiaopanSB)
                                news.save()
                    for each in config.DAXIAOPAN:
                        if daxiaopan1 == each.split('-')[0] and daxiaopan2 == each.split('-')[1]:
                            # print each, daxiaopan1, daxiaopan2
                            if score != '0-0' and halfscore != '0-0' and len(matchs.filter(findex=each)) == 0:
                                try:
                                    winsound.PlaySound(
                                        'nokia.wav', winsound.SND_PURGE)
                                except:
                                    pass
                                news = News.objects.create(
                                    match_type=ftype,
                                    game_start_time=gamestarttime,
                                    status=gamestatus,
                                    team1=team1,
                                    team2=team2,
                                    half_score=halfscore,
                                    score=score,
                                    yapan=yapan1 + '-' + yapan2,
                                    daxiaopan=daxiaopan1 + '-' + daxiaopan2,
                                    findex=each,
                                    match_id=match_id,
                                    yapanSB=yapanSB,
                                    daxiaopanSB=daxiaopanSB)
                                news.save()
        time.sleep(config.SPIDER_SEP_TIME)