def login(url, q, p):
    """Open ``url`` in Chrome and submit account ``q`` / password ``p``.

    Before logging in, the function runs a few diagnostic calls (adds a
    cookie, dumps all cookies, checks for an ``<h1>`` and evaluates a small
    script) and prints their results. The credentials are only entered when
    an element with id ``login_frame`` is present on the page.
    """
    session = Browser('chrome')
    session.visit(url)

    # Diagnostics; see
    # https://splinter.readthedocs.io/en/latest/elements-in-the-page.html
    session.cookies.add({'whatever': 'and ever'})
    all_cookies = session.cookies.all()
    print(session.is_element_present_by_xpath('//h1'))
    print(all_cookies)
    print(session.evaluate_script("4+4") == 8)

    # Nothing to fill in when the login iframe is missing.
    if not session.find_by_id('login_frame'):
        return

    with session.get_iframe('login_frame') as login_frame:
        # Switch from the default login mode to the account/password form.
        login_frame.find_by_id('switcher_plogin').click()
        print(u'输入账号...')
        login_frame.find_by_id('u').fill(q)
        print(u'输入密码...')
        login_frame.find_by_id('p').fill(p)
        print(u'尝试登录...')
        login_frame.find_by_id('login_button').click()
        print(u'完成登录动作...')
def __enter__(self):
    """Log in through the ``base`` iframe and return the live browser.

    Visits ``BASE_URL + 'login.jsp'``, fills the three login fields from the
    module-level constants and clicks the first element whose ``name`` is the
    empty string (presumably the unnamed submit control -- TODO confirm).
    """
    session = Browser()
    session.visit(BASE_URL + 'login.jsp')

    credentials = (
        ('loginfoodcoopid', FOOD_COOP_ID),
        ('loginnickname', LOGIN_NAME),
        ('loginpassword', PASSWORD),
    )
    with session.get_iframe('base'):
        for field_name, value in credentials:
            session.fill(field_name, value)
        session.find_by_name('').first.click()
    return session
def __init__(self, jira_url, email, jira_password):
    # type: (str, str, str) -> None
    """Obtain Tempo credentials from JIRA.

    Logs in to ``jira_url`` with a headless Chrome, opens the Tempo
    "my work" plugin page, reads the ``data-initial-state`` JSON from the
    Tempo iframe and builds ``self.session`` (a ``requests.Session``) that
    carries the Tempo session cookie and headers.

    The browser is only needed to harvest the state, so it is always closed
    before returning -- previously the headless Chrome process was leaked.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    browser = Browser('chrome', options=chrome_options)

    def get_tempo_frame():
        # type: () -> Any
        """Return the Tempo plugin iframe, or None while it is not rendered."""
        for frame in browser.find_by_tag('iframe'):
            # An iframe without an id yields None; treat it as non-matching.
            if (frame['id'] or '').startswith('is.origo.jira.tempo-plugin'):
                return frame
        return None

    try:
        browser.visit(jira_url)
        # The login is a two-step form: e-mail first, then the password.
        browser.find_by_id('username').fill(email)
        browser.find_by_id('login-submit').click()
        wait(lambda: browser.find_by_id('password').visible)
        browser.find_by_id('password').fill(jira_password)
        browser.find_by_id('login-submit').click()
        wait(lambda: 'Dashboard.jspa' in browser.url)
        browser.visit(
            jira_url +
            '/plugins/servlet/ac/is.origo.jira.tempo-plugin/tempo-my-work#!')

        frame = wait(get_tempo_frame)
        with browser.get_iframe(frame['id']) as iframe:
            tempo_state = json.loads(
                iframe.find_by_id('tempo-container')['data-initial-state'])
    finally:
        browser.quit()

    self.session = requests.Session()
    self.session.cookies = add_dict_to_cookiejar(
        self.session.cookies, {'tempo_session': tempo_state['tempoSession']})
    self.session.headers.update({
        'Tempo-Context': tempo_state['tempoContext'],
        'Tempo-Session': tempo_state['tempoSession'],
        'Origin': 'https://app.tempo.io',
        'Content-Type': 'application/json',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    })
def work(info):
    """Log in to the ucampus site and submit the student binding form.

    ``info`` is indexed as ``(username, password, student_number,
    student_name)``. The outcome is printed and logged as
    ``'completed: ...'`` or ``'error: ...'``.

    The browser is closed in a ``finally`` block so it is released even when
    the login steps fail before the iframe is reached.
    """
    browser = Browser('chrome')
    try:
        browser.visit(URL_UCAMPUS)
        browser.find_by_text("登录").click()
        browser.find_by_name('username')[0].fill(info[0])
        browser.find_by_name('password')[0].fill(info[1])
        browser.find_by_id("login").click()

        # The page renders its iframe lazily, so wait briefly before
        # calling get_iframe.
        time.sleep(1)
        try:
            with browser.get_iframe('layui-layer-iframe1') as iframe:
                # Teacher accounts must first click a span to switch tabs
                # ('我是老师', ids 'teachenum' / 'teachename' / 'teachBtn');
                # student accounts do not.
                iframe.find_by_id('studentnum')[0].fill(info[2])
                iframe.find_by_id('studentname')[0].fill(info[3])
                iframe.find_by_id('studenBtn').click()
            msg = 'completed: %s' % str(info)
            print(msg)
            logger.info(msg)
        except Exception:
            # Best effort: record the failing record and carry on.
            msg = 'error: %s' % str(info)
            print(msg)
            logger.error(msg)
    finally:
        time.sleep(1)
        browser.quit()
def solve_captcha(url):
    """Click the reCAPTCHA checkbox on ``url`` and submit the form.

    Opens Firefox with a random user agent, ticks the checkbox inside the
    first iframe with human-like pauses, then polls until the page reports
    "You are verified" before pressing the form button. Prints "Failed" and
    closes the browser when the element is missing or the site redirects to
    an "InternalCaptcha" URL; exits the process with status 1 when the
    browser window is gone or the webdriver breaks.
    """
    driver = Browser("firefox", user_agent=UserAgent().random)
    driver.visit(url)
    try:
        frame_name = driver.find_by_xpath("//iframe").first["name"]
        with driver.get_iframe(frame_name) as frame:
            anchor = frame.find_by_xpath('//*[@id="recaptcha-anchor"]').first
            if frame.is_text_present("I'm not a robot"):
                print("Captcha Loaded")
            print("Waiting....")
            rand_sleep(1, 5)
            print("Mouse Over")
            anchor.mouse_over()
            rand_sleep(0, 1)
            print("Click")
            anchor.click()

        # Poll until the challenge is reported as solved.
        while True:
            if driver.is_text_present("You are verified"):
                break
            sleep(0.5)

        driver.find_by_xpath("/html/body/form/button").first.click()
        if "InternalCaptcha" in driver.url:
            print("Failed")
            driver.quit()
    except ElementDoesNotExist:
        print("Failed")
        driver.quit()
    except NoSuchWindowException:
        print("Browser Closed")
        exit(1)
    except WebDriverException:
        print("Browser Broke")
        exit(1)
#!/usr/bin/env python3
# -*-coding:utf-8-*-
# Log in to mail.163.com through its embedded login iframe.
from splinter import Browser
import time

browser = Browser()
url = "http://mail.163.com/"
browser.visit(url)
# Give the page a moment to inject the login iframe.
time.sleep(2)

# NOTE(review): credentials are hard-coded; consider loading them from the
# environment instead of keeping them in source control.
with browser.get_iframe('x-URS-iframe') as login_form:
    for field_name, value in (('email', 'zyrarter'),
                              ('password', 'zyr519328')):
        login_form.find_by_name(field_name).fill(value)
    login_form.find_by_id('dologin').click()
# Scrape semester GPAs from the NITT results page into a JSON file.
import json  # json for encoding the data into the file

from splinter import Browser  # splinter is the module used for manipulating the browser

data = dict()  # maps roll number -> GPA of the student
browser = Browser('firefox')
browser.visit('http://www.nitt.edu/prm/ShowResult.htm')  # our results page
roll = 110112000  # starting roll number - 1

# The context manager closes the output file even if the loop is interrupted.
with open('gpasem4', 'w') as f:
    while roll < 110112100:
        roll = roll + 1
        # The page is divided into frames, so select the appropriate one.
        with browser.get_iframe('main') as iframe:
            try:
                iframe.fill('TextBox1', roll)
                iframe.find_by_name('Button1').click()
                browser.find_by_xpath(
                    '//select[@id="Dt1"]/option[@value="96"]')._element.click()
                gpa = iframe.find_by_id('LblGPA')
                data.update({roll: gpa.value})  # record the new result
                print('%s done' % roll)
            except Exception:
                # Missing roll numbers are skipped on purpose (best effort).
                print('%s skipped' % roll)
    json.dump(data, f)  # store the dictionary in the output file
# Manual smoke test: log in to the ucampus test site and fill the student
# binding form without submitting it.
import time

from splinter import Browser

browser = Browser('chrome')
browser.visit('http://rmdx.testucampus.unipus.cn')
btn_login = browser.find_by_text("登录")
btn_login.click()

input_username = browser.find_by_name('username')
input_username[0].fill('ucptest2115')
input_password = browser.find_by_name('password')
input_password[0].fill('pass2115')

btn_login = browser.find_by_id("login")
btn_login.click()

# The page renders its iframe lazily, so wait briefly before calling
# get_iframe.
time.sleep(1)
with browser.get_iframe('layui-layer-iframe1') as iframe:
    input_num = iframe.find_by_name('num')
    input_num[0].fill('1000004')
    input_name = iframe.find_by_name('name')
    input_name[0].fill(u'赵四')
    btn_commit = iframe.find_by_id('studenBtn')
    print(btn_commit)
    # btn_commit.click()  # deliberately disabled: do not really submit

time.sleep(1)
browser.quit()
# except HttpResponseError, e: # print "Oops, I failed with the status code %s and reason %s" % (e.status_code, e.reason) # test # Cookies manipulation browser.cookies.add({'whatever': 'and ever'}) # add a cookie browser.cookies.all() # retrieve all cookies browser.cookies.delete('mwahahahaha') # deletes the cookie 'mwahahahaha' browser.cookies.delete('whatever', 'wherever') # deletes two cookies browser.cookies.delete() # deletes all cookies # Frames, alerts and prompts # Using iframes,You can use the get_iframe method and the with statement to interact with iframes. You can pass the # iframe's name, id, or index to get_ifram with browser.get_iframe('iframemodal') as iframe: iframe.do_stuff() # Chrome support for alerts and prompts is new in Splinter 0.4.Only webdrivers (Firefox and Chrome) has support for # alerts and prompts. alert = browser.get_alert() alert.text alert.accept() alert.dismiss() prompt = browser.get_alert() prompt.text prompt.fill_with('text') prompt.accept() prompt.dismiss() # use the with statement to interacte with both alerts and prompts
class KithBot:
    """Automate a kith.com purchase: find a product, add it to the cart and
    fill the shipping and card forms.

    ``info`` supplies every user-specific value; the keys read by the methods
    are ``category``, ``product``, ``color``, ``size``, ``emailfield``,
    ``firstName``, ``lastName``, ``addressfield``, ``city``, ``zip``,
    ``phonefield``, ``number``, ``nameField``, ``expiry`` and ``ccv``.
    """

    def __init__(self, **info):
        self.info = info
        self.base_url = "https://kith.com/"
        self.shop_ext = "collections/"
        self.checkout_ext = "checkout/"

    def init_browser(self):
        """Start the Chrome session used by all later steps."""
        self.b = Browser('chrome')

    def find_product(self):
        """Scrape the category page and store the matching product link.

        The link is kept only when it occurs exactly once among the cards
        whose title and colour match ``info``; it is stored in
        ``self.final_link`` and printed.
        """
        try:
            listing_url = "{}{}{}".format(
                self.base_url, self.shop_ext, self.info["category"])
            soup = bs4.BeautifulSoup(requests.get(listing_url).text, 'lxml')
            cards = [
                (anchor["href"], anchor.contents[1].text,
                 anchor.contents[3].text)
                for anchor in soup.find_all("a", class_="product-card__link")
            ]
            matches = [
                href for href, title, color in cards
                if title == self.info["product"]
                and color == self.info["color"]
            ]
            unique = set(
                [href for href in matches if matches.count(href) == 1])
            self.final_link = list(unique)[0]
            print(self.final_link)
        except requests.ConnectionError as e:
            print("Failed to open url")

    def visit_site(self):
        """Open the product page, pick the size and go to checkout."""
        size_selector = '//div[@data-value=' + self.info["size"] + ']'
        product_url = "{}{}".format(self.base_url, str(self.final_link))
        self.b.visit(product_url)
        self.b.find_by_xpath(size_selector).click()
        for button_name in ('add', 'checkout'):
            self.b.find_by_name(button_name).click()

    def shipping_func(self):
        """Fill the contact/shipping form and press "continue" twice."""
        form_fields = (
            ("checkout[email]", "emailfield"),
            ("checkout[shipping_address][first_name]", "firstName"),
            ("checkout[shipping_address][last_name]", "lastName"),
            ("checkout[shipping_address][address1]", "addressfield"),
            ("checkout[shipping_address][city]", "city"),
            ("checkout[shipping_address][zip]", "zip"),
            ("checkout[shipping_address][phone]", "phonefield"),
        )
        for field_name, info_key in form_fields:
            self.b.fill(field_name, self.info[info_key])
        for _ in range(2):
            self.b.find_by_id('continue_button').click()

    def checkout_func(self):
        """Fill the four card-field iframes (number, name, expiry, CVV).

        Each card field lives in its own iframe; the iframe list is looked
        up again before every field. The card number and expiry are typed in
        chunks rather than filled in one go.
        """
        def frame_id(position):
            frames = self.b.find_by_xpath(
                "//iframe[@class='card-fields-iframe']")
            return frames[position]['id']

        with self.b.get_iframe(frame_id(0)) as iframe:
            groups = textwrap.wrap(self.info["number"], 4)
            for index in range(4):
                iframe.find_by_name("number").type(groups[index])

        with self.b.get_iframe(frame_id(1)):
            self.b.fill("name", self.info["nameField"])

        with self.b.get_iframe(frame_id(2)) as iframe:
            parts = textwrap.wrap(self.info["expiry"], 2)
            for index in range(2):
                iframe.find_by_name("expiry").type(parts[index])

        with self.b.get_iframe(frame_id(3)):
            self.b.fill("verification_value", self.info["ccv"])
        # The final "continue" click is intentionally left out:
        # self.b.find_by_id('continue_button').click()

    def main(self):
        """Run the whole purchase flow in order."""
        for step in (self.init_browser, self.find_product, self.visit_site,
                     self.shipping_func, self.checkout_func):
            step()
class control_google():
    """Drive a Chrome session through Google account, Google+, YouTube and
    Blogspot pages.

    ``init_browser`` must be called first; it creates ``self.browser`` and
    signs in. ``open_profile`` reads ``self.profile_id``, which this class
    never assigns -- presumably the caller sets it from ``get_profile_id``
    (TODO confirm). Most selectors are numeric element ids of the light
    (mobile) Google pages.
    """

    def init_browser(self, email, passwd):
        """Start Chrome and sign in on the mobile Google login page.

        ``email`` is the account name without the ``@gmail.com`` suffix.
        Relies on a ``useragent`` name defined outside this class.
        """
        self.state = 'good'
        self.passwd = passwd
        self.login = email
        # The image-blocking option dicts built here previously were never
        # passed to Browser, so they were dead code and have been removed.
        self.browser = Browser('chrome', user_agent=useragent)
        load_page = 'https://accounts.google.com/ServiceLogin?btmpl=mobile_tier2&hl=ru&service=mobile'
        self.browser.visit(load_page)
        self.browser.find_by_id('Email').first.fill(email + '@gmail.com')
        self.browser.find_by_id('Passwd').first.fill(passwd)
        self.browser.find_by_id('signIn').first.click()

    def _google_hook(self):
        """Dismiss the interstitial pages Google may show after navigation."""
        if self.browser.is_element_present_by_id('358'):
            self.browser.find_by_id('358').first.click()
        if self.browser.is_element_present_by_id('link_dismiss'):
            try:
                self.browser.find_by_id('link_dismiss').first.click()
            except Exception:
                # Best effort: the link may vanish between check and click.
                pass
        if 'getstarted' in self.browser.url:
            self.browser.back()
        if self.browser.is_element_present_by_id('link_dismiss'):
            self.browser.find_by_id('link_dismiss').first.click()

    def open_profile(self):
        """Open the light-version "about" page of ``self.profile_id``."""
        print('Open light version profile')
        load_page = 'https://plus.google.com/app/basic/%s/about' % self.profile_id
        self.browser.visit(load_page)

    def save_profile(self):
        """Click the save button of the light profile editor."""
        self.browser.find_by_id('177').first.click()

    def register_google_plus(self, firstName, lastName):
        """Create the Google+ profile with the given first and last name."""
        load_page = 'https://plus.google.com/u/0/?gpsrc=ogpy0&tab=XX'
        self.browser.visit(load_page)
        self.browser.fill('firstName', firstName)
        self.browser.fill('lastName', lastName)
        self.browser.find_by_name('buttonPressed').first.click()
        self.browser.find_by_id('357').first.click()

    def get_profile_id(self):
        """Return the profile id taken from the settings page, or False."""
        load_page = 'https://www.google.com/settings/general-light?ref=/settings/account'
        self.browser.visit(load_page)
        if not self.browser.is_element_present_by_xpath('//a[@class="CS"]'):
            return False
        profile_link = self.browser.find_by_xpath('//a[@class="CS"]').first
        # The id is the fourth '/'-separated component of the link.
        return profile_link['href'].split('/')[3]

    def profile_edit(self, vals):
        """Fill the work/education/location section of the profile and save.

        ``vals`` must contain the keys listed in ``fields`` below.
        """
        self.open_profile()
        print('Click change profile')
        self.browser.find_by_id('59').first.click()
        # Confirm mobile rules
        self._google_hook()
        fields = (
            ('peWork0', 'company'),
            ('peWorkTitle0', 'position'),
            ('peWorkStartYear0', 'year_start'),
            ('peWorkEndYear0', 'year_stop'),
            ('peSchool0', 'university_name'),
            ('peSchoolMajor0', 'field_education_name'),
            ('peSchoolStartYear0', 'going_to_college_year'),
            ('peSchoolEndYear0', 'after_graduation_year'),
            ('pePlaceLived0', 'place_lived'),
        )
        for input_name, key in fields:
            self.browser.find_by_name(input_name).first.fill(vals[key])
        self.browser.find_by_name('pePlaceLivedIsCurrent').first.check()
        self.browser.find_by_name('peGender').first.select("1")
        print('Done profile_edit')
        self.save_profile()

    def change_photo(self, photo_path):
        """Upload ``photo_path`` as the new profile photo and save.

        Previously this read ``self.photo_path``, which is never assigned,
        instead of the ``photo_path`` argument.
        """
        self.open_profile()
        print('Click change profile')
        self.browser.find_by_id('59').first.click()
        print('Click change photo')
        self.browser.find_by_id('375').first.click()
        self.browser.attach_file('photo_upload_file_name', photo_path)
        print('Done profile_edit')
        self.browser.find_by_id('314').first.click()
        self.save_profile()

    def change_pass(self, old_pass, new_pass):
        """Change the account password through the EditPasswd page."""
        print('Open password change page')
        load_page = 'https://accounts.google.com/b/0/EditPasswd?hl=ru'
        self.browser.visit(load_page)
        self.browser.find_by_id('OldPasswd').first.fill(old_pass)
        self.browser.find_by_id('Passwd').first.fill(new_pass)
        self.browser.find_by_id('PasswdAgain').first.fill(new_pass)
        self.browser.find_by_id('save').first.click()
        print('Done change pass')

    def open_full_plus(self):
        """Open the full (desktop) Google+ home page."""
        load_page = 'https://plus.google.com/u/0/'
        self.browser.visit(load_page)

    def open_full_profile(self):
        """Open the user's own profile from the full Google+ page."""
        self.open_full_plus()
        self._google_hook()
        print('Click user icon')
        self.browser.find_by_id('gbi4i').first.click()
        print('Click show profile')
        self.browser.find_by_xpath(
            '//a[@class="gbqfb gbiba gbp1"]').first.click()

    def change_name(self, firstName, lastName):
        """Rename the profile through the full Google+ UI."""
        self.open_full_plus()
        self.open_full_profile()
        print('Click change name')
        time.sleep(5)
        self.browser.find_by_xpath(
            '//div[@guidedhelpid="profile_name"]').first.click()
        print('Fill values')
        time.sleep(5)
        self.browser.find_by_xpath(
            '//input[@class="l-pR osa g-A-G"]').first.fill(firstName)
        self.browser.find_by_xpath(
            '//input[@class="l-oR Ika g-A-G"]').first.fill(lastName)
        print('Save results')
        self.browser.find_by_xpath(
            '//*[starts-with(@class,"a-f-e c-b c-b-M nVrMHf nZQKMd h019o")]'
        ).first.click()
        print('Confirm')
        self.browser.find_by_name('ok').first.click()

    def youtube_hoock(self):
        """Handle the interstitial pages YouTube/Google may redirect to.

        Each branch is keyed on a marker in the current URL: re-login,
        channel creation, site selection and profile switching.
        """
        if 'ServiceLogin?' in self.browser.url:
            print('ServiceLogin? Hook')
            self.browser.fill('Passwd', self.passwd)
            self.browser.find_by_name('signIn').first.click()
        if 'create_channel?' in self.browser.url:
            print('create_channel? Hook')
            self.browser.click_link_by_partial_href('create_channel')
            self.browser.fill('username', self.login)
            self.browser.find_by_id('channel_submit').click()
            # Step back through the three pages of the channel wizard.
            for _ in range(3):
                self.browser.back()
        if 'select_site?' in self.browser.url:
            print('select_site? Hook')
            self.browser.find_by_xpath('//input[@type="submit"]').click()
            for _ in range(2):
                self.browser.back()
        if 'switch-profile.g?' in self.browser.url:
            print('switch-profile.g? Hook')
            self.browser.find_by_id('switchButton').click()

    def youtube_like(self, url):
        """Like the video at ``url``.

        Previously called ``youtube_hoock(url)``, which raised TypeError
        because the hook takes no arguments.
        """
        self.browser.visit(url)
        self.browser.click_link_by_partial_href('action_like=1')
        self.youtube_hoock()
        self.browser.find_by_name('action_rate').click()

    def youtube_dislike(self, url):
        """Dislike the video at ``url``."""
        self.browser.visit(url)
        self.browser.click_link_by_partial_href('action_dislike=1')
        self.youtube_hoock()
        self.browser.find_by_name('action_rate').click()

    def youtube_comment(self, url, comment):
        """Post ``comment`` on the video at ``url``."""
        self.browser.visit(url)
        self.browser.click_link_by_partial_href('post_comment')
        self.youtube_hoock()
        try:
            self.browser.click_link_by_partial_href('post_comment')
        except Exception:
            # Best effort: the link is absent when the form is already open.
            pass
        self.youtube_hoock()
        self.browser.fill('comment', comment)
        self.browser.find_by_name('action_comment').click()
        self.youtube_hoock()

    def youtube_subscribe(self, chane_name):
        """Subscribe to the channel of user ``chane_name``."""
        load_page = 'http://m.youtube.com/user/%s' % chane_name
        self.browser.visit(load_page)
        self.browser.find_by_name('submit')[1].click()
        self.youtube_hoock()
        try:
            self.browser.find_by_name('submit')[1].click()
        except Exception:
            # Best effort: the second click is only needed after a redirect.
            pass

    def google_friend_connector(self):
        """Placeholder; not implemented."""
        pass

    def blogspot_follow(self, url):
        """Placeholder; not implemented."""
        pass

    def get_capture(self):
        """Solve the 300px-wide captcha image and fill in the answer."""
        cap_element = self.browser.find_by_xpath('//img[@width="300"]').first
        cap_code = recognize_captcha(cap_element['src'])
        self.browser.fill('recaptcha_response_field', cap_code)

    def blogspot_post_plus(self, url):
        """Click the +1 widget embedded in the Blogspot page at ``url``."""
        self.browser.visit(url)
        frame_name = self.browser.find_by_xpath(
            '//*[starts-with(@name,"I0_")]')[0]['name']
        print(frame_name)
        with self.browser.get_iframe(frame_name) as iframe:
            iframe.find_by_xpath('//span[@class="hAa Qo Bg"]').first.click()

    def blogspot_post(self, url, comment):
        """Post ``comment`` on the Blogspot page; return True on success.

        Success is detected by ``showComment=`` appearing in the final URL.
        """
        self.browser.visit(url)
        with self.browser.get_iframe('comment-editor') as iframe:
            self.browser.fill('commentBody', comment)
            iframe.find_by_id('postCommentSubmit').click()
        self.youtube_hoock()
        with self.browser.get_iframe('comment-editor') as iframe:
            if iframe.is_element_present_by_id('recaptcha_image'):
                self.get_capture()
                iframe.find_by_id('postCommentSubmit').click()
        return 'showComment=' in self.browser.url

    def google_post_like(self, url):
        """+1 the post unless the ``stupop`` element is present.

        Returns True when the button was clicked, False otherwise.
        """
        self.browser.visit(url)
        if self.browser.is_element_present_by_name('stupop'):
            return False
        self.browser.find_by_id('162').click()
        return True

    def google_post_dislike(self, url):
        """Click the +1 toggle only when the ``stupop`` element is present.

        Returns True when the button was clicked, False otherwise.
        """
        self.browser.visit(url)
        if not self.browser.is_element_present_by_name('stupop'):
            return False
        self.browser.find_by_id('162').click()
        return True

    def google_post_comment(self, url, comment):
        """Comment on the Google+ post at ``url``."""
        self.browser.visit(url)
        self.browser.fill('adcp', comment)
        self.browser.find_by_id('110').click()

    def google_post_share(self, url, comment):
        """Re-share the Google+ post at ``url`` with ``comment``."""
        self.browser.visit(url)
        self.browser.find_by_id('396').click()
        self.browser.fill('rpPostMsg', comment)
        self.browser.find_by_id('253').click()

    def google_profile_join(self, id):
        """Open profile ``id`` (light version) and add it to a circle."""
        self.browser.visit('https://plus.google.com/app/basic/%s/' % id)
        self.browser.find_by_id('59').click()
        self.circle_join()

    def circle_join(self):
        """Tick the fourth circle checkbox, confirm and reload."""
        self.browser.find_by_name('chcccp')[3].click()
        self.browser.find_by_id('49').click()
        self.browser.reload()

    def google_communities_enter(self, id):
        """Open the community page ``id`` and dismiss interstitials."""
        self.browser.visit('https://plus.google.com/u/0/communities/%s/' % id)
        self._google_hook()

    def google_communities_join(self, id):
        """Enter community ``id`` and click "join" when it is offered."""
        self.google_communities_enter(id)
        join_button = '//*[starts-with(@class,"a-f-e c-b c-b-La")]'
        if self.browser.is_element_present_by_xpath(join_button):
            self.browser.find_by_xpath(join_button).first.click()

    def google_communities_post(self, id, mess):
        """Join community ``id``, post ``mess`` and print member oids."""
        print('Start communities post')
        self.google_communities_join(id)
        time.sleep(60)
        self.browser.find_by_xpath(
            '//div[@guidedhelpid="sharebox_textarea"]').first.click()
        self.browser.find_by_xpath('//div[@class="yd editable"]').first.fill(
            mess)
        self.browser.find_by_xpath(
            '//div[@guidedhelpid="sharebutton"]').click()
        time.sleep(60)
        self.browser.find_by_xpath('//div[@class="a-n Ph Hw"]').first.click()
        print('-' * 30)
        for i in self.browser.find_by_xpath('//a[@class="FW9qdb Wk"]'):
            print(i['oid'])

    def google_people_suggested(self):
        """Add up to ten suggested people to a circle."""
        suggested = 'https://plus.google.com/app/basic/people/suggested?'
        self.browser.visit(suggested)
        for _ in range(10):
            try:
                self.browser.find_by_xpath('//a[@class="vfc"]').first.click()
                self.circle_join()
            except Exception:
                # Best effort: restart from the suggestions page on failure.
                self.browser.visit(suggested)

    def google_grab_comm_members(self, id, qty):
        """Collect member oids of community ``id`` into /tmp/google_oid.txt.

        ``qty`` sets how many scroll iterations are made. Every iteration
        right-clicks the second-to-last member tile (presumably to scroll
        more members into view -- TODO confirm) and records unseen oids in
        first-seen order.
        """
        irr_qty = int((qty - 64) / 20.00) + 3
        print('Irr qty= %d' % irr_qty)
        self.browser.visit(
            'https://plus.google.com/u/0/communities/%s/members' % id)
        ret_arr = []
        seen = set()  # O(1) membership test; ret_arr keeps the order
        for i in range(irr_qty):
            elem_arr = self.browser.find_by_xpath('//div[@class="ib31if"]')
            print('Array len %d' % len(elem_arr))
            print(i)
            print('')
            elem_arr[len(elem_arr) - 2].right_click()
            for elem in elem_arr:
                oid = elem['oid']
                if oid not in seen:
                    seen.add(oid)
                    ret_arr.append(oid)
                    print(oid)
        with open('/tmp/google_oid.txt', 'w') as f:
            for s in ret_arr:
                f.write('<item>' + s + '</item>\n')
        print('Grab done')

    def quit(self):
        """Close the browser."""
        self.browser.quit()
#!/usr/bin/python
# Load a local test page headlessly and press the play button inside the
# 'player' iframe.
import time
from splinter import Browser

PLAY_BUTTON_ID = 'play-button'

browser = Browser('phantomjs')
# browser = Browser()
browser.visit('file:///home/ethompsy/Projects/spot/html/spotify_test.html')
print(browser.url)
print(browser.html)

with browser.get_iframe('player') as player:
    # Wait up to ten seconds for the button to be rendered.
    player.is_element_present_by_id(PLAY_BUTTON_ID, wait_time=10)
    print(player.find_by_id(PLAY_BUTTON_ID).first)
    player.find_by_id(PLAY_BUTTON_ID).first.click()

time.sleep(5)
browser.quit()
class ChopeBrowser:
    """Automate the NTU PC/facility booking site in a Chrome session.

    Two flows are offered: ``pc_setup`` books a free PC of a given type and
    ``scrape_seats`` collects the weekly booked slots of every facility.
    """

    def __init__(self, headless=False):
        self.chrome = Browser('chrome', headless=headless)

    def time_delay(self, time):
        """Block for up to ``time`` seconds.

        Implemented by waiting for an element name that never exists, so the
        wait always runs to its timeout.
        """
        self.chrome.is_element_present_by_name('!@#$%^&*())(*&^%$#@!',
                                               wait_time=time)

    def login(self, usr, pwd, domain='STUDENT'):
        """Sign in with ``usr``/``pwd`` after choosing ``domain``."""
        url = 'https://ntupcb.ntu.edu.sg'
        url += '/fbscbs/Account/SignIn?ReturnUrl=%2ffbscbs'
        self.chrome.visit(url)
        for option in self.chrome.find_by_tag('option'):
            if option.text == domain:
                option.click()
        self.chrome.fill('Username', usr)
        # The trailing newline submits the form.
        self.chrome.fill('Password', pwd + '\n')

    # PC BOOKING STARTS HERE
    def pc_setup(self, usr, pwd, Type):
        """Try to book a PC of type ``Type``.

        Returns ``(pc_name, status)`` where ``status`` is ``"booked"`` or
        ``"cannot book"``; the browser is closed afterwards.
        """
        self.login(usr, pwd)
        self.chrome.find_by_id('tdPcBook').click()
        time.sleep(2)
        with self.chrome.get_iframe('frmAdminViewControls') as iframe:
            iframe.find_by_id('pnlInsLoc3').click()
        self.type_number(Type)
        data = self.scrape_pc()
        can_book = self.book_pc(data[1], data[2])
        self.chrome.quit()
        return data[0], can_book

    def type_number(self, Types):
        """Click the PC group whose label equals ``Types``.

        Returns None once a group was clicked and 0 when none matched.
        """
        for i in range(0, 4):
            with self.chrome.get_iframe('frmAdminViewControls') as iframe:
                page = iframe.find_by_id('pnlInsPcGrp' + str(i))
                if page != []:
                    soup = BeautifulSoup(page.html, "lxml")
                    label = soup.find("span", {
                        "style": "display:inline-block;height:20px;width:80px;"
                    }).get_text()
                    if label == Types:
                        iframe.find_by_id('pnlInsPcGrp' + str(i)).click()
                        return
        return 0

    def scrape_pc(self):
        """Return ``(name, column, row)`` of the first free PC on screen.

        ``('no pc', 100, 100)`` is returned when a seat cell is missing or
        no free PC exists.
        """
        with self.chrome.get_iframe('frmSeating') as iframe:
            for i in range(0, 6):
                for j in range(1, 11):
                    btnID = 'grdSeating_tblCol' + str(j) + '_' + str(i)
                    parse = iframe.find_by_id(btnID)
                    if parse == []:
                        return 'no pc', 100, 100
                    if self.color(parse.html) == '#FFFFFF':
                        return self.name_pc(parse.html), j, i
        return 'no pc', 100, 100

    def name_pc(self, codes):
        """Extract the PC name from the seat cell HTML ``codes``."""
        soup = BeautifulSoup(codes, "lxml")
        mydivs = soup.findAll("span", {"class": "lblPcName"})
        return mydivs[0].get_text()

    def color(self, code):
        """Return '#FFFFFF' when the cell has a white background (free PC),
        otherwise the placeholder 'blabla'."""
        soup = BeautifulSoup(code, "lxml")
        tag = soup.findAll('td', {"style": "background-color: #FFFFFF"})
        if tag != []:
            return '#FFFFFF'
        return 'blabla'

    def book_pc(self, col, row):
        """Book the PC at ``col``/``row``; 100 marks "no PC found".

        Returns ``"booked"`` or ``"cannot book"``.
        """
        with self.chrome.get_iframe('frmSeating') as iframe:
            if (col != 100) and (row != 100):
                try:
                    time.sleep(1)
                    butt = iframe.find_by_id("grdSeating_divOuterCol" +
                                             str(col) + "_" + str(row))
                    if butt != []:
                        butt.click()
                    time.sleep(1)
                    iframe.find_by_name("btnsumit").click()
                    return "booked"
                except Exception:
                    # Presumably a blocking dialog appeared; dismiss it
                    # with Enter (TODO confirm).
                    pyautogui.press('enter')
                    return "cannot book"
        return "cannot book"

    def first_setup(self):
        """Navigate the booking site until the booking table is shown."""
        self.chrome.find_by_id('tdFacilityBook').click()
        self.chrome.click_link_by_href('#8')
        self.chrome.click_link_by_href('#-1')
        self.chrome.click_link_by_href('/fbscbs/Booking/Create?resourceId=69')
        self.chrome.click_link_by_id('book')
        self.chrome.click_link_by_id('changeResource')
        self.chrome.click_link_by_href('#-1')
        self.chrome.click_link_by_id('book')

    @staticmethod
    def is_registered(event):
        """Return False for filler slots ('noShowWhite', 'currentEvent').

        Declared static because it takes no ``self``; previously it could
        not be called on an instance.
        """
        if event.has_class('noShowWhite'):
            return False
        if event.has_class('currentEvent'):
            return False
        return True

    def check_facility(self, evFacilities):
        """Append the weekly booked slots of the selected facility.

        The appended value is a list of days, each a list of
        ``[start, end]`` pairs split on the em dash in the event text.
        """
        evWeek = []
        for columnDay in self.chrome.find_by_css('.wc-event-column'):
            evToday = []
            for event in columnDay.find_by_css('.ui-corner-all'):
                if not self.is_registered(event):
                    continue
                text = event.text
                # Text without the separator (including '') is not a slot.
                if '—' in text:
                    evToday.append(text.split('—'))
            evWeek.append(evToday)
        evFacilities.append(evWeek)

    def click_next(self, counter, evFacilities):
        """Select facility number ``counter`` and record its bookings.

        Returns ``evFacilities``, unchanged when ``counter`` is out of
        range. Previously ``check_facility`` was called with an extra
        argument, which raised TypeError.
        """
        dropdown = self.chrome.find_by_id('ResourceId')
        options = dropdown.find_by_tag('option')
        if counter < len(options):
            options[counter].click()
            self.check_facility(evFacilities)
        return evFacilities

    # OPTIMIZE: by multithreading and by running multiple browsers at once
    def scrape_seats(self, usr, pwd):
        """Log in and collect the bookings of every facility.

        Returns a flat list that alternates facility name and that
        facility's weekly bookings; the browser is closed afterwards.
        """
        self.login(usr, pwd)
        self.first_setup()
        evFacilities = []
        dropdown = self.chrome.find_by_id('ResourceId')
        for opt in dropdown.find_by_tag('option'):
            opt.click()
            self.time_delay(0.2)
            evFacilities.append(opt.text)
            self.check_facility(evFacilities)
        self.quit()
        return evFacilities

    def quit(self):
        """Close the browser."""
        self.chrome.quit()
# which duo authentication device is going to be used duo_device_num = 0 f = open("discusion.txt", "w", encoding="utf-8") browser = Browser('chrome', executable_path=chromedriverPath) # browser = Browser('chrome', headless=False, executable_path=chromedriverPath) # browser = Browser('firefox',executable_path=geckoPath) # url = "http://www.google.com" browser.visit(url) browser.fill('username', username) browser.fill('password', password) browser.find_by_name('submit').click() with browser.get_iframe('duo_iframe') as iframe: device = iframe.find_by_tag('select').first options = iframe.find_by_tag('option') device.select(options[duo_device_num].value) buts = iframe.find_by_tag('button') for but in buts: if but.visible: but.click() break eles = browser.find_by_css(".message.user_content.enhanced") while eles is None or len(eles) < 3: time.sleep(1) eles = browser.find_by_css(".message.user_content.enhanced") print("waiting duo authentication...") posts = []
# Log in to way2sms and send one quick SMS.
from splinter import Browser

browser = Browser('firefox')
browser.visit('http://www.way2sms.com/content/index.html')

for field_name, value in (('username', 'mob'), ('password', 'pass')):
    browser.find_by_name(field_name).fill(value)
browser.find_by_name('Login').click()

# Open the "quick SMS" form.
browser.find_by_css('.boo.I4').click()
browser.find_by_id('quicksms').click()

# The form lives in an iframe; while the context is active, lookups on
# `browser` are made inside that frame.
with browser.get_iframe('frame') as iframe:
    message_box = browser.find_by_name('textArea')
    message_box.fill('textttareaarereara')
    recipient_box = browser.find_by_css('.inp')
    recipient_box.fill('9710101067')
    browser.find_by_name('Send').click()
#!/usr/bin/env python3
# -*-coding:utf-8-*-
# Log in to mail.qq.com through its embedded login iframe.
from splinter import Browser
import time

browser = Browser()
url = 'https://mail.qq.com/'
browser.visit(url)

# NOTE(review): credentials are hard-coded; consider loading them from the
# environment instead of keeping them in source control.
with browser.get_iframe('login_frame') as login_form:
    for element_id, value in (('u', '976207292'), ('p', 'z976207292')):
        login_form.find_by_id(element_id).fill(value)
    login_form.find_by_id('login_button').click()
# Attach the Dropbox project link to the post data when one was requested.
if dropbx == True:
    texturedata["dropbox"] = {"project link": dblink}
# Persist the post data as <bldir><name>/<name>_postdata.json.
texturesubmitdata = bldir + name + "/" + name + "_postdata.json"
with open(texturesubmitdata, "w") as f:
    json.dump(texturedata, f, sort_keys=False, indent=4, ensure_ascii=False)
# Prepare the browser for the forum post:
# parse the paragraph to its final form to be injected via JavaScript,
# fill the subject line of the forum,
# test for autosubmit status.
bbcodesubmitdata = bldir + name + "/" + name + "_bbctemp.txt"
with open(bbcodesubmitdata, "w") as f:
    f.write(template.bbtemplate(texturedata))
if autosubmit == True:
    pblogin()
    browser.fill("subject", nname)
    # The post body editor is addressed as the iframe with index 2.
    with browser.get_iframe(2) as iframe:
        # NOTE(review): `final` is spliced into a single-quoted JavaScript
        # string; this presumably relies on `final` being escaped earlier
        # (quotes, newlines) -- verify against the code that builds it.
        iframe.execute_script(
            """ var forumBody = '%s'; document.getElementsByTagName('body')[0].innerHTML = forumBody; """
            % (final)
        )
    # Wait until a page titled after the texture has been opened before
    # moving on. Splinter does not press submit automatically at the moment;
    # this will change once the script is close to 100% accurate.
    pagename = nname + " | Melee HD"
    pagetitle = browser.title
    # Polls once per second with no timeout.
    while pagetitle != pagename:
        pagetitle = browser.title
        time.sleep(1)
class SplinterBrowserDriver(BaseBrowserDriver):
    """
    This is a BrowserDriver for splinter
    (http://splinter.cobrateam.info)
    that implements the BaseBrowserDriver API.

    To use it, you must have splinter installed on your env.

    For itself it's a browser driver that supports multiple browsing
    technologies such as selenium, phantomjs, zope, etc.
    """

    driver_name = 'splinter'

    def __init__(self):
        """Create the underlying splinter browser.

        Raises:
            ImportError: if splinter is not installed.
        """
        super(SplinterBrowserDriver, self).__init__()
        if not splinter_available:
            raise ImportError(
                "In order to use splinter Base Driver you have to install it. "
                "Check the instructions at http://splinter.cobrateam.info")
        self._browser = Browser(config.default_browser)

    def _handle_empty_element_action(self, element):
        """Raise ActionNotPerformableException when `element` is falsy."""
        if not element:
            # Each fragment ends with a space so the implicitly concatenated
            # literals read as one sentence.
            raise ActionNotPerformableException(
                "The action couldn't be performed because the element couldn't "
                "be found; Try checking if your element "
                "selector is correct and if the page is loaded properly.")

    @property
    def page_url(self):
        """URL of the page currently loaded in the browser."""
        return self._browser.url

    @property
    def page_source(self):
        """HTML source of the current page."""
        return self._browser.html

    @property
    def page_title(self):
        """Title of the current page."""
        return self._browser.title

    def open_url(self, url):
        """Load `url` through the browser's underlying driver."""
        self._browser.driver.get(url)

    def quit(self):
        """Close the browser."""
        return self._browser.quit()

    def is_element_visible(self, element):
        """Return the element's `visible` attribute."""
        return element.visible

    def get_element_text(self, element):
        """Return the element's `text` attribute."""
        return element.text

    def get_element_by_xpath(self, selector):
        """Find elements matching the XPath `selector`."""
        return self._browser.find_by_xpath(selector)

    def get_element_by_css(self, selector):
        """Find elements matching the CSS `selector`."""
        return self._browser.find_by_css(selector)

    def get_element_by_id(self, selector):
        """Find elements whose id equals `selector`."""
        return self._browser.find_by_id(selector)

    def get_element_by_tag(self, selector):
        """Find elements whose tag name equals `selector`."""
        return self._browser.find_by_tag(selector)

    # NOTE(review): `element_action` is defined elsewhere; presumably it
    # validates the element before the wrapped action runs -- confirm.
    @element_action
    def type(self, element, text, slowly=False):
        """Type `text` into `element`, optionally key by key."""
        return element.type(text, slowly)

    @element_action
    def fill(self, element, text):
        """Fill `element` with `text`."""
        return element.fill(text)

    @element_action
    def clear(self, element):
        """Empty `element` by filling it with an empty string."""
        self.fill(element, '')

    @element_action
    def click(self, element):
        """Click `element`."""
        return element.click()

    @element_action
    def check(self, element):
        """Check `element`."""
        return element.check()

    @element_action
    def uncheck(self, element):
        """Uncheck `element`."""
        return element.uncheck()

    @element_action
    def mouse_over(self, element):
        """Move the mouse over `element`."""
        return element.mouse_over()

    @element_action
    def mouse_out(self, element):
        """Move the mouse away from `element`."""
        return element.mouse_out()

    def reload(self):
        """Reload the current page."""
        return self._browser.reload()

    def go_back(self):
        """Navigate one step back in the browser history."""
        return self._browser.back()

    def go_forward(self):
        """Navigate one step forward in the browser history."""
        return self._browser.forward()

    def execute_script(self, script):
        """Evaluate `script` in the page and return its result."""
        return self._browser.evaluate_script(script)

    def get_iframe(self, iframe_id):
        """Return splinter's iframe context manager for `iframe_id`."""
        return self._browser.get_iframe(iframe_id)

    def get_alert(self):
        """Return the browser's current alert."""
        return self._browser.get_alert()

    def attach_file(self, input_name, file_path):
        """Attach the file at `file_path` to the input named `input_name`."""
        return self._browser.attach_file(input_name, file_path)

    def wait_pageload(self, timeout=30):
        """Poll `document.readyState` until it is 'complete'.

        Raises:
            PageNotLoadedException: once more than `timeout` seconds of
                polling intervals have accumulated.
        """
        wait_interval = 0.05
        elapsed = 0

        while self.execute_script('document.readyState') != 'complete':
            self.wait(wait_interval)
            elapsed += wait_interval

            if elapsed > timeout:
                raise PageNotLoadedException

    def click_and_wait(self, element, timeout=30):
        """Click `element`, then wait for the page to finish loading."""
        self.click(element)
        self.wait_pageload(timeout)
class MovieCheckin:
    """Pick one unseen show per day and register it on the SF site afterwards.

    A daily cron job selects a random unseen show, a second job picks an
    already-taken seat five minutes before the show starts, and a third job
    submits the after-registration form once the show has ended.
    """

    USERNAME = '******'
    PASSWORD = '******'
    CITY = 'Linköping'
    CITY_ALIAS = 'Filmstaden Linköping'
    SEEN_MOVIES_FILENAME = 'seen_movies.txt'

    def __init__(self):
        """Load state, create the browser and start the blocking scheduler."""
        self.manager = SFApiManager()
        self.seen_movies = self.get_seen_movies()
        print("{} - Starting MovieCheckinManager..".format(
            datetime.datetime.now()))
        print("List of Ids for seen movies: {}".format(self.seen_movies))
        self.todays_show = None
        self.scheduler = BlockingScheduler(timezone="Europe/Stockholm")
        self.scheduler.add_job(self.reset,
                               'cron',
                               day_of_week='mon-sun',
                               hour=8,
                               minute=0,
                               second=0)
        self.browser = Browser('firefox')
        # The scheduler is a BlockingScheduler, so everything this object
        # needs is set up before start() is called.
        self.scheduler.start()

    def reset(self):
        """Select today's show and schedule the seat-picking job."""
        print("{} - Job: time to select a new movie".format(
            datetime.datetime.now()))
        self.todays_show = self.select_show()
        show_length = self.todays_show.get_show_length()
        # Registration happens ten minutes after the show has ended.
        self.todays_show_end = self.todays_show.time + timedelta(
            minutes=show_length + 10)
        print("Selected movie {} which begins at {} and ends at {}".format(
            self.todays_show.movie.title, self.todays_show.time,
            self.todays_show_end))
        five_minutes_before_movie_start = self.todays_show.time - timedelta(
            minutes=5)
        print("Registering job to run @ {}".format(
            five_minutes_before_movie_start))
        self.scheduler.add_job(self.get_taken_seat,
                               'date',
                               run_date=five_minutes_before_movie_start,
                               id='seat_job')

    def get_taken_seat(self):
        """Pick a taken seat for today's show, or start over with a new show."""
        print(
            "{} - Job: five minutes before show starts, selecting taken seat.."
            .format(datetime.datetime.now()))
        self.seat = self.todays_show.get_taken_seat()
        if self.seat is not None:
            print("Selected taken seat: {}".format(self.seat))
            print("Registering job to run @ {}".format(self.todays_show_end))
            self.scheduler.add_job(self.register,
                                   'date',
                                   run_date=self.todays_show_end,
                                   id='register_job')
        else:
            # This message used to be a bare string expression and was never
            # shown.
            print(
                "No-one purchased tickets to this show, trying another movie.."
            )
            self.reset()

    def get_seen_movies(self):
        """Return the ids stored in SEEN_MOVIES_FILENAME, one per line."""
        with open(self.SEEN_MOVIES_FILENAME, 'r') as seen_movies_file:
            return [line.rstrip() for line in seen_movies_file]

    def add_seen_movie(self, movie_id):
        """Remember `movie_id` in memory and append it to the seen-movies file.

        A failure to write the file is reported but not raised.
        """
        self.seen_movies.append(movie_id)
        print("{} - Adding movie with showId {} to file".format(
            datetime.datetime.now(), movie_id))
        try:
            with open(self.SEEN_MOVIES_FILENAME, 'a') as seen_movies_file:
                seen_movies_file.write("{}\n".format(movie_id))
        except Exception as exc:
            # Best effort, as before, but no longer swallowing
            # KeyboardInterrupt/SystemExit.
            print("Could not add showId to file! ({})".format(exc))

    def get_todays_lkpg_shows(self):
        """Return today's shows for the first theater whose city alias is "LI"."""
        date_str = datetime.datetime.today().strftime('%Y-%m-%d')
        lkpg_ncgid = [
            theater.ncg_id for theater in self.manager.get_theaters()
            if theater.city_alias == "LI"
        ][0]
        return self.manager.get_shows(lkpg_ncgid, date_str, date_str)

    def select_show(self):
        """Randomly choose an unseen show that has not started yet.

        Raises:
            IndexError: if there is no such show (from `random.choice`).
        """
        todays_unseen_shows = [
            show for show in self.get_todays_lkpg_shows()
            if show.movie.ncg_id not in self.seen_movies
            and show.time > datetime.datetime.now()
        ]
        selected_show = random.choice(todays_unseen_shows)
        selected_show.seats = self.manager.get_seats(
            selected_show.remote_entity_id)
        print("Todays movie selected: {}".format(selected_show.movie.title))
        print("Ncg-Id: {}".format(selected_show.movie.ncg_id))
        print("Id: {}".format(selected_show.remote_entity_id))
        print("Starts: {}".format(selected_show.time))
        return selected_show

    def register(self):
        """Submit the after-registration form for today's show.

        The form is attempted up to ten times; afterwards the movie is
        recorded as seen whether or not a submission succeeded.
        """
        print("{} - Job: time to register todays show!".format(
            datetime.datetime.now()))
        city = self.CITY
        cinema = self.CITY_ALIAS
        show_date = self.todays_show.time.strftime("%Y-%m-%d")
        # %M is minutes; the previous "%H:%m" produced hour:month.
        show_time = self.todays_show.time.strftime("%H:%M")
        room_nr = self.todays_show.remote_entity_id.split("-")[1]
        seat_nr = self.seat[1]
        row_nr = self.seat[0]
        movie_title = self.todays_show.movie.title

        url = "http://www3.sf.se/BIOKLUBBEN/LOGGA-IN/"
        field = 'ctl00$ContentPlaceHolder1$'
        # dunno why one might have to try several times before it works,
        # I blame Geckodriver..
        for attempt in range(1, 11):
            try:
                self.browser.visit(url)
                with self.browser.get_iframe("Stack") as iframe:
                    print("Loggar in")
                    iframe.fill(field + 'LoginNameTextBox', self.USERNAME)
                    iframe.fill(field + 'PasswordTextBox', self.PASSWORD)
                    iframe.click_link_by_text('Logga in')

                with self.browser.get_iframe("Stack") as iframe:
                    print("Fyller i uppgifter")
                    iframe.click_link_by_text("Efterregistrering")
                    iframe.select(field + 'CinemaNameDDL', cinema)
                    iframe.fill(field + 'TransactionDateTextBox', show_date)
                    iframe.fill(field + 'RowNbrTextBox', row_nr)
                    iframe.fill(field + 'ShowTimeTextBox', show_time)
                    iframe.select(field + 'CityNameDDL', city)
                    iframe.fill(field + 'SalonIDTextBox', room_nr)
                    iframe.fill(field + 'MovieNameTextBox', movie_title)
                    iframe.fill(field + 'ChairNbrTextBox', seat_nr)
                    iframe.click_link_by_text("Skicka")
                    print("{} - Sent registration!".format(
                        datetime.datetime.now()))
                break
            except Exception as exc:
                # Retry on any browser error, but let KeyboardInterrupt and
                # SystemExit through.
                print("Registration attempt {} failed: {}".format(
                    attempt, exc))
        self.add_seen_movie(self.todays_show.movie.ncg_id)
class SplinterBrowserDriver(BaseBrowserDriver):
    """
    This is a BrowserDriver for splinter
    (http://splinter.cobrateam.info)
    that implements the BaseBrowserDriver API.

    To use it, you must have splinter installed on your env.

    For itself it's a browser driver that supports multiple browsing
    technologies such as selenium, phantomjs, zope, etc.
    """

    driver_name = 'splinter'

    def __init__(self):
        """Create the underlying splinter browser.

        Raises:
            ImportError: if splinter is not installed.
        """
        super(SplinterBrowserDriver, self).__init__()
        if not splinter_available:
            raise ImportError(
                "In order to use splinter Base Driver you have to install it. "
                "Check the instructions at http://splinter.cobrateam.info")
        self._browser = Browser(config.default_browser)

    def _handle_empty_element_action(self, element):
        """Raise ActionNotPerformableException when `element` is falsy."""
        if not element:
            # Each fragment ends with a space so the implicitly concatenated
            # literals read as one sentence.
            raise ActionNotPerformableException(
                "The action couldn't be performed because the element couldn't "
                "be found; Try checking if your element "
                "selector is correct and if the page is loaded properly.")

    @property
    def page_url(self):
        """URL of the page currently loaded in the browser."""
        return self._browser.url

    @property
    def page_source(self):
        """HTML source of the current page."""
        return self._browser.html

    @property
    def page_title(self):
        """Title of the current page."""
        return self._browser.title

    def open_url(self, url):
        """Load `url` through the browser's underlying driver."""
        self._browser.driver.get(url)

    def quit(self):
        """Close the browser."""
        return self._browser.quit()

    def is_element_visible(self, element):
        """Return the element's `visible` attribute."""
        return element.visible

    def get_element_text(self, element):
        """Return the element's `text` attribute."""
        return element.text

    def get_element_by_xpath(self, selector):
        """Find elements matching the XPath `selector`."""
        return self._browser.find_by_xpath(selector)

    def get_element_by_css(self, selector):
        """Find elements matching the CSS `selector`."""
        return self._browser.find_by_css(selector)

    def get_element_by_id(self, selector):
        """Find elements whose id equals `selector`."""
        return self._browser.find_by_id(selector)

    def get_element_by_tag(self, selector):
        """Find elements whose tag name equals `selector`."""
        return self._browser.find_by_tag(selector)

    # NOTE(review): `element_action` is defined elsewhere; presumably it
    # validates the element before the wrapped action runs -- confirm.
    @element_action
    def type(self, element, text, slowly=False):
        """Type `text` into `element`, optionally key by key."""
        return element.type(text, slowly)

    @element_action
    def fill(self, element, text):
        """Fill `element` with `text`."""
        return element.fill(text)

    @element_action
    def clear(self, element):
        """Empty `element` by filling it with an empty string."""
        self.fill(element, '')

    @element_action
    def click(self, element):
        """Click `element`."""
        return element.click()

    @element_action
    def check(self, element):
        """Check `element`."""
        return element.check()

    @element_action
    def uncheck(self, element):
        """Uncheck `element`."""
        return element.uncheck()

    @element_action
    def mouse_over(self, element):
        """Move the mouse over `element`."""
        return element.mouse_over()

    @element_action
    def mouse_out(self, element):
        """Move the mouse away from `element`."""
        return element.mouse_out()

    def reload(self):
        """Reload the current page."""
        return self._browser.reload()

    def go_back(self):
        """Navigate one step back in the browser history."""
        return self._browser.back()

    def go_forward(self):
        """Navigate one step forward in the browser history."""
        return self._browser.forward()

    def execute_script(self, script):
        """This method is deprecated. Use `execute_javascript` instead.
        """
        return self._browser.evaluate_script(script)

    def execute_javascript(self, script):
        """Evaluate `script` in the page and return its result."""
        return self._browser.evaluate_script(script)

    def get_iframe(self, iframe_id):
        """Return splinter's iframe context manager for `iframe_id`."""
        return self._browser.get_iframe(iframe_id)

    def get_alert(self):
        """Return the browser's current alert."""
        return self._browser.get_alert()

    def attach_file(self, input_name, file_path):
        """Attach the file at `file_path` to the input named `input_name`."""
        return self._browser.attach_file(input_name, file_path)

    def wait_pageload(self, timeout=30):
        """Poll `document.readyState` until it is 'complete'.

        Raises:
            PageNotLoadedException: once more than `timeout` seconds of
                polling intervals have accumulated.
        """
        wait_interval = 0.05
        elapsed = 0

        while self.execute_javascript('document.readyState') != 'complete':
            self.wait(wait_interval)
            elapsed += wait_interval

            if elapsed > timeout:
                raise PageNotLoadedException

    def click_and_wait(self, element, timeout=30):
        """Click `element`, then wait for the page to finish loading."""
        self.click(element)
        self.wait_pageload(timeout)
class Compass:
    """Drive the Compass web site through a splinter browser and query
    member records previously exported to CSV files in `outdir`.
    """

    def __init__(self, username='', password='', outdir=''):
        """Store the credentials and the export/download directory."""
        self._username = username
        self._password = password
        self._outdir = outdir

        self._browser = None
        # Filled in by load_from_dir(). The attribute used to be initialised
        # under the misspelt name `_record`, which no other method reads.
        self._records = None

    def quit(self):
        """Close the browser if one is open."""
        if self._browser:
            self._browser.quit()
            self._browser = None

    def loggin(self):
        """Open Chrome, log in and select the role with value '1253644'."""
        # Download preferences, currently unused: passing them to Browser is
        # commented out below.
        prefs = {
            "browser.download.folderList": 2,
            "browser.download.manager.showWhenStarting": False,
            "browser.download.dir": self._outdir,
            "browser.helperApps.neverAsk.saveToDisk": "application/octet-stream,application/msexcel,application/csv"}

        self._browser = Browser('chrome')  # , profile_preferences=prefs)

        self._browser.visit('https://compass.scouts.org.uk/login/User/Login')

        self._browser.fill('EM', self._username)
        self._browser.fill('PW', self._password)
        time.sleep(1)
        self._browser.find_by_text('Log in').first.click()

        # Look for the Role selection menu and select my Group Admin role.
        self._browser.is_element_present_by_name(
            'ctl00$UserTitleMenu$cboUCRoles',
            wait_time=30)
        self._browser.select('ctl00$UserTitleMenu$cboUCRoles', '1253644')
        time.sleep(1)

    def wait_then_click_xpath(self, xpath, wait_time=30, frame=None):
        """Retry until the element at `xpath` is present, then click it.

        `frame` defaults to the main browser. The loop never gives up; errors
        are logged and the lookup is retried.
        """
        frame = self._browser if frame is None else frame
        while True:
            try:
                if frame.is_element_present_by_xpath(xpath, wait_time=wait_time):
                    frame.find_by_xpath(xpath).click()
                    break
                log.warning("Timeout expired waiting for {}".format(xpath))
                time.sleep(1)
            except Exception:
                # Not a bare except: Ctrl-C must be able to end this loop.
                log.warning("Caught exception: ", exc_info=True)

    def wait_then_click_text(self, text, wait_time=30, frame=None):
        """Retry until `text` is present, then click the link with that text."""
        frame = self._browser if frame is None else frame
        while True:
            if frame.is_text_present(text, wait_time=wait_time):
                frame.click_link_by_text(text)
                break
            log.warning("Timeout expired waiting for {}".format(text))

    def adult_training(self):
        """Navigate to the adult training page and run its search."""
        self.home()

        # Navigate to training page a show all records.
        self.wait_then_click_text('Training')
        time.sleep(1)
        self.wait_then_click_text('Adult Training')
        time.sleep(1)
        self.wait_then_click_xpath('//*[@id="bn_p1_search"]')

    def home(self):
        """Return to the top of the site."""
        # Click the logo to take us to the top
        self.wait_then_click_xpath('//*[@alt="Compass Logo"]')
        time.sleep(1)

    def search(self):
        """Open the member search popup and submit it."""
        self.home()

        # Click search button
        self.wait_then_click_xpath('//*[@id="mn_SB"]')
        time.sleep(1)

        # Click "Find Member(s)"
        self.wait_then_click_xpath('//*[@id="mn_MS"]')
        time.sleep(1)

        # The search form lives in a popup iframe.
        with self._browser.get_iframe('popup_iframe') as i:
            self.wait_then_click_xpath('//*[@id="LBTN2"]', frame=i)
            time.sleep(1)
            self.wait_then_click_xpath('//*[@class="popup_footer_right_div"]/a', frame=i)
            time.sleep(1)

    def lookup_member(self, member_number):
        """Type `member_number` into the lookup box and run the quick search."""
        self.home()

        # Click search button
        self.wait_then_click_xpath('//*[@id="mn_SB"]')
        time.sleep(1)

        xpath = '//*[@id="CNLookup2"]'
        while True:
            try:
                if self._browser.is_element_present_by_xpath(xpath, wait_time=30):
                    self._browser.find_by_xpath(xpath).fill(member_number)
                    break
                log.warning("Timeout expired waiting for {}".format(xpath))
                time.sleep(1)
            except Exception:
                # Not a bare except: Ctrl-C must be able to end this loop.
                log.warning("Caught exception: ", exc_info=True)

        self.wait_then_click_xpath('//*[@id="mn_QS"]')

    def fetch_table(self, table_id):
        """Scrape the visible rows of the HTML table `table_id`.

        Returns:
            A DataFrame with one row per visible table row, or None (after
            logging a warning) when no row could be found.
        """
        parser = etree.HTMLParser()

        def cells(row):
            # Header and body rows are parsed the same way: text of each <td>.
            tree = etree.parse(StringIO(row.html), parser)
            return ["".join(cell.itertext()) for cell in tree.findall('/*/td')]

        headers_xpath = '//*[@id ="{}"]/thead/*'.format(table_id)
        table_xpath = '//*[@id ="{}"]/tbody/tr[not(@style="display: none;")]'.format(table_id)

        if self._browser.is_element_present_by_xpath(table_xpath, wait_time=5):
            header_rows = [cells(row) for row
                           in self._browser.find_by_xpath(headers_xpath)]
            # Only the first header row is used; tolerate a missing <thead>.
            headings = header_rows[0] if header_rows else []

            records = [cells(row) for row
                       in self._browser.find_by_xpath(table_xpath)]

            if records:
                # Extend the length of each row to the same length as the columns
                records = [row + ([None] * (len(headings) - len(row)))
                           for row in records]

                # And add dummy columns if we do not have enough headings
                headings = headings + [
                    "dummy{}".format(_)
                    for _ in range(0, len(records[0]) - len(headings))]

                return pd.DataFrame.from_records(records, columns=headings)

        log.warning("Failed to find table {}".format(table_id))
        return None

    @staticmethod
    def _tag_member(table, member_number, member_name):
        """Add 'member' and 'name' columns to `table` unless it is None."""
        if table is not None:
            table['member'] = member_number
            table['name'] = member_name
        return table

    def member_training_record(self, member_number, member_name):
        """Return (personal learning plans, training record, mandatory learning).

        Each item is a DataFrame tagged with the member's number and name, or
        None when the corresponding table could not be scraped.
        """
        self.lookup_member(member_number)

        # Select Training record
        self.wait_then_click_xpath('//*[@id="LBTN5"]')

        personal_learning_plans = self._tag_member(
            self.fetch_table('tbl_p5_TrainModules'),
            member_number, member_name)
        training_record = self._tag_member(
            self.fetch_table('tbl_p5_AllTrainModules'),
            member_number, member_name)
        mandatory_learning = self._tag_member(
            self.fetch_table('tbl_p5_TrainOGL'),
            member_number, member_name)

        # The second item used to be personal_learning_plans again, so the
        # training record was scraped but never returned.
        return personal_learning_plans, training_record, mandatory_learning

    def member_permits(self, member_number, member_name):
        """Return the member's permits table (or None), tagged with the member."""
        self.lookup_member(member_number)

        # Select Permits
        self.wait_then_click_xpath('//*[@id="LBTN4"]')

        return self._tag_member(self.fetch_table('tbl_p4_permits'),
                                member_number, member_name)

    # NOTE(review): lru_cache on a method keys on `self` and keeps the
    # instance alive for the lifetime of the cache.
    @lru_cache()
    def get_all_adult_trainers(self):
        """Scrape (once per instance) the adult training search results."""
        self.adult_training()

        return self.fetch_table('tbl_p1_results')

    @lru_cache()
    def get_all_group_members(self):
        """Scrape (once per instance) the full member search results."""
        self.search()

        self._browser.is_element_present_by_xpath('//*[@id = "MemberSearch"]/tbody', wait_time=10)
        time.sleep(1)

        # Hack to ensure that all of the search results loaded.
        for i in range(0, 5):
            self._browser.execute_script(
                'document.getElementById("ctl00_main_working_panel_scrollarea").scrollTop = 100000')
            time.sleep(1)

        return self.fetch_table('MemberSearch')

    def export(self, section):
        """Export the members of `section` to `<outdir>/<section>.csv`.

        If the download has not appeared after about 30 seconds a warning is
        logged and the method returns without renaming anything.

        Raises:
            KeyError: if `section` is not a known section name.
        """
        # Select the My Scouting link.
        self._browser.is_text_present('My Scouting', wait_time=30)
        self._browser.click_link_by_text('My Scouting')

        # Click the "Group Sections" hotspot.
        self.wait_then_click_xpath('//*[@id="TR_HIER7"]/h2')

        # Clink the link that shows the number of members in the section.
        # This is the one bit that is section specific.
        # We might be able to match on the Section name in the list,
        # which would make it more robust but at present we just hard
        # the location in the list.
        # NOTE(review): 'swinfen' and 'brown' share row 4 -- confirm.
        section_map = {
            'garrick': 2,
            'paget': 3,
            'swinfen': 4,
            'brown': 4,
            'maclean': 5,
            'rowallan': 6,
            'somers': 7,
            'boswell': 8,
            'erasmus': 9,
            'johnson': 10
        }
        self.wait_then_click_xpath(
            '//*[@id="TR_HIER7_TBL"]/tbody/tr[{}]/td[4]/a'.format(
                section_map[section.lower()]
            ))

        # Click on the Export button.
        self.wait_then_click_xpath('//*[@id="bnExport"]')

        # Click to say that we want a CSV output.
        self.wait_then_click_xpath(
            '//*[@id="tbl_hdv"]/div/table/tbody/tr[2]/td[2]/input')
        time.sleep(2)

        # Click to say that we want all fields.
        self.wait_then_click_xpath('//*[@id="bnOK"]')

        download_path = os.path.join(self._outdir, 'CompassExport.csv')

        if os.path.exists(download_path):
            log.warning("Removing stale download file.")
            os.remove(download_path)

        # Click the warning.
        self.wait_then_click_xpath('//*[@id="bnAlertOK"]')

        # Browser will now download the csv file into outdir. It will be called
        # CompassExport.

        # Wait for file.
        timeout = 30
        while not os.path.exists(download_path):
            time.sleep(1)
            timeout -= 1
            if timeout <= 0:
                # Previously a `.fomat` typo raised AttributeError here, and
                # the rename below would have failed on the missing file.
                log.warning(
                    "Timeout waiting for {} export to download.".format(
                        section))
                return

        # rename download file.
        os.rename(download_path,
                  os.path.join(self._outdir, '{}.csv'.format(section)))

        log.info("Completed download for {}.".format(section))

        # Draw breath
        time.sleep(1)

    def load_from_dir(self):
        """Load every file in `outdir` as one section into `self._records`.

        The section name is the file name without its extension. Lower-cased,
        stripped copies of the name columns are added for matching.
        """
        # Load the records form the set of files in self._outdir.

        log.debug('Loading from {}'.format(self._outdir))

        def get_section(path, section):
            df = pd.read_csv(path, dtype=object, sep=',')
            df['section'] = section
            df['forenames_l'] = [_.lower().strip() for _ in df['forenames']]
            df['surname_l'] = [_.lower().strip() for _ in df['surname']]
            return df

        frames = [get_section(os.path.join(self._outdir, section),
                              os.path.splitext(section)[0])
                  for section in os.listdir(self._outdir)]

        # DataFrame.append was removed in pandas 2.0; concat is the
        # replacement, but it rejects an empty list.
        self._records = (pd.concat(frames, ignore_index=True)
                         if frames else pd.DataFrame())

    def find_by_name(self, firstname, lastname, section_wanted=None,
                     ignore_second_name=True):
        """Return list of matching records."""

        recs = self._records

        if ignore_second_name:
            # NOTE(review): `strip(' ')[0]` yields only the first character of
            # `firstname`, so this matches any forename with the same initial;
            # `split(' ')[0]` may have been intended -- confirm with callers.
            # The value is also interpolated into the regex unescaped.
            df = recs[
                (recs.forenames_l.str.lower().str.match(
                    '^{}.*$'.format(firstname.strip(' ')[0].lower().strip()))) &
                (recs.surname_l == lastname.lower().strip())]

        else:
            df = recs[(recs.forenames_l == firstname.lower().strip()) &
                      (recs.surname_l == lastname.lower().strip())]

        if section_wanted is not None:
            df = df[(df['section'] == section_wanted)]

        return df

    def sections(self):
        "Return a list of the sections for which we have data."
        return self._records['section'].unique()

    def all_yp_members_dict(self):
        """Return a dict mapping each section to the DataFrame of its records."""
        return {s: members for s, members in self._records.groupby('section')}

    def section_all_members(self, section):
        """Return the records of `section` as a list of row Series."""
        return [m for i, m in self._records[
            self._records['section'] == section].iterrows()]

    def section_yp_members_without_leaders(self, section):
        """Return the rows of `section` whose role is Beaver Scout, Cub Scout
        or Scout."""
        return [m for i, m in self._records[
            (self._records['section'] == section) &
            (self._records['role'].isin(
                ['Beaver Scout', 'Cub Scout', 'Scout']))].iterrows()]

    def members_with_multiple_membership_numbers(self):
        """Return one group per (forenames, surname, membership_number) for
        people who appear under more than one membership number."""
        return [member for s, member in self._records.groupby(
            ['forenames', 'surname']).filter(
                lambda x: len(x['membership_number'].unique()) > 1).groupby(
                    ['forenames', 'surname', 'membership_number'])]