def download_linke(coords, proxy, port, saveFile, saveMode):
    """Download monthly Linke turbidity values from the SoDa service for
    each (lon, lat) coordinate and append them as CSV lines to saveFile.

    On any error, the coordinates not yet downloaded are written to
    "<saveFile>_notdownloaded.txt" so the run can be resumed later.

    Bug fix: the original used Python-2 print statements, which are
    syntax errors under Python 3.
    """
    print(proxy, port)
    print(proxy != "")
    url = ("http://www.soda-is.com/eng/services/service_invoke/gui.php?"
           "xml_descript=soda_tl.xml&Submit2=Month")
    session = Session()
    session.verify = False
    if proxy != "":
        # NOTE(review): requests expects proxies keyed by scheme
        # (e.g. {"http": "host:port"}) — confirm `proxy` holds the scheme.
        session.proxies = {proxy: port}
    br = RoboBrowser(session=session, parser="lxml")
    br.open(url)
    # The Linke request form is the second form on the page.
    linke_form = br.get_forms()[1]
    num = len(coords)
    index = 0
    with open(saveFile, saveMode) as f:
        try:
            for coord in coords:
                inlon, inlat = coord
                linke_form["lat"].value = inlat
                linke_form["lon"].value = inlon
                sf = linke_form.submit_fields.getlist("execute")
                br.submit_form(linke_form, submit=sf[0])
                linke_table = br.find(
                    "table", {"cellspacing": "0", "cellpadding": "2"})
                linkes = get_monthly_linke_str(get_linke_values(linke_table))
                s = "%s,%s,%s\n" % (format(inlon, "0.5f"),
                                    format(inlat, "0.5f"), linkes)
                # Only keep rows long enough to contain a full set of
                # twelve monthly values.
                if len(s) > 48:
                    f.write(s)
                print("Done with point %i of %i: (%s, %s)" % (
                    index + 1, num,
                    format(inlon, "0.5f"), format(inlat, "0.5f")))
                index += 1
                br.back()
            print("DONE!")
        except Exception as e:
            # Persist the remaining coordinates so the caller can retry.
            not_dl = list(coords[index:])
            with open(saveFile + "_notdownloaded.txt", "w") as nd:
                for c in not_dl:
                    nd.write("%s,%s\n" % (str(c[0]), str(c[1])))
            print(e)
def take_action(self, parsed_args):
    """Submit an entry file to KDD Cup 2015 and print the resulting score.

    Credentials come from CLI args, falling back to the config file in
    ~/.kddcup2015-cli.
    """
    config_dir = '~/.kddcup2015-cli'
    config_dir = os.path.expanduser(config_dir)
    if os.path.isdir(config_dir):
        # NOTE(review): `config` is only bound when the config dir exists;
        # the fallbacks below raise NameError otherwise — confirm intended.
        config = ConfigParser.ConfigParser(allow_no_value=True)
        config.readfp(open(config_dir + '/config'))
    if parsed_args.username:
        username = parsed_args.username
    else:
        username = config.get('user', 'username')
    if parsed_args.password:
        password = parsed_args.password
    else:
        password = config.get('user', 'password')
    entry = parsed_args.entry
    message = parsed_args.message
    base = 'https://www.kddcup2015.com'
    login_url = '/'.join([base, 'user-ajaxlogin.html'])
    submit_url = '/'.join([base, 'submission-make.html'])
    submission_url = '/'.join(([base, 'submission.html']))
    browser = RoboBrowser()
    # Log in through the AJAX endpoint; the response is JSON.
    response = browser.session.post(
        login_url, dict(email=username, pwd=password)).json()
    if response['rs'] == 'error':
        # NOTE(review): execution continues even after a failed login —
        # confirm whether an early return was intended here.
        self.app.stdout.write(response['msg'])
    browser.open(submit_url)
    form = browser.get_form()
    form['_f'].value = open(entry)  # file-upload field for the entry
    if message:
        form['description'] = message
    browser.submit_form(form)
    sleep(5)  # give the server time to score the submission
    browser.open(submission_url)
    html_str = str(browser.parsed)
    html = pq(html_str)
    # Parse all submission timestamps; the newest row holds our score.
    times = list(map(
        lambda x: datetime_parser.parse(x.text),
        html('.td_result +td+td+td+td')))
    newest_index = times.index(max(times))
    # Each result row contributes two '.td_result' cells, hence the *2.
    score = html('.td_result')[newest_index * 2].text.strip()
    self.app.stdout.write(score + '\n')
def start_crawl(pages=2):
    """Crawl the SGS enterprise-exception list.

    :param pages: number of result pages to fetch (the site caps at 50).
    :return: (basic_info, detail_info) DataFrames.

    Bug fix: ``DataFrame.append`` was removed in pandas 2.0 — and worse,
    the resulting AttributeError was silently swallowed by the existing
    ``except AttributeError`` below, dropping every page.  Replaced with
    ``pd.concat`` (identical result on older pandas too).
    """
    session = Session()
    session.verify = False
    url = 'https://www.sgs.gov.cn/notice/search/ent_except_list'
    b = RoboBrowser(session=session)
    b.open(url)
    basic_info = pd.DataFrame(columns=['name', 'url', 'ID', 'date'])
    detail_info = pd.DataFrame()
    for i in range(pages):  # page count; the site caps it at 50
        form = b.get_form(id='formInfo')
        if not form:
            continue
        form['condition.pageNo'].value = str(i + 1)  # select result page
        form['condition.keyword'].value = ''
        try:  # dirty fix...
            b.submit_form(form)
            basic_info = pd.concat([basic_info, parse_table(b)],
                                   ignore_index=True)
        except AttributeError:
            pass
    for url in basic_info['url']:
        detail = get_detail(url)
        if isinstance(detail, pd.DataFrame):
            detail_info = pd.concat([detail_info, detail],
                                    ignore_index=True)
    return basic_info, detail_info
def pushedbutton(self, b):
    """Log into the CMU student portal with the credentials typed in the
    UI and auto-fill every pending course-survey form with answer '1'.

    Bug fix: a local variable shadowed the builtin ``list``; renamed.
    """
    account = self.lineEdit.text()
    pasw = self.lineEdit_3.text()
    # Use the robobrowser module to manipulate the web page.
    browser = RoboBrowser(history=True)
    browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
    form1 = browser.get_form(id='form1')
    form1['f_id'].value = account
    form1['f_pwd'].value = pasw
    browser.submit_form(form1)
    if browser.state.url == "http://web1.cmu.edu.tw/stdinfo/loginerr.asp":
        self.lineEdit_2.setText('帳號密碼錯了?')
    else:
        link_one = browser.get_link(text=re.compile('.意見調查'))
        browser.follow_link(link_one)
        survey_links = []
        for l in browser.get_links(text=re.compile('.填寫.')):
            survey_links.append(l)
        survey_links.pop(0)  # the first match is not an actual survey
        for li in survey_links:
            browser.follow_link(li)
            form2 = browser.get_form(id='thisform')
            form2['Cos_Q1'].value = '1'
            browser.submit_form(form2)
        self.lineEdit_2.setText('Done!')
def scrape_site(url, cookie_file="", ses=False, is_rss=False):
    """Open `url` with a RoboBrowser backed by a persistent cookie jar.

    :param cookie_file: optional LWP cookie-jar path (loaded if present).
    :param ses: when truthy, return (browser, session) instead of browser.
    :param is_rss: parse as XML instead of html5lib.

    Bug fix: the bare ``except:`` around the cookie load also swallowed
    KeyboardInterrupt/SystemExit; narrowed to the two expected failures.
    """
    from http.cookiejar import LWPCookieJar, LoadError
    from robobrowser import RoboBrowser
    from requests import Session

    s = Session()
    if cookie_file:
        s.cookies = LWPCookieJar(cookie_file)
        try:
            s.cookies.load(ignore_discard=True)
        except (FileNotFoundError, LoadError):
            # Cookies don't exist yet (or the jar is malformed) —
            # start with an empty jar instead of failing.
            pass
    s.headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; rv:39.0)'
    s.headers['Accept'] = 'text/html'
    s.headers['Connection'] = 'keep-alive'
    if is_rss:
        parser = 'xml'
    else:
        parser = 'html5lib'
    browser = RoboBrowser(session=s, parser=parser)
    browser.open(url)
    if ses:
        return browser, s
    else:
        return browser
def take_action(self, parsed_args):
    """Fetch the KDD Cup 2015 leaderboard and return the top-N rows as
    (headers, rows) for the tabular formatter."""
    config_dir = '~/.kddcup2015-cli'
    config_dir = os.path.expanduser(config_dir)
    topN = int(parsed_args.topN) if parsed_args.topN else 10
    base = 'https://www.kddcup2015.com'
    rank_url = '/'.join([base, 'submission-rank.html'])
    browser = RoboBrowser()
    browser.open(rank_url)
    html = pq(str(browser.parsed))

    def texts(selector):
        # Stripped text of the first topN elements matching the selector.
        return [el.text.strip() for el in html(selector)[:topN]]

    country_teams = texts('.country_team')
    scores = texts('.td_result')
    entries = texts('.td_result + td')
    last_subs = texts('.td_result + td + td')
    return (
        ('Team', 'Score', 'Entries', 'Last Submission UTC'),
        list(zip(country_teams, scores, entries, last_subs)),
    )
def lookUpNetCTLPan(self, sequ):
    """Submit sequence `sequ` to the IEDB NetChop/NetCTLpan web tool
    (human, HLA-A02:01, 9-mers) and print the form/result pages.

    Cleanup: removed dead no-op expression statements
    (``browser.session.cookies``, bare ``net_form``, bare ``browser``).
    """
    seq = '>seq' + '\n' + sequ
    browser = RoboBrowser(user_agent='Mozilla/5.0', history=True)
    browser.allow_redirects = True
    Query = "http://tools.immuneepitope.org/stools/netchop/netchop.do?app=netchop"
    browser.open(Query)
    net_form = browser.get_form(action="upload-submit.do")
    net_form['sequences'].value = seq
    net_form['formtype'].value = 'netctlpan_select'
    net_form['length'].value = '9'
    net_form['species'].value = "human"
    net_form['supertype'].value = 'A2'
    net_form['allele'].value = "HLA-A02:01"  # self.amerLength
    print(net_form)
    net_form.serialize()
    browser.submit_form(net_form, submit="Submit")
    print(browser)
    table_form = browser.get_form(action="tableViewctlpan.do?thePage=1")
    print(table_form)
    return
def login():
    """Log into the OGame login form.

    :return: a logged-in RoboBrowser instance, or None on login error.

    Bug fix: the original used Python-2 print statements, which are
    syntax errors under Python 3.
    """
    s = requests.Session()
    s.headers['User-Agent'] = 'Mozilla (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7'
    browser = RoboBrowser(history=True, parser='html5lib', session=s)
    browser.open('http://en.ogame.gameforge.com')
    loginForm = browser.get_form('loginForm')
    print(loginForm['uni'].value)
    print(loginForm['login'].value)
    print(loginForm['pass'].value)
    # NOTE(review): placeholder credentials — replace before use.
    loginForm['uni'].value = 'SERVER URL'
    loginForm['login'].value = 'USERNAME'
    loginForm['pass'].value = 'PASSWORD'
    browser.submit_form(loginForm)
    if 'loginError' in browser.url:
        print('loginError')
        return None
    return browser
def main(url):
    """Shorten `url` through ezl.ink and print the short link with the
    caller's nickname (from the NICKNAME environment variable).

    Bug fix: the result regex was a non-raw string with an unescaped dot,
    so 'ezlXink' (any character) would also have matched.
    """
    # Users often omit the URL scheme; default to http.
    if urlparse(url).scheme == '':
        # rfc3987: parse(url, rule='URI')['scheme']
        url = 'http://{}'.format(url)
    # Sanity-check that the URL has a dotted hostname.
    try:
        urlparse(url).netloc.split('.')[1]
    except IndexError:
        print(os.environ['NICKNAME'] + ', give me a valid URL to shorten. Louge off you skeleton pile..')
        return
    browser = RoboBrowser(history=True)
    browser.open('http://ezl.ink/index.php')
    form = browser.get_form(0)
    assert isinstance(form, Form)
    form["url"] = url
    browser.submit_form(form)
    html = browser.parsed
    shorturl = re.findall(r'http[s]?://ezl\.ink/[a-zA-Z0-9]+', str(html))
    print(os.environ['NICKNAME'] + ', shorturl: ' + shorturl[0])
def open_site(self, chart_name):
    """Open the page for `chart_name` and submit its processing form.

    :return: the RoboBrowser positioned on the resulting page.
    """
    robo = RoboBrowser(history=True, parser='html.parser')
    robo.open(self.url_start + charts[chart_name])
    process_form = robo.get_form(action='includes/process.php')
    robo.submit_form(process_form)
    return robo
def main(argv):
    """Log into the local SecuredLogin demo app with CLI credentials
    (sys.argv[1] = username, sys.argv[2] = password)."""
    browser = RoboBrowser()
    browser.open('http://localhost:8080/SecuredLogin/index.jsp')
    credentials_form = browser.get_form(id='frm')
    credentials_form['uname'].value = str(sys.argv[1])
    credentials_form['passwd'].value = str(sys.argv[2])
    browser.submit_form(credentials_form)
def login(user, password):
    """Log into m.mediavida.com and return the authenticated browser."""
    mv = RoboBrowser(parser="lxml")
    mv.open('http://m.mediavida.com/login.php')
    credentials = mv.get_form(class_='full')
    credentials['name'].value = user
    credentials['password'].value = password
    mv.submit_form(credentials)
    return mv
def open_site(self, chart_name):
    """Open the start page and request the chart named `chart_name` via
    the site's update form."""
    robo = RoboBrowser(history=True, parser='html.parser')
    robo.open(self.url_start)
    update_form = robo.get_form(
        action='https://france99.com/includes/process.php?action=update')
    update_form['u'] = charts[chart_name]
    robo.submit_form(update_form)
    return robo
def get_mp3_url(lecture_url):
    """Return the href of the first .mp3 link on the page, or None."""
    robo = RoboBrowser()
    robo.open(lecture_url)
    mp3_link = robo.get_link(href=re.compile("\\.mp3$"))
    return mp3_link["href"] if mp3_link is not None else None
def get_bracket_data(year):
    """Scrape ESPN's tournament bracket for `year`.

    :return: list of [team1, team2, seed1, seed2] entries, where each
             team is a (espn_id, title) tuple.
    """
    bracket_url = ('http://espn.go.com/mens-college-basketball/'
                   'tournament/bracket/_/id/{}22/'.format(year))
    robo = RoboBrowser()
    robo.open(bracket_url)
    rows = []
    for match in robo.find_all(attrs={'class': 'match'}):
        team_one, team_two = [(get_id(a['href']), a['title'])
                              for a in match('a')]
        seed_one, seed_two = ' '.join(
            match.find('dd').stripped_strings).split()
        rows.append([team_one, team_two, seed_one, seed_two])
    return rows
def login(user, passwd):
    """Authenticate against UFSC's CAS login and return the browser."""
    session_browser = RoboBrowser(history=True, parser="html.parser")
    session_browser.open("https://sistemas.ufsc.br/login")
    cas_form = session_browser.get_form(id="fm1")
    cas_form["username"].value = user
    cas_form["password"].value = passwd
    session_browser.submit_form(cas_form)
    return session_browser
def main():
    """CLI Codeforces submitter: log in, upload a solution, then poll the
    submission feed until judging finishes."""
    parser = argparse.ArgumentParser(
        description='Submit codeforces in command line')
    parser.add_argument('user', type=str, help='Your codeforces ID')
    parser.add_argument('prob', type=str,
                        help='Codeforces problem ID (Ex: 33C)')
    parser.add_argument('file', type=str, help='path to the source code')
    args = parser.parse_args()
    user_name = args.user
    # Remember the latest submission id so the new one can be detected.
    last_id, _ = get_submission_data(user_name)
    passwd = getpass()
    browser = RoboBrowser()
    browser.open('http://codeforces.com/enter')
    enter_form = browser.get_form('enterForm')
    enter_form['handle'] = user_name
    enter_form['password'] = passwd
    browser.submit_form(enter_form)
    try:
        # After login the page header ('div.caption.titled') contains the
        # handle; its absence means authentication failed.
        checks = list(map(lambda x: x.getText()[1:].strip(),
                          browser.select('div.caption.titled')))
        if user_name not in checks:
            print("Login Failed.. probably because you've typed"
                  "a wrong password.")
            return
    except Exception as e:
        print("Login Failed.. probably because you've typed"
              "a wrong password.")
        return
    browser.open('http://codeforces.com/problemset/submit')
    submit_form = browser.get_form(class_='submit-form')
    submit_form['submittedProblemCode'] = args.prob
    submit_form['sourceFile'] = args.file
    browser.submit_form(submit_form)
    # A successful submit redirects to the ".../status" page.
    if browser.url[-6:] != 'status':
        print('Your submission has failed, probably '
              'because you have submit the same file before.')
        return
    print('Submitted, wait for result...')
    while True:
        id_, verdict = get_submission_data(user_name)
        # Done when a new submission id shows a non-TESTING verdict.
        if id_ != last_id and verdict != 'TESTING':
            print('Verdict = {}'.format(verdict))
            break
        time.sleep(5)  # poll every 5 seconds
def scrape(q):
    """Google the query `q` and scrape the knowledge-panel business info
    (name, address, phone, website, blurb) into a dict.

    Bug fix: the original used Python-2 print statements (syntax errors
    under Python 3); the regexes are now raw strings (same patterns,
    without invalid-escape warnings).
    """
    query = q
    ph = re.compile(r'(\(\d{3}\)\ \d{3}-\d{4})')   # phone: (123) 456-7890
    ad = re.compile(r'[A-Z]{2}\ (\d{5})')          # address: state + ZIP
    site = re.compile(r'(?<=\?q=).*(?=&sa)')       # target of Google redirect
    result = {
        'name': '!NO DATA!',
        'address': '!NO DATA!',
        'phone': '!NO DATA!',
        'website': '!NO DATA!',
        'blurb': '!NO DATA!'
    }
    # Submit the Google search form.
    browser = RoboBrowser(user_agent='Firefox', parser='html.parser')
    browser.open('http://google.com/')
    form = browser.get_form(action='/search')
    form['q'].value = query
    browser.submit_form(form, form.submit_fields['btnG'])
    result['query'] = query
    # NOTE(review): assumed the extraction below belongs inside this
    # branch (the panel elements only exist when '_B5d' is present) —
    # original indentation was ambiguous; confirm.
    if browser.find("div", {"class": "_B5d"}):
        result['name'] = browser.find(
            "div", {"class": "_B5d"}).text.encode('utf-8')
        stuff = browser.find("div", {"class": "_uXc"})
        address = stuff.find(text=ad)
        if address:
            result['address'] = address.encode('utf-8')
        phone = stuff.find(text=ph)
        if phone:
            result['phone'] = phone.encode('utf-8')
        blurb = stuff.find("span")
        if blurb:
            result['blurb'] = blurb.text.encode('utf-8')
        website = stuff.find("a", string="Website")
        if website:
            website = website.get('href').encode('utf-8')
            result['website'] = site.search(website).group()
    print(result)
    # Random delay between queries to avoid being rate-limited.
    delay = random.randint(5, 10)
    print("Waiting " + str(delay) + " seconds...")
    time.sleep(delay)
    return result
class vk_session:
    """Session wrapper for the mobile VK site driven by RoboBrowser,
    with optional HTTP proxy and a pre-seeded cookie jar."""

    def __init__(self, root_path, proxy="", cookies=""):
        self.is_signed = False  # flipped externally after successful login
        self.proxy = proxy
        self.root_path = root_path
        session = requests.session()
        if proxy:
            # NOTE(review): 'ssl' is not a scheme requests recognizes —
            # only the 'http'/'https' entries should take effect; confirm.
            session.proxies.update({'http': 'http://' + proxy,
                                    'ssl': proxy,
                                    'https': 'https://' + proxy})
        # Browser-like headers to look less bot-like.
        headers = {
            "ACCEPT": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "ACCEPT_ENCODING": "gzip, deflate, sdch",
            "ACCEPT_LANGUAGE": "ru-RU,ru;",
            "CONNECTION": "keep-alive",
            "REFERER": root_path,
            "UPGRADE_INSECURE_REQUESTS": "1",
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
        }
        session.headers = headers
        if cookies:
            session.cookies = cookies
        self.browser = RoboBrowser(session=session, timeout=4, history=False)

    def connect(self):
        # Open the landing page (the login form lives there).
        self.browser.open(self.root_path)
        print("connected")

    def sign_in(self, username, password, captcha):
        """Submit the login form, solving a captcha first when requested."""
        try:
            form = self.browser.get_forms()[0]
            form["email"] = username
            form["pass"] = password
            if captcha:
                form["captcha_key"] = vk_captcha.decode(
                    page=self.browser.parsed, root_path=self.root_path)
            self.browser.submit_form(form)
        except:
            # Log which account failed, then re-raise unchanged.
            print(username)
            raise

    def create_new_group(self, name, group_type, public_type):
        """Create a VK group/public page via the mobile UI form."""
        self.browser.open("https://m.vk.com/groups?act=new")
        form = self.browser.get_forms()[0]
        form["title"] = name
        form["type"] = group_type
        form["public_type"] = public_type
        self.browser.submit_form(form)
        time.sleep(1)  # give VK a moment before the next request

    def enter_captcha(self):
        """Solve and submit the captcha form currently displayed."""
        form = self.browser.get_forms()[0]
        form["captcha_key"] = vk_captcha.decode(page=self.browser.parsed,
                                                root_path=self.root_path)
        self.browser.submit_form(form)
class BKBrowser(object):
    """Automates the Burger King satisfaction survey and extracts the
    validation code shown on the final page."""

    def __init__(self):
        # Browse url :
        self.result = None  # last validation code fetched
        self.browser = RoboBrowser(parser="html.parser")
        self.browser.session.headers = config.headers
        # Mount with custom SSL Adapter
        self.browser.session.mount('https://', HTTPSAdapter())

    def _connect(self):
        # Get to website
        print("- Connecting to url ...")
        self.browser.open(config.url)

    def _skip_first_page(self):
        # The first page only carries a "continue" form; submit it as-is.
        button = self.browser.get_forms()[0]
        self.browser.submit_form(button)

    # Let's fill in the proper form !
    def _fill_form(self):
        # Keep answering pages until the validation-code paragraph appears.
        while not self.browser.find('p', {'class': 'ValCode'}):
            inputs_map = max_radio_map(self.browser)
            f = self.browser.get_forms()[0]
            for i in f.keys():
                if f[i].value == '':
                    # Unanswered field: pick a random allowed answer
                    # ('1' when the field has no mapped options).
                    answers_list = inputs_map.get(i, ['1'])
                    f[i].value = random.choice(answers_list)
            f.serialize()
            self.browser.submit_form(f)

    def _fill_date_form(self):
        # Fill in Date/Time form and start the Questionnaire
        print("- Filling Forms Randomly ...")
        form = self.browser.get_forms()[0]
        form['JavaScriptEnabled'].value = '1'
        form['SurveyCode'].value = config.ID
        form['InputMonth'].value = config.date[0]
        form['InputDay'].value = config.date[1]
        form['InputHour'].value = config.time[0]
        form['InputMinute'].value = config.time[1]
        form.serialize()
        self.browser.submit_form(form)

    def get_validation_code(self):
        """Run the full survey flow and return the validation code."""
        self._connect()
        self._skip_first_page()
        self._fill_date_form()
        self._fill_form()
        self.result = self.browser.find('p', {'class': 'ValCode'}).text
        return self.result

    def return_result(self):
        # Last code from get_validation_code (None before the first run).
        return self.result
def login():
    """Login to evilzone and return a browser object.

    Bug fix: the source was corrupted (an unbalanced ``"****"`` redaction
    fused the two input() calls into a syntax error); reconstructed the
    username and password prompts as two separate assignments.
    """
    browser = RoboBrowser(history=True)
    browser.open('https://www.evilzone.org/login')
    form = browser.get_form(0)
    assert isinstance(form, Form)
    form["user"] = input("EZ Username: ")
    form["passwrd"] = input("Password: ")
    browser.submit_form(form)
    return browser
class RoboBrowserTestCase(StaticLiveServerTestCase, base.AbstractBrowser):
    """Browser-test backend that drives Django's live test server with
    RoboBrowser instead of a real (driver-based) browser."""

    def setUp(self):
        super().setUp()
        # One fresh browser per test; history enabled for back/forward.
        self.browser = RoboBrowser(history=True, parser='html.parser')

    def load(self, url):
        # `url` is relative; prefix it with the live server root.
        self.browser.open(self.live_server_url + url)

    def get_title(self):
        return self.browser.find('title').text

    def get_form(self, selector):
        # Wrap the located form in the project's form abstraction.
        return RoboBrowserForm(self.browser, selector)
def testMethod_findXssFailuresInAForm_shouldOnlySaveXssFailuresThatAreNotAlreadyInTheList(self):
    """Scanning the same form twice must not duplicate list entries."""
    url = "http://www.remikya.com/Controllers/SearchController.php"
    finder = XssFinder(url)
    robo = RoboBrowser()
    robo.open(url)
    target_form = robo.get_form(id="form")
    # Run the scan twice over the identical form.
    for _ in range(2):
        finder.findXssFailuresInAForm(robo, target_form)
    EXPECTED_ANSWER = 1
    self.assertEqual(EXPECTED_ANSWER, len(finder.getListOfLinks()))
def getSongLinks(soundcloudURL):
    """Rip a SoundCloud track through SoundFlush.

    :return: dict with the download 'url' and suggested file 'name'.
    """
    robo = RoboBrowser(history=False)
    # Ask SoundFlush to rip the specified track.
    robo.open('http://soundflush.com/')
    rip_form = robo.get_form(id='form_download')
    rip_form['track_url'].value = soundcloudURL
    robo.submit_form(rip_form)
    # The save button on the result page carries both pieces of data.
    save_button = robo.select('a#btn_save')[0]
    return {'url': save_button['href'], 'name': save_button['download']}
def scrape_snotel_sites(url=None):
    """Scrape the NRCS station list and return SNOTEL sites.

    :return: list of dicts keyed by the table's header columns.
    """
    if not url:
        url = ("http://www.wcc.nrcs.usda.gov/nwcc/yearcount"
               "?network=sntl&counttype=statelist&state=")
    robo = RoboBrowser(parser="html5lib")
    robo.open(url)
    robo.response.raise_for_status()
    # The station listing is the fifth table on the page.
    table = robo.find_all("table")[4]
    header = [th.text.strip() for th in table.tr.find_all("th")]
    sites = []
    for row in table.find_all("tr"):
        # Keep only SNOTEL ('SNTL') network rows.
        if row.td and row.td.text.strip() == 'SNTL':
            values = [td.text.strip() for td in row.find_all("td")]
            sites.append(dict(zip(header, values)))
    return sites
def loginToESPN(leagueID, year):
    """Open the ESPN fantasy baseball league office, logging in when a
    login form is presented.

    :return: a RoboBrowser positioned on the league page.

    Bug fix: the bare ``except:`` also swallowed KeyboardInterrupt and
    SystemExit; narrowed to ``except Exception`` (the absent-login-form
    case is still treated as "already authenticated").
    """
    link = ('http://games.espn.go.com/flb/leagueoffice?leagueId='
            + str(leagueID) + '&seasonId=' + str(year))
    br = RoboBrowser(history=True)
    br.open(link)
    try:
        form = br.get_form(
            action="https://r.espn.go.com/espn/memberservices/pc/login")
        form['username'].value = login.username.value
        form['password'].value = login.password.value
        br.submit_form(form)
        print('\nLogging In\n')
    except Exception:
        # No login form on the page -> already authenticated.
        print('\nNo need to login!\n')
    return br
def create_calc(self, target, aus):
    """Build a Planetarion battle calc: add `target` first when it is
    among `aus`, then the remaining attack units sorted by coords, and
    save the calc.

    :return: the URL of the saved calc.

    Bug fix: the original called ``len()`` and indexed the result of
    ``filter()``, which is an iterator on Python 3 (TypeError); replaced
    with a list comprehension.
    """
    browser = RoboBrowser(user_agent='a python robot', history=False)
    browser.open('https://game.planetarion.com/bcalc.pl')
    matches = [a for a in aus
               if target.x == a['x'] and target.y == a['y']
               and target.z == a['z']]
    if len(matches) == 1:
        # The target itself goes in first.
        self.add_au(browser, matches[0])
    for au in sorted(aus, key=operator.itemgetter('x', 'y', 'z')):
        # Add everything except the already-added target entry.
        if (len(matches) != 1 or target.x != au['x']
                or target.y != au['y'] or target.z != au['z']):
            self.add_au(browser, au)
    form = browser.get_form()
    form['def_metal_asteroids'] = target.size
    form['action'] = 'save'
    browser.submit_form(form)
    return browser.url
def get_schedule(employee_info, shift_period):
    """Fetch an employee's printable weekly schedule.

    :return: the schedule converted to datetimes, or False if none found.
    """
    robo = RoboBrowser(parser='lxml')
    login_handler(robo, employee_info)
    week_start = shift_period['start_of_week']
    week_end = shift_period['end_of_week']
    robo.open('https://www.rsishifts.com/Schedules/SchedulePrintByUser.aspx?'
              'StartDate=' + week_start + '&EndDate=' + week_end)
    schedule = find_schedule(robo, employee_info['employee_name'])
    if not schedule:
        return False
    return convert_schedule_to_datetime(week_start, schedule)
def get_digitised_pages(self, entity_id=None):
    '''
    Returns the number of pages (images) in a digitised file.
    Note that you don't need a session id to access these pages,
    so there's no need to go through get_url().
    '''
    viewer_url = ('http://recordsearch.naa.gov.au/SearchNRetrieve/'
                  'Interface/ViewImage.aspx?B={}'.format(entity_id))
    robo = RoboBrowser(parser='lxml')
    robo.open(viewer_url)
    try:
        return int(robo.find('span', attrs={'id': "lblEndPage"}).string)
    except AttributeError:
        # No page-count span on the page -> treat as not digitised.
        return 0
class LMS:
    """Scraper client for lms.tuit.uz built on RoboBrowser.

    Fixes vs. the original:
      * ``enable_proxy`` no longer executes a stray ``self.proxy = proxy``
        with an undefined name (NameError on every call);
      * dead pagination code removed from ``get_finals`` (the loop
        returned unconditionally on its first iteration);
      * bare ``except:`` narrowed to ``except Exception``.
    """

    def __init__(self, username, pswd, proxy=False):
        """Create an LMS client from login credentials.

        :param username: LMS login
        :param pswd: LMS password
        :param proxy: route traffic through a random HTTPS proxy
                      [default: False]
        """
        self.username = username
        self.pswd = pswd
        self.session = Session()
        self.proxy = proxy
        # Random User-Agent so requests look less bot-like.
        agent = choice(
            json.loads(open('all_agents.txt', 'r').read())['agents'])
        if self.proxy:
            random_proxy = random.choice(self.get_proxies(False))
            self.session.proxies = {
                'https': 'https://{ip}:{port}'.format(
                    ip=random_proxy['ip'], port=random_proxy['port'])
            }
            self.browser = RoboBrowser(user_agent=agent,
                                       session=self.session)
        else:
            self.browser = RoboBrowser(user_agent=agent)

    def load_session(self, session):
        """Replace the underlying requests session."""
        self.session = session

    def clear_text(self, text):
        """Strip simple HTML tags, newlines and edge whitespace.

        Numbers pass through unchanged; anything else is coerced to str.

        :param text: value to clean
        :return: cleaned string (or the original number)
        """
        if type(text) == int or type(text) == float:
            return text
        text = str(text)
        tag_pattern = "</*[a-z]+>"
        for tag in re.findall(tag_pattern, text):
            text = text.replace(tag, '')
        return text.replace('\n', '').strip()

    def is_valid(self, page):
        """Return True when `page` looks like a logged-in page.

        The login form contains the id 'exampleInputEmail1'; its
        presence means authentication failed.
        """
        if 'exampleInputEmail1' in str(page):
            return False
        else:
            return True

    def get_proxies(self, fresh=False):
        """Return a list of {'ip', 'port'} proxy dicts.

        fresh=True scrapes sslproxies.org and caches to proxies.json;
        otherwise the cached file is read (False on failure).
        """
        if fresh:
            url = 'https://www.sslproxies.org/'
            response = requests.get(url)
            parser = bs4.BeautifulSoup(response.text)
            tbody = parser.find('tbody')
            data = []
            for row in tbody.findAll('tr'):
                cols = row.findAll('td')
                data.append({'ip': cols[0].text, 'port': cols[1].text})
            with open('proxies.json', 'w') as f:
                f.write(json.dumps(data))
            return data
        else:
            try:
                return json.loads(open('proxies.json', 'r').read())
            except Exception as e:
                logging.error(str(e))
                return False

    def check_credentials(self):
        """Validate the stored login/password.

        :return: the student's display name on success, False otherwise.
        """
        login_url = 'https://lms.tuit.uz/auth/login'
        self.browser.open(login_url)
        form = self.browser.get_form(action=login_url)
        form['login'].value = self.username
        form['password'].value = self.pswd
        self.browser.submit_form(form)
        page = self.browser.parsed
        if self.is_valid(page):
            parser = bs4.BeautifulSoup(str(page))
            name = parser.find(
                'ul',
                'dropdown-menu dropdown-content').findAll('li')[0].text
            return name.replace('\n', '').strip()
        else:
            return False

    def get_current_semester_id(self):
        """Determine the currently active semester id.

        If the newest semester has no courses yet, the previous one is
        returned instead.
        """
        url = "https://lms.tuit.uz/student/my-courses"
        self.browser.open(url)
        parser = bs4.BeautifulSoup(str(self.browser.parsed))
        block = parser.find("select", {'name': 'semester_id'})
        semester_id = len(block.findAll('option'))
        basic_url = ('https://lms.tuit.uz/student/my-courses/data'
                     '?semester_id={}')
        try:
            self.browser.open(basic_url.format(semester_id))
            count = json.loads(self.clear_text(str(
                self.browser.parsed)))['recordsTotal']
        except Exception as e:
            logging.error(str(e))
            count = 0
        if count == 0:
            semester_id -= 1
        return semester_id

    def enable_proxy(self, proxies=None):
        """Turn proxying on and rebuild the browser with a random proxy.

        Bug fix: the original also executed ``self.proxy = proxy`` with
        an undefined name, raising NameError on every call.
        """
        self.proxy = proxies if proxies is not None else self.get_proxies()
        agent = choice(
            json.loads(open('all_agents.txt', 'r').read())['agents'])
        if self.proxy:
            random_proxy = random.choice(self.get_proxies())
            self.session.proxies = {
                'https': 'https://{ip}:{port}'.format(
                    ip=random_proxy['ip'], port=random_proxy['port'])
            }
            self.browser = RoboBrowser(user_agent=agent,
                                       session=self.session)
        else:
            self.browser = RoboBrowser(user_agent=agent)

    def chameleon(self):
        """Rebuild the browser with a fresh random User-Agent (and,
        when proxying is enabled, a fresh random proxy)."""
        agent = choice(
            json.loads(open('all_agents.txt', 'r').read())['agents'])
        if self.proxy:
            random_proxy = random.choice(self.get_proxies())
            self.session.proxies = {
                'https': 'https://{ip}:{port}'.format(
                    ip=random_proxy['ip'], port=random_proxy['port'])
            }
            self.browser = RoboBrowser(user_agent=agent,
                                       session=self.session)
        else:
            self.browser = RoboBrowser(user_agent=agent)

    def get_finals(self):
        """Fetch finals data for semester 3.

        NOTE(review): the original contained a pagination loop that was
        dead code — it returned unconditionally on the first iteration.
        The unreachable statements were removed; behavior is unchanged.
        """
        basic_url = 'https://lms.tuit.uz/student/finals/data?semester_id=3'
        try:
            self.browser.open(basic_url)
            single = json.loads(self.clear_text(str(self.browser.parsed)))
            return {'ok': True, 'data': single}
        except Exception:
            return {'ok': False, 'data': None}

    def get_semester(self):
        """Scrape the study plan and group subjects by semester."""
        try:
            self.browser.open('https://lms.tuit.uz/student/study-plan')
            parser = bs4.BeautifulSoup(str(self.browser.parsed))
            rows = parser.findAll('tr')
            if len(rows) == 0:
                return {'ok': False, 'data': None}
            data = []
            semester = []
            semester_count = 1
            for i in rows[1:]:
                cols = [x.text.replace('\n', '') for x in i.findAll('td')]
                if len(cols) > 1:
                    semester.append({
                        'subject': cols[0],
                        'credit': cols[1],
                        'grade': cols[2].replace(' ', '')
                    })
                elif i.find('td', 'bg-info'):
                    # A 'bg-info' row separates semesters.
                    data.append({'semester': semester_count,
                                 'data': semester})
                    semester_count += 1
                    semester = []
            if len(semester) > 0:
                data.append({'semester': semester_count, 'data': semester})
            data = {'ok': True, 'data': data}
        except Exception as e:
            logging.error(str(e))
            data = {'ok': False, 'data': None}
        return data

    def get_schedule(self):
        """Scrape the weekly lesson schedule into per-day records."""
        try:
            self.browser.open('https://lms.tuit.uz/student/schedule')
            parser = bs4.BeautifulSoup(str(self.browser.parsed))
            tbody = parser.find('tbody')
            if tbody is None:
                print("Tbody is None")
                return {'ok': False, 'data': None}
            rows = tbody.findAll('tr')
            data = []
            days = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat']
            for i in rows:
                cols = i.findAll('td')
                duration = cols[0].text  # lesson time slot
                week = []
                current_day = 0
                for day in cols[1:]:
                    print("for loop DAY: " + str(day))
                    if len(day.text) > 1:
                        subject = day.find(text=True)
                        code = day.find('span')
                        # The <small> cell text is "<building>-<room>".
                        building = day.find('small').text.split('-')[0]
                        room = day.find('small').text.split('-')[1]
                        week.append({
                            'data': {
                                'subject': subject.replace(
                                    ' ', '').replace('\n', ''),
                                'identificator': code.text,
                                'room': room,
                                'building': building
                            },
                            'day': days[current_day]
                        })
                    else:
                        week.append({'day': days[current_day],
                                     'data': None})
                    current_day += 1
                if week:
                    data.append({'time': duration, 'week': week})
            data = {'ok': True, 'data': schedule_reformatter(data)}
        except Exception as e:
            logging.error(str(e))
            data = {'ok': False, 'data': e}
        return data

    def get_subjects(self, semester_id=1):
        """Fetch the course list for the current semester.

        NOTE(review): the `semester_id` argument is ignored — it is
        immediately overwritten by get_current_semester_id(); kept only
        for backward compatibility.
        """
        semester_id = self.get_current_semester_id()
        basic_url = ('https://lms.tuit.uz/student/my-courses/data'
                     '?semester_id={}')
        try:
            self.browser.open(basic_url.format(semester_id))
            data = {
                'ok': True,
                'data': json.loads(
                    self.clear_text(str(self.browser.parsed)))
            }
        except Exception as e:
            logging.error(str(e))
            data = {'ok': False, 'data': str(e)}
        return data

    def get_activities(self, subject_id):
        """Scrape the activity/assignment table of one course."""
        try:
            self.browser.open(
                'https://lms.tuit.uz/student/my-courses/show/{}'.format(
                    subject_id))
            parser = bs4.BeautifulSoup(str(self.browser.parsed))
            table = parser.find('div', 'table-responsive')
            if not table:
                logging.error('GET_ACTIVITIES: Table not found')
                return {'ok': False, 'data': None}
            tbody = table.find('tbody')
            data = []
            for row in tbody.findAll('tr'):
                cols = row.findAll('td')
                teacher = cols[0].text
                task = cols[1].find(text=True)
                task_file_name = cols[1].find('a').text
                task_file_url = cols[1].find('a')
                if task_file_url:
                    task_file_url = task_file_url['href']
                else:
                    task_file_url = None
                task_deadline = cols[2].text
                task_grade = [x.text for x in cols[3].findAll('button')]
                max_grade = task_grade[1]
                acheived_grade = task_grade[0]
                submitted_work = cols[4]
                submitted = False
                uploaded_file = None
                uploaded_file_name = None
                if submitted_work.find('i', 'fa fa-download'):
                    submitted = True
                    uploaded_file = submitted_work.find('a')['href']
                    # Very short hrefs are placeholders, not real files.
                    if len(str(uploaded_file)) < 20:
                        uploaded_file = None
                    uploaded_file_name = self.clear_text(
                        submitted_work.find('a').text)
                data.append({
                    'teacher': self.clear_text(teacher),
                    'task': self.clear_text(task),
                    'task_file_name': self.clear_text(task_file_name),
                    'task_file_url': task_file_url,
                    'deadline': self.clear_text(task_deadline),
                    'max_grade': self.clear_text(max_grade),
                    'grade': self.clear_text(acheived_grade),
                    'is_submitted': submitted,
                    'submitted_file_url': uploaded_file,
                    'submitted_file_name': uploaded_file_name
                })
            data = {'ok': True, 'data': data}
        except Exception as e:
            logging.error(str(e))
            data = {'ok': False, 'data': None}
        return data

    def test(self, url):
        """Best-effort connectivity probe; failures are ignored on
        purpose (narrowed from a bare ``except:``)."""
        try:
            self.browser.open(url)
        except Exception:
            pass
#!usr/bin/env python3 # -*- coding: utf-8 -*- ''' Tip1: text = text.replace(r'\xa0', '') Tip2: html.unscape(text) ''' import re from robobrowser import RoboBrowser url = 'http://xxxx/xxxx/' b = RoboBrowser(history=True) b.open(url) class_name = b.select('.headline h2') class_desc = b.select('.tag-box') class_time = b.select('h4') teacher = b.select('.thumbnail-style h3') qq = b.find(text=re.compile('QQ')) qq_group = b.find(text=re.compile('\+selenium')) info = { 'class_name': class_name[0].text, 'calss_desc': class_desc[0].text, 'class_time': class_time[0].text, 'teacher': teacher[0].text, 'qq': qq, 'qq_group': qq_group }
def cli(prob_id, filename):
    """Submit `filename` to Codeforces problem `prob_id` and poll until a
    final verdict is available.

    Fixes: ``verdict_ != None`` → ``is not None`` (identity comparison
    for None); unused tuple elements collapsed with ``*_``; unused bound
    exception variables dropped.
    """
    # Get the latest submission id so the new submission can be detected
    # by a changed id.
    last_id, *_ = get_latest_verdict(config.username)

    # Browse to Codeforces and log in.
    browser = RoboBrowser(parser='html.parser')
    browser.open('http://codeforces.com/enter')
    enter_form = browser.get_form('enterForm')
    enter_form['handleOrEmail'] = config.username
    enter_form['password'] = config.password
    browser.submit_form(enter_form)

    try:
        # After login the page header contains the handle; its absence
        # means authentication failed.
        checks = list(
            map(lambda x: x.getText()[1:].strip(),
                browser.select('div.caption.titled')))
        if config.username not in checks:
            click.secho('Login Failed.. Wrong password.', fg='red')
            return
    except Exception:
        click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
        return

    click.secho('[{0}] login successful! '.format(config.username),
                fg='green')
    click.secho('Submitting [{1}] for problem [{0}]'.format(prob_id,
                                                            filename),
                fg='green')
    browser.open('http://codeforces.com/problemset/submit')
    submit_form = browser.get_form(class_='submit-form')
    submit_form['submittedProblemCode'] = prob_id
    try:
        submit_form['sourceFile'] = filename
    except Exception:
        click.secho('File {0} not found in current directory'.format(
            filename))
        return
    browser.submit_form(submit_form)

    # A successful submit redirects to the ".../status" page.
    if browser.url[-6:] != 'status':
        click.secho(
            'Failed submission, probably you have submit the same file before',
            fg='red')
        return

    click.secho('[{0}] submitted ...'.format(filename), fg='green')
    hasStarted = False
    while True:
        id_, verdict_, time_, memory_, passedTestCount_ = \
            get_latest_verdict(config.username)
        # A new id with a settled verdict means judging has finished.
        if id_ != last_id and verdict_ != 'TESTING' and verdict_ is not None:
            if verdict_ == 'OK':
                click.secho('OK - Passed {} tests'.format(passedTestCount_),
                            fg='green')
            else:
                click.secho("{} on test {}".format(verdict_,
                                                   passedTestCount_ + 1),
                            fg='red')
            click.secho('{} MS | {} KB'.format(time_, memory_),
                        fg=('green' if verdict_ == 'OK' else 'red'))
            break
        elif verdict_ == 'TESTING' and (not hasStarted):
            click.secho("Judgment has begun", fg='green')
            hasStarted = True
        time.sleep(0.5)  # poll twice a second
break except ValueError: printError("Format incorrect. Entrez un nombre entier.") # creation du fichier grades.txt si besoin f = open("grades.txt", "a") f.close() # main while True: newGrade = False fileGrades = open('grades.txt', 'r') tempFile = open('tempgrades.txt', 'w') browser = RoboBrowser() browser.open( 'https://ent.insa-rennes.fr/uPortal/f/infosperso/p/dossierAdmEtu.u18l1n17/max/render.uP?pCp#portlet-DossierAdmEtu-tab2' ) # Get the signup form signup_form = browser.get_form() # Fill it out signup_form['username'].value = INSAuser signup_form['password'].value = INSApassword # Submit the form a = browser.submit_form(signup_form) # break si, après identification, on se retrouve sur la page d'erreur if browser.find_all(class_="errors"): printError("Mauvais identifiant ou Mot de Passe.")
else: raise TypeError('invalid codename') def match_source(tag): return tag.name == 'a' and tag.has_attr('href') and tag.has_attr( 'title') and tag['title'] == 'Source package building this package' for pkgname in opts['pkgname'].split(','): browser = RoboBrowser() if dist == 'ubuntu': url = 'http://packages.ubuntu.com/source/' + opts[ 'codename'] + '/' + pkgname browser.open(url) if b'No such package' in browser.response.content: url = 'http://packages.ubuntu.com/' + opts[ 'codename'] + '/' + pkgname print(url) browser.open(url) if b'No such package' in browser.response.content: raise TypeError('no package found') source_links = browser.find_all(match_source) pkgname = source_links[0].contents[0] else: raise TypeError('invalid dist') if os.path.exists(pkgname): continue
class TechnicolorGateway(object):
    """Minimal client for a Technicolor gateway's web interface.

    Authenticates with the gateway's SRP-6a challenge/response login and
    fetches the device/broadband modal pages, handing the HTML to the
    module-level parser helpers.
    """

    def __init__(self, host, port, user, password) -> None:
        self._host = host
        self._port = port
        self._uri = f'http://{host}:{port}'
        self._user = user
        self._password = password
        self._br = RoboBrowser(history=True, parser="html.parser")

    def srp6authenticate(self):
        """Run the two-round SRP-6a handshake against ``/authenticate``.

        :returns: ``True`` when the session is authenticated.
        :raises Exception: on any failure (bad credentials, malformed
            response, network error); the error is logged before re-raising.
        """
        try:
            self._br.open(self._uri)
            # The login page carries the CSRF token in a tag's name/content
            # attribute pair.
            token = self._br.find(lambda tag: tag.has_attr('name') and tag[
                'name'] == 'CSRFtoken')['content']
            _LOGGER.debug('Got CSRF token: %s', token)

            usr = srp.User(self._user, self._password, hash_alg=srp.SHA256,
                           ng_type=srp.NG_2048)
            uname, A = usr.start_authentication()
            _LOGGER.debug('A value %s', binascii.hexlify(A))

            # Round 1: send I (username) and the client public ephemeral A;
            # the gateway replies with the salt `s` and its public ephemeral
            # `B` as JSON.
            self._br.open(f'{self._uri}/authenticate',
                          method='post',
                          data=urlencode({
                              'CSRFtoken': token,
                              'I': uname,
                              'A': binascii.hexlify(A)
                          }))
            _LOGGER.debug("br.response %s", self._br.response)
            j = json.decoder.JSONDecoder().decode(self._br.parsed.decode())
            _LOGGER.debug("Challenge received: %s", j)

            M = usr.process_challenge(binascii.unhexlify(j['s']),
                                      binascii.unhexlify(j['B']))
            _LOGGER.debug("M value %s", binascii.hexlify(M))

            # Round 2: send the client proof M; the gateway answers with its
            # own proof (key 'M') or an 'error' entry.
            self._br.open(f'{self._uri}/authenticate',
                          method='post',
                          data=urlencode({
                              'CSRFtoken': token,
                              'M': binascii.hexlify(M)
                          }))
            _LOGGER.debug("br.response %s", self._br.response)
            j = json.decoder.JSONDecoder().decode(self._br.parsed.decode())
            _LOGGER.debug("Got response %s", j)
            if 'error' in j:
                raise Exception(
                    "Unable to authenticate (check password?), message:", j)

            usr.verify_session(binascii.unhexlify(j['M']))
            if not usr.authenticated():
                raise Exception("Unable to authenticate")
            return True
        except Exception as e:
            # BUGFIX: the original message was a string literal broken across
            # a physical line (invalid) and passed `e` with no format
            # placeholder; use lazy %-formatting instead.
            _LOGGER.error("Authentication failed. Exception: %s", e)
            traceback.print_exc()
            raise

    def get_device_modal(self):
        """Fetch and parse ``/modals/device-modal.lp``.

        The final bare-name call resolves to the *module-level*
        ``get_device_modal`` helper, not this method (method names are not in
        scope inside method bodies) — intentional, not recursion.
        """
        r = self._br.session.get(f"{self._uri}/modals/device-modal.lp")
        # Keep RoboBrowser's state in sync with the raw requests call above;
        # NOTE: _update_state is a private robobrowser API.
        self._br._update_state(r)
        content = r.content.decode()
        return get_device_modal(content)

    def get_broadband_modal(self):
        """Fetch and parse ``/modals/broadband-modal.lp`` via the
        module-level ``get_broadband_modal`` helper (see note above on name
        resolution)."""
        r = self._br.session.get(f"{self._uri}/modals/broadband-modal.lp")
        self._br._update_state(r)
        content = r.content.decode()
        return get_broadband_modal(content)
import requests
from bs4 import BeautifulSoup
from secrets import username, password
from time import sleep
from robobrowser import RoboBrowser
from werkzeug.utils import cached_property

# Spoof a desktop Firefox UA so m.facebook.com serves the standard login page.
_FIREFOX_UA = (
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:42.0) Gecko/20100101 Firefox/42.0'
)

browser = RoboBrowser(history=True, user_agent=_FIREFOX_UA)
browser.open('https://m.facebook.com')

# Locate the login form by id, fill in the credentials imported from the
# local secrets module, and submit it to establish the session.
login_form = browser.get_form(id='login_form')
login_form['email'].value = username
login_form['pass'].value = password
browser.submit_form(login_form)
def get_medicare_email(request, mmg):
    """Scrape the account email address from MyMedicare's myaccount page.

    :param request: unused here; kept for call-site compatibility.
    :param mmg: dict carrying the scrape state. NOTE: it is mutated in place
        (``mmg_back`` aliases it, no copy is made) — callers see the updates.
    :return: the same dict with ``status`` ("OK"/"FAIL"), ``mmg_email`` and,
        on success, ``url`` filled in.
    """
    mmg_back = mmg
    mmg_back['status'] = "FAIL"
    mmg_back['mmg_email'] = ""

    # Fall back to lxml when no parser is configured in settings.
    PARSER = settings.BS_PARSER
    if not PARSER:
        if settings.DEBUG:
            print('Default Parser for BeautifulSoup:', 'lxml')
        PARSER = 'lxml'

    # Setting the parser explicitly avoids BeautifulSoup's "no parser
    # specified" warning in the console/log.
    rb = RoboBrowser()
    rb.parser = PARSER

    target_page = "https://www.mymedicare.gov/myaccount.aspx"
    rb.open(target_page)
    page = rb.parsed
    if settings.DEBUG:
        print("===============================")
        print("on page:", rb.url)
        print("MyAccount:", page)

    my_email = rb.find("div", attrs={"class":"ctl00_ctl00_ContentPlaceHolder1_ctl00_ctl00_ctl00_ctl01_UserInfo_pnlEmailSettings"})
    if settings.DEBUG:
        print("What email information:", my_email)

    # BUGFIX: the original iterated `for addr in my_email` (crashing with
    # TypeError when the div was absent and `my_email` was None), redid the
    # same inner find() every iteration, and left `mail_address` unbound for
    # the DEBUG print when nothing matched. One guarded lookup suffices.
    mail_address = ""
    if my_email is not None:
        mail_addr = my_email.find("div", attrs={"class": "myaccount-data"})
        if mail_addr is not None:
            mail_address = mail_addr.text
            mmg_back['mmg_email'] = mail_address

    # Still on the target page => we were not bounced to a login/error page.
    if rb.url == target_page:
        mmg_back['url'] = rb.url
        mmg_back['status'] = "OK"

    if settings.DEBUG:
        print("Email:", mail_address)
        print("url:", rb.url)

    return mmg_back