def set_password(user_to_change, old_password, new_password, dryrun=False):
    login_payload = {
        "options": {
            "username": user_to_change,
            "password": old_password
        }
    }
    password_change_payload = {
        "options": {
            "password": {
                "username": user_to_change,
                "password": old_password,
                "new_password": new_password
            }
        }
    }
    if not dryrun:
        browser = mechanicalsoup.Browser(
            user_agent='Python univention password changer')
        # Log in first; the UMC session cookie doubles as the XSRF token.
        answer = browser.post(LOGIN_URL, json=login_payload, verify=False)
        if not answer.ok:
            error_api_call(answer.content)
        session_id = browser.session.cookies.get('UMCSessionId')
        browser.session.headers.update({'X-XSRF-Protection': session_id})
        answer = browser.post(CHANGE_REQUEST_URL,
                              json=password_change_payload, verify=False)
        if not answer.ok:
            error_api_call(answer.content)
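# Minimal usage sketch for set_password above; it assumes LOGIN_URL,
# CHANGE_REQUEST_URL and error_api_call are defined at module level, as the
# function implies, and the credentials here are placeholders. With
# dryrun=True only the payloads are built and nothing is sent.
set_password('alice', 'old-secret', 'new-secret', dryrun=True)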
def __init__(self, email, password, competition_number=0):
    """
    Logs a user into Investopedia's trading simulator, and chooses a
    competition given a *username*, *password*, and *competition_number*.

    *competition_number* is the position of the desired game in the dropdown
    box on http://www.investopedia.com/simulator/home.aspx starting at 0.
    Default = 0
    """
    self.br = br = mechanicalsoup.Browser()
    login_page = self.fetch(
        "/accounts/login.aspx?returnurl=http://www.investopedia.com/simulator/"
    )

    # You have to select the form before you can input information to it.
    # The login form used to be at nr=2; now it's at nr=0.
    login_form = login_page.soup.select("form#account-api-form")[0]
    login_form.select("#edit-email")[0]["value"] = email
    login_form.select("#edit-password")[0]["value"] = password
    home_page = br.submit(login_form, login_page.url)

    # Select the competition to use: clear any preselected option, then mark
    # the requested one as selected.
    competition_form = home_page.soup.select("form#ddlGamesJoinedForm")[0]
    for option in competition_form.select("select#edit-salutation")[0]("option"):
        option.attrs.pop("selected", "")
    competition_form.select("select#edit-salutation")[0].find_all(
        "option")[competition_number]["selected"] = True
    br.submit(competition_form, home_page.url)
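# Sketch of instantiating the class this __init__ belongs to; the class name
# Simulator is hypothetical and the credentials are placeholders.
# competition_number=1 selects the second entry of the games dropdown.
sim = Simulator('user@example.com', 'hunter2', competition_number=1)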
def login(login, password, trainQ, testQ):
    flag = 0
    s = requests.Session()
    url = "https://freddiemac.embs.com/FLoan/secure/auth.php"
    url2 = "https://freddiemac.embs.com/FLoan/Data/download.php"
    browser = ms.Browser(session=s)

    print("Logging in....")
    login_page = browser.get(url)
    login_form = login_page.soup.find("form", {"class": "form"})
    login_form.find("input", {"name": "username"})["value"] = login
    login_form.find("input", {"name": "password"})["value"] = password
    response = browser.submit(login_form, login_page.url)

    login_page2 = browser.get(url2)
    print("To the continue page...")
    # Tick the "accept terms" checkbox before submitting the continue form.
    next_form = login_page2.soup.find("form", {"class": "fmform"})
    a = next_form.find("input", {"name": "accept"}).attrs
    a['checked'] = True
    response2 = browser.submit(next_form, login_page2.url)

    print("Start Downloading from..." + response2.url)
    table = response2.soup.find("table", {"class": "table1"})
    t = table.find_all('a')
    flag = downloadhistoricaldata(trainQ, testQ, t, s, flag)
    if flag == 1:
        print("Data downloaded successfully!!")
    else:
        print("Error in downloading data")
def getNewBrowser():
    # create browser instance
    b = mechanize.Browser()

    # create a cookiejar for cookies
    jar = http.cookiejar.LWPCookieJar()
    b.set_cookiejar(jar)

    # prevent mechanize from simulating a 403 disallow
    #b.set_handle_robots(False)

    # handle some other stuff
    #b.set_handle_equiv(True)
    #b.set_handle_gzip(True)
    #b.set_handle_redirect(True)
    #b.set_handle_referer(True)

    # follow refresh 0 but don't hang on refresh > 0
    #b.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    # want debugging messages?
    #b.set_debug_http(True)
    #b.set_debug_redirects(True)
    #b.set_debug_responses(True)

    # User-Agent (mechanize.Browser takes no user_agent argument, so the
    # header is set via addheaders instead)
    b.addheaders = [('User-agent',
                     'Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/31.0')]
    return b
def raise_if_form_exists(url, session):
    """
    This function raises a UserWarning if the link has forms
    """
    user_warning = ('Navigate to {0}, login and follow instructions. '
                    .format(url) +
                    'It is likely that you have to perform some one-time '
                    'registration steps before accessing this data.')

    # This is important for the python 2.6 build:
    try:
        from six.moves.html_parser import HTMLParseError
    except ImportError:
        # HTMLParseError was removed in Python 3.5. Since it can never be
        # thrown in 3.5, we can just define our own class as a placeholder.
        # *from bs4/builder/_htmlparser.py
        class HTMLParseError(Exception):
            pass

    br = mechanicalsoup.Browser(session=session)
    try:
        login_page = br.get(url)
    except HTMLParseError:
        # This is important for the python 2.6 build:
        raise UserWarning(user_warning)

    if (hasattr(login_page, 'soup') and
            len(login_page.soup.select('form')) > 0):
        raise UserWarning(user_warning)
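# A short sketch driving raise_if_form_exists above with a plain
# requests.Session; the URL is illustrative only. Since the function signals
# "login required" by raising UserWarning, the caller can catch it.
import requests

session = requests.Session()
try:
    raise_if_form_exists('https://example.com/protected/data.nc', session)
except UserWarning as warning:
    print(warning)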
def test_submit_set():
    """Complete and submit the pizza form at http://httpbin.org/forms/post"""
    browser = mechanicalsoup.Browser()
    page = browser.get("http://httpbin.org/forms/post")
    form = mechanicalsoup.Form(page.soup.form)

    form["custname"] = "Philip J. Fry"
    form["size"] = "medium"
    form["topping"] = ("cheese", "onion")
    form["comments"] = "freezer"
    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which echoes the
    # submitted form data back as JSON
    json = response.json()
    data = json["form"]
    assert data["custname"] == "Philip J. Fry"
    assert data["custtel"] == ""  # web browser submits "" for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"
    browser.close()
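# For comparison, the same pizza-form flow written against MechanicalSoup's
# StatefulBrowser API; a sketch that assumes httpbin.org is reachable.
import mechanicalsoup

stateful = mechanicalsoup.StatefulBrowser()
stateful.open("http://httpbin.org/forms/post")
stateful.select_form("form")
stateful["custname"] = "Philip J. Fry"
stateful["size"] = "medium"
stateful["topping"] = ("cheese", "onion")
stateful["comments"] = "freezer"
response = stateful.submit_selected()
assert response.json()["form"]["custname"] == "Philip J. Fry"
stateful.close()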
def __init__(self, target_dir='data', queries=None, debug=False):
    self.browser = mechanicalsoup.Browser()
    self.target_dir = target_dir
    self.outfile = os.path.join(target_dir, OUT_FNAME)
    # avoid a shared mutable default argument for queries
    self.queries = queries if queries is not None else []
    self.debug = debug
    self.last_query = None
def test_404():
    browser = mechanicalsoup.Browser(raise_on_404=True)
    with pytest.raises(mechanicalsoup.LinkNotFoundError):
        browser.get("http://httpbin.org/nosuchpage")
    resp = browser.get("http://httpbin.org/")
    assert resp.status_code == 200
    browser.close()
def search_all(db=None):
    browser = mechanicalsoup.Browser()
    if db is None:
        db = Models.connect()
    search_sites(browser, db)
    search_forums(browser, db)
def check_cred(login_details, ma_cred_queue, call_origin):
    browser = mechanicalsoup.Browser(soup_config={"features": "html.parser"})
    try:
        login_page = browser.get('https://inbox.myallocator.com/en/login',
                                 timeout=15)
    except requests.exceptions.Timeout:
        ma_cred_queue.put(["exit", "MyAllocator website has timed out and "
                           "could not be reached, please try again later."])
        return
    except requests.exceptions.ConnectionError:
        ma_cred_queue.put(["exit", "Could not connect to the internet, please "
                           "check your connection and try again"])
        return

    login_form = login_page.soup.select('.login_box')[0].select('form')[0]
    login_form.select('#Username')[0]['value'] = login_details["ma_username"]
    login_form.select('#Password')[0]['value'] = login_details["ma_password"]
    home_page = browser.submit(login_form, login_page.url)

    # A successful login lands on a page listing the user's properties.
    property_tags = home_page.soup.find_all("a", class_="property-link")
    if len(property_tags) > 0:
        ma_cred_queue.put("ma ok {}".format(call_origin))
    else:
        ma_cred_queue.put("ma not ok {}".format(call_origin))
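# Sketch of how check_cred above might be driven from another thread, with
# results reported over a queue; the credentials and the "startup" call
# origin are placeholders.
import queue
import threading

login_details = {"ma_username": "demo@example.com", "ma_password": "secret"}
ma_cred_queue = queue.Queue()
threading.Thread(target=check_cred,
                 args=(login_details, ma_cred_queue, "startup")).start()
print(ma_cred_queue.get())  # blocks until check_cred reports back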
def anonymous_tucan():
    browser = ms.Browser(soup_config={"features": "lxml"})
    page = browser.get(TUCAN_URL)
    # HTML redirects, because why not
    page = browser.get(_get_redirection_link(page))
    page = browser.get(_get_redirection_link(page))
    return browser, page
def password_generator():
    password_lst = []
    print("Exit and generate password by typing '/'.")
    i = 0
    while True:
        try:
            userinput = input(f"Enter keyword {i+1}: ").rstrip()
            if userinput == '/' and i != 0:
                break
            url = API_URL + f'{userinput}'
            browser = mechanicalsoup.Browser()
            response = browser.get(url)
            data = json.loads(response.text)
            # randrange raises ValueError here if data is empty
            random_num = randrange(len(data))
            password_lst.append(data[random_num]['word'])
            i += 1
        except ValueError:
            if userinput == '/' and i == 0:
                print("Minimum of 1 keyword required.")
            else:
                print("Got nothing for that, try again.")
    print(f"Your {i} randomly generated words were {'/'.join(password_lst)}.")
    password = randomizer(''.join(password_lst))
    print(f"Generated password: {password}\n")
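# password_generator above relies on two module-level names it never defines:
# API_URL (a word-suggestion endpoint returning JSON objects with a 'word'
# key) and randomizer (a string-scrambling helper). Hypothetical stand-ins so
# the function can run; both the Datamuse URL and the shuffle-based
# randomizer are assumptions, not the original code.
import random

API_URL = 'https://api.datamuse.com/words?ml='

def randomizer(text):
    chars = list(text)
    random.shuffle(chars)
    return ''.join(chars)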
def check_wg_credentials(login_info, cred_queue, call_origin):
    browser = mechanicalsoup.Browser(soup_config={'features': 'html.parser'})
    try:
        login_page = browser.get(
            'https://www.wg-gesucht.de/mein-wg-gesucht.html', timeout=15)
    except (requests.exceptions.HTTPError,
            requests.exceptions.ConnectionError):
        cred_queue.put("no connection {}".format(call_origin))
        return
    except requests.exceptions.Timeout:
        cred_queue.put("timed out {}".format(call_origin))
        return

    login_form = login_page.soup.select('.panel-body')[0].select('form')[0]
    login_form.find("input",
                    {"id": "email_user"})['value'] = login_info['email']
    login_form.find("input",
                    {"id": "passwort-static"})['value'] = login_info['password']

    cred_queue.put("Signing into WG-Gesucht...")
    home_page = browser.submit(login_form, login_page.url)

    # The logout menu only exists when the login was successful.
    if len(home_page.soup.find_all(
            "div", {"class": "dropdown toggle-logout-menu"})) > 0:
        cred_queue.put("login ok {}".format(call_origin))
    else:
        cred_queue.put("login not ok {}".format(call_origin))
def main(argv):
    house_number = ''
    post_code = ''
    try:
        opts, args = getopt.getopt(argv, "h:p:")
    except getopt.GetoptError:
        print('FTTCChecker.py -h <houseNumber> -p <postCode>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--houseNumber"):
            house_number = arg
        elif opt in ("-p", "--postCode"):
            post_code = arg

    browser = mechanicalsoup.Browser()

    # The landing page populates some request variables; just select the
    # proceed button and submit.
    landing_page = browser.get("https://www.dslchecker.bt.com/#")
    proceed_form = landing_page.soup.select("form")[3]
    warning_page = browser.submit(proceed_form, landing_page.url)
    warning_form = warning_page.soup.select("form")[0]
    query_submit = browser.submit(warning_form, warning_page.url)

    query_form = query_submit.soup.select("form")[0]
    query_form.find('input', {'name': 'buildingnumber'})['value'] = house_number
    query_form.find('input', {'name': 'PostCode'})['value'] = post_code
    result_page = browser.submit(query_form, query_submit.url)
    result_table = result_page.soup.find(
        'table', {'style': 'border:1px solid black;border-collapse:collapse;'})

    # Nasty query to extract the "VDSL Range A (Clean)" availability result
    # from the returned table.
    clean_result = result_table.select('tr')[2].select('td')[6].select(
        'span')[0].text
    # impacted_result = result_table.select('tr')[3].select('td')[6].select('span')[0].text
    print(clean_result)
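# The usual entry point for a getopt-style script like main above; assumes
# sys is imported at module level.
if __name__ == '__main__':
    main(sys.argv[1:])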
def login(credential, banner_url, ca_bundle):
    import logging

    import mechanicalsoup

    login_prompt = 'twbkwbis.P_WWWLogin'
    login_verify = 'twbkwbis.P_ValLogin'

    browser = mechanicalsoup.Browser(soup_config={'features': 'html.parser'})
    browser.banner_url = banner_url
    browser.log = logging.getLogger()
    browser.log.info('Logging in via %s' % banner_url(login_prompt))
    browser.get(banner_url(login_prompt), verify=ca_bundle)
    result = browser.post(banner_url(login_verify), {
        'sid': credential['username'],
        'PIN': credential['password'],
    })
    if not result.ok:
        raise ValueError(result)

    import types
    browser.ca_bundle = ca_bundle
    browser.set_term = types.MethodType(set_term, browser)
    return browser
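# Sketch of the banner_url callable that login above expects: something that
# maps a Banner procedure name to a full URL. Host, path and credentials are
# illustrative placeholders.
def banner_url(procedure):
    return 'https://banner.example.edu/pls/prod/{}'.format(procedure)

credential = {'username': 'A00000000', 'password': 'secret-pin'}
browser = login(credential, banner_url,
                ca_bundle='/etc/ssl/certs/ca-bundle.crt')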
def main():
    # Create a browser and log in
    browser = mechanicalsoup.Browser()
    login(browser)

    # Copy 'figs' folder to temp directory
    create_dir(TEMP_PATH)
    create_dir(GENERATED_PATH)
    try:
        shutil.rmtree(os.path.join(TEMP_PATH, 'figs'))
    except OSError:
        pass
    shutil.copytree(os.path.join(this_dir, 'template/figs'),
                    os.path.join(TEMP_PATH, 'figs'))

    # Go through students
    num = 1
    for url in yield_elev_urls(browser):
        name, oppmoter = get_name_oppmoter(url)

        # Skip students with no attendance records
        if len(oppmoter) < 1:
            continue

        oppmoter_tot, mulige = oppmoter_mulige(WEEKS, oppmoter)
        prosent = round((oppmoter_tot / mulige) * 100)
        if attest(oppmoter_tot, prosent):
            out_str = 'Attest nr {} går til "{}".'.format(num, name)
            print(out_str)
            generate_attest(name)
            num += 1

    shutil.rmtree(os.path.join(TEMP_PATH, 'figs'))
def areas_of_chennai(request):
    browser = mechanicalsoup.Browser(soup_config={"features": "html.parser"})
    page = browser.get("http://www.mapsofindia.com/lat_long/tamilnadu/")
    soup = BeautifulSoup(page.text, "html.parser")
    table = soup.find("table", {"class": "tableizer-table"})
    tr = table.findAll("tr")
    tr.pop(0)  # drop the header row
    dic = {}
    for row in tr:
        td = row.findAll("td")

        lat = td[1].getText().replace("°", "")
        lat = lat.replace(";", ".")
        lat = lat.replace("' N", "")
        lat = lat.replace(' ', '')
        # strip any remaining non-ASCII characters before parsing
        lat = lat.encode('ascii', 'ignore').decode('ascii')
        lat = float(lat)

        lon = td[2].getText().replace("°", "")
        lon = lon.replace(";", ".")
        lon = lon.replace("' E", "")
        lon = lon.replace(' ', '')
        lon = lon.encode('ascii', 'ignore').decode('ascii')
        lon = float(lon)

        dic[td[0].getText()] = (lat, lon)
        # for i, cells in enumerate(td):
        #     print(cells.getText(), end='|')

    # for towns in dic:
    #     print(towns, dic[towns])
    return TemplateResponse(request, "index.html", {'towns': dic})
def get_data_directores(lista_empresas, lista_links, verbose=True):
    browser = mechanicalsoup.Browser(soup_config={'features': 'lxml'})
    directorio = {}
    for (rut, nombre), url in zip(lista_empresas, lista_links):
        if verbose:
            print('Obteniendo datos para', nombre)
        page = browser.get(url)
        trs = page.soup.findAll('tr')
        dir_data = []
        for tr in trs:
            tds = tr.findAll('td')
            if len(tds) < 4:
                continue
            director = {
                'rut': tds[0].text,
                'nombre': tds[1].text,
                'cargo': tds[2].text,
                'fecha_nombramiento': tds[3].text,
            }
            dir_data.append(director)
        directorio[rut] = dir_data
    return directorio
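# Illustrative call for get_data_directores above; the company (rut, nombre)
# pairs and the URL are made-up placeholders.
empresas = [('96505760-9', 'Empresa Ejemplo S.A.')]
links = ['https://www.example.cl/entidad/96505760-9/directorio']
directorio = get_data_directores(empresas, links, verbose=True)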
def test__request_file(httpbin):
    form_html = """
    <form method="post" action="{}/post">
      <input name="pic" type="file" />
    </form>
    """.format(httpbin.url)
    form = BeautifulSoup(form_html, "lxml").form

    # create a temporary file for testing file upload
    pic_path = tempfile.mkstemp()[1]
    with open(pic_path, "w") as f:
        f.write(":-)")

    form.find("input", {"name": "pic"})["value"] = pic_path

    browser = mechanicalsoup.Browser()
    response = browser._request(form)

    # Check that only "files" includes a "pic" keyword in the response
    found = False
    for key, value in response.json().items():
        if key == "files":
            assert value["pic"] == ":-)"
            found = True
        else:
            assert (value is None) or ("pic" not in value)
    assert found

    assert "multipart/form-data" in response.request.headers["Content-Type"]
def try_to_login(server, login):
    URL = "http://{}/wp-login.php".format(server)
    browser = mechanicalsoup.Browser()

    # request login page
    login_page = browser.get(URL)
    # grab the login form
    login_form = login_page.soup.find("form", {"id": "loginform"})

    with open('Passwords', "r") as passwordsFile:
        password = passwordsFile.readline()
        while len(password) != 0:
            password = password.rstrip('\n')  # drop the trailing newline
            # fill in the login and password inputs
            login_form.find("input", {"name": "log"})["value"] = login
            login_form.find("input", {"name": "pwd"})["value"] = password
            # submit the form
            response = browser.submit(login_form, login_page.url)
            if "wp-admin" in str(response.url):
                print("Login SUCCESS for {} - {}".format(login, password))
                with open("SuccessLogin", "a") as f:
                    f.write("{} : {}".format(login, password))
                break
            # else:
            #     print("Login failed for password: {}".format(password))
            password = passwordsFile.readline()
def test_request_keyword_error(keyword):
    """Make sure an exception is raised if kwargs duplicate an arg."""
    form_html = "<form></form>"
    browser = mechanicalsoup.Browser()
    with pytest.raises(TypeError, match="multiple values for"):
        browser._request(BeautifulSoup(form_html, "lxml").form,
                         'myurl', **{keyword: 'somevalue'})
def login(login, passw):
    print("Pass:", passw)
    url = "https://freddiemac.embs.com/FLoan/secure/auth.php"
    url2 = "https://freddiemac.embs.com/FLoan/Data/download.php"
    s = requests.Session()
    browser = ms.Browser(session=s)

    print("Logging in....")
    login_page = browser.get(url)
    login_form = login_page.soup.find("form", {"class": "form"})
    login_form.find("input", {"name": "username"})["value"] = login
    login_form.find("input", {"name": "password"})["value"] = passw
    response = browser.submit(login_form, login_page.url)

    login_page2 = browser.get(url2)
    print("To the continue page...")
    next_form = login_page2.soup.find("form", {"class": "fmform"})
    a = next_form.find("input", {"name": "accept"}).attrs
    a['checked'] = True
    response2 = browser.submit(next_form, login_page2.url)

    print("Start Downloading from..." + response2.url)
    table = response2.soup.find("table", {"class": "table1"})
    t = table.find_all('a')
    for x in range(76, 88):
        c = 'https://freddiemac.embs.com/FLoan/Data/' + t[x]['href']
        r = s.get(c)
        z = ZipFile(BytesIO(r.content))
        z.extractall(os.getcwd())
    print("Downloaded all samples successfully!")
def crawl_nytimes_archive(queue):
    browser = mechanicalsoup.Browser()

    # Fetch date targets from config file
    (start_date_year, start_date_month, start_date_day,
     end_date_year, end_date_month, end_date_day) = getConfig()

    # Define date values so they can be incremented in the search URL.
    end_date = datetime.date(end_date_year, end_date_month, end_date_day)
    # The date to start crawling at.
    target_date = datetime.date(start_date_year, start_date_month,
                                start_date_day)

    # Loop through search results by calling the search URL for each date in
    # range, one at a time.
    while target_date < end_date:
        # Set search_url based on target_date
        search_url = ("http://query.nytimes.com/svc/add/v1/sitesearch.json"
                      "?end_date={0:d}{1:02d}{2:02d}"
                      "&begin_date={3:d}{4:02d}{5:02d}"
                      "&page=1&facet=true").format(
                          target_date.year, target_date.month, target_date.day,
                          target_date.year, target_date.month, target_date.day)
        logging.info("Search URL=" + search_url)
        date_start_time = time.time()

        page_number = 1
        # Loop through each page of search results.
        while page_number < 100:
            # Queue all the links for this search page.
            crawlPage(target_date, browser, search_url, queue, page_number)
            # Keep the old page number to perform string replacement in the
            # new URL.
            old_page_number = page_number
            page_number += 1
            # Bump the page number in the URL by replacing the last occurrence
            # of the old number (done on the reversed string).
            search_url = search_url[::-1].replace(
                str(old_page_number)[::-1], str(page_number)[::-1], 1)[::-1]

        logging.info("Day {} queued in {} seconds".format(
            target_date, (time.time() - date_start_time)))
        # Advance to the next date once all pages for this one are queued.
        target_date += datetime.timedelta(days=1)
def test_submit_online():
    """Complete and submit the pizza form at http://httpbin.org/forms/post"""
    browser = mechanicalsoup.Browser()
    page = browser.get("http://httpbin.org/forms/post")
    form = page.soup.form

    form.find("input", {"name": "custname"})['value'] = 'Philip J. Fry'
    # leave custtel blank without value
    assert 'value' not in form.find('input', {'name': 'custtel'}).attrs
    form.find("input", {"name": "size", "value": "medium"})['checked'] = ""
    form.find("input", {"name": "topping", "value": "cheese"})['checked'] = ""
    form.find("input", {"name": "topping", "value": "onion"})['checked'] = ""
    form.find("textarea", {"name": "comments"}).insert(0, 'freezer')
    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which echoes the
    # submitted form data back as JSON
    json = response.json()
    data = json['form']
    assert data["custname"] == 'Philip J. Fry'
    assert data["custtel"] == ''  # web browser submits '' for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"
def __init__(self):
    self.db = sqlite3.connect('bancoface.db')
    self.cursor = self.db.cursor()
    self.cursor.execute(
        'CREATE TABLE IF NOT EXISTS Amigos(nome VARCHAR,link VARCHAR)')
    self.cursor.execute(
        'CREATE TABLE IF NOT EXISTS Sobre(nome VARCHAR,sexo VARCHAR,'
        'fones VARCHAR,aniversario VARCHAR,cidNatal VARCHAR,cidAtual VARCHAR,'
        'relacionamento VARCHAR,familiares VARCHAR,acontecimentos VARCHAR,'
        'linksobre VARCHAR)')
    self.browser = mechanicalsoup.Browser()
    self.complete = 'https://m.facebook.com'

    pagina = self.browser.get('https://m.facebook.com/login.php')
    login = '******'
    senha = 'sw0rdf1nsh123'
    print('Conectando ao facebook com {}'.format(login))

    formulario = pagina.soup.find('form')
    formulario.find('input', {'type': 'text'})['value'] = login
    formulario.find('input', {'type': 'password'})['value'] = senha
    response = self.browser.submit(formulario, pagina.url)

    if 'save-device' in response.url:
        print('Conectado com Sucesso')
        self.meumenu()
    else:
        print('Login ou Senha Incorretos')
def run_program(args):
    browser = mechanicalsoup.Browser(soup_config={'features': "html.parser"})
    results = []
    print("Starting to crawl. Options:\n{}\n".format(args))
    for page_num in range(args.page_start, args.page_end + 1):
        print("\tCrawling page: {}/{}".format(page_num, args.page_end))
        results += get_results(browser, args.url, page_num)

    with open(args.json_out, 'w') as f:
        json.dump(results, f, indent=4, sort_keys=True)
    # print(results)
    # render(results)

    import os
    import urllib.request

    img_urls = [r['img'] for r in results if r is not None]
    try:
        os.mkdir('data/{}'.format(args.target))
    except OSError:
        pass
    for idx, img in enumerate(img_urls):
        try:
            urllib.request.urlretrieve(
                img, "data/{}/{}.jpg".format(args.target, idx))
        except TypeError:
            pass
def test_submit_online(httpbin):
    """Complete and submit the pizza form at http://httpbin.org/forms/post"""
    browser = mechanicalsoup.Browser()
    page = browser.get(httpbin + "/forms/post")
    form = page.soup.form

    form.find("input", {"name": "custname"})["value"] = "Philip J. Fry"
    # leave custtel blank without value
    assert "value" not in form.find("input", {"name": "custtel"}).attrs
    form.find("input", {"name": "size", "value": "medium"})["checked"] = ""
    form.find("input", {"name": "topping", "value": "cheese"})["checked"] = ""
    form.find("input", {"name": "topping", "value": "onion"})["checked"] = ""
    form.find("textarea", {"name": "comments"}).insert(0, "freezer")
    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which echoes the
    # submitted form data back as JSON
    json = response.json()
    data = json["form"]
    assert data["custname"] == "Philip J. Fry"
    assert data["custtel"] == ""  # web browser submits "" for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"

    assert json["headers"]["User-Agent"].startswith('python-requests/')
    assert 'MechanicalSoup' in json["headers"]["User-Agent"]
def test_submit_online():
    """Complete and submit the pizza form at http://httpbin.org/forms/post"""
    browser = mechanicalsoup.Browser()
    page = browser.get("http://httpbin.org/forms/post")
    form = mechanicalsoup.Form(page.soup.form)

    input_data = {"custname": "Philip J. Fry"}
    form.input(input_data)

    check_data = {"size": "large", "topping": ["cheese"]}
    form.check(check_data)
    check_data = {"size": "medium", "topping": "onion"}
    form.check(check_data)

    form.textarea({"comments": "warm"})
    form.textarea({"comments": "actually, no, not warm"})
    form.textarea({"comments": "freezer"})

    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which echoes the
    # submitted form data back as JSON
    json = response.json()
    data = json["form"]
    assert data["custname"] == "Philip J. Fry"
    assert data["custtel"] == ""  # web browser submits "" for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"
def test_get_request_kwargs_when_method_is_in_kwargs(httpbin):
    """get_request_kwargs should raise TypeError when kwargs duplicate the
    form's method."""
    browser = mechanicalsoup.Browser()
    page = browser.get(httpbin + "/forms/post")
    form = page.soup.form
    kwargs = {"method": "post"}
    with pytest.raises(TypeError):
        browser.get_request_kwargs(form, page.url, **kwargs)
def logout(self):
    """Logs out the user by replacing the session browser with a new one."""
    logging.info("Logging out.")
    self._logged_in = False
    self._browser = ms.Browser()