def set_password(user_to_change, old_password, new_password, dryrun=False):
    login_payload = {
        "options": {
            "username": user_to_change,
            "password": old_password
        }
    }
    password_change_payload = {
        "options": {
            "password": {
                "username": user_to_change,
                "password": old_password,
                "new_password": new_password
            }
        }
    }
    if not dryrun:
        browser = mechanicalsoup.Browser(
            user_agent='Python univention password changer')
        # Log in first; the UMC session cookie doubles as the XSRF token.
        answer = browser.post(LOGIN_URL, json=login_payload, verify=False)
        if not answer.ok:
            error_api_call(answer.content)
        session_id = browser.session.cookies.get('UMCSessionId')
        browser.session.headers.update({'X-XSRF-Protection': session_id})
        answer = browser.post(CHANGE_REQUEST_URL,
                              json=password_change_payload, verify=False)
        if not answer.ok:
            error_api_call(answer.content)
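# Minimal usage sketch for set_password above; it assumes LOGIN_URL,
# CHANGE_REQUEST_URL and error_api_call are defined at module level, as the
# function implies, and the credentials here are placeholders. With
# dryrun=True only the payloads are built and nothing is sent.
set_password('alice', 'old-secret', 'new-secret', dryrun=True)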
def __init__(self, email, password, competition_number=0):
    """
    Logs a user into Investopedia's trading simulator, and chooses a
    competition given a *username*, *password*, and *competition_number*.

    *competition_number* is the position of the desired game in the dropdown
    box on http://www.investopedia.com/simulator/home.aspx starting at 0.
    Default = 0
    """
    self.br = br = mechanicalsoup.Browser()
    login_page = self.fetch(
        "/accounts/login.aspx?returnurl=http://www.investopedia.com/simulator/"
    )

    # You have to select the form before you can input information to it.
    # The login form used to be at nr=2; now it's at nr=0.
    login_form = login_page.soup.select("form#account-api-form")[0]
    login_form.select("#edit-email")[0]["value"] = email
    login_form.select("#edit-password")[0]["value"] = password
    home_page = br.submit(login_form, login_page.url)

    # Select the competition to use: clear any preselected option, then mark
    # the requested one as selected.
    competition_form = home_page.soup.select("form#ddlGamesJoinedForm")[0]
    for option in competition_form.select("select#edit-salutation")[0]("option"):
        option.attrs.pop("selected", "")
    competition_form.select("select#edit-salutation")[0].find_all(
        "option")[competition_number]["selected"] = True
    br.submit(competition_form, home_page.url)
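# Sketch of instantiating the class this __init__ belongs to; the class name
# Simulator is hypothetical and the credentials are placeholders.
# competition_number=1 selects the second entry of the games dropdown.
sim = Simulator('user@example.com', 'hunter2', competition_number=1)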
def login(login, password, trainQ, testQ):
    flag = 0
    s = requests.Session()
    url = "https://freddiemac.embs.com/FLoan/secure/auth.php"
    url2 = "https://freddiemac.embs.com/FLoan/Data/download.php"
    browser = ms.Browser(session=s)

    print("Logging in....")
    login_page = browser.get(url)
    login_form = login_page.soup.find("form", {"class": "form"})
    login_form.find("input", {"name": "username"})["value"] = login
    login_form.find("input", {"name": "password"})["value"] = password
    response = browser.submit(login_form, login_page.url)

    login_page2 = browser.get(url2)
    print("To the continue page...")
    # Tick the "accept terms" checkbox before submitting the continue form.
    next_form = login_page2.soup.find("form", {"class": "fmform"})
    a = next_form.find("input", {"name": "accept"}).attrs
    a['checked'] = True
    response2 = browser.submit(next_form, login_page2.url)

    print("Start Downloading from..." + response2.url)
    table = response2.soup.find("table", {"class": "table1"})
    t = table.find_all('a')
    flag = downloadhistoricaldata(trainQ, testQ, t, s, flag)
    if flag == 1:
        print("Data downloaded successfully!!")
    else:
        print("Error in downloading data")
def getNewBrowser():
    # create browser instance
    b = mechanize.Browser()

    # create a cookiejar for cookies
    jar = http.cookiejar.LWPCookieJar()
    b.set_cookiejar(jar)

    # prevent mechanize from simulating a 403 disallow
    #b.set_handle_robots(False)

    # handle some other stuff
    #b.set_handle_equiv(True)
    #b.set_handle_gzip(True)
    #b.set_handle_redirect(True)
    #b.set_handle_referer(True)

    # follow refresh 0 but don't hang on refresh > 0
    #b.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    # want debugging messages?
    #b.set_debug_http(True)
    #b.set_debug_redirects(True)
    #b.set_debug_responses(True)

    # User-Agent (mechanize.Browser takes no user_agent argument, so the
    # header is set via addheaders instead)
    b.addheaders = [('User-agent',
                     'Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/31.0')]
    return b
def raise_if_form_exists(url, session):
    """
    This function raises a UserWarning if the link has forms
    """
    user_warning = ('Navigate to {0}, login and follow instructions. '
                    .format(url) +
                    'It is likely that you have to perform some one-time '
                    'registration steps before accessing this data.')

    # This is important for the python 2.6 build:
    try:
        from six.moves.html_parser import HTMLParseError
    except ImportError:
        # HTMLParseError was removed in Python 3.5. Since it can never be
        # thrown in 3.5, we can just define our own class as a placeholder.
        # *from bs4/builder/_htmlparser.py
        class HTMLParseError(Exception):
            pass

    br = mechanicalsoup.Browser(session=session)
    try:
        login_page = br.get(url)
    except HTMLParseError:
        # This is important for the python 2.6 build:
        raise UserWarning(user_warning)

    if (hasattr(login_page, 'soup') and
            len(login_page.soup.select('form')) > 0):
        raise UserWarning(user_warning)
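# A short sketch driving raise_if_form_exists above with a plain
# requests.Session; the URL is illustrative only. Since the function signals
# "login required" by raising UserWarning, the caller can catch it.
import requests

session = requests.Session()
try:
    raise_if_form_exists('https://example.com/protected/data.nc', session)
except UserWarning as warning:
    print(warning)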
def test_submit_set():
    """Complete and submit the pizza form at http://httpbin.org/forms/post"""
    browser = mechanicalsoup.Browser()
    page = browser.get("http://httpbin.org/forms/post")
    form = mechanicalsoup.Form(page.soup.form)

    form["custname"] = "Philip J. Fry"
    form["size"] = "medium"
    form["topping"] = ("cheese", "onion")
    form["comments"] = "freezer"
    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which echoes the
    # submitted form data back as JSON
    json = response.json()
    data = json["form"]
    assert data["custname"] == "Philip J. Fry"
    assert data["custtel"] == ""  # web browser submits "" for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"
    browser.close()
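# For comparison, the same pizza-form flow written against MechanicalSoup's
# StatefulBrowser API; a sketch that assumes httpbin.org is reachable.
import mechanicalsoup

stateful = mechanicalsoup.StatefulBrowser()
stateful.open("http://httpbin.org/forms/post")
stateful.select_form("form")
stateful["custname"] = "Philip J. Fry"
stateful["size"] = "medium"
stateful["topping"] = ("cheese", "onion")
stateful["comments"] = "freezer"
response = stateful.submit_selected()
assert response.json()["form"]["custname"] == "Philip J. Fry"
stateful.close()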
def __init__(self, target_dir='data', queries=None, debug=False):
    self.browser = mechanicalsoup.Browser()
    self.target_dir = target_dir
    self.outfile = os.path.join(target_dir, OUT_FNAME)
    # avoid a shared mutable default argument for queries
    self.queries = queries if queries is not None else []
    self.debug = debug
    self.last_query = None
def test_404():
    browser = mechanicalsoup.Browser(raise_on_404=True)
    with pytest.raises(mechanicalsoup.LinkNotFoundError):
        browser.get("http://httpbin.org/nosuchpage")
    resp = browser.get("http://httpbin.org/")
    assert resp.status_code == 200
    browser.close()
def search_all(db=None):
    browser = mechanicalsoup.Browser()
    if db is None:
        db = Models.connect()
    search_sites(browser, db)
    search_forums(browser, db)
def check_cred(login_details, ma_cred_queue, call_origin):
    browser = mechanicalsoup.Browser(soup_config={"features": "html.parser"})
    try:
        login_page = browser.get('https://inbox.myallocator.com/en/login',
                                 timeout=15)
    except requests.exceptions.Timeout:
        ma_cred_queue.put(["exit", "MyAllocator website has timed out and "
                           "could not be reached, please try again later."])
        return
    except requests.exceptions.ConnectionError:
        ma_cred_queue.put(["exit", "Could not connect to the internet, please "
                           "check your connection and try again"])
        return

    login_form = login_page.soup.select('.login_box')[0].select('form')[0]
    login_form.select('#Username')[0]['value'] = login_details["ma_username"]
    login_form.select('#Password')[0]['value'] = login_details["ma_password"]
    home_page = browser.submit(login_form, login_page.url)

    # A successful login lands on a page listing the user's properties.
    property_tags = home_page.soup.find_all("a", class_="property-link")
    if len(property_tags) > 0:
        ma_cred_queue.put("ma ok {}".format(call_origin))
    else:
        ma_cred_queue.put("ma not ok {}".format(call_origin))
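# Sketch of how check_cred above might be driven from another thread, with
# results reported over a queue; the credentials and the "startup" call
# origin are placeholders.
import queue
import threading

login_details = {"ma_username": "demo@example.com", "ma_password": "secret"}
ma_cred_queue = queue.Queue()
threading.Thread(target=check_cred,
                 args=(login_details, ma_cred_queue, "startup")).start()
print(ma_cred_queue.get())  # blocks until check_cred reports back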
def anonymous_tucan():
    browser = ms.Browser(soup_config={"features": "lxml"})
    page = browser.get(TUCAN_URL)
    # HTML redirects, because why not
    page = browser.get(_get_redirection_link(page))
    page = browser.get(_get_redirection_link(page))
    return browser, page
def password_generator():
    password_lst = []
    print("Exit and generate password by typing '/'.")
    i = 0
    while True:
        try:
            userinput = input(f"Enter keyword {i+1}: ").rstrip()
            if userinput == '/' and i != 0:
                break
            url = API_URL + f'{userinput}'
            browser = mechanicalsoup.Browser()
            response = browser.get(url)
            data = json.loads(response.text)
            # randrange raises ValueError here if data is empty
            random_num = randrange(len(data))
            password_lst.append(data[random_num]['word'])
            i += 1
        except ValueError:
            if userinput == '/' and i == 0:
                print("Minimum of 1 keyword required.")
            else:
                print("Got nothing for that, try again.")
    print(f"Your {i} randomly generated words were {'/'.join(password_lst)}.")
    password = randomizer(''.join(password_lst))
    print(f"Generated password: {password}\n")
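# password_generator above relies on two module-level names it never defines:
# API_URL (a word-suggestion endpoint returning JSON objects with a 'word'
# key) and randomizer (a string-scrambling helper). Hypothetical stand-ins so
# the function can run; both the Datamuse URL and the shuffle-based
# randomizer are assumptions, not the original code.
import random

API_URL = 'https://api.datamuse.com/words?ml='

def randomizer(text):
    chars = list(text)
    random.shuffle(chars)
    return ''.join(chars)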
def check_wg_credentials(login_info, cred_queue, call_origin):
    browser = mechanicalsoup.Browser(soup_config={'features': 'html.parser'})
    try:
        login_page = browser.get(
            'https://www.wg-gesucht.de/mein-wg-gesucht.html', timeout=15)
    except (requests.exceptions.HTTPError,
            requests.exceptions.ConnectionError):
        cred_queue.put("no connection {}".format(call_origin))
        return
    except requests.exceptions.Timeout:
        cred_queue.put("timed out {}".format(call_origin))
        return

    login_form = login_page.soup.select('.panel-body')[0].select('form')[0]
    login_form.find("input",
                    {"id": "email_user"})['value'] = login_info['email']
    login_form.find("input",
                    {"id": "passwort-static"})['value'] = login_info['password']

    cred_queue.put("Signing into WG-Gesucht...")
    home_page = browser.submit(login_form, login_page.url)

    # The logout menu only exists when the login was successful.
    if len(home_page.soup.find_all(
            "div", {"class": "dropdown toggle-logout-menu"})) > 0:
        cred_queue.put("login ok {}".format(call_origin))
    else:
        cred_queue.put("login not ok {}".format(call_origin))
def main(argv):
    house_number = ''
    post_code = ''
    try:
        opts, args = getopt.getopt(argv, "h:p:")
    except getopt.GetoptError:
        print('FTTCChecker.py -h <houseNumber> -p <postCode>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--houseNumber"):
            house_number = arg
        elif opt in ("-p", "--postCode"):
            post_code = arg

    browser = mechanicalsoup.Browser()

    # The landing page populates some request variables; just select the
    # proceed button and submit.
    landing_page = browser.get("https://www.dslchecker.bt.com/#")
    proceed_form = landing_page.soup.select("form")[3]
    warning_page = browser.submit(proceed_form, landing_page.url)
    warning_form = warning_page.soup.select("form")[0]
    query_submit = browser.submit(warning_form, warning_page.url)

    query_form = query_submit.soup.select("form")[0]
    query_form.find('input', {'name': 'buildingnumber'})['value'] = house_number
    query_form.find('input', {'name': 'PostCode'})['value'] = post_code
    result_page = browser.submit(query_form, query_submit.url)
    result_table = result_page.soup.find(
        'table', {'style': 'border:1px solid black;border-collapse:collapse;'})

    # Nasty query to extract the "VDSL Range A (Clean)" availability result
    # from the returned table.
    clean_result = result_table.select('tr')[2].select('td')[6].select(
        'span')[0].text
    # impacted_result = result_table.select('tr')[3].select('td')[6].select('span')[0].text
    print(clean_result)
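# The usual entry point for a getopt-style script like main above; assumes
# sys is imported at module level.
if __name__ == '__main__':
    main(sys.argv[1:])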
def login(credential, banner_url, ca_bundle):
    import logging

    import mechanicalsoup

    login_prompt = 'twbkwbis.P_WWWLogin'
    login_verify = 'twbkwbis.P_ValLogin'

    browser = mechanicalsoup.Browser(soup_config={'features': 'html.parser'})
    browser.banner_url = banner_url
    browser.log = logging.getLogger()
    browser.log.info('Logging in via %s' % banner_url(login_prompt))
    browser.get(banner_url(login_prompt), verify=ca_bundle)
    result = browser.post(banner_url(login_verify), {
        'sid': credential['username'],
        'PIN': credential['password'],
    })
    if not result.ok:
        raise ValueError(result)

    import types
    browser.ca_bundle = ca_bundle
    browser.set_term = types.MethodType(set_term, browser)
    return browser
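# Sketch of the banner_url callable that login above expects: something that
# maps a Banner procedure name to a full URL. Host, path and credentials are
# illustrative placeholders.
def banner_url(procedure):
    return 'https://banner.example.edu/pls/prod/{}'.format(procedure)

credential = {'username': 'A00000000', 'password': 'secret-pin'}
browser = login(credential, banner_url,
                ca_bundle='/etc/ssl/certs/ca-bundle.crt')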
def main():
    # Create a browser and log in
    browser = mechanicalsoup.Browser()
    login(browser)

    # Copy 'figs' folder to temp directory
    create_dir(TEMP_PATH)
    create_dir(GENERATED_PATH)
    try:
        shutil.rmtree(os.path.join(TEMP_PATH, 'figs'))
    except OSError:
        pass
    shutil.copytree(os.path.join(this_dir, 'template/figs'),
                    os.path.join(TEMP_PATH, 'figs'))

    # Go through students
    num = 1
    for url in yield_elev_urls(browser):
        name, oppmoter = get_name_oppmoter(url)

        # Skip students with no attendance records
        if len(oppmoter) < 1:
            continue

        oppmoter_tot, mulige = oppmoter_mulige(WEEKS, oppmoter)
        prosent = round((oppmoter_tot / mulige) * 100)
        if attest(oppmoter_tot, prosent):
            out_str = 'Attest nr {} går til "{}".'.format(num, name)
            print(out_str)
            generate_attest(name)
            num += 1

    shutil.rmtree(os.path.join(TEMP_PATH, 'figs'))
def areas_of_chennai(request):
    browser = mechanicalsoup.Browser(soup_config={"features": "html.parser"})
    page = browser.get("http://www.mapsofindia.com/lat_long/tamilnadu/")
    soup = BeautifulSoup(page.text, "html.parser")
    table = soup.find("table", {"class": "tableizer-table"})
    tr = table.findAll("tr")
    tr.pop(0)  # drop the header row
    dic = {}
    for row in tr:
        td = row.findAll("td")

        lat = td[1].getText().replace("°", "")
        lat = lat.replace(";", ".")
        lat = lat.replace("' N", "")
        lat = lat.replace(' ', '')
        # strip any remaining non-ASCII characters before parsing
        lat = lat.encode('ascii', 'ignore').decode('ascii')
        lat = float(lat)

        lon = td[2].getText().replace("°", "")
        lon = lon.replace(";", ".")
        lon = lon.replace("' E", "")
        lon = lon.replace(' ', '')
        lon = lon.encode('ascii', 'ignore').decode('ascii')
        lon = float(lon)

        dic[td[0].getText()] = (lat, lon)
        # for i, cells in enumerate(td):
        #     print(cells.getText(), end='|')

    # for towns in dic:
    #     print(towns, dic[towns])
    return TemplateResponse(request, "index.html", {'towns': dic})
def get_data_directores(lista_empresas, lista_links, verbose=True):
    browser = mechanicalsoup.Browser(soup_config={'features': 'lxml'})
    directorio = {}
    for (rut, nombre), url in zip(lista_empresas, lista_links):
        if verbose:
            print('Obteniendo datos para', nombre)
        page = browser.get(url)
        trs = page.soup.findAll('tr')
        dir_data = []
        for tr in trs:
            tds = tr.findAll('td')
            if len(tds) < 4:
                continue
            director = {
                'rut': tds[0].text,
                'nombre': tds[1].text,
                'cargo': tds[2].text,
                'fecha_nombramiento': tds[3].text,
            }
            dir_data.append(director)
        directorio[rut] = dir_data
    return directorio
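# Illustrative call for get_data_directores above; the company (rut, nombre)
# pairs and the URL are made-up placeholders.
empresas = [('96505760-9', 'Empresa Ejemplo S.A.')]
links = ['https://www.example.cl/entidad/96505760-9/directorio']
directorio = get_data_directores(empresas, links, verbose=True)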
def test__request_file(httpbin):
    form_html = """
    <form method="post" action="{}/post">
      <input name="pic" type="file" />
    </form>
    """.format(httpbin.url)
    form = BeautifulSoup(form_html, "lxml").form

    # create a temporary file for testing file upload
    pic_path = tempfile.mkstemp()[1]
    with open(pic_path, "w") as f:
        f.write(":-)")

    form.find("input", {"name": "pic"})["value"] = pic_path

    browser = mechanicalsoup.Browser()
    response = browser._request(form)

    # Check that only "files" includes a "pic" keyword in the response
    found = False
    for key, value in response.json().items():
        if key == "files":
            assert value["pic"] == ":-)"
            found = True
        else:
            assert (value is None) or ("pic" not in value)
    assert found

    assert "multipart/form-data" in response.request.headers["Content-Type"]
def try_to_login(server, login):
    URL = "http://{}/wp-login.php".format(server)
    browser = mechanicalsoup.Browser()

    # request login page
    login_page = browser.get(URL)
    # grab the login form
    login_form = login_page.soup.find("form", {"id": "loginform"})

    with open('Passwords', "r") as passwordsFile:
        password = passwordsFile.readline()
        while len(password) != 0:
            password = password.rstrip('\n')  # drop the trailing newline
            # fill in the login and password inputs
            login_form.find("input", {"name": "log"})["value"] = login
            login_form.find("input", {"name": "pwd"})["value"] = password
            # submit the form
            response = browser.submit(login_form, login_page.url)
            if "wp-admin" in str(response.url):
                print("Login SUCCESS for {} - {}".format(login, password))
                with open("SuccessLogin", "a") as f:
                    f.write("{} : {}".format(login, password))
                break
            # else:
            #     print("Login failed for password: {}".format(password))
            password = passwordsFile.readline()
def test_request_keyword_error(keyword):
    """Make sure an exception is raised if kwargs duplicate an arg."""
    form_html = "<form></form>"
    browser = mechanicalsoup.Browser()
    with pytest.raises(TypeError, match="multiple values for"):
        browser._request(BeautifulSoup(form_html, "lxml").form,
                         'myurl', **{keyword: 'somevalue'})
def login(login, passw):
    print("Pass:", passw)
    url = "https://freddiemac.embs.com/FLoan/secure/auth.php"
    url2 = "https://freddiemac.embs.com/FLoan/Data/download.php"
    s = requests.Session()
    browser = ms.Browser(session=s)

    print("Logging in....")
    login_page = browser.get(url)
    login_form = login_page.soup.find("form", {"class": "form"})
    login_form.find("input", {"name": "username"})["value"] = login
    login_form.find("input", {"name": "password"})["value"] = passw
    response = browser.submit(login_form, login_page.url)

    login_page2 = browser.get(url2)
    print("To the continue page...")
    next_form = login_page2.soup.find("form", {"class": "fmform"})
    a = next_form.find("input", {"name": "accept"}).attrs
    a['checked'] = True
    response2 = browser.submit(next_form, login_page2.url)

    print("Start Downloading from..." + response2.url)
    table = response2.soup.find("table", {"class": "table1"})
    t = table.find_all('a')
    for x in range(76, 88):
        c = 'https://freddiemac.embs.com/FLoan/Data/' + t[x]['href']
        r = s.get(c)
        z = ZipFile(BytesIO(r.content))
        z.extractall(os.getcwd())
    print("Downloaded all samples successfully!")
def crawl_nytimes_archive(queue):
    browser = mechanicalsoup.Browser()

    # Fetch date targets from config file
    (start_date_year, start_date_month, start_date_day,
     end_date_year, end_date_month, end_date_day) = getConfig()

    # Define date values so they can be incremented in the search URL.
    end_date = datetime.date(end_date_year, end_date_month, end_date_day)
    # The date to start crawling at.
    target_date = datetime.date(start_date_year, start_date_month,
                                start_date_day)

    # Loop through search results by calling the search URL for each date in
    # range, one at a time.
    while target_date < end_date:
        # Set search_url based on target_date
        search_url = ("http://query.nytimes.com/svc/add/v1/sitesearch.json"
                      "?end_date={0:d}{1:02d}{2:02d}"
                      "&begin_date={3:d}{4:02d}{5:02d}"
                      "&page=1&facet=true").format(
                          target_date.year, target_date.month, target_date.day,
                          target_date.year, target_date.month, target_date.day)
        logging.info("Search URL=" + search_url)
        date_start_time = time.time()

        page_number = 1
        # Loop through each page of search results.
        while page_number < 100:
            # Queue all the links for this search page.
            crawlPage(target_date, browser, search_url, queue, page_number)
            # Keep the old page number to perform string replacement in the
            # new URL.
            old_page_number = page_number
            page_number += 1
            # Bump the page number in the URL by replacing the last occurrence
            # of the old number (done on the reversed string).
            search_url = search_url[::-1].replace(
                str(old_page_number)[::-1], str(page_number)[::-1], 1)[::-1]

        logging.info("Day {} queued in {} seconds".format(
            target_date, (time.time() - date_start_time)))
        # Advance to the next date once all pages for this one are queued.
        target_date += datetime.timedelta(days=1)
def test_submit_online():
    """Complete and submit the pizza form at http://httpbin.org/forms/post"""
    browser = mechanicalsoup.Browser()
    page = browser.get("http://httpbin.org/forms/post")
    form = page.soup.form

    form.find("input", {"name": "custname"})['value'] = 'Philip J. Fry'
    # leave custtel blank without value
    assert 'value' not in form.find('input', {'name': 'custtel'}).attrs
    form.find("input", {"name": "size", "value": "medium"})['checked'] = ""
    form.find("input", {"name": "topping", "value": "cheese"})['checked'] = ""
    form.find("input", {"name": "topping", "value": "onion"})['checked'] = ""
    form.find("textarea", {"name": "comments"}).insert(0, 'freezer')
    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which echoes the
    # submitted form data back as JSON
    json = response.json()
    data = json['form']
    assert data["custname"] == 'Philip J. Fry'
    assert data["custtel"] == ''  # web browser submits '' for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"
def __init__(self):
    self.db = sqlite3.connect('bancoface.db')
    self.cursor = self.db.cursor()
    self.cursor.execute(
        'CREATE TABLE IF NOT EXISTS Amigos(nome VARCHAR,link VARCHAR)')
    self.cursor.execute(
        'CREATE TABLE IF NOT EXISTS Sobre(nome VARCHAR,sexo VARCHAR,'
        'fones VARCHAR,aniversario VARCHAR,cidNatal VARCHAR,cidAtual VARCHAR,'
        'relacionamento VARCHAR,familiares VARCHAR,acontecimentos VARCHAR,'
        'linksobre VARCHAR)')
    self.browser = mechanicalsoup.Browser()
    self.complete = 'https://m.facebook.com'

    pagina = self.browser.get('https://m.facebook.com/login.php')
    login = '******'
    senha = 'sw0rdf1nsh123'
    print('Conectando ao facebook com {}'.format(login))

    formulario = pagina.soup.find('form')
    formulario.find('input', {'type': 'text'})['value'] = login
    formulario.find('input', {'type': 'password'})['value'] = senha
    response = self.browser.submit(formulario, pagina.url)

    if 'save-device' in response.url:
        print('Conectado com Sucesso')
        self.meumenu()
    else:
        print('Login ou Senha Incorretos')
def run_program(args):
    browser = mechanicalsoup.Browser(soup_config={'features': "html.parser"})
    results = []
    print("Starting to crawl. Options:\n{}\n".format(args))
    for page_num in range(args.page_start, args.page_end + 1):
        print("\tCrawling page: {}/{}".format(page_num, args.page_end))
        results += get_results(browser, args.url, page_num)

    with open(args.json_out, 'w') as f:
        json.dump(results, f, indent=4, sort_keys=True)
    # print(results)
    # render(results)

    import os
    import urllib.request

    img_urls = [r['img'] for r in results if r is not None]
    try:
        os.mkdir('data/{}'.format(args.target))
    except OSError:
        pass
    for idx, img in enumerate(img_urls):
        try:
            urllib.request.urlretrieve(
                img, "data/{}/{}.jpg".format(args.target, idx))
        except TypeError:
            pass
def test_submit_online(httpbin):
    """Complete and submit the pizza form at http://httpbin.org/forms/post"""
    browser = mechanicalsoup.Browser()
    page = browser.get(httpbin + "/forms/post")
    form = page.soup.form

    form.find("input", {"name": "custname"})["value"] = "Philip J. Fry"
    # leave custtel blank without value
    assert "value" not in form.find("input", {"name": "custtel"}).attrs
    form.find("input", {"name": "size", "value": "medium"})["checked"] = ""
    form.find("input", {"name": "topping", "value": "cheese"})["checked"] = ""
    form.find("input", {"name": "topping", "value": "onion"})["checked"] = ""
    form.find("textarea", {"name": "comments"}).insert(0, "freezer")
    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which echoes the
    # submitted form data back as JSON
    json = response.json()
    data = json["form"]
    assert data["custname"] == "Philip J. Fry"
    assert data["custtel"] == ""  # web browser submits "" for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"

    assert json["headers"]["User-Agent"].startswith('python-requests/')
    assert 'MechanicalSoup' in json["headers"]["User-Agent"]
def test_submit_online():
    """Complete and submit the pizza form at http://httpbin.org/forms/post"""
    browser = mechanicalsoup.Browser()
    page = browser.get("http://httpbin.org/forms/post")
    form = mechanicalsoup.Form(page.soup.form)

    input_data = {"custname": "Philip J. Fry"}
    form.input(input_data)

    check_data = {"size": "large", "topping": ["cheese"]}
    form.check(check_data)
    check_data = {"size": "medium", "topping": "onion"}
    form.check(check_data)

    form.textarea({"comments": "warm"})
    form.textarea({"comments": "actually, no, not warm"})
    form.textarea({"comments": "freezer"})

    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which echoes the
    # submitted form data back as JSON
    json = response.json()
    data = json["form"]
    assert data["custname"] == "Philip J. Fry"
    assert data["custtel"] == ""  # web browser submits "" for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"
def test_get_request_kwargs_when_method_is_in_kwargs(httpbin):
    """get_request_kwargs should raise TypeError when kwargs duplicate the
    form's method."""
    browser = mechanicalsoup.Browser()
    page = browser.get(httpbin + "/forms/post")
    form = page.soup.form
    kwargs = {"method": "post"}
    with pytest.raises(TypeError):
        browser.get_request_kwargs(form, page.url, **kwargs)
def logout(self):
    """Logs out the user by replacing the session browser with a new one."""
    logging.info("Logging out.")
    self._logged_in = False
    self._browser = ms.Browser()