Example #1
def extract_results(item, condition=None):
    # URL is extended based on condition
    if condition == "new":
        url = util.create_url(MAIN_URL, item, DELIMITER) + "&condition=New,New%20or%20Used&adtype=998"
    else:
        url = util.create_url(MAIN_URL, item, DELIMITER) + "&condition=Used,Refurbished,For%20Parts/Not%20Working,New%20or%20Used&adtype=998"
    results = []
    # Check if page has data
    try:
        soup = util.check_exceptions(url)
        table = soup.find('tbody', class_='ResultsNewTable')
        rows = table.find_all('tr')
    except Exception:
        return []
    # Get first 10 valid results only
    for row in rows:
        new_result = Result(row.find('a').get('title'))
        new_result.url = row.find('a').get('href')
        new_result.price = util.get_price(row.find_all('td')[4].contents[0])
        # the listing number extracted from the title determines the image path
        number = util.get_price(new_result.title)
        new_result.image_src = "https://photos.labx.com/labx/" + number + "/" + number + "-0.jpg"
        if util.is_valid_price(new_result.price):
            results.append(new_result)
            if len(results) == 10:
                return results
    return results
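
These scraper examples all lean on a shared `Result` container and a `util` helper module that are not shown. Below is a minimal sketch of the assumed interface, reconstructed from the call sites; the bodies are illustrative guesses, not the original implementations (the functions would live in `util`):

import re
import urllib.request
from bs4 import BeautifulSoup

class Result:
    # container for one scraped listing (sketch)
    def __init__(self, title):
        self.title = title
        self.url = None
        self.image_src = None
        self.price = None
    # some examples use setter/getter style instead of bare attributes
    def set_url(self, url): self.url = url
    def set_price(self, price): self.price = price
    def set_image_src(self, src): self.image_src = src
    def get_price(self): return self.price

def create_url(main_url, search_term, delimiter):
    # join the search words with the site-specific delimiter
    return main_url + delimiter.join(str(search_term).split())

def get_price(text):
    # pull the first run of digits out of arbitrary price text, as a string
    match = re.search(r'[\d,]+(?:\.\d+)?', str(text))
    return match.group(0).replace(',', '') if match else ''

def is_valid_price(price):
    # a price is usable if any digits were extracted
    return bool(price)

def check_exceptions(url):
    # fetch and parse a page; callers catch whatever this raises
    return BeautifulSoup(urllib.request.urlopen(url), "html.parser")
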
Example #2
def test_query_parameters_are_reflected_in_response(test_client,
                                                    confidential_client):
    """
        GIVEN:  GET request to the /authorize endpoint
        WHEN:   query parameters are specified
        THEN:   response is 200 OK with parameters as hidden input fields in the HTML
    """
    client = confidential_client
    url = create_url('/authorize',
                     client_id=client['client_id'],
                     redirect_uri=client['redirect_uris'][0],
                     response_type='code',
                     state='96f07e0b-992a-4b5e-a61a-228bd9cfad35',
                     scope='read write')
    response = test_client.get(url)
    soup = BeautifulSoup(response.data, features="html.parser")

    assert response.status_code == 200
    assert soup.find('input',
                     dict(name='client_id'))['value'] == client['client_id']
    assert soup.find(
        'input',
        dict(name='redirect_uri'))['value'] == client['redirect_uris'][0]
    assert soup.find(
        'input',
        dict(name='state'))['value'] == '96f07e0b-992a-4b5e-a61a-228bd9cfad35'
    assert soup.find('input', dict(name='scope'))['value'] == 'read write'
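
The authorization-server tests call a `create_url` helper that is also not shown (distinct from the scrapers' `util.create_url`). A sketch consistent with its usage here; the name and signature come from the calls above, the body is an assumption:

from urllib.parse import urlencode

def create_url(path, **query_params):
    # assumed helper: append keyword arguments as a URL query string
    return path + '?' + urlencode(query_params) if query_params else path
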
Example #3
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    # request headers (unused: the page is fetched via headless Chrome below)
    headers = {
        'Host': 'www.biosurplus.com',
        'Connection': 'keep-alive',
        'Accept': 'text/html',
        'Referer': 'http://www.biosurplus.com/?ajax_search_nonce=b2ba2354a5&s==Beckman+Coulter&post_type=product',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-US,en;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
    }
    url = util.create_url(MAIN_URL, search_term,
                          DELIMITER) + "&post_type=product"
    path_to_chromedriver = 'chromedriver.exe'
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    browser = webdriver.Chrome(executable_path=path_to_chromedriver,
                               options=option)
    browser.get(url)
    time.sleep(5)

    soup = BeautifulSoup(browser.page_source, "html.parser")
    browser.quit()
    table = soup.find('div', class_='content-area')
    try:
        # check that the results table contains product rows
        rows = table.find_all("li", {"class": re.compile('post-*')})
    except AttributeError:
        return []
    results = []
    for row in rows:
        new_result = Result(
            row.find('h2', class_="woocommerce-loop-product__title").text)
        new_result.set_price(
            util.get_price(row.find(text=re.compile("Price*"))))
        #Handle different paths
        try:
            img_src = row.find('div', class_="image_frame").find(
                'div', class_="product-loop-image bsi-thumb").get("style")
        except:
            img_src = row.find('div', {
                "style": re.compile('background*')
            }).get('style')
        img_src = img_src.replace(') ', '( ')
        img_src = img_src.split('(')[1]
        img_src = img_src.split(')')[0]
        new_result.set_image_src(img_src)
        new_result.set_url(row.find('a').get('href'))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
            if len(results) == 10:
                return results
    return results
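
One portability note on the Selenium examples: `webdriver.Chrome(executable_path=...)` was removed in Selenium 4, so on a current Selenium the browser setup would need the Service-based construction, roughly:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

option = webdriver.ChromeOptions()
option.add_argument('--headless')  # recent Chrome also accepts '--headless=new'
browser = webdriver.Chrome(service=Service('chromedriver.exe'), options=option)
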
Example #4
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    try:
        soup = util.check_exceptions(url)
        product_table = soup.find('table', class_='table_content')
        result_links = product_table.find_all('a')
    except:
        return []

    equips = []
    for link in result_links:
        product_url = HOME_URL + link.get('href')
        product_page_content = BeautifulSoup(
            urllib.request.urlopen(product_url), "html.parser")
        title = ''.join(
            product_page_content.find(
                'div',
                class_='product_left').find('h1').find_all(text=True)).strip()
        equipment = Result(title)
        equipment.url = product_url
        equipment.image_src = HOME_URL + product_page_content.find(
            'img', {
                "id": "big_product_img"
            }).get('src')
        equipment.price = util.get_price(
            product_page_content.find('div',
                                      class_='pr_price2').find(text=True))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
Example #5
def extract_results(search_word, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    product_grid = soup.find('ul', class_='Products_ul')
    try:
        total_equips = product_grid.find_all('li', class_='Products')
    except:
        return []
    equips = []
    for equip in total_equips:
        title = equip.find(
            'div', class_='title').find('span').find(text=True).strip()
        equipment = Result(title)
        equipment.url = equip.find('a').get('href')
        equipment.image_src = equip.find('div',
                                         class_='Image').find('img').get('src')
        price_text = equip.find('div', class_='price').find_all(text=True)
        equipment.price = util.get_price(''.join(price_text))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
Example #6
def extract_results(search_word, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    path_to_chromedriver = 'chromedriver.exe'
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    browser = webdriver.Chrome(executable_path=path_to_chromedriver,
                               options=option)
    browser.get(url)
    time.sleep(5)
    soup = BeautifulSoup(browser.page_source, "html.parser")
    browser.quit()
    product_grid = soup.find('ul', class_='product_list p_list')
    try:
        total_equips = product_grid.find_all(
            'li', {"class": re.compile('p_list_item*')})
    except:
        return []
    equips = []
    for equip in total_equips:
        title = equip.find('div', class_='title').find('a').text
        equipment = Result(title)
        equipment.set_url(HOME_URL + equip.find('a').get('href'))
        equipment.set_image_src(
            HOME_URL +
            equip.find('div', class_='thumb').find('img').get('src'))
        price_text = equip.find('li', class_='price').text
        equipment.set_price(util.get_price(price_text))
        if util.is_valid_price(equipment.get_price()):
            equips.append(equipment)
        if len(equips) == 10:
            return equips
    return equips
Example #7
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find('div', class_='content-area')
    try:
        rows = table.find_all("article")
    except AttributeError:
        return []

    results = []
    for row in rows:
        new_result = Result(
            row.find('h1', class_="entry-title").find("a").text)
        result_url = row.find('a').get('href')

        #scrape from the result's page
        result_soup = BeautifulSoup(urllib.request.urlopen(result_url),
                                    "html.parser")
        new_result.set_url(result_url)
        new_result.set_price(
            util.get_price(result_soup.find('span', class_="amount").text))
        new_result.set_image_src(
            result_soup.find('div', class_='images').find('img').get('src'))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
            if len(results) == 10: return results
    return results
Example #8
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    url = url + '&cond=used' if condition != 'new' else url + '&cond=new'
    path_to_chromedriver = 'chromedriver.exe'
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    browser = webdriver.Chrome(executable_path=path_to_chromedriver,
                               options=option)
    browser.get(url)
    time.sleep(5)
    soup = BeautifulSoup(browser.page_source, "html.parser")
    browser.quit()

    equips = []
    try:
        sale_equips = soup.find_all('div', {'id': re.compile('listing_*')})
    except:
        return equips

    for equip in sale_equips:
        title = equip.find('h4').find('a').text.strip()
        equipment = Result(title)
        equipment.set_url(
            HOME_URL + equip.find('div', class_='row').find('a').get('href'))
        equipment.set_image_src(equip.find('img').get('src'))
        equipment.set_price(
            util.get_price(equip.find('span', class_='price').text))
        if util.is_valid_price(equipment.get_price()):
            equips.append(equipment)
        if len(equips) == 10:
            return equips
    return equips
Example #9
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    url = url if condition != "new" else url + '&Condition=5067'
    try:
        soup = util.check_exceptions(url)
        product_grid = soup.find('div', class_='pagebody')
        total_equips = product_grid.find_all('div', class_='el')
    except:
        return []
    equips = []
    for equip in total_equips:
        # items_details have names of generic device, model, manufacturer bundled together
        items_details = equip.find('div',
                                   class_='item_details').find_all(text=True)
        title = ' '.join(items_details).strip()
        equipment = Result(title)
        equipment.url = equip.find('div', class_='image').find(
            'a', class_='item_number').get('href')
        equipment.image_src = equip.find('div',
                                         class_='image').find('img').get('src')
        price_el = equip.find('span', class_='price_element')
        if price_el is None:
            price_text = equip.find('div', class_='price').find(text=True)
        else:
            price_text = price_el.find(text=True)
        equipment.price = util.get_price(''.join(price_text))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
Example #10
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    try:
        product_contents = soup.find_all('div',
                                         class_='products-mnbox-content')
    except:
        return []

    results = []
    for product_content in product_contents:
        equip_url = HOME_URL + product_content.find('a').get('href')
        models_site = BeautifulSoup(urllib.request.urlopen(equip_url),
                                    "html.parser")
        model_descriptions = models_site.find_all('td', class_='description')

        # do not name the loop variable 're': that would shadow the regex module
        for description in model_descriptions:
            result = Result(
                description.find('div', {
                    'id': 'gaProductName'
                }).find(text=True).strip())
            result.image_src = 'https:' + description.find(
                'img', class_='lazy').get('data-original')
            result.url = HOME_URL + description.find('a').get('href')
            price_site = BeautifulSoup(urllib.request.urlopen(result.url),
                                       "html.parser")
            result.price = util.get_price(
                price_site.find('div', class_='price-box').find(
                    'span', class_='price-range').find(text=True))
            if util.is_valid_price(result.price):
                results.append(result)
            if len(results) >= 10:
                return results
    return results
Example #11
def extract_results(search_word, condition=None):
    if condition == "new":
        return []
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    try:
        soup = util.check_exceptions(url)
        product_grid = soup.find('div', class_='v-product-grid')
        total_equips = product_grid.find_all('div', class_='v-product')
    except:
        return []
    equips = []

    for equip in total_equips:
        title = equip.find(
            'a', class_='v-product__title productnamecolor colors_productname'
        ).find(text=True).strip()
        equipment = Result(title)
        equipment.url = equip.find('a', class_='v-product__img').get('href')
        equipment.image_src = 'http:' + equip.find('img').get('src')
        price_text = equip.find(
            'div', class_='product_productprice').find_all(text=True)
        equipment.price = util.get_price(''.join(price_text))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
Example #12
def extract_results(item, condition=None):
    # URL is extended based on condition
    if condition == "new":
        url = util.create_url(MAIN_URL, item, DELIMITER) + "&condition=468"
    else:
        url = util.create_url(MAIN_URL, item, DELIMITER) + "&condition=467,469"
    results = []
    # request headers (unused: the page is fetched via headless Chrome below)
    headers = {
        'Host': 'www.labx.com',
        'Connection': 'keep-alive',
        'Accept': '*/*',
        'Referer': 'https://www.labx.com/item/vacuum-pump-230-v-50-hz/12183467',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
        'User-Agent': 'Chrome/80.0.3987.132, Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-site',
        'Upgrade-Insecure-Requests': '1',
        'x-runtime': '148ms'
    }
    # Check if page has data
    try:
        path_to_chromedriver = 'chromedriver.exe'
        option = webdriver.ChromeOptions()
        option.add_argument('headless')
        browser = webdriver.Chrome(executable_path=path_to_chromedriver,
                                   options=option)
        browser.get(url)
        time.sleep(5)
        soup = BeautifulSoup(browser.page_source, 'html.parser')
        browser.quit()
        rows = soup.find_all('div', class_='product-card')
    except Exception:
        return []
    # Get first 10 valid results only
    for row in rows:
        new_result = Result(row.find('a', class_='card-title').text)
        new_result.url = HOME_URL + row.find('a').get('href')
        new_result.price = util.get_price(row.find(class_='price').get_text())
        new_result.image_src = row.find('div', class_='card-img-top').find("img").get("src")
        if util.is_valid_price(new_result.price):
            results.append(new_result)
            if len(results) == 10:
                break
    return results
Example #13
def test_token_endpoint_single_sign_on(test_client, confidential_client):
    """
        GIVEN:  Successful retrieval of tokens after sign in
        WHEN:   another authorization request with different scope is executed
        THEN:   auth code is issued without login screen being presented
    """

    with freezegun.freeze_time("2020-03-14 12:00:00"):
        code, _ = authenticate_user(test_client,
                                    confidential_client,
                                    scope='openid')

        client_id = confidential_client['client_id']
        client_secret = confidential_client['client_secret']
        plaintext = f'{client_id}:{client_secret}'

        headers = {
            'Authorization': 'Basic ' + str(
                base64.b64encode(plaintext.encode('utf-8')), 'utf-8')
        }
        post_data = {
            'grant_type': 'authorization_code',
            'code': code,
            'scope': 'openid',
            'client_id': client_id
        }

        response = test_client.post('/token', headers=headers, data=post_data)

        assert response.status_code == 200
        assert response.headers['Content-Type'] == 'application/json'
        token = decode_token(response.json['access_token'],
                             audience='https://localhost:5000/')
        assert token['aud'] == 'https://localhost:5000/'
        assert response.json['refresh_token']
        token = decode_token(response.json['id_token'], audience=client_id)
        assert token['aud'] == client_id

    with freezegun.freeze_time("2020-03-14 12:45:00"):
        # issue authorization request for different scope
        url = create_url('/authorize',
                         client_id=confidential_client['client_id'],
                         redirect_uri=confidential_client['redirect_uris'][0],
                         response_type='code',
                         state='96f07e0b-992a-4b5e-a61a-228bd9cfad35',
                         scope='read write')
        response = test_client.get(url)

        # expect code to be issued without prompt for login
        assert response.status_code == 302
        parsed_uri = urlparse(response.headers['Location'])
        assert '{uri.scheme}://{uri.netloc}{uri.path}'.format(
            uri=parsed_uri) == confidential_client['redirect_uris'][0]
        query_params = dict(
            parse_qsl(urlsplit(response.headers['Location']).query))
        assert query_params['code']
        assert query_params['state'] == '96f07e0b-992a-4b5e-a61a-228bd9cfad35'
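
`decode_token` is another test helper not shown here. A plausible sketch using PyJWT; the real helper presumably verifies the signature against the server's signing key, which this assumption omits:

import jwt  # PyJWT

def decode_token(token, audience):
    # sketch: skip signature verification but still enforce the audience
    # claim, which the assertions above depend on
    return jwt.decode(token, audience=audience,
                      options={'verify_signature': False, 'verify_aud': True})
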
Example #14
def test_invalid_client_id_results_in_error(test_client):
    """
        GIVEN:  GET request to the /authorize endpoint
        WHEN:   client_id query parameter is not registered
        THEN:   response is 400 Bad Request
    """
    url = create_url('/authorize', client_id='unknown_client', response_type='code')
    response = test_client.get(url)
    assert response.status_code == 400
Example #15
def test_missing_client_id_results_in_error(test_client):
    """
        GIVEN:  GET request to the /authorize endpoint
        WHEN:   client_id query parameter is missing
        THEN:   response is 400 Bad Request
    """
    url = create_url('/authorize')
    response = test_client.get(url)
    assert response.status_code == 400
Example #16
def test_invalid_redirect_uri_results_in_error(test_client, confidential_client):
    """
        GIVEN:  GET request to the /authorize endpoint
        WHEN:   redirect_uri query parameter does not match uri registered in client
        THEN:   response is 400 Bad Request
    """
    client_id = confidential_client['client_id']
    url = create_url('/authorize', client_id=client_id, response_type='code', redirect_uri='xyz')
    response = test_client.get(url)
    assert response.status_code == 400
Example #17
def extract_results(item, requested_condition=None):
    path_to_chromedriver = 'chromedriver.exe'
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    browser = webdriver.Chrome(executable_path=path_to_chromedriver,
                               options=option)
    url = util.create_url(MAIN_URL, item, DELIMITER)
    browser.get(url)
    time.sleep(5)
    soup = BeautifulSoup(browser.page_source, 'html.parser')
    results = []
    #Check for data
    try:
        table = soup.find('div', class_='search results')
        rows = table.find_all('li', class_='item product product-item')
    except AttributeError:
        return results
    #Get 1st 10 results only

    for row in rows:
        new_result = Result(
            row.find('a', class_='product-item-link').text.strip())
        new_result.url = row.find('a').get('href')
        new_result.price = util.get_price(
            str(row.find('span', class_='price').find(text=True)).encode('utf-8')[1:])
        new_result.image_src = row.find('img').get('src')
        browser.get(new_result.url)
        new_soup = BeautifulSoup(browser.page_source, "html.parser")
        condition = new_soup.find('div',
                                  class_='product attribute description').find(
                                      'div', class_='value').text
        conditions = ['new', 'New', 'used', 'Used']
        bad_condition_types = [
            'bad', 'poor', 'not working', 'broken', 'not functional'
        ]
        #Check for matching conditions
        for word in conditions:
            if word in condition:
                if (requested_condition is None and word.lower() == 'used') or \
                        (requested_condition is not None and requested_condition.lower() == word.lower()):
                    #Only add working equipment with a valid price
                    if util.is_valid_price(new_result.price) and \
                            not any(bad in condition for bad in bad_condition_types):
                        results.append(new_result)
                        if len(results) == 10:
                            return results
                break
    return results
Example #18
def test_unsupported_response_type_results_in_redirect(test_client, confidential_client):
    """
        GIVEN:  GET request to the /authorize endpoint
        WHEN:   response_type query parameter is not supported
        THEN:   response is 302 Redirect with error query parameter
    """
    client = confidential_client
    url = create_url('/authorize', client_id=client['client_id'], response_type='token',
                     redirect_uri=client['redirect_uris'][0], state='96f07e0b-992a-4b5e-a61a-228bd9cfad35')
    response = test_client.get(url)
    assert response.status_code == 302
    query_params = dict(parse_qsl(urlsplit(response.headers['Location']).query))
    assert query_params['error'] == 'unsupported_response_type'
Example #19
def extract_results(search_term, condition=None):
	url=''
	if condition=='new':
		url = util.create_url(MAIN_URL, search_term, DELIMITER) + '&LH_BIN=1' + NEW
	else:
		url = util.create_url(MAIN_URL, search_term, DELIMITER) + '&LH_BIN=1' + USED
	page = urllib.request.urlopen(url)
	soup = BeautifulSoup(page,"html.parser")
	table=soup.find('div', id='ResultSetItems')
	try:
		rows=table.findAll('li', class_='sresult lvresult clearfix li')
	except:
		return []
	results=[]
	for row in rows: 
		new_result=Result(row.find('h3', class_="lvtitle").find(text=True))
		new_result.url=row.find('h3', class_="lvtitle").find('a').get('href')
		new_result.image_src=row.find('img', class_='img').get('src')
		new_result.price=util.get_price(row.find('li', class_="lvprice prc").find('span').find(text=True))
		if util.is_valid_price(new_result.price):
			results.append(new_result)
	return results
Example #20
def extract_results(search_term, condition=None):
	url=''
	if condition=='new':
		url = util.create_url(MAIN_URL, search_term, DELIMITER) + '&rt=nc' + NEW
	else:
		url = util.create_url(MAIN_URL, search_term, DELIMITER) + '&rt=nc' + USED
	page=urllib.request.urlopen(url)
	soup = BeautifulSoup(page,"html.parser")
	table=soup.find('div',class_='srp-river-results clearfix')
	try:
		rows=table.findAll('div', class_='s-item__wrapper clearfix')
	except:
		return []
	results=[]
	for row in rows: 
		new_result=Result(row.find('img', class_='s-item__image-img').get('alt'))
		new_result.set_url(row.find('a').get('href'))
		new_result.set_image_src(row.find('img', class_='s-item__image-img').get('src'))
		new_result.set_price(util.get_price(row.find('span', class_="s-item__price").text))
		if util.is_valid_price(new_result.get_price()):
			results.append(new_result)
	return results
Example #21
def test_public_client_without_code_challenge_results_in_error(test_client, public_client):
    """
        GIVEN:  GET request to the /authorize endpoint
        WHEN:   client_id identifies a public client and code_challenge query parameter is missing
        THEN:   response is 302 Redirect with error query parameter (PKCE required for public clients)
    """
    client = public_client
    url = create_url('/authorize', client_id=client['client_id'], redirect_uri=client['redirect_uris'][0],
                     response_type='code', state='96f07e0b-992a-4b5e-a61a-228bd9cfad35')
    response = test_client.get(url)
    assert response.status_code == 302
    query_params = dict(parse_qsl(urlsplit(response.headers['Location']).query))
    assert query_params['error'] == 'invalid_request'
    assert query_params['error_description'] == 'code challenge required'
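
For contrast, here is a sketch of what a compliant public client would send: the PKCE parameters from RFC 7636 (S256 method), assuming this server accepts the standard code_challenge/code_challenge_method query parameters:

import base64
import hashlib
import secrets

# RFC 7636: the verifier is random; the challenge is base64url(SHA-256(verifier)), unpadded
code_verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b'=').decode()
code_challenge = base64.urlsafe_b64encode(
    hashlib.sha256(code_verifier.encode('ascii')).digest()).rstrip(b'=').decode()

url = create_url('/authorize', client_id=client['client_id'],
                 redirect_uri=client['redirect_uris'][0], response_type='code',
                 code_challenge=code_challenge, code_challenge_method='S256')
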
Example #22
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    headers = {
        'Host': 'www.biosurplus.com',
        'Connection': 'keep-alive',
        'Accept': 'text/html',
        'Referer': 'http://www.biosurplus.com/store/search/?per_page=24&product_search_q=Beckman+Coulter+Biomek+Workstation',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-US,en;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
    }
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    req = urllib.request.Request(url, headers=headers)
    page = urllib.request.urlopen(req)

    #The response is gzipped. Unzip it before parsing
    buffered_data = io.BytesIO(page.read())
    unzipped_page = gzip.GzipFile(fileobj=buffered_data)

    soup = BeautifulSoup(unzipped_page, "html.parser")
    table = soup.find('div', class_='product_browse')
    try:
        #check that the table contains product rows
        rows = table.find_all("div", class_="fps_featured_product")
    except AttributeError:
        return []
    results = []
    for row in rows:
        manufacturer = row.find('p',
                                class_="fps_fp_description").find(text=True)
        title = row.find('h2',
                         class_="fps_fp_heading").find("a").find(text=True)
        new_result = Result(manufacturer + " " + title)
        new_result.price = util.get_price(
            row.find('p', class_='product_price').find(text=True))
        new_result.image_src = row.find(
            'div', class_="fps_fp_image_inner").find('img').get('src')
        new_result.url = "www.biosurplus.com" + row.find('a').get('href')
        if util.is_valid_price(new_result.price):
            results.append(new_result)
            if len(results) == 10: return results
    return results
Example #23
def test_invalid_scope_returns_error(test_client, confidential_client):
    """
        GIVEN:  GET request to the /authorize endpoint
        WHEN:   query parameters are specified, scope is invalid
        THEN:   response is 302 Redirect with error parameters
    """
    client = confidential_client
    url = create_url('/authorize', client_id=client['client_id'], redirect_uri=client['redirect_uris'][0],
                     response_type='code', state='96f07e0b-992a-4b5e-a61a-228bd9cfad35', scope='scope1 scope2')
    response = test_client.get(url)
    assert response.status_code == 302
    query_params = dict(parse_qsl(urlsplit(response.headers['Location']).query))
    assert query_params['error'] == 'invalid_scope'
    assert query_params['error_description'] == 'One or more scopes are invalid'
Example #24
def extract_results(search_term, condition=None):
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    if condition == 'new':
        url = url + '&tbs=vw:l,mr:1,new:1'
    else:
        url = url + '&tbs=vw:l,mr:1,new:3'
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'text/html',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
    }
    r = requests.get(url, timeout=5, headers=headers)
    soup = BeautifulSoup(r.content, "html.parser")
    table = soup.find('div', class_='sh-pr__product-results')
    try:
        rows = table.find_all('div', class_='sh-dlr__list-result')
    except AttributeError:
        return []

    results = []
    for row in rows:
        if condition != 'new':
            condition_text = str(row.find('span', class_='h1Wfwb O8U6h').text)
            if (('used' not in condition_text)
                    and ('refurbished' not in condition_text)):
                #skip over items that do not say "used" when searching for used items
                continue
        if "eBay" in str(
                row.find('a', class_='shntl hy2WroIfzrX__merchant-name').text):
            #many google results overlap with eBay. Do not include these.
            continue
        new_result = Result(row.find('h3', class_='xsRiS').text)
        new_result.set_url(HOME_URL + row.find('a').get('href'))
        new_result.set_price(
            util.get_price(row.find('span', {'aria-hidden': 'true'}).text))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
            if len(results) == 10:
                return results
    return results
Example #25
def test_missing_query_parameters_not_reflected_in_response(test_client, confidential_client):
    """
        GIVEN:  GET request to the /authorize endpoint
        WHEN:   query parameters are specified, but no 'state', 'scope', or 'nonce' query parameters
        THEN:   response is 200 OK and no hidden input fields with name 'state', 'scope', or 'nonce' in the HTML
    """
    client = confidential_client
    url = create_url('/authorize', client_id=client['client_id'], redirect_uri=client['redirect_uris'][0],
                     response_type='code')
    response = test_client.get(url)
    soup = BeautifulSoup(response.data, features="html.parser")

    assert response.status_code == 200
    assert soup.find('input', dict(name='state')) is None
    assert soup.find('input', dict(name='scope')) is None
    assert soup.find('input', dict(name='nonce')) is None
Example #26
def extract_results(item, requested_condition=None):
    url = util.create_url(MAIN_URL, item, DELIMITER)
    r = requests.get(url, timeout=3)
    soup = BeautifulSoup(r.content, "html.parser")
    results = []
    #Check for data
    try:
        table = soup.find_all('li', class_='item')
    except:
        return results
    #Get 1st 10 results only
    for row in table:
        new_result = Result(row.find('a').get('title'))
        new_result.url = row.find('a').get('href')
        new_result.price = util.get_price(str(row.find('span',class_='price').find(text=True))\
                           .encode('utf-8')[1:])
        new_result.image_src = row.find('img').get('src')

        specific_page = requests.get(new_result.url, timeout=3)
        new_soup = BeautifulSoup(specific_page.content, "html.parser")
        condition = new_soup.find('div', class_='product-collateral').find(
            'div', class_='std').text
        conditions = ['new', 'New', 'used', 'Used']
        bad_condition_types = [
            'bad', 'poor', 'not working', 'broken', 'not functional'
        ]
        #Check for matching conditions
        for word in conditions:
            if word in condition:
                if (requested_condition is None and word.lower() == 'used') or \
                        (requested_condition is not None and requested_condition.lower() == word.lower()):
                    #Only add working good equipment with a valid price
                    if util.is_valid_price(new_result.price) and \
                            not any(bad in condition for bad in bad_condition_types):
                        results.append(new_result)
                        if len(results) == 10:
                            return results
                break
    return results
Example #27
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find('div', class_='search-results-container')
    try:
        rows = table.findAll("div", class_="card-body")
    except:
        return []
    results = []
    for row in rows:
        new_result = Result(
            row.find('h6', class_="title listing-title-padding").text)
        new_result.set_price(
            util.get_price(row.find('span', class_="price price-amount").text))
        new_result.set_url(row.find('a').get('href'))
        new_result.set_image_src(row.find('img').get('src'))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
    return results
Example #28
def extract_results(search_term, condition=None):
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    url = url + '&tbs=vw:l,mr:1,new:1' if condition == 'new' else url
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'text/html',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
    }
    r = requests.get(url, timeout=5, headers=headers)
    soup = BeautifulSoup(r.content, "html.parser")
    table = soup.find('div', id='search')
    try:
        rows = table.findAll('div', class_='psli')
    except:
        return []

    results = []
    for row in rows:
        if condition != 'new' and ('used' not in str(
                row.find('span', class_='price'))):
            #skip over items that do not say "used" when searching for used items
            continue
        if "eBay" in str(row.find('div', class_='_tyb shop__secondary').text):
            #many google results overlap with eBay. Do not include these.
            continue
        new_result = Result(row.find('a', class_='pstl').find(text=True))
        new_result.url = HOME_URL + row.find('a', class_='pstl').get('href')
        new_result.price = util.get_price(
            row.find('span', class_='price').b.find(text=True))
        if util.is_valid_price(new_result.price):
            results.append(new_result)
    return results
Example #29
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    url = url + '&cond=used' if condition != 'new' else url + '&cond=new'
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    product_grid = soup.find('div', id='totalListings')
    equips = []
    try:
        sale_equips = product_grid.find_all('div', class_='listings_table_d')
    except AttributeError:
        return []
    for equip in sale_equips:
        title = ''.join(
            equip.find('dt',
                       class_='listing_head').find_all(text=True)).strip()
        equipment = Result(title)
        equipment.url = 'http://www.dotmed.com' + equip.find(
            'dt', class_='listing_head').find('a').get('href')
        img_tag = equip.find('dd', class_='img')
        if img_tag != None:
            equipment.image_src = img_tag.find('img').get('src')
        price_tag = equip.find('dl', class_='datePosted').find('p')
        #filters out products with no price or with foreign prices
        if price_tag != None and 'USD' in ''.join(
                price_tag.find_all(text=True)):
            equipment.price = util.get_price(''.join(
                price_tag.find_all(text=True)))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
Example #30
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find('div', id='tbl-listings')
    try:
        rows = table.find_all("div", class_="search-row")
        # probe the first row; malformed pages fall through to the except
        rows[0].find('h3', class_="listing-title").find("a").find(text=True)
    except (IndexError, AttributeError):
        return []
    results = []
    for row in rows:
        new_result = Result(
            row.find('h3', class_="listing-title").find("a").find(text=True))
        new_result.price = util.get_price(
            row.find('span', class_="listing-price").find(text=True))
        new_result.url = row.find('a').get('href')
        new_result.image_src = row.find('img',
                                        class_="search-thumbnail").get('src')
        if util.is_valid_price(new_result.price):
            results.append(new_result)
    return results