def extract_results(item, condition=None):
    # URL is extended based on condition
    if condition == "new":
        url = util.create_url(MAIN_URL, item, DELIMITER) + "&condition=New,New%20or%20Used&adtype=998"
    else:
        url = util.create_url(MAIN_URL, item, DELIMITER) + "&condition=Used,Refurbished,For%20Parts/Not%20Working,New%20or%20Used&adtype=998"

    results = []
    # Check if page has data
    try:
        soup = util.check_exceptions(url)
        table = soup.find('tbody', class_='ResultsNewTable')
        rows = table.find_all('tr')
    except:
        return []

    # Get 1st 10 results only
    for i in range(len(rows)):
        row = rows[i]
        new_result = Result(row.find('a').get('title'))
        new_result.url = row.find('a').get('href')
        new_result.price = util.get_price(row.find_all('td')[4].contents[0])
        # The listing number embedded in the title determines the photo URL
        number = util.get_price(new_result.title)
        new_result.image_src = "https://photos.labx.com/labx/" + number + "/" + number + "-0.jpg"
        if util.is_valid_price(new_result.price):
            results.append(new_result)
        if len(results) == 10:
            return results
    return results
def test_query_parameters_are_reflected_in_response(test_client, confidential_client):
    """
    GIVEN:  GET request to the /authorize endpoint
    WHEN:   query parameters are specified
    THEN:   response is 200 OK with parameters as hidden input fields in the HTML
    """
    client = confidential_client
    url = create_url('/authorize',
                     client_id=client['client_id'],
                     redirect_uri=client['redirect_uris'][0],
                     response_type='code',
                     state='96f07e0b-992a-4b5e-a61a-228bd9cfad35',
                     scope='read write')

    response = test_client.get(url)

    soup = BeautifulSoup(response.data, features="html.parser")
    assert response.status_code == 200
    assert soup.find('input', dict(name='client_id'))['value'] == client['client_id']
    assert soup.find('input', dict(name='redirect_uri'))['value'] == client['redirect_uris'][0]
    assert soup.find('input', dict(name='state'))['value'] == '96f07e0b-992a-4b5e-a61a-228bd9cfad35'
    assert soup.find('input', dict(name='scope'))['value'] == 'read write'
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    headers = {
        'Host': 'www.biosurplus.com',
        'Connection': 'keep-alive',
        'Accept': 'text/html',
        'Referer': 'http://www.biosurplus.com/?ajax_search_nonce=b2ba2354a5&s==Beckman+Coulter&post_type=product',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-US,en;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
    }
    url = util.create_url(MAIN_URL, search_term, DELIMITER) + "&post_type=product"

    path_to_chromedriver = 'chromedriver.exe'
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    browser = webdriver.Chrome(executable_path=path_to_chromedriver, options=option)
    browser.get(url)
    time.sleep(5)
    soup = BeautifulSoup(browser.page_source, "html.parser")
    table = soup.find('div', class_='content-area')
    try:
        # check if the table has result rows
        rows = table.findAll("li", {"class": re.compile('post-*')})
    except:
        return []

    results = []
    for row in rows:
        new_result = Result(row.find('h2', class_="woocommerce-loop-product__title").text)
        new_result.set_price(util.get_price(row.find(text=re.compile("Price*"))))
        # Handle different paths to the product image
        try:
            img_src = row.find('div', class_="image_frame").find('div', class_="product-loop-image bsi-thumb").get("style")
        except:
            img_src = row.find('div', {"style": re.compile('background*')}).get('style')
        img_src = img_src.replace(') ', '( ')
        img_src = img_src.split('(')[1]
        img_src = img_src.split(')')[0]
        new_result.set_image_src(img_src)
        new_result.set_url(row.find('a').get('href'))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
        if len(results) == 10:
            return results
    return results
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    try:
        soup = util.check_exceptions(url)
        product_table = soup.find('table', class_='table_content')
        result_links = product_table.find_all('a')
    except:
        return []

    equips = []
    for link in result_links:
        product_url = HOME_URL + link.get('href')
        product_page_content = BeautifulSoup(urllib.request.urlopen(product_url), "html.parser")
        title = ''.join(
            product_page_content.find('div', class_='product_left').find('h1').find_all(text=True)).strip()
        equipment = Result(title)
        equipment.url = product_url
        equipment.image_src = HOME_URL + product_page_content.find('img', {"id": "big_product_img"}).get('src')
        equipment.price = util.get_price(
            product_page_content.find('div', class_='pr_price2').find(text=True))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
def extract_results(search_word, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    product_grid = soup.find('ul', class_='Products_ul')
    try:
        total_equips = product_grid.find_all('li', class_='Products')
    except:
        return []

    equips = []
    for equip in total_equips:
        title = equip.find('div', class_='title').find('span').find(text=True).strip()
        equipment = Result(title)
        equipment.url = equip.find('a').get('href')
        equipment.image_src = equip.find('div', class_='Image').find('img').get('src')
        price_text = equip.find('div', class_='price').find_all(text=True)
        equipment.price = util.get_price(''.join(price_text))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
def extract_results(search_word, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_word, DELIMITER)

    path_to_chromedriver = 'chromedriver.exe'
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    browser = webdriver.Chrome(executable_path=path_to_chromedriver, options=option)
    browser.get(url)
    time.sleep(5)
    soup = BeautifulSoup(browser.page_source, "html.parser")
    product_grid = soup.find('ul', class_='product_list p_list')
    try:
        total_equips = product_grid.find_all('li', {"class": re.compile('p_list_item*')})
    except:
        return []

    equips = []
    for equip in total_equips:
        title = equip.find('div', class_='title').find('a').text
        equipment = Result(title)
        equipment.set_url(HOME_URL + equip.find('a').get('href'))
        equipment.set_image_src(HOME_URL + equip.find('div', class_='thumb').find('img').get('src'))
        price_text = equip.find('li', class_='price').text
        equipment.set_price(util.get_price(price_text))
        if util.is_valid_price(equipment.get_price()):
            equips.append(equipment)
        if len(equips) == 10:
            return equips
    return equips
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find('div', class_='content-area')
    rows = table.findAll("article")

    results = []
    for row in rows:
        new_result = Result(row.find('h1', class_="entry-title").find("a").text)
        result_url = row.find('a').get('href')
        # scrape price and image from the result's page
        result_soup = BeautifulSoup(urllib.request.urlopen(result_url), "html.parser")
        new_result.set_url(result_url)
        new_result.set_price(util.get_price(result_soup.find('span', class_="amount").text))
        new_result.set_image_src(result_soup.find('div', class_='images').find('img').get('src'))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
        if len(results) == 10:
            return results
    return results
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    url = url + '&cond=used' if condition != 'new' else url + '&cond=new'

    path_to_chromedriver = 'chromedriver.exe'
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    browser = webdriver.Chrome(executable_path=path_to_chromedriver, options=option)
    browser.get(url)
    time.sleep(5)
    soup = BeautifulSoup(browser.page_source, "html.parser")

    equips = []
    try:
        sale_equips = soup.find_all('div', {'id': re.compile('listing_*')})
    except:
        return equips

    for equip in sale_equips:
        title = equip.find('h4').find('a').text.strip()
        equipment = Result(title)
        equipment.set_url(HOME_URL + equip.find('div', class_='row').find('a').get('href'))
        equipment.set_image_src(equip.find('img').get('src'))
        equipment.set_price(util.get_price(equip.find('span', class_='price')))
        if util.is_valid_price(equipment.get_price()):
            equips.append(equipment)
        if len(equips) == 10:
            return equips
    return equips
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    url = url if condition != "new" else url + '&Condition=5067'
    try:
        soup = util.check_exceptions(url)
        product_grid = soup.find('div', class_='pagebody')
        total_equips = product_grid.find_all('div', class_='el')
    except:
        return []

    equips = []
    for equip in total_equips:
        # items_details have names of generic device, model, manufacturer bundled together
        items_details = equip.find('div', class_='item_details').find_all(text=True)
        title = ' '.join(items_details).strip()
        equipment = Result(title)
        equipment.url = equip.find('div', class_='image').find('a', class_='item_number').get('href')
        equipment.image_src = equip.find('div', class_='image').find('img').get('src')
        price_text = equip.find('div', class_='price').find(text=True) if equip.find(
            'span', class_='price_element') is None else equip.find('span', class_='price_element').find(text=True)
        equipment.price = util.get_price(''.join(price_text))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    try:
        product_contents = soup.find_all('div', class_='products-mnbox-content')
    except:
        return []

    results = []
    for product_content in product_contents:
        equip_url = HOME_URL + product_content.find('a').get('href')
        models_site = BeautifulSoup(urllib.request.urlopen(equip_url), "html.parser")
        model_descriptions = models_site.find_all('td', class_='description')
        # each description row is one concrete model of the product
        for description in model_descriptions:
            result = Result(description.find('div', {'id': 'gaProductName'}).find(text=True).strip())
            result.image_src = 'https:' + description.find('img', class_='lazy').get('data-original')
            result.url = HOME_URL + description.find('a').get('href')
            price_site = BeautifulSoup(urllib.request.urlopen(result.url), "html.parser")
            result.price = util.get_price(
                price_site.find('div', class_='price-box').find('span', class_='price-range').find(text=True))
            if util.is_valid_price(result.price):
                results.append(result)
            if len(results) >= 10:
                return results
    return results
def extract_results(search_word, condition=None):
    if condition == "new":
        return []
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    try:
        soup = util.check_exceptions(url)
        product_grid = soup.find('div', class_='v-product-grid')
        total_equips = product_grid.find_all('div', class_='v-product')
    except:
        return []

    equips = []
    for equip in total_equips:
        title = equip.find('a', class_='v-product__title productnamecolor colors_productname').find(text=True).strip()
        equipment = Result(title)
        equipment.url = equip.find('a', class_='v-product__img').get('href')
        equipment.image_src = 'http:' + equip.find('img').get('src')
        price_text = equip.find('div', class_='product_productprice').find_all(text=True)
        equipment.price = util.get_price(''.join(price_text))
        if util.is_valid_price(equipment.price):
            equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
def extract_results(item, condition=None):
    # URL is extended based on condition
    if condition == "new":
        url = util.create_url(MAIN_URL, item, DELIMITER) + "&condition=468"
    else:
        url = util.create_url(MAIN_URL, item, DELIMITER) + "&condition=467,469"

    results = []
    headers = {
        'Host': 'www.labx.com',
        'Connection': 'keep-alive',
        'Accept': '*/*',
        'Referer': 'https://www.labx.com/item/vacuum-pump-230-v-50-hz/12183467',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
        'User-Agent': 'Chrome/80.0.3987.132, Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
        'Sec-Fetch-Dest': 'script',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-site',
        'Upgrade-Insecure-Requests': '1',
        'x-runtime': '148ms'
    }

    # Check if page has data
    try:
        path_to_chromedriver = 'chromedriver.exe'
        option = webdriver.ChromeOptions()
        option.add_argument('headless')
        browser = webdriver.Chrome(executable_path=path_to_chromedriver, options=option)
        browser.get(url)
        time.sleep(5)
        soup = BeautifulSoup(browser.page_source, 'html.parser')
        rows = soup.find_all('div', class_='product-card')
    except:
        return []

    # Get 1st 10 results only
    for i in range(len(rows)):
        row = rows[i]
        new_result = Result(row.find('a', class_='card-title').text)
        new_result.url = HOME_URL + row.find('a').get('href')
        new_result.price = util.get_price(row.find(class_='price').get_text())
        new_result.image_src = row.find('div', class_='card-img-top').find("img").get("src")
        if util.is_valid_price(new_result.price):
            results.append(new_result)
        if len(results) == 10:
            break
    return results
def test_token_endpoint_single_sign_on(test_client, confidential_client):
    """
    GIVEN:  Successful retrieval of tokens after sign in
    WHEN:   another authorization request with different scope is executed
    THEN:   auth code is issued without login screen being presented
    """
    with freezegun.freeze_time("2020-03-14 12:00:00"):
        code, _ = authenticate_user(test_client, confidential_client, scope='openid')

        client_id = confidential_client['client_id']
        client_secret = confidential_client['client_secret']
        plaintext = f'{client_id}:{client_secret}'
        headers = {
            'Authorization':
            'Basic ' + str(base64.b64encode(plaintext.encode('utf-8')), 'utf-8')
        }
        post_data = {
            'grant_type': 'authorization_code',
            'code': code,
            'scope': 'openid',
            'client_id': client_id
        }

        response = test_client.post('/token', headers=headers, data=post_data)

        assert response.status_code == 200
        assert response.headers['Content-Type'] == 'application/json'
        token = decode_token(response.json['access_token'], audience='https://localhost:5000/')
        assert token['aud'] == 'https://localhost:5000/'
        assert response.json['refresh_token']
        token = decode_token(response.json['id_token'], audience=client_id)
        assert token['aud'] == client_id

    with freezegun.freeze_time("2020-03-14 12:45:00"):
        # issue authorization request for different scope
        url = create_url('/authorize',
                         client_id=confidential_client['client_id'],
                         redirect_uri=confidential_client['redirect_uris'][0],
                         response_type='code',
                         state='96f07e0b-992a-4b5e-a61a-228bd9cfad35',
                         scope='read write')

        response = test_client.get(url)

        # expect code to be issued without prompt for login
        assert response.status_code == 302
        parsed_uri = urlparse(response.headers['Location'])
        assert '{uri.scheme}://{uri.netloc}{uri.path}'.format(uri=parsed_uri) == confidential_client['redirect_uris'][0]
        query_params = dict(parse_qsl(urlsplit(response.headers['Location']).query))
        assert query_params['code']
        assert query_params['state'] == '96f07e0b-992a-4b5e-a61a-228bd9cfad35'
def test_invalid_client_id_results_in_error(test_client):
    """
    GIVEN:  GET request to the /authorize endpoint
    WHEN:   client_id query parameter is not registered
    THEN:   response is 400 Bad Request
    """
    url = create_url('/authorize', client_id='unknown_client', response_type='code')

    response = test_client.get(url)

    assert response.status_code == 400
def test_missing_client_id_results_in_error(test_client):
    """
    GIVEN:  GET request to the /authorize endpoint
    WHEN:   client_id query parameter is missing
    THEN:   response is 400 Bad Request
    """
    url = create_url('/authorize')

    response = test_client.get(url)

    assert response.status_code == 400
def test_invalid_redirect_uri_results_in_error(test_client, confidential_client):
    """
    GIVEN:  GET request to the /authorize endpoint
    WHEN:   redirect_uri query parameter does not match uri registered in client
    THEN:   response is 400 Bad Request
    """
    client_id = confidential_client['client_id']
    url = create_url('/authorize',
                     client_id=client_id,
                     response_type='code',
                     redirect_uri='xyz')

    response = test_client.get(url)

    assert response.status_code == 400
def extract_results(item, requested_condition=None):
    path_to_chromedriver = 'chromedriver.exe'
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    browser = webdriver.Chrome(executable_path=path_to_chromedriver, options=option)
    url = util.create_url(MAIN_URL, item, DELIMITER)
    browser.get(url)
    time.sleep(5)
    soup = BeautifulSoup(browser.page_source, 'html.parser')

    results = []
    # Check for data
    try:
        table = soup.find('div', class_='search results')
    except:
        return results

    # Get 1st 10 results only
    rows = table.find_all('li', class_='item product product-item')
    for i in range(len(rows)):
        row = rows[i]
        new_result = Result(row.find('a', class_='product-item-link').text.strip())
        new_result.url = row.find('a').get('href')
        new_result.price = util.get_price(
            str(row.find('span', class_='price').find(text=True)).encode('utf-8')[1:])
        new_result.image_src = row.find('img').get('src')

        # Fetch the product page to read its condition description
        browser.get(new_result.url)
        new_soup = BeautifulSoup(browser.page_source, "html.parser")
        condition = new_soup.find('div', class_='product attribute description').find('div', class_='value').text
        conditions = ['new', 'New', 'used', 'Used']
        bad_condition_types = ['bad', 'poor', 'not working', 'broken', 'not functional']
        # Check for matching conditions
        for word in conditions:
            if word in condition:
                if (requested_condition == None and word.lower() == 'used') or \
                   (requested_condition != None and requested_condition.lower() == word.lower()):
                    # Only add working good equipment
                    for type_word in bad_condition_types:
                        if type_word not in condition and util.is_valid_price(new_result.price):
                            results.append(new_result)
                            break
        if len(results) == 10:
            return results
    return results
def test_unsupported_response_type_results_in_redirect(test_client, confidential_client):
    """
    GIVEN:  GET request to the /authorize endpoint
    WHEN:   response_type query parameter is not supported
    THEN:   response is 302 Redirect with error query parameter
    """
    client = confidential_client
    url = create_url('/authorize',
                     client_id=client['client_id'],
                     response_type='token',
                     redirect_uri=client['redirect_uris'][0],
                     state='96f07e0b-992a-4b5e-a61a-228bd9cfad35')

    response = test_client.get(url)

    assert response.status_code == 302
    query_params = dict(parse_qsl(urlsplit(response.headers['Location']).query))
    assert query_params['error'] == 'unsupported_response_type'
def extract_results(search_term, condition=None):
    if condition == 'new':
        url = util.create_url(MAIN_URL, search_term, DELIMITER) + '&LH_BIN=1' + NEW
    else:
        url = util.create_url(MAIN_URL, search_term, DELIMITER) + '&LH_BIN=1' + USED
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find('div', id='ResultSetItems')
    try:
        rows = table.findAll('li', class_='sresult lvresult clearfix li')
    except:
        return []

    results = []
    for row in rows:
        new_result = Result(row.find('h3', class_="lvtitle").find(text=True))
        new_result.url = row.find('h3', class_="lvtitle").find('a').get('href')
        new_result.image_src = row.find('img', class_='img').get('src')
        new_result.price = util.get_price(row.find('li', class_="lvprice prc").find('span').find(text=True))
        if util.is_valid_price(new_result.price):
            results.append(new_result)
    return results
def extract_results(search_term, condition=None):
    url = ''
    if condition == 'new':
        url = util.create_url(MAIN_URL, search_term, DELIMITER) + '&rt=nc' + NEW
    else:
        url = util.create_url(MAIN_URL, search_term, DELIMITER) + '&rt=nc' + USED
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find('div', class_='srp-river-results clearfix')
    try:
        rows = table.findAll('div', class_='s-item__wrapper clearfix')
    except:
        return []

    results = []
    for row in rows:
        new_result = Result(row.find('img', class_='s-item__image-img').get('alt'))
        new_result.set_url(row.find('a').get('href'))
        new_result.set_image_src(row.find('img', class_='s-item__image-img').get('src'))
        new_result.set_price(util.get_price(row.find('span', class_="s-item__price").text))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
    return results
def test_public_client_without_code_challenge_results_in_error(test_client, public_client):
    """
    GIVEN:  GET request to the /authorize endpoint
    WHEN:   client_id identifies a public client and code_challenge query parameter is missing
    THEN:   response is 302 Redirect with error query parameter (PKCE required for public clients)
    """
    client = public_client
    url = create_url('/authorize',
                     client_id=client['client_id'],
                     redirect_uri=client['redirect_uris'][0],
                     response_type='code',
                     state='96f07e0b-992a-4b5e-a61a-228bd9cfad35')

    response = test_client.get(url)

    assert response.status_code == 302
    query_params = dict(parse_qsl(urlsplit(response.headers['Location']).query))
    assert query_params['error'] == 'invalid_request'
    assert query_params['error_description'] == 'code challenge required'
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    headers = {
        'Host': 'www.biosurplus.com',
        'Connection': 'keep-alive',
        'Accept': 'text/html',
        'Referer': 'http://www.biosurplus.com/store/search/?per_page=24&product_search_q=Beckman+Coulter+Biomek+Workstation',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-US,en;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
    }
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    req = urllib.request.Request(url, headers=headers)
    page = urllib.request.urlopen(req)

    # This page is g-zipped. Unzip it
    unzipped_page = gzip.decompress(page.read())
    soup = BeautifulSoup(unzipped_page, "html.parser")
    table = soup.find('div', class_='product_browse')
    try:
        # check if the table has product rows
        rows = table.findAll("div", class_="fps_featured_product")
    except:
        return []

    results = []
    for row in rows:
        manufacturer = row.find('p', class_="fps_fp_description").find(text=True)
        title = row.find('h2', class_="fps_fp_heading").find("a").find(text=True)
        new_result = Result(manufacturer + " " + title)
        new_result.price = util.get_price(row.find('p', class_='product_price').find(text=True))
        new_result.image_src = row.find('div', class_="fps_fp_image_inner").find('img').get('src')
        new_result.url = "www.biosurplus.com" + row.find('a').get('href')
        if util.is_valid_price(new_result.price):
            results.append(new_result)
        if len(results) == 10:
            return results
    return results
def test_invalid_scope_returns_error(test_client, confidential_client):
    """
    GIVEN:  GET request to the /authorize endpoint
    WHEN:   query parameters are specified, scope is invalid
    THEN:   response is 302 Redirect with error parameters
    """
    client = confidential_client
    url = create_url('/authorize',
                     client_id=client['client_id'],
                     redirect_uri=client['redirect_uris'][0],
                     response_type='code',
                     state='96f07e0b-992a-4b5e-a61a-228bd9cfad35',
                     scope='scope1 scope2')

    response = test_client.get(url)

    assert response.status_code == 302
    query_params = dict(parse_qsl(urlsplit(response.headers['Location']).query))
    assert query_params['error'] == 'invalid_scope'
    assert query_params['error_description'] == 'One or more scopes are invalid'
def extract_results(search_term, condition=None):
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    if condition == 'new':
        url = url + '&tbs=vw:l,mr:1,new:1'
    else:
        url = url + '&tbs=vw:l,mr:1,new:3'
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'text/html',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
    }
    r = requests.get(url, timeout=5, headers=headers)
    soup = BeautifulSoup(r.content, "html.parser")
    table = soup.find('div', class_='sh-pr__product-results')
    try:
        rows = table.findAll('div', class_='sh-dlr__list-result')
    except:
        return []

    results = []
    for row in rows:
        if condition != 'new':
            condition_text = str(row.find('span', class_='h1Wfwb O8U6h').text)
            if ('used' not in condition_text) and ('refurbished' not in condition_text):
                # skip over items that do not say "used" when searching for used items
                continue
        if "eBay" in str(row.find('a', class_='shntl hy2WroIfzrX__merchant-name').text):
            # many google results overlap with eBay. Do not include these.
            continue
        new_result = Result(row.find('h3', class_='xsRiS').text)
        new_result.set_url(HOME_URL + row.find('a').get('href'))
        new_result.set_price(util.get_price(row.find('span', {'aria-hidden': 'true'}).text))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
        if len(results) == 10:
            return results
    return results
def test_missing_query_parameters_not_reflected_in_response(test_client, confidential_client):
    """
    GIVEN:  GET request to the /authorize endpoint
    WHEN:   query parameters are specified, but no 'state', 'scope' or 'nonce' query parameters
    THEN:   response is 200 OK and no hidden input fields with name 'state', 'scope' or 'nonce' in the HTML
    """
    client = confidential_client
    url = create_url('/authorize',
                     client_id=client['client_id'],
                     redirect_uri=client['redirect_uris'][0],
                     response_type='code')

    response = test_client.get(url)

    soup = BeautifulSoup(response.data, features="html.parser")
    assert response.status_code == 200
    assert soup.find('input', dict(name='state')) is None
    assert soup.find('input', dict(name='scope')) is None
    assert soup.find('input', dict(name='nonce')) is None
def extract_results(item, requested_condition=None):
    url = util.create_url(MAIN_URL, item, DELIMITER)
    r = requests.get(url, timeout=3)
    soup = BeautifulSoup(r.content, "html.parser")

    results = []
    # Check for data
    try:
        table = soup.find_all('li', class_='item')
    except:
        return results

    # Get 1st 10 results only
    for i in range(len(table)):
        row = table[i]
        new_result = Result(row.find('a').get('title'))
        new_result.url = row.find('a').get('href')
        new_result.price = util.get_price(
            str(row.find('span', class_='price').find(text=True)).encode('utf-8')[1:])
        new_result.image_src = row.find('img').get('src')

        # Fetch the product page to read its condition description
        specific_page = urllib.request.urlopen(new_result.url)
        new_soup = BeautifulSoup(specific_page, "html.parser")
        condition = new_soup.find('div', class_='product-collateral').find('div', class_='std').text
        conditions = ['new', 'New', 'used', 'Used']
        bad_condition_types = ['bad', 'poor', 'not working', 'broken', 'not functional']
        # Check for matching conditions
        for word in conditions:
            if word in condition:
                if (requested_condition == None and word.lower() == 'used') or \
                   (requested_condition != None and requested_condition.lower() == word.lower()):
                    # Only add working good equipment
                    for type_word in bad_condition_types:
                        if type_word not in condition and util.is_valid_price(new_result.price):
                            results.append(new_result)
                            break
        if len(results) == 10:
            return results
    return results
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find('div', class_='search-results-container')
    try:
        rows = table.findAll("div", class_="card-body")
    except:
        return []

    results = []
    for row in rows:
        new_result = Result(row.find('h6', class_="title listing-title-padding").text)
        new_result.set_price(util.get_price(row.find('span', class_="price price-amount")))
        new_result.set_url(row.find('a').get('href'))
        new_result.set_image_src(row.find('img').get('src'))
        if util.is_valid_price(new_result.get_price()):
            results.append(new_result)
    return results
def extract_results(search_term, condition=None):
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    url = url + '&tbs=vw:l,mr:1,new:1' if condition == 'new' else url
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'text/html',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
    }
    r = requests.get(url, timeout=5, headers=headers)
    soup = BeautifulSoup(r.content, "html.parser")
    table = soup.find('div', id='search')
    try:
        rows = table.findAll('div', class_='psli')
    except:
        return []

    results = []
    for row in rows:
        if condition != 'new' and ('used' not in str(row.find('span', class_='price'))):
            # skip over items that do not say "used" when searching for used items
            continue
        if "eBay" in str(row.find('div', class_='_tyb shop__secondary').text):
            # many google results overlap with eBay. Do not include these.
            continue
        new_result = Result(row.find('a', class_='pstl').find(text=True))
        new_result.url = HOME_URL + row.find('a', class_='pstl').get('href')
        new_result.price = util.get_price(row.find('span', class_='price').b.find(text=True))
        if util.is_valid_price(new_result.price):
            results.append(new_result)
    return results
def extract_results(search_word, condition=None):
    url = util.create_url(MAIN_URL, search_word, DELIMITER)
    url = url + '&cond=used' if condition != 'new' else url + '&cond=new'
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    product_grid = soup.find('div', id='totalListings')

    equips = []
    try:
        sale_equips = product_grid.find_all('div', class_='listings_table_d')
    except:
        try:
            sale_equips = product_grid.find_all('div', class_='listings_table_d ')
        except:
            return []

    for equip in sale_equips:
        title = ''.join(equip.find('dt', class_='listing_head').find_all(text=True)).strip()
        equipment = Result(title)
        equipment.url = 'http://www.dotmed.com' + equip.find('dt', class_='listing_head').find('a').get('href')
        img_tag = equip.find('dd', class_='img')
        if img_tag is not None:
            equipment.image_src = img_tag.find('img').get('src')
        price_tag = equip.find('dl', class_='datePosted').find('p')
        # filters out products with no price or with foreign prices
        if price_tag is not None and 'USD' in ''.join(price_tag.find_all(text=True)):
            equipment.price = util.get_price(''.join(price_tag.find_all(text=True)))
            if util.is_valid_price(equipment.price):
                equips.append(equipment)
        if len(equips) >= 10:
            return equips
    return equips
def extract_results(search_term, condition=None):
    if condition == 'new':
        return []
    url = util.create_url(MAIN_URL, search_term, DELIMITER)
    page = urllib.request.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find('div', id='tbl-listings')
    try:
        rows = table.findAll("div", class_="search-row")
        rows[0].find('h3', class_="listing-title").find("a").find(text=True)
    except:
        return []

    results = []
    for row in rows:
        new_result = Result(row.find('h3', class_="listing-title").find("a").find(text=True))
        new_result.price = util.get_price(row.find('span', class_="listing-price").find(text=True))
        new_result.url = row.find('a').get('href')
        new_result.image_src = row.find('img', class_="search-thumbnail").get('src')
        if util.is_valid_price(new_result.price):
            results.append(new_result)
    return results