def test_content_type_error(self):
    """Check that ``AmazonCaptcha.fromlink`` raises ``ContentTypeError`` for
    this link and that the error text mentions the unsupported Content-Type.
    """
    link = 'https://ibb.co/kh13H5P'

    with self.assertRaises(ContentTypeError) as context:
        AmazonCaptcha.fromlink(link)

    # assertIn reports both the expected fragment and the actual message on
    # failure, whereas assertTrue(a in b) only reports "False is not true".
    self.assertIn('is not supported as a Content-Type',
                  str(context.exception))
def solve_captcha(session, form_element, pdp_url: str):
    """Try to solve the image captcha attached to ``form_element`` and submit it.

    Args:
        session: Object whose ``get(url)`` returns a response exposing
            ``text`` and ``status_code``.
        form_element: Parsed captcha ``<form>`` element; it must support
            ``xpath()`` and ``attrib``.
        pdp_url: URL of the page that produced the captcha. Only its scheme
            and host are reused; the path is replaced by the form's action.

    Returns:
        ``(response.text, response.status_code)`` when a captcha image was
        found and a solution was submitted. Otherwise the fallback
        ``(html.fromstring(""), 404)`` is evaluated.
    """
    log.warning("Encountered CAPTCHA. Attempting to solve.")
    # The leading "//" makes this XPath search the whole document, not only
    # the descendants of the form.
    captcha_images = form_element.xpath(
        '//img[contains(@src, "amazon.com/captcha/")]')
    if captcha_images:
        # Example src:
        # https://images-na.ssl-images-amazon.com/captcha/usvmgloq/Captcha_kwrrnqwkph.jpg
        link = captcha_images[0].attrib["src"]
        solution = AmazonCaptcha.fromlink(link).solve()
        # NOTE(review): AmazonCaptcha.solve() appears to return the truthy
        # string 'Not solved' on failure, so this check would not filter
        # that case out -- confirm against the amazoncaptcha documentation.
        if solution:
            # The text input receives the solution; every other input
            # (hidden fields) is echoed back with its current value.
            input_dict = {
                form_input.name:
                    solution if form_input.type == "text" else form_input.value
                for form_input in form_element.xpath(".//input")
            }
            # Use the original URL to get the schema and host
            f = furl(pdp_url)
            f = f.set(path=form_element.attrib["action"])
            f.add(args=input_dict)
            # furl exposes the serialized URL as ``.url``.
            response = session.get(f.url)
            log.debug(f"Captcha response was {response.status_code}")
            return response.text, response.status_code

    # NOTE(review): if ``html`` is lxml.html, fromstring("") is expected to
    # raise ParserError("Document is empty") instead of returning a value,
    # and the success path returns a str rather than an element -- confirm
    # what callers expect here.
    return html.fromstring(""), 404
def test_fromlink_with_predefined_undolvable_captcha_and_keep_logs(self):
    """Solve the sample image with ``keep_logs=True`` and expect a
    'not-solved-captcha.log' entry in the current working directory.
    """
    sample_url = 'https://i.ibb.co/Cn2J1mS/notsolved.jpg'

    # The returned solution is irrelevant here; only the log file matters.
    AmazonCaptcha.fromlink(sample_url).solve(keep_logs=True)

    cwd_entries = os.listdir()
    self.assertIn('not-solved-captcha.log', cwd_entries)
def test_fromlink_with_predefined_undolvable_captcha(self):
    """``solve()`` on the sample image is expected to return 'Not solved'."""
    sample_captcha = AmazonCaptcha.fromlink(
        'https://i.ibb.co/Cn2J1mS/notsolved.jpg')

    self.assertEqual(sample_captcha.solve(), 'Not solved')
def run(self):
    """Monitor offers for ``self.item['asin']`` forever and try to buy one.

    Each outer iteration fetches the offer listing and inspects every
    pinned/regular offer block. When an offer's parsed price lies within
    ``self.item['min_price']``..``self.item['max_price']``, an inner loop
    repeatedly posts a turbo-initiate checkout request (solving a captcha
    form if one comes back) and hands the response to ``self.check_out``.
    The inner loop ends only when ``self.timeout_buy`` seconds have elapsed
    or an exception propagates; the outer loop never ends on its own.

    Any exception raised inside an iteration is logged via
    ``self.logger.error`` and monitoring continues after the pacing sleep.
    """
    while True:
        # calc next time (passed to sleep_time_left at the end of the pass)
        next_time_monitor = time.time() + self.delay_monitor
        self.logger.info('checking stock')
        try:
            # randomize user agent
            user_agent = self.get_random_user_agent()
            # randomize proxy
            proxies = self.get_random_proxy()
            # send ajax request (note: the session-id cookie is sent empty)
            start_time = time.time()
            r = requests.get(
                f"{AMAZON_SMILE_BASE_URL}/gp/aod/ajax?asin={self.item['asin']}",
                cookies={'session-id': ''},
                headers={'user-agent': user_agent},
                proxies=proxies)
            self.logger.debug(
                f'ajax request took {int(1000 * (time.time() - start_time))} ms'
            )
            self.logger.debug(
                f'ajax request returned status code {r.status_code}')
            if r.status_code == 200:
                # Both the pinned offer and the regular offer blocks are checked.
                offer_divs = html.fromstring(r.text).xpath(
                    "//div[@id='aod-sticky-pinned-offer'] | //div[@id='aod-offer']"
                )
                for offer_div in offer_divs:
                    price_spans = offer_div.xpath(
                        ".//span[@class='a-price-whole']")
                    if price_spans:
                        # Price is the text of the first 'a-price-whole' span
                        # with thousands separators removed.
                        price = int(price_spans[0].text.replace(',', ''))
                        self.logger.info(f'offer for ${price}')
                        # check price
                        if self.item['min_price'] <= price <= self.item['max_price']:
                            self.logger.success('price in range')
                            # get the offering id (IndexError here if the
                            # input is missing; handled by the outer except)
                            offering_id = offer_div.xpath(
                                ".//input[@name='offeringID.1']")[0].value
                            self.logger.debug(f'offering_id = {offering_id}')
                            # build data
                            data = {
                                'offerListing.1': offering_id,
                                'quantity.1': '1',
                            }
                            # create session; the csrf header is filled with
                            # the value of the stored 'session-id' cookie
                            s = requests.Session()
                            s.headers = {
                                'content-type': 'application/x-www-form-urlencoded',
                                'x-amz-checkout-csrf-token': self.cookies['session-id'],
                            }
                            for n, v in self.cookies.items():
                                s.cookies.set(n, v)
                            # calc timeout time
                            timeout_time = time.time() + self.timeout_buy
                            while True:
                                # calc next time
                                next_time_buy = time.time() + self.delay_buy
                                self.logger.info('trying to cart')
                                # randomize user agent
                                # NOTE(review): called here as bare functions
                                # but as self.* methods above -- confirm that
                                # module-level versions exist.
                                s.headers.update({
                                    'user-agent': get_random_user_agent()
                                })
                                # randomize proxy
                                s.proxies = get_random_proxy()
                                # send turbo init request
                                start_time = time.time()
                                r = s.post(
                                    f'{AMAZON_SMILE_BASE_URL}/checkout/turbo-initiate?pipelineType=turbo',
                                    data)
                                self.logger.debug(
                                    f'turbo init request took {calc_time_delta(start_time)} ms'
                                )
                                self.logger.debug(
                                    f'turbo init request returned status code {r.status_code}'
                                )
                                if r.status_code == 200:
                                    # A body equal to a single space is
                                    # treated as "not carted".
                                    if r.text != ' ':
                                        self.logger.success('carted')
                                        # check for captcha
                                        captcha_forms = html.fromstring(
                                            r.text
                                        ).xpath(
                                            '//form[contains(@action, "validateCaptcha")]'
                                        )
                                        if captcha_forms:
                                            self.logger.info('got captcha')
                                            # try to solve captcha
                                            captcha_form = captcha_forms[0]
                                            # The leading "//" searches the
                                            # whole document, not just the form.
                                            captcha_img_link = captcha_form.xpath(
                                                '//img[contains(@src, "amazon.com/captcha/")]'
                                            )[0].attrib['src']
                                            captcha_solution = AmazonCaptcha.fromlink(
                                                captcha_img_link).solve()
                                            # check for captcha solution
                                            # NOTE(review): solve() appears to
                                            # return the truthy string
                                            # 'Not solved' on failure -- confirm
                                            # this check filters that case.
                                            if captcha_solution:
                                                self.logger.success(
                                                    'solved captcha')
                                                self.logger.debug(
                                                    f'captcha_solution = {captcha_solution}'
                                                )
                                                # send validate captcha request:
                                                # the text input carries the
                                                # solution, all other inputs
                                                # keep their current value
                                                captcha_inputs = captcha_form.xpath(
                                                    './/input')
                                                args = {
                                                    captcha_input.name:
                                                    captcha_solution
                                                    if captcha_input.type == 'text' else
                                                    captcha_input.value
                                                    for captcha_input in captcha_inputs
                                                }
                                                f = furl(AMAZON_SMILE_BASE_URL)
                                                f.set(path=captcha_form.attrib['action'])
                                                f.add(args=args)
                                                start_time = time.time()
                                                r = s.get(f.url)
                                                self.logger.debug(
                                                    f'validate captcha request took {calc_time_delta(start_time)} ms'
                                                )
                                                self.logger.debug(
                                                    f'validate captcha request returned status code {r.status_code}'
                                                )
                                                self.check_out(r.text, s)
                                            # no captcha solution
                                            else:
                                                self.logger.warning(
                                                    'could not solve captcha'
                                                )
                                        # no captcha
                                        else:
                                            self.check_out(r.text, s)
                                    # no stock
                                    else:
                                        self.logger.warning('could not cart')
                                # check for timeout; this is the only break,
                                # so the loop keeps going even after check_out
                                # returns.
                                if timeout_time - time.time() < 0:
                                    self.logger.info('timed out trying to buy')
                                    break
                                sleep_time_left(next_time_buy)
        except Exception as e:
            # Broad boundary: one failed pass must not stop monitoring.
            self.logger.error(e)
        sleep_time_left(next_time_monitor)
def do_GET(self):
    """Answer a GET request with the solution of the captcha named in the path.

    The request path without its first character is passed unchanged (no
    validation is performed here) to ``AmazonCaptcha.fromlink``; the solved
    text is written to the response body encoded as UTF-8.
    """
    self._set_response()

    image_link = self.path[1:]
    answer = AmazonCaptcha.fromlink(image_link).solve()

    payload = answer.encode('utf-8')
    self.wfile.write(payload)
import requests
import lxml  # not referenced directly below; kept as in the original
from bs4 import BeautifulSoup
from amazoncaptcha import AmazonCaptcha

# Flat script: request a product page, locate the captcha image in the
# returned HTML, and print the image link followed by the solved text.

amazon_url = "https://www.amazon.com/Instant-Pot-Duo-Evo-Plus/dp/B07W55DDFB/ref=sr_1_1?qid=1597662463"
amz_headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Accept-Language": "en-US,en;q=0.5",
}

# A timeout keeps the script from hanging indefinitely on a stalled connection.
response = requests.get(amazon_url, headers=amz_headers, timeout=30)
captcha_webpage = response.text
soup = BeautifulSoup(captcha_webpage, "lxml")

# Markup the selector below is written for:
# <div class="a-row a-text-center">
#   <img src="https://images-na.ssl-images-amazon.com/captcha/twhhswbk/Captcha_fwgzoazcal.jpg">
# </div>
captcha_images = soup.select(".a-text-center>img")
if captcha_images:
    captcha_link = captcha_images[0].get("src")
    print(captcha_link)
    captcha = AmazonCaptcha.fromlink(captcha_link)
    solution = captcha.solve()
    print(solution)
else:
    # Without this guard, indexing the empty result list raised IndexError
    # whenever the response contained no matching image.
    print("No captcha image found in the response.")