def _get_title_from_product_page(self, page: BeautifulSoup) -> Optional[Text]:
    """
    Read the product title from a product page.

    Looks up the first ``h1`` element whose class is ``prod-ProductTitle``.

    :param page: Parsed product page.
    :return: The heading's text, or ``None`` (after logging an error) when
        no such heading is found.
    """
    heading = page.find('h1', attrs={'class': 'prod-ProductTitle'})
    if heading:
        return heading.text
    log.error('Failed to find title box')
    return None
def _get_search_results(self, page: BeautifulSoup) -> List[Tag]:
    """
    Collect the individual result entries from a search page.

    The entries are the ``div`` elements carrying
    ``data-automation-id="search-result-listview-item"`` inside the
    ``div`` with class ``search-result-listview-items``.

    :param page: Parsed search page.
    :return: The matching result elements, or an empty list (after logging
        an error) when the result container is missing.
    """
    result_box = page.find('div', {'class': 'search-result-listview-items'})
    if not result_box:
        log.error('Failed to find search result box')
        return []
    # find_all is the supported name; findAll is only kept by Beautiful Soup
    # as a deprecated compatibility alias.
    return result_box.find_all(
        'div', {'data-automation-id': 'search-result-listview-item'})
def _get_price_from_search_result(self, item: Tag) -> Optional[Text]:
    """
    Read the price shown on a single search result.

    The price is taken from the first ``strong`` element inside the ``li``
    with class ``price-current``.

    :param item: One search result element.
    :return: The ``strong`` element's text; ``None`` when the ``li`` is
        missing (an error is logged) or when it has no ``strong`` child
        (nothing is logged in that case).
    """
    current_price = item.find('li', attrs={'class': 'price-current'})
    if not current_price:
        log.error('Failed to find price box')
        return None
    strong_tag = current_price.find('strong')
    if strong_tag:
        return strong_tag.text
    return None
def _get_price_from_search_result(self, item: Tag) -> Optional[Text]:
    """
    Read the price shown on a single search result.

    The price is the text of the first ``span`` inside the ``div`` with
    class ``priceView-customer-price``.

    :param item: One search result element.
    :return: The span's text, or ``None`` (after logging an error) when
        either element is missing.
    """
    customer_price = item.find('div', attrs={'class': 'priceView-customer-price'})
    if not customer_price:
        log.error('Failed to get price box')
        return None

    amount = customer_price.find('span')
    if amount:
        return amount.text

    log.error('Failed to find price span')
    return None
def _get_url_from_search_result(self, item: Tag) -> Optional[Text]:
    """
    Build the product URL for a single search result.

    The URL comes from the ``href`` of the ``a`` element with class
    ``product-title-link``. When the href does not already mention
    ``walmart.com`` it is treated as site-relative and prefixed with
    ``https://walmart.com/``.

    :param item: One search result element.
    :return: The product URL, or ``None`` (after logging an error) when the
        link element or its ``href`` attribute is missing.
    """
    title_link_a = item.find('a', {'class': 'product-title-link'})
    if not title_link_a:
        log.error('Failed to find title link')
        return None

    # .get() instead of ['href'] so an anchor without an href is reported
    # the same way as every other missing piece, not as a KeyError.
    link = title_link_a.get('href')
    if not link:
        log.error('Title link has no href')
        return None

    if 'walmart.com' not in link:
        # Drop leading slashes so "/ip/123" does not become
        # "https://walmart.com//ip/123".
        link = 'https://walmart.com/' + link.lstrip('/')
    return link
def _get_title_from_search_result(self, item: Tag) -> Optional[Text]:
    """
    Read the product title from a single search result.

    The title is the text of the first ``a`` inside the ``h4`` with class
    ``sku-header``.

    :param item: One search result element.
    :return: The anchor's text, or ``None`` (after logging an error) when
        the header or the anchor is missing.
    """
    header = item.find('h4', attrs={'class': 'sku-header'})
    if not header:
        log.error('Failed to find title box')
        return None

    anchor = header.find('a')
    if anchor:
        return anchor.text

    log.error('Failed to find title a tag')
    return None
def _get_price_from_product_page(self, page: BeautifulSoup) -> Optional[Text]:
    """
    Read the price from a product page.

    The price is the whitespace-stripped text of the first ``span`` inside
    the ``div`` with class ``priceView-customer-price``.

    :param page: Parsed product page.
    :return: The stripped price text, or ``None`` (after logging an error)
        when either element is missing.
    """
    customer_price = page.find('div', attrs={'class': 'priceView-customer-price'})
    if not customer_price:
        log.error('Failed to find price box')
        return None

    amount = customer_price.find('span')
    if not amount:
        log.error('Failed to find Price Span')
        return None

    raw_price = amount.text
    return raw_price.strip()
def _get_sku_from_product_page(self, page: BeautifulSoup) -> Optional[Text]:
    """
    Read the SKU from a product page.

    The SKU is the whitespace-stripped text of the ``span`` with class
    ``product-data-value`` inside the ``div`` matched by the class filter
    ``sku product-data``.

    :param page: Parsed product page.
    :return: The stripped SKU text, or ``None`` (after logging an error)
        when either element is missing.
    """
    container = page.find('div', attrs={'class': 'sku product-data'})
    if not container:
        log.error('Failed to find SKU box')
        return None

    value_span = container.find('span', attrs={'class': 'product-data-value'})
    if value_span:
        raw_sku = value_span.text
        return raw_sku.strip()

    log.error('Failed to find SKU value')
    return None
def _get_price_from_search_result(self, item: Tag) -> Optional[Text]:
    """
    Read the price shown on a single search result.

    The price is the text of the ``span`` with class ``price-group`` inside
    the ``span`` with class ``price-main-block``.

    :param item: One search result element.
    :return: The price group's text, or ``None`` (after logging an error)
        when either element is missing.
    """
    price_block = item.find('span', {'class': 'price-main-block'})
    if not price_block:
        log.error('Failed to find price Block')
        return None

    # The attrs filter must be a dict. The previous {'class', 'price-group'}
    # was a set literal (comma instead of colon), so it was not the
    # class == 'price-group' filter the code was meant to apply.
    price_group = price_block.find('span', {'class': 'price-group'})
    if not price_group:
        log.error('Failed to find price group')
        return None
    return price_group.text
def _get_url_from_search_result(self, item: Tag) -> Optional[Text]:
    """
    Read the product URL from a single search result.

    The URL is the ``href`` of the ``a`` with class ``item-title`` inside
    the ``div`` with class ``item-info``; it is returned unmodified.

    :param item: One search result element.
    :return: The link's ``href``, or ``None`` (after logging an error) when
        the info box or the link is missing.
    """
    details = item.find('div', attrs={'class': 'item-info'})
    if details:
        title_anchor = details.find('a', attrs={'class': 'item-title'})
        if title_anchor:
            return title_anchor['href']
        log.error('Did not locate product link')
        return None

    log.error('Did not locate product info box')
    return None
def _is_in_stock_product_page(self, page: BeautifulSoup) -> bool:
    """
    Decide from a product page whether the product can be added to the cart.

    Finds the first ``button`` inside the ``div`` with class
    ``fulfillment-add-to-cart-button`` and compares its lowercased text
    with ``'add to cart'``.

    :param page: Parsed product page.
    :return: ``True`` only when the button text matches exactly
        (case-insensitively); ``False`` otherwise, including when the
        container or the button is missing (an error is logged then).
    """
    container = page.find(
        'div', attrs={'class': 'fulfillment-add-to-cart-button'})
    if not container:
        log.error('Failed to find add to cart button')
        return False

    button = container.find('button')
    if not button:
        log.error('Failed to find add to cart button')
        return False

    return button.text.lower() == 'add to cart'
def _is_in_stock_search_result(self, page: BeautifulSoup) -> bool:
    """
    Decide from a search result whether the product can be added to the cart.

    Finds the ``button`` with class ``add-to-cart-button`` inside the
    ``div`` with class ``price-block`` and compares its lowercased text
    with ``'add to cart'``.

    :param page: Parsed markup to search in.
    :return: ``True`` only when the button text matches exactly
        (case-insensitively); ``False`` otherwise, including when the price
        block or the button is missing (an error is logged then).
    """
    pricing = page.find('div', attrs={'class': 'price-block'})
    if not pricing:
        log.error('Failed to find price block')
        return False

    cart_button = pricing.find(
        'button', attrs={'class': 'add-to-cart-button'})
    if not cart_button:
        log.error('Failed to find add to cart button')
        return False

    label = cart_button.text.lower()
    return label == 'add to cart'
def _is_in_stock_product_page(self, page: BeautifulSoup) -> bool:
    """
    Decide from a product page whether the product can be added to the cart.

    Inspects every ``button`` carrying
    ``data-tl-id="ProductPrimaryCTA-cta_add_to_cart_button"`` inside the
    element with id ``product-overview``.

    :param page: Parsed product page.
    :return: ``True`` when any such button's text, ignoring surrounding
        whitespace and case, is ``'add to cart'``; ``False`` otherwise,
        including when the overview element is missing (an error is
        logged then).
    """
    product_overview = page.find('div', id='product-overview')
    if not product_overview:
        log.error('Failed to find product overview')
        return False

    # find_all is the supported name; findAll is a deprecated alias.
    cta_buttons = product_overview.find_all(
        'button',
        {'data-tl-id': 'ProductPrimaryCTA-cta_add_to_cart_button'})

    # strip() rather than strip('\n'): indentation inside the markup leaves
    # spaces/tabs around the label, which would defeat the comparison.
    return any(
        btn.text.strip().lower() == 'add to cart' for btn in cta_buttons)
def check_search_pages(self) -> List[ProductInfo]:
    """
    Load and parse every configured search page.

    Each entry of ``self.search_pages`` is fetched with ``self._load_page``,
    parsed with the ``html.parser`` backend and handed to
    ``self.parse_search_page``. Entries for which no page source comes back
    are logged and skipped.

    :return: The concatenated results of all parsed pages, in the order the
        pages were processed. Every result is also logged at debug level.
    """
    collected = []
    for url in self.search_pages:
        log.info('Checking search page: %s', url)
        html = self._load_page(url)
        if html:
            soup = BeautifulSoup(html, 'html.parser')
            collected.extend(self.parse_search_page(soup))
        else:
            log.error('Did not get page source. Skipping %s', url)

    for result in collected:
        log.debug(result)
    return collected
def _get_url_from_search_result(self, item: Tag) -> Optional[Text]:
    """
    Build the product URL for a single search result.

    The URL comes from the ``href`` of the first ``a`` inside the ``h4``
    with class ``sku-header``. An href that does not mention
    ``bestbuy.com`` is prefixed with ``https://bestbuy.com``.

    :param item: One search result element.
    :return: The product URL, or ``None`` (after logging an error) when the
        header or the anchor is missing.
    """
    header = item.find('h4', attrs={'class': 'sku-header'})
    if not header:
        log.error('Failed to find title box')
        return None

    anchor = header.find('a')
    if not anchor:
        log.error('Failed to find title a tag')
        return None

    href = anchor['href']
    return href if 'bestbuy.com' in href else 'https://bestbuy.com' + href
def notification_agent_factory(configs: List[dict]) -> List[NotificationAgent]:
    """
    Instantiate a notification agent for every usable config entry.

    Each config's ``'name'`` is lowercased and looked up in
    ``AGENT_NAME_MAP``; the mapped callable is invoked with the whole
    config dict.

    :param configs: Agent config dicts; each must contain a ``'name'`` key.
    :return: The agents that could be created. Configs with an unknown name
        or that raise ``InvalidNotificationAgentConfig`` are logged and
        skipped.
    """
    agents = []
    for config in configs:
        # Use the same lowercased key for the membership test and the
        # lookup; indexing with the raw name raised KeyError for
        # mixed-case names that had just passed the check.
        agent_key = config['name'].lower()
        if agent_key not in AGENT_NAME_MAP:
            log.error('Unable to locate notification agent with name %s',
                      config['name'])
            continue
        try:
            agents.append(AGENT_NAME_MAP[agent_key](config))
        except InvalidNotificationAgentConfig:
            log.error('Invalid notification agent config')
    return agents
def _load_page(self, url: Text, user_agent=None,
               timeout: float = 30) -> Optional[Text]:
    """
    Fetch a URL and return the response body.

    :param url: Address to request.
    :param user_agent: ``User-Agent`` header value; when falsy, the
        ``chrome`` value of a ``UserAgent(cache=False)`` instance is used.
    :param timeout: Seconds handed to ``requests.get`` as its timeout.
    :return: The response text for a 200 response; ``None`` (after logging
        an error) on ``ConnectionError``/``Timeout`` or any other status.
    """
    # Only build the UserAgent helper when the caller did not supply one.
    ua = None if user_agent else UserAgent(cache=False)
    try:
        headers = {'User-Agent': user_agent or ua.chrome}
        log.debug('User Agent: %s', headers['User-Agent'])
        # Without an explicit timeout requests waits indefinitely, so the
        # Timeout handler below could never be reached by a stalled server.
        r = requests.get(url, headers=headers, timeout=timeout)
    # NOTE(review): presumably ConnectionError is the requests exception,
    # not the builtin of the same name - confirm against the imports.
    except (ConnectionError, Timeout):
        log.error('Failed to load URL: %s', url)
        return None

    if r.status_code != 200:
        log.error('Unexpected Status Code %s for URL %s', r.status_code, url)
        return None
    return r.text
def _get_sku_from_product_page(self, page: BeautifulSoup) -> Optional[Text]:
    """
    Read the item number from a product page.

    Takes the text of the ``div`` with class ``wm-item-number`` inside the
    element with id ``product-overview``, removes the ``'Walmart # '``
    label and every space, then strips leading/trailing newlines.

    :param page: Parsed product page.
    :return: The cleaned item number, or ``None`` (after logging an error)
        when either element is missing.
    """
    overview = page.find('div', id='product-overview')
    if not overview:
        log.error('Failed to find product overview')
        return None

    number_box = overview.find('div', attrs={'class': 'wm-item-number'})
    if not number_box:
        log.error('Failed to find item number box')
        return None

    without_label = number_box.text.replace('Walmart # ', '')
    without_spaces = without_label.replace(' ', '')
    return without_spaces.strip('\n')
def _is_in_stock_search_result(self, search_result: Tag) -> bool:
    """
    Decide from a search result whether the product can be added to the cart.

    Finds the first ``button`` inside the ``div`` with class
    ``item-button-area``, logs its text at debug level and compares the
    lowercased, whitespace-stripped text with ``'add to cart'``.

    :param search_result: One search result element.
    :return: ``True`` when the button text matches; ``False`` otherwise,
        including when the button area or the button is missing (an error
        is logged then).
    """
    button_area = search_result.find(
        'div', attrs={'class': 'item-button-area'})
    if not button_area:
        log.error('Did not find button box')
        return False

    button = button_area.find('button')
    if not button:
        log.error('Did not find button')
        return False

    log.debug('Button Text: %s', button.text)
    label = button.text.lower().strip()
    return label == 'add to cart'
def _load_page(self, url: Text, timeout: float = 30) -> Optional[Text]:
    """
    Fetch a URL and return the response body.

    The request carries a ``User-Agent`` picked at random from
    ``USER_AGENTS`` and a fixed ``next-day`` cookie.

    :param url: Address to request.
    :param timeout: Seconds handed to ``requests.get`` as its timeout.
    :return: The response text for a 200 response; ``None`` (after logging
        an error) on ``ConnectionError``/``Timeout`` or any other status.
    """
    try:
        headers = {'User-Agent': random.choice(USER_AGENTS)}
        # Without an explicit timeout requests waits indefinitely, so the
        # Timeout handler below could never be reached by a stalled server.
        r = requests.get(
            url,
            headers=headers,
            cookies={'next-day': 'null|true|true|null|1608350760'},
            timeout=timeout)
    # NOTE(review): presumably ConnectionError is the requests exception,
    # not the builtin of the same name - confirm against the imports.
    except (ConnectionError, Timeout):
        log.error('Failed to load URL: %s', url)
        return None

    if r.status_code != 200:
        log.error('Unexpected Status Code %s for URL %s', r.status_code, url)
        return None
    return r.text
def _get_price_from_product_page(self, page: BeautifulSoup) -> Optional[Text]:
    """
    Assemble the price from a product page.

    The price is the concatenated text of the ``price-currency``,
    ``price-characteristic``, ``price-mark`` and ``price-mantissa`` spans
    found inside the ``span`` with class ``price`` of the ``section`` with
    class ``prod-PriceSection``.

    :param page: Parsed product page.
    :return: The assembled price string, or ``None`` (after logging an
        error) when any required element is missing.
    """
    product_overview = page.find('div', id='product-overview')
    if not product_overview:
        log.error('Failed to find product overview')
        return None

    # NOTE(review): the price section is searched on the whole page, not
    # inside product_overview (which only acts as a presence check) -
    # confirm this is intended.
    price_section = page.find('section', {'class': 'prod-PriceSection'})
    if not price_section:
        log.error('Failed to find price section')
        return None

    price_span = price_section.find('span', {'class': 'price'})
    if not price_span:
        log.error('Failed to find price span')
        return None

    # Each component is looked up separately; a missing one used to raise
    # AttributeError on ``None.text`` instead of producing a logged None.
    parts = []
    for css_class in ('price-currency', 'price-characteristic',
                      'price-mark', 'price-mantissa'):
        part = price_span.find('span', {'class': css_class})
        if not part:
            log.error('Failed to find price component %s', css_class)
            return None
        parts.append(part.text)
    return ''.join(parts)
def load_configs_from_dir(config_dir: Text) -> List[ParserConfig]:
    """
    Take a given directory and attempt to load all .json files

    The directory is walked recursively. Every file whose extension is
    exactly ``.json`` is read as UTF-8 JSON and its top-level object is
    expanded into ``ParserConfig(**data)``. Files that cannot be decoded
    or parsed are logged and skipped.

    :param config_dir: Directory to search.
    :raises InvalidConfigDirectory: If ``config_dir`` is not a directory.
    :rtype: List[ParserConfig]
    """
    if not os.path.isdir(config_dir):
        raise InvalidConfigDirectory(f'Invalid config directory: {config_dir}')

    configs = []
    for root, _dirs, files in os.walk(config_dir):
        for file in files:
            if os.path.splitext(file)[1] != '.json':
                continue
            try:
                # Explicit UTF-8: the platform default encoding differs
                # between systems and can mis-decode non-ASCII config values.
                with open(os.path.join(root, file), 'r',
                          encoding='utf-8') as f:
                    config_data = json.load(f)
            except (JSONDecodeError, UnicodeDecodeError) as e:
                log.error('Invalid json in %s. %s', file, str(e))
                continue
            configs.append(ParserConfig(**config_data))
    return configs
def _get_sku_from_product_page(self, page: BeautifulSoup) -> Optional[Text]:
    """
    Read the SKU from a product page's breadcrumb trail.

    The SKU is the text of the ``em`` inside the ``li`` with class
    ``is-current`` of the ``ol`` with class ``breadcrumb``. The value is
    logged at debug level before being returned.

    :param page: Parsed product page.
    :return: The SKU text, or ``None`` (after logging an error) when any of
        the three elements is missing.
    """
    trail = page.find('ol', attrs={'class': 'breadcrumb'})
    if not trail:
        log.error('Failed to get breadcrumb box from page')
        return None

    current_crumb = trail.find('li', attrs={'class': 'is-current'})
    if not current_crumb:
        log.error('Failed to find SKU box')
        return None

    emphasis = current_crumb.find('em')
    if not emphasis:
        log.error('Failed to find SKU text box')
        return None

    found_sku = emphasis.text
    log.debug('Found SKU %s', found_sku)
    return found_sku
def _get_price_from_product_page(self, page: BeautifulSoup) -> Optional[Text]:
    """
    Read the price from a product page's buy box.

    The price is the text of the ``strong`` inside the ``li`` with class
    ``price-current`` of the ``div`` with class ``product-buy-box``. The
    value is logged at debug level before being returned.

    :param page: Parsed product page.
    :return: The price text, or ``None`` (after logging an error) when any
        of the three elements is missing.
    """
    purchase_box = page.find('div', attrs={'class': 'product-buy-box'})
    if not purchase_box:
        log.error('Failed to find buy box')
        return None

    current_price = purchase_box.find('li', attrs={'class': 'price-current'})
    if not current_price:
        log.error('Failed to find price box')
        return None

    strong_tag = current_price.find('strong')
    if not strong_tag:
        log.error('Failed to find price text box')
        return None

    amount = strong_tag.text
    log.debug('Price is %s', amount)
    return amount
import argparse
import os
import sys

from stockstalker.common.logging import log
from stockstalker.parsers.parser_helpers import parser_factory
from stockstalker.util.helpers import load_configs_from_dir


def main() -> None:
    """
    Command-line entry point: load parser configs and check stock.

    Reads ``--config-dir`` (default: ``configs`` under the current working
    directory), loads the configs found there, builds one parser per config
    with ``parser_factory`` and calls ``check_stock()`` on each of them.
    Exits with status 1 when no configs were loaded.
    """
    parser = argparse.ArgumentParser(
        description="A tool to check stock of products")
    parser.add_argument('--config-dir',
                        default=os.path.join(os.getcwd(), 'configs'),
                        dest='config_dir')
    args = parser.parse_args()

    configs = load_configs_from_dir(args.config_dir)
    if not configs:
        log.error('No configs loaded')
        sys.exit(1)

    parsers = [parser_factory(config) for config in configs]
    # Every configured parser is checked; previously only parsers[0] ran
    # and the remaining configs were silently ignored.
    for stock_parser in parsers:
        stock_parser.check_stock()
    print('')


if __name__ == '__main__':
    main()
def _get_title_from_search_result(self, item: Tag) -> Optional[Text]:
    """
    Read the product title from a single search result.

    The title is the text of the ``a`` with class ``product-title-link``.

    :param item: One search result element.
    :return: The link's text, or ``None`` (after logging an error) when the
        link is missing.
    """
    title_anchor = item.find('a', attrs={'class': 'product-title-link'})
    if title_anchor:
        return title_anchor.text
    log.error('Failed to find title link')
    return None