def __init__(self):
    """Load settings from config.ini and construct the scrapers.

    Reads the Morningstar credentials from the [Credentials] section,
    builds the Google/ASX/Morningstar scraper instances, and logs in to
    Morningstar immediately.

    Raises:
        configparser.NoSectionError / NoOptionError: if the credential
            entries are missing from config.ini.
    """
    config = configparser.ConfigParser()
    config.read('config.ini')
    ms_username = config.get('Credentials', 'morningstar_username')
    ms_password = config.get('Credentials', 'morningstar_password')

    # Minimum market cap (in dollars) for stocks the Google scraper keeps.
    # Was a hard-coded 50,000,000 with a TODO to move it into the config
    # file; now read from [Scraper] min_market_cap, falling back to the
    # old value so existing config.ini files keep working unchanged.
    min_market_cap = config.getint('Scraper', 'min_market_cap',
                                   fallback=50000000)

    self._google_scraper = GoogleScraper(min_market_cap)
    self._asx_scraper = AsxScraper()
    self._ms_scraper = MorningStarScraper(ms_username, ms_password)
    self._ms_scraper.login()
def execute(self, args):
    """Parse command-line arguments and run the image scraper for a site.

    Requires the SCRAPER_CHROME_BINARY_PATH and SCRAPER_CHROME_DRIVER_PATH
    environment variables (Selenium Chrome driver settings); prints an
    error and returns early when either is missing.

    Args:
        args: unused here — arguments are read from sys.argv via argparse.
    """
    binary_path = os.getenv("SCRAPER_CHROME_BINARY_PATH")
    driver_path = os.getenv("SCRAPER_CHROME_DRIVER_PATH")
    if binary_path is None or driver_path is None:
        print(
            "Error: SCRAPER_CHROME_BINARY_PATH and SCRAPER_CHROME_DRIVER_PATH environment variables must be set to Selenium Chrome Driver settings."
        )
        return

    # Parse arguments. NOTE: pagecount, start_page and image_size use
    # nargs="?" so that their documented defaults actually apply —
    # argparse ignores `default=` on required positionals, so previously
    # these "defaults" were never used.
    parser = argparse.ArgumentParser(
        description="Scrape a website for images using searchterms.")
    parser.add_argument(
        "site",
        type=str,
        help="The site to search in. Can be 'bigstock', 'google' or 'shutterstock'",
    )
    parser.add_argument("searchterm", type=str, help="The terms to search for.")
    parser.add_argument(
        "pagecount",
        type=int,
        nargs="?",
        default=100,
        help="The total number of pages to scrape. Default is 100.",
    )
    parser.add_argument(
        "start_page",
        type=int,
        nargs="?",
        default=1,
        help="The page to start the search on. Default is 1.",
    )
    parser.add_argument(
        "image_size",
        type=str,
        nargs="?",
        default="regular",
        help="The image size that should be downloaded. Can be 'small', 'regular' or 'large'. Default is 'regular'.",
    )
    arguments = parser.parse_args()

    self.site = arguments.site
    self.searchterm = arguments.searchterm
    self.pagecount = arguments.pagecount
    self.start_page = arguments.start_page
    self.image_size = arguments.image_size

    search_options = {
        "searchterm": self.searchterm,
        "pagecount": self.pagecount,
        "start_page": self.start_page,
        "image_size": self.image_size,
    }
    webdriver_options = {
        "chrome_binary_path": binary_path,
        "chrome_driver_path": driver_path,
    }

    # Select the proper scraper class based on the received args.
    scraper_classes = {
        "google": GoogleScraper,
        "bigstock": BigStockScraper,
        "shutterstock": ShutterStockScraper,
    }
    scraper_class = scraper_classes.get(self.site)
    if scraper_class is None:
        print(f"Error: no scraper found for website '{self.site}'.")
        return
    self.scraper = scraper_class(self)
    self.scraper.run(search_options, webdriver_options)