if __name__ == '__main__':
    # Script entry point: load the Rivalry page in headless Chrome, pull the
    # rendered text of the '__nuxt' element, transcribe it into rows and
    # either insert the rows into Postgres (PRODUCTION) or log them.
    logger.info('Starting scrape job for rivalry table data.')

    # initialize headless selenium webdriver
    chrome_options = webdriver.ChromeOptions()
    for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
        chrome_options.add_argument(flag)
    # NOTE(review): `chrome_options=` and `find_element_by_id` are deprecated
    # in newer Selenium releases -- confirm against the pinned selenium
    # version before upgrading.
    driver = webdriver.Chrome(chrome_options=chrome_options)

    try:
        # load website / raw table data
        driver.get(RIVALRY_URL)
        time.sleep(5)  # give webpage time to load table
        # scroll down to load dynamic content
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)
        raw_lines = driver.find_element_by_id('__nuxt').text.split('\n')
    finally:
        # Always shut the browser down, even if loading or scraping fails;
        # otherwise every run leaves a headless Chrome process behind.
        driver.quit()

    table = transcribe_table_data(raw_lines)
    row_count = len(table)
    logger.info('Finished processing of %s rows.', row_count)

    # insert to db
    if row_count == 0:
        # An empty result is reported regardless of environment.
        logger.warning('Rivalry data scrape produced 0 data points.')
    elif ENVIRONMENT == 'PRODUCTION':
        logger.info('Inserting %s rows into database.', row_count)
        postgres_db_insert(table, DB_CREDENTIALS)
    else:
        # Any non-production environment only logs the scraped rows.
        logger.info('Produced data: %s', table)
# NOTE(review): the start of this script (driver creation and page load) is
# not visible here; `driver` is assumed to be an already-initialised selenium
# webdriver -- confirm against the preceding code.
try:
    time.sleep(5)  # give webpage time to load table
    # scroll down to load dynamic content
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)

    # transcribe data table
    table = driver.find_element_by_id('betting__container').text
finally:
    # The driver is not used past this point, so release the browser here and
    # guarantee it is closed even when the scrape itself raises.
    driver.quit()

soup = BeautifulSoup(table, 'html.parser')
table_text = insert_row_breaks(remove_header(soup.text))
table_rows = table_text.split('_ROW_BREAK_')
# The first transcribed entry is dropped before any further use.
formatted_data = transcribe_table_data(table_rows)[1:]

row_count = len(formatted_data)
if row_count == 1:
    logger.info('Finished processing %s row', row_count)
else:
    logger.info('Finished processing %s rows', row_count)

# insert to db
if ENVIRONMENT == "PRODUCTION":
    if row_count > 0:
        logger.info('Inserting %s rows into database', row_count)
        postgres_db_insert(formatted_data, DB_CREDENTIALS)
    else:
        logger.warning('GGBET data scrape produced 0 data points')
elif ENVIRONMENT == "DEVELOPMENT":
    # NOTE(review): this logs the raw element text (`table`), not
    # `formatted_data` -- confirm which one is intended.
    logger.info('Produced data: %s', table)
else:
    logger.warning("ENVIRONMENT environment variable not set correctly")