def login():
    """Start a crawl for the posted URL, or serve the generated PDF.

    On POST, the "url" value of the JSON body is checked against an
    absolute-URL pattern (http/https/ftp/ftps scheme; domain name,
    ``localhost`` or dotted-quad host; optional port and path). If the value
    is missing, is not a string, or does not match, the request is aborted
    with the ``Constants.URL_NOT_FOUND`` payload and status code. Otherwise
    the URL is handed to ``SeleniumCrawler().get_page`` and its result is
    returned as JSON.

    For any other request method the file 'out.pdf' is sent back.
    """
    if request.method != 'POST':
        return send_file('out.pdf')

    print("recieve post request")
    response = Response()
    request_object = request.json

    # The parsed body may be None or a non-dict JSON value (list, string,
    # number); treat those as "no url supplied" instead of crashing on .get().
    if isinstance(request_object, dict):
        url = request_object.get("url", "")
    else:
        url = ""

    regex = re.compile(
        r'^(?:http|ftp)s?://'  # http:// or https://
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
        r'localhost|'  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)

    # A non-string "url" (e.g. a number) would make the regex match raise
    # TypeError, so it is rejected the same way as a malformed URL.
    if not isinstance(url, str) or not regex.match(url):
        abort(
            make_response(
                jsonify(
                    response.get_response(Constants.URL_NOT_FOUND,
                                          Constants.URL_NOT_FOUND)),
                response.get_code(Constants.URL_NOT_FOUND)))

    print("url is valid")
    return jsonify(SeleniumCrawler().get_page(url=url))
def get_page(self, url):
    """Scrape the paginated 'kbn-table' at ``url`` and write it to 'out.pdf'.

    Opens ``url`` in headless Chrome, waits for an element with class
    'kbn-table', collects its inner HTML, clicks through the remaining pages
    collecting each page's table HTML, then renders the combined markup to
    'out.pdf' with ``pydf.generate_pdf``.

    Args:
        url: Address of the page to load in the browser.

    Returns:
        The ``Constants.SUCCESS`` payload from ``response.get_response``,
        round-tripped through ``json.dumps``/``json.loads``.

    On any exception the error is logged and the request is aborted with the
    ``Constants.SERVER_ERROR`` payload and status code. The browser is always
    shut down before this method exits.
    """
    response = Response()
    browser = None
    try:
        # Initialize the chrome driver
        print("Initilized the chrome driver")
        chrome_options = webdriver.ChromeOptions()
        for argument in ('--no-sandbox', '--window-size=1420,1080',
                         '--headless', '--disable-gpu'):
            chrome_options.add_argument(argument)
        browser = webdriver.Chrome(chrome_options=chrome_options)

        # Open the requested page
        print("browser kibana")
        browser.get(url)

        # NOTE(review): WebDriverWait timeouts are in seconds, so this waits
        # up to ~2.8 hours; the value may have been meant as milliseconds.
        # Left unchanged -- confirm the intended limit.
        delay = 10000

        # Wait until the table is present before reading it
        print("wait till specific classes appears")
        WebDriverWait(browser, delay).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'kbn-table')))
        table_parts = [
            browser.find_element_by_class_name("kbn-table").get_attribute(
                'innerHTML')
        ]

        # Work out how many further pages there are and walk through them.
        # Assumes the toolbar text's third space-separated token is the total
        # row count (thousands separated by commas) and that each page shows
        # 50 rows -- TODO confirm against the target page.
        print("calculate number of pages exists and loop them")
        toolbar_text = str(
            browser.find_element_by_class_name("kuiToolBarText").text)
        total_rows = toolbar_text.split(" ")[2].replace(",", "")
        # The first page has already been captured, hence the "- 1".
        pages = math.ceil(int(total_rows) / 50) - 1
        print("pages found {}".format(pages))

        # `pages` is already the number of *remaining* pages, so click "next"
        # exactly that many times; range(1, pages) skipped the last page.
        for _ in range(pages):
            browser.execute_script(
                "document.getElementsByClassName('kuiButton')[1].click()")
            chunk = browser.find_element_by_class_name(
                "kbn-table").get_attribute('innerHTML').replace(
                    "<tbody>", "")
            table_parts.append(chunk)

        # Wrap the collected rows in table tags and generate the pdf
        print("apply table tags and generate pdf")
        pdf = pydf.generate_pdf("<table>" + "".join(table_parts) + "</table>")
        with open('out.pdf', 'wb') as f:
            f.write(pdf)
        return json.loads(
            json.dumps((response.get_response(Constants.SUCCESS,
                                              Constants.SUCCESS))))
    except Exception as e:
        logging.exception(e)
        return abort(
            make_response(
                jsonify(
                    response.get_response(Constants.SERVER_ERROR,
                                          Constants.SERVER_ERROR)),
                response.get_code(Constants.SERVER_ERROR)))
    finally:
        # Always release the Chrome process; otherwise each request leaks a
        # headless browser. A failure during cleanup must not mask the
        # response or error already in flight.
        if browser is not None:
            try:
                browser.quit()
            except Exception as quit_error:
                logging.exception(quit_error)