def _load_page(self, url, outdir, trial_num=-1):
    """Load the specified URL with a plain HTTP GET (requests).

    Args:
        url: the URL to fetch.
        outdir: output directory (unused by this loader).
        trial_num: trial index (unused by this loader).

    Returns:
        LoadResult describing success or the failure mode.
    """
    logging.info('Loading page: %s', url)
    try:
        # Load the page; the outer Timeout guards against requests
        # hanging past its own per-request timeout.
        with Timeout(seconds=self._timeout + 5):
            headers = {}
            if self._user_agent:
                headers['User-Agent'] = self._user_agent
            response = requests.get(url, timeout=self._timeout,
                                    headers=headers)

            # received response; may not have been successful
            if response.status_code != 200:
                # BUG FIX: was "LoadResultFAILURE_NO_200" (missing dot),
                # which raised NameError instead of reporting the failure.
                return LoadResult(LoadResult.FAILURE_NO_200, url)
            else:
                return LoadResult(LoadResult.SUCCESS, url,
                                  final_url=response.url,
                                  time=response.elapsed.total_seconds(),
                                  size=len(response.content))

    # problem executing request
    except (TimeoutError, requests.exceptions.Timeout):
        logging.exception('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s'
                          % (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page_selenium(self, url, outdir):
    """Load *url* in the managed Selenium browser and time the load.

    Blocks until document.readyState reports 'complete', then reads
    W3C navigation-timing data to compute the load time.
    """
    logging.info('Fetching page %s', url)
    try:
        # Drive the browser to the page and wait for a complete load.
        with Timeout(seconds=self._timeout + 5):
            self._selenium_driver.get(url)
            WebDriverWait(self._selenium_driver, self._timeout).until(
                lambda d: d.execute_script(
                    'return document.readyState') == 'complete')
            logging.debug('Page loaded.')

            # get timing information
            # http://www.w3.org/TR/navigation-timing/#processing-model
            timings = self._selenium_driver.execute_script(TIMINGS_JAVASCRIPT)
            load_time = (timings['loadEventEnd']
                         - timings['fetchStart']) / 1000.0
            return LoadResult(LoadResult.SUCCESS, url,
                              time=load_time,
                              final_url=self._selenium_driver.current_url)
    except TimeoutError:
        # Raised by the outer Timeout guard.
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except TimeoutException:
        # Raised by WebDriverWait when readyState never completes.
        logging.exception('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except Exception as e:
        logging.exception('Error loading %s: %s' % (url, e))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page_native(self, url, outdir):
    """Load *url* by launching Firefox directly (no instrumentation)."""
    logging.info('Fetching page %s', url)
    try:
        firefox_cmd = '%s %s' % (FIREFOX, url)
        #firefox_cmd = '%s -profile %s %s' % (FIREFOX, self._profile_path, url)
        logging.debug('Loading: %s', firefox_cmd)
        with Timeout(seconds=self._timeout + 5):
            subprocess.check_output(firefox_cmd.split())
            # TODO: error checking
            # TODO: try to get timing info, final URL, HAR, etc.
            logging.debug('Page loaded.')
            return LoadResult(LoadResult.SUCCESS, url)
    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s' % (
            url,
            e,
            e.output,
        ))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s' % (url, e))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page(self, url, outdir, trial_num=-1):
    """Load the specified URL with curl and parse its statistics line.

    Returns:
        LoadResult with status, final URL, load time, and size.
    """
    logging.info('Loading page: %s', url)
    try:
        # prepare the curl command
        curl_cmd = CURL
        curl_cmd += ' -s -S'  # don't show progress meter
        curl_cmd += ' -L'  # follow redirects
        curl_cmd += ' -o /dev/null'  # don't print file to stdout
        # format for stats at end
        curl_cmd += ' -w http_code=%{http_code};final_url=%{url_effective};time=%{time_total};size=%{size_download}'
        curl_cmd += ' --connect-timeout %i' % self._timeout  # TCP connect timeout
        if self._disable_network_cache:
            curl_cmd += ' --header "Cache-Control: max-age=0"'  # disable network caches
        if self._user_agent:
            curl_cmd += ' --user-agent "%s"' % self._user_agent  # custom user agent
        curl_cmd += ' %s' % url

        # load the page
        logging.debug('Running curl: %s', curl_cmd)
        with Timeout(seconds=self._timeout + 5):
            output = subprocess.check_output(shlex.split(curl_cmd))
        logging.debug('curl returned: %s', output.strip())

        # curl returned, but may or may not have succeeded; stats are
        # on the last line as "key=value;key=value;..."
        returnvals = {field.split('=')[0]: field.split('=')[1]
                      for field in output.split('\n')[-1].split(';')}
        if returnvals['http_code'] != '200':
            return LoadResult(LoadResult.FAILURE_NO_200, url)
        else:
            # Report status and time.
            # FIX: use str.replace instead of the Python-2-only
            # string.replace() module function (removed in Python 3);
            # the swap handles locales where curl prints decimal commas.
            return LoadResult(LoadResult.SUCCESS, url,
                              final_url=returnvals['final_url'],
                              time=float(
                                  returnvals['time'].replace(',', '.')),
                              size=returnvals['size'])

    # problem running curl
    except TimeoutError:
        logging.exception('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s' % (url, e, e.output))
        # curl exit code 28 means "operation timed out"
        if e.returncode == 28:
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        else:
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s'
                          % (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page(self, url, outdir, trial_num=-1):
    """Probe *url* over TLS with openssl s_client.

    Optionally tests TLS False Start (-cutthrough) and session
    resumption (-reconnect), reporting support flags in the LoadResult.
    Always tries to kill stray openssl processes afterwards.
    """
    logging.info('Loading page: %s', url)
    try:
        # Build a raw HTTP GET to feed to s_client over the TLS channel.
        parsed_url = urlparse.urlparse(url)
        path = '/' if parsed_url.path == '' else parsed_url.path
        if parsed_url.scheme != 'https':
            # FIX: logging.warn() is a deprecated alias; use warning().
            logging.warning(
                'Specified protocol was not HTTPS; using HTTPS anyway.')
        get_request = 'GET %s HTTP/1.1\r\nHost: %s\r\n\r\n' %\
            (path, parsed_url.netloc)

        options = ''
        if self._test_false_start:
            options += ' -cutthrough'
        if self._test_session_resumption:
            options += ' -reconnect'
        cmd = '%s s_client -connect %s:443 %s' %\
            (OPENSSL_BINARY, parsed_url.netloc, options)

        logging.debug('Running tcploader: %s', cmd)
        with Timeout(seconds=self._timeout + 5):
            p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            (stdout, stderr) = p.communicate(input=get_request)
            #output = subprocess.check_output(cmd, shell=True)
        logging.debug('s_client returned: %s', stdout.strip())

        # TODO: better OpenSSL error checking here
        return LoadResult(
            LoadResult.SUCCESS,
            url,
            tls_false_start_supported=('false_start=yes' in stdout),
            tls_session_resumption_supported=(
                'session_resumption=yes' in stdout))

    # problem running tcp_loader
    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s\n%s'
                          % (url, e, e.output, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s'
                          % (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    finally:
        # Best-effort: make sure no openssl process outlives this trial.
        try:
            subprocess.check_output('killall openssl'.split())
        except Exception as e:
            logging.debug(
                'Error killing openssl (process might not exist): %s', e)
def _load_page(self, url, outdir, trial_num=None, tag=None):
    """Fetch *url* with chrome-har-capturer, optionally saving a HAR."""
    # Decide where the HAR goes; discard it when saving is disabled.
    if self._save_har:
        harpath = self._outfile_path(url, suffix='.har',
                                     trial=trial_num, tag=tag)
    else:
        harpath = '/dev/null'
    logging.debug('Will save HAR to %s', harpath)

    # Assemble the capturer's command-line flags.
    capturer_args = ''
    onload_delay = self._delay_after_onload
    if self._delay_first_trial_only and trial_num != 0:
        onload_delay = 0
    capturer_args += ' -d %i' % onload_delay
    if self._disable_network_cache:
        capturer_args += ' --no-network-cache'
    if self._save_content == 'always' or\
       (self._save_content == 'first' and trial_num == 0):
        capturer_args += ' -c'

    # load the specified URL
    logging.info('Fetching page %s (%s)', url, tag)
    try:
        capturer_cmd = '%s -o "%s" %s %s' %\
            (CHROME_HAR_CAPTURER, harpath, capturer_args, url)
        logging.debug('Running capturer: %s', capturer_cmd)
        with Timeout(seconds=self._timeout + 5):
            subprocess.check_call(capturer_cmd, shell=True,
                                  stdout=self._stdout_file,
                                  stderr=subprocess.STDOUT)
    except TimeoutError:
        logging.error('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s' % (url, e, e.output))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s' % (url, e))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)

    logging.debug('Page loaded.')
    return LoadResult(LoadResult.SUCCESS, url, har=harpath)
def _load_page(self, url, outdir, trial_num=-1):
    """Fetch *url* with the external tcploader binary and parse its stats."""
    logging.info('Loading page: %s', url)
    try:
        # Build the tcploader invocation from the URL's components.
        parsed_url = urlparse.urlparse(url)
        path = '/' if parsed_url.path == '' else parsed_url.path
        cmd = '%s %s %s %s' %\
            (TCPLOADER, parsed_url.scheme, parsed_url.netloc, path)
        if self._user_agent:
            cmd += ' "%s"' % self._user_agent

        logging.debug('Running tcploader: %s', cmd)
        with Timeout(seconds=self._timeout + 5):
            output = subprocess.check_output(cmd, shell=True)
        logging.debug('tcploader returned: %s', output.strip())

        # The loader's last output line is "key=value;key=value;..."
        returnvals = {field.split('=')[0]: field.split('=')[1]
                      for field in output.strip().split('\n')[-1].split(';')}
        return LoadResult(LoadResult.SUCCESS, url,
                          time=float(returnvals['time_seconds']),
                          size=int(returnvals['size']),
                          server=returnvals['server'],
                          tcp_fast_open_supported=bool(
                              int(returnvals['tcp_fast_open_used'])))

    # problem running tcp_loader
    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s\n%s'
                          % (url, e, e.output, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s'
                          % (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    finally:
        # Best-effort: kill any tcp_loader still hanging around.
        try:
            subprocess.check_output('killall tcp_loader'.split())
        except Exception as e:
            logging.debug(
                'Error killing tcp_loader (process might not exist): %s', e)
def _load_page(self, url, outdir, trial_num=-1):
    """Load the specified URL via the node.js HTTP/2 client and parse stats."""
    logging.info('Loading page: %s', url)
    try:
        # prepare the NODE command
        node_cmd = NODE + ' '
        node_cmd += NODEHTTP2 + ' '  # Location of node.js client HTTP2 program
        node_cmd += url

        # load the page
        logging.debug('Running node.js: %s', node_cmd)
        with Timeout(seconds=self._timeout + 5):
            output = subprocess.check_output(shlex.split(node_cmd))
        logging.debug('NODE returned: %s', output.strip())

        # NODE returned, but may or may not have succeeded;
        # output is "key=value;key=value;..."
        returnvals = {field.split('=')[0]: field.split('=')[1]
                      for field in output.split(';')}
        if returnvals['http_code'] != '200':
            return LoadResult(LoadResult.FAILURE_NO_200, url)
        else:
            # Report status and time.
            # FIX: use str.replace instead of the Python-2-only
            # string.replace() module function (removed in Python 3);
            # the swap handles locales that print decimal commas.
            return LoadResult(LoadResult.SUCCESS, url,
                              final_url=returnvals['final_url'],
                              time=float(
                                  returnvals['time'].replace(',', '.')),
                              size=returnvals['size'])

    # problem running NODE
    except TimeoutError:
        logging.exception('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s' % (url, e, e.output))
        # exit code 28 is treated as a timeout, mirroring the curl loader
        if e.returncode == 28:
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        else:
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s'
                          % (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page(self, url, outdir, trial_num=-1):
    """Load *url* with PhantomJS (loadspeed.js), saving HAR/screenshot.

    Returns:
        LoadResult with final URL, load time, and artifact paths.
    """
    # path for new HAR file
    safeurl = self._sanitize_url(url)
    filename = '%s_trial%d.har' % (safeurl, trial_num)
    imagename = '%s_trial%d.png' % (safeurl, trial_num)
    harpath = os.path.join(outdir, filename)
    if self._save_har:
        logging.debug('Will save HAR to %s', harpath)
    if self._save_screenshot:
        imagepath = os.path.join(outdir, imagename)
        logging.debug('Will save screenshot to %s', imagepath)
    else:
        imagepath = '/dev/null'

    # load the specified URL
    logging.info('Loading page: %s', url)
    try:
        # Load the page
        phantom_cmd = '%s --ssl-protocol=any %s %s %s %d' %\
            (PHANTOMJS, PHANTOMLOADER, url, imagepath, self._timeout)
        phantom_cmd = phantom_cmd.split()
        if self._user_agent:
            # BUG FIX: the argv list previously got ' "%s"' appended, so
            # PhantomJS received literal quotes and a leading space as
            # part of the argument. Pass the raw user-agent string.
            phantom_cmd.append(self._user_agent)
        logging.debug('Running PhantomJS: %s', phantom_cmd)
        with Timeout(seconds=self._timeout + 5):
            output = subprocess.check_output(phantom_cmd)
        # loadspeed.js separates the HAR from its status line with a marker
        har, statusline = output.split('*=*=*=*')
        logging.debug('loadspeed.js returned: %s', statusline.strip())

        # PhantomJS returned, but may or may not have succeeded
        fields = statusline.strip().split(':')
        status = fields[0]
        message = ':'.join(fields[1:])

        if status == 'FAILURE':
            if message == 'timeout':
                logging.error('Timeout fetching %s', url)
                return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
            else:
                logging.error('Error fetching %s: %s', url, message)
                return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        elif status == 'SUCCESS':
            # Save the HAR ("with" closes the file; the old trailing
            # "f.closed" expression was a no-op and has been removed).
            if self._save_har:
                with open(harpath, 'w') as f:
                    f.write(har)

            # Report status and time
            returnvals = {field.split('=')[0]: field.split('=')[1]
                          for field in message.split(';')}
            return LoadResult(LoadResult.SUCCESS, url,
                              final_url=returnvals['final_url'],
                              time=float(returnvals['time']) / 1000.0,
                              har=harpath,
                              img=imagepath)
        else:
            logging.error('loadspeed.js returned unexpected output: %s',
                          output)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)

    # problem running PhantomJS
    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s\n%s'
                          % (url, e, e.output, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s'
                          % (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _setup(self):
    """Start Xvfb (if headless) and launch Chrome with remote debugging.

    Returns:
        True if both processes started and Chrome's remote debug port
        (9222) is reachable; False on any failure.
    """
    stdout = self._stdout_file
    stderr = self._stdout_file

    if self._headless:
        # start a virtual display
        try:
            os.environ['DISPLAY'] = DISPLAY
            xvfb_command = '%s %s -screen 0 1366x768x24 -ac' % (XVFB, DISPLAY)
            logging.debug('Starting XVFB: %s', xvfb_command)
            self._xvfb_proc = subprocess.Popen(xvfb_command.split(),
                                               stdout=stdout, stderr=stderr)
            sleep(1)
            # check if Xvfb failed to start and process terminated
            retcode = self._xvfb_proc.poll()
            if retcode is not None:
                # BUG FIX: previously `raise ("...")` raised a plain
                # string, which is a TypeError, not the intended error.
                raise Exception(
                    'Xvfb proc exited with return code: %i' % retcode)
        except Exception:
            logging.exception("Error starting XFVB")
            return False
        logging.debug('Started XVFB (DISPLAY=%s)', os.environ['DISPLAY'])

    if self._log_ssl_keys:
        # Ask TLS libraries to dump session keys for later decryption.
        keylog_file = os.path.join(self._outdir, 'ssl_keylog')
        os.environ['SSLKEYLOGFILE'] = keylog_file

    # launch chrome with no cache and remote debug on
    try:
        # TODO: enable HTTP2
        options = ''
        if self._user_agent:
            options += ' --user-agent="%s"' % self._user_agent
        if self._disable_local_cache:
            options += ' --disable-application-cache --disable-cache'
        if self._disable_quic:
            options += ' --disable-quic'
        if self._disable_spdy:
            options += ' --use-spdy=off'
        if self._ignore_certificate_errors:
            options += ' --ignore-certificate-errors'
        # options for chrome-har-capturer
        options += (' --remote-debugging-port=9222'
                    ' --enable-benchmarking --enable-net-benchmarking')
        chrome_command = '%s %s' % (CHROME, options)
        logging.debug('Starting Chrome: %s', chrome_command)
        self._chrome_proc = subprocess.Popen(chrome_command.split(),
                                             stdout=stdout, stderr=stderr)

        # wait until chrome remote debugging is ready
        with Timeout(seconds=5):
            curl_retcode = -1
            while curl_retcode != 0:
                # try to access chrome remote debug interface
                curl_cmd = ('%s -sS --max-time 1 -o /dev/null'
                            ' localhost:9222/json') % CURL
                curl_retcode = subprocess.call(curl_cmd.split(),
                                               stdout=self._stdout_file,
                                               stderr=subprocess.STDOUT)
                logging.debug(
                    'Checking if Chrome remote debug is ready. '
                    'Curl return code: %d', curl_retcode)
                # check to see if chrome exited for some reason
                # (e.g., if Xvfb failed to start)
                chrome_retcode = self._chrome_proc.poll()
                if chrome_retcode is not None:
                    # BUG FIX: was `raise ("...")` — raising a plain
                    # string is a TypeError; raise a real exception.
                    raise Exception(
                        'Chrome proc exited with return code: %i'
                        % chrome_retcode)
                sleep(0.5)
    except TimeoutError:
        logging.error('Timeout waiting for Chrome to be ready')
        return False
    except Exception:
        logging.exception("Error starting Chrome")
        return False

    logging.debug('Started Chrome')
    return True