# matplotlib is quite massive so we're only importing it now. import matplotlib import matplotlib.pyplot as plt bm_file_path, _, _, _ = get_paths() bm_results_file_path = append_to_filename(bm_file_path, "_results") # Read results file into rows field. rows = [] with open(bm_results_file_path) as f: # Skip CSV header. next(f) for line in f: line = parse_csv_line(line) line[1] = as_bool(line[1]) line[2] = as_bool(line[2]) for i in range(3, len(line)): line[i] = float(line[i]) rows.append(line) # Create basic figure. fig, axes = plt.subplots(figsize=(6, 3)) if action.startswith("hist_"): hist_data = action[5:] # Strip hist_ at beginning. hist_title = "" # Contains indices of column containing measurements for
# Read the blacklist: one URL per line, skipping blanks and comments.
# A set gives O(1) membership tests in the per-row filter below.
blacklisted_urls = set()
with open(bl_file_path, "r") as f:
    for line in f:
        line = line.strip()
        # Ignore empty lines and comments.
        if line.startswith("#") or len(line) == 0:
            continue
        blacklisted_urls.add(line)

# Filter the benchmark CSV into a truncated copy. Both files are
# context-managed so they are closed even if parsing raises mid-way
# (the original opened out_file manually and only closed it on success).
with open(bm_file_path, "r") as in_file, \
        open(bm_trunc_file_path, "w") as out_file:
    # Copy CSV header.
    out_file.write(next(in_file))
    # Write rows line by line except for the ones that
    # belong to a blacklisted URL.
    for line in in_file:
        if parse_csv_line(line)[columns["url"]] in blacklisted_urls:
            continue
        out_file.write(line)

# Copy original file.
shutil.copyfile(bm_file_path, append_to_filename(bm_file_path, "_orig"))
# Overwrite original benchmark file.
shutil.move(bm_trunc_file_path, bm_file_path)
where JS was enabled and one where JS was disabled. """ from util import get_paths, parse_csv_line, append_to_filename, as_bool from util import benchmark_columns as columns import os bm_file_path, _, _, _ = get_paths() bm_js_file_path = append_to_filename(bm_file_path, "_js") bm_no_js_file_path = append_to_filename(bm_file_path, "_no_js") with open(bm_file_path, "r") as f: out_js = open(bm_js_file_path, "w") out_no_js = open(bm_no_js_file_path, "w") # Get CSV header and write them to both output files. csv_header = next(f) out_js.write(csv_header) out_no_js.write(csv_header) for line in f: # Write to respective file if the jsenabled column # is either true or false. if as_bool(parse_csv_line(line)[columns["jsEnabled"]]): out_js.write(line) else: out_no_js.write(line) out_js.close() out_no_js.close()
Returns a list of parsed noscript tags from the given file path, assuming that the file contains nothing but noscript elements in the document root. """ return lxml.html.parse(file_path).find("body").findall("noscript") with open(bm_file_path, "r") as f: # Skip CSV header next(f) results = {} pages_scanned = 0 for line in f: # Scan line for both JS and no JS. row_js = parse_csv_line(line) row_no_js = parse_csv_line(next(f)) noscript_tags = [] url = urlparse(row_js[columns["url"]]) # Check if page with JS enabled sent any noscript tags. if as_bool(row_js[columns["noscript"]]): file_path = os.path.join(noscript_dir_path, row_js[columns["dataFileName"]] + ".html") noscript_tags.extend(get_noscript_tags(file_path)) # Check if page with JS disabled sent any noscript tags. if as_bool(row_no_js[columns["noscript"]]): file_path = os.path.join(noscript_dir_path, row_no_js[columns["dataFileName"]] + ".html") noscript_tags.extend(get_noscript_tags(file_path))
)

# List all subdirectory contents.
metrics_dir_list = os.listdir(metrics_dir_path)
noscript_dir_list = os.listdir(noscript_dir_path)
screenshots_dir_list = os.listdir(screenshots_dir_path)

with open(bm_file_path, "r") as f:
    # Skip CSV header.
    next(f)
    # Remove the files from the directory listings that are
    # referenced in the main table. In the end, the lists will
    # only contain files that need to be deleted.
    for line in f:
        data_file_name = parse_csv_line(line)[columns["dataFileName"]]
        # Each benchmark row is associated with one metrics JSON, one
        # noscript HTML dump and one screenshot PNG, all sharing the
        # row's data file base name. try_remove presumably tolerates
        # entries that are absent from the listing — TODO confirm.
        try_remove(metrics_dir_list, data_file_name + ".json")
        try_remove(noscript_dir_list, data_file_name + ".html")
        try_remove(screenshots_dir_list, data_file_name + ".png")

# Remove unnecessary metrics files.
for x in metrics_dir_list:
    os.remove(os.path.join(metrics_dir_path, x))
# Remove unnecessary noscript files.
for x in noscript_dir_list:
    os.remove(os.path.join(noscript_dir_path, x))
# Remove unnecessary screenshots.
for x in screenshots_dir_list:
# NOTE(review): these handles are not context-managed; presumably they
# are closed after the merge loop, past the end of this excerpt — confirm.
js_file = open(append_to_filename(bm_file_path, "_js"), "r")
nojs_file = open(append_to_filename(bm_file_path, "_no_js"), "r")

# Skip CSV headers.
next(js_file)
next(nojs_file)

# Walk both files in lockstep, pairing each JS-enabled row with the
# JS-disabled row for the same position.
while True:
    # Both are None if EOF is reached.
    # NOTE(review): if the two files differ in length, the loop stops at
    # the shorter one and trailing rows of the longer file are silently
    # dropped (one of them already consumed) — confirm this is intended.
    js_line = next(js_file, None)
    nojs_line = next(nojs_file, None)
    if js_line is None or nojs_line is None:
        break
    js_row = parse_csv_line(js_line)
    nojs_row = parse_csv_line(nojs_line)
    # 6 = index first median col.
    for i in range(6, len(js_row)):
        # Parse values into floats.
        js_row[i] = float(js_row[i])
        nojs_row[i] = float(nojs_row[i])
    out_row = [
        # col 1: url
        js_row[columns["url"]],
        # col 2: noscript exists?
        as_bool(js_row[columns["noscript"]]) or
        as_bool(nojs_row[columns["noscript"]]),
        # col 3: script exists?