def get_failed_pages(result_log_dir='logs/tests/', page_dir='allpages/'):
    """Yield (page_fname, link) pairs for links that failed according to the logs.

    Each file in ``result_log_dir`` holds one dict literal per line with keys
    'fname' (a page file in ``page_dir``) and 'result' (a per-link list of
    booleans). Links whose result is falsy are yielded, except the
    '/wiki/Main_Page' link, which is deliberately ignored.

    Fix: the original reused the name ``fname`` for both the log file (outer
    loop) and the page file (inner loop); the shadowed variable is renamed.
    """
    n = 0
    files = utility.get_files(result_log_dir)
    total_lines = utility.lines_in_dir(result_log_dir)
    start_time = time.time()
    for log_fname in files:
        with open(result_log_dir + log_fname, 'r') as f:
            raw_lines = f.read().split('\n')
        # Each line should be a dictionary containing the filename and the results
        entries = [ast.literal_eval(line) for line in raw_lines if len(line) > 0]
        for entry in entries:
            page_fname, fresults = entry['fname'], entry['result']
            utility.show_bar(n, total_lines,
                             message='Checking fails ({} of {}): '.format(
                                 n, total_lines),
                             start_time=start_time)
            n += 1
            try:
                with open(page_dir + page_fname, 'r') as f:
                    lines = f.read().split('\n')
                for line, result in zip(lines, fresults):
                    if not result and line != '/wiki/Main_Page':
                        yield (page_fname, line)
            except IOError:
                print('File \'{}\' not found.'.format(page_fname))
    print('')
def do_work(self):
    """Trace every URL listed in self.work_file, appending True/False per line
    to self.results and counting failures as it goes."""
    self.results = []
    failed = 0
    succeeded = 0  # never incremented; kept for parity with the original
    with open(self.work_file, 'r') as work_file:
        lines = work_file.readlines()
    width, _ = utility.get_terminal_size()
    width -= 1
    start_time = time.time()
    for i, line in enumerate(lines):
        utility.show_bar(i, len(lines), width=width, start_time=start_time,
                         message='{} of {}, {} fails. '.format(
                             i, len(lines), failed))
        # Remove the newline first
        target = line[:-1]
        try:
            if self.nowrite:
                history = self.Tracer.find(target, verbose=0)
            else:
                history = self.Tracer.find(target, verbose=0,
                                           dirname=self.log_dir)
            self.results.append(True)
        except Exception as e:
            # "No more links to follow" is an expected dead-end, not noise
            if not 'No more links to follow' in str(e):
                print(e)
            self.results.append(False)
            failed += 1
    print('')
def buildCache(self, histories):
    """Populate the in-memory cache with every page trace in histories.result."""
    total = len(histories.result)
    print("Caching " + str(total) + " page traces.")
    for index, history in enumerate(histories.result):
        utility.show_bar(index, total, message='Building cache: ')
        self.addToCache(history)
    print('')
def offloadCache(self, fileName):
    """Write every cache entry to fileName, one '(key, value)' repr per line."""
    cache_size = len(self.cache)
    with open(fileName, "w") as out:
        for index, item in enumerate(self.cache.items()):
            utility.show_bar(
                index, cache_size,
                message='Offloading cache (\'{}\'): '.format(fileName))
            # item is already the (key, value) tuple, so str(item) matches
            # the original str((key, value)) output exactly
            out.write(str(item) + '\n')
    print('')
def grayscale(im, verbose=0):
    """Return a float numpy array with each pixel replaced by its channel average.

    im is indexed [row][col]; with verbose > 0 a progress bar is displayed.
    """
    rows, cols = float(len(im)), float(len(im[0]))
    out = []
    for row_idx, row in enumerate(im):
        out.append([])
        for col_idx, pixel in enumerate(row):
            out[-1].append(utility.average(pixel))
            if verbose > 0:
                utility.show_bar(row_idx * cols + col_idx, rows * cols,
                                 number_limit=True, message='Grayscaling: ')
    return np.array(out, dtype=float)
def count(l, verbose=0):
    """Return a dict mapping each element of l to its number of occurrences.

    With verbose > 0 a progress bar is displayed while counting.
    """
    res = {}
    for i, v in enumerate(l):
        # dict.get replaces the original's separate membership test + branch
        res[v] = res.get(v, 0) + 1
        if verbose > 0:
            utility.show_bar(i, len(l), message='Counting: ')
    if verbose > 0:
        print('')
    return res
def determine_discrepancies(im1, im2, pixels, tolerance=0.0, verbose=0):
    """Return (row, col) positions where im1 and im2 differ beyond tolerance.

    NOTE(review): the 'pixels' parameter is accepted but never used; it is
    kept for interface compatibility — confirm whether callers still pass it.
    """
    mismatches = []
    rows, cols = float(len(im1)), float(len(im1[0]))
    for row_idx, row in enumerate(im1):
        for col_idx, pixel in enumerate(row):
            if not rough_color_match(pixel, im2[row_idx][col_idx], tolerance):
                mismatches.append((row_idx, col_idx))
            if verbose > 0:
                utility.show_bar(row_idx * cols + col_idx, rows * cols,
                                 number_limit=True,
                                 message='Finding discrepancies ({}): '.format(
                                     len(mismatches)))
    return mismatches
def get_pixels_of_color(im, color, tolerance=0.0, verbose=0):
    """Return (row, col) positions in im whose pixel roughly matches color."""
    matches = []
    rows, cols = float(len(im)), float(len(im[0]))
    for row_idx, row in enumerate(im):
        for col_idx, pixel in enumerate(row):
            if rough_color_match(pixel, color, tolerance):
                matches.append((row_idx, col_idx))
            if verbose > 0:
                utility.show_bar(row_idx * cols + col_idx, rows * cols,
                                 number_limit=True,
                                 message='Finding {} ({}): '.format(
                                     color, len(matches)))
    return matches
def generate_noise(size, message=''):
    """Generate a size x size grid of fBm noise values.

    Reseeds the module-level 'perm' permutation table consumed by fBm, then
    returns a list of 'size' rows, each a list of 'size' noise floats.
    """
    freq, octs = 1 / 32.0, 5
    data = []
    global perm
    # list(...) is required: random.shuffle needs a mutable sequence and
    # raises TypeError on a bare range object under Python 3.
    perm = list(range(256))
    random.shuffle(perm)
    perm += perm  # doubled so fBm can index past 255 without wrapping
    for y in range(size):
        data.append([])
        for x in range(size):
            utility.show_bar(y * size + x, size ** 2, message=message,
                             number_limit=True)
            data[-1].append(fBm(x * freq, y * freq, int(size * freq), octs))
    return data
def __init__(self, host='localhost', port=60000, directory='allpages/',
             finished_directory='completed/', temp_directory='temp/',
             verify_directory='verify/', mode='new'):
    """Set up the work-distribution server and bind its TCP socket.

    mode:
        'new'      -- copy every page file into the temp directory, start fresh.
        'continue' -- resume from whatever remains in the temp directory.
        'update'   -- copy page files missing from temp/finished, then resume.

    Fixes: the duplicate 'self.verify_files = []' assignment is removed, and
    the four bare 'except: pass' mkdir guards are narrowed to OSError so that
    unrelated errors are no longer silently swallowed.
    """
    self.host = host
    self.port = port
    self.directory = directory
    self.finished_directory = finished_directory
    self.temp_directory = temp_directory
    self.verify_directory = verify_directory
    self.files = []
    self.verify_files = []
    self.in_use_files = []
    self.finished_files = []
    self.clients = {}
    self.client_stats = {}

    def ensure_dir(path):
        # Create the directory if it doesn't exist; 'already exists' is fine.
        try:
            os.mkdir(path)
        except OSError:
            pass

    ensure_dir('logs/tests/')
    if mode == 'new':
        self.files = utility.get_files(self.directory)
        for dirname in (self.finished_directory, self.temp_directory,
                        self.verify_directory):
            ensure_dir(dirname)
        for i, f in enumerate(self.files):
            utility.show_bar(i, len(self.files), number_limit=True,
                             message='Copying to {}: '.format(
                                 self.temp_directory))
            shutil.copy(self.directory + f, self.temp_directory + f)
        print('')
    elif mode == 'continue':
        print('Loading temp files.')
        self.files = utility.get_files(self.temp_directory)
        print('Loading finished files.')
        self.finished_files = utility.get_files(self.finished_directory)
    elif mode == 'update':
        self.files = utility.get_files(self.temp_directory)
        self.finished_files = utility.get_files(self.finished_directory)
        all_files = self.files + self.finished_files
        check_files = utility.get_files(self.directory)
        for i, fname in enumerate(check_files):
            utility.show_bar(i, len(check_files), message='Updating files: ')
            if not fname in all_files:
                shutil.copy(self.directory + fname, self.temp_directory + fname)
        print('')
        self.files = utility.get_files(self.temp_directory)
    # We should probably verify all the error files at some point.
    self.start_time = time.time()
    self.finished_since_start = 0

    def get_next_file(client):
        # Hand out the next unclaimed file, preferring verify files.
        if not client in self.clients:
            self.clients[client] = []
            self.client_stats[client] = {}
        for fname in self.verify_files + self.files:
            if not fname in self.in_use_files:
                self.in_use_files.append(fname)
                self.clients[client].append(fname)
                return fname

    def finish_file(client, fname, result):
        # Record a completed file, update per-client stats, and report an ETA.
        self.files.remove(fname)
        self.in_use_files.remove(fname)
        self.finished_files.append((fname, result))
        self.clients[client].remove(fname)
        if 'finished' in self.client_stats[client]:
            self.client_stats[client]['finished'] += 1
        else:
            self.client_stats[client]['finished'] = 1
        shutil.move(self.temp_directory + fname,
                    self.finished_directory + fname)
        self.finished_since_start += 1
        elapsed = time.time() - self.start_time
        estimated_remaining = elapsed / self.finished_since_start * (len(
            self.files))
        print('Finished {} files so far. {} remaining.'.format(
            len(self.finished_files),
            utility.display_time(estimated_remaining)))
        utility.show_dict(self.client_stats)
        self.write_results_to_file('logs/tests/', fname, result)

    self.server = SocketServer.TCPServer((self.host, self.port),
                                         WikiClientHandler)
    # So the handlers can interact with us
    self.server.get_next_file = get_next_file
    self.server.finish_file = finish_file
    self.server.directory = directory
    self.server.finished_directory = finished_directory
    self.server.temp_directory = temp_directory
def mergeCaches(self, fileNames, new_name=None):
    """Merge several on-disk caches, preferring entries from earlier files.

    Each cache file holds one '(url, history)' literal per line. If new_name
    is given, the source files are deleted and the merged cache is written
    there via offloadCache.

    Fixes: the bare 'except: pass' around parsing is narrowed to the
    exceptions literal_eval / tuple unpacking actually raise, the progress
    bar is hoisted out of the try so its errors aren't swallowed, and the
    lambda-based sum is replaced by a generator expression.
    """
    print('Merging caches.')
    caches = []
    print('Loading caches.')
    for i, fileName in enumerate(fileNames):
        with open(fileName) as f:
            result = f.read()
        cache = {}
        # It's the new format, with each line being one
        lines = result.split('\n')
        for lineno, line in enumerate(lines):
            utility.show_bar(
                lineno, len(lines),
                message='Loading cache ({} of {}): '.format(
                    i + 1, len(fileNames)))
            try:
                url, history = ast.literal_eval(line)
            except (ValueError, SyntaxError, TypeError):
                # Blank or garbled lines are expected; skip them.
                continue
            if isinstance(history, list):
                cache[url] = history[0]
                print(history, cache[url])
            else:
                cache[url] = history
        caches.append(cache)
        print('')
    if len(caches) > 0:
        # Save some time by just having everything in the first cache
        self.cache = dict(caches[0])
    total_done = 0
    total_included = 0
    total_entries = sum(len(cache) for cache in caches[1:])
    for other in caches[1:]:
        for entry in other:
            utility.show_bar(
                total_done, total_entries,
                message='Merging ({} of {} included): '.format(
                    total_included, total_done))
            if not entry in self.cache:
                self.cache[entry] = other[entry]
                total_included += 1
            total_done += 1
    print('')
    if new_name != None:
        print(
            'Finished merging caches, writing out our new cache to \'{}\''
            .format(new_name))
        for fileName in fileNames:
            os.remove(fileName)
        self.offloadCache(new_name)