예제 #1
0
def get_failed_pages(result_log_dir='logs/tests/', page_dir='allpages/'):
    """Yield ``(page_file, link)`` for every link whose recorded result is falsy.

    Every file listed by ``utility.get_files(result_log_dir)`` is read; each
    non-empty line is expected to be the literal of a dictionary with the
    keys ``'fname'`` (a file name inside ``page_dir``) and ``'result'`` (one
    entry per line of that page file). The page file's lines are paired
    with the results in order, and each pair whose result is falsy is
    yielded, except for the ``/wiki/Main_Page`` link.

    A page file that cannot be opened is reported on stdout and skipped.
    A progress bar is drawn through ``utility.show_bar`` while iterating.
    """
    processed = 0
    log_names = utility.get_files(result_log_dir)
    total_lines = utility.lines_in_dir(result_log_dir)
    started = time.time()

    for log_name in log_names:
        with open(result_log_dir + log_name, 'r') as log_file:
            raw_entries = log_file.read().split('\n')

        for raw_entry in raw_entries:
            # Blank lines (e.g. the one after the final newline) carry no record.
            if not raw_entry:
                continue

            # Each line should be a dictionary containing the filename and the results
            record = ast.literal_eval(raw_entry)
            page_name = record['fname']
            outcomes = record['result']

            progress_label = 'Checking fails ({} of {}): '.format(
                processed, total_lines)
            utility.show_bar(processed,
                             total_lines,
                             message=progress_label,
                             start_time=started)
            processed += 1

            try:
                with open(page_dir + page_name, 'r') as page_file:
                    links = page_file.read().split('\n')

                for link, passed in zip(links, outcomes):
                    if passed or link == '/wiki/Main_Page':
                        continue
                    yield (page_name, link)
            except IOError:
                print('File \'{}\' not found.'.format(page_name))

    # Terminate the progress-bar line.
    print('')
예제 #2
0
    def do_work(self):
        """Run the tracer on every line of ``self.work_file``.

        For each line, ``self.Tracer.find`` is called with the line's text
        (without its newline). ``self.results`` receives ``True`` when the
        call returns and ``False`` when it raises. Exceptions are printed
        unless their text contains ``'No more links to follow'``.

        When ``self.nowrite`` is falsy, ``self.log_dir`` is passed to the
        tracer as ``dirname``. A progress bar with the running failure count
        is drawn through ``utility.show_bar``.
        """
        self.results = []
        failed = 0

        with open(self.work_file, 'r') as work_file:
            lines = work_file.readlines()

        total = len(lines)
        width, _ = utility.get_terminal_size()
        width -= 1

        start_time = time.time()
        for i, line in enumerate(lines):
            utility.show_bar(i,
                             total,
                             width=width,
                             start_time=start_time,
                             message='{} of {}, {} fails. '.format(
                                 i, total, failed))

            # Strip only the line terminator: the last line of the file may
            # not end with one, and slicing off a character unconditionally
            # would then truncate the entry itself.
            target = line.rstrip('\n')

            try:
                if self.nowrite:
                    self.Tracer.find(target, verbose=0)
                else:
                    self.Tracer.find(target, verbose=0, dirname=self.log_dir)
            except Exception as e:
                # Any tracer failure counts as a failed entry; the common
                # "dead end" message is not worth printing.
                if 'No more links to follow' not in str(e):
                    print(e)

                self.results.append(False)
                failed += 1
            else:
                self.results.append(True)

        # Terminate the progress-bar line.
        print('')
예제 #3
0
    def buildCache(self, histories):
        """Feed every trace in ``histories.result`` to ``self.addToCache``.

        Prints how many traces will be cached, then draws a progress bar
        through ``utility.show_bar`` while adding them one by one.
        """
        traces = histories.result
        print("Caching " + str(len(traces)) + " page traces.")

        position = 0
        for trace in traces:
            utility.show_bar(position, len(traces), message='Building cache: ')
            self.addToCache(trace)
            position += 1

        # Terminate the progress-bar line.
        print('')
예제 #4
0
    def offloadCache(self, fileName):
        """Write ``self.cache`` to ``fileName``, one entry per line.

        Each line is the string form of a ``(key, value)`` tuple. The file
        is opened in write mode, so existing content is replaced. A progress
        bar is drawn through ``utility.show_bar`` while writing.
        """
        label = 'Offloading cache (\'{}\'): '.format(fileName)

        with open(fileName, "w") as out:
            written = 0
            for pair in self.cache.items():
                utility.show_bar(written, len(self.cache), message=label)
                key, value = pair
                out.write(str((key, value)) + '\n')
                written += 1

        # Terminate the progress-bar line.
        print('')
예제 #5
0
def grayscale(im, verbose=0):
    """Reduce every pixel of ``im`` to a single value via ``utility.average``.

    Args:
        im: Sequence of rows, each row a sequence of pixels. Every pixel is
            passed unchanged to ``utility.average``.
        verbose: When greater than 0, a progress bar is drawn through
            ``utility.show_bar`` after each pixel.

    Returns:
        A numpy float array built from the per-row lists of averaged values.
        An empty image yields an empty array.
    """
    n_rows = len(im)
    # Only look at the first row when there is one, so an empty image does
    # not raise IndexError.
    n_cols = len(im[0]) if n_rows else 0

    # Floats are kept for the progress arithmetic, as the bar has always
    # been fed float positions/totals.
    row_len = float(n_cols)
    total = float(n_rows) * row_len

    res = []
    for iy, row in enumerate(im):
        gray_row = []
        res.append(gray_row)
        for ix, pixel in enumerate(row):
            gray_row.append(utility.average(pixel))
            if verbose > 0:
                utility.show_bar(iy * row_len + ix,
                                 total,
                                 number_limit=True,
                                 message='Grayscaling: ')
    return np.array(res, dtype=float)
예제 #6
0
def count(l, verbose=0):
    """Tally how often each value occurs in ``l``.

    Args:
        l: Iterable of hashable values. It must also support ``len()`` when
            ``verbose`` is greater than 0, because the progress bar needs
            the total.
        verbose: When greater than 0, a progress bar is drawn through
            ``utility.show_bar`` and a closing blank line is printed.

    Returns:
        A dict mapping each distinct value to its number of occurrences.
    """
    tally = {}
    show_progress = verbose > 0

    for position, item in enumerate(l):
        tally[item] = tally.get(item, 0) + 1

        if show_progress:
            utility.show_bar(position, len(l), message='Counting: ')

    if show_progress:
        print('')

    return tally
예제 #7
0
def determine_discrepancies(im1, im2, pixels, tolerance=0.0, verbose=0):
    """List the coordinates at which ``im1`` and ``im2`` do not match.

    Args:
        im1: Sequence of rows of pixels; defines the area that is compared.
        im2: Image indexed as ``im2[row][column]`` for every pixel of
            ``im1``.
        pixels: Unused; kept so existing callers keep working.
        tolerance: Passed through to ``rough_color_match``.
        verbose: When greater than 0, a progress bar is drawn through
            ``utility.show_bar`` after each pixel.

    Returns:
        A list of ``(row, column)`` tuples for which ``rough_color_match``
        returned a falsy value, in scan order. Empty for an empty image.
    """
    n_rows = len(im1)
    # Only look at the first row when there is one, so an empty image does
    # not raise IndexError.
    n_cols = len(im1[0]) if n_rows else 0

    # Floats are kept for the progress arithmetic, as the bar has always
    # been fed float positions/totals.
    row_len = float(n_cols)
    total = float(n_rows) * row_len

    res = []
    for iy, row in enumerate(im1):
        for ix, pixel in enumerate(row):
            if not rough_color_match(pixel, im2[iy][ix], tolerance):
                res.append((iy, ix))
            if verbose > 0:
                utility.show_bar(iy * row_len + ix,
                                 total,
                                 number_limit=True,
                                 message='Finding discrepancies ({}): '.format(
                                     len(res)))

    return res
예제 #8
0
def get_pixels_of_color(im, color, tolerance=0.0, verbose=0):
    """List the coordinates of every pixel of ``im`` that matches ``color``.

    Args:
        im: Sequence of rows of pixels.
        color: Reference colour handed to ``rough_color_match``.
        tolerance: Passed through to ``rough_color_match``.
        verbose: When greater than 0, a progress bar is drawn through
            ``utility.show_bar`` each time a match is found.

    Returns:
        A list of ``(row, column)`` tuples for which ``rough_color_match``
        returned a truthy value, in scan order. Empty for an empty image.
    """
    n_rows = len(im)
    # Only look at the first row when there is one, so an empty image does
    # not raise IndexError.
    n_cols = len(im[0]) if n_rows else 0

    # Floats are kept for the progress arithmetic, as the bar has always
    # been fed float positions/totals.
    row_len = float(n_cols)
    total = float(n_rows) * row_len

    res = []
    for iy, row in enumerate(im):
        for ix, pixel in enumerate(row):
            if rough_color_match(pixel, color, tolerance):
                res.append((iy, ix))
                # NOTE(review): the bar is refreshed only on a match, unlike
                # determine_discrepancies which refreshes on every pixel --
                # confirm whether that is intended.
                if verbose > 0:
                    utility.show_bar(iy * row_len + ix,
                                     total,
                                     number_limit=True,
                                     message='Finding {} ({}): '.format(
                                         color, len(res)))

    return res
예제 #9
0
def generate_noise(size, message=''):
    """Build a ``size`` x ``size`` grid of values produced by ``fBm``.

    The module-level ``perm`` table is rebuilt on every call: the numbers
    0..255 in random order, repeated twice (512 entries). It is presumably
    read by ``fBm`` -- verify against that function.

    Args:
        size: Number of rows and columns to generate.
        message: Label handed to ``utility.show_bar`` for the progress bar.

    Returns:
        A list of ``size`` rows, each a list of ``size`` values returned by
        ``fBm``.
    """
    freq, octs = 1 / 32.0, 5

    global perm
    # random.shuffle needs a mutable sequence; range() is immutable on
    # Python 3, so materialise it as a list first.
    perm = list(range(256))
    random.shuffle(perm)
    perm += perm

    # Loop-invariant third argument of fBm.
    period = int(size * freq)

    data = []
    for y in range(size):
        row = []
        data.append(row)
        for x in range(size):
            utility.show_bar(y * size + x,
                             size**2,
                             message=message,
                             number_limit=True)
            row.append(fBm(x * freq, y * freq, period, octs))

    return data
예제 #10
0
    def __init__(self,
                 host='localhost',
                 port=60000,
                 directory='allpages/',
                 finished_directory='completed/',
                 temp_directory='temp/',
                 verify_directory='verify/',
                 mode='new'):
        """Prepare the work directories and create the TCP server.

        Args:
            host: Address the TCP server binds to.
            port: Port the TCP server binds to.
            directory: Directory holding the source files.
            finished_directory: Directory files are moved to once finished.
            temp_directory: Directory holding the files still to be done.
            verify_directory: Directory created in ``'new'`` mode; not
                otherwise used here.
            mode: ``'new'`` copies every file from ``directory`` into
                ``temp_directory``; ``'continue'`` reloads the file lists
                from ``temp_directory`` and ``finished_directory``;
                ``'update'`` additionally copies files from ``directory``
                that are in neither list. Any other value leaves the file
                lists empty.

        The server object is given ``get_next_file`` and ``finish_file``
        callbacks plus the directory names so request handlers can reach
        them.
        """
        self.host = host
        self.port = port

        self.directory = directory
        self.finished_directory = finished_directory
        self.temp_directory = temp_directory
        self.verify_directory = verify_directory

        self.files = []
        self.verify_files = []
        self.in_use_files = []
        self.finished_files = []
        self.clients = {}

        self.client_stats = {}

        def ensure_dir(path):
            # Best effort: the directory usually exists already. makedirs
            # also creates missing parents (e.g. 'logs/' for 'logs/tests/').
            # Only OS-level errors are ignored; a directory that really
            # cannot be created will surface on the first file operation.
            try:
                os.makedirs(path)
            except OSError:
                pass

        ensure_dir('logs/tests/')

        if mode == 'new':
            self.files = utility.get_files(self.directory)

            ensure_dir(self.finished_directory)
            ensure_dir(self.temp_directory)
            ensure_dir(self.verify_directory)

            for i, f in enumerate(self.files):
                utility.show_bar(i,
                                 len(self.files),
                                 number_limit=True,
                                 message='Copying to {}: '.format(
                                     self.temp_directory))
                shutil.copy(self.directory + f, self.temp_directory + f)

            print('')
        elif mode == 'continue':
            print('Loading temp files.')
            self.files = utility.get_files(self.temp_directory)

            print('Loading finished files.')
            self.finished_files = utility.get_files(self.finished_directory)
        elif mode == 'update':
            self.files = utility.get_files(self.temp_directory)
            self.finished_files = utility.get_files(self.finished_directory)

            # A set keeps the per-file membership test constant-time.
            known_files = set(self.files + self.finished_files)

            check_files = utility.get_files(self.directory)
            for i, fname in enumerate(check_files):
                utility.show_bar(i,
                                 len(check_files),
                                 message='Updating files: ')
                if fname not in known_files:
                    shutil.copy(self.directory + fname,
                                self.temp_directory + fname)

            print('')

            # Pick up the files that were just copied.
            self.files = utility.get_files(self.temp_directory)

            # We should probably verify all the error files at some point.

        self.start_time = time.time()
        self.finished_since_start = 0

        def get_next_file(client):
            # Hand out the first file nobody is working on; None when all
            # files are taken.
            if client not in self.clients:
                self.clients[client] = []
                self.client_stats[client] = {}

            for fname in self.verify_files + self.files:
                if fname not in self.in_use_files:
                    self.in_use_files.append(fname)
                    self.clients[client].append(fname)
                    return fname

            return None

        def finish_file(client, fname, result):
            # Book-keeping for a completed file: update the lists and the
            # per-client counter, move the file, report progress and log
            # the result.
            self.files.remove(fname)
            self.in_use_files.remove(fname)
            self.finished_files.append((fname, result))

            self.clients[client].remove(fname)

            stats = self.client_stats[client]
            stats['finished'] = stats.get('finished', 0) + 1

            shutil.move(self.temp_directory + fname,
                        self.finished_directory + fname)

            self.finished_since_start += 1
            elapsed = time.time() - self.start_time
            # Average time per finished file times the files still pending.
            estimated_remaining = elapsed / self.finished_since_start * (len(
                self.files))

            print('Finished {} files so far. {} remaining.'.format(
                len(self.finished_files),
                utility.display_time(estimated_remaining)))

            utility.show_dict(self.client_stats)

            self.write_results_to_file('logs/tests/', fname, result)

        self.server = SocketServer.TCPServer((self.host, self.port),
                                             WikiClientHandler)

        # So the handlers can interact with us
        self.server.get_next_file = get_next_file
        self.server.finish_file = finish_file
        self.server.directory = directory
        self.server.finished_directory = finished_directory
        self.server.temp_directory = temp_directory
예제 #11
0
    def mergeCaches(self, fileNames, new_name=None):
        """Load several cache files and merge them into ``self.cache``.

        Each file is read line by line; every line is expected to be the
        literal of a ``(url, history)`` pair. When ``history`` is a list,
        only its first element is kept (and the conversion is printed).
        Lines that cannot be parsed are skipped.

        The first file's entries win: entries from later files are added
        only when their key is not present yet.

        Args:
            fileNames: Sequence of cache file paths to load.
            new_name: When not ``None`` (and at least one cache was loaded),
                the merged cache is written there with ``offloadCache`` and
                the input files are deleted afterwards.
        """
        print('Merging caches.')
        caches = []

        print('Loading caches.')
        for i, fileName in enumerate(fileNames):
            with open(fileName) as f:
                # It's the new format, with each line being one
                lines = f.read().split('\n')

            cache = {}
            message = 'Loading cache ({} of {}): '.format(
                i + 1, len(fileNames))

            for lineno, line in enumerate(lines):
                utility.show_bar(lineno, len(lines), message=message)

                try:
                    url, history = ast.literal_eval(line)

                    if isinstance(history, list):
                        cache[url] = history[0]
                        print(history, cache[url])
                    else:
                        cache[url] = history
                except (ValueError, SyntaxError, TypeError, IndexError):
                    # Blank or malformed line (bad literal, wrong shape,
                    # empty history list, unhashable key): skip it.
                    continue

            caches.append(cache)

            print('')

        if caches:
            # Save some time by just having everything in the first cache
            self.cache = dict(caches[0])

            total_done = 0
            total_included = 0
            total_entries = sum(len(cache) for cache in caches[1:])

            for other in caches[1:]:
                for entry in other:
                    utility.show_bar(
                        total_done,
                        total_entries,
                        message='Merging ({} of {} included): '.format(
                            total_included, total_done))
                    if entry not in self.cache:
                        self.cache[entry] = other[entry]
                        total_included += 1

                    total_done += 1

            print('')

            if new_name is not None:
                print(
                    'Finished merging caches, writing out our new cache to \'{}\''
                    .format(new_name))

                # Write the merged cache before deleting anything, so a
                # failed write cannot lose the source caches.
                self.offloadCache(new_name)

                merged_path = os.path.abspath(new_name)
                for fileName in fileNames:
                    # Never delete the file that was just written.
                    if os.path.abspath(fileName) != merged_path:
                        os.remove(fileName)