Example #1
def get_failed_pages(result_log_dir='logs/tests/', page_dir='allpages/'):
    """Yield (page filename, link) pairs for every link whose test result
    was falsy, skipping links to the main page."""
    n = 0
    files = utility.get_files(result_log_dir)
    total_lines = utility.lines_in_dir(result_log_dir)
    start_time = time.time()
    for fname in files:
        with open(result_log_dir + fname, 'r') as f:
            results = f.read().split('\n')

        # Each line should be a dictionary containing the filename and the results
        results = filter(lambda i: len(i) > 0, results)
        results = map(ast.literal_eval, results)

        # Use a distinct name so the log filename above isn't shadowed
        for page_fname, fresults in ((d['fname'], d['result']) for d in results):
            utility.show_bar(n, total_lines,
                             message='Checking fails ({} of {}): '.format(n, total_lines),
                             start_time=start_time)
            n += 1

            try:
                with open(page_dir + page_fname, 'r') as f:
                    lines = f.read().split('\n')

                for line, result in zip(lines, fresults):
                    if not result and line != '/wiki/Main_Page':
                        yield (page_fname, line)
            except IOError:
                print('File \'{}\' not found.'.format(page_fname))
    print('')
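
# A minimal usage sketch (hypothetical): collect every failed (page, link)
# pair from the generator above and write them to a retry list.
failed = list(get_failed_pages())
with open('logs/retry_list.txt', 'w') as f:  # hypothetical output path
    for page_fname, link in failed:
        f.write('{}\t{}\n'.format(page_fname, link))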
Example #2
    def get_netStats(self):
        """Collect NetStats rows from the matching *_net_stats.json files."""
        rows = []
        cls_attributes = util.get_cls_attributes(meta_db.NetStats)
        for file in util.get_files(self.pattern + '*_net_stats.json'):
            stats_json = util.get_json_from_file(file, self.is2move)
            if stats_json and stats_json['status'] == 'OK' and stats_json['data']:
                stats_json['data']['file_timestamp'] = util.get_timestamp_from_file(file)
                rows.append(self._get_dict(stats_json['data'], cls_attributes))
        return rows
Example #3
    def get_tx(self):
        """Collect Transaction rows from the matching *pending_txs.json files."""
        txs = []
        cls_attributes = util.get_cls_attributes(meta_db.Transaction)
        for file in util.get_files(self.pattern + '*pending_txs.json'):
            txs_json = util.get_json_from_file(file)
            if txs_json:
                for tx in txs_json:
                    tx['file_timestamp'] = util.get_timestamp_from_file(file)
                    txs.append(self._get_dict(tx, cls_attributes))
        return txs
Example #4
    def _get_rows(self, table, file_extension):
        """Collect rows for the given table from files matching file_extension."""
        rows = []
        cls_attributes = util.get_cls_attributes(table)
        for file in util.get_files(self.pattern + file_extension):
            row_json = util.get_json_from_file(file, self.is2move)
            if row_json:
                row_json['file_timestamp'] = util.get_timestamp_from_file(file)
                rows.append(self._get_dict(row_json, cls_attributes))
        return rows
Example #5
    def __init__(self,
                 host,
                 port,
                 nowrite=False,
                 noupdate=False,
                 target='Philosophy'):
        self.host = host
        self.port = port

        self.nowrite = nowrite
        self.noupdate = noupdate

        self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        self.working_file = ''

        self.Tracer = wikipedia.WikipediaTracer()
        self.Tracer.set_target(target)

        self.id = socket.gethostname()

        self.work_file = 'worker/{}-workfile.txt'.format(self.id)
        self.cache_file = 'worker/{}-cachefile.txt'.format(self.id)
        self.log_dir = 'worker/{}/'.format(self.id)
        self.fname = ''  # The serverside name of the file we're working on
        self.results = []

        self.f = None

        # The directories may already exist from a previous run
        try:
            os.mkdir(self.work_file.split('/')[0] + '/')
        except OSError:
            pass

        try:
            os.mkdir(self.log_dir)
        except OSError:
            pass

        cache_files = utility.get_files('worker/', search_re=r'cachefile')
        cache_files = map(lambda fname: 'worker/' + fname, cache_files)
        self.Tracer.mergeCaches(cache_files, new_name=self.cache_file)

        # In case we've run it with this computer before
        self.Tracer.articles = utility.lines_in_dir(self.log_dir)
Example #6
    def __init__(self,
                 host='localhost',
                 port=60000,
                 directory='allpages/',
                 finished_directory='completed/',
                 temp_directory='temp/',
                 verify_directory='verify/',
                 mode='new'):
        self.host = host
        self.port = port

        self.directory = directory
        self.finished_directory = finished_directory
        self.temp_directory = temp_directory
        self.verify_directory = verify_directory

        self.files = []
        self.verify_files = []
        self.in_use_files = []
        self.finished_files = []
        self.clients = {}

        self.client_stats = {}

        # logs/tests/ may already exist from a previous run
        try:
            os.mkdir('logs/tests/')
        except OSError:
            pass

        if mode == 'new':
            self.files = utility.get_files(self.directory)

            # These directories may already exist from a previous run
            try:
                os.mkdir(self.finished_directory)
            except OSError:
                pass

            try:
                os.mkdir(self.temp_directory)
            except OSError:
                pass

            try:
                os.mkdir(self.verify_directory)
            except OSError:
                pass

            for i, f in enumerate(self.files):
                utility.show_bar(i,
                                 len(self.files),
                                 number_limit=True,
                                 message='Copying to {}: '.format(
                                     self.temp_directory))
                shutil.copy(self.directory + f, self.temp_directory + f)

            print('')
        elif mode == 'continue':
            print('Loading temp files.')
            self.files = utility.get_files(self.temp_directory)

            print('Loading finished files.')
            self.finished_files = utility.get_files(self.finished_directory)
        elif mode == 'update':
            self.files = utility.get_files(self.temp_directory)
            self.finished_files = utility.get_files(self.finished_directory)

            all_files = self.files + self.finished_files

            check_files = utility.get_files(self.directory)
            for i, fname in enumerate(check_files):
                utility.show_bar(i,
                                 len(check_files),
                                 message='Updating files: ')
                if fname not in all_files:
                    shutil.copy(self.directory + fname,
                                self.temp_directory + fname)

            print('')

            self.files = utility.get_files(self.temp_directory)

            # We should probably verify all the error files at some point.

        self.start_time = time.time()
        self.finished_since_start = 0

        def get_next_file(client):
            if client not in self.clients:
                self.clients[client] = []
                self.client_stats[client] = {}

            for fname in self.verify_files + self.files:
                if fname not in self.in_use_files:
                    self.in_use_files.append(fname)
                    self.clients[client].append(fname)
                    return fname

        def finish_file(client, fname, result):
            self.files.remove(fname)
            self.in_use_files.remove(fname)
            self.finished_files.append((fname, result))

            self.clients[client].remove(fname)

            if 'finished' in self.client_stats[client]:
                self.client_stats[client]['finished'] += 1
            else:
                self.client_stats[client]['finished'] = 1

            shutil.move(self.temp_directory + fname,
                        self.finished_directory + fname)

            self.finished_since_start += 1
            elapsed = time.time() - self.start_time
            estimated_remaining = (elapsed / self.finished_since_start *
                                   len(self.files))

            print('Finished {} files so far. Estimated {} remaining.'.format(
                len(self.finished_files),
                utility.display_time(estimated_remaining)))

            utility.show_dict(self.client_stats)

            self.write_results_to_file('logs/tests/', fname, result)

        self.server = SocketServer.TCPServer((self.host, self.port),
                                             WikiClientHandler)

        # So the handlers can interact with us
        self.server.get_next_file = get_next_file
        self.server.finish_file = finish_file
        self.server.directory = directory
        self.server.finished_directory = finished_directory
        self.server.temp_directory = temp_directory
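
        # Hypothetical sketch of how a handler might use the hooks attached
        # above (the real WikiClientHandler is defined elsewhere):
        #
        #   class ExampleHandler(SocketServer.BaseRequestHandler):
        #       def handle(self):
        #           client = self.client_address[0]
        #           fname = self.server.get_next_file(client)  # claim a file
        #           if fname is None:
        #               return  # nothing left to hand out
        #           # ... process the file, then report the result back:
        #           self.server.finish_file(client, fname, result=[])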
Example #7
       labels.txt: contains all the activity labels available for the dataset (one per row).
                   Column 1: experiment number ID
                   Column 2: user number ID
                   Column 3: activity number ID
                   Column 4: label start point (in number of signal log samples, recorded at 50Hz)
                   Column 5: label end point (in number of signal log samples)

Output: .csv files containing data at 5Hz, 10Hz, 25Hz and 50Hz with labels
        [experimentID userID activityID tBodyAcc-X tBodyAcc-Y tBodyAcc-Z tGravityAcc-X tGravityAcc-Y tGravityAcc-Z tBodyGyro-X tBodyGyro-Y]
        Time-domain signals are prefixed with 't' to denote time.
"""

# get raw data files
path_to_raw_data_files = r"C:\Users\sri01\Downloads\DS 6999\HAPT Data Set\RawData\*.txt"
files = ut.get_files(path_to_raw_data_files)

# This is the directory where you want to write the new csvs to
path_to_output_directory = r"C:\Users\sri01\Downloads\DS 6999\HAPT Processed2"
ut.set_output_path(path_to_output_directory)


# get data from labels file
def get_label_data(files):
    for file in files:
        if file.__contains__('labels'):
            label_file = file
            break
    return ut.get_data(label_file)
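
# Hypothetical usage, continuing the script above: load the label table
# for the raw files collected earlier.
label_data = get_label_data(files)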