def snapshot_state(self, f):
    """Scan forward in *f* for the '] Snapshotting solver state' line.

    Returns the timestamp of that line.  If EOF is reached first, falls
    back to the timestamp of the last line read.  ``self.prev_line`` is
    updated with every non-matching line scanned.
    """
    for current in iter(f.readline, ''):
        if '] Snapshotting solver state' in current:
            return extract_datetime_from_line(current, self.log_year)
        self.prev_line = current
    return extract_datetime_from_line(self.prev_line, self.log_year)
def accurate_snapshot_state(self, f):
    """Scan forward in *f* for the '] Snapshot end' marker line.

    Returns the timestamp of that line, or of the last line read if EOF
    is reached first.  Non-matching lines are remembered in
    ``self.prev_line`` as scanning proceeds.
    """
    for current in iter(f.readline, ''):
        if '] Snapshot end' in current:
            return extract_datetime_from_line(current, self.log_year)
        self.prev_line = current
    return extract_datetime_from_line(self.prev_line, self.log_year)
def snapshot_state(self, f):
    """Advance through *f* until the solver-state snapshot line appears.

    Returns that line's timestamp; if the file ends first, returns the
    timestamp of the final line read.  Every skipped line is stored in
    ``self.prev_line``.
    """
    marker = '] Snapshotting solver state'
    text = f.readline()
    while text:
        if marker in text:
            return extract_datetime_from_line(text, self.log_year)
        self.prev_line = text
        text = f.readline()
    return extract_datetime_from_line(self.prev_line, self.log_year)
def accurate_snapshot_state(self, f):
    """Advance through *f* until the '] Snapshot end' marker appears.

    Returns that line's timestamp; if the file ends first, returns the
    timestamp of the final line read.  Every skipped line is stored in
    ``self.prev_line``.
    """
    marker = '] Snapshot end'
    text = f.readline()
    while text:
        if marker in text:
            return extract_datetime_from_line(text, self.log_year)
        self.prev_line = text
        text = f.readline()
    return extract_datetime_from_line(self.prev_line, self.log_year)
def test_state(self, f):
    """Scan *f* until the run of 'Test net output' lines ends.

    The end is detected when the current line no longer contains
    'Test net output' while the previous one did; the timestamp of that
    previous (last test-output) line is returned.  If EOF is reached
    first, the timestamp of the final line read is returned instead.
    """
    marker = 'Test net output'
    for current in iter(f.readline, ''):
        if marker not in current and marker in self.prev_line:
            return extract_datetime_from_line(self.prev_line, self.log_year)
        self.prev_line = current
    return extract_datetime_from_line(self.prev_line, self.log_year)
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, test_dict_list).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows.
    """
    # Raw strings: the patterns contain \d, \S, \. which are invalid
    # escape sequences in ordinary string literals (DeprecationWarning,
    # and a SyntaxWarning/SyntaxError in newer Python).
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only start parsing for other stuff if we've found the
                # first iteration
                continue

            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines with bad formatting, for example when
                # resuming solver
                continue

            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate
            )

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, test_dict_list).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows.
    """
    # Raw strings avoid invalid escape sequences (\d, \S, \.).
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            # NOTE: unlike the stock parser, this variant deliberately does
            # NOT require the first iteration before parsing other lines.

            # Skip interpreter noise that may be interleaved in the log.
            if line.find('RuntimeWarning:') != -1 or line.find('>>>') != -1:
                continue

            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines with bad formatting (e.g. when resuming the
                # solver); previously an unparsable line crashed the parse.
                continue

            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate
            )

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, test_dict_list).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows.
    """
    # Example of a matched line:
    # Iteration 500 (-3.53679e+10 iter/s, 0.216956s/500 iters), loss = 0.0236502
    # Raw strings avoid invalid escape sequences (\d, \S, \.).
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only start parsing for other stuff if we've found the
                # first iteration
                continue

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate
            )

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_time(self):
    """Walk the log once and accumulate timing statistics.

    Populates ``self.total_time``, ``self.test_time`` (per-test entries),
    ``self.total_test_time`` and ``self.total_snapshot_time``.
    """
    # Fix: begin_time was referenced before assignment (NameError) when
    # the log ended without ever containing a '] Solving' line.
    begin_time = None
    # Fix: the file handle was opened with open() and never closed.
    with open(self.filepath, 'r') as f:
        line = ''
        while True:
            self.prev_line = line
            line = f.readline()
            if not line:
                # EOF without 'Optimization Done.': use the last line's
                # timestamp as the quit time, if solving ever started.
                if self.total_time == 0 and begin_time and self.prev_line:
                    quit_time = extract_datetime_from_line(
                        self.prev_line, self.log_year)
                    self.total_time = (
                        quit_time - begin_time).total_seconds()
                break
            if '] Solving' in line:
                begin_time = extract_datetime_from_line(line, self.log_year)
            if '] Optimization Done.' in line:
                quit_time = extract_datetime_from_line(line, self.log_year)
                self.total_time = (quit_time - begin_time).total_seconds()
                break
            if '] Iteration' in line and 'lr' in line:
                self.train_state(f)
            elif 'Testing net ' in line:
                start_time = extract_datetime_from_line(line, self.log_year)
                # The iteration number sits 4 tokens from the line's end.
                iter_num = line.split(' ')[-4]
                end_time = self.test_state(f)
                self.test_time.append({
                    'config': iter_num,
                    'time': (end_time - start_time).total_seconds()
                })
                self.total_test_time += (
                    end_time - start_time).total_seconds()
            elif '] Snapshotting to binary proto' in line:
                # The snapshot actually starts on the previous line.
                start_time = extract_datetime_from_line(
                    self.prev_line, self.log_year)
                end_time = self.snapshot_state(f)
                if end_time:
                    self.total_snapshot_time += (
                        end_time - start_time).total_seconds()
            elif '] Snapshot begin' in line:
                start_time = extract_datetime_from_line(line, self.log_year)
                end_time = self.accurate_snapshot_state(f)
                if end_time:
                    self.total_snapshot_time += (
                        end_time - start_time).total_seconds()
def parse_log(path_to_log):
    """Parse log file.

    Returns test_dict_list, a list of dicts that define the table rows.
    """
    # Raw strings avoid invalid escape sequences (\d, \., \,).
    regex_batch = re.compile(r'Batch (\d+)')
    regex_test_output = re.compile(r'\, (\S+) = ([\.\deE+-]+)')

    # Pick out lines of interest
    batch = -1
    test_dict_list = []
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_batch.search(line)
            if iteration_match:
                batch = float(iteration_match.group(1))
            if batch == -1:
                # Only start parsing for other stuff if we've found the
                # first batch
                continue

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (time - start_time).total_seconds()

            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, batch, seconds)

    return test_dict_list
def parse_time(self):
    """Parse the log at ``self.filepath`` and accumulate timing totals.

    Fills in ``self.total_time``, ``self.test_time``,
    ``self.total_test_time`` and ``self.total_snapshot_time``.
    """
    # Fix: begin_time could be referenced before assignment at EOF when
    # the log never contained a '] Solving' line.
    begin_time = None
    # Fix: the file was opened without ever being closed.
    with open(self.filepath, 'r') as f:
        line = ''
        while True:
            self.prev_line = line
            line = f.readline()
            if not line:
                # EOF without an explicit 'Optimization Done.' marker.
                if self.total_time == 0 and begin_time and self.prev_line:
                    quit_time = extract_datetime_from_line(
                        self.prev_line, self.log_year)
                    self.total_time = (
                        quit_time - begin_time).total_seconds()
                break
            if '] Solving' in line:
                begin_time = extract_datetime_from_line(line, self.log_year)
            if '] Optimization Done.' in line:
                quit_time = extract_datetime_from_line(line, self.log_year)
                self.total_time = (quit_time - begin_time).total_seconds()
                break
            if '] Iteration' in line and 'lr' in line:
                self.train_state(f)
            elif 'Testing net ' in line:
                start_time = extract_datetime_from_line(line, self.log_year)
                # Iteration number is the 4th token from the end.
                iter_num = line.split(' ')[-4]
                end_time = self.test_state(f)
                self.test_time.append({
                    'config': iter_num,
                    'time': (end_time - start_time).total_seconds()
                })
                self.total_test_time += (
                    end_time - start_time).total_seconds()
            elif '] Snapshotting to binary proto' in line:
                # Snapshotting began on the previous log line.
                start_time = extract_datetime_from_line(
                    self.prev_line, self.log_year)
                end_time = self.snapshot_state(f)
                if end_time:
                    self.total_snapshot_time += (
                        end_time - start_time).total_seconds()
            elif '] Snapshot begin' in line:
                start_time = extract_datetime_from_line(line, self.log_year)
                end_time = self.accurate_snapshot_state(f)
                if end_time:
                    self.total_snapshot_time += (
                        end_time - start_time).total_seconds()
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, train_dict_names, test_dict_list,
    test_dict_names).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows; train_dict_names and test_dict_names are ordered tuples
    of the column names for the two dict_lists.
    """
    # Raw strings avoid invalid escape sequences (\d, \.).
    re_iteration = re.compile(r'Iteration (\d+)')
    # GoogLeNet output names; the stock (e.g. AlexNet) patterns would be
    # 'accuracy', 'loss' instead of 'loss3/top-1', 'loss3/loss3'.
    re_accuracy = re.compile(r'output #\d+: loss3/top-1 = ([\.\d]+)')
    re_train_loss = re.compile(r'Iteration \d+, loss3/loss3 = ([\.\d]+)')
    re_output_loss = re.compile(r'output #\d+: loss3/loss3 = ([\.\d]+)')
    re_lr = re.compile(r'lr = ([\.\d]+)')

    # Pick out lines of interest
    iteration = -1
    test_accuracy = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_dict_names = ('NumIters', 'Seconds', 'TrainingLoss',
                        'LearningRate')
    test_dict_names = ('NumIters', 'Seconds', 'TestAccuracy', 'TestLoss')

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first
                # iteration
                continue

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (time - start_time).total_seconds()

            lr_match = re_lr.search(line)
            if lr_match:
                learning_rate = float(lr_match.group(1))

            accuracy_match = re_accuracy.search(line)
            if accuracy_match and get_line_type(line) == 'test':
                test_accuracy = float(accuracy_match.group(1))

            train_loss_match = re_train_loss.search(line)
            if train_loss_match:
                train_loss = float(train_loss_match.group(1))
                train_dict_list.append({
                    'NumIters': iteration,
                    'Seconds': seconds,
                    'TrainingLoss': train_loss,
                    'LearningRate': learning_rate
                })

            output_loss_match = re_output_loss.search(line)
            if output_loss_match and get_line_type(line) == 'test':
                test_loss = float(output_loss_match.group(1))
                # NOTE: we assume that (1) accuracy always comes right
                # before loss for test data so the test_accuracy variable
                # is already correctly populated and (2) there's one and
                # only one output named "accuracy" for the test net
                test_dict_list.append({
                    'NumIters': iteration,
                    'Seconds': seconds,
                    'TestAccuracy': test_accuracy,
                    'TestLoss': test_loss
                })

    return train_dict_list, train_dict_names, test_dict_list, \
        test_dict_names
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, test_dict_list).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows.
    """
    # Compile the patterns once; raw strings avoid invalid escape
    # sequences (\d, \S, \.).
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        last_time = start_time

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                # group(1) is the first parenthesized capture, e.g. for
                # re.match(r'www\.(.+)\.com', 'www.google.com'),
                # group(0) is 'www.google.com' and group(1) is 'google'.
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only start parsing for other stuff if we've found the
                # first iteration
                continue

            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines with bad formatting, for example when
                # resuming solver
                continue

            # Handle a year rollover inside the log: log lines carry no
            # year, so a month going backwards means a new year started.
            if time.month < last_time.month:
                logfile_year += 1
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            last_time = time

            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate)

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse a staged Faster R-CNN training log.

    Returns (rpn_train_dict_lists, rcnn_train_dict_lists): each is a
    two-element list (one entry per stage) of lists of dicts that define
    the table rows.
    """
    # Raw strings avoid invalid escape sequences (\d, \S, \.).
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    rpn_train_dict_lists = [[], []]
    rcnn_train_dict_lists = [[], []]
    train_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = None
        # Which stage's list rows are appended to; selected by the
        # stage-header lines below.  NOTE(review): lines arriving before
        # any stage header would leave this None — presumably the log
        # always starts with a header; confirm against the training
        # script's output.
        train_dict_list = None
        for line in f:
            if line.startswith('Stage 1 RPN'):
                train_dict_list = rpn_train_dict_lists[0]
                continue
            elif line.startswith('Stage 1 Fast R-CNN'):
                train_dict_list = rcnn_train_dict_lists[0]
                continue
            elif line.startswith('Stage 2 RPN'):
                train_dict_list = rpn_train_dict_lists[1]
                continue
            elif line.startswith('Stage 2 Fast R-CNN'):
                train_dict_list = rcnn_train_dict_lists[1]
                continue

            # Only glog lines ('I...') other than net-init output matter.
            if line.startswith('Init') or not line.startswith('I'):
                continue

            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only start parsing for other stuff if we've found the
                # first iteration
                continue

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            if not start_time:
                start_time = time
            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )

    for stage in range(2):
        fix_initial_nan_learning_rate(rpn_train_dict_lists[stage])
        fix_initial_nan_learning_rate(rcnn_train_dict_lists[stage])

    return rpn_train_dict_lists, rcnn_train_dict_lists
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, test_dict_list).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows.
    """
    # Raw strings avoid invalid escape sequences (\d, \S, \.).  The
    # iteration pattern also captures the reported loss.
    regex_iteration = re.compile(
        r'Iteration (\d+), loss = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    loss = 0
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

    # Re-open so the main scan starts from the top of the file.
    with open(path_to_log) as f:
        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
                loss = float(iteration_match.group(2))

            if (iteration == -1 or line.find('speed') != -1 or
                    line.find('Wrote') != -1 or line.find('None') != -1):
                # Only start parsing for other stuff if we've found the
                # first iteration; also skip py-faster-rcnn status lines.
                continue

            if line.find('done solving') != -1:
                # Training finished; nothing of interest follows.
                break

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate, loss
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate, loss
            )

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, test_dict_list).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows.
    """
    # Raw strings avoid invalid escape sequences (\d, \S, \.).
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            # Skip py-faster-rcnn status lines; stop at end of training.
            if line[:5] == "speed" or line[:5] == "Wrote":
                continue
            if line[:4] == "done":
                break

            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only start parsing for other stuff if we've found the
                # first iteration
                continue

            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                break
            except Exception:  # fix: was a bare except (caught
                # KeyboardInterrupt/SystemExit too)
                print("Unexpected error reading line: ")
                print(line)
                print("I will go on though")
                # Fix: originally fell through and used a stale (or, on
                # the first line, undefined) `time` — skip the line.
                continue

            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate)

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse an SSD-style detection training log.

    Returns train_dict_list, a list of dicts that define the table rows
    (loss, learning rate and the most recent detection_eval score).
    """
    # Raw strings avoid invalid escape sequences (\d, \S, \.).
    regex_iteration = re.compile(r'Iteration (\d+), loss = ([\.\deE+-]+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(r'lr = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): detection_eval = ([\.\deE+-]+)')

    # Pick out lines of interest
    iteration = 0
    loss = -1
    learning_rate = 0.001
    train_dict_list = []
    train_row = None
    test_score = 0.0

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        last_time = start_time

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
                loss = float(iteration_match.group(2))

            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Fix: was a bare except; only a parse failure (e.g. when
                # resuming the solver) should be skipped.
                continue

            # Year rollover: log lines carry no year, so a month going
            # backwards means a new year started.
            if time.month < last_time.month:
                logfile_year += 1
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            last_time = time

            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            test_score_match = regex_test_output.search(line)
            if test_score_match:
                test_score = float(test_score_match.group(2))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate, loss, test_score)

    return train_dict_list
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, test_dict_list).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows; "test" rows here hold the overall 'loss = ...' values.
    """
    # Raw strings avoid invalid escape sequences (\d, \S, \.).
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'loss = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only start parsing for other stuff if we've found the
                # first iteration
                continue

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )

            test_rate_match = regex_test_output.search(line)
            if test_rate_match:
                if not test_row or test_row['NumIters'] != iteration:
                    # Push the last row and start a new one
                    if test_row:
                        # If we're on a new iteration, push the last row.
                        # This will probably only happen for the first
                        # row; otherwise the full-row checking logic below
                        # will push and clear full rows
                        test_dict_list.append(test_row)
                    test_row = OrderedDict([
                        ('NumIters', iteration),
                        ('Seconds', seconds),
                        ('LearningRate', learning_rate)
                    ])
                output_val = test_rate_match.group(1)
                test_row["loss"] = float(output_val)

            if (test_row and len(test_dict_list) >= 1 and
                    len(test_row) == len(test_dict_list[0])):
                # The row is full, based on the fact that it has the same
                # number of columns as the first row; append it to the
                # list
                test_dict_list.append(test_row)
                test_row = None

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse log file.

    Returns (df_train, df_test): pandas DataFrames with data from the log.
    """
    # Raw strings avoid invalid escape sequences (\w, \d, \.).
    re_correct_line = re.compile(r'^\w+\d+')
    re_iteration = re.compile(r'Iteration (\d+)')
    # GoogLeNet output names; for AlexNet-style nets the patterns would
    # be 'loss' and 'accuracy' instead.
    re_output_loss = re.compile(r'output #\d+: loss3\/loss3 = ([\.\d]+)')
    re_output_acc = re.compile(r'output #\d+: loss3\/top-1 = ([\.\d]+)')
    re_lr = re.compile(r'lr = ([\.\d]+)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    acc = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_dict_names = ('NumIters', 'Loss', 'Accuracy', 'LearningRate',
                        'Seconds')
    test_dict_names = ('NumIters', 'Loss', 'Accuracy')

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            # Only glog-formatted lines (e.g. 'I0526...') are parseable.
            if not re_correct_line.match(line):
                continue

            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = int(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first
                # iteration
                continue

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (time - start_time).total_seconds()

            lr_match = re_lr.search(line)
            if lr_match:
                learning_rate = float(lr_match.group(1))

            output_acc_match = re_output_acc.search(line)
            if output_acc_match:
                acc = float(output_acc_match.group(1))

            output_loss_match = re_output_loss.search(line)
            if output_loss_match:
                if get_line_type(line) == 'test':
                    test_loss = float(output_loss_match.group(1))
                    test_dict_list.append({'NumIters': iteration,
                                           'Loss': test_loss,
                                           'Accuracy': acc})
                else:
                    train_loss = float(output_loss_match.group(1))
                    train_dict_list.append({'NumIters': iteration,
                                            'Loss': train_loss,
                                            'Accuracy': acc,
                                            'LearningRate': learning_rate,
                                            'Seconds': seconds})

    df_train = pd.DataFrame(columns=train_dict_names)
    df_test = pd.DataFrame(columns=test_dict_names)
    for col in train_dict_names:
        df_train[col] = [d[col] for d in train_dict_list]
    for col in test_dict_names:
        df_test[col] = [d[col] for d in test_dict_list]

    return df_train, df_test
def parse_log(path_to_log):
    """Parse log file.

    Returns (train_dict_list, test_dict_list).

    train_dict_list and test_dict_list are lists of dicts that define the
    table rows.
    """
    # Raw strings avoid invalid escape sequences (\d, \S, \.).
    regex_valid_line = re.compile(r'I(\d\d\d\d)')
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): ([\S\_]+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): ([\S\_]+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            # Only glog lines ('I' + 4-digit date prefix) are parseable.
            if not regex_valid_line.match(line):
                continue

            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only start parsing for other stuff if we've found the
                # first iteration
                continue

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate)

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse a Caffe log with extended solver statistics.

    Returns (train_dict_list, test_dict_list, batch_size):
    train_dict_list and test_dict_list are lists of dicts that define the
    table rows; batch_size is the first 'batch_size' value found in the log
    (None if absent).

    Example lines this parser targets:
      I0526 13:50:19.771880 28824 solver.cpp:319] Test loss: 2.65821
      I0526 13:50:19.772948 28824 solver.cpp:332] Test net output #0: cross_entropy_loss = 65.4734
      I0526 13:50:19.772971 28824 solver.cpp:332] Test net output #1: l2_error = 2.65821 (* 1 = 2.65821 loss)
      I0526 13:50:19.772979 28824 solver.cpp:283] Iteration 24000, Testing net (#1)
    """
    re_batch_size = re.compile(r'batch_size: (\d+)')
    re_iteration = re.compile(r'Iteration (\d+)')
    re_train_loss = re.compile(r'Iteration \d+, loss = ([\.\d\-+ena]+)')
    re_accuracy = re.compile(
        r'output #\d+: (accuracy|l2_error) = ([\.\d\-+ena]+)')
    re_output_loss = re.compile(
        r'output #\d+: (loss|cross_entropy_loss) = ([\.\d\-+ena]+)')
    re_lr = re.compile(r'lr = ([\.\d\-+ena]+)')
    re_grad_norm = re.compile(r'avg_grad_norm = ([\.\d\-+enan]+)')
    re_step_norm = re.compile(r'avg_step_norm = ([\.\d\-+enan]+)')
    re_eff_lr = re.compile(r'avg_effective_learning_rate = ([\.\d\-+enan]+)')
    re_test_start_seconds = re.compile(r'Testing net')

    # Parsing state
    iteration = -1
    test_accuracy = -1
    test_start_seconds = float('NaN')
    learning_rate = float('NaN')
    avg_grad_norm = float('NaN')
    avg_step_norm = float('NaN')
    eff_lr = float('NaN')
    batch_size = None
    train_dict_list = []
    test_dict_list = []

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
    # Re-open so the scan below also covers lines before the start time
    # (the prototxt dump containing 'batch_size' comes early in the log).
    with open(path_to_log) as f:
        for line in f:
            if batch_size is None:
                batch_size_match = re_batch_size.search(line)
                if batch_size_match:
                    batch_size = float(batch_size_match.group(1))

            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            # BUGFIX: skip lines without a parseable timestamp (e.g. when
            # resuming the solver) instead of crashing.
            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                continue
            seconds = (time - start_time).total_seconds()

            lr_match = re_lr.search(line)
            if lr_match:
                learning_rate = float(lr_match.group(1))
            grad_norm_match = re_grad_norm.search(line)
            if grad_norm_match:
                avg_grad_norm = float(grad_norm_match.group(1))
            step_norm_match = re_step_norm.search(line)
            if step_norm_match:
                avg_step_norm = float(step_norm_match.group(1))
            eff_lr_match = re_eff_lr.search(line)
            if eff_lr_match:
                eff_lr = float(eff_lr_match.group(1))

            test_start_match = re_test_start_seconds.search(line)
            if test_start_match:
                # Remember when the current test phase began.
                test_start_seconds = seconds

            accuracy_match = re_accuracy.search(line)
            if accuracy_match and get_line_type(line) == 'test':
                test_accuracy = float(accuracy_match.group(2))

            train_loss_match = re_train_loss.search(line)
            if train_loss_match:
                train_loss = float(train_loss_match.group(1))
                train_dict_list.append({'NumIters': iteration,
                                        'Seconds': seconds,
                                        'TrainingLoss': train_loss,
                                        'LearningRate': learning_rate,
                                        'AvgGradientNorm': avg_grad_norm,
                                        'AvgStepNorm': avg_step_norm,
                                        'EffectiveLearningRate': eff_lr})

            output_loss_match = re_output_loss.search(line)
            if output_loss_match and get_line_type(line) == 'test':
                test_loss = float(output_loss_match.group(2))
                # NOTE: we assume that (1) accuracy always comes right before
                # loss for test data so the test_accuracy variable is already
                # correctly populated and (2) there's one and only one output
                # named "accuracy" for the test net
                test_dict_list.append({'NumIters': iteration,
                                       'SecondsAtStart': test_start_seconds,
                                       'SecondsAtEnd': seconds,
                                       'TestAccuracy': test_accuracy,
                                       'TestLoss': test_loss})

    return train_dict_list, test_dict_list, batch_size
def parse_log(path_to_log):
    """Parse log file into generic per-iteration tables.

    Returns (train_dict_list, train_dict_names, test_dict_list,
    test_dict_names): the dict lists hold one dict per iteration; the name
    sequences are the column names of each table (empty when the log
    produced no rows).
    """
    re_iteration = re.compile(r'Iteration (\d+)')
    re_top_output = re.compile(r'Iteration \d+, (\w+) = ([\.\d]+)')
    re_output = re.compile(r'(Test|Train) net output #\d+: '
                           r'(\w+) = ([+-]*[\.\d]+(e[+-][\d]+)*)')

    # Parsing state
    iteration = -1
    train_dict_list = []
    test_dict_list = []

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                parsed_iteration = float(iteration_match.group(1))
                if parsed_iteration != iteration:
                    # New iteration: open a fresh train row.  Reuse the last
                    # test row when it only holds 'NumIters' (no outputs were
                    # logged for it yet).
                    train_dict_list += [{'NumIters': parsed_iteration}]
                    if (not test_dict_list or
                            len(test_dict_list[-1].keys()) != 1):
                        test_dict_list += [{'NumIters': parsed_iteration}]
                    else:
                        test_dict_list[-1]['NumIters'] = parsed_iteration
                    iteration = parsed_iteration
            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            # BUGFIX: narrowed from a bare 'except:' which silently swallowed
            # every error; only timestamp parse failures are skipped.
            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                continue
            seconds = (time - start_time).total_seconds()

            top_output_match = re_top_output.search(line)
            if top_output_match:
                top_output_name = top_output_match.group(1)
                top_output_value = float(top_output_match.group(2))
                train_dict_list[-1][top_output_name] = top_output_value

            output_match = re_output.search(line)
            if output_match:
                is_test_output = output_match.group(1).lower() == 'test'
                if is_test_output:
                    dict_list = test_dict_list
                else:
                    dict_list = train_dict_list
                output_name = output_match.group(2)
                output_value = float(output_match.group(3))
                dict_list[-1][output_name] = output_value
                dict_list[-1]['Seconds'] = seconds

    # BUGFIX: guard against logs that produced no rows at all (previously an
    # IndexError on 'train_dict_list[0]').
    train_dict_names = train_dict_list[0].keys() if train_dict_list else ()
    test_dict_names = test_dict_list[0].keys() if test_dict_list else ()

    return train_dict_list, train_dict_names, test_dict_list, test_dict_names
def parse_log(path_to_log):
    """Parse a GoogLeNet-style Caffe log (loss3/* output names).

    Returns (train_dict_list, train_dict_names, test_dict_list,
    test_dict_names): the dict lists define the table rows; the name tuples
    are the ordered column names for the two tables.
    """
    re_iteration = re.compile(r'Iteration (\d+)')
    # Output names are hard-coded for GoogLeNet's third classifier; the
    # generic equivalents would be 'accuracy' and 'loss'.
    re_accuracy = re.compile(r'output #\d+: loss3/top-1 = ([\.\d]+)')
    re_train_loss = re.compile(r'Iteration \d+, loss3/loss3 = ([\.\d]+)')
    re_output_loss = re.compile(r'output #\d+: loss3/loss3 = ([\.\d]+)')
    re_lr = re.compile(r'lr = ([\.\d]+)')

    # Parsing state
    iteration = -1
    test_accuracy = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_dict_names = ('NumIters', 'Seconds', 'TrainingLoss', 'LearningRate')
    test_dict_names = ('NumIters', 'Seconds', 'TestAccuracy', 'TestLoss')

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            # BUGFIX: skip lines without a parseable timestamp instead of
            # crashing (consistent with the sibling parse_log variants).
            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                continue
            seconds = (time - start_time).total_seconds()

            lr_match = re_lr.search(line)
            if lr_match:
                learning_rate = float(lr_match.group(1))

            accuracy_match = re_accuracy.search(line)
            if accuracy_match and get_line_type(line) == 'test':
                test_accuracy = float(accuracy_match.group(1))

            train_loss_match = re_train_loss.search(line)
            if train_loss_match:
                train_loss = float(train_loss_match.group(1))
                train_dict_list.append({'NumIters': iteration,
                                        'Seconds': seconds,
                                        'TrainingLoss': train_loss,
                                        'LearningRate': learning_rate})

            output_loss_match = re_output_loss.search(line)
            if output_loss_match and get_line_type(line) == 'test':
                test_loss = float(output_loss_match.group(1))
                # NOTE: we assume that (1) accuracy always comes right before
                # loss for test data so the test_accuracy variable is already
                # correctly populated and (2) there's one and only one output
                # named "accuracy" for the test net
                test_dict_list.append({'NumIters': iteration,
                                       'Seconds': seconds,
                                       'TestAccuracy': test_accuracy,
                                       'TestLoss': test_loss})

    return train_dict_list, train_dict_names, test_dict_list, test_dict_names
def parse_log(path_to_log):
    """Parse log file

    Returns (train_dict_list, test_dict_list): lists of dicts that define
    the rows of the train and test tables.
    """
    timestamp_pat = re.compile('\d+:\d+:\d+\.\d+')
    iter_pat = re.compile('Iteration (\d+)')
    train_out_pat = re.compile(
        'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    test_out_pat = re.compile(
        'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    lr_pat = re.compile(
        'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Parsing state
    cur_iter = -1
    cur_lr = float('NaN')
    train_rows = []
    test_rows = []
    pending_train = None
    pending_test = None

    year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as log_file:
        t0 = extract_seconds.get_start_time(log_file, year)

        for raw in log_file:
            # Ignore lines that carry no HH:MM:SS.us timestamp at all.
            if timestamp_pat.search(raw) is None:
                continue

            m = iter_pat.search(raw)
            if m is not None:
                cur_iter = float(m.group(1))
            if cur_iter == -1:
                # Nothing to record until the first iteration shows up.
                continue

            stamp = extract_seconds.extract_datetime_from_line(raw, year)
            elapsed = (stamp - t0).total_seconds()

            lr_m = lr_pat.search(raw)
            if lr_m is not None:
                cur_lr = float(lr_m.group(1))
                # The rest of the per-line parsing is skipped for lr lines.
                continue

            train_rows, pending_train = parse_line_for_net_output(
                train_out_pat, pending_train, train_rows, raw,
                cur_iter, elapsed, cur_lr)
            test_rows, pending_test = parse_line_for_net_output(
                test_out_pat, pending_test, test_rows, raw,
                cur_iter, elapsed, cur_lr)

    fix_initial_nan_learning_rate(train_rows)
    fix_initial_nan_learning_rate(test_rows)

    return train_rows, test_rows
def parse_log(path_to_log):
    """Parse log file

    Returns (train_dict_list, train_dict_names, test_dict_list,
    test_dict_names): the dict lists hold one dict per logged iteration;
    the name tuples give the column order for each table.

    NOTE(review): rows are buffered in 'iteration_dict' and appended only
    when the next iteration (or a train/test switch) is detected; the final
    buffered row is flushed after the loop.
    """
    re_iteration = re.compile('Iteration (\d+)')
    re_test_accuracy = re.compile('output #\d+: accuracy = ([\.\d]+)')
    re_test_loss = re.compile('output #\d+: loss = ([\.\d]+)')
    re_loss = re.compile('Iteration \d+, loss = ([\.\d]+)')
    # lr may be printed in scientific ('1e-05') or plain decimal notation.
    re_lr = re.compile('lr = ([\d]+e-[\d]+|[\.\d]+)')

    # Pick out lines of interest
    iteration = -1                # iteration of the buffered row
    current_line_iteration = -1   # iteration parsed from the current line
    iteration_type = None         # 'train' or 'test' for the buffered row
    iteration_dict = None         # the buffered (not yet appended) row
    accuracy = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_dict_names = ('Iterations', 'Seconds', 'Loss', 'LearningRate')
    test_dict_names = ('Iterations', 'Seconds', 'Loss', 'Accuracy')

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                current_line_iteration = int(iteration_match.group(1))
            if current_line_iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            # new iteration or switching from test to train
            if(iteration < current_line_iteration or
                    (get_line_type(line) and
                     (get_line_type(line) != iteration_type))):
                iteration = current_line_iteration
                # new iteration: flush the previously buffered row (skipped
                # when nothing has been buffered yet)
                if(iteration > 0 or
                        (iteration_type and
                         get_line_type(line) != iteration_type)):
                    # log previous iteration
                    if(iteration_type == 'train'):
                        train_dict_list.append(iteration_dict)
                    else:
                        test_dict_list.append(iteration_dict)
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
                seconds = (time - start_time).total_seconds()
                # start buffering the new row
                iteration_dict = {'Iterations': '{:d}'.format(iteration),
                                  'Seconds': '{:f}'.format(seconds)}
                iteration_type = get_line_type(line) or 'train'

            # Accumulate values for the buffered row.
            lr_match = re_lr.search(line)
            if lr_match:
                iteration_dict['LearningRate'] = float(lr_match.group(1))
            accuracy_match = re_test_accuracy.search(line)
            if accuracy_match:
                iteration_dict['Accuracy'] = float(accuracy_match.group(1))
            loss_test_match = re_test_loss.search(line)
            if loss_test_match:
                iteration_dict['Loss'] = float(loss_test_match.group(1))
            loss_match = re_loss.search(line)
            if loss_match:
                iteration_dict['Loss'] = float(loss_match.group(1))

    # log last iteration
    if(iteration_dict and iteration_type == 'train'):
        train_dict_list.append(iteration_dict)
    elif(iteration_dict):
        test_dict_list.append(iteration_dict)

    return train_dict_list, train_dict_names, test_dict_list, test_dict_names
def parse_log(path_to_log):
    """Parse a multi-phase training log.

    Phases are separated by 'Wrote snapshot to' lines; each phase yields
    its own train_dict_list.

    Returns (phases, mean_ap): phases is a list of train_dict_lists, one
    per learning phase; mean_ap is the last 'Mean AP' value found in the
    log, or None if absent.
    """
    regex_float = r'([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)'
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(r'lr = %s' % regex_float)
    regex_end_of_phase = re.compile(r'Wrote snapshot to')
    regex_ignore_rows = re.compile(r'speed: [0-9\.]*s / iter')
    regex_mean_ap = re.compile(r'Mean AP = %s' % regex_float)

    # Parsing state
    iteration = -1
    learning_rate = float('NaN')
    phases = []
    train_dict_list = []
    train_row = None
    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    mean_ap = None

    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        last_time = start_time

        for line in f:
            if regex_ignore_rows.search(line):
                continue

            if regex_end_of_phase.search(line):
                # A snapshot marks the end of a learning phase.
                fix_initial_nan_learning_rate(train_dict_list)
                phases.append(train_dict_list)
                train_dict_list = []
                iteration = -1
                continue

            mean_ap_match = regex_mean_ap.search(line)
            if mean_ap_match:
                mean_ap = float(mean_ap_match.group(1))

            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
                continue
            if iteration == -1:
                # Only start parsing for other stuff once the phase's first
                # iteration has been seen.
                continue

            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines with bad formatting, for example when resuming
                # the solver.
                continue

            # glog timestamps carry no year: a backwards month jump means
            # the year rolled over, so re-parse with the incremented year.
            if time.month < last_time.month:
                logfile_year += 1
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            last_time = time
            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))
                continue

            train_dict_list, train_row, train_match = \
                parse_line_for_net_output(
                    regex_train_output, train_row, train_dict_list, line,
                    iteration, seconds, learning_rate)

    # BUGFIX: rows parsed after the last snapshot line were previously
    # dropped; flush the trailing (unfinished) phase if it holds any data.
    if train_dict_list:
        fix_initial_nan_learning_rate(train_dict_list)
        phases.append(train_dict_list)

    return phases, mean_ap
def parse_log(path_to_log):
    """Parse a log that concatenates several training phases.

    Each '+ ./tools/<cmd>' line starts a new phase; elapsed seconds restart
    from the last timestamp seen before the phase switch, and the phase
    index is passed through to parse_line_for_net_output.

    Returns (train_dict_list, test_dict_list): lists of dicts that define
    the table rows.
    """
    regex_phase = re.compile(r'\+ ./tools/(\S+)')
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Parsing state
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        # BUGFIX: start 'time' at start_time (a datetime) instead of 0.0 so
        # that 'start_time = time' on a phase switch occurring before any
        # iteration cannot poison the datetime subtraction below.
        time = start_time
        seconds = 0.
        phase = 0
        for line in f:
            try:
                phase_match = regex_phase.search(line)
                if phase_match:
                    phase = phase + 1
                    # Restart the clock at the last timestamp seen.
                    start_time = time
                    continue

                iteration_match = regex_iteration.search(line)
                if iteration_match:
                    iteration = float(iteration_match.group(1))
                    time = extract_seconds.extract_datetime_from_line(
                        line, logfile_year)
                    seconds = (time - start_time).total_seconds()
                    continue

                lr_match = regex_learning_rate.search(line)
                if lr_match:
                    # BUGFIX: previously read the undefined name
                    # 'learning_rate_match', raising NameError on every
                    # lr line.
                    learning_rate = float(lr_match.group(1))

                train_match = regex_train_output.search(line)
                if train_match:
                    train_dict_list, train_row = parse_line_for_net_output(
                        regex_train_output, train_row, train_dict_list,
                        line, iteration, seconds, learning_rate, phase)

                test_match = regex_test_output.search(line)
                if test_match:
                    test_dict_list, test_row = parse_line_for_net_output(
                        regex_test_output, test_row, test_dict_list,
                        line, iteration, seconds, learning_rate, phase)
            except ValueError:
                print("Oops!")
                print(line)
                continue

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse log file, including optional layer-level debug info.

    Returns (train_dict_list, train_dict_names, test_dict_list,
    test_dict_names, debug_info_dict_list, debug_info_names).
    If debug info wasn't enabled for the run, debug_info_dict_list is
    empty. The *_dict_list values are lists of row dicts; the *_names
    values are ordered tuples of column names.
    """
    re_iteration = re.compile(r'Iteration (\d+)')
    re_train_loss = re.compile(r'Iteration (\d+), loss = (' + FLOAT_RE + r')')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Patterns for '--debug_info' layer statistics lines.
    re_forward_data = re.compile(
        r'\[Forward\] Layer (\S+), top blob (\S+) data: (' + FLOAT_RE + r')')
    re_backward_diff = re.compile(
        r'\[Backward\] Layer (\S+), bottom blob (\S+) diff: ('
        + FLOAT_RE + r')')
    re_backward_param_diff = re.compile(
        r'\[Backward\] Layer (\S+), param blob (\d+) diff: ('
        + FLOAT_RE + r')')
    re_forward_param_data = re.compile(
        r'\[Forward\] Layer (\S+), param blob (\d+) data: ('
        + FLOAT_RE + r')')
    was_in_backward = False

    # Pick out lines of interest
    iteration = -1
    fb_iteration = -1  # iteration number used for timing forward/backward
    debug_flag = False
    max_param_count = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    debug_layer_dict = {}
    train_row = None
    test_row = None
    train_dict_names = ('NumIters', 'Seconds', 'TrainingLoss', 'LearningRate')
    test_dict_names = ('NumIters', 'Seconds', 'TestAccuracy', 'TestLoss')
    # (duplicate initialization of debug_info_dict_list removed)
    debug_info_dict_list = []
    debug_info_names_list = [
        'NumIters', 'LayerName', 'Activation', 'BackPropBottomDiff'
    ]
    debug_info_names = tuple(debug_info_names_list)

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        display_interval = get_display_interval(f)
        debug_flag = is_debug_mode(f)
        if debug_flag:
            # Extend the debug columns with a diff/data/change set per
            # parameter blob.
            max_param_count = getMaxParamCount(f)
            additional_header_list = []
            backward_param_headers = [
                'BackPropDiff' + '_param' + str(i)
                for i in range(max_param_count + 1)
            ]
            additional_header_list += backward_param_headers
            for i in range(max_param_count + 1):
                additional_header_list.append('param' + str(i) + '_Data')
                additional_header_list.append('param' + str(i) + '_Change')
            # adding new headers for each of the parameters
            debug_info_names_list += additional_header_list
            debug_info_names = tuple(debug_info_names_list)
            f.seek(0)  # return to head of file

        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            # Try to extract date and time from the line, assuming one
            # exists in the expected format.
            # BUGFIX: narrowed from a bare 'except:'; only timestamp parse
            # failures should be skipped here.
            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                continue
            seconds = (time - start_time).total_seconds()

            train_loss_match = re_train_loss.search(line)
            if train_loss_match:
                fb_iteration = float(train_loss_match.group(1))

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list, line,
                iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list, line,
                iteration, seconds, learning_rate)
            fix_initial_nan_learning_rate(train_dict_list)
            fix_initial_nan_learning_rate(test_dict_list)

            # Only extract debug information if debug_info is true
            if not debug_flag:
                continue

            forward_match = re_forward_data.search(line)
            if forward_match:
                # If the was_in_backward flag is on, a new forward pass is
                # starting: save the last iteration's info and reset the
                # per-iteration state.
                if was_in_backward:
                    debug_info_dict_list += debug_layer_dict.values()
                    debug_layer_dict = {}
                    was_in_backward = False
                layer_name = forward_match.group(1)
                activation_val = extended_float(forward_match.group(3))
                # 'has_key' replaced with 'in' (identical behavior,
                # Python 3 compatible).
                if layer_name not in debug_layer_dict:
                    debug_layer_dict[layer_name] = dict.fromkeys(
                        debug_info_names)
                debug_layer_dict[layer_name]['LayerName'] = layer_name
                # Presumably attributes debug lines to the upcoming display
                # iteration (0 until the first loss line) -- TODO confirm.
                debug_layer_dict[layer_name]['NumIters'] = \
                    (fb_iteration != -1) * (fb_iteration + display_interval)
                debug_layer_dict[layer_name]['Activation'] = activation_val

            forward_param_data_match = re_forward_param_data.search(line)
            if forward_param_data_match:
                layer_name = forward_param_data_match.group(1)
                param_num = forward_param_data_match.group(2)
                param_header = 'param' + param_num
                param_data = extended_float(
                    forward_param_data_match.group(3))
                debug_layer_dict[layer_name][param_header + '_Data'] = \
                    param_data

            backward_match = re_backward_diff.search(line)
            if backward_match:
                layer_name = backward_match.group(1)
                back_prop_val = extended_float(backward_match.group(3))
                if layer_name not in debug_layer_dict:
                    debug_layer_dict[layer_name] = dict.fromkeys(
                        debug_info_names)
                debug_layer_dict[layer_name]['BackPropBottomDiff'] = \
                    back_prop_val

            backward_param_match = re_backward_param_diff.search(line)
            if backward_param_match:
                was_in_backward = True
                layer_name = backward_param_match.group(1)
                param_num = backward_param_match.group(2)
                param_header = '_param' + param_num
                back_prop_param_val = extended_float(
                    backward_param_match.group(3))
                if layer_name not in debug_layer_dict:
                    debug_layer_dict[layer_name] = dict.fromkeys(
                        debug_info_names)
                debug_layer_dict[layer_name][
                    'BackPropDiff' + param_header] = back_prop_param_val

    # add last iteration information if it exists
    if debug_flag and debug_layer_dict:
        debug_info_dict_list += debug_layer_dict.values()

    return train_dict_list, train_dict_names, test_dict_list, \
        test_dict_names, debug_info_dict_list, debug_info_names