Пример #1
0
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, test_dict_list)`` tuple. Both are the row
        lists accumulated by ``parse_line_for_net_output`` for the
        "Train net output" and "Test net output" lines respectively.
    """
    # Raw strings keep regex escapes such as \d, \S and \. from being read as
    # (invalid) Python string escapes.
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # State carried over from one line to the next
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Nothing else is parsed until the first iteration is seen
                continue

            try:
                timestamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines with bad formatting, for example when resuming
                # solver
                continue

            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate applies to later rows
            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate
            )

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
Пример #2
0
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    Unlike variants that wait for the first "Iteration N" line, this one
    processes every line from the start of the log (``iteration`` stays at
    -1 until the first match). Lines containing ``RuntimeWarning:`` or
    ``>>>`` are skipped before any timestamp is extracted.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, test_dict_list)`` tuple. Both are the row
        lists accumulated by ``parse_line_for_net_output``.

    Note:
        Errors raised by ``extract_seconds.extract_datetime_from_line`` for
        other lines it cannot parse are not caught here.
    """
    # Raw strings keep regex escapes such as \d, \S and \. from being read as
    # (invalid) Python string escapes.
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # State carried over from one line to the next
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))

            # Interpreter warnings and prompt echoes carry no timestamp
            if 'RuntimeWarning:' in line or '>>>' in line:
                continue

            timestamp = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate applies to later rows
            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate
            )

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
Пример #3
0
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, test_dict_list)`` tuple. Both are the row
        lists accumulated by ``parse_line_for_net_output``.

    Note:
        Errors raised by ``extract_seconds.extract_datetime_from_line`` for
        lines it cannot parse are not caught here.
    """
    # Raw strings keep regex escapes such as \d, \S and \. from being read as
    # (invalid) Python string escapes.
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # State carried over from one line to the next
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Nothing else is parsed until the first iteration is seen
                continue

            timestamp = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate applies to later rows
            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list,
                line, iteration, seconds, learning_rate
            )

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
Пример #4
0
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    Lines that do not begin with ``I`` followed by four digits (the
    timestamp prefix this parser expects) are skipped.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, test_dict_list)`` tuple. Both are the row
        lists accumulated by ``parse_line_for_net_output``.
    """
    # Raw strings keep regex escapes such as \d, \S and \. from being read as
    # (invalid) Python string escapes.
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')
    regex_time_stamp = re.compile(r'I(\d){4}')

    # State carried over from one line to the next
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Nothing else is parsed until the first iteration is seen
                continue

            # Skip lines not starting with a time stamp
            if not regex_time_stamp.match(line):
                continue

            timestamp = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate applies to later rows
            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list, line,
                iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list, line, iteration,
                seconds, learning_rate)

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse a batch-wise test log file into table rows.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        ``test_dict_list``: the row list accumulated by
        ``parse_line_for_net_output`` from ``, <name> = <value>`` fragments
        found after the first "Batch N" line.

    Note:
        Errors raised by ``extract_seconds.extract_datetime_from_line`` for
        lines it cannot parse are not caught here.
    """
    # Raw strings keep regex escapes such as \d, \S and \. from being read as
    # (invalid) Python string escapes.
    regex_batch = re.compile(r'Batch (\d+)')
    regex_test_output = re.compile(r'\, (\S+) = ([\.\deE+-]+)')

    # State carried over from one line to the next
    batch = -1
    test_dict_list = []
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            batch_match = regex_batch.search(line)
            if batch_match:
                batch = float(batch_match.group(1))
            if batch == -1:
                # Nothing else is parsed until the first batch is seen
                continue

            timestamp = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (timestamp - start_time).total_seconds()

            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list, line, batch,
                seconds)

    return test_dict_list
Пример #6
0
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    Handles logs that cross a year boundary: when a line's month is lower
    than the previous line's month, the assumed year is incremented and the
    line's timestamp is extracted again with the new year.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, test_dict_list)`` tuple. Both are the row
        lists accumulated by ``parse_line_for_net_output``.
    """
    # Set up the regular expressions. Raw strings keep regex escapes such as
    # \d, \S and \. from being read as (invalid) Python string escapes.
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # State carried over from one line to the next
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        previous_time = start_time

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                # group(0) is the whole match; group(1) is the text captured
                # by the parentheses, i.e. the iteration number
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Nothing else is parsed until the first iteration is seen
                continue

            try:
                timestamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines with bad formatting, for example when resuming
                # solver
                continue

            # The month went backwards, so the log rolled over into the next
            # year: bump the year and re-read the timestamp with it
            if timestamp.month < previous_time.month:
                logfile_year += 1
                timestamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            previous_time = timestamp

            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate applies to later rows
            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list, line,
                iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list, line, iteration,
                seconds, learning_rate)

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
Пример #7
0
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    The patterns target outputs named ``loss3/loss3`` and ``loss3/top-1``;
    a network whose outputs are named differently (for example plain
    ``loss`` / ``accuracy``) needs the patterns below adjusted.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, train_dict_names, test_dict_list,
        test_dict_names)`` tuple. The ``*_dict_list`` values are lists of
        row dicts; the ``*_dict_names`` values are ordered tuples of the
        column names used in those dicts.

    Note:
        Errors raised by ``extract_seconds.extract_datetime_from_line`` for
        lines it cannot parse are not caught here.
    """
    # Numeric value with optional sign and exponent. A plain [\.\d]+ would
    # truncate values printed in scientific notation ("1e-05" -> "1").
    number = r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'

    re_iteration = re.compile(r'Iteration (\d+)')
    re_accuracy = re.compile(r'output #\d+: loss3/top-1 = ' + number)
    re_train_loss = re.compile(r'Iteration \d+, loss3/loss3 = ' + number)
    re_output_loss = re.compile(r'output #\d+: loss3/loss3 = ' + number)
    re_lr = re.compile(r'lr = ' + number)

    # State carried over from one line to the next
    iteration = -1
    test_accuracy = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_dict_names = ('NumIters', 'Seconds', 'TrainingLoss', 'LearningRate')
    test_dict_names = ('NumIters', 'Seconds', 'TestAccuracy', 'TestLoss')

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            timestamp = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate applies to later rows
            lr_match = re_lr.search(line)
            if lr_match:
                learning_rate = float(lr_match.group(1))

            accuracy_match = re_accuracy.search(line)
            if accuracy_match and get_line_type(line) == 'test':
                test_accuracy = float(accuracy_match.group(1))

            train_loss_match = re_train_loss.search(line)
            if train_loss_match:
                train_dict_list.append({
                    'NumIters': iteration,
                    'Seconds': seconds,
                    'TrainingLoss': float(train_loss_match.group(1)),
                    'LearningRate': learning_rate
                })

            output_loss_match = re_output_loss.search(line)
            if output_loss_match and get_line_type(line) == 'test':
                # NOTE: we assume that (1) accuracy always comes right before
                # loss for test data so the test_accuracy variable is already
                # correctly populated and (2) there's one and only one
                # accuracy output for the test net
                test_dict_list.append({
                    'NumIters': iteration,
                    'Seconds': seconds,
                    'TestAccuracy': test_accuracy,
                    'TestLoss': float(output_loss_match.group(1))
                })

    return train_dict_list, train_dict_names, test_dict_list, test_dict_names
Пример #8
0
def parse_log(path_to_log):
    """Parse a training log file into train and test DataFrames.

    The patterns target outputs named ``loss3/loss3`` and ``loss3/top-1``;
    a network whose outputs are named differently (for example plain
    ``loss`` / ``accuracy``) needs the patterns below adjusted.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(df_train, df_test)`` tuple of pandas DataFrames. ``df_train``
        has the columns NumIters, Loss, Accuracy, LearningRate and Seconds;
        ``df_test`` has NumIters, Loss and Accuracy.

    Note:
        Errors raised by ``extract_seconds.extract_datetime_from_line`` for
        lines it cannot parse are not caught here.
    """
    # Numeric value with optional sign and exponent. A plain [\.\d]+ would
    # truncate values printed in scientific notation ("1e-05" -> "1").
    number = r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'

    # Only lines starting with word characters followed by digits are parsed
    re_correct_line = re.compile(r'^\w+\d+')
    re_iteration = re.compile(r'Iteration (\d+)')
    re_output_loss = re.compile(r'output #\d+: loss3/loss3 = ' + number)
    re_output_acc = re.compile(r'output #\d+: loss3/top-1 = ' + number)
    re_lr = re.compile(r'lr = ' + number)

    # State carried over from one line to the next
    iteration = -1
    learning_rate = float('NaN')
    acc = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_dict_names = ('NumIters', 'Loss', 'Accuracy', 'LearningRate', 'Seconds')
    test_dict_names = ('NumIters', 'Loss', 'Accuracy')

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        for line in f:
            if not re_correct_line.match(line):
                continue
            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = int(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            timestamp = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate and accuracy are attached
            # to the next loss row
            lr_match = re_lr.search(line)
            if lr_match:
                learning_rate = float(lr_match.group(1))
            output_acc_match = re_output_acc.search(line)
            if output_acc_match:
                acc = float(output_acc_match.group(1))

            output_loss_match = re_output_loss.search(line)
            if output_loss_match:
                loss = float(output_loss_match.group(1))
                if get_line_type(line) == 'test':
                    test_dict_list.append({'NumIters': iteration,
                                           'Loss': loss,
                                           'Accuracy': acc})
                else:
                    train_dict_list.append({'NumIters': iteration,
                                            'Loss': loss,
                                            'Accuracy': acc,
                                            'LearningRate': learning_rate,
                                            'Seconds': seconds})

    # Every row dict carries exactly the listed columns, so the frames can be
    # built straight from the row lists in the declared column order
    df_train = pd.DataFrame(train_dict_list, columns=list(train_dict_names))
    df_test = pd.DataFrame(test_dict_list, columns=list(test_dict_names))

    return df_train, df_test
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    Train rows come from ``parse_line_for_net_output``. Test rows are built
    here from any ``loss = <value>`` fragment: each row holds NumIters,
    Seconds, LearningRate and loss.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, test_dict_list)`` tuple of row lists.

    Note:
        Errors raised by ``extract_seconds.extract_datetime_from_line`` for
        lines it cannot parse are not caught here.
    """
    # Raw strings keep regex escapes such as \d, \S and \. from being read as
    # (invalid) Python string escapes.
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'loss = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # State carried over from one line to the next
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Nothing else is parsed until the first iteration is seen
                continue

            timestamp = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate applies to later rows
            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )

            test_rate_match = regex_test_output.search(line)
            if test_rate_match:
                if not test_row or test_row['NumIters'] != iteration:
                    if test_row:
                        # A row from an earlier iteration is still pending.
                        # This will probably only happen for the first row;
                        # later rows are pushed by the full-row check below.
                        test_dict_list.append(test_row)

                    # Start a new row for the current iteration
                    test_row = OrderedDict([
                        ('NumIters', iteration),
                        ('Seconds', seconds),
                        ('LearningRate', learning_rate)
                    ])

                test_row["loss"] = float(test_rate_match.group(1))

            if (test_row and len(test_dict_list) >= 1
                    and len(test_row) == len(test_dict_list[0])):
                # The row is full, based on the fact that it has the same
                # number of columns as the first row; append it to the list
                test_dict_list.append(test_row)
                test_row = None

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
Пример #10
0
def parse_log(path_to_log):
    """Parse a training log file into a list of training table rows.

    Parsing starts at the first ``batch_mini_iter: N`` line. Each
    timestamped line after that is handed to ``parse_line_for_net_output``
    together with the patterns for loss, avg_obj, avg_noobj, avg_iou,
    avg_cat and recall.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        ``train_dict_list``: the row list accumulated by
        ``parse_line_for_net_output``. No test rows are produced.
    """
    # Raw strings keep regex escapes such as \d and \. from being read as
    # (invalid) Python string escapes.
    regex_iteration = re.compile(r'batch_mini_iter: (\d+)')
    regex_train_output = re.compile(r'loss: ([\.\deE+-]+)')
    regex_obj_score = re.compile(r'avg_obj: ([\.\deE+-]+)')
    regex_noobj = re.compile(r'avg_noobj: ([\.\deE+-]+)')
    regex_iou = re.compile(r'avg_iou: ([\.\deE+-]+)')
    regex_cat = re.compile(r'avg_cat: ([\.\deE+-]+)')
    regex_recall = re.compile(r'recall: ([\.\deE+-]+)')

    # State carried over from one line to the next
    iteration = -1
    train_dict_list = []
    train_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Nothing else is parsed until the first iteration is seen
                continue

            try:
                timestamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines with bad formatting, for example when resuming
                # solver
                continue

            seconds = (timestamp - start_time).total_seconds()

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, regex_obj_score, regex_noobj, regex_iou,
                regex_cat, regex_recall, train_row, train_dict_list, line,
                iteration, seconds)

    return train_dict_list
Пример #11
0
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    Lines starting with ``speed`` or ``Wrote`` are ignored. Parsing stops at
    the first line starting with ``done``, or at the first line whose
    timestamp raises ``ValueError``.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, test_dict_list)`` tuple. Both are the row
        lists accumulated by ``parse_line_for_net_output``.
    """
    # Raw strings keep regex escapes such as \d, \S and \. from being read as
    # (invalid) Python string escapes.
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # State carried over from one line to the next
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            if line.startswith(("speed", "Wrote")):
                continue

            if line.startswith("done"):
                break

            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Nothing else is parsed until the first iteration is seen
                continue

            try:
                timestamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # A line without a parsable timestamp ends the parse
                break
            except Exception:
                # Single-argument print calls behave the same on Python 2
                # and Python 3.
                print("Unexpected error reading line: ")
                print(line)
                print("I will go on though")
                # No timestamp was obtained for this line, so skip it rather
                # than reuse a stale (or undefined) one below.
                continue

            seconds = (timestamp - start_time).total_seconds()

            # The most recently seen learning rate applies to later rows
            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list, line,
                iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list, line, iteration,
                seconds, learning_rate)

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
Пример #12
0
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    A new train row is started whenever the iteration number changes. A new
    test row is started at the same time unless the last test row still
    holds only its ``NumIters`` key, in which case that key is updated
    instead. Output values found on later lines are stored in the last row
    of the matching list under the output's name.

    Args:
        path_to_log: Path of the log file to read.

    Returns:
        A ``(train_dict_list, train_dict_names, test_dict_list,
        test_dict_names)`` tuple. The ``*_dict_list`` values are lists of
        row dicts; the ``*_dict_names`` values are the keys of the first row
        of each list, or an empty tuple when the log has no iteration lines.
    """
    re_iteration = re.compile(r'Iteration (\d+)')
    # The value pattern accepts a sign and an exponent; a plain [\.\d]+
    # would truncate values printed in scientific notation.
    re_top_output = re.compile(
        r'Iteration \d+, (\w+) = ([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)')
    re_output = re.compile(r'(Test|Train) net output #\d+: '
                           r'(\w+) = ([+-]*[\.\d]+(e[+-][\d]+)*)')

    # State carried over from one line to the next
    iteration = -1
    train_dict_list = []
    test_dict_list = []

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                parsed_iteration = float(iteration_match.group(1))
                if parsed_iteration != iteration:
                    train_dict_list.append({'NumIters': parsed_iteration})

                    if not test_dict_list or len(test_dict_list[-1]) != 1:
                        test_dict_list.append({'NumIters': parsed_iteration})
                    else:
                        # The last test row has no outputs yet: reuse it
                        test_dict_list[-1]['NumIters'] = parsed_iteration

                    iteration = parsed_iteration

            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            try:
                timestamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except Exception:
                # Best effort: lines without a usable timestamp are skipped
                continue

            seconds = (timestamp - start_time).total_seconds()

            top_output_match = re_top_output.search(line)
            if top_output_match:
                top_output_name = top_output_match.group(1)
                top_output_value = float(top_output_match.group(2))

                train_dict_list[-1][top_output_name] = top_output_value

            output_match = re_output.search(line)
            if output_match:
                if output_match.group(1).lower() == 'test':
                    dict_list = test_dict_list
                else:
                    dict_list = train_dict_list

                output_name = output_match.group(2)
                output_value = float(output_match.group(3))

                dict_list[-1][output_name] = output_value
                dict_list[-1]['Seconds'] = seconds

    # A log without any iteration line leaves both lists empty; report no
    # column names instead of failing on the [0] lookup.
    train_dict_names = train_dict_list[0].keys() if train_dict_list else ()
    test_dict_names = test_dict_list[0].keys() if test_dict_list else ()

    return train_dict_list, train_dict_names, test_dict_list, test_dict_names
def parse_log(path_to_log):
    """Parse a training log file into train and test table rows.

    Lines are ignored until the first 'Iteration <n>' line has been seen.
    From then on each line with a parseable timestamp is scanned for the
    learning rate, the 'loss3/loss3' training loss and the test-net
    'loss3/top-1' and 'loss3/loss3' outputs.

    Args:
        path_to_log: path of the log file to read.

    Returns:
        (train_dict_list, train_dict_names, test_dict_list, test_dict_names)

        train_dict_list and test_dict_list are lists of dicts that define
        the table rows.

        train_dict_names and test_dict_names are ordered tuples of the
        column names for the two dict_lists.
    """
    # Decimal number with an optional exponent.  A digits-and-dots pattern
    # would truncate a value written in scientific notation (e.g. "1e-05")
    # to its mantissa.
    float_re = r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)'

    re_iteration = re.compile(r'Iteration (\d+)')
    # The output names are hard-wired to 'loss3/top-1' and 'loss3/loss3'.
    re_accuracy = re.compile(r'output #\d+: loss3/top-1 = ' + float_re)
    re_train_loss = re.compile(r'Iteration \d+, loss3/loss3 = ' + float_re)
    re_output_loss = re.compile(r'output #\d+: loss3/loss3 = ' + float_re)
    re_lr = re.compile(r'lr = ' + float_re)

    # Pick out lines of interest
    iteration = -1
    test_accuracy = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_dict_names = ('NumIters', 'Seconds', 'TrainingLoss', 'LearningRate')
    test_dict_names = ('NumIters', 'Seconds', 'TestAccuracy', 'TestLoss')

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            try:
                timestamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines without a well-formed timestamp instead of
                # aborting the whole parse
                continue
            seconds = (timestamp - start_time).total_seconds()

            lr_match = re_lr.search(line)
            if lr_match:
                learning_rate = float(lr_match.group(1))

            accuracy_match = re_accuracy.search(line)
            if accuracy_match and get_line_type(line) == 'test':
                test_accuracy = float(accuracy_match.group(1))

            train_loss_match = re_train_loss.search(line)
            if train_loss_match:
                train_loss = float(train_loss_match.group(1))
                train_dict_list.append({'NumIters': iteration,
                                        'Seconds': seconds,
                                        'TrainingLoss': train_loss,
                                        'LearningRate': learning_rate})

            output_loss_match = re_output_loss.search(line)
            if output_loss_match and get_line_type(line) == 'test':
                test_loss = float(output_loss_match.group(1))
                # NOTE: we assume that (1) accuracy always comes right before
                # loss for test data so the test_accuracy variable is already
                # correctly populated and (2) there's one and only one
                # 'loss3/top-1' output for the test net
                test_dict_list.append({'NumIters': iteration,
                                       'Seconds': seconds,
                                       'TestAccuracy': test_accuracy,
                                       'TestLoss': test_loss})

    return train_dict_list, train_dict_names, test_dict_list, test_dict_names
Пример #14
0
def parse_log(path_to_log):
    """Parse log file
    Returns (phases, mean_ap)

    phases holds one entry per 'Wrote snapshot to' line found in the log; each
    entry is the training row list collected since the previous such line.
    Rows collected after the last snapshot line are not returned.

    mean_ap is the value of the last 'Mean AP = <float>' line, or None when
    the log contains no such line
    """
    float_pattern = '([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)'
    iteration_re = re.compile('Iteration (\d+)')
    train_output_re = re.compile('Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    lr_re = re.compile('lr = %s' % float_pattern)
    phase_end_re = re.compile('Wrote snapshot to')
    skip_re = re.compile('speed: [0-9\.]*s / iter')
    mean_ap_re = re.compile('Mean AP = %s' % float_pattern)

    # Parser state carried from line to line
    iteration = -1
    learning_rate = float('NaN')
    mean_ap = None
    phases = []
    train_dict_list = []
    train_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)

    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        previous_stamp = start_time

        for line in f:
            if skip_re.search(line):
                continue

            if phase_end_re.search(line):
                # Close the current learning phase and start a fresh one
                fix_initial_nan_learning_rate(train_dict_list)
                phases.append(train_dict_list)
                train_dict_list = []
                iteration = -1
                continue

            ap_found = mean_ap_re.search(line)
            if ap_found:
                mean_ap = float(ap_found.group(1))

            iter_found = iteration_re.search(line)
            if iter_found:
                iteration = float(iter_found.group(1))
            if iter_found or iteration == -1:
                # Iteration lines carry nothing else of interest, and nothing
                # else is parsed before the first iteration of a phase
                continue

            try:
                stamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Skip lines with bad formatting, for example when resuming solver
                continue

            # A month going backwards means the log rolled over into a new year
            if stamp.month < previous_stamp.month:
                logfile_year += 1
                stamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            previous_stamp = stamp

            seconds = (stamp - start_time).total_seconds()

            lr_found = lr_re.search(line)
            if lr_found:
                learning_rate = float(lr_found.group(1))
                continue

            train_dict_list, train_row, _ = parse_line_for_net_output(
                train_output_re, train_row, train_dict_list,
                line, iteration, seconds, learning_rate
            )

    return phases, mean_ap
def parse_log(path_to_log):
    """Parse log file
    Returns (train_dict_list, test_dict_list, batch_size)

    train_dict_list and test_dict_list are lists of dicts that define the table
    rows

    batch_size is the number from the first 'batch_size: <n>' line, as a
    float, or None when the log has no such line
    """
    # Sample of the test-phase lines found in a log:
    #   I0526 13:50:19.771880 28824 solver.cpp:319] Test loss: 2.65821
    #   I0526 13:50:19.772948 28824 solver.cpp:332]     Test net output #0: cross_entropy_loss = 65.4734
    #   I0526 13:50:19.772971 28824 solver.cpp:332]     Test net output #1: l2_error = 2.65821 (* 1 = 2.65821 loss)
    #   I0526 13:50:19.772979 28824 solver.cpp:283] Iteration 24000, Testing net (#1)
    #   I0526 13:50:20.337695 28824 solver.cpp:319] Test loss: 2.78423
    #   I0526 13:50:20.337750 28824 solver.cpp:332]     Test net output #0: cross_entropy_loss = 65.8907
    #   I0526 13:50:20.337774 28824 solver.cpp:332]     Test net output #1: l2_error = 2.78423 (* 1 = 2.78423 loss)

    batch_size_re = re.compile('batch_size: (\d+)')
    iteration_re = re.compile('Iteration (\d+)')
    train_loss_re = re.compile('Iteration \d+, loss = ([\.\d\-+ena]+)')
    accuracy_re = re.compile('output #\d+: (accuracy|l2_error) = ([\.\d\-+ena]+)')
    output_loss_re = re.compile('output #\d+: (loss|cross_entropy_loss) = ([\.\d\-+ena]+)')
    testing_started_re = re.compile('Testing net')

    # Values that are remembered from whichever line last reported them and
    # copied into every training row: (row column, pattern)
    running_metrics = (
        ('LearningRate', re.compile('lr = ([\.\d\-+ena]+)')),
        ('AvgGradientNorm', re.compile('avg_grad_norm = ([\.\d\-+enan]+)')),
        ('AvgStepNorm', re.compile('avg_step_norm = ([\.\d\-+enan]+)')),
        ('EffectiveLearningRate',
         re.compile('avg_effective_learning_rate = ([\.\d\-+enan]+)')),
    )
    latest = dict((column, float('NaN')) for column, _ in running_metrics)

    # Parser state carried from line to line
    iteration = -1
    test_accuracy = -1
    test_start_seconds = float('NaN')
    batch_size = None
    train_dict_list = []
    test_dict_list = []

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

    # The file is reopened so the scan below starts from its first line again
    with open(path_to_log) as f:
        for line in f:
            if batch_size is None:
                found = batch_size_re.search(line)
                if found:
                    batch_size = float(found.group(1))

            found = iteration_re.search(line)
            if found:
                iteration = float(found.group(1))
            elif iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            stamp = extract_seconds.extract_datetime_from_line(line,
                                                               logfile_year)
            seconds = (stamp - start_time).total_seconds()

            for column, pattern in running_metrics:
                found = pattern.search(line)
                if found:
                    latest[column] = float(found.group(1))

            if testing_started_re.search(line):
                test_start_seconds = seconds

            found = accuracy_re.search(line)
            if found and get_line_type(line) == 'test':
                test_accuracy = float(found.group(2))

            found = train_loss_re.search(line)
            if found:
                train_row = {'NumIters': iteration,
                             'Seconds': seconds,
                             'TrainingLoss': float(found.group(1))}
                for column, _ in running_metrics:
                    train_row[column] = latest[column]
                train_dict_list.append(train_row)

            found = output_loss_re.search(line)
            if found and get_line_type(line) == 'test':
                test_loss = float(found.group(2))
                # A test row is emitted on every test-net loss line, using the
                # most recent accuracy value seen.  This assumes the accuracy
                # line comes right before the loss line.
                # NOTE(review): in the sample above cross_entropy_loss (#0)
                # precedes l2_error (#1), so the row would carry the value
                # from the previous test pass -- confirm.
                test_dict_list.append({'NumIters': iteration,
                                       'SecondsAtStart': test_start_seconds,
                                       'SecondsAtEnd': seconds,
                                       'TestAccuracy': test_accuracy,
                                       'TestLoss': test_loss})

    return train_dict_list, test_dict_list, batch_size
def parse_log(path_to_log):
    """Parse log file
    Returns (train_dict_list, test_dict_list)

    train_dict_list and test_dict_list are lists of dicts that define the table
    rows

    The iteration number and loss are taken from 'Iteration <n>, loss = <x>'
    lines.  Lines containing 'speed', 'Wrote' or 'None' are skipped, and
    parsing stops at the first parsed line containing 'done solving'.
    """

    iter_loss_re = re.compile('Iteration (\d+), loss = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')
    train_output_re = re.compile('Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    test_output_re = re.compile('Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    lr_re = re.compile('lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Substrings that mark a line as not worth parsing
    skip_markers = ('speed', 'Wrote', 'None')

    # Parser state carried from line to line
    iteration = -1
    loss = 0
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

    # The file is reopened so the scan below starts from its first line again
    with open(path_to_log) as f:
        for line in f:
            found = iter_loss_re.search(line)
            if found:
                iteration = float(found.group(1))
                loss = float(found.group(2))

            # Only start parsing for other stuff once the first iteration has
            # been found, and never on lines carrying a skip marker
            if iteration == -1 or any(marker in line for marker in skip_markers):
                continue

            # Nothing after the end of solving is parsed
            if 'done solving' in line:
                break

            stamp = extract_seconds.extract_datetime_from_line(line,
                                                               logfile_year)
            seconds = (stamp - start_time).total_seconds()

            found = lr_re.search(line)
            if found:
                learning_rate = float(found.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                train_output_re, train_row, train_dict_list,
                line, iteration, seconds, learning_rate, loss
            )
            test_dict_list, test_row = parse_line_for_net_output(
                test_output_re, test_row, test_dict_list,
                line, iteration, seconds, learning_rate, loss
            )

    for rows in (train_dict_list, test_dict_list):
        fix_initial_nan_learning_rate(rows)

    return train_dict_list, test_dict_list
Пример #17
0
def parse_log(path_to_log):
    """Parse log file
    Returns (train_dict_list, test_dict_list)

    train_dict_list and test_dict_list are lists of dicts that define the table
    rows

    Only lines that start with 'I' followed by four digits are considered,
    and nothing is parsed before the first 'Iteration <n>' line.
    """
    regex_valid_line = re.compile(r'I(\d\d\d\d)')
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): ([\S\_]+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): ([\S\_]+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            # Anything that does not begin like 'I<4 digits>' is skipped
            # before the timestamp parser ever sees it
            if not regex_valid_line.match(line):
                continue

            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))

            if iteration == -1:
                # Only start parsing for other stuff if we've found the first
                # iteration
                continue

            # NOTE(review): echoes every parsed line to stdout; this looks
            # like debugging output -- confirm before removing.  The
            # parenthesised form behaves the same on Python 2 and 3.
            print(line)

            time = extract_seconds.extract_datetime_from_line(
                line, logfile_year)
            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list, line,
                iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list, line, iteration,
                seconds, learning_rate)

    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse log file
    Returns (train_dict_list, test_dict_list)

    train_dict_list and test_dict_list are lists of dicts that define the table
    rows.  Nothing is parsed before the first 'Iteration <n>' line, and lines
    whose timestamp cannot be parsed are skipped.
    """

    iteration_re = re.compile('Iteration (\d+)')
    train_output_re = re.compile(
        'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    test_output_re = re.compile(
        'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    lr_re = re.compile(
        'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Parser state carried from line to line
    current_iter = -1
    current_lr = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        previous_stamp = start_time

        for line in f:
            found = iteration_re.search(line)
            if found is not None:
                current_iter = float(found.group(1))
            elif current_iter == -1:
                # Still waiting for the first iteration line
                continue

            try:
                stamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Badly formatted line (seen for example when resuming the
                # solver): ignore it
                continue

            # A month going backwards means the log rolled over into a new
            # year, so the timestamp is re-read with the next year
            if stamp.month < previous_stamp.month:
                logfile_year += 1
                stamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            previous_stamp = stamp

            seconds = (stamp - start_time).total_seconds()

            found = lr_re.search(line)
            if found is not None:
                current_lr = float(found.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                train_output_re, train_row, train_dict_list, line,
                current_iter, seconds, current_lr)
            test_dict_list, test_row = parse_line_for_net_output(
                test_output_re, test_row, test_dict_list, line,
                current_iter, seconds, current_lr)

    for rows in (train_dict_list, test_dict_list):
        fix_initial_nan_learning_rate(rows)

    return train_dict_list, test_dict_list
def parse_log(path_to_log):
    """Parse log file
    Returns (train_dict_list, test_dict_list)

    train_dict_list and test_dict_list are each a list of four row lists,
    indexed by stage number.  Index 0 collects rows seen before any stage
    marker line; indices 1-3 collect rows seen after the matching marker.
    """

    iteration_re = re.compile('Iteration (\d+)')
    train_output_re = re.compile(
        'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    test_output_re = re.compile(
        'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    lr_re = re.compile(
        'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Log lines announcing a stage, with the stage number each one selects
    stage_markers = (
        ('Stage 1 Fast R-CNN using RPN proposals, init from ImageNet model', 1),
        ('Stage 2 RPN, init from stage 1 Fast R-CNN model', 2),
        ('Stage 2 Fast R-CNN, init from stage 2 RPN R-CNN model', 3),
    )

    # Parser state carried from line to line
    current_iter = -1
    current_lr = float('NaN')
    train_dict_list = [[] for _ in range(4)]
    test_dict_list = [[] for _ in range(4)]
    train_row = None
    test_row = None
    stage_num = 0

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            if not line.strip():
                # Blank line
                continue

            for marker, stage in stage_markers:
                if marker in line:
                    stage_num = stage

            found = iteration_re.search(line)
            if found is not None:
                current_iter = float(found.group(1))
            elif current_iter == -1:
                # Still waiting for the first iteration line
                continue

            try:
                stamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except ValueError:
                # Badly formatted line (seen for example when resuming the
                # solver): ignore it
                continue

            seconds = (stamp - start_time).total_seconds()

            found = lr_re.search(line)
            if found is not None:
                current_lr = float(found.group(1))

            train_dict_list[stage_num], train_row = parse_line_for_net_output(
                train_output_re, train_row, train_dict_list[stage_num],
                line, current_iter, seconds, current_lr)
            test_dict_list[stage_num], test_row = parse_line_for_net_output(
                test_output_re, test_row, test_dict_list[stage_num], line,
                current_iter, seconds, current_lr)

    for train_rows, test_rows in zip(train_dict_list, test_dict_list):
        fix_initial_nan_learning_rate(train_rows)
        fix_initial_nan_learning_rate(test_rows)

    return train_dict_list, test_dict_list
Пример #20
0
def parse_log(path_to_log):
    """Parse a log file into one row per group of train or test lines.

    Consecutive log lines are grouped into a row.  A new row is started
    whenever the iteration number increases or the line type reported by
    get_line_type() differs from the type of the row being built; the
    finished row is then appended to the train list if its type is 'train'
    and to the test list otherwise.

    Args:
        path_to_log: path of the log file to read.

    Returns:
        (train_dict_list, train_dict_names, test_dict_list, test_dict_names)

        train_dict_list and test_dict_list are lists of dicts that define
        the table rows.  Every row holds 'Iterations' and 'Seconds' as
        formatted strings; 'Loss', 'Accuracy' and 'LearningRate' are floats
        and are present only when the grouped lines reported them.

        train_dict_names and test_dict_names are ordered tuples of the
        column names for the two dict_lists.
    """
    # Decimal number with an optional exponent, so that a value such as
    # "1.5e-05" is captured whole rather than truncated to "1.5".
    float_re = r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)'

    re_iteration = re.compile(r'Iteration (\d+)')
    re_test_accuracy = re.compile(r'output #\d+: accuracy = ' + float_re)
    re_test_loss = re.compile(r'output #\d+: loss = ' + float_re)
    re_loss = re.compile(r'Iteration \d+, loss = ' + float_re)
    re_lr = re.compile(r'lr = ' + float_re)

    # Pick out lines of interest
    iteration = -1               # iteration of the row being built
    current_line_iteration = -1  # last iteration number seen in the log
    iteration_type = None        # 'train' or the line type of the row
    iteration_dict = None        # the row being built

    train_dict_list = []
    test_dict_list = []
    train_dict_names = ('Iterations', 'Seconds', 'Loss', 'LearningRate')
    test_dict_names = ('Iterations', 'Seconds', 'Loss', 'Accuracy')

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                current_line_iteration = int(iteration_match.group(1))
            if current_line_iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            line_type = get_line_type(line)

            # new iteration or switching from test to train
            if (iteration < current_line_iteration or
                    (line_type and line_type != iteration_type)):

                iteration = current_line_iteration

                # Log the previous row.  There is none yet on the very first
                # pass, even when the log starts at an iteration > 0; without
                # the None check that case would append None to the test
                # list.
                if iteration_dict is not None and (
                        iteration > 0 or
                        (iteration_type and line_type != iteration_type)):
                    if iteration_type == 'train':
                        train_dict_list.append(iteration_dict)
                    else:
                        test_dict_list.append(iteration_dict)

                timestamp = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
                seconds = (timestamp - start_time).total_seconds()
                iteration_dict = {'Iterations': '{:d}'.format(iteration),
                                  'Seconds': '{:f}'.format(seconds)}

                # Lines without a recognised type count as training lines
                iteration_type = line_type or 'train'

            lr_match = re_lr.search(line)
            if lr_match:
                iteration_dict['LearningRate'] = float(lr_match.group(1))

            accuracy_match = re_test_accuracy.search(line)
            if accuracy_match:
                iteration_dict['Accuracy'] = float(accuracy_match.group(1))

            loss_test_match = re_test_loss.search(line)
            if loss_test_match:
                iteration_dict['Loss'] = float(loss_test_match.group(1))

            loss_match = re_loss.search(line)
            if loss_match:
                iteration_dict['Loss'] = float(loss_match.group(1))

        # log last iteration
        if iteration_dict and iteration_type == 'train':
            train_dict_list.append(iteration_dict)
        elif iteration_dict:
            test_dict_list.append(iteration_dict)

    return train_dict_list, train_dict_names, test_dict_list, test_dict_names
Пример #21
0
def parse_log(path_to_log):
    """Parse log file
    Returns train_dict_list

    train_dict_list is a list of dicts that define the table rows.  Besides
    the training outputs, each call to parse_line_for_net_output() is given
    the latest iteration number, elapsed seconds, learning rate, loss and
    'detection_eval' test score seen so far.

    Parsing starts at the first line; lines whose timestamp cannot be
    extracted are skipped.
    """
    regex_iteration = re.compile(r'Iteration (\d+), loss = ([\.\deE+-]+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(r'lr = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): detection_eval = ([\.\deE+-]+)')

    # Pick out lines of interest; these defaults are reported until the log
    # provides real values
    iteration = 0
    loss = -1
    learning_rate = 0.001
    train_dict_list = []
    train_row = None
    test_score = 0.0

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        last_time = start_time

        for line in f:
            iteration_match = regex_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))
                loss = float(iteration_match.group(2))
            try:
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except Exception:
                # Skip lines with bad formatting, for example when resuming
                # solver.  Any ordinary error counts as "bad formatting", but
                # KeyboardInterrupt and SystemExit are no longer swallowed.
                continue

            # if it's another year
            if time.month < last_time.month:
                logfile_year += 1
                time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            last_time = time

            seconds = (time - start_time).total_seconds()

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            test_score_match = regex_test_output.search(line)
            if test_score_match:
                # group(1) is the output index, group(2) the score
                test_score = float(test_score_match.group(2))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list, line,
                iteration, seconds, learning_rate, loss, test_score)

    return train_dict_list
Пример #22
0
def parse_log(path_to_log):
    """Parse log file
    Returns (train_dict_list, test_dict_list)

    train_dict_list and test_dict_list are lists of dicts that define the table
    rows

    Every line matching '+ ./tools/<name>' starts a new phase: the phase
    counter is incremented and elapsed seconds are from then on measured from
    the timestamp of the most recent 'Iteration <n>' line.  The current phase
    number is passed to parse_line_for_net_output() along with each row.
    """

    regex_phase = re.compile(r'\+ ./tools/(\S+)')
    regex_iteration = re.compile(r'Iteration (\d+)')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')

    # Pick out lines of interest
    iteration = -1
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    train_row = None
    test_row = None

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        start_time = extract_seconds.get_start_time(f, logfile_year)
        last_timestamp = None  # timestamp of the latest iteration line
        seconds = 0.
        phase = 0
        for line in f:
            try:
                if regex_phase.search(line):
                    phase = phase + 1
                    # Restart the clock from the last iteration seen.  With
                    # no iteration parsed yet there is no timestamp to use,
                    # so the original start time is kept.
                    if last_timestamp is not None:
                        start_time = last_timestamp
                    continue

                iteration_match = regex_iteration.search(line)
                if iteration_match:
                    iteration = float(iteration_match.group(1))
                    last_timestamp = extract_seconds.extract_datetime_from_line(
                        line, logfile_year)
                    seconds = (last_timestamp - start_time).total_seconds()

                # Searched before iteration lines are dismissed, because a
                # line may carry both an iteration number and a learning rate
                lr_match = regex_learning_rate.search(line)
                if lr_match:
                    learning_rate = float(lr_match.group(1))

                if iteration_match:
                    continue

                if regex_train_output.search(line):
                    train_dict_list, train_row = parse_line_for_net_output(
                        regex_train_output, train_row, train_dict_list, line,
                        iteration, seconds, learning_rate, phase)

                if regex_test_output.search(line):
                    test_dict_list, test_row = parse_line_for_net_output(
                        regex_test_output, test_row, test_dict_list, line,
                        iteration, seconds, learning_rate, phase)
            except ValueError:
                # Report the offending line and carry on with the next one
                print("Oops!")
                print(line)
                continue
    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    return train_dict_list, test_dict_list
Пример #23
0
def parse_log(path_to_log):
    """Parse log file

    Returns, in this order: (train_dict_list, train_dict_names,
    test_dict_list, test_dict_names, debug_info_dict_list, debug_info_names)

    If the debug info wasn't enabled for the run, debug_info_dict_list is
    empty

    train_dict_list, test_dict_list and debug_info_dict_list are lists of
    dicts that define the table rows

    train_dict_names, test_dict_names and debug_info_names are ordered
    tuples of the column names for the three dict_lists

    Lines that come before the first 'Iteration N' match, and lines from
    which no timestamp can be extracted, are skipped.
    """

    # Raw strings keep the regex escapes (\d, \S, \[ ...) out of Python's own
    # string-escape processing; the compiled patterns are unchanged.
    re_iteration = re.compile(r'Iteration (\d+)')
    re_train_loss = re.compile(r'Iteration (\d+), loss = (' + FLOAT_RE + ')')
    regex_train_output = re.compile(
        r'Train net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_test_output = re.compile(
        r'Test net output #(\d+): (\S+) = ([\.\deE+-]+)')
    regex_learning_rate = re.compile(
        r'lr = ([-+]?[0-9]*\.?[0-9]+([eE]?[-+]?[0-9]+)?)')
    # For parsing debug info
    re_forward_data = re.compile(
        r'\[Forward\] Layer (\S+), top blob (\S+) data: (' + FLOAT_RE + ')')
    re_backward_diff = re.compile(
        r'\[Backward\] Layer (\S+), bottom blob (\S+) diff: (' + FLOAT_RE + ')')
    re_backward_param_diff = re.compile(
        r'\[Backward\] Layer (\S+), param blob (\d+) diff: (' + FLOAT_RE + ')')
    re_forward_param_data = re.compile(
        r'\[Forward\] Layer (\S+), param blob (\d+) data: (' + FLOAT_RE + ')')

    # Parser state
    iteration = -1
    fb_iteration = -1  # iter # used for timing forward/backward
    learning_rate = float('NaN')
    train_dict_list = []
    test_dict_list = []
    debug_info_dict_list = []
    debug_layer_dict = {}  # layer name -> debug row of the pass being read
    train_row = None
    test_row = None
    # Set once a backward param diff has been seen; the next forward line
    # then marks the start of a new pass.
    was_in_backward = False

    train_dict_names = ('NumIters', 'Seconds', 'TrainingLoss', 'LearningRate')
    test_dict_names = ('NumIters', 'Seconds', 'TestAccuracy', 'TestLoss')
    debug_info_names_list = [
        'NumIters', 'LayerName', 'Activation', 'BackPropBottomDiff'
    ]
    debug_info_names = tuple(debug_info_names_list)

    logfile_year = extract_seconds.get_log_created_year(path_to_log)
    with open(path_to_log) as f:
        display_interval = get_display_interval(f)
        debug_flag = is_debug_mode(f)
        if debug_flag:
            max_param_count = getMaxParamCount(f)
            param_indices = range(max_param_count + 1)
            # adding new headers for each of the parameters
            debug_info_names_list += [
                'BackPropDiff' + '_param' + str(i) for i in param_indices
            ]
            for i in param_indices:
                debug_info_names_list.append('param' + str(i) + '_Data')
                debug_info_names_list.append('param' + str(i) + '_Change')
            debug_info_names = tuple(debug_info_names_list)

        # Return to head of file in every mode: the pre-scan helpers above
        # were handed this same handle, so do not depend on where they left
        # the read position.
        f.seek(0)

        start_time = extract_seconds.get_start_time(f, logfile_year)

        def layer_row(layer_name):
            """Return the pending debug row for layer_name, creating it
            (with LayerName and NumIters filled in) when it is missing."""
            row = debug_layer_dict.get(layer_name)
            if row is None:
                row = dict.fromkeys(debug_info_names)
                row['LayerName'] = layer_name
                # bool * number: evaluates to 0 until the first
                # 'Iteration N, loss = ...' line has been seen.
                row['NumIters'] = \
                    (fb_iteration != -1) * (fb_iteration + display_interval)
                debug_layer_dict[layer_name] = row
            return row

        for line in f:
            iteration_match = re_iteration.search(line)
            if iteration_match:
                iteration = float(iteration_match.group(1))

            if iteration == -1:
                # Only look for other stuff if we've found the first iteration
                continue

            # Try to extract date and time from line, assuming there exists
            # one in the expected format. Skipping unparsable lines is
            # deliberate best-effort; `except Exception` keeps that while
            # letting KeyboardInterrupt/SystemExit propagate.
            try:
                line_time = extract_seconds.extract_datetime_from_line(
                    line, logfile_year)
            except Exception:
                continue
            seconds = (line_time - start_time).total_seconds()

            train_loss_match = re_train_loss.search(line)
            if train_loss_match:
                fb_iteration = float(train_loss_match.group(1))

            learning_rate_match = regex_learning_rate.search(line)
            if learning_rate_match:
                learning_rate = float(learning_rate_match.group(1))

            train_dict_list, train_row = parse_line_for_net_output(
                regex_train_output, train_row, train_dict_list, line,
                iteration, seconds, learning_rate)
            test_dict_list, test_row = parse_line_for_net_output(
                regex_test_output, test_row, test_dict_list, line, iteration,
                seconds, learning_rate)

            # Only extract debug information if debug_info is true
            if not debug_flag:
                continue

            forward_match = re_forward_data.search(line)
            if forward_match:
                # If the was_in_backward flag is on, we are starting a new
                # forward pass, so save the last pass's rows and start
                # collecting a fresh set.
                if was_in_backward:
                    debug_info_dict_list.extend(debug_layer_dict.values())
                    debug_layer_dict.clear()
                    was_in_backward = False
                layer_name = forward_match.group(1)
                activation_val = extended_float(forward_match.group(3))
                layer_row(layer_name)['Activation'] = activation_val

            forward_param_data_match = re_forward_param_data.search(line)
            if forward_param_data_match:
                layer_name = forward_param_data_match.group(1)
                param_header = 'param' + forward_param_data_match.group(2)
                param_data = extended_float(forward_param_data_match.group(3))
                # Goes through layer_row() so a param line that is the first
                # line seen for its layer no longer raises KeyError.
                layer_row(layer_name)[param_header + '_Data'] = param_data

            backward_match = re_backward_diff.search(line)
            if backward_match:
                layer_name = backward_match.group(1)
                back_prop_val = extended_float(backward_match.group(3))
                layer_row(layer_name)['BackPropBottomDiff'] = back_prop_val

            backward_param_match = re_backward_param_diff.search(line)
            if backward_param_match:
                was_in_backward = True
                layer_name = backward_param_match.group(1)
                param_header = '_param' + backward_param_match.group(2)
                back_prop_param_val = extended_float(
                    backward_param_match.group(3))
                layer_row(layer_name)[
                    'BackPropDiff' + param_header] = back_prop_param_val

    # Patch the initial learning rate once, after all rows are collected,
    # instead of once per log line.
    # NOTE(review): fix_initial_nan_learning_rate is defined outside this
    # block; this assumes it only rewrites the first row from later rows, so
    # a single call at the end gives the same lists -- confirm.
    fix_initial_nan_learning_rate(train_dict_list)
    fix_initial_nan_learning_rate(test_dict_list)

    # add last iteration information if it exists
    if debug_flag and debug_layer_dict:
        debug_info_dict_list.extend(debug_layer_dict.values())

    return (train_dict_list, train_dict_names, test_dict_list,
            test_dict_names, debug_info_dict_list, debug_info_names)