Пример #1
0
def process_single(filename, verbose, select):
    """Decide whether the run behind a log file should be resumed.

    Reads the log at ``filename``, extracts the task and its metric history
    through ``parse_utils`` and, unless the history says that continuing is
    not worthwhile, builds the ``keras_main.py`` command line that resumes
    training from the last saved state.

    When ``select`` is true the run is *not* resumed if any of these holds:

    * the last validation metric is below a task-specific cap while the
      last train metric is above a task-specific floor (presumably an
      overfitting signal -- the thresholds are hard-coded per task);
    * ``check_decreasing(val_metrics, n, 0.001)`` returns a true value,
      with ``n`` being 5 for ``'los'`` and 3 for every other task;
    * ``len(val_metrics) - argmax(val_metrics) >= 8`` and the best
      validation value exceeds the last one by more than a task-specific
      tolerance (0.03 for ``'los'``, 0.01 otherwise).

    Args:
        filename: Path of the log file to read.
        verbose: When true, progress details are printed.
        select: When false, the filters above are ignored and the run is
            always considered for a rerun.

    Returns:
        ``None`` when the task cannot be detected, no train or validation
        metric was parsed, the filters reject the run, or the last state
        cannot be parsed. Otherwise a dict with the keys ``command``,
        ``train_max``, ``train_max_pos``, ``val_max``, ``val_max_pos``,
        ``last_train``, ``last_val``, ``n_epochs`` and ``filename``.

    Raises:
        AssertionError: If the detected task is not one of ``'ihm'``,
            ``'decomp'``, ``'pheno'``, ``'multitask'`` or ``'los'``.
    """
    if verbose:
        print("Processing log file: {}".format(filename))

    with open(filename, 'r') as fin:
        log = fin.read()
    task = parse_utils.parse_task(log)

    if task is None:
        print("Task is not detected: {}".format(filename))
        return None

    if verbose:
        print("\ttask = {}".format(task))

    # Name of the metric to look up in the log, per task.
    metric_by_task = {
        'multitask': 'ave_auc_macro',
        'pheno': 'ave_auc_macro',
        'ihm': 'AUC of ROC',
        'decomp': 'AUC of ROC',
        'los': 'Cohen kappa score',
    }
    if task not in metric_by_task:
        # Raised explicitly (instead of `assert False`) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError("Unknown task: {}".format(task))
    metric = metric_by_task[task]

    train_metrics, val_metrics = parse_utils.parse_metrics(metric, log)
    # Both histories are indexed below, so both must be non-empty.
    if len(train_metrics) == 0 or len(val_metrics) == 0:
        print("Less than one epoch: {}".format(filename))
        return None
    last_train = train_metrics[-1]
    last_val = val_metrics[-1]

    if verbose:
        print("\tlast train = {}, last val = {}".format(last_train, last_val))

    # (validation cap, train floor) pairs: a run whose last validation value
    # is below the cap while its last train value is above the floor is not
    # resumed.
    pheno_limits = [(0.75, 0.77), (0.76, 0.79)]
    limits_by_task = {
        'ihm': [(0.83, 0.88), (0.84, 0.89), (0.85, 0.9)],
        'decomp': [(0.85, 0.89), (0.87, 0.9), (0.88, 0.92)],
        'pheno': pheno_limits,
        'multitask': pheno_limits,
        'los': [(0.35, 0.42), (0.38, 0.44)],
    }
    rerun = not any(last_val < val_cap and last_train > train_floor
                    for val_cap, train_floor in limits_by_task[task])

    # check if val_metrics is decreasing
    n_decreases = 5 if task == 'los' else 3
    if check_decreasing(val_metrics, n_decreases, 0.001):
        rerun = False

    # check if maximum value for validation was very early
    tol = 0.03 if task == 'los' else 0.01
    val_max = max(val_metrics)
    val_max_pos = np.argmax(val_metrics)
    if len(val_metrics) - val_max_pos >= 8 and val_max - last_val > tol:
        rerun = False

    # Without selection every run is a rerun candidate.
    if not select:
        rerun = True

    if verbose:
        print("\trerun = {}".format(rerun))

    if not rerun:
        return None

    # need to rerun
    last_state = parse_utils.parse_last_state(log)
    if last_state is None:
        print("Last state is not parsed: {}".format(filename))
        return None

    n_epochs = parse_utils.parse_epoch(last_state)

    if verbose:
        print("\tlast state = {}".format(last_state))

    network = parse_utils.parse_network(log)

    # Bump the rerun counter at the end of the prefix. The whole trailing
    # digit run is incremented, so 'r19' becomes 'r20' rather than 'r110'.
    prefix = parse_utils.parse_prefix(log)
    if prefix == '':
        prefix = 'r2'
    else:
        stem = prefix.rstrip('0123456789')
        counter = prefix[len(stem):]
        if counter:
            prefix = stem + str(int(counter) + 1)
        else:
            prefix += '2'

    dim = parse_utils.parse_dim(log)
    size_coef = parse_utils.parse_size_coef(log)
    depth = parse_utils.parse_depth(log)

    ihm_C = parse_utils.parse_ihm_C(log)
    decomp_C = parse_utils.parse_decomp_C(log)
    los_C = parse_utils.parse_los_C(log)
    pheno_C = parse_utils.parse_pheno_C(log)

    dropout = parse_utils.parse_dropout(log)
    partition = parse_utils.parse_partition(log)
    deep_supervision = parse_utils.parse_deep_supervision(log)
    target_repl_coef = parse_utils.parse_target_repl_coef(log)

    command = "python -u keras_main.py --network {} --prefix {} --dim {}"\
              " --depth {} --epochs 100 --batch_size 8 --timestep 1.0"\
              " --load_state {}".format(network, prefix, dim, depth, last_state)

    # size_coef is only passed for networks whose name contains 'channel'.
    if network.find('channel') != -1:
        command += ' --size_coef {}'.format(size_coef)

    # Per-task coefficients are forwarded only when they are truthy.
    for flag, value in (('ihm_C', ihm_C), ('decomp_C', decomp_C),
                        ('los_C', los_C), ('pheno_C', pheno_C)):
        if value:
            command += ' --{} {}'.format(flag, value)

    # Guard against None the same way as for target_repl_coef below.
    if (dropout is not None) and dropout > 0.0:
        command += ' --dropout {}'.format(dropout)

    if partition:
        command += ' --partition {}'.format(partition)

    if deep_supervision:
        command += ' --deep_supervision'

    if (target_repl_coef is not None) and target_repl_coef > 0.0:
        command += ' --target_repl_coef {}'.format(target_repl_coef)

    return {
        "command": command,
        "train_max": np.max(train_metrics),
        "train_max_pos": np.argmax(train_metrics),
        "val_max": np.max(val_metrics),
        "val_max_pos": val_max_pos,
        "last_train": last_train,
        "last_val": last_val,
        "n_epochs": n_epochs,
        "filename": filename
    }
Пример #2
0
def process_single(filename, verbose, select):
    """Decide whether the run behind a log file should be resumed.

    Reads the log at ``filename``, extracts the task and its metric history
    through ``parse_utils`` and, unless the history says that continuing is
    not worthwhile, builds the ``main.py`` command line that resumes
    training from the last saved state with the batch size parsed from the
    log.

    When ``select`` is true the run is *not* resumed if any of these holds:

    * the last validation metric is below a task-specific cap while the
      last train metric is above a task-specific floor (presumably an
      overfitting signal -- the thresholds are hard-coded per task);
    * ``check_decreasing(val_metrics, n, 0.001)`` returns a true value,
      with ``n`` being 5 for ``'los'`` and 3 for every other task;
    * ``len(val_metrics) - argmax(val_metrics) >= 8`` and the best
      validation value exceeds the last one by more than a task-specific
      tolerance (0.03 for ``'los'``, 0.01 otherwise).

    Args:
        filename: Path of the log file to read.
        verbose: When true, progress details are printed.
        select: When false, the filters above are ignored and the run is
            always considered for a rerun.

    Returns:
        ``None`` when the task cannot be detected, no train or validation
        metric was parsed, the filters reject the run, or the last state
        cannot be parsed. Otherwise a dict with the keys ``command``,
        ``train_max``, ``train_max_pos``, ``val_max``, ``val_max_pos``,
        ``last_train``, ``last_val``, ``n_epochs`` and ``filename``.

    Raises:
        AssertionError: If the detected task is not one of ``'ihm'``,
            ``'decomp'``, ``'pheno'``, ``'multitask'`` or ``'los'``.
    """
    if verbose:
        print("Processing log file: {}".format(filename))

    with open(filename, 'r') as fin:
        log = fin.read()
    task = parse_utils.parse_task(log)

    if task is None:
        print("Task is not detected: {}".format(filename))
        return None

    if verbose:
        print("\ttask = {}".format(task))

    # Name of the metric to look up in the log, per task.
    metric_by_task = {
        'multitask': 'ave_auc_macro',
        'pheno': 'ave_auc_macro',
        'ihm': 'AUC of ROC',
        'decomp': 'AUC of ROC',
        'los': 'Cohen kappa score',
    }
    if task not in metric_by_task:
        # Raised explicitly (instead of `assert False`) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError("Unknown task: {}".format(task))
    metric = metric_by_task[task]

    train_metrics, val_metrics = parse_utils.parse_metrics(log, metric)
    # Both histories are indexed below, so both must be non-empty.
    if len(train_metrics) == 0 or len(val_metrics) == 0:
        print("Less than one epoch: {}".format(filename))
        return None
    last_train = train_metrics[-1]
    last_val = val_metrics[-1]

    if verbose:
        print("\tlast train = {}, last val = {}".format(last_train, last_val))

    # (validation cap, train floor) pairs: a run whose last validation value
    # is below the cap while its last train value is above the floor is not
    # resumed.
    pheno_limits = [(0.75, 0.77), (0.76, 0.79)]
    limits_by_task = {
        'ihm': [(0.83, 0.88), (0.84, 0.89), (0.85, 0.9)],
        'decomp': [(0.85, 0.89), (0.87, 0.9), (0.88, 0.92)],
        'pheno': pheno_limits,
        'multitask': pheno_limits,
        'los': [(0.35, 0.42), (0.38, 0.44)],
    }
    rerun = not any(last_val < val_cap and last_train > train_floor
                    for val_cap, train_floor in limits_by_task[task])

    # check if val_metrics is decreasing
    n_decreases = 5 if task == 'los' else 3
    if check_decreasing(val_metrics, n_decreases, 0.001):
        rerun = False

    # check if maximum value for validation was very early
    tol = 0.03 if task == 'los' else 0.01
    val_max = max(val_metrics)
    val_max_pos = np.argmax(val_metrics)
    if len(val_metrics) - val_max_pos >= 8 and val_max - last_val > tol:
        rerun = False

    # Without selection every run is a rerun candidate.
    if not select:
        rerun = True

    if verbose:
        print("\trerun = {}".format(rerun))

    if not rerun:
        return None

    # need to rerun
    last_state = parse_utils.parse_last_state(log)
    if last_state is None:
        print("Last state is not parsed: {}".format(filename))
        return None

    n_epochs = parse_utils.parse_epoch(last_state)

    if verbose:
        print("\tlast state = {}".format(last_state))

    network = parse_utils.parse_network(log)

    # Bump the rerun counter at the end of the prefix. The whole trailing
    # digit run is incremented, so 'r19' becomes 'r20' rather than 'r110'.
    prefix = parse_utils.parse_prefix(log)
    if prefix == '':
        prefix = 'r2'
    else:
        stem = prefix.rstrip('0123456789')
        counter = prefix[len(stem):]
        if counter:
            prefix = stem + str(int(counter) + 1)
        else:
            prefix += '2'

    dim = parse_utils.parse_dim(log)
    size_coef = parse_utils.parse_size_coef(log)
    depth = parse_utils.parse_depth(log)

    ihm_C = parse_utils.parse_ihm_C(log)
    decomp_C = parse_utils.parse_decomp_C(log)
    los_C = parse_utils.parse_los_C(log)
    pheno_C = parse_utils.parse_pheno_C(log)

    dropout = parse_utils.parse_dropout(log)
    partition = parse_utils.parse_partition(log)
    deep_supervision = parse_utils.parse_deep_supervision(log)
    target_repl_coef = parse_utils.parse_target_repl_coef(log)

    batch_size = parse_utils.parse_batch_size(log)

    command = "python -u main.py --network {} --prefix {} --dim {}"\
              " --depth {} --epochs 100 --batch_size {} --timestep 1.0"\
              " --load_state {}".format(network, prefix, dim, depth, batch_size, last_state)

    # size_coef is only passed for networks whose name contains 'channel'.
    if network.find('channel') != -1:
        command += ' --size_coef {}'.format(size_coef)

    # Per-task coefficients are forwarded only when they are truthy.
    for flag, value in (('ihm_C', ihm_C), ('decomp_C', decomp_C),
                        ('los_C', los_C), ('pheno_C', pheno_C)):
        if value:
            command += ' --{} {}'.format(flag, value)

    # Guard against None the same way as for target_repl_coef below.
    if (dropout is not None) and dropout > 0.0:
        command += ' --dropout {}'.format(dropout)

    if partition:
        command += ' --partition {}'.format(partition)

    if deep_supervision:
        command += ' --deep_supervision'

    if (target_repl_coef is not None) and target_repl_coef > 0.0:
        command += ' --target_repl_coef {}'.format(target_repl_coef)

    return {"command": command,
            "train_max": np.max(train_metrics),
            "train_max_pos": np.argmax(train_metrics),
            "val_max": np.max(val_metrics),
            "val_max_pos": val_max_pos,
            "last_train": last_train,
            "last_val": last_val,
            "n_epochs": n_epochs,
            "filename": filename}