示例#1
0
def write_model_status(epoch=None, lr=None, loss=None, score=None, append=True):
    """ Writes the current epoch, loss, learning rate, and model score to CSV.

    A row of `convergence.csv` is written in two calls: first with
    `score=None`, which writes the epoch, learning rate and loss (leaving the
    line unterminated), and later with a score, which writes the last column
    and the newline.

    Args:
      epoch (int) : Current epoch; also used as the tensorboard step.
      lr (float) : Current learning rate. Required when `score` is `None`.
      loss (float) : Current average loss. Required when `score` is `None`.
      score (float or str) : `None` to write the epoch/lr/loss part of a row,
        the string "NA" to close the row without a score, or a number to
        close the row with the score.
      append (bool) : If `False`, (re)creates the file and writes only the
        header; all other arguments are ignored. Default `True`.
    """
    convergence_path = C.job_dir + "convergence.csv"

    if not append:  # create the file, containing only the header
        with open(convergence_path, "w") as output_file:
            output_file.write("epoch, lr, avg_loss, model_score\n")
        return

    if C.job_type != "train":  # only write a `convergence.csv` when training
        return

    if score is None:
        # first part of the row; the score is appended by a later call
        with open(convergence_path, "a") as output_file:
            output_file.write(f"Epoch {epoch}, {lr:.8f}, {loss:.8f}, ")
        # write to tensorboard
        tb_writer.add_scalar("Train/loss", loss, epoch)
        tb_writer.add_scalar("Train/lr", lr, epoch)
        return

    # last column of the row: "NA" is written verbatim, anything else is
    # formatted as a number
    with open(convergence_path, "a") as output_file:
        if score == "NA":
            output_file.write(f"{score}\n")
        else:
            output_file.write(f"{score:.6f}\n")
示例#2
0
def write_validation_scores(output_dir, epoch_key, model_scores, append=True):
    """ Writes a CSV with the model validation scores as a function of the epoch.

    The scores are also logged to tensorboard when `epoch_key` has the form
    "<word> <integer>" (e.g. "Epoch 10"); the integer is used as the step.
    Keys such as "Training set" are written to the CSV only.

    Args:
      output_dir (str) : Directory in which `validation.csv` is written. The
        file name is appended as-is, so the path must end with a separator.
      epoch_key (str) : For example, "Training set" or "Epoch {n}".
      model_scores (dict) : Contains the average NLL per molecule of
        {validation/training/generated} structures, and the average model score
        (weighted mean of above two scores). The keys read are "avg_nll_val",
        "avg_nll_train", "avg_nll_gen", "abs_nll_diff" and "UC-JSD".
      append (bool) : Indicates whether to append to the output file or
        start a new one. Default `True`.

    Raises:
      KeyError : If `model_scores` is missing one of the keys listed above.
    """
    validation_file_path = output_dir + "validation.csv"

    avg_nll_val = model_scores["avg_nll_val"]
    avg_nll_train = model_scores["avg_nll_train"]
    avg_nll_gen = model_scores["avg_nll_gen"]
    abs_nll_diff = model_scores["abs_nll_diff"]
    uc_jsd = model_scores["UC-JSD"]

    if not append:  # create file
        with open(validation_file_path, "w") as output_file:
            # write headers
            output_file.write(
                "set, avg_nll_per_molecule_val, avg_nll_per_molecule_train, "
                "avg_nll_per_molecule_gen, abs_nll_diff, uc_jsd\n"
            )

    # append the properties of interest to the CSV file
    with open(validation_file_path, "a") as output_file:
        output_file.write(
            f"{epoch_key}, {avg_nll_val:.5f}, {avg_nll_train:.5f}, "
            f"{avg_nll_gen:.5f}, {abs_nll_diff:.5f}, {uc_jsd:.7f}\n"
        )

    # write to tensorboard, but only for keys that carry an epoch number; only
    # the parsing is guarded so that failures in the logging itself surface
    try:
        epoch = int(epoch_key.split()[1])
    except (AttributeError, IndexError, ValueError):
        pass  # e.g. "Training set": there is no step to log against
    else:
        # scalars
        tb_writer.add_scalar("NLL/validation", avg_nll_val, epoch)
        tb_writer.add_scalar("NLL/training", avg_nll_train, epoch)
        tb_writer.add_scalar("NLL/generation", avg_nll_gen, epoch)
        tb_writer.add_scalar("NLL/diff", abs_nll_diff, epoch)
        tb_writer.add_scalar("NLL/uc_jsd", uc_jsd, epoch)
示例#3
0
def properties_to_csv(prop_dict, csv_filename, epoch_key, append=True):
    """ Writes a CSV summarizing how training is going by comparing the
    properties of the generated structures during evaluation to the
    training set.

    The same properties are also logged to tensorboard when `epoch_key` has
    the form "<word> <integer>" (e.g. "Epoch 10"); the integer is used as the
    step. Keys such as "Training set" are written to the CSV only.

    Args:
      prop_dict (dict) : Contains molecular properties, keyed by
        `(epoch_key, property_name)` tuples.
      csv_filename (str) : Full path/filename to CSV file.
      epoch_key (str) : For example, "Training set" or "Epoch {n}".
      append (bool) : Indicates whether to append to the output file (if the
        file exists) or start a new one. Default `True`.

    Raises:
      KeyError : If one of the always-required properties ("fraction_valid",
        "avg_n_nodes", "avg_n_edges", "fraction_unique") is missing.
    """
    # get all the relevant properties from the dictionary
    frac_valid = prop_dict[(epoch_key, "fraction_valid")]
    avg_n_nodes = prop_dict[(epoch_key, "avg_n_nodes")]
    avg_n_edges = prop_dict[(epoch_key, "avg_n_edges")]
    frac_unique = prop_dict[(epoch_key, "fraction_unique")]

    # use the following properties if they exist e.g. for generation epochs, but
    # not for training set; if any one is missing, all three are reported as "NA"
    try:
        run_time = prop_dict[(epoch_key, "run_time")]
        frac_valid_pt = round(
            float(prop_dict[(epoch_key, "fraction_valid_properly_terminated")]), 5
        )
        frac_pt = round(
            float(prop_dict[(epoch_key, "fraction_properly_terminated")]), 5
        )
    except KeyError:
        run_time = "NA"
        frac_valid_pt = "NA"
        frac_pt = "NA"

    (
        norm_n_nodes_hist,
        norm_atom_type_hist,
        norm_formal_charge_hist,
        norm_numh_hist,
        norm_n_edges_hist,
        norm_edge_feature_hist,
        norm_chirality_hist,
    ) = normalize_evaluation_metrics(prop_dict, epoch_key)

    if not append:
        # file does not exist yet, create it
        with open(csv_filename, "w") as output_file:
            # write the file header
            output_file.write(
                "set, fraction_valid, fraction_valid_pt, fraction_pt, run_time, "
                "avg_n_nodes, avg_n_edges, fraction_unique, atom_type_hist, "
                "formal_charge_hist, numh_hist, chirality_hist, "
                "n_nodes_hist, n_edges_hist, edge_feature_hist\n"
            )

    # append the properties of interest to the CSV file
    with open(csv_filename, "a") as output_file:
        output_file.write(
            f"{epoch_key}, {frac_valid:.3f}, {frac_valid_pt}, {frac_pt}, {run_time}, "
            f"{avg_n_nodes:.3f}, {avg_n_edges:.3f}, {frac_unique:.3f}, "
            f"{norm_atom_type_hist}, {norm_formal_charge_hist}, "
            f"{norm_numh_hist}, {norm_chirality_hist}, {norm_n_nodes_hist}, "
            f"{norm_n_edges_hist}, {norm_edge_feature_hist}\n"
        )

    # write to tensorboard, but only for keys that carry an epoch number
    try:
        epoch = int(epoch_key.split()[1])
    except (AttributeError, IndexError, ValueError):
        return  # e.g. "Training set": there is no step to log against

    # Scalars
    scalars = (
        ("Evaluation/valid", frac_valid),
        ("Evaluation/valid_pt", frac_valid_pt),
        ("Evaluation/pt", frac_pt),
        ("Evaluation/n_nodes", avg_n_nodes),
        ("Evaluation/unique", frac_unique),
    )
    for tag, value in scalars:
        # the "NA" placeholder is only meaningful in the CSV; it is not a
        # number, so it is not sent to tensorboard
        if isinstance(value, str):
            continue
        tb_writer.add_scalar(tag, value, epoch)

    # Histogram
    histograms = (
        ("Distributions/atom_type", norm_atom_type_hist),
        ("Distributions/form_charge", norm_formal_charge_hist),
        ("Distributions/hydrogen", norm_numh_hist),
        ("Distributions/n_nodes", norm_n_nodes_hist),
        ("Distributions/n_edges", norm_n_edges_hist),
        ("Distributions/edge_features", norm_edge_feature_hist),
    )
    for tag, hist in histograms:
        tb_writer.add_histogram(tag, np.array(hist), epoch)