def generate_test(config,
                  params,
                  split_name="train",
                  seed=None,
                  sobol_generator=None):
    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    data_dirpath = os.path.expanduser(data_dirpath)
    print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    alpha, x, density, gt, noise, curvature = generate_data(
        root_dir,
        params,
        split_name=split_name,
        seed=seed,
        sobol_generator=sobol_generator)

    noisy_gt = gt + noise
    import matplotlib.pyplot as plt
    f = plt.figure()
    f.set_tight_layout({"pad": .0})
    ax = f.gca()
    # plt.scatter(alpha, noisy_gt, s=10)
    ax.plot(alpha, noisy_gt)
    ax.set_xlabel("alpha")
    ax.set_ylabel("y")
    # plt.title("Sinusoid, freq = {}".format(params["f"]))
    plt.show()
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()

    distribution = "uniform"
    params = {
        "n": args.sample_count,
        "f": args.frequency,
        "s": args.noise_std,
        "d": distribution,
    }

    sobol_generator = rand_utils.SobolGenerator()
    # sobol_generator = None

    generate_test(config,
                  params,
                  split_name="train",
                  seed=0,
                  sobol_generator=sobol_generator)
    generate_test(config,
                  params,
                  split_name="val",
                  seed=1,
                  sobol_generator=sobol_generator)
    generate_test(config,
                  params,
                  split_name="test",
                  seed=2,
                  sobol_generator=sobol_generator)
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()
    if args.batch_size is not None:
        config["batch_size"] = args.batch_size

    distribution = "uniform"
    dataset_params = {
        "n": args.sample_count,
        "f": args.frequency,
        "s": args.noise_std,
        "d": distribution,
    }

    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    data_dirpath = os.path.expanduser(data_dirpath)
    print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    sobol_generator = rand_utils.SobolGenerator()

    train_ds = Synthetic1DDataset(root_dir=root_dir,
                                  params=dataset_params,
                                  split_name="train",
                                  sobol_generator=sobol_generator,
                                  transform=torchvision.transforms.Compose([
                                      transforms.ToTensor(),
                                      transforms.ToDevice(device="cuda")
                                  ]))
    train_dl = DataLoader(train_ds,
                          batch_size=config["batch_size"],
                          shuffle=True,
                          num_workers=4)

    for i_batch, sample_batched in enumerate(train_dl):
        print(
            i_batch,
            sample_batched['density'].max(),
            # sample_batched['gt'],
            # sample_batched['noise'],
        )
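# The loop above relies on DataLoader's default collation of dict samples: when
# __getitem__ returns a dict of equally-shaped arrays, default_collate stacks them
# per key into batched tensors. A standalone, minimal sketch with a hypothetical
# ToyDataset standing in for Synthetic1DDataset (which is not shown in this snippet):
import numpy as np
from torch.utils.data import Dataset, DataLoader


class ToyDataset(Dataset):
    """Yields dict samples with a subset of the keys used by Synthetic1DDataset."""

    def __init__(self, n=32):
        self.alpha = np.linspace(0, 1, n).astype(np.float32)

    def __len__(self):
        return len(self.alpha)

    def __getitem__(self, i):
        a = self.alpha[i]
        return {"alpha": a, "gt": np.sin(2 * np.pi * a), "density": np.float32(1.0)}


toy_dl = DataLoader(ToyDataset(), batch_size=8, shuffle=True)
for i_batch, sample_batched in enumerate(toy_dl):
    print(i_batch, sample_batched["density"].max())  # each value is a batched tensor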
Example #4
def get_termlangs(repositories, all_languages):
    print_info("Searching for manifests ..")
    termlangs = {}
    for m in find_files(repositories, ".manifest"):
        print_info("Found Manifest:" +str(m))
        try:
            term = parse_manifest(m)
            if term.language not in termlangs:
                termlangs[term.language] = []
            termlangs[term.language].append(term)
        except Exception as e:
            import traceback
            traceback.print_exc()
            print_error("Failed" + e)
    return termlangs
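# get_termlangs simply groups the parsed manifests by language; a typical caller
# iterates the resulting mapping. The arguments below are placeholders for illustration:
termlangs = get_termlangs(["./repositories"], all_languages={})
for language, terms in termlangs.items():
    print_info("{}: {} term(s)".format(language, len(terms)))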
Example #5
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()
    if args.runs_dirpath is not None:
        config["runs_dirpath"] = args.runs_dirpath

    if args.mode == "image":
        plot_stats.plot_stats(config, args.run_name, args.source_idx_list)
    elif args.mode == "1d":
        plot_stats_1d.plot(config, args.run_name)
def compute_grads(config, run_params, dataset_params, split_name):
    # print("# --- Compute grads --- #")

    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    # print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    # setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])
    run_dirpath = None
    try:
        run_dirpath = run_utils.setup_run_dir(runs_dir, run_params["run_name"])
    except ValueError:
        print_utils.print_error(
            "Run name {} was not found. Aborting...".format(
                run_params["run_name"]))
        exit()

    # Choose device
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Instantiate dataset
    ds = Synthetic1DDataset(root_dir=root_dir,
                            params=dataset_params,
                            split_name=split_name,
                            transform=torchvision.transforms.Compose([
                                transforms.ToTensor(),
                                transforms.ToDevice(device=dev)
                            ]))
    dl = DataLoader(ds, batch_size=1)

    model = Simple1DInputNet(config, run_params["capacity"])
    model.to(dev)

    analyzer = Analyzer(config, model, run_dirpath)
    analyzer.compute_and_save_grads(dl)
Example #7
    def compute_and_save_grads(self, dl):
        # Loading model
        if self.load_checkpoint(self.checkpoints_dirpath) is None:  # Try last checkpoint
            print_utils.print_error("Checkpoint {} could not be loaded. Aborting...".format(self.checkpoints_dirpath))
            exit()

        self.model.train()
        pbar = tqdm(dl, desc="Compute grads: ")
        for batch_index, batch in enumerate(pbar):
            grads, pred = self.model.compute_grads(batch, return_pred=True)
            grads = grads.cpu().detach().numpy()
            pred = pred.cpu().detach().numpy()
            # Save grads in run_dirpath
            grads_filename = "grads.{:05d}.npy".format(batch_index)
            pred_filename = "pred.{:05d}.npy".format(batch_index)
            grads_filepath = os.path.join(self.grads_dirpath, grads_filename)
            pred_filepath = os.path.join(self.grads_dirpath, pred_filename)
            np.save(grads_filepath, grads)
            np.save(pred_filepath, pred)
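# The per-batch arrays written above can be read straight back with numpy; a small
# standalone sketch (grads_dirpath is a hypothetical stand-in for self.grads_dirpath):
import glob
import os

import numpy as np

grads_dirpath = "runs/my_run/grads"
grads = [np.load(p) for p in sorted(glob.glob(os.path.join(grads_dirpath, "grads.*.npy")))]
preds = [np.load(p) for p in sorted(glob.glob(os.path.join(grads_dirpath, "pred.*.npy")))]
print(len(grads), grads[0].shape if grads else None)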
Example #8
    def build_from_resource(resource, term, term_dir, language, theme):
        try:
            print_info("Project building..\t" + str(resource.filename))
        except Exception:
            print_error("Project failed due to filename encoding: " + str(resource.filename))
            return None

        project = Project.parse_project_meta(resource)
        project_dir = os.path.join(term_dir, "%02d" % project.number)
        makedirs(project_dir)

        try:
            built_project = Project.build_project(term, project, language, theme, project_dir)
        except Exception:
            print_error("Project failed while building: " + str(resource.filename))
            return None

        print_info("Project done!\t\t" + str(resource.filename))
        return built_project
Example #9
def plot_stats(config, run_name, source_idx_list):
    print("# --- Plot stats --- #")

    working_dir = os.path.dirname(os.path.abspath(__file__))

    # setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])
    run_dirpath = None
    try:
        run_dirpath = run_utils.setup_run_dir(runs_dir, run_name)
    except ValueError:
        print_utils.print_error(
            "Run name {} was not found. Aborting...".format(run_name))
        exit()
    stats_dirpath = os.path.join(run_dirpath, "stats")

    stats_list = load_stats_list(stats_dirpath, source_idx_list)

    plot_k_nearest(stats_list)

    for stats in stats_list:
        plot_hist(stats)
Example #10
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()
    if args.batch_size is not None:
        config["batch_size"] = args.batch_size
    if args.exps_dirpath is not None:
        config["exps_dirpath"] = args.exps_dirpath

    distribution = "uniform"
    params = {
        "run_count": args.run_count,
        "sample_count": args.sample_count,
        "frequency": args.frequency,
        "noise_std": args.noise_std,
        "distribution": distribution,
    }
    stats_params = {
        "neighbors_t": args.neighbors_t,
        "neighbors_n": args.neighbors_n,
    }

    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Setup exp directory:
    exps_dir = os.path.join(working_dir, config["exps_dirpath"])
    exp_dirpath = run_utils.setup_run_dir(exps_dir, args.exp_name, args.new_exp)

    # Launch experiments
    launch_experiments(config, exp_dirpath, args.new_exp, args.recompute_stats, params, stats_params)

    # Aggregate results
    aggregate_results(exp_dirpath, params, stats_params)
Example #11
    def restore_checkpoint(self, sess, saver, checkpoints_dir):
        """

        :param sess:
        :param saver:
        :param checkpoints_dir:
        :return: True if a checkpoint was found and restored, False if no checkpoint was found
        """
        checkpoint = tf.train.get_checkpoint_state(checkpoints_dir)
        if checkpoint and checkpoint.model_checkpoint_path:  # Check if the model has a checkpoint
            print_utils.print_info("Restoring {} checkpoint {}".format(
                self.model_name, checkpoint.model_checkpoint_path))
            try:
                saver.restore(sess, checkpoint.model_checkpoint_path)
            except tf.errors.InvalidArgumentError:
                print_utils.print_error(
                    "ERROR: could not load checkpoint.\n"
                    "\tThis is likely due to: .\n"
                    "\t\t -  the model graph definition has changed from the checkpoint thus weights do not match\n"
                    .format(checkpoints_dir))
                exit()
            return True
        else:
            return False
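# Standalone sketch of the same restore pattern outside the class, using the TF1-style
# graph/Session API this method is written against (tf.compat.v1 under TensorFlow 2);
# the variable and directory below are illustrative only:
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

checkpoints_dir = "runs/my_run/checkpoints"
x = tf.get_variable("x", shape=[2], initializer=tf.zeros_initializer())
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    checkpoint = tf.train.get_checkpoint_state(checkpoints_dir)
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Restored", checkpoint.model_checkpoint_path)
    else:
        print("No checkpoint found in", checkpoints_dir)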
Example #12
def main():
    # --- Process args --- #
    args = get_args()
    config = run_utils.load_config(args.config)
    if config is None:
        print_utils.print_error(
            "ERROR: cannot continue without a config file. Exiting now...")
        exit()
    print_utils.print_info("Using downscaling factors: {}".format(args.ds_fac))
    run_name_list = [RUN_NAME_FORMAT.format(ds_fac) for ds_fac in args.ds_fac]

    # --- Read image --- #
    print_utils.print_info("Reading image...")
    image_filepath = get_abs_path(args.image)
    image, image_metadata = read_image(image_filepath, args.pixelsize)
    image = clip_image(image, 0, 255)

    # hist = np.histogram(image)
    # print_hist(hist)

    im_min, im_max = get_min_max(image, std_factor=3)

    # print("min: {}, max: {}".format(im_min, im_max))

    image = stretch_image(image, im_min, im_max, 0, 255)
    image = clip_image(image, 0, 255)

    # hist = np.histogram(image)
    # print_hist(hist)

    print("Image stats:")
    print("\tShape: {}".format(image.shape))
    print("\tMin: {}".format(image.min()))
    print("\tMax: {}".format(image.max()))

    # --- Read shapefile if it exists --- #
    if args.shapefile is not None:
        shapefile_filepath = get_abs_path(args.shapefile)
        gt_polygons = get_shapefile_annotations(image_filepath,
                                                shapefile_filepath)

    else:
        # --- Load or fetch OSM building data --- #
        gt_polygons = get_osm_annotations(image_filepath)

    # --- Print polygon info --- #
    print("Polygons stats:")
    print("\tCount: {}".format(len(gt_polygons)))
    print("\tMin: {}".format(min([polygon.min() for polygon in gt_polygons])))
    print("\tMax: {}".format(max([polygon.max() for polygon in gt_polygons])))

    if not check_polygons_in_image(image, gt_polygons):
        print_utils.print_error(
            "ERROR: polygons are not inside the image. This is most likely due to using the wrong projection when reading the input shapefile. Aborting..."
        )
        exit()

    print_utils.print_info("Aligning building annotations...")
    aligned_polygons = test.test_align_gt(args.runs_dirpath,
                                          image,
                                          image_metadata,
                                          gt_polygons,
                                          args.batch_size,
                                          args.ds_fac,
                                          run_name_list,
                                          config["disp_max_abs_value"],
                                          output_shapefiles=False)

    print_utils.print_info("Saving aligned building annotations...")
    save_annotations(args.image, aligned_polygons)
Example #13
def train(config, run_params, dataset_params):
    # print("# --- Starting training --- #")

    run_name = run_params["run_name"]
    new_run = run_params["new_run"]
    init_run_name = run_params["init_run_name"]

    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    # print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    # setup init checkpoints directory path if one is specified:
    if init_run_name is not None:
        init_run_dirpath = run_utils.setup_run_dir(config["runs_dirpath"],
                                                   init_run_name)
        _, init_checkpoints_dirpath = run_utils.setup_run_subdirs(
            init_run_dirpath)
    else:
        init_checkpoints_dirpath = None

    # setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])
    run_dirpath = run_utils.setup_run_dir(runs_dir, run_name, new_run)

    # save config in logs directory
    run_utils.save_config(config, run_dirpath)

    # save args
    args_filepath = os.path.join(run_dirpath, "args.json")
    python_utils.save_json(
        args_filepath, {
            "run_name": run_name,
            "new_run": new_run,
            "init_run_name": init_run_name,
            "batch_size": config["batch_size"],
        })

    # Choose device
    # dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    dev = "cpu"  # For small networks and experiments, cpu is much faster

    # Instantiate dataset
    # sobol_generator = rand_utils.SobolGenerator()
    sobol_generator = None
    train_ds = Synthetic1DDataset(root_dir=root_dir,
                                  params=dataset_params,
                                  split_name="train",
                                  sobol_generator=sobol_generator,
                                  transform=torchvision.transforms.Compose([
                                      transforms.ToTensor(),
                                      transforms.ToDevice(device=dev)
                                  ]))
    val_ds = Synthetic1DDataset(root_dir=root_dir,
                                params=dataset_params,
                                split_name="val",
                                sobol_generator=sobol_generator,
                                transform=torchvision.transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.ToDevice(device=dev)
                                ]))

    # print(train_ds.alpha)
    # print(val_ds.alpha)
    # exit()

    # Generate the test dataset here as well: when using Sobol numbers, all splits should share
    # the same SobolGenerator so that they do not draw the same samples (see the sketch after this function).
    test_ds = Synthetic1DDataset(root_dir=root_dir,
                                 params=dataset_params,
                                 split_name="test",
                                 sobol_generator=sobol_generator,
                                 transform=torchvision.transforms.Compose([
                                     transforms.ToTensor(),
                                     transforms.ToDevice(device=dev)
                                 ]))
    train_dl = DataLoader(train_ds,
                          batch_size=config["batch_size"],
                          shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=config["batch_size"])

    success = False
    while not success:
        try:
            model = Simple1DInputNet(config)
            model.to(dev)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=config["lr"],
                                         weight_decay=config["weight_decay"])
            loss_func = measures.l1_loss

            trainer = Trainer(config, model, optimizer, loss_func,
                              init_checkpoints_dirpath, run_dirpath)
            trainer.fit(config, train_dl, val_dl)
            success = True
        except ValueError:  # Catches NaN errors
            # Wipe the partial run and try again
            run_utils.wipe_run_subdirs(run_dirpath)
            print("\nTry again\n")
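# Sketch of why all splits should share a single Sobol generator (see the comment above
# where the datasets are instantiated). rand_utils.SobolGenerator is not shown in this
# snippet, so scipy's quasi-Monte Carlo sampler stands in for it here:
import numpy as np
from scipy.stats import qmc

shared = qmc.Sobol(d=1, scramble=False)
train_alpha = shared.random(8).ravel()  # first 8 points of the sequence
val_alpha = shared.random(8).ravel()    # next 8 points: disjoint from train_alpha
assert not np.intersect1d(train_alpha, val_alpha).size

fresh = qmc.Sobol(d=1, scramble=False)
# A new generator restarts the sequence, so separately-built splits would collide:
assert np.allclose(fresh.random(8).ravel(), train_alpha)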
Example #14
from html_builders import *

import os
import os.path
import sys
import shutil
import collections
import json
import codecs
import tempfile
import string

try:
    import yaml
except ImportError:
    print_error("You need to install pyyaml using pip or easy_install, sorry")
    sys.exit(-10)

# todo : real classes
Term = collections.namedtuple(
    'Term', 'id manifest title description language number projects extras')
Extra = collections.namedtuple('Extra', 'name materials note')
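# Term and Extra are plain records; a minimal hand-built instance (all field values
# below are illustrative only):
example_extra = Extra(name="Notes", materials=[], note=None)
example_term = Term(id="python-basics", manifest="python-basics.manifest",
                    title="Python Basics", description="Introductory term",
                    language="en", number=1, projects=[], extras=[example_extra])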

# Process files within project and resource containers
def build_extra(term, extra, language, theme, output_dir):
    note = []
    if extra.note:
        note.extend(process_file(extra.note,
                                 note_style,
                                 language,
                                 theme,
                                 output_dir))  # output_dir assumed: the original snippet is truncated here
def similarity_stats_1d(config, run_name, dataset_params, split_name,
                        stats_params):
    # print("# --- Similarity Stats --- #")

    working_dir = os.path.dirname(os.path.abspath(__file__))

    # Find data_dir
    data_dirpath = python_utils.choose_first_existing_path(
        config["data_dir_candidates"])
    if data_dirpath is None:
        print_utils.print_error("ERROR: Data directory not found!")
        exit()
    # print_utils.print_info("Using data from {}".format(data_dirpath))
    root_dir = os.path.join(data_dirpath, config["data_root_partial_dirpath"])

    # setup run directory:
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])
    run_dirpath = None
    try:
        run_dirpath = run_utils.setup_run_dir(runs_dir, run_name)
    except ValueError:
        print_utils.print_error(
            "Run name {} was not found. Aborting...".format(run_name))
        exit()

    # Instantiate dataset
    # ds = Synthetic1DDataset(root_dir=root_dir, params=dataset_params, split_name="test",
    #                         distribution="triangular"
    #                         )
    ds = Synthetic1DDataset(root_dir=root_dir,
                            params=dataset_params,
                            split_name=split_name,
                            transform=None)
    sample_count = len(ds)

    # Load grads and pred
    grads_dirpath = os.path.join(run_dirpath, "grads")
    grads_filepath_list = python_utils.get_filepaths(grads_dirpath,
                                                     endswith_str=".npy",
                                                     startswith_str="grads.")
    grads_list = [
        np.load(grads_filepath)
        for grads_filepath in tqdm(grads_filepath_list, desc="Loading grads")
    ]
    # print("Grads shape: {}".format(grads_list[0].shape))
    pred_filepath_list = python_utils.get_filepaths(grads_dirpath,
                                                    endswith_str=".npy",
                                                    startswith_str="pred.")
    pred_list = [
        np.load(pred_filepath)
        for pred_filepath in tqdm(pred_filepath_list, desc="Loading pred")
    ]

    # Create stats dir
    stats_dirpath = os.path.join(run_dirpath, "stats_1d")
    os.makedirs(stats_dirpath, exist_ok=True)

    # import time
    # t1 = time.clock()

    neighbor_count, neighbor_count_no_normalization = netsimilarity_utils.compute_soft_neighbor_count(
        grads_list)
    neighbors_filepath = os.path.join(stats_dirpath, "neighbors_soft.npy")
    np.save(neighbors_filepath, neighbor_count)
    neighbors_filepath = os.path.join(stats_dirpath,
                                      "neighbors_soft_no_normalization.npy")
    np.save(neighbors_filepath, neighbor_count_no_normalization)

    if not COMPUTE_ONLY_NEIGHBORS_SOFT:
        # Compute similarity matrix
        similarity_mat = netsimilarity_utils.compute_similarity_mat_1d(
            grads_list)

        # Compute number of neighbors
        # Hard-thresholding:
        for t in stats_params["neighbors_t"]:
            neighbor_count = netsimilarity_utils.compute_neighbor_count(
                similarity_mat, "hard", t=t)
            neighbors_filepath = os.path.join(
                stats_dirpath, "neighbors_hard_t_{}.npy".format(t))
            np.save(neighbors_filepath, neighbor_count)

        # # Soft estimate
        # neighbor_count = netsimilarity_utils.compute_neighbor_count(similarity_mat, "soft")
        # neighbors_filepath = os.path.join(stats_dirpath, "neighbors_soft.npy")
        # np.save(neighbors_filepath, neighbor_count)

        # Mix
        for n in stats_params["neighbors_n"]:
            neighbor_count = netsimilarity_utils.compute_neighbor_count(
                similarity_mat, "less_soft", n=n)
            neighbors_filepath = os.path.join(
                stats_dirpath, "neighbors_less_soft_n_{}.npy".format(n))
            np.save(neighbors_filepath, neighbor_count)

    # print("Time to compute number of neighbors:")
    # print(time.clock() - t1)

    # Save inputs
    for key in ["alpha", "x", "density", "gt", "noise", "curvature"]:
        filepath = os.path.join(stats_dirpath, "{}.npy".format(key))
        values = [sample[key] for sample in ds]
        np.save(filepath, values)

    # Save outputs
    pred_filepath = os.path.join(stats_dirpath, "pred.npy")
    pred = [p[0] for p in pred_list]
    np.save(pred_filepath, pred)

    # Error
    error_filepath = os.path.join(stats_dirpath, "error.npy")
    error = [
        np.abs(sample["gt"] - p[0]) for sample, p in zip(ds, pred_list)
    ]
    np.save(error_filepath, error)

    # Losses
    logs_dirpath = os.path.join(run_dirpath, "logs")
    final_losses = python_utils.load_json(
        os.path.join(logs_dirpath, "final_losses.json"))
    train_loss_filepath = os.path.join(stats_dirpath, "train_loss.npy")
    np.save(train_loss_filepath, final_losses["train_loss"])
    val_loss_filepath = os.path.join(stats_dirpath, "val_loss.npy")
    np.save(val_loss_filepath, final_losses["val_loss"])
    loss_ratio_filepath = os.path.join(stats_dirpath, "loss_ratio.npy")
    np.save(loss_ratio_filepath,
            final_losses["val_loss"] / final_losses["train_loss"])