Example #1
def diff_hw_config(hardware_cfg: Path) -> None:
    """Diff changes if we updated the hardware configuration.

    :param hardware_cfg: path to ``hosts/$(hostname)-hardware-configuration.nix``
    """
    # Figure out what would happen if we ran nixos-generate-config.
    info("Diff if `nixos-generate-config` was run:")
    cmd(
        "nixos-generate-config --show-hardware-config "
        + f"| diff --ignore-all-space {hardware_cfg} - "
        + "| delta"
    )
    new_cfg = get_output(["nixos-generate-config", "--show-hardware-config"])
    diff = get_output(
        [
            "diff",
            "--report-identical-files",
            "--new-file",
            "--unified",
            "--ignore-all-space",
            str(hardware_cfg),
            "-",
        ],
        input=new_cfg,
        # 1 just means we found differences
        ok_returncodes=[0, 1],
    )
    delta = subprocess.run(["delta"], input=diff, encoding="utf-8", check=False)
    if delta.returncode != 0:
        warn(f"delta exited with non-zero return code {delta.returncode}")
Example #2
def execute(data, num_folds=5):
    """
    Compute the Root Mean Squared Error using num_folds for cross validation
    :param data: Raw Data frame parsed from CSV
    :param num_folds: The number of folds to use
    :return: Root Mean Squared Error
    """
    assert data is not None, "data must be a valid DataFrame"
    assert num_folds > 1, "num_folds must be greater than one."

    # 2. Randomizes the data
    randomized_data = util.randomize_data(data)

    # 3. Creates S folds (for our purposes S = 5, but make your code generalizable, that is it should
    #   work for any legal value of S)
    folds = divide_data(randomized_data, num_folds)

    squared_errors = []
    # 4. For i = 1 to S
    for i in xrange(0, num_folds):
        #   (a) Select fold i as your testing data and the remaining (S - 1) folds as your training data
        test_data = folds[i]
        training_data = select_training_data(folds, i)

        #   (b) Standardizes the data (except for the last column of course) based on the training data
        standardized_train_data, mean, std = util.standardize_data(
            util.get_features(training_data))

        # Add offset column at the front
        standardized_train_data.insert(0, "Bias", 1)

        #   (c) Train a closed-form linear regression model
        training_outputs = util.get_output(training_data)
        weights = cflr.find_weights(standardized_train_data, training_outputs)

        #   (d) Compute the squared error for each sample in the current testing fold
        expected = util.get_output(test_data)
        actual = cflr.apply_solution(util.get_features(test_data), mean, std,
                                     weights)

        squared_error = (expected - actual)**2
        squared_errors.append(squared_error)

    # 5. Compute the RMSE using all the errors.
    rmse = compute_rmse(len(data), squared_errors)

    return rmse
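
`compute_rmse` and `divide_data` are project helpers that are not shown here. Given how `compute_rmse` is called above (the total sample count plus a list of per-fold squared-error series), a minimal sketch of it might look like this (an assumption, not the original implementation):

import math

def compute_rmse(num_samples, squared_errors):
    # Sum the squared errors from every fold, then take the mean and the root.
    total = sum(fold_errors.sum() for fold_errors in squared_errors)
    return math.sqrt(total / num_samples)
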
Example #3
 def run(self, path):
     oldpath = os.getcwd()
     os.chdir(path)
     try:
         out = get_output(self.cmd, shell=True)
         return out
     finally:
         os.chdir(oldpath)
Example #4
def test_osinfo_db_path_dir():
    """
    Test osinfo-db-path --dir
    """
    cmd = [util.Tools.db_path, util.ToolsArgs.DIR, FOOBAR_DIR]
    output = util.get_output(cmd)
    expected_output = FOOBAR_DIR + "\n"
    assert output == expected_output
Example #6
def test_osinfo_db_path_system():
    """
    Test osinfo-db-path --system
    """
    if "OSINFO_SYSTEM_DIR" in os.environ:
        del os.environ["OSINFO_SYSTEM_DIR"]
    cmd = [util.Tools.db_path, util.ToolsArgs.SYSTEM]
    output = util.get_output(cmd)
    expected_output = os.path.join(DATADIR, "osinfo\n")
    assert output == expected_output
Example #7
def test_osinfo_db_path_local():
    """
    Test osinfo-db-path --local
    """
    if "OSINFO_LOCAL_DIR" in os.environ:
        del os.environ["OSINFO_LOCAL_DIR"]
    cmd = [util.Tools.db_path, util.ToolsArgs.LOCAL]
    output = util.get_output(cmd)
    expected_output = os.path.join(SYSCONFDIR, "osinfo\n")
    assert output == expected_output
Example #8
    def test_get_output(self):
        """
            test that get_output returns successfully and executes as expected.
        """
        cmd = '%s %s' % (self.echo, self.testdata)
        actual = util.get_output(cmd)
        #echo replaces EOF with EOL
        expected = 'asdfqwer\n'

        self.assertEqual(expected, actual)
Example #9
def git_cmd(path, *args, **kwargs):
    if path:
        repo = os.path.join(path, ".git")
        cmds = ["git", "--git-dir", repo, "--work-tree", path] + list(args)
    else:
        cmds = ["git"] + list(args)
    try:
        return get_output(cmds, **kwargs)
    except CmdError as e:
        log("Error running git command: %r", e.cmd)
        log("Failure with code: %d", e.ret)
        raise
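
A hedged usage sketch; the repository path is a placeholder:

# List modified files in a hypothetical checkout.
status = git_cmd("/path/to/repo", "status", "--porcelain")
for line in status.splitlines():
    print(line)
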
Example #10
def execute(data, training_data_ratio=2.0 / 3.0, k=1):
    """
    Execute the "Locally-Weighted" Linear Regression (using Closed-Form Linear Regression)
    :param data: Raw Data frame parsed from CSV
    :param training_data_ratio: The percent (0.0 to 1.0) of input data to use in training.
    :param k: Smoothing parameter for local weight computation
    :return: The root mean squared error (RMSE) over the test samples
    """
    # 2. Randomize the data
    randomized_data = util.randomize_data(data)

    # 3. Select the first 2 / 3(round up) of the data for training and the remaining for testing
    training_data, test_data = util.split_data(randomized_data,
                                               training_data_ratio)
    training_outputs = util.get_output(training_data)

    # 4. Standardize the data(except for the last column of course) using the training data
    standardized_training_data, mean, std = util.standardize_data(
        util.get_features(training_data))

    # Add offset column at the front
    standardized_training_data.insert(0, "Bias", 1)

    std_test_data, _, _ = util.standardize_data(util.get_features(test_data),
                                                mean, std)
    std_test_data.insert(0, "Bias", 1)

    squared_errors = []
    # 5. Then for each testing sample
    for i in xrange(0, len(std_test_data)):

        testing_sample = std_test_data.iloc[i]
        expected_output = test_data.loc[testing_sample.name][-1]

        theta_query = compute_theta_query(testing_sample,
                                          standardized_training_data,
                                          training_outputs, k)

        # (b) Evaluate the testing sample using the local model.
        actual_output = np.dot(testing_sample, theta_query)

        # (c) Compute the squared error of the testing sample.
        squared_errors.append(util.compute_se(expected_output, actual_output))

    # 6. Compute the root mean squared error (RMSE)
    sum_of_squared_errors = 0
    for error in squared_errors:
        sum_of_squared_errors += error

    mean_squared_error = sum_of_squared_errors / len(squared_errors)

    rmse = math.sqrt(mean_squared_error)

    return rmse
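
`compute_theta_query` is defined elsewhere in the project. A minimal sketch of what a locally-weighted closed-form solution could look like, using a Gaussian kernel of width `k` and the weighted normal equations (an assumption based on the docstring, not the original code):

import numpy as np

def compute_theta_query(testing_sample, training_inputs, training_outputs, k):
    # Gaussian kernel weight for each training sample, based on its squared
    # distance to the query point.
    diffs = training_inputs.values - testing_sample.values
    weights = np.exp(-np.sum(diffs ** 2, axis=1) / (2.0 * k ** 2))
    W = np.diag(weights)
    X = training_inputs.values
    y = np.asarray(training_outputs, dtype=float)
    # Weighted closed-form solution: theta = (X^T W X)^-1 X^T W y
    XtW = X.T.dot(W)
    return np.linalg.pinv(XtW.dot(X)).dot(XtW).dot(y)
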
Example #11
def test_osinfo_db_path_root():
    """
    Test osinfo-db-path --root FOOBAR_DIR --system
    """
    if "OSINFO_SYSTEM_DIR" in os.environ:
        del os.environ["OSINFO_SYSTEM_DIR"]
    cmd = [util.Tools.db_path, util.ToolsArgs.ROOT, FOOBAR_DIR,
           util.ToolsArgs.SYSTEM]
    output = util.get_output(cmd)
    expected_output = os.path.join(FOOBAR_DIR, *DATADIR.split("/"), "osinfo\n")
    assert output == expected_output
Example #12
 def parse_args(cls) -> Args:
     """Type-safe wrapper around ``argparse.ArgumentParser.parse_args``."""
     args = cls._parser().parse_args()
     return cls(
         verbose=args.verbose,
         update=args.update,
         force=args.force,
         diff=args.diff,
         hostname=args.hostname
         if args.hostname is not None
         else get_output(["hostname"]),
         dry_run=args.dry_run,
     )
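
`cls._parser()` and the `Args` fields come from elsewhere in the script. A minimal sketch of the parser it might construct, with flag names assumed from the attributes read above:

import argparse

def _build_parser():
    # Hypothetical reconstruction; the real flag names and help text may differ.
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("--update", action="store_true")
    parser.add_argument("--force", action="store_true")
    parser.add_argument("--diff", action="store_true")
    parser.add_argument("--hostname", default=None)
    parser.add_argument("--dry-run", dest="dry_run", action="store_true")
    return parser
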
Example #13
def test_osinfo_db_path_user():
    """
    Test osinfo-db-path --user
    """
    if "OSINFO_USER_DIR" in os.environ:
        del os.environ["OSINFO_USER_DIR"]
    if "XDG_CONFIG_HOME" in os.environ:
        del os.environ["XDG_CONFIG_HOME"]
    cmd = [util.Tools.db_path, util.ToolsArgs.USER]
    output = util.get_output(cmd)
    expected_output = os.path.join(os.environ["HOME"], ".config",
                                   "osinfo\n")
    assert output == expected_output
Example #14
def update_hw_config_force(hardware_cfg: Path) -> None:
    """Generate and replace the hardware configuration.

    Performs no safety checks, but doesn't write if ``DRY_RUN`` is true.

    :param hardware_cfg: Path to ``hosts/$(hostname)-hardware-configuration.nix`` file to replace.
    """
    info(("Updating " if hardware_cfg.exists() else "Generating ") + p(hardware_cfg))
    cmd(f"nixos-generate-config --show-hardware-config > {hardware_cfg}")
    new_hardware_config = get_output(
        ["nixos-generate-config", "--show-hardware-config"]
    )
    if not DRY_RUN:
        hardware_cfg.write_text(new_hardware_config)
Example #15
def git_repo_root() -> Path:
    """Gets the git repository root directory.

    Is given relative to this file; it would therefore be unsurprising if this
    function returns the empty path or the current directory path, a single
    dot.
    """
    # dirname is the directory this file is in...
    dirname = Path(__file__).parent
    # ...which we use to determine the repository root.
    dbg("Getting repository root directory.")
    # Don't worry that `relative_to` can raise an exception; we know that this
    # file is in the repository.
    return Path(
        get_output(["git", "rev-parse", "--show-toplevel"], cwd=dirname)
    ).relative_to(dirname.absolute())
Example #16
 def word_count(self, base, path):
     out = get_output(["perl", self.tex_count_path, "-inc", path], cwd=base)
     if self.debug: log("TeXCount: %s", out)
     results = {}
     name = None
     for l in out.splitlines():
         if not ":" in l: continue
         k, v = l.split(":")[0].strip().rstrip(), l.split(
             ":")[1].strip().rstrip()
         if k in ["File", "Included file"]:
             name = v
             results[name] = {}
         elif k == "File(s) total":
             name = "total"
             results[name] = {}
         elif name is not None:
             results[name][k] = v
     return (out, results)
Example #17
def update_hw_config(args: Args, repo_root: Path, hardware_cfg: Path) -> None:
    """Update ``hardware_cfg``, checking that it's not modified in the repo."""
    dbg("Getting `git status` to check if hardware config has been modified.")
    git_status = get_output(
        ["git", "status", "--porcelain", "--untracked-files"],
        cwd=repo_root,
    ).splitlines()

    # Determine if the local hardware configuration is modified by seeing if
    # any of the modified filenames match it.
    hardware_cfg_modified = any(
        # Slice the string to transform
        #   "  M hosts/dahurica-hardware-configuration.nix"
        #    ^^^^
        #    0123
        # into
        #   "hosts/dahurica-hardware-configuration.nix"
        hardware_cfg.samefile(repo_root / filename[3:])
        for filename in git_status
        if GIT_STATUS_MODIFIED.match(filename)
    )
    dbg(
        "Hardware configuration "
        + ("has" if hardware_cfg_modified else f"has {BOLD}not{RESET_BOLD}")
        + " been modified."
    )

    if hardware_cfg_modified:
        if not args.force:
            error(f"There are uncommitted changes to {p(hardware_cfg)};")
            error("refusing to overwrite with `nixos-generate-config`.")
            error("Either commit your changes or pass --force to overwrite local files.")
            args.update = False
        else:
            info(
                f"There are uncommitted changes to {p(hardware_cfg)}, but "
                + p("--force")
                + " was given; overwriting."
            )

    # We may have updated `args.update` above, so double-check it here.
    if args.update:
        # Note: `DRY_RUN` is handled in `update_hw_config_force`.
        update_hw_config_force(hardware_cfg)
Example #18
def get_secret_ids():
    """
    List the available secret keys that can be used for decryption or signing.

    return:
        A dictionary containing subkey and key keys, with lists of every available key list respectively

    """
    out = util.get_output('gpg --list-secret-keys --with-colons').split('\n')

    subkeys = []
    keys = []
    for line in out:
        if re.search('^ssb', line):
            subkeys.append(line.split(':')[4])
        elif re.search('^sec', line):
            keys.append(line.split(':')[4])

    return {'keys': keys, 'subkeys': subkeys }
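
A hedged usage sketch; the key IDs shown in the comment are placeholders:

secret_ids = get_secret_ids()
# e.g. {'keys': ['0123456789ABCDEF'], 'subkeys': ['FEDCBA9876543210']}
for key_id in secret_ids['keys']:
    print(key_id)
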
Example #19
 def word_count(self, base, path):
     out = get_output(["perl", self.tex_count_path, "-inc", path], cwd=base)
     if self.debug:
         log("TeXCount: %s", out)
     results = {}
     name = None
     for l in out.splitlines():
         if not ":" in l:
             continue
         k, v = l.split(":")[0].strip().rstrip(), l.split(":")[1].strip().rstrip()
         if k in ["File", "Included file"]:
             name = v
             results[name] = {}
         elif k == "File(s) total":
             name = "total"
             results[name] = {}
         elif name is not None:
             results[name][k] = v
     return (out, results)
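
A hedged usage sketch, assuming a hypothetical `counter` object configured with a TeXCount script and a `paper.tex` file under the given base directory:

raw_output, results = counter.word_count("/path/to/project", "paper.tex")
# `results` maps each reported file name (plus a "total" entry) to a dict of
# the colon-separated fields parsed above.
for field, value in results.get("total", {}).items():
    print(field, value)
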
Example #20
def execute(data):
    """

    :param data: Raw Data frame parsed from CSV
    :return: Nothing
    """

    # 2. Randomizes the data
    randomized_data = util.randomize_data(data)

    # 3. Selects the first 2/3 (round up) of the data for training and the remaining for testing
    training_data_size = 2.0 / 3.0
    training_data, test_data = util.split_data(randomized_data,
                                               training_data_size)

    # Capture the predicted outputs
    training_outputs = training_data[training_data.columns[-1]]

    # 4. Standardizes the data (except for the last column of course) using the training data
    training_inputs, training_mean, training_std = util.standardize_data(
        util.get_features(training_data))

    # Add offset column at the front
    training_inputs.insert(0, "Bias", 1)

    # 5. Computes the closed-form solution of linear regression
    weights = find_weights(training_inputs, training_outputs)

    # 6. Applies the solution to the testing samples
    test_input = util.get_features(test_data)
    expected = util.get_output(test_data)
    actual = apply_solution(test_input, training_mean, training_std, weights)

    # 7. Computes the root mean squared error (RMSE)
    rmse = util.compute_rmse(expected, actual)

    return weights, rmse
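
`find_weights` and `apply_solution` live elsewhere in the project. A minimal sketch of the closed-form (normal-equation) solution `find_weights` presumably computes, stated as an assumption:

import numpy as np

def find_weights(training_inputs, training_outputs):
    # Closed-form linear regression: theta = (X^T X)^-1 X^T y
    X = np.asarray(training_inputs, dtype=float)
    y = np.asarray(training_outputs, dtype=float)
    return np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
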
Example #21
def execute(data,
            learning_rate=0.001,
            training_data_ratio=2.0 / 3,
            max_iterations=1000000):
    """
    Perform Batch Gradient Descent

    :param data: Raw Data frame parsed from CSV
    :param learning_rate: The rate at which to advance along the gradient
    :param training_data_ratio: The percent of given data to use for training (remaining percent is used for testing)
    :param max_iterations: The maximum number of iterations to execute before exiting
    :return: The learned weights (theta) and the RMSE of the testing data
    """

    # 2. Randomizes the data
    print "Randomizing Data"
    randomized_data = util.randomize_data(data)

    # 3. Selects the first 2 / 3 (round up) of the data for training and the remaining for testing
    print "Selecting Training Data"
    training_data, test_data = util.split_data(randomized_data,
                                               training_data_ratio)

    # 4. Standardizes the data(except for the last column of course) base on the training data
    print "Standardizing Data"
    std_training_data, mean, std = util.standardize_data(
        util.get_features(training_data))
    std_training_data.insert(0, "Bias", 1)

    std_test_data, _, _ = util.standardize_data(util.get_features(test_data),
                                                mean, std)
    std_test_data.insert(0, "Bias", 1)

    iteration = 0
    prior_rmse = 0
    current_rmse = 100  # Doesn't matter what this value is, so long as it doesn't equal prior rmse
    eps = np.spacing(1)
    N = len(std_training_data)

    # Start with randomized values for theta
    theta = np.array([random.uniform(-1, 1) for _ in xrange(0, 3)])

    # Capture our expected values for the training data
    expected = util.get_output(training_data)
    test_data_expected = util.get_output(test_data)

    # Capture the RMSE for test and training over all iterations
    test_rmse_values = []
    training_rmse_values = []

    print "Performing Gradient Descent Linear Regression"
    # 5. While the termination criteria (mentioned above in the implementation details) hasn't been met
    while iteration <= max_iterations and abs(current_rmse -
                                              prior_rmse) >= eps:
        prior_rmse = current_rmse

        #   (a) Compute the RMSE of the training data
        #       By applying the current theta values to the training set & comparing results
        actual = std_training_data.dot(theta)
        current_rmse = util.compute_rmse(expected, actual)

        #   (b) While we can't let the testing set affect our training process, also compute the RMSE of
        #       the testing error at each iteration of the algorithm (it'll be interesting to see).
        #       Same thing as (a), but use test inputs / outputs
        test_data_actual = std_test_data.dot(theta)
        test_data_rmse = util.compute_rmse(test_data_expected,
                                           test_data_actual)

        #   (c) Update each parameter using batch gradient descent
        #       By use of the learning rate
        for i in xrange(len(theta)):
            # We know the length of theta is the same as the num columns in std_training_data
            errors = (actual - expected
                      ) * std_training_data[std_training_data.columns[i]]
            cumulative_error = errors.sum()
            theta[i] -= learning_rate / N * cumulative_error

        iteration += 1
        test_rmse_values.append(test_data_rmse)
        training_rmse_values.append(current_rmse)

    print "Completed in {0} iterations".format(iteration)

    print "Plotting Errors"
    image_path = plot_rmse_values(test_rmse_values, training_rmse_values,
                                  learning_rate)
    print "Saved Image to '{0}'".format(image_path)

    # 6. Compute the RMSE of the testing data.
    print "Computing RMSE of Test Data"
    test_data_actual = std_test_data.dot(theta)
    test_data_rmse = util.compute_rmse(test_data_expected, test_data_actual)
    return theta, test_data_rmse
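
For reference, the per-parameter update loop in the example above is equivalent to this vectorized form (a sketch, not part of the original code):

# theta <- theta - (learning_rate / N) * X^T (X theta - y)
gradient = std_training_data.T.dot(actual - expected).values
theta = theta - (learning_rate / N) * gradient
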
Example #22
 def run(self, path, *args, **kwargs):
     out = get_output(["du", "-chs", "--exclude=.git", path])
     return out.splitlines()[-1].split()[0]
Example #24
def check_hw_config(host_cfg: Path, hardware_cfg: Path, old_hardware_cfg: Path) -> None:
    """Check that ``hardware_cfg`` exists and ``old_hardware_cfg`` doesn't.

    Also imports ``hardware_cfg`` in ``host_cfg`` if it's not otherwise imported.
    """
    # `old_hardware_cfg` is repo_root / "hardware-configuration.nix"; NixOS
    # might generate one by default while installing, so this check should stay
    # here even though all of my hosts have been using this script for a while
    # now.
    if old_hardware_cfg.is_symlink():
        warn(
            f"{p(old_hardware_cfg)} is a symlink to "
            + p(hardware_cfg.parent / os.readlink(old_hardware_cfg))
        )
        warn("That's probably not needed; consider deleting it.")

    elif old_hardware_cfg.exists():
        if not hardware_cfg.exists():
            info(
                f"{p(old_hardware_cfg)} exists but {p(hardware_cfg)}"
                + f" doesn't, renaming {p(old_hardware_cfg)}."
            )
            if not DRY_RUN:
                old_hardware_cfg.rename(hardware_cfg)
        else:
            if filecmp.cmp(hardware_cfg, old_hardware_cfg):
                info(
                    f"{p(old_hardware_cfg)} and {p(hardware_cfg)} "
                    + "both exist but have the same contents, removing "
                    + p(old_hardware_cfg)
                )
                if not DRY_RUN:
                    old_hardware_cfg.unlink()
            else:
                error(
                    f"{p(old_hardware_cfg)} and {p(hardware_cfg)} "
                    + "both exist but have different contents."
                )
                error(
                    f"Determine which one is correct and move it to {p(hardware_cfg)}, "
                    + f"and then delete {p(old_hardware_cfg)}"
                )

    # Otherwise, if we don't have a hardware configuration yet, generate one.
    if not hardware_cfg.exists():
        update_hw_config_force(hardware_cfg)

    # `hardware_cfg`, relative to `host_cfg`, in a nix-import-friendly manner
    hardware_cfg_rel = f"./{hardware_cfg.name}"
    cfg_text = host_cfg.read_text(encoding="utf-8")
    if hardware_cfg_rel in cfg_text:
        dbg(f"It looks like {p(host_cfg)} already imports {p(hardware_cfg)}")
    else:
        info(f"{p(host_cfg)} doesn't import {p(hardware_cfg)}, attempting to add it")
        # Make the substitution...
        new_cfg_text, found_imports = re.subn(
            r"^\s*imports\s*=\s*\[",
            r"\g<0> " + hardware_cfg_rel + " ",
            cfg_text,
            count=1,
            flags=re.MULTILINE,
        )
        if not found_imports:
            error(
                f"Couldn't find a suitable import statement in {p(host_cfg)}; "
                + f"make sure to add an import to {p(hardware_cfg)}"
            )
        else:
            info(f"Writing and reformatting {p(host_cfg)}")
            if not DRY_RUN:
                host_cfg.write_text(new_cfg_text)
                nixfmt_output = get_output(["nixfmt", str(host_cfg)])
                if nixfmt_output:
                    dbg("nixfmt reported:")
                    for line in nixfmt_output.splitlines():
                        dbg(line)
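
For illustration, here is what the `re.subn` call above does to a hypothetical host configuration whose imports list does not yet include the hardware configuration (the file contents below are made up; the hardware-configuration filename follows the naming used in Example #17):

import re

cfg_text = "{\n  imports = [ ./common.nix ];\n  # ...\n}\n"
new_cfg_text, found_imports = re.subn(
    r"^\s*imports\s*=\s*\[",
    r"\g<0> ./dahurica-hardware-configuration.nix ",
    cfg_text,
    count=1,
    flags=re.MULTILINE,
)
assert found_imports == 1
# new_cfg_text now begins its imports list with the hardware configuration:
#   imports = [ ./dahurica-hardware-configuration.nix  ./common.nix ];
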