def fine_tune_model():
    print('\n\nfine tuning model...')
    data_manager = DataManager()

    plotter = Plotter()

    train_manager = get_train_manager(data_manager)
    n_filters_list = []
    train_losses, test_losses = [], []
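    # sweep the width of the first conv layer: 4, 8, ..., 40 filters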
    for i in range(1, 11):
        n_filters = 4 * i
        net = Net(conv1_out_channels=n_filters)
        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

        train_manager.init_model(net, optimizer)
        train_and_save_model(train_manager)
        ((train_loss, train_accuracy),
         (test_loss, test_accuracy)) = train_manager.get_losses()
        n_filters_list.append(n_filters)
        train_losses.append(train_loss)
        test_losses.append(test_loss)
    plotter.plot_filters_losses(n_filters_list, train_losses, test_losses)

    return train_losses, test_losses, n_filters_list
Example No. 2
class Histogram:
    def __init__(self, file_name: str, **kwargs):
        self.plotter = Plotter(file_name, kwargs["features"],
                               self.house_array_function)
        self.plotter.plot_histogram()

    @staticmethod
    def house_array_function(stack, house):
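        # keep the non-empty feature values (v[1]) of the rows belonging to the given house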
        return [v[1] for v in stack if v[0] == house and v[1] != ""]
Example No. 3
class Scatter:
    def __init__(self, file_name: str, features: list):
        self.classifier = Classifier(file_name, filter=False)
        self.plotter = Plotter(file_name, features, self.house_array_function)
        self.plotter.plot_scatter()

    @staticmethod
    def house_array_function(stack, house):
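        # keep the value for rows of the given house, 'nan' elsewhere, so positions stay aligned with the full stack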
        return [v[1] if v[0] == house and v[1] != "" else 'nan' for v in stack]
Example No. 4
def sandbox():
    print('sandboxing...')
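    # exercise Plotter.plot_filters_losses with synthetic filter counts and loss curves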
    # n_filters = list(range(1,11))
    n_filters = [1, 3, 65, 76, 423, 675, 934, 2345, 13444, 52345]
    # n_filters = shuffle(n_filters)
    n_filters = np.array(n_filters)
    loss_train = np.array(list(range(10))) * 2
    loss_test = np.array(list(range(10))) * 3
    plotter = Plotter()
    plotter.plot_filters_losses(n_filters, loss_train, loss_test)
Example No. 5
    def __init__(
        self,
        config: Dict[str, Any],
        dir_prefix: str,
        wandb_log: bool,
        wandb_init_params: Dict[str, Any],
        device: torch.device,
    ) -> None:
        """Initialize."""
        super(Pruner, self).__init__(config, dir_prefix)
        self.wandb_log = wandb_log
        self.pretrain_dir_name = "pretrain"
        self.dir_postfix = "pruned"
        self.init_params_name = "init_params"
        self.init_params_path = ""
        self.device = device

        self.plotter = Plotter(self.wandb_log)

        # create an initial model
        self.trainer = Trainer(
            config=self.config["TRAIN_CONFIG"],
            dir_prefix=dir_prefix,
            checkpt_dir=self.pretrain_dir_name,
            wandb_log=wandb_log,
            wandb_init_params=wandb_init_params,
            device=device,
        )
        self.model = self.trainer.model

        self.model_params = model_utils.get_params(
            self.model,
            (
                (nn.Conv2d, "weight"),
                (nn.Conv2d, "bias"),
                (nn.BatchNorm2d, "weight"),
                (nn.BatchNorm2d, "bias"),
                (nn.Linear, "weight"),
                (nn.Linear, "bias"),
            ),
        )
        self.params_to_prune = self.get_params_to_prune()

        # to calculate sparsity properly
        model_utils.dummy_pruning(self.model_params)
        model_utils.dummy_pruning(self.params_to_prune)
Example No. 6
from flask import Flask, render_template, request, jsonify
from src.plotter import Plotter
from bokeh.embed import components
from bokeh.embed import server_document
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Slider
from bokeh.plotting import figure
from bokeh.server.server import Server
from bokeh.themes import Theme
from tornado.ioloop import IOLoop
from threading import Thread
from bokeh.sampledata.sea_surface_temperature import sea_surface_temperature

app = Flask(__name__)
plotter = Plotter(dh=None)


def bkapp(doc):
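    # builds a time-series plot of sea-surface temperature; the slider callback below
    # (truncated in this example) presumably re-smooths the data when its value changes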
    df = sea_surface_temperature.copy()
    source = ColumnDataSource(data=df)

    plot = figure(
        x_axis_type="datetime",
        y_range=(0, 25),
        y_axis_label="Temperature (Celsius)",
        title="Sea Surface Temperature at 43.18, -70.43",
    )
    plot.line("time", "temperature", source=source)

    def callback(attr, old, new):
        if new == 0:
Example No. 7
def run(
        seed: int = None,
        n: int = 100,
        graphs: Iterable[str] = (),
        n_samples: int = None,
        n_features: int = 100,
        dataset: str = None,
        smv_label_flip_prob: float = 0.0,
        error_mean: float = 0.0,
        error_std_dev: float = 0.0,
        node_error_mean: float = 0.0,
        node_error_std_dev: float = 0.0,
        starting_weights_domain: Union[List[float], Tuple[float]] = None,
        max_iter: int = None,
        max_time: float = None,
        method: Union[str, None] = 'classic',
        alpha: float = None,
        learning_rate: str = 'constant',
        spectrum_dependent_learning_rate: bool = False,
        dual_averaging_radius=10,
        time_distr_class: object = statistics.ExponentialDistribution,
        time_distr_param: list = (1,),
        time_distr_param_rule: str = None,
        time_const_weight: float = 0,
        real_y_activation_func: callable = None,
        obj_function: str = 'mse',
        average_model_toggle: bool = False,
        metrics: list = (),
        real_metrics: list = (),
        real_metrics_toggle: bool = False,
        metrics_type: int = 0,
        metrics_nodes: str = 'all',
        shuffle: bool = True,
        batch_size: int = 20,
        epsilon: float = None,
        save_test_to_file: bool = False,
        test_folder_name_struct: list = (
                'u040',
                'shuffle',
                'w_domain',
                'metrics',
                'dataset',
                'distr',
                'error',
                'nodeserror',
                'alpha',
                'nodes',
                'samp',
                'feat',
                'time',
                'iter',
                'c',
                'method'
        ),
        test_parent_folder: str = "",
        instant_plot: bool = False,
        plots: list = ('mse_iter',),
        save_plot_to_file: bool = False,
        plot_global_w: bool = False,
        plot_node_w: Union[bool, int, List[int]] = False,
        verbose_main: int = 0,
        verbose_cluster: int = 0,
        verbose_node: int = 0,
        verbose_task: int = 0,
        verbose_plotter: int = 0
):
    """
    Main method.

    Parameters
    ----------
    seed : int or None
        Random simulation seed. If None, it is taken from the current time.
    n : int
        Amount of nodes in the cluster.
    graphs : List[str]
        List of topologies to run the simulation with.
    n_samples : int
        Total number of samples in the generated dataset.
    n_features : int
        Number of features each sample will have.
    dataset : str
        Dataset label:
        - "reg": general customizable linear regression dataset;
        - "unireg": unidimensional regression;
        - "svm": multidimensional classification problem;
        - "unisvm": unidimensional dataset that changes with topology spectral gap;
        - "skreg" : regression dataset from sklearn library,
        - "sloreg" and "susysvm" from UCI's repository.
    smv_label_flip_prob : float
        Probability that a label is flipped during svm dataset generation;
        a form of noise added to the dataset.
    error_mean : float
        Mean of noise to introduce in regression datasets.
    error_std_dev : float
        Standard deviation of noise introduced in regression datasets.
    node_error_mean : float
        Mean of the per-node noise introduced in each node's sample.
        Be careful: with SVM datasets this can change label values.
    node_error_std_dev : float
        Standard deviation of the per-node noise introduced in each node's sample.
        Be careful: with SVM datasets this can change label values.
    starting_weights_domain : List[float]
        In the form [a, b]: each node's initial w is picked uniformly at random between a and b.
    max_iter : int
        Maximum number of iterations after which the simulation is stopped.
    max_time : float
        Maximum time value after which the simulation is stopped.
    epsilon : float
        Accuracy threshold for objective function below which the simulation is stopped.
    method : str
        - "classic" : classic gradient descent, batch is equal to the whole dataset;
        - "stochastic" : stochastic gradient descent;
        - "batch" : batch gradient descent;
        - "subgradient" : subgradient projected gradient descent;
        - "dual_averaging" : dual averaging method.
    alpha : float
        Learning rate constant coefficient.
    learning_rate : str
        - 'constant' : the learning rate never changes during the simulation (it is equal to alpha);
        - 'root_decreasing' : the learning rate is alpha * 1/math.sqrt(K), where K is the iteration number
            (see the illustrative sketch right after this docstring).
    spectrum_dependent_learning_rate : bool
        If True the learning rate is also multiplied by math.sqrt(spectral_gap), so it is different for each graph.
    dual_averaging_radius : int
        Radius of the projection on the feasible set.
    time_distr_class : object
        Class of the random time distribution.
    time_distr_param : list or list of list
        Parameters list.
        See Also generate_time_distr_param_list.
    time_distr_param_rule : str
        Parameters distribution rule.
        See Also generate_time_distr_param_list.
    time_const_weight : float
        Weight assigned to the constant part of the computation time,
        calculated as T_u(t) = E[X_u] * c + (1 - c) * X_u(t); c = 1 makes node times deterministic,
        c = 0 leaves them fully random.
    real_y_activation_func : function
        Activation function applied on real_y calculation.
    obj_function : str
        Identifier of the objective function (one of those declared in metrics.py).
    average_model_toggle : bool
        If True, the time-average of the parameter vector is used instead of just x(k).
    metrics : list of str
        List of additional metrics to compute (objective function is automatically added to this list).
    real_metrics : list of str
        List of real metrics to compute (with regards to the real noiseless model).
    real_metrics_toggle : bool
        If False real metrics are not computed (useful to speed up the computation).
    metrics_type : int
        - 0 : metrics are computed over the whole dataset using a model W equal to the avg of the nodes' local models;
        - 1 : metrics are computed as AVG of local nodes' metrics;
        - 2 : metrics are computed over the whole dataset using the model only from metrics_nodes (see below).
    metrics_nodes : int, list of int, or str
        If an int is given, it is wrapped into a single-element list.
        Depends on the value of metrics_type:
        - metrics_type == 0 : no effects;
        - metrics_type == 1 : metrics are computed as avg of local metrics of nodes inside metrics_nodes list;
        - metrics_type == 2 : metrics are computed over the whole dataset using the model obtained as mean of
            nodes inside metrics_nodes.
    shuffle : bool
        If True the dataset is shuffled before being split into nodes, otherwise the dataset is untouched.
    batch_size : int
        Size of the batch; used only by batch gradient descent.
    save_test_to_file : bool
        If True the test is saved to the specified folder, otherwise it is stored in the temp folder.
    test_folder_name_struct : list
        See generate_test_subfolder_name.
    test_parent_folder : str
        Parent test folder: the test will be located in ./test_log/{$PARENT_FOLDER}/{$TEST_NAME_FOLDER}.
        Can be more than one-folder-deep!
    instant_plot : bool
        If True plots will be prompted upon finishing simulation. Be careful since it will pause the thread!
    plots : list of str
        List of plots' names to create / prompt upon finishing simulation.
        See plotter.py.
    save_plot_to_file : bool
        If True plots will be saved into .../{$TEST_FOLDER_NAME}/plots/ folder.
    plot_global_w : bool
        If True global W will be prompted after finishing simulation.
        This plot is never automatically saved, save it by yourself if you need to keep it.
    plot_node_w : list or False
        List of nodes whose w should be plotted. If False, nothing is prompted.
    verbose_main : int
        Verbose policy in simulator.py script.
        - <0 : no print at all except for errors (unsafe);
        -  0 : default messages;
        -  1 : verbose + default messages;
        -  2 : verbose + default messages + input required to continue after each message (the simulation pauses
            after each message and requires pressing ENTER to go on, useful for debugging).
    verbose_cluster : int
        Verbose policy in cluster.py script.
        See verbose_main.
    verbose_node : int
        Verbose policy in node.py script.
        See verbose_main.
    verbose_task : int
        Verbose policy in tasks.py script.
        See verbose_main.
    verbose_plotter : int
        Verbose policy in plotter.py script.
        See verbose_main.

    Returns
    -------
    None
    """

    ### BEGIN SETUP ###

    begin_time = time.time()
    # descriptor text placed at the beginning of _descriptor.txt file within the test folder

    setup_from_file = False
    setup_folder_path = Plotter.get_temp_test_folder_path_by_index()
    setup_file_path = os.path.join(setup_folder_path, ".setup.pkl")

    setup = dict()

    setup['seed'] = int(time.time()) if seed is None else seed
    setup['n'] = n

    setup['graphs'] = generate_n_nodes_graphs_list(setup['n'], graphs)

    # TRAINING SET SETUP

    setup['n_samples'] = n_samples
    setup['n_features'] = n_features
    setup['dataset'] = dataset  # svm, unireg, reg, reg2, skreg
    setup['smv_label_flip_prob'] = smv_label_flip_prob
    setup['error_mean'] = error_mean
    setup['error_std_dev'] = error_std_dev
    setup['node_error_mean'] = node_error_mean
    setup['node_error_std_dev'] = node_error_std_dev

    # r = np.random.uniform(4, 10)
    # c = np.random.uniform(1.1, 7.8) * np.random.choice([-1, 1, 1, 1])
    # starting_weights_domain = [c - r, c + r]
    setup['starting_weights_domain'] = starting_weights_domain

    # TRAINING SET ALMOST FIXED SETUP
    # SETUP USED ONLY BY REGRESSION 'reg':
    setup['domain_radius'] = 8
    setup['domain_center'] = 0

    # CLUSTER SETUP 1
    setup['max_iter'] = max_iter
    setup['max_time'] = max_time  # units of time
    setup['method'] = method
    setup['dual_averaging_radius'] = dual_averaging_radius

    setup['alpha'] = alpha
    setup['learning_rate'] = learning_rate  # constant, root_decreasing
    setup['spectrum_dependent_learning_rate'] = spectrum_dependent_learning_rate

    setup['time_distr_class'] = time_distr_class
    setup['time_distr_param'] = generate_time_distr_param_list(
        setup['n'],
        time_distr_param,
        time_distr_param_rule
    )  # exp[rate], par[a,s], U[a,b]
    setup['time_distr_param_rule'] = time_distr_param_rule
    setup['time_const_weight'] = time_const_weight
    setup['real_y_activation_func'] = real_y_activation_func
    setup['obj_function'] = obj_function  # mse, hinge_loss, edgy_hinge_loss, cont_hinge_loss, score
    setup['average_model_toggle'] = average_model_toggle

    setup['metrics'] = metrics
    setup['real_metrics'] = real_metrics
    setup['real_metrics_toggle'] = real_metrics_toggle  # False to disable real_metrics computation (for better perf.)
    setup['metrics_type'] = metrics_type  # 0: avg w on whole TS, 1: avg errors in nodes, 2: node's on whole TS
    setup['metrics_nodes'] = metrics_nodes  # single node ID, list of IDs, 'all', 'worst', 'best'
    setup['shuffle'] = shuffle  # <--

    # CLUSTER ALMOST FIXED SETUP
    setup['batch_size'] = batch_size
    setup['epsilon'] = epsilon

    # VERBOSE FLAGS
    # verbose <  0: no print at all except from errors
    # verbose == 0: default messages
    # verbose == 1: verbose + default messages
    # verbose == 2: verbose + default messages + input required to continue after each message
    verbose = verbose_main

    if setup_from_file:
        with open(setup_file_path, 'rb') as setup_file:
            setup = pickle.load(setup_file)

    # OUTPUT SETUP
    test_subfolder = generate_test_subfolder_name(
        setup,
        *test_folder_name_struct,
        parent_folder=test_parent_folder
    )

    test_title = test_subfolder

    # OUTPUT ALMOST FIXED SETUP
    test_root = "test_log"  # don't touch this
    temp_test_subfolder = datetime.datetime.now().strftime('%y-%m-%d_%H.%M.%S.%f')
    compress = True
    overwrite_if_already_exists = False  # if True, overwrite an existing folder; otherwise create a new, differently named one
    delete_folder_on_errors = True
    save_descriptor = True  # create _descriptor.txt file
    ### END SETUP ###

    np.random.seed(setup['seed'])
    random.seed(setup['seed'])

    if setup['n'] % 2 != 0 and setup['n'] > 1:
        warnings.warn("Amount of nodes is odd (N={}), keep in mind graph generator "
                      "can misbehave in undirected graphs generation with odd nodes amount (it can "
                      "generate directed graphs instead)".format(setup['n']))

    if not save_test_to_file:
        # files that are not stored permanently are still placed inside the temp folder
        # so they can be used for a short, limited period of time (the temp folder may be deleted manually)
        test_subfolder = os.path.join("temp", temp_test_subfolder)
        overwrite_if_already_exists = False

    test_path = os.path.normpath(os.path.join(test_root, test_subfolder))

    if not overwrite_if_already_exists:
        # determine a name for the new folder such that it doesn't coincide with any other folder
        c = 0
        tmp_test_path = test_path
        while os.path.exists(tmp_test_path):
            tmp_test_path = test_path + ".conflict." + str(c)
            c += 1
        test_path = tmp_test_path

    test_path = os.path.normpath(test_path)

    # create dir
    if not os.path.exists(test_path):
        os.makedirs(test_path)

    # define function to delete test folder (in case of errors)
    def delete_test_dir():
        if delete_folder_on_errors:
            shutil.rmtree(test_path)

    # markov_matrix = normalize(__adjacency_matrix, axis=1, norm='l1')

    ### BEGIN TRAINING SET GEN ###
    X, y, w = None, None, None
    # X, y = make_blobs(n_samples=10000, n_features=100, centers=3, cluster_std=2, random_state=20)

    if setup['dataset'] == 'reg':
        X, y, w = datasets.reg_dataset(
            setup['n_samples'], setup['n_features'],
            error_mean=setup['error_mean'],
            error_std_dev=setup['error_std_dev']
        )
    elif setup['dataset'] == 'svm':
        X, y, w = datasets.svm_dual_averaging_dataset(
            setup['n_samples'], setup['n_features'],
            label_flip_prob=setup['smv_label_flip_prob']
        )
    elif setup['dataset'] == 'unireg':
        X, y, w = datasets.unireg_dataset(setup['n'])
    elif setup['dataset'] == 'unisvm':
        X, y, w = datasets.unisvm_dual_averaging_dataset(
            setup['n'],
            label_flip_prob=setup['smv_label_flip_prob']
        )
    elif setup['dataset'] == 'enereg':
        X, y, w = datasets.load_appliances_energy_reg_dataset(setup['n_samples'])
    elif setup['dataset'] == 'sloreg':
        X, y, w = datasets.load_slice_localization_reg_dataset(setup['n_samples'])
    elif setup['dataset'] == 'susysvm':
        X, y, w = datasets.load_susy_svm_dataset(setup['n_samples'])
    elif setup['dataset'] == 'skreg':
        X, y, w = make_regression(
            n_samples=setup['n_samples'],
            n_features=setup['n_features'],
            n_informative=setup['n_features'],
            n_targets=1,
            bias=1,
            effective_rank=None,
            tail_strength=1.0,
            noise=setup['error_std_dev'],
            shuffle=True,
            coef=True,
            random_state=None
        )
    elif setup['dataset'] in ['eigvecsvm', 'alteigvecsvm'] or 'multieigvecsvm' in setup['dataset']:
        pass
    else:
        delete_test_dir()
        raise Exception("{} is not a good training set generator function".format(setup['dataset']))

    ### END TRAINING SET GEN ###

    ### BEGIN MAIN STUFFS ###

    # save setup object dump
    with open(os.path.join(test_path, '.setup.pkl'), "wb") as f:
        pickle.dump(setup, f, pickle.HIGHEST_PROTOCOL)

    # setup['string_graphs'] = pprint.PrettyPrinter(indent=4).pformat(setup['graphs']).replace('array([', 'np.array([')

    # Fill descriptor with setup dictionary
    descriptor = """>>> Test Descriptor File
Title: {}
Date: {}
Summary: 

""".format(
        test_title if save_test_to_file else '',
        str(datetime.datetime.fromtimestamp(begin_time))
    )

    for k, v in setup.items():
        descriptor += "{} = {}\n".format(k, v)
    descriptor += "\n"

    # save descriptor file
    if save_descriptor:
        with open(os.path.join(test_path, '.descriptor.txt'), "w") as f:
            f.write(descriptor)

    w_logs = {}
    node_w_logs = {}

    ## SIMULATIONS
    # simulation for each adjacency matrix in setup['graphs'] dict
    for graph, adjmat in setup['graphs'].items():
        # set the seed again (each simulation must perform on the same cluster setup)
        np.random.seed(setup['seed'])
        random.seed(setup['seed'])

        cluster = None
        try:
            cluster = Cluster(adjmat, graph_name=graph, verbose=verbose_cluster)

            if setup['dataset'] in ['eigvecsvm', 'alteigvecsvm', 'multieigvecsvm']:
                # something goes wrong when this dataset is used with the all-ones (clique) matrix,
                # so the highest-degree non-clique adjacency matrix is used for the clique as well
                if 'clique' in graph:
                    max_deg = 0
                    max_deg_adjmat = adjmat
                    for G, A in setup['graphs'].items():
                        if 'clique' in G:
                            continue
                        d = int(G.split('-')[0])
                        if d > max_deg:
                            max_deg_adjmat = A
                            max_deg = d
                    if setup['dataset'] == 'eigvecsvm':
                        X, y, w = datasets.eigvecsvm_dataset_from_adjacency_matrix(max_deg_adjmat)
                    elif setup['dataset'] == 'alteigvecsvm':
                        X, y, w = datasets.eigvecsvm_dataset_from_expander(
                            setup['n'],
                            max_deg,
                            matrix_type='uniform-weighted'
                        )
                    elif setup['dataset'] == 'multieigvecsvm':
                        X, y, w = datasets.multieigvecsvm_dataset_from_expander(
                            setup['n_samples'], setup['n'], max_deg)
                else:
                    if setup['dataset'] == 'eigvecsvm':
                        X, y, w = datasets.eigvecsvm_dataset_from_adjacency_matrix(adjmat)
                    elif setup['dataset'] == 'alteigvecsvm':
                        deg = int(graph.split('-')[0])
                        X, y, w = datasets.eigvecsvm_dataset_from_expander(
                            setup['n'],
                            deg,
                            matrix_type='uniform-weighted'
                        )
                    elif setup['dataset'] == 'multieigvecsvm':
                        deg = int(graph.split('-')[0])
                        X, y, w = datasets.multieigvecsvm_dataset_from_expander(
                            setup['n_samples'], setup['n'], deg)

            elif 'multieigvecsvm' in setup['dataset']:
                deg = int(setup['dataset'].split('-')[0])
                X, y, w = datasets.multieigvecsvm_dataset_from_expander(
                    setup['n_samples'], setup['n'], deg)

            alpha = setup['alpha']
            if spectrum_dependent_learning_rate:
                alpha *= math.sqrt(uniform_weighted_Pn_spectral_gap_from_adjacency_matrix(adjmat))

            cluster.setup(
                X, y, w,
                real_y_activation_function=setup['real_y_activation_func'],
                obj_function=setup['obj_function'],
                average_model_toggle=average_model_toggle,
                method=setup['method'],
                max_iter=setup['max_iter'],
                max_time=setup['max_time'],
                batch_size=setup['batch_size'],
                dual_averaging_radius=setup['dual_averaging_radius'],
                epsilon=setup['epsilon'],
                alpha=alpha,
                learning_rate=setup['learning_rate'],
                metrics=setup['metrics'],
                real_metrics=setup["real_metrics"],
                real_metrics_toggle=setup['real_metrics_toggle'],
                metrics_type=setup['metrics_type'],
                metrics_nodes=setup['metrics_nodes'],
                shuffle=setup['shuffle'],
                time_distr_class=setup['time_distr_class'],
                time_distr_param=setup['time_distr_param'],
                time_const_weight=setup['time_const_weight'],
                node_error_mean=setup['node_error_mean'],
                node_error_std_dev=setup['node_error_std_dev'],
                starting_weights_domain=setup['starting_weights_domain'],
                verbose_node=verbose_node,
                verbose_task=verbose_task
            )

            cluster.run()

        except:
            # if the cluster throws an exception then delete the folder created to host its output files
            # the most common exception in cluster.run() is thrown when the SGD computation diverges
            delete_test_dir()
            print(
                "Exception in cluster object\n",
                "cluster.iteration=" + (str(cluster.iteration) if cluster is not None else "N/A")
            )
            raise

        extension = '.txt'
        if compress:
            extension += '.gz'

        np.savetxt(
            os.path.join(test_path, "{}_iter_time_log{}".format(graph, extension)),
            cluster.logs["iter_time"],
            delimiter=','
        )

        np.savetxt(
            os.path.join(test_path, "{}_avg_iter_time_log{}".format(graph, extension)),
            cluster.logs["avg_iter_time"],
            delimiter=','
        )
        np.savetxt(
            os.path.join(test_path, "{}_max_iter_time_log{}".format(graph, extension)),
            cluster.logs["max_iter_time"],
            delimiter=','
        )

        # Save metrics logs
        if setup['method'] is not None:
            for metrics_id, metrics_log in cluster.logs["metrics"].items():
                np.savetxt(
                    os.path.join(test_path, "{}_{}_log{}".format(graph, metrics_id, extension)),
                    metrics_log,
                    delimiter=','
                )

            # Save real metrics logs
            for real_metrics_id, real_metrics_log in cluster.logs["real_metrics"].items():
                np.savetxt(
                    os.path.join(test_path, "{}_real_{}_log{}".format(graph, real_metrics_id, extension)),
                    real_metrics_log,
                    delimiter=','
                )

        if plot_global_w:
            w_logs[graph] = cluster.w

        if plot_node_w is not False:
            try:
                node_w_logs[graph] = np.array(cluster.nodes[plot_node_w[0]].training_task.w)
                for i in range(1, len(plot_node_w)):
                    node_w_logs[graph] += np.array(cluster.nodes[plot_node_w[i]].training_task.w)
                node_w_logs[graph] /= len(plot_node_w)
            except:
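                # plot_node_w did not index valid nodes; disable the per-node plot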
                plot_node_w = False

        print("Logs of {} simulation created at {}".format(graph, test_path))

    if save_descriptor:
        with open(os.path.join(test_path, '.descriptor.txt'), 'a') as f:
            f.write('\n\n# duration (hh:mm:ss): ' + time.strftime('%H:%M:%S', time.gmtime(time.time() - begin_time)))

    colors = Plotter.generate_rainbow_color_dict_from_graph_keys(
        list(w_logs.keys() | node_w_logs.keys()), setup['n']
    )

    if plot_global_w:
        plt.suptitle(test_subfolder)
        plt.title("W(it)")
        plt.xlabel("iter")
        plt.ylabel("Global W at iteration")
        plt.yscale('linear')
        for graph in w_logs:
            plt.plot(
                list(range(len(w_logs[graph]))),
                w_logs[graph],
                label=graph,
                color=colors[graph],
                marker='o',
                markersize=2
                # **kwargs
            )
        plt.legend()
        plt.show()
        plt.close()

    if plot_node_w is not False:
        plt.suptitle(test_subfolder)
        plt.title("W_{0}(it) (W of Node {0} at iteration)".format(plot_node_w))
        plt.xlabel("iter")
        plt.ylabel("W_{}(iter)".format(plot_node_w))
        plt.yscale('linear')
        for graph in node_w_logs:
            plt.plot(
                list(range(len(node_w_logs[graph]))),
                [p[0] for p in node_w_logs[graph]],
                label=graph,
                color=colors[graph],
                marker='o',
                markersize=2
            )
        plt.legend()
        plt.show()
        plt.close()

    if save_plot_to_file or instant_plot:
        plot_from_files(
            test_folder_path=test_path,
            save_plots_to_test_folder=save_plot_to_file,
            instant_plot=instant_plot,
            plots=plots,
            verbose=verbose_plotter,
            test_tag=test_subfolder
        )
Example No. 8
def main(filename):
    data = load_csv(filename)

    plotter = Plotter(data)
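    # styling knobs; the colors appear to be RGB tuples in [0, 1]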

    plotter.ball_radius = 8
    plotter.a_color = (0, 0, 0)
    plotter.b_color = (1, 1, 1)
    plotter.line_width = 3
    plotter.line_color = (0.486, 0.486, 0.529)

    plotter.render()
    plotter.save(name='prot.png')
Example No. 9
from src.plotter import Plotter
import numpy as np
import imageio
import os

plotter = Plotter(50, 50, 20)

map = {
    1: (50, 50, 50, 1),
    2: (100, 100, 100, 1),
    3: (150, 150, 150, 1),
    4: (200, 200, 200, 1),
    5: (50, 100, 150, 1),
    6: (0, 0, 0, 1)
}

points = {}
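# sparse picture: (x, y) grid cell -> color key from the map above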
points[(0, 0)] = 1
points[(1, 1)] = 2
points[(49, 49)] = 3

map_plot = plotter.plot(points, map)
map_plot = plotter.invert(map_plot)

im = np.asarray(map_plot)
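# write the single frame out as a GIF at 5 fps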
imageio.mimsave('./tp.gif', [im], fps=5)
Example No. 10
    def __init__(self, file_name: str, **kwargs):
        self.plotter = Plotter(file_name, kwargs["features"],
                               self.house_array_function)
        self.plotter.plot_histogram()
Example No. 11
class Pruner(Runner):
    """Pruner for models."""

    def __init__(
        self,
        config: Dict[str, Any],
        dir_prefix: str,
        wandb_log: bool,
        wandb_init_params: Dict[str, Any],
        device: torch.device,
    ) -> None:
        """Initialize."""
        super(Pruner, self).__init__(config, dir_prefix)
        self.wandb_log = wandb_log
        self.pretrain_dir_name = "pretrain"
        self.dir_postfix = "pruned"
        self.init_params_name = "init_params"
        self.init_params_path = ""
        self.device = device

        self.plotter = Plotter(self.wandb_log)

        # create an initial model
        self.trainer = Trainer(
            config=self.config["TRAIN_CONFIG"],
            dir_prefix=dir_prefix,
            checkpt_dir=self.pretrain_dir_name,
            wandb_log=wandb_log,
            wandb_init_params=wandb_init_params,
            device=device,
        )
        self.model = self.trainer.model

        self.model_params = model_utils.get_params(
            self.model,
            (
                (nn.Conv2d, "weight"),
                (nn.Conv2d, "bias"),
                (nn.BatchNorm2d, "weight"),
                (nn.BatchNorm2d, "bias"),
                (nn.Linear, "weight"),
                (nn.Linear, "bias"),
            ),
        )
        self.params_to_prune = self.get_params_to_prune()

        # to calculate sparsity properly
        model_utils.dummy_pruning(self.model_params)
        model_utils.dummy_pruning(self.params_to_prune)

    @abc.abstractmethod
    def prune_params(self, prune_iter: int) -> None:
        """Run pruning."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_params_to_prune(self) -> Tuple[Tuple[nn.Module, str], ...]:
        """Get parameters to prune."""
        raise NotImplementedError

    def reset(
        self, prune_iter: int, resumed: bool = False,
    ) -> Tuple[int, List[Tuple[str, float, Callable[[float], str]]]]:
        """Reset the processes for pruning or pretraining.

        Args:
            prune_iter (int): the next pruning iteration.
            resumed (bool): True if the run is being resumed.

        Returns:
            int: the starting epoch of training (rewinding point for pruning).
            List[Tuple[str, float, Callable[[float], str]]]: logging information for sparsity,
                which consists of key, value, and formatting function.

        """
        # pretraining
        if prune_iter == -1:
            start_epoch = 0
            mask_total_sparsity = zero_total_sparsity = 0
            mask_conv_sparsity = zero_conv_sparsity = 0
            mask_fc_sparsity = zero_fc_sparsity = 0
            mask_bn_sparsity = zero_bn_sparsity = 0

            # directory names for checkpoints
            checkpt_dir = self.pretrain_dir_name
            logger.info("Initialized Pretraining Settings")

            # store initial weights
            if not resumed and self.config["PRUNE_PARAMS"]["STORE_PARAM_BEFORE"] == 0:
                self.save_init_params()
        # pruning
        else:
            start_epoch = self.config["PRUNE_PARAMS"]["TRAIN_START_FROM"]
            logger.info("Change train configuration for pruning.")
            if prune_iter == 0 or resumed:
                self.trainer.setup_train_configuration(
                    self.config["TRAIN_CONFIG_AT_PRUNE"]
                )

            if not resumed:
                # Prune with current best model
                if self.config["PRUNE_PARAMS"]["PRUNE_AT_BEST"]:
                    self.trainer.load_best_model()
                logger.info("Prune model")
                self.prune_params(prune_iter)
            logger.info("Forward model for one iter to warmup")
            self.trainer.warmup_one_iter()

            # sparsities
            zero_total_sparsity = model_utils.sparsity(self.model_params)
            zero_conv_sparsity = model_utils.sparsity(
                self.model_params, module_types=(nn.Conv2d,)
            )
            zero_fc_sparsity = model_utils.sparsity(
                self.model_params, module_types=(nn.Linear,)
            )
            zero_bn_sparsity = model_utils.sparsity(
                self.model_params, module_types=(nn.BatchNorm2d,)
            )
            mask_total_sparsity = model_utils.mask_sparsity(self.model_params)
            mask_conv_sparsity = model_utils.mask_sparsity(
                self.model_params, module_types=(nn.Conv2d,)
            )
            mask_fc_sparsity = model_utils.mask_sparsity(
                self.model_params, module_types=(nn.Linear,)
            )
            mask_bn_sparsity = model_utils.mask_sparsity(
                self.model_params, module_types=(nn.BatchNorm2d,)
            )

            # directory name for checkpoints
            checkpt_dir = f"{prune_iter}_"
            checkpt_dir += f"{(mask_total_sparsity):.2f}_".replace(".", "_")
            checkpt_dir += f"{self.dir_postfix}"

            logger.info(
                "Initialized Pruning Settings: "
                f"[{prune_iter} | {self.config['N_PRUNING_ITER']-1}]"
            )

            # initialize trainer
            if not resumed and self.init_params_path:
                self.trainer.load_params(self.init_params_path, with_mask=False)

        # reset trainer
        self.trainer.reset(checkpt_dir)

        # plot result
        self.plotter.plot(self.model, self.trainer.get_model_save_dir())

        # sparsity info for logging
        sparsity_info: List[Tuple[str, float, Callable]] = []
        sparsity_info.append(
            ("zero_sparsity/total", zero_total_sparsity, percent_format)
        )
        sparsity_info.append(("zero_sparsity/conv", zero_conv_sparsity, percent_format))
        sparsity_info.append(("zero_sparsity/fc", zero_fc_sparsity, percent_format))
        sparsity_info.append(("zero_sparsity/bn", zero_bn_sparsity, percent_format))
        sparsity_info.append(
            ("mask_sparsity/total", mask_total_sparsity, percent_format)
        )
        sparsity_info.append(("mask_sparsity/conv", mask_conv_sparsity, percent_format))
        sparsity_info.append(("mask_sparsity/fc", mask_fc_sparsity, percent_format))
        sparsity_info.append(("mask_sparsity/bn", mask_bn_sparsity, percent_format))
        sparsity_info.append(
            (
                "mask_sparsity/target",
                self.get_target_sparsity(prune_iter) * 100.0,
                percent_format,
            )
        )

        return start_epoch, sparsity_info

    def resume(self) -> int:
        """Setting to resume the training."""
        # check if there are saved initial parameters
        init_params_path = os.path.join(
            self.dir_prefix, f"{self.init_params_name}.{self.fileext}"
        )
        if os.path.exists(init_params_path):
            self.init_params_path = init_params_path

        # check the pruning iteration
        last_iter = self._check_pruning_iter_from_filepath()

        return last_iter

    def run(self, resume_info_path: str = "") -> None:
        """Run pruning."""
        # resume pruner if needed
        start_iter, epoch_to_resume = -1, 0
        if resume_info_path:
            start_iter = self.resume()
            epoch_to_resume = self.trainer.resume()
            self.trainer.warmup_one_iter()

        for prune_iter in range(start_iter, self.config["N_PRUNING_ITER"]):
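            # prune_iter == -1 runs the pretraining pass; iterations >= 0 prune and then fine-tune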
            start_epoch, sparsity_info = self.reset(prune_iter, epoch_to_resume > 0)

            # if there is a valid file to resume
            if start_epoch < epoch_to_resume:
                start_epoch = epoch_to_resume
                epoch_to_resume = 0

            for epoch in range(start_epoch, self.trainer.total_epochs):
                self.trainer.run_one_epoch(epoch, sparsity_info)

                # store weights with warmup
                if self.config["PRUNE_PARAMS"]["STORE_PARAM_BEFORE"] - 1 == epoch:
                    self.save_init_params()

            if prune_iter == -1:
                logger.info("Pretraining Done")
            else:
                logger.info(
                    f"Pruning Done: [{prune_iter} | {self.config['N_PRUNING_ITER']-1}]"
                )

    def get_target_sparsity(self, prune_iter: int) -> float:
        """Get target sparsity for current prune epoch."""
        target_density = 1.0
        for _ in range(prune_iter + 1):
            target_density = target_density * (
                1 - self.config["PRUNE_PARAMS"]["PRUNE_AMOUNT"]
            )
        return 1 - target_density

    def save_init_params(self) -> None:
        """Set initial weights."""
        self.trainer.save_params(
            self.dir_prefix,
            self.init_params_name,
            self.config["PRUNE_PARAMS"]["STORE_PARAM_BEFORE"] - 1,
            record_path=False,
        )
        logger.info("Stored initial parameters")
        self.init_params_path = os.path.join(
            self.dir_prefix, f"{self.init_params_name}.{self.fileext}"
        )

    def _check_pruning_iter_from_filepath(self) -> int:
        """Check the last pruning iteration from filepath."""
        last_iter = -1
        latest_file_path = self._fetch_latest_checkpt()

        if latest_file_path and os.path.exists(latest_file_path):
            logger.info(f"Resume pruning from {self.dir_prefix}")
            _, checkpt_dir, _ = latest_file_path.rsplit(os.path.sep, 2)

            # fetch the last iter from the filename
            if checkpt_dir != self.pretrain_dir_name:
                last_iter = int(checkpt_dir.split("_", 1)[0])

        return last_iter

    def early_stop(self) -> None:
        """Early stop."""
        logger.info("Prune cannot be done. Early stop")
        raise Exception("Early Stop")
Example No. 12
"""

print("Loading maze from json")
maze = Maze.from_json(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        'maze_jsons',
        'test.json'
    )
)

maze = Maze(50,50)

print('There are {} blocks total'.format(len(maze.blocks)))
"""
plotter = Plotter(maze.x, maze.y, 10)
#plotter.set_null('black')

map = {
    1: (50, 50, 50, 1),
    2: (100, 100, 100, 1),
    3: (150, 150, 150, 1),
    4: (200, 200, 200, 1),
    5: (50, 100, 150, 1),
    6: (0, 0, 0, 1),
    7: (255, 243, 122, 1)
}

images_mp = []
images_sp = []
images_dp = []
Example No. 13
def main():
    # SETUP BEGIN
    test_folder_path = './test_log/test_u043_Win[-70,-60]_sgC1alpha_!shuf_unireg_mtrT0all_500iter'
    logs, setup = load_test_logs(test_folder_path, return_setup=True)
    degrees = {}
    for graph in setup['graphs']:
        degrees[graph] = degree_from_adjacency_matrix(setup['graphs'][graph])
    graph_filter = [
        # "0-diagonal",
        "1-cycle",
        # "2-uniform_edges",
        "2-cycle",
        # "3-uniform_edges",
        "3-cycle",
        # "4-uniform_edges",
        "4-cycle",
        # "5-uniform_edges",
        # "5-cycle",
        # "8-uniform_edges",
        "8-cycle",
        # "10-uniform_edges",
        # "10-cycle",
        # "20-uniform_edges",
        "20-cycle",
        # "50-uniform_edges",
        "50-cycle",
        # "80-uniform_edges",
        "80-cycle",
        "99-clique",
    ]

    clique_mse_log = logs['metrics']['mse']['99-clique']

    opt = 858.5
    pred_ratios = []
    real_ratios = []

    for graph, graph_mse_log in dict(logs['metrics']['mse']).items():
        if graph not in graph_filter:
            del logs['metrics']['mse'][graph]
            continue

        ratio = []
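        # excess MSE of this graph relative to the clique; the +1 in the denominator guards against division by zero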
        for x in range(len(graph_mse_log)):
            ry = (graph_mse_log[x] - opt) / (clique_mse_log[x] - opt + 1)
            ratio.append(ry)
        ratio = max(ratio)

        real_ratios.append(ratio)
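        # predicted slowdown scales as 1 / sqrt(spectral gap) of the uniformly weighted topology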
        pred_ratios.append(1 / math.sqrt(
            uniform_weighted_Pn_spectral_gap_from_adjacency_matrix(
                setup['graphs'][graph])))

        # y = np.max(mse_log)
        # x = np.argmax(mse_log)
        # a[graph] = y * math.sqrt(x)

    print(real_ratios)

    plt.figure(1, figsize=(12, 6))
    plt.suptitle(test_folder_path)
    plt.subplot(1, 2, 1)
    plt.title("Ratio comparison", loc='left')
    plt.title("({})".format(setup['time_distr_class'].name), loc='right')
    plt.xlabel("prediction")
    plt.ylabel("simulation")
    plt.yscale('linear')
    plt.plot(
        pred_ratios,
        real_ratios,
        color='blue',
        markersize=5,
        marker='o',
    )

    for i in range(len(pred_ratios)):
        plt.text(pred_ratios[i] - 2,
                 real_ratios[i] + 0.1,
                 'd={}'.format(degrees[list(
                     logs['metrics']['mse'].keys())[i]]),
                 size='xx-small')

    colors = Plotter.generate_rainbow_color_dict_from_graph_keys(
        list(setup['graphs'].keys()), setup['n'])

    # MSE - AVG ITER SUBPLOT
    plt.subplot(1, 2, 2)
    plt.title("MSE over AVG iteration", loc='left')
    plt.title("({})".format(setup['time_distr_class'].name), loc='right')
    plt.xlabel('AVG iter')
    plt.ylabel('MSE')
    plt.yscale('linear')
    for graph, graph_mse_log in dict(logs['metrics']['mse']).items():
        plt.plot(list(range(len(graph_mse_log))),
                 graph_mse_log,
                 label=graph,
                 color=colors[graph])
    plt.legend()
    plt.subplots_adjust(top=0.88,
                        bottom=0.08,
                        left=0.06,
                        right=0.96,
                        hspace=0.2,
                        wspace=0.17)
    plt.show()
    plt.close()
Example No. 14
    def __init__(self, file_name: str, features: list):
        self.classifier = Classifier(file_name, filter=False)
        self.plotter = Plotter(file_name, features, self.house_array_function)
        self.plotter.plot_scatter()
Example No. 15
def main():
    # SETUP BEGIN
    test_folder_path = './test_log/test_rslo100_sloreg_100n_exp[1]_mtrT0all_sgC1e-05alpha_52000samp_INFtime_400iter'
    target_x0 = 100
    target_x = 300

    logs, setup = load_test_logs(test_folder_path, return_setup=True)
    objfunc = METRICS[setup['obj_function']]
    degrees = {}
    for graph in setup['graphs']:
        degrees[graph] = degree_from_adjacency_matrix(setup['graphs'][graph])
    graph_filter = [
        '*'
    ]

    pred_ratios = []
    real_slopes = []

    # left as None so that, if never reassigned, they raise an exception when used
    clique_slope = None
    clique_spectral_gap = None

    for graph, graph_objfunc_log in dict(logs['metrics'][objfunc.id]).items():
        if graph not in graph_filter and '*' not in graph_filter:
            del logs['metrics'][objfunc.id][graph]
            continue

        y0 = logs['metrics'][objfunc.id][graph][target_x0]
        y = logs['metrics'][objfunc.id][graph][target_x]
        """if degrees[graph] == 2:
            y0 = logs['metrics'][objfunc.id][graph][3000]
            y = logs['metrics'][objfunc.id][graph][3600]"""

        slope = y - y0
        print(slope)

        real_slopes.append(slope)
        pred_ratios.append(1 / math.sqrt(uniform_weighted_Pn_spectral_gap_from_adjacency_matrix(setup['graphs'][graph])))

        if 'clique' in graph:
            clique_slope = slope
            clique_spectral_gap = uniform_weighted_Pn_spectral_gap_from_adjacency_matrix(setup['graphs'][graph])

    real_ratios = clique_slope / np.array(real_slopes)
    pred_ratios = np.array(pred_ratios)
    print(real_ratios)

    plt.figure(1, figsize=(12, 6))
    plt.suptitle(test_folder_path)
    plt.subplot(1, 2, 1)
    plt.title("Ratio comparison", loc='left')
    plt.title("({})".format(setup['time_distr_class'].name), loc='right')
    plt.xlabel("prediction")
    plt.ylabel("simulation")
    plt.yscale('linear')
    plt.plot(
        pred_ratios,
        real_ratios,
        color='blue',
        markersize=5,
        marker='o',
    )

    for i in range(len(pred_ratios)):
        plt.text(
            pred_ratios[i],
            real_ratios[i],
            'd={}'.format(degrees[list(logs['metrics'][objfunc.id].keys())[i]]),
            size='xx-small'
        )

    colors = Plotter.generate_rainbow_color_dict_from_graph_keys(
        list(setup['graphs'].keys()), setup['n']
    )

    # objfunc - AVG ITER SUBPLOT
    plt.subplot(1, 2, 2)
    plt.title("{} over iteration".format(objfunc.fullname), loc='left')
    plt.title("({})".format(setup['time_distr_class'].name), loc='right')
    plt.xlabel('iter')
    plt.ylabel(objfunc.fullname)
    plt.yscale('linear')
    for graph, graph_objfunc_log in dict(logs['metrics'][objfunc.id]).items():
        plt.plot(
            list(range(len(graph_objfunc_log))),
            graph_objfunc_log,
            label=graph,
            color=colors[graph]
        )
    plt.legend()
    plt.subplots_adjust(
        top=0.88,
        bottom=0.08,
        left=0.06,
        right=0.96,
        hspace=0.2,
        wspace=0.17
    )
    plt.show()
    plt.close()