def cast_to_DL(parameters: Parameters, dataset: str, model, optimal_step_size: int, weight_decay: int,
               iid: str) -> DLParameters:
    """Casts a Parameters instance into a DLParameters instance, the specific format for deep learning."""
    parameters.__class__ = DLParameters
    parameters.initialize_DL_params(dataset, model, optimal_step_size, weight_decay, iid)
    return parameters
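# Hedged usage sketch of cast_to_DL: the cast mutates `parameters` in place by
# swapping its __class__, so the returned object is the very same instance.
# The argument values below ("mnist", MyNet, "iid", ...) are illustrative
# placeholders, not values taken from the repository.
params = Parameters(n_dimensions=784, nb_devices=10)
dl_params = cast_to_DL(params, dataset="mnist", model=MyNet,
                       optimal_step_size=1, weight_decay=0, iid="iid")
assert dl_params is params  # same object, now an instance of DLParameters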
@classmethod
def setUpClass(cls):
    """get_some_resource() is slow; to avoid calling it for each test, use
    setUpClass() and store the result as a class variable."""
    super(PerformancesTest, cls).setUpClass()

    ### RMSE ###
    # Creating cost models which will be used to compute cost/loss, gradients, L ...
    cls.linear_cost_models = build_several_cost_model(RMSEModel, linear_X, linear_Y, nb_devices)

    # Defining parameters for the performances test.
    cls.linear_params = Parameters(n_dimensions=dim_test + 1, nb_devices=nb_devices,
                                   up_compression_model=SQuantization(1, dim_test + 1),
                                   step_formula=deacreasing_step_size, nb_epoch=nb_epoch,
                                   use_averaging=False, cost_models=cls.linear_cost_models,
                                   stochastic=True)

    obj_min_by_N_descent = SGD_Descent(Parameters(n_dimensions=dim_test + 1, nb_devices=nb_devices,
                                                  nb_epoch=200, momentum=0., verbose=True,
                                                  cost_models=cls.linear_cost_models,
                                                  stochastic=False, bidirectional=False))
    obj_min_by_N_descent.run(cls.linear_cost_models)
    cls.linear_obj = obj_min_by_N_descent.train_losses[-1]

    # For LOGISTIC:
    cls.logistic_cost_models = build_several_cost_model(LogisticModel, logistic_X, logistic_Y, nb_devices)

    # Defining parameters for the performances test.
    cls.logistic_params = Parameters(n_dimensions=2, nb_devices=nb_devices,
                                     up_compression_model=SQuantization(1, 3),
                                     step_formula=deacreasing_step_size, nb_epoch=nb_epoch,
                                     use_averaging=False, cost_models=cls.logistic_cost_models,
                                     stochastic=True)

    obj_min_by_N_descent = SGD_Descent(Parameters(n_dimensions=2, nb_devices=nb_devices,
                                                  nb_epoch=200, momentum=0., verbose=True,
                                                  cost_models=cls.logistic_cost_models,
                                                  stochastic=False, bidirectional=False))
    obj_min_by_N_descent.run(cls.logistic_cost_models)
    cls.logistic_obj = obj_min_by_N_descent.train_losses[-1]
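# A hypothetical test method consuming the shared class attributes above; the
# method name, the tolerance, and the use of SGD_Descent in the body are
# illustrative assumptions, not code from the repository.
def test_sgd_reaches_objective_on_linear_data(self):
    descent = SGD_Descent(self.linear_params)
    descent.run(self.linear_cost_models)
    # The final stochastic loss should approach the full-batch objective
    # computed once in setUpClass (cls.linear_obj).
    self.assertLess(abs(descent.train_losses[-1] - self.linear_obj), 0.5)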
def define(self, cost_models, n_dimensions: int, nb_devices: int, up_compression_model: CompressionModel,
           down_compression_model: CompressionModel, step_formula=None, nb_epoch: int = NB_EPOCH,
           fraction_sampled_workers: float = 1., use_averaging=False, stochastic=True, streaming=False,
           batch_size=1) -> Parameters:
    # Note: the up/down compression arguments are discarded here; compression is
    # hard-coded to SQuantization(0) (i.e. no quantization) for this variant.
    return Parameters(n_dimensions=n_dimensions,
                      nb_devices=nb_devices,
                      nb_epoch=nb_epoch,
                      fraction_sampled_workers=fraction_sampled_workers,
                      step_formula=step_formula,
                      up_compression_model=SQuantization(0),
                      down_compression_model=SQuantization(0),
                      stochastic=stochastic,
                      streaming=streaming,
                      batch_size=batch_size,
                      cost_models=cost_models,
                      use_averaging=use_averaging,
                      use_up_memory=False)
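# Hedged usage sketch of define(); `runner` stands for whatever object carries
# this method (its class is not shown in this excerpt), and the argument values
# are illustrative. As noted above, the compression arguments passed here are
# discarded in favour of SQuantization(0).
params = runner.define(cost_models, n_dimensions=dim_test + 1, nb_devices=nb_devices,
                       up_compression_model=SQuantization(1, dim_test + 1),
                       down_compression_model=SQuantization(1, dim_test + 1))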
def compute_number_of_bits(type_params: Parameters, nb_epoch: int, compress_model: bool):
    """Computes the theoretical number of bits used by an algorithm (with Elias encoding)."""
    # Initialization; the first element needs to be removed at the end.
    number_of_bits = np.zeros(nb_epoch, dtype=int)
    if isinstance(type_params, DLParameters):
        # Deep-learning case: sum the bit counts layer by layer.
        model = type_params.model()
        for p in model.parameters():
            d = p.numel()
            nb_bits = compute_number_of_bits_by_layer(type_params, d, nb_epoch, compress_model)
            number_of_bits = number_of_bits + nb_bits
        return number_of_bits
    else:
        # Convex case: the model is a single flat vector of dimension n_dimensions.
        d = type_params.n_dimensions
        return compute_number_of_bits_by_layer(type_params, d, nb_epoch, compress_model)
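# Hedged usage sketch: for a plain Parameters object this returns a single
# per-epoch array of bit counts; for a DLParameters object the per-layer
# counts are summed across all layers. The argument values are illustrative.
bits_per_epoch = compute_number_of_bits(params, nb_epoch=100, compress_model=True)
total_bits = int(np.sum(bits_per_epoch))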
def define(self, n_dimensions: int, nb_devices: int, quantization_param: int = 0, step_formula=None,
           momentum: float = 0, nb_epoch: int = NB_EPOCH, use_averaging=False,
           model: ACostModel = RMSEModel(), stochastic=True):
    return Parameters(n_dimensions=n_dimensions,
                      nb_devices=nb_devices,
                      nb_epoch=nb_epoch,
                      step_formula=step_formula,
                      quantization_param=0,  # note: the quantization_param argument is ignored; 0 is hard-coded
                      momentum=momentum,
                      verbose=False,
                      stochastic=stochastic,
                      bidirectional=False,
                      cost_model=model,
                      use_averaging=use_averaging)
def define(self, n_dimensions: int, nb_devices: int, quantization_param: int, step_formula=None,
           momentum: float = 0, nb_epoch: int = NB_EPOCH, use_averaging=False,
           model: ACostModel = RMSEModel(), stochastic=True):
    return Parameters(n_dimensions=n_dimensions,
                      nb_devices=nb_devices,
                      nb_epoch=nb_epoch,
                      step_formula=step_formula,
                      quantization_param=1,  # note: the quantization_param argument is ignored; 1 is hard-coded
                      learning_rate=0,
                      momentum=momentum,
                      verbose=False,
                      stochastic=stochastic,
                      cost_model=model,
                      use_averaging=use_averaging,
                      bidirectional=True,
                      double_use_memory=False,
                      compress_gradients=True)
def cast_to_DL(parameters: Parameters, dataset: str, model, optimal_step_size: int,
               weight_decay: int) -> DLParameters:
    """Casts a Parameters instance into a DLParameters instance, the specific format for deep learning."""
    parameters.__class__ = DLParameters
    parameters.initialize_DL_params(dataset, model, optimal_step_size, weight_decay)
    return parameters
### The following takes around 5 minutes. ###

# 1) Generating data.
w_true = generate_param(dim_notebook)
X, Y = build_data_linear(w_true, n_dimensions=dim_notebook,
                         n_devices=nb_devices, with_seed=False, without_noise=False)

# 2) Creating cost models which will be used to compute cost/loss, gradients, L ...
cost_models = build_several_cost_model(RMSEModel, X, Y, nb_devices)

# 3) Computing the objective function.
obj_min_descent = SGD_Descent(Parameters(n_dimensions=dim_notebook,
                                         nb_devices=nb_devices_for_the_run,
                                         nb_epoch=600,
                                         momentum=0.,
                                         verbose=True,
                                         cost_models=cost_models,
                                         stochastic=False,
                                         bidirectional=False))
obj_min_descent.run(cost_models)
obj_min = obj_min_descent.train_losses[-1]

# 4) Running the descent for two algorithms: Diana and vanilla SGD.
all_descent = {}
myX = X[:nb_devices_for_the_run]
myY = Y[:nb_devices_for_the_run]
X_number_of_bits = []
for type_params in tqdm([Diana(), VanillaSGD()]):
    multiple_sg_descent = multiple_run_descent(type_params, cost_models=cost_models,
                                               nb_epoch=10,
                                               compression_model=SQuantization(1, dim_notebook))
    all_descent[type_params.name()] = multiple_sg_descent
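# A hedged sketch of how the collected runs could be visualised; the
# `train_losses` attribute on the result of multiple_run_descent is an
# assumption, not taken from the repository.
import matplotlib.pyplot as plt

for name, descent in all_descent.items():
    plt.plot([loss - obj_min for loss in descent.train_losses], label=name)
plt.yscale("log")
plt.xlabel("epoch")
plt.ylabel("excess loss F(w_k) - F(w*)")
plt.legend()
plt.show()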
@classmethod
def setUpClass(cls):
    """get_some_resource() is slow; to avoid calling it for each test, use
    setUpClass() and store the result as a class variable."""
    super(PerformancesTest, cls).setUpClass()

    ### RMSE ###
    # Defining parameters for the performances test.
    cls.linear_params = Parameters(n_dimensions=dim_test + 1, nb_devices=nb_devices, quantization_param=1,
                                   step_formula=None, nb_epoch=nb_epoch, use_averaging=False,
                                   cost_model=RMSEModel(), stochastic=True)

    obj_min_by_N_descent = FL_VanillaSGD(Parameters(n_dimensions=dim_test + 1,
                                                    nb_devices=nb_devices,
                                                    nb_epoch=200,
                                                    momentum=0.,
                                                    quantization_param=0,
                                                    verbose=True,
                                                    cost_model=RMSEModel(),
                                                    stochastic=False,
                                                    bidirectional=False))
    obj_min_by_N_descent.set_data(linear_X[:nb_devices], linear_Y[:nb_devices])
    obj_min_by_N_descent.run()
    cls.linear_obj = obj_min_by_N_descent.losses[-1]

    # For LOGISTIC:
    # Defining parameters for the performances test.
    cls.logistic_params = Parameters(n_dimensions=2, nb_devices=nb_devices, quantization_param=1,
                                     step_formula=None, nb_epoch=nb_epoch, use_averaging=False,
                                     cost_model=LogisticModel(), stochastic=True)

    obj_min_by_N_descent = FL_VanillaSGD(Parameters(n_dimensions=2,
                                                    nb_devices=nb_devices,
                                                    nb_epoch=200,
                                                    momentum=0.,
                                                    quantization_param=0,
                                                    verbose=True,
                                                    cost_model=LogisticModel(),
                                                    stochastic=False,
                                                    bidirectional=False))
    obj_min_by_N_descent.set_data(logistic_X[:nb_devices], logistic_Y[:nb_devices])
    obj_min_by_N_descent.run()
    cls.logistic_obj = obj_min_by_N_descent.losses[-1]
X, Y = build_data_linear(w_true, n_dimensions=dim_notebook - 1,
                         n_devices=nb_devices, with_seed=False, without_noise=False)
X = add_bias_term(X)  # Add a column of ones.

# 2) Creating cost models which will be used to compute cost/loss, gradients, L ...
cost_models = build_several_cost_model(RMSEModel, X, Y, nb_devices)

# 3) Computing the objective function.
obj_min_descent = SGD_Descent(Parameters(n_dimensions=dim_notebook,
                                         nb_devices=nb_devices,
                                         nb_epoch=4000,
                                         momentum=0.,
                                         verbose=True,
                                         cost_models=cost_models,
                                         stochastic=False),
                              None)
obj_min_descent.run(cost_models)
obj_min = obj_min_descent.train_losses[-1]

# 4) Defining the settings of the run.
compression = SQuantization(level=1, dim=dim_notebook, norm=2)
step_size = deacreasing_step_size

# 5) Running the descent for three algorithms: vanilla SGD, Diana and Artemis.
all_descent = {}
for type_params in tqdm([VanillaSGD(), Diana(), Artemis()]):
    # The call presumably continues with the compression model and step size
    # defined above; the exact keyword names below are assumed from the
    # earlier cell, as this excerpt was cut off mid-call.
    multiple_sg_descent = multiple_run_descent(type_params,
                                               cost_models=cost_models,
                                               compression_model=compression,
                                               step_formula=step_size)
    all_descent[type_params.name()] = multiple_sg_descent
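# For intuition, a minimal sketch of the standard s-level stochastic quantizer
# (as in QSGD) that SQuantization appears to implement. This is an independent
# re-implementation for illustration, not the repository's code.
import numpy as np

def sketch_quantize(x: np.ndarray, s: int = 1) -> np.ndarray:
    """Unbiased s-level quantization: each |x_i|/||x||_2 is stochastically
    rounded to one of the s+1 levels l/s, so that E[Q(x)] = x."""
    norm = np.linalg.norm(x)
    if norm == 0:
        return np.zeros_like(x)
    scaled = np.abs(x) / norm * s
    lower = np.floor(scaled)
    # Round up with probability equal to the fractional part, for unbiasedness.
    xi = (lower + (np.random.rand(*x.shape) < scaled - lower)) / s
    return norm * np.sign(x) * xi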