def FreezePlan(plan_config):
    """Write the plan back to its YAML file with ``freeze=True``.

    The plan is parsed without resolution. When the initial state file
    named under ``aggregator.settings.init_state_path`` does not exist,
    a message is logged and nothing is written.
    """
    plan = Plan()
    plan.config = Plan.Parse(Path(plan_config), resolve=False).config

    aggregator_settings = plan.config['aggregator']['settings']
    initial_weights = Path(aggregator_settings['init_state_path'])

    if initial_weights.exists():
        Plan.Dump(Path(plan_config), plan.config, freeze=True)
    else:
        logger.info("Plan has not been initialized! Run 'fx plan"
                    " initialize' before proceeding")
def prepare_workspace_distribution(self, model_provider, task_keeper,
                                   data_loader, rounds_to_train,
                                   delta_updates=False,
                                   opt_treatment='RESET'):
    """Build the plan, serialize the interface objects and pack the workspace."""
    # File names under which the interface objects are stored.
    interface_files = {
        'model_interface_file': 'model_obj.pkl',
        'tasks_interface_file': 'tasks_obj.pkl',
        'dataloader_interface_file': 'loader_obj.pkl',
    }
    self._prepare_plan(
        model_provider, task_keeper, data_loader, rounds_to_train,
        delta_updates=delta_updates,
        opt_treatment=opt_treatment,
        **interface_files,
    )

    # Write the serialized python objects to disk.
    self._serialize_interface_objects(model_provider, task_keeper,
                                      data_loader)

    # Persist the prepared plan (not frozen).
    plan_file = Path(f'./plan/{self.plan.name}')
    Plan.Dump(plan_file, self.plan.config, freeze=False)

    # Pack the workspace: first export the requirements needed to restore
    # the python environment, then compress the workspace so it can be
    # restored on a collaborator.
    self._export_python_env()
    self._pack_the_workspace()
def start_(context, plan, authorized_cols, secure):
    """Parse the plan and collaborator list, then serve the aggregator."""
    parsed_plan = Plan.Parse(
        plan_config_path=Path(plan),
        cols_config_path=Path(authorized_cols),
    )

    logger.info('🧿 Starting the Aggregator Service.')

    server = parsed_plan.get_server()
    server.serve()
def start_(context, plan, collaborator_name, data_config, secure):
    """Parse the plan and data config, then run the named collaborator."""
    parsed_plan = Plan.Parse(
        plan_config_path=Path(plan),
        data_config_path=Path(data_config),
    )

    # TODO: Need to restructure data loader config file loader

    echo(f'Data = {parsed_plan.cols_data_paths}')
    logger.info('🧿 Starting a Collaborator Service.')

    collaborator = parsed_plan.get_collaborator(collaborator_name)
    collaborator.run()
def setup_plan(save=True):
    """
    Parse the workspace plan and optionally write it back to disk.

    Args:
        save : bool (default=True)
            Whether to dump the parsed plan config to 'plan/plan.yaml'

    Returns:
        plan : Plan object
    """
    plan_file = Path('plan/plan.yaml')

    plan = Plan.Parse(
        plan_config_path=plan_file,
        cols_config_path=Path('plan/cols.yaml'),
        data_config_path=Path('plan/data.yaml'),
    )

    if save:
        Plan.Dump(plan_file, plan.config)

    return plan
def _serialize_interface_objects(self, model_provider, task_keeper,
                                 data_loader):
    """Serialize the model, task and data-loader objects to their plan files."""
    plugin_components = self.plan.config['api_layer'][
        'required_plugin_components']
    serializer = self.plan.Build(plugin_components['serializer_plugin'], {})

    # The framework adapter gets a chance to prepare before anything is
    # serialized (the model provider may need preprocessing steps).
    framework_adapter = Plan.Build(model_provider.framework_plugin, {})
    framework_adapter.serialization_setup()

    file_settings = self.plan.config['api_layer']['settings']
    targets = (
        (model_provider, 'model_interface_file'),
        (task_keeper, 'tasks_interface_file'),
        (data_loader, 'dataloader_interface_file'),
    )
    for interface_object, settings_key in targets:
        serializer.serialize(interface_object, file_settings[settings_key])
def update_plan(override_config):
    """
    Apply overrides to the flattened plan and write the result to disk.

    Keys that are not present in the flattened plan are logged and
    skipped. For a list of available override options, call
    `fx.get_plan()`

    Args:
        override_config : dict {"COMPONENT.settings.variable" : value}

    Returns:
        None
    """
    flat_config = get_plan(return_complete=True)

    for key, value in override_config.items():
        if key not in flat_config:
            logger.info(f'Key {key} not found in plan. Ignoring... ')
            continue
        flat_config[key] = value
        logger.info(f'Updating {key} to {value}... ')

    nested_config = unflatten(flat_config, '.')
    Plan.Dump(Path('plan/plan.yaml'), nested_config)
def setup_plan(log_level='CRITICAL'):
    """
    Parse the workspace plan without resolving it.

    The root logger is switched to `log_level` while the plan is parsed
    and is set to 'INFO' afterwards, even when parsing raises.

    Args:
        log_level : str (default='CRITICAL')
            Root logger level applied while the plan is being parsed

    Returns:
        plan : Plan object
    """
    plan_config = 'plan/plan.yaml'
    cols_config = 'plan/cols.yaml'
    data_config = 'plan/data.yaml'

    root_logger = getLogger()
    root_logger.setLevel(log_level)
    try:
        plan = Plan.Parse(plan_config_path=Path(plan_config),
                          cols_config_path=Path(cols_config),
                          data_config_path=Path(data_config),
                          resolve=False)
    finally:
        # Do not leave the root logger at `log_level` if parsing fails.
        root_logger.setLevel('INFO')

    return plan
def run_experiment(collaborator_dict, override_config=None):
    """
    Core function that executes the FL Plan.

    Args:
        collaborator_dict : dict {collaborator_name(str): FederatedModel}
            This dictionary defines which collaborators will participate in
            the experiment, as well as a reference to that collaborator's
            federated model.
        override_config : dict {flplan.key : flplan.value} or None
            Override any of the plan parameters at runtime using this
            dictionary. `None` (the default) or an empty dict leaves the
            plan untouched. To get a list of the available options,
            execute `fx.get_plan()`

    Returns:
        final_federated_model : FederatedModel
            The final model resulting from the federated learning experiment
    """
    from sys import path

    file = Path(__file__).resolve()
    root = file.parent.resolve()  # interface root, containing command modules
    work = Path.cwd().resolve()

    path.append(str(root))
    path.insert(0, str(work))

    # Update the plan if necessary
    if override_config:
        update_plan(override_config)

    # TODO: Fix this implementation. The full plan parsing is reused here,
    # but the model and data will be overwritten based on user specifications
    plan_config = 'plan/plan.yaml'
    cols_config = 'plan/cols.yaml'
    data_config = 'plan/data.yaml'

    plan = Plan.Parse(plan_config_path=Path(plan_config),
                      cols_config_path=Path(cols_config),
                      data_config_path=Path(data_config))

    # Overwrite plan values
    plan.authorized_cols = list(collaborator_dict)
    tensor_pipe = plan.get_tensor_pipe()

    # This must be set to the final index of the list (this is the last
    # tensorflow session to get created)
    plan.runner_ = list(collaborator_dict.values())[-1]
    model = plan.runner_

    # Initialize model weights
    aggregator_settings = plan.config['aggregator']['settings']
    init_state_path = aggregator_settings['init_state_path']
    rounds_to_train = aggregator_settings['rounds_to_train']

    # The second element of the split result is not needed here.
    tensor_dict, _ = split_tensor_dict_for_holdouts(
        logger,
        plan.runner_.get_tensor_dict(False)
    )

    model_snap = utils.construct_model_proto(tensor_dict=tensor_dict,
                                             round_number=0,
                                             tensor_pipe=tensor_pipe)

    logger.info(f'Creating Initial Weights File 🠆 {init_state_path}')

    utils.dump_proto(model_proto=model_snap, fpath=init_state_path)

    logger.info('Starting Experiment...')

    aggregator = plan.get_aggregator()

    model_states = dict.fromkeys(collaborator_dict)

    # Create the collaborators
    collaborators = {
        collaborator: create_collaborator(
            plan, collaborator, model, aggregator
        ) for collaborator in plan.authorized_cols
    }

    for round_num in range(rounds_to_train):
        for col in plan.authorized_cols:
            collaborator = collaborators[col]
            model.set_data_loader(collaborator_dict[col].data_loader)

            # From the second round on, restore this collaborator's state
            # captured at the end of its previous round.
            if round_num != 0:
                model.rebuild_model(round_num, model_states[col])

            collaborator.run_simulation()

            model_states[col] = model.get_tensor_dict(with_opt_vars=True)

    # Set the weights for the final model
    model.rebuild_model(
        rounds_to_train - 1, aggregator.last_tensor_dict, validation=True)
    return model
def fit(self):
    """Run the estimator for every authorized collaborator, round by round."""
    import fastestimator as fe
    from fastestimator.trace.io.best_model_saver import BestModelSaver
    from sys import path

    this_file = Path(__file__).resolve()
    # interface root, containing command modules
    interface_root = this_file.parent.resolve()
    workspace = Path.cwd().resolve()

    path.append(str(interface_root))
    path.insert(0, str(workspace))

    # TODO: Fix this implementation. The full plan parsing is reused here,
    # but the model and data will be overwritten based on
    # user specifications
    plan_config = (Path(fx.WORKSPACE_PREFIX) / 'plan' / 'plan.yaml')
    cols_config = (Path(fx.WORKSPACE_PREFIX) / 'plan' / 'cols.yaml')
    data_config = (Path(fx.WORKSPACE_PREFIX) / 'plan' / 'data.yaml')

    plan = Plan.Parse(plan_config_path=plan_config,
                      cols_config_path=cols_config,
                      data_config_path=data_config)

    self.rounds = plan.config['aggregator']['settings']['rounds_to_train']
    data_loader = FastEstimatorDataLoader(self.estimator.pipeline)
    runner = FastEstimatorTaskRunner(self.estimator,
                                     data_loader=data_loader)

    # Overwrite plan values
    tensor_pipe = plan.get_tensor_pipe()

    # Initialize model weights
    init_state_path = plan.config['aggregator']['settings'][
        'init_state_path']
    initial_tensors = runner.get_tensor_dict(False)
    tensor_dict, holdout_params = split_tensor_dict_for_holdouts(
        self.logger, initial_tensors)

    model_snap = utils.construct_model_proto(tensor_dict=tensor_dict,
                                             round_number=0,
                                             tensor_pipe=tensor_pipe)

    self.logger.info(f'Creating Initial Weights File'
                     f' 🠆 {init_state_path}')

    utils.dump_proto(model_proto=model_snap, fpath=init_state_path)

    self.logger.info('Starting Experiment...')

    aggregator = plan.get_aggregator()

    model_states = dict.fromkeys(plan.authorized_cols)
    runners = {}
    save_dir = {}
    # Pipeline attributes carried over into each per-collaborator pipeline.
    copied_attributes = [
        'batch_size', 'ops', 'num_process',
        'drop_last', 'pad_value', 'collate_fn'
    ]

    # `data_path` is the 1-based position of the collaborator.
    for data_path, col in enumerate(plan.authorized_cols, start=1):
        data = self.estimator.pipeline.data
        train_data, eval_data, test_data = split_data(
            data['train'], data['eval'], data['test'],
            data_path, len(plan.authorized_cols))

        pipeline_kwargs = {
            attr: value
            for attr, value in self.estimator.pipeline.__dict__.items()
            if attr in copied_attributes
        }
        pipeline_kwargs['train_data'] = train_data
        pipeline_kwargs['eval_data'] = eval_data
        pipeline_kwargs['test_data'] = test_data

        pipeline = fe.Pipeline(**pipeline_kwargs)

        data_loader = FastEstimatorDataLoader(pipeline)
        self.estimator.system.pipeline = pipeline

        col_runner = FastEstimatorTaskRunner(estimator=self.estimator,
                                             data_loader=data_loader)
        runners[col] = col_runner
        col_runner.set_optimizer_treatment('CONTINUE_LOCAL')

        # Give every BestModelSaver trace a per-collaborator directory.
        for trace in col_runner.estimator.system.traces:
            if not isinstance(trace, BestModelSaver):
                continue
            save_dir_path = f'{trace.save_dir}/{col}'
            os.makedirs(save_dir_path, exist_ok=True)
            save_dir[col] = save_dir_path

    # Create the collaborators
    collaborators = {
        name: fx.create_collaborator(plan, name, runners[name], aggregator)
        for name in plan.authorized_cols
    }

    model = None
    for round_num in range(self.rounds):
        for col in plan.authorized_cols:
            collaborator = collaborators[col]
            col_runner = runners[col]
            state_file = f'save/{col}_state'

            if round_num != 0:
                # For FastEstimator Jupyter notebook, models must be
                # saved in different directories (i.e. path must be
                # reset here)
                col_runner.estimator.system.load_state(state_file)
                col_runner.rebuild_model(round_num, model_states[col])

            # Reset the save directory if BestModelSaver is present
            # in traces
            for trace in col_runner.estimator.system.traces:
                if isinstance(trace, BestModelSaver):
                    trace.save_dir = save_dir[col]

            collaborator.run_simulation()

            model_states[col] = col_runner.get_tensor_dict(
                with_opt_vars=True)
            model = col_runner.model
            col_runner.estimator.system.save_state(state_file)

    # TODO This will return the model from the last collaborator,
    #  NOT the final aggregated model (though they should be similar).
    # There should be a method added to the aggregator that will load
    # the best model from disk and return it
    return model
def _prepare_plan(self, model_provider, task_keeper, data_loader,
                  rounds_to_train,
                  delta_updates=False, opt_treatment='RESET',
                  model_interface_file='model_obj.pkl',
                  tasks_interface_file='tasks_obj.pkl',
                  dataloader_interface_file='loader_obj.pkl'):
    """Populate a plan from the base interactive-API plan and given settings.

    The result is stored as a deep copy in ``self.plan``.
    """
    # Folders for plans and saved state
    for folder in ('./plan', './save'):
        os.makedirs(folder, exist_ok=True)

    # Start from the default plan, unresolved
    base_plan_path = WORKSPACE / 'workspace/plan/plans/default/base_plan_interactive_api.yaml'
    plan = Plan.Parse(base_plan_path, resolve=False)

    # Use the default plan name
    plan.name = 'plan.yaml'
    plan.authorized_cols = list(self.federation.col_data_paths.keys())

    # Network section
    network_settings = plan.config['network']['settings']
    network_settings['agg_addr'] = self.federation.fqdn
    network_settings['disable_tls'] = self.federation.disable_tls

    # Aggregator section
    plan.config['aggregator']['settings']['rounds_to_train'] = rounds_to_train

    # Collaborator section
    collaborator_settings = plan.config['collaborator']['settings']
    collaborator_settings['delta_updates'] = delta_updates
    collaborator_settings['opt_treatment'] = opt_treatment

    # DataLoader section
    for setting_name, setting_value in data_loader.kwargs.items():
        plan.config['data_loader']['settings'][setting_name] = setting_value

    # Tasks section
    validation_variants = (
        ('localy_tuned_model_', 'local'),
        ('aggregated_model_', 'global'),
    )
    for task_name in task_keeper.task_registry:
        if task_keeper.task_contract[task_name]['optimizer'] is not None:
            # Training task: registered once under its own name
            plan.config['tasks'][task_name] = {
                'function': task_name,
                'kwargs': task_keeper.task_settings[task_name]
            }
            continue

        # Validation-type task (does not alter the model state):
        # registered twice, once for the local and once for the global model
        for name_prefix, apply_kwarg in validation_variants:
            task_kwargs = deepcopy(task_keeper.task_settings[task_name])
            task_kwargs['apply'] = apply_kwarg
            plan.config['tasks'][name_prefix + task_name] = {
                'function': task_name,
                'kwargs': task_kwargs
            }

    # TaskRunner framework plugin
    # ['required_plugin_components'] should be already in the default plan
    # with all the fields filled with the default values
    plan.config['task_runner']['required_plugin_components'] = {
        'framework_adapters': model_provider.framework_plugin
    }

    # API layer
    plan.config['api_layer'] = {
        'required_plugin_components': {
            'serializer_plugin': self.serializer_plugin
        },
        'settings': {
            'model_interface_file': model_interface_file,
            'tasks_interface_file': tasks_interface_file,
            'dataloader_interface_file': dataloader_interface_file,
        }
    }

    # The first task group lists every task entry that is a plain dict
    # carrying a 'function' key.
    plan.config['assigner']['settings']['task_groups'][0]['tasks'] = [
        entry_name
        for entry_name, entry in plan.config['tasks'].items()
        if type(entry) is dict and 'function' in entry
    ]

    self.plan = deepcopy(plan)
def initialize(context, plan_config, cols_config, data_config,
               aggregator_address, feature_shape):
    """
    Initialize Data Science plan.

    Create a protocol buffer file of the initial model weights for
    the federation. When the plan's aggregator address is 'auto' or an
    `aggregator_address` is given, the address is patched into the plan
    file as well. The initialized plan is recorded in `context.obj['plans']`.
    """
    plan = Plan.Parse(plan_config_path=Path(plan_config),
                      cols_config_path=Path(cols_config),
                      data_config_path=Path(data_config))

    init_state_path = plan.config['aggregator']['settings']['init_state_path']

    # TODO: Is this part really needed? Why would we need the collaborator
    # name to know the input shape to the model? (`feature_shape` is
    # currently unused.)
    print(plan.cols_data_paths)
    collaborator_cname = list(plan.cols_data_paths)[0]

    data_loader = plan.get_data_loader(collaborator_cname)
    task_runner = plan.get_task_runner(data_loader)
    tensor_pipe = plan.get_tensor_pipe()

    tensor_dict, holdout_params = split_tensor_dict_for_holdouts(
        logger,
        task_runner.get_tensor_dict(False),
        **task_runner.tensor_dict_split_fn_kwargs
    )

    # `Logger.warn` is a deprecated alias; `warning` is the supported name.
    logger.warning(f'Following parameters omitted from global initial model, '
                   f'local initialization will determine'
                   f' values: {list(holdout_params.keys())}')

    model_snap = utils.construct_model_proto(tensor_dict=tensor_dict,
                                             round_number=0,
                                             tensor_pipe=tensor_pipe)

    logger.info(f'Creating Initial Weights File    🠆 {init_state_path}')

    utils.dump_proto(model_proto=model_snap, fpath=init_state_path)

    # Re-read the plan unresolved so that the patched file keeps the
    # original (unresolved) values.
    plan_origin = Plan.Parse(Path(plan_config), resolve=False).config

    network_settings = plan_origin['network']['settings']
    if network_settings['agg_addr'] == 'auto' or aggregator_address:
        network_settings['agg_addr'] = aggregator_address or getfqdn()

        logger.warning(f"Patching Aggregator Addr in Plan"
                       f" 🠆 {plan_origin['network']['settings']['agg_addr']}")

        Plan.Dump(Path(plan_config), plan_origin)

    plan.config = plan_origin

    # Record that plan with this hash has been initialized
    if 'plans' not in context.obj:
        context.obj['plans'] = []
    context.obj['plans'].append(f"{Path(plan_config).stem}_{plan.hash[:8]}")
    logger.info(f"{context.obj['plans']}")