Example #1
def run_ps(trainers):
    utils.timed_log("Start training")
    # Hold the parameter server in an RRef so trainers can reach it over RPC.
    ps_rref = rpc.RRef(bups.BatchUpdateParameterServer())
    futs = []
    for trainer in trainers:
        # Launch one asynchronous training loop per trainer process.
        futs.append(rpc.rpc_async(trainer, run_trainer, args=(ps_rref,)))

    torch.futures.wait_all(futs)
    utils.timed_log("Finish training")
Example #2
def core(config):

    for attr in initialize_dag(config):

        attr = dict(config, **attr)

        attr = get_raw_table(attr)

        if (attr.get('raw_table') is None or  # skip if table was not selected in config file
                attr.get('pass')):            # skip if pass == True in dependency graph

            with timed_log(name=attr['name'], config=config, time_chunk='seconds', force='skipped'):
                continue

        with timed_log(name=attr['name'], config=config, time_chunk='seconds', force=attr['force']):
            run_process(attr)
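core() leans on timed_log as a context manager, but its definition is not shown. A minimal sketch of what such a context manager could look like, assuming it only times the wrapped block and reports the result (config and force are just passed through here):

import time
from contextlib import contextmanager

@contextmanager
def timed_log(name, config=None, time_chunk='seconds', force=None):
    # Hypothetical stand-in: measure the wrapped block and print a summary.
    start = time.time()
    try:
        yield
    finally:
        elapsed = time.time() - start
        if time_chunk == 'minutes':
            elapsed /= 60
        print(f"[{name}] finished in {elapsed:.2f} {time_chunk} (force={force})")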
Example #3
    @staticmethod
    @rpc.functions.async_execution
    def update_and_fetch_model(ps_rref, grads, rewards):
        self = ps_rref.local_value()

        utils.timed_log(
            f"PS got {self.curr_update_size}/{self.batch_update_size} updates")

        # Accumulate this trainer's gradients and episode rewards.
        for p, g in zip(self.model.parameters(), grads):
            p.grad += g

        for reward in rewards:
            self.current_rewards.append(reward)
            self.curr_update_size += 1

        with self.lock:

            fut = self.future_model

            if self.curr_update_size >= self.batch_update_size:
                # A full batch has arrived: average the accumulated gradients
                # and take a single optimizer step on behalf of all trainers.
                for p in self.model.parameters():
                    p.grad /= self.batch_update_size

                self.curr_update_size = 0
                self.optimizer.step()
                self.optimizer.zero_grad()
                fut.set_result(self.model)
                utils.timed_log(
                    f"rewards length is {len(self.current_rewards)}")
                utils.timed_log(
                    f"average reward is {np.mean(self.current_rewards)}")
                utils.timed_log("PS updated model")
                self.future_model = torch.futures.Future()

                # save the model
                cwd = os.getcwd()
                parameter_file = C.trained_model_name
                cwd = os.path.join(cwd, parameter_file)
                torch.save(self.model.state_dict(), cwd)

                # record the optimization history
                self.optimization_history.append(np.mean(self.current_rewards))
                optimization_history = {}
                optimization_history['history'] = self.optimization_history
                # store the history next to the checkpoint
                cwd = os.getcwd()
                parameter_file = 'optimization_history.json'
                cwd = os.path.join(cwd, parameter_file)
                with open(cwd, 'w') as status_file:
                    status_file.write(jsonpickle.encode(optimization_history))

                self.current_rewards = []

        return fut
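The future_model / set_result pattern above only pays off when trainers block on the returned Future. A hedged sketch of the matching trainer side, assuming the decorators added above and treating get_model(), run_episode(), and NUM_EPISODES as illustrative placeholders:

import torch.distributed.rpc as rpc

NUM_EPISODES = 100  # illustrative only

def run_trainer(ps_rref):
    # get_model() is an assumed accessor on the parameter server.
    model = ps_rref.rpc_sync().get_model()
    for _ in range(NUM_EPISODES):
        # run_episode() is a hypothetical rollout helper returning the
        # gradients and the episode rewards.
        grads, rewards = run_episode(model)
        # rpc_sync blocks until the PS resolves the Future, i.e. until a
        # full batch of updates has been folded into the model.
        model = rpc.rpc_sync(
            ps_rref.owner(),
            BatchUpdateParameterServer.update_and_fetch_model,
            args=(ps_rref, grads, rewards),
        )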
Example #4
    def single(self, config_path='configs/config-template.yaml'):
        """Run the pipeline for a single region
        
        Parameters
        ----------
        config_path : str, optional
            config.yaml path, by default 'configs/config-template.yaml'
        """

        config = get_config(config_path)
        config.update(self.args)

        if self.verbose: 
            print(config)

        with timed_log(name='Start process', config=config, time_chunk='minutes',
                       force=config['force']):
            pass

        with timed_log(name='Full process', config=config, time_chunk='minutes', 
                        force=config['force']):
            core(config)
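get_config() is not shown; a minimal sketch under the assumption that the config is a flat YAML file read into a dict (which is all that config.update(...) and config['force'] above require):

import yaml

def get_config(config_path):
    # Hypothetical loader: parse the YAML file into a plain dict.
    with open(config_path) as f:
        return yaml.safe_load(f)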
Example #5
    def single(self, config_path="configs/config-template.yaml"):
        """Run the pipeline for a single region
        
        Parameters
        ----------
        config_path : str, optional
            config.yaml path, by default 'configs/config-template.yaml'
        """

        config = get_config(config_path)
        config.update(self.args)

        if self.verbose:
            print(config)

        with timed_log(
                name="Start process",
                config=config,
                time_chunk="minutes",
                force=config["force"],
        ):
            pass

        with timed_log(
                name="Full process",
                config=config,
                time_chunk="minutes",
                force=config["force"],
        ):

            if config["slug"] == "prod":
                try:
                    core(config)
                except:
                    if config["post_log"]:
                        logger.post(traceback.format_exc())
                    print(traceback.format_exc())
            else:
                core(config)
Example #6
def core(config):

    for attr in initialize_dag(config):

        attr = dict(config, **attr)

        attr = get_raw_table(attr)

        if (attr.get("raw_table") is
                None) or (  # skip if table was not selected in config file
                    attr.get("pass")
                    == True):  # skip if pass == True in dependency graph

            with timed_log(name=attr["name"],
                           config=config,
                           time_chunk="seconds",
                           force="skiped"):
                continue

        with timed_log(name=attr["name"],
                       config=config,
                       time_chunk="seconds",
                       force=attr["force"]):
            run_process(attr)
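initialize_dag() is the other helper both versions of core() depend on. A purely hypothetical sketch of its shape, assuming config['dag'] is a list of node dicts with 'name' and optional 'depends_on' keys, yielded in dependency order:

def initialize_dag(config):
    nodes = {node['name']: node for node in config['dag']}
    done = set()
    while len(done) < len(nodes):
        progressed = False
        for name, node in nodes.items():
            if name not in done and all(
                    dep in done for dep in node.get('depends_on', [])):
                done.add(name)
                progressed = True
                yield node
        if not progressed:  # guard against cycles in the dependency graph
            raise ValueError('dependency cycle in DAG')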
Example #7
    @staticmethod
    @rpc.functions.async_execution
    def update_and_fetch_model(ps_rref, grads, rewards):
        self = ps_rref.local_value()

        utils.timed_log(
            f"PS got {self.curr_update_size}/{self.batch_update_size} updates")

        for p, g in zip(self.model.parameters(), grads):
            p.grad += g

        for reward in rewards:
            self.current_rewards.append(reward)
            self.curr_update_size += 1

        with self.lock:

            fut = self.future_model

            if self.curr_update_size >= self.batch_update_size:
                for p in self.model.parameters():
                    p.grad /= self.batch_update_size

                self.curr_update_size = 0
                self.optimizer.step()
                self.optimizer.zero_grad()
                fut.set_result(self.model)
                utils.timed_log(
                    f"rewards length is {len(self.current_rewards)}")
                utils.timed_log(
                    f"average reward is {np.mean(self.current_rewards)}")
                utils.timed_log("PS updated model")
                self.future_model = torch.futures.Future()
                self.current_rewards = []

                # save the model
                cwd = os.getcwd()
                parameter_file = 'cartpole_rpc_trained_model.pt'
                cwd = os.path.join(cwd, parameter_file)
                torch.save(self.model.state_dict(), cwd)

        return fut
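For completeness, a sketch of loading the checkpoint written above; Policy() is a placeholder for whatever model class the parameter server actually optimizes:

import os
import torch

model = Policy()  # hypothetical: same architecture the PS trains
state = torch.load(os.path.join(os.getcwd(), 'cartpole_rpc_trained_model.pt'),
                   map_location='cpu')
model.load_state_dict(state)
model.eval()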