def main(args):
    """Generate an oracle dataset, tabulate it, and optionally persist it.

    Builds the dataset via ``Oracle``, converts the raw sample matrix into
    letter strings and digit strings, assembles everything into a DataFrame,
    and — when ``args.output`` is set — writes the run configuration as YAML
    alongside a CSV of the dataset.

    :param args: namespace carrying the oracle configuration plus the
        ``output`` path and the ``no_indices`` flag.
    """
    oracle = Oracle(args)
    data = oracle.initializeDataset(save=False, returnData=True)
    scores = data["scores"]
    sample_matrix = data["samples"]

    letter_seqs = oracle.numbers2letters(sample_matrix)
    # Digit-string form of each sample; non-positive entries act as padding
    # and are dropped.
    digit_seqs = []
    for row in sample_matrix:
        digit_seqs.append("".join(str(v) for v in row if v > 0))

    if isinstance(scores, dict):
        # Scores already map column -> values; extend with the sequences.
        scores.update({"letters": letter_seqs, "indices": digit_seqs})
        df = pd.DataFrame(scores)
    else:
        df = pd.DataFrame(
            {"letters": letter_seqs, "indices": digit_seqs, "scores": scores}
        )

    if args.output:
        # Dump the (numpy-free) run configuration next to the dataset CSV.
        yml_path = Path(args.output).with_suffix(".yml")
        with open(yml_path, "w") as f:
            yaml.dump(
                numpy2python(namespace2dict(args)), f, default_flow_style=False
            )
        if args.no_indices:
            df.drop(columns="indices", inplace=True)
        df.to_csv(args.output)
def updateModelState(self, model_state, model):
    """
    Update the stored model-state vector and cache the data needed for
    later sampling-distance computations.

    :param model_state: dict of proxy-model metrics; keys must match those
        read below ("test loss", "test std", "best cluster energies", ...).
    :param model: proxy model — assumed already on the correct device,
        passed directly from the main program.
    :return: tuple (previous model-state tensor, new model-state tensor).
    """
    previous_model_state = self.model_state
    metrics = model_state

    # Assemble the flat state vector in a fixed order:
    # [test loss, test std,
    #  best cluster energies..., best cluster deviations...,
    #  internal diff..., dataset diff..., random-set diff...,
    #  n proxy models, clustering cutoff, progress fraction]
    pieces = [
        torch.stack(
            (
                torch.tensor(metrics["test loss"]),
                torch.tensor(metrics["test std"]),
            )
        ),
        torch.tensor(metrics["best cluster energies"]),
        torch.Tensor(metrics["best cluster deviations"]),
        torch.tensor(metrics["best clusters internal diff"]),
        torch.tensor(metrics["best clusters dataset diff"]),
        torch.tensor(metrics["best clusters random set diff"]),
        torch.stack(
            (
                torch.tensor(metrics["n proxy models"]),
                torch.tensor(metrics["clustering cutoff"]),
                torch.tensor(metrics["iter"] / metrics["budget"]),
            )
        ),
    ]
    self.model_state = torch.cat(pieces).to(self.device)

    # Proxy model arrives already on the right device.
    self.proxyModel = model

    # Cache the sample sets used later for distance computations:
    # representatives of the current model state ...
    self.modelStateSamples = metrics["best cluster samples"]
    # ... the training dataset loaded from disk ...
    dataset = np.load(
        'datasets/' + self.config.dataset.oracle + '.npy',
        allow_pickle=True,
    ).item()
    self.trainingSamples = dataset['samples']
    # ... and a large random sample: either 1e4 or 1% of the sample
    # space, whichever is smaller.
    numSamples = min(
        int(1e4),
        self.config.dataset.dict_size ** self.config.dataset.max_length // 100,
    )
    dataoracle = Oracle(self.config)
    randomSet = dataoracle.initializeDataset(
        save=False, returnData=True, customSize=numSamples
    )
    self.randomSamples = randomSet['samples']

    return previous_model_state, self.model_state