class Env(object):
    """Search environment backed by a precomputed arch2vec embedding table.

    Depending on ``save`` the constructor either extracts the table from a
    JSON dataset with a pretrained encoder and then terminates the process,
    or loads a previously saved table (see ``_reset``).  ``get_init_state``
    and ``step`` then hand out table entries, never the same one twice.

    The class reads the module-level ``args`` namespace for model sizes,
    file names and the random seed.
    """

    def __init__(self, name, seed, cfg, data_path=None, save=False):
        """Create the environment and immediately extract or load the table.

        Args:
            name: Label stored on the instance.
            seed: Stored on the instance; the methods below seed from
                ``args.seed``.  NOTE(review): confirm both are always equal.
            cfg: Configuration mapping; ``cfg['GAE']`` and ``cfg['prep']`` are
                forwarded to ``Model`` and ``preprocessing``.
            data_path: JSON dataset path, only read when extracting.
            save: Falsy -> extract the table, write it and exit the process.
                Truthy -> load the already saved table.
        """
        self.name = name
        self.seed = seed
        # Keep the configuration that was passed in so that _reset does not
        # depend on a module-level ``cfg`` happening to exist.
        self.cfg = cfg
        self.dir_name = 'pretrained/dim-{}'.format(args.latent_dim)
        print('Save file director is {}'.format(self.dir_name))
        self.visited = {}
        self.features = []
        self.embedding = {}
        self._reset(data_path, save)

    def _reset(self, data_path, save):
        """Extract-and-exit when ``save`` is falsy, otherwise load the table.

        NOTE(review): the flag reads inverted (``save=False`` is the branch
        that writes the file); kept as is because callers depend on it.

        Raises:
            SystemExit: always at the end of the extraction branch, and
                earlier if the checkpoint is missing or the output exists.
        """
        if not save:
            self._extract_embeddings(data_path)
        else:
            self._load_embeddings()

    def _extract_embeddings(self, data_path):
        """Encode every dataset entry, save the table, then exit the process."""
        print("extract arch2vec embedding table...")
        dataset = load_json(data_path)
        self.model = Model(input_dim=args.input_dim,
                           hidden_dim=args.hidden_dim,
                           latent_dim=args.latent_dim,
                           num_hops=args.hops,
                           num_mlp_layers=args.mlps,
                           dropout=args.dropout,
                           **self.cfg['GAE']).cuda()
        model_ckpt_path = os.path.join(self.dir_name,
                                       '{}'.format(args.model_path))
        if not os.path.exists(model_ckpt_path):
            print("File {} does not exist.".format(model_ckpt_path))
            raise SystemExit()
        self.model.load_state_dict(torch.load(model_ckpt_path)['model_state'])
        self.model.eval()
        print("length of the dataset: {}".format(len(dataset)))
        self.f_path = os.path.join(
            self.dir_name, '{}-arch2vec.pt'.format(args.dataset_name))
        if os.path.exists(self.f_path):
            # Refuse to overwrite an existing table.
            print('ATTENTION!!! {} is already saved.'.format(self.f_path))
            raise SystemExit()
        print('save to {} ...'.format(self.f_path))
        # Inference only: without no_grad every stored feature would keep its
        # autograd graph alive for the whole loop.
        with torch.no_grad():
            for ind in range(len(dataset)):
                entry = dataset[str(ind)]
                adj = torch.Tensor(entry['module_adjacency']).unsqueeze(0).cuda()
                ops = torch.Tensor(entry['module_operations']).unsqueeze(0).cuda()
                adj, ops, _ = preprocessing(adj, ops, **self.cfg['prep'])
                other_info = {
                    'valid_accuracy_avg': entry['validation_accuracy_avg'],
                    'test_accuracy_avg': entry['test_accuracy_avg'],
                }
                x, _ = self.model._encoder(ops, adj)
                self.embedding[ind] = {
                    # Average over dim 1, then drop the leading batch dim.
                    'feature': x.mean(dim=1).squeeze(0).cpu(),
                    'valid_accuracy': float(entry['validation_accuracy']),
                    'test_accuracy': float(entry['test_accuracy']),
                    'time': float(entry['training_time']),
                    'other_info': other_info,
                }
        torch.save(self.embedding, self.f_path)
        print("finished arch2vec extraction")
        raise SystemExit()

    def _load_embeddings(self):
        """Load the saved table, shuffle it and stack the feature matrix."""
        self.f_path = os.path.join(
            self.dir_name, '{}-arch2vec.pt'.format(args.dataset_name))
        print("load pretrained arch2vec in path: {}".format(self.f_path))
        # torch.load unpickles the file: only point this at tables you trust.
        self.embedding = torch.load(self.f_path)
        random.seed(args.seed)
        # Assumes the table is a dict keyed 0..N-1 (as written by the
        # extraction branch); shuffle then permutes the records across keys.
        random.shuffle(self.embedding)
        self.features = torch.stack(
            [self.embedding[ind]['feature']
             for ind in range(len(self.embedding))],
            dim=0)
        print('loading finished. pretrained embeddings shape: {}'.format(
            self.features.shape))

    def _observation(self, index):
        """Build the tuple returned to callers for table entry ``index``."""
        record = self.embedding[index]
        return (self.features[index],
                record['valid_accuracy'] / 100.0,
                record['test_accuracy'] / 100.0,
                record['time'],
                record['other_info'])

    def get_init_state(self):
        """Pick a seeded random entry, mark it visited and return it.

        :return: (feature, valid_accuracy / 100, test_accuracy / 100, time,
                 other_info)
        """
        random.seed(args.seed)
        # randrange excludes the upper bound; randint(0, N) could return N,
        # which is one past the last row.
        start = random.randrange(self.features.shape[0])
        self.visited[start] = True
        return self._observation(start)

    def step(self, action):
        """Return an unvisited entry close to ``action`` and mark it visited.

        action: 1 x dim
        self.features: N x dim

        Candidates are scanned from the second-nearest entry outwards; the
        nearest entry is only used when nothing else is left (previously that
        situation raised IndexError).
        NOTE(review): sibling Env variants start at the nearest entry -
        confirm that skipping it here is intended.

        :return: (feature, valid_accuracy / 100, test_accuracy / 100, time,
                 other_info)
        :raises SystemExit: when every entry has already been visited.
        """
        dist = torch.norm(self.features - action.cpu(), dim=1)
        total = dist.shape[0]
        if len(self.visited) == total:
            print("CANNOT FIND IN THE ENTIRE DATASET !!!")
            raise SystemExit()
        # Negated distances make topk (largest first) yield nearest first.
        ranked = (-1 * dist).topk(total).indices
        chosen = None
        for rank in range(1, total):
            candidate = ranked[rank].item()
            if candidate not in self.visited:
                chosen = candidate
                break
        if chosen is None:
            nearest = ranked[0].item()
            if nearest in self.visited:
                print("CANNOT FIND IN THE ENTIRE DATASET !!!")
                raise SystemExit()
            chosen = nearest
        self.visited[chosen] = True
        return self._observation(chosen)
class Env(object):
    """Search environment backed by a precomputed arch2vec embedding table.

    Depending on ``save`` the constructor either extracts the table with the
    checkpoint ``model_path`` and terminates the process, or loads the table
    stored at ``emb_path`` (both relative to ``pretrained/dim-<args.dim>``).
    ``get_init_state`` and ``step`` then hand out table entries, never the
    same one twice.

    The class reads the module-level ``args`` namespace (``dim``, ``seed``).
    """

    def __init__(self, name, seed, emb_path, model_path, cfg,
                 data_path=None, save=False):
        """Create the environment and immediately extract or load the table.

        Args:
            name: Label stored on the instance.
            seed: Stored on the instance; ``get_init_state`` seeds from
                ``args.seed``.  NOTE(review): confirm both are always equal.
            emb_path: File name of the saved table inside ``dir_name``.
            model_path: File name of the encoder checkpoint inside
                ``dir_name``.
            cfg: Configuration mapping; ``cfg['GAE']`` and ``cfg['prep']`` are
                forwarded to ``Model`` and ``preprocessing``.
            data_path: JSON dataset path, only read when extracting.
            save: Falsy -> extract the table, write it and exit the process.
                Truthy -> load the already saved table.
        """
        self.name = name
        self.model_path = model_path
        self.emb_path = emb_path
        self.seed = seed
        # Keep the configuration that was passed in so that _reset does not
        # depend on a module-level ``cfg`` happening to exist.
        self.cfg = cfg
        self.dir_name = 'pretrained/dim-{}'.format(args.dim)
        self.visited = {}
        self.features = []
        self.embedding = {}
        self._reset(data_path, save)

    def _reset(self, data_path, save):
        """Extract-and-exit when ``save`` is falsy, otherwise load the table.

        NOTE(review): the flag reads inverted (``save=False`` is the branch
        that writes the file); kept as is because callers depend on it.

        Raises:
            SystemExit: always at the end of the extraction branch, and
                earlier if the checkpoint is missing or the output exists.
        """
        if not save:
            self._extract_embeddings(data_path)
        else:
            self._load_embeddings()

    def _extract_embeddings(self, data_path):
        """Encode every dataset entry, save the table, then exit the process."""
        # One path for the message, the existence check and the load, so the
        # three can no longer disagree.
        ckpt_path = os.path.join(self.dir_name, self.model_path)
        print("extract arch2vec from {}".format(ckpt_path))
        if not os.path.exists(ckpt_path):
            print("File {} does not exist.".format(ckpt_path))
            raise SystemExit()
        dataset = load_json(data_path)
        # NOTE(review): encoder sizes are hard-coded while dir_name follows
        # args.dim - confirm latent_dim=16 matches the checkpoint in use.
        self.model = Model(input_dim=5, hidden_dim=128, latent_dim=16,
                           num_hops=5, num_mlp_layers=2, dropout=0,
                           **self.cfg['GAE']).cuda()
        self.model.load_state_dict(torch.load(ckpt_path)['model_state'])
        self.model.eval()
        print("length of the dataset: {}".format(len(dataset)))
        self.f_path = os.path.join(self.dir_name,
                                   'arch2vec-{}'.format(self.model_path))
        if os.path.exists(self.f_path):
            # Refuse to overwrite an existing table.
            print('{} is already saved'.format(self.f_path))
            raise SystemExit()
        print('save to {}'.format(self.f_path))
        with torch.no_grad():
            for ind in range(len(dataset)):
                entry = dataset[str(ind)]
                adj = torch.Tensor(entry['module_adjacency']).unsqueeze(0).cuda()
                ops = torch.Tensor(entry['module_operations']).unsqueeze(0).cuda()
                adj, ops, _ = preprocessing(adj, ops, **self.cfg['prep'])
                x, _ = self.model._encoder(ops, adj)
                self.embedding[ind] = {
                    # Drop the leading batch dim, then average over dim 0.
                    'feature': x.squeeze(0).mean(dim=0).cpu(),
                    'valid_accuracy': float(entry['validation_accuracy']),
                    'test_accuracy': float(entry['test_accuracy']),
                    'time': float(entry['training_time']),
                }
        torch.save(self.embedding, self.f_path)
        print("finish arch2vec extraction")
        raise SystemExit()

    def _load_embeddings(self):
        """Load the saved table and stack its features into one matrix."""
        self.f_path = os.path.join(self.dir_name, self.emb_path)
        print("load arch2vec from: {}".format(self.f_path))
        # torch.load unpickles the file: only point this at tables you trust.
        self.embedding = torch.load(self.f_path)
        self.features = torch.stack(
            [self.embedding[ind]['feature']
             for ind in range(len(self.embedding))],
            dim=0)
        print('loading finished. pretrained embeddings shape: {}'.format(
            self.features.shape))

    def _observation(self, index):
        """Build the tuple returned to callers for table entry ``index``."""
        record = self.embedding[index]
        return (self.features[index], record['valid_accuracy'],
                record['test_accuracy'], record['time'])

    def get_init_state(self):
        """Pick a seeded random entry, mark it visited and return it.

        :return: (feature, valid_accuracy, test_accuracy, time)
        """
        random.seed(args.seed)
        # randrange excludes the upper bound; randint(0, N) could return N,
        # which is one past the last row.
        start = random.randrange(self.features.shape[0])
        self.visited[start] = True
        return self._observation(start)

    def step(self, action):
        """Return the nearest unvisited entry to ``action``, marking it visited.

        action: 1 x dim
        self.features: N x dim

        :return: (feature, valid_accuracy, test_accuracy, time)
        :raises SystemExit: when every entry has already been visited.
        """
        dist = torch.norm(self.features - action.cpu(), dim=1)
        total = dist.shape[0]
        if len(self.visited) == total:
            print("cannot find in the dataset")
            raise SystemExit()
        # Negated distances make topk (largest first) yield nearest first.
        ranked = (-1 * dist).topk(total).indices
        for rank in range(total):
            candidate = ranked[rank].item()
            if candidate not in self.visited:
                self.visited[candidate] = True
                return self._observation(candidate)
        # Only reachable if ``visited`` holds keys that are not row indices.
        print("cannot find in the dataset")
        raise SystemExit()
class Env(object):
    """Search environment over arch2vec embeddings of the DARTS search space.

    The constructor always builds the encoder and loads ``model-darts.pt``
    from ``pretrained/dim-<args.dim>``.  Depending on ``save`` it then either
    extracts the embedding table and terminates the process, or loads the
    saved table ``arch2vec-darts.pt`` (see ``_reset``).  ``get_init_state``
    and ``step`` hand out (feature, genotype) pairs, never the same one twice.

    The class reads the module-level ``args`` namespace for the model sizes.
    """

    def __init__(self, name, seed, cfg, data_path=None, save=False):
        """Build the encoder, load its checkpoint, then extract or load.

        Args:
            name: Label stored on the instance.
            seed: Stored on the instance; not used by the methods below.
            cfg: Configuration mapping; ``cfg['GAE']`` and ``cfg['prep']`` are
                forwarded to ``Model`` and ``preprocessing``.
            data_path: JSON dataset path, only read when extracting.
            save: Falsy -> extract the table, write it and exit the process.
                Truthy -> load the already saved table.

        Raises:
            SystemExit: if the checkpoint file does not exist.
        """
        self.name = name
        self.seed = seed
        # Keep the configuration that was passed in so that _reset does not
        # depend on a module-level ``cfg`` happening to exist.
        self.cfg = cfg
        self.model = Model(input_dim=args.input_dim,
                           hidden_dim=args.hidden_dim,
                           latent_dim=args.dim,
                           num_hops=args.hops,
                           num_mlp_layers=args.mlps,
                           dropout=args.dropout,
                           **cfg['GAE']).cuda()
        self.dir_name = 'pretrained/dim-{}'.format(args.dim)
        ckpt_path = os.path.join(self.dir_name, 'model-darts.pt')
        if not os.path.exists(ckpt_path):
            print("File {} does not exist.".format(ckpt_path))
            raise SystemExit()
        self.model.load_state_dict(torch.load(ckpt_path)['model_state'])
        self.visited = {}
        self.features = []
        self.genotype = []
        self.embedding = {}
        self._reset(data_path, save)

    def _reset(self, data_path, save):
        """Extract-and-exit when ``save`` is falsy, otherwise load the table.

        NOTE(review): the flag reads inverted (``save=False`` is the branch
        that writes the file); kept as is because callers depend on it.

        Raises:
            SystemExit: always at the end of the extraction branch, and
                earlier if the output file already exists.
        """
        if not save:
            self._extract_embeddings(data_path)
        else:
            self._load_embeddings()

    def _extract_embeddings(self, data_path):
        """Encode every dataset entry, save the table, then exit the process."""
        print("extract arch2vec on DARTS search space ...")
        dataset = load_json(data_path)
        print("length of the dataset: {}".format(len(dataset)))
        self.f_path = os.path.join(self.dir_name, 'arch2vec-darts.pt')
        if os.path.exists(self.f_path):
            # Refuse to overwrite an existing table.
            print('{} is already saved'.format(self.f_path))
            raise SystemExit()
        print('save to {}'.format(self.f_path))
        self.model.eval()
        # Each value is indexed positionally: [0] feeds the adjacency tensor,
        # [1] the operations (via one_hot_darts), [2] the genotype (via
        # process).  Table keys are the running position, not the JSON keys.
        for counter, v in enumerate(dataset.values()):
            adj = torch.Tensor(v[0]).unsqueeze(0).cuda()
            ops = torch.Tensor(one_hot_darts(v[1])).unsqueeze(0).cuda()
            adj, ops, _ = preprocessing(adj, ops, **self.cfg['prep'])
            with torch.no_grad():
                x, _ = self.model._encoder(ops, adj)
                self.embedding[counter] = {
                    # Drop the leading batch dim, then average over dim 0.
                    'feature': x.squeeze(0).mean(dim=0).cpu(),
                    'genotype': process(v[2]),
                }
            print("{}/{}".format(counter, len(dataset)))
        torch.save(self.embedding, self.f_path)
        print("finished arch2vec extraction")
        raise SystemExit()

    def _load_embeddings(self):
        """Load the saved table into ``features`` and ``genotype``."""
        self.f_path = os.path.join(self.dir_name, 'arch2vec-darts.pt')
        print("load arch2vec from: {}".format(self.f_path))
        # torch.load unpickles the file: only point this at tables you trust.
        self.embedding = torch.load(self.f_path)
        records = [self.embedding[ind] for ind in range(len(self.embedding))]
        self.genotype = [record['genotype'] for record in records]
        self.features = torch.stack(
            [record['feature'] for record in records], dim=0)
        print('loading finished. pretrained embeddings shape: {}'.format(
            self.features.shape))

    def get_init_state(self):
        """Pick a random entry, mark it visited and return it.

        Uses the global ``random`` state as is (no reseeding here).

        :return: (feature, genotype)
        """
        # randrange excludes the upper bound; randint(0, N) could return N,
        # which is one past the last row.
        start = random.randrange(self.features.shape[0])
        self.visited[start] = True
        return self.features[start], self.genotype[start]

    def step(self, action):
        """Return the nearest unvisited entry to ``action``, marking it visited.

        action: 1 x dim
        self.features: N x dim

        :return: (feature, genotype)
        :raises SystemExit: when every entry has already been visited.
        """
        dist = torch.norm(self.features - action.cpu(), dim=1)
        total = dist.shape[0]
        if len(self.visited) == total:
            print("CANNOT FIND IN THE DATASET!")
            raise SystemExit()
        # Negated distances make topk (largest first) yield nearest first.
        ranked = (-1 * dist).topk(total).indices
        for rank in range(total):
            candidate = ranked[rank].item()
            if candidate not in self.visited:
                self.visited[candidate] = True
                return self.features[candidate], self.genotype[candidate]
        # Only reachable if ``visited`` holds keys that are not row indices.
        print("CANNOT FIND IN THE DATASET!")
        raise SystemExit()