Пример #1
0
class Env(object):
    """Lookup environment over a table of pretrained arch2vec embeddings.

    With ``save=False`` the constructor encodes every architecture of the
    dataset with the pretrained model, writes the resulting table to disk and
    terminates the process.  With ``save=True`` it loads a previously written
    table and serves it through :meth:`get_init_state` and :meth:`step`.

    Model hyper-parameters and file names are read from the module-level
    ``args`` namespace.

    Args:
        name: Label stored on the instance.
        seed: Seed used for shuffling the table and picking the first state.
        cfg: Mapping with a ``'GAE'`` entry (extra ``Model`` keyword
            arguments) and a ``'prep'`` entry (``preprocessing`` keyword
            arguments).
        data_path: Path of the JSON dataset; only read when ``save`` is false.
        save: Selects the mode described above.
    """

    def __init__(self, name, seed, cfg, data_path=None, save=False):
        self.name = name
        self.seed = seed
        # Kept on the instance so _reset() does not depend on a module-level
        # variable that merely happens to be called `cfg`.
        self.cfg = cfg
        self.dir_name = 'pretrained/dim-{}'.format(args.latent_dim)
        print('Save file director is {}'.format(self.dir_name))
        self.visited = {}    # index -> True for every state handed out
        self.features = []   # becomes an N x dim tensor once loaded
        self.embedding = {}  # index -> record dict
        self._reset(data_path, save)

    def _reset(self, data_path, save):
        """Extract-and-exit when ``save`` is false, otherwise load the table."""
        if not save:
            self._extract_and_save(data_path)
        else:
            self._load_saved()

    def _extract_and_save(self, data_path):
        """Encode the whole dataset, save the table and end the process."""
        print("extract arch2vec embedding table...")
        dataset = load_json(data_path)
        self.model = Model(input_dim=args.input_dim,
                           hidden_dim=args.hidden_dim,
                           latent_dim=args.latent_dim,
                           num_hops=args.hops,
                           num_mlp_layers=args.mlps,
                           dropout=args.dropout,
                           **self.cfg['GAE']).cuda()
        model_ckpt_path = os.path.join(self.dir_name,
                                       '{}'.format(args.model_path))
        if not os.path.exists(model_ckpt_path):
            print("File {} does not exist.".format(model_ckpt_path))
            raise SystemExit(1)
        self.model.load_state_dict(
            torch.load(model_ckpt_path)['model_state'])
        self.model.eval()
        print("length of the dataset: {}".format(len(dataset)))
        self.f_path = os.path.join(
            self.dir_name, '{}-arch2vec.pt'.format(args.dataset_name))
        if os.path.exists(self.f_path):
            # Never overwrite an existing table.
            print('ATTENTION!!! {} is already saved.'.format(self.f_path))
            raise SystemExit(0)
        print('save to {} ...'.format(self.f_path))
        # Inference only: without no_grad every stored feature would keep its
        # autograd graph alive for the whole run.
        with torch.no_grad():
            for ind in range(len(dataset)):
                record = dataset[str(ind)]
                adj = torch.Tensor(
                    record['module_adjacency']).unsqueeze(0).cuda()
                ops = torch.Tensor(
                    record['module_operations']).unsqueeze(0).cuda()
                adj, ops, _ = preprocessing(adj, ops, **self.cfg['prep'])
                x, _ = self.model._encoder(ops, adj)
                self.embedding[ind] = {
                    'feature': x.mean(dim=1).squeeze(0).cpu(),
                    'valid_accuracy': float(record['validation_accuracy']),
                    'test_accuracy': float(record['test_accuracy']),
                    'time': float(record['training_time']),
                    'other_info': {
                        'valid_accuracy_avg':
                        record['validation_accuracy_avg'],
                        'test_accuracy_avg': record['test_accuracy_avg'],
                    },
                }
        torch.save(self.embedding, self.f_path)
        print("finished arch2vec extraction")
        raise SystemExit(0)

    def _load_saved(self):
        """Load the saved table, shuffle it and stack the feature tensor."""
        self.f_path = os.path.join(
            self.dir_name, '{}-arch2vec.pt'.format(args.dataset_name))
        print("load pretrained arch2vec in path: {}".format(self.f_path))
        self.embedding = torch.load(self.f_path)
        # Use the seed handed to the constructor rather than a global.
        random.seed(self.seed)
        # The table is a dict keyed 0..N-1 (see _extract_and_save), which is
        # what lets random.shuffle treat it like a sequence and permute the
        # records among those keys.
        random.shuffle(self.embedding)
        self.features = torch.stack(
            [self.embedding[ind]['feature']
             for ind in range(len(self.embedding))],
            dim=0)
        print('loading finished. pretrained embeddings shape: {}'.format(
            self.features.shape))

    def _lookup(self, idx):
        """Return the state tuple for table row ``idx``.

        Accuracies are divided by 100.0; time and other_info are passed
        through unchanged.
        """
        record = self.embedding[idx]
        return (self.features[idx],
                record['valid_accuracy'] / 100.0,
                record['test_accuracy'] / 100.0,
                record['time'],
                record['other_info'])

    def get_init_state(self):
        """Pick a seeded random row, mark it visited and return its state.

        :return: (feature, valid_accuracy, test_accuracy, time, other_info);
            feature is 1 x dim
        """
        random.seed(self.seed)
        # randrange excludes the upper bound; randint(0, N) could return N,
        # which is one past the last row.
        idx = random.randrange(self.features.shape[0])
        self.visited[idx] = True
        return self._lookup(idx)

    def step(self, action):
        """Return the state of a not-yet-visited row close to ``action``.

        action: 1 x dim
        self.features. N x dim

        Rows are ranked by Euclidean distance to ``action``.  Ends the
        process when no candidate is left.
        """
        dist = torch.norm(self.features - action.cpu(), dim=1)
        total = dist.shape[0]
        # topk over the negated distances yields indices sorted nearest first.
        order = (-1 * dist).topk(total).indices
        if len(self.visited) != total:
            # NOTE(review): the scan starts at rank 1, i.e. the single
            # nearest row is never returned.  Kept as in the original;
            # confirm this is intended.
            for rank in range(1, total):
                idx = order[rank].item()
                if idx not in self.visited:
                    self.visited[idx] = True
                    return self._lookup(idx)
        # Either every row is visited or only the skipped nearest row is left.
        print("CANNOT FIND IN THE ENTIRE DATASET !!!")
        raise SystemExit(1)
Пример #2
0
class Env(object):
    """Lookup environment over a table of pretrained arch2vec embeddings.

    With ``save=False`` the constructor encodes every architecture of the
    dataset with the pretrained model, writes the resulting table to disk and
    terminates the process.  With ``save=True`` it loads the table stored at
    ``emb_path`` and serves it through :meth:`get_init_state` and
    :meth:`step`.

    The directory holding checkpoint and table is derived from the
    module-level ``args.dim``.

    Args:
        name: Label stored on the instance.
        seed: Seed used when picking the first state.
        emb_path: File name of the saved table inside the directory.
        model_path: File name of the model checkpoint inside the directory.
        cfg: Mapping with a ``'GAE'`` entry (extra ``Model`` keyword
            arguments) and a ``'prep'`` entry (``preprocessing`` keyword
            arguments).
        data_path: Path of the JSON dataset; only read when ``save`` is false.
        save: Selects the mode described above.
    """

    def __init__(self, name, seed, emb_path, model_path, cfg, data_path=None, save=False):
        self.name = name
        self.model_path = model_path
        self.emb_path = emb_path
        self.seed = seed
        # Kept on the instance so _reset() does not depend on a module-level
        # variable that merely happens to be called `cfg`.
        self.cfg = cfg
        self.dir_name = 'pretrained/dim-{}'.format(args.dim)
        self.visited = {}    # index -> True for every state handed out
        self.features = []   # becomes an N x dim tensor once loaded
        self.embedding = {}  # index -> record dict
        self._reset(data_path, save)

    def _reset(self, data_path, save):
        """Extract-and-exit when ``save`` is false, otherwise load the table."""
        if not save:
            self._extract_and_save(data_path)
        else:
            self._load_saved()

    def _extract_and_save(self, data_path):
        """Encode the whole dataset, save the table and end the process."""
        ckpt_path = os.path.join(self.dir_name, self.model_path)
        print("extract arch2vec from {}".format(ckpt_path))
        if not os.path.exists(ckpt_path):
            # Say why we stop instead of exiting silently.
            print("File {} does not exist.".format(ckpt_path))
            raise SystemExit(1)
        dataset = load_json(data_path)
        # NOTE(review): encoder sizes are hard-coded although the directory is
        # derived from args.dim; confirm they match the checkpoint.
        self.model = Model(input_dim=5, hidden_dim=128, latent_dim=16,
                           num_hops=5, num_mlp_layers=2, dropout=0,
                           **self.cfg['GAE']).cuda()
        # Load from exactly the path that was checked above.
        self.model.load_state_dict(torch.load(ckpt_path)['model_state'])
        self.model.eval()
        print("length of the dataset: {}".format(len(dataset)))
        self.f_path = os.path.join(self.dir_name, 'arch2vec-{}'.format(self.model_path))
        if os.path.exists(self.f_path):
            # Never overwrite an existing table.
            print('{} is already saved'.format(self.f_path))
            raise SystemExit(0)
        print('save to {}'.format(self.f_path))
        with torch.no_grad():
            for ind in range(len(dataset)):
                record = dataset[str(ind)]
                adj = torch.Tensor(record['module_adjacency']).unsqueeze(0).cuda()
                ops = torch.Tensor(record['module_operations']).unsqueeze(0).cuda()
                adj, ops, _ = preprocessing(adj, ops, **self.cfg['prep'])
                x, _ = self.model._encoder(ops, adj)
                self.embedding[ind] = {
                    'feature': x.squeeze(0).mean(dim=0).cpu(),
                    'valid_accuracy': float(record['validation_accuracy']),
                    'test_accuracy': float(record['test_accuracy']),
                    'time': float(record['training_time']),
                }
        torch.save(self.embedding, self.f_path)
        print("finish arch2vec extraction")
        raise SystemExit(0)

    def _load_saved(self):
        """Load the saved table and stack its features into one tensor."""
        self.f_path = os.path.join(self.dir_name, self.emb_path)
        print("load arch2vec from: {}".format(self.f_path))
        self.embedding = torch.load(self.f_path)
        self.features = torch.stack(
            [self.embedding[ind]['feature'] for ind in range(len(self.embedding))],
            dim=0)
        print('loading finished. pretrained embeddings shape: {}'.format(self.features.shape))

    def _lookup(self, idx):
        """Return (feature, valid_accuracy, test_accuracy, time) for row ``idx``."""
        record = self.embedding[idx]
        return (self.features[idx], record['valid_accuracy'],
                record['test_accuracy'], record['time'])

    def get_init_state(self):
        """Pick a seeded random row, mark it visited and return its state.

        :return: (feature, valid_accuracy, test_accuracy, time);
            feature is 1 x dim
        """
        # Use the seed handed to the constructor rather than a global.
        random.seed(self.seed)
        # randrange excludes the upper bound; randint(0, N) could return N,
        # which is one past the last row.
        idx = random.randrange(self.features.shape[0])
        self.visited[idx] = True
        return self._lookup(idx)

    def step(self, action):
        """Return the state of the nearest not-yet-visited row.

        action: 1 x dim
        self.features. N x dim

        Rows are ranked by Euclidean distance to ``action``.  Ends the
        process once every row has been visited.
        """
        dist = torch.norm(self.features - action.cpu(), dim=1)
        total = dist.shape[0]
        # topk over the negated distances yields indices sorted nearest first.
        order = (-1 * dist).topk(total).indices
        if len(self.visited) != total:
            for rank in range(total):
                idx = order[rank].item()
                if idx not in self.visited:
                    self.visited[idx] = True
                    return self._lookup(idx)
        print("cannot find in the dataset")
        raise SystemExit(1)
Пример #3
0
class Env(object):
    """Lookup environment over arch2vec embeddings of the DARTS search space.

    The constructor builds the model from the module-level ``args`` and loads
    the ``model-darts.pt`` checkpoint.  With ``save=False`` it then encodes
    every entry of the dataset, writes the table to ``arch2vec-darts.pt`` and
    terminates the process.  With ``save=True`` it loads that table and
    serves it through :meth:`get_init_state` and :meth:`step`.

    Args:
        name: Label stored on the instance.
        seed: Stored on the instance; not read by this class.
        cfg: Mapping with a ``'GAE'`` entry (extra ``Model`` keyword
            arguments) and a ``'prep'`` entry (``preprocessing`` keyword
            arguments).
        data_path: Path of the JSON dataset; only read when ``save`` is false.
        save: Selects the mode described above.
    """

    def __init__(self, name, seed, cfg, data_path=None, save=False):
        self.name = name
        self.seed = seed
        # Kept on the instance so _reset() does not depend on a module-level
        # variable that merely happens to be called `cfg`.
        self.cfg = cfg
        self.model = Model(input_dim=args.input_dim,
                           hidden_dim=args.hidden_dim,
                           latent_dim=args.dim,
                           num_hops=args.hops,
                           num_mlp_layers=args.mlps,
                           dropout=args.dropout,
                           **cfg['GAE']).cuda()
        self.dir_name = 'pretrained/dim-{}'.format(args.dim)
        ckpt_path = os.path.join(self.dir_name, 'model-darts.pt')
        if not os.path.exists(ckpt_path):
            # Say why we stop instead of exiting silently.
            print("File {} does not exist.".format(ckpt_path))
            raise SystemExit(1)
        # Load from exactly the path that was checked above.
        self.model.load_state_dict(torch.load(ckpt_path)['model_state'])
        self.visited = {}    # index -> True for every state handed out
        self.features = []   # becomes an N x dim tensor once loaded
        self.genotype = []   # genotype of row i, aligned with self.features
        self.embedding = {}  # index -> {'feature': ..., 'genotype': ...}
        self._reset(data_path, save)

    def _reset(self, data_path, save):
        """Extract-and-exit when ``save`` is false, otherwise load the table."""
        if not save:
            self._extract_and_save(data_path)
        else:
            self._load_saved()

    def _extract_and_save(self, data_path):
        """Encode the whole dataset, save the table and end the process."""
        print("extract arch2vec on DARTS search space ...")
        dataset = load_json(data_path)
        print("length of the dataset: {}".format(len(dataset)))
        self.f_path = os.path.join(self.dir_name, 'arch2vec-darts.pt')
        if os.path.exists(self.f_path):
            # Never overwrite an existing table.
            print('{} is already saved'.format(self.f_path))
            raise SystemExit(0)
        print('save to {}'.format(self.f_path))
        self.model.eval()
        # Each value is indexed as [0] adjacency, [1] operations (one-hot
        # encoded by one_hot_darts) and [2] the input to process().
        for counter, v in enumerate(dataset.values()):
            adj = torch.Tensor(v[0]).unsqueeze(0).cuda()
            ops = torch.Tensor(one_hot_darts(v[1])).unsqueeze(0).cuda()
            adj, ops, _ = preprocessing(adj, ops, **self.cfg['prep'])
            with torch.no_grad():
                x, _ = self.model._encoder(ops, adj)
                self.embedding[counter] = {
                    'feature': x.squeeze(0).mean(dim=0).cpu(),
                    'genotype': process(v[2])
                }
            print("{}/{}".format(counter, len(dataset)))
        torch.save(self.embedding, self.f_path)
        print("finished arch2vec extraction")
        raise SystemExit(0)

    def _load_saved(self):
        """Load the saved table into ``features`` and ``genotype``."""
        self.f_path = os.path.join(self.dir_name, 'arch2vec-darts.pt')
        print("load arch2vec from: {}".format(self.f_path))
        self.embedding = torch.load(self.f_path)
        rows = [self.embedding[ind] for ind in range(len(self.embedding))]
        self.genotype.extend(row['genotype'] for row in rows)
        self.features = torch.stack([row['feature'] for row in rows], dim=0)
        print('loading finished. pretrained embeddings shape: {}'.format(
            self.features.shape))

    def get_init_state(self):
        """Pick a random row, mark it visited and return it.

        :return: (feature, genotype); feature is 1 x dim
        """
        # randrange excludes the upper bound; randint(0, N) could return N,
        # which is one past the last row.
        idx = random.randrange(self.features.shape[0])
        self.visited[idx] = True
        return self.features[idx], self.genotype[idx]

    def step(self, action):
        """Return feature and genotype of the nearest not-yet-visited row.

        action: 1 x dim
        self.features. N x dim

        Rows are ranked by Euclidean distance to ``action``.  Ends the
        process once every row has been visited.
        """
        dist = torch.norm(self.features - action.cpu(), dim=1)
        total = dist.shape[0]
        # topk over the negated distances yields indices sorted nearest first.
        order = (-1 * dist).topk(total).indices
        if len(self.visited) != total:
            for rank in range(total):
                idx = order[rank].item()
                if idx not in self.visited:
                    self.visited[idx] = True
                    return self.features[idx], self.genotype[idx]
        print("CANNOT FIND IN THE DATASET!")
        raise SystemExit(1)