Example #1
    def __init__(self, composition, device, context=None):

        if not torch_available:
            raise Exception(
                'PyTorch Python module (torch) is not installed. Please install it with '
                '`pip install torch` or `pip3 install torch`')

        super(PytorchModelCreator, self).__init__()

        # Maps Mechanism -> PytorchMechanismWrapper
        self.nodes = []
        self.component_map = {}

        # Maps Projections -> PytorchProjectionWrappers
        self.projections = []
        self.projection_map = {}

        self.params = nn.ParameterList()
        self.device = device
        self._composition = composition

        # Instantiate pytorch mechanisms
        for node in set(composition.nodes) - set(
                composition.get_nodes_by_role(NodeRole.LEARNING)):
            pytorch_node = PytorchMechanismWrapper(
                node,
                self._composition._get_node_index(node),
                device,
                context=context)
            self.component_map[node] = pytorch_node
            self.nodes.append(pytorch_node)

        # Instantiate pytorch projections
        for projection in composition.projections:
            if projection.sender.owner in self.component_map and projection.receiver.owner in self.component_map:
                proj_send = self.component_map[projection.sender.owner]
                proj_recv = self.component_map[projection.receiver.owner]

                port_idx = projection.sender.owner.output_ports.index(
                    projection.sender)
                new_proj = PytorchProjectionWrapper(
                    projection,
                    list(self._composition._inner_projections).index(
                        projection),
                    port_idx,
                    device,
                    sender=proj_send,
                    receiver=proj_recv,
                    context=context)
                proj_send.add_efferent(new_proj)
                proj_recv.add_afferent(new_proj)
                self.projection_map[projection] = new_proj
                self.projections.append(new_proj)
                self.params.append(new_proj.matrix)

        # Setup execution sets
        # 1) Remove all learning-specific nodes
        self.execution_sets = [
            x - set(composition.get_nodes_by_role(NodeRole.LEARNING))
            for x in composition.scheduler.run(context=context)
        ]
        # 2) Convert to PyTorch component representation
        self.execution_sets = [{
            self.component_map[comp]
            for comp in s if comp in self.component_map
        } for s in self.execution_sets]
        # 3) Remove empty execution sets
        self.execution_sets = [x for x in self.execution_sets if len(x) > 0]
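A note on the pattern above: parameters kept in plain Python lists or dicts are invisible to Module.parameters(), which is why each projection matrix is also appended to self.params = nn.ParameterList(). A minimal standalone sketch of the same idea (names are illustrative, not from the snippet):

    import torch
    import torch.nn as nn

    class TinyGraphModel(nn.Module):
        def __init__(self, n_edges, dim):
            super().__init__()
            # A ParameterList registers each matrix, so it shows up in
            # model.parameters(); a plain Python list would not.
            self.params = nn.ParameterList(
                [nn.Parameter(torch.randn(dim, dim)) for _ in range(n_edges)])

    model = TinyGraphModel(3, 4)
    opt = torch.optim.SGD(model.parameters(), lr=0.1)  # sees all 3 matrices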
Example #2
    def __init__(self, use_spatial_model=True, gpu_cuda=True):
        super(PoseDetector, self).__init__()

        self.model_size = cfg.MODEL_SIZE
        self.output_shape = (60, 90, 10)
        self.use_spatial_model = use_spatial_model
        self.gpu_cuda = gpu_cuda
        self._test_dataloader = self.test_dataloader()

        # Model joints:
        self.joint_names = [
            'lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri', 'lhip', 'rhip',
            'nose', 'torso'
        ]
        self.joint_dependence = {}
        ## Assuming there is co-dependence between EVERY pair of joints
        for joint in self.joint_names:
            self.joint_dependence[joint] = [
                joint_cond for joint_cond in self.joint_names
                if joint_cond != joint
            ]

        ## Initializing pairwise energies and bias between Joints
        self.pairwise_energies, self.pairwise_biases = {}, {}
        for joint in self.joint_names:  #[:n_joints]:
            for cond_joint in self.joint_dependence[joint]:
                # TODO: manage dynamic sizing (in place of 120, 180)
                joint_key = joint + '_' + cond_joint
                # nn.Parameter is trainable by default, so the tensors need no
                # inner requires_grad; only the target device differs.
                device = "cuda" if self.gpu_cuda else "cpu"
                self.pairwise_energies[joint_key] = nn.Parameter(
                    torch.ones([1, 119, 179, 1],
                               dtype=torch.float32,
                               device=device) / (119 * 179))
                self.pairwise_biases[joint_key] = nn.Parameter(
                    torch.ones([1, 60, 90, 1],
                               dtype=torch.float32,
                               device=device) * 1e-5)

        # This line is needed to pass all pairwise parameters to the optimizer
        self.pairwise_parameters = nn.ParameterList([
            self.pairwise_energies[joint_key]
            for joint_key in self.pairwise_energies.keys()
        ] + [
            self.pairwise_biases[joint_key]
            for joint_key in self.pairwise_biases.keys()
        ])

        # Layers for full resolution image
        self.fullres_layer1 = nn.Sequential(
            nn.Conv2d(3, self.model_size * 1, 5, stride=1, padding=2),
            nn.ReLU(), nn.BatchNorm2d(self.model_size * 1),
            nn.MaxPool2d(2, stride=2))

        self.fullres_layer2 = nn.Sequential(
            nn.Conv2d(self.model_size * 1,
                      self.model_size * 2,
                      5,
                      stride=1,
                      padding=2), nn.ReLU(),
            nn.BatchNorm2d(self.model_size * 2), nn.MaxPool2d(2, stride=2))

        self.fullres_layer3 = nn.Sequential(
            nn.Conv2d(self.model_size * 2,
                      self.model_size * 4,
                      9,
                      stride=1,
                      padding=4), nn.ReLU(),
            nn.BatchNorm2d(self.model_size * 4), nn.MaxPool2d(2, stride=2))

        # Layers for half resolution image
        self.halfres_layer1 = nn.Sequential(
            nn.Conv2d(3, self.model_size * 1, 5, stride=1, padding=2),
            nn.ReLU(), nn.BatchNorm2d(self.model_size * 1),
            nn.MaxPool2d(2, stride=2))

        self.halfres_layer2 = nn.Sequential(
            nn.Conv2d(self.model_size * 1,
                      self.model_size * 2,
                      5,
                      stride=1,
                      padding=2), nn.ReLU(),
            nn.BatchNorm2d(self.model_size * 2), nn.MaxPool2d(2, stride=2))

        self.halfres_layer3 = nn.Sequential(
            nn.Conv2d(self.model_size * 2,
                      self.model_size * 4,
                      9,
                      stride=1,
                      padding=4), nn.ReLU(),
            nn.BatchNorm2d(self.model_size * 4), nn.MaxPool2d(2, stride=2))

        # Layers for quarter resolution image
        self.quarterres_layer1 = nn.Sequential(
            nn.Conv2d(3, self.model_size * 1, 5, stride=1, padding=2),
            nn.ReLU(), nn.BatchNorm2d(self.model_size * 1),
            nn.MaxPool2d(2, stride=2))

        self.quarterres_layer2 = nn.Sequential(
            nn.Conv2d(self.model_size * 1,
                      self.model_size * 2,
                      5,
                      stride=1,
                      padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(self.model_size * 2),
            nn.MaxPool2d(2, stride=2,
                         padding=1)  # Add padding so the upsampled dimensions fit
        )

        self.quarterres_layer3 = nn.Sequential(
            nn.Conv2d(self.model_size * 2,
                      self.model_size * 4,
                      9,
                      stride=1,
                      padding=4), nn.ReLU(),
            nn.BatchNorm2d(self.model_size * 4), nn.MaxPool2d(2, stride=2))

        # Last common layers
        self.last_layers = nn.Sequential(
            nn.Conv2d(self.model_size * 4,
                      self.model_size * 4,
                      9,
                      stride=1,
                      padding=4), nn.ReLU(),
            nn.BatchNorm2d(self.model_size * 4),
            nn.Conv2d(self.model_size * 4,
                      self.output_shape[2],
                      9,
                      stride=1,
                      padding=4))

        ## Upsampling and downsampling

        self.conv_downsample = nn.Sequential(
            nn.Conv2d(3, 3, 3, stride=2, padding=1),
            nn.Conv2d(3, 3, 1, stride=1, padding=0))

        self.conv_upsample = nn.ConvTranspose2d(self.model_size * 4,
                                                self.model_size * 4,
                                                3,
                                                stride=2,
                                                padding=1)

        self.conv1_1 = nn.Conv2d(self.model_size * 4,
                                 self.model_size * 4,
                                 1,
                                 stride=1,
                                 padding=0)

        ## Softplus for spatial model
        self.softplus = nn.Softplus(beta=5)

        ## Batchnorm for spatial model
        self.BN_SM = nn.BatchNorm2d(self.output_shape[2])
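Since the pairwise energies and biases above live in plain dicts, the trailing nn.ParameterList is what actually registers them with the module. A sketch of the nn.ParameterDict alternative, which registers on assignment (toy sizes, illustrative names):

    import torch
    import torch.nn as nn

    class PairwiseTerms(nn.Module):
        def __init__(self, joint_keys):
            super().__init__()
            # ParameterDict registers each entry directly; no extra
            # ParameterList is needed for the optimizer to see them.
            self.pairwise_energies = nn.ParameterDict({
                key: nn.Parameter(torch.ones(1, 119, 179, 1) / (119 * 179))
                for key in joint_keys
            })

    m = PairwiseTerms(['lsho_lelb', 'lelb_lsho'])
    assert len(list(m.parameters())) == 2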
Example #3
    def __init__(self, params, placedb):
        """
        @brief initialization
        @param params parameter 
        @param placedb placement database 
        """
        torch.manual_seed(params.random_seed)
        super(BasicPlace, self).__init__()

        tt = time.time()
        self.init_pos = np.zeros(placedb.num_nodes * 2, dtype=placedb.dtype)
        # x position
        self.init_pos[0:placedb.num_physical_nodes] = placedb.node_x
        if params.global_place_flag and params.random_center_init_flag:  # move to center of layout
            logging.info(
                "move cells to the center of layout with random noise")
            self.init_pos[0:placedb.num_movable_nodes] = np.random.normal(
                loc=(placedb.xl * 1.0 + placedb.xh * 1.0) / 2,
                scale=(placedb.xh - placedb.xl) * 0.001,
                size=placedb.num_movable_nodes)
        #self.init_pos[0:placedb.num_movable_nodes] = init_x[0:placedb.num_movable_nodes]*0.01 + (placedb.xl+placedb.xh)/2
        # y position
        self.init_pos[placedb.num_nodes:placedb.num_nodes +
                      placedb.num_physical_nodes] = placedb.node_y
        if params.global_place_flag and params.random_center_init_flag:  # move to center of layout
            self.init_pos[placedb.num_nodes:placedb.num_nodes +
                          placedb.num_movable_nodes] = np.random.normal(
                              loc=(placedb.yl * 1.0 + placedb.yh * 1.0) / 2,
                              scale=(placedb.yh - placedb.yl) * 0.001,
                              size=placedb.num_movable_nodes)
        #init_y[0:placedb.num_movable_nodes] = init_y[0:placedb.num_movable_nodes]*0.01 + (placedb.yl+placedb.yh)/2

        if placedb.num_filler_nodes:  # uniformly distribute filler cells in the layout
            self.init_pos[placedb.num_physical_nodes:placedb.
                          num_nodes] = np.random.uniform(
                              low=placedb.xl,
                              high=placedb.xh -
                              placedb.node_size_x[-placedb.num_filler_nodes],
                              size=placedb.num_filler_nodes)
            self.init_pos[placedb.num_nodes +
                          placedb.num_physical_nodes:placedb.num_nodes *
                          2] = np.random.uniform(
                              low=placedb.yl,
                              high=placedb.yh -
                              placedb.node_size_y[-placedb.num_filler_nodes],
                              size=placedb.num_filler_nodes)

        logging.debug("prepare init_pos takes %.2f seconds" %
                      (time.time() - tt))

        self.device = torch.device("cuda" if params.gpu else "cpu")

        # position should be parameter
        # must be defined in BasicPlace
        tt = time.time()
        self.pos = nn.ParameterList(
            [nn.Parameter(torch.from_numpy(self.init_pos).to(self.device))])
        logging.debug("build pos takes %.2f seconds" % (time.time() - tt))
        # shared data on device for building ops
        # I do not want to construct the data from placedb again and again for each op
        tt = time.time()
        self.data_collections = PlaceDataCollection(self.pos, params, placedb,
                                                    self.device)
        logging.debug("build data_collections takes %.2f seconds" %
                      (time.time() - tt))

        # similarly I wrap all ops
        tt = time.time()
        self.op_collections = PlaceOpCollection()
        logging.debug("build op_collections takes %.2f seconds" %
                      (time.time() - tt))

        tt = time.time()
        # position to pin position
        self.op_collections.pin_pos_op = self.build_pin_pos(
            params, placedb, self.data_collections, self.device)
        # bound nodes to layout region
        self.op_collections.move_boundary_op = self.build_move_boundary(
            params, placedb, self.data_collections, self.device)
        # hpwl and density overflow ops for evaluation
        self.op_collections.hpwl_op = self.build_hpwl(
            params, placedb, self.data_collections,
            self.op_collections.pin_pos_op, self.device)
        # rectilinear minimum steiner tree wirelength from flute
        # can only be called once
        #self.op_collections.rmst_wl_op = self.build_rmst_wl(params, placedb, self.op_collections.pin_pos_op, torch.device("cpu"))
        #self.op_collections.density_overflow_op = self.build_density_overflow(params, placedb, self.data_collections, self.device)
        self.op_collections.density_overflow_op = self.build_electric_overflow(
            params, placedb, self.data_collections, self.device)
        # legality check
        self.op_collections.legality_check_op = self.build_legality_check(
            params, placedb, self.data_collections, self.device)
        # legalization
        self.op_collections.legalize_op = self.build_legalization(
            params, placedb, self.data_collections, self.device)
        # detailed placement
        self.op_collections.detailed_place_op = self.build_detailed_placement(
            params, placedb, self.data_collections, self.device)
        # draw placement
        self.op_collections.draw_place_op = self.build_draw_placement(
            params, placedb)

        # flag for rmst_wl_op
        # can only read once
        self.read_lut_flag = True

        logging.debug("build BasicPlace ops takes %.2f seconds" %
                      (time.time() - tt))
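For orientation, the position vector above uses a flat [x..., y...] layout: x coordinates occupy init_pos[0:num_nodes] and y coordinates init_pos[num_nodes:2*num_nodes], with physical nodes at the head of each half and filler nodes at the tail. A toy version of the indexing (sizes assumed):

    import numpy as np

    num_nodes, num_physical = 4, 3
    init_pos = np.zeros(2 * num_nodes, dtype=np.float32)
    init_pos[0:num_physical] = [0.0, 1.0, 2.0]                # x, physical nodes
    init_pos[num_nodes:num_nodes + num_physical] = [5.0] * 3  # y, physical nodes
    # remaining slots (fillers) get uniform samples inside the layout region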
Example #4
    def __init__(self,
                 in_channels: List,
                 latent_dim: int,
                 n_dataset: List,
                 hidden_dims: List = None,
                 alpha: float = None,
                 gamma: float = 1000.,
                 max_capacity: int = 25,
                 capacity_max_iter: int = int(1e5),
                 loss_type: str = 'B',
                 intercept_adj: bool = True,
                 slope_adj: bool = True,
                 log=False):
        super(VAE, self).__init__()

        self.latent_dim = latent_dim
        if alpha is None:
            self.alpha = 50.0 / latent_dim
        else:
            self.alpha = alpha
        self.gamma = gamma  # TODO: what is gamma
        self.loss_type = loss_type
        self.C_max = torch.Tensor([max_capacity])
        self.C_stop_iter = capacity_max_iter
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.experts = ProductOfExperts()
        self.n_dataset = n_dataset
        self.intercept_adj = intercept_adj
        self.slope_adj = slope_adj
        self.log = log

        self.beta = nn.ParameterList()
        for in_ch in in_channels:
            self.beta.append(
                torch.nn.Parameter(xavier_init(latent_dim, in_ch),
                                   requires_grad=True))
        self.beta_dataset = nn.ParameterList()
        for in_ch, n_d in zip(in_channels, n_dataset):
            self.beta_dataset.append(
                torch.nn.Parameter(xavier_init(n_d, in_ch),
                                   requires_grad=True))
        self.beta_dataset_mtp = nn.ParameterList()
        for in_ch, n_d in zip(in_channels, n_dataset):
            # torch.rand returns uniform [0, 1)
            self.beta_dataset_mtp.append(
                torch.nn.Parameter(torch.rand(n_d, in_ch), requires_grad=True))

        if hidden_dims is None:
            hidden_dims = [128, 64]

        # Constructing Laplace Approximation to Dirichlet Prior
        # The greater the alpha, the higher the mode. That is, the probs will
        # be more centered around (1/latent_dim, ..., 1/latent_dim)
        self.a = self.alpha * torch.ones(1, self.latent_dim)
        self.mu2 = (torch.log(self.a) -
                    torch.mean(torch.log(self.a), 1)).to(device=self.device)
        self.var2 = (((1 / self.a) * (1 - (2.0 / self.latent_dim))) +
                     (1.0 / (self.latent_dim * self.latent_dim)) *
                     torch.sum(1 / self.a, 1)).to(device=self.device)

        self.encoder = nn.ModuleList()
        self.fc_mu = nn.ModuleList()
        self.fc_var = nn.ModuleList()
        for in_ch in in_channels:
            # Build Encoder
            modules = []
            current_in = in_ch
            for h_dim in hidden_dims:
                modules.append(
                    nn.Sequential(nn.Linear(current_in, h_dim),
                                  nn.BatchNorm1d(h_dim), nn.LeakyReLU()
                                  )  # the original paper uses tf.nn.softplus
                )
                current_in = h_dim
            self.encoder.append(nn.Sequential(*modules))
            self.fc_mu.append(nn.Linear(hidden_dims[-1], latent_dim))
            self.fc_var.append(nn.Linear(hidden_dims[-1], latent_dim))
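For clarity, mu2 and var2 implement the standard Laplace (softmax-basis) approximation to a Dirichlet(alpha) prior with K = latent_dim components:

    \mu_k = \log \alpha_k - \frac{1}{K} \sum_{i=1}^{K} \log \alpha_i, \qquad
    \sigma_k^2 = \frac{1}{\alpha_k}\Bigl(1 - \frac{2}{K}\Bigr)
               + \frac{1}{K^2} \sum_{i=1}^{K} \frac{1}{\alpha_i}.

Because every alpha_k equals self.alpha here, mu2 is the zero vector and var2 reduces to (1/alpha)(1 - 1/K) in each coordinate.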
Example #5
    def __init__(self,
                 points=1024,
                 class_num=40,
                 embed_dim=64,
                 heads=4,
                 dim_head=32,
                 pre_blocks=[2, 2, 2, 2],
                 pos_blocks=[2, 2, 2, 2],
                 k_neighbors=[32, 32, 32, 32],
                 reducers=[2, 2, 2, 2],
                 **kwargs):
        super(Model7, self).__init__()
        self.stages = len(pre_blocks)
        self.class_num = class_num
        self.heads = heads
        self.dim_head = dim_head
        self.points = points
        self.embedding = nn.Sequential(FCBNReLU1D(3, embed_dim),
                                       nn.Conv1d(embed_dim, embed_dim, 1))
        assert len(pre_blocks) == len(k_neighbors) == len(reducers) == len(pos_blocks), \
            "Please check that pre_blocks, pos_blocks, k_neighbors and reducers have the same number of stages."
        self.local_grouper_list = nn.ModuleList()
        self.pre_blocks_list = nn.ModuleList()
        self.pos_blocks_list = nn.ModuleList()
        self.local_token_list = nn.ParameterList()
        self.global_token_list = nn.ParameterList()
        last_channel = embed_dim
        anchor_points = self.points
        for i in range(len(pre_blocks)):
            out_channel = last_channel * 2
            pre_block_num = pre_blocks[i]
            pos_block_num = pos_blocks[i]
            kneighbor = k_neighbors[i]
            reduce = reducers[i]
            anchor_points = anchor_points // reduce

            # dim_head = out_channel*2//self.heads

            # append local_grouper_list
            local_grouper = LocalGrouper(anchor_points, kneighbor)  #[b,g,k,d]
            self.local_grouper_list.append(local_grouper)
            # append pre_block_list
            pre_block_module = PreExtraction(out_channel,
                                             pre_block_num,
                                             heads=self.heads,
                                             dim_head=self.dim_head)
            self.pre_blocks_list.append(pre_block_module)
            local_token = nn.Parameter(torch.rand([1, 1, 1, out_channel]))
            self.local_token_list.append(local_token)
            # append pos_block_list
            pos_block_module = PosExtraction(out_channel,
                                             pos_block_num,
                                             heads=self.heads,
                                             dim_head=self.dim_head)
            self.pos_blocks_list.append(pos_block_module)
            global_token = nn.Parameter(torch.rand([1, 1, out_channel]))
            self.global_token_list.append(global_token)

            last_channel = out_channel

        self.classifier = nn.Sequential(
            nn.Linear(last_channel, last_channel // 4),
            nn.BatchNorm1d(last_channel // 4), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(last_channel // 4, self.class_num))
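The [1, 1, 1, out_channel] and [1, 1, out_channel] tokens above are learned vectors meant to be broadcast over the batch; their actual consumption happens inside PreExtraction/PosExtraction, which are not shown. A hedged sketch of one plausible use of a global token:

    import torch
    import torch.nn as nn

    B, N, C = 2, 16, 64  # assumed batch, points, channels
    global_token = nn.Parameter(torch.rand(1, 1, C))
    feats = torch.randn(B, N, C)
    # expand the token over the batch and prepend it to the point features
    x = torch.cat([global_token.expand(B, -1, -1), feats], dim=1)  # [B, N+1, C]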
Example #6
    def __init__(
        self,
        triples_factory: TriplesFactory,
        embedding_dim: int = 500,
        num_bases_or_blocks: int = 5,
        num_layers: int = 2,
        use_bias: bool = True,
        use_batch_norm: bool = False,
        activation_cls: Optional[Type[nn.Module]] = None,
        activation_kwargs: Optional[Mapping[str, Any]] = None,
        sparse_messages_slcwa: bool = True,
        edge_dropout: float = 0.4,
        self_loop_dropout: float = 0.2,
        edge_weighting: Callable[
            [torch.LongTensor, torch.LongTensor],
            torch.FloatTensor,
        ] = inverse_indegree_edge_weights,
        decomposition: str = 'basis',
        buffer_messages: bool = True,
        base_representations: Optional[RepresentationModule] = None,
    ):
        super().__init__()

        self.triples_factory = triples_factory

        # normalize representations
        if base_representations is None:
            base_representations = Embedding(
                num_embeddings=triples_factory.num_entities,
                embedding_dim=embedding_dim,
                # https://github.com/MichSchli/RelationPrediction/blob/c77b094fe5c17685ed138dae9ae49b304e0d8d89/code/encoders/affine_transform.py#L24-L28
                initializer=nn.init.xavier_uniform_,
            )
        self.base_embeddings = base_representations
        self.embedding_dim = embedding_dim

        # check decomposition
        self.decomposition = decomposition
        if self.decomposition == 'basis':
            if num_bases_or_blocks is None:
                logging.info(
                    'Using a heuristic to determine the number of bases.')
                num_bases_or_blocks = triples_factory.num_relations // 2 + 1
            if num_bases_or_blocks > triples_factory.num_relations:
                raise ValueError(
                    'The number of bases should not exceed the number of relations.'
                )
        elif self.decomposition == 'block':
            if num_bases_or_blocks is None:
                logging.info(
                    'Using a heuristic to determine the number of blocks.')
                num_bases_or_blocks = 2
            if embedding_dim % num_bases_or_blocks != 0:
                raise ValueError(
                    'With block decomposition, the embedding dimension has to be divisible by the number of'
                    f' blocks, but {embedding_dim} % {num_bases_or_blocks} != 0.',
                )
        else:
            raise ValueError(
                f'Unknown decomposition: "{decomposition}". Please use either "basis" or "block".'
            )

        self.num_bases = num_bases_or_blocks
        self.edge_weighting = edge_weighting
        self.edge_dropout = edge_dropout
        if self_loop_dropout is None:
            self_loop_dropout = edge_dropout
        self.self_loop_dropout = self_loop_dropout
        self.use_batch_norm = use_batch_norm
        if activation_cls is None:
            activation_cls = nn.ReLU
        self.activation_cls = activation_cls
        self.activation_kwargs = activation_kwargs
        if use_batch_norm:
            if use_bias:
                logger.warning(
                    'Disabling bias because batch normalization was used.')
            use_bias = False
        self.use_bias = use_bias
        self.num_layers = num_layers
        self.sparse_messages_slcwa = sparse_messages_slcwa

        # Save graph using buffers, such that the tensors are moved together with the model
        h, r, t = self.triples_factory.mapped_triples.t()
        self.register_buffer('sources', h)
        self.register_buffer('targets', t)
        self.register_buffer('edge_types', r)

        self.activations = nn.ModuleList([
            self.activation_cls(**(self.activation_kwargs or {}))
            for _ in range(self.num_layers)
        ])

        # Weights
        self.bases = nn.ParameterList()
        if self.decomposition == 'basis':
            self.att = nn.ParameterList()
            for _ in range(self.num_layers):
                self.bases.append(
                    nn.Parameter(
                        data=torch.empty(
                            self.num_bases,
                            self.embedding_dim,
                            self.embedding_dim,
                        ),
                        requires_grad=True,
                    ))
                self.att.append(
                    nn.Parameter(
                        data=torch.empty(
                            self.triples_factory.num_relations + 1,
                            self.num_bases,
                        ),
                        requires_grad=True,
                    ))
        elif self.decomposition == 'block':
            block_size = self.embedding_dim // self.num_bases
            for _ in range(self.num_layers):
                self.bases.append(
                    nn.Parameter(
                        data=torch.empty(
                            self.triples_factory.num_relations + 1,
                            self.num_bases,
                            block_size,
                            block_size,
                        ),
                        requires_grad=True,
                    ))

            self.att = None
        else:
            raise NotImplementedError
        if self.use_bias:
            self.biases = nn.ParameterList([
                nn.Parameter(torch.empty(self.embedding_dim),
                             requires_grad=True)
                for _ in range(self.num_layers)
            ])
        else:
            self.biases = None
        if self.use_batch_norm:
            self.batch_norms = nn.ModuleList([
                nn.BatchNorm1d(num_features=self.embedding_dim)
                for _ in range(self.num_layers)
            ])
        else:
            self.batch_norms = None

        # buffering of messages
        self.buffer_messages = buffer_messages
        self.enriched_embeddings = None
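A compact sketch of what the 'basis' weights above amount to at message-passing time: each relation's transform is a mixture of the shared bases weighted by its attention row (whether and how the mixture is normalized happens in the layer's forward, which is not shown):

    import torch

    num_bases, dim, num_relations = 5, 8, 12  # toy sizes
    bases = torch.randn(num_bases, dim, dim)
    att = torch.randn(num_relations + 1, num_bases)  # +1 row for self-loops
    w = torch.einsum('rb,bij->rij', att, bases)  # (num_relations + 1, dim, dim)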
Example #7
    def __init__(self, LayerNo):
        super(LPD_Net, self).__init__()
        self.name = "LPD_Net"
        self.LayerNo = LayerNo

        self.filter_size = 3
        self.conv_size = 32

        self.eta_step = nn.ParameterList()
        self.sigma_step = nn.ParameterList()

        self.soft_thr = nn.ParameterList()
        self.soft_a = nn.ParameterList()

        self.delta = nn.ParameterList()

        self.A2 = nn.ModuleList()
        self.B = nn.ModuleList()

        self.AT2 = nn.ModuleList()
        self.BT = nn.ModuleList()

        for _ in range(self.LayerNo):
            self.eta_step.append(nn.Parameter(torch.Tensor([0.1])))
            self.sigma_step.append(nn.Parameter(torch.Tensor([1])))

            self.soft_thr.append(nn.Parameter(torch.Tensor([0.1])))
            self.soft_a.append(nn.Parameter(torch.Tensor([50])))
            self.delta.append(nn.Parameter(torch.Tensor([0.1])))

        self.A2.append(
            nn.Conv2d(1,
                      self.conv_size,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False))
        self.B.append(
            nn.Conv2d(self.conv_size,
                      self.conv_size,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False))

        self.AT2.append(
            nn.Conv2d(self.conv_size,
                      1,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False))
        self.BT.append(
            nn.Conv2d(self.conv_size,
                      self.conv_size,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=False))

        nn.init.xavier_normal_(self.A2[0].weight)
        nn.init.xavier_normal_(self.B[0].weight)
        nn.init.xavier_normal_(self.AT2[0].weight)
        nn.init.xavier_normal_(self.BT[0].weight)
Example #8
    def __init__(
            self,
            block: Type[Union[BasicBlock, Bottleneck]],
            layers: List[int],
            num_classes: int = 10,
            zero_init_residual: bool = False,
            groups: int = 1,
            width_per_group: int = 64,
            replace_stride_with_dilation: Optional[List[bool]] = None,
            norm_layer: Optional[Callable[..., nn.Module]] = None) -> None:
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(
                                 replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3,
                               self.inplanes,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        self.ks = nn.ParameterList([
            nn.Parameter(torch.Tensor(1).uniform_(0.75, 0.8))
            for i in range(layers[0] + layers[1] + layers[2])
        ])

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight,
                                      0)  # type: ignore[arg-type]
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight,
                                      0)  # type: ignore[arg-type]
Example #9
    def create_param_from_shapes(self, list_param_shapes):
        self.list_params = []
        for s in list_param_shapes:
            self.list_params.append(Parameter(torch.Tensor(s)))
        self.list_params = nn.ParameterList(self.list_params)
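Note that torch.Tensor(s) in the helper above allocates uninitialised memory, so callers are presumably expected to follow up with an explicit init, e.g.:

    import torch
    import torch.nn as nn

    params = nn.ParameterList([nn.Parameter(torch.Tensor(3, 4))])
    for p in params:
        nn.init.normal_(p, std=0.02)  # values were garbage until this point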
Example #10
    def __init__(
        self,
        input_size,
        C_in,
        C,
        n_classes,
        n_layers,
        auxiliary,
        genotype,
        stem_multiplier=3,
        feature_scale_rate=2,
        PRIMITIVES=gt.PRIMITIVES,
        reduction_layers=[],
    ):
        """
        Args:
            input_size: size of height and width (assuming height = width)
            C_in: # of input channels
            C: # of starting model channels
        """
        super().__init__()
        self.C_in = C_in
        self.C = C
        self.n_classes = n_classes
        self.n_layers = n_layers
        self.aux_pos = 2 * n_layers // 3 if auxiliary else -1

        C_cur = stem_multiplier * C
        self.stem = nn.Sequential(
            nn.Conv2d(C_in, C_cur, 3, 1, 1, bias=False), nn.BatchNorm2d(C_cur)
        )

        C_pp, C_p, C_cur = C_cur, C_cur, C

        self.cells = nn.ModuleList()
        reduction_p = False

        if not reduction_layers:
            reduction_layers = [n_layers // 3, (2 * n_layers) // 3]

        for i in range(n_layers):
            if i in reduction_layers:
                C_cur *= feature_scale_rate
                reduction = True
            else:
                reduction = False

            cell = AugmentCell(genotype, C_pp, C_p, C_cur, reduction_p, reduction)
            reduction_p = reduction
            self.cells.append(cell)
            C_cur_out = C_cur * len(cell.concat)
            C_pp, C_p = C_p, C_cur_out

            if i == self.aux_pos:
                # [!] this auxiliary head is ignored in computing parameter size
                #     by the name 'aux_head'
                self.aux_head = AuxiliaryHead(input_size // 4, C_p, n_classes)

        self.gap = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(C_p, n_classes)

        self.criterion = nn.CrossEntropyLoss()

        ####### dummy alphas
        self.alpha_normal = nn.ParameterList()
        self.alpha_reduce = nn.ParameterList()

        for i in range(2):
            self.alpha_normal.append(nn.Parameter(1e-3 * torch.randn(1, 5)))
            self.alpha_reduce.append(nn.Parameter(1e-3 * torch.randn(1, 5)))

        # setup alphas list
        self._alphas = []
        for n, p in self.named_parameters():
            if "alpha" in n:
                self._alphas.append((n, p))

        self.alpha_prune_threshold = 0.0
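The name-based collection at the end works because ParameterList entries inherit their attribute name; a minimal standalone version:

    import torch
    import torch.nn as nn

    class M(nn.Module):
        def __init__(self):
            super().__init__()
            self.alpha_normal = nn.ParameterList(
                [nn.Parameter(1e-3 * torch.randn(1, 5))])
            self.linear = nn.Linear(2, 2)

    m = M()
    alphas = [(n, p) for n, p in m.named_parameters() if "alpha" in n]
    # [('alpha_normal.0', ...)]; the Linear weights are filtered out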
Example #11
    def __init__(self,
                 hnet,
                 hnet_uncond_in_size=None,
                 sigma_noise=0.02,
                 input_handler=None,
                 output_handler=None,
                 verbose=True):
        # FIXME find a way using super to handle multiple inheritance.
        nn.Module.__init__(self)
        HyperNetInterface.__init__(self)

        assert isinstance(hnet, HyperNetInterface)
        self._hnet = hnet
        self._hnet_uncond_in_size = hnet_uncond_in_size
        self._sigma_noise = sigma_noise
        self._input_handler = input_handler
        self._output_handler = output_handler

        if input_handler is None and hnet_uncond_in_size is None:
            raise ValueError(
                'Either "input_handler" or "hnet_uncond_in_size"' +
                ' has to be specified.')

        ### Setup attributes required by interface ###
        # Most of these attributes are taken over from `self._hnet`
        self._target_shapes = hnet.target_shapes
        self._num_known_conds = self._hnet.num_known_conds
        self._unconditional_param_shapes_ref = \
            list(self._hnet.unconditional_param_shapes_ref)

        if self._hnet.internal_params is not None:
            self._internal_params = \
                nn.ParameterList(self._hnet.internal_params)
        self._param_shapes = list(self._hnet.param_shapes)
        self._param_shapes_meta = list(self._hnet.param_shapes_meta)
        if self._hnet.hyper_shapes_learned is not None:
            self._hyper_shapes_learned = list(self._hnet.hyper_shapes_learned)
            self._hyper_shapes_learned_ref = \
                list(self._hnet.hyper_shapes_learned_ref)
        if self._hnet.hyper_shapes_distilled is not None:
            self._hyper_shapes_distilled = \
                list(self._hnet.hyper_shapes_distilled)
        self._has_bias = self._hnet.has_bias
        # A noise perturbed output can't be considered an FC output anymore.
        self._has_fc_out = False
        self._mask_fc_out = self._hnet.mask_fc_out
        # Guess that's the safest answer.
        self._has_linear_out = False
        self._layer_weight_tensors = \
            nn.ParameterList(self._hnet.layer_weight_tensors)
        self._layer_bias_vectors = \
            nn.ParameterList(self._hnet.layer_bias_vectors)
        if self._hnet.batchnorm_layers is not None:
            self._batchnorm_layers = nn.ModuleList(self._hnet.batchnorm_layers)
        if self._hnet.context_mod_layers is not None:
            self._context_mod_layers = \
                nn.ModuleList(self._hnet.context_mod_layers)

        ### Finalize construction ###
        self._is_properly_setup()

        if verbose:
            print('Wrapped a perturbation interface around a hypernetwork.')
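One property the wrapper above relies on: rebuilding an nn.ParameterList from existing Parameters does not copy them, so the wrapper and the wrapped hypernetwork share the same weights:

    import torch
    import torch.nn as nn

    p = nn.Parameter(torch.zeros(3))
    wrapped = nn.ParameterList([p])
    assert wrapped[0] is p  # same object; updates are visible to both owners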
Example #12
    def __init__(self, n_in_enc, graph_args_j, graph_args_p, graph_args_b,
                 edge_weighting, fusion_layer, cross_w, **kwargs):
        super().__init__()

        self.graph_j = Graph_J(**graph_args_j)
        self.graph_p = Graph_P(**graph_args_p)
        self.graph_b = Graph_B(**graph_args_b)
        A_j = torch.tensor(self.graph_j.A_j,
                           dtype=torch.float32,
                           requires_grad=False)
        self.register_buffer('A_j', A_j)
        A_p = torch.tensor(self.graph_p.A_p,
                           dtype=torch.float32,
                           requires_grad=False)
        self.register_buffer('A_p', A_p)
        A_b = torch.tensor(self.graph_b.A_b,
                           dtype=torch.float32,
                           requires_grad=False)
        self.register_buffer('A_b', A_b)

        t_ksize = 5
        s_ksize_1, s_ksize_2, s_ksize_3 = (self.A_j.size(0), self.A_p.size(0),
                                           self.A_b.size(0))
        ksize_1 = (t_ksize, s_ksize_1)
        ksize_2 = (t_ksize, s_ksize_2)
        ksize_3 = (t_ksize, s_ksize_3)

        self.s2_init = AveargeJoint()
        self.s3_init = AveargePart()
        self.s1_l1 = St_gcn(n_in_enc,
                            32,
                            ksize_1,
                            stride=1,
                            residual=False,
                            **kwargs)
        self.s1_l2 = St_gcn(32, 64, ksize_1, stride=2, **kwargs)
        self.s1_l3 = St_gcn(64, 128, ksize_1, stride=2, **kwargs)
        self.s1_l4 = St_gcn(128, 256, ksize_1, stride=2, **kwargs)
        self.s1_l5 = St_gcn(256, 256, ksize_1, stride=1, **kwargs)
        self.s2_l1 = St_gcn(n_in_enc,
                            32,
                            ksize_2,
                            stride=1,
                            residual=False,
                            **kwargs)
        self.s2_l2 = St_gcn(32, 64, ksize_2, stride=2, **kwargs)
        self.s2_l3 = St_gcn(64, 128, ksize_2, stride=2, **kwargs)
        self.s2_l4 = St_gcn(128, 256, ksize_2, stride=2, **kwargs)
        self.s3_l1 = St_gcn(n_in_enc,
                            32,
                            ksize_3,
                            stride=1,
                            residual=False,
                            **kwargs)
        self.s3_l2 = St_gcn(32, 64, ksize_3, stride=2, **kwargs)
        self.s3_l3 = St_gcn(64, 128, ksize_3, stride=2, **kwargs)
        self.s3_l4 = St_gcn(128, 256, ksize_3, stride=2, **kwargs)
        self.s2_back = PartLocalInform()
        self.s3_back = BodyLocalInform()
        self.fusion_layer = fusion_layer
        self.cross_w = cross_w

        if self.fusion_layer == 0:
            pass
        elif self.fusion_layer == 1:
            self.j2p_1 = S1_to_S2(n_j1=32,
                                  n_j2=(800, 256),
                                  n_p1=32,
                                  n_p2=(800, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.p2j_1 = S2_to_S1(n_p1=32,
                                  n_p2=(800, 256),
                                  n_j1=32,
                                  n_j2=(800, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.p2b_1 = S2_to_S3(n_p1=32,
                                  n_p2=(800, 256),
                                  n_b1=32,
                                  n_b2=(800, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.b2p_1 = S3_to_S2(n_b1=32,
                                  n_b2=(800, 256),
                                  n_p1=32,
                                  n_p2=(800, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
        elif self.fusion_layer == 2:
            self.j2p_1 = S1_to_S2(n_j1=32,
                                  n_j2=(800, 256),
                                  n_p1=32,
                                  n_p2=(800, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.p2j_1 = S2_to_S1(n_p1=32,
                                  n_p2=(800, 256),
                                  n_j1=32,
                                  n_j2=(800, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.p2b_1 = S2_to_S3(n_p1=32,
                                  n_p2=(800, 256),
                                  n_b1=32,
                                  n_b2=(800, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.b2p_1 = S3_to_S2(n_b1=32,
                                  n_b2=(800, 256),
                                  n_p1=32,
                                  n_p2=(800, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.j2p_2 = S1_to_S2(n_j1=64,
                                  n_j2=(832, 256),
                                  n_p1=64,
                                  n_p2=(832, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.p2j_2 = S2_to_S1(n_p1=64,
                                  n_p2=(832, 256),
                                  n_j1=64,
                                  n_j2=(832, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.p2b_2 = S2_to_S3(n_p1=64,
                                  n_p2=(832, 256),
                                  n_b1=64,
                                  n_b2=(832, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
            self.b2p_2 = S3_to_S2(n_b1=64,
                                  n_b2=(832, 256),
                                  n_p1=64,
                                  n_p2=(832, 256),
                                  t_kernel=5,
                                  t_stride=(1, 2),
                                  t_padding=2)
        else:
            raise ValueError('No such fusion architecture')

        if edge_weighting:
            self.emul_s1 = nn.ParameterList(
                [nn.Parameter(torch.ones(self.A_j.size())) for i in range(5)])
            self.eadd_s1 = nn.ParameterList(
                [nn.Parameter(torch.zeros(self.A_j.size())) for i in range(5)])
            self.emul_s2 = nn.ParameterList(
                [nn.Parameter(torch.ones(self.A_p.size())) for i in range(4)])
            self.eadd_s2 = nn.ParameterList(
                [nn.Parameter(torch.zeros(self.A_p.size())) for i in range(4)])
            self.emul_s3 = nn.ParameterList(
                [nn.Parameter(torch.ones(self.A_b.size())) for i in range(4)])
            self.eadd_s3 = nn.ParameterList(
                [nn.Parameter(torch.zeros(self.A_b.size())) for i in range(4)])
        else:
            self.emul_s1 = [1] * 5
            self.eadd_s1 = nn.ParameterList(
                [nn.Parameter(torch.zeros(self.A_j.size())) for i in range(5)])
            self.emul_s2 = [1] * 4
            self.eadd_s2 = nn.ParameterList(
                [nn.Parameter(torch.zeros(self.A_p.size())) for i in range(4)])
            self.emul_s3 = [1] * 4
            self.eadd_s3 = nn.ParameterList(
                [nn.Parameter(torch.zeros(self.A_b.size())) for i in range(4)])
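A hedged sketch of the presumed role of the edge-weighting parameters above (the St_gcn blocks that consume them are not shown): a learned elementwise rescale and offset of the fixed adjacency,

    import torch
    import torch.nn as nn

    A = torch.rand(3, 25, 25)  # assumed (kernel, V, V) adjacency
    emul = nn.Parameter(torch.ones(A.size()))
    eadd = nn.Parameter(torch.zeros(A.size()))
    A_eff = A * emul + eadd  # identity at init, adapted during training

This also explains the else branch: the multiplicative weights degrade to the constant 1 while the additive offsets stay learnable.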
Example #13
    def __init__(self,
                 in_shape=[32, 32, 3],
                 num_classes=10,
                 verbose=True,
                 arch='cifar',
                 no_weights=False,
                 init_weights=None,
                 dropout_rate=0.25):
        super(ZenkeNet, self).__init__(num_classes, verbose)

        assert (in_shape[0] == 32 and in_shape[1] == 32)
        self._in_shape = in_shape

        assert (arch in ZenkeNet._architectures.keys())
        self._param_shapes = ZenkeNet._architectures[arch]
        self._param_shapes[-2][0] = num_classes
        self._param_shapes[-1][0] = num_classes

        assert (init_weights is None or no_weights is False)
        self._no_weights = no_weights

        self._use_dropout = dropout_rate != -1

        self._has_bias = True
        self._has_fc_out = True
        # We need to make sure that the last 2 entries of `weights` correspond
        # to the weight matrix and bias vector of the last layer.
        self._mask_fc_out = True
        # We don't use any output non-linearity.
        self._has_linear_out = True

        self._num_weights = MainNetInterface.shapes_to_num_weights( \
            self._param_shapes)
        if verbose:
            print('Creating a ZenkeNet with %d weights' \
                  % (self._num_weights)
                  + (', that uses dropout.' if self._use_dropout else '.'))

        if self._use_dropout:
            if dropout_rate > 0.5:
                # FIXME not a pretty solution, but we aim to follow the original
                # paper.
                raise ValueError('Dropout rate must be smaller than or equal to 0.5.')
            self._drop_conv = nn.Dropout2d(p=dropout_rate)
            self._drop_fc1 = nn.Dropout(p=dropout_rate * 2.)

        self._layer_weight_tensors = nn.ParameterList()
        self._layer_bias_vectors = nn.ParameterList()

        if no_weights:
            self._weights = None
            self._hyper_shapes_learned = self._param_shapes
            self._is_properly_setup()
            return

        ### Define and initialize network weights.
        # Each even-indexed entry (0, 2, ...) of this list will contain a
        # weight tensor and each odd-indexed entry (1, 3, ...) a bias vector.
        self._weights = nn.ParameterList()

        for i, dims in enumerate(self._param_shapes):
            self._weights.append(
                nn.Parameter(torch.Tensor(*dims), requires_grad=True))

            if i % 2 == 0:
                self._layer_weight_tensors.append(self._weights[i])
            else:
                assert (len(dims) == 1)
                self._layer_bias_vectors.append(self._weights[i])

        if init_weights is not None:
            assert (len(init_weights) == len(self._param_shapes))
            for i in range(len(init_weights)):
                assert (np.all(
                    np.equal(list(init_weights[i].shape),
                             list(self._weights[i].shape))))
                self._weights[i].data = init_weights[i]
        else:
            for i in range(len(self._layer_weight_tensors)):
                init_params(self._layer_weight_tensors[i],
                            self._layer_bias_vectors[i])

        self._is_properly_setup()
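A tiny illustration of the (weight, bias) alternation handled by the loop above:

    param_shapes = [[16, 3, 5, 5], [16], [10, 16], [10]]  # toy shapes
    weights = param_shapes[0::2]  # even indices: [[16, 3, 5, 5], [10, 16]]
    biases = param_shapes[1::2]   # odd indices:  [[16], [10]]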
Example #14
    def _initialize_alpha(self):
        k = sum(1 for i in range(self.nodes) for n in range(2 + i))
        num_ops = len(ATT_PRIMITIVES)
        self.alphas = nn.Parameter(1e-3 * torch.randn(k, num_ops).cuda(),
                                   requires_grad=True)
        self._arch_param = nn.ParameterList([self.alphas])
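The edge count k above follows the DARTS-style cell layout in which intermediate node i has 2 + i incoming edges, so k = 2n + n(n - 1)/2 for n nodes:

    n = 4  # nodes
    k = sum(2 + i for i in range(n))  # 2 + 3 + 4 + 5 = 14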
Example #15
    def __init__(self, in_feat, out_feat, num_rels, num_bases=-1, bias=None,
                 activation=None, is_input_layer=False, ranks=None, input_dropout=0.2, rank_per=0.1, decomp='tucker'):
        super(RGCNTorchTuckerLayer, self).__init__(in_feat, out_feat, bias, activation)
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.num_rels = num_rels
        self.num_bases = num_bases
        self.is_input_layer = is_input_layer
        self.num_bases = self.num_rels

        # calculate

        # if is_input_layer:
        #     self.ranks = [self.num_bases, rank_per, self.out_feat]
        # else:
        #     self.ranks = [self.num_bases, self.in_feat, self.out_feat]
        if ranks[0] == -1:
            ranks[0] = self.num_rels

        if ranks[1] == -1:
            ranks[1] = in_feat

        if self.is_input_layer:
            self.ranks = [ranks[0], ranks[1], self.out_feat]
        else:
            self.ranks = [ranks[0], self.in_feat, self.out_feat]
        print("Ranks - {}".format(self.ranks))

        # add basis weights
        if decomp == 'tucker':
            self.weight = tn.randn(self.num_bases, self.in_feat,
                                   self.out_feat, ranks_tucker=self.ranks,
                                   device='cuda', requires_grad=True)
        elif decomp == 'tt':
            self.weight = tn.randn(self.num_bases, self.in_feat,
                                   self.out_feat, ranks_tt=self.ranks, device='cuda',
                                   requires_grad=True)
        else:
            raise NotImplementedError("decomposition not implemented")

        # self.core = nn.Parameter(torch.empty((self.ranks[0], self.ranks[1], self.ranks[2])))
        # self.factor_1 = nn.Parameter(torch.empty((weight.shape[0], self.ranks[0])))
        # self.factor_2 = nn.Parameter(torch.empty((weight.shape[1], self.ranks[1])))
        # self.factor_3 = nn.Parameter(torch.empty((weight.shape[2], self.ranks[2])))

        self.input_dropout = torch.nn.Dropout(input_dropout)
        self.bnw = torch.nn.BatchNorm1d(self.in_feat)

        # self.factors = nn.ParameterList([])
        # for f_i,f in enumerate(factors):
        #     fac = nn.Parameter(f)
        #     # self.register_parameter('tucker_factor_{}'.format(f_i), fac)
        #     self.factors.append(fac)
        # # self.weight_full = nn.Parameter(self.weight.torch())
        cores = []
        for c_i, core in enumerate(self.weight.cores):
            core = nn.Parameter(core)
            #nn.init.xavier_normal_(core, gain=nn.init.calculate_gain('sigmoid'))
            self.register_parameter('tucker_core_{}'.format(c_i), core)
            cores.append(core)
        self.weight.cores = cores

        Us = []
        for u_i, u in enumerate(self.weight.Us):
            u = nn.Parameter(u)
            #nn.init.orthogonal(u, gain=nn.init.calculate_gain('sigmoid'))
            self.register_parameter('tucker_Us_{}'.format(u_i), u)
            Us.append(u)

        self.weight.Us = Us
        self.model_params = nn.ParameterList(cores + Us)
Example #16
    def __init__(self,
                 num_tokens_per_channel,
                 codebook_dim,
                 upscale_factors,
                 list_of_num_layers,
                 n_head,
                 d_model,
                 dim_feedforward,
                 num_tokens_bottleneck,
                 dropout):
        super(AuxiliaryDecoderRelative, self).__init__()
        assert len(list_of_num_layers) == len(upscale_factors)
        self.num_tokens_per_channel = num_tokens_per_channel
        self.num_channels = len(self.num_tokens_per_channel)
        self.d_model = d_model
        self.codebook_dim = codebook_dim
        self.upscale_factors = upscale_factors

        self.linear = nn.Linear(self.codebook_dim, self.d_model)

        # TODO factorised positional embeddings
        positional_embedding_size = self.d_model

        self.upscale_embeddings = nn.ParameterList(
            [
                nn.Parameter(
                    torch.randn(upscale, self.d_model)
                )
                for upscale in self.upscale_factors
            ]
        )

        # build transformer list
        self.num_tokens_per_transformer_block = [
            num_tokens_bottleneck * int(np.prod(self.upscale_factors[:i]))
            for i in range(len(self.upscale_factors))
        ]

        # self.code_embedding_dim = self.d_model
        # - positional_embedding_size
        # TODO for now sum positional embedding
        self.code_embedding_dim = self.d_model - positional_embedding_size

        transformer_list = []
        for i, (num_layers, num_tokens) in enumerate(
                zip(list_of_num_layers, self.num_tokens_per_transformer_block)):
            encoder_layer = TransformerEncoderLayerCustom(
                d_model=self.d_model,
                nhead=n_head,
                attention_bias_type='relative_attention',
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                num_events=num_tokens // self.num_channels,
                num_channels=self.num_channels
            )
            transformer = TransformerEncoderCustom(
                encoder_layer=encoder_layer,
                num_layers=num_layers,
            )
            transformer_list.append(transformer)

        self.transformers = nn.ModuleList(
            transformer_list
        )

        self.pre_softmaxes = nn.ModuleList([
            nn.Linear(self.d_model, num_notes)
            for num_notes in num_tokens_per_channel
        ])
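A worked example of the token-count arithmetic above, with assumed values num_tokens_bottleneck = 8 and upscale_factors = [2, 2, 3]:

    import numpy as np

    upscale_factors = [2, 2, 3]
    tokens = [8 * int(np.prod(upscale_factors[:i])) for i in range(3)]
    # -> [8, 16, 32]: each block handles the previous count times its upscale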
Example #17
    def __init__(self, list_length):
        super(MultiLossLayer, self).__init__()
        self._sigmas_sq = nn.ParameterList(
            [nn.Parameter(torch.empty(())) for i in range(list_length)])
        for p in self.parameters():
            nn.init.uniform_(p, 0.5, 0.8)
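The uniformly initialised sigma-squared values above match homoscedastic-uncertainty loss weighting in the style of Kendall et al.; a hedged sketch of how such a layer is typically applied (the snippet does not show MultiLossLayer's forward):

    import torch

    sigmas_sq = [torch.tensor(0.6), torch.tensor(0.7)]  # stand-ins for the Parameters
    losses = [torch.tensor(1.0), torch.tensor(2.0)]     # per-task losses
    total = sum(0.5 / s * l + 0.5 * torch.log(s)
                for s, l in zip(sigmas_sq, losses))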
Example #18
    def __init__(self, config, imgc, imgsz, device):

        super(Learner, self).__init__()
        
        self.config2vars = [None] * len(config)
        self.config2vars_bn = [None] * len(config)
        self.config = config
        self.device = device
        
        # this dict contains all tensors needed to be optimized
        self.vars = nn.ParameterList()
        # running_mean and running_var
        self.vars_bn = nn.ParameterList()
        
        self.pruning_record = []
    
        for i, (name, param) in enumerate(self.config):
            if name == 'conv2d':
                # [ch_out, ch_in, kernelsz, kernelsz]
                w = nn.Parameter(torch.ones(*param[:4]))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[0])))
                self.config2vars[i] = len(self.vars) // 2 - 1

            elif name == 'convt2d':
                # [ch_in, ch_out, kernelsz, kernelsz, stride, padding]
                w = nn.Parameter(torch.ones(*param[:4]))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                # [ch_in, ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[1])))
                self.config2vars[i] = len(self.vars) // 2 - 1

            elif name == 'linear':
                # [ch_out, ch_in]
                w = nn.Parameter(torch.ones(*param))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[0])))
                self.config2vars[i] = len(self.vars) // 2 - 1

            elif name == 'bn':
                # [ch_out]
                w = nn.Parameter(torch.ones(param[0]))
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[0])))
                self.config2vars[i] = len(self.vars) // 2 - 1
                
                # must set requires_grad=False
                running_mean = nn.Parameter(torch.zeros(param[0]), requires_grad=False)
                running_var = nn.Parameter(torch.ones(param[0]), requires_grad=False)
                self.vars_bn.extend([running_mean, running_var])
                self.config2vars_bn[i] = len(self.vars_bn) // 2 - 1


            elif name in ['tanh', 'relu', 'upsample', 'avg_pool2d', 'max_pool2d',
                          'flatten', 'reshape', 'leakyrelu', 'sigmoid']:
                continue
            else:
                raise NotImplementedError
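
This flat-ParameterList layout is the MAML Learner pattern (after cbfinn's implementation): keeping all weights in `self.vars` lets a meta-learner run the network with fast-adapted copies of the parameters. A hedged sketch of the matching functional forward for the layer types above, assuming six-element conv2d configs as in Example #21 (the real forward likely covers more cases and options):

    import torch.nn.functional as F

    def functional_forward(self, x, vars=None, bn_training=True):
        if vars is None:
            vars = self.vars
        idx, bn_idx = 0, 0
        for name, param in self.config:
            if name == 'conv2d':
                w, b = vars[idx], vars[idx + 1]
                x = F.conv2d(x, w, b, stride=param[4], padding=param[5])
                idx += 2
            elif name == 'linear':
                w, b = vars[idx], vars[idx + 1]
                x = F.linear(x, w, b)
                idx += 2
            elif name == 'bn':
                w, b = vars[idx], vars[idx + 1]
                mean, var = self.vars_bn[bn_idx], self.vars_bn[bn_idx + 1]
                x = F.batch_norm(x, mean, var, weight=w, bias=b,
                                 training=bn_training)
                idx += 2
                bn_idx += 2
            elif name == 'flatten':
                x = x.view(x.size(0), -1)
            elif name == 'relu':
                x = F.relu(x)
        return x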
Example #19
    def __init__(self,
                 block,
                 layers,
                 pretrain=False,
                 num_classes=10,
                 stochastic_depth=False,
                 PL=0.5,
                 noise_level=0.001,
                 noise=False):
        self.in_planes = 16
        self.planes = [16, 32, 64]
        self.strides = [1, 2, 2]
        super(InResNet, self).__init__()
        self.noise = noise
        self.block = block
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.pretrain = pretrain
        self.ks = nn.ParameterList([
            nn.Parameter(torch.Tensor(1).uniform_(0.2, 0.25))
            for i in range(layers[0] + layers[1] + layers[2])
        ])
        # For the 164-layer experiments: initialize with uniform_(0.8, 0.9) for
        # In-ResNet, or uniform_(0.1, 0.2) for \lambda-In-ResNet.
        self.stochastic_depth = stochastic_depth
        blocks = []
        n = layers[0] + layers[1] + layers[2]

        if not self.stochastic_depth:
            for i in range(3):
                blocks.append(
                    block(self.in_planes, self.planes[i], self.strides[i]))
                self.in_planes = self.planes[i] * block.expansion
                for j in range(1, layers[i]):
                    blocks.append(block(self.in_planes, self.planes[i]))
        else:
            death_rates = [i / (n - 1) * (1 - PL) for i in range(n)]
            print(death_rates)
            for i in range(3):
                blocks.append(
                    block(self.in_planes,
                          self.planes[i],
                          self.strides[i],
                          death_rate=death_rates[i * layers[0]]))
                self.in_planes = self.planes[i] * block.expansion
                for j in range(1, layers[i]):
                    blocks.append(
                        block(self.in_planes,
                              self.planes[i],
                              death_rate=death_rates[i * layers[0] + j]))
        self.blocks = nn.ModuleList(blocks)
        self.downsample1 = Downsample(16, 64, stride=1)
        self.downsample21 = Downsample(16 * block.expansion,
                                       32 * block.expansion)
        self.downsample31 = Downsample(32 * block.expansion,
                                       64 * block.expansion)

        self.bn = nn.BatchNorm2d(64 * block.expansion)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
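
The per-block scalars in `self.ks` suggest a learned interpolation between the shortcut and the residual branch (the block class itself is not shown here). A heavily hedged sketch of how one such scalar might be consumed:

    # Hypothetical: one learned k per block mixing identity and residual.
    def mix(identity, residual, k):
        return k * identity + (1.0 - k) * residual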
Example #20
 def __init__(self, depth=1, sizes=[128]):
     super(MLP, self).__init__()
     assert len(sizes) == depth, 'num_layers must match depth!'
     self.depth = depth
     self.sizes = sizes
     self.linear_weights = nn.ParameterList()
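
The snippet is truncated before `linear_weights` is populated. A hedged reconstruction of how the constructor might continue (the input dimension and initialization scale are assumptions, not from the snippet):

    import torch
    import torch.nn as nn

    class MLPSketch(nn.Module):
        def __init__(self, depth=1, sizes=[128], in_dim=64):  # in_dim assumed
            super().__init__()
            assert len(sizes) == depth, 'num_layers must match depth!'
            self.linear_weights = nn.ParameterList()
            for out_dim in sizes:
                self.linear_weights.append(
                    nn.Parameter(torch.randn(out_dim, in_dim) * 0.01))
                in_dim = out_dim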
Example #21
    def __init__(self, nWay):
        super(Learner, self).__init__()


        self.config = [
         ('conv2d', [32, 1, 3, 3, 2, 0]),
         ('relu', [True]),
         ('bn', [32]),
         ('conv2d', [64, 32, 3, 3, 2, 0]),
         ('relu', [True]),
         ('bn', [64]),
         ('conv2d', [128, 64, 3, 3, 2, 0]),
         ('relu', [True]),
         ('bn', [128]),
         ('conv2d', [128, 128, 2, 2, 1, 0]),
         ('relu', [True]),
         ('bn', [128]),
         ('flatten', []),
         ('linear', [nWay, 128])
         ]


        # this dict contains all tensors needed to be optimized
        self.vars = nn.ParameterList()
        self.vars_bn = nn.ParameterList()

        for i, (name, param) in enumerate(self.config):
            if name == 'conv2d':
                # [ch_out, ch_in, kernelsz, kernelsz]
                w = nn.Parameter(torch.ones(*param[:4]))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                self.vars.append(nn.Parameter(torch.zeros(param[0])))

            elif name == 'convt2d':
                # [ch_in, ch_out, kernelsz, kernelsz, stride, padding]
                w = nn.Parameter(torch.ones(*param[:4]))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                # [ch_in, ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[1])))

            elif name == 'linear':
                # [ch_out, ch_in]
                w = nn.Parameter(torch.ones(*param))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[0])))

            elif name == 'bn':
                # [ch_out]
                w = nn.Parameter(torch.ones(param[0]))
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[0])))

                running_mean = nn.Parameter(torch.zeros(param[0]), requires_grad=False)
                running_var = nn.Parameter(torch.ones(param[0]), requires_grad=False)
                self.vars_bn.extend([running_mean, running_var])


            elif name in ['tanh', 'relu', 'upsample', 'avg_pool2d', 'max_pool2d',
                          'flatten', 'reshape', 'leakyrelu', 'sigmoid']:
                continue
            else:
                raise NotImplementedError
Example #22
    def __init__(self,
                 adjmat_list,
                 input_data_dim,
                 num_agg_steps,
                 vertex_embed_dim,
                 mlp_num_hidden,
                 mlp_hidden_dim,
                 vertices_are_onehot,
                 target_dim,
                 epsilon_tunable=False,
                 dense_layer_dropout=0.0,
                 other_mlp_parameters={}):
        """
        Most parameters defined in the parent class

        :param adjmat_list: List of all adjacency matrices to be considered.
        Purpose: force input validation; the list is not saved to any variable.
        The user supplies the graphs in the dataset. In principle, the graphs
        passed at initialization could differ from those used in the forward
        method, so it is up to the user to run input validation on all desired
        graphs. The list is NOT stored on self, so no memory is wasted.

        :param target_dim: Dimension of the response variable (the target)

        :param epsilon_tunable: Do we make epsilon in equation 4.1 tunable
        :param dense_layer_dropout: Dropout to apply to the dense layer.
                                    In accordance with the GIN paper's experimental section
        """

        # Make sure all entered matrices are coo
        def is_coo(mat):
            return isinstance(mat, sps.coo.coo_matrix)

        # Make sure there are ones on the diagonal.
        def diags_all_one(mat):
            return np.array_equal(mat.diagonal(), np.ones(mat.shape[0]))

        assert all(list(map(
            is_coo,
            adjmat_list))), "All adjacency matrices must be scipy sparse coo"
        assert all(list(map(
            diags_all_one,
            adjmat_list))), "All adjacency matrices must have ones on the diag"
        assert isinstance(
            dense_layer_dropout,
            float), "Dense layer dropout must be a float in 0 <= p < 1"
        assert 0 <= dense_layer_dropout < 1, "Dense layer dropout must be a float in 0 <= p < 1"

        super(GinMultiGraph,
              self).__init__(input_data_dim=input_data_dim,
                             num_agg_steps=num_agg_steps,
                             vertex_embed_dim=vertex_embed_dim,
                             mlp_num_hidden=mlp_num_hidden,
                             mlp_hidden_dim=mlp_hidden_dim,
                             vertices_are_onehot=vertices_are_onehot,
                             other_mlp_parameters=other_mlp_parameters)

        self.target_dim = target_dim
        self.add_module("last_linear",
                        nn.Linear(self.graph_embed_dim, target_dim))

        self.epsilon_tunable = epsilon_tunable

        logging.info("Dense layer dropout: {}".format(dense_layer_dropout))
        self.dense_layer_dropout = nn.Dropout(p=dense_layer_dropout)

        if epsilon_tunable:
            logging.info("User indicated: epsilon_tunable = True")
            logging.info("Epsilon_k WILL be LEARNED via backprop")
            logging.info("It is initialized to zero")

            self.epsilons = nn.ParameterList()
            for ll in range(num_agg_steps):
                epsilon_k = nn.Parameter(torch.zeros(1), requires_grad=True)
                self.epsilons.append(epsilon_k)
        else:
            logging.info("User indicated: epsilon_tunable = False")
            logging.info(
                "Epsilon_k WILL NOT be learned via backprop (and set to zero implicitly)"
            )
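
The tunable epsilons correspond to equation 4.1 of the GIN paper (Xu et al., 2019): h_v' = MLP((1 + eps) * h_v + sum over neighbors of h_u). Because the asserts above force ones on the adjacency diagonal, A @ H already contains h_v once; a hedged dense-matrix sketch of one aggregation step (the class presumably operates on the sparse coo matrices instead):

    def gin_step(H, A, eps, mlp):
        # H: (num_vertices, dim); A: adjacency with ones on the diagonal.
        # A @ H = h_v + sum of neighbors, so adding eps * H realizes the
        # (1 + eps) * h_v term.
        return mlp(A @ H + eps * H)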
Example #23
    def __init__(self,
                 in_channels,
                 num_class,
                 graph_cfg,
                 T=300,
                 RAM_encoder_output_channels=128,
                 RAM_decoder_output_channels=64,
                 edge_importance_weighting=True,
                 relative_attention_component=True,
                 geometric_component=True,
                 temporal_kernel_size=9,
                 **kwargs):
        super().__init__()

        self.relative_attention_component = relative_attention_component
        self.geometric_component = geometric_component

        # load graph
        self.graph = Graph(**graph_cfg)
        A = torch.tensor(self.graph.A,
                         dtype=torch.float32,
                         requires_grad=False)
        self.register_buffer('A', A)

        # build networks
        spatial_kernel_size = A.size(0)
        self.temporal_kernel_size = temporal_kernel_size
        kernel_size = (self.temporal_kernel_size, spatial_kernel_size,
                       A.size(1))
        self.data_bn = nn.BatchNorm2d(in_channels)
        kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}
        self.st_gcn_networks = nn.ModuleList((
            DRGCBlock(in_channels,
                      64,
                      kernel_size,
                      1,
                      residual=False,
                      **kwargs0),
            DRGCBlock(64, 64, kernel_size, 1, **kwargs),
            DRGCBlock(64, 64, kernel_size, 1, **kwargs),
            DRGCBlock(64, 64, kernel_size, 1, **kwargs),
            DRGCBlock(64, 128, kernel_size, 2, **kwargs),
            DRGCBlock(128, 128, kernel_size, 1, **kwargs),
            DRGCBlock(128, 128, kernel_size, 1, **kwargs),
            DRGCBlock(128, 256, kernel_size, 2, **kwargs),
            DRGCBlock(256, 256, kernel_size, 1, **kwargs),
            DRGCBlock(256, 256, kernel_size, 1, **kwargs),
        ))

        self.RAMGen = RAMGen(3, RAM_encoder_output_channels,
                             RAM_decoder_output_channels, kernel_size, T,
                             self.relative_attention_component,
                             self.geometric_component)

        # initialize parameters for edge importance weighting
        if edge_importance_weighting:
            self.edge_importance = nn.ParameterList([
                nn.Parameter(torch.ones(self.A.size()))
                for i in self.st_gcn_networks
            ])
            # edge importance for RAM_r's encoder and decoder in RAMGen
            self.RAMGen_edge_importance = nn.Parameter(
                torch.ones(self.A.size()))
        else:
            self.edge_importance = [1] * len(self.st_gcn_networks)

        # fcn for prediction
        self.fcn = nn.Conv2d(256, num_class, kernel_size=(1, 1))
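
`edge_importance` follows the usual ST-GCN convention: one learnable mask per block, multiplied element-wise with the adjacency tensor before each graph convolution. A hedged sketch of the corresponding part of the forward (not shown in the snippet):

    def apply_blocks(self, x):
        for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
            x, _ = gcn(x, self.A * importance)
        return x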
Example #24
    def __init__(self, max_length, n_way, type="cnnLinear"):
        '''
        :param max_length:
        :param n_way:
        :param type: one of "cnnLinear", "pcnnLinear", "concatLinear", "clsLinear"
        '''
        nn.Module.__init__(self)
        self.max_length = max_length
        pretrain_path = './pretrain/bert-base-uncased/'
        self.sentence_embedding = network.embedding.BERTSentenceEmbedding(
            pretrain_path=pretrain_path, max_length=self.max_length)
        self.vars = nn.ParameterList()
        self.n_way = n_way
        self.feature_dim = 768
        self.filter_num = 128
        self.type = type

        self.attention = False
        # kernel size = 2
        # [ch_out, ch_in, kernelsz, kernelsz]
        if type == "pcnnLinear":
            # CNN
            self.filter_sizes = [2, 3, 4, 5]
            for filter_size in self.filter_sizes:
                w = nn.Parameter(
                    torch.ones(self.filter_num, 1, filter_size,
                               self.feature_dim))  # [filter_num, 1, filter_size, feature_dim]
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                self.vars.append(nn.Parameter(torch.zeros(self.filter_num)))

            filter_dim = self.filter_num * len([2, 3, 4, 5])
            labels_num = self.n_way

            # dropout
            self.dropout = nn.Dropout(0.5)

            # linear
            w = nn.Parameter(torch.ones(128, filter_dim * 3))
            self.linear = nn.Linear(filter_dim * 3, 128)
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(128)))

            w = nn.Parameter(torch.ones(labels_num, 128))
            self.linear = nn.Linear(128, labels_num)
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            self.vars.append(nn.Parameter(torch.zeros(labels_num)))

        elif self.type == 'cnnLinear':
            # *************attention*****************

            if self.attention:
                w = nn.Parameter(torch.ones(self.feature_dim,
                                            self.feature_dim),
                                 requires_grad=True)
                # self.linear = nn.Linear(filter_dim, labels_num)
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                self.vars.append(
                    nn.Parameter(torch.zeros(self.feature_dim),
                                 requires_grad=True))

                w = nn.Parameter(torch.ones(1, self.feature_dim),
                                 requires_grad=True)
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
            # *************conv*********************
            # kernel size = 2
            # [ch_out, ch_in, kernelsz, kernelsz]
            for filter_size in [2, 3, 4, 5]:
                w = nn.Parameter(torch.ones(self.filter_num, 1, filter_size,
                                            self.feature_dim),
                                 requires_grad=True)  # [filter_num, 1, filter_size, feature_dim]
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                self.vars.append(
                    nn.Parameter(torch.zeros(self.filter_num),
                                 requires_grad=True))

            filter_dim = self.filter_num * len([2, 3, 4, 5])
            labels_num = self.n_way

            # dropout
            self.dropout = nn.Dropout(0.5)

            # linear
            w = nn.Parameter(torch.ones(labels_num, filter_dim),
                             requires_grad=True)
            # self.linear = nn.Linear(filter_dim, labels_num)
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_out]
            self.vars.append(
                nn.Parameter(torch.zeros(labels_num), requires_grad=True))

        # linear
        elif self.type == "concatLinear":
            w = nn.Parameter(torch.ones(self.n_way, 1536))
            self.linear = nn.Linear(1536, self.n_way)
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            self.vars.append(nn.Parameter(torch.zeros(self.n_way)))

        elif self.type == "clsLinear":
            w = nn.Parameter(torch.ones(self.n_way, 768))
            self.linear = nn.Linear(768, self.n_way)
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            self.vars.append(nn.Parameter(torch.zeros(self.n_way)))

        else:
            raise Exception(
                "Learner type can only be cnnLinear, pcnnLinear, concatLinear, or clsLinear")
Example #25
    def __init__(self,
                 field_size,
                 feature_sizes,
                 embedding_size=4,
                 h_depth=3,
                 deep_layers=[32, 32, 32],
                 is_deep_dropout=True,
                 dropout_deep=[0.5, 0.5, 0.5],
                 use_inner_product=True,
                 use_outer_product=False,
                 deep_layers_activation='relu',
                 n_epochs=64,
                 batch_size=256,
                 learning_rate=0.003,
                 optimizer_type='adam',
                 is_batch_norm=False,
                 verbose=False,
                 random_seed=950104,
                 weight_decay=0.0,
                 loss_type='logloss',
                 eval_metric=roc_auc_score,
                 use_cuda=True,
                 n_class=1,
                 greater_is_better=True):
        super(PNN, self).__init__()
        self.field_size = field_size
        self.feature_sizes = feature_sizes
        self.embedding_size = embedding_size
        self.h_depth = h_depth
        self.deep_layers = deep_layers
        self.is_deep_dropout = is_deep_dropout
        self.dropout_deep = dropout_deep
        self.use_inner_product = use_inner_product
        self.use_outer_product = use_outer_product
        self.deep_layers_activation = deep_layers_activation
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type
        self.is_batch_norm = is_batch_norm
        self.verbose = verbose
        self.weight_decay = weight_decay
        self.random_seed = random_seed
        self.loss_type = loss_type
        self.eval_metric = eval_metric
        self.use_cuda = use_cuda
        self.n_class = n_class
        self.greater_is_better = greater_is_better

        torch.manual_seed(self.random_seed)
        """
            check cuda
        """
        if self.use_cuda and not torch.cuda.is_available():
            self.use_cuda = False
            print(
                "Cuda is not available, automatically falling back to CPU mode")
        """
            check use inner_product or outer_product
        """
        if self.use_inner_product and self.use_outer_product:
            print("The model uses both inner product and outer product")
        elif self.use_inner_product:
            print("The model uses inner product (IPNN)")
        elif self.use_outer_product:
            print("The model uses outer product (OPNN)")
        else:
            print(
                "The model is a plain deep model; neither inner product nor outer product is used"
            )
        """
            embedding part
        """
        print("Init embeddings")
        self.embeddings = nn.ModuleList([
            nn.Embedding(feature_size, self.embedding_size)
            for feature_size in self.feature_sizes
        ])
        print("Init embeddings finished")
        """
            first order part (linear part)
        """
        print("Init first order part")
        self.first_order_weight = nn.ModuleList([
            nn.ParameterList([
                torch.nn.Parameter(torch.randn(self.embedding_size),
                                   requires_grad=True)
                for j in range(self.field_size)
            ]) for i in range(self.deep_layers[0])
        ])
        self.bias = torch.nn.Parameter(torch.randn(self.deep_layers[0]),
                                       requires_grad=True)
        print("Init first order part finished")
        """
            second order part (quadratic part)
        """
        print("Init second order part")
        if self.use_inner_product:
            self.inner_second_weight_emb = nn.ModuleList([
                nn.ParameterList([
                    torch.nn.Parameter(torch.randn(self.embedding_size),
                                       requires_grad=True)
                    for j in range(self.field_size)
                ]) for i in range(self.deep_layers[0])
            ])

        if self.use_outer_product:
            arr = []
            for i in range(self.deep_layers[0]):
                tmp = torch.randn(self.embedding_size, self.embedding_size)
                arr.append(torch.nn.Parameter(torch.mm(tmp, tmp.t())))
            self.outer_second_weight_emb = nn.ParameterList(arr)
        print("Init second order part finished")

        print("Init nn part")

        for i, h in enumerate(self.deep_layers[1:], 1):
            setattr(self, 'linear_' + str(i),
                    nn.Linear(self.deep_layers[i - 1], self.deep_layers[i]))
            if self.is_batch_norm:
                setattr(self, 'batch_norm_' + str(i),
                        nn.BatchNorm1d(deep_layers[i]))
            if self.is_deep_dropout:
                setattr(self, 'linear_' + str(i) + '_dropout',
                        nn.Dropout(self.dropout_deep[i]))
        self.deep_last_layer = nn.Linear(self.deep_layers[-1], self.n_class)
        print("Init nn part succeed")

        print("Init succeed")
Example #26
    def train(self):
        self.train_writer = SummaryWriter(logdir=self.save_path)
        dictionary_size = self.dictionary_size
        top1_acc_list_cumul = np.zeros(
            (int(self.args.num_classes / self.args.nb_cl), 4,
             self.args.nb_runs))
        top1_acc_list_ori = np.zeros(
            (int(self.args.num_classes / self.args.nb_cl), 4,
             self.args.nb_runs))
        X_train_total = np.array(self.trainset.train_data)
        Y_train_total = np.array(self.trainset.train_labels)
        X_valid_total = np.array(self.testset.test_data)
        Y_valid_total = np.array(self.testset.test_labels)
        np.random.seed(1993)
        for iteration_total in range(self.args.nb_runs):
            order_name = osp.join(
                self.save_path,
                "seed_{}_{}_order_run_{}.pkl".format(1993, self.args.dataset,
                                                     iteration_total))
            print("Order name:{}".format(order_name))
            if osp.exists(order_name):
                print("Loading orders")
                order = utils.misc.unpickle(order_name)
            else:
                print("Generating orders")
                order = np.arange(self.args.num_classes)
                np.random.shuffle(order)
                utils.misc.savepickle(order, order_name)
            order_list = list(order)
            print(order_list)
        np.random.seed(self.args.random_seed)
        X_valid_cumuls = []
        X_protoset_cumuls = []
        X_train_cumuls = []
        Y_valid_cumuls = []
        Y_protoset_cumuls = []
        Y_train_cumuls = []
        alpha_dr_herding = np.zeros(
            (int(self.args.num_classes / self.args.nb_cl), dictionary_size,
             self.args.nb_cl), np.float32)
        prototypes = np.zeros(
            (self.args.num_classes, dictionary_size, X_train_total.shape[1],
             X_train_total.shape[2], X_train_total.shape[3]))
        for orde in range(self.args.num_classes):
            prototypes[orde, :, :, :, :] = X_train_total[np.where(
                Y_train_total == order[orde])]
        start_iter = int(self.args.nb_cl_fg / self.args.nb_cl) - 1
        for iteration in range(start_iter,
                               int(self.args.num_classes / self.args.nb_cl)):
            if iteration == start_iter:
                last_iter = 0
                tg_model = self.network(num_classes=self.args.nb_cl_fg)
                in_features = tg_model.fc.in_features
                out_features = tg_model.fc.out_features
                print("Out_features:", out_features)
                ref_model = None
                free_model = None
                ref_free_model = None
            elif iteration == start_iter + 1:
                last_iter = iteration
                ref_model = copy.deepcopy(tg_model)
                print("Fusion Mode: " + self.args.fusion_mode)
                tg_model = self.network_mtl(num_classes=self.args.nb_cl_fg)
                ref_dict = ref_model.state_dict()
                tg_dict = tg_model.state_dict()
                tg_dict.update(ref_dict)
                tg_model.load_state_dict(tg_dict)
                tg_model.to(self.device)
                in_features = tg_model.fc.in_features
                out_features = tg_model.fc.out_features
                print("Out_features:", out_features)
                new_fc = modified_linear.SplitCosineLinear(
                    in_features, out_features, self.args.nb_cl)
                new_fc.fc1.weight.data = tg_model.fc.weight.data
                new_fc.sigma.data = tg_model.fc.sigma.data
                tg_model.fc = new_fc
                lamda_mult = out_features * 1.0 / self.args.nb_cl
            else:
                last_iter = iteration
                ref_model = copy.deepcopy(tg_model)
                in_features = tg_model.fc.in_features
                out_features1 = tg_model.fc.fc1.out_features
                out_features2 = tg_model.fc.fc2.out_features
                print("Out_features:", out_features1 + out_features2)
                new_fc = modified_linear.SplitCosineLinear(
                    in_features, out_features1 + out_features2,
                    self.args.nb_cl)
                new_fc.fc1.weight.data[:
                                       out_features1] = tg_model.fc.fc1.weight.data
                new_fc.fc1.weight.data[
                    out_features1:] = tg_model.fc.fc2.weight.data
                new_fc.sigma.data = tg_model.fc.sigma.data
                tg_model.fc = new_fc
                lamda_mult = (out_features1 +
                              out_features2) * 1.0 / (self.args.nb_cl)
            if iteration > start_iter:
                cur_lamda = self.args.lamda * math.sqrt(lamda_mult)
            else:
                cur_lamda = self.args.lamda
            actual_cl = order[range(last_iter * self.args.nb_cl,
                                    (iteration + 1) * self.args.nb_cl)]
            indices_train_10 = np.array([
                i in order[range(last_iter * self.args.nb_cl,
                                 (iteration + 1) * self.args.nb_cl)]
                for i in Y_train_total
            ])
            indices_test_10 = np.array([
                i in order[range(last_iter * self.args.nb_cl,
                                 (iteration + 1) * self.args.nb_cl)]
                for i in Y_valid_total
            ])
            X_train = X_train_total[indices_train_10]
            X_valid = X_valid_total[indices_test_10]
            X_valid_cumuls.append(X_valid)
            X_train_cumuls.append(X_train)
            X_valid_cumul = np.concatenate(X_valid_cumuls)
            X_train_cumul = np.concatenate(X_train_cumuls)
            Y_train = Y_train_total[indices_train_10]
            Y_valid = Y_valid_total[indices_test_10]
            Y_valid_cumuls.append(Y_valid)
            Y_train_cumuls.append(Y_train)
            Y_valid_cumul = np.concatenate(Y_valid_cumuls)
            Y_train_cumul = np.concatenate(Y_train_cumuls)
            if iteration == start_iter:
                X_valid_ori = X_valid
                Y_valid_ori = Y_valid
            else:
                X_protoset = np.concatenate(X_protoset_cumuls)
                Y_protoset = np.concatenate(Y_protoset_cumuls)
                if self.args.rs_ratio > 0:
                    scale_factor = (len(X_train) * self.args.rs_ratio) / (
                        len(X_protoset) * (1 - self.args.rs_ratio))
                    rs_sample_weights = np.concatenate(
                        (np.ones(len(X_train)),
                         np.ones(len(X_protoset)) * scale_factor))
                    rs_num_samples = int(
                        len(X_train) / (1 - self.args.rs_ratio))
                    print(
                        "X_train:{}, X_protoset:{}, rs_num_samples:{}".format(
                            len(X_train), len(X_protoset), rs_num_samples))
                X_train = np.concatenate((X_train, X_protoset), axis=0)
                Y_train = np.concatenate((Y_train, Y_protoset))
            print('Batch of classes number {0} arrives'.format(iteration + 1))
            map_Y_train = np.array([order_list.index(i) for i in Y_train])
            map_Y_valid_cumul = np.array(
                [order_list.index(i) for i in Y_valid_cumul])
            is_start_iteration = (iteration == start_iter)
            if iteration > start_iter:
                old_embedding_norm = tg_model.fc.fc1.weight.data.norm(
                    dim=1, keepdim=True)
                average_old_embedding_norm = torch.mean(old_embedding_norm,
                                                        dim=0).to('cpu').type(
                                                            torch.DoubleTensor)
                tg_feature_model = nn.Sequential(
                    *list(tg_model.children())[:-1])
                num_features = tg_model.fc.in_features
                novel_embedding = torch.zeros((self.args.nb_cl, num_features))
                for cls_idx in range(iteration * self.args.nb_cl,
                                     (iteration + 1) * self.args.nb_cl):
                    cls_indices = np.array([i == cls_idx for i in map_Y_train])
                    assert (len(
                        np.where(cls_indices == 1)[0]) == dictionary_size)
                    self.evalset.test_data = X_train[cls_indices].astype(
                        'uint8')
                    self.evalset.test_labels = np.zeros(
                        self.evalset.test_data.shape[0])
                    evalloader = torch.utils.data.DataLoader(
                        self.evalset,
                        batch_size=self.args.eval_batch_size,
                        shuffle=False,
                        num_workers=self.args.num_workers)
                    num_samples = self.evalset.test_data.shape[0]
                    cls_features = compute_features(tg_model, free_model,
                                                    tg_feature_model,
                                                    is_start_iteration,
                                                    evalloader, num_samples,
                                                    num_features)
                    norm_features = F.normalize(torch.from_numpy(cls_features),
                                                p=2,
                                                dim=1)
                    cls_embedding = torch.mean(norm_features, dim=0)
                    novel_embedding[cls_idx -
                                    iteration * self.args.nb_cl] = F.normalize(
                                        cls_embedding, p=2,
                                        dim=0) * average_old_embedding_norm
                tg_model.to(self.device)
                tg_model.fc.fc2.weight.data = novel_embedding.to(self.device)
            self.trainset.train_data = X_train.astype('uint8')
            self.trainset.train_labels = map_Y_train
            if iteration > start_iter and self.args.rs_ratio > 0 and scale_factor > 1:
                print("Weights from sampling:", rs_sample_weights)
                index1 = np.where(rs_sample_weights > 1)[0]
                index2 = np.where(map_Y_train < iteration * self.args.nb_cl)[0]
                assert ((index1 == index2).all())
                train_sampler = torch.utils.data.sampler.WeightedRandomSampler(
                    rs_sample_weights, rs_num_samples)
                trainloader = torch.utils.data.DataLoader(
                    self.trainset,
                    batch_size=self.args.train_batch_size,
                    shuffle=False,
                    sampler=train_sampler,
                    num_workers=self.args.num_workers)
            else:
                trainloader = torch.utils.data.DataLoader(
                    self.trainset,
                    batch_size=self.args.train_batch_size,
                    shuffle=True,
                    num_workers=self.args.num_workers)
            self.testset.test_data = X_valid_cumul.astype('uint8')
            self.testset.test_labels = map_Y_valid_cumul
            testloader = torch.utils.data.DataLoader(
                self.testset,
                batch_size=self.args.test_batch_size,
                shuffle=False,
                num_workers=self.args.num_workers)
            print('Max and min of train labels: {}, {}'.format(
                min(map_Y_train), max(map_Y_train)))
            print('Max and min of valid labels: {}, {}'.format(
                min(map_Y_valid_cumul), max(map_Y_valid_cumul)))
            ckp_name = osp.join(
                self.save_path,
                'run_{}_iteration_{}_model.pth'.format(iteration_total,
                                                       iteration))
            ckp_name_free = osp.join(
                self.save_path, 'run_{}_iteration_{}_free_model.pth'.format(
                    iteration_total, iteration))
            print('Checkpoint name:', ckp_name)
            if iteration == start_iter and self.args.resume_fg:
                print("Loading first group models from checkpoint")
                tg_model = torch.load(self.args.ckpt_dir_fg)
            elif self.args.resume and os.path.exists(ckp_name):
                print("Loading models from checkpoint")
                tg_model = torch.load(ckp_name)
            else:
                if iteration > start_iter:
                    ref_model = ref_model.to(self.device)
                    ignored_params = list(map(id,
                                              tg_model.fc.fc1.parameters()))
                    base_params = filter(lambda p: id(p) not in ignored_params,
                                         tg_model.parameters())
                    base_params = filter(lambda p: p.requires_grad,
                                         base_params)
                    tg_params_new = [{
                        'params':
                        base_params,
                        'lr':
                        self.args.base_lr2,
                        'weight_decay':
                        self.args.custom_weight_decay
                    }, {
                        'params': tg_model.fc.fc1.parameters(),
                        'lr': 0,
                        'weight_decay': 0
                    }]
                    tg_model = tg_model.to(self.device)
                    tg_optimizer = optim.SGD(
                        tg_params_new,
                        lr=self.args.base_lr2,
                        momentum=self.args.custom_momentum,
                        weight_decay=self.args.custom_weight_decay)
                else:
                    tg_params = tg_model.parameters()
                    tg_model = tg_model.to(self.device)
                    tg_optimizer = optim.SGD(
                        tg_params,
                        lr=self.args.base_lr1,
                        momentum=self.args.custom_momentum,
                        weight_decay=self.args.custom_weight_decay)
                if iteration > start_iter:
                    tg_lr_scheduler = lr_scheduler.MultiStepLR(
                        tg_optimizer,
                        milestones=self.lr_strat,
                        gamma=self.args.lr_factor)
                else:
                    tg_lr_scheduler = lr_scheduler.MultiStepLR(
                        tg_optimizer,
                        milestones=self.lr_strat_first_phase,
                        gamma=self.args.lr_factor)
                print("Incremental train")
                tg_model = incremental_train_and_eval(
                    self.args.epochs, tg_model, ref_model, free_model,
                    ref_free_model, tg_optimizer, tg_lr_scheduler,
                    trainloader, testloader, iteration, start_iter,
                    cur_lamda, self.args.dist, self.args.K,
                    self.args.lw_mr)
                torch.save(tg_model, ckp_name)
            if self.args.fix_budget:
                nb_protos_cl = int(
                    np.ceil(self.args.nb_protos * 100. / self.args.nb_cl /
                            (iteration + 1)))
            else:
                nb_protos_cl = self.args.nb_protos
            tg_feature_model = nn.Sequential(*list(tg_model.children())[:-1])
            num_features = tg_model.fc.in_features
            for iter_dico in range(last_iter * self.args.nb_cl,
                                   (iteration + 1) * self.args.nb_cl):
                self.evalset.test_data = prototypes[iter_dico].astype('uint8')
                self.evalset.test_labels = np.zeros(
                    self.evalset.test_data.shape[0])
                evalloader = torch.utils.data.DataLoader(
                    self.evalset,
                    batch_size=self.args.eval_batch_size,
                    shuffle=False,
                    num_workers=self.args.num_workers)
                num_samples = self.evalset.test_data.shape[0]
                mapped_prototypes = compute_features(tg_model, free_model,
                                                     tg_feature_model,
                                                     is_start_iteration,
                                                     evalloader, num_samples,
                                                     num_features)
                D = mapped_prototypes.T
                D = D / np.linalg.norm(D, axis=0)
                mu = np.mean(D, axis=1)
                index1 = int(iter_dico / self.args.nb_cl)
                index2 = iter_dico % self.args.nb_cl
                alpha_dr_herding[index1, :,
                                 index2] = alpha_dr_herding[index1, :,
                                                            index2] * 0
                w_t = mu
                iter_herding = 0
                iter_herding_eff = 0
                while not (np.sum(alpha_dr_herding[index1, :, index2] != 0)
                           == min(nb_protos_cl,
                                  500)) and iter_herding_eff < 1000:
                    tmp_t = np.dot(w_t, D)
                    ind_max = np.argmax(tmp_t)

                    iter_herding_eff += 1
                    if alpha_dr_herding[index1, ind_max, index2] == 0:
                        alpha_dr_herding[index1, ind_max,
                                         index2] = 1 + iter_herding
                        iter_herding += 1
                    w_t = w_t + mu - D[:, ind_max]
            X_protoset_cumuls = []
            Y_protoset_cumuls = []
            class_means = np.zeros((64, 100, 2))
            for iteration2 in range(iteration + 1):
                for iter_dico in range(self.args.nb_cl):
                    current_cl = order[range(iteration2 * self.args.nb_cl,
                                             (iteration2 + 1) *
                                             self.args.nb_cl)]
                    self.evalset.test_data = prototypes[
                        iteration2 * self.args.nb_cl +
                        iter_dico].astype('uint8')
                    self.evalset.test_labels = np.zeros(
                        self.evalset.test_data.shape[0])  #zero labels
                    evalloader = torch.utils.data.DataLoader(
                        self.evalset,
                        batch_size=self.args.eval_batch_size,
                        shuffle=False,
                        num_workers=self.args.num_workers)
                    num_samples = self.evalset.test_data.shape[0]
                    mapped_prototypes = compute_features(
                        tg_model, free_model, tg_feature_model,
                        is_start_iteration, evalloader, num_samples,
                        num_features)
                    D = mapped_prototypes.T
                    D = D / np.linalg.norm(D, axis=0)
                    self.evalset.test_data = prototypes[
                        iteration2 * self.args.nb_cl +
                        iter_dico][:, :, :, ::-1].astype('uint8')
                    evalloader = torch.utils.data.DataLoader(
                        self.evalset,
                        batch_size=self.args.eval_batch_size,
                        shuffle=False,
                        num_workers=self.args.num_workers)
                    mapped_prototypes2 = compute_features(
                        tg_model, free_model, tg_feature_model,
                        is_start_iteration, evalloader, num_samples,
                        num_features)
                    D2 = mapped_prototypes2.T
                    D2 = D2 / np.linalg.norm(D2, axis=0)
                    alph = alpha_dr_herding[iteration2, :, iter_dico]
                    alph = (alph > 0) * (alph < nb_protos_cl + 1) * 1.
                    X_protoset_cumuls.append(
                        prototypes[iteration2 * self.args.nb_cl + iter_dico,
                                   np.where(alph == 1)[0]])
                    Y_protoset_cumuls.append(
                        order[iteration2 * self.args.nb_cl + iter_dico] *
                        np.ones(len(np.where(alph == 1)[0])))
                    alph = alph / np.sum(alph)
                    class_means[:, current_cl[iter_dico],
                                0] = (np.dot(D, alph) + np.dot(D2, alph)) / 2
                    class_means[:, current_cl[iter_dico], 0] /= np.linalg.norm(
                        class_means[:, current_cl[iter_dico], 0])
                    alph = np.ones(dictionary_size) / dictionary_size
                    class_means[:, current_cl[iter_dico],
                                1] = (np.dot(D, alph) + np.dot(D2, alph)) / 2
                    class_means[:, current_cl[iter_dico], 1] /= np.linalg.norm(
                        class_means[:, current_cl[iter_dico], 1])
            current_means = class_means[:, order[range(0, (iteration + 1) *
                                                       self.args.nb_cl)]]
            X_protoset_array_old = np.array(X_protoset_cumuls)
            self.T = self.args.mnemonics_steps * self.args.mnemonics_epochs
            self.img_size = 32
            self.mnemonics_lrs = self.args.mnemonics_lr
            num_classes_incremental = self.args.nb_cl
            num_classes = self.args.nb_cl_fg
            nb_cl = self.args.nb_cl
            transform_proto = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5071, 0.4866, 0.4409),
                                     (0.2009, 0.1984, 0.2023)),
            ])
            self.mnemonics_label = []
            if iteration == start_iter:
                the_X_protoset_array = np.array(X_protoset_cumuls).astype(
                    'uint8')
                the_Y_protoset_cumuls = np.array(Y_protoset_cumuls)
            else:
                the_X_protoset_array = np.array(
                    X_protoset_cumuls[-num_classes_incremental:]).astype(
                        'uint8')
                the_Y_protoset_cumuls = np.array(
                    Y_protoset_cumuls[-num_classes_incremental:])
            self.mnemonics_data = torch.zeros(the_X_protoset_array.shape[0],
                                              the_X_protoset_array.shape[1], 3,
                                              self.img_size, self.img_size)
            for idx1 in range(the_X_protoset_array.shape[0]):
                for idx2 in range(the_X_protoset_array.shape[1]):
                    the_img = the_X_protoset_array[idx1][idx2]
                    the_PIL_image = Image.fromarray(the_img)
                    the_PIL_image = transform_proto(the_PIL_image)
                    self.mnemonics_data[idx1][idx2] = the_PIL_image
                map_Y_label = self.map_labels(order_list,
                                              the_Y_protoset_cumuls[idx1])
                self.mnemonics_label.append(map_Y_label)
            self.mnemonics = nn.ParameterList()
            self.mnemonics.append(nn.Parameter(self.mnemonics_data))
            start_iteration = start_iter
            device = self.device
            self.mnemonics.to(device)
            tg_feature_model = nn.Sequential(*list(tg_model.children())[:-1])
            tg_feature_model.eval()
            tg_model.eval()
            if free_model is not None:
                free_model.eval()
            self.mnemonics_optimizer = optim.SGD(
                self.mnemonics,
                lr=self.args.mnemonics_outer_lr,
                momentum=0.9,
                weight_decay=5e-4)
            self.mnemonics_lr_scheduler = optim.lr_scheduler.StepLR(
                self.mnemonics_optimizer,
                step_size=self.args.mnemonics_decay_epochs,
                gamma=self.args.mnemonics_decay_factor)
            current_means_new = current_means[:, :, 0].T
            for epoch in range(self.args.mnemonics_total_epochs):
                train_loss = 0
                self.mnemonics_lr_scheduler.step()
                for batch_idx, (q_inputs, q_targets) in enumerate(trainloader):
                    q_inputs, q_targets = q_inputs.to(device), q_targets.to(
                        device)
                    if iteration == start_iteration:
                        q_feature = tg_feature_model(q_inputs)
                    else:
                        q_feature = process_inputs_fp(tg_model,
                                                      free_model,
                                                      q_inputs,
                                                      feature_mode=True)
                    self.mnemonics_optimizer.zero_grad()
                    total_tr_loss = 0
                    if iteration == start_iteration:
                        mnemonics_outputs = tg_feature_model(
                            self.mnemonics[0][0])
                    else:
                        mnemonics_outputs = process_inputs_fp(
                            tg_model,
                            free_model,
                            self.mnemonics[0][0],
                            feature_mode=True)
                    this_class_mean_mnemonics = torch.mean(mnemonics_outputs,
                                                           dim=0)
                    this_class_mean_mnemonics = torch.squeeze(
                        this_class_mean_mnemonics)
                    total_class_mean_mnemonics = this_class_mean_mnemonics.unsqueeze(
                        dim=0)
                    for mnemonics_idx in range(len(self.mnemonics[0]) - 1):
                        if iteration == start_iteration:
                            mnemonics_outputs = tg_feature_model(
                                self.mnemonics[0][mnemonics_idx + 1])
                        else:
                            mnemonics_outputs = process_inputs_fp(
                                tg_model,
                                free_model,
                                self.mnemonics[0][mnemonics_idx + 1],
                                feature_mode=True)
                        this_class_mean_mnemonics = torch.mean(
                            mnemonics_outputs, dim=0)
                        this_class_mean_mnemonics = torch.squeeze(
                            this_class_mean_mnemonics)
                        total_class_mean_mnemonics = torch.cat(
                            (total_class_mean_mnemonics,
                             this_class_mean_mnemonics.unsqueeze(dim=0)),
                            dim=0)
                    if iteration == start_iteration:
                        all_cls_means = total_class_mean_mnemonics
                    else:
                        all_cls_means = torch.tensor(
                            current_means_new).float().to(device)
                        all_cls_means[-nb_cl:] = total_class_mean_mnemonics
                    the_logits = F.linear(
                        F.normalize(torch.squeeze(q_feature), p=2, dim=1),
                        F.normalize(all_cls_means, p=2, dim=1))
                    loss = F.cross_entropy(the_logits, q_targets)
                    loss.backward()
                    self.mnemonics_optimizer.step()
                    train_loss += loss.item()
            X_protoset_cumuls = process_mnemonics(
                X_protoset_cumuls, Y_protoset_cumuls, self.mnemonics,
                self.mnemonics_label, order_list, self.args.nb_cl_fg,
                self.args.nb_cl, iteration, start_iter)
            X_protoset_array = np.array(X_protoset_cumuls)
            X_protoset_cumuls_idx = 0
            for iteration2 in range(iteration + 1):
                for iter_dico in range(self.args.nb_cl):
                    alph = alpha_dr_herding[iteration2, :, iter_dico]
                    alph = (alph > 0) * (alph < nb_protos_cl + 1) * 1.
                    this_X_protoset_array = X_protoset_array[
                        X_protoset_cumuls_idx]
                    X_protoset_cumuls_idx += 1
                    this_X_protoset_array = this_X_protoset_array.astype(
                        np.float64)
                    prototypes[iteration2 * self.args.nb_cl + iter_dico,
                               np.where(alph == 1)[0]] = this_X_protoset_array
            class_means = np.zeros((64, 100, 2))
            for iteration2 in range(iteration + 1):
                for iter_dico in range(self.args.nb_cl):
                    current_cl = order[range(iteration2 * self.args.nb_cl,
                                             (iteration2 + 1) *
                                             self.args.nb_cl)]
                    self.evalset.test_data = prototypes[
                        iteration2 * self.args.nb_cl +
                        iter_dico].astype('uint8')
                    self.evalset.test_labels = np.zeros(
                        self.evalset.test_data.shape[0])  #zero labels
                    evalloader = torch.utils.data.DataLoader(
                        self.evalset,
                        batch_size=self.args.eval_batch_size,
                        shuffle=False,
                        num_workers=self.args.num_workers)
                    num_samples = self.evalset.test_data.shape[0]
                    mapped_prototypes = compute_features(
                        tg_model, free_model, tg_feature_model,
                        is_start_iteration, evalloader, num_samples,
                        num_features)
                    D = mapped_prototypes.T
                    D = D / np.linalg.norm(D, axis=0)
                    self.evalset.test_data = prototypes[
                        iteration2 * self.args.nb_cl +
                        iter_dico][:, :, :, ::-1].astype('uint8')
                    evalloader = torch.utils.data.DataLoader(
                        self.evalset,
                        batch_size=self.args.eval_batch_size,
                        shuffle=False,
                        num_workers=self.args.num_workers)
                    mapped_prototypes2 = compute_features(
                        tg_model, free_model, tg_feature_model,
                        is_start_iteration, evalloader, num_samples,
                        num_features)
                    D2 = mapped_prototypes2.T
                    D2 = D2 / np.linalg.norm(D2, axis=0)
                    alph = alpha_dr_herding[iteration2, :, iter_dico]
                    alph = (alph > 0) * (alph < nb_protos_cl + 1) * 1.
                    alph = alph / np.sum(alph)
                    class_means[:, current_cl[iter_dico],
                                0] = (np.dot(D, alph) + np.dot(D2, alph)) / 2
                    class_means[:, current_cl[iter_dico], 0] /= np.linalg.norm(
                        class_means[:, current_cl[iter_dico], 0])
                    alph = np.ones(dictionary_size) / dictionary_size
                    class_means[:, current_cl[iter_dico],
                                1] = (np.dot(D, alph) + np.dot(D2, alph)) / 2
                    class_means[:, current_cl[iter_dico], 1] /= np.linalg.norm(
                        class_means[:, current_cl[iter_dico], 1])
            torch.save(
                class_means,
                osp.join(
                    self.save_path,
                    'run_{}_iteration_{}_class_means.pth'.format(
                        iteration_total, iteration)))
            current_means = class_means[:, order[range(0, (iteration + 1) *
                                                       self.args.nb_cl)]]
            is_start_iteration = (iteration == start_iter)
            map_Y_valid_ori = np.array(
                [order_list.index(i) for i in Y_valid_ori])
            print('Computing accuracy for first-phase classes')
            self.evalset.test_data = X_valid_ori.astype('uint8')
            self.evalset.test_labels = map_Y_valid_ori
            evalloader = torch.utils.data.DataLoader(
                self.evalset,
                batch_size=self.args.eval_batch_size,
                shuffle=False,
                num_workers=self.args.num_workers)
            ori_acc, fast_fc = compute_accuracy(
                tg_model,
                free_model,
                tg_feature_model,
                current_means,
                X_protoset_cumuls,
                Y_protoset_cumuls,
                evalloader,
                order_list,
                is_start_iteration=is_start_iteration,
                maml_lr=self.args.maml_lr,
                maml_epoch=self.args.maml_epoch)
            top1_acc_list_ori[iteration, :,
                              iteration_total] = np.array(ori_acc).T
            self.train_writer.add_scalar('ori_acc/LwF', float(ori_acc[0]),
                                         iteration)
            self.train_writer.add_scalar('ori_acc/iCaRL', float(ori_acc[1]),
                                         iteration)
            map_Y_valid_cumul = np.array(
                [order_list.index(i) for i in Y_valid_cumul])
            print('Computing accuracy for all seen classes')
            self.evalset.test_data = X_valid_cumul.astype('uint8')
            self.evalset.test_labels = map_Y_valid_cumul
            evalloader = torch.utils.data.DataLoader(
                self.evalset,
                batch_size=self.args.eval_batch_size,
                shuffle=False,
                num_workers=self.args.num_workers)
            cumul_acc, _ = compute_accuracy(
                tg_model,
                free_model,
                tg_feature_model,
                current_means,
                X_protoset_cumuls,
                Y_protoset_cumuls,
                evalloader,
                order_list,
                is_start_iteration=is_start_iteration,
                fast_fc=fast_fc,
                maml_lr=self.args.maml_lr,
                maml_epoch=self.args.maml_epoch)
            top1_acc_list_cumul[iteration, :,
                                iteration_total] = np.array(cumul_acc).T
            self.train_writer.add_scalar('cumul_acc/LwF', float(cumul_acc[0]),
                                         iteration)
            self.train_writer.add_scalar('cumul_acc/iCaRL',
                                         float(cumul_acc[1]), iteration)
        torch.save(
            top1_acc_list_ori,
            osp.join(self.save_path,
                     'run_{}_top1_acc_list_ori.pth'.format(iteration_total)))
        torch.save(
            top1_acc_list_cumul,
            osp.join(self.save_path,
                     'run_{}_top1_acc_list_cumul.pth'.format(iteration_total)))
        self.train_writer.close()
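The loop above stores two L2-normalized means per class: a herding-weighted
iCaRL mean and a uniform nearest-class-mean (NCM), each averaged over features
of the original and horizontally flipped images. Below is a minimal sketch of
the nearest-class-mean prediction such a class_means tensor supports;
predict_ncm and its argument names are illustrative, not part of this codebase.

import numpy as np

def predict_ncm(features, class_means, mode=0):
    # features: (num_samples, feature_dim); class_means: (feature_dim,
    # num_classes, 2); mode 0 = herding-weighted mean, mode 1 = uniform mean
    feats = features / np.linalg.norm(features, axis=1, keepdims=True)
    means = class_means[:, :, mode]  # (feature_dim, num_classes)
    dists = ((feats[:, :, None] - means[None, :, :]) ** 2).sum(axis=1)
    return np.argmin(dists, axis=1)  # index of the nearest class mean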
Example #27
    def __init__(self, target_shapes, chunk_size, chunk_emb_size=8,
                 cond_chunk_embs=False, uncond_in_size=0, cond_in_size=8,
                 layers=(100, 100), verbose=True, activation_fn=torch.nn.ReLU(),
                 use_bias=True, no_uncond_weights=False, no_cond_weights=False,
                 num_cond_embs=1, dropout_rate=-1, use_spectral_norm=False,
                 use_batch_norm=False):
        # FIXME: find a way to use super() to handle the multiple inheritance.
        nn.Module.__init__(self)
        HyperNetInterface.__init__(self)

        assert isinstance(chunk_size, int) and chunk_size > 0
        assert isinstance(chunk_emb_size, int) and chunk_emb_size > 0

        ### Make constructor arguments internally available ###
        self._chunk_size = chunk_size
        self._chunk_emb_size = chunk_emb_size
        self._cond_chunk_embs = cond_chunk_embs
        self._uncond_in_size = uncond_in_size
        self._cond_in_size = cond_in_size
        self._no_uncond_weights = no_uncond_weights
        self._no_cond_weights = no_cond_weights
        self._num_cond_embs = num_cond_embs

        ### Create underlying full hypernet ###
        # Note, even if chunk embeddings are considered conditional, they
        # are maintained in this object and just fed as an external input to the
        # underlying hnet.
        hnet_uncond_in_size = uncond_in_size + chunk_emb_size
        hnet_num_cond_embs = num_cond_embs
        if cond_chunk_embs and cond_in_size == 0:
            # If there are no conditional embeddings other than the chunk
            # embeddings, we explicitly tell the underlying hnet that it
            # doesn't need to maintain any conditional weights, so that it
            # doesn't emit a warning.
            hnet_num_cond_embs = 0
        self._hnet = HMLP([[chunk_size]], uncond_in_size=hnet_uncond_in_size,
            cond_in_size=cond_in_size, layers=layers, verbose=False,
            activation_fn=activation_fn, use_bias=use_bias,
            no_uncond_weights=no_uncond_weights,
            no_cond_weights=no_cond_weights, num_cond_embs=hnet_num_cond_embs,
            dropout_rate=dropout_rate, use_spectral_norm=use_spectral_norm,
            use_batch_norm=use_batch_norm)

        ### Setup attributes required by interface ###
        # Most of these attributes are taken over from `self._hnet`
        self._target_shapes = target_shapes
        self._num_known_conds = self._num_cond_embs
        self._unconditional_param_shapes_ref = \
            list(self._hnet._unconditional_param_shapes_ref)

        if self._hnet._internal_params is not None:
            self._internal_params = \
                nn.ParameterList(self._hnet._internal_params)
        self._param_shapes = list(self._hnet._param_shapes)
        self._param_shapes_meta = list(self._hnet._param_shapes_meta)
        if self._hnet._hyper_shapes_learned is not None:
            self._hyper_shapes_learned = list(self._hnet._hyper_shapes_learned)
            self._hyper_shapes_learned_ref = \
                list(self._hnet._hyper_shapes_learned_ref)
        if self._hnet._hyper_shapes_distilled is not None:
            self._hyper_shapes_distilled = \
                list(self._hnet._hyper_shapes_distilled)
        self._has_bias = self._hnet._has_bias
        self._has_fc_out = self._hnet._has_fc_out
        # Just to make this explicit: we will additionally append the chunk
        # embeddings at the end of `param_shapes`. We don't prepend them,
        # in order to keep the conditional input embeddings first.
        self._mask_fc_out = False
        self._has_linear_out = self._hnet._has_linear_out
        self._layer_weight_tensors = \
            nn.ParameterList(self._hnet._layer_weight_tensors)
        self._layer_bias_vectors = \
            nn.ParameterList(self._hnet._layer_bias_vectors)
        if self._hnet._batchnorm_layers is not None:
            self._batchnorm_layers = nn.ModuleList(self._hnet._batchnorm_layers)
        if self._hnet._context_mod_layers is not None:
            self._context_mod_layers = \
                nn.ModuleList(self._hnet._context_mod_layers)

        ### Create chunk embeddings ###
        if cond_in_size == 0 and uncond_in_size == 0 and not cond_chunk_embs:
            # Note, we could also allow this case. It would be analogous to
            # creating a full hypernet with no unconditional input and one
            # conditional embedding. But the user can achieve that explicitly
            # as noted below.
            raise ValueError('If no external (conditional or unconditional) ' +
                             'input is provided to the hypernetwork, then ' +
                             'it can only learn a fixed output. If this ' +
                             'behavior is desired, please enable ' +
                             '"cond_chunk_embs" and set "num_cond_embs=1".')

        num_cemb_mats = 1
        no_cemb_weights = no_uncond_weights
        if cond_chunk_embs:
            num_cemb_mats = num_cond_embs
            no_cemb_weights = no_cond_weights

        self._cemb_shape = [self.num_chunks, chunk_emb_size]

        for _ in range(num_cemb_mats):
            if not no_cemb_weights:
                self._internal_params.append(nn.Parameter( \
                    data=torch.Tensor(*self._cemb_shape), requires_grad=True))
                torch.nn.init.normal_(self._internal_params[-1], mean=0.,
                                      std=1.)
            else:
                self._hyper_shapes_learned.append(self._cemb_shape)
                self._hyper_shapes_learned_ref.append(len(self.param_shapes))

            if not cond_chunk_embs:
                self._unconditional_param_shapes_ref.append( \
                    len(self.param_shapes))

            self._param_shapes.append(self._cemb_shape)
            # In principle, these embeddings also belong to the input, so we
            # just assign them to "layer" 0 (note, the underlying hnet uses
            # the same layer ID for its embeddings).
            self._param_shapes_meta.append({
                'name': 'embedding',
                'index': -1 if no_cemb_weights else \
                    len(self._internal_params)-1,
                'layer': 0,
                'info': 'chunk embeddings'
            })

        ### Finalize construction ###
        self._is_properly_setup()

        if verbose:
            print('Created Chunked MLP Hypernet with %d chunk(s) of size %d.' \
                  % (self.num_chunks, chunk_size))
            print(self)
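To complement the constructor above, here is a minimal sketch of how a chunked
hypernetwork produces its output: the small internal hnet is queried once per
chunk, with the unconditional input concatenated to that chunk's embedding,
and the flat chunks are then split and reshaped into the target parameter
tensors. small_hnet and all other names below are assumptions for
illustration, not the library's API.

import math
import torch

def forward_chunked(small_hnet, chunk_embs, uncond_input, target_shapes):
    # chunk_embs: (num_chunks, chunk_emb_size), one learned row per chunk
    chunks = [small_hnet(torch.cat([uncond_input, emb])) for emb in chunk_embs]
    flat = torch.cat(chunks)  # num_chunks * chunk_size values, >= total needed
    outputs, offset = [], 0
    for shape in target_shapes:
        n = math.prod(shape)
        outputs.append(flat[offset:offset + n].view(*shape))
        offset += n
    return outputs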
Example #28
    def __init__(self, f, params=None):
        if params is None:
            params = ()
        super(FuncModule, self).__init__()
        self.f = f
        self.params = nn.ParameterList(list(params))
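The forward pass is not shown in this snippet. A plausible completion, under
the assumption that the wrapped function receives the registered parameters as
trailing arguments, is sketched below; the class name and forward signature
are guesses, not the original code.

import torch
import torch.nn as nn

class FuncModuleSketch(nn.Module):
    def __init__(self, f, params=None):
        if params is None:
            params = ()
        super(FuncModuleSketch, self).__init__()
        self.f = f
        self.params = nn.ParameterList(list(params))

    def forward(self, x):
        # Call the wrapped function with the learnable parameters appended
        return self.f(x, *self.params)

# Usage: make the second operand of torch.add a trainable bias
m = FuncModuleSketch(torch.add, params=[nn.Parameter(torch.zeros(3))])
out = m(torch.ones(3))  # tensor([1., 1., 1.]) before any training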
Example #29
    def __init__(self):
        super(MyListDense, self).__init__()
        self.params = nn.ParameterList(
            [nn.Parameter(torch.randn(4, 4)) for _ in range(3)])
        self.params.append(nn.Parameter(torch.randn(4, 1)))
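The forward pass is again omitted. Under the common textbook completion, the
input is chained through every matrix in the list, mapping a (batch, 4) input
to a (batch, 1) output; this completion is an assumption, not the original.

import torch
import torch.nn as nn

class MyListDenseSketch(nn.Module):
    def __init__(self):
        super(MyListDenseSketch, self).__init__()
        self.params = nn.ParameterList(
            [nn.Parameter(torch.randn(4, 4)) for _ in range(3)])
        self.params.append(nn.Parameter(torch.randn(4, 1)))

    def forward(self, x):
        for p in self.params:
            x = torch.mm(x, p)  # (batch, 4) @ (4, 4) ... then @ (4, 1)
        return x

y = MyListDenseSketch()(torch.randn(2, 4))  # y.shape == (2, 1)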
Example #30
    def __init__(self, config, imgc, imgsz):
        """
        :param config: network config file, type:list of (string, list)
        :param imgc: 1 or 3
        :param imgsz:  28 or 84
        """
        super(Learner, self).__init__()
        self.config = config
        self.vars = nn.ParameterList()
        self.vars_bn = nn.ParameterList()
        for name, param in self.config:
            if name == 'conv2d':
                # [ch_out, ch_in, kernelsz, kernelsz]
                w = nn.Parameter(torch.ones(*param[:4]))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[0])))

            elif name == 'convt2d':
                # [ch_in, ch_out, kernelsz, kernelsz, stride, padding]
                w = nn.Parameter(torch.ones(*param[:4]))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[1])))

            elif name == 'linear':
                # [ch_out, ch_in]
                w = nn.Parameter(torch.ones(*param))
                # gain=1 according to cbfinn's implementation
                torch.nn.init.kaiming_normal_(w)
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[0])))

            elif name == 'bn':
                # [ch_out]
                w = nn.Parameter(torch.ones(param[0]))
                self.vars.append(w)
                # [ch_out]
                self.vars.append(nn.Parameter(torch.zeros(param[0])))
                # must set requires_grad=False
                running_mean = nn.Parameter(torch.zeros(param[0]),
                                            requires_grad=False)
                running_var = nn.Parameter(torch.ones(param[0]),
                                           requires_grad=False)
                self.vars_bn.extend([running_mean, running_var])

            elif name in [
                    'tanh', 'relu', 'upsample', 'avg_pool2d', 'max_pool2d',
                    'flatten', 'reshape', 'leakyrelu', 'sigmoid'
            ]:
                continue
            else:
                raise NotImplementedError

        # Running statistics of batch-norm layers never receive gradients
        for p in self.vars_bn:
            p.requires_grad = False
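A usage sketch for the constructor above. Each config entry follows the
(name, param-list) shapes the parsing loop expects; the concrete channel
counts, strides, and the flattened linear input are illustrative and assume
an 84x84 RGB input with two valid stride-2 convolutions (84 -> 41 -> 20).

config = [
    ('conv2d', [32, 3, 3, 3, 2, 0]),   # [ch_out, ch_in, k, k, stride, pad]
    ('relu', [True]),
    ('bn', [32]),
    ('conv2d', [32, 32, 3, 3, 2, 0]),
    ('relu', [True]),
    ('bn', [32]),
    ('flatten', []),
    ('linear', [5, 32 * 20 * 20]),     # [ch_out, ch_in]; 5-way output head
]
learner = Learner(config, imgc=3, imgsz=84)
print(len(learner.vars))  # 10: weight + bias for each conv/bn/linear entry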