def __init__(self, params, inputdim, nclasses, l2reg=0., batch_size=64,
             seed=1111, cudaEfficient=False):
    """
    PARAMETERS:
    -nhid:       number of hidden units (0: logistic regression)
    -optim:      optimizer ("sgd,lr=0.1", "adam", "rmsprop", ...)
    -tenacity:   number of consecutive epochs without dev-accuracy
                 improvement before early stopping
    -epoch_size: each epoch corresponds to epoch_size passes over the
                 train set
    -max_epoch:  maximum number of epochs
    -dropout:    dropout rate for the MLP
    """
    super(self.__class__, self).__init__(inputdim, nclasses, l2reg,
                                         batch_size, seed, cudaEfficient)
    self.nhid = params.get("nhid", 0)
    self.optim = params.get("optim", "adam")
    self.tenacity = params.get("tenacity", 5)
    self.epoch_size = params.get("epoch_size", 4)
    self.max_epoch = params.get("max_epoch", 200)
    self.dropout = params.get("dropout", 0.)
    self.batch_size = params.get("batch_size", 64)

    if self.nhid == 0:
        # No hidden layer: plain logistic regression.
        self.model = nn.Sequential(
            nn.Linear(self.inputdim, self.nclasses),
        ).cuda()
    else:
        # Earlier single-hidden-layer sigmoid variant, kept for reference:
        # self.model = nn.Sequential(
        #     nn.Linear(self.inputdim, self.nhid),
        #     nn.Dropout(p=self.dropout),
        #     nn.Sigmoid(),
        #     nn.Linear(self.nhid, self.nclasses),
        # ).cuda()
        # Two-hidden-layer MLP with tanh activations and dropout.
        self.model = nn.Sequential(
            nn.Dropout(p=self.dropout),
            nn.Linear(self.inputdim, self.nhid),
            nn.Tanh(),
            nn.Dropout(p=self.dropout),
            nn.Linear(self.nhid, self.nhid),
            nn.Tanh(),
            nn.Dropout(p=self.dropout),
            nn.Linear(self.nhid, self.nclasses),
        ).cuda()

    # Sum the loss over the batch rather than averaging it
    # (replaces the deprecated `loss_fn.size_average = False`).
    self.loss_fn = nn.CrossEntropyLoss(reduction='sum').cuda()

    optim_fn, optim_params = utils.get_optimizer(self.optim)
    self.optimizer = optim_fn(self.model.parameters(), **optim_params)
    self.optimizer.param_groups[0]['weight_decay'] = self.l2reg
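# A minimal usage sketch for the constructor above. The enclosing class name
# is not shown in this excerpt, so "MLP" is an assumption, as are the random
# tensors standing in for real features; a CUDA device is assumed because the
# constructor calls .cuda() unconditionally.
#
#     import torch
#     clf = MLP(params={"nhid": 128, "optim": "adam", "dropout": 0.1},
#               inputdim=300, nclasses=2)
#     X = torch.randn(64, 300).cuda()   # batch of 64 input vectors
#     logits = clf.model(X)             # shape (64, 2), unnormalized scores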
def set_params(self, params):
    if params['type']['type'] == 'MLP':
        self.nb_layers = int(params['type'].get("nb_layers", 1))
        self.nb_hid = int(params['type'].get("nb_hid", 50))
        act_fn = params['type'].get("act_fn", 'sigmoid')
        if act_fn == 'sigmoid':
            self.act_fn = nn.Sigmoid()
        elif act_fn == 'tanh':
            self.act_fn = nn.Tanh()
        elif act_fn == 'relu':
            self.act_fn = nn.ReLU()
        elif act_fn == 'elu':
            self.act_fn = nn.ELU(alpha=1.0)
        else:
            raise ValueError("unknown act_fn: %s" % act_fn)

    self.optimizer = params.get("optimizer", "adam")
    self.tenacity = int(params.get("tenacity", 5))
    self.epoch_size = int(params.get("epoch_size", 4))
    self.max_epoch = int(params.get("max_epoch", 100))
    self.dropout = params.get("dropout", 0.)
    self.batch_size = int(params.get("batch_size", 128))
    self.l2reg = params.get("l2reg", 0.02)

    # Set model
    if params['type']['type'] == 'LogisticRegression':
        self.model = nn.Sequential(
            nn.Linear(self.inputdim, self.outputdim),
            nn.Softmax(dim=-1))
    elif params['type']['type'] == 'MLP':
        # Each hidden Linear is followed by the activation and dropout
        # (the original appended the first Linear with no activation,
        # which made a one-hidden-layer MLP collapse into a linear map).
        modules = [nn.Linear(self.inputdim, self.nb_hid)]
        for _ in range(self.nb_layers - 1):
            modules.append(self.act_fn)
            modules.append(nn.Dropout(p=self.dropout))
            modules.append(nn.Linear(self.nb_hid, self.nb_hid))
        modules.append(self.act_fn)
        modules.append(nn.Dropout(p=self.dropout))
        modules.append(nn.Linear(self.nb_hid, self.outputdim))
        modules.append(nn.Softmax(dim=-1))
        self.model = nn.Sequential(*modules)

    # Sum the loss over the batch rather than averaging it
    # (replaces the deprecated `loss_fn.size_average = False`).
    self.loss_fn = nn.MSELoss(reduction='sum')
    if torch.cuda.is_available():
        self.model = self.model.cuda()
        self.loss_fn = self.loss_fn.cuda()

    # Note: self.optimizer held the optimizer name (a string) above;
    # from here on it holds the instantiated optimizer object.
    optim_fn, optim_params = utils.get_optimizer(self.optimizer)
    self.optimizer = optim_fn(self.model.parameters(), **optim_params)
    self.optimizer.param_groups[0]['weight_decay'] = self.l2reg
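# A sketch of the nested params dict that set_params() above expects: a
# 'type' sub-dict selecting the architecture, plus flat training options.
# The concrete values are illustrative assumptions, not a real config, and
# "net" is a hypothetical instance of this class.
#
#     params = {
#         "type": {"type": "MLP", "nb_layers": 2,
#                  "nb_hid": 50, "act_fn": "relu"},
#         "optimizer": "adam",
#         "dropout": 0.1,
#         "batch_size": 128,
#         "l2reg": 0.02,
#     }
#     net.set_params(params)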
def trainepoch(self, vec_tr, j, epoch_size=1):
    X, y = vec_tr[j]
    optim_fn, optim_params = utils.get_optimizer(self.optim)
    optimizer = optim_fn(self.models[j].parameters(), **optim_params)
    optimizer.param_groups[0]['weight_decay'] = self.l2reg
    self.models[j].train()
    # Penalty term, still to be made to work:
    # u, v = penalty(self.models0, vec_tr, j)
    # penal = 0
    # for n, p in self.models[j].named_parameters():
    #     if 'weight' in n:
    #         d = p.data
    #         un, vn = u[n], v[n]
    #         if len(d.size()) > 1:
    #             d = d[:, 0]
    #             un, vn = un[:, 0], vn[:, 0]
    #         penal = torch.dot(d, un * d) - 2 * torch.dot(d, vn)
    for _ in range(self.nepoch, self.nepoch + epoch_size):
        permutation = np.random.permutation(len(X))
        all_costs = []
        for i in range(0, len(X), self.batch_size):
            # Forward pass on a shuffled mini-batch.
            idx = torch.from_numpy(
                permutation[i:i + self.batch_size]).long().to(X.device)
            Xbatch = X[idx]
            ybatch = y[idx]
            if self.cudaEfficient:
                Xbatch = Xbatch.cuda()
                ybatch = ybatch.cuda()
            output = self.models[j](Xbatch)
            # Loss (the commented-out penalty above would be added
            # here as `+ 0.01 * penal`).
            loss = self.loss_fn(output, ybatch)
            all_costs.append(loss.item())
            # Backward pass.
            optimizer.zero_grad()
            loss.backward()
            # Update parameters.
            optimizer.step()
    self.nepoch += epoch_size
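# A sketch of how trainepoch() might be driven with the tenacity-based early
# stopping described in the constructor's docstring. "evaluate_dev" and the
# stopping bookkeeping are assumptions; only trainepoch's own signature
# (vec_tr, j, epoch_size) comes from the code above.
#
#     best_acc, bad_rounds, self.nepoch = 0.0, 0, 0
#     while self.nepoch < self.max_epoch and bad_rounds < self.tenacity:
#         self.trainepoch(vec_tr, j, epoch_size=self.epoch_size)
#         acc = evaluate_dev(self.models[j])   # hypothetical dev evaluation
#         if acc > best_acc:
#             best_acc, bad_rounds = acc, 0
#         else:
#             bad_rounds += 1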