def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # Read the dataset configuration.
    config_file = json.load(open(args.cfg))
    train_dir = config_file['train_dir']
    dataset = config_file['dataset']
    cache_io = config_file['cache_io']
    napp = config_file['node_app']
    eapp = config_file['edge_app']
    symm_io = config_file['symm_io']
    shuffle_io = config_file['shuffle_io']
    n_classes = config_file['num_classes']
    apply_da = config_file['data_augment']
    rad_scale = config_file['rad_scale']
    angle_scale = config_file['angle_scale']
    length_scale = config_file['length_scale']
    curve_scale = config_file['curve_scale']
    poly_scale = config_file['poly_scale']
    domains = config_file['domains']
    batch_io = args.batch_size
    epochs = args.epochs
    bdir = os.path.basename(train_dir)

    print(domains)

    # Base geometry features, plus optional node/edge appearance features.
    input_dim = 58
    if napp:
        input_dim += 21
    if eapp:
        input_dim += 9

    norm_factors = {
        'rad_scale': rad_scale,
        'angle_scale': angle_scale,
        'length_scale': length_scale,
        'curve_scale': curve_scale,
        'poly_scale': poly_scale
    }

    prefix = ('data-' + str(bdir) + ':' + str(dataset) + '_m-tag_ni-' +
              str(input_dim) + '_nh-' + str(args.n_hidden) + '_lay-' +
              str(args.n_layers) + '_hops-' + str(args.hops) + '_napp-' +
              str(napp) + '_eapp-' + str(eapp) + '_do-' + str(args.dropout) +
              '_ro-' + str(args.readout) + '_norm-' + str(args.norm))

    if args.readout == 'spp':
        prefix += '_ng-' + str(args.n_grid)
    prefix += '_b-' + str(batch_io)

    print('saving to prefix: ', prefix)

    # Create the training dataset.
    trainset = ShockGraphDataset(train_dir,
                                 dataset,
                                 norm_factors,
                                 node_app=napp,
                                 edge_app=eapp,
                                 cache=cache_io,
                                 symmetric=symm_io,
                                 data_augment=apply_da,
                                 grid=args.n_grid,
                                 domains=domains)

    # Use PyTorch's DataLoader with the collate function defined before.
    data_loader = DataLoader(trainset,
                             batch_size=batch_io,
                             shuffle=shuffle_io,
                             collate_fn=partial(collate, device_name=device))

    if args.ctype == 'tagconv':
        print('A TAGConv Graph Classifier is being trained')
    else:
        print('An SGConv Graph Classifier is being trained')

    model = Classifier(input_dim, args.n_hidden, n_classes, args.n_layers,
                       args.ctype, args.hops, args.readout, F.relu,
                       args.dropout, args.n_grid, args.K, args.norm, device)

    loss_func = nn.CrossEntropyLoss()

    # Define the optimizer.
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    model = model.to(device)
    model.train()

    print(model)

    epoch_losses = []
    for epoch in tqdm(range(epochs)):
        epoch_loss = 0
        for batch_idx, (bg, label) in enumerate(data_loader):
            prediction = model(bg)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()

        epoch_loss /= (batch_idx + 1)
        epoch_losses.append(epoch_loss)
        print('Epoch {}, loss {:.6f}'.format(epoch, epoch_loss))

        # Checkpoint every 25 epochs.
        if (epoch + 1) % 25 == 0:
            path = prefix + '_epoch_' + str(epoch + 1).zfill(3) + '.pth'
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': epoch_loss
                }, path)
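# The `collate` function passed to every DataLoader in these scripts is
# referenced but not defined in this section. Below is a minimal sketch of
# what it likely looks like, assuming the dataset yields (DGLGraph, label)
# pairs; the real helper's signature and device handling may differ.
import dgl
import torch


def collate(samples, device_name):
    # `samples` is a list of (graph, label) pairs from ShockGraphDataset.
    graphs, labels = map(list, zip(*samples))
    # Merge the graphs into one batched DGLGraph for a single forward pass.
    batched_graph = dgl.batch(graphs).to(device_name)
    labels = torch.tensor(labels, dtype=torch.long, device=device_name)
    return batched_graph, labels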
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # Read the dataset configuration.
    config_file = json.load(open(args.cfg))
    train_dir = config_file['train_dir']
    dataset = config_file['dataset']
    cache_io = config_file['cache_io']
    app_io = config_file['app']
    symm_io = config_file['symm_io']
    shuffle_io = config_file['shuffle_io']
    n_classes = config_file['num_classes']
    apply_da = config_file['data_augment']
    num_feats = config_file['features_dim']
    batch_io = args.batch_size
    epochs = args.epochs
    bdir = os.path.basename(train_dir)

    prefix = 'agnn_sg_model_' + dataset + '_' + bdir + '_' + str(
        args.n_layers) + '_' + args.readout
    if args.readout == 'spp':
        prefix += '_' + str(args.n_grid)

    print('saving to prefix: ', prefix)

    # Create the training dataset.
    trainset = ShockGraphDataset(train_dir,
                                 dataset,
                                 app=app_io,
                                 cache=cache_io,
                                 symmetric=symm_io,
                                 data_augment=apply_da,
                                 grid=args.n_grid)

    # Use PyTorch's DataLoader with the collate function defined before.
    data_loader = DataLoader(trainset,
                             batch_size=batch_io,
                             shuffle=shuffle_io,
                             collate_fn=partial(collate, device_name=device))

    print('An AGNN Graph Classifier is being trained')

    model = Classifier(num_feats, n_classes, args.n_layers, args.init_beta,
                       args.learn_beta, args.readout, F.relu, args.dropout,
                       args.n_grid, device)

    loss_func = nn.CrossEntropyLoss()

    # Define the optimizer.
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    model = model.to(device)
    model.train()

    print(model)

    epoch_losses = []
    for epoch in tqdm(range(epochs)):
        epoch_loss = 0
        for batch_idx, (bg, label) in enumerate(data_loader):
            prediction = model(bg)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()

        epoch_loss /= (batch_idx + 1)
        epoch_losses.append(epoch_loss)
        print('Epoch {}, loss {:.6f}'.format(epoch, epoch_loss))

        # Checkpoint every 25 epochs.
        if (epoch + 1) % 25 == 0:
            path = prefix + '_epoch_' + str(epoch + 1).zfill(3) + '.pth'
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': epoch_loss
                }, path)
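# None of these scripts show their entry point. Below is a hypothetical
# argparse driver for the AGNN trainer above; each flag mirrors an
# `args.*` attribute the function reads, and all defaults are
# illustrative only, not values from this repository.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='AGNN shock-graph trainer')
    parser.add_argument('--cfg', required=True, help='JSON config file')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU id, negative for CPU')
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=400)
    parser.add_argument('--n-layers', type=int, default=2)
    parser.add_argument('--init-beta', type=float, default=1.0)
    parser.add_argument('--learn-beta', action='store_true')
    parser.add_argument('--readout', default='max')
    parser.add_argument('--n-grid', type=int, default=4)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--weight-decay', type=float, default=5e-4)
    main(parser.parse_args())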
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # Read the dataset configuration.
    config_file = json.load(open(args.cfg))
    train_dir = config_file['train_dir']
    test_dir = config_file['test_dir']
    dataset = config_file['dataset']
    cache_io = config_file['cache_io']
    napp = config_file['node_app']
    eapp = config_file['edge_app']
    symm_io = config_file['symm_io']
    shuffle_io = config_file['shuffle_io']
    n_classes = config_file['num_classes']
    apply_da = config_file['data_augment']
    rad_scale = config_file['rad_scale']
    angle_scale = config_file['angle_scale']
    length_scale = config_file['length_scale']
    curve_scale = config_file['curve_scale']
    poly_scale = config_file['poly_scale']
    batch_io = args.batch_size
    epochs = args.epochs

    bdir = os.path.basename(train_dir)
    # Hardcoded override of the training-directory tag used in the prefix.
    bdir = 'se_tcg_train_fc'

    # Base geometry features, plus optional node/edge appearance features.
    input_dim = 58
    if napp:
        input_dim += 21
    if eapp:
        input_dim += 9

    norm_factors = {
        'rad_scale': rad_scale,
        'angle_scale': angle_scale,
        'length_scale': length_scale,
        'curve_scale': curve_scale,
        'poly_scale': poly_scale
    }

    prefix = ('data-' + str(bdir) + ':' + str(dataset) + '_m-tag_ni-' +
              str(input_dim) + '_nh-' + str(args.n_hidden) + '_lay-' +
              str(args.n_layers) + '_hops-' + str(args.hops) + '_napp-' +
              str(napp) + '_eapp-' + str(eapp) + '_do-' + str(args.dropout) +
              '_ro-' + str(args.readout))

    if args.readout == 'spp':
        prefix += '_ng-' + str(args.n_grid)
    prefix += '_b-' + str(batch_io)

    print('saving to prefix: ', prefix)

    # Create the train and test datasets.
    trainset = ShockGraphDataset(train_dir,
                                 dataset,
                                 norm_factors,
                                 node_app=napp,
                                 edge_app=eapp,
                                 cache=True,
                                 symmetric=symm_io,
                                 data_augment=False,
                                 grid=args.n_grid)
    testset = ShockGraphDataset(test_dir,
                                dataset,
                                norm_factors,
                                node_app=napp,
                                edge_app=eapp,
                                cache=True,
                                symmetric=symm_io,
                                data_augment=False,
                                grid=args.n_grid)

    # Use PyTorch's DataLoader with the collate function defined before.
    data_loader_train = DataLoader(trainset,
                                   batch_size=1,
                                   shuffle=False,
                                   collate_fn=partial(collate,
                                                      device_name=device))
    data_loader_test = DataLoader(testset,
                                  batch_size=1,
                                  shuffle=False,
                                  collate_fn=partial(collate,
                                                     device_name=device))

    # Evaluate every checkpoint matching the prefix.
    model_files = glob.glob(prefix + '*pth')
    model_files.sort()

    spp = args.n_grid * args.n_grid

    for state_path in model_files:
        print('Using weights: ', state_path)

        model = Classifier(input_dim, args.n_hidden, n_classes, args.n_layers,
                           args.ctype, args.hops, args.readout, F.relu,
                           args.dropout, args.n_grid, args.K, device)

        # Grab a handle to the readout layer; im2vec reads its activations
        # as the graph embedding.
        layer = nn.Module()
        for name, module in model.named_children():
            if name == 'readout_fcn':
                layer = module

        model.load_state_dict(torch.load(state_path)['model_state_dict'])
        model.to(device)
        model.eval()

        # Get train embeddings and labels (spp grid cells per graph).
        train_embeddings = torch.zeros(
            (len(data_loader_train) * spp, args.n_hidden))
        train_labels = torch.zeros(len(data_loader_train) * spp,
                                   dtype=torch.int32)

        start = 0
        stop = spp
        for _, (bg, label) in enumerate(data_loader_train):
            train_embeddings[start:stop, :] = im2vec(bg, layer, model,
                                                     args.n_hidden)
            train_labels[start:stop] = label
            start = stop
            stop = start + spp

        # Get test embeddings and labels (one label per test graph).
        test_embeddings = torch.zeros(
            (len(data_loader_test) * spp, args.n_hidden))
        test_labels = torch.zeros(len(data_loader_test), dtype=torch.int32)

        start = 0
        stop = spp
        for idx, (bg, label) in enumerate(data_loader_test):
            test_embeddings[start:stop, :] = im2vec(bg, layer, model,
                                                    args.n_hidden)
            test_labels[idx] = label
            start = stop
            stop = start + spp

        D = cosine_distance(test_embeddings, train_embeddings)

        predicted = predict(D, train_labels, n_classes, spp)
        groundtruth = test_labels

        confusion_matrix = np.zeros((n_classes, n_classes))
        for ind in range(groundtruth.shape[0]):
            confusion_matrix[groundtruth[ind], predicted[ind]] += 1

        # Row-normalize to per-class percentages.
        confusion_matrix = (confusion_matrix /
                            np.sum(confusion_matrix, 1)[:, None]) * 100
        # print(confusion_matrix)

        mAP = np.diagonal(confusion_matrix)
        print(mAP)
        print("mAP: ", np.mean(mAP))

        print('Accuracy of argmax predictions on the test set: {:.4f}%'.format(
            (groundtruth == predicted).sum().item() / len(groundtruth) * 100))

        del model
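# `cosine_distance` and `predict` are used above but not defined in this
# section. Minimal sketches under these assumptions: embeddings are row
# vectors, distance is 1 - cosine similarity, and `predict` lets each of a
# test image's spp grid cells vote with its nearest training cell's label.
# The real helpers may differ.
import torch
import torch.nn.functional as F


def cosine_distance(test_embeddings, train_embeddings):
    # Row-normalize both sets, so D[i, j] = 1 - cos(test_i, train_j).
    test_n = F.normalize(test_embeddings, p=2, dim=1)
    train_n = F.normalize(train_embeddings, p=2, dim=1)
    return 1.0 - torch.mm(test_n, train_n.t())


def predict(D, train_labels, n_classes, spp):
    # D has spp rows per test image; each row votes with the label of its
    # nearest training row, and the majority vote wins.
    n_test = D.shape[0] // spp
    predicted = torch.zeros(n_test, dtype=torch.int32)
    for i in range(n_test):
        block = D[i * spp:(i + 1) * spp, :]
        nearest = torch.argmin(block, dim=1)
        votes = torch.bincount(train_labels[nearest].long(),
                               minlength=n_classes)
        predicted[i] = torch.argmax(votes)
    return predicted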
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # Read the dataset configuration.
    config_file = json.load(open(args.cfg))
    test_dir = config_file['test_dir']
    train_dir = config_file['train_dir']
    dataset = config_file['dataset']
    cache_io = config_file['cache_io']
    app_io = config_file['app']
    symm_io = config_file['symm_io']
    shuffle_io = config_file['shuffle_io']
    n_classes = config_file['num_classes']
    apply_da = config_file['data_augment']
    num_feats = config_file['features_dim']
    batch_io = args.batch_size
    epochs = args.epochs
    bdir = os.path.basename(train_dir)

    prefix = 'gin_sg_model_' + dataset + '_' + bdir + '_' + str(
        args.n_layers) + '_' + str(
            args.n_hidden
        ) + '_' + args.neighbor_pooling_type + '_' + args.graph_pooling_type

    # Create the test dataset.
    testset = ShockGraphDataset(test_dir,
                                dataset,
                                app=app_io,
                                cache=cache_io,
                                symmetric=symm_io,
                                data_augment=apply_da)

    # Use PyTorch's DataLoader with the collate function defined before.
    data_loader = DataLoader(testset,
                             batch_size=batch_io,
                             shuffle=shuffle_io,
                             collate_fn=partial(collate, device_name=device))

    # Evaluate every checkpoint matching the prefix.
    model_files = glob.glob(prefix + '*pth')
    model_files.sort()

    for state_path in model_files:
        print('Using weights: ', state_path)

        model = Classifier(args.n_layers, args.num_mlp_layers, num_feats,
                           args.n_hidden, n_classes, args.dropout,
                           args.learn_eps, args.graph_pooling_type,
                           args.neighbor_pooling_type, device)

        model.load_state_dict(torch.load(state_path)['model_state_dict'])
        model.to(device)
        model.eval()

        groundtruth, predicted, scores = classify_data(model, data_loader)

        confusion_matrix = np.zeros((n_classes, n_classes))
        for ind in range(groundtruth.shape[0]):
            confusion_matrix[groundtruth[ind], predicted[ind]] += 1

        # Row-normalize to per-class percentages.
        confusion_matrix = (confusion_matrix /
                            np.sum(confusion_matrix, 1)[:, None]) * 100
        print(confusion_matrix)

        mAP = np.diagonal(confusion_matrix)
        print(mAP)
        print("mAP: ", np.mean(mAP))

        print('Accuracy of argmax predictions on the test set: {:.4f}%'.format(
            (groundtruth == predicted).sum().item() / len(groundtruth) * 100))

        del model
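# `classify_data` is also not defined in this section. Below is a minimal
# sketch of the two-argument variant used above, assuming the model emits
# per-class logits; the other scripts pass extra arguments (n_classes, or
# a dataset plus batch size), so their variants differ.
import numpy as np
import torch
import torch.nn.functional as F


def classify_data(model, data_loader):
    # Run the model over the loader, collecting labels, argmax
    # predictions, and per-class softmax scores as numpy arrays.
    groundtruth, predicted, scores = [], [], []
    with torch.no_grad():
        for bg, label in data_loader:
            probs = F.softmax(model(bg), dim=1)
            predicted.append(probs.argmax(dim=1).cpu().numpy())
            scores.append(probs.cpu().numpy())
            groundtruth.append(label.cpu().numpy())
    return (np.concatenate(groundtruth), np.concatenate(predicted),
            np.concatenate(scores))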
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # Read the dataset configuration.
    config_file = json.load(open(args.cfg))
    train_dir = config_file['train_dir']
    test_dir = config_file['test_dir']
    dataset = config_file['dataset']
    cache_io = config_file['cache_io']
    app_io = config_file['app']
    symm_io = config_file['symm_io']
    shuffle_io = config_file['shuffle_io']
    n_classes = config_file['num_classes']
    apply_da = config_file['data_augment']
    num_feats = config_file['features_dim']
    batch_io = 1
    epochs = args.epochs
    bdir = os.path.basename(train_dir)

    prefix = args.ctype + '_sg_model_' + dataset + '_' + bdir + '_' + str(
        args.n_layers) + '_' + str(args.n_hidden) + '_' + str(
            args.hops) + '_' + args.readout

    # Create the test dataset.
    testset = ShockGraphDataset(test_dir,
                                dataset,
                                app=app_io,
                                cache=True,
                                symmetric=symm_io,
                                data_augment=False)

    # Use PyTorch's DataLoader with the collate function defined before.
    data_loader = DataLoader(testset,
                             batch_size=batch_io,
                             shuffle=False,
                             collate_fn=partial(collate, device_name=device))

    if args.ctype == 'tagconv':
        print('A TAGConv Graph Classifier is being used')
    else:
        print('An SGConv Graph Classifier is being used')

    model = Classifier(num_feats, args.n_hidden, n_classes, args.n_layers,
                       args.ctype, args.hops, args.readout, F.relu,
                       args.dropout, device)
    model.load_state_dict(torch.load(args.model)['model_state_dict'])

    # Grab a handle to the readout layer; im2vec reads its activations
    # as the graph embedding.
    layer = nn.Module()
    for name, module in model.named_children():
        if name == 'readout_fcn':
            layer = module

    model.to(device)
    model.eval()

    # Embed every test graph and write the results to disk.
    embeddings = np.zeros((len(data_loader), args.n_hidden))
    for idx, (bg, label) in enumerate(data_loader):
        embeddings[idx, :] = im2vec(bg, layer, model, args.n_hidden)

    np.savetxt('test_embeddings.txt', embeddings, delimiter=' ')

    fid = open('testset.txt', 'w')
    for i in testset.files:
        fid.writelines(i + '\n')
    fid.close()
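# `im2vec` extracts the readout layer's activations as the graph
# embedding. Below is a minimal sketch using a forward hook, assuming the
# hooked `readout_fcn` module emits one n_hidden-wide row per graph (or
# per spp grid cell); the real helper may capture the features differently.
import torch


def im2vec(bg, layer, model, n_hidden):
    captured = []

    def hook(module, inputs, output):
        # Store the readout activations produced during the forward pass.
        captured.append(output.detach())

    handle = layer.register_forward_hook(hook)
    with torch.no_grad():
        model(bg)
    handle.remove()
    return captured[0].reshape(-1, n_hidden).cpu()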
# NOTE: this function shadows the builtin eval(); the name is kept for
# compatibility with existing callers.
def eval(config_file, state_path, device, flip):
    test_dir = config_file['test_dir']
    dataset = config_file['dataset']
    cache_io = True
    symm_io = config_file['symm_io']
    shuffle_io = False
    num_classes = config_file['num_classes']
    input_dim = config_file['features_dim']
    hidden_dim = config_file['hidden_dim']
    hidden_layers = config_file['hidden_layers']
    aggregate = config_file['aggregate']
    combine = config_file['combine']
    batch_io = config_file['batch']
    dropout = config_file['dropout']

    model = Classifier(input_dim, hidden_dim, num_classes, hidden_layers,
                       combine, nn.functional.relu, dropout, device)
    model.load_state_dict(torch.load(state_path)['model_state_dict'])
    model.to(device)
    model.eval()

    testset = ShockGraphDataset(test_dir,
                                dataset,
                                cache=cache_io,
                                symmetric=symm_io,
                                data_augment=False)

    # Optionally evaluate a flipped copy of the test set as well.
    if flip:
        testset_flip = ShockGraphDataset(test_dir,
                                         dataset,
                                         cache=cache_io,
                                         symmetric=symm_io,
                                         data_augment=False,
                                         flip_pp=True)

    groundtruth, predicted, scores, testfiles = classify_data(
        model, testset, batch_io, device)

    if flip:
        _, predicted_flip, scores_flip, _ = classify_data(
            model, testset_flip, batch_io, device)

    confusion_matrix = np.zeros((num_classes, num_classes))

    # Keep whichever of the original/flipped predictions scored higher.
    combined_predicted = np.copy(predicted)
    if flip:
        for i in range(groundtruth.shape[0]):
            if scores_flip[i] > scores[i]:
                print("flipping: ", scores_flip[i], scores[i],
                      combined_predicted[i], predicted_flip[i])
                combined_predicted[i] = predicted_flip[i]

    for ind in range(groundtruth.shape[0]):
        confusion_matrix[groundtruth[ind], combined_predicted[ind]] += 1

    # Row-normalize to per-class percentages.
    confusion_matrix = (confusion_matrix /
                        np.sum(confusion_matrix, 1)[:, None]) * 100
    print(confusion_matrix)

    mAP = np.diagonal(confusion_matrix)
    print(mAP)
    print("mAP: ", np.mean(mAP))

    print(groundtruth)
    print(predicted)
    print(combined_predicted)

    print('Accuracy of argmax combined predictions on the test set: {:.4f}%'.
          format((groundtruth == combined_predicted).sum().item() /
                 len(groundtruth) * 100))

    # Write per-file results: filename, label, prediction, score.
    fid = open('output.txt', 'w')
    for ind in range(len(testfiles)):
        line = [
            testfiles[ind] + ' ',
            str(groundtruth[ind]) + ' ',
            str(predicted[ind]) + ' ',
            str(scores[ind]) + '\n'
        ]
        fid.writelines(line)
    fid.close()
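# A hypothetical driver for `eval` above; the config path and checkpoint
# name are placeholders, not files from this repository.
import json
import torch

if __name__ == '__main__':
    config = json.load(open('config.json'))
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    eval(config, 'gcn_sg_model_checkpoint.pth', device, flip=True)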
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # Read the dataset configuration.
    config_file = json.load(open(args.cfg))
    train_dir = config_file['train_dir']
    dataset = config_file['dataset']
    cache_io = config_file['cache_io']
    napp = config_file['node_app']
    eapp = config_file['edge_app']
    symm_io = config_file['symm_io']
    shuffle_io = config_file['shuffle_io']
    n_classes = config_file['num_classes']
    apply_da = config_file['data_augment']
    rad_scale = config_file['rad_scale']
    angle_scale = config_file['angle_scale']
    length_scale = config_file['length_scale']
    curve_scale = config_file['curve_scale']
    poly_scale = config_file['poly_scale']
    batch_io = args.batch_size
    epochs = args.epochs

    # Base geometry features, plus optional node/edge appearance features.
    input_dim = 58
    if napp:
        input_dim += 21
    if eapp:
        input_dim += 9

    norm_factors = {
        'rad_scale': rad_scale,
        'angle_scale': angle_scale,
        'length_scale': length_scale,
        'curve_scale': curve_scale,
        'poly_scale': poly_scale
    }

    prefix = ('data-' + str(dataset) + '_m-triplet_ni-' + str(input_dim) +
              '_nh-' + str(args.n_hidden) + '_lay-' + str(args.n_layers) +
              '_hops-' + str(args.hops) + '_napp-' + str(napp) + '_eapp-' +
              str(eapp) + '_do-' + str(args.dropout) + '_ro-' +
              str(args.readout) + '_m-' + str(args.margin))

    if args.readout == 'spp':
        prefix += '_ng-' + str(args.n_grid)
    prefix += '_b-' + str(batch_io)

    print('saving to prefix: ', prefix)

    # Create the training dataset.
    trainset = ShockGraphDataset(train_dir,
                                 dataset,
                                 norm_factors,
                                 node_app=napp,
                                 edge_app=eapp,
                                 cache=cache_io,
                                 symmetric=symm_io,
                                 data_augment=apply_da,
                                 grid=args.n_grid)

    # Use PyTorch's DataLoader with the collate function defined before.
    data_loader = DataLoader(trainset,
                             batch_size=batch_io,
                             shuffle=False,
                             collate_fn=partial(collate, device_name=device))

    print('A TAGConv Triplet Graph Classifier is being used')

    model = Classifier(input_dim, args.n_hidden, args.n_layers, args.hops,
                       args.readout, F.relu, args.dropout, args.n_grid,
                       args.K, device)
    model.load_state_dict(torch.load(args.model)['model_state_dict'])

    # Grab a handle to the readout layer; im2vec reads its activations
    # as the graph embedding.
    layer = nn.Module()
    for name, module in model.named_children():
        if name == 'readout_fcn':
            layer = module

    model.to(device)
    model.eval()

    # Embed every training graph. Note the output filenames reuse the
    # test-set names even though this script embeds the training set.
    embeddings = np.zeros((len(data_loader), args.n_hidden))
    for idx, (bg, label) in enumerate(data_loader):
        embeddings[idx, :] = im2vec(bg, layer, model, args.n_hidden)

    np.savetxt('test_embeddings.txt', embeddings, delimiter=' ')

    fid = open('testset.txt', 'w')
    for i in trainset.files:
        fid.writelines(i + '\n')
    fid.close()
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # Read the dataset configuration.
    config_file = json.load(open(args.cfg))
    train_dir = config_file['train_dir']
    test_dir = config_file['test_dir']
    dataset = config_file['dataset']
    cache_io = config_file['cache_io']
    app_io = config_file['app']
    symm_io = config_file['symm_io']
    shuffle_io = config_file['shuffle_io']
    n_classes = config_file['num_classes']
    apply_da = config_file['data_augment']
    num_feats = config_file['features_dim']
    batch_io = args.batch_size
    epochs = args.epochs
    bdir = os.path.basename(train_dir)

    prefix = args.ctype + '_sg_model_' + dataset + '_' + bdir + '_' + str(
        args.n_layers) + '_' + str(args.n_hidden) + '_' + str(
            args.hops) + '_' + args.readout
    if args.readout == 'spp':
        prefix += '_' + str(args.n_grid)

    # Create the test dataset.
    testset = ShockGraphDataset(test_dir,
                                dataset,
                                app=app_io,
                                cache=True,
                                symmetric=symm_io,
                                data_augment=False,
                                grid=args.n_grid)

    # Use PyTorch's DataLoader with the collate function defined before.
    data_loader = DataLoader(testset,
                             batch_size=batch_io,
                             shuffle=False,
                             collate_fn=partial(collate, device_name=device))

    # Optionally evaluate a flipped copy of the test set as well.
    if args.flip:
        testset_flip = ShockGraphDataset(test_dir,
                                         dataset,
                                         app=app_io,
                                         cache=True,
                                         symmetric=symm_io,
                                         data_augment=False,
                                         grid=args.n_grid,
                                         flip_pp=True)

        data_loader_flip = DataLoader(testset_flip,
                                      batch_size=batch_io,
                                      shuffle=False,
                                      collate_fn=partial(collate,
                                                         device_name=device))

    if args.ctype == 'tagconv':
        print('A TAGConv Graph Classifier is being evaluated')
    else:
        print('An SGConv Graph Classifier is being evaluated')

    # Evaluate every checkpoint matching the prefix.
    model_files = glob.glob(prefix + '*pth')
    model_files.sort()

    for state_path in model_files:
        print('Using weights: ', state_path)

        model = Classifier(num_feats, args.n_hidden, n_classes, args.n_layers,
                           args.ctype, args.hops, args.readout, F.relu,
                           args.dropout, args.n_grid, device)
        model.load_state_dict(torch.load(state_path)['model_state_dict'])
        model.to(device)
        model.eval()

        groundtruth, predicted, scores = classify_data(model, data_loader,
                                                       n_classes)
        if args.flip:
            _, predicted_flip, scores_flip = classify_data(
                model, data_loader_flip, n_classes)

        # Keep whichever of the original/flipped predictions scored higher.
        combined_predicted = np.copy(predicted)
        if args.flip:
            for i in range(groundtruth.shape[0]):
                if np.max(scores_flip[i, :]) > np.max(scores[i, :]):
                    print("flipping: ", np.max(scores_flip[i, :]),
                          np.max(scores[i, :]), predicted[i],
                          predicted_flip[i])
                    combined_predicted[i] = predicted_flip[i]

        confusion_matrix = np.zeros((n_classes, n_classes))
        for ind in range(groundtruth.shape[0]):
            confusion_matrix[groundtruth[ind], combined_predicted[ind]] += 1

        # Row-normalize to per-class percentages.
        confusion_matrix = (confusion_matrix /
                            np.sum(confusion_matrix, 1)[:, None]) * 100
        # print(confusion_matrix)

        mAP = np.diagonal(confusion_matrix)
        print(mAP)
        print("mAP: ", np.mean(mAP))

        print('Accuracy of argmax combined predictions on the test set: '
              '{:.4f}%'.format((groundtruth == combined_predicted).sum().item()
                               / len(groundtruth) * 100))

        # Write per-file results: filename, label, prediction, class scores.
        testfiles = testset.files
        fid = open('output.txt', 'w')
        for ind in range(len(testfiles)):
            line = [
                testfiles[ind] + ' ',
                str(groundtruth[ind]) + ' ',
                str(combined_predicted[ind]) + ' '
            ]
            for idx in range(scores.shape[1]):
                val = scores[ind, idx]
                if idx == scores.shape[1] - 1:
                    line.append(str(val) + '\n')
                else:
                    line.append(str(val) + ' ')
            fid.writelines(line)
        fid.close()

        del model
def train(config_file, device):
    # Every hyperparameter comes from the JSON config.
    train_dir = config_file['train_dir']
    dataset = config_file['dataset']
    cache_io = config_file['cache_io']
    symm_io = config_file['symm_io']
    shuffle_io = config_file['shuffle_io']
    num_classes = config_file['num_classes']
    input_dim = config_file['features_dim']
    hidden_dim = config_file['hidden_dim']
    hidden_layers = config_file['hidden_layers']
    aggregate = config_file['aggregate']
    combine = config_file['combine']
    epochs = config_file['epochs']
    batch_io = config_file['batch']
    apply_da = config_file['data_augment']
    dropout = config_file['dropout']

    bdir = os.path.basename(train_dir)
    prefix = 'gcn_sg_model_' + dataset + '_' + bdir + '_' + aggregate + \
        '_' + combine + '_' + str(hidden_dim) + '_' + str(hidden_layers)

    print("Training with batch size of: ", batch_io, " over ", epochs,
          " epochs with da: ", apply_da)
    print("Writing out to: ", prefix)

    trainset = ShockGraphDataset(train_dir,
                                 dataset,
                                 cache=cache_io,
                                 symmetric=symm_io,
                                 data_augment=apply_da)

    # Use PyTorch's DataLoader with the collate function defined before.
    data_loader = DataLoader(trainset,
                             batch_size=batch_io,
                             shuffle=shuffle_io,
                             collate_fn=partial(collate, device_name=device))

    # Create the model.
    model = Classifier(input_dim, hidden_dim, num_classes, hidden_layers,
                       combine, nn.functional.relu, dropout, device)
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

    model.to(device)
    model.train()

    print(model)

    epoch_losses = []
    for epoch in tqdm(range(epochs)):
        epoch_loss = 0
        for batch_idx, (bg, label) in enumerate(data_loader):
            prediction = model(bg)
            loss = loss_func(prediction, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()

        epoch_loss /= (batch_idx + 1)
        epoch_losses.append(epoch_loss)
        print('Epoch {}, loss {:.6f}'.format(epoch, epoch_loss))

        # Checkpoint every 50 epochs.
        if (epoch + 1) % 50 == 0:
            path = prefix + '_epoch_' + str(epoch + 1).zfill(3) + '.pth'
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': epoch_loss
                }, path)
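# Since `train` reads everything from the config dict, a hypothetical
# driver with an illustrative config is sketched below; every key matches
# one the function reads, but all values are placeholders, not settings
# from this repository.
import torch

example_config = {
    'train_dir': '/path/to/train',   # directory of shock-graph files
    'dataset': 'example_dataset',
    'cache_io': True,
    'symm_io': True,
    'shuffle_io': True,
    'num_classes': 10,
    'features_dim': 58,
    'hidden_dim': 256,
    'hidden_layers': 3,
    'aggregate': 'mean',
    'combine': 'concat',
    'epochs': 400,
    'batch': 64,
    'data_augment': False,
    'dropout': 0.5
}

if __name__ == '__main__':
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    train(example_config, device)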