def train(epochs=epochs, init_lr=init_lr, lr_coefficient=lr_coefficient,
          weight_decay=weight_decay, model_num=model_num, batch_size=batch_size,
          train_dir=train_dir, test_dir=test_dir, log_dir=log_dir):
    # loading data
    print("data loading...\n")
    transform = enhance_transforms()
    transform_std = transform_standard()
    trainset = DataClassify(train_dir, transforms=transform)
    testset = DataClassify(test_dir, transforms=transform_std)
    total_train = len(trainset)
    total_test = len(testset)
    data_loader_train = t.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
    data_loader_test = t.utils.data.DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)
    print("data loading complete\n")

    ##################################
    # TO DO
    ##################################
    if model_num == 0:
        exit(0)
    elif model_num == 18:
        net = resnet18()
    elif model_num == 34:
        net = resnet34()
    elif model_num == 50:
        net = resnet50()
    elif model_num == 101:
        net = resnet101()
    elif model_num == 152:
        net = resnet152()
    ##################################

    # decide whether the network runs on GPU or CPU
    device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
    net.to(device)

    cost = t.nn.CrossEntropyLoss()
    train_loss_list = []
    train_accurate_list = []
    test_loss_list = []
    test_accurate_list = []

    for epoch in range(epochs):
        print("epoch " + str(epoch + 1) + " start training...\n")
        net.train()
        learning_rate = dloss(train_loss_list, init_lr, lr_coefficient, init_lr)
        optimizer = t.optim.Adam(list(net.parameters()), lr=learning_rate, weight_decay=weight_decay)
        run_loss, corr = train_once(data_loader_train, net, optimizer, cost, device)
        train_loss_list.append(run_loss / total_train)
        train_accurate_list.append(corr / total_train)
        print('epoch %d, training loss %.6f, training accuracy %.4f ------\n'
              % (epoch + 1, run_loss / total_train, corr / total_train))
        print("epoch " + str(epoch + 1) + " finish training\n")
        print("-----------------------------------------------\n")

        print("epoch " + str(epoch + 1) + " start testing...\n")
        net.eval()
        test_corr = evaluate(net, data_loader_test, device)
        test_accurate_list.append(test_corr / total_test)
        print('epoch %d, testing accuracy %.4f ------\n' % (epoch + 1, test_corr / total_test))
        print("epoch " + str(epoch + 1) + " finish testing\n")
        print("-----------------------------------------------\n")

    t.save(net, save_trained_net)
    t.save(net.state_dict(), save_trained_net_params)
    curve_draw(train_loss_list, train_accurate_list, test_accurate_list, log_dir)
    print("mission complete")
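
# The learning-rate helper `dloss` called each epoch above is defined elsewhere in the
# project and is not shown here. The sketch below is a hypothetical stand-in, assuming it
# decays the rate by `lr_coefficient` whenever the training loss stops improving; the real
# implementation may differ.
def dloss_sketch(loss_list, init_lr, lr_coefficient, current_lr):
    # No usable loss history yet: start from the initial learning rate.
    if len(loss_list) < 2:
        return init_lr
    # If the latest epoch loss did not improve on the previous one,
    # shrink the learning rate by the given coefficient.
    if loss_list[-1] >= loss_list[-2]:
        return current_lr / lr_coefficient
    return current_lr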
def train(epochs=120,
          init_lr=0.001,
          lr_coefficient=5,
          weight_decay=1e-8,
          model_num=1,
          batch_size=64,
          train_dir='s3://classifier-gar/train_try/',
          test_dir='s3://classifier-gar/test_try/',
          log_dir='s3://classifier-gar/log/',  # remember to set the default paths before use
          version='V0_0_0'):
    # loading data
    print("data loading...\n")
    transform = enhance_transforms()
    transform_std = transform_standard()
    trainset = DataClassify(train_dir, transforms=transform)
    testset = DataClassify(test_dir, transforms=transform_std)
    total_train = len(trainset)
    total_test = len(testset)
    data_loader_train = t.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
    data_loader_test = t.utils.data.DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)
    print("data loading complete\n")

    ##################################
    # TO DO
    ##################################
    if model_num == 0:  # add branches for the other models here
        exit(0)
    else:
        net = resnet50()
    ##################################

    cost = t.nn.CrossEntropyLoss()
    train_loss_list = []
    train_accurate_list = []
    test_loss_list = []
    test_accurate_list = []

    for epoch in range(epochs):
        print("epoch " + str(epoch + 1) + " start training...\n")
        net.train()
        learning_rate = dloss(train_loss_list, init_lr, lr_coefficient, init_lr)
        optimizer = t.optim.Adam(list(net.parameters()), lr=learning_rate, weight_decay=weight_decay)
        run_loss, corr = train_once(data_loader_train, net, optimizer, cost)
        train_loss_list.append(run_loss / total_train)
        train_accurate_list.append(corr / total_train)
        print('epoch %d, training loss %.6f, training accuracy %.4f ------\n'
              % (epoch + 1, run_loss / total_train, corr / total_train))
        print("epoch " + str(epoch + 1) + " finish training\n")
        print("-----------------------------------------------\n")

        print("epoch " + str(epoch + 1) + " start testing...\n")
        net.eval()
        test_corr = evaluate(net, data_loader_test)
        test_accurate_list.append(test_corr / total_test)
        print('epoch %d, testing accuracy %.4f ------\n' % (epoch + 1, test_corr / total_test))
        print("epoch " + str(epoch + 1) + " finish testing\n")
        print("-----------------------------------------------\n")

    # torch.save(net.module, net_name)  # saves the whole model, for export/import; net_name ends in .pkl
    # a model saved this way is loaded with net = torch.load(net_name)
    # torch.save(net.state_dict(), net_name_para)  # saves only the parameters, for model transfer; the file ends in .pkl
    # loading this requires defining the network first, with parameter names matching the saved model,
    # then calling net.load_state_dict(torch.load(net_name_para))
    curve_draw(train_loss_list, train_accurate_list, test_accurate_list, log_dir, version)
    print("mission complete")
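
# The commented-out notes above describe the two save/load styles PyTorch supports.
# A minimal, self-contained sketch of both (the .pkl file names are hypothetical
# placeholders, not paths used by this project; assumes `import torch as t` as above):
def _save_load_demo():
    net = resnet50()

    # Style 1: save the whole model object; restore it with t.load alone.
    t.save(net, "whole_model.pkl")
    restored_whole = t.load("whole_model.pkl")

    # Style 2: save only the parameters; restoring requires rebuilding the same
    # architecture first, then loading the state dict into it.
    t.save(net.state_dict(), "model_params.pkl")
    restored_params = resnet50()
    restored_params.load_state_dict(t.load("model_params.pkl"))
    return restored_whole, restored_params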
def train(epochs=120,
          init_lr=0.001,
          lr_coefficient=5,
          weight_decay=1e-8,
          model_num=1,
          batch_size=64,
          train_dir='s3://classifier-gar/train_try/',
          test_dir='s3://classifier-gar/test_try/',
          log_dir='s3://classifier-gar/log/',
          version='V0_0_0'):
    # loading data
    print("data loading...\n")
    transform = enhance_transforms()
    transform_std = transform_standard()
    trainset = DataClassify(train_dir, transforms=transform)
    testset = DataClassify(test_dir, transforms=transform_std)
    total_train = len(trainset)
    total_test = len(testset)
    data_loader_train = t.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
    data_loader_test = t.utils.data.DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)
    print("data loading complete\n")

    ##################################
    # TO DO
    ##################################
    if model_num == 0:
        exit(0)
    else:
        net = resnet50()
    ##################################

    ##################
    # cuda
    ##################
    device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
    net.to(device)

    cost = t.nn.CrossEntropyLoss()
    train_loss_list = []
    train_accurate_list = []
    test_loss_list = []
    test_accurate_list = []

    for epoch in range(epochs):
        print("epoch " + str(epoch + 1) + " start training...\n")
        net.train()
        learning_rate = dloss(train_loss_list, init_lr, lr_coefficient, init_lr)
        optimizer = t.optim.Adam(list(net.parameters()), lr=learning_rate, weight_decay=weight_decay)
        run_loss, corr = train_once(data_loader_train, net, optimizer, cost, device)
        train_loss_list.append(run_loss / total_train)
        train_accurate_list.append(corr / total_train)
        print('epoch %d, training loss %.6f, training accuracy %.4f ------\n'
              % (epoch + 1, run_loss / total_train, corr / total_train))
        print("epoch " + str(epoch + 1) + " finish training\n")
        print("-----------------------------------------------\n")

        print("epoch " + str(epoch + 1) + " start testing...\n")
        net.eval()
        test_corr = evaluate(net, data_loader_test, device)
        test_accurate_list.append(test_corr / total_test)
        print('epoch %d, testing accuracy %.4f ------\n' % (epoch + 1, test_corr / total_test))
        print("epoch " + str(epoch + 1) + " finish testing\n")
        print("-----------------------------------------------\n")

    # torch.save(net.module, net_name)
    # torch.save(net.state_dict(), net_name_para)
    curve_draw(train_loss_list, train_accurate_list, test_accurate_list, log_dir, version)
    print("mission complete")
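
# A minimal usage sketch. The local directories and version string below are hypothetical
# stand-ins for the S3 defaults above; model_num=1 falls through to the resnet50 branch.
if __name__ == '__main__':
    train(epochs=50,
          init_lr=0.001,
          lr_coefficient=5,
          weight_decay=1e-8,
          model_num=1,
          batch_size=64,
          train_dir='./data/train/',
          test_dir='./data/test/',
          log_dir='./log/',
          version='V0_0_0')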