def main():
    wandb.init(entity="fmlab-its", project="KT")

    lr, node_feature_size, hidden_dim, node_embedding_size, seq_len, head_num, \
        gcn_on, dropout, gcn_layer_num, n_hop, gcn_type, batch_size, epoch_num, \
        single_skill_cnt, skill_cnt, max_idx, device, \
        train_dir, test_dir, qs_graph_dir, save_dir_best, save_dir_final, pretrain_dir = init_proj(wandb.config)

    if pretrain_dir is None:
        model = Model(node_feature_size, hidden_dim, node_embedding_size, seq_len,
                      head_num, qs_graph_dir, device, dropout, n_hop, gcn_type,
                      gcn_layer_num, gcn_on)
    else:
        with open(qs_graph_dir, "r") as src:
            qs_graph = json.load(src)

        qs_graph_torch = Data(x=None,
                              edge_index=get_edge_index(qs_graph),
                              y=get_node_labels(qs_graph)).to(device)

        pretrained_model = pyg_nn.Node2Vec(edge_index=qs_graph_torch.edge_index,
                                           embedding_dim=node_feature_size,
                                           walk_length=20,
                                           context_size=10,
                                           walks_per_node=10,
                                           num_negative_samples=1,
                                           p=1, q=1, sparse=True)
        pretrained_model.load_state_dict(torch.load(pretrain_dir, map_location=device))
        pretrained_model.to(device)
        pretrained_model.eval()

        pretrained_embedding = pretrained_model()
        print("pretrained model loaded.")

        model = Model(node_feature_size, hidden_dim, node_embedding_size, seq_len,
                      head_num, qs_graph_dir, device, dropout, n_hop, gcn_type,
                      gcn_layer_num, gcn_on, pretrained_embedding)

    model.to(device)
    wandb.watch(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_set = CustomDataset(train_dir, [single_skill_cnt, skill_cnt, max_idx], seq_len)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

    test_set = CustomDataset(test_dir, [single_skill_cnt, skill_cnt, max_idx], seq_len)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    train(model, optimizer, epoch_num, train_loader, test_loader,
          save_dir_best, save_dir_final, device)

    return
def testLoader(features_test, labels_test):
    test_dataset = CustomDataset(train=False, features=features_test, labels=labels_test)
    test_dataloader = DataLoader(test_dataset, batch_size=10, shuffle=False, num_workers=4)
    return test_dataloader
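# Hedged usage sketch (not part of the original snippets): shows how testLoader could be
# driven by the pre-computed feature/label files that a later snippet loads from ../lib/.
# The paths and the batch iteration below are assumptions for illustration only.
import json
import torch

features_test = torch.load('../lib/features_test.pt')
with open('../lib/labels_test.json') as src:
    labels_test = json.load(src)

test_dataloader = testLoader(features_test, labels_test)
for features, labels in test_dataloader:
    # Batches of 10 samples, yielded in file order because shuffle=False.
    pass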
os.makedirs(main_folder, exist_ok=True)
os.makedirs(os.path.join(main_folder, "train"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "train/generated_images_A"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "train/generated_images_B"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "train/real_images_A"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "train/real_images_B"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test/generated_images_A"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test/generated_images_B"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test/real_images_A"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test/real_images_B"), exist_ok=True)

save_path_train = os.path.join(main_folder, "loss_train.png")
save_path_test = os.path.join(main_folder, "loss_test.png")

traindataset = CustomDataset(root="./data/train", transform=data_transforms)
train_dataloader = torch.utils.data.DataLoader(dataset=traindataset, batch_size=batch, shuffle=True)

testdataset = CustomDataset(root="./data/test", transform=data_transforms)
test_dataloader = torch.utils.data.DataLoader(dataset=testdataset, batch_size=batch, shuffle=True)

netG_A2B = Generator().to(device)
netG_B2A = Generator().to(device)
netD_A = Discriminator().to(device)
netD_B = Discriminator().to(device)

optimizerG = torch.optim.Adam(itertools.chain(netG_A2B.parameters(), netG_B2A.parameters()),
                              lr=lr_g, betas=(0.5, 0.999))
optimizerD_A = torch.optim.Adam(netD_A.parameters(), lr=lr_d, betas=(0.5, 0.999))
optimizerD_B = torch.optim.Adam(netD_B.parameters(), lr=lr_d, betas=(0.5, 0.999))
import matplotlib.pyplot as plt
import seaborn as sn
from custom_dataset import CustomDataset
from torch.utils.data import DataLoader
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn import svm

data_path = "C:\\Users\\Mehmet\\Desktop\\yeniANN"

# Initialize the datasets and dataloaders
traindataset = CustomDataset(data_path=data_path, train=True, val=False)
trainloader = DataLoader(traindataset, batch_size=len(traindataset), shuffle=True,
                         pin_memory=True, num_workers=0)

"""
valdataset = CustomDataset(data_path=data_path, train=False, val=True)
valloader = DataLoader(valdataset, batch_size=len(valdataset), shuffle=False,
                       pin_memory=True, num_workers=0)
"""

testdataset = CustomDataset(data_path=data_path, train=False, val=False)
testloader = DataLoader(testdataset, batch_size=len(testdataset), shuffle=True,
                        pin_memory=True, num_workers=0)

print('Processing train data')
def build_model(self):
    """ Random seed """
    torch.manual_seed(131)
    torch.cuda.manual_seed_all(131)
    np.random.seed(131)

    """ DataLoader """
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize((self.img_size + 12, self.img_size + 12)),
        transforms.RandomCrop(self.img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    test_transform = transforms.Compose([
        transforms.Resize((self.img_size + 12, self.img_size + 12)),
        transforms.CenterCrop(self.img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])

    self.train_folder = ImageFolder(os.path.join(self.dataset, 'train'), train_transform)
    self.train_loader = DataLoader(self.train_folder, batch_size=self.batch_size * 2,
                                   shuffle=True, drop_last=True, num_workers=self.num_workers)
    self.test_folder = CustomDataset(os.path.join(self.dataset, 'test'), test_transform,
                                     target_num=self.K)
    self.test_loader = DataLoader(self.test_folder, batch_size=self.batch_size,
                                  shuffle=True, drop_last=True, num_workers=self.num_workers)

    """ Define Generator, Discriminator """
    self.ConEn = ContentEncoder(input_nc=3, nf=self.ngf,
                                n_downsampling=self.ng_downsampling,
                                n_blocks=self.ng_res).to(self.device)
    self.ClsEn = ClassEncoder(input_nc=3, nf=self.ngf, class_dim=self.code_dim,
                              n_downsampling=self.nc_downsampling).to(self.device)
    self.Dec = Decoder(output_nc=3, nf=self.ngf * 8, nmf=self.nmf, class_dim=self.code_dim,
                       n_upsampling=self.ng_upsampling, n_blocks=self.ng_res,
                       mlp_blocks=self.n_mlp).to(self.device)
    self.Dis = Discriminator(input_nc=3, output_nc=self.n_class, nf=self.ndf,
                             n_blocks=self.nd_res).to(self.device)

    """ Init """
    weight_init(self.ConEn)
    weight_init(self.ClsEn)
    weight_init(self.Dec)
    weight_init(self.Dis)

    self.ConEn_, self.ClsEn_, self.Dec_ = deepcopy(self.ConEn), deepcopy(self.ClsEn), deepcopy(self.Dec)
    self.ConEn_.eval(), self.ClsEn_.eval(), self.Dec_.eval()

    """ Define Loss """
    self.L1_loss = nn.L1Loss().to(self.device)

    """ Optimizer """
    self.G_optim = torch.optim.RMSprop(itertools.chain(self.ConEn.parameters(),
                                                       self.ClsEn.parameters(),
                                                       self.Dec.parameters()),
                                       lr=self.lrG, weight_decay=self.weight_decay)
    self.D_optim = torch.optim.RMSprop(self.Dis.parameters(), lr=self.lrD,
                                       weight_decay=self.weight_decay)
qs_graph_dir = "data/" + args.dataset + "/" + args.dataset + "_qs_graph.json" if args.dataset == "assist09": single_skill_cnt = 123 skill_cnt = 167 max_idx = 17905 elif args.dataset == "assist12": single_skill_cnt = 265 skill_cnt = 265 max_idx = 53331 elif args.dataset == "ednet": single_skill_cnt = 189 skill_cnt = 1886 max_idx = 14037 else: raise ValueError("metadata not defined") test_set = CustomDataset(test_dir, [single_skill_cnt, skill_cnt, max_idx], seq_len) test_loader = DataLoader(test_set, batch_size=batch_size) print("cuda availability: {}".format(torch.cuda.is_available())) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # TODO: NEED TO BE CHANGED since parameters are changed model = Model(node_feature_size, hidden_dim, node_feature_size, seq_len, head_num, qs_graph_dir, device) model.load_state_dict(torch.load(model_dir, map_location=device)) model.to(device) model.eval() print(evaluate(model, test_loader, device))
import h5py
import numpy as np
from custom_dataset import CustomSampler, CustomDataset
from tqdm import tqdm
from math import sqrt

with h5py.File('datasets/train.hdf5', 'r') as file:
    sampler = CustomSampler(file, 127 * 127 * 1024)
    dataset = CustomDataset(file, sampler, std=1, convert_to_tensor=False)

    nimages = 0
    mean = 0.0
    var = 0.0
    for idx in tqdm(list(sampler)):
        batch, _ = dataset[idx]
        nimages += batch.shape[0]
        mean += batch.mean()
        var += batch.var()

    mean /= nimages
    var /= nimages

    print('mean:', mean, 'var:', var, 'std:', sqrt(var))
print("count of validation image is: ", len(valid_image_paths)) #count of validation image is: 99 test_image_paths = folder_data[split_2:] print("count of test images is: ", len(test_image_paths)) #count of test images is: 100 #print(test_image_paths) train_mask_paths = folder_mask[:split_1] valid_mask_paths = folder_mask[split_1:split_2] test_mask_paths = folder_mask[split_2:] train_dataset = CustomDataset(train_image_paths, train_mask_paths) print(len(train_dataset[0])) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2) valid_dataset = CustomDataset(valid_image_paths, valid_mask_paths) valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=True, num_workers=2) test_dataset = CustomDataset(test_image_paths, test_mask_paths) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1,
# NOTE: the opening of this `with` statement is reconstructed; the snippet starts mid-statement
# and the training-file path below is an assumption based on the other snippets.
with h5py.File('datasets/train.hdf5', 'r') as train_dataset_file, \
        h5py.File('datasets/test.hdf5', 'r') as val_dataset_file:

    batch_samplers = [
        CustomBatchSampler(train_dataset_file, mem, shuffle=True, drop_last=True),
        CustomBatchSampler(val_dataset_file, mem, shuffle=False, drop_last=True)
    ]

    # Create training and validation datasets
    datasets = [
        CustomDataset(train_dataset_file),
        CustomDataset(val_dataset_file)
    ]

    kwargs = {
        # 'num_workers': 6,
        'pin_memory': True
    }

    # Create training and validation dataloaders
    dataloaders = [
        DataLoader(datasets[0], batch_sampler=batch_samplers[0], **kwargs),
        DataLoader(datasets[1], batch_sampler=batch_samplers[1], **kwargs)
    ]

    # Detect if we have a GPU available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def get_dataloader(dataset_file):
    batch_sampler = CustomBatchSampler(dataset_file, args.mem, **sampler_kwargs)
    dataset = CustomDataset(dataset_file)
    return DataLoader(dataset, batch_sampler=batch_sampler, **dset_kwargs)
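# Hedged usage sketch (not part of the original snippets): get_dataloader relies on
# module-level sampler_kwargs / dset_kwargs dicts and on the script's parsed args.
# The values below are assumptions, modeled on the keyword arguments the other snippets
# pass to CustomBatchSampler and DataLoader.
import h5py

sampler_kwargs = {'shuffle': True, 'drop_last': True}   # assumed CustomBatchSampler options
dset_kwargs = {'pin_memory': True}                       # assumed DataLoader options

with h5py.File('datasets/train.hdf5', 'r') as train_file:  # path assumed from the other snippets
    train_loader = get_dataloader(train_file)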
os.mkdir('./dc_img')
os.system("rm -rf ./model")
os.mkdir("./model")

num_epochs = 40000
batch_size = 64
learning_rate = [1e-3, 1e-3]
OUTPUT_SAVE_RATE = 20   # Output is written to dc_img once in these many epochs
MODEL_SAVE_RATE = 200

data_dir = "./data/"
dataset = []
for i in range(2):
    dataset.append(CustomDataset(data_dir, i))

dataloaders = {
    x: torch.utils.data.DataLoader(dataset[x], batch_size=batch_size, shuffle=True, num_workers=12)
    for x in range(2)
}
dataset_sizes = {x: len(dataloaders[x]) for x in range(2)}

model = autoencoder(learning_rate).cuda()
criterion = nn.MSELoss()

for epoch in range(num_epochs):
def train(num_epochs):
    # Train the model
    train_batchsize = 8
    valid_batchsize = 8
    lr = 0.005
    momentum = 0.9
    weight_decay = 0.0005
    step_size = 30
    gamma = 0.1
    pretrained = True

    timeStamp = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")
    output_folder = os.path.join("..", "Sessions", timeStamp)
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    for dir_name in ["models", "info"]:
        dir_path = os.path.join(output_folder, dir_name)
        if not os.path.isdir(dir_path):
            os.makedirs(dir_path)

    settings_path = os.path.join(output_folder, "info", "settings.txt")
    with open(settings_path, "w") as f:
        f.write("Max epochs: {}\n".format(num_epochs))
        f.write("Training batch size: {}\n".format(train_batchsize))
        f.write("Validation batch size: {}\n".format(valid_batchsize))
        f.write("Initial learning rate: {}\n".format(lr))
        f.write("Momentum: {}\n".format(momentum))
        f.write("Weight decay: {}\n".format(weight_decay))
        f.write("LR step size: {}\n".format(step_size))
        f.write("LR gamma: {}\n".format(gamma))
        f.write("Pretrained model: {}\n".format(pretrained))

    # Train on the GPU, or on the CPU if a GPU is not available
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print("Using GPU")
    else:
        print("WARNING: Using CPU")
        device = torch.device('cpu')

    # Our dataset has two classes only - background and person
    num_classes = 2
    model = initializeModel(pretrained, num_classes)

    # Use our dataset and the defined transformations
    dataset_train = CustomDataset('../data/', data_type='train', transforms=get_transform(train=True))
    dataset_valid = CustomDataset('../data/', data_type='valid', transforms=get_transform(train=False))

    # Define training and validation data loaders
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=train_batchsize, shuffle=True,
        num_workers=0, collate_fn=utils.collate_fn)
    data_loader_valid = torch.utils.data.DataLoader(
        dataset_valid, batch_size=valid_batchsize, shuffle=False,
        num_workers=0, collate_fn=utils.collate_fn)

    # Move model to the device (GPU/CPU)
    model.to(device)

    # Construct an optimizer and a learning rate scheduler
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=lr, momentum=momentum, weight_decay=weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    for epoch in range(num_epochs):
        # Train for one epoch, printing every 100 iterations
        training_info = train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=100)
        # Update the learning rate
        lr_scheduler.step()
        # Evaluate on the validation dataset
        coco_results = evaluate(model, data_loader_valid, device=device)

        model_path = os.path.join(output_folder, "models",
                                  'faster_RCNN_resnet50_{0}epochs.tar'.format(epoch + 1))
        torch.save(
            {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, model_path)

        training_info_path = os.path.join(output_folder, "info",
                                          "training_info_{0}epochs.pkl".format(epoch + 1))
        with open(training_info_path, "wb") as f:
            pkl.dump(training_info, f, protocol=pkl.HIGHEST_PROTOCOL)

        evaluation_info_path = os.path.join(output_folder, "info",
                                            "coco_evaluation_{0}epochs.txt".format(epoch + 1))
        writeCOCOtoTXT(coco_results.coco_eval["bbox"].stats, evaluation_info_path, "Zebrafish")

    print("Training has finished after {0} epochs\nThe weights have been stored in {1}"
          .format(epoch + 1, model_path))
def main(config):
    # Select the compute device
    if config.gpu_id < 0:
        print("Device: CPU")
        device = torch.device('cpu')
    else:
        print("Device:", torch.cuda.get_device_name(0))
        device = torch.device('cuda:%d' % config.gpu_id)

    # Load the breast cancer dataset
    cancer_data = load_breast_cancer()
    df = pd.DataFrame(cancer_data.data, columns=cancer_data.feature_names)
    df['class'] = cancer_data.target

    data = torch.from_numpy(df.values).float()
    x = data[:, :30]
    y = data[:, -1:]

    # Shuffle and split into train / validation / test sets
    ratios = [.6, .2, .2]
    train_cnt = int(x.size(0) * ratios[0])
    valid_cnt = int(x.size(0) * ratios[1])
    test_cnt = x.size(0) - train_cnt - valid_cnt
    cnts = [train_cnt, valid_cnt, test_cnt]

    indices = torch.randperm(x.size(0))
    x = torch.index_select(x, dim=0, index=indices).to(device)
    y = torch.index_select(y, dim=0, index=indices).to(device)
    x = x.split(cnts, dim=0)
    y = y.split(cnts, dim=0)

    # Wrap the splits in torch Datasets and DataLoaders
    train_loader = DataLoader(dataset=CustomDataset(x[0], y[0]), batch_size=config.batch_size, shuffle=True)
    valid_loader = DataLoader(dataset=CustomDataset(x[1], y[1]), batch_size=config.batch_size, shuffle=False)
    test_loader = DataLoader(dataset=CustomDataset(x[2], y[2]), batch_size=config.batch_size, shuffle=False)

    print("Train %d / Valid %d / Test %d samples." % (
        len(train_loader.dataset),
        len(valid_loader.dataset),
        len(test_loader.dataset),
    ))

    # Declare the model and optimizer
    model = CancerClassifier(x[0].size(-1), y[0].size(-1)).to(device)
    optimizer = optim.Adam(model.parameters())

    # Run training
    trainer = Trainer(model, optimizer, train_loader, valid_loader)
    trainer.train(config)

    # Loss history
    plot_from = 2
    plt.figure(figsize=(20, 10))
    plt.grid(True)
    plt.title("Train / Valid Loss History")
    plt.plot(
        range(plot_from, len(trainer.train_history)), trainer.train_history[plot_from:],
        range(plot_from, len(trainer.valid_history)), trainer.valid_history[plot_from:],
    )
    plt.yscale('log')
    plt.show()

    # Evaluate
    test_loss = 0
    y_hat = []

    model.eval()
    with torch.no_grad():
        for x_i, y_i in test_loader:
            y_hat_i = model(x_i)
            loss = F.binary_cross_entropy(y_hat_i, y_i)

            test_loss += float(loss)  # Gradient is already detached.
            y_hat += [y_hat_i]

    test_loss = test_loss / len(test_loader)
    y_hat = torch.cat(y_hat, dim=0)

    print("Test loss: %.4e" % test_loss)

    correct_cnt = (y[2] == (y_hat > .5)).sum()
    total_cnt = float(y[2].size(0))

    print('Test Accuracy: %.4f' % (correct_cnt / total_cnt))
start_dir = '../data/test'
for dir_path, _, _ in os.walk(start_dir):
    testImgs.extend(glob(os.path.join(dir_path, "*.JPEG")))

features_test = torch.load('../lib/features_test.pt')
features_train = torch.load('../lib/features_train.pt')

input_labels_train = open('../lib/labels_train.json')
labels_train = json.load(input_labels_train)
input_labels_test = open('../lib/labels_test.json')
labels_test = json.load(input_labels_test)

train_dataset = CustomDataset(train=True, features=features_train, labels=labels_train)
test_dataset = CustomDataset(train=False, features=features_test, labels=labels_test)

train_dataloader = DataLoader(train_dataset, batch_size=10, shuffle=True, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=10, shuffle=False, num_workers=4)

FFNmodel = FFNModel(100)
parser.add_argument('-s', '--startfolder', type=str, default='datasets/google_test/127')
args = parser.parse_args()

classes = sorted(os.listdir(args.startfolder))
num_classes = len(classes)

with h5py.File('datasets/google_train.hdf5', 'r') as dataset_file:
    batch_sampler = CustomBatchSampler(dataset_file, args.mem, num_replicas=1, rank=0)
    dataset = CustomDataset(dataset_file)
    dataloader = DataLoader(dataset, batch_sampler=batch_sampler,
                            num_workers=args.num_workers, pin_memory=True)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Model(num_classes)
    state_dict = torch.load('saves/squeezenet_115c_epepoch=10_val_acc=0.51acc.ckpt')['state_dict']
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    samples = 0
    corrects = 0