Example No. 1
def main():

    wandb.init(entity="fmlab-its", project="KT")

    lr, node_feature_size, hidden_dim, node_embedding_size, seq_len, head_num, gcn_on, dropout, gcn_layer_num, n_hop, gcn_type, batch_size, epoch_num,\
        single_skill_cnt, skill_cnt, max_idx, device,\
        train_dir, test_dir, qs_graph_dir, save_dir_best, save_dir_final, pretrain_dir = init_proj(wandb.config)

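    # Either build the model from scratch, or warm-start it with Node2Vec
    # embeddings pretrained on the question-skill graph.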
    if pretrain_dir is None:
        model = Model(node_feature_size, hidden_dim, node_embedding_size,
                      seq_len, head_num, qs_graph_dir, device, dropout, n_hop,
                      gcn_type, gcn_layer_num, gcn_on)
    else:
        with open(qs_graph_dir, "r") as src:
            qs_graph = json.load(src)
        qs_graph_torch = Data(x=None,
                              edge_index=get_edge_index(qs_graph),
                              y=get_node_labels(qs_graph)).to(device)
        pretrained_model = pyg_nn.Node2Vec(
            edge_index=qs_graph_torch.edge_index,
            embedding_dim=node_feature_size,
            walk_length=20,
            context_size=10,
            walks_per_node=10,
            num_negative_samples=1,
            p=1,
            q=1,
            sparse=True)
        pretrained_model.load_state_dict(
            torch.load(pretrain_dir, map_location=device))
        pretrained_model.to(device)
        pretrained_model.eval()
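        # Calling a PyG Node2Vec module with no batch argument returns the
        # full node-embedding matrix.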
        pretrained_embedding = pretrained_model()

        print("pretrained model loaded.")

        model = Model(node_feature_size, hidden_dim, node_embedding_size,
                      seq_len, head_num, qs_graph_dir, device, dropout, n_hop,
                      gcn_type, gcn_layer_num, gcn_on, pretrained_embedding)

    model.to(device)

    wandb.watch(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_set = CustomDataset(train_dir,
                              [single_skill_cnt, skill_cnt, max_idx], seq_len)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

    test_set = CustomDataset(test_dir, [single_skill_cnt, skill_cnt, max_idx],
                             seq_len)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

    train(model, optimizer, epoch_num, train_loader, test_loader,
          save_dir_best, save_dir_final, device)

    return
Example No. 2
def testLoader(features_test, labels_test):
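    """Wrap precomputed test features/labels in a fixed-order DataLoader."""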
    test_dataset = CustomDataset(train=False,
                                 features=features_test,
                                 labels=labels_test)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=10,
                                 shuffle=False,
                                 num_workers=4)

    return test_dataloader
Example No. 3
os.makedirs(main_folder, exist_ok=True)
os.makedirs(os.path.join(main_folder, "train"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "train/generated_images_A"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "train/generated_images_B"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "train/real_images_A"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "train/real_images_B"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test/generated_images_A"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test/generated_images_B"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test/real_images_A"), exist_ok=True)
os.makedirs(os.path.join(main_folder, "test/real_images_B"), exist_ok=True)

save_path_train = os.path.join(main_folder, "loss_train.png")
save_path_test = os.path.join(main_folder, "loss_test.png")

traindataset = CustomDataset(root="./data/train", transform=data_transforms)
train_dataloader = torch.utils.data.DataLoader(dataset=traindataset, batch_size=batch, shuffle=True)

testdataset = CustomDataset(root="./data/test", transform=data_transforms)
test_dataloader = torch.utils.data.DataLoader(dataset=testdataset, batch_size=batch, shuffle=True)

netG_A2B = Generator().to(device)
netG_B2A = Generator().to(device)

netD_A = Discriminator().to(device)
netD_B = Discriminator().to(device)

optimizerG = optim.Adam(itertools.chain(netG_A2B.parameters(), netG_B2A.parameters()),
                        lr=lr_g, betas=(0.5, 0.999))
optimizerD_A = optim.Adam(netD_A.parameters(), lr=lr_d, betas=(0.5, 0.999))
optimizerD_B = optim.Adam(netD_B.parameters(), lr=lr_d, betas=(0.5, 0.999))
Example No. 4
import matplotlib.pyplot as plt
from custom_dataset import CustomDataset
from torch.utils.data import DataLoader
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn import svm
import seaborn as sn

data_path = "C:\\Users\\Mehmet\\Desktop\\yeniANN"

# Initialize the dataset and dataloader
traindataset = CustomDataset(data_path=data_path, train=True, val=False)
trainloader = DataLoader(traindataset,
                         batch_size=len(traindataset),
                         shuffle=True,
                         pin_memory=True,
                         num_workers=0)
"""
valdataset = CustomDataset(data_path = data_path, train = False, val = True)
valloader = DataLoader(traindataset, batch_size = len(valdataset), shuffle = False, pin_memory = True, num_workers = 0)
"""
testdataset = CustomDataset(data_path=data_path, train=False, val=False)
testloader = DataLoader(testdataset,
                        batch_size=len(testdataset),
                        shuffle=True,
                        pin_memory=True,
                        num_workers=0)

print('Processing train data')
Example No. 5
    def build_model(self):
        """ Random seed """
        torch.manual_seed(131)
        torch.cuda.manual_seed_all(131)
        np.random.seed(131)
        """ DataLoader """
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize((self.img_size + 12, self.img_size + 12)),
            transforms.RandomCrop(self.img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
        test_transform = transforms.Compose([
            transforms.Resize((self.img_size + 12, self.img_size + 12)),
            transforms.CenterCrop(self.img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])

        self.train_folder = ImageFolder(os.path.join(self.dataset, 'train'),
                                        train_transform)
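        # Note the doubled batch size on the training loader.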
        self.train_loader = DataLoader(self.train_folder,
                                       batch_size=self.batch_size * 2,
                                       shuffle=True,
                                       drop_last=True,
                                       num_workers=self.num_workers)
        self.test_folder = CustomDataset(os.path.join(self.dataset, 'test'),
                                         test_transform,
                                         target_num=self.K)
        self.test_loader = DataLoader(self.test_folder,
                                      batch_size=self.batch_size,
                                      shuffle=True,
                                      drop_last=True,
                                      num_workers=self.num_workers)
        """ Define Generator, Discriminator """
        self.ConEn = ContentEncoder(input_nc=3,
                                    nf=self.ngf,
                                    n_downsampling=self.ng_downsampling,
                                    n_blocks=self.ng_res).to(self.device)
        self.ClsEn = ClassEncoder(input_nc=3,
                                  nf=self.ngf,
                                  class_dim=self.code_dim,
                                  n_downsampling=self.nc_downsampling).to(
                                      self.device)
        self.Dec = Decoder(output_nc=3,
                           nf=self.ngf * 8,
                           nmf=self.nmf,
                           class_dim=self.code_dim,
                           n_upsampling=self.ng_upsampling,
                           n_blocks=self.ng_res,
                           mlp_blocks=self.n_mlp).to(self.device)
        self.Dis = Discriminator(input_nc=3,
                                 output_nc=self.n_class,
                                 nf=self.ndf,
                                 n_blocks=self.nd_res).to(self.device)
        """ init """
        weight_init(self.ConEn)
        weight_init(self.ClsEn)
        weight_init(self.Dec)
        weight_init(self.Dis)
        self.ConEn_ = deepcopy(self.ConEn)
        self.ClsEn_ = deepcopy(self.ClsEn)
        self.Dec_ = deepcopy(self.Dec)
        self.ConEn_.eval()
        self.ClsEn_.eval()
        self.Dec_.eval()
        """ Define Loss """
        self.L1_loss = nn.L1Loss().to(self.device)
        """ Optimizer """
        self.G_optim = torch.optim.RMSprop(itertools.chain(
            self.ConEn.parameters(), self.ClsEn.parameters(),
            self.Dec.parameters()),
                                           lr=self.lrG,
                                           weight_decay=self.weight_decay)
        self.D_optim = torch.optim.RMSprop(self.Dis.parameters(),
                                           lr=self.lrD,
                                           weight_decay=self.weight_decay)
Example No. 6
qs_graph_dir = "data/" + args.dataset + "/" + args.dataset + "_qs_graph.json"

if args.dataset == "assist09":
    single_skill_cnt = 123
    skill_cnt = 167
    max_idx = 17905
elif args.dataset == "assist12":
    single_skill_cnt = 265
    skill_cnt = 265
    max_idx = 53331
elif args.dataset == "ednet":
    single_skill_cnt = 189
    skill_cnt = 1886
    max_idx = 14037
else:
    raise ValueError("metadata not defined")

test_set = CustomDataset(test_dir, [single_skill_cnt, skill_cnt, max_idx], seq_len)
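# DataLoader's shuffle defaults to False, which suits evaluation.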
test_loader = DataLoader(test_set, batch_size=batch_size)

print("cuda availability: {}".format(torch.cuda.is_available()))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# TODO: NEED TO BE CHANGED since parameters are changed
model = Model(node_feature_size, hidden_dim, node_feature_size, seq_len, head_num, qs_graph_dir, device)
model.load_state_dict(torch.load(model_dir, map_location=device))
model.to(device)

model.eval()

print(evaluate(model, test_loader, device))
Example No. 7
import h5py
import numpy as np
from custom_dataset import CustomSampler, CustomDataset
from tqdm import tqdm
from math import sqrt

with h5py.File('datasets/train.hdf5', 'r') as file:
    sampler = CustomSampler(file, 127 * 127 * 1024)
    dataset = CustomDataset(file, sampler, std=1, convert_to_tensor=False)
    nimages = 0
    mean = 0.0
    var = 0.0
    for idx in tqdm(list(sampler)):
        batch, _ = dataset[idx]
        nimages += batch.shape[0]
        # Weight each batch's statistics by its size so the final division
        # by the total image count yields a proper average.
        mean += batch.mean() * batch.shape[0]
        var += batch.var() * batch.shape[0]

mean /= nimages
var /= nimages

print('mean:', mean, 'var:', var, 'std:', sqrt(var))
Example No. 8
print("count of validation image is: ", len(valid_image_paths))
#count of validation image is:  99

test_image_paths = folder_data[split_2:]
print("count of test images is: ", len(test_image_paths))
#count of test images is:  100

#print(test_image_paths)

train_mask_paths = folder_mask[:split_1]

valid_mask_paths = folder_mask[split_1:split_2]

test_mask_paths = folder_mask[split_2:]

train_dataset = CustomDataset(train_image_paths, train_mask_paths)
print(len(train_dataset[0]))
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=1,
                                           shuffle=True,
                                           num_workers=2)

valid_dataset = CustomDataset(valid_image_paths, valid_mask_paths)
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                           batch_size=1,
                                           shuffle=True,
                                           num_workers=2)

test_dataset = CustomDataset(test_image_paths, test_mask_paths)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=1,
                                          shuffle=False,  # assumed: source snippet was cut off here
                                          num_workers=2)
Example No. 9
with h5py.File('datasets/train.hdf5', 'r') as train_dataset_file, \
     h5py.File('datasets/test.hdf5', 'r') as val_dataset_file:

    batch_samplers = [
        CustomBatchSampler(train_dataset_file,
                           mem,
                           shuffle=True,
                           drop_last=True),
        CustomBatchSampler(val_dataset_file,
                           mem,
                           shuffle=False,
                           drop_last=True)
    ]

    # Create training and validation datasets
    datasets = [
        CustomDataset(train_dataset_file),
        CustomDataset(val_dataset_file)
    ]

    kwargs = {
        #'num_workers': 6,
        'pin_memory': True
    }
    # Create training and validation dataloaders
    dataloaders = [
        DataLoader(datasets[0], batch_sampler=batch_samplers[0], **kwargs),
        DataLoader(datasets[1], batch_sampler=batch_samplers[1], **kwargs)
    ]

    # Detect if we have a GPU available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Example No. 10
def get_dataloader(dataset_file):
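    """Build a DataLoader whose batching is delegated to CustomBatchSampler."""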
    batch_sampler = CustomBatchSampler(dataset_file, args.mem, **sampler_kwargs)
    dataset = CustomDataset(dataset_file)
    return DataLoader(dataset, batch_sampler=batch_sampler, **dset_kwargs)
Example No. 11
if not os.path.exists('./dc_img'):
    os.mkdir('./dc_img')

os.system("rm -rf ./model")
os.mkdir("./model")

num_epochs = 40000
batch_size = 64
learning_rate = [1e-3, 1e-3]
OUTPUT_SAVE_RATE = 20  # Output is written to dc_img once in these many epochs
MODEL_SAVE_RATE = 200

data_dir = "./data/"

dataset = []
for i in range(2):
    dataset.append(CustomDataset(data_dir, i))

dataloaders = {
    x: torch.utils.data.DataLoader(dataset[x],
                                   batch_size=batch_size,
                                   shuffle=True,
                                   num_workers=12)
    for x in range(2)
}

dataset_sizes = {x: len(dataloaders[x]) for x in range(2)}

model = autoencoder(learning_rate).cuda()
criterion = nn.MSELoss()

for epoch in range(num_epochs):
Example No. 12
def train(num_epochs):

    # Train the model
    train_batchsize = 8
    valid_batchsize = 8
    lr = 0.005
    momentum = 0.9
    weight_decay = 0.0005
    step_size = 30
    gamma = 0.1
    pretrained = True
    timeStamp = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")

    output_folder = os.path.join("..", "Sessions", timeStamp)
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    for dir_name in ["models", "info"]:
        dir_path = os.path.join(output_folder, dir_name)
        if not os.path.isdir(dir_path):
            os.makedirs(dir_path)

    settings_path = os.path.join(output_folder, "info", "settings.txt")
    with open(settings_path, "w") as f:
        f.write("Max epochs: {}\n".format(num_epochs))
        f.write("Training batch size: {}\n".format(train_batchsize))
        f.write("Validation batch size: {}\n".format(valid_batchsize))
        f.write("Initial learning rate: {}\n".format(lr))
        f.write("Momentum: {}\n".format(momentum))
        f.write("Weight decay: {}\n".format(weight_decay))
        f.write("LR step size: {}\n".format(step_size))
        f.write("LR gamma:  {}\n".format(gamma))
        f.write("pretrained model: {}\n".format(pretrained))

    # train on the GPU or on the CPU, if a GPU is not available
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print("Using GPU")
    else:
        print("WARNING: Using CPU")
        device = torch.device('cpu')

    # our dataset has two classes only - background and person
    num_classes = 2

    model = initializeModel(pretrained, num_classes)

    # use our dataset and defined transformations
    dataset_train = CustomDataset('../data/',
                                  data_type='train',
                                  transforms=get_transform(train=True))
    dataset_valid = CustomDataset('../data/',
                                  data_type='valid',
                                  transforms=get_transform(train=False))

    # define training and validation data loaders
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=train_batchsize,
        shuffle=True,
        num_workers=0,
        collate_fn=utils.collate_fn)

    data_loader_valid = torch.utils.data.DataLoader(
        dataset_valid,
        batch_size=valid_batchsize,
        shuffle=False,
        num_workers=0,
        collate_fn=utils.collate_fn)

    # move model to the device (GPU/CPU)
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=lr,
                                momentum=momentum,
                                weight_decay=weight_decay)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=step_size,
                                                   gamma=gamma)

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        training_info = train_one_epoch(model,
                                        optimizer,
                                        data_loader_train,
                                        device,
                                        epoch,
                                        print_freq=100)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        coco_results = evaluate(model, data_loader_valid, device=device)

        model_path = os.path.join(
            output_folder, "models",
            'faster_RCNN_resnet50_{0}epochs.tar'.format(str(epoch + 1)))
        torch.save(
            {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, model_path)

        training_info_path = os.path.join(
            output_folder, "info",
            "training_info_{0}epochs.pkl".format(str(epoch + 1)))
        with open(training_info_path, "wb") as f:
            pkl.dump(training_info, f, protocol=pkl.HIGHEST_PROTOCOL)

        evaluation_info_path = os.path.join(
            output_folder, "info",
            "coco_evaluation_{0}epochs.txt".format(str(epoch + 1)))
        writeCOCOtoTXT(coco_results.coco_eval["bbox"].stats,
                       evaluation_info_path, "Zebrafish")

    print(
        "Training has finished after {0} epochs\nThe weights have been stored in {1}"
        .format(epoch + 1, model_path))
Example No. 13
def main(config):
    # Select the compute device
    if config.gpu_id < 0:
        print("Device: CPU")
        device = torch.device('cpu')
    else:
        print("Device:", torch.cuda.get_device_name(0))
        device = torch.device('cuda:%d' % config.gpu_id)

    # Load the breast cancer dataset
    cancer_data = load_breast_cancer()
    df = pd.DataFrame(cancer_data.data, columns=cancer_data.feature_names)
    df['class'] = cancer_data.target
    data = torch.from_numpy(df.values).float()
    x = data[:, :30]
    y = data[:, -1:]

    # Shuffle and split the data into train / valid / test sets
    ratios = [.6, .2, .2]
    train_cnt = int(x.size(0) * ratios[0])
    valid_cnt = int(x.size(0) * ratios[1])
    test_cnt = x.size(0) - train_cnt - valid_cnt
    cnts = [train_cnt, valid_cnt, test_cnt]
    indices = torch.randperm(x.size(0))
    x = torch.index_select(x, dim=0, index=indices).to(device)
    y = torch.index_select(y, dim=0, index=indices).to(device)
    x = x.split(cnts, dim=0)
    y = y.split(cnts, dim=0)

    # Wrap the splits in torch Datasets and DataLoaders
    train_loader = DataLoader(dataset=CustomDataset(x[0], y[0]),
                              batch_size=config.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(dataset=CustomDataset(x[1], y[1]),
                              batch_size=config.batch_size,
                              shuffle=False)
    test_loader = DataLoader(dataset=CustomDataset(x[2], y[2]),
                             batch_size=config.batch_size,
                             shuffle=False)
    print("Train %d / Valid %d / Test %d samples." % (
        len(train_loader.dataset),
        len(valid_loader.dataset),
        len(test_loader.dataset),
    ))

    # Declare the model and optimizer
    model = CancerClassifier(x[0].size(-1), y[0].size(-1)).to(device)
    optimizer = optim.Adam(model.parameters())

    # Run training
    trainer = Trainer(model, optimizer, train_loader, valid_loader)
    trainer.train(config)

    # Loss history
    plot_from = 2
    plt.figure(figsize=(20, 10))
    plt.grid(True)
    plt.title("Train / Valid Loss History")
    plt.plot(
        range(plot_from, len(trainer.train_history)),
        trainer.train_history[plot_from:],
        range(plot_from, len(trainer.valid_history)),
        trainer.valid_history[plot_from:],
    )
    plt.yscale('log')
    plt.show()

    # Evaluate
    test_loss = 0
    y_hat = []
    model.eval()
    with torch.no_grad():
        for x_i, y_i in test_loader:
            y_hat_i = model(x_i)
            loss = F.binary_cross_entropy(y_hat_i, y_i)
            test_loss += float(loss)  # Gradient is already detached.
            y_hat += [y_hat_i]
    test_loss = test_loss / len(test_loader)
    y_hat = torch.cat(y_hat, dim=0)
    print("Test loss: %.4e" % test_loss)
    correct_cnt = (y[2] == (y_hat > .5)).sum()
    total_cnt = float(y[2].size(0))
    print('Test Accuracy: %.4f' % (correct_cnt / total_cnt))
Example No. 14
start_dir = '../data/test'
testImgs = []
for dirpath, _, _ in os.walk(start_dir):
    testImgs.extend(glob(os.path.join(dirpath, "*.JPEG")))

features_test = torch.load('../lib/features_test.pt')
features_train = torch.load('../lib/features_train.pt')

with open('../lib/labels_train.json') as f:
    labels_train = json.load(f)

with open('../lib/labels_test.json') as f:
    labels_test = json.load(f)

train_dataset = CustomDataset(train=True,
                              features=features_train,
                              labels=labels_train)
test_dataset = CustomDataset(train=False,
                             features=features_test,
                             labels=labels_test)

train_dataloader = DataLoader(train_dataset,
                              batch_size=10,
                              shuffle=True,
                              num_workers=4)
test_dataloader = DataLoader(test_dataset,
                             batch_size=10,
                             shuffle=False,
                             num_workers=4)

FFNmodel = FFNModel(100)
Example No. 15
parser.add_argument('-s',
                    '--startfolder',
                    type=str,
                    default='datasets/google_test/127')
args = parser.parse_args()

classes = sorted(os.listdir(args.startfolder))
num_classes = len(classes)

with h5py.File('datasets/google_train.hdf5', 'r') as dataset_file:

    batch_sampler = CustomBatchSampler(dataset_file,
                                       args.mem,
                                       num_replicas=1,
                                       rank=0)
    dataset = CustomDataset(dataset_file)
    dataloader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            num_workers=args.num_workers,
                            pin_memory=True)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = Model(num_classes)
    state_dict = torch.load(
        'saves/squeezenet_115c_epepoch=10_val_acc=0.51acc.ckpt')['state_dict']
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    samples = 0
    corrects = 0