def main():
    """Fine-tune a pretrained VGG16 as a 2-class classifier.

    Builds train/val datasets and dataloaders, swaps the final classifier
    layer for a 2-way head, and trains with per-parameter-group learning
    rates via SGD.
    """
    train_list = make_datapath_list(phase="train")
    val_list = make_datapath_list(phase="val")

    # Dataset: one transform pipeline, switched by phase.
    train_dataset = MyDataset(train_list, transform=ImageTransform(resize, mean, std), phase="train")
    val_dataset = MyDataset(val_list, transform=ImageTransform(resize, mean, std), phase="val")
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=4, shuffle=False)
    dataloader_dict = {"train": train_dataloader, "val": val_dataloader}

    # Network: torchvision's `pretrained` flag expects a bool. The original
    # passed the string "true", which only worked because any non-empty
    # string is truthy.
    use_pretrained = True
    net = models.vgg16(pretrained=use_pretrained)
    net.classifier[6] = nn.Linear(in_features=4096, out_features=2)

    # Loss
    criterion = nn.CrossEntropyLoss()

    # Optimizer: three parameter groups with increasing learning rates
    # (earlier layers slowest, new classifier head fastest).
    params1, params2, params3 = param_to_update(net)
    optimizer = optim.SGD([
        {'params': params1, 'lr': 1e-4},
        {'params': params2, 'lr': 5e-4},
        {'params': params3, 'lr': 1e-3},
    ], momentum=0.9)

    train_model(net, dataloader_dict, criterion, optimizer, num_epochs)
def main():
    """Build datasets/dataloaders, set up a fine-tuned VGG16, and train it."""
    # Gather image paths for each split.
    train_list = make_datapath_list("train")
    val_list = make_datapath_list("val")

    # Datasets share one transform pipeline, parameterized by phase.
    datasets = {
        "train": MyDataset(train_list, transform=ImageTransform(resize, mean, std), phase="train"),
        "val": MyDataset(val_list, transform=ImageTransform(resize, mean, std), phase="val"),
    }

    # Only the training loader shuffles.
    dataloader_dict = {
        phase: torch.utils.data.DataLoader(datasets[phase], batch_size, shuffle=(phase == "train"))
        for phase in ("train", "val")
    }

    # Pretrained VGG16 with its last classifier layer replaced by a 2-class head.
    net = models.vgg16(pretrained=True)
    net.classifier[6] = nn.Linear(in_features=4096, out_features=2)

    loss_fn = nn.CrossEntropyLoss()

    # Three parameter groups with increasing learning rates (head fastest).
    params1, params2, params3 = params_to_update(net)
    optimizer = optim.SGD(
        [
            {'params': params1, 'lr': 1e-4},
            {'params': params2, 'lr': 5e-4},
            {'params': params3, 'lr': 1e-3},
        ],
        momentum=0.9,
    )

    # Run the training loop.
    train_model(net, dataloader_dict, loss_fn, optimizer, num_epochs)
def create_menu_bar(self, root):
    """Attach the File, Data-Set and Neural Network menus to the menu bar."""
    # (menu widget, cascade label, [(entry label, callback), ...])
    menu_spec = [
        (self.file_menu, "File",
         [("Open audio file", self.open_audio_file)]),
        (self.ds_menu, "Data-Set",
         [("Generate graphics", lambda: spectogram.create_data_set_graphs()),
          ("Graphics augmentation", lambda: ImageTransform.gen_dataset_augmens())]),
        (self.nn_menu, "Neural Network",
         [("Train", lambda: NeuralNetwork.create_and_train_nn()),
          ("Load last model weights", lambda: NeuralNetwork.load_model_weights())]),
    ]
    # Register each menu's entries, then hang the menu off the bar.
    for menu, cascade_label, entries in menu_spec:
        for entry_label, callback in entries:
            menu.add_command(label=entry_label, command=callback)
        self.menu_bar.add_cascade(label=cascade_label, menu=menu)
def plotstft(audiopath, generatefig=True, binsize=2**10, plotpath=None, colormap="jet"): #colormap="jet"
    """Render the log-scaled spectrogram of a .wav file as a matplotlib figure.

    When generatefig is True the figure is post-processed and written out as
    a .png; otherwise the intermediate images are shown interactively for
    debugging. Returns the matplotlib figure.
    """
    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)
    audio_path_split = audiopath.split('/')
    if len(audio_path_split)-1 > 0:
        plotpath = audio_path_split[0] + "/" + audio_path_split[1] + "/graphs/" + audio_path_split[2] # insert a "graphs" folder into the save path
        plotpath = plotpath.replace('.wav', '.png') # swap the file extension to .png
    # NOTE(review): the line below unconditionally overwrites the graphs/
    # path built above — confirm whether that fallback is intended.
    plotpath = audiopath.replace('.wav', '.png')
    sshow, freq = logscale_spec(s, factor=80.0, sr=samplerate)
    ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel
    timebins, freqbins = np.shape(ims)
    fig = plt.figure(figsize=(8, 4.25))
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")
    plt.colorbar()
    plt.xlabel("Time [s]")
    plt.ylabel("Frequency dB[Hz]")
    plt.xlim([0, timebins-1])
    plt.ylim([0, freqbins])
    # Tick labels: x axis in seconds, y axis from the log-scaled frequency bins.
    xlocs = np.float32(np.linspace(0, timebins-1, 10))
    plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate])
    ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 20)))
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])
    #plt.clf()
    fig.canvas.draw() # important: renders the figure's pixel matrix without showing it to the user
    if not(generatefig):
        plt.show()
        """ -temp- deo samo za prikaz sta ce ici u obucavanje mreze... posle obrisati.. """
        # works well...
        img_data = ImageTransform.fig2data(fig)
        img_data = ImageTransform.transform(img_data)
        plt.imshow(img_data, 'gray')
        plt.figure()
        img_data = ImageTransform.image_bin(img_data)
        img_data = ImageTransform.invert(img_data)
        img_data = ImageTransform.remove_noise(img_data) # morphological closing: 1.dilate 2.erode
        img_data = ImageTransform.resize_graph(img_data, 70, 33) #original 350x165; 350/5=70, 165/5=33, proportions preserved
        cv2.imwrite("test.png", img_data)
        plt.imshow(img_data, 'gray')
        plt.show()
    else:
        img_data = prepare_fig_to_img(fig) #builds the data-set graph image ... TODO: extract into a separate function
        cv2.imwrite(plotpath, img_data)
    #plt.close(fig) # prevents a memory leak
    return fig # return the matplotlib figure object
def create_menu_bar(self, root):
    """Build the application's menu bar (File, Data-Set, Neural Network)."""

    def cascade(menu, title, *items):
        # Register each (label, command) pair, then add the menu to the bar.
        for text, action in items:
            menu.add_command(label=text, command=action)
        self.menu_bar.add_cascade(label=title, menu=menu)

    cascade(self.file_menu, "File",
            ("Open audio file", self.open_audio_file))
    cascade(self.ds_menu, "Data-Set",
            ("Generate graphics", lambda: spectogram.create_data_set_graphs()),
            ("Graphics augmentation", lambda: ImageTransform.gen_dataset_augmens()))
    cascade(self.nn_menu, "Neural Network",
            ("Train", lambda: NeuralNetwork.create_and_train_nn()),
            ("Load last model weights", lambda: NeuralNetwork.load_model_weights()))
def main():
    """Show a sample image before and after the training-phase transform."""
    # Load and display the raw image.
    source = Image.open('./data/9497/resize-070327.jpg')
    plt.imshow(source)
    plt.show()

    size = (224, 224)
    # NOTE(review): mean/std are defined but never used here — confirm
    # whether ImageTransform should receive them.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    # Apply the train-phase transform and report the resulting tensor shape.
    transformer = ImageTransform(size)
    tensor_img = transformer(source, phase='train')
    print(tensor_img.shape)

    # Tensor (C, H, W) -> image (H, W, C), clipped into the displayable range.
    displayable = np.clip(tensor_img.numpy().transpose((1, 2, 0)), 0, 1)
    plt.imshow(displayable)
    plt.show()
def predict(img):
    """Classify one image with the fine-tuned VGG16 and return the top prediction."""
    # Rebuild the network architecture (2-class head) in inference mode.
    net = models.vgg16(pretrained=True)
    net.classifier[6] = nn.Linear(in_features=4096, out_features=2)
    net.eval()

    # Restore the trained weights from disk.
    model = load_model(net, save_path)

    # Test-phase preprocessing, then add a batch dimension:
    # (chan, height, width) -> (1, chan, height, width)
    batch = ImageTransform(resize, mean, std)(img, phase="test").unsqueeze_(0)

    # Forward pass and reduction to the most likely class.
    return predictor.predict_max(model(batch))
def prepare_fig_to_img(graph_fig):
    """Convert a matplotlib figure into a network-ready image.

    Pipeline: rasterize -> crop/grayscale -> binarize -> invert ->
    denoise (morphological closing) -> resize. Returns a numpy matrix.
    """
    # Preprocessing stages applied in order to the rasterized figure.
    stages = (
        ImageTransform.transform,     # crop + grayscale
        ImageTransform.image_bin,     # binarization
        ImageTransform.invert,        # invert
        ImageTransform.remove_noise,  # closing: 1. dilate 2. erode
    )
    image = ImageTransform.fig2data(graph_fig)
    for stage in stages:
        image = stage(image)
    # Original graph is 350x165; 70x33 (both /5) keeps the aspect ratio.
    return ImageTransform.resize_graph(image, 70, 33)
def prepare_fig_to_img(graph_fig):
    """Turn a matplotlib figure object into an image ready for network training.

    Steps applied to the rasterized figure:
    1. crop  2. grayscale  3. binarize  4. denoise  5. resize
    Returns a numpy matrix.
    """
    raster = ImageTransform.fig2data(graph_fig)
    cropped_gray = ImageTransform.transform(raster)
    binary = ImageTransform.image_bin(cropped_gray)
    inverted = ImageTransform.invert(binary)
    # Morphological closing (dilate then erode) removes speckle noise.
    cleaned = ImageTransform.remove_noise(inverted)
    # Source graph is 350x165; 70x33 divides both by 5, preserving proportions.
    return ImageTransform.resize_graph(cleaned, 70, 33)
# Per-channel mean/std used by Normalize below (CIFAR-10 statistics —
# presumably precomputed over the training set; TODO confirm).
img_mean = (0.4914, 0.4822, 0.4465)
img_std = (0.2023, 0.1994, 0.2010)

# Transforms: augmentation + normalization for training, normalization only for test.
transform_train = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(img_mean, img_std),
]
transform_test = [
    transforms.ToTensor(),
    transforms.Normalize(img_mean,img_std),
]
# Wrap the raw transform lists via the project's ImageTransform helper.
t_transform_train = ImageTransform.transform(transform_train)
t_transform_test = ImageTransform.transform(transform_test)

# Dataset and DataLoader arguments (kwargs passed on when the loaders are built).
dataset_name = torchvision.datasets.CIFAR10
trainSet_dict = dict(root='./data', train=True, download=True, transform=t_transform_train)
trainLoad_dict = dict(batch_size=32, shuffle=True, num_workers=4)
testSet_dict = dict(root='./data', train=False, download=True, transform=t_transform_test)
testLoad_dict = dict(batch_size=32, shuffle=False, num_workers=4)

# Output locations for generated images and saved models.
IMAGE_PATH = "images/"
MODEL_PATH = "model/"

def main():
    # Device setup (function continues beyond this chunk).
    SEED = 1
    cuda = torch.cuda.is_available()
def plotstft(audiopath, generatefig=True, binsize=2**10, plotpath=None, colormap="jet"): #colormap="jet"
    """Render the log-scaled spectrogram of a .wav file as a matplotlib figure.

    When generatefig is True the figure is post-processed and written out as
    a .png; otherwise the intermediate images are shown interactively for
    debugging. Returns the matplotlib figure.
    """
    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)
    audio_path_split = audiopath.split('/')
    if len(audio_path_split) - 1 > 0:
        plotpath = audio_path_split[0] + "/" + audio_path_split[1] + "/graphs/" + audio_path_split[2]  # insert a "graphs" folder into the save path
        plotpath = plotpath.replace('.wav', '.png')  # swap the file extension to .png
    # NOTE(review): the line below unconditionally overwrites the graphs/
    # path built above — confirm whether that fallback is intended.
    plotpath = audiopath.replace('.wav', '.png')
    sshow, freq = logscale_spec(s, factor=80.0, sr=samplerate)
    ims = 20. * np.log10(np.abs(sshow) / 10e-6)  # amplitude to decibel
    timebins, freqbins = np.shape(ims)
    fig = plt.figure(figsize=(8, 4.25))
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")
    plt.colorbar()
    plt.xlabel("Time [s]")
    plt.ylabel("Frequency dB[Hz]")
    plt.xlim([0, timebins - 1])
    plt.ylim([0, freqbins])
    # Tick labels: x axis in seconds, y axis from the log-scaled frequency bins.
    xlocs = np.float32(np.linspace(0, timebins - 1, 10))
    plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs * len(samples) / timebins) + (0.5 * binsize)) / samplerate])
    ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 20)))
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])
    #plt.clf()
    fig.canvas.draw()  # important: renders the figure's pixel matrix without showing it to the user
    if not (generatefig):
        plt.show()
        """ -temp- deo samo za prikaz sta ce ici u obucavanje mreze... posle obrisati.. """
        # works well...
        img_data = ImageTransform.fig2data(fig)
        img_data = ImageTransform.transform(img_data)
        plt.imshow(img_data, 'gray')
        plt.figure()
        img_data = ImageTransform.image_bin(img_data)
        img_data = ImageTransform.invert(img_data)
        img_data = ImageTransform.remove_noise(img_data)  # morphological closing: 1.dilate 2.erode
        img_data = ImageTransform.resize_graph(img_data, 70, 33)  #original 350x165; 350/5=70, 165/5=33, proportions preserved
        cv2.imwrite("test.png", img_data)
        plt.imshow(img_data, 'gray')
        plt.show()
    else:
        img_data = prepare_fig_to_img(fig)  #builds the data-set graph image ... TODO: extract into a separate function
        cv2.imwrite(plotpath, img_data)
    #plt.close(fig) # prevents a memory leak
    return fig  # return the matplotlib figure object
# Map each numeric label (labels_n) to its label string: keep the first
# occurrence of every (labels, labels_n) pair, index by labels_n, sort.
# Assumes df_train is defined earlier in the file — not visible in this chunk.
df_labels_idx = df_train.loc[df_train.duplicated(["labels", "labels_n"])==False]\
    [["labels_n", "labels"]].set_index("labels_n").sort_index()

# Load Image: collect file paths for each split and report their sizes.
train_list = make_datapath_list(phase="train")
print(f"train data length : {len(train_list)}")
val_list = make_datapath_list(phase="val")
print(f"validation data length : {len(val_list)}")
test_list = make_datapath_list(phase="test")
print(f"test data length : {len(test_list)}")

# Create Dataset: one PlantDataset per split, sharing the same transform
# parameters but switching augmentation behavior via `phase`.
train_dataset = PlantDataset(df_labels_idx, df_train, train_list, transform=ImageTransform(size, mean, std), phase='train')
val_dataset = PlantDataset(df_labels_idx, df_train, val_list, transform=ImageTransform(size, mean, std), phase='val')
test_dataset = PlantDataset(df_labels_idx, df_train, test_list, transform=ImageTransform(size, mean, std), phase='test')

# Sanity-check the first training sample (continues beyond this chunk).
index = 0
print("【train dataset】")
def main(cfg):
    """Train and evaluate an FCN segmentation model on pathological images.

    cfg supplies size, num_pixels, rate, batch_size, SGD.lr, SGD.momentum
    and num_epochs. Saves a loss curve (loss.png) and the trained weights
    (./pathological.pth).
    """
    # Collect the list of pathological image paths.
    data_dir = make_data_path_list()

    # Build the dataset.
    dataset = PathologicalImage(file_list=data_dir, transform=ImageTransform(cfg.size), num_pixels=cfg.num_pixels)

    # Number of training samples (cfg.rate = train fraction).
    train_size = int(len(dataset) * cfg.rate)
    # Number of validation samples.
    val_size = len(dataset) - train_size
    # Split the dataset.
    train_dataset, val_dataset = data.random_split(dataset, [train_size, val_size])

    # Sanity check: shapes of the first sample.
    print("入力画像サイズ:" + str(train_dataset.__getitem__(0)[0].size()))
    print("教師データサイズ:" + str(train_dataset.__getitem__(0)[1].shape))

    # DataLoader for training (shuffled).
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True)
    # DataLoader for validation (order preserved).
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=cfg.batch_size, shuffle=False)

    # Sanity check: shapes of one training batch.
    batch_iterator = iter(train_dataloader)
    inputs, labels = next(batch_iterator)
    log.info("-----Image and label shape of dataloader-----")
    log.info("入力データ:" + str(inputs.size()))
    log.info("入力ラベル:" + str(labels.shape))

    # GPU setup: move the custom FCN to the available device.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = FCNs()
    net.to(device)
    # Let cuDNN pick the fastest algorithms for fixed-size inputs.
    torch.backends.cudnn.benchmark = True
    log.info("-----Constitution of network-----")
    log.info(net)

    # Loss function.
    criterion = nn.MSELoss()

    # Optimizer.
    optimizer = optim.SGD(net.parameters(), lr=cfg.SGD.lr, momentum=cfg.SGD.momentum)
    log.info("-----Details of optimizer function-----")
    log.info(optimizer)

    # Per-epoch loss histories.
    train_loss = []
    val_loss = []

    # Training loop: train then validate each epoch.
    for epoch in range(cfg.num_epochs):
        log.info("Epoch {} / {} ".format(epoch + 1, cfg.num_epochs))
        log.info("----------")
        train_history = train_model(net, train_dataloader, criterion, optimizer)
        train_loss.append(train_history)
        val_history = val_model(net, val_dataloader, criterion)
        val_loss.append(val_history)

    # Test and save outputs. NOTE(review): evaluates on the validation
    # loader — confirm a separate test split is not expected here.
    test_history = test_model(net, val_dataloader, criterion)

    # Plot both loss curves over all epochs and save them to loss.png.
    fig_loss, ax_loss = plt.subplots(figsize=(10, 10))
    ax_loss.plot(range(1, cfg.num_epochs + 1, 1), train_loss, label="train_loss")
    ax_loss.plot(range(1, cfg.num_epochs + 1, 1), val_loss, label="val_loss")
    ax_loss.set_xlabel("epoch")
    ax_loss.legend()
    fig_loss.savefig("loss.png")

    # Persist the trained parameters.
    save_path = './pathological.pth'
    torch.save(net.state_dict(), save_path)