def main(load=False, lr=1e-3, savestr="", reset=True, palette=False): total_epochs = 10 iter_per_epoch = 100000 lr = lr optim = None starting_epoch = 0 starting_iteration = 0 logfile = "log.txt" num_workers = 3 ig = InputGenD() trainds, validds = train_valid_split(ig, split_fold=10) traindl = DataLoader(dataset=trainds, batch_size=1, num_workers=num_workers) validdl = DataLoader(dataset=validds, batch_size=1) print("Using", num_workers, "workers for training set") computer = NotMySam(input_size=47764, hidden_size=128, last_output_size=3620, rnn_type='lstm', num_layers=4, nr_cells=100, cell_size=32, read_heads=4, sparse_reads=4, batch_first=True, gpu_id=0) # load model: if load: print("loading model") computer, optim, starting_epoch, starting_iteration = load_model( computer, optim, starting_epoch, starting_iteration, savestr) computer = computer.cuda() if optim is None: print("Using Adam with lr", lr) optimizer = torch.optim.Adam( [i for i in computer.parameters() if i.requires_grad], lr=lr) else: # print('use Adadelta optimizer with learning rate ', lr) # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr) optimizer = optim real_criterion = nn.SmoothL1Loss() binary_criterion = nn.BCEWithLogitsLoss(size_average=False) # starting with the epoch after the loaded one train(computer, optimizer, real_criterion, binary_criterion, traindl, iter(validdl), int(starting_epoch), total_epochs, int(starting_iteration), iter_per_epoch, savestr, logfile)
def main(load=False, lr=1e-4, savestr="6"): total_epochs = 10 iter_per_epoch = 10000 lr = lr optim = None starting_epoch = 0 starting_iteration = 0 logfile = "smalltacolog.txt" num_workers = 32 ig = InputGenD() # multiprocessing disabled, because socket request seems unstable. # performance should not be too bad? trainds, validds = train_valid_split(ig, split_fold=10) traindl = DataLoader(dataset=trainds, batch_size=32, num_workers=num_workers, collate_fn=pad_collate) validdl = DataLoader(dataset=validds, batch_size=8, num_workers=4, collate_fn=pad_collate) print("Using", num_workers, "workers for training set") computer = Tacotron() # load model: if load: print("loading model") computer, optim, starting_epoch, starting_iteration = load_model( computer, optim, starting_epoch, starting_iteration, savestr) computer = computer.cuda() if optim is None: print("Using Adam with lr", lr) optimizer = torch.optim.Adam( [i for i in computer.parameters() if i.requires_grad], lr=lr) else: # print('use Adadelta optimizer with learning rate ', lr) # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr) optimizer = optim real_criterion = nn.SmoothL1Loss() # time-wise sum, label-wise average. binary_criterion = nn.BCEWithLogitsLoss() # starting with the epoch after the loaded one train(computer, optimizer, real_criterion, binary_criterion, traindl, validdl, int(starting_epoch), total_epochs, int(starting_iteration), iter_per_epoch, savestr, logfile)
def main():
    total_epochs = 10
    iter_per_epoch = 100000
    lr = 1e-5
    target_dim = 3656
    logfile = "log.txt"
    num_workers = 3

    ig = InputGenD()
    # multiprocessing disabled, because socket request seems unstable.
    # performance should not be too bad?
    trainds, validds = train_valid_split(ig, split_fold=10)
    traindl = DataLoader(dataset=trainds, batch_size=1, num_workers=num_workers)
    validdl = DataLoader(dataset=validds, batch_size=1)
    print("Using", num_workers, "workers for training set")

    computer = DNC()
    computer.train()

    # load model (always; optim and the counters are returned by load_model):
    if True:
        print("loading model")
        computer, optim, starting_epoch, starting_iteration = load_model(computer)

    computer = computer.cuda()

    if optim is None:
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            computer.parameters()), lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    real_criterion = nn.SmoothL1Loss()
    # sum over all elements (size_average=False in older PyTorch)
    binary_criterion = nn.BCEWithLogitsLoss(reduction='sum')

    # starting with the epoch after the loaded one
    train(computer, optimizer, real_criterion, binary_criterion,
          traindl, iter(validdl), int(starting_epoch), total_epochs,
          int(starting_iteration), iter_per_epoch, target_dim, logfile)
def main(load=False, lr=1e-3):
    total_epochs = 10
    iter_per_epoch = 100000
    optim = None
    starting_epoch = 0
    starting_iteration = 0
    logfile = "log.txt"
    num_workers = 3

    ig = InputGenD()
    trainds, validds = train_valid_split(ig, split_fold=10)
    traindl = DataLoader(dataset=trainds, batch_size=1, num_workers=num_workers)
    validdl = DataLoader(dataset=validds, batch_size=1)
    print("Using", num_workers, "workers for training set")

    computer = DNC()

    # load model:
    if load:
        print("loading model")
        computer, optim, starting_epoch, starting_iteration = load_model(
            computer, optim, starting_epoch, starting_iteration)

    computer = computer.cuda()

    if optim is None:
        print("Using Adam with lr", lr)
        optimizer = torch.optim.Adam(
            [i for i in computer.parameters() if i.requires_grad], lr=lr)
    else:
        # print('use Adadelta optimizer with learning rate ', lr)
        # optimizer = torch.optim.Adadelta(computer.parameters(), lr=lr)
        optimizer = optim

    real_criterion = nn.SmoothL1Loss()
    # sum over all elements (size_average=False in older PyTorch)
    binary_criterion = nn.BCEWithLogitsLoss(reduction='sum')

    # starting with the epoch after the loaded one
    train(computer, optimizer, real_criterion, binary_criterion,
          traindl, iter(validdl), int(starting_epoch), total_epochs,
          int(starting_iteration), iter_per_epoch, logfile)
# this file contains all the statistical models for baselines.
# I chose to do all of these in Python, not R, because the data is finalized in Python, not R.
# We could export from Python to R, but that seems unreasonable.

from death.post.inputgen_planD import InputGenD, train_valid_split
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Logistic regression.
# The targets are a bunch of binary values.

ig = InputGenD()
trainds, validds = train_valid_split(ig, split_fold=10)

lr = LogisticRegression(random_state=0, multi_class="ovr")

# X is an ndarray with shape (150, 4)
# y is an ndarray with shape (150,)
X, y = load_iris(return_X_y=True)
# lrfit = lr.fit(X, y)

print("end")
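
# The fit itself is commented out above. A minimal sketch of fitting and scoring
# the baseline on the iris placeholder data; the real baseline would substitute
# feature matrices and binary targets derived from InputGenD, whose exact layout
# this snippet does not assume.
lrfit = lr.fit(X, y)
print(lrfit.score(X, y))      # training accuracy on the placeholder data
print(lrfit.predict(X[:5]))   # predicted classes for the first five rows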
### To see the pandas data frames

# Note that DFManager allows you to load from pickle files or from raw csv files.
# Pickle loading is much faster; loading raw rebuilds the pickle.
from death.post.dfmanager import DFManager

dfs = DFManager()
dfs.load_pickle(verbose=True)

# from here, you can see all dataframes as dfs' properties
# for example, this is the demographics csv:
print(dfs.demo)

# if you want to load the raw csv files and rebuild the pickle files, run:
dfs.load_raw(save=True)

# make the dictionaries necessary for one-hot encodings
dfs.make_dictionary(verbose=True, save=True, skip=False)

### To see the inputs and outputs used by the deep learning model

from death.post.inputgen_planD import InputGenD, train_valid_split

ig = InputGenD(verbose=False)

# split into a training set and a validation set if you want;
# it's fine if you skip this step
train, valid = train_valid_split(ig)

# the __getitem__() method is how you should access this dataset
print(train[123])

### Loading into PyTorch is trickier, because sequences don't have even lengths.
# I have two solutions, one with ChannelManager and one with padded sequences;
# see the scripts for BatchDNC and Tacotron respectively.
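
# Minimal sketch of wrapping the split datasets in DataLoaders. With batch_size=1
# no padding is needed, which is what the DNC training scripts above do; for
# larger batches you need a padding collate_fn (see the Tacotron script) or the
# ChannelManager approach, because sequence lengths differ across samples.
from torch.utils.data import DataLoader

train_loader = DataLoader(train, batch_size=1, num_workers=0)
for sample in train_loader:
    # each element is whatever __getitem__ returns, batched along dim 0
    print([t.shape for t in sample if hasattr(t, "shape")])
    break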