def predict_fn(input_data, model):
    print('Inferring sentiment of input data.')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if model.word_dict is None:
        raise Exception('Model has not been loaded properly, no word_dict.')

    # Process input_data so that it is ready to be sent to our model:
    # data_X   - a sequence of length 500 which represents the converted review
    # data_len - the length of the review
    data_words = review_to_words(input_data)
    data_X, data_len = convert_and_pad(model.word_dict, data_words, pad=500)

    # Using data_X and data_len we construct an appropriate input tensor. Remember
    # that our model expects input data of the form 'len, review[500]'.
    data_pack = np.hstack((data_len, data_X))
    data_pack = data_pack.reshape(1, -1)

    data = torch.from_numpy(data_pack)
    data = data.to(device)

    # Make sure to put the model into evaluation mode.
    model.eval()

    # Apply the model to the input data; the result is a single integer, 1 or 0.
    result = model(data.long())
    result = result.detach().cpu().numpy()
    result = 1 if result > 0.5 else 0

    return np.array([result], dtype=np.int32)
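# A minimal sketch of how predict_fn might be exercised, assuming a companion
# model_fn loader (as in SageMaker PyTorch serving code) that restores the
# classifier and attaches word_dict. The model directory and review text below
# are illustrative placeholders only.
def _example_predict_fn_usage():
    model = model_fn('/opt/ml/model')  # hypothetical loader and model directory
    review = "This movie was a complete waste of time."
    sentiment = predict_fn(review, model)  # e.g. array([0], dtype=int32) for a negative review
    print('predicted sentiment:', sentiment[0])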
def train(args, model, device, train_loader, optimizer, epoch, timestamp, ngrams, id2w):
    model.train()
    count = 0
    train_loss = 0
    total = 0
    sos = 0
    criterion = nn.CrossEntropyLoss()

    for batch_idx, data in enumerate(train_loader):
        loss = 0
        data = data.long().to(device)

        inputs = [n_gram_batchify(sos, n_gram, device, data) for n_gram in ngrams]
        for inp in inputs:
            output = model(inp)
            loss += criterion(output, data)

        optimizer.zero_grad()
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
        total += len(data)

        if batch_idx % args.log_interval == 1:
            print('Train Epoch: {} [{}/{}] \t Loss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                train_loss / (batch_idx + 1)))

    cpkt_fol_name = '/home/hatzis/Desktop/stim_ctc/multigram/checkpoints/test_day_' + timestamp
    if not os.path.exists(cpkt_fol_name):
        print("Checkpoint Directory does not exist! Making directory {}".format(cpkt_fol_name))
        os.mkdir(cpkt_fol_name)

    logger(cpkt_fol_name + '/training.txt',
           [str(epoch), str(float(train_loss / (batch_idx + 1)))])
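# The logger helper called above is not part of this snippet. A minimal sketch,
# assuming it simply appends the given fields as one comma-separated row to the
# target text file; the real implementation may differ.
def logger(path, fields):
    with open(path, 'a') as f:
        f.write(','.join(fields) + '\n')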
def validate(args, model, device, test_loader, optimizer, epoch, timestamp, ngrams, id2w):
    model.eval()
    eval_loss = 0
    # correct = 0
    sentences = []
    sos = 0  # start-of-sequence token id, as in train()
    criterion = nn.CTCLoss(blank=0, reduction='mean')

    with torch.no_grad():
        for batch_idx, data in enumerate(test_loader):
            loss = 0
            data = data.long().to(device)

            inputs = [n_gram_batchify(sos, n_gram, device, data) for n_gram in ngrams]
            for inp in inputs:
                output = model(inp)
                # nn.CTCLoss expects log-probabilities plus input/target lengths;
                # this assumes output has shape (T, N, C) and data has shape (N, S),
                # with every target being the full sequence.
                log_probs = nn.functional.log_softmax(output, dim=-1)
                input_lengths = torch.full((output.size(1),), output.size(0), dtype=torch.long)
                target_lengths = torch.full((data.size(0),), data.size(1), dtype=torch.long)
                loss += criterion(log_probs, data, input_lengths, target_lengths)

            eval_loss += loss.item()  # sum up batch loss

            probs = nn.functional.softmax(output, dim=-1)
            pred = probs.argmax(dim=-1, keepdim=True).squeeze().cpu().numpy()

            # Build the reference sentence from the target token ids.
            ref = ''
            refs = data.squeeze().cpu().numpy()
            for i in range(data.size(1)):
                ref += id2w[refs[i]] + ' '

            s = greedy_decode(id2w, pred, output.size(0), ' ')
            sentences.append(s)

    #model_out_path = model.get_name() +'no_pad' +'_loss_' +str(float(eval_loss / len(test_loader.dataset)))+'_epoch_'+str(epoch) + ".pth"
    cpkt_fol_name = '/home/hatzis/Desktop/stim_ctc/multigram/checkpoints/test_day_' + timestamp
    if not os.path.exists(cpkt_fol_name):
        print("Checkpoint Directory does not exist! Making directory {}".format(cpkt_fol_name))
        os.mkdir(cpkt_fol_name)

    pred_name = cpkt_fol_name + '/subunetseval_greedy_predictions_epoch' + str(epoch) + \
        'loss_' + str(float(eval_loss / len(test_loader.dataset))) + '_' + timestamp + '_.csv'
    write_csv(sentences, pred_name)

    wer = calc_wer("/home/hatzis/Desktop/teo/ctc_last2/files/dev_phoenixv1.csv", pred_name)
    val_loss = eval_loss / len(test_loader.dataset)
    print('Evaluation : Average loss: {:.4f} Word error rate {}%'.format(val_loss, wer))

    global best_wer
    for_checkpoint = {
        'epoch': epoch,
        'model_dict': model.state_dict(),
        'optimizer_dict': optimizer.state_dict(),
        'validation_loss': str(val_loss),
        'word error rate': wer
    }

    is_best = wer < best_wer
    if is_best:
        print("BEST WER")
        best_wer = wer
        save_checkpoint(for_checkpoint, is_best, cpkt_fol_name, 'best_wer' + str(wer))
    else:
        save_checkpoint(for_checkpoint, is_best, cpkt_fol_name, 'last')

    with open(cpkt_fol_name + '/params_args.txt', 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    logger(cpkt_fol_name + '/validation.txt',
           [str(epoch), str(float(eval_loss / len(test_loader.dataset))), str(wer)])
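# greedy_decode is defined elsewhere. A minimal sketch of the usual greedy CTC
# decoding it is presumed to perform: collapse repeated predictions, drop the
# blank id 0 used by nn.CTCLoss above, and map the remaining ids to words.
# The argument order mirrors the call site; the actual helper may differ.
def greedy_decode(id2w, pred, num_steps, sep=' '):
    words = []
    prev = None
    for t in range(num_steps):
        p = int(pred[t])
        if p != 0 and p != prev:  # skip CTC blanks and repeated symbols
            words.append(id2w[p])
        prev = p
    return sep.join(words)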
def train(self, iteration, epochs=1):
    #move from write cache to db
    self.clearSampleCache()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    #device = torch.device('cpu')

    if config.newIterNets:
        newNet = Net(softmax=self.softmax)
        #embedding should be preserved across iterations
        #but we want a fresh start for the strategy
        #actually, the deep cfr paper said don't do this
        #newNet.load_state_dict(self.net.state_dict())
        #I'm still going to copy over the embedding
        #newNet.embeddings.load_state_dict(self.net.embeddings.state_dict())
        self.net = newNet

    self.net = self.net.to(device)
    #self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
    self.optimizer = OPTIMIZER(self.net.parameters(), lr=self.lr)
    self.net, self.optimizer = amp.initialize(self.net, self.optimizer,
                                              opt_level=AMP_OPT_LEVEL)
    #self.optimizer = optim.SGD(self.net.parameters(), lr=self.lr, momentum=0.9)
    self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, 'min', patience=config.schedulerPatience, verbose=False)
    self.net.train(True)

    #used for scheduling
    lowestLoss = 999
    lowestLossIndex = -1
    lastResetLoss = None
    runningLoss = []

    #we don't really use the dataset, but we use it to read some files
    #we should fix this, but it works and doesn't really hurt anything
    dataset = dataStorage.Dataset(self.name, self.sharedDict, self.outputSize)

    #validation split based on
    #https://stackoverflow.com/questions/50544730/how-do-i-split-a-custom-dataset-into-training-and-test-datasets
    indices = list(range(dataset.size))
    split = int(np.floor(config.valSplit * min(dataset.size, config.epochMaxNumSamples)))
    np.random.shuffle(indices)
    trainIndices = indices[split:min(dataset.size, config.epochMaxNumSamples)]
    testIndices = indices[:split]
    #trainSampler = SubsetRandomSampler(trainIndices)
    #testSampler = SubsetRandomSampler(testIndices)

    #we could scale the minibatch size by the number of samples, but this slows things down
    #miniBatchSize = min(config.miniBatchSize, len(trainIndices) // config.numWorkers)
    miniBatchSize = config.miniBatchSize
    #we could instead scale the number of workers by the number of minibatches
    #numWorkers = min(config.numWorkers, len(trainIndices) // miniBatchSize)
    numWorkers = config.numWorkers

    trainingLoader = dataStorage.BatchDataLoader(
        id=self.name,
        indices=trainIndices,
        batch_size=miniBatchSize,
        num_threads_in_mt=config.numWorkers)
    baseTrainingLoader = trainingLoader
    if numWorkers > 1:
        trainingLoader = MultiThreadedAugmenter(trainingLoader, None, numWorkers, 2, None)

    testingLoader = dataStorage.BatchDataLoader(
        id=self.name,
        indices=testIndices,
        batch_size=miniBatchSize,
        num_threads_in_mt=numWorkers)
    baseTestingLoader = testingLoader
    if numWorkers > 1:
        testingLoader = MultiThreadedAugmenter(testingLoader, None, numWorkers)

    print(file=sys.stderr)
    shuffleStride = 1  #TODO move to config

    for j in range(epochs):
        if epochs > 1:
            print('\repoch', j, end=' ', file=sys.stderr)
        if j == 0:
            print('training size:', len(trainIndices),
                  'val size:', len(testIndices), file=sys.stderr)

        totalLoss = 0
        if (j + 1) % shuffleStride == 0:
            baseTrainingLoader.shuffle()

        i = 0
        sampleCount = 0
        chunkSize = dataset.size / (miniBatchSize * 10)

        for data, dataLengths, labels, iters in trainingLoader:
            sampleCount += 1  #dataLengths.shape[0]
            i += 1

            labels = labels.float().to(device)
            iters = iters.float().to(device)
            data = data.long().to(device)
            dataLengths = dataLengths.long().to(device)

            #evaluate on network
            self.optimizer.zero_grad()
            ys = self.net(data, lengths=dataLengths, trace=False).squeeze()

            #loss function from the paper, except we mask out ignored values
            #loss = iters.view(labels.shape[0],-1) * ((labels - ys) ** 2)
            #mask = loss == IGNORE_LABEL
            #loss[mask] = 0
            #loss = torch.sum(loss) / (torch.sum(iters).item())
            loss = DeepCfrModel._loss(labels, ys, iters)

            #get gradient of loss
            #use amp because nvidia said it's better
            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
            #loss.backward()

            #clip gradient norm, which was done in the paper
            nn.utils.clip_grad_norm_(self.net.parameters(), 5)

            if config.gradPlotStride and (j + 1) % config.gradPlotStride == 0 and i == 1:
                gradPlot.plot_grad_flow(self.net.named_parameters())

            #train the network
            self.optimizer.step()

            totalLoss += loss.item()

        avgLoss = totalLoss / sampleCount
        with open('trainloss.csv', 'a') as file:
            print(avgLoss, end=',', file=file)

        #get validation loss
        #testLoader = torch.utils.data.DataLoader(dataset, batch_size=miniBatchSize, num_workers=config.numWorkers, collate_fn=myCollate, sampler=testSampler)
        self.net.train(False)
        baseTestingLoader.shuffle()
        totalValLoss = 0
        valCount = 0
        stdTotal = 0
        stdCount = 0
        #for data, dataLengths, labels, iters in testLoader:
        for data, dataLengths, labels, iters in testingLoader:
            labels = labels.float().to(device)
            #print('labels', np.round(100 * labels.cpu().numpy()) / 100, file=sys.stderr)
            iters = iters.float().to(device)
            data = data.long().to(device)
            dataLengths = dataLengths.long().to(device)

            ys = self.net(data, lengths=dataLengths, trace=False).squeeze()

            if config.verboseValidation and valCount == 0:
                #print('data', data[0:min(10, len(data))])
                print('labels', labels[0:min(10, len(labels))])
                print('output', ys[0:min(10, len(labels))])
                print('stddev', ys[:, 0].std())  #first column is good enough

            stdTotal += ys.std().item()
            stdCount += 1

            #loss = torch.sum(iters.view(labels.shape[0],-1) * ((labels - ys) ** 2)) / (torch.sum(iters).item())
            loss = DeepCfrModel._loss(labels, ys, iters)
            totalValLoss += loss.item()
            valCount += 1  #dataLengths.shape[0]

        self.net.train(True)

        with open('stddev.csv', 'a') as file:
            print(stdTotal / stdCount, end=',', file=file)

        avgValLoss = totalValLoss / valCount

        #running average of last 3 validation losses
        runningLoss.append(avgValLoss)
        if len(runningLoss) > 3:
            runningLoss = runningLoss[-3:]
        schedLoss = sum(runningLoss) / len(runningLoss)

        if config.useScheduler:
            self.scheduler.step(schedLoss)

        if schedLoss < lowestLoss:
            lowestLoss = schedLoss
            lowestLossIndex = j

        """
        if schedLoss < 0.35:
            print('eh,', schedLoss, 'is good enough', file=sys.stderr)
            break
        """

        """
        if j - lowestLossIndex > 3 * config.schedulerPatience:  #avoid saddle points
            #print('resetting learn rate to default', j, lowestLossIndex, lowestLoss, schedLoss, lastResetLoss, file=sys.stderr)
            #self.optimizer = optim.Adam(self.net.parameters(), lr=config.learnRate)
            #self.optimizer = optim.SGD(self.net.parameters(), lr=self.lr, momentum=0.9)
            #self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min', patience=config.schedulerPatience, verbose=False)
            print('stopping epoch early')
            break
            lowestLossIndex = j
            #if we've reset before and made no progress, just stop
            if lastResetLoss is not None and (schedLoss - lastResetLoss) / lastResetLoss > -0.01:
                print('stopping epoch early, (schedLoss - lastResetLoss) / lastResetLoss is',
                      (schedLoss - lastResetLoss) / lastResetLoss, file=sys.stderr)
                break
            lastResetLoss = schedLoss
        """

        #show in console and output to csv
        print('val Loss', avgValLoss, end='', file=sys.stderr)
        with open('valloss.csv', 'a') as file:
            #print(avgValLoss, end=',', file=file)
            print(schedLoss, end=',', file=file)

    with open('valloss.csv', 'a') as file:
        print(file=file)
    with open('trainloss.csv', 'a') as file:
        print(file=file)
    with open('stddev.csv', 'a') as file:
        print(file=file)

    print('\n', file=sys.stderr)

    self.net.train(False)

    self.saveModel(iteration)

    #warPoker examples
    """
    exampleInfoSets = [
        ['start', 'hand', '2', '0', 'deal', '1', 'raise'],
        ['start', 'hand', '7', '0', 'deal', '1', 'raise'],
        ['start', 'hand', '14', '0', 'deal', '1', 'raise'],
        ['start', 'hand', '2', '1', 'deal'],
        ['start', 'hand', '7', '1', 'deal'],
        ['start', 'hand', '14', '1', 'deal'],
    ]
    for example in exampleInfoSets:
        print('example input:', example, file=sys.stderr)
        probs, expVal = self.predict(example, trace=False)
        print('exampleOutput (deal, fold, call, raise)', np.round(100 * probs),
              'exp value', round(expVal * 100), file=sys.stderr)
    """

    #ace example
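# DeepCfrModel._loss is not shown in this excerpt. A minimal sketch of what the
# commented-out version inside train() suggests it computes: an iteration-weighted
# squared error (the Deep CFR regression target) with ignored entries masked out.
# IGNORE_LABEL is assumed to be a module-level constant, and on the real class
# this would be a @staticmethod.
def _loss(labels, ys, iters):
    loss = iters.view(labels.shape[0], -1) * ((labels - ys) ** 2)
    loss[labels == IGNORE_LABEL] = 0  # drop entries marked as ignored
    return torch.sum(loss) / torch.sum(iters).item()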
def dataAsLongDeviceTensor(data, device):
    if isinstance(data, torch.LongTensor):
        return data.to(device)
    if isinstance(data, torch.Tensor):
        return data.long().to(device)
    return torch.tensor(data, dtype=torch.long, device=device)
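# Example of the three paths through dataAsLongDeviceTensor: an existing LongTensor
# is only moved, any other tensor is cast to long and moved, and plain Python or
# numpy data is wrapped in a new long tensor on the target device. Illustrative only.
def _example_dataAsLongDeviceTensor_usage():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    a = dataAsLongDeviceTensor(torch.tensor([1, 2, 3]), device)    # already long: just moved
    b = dataAsLongDeviceTensor(torch.tensor([1.0, 2.0]), device)   # float tensor: cast then moved
    c = dataAsLongDeviceTensor([[4, 5], [6, 7]], device)           # nested list: new long tensor
    return a, b, c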