def predict(features, stroke_names_id, encoder, decoder, dataloaders, labs_keys,
            labs_values, phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    encoder = encoder.eval()
    decoder = decoder.eval()
    # Iterate over data.
    for bno, (inputs, targets, vid_path, stroke, labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys, labs_values)
        # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
        # inputs = inputs.permute(0, 2, 1, 3, 4).float()
        inputs, targets = inputs.to(device), targets.to(device)
        labels = labels.to(device)
        # forward; track history only in train
        with torch.set_grad_enabled(phase == 'train'):
            batch_size = inputs.size(0)
            enc_h = encoder.init_hidden(batch_size)
            enc_out, h = encoder(inputs, enc_h)
            dec_h = h
            dec_in = torch.zeros(batch_size, targets.size(2)).to(device)
            dec_out_lst = []
            target_length = targets.size(1)     # assign SEQ_LEN as target length for now
            # decode step by step, feeding each prediction back as the next
            # input (no teacher forcing at inference time)
            for ti in range(target_length):
                dec_out, dec_h, dec_attn = decoder(dec_h, enc_out, dec_in)
                dec_out_lst.append(dec_out)
                # loss += criterion(dec_out, targets[:, ti, :])
                dec_in = dec_out
            outputs = torch.stack(dec_out_lst, dim=1)
            # NOTE: outputs is recomputed for each batch; this function does
            # not accumulate or return the decoded sequences.

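# ---------------------------------------------------------------------------
# The decoding loop above feeds each prediction back as the next decoder
# input, while the teacher-forced train_model() further below feeds the
# ground-truth targets[:, ti, :] instead. A minimal, self-contained sketch of
# the two regimes with a toy GRU decoder; ToyDecoder and decode() are
# illustrative assumptions, not the project's actual Encoder/Decoder classes.

import torch
import torch.nn as nn

class ToyDecoder(nn.Module):
    def __init__(self, feat_dim=32, hidden_size=64):
        super().__init__()
        self.gru = nn.GRU(feat_dim, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, feat_dim)

    def forward(self, h, x):
        # x: (BATCH, 1, FEAT), h: (1, BATCH, HIDDEN)
        o, h = self.gru(x, h)
        return self.out(o.squeeze(1)), h

def decode(decoder, h, targets, teacher_forcing):
    # targets: (BATCH, SEQ, FEAT); returns stacked outputs (BATCH, SEQ, FEAT)
    dec_in = torch.zeros_like(targets[:, 0, :])     # start symbol
    outs = []
    for ti in range(targets.size(1)):
        dec_out, h = decoder(h, dec_in.unsqueeze(1))
        outs.append(dec_out)
        # teacher forcing conditions on the ground truth; otherwise the
        # model's own prediction is fed back, as in predict() above
        dec_in = targets[:, ti, :] if teacher_forcing else dec_out
    return torch.stack(outs, dim=1)

# usage:
# dec = ToyDecoder()
# out = decode(dec, torch.zeros(1, 4, 64), torch.randn(4, 8, 32), teacher_forcing=False)
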
def train_model(features, stroke_names_id, model, dataloaders, criterion, optimizer,
                scheduler, labs_keys, labs_values, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()   # Set model to training mode
            else:
                model.eval()    # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0.0
            # Iterate over data.
            for bno, (inputs, vid_path, stroke, _, labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                                     labs_values, inputs.shape[1])
                inputs = inputs.float()
                inp_emb = attn_utils.get_long_tensor(inputs)    # comment out for SA
                inputs = inp_emb.to(device)                     # comment out for SA
                inputs = inputs.t().contiguous()                # convert to (SEQ, BATCH)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                output = model(inputs)  # output size (SEQ_SIZE, BATCH, NCLASSES)
                output = output.permute(1, 0, 2).contiguous()
                # NOTE: if criterion is nn.CrossEntropyLoss, pass the raw logits
                # instead of softmax probabilities (see the note below).
                output = F.softmax(output.view(-1, output.shape[-1]), dim=1)
                # output = output.view(-1, output.shape[-1])  # to (BATCH*SEQ_SIZE, NCLUSTERS)
                loss = criterion(output, labels)
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                    optimizer.step()
                _, preds = torch.max(output, 1)
                # statistics
                running_loss += loss.item()     # * inputs.size(0)
                # print("Iter : {} :: Running Loss : {}".format(bno, running_loss))
                running_corrects += torch.sum(preds == labels.data)
                # if bno == 20:
                #     break
            epoch_loss = running_loss / len(dataloaders[phase])     # .dataset
            epoch_acc = running_corrects.double() / (inputs.size(0) * len(dataloaders[phase].dataset))
            print('{} Loss: {:.4f} Acc: {:.4f} LR: {}'.format(phase, epoch_loss, epoch_acc,
                                                              scheduler.get_last_lr()[0]))
            if phase == 'train':
                scheduler.step()
            # deep copy the model for best test accuracy
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                        time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

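# ---------------------------------------------------------------------------
# Note on the softmax in train_model() above: nn.CrossEntropyLoss applies
# log-softmax internally, so feeding it softmax probabilities squashes the
# logits twice and weakens the gradients. If criterion is CrossEntropyLoss,
# raw logits are the correct input; a softmax output pairs with NLLLoss on
# its log instead. A small demonstration of the equivalence:

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 5)                  # (BATCH*SEQ_SIZE, NCLASSES)
labels = torch.randint(0, 5, (4,))

ce = nn.CrossEntropyLoss()(logits, labels)  # expects raw logits
nll = nn.NLLLoss()(torch.log(F.softmax(logits, dim=1)), labels)
assert torch.allclose(ce, nll, atol=1e-6)   # the two pairings agree
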
def predict(features, stroke_names_id, model, dataloaders, labs_keys, labs_values,
            seq, phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs, vid_path, stroke, _, labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        seq = inputs.shape[1]
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys, labs_values, seq)
        inputs = inputs.float()
        inp_emb = attn_utils.get_long_tensor(inputs)    # comment out for SA
        inputs = inp_emb.to(device)                     # comment out for SA
        inputs = inputs.t().contiguous()
        labels = labels.to(device)
        # forward
        with torch.set_grad_enabled(phase == 'train'):
            outputs = model(inputs)     # output size (SEQ_SIZE, BATCH, NCLUSTERS)
            outputs = outputs.permute(1, 0, 2).contiguous()
            outputs = F.softmax(outputs.view(-1, outputs.shape[-1]), dim=1)
            gt_list.append(labels.tolist())
            pred_list.append((torch.max(outputs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([vid + "_" + str(stroke[0][i].item()) + "_" +
                                   str(stroke[1][i].item())] * seq)

    # epoch_loss = running_loss  # / len(dataloaders[phase].dataset)
    # epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
    # print('{} Loss: {:.4f}'.format(phase, epoch_loss))
    ###########################################################################
    confusion_mat = np.zeros((model.decoder.out_features, model.decoder.out_features))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    predictions = {"gt": gt_list, "pred": pred_list}
    # Save prediction and ground truth labels
    pred_file = os.path.join(log_path, "preds_Seq" + str(seq) + "_C" + str(cluster_size) + ".pkl")
    with open(pred_file, "wb") as fp:
        pickle.dump(predictions, fp)
    with open(pred_file, "rb") as fp:
        predictions = pickle.load(fp)
    gt_list = predictions['gt']
    pred_list = predictions['pred']
    # # get boundaries (worse accuracy when used)
    # vkeys = list(set([v.rsplit('_', 2)[0] for v in stroke_ids]))
    # boundaries = read_boundaries(vkeys, HIST_DIFFS, SBD_MODEL)
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # find the most frequent category predicted for the previous stroke
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)
    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    ###########################################################################
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))

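# ---------------------------------------------------------------------------
# The per-stroke voting loop above is repeated verbatim in every predict() in
# this section. A hypothetical helper capturing the same logic (majority vote
# over the clip-level predictions of each contiguous stroke id); it assumes
# clips of the same stroke appear contiguously, with gt_list and pred_list
# aligned to stroke_ids. Counter.most_common stands in for
# max(set(...), key=...count) and breaks ties by first occurrence.

from collections import Counter

def majority_vote(stroke_ids, gt_list, pred_list):
    """Collapse clip-level predictions into one label per stroke."""
    val_labels, pred_labels, vid_preds = [], [], []
    prev_gt = stroke_ids[0]
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # close out the previous stroke with its most frequent prediction
            val_labels.append(gt_list[i - 1])
            pred_labels.append(Counter(vid_preds).most_common(1)[0][0])
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)
    val_labels.append(gt_list[-1])
    pred_labels.append(Counter(vid_preds).most_common(1)[0][0])
    return val_labels, pred_labels
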
def train_model(encoder, decoder, dataloaders, criterion, encoder_optimizer,
                decoder_optimizer, scheduler, labs_keys, labs_values, seq=8,
                num_epochs=25):
    since = time.time()
    # best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                encoder.train()     # Set models to training mode
                decoder.train()
            else:
                encoder.eval()      # Set models to evaluate mode
                decoder.eval()
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for bno, (inputs, vid_path, stroke, labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                # print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))
                labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                                     labs_values, 1)
                # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
                # vis_samples(inputs, True)
                inputs = inputs.permute(0, 2, 1, 3, 4).float()
                targets = inputs
                inputs = inputs.to(device)
                targets = targets.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()
                loss = 0
                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    batch_size = inputs.size(0)
                    enc_h = encoder._init_hidden(batch_size)
                    enc_out, enc_h = encoder(inputs, enc_h)
                    dec_h = decoder._init_hidden(batch_size)
                    # start symbol of dim (batch x output_size)
                    inp = torch.zeros((dec_h.size(1), HIDDEN_SIZE)).to(device)
                    # dec_out = decoder(dec_h, enc_out, inp)
                    dec_out, attn_wts = decoder(enc_h, enc_out, inp)
                    # loss += criterion(dec_out, targets)
                    loss += criterion(dec_out, labels)
                    _, preds = torch.max(dec_out, 1)
                    ###########################################################
                    # Sliding-window reconstruction alternative (kept for reference):
                    # for si in range(0, inputs.size(2) - seq + 1, SHIFT):
                    #     mod_inp = inputs[:, :, si:(si + seq)]
                    #     mod_inp = mod_inp.to(device)
                    #     enc_h = encoder._init_hidden(batch_size)
                    #     # attention
                    #     # enc_out, enc_h, attn_wts = encoder(mod_inp, enc_h)
                    #     # dec_out, attn_wts_lst = decoder(h, enc_out)
                    #     enc_out, enc_h = encoder(mod_inp, enc_h)
                    #     dec_out = decoder(enc_out)
                    #     # loss += criterion(enc_out, labels)
                    #     # _, preds = torch.max(enc_out, 1)
                    #     loss += criterion(dec_out, mod_inp)
                    ###########################################################
                    # Teacher-forced decoding alternative (kept for reference):
                    # dec_h = h
                    # dec_out_lst = []
                    # target_length = targets.size(1)  # assign SEQ_LEN as target length for now
                    # # run for each step of the sequence (use teacher forcing)
                    # for ti in range(target_length):
                    #     dec_out, dec_h, dec_attn = decoder(dec_h, enc_out, targets[:, ti, :])
                    #     dec_out_lst.append(dec_out)
                    #     loss += criterion(dec_out, targets[:, ti, :])
                    #     # decoder_input = target_tensor[di]  # Teacher forcing
                    # outputs = torch.stack(dec_out_lst, dim=1)
                    # outputs, dec_h, wts = model(inputs, inputs)
                    # _, preds = torch.max(outputs, 1)
                    # loss = criterion(outputs, targets)  # torch.flip(targets, [1])
                    ###########################################################
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        encoder_optimizer.step()
                        decoder_optimizer.step()
                # statistics
                running_loss += loss.item()
                # print("Iter : {} / {} :: Running Loss : {}".format(
                #     bno, len(dataloaders[phase]), running_loss))
                running_corrects += torch.sum(preds == labels.data)
                # print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))
                # if (bno + 1) % 20 == 0:
                #     break
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss / ((bno + 1) * inputs.shape[0])   # len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / ((bno + 1) * inputs.shape[0])
            print('{} Loss: {:.6f} Acc: {:.6f} LR: {}'.format(
                phase, epoch_loss, epoch_acc, scheduler.get_last_lr()[0]))
            # vis_samples(dec_out.permute(0, 2, 1, 3, 4).cpu().detach(), True)
            # # deep copy the model
            # if phase == 'test' and epoch_acc > best_acc:
            #     best_acc = epoch_acc
            #     best_model_wts = copy.deepcopy(model.state_dict())
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                        time_elapsed % 60))
    # print('Best val Acc: {:4f}'.format(best_acc))
    # # load best model weights
    # model.load_state_dict(best_model_wts)
    return encoder, decoder

def predict(encoder, decoder, dataloaders, criterion, labs_keys, labs_values,
            phase="val", seq=8):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    encoder = encoder.eval()
    decoder = decoder.eval()    # keep the decoder in eval mode as well
    vid_path_lst, stroke_lst, labs_lst, batch_wts = [], [], [], []
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs, vid_path, stroke, labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys, labs_values, 1)
        # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
        inputs = inputs.permute(0, 2, 1, 3, 4).float()
        # targets = inputs
        inputs = inputs.to(device)
        labels = labels.to(device)
        # forward; track history only in train
        with torch.set_grad_enabled(phase == 'train'):
            batch_size = inputs.size(0)
            enc_h = encoder._init_hidden(batch_size)
            enc_out, enc_h = encoder(inputs, enc_h)
            dec_h = decoder._init_hidden(batch_size)
            inp = torch.zeros((dec_h.size(1), HIDDEN_SIZE)).to(device)  # starting symbol
            dec_out, attn_wts = decoder(enc_h, enc_out, inp)
            probs = dec_out
            vid_path_lst.append(vid_path)
            stroke_lst.append(stroke)
            labs_lst.append(labels)
            batch_wts.append(attn_wts)
            gt_list.append(labels.tolist())
            pred_list.append((torch.max(probs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([vid + "_" + str(stroke[0][i].item()) + "_" +
                                   str(stroke[1][i].item())] * 1)
        # statistics
        # running_loss += loss.item()
        # print("Iter : {} :: Running Loss : {}".format(bno, running_loss))
        # running_corrects += torch.sum(preds == labels.data)
        print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))
        # if (bno + 1) % 20 == 0:
        #     break

    pred_dict = {"paths": vid_path_lst, "strokes": stroke_lst,
                 "labels": labs_lst, "wts": batch_wts}
    confusion_mat = np.zeros((decoder.output_size, decoder.output_size))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # find the most frequent category predicted for the previous stroke
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)
    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return pred_dict, (float(correct) / len(pred_labels))

def train_model(features, stroke_names_id, model, dataloaders, criterion, optimizer,
                scheduler, labs_keys, labs_values, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()   # Set model to training mode
            else:
                model.eval()    # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            count = [0.] * 5
            # Iterate over data.
            for bno, (inputs1, vid_path, stroke, labels, inputs2) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                                     labs_values, 1)
                # inp_emb1, inp_emb2 = attn_utils.get_long_tensor(inputs1), attn_utils.get_long_tensor(inputs2)
                inputs1, inputs2 = inputs1.float(), inputs2.float()
                # inputs1, inputs2 = inp_emb1.to(device), inp_emb2.to(device)
                inputs1, inputs2 = inputs1.to(device), inputs2.to(device)
                labels = labels.to(device)
                iter_counts = Counter(labels.tolist())
                for k, v in iter_counts.items():
                    count[k] += v
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # hidden = model.init_hidden(inputs.size(0))
                    outputs = model(inputs1, inputs2)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)   # torch.flip(targets, [1])
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics
                running_loss += loss.item()     # * inputs1.size(0)
                # print("Iter : {} :: Running Loss : {}".format(bno, running_loss))
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()
            print("Category Weights : {}".format(count))
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            # deep copy the model for best test accuracy
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                        time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

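# ---------------------------------------------------------------------------
# train_model() above tallies per-class sample counts in `count` but only
# prints them. One option, not part of the original setup, is to fold such
# counts into inverse-frequency class weights for the loss; a hedged sketch
# (weighted_criterion is a hypothetical helper):

import torch
import torch.nn as nn

def weighted_criterion(count, device="cpu"):
    # count: per-class sample counts, e.g. the 5-element list from train_model
    freq = torch.tensor(count, dtype=torch.float32).clamp(min=1.0)
    weights = freq.sum() / (len(count) * freq)   # inverse-frequency weighting
    return nn.CrossEntropyLoss(weight=weights.to(device))

# usage: criterion = weighted_criterion([120., 40., 15., 60., 25.])
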
def predict(features, stroke_names_id, model, dataloaders, labs_keys, labs_values,
            seq, phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs1, vid_path, stroke, labels, inputs2) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys, labs_values, 1)
        # inp_emb1, inp_emb2 = attn_utils.get_long_tensor(inputs1), attn_utils.get_long_tensor(inputs2)
        inputs1, inputs2 = inputs1.float(), inputs2.float()
        # inputs1, inputs2 = inp_emb1.to(device), inp_emb2.to(device)
        inputs1, inputs2 = inputs1.to(device), inputs2.to(device)
        labels = labels.to(device)
        # forward
        with torch.set_grad_enabled(phase == 'train'):
            outputs = model(inputs1, inputs2)
            gt_list.append(labels.tolist())
            pred_list.append((torch.max(outputs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([vid + "_" + str(stroke[0][i].item()) + "_" +
                                   str(stroke[1][i].item())] * 1)
        # # taking a single hidden state (initialized once) for an entire video
        # # gives lower accuracy:
        # with torch.set_grad_enabled(phase == 'train'):
        #     batch_size = inputs.size(0)
        #     for si in range(batch_size):
        #         curr_stroke = vid_path[si] + '_' + str(stroke[0][si].item()) + '_' + str(stroke[1][si].item())
        #         if prev_stroke != curr_stroke:
        #             hidden = model.init_hidden(1)
        #         output, hidden = model(inputs[si].unsqueeze(0), hidden)
        #         pred_list.append((torch.max(output, 1)[1]).tolist())
        #         prev_stroke = curr_stroke
        #     gt_list.append(labels.tolist())
        #     for i, vid in enumerate(vid_path):
        #         stroke_ids.extend([vid + "_" + str(stroke[0][i].item()) + "_" + str(stroke[1][i].item())] * 1)

    # epoch_loss = running_loss  # / len(dataloaders[phase].dataset)
    # epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
    # print('{} Loss: {:.4f}'.format(phase, epoch_loss))
    ###########################################################################
    confusion_mat = np.zeros((model.n_classes, model.n_classes))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    predictions = {"gt": gt_list, "pred": pred_list}
    # Save prediction and ground truth labels
    pred_file = os.path.join(log_path, "preds_test_Seq" + str(seq) + "_C" + str(cluster_size) + ".pkl")
    with open(pred_file, "wb") as fp:
        pickle.dump(predictions, fp)
    with open(pred_file, "rb") as fp:
        predictions = pickle.load(fp)
    gt_list = predictions['gt']
    pred_list = predictions['pred']
    # # get boundaries (worse accuracy when used)
    # vkeys = list(set([v.rsplit('_', 2)[0] for v in stroke_ids]))
    # boundaries = read_boundaries(vkeys, HIST_DIFFS, SBD_MODEL)
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # find the most frequent category predicted for the previous stroke
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)
    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    ###########################################################################
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))

def train_model(model, dataloaders, criterion, optimizer, scheduler, labs_keys,
                labs_values, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()   # Set model to training mode
            else:
                model.eval()    # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for bno, (inputs, vid_path, stroke, labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                                     labs_values, inputs.size(1))
                # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
                inputs = inputs.permute(0, 2, 1, 3, 4).float()
                # convert RGB to BGR for the C3D pretrained weights
                inputs[:, [0, 2], ...] = inputs[:, [2, 0], ...]
                # inputs = inputs.permute(0, 4, 1, 2, 3).float()
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                loss = 0
                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    probs = model(inputs)
                    # probs = F.softmax(logits, dim=1)
                    loss = criterion(probs, labels)
                    _, preds = torch.max(probs, 1)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics
                running_loss += loss.item()
                # print("Iter : {} / {} :: Running Loss : {}".format(
                #     bno, len(dataloaders[phase]), running_loss))
                running_corrects += torch.sum(preds == labels.data)
                # print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))
                # if (bno + 1) % 20 == 0:
                #     break
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss / (bno + 1)   # len(dataloaders[phase].dataset)
            # NOTE: 16 is a hardcoded batch size here; the denominator assumes
            # full batches of 16 clips with inputs.size(2) frames each
            epoch_acc = running_corrects.double() / (16 * inputs.size(2) * (bno + 1))
            print('{} Loss: {:.6f} :: Acc: {:.6f}'.format(phase, epoch_loss, epoch_acc))
            # deep copy the model
            if phase == 'test' and epoch_acc >= best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                        time_elapsed % 60))
    print('Best val Acc: {:6f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

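# ---------------------------------------------------------------------------
# The fancy-indexing line in train_model() above swaps the R and B channels
# because C3D pretrained weights expect BGR (OpenCV) channel order rather
# than RGB. A minimal illustration of the same swap on a dummy clip tensor
# (the sizes here are arbitrary):

import torch

clip = torch.rand(2, 3, 16, 112, 112)       # BATCH x C x SEQ x H x W, RGB
bgr = clip.clone()
bgr[:, [0, 2], ...] = bgr[:, [2, 0], ...]   # channel 0 <-> channel 2; advanced
                                            # indexing copies the RHS first
assert torch.equal(bgr[:, 0], clip[:, 2]) and torch.equal(bgr[:, 2], clip[:, 0])
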
def predict(model, dataloaders, labs_keys, labs_values, phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs, vid_path, stroke, labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                             labs_values, inputs.size(1))
        # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
        inputs = inputs.permute(0, 2, 1, 3, 4).float()
        # convert RGB to BGR for the C3D pretrained weights
        inputs[:, [0, 2], ...] = inputs[:, [2, 0], ...]
        # inputs = inputs.permute(0, 4, 1, 2, 3).float()
        inputs = inputs.to(device)
        # forward; track history only in train
        with torch.set_grad_enabled(phase == 'train'):
            probs = model(inputs)
            gt_list.append(labels.tolist())
            pred_list.append((torch.max(probs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([vid + "_" + str(stroke[0][i].item()) + "_" +
                                   str(stroke[1][i].item())] * inputs.size(2))
        # statistics
        # running_loss += loss.item()
        # print("Iter : {} :: Running Loss : {}".format(bno, running_loss))
        # running_corrects += torch.sum(preds == labels.data)
        print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))
        # if (bno + 1) % 20 == 0:
        #     break

    # epoch_loss = running_loss  # / len(dataloaders[phase].dataset)
    # epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
    confusion_mat = np.zeros((model.output_size, model.output_size))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # find the most frequent category predicted for the previous stroke
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)
    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))

def predict(features, stroke_names_id, model, dataloaders, labs_keys, labs_values,
            phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs, vid_path, stroke, labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys, labs_values, 1)
        inputs = inputs.to(device)
        labels = labels.to(device)
        # forward
        with torch.set_grad_enabled(phase == 'train'):
            batch_size = inputs.size(0)
            hidden = model.init_hidden(batch_size)
            outputs, hidden = model(inputs, hidden)
            gt_list.append(labels.tolist())
            pred_list.append((torch.max(outputs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([vid + "_" + str(stroke[0][i].item()) + "_" +
                                   str(stroke[1][i].item())] * 1)

    # epoch_loss = running_loss  # / len(dataloaders[phase].dataset)
    # epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
    # print('{} Loss: {:.4f}'.format(phase, epoch_loss))
    confusion_mat = np.zeros((model.n_classes, model.n_classes))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # find the most frequent category predicted for the previous stroke
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)
    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))

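# ---------------------------------------------------------------------------
# predict() above requests a fresh hidden state per batch via
# model.init_hidden(batch_size). A toy GRU classifier showing why that
# pattern needs the batch size at call time; ToyGRUClassifier is an
# illustrative assumption, not the project's actual model class.

import torch
import torch.nn as nn

class ToyGRUClassifier(nn.Module):
    def __init__(self, in_dim=32, hidden_size=64, n_classes=5):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_classes = n_classes
        self.gru = nn.GRU(in_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, n_classes)

    def init_hidden(self, batch_size):
        # the initial hidden state must match the batch dimension, so it is
        # rebuilt for every batch rather than stored on the module
        return torch.zeros(1, batch_size, self.hidden_size)

    def forward(self, x, hidden):
        out, hidden = self.gru(x, hidden)
        return self.fc(out[:, -1, :]), hidden   # classify from the last step

# usage:
# m = ToyGRUClassifier()
# logits, h = m(torch.randn(4, 8, 32), m.init_hidden(4))
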
def train_model(features, stroke_names_id, encoder, decoder, dataloaders, criterion,
                encoder_optimizer, decoder_optimizer, scheduler, labs_keys,
                labs_values, num_epochs=25):
    since = time.time()
    # best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Only a training phase here (no evaluation pass)
        for phase in ['train']:
            # if phase == 'train':
            #     model.train()   # Set model to training mode
            # else:
            #     model.eval()    # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for bno, (inputs, targets, vid_path, stroke, labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                                     labs_values, 1)
                # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
                # inputs = inputs.permute(0, 2, 1, 3, 4).float()
                inputs, targets = inputs.to(device), targets.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()
                loss = 0
                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    batch_size = inputs.size(0)
                    enc_h = encoder.init_hidden(batch_size)
                    enc_out, h = encoder(inputs, enc_h)
                    dec_h = h
                    dec_out_lst = []
                    target_length = targets.size(1)     # assign SEQ_LEN as target length for now
                    # run for each step of the sequence (teacher forcing: the
                    # ground-truth frame is fed as the next decoder input)
                    for ti in range(target_length):
                        dec_out, dec_h, dec_attn = decoder(dec_h, enc_out, targets[:, ti, :])
                        dec_out_lst.append(dec_out)
                        loss += criterion(dec_out, targets[:, ti, :])
                        # decoder_input = target_tensor[di]  # Teacher forcing
                    outputs = torch.stack(dec_out_lst, dim=1)
                    # outputs, dec_h, wts = model(inputs, inputs)
                    # _, preds = torch.max(outputs, 1)
                    # loss = criterion(outputs, targets)  # torch.flip(targets, [1])
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        encoder_optimizer.step()
                        decoder_optimizer.step()
                # statistics
                running_loss += loss.item()
                # print("Iter : {} :: Running Loss : {}".format(bno, running_loss))
                # running_corrects += torch.sum(preds == labels.data)
                # print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))
                # if (bno + 1) % 10 == 0:
                #     break
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss   # / len(dataloaders[phase].dataset)
            # epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            print('{} Loss: {:.4f}'.format(phase, epoch_loss))
            # # deep copy the model
            # if phase == 'test' and epoch_acc > best_acc:
            #     best_acc = epoch_acc
            #     best_model_wts = copy.deepcopy(model.state_dict())
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                        time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # # load best model weights
    # model.load_state_dict(best_model_wts)
    return encoder, decoder

def train_model(model, dataloaders, criterion, encoder_optimizer, scheduler,
                labs_keys, labs_values, seq=8, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()   # Set model to training mode
            else:
                model.eval()    # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for bno, (inputs, vid_path, stroke, labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                # print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))
                labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                                     labs_values, 1)
                # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
                inputs = inputs.permute(0, 2, 1, 3, 4).float()
                # targets = inputs
                inputs = inputs.to(device)
                # targets = targets.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                encoder_optimizer.zero_grad()
                loss = 0
                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    batch_size = inputs.size(0)
                    # enc_h = encoder._init_hidden(batch_size)
                    enc_out, enc_h = model(inputs)  # , enc_h)
                    loss += criterion(enc_out, labels)
                    _, preds = torch.max(enc_out, 1)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        encoder_optimizer.step()
                # statistics
                running_loss += loss.item()
                # print("Iter : {} / {} :: Running Loss : {}".format(
                #     bno, len(dataloaders[phase]), running_loss))
                running_corrects += torch.sum(preds == labels.data)
                # print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))
                # if (bno + 1) % 5 == 0:
                #     break
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)  # (256*(bno+1))
            print('{} Loss: {:.6f} Acc: {:.6f} LR: {}'.format(
                phase, epoch_loss, epoch_acc, scheduler.get_last_lr()[0]))
            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                        time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model