Example #1
def predict(features,
            stroke_names_id,
            encoder,
            decoder,
            dataloaders,
            labs_keys,
            labs_values,
            phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    encoder = encoder.eval()
    decoder = decoder.eval()
    dec_pred_lst = []   # collected decoder outputs, one tensor per batch
    # Iterate over data.
    for bno, (inputs, targets, vid_path, stroke,
              labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                             labs_values)
        # Extract spatio-temporal features from the clip using a 3D ResNet (for SL >= 16)
        # inputs = inputs.permute(0, 2, 1, 3, 4).float()   # enable for raw clip inputs

        inputs, targets = inputs.to(device), targets.to(device)
        labels = labels.to(device)
        # forward
        # track history only if in train
        with torch.set_grad_enabled(phase == 'train'):

            batch_size = inputs.size(0)
            enc_h = encoder.init_hidden(batch_size)
            enc_out, h = encoder(inputs, enc_h)
            dec_h = h
            dec_in = torch.zeros(batch_size, targets.size(2)).to(device)   # start symbol
            dec_out_lst = []
            target_length = targets.size(1)  # use SEQ_LEN as the target length
            # Decode step by step, feeding each output back in (no teacher forcing at inference).
            for ti in range(target_length):
                dec_out, dec_h, dec_attn = decoder(dec_h, enc_out, dec_in)
                dec_out_lst.append(dec_out)
                dec_in = dec_out

            outputs = torch.stack(dec_out_lst, dim=1)   # (BATCH, SEQ_LEN, FEATURE_DIM)
        dec_pred_lst.append(outputs.cpu())
    return dec_pred_lst
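
# predict() above accumulates per-step decoder outputs in a list and stacks
# them on dim=1 to recover a (BATCH, SEQ_LEN, FEATURE_DIM) tensor. A minimal
# sketch of that stacking pattern with toy shapes (names here are illustrative,
# not from the original code):
import torch

steps = [torch.randn(4, 16) for _ in range(10)]   # SEQ_LEN tensors of shape (BATCH, FEAT)
seq_out = torch.stack(steps, dim=1)               # -> (BATCH, SEQ_LEN, FEAT)
print(seq_out.shape)                              # torch.Size([4, 10, 16])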
def train_model(features, stroke_names_id, model, dataloaders, criterion, 
                optimizer, scheduler, labs_keys, labs_values, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            # Iterate over data.
            for bno, (inputs, vid_path, stroke, _, labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys, labs_values, inputs.shape[1])
                # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
                inputs = inputs.float()
                inp_emb = attn_utils.get_long_tensor(inputs)    # comment out for SA
                inputs = inp_emb.to(device)                     # comment out for SA
                inputs = inputs.t().contiguous()       # Convert to (SEQ, BATCH)
                labels = labels.to(device)
                
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                output = model(inputs)  # output size (SEQ_SIZE, BATCH, NCLASSES)
                output = output.permute(1, 0, 2).contiguous()
                
                output = F.softmax(output.view(-1, output.shape[-1]), dim=1)   # to (BATCH*SEQ_SIZE, NCLASSES)
                # NOTE: if criterion is nn.CrossEntropyLoss, pass the raw logits
                # instead; it applies log-softmax internally.
                loss = criterion(output, labels)
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                    optimizer.step()
                
                _, preds = torch.max(output, 1)

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase])
            # Per-timestep accuracy; assumes a fixed SEQ length across batches
            # (inputs is (SEQ, BATCH), taken from the last batch).
            epoch_acc = running_corrects.double() / (inputs.size(0) * len(dataloaders[phase].dataset))

            print('{} Loss: {:.4f} Acc: {:.4f} LR: {}'.format(phase, epoch_loss, epoch_acc,
                  scheduler.get_last_lr()[0]))

            if phase == 'train':
                scheduler.step()
            # deep copy the model for best test accuracy
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()
        
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, \
          time_elapsed % 60))
    print('Best test Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
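
# A hedged note on the loss pairing in train_model() above: the code applies
# F.softmax to the outputs before calling `criterion`. If `criterion` is
# nn.CrossEntropyLoss, that extra softmax is harmful, because CrossEntropyLoss
# applies log-softmax internally. A minimal sketch of the two consistent
# pairings (toy shapes, assuming raw logits):
import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 5)                 # (BATCH*SEQ_SIZE, NCLASSES) raw scores
targets = torch.tensor([0, 2, 1, 4])

# Pairing 1: raw logits with CrossEntropyLoss (softmax handled internally).
loss_a = nn.CrossEntropyLoss()(logits, targets)

# Pairing 2: explicit log-softmax with NLLLoss; numerically equivalent.
loss_b = nn.NLLLoss()(F.log_softmax(logits, dim=1), targets)

assert torch.allclose(loss_a, loss_b)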
def predict(features, stroke_names_id, model, dataloaders, labs_keys, labs_values, 
            seq, phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs, vid_path, stroke, _, labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        seq = inputs.shape[1]   # override the seq argument with the actual SEQ_LEN
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys, labs_values, seq)
        inputs = inputs.float()
        inp_emb = attn_utils.get_long_tensor(inputs)    # comment out for SA
        inputs = inp_emb.to(device)                     # comment out for SA
        inputs = inputs.t().contiguous()
        labels = labels.to(device)
        
        # forward
        with torch.set_grad_enabled(phase == 'train'):
            outputs = model(inputs)     # output size (BATCH, SEQ_SIZE, NCLUSTERS)
            outputs = outputs.permute(1, 0, 2).contiguous()
            outputs = F.softmax(outputs.view(-1, outputs.shape[-1]), dim=1)

            gt_list.append(labels.tolist())
            pred_list.append((torch.max(outputs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([vid+"_"+str(stroke[0][i].item())+"_"+str(stroke[1][i].item())] * seq)
                
    
    ###########################################################################
    
    confusion_mat = np.zeros((model.decoder.out_features, model.decoder.out_features))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    
    predictions = {"gt": gt_list, "pred": pred_list}
    
    # Save prediction and ground-truth labels, then reload them from disk.
    pred_file = os.path.join(log_path, "preds_Seq" + str(seq) + "_C" + str(cluster_size) + ".pkl")
    with open(pred_file, "wb") as fp:
        pickle.dump(predictions, fp)
    with open(pred_file, "rb") as fp:
        predictions = pickle.load(fp)
    gt_list = predictions['gt']
    pred_list = predictions['pred']
    
    # Using shot boundaries here was tried and gave worse accuracy:
    # vkeys = list(set([v.rsplit('_', 2)[0] for v in stroke_ids]))
    # boundaries = read_boundaries(vkeys, HIST_DIFFS, SBD_MODEL)
    
    # Majority vote: collapse per-frame predictions to one label per stroke.
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # stroke boundary: keep the most frequent predicted category
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)

    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    
    ###########################################################################
    
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1   # rows: predicted, cols: true
    print('#'*30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format( float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))
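
# The per-stroke majority vote inside predict() recurs throughout this file.
# A standalone sketch of the same aggregation as a hypothetical helper (not
# part of the original code): group frame-level predictions by stroke id and
# keep the most frequent class per stroke.
from collections import Counter

def majority_vote(stroke_ids, gt_list, pred_list):
    """Collapse frame-level (gt, pred) pairs to one label per stroke."""
    val_labels, pred_labels, votes = [], [], []
    prev, last_gt = stroke_ids[0], gt_list[0]
    for sid, gt, pr in zip(stroke_ids, gt_list, pred_list):
        if sid != prev:                    # stroke boundary reached
            val_labels.append(last_gt)
            pred_labels.append(Counter(votes).most_common(1)[0][0])
            votes, prev = [], sid
        votes.append(pr)
        last_gt = gt
    val_labels.append(last_gt)             # flush the final stroke
    pred_labels.append(Counter(votes).most_common(1)[0][0])
    return val_labels, pred_labels

# Example: two strokes of three frames each.
ids = ['v1_0_10'] * 3 + ['v1_20_30'] * 3
print(majority_vote(ids, [0, 0, 0, 2, 2, 2], [0, 1, 0, 2, 2, 1]))
# -> ([0, 2], [0, 2])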
def train_model(encoder,
                decoder,
                dataloaders,
                criterion,
                encoder_optimizer,
                decoder_optimizer,
                scheduler,
                labs_keys,
                labs_values,
                seq=8,
                num_epochs=25):
    since = time.time()

    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                encoder.train()  # Set model to training mode
                decoder.train()

            else:
                encoder.eval()  # Set model to evaluate mode
                decoder.eval()

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for bno, (inputs, vid_path, stroke,
                      labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke,
                                                     labs_keys, labs_values, 1)
                # Extract spatio-temporal features from the clip using a 3D ResNet (for SL >= 16)
                # vis_samples(inputs, True)   # optional input visualization
                inputs = inputs.permute(0, 2, 1, 3, 4).float()   # (B, T, C, H, W) -> (B, C, T, H, W)

                targets = inputs
                inputs = inputs.to(device)
                targets = targets.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()
                loss = 0

                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):

                    batch_size = inputs.size(0)
                    enc_h = encoder._init_hidden(batch_size)
                    enc_out, enc_h = encoder(inputs, enc_h)
                    dec_h = decoder._init_hidden(batch_size)
                    # start symbol of shape (batch x HIDDEN_SIZE)
                    inp = torch.zeros((dec_h.size(1), HIDDEN_SIZE)).to(device)
                    dec_out, attn_wts = decoder(enc_h, enc_out, inp)
                    loss += criterion(dec_out, labels)
                    _, preds = torch.max(dec_out, 1)
                    # Alternative schemes were tried at this point and left
                    # disabled: sliding-window encoding over the clip, and
                    # teacher-forced per-step reconstruction of the inputs.
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        encoder_optimizer.step()
                        decoder_optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            # Average over samples seen (assumes a constant batch size).
            epoch_loss = running_loss / ((bno + 1) * inputs.shape[0])
            epoch_acc = running_corrects.double() / ((bno + 1) * inputs.shape[0])

            print('{} Loss: {:.6f} Acc: {:.6f} LR: {}'.format(
                phase, epoch_loss, epoch_acc, scheduler.get_last_lr()[0]))



        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, \
          time_elapsed % 60))
    return encoder, decoder
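
# The epoch statistics in train_model() above divide by (bno + 1) *
# inputs.shape[0], i.e. batches seen times the size of the *last* batch. A
# hedged sketch of why summing the actual batch sizes is safer when the final
# batch is smaller (self-contained toy loader, not the original data):
import torch
from torch.utils.data import DataLoader, TensorDataset

loader = DataLoader(TensorDataset(torch.randn(10, 3)), batch_size=4)
seen = sum(batch[0].size(0) for batch in loader)   # 4 + 4 + 2
print(seen)   # 10; (num_batches * batch_size) would overcount as 12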
def predict(encoder,
            decoder,
            dataloaders,
            criterion,
            labs_keys,
            labs_values,
            phase="val",
            seq=8):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    encoder = encoder.eval()
    decoder = decoder.eval()   # the decoder must also be in eval mode here
    vid_path_lst, stroke_lst, labs_lst, batch_wts = [], [], [], []
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs, vid_path, stroke,
              labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                             labs_values, 1)
        # Extract spatio-temporal features from clip using 3D ResNet (For SL >= 16)
        inputs = inputs.permute(0, 2, 1, 3, 4).float()

        #        targets = inputs
        inputs = inputs.to(device)
        labels = labels.to(device)
        # forward
        # track history only if in train
        with torch.set_grad_enabled(phase == 'train'):

            batch_size = inputs.size(0)
            enc_h = encoder._init_hidden(batch_size)
            enc_out, enc_h = encoder(inputs, enc_h)
            dec_h = decoder._init_hidden(batch_size)
            inp = torch.zeros(
                (dec_h.size(1), HIDDEN_SIZE)).to(device)  #starting symbol
            dec_out, attn_wts = decoder(enc_h, enc_out, inp)

            probs = dec_out
            vid_path_lst.append(vid_path)
            stroke_lst.append(stroke)
            labs_lst.append(labels)
            batch_wts.append(attn_wts)

            gt_list.append(labels.tolist())
            pred_list.append((torch.max(probs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                # one prediction per clip, so a single id per element
                stroke_ids.extend([
                    vid + "_" + str(stroke[0][i].item()) + "_" +
                    str(stroke[1][i].item())
                ])
        print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))

    pred_dict = {
        "paths": vid_path_lst,
        "strokes": stroke_lst,
        "labels": labs_lst,
        "wts": batch_wts
    }
    confusion_mat = np.zeros((decoder.output_size, decoder.output_size))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    # Majority vote: collapse per-clip predictions to one label per stroke.
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # stroke boundary: keep the most frequent predicted category
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)

    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return pred_dict, (float(correct) / len(pred_labels))
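
# The confusion matrix built by predict() above is indexed [pred, true]. A
# minimal sketch of reading overall accuracy and per-class recall out of a
# matrix with that orientation (toy numbers, not real results):
import numpy as np

cm = np.array([[5, 1],        # rows: predicted class
               [2, 8]])       # cols: true class
overall = np.trace(cm) / cm.sum()         # fraction on the diagonal
recall = np.diag(cm) / cm.sum(axis=0)     # per true class (column sums)
print(overall, recall)                    # 0.8125 [0.714... 0.888...]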
def train_model(features,
                stroke_names_id,
                model,
                dataloaders,
                criterion,
                optimizer,
                scheduler,
                labs_keys,
                labs_values,
                num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            count = [0.] * 5   # running per-class sample counts (5 classes)

            # Iterate over data.
            for bno, (inputs1, vid_path, stroke, labels,
                      inputs2) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke,
                                                     labs_keys, labs_values, 1)
                # For embedding inputs (SA variant), convert to long tensors first:
                # inp_emb1, inp_emb2 = attn_utils.get_long_tensor(inputs1), attn_utils.get_long_tensor(inputs2)
                inputs1, inputs2 = inputs1.float(), inputs2.float()
                # inputs1, inputs2 = inp_emb1.to(device), inp_emb2.to(device)
                inputs1, inputs2 = inputs1.to(device), inputs2.to(device)
                labels = labels.to(device)
                iter_counts = Counter(labels.tolist())
                for k, v in iter_counts.items():
                    count[k] += v

                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs1, inputs2)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()
                print("Category Weights : {}".format(count))

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(
                dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))
            # deep copy the model for best test accuracy
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, \
          time_elapsed % 60))
    print('Best test Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
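
# train_model() above tallies raw per-class sample counts in `count`. A hedged
# sketch (assuming the same 5 classes) of turning such counts into
# inverse-frequency weights for a class-balanced nn.CrossEntropyLoss:
import torch
import torch.nn as nn

count = torch.tensor([120., 80., 40., 200., 60.])   # per-class sample counts
weights = count.sum() / (len(count) * count)        # inverse-frequency weights
criterion = nn.CrossEntropyLoss(weight=weights)     # rarer classes weigh more
print(weights)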
def predict(features,
            stroke_names_id,
            model,
            dataloaders,
            labs_keys,
            labs_values,
            seq,
            phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs1, vid_path, stroke, labels,
              inputs2) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                             labs_values, 1)
        # For embedding inputs (SA variant):
        # inp_emb1, inp_emb2 = attn_utils.get_long_tensor(inputs1), attn_utils.get_long_tensor(inputs2)
        inputs1, inputs2 = inputs1.float(), inputs2.float()
        # inputs1, inputs2 = inp_emb1.to(device), inp_emb2.to(device)
        inputs1, inputs2 = inputs1.to(device), inputs2.to(device)
        labels = labels.to(device)
        # forward
        with torch.set_grad_enabled(phase == 'train'):
            outputs = model(inputs1, inputs2)
            gt_list.append(labels.tolist())
            pred_list.append((torch.max(outputs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([
                    vid + "_" + str(stroke[0][i].item()) + "_" +
                    str(stroke[1][i].item())
                ])


#       # taking single hidden unit (initialized once) for entire video : accuracy lower
#        with torch.set_grad_enabled(phase == 'train'):
#            batch_size = inputs.size(0)
#            for si in range(batch_size):
#                curr_stroke = vid_path[si]+'_'+str(stroke[0][si].item())+'_'+str(stroke[1][si].item())
#                if prev_stroke != curr_stroke:
#                    hidden = model.init_hidden(1)
#                output, hidden = model(inputs[si].unsqueeze(0), hidden)
#                pred_list.append((torch.max(output, 1)[1]).tolist())
#                prev_stroke = curr_stroke
##            hidden = model.init_hidden(batch_size)
##            outputs, hidden = model(inputs, hidden)
#            gt_list.append(labels.tolist())
##            pred_list.append((torch.max(outputs, 1)[1]).tolist())
#            for i, vid in enumerate(vid_path):
#                stroke_ids.extend([vid+"_"+str(stroke[0][i].item())+"_"+str(stroke[1][i].item())] * 1)

#    epoch_loss = running_loss #/ len(dataloaders[phase].dataset)
#            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
#    print('{} Loss: {:.4f}'.format(phase, epoch_loss))

###########################################################################

    confusion_mat = np.zeros((model.n_classes, model.n_classes))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]

    predictions = {"gt": gt_list, "pred": pred_list}

    # Save prediction and ground-truth labels, then reload them from disk.
    pred_file = os.path.join(log_path, "preds_test_Seq" + str(seq) + "_C" +
                             str(cluster_size) + ".pkl")
    with open(pred_file, "wb") as fp:
        pickle.dump(predictions, fp)
    with open(pred_file, "rb") as fp:
        predictions = pickle.load(fp)
    gt_list = predictions['gt']
    pred_list = predictions['pred']

    # Using shot boundaries here was tried and gave worse accuracy:
    # vkeys = list(set([v.rsplit('_', 2)[0] for v in stroke_ids]))
    # boundaries = read_boundaries(vkeys, HIST_DIFFS, SBD_MODEL)

    # Majority vote: collapse per-clip predictions to one label per stroke.
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # stroke boundary: keep the most frequent predicted category
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)

    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))

    ###########################################################################

    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))
Example #8
def train_model(model,
                dataloaders,
                criterion,
                optimizer,
                scheduler,
                labs_keys,
                labs_values,
                num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for bno, (inputs, vid_path, stroke,
                      labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke,
                                                     labs_keys, labs_values,
                                                     inputs.size(1))
                # Extract spatio-temporal features from the clip using a 3D ResNet (for SL >= 16)
                inputs = inputs.permute(0, 2, 1, 3, 4).float()   # (B, T, C, H, W) -> (B, C, T, H, W)
                inputs[:, [0, 2], ...] = inputs[:, [2, 0], ...]  # convert RGB to BGR for pretrained C3D
                # inputs = inputs.permute(0, 4, 1, 2, 3).float()
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()
                loss = 0
                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):

                    probs = model(inputs)
                    loss = criterion(probs, labels)
                    _, preds = torch.max(probs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / (bno + 1)
            # Per-frame accuracy; 16 is the (assumed fixed) batch size and
            # inputs.size(2) the clip length of the last batch.
            epoch_acc = running_corrects.double() / (16 * inputs.size(2) * (bno + 1))

            print('{} Loss: {:.6f} :: Acc: {:.6f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc >= best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, \
          time_elapsed % 60))
    print('Best test Acc: {:.6f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
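
# The fancy-indexed assignment in train_model() above swaps channels 0 and 2
# (RGB -> BGR) after the permute. A minimal check that it matches a flip of
# the 3-channel dimension (toy tensor of shape (B, C, T, H, W)):
import torch

x = torch.arange(2 * 3 * 1 * 2 * 2).float().view(2, 3, 1, 2, 2)
swapped = x.clone()
swapped[:, [0, 2], ...] = swapped[:, [2, 0], ...]       # same idiom as above
assert torch.equal(swapped, torch.flip(x, dims=[1]))    # C == 3, so flip == swap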
Example #9
def predict(model, dataloaders, labs_keys, labs_values, phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs, vid_path, stroke,
              labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                             labs_values, inputs.size(1))
        # Extract spatio-temporal features from the clip using a 3D ResNet (for SL >= 16)
        inputs = inputs.permute(0, 2, 1, 3, 4).float()   # (B, T, C, H, W) -> (B, C, T, H, W)
        inputs[:, [0, 2], ...] = inputs[:, [2, 0], ...]  # RGB -> BGR for pretrained C3D
        # inputs = inputs.permute(0, 4, 1, 2, 3).float()
        inputs = inputs.to(device)
        # forward
        # track history only if in train
        with torch.set_grad_enabled(phase == 'train'):

            probs = model(inputs)
            gt_list.append(labels.tolist())
            pred_list.append((torch.max(probs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([
                    vid + "_" + str(stroke[0][i].item()) + "_" +
                    str(stroke[1][i].item())
                ] * inputs.size(2))
        print("Batch No : {} / {}".format(bno, len(dataloaders[phase])))

    confusion_mat = np.zeros((model.output_size, model.output_size))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # stroke boundary: keep the most frequent predicted category
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)

    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))
Example #10
def predict(features,
            stroke_names_id,
            model,
            dataloaders,
            labs_keys,
            labs_values,
            phase="val"):
    assert phase == "val" or phase == "test", "Incorrect Phase."
    model = model.eval()
    gt_list, pred_list, stroke_ids = [], [], []
    # Iterate over data.
    for bno, (inputs, vid_path, stroke,
              labels) in enumerate(dataloaders[phase]):
        # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
        labels = attn_utils.get_batch_labels(vid_path, stroke, labs_keys,
                                             labs_values, 1)

        inputs = inputs.to(device)
        labels = labels.to(device)
        # forward
        with torch.set_grad_enabled(phase == 'train'):
            batch_size = inputs.size(0)
            hidden = model.init_hidden(batch_size)
            outputs, hidden = model(inputs, hidden)
            gt_list.append(labels.tolist())
            pred_list.append((torch.max(outputs, 1)[1]).tolist())
            for i, vid in enumerate(vid_path):
                stroke_ids.extend([
                    vid + "_" + str(stroke[0][i].item()) + "_" +
                    str(stroke[1][i].item())
                ])



    confusion_mat = np.zeros((model.n_classes, model.n_classes))
    gt_list = [g for batch_list in gt_list for g in batch_list]
    pred_list = [p for batch_list in pred_list for p in batch_list]
    prev_gt = stroke_ids[0]
    val_labels, pred_labels, vid_preds = [], [], []
    for i, pr in enumerate(pred_list):
        if prev_gt != stroke_ids[i]:
            # stroke boundary: keep the most frequent predicted category
            val_labels.append(gt_list[i - 1])
            pred_labels.append(max(set(vid_preds), key=vid_preds.count))
            vid_preds = []
            prev_gt = stroke_ids[i]
        vid_preds.append(pr)

    val_labels.append(gt_list[-1])
    pred_labels.append(max(set(vid_preds), key=vid_preds.count))
    correct = 0
    for i, true_val in enumerate(val_labels):
        if pred_labels[i] == true_val:
            correct += 1
        confusion_mat[pred_labels[i], true_val] += 1
    print('#' * 30)
    print("GRU Sequence Classification Results:")
    print("%d/%d Correct" % (correct, len(pred_labels)))
    print("Accuracy = {} ".format(float(correct) / len(pred_labels)))
    print("Confusion matrix")
    print(confusion_mat)
    return (float(correct) / len(pred_labels))
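
# predict() above calls model.init_hidden(batch_size) before the forward pass.
# A hedged sketch of what such a model might look like for a single-layer GRU
# classifier (illustrative class, not the original model definition):
import torch
import torch.nn as nn

class GRUClassifier(nn.Module):
    def __init__(self, in_dim, hidden_size, n_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_classes = n_classes
        self.gru = nn.GRU(in_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, n_classes)

    def init_hidden(self, batch_size):
        # (num_layers * num_directions, batch, hidden_size), zero-initialized
        return torch.zeros(1, batch_size, self.hidden_size)

    def forward(self, x, hidden):
        out, hidden = self.gru(x, hidden)        # x: (BATCH, SEQ_LEN, in_dim)
        return self.fc(out[:, -1, :]), hidden    # classify from the last step

m = GRUClassifier(in_dim=10, hidden_size=16, n_classes=5)
scores, h = m(torch.randn(4, 8, 10), m.init_hidden(4))
print(scores.shape)   # torch.Size([4, 5])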
Example #11
def train_model(features,
                stroke_names_id,
                encoder,
                decoder,
                dataloaders,
                criterion,
                encoder_optimizer,
                decoder_optimizer,
                scheduler,
                labs_keys,
                labs_values,
                num_epochs=25):
    since = time.time()

    best_acc = 0.0   # unused here: only the training phase runs below

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Only a training phase runs here, so no train/eval mode switching.
        for phase in ['train']:

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for bno, (inputs, targets, vid_path, stroke,
                      labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke,
                                                     labs_keys, labs_values, 1)
                # Extract spatio-temporal features from the clip using a 3D ResNet (for SL >= 16)
                # inputs = inputs.permute(0, 2, 1, 3, 4).float()   # enable for raw clip inputs

                inputs, targets = inputs.to(device), targets.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()
                loss = 0

                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):

                    batch_size = inputs.size(0)
                    enc_h = encoder.init_hidden(batch_size)
                    enc_out, h = encoder(inputs, enc_h)
                    dec_h = h
                    dec_out_lst = []
                    target_length = targets.size(1)  # use SEQ_LEN as the target length
                    # Decode step by step, feeding the ground truth in at each step (teacher forcing).
                    for ti in range(target_length):
                        dec_out, dec_h, dec_attn = decoder(
                            dec_h, enc_out, targets[:, ti, :])
                        dec_out_lst.append(dec_out)
                        loss += criterion(dec_out, targets[:, ti, :])

                    outputs = torch.stack(dec_out_lst, dim=1)


                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        encoder_optimizer.step()
                        decoder_optimizer.step()

                # statistics
                running_loss += loss.item()

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss   # total (unnormalized) epoch loss
            print('{} Loss: {:.4f}'.format(phase, epoch_loss))



        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, \
          time_elapsed % 60))
    return encoder, decoder
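
# The decoder loop in train_model() above uses teacher forcing: each step is
# conditioned on the ground-truth frame targets[:, ti, :], whereas the
# inference-time predict() at the top of this file feeds the previous output
# back in. A minimal sketch of the two regimes with a GRUCell (toy sizes,
# illustrative only):
import torch
import torch.nn as nn

feat, cell = 8, nn.GRUCell(8, 8)
targets = torch.randn(4, 5, feat)      # (BATCH, SEQ_LEN, FEATURE_DIM)

# Teacher forcing (training): condition each step on the ground truth.
h = torch.zeros(4, 8)
for ti in range(targets.size(1)):
    h = cell(targets[:, ti, :], h)

# Free running (inference): condition each step on the previous prediction.
h, inp = torch.zeros(4, 8), torch.zeros(4, feat)   # zero start symbol
for ti in range(targets.size(1)):
    h = cell(inp, h)
    inp = h                            # feed the prediction back in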
Example #12
def train_model(model,
                dataloaders,
                criterion,
                encoder_optimizer,
                scheduler,
                labs_keys,
                labs_values,
                seq=8,
                num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for bno, (inputs, vid_path, stroke,
                      labels) in enumerate(dataloaders[phase]):
                # inputs of shape BATCH x SEQ_LEN x FEATURE_DIM
                labels = attn_utils.get_batch_labels(vid_path, stroke,
                                                     labs_keys, labs_values, 1)
                # Extract spatio-temporal features from the clip using a 3D ResNet (for SL >= 16)
                inputs = inputs.permute(0, 2, 1, 3, 4).float()   # (B, T, C, H, W) -> (B, C, T, H, W)
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                encoder_optimizer.zero_grad()
                loss = 0

                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):

                    enc_out, enc_h = model(inputs)
                    loss += criterion(enc_out, labels)
                    _, preds = torch.max(enc_out, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        encoder_optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.6f} Acc: {:.6f} LR: {}'.format(
                phase, epoch_loss, epoch_acc, scheduler.get_last_lr()[0]))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, \
          time_elapsed % 60))
    print('Best test Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
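
# train_model() above logs scheduler.get_last_lr()[0] once per epoch. A hedged
# usage sketch with a StepLR schedule (the real scheduler is constructed by
# the caller and is not shown in this file):
import torch

model = torch.nn.Linear(4, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.01)
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.1)

for epoch in range(3):
    # ... optimizer.step() calls for one epoch would go here ...
    sched.step()
    print(epoch, sched.get_last_lr()[0])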