Example #1
def main(DATASET,
         LABELS,
         CLASS_IDS,
         BATCH_SIZE,
         ANNOTATION_FILE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25):
    '''
    Extract sequence features from AutoEncoder.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    partition : str
        'all' / 'train' / 'test' / 'val' : Videos to be considered
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    trajectories, stroke_names
    
    '''
    ###########################################################################

    attn_utils.seed_everything(1234)

    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    #    extract_of_features(feat_path, DATASET, LABELS, train_lst, val_lst)

    features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames)
    # get matrix of features from dictionary (N, vec_size)
    vecs = []
    for key in sorted(list(features.keys())):
        vecs.append(features[key])
    vecs = np.vstack(vecs)

    vecs[np.isnan(vecs)] = 0
    vecs[np.isinf(vecs)] = 0

    #fc7 layer output size (4096)
    INP_VEC_SIZE = vecs.shape[-1]
    print("INP_VEC_SIZE = ", INP_VEC_SIZE)

    km_filepath = os.path.join(log_path, km_filename)
    #    # Uncomment only while training.
    if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"):
        km_model = make_codebook(vecs, cluster_size)  #, model_type='gmm')
        ##    # Save to disk, if training is performed
        print("Writing the KMeans models to disk...")
        pickle.dump(
            km_model,
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb"))
    else:
        # Load from disk, for validation and test sets.
        km_model = pickle.load(
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb'))

    print("Create numpy one hot representation for train features...")
    onehot_feats = create_bovw_SA(features, stroke_names_id, km_model)

    ft_path = os.path.join(log_path, "C" + str(cluster_size) + "_train.pkl")
    with open(ft_path, "wb") as fp:
        pickle.dump(onehot_feats, fp)

    ###########################################################################

    features_val, stroke_names_id_val = attn_utils.read_feats(
        feat_path, feat_val, snames_val)

    print("Create numpy one hot representation for val features...")
    onehot_feats_val = create_bovw_SA(features_val, stroke_names_id_val,
                                      km_model)

    ft_path_val = os.path.join(log_path, "C" + str(cluster_size) + "_val.pkl")
    with open(ft_path_val, "wb") as fp:
        pickle.dump(onehot_feats_val, fp)

    ###########################################################################

    features_test, stroke_names_id_test = attn_utils.read_feats(
        feat_path, feat_test, snames_test)

    print("Create numpy one hot representation for test features...")
    onehot_feats_test = create_bovw_SA(features_test, stroke_names_id_test,
                                       km_model)

    ft_path_test = os.path.join(log_path,
                                "C" + str(cluster_size) + "_test.pkl")
    with open(ft_path_test, "wb") as fp:
        pickle.dump(onehot_feats_test, fp)

    ###########################################################################
    # Create a Dataset
    train_dataset = StrokeFeaturePairsDataset(ft_path,
                                              train_lst,
                                              DATASET,
                                              LABELS,
                                              CLASS_IDS,
                                              frames_per_clip=SEQ_SIZE,
                                              extracted_frames_per_clip=2,
                                              step_between_clips=STEP,
                                              train=True)
    val_dataset = StrokeFeaturePairsDataset(ft_path_val,
                                            val_lst,
                                            DATASET,
                                            LABELS,
                                            CLASS_IDS,
                                            frames_per_clip=SEQ_SIZE,
                                            extracted_frames_per_clip=2,
                                            step_between_clips=STEP,
                                            train=False)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)
    #    # created weighted Sampler for class imbalance
    #    samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys, labs_values,
    #                                                   train_lst)
    #    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
    #                              sampler=sampler, worker_init_fn=np.random.seed(12))

    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False)

    data_loaders = {"train": train_loader, "test": val_loader}

    num_classes = len(list(set(labs_values)))

    ###########################################################################

    # load model and set loss function
    ntokens = cluster_size  # the size of vocabulary
    emsize = 200  # embedding dimension
    nhid = 200  # the dimension of the feedforward network model in nn.TransformerEncoder
    nlayers = 2  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
    nhead = 2  # the number of heads in the multiheadattention models
    dropout = 0.2  # the dropout value
    model = tt.TransformerModelSA(ntokens, emsize, nhead, nhid, nlayers,
                                  dropout).to(device)

    # Setup the loss fxn
    #    criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()
    model = model.to(device)
    #    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t", name)


#    # Observe that all parameters are being optimized
##    optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001)
#    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
#
#    # Decay LR by a factor of 0.1 every 7 epochs
#    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    lr = 5.0  # learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 2.0, gamma=0.95)
    ###########################################################################
    # Training the model

    #    start = time.time()
    #
    #    model = train_model(features, stroke_names_id, model, data_loaders, criterion,
    #                        optimizer, scheduler, labs_keys, labs_values,
    #                        num_epochs=N_EPOCHS)
    #
    #    end = time.time()
    #
    ##    # save the best performing model
    #    save_model_checkpoint(log_path, model, N_EPOCHS,
    #                                     "S"+str(SEQ_SIZE)+"C"+str(cluster_size)+"_SGD")
    # Load model checkpoints
    model = load_weights(
        log_path, model, N_EPOCHS,
        "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD")

    #    print("Total Execution time for {} epoch : {}".format(N_EPOCHS, (end-start)))

    ###########################################################################

    ###########################################################################

    # Extract attention model features
    if not os.path.isfile(os.path.join(log_path, "trans_feats.pkl")):
        if not os.path.exists(log_path):
            os.makedirs(log_path)
        #    # Extract Grid OF / HOOF features {mth = 2, and vary nbins}
        print("Training extraction ... ")
        feats_dict, stroke_names = extract_trans_feats(model,
                                                       DATASET,
                                                       LABELS,
                                                       CLASS_IDS,
                                                       BATCH_SIZE,
                                                       SEQ_SIZE,
                                                       SEQ_SIZE - 1,
                                                       partition='train',
                                                       nstrokes=nstrokes,
                                                       base_name=log_path)

        with open(os.path.join(log_path, "trans_feats.pkl"), "wb") as fp:
            pickle.dump(feats_dict, fp)
        with open(os.path.join(log_path, "trans_snames.pkl"), "wb") as fp:
            pickle.dump(stroke_names, fp)

    if not os.path.isfile(os.path.join(log_path, "trans_feats_val.pkl")):
        print("Validation extraction ....")
        feats_dict_val, stroke_names_val = extract_trans_feats(
            model,
            DATASET,
            LABELS,
            CLASS_IDS,
            BATCH_SIZE,
            SEQ_SIZE,
            SEQ_SIZE - 1,
            partition='val',
            nstrokes=nstrokes,
            base_name=log_path)

        with open(os.path.join(log_path, "trans_feats_val.pkl"), "wb") as fp:
            pickle.dump(feats_dict_val, fp)
        with open(os.path.join(log_path, "trans_snames_val.pkl"), "wb") as fp:
            pickle.dump(stroke_names_val, fp)

    if not os.path.isfile(os.path.join(log_path, "trans_feats_test.pkl")):
        print("Testing extraction ....")
        feats_dict_test, stroke_names_test = extract_trans_feats(
            model,
            DATASET,
            LABELS,
            CLASS_IDS,
            BATCH_SIZE,
            SEQ_SIZE,
            SEQ_SIZE - 1,
            partition='test',
            nstrokes=nstrokes,
            base_name=log_path)

        with open(os.path.join(log_path, "trans_feats_test.pkl"), "wb") as fp:
            pickle.dump(feats_dict_test, fp)
        with open(os.path.join(log_path, "trans_snames_test.pkl"), "wb") as fp:
            pickle.dump(stroke_names_test, fp)

    # call count_paramters(model)  for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return 0
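
Example #1 stacks the per-frame descriptors, fits a KMeans codebook, and then converts each stroke's descriptor sequence into one-hot cluster assignments before training the Transformer. Below is a minimal sketch of that bag-of-visual-words step, assuming make_codebook() wraps sklearn's KMeans and create_bovw_SA() performs hard assignment per time step; the helper names here are illustrative, not the repository's API.

import numpy as np
from sklearn.cluster import KMeans

def make_codebook_sketch(vecs, cluster_size, seed=1234):
    # Fit a KMeans codebook on the (N, vec_size) matrix of descriptors.
    return KMeans(n_clusters=cluster_size, random_state=seed).fit(vecs)

def create_onehot_sketch(features, stroke_names, km_model):
    # Map each stroke's (T, vec_size) sequence to (T, cluster_size) one-hot rows.
    onehot = {}
    for name in stroke_names:
        words = km_model.predict(features[name])          # (T,) cluster ids
        mat = np.zeros((len(words), km_model.n_clusters))
        mat[np.arange(len(words)), words] = 1.0           # one-hot per time step
        onehot[name] = mat
    return onehot
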
Example #2
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16, 
         STEP=16, nstrokes=-1, N_EPOCHS=25, base_name=''):
    '''
    Extract sequence features from AutoEncoder.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    partition : str
        'all' / 'train' / 'test' / 'val' : Videos to be considered
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    trajectories, stroke_names
    
    '''
    ###########################################################################
    
    attn_utils.seed_everything(1234)
    
    if not os.path.isdir(log_path):
        os.makedirs(log_path)
    
    # Read the strokes 
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))
        
    features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames)
#    # get matrix of features from dictionary (N, vec_size)
#    vecs = []
#    for key in sorted(list(features.keys())):
#        vecs.append(features[key])
#    vecs = np.vstack(vecs)
#    
#    vecs[np.isnan(vecs)] = 0
#    vecs[np.isinf(vecs)] = 0
#    
#    #fc7 layer output size (4096) 
#    INP_VEC_SIZE = vecs.shape[-1]
#    print("INP_VEC_SIZE = ", INP_VEC_SIZE)
#    
#    km_filepath = os.path.join(log_path, km_filename)
##    # Uncomment only while training.
#    if not os.path.isfile(km_filepath+"_C"+str(cluster_size)+".pkl"):
#        km_model = make_codebook(vecs, cluster_size)  #, model_type='gmm') 
#        ##    # Save to disk, if training is performed
#        print("Writing the KMeans models to disk...")
#        pickle.dump(km_model, open(km_filepath+"_C"+str(cluster_size)+".pkl", "wb"))
#    else:
#        # Load from disk, for validation and test sets.
#        km_model = pickle.load(open(km_filepath+"_C"+str(cluster_size)+".pkl", 'rb'))
#        
#    print("Create numpy one hot representation for train features...")
#    onehot_feats = create_bovw_onehot(features, stroke_names_id, km_model)
#    
#    ft_path = os.path.join(log_path, "C"+str(cluster_size)+"_train.pkl")
#    with open(ft_path, "wb") as fp:
#        pickle.dump(onehot_feats, fp)
#    
#    ###########################################################################
#    
    features_val, stroke_names_id_val = attn_utils.read_feats(feat_path, feat_val, 
                                                              snames_val)
#    
#    print("Create numpy one hot representation for val features...")
#    onehot_feats_val = create_bovw_onehot(features_val, stroke_names_id_val, km_model)
#    
#    ft_path_val = os.path.join(log_path, "C"+str(cluster_size)+"_val.pkl")
#    with open(ft_path_val, "wb") as fp:
#        pickle.dump(onehot_feats_val, fp)
#    
#    ###########################################################################
#    
    features_test, stroke_names_id_test = attn_utils.read_feats(feat_path, feat_test, 
                                                                snames_test)
#    
#    print("Create numpy one hot representation for test features...")
#    onehot_feats_test = create_bovw_onehot(features_test, stroke_names_id_test, km_model)
#    
#    ft_path_test = os.path.join(log_path, "C"+str(cluster_size)+"_test.pkl")
#    with open(ft_path_test, "wb") as fp:
#        pickle.dump(onehot_feats_test, fp)
    
    ###########################################################################    
    # Create a Dataset    
    ft_path = os.path.join(base_name, feat_path, feat)
    train_dataset = StrokeFeaturePairsDataset(ft_path, train_lst, DATASET, LABELS, CLASS_IDS, 
                                         frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2,
                                         step_between_clips=STEP, train=True)
    ft_path_val = os.path.join(base_name, feat_path, feat_val)
    val_dataset = StrokeFeaturePairsDataset(ft_path_val, val_lst, DATASET, LABELS, CLASS_IDS, 
                                         frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2,
                                         step_between_clips=STEP, train=False)
    
    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)
    # created weighted Sampler for class imbalance
    samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys, labs_values, 
                                                   train_lst)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, #shuffle=True,
                              sampler=sampler, worker_init_fn=np.random.seed(12))
    
    val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    data_loaders = {"train": train_loader, "test": val_loader}

    num_classes = len(list(set(labs_values)))
    
    ###########################################################################    
    
    model = siamese_net.SiameseGRUNet(INPUT_SIZE, HIDDEN_SIZE, N_LAYERS, bidirectional)
#    model = siamese_net.GRUBoWSANet(INPUT_SIZE, HIDDEN_SIZE, N_LAYERS, bidirectional)
    model = load_weights(model_path, model, N_EPOCHS, 
                                    "S30"+"C"+str(cluster_size)+"_SGD")
    
    # copy the pretrained weights 
    
    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    print("Param layers frozen:")
#    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad == True:
            param.requires_grad = False
#            params_to_update.append(param)
    
    model.g1.fc2 = nn.Linear(HIDDEN_SIZE * (int(bidirectional)+1), HIDDEN_SIZE)
    model.g1.fc3 = nn.Linear(HIDDEN_SIZE, num_classes)
#    initrange = 0.1
#    model.g1.fc3.bias.data.zero_()
#    model.g1.fc3.weight.data.uniform_(-initrange, initrange)

    model = model.to(device)

    print("Params to learn:")
    for name, param in model.named_parameters():
        if param.requires_grad == True:
            print("\t", name)

    
#    # Observe that all parameters are being optimized
#    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
#    
#    # Decay LR by a factor of 0.1 every 7 epochs
    scheduler = StepLR(optimizer, step_size=15, gamma=0.1)
    
#    lr = 5.0 # learning rate
#    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
#    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
    ###########################################################################
    # Training the model    
    
    start = time.time()
    
    model = train_model(features, stroke_names_id, model, data_loaders, criterion, 
                        optimizer, scheduler, labs_keys, labs_values,
                        num_epochs=N_EPOCHS)
    
    end = time.time()
    
    # save the best performing model
    save_model_checkpoint(log_path, model, N_EPOCHS, 
                                     "S"+str(SEQ_SIZE)+"C"+str(cluster_size)+"_SGD")
    # Load model checkpoints
    model = load_weights(log_path, model, N_EPOCHS, 
                                    "S"+str(SEQ_SIZE)+"C"+str(cluster_size)+"_SGD")
    
    print("Total Execution time for {} epoch : {}".format(N_EPOCHS, (end-start)))

    ###########################################################################
    
    acc = predict(features_val, stroke_names_id_val, model, data_loaders, labs_keys, 
                  labs_values, SEQ_SIZE, phase='test')
    
    ###########################################################################
    
            
    # call count_paramters(model)  for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return 0
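
Example #2 loads a pretrained Siamese GRU, freezes every pretrained parameter, and attaches a fresh two-layer classification head (g1.fc2, g1.fc3), so only the new layers are trained. A minimal, self-contained sketch of that pattern follows, with a toy GRU backbone standing in for siamese_net.SiameseGRUNet; all sizes and class counts are assumed.

import torch
import torch.nn as nn

hidden_size, num_classes, bidirectional = 128, 5, True   # assumed values

backbone = nn.GRU(input_size=64, hidden_size=hidden_size,
                  num_layers=2, batch_first=True,
                  bidirectional=bidirectional)
for p in backbone.parameters():          # freeze the pretrained weights
    p.requires_grad = False

# new head: only these parameters receive gradients
head = nn.Sequential(
    nn.Linear(hidden_size * (int(bidirectional) + 1), hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, num_classes),
)

params_to_update = [p for p in head.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params_to_update, lr=0.001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)

Passing model.parameters() to the optimizer, as the example does, also works because frozen parameters never accumulate gradients and the optimizer skips them; filtering them out as above is simply more explicit.
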
Example #3
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return 0

if __name__ == '__main__':
    # Local Paths
    LABELS = "/home/arpan/VisionWorkspace/Cricket/scripts/supporting_files/sample_set_labels/sample_labels_shots/ICC WT20"
    DATASET = "/home/arpan/VisionWorkspace/VideoData/sample_cricket/ICC WT20"
    CLASS_IDS = "/home/arpan/VisionWorkspace/Cricket/cluster_strokes/configs/Class Index_Strokes.txt"
    ANNOTATION_FILE = "/home/arpan/VisionWorkspace/Cricket/CricketStrokeLocalizationBOVW/shots_classes.txt"

    seq_sizes = range(30, 31, 2)
    STEP = 1
    BATCH_SIZE = 32
    N_EPOCHS = 60

    attn_utils.seed_everything(1234)
    acc = []

    print("HOOF bins40 mth2 BOV Transformer SA without Embedding...")
    print("EPOCHS = {} : HIDDEN_SIZE = {} : LAYERS = {}".format(
        N_EPOCHS, HIDDEN_SIZE, N_LAYERS))
    for SEQ_SIZE in seq_sizes:
        print("SEQ_SIZE : {} :: CLUSTER_SIZE : {}".format(
            SEQ_SIZE, cluster_size))
        acc.append(
            main(DATASET,
                 LABELS,
                 CLASS_IDS,
                 BATCH_SIZE,
                 ANNOTATION_FILE,
                 SEQ_SIZE,
Example #4
def main(DATASET,
         LABELS,
         CLASS_IDS,
         BATCH_SIZE,
         ANNOTATION_FILE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25):
    '''
    Extract sequence features from AutoEncoder.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    partition : str
        'all' / 'train' / 'test' / 'val' : Videos to be considered
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    trajectories, stroke_names
    
    '''
    ###########################################################################

    attn_utils.seed_everything(1234)

    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames)
    features2, stroke_names_id2 = attn_utils.read_feats(
        feat_path2, feat2, snames2)
    # get matrix of features from dictionary (N, vec_size)
    vecs, vecs2 = [], []
    for key in stroke_names_id:
        vecs.append(features[key])
        vecs2.append(features2[key])
    vecs, vecs2 = np.vstack(vecs), np.vstack(vecs2)

    vecs[np.isnan(vecs)] = 0
    vecs[np.isinf(vecs)] = 0
    vecs2[np.isnan(vecs2)] = 0
    vecs2[np.isinf(vecs2)] = 0

    #    vecs = traj_utils.apply_PCA(vecs, 10)
    #    vecs2 = traj_utils.apply_PCA(vecs2, 10)
    #    form_lower_dim_dict(features, stroke_names_id, vecs)
    #    form_lower_dim_dict(features2, stroke_names_id2, vecs2)

    #fc7 layer output size (4096)
    INP_VEC_SIZE, INP_VEC_SIZE2 = vecs.shape[-1], vecs2.shape[-1]
    print("INP_VEC_SIZE = {} : INP_VEC_SIZE2 = {}".format(
        INP_VEC_SIZE, INP_VEC_SIZE2))

    km_filepath = os.path.join(log_path, km_filename)
    # Feats1
    if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"):
        km_model = make_codebook(vecs, cluster_size)  #, model_type='gmm')
        ##    # Save to disk, if training is performed
        print("Writing the KMeans models to disk...")
        pickle.dump(
            km_model,
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb"))
    else:
        # Load from disk, for validation and test sets.
        km_model = pickle.load(
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb'))
    # Feats2
    if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + "_2.pkl"):
        km_model2 = make_codebook(vecs2, cluster_size)  #, model_type='gmm')
        ##    # Save to disk, if training is performed
        print("Writing the KMeans models to disk...")
        pickle.dump(
            km_model2,
            open(km_filepath + "_C" + str(cluster_size) + "_2.pkl", "wb"))
    else:
        # Load from disk, for validation and test sets.
        km_model2 = pickle.load(
            open(km_filepath + "_C" + str(cluster_size) + "_2.pkl", 'rb'))

    print("Create numpy one hot representation for train features...")
    onehot_feats = create_bovw_SA(features, stroke_names_id, km_model)
    print("Create numpy one hot representation for train features2...")
    onehot_feats2 = create_bovw_SA(features2, stroke_names_id2, km_model2)

    ft_path = os.path.join(log_path,
                           "onehot_C" + str(cluster_size) + "_train.pkl")
    ft_path2 = os.path.join(log_path,
                            "onehot_C" + str(cluster_size) + "_train_2.pkl")
    with open(ft_path, "wb") as fp:
        pickle.dump(onehot_feats, fp)
    with open(ft_path2, "wb") as fp:
        pickle.dump(onehot_feats2, fp)

    ###########################################################################

    features_val, stroke_names_id_val = attn_utils.read_feats(
        feat_path, feat_val, snames_val)
    features_val2, stroke_names_id_val2 = attn_utils.read_feats(
        feat_path2, feat_val2, snames_val2)

    #    # get matrix of features from dictionary (N, vec_size)
    #    vecs, vecs2 = [], []
    #    for key in stroke_names_id:
    #        vecs.append(features[key])
    #        vecs2.append(features2[key])
    #    vecs, vecs2 = np.vstack(vecs), np.vstack(vecs2)
    #
    #    vecs[np.isnan(vecs)] = 0
    #    vecs[np.isinf(vecs)] = 0
    #    vecs2[np.isnan(vecs2)] = 0
    #    vecs2[np.isinf(vecs2)] = 0
    #
    #    form_lower_dim_dict(features, stroke_names_id, vecs)
    #    form_lower_dim_dict(features2, stroke_names_id2, vecs2)

    print("Create numpy one hot representation for val features...")
    onehot_feats_val = create_bovw_SA(features_val, stroke_names_id_val,
                                      km_model)
    print("Create numpy one hot representation for val features2...")
    onehot_feats_val2 = create_bovw_SA(features_val2, stroke_names_id_val2,
                                       km_model2)
    ft_path_val = os.path.join(log_path,
                               "onehot_C" + str(cluster_size) + "_val.pkl")
    ft_path_val2 = os.path.join(log_path,
                                "onehot_C" + str(cluster_size) + "_val_2.pkl")
    with open(ft_path_val, "wb") as fp:
        pickle.dump(onehot_feats_val, fp)
    with open(ft_path_val2, "wb") as fp:
        pickle.dump(onehot_feats_val2, fp)
    ###########################################################################
    # Create a Dataset

#    ft_path = os.path.join(base_name, ft_dir, feat)
    train_dataset = StrokeFeatureSequencesDataset(ft_path,
                                                  ft_path2,
                                                  train_lst,
                                                  DATASET,
                                                  LABELS,
                                                  CLASS_IDS,
                                                  frames_per_clip=SEQ_SIZE,
                                                  extracted_frames_per_clip=2,
                                                  step_between_clips=STEP,
                                                  train=True)
    #    ft_path_val = os.path.join(base_name, ft_dir, feat_val)
    val_dataset = StrokeFeatureSequencesDataset(ft_path_val,
                                                ft_path_val2,
                                                val_lst,
                                                DATASET,
                                                LABELS,
                                                CLASS_IDS,
                                                frames_per_clip=SEQ_SIZE,
                                                extracted_frames_per_clip=2,
                                                step_between_clips=STEP,
                                                train=False)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)
    # created weighted Sampler for class imbalance
    samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys,
                                                   labs_values, train_lst)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              sampler=sampler,
                              worker_init_fn=np.random.seed(12))

    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False)

    data_loaders = {"train": train_loader, "test": val_loader}

    num_classes = len(list(set(labs_values)))

    #    vis_clusters(features, onehot_feats, stroke_names_id, 2, DATASET, log_path)

    ###########################################################################

    # load model and set loss function
    model = attn_model.GRUBoWMultiStreamClassifier(INPUT_SIZE, INPUT_SIZE,
                                                   HIDDEN_SIZE, HIDDEN_SIZE,
                                                   num_classes, N_LAYERS,
                                                   bidirectional)

    #    model = load_weights(base_name, model, N_EPOCHS, "Adam")

    #    for ft in model.parameters():
    #        ft.requires_grad = False

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)


#            print("\t",name)

# Observe that all parameters are being optimized
    optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001)
    #    optimizer_ft = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = StepLR(optimizer_ft, step_size=15, gamma=0.1)

    ###########################################################################
    # Training the model

    start = time.time()

    model = train_model(features,
                        stroke_names_id,
                        model,
                        data_loaders,
                        criterion,
                        optimizer_ft,
                        exp_lr_scheduler,
                        labs_keys,
                        labs_values,
                        num_epochs=N_EPOCHS)

    end = time.time()
    #
    #    # save the best performing model
    attn_utils.save_model_checkpoint(
        log_path, model, N_EPOCHS,
        "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD")
    # Load model checkpoints
    model = attn_utils.load_weights(
        log_path, model, N_EPOCHS,
        "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    #    ###########################################################################

    acc = predict(features_val,
                  stroke_names_id_val,
                  model,
                  data_loaders,
                  labs_keys,
                  labs_values,
                  SEQ_SIZE,
                  phase='test')

    # call count_paramters(model)  for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return acc
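
Several of these examples, including #4, counter class imbalance by feeding a WeightedRandomSampler with per-clip weights from attn_utils.get_sample_weights(). A minimal sketch of how such weights are typically derived (inverse class frequency; the toy labels below are assumptions, not the repository's data):

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

labels = np.array([0, 0, 0, 0, 1, 2, 2])              # toy clip labels (assumed)
class_counts = np.bincount(labels)                    # samples per class
samples_weight = torch.as_tensor(1.0 / class_counts[labels], dtype=torch.double)

sampler = WeightedRandomSampler(samples_weight, num_samples=len(samples_weight))
dataset = TensorDataset(torch.randn(len(labels), 8), torch.as_tensor(labels))
loader = DataLoader(dataset, batch_size=2, sampler=sampler)  # sampler replaces shuffle=True
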
Example #5
def main(DATASET,
         LABELS,
         CLASS_IDS,
         BATCH_SIZE,
         ANNOTATION_FILE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25,
         base_name=""):
    '''
    Extract sequence features from AutoEncoder.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    partition : str
        'all' / 'train' / 'test' / 'val' : Videos to be considered
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    trajectories, stroke_names
    
    '''
    ###########################################################################
    # seed everything
    seed = 1234
    attn_utils.seed_everything(seed)
    if not os.path.isdir(base_name):
        os.makedirs(base_name)

    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    train_transform = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
        #videotransforms.RandomHorizontalFlip(),\
    ])
    test_transform = transforms.Compose([
        videotransforms.CenterCrop(224),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
        #videotransforms.RandomHorizontalFlip(),\
    ])
    train_dataset = CricketStrokesDataset(train_lst,
                                          DATASET,
                                          LABELS,
                                          CLASS_IDS,
                                          frames_per_clip=SEQ_SIZE,
                                          step_between_clips=STEP,
                                          train=True,
                                          framewiseTransform=False,
                                          transform=train_transform)
    val_dataset = CricketStrokesDataset(val_lst,
                                        DATASET,
                                        LABELS,
                                        CLASS_IDS,
                                        frames_per_clip=SEQ_SIZE,
                                        step_between_clips=STEP,
                                        train=False,
                                        framewiseTransform=False,
                                        transform=test_transform)

    ###########################################################################

    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    num_classes = len(list(set(labs_values)))

    # created weighted Sampler for class imbalance
    if not os.path.isfile(
            os.path.join(
                base_name, "weights_c" + str(num_classes) + "_" +
                str(len(train_dataset)) + ".pkl")):
        samples_weight = attn_utils.get_sample_weights(train_dataset,
                                                       labs_keys, labs_values,
                                                       train_lst)
        with open(
                os.path.join(
                    base_name, "weights_c" + str(num_classes) + "_" +
                    str(len(train_dataset)) + ".pkl"), "wb") as fp:
            pickle.dump(samples_weight, fp)
    with open(
            os.path.join(
                base_name, "weights_c" + str(num_classes) + "_" +
                str(len(train_dataset)) + ".pkl"), "rb") as fp:
        samples_weight = pickle.load(fp)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              sampler=sampler,
                              worker_init_fn=np.random.seed(12))

    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False)

    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    # load model and set loss function
    encoder = conv_attn_model.Conv3DEncoder(HIDDEN_SIZE, 1, bidirectional)
    #    encoder = conv_attn_model.Conv3DAttention(HIDDEN_SIZE, num_classes, 1, 196, bidirectional)
    decoder = conv_attn_model.Conv3DDecoder(HIDDEN_SIZE, num_classes, 1, 1,
                                            bidirectional)
    #    decoder = conv_encdec_model.Conv3DDecoder(HIDDEN_SIZE, HIDDEN_SIZE, 1, 196, bidirectional)
    #    model = attn_model.Encoder(10, 20, bidirectional)

    #    for ft in model.parameters():
    #        ft.requires_grad = False
    #    inp_feat_size = model.fc.in_features
    #    model.fc = nn.Linear(inp_feat_size, num_classes)
    #    model = model.to(device)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    #    # load checkpoint:

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()
    #    criterion = nn.MSELoss()

    #    # Layers to finetune. Last layer should be displayed
    print("Params to learn:")
    params_to_update = []
    for name, param in encoder.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("Encoder : {}".format(name))
    for name, param in decoder.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("Decoder : {}".format(name))

    # Observe that all parameters are being optimized


#    optimizer_ft = torch.optim.Adam(params_to_update, lr=0.001)
#    optimizer_ft = torch.optim.SGD(params_to_update, lr=0.01, momentum=0.9)
    encoder_optimizer = torch.optim.SGD(encoder.parameters(),
                                        lr=0.01,
                                        momentum=0.9)
    decoder_optimizer = torch.optim.SGD(decoder.parameters(),
                                        lr=0.01,
                                        momentum=0.9)
    #    decoder_optimizer = None

    # Decay LR by a factor of 0.1 every 7 epochs
    lr_scheduler = StepLR(encoder_optimizer, step_size=10, gamma=0.1)

    #    # Observe that all parameters are being optimized
    #    optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

    #    ###########################################################################
    # Training the model
    start = time.time()

    (encoder, decoder) = train_model(encoder,
                                     decoder,
                                     data_loaders,
                                     criterion,
                                     encoder_optimizer,
                                     decoder_optimizer,
                                     lr_scheduler,
                                     labs_keys,
                                     labs_values,
                                     num_epochs=N_EPOCHS)

    end = time.time()

    # save the best performing model
    attn_utils.save_attn_model_checkpoint(base_name, (encoder, decoder),
                                          N_EPOCHS, "SGD")
    # Load model checkpoints
    encoder, decoder = attn_utils.load_attn_model_checkpoint(
        base_name, encoder, decoder, N_EPOCHS, "SGD")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################

    #    features_val, stroke_names_id_val = attn_utils.read_feats(os.path.join(base_name, ft_dir),
    #                                                              feat_val, snames_val)
    print("Writing prediction dictionary....")
    pred_out_dict, acc = predict(encoder,
                                 decoder,
                                 data_loaders,
                                 criterion,
                                 labs_keys,
                                 labs_values,
                                 phase='test')

    with open(os.path.join(base_name, "pred_dict.pkl"), "wb") as fp:
        pickle.dump(pred_out_dict, fp)

    # save the output wts and related information
    print("#Parameters Encoder : {} ".format(
        autoenc_utils.count_parameters(encoder)))
    print("#Parameters Decoder : {} ".format(
        autoenc_utils.count_parameters(decoder)))

    return encoder, decoder
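
Example #5 checkpoints the encoder and decoder together through the attn_utils helpers. A plausible sketch of that save/restore step, assuming a single file holding both state dicts (the file-naming scheme below is an assumption, not the repository's exact convention):

import os
import torch

def save_attn_checkpoint_sketch(base_name, encoder, decoder, n_epochs, tag):
    path = os.path.join(base_name, "attn_ep{}_{}.pt".format(n_epochs, tag))
    torch.save({"encoder": encoder.state_dict(),
                "decoder": decoder.state_dict()}, path)

def load_attn_checkpoint_sketch(base_name, encoder, decoder, n_epochs, tag):
    path = os.path.join(base_name, "attn_ep{}_{}.pt".format(n_epochs, tag))
    ckpt = torch.load(path, map_location="cpu")
    encoder.load_state_dict(ckpt["encoder"])
    decoder.load_state_dict(ckpt["decoder"])
    return encoder, decoder
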
Example #6
def main(DATASET,
         LABELS,
         CLASS_IDS,
         BATCH_SIZE,
         ANNOTATION_FILE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25,
         base_name=""):
    '''
    Extract sequence features from AutoEncoder.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    partition : str
        'all' / 'train' / 'test' / 'val' : Videos to be considered
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    trajectories, stroke_names
    
    '''
    if not os.path.isdir(base_name):
        os.makedirs(base_name)
    seed = 1234
    attn_utils.seed_everything(seed)
    ###########################################################################
    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(300),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
        #                                           videotransforms.ScaledNormMinMax(),
    ])
    test_transforms = transforms.Compose([
        videotransforms.CenterCrop(300),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
        #                                          videotransforms.ScaledNormMinMax(),
    ])
    train_dataset = CricketStrokesDataset(train_lst,
                                          DATASET,
                                          LABELS,
                                          CLASS_IDS,
                                          frames_per_clip=SEQ_SIZE,
                                          step_between_clips=STEP,
                                          train=True,
                                          framewiseTransform=False,
                                          transform=train_transforms)
    val_dataset = CricketStrokesDataset(val_lst,
                                        DATASET,
                                        LABELS,
                                        CLASS_IDS,
                                        frames_per_clip=SEQ_SIZE,
                                        step_between_clips=STEP,
                                        train=False,
                                        framewiseTransform=False,
                                        transform=test_transforms)

    ###########################################################################

    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    num_classes = len(list(set(labs_values)))

    # created weighted Sampler for class imbalance
    if not os.path.isfile(
            os.path.join(
                base_name, "weights_c" + str(num_classes) + "_" +
                str(len(train_dataset)) + ".pkl")):
        samples_weight = attn_utils.get_sample_weights(train_dataset,
                                                       labs_keys, labs_values,
                                                       train_lst)
        with open(
                os.path.join(
                    base_name, "weights_c" + str(num_classes) + "_" +
                    str(len(train_dataset)) + ".pkl"), "wb") as fp:
            pickle.dump(samples_weight, fp)
    with open(
            os.path.join(
                base_name, "weights_c" + str(num_classes) + "_" +
                str(len(train_dataset)) + ".pkl"), "rb") as fp:
        samples_weight = pickle.load(fp)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              sampler=sampler,
                              worker_init_fn=np.random.seed(12))

    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False)

    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    # load model and set loss function
    model = conv_attn_model.C3DGRUv2Orig(HIDDEN_SIZE, 1, num_classes,
                                         bidirectional)
    model_pretrained = c3d.C3D()
    model_pretrained.load_state_dict(
        torch.load("../localization_rnn/" + wts_path))
    #    model_pretrained = c3d_pre.C3D()
    #    model_pretrained.fc8 = nn.Linear(4096, 5)
    #    model_pretrained.load_state_dict(torch.load(pretrained_c3d_wts))
    copy_pretrained_weights(model_pretrained, model)
    # reset the last layer (default requires_grad is True)
    #    model.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
    #    for ft in model.parameters():
    #        ft.requires_grad = False
    #    inp_feat_size = model.fc.in_features
    #    model.fc = nn.Linear(inp_feat_size, num_classes)
    model = model.to(device)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()
    #    criterion = nn.MSELoss()

    #    # Layers to finetune. Last layer should be displayed
    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t {}".format(name))

    # Observe that all parameters are being optimized


#    optimizer_ft = torch.optim.Adam(params_to_update, lr=0.01)
    optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    lr_scheduler = StepLR(optimizer_ft, step_size=30, gamma=0.1)

    ###########################################################################
    # Training the model
    start = time.time()

    model = train_model(model,
                        data_loaders,
                        criterion,
                        optimizer_ft,
                        lr_scheduler,
                        labs_keys,
                        labs_values,
                        num_epochs=N_EPOCHS)

    end = time.time()

    # save the best performing model
    attn_utils.save_model_checkpoint(base_name, model, N_EPOCHS,
                                     "SGD_c8_c3dgruEp60Step30")
    # Load model checkpoints
    model = attn_utils.load_weights(base_name, model, N_EPOCHS,
                                    "SGD_c8_c3dgruEp60Step30")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    #    ###########################################################################

    print("Predicting ...")
    acc = predict(model, data_loaders, labs_keys, labs_values, phase='test')

    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))

    return model
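
Example #6 initialises the C3D+GRU model from a pretrained C3D via copy_pretrained_weights(), whose definition is not shown in the snippet. A common way to implement it, sketched under that assumption, is to copy every state_dict entry whose name and shape match and leave the new GRU/classifier layers at their random initialisation:

def copy_pretrained_weights_sketch(src_model, dst_model):
    # Copy parameters that exist in both models with identical shapes.
    src_state = src_model.state_dict()
    dst_state = dst_model.state_dict()
    matched = {k: v for k, v in src_state.items()
               if k in dst_state and v.shape == dst_state[k].shape}
    dst_state.update(matched)
    dst_model.load_state_dict(dst_state)
    return dst_model
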
Example #7
def main(DATASET,
         LABELS,
         CLASS_IDS,
         BATCH_SIZE,
         ANNOTATION_FILE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25,
         base_name=''):
    '''
    Extract sequence features from AutoEncoder.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    partition : str
        'all' / 'train' / 'test' / 'val' : Videos to be considered
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    trajectories, stroke_names
    
    '''
    ###########################################################################

    attn_utils.seed_everything(1234)

    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    #    features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames)

    #    ###########################################################################
    #
    #    features_val, stroke_names_id_val = attn_utils.read_feats(feat_path, feat_val,
    #                                                              snames_val)
    ###########################################################################
    # Create a Dataset
    train_transforms = transforms.Compose([
        T.CenterCrop(300),
        T.ToPILClip(),
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(),
    ])
    test_transforms = transforms.Compose([
        T.CenterCrop(300),
        T.ToPILClip(),
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(),
    ])

    #    ft_path = os.path.join(base_name, feat_path, feat)
    train_dataset = CricketStrokeClipsDataset(train_lst,
                                              DATASET,
                                              LABELS,
                                              CLASS_IDS,
                                              frames_per_clip=SEQ_SIZE,
                                              extracted_frames_per_clip=16,
                                              step_between_clips=STEP,
                                              train=True,
                                              framewiseTransform=False,
                                              transform=train_transforms)
    #    ft_path_val = os.path.join(base_name, feat_path, feat_val)
    val_dataset = CricketStrokeClipsDataset(val_lst,
                                            DATASET,
                                            LABELS,
                                            CLASS_IDS,
                                            frames_per_clip=SEQ_SIZE,
                                            extracted_frames_per_clip=16,
                                            step_between_clips=STEP,
                                            train=False,
                                            framewiseTransform=False,
                                            transform=test_transforms)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)
    #    # created weighted Sampler for class imbalance
    #    samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys, labs_values,
    #                                                   train_lst)
    #    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
    #                              sampler=sampler, worker_init_fn=np.random.seed(12))

    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False)

    data_loaders = {"train": train_loader, "test": val_loader}

    num_classes = len(list(set(labs_values)))

    ###########################################################################

    # load model and set loss function
    model = siamese_net.SiameseI3DNet(400, in_channels=3)
    model.i3d.load_state_dict(
        torch.load(
            '/home/arpan/VisionWorkspace/pytorch-i3d/models/rgb_imagenet.pt'))
    #    model.i3d.replace_logits(2)
    #    model = load_weights(log_path, model, N_EPOCHS,
    #                                    "S"+str(SEQ_SIZE)+"C"+str(cluster_size)+"_SGD")

    lr = 0.1
    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0000001)
    #    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [10, 25]) # [300, 1000])
    # Decay LR by a factor of 0.1 every 7 epochs
    lr_sched = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    # Setup the loss fxn
    criterion = ContrastiveLoss()
    model = model.to(device)
    #    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t", name)


#    # Observe that all parameters are being optimized
##    optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001)
#    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
#
#    # Decay LR by a factor of 0.1 every 7 epochs
#    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

#    lr = 5.0 # learning rate
#    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
#    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
###########################################################################
# Training the model

    start = time.time()

    model = train_model(model,
                        data_loaders,
                        criterion,
                        optimizer,
                        lr_sched,
                        labs_keys,
                        labs_values,
                        num_epochs=N_EPOCHS)

    end = time.time()

    #    # save the best performing model
    save_model_checkpoint(log_path, model, N_EPOCHS,
                          "S" + str(SEQ_SIZE) + "_SGD")
    # Load model checkpoints
    model = load_weights(log_path, model, N_EPOCHS,
                         "S" + str(SEQ_SIZE) + "_SGD")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    #    ###########################################################################

    #    acc = predict(features_val, stroke_names_id_val, model, data_loaders, labs_keys,
    #                  labs_values, SEQ_SIZE, phase='test')

    ###########################################################################

    #    # Extract attention model features
    #    if not os.path.isfile(os.path.join(log_path, "siamgru_feats.pkl")):
    #        if not os.path.exists(log_path):
    #            os.makedirs(log_path)
    #        #    # Extract Grid OF / HOOF features {mth = 2, and vary nbins}
    #        print("Training extraction ... ")
    #        feats_dict, stroke_names = extract_trans_feats(model, DATASET, LABELS,
    #                                                      CLASS_IDS, BATCH_SIZE, SEQ_SIZE,
    #                                                      SEQ_SIZE-1, partition='train', nstrokes=nstrokes,
    #                                                      base_name=log_path)
    #
    #        with open(os.path.join(log_path, "siamgru_feats.pkl"), "wb") as fp:
    #            pickle.dump(feats_dict, fp)
    #        with open(os.path.join(log_path, "siamgru_snames.pkl"), "wb") as fp:
    #            pickle.dump(stroke_names, fp)
    #
    #    if not os.path.isfile(os.path.join(log_path, "siamgru_feats_val.pkl")):
    #        print("Validation extraction ....")
    #        feats_dict_val, stroke_names_val = extract_trans_feats(model, DATASET, LABELS,
    #                                                      CLASS_IDS, BATCH_SIZE, SEQ_SIZE,
    #                                                      SEQ_SIZE-1, partition='val', nstrokes=nstrokes,
    #                                                      base_name=log_path)
    #
    #        with open(os.path.join(log_path, "siamgru_feats_val.pkl"), "wb") as fp:
    #            pickle.dump(feats_dict_val, fp)
    #        with open(os.path.join(log_path, "siamgru_snames_val.pkl"), "wb") as fp:
    #            pickle.dump(stroke_names_val, fp)
    #
    #    if not os.path.isfile(os.path.join(log_path, "siamgru_feats_test.pkl")):
    #        print("Testing extraction ....")
    #        feats_dict_val, stroke_names_val = extract_trans_feats(model, DATASET, LABELS,
    #                                                      CLASS_IDS, BATCH_SIZE, SEQ_SIZE,
    #                                                      SEQ_SIZE-1, partition='test', nstrokes=nstrokes,
    #                                                      base_name=log_path)
    #
    #        with open(os.path.join(log_path, "siamgru_feats_test.pkl"), "wb") as fp:
    #            pickle.dump(feats_dict_val, fp)
    #        with open(os.path.join(log_path, "siamgru_snames_test.pkl"), "wb") as fp:
    #            pickle.dump(stroke_names_val, fp)

    # call count_parameters(model) to display the total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return 0
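
# The ContrastiveLoss(), save_model_checkpoint() and load_weights() helpers used above
# are imported from elsewhere and are not part of this listing. The sketches below show
# what such helpers typically look like; the margin value, the mean reduction and the
# checkpoint file-naming scheme are assumptions, not the repository's exact code.
import os

import torch
import torch.nn as nn
import torch.nn.functional as F


class ContrastiveLossSketch(nn.Module):
    """Margin-based contrastive loss for a pair of Siamese embeddings."""
    def __init__(self, margin=2.0):
        super(ContrastiveLossSketch, self).__init__()
        self.margin = margin

    def forward(self, out1, out2, label):
        # label convention assumed: 0 for similar pairs, 1 for dissimilar pairs
        dist = F.pairwise_distance(out1, out2, keepdim=True)
        loss = (1 - label) * torch.pow(dist, 2) \
            + label * torch.pow(torch.clamp(self.margin - dist, min=0.0), 2)
        return torch.mean(loss)


def save_model_checkpoint_sketch(base_path, model, ep, opt_name):
    # persist the trained weights; the file-name pattern is an assumption
    fname = os.path.join(base_path, "model_ep" + str(ep) + "_" + opt_name + ".pt")
    torch.save(model.state_dict(), fname)
    return fname


def load_weights_sketch(base_path, model, ep, opt_name):
    # restore weights written by save_model_checkpoint_sketch()
    fname = os.path.join(base_path, "model_ep" + str(ep) + "_" + opt_name + ".pt")
    model.load_state_dict(torch.load(fname))
    return model
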
Example no. 8
def main(DATASET,
         LABELS,
         CLASS_IDS,
         BATCH_SIZE,
         ANNOTATION_FILE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25,
         base_name=""):
    '''
    Build feature-sequence datasets, load a pre-trained GRU classifier and evaluate it on the validation features.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    acc : float
        accuracy of the classifier on the evaluation set
    
    '''
    attn_utils.seed_everything(123)
    ###########################################################################
    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    #    clip_transform = transforms.Compose([videotransforms.CenterCrop(224),
    #                                         videotransforms.ToPILClip(),
    #                                         videotransforms.Resize((112, 112)),
    ##                                         videotransforms.RandomCrop(112),
    #                                         videotransforms.ToTensor(),
    #                                         videotransforms.Normalize(),
    #                                        #videotransforms.RandomHorizontalFlip(),\
    #                                        ])
    ft_path = os.path.join(base_name, ft_dir, feat)
    train_dataset = StrokeFeatureSequenceDataset(ft_path,
                                                 train_lst,
                                                 DATASET,
                                                 LABELS,
                                                 CLASS_IDS,
                                                 frames_per_clip=SEQ_SIZE,
                                                 extracted_frames_per_clip=2,
                                                 step_between_clips=STEP,
                                                 train=True)
    ft_path_val = os.path.join(base_name, ft_dir, feat_val)
    val_dataset = StrokeFeatureSequenceDataset(ft_path_val,
                                               test_lst,
                                               DATASET,
                                               LABELS,
                                               CLASS_IDS,
                                               frames_per_clip=SEQ_SIZE,
                                               extracted_frames_per_clip=2,
                                               step_between_clips=STEP,
                                               train=False)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)
    # created weighted Sampler for class imbalance
    samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys,
                                                   labs_values, train_lst)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              sampler=sampler)

    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False)

    data_loaders = {"train": train_loader, "test": val_loader}

    num_classes = len(list(set(labs_values)))

    ###########################################################################

    # load model and set loss function
    model = attn_model.GRUClassifier(INPUT_SIZE, HIDDEN_SIZE, num_classes,
                                     N_LAYERS, bidirectional)

    #    model = load_weights(base_name, model, N_EPOCHS, "Adam")

    #    for ft in model.parameters():
    #        ft.requires_grad = False

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    #    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            # print("\t", name)

    # Observe that all parameters are being optimized
    #    optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001)
    optimizer_ft = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = StepLR(optimizer_ft, step_size=10, gamma=0.1)

    features, stroke_names_id = attn_utils.read_feats(
        os.path.join(base_name, ft_dir), feat, snames)

    ###########################################################################
    # Training the model
    start = time.time()

    #    model = train_model(features, stroke_names_id, model, data_loaders, criterion,
    #                        optimizer_ft, exp_lr_scheduler, labs_keys, labs_values,
    #                        num_epochs=N_EPOCHS)

    end = time.time()

    # save the best performing model
    #    attn_utils.save_model_checkpoint("logs/gru_of20_Hidden512", model, N_EPOCHS,
    #                                     "S"+str(SEQ_SIZE)+"_SGD")
    # Load model checkpoints
    model = attn_utils.load_weights("logs/gru_of20_Hidden512", model, N_EPOCHS,
                                    "S" + str(SEQ_SIZE) + "_SGD")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    #    ###########################################################################

    features_val, stroke_names_id_val = attn_utils.read_feats(
        os.path.join(base_name, ft_dir), feat_val, snames_val)

    acc = predict(features_val,
                  stroke_names_id_val,
                  model,
                  data_loaders,
                  labs_keys,
                  labs_values,
                  phase='test')

    # call count_parameters(model) to display the total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return acc
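
# attn_utils.get_sample_weights() / get_hmdb_sample_weights() used with
# WeightedRandomSampler above are not shown in this listing. A common scheme is inverse
# class frequency; the sketch below assumes one integer class label per training sample
# and illustrates that scheme only, not the repository's implementation.
import numpy as np
import torch
from torch.utils.data import WeightedRandomSampler


def inverse_freq_sample_weights(sample_labels):
    """Return one weight per sample, proportional to 1 / frequency of its class."""
    labels = np.asarray(sample_labels)
    class_counts = np.bincount(labels)          # samples per class
    weights = 1.0 / class_counts[labels]        # rarer classes get larger weights
    return torch.as_tensor(weights, dtype=torch.double)


# usage sketch: draws len(labels) samples per epoch, rebalancing the classes
# sampler = WeightedRandomSampler(inverse_freq_sample_weights(labels), len(labels))
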
Example no. 9
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16, 
         STEP=16, nstrokes=-1, N_EPOCHS=25):
    '''
    Extract optical-flow features for one partition, build BoVW one-hot sequences and evaluate a pre-trained GRU classifier, recording timings.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    acc : float
        accuracy on the evaluated partition
    [s1, s2, s3, s4] : list of float
        timestamps taken at the start, after feature extraction, before prediction and after prediction
    
    '''
    ###########################################################################
    s1 = time.time()

    grid_size = 20
    mag_thresh, bins, density = 2, 20, True
    attn_utils.seed_everything(1234)
    
    if not os.path.isdir(log_path):
        os.makedirs(log_path)
    
    # Read the strokes 
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))
    
    km_filepath = os.path.join(log_path, km_filename)
    if not os.path.isfile(km_filepath+"_C"+str(cluster_size)+".pkl"):
        print("KMeans file not found...")
        sys.exit()
    else:
        # Load from disk, for validation and test sets.
        km_model = pickle.load(open(km_filepath+"_C"+str(cluster_size)+".pkl", 'rb'))
        
    ###########################################################################
    
    nFrames = 0
    partition_lst = val_lst
    strokes_name_id = []
    all_feats = {}
    # extract feats and run on one video at a time
    for i, v_file in enumerate(partition_lst):
        print('-'*60)
        print(str(i+1)+". v_file :: ", v_file)
        if '.avi' in v_file or '.mp4' in v_file:
            v_file = v_file.rsplit('.', 1)[0]
        json_file = v_file + '.json'
        
        # read labels from JSON file
        assert os.path.exists(os.path.join(LABELS, json_file)), "{} doesn't exist!".format(json_file)
            
        with open(os.path.join(LABELS, json_file), 'r') as fr:
            frame_dict = json.load(fr)
        frame_indx = list(frame_dict.values())[0]
        for m,n in frame_indx:
            k = v_file+"_"+str(m)+"_"+str(n)
            print("Stroke {} - {}".format(m,n))
            strokes_name_id.append(k)
            # Extract the stroke features
            if grid_size is None:
                all_feats[k] = extract_flow_angles(os.path.join(DATASET, v_file+".avi"), \
                                                 m, n, bins, mag_thresh, density)
            else:
                all_feats[k] = extract_flow_grid(os.path.join(DATASET, v_file+".avi"), \
                                                 m, n, grid_size)
            nFrames += (all_feats[k].shape[0] + 1)
    
    print("Create numpy one hot representation for val features...")
    onehot_feats_val = create_bovw_SA(all_feats, strokes_name_id, km_model)
    
    ft_path_partition = os.path.join(log_path, "C"+str(cluster_size)+"_partition.pkl")
    with open(ft_path_partition, "wb") as fp:
        pickle.dump(onehot_feats_val, fp)
    ###########################################################################
    
    ###########################################################################
    s2 = time.time()
    # Create a Dataset
    partition_dataset = StrokeFeatureSequenceDataset(ft_path_partition, partition_lst, DATASET, LABELS, CLASS_IDS, 
                                         frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2,
                                         step_between_clips=STEP, train=False)
    
    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)
    
    partition_loader = DataLoader(dataset=partition_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    data_loaders = {"test": partition_loader}

    num_classes = len(list(set(labs_values)))
    
#    vis_clusters(features, onehot_feats, stroke_names_id, 2, DATASET, log_path)
    
    ###########################################################################    
    
    # load model and set loss function
    model = attn_model.GRUBoWSAClassifier(INPUT_SIZE, HIDDEN_SIZE, num_classes, 
                                     N_LAYERS, bidirectional)
    
    model = model.to(device)

    ###########################################################################
    # Training the model    
    
    model = attn_utils.load_weights(log_path, model, N_EPOCHS, 
                                    "S"+str(SEQ_SIZE)+"C"+str(cluster_size)+"_SGD")
    
#    ###########################################################################
    s3 = time.time()
    acc = predict(all_feats, strokes_name_id, model, data_loaders, labs_keys, 
                  labs_values, SEQ_SIZE, phase='test')
    
    # call count_parameters(model) to display the total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    print("Total Frames : {}".format(nFrames))
    s4 = time.time()
    return acc, [s1, s2, s3, s4]
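
# extract_flow_angles() and extract_flow_grid() used in the loop above are not shown in
# this listing. The sketch below is a hypothetical reconstruction of a grid-sampled dense
# optical flow extractor for one stroke (frames m..n of a video); the Farneback
# parameters and the (dx, dy) grid layout are assumptions.
import cv2
import numpy as np


def extract_flow_grid_sketch(video_path, m, n, grid_size=20):
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_FRAMES, m)
    feats, prev_gray = [], None
    for _ in range(m, n + 1):
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if prev_gray is not None:
            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                                0.5, 3, 15, 3, 5, 1.2, 0)
            # keep the flow vectors on a coarse grid; one flattened row per frame pair
            sampled = flow[::grid_size, ::grid_size, :]
            feats.append(sampled.reshape(-1))
        prev_gray = gray
    cap.release()
    return np.stack(feats) if feats else np.empty((0, 0))
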
Example no. 10
def main(DATASET,
         LABELS,
         BATCH_SIZE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25):
    '''
    Build BoVW one-hot sequences from pre-extracted features and train/evaluate a GRU classifier on HMDB51.
    
    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    BATCH_SIZE : int
        size for batch of clips
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    
    Returns:
    --------
    acc : float
        test accuracy of the trained classifier
    
    '''
    ###########################################################################

    attn_utils.seed_everything(1234)

    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames)

    # get matrix of features from dictionary (N, vec_size)
    vecs = []
    for key in sorted(list(features.keys())):
        vecs.append(features[key])
    vecs = np.vstack(vecs)

    vecs[np.isnan(vecs)] = 0
    vecs[np.isinf(vecs)] = 0

    #fc7 layer output size (4096)
    INP_VEC_SIZE = vecs.shape[-1]
    print("INP_VEC_SIZE = ", INP_VEC_SIZE)

    km_filepath = os.path.join(log_path, km_filename)
    #    # Uncomment only while training.
    if not os.path.isfile(km_filepath + "_C" + str(cluster_size) + ".pkl"):
        km_model = make_codebook(vecs, cluster_size)  #, model_type='gmm')
        ##    # Save to disk, if training is performed
        print("Writing the KMeans models to disk...")
        pickle.dump(
            km_model,
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", "wb"))
    else:
        # Load from disk, for validation and test sets.
        km_model = pickle.load(
            open(km_filepath + "_C" + str(cluster_size) + ".pkl", 'rb'))

    print("Create numpy one hot representation for train features...")
    onehot_feats = create_bovw_onehot(features, stroke_names_id, km_model)

    ft_path = os.path.join(log_path, "C" + str(cluster_size) + "_train.pkl")
    #    ft_path = os.path.join(feat_path, feat)
    with open(ft_path, "wb") as fp:
        pickle.dump(onehot_feats, fp)
    with open(
            os.path.join(log_path,
                         "C" + str(cluster_size) + "_snames_train.pkl"),
            "wb") as fp:
        pickle.dump(stroke_names_id, fp)
    #########################################################################
    #########################################################################
    features_test, stroke_names_id_test = attn_utils.read_feats(
        feat_path, feat_test, snames_test)
    print("Create numpy one hot representation for val features...")
    onehot_feats_test = create_bovw_onehot(features_test, stroke_names_id_test,
                                           km_model)

    ft_path_test = os.path.join(log_path,
                                "C" + str(cluster_size) + "_test.pkl")
    #    ft_path_test = os.path.join(feat_path, feat_test)
    with open(ft_path_test, "wb") as fp:
        pickle.dump(onehot_feats_test, fp)
    with open(
            os.path.join(log_path,
                         "C" + str(cluster_size) + "_snames_test.pkl"),
            "wb") as fp:
        pickle.dump(stroke_names_id_test, fp)

    ###########################################################################
    # Create a Dataset

    train_dataset = hmdb.HMDB51FeatureSequenceDataset(
        ft_path,
        DATASET,
        LABELS,
        frames_per_clip=SEQ_SIZE,
        extracted_frames_per_clip=16,
        step_between_clips=STEP,
        train=True)

    test_dataset = hmdb.HMDB51FeatureSequenceDataset(
        ft_path_test,
        DATASET,
        LABELS,
        frames_per_clip=SEQ_SIZE,
        extracted_frames_per_clip=16,
        step_between_clips=STEP,
        train=False)

    #    display_sizes(train_dataset.video_list)
    #    display_sizes(test_dataset.video_list)
    #    # created weighted Sampler for class imbalance
    samples_weight = get_hmdb_sample_weights(train_dataset)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,  # shuffle handled by the weighted sampler
        sampler=sampler,
        # seed each worker RNG; passing np.random.seed(12) directly evaluates to None
        worker_init_fn=lambda worker_id: np.random.seed(12 + worker_id))

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=False)

    data_loaders = {"train": train_loader, "test": test_loader}

    #    num_classes = len(list(set(labs_values)))
    num_classes = 51

    ###########################################################################

    # load model and set loss function
    model = attn_model.GRUBoWHAClassifier(INPUT_SIZE, HIDDEN_SIZE, num_classes,
                                          N_LAYERS, bidirectional)

    #    model = load_weights(base_name, model, N_EPOCHS, "Adam")

    #    for ft in model.parameters():
    #        ft.requires_grad = False

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    #    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            # print("\t", name)

    # Observe that all parameters are being optimized
    optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001)
    #    optimizer_ft = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = StepLR(optimizer_ft, step_size=10, gamma=0.1)

    ###########################################################################
    # Training the model

    start = time.time()

    model = train_model(model,
                        data_loaders,
                        criterion,
                        optimizer_ft,
                        exp_lr_scheduler,
                        num_epochs=N_EPOCHS)

    end = time.time()

    # save the best performing model
    attn_utils.save_model_checkpoint(
        log_path, model, N_EPOCHS,
        "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD")
    # Load model checkpoints
    model = attn_utils.load_weights(
        log_path, model, N_EPOCHS,
        "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################

    acc = predict(model, data_loaders, SEQ_SIZE, phase='test')

    # call count_parameters(model) to display the total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return acc
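
# make_codebook(), create_bovw_onehot() and autoenc_utils.count_parameters() used above
# are defined outside this listing. The sketches below show the usual form of such
# helpers; the KMeans settings and the one-hot layout are assumptions.
import numpy as np
from sklearn.cluster import KMeans


def count_parameters_sketch(model):
    # total number of trainable parameters of a torch.nn.Module
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def make_codebook_sketch(vecs, n_clusters, seed=1234):
    # fit a KMeans codebook on the stacked (N, vec_size) feature matrix
    return KMeans(n_clusters=n_clusters, random_state=seed).fit(vecs)


def create_bovw_onehot_sketch(features, stroke_names, km_model):
    # map each per-frame feature vector to a one-hot row over its nearest cluster,
    # giving a (T, cluster_size) sequence per stroke
    onehot = {}
    for key in stroke_names:
        words = km_model.predict(features[key])                # (T,) cluster indices
        mat = np.zeros((len(words), km_model.n_clusters), dtype=np.float32)
        mat[np.arange(len(words)), words] = 1.0
        onehot[key] = mat
    return onehot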