def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25):
    '''
    Build BoVW word sequences, load a TransformerModelSA checkpoint and
    extract its features for the train / val / test partitions.

    Besides its arguments the function reads module-level settings:
    log_path, feat_path, feat / feat_val / feat_test,
    snames / snames_val / snames_test, km_filename, cluster_size and device.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE : str
        handed to attn_utils.get_cluster_labels (presumably a file path)
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
        Only used for the datasets; extraction is called with SEQ_SIZE - 1.
    nstrokes : int
        partial extraction of features (do not execute for entire dataset)
    N_EPOCHS : int
        forwarded to load_weights with the checkpoint tag
        (presumably selects the saved epoch -- confirm against load_weights)

    Returns:
    --------
    int : always 0. All results are written as pickle files under log_path.
    '''
    ###########################################################################
    attn_utils.seed_everything(1234)
    os.makedirs(log_path, exist_ok=True)

    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames)

    # get matrix of features from dictionary (N, vec_size)
    vecs = np.vstack([features[key] for key in sorted(features.keys())])
    # NaN / Inf entries are zeroed before clustering
    vecs[~np.isfinite(vecs)] = 0

    INP_VEC_SIZE = vecs.shape[-1]
    print("INP_VEC_SIZE = ", INP_VEC_SIZE)

    # Build the codebook once; later runs reuse the pickled model.
    km_file = (os.path.join(log_path, km_filename) + "_C" + str(cluster_size)
               + ".pkl")
    if not os.path.isfile(km_file):
        km_model = make_codebook(vecs, cluster_size)
        print("Writing the KMeans models to disk...")
        with open(km_file, "wb") as fp:
            pickle.dump(km_model, fp)
    else:
        with open(km_file, "rb") as fp:
            km_model = pickle.load(fp)

    def _write_bovw(split, split_feats, split_names):
        # Run create_bovw_SA on one split, pickle the result, return the path.
        print("Create numpy one hot representation for {} features...".format(
            split))
        onehot = create_bovw_SA(split_feats, split_names, km_model)
        out_path = os.path.join(
            log_path, "C" + str(cluster_size) + "_" + split + ".pkl")
        with open(out_path, "wb") as fp:
            pickle.dump(onehot, fp)
        return out_path

    ft_path = _write_bovw("train", features, stroke_names_id)

    ###########################################################################
    features_val, stroke_names_id_val = attn_utils.read_feats(
        feat_path, feat_val, snames_val)
    ft_path_val = _write_bovw("val", features_val, stroke_names_id_val)

    ###########################################################################
    features_test, stroke_names_id_test = attn_utils.read_feats(
        feat_path, feat_test, snames_test)
    _write_bovw("test", features_test, stroke_names_id_test)

    ###########################################################################
    # Create a Dataset
    # NOTE: datasets, loaders, loss, optimizer and scheduler are only consumed
    # by train_model, which is disabled in this variant (weights are loaded
    # from a checkpoint below). They are kept so training can be re-enabled.
    train_dataset = StrokeFeaturePairsDataset(ft_path, train_lst, DATASET,
                                              LABELS, CLASS_IDS,
                                              frames_per_clip=SEQ_SIZE,
                                              extracted_frames_per_clip=2,
                                              step_between_clips=STEP,
                                              train=True)
    val_dataset = StrokeFeaturePairsDataset(ft_path_val, val_lst, DATASET,
                                            LABELS, CLASS_IDS,
                                            frames_per_clip=SEQ_SIZE,
                                            extracted_frames_per_clip=2,
                                            step_between_clips=STEP,
                                            train=False)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False)
    # the validation loader is registered under the "test" key
    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    # load model and set loss function
    ntokens = cluster_size  # the size of vocabulary
    emsize = 200  # embedding dimension
    nhid = 200  # the dimension of the feedforward network model in nn.TransformerEncoder
    nlayers = 2  # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
    nhead = 2  # the number of heads in the multiheadattention models
    dropout = 0.2  # the dropout value
    model = tt.TransformerModelSA(ntokens, emsize, nhead, nhid, nlayers,
                                  dropout).to(device)

    # Setup the loss fxn
    criterion = nn.MSELoss()

    # list the parameters that would be optimised
    for name, param in model.named_parameters():
        if param.requires_grad:
            print("\t", name)

    lr = 5.0  # learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2,
                                                gamma=0.95)

    ###########################################################################
    # Load model checkpoints (the tag says "_SGD"; it is part of the file name)
    model = load_weights(
        log_path, model, N_EPOCHS,
        "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD")

    ###########################################################################
    # Extract attention model features; a partition is skipped when its
    # feature pickle already exists.
    extraction_jobs = (
        ("train", "Training extraction ... ",
         "trans_feats.pkl", "trans_snames.pkl"),
        ("val", "Validation extraction ....",
         "trans_feats_val.pkl", "trans_snames_val.pkl"),
        ("test", "Testing extraction ....",
         "trans_feats_test.pkl", "trans_snames_test.pkl"),
    )
    for partition, message, feats_file, names_file in extraction_jobs:
        feats_out = os.path.join(log_path, feats_file)
        if os.path.isfile(feats_out):
            continue
        print(message)
        part_feats, part_names = extract_trans_feats(
            model, DATASET, LABELS, CLASS_IDS, BATCH_SIZE, SEQ_SIZE,
            SEQ_SIZE - 1, partition=partition, nstrokes=nstrokes,
            base_name=log_path)
        with open(feats_out, "wb") as fp:
            pickle.dump(part_feats, fp)
        with open(os.path.join(log_path, names_file), "wb") as fp:
            pickle.dump(part_names, fp)

    # call count_paramters(model) for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return 0
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25, base_name=''):
    '''
    Fine-tune a pretrained SiameseGRUNet as a stroke classifier.

    Pretrained weights are loaded from model_path, every existing parameter
    is frozen, and model.g1.fc2 / model.g1.fc3 are replaced by new linear
    layers (fc3 has num_classes outputs). The model is trained with
    cross-entropy, checkpointed under log_path, reloaded and evaluated with
    predict on the validation loader.

    Besides its arguments the function reads module-level settings:
    log_path, model_path, feat_path, feat, feat_val, snames, snames_val,
    cluster_size, INPUT_SIZE, HIDDEN_SIZE, N_LAYERS, bidirectional and device.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE : str
        handed to attn_utils.get_cluster_labels (presumably a file path)
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    nstrokes : int
        not used by this function
    N_EPOCHS : int
        number of training epochs; also forwarded to the checkpoint helpers
    base_name : str
        prefix joined in front of feat_path to locate the feature files

    Returns:
    --------
    int : always 0
    '''
    ###########################################################################
    attn_utils.seed_everything(1234)
    os.makedirs(log_path, exist_ok=True)

    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames)

    ###########################################################################
    # Create a Dataset
    ft_path = os.path.join(base_name, feat_path, feat)
    train_dataset = StrokeFeaturePairsDataset(ft_path, train_lst, DATASET,
                                              LABELS, CLASS_IDS,
                                              frames_per_clip=SEQ_SIZE,
                                              extracted_frames_per_clip=2,
                                              step_between_clips=STEP,
                                              train=True)
    ft_path_val = os.path.join(base_name, feat_path, feat_val)
    val_dataset = StrokeFeaturePairsDataset(ft_path_val, val_lst, DATASET,
                                            LABELS, CLASS_IDS,
                                            frames_per_clip=SEQ_SIZE,
                                            extracted_frames_per_clip=2,
                                            step_between_clips=STEP,
                                            train=False)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    # created weighted Sampler for class imbalance
    samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys,
                                                   labs_values, train_lst)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    # The original passed worker_init_fn=np.random.seed(12): that expression
    # seeds NumPy immediately and hands None to the DataLoader. The effective
    # behaviour (seed once, no worker hook) is written out explicitly here.
    np.random.seed(12)
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              sampler=sampler)
    val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False)
    # the validation loader is registered under the "test" key
    data_loaders = {"train": train_loader, "test": val_loader}

    num_classes = len(set(labs_values))

    ###########################################################################
    model = siamese_net.SiameseGRUNet(INPUT_SIZE, HIDDEN_SIZE, N_LAYERS,
                                      bidirectional)
    # copy the pretrained weights
    model = load_weights(model_path, model, N_EPOCHS,
                         "S30" + "C" + str(cluster_size) + "_SGD")

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    print("Param layers frozen:")
    for param in model.parameters():
        param.requires_grad = False

    # Fresh layers are created after freezing, so they stay trainable.
    model.g1.fc2 = nn.Linear(HIDDEN_SIZE * (int(bidirectional) + 1),
                             HIDDEN_SIZE)
    model.g1.fc3 = nn.Linear(HIDDEN_SIZE, num_classes)
    model = model.to(device)

    print("Params to learn:")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print("\t", name)

    # All parameters are handed to SGD; frozen ones never receive a gradient
    # and are therefore left untouched by the optimizer step.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    # Decay LR by a factor of 0.1 every 15 epochs
    scheduler = StepLR(optimizer, step_size=15, gamma=0.1)

    ###########################################################################
    # Training the model
    start = time.time()
    model = train_model(features, stroke_names_id, model, data_loaders,
                        criterion, optimizer, scheduler, labs_keys,
                        labs_values, num_epochs=N_EPOCHS)
    end = time.time()

    checkpoint_tag = "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD"
    # save the best performing model
    save_model_checkpoint(log_path, model, N_EPOCHS, checkpoint_tag)
    # Load model checkpoints
    model = load_weights(log_path, model, N_EPOCHS, checkpoint_tag)

    print("Total Execution time for {} epoch : {}".format(N_EPOCHS,
                                                          (end - start)))

    ###########################################################################
    # The validation features were never read in this function (the read was
    # commented out), so predict() referenced undefined names. Read them here,
    # mirroring how the training features are read above.
    # NOTE(review): assumes snames_val is defined next to feat_val -- confirm.
    features_val, stroke_names_id_val = attn_utils.read_feats(
        feat_path, feat_val, snames_val)
    predict(features_val, stroke_names_id_val, model, data_loaders, labs_keys,
            labs_values, SEQ_SIZE, phase='test')

    ###########################################################################
    # call count_paramters(model) for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return 0
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25, base_name=""):
    '''
    Train a ConvVAE on stroke clips and dump its features per partition.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE : str
        handed to attn_utils.get_cluster_labels (presumably a file path)
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
        Only used for the datasets; feature extraction is called with 16.
    nstrokes : int
        not used by this function; feature extraction is always called with -1
    N_EPOCHS : int
        number of training epochs; also part of the checkpoint file name
    base_name : str
        directory for the checkpoint and the feature pickles. An empty
        string means the current working directory.

    Returns:
    --------
    the ConvVAE model (after training and optional checkpoint loading)
    '''
    ###########################################################################
    # NOTE(review): the original carries a "seed everything" comment here but
    # performs no seeding -- confirm whether that is intended.
    # os.makedirs("") raises, so only create the directory when one is named.
    if base_name:
        os.makedirs(base_name, exist_ok=True)

    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    clip_transform = transforms.Compose([
        videotransforms.CenterCrop(224),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])
    # or use CricketStrokesFlowDataset
    train_dataset = CricketStrokesDataset(train_lst, DATASET, LABELS,
                                          CLASS_IDS,
                                          frames_per_clip=SEQ_SIZE,
                                          step_between_clips=STEP, train=True,
                                          framewiseTransform=False,
                                          transform=clip_transform)
    val_dataset = CricketStrokesDataset(val_lst, DATASET, LABELS, CLASS_IDS,
                                        frames_per_clip=SEQ_SIZE,
                                        step_between_clips=STEP, train=False,
                                        framewiseTransform=False,
                                        transform=clip_transform)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False)
    # the validation loader is registered under the "test" key
    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    ###########################################################################
    # load model and set loss function
    model = conv_encdec_model.ConvVAE()
    model = model.to(device)

    # Setup the loss fxn
    criterion = nn.MSELoss()

    # Collect (and list) every parameter that requires gradients.
    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("ConvVAE : {}".format(name))

    optimizer_ft = torch.optim.SGD(params_to_update, lr=0.01, momentum=0.9)
    # Decay LR by a factor of 0.1 every 10 epochs
    lr_scheduler = StepLR(optimizer_ft, step_size=10, gamma=0.1)

    ###########################################################################
    # Training the model
    start = time.time()
    model = train_model(model, data_loaders, criterion, optimizer_ft,
                        lr_scheduler, labs_keys, labs_values, seq=8,
                        num_epochs=N_EPOCHS)
    end = time.time()
    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################
    # NOTE(review): saving the state dict is disabled in this variant, so when
    # a checkpoint file already exists its weights replace the ones that were
    # just trained -- confirm this is the intended workflow.
    model_name = os.path.join(base_name,
                              "conv_vae_ep" + str(N_EPOCHS) + "_SGD.pt")
    if os.path.isfile(model_name):
        model.load_state_dict(torch.load(model_name))
        print("Loading ConvVAE weights... : {}".format(model_name))

    ###########################################################################
    print("Writing prediction dictionary....")
    # A partition is skipped when its feature pickle already exists.
    for partition in ("train", "val", "test"):
        feats_out = os.path.join(base_name, "conv_vae_" + partition + ".pkl")
        if os.path.isfile(feats_out):
            continue
        feats_dict, stroke_names = extract_attn_feats(
            model, DATASET, LABELS, CLASS_IDS, BATCH_SIZE, SEQ_SIZE, 16,
            partition, -1, base_name)
        with open(feats_out, "wb") as fp:
            pickle.dump(feats_dict, fp)
        names_out = os.path.join(base_name,
                                 "conv_vae_snames_" + partition + ".pkl")
        with open(names_out, "wb") as fp:
            pickle.dump(stroke_names, fp)

    print("#Parameters ConvVAE : {} ".format(
        autoenc_utils.count_parameters(model)))
    return model
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25):
    '''
    Train and evaluate a two-stream GRU classifier on BoVW word sequences.

    Two feature sets are read, one codebook per set is created (or loaded
    from log_path when already pickled), both sets are converted with
    create_bovw_SA, and a GRUBoWMultiStreamClassifier is trained, saved,
    reloaded and evaluated with predict on the validation loader.

    Besides its arguments the function reads module-level settings:
    log_path, km_filename, cluster_size, feat_path / feat_path2, the
    feat* and snames* file names, INPUT_SIZE, HIDDEN_SIZE, N_LAYERS,
    bidirectional and device.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE : str
        handed to attn_utils.get_cluster_labels (presumably a file path)
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    nstrokes : int
        not used by this function
    N_EPOCHS : int
        number of training epochs; also forwarded to the checkpoint helpers

    Returns:
    --------
    whatever predict returns for phase='test' (bound to `acc`)
    '''
    ###########################################################################
    attn_utils.seed_everything(1234)
    os.makedirs(log_path, exist_ok=True)

    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    features, stroke_names_id = attn_utils.read_feats(feat_path, feat, snames)
    features2, stroke_names_id2 = attn_utils.read_feats(
        feat_path2, feat2, snames2)

    # get matrix of features from dictionary (N, vec_size); both streams are
    # stacked in the key order of the first stream
    vecs = np.vstack([features[key] for key in stroke_names_id])
    vecs2 = np.vstack([features2[key] for key in stroke_names_id])
    # NaN / Inf entries are zeroed before clustering
    vecs[~np.isfinite(vecs)] = 0
    vecs2[~np.isfinite(vecs2)] = 0

    INP_VEC_SIZE, INP_VEC_SIZE2 = vecs.shape[-1], vecs2.shape[-1]
    print("INP_VEC_SIZE = {} : INP_VEC_SIZE2 = {}".format(
        INP_VEC_SIZE, INP_VEC_SIZE2))

    def _load_or_make_codebook(vectors, pkl_path):
        # Reuse a pickled codebook when present, otherwise build and save one.
        if os.path.isfile(pkl_path):
            with open(pkl_path, "rb") as fp:
                return pickle.load(fp)
        codebook = make_codebook(vectors, cluster_size)
        print("Writing the KMeans models to disk...")
        with open(pkl_path, "wb") as fp:
            pickle.dump(codebook, fp)
        return codebook

    km_prefix = os.path.join(log_path, km_filename) + "_C" + str(cluster_size)
    km_model = _load_or_make_codebook(vecs, km_prefix + ".pkl")
    km_model2 = _load_or_make_codebook(vecs2, km_prefix + "_2.pkl")

    onehot_prefix = os.path.join(log_path, "onehot_C" + str(cluster_size))

    print("Create numpy one hot representation for train features...")
    onehot_feats = create_bovw_SA(features, stroke_names_id, km_model)
    print("Create numpy one hot representation for train features2...")
    onehot_feats2 = create_bovw_SA(features2, stroke_names_id2, km_model2)

    ft_path = onehot_prefix + "_train.pkl"
    ft_path2 = onehot_prefix + "_train_2.pkl"
    with open(ft_path, "wb") as fp:
        pickle.dump(onehot_feats, fp)
    with open(ft_path2, "wb") as fp:
        pickle.dump(onehot_feats2, fp)

    ###########################################################################
    features_val, stroke_names_id_val = attn_utils.read_feats(
        feat_path, feat_val, snames_val)
    features_val2, stroke_names_id_val2 = attn_utils.read_feats(
        feat_path2, feat_val2, snames_val2)

    print("Create numpy one hot representation for val features...")
    onehot_feats_val = create_bovw_SA(features_val, stroke_names_id_val,
                                      km_model)
    print("Create numpy one hot representation for val features2...")
    onehot_feats_val2 = create_bovw_SA(features_val2, stroke_names_id_val2,
                                       km_model2)

    ft_path_val = onehot_prefix + "_val.pkl"
    ft_path_val2 = onehot_prefix + "_val_2.pkl"
    with open(ft_path_val, "wb") as fp:
        pickle.dump(onehot_feats_val, fp)
    with open(ft_path_val2, "wb") as fp:
        pickle.dump(onehot_feats_val2, fp)

    ###########################################################################
    # Create a Dataset
    train_dataset = StrokeFeatureSequencesDataset(
        ft_path, ft_path2, train_lst, DATASET, LABELS, CLASS_IDS,
        frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2,
        step_between_clips=STEP, train=True)
    val_dataset = StrokeFeatureSequencesDataset(
        ft_path_val, ft_path_val2, val_lst, DATASET, LABELS, CLASS_IDS,
        frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2,
        step_between_clips=STEP, train=False)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    # created weighted Sampler for class imbalance
    samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys,
                                                   labs_values, train_lst)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    # The original passed worker_init_fn=np.random.seed(12): that expression
    # seeds NumPy immediately and hands None to the DataLoader. The effective
    # behaviour (seed once, no worker hook) is written out explicitly here.
    np.random.seed(12)
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              sampler=sampler)
    val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False)
    # the validation loader is registered under the "test" key
    data_loaders = {"train": train_loader, "test": val_loader}

    num_classes = len(set(labs_values))

    ###########################################################################
    # load model and set loss function
    model = attn_model.GRUBoWMultiStreamClassifier(INPUT_SIZE, INPUT_SIZE,
                                                   HIDDEN_SIZE, HIDDEN_SIZE,
                                                   num_classes, N_LAYERS,
                                                   bidirectional)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)

    print("Params to learn:")
    optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.001)
    # Decay LR by a factor of 0.1 every 15 epochs
    exp_lr_scheduler = StepLR(optimizer_ft, step_size=15, gamma=0.1)

    ###########################################################################
    # Training the model
    start = time.time()
    model = train_model(features, stroke_names_id, model, data_loaders,
                        criterion, optimizer_ft, exp_lr_scheduler, labs_keys,
                        labs_values, num_epochs=N_EPOCHS)
    end = time.time()

    # The tag ends in "_SGD" although Adam is used; it is part of the
    # checkpoint file name and is therefore left unchanged.
    checkpoint_tag = "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD"
    # save the best performing model
    attn_utils.save_model_checkpoint(log_path, model, N_EPOCHS,
                                     checkpoint_tag)
    # Load model checkpoints
    model = attn_utils.load_weights(log_path, model, N_EPOCHS, checkpoint_tag)

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################
    acc = predict(features_val, stroke_names_id_val, model, data_loaders,
                  labs_keys, labs_values, SEQ_SIZE, phase='test')

    # call count_paramters(model) for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return acc
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25, base_name=""):
    '''
    Train a Conv3D encoder/decoder pair on stroke clips, checkpoint it and
    write the prediction dictionary for the evaluation loader to disk.

    HIDDEN_SIZE, bidirectional and device are read from the enclosing module.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE :
        forwarded to attn_utils.get_cluster_labels() to obtain the label
        keys and values
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15),
        (8, 23) ...
    nstrokes : int
        not referenced by this function; kept for interface compatibility
    N_EPOCHS : int
        number of epochs passed to train_model() and to the checkpoint
        helpers
    base_name : str
        directory where the cached sampler weights and "pred_dict.pkl" are
        written (an empty string resolves to the current working directory);
        it is also forwarded to the checkpoint helpers.

    Returns:
    --------
    (encoder, decoder) as returned by
    attn_utils.load_attn_model_checkpoint() after training.
    '''
    ###########################################################################
    # seed everything
    attn_utils.seed_everything(1234)

    # os.makedirs("") raises FileNotFoundError, so only create the output
    # directory when one was actually given.
    if base_name:
        os.makedirs(base_name, exist_ok=True)

    # Divide the highlight dataset files into training, validation and test
    # sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    train_transform = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])
    test_transform = transforms.Compose([
        videotransforms.CenterCrop(224),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])

    train_dataset = CricketStrokesDataset(train_lst, DATASET, LABELS,
                                          CLASS_IDS,
                                          frames_per_clip=SEQ_SIZE,
                                          step_between_clips=STEP,
                                          train=True,
                                          framewiseTransform=False,
                                          transform=train_transform)
    val_dataset = CricketStrokesDataset(val_lst, DATASET, LABELS, CLASS_IDS,
                                        frames_per_clip=SEQ_SIZE,
                                        step_between_clips=STEP,
                                        train=False,
                                        framewiseTransform=False,
                                        transform=test_transform)

    ###########################################################################
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)
    num_classes = len(set(labs_values))

    # Weighted sampler for class imbalance. The weights are cached on disk,
    # keyed by the number of classes and the training-set size.
    weights_path = os.path.join(
        base_name,
        "weights_c" + str(num_classes) + "_" + str(len(train_dataset)) +
        ".pkl")
    if os.path.isfile(weights_path):
        with open(weights_path, "rb") as fp:
            samples_weight = pickle.load(fp)
    else:
        samples_weight = attn_utils.get_sample_weights(train_dataset,
                                                       labs_keys, labs_values,
                                                       train_lst)
        with open(weights_path, "wb") as fp:
            pickle.dump(samples_weight, fp)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    # NOTE: ``worker_init_fn=np.random.seed(12)`` would call seed() eagerly
    # and hand ``None`` to the DataLoader. Seeding explicitly has the same
    # effect on the NumPy RNG without the misleading keyword argument.
    np.random.seed(12)
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              sampler=sampler)
    val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False)
    # The validation loader is registered under the "test" phase key.
    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    # load model and set loss function
    encoder = conv_attn_model.Conv3DEncoder(HIDDEN_SIZE, 1, bidirectional)
    decoder = conv_attn_model.Conv3DDecoder(HIDDEN_SIZE, num_classes, 1, 1,
                                            bidirectional)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    # Display the trainable parameters of both networks
    print("Params to learn:")
    for tag, net in (("Encoder", encoder), ("Decoder", decoder)):
        for name, param in net.named_parameters():
            if param.requires_grad:
                print("{} : {}".format(tag, name))

    encoder_optimizer = torch.optim.SGD(encoder.parameters(), lr=0.01,
                                        momentum=0.9)
    decoder_optimizer = torch.optim.SGD(decoder.parameters(), lr=0.01,
                                        momentum=0.9)

    # Decay LR by a factor of 0.1 every 10 scheduler steps.
    # NOTE(review): only the encoder optimizer is attached to the scheduler,
    # so the decoder learning rate is never decayed here -- confirm intended.
    lr_scheduler = StepLR(encoder_optimizer, step_size=10, gamma=0.1)

    ###########################################################################
    # Training the model
    start = time.time()
    (encoder, decoder) = train_model(encoder, decoder, data_loaders,
                                     criterion, encoder_optimizer,
                                     decoder_optimizer, lr_scheduler,
                                     labs_keys, labs_values,
                                     num_epochs=N_EPOCHS)
    end = time.time()

    # save the best performing model
    attn_utils.save_attn_model_checkpoint(base_name, (encoder, decoder),
                                          N_EPOCHS, "SGD")
    # Load model checkpoints
    encoder, decoder = attn_utils.load_attn_model_checkpoint(
        base_name, encoder, decoder, N_EPOCHS, "SGD")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################
    print("Writing prediction dictionary....")
    # The second value returned by predict() is not used here.
    pred_out_dict, _ = predict(encoder, decoder, data_loaders, criterion,
                               labs_keys, labs_values, phase='test')

    with open(os.path.join(base_name, "pred_dict.pkl"), "wb") as fp:
        pickle.dump(pred_out_dict, fp)

    print("#Parameters Encoder : {} ".format(
        autoenc_utils.count_parameters(encoder)))
    print("#Parameters Decoder : {} ".format(
        autoenc_utils.count_parameters(decoder)))

    return encoder, decoder
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25, base_name=""):
    '''
    Fine-tune a C3D + GRU classifier, initialised from pretrained C3D
    weights, on stroke clips; checkpoint it and run prediction on the
    evaluation loader.

    HIDDEN_SIZE, bidirectional, device and wts_path are read from the
    enclosing module.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE :
        forwarded to attn_utils.get_cluster_labels() to obtain the label
        keys and values
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15),
        (8, 23) ...
    nstrokes : int
        not referenced by this function; kept for interface compatibility
    N_EPOCHS : int
        number of epochs passed to train_model() and to the checkpoint
        helpers
    base_name : str
        directory where the cached sampler weights are written (an empty
        string resolves to the current working directory); it is also
        forwarded to the checkpoint helpers.

    Returns:
    --------
    model as returned by attn_utils.load_weights() after training.
    '''
    # os.makedirs("") raises FileNotFoundError, so only create the output
    # directory when one was actually given.
    if base_name:
        os.makedirs(base_name, exist_ok=True)

    attn_utils.seed_everything(1234)

    ###########################################################################
    # Divide the highlight dataset files into training, validation and test
    # sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(300),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])
    test_transforms = transforms.Compose([
        videotransforms.CenterCrop(300),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])

    train_dataset = CricketStrokesDataset(train_lst, DATASET, LABELS,
                                          CLASS_IDS,
                                          frames_per_clip=SEQ_SIZE,
                                          step_between_clips=STEP,
                                          train=True,
                                          framewiseTransform=False,
                                          transform=train_transforms)
    val_dataset = CricketStrokesDataset(val_lst, DATASET, LABELS, CLASS_IDS,
                                        frames_per_clip=SEQ_SIZE,
                                        step_between_clips=STEP,
                                        train=False,
                                        framewiseTransform=False,
                                        transform=test_transforms)

    ###########################################################################
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)
    num_classes = len(set(labs_values))

    # Weighted sampler for class imbalance. The weights are cached on disk,
    # keyed by the number of classes and the training-set size.
    weights_path = os.path.join(
        base_name,
        "weights_c" + str(num_classes) + "_" + str(len(train_dataset)) +
        ".pkl")
    if os.path.isfile(weights_path):
        with open(weights_path, "rb") as fp:
            samples_weight = pickle.load(fp)
    else:
        samples_weight = attn_utils.get_sample_weights(train_dataset,
                                                       labs_keys, labs_values,
                                                       train_lst)
        with open(weights_path, "wb") as fp:
            pickle.dump(samples_weight, fp)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    # NOTE: ``worker_init_fn=np.random.seed(12)`` would call seed() eagerly
    # and hand ``None`` to the DataLoader. Seeding explicitly has the same
    # effect on the NumPy RNG without the misleading keyword argument.
    np.random.seed(12)
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              sampler=sampler)
    val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False)
    # The validation loader is registered under the "test" phase key.
    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    # load model and set loss function
    model = conv_attn_model.C3DGRUv2Orig(HIDDEN_SIZE, 1, num_classes,
                                         bidirectional)
    # Load the pretrained C3D network and copy its weights into the model.
    model_pretrained = c3d.C3D()
    model_pretrained.load_state_dict(
        torch.load("../localization_rnn/" + wts_path))
    copy_pretrained_weights(model_pretrained, model)

    model = model.to(device)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    # Collect (and display) the parameters that will be optimised
    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t {}".format(name))

    optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 30 scheduler steps
    lr_scheduler = StepLR(optimizer_ft, step_size=30, gamma=0.1)

    ###########################################################################
    # Training the model
    start = time.time()
    model = train_model(model, data_loaders, criterion, optimizer_ft,
                        lr_scheduler, labs_keys, labs_values,
                        num_epochs=N_EPOCHS)
    end = time.time()

    # save the best performing model, then load it back from the checkpoint
    checkpoint_tag = "SGD_c8_c3dgruEp60Step30"
    attn_utils.save_model_checkpoint(base_name, model, N_EPOCHS,
                                     checkpoint_tag)
    model = attn_utils.load_weights(base_name, model, N_EPOCHS,
                                    checkpoint_tag)

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################
    print("Predicting ...")
    # The value returned by predict() is not used; the model is returned.
    predict(model, data_loaders, labs_keys, labs_values, phase='test')

    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))

    return model
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25, base_name=''):
    '''
    Train a Siamese I3D network with a contrastive loss on stroke clips and
    checkpoint the result under the module-level log_path.

    log_path and device are read from the enclosing module.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE :
        forwarded to attn_utils.get_cluster_labels()
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15),
        (8, 23) ...
    nstrokes : int
        not referenced by the active code of this function
    N_EPOCHS : int
        number of epochs passed to train_model() and the checkpoint helpers
    base_name : str
        not referenced by this function; outputs go to log_path

    Returns:
    --------
    0
    '''
    ###########################################################################
    attn_utils.seed_everything(1234)

    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # Divide the highlight dataset files into training, validation and test
    # sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create the datasets. Train and evaluation use the same clip-level
    # pipeline, each with its own Compose instance.
    def _clip_pipeline():
        return transforms.Compose([
            T.CenterCrop(300),
            T.ToPILClip(),
            T.Resize((224, 224)),
            T.ToTensor(),
            T.Normalize(),
        ])

    train_transforms = _clip_pipeline()
    test_transforms = _clip_pipeline()

    shared_kwargs = dict(frames_per_clip=SEQ_SIZE,
                         extracted_frames_per_clip=16,
                         step_between_clips=STEP,
                         framewiseTransform=False)
    train_dataset = CricketStrokeClipsDataset(train_lst, DATASET, LABELS,
                                              CLASS_IDS, train=True,
                                              transform=train_transforms,
                                              **shared_kwargs)
    val_dataset = CricketStrokeClipsDataset(val_lst, DATASET, LABELS,
                                            CLASS_IDS, train=False,
                                            transform=test_transforms,
                                            **shared_kwargs)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    # Plain shuffling is used for training; the weighted sampler for class
    # imbalance is currently disabled.
    data_loaders = {
        "train": DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                            shuffle=True),
        # the validation loader is registered under the "test" phase key
        "test": DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                           shuffle=False),
    }

    num_classes = len(set(labs_values))

    ###########################################################################
    # load model, optimizer, scheduler and loss function
    model = siamese_net.SiameseI3DNet(400, in_channels=3)
    pretrained_state = torch.load(
        '/home/arpan/VisionWorkspace/pytorch-i3d/models/rgb_imagenet.pt')
    model.i3d.load_state_dict(pretrained_state)

    lr = 0.1
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                          weight_decay=1e-7)
    # Decay LR by a factor of 0.1 every 10 scheduler steps
    lr_sched = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # Setup the loss fxn
    criterion = ContrastiveLoss()
    model = model.to(device)

    # Display the names of the trainable parameters
    for name, param in model.named_parameters():
        if param.requires_grad:
            print("\t", name)

    ###########################################################################
    # Training the model
    start = time.time()
    model = train_model(model, data_loaders, criterion, optimizer, lr_sched,
                        labs_keys, labs_values, num_epochs=N_EPOCHS)
    end = time.time()

    # save the best performing model, then load it back from the checkpoint
    checkpoint_tag = "S" + str(SEQ_SIZE) + "_SGD"
    save_model_checkpoint(log_path, model, N_EPOCHS, checkpoint_tag)
    model = load_weights(log_path, model, N_EPOCHS, checkpoint_tag)

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################
    # NOTE: the predict() call and the extract_trans_feats() based feature
    # extraction for the train / val / test partitions (writing
    # "siamgru_feats*.pkl" and "siamgru_snames*.pkl" under log_path) are
    # currently disabled.

    # call count_parameters(model) for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return 0
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25, base_name=""):
    '''
    Evaluate a GRU classifier on pre-extracted stroke features using the
    weights stored under "logs/gru_of20_Hidden512". The training call is
    currently disabled, so no training happens here.

    ft_dir, feat, feat_val, snames, snames_val, INPUT_SIZE, HIDDEN_SIZE,
    N_LAYERS, bidirectional and device are read from the enclosing module.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE :
        forwarded to attn_utils.get_cluster_labels()
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15),
        (8, 23) ...
    nstrokes : int
        not referenced by this function
    N_EPOCHS : int
        forwarded to attn_utils.load_weights()
    base_name : str
        joined with ft_dir to locate the feature files

    Returns:
    --------
    acc : value returned by predict() for the 'test' phase
    '''
    attn_utils.seed_everything(123)

    ###########################################################################
    # Divide the highlight dataset files into training, validation and test
    # sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create the feature-sequence datasets (no clip transform is applied)
    feat_dir = os.path.join(base_name, ft_dir)
    seq_kwargs = dict(frames_per_clip=SEQ_SIZE,
                      extracted_frames_per_clip=2,
                      step_between_clips=STEP)

    ft_path = os.path.join(feat_dir, feat)
    train_dataset = StrokeFeatureSequenceDataset(ft_path, train_lst, DATASET,
                                                 LABELS, CLASS_IDS,
                                                 train=True, **seq_kwargs)
    ft_path_val = os.path.join(feat_dir, feat_val)
    # NOTE(review): the evaluation dataset pairs the feat_val file with
    # test_lst (not val_lst) -- confirm this is intended.
    val_dataset = StrokeFeatureSequenceDataset(ft_path_val, test_lst, DATASET,
                                               LABELS, CLASS_IDS,
                                               train=False, **seq_kwargs)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    # weighted sampler for class imbalance
    samples_weight = attn_utils.get_sample_weights(train_dataset, labs_keys,
                                                   labs_values, train_lst)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    data_loaders = {
        "train": DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                            sampler=sampler),
        "test": DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                           shuffle=False),
    }

    num_classes = len(set(labs_values))

    ###########################################################################
    # load model and set loss function
    model = attn_model.GRUClassifier(INPUT_SIZE, HIDDEN_SIZE, num_classes,
                                     N_LAYERS, bidirectional)

    # Loss, optimizer, scheduler and training features are only consumed by
    # the disabled train_model() call below; they are still built here.
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)

    params_to_update = [
        param for _, param in model.named_parameters() if param.requires_grad
    ]

    optimizer_ft = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # Decay LR by a factor of 0.1 every 10 scheduler steps
    exp_lr_scheduler = StepLR(optimizer_ft, step_size=10, gamma=0.1)

    features, stroke_names_id = attn_utils.read_feats(feat_dir, feat, snames)

    ###########################################################################
    # Training is disabled; start/end therefore bracket no work.
    start = time.time()
    # model = train_model(features, stroke_names_id, model, data_loaders,
    #                     criterion, optimizer_ft, exp_lr_scheduler,
    #                     labs_keys, labs_values, num_epochs=N_EPOCHS)
    end = time.time()

    # Saving a checkpoint is disabled as well; load the stored weights.
    model = attn_utils.load_weights("logs/gru_of20_Hidden512", model,
                                    N_EPOCHS, "S" + str(SEQ_SIZE) + "_SGD")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################
    features_val, stroke_names_id_val = attn_utils.read_feats(
        feat_dir, feat_val, snames_val)

    acc = predict(features_val, stroke_names_id_val, model, data_loaders,
                  labs_keys, labs_values, phase='test')

    # call count_parameters(model) for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    return acc
def main(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE, SEQ_SIZE=16,
         STEP=16, nstrokes=-1, N_EPOCHS=25):
    '''
    Extract optical-flow features for every stroke of the validation videos,
    encode them with a stored KMeans codebook, and run a stored GRU BoW
    classifier on them, recording timestamps for each stage.

    log_path, km_filename, cluster_size, INPUT_SIZE, HIDDEN_SIZE, N_LAYERS,
    bidirectional and device are read from the enclosing module.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE :
        forwarded to attn_utils.get_cluster_labels()
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15),
        (8, 23) ...
    nstrokes : int
        not referenced by this function
    N_EPOCHS : int
        forwarded to attn_utils.load_weights()

    Returns:
    --------
    acc, [s1, s2, s3, s4]
        acc is the value returned by predict(); the list holds the
        time.time() stamps taken at function start, after feature
        extraction/encoding, after model loading, and after prediction.

    Raises:
    -------
    SystemExit when the KMeans codebook file is missing; AssertionError when
    a label JSON file is missing.
    '''
    ###########################################################################
    s1 = time.time()
    grid_size = 20
    mag_thresh, bins, density = 2, 20, True
    attn_utils.seed_everything(1234)

    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # Divide the highlight dataset files into training, validation and test
    # sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    km_path = os.path.join(log_path, km_filename) + "_C" \
        + str(cluster_size) + ".pkl"
    if not os.path.isfile(km_path):
        print("KMeans file not found...")
        # NOTE(review): sys.exit() without an argument exits with status 0
        # even though this is a failure path -- confirm this is intended.
        sys.exit()
    # Load the codebook from disk. The context manager closes the file
    # handle, which a bare pickle.load(open(...)) would leak.
    # pickle can execute arbitrary code on load; only load codebooks
    # produced by this pipeline.
    with open(km_path, 'rb') as fp:
        km_model = pickle.load(fp)

    ###########################################################################
    nFrames = 0
    partition_lst = val_lst
    strokes_name_id = []
    all_feats = {}
    # extract feats and run on one video at a time
    for i, v_file in enumerate(partition_lst):
        print('-'*60)
        print(str(i+1)+". v_file :: ", v_file)
        if '.avi' in v_file or '.mp4' in v_file:
            v_file = v_file.rsplit('.', 1)[0]
        json_file = v_file + '.json'
        label_path = os.path.join(LABELS, json_file)

        # read labels from JSON file
        assert os.path.exists(label_path), "{} doesn't exist!".format(json_file)
        with open(label_path, 'r') as fr:
            frame_dict = json.load(fr)
        # the stroke (start, end) pairs are the first value in the JSON dict
        frame_indx = list(frame_dict.values())[0]

        # NOTE(review): the video path always uses the ".avi" suffix, even
        # when the list entry ended in ".mp4" -- confirm against the dataset.
        video_path = os.path.join(DATASET, v_file + ".avi")
        for m, n in frame_indx:
            k = v_file + "_" + str(m) + "_" + str(n)
            print("Stroke {} - {}".format(m, n))
            strokes_name_id.append(k)
            # Extract the stroke features
            if grid_size is None:
                all_feats[k] = extract_flow_angles(video_path, m, n, bins,
                                                   mag_thresh, density)
            else:
                all_feats[k] = extract_flow_grid(video_path, m, n, grid_size)
            # one more frame than feature rows is counted per stroke
            nFrames += (all_feats[k].shape[0] + 1)

    print("Create numpy one hot representation for val features...")
    onehot_feats_val = create_bovw_SA(all_feats, strokes_name_id, km_model)

    ft_path_partition = os.path.join(
        log_path, "C" + str(cluster_size) + "_partition.pkl")
    with open(ft_path_partition, "wb") as fp:
        pickle.dump(onehot_feats_val, fp)

    ###########################################################################
    s2 = time.time()
    # Create a Dataset
    partition_dataset = StrokeFeatureSequenceDataset(
        ft_path_partition, partition_lst, DATASET, LABELS, CLASS_IDS,
        frames_per_clip=SEQ_SIZE, extracted_frames_per_clip=2,
        step_between_clips=STEP, train=False)

    # get labels
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    partition_loader = DataLoader(dataset=partition_dataset,
                                  batch_size=BATCH_SIZE, shuffle=False)
    data_loaders = {"test": partition_loader}

    num_classes = len(set(labs_values))

    ###########################################################################
    # build the model and load its stored weights (no training happens here)
    model = attn_model.GRUBoWSAClassifier(INPUT_SIZE, HIDDEN_SIZE,
                                          num_classes, N_LAYERS,
                                          bidirectional)
    model = model.to(device)
    model = attn_utils.load_weights(
        log_path, model, N_EPOCHS,
        "S" + str(SEQ_SIZE) + "C" + str(cluster_size) + "_SGD")

    ###########################################################################
    s3 = time.time()
    acc = predict(all_feats, strokes_name_id, model, data_loaders, labs_keys,
                  labs_values, SEQ_SIZE, phase='test')

    # call count_parameters(model) for displaying total no. of parameters
    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))
    print("Total Frames : {}".format(nFrames))
    s4 = time.time()

    return acc, [s1, s2, s3, s4]
def train_attn_model(DATASET, LABELS, CLASS_IDS, BATCH_SIZE, ANNOTATION_FILE,
                     SEQ_SIZE=16, STEP=16, nstrokes=-1, N_EPOCHS=25,
                     base_name="", dest_dir=""):
    '''
    Restore an attention encoder/decoder pair from its checkpoint and dump the
    features it produces for the train, val and test partitions to dest_dir.

    The call to train_model is currently commented out: datasets, loss and
    optimizers are still built, but the weights come from
    attn_utils.load_attn_model_checkpoint.

    Besides its arguments, the function reads these names from module scope:
    ft_dir, feat, feat_val, snames, snames_val, INPUT_SIZE, HIDDEN_SIZE,
    TARGET_SIZE, bidirectional and device.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE : str
        passed to attn_utils.get_cluster_labels to read the cluster labels
    SEQ_SIZE : int
        no. of frames in a clip
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15),
        (8, 23) ...
    nstrokes : int
        forwarded unchanged to extract_attn_feats (partial extraction of
        features, per the original notes)
    N_EPOCHS : int
        passed to load_attn_model_checkpoint to pick the checkpoint; also
        echoed in the timing message
    base_name : str
        root joined with ft_dir to locate the input feature files; also passed
        to load_attn_model_checkpoint and extract_attn_feats
    dest_dir : str
        directory receiving attn_feats*.pkl / attn_snames*.pkl. It is created
        when missing; "" means the current working directory.

    Returns:
    --------
    (None, None)
    '''
    ###########################################################################
    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    train_lst, val_lst, test_lst = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    feats_dir = os.path.join(base_name, ft_dir)
    ft_path = os.path.join(feats_dir, feat)
    train_dataset = StrokeFeaturePairsDataset(ft_path, train_lst, DATASET,
                                              LABELS, CLASS_IDS,
                                              frames_per_clip=SEQ_SIZE,
                                              extracted_frames_per_clip=2,
                                              step_between_clips=STEP,
                                              train=True)
    ft_path_val = os.path.join(feats_dir, feat_val)
    val_dataset = StrokeFeaturePairsDataset(ft_path_val, val_lst, DATASET,
                                            LABELS, CLASS_IDS,
                                            frames_per_clip=SEQ_SIZE,
                                            extracted_frames_per_clip=2,
                                            step_between_clips=STEP,
                                            train=False)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE,
                            shuffle=False)
    # The validation loader is registered under the "test" key.
    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    # Only needed by the disabled train_model / predict calls below.
    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    ###########################################################################
    # load model and set loss function
    encoder = attn_model.Encoder(INPUT_SIZE, HIDDEN_SIZE, 1, bidirectional)
    # Decoder input width doubles when the encoder is bidirectional.
    # NOTE(review): max_length looks like the number of feature vectors per
    # clip given extracted_frames_per_clip=2 -- confirm against the dataset.
    decoder = attn_model.AttentionDecoder(HIDDEN_SIZE * (1 + bidirectional),
                                          TARGET_SIZE,
                                          max_length=SEQ_SIZE - 2 + 1)
    encoder, decoder = encoder.to(device), decoder.to(device)

    # Setup the loss fxn
    criterion = nn.MSELoss()

    encoder_optimizer = torch.optim.SGD(encoder.parameters(), lr=0.01,
                                        momentum=0.9)
    decoder_optimizer = torch.optim.SGD(decoder.parameters(), lr=0.01,
                                        momentum=0.9)

    # Decay LR by a factor of 0.1 every 10 epochs. Only the encoder optimizer
    # is attached to the scheduler.
    lr_scheduler = StepLR(encoder_optimizer, step_size=10, gamma=0.1)

    # NOTE(review): the loaded features are consumed only by the disabled
    # train_model call; the read is kept so behaviour stays unchanged.
    features, stroke_names_id = attn_utils.read_feats(feats_dir, feat, snames)

    ###########################################################################
    # Training the model (disabled: the checkpoint is loaded instead)
    start = time.time()
    # (encoder, decoder) = train_model(features, stroke_names_id, encoder,
    #                                  decoder, data_loaders, criterion,
    #                                  encoder_optimizer, decoder_optimizer,
    #                                  lr_scheduler, labs_keys, labs_values,
    #                                  num_epochs=N_EPOCHS)
    end = time.time()

    # save the best performing model
    # attn_utils.save_attn_model_checkpoint(base_name, (encoder, decoder),
    #                                       N_EPOCHS, "Adam")
    # Load model checkpoints
    # NOTE(review): the checkpoint tag is "Adam" although the optimizers built
    # above are SGD; the tag must match whatever name the checkpoint was saved
    # under, so it is left untouched.
    encoder, decoder = attn_utils.load_attn_model_checkpoint(
        base_name, encoder, decoder, N_EPOCHS, "Adam")
    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################
    # NOTE(review): only the disabled predict call uses these.
    features_val, stroke_names_id_val = attn_utils.read_feats(
        feats_dir, feat_val, snames_val)
    # predict(features_val, stroke_names_id_val, encoder, decoder,
    #         data_loaders, labs_keys, labs_values)

    ###########################################################################
    # Extract attention model features
    # os.makedirs("") raises FileNotFoundError, so only create the directory
    # when a non-empty destination was given; "" already means the cwd.
    if dest_dir and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    # (partition, file-name suffix, progress message) for each output pair.
    extraction_jobs = (
        ("train", "", "Training extraction ... "),
        ("val", "_val", "Validation extraction ...."),
        ("test", "_test", "Testing extraction ...."),
    )
    for partition, suffix, message in extraction_jobs:
        feats_file = os.path.join(dest_dir, "attn_feats" + suffix + ".pkl")
        # Skip partitions whose features were already written by a past run.
        if os.path.isfile(feats_file):
            continue
        print(message)
        feats_dict, stroke_names = extract_attn_feats(
            encoder, decoder, DATASET, LABELS, CLASS_IDS, BATCH_SIZE,
            SEQ_SIZE, STEP, partition=partition, nstrokes=nstrokes,
            base_name=base_name)
        with open(feats_file, "wb") as fp:
            pickle.dump(feats_dict, fp)
        snames_file = os.path.join(dest_dir, "attn_snames" + suffix + ".pkl")
        with open(snames_file, "wb") as fp:
            pickle.dump(stroke_names, fp)

    return None, None