Example #1
    def read_data(self, list_of_videos):
        
        T_o = self.T_o
        T_a = self.T_a
        max_sq_len = 0
        totLabels= []
       
        for vid in list_of_videos:

            
            start_To = 0
            with open(vid, 'r') as file_ptr:
                content = file_ptr.read().split()[:-1]
            # a scalar used to normalize the observed action lengths; the paper specifies
            # that it should be equal to the mean number of actions per video
            T = (1.0/self.alpha)*len(content)
            #label_seq, length_seq = get_label_length_seq(content) 
            while start_To+T_o+T_a < len(content):
                obs  = content[start_To:start_To+T_o]  
                target = content[start_To+T_o+T_a]
                label_seq, length_seq = get_label_length_seq(obs)
                # p_seq encodes what is actually observed: one row per action segment,
                # with nClasses + 1 columns (the extra column stores the normalized segment length);
                # it is padded to max_seq_sz rows, since a Breakfast video contains at most 25 actions.
                p_seq = []
                seq_len = 0
                # iterate over the observed segments together with their lengths
                # (avoids list.index, which breaks when an action label repeats)
                for i, seg_len in zip(label_seq, length_seq):
                    p_seq.append(np.zeros((self.nClasses+1)))
                    # length of the observed action, normalized by T
                    p_seq[-1][-1] = seg_len/T
                    # 1-hot encoding of the action class
                    p_seq[-1][self.actions_dict[i]] = 1
                    seq_len += 1

                    if i not in totLabels:
                        totLabels.append(i)
                # track the longest observed sequence and zero-pad the remaining slots
                if(seq_len > max_sq_len):
                    max_sq_len = seq_len

                for j in range(self.max_seq_sz - seq_len):
                    p_seq.append(np.zeros((self.nClasses+1)))

                # the target is the next action, encoded 1-hot
                #label_target,_= get_label_length_seq(target)
                p_tar = np.zeros((self.nClasses))

                p_tar[self.actions_dict[target]] = 1


                example = [p_seq, p_tar]
                self.list_of_examples.append(example)
                start_To = start_To+T_a 

            
            random.shuffle(self.list_of_examples)

        return
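A minimal sketch of what get_label_length_seq is assumed to do, inferred from how its outputs are used in the example above (it is not defined in these snippets): it collapses a list of per-frame labels into contiguous run-length segments. The name and exact behaviour here are assumptions.

def get_label_length_seq_sketch(frame_labels):
    """Collapse per-frame labels into (label_seq, length_seq) run-length segments."""
    label_seq, length_seq = [], []
    for label in frame_labels:
        if not label_seq or label != label_seq[-1]:
            label_seq.append(label)
            length_seq.append(1)
        else:
            length_seq[-1] += 1
    return label_seq, length_seq

# e.g. ['pour', 'pour', 'stir', 'stir', 'stir'] -> (['pour', 'stir'], [2, 3])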
Example #2
 def predict(self, sess, model_save_path, input_x, sigma, actions_dict):
         self.saver.restore(sess, model_save_path)
         result = sess.run([self.prediction], feed_dict={self.input_vid: input_x})[0]      
         result = np.reshape(result,[self.nRows, 48])
         result = self.__post_process(result, sigma)
         output = []
         for i in range(len(result)):
             # map the argmax class index back to its action name (dict views are not indexable in Python 3)
             output.append(list(actions_dict.keys())[list(actions_dict.values()).index(np.argmax(result[i]))])
         label_seq, length_seq = get_label_length_seq(output)
         return label_seq, length_seq
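A hedged usage sketch for the predict() method in Example #2; model is assumed to be an already-built instance of the class this method belongs to, and the checkpoint path, input shape, sigma value, and action dictionary are placeholders, not values from the original project.

import numpy as np
import tensorflow as tf

actions_dict = {"pour_milk": 0, "stir_cereals": 1}   # hypothetical action -> index map
input_x = np.zeros((1, model.nRows, 48 + 1))         # assumed layout: rows x (classes + length column)
with tf.Session() as sess:
    label_seq, length_seq = model.predict(sess, "./model.ckpt",
                                          input_x, sigma=3, actions_dict=actions_dict)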
Example #3
                recog_sq[str(init)] = ["start"]
            for init in range(8):
                target_sq[str(init)] = ["start"]

            T = (1.0 / args.alpha) * vid_len
            while start_To + S_enc_frame + S_ant_frame + beta_frame < len(
                    content):
                k = 0
                for stride in range(
                        start_To + S_enc_frame + beta_frame,
                        start_To + S_enc_frame + S_ant_frame + beta_frame,
                        beta_frame):
                    obs = content[start_To:start_To + stride]
                    target = content[start_To + S_enc_frame + S_ant_frame +
                                     beta_frame]
                    label_seq, length_seq = get_label_length_seq(obs)
                    # p_seq encodes the observed segments: one row per action segment,
                    # with nClasses + 1 columns (the extra column stores the normalized segment length);
                    # it is padded to max_seq_sz rows, since a Breakfast video contains at most 25 actions.
                    p_seq = []
                    seq_len = 0
                    # iterate over segments with their lengths (avoids the fragile list.index lookup)
                    for i, seg_len in zip(label_seq, length_seq):
                        p_seq.append(np.zeros((nClasses + 1)))
                        # length of the observed action, normalized by T
                        p_seq[-1][-1] = seg_len / T
                        # 1-hot encoding of the action class
                        p_seq[-1][actions_dict[i]] = 1
                        seq_len += 1
                    # zero-pad every remaining slot up to max_seq_sz
                    if (seq_len > max_sq_len):
                        max_sq_len = seq_len
                    for j in range(args.max_seq_sz - seq_len):
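A worked illustration of a single p_seq row as built in Examples #1 and #3; the concrete numbers are hypothetical. Each observed segment becomes a vector of nClasses + 1 entries: a 1-hot class indicator plus the segment length normalized by T.

import numpy as np

nClasses = 48
T = 200.0                  # assumed normalizer: video length / alpha
class_idx = 5              # hypothetical index from actions_dict
seg_frames = 40            # hypothetical segment length in frames

row = np.zeros(nClasses + 1)
row[class_idx] = 1         # 1-hot class indicator
row[-1] = seg_frames / T   # normalized segment length -> 0.2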
Example #4
             
         for obs_p in obs_percentages:
             
             if args.input_type == "decoded":
                 with open(args.decoded_path+"/obs"+str(obs_p)+"/"+f_name+'.txt', 'r') as file_ptr:
                     observed_content = file_ptr.read().split('\n')[:-1]
                 vid_len = int(len(observed_content)/obs_p)
             elif args.input_type == "gt":
                 observed_content = content[:int(obs_p*vid_len)]
             T = (1.0/args.alpha)*vid_len
             
             for pred_p in pred_percentages:
                 pred_len = int(pred_p*vid_len)  
                 output_len = pred_len + len(observed_content)
                 
                 label_seq, length_seq = get_label_length_seq(observed_content)                    
                 with tf.Session() as sess:
                     label_seq, length_seq = model.predict(sess, model_restore_path, pred_len, label_seq, length_seq, actions_dict, T)
                 
                 recognition = []
                 for i in range(len(label_seq)):
                     recognition = np.concatenate((recognition, [label_seq[i]]*int(length_seq[i])))
                 recognition = recognition[:output_len]
                 #write results to file
                 f_name = vid.split('/')[-1].split('.')[0]
                 path=args.results_save_path+"/obs"+str(obs_p)+"-pred"+str(pred_p)
                 write_predictions(path, f_name, recognition)
                 
                     
 elif args.model == "cnn":
     model = ModelCNN(args.nRows, nClasses)
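write_predictions is not shown in these snippets; below is a plausible minimal sketch (an assumption, not the original helper) that writes one predicted label per output frame to the results directory used above.

import os

def write_predictions_sketch(path, f_name, recognition):
    """Write the per-frame labels in `recognition` to <path>/<f_name>.txt, one per line."""
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, f_name + '.txt'), 'w') as f:
        for label in recognition:
            f.write(str(label) + '\n')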
Example #5
n_T = np.zeros(len(classes))
n_F = np.zeros(len(classes))
totalGT = []
totalRecog = []
IoU = 0
index_act = int(args.nactions)
div = 0

for filename in filelist:

    gt, recog = read_sequences(
        filename, args.ground_truth_path,
        obs_percentage)  # gt and recog have the same length
    if (recog != 0):

        label_seq_gt, length_seq_gt = get_label_length_seq(gt)
        label_seq_recog, length_seq_recog = get_label_length_seq(recog)

        gt_dict = get_label_dict(gt)
        recog_dict = get_label_dict(recog)

        if (len(label_seq_gt) >= index_act):
            act = label_seq_gt[index_act - 1]
            start_gt = gt_dict[index_act - 1][1]
            stop_gt = gt_dict[index_act - 1][2]

            act_recog = recog[start_gt:stop_gt]
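get_label_dict is also undefined in these snippets. From its use above (gt_dict[k][1] and gt_dict[k][2] read as the start and stop frame of the k-th segment), here is an assumed sketch that maps each contiguous run of equal labels to a (label, start, stop) tuple.

def get_label_dict_sketch(frame_labels):
    """Return one (label, start_frame, stop_frame) tuple per contiguous segment (stop is exclusive)."""
    segments = []
    start = 0
    for idx in range(1, len(frame_labels) + 1):
        if idx == len(frame_labels) or frame_labels[idx] != frame_labels[start]:
            segments.append((frame_labels[start], start, idx))
            start = idx
    return segments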
Example #6
                    observed_content = content[:int(
                        obs_p * vid_len)]  # observed_actions_per_frame
                unobserved_content = content[int(obs_p * vid_len):]
                T = (1.0 / args.alpha) * vid_len

                pred_percentages = [.1, .2, .3, .5, .7, .8]
                pred_percentages = [
                    pred_p for pred_p in pred_percentages
                    if obs_p + pred_p <= 1.0
                ]
                for pred_p in pred_percentages:
                    pred_len = int(pred_p * vid_len)  # num_frames_to_predict
                    output_len = pred_len + len(
                        observed_content)  # write out observed + predictions

                    label_seq, length_seq = get_label_length_seq(
                        observed_content)
                    obs_label_seq, obs_length_seq = list(label_seq), list(
                        length_seq)
                    num_obs_actions = len(obs_label_seq)
                    with tf.Session() as sess:
                        label_seq, length_seq = model.predict(
                            sess, model_restore_path, pred_len, label_seq,
                            length_seq, actions_dict, T)

                    recognition = []
                    for i in range(len(label_seq)):
                        recognition = np.concatenate(
                            (recognition, [label_seq[i]] * int(length_seq[i])))
                    recognition = recognition[:output_len]
                    # write results to file
                    f_name = vid.split('/')[-1].split('.')[0]
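A hedged sketch of how the anticipated part could be scored against the ground truth after one (obs_p, pred_p) pass of the loop above; the variable names follow the snippet, but this frame-accuracy computation is an assumption, not code from the original file.

pred_part = list(recognition[len(observed_content):output_len])
gt_part = unobserved_content[:len(pred_part)]
correct = sum(1 for p, g in zip(pred_part, gt_part) if p == g)
frame_acc = correct / max(len(pred_part), 1)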