def create_db(self):
    '''Create dataset pickles from avi files.

    Each pickle is a tuple: (video_nparray, video_labels), where
    video_nparray is an array of 500 items; each item is a video
    sequence composed of 15 frames (3 s at 5 fps).
    '''
    f = os.listdir(self.avi_dir)[0]
    vid = video.asvideo(os.path.join(self.avi_dir, f)).V
    # 500 samples of 3-second videos.
    ds_data = np.ndarray((500, 15) + vid.shape[1:], dtype='int8')
    ds_labels = np.ndarray((500, 3), dtype='uint8')
    # Divide the dataset into multiple files.
    # FIXME: the idx % 500 indexing can overwrite a slot before the
    # subset is saved (e.g. sample 501 lands back on slot 1).
    idx = 0
    for f in os.listdir(self.avi_dir):
        if ".avi" in f:
            vid = video.asvideo(os.path.join(self.avi_dir, f)).V
            for idx2 in range(int(len(vid) / 15) - 1):
                # Center pixel values roughly around zero for int8 storage.
                ds_data[idx % 500] = vid[idx2 * 15:(idx2 + 1) * 15] - 125
                # First two numbers in the file name: person id, scenario id.
                ds_labels[idx % 500, 0] = int(re.findall(r'\d+', f)[0])
                ds_labels[idx % 500, 1] = int(re.findall(r'\d+', f)[1])
                # Action label sits between underscores in the file name.
                ds_labels[idx % 500, 2] = self.labels.index(
                    re.findall(r'_(.*)_d+', f)[0])
                if idx % 500 == 0 and idx > 0:
                    # Save the current 500-sample subset to its own file.
                    print(idx, " saving to ", self.db_path(int(idx / 500)))
                    self.write_db(int(idx / 500), ds_data, ds_labels)
                idx += 1
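# create_db relies on self.db_path and self.write_db, which are not shown in
# this listing. A minimal sketch of what such methods could look like,
# assuming each subset is pickled as the (video_nparray, video_labels) tuple
# described in the docstring; self.db_dir and the 'db_<n>.pkl' naming are
# placeholders, not the original scheme.
import os
import pickle

def db_path(self, n):
    # Hypothetical: one pickle file per 500-sample subset.
    return os.path.join(self.db_dir, 'db_%d.pkl' % n)

def write_db(self, n, ds_data, ds_labels):
    # Pickle the (video_nparray, video_labels) tuple for subset n.
    with open(self.db_path(n), 'wb') as fh:
        pickle.dump((ds_data, ds_labels), fh)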
def match_ncc(T, A):
    '''Implements normalized cross-correlation of the template T against
    the search video A. Weighting of the template is done in here as well.
    '''
    szT = T.shape
    szA = A.shape
    # Leave this in if you want to weight the template: down-weight pixels
    # dominated by the static (4) and lack-of-structure (6) channels.
    W = 1 - T[:, :, :, 6] - T[:, :, :, 4]
    T = T * W.reshape([szT[0], szT[1], szT[2], 1])
    # Debug view of the weighted template channels.
    split(video.asvideo(T)).display()
    M = np.zeros([szA[0], szA[1], szA[2]], dtype=np.float32)
    for i in range(7):
        # Skip the static and lack-of-structure channels.
        if i == 4 or i == 6:
            continue
        t = np.squeeze(T[:, :, :, i])
        # Need to zero-mean the template per the normxcorr3d function below.
        t = t - t.mean()
        M = M + normxcorr3d(t, np.squeeze(A[:, :, :, i]))
    # Average over the 5 motion channels.
    M = M / 5
    return M
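# normxcorr3d is referenced by match_ncc but not included in this listing.
# A minimal sketch of 3-D normalized cross-correlation built on
# scipy.signal.fftconvolve, assuming the template t has already been
# zero-meaned by the caller (as match_ncc does); 'same' mode keeps the
# output the size of the search volume a. This is an illustration, not the
# original implementation.
import numpy as np
from scipy.signal import fftconvolve

def normxcorr3d(t, a, eps=1e-8):
    # Correlation equals convolution with a flipped kernel.
    t_flip = t[::-1, ::-1, ::-1]
    numer = fftconvolve(a, t_flip, mode='same')
    # Local statistics of a over the template window via box filtering.
    ones = np.ones_like(t)
    n = t.size
    local_sum = fftconvolve(a, ones, mode='same')
    local_sqsum = fftconvolve(a * a, ones, mode='same')
    local_var = np.maximum(local_sqsum - local_sum ** 2 / n, 0)
    denom = np.sqrt(local_var) * np.sqrt((t * t).sum())
    return numer / (denom + eps)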
def featurize_video(vid_in, factor=1, maxcols=None, lock=None):
    '''Takes a video and converts it into its 5 dimensions of "pure"
    oriented energy. This is a slight deviation from the original Action
    Spotting method, which uses all 7 dimensions. We found that the extra
    two dimensions (static and lack of structure) decreased performance,
    and that using them to remove "background" sharpened the other 5
    motion energies.

    Input: vid_in is either a numpy video array or a path to a video file.
    lock is a multiprocessing.Lock, needed if this is being called from
    multiple threads.
    '''
    # Convert to a video object if needed.
    svid_obj = None
    if type(vid_in) is video.Video:
        svid_obj = vid_in
    else:
        svid_obj = video.asvideo(vid_in, factor, maxcols=maxcols, lock=lock)
    if svid_obj.V.shape[3] > 1:
        svid_obj = svid_obj.rgb2gray()

    # Calculate and store the 7D feature videos for the search video.
    (left_search, right_search, up_search, down_search,
     static_search, flicker_search, los_search) = \
        calc_spatio_temporal_energies(svid_obj)

    # Compress all search feature videos into a single 7D array.
    search_final = compress_to_7D(left_search, right_search, up_search,
                                  down_search, static_search,
                                  flicker_search, los_search, 7)

    # Do not force a downsampling.
    # res_search_final = call_resample_with_7D(search_final)

    # Take away the static and structure features and normalize again.
    fin = normalize(takeaway(linstretch(search_final)))
    return fin
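# A usage sketch for featurize_video, assuming a path to a video file;
# 'clip.avi' is a placeholder file name. Per the docstring, the result
# should carry the 5 motion-energy channels once the static and
# lack-of-structure channels have been removed.
feats = featurize_video('clip.avi', factor=2)
print(feats.shape)  # expected: (frames, rows, cols, 5)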
def split(V):
    '''Split an N-band video into a 1-band video laid out side-by-side,
    like pretty.'''
    sz = np.asarray(V.shape)
    n = sz[3]
    sz[3] = 1
    w = sz[2]
    sz[2] *= n
    A = np.zeros(sz, dtype=np.float32)
    for i in np.arange(n):
        # Place band i in its own horizontal slot.
        A[:, :, i * w:(i + 1) * w, 0] = V[:, :, :, i]
    return video.asvideo(A)
def human_detection(video_filename, output_file, downsample_factor, threshold):
    '''Performs video processing (background subtraction), then runs human
    detection on the raw video and maps its output onto the
    background-subtracted video using the input argument threshold.'''
    global video_file
    global downsampling_factor
    video_file = video_filename
    downsampling_factor = downsample_factor
    if not os.path.isfile(video_file):
        raise IOError(video_file + ' not found')
    vid = video.asvideo(video_file, downsampling_factor)
    # Do background subtraction.
    W = bs.remove_background(vid)
    human_detection_processing(vid, W, output_file, threshold)
    return 0
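# A usage sketch for human_detection; the file names and parameter values
# below are placeholders, not values from the original code.
human_detection('surveillance.avi', 'detections.txt',
                downsample_factor=2, threshold=0.5)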
def pretty(*args):
    '''Takes the argument videos, assumes they are all the same size,
    and drops them into one monster video, row-wise.'''
    n = len(args)
    if type(args[0]) is video.Video:
        sz = np.asarray(args[0].V.shape)
    else:
        # Assume it is a numpy.ndarray.
        sz = np.asarray(args[0].shape)
    w = sz[2]
    sz[2] *= n
    A = np.zeros(sz, dtype=np.float32)
    if type(args[0]) is video.Video:
        for i in np.arange(n):
            A[:, :, i * w:(i + 1) * w, :] = args[i].V
    else:
        # Assume it is a numpy.ndarray.
        for i in np.arange(n):
            A[:, :, i * w:(i + 1) * w, :] = args[i]
    return video.asvideo(A)
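# A usage sketch for pretty: tile two same-sized videos side by side for
# inspection. vid_a and vid_b are placeholder video.Video objects.
combined = pretty(vid_a, vid_b)
combined.display()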
def ret_7D_video_objs(V):
    # Wrap each of the 7 feature channels of V in its own video object.
    return [video.asvideo(V[:, :, :, i]) for i in range(7)]
def calc_spatio_temporal_energies(vid):
    '''Returns a 7-feature-per-pixel video corresponding to the 7 energies
    oriented towards the left, right, up, down, flicker, static and
    "lack of structure" spatio-temporal directions.
    JJC: Returned as a list of seven grayscale videos.
    '''
    ts = t.time()
    # Generate the 10 G3 basis filter responses
    # (function definition in G3H3_helpers.py).
    G3 = imgInit3DG3(vid)

    # Unit normals for each spatio-temporal direction (eq. 3 of the paper).
    root2 = 1.41421356
    leftn_hat = [-1 / root2, 0, 1 / root2]
    rightn_hat = [1 / root2, 0, 1 / root2]
    downn_hat = [0, 1 / root2, 1 / root2]
    upn_hat = [0, -1 / root2, 1 / root2]
    flickern_hat = [0, 0, 1]
    staticn_hat = [1 / root2, 1 / root2, 0]
    e_axis = [0, 1, 0]
    sigmag = 1.0

    # Oriented energy along each direction, Gaussian-smoothed.
    energy_left = ndimage.gaussian_filter(
        calc_total_energy(leftn_hat, e_axis, *G3), sigma=sigmag)
    energy_right = ndimage.gaussian_filter(
        calc_total_energy(rightn_hat, e_axis, *G3), sigma=sigmag)
    energy_up = ndimage.gaussian_filter(
        calc_total_energy(upn_hat, e_axis, *G3), sigma=sigmag)
    energy_down = ndimage.gaussian_filter(
        calc_total_energy(downn_hat, e_axis, *G3), sigma=sigmag)
    energy_static = ndimage.gaussian_filter(
        calc_total_energy(staticn_hat, e_axis, *G3), sigma=sigmag)
    energy_flicker = ndimage.gaussian_filter(
        calc_total_energy(flickern_hat, e_axis, *G3), sigma=sigmag)

    # c is the epsilon value in eq. 5: 1% of the largest mean energy.
    c = np.max([np.mean(energy_left), np.mean(energy_right),
                np.mean(energy_up), np.mean(energy_down),
                np.mean(energy_static), np.mean(energy_flicker)]) / 100

    # norm_energy is the sum of the consort planar energies plus c.
    norm_energy = (energy_left + energy_right + energy_up + energy_down
                   + energy_static + energy_flicker + c)

    # Normalize each energy by the consort planar energy.
    vid_left_out = video.asvideo(energy_left / norm_energy)
    vid_right_out = video.asvideo(energy_right / norm_energy)
    vid_up_out = video.asvideo(energy_up / norm_energy)
    vid_down_out = video.asvideo(energy_down / norm_energy)
    vid_static_out = video.asvideo(energy_static / norm_energy)
    vid_flicker_out = video.asvideo(energy_flicker / norm_energy)
    vid_structure_out = video.asvideo(c / norm_energy)

    te = t.time()
    print(str(te - ts) + ' Seconds to execution (calculating energies)')
    return (vid_left_out, vid_right_out, vid_up_out, vid_down_out,
            vid_static_out, vid_flicker_out, vid_structure_out)
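# A usage sketch: compute the seven normalized energies for a clip and view
# them side by side via compress_to_7D and split. 'clip.avi' is a
# placeholder path. Each output channel i holds
#   E_i(x) / (sum_j E_j(x) + c),
# so the seven channels (including the c / norm_energy structure channel)
# sum to 1 at every pixel, matching the eq. 5 normalization above.
energies = calc_spatio_temporal_energies(
    video.asvideo('clip.avi').rgb2gray())
seven_d = compress_to_7D(*energies, 7)
split(seven_d).display()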
def random_video(factor=4):
    '''Loads and returns a random istare video.'''
    ind = random.randrange(0, len(video_fnames))
    vid = video.asvideo(video_fnames[ind], factor)
    return vid
def get_video(fname_part, factor=4, frames=None):
    '''Can specify all or part of the file name. Note that the video ID is
    the first set of hex code in the file name. frames is a list of frames
    to get; the default gets all frames.'''
    return video.asvideo(get_video_path(fname_part), factor, frames)
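# A usage sketch for get_video; the hex prefix below is a placeholder
# video ID, not one from the real dataset.
vid = get_video('1a2b3c', factor=2, frames=[0, 1, 2])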