def vid2frames(vid, oflow, pred_type, mul_oflow, oflow_pnum):
    """ Extract the frames of a video, then normalize them to a fixed length (40 frames).
    Optionally compute optical flow as well.
    Input : vid (opened video stream), oflow (bool: compute optical flow),
            pred_type (str: "word" or "sentence"), mul_oflow (bool: compute flow
            with multiprocessing), oflow_pnum (int: number of flow processes)
    Output: RGB frames, optical-flow frames, raw frame count
    """
    import cv2
    import numpy as np
    from frame import frames_downsample, images_rescale
    from opticalflow import frames2flows
    from oflow_multiprocessing import process_oflow

    rgbFrames, oflowFrames = [], []
    tuRectangle = (224, 224)
    frame_num = 0
    while True:
        success, frame = vid.read()
        if not success:
            break
        # NOTE: check whether mirroring the frame affects the results
        frame = cv2.flip(frame, 1)
        frame = cv2.resize(frame, tuRectangle, interpolation=cv2.INTER_AREA)
        rgbFrames.append(frame)
        frame_num += 1

    if len(rgbFrames) == 0:
        raise ValueError("Could not extract webcam frames successfully.")

    if len(rgbFrames) < 40:
        # too few frames: compute flow first, then upsample both streams to 40
        if oflow:
            if mul_oflow:
                oflowFrames = process_oflow(rgbFrames, oflow_pnum)
            else:
                oflowFrames = frames2flows(rgbFrames)
            oflowFrames = frames_downsample(oflowFrames, 40)
        rgbFrames = frames_downsample(np.array(rgbFrames), 40)
    else:
        # sentence prediction keeps all frames; word prediction downsamples to 40
        if pred_type != "sentence":
            rgbFrames = frames_downsample(np.array(rgbFrames), 40)
        if oflow:
            if mul_oflow:
                oflowFrames = process_oflow(rgbFrames, oflow_pnum)
            else:
                oflowFrames = frames2flows(rgbFrames)
    rgbFrames = images_rescale(rgbFrames)
    return rgbFrames, oflowFrames, frame_num
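# Minimal usage sketch for vid2frames above (not part of the original pipeline).
# Assumes an OpenCV capture source; the file name and parameter values are
# hypothetical and only illustrate the flag semantics documented in the docstring.
def _example_vid2frames():
    import cv2
    vid = cv2.VideoCapture("video.mp4")  # hypothetical input file
    rgbs, flows, n = vid2frames(vid, oflow=True, pred_type="word",
                                mul_oflow=False, oflow_pnum=4)
    vid.release()
    print("raw frames: %d, rgb: %d, flow: %d" % (n, len(rgbs), len(flows)))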
def vid2frames(vid):
    """ Extract frames from a video stream, computing optical flow incrementally
    per frame, then normalize both lists to a fixed length (40 frames). """
    import cv2
    import numpy as np
    from frame import frames_downsample
    from opticalflow import OpticalFlow

    oOpticalFlow = OpticalFlow(bThirdChannel=False)
    tuRectangle = (224, 224)
    rgbFrames = []
    oflowFrames = []
    while True:
        success, frame = vid.read()
        if not success:
            break
        frame = cv2.flip(frame, 1)
        frame = cv2.resize(frame, tuRectangle, interpolation=cv2.INTER_AREA)
        rgbFrames.append(frame)
        oflowFrames.append(oOpticalFlow.next(frame))

    rgbFrames = image_normalize(np.array(rgbFrames), 40)
    oflowFrames = frames_downsample(np.array(oflowFrames), 40)
    return rgbFrames, oflowFrames
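# Sketch of the incremental-flow pattern used above, on synthetic frames.
# It assumes OpticalFlow.next() takes one resized BGR frame and returns the
# flow field relative to the previous frame, as in the loop above; the random
# frames are placeholders, not real video data.
def _example_incremental_flow():
    import numpy as np
    from opticalflow import OpticalFlow
    oflow = OpticalFlow(bThirdChannel=False)
    frames = (np.random.rand(5, 224, 224, 3) * 255).astype(np.uint8)
    flows = [oflow.next(frame) for frame in frames]
    print("computed %d flow fields" % len(flows))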
def framesDir2flowsDir(sFrameBaseDir: str, sFlowBaseDir: str, nFramesNorm: int = None, sAlgorithm: str = "tvl1-fast"):
    """ Calculate optical flow from frames (extracted from videos)

    Input video-frame structure: ... sFrameDir / train / class001 / videoname / frames.jpg
    Output:                      ... sFlowDir / train / class001 / videoname / flow.jpg
    """
    # import paths assumed to match the sibling modules used elsewhere in this file
    import os
    import glob
    from frame import files2frames, frames_downsample
    from opticalflow import frames2flows, flows2file

    # do not (partially) overwrite an existing directory
    #if os.path.exists(sFlowBaseDir):
    #    warnings.warn("\nOptical flow folder " + sFlowBaseDir + " already exists: flow calculation stopped")
    #    return

    # get list of directories with frames: ... / sFrameDir/train/class/videodir/frames.jpg
    sCurrentDir = os.getcwd()
    os.chdir(sFrameBaseDir)
    liVideos = sorted(glob.glob("*/*/*"))
    os.chdir(sCurrentDir)
    print("Found %d directories=videos with frames in %s" % (len(liVideos), sFrameBaseDir))

    # loop over all video directories
    nCounter = 0
    for sFrameDir in liVideos:
        # generate target directory
        sFlowDir = sFlowBaseDir + "/" + sFrameDir

        if nFramesNorm is not None and os.path.exists(sFlowDir):
            nFlows = len(glob.glob(sFlowDir + "/*.*"))
            if nFlows == nFramesNorm:
                print("Video %5d: optical flow already extracted to %s" % (nCounter, sFlowDir))
                nCounter += 1
                continue
            else:
                print("Video %5d: Directory with %d instead of %d flows detected" % (nCounter, nFlows, nFramesNorm))

        # retrieve frame files - in ascending order
        arFrames = files2frames(sFrameBaseDir + "/" + sFrameDir)

        # downsample to a fixed number of frames
        if nFramesNorm is not None:
            arFrames = frames_downsample(arFrames, nFramesNorm)

        # calculate and save optical flow
        print("Video %5d: Calc optical flow with %s from %s frames to %s" % (nCounter, sAlgorithm, str(arFrames.shape), sFlowDir))
        arFlows = frames2flows(arFrames, sAlgorithm=sAlgorithm)
        flows2file(arFlows, sFlowDir)

        nCounter += 1

    return
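# Usage sketch for framesDir2flowsDir; the directory names below are
# hypothetical and only illustrate the expected train/class/video layout.
def _example_framesDir2flowsDir():
    framesDir2flowsDir("data-temp/chalearn/020/image",
                       "data-temp/chalearn/020/oflow",
                       nFramesNorm=40, sAlgorithm="tvl1-fast")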
def livedemo():
    # NOTE: import paths assumed to match the repo's module layout
    import os
    import cv2
    import numpy as np
    from timer import Timer
    from frame import images_crop, frames_downsample
    from videocapture import video_start, video_show, video_capture, frame_show
    from opticalflow import frames2flows
    from datagenerator import VideoClasses
    from model_i3d import I3D_load
    from predict import probability2label

    # dataset
    diVideoSet = {
        "sName": "chalearn",
        "nClasses": 20,         # number of classes
        "nFramesNorm": 40,      # number of frames per video
        "nMinDim": 240,         # smaller dimension of saved video-frames
        "tuShape": (240, 320),  # height, width
        "nFpsAvg": 10,          # average frames per second
        "nFramesAvg": 50,       # average frames per video
        "fDurationAvg": 5.0     # average duration in seconds
    }

    # files
    sClassFile = "data-set/%s/%03d/class.csv" % (diVideoSet["sName"], diVideoSet["nClasses"])
    sVideoDir = "data-set/%s/%03d" % (diVideoSet["sName"], diVideoSet["nClasses"])

    print("\nStarting gesture recognition live demo ... ")
    print(os.getcwd())
    print(diVideoSet)

    # load label descriptions
    oClasses = VideoClasses(sClassFile)

    sModelFile = "model/20180627-0729-chalearn020-oflow-i3d-entire-best.h5"
    h, w = 224, 224
    keI3D = I3D_load(sModelFile, diVideoSet["nFramesNorm"], (h, w, 2), oClasses.nClasses)

    # open a pointer to the webcam video stream
    oStream = video_start(device=1, tuResolution=(320, 240), nFramePerSecond=diVideoSet["nFpsAvg"])

    nCount = 0
    sResults = ""
    timer = Timer()

    # loop over action states
    while True:
        # show live video and wait for key stroke
        key = video_show(oStream, "green", "Press <blank> to start", sResults, tuRectangle=(h, w))

        # start!
        if key == ord(' '):
            # countdown n sec
            video_show(oStream, "orange", "Recording starts in ", tuRectangle=(h, w), nCountdown=3)

            # record video for n sec
            fElapsed, arFrames, _ = video_capture(oStream, "red", "Recording ",
                tuRectangle=(h, w), nTimeDuration=int(diVideoSet["fDurationAvg"]), bOpticalFlow=False)
            print("\nCaptured video: %.1f sec, %s, %.1f fps" %
                (fElapsed, str(arFrames.shape), len(arFrames) / fElapsed))

            # show orange wait box
            frame_show(oStream, "orange", "Translating sign ...", tuRectangle=(h, w))

            # crop and downsample frames
            arFrames = images_crop(arFrames, h, w)
            arFrames = frames_downsample(arFrames, diVideoSet["nFramesNorm"])

            # translate frames to flows - these are already scaled to [-1.0, 1.0]
            print("Calculate optical flow on %d frames ..." % len(arFrames))
            timer.start()
            arFlows = frames2flows(arFrames, bThirdChannel=False, bShow=True)
            print("Optical flow per frame: %.3f" % (timer.stop() / len(arFrames)))

            # predict video from flows
            print("Predict video with %s ..." % (keI3D.name))
            arX = np.expand_dims(arFlows, axis=0)
            arProbas = keI3D.predict(arX, verbose=1)[0]
            nLabel, sLabel, fProba = probability2label(arProbas, oClasses, nTop=3)

            sResults = "Sign: %s (%.0f%%)" % (sLabel, fProba * 100.)
            print(sResults)
            nCount += 1

        # quit
        elif key == ord('q'):
            break

    # do a bit of cleanup
    oStream.release()
    cv2.destroyAllWindows()
    return
def image_normalize(rgbFrames, nFrames):
    """ Downsample frames to nFrames and rescale pixel values. """
    from frame import frames_downsample, images_rescale
    rgbFrames = frames_downsample(rgbFrames, nFrames)  # was hardcoded to 40
    rgbFrames = images_rescale(rgbFrames)
    return rgbFrames
def sent_preds(rgbs, oflows, frames_count, labels, lstmModel, rgb_model, oflow_model,
               nTop, frames_to_process=30, stride=10, threshold=30):
    """ Predict a sentence by scanning the clip with overlapping windows of
    frames_to_process frames; stride is the number of frames shared between
    consecutive windows. Window predictions scoring above threshold are merged,
    keeping the higher-scoring window whenever the top label repeats. """
    import numpy as np
    from functools import reduce
    from frame import frames_downsample

    pos = 0
    results = []
    Next = 0
    while rgbs[Next:Next + stride].shape[0] != 0:
        # current window: [pos*(frames_to_process - stride), +frames_to_process)
        start = pos * frames_to_process - (pos * stride)
        end = (pos + 1) * frames_to_process - (pos * stride)
        rgbs_p = frames_downsample(np.array(rgbs[start:end]), 40)
        oflows_p = frames_downsample(np.array(oflows[start:end]), 40)

        if lstmModel is not None:
            predictions = i3d_LSTM_prediction(rgbs_p, oflows_p, labels, lstmModel, rgb_model, oflow_model, nTop)
        else:
            predictions = get_predicts(rgbs_p, oflows_p, labels, oflow_model, rgb_model, nTop, False)

        tmp = [d for item in predictions for d in item.items()]
        if tmp[0][1] > threshold:
            keys = list(list(zip(*tmp))[0])
            vals = list(list(zip(*tmp))[1])
            if len(results) == 0:
                results.append(predictions)
            else:
                tmp_list = []
                added = False
                # iterate over a copy, since results may be mutated inside the loop
                for result in list(results):
                    pred = [d for item in result for d in item.items()]
                    pred_keys = list(list(zip(*pred))[0])
                    pred_vals = list(list(zip(*pred))[1])
                    if str(pred_keys[0]).strip() == str(keys[0]).strip():
                        # same top label: keep the window with the higher average score
                        avg = reduce(lambda x, y: x + y, vals) / len(vals)
                        pred_avg = reduce(lambda x, y: x + y, pred_vals) / len(pred_vals)
                        if avg > pred_avg:
                            results.remove(result)
                            tmp_list.append(predictions)
                            added = True
                if not added:
                    tmp_list.append(predictions)
                results.extend(tmp_list)

        Next = end
        pos += 1

    def Phase(results):
        # merge the surviving window predictions: concatenate the labels per
        # rank and average their scores over all windows
        new_res = []
        key = [""] * 3
        val = [0] * 3
        for result in results:
            idx = 0
            for d in result:
                for tmp_key, tmp_val in d.items():
                    key[idx] += tmp_key + "-"
                    val[idx] += tmp_val
                idx += 1
        for i in range(len(key)):
            new_res.append({key[i].rsplit("-", 1)[0]: val[i] / len(results)})
        return new_res

    if len(results) > 0:
        predictions = Phase(results)
    else:
        predictions = [{'Unknown': 0.}, {'Unknown': 0.}, {'Unknown': 0.}]
    return predictions
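# The window arithmetic of sent_preds shown in isolation: with the defaults
# frames_to_process=30 and stride=10, consecutive windows advance by 20 frames
# and share 10, i.e. the clip is scanned as [0:30], [20:50], [40:70], ...
# The stop rule mirrors the rgbs[Next:Next+stride] check above.
def _example_windows(n_frames=70, frames_to_process=30, stride=10):
    pos, nxt = 0, 0
    while nxt < n_frames:  # same stop rule as sent_preds
        start = pos * (frames_to_process - stride)
        end = start + frames_to_process
        print("window %d: frames [%d:%d]" % (pos, start, min(end, n_frames)))
        nxt = end
        pos += 1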
def live():
    # NOTE: relies on globals prepared elsewhere (gameDisplay, carImg, diVideoSet,
    # h, w, keI3D, oClasses, message_display); unlike livedemo(), it records and
    # translates a single sign, then leaves the capture loop.
    gameDisplay.blit(carImg, (0, 0))

    # open a pointer to the webcam video stream
    oStream = video_start(device=1, tuResolution=(320, 240), nFramePerSecond=diVideoSet["nFpsAvg"])

    timer = Timer()
    sResults = ""
    nCount = 0
    while True:
        # show live video and wait for key stroke
        key = video_show(oStream, "green", "Press <blank> to start", sResults, tuRectangle=(h, w))

        # start!
        if key == ord(' '):
            # countdown n sec
            video_show(oStream, "orange", "Recording starts in ", tuRectangle=(h, w), nCountdown=3)

            # record video for n sec
            fElapsed, arFrames, _ = video_capture(oStream, "red", "Recording ",
                tuRectangle=(h, w), nTimeDuration=int(diVideoSet["fDurationAvg"]), bOpticalFlow=False)
            print("\nCaptured video: %.1f sec, %s, %.1f fps" %
                (fElapsed, str(arFrames.shape), len(arFrames) / fElapsed))

            # show orange wait box
            frame_show(oStream, "orange", "Translating sign ...", tuRectangle=(h, w))

            # crop and downsample frames
            arFrames = images_crop(arFrames, h, w)
            arFrames = frames_downsample(arFrames, diVideoSet["nFramesNorm"])

            # translate frames to flows - these are already scaled to [-1.0, 1.0]
            print("Calculate optical flow on %d frames ..." % len(arFrames))
            timer.start()
            arFlows = frames2flows(arFrames, bThirdChannel=False, bShow=True)
            print("Optical flow per frame: %.3f" % (timer.stop() / len(arFrames)))

            # predict video from flows
            print("Predict video with %s ..." % (keI3D.name))
            arX = np.expand_dims(arFlows, axis=0)
            arProbas = keI3D.predict(arX, verbose=1)[0]
            nLabel, sLabel, fProba = probability2label(arProbas, oClasses, nTop=3)

            sResults = "Sign: %s (%.0f%%)" % (sLabel, fProba * 100.)
            print(sResults)
            nCount += 1

            # quit
            break

    # do a bit of cleanup
    message_display(sResults)
    oStream.release()
    cv2.destroyAllWindows()