# Get dataset categories
if args.multi:
    categories = models.load_categories('category_multi_momentsv2.txt')
else:
    categories = models.load_categories('category_momentsv2.txt')

# Load the video frame transform
transform = models.load_transform()

# Obtain video frames
if args.frame_folder is not None:
    print('Loading frames in {}'.format(args.frame_folder))
    import glob
    # here make sure after sorting the frame paths have the correct temporal order
    frame_paths = sorted(glob.glob(os.path.join(args.frame_folder, '*.jpg')))
    frames = load_frames(frame_paths)
else:
    print('Extracting frames using ffmpeg...')
    frames = extract_frames(args.video_file, args.num_segments)

# Prepare input tensor
if 'resnet3d50' in args.arch:
    # [1, num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames], 1).unsqueeze(0)
else:
    # [num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames])

# Make video prediction
with torch.no_grad():
    logits = model(input)
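# A minimal follow-up sketch (not part of the snippet above): turn the raw logits
# into class probabilities and print the top-5 categories. It assumes the
# single-label case, that torch.nn.functional is imported as F, and that
# `categories` holds one label per output index.
probs = F.softmax(logits, 1).mean(dim=0)            # average over frames/clips
top_probs, top_idx = probs.sort(0, descending=True)
for i in range(5):
    print('{:.3f} -> {}'.format(top_probs[i].item(), categories[top_idx[i].item()]))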
def load_video(video_hash):
    yt = YouTube('https://youtube.com/embed/%s?start=%d&end=%d' % (video_hash, start, end))
    video = yt.streams.all()[0]
    name = video.download('/tmp')

    # Load model
    model = models.load_model(arch)
    av_categories = pd.read_csv('CVS_Actions(NEW).csv', delimiter=';').values.tolist()
    trax = pd.read_csv('audioTracks_urls.csv')

    # Get dataset categories
    #categories = models.load_categories()

    # Load the video frame transform
    transform = models.load_transform()

    # Obtain video frames
    if frame_folder is not None:
        print('Loading frames in {}'.format(frame_folder))
        import glob
        # here make sure after sorting the frame paths have the correct temporal order
        frame_paths = sorted(glob.glob(os.path.join(frame_folder, '*.jpg')))
        print(frame_paths)
        frames = load_frames(frame_paths)
    else:
        print('Extracting frames using ffmpeg...')
        frames = extract_frames(name, num_segments)

    # Prepare input tensor
    if arch == 'resnet3d50':
        # [1, num_frames, 3, 224, 224]
        input = torch.stack([transform(frame) for frame in frames], 1).unsqueeze(0)
    else:
        # [num_frames, 3, 224, 224]
        input = torch.stack([transform(frame) for frame in frames])

    # Make video prediction
    with torch.no_grad():
        logits = model(input)
        h_x = F.softmax(logits, 1).mean(dim=0)
        probs, idx = h_x.sort(0, True)

    # Output the prediction.
    print('RESULT ON ' + name)
    y = float(av_categories[idx[0]][1]) * 125
    x = float(av_categories[idx[0]][2]) * 125

    # Find the audio tracks closest to the predicted valence/energy point.
    trax = trax.assign(
        dist=lambda df: np.sqrt((x - df.valence)**2 + (y - df.energy)**2))
    print('min', trax['dist'].min())
    best = trax.nsmallest(100, 'dist')
    print(best)
    rand = randint(0, 9)
    print(rand)
    choice = best.iloc[rand, [1, 2, 5]]
    print('choice', choice)
    song = 'valence: ' + str(x) + ' arousal: ' + str(y) + ' ' + choice[0] + ' ' + choice[1]
    print(song)
    print(x, y)

    for i in range(0, 5):
        print('{:.3f} -> {} -> {}'.format(probs[i], idx[i], av_categories[idx[i]]))
        print('result categories', av_categories[idx[i]][0], av_categories[idx[i]][1])

    # Download the audio preview for the chosen track.
    #r = requests.get(match.iloc[0,2], allow_redirects=True)
    r = requests.get(choice[2], allow_redirects=True)
    open('./tmp/preview.mp3', 'wb').write(r.content)

    # Render output frames with prediction text.
    rendered_output = './tmp/' + video_hash + '_' + str(x) + '_' + str(y) + '.mp4'
    if rendered_output is not None:
        clip = VideoFileClip(name).subclip(30, 60)
        audioclip = AudioFileClip('./tmp/preview.mp3')
        txt_clip = TextClip(song, fontsize=16, color='white')
        clip_final = clip.set_audio(audioclip)
        video = CompositeVideoClip([clip_final, txt_clip])
        video.set_duration(30).write_videofile(rendered_output)
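# Standalone sketch (not from the original script) of the track-matching step in
# load_video, with made-up numbers, to make the valence/arousal nearest-neighbour
# idea explicit. Column names `valence` and `energy` mirror audioTracks_urls.csv;
# note that when `assign` is given a callable it receives the whole DataFrame,
# not a single row.
import numpy as np
import pandas as pd

tracks = pd.DataFrame({'valence': [0.2, 0.8, 0.5], 'energy': [0.9, 0.1, 0.5]})
x, y = 0.6, 0.4                     # target valence / arousal of the clip (invented)
tracks = tracks.assign(dist=np.sqrt((x - tracks.valence)**2 + (y - tracks.energy)**2))
print(tracks.nsmallest(2, 'dist'))  # closest candidates, like `best` above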
def __init__(self, sequence):
    super().__init__()
    self.sequence = sequence
    self.frames = load_frames('sequences/' + self.sequence)
    self.num_frames, self.height, self.width = self.frames.shape[:3]

    # init model
    self.model = model(self.frames)

    # set window
    self.setWindowTitle('Demo: Interaction-and-Propagation Network')
    self.setGeometry(100, 100, self.width, self.height + 100)

    # buttons
    self.prev_button = QPushButton('Prev')
    self.prev_button.clicked.connect(self.on_prev)
    self.next_button = QPushButton('Next')
    self.next_button.clicked.connect(self.on_next)
    self.play_button = QPushButton('Play')
    self.play_button.clicked.connect(self.on_play)
    self.run_button = QPushButton('Propagate!')
    self.run_button.clicked.connect(self.on_run)

    # LCD
    self.lcd = QTextEdit()
    self.lcd.setReadOnly(True)
    self.lcd.setMaximumHeight(28)
    self.lcd.setMaximumWidth(100)
    self.lcd.setText('{: 3d} / {: 3d}'.format(0, self.num_frames - 1))

    # slider
    self.slider = QSlider(Qt.Horizontal)
    self.slider.setMinimum(0)
    self.slider.setMaximum(self.num_frames - 1)
    self.slider.setValue(0)
    self.slider.setTickPosition(QSlider.TicksBelow)
    self.slider.setTickInterval(1)
    self.slider.valueChanged.connect(self.slide)

    # combobox
    self.combo = QComboBox(self)
    self.combo.addItem("fade")
    self.combo.addItem("davis")
    self.combo.addItem("checker")
    self.combo.addItem("color")
    self.combo.currentTextChanged.connect(self.set_viz_mode)

    # canvas
    self.fig = plt.Figure()
    self.ax = plt.Axes(self.fig, [0., 0., 1., 1.])
    self.ax.set_axis_off()
    self.fig.add_axes(self.ax)
    self.canvas = FigureCanvas(self.fig)
    self.cidpress = self.fig.canvas.mpl_connect('button_press_event', self.on_press)
    self.cidrelease = self.fig.canvas.mpl_connect('button_release_event', self.on_release)
    self.cidmotion = self.fig.canvas.mpl_connect('motion_notify_event', self.on_motion)

    # navigator
    navi = QHBoxLayout()
    navi.addWidget(self.lcd)
    navi.addWidget(self.prev_button)
    navi.addWidget(self.play_button)
    navi.addWidget(self.next_button)
    navi.addStretch(1)
    navi.addWidget(QLabel('Overlay Mode'))
    navi.addWidget(self.combo)
    navi.addStretch(1)
    navi.addWidget(self.run_button)

    layout = QVBoxLayout()
    layout.addWidget(self.canvas)
    layout.addWidget(self.slider)
    layout.addLayout(navi)
    layout.setStretchFactor(navi, 1)
    layout.setStretchFactor(self.canvas, 0)
    self.setLayout(layout)

    # timer
    self.timer = QTimer()
    self.timer.setSingleShot(False)
    self.timer.timeout.connect(self.on_time)

    # initialize visualization
    self.viz_mode = 'fade'
    self.current_mask = np.zeros((self.num_frames, self.height, self.width), dtype=np.uint8)
    self.cursur = 0
    self.on_showing = None
    self.show_current()

    # initialize interaction
    self.reset_scribbles()
    self.pressed = False
    self.on_drawing = None
    self.drawn_strokes = []

    self.show()
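# Hypothetical launcher sketch (not part of the original class): a widget whose
# __init__ looks like the one above is normally driven from a Qt event loop.
# `App` is an assumed name for the enclosing class, 'bear' an assumed folder
# under sequences/, and PyQt5 an assumed binding.
if __name__ == '__main__':
    import sys
    from PyQt5.QtWidgets import QApplication

    app = QApplication(sys.argv)
    window = App('bear')    # the constructor already calls self.show()
    sys.exit(app.exec_())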
def collect(video, size=300, npfft=True, filter_signal=False, persist=True,
            verbose=True, time_start=0, time_finish=1):
    """
    Params:
        video: string with the filename of the video to analyze.
        size: int with the size of the (always centered) window to analyze.
        npfft: bool; use numpy's FFT if True, otherwise our own implementation.
        filter_signal: bool; band-pass filter the signal between 50 and 130 bpm.
        persist: bool; store the data of this run.
        verbose: bool; print the current status of the script.
        time_start: double in [0, 1); fraction of the video length at which to
            start (time_start < time_finish).
        time_finish: double in (0, 1]; fraction of the video length at which to
            stop analyzing (time_start < time_finish).
    """
    if time_start >= time_finish:
        raise ValueError('Incorrect time_start and time_finish: '
                         'time_start must be smaller than time_finish')

    if verbose:
        print('Opening video...')
    cap = cv2.VideoCapture(video)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Centered analysis window of side `size`.
    upperLeftCornerX = width // 2 - size // 2
    upperLeftCornerY = height // 2 - size // 2
    lowerRightCornerX = width // 2 + size // 2
    lowerRightCornerY = height // 2 + size // 2

    if verbose:
        print('Loading frames...')
    r0, g0, b0 = ut.load_frames(cap, upperLeftCornerX, upperLeftCornerY,
                                lowerRightCornerX, lowerRightCornerY, length,
                                time_start, time_finish)
    cap.release()
    cv2.destroyAllWindows()

    # Truncate to the largest power of two and build the frequency axis in Hz.
    n = int(2**np.floor(np.log2(r0.shape[1])))
    f = np.linspace(-n / 2, n / 2 - 1, n) * fps / n

    r0 = r0[0, 0:n]
    g0 = g0[0, 0:n]
    b0 = b0[0, 0:n]
    r = r0 - np.mean(r0)
    g = g0 - np.mean(g0)
    b = b0 - np.mean(b0)

    if verbose:
        print('Applying the Fourier Transform...')
    if npfft:
        fft_method = np.fft.fft
        fft_shift_method = np.fft.fftshift
    else:
        fft_method = fft.FFT_R
        fft_shift_method = fft.FFT_SHIFT

    R = np.abs(fft_shift_method(fft_method(r)))**2
    G = np.abs(fft_shift_method(fft_method(g)))**2
    B = np.abs(fft_shift_method(fft_method(b)))**2

    if filter_signal:
        R, G, B = fft.band_pass_filter(R, G, B, f)

    title = video.split("/")[-1].split(".")[0]
    filename = ut.filename_builder(title, fps, len(r), size, filter_signal)

    if persist:
        if verbose:
            print('Storing data...')
        if not filter_signal:
            plt.subplot(2, 1, 1)
        plt.plot(60 * f, R, 'red')
        plt.plot(60 * f, G, 'green')
        plt.plot(60 * f, B, 'blue')
        plt.xlim(0, 200)
        if filter_signal:
            plt.axvline(x=50, linestyle="--")
            plt.axvline(x=130, linestyle="--")
        plt.xlabel("frequency [1/minute]")
        plt.annotate("{} beats per minute".format(abs(round(f[np.argmax(R)] * 60, 1))),
                     xy=(1, 0), xycoords='axes fraction', fontsize=10,
                     xytext=(0, -20), textcoords='offset points',
                     ha='right', va='top')
        plt.title(title)
        if not filter_signal:
            plt.subplot(2, 1, 2)
            plt.plot(np.arange(n), r0, 'red')
            plt.plot(np.arange(n), g0, 'green')
            plt.plot(np.arange(n), b0, 'blue')
            plt.xlabel("r g b value")
        plt.tight_layout()
        ut.write_csv(filename, r, g, b, R, G, B)
        plt.savefig("{}.png".format(filename))
        #plt.clf()

    if verbose:
        print("Heart rate:", abs(f[np.argmax(R)]) * 60, "beats per minute in R")
        print("Heart rate:", abs(f[np.argmax(G)]) * 60, "beats per minute in G")
        print("Heart rate:", abs(f[np.argmax(B)]) * 60, "beats per minute in B")

    return abs(round(f[np.argmax(R)] * 60, 1))
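# Hypothetical usage sketch (the file path is invented): estimate the pulse from
# the middle third of a clip using numpy's FFT and the 50-130 bpm band-pass.
if __name__ == '__main__':
    bpm = collect('videos/finger.mp4', size=300, npfft=True,
                  filter_signal=True, time_start=1/3, time_finish=2/3)
    print('Estimated heart rate: {} bpm'.format(bpm))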