Example #1
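# NOTE: snippet from a larger inference script; `args` (an argparse namespace),
# the `models` module, and the `load_frames`/`extract_frames` helpers are
# assumed to be imported or defined elsewhere, along with `os` and `torch`.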
# Get dataset categories
if args.multi:
    categories = models.load_categories('category_multi_momentsv2.txt')
else:
    categories = models.load_categories('category_momentsv2.txt')

# Load the video frame transform
transform = models.load_transform()

# Obtain video frames
if args.frame_folder is not None:
    print('Loading frames in {}'.format(args.frame_folder))
    import glob
    # make sure the frame paths are in the correct temporal order after sorting
    frame_paths = sorted(glob.glob(os.path.join(args.frame_folder, '*.jpg')))
    frames = load_frames(frame_paths)
else:
    print('Extracting frames using ffmpeg...')
    frames = extract_frames(args.video_file, args.num_segments)

# Prepare input tensor
if 'resnet3d50' in args.arch:
    # [1, 3, num_frames, 224, 224]
    input = torch.stack([transform(frame) for frame in frames], 1).unsqueeze(0)
else:
    # [num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames])

# Make video prediction
with torch.no_grad():
    logits = model(input)
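The snippet above stops at the raw logits. As a minimal continuation sketch (not part of the original snippet), the per-frame class probabilities can be averaged and the top-5 labels printed, assuming `categories` is the list loaded earlier:

import torch.nn.functional as F

# Average softmax scores over frames/clips, then rank the classes.
probs = F.softmax(logits, dim=1).mean(dim=0)
top_probs, top_idx = probs.sort(0, descending=True)
for i in range(5):
    print('{:.3f} -> {}'.format(top_probs[i].item(), categories[top_idx[i].item()]))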
Example #2
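# NOTE: `start`, `end`, `arch`, `frame_folder`, and `num_segments` are read as
# module-level globals inside this function; pytube (`YouTube`), pandas (`pd`),
# numpy (`np`), `requests`, `randint`, moviepy's clip classes, `torch`/`F`, and
# the `models`/`load_frames`/`extract_frames` helpers are assumed to be
# imported elsewhere in the original script.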
def load_video(video_hash):
    yt = YouTube('https://youtube.com/embed/%s?start=%d&end=%d' %
                 (video_hash, start, end))
    video = yt.streams.all()[0]
    name = video.download('/tmp')
    # Load the model
    model = models.load_model(arch)

    av_categories = pd.read_csv('CVS_Actions(NEW).csv',
                                delimiter=';').values.tolist()
    trax = pd.read_csv('audioTracks_urls.csv')

    # Get dataset categories
    #categories = models.load_categories()

    # Load the video frame transform
    transform = models.load_transform()

    # Obtain video frames
    if frame_folder is not None:
        print('Loading frames in {}'.format(frame_folder))
        import glob
        # make sure the frame paths are in the correct temporal order after sorting
        frame_paths = sorted(glob.glob(os.path.join(frame_folder, '*.jpg')))
        print(frame_paths)
        frames = load_frames(frame_paths)
    else:
        print('Extracting frames using ffmpeg...')
        frames = extract_frames(name, num_segments)

    # Prepare input tensor
    if arch == 'resnet3d50':
        # [1, 3, num_frames, 224, 224]
        input = torch.stack([transform(frame) for frame in frames],
                            1).unsqueeze(0)
    else:
        # [num_frames, 3, 224, 224]
        input = torch.stack([transform(frame) for frame in frames])

    # Make video prediction
    with torch.no_grad():
        logits = model(input)
        h_x = F.softmax(logits, 1).mean(dim=0)
        probs, idx = h_x.sort(0, True)

    # Output the prediction.

    print('RESULT ON ' + name)
    y = float(av_categories[idx[0]][1]) * 125
    x = float(av_categories[idx[0]][2]) * 125

    trax = trax.assign(
        dist=lambda row: np.sqrt((x - row.valence)**2 + (y - row.energy)**2))
    print('min', trax['dist'].min())

    best = trax.nsmallest(100, 'dist')
    print(best)

    rand = randint(0, 9)
    print(rand)
    choice = best.iloc[rand, [1, 2, 5]]

    print('choice', choice)

    song = 'valence: ' + str(x) + ' arousal: ' + str(
        y) + " " + choice[0] + ' ' + choice[1]
    print(song)
    print(x, y)
    for i in range(0, 5):
        print('{:.3f} -> {} -> {}'.format(probs[i], idx[i],
                                          av_categories[idx[i]]))
        print('result categories', av_categories[idx[i]][0],
              av_categories[idx[i]][1])

    #r = requests.get(match.iloc[0,2], allow_redirects=True)
    r = requests.get(choice[2], allow_redirects=True)
    with open('./tmp/preview.mp3', 'wb') as fh:
        fh.write(r.content)
    # Render output frames with prediction text.
    rendered_output = './tmp/' + video_hash + '_' + str(x) + '_' + str(
        y) + '.mp4'
    if rendered_output is not None:
        clip = VideoFileClip(name).subclip(30, 60)
        audioclip = AudioFileClip('./tmp/preview.mp3')
        txt_clip = TextClip(song, fontsize=16, color='white')
        clip_final = clip.set_audio(audioclip)
        video = CompositeVideoClip([clip_final, txt_clip])
        video.set_duration(30).write_videofile(rendered_output)
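A hypothetical driver for `load_video`: because the function reads `arch`, `frame_folder`, `num_segments`, `start`, and `end` as globals, they are assumed to be set at module level before the call (all values below are placeholders):

arch = 'resnet3d50'      # model name handed to models.load_model
frame_folder = None      # None -> frames are extracted with ffmpeg
num_segments = 16        # number of frames sampled from the video
start, end = 30, 60      # clip boundaries in seconds for the embed URL

load_video('YOUR_VIDEO_ID')  # replace with a real 11-character YouTube hash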
Example #3
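# NOTE: the constructor below belongs to a QWidget subclass from an interactive
# video-segmentation demo; the Qt widget classes, matplotlib's FigureCanvas,
# numpy (`np`), and the `load_frames`/`model` helpers are assumed to be
# imported at module level.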
    def __init__(self, sequence):
        super().__init__()
        self.sequence = sequence
        self.frames = load_frames('sequences/' + self.sequence)
        self.num_frames, self.height, self.width = self.frames.shape[:3]
        # init model
        self.model = model(self.frames)

        # set window
        self.setWindowTitle('Demo: Interaction-and-Propagation Network')
        self.setGeometry(100, 100, self.width, self.height + 100)

        # buttons
        self.prev_button = QPushButton('Prev')
        self.prev_button.clicked.connect(self.on_prev)
        self.next_button = QPushButton('Next')
        self.next_button.clicked.connect(self.on_next)
        self.play_button = QPushButton('Play')
        self.play_button.clicked.connect(self.on_play)
        self.run_button = QPushButton('Propagate!')
        self.run_button.clicked.connect(self.on_run)

        # LCD
        self.lcd = QTextEdit()
        self.lcd.setReadOnly(True)
        self.lcd.setMaximumHeight(28)
        self.lcd.setMaximumWidth(100)
        self.lcd.setText('{: 3d} / {: 3d}'.format(0, self.num_frames - 1))

        # slide
        self.slider = QSlider(Qt.Horizontal)
        self.slider.setMinimum(0)
        self.slider.setMaximum(self.num_frames - 1)
        self.slider.setValue(0)
        self.slider.setTickPosition(QSlider.TicksBelow)
        self.slider.setTickInterval(1)
        self.slider.valueChanged.connect(self.slide)

        # combobox
        self.combo = QComboBox(self)
        self.combo.addItem("fade")
        self.combo.addItem("davis")
        self.combo.addItem("checker")
        self.combo.addItem("color")
        self.combo.currentTextChanged.connect(self.set_viz_mode)

        # canvas
        self.fig = plt.Figure()
        self.ax = plt.Axes(self.fig, [0., 0., 1., 1.])
        self.ax.set_axis_off()
        self.fig.add_axes(self.ax)

        self.canvas = FigureCanvas(self.fig)

        self.cidpress = self.fig.canvas.mpl_connect('button_press_event',
                                                    self.on_press)
        self.cidrelease = self.fig.canvas.mpl_connect('button_release_event',
                                                      self.on_release)
        self.cidmotion = self.fig.canvas.mpl_connect('motion_notify_event',
                                                     self.on_motion)

        # navigator
        navi = QHBoxLayout()
        navi.addWidget(self.lcd)
        navi.addWidget(self.prev_button)
        navi.addWidget(self.play_button)
        navi.addWidget(self.next_button)
        navi.addStretch(1)
        navi.addWidget(QLabel('Overlay Mode'))
        navi.addWidget(self.combo)
        navi.addStretch(1)
        navi.addWidget(self.run_button)

        layout = QVBoxLayout()
        layout.addWidget(self.canvas)
        layout.addWidget(self.slider)
        layout.addLayout(navi)
        layout.setStretchFactor(navi, 1)
        layout.setStretchFactor(self.canvas, 0)
        self.setLayout(layout)

        # timer
        self.timer = QTimer()
        self.timer.setSingleShot(False)
        self.timer.timeout.connect(self.on_time)

        # initialize visualization
        self.viz_mode = 'fade'
        self.current_mask = np.zeros(
            (self.num_frames, self.height, self.width), dtype=np.uint8)
        self.cursur = 0
        self.on_showing = None
        self.show_current()

        # initialize action
        self.reset_scribbles()
        self.pressed = False
        self.on_drawing = None
        self.drawn_strokes = []

        self.show()
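A minimal launcher sketch for the widget above; PyQt5 and the class name `App` are assumptions, since the snippet only shows the constructor:

import sys
from PyQt5.QtWidgets import QApplication  # PyQt5 is an assumption

if __name__ == '__main__':
    app = QApplication(sys.argv)
    gui = App('bear')      # 'App' and the sequence name 'bear' are placeholders
    sys.exit(app.exec_())  # the constructor already calls self.show()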
Example #4
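# NOTE: the function below assumes `cv2` (OpenCV), `numpy as np`,
# `matplotlib.pyplot as plt`, and two project-specific helper modules
# (`ut` for frame loading and CSV/filename helpers, `fft` for the custom FFT
# and band-pass filter) are imported at module level.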
def collect(video,
            size=300,
            npfft=True,
            filter_signal=False,
            persist=True,
            verbose=True,
            time_start=0,
            time_finish=1):
    """ params:
    video: String with the filename of the video to analyze.
    size: Int with the size of the window to analyze. It is always centered.
    npfft: Boolean that determines whether we use our implementation of the fft or the one that comes with numpy
    filter_signal: Boolean that determines if we filter the signal between 50 and 130 bpm
    persist: Boolean that determines if we store the data of this run
    verbose: Boolean that determines if we print the current status of the script
    time_start: Double between [0,1) that determines the % of the length of the video from which to start. (time_start < time_finish)
    time_finish: Double between (0,1] that determines the % of the length of the video until we stop analyzing. (time_start < time_finish)
    """
    if time_start >= time_finish:
        raise ValueError(
            'Incorrect time_start and time_finish: time_start must be smaller than time_finish')

    if verbose:
        print('Opening video...')
    cap = cv2.VideoCapture(video)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    upperLeftCornerX = width // 2 - size // 2
    upperLeftCornerY = height // 2 - size // 2
    lowerRightCornerX = width // 2 + size // 2
    lowerRightCornerY = height // 2 + size // 2

    if verbose:
        print('Loading frames..')
    r0, g0, b0 = ut.load_frames(cap, upperLeftCornerX, upperLeftCornerY,
                                lowerRightCornerX, lowerRightCornerY, length,
                                time_start, time_finish)
    cap.release()
    cv2.destroyAllWindows()

    n = int(2**np.floor(np.log2(r0.shape[1])))
    f = np.linspace(-n / 2, n / 2 - 1, n) * fps / n
    r0 = r0[0, 0:n]
    g0 = g0[0, 0:n]
    b0 = b0[0, 0:n]
    r = r0 - np.mean(r0)
    g = g0 - np.mean(g0)
    b = b0 - np.mean(b0)

    if verbose:
        print('Applying the Fourier Transform...')
    fft_method = None
    fft_shift_method = None
    if npfft:
        fft_method = np.fft.fft
        fft_shift_method = np.fft.fftshift
    else:
        fft_method = fft.FFT_R
        fft_shift_method = fft.FFT_SHIFT

    R = np.abs(fft_shift_method(fft_method(r)))**2
    G = np.abs(fft_shift_method(fft_method(g)))**2
    B = np.abs(fft_shift_method(fft_method(b)))**2

    if filter_signal:
        R, G, B = fft.band_pass_filter(R, G, B, f)

    title = video.split("/")[-1].split(".")[0]
    filename = ut.filename_builder(title, fps, len(r), size, filter_signal)

    if persist:
        if verbose:
            print('Storing data...')
        if not filter_signal:
            plt.subplot(2, 1, 1)
        plt.plot(60 * f, R, 'red')
        plt.plot(60 * f, G, 'green')
        plt.plot(60 * f, B, 'blue')
        plt.xlim(0, 200)
        if filter_signal:
            plt.axvline(x=50, linestyle="--")
            plt.axvline(x=130, linestyle="--")
        plt.xlabel("frecuencia [1/minuto]")
        plt.annotate("{} latidos por minuto".format(
            abs(round(f[np.argmax(R)] * 60, 1))),
                     xy=(1, 0),
                     xycoords='axes fraction',
                     fontsize=10,
                     xytext=(0, -20),
                     textcoords='offset points',
                     ha='right',
                     va='top')
        plt.title(title)

        if not filter_signal:
            plt.subplot(2, 1, 2)
            plt.plot(np.arange(n), r0, 'red')
            plt.plot(np.arange(n), g0, 'green')
            plt.plot(np.arange(n), b0, 'blue')
            plt.xlabel("valor r g b")
            plt.tight_layout()

        ut.write_csv(filename, r, g, b, R, G, B)
        plt.savefig("{}.png".format(filename))
        #plt.clf()

    if verbose:
        print("Frecuencia cardíaca: ",
              abs(f[np.argmax(R)]) * 60, " pulsaciones por minuto en R")
        print("Frecuencia cardíaca: ",
              abs(f[np.argmax(G)]) * 60, " pulsaciones por minuto en G")
        print("Frecuencia cardíaca: ",
              abs(f[np.argmax(B)]) * 60, " pulsaciones por minuto en B")
    return abs(round(f[np.argmax(R)] * 60, 1))
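A hypothetical call to `collect`; the video path and time bounds are placeholders, and the return value is the estimated heart rate in beats per minute taken from the red channel:

# Analyze the middle 80% of a clip with numpy's FFT and the 50-130 bpm
# band-pass filter, without writing the CSV/PNG output of a persisted run.
bpm = collect('videos/finger.mp4', size=300, npfft=True, filter_signal=True,
              persist=False, verbose=True, time_start=0.1, time_finish=0.9)
print('Estimated heart rate: {:.1f} bpm'.format(bpm))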