def beat_extractor(queue_beat):
    kwargs = dict(
        fps=100,
        correct=True,
        infile=None,
        outfile=None,
        max_bpm=170,
        min_bpm=60,
        #nn_files = [BEATS_LSTM[0]],
        transition_lambda=100,
        num_frames=1,
        online=True,
        verbose=1)

    def beat_callback(beats, output=None):
        if len(beats) > 0:
            # Push the latest detected beat time into the shared queue
            queue_beat.put(beats[0])
            #print(beats)

    #print('Process to write beats: %s' % os.getpid())
    in_processor = RNNBeatProcessor(**kwargs)
    beat_processor = DBNBeatTrackingProcessor(**kwargs)
    out_processor = [beat_processor, beat_callback]
    processor = IOProcessor(in_processor, out_processor)
    process_online(processor, **kwargs)
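A minimal usage sketch (not part of the original example; the queue handling and timeout are assumptions) showing how beat_extractor could be driven from a separate process, assuming the madmom names used above (RNNBeatProcessor, DBNBeatTrackingProcessor, IOProcessor, process_online) are already imported:

import multiprocessing as mp
import queue

if __name__ == '__main__':
    beat_queue = mp.Queue()
    # Run the online tracker in its own process; it pushes beat times into the queue.
    worker = mp.Process(target=beat_extractor, args=(beat_queue,), daemon=True)
    worker.start()
    while worker.is_alive():
        try:
            print('beat at %.2f s' % beat_queue.get(timeout=1.0))
        except queue.Empty:
            pass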
Example #2
def get_beat_processor():
    print('START BEAT PROCESSOR   >> ', str(datetime.now()))
    from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
    from madmom.processors import SequentialProcessor
    print('BEAT PROCESSOR         >> ', str(datetime.now()))
    return SequentialProcessor(
        [RNNBeatProcessor(),
         DBNBeatTrackingProcessor(fps=100)])
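A minimal usage sketch (the file name is an assumption): the returned SequentialProcessor is callable and maps an audio file path directly to an array of beat times in seconds.

processor = get_beat_processor()
beat_times = processor('example.wav')  # hypothetical input file
print(beat_times)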
Example #3
def beatSyncFeature(feature, audio, sr, hop_length):
    # Aggregate feature between beat events
    fps = sr / hop_length
    beat_proc = DBNBeatTrackingProcessor(fps=100)
    beat_act = RNNBeatProcessor()(audio)
    beat_times = beat_proc(beat_act)
    # We'll use the median value of each feature between beat frames
    feature = librosa.feature.sync(feature, (beat_times * fps).astype(int),
                                   aggregate=np.median)
    return feature, beat_times
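A minimal usage sketch (the file name and the chroma feature are assumptions, not part of the original): compute a frame-level feature with the same sr and hop_length and pass it through beatSyncFeature. Note that in recent librosa versions the sync helper lives at librosa.util.sync rather than librosa.feature.sync.

import librosa
import numpy as np

audio, sr = librosa.load('song.wav', sr=44100)   # hypothetical input file
hop_length = 512
chroma = librosa.feature.chroma_stft(y=audio, sr=sr, hop_length=hop_length)
chroma_sync, beat_times = beatSyncFeature(chroma, audio, sr, hop_length)
print(chroma_sync.shape, beat_times[:5])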
Example #4
File: main.py Project: lhl2617/radetzky
def get_beats(file_path: str) -> List[float]:
    """
    Given the path to an audio file, return a list of detected beat timings (in seconds).
    """
    print(f"Getting beats for {file_path}")
    proc = DBNBeatTrackingProcessor(fps=100)
    act = RNNBeatProcessor()(file_path)
    res: List[float] = proc(act).tolist()
    print(f"Got {len(res)} beats")
    print(res)
    return res
Example #5
def getRNNDBNOnsets(filename):
    """
    Call Madmom's implementation of RNN + DBN beat tracking
    :param filename: Path to audio file
    """
    print("Computing madmom beats...")
    from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
    proc = DBNBeatTrackingProcessor(fps=100)
    act = RNNBeatProcessor()(filename)
    b = proc(act)
    return b
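A minimal usage sketch (the file name is an assumption): the returned beat times are in seconds, so a rough global tempo can be derived from the median inter-beat interval.

import numpy as np

beat_times = getRNNDBNOnsets('example.wav')  # hypothetical input file
if len(beat_times) > 1:
    tempo_bpm = 60.0 / np.median(np.diff(beat_times))
    print('estimated tempo: %.1f BPM' % tempo_bpm)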
Example #6
def extract(yt_id):
    beats = SequentialProcessor(
        [RNNBeatProcessor(),
         DBNBeatTrackingProcessor(fps=100)])
    chordrec = SequentialProcessor(
        [CNNChordFeatureProcessor(),
         CRFChordRecognitionProcessor()])
    processMulti = ParallelProcessor([])
    processMulti.append(beats)
    processMulti.append(chordrec)
    beatSync = SequentialProcessor(
        [printTime, processMulti, printTime, arrange, printTime])
    return beatSync('tmp/' + yt_id + '.wav')
Example #7
File: features.py Project: ctralie/acoss
 def madmom_features(self, fps=100):
     """
     Call Madmom's implementation of RNN + DBN beat tracking. Madmom
     returns beat times in seconds; they are rounded and converted to
     hop_size units so that they line up with the features.
     The RNN novelty function is computed as a side effect (and is the
     bottleneck of the computation), so it is returned as well.
     Parameters
     ----------
     fps: int
         Frames per second in processing
     Returns
     -------
     {
         'tempos': ndarray(n_levels, 2)
             An array of tempo estimates in beats per minute,
             along with their confidences
         'onsets': ndarray(n_onsets)
             Array of onsets, where each onset indexes into a particular window
         'novfn': ndarray(n_frames)
             Evaluation of the rnn audio novelty function at each audio
             frame, in time increments equal to self.hop_length
         'snovfn': ndarray(n_frames)
             Superflux audio novelty function at each audio frame,
             in time increments equal to self.hop_length
     }
     """
     from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
     from madmom.features.tempo import TempoEstimationProcessor
     from madmom.features.onsets import SpectralOnsetProcessor
     from madmom.audio.filters import LogarithmicFilterbank
     beatproc = DBNBeatTrackingProcessor(fps=fps)
     tempoproc = TempoEstimationProcessor(fps=fps)
     novfn = RNNBeatProcessor()(self.audio_file) # This step is the computational bottleneck
     beats = beatproc(novfn)
     tempos = tempoproc(novfn)
     onsets = np.array(np.round(beats*self.fs/float(self.hop_length)), dtype=np.int64)
     # Resample the audio novelty function to correspond to the 
     # correct hop length
     nframes = len(self.librosa_noveltyfn())
     novfn = np.interp(np.arange(nframes)*self.hop_length/float(self.fs), np.arange(len(novfn))/float(fps), novfn) 
     
     # For good measure, also compute and return superflux
     sodf = SpectralOnsetProcessor(onset_method='superflux', fps=fps, \
                         filterbank=LogarithmicFilterbank,\
                           num_bands=24, log=np.log10)
     snovfn = sodf(self.audio_file)
     snovfn = np.interp(np.arange(nframes)*self.hop_length/float(self.fs), np.arange(len(snovfn))/float(fps), snovfn) 
     return {'tempos':tempos, 'onsets':onsets, 'novfn':novfn, 'snovfn':snovfn}
Example #8
def getRNNDBNOnsets(filename, Fs, hopSize):
    """
    Call Madmom's implementation of RNN + DBN beat tracking
    :param filename: Path to audio file
    :param Fs: Sample rate
    :param hopSize: Hop size of each onset function value
    :returns (tempo, beats): Average tempo in BPM, and a numpy array
        of beat positions expressed in hop-size frames
    """
    print("Computing madmom beats...")
    from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
    proc = DBNBeatTrackingProcessor(fps=100)
    act = RNNBeatProcessor()(filename)
    b = proc(act)
    tempo = 60 / np.mean(b[1::] - b[0:-1])
    beats = np.array(np.round(b * Fs / hopSize), dtype=np.int64)
    return (tempo, beats)
Example #9
def chordBeats(infile, outfile):
    print('Loading audio file...', infile)

    #proc = BeatTrackingProcessor(
    #    fps = 100,
    #    method='comb', min_bpm=40,
    #    max_bpm=240, act_smooth=0.09,
    #    hist_smooth=7, alpha=0.79)
    proc = DBNBeatTrackingProcessor(fps=100,
                                    method='comb',
                                    min_bpm=40,
                                    max_bpm=240)
    act = RNNBeatProcessor()(infile)
    beats = proc(act).astype('float32')
    audio = essentia.standard.MonoLoader(filename=infile)()
    # TODO: find the best parameters.
    parameters = {}
    stepsize, semitones = vamp.collect(audio,
                                       44100,
                                       "nnls-chroma:nnls-chroma",
                                       output="semitonespectrum",
                                       step_size=2048)["matrix"]
    np.savez(outfile, [len(audio)], beats, semitones)
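A minimal sketch of reading the saved file back (an assumption, not part of the original project): np.savez with positional arguments stores the arrays under the keys arr_0, arr_1 and arr_2.

import numpy as np

def load_chord_beats(npz_path):
    data = np.load(npz_path)
    num_samples = int(data['arr_0'][0])  # [len(audio)]
    beats = data['arr_1']                # beat times in seconds (float32)
    semitones = data['arr_2']            # NNLS semitone spectrum matrix
    return num_samples, beats, semitones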
Example #10
    def __init__(self):
        self.pa = pyaudio.PyAudio()
        self.c_count = 0
        using_callback = True
        self.buffer = collections.deque(maxlen=self.RATE * 14)
        self.rnn = RNNBeatProcessor(online=True, nn_files=[BEATS_LSTM[0]])
        self.act_proc = DBNBeatTrackingProcessor(fps=100,
                                                 min_bpm=80.0,
                                                 max_bpm=180.0)
        self.dcp = DeepChromaProcessor()
        self.decode = DeepChromaChordRecognitionProcessor()
        self.start_current_time = None
        if using_callback:
            self.stream = self.pa.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       output=True,
                                       frames_per_buffer=self.CHUNK,
                                       stream_callback=self.callback)
            print(self.pa.get_default_output_device_info())
            print(self.pa.get_default_input_device_info())
            self.t_start = time.time()
            beepsnd, _ = librosa.load('block.wav', sr=None)
            out1 = beepsnd.tobytes()
            #print(beepsnd.size, len(out1))
            self.beepsnd = out1
            self.Flag = False
            self.beep_count = 0
            while self.stream.is_active():
                if len(self.buffer) == self.RATE * 8:
                    print('8 sec')
                    print(self.time_info)
                    print(time.time() - self.t_start)
                    self.tmp = np.array(self.buffer)
                    self.buffer.clear()
                    print(time.time() - self.t_start)
                    chroma_thread = threading.Thread(target=self.chroma_rec,
                                                     args=())
                    chroma_thread.start()
                    #chord = chroma_thread.run()

                    tmp2 = self.rnn(self.tmp)
                    # tmp2 = librosa.onset.onset_strength(tmp,sr=self.RATE, hop_length = int(self.RATE / 100),max_size=1,aggregate=np.median, n_mels=256)
                    # tmp2 /= np.max(tmp2)
                    #t_axes = librosa.frames_to_time(np.arange(len(tmp2)),sr=self.RATE)
                    t_proc = time.time() - self.t_start
                    print(t_proc)
                    tmp3_2 = self.act_proc(tmp2)
                    tmp3_1 = 60 / np.mean(np.diff(tmp3_2))
                    # print(tmp3)
                    #tmp3_1,tmp3_2 = librosa.beat.beat_track(onset_envelope=tmp2, sr=self.RATE)
                    print('tempo is %f' % tmp3_1)
                    print('beat is ', tmp3_2)

                    t_proc = time.time() - self.t_start
                    chroma_thread.join()

                    print(t_proc)
                    t = threading.Timer(60. / tmp3_1 - t_proc, self.flagit, ())
                    t.daemon = True
                    t.start()
                    # self.stream.write(self.beepsnd)

                    print(time.time() - self.t_start)
                else:
                    time.sleep(0.001)

        else:
            self.stream = self.pa.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       output=True,
                                       frames_per_buffer=self.CHUNK)
            self.t_start = time.time()
            self.loop()
Example #11
def activation2downbeat(activation, fps=100):
    return DBNBeatTrackingProcessor(min_bpm=16.0, max_bpm=55.0,
                                    fps=fps)(activation)
Example #12
    def __init__(self):
        self.pa = pyaudio.PyAudio()
        self.c_count = 0
        using_callback = True
        self.buffer = collections.deque(maxlen=self.RATE * 14)
        self.rnn = RNNBeatProcessor(online=True, nn_files=[BEATS_LSTM[0]])
        self.act_proc = DBNBeatTrackingProcessor(fps=100,
                                                 min_bpm=80.0,
                                                 max_bpm=180.0)
        self.dcp = DeepChromaProcessor()
        self.decode = DeepChromaChordRecognitionProcessor()
        self.start_current_time = None
        self.beep_count = 0
        source_path = 'tool'
        style_name = 'test_midi_folder'

        self.test = InstScheduler(FoxDot.lib.Clock, source_path)
        self.test.AddMidiFolder(style_name)
        self.test.Live_event(
        )  # Online random playing event determined by prosperity function
        self.test.set_tempo_pattern(
            4, 4
        )  # if the meta file exists, calling this routine is not required
        if using_callback:
            self.stream = self.pa.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       output=True,
                                       frames_per_buffer=self.CHUNK,
                                       stream_callback=self.callback)
            print(self.pa.get_default_output_device_info())
            print(self.pa.get_default_input_device_info())
            self.t_start = time.time()
            beepsnd, _ = librosa.load('block.wav', sr=None)
            out1 = beepsnd.tobytes()
            #print(beepsnd.size, len(out1))
            self.beepsnd = out1
            self.Flag = False

            while self.stream.is_active():
                if len(self.buffer) == self.RATE * 14:
                    print('14 sec')
                    print(self.time_info)
                    print(time.time() - self.t_start)
                    self.tmp = np.array(self.buffer)
                    self.buffer.clear()
                    print(time.time() - self.t_start)
                    chroma_thread = threading.Thread(target=self.chroma_rec,
                                                     args=())
                    chroma_thread.start()
                    #chord = chroma_thread.run()

                    tmp2 = self.rnn(self.tmp)
                    # tmp2 = librosa.onset.onset_strength(tmp,sr=self.RATE, hop_length = int(self.RATE / 100),max_size=1,aggregate=np.median, n_mels=256)
                    # tmp2 /= np.max(tmp2)
                    #t_axes = librosa.frames_to_time(np.arange(len(tmp2)),sr=self.RATE)
                    t_proc = time.time() - self.t_start
                    print(t_proc)
                    tmp3_2 = self.act_proc(tmp2)
                    tmp3_1 = 60 / np.mean(np.diff(tmp3_2))
                    # print(tmp3)
                    #tmp3_1,tmp3_2 = librosa.beat.beat_track(onset_envelope=tmp2, sr=self.RATE)
                    print('tempo is %f' % tmp3_1)
                    print('beat is ', tmp3_2)

                    t_proc = time.time() - self.t_start
                    chroma_thread.join()

                    print(t_proc)
                    t = threading.Timer(60. / tmp3_1 - t_proc, self.flagit, ())
                    t.daemon = True
                    t.start()
                    print(int(tmp3_1))
                    self.test.StartInTime(
                        np.mean(np.diff(tmp3_2)) * 4 - (14 - tmp3_2[-1]) -
                        t_proc, int(tmp3_1))
                    break
                    # self.stream.write(self.beepsnd)

                    print(time.time() - self.t_start)
                else:
                    time.sleep(0.001)
            while (1):
                time.sleep(0.01)

        else:
            self.stream = self.pa.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       output=True,
                                       frames_per_buffer=self.CHUNK)
            self.t_start = time.time()
            self.loop()
Example #13
def main():
    video_dir = 'dance_videos\\Danny Ocean - Baby I Wont.mp4'
    beat_dir = video_dir.strip('mp4') + 'npy'
    interval = [32, 36]  #in seconds
    REDU = True

    motion_base_dir = 'MyNao\\motion_base\\motion_base.json'
    if not os.path.exists(motion_base_dir):
        motion_base = {}
        with open(motion_base_dir, 'w') as f:
            json.dump(motion_base, f)
    with open(motion_base_dir, 'r') as f:
        motion_base = json.load(f)
    if REDU:
        pose_save_dir = 'MyNao\\motion_glance\\' + str(len(motion_base) - 1)
    else:
        pose_save_dir = 'MyNao\\motion_glance\\' + str(len(motion_base))
    if not os.path.exists(pose_save_dir):
        os.mkdir(pose_save_dir)

    motion = {}
    motion['feature'] = {}
    motion['feature']['bps'] = [None]
    motion['feature']['symmetric'] = False
    motion['feature']['repeat'] = True
    motion['frame'] = {}
    #args = parse_args()
    #cfg.set_args(args.gpu_ids)
    cudnn.fastest = True
    cudnn.benchmark = True
    cudnn.deterministic = False
    cudnn.enabled = True

    time_0 = time.time()
    tester = Tester(24)

    ##loading 3D pose estimation model
    tester._make_model()

    time_1 = time.time()
    print('loading integral pose model elapse:', round(time_1 - time_0, 2),
          's')

    ##loading yolo detector
    detector = YOLOv3(
        model_def=
        "3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\config\\yolov3.cfg",
        class_path=
        "3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\data\\coco.names",
        weights_path=
        "3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\weights\\yolov3.weights",
        classes=('person', ),
        max_batch_size=16,
        device=torch.device('cuda:{}'.format(cfg.gpu_ids[0])))
    print('loading yolo elapse:', round(time.time() - time_1, 2), 's')
    skeleton = ((0, 7), (7, 8), (8, 9), (9, 10), (8, 11), (11, 12), (12, 13),
                (8, 14), (14, 15), (15, 16), (0, 1), (1, 2), (2, 3), (0, 4),
                (4, 5), (5, 6))
    fig = plt.figure(figsize=(10, 10))
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)
    ])
    ##load model

    if not os.path.exists(video_dir.strip('mp4') + 'wav'):
        videoclip = VideoFileClip(video_dir)
        audioclip = videoclip.audio
        audioclip.write_audiofile(video_dir.strip('mp4') + 'wav')

    video = cv2.VideoCapture(video_dir)
    if not os.path.exists(beat_dir):
        time_2 = time.time()
        videoclip = VideoFileClip(video_dir)
        audioclip = videoclip.audio
        beat_activation = RNNBeatProcessor()(video_dir.strip('mp4') + 'wav')
        processor = DBNBeatTrackingProcessor(fps=100)
        beats = processor(beat_activation)
        frames_at_beat = (beats / audioclip.duration *
                          video.get(cv2.CAP_PROP_FRAME_COUNT)).astype(int)
        print('extracting beat sequence elapse:',
              round(time.time() - time_2, 2), 's')
        np.save(beat_dir, frames_at_beat)
    frames_at_beat = np.load(beat_dir).tolist()

    for beat in frames_at_beat:
        if interval[0] * video.get(cv2.CAP_PROP_FPS) > beat:
            continue
        else:
            interval[0] = beat
            break
    for beat in frames_at_beat:
        if interval[1] * video.get(cv2.CAP_PROP_FPS) > beat:
            continue
        else:
            interval[1] = beat
            break

    video.set(1, interval[0])
    frame = 0
    next_beat = 0
    last_beat = 0
    num_beat = 0
    num_frame_between_beats = []
    with torch.no_grad():
        while True:
            time_start = time.time()
            current_frame = video.get(cv2.CAP_PROP_POS_FRAMES)
            ret_val, raw_image = video.read()
            if current_frame == interval[1]:
                break
            input_img = raw_image.copy()
            ##using yolo to get human bounding box
            detections = detector.predict_single(input_img)
            # if not detections.cpu().numpy().all():
            #     detections = (0,0,input_img.shape[1],input_img.shape[0],1,1)
            #     print('not detected')

            if detections is None:
                detections = np.array(
                    [[0, 0, input_img.shape[1], input_img.shape[0], 1, 1, 1]])
                print('not detected')
            elif detections.size()[0] == 0:
                detections = np.array(
                    [[0, 0, input_img.shape[1], input_img.shape[0], 1, 1, 1]])
                print('not detected')
            last_conf = 0
            last_last_conf = 0
            for i, (x1_pred, y1_pred, x2_pred, y2_pred, conf, cls_conf,
                    cls_pred) in enumerate(detections):
                if conf.item() > last_conf:
                    x1 = int(round(x1_pred.item())) - 40
                    x2 = int(round(x2_pred.item())) + 40
                    y1 = int(round(y1_pred.item())) - 20
                    y2 = int(
                        round(y2_pred.item())
                    ) + 20  #for getting a larger bounding box to cover the full body, in order to get more accurate pose
                    last_last_conf = last_conf
                    last_conf = conf.item()
                print(last_conf, last_last_conf)
                if last_last_conf != 0:
                    sys.exit()
            #print(x1, x2, y1, y2, last_conf)
            img_patch = (input_img[y1:y2,
                                   x1:x2, ::-1]).copy().astype(np.float32)
            input_patch = cv2.resize(img_patch, (cfg.input_shape))

            input_patch = transform(input_patch).unsqueeze(0)
            coord_out = tester.model(input_patch)
            print('Running model time:', round(time.time() - time_start, 2),
                  's')

            motion['frame'][frame] = {}
            if frame + interval[0] in frames_at_beat:
                motion['frame'][frame]['next_beat'] = 0
                motion['frame'][frame]['last_beat'] = 0
                #frames_at_beat.remove(frame)
                next_beat = frames_at_beat.index(frame + interval[0]) + 1
                last_beat = frames_at_beat.index(frame + interval[0])
                num_beat += 1
                num_frame_between_beats.append(frames_at_beat[next_beat] -
                                               frames_at_beat[last_beat])
                print('Record key frame with beat:', current_frame)
            else:
                motion['frame'][frame]['next_beat'] = frames_at_beat[
                    next_beat] - (frame + interval[0])
                motion['frame'][frame]['last_beat'] = (
                    frame + interval[0]) - frames_at_beat[last_beat]

            coord_out = coord_out.cpu().numpy()
            coord_out_resize = coord_out * np.array([
                img_patch.shape[1] / cfg.input_shape[1],
                img_patch.shape[0] / cfg.input_shape[0], 1
            ])

            for idx in range(coord_out_resize.shape[1] - 1):
                motion['frame'][frame][idx] = (
                    coord_out_resize[0][idx][0].item(),
                    coord_out_resize[0][idx][2].item(),
                    coord_out_resize[0][idx][1].item())

            vis = True
            vis_3d = False
            if vis:
                tmpimg = input_patch[0].cpu().numpy()
                tmpimg = tmpimg * np.array(cfg.pixel_std).reshape(
                    3, 1, 1) + np.array(cfg.pixel_mean).reshape(3, 1, 1)
                tmpimg = (tmpimg).astype(np.uint8)
                tmpimg = tmpimg[::-1, :, :]
                tmpimg = np.transpose(tmpimg, (1, 2, 0)).copy()
                tmpkps = np.zeros((3, 18))
                tmpkps[:2, :] = coord_out[0, :, :2].transpose(
                    1, 0) / cfg.output_shape[0] * cfg.input_shape[0]
                tmpkps[2, :] = 1
                tmpimg = vis_keypoints(tmpimg, tmpkps, skeleton)
                tmpimg = cv2.resize(tmpimg,
                                    (img_patch.shape[1], img_patch.shape[0]))
                file_name = pose_save_dir + '\\{0}.png'.format(
                    str(frame).zfill(4))
                cv2.imwrite(file_name, tmpimg)
            if vis_3d:
                #coord_out = coord_out.cpu().numpy()
                #coord_out = coord_out * np.array([img_patch.shape[1]/cfg.input_shape[1], img_patch.shape[0]/cfg.input_shape[0], 1])
                pred = coord_out_resize.squeeze(
                )  #remove first batch dimension

                ax = plt.subplot('121', projection='3d')
                plt.axis('off')
                show3D_pose(pred, ax, skeleton, radius=40)
                file_name = pose_save_dir + '\\{0}.png'.format(
                    str(frame).zfill(4))
                plt.savefig(file_name)
                # cv2.imwrite(file_name, tmpimg)

            frame += 1
            print('Processing Frame:', round(time.time() - time_start, 2), 's')

        motion['feature']['fpb'] = np.mean(num_frame_between_beats)
        if REDU:
            motion_base[len(motion_base) - 1] = motion
        else:
            motion_base[len(motion_base)] = motion
        #with open(motion_base_dir, 'w') as f:
        #    json.dump(motion_base, f)
    print('done with', num_beat + 1,
          'beats! (This should be even for a normal dance)')
    print('num_frame between beats:')
    print(num_frame_between_beats)
Example #14
import sys
import bmaFunctions
import numpy
from madmom.features.chords import DeepChromaChordRecognitionProcessor
from madmom.audio.chroma import DeepChromaProcessor
from madmom.features.beats import DBNBeatTrackingProcessor
from madmom.features.beats import RNNBeatProcessor

#Setting up Deep Chroma Chord Recognition Processor
dcp = DeepChromaProcessor()
decode = DeepChromaChordRecognitionProcessor()
chroma = dcp(sys.argv[1])
chords = decode(chroma)

#Setting up Dynamic Bayesian Network beat tracking processor
proc = DBNBeatTrackingProcessor(fps=100)
act = RNNBeatProcessor()(sys.argv[1])
beats = proc(act)

#calculating msi (mean inter-beat interval in milliseconds)
beatsArray = numpy.array(beats)
msi = numpy.mean(beatsArray[1:] - beatsArray[:-1]) * 1000

beatmap = bmaFunctions.assignKeys(beats, chords, sys.argv[3])
if msi < 360:
    del beatmap[1::2]

#generating and printing beatmap
bmaFunctions.fancyPrint(beatmap, msi, sys.argv[2])

#TODO: eliminate trailing Ns
Example #15
def main():
    video_list = ['Cant stop the feeling - Justin Timberlake - Easy Dance for Kids', 'Dance like yo daddy', 'Danny Ocean - Baby I Wont', 'Si una vez - If I Once', 'Vaiven - MegaMix']
    for video in video_list:
        video_dir = 'dance_videos\\' + video + '.mp4'
        beat_dir = video_dir.strip('mp4') + 'npy'

        cudnn.fastest = True
        cudnn.benchmark = True
        cudnn.deterministic = False
        cudnn.enabled = True

        time_0 = time.time()
        tester = Tester(24)

        ##loading 3D pose estimation model
        tester._make_model()

        time_1 = time.time()
        print('loading integral pose model elapse:',round(time_1-time_0,2),'s')

        ##loading yolo detector
        detector = YOLOv3( model_def="3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\config\\yolov3.cfg",
                            class_path="3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\data\\coco.names",
                            weights_path="3DMPPE_POSENET_RELEASE\\common\\detectors\\yolo\\weights\\yolov3.weights",
                            classes=('person',),
                            max_batch_size=16,
                            device=torch.device('cuda:{}'.format(cfg.gpu_ids[0])))
        print('loading yolo elapse:',round(time.time()-time_1,2),'s')
        skeleton = ( (0, 7), (7, 8), (8, 9), (9, 10), (8, 11), (11, 12), (12, 13), (8, 14), (14, 15), (15, 16), (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6) )
        #fig = plt.figure(figsize=(10,10)) 
        transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)]
                                )
        
        if not os.path.exists(video_dir.strip('mp4')+'wav'):
            videoclip = VideoFileClip(video_dir)
            audioclip = videoclip.audio
            audioclip.write_audiofile(video_dir.strip('mp4')+'wav')

        video = cv2.VideoCapture(video_dir)
        if not os.path.exists(beat_dir):
            time_2 = time.time()
            videoclip = VideoFileClip(video_dir)
            audioclip = videoclip.audio
            beat_activation = RNNBeatProcessor()(video_dir.strip('mp4')+'wav')
            processor = DBNBeatTrackingProcessor(fps=100)
            beats = processor(beat_activation)
            frames_at_beat = (beats/audioclip.duration*video.get(cv2.CAP_PROP_FRAME_COUNT)).astype(int)
            print('extracting beat sequence elapse:', round(time.time()-time_2, 2), 's')
            np.save(beat_dir, frames_at_beat)
        frames_at_beat = np.load(beat_dir).tolist()

        ##########################################
        dance_primitives_dir = '.\\danceprimitives_trial'
        if not os.path.exists(dance_primitives_dir):
            os.mkdir(dance_primitives_dir)
        motion_index = len(os.listdir(dance_primitives_dir))
        for i in range(len(frames_at_beat)-1):

            motion_dir = os.path.join(dance_primitives_dir, '{0}'.format(str(motion_index).zfill(5)))
            if not os.path.exists(motion_dir):
                os.mkdir(motion_dir)

            start = frames_at_beat[i]
            end =frames_at_beat[i+1]
            dance_primitive = np.empty((0, 17*3)) # for motion control
            #dance_primitive_norm = np.empty((0, 17*3)) # for motion clustering
            video.set(1, start)
            jump_flag = 0
            frame = 0
            with torch.no_grad():
                time_start = time.time()
                while True:
                    current_frame = video.get(cv2.CAP_PROP_POS_FRAMES)
                    ret_val, raw_image = video.read()
                    if current_frame == end:
                        break
                    ##using yolo to get human bounding box
                    input_img = raw_image.copy()
                    detections = detector.predict_single(input_img)
                    if detections is None or detections.size()[0] == 0:
                        jump_flag = 1
                        break
                    last_conf = 0
                    for i, (x1_pred, y1_pred, x2_pred, y2_pred, conf, cls_conf, cls_pred) in enumerate(detections):
                        if conf.item() > last_conf:
                            x1 = max(int(round(x1_pred.item())) - 40, 0)
                            x2 = min(int(round(x2_pred.item())) + 40, input_img.shape[1]-1)
                            y1 = max(int(round(y1_pred.item())) - 20, 0)
                            y2 = min(int(round(y2_pred.item())) + 20, input_img.shape[0]-1)   #for getting a larger bounding box to cover the full body, in order to get more accurate pose
                            last_conf = conf.item()
                    img_patch = (input_img[y1:y2, x1:x2, ::-1]).copy().astype(np.float32)
                    ##using ResPoseNet to get 3D human pose
                    input_patch = cv2.resize(img_patch,(cfg.input_shape))
                    input_patch = transform(input_patch).unsqueeze(0)
                    coord_out = tester.model(input_patch).cpu().numpy() #dimension: 1 X 18 X 3, where '3' refers to x, z, y in sequence.
                    #show_pose(input_patch, img_patch, coord_out, skeleton, motion_dir, frame)
                    coord_out_resize = coord_out * np.array([img_patch.shape[1]/cfg.input_shape[1], img_patch.shape[0]/cfg.input_shape[0], 1]) #transform to original scale
                    coord_out = coord_out_resize[:, :-1, :] # neglect the key point for the "thorax"
                    #coord_out_norm = (coord_out-np.mean(coord_out, axis=1))/np.std(coord_out, axis=1)
                    dance_primitive = np.vstack((dance_primitive, np.reshape(coord_out[0], -1)))
                    #dance_primitive_norm = np.vstack((dance_primitive_norm, np.reshape(coord_out_norm[0], -1)))
                    frame += 1
                print('Processing Time Elapse:', round(time.time()-time_start,2), 's')

            if jump_flag == 1:
                continue

            #norm_sample = np.empty((0, 17*3))
            #num_sample = 10
            #print(dance_primitive_norm.shape[0])
            #sample_step = (dance_primitive_norm.shape[0]-1)/(num_sample-1)
            #for i in range(num_sample):
            #    norm_sample = np.vstack((norm_sample, dance_primitive_norm[round(i * sample_step)]))
            
            #print(norm_sample.shape)
            print(dance_primitive.shape)
            #np.save(os.path.join(motion_dir, 'dance_motion_normlized_'+ str(motion_index)), norm_sample)
            np.save(os.path.join(motion_dir, 'dance_motion_'+ str(motion_index)), dance_primitive)

            motion_index+=1



    ###########################################
    sys.exit()
    video.set(1, interval[0])
    frame=0
    next_beat = 0
    last_beat = 0
    num_beat = 0
    num_frame_between_beats = []
    with torch.no_grad():
        while True:
            time_start = time.time()
            current_frame = video.get(cv2.CAP_PROP_POS_FRAMES)
            ret_val, raw_image = video.read()
            if current_frame == interval[1]:
                break
            input_img = raw_image.copy()
                    ##using yolo to get human bounding box
            detections = detector.predict_single(input_img)
            # if not detections.cpu().numpy().all():
            #     detections = (0,0,input_img.shape[1],input_img.shape[0],1,1)
            #     print('not detected')

            if detections is None:
                detections = np.array([[0,0,input_img.shape[1],input_img.shape[0],1,1,1]])
                print('not detected')
            elif detections.size()[0] == 0:
                detections = np.array([[0,0,input_img.shape[1],input_img.shape[0],1,1,1]])
                print('not detected')
            last_conf = 0
            last_last_conf = 0
            for i, (x1_pred, y1_pred, x2_pred, y2_pred, conf, cls_conf, cls_pred) in enumerate(detections):
                if conf.item() > last_conf:
                    x1 = int(round(x1_pred.item())) - 40
                    x2 = int(round(x2_pred.item())) + 40
                    y1 = int(round(y1_pred.item())) - 20
                    y2 = int(round(y2_pred.item())) + 20    #for getting a larger bounding box to cover the full body, in order to get more accurate pose
                    last_last_conf = last_conf
                    last_conf = conf.item()
                print(last_conf, last_last_conf)
                if last_last_conf != 0:
                    sys.exit()
            #print(x1, x2, y1, y2, last_conf)
            img_patch = (input_img[y1:y2, x1:x2, ::-1]).copy().astype(np.float32)
            input_patch = cv2.resize(img_patch,(cfg.input_shape))

            input_patch = transform(input_patch).unsqueeze(0)
            coord_out = tester.model(input_patch)
            print('Running model time:',round(time.time()-time_start,2),'s')

            motion['frame'][frame] = {}
            if frame+interval[0] in frames_at_beat:
                motion['frame'][frame]['next_beat'] = 0
                motion['frame'][frame]['last_beat'] = 0
                #frames_at_beat.remove(frame)
                next_beat = frames_at_beat.index(frame+interval[0]) + 1
                last_beat = frames_at_beat.index(frame+interval[0])
                num_beat += 1
                num_frame_between_beats.append(frames_at_beat[next_beat] - frames_at_beat[last_beat])
                print('Record key frame with beat:', current_frame)
            else:
                motion['frame'][frame]['next_beat'] = frames_at_beat[next_beat] - (frame+interval[0])
                motion['frame'][frame]['last_beat'] = (frame+interval[0]) - frames_at_beat[last_beat]

            coord_out = coord_out.cpu().numpy()
            coord_out_resize = coord_out * np.array([img_patch.shape[1]/cfg.input_shape[1], img_patch.shape[0]/cfg.input_shape[0], 1])

            for idx in range(coord_out_resize.shape[1]-1):
                motion['frame'][frame][idx]=(coord_out_resize[0][idx][0].item(), coord_out_resize[0][idx][2].item(), coord_out_resize[0][idx][1].item())
            
            vis = True
            vis_3d = False
            if vis:
                    tmpimg = input_patch[0].cpu().numpy()
                    tmpimg = tmpimg * np.array(cfg.pixel_std).reshape(3,1,1) + np.array(cfg.pixel_mean).reshape(3,1,1)
                    tmpimg = (tmpimg).astype(np.uint8)
                    tmpimg = tmpimg[::-1, :, :]
                    tmpimg = np.transpose(tmpimg,(1,2,0)).copy()
                    tmpkps = np.zeros((3,18))
                    tmpkps[:2,:] = coord_out[0,:,:2].transpose(1,0) / cfg.output_shape[0] * cfg.input_shape[0]
                    tmpkps[2,:] = 1
                    tmpimg = vis_keypoints(tmpimg, tmpkps, skeleton)
                    tmpimg = cv2.resize(tmpimg,(img_patch.shape[1],img_patch.shape[0]))
                    file_name = pose_save_dir+'\\{0}.png'.format(str(frame).zfill(4))
                    cv2.imwrite(file_name, tmpimg)
            if vis_3d:
                #coord_out = coord_out.cpu().numpy()
                #coord_out = coord_out * np.array([img_patch.shape[1]/cfg.input_shape[1], img_patch.shape[0]/cfg.input_shape[0], 1])
                pred=coord_out_resize.squeeze() #remove first batch dimension

                ax=plt.subplot('121',projection='3d')
                plt.axis('off')
                show3D_pose(pred,ax,skeleton,radius=40)
                file_name = pose_save_dir + '\\{0}.png'.format(str(frame).zfill(4))
                plt.savefig(file_name)
                # cv2.imwrite(file_name, tmpimg)

            frame+=1
            print('Processing Frame:',round(time.time()-time_start,2),'s')

        motion['feature']['fpb'] = np.mean(num_frame_between_beats)
        if REDU:
            motion_base[len(motion_base)-1] = motion
        else:
            motion_base[len(motion_base)] = motion
        #with open(motion_base_dir, 'w') as f:
        #    json.dump(motion_base, f)
    print('done with', num_beat + 1, 'beats! (This should be even for a normal dance)')
    print('num_frame between beats:')
    print(num_frame_between_beats)
Example #16
def main():
    """DBNBeatTracker"""

    # define parser
    p = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''
    The DBNBeatTracker.py program detects all beats in an audio file according to
    the method described in:

    "A Multi-Model Approach to Beat Tracking Considering Heterogeneous Music
     Styles"
    Sebastian Böck, Florian Krebs and Gerhard Widmer.
    Proceedings of the 15th International Society for Music Information
    Retrieval Conference (ISMIR), 2014.

    It does not use the multi-model (Section 2.2.) and selection stage (Section
    2.3), i.e. this version corresponds to the pure DBN version of the
    algorithm for which results are given in Table 2.

    Instead of the originally proposed state space and transition model for the
    DBN, the following is used:

    "An Efficient State Space Model for Joint Tempo and Meter Tracking"
    Florian Krebs, Sebastian Böck and Gerhard Widmer.
    Proceedings of the 16th International Society for Music Information
    Retrieval Conference (ISMIR), 2015.

    This program can be run in 'single' file mode to process a single audio
    file and write the detected beats to STDOUT or the given output file.

      $ DBNBeatTracker.py single INFILE [-o OUTFILE]

    If multiple audio files should be processed, the program can also be run
    in 'batch' mode to save the detected beats to files with the given suffix.

      $ DBNBeatTracker.py batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES

    If no output directory is given, the program writes the files with the
    detected beats to the same location as the audio files.

    The 'pickle' mode can be used to store the used parameters to be able to
    exactly reproduce experiments.

    ''')

    # version
    p.add_argument('--version',
                   action='version',
                   version='DBNBeatTracker.py.2016')
    # input/output options
    io_arguments(p, output_suffix='.beats.txt', online=True)
    ActivationsProcessor.add_arguments(p)
    # signal processing arguments
    SignalProcessor.add_arguments(p, norm=False, gain=0)
    # peak picking arguments
    DBNBeatTrackingProcessor.add_arguments(p)
    NeuralNetworkEnsemble.add_arguments(p, nn_files=None)

    # parse arguments
    args = p.parse_args()

    # set immutable arguments
    args.fps = 100

    # print arguments
    if args.verbose:
        print(args)

    # input processor
    if args.load:
        # load the activations from file
        in_processor = ActivationsProcessor(mode='r', **vars(args))
    else:
        # use a RNN to predict the beats
        in_processor = RNNBeatProcessor(**vars(args))

    # output processor
    if args.save:
        # save the RNN beat activations to file
        out_processor = ActivationsProcessor(mode='w', **vars(args))
    else:
        # track the beats with a DBN
        beat_processor = DBNBeatTrackingProcessor(**vars(args))
        # output handler
        from madmom.utils import write_events as writer
        # sequentially process everything
        out_processor = [beat_processor, writer]

    # create an IOProcessor
    processor = IOProcessor(in_processor, out_processor)
    # and call the processing function
    args.func(processor, **vars(args))
Example #17
def activation2beat(activation, fps=100):
    return DBNBeatTrackingProcessor(fps=fps)(activation)
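A minimal usage sketch (the file name is an assumption): the RNN beat activation only needs to be computed once and can feed both this helper and the activation2downbeat helper from Example #11, since both trackers operate on the same 100 fps activation.

from madmom.features.beats import RNNBeatProcessor

activation = RNNBeatProcessor()('example.wav')   # hypothetical input file, 100 fps activation
beats = activation2beat(activation, fps=100)
downbeats = activation2downbeat(activation, fps=100)
print(len(beats), len(downbeats))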