Example #1
def main():
    # frame_length presets: man 900, middle 750, default 500; frame_length = 900
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--speed', type=float, default=1.)
    parser.add_argument('-t', '--time', type=float, default=-10.)
    parser.add_argument('-o', '--output', type=str, default='output.wav')
    parser.add_argument('-i', '--input', type=str, default='input.wav')

    args = parser.parse_args()

    parameters = {}

    input_filename = args.input
    output_filename = args.output

    if not os.path.isfile(input_filename):
        raise RuntimeError('no input file')

    x, fs = core.load(input_filename)
    #f0, sp, ap = pw.wav2world(x, fs)
    frame_length = 1500  # 100000 // int(calculateF0(f0)) // 2 * 2
    y, sr = core.load(input_filename, sr=fs)

    onset_frames = onset.onset_detect(x,
                                      sr=sr,
                                      wait=1,
                                      pre_avg=1,
                                      post_avg=1,
                                      pre_max=1,
                                      post_max=1)
    onset_times = librosa.frames_to_time(onset_frames)
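    # frames_to_time and the plotting below assume librosa's default 22050 Hz rate (core.load's default)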

    plt.plot(y)
    for i in onset_times:
        plt.plot([i * 22050, i * 22050], [-1, 1], color="red")

    S = librosa.stft(x)
    logS = librosa.amplitude_to_db(abs(S))

    plt.savefig('woman.png')

    if args.time < 0:
        parameters['origin_time'] = core.get_duration(y, sr)
        parameters['convert_time'] = parameters['origin_time'] / args.speed
    else:
        parameters['origin_time'] = core.get_duration(y, sr)
        parameters['convert_time'] = args.time
    parameters['sample_rate'] = sr
    parameters['frame_length'] = int(fs / 22050 * frame_length)

    #if parameters['convert_time'] / parameters['origin_time'] > 0.8:
    convert_upper_threshold(input_filename, output_filename, parameters)
Example #2
def upload_tracks(request, project_id):
    message = ""
    project = Project.objects.get(id=project_id)
    if request.method == "POST":
        form = MultipleFileFieldForm(request.POST, request.FILES)
        current_files = project.audiotrack_set.values_list("name", flat=True)
        if form.is_valid():
            files = request.FILES.getlist("file_field")
            non_audio = 0
            duplicate = 0
            success = 0
            for f in files:
                if not f.name.endswith((".mp3", ".ogg", ".wav", ".flac")):
                    non_audio += 1
                    continue
                if f.name in current_files:
                    duplicate += 1
                    continue
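                # save the upload under MEDIA_ROOT chunk by chunk before probing its duration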
                local_file = os.path.join(settings.MEDIA_ROOT, f.name)
                with open(local_file, 'wb+') as destination:
                    for chunk in f.chunks():
                        destination.write(chunk)
                try:
                    duration = get_duration(filename=local_file)
                except Exception:
                    duration = 0

                AudioTrack.objects.create(
                    name=f.name,
                    file=os.path.join(settings.MEDIA_URL, f.name),
                    format=os.path.splitext(f.name)[1][1:],
                    project=project,
                    duration=duration)
                success += 1

            message = f"{success} files successfully uploaded"
            if non_audio:
                message += f", {non_audio} non-audio files rejected"
            if duplicate:
                message += f", {duplicate} duplicate files rejected"

    form = MultipleFileFieldForm()
    return render(request, 'annotate/upload.html', {
        'form': form,
        'project': project,
        'message': message
    })
Example #3
def get_next_clip(all_files):
    # select only clips that are available
    all_files = [a for a in all_files if a["elapsed"] is False]
    # if no more files are available, exit
    if len(all_files) == 0:
        return None

    # randomly pick a file
    selected_idx = np.random.randint(0, len(all_files))
    selected_file = all_files[selected_idx]
    file_path = selected_file["file_path"]

    # load only part of the clip

    clip, sr = librosa.load(file_path,
                            sr=args.sample_rate,
                            offset=selected_file["time_elapsed"].seconds,
                            duration=args.max_clip_len)

    assert sr == args.sample_rate

    clip_len = timedelta(seconds=get_duration(clip, sr=sr))

    if clip_len.seconds == 0:
        selected_file["elapsed"] = True
        return timedelta(seconds=0, hours=0, minutes=0)

    elapsed_time = selected_file["time_elapsed"].seconds
    # print("Clip ID: ", elapsed_time, "Clip Len:", clip_len,
    #       "Time Elapsed: ", selected_file["time_elapsed"])
    selected_file["time_elapsed"] += clip_len

    split_clip_id = "{}_{}_{}".format(selected_file["book_id"],
                                      selected_file["clip_id"], elapsed_time)
    dest_path = os.path.join(output_dir, "{}.wav".format(split_clip_id))

    log.debug("Saving clip to {}".format(dest_path))

    librosa.output.write_wav(dest_path, clip, args.sample_rate)

    return clip_len
Example #4
def extract_feature(self, song_fname):
    song, _ = self.load_song(song_fname)
    # duration in seconds; pass the song's own sample rate instead of librosa's 22050 Hz default
    duration = int(get_duration(song, sr=self.sr))
    n = song.shape[0]
    # over-allocate the feature matrix; unused rows are trimmed after the loop
    num_windows = int(np.floor((duration - self.wsize) / self.stride)) + 20
    feats = np.zeros((num_windows, self.feat_dim), dtype=np.float32)
    start = 0
    counter = 0
    while True:
        end = start + int(self.wsize * self.sr)
        if end >= n:
            break
        window = song[start:end]
        feat_ = self._calc_all_feat(window)
        feats[counter] = feat_
        counter += 1
        start += int(self.stride * self.sr)
        if start >= n:
            break
    if counter < num_windows:
        feats = feats[:counter, :]
    return feats
Example #5
            start = datetime.now()

            # Get speaker ID
            split, _, speaker, _ = fpath.split(os.sep)[-4:]
            if speaker in all_speakers:
                speaker_id = all_speakers.index(speaker)
            else:
                speaker_id = len(all_speakers)
                all_speakers.append(speaker)

            # Get duration
            if debug:
                print('\n==== DEBUG ====')
                print(fpath)
            try:
                duration = str(timedelta(seconds=get_duration(filename=fpath)))
                if debug:
                    print('File duration: {}'.format(duration))
            except:
                print('UNABLE TO GET DURATION')
                raise

            # Chunk into segments with speech audio
            times, segs = VAD_chunk(2, fpath)
            if segs == []:
                print('No voice activity detected')
                continue
            if debug:
                print('{} - {:,} segments'.format(datetime.now() - start,
                                                  len(segs)))
Example #6
    transforms.ToTensor(), normalize
])

categories = utils.categories  ### ImageNet Categories

places_categories = placesCNN_basic.classes  ### Places Categories

fps = 24
nb_frames = 1

nbsec = 1.49

n_obj = 3

beg_film = 0
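# film length in whole seconds, derived from the duration of its audio track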
end_film = np.floor(get_duration(filename=wavfile))

allpreds = []
onsets = []

model_imagenet = resnet18(pretrained=True)
model_imagenet.eval()

model_places = placesCNN_basic.model.eval()

### Define and register hook for extracting output feature map
places_fm = []
places_proba = []


def get_fm_places(m, i, o):
Example #7
#!/usr/bin/env python

from sklearn.metrics.pairwise import pairwise_distances
import scipy
from librosa.core import get_duration
import numpy as np
import h5py
from scipy.fftpack import dct, idct
from utils import (WINDOW_SIZE, HOP_SIZE, SAMPLE_RATE, COEFS, TIMBRE_GROUP)

ONE_FRAME = get_duration(sr=SAMPLE_RATE,
                         n_fft=WINDOW_SIZE,
                         hop_length=HOP_SIZE,
                         S=np.zeros((1, 2)))
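# With a two-frame dummy spectrogram and the default center=True, the value above
# works out to roughly HOP_SIZE / SAMPLE_RATE, i.e. the time between consecutive frames.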


def beat_similarity(vecs):
    return 1.0 - pairwise_distances(vecs, metric='cosine')


def normed_diags(B, max_lag):
    B_diags = np.asarray([B.trace(i) for i in range(max_lag)])
    B_diags -= np.min(B_diags)
    return B_diags / np.max(B_diags)


def fft_autocorrelate(S):
    B_fft = scipy.signal.fftconvolve(S, S[::-1, ::-1], mode='full')
    B_fft = B_fft[S.shape[0]:, S.shape[1]:]
    return B_fft
Example #8
        with open(os.path.join(meta_path, cl) + ".json") as reader:
            meta_data = json.load(reader)

        meta_data = {d["book_id"]: d for d in meta_data}

        book_ids = defaultdict(
            lambda: timedelta(hours=0, minutes=0, seconds=0))
        author_ids = defaultdict(
            lambda: timedelta(hours=0, minutes=0, seconds=0))

        for idx, file_id in enumerate(all_files):
            file_path = os.path.join(cl_path, file_id)
            sound, sample_rate = librosa.load(file_path, sr=None)
            assert sample_rate == assigned_sample_rate
            current_time += timedelta(
                seconds=get_duration(sound, sr=sample_rate))

            book_id = file_id.split("_")[0]
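            # accumulate audio duration per book and per reader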
            book_ids[book_id] += timedelta(
                seconds=get_duration(sound, sr=sample_rate))
            author_ids[meta_data[book_id]["reader_url"]] += timedelta(
                seconds=get_duration(sound, sr=sample_rate))

            if idx % 100 == 0:
                print("\t{} of {}".format(idx, len(all_files)))

        print("{} Books".format(cl))
        for book_id in book_ids:
            print("\t{}: Time: {}".format(book_id, book_ids[book_id]))
            book_times.append({
                "language": cl,
Example #9
# collect all wav files
cwd = os.getcwd()
stations = filter(os.path.isdir, os.listdir(cwd))
stations = [f for f in stations if not f.startswith('.')]

audio_paths = []
for station in stations:
    data_path = f'{cwd}/{station}'
    files = os.listdir(data_path)
    wav_names = [f for f in files if f.endswith('.wav')]
    wav_paths = [f'{data_path}/{f}' for f in wav_names]
    audio_paths.extend(wav_paths)

print(f'we found {len(audio_paths)} audio files in total')
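# per-file durations in seconds; get_duration(filename=...) reads the length without decoding the full file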
lens = [get_duration(filename=f) for f in audio_paths]
print(f'their total play time amounts to {sum(lens) / 60 / 60:.2f} hours')

# collect all aup files
labeled_data = []
for station in stations:
    data_path = f'{cwd}/{station}'
    files = os.listdir(data_path)
    audacity_projects = [f for f in files if f.endswith('.aup')]
    project_paths = [f'{data_path}/{aup}' for aup in audacity_projects]
    station_data = [extract_aup(i, data_path, station, verbose=0) for i in project_paths]
    labeled_data.extend(station_data)

print(f'{len(labeled_data)} instances have labels provided')

# infer amount of detections