Example #1
0
def test_initialize():
    """Initialization returns a core.Dataset and rejects unknown names."""
    dataset = initialize("orchset")
    assert isinstance(dataset, core.Dataset)
    assert dataset.name == "orchset"

    # an unknown dataset name must raise ValueError
    with pytest.raises(ValueError):
        initialize("asdfasdfasdfa")
Example #2
0
def test_dataset_errors():
    """Exercise the error paths of mirdata.Dataset."""
    # unknown dataset names are rejected
    with pytest.raises(ValueError):
        mirdata.initialize("not_a_dataset")

    # with no track class, every track/multitrack accessor raises
    dataset = mirdata.initialize("orchset")
    dataset._track_class = None
    accessors = (
        lambda: dataset.track("asdf"),
        lambda: dataset.multitrack("asdf"),
        lambda: dataset.load_tracks(),
        lambda: dataset.load_multitracks(),
        lambda: dataset.choice_track(),
        lambda: dataset.choice_multitrack(),
    )
    for accessor in accessors:
        with pytest.raises(AttributeError):
            accessor()

    # acousticbrainz_genre has no local index file available
    dataset = mirdata.initialize("acousticbrainz_genre")
    with pytest.raises(FileNotFoundError):
        dataset._index

    # invalid multitrack id
    dataset = mirdata.initialize("phenicx_anechoic")
    with pytest.raises(ValueError):
        dataset._multitrack("a")
Example #3
0
def test_dataset():
    """Smoke-test initialization of several datasets."""
    for name in ("guitarset", "rwc_jazz", "ikala"):
        dataset = mirdata.initialize(name)
        assert isinstance(dataset, core.Dataset)

    print(dataset)  # test that repr doesn't fail
Example #4
0
def dataset(test_dataset):
    """Return an initialized dataset for *test_dataset*, or None if empty.

    Raises:
        ValueError: if test_dataset is not a known mirdata dataset.
    """
    if test_dataset == "":
        return None
    if test_dataset not in mirdata.DATASETS:
        raise ValueError("{} is not a dataset in mirdata".format(test_dataset))
    home = os.path.join("tests/resources/mir_datasets_full", test_dataset)
    return mirdata.initialize(test_dataset, home)
Example #5
0
def main():
    """Download metadata, rebuild the listed indexes, and sanity-check them."""
    DATASETS = ["rwc_classical"]
    ALL_INDEXES = ["rwc_classical_index.json"]

    # Download metadata from all datasets for computing metadata checksums
    for module in DATASETS:
        if module not in ["dali", "beatles", "groove_midi"]:
            dataset = mirdata.initialize(module)
            if dataset.remotes is not None:
                dataset.download(
                    partial_download=[
                        "metadata" if "metadata" in dataset.remotes else key
                        for key in dataset.remotes
                        # BUG FIX: was `key is not "audio"` — identity
                        # comparison with a string literal only works by
                        # CPython interning; use equality instead.
                        if key != "audio"
                        and "training" not in key
                        and "testing" not in key
                    ]
                )

    # Update index to new format
    print("Updating indexes...\n")
    update_index(ALL_INDEXES)
    # Check new indexes are shaped as expected
    print("Quick check on datasets...\n")
    test_index(DATASETS)
    test_track_load(DATASETS)
Example #6
0
def update_index(all_indexes):
    """Function to update indexes to new format.

    Each index file is rewritten in place as
    {"version": ..., "tracks": <old index>, "metadata": <checksums>?}.

    Parameters
    ----------
    all_indexes (list): list of all current dataset indexes


    """

    for index_name in tqdm(all_indexes):
        # index files are named "<module>_index.json"
        module = index_name.replace("_index.json", "")

        # load old index
        old_index = mirdata.initialize(module)._index

        # avoid modifying when running multiple times
        if "tracks" in old_index.keys():
            old_index = old_index["tracks"]

        data_home = mirdata.initialize(module).data_home

        # get metadata checksum
        metadata_files = get_metadata_paths(module)
        metadata_checksums = None

        if metadata_files is not None:
            # map each metadata key to [relative path, md5 of the local file]
            metadata_checksums = {
                key: [
                    metadata_files[key],
                    md5(os.path.join(data_home, metadata_files[key])),
                ]
                for key in metadata_files.keys()
            }

        # get version of dataset
        version = get_dataset_version(module)

        # Some datasets have a single metadata file, some have multiple.
        # The computation of the checksum should be customized in the make_index
        # of each dataset. This is a patch to convert previous indexes to the new format.
        new_index = {"version": version, "tracks": old_index}

        if metadata_files is not None:
            new_index["metadata"] = metadata_checksums

        # overwrite the index file with the new structure
        with open(os.path.join(INDEXES_PATH, index_name), "w") as fhandle:
            json.dump(new_index, fhandle, indent=2)
Example #7
0
def test_multitracks(httpserver):
    """Instantiate and JAMS-validate each dataset's MultiTrack class."""
    data_home_dir = "tests/resources/mir_datasets"

    for dataset_name in DATASETS:

        module = importlib.import_module(
            "mirdata.datasets.{}".format(dataset_name))
        if dataset_name not in REMOTE_DATASETS:
            dataset = module.Dataset()
        else:
            # TODO fix these tests
            continue
            # remote_index = create_remote_index(httpserver, dataset_name)
            # dataset = module.Dataset(remote_index=remote_index)

        # TODO this is currently an opt-in test. Make it an opt out test
        # once #265 is addressed
        if dataset_name in CUSTOM_TEST_MTRACKS:
            mtrack_id = CUSTOM_TEST_MTRACKS[dataset_name]
        else:
            # there are no multitracks
            continue

        # BUG FIX: the bare `except:` clauses below were narrowed to
        # `except Exception:` so SystemExit/KeyboardInterrupt are not
        # swallowed and reported as test failures.
        try:
            mtrack_default = dataset.MultiTrack(mtrack_id)
        except Exception:
            assert False, "{}: {}".format(dataset_name, sys.exc_info()[0])

        # test data home specified
        data_home = os.path.join(data_home_dir, dataset_name)
        dataset_specific = mirdata.initialize(dataset_name,
                                              data_home=data_home)
        try:
            mtrack_test = dataset_specific.MultiTrack(mtrack_id,
                                                      data_home=data_home)
        except Exception:
            assert False, "{}: {}".format(dataset_name, sys.exc_info()[0])

        assert isinstance(
            mtrack_test, core.MultiTrack
        ), "{}.MultiTrack must be an instance of type core.MultiTrack".format(
            dataset_name)

        assert hasattr(
            mtrack_test,
            "to_jams"), "{}.MultiTrack must have a to_jams method".format(
                dataset_name)

        # Validate JSON schema
        try:
            jam = mtrack_test.to_jams()
        except Exception:
            assert False, "{}: {}".format(dataset_name, sys.exc_info()[0])

        assert jam.validate(
        ), "Jams validation failed for {}.MultiTrack({})".format(
            dataset_name, mtrack_id)
        if dataset_name in REMOTE_DATASETS:
            clean_remote_dataset(dataset_name)
Example #8
0
def test_dataset_errors():
    """Error paths: unknown names, missing track class, missing index."""
    with pytest.raises(ValueError):
        mirdata.initialize("not_a_dataset")

    # without a track class, every track accessor is unimplemented
    dataset = mirdata.initialize("orchset")
    dataset._track_class = None
    for accessor in (
        lambda: dataset.track("asdf"),
        lambda: dataset.load_tracks(),
        lambda: dataset.choice_track(),
    ):
        with pytest.raises(NotImplementedError):
            accessor()

    # acousticbrainz_genre ships no local index file
    dataset = mirdata.initialize("acousticbrainz_genre")
    with pytest.raises(FileNotFoundError):
        dataset._index
Example #9
0
def test_track_load(dataset_names):
    """Function to test all loaders work and indexes are fine (run locally)
    Parameters
    ----------
    dataset_names (list): list of dataset names

    """
    for module in dataset_names:
        dataset = mirdata.initialize(module)
        dataset.load_tracks()
Example #10
0
def test_index(dataset_names):
    """ Test if updated indexes are as expected.
    Parameters
    ----------
    dataset_names (list): list of dataset names

    """

    mandatory_keys = ["version"]
    for module in dataset_names:
        index = mirdata.initialize(module)._index
        assert type(index["tracks"]) == dict
        assert set(mandatory_keys) <= set([*index.keys()])
Example #11
0
def main(args):
    """Print a skeleton docstring for the Track class of ``args.dataset``."""
    # BUG FIX: this previously read `dataset.name` before `dataset` was
    # assigned (NameError); the dataset name comes from the CLI args.
    data_home = "tests/resources/mir_datasets/{}".format(args.dataset)
    print(data_home)

    dataset = mirdata.initialize(args.dataset, data_home=data_home)

    if args.dataset in TEST_TRACKIDS:
        track_id = TEST_TRACKIDS[args.dataset]
    else:
        print("No test track found for {}. ".format(args.dataset))
        print(
            "Please add a test track to the dictionary at the top of this script."
        )
        return

    track = dataset.track(track_id)
    data = get_attributes_and_properties(track)

    print('"""{} Track class'.format(args.dataset))
    print("")
    print("Args:")
    print("    track_id (str): track id of the track")
    print("")

    if len(data["attributes"]) > 0:
        print("Attributes:")
        for attr in data["attributes"]:
            # track_id gets a real description; everything else is a TODO
            description = "track id" if attr == "track_id" else "TODO"
            print("    {} ({}): {}".format(
                attr,
                type(getattr(track, attr)).__name__,
                description))
        print("")

    if len(data["cached_properties"]) > 0:
        print("Cached Properties:")
        for attr in data["cached_properties"]:
            print("    {} ({}): TODO".format(
                attr,
                type(getattr(track, attr)).__name__))
        print("")

    print('"""')
Example #12
0
import json
import mirdata
import muda
import os
from tqdm import tqdm

giantsteps_key = mirdata.initialize('giantsteps_key',
                                    data_home='/scratch/qx244/data/gskey')
gs_data = giantsteps_key.load_tracks()

with open('/scratch/qx244/data/gskey/good_files.json', 'r') as fp:
    good_files = json.load(fp)

pitch_shifter = muda.deformers.LinearPitchShift(n_samples=12,
                                                lower=-5,
                                                upper=6)

for idx in tqdm(good_files.keys()):
    track = gs_data[idx]
    track_jams_path = os.path.join('/scratch/qx244/data/gskey/jams/',
                                   track.title + '.jams')

    #check if already augmented:
    if os.path.isfile(
            '/scratch/qx244/data/gskey/augmentation/{}.11.jams'.format(
                track.title)):
        continue

    j_orig = muda.load_jam_audio(track_jams_path, track.audio_path)

    for i, jam_out in enumerate(pitch_shifter.transform(j_orig)):
Example #13
0
    def split_dataset_in_chunks(self):
        """Split Saraga Carnatic multitrack recordings into chunks paired with
        a synthesized tanpura, writing one WAV per stem per chunk into
        ``self.chunks_path``. Files are named ``<global_index>_<stem>.wav``.
        """
        # Create output folder if it does not exist
        if not os.path.exists(self.chunks_path):
            os.mkdir(self.chunks_path)

        # Initialize Saraga Carnatic dataset and get list of multitrack audio subset
        saraga_carnatic = mirdata.initialize('saraga_carnatic',
                                             data_home=self.dataset_path)
        track_ids = saraga_carnatic.track_ids
        saraga_data = saraga_carnatic.load_tracks()
        # NOTE(review): these concerts are skipped — presumably missing or
        # unusable multitrack stems; confirm against the dataset docs.
        concerts_to_ignore = ['Akkarai', 'Sundar']
        multitrack_list = self.get_multitrack_ids(track_ids, saraga_data,
                                                  concerts_to_ignore)

        split_count = 0
        for track_id in tqdm(multitrack_list):

            # Get track to format
            track = saraga_data[track_id]

            # Get tonic rounded at 4 decimals
            tonic = round(self.get_tonic(track), 4)

            # Get tanpura audio from the synthesized tanpura dataset
            # (file names encode the tonic, e.g. "tanpura_146.8324.wav")
            tanpura_filename = os.path.join(self.tanpura_dataset_path,
                                            'tanpura_' + str(tonic) + '.wav')
            audio_tanpura = estd.MonoLoader(filename=tanpura_filename)()
            # Get voice
            audio_vocal = estd.MonoLoader(filename=track.audio_vocal_path)()
            # Get violin
            audio_violin = estd.MonoLoader(filename=track.audio_violin_path)()
            # Get mridangam right
            audio_mridangam_right = estd.MonoLoader(
                filename=track.audio_mridangam_right_path)()
            # Get mridangam left
            audio_mridangam_left = estd.MonoLoader(
                filename=track.audio_mridangam_left_path)()

            # Get splits: each stem is chunked to the tanpura's length so the
            # stems can be zipped one chunk at a time below
            split_mridangam_left = self.split_into_chunks(
                audio_mridangam_left, len(audio_tanpura))
            split_mridangam_right = self.split_into_chunks(
                audio_mridangam_right, len(audio_tanpura))
            split_violin = self.split_into_chunks(audio_violin,
                                                  len(audio_tanpura))
            split_vocal = self.split_into_chunks(audio_vocal,
                                                 len(audio_tanpura))
            # the same synthesized tanpura is reused for every chunk
            split_tanpura = [audio_tanpura] * len(split_vocal)

            number_of_chunks = 0
            for split_id, (tanpura, vocal, violin, mri_right,
                           mri_left) in enumerate(
                               zip(split_tanpura, split_vocal, split_violin,
                                   split_mridangam_right,
                                   split_mridangam_left)):
                # one WAV per stem; split_count offsets the index globally
                # across tracks
                write(filename=os.path.join(
                    self.chunks_path,
                    str(split_id + split_count) + '_tanpura.wav'),
                      rate=44100,
                      data=np.array(tanpura))
                write(filename=os.path.join(
                    self.chunks_path,
                    str(split_id + split_count) + '_vocal.wav'),
                      rate=44100,
                      data=np.array(vocal))
                write(filename=os.path.join(
                    self.chunks_path,
                    str(split_id + split_count) + '_violin.wav'),
                      rate=44100,
                      data=np.array(violin))
                write(filename=os.path.join(
                    self.chunks_path,
                    str(split_id + split_count) + '_mridangam_right.wav'),
                      rate=44100,
                      data=np.array(mri_right))
                write(filename=os.path.join(
                    self.chunks_path,
                    str(split_id + split_count) + '_mridangam_left.wav'),
                      rate=44100,
                      data=np.array(mri_left))
                number_of_chunks = split_id

            # NOTE(review): number_of_chunks holds the LAST chunk index, not
            # the count, so the next track reuses one index and overwrites its
            # files — looks like an off-by-one; confirm intent before fixing.
            split_count = split_count + number_of_chunks
Example #14
0
        items.pop("v_num", None)
        return items

    def configure_optimizers(self):
        """Set up an Adam optimizer with a step-decay LR schedule."""
        optimizer = torch.optim.Adam(
            self.parameters(), lr=4e-2, weight_decay=0.0001)
        # reduce the learning rate after 10 epochs by a factor of 10
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=10, gamma=0.1)
        return [optimizer], [scheduler]


#### Init the Mridangam stroke dataset
data_home = '/Volumes/Macintosh HD 2/Documents/git/mirdata/tests/resources/mir_datasets_full/mridangam_stroke'
mridangam = mirdata.initialize("mridangam_stroke")  #,data_home=data_home

download = False
if download:
    mridangam.download()

random_seed = 0
pl.utilities.seed.seed_everything(seed=random_seed)

#### Pytorch dataset loaders
train_dataset = MridangamDataset(mirdataset=mridangam,
                                 subset=0,
                                 random_seed=random_seed)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=128,
                                           num_workers=24,