Example #1
0
def initialiseWAV(my_json):  # -> 1716 secs, no probs
    """Spawn one WavThread per sorted-MP3 sub-folder holding >= 30 tracks,
    then start and join them all."""
    print("Creating the WAV files.")
    navigate.set_path_to_dataset_MP3_sorted(my_json)
    # Every sub-folder of the sorted-MP3 dataset directory.
    all_datasets = utils.get_list_subs()
    threads = []
    for index_d, dataset in enumerate(all_datasets):
        dataset_dir = my_json["dataset_mp3_sorted_path"] + "\\" + dataset
        if len(os.listdir(dataset_dir)) >= 30:
            # Enter the sub-dataset directory that contains the tracks.
            navigate.set_path(dataset_dir)
            actual_dataset_path = navigate.get_actual_path()
            # All the tracks of the current folder.
            items_dataset = navigate.get_list_subs()
            threads.append(
                WavThread("Wav Thread " + str(index_d), my_json,
                          actual_dataset_path, dataset, items_dataset))
            navigate.set_path_to_dataset_MP3_sorted(my_json)
        else:
            print("There is less than 30 songs in the ", dataset,
                  " dataset. I will not export it.")
    navigate.set_path_to_project_path(my_json)
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    navigate.set_path_to_project_path(my_json)
Example #2
0
def shapeDataConv2DGTZAN(my_json, percent_train):
    """
    Shape the GTZAN spectrogram images into train and test sets.

    input:
        - my_json : dict loaded from the .json config with all the paths.
        - percent_train : float in [0.0, 1.0], fraction of the images that
          goes into the train set.
    returns:
        X_train, Y_train, X_test, Y_test, width, height
    raises:
        ValueError if the image dataset contains no image (previously this
        crashed later with an unhelpful NameError on `width`).
    """
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_image_GTZAN(my_json)
    X, Y = [], []
    convert_str = "L"  # "L" = black & white (1 channel), "RGB" = 3 channels
    width = height = None
    for dir_x in utils.get_list_subs():
        navigate.set_path(dir_x)
        for fig in utils.get_list_subs():
            im = Image.open(navigate.get_actual_path() + "\\" + fig).convert(
                convert_str)  # 640 * 480 pixels
            width, height = im.size
            pix = np.array(im.getdata())
            im.close()
            X.append(pix)
            Y.append(dir_x)  # the folder name is the genre label
        navigate.set_path_to_dataset_image_GTZAN(my_json)
    if width is None:
        raise ValueError("No image found in the GTZAN image dataset.")
    # One channel for grayscale, three for RGB.
    channels = 1 if convert_str == "L" else 3
    X = reshapeEntriesConv2D(X, width, height, channels)
    Y = reshapeOutputsDense(Y)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test, width, height
Example #3
0
def set_security_autorizations_images(my_json):
    """Grant full permissions (0o777) to every image dataset sub-folder."""
    print("Setting the autorization of the image folders.")
    navigate.set_path_to_dataset_image(my_json)
    for sub_dir in get_list_subs():
        navigate.set_path(sub_dir)
        os.chmod(navigate.get_actual_path(), 0o777)
        # Return to the image dataset root before visiting the next folder.
        navigate.set_path_to_dataset_image(my_json)
Example #4
0
def initialiseFigs(my_json, dpi, height, width, legend,
                   colormesh):  # 1444 secs
    """Create one image-export process per WAV sub-dataset.

    Uses multiprocessing rather than threads because matplotlib is not
    thread-safe.  A dataset is exported only when it holds at least 50
    tracks AND has a matching category in the shaped outputs.
    """
    print("Creating the Images.")
    navigate.set_path_to_dataset_WAV(my_json)
    all_datasets = utils.get_list_subs()  # Sub-folders of the WAV dataset.
    navigate.set_path_to_dataset_image(my_json)
    # Make sure every image output folder exists.
    for dataset in all_datasets:
        if not os.path.exists(dataset):
            os.makedirs(dataset)
    navigate.set_path_to_dataset_WAV(my_json)
    processes = []
    outputs_shaped = model.shapeOutputs(my_json)
    for index_d, dataset in enumerate(all_datasets):
        # BUG FIX: reset the lookup for every dataset.  Previously `flag`
        # and `right_array` were set once before the loop and never reset,
        # so after the first match every later dataset passed the check and
        # reused the previous dataset's category array.
        right_array = 0
        flag = False
        for item in outputs_shaped:
            if item['category_name'] == dataset:
                right_array = item["category_array"]
                flag = True
                break
        navigate.set_path(
            dataset
        )  # Set the path to the sub dataset directory, which contains tracks.
        actual_dataset_path = navigate.get_actual_path()
        items_dataset = utils.get_list_subs()  # Tracks of the current folder.
        if len(items_dataset) >= 50 and flag:
            my_process = ImageProcess("Image process " + str(index_d),
                                      my_json,
                                      actual_dataset_path,
                                      dataset,
                                      items_dataset,
                                      right_array,
                                      my_json["dataset_image_path"] + "\\" +
                                      dataset,
                                      dpi,
                                      height,
                                      width,
                                      legend=legend,
                                      colormesh=colormesh)
            processes.append(my_process)
        navigate.set_path_to_dataset_WAV(my_json)
    processes = [mp.Process(target=p.run, args=()) for p in processes]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    navigate.set_path_to_project_path(my_json)
Example #5
0
def shapeDataDense(my_json, percent_train, encoder=False):
    """
    Extract mean audio features from the GTZAN WAVs and build the
    train and test sets for a Dense network.

    input:
        - my_json : dict loaded from the .json config with all the paths.
        - percent_train : float in [0.0, 1.0], fraction of the samples that
          goes into the train set.
        - encoder : kept for interface compatibility; it is not used here
          (the old body shadowed it with an unused LabelEncoder()).
    returns:
        X_train, Y_train, X_test, Y_test
    """
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    X, Y = [], []
    for index_dir, dir_x in enumerate(utils.get_list_subs()):
        navigate.set_path(dir_x)
        for index_fig, audio_path in enumerate(utils.get_list_subs()):
            print("Index folder : ", index_dir + 1, "/10 -- File ",
                  index_fig + 1, "/100.")
            audio, freq = lr.load(audio_path, mono=True)
            # Aggregate each spectral feature to its mean over the track.
            chroma_stft = lr.feature.chroma_stft(y=audio, sr=freq)
            # NOTE(review): `rmse` was renamed `rms` in librosa >= 0.8;
            # this code targets an older librosa.
            rmse = lr.feature.rmse(y=audio)
            spec_cent = lr.feature.spectral_centroid(y=audio, sr=freq)
            spec_bw = lr.feature.spectral_bandwidth(y=audio, sr=freq)
            rolloff = lr.feature.spectral_rolloff(y=audio, sr=freq)
            zcr = lr.feature.zero_crossing_rate(audio)
            mfcc = lr.feature.mfcc(y=audio, sr=freq)
            data = [
                np.mean(chroma_stft),
                np.mean(rmse),
                np.mean(spec_cent),
                np.mean(spec_bw),
                np.mean(rolloff),
                np.mean(zcr)
            ]
            for mfcc_x in mfcc:
                data.append(np.mean(mfcc_x))
            X.append(data)
            Y.append(dir_x)  # the folder name is the genre label
            # NOTE(review): this `break` keeps only the FIRST track of each
            # folder — looks like a debugging leftover; confirm with the
            # author before removing, as removing it changes the dataset.
            break
        navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    X = reshapeEntriesDense(X)
    Y = reshapeOutputsDense(Y)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test
Example #6
0
def initialiseMP3Sorted(my_json):  # -> 1748 secs, no probs
    """Sort the raw FMA MP3 dataset into genre folders, one thread per
    sub-directory of MP3 tracks.

    NOTE(review): `tracks` (the metadata CSV path) is presumably defined at
    module level — it is not visible in this function.
    """
    print("Sorting the MP3 files.")
    column_renames = {
        "album": "track_id",
        "track.19": "title",
        "track.7": "genre_top",
        "track.16": "number",
    }
    datas = pd.read_csv(tracks,
                        encoding="utf-8",
                        usecols=list(column_renames),
                        dtype={name: object for name in column_renames})
    datas = datas.rename(columns=column_renames)
    threads = []
    navigate.set_path_to_datasets(my_json)
    if not os.path.exists("Mp3Sorted"):
        os.makedirs("Mp3Sorted")
    navigate.set_path_to_dataset_MP3(my_json)
    non_track_items = ("checksums", "fma_metadata", "README.txt")
    for index_dir, x_dir in enumerate(utils.get_list_subs()):
        if x_dir not in non_track_items:
            navigate.set_path(x_dir)
            actual_dataset_path = navigate.get_actual_path()
            # All the tracks of the current folder.
            items_dataset = navigate.get_list_subs()
            threads.append(
                SortMP3Thread("MP3 Sorted Thread " + str(index_dir), my_json,
                              actual_dataset_path, x_dir, items_dataset,
                              datas))
        else:
            print("This is not a directory of mp3 tracks.")
        navigate.set_path_to_dataset_MP3(my_json)
    navigate.set_path_to_project_path(my_json)
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    navigate.set_path_to_project_path(my_json)
Example #7
0
def initialiseFigsGTZAN(my_json, dpi, height, width, legend):  # 1444 secs
    """Create one image-export process per GTZAN WAV sub-dataset.

    Uses multiprocessing rather than threads because matplotlib is not
    thread-safe.  Only sub-datasets with at least 50 tracks are exported.
    """
    print("Creating the Images.")
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    all_datasets = utils.get_list_subs()  # Sub-folders of the WAV dataset.
    navigate.set_path_to_dataset_image_GTZAN(my_json)
    # Make sure every image output folder exists.
    for dataset in all_datasets:
        if not os.path.exists(dataset):
            os.makedirs(dataset)
    navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    workers = []
    for index_d, dataset in enumerate(all_datasets):
        # Enter the sub-dataset directory that contains the tracks.
        navigate.set_path(dataset)
        actual_dataset_path = navigate.get_actual_path()
        items_dataset = utils.get_list_subs()  # Tracks of the current folder.
        if len(items_dataset) >= 50:
            workers.append(
                ImageProcessGTZAN(
                    "Image GTZAN process " + str(index_d),
                    my_json,
                    actual_dataset_path,
                    dataset,
                    items_dataset,
                    my_json["dataset_images_gtzan_path"] + "\\" + dataset,
                    dpi,
                    height,
                    width,
                    legend=legend))
        navigate.set_path_to_dataset_WAV_GTZAN(my_json)
    processes = [mp.Process(target=w.run, args=()) for w in workers]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    navigate.set_path_to_project_path(my_json)
Example #8
0
def shapeDataConv2D(my_json, percent_train, encoder=False):
    """
    Shape the spectrogram images into train and test sets for a Conv2D net.

    input:
        - my_json : dict loaded from the .json config with all the paths.
        - percent_train : float in [0.0, 1.0], fraction of the images that
          goes into the train set.
        - encoder : optional sklearn-style label encoder; when given, labels
          come from the raw "category" image tag and are fit-transformed,
          otherwise the pre-shaped "category_shaped" tag is parsed as-is.
    returns:
        X_train, Y_train, X_test, Y_test, width, height
    """
    print("Shaping the data to get the Train and Test sets.")
    navigate.set_path_to_dataset_image(my_json)
    X, Y = [], []
    for dir_x in utils.get_list_subs():
        navigate.set_path(dir_x)
        for fig in utils.get_list_subs():
            im = Image.open(navigate.get_actual_path() + "\\" + fig).convert(
                'L')  # 640 * 480 pixels
            width, height = im.size
            # BUG FIX: Image.getdata() is row-major — `height` rows of
            # `width` pixels — so the 2-D shape is (height, width).  The
            # old reshape(width, height) scrambled every non-square image.
            pix = np.array(im.getdata()).reshape(height, width)
            if encoder:
                category = str(im.info["category"])
            else:
                category = ast.literal_eval(im.info["category_shaped"])
            im.close()
            X.append(pix)
            Y.append(category)
        navigate.set_path_to_dataset_image(my_json)
    if encoder:
        Y = encoder.fit_transform(Y)
        n_classes = encoder.classes_.size  # NOTE(review): currently unused
        Y = Y.reshape(len(Y), 1)
    else:
        Y = reshapeOutputs(Y)
    X = reshapeEntriesConv2D(X, width, height)

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=(1 - percent_train))
    return X_train, Y_train, X_test, Y_test, width, height