def benchmarkOrder(self, folder, maxOrder, train=0.8, saveFig=False): np.random.seed(0) # We get all the midi files files = [] for filename in glob(folder + '/**', recursive=True): if filename[filename.rfind("."):] in [".mid", ".midi"]: files.append(filename) np.random.shuffle(files) print("____ PROCESSING THE DATA") trainData = data.data() trainData.addFiles(files[:int(train * len(files))], augmentation=True) testData = data.data() testData.addFiles(files[int(train * len(files)):], augmentation=False) retMeans = np.zeros(maxOrder) retStd = np.zeros(maxOrder) print("There is", trainData.getSize(), "scores for training") for order in range(1, maxOrder): self.cleanWeights(order=order) self.train(trainData) tmp = self.getLikelihoodfromData(testData) means = np.zeros(testData.getSize()) for i in range(len(tmp)): means[i] = np.mean(tmp[i]) retMeans[order] = np.mean(means) retStd[order] = np.std(means) plt.plot(retMeans) plt.ylabel('Likelihood over dataset') plt.xlabel('Max order of the model') plt.fill_between(range(len(retMeans)), retMeans + retStd, retMeans - retStd, alpha=.5) if saveFig is False: plt.show() else: plt.savefig("Benchmark.eps") print("TRAIN DATA") print(files[:int(train * len(files))]) for i in range(len(means)): print(files[int(train * len(files)):][i], "->", means[i]) return (retMeans, retStd)
def benchmarkQuantization(
        self,
        folder,
        quantizations=[1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 24, 32, 64],
        train=0.8):
    """Benchmark held-out likelihood for several quantization values.

    For each quantization the model is retrained from scratch on the
    first `train` fraction of the midi files found in `folder` and
    evaluated on the remainder.  Plots the resulting curve and returns
    (means, stds), one entry per quantization.
    """
    # Collect every midi file under the folder, recursively.
    files = [f for f in glob(folder + '/**', recursive=True)
             if f[f.rfind("."):] in [".mid", ".midi"]]
    np.random.shuffle(files)
    print("____ PROCESSING THE DATA")
    retMeans = np.zeros(len(quantizations))
    retStd = np.zeros(len(quantizations))
    split = int(train * len(files))
    for k, quantization in enumerate(quantizations):
        trainData = data.data(quantization=quantization)
        trainData.addFiles(files[:split])
        testData = data.data(quantization=quantization)
        testData.addFiles(files[split:], augmentation=False)
        print(trainData.getData("length")[0])
        # Retrain from a clean state for this quantization.
        self.cleanWeights(order=self.maxOrder)
        self.train(trainData)
        tmp = self.getLikelihoodfromData(testData)
        means = np.zeros(testData.getSize())
        for i in range(len(tmp)):
            means[i] = np.mean(tmp[i])
        retMeans[k] = np.mean(means)
        retStd[k] = np.std(means)
    plt.plot(retMeans)
    plt.xticks(np.arange(len(retMeans)), quantizations)
    plt.ylabel('Likelihood over dataset')
    plt.xlabel('Quantization')
    # Shade +/- one std around the mean curve.
    plt.fill_between(range(len(retMeans)),
                     retMeans + retStd,
                     retMeans - retStd,
                     alpha=.5)
    plt.show()
    return (retMeans, retStd)
def Train(folder, k_fold=5, quantization=24, maxOrder=20,
          time_representation=False, zero_padding=True,
          long_term_only=False, short_term_only=False):
    """Train an IDyOM model on `folder` and save it under models/.

    If a model with the same parameters already exists on disk, the
    user is asked interactively whether to retrain; any answer other
    than y/Y aborts.
    """
    if folder[-1] == "/":
        folder = folder[:-1]
    # Model path encodes the dataset name and the training parameters.
    modelPath = ("models/" + str(folder[folder.rfind("/") + 1:]) +
                 "_quantization_" + str(quantization) + "_maxOrder_" +
                 str(maxOrder) + ".model")
    if os.path.isfile(modelPath):
        print(
            "There is already a model saved for these data, would you like to train again? (y/N)\n"
        )
        rep = input("")
        while rep not in ["y", "Y", "n", "N", "", "\n"]:
            rep = input("We did not understand, please type again (y/N).")
        if rep.lower() != "y":
            return
    L = idyom.idyom(maxOrder=maxOrder)
    M = data.data(quantization=quantization)
    M.parse(folder)
    L.train(M)
    L.save(modelPath)
def Train(folder, jump=False):
    """Train an IDyOM model (optionally jump-enabled) and save it.

    The model is saved to models/jump_<jump>.model.
    """
    model = idyom.idyom(jump=jump, maxOrder=100)
    dataset = data.data(quantization=24)
    dataset.parse(folder)
    model.train(dataset)
    model.save("models/jump_" + str(jump) + ".model")
def cross_validation(folder, k_fold=10, maxOrder=20, quantization=24,
                     time_representation=False, zero_padding=True,
                     long_term_only=False, short_term_only=False):
    """k-fold cross-validation of mean likelihood over a midi folder.

    k_fold == -1 means leave-one-out.  Returns (Likelihoods,
    validationFiles) where entry i is the mean likelihood of validation
    file i; NaN likelihoods are replaced by the escape value 1/30.
    """
    np.random.seed(0)
    Likelihoods = []
    files = [f for f in glob(folder + '/**', recursive=True)
             if f[f.rfind("."):] in [".mid", ".midi"]]
    np.random.shuffle(files)
    if int(k_fold) == -1:
        k_fold = len(files)
    if int(k_fold) > len(files):
        raise ValueError(
            "Cannot process with k_fold greater than number of files. Please use -k options to specify a smaller k for cross validation."
        )
    # From here on, k_fold holds the SIZE of one fold.
    k_fold = len(files) // int(k_fold)
    validationFiles = []
    for fold in tqdm(range(math.ceil(len(files) / k_fold))):
        trainData = files[:fold * k_fold] + files[(fold + 1) * k_fold:]
        evalData = files[fold * k_fold:(fold + 1) * k_fold]
        # Our IDyOM
        L = idyom.idyom(maxOrder=maxOrder)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)
        L.train(M)
        for file in evalData:
            tmp = L.getLikelihoodfromFile(file,
                                          long_term_only=long_term_only,
                                          short_term_only=short_term_only)
            # NaN entries (never-seen states) fall back to 1/30.
            for j in range(len(tmp)):
                if tmp[j] != tmp[j]:
                    tmp[j] = 1 / 30
            Likelihoods.append(np.mean(tmp))
            filename = file[file.rfind("/") + 1:file.rfind(".")]
            validationFiles.append(filename)
    return Likelihoods, validationFiles
def checkDataSet(folder):
    """
    Function that check if the dataset is corrupted (contains duplicates).
    Does not delete automatically!
    """
    # Collect every midi file under the folder, recursively.
    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)
    D = data.data(deleteDuplicates=False)
    D.addFiles(files)
    DATA = D.getData("pitch")
    delete = []          # files recommended for deletion
    delete_pitches = []  # pitch sequences already scheduled for deletion
    # Pairwise comparison of all pieces (O(n^2) comparePitches calls).
    for i in range(len(files)):
        for j in range(i, len(files)):
            if i != j and comparePitches(DATA[i], DATA[j]):
                print(files[i], "matches", files[j])
                # We recommand to delete the smallest one
                if len(DATA[i]) > len(DATA[j]):
                    # i is the larger piece: keep it, unless its pitches
                    # were already marked for deletion by an earlier match.
                    for d in delete_pitches:
                        if comparePitches(d, DATA[i]):
                            delete.append(files[i])
                            delete_pitches.append(DATA[i])
                            break
                    delete.append(files[j])
                    delete_pitches.append(DATA[j])
                else:
                    # j is the larger (or equal) piece: symmetric case.
                    for d in delete_pitches:
                        if comparePitches(d, DATA[j]):
                            delete.append(files[j])
                            delete_pitches.append(DATA[j])
                            break
                    delete.append(files[i])
                    delete_pitches.append(DATA[i])
    if len(delete) > 0:
        print(
            "We recommand you to delete the following files because they are duplicates:"
        )
        print(list(set(delete)))
    else:
        print("We did not find any duplicates.")
def getSurprisefromFile(self, file, zero_padding=False,
                        time_representation=False, short_term_only=False,
                        long_term_only=False):
    """
    Return surprise(-log2(p)) over a score

    :param folder: file to compute surprise on
    :param zero_padding: return surprise as spikes if True

    :type data: string
    :type zero_padding: bool

    :return: list of float
    """
    probas, entropies = self.getLikelihoodfromFile(
        file,
        short_term_only=short_term_only,
        long_term_only=long_term_only)
    # Surprise is the information content: -log2(p).
    probas = -np.log(probas + sys.float_info.epsilon) / np.log(2)
    if time_representation is False:
        return probas, entropies
    # Expand each note over its duration, in quantized time steps.
    D = data.data()
    D.addFile(file)
    lengths = D.getData("length")[0]
    surprise = []
    entropy = []
    for i, p in enumerate(probas):
        e = entropies[i]
        surprise.append(p)
        entropy.append(e)
        # Either pad with zeros (spike representation) or hold the value
        # for the whole duration of the note.
        pad_s = 0 if zero_padding else p
        pad_e = 0 if zero_padding else e
        for _ in range(int(lengths[i])):
            surprise.append(pad_s)
            entropy.append(pad_e)
    return surprise, entropy
def Train(folder, k_fold=5, quantization=24, maxOrder=20,
          time_representation=False, zero_padding=True,
          long_term_only=False, short_term_only=False, viewPoints="both"):
    """
    Train a model with the passed parameters and then save it to the hardrive.

    :param folder: dataset folder containing .mid/.midi files
    :param viewPoints: "pitch", "length" or "both"
    :raises ValueError: on an unknown viewPoints value
    """
    if folder[-1] == "/":
        folder = folder[:-1]
    # Resolve the viewpoint specification to the list the model expects.
    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]
    else:
        raise ValueError("We do not know these viewpoints ... ")
    # Model path encodes the dataset name and the training parameters.
    modelPath = ("models/" + str(folder[folder.rfind("/") + 1:]) +
                 "_quantization_" + str(quantization) + "_maxOrder_" +
                 str(maxOrder) + "_viewpoints_" + str(viewPoints) + ".model")
    if os.path.isfile(modelPath):
        print(
            "There is already a model saved for these data, would you like to train again? (y/N)\n"
        )
        rep = input("")
        while rep not in ["y", "Y", "n", "N", "", "\n"]:
            rep = input("We did not understand, please type again (y/N).")
        if rep.lower() != "y":
            return
    preComputeEntropies = not (
        long_term_only or short_term_only
    )  # We only precompute if we need to combine short and long term models
    L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o)
    M = data.data(quantization=quantization)
    M.parse(folder, augment=True)
    # BUG FIX: preComputeEntropies was computed but never forwarded to the
    # training call (cf. Train_by_piece, which passes the same kwarg).
    L.train(M, preComputeEntropies=preComputeEntropies)
    L.save(modelPath)
def plotLikelihood(folder, k_fold=2):
    """
    Compare the likelihood between idyom model and jump model.
    """
    likelihood1, files = cross_validation(folder, k_fold=k_fold, jump=True)
    print(likelihood1)
    print(files)
    # Single bar: mean likelihood with its std as error bar.
    plt.ylabel("Likelihood")
    plt.bar([0], [np.mean(likelihood1)],
            color="b",
            yerr=[np.std(likelihood1)])
    plt.show()
    print()
    print()
    print()
    print("Mean:", np.mean(likelihood1))
    print("Std:", np.std(likelihood1))
    M = data.data()
    M.parse(folder)
    dat, files2 = M.getScoresFeatures()
    # Scale each scatter point by its squared likelihood so that
    # better-predicted pieces appear bigger in the feature space.
    dico = dict(zip(files, likelihood1))
    weights = [500 * dico[f]**2 if f in dico else 0 for f in files2]
    plt.scatter(dat[0][:len(dat[1])], dat[1], s=weights)
    plt.title('Database')
    plt.xlabel('Average 1-note interval')
    plt.ylabel('Average note onset')
    plt.show()
def cross_validation(folder, k_fold=10, maxOrder=20, quantization=24,
                     jump=False):
    """k-fold cross-validation of mean likelihood (jump-model variant).

    Returns (Likelihoods, validationFiles); validation files keep their
    full path here.  Note: a trailing partial fold is ignored.
    """
    np.random.seed(0)
    Likelihoods = []
    files = [f for f in glob(folder + '/**', recursive=True)
             if f[f.rfind("."):] in [".mid", ".midi"]]
    np.random.shuffle(files)
    # From here on, k_fold holds the SIZE of one fold.
    k_fold = len(files) // int(k_fold)
    validationFiles = []
    for fold in range(len(files) // k_fold):
        lo = fold * k_fold
        hi = (fold + 1) * k_fold
        trainData = files[:lo] + files[hi:]
        evalData = files[lo:hi]
        # Our IDyOM
        L = idyom.idyom(maxOrder=maxOrder, jump=jump)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)
        L.train(M)
        for file in evalData:
            Likelihoods.append(np.mean(L.getLikelihoodfromFile(file)))
            validationFiles.append(file)
    return Likelihoods, validationFiles
l3[file] = np.nan_to_num(l3[file]) l1[file] = np.nan_to_num(l1[file]) likelihoods1.append(np.mean(l1[file])) likelihoods2.append(2**-np.mean(l2[file])) likelihoods3.append(2**-np.mean(l3[file])) plt.bar([1,2,3], [np.mean(likelihoods1), np.mean(likelihoods2), np.mean(likelihoods3)], yerr=[np.std(likelihoods1), np.std(likelihoods2), np.std(likelihoods3)]) plt.savefig(folder+"comparisonsIDYOM_IDYOMpy_JUMP.eps") plt.show() compareLikelihoods(likelihoods2, likelihoods1, name=folder+"compareLikelihoodsIDyOMpy_VS_IDyOM") # ploting in the music space M = data.data() M.parse("../", augment=False) dat2, files4 = M.getScoresFeatures() weights = [] colors = [] for file in range(len(likelihoods1)): weights.append(80000*abs(likelihoods1[file]-likelihoods2[file])**2) if likelihoods1[file]-likelihoods2[file] < 0: colors.append('coral') elif likelihoods1[file]-likelihoods2[file] > 0: colors.append('deepskyblue') else: colors.append('black')
def getDistributionsfromFile(self, file, threshold, short_term_only=False,
                             long_term_only=False, normalization=True):
    """
    Return likelihood over a score

    :param folder: file to compute likelihood on
    :param threshold: minimum normalized probability for a duration to be
        flagged as a "missing note"
    :param normalization: renormalize the remaining mass after each
        flagged duration

    :type data: string

    :return: (notes_surprise, missing_notes)
    """
    D = data.data()
    D.addFile(file)
    distribution = []
    # Only the "length" viewpoint is used to predict note durations.
    for model in self.LTM:
        if model.viewPoint == "length":
            dat = D.getData(model.viewPoint)[0]
            STM = longTermModel.longTermModel(model.viewPoint,
                                              maxOrder=20,
                                              STM=True,
                                              init=dat)
            for i in tqdm(range(1, len(dat))):
                # we instanciate a Short Term Model for the current viewpoint
                STM.train([dat[:i]], shortTerm=True)
                predictions_LTM = model.getPrediction(dat[:i])
                predictions_STM = STM.getPrediction(dat[:i])
                # Union of the durations predicted (with non-zero mass)
                # by either model.
                durations = []
                for duration in predictions_LTM:
                    if duration not in durations and predictions_LTM[
                            duration] != 0:
                        durations.append(duration)
                for duration in predictions_STM:
                    if duration not in durations and predictions_STM[
                            duration] != 0:
                        durations.append(duration)
                distribution_note = {}
                for duration in durations:
                    if duration in predictions_LTM:
                        p1 = predictions_LTM[duration]
                        flag = True
                    else:
                        # Escape probability for states never seen in training.
                        p1 = 1 / 30
                        flag = None
                    if duration in predictions_STM:
                        p2 = predictions_STM[duration]
                    else:
                        p2 = None
                    # Combine LTM and STM weighted by relative entropy,
                    # falling back to whichever estimate is available.
                    if self.stm and p2 is not None:
                        if flag is not None:
                            p = self.mergeProbas([p1, p2], [
                                model.getRelativeEntropy(dat[:i]),
                                STM.getRelativeEntropy(dat[:i])
                            ])
                        else:
                            p = p2
                    else:
                        p = p1
                    if long_term_only:
                        p = p1
                    if short_term_only:
                        p = p2
                    if p is None:
                        p = 1 / 30
                    distribution_note[duration] = p
                distribution.append(distribution_note)
    ### Time Representation
    D = data.data()
    D.addFile(file)
    # BUG FIX: long_term_only used to be passed short_term_only's value
    # (copy-paste error), silently ignoring the long_term_only argument.
    probas, entropies = self.getLikelihoodfromFile(
        file,
        short_term_only=short_term_only,
        long_term_only=long_term_only)
    # We compute the surprise by using -log2(probas)
    probas = -np.log(probas + sys.float_info.epsilon) / np.log(2)
    # We get the length of the notes
    lengths = D.getData("length")[0]
    # Zero-padded time representation of the surprise.
    ret = []
    for i in range(len(probas)):
        ret.append(probas[i])
        for j in range(int(lengths[i])):
            ret.append(0)
    notes_surprise = ret
    # Collect time indexes where an unplayed ("missing") note was
    # predicted with normalized probability above threshold.
    indexes = []
    probas = []
    current_index = 1
    for i in range(len(distribution)):
        sum_distribution = sum(distribution[i].values())
        keys = np.array(list(distribution[i])).astype(int)
        keys.sort()
        for duration in keys:
            duration = str(duration)
            if int(duration) < int(
                    lengths[i]
            ) and distribution[i][duration] / sum_distribution > threshold:
                indexes.append(current_index + int(duration))
                probas.append(distribution[i][duration] / sum_distribution)
                if normalization:
                    sum_distribution -= distribution[i][duration]
        current_index += int(lengths[i]) + 1
    missing_notes = np.zeros(len(notes_surprise))
    missing_notes[indexes] = probas
    plt.plot(notes_surprise)
    plt.plot(missing_notes)
    plt.legend(["surprise", "missing notes"])
    plt.show()
    return notes_surprise, missing_notes
import sys
sys.path.append('../')
from idyom import longTermModel
from idyom import data
from idyom import score
from idyom import idyom

import numpy as np
import matplotlib.pyplot as plt

# Demo script: train an IDyOM model on a small test database, then
# sample and generate a short excerpt and write it to midi.
L = idyom.idyom(maxOrder=20, jump=False, maxDepth=10)
M = data.data(quantization=6)
#M.parse("../dataset/")
#M.parse("../datasetprout/")
M.parse("../examples/dataBaseTest")
L.train(M)
# Seed the sampler with a single note (D5, duration 24 steps).
L.sample([{"pitch": 74, "length": 24}])
# Generate a 20-note score, display it and export it as midi.
s = L.generate(20)
print(s.getData())
s.plot()
s.writeToMidi("exGen.mid")
def cross_validation(folder, k_fold=10, maxOrder=20, quantization=24,
                     time_representation=False, zero_padding=True,
                     long_term_only=False, short_term_only=False,
                     viewPoints="both"):
    """
    Cross validate a unique folder using k-fold

    :param viewPoints: "pitch", "length" or "both"
    :param k_fold: number of folds; -1 means leave-one-out
    :return: (ICs, Entropies, validationFiles)
    :raises ValueError: on an unknown viewPoints value or k_fold > #files
    """
    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]
    else:
        # BUG FIX: an unknown viewPoints value used to fall through and
        # crash later with UnboundLocalError; fail fast instead,
        # matching the sibling Train() functions.
        raise ValueError("We do not know these viewpoints ... ")
    np.random.seed(0)
    ICs = []
    Entropies = []
    files = []
    for filename in glob(folder + '/**', recursive=True):
        if filename[filename.rfind("."):] in [".mid", ".midi"]:
            files.append(filename)
    np.random.shuffle(files)
    if int(k_fold) == -1:
        k_fold = len(files)  # leave-one-out
    if int(k_fold) > len(files):
        raise ValueError(
            "Cannot process with k_fold greater than number of files. Please use -k options to specify a smaller k for cross validation."
        )
    # From here on, k_fold holds the SIZE of one fold.
    k_fold = len(files) // int(k_fold)
    validationFiles = []
    for i in tqdm(range(math.ceil(len(files) / k_fold))):
        trainData = files[:i * k_fold] + files[(i + 1) * k_fold:]
        evalData = files[i * k_fold:(i + 1) * k_fold]
        L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o)
        M = data.data(quantization=quantization)
        M.addFiles(trainData)
        L.train(M)
        for file in evalData:
            IC, E = L.getSurprisefromFile(
                file,
                long_term_only=long_term_only,
                short_term_only=short_term_only,
                time_representation=time_representation,
                zero_padding=zero_padding)
            ICs.append(IC)
            Entropies.append(E)
            # Normalized piece name (matlab-friendly: no dashes).
            filename = file[file.rfind("/") + 1:file.rfind(".")]
            filename = filename.replace("-", "_")
            validationFiles.append(filename)
    return ICs, Entropies, validationFiles
def Train_by_piece(folder, nb_pieces=20, quantization=24, maxOrder=20,
                   time_representation=False, zero_padding=True,
                   long_term_only=False, short_term_only=False,
                   viewPoints="both", europa_init=True):
    """
    Train and evaluate a model piece by piece. This allows to see the evolution of the
    generalization error during the course of the training and for instance identify
    the right number of data needed the converge.

    :param nb_pieces: number of held-out test pieces
    :param europa_init: warm-start training with 100 pieces from
        dataset/mixed2 before the folder's own pieces
    :raises ValueError: on an unknown viewPoints value
    """
    # Unique scratch folder that will hold the held-out test pieces.
    name_temp_file = (".tmp_test_folder_" + folder[folder.rfind("/") + 1:] +
                      "_" + str(np.random.randint(100, 999)))
    if folder[-1] == "/":
        folder = folder[:-1]
    if viewPoints == "pitch":
        viewPoints_o = ["pitch"]
    elif viewPoints == "length":
        viewPoints_o = ["length"]
    elif viewPoints == "both":
        viewPoints_o = ["pitch", "length"]
    else:
        raise ValueError("We do not know this viewpoint ... ")
    L = idyom.idyom(maxOrder=maxOrder, viewPoints=viewPoints_o,
                    evolutive=True)
    files = glob(folder + '/**.mid', recursive=True) + glob(
        folder + '/**.midi', recursive=True)
    random.shuffle(files)
    train = files[:-nb_pieces]
    test = files[-nb_pieces:]
    if europa_init:
        # Warm-start with 100 European pieces before the target corpus.
        europe_files = glob('dataset/mixed2/**.mid', recursive=True) + glob(
            'dataset/mixed2/**.midi', recursive=True)
        train = europe_files[:100] + train
    # (Re)create the scratch folder and copy the test pieces into it.
    if os.path.exists(name_temp_file):
        if os.path.isdir(name_temp_file):
            rmtree(name_temp_file)
        else:
            os.remove(name_temp_file)
    os.mkdir(name_temp_file)
    for file in test:
        copyfile(file, name_temp_file + file[file.rfind("/"):])
    note_counter = []
    dicos = []
    # matrix[k, :] = mean IC of each test piece after training on piece k.
    matrix = np.zeros((len(train), nb_pieces))
    print("___ Starting Training ___")
    k = 0
    for file in tqdm(train):
        try:
            M = data.data(quantization=quantization)
            M.parseFile(file)
            L.train(M, preComputeEntropies=False)
            S, E, files = L.getSurprisefromFolder(
                name_temp_file,
                time_representation=time_representation,
                long_term_only=long_term_only,
                short_term_only=short_term_only)
            note_counter.append(len(M.viewPointRepresentation["pitch"][0]))
            dico = {}
            for i in range(len(files)):
                dico[files[i]] = S[i]
            dicos.append(dico)
            tmp = []
            for s in S:
                tmp.append(np.mean(s))
            matrix[k, :] = tmp
            k += 1
        except (FileNotFoundError, RuntimeError, ValueError):
            # Skip unreadable/corrupt pieces; their matrix row stays zero.
            print(file + " skipped.")
    # Turn per-piece note counts into a cumulative count (x-axis).
    for i in range(1, len(note_counter)):
        note_counter[i] += note_counter[i - 1]
    saving = {}
    saving['matrix'] = matrix
    saving['note_counter'] = note_counter
    # NOTE(review): only the LAST training step's dico is saved here,
    # while `dicos` holds all of them — confirm which one is intended.
    saving['dico'] = dico
    if not os.path.exists("out/" + folder[folder.rfind("/"):]):
        os.makedirs("out/" + folder[folder.rfind("/"):])
    if not os.path.exists("out/" + folder[folder.rfind("/"):] +
                          "/evolution/"):
        os.makedirs("out/" + folder[folder.rfind("/"):] + "/evolution/")
    pickle.dump(
        saving,
        open(
            "out/" + folder[folder.rfind("/") + 1:] + "/evolution/" +
            folder[folder.rfind("/") + 1:] + '.pickle', "wb"))
    # BUG FIX: the .mat export used to pass the `data` MODULE instead of
    # the `saving` dict, which made scipy.io.savemat raise.
    sio.savemat(
        "out/" + folder[folder.rfind("/") + 1:] + "/evolution/" +
        folder[folder.rfind("/") + 1:] + '.mat', saving)
    print()
    print()
    print()
    print("Data saved at " + "out/" + folder[folder.rfind("/") + 1:] +
          "/evolution/" + folder[folder.rfind("/") + 1:] + '.pickle')
    print(
        "Including a .mat for matlab purpose and a .pickle for python purpose."
    )
    print()
    print()
    if not SERVER:
        plt.errorbar(note_counter,
                     np.mean(matrix, 1),
                     yerr=np.std(matrix, 1) / np.sqrt(nb_pieces))
        plt.title("Evolution of the mean IC over Learning (" +
                  folder[folder.rfind("/") + 1:] + ")")
        plt.ylabel("Mean IC (generlization error)")
        plt.xlabel("Learning (in notes)")
        plt.show()
    rmtree(name_temp_file)
import sys
sys.path.append('../')
from idyom import markovChain
from idyom import data
from idyom import score

import numpy as np

# Demo script: train a 3rd-order markov chain on the pitch viewpoint of
# the test database, then generate a 500-event excerpt as midi and wav.
M = markovChain.markovChain(3)
D = data.data()
D.parse("dataBaseTest/")
M.train(D.getData("pitch"))
print(D.getData("pitch"))
S = M.generate(500)
S.writeToMidi("generation1.mid")
S.toWaveForm("generation1.wav")
print(S.getData())
quit()
# Everything below quit() is unreachable debug code kept for reference.
matrix = M.getStatesMatrix()
print(M.transitions)
def compareWithLISP(folder):
    """
    Start comparisons between our idyom and the one in lisp.
    This function, will add the dataset to lisp, and start training.
    You should have lisp and idyom already installed.
    """
    # Reset the midi staging folder used by the lisp pipeline.
    if not os.path.exists("lisp/midis/"):
        os.makedirs("lisp/midis/")
    os.system("rm -rf lisp/midis/*")
    # Add folder to lisp database
    replaceinFile("lisp/compute.lisp", "FOLDER", folder)
    # Compute with LISP IDyOM
    os.system("sbcl --noinform --load lisp/compute.lisp")
    # Restore the placeholder in the lisp script.
    replaceinFile("lisp/compute.lisp", folder, "FOLDER")
    folder = "lisp/midis/"
    # NOTE(review): the line below immediately overrides the lisp midi
    # folder with a hard-coded dataset path — looks like a debug
    # leftover, confirm which folder is intended.
    folder = "dataset/bach_sub/"
    # Our IDyOM
    now = time.time()
    likelihoods1, files1 = cross_validation(folder,
                                            maxOrder=20,
                                            quantization=24,
                                            k_fold=5)  #k-fold=10
    print("execution:", time.time() - now)
    # LISP version
    L2 = lisp.getDico(
        "lisp/12-cpitch_onset-cpitch_onset-nil-nil-melody-nil-10-both-nil-t-nil-c-nil-t-t-x-3.dat"
    )
    likelihoods2, files2 = lisp.getLikelihoods(L2)
    likelihood2 = np.mean(likelihoods2), np.std(likelihoods2), len(
        likelihoods2)
    # Bar plot: python vs lisp mean likelihood with 95% CIs.
    plt.ylabel("Likelihood")
    plt.bar([0, 1], [np.mean(likelihoods1), likelihood2[0]],
            color="b",
            yerr=[
                1.96 * np.std(likelihoods1) / np.sqrt(len(likelihoods1)),
                1.96 * likelihood2[1] / np.sqrt(likelihood2[2])
            ])
    if not SERVER:
        plt.show()
    else:
        plt.savefig("figs/server/Lisp/likelihood.eps")
        plt.close()
    print("IDyOMpy:", likelihoods1)
    print("LISP:", likelihoods2)
    # Comparing models on pieces
    M = data.data()
    M.parse(folder, augment=False)
    dat1, files3 = M.getScoresFeatures()
    dico = dict(zip(files1, likelihoods1))
    dico2 = dict(zip(files2, likelihoods2))
    # Keep only pieces for which both models returned a likelihood.
    x1 = []
    x2 = []
    for file in files1:
        if file in dico2 and dico[file] is not None and dico2[
                file] is not None:
            x1.append(dico[file])
            x2.append(dico2[file])
    compareLikelihoods(x1, x2, name="Lisp/compareLikelihoods")
    # ploting in the music space
    dat2, files4 = M.getScoresFeatures()
    dico2 = dict(zip(files2, likelihoods2))
    weights = []
    colors = []
    for file in files1:
        if file in dico2 and dico2[file] is not None:
            # Point size grows with the squared python/lisp disagreement;
            # color encodes the sign of the difference.
            weights.append(500 * abs(dico[file] -
                                     dico2[file])**2)
            if dico[file] - dico2[file] < 0:
                colors.append('coral')
            elif dico[file] - dico2[file] > 0:
                colors.append('deepskyblue')
            else:
                colors.append('black')
        else:
            weights.append(10)
            colors.append('black')
    plt.scatter(dat2[0][:len(dat2[1])], dat2[1], s=weights, c=colors)
    plt.title('Python - Lisp')
    plt.xlabel('Average 1-note interval')
    plt.ylabel('Average note onset')
    if not SERVER:
        plt.show()
    else:
        plt.savefig("figs/server/Lisp/scoreSpace.eps")
        plt.close()
    # LATER — everything below quit() is unreachable legacy code
    # (it also references an undefined name L).
    quit()
    plt.ylabel("Likelihood")
    plt.xlabel("time")
    plt.plot(L2['1']["probability"])
    plt.plot(
        L.getLikelihoodfromFile(folder + L2['1']["melody.name"][0][1:-1] +
                                ".mid"))
    plt.show()
def getLikelihoodfromFile(self, file, short_term_only=False,
                          long_term_only=False):
    """
    Return likelihood over a score

    :param folder: file to compute likelihood on

    :type data: string

    :return: np.array(length)
    """
    D = data.data()
    D.addFile(file)
    probas = np.ones(D.getSizeofPiece(0))
    # The first event is unconditioned: uniform over the model alphabet.
    probas[0] = 1 / len(self.LTM[0].models[0].alphabet)
    for model in self.LTM:
        dat = D.getData(model.viewPoint)[0]
        STM = longTermModel.longTermModel(model.viewPoint,
                                          maxOrder=20,
                                          STM=True,
                                          init=dat)
        for i in tqdm(range(1, len(dat))):
            # we instanciate a Short Term Model for the current viewpoint
            STM.train([dat[:i]], shortTerm=True)
            p1 = model.getLikelihood(dat[:i], dat[i])
            flag = True
            # This happens when the state never happened in the training data
            if p1 is None:
                p1 = 1 / 30
                flag = None
            p2 = STM.getLikelihood(dat[:i], dat[i])
            # Combine long- and short-term estimates weighted by relative
            # entropy, falling back to whichever estimate is available.
            if self.stm and p2 is not None:
                if flag is not None:
                    p = self.mergeProbas([p1, p2], [
                        model.getRelativeEntropy(dat[:i]),
                        STM.getRelativeEntropy(dat[:i])
                    ])
                else:
                    p = p2
            else:
                p = p1
            if long_term_only:
                p = p1
            if short_term_only:
                p = p2
            if p is None:
                p = 1 / 30
            probas[i] *= p
            # BUG FIX: removed a leftover debug block that printed the
            # LTM/STM likelihoods whenever probas[i] equalled the magic
            # number 563540 — dead debugging code.
    return probas
def getLikelihoodfromFile(self, file, short_term_only=False,
                          long_term_only=False):
    """
    Return likelihood over a score

    :param folder: file to compute likelihood on

    :type data: string

    :return: np.array(length)
    """
    D = data.data()
    D.addFile(file)
    probas = np.ones(D.getSizeofPiece(0))
    # The first event is unconditioned: uniform over the model alphabet.
    probas[0] = 1 / len(self.LTM[0].models[0].alphabet)
    entropies = np.zeros(D.getSizeofPiece(0))
    L = np.ones(len(self.LTM[0].models[0].alphabet)) / len(
        self.LTM[0].models[0].alphabet)
    # Entropy of the uniform distribution for the first event.
    entropies[0] = -np.sum(L * np.log2(L))
    for model in self.LTM:
        dat = D.getData(model.viewPoint)[0]
        if long_term_only is False:
            STM = longTermModel.longTermModel(model.viewPoint,
                                              maxOrder=20,
                                              STM=True,
                                              init=dat)
        for i in range(1, len(dat)):
            # we instanciate a Short Term Model for the current viewpoint
            if long_term_only is False:
                STM.train([dat[:i]], shortTerm=True)
            p1 = model.getLikelihood(dat[:i], dat[i])
            # 4.9 is the fallback entropy used when a model cannot
            # provide one (roughly log2(30), matching the 1/30 escape).
            if p1 is None:
                e1 = 4.9
            else:
                e1 = model.getEntropy(dat[:i])
            flag = True
            # This happens when the state never happened in the training data
            if p1 is None:
                p1 = 1 / 30
                e1 = 4.9
                flag = None
            if long_term_only is False:
                p2 = STM.getLikelihood(dat[:i], dat[i])
                if p2 is None:
                    e2 = 4.9
                else:
                    e2 = STM.getEntropy(dat[:i])
            # Select/merge the long- and short-term estimates.
            if long_term_only:
                p = p1
                e = e1
            elif short_term_only:
                p = p2
                e = e2
                if p is None:
                    p = 1 / 30
                    e = 4.9
            elif self.stm and p2 is not None:
                if flag is not None:
                    # Entropy-weighted combination of LTM and STM.
                    p = self.mergeProbas([p1, p2], [
                        model.getRelativeEntropy(dat[:i]),
                        STM.getRelativeEntropy(dat[:i])
                    ])
                    e = self.mergeProbas([e1, e2], [
                        model.getRelativeEntropy(dat[:i]),
                        STM.getRelativeEntropy(dat[:i])
                    ])
                else:
                    p = p2
                    e = e2
            else:
                p = p1
                e = e1
            # Accumulate across viewpoints: probabilities multiply,
            # entropies add.
            probas[i] *= p
            entropies[i] += e
    return probas, entropies
def compareJump(folder, k_fold=2):
    """
    Compare the likelihood between idyom model and jump model.
    """
    # Cached variant kept for reference:
    # if os.path.isfile(".IDyOM.save"):
    #     likelihood1, files1 = pickle.load(open(".IDyOM.save", 'rb'))
    #     print("We loaded idyom model from pickle.")
    # else:
    #     print("We store idyom model for later.")
    #     likelihood1, files1 = cross_validation(folder, k_fold=k_fold, jump=False)
    #     pickle.dump((likelihood1, files1), open(".IDyOM.save", 'wb'))
    likelihood1, files1 = cross_validation(folder, k_fold=k_fold, jump=False)
    likelihood2, files2 = cross_validation(folder, k_fold=k_fold, jump=True)
    # Bar plot of the two mean likelihoods with 95% confidence intervals.
    plt.ylabel("Likelihood")
    plt.bar([0, 1], [np.mean(likelihood1), np.mean(likelihood2)],
            color="b",
            yerr=[
                1.96 * np.std(likelihood1) / np.sqrt(len(likelihood1)),
                1.96 * np.std(likelihood2) / np.sqrt(len(likelihood2))
            ])
    if not SERVER:
        plt.show()
    else:
        plt.savefig("figs/server/JUMPCompare.eps")
        plt.close()
    print("IDyOM")
    print("Mean:", np.mean(likelihood1))
    print("Std:", np.std(likelihood1))
    print("JUMP")
    print("Mean:", np.mean(likelihood2))
    print("Std:", np.std(likelihood2))
    M = data.data()
    M.parse(folder)
    dat1, files3 = M.getScoresFeatures()
    dico = dict(zip(files1, likelihood1))
    dico2 = dict(zip(files2, likelihood2))
    # Scatter-compare only pieces scored by both models.
    x1 = []
    x2 = []
    for file in files1:
        if file in dico2 and dico[file] is not None and dico2[
                file] is not None:
            x1.append(dico[file])
            x2.append(dico2[file])
    compareLikelihoods(x1, x2)
    # Upper panel: IDyOM — point size grows with squared likelihood.
    weights = []
    for file in files3:
        if file in dico and dico[file] is not None:
            weights.append(500 * dico[file]**2)
        else:
            weights.append(0)
    plt.subplot(2, 1, 1)
    plt.scatter(dat1[0][:len(dat1[1])], dat1[1], s=weights)
    plt.title('IDyOM')
    plt.xlabel('Average 1-note interval')
    plt.ylabel('Average note onset')
    # Lower panel: JUMP model, same weighting scheme.
    dat2, files4 = M.getScoresFeatures()
    dico = dict(zip(files2, likelihood2))
    weights = []
    for file in files4:
        if file in dico and dico[file] is not None:
            weights.append(500 * dico[file]**2)
        else:
            weights.append(0)
    plt.subplot(2, 1, 2)
    plt.scatter(dat2[0][:len(dat2[1])], dat2[1], s=weights)
    plt.title('JUMP')
    plt.xlabel('Average 1-note interval')
    plt.ylabel('Average note onset')
    if not SERVER:
        plt.show()
    else:
        plt.savefig("figs/server/scoreSpace.eps")
        plt.close()