Exemplo n.º 1
0
    def train(self,
              mode_name,
              pt_list,
              ref_freq_list,
              pt_dir='',
              metric='pcd',
              save_dir='./'):
        """---------------------------------------------------------------------------------------
        This function handles everything related to the supervised learning portion of this
        system. It expects the list of text files containing the pitch tracks of the dataset
        and the array of their known tonics, generates the joint distribution of the mode and
        saves it.

        mode_name     : name of the mode; used as the source label of the distribution and
                        as the prefix of the saved file name
        pt_list       : pitch track file names, loaded one by one with mf.load_track
        ref_freq_list : reference frequency (known tonic) of each pitch track, in the same
                        order as pt_list
        pt_dir        : directory handed to mf.load_track together with each file name
        metric        : 'pcd' additionally passes the distribution through mf.generate_pcd;
                        any other value saves the distribution from mf.generate_pd as it is
        save_dir      : directory handed to the save call

        The result is saved as '<mode_name>_<metric>.json'. Nothing is returned.
        ---------------------------------------------------------------------------------------"""
        mode_track = []
        for idx, pt_file in enumerate(pt_list):
            # Progress output. The call form prints the same text on Python 2
            # and, unlike the print statement, is also valid on Python 3.
            print(pt_file)
            cur_track = mf.load_track(pt_file, pt_dir)
            cur_cent_track = mf.hz_to_cent(cur_track,
                                           ref_freq=ref_freq_list[idx])
            # Pool the samples of every recording into a single track.
            mode_track.extend(cur_cent_track)
        joint_dist = mf.generate_pd(mode_track,
                                    smooth_factor=self.smooth_factor,
                                    cent_ss=self.cent_ss,
                                    source=mode_name,
                                    segment='all')
        if metric == 'pcd':
            joint_dist = mf.generate_pcd(joint_dist)
        joint_dist.save((mode_name + '_' + metric + '.json'),
                        save_dir=save_dir)
Exemplo n.º 2
0
	def train(self, mode_name, pt_list, ref_freq_list, pt_dir='', metric='pcd', save_dir='./'):
		"""---------------------------------------------------------------------------------------
		This function handles everything related to the supervised learning portion of this
		system. It expects the list of text files containing the pitch tracks of the dataset
		and the array of their known tonics, generates the joint distribution of the mode and
		saves it.

		mode_name     : name of the mode; used as the source label of the distribution and
		                as the prefix of the saved file name
		pt_list       : pitch track file names, loaded one by one with mf.load_track
		ref_freq_list : reference frequency (known tonic) of each pitch track, in the same
		                order as pt_list
		pt_dir        : directory handed to mf.load_track together with each file name
		metric        : 'pcd' additionally passes the distribution through mf.generate_pcd;
		                any other value saves the distribution from mf.generate_pd as it is
		save_dir      : directory handed to the save call

		The result is saved as '<mode_name>_<metric>.json'. Nothing is returned.
		---------------------------------------------------------------------------------------"""
		mode_track = []
		for idx, pt_file in enumerate(pt_list):
			# Progress output. The call form prints the same text on Python 2
			# and, unlike the print statement, is also valid on Python 3.
			print(pt_file)
			cur_track = mf.load_track(pt_file, pt_dir)
			cur_cent_track = mf.hz_to_cent(cur_track, ref_freq=ref_freq_list[idx])
			# Pool the samples of every recording into a single track.
			mode_track.extend(cur_cent_track)
		joint_dist = mf.generate_pd(mode_track, smooth_factor=self.smooth_factor, cent_ss=self.cent_ss, source=mode_name, segment='all')
		if metric == 'pcd':
			joint_dist = mf.generate_pcd(joint_dist)
		joint_dist.save((mode_name + '_' + metric + '.json'), save_dir=save_dir)
Exemplo n.º 3
0
 def train_segments(self,
                    pts,
                    seg_tuples,
                    ref_freq,
                    save_dir,
                    save_name,
                    metric='pcd'):
     """---------------------------------------------------------------------------------------
     Generates one distribution per pitch track segment and returns them as a list.

     pts        : the pitch track segments, one entry per segment
     seg_tuples : one 3-element entry per segment, in the same order as pts; element 0 is
                  used as the source label, elements 1 and 2 as the segment interval
     ref_freq   : reference frequency handed to mf.generate_pd for every segment
     save_dir   : accepted but not used by this function
     save_name  : accepted but not used by this function
     metric     : 'pcd' additionally passes each distribution through mf.generate_pcd

     Returns the list of distributions, in the order of pts.
     ---------------------------------------------------------------------------------------"""
     use_pcd = (metric == 'pcd')
     segment_dists = []
     for seg_no, seg_track in enumerate(pts):
         seg_info = seg_tuples[seg_no]
         seg_dist = mf.generate_pd(seg_track,
                                   ref_freq=ref_freq,
                                   smooth_factor=self.smooth_factor,
                                   cent_ss=self.cent_ss,
                                   source=seg_info[0],
                                   segment=(seg_info[1], seg_info[2]))
         if use_pcd:
             seg_dist = mf.generate_pcd(seg_dist)
         segment_dists.append(seg_dist)
     return segment_dists
Exemplo n.º 4
0
    def estimate(self,
                 pitch_track,
                 mode_names=[],
                 mode_name='',
                 mode_dir='./',
                 est_tonic=True,
                 est_mode=True,
                 rank=1,
                 distance_method="euclidean",
                 metric='pcd',
                 ref_freq=440):
        """---------------------------------------------------------------------------------------
        High level estimation entry point. Depending on the est_* flags it estimates the
        tonic, the mode, or both, for the given pitch track.

        pitch_track     : pitch values of the recording; converted with mf.hz_to_cent
                          relative to ref_freq
        mode_names      : names of the candidate modes; the model '<name>_<metric>.json' is
                          loaded from mode_dir for each of them
        mode_name       : name of the known mode; its model is loaded unless the name is ''
                          and is the one used when only the tonic is estimated
        mode_dir        : directory the models are loaded from
        est_tonic       : whether the tonic is estimated
        est_mode        : whether the mode is estimated
        rank            : number of ranked candidates to return
        distance_method : forwarded to the distance computations
        metric          : 'pcd' or 'pd'
        ref_freq        : reference frequency used for the cent conversion

        Returns (mode_list, tonic_list) when both are estimated, tonic_list when only the
        tonic is estimated, mode_list when only the mode is estimated and 0 when neither
        flag is set. If the tonic is requested with a metric other than 'pcd' or 'pd',
        None is returned.
        ---------------------------------------------------------------------------------------"""
        use_pcd = (metric == 'pcd')
        use_pd = (metric == 'pd')

        def model_file(name):
            # File name under which the model of a mode is stored.
            return name + '_' + metric + '.json'

        ### Distribution of the input recording
        cent_track = mf.hz_to_cent(pitch_track, ref_freq=ref_freq)
        dist = mf.generate_pd(cent_track,
                              ref_freq=ref_freq,
                              smooth_factor=self.smooth_factor,
                              cent_ss=self.cent_ss)
        if use_pcd:
            dist = mf.generate_pcd(dist)

        ### Trained models and result containers
        mode_dists = [p_d.load(model_file(name), mode_dir)
                      for name in mode_names]
        mode_dist = None
        if mode_name != '':
            mode_dist = p_d.load(model_file(mode_name), mode_dir)
        tonic_list = np.zeros(rank)
        mode_list = [''] * rank

        if not est_tonic:
            if not est_mode:
                # Nothing is expected to be estimated
                return 0

            ### Mode only: rank the candidate modes by distance
            distance_vector = self.mode_estimate(
                dist,
                mode_dists,
                distance_method=distance_method,
                metric=metric)
            for r in range(rank):
                best = np.argmin(distance_vector)
                mode_list[r] = mode_names[best]
                # Push the chosen entry above every other so the next
                # iteration picks the next best one.
                distance_vector[best] = np.amax(distance_vector) + 1
            return mode_list

        ### Tonic candidates are the peaks of the distribution
        if use_pcd:
            ### Shifting to the global minima to prevent wrong detection of peaks
            shift_factor = dist.vals.tolist().index(min(dist.vals))
            dist = dist.shift(shift_factor)
            anti_freq = mf.cent_to_hz([dist.bins[shift_factor]],
                                      ref_freq=ref_freq)[0]
            peak_idxs, _ = dist.detect_peaks()
        elif use_pd:
            peak_idxs, _ = dist.detect_peaks()
            origin = np.where(dist.bins == 0)[0][0]
            shift_idxs = [(idx - origin) for idx in peak_idxs]

        if est_mode:
            ### Joint estimation: rows are tonic candidates, columns are modes
            if use_pcd:
                dist_mat = mf.generate_distance_matrix(dist,
                                                       peak_idxs,
                                                       mode_dists,
                                                       method=distance_method)
                for r in range(rank):
                    hits = np.where(dist_mat == np.amin(dist_mat))
                    min_row, min_col = hits[0][0], hits[1][0]
                    tonic_list[r] = mf.cent_to_hz(
                        [dist.bins[peak_idxs[min_row]]], anti_freq)[0]
                    mode_list[r] = mode_names[min_col]
                    dist_mat[min_row][min_col] = np.amax(dist_mat) + 1
                return mode_list, tonic_list

            if use_pd:
                dist_mat = np.zeros((len(shift_idxs), len(mode_dists)))
                for col, model in enumerate(mode_dists):
                    dist_mat[:, col] = self.tonic_estimate(
                        dist,
                        shift_idxs,
                        model,
                        distance_method=distance_method,
                        metric=metric)
                for r in range(rank):
                    hits = np.where(dist_mat == np.amin(dist_mat))
                    min_row, min_col = hits[0][0], hits[1][0]
                    tonic_list[r] = mf.cent_to_hz(
                        [shift_idxs[min_row] * self.cent_ss], ref_freq)[0]
                    mode_list[r] = mode_names[min_col]
                    dist_mat[min_row][min_col] = np.amax(dist_mat) + 1
                return mode_list, tonic_list

            # Unsupported metric: no estimation is made.
            return None

        ### Tonic only: compare against the model of the known mode
        if use_pcd:
            distance_vector = self.tonic_estimate(
                dist,
                peak_idxs,
                mode_dist,
                distance_method=distance_method,
                metric=metric)
            for r in range(rank):
                best = np.argmin(distance_vector)
                tonic_list[r] = mf.cent_to_hz([dist.bins[peak_idxs[best]]],
                                              anti_freq)[0]
                distance_vector[best] = np.amax(distance_vector) + 1
            return tonic_list

        if use_pd:
            distance_vector = self.tonic_estimate(
                dist,
                shift_idxs,
                mode_dist,
                distance_method=distance_method,
                metric=metric)
            for r in range(rank):
                best = np.argmin(distance_vector)
                tonic_list[r] = mf.cent_to_hz(
                    [shift_idxs[best] * self.cent_ss], ref_freq)[0]
                distance_vector[best] = np.amax(distance_vector) + 1
            return tonic_list

        # Unsupported metric: no estimation is made.
        return None
Exemplo n.º 5
0
    def estimate(self,
                 pitch_track,
                 mode_names=[],
                 mode_name='',
                 mode_dir='./',
                 est_tonic=True,
                 est_mode=True,
                 rank=1,
                 distance_method="euclidean",
                 metric='pcd',
                 ref_freq=440):
        """---------------------------------------------------------------------------------------
        High level estimation entry point. Depending on the est_* flags it estimates the
        tonic, the mode, or both, for the given pitch track, by comparing its distribution
        with collections of trained distributions.

        pitch_track     : pitch values of the recording; converted with mf.hz_to_cent
                          relative to ref_freq
        mode_names      : names of the candidate modes; a collection of distributions is
                          loaded with self.load_collection for each of them
        mode_name       : name of the known mode; its collection is loaded unless the name
                          is '' and is the one used when only the tonic is estimated
        mode_dir        : directory the collections are loaded from
        est_tonic       : whether the tonic is estimated
        est_mode        : whether the mode is estimated
        rank            : number of ranked candidates to return
        distance_method : forwarded to the distance computations
        metric          : 'pcd' or 'pd'
        ref_freq        : reference frequency used for the cent conversion

        Returns (mode_list, tonic_list) when both are estimated, tonic_list when only the
        tonic is estimated, mode_list when only the mode is estimated and 0 when neither
        flag is set. Every entry of mode_list is the (source, segmentation) pair of the
        matched trained distribution.
        ---------------------------------------------------------------------------------------"""
        ### Preliminaries before the estimations
        cent_track = mf.hz_to_cent(pitch_track, ref_freq)
        dist = mf.generate_pd(cent_track,
                              ref_freq=ref_freq,
                              smooth_factor=self.smooth_factor,
                              cent_ss=self.cent_ss)
        dist = mf.generate_pcd(dist) if (metric == 'pcd') else dist
        mode_collections = [
            self.load_collection(mode, metric, dist_dir=mode_dir)
            for mode in mode_names
        ]
        # The loop variable must not be called `dist`: under Python 2 a list
        # comprehension leaks its variable into the enclosing scope, which
        # would replace the input distribution computed above.
        mode_dists = [model for col in mode_collections for model in col]
        mode_dist = self.load_collection(
            mode_name, metric,
            dist_dir=mode_dir) if (mode_name != '') else None
        tonic_list = np.zeros(rank)
        mode_list = ['' for x in range(rank)]

        if (est_tonic):
            if (metric == 'pcd'):
                ### Shifting to the global minima to prevent wrong detection of peaks
                shift_factor = dist.vals.tolist().index(min(dist.vals))
                dist = dist.shift(shift_factor)
                anti_freq = mf.cent_to_hz([dist.bins[shift_factor]],
                                          ref_freq=ref_freq)[0]
                peak_idxs, peak_vals = dist.detect_peaks()
            elif (metric == 'pd'):
                peak_idxs, peak_vals = dist.detect_peaks()
                origin = np.where(dist.bins == 0)[0][0]
                shift_idxs = [(idx - origin) for idx in peak_idxs]

        ### Call to actual estimation functions
        if (est_tonic and est_mode):
            # In this branch the rows of dist_mat are tonic candidates and the
            # columns are trained distributions.
            if (metric == 'pcd'):
                dist_mat = mf.generate_distance_matrix(dist,
                                                       peak_idxs,
                                                       mode_dists,
                                                       method=distance_method)
                for r in range(rank):
                    hits = np.where(dist_mat == np.amin(dist_mat))
                    min_row, min_col = hits[0][0], hits[1][0]
                    tonic_list[r] = mf.cent_to_hz(
                        [dist.bins[peak_idxs[min_row]]], anti_freq)[0]
                    mode_list[r] = (mode_dists[min_col].source,
                                    mode_dists[min_col].segmentation)
                    # Push the chosen entry above every other so the next
                    # iteration picks the next best one.
                    dist_mat[min_row][min_col] = (np.amax(dist_mat) + 1)
                return mode_list, tonic_list

            elif (metric == 'pd'):
                dist_mat = np.zeros((len(shift_idxs), len(mode_dists)))
                for m in range(len(mode_dists)):
                    dist_mat[:, m] = self.tonic_estimate(
                        dist,
                        shift_idxs,
                        mode_dists[m],
                        distance_method=distance_method,
                        metric=metric)
                for r in range(rank):
                    hits = np.where(dist_mat == np.amin(dist_mat))
                    min_row, min_col = hits[0][0], hits[1][0]
                    tonic_list[r] = mf.cent_to_hz(
                        [shift_idxs[min_row] * self.cent_ss], ref_freq)[0]
                    mode_list[r] = (mode_dists[min_col].source,
                                    mode_dists[min_col].segmentation)
                    dist_mat[min_row][min_col] = (np.amax(dist_mat) + 1)
                return mode_list, tonic_list

        elif (est_tonic):
            # In this branch dist_mat is built one row per trained distribution
            # of the known mode, so the layout is the transpose of the joint
            # branch: rows are distributions, columns are tonic candidates.
            if (metric == 'pcd'):
                dist_mat = np.array([
                    np.array(
                        mf.generate_distance_matrix(
                            dist, peak_idxs, [d],
                            method=distance_method))[:, 0]
                    for d in mode_dist
                ])

            elif (metric == 'pd'):
                peak_idxs = shift_idxs
                temp = p_d.PitchDistribution(dist.bins,
                                             dist.vals,
                                             kernel_width=dist.kernel_width,
                                             source=dist.source,
                                             ref_freq=dist.ref_freq,
                                             segment=dist.segmentation)
                dist_rows = []
                # NOTE(review): temp is padded again on every iteration, so
                # the padding accumulates across trained distributions -
                # confirm this is intended.
                for d in mode_dist:
                    temp, d = mf.pd_zero_pad(temp, d, cent_ss=self.cent_ss)

                    ### Filling both sides of vals with zeros, to make sure that the shifts won't drop any non-zero values
                    temp.vals = np.concatenate(
                        (np.zeros(abs(max(peak_idxs))), temp.vals,
                         np.zeros(abs(min(peak_idxs)))))
                    d.vals = np.concatenate(
                        (np.zeros(abs(max(peak_idxs))), d.vals,
                         np.zeros(abs(min(peak_idxs)))))
                    cur_vector = np.array(
                        mf.generate_distance_matrix(
                            temp, peak_idxs, [d],
                            method=distance_method))[:, 0]
                    dist_rows.append(cur_vector)
                dist_mat = np.array(dist_rows)

            for r in range(rank):
                hits = np.where(dist_mat == np.amin(dist_mat))
                min_row, min_col = hits[0][0], hits[1][0]
                # The tonic candidate is selected by the column index; the
                # row index only tells which trained distribution matched.
                if (metric == 'pcd'):
                    tonic_list[r] = mf.cent_to_hz(
                        [dist.bins[peak_idxs[min_col]]], anti_freq)[0]
                else:
                    # anti_freq only exists for 'pcd'; for 'pd' the shift is
                    # converted exactly as in the joint estimation above.
                    tonic_list[r] = mf.cent_to_hz(
                        [shift_idxs[min_col] * self.cent_ss], ref_freq)[0]
                dist_mat[min_row, min_col] = (np.amax(dist_mat) + 1)
            return tonic_list

        elif (est_mode):
            distance_vector = self.mode_estimate(
                dist,
                mode_dists,
                distance_method=distance_method,
                metric=metric)
            for r in range(rank):
                idx = np.argmin(distance_vector)
                mode_list[r] = (mode_dists[idx].source,
                                mode_dists[idx].segmentation)
                distance_vector[idx] = (np.amax(distance_vector) + 1)
            return mode_list

        else:
            # Nothing is expected to be estimated
            return 0
Exemplo n.º 6
0
	def estimate(self, pitch_track, mode_names=[], mode_name='', mode_dir='./', est_tonic=True, est_mode=True, rank=1, distance_method="euclidean", metric='pcd', ref_freq=440):
		"""---------------------------------------------------------------------------------------
		High level estimation entry point. Depending on the est_* flags it estimates the
		tonic, the mode, or both, for the given pitch track.

		pitch_track     : pitch values of the recording; converted with mf.hz_to_cent
		                  relative to ref_freq
		mode_names      : names of the candidate modes; the model '<name>_<metric>.json' is
		                  loaded from mode_dir for each of them
		mode_name       : name of the known mode; its model is loaded unless the name is ''
		                  and is the one used when only the tonic is estimated
		mode_dir        : directory the models are loaded from
		est_tonic       : whether the tonic is estimated
		est_mode        : whether the mode is estimated
		rank            : number of ranked candidates to return
		distance_method : forwarded to the distance computations
		metric          : 'pcd' or 'pd'
		ref_freq        : reference frequency used for the cent conversion

		Returns (mode_list, tonic_list) when both are estimated, tonic_list when only the
		tonic is estimated, mode_list when only the mode is estimated and 0 when neither
		flag is set. If the tonic is requested with a metric other than 'pcd' or 'pd',
		None is returned.
		---------------------------------------------------------------------------------------"""
		use_pcd = (metric == 'pcd')
		use_pd = (metric == 'pd')

		def model_file(name):
			# File name under which the model of a mode is stored.
			return name + '_' + metric + '.json'

		### Distribution of the input recording
		cent_track = mf.hz_to_cent(pitch_track, ref_freq=ref_freq)
		dist = mf.generate_pd(cent_track, ref_freq=ref_freq, smooth_factor=self.smooth_factor, cent_ss=self.cent_ss)
		if use_pcd:
			dist = mf.generate_pcd(dist)

		### Trained models and result containers
		mode_dists = [p_d.load(model_file(name), mode_dir) for name in mode_names]
		mode_dist = None
		if mode_name != '':
			mode_dist = p_d.load(model_file(mode_name), mode_dir)
		tonic_list = np.zeros(rank)
		mode_list = [''] * rank

		if not est_tonic:
			if not est_mode:
				# Nothing is expected to be estimated
				return 0

			### Mode only: rank the candidate modes by distance
			distance_vector = self.mode_estimate(dist, mode_dists, distance_method=distance_method, metric=metric)
			for r in range(rank):
				best = np.argmin(distance_vector)
				mode_list[r] = mode_names[best]
				# Push the chosen entry above every other so the next
				# iteration picks the next best one.
				distance_vector[best] = np.amax(distance_vector) + 1
			return mode_list

		### Tonic candidates are the peaks of the distribution
		if use_pcd:
			### Shifting to the global minima to prevent wrong detection of peaks
			shift_factor = dist.vals.tolist().index(min(dist.vals))
			dist = dist.shift(shift_factor)
			anti_freq = mf.cent_to_hz([dist.bins[shift_factor]], ref_freq=ref_freq)[0]
			peak_idxs, _ = dist.detect_peaks()
		elif use_pd:
			peak_idxs, _ = dist.detect_peaks()
			origin = np.where(dist.bins == 0)[0][0]
			shift_idxs = [(idx - origin) for idx in peak_idxs]

		if est_mode:
			### Joint estimation: rows are tonic candidates, columns are modes
			if use_pcd:
				dist_mat = mf.generate_distance_matrix(dist, peak_idxs, mode_dists, method=distance_method)
				for r in range(rank):
					hits = np.where(dist_mat == np.amin(dist_mat))
					min_row, min_col = hits[0][0], hits[1][0]
					tonic_list[r] = mf.cent_to_hz([dist.bins[peak_idxs[min_row]]], anti_freq)[0]
					mode_list[r] = mode_names[min_col]
					dist_mat[min_row][min_col] = np.amax(dist_mat) + 1
				return mode_list, tonic_list

			if use_pd:
				dist_mat = np.zeros((len(shift_idxs), len(mode_dists)))
				for col, model in enumerate(mode_dists):
					dist_mat[:, col] = self.tonic_estimate(dist, shift_idxs, model, distance_method=distance_method, metric=metric)
				for r in range(rank):
					hits = np.where(dist_mat == np.amin(dist_mat))
					min_row, min_col = hits[0][0], hits[1][0]
					tonic_list[r] = mf.cent_to_hz([shift_idxs[min_row] * self.cent_ss], ref_freq)[0]
					mode_list[r] = mode_names[min_col]
					dist_mat[min_row][min_col] = np.amax(dist_mat) + 1
				return mode_list, tonic_list

			# Unsupported metric: no estimation is made.
			return None

		### Tonic only: compare against the model of the known mode
		if use_pcd:
			distance_vector = self.tonic_estimate(dist, peak_idxs, mode_dist, distance_method=distance_method, metric=metric)
			for r in range(rank):
				best = np.argmin(distance_vector)
				tonic_list[r] = mf.cent_to_hz([dist.bins[peak_idxs[best]]], anti_freq)[0]
				distance_vector[best] = np.amax(distance_vector) + 1
			return tonic_list

		if use_pd:
			distance_vector = self.tonic_estimate(dist, shift_idxs, mode_dist, distance_method=distance_method, metric=metric)
			for r in range(rank):
				best = np.argmin(distance_vector)
				tonic_list[r] = mf.cent_to_hz([shift_idxs[best] * self.cent_ss], ref_freq)[0]
				distance_vector[best] = np.amax(distance_vector) + 1
			return tonic_list

		# Unsupported metric: no estimation is made.
		return None