Example #1
    def label_contours(self, intervals, window=150, hop=30):
        """
        In a very flowing contour, it is not trivial to say which pitch value corresponds
         to which interval. This function labels pitch contours with intervals by inferring
         them from the characteristics of the contour and its melodic context.

        :param intervals: reference set of intervals (in cents) against which the contours are labelled.
        :param window: the size of the window over which the context is gauged, in milliseconds.
        :param hop: hop size in milliseconds.
        """
        window /= 1000.0
        hop /= 1000.0
        exposure = int(window / hop)

        boundary = window - hop
        final_index = utils.find_nearest_index(
            self.pitch_obj.timestamps,
            self.pitch_obj.timestamps[-1] - boundary)

        interval = np.median(np.diff(self.pitch_obj.timestamps))
        #interval = 0.00290254832393
        # number of samples per window and per hop (timestamps are assumed roughly uniformly spaced)
        window_step = int(window / interval)
        hop_step = int(hop / interval)
        start_index = 0
        end_index = window_step
        contour_labels = {}
        means = []
        while end_index < final_index:
            temp = self.pitch_obj.pitch[start_index:end_index][
                self.pitch_obj.pitch[start_index:end_index] > -10000]
            means.append(np.mean(temp))
            start_index = start_index + hop_step
            end_index = start_index + window_step

        for i in xrange(exposure, len(means) - exposure + 1):
            _median = np.median(means[i - exposure:i])
            if _median < -5000:
                continue
            ind = utils.find_nearest_index(_median, intervals)
            contour_end = (i - exposure) * hop_step + window_step
            contour_start = contour_end - hop_step
            #print sliceBegin, sliceEnd, JICents[ind]
            #newPitch[sliceBegin:sliceEnd] = JICents[ind]
            if intervals[ind] in contour_labels.keys():
                contour_labels[intervals[ind]].append(
                    [contour_start, contour_end])
            else:
                contour_labels[intervals[ind]] = [[contour_start, contour_end]]

        self.contour_labels = contour_labels
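Every example in this listing leans on a find_nearest_index helper that the listing itself never shows. A minimal sketch of what it presumably does, assuming it simply returns the index of the array element closest to a target value (the snippets pass the arguments in both orders; that happens to work here because the absolute difference is symmetric):

import numpy as np

def find_nearest_index(arr, value):
    # Hypothetical reimplementation for illustration; the helpers in these
    # projects may differ in edge-case handling.
    return int(np.abs(np.asarray(arr) - value).argmin())

print(find_nearest_index([1.0, 2.0, 3.0, 4.0, 5.0], 3.7))  # -> 3 (the element 4.0)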
Example #2
    def shiftgrid(self, lon0, lonsin=None, cyclic=360.0):
        '''
        Adapted from basemap.shiftgrid.
        Shifts the field so that its origin is at longitude lon0.
        lonsin is a vector of longitudes; works only on a cyclic grid.
        lon0 - starting longitude for the shifted grid.
        '''

        if lonsin is None: lonsin = self.grid['lon'][0]
        i0 = utl.find_nearest_index(lonsin, lon0)
        i0_shift = len(lonsin) - i0

        dataout = sp.ma.zeros(self.data.shape, self.data.dtype)
        lonsout = sp.zeros(self.grid['lon'].shape, self.grid['lon'].dtype)
        latsout = sp.zeros(self.grid['lat'].shape, self.grid['lat'].dtype)

        lonsout[:, 0:i0_shift] = self.grid['lon'][:, i0:]
        latsout[:, 0:i0_shift] = self.grid['lat'][:, i0:]
        dataout[:, :, :, 0:i0_shift] = self.data[:, :, :, i0:]

        lonsout[:, i0_shift:] = self.grid['lon'][:, :i0] + cyclic
        latsout[:, i0_shift:] = self.grid['lat'][:, :i0]
        dataout[:, :, :, i0_shift:] = self.data[:, :, :, :i0]

        self.data = dataout
        self.grid['lon'] = lonsout
        self.grid['lat'] = latsout
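The index arithmetic above amounts to rolling every array along the longitude axis so that lon0 becomes the first column, with the wrapped part getting the cyclic period added to keep longitudes monotonic. A standalone sketch of the same shift on a 1-D longitude vector (hypothetical, for illustration only):

import numpy as np

def shift_lons(lonsin, lon0, cyclic=360.0):
    lonsin = np.asarray(lonsin, dtype=float)
    i0 = int(np.abs(lonsin - lon0).argmin())              # index of the new origin
    return np.concatenate([lonsin[i0:], lonsin[:i0] + cyclic])

lons = np.arange(0.0, 360.0, 30.0)                        # 0, 30, ..., 330
print(shift_lons(lons, 180.0))                            # 180 ... 330, 360 ... 510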
Example #3
File: sedsorter.py  Project: dongwooc/Utils
 def plot_parents(self, rflam):
     plt.figure()
     plt.xlim(1e3, 2.5e4)
     xnorm = find_nearest_index(16000, rflam)
     for i in self.id_parents:
         yplt = self.seds[np.where(self.ids == i)][0]
         plt.loglog(rflam, yplt / yplt[xnorm])
     plt.show()
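Here find_nearest_index locates the wavelength sample closest to rest-frame 16000 Angstrom so that each SED is normalized to unity there before plotting. A tiny synthetic-data sketch of that normalization step (all names and values below are made up for illustration):

import numpy as np

rflam = np.logspace(3, 4.4, 500)              # rest-frame wavelengths, ~1e3 to ~2.5e4 Angstrom
sed = rflam ** -1.5                           # a fake power-law SED
xnorm = int(np.abs(rflam - 16000.0).argmin())
sed_norm = sed / sed[xnorm]                   # equals 1.0 at the sample nearest 16000 Angstrom
print(sed_norm[xnorm])                        # -> 1.0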
Example #4
File: Inflection.py  Project: temaled/HEAD
def discretize(self, intervals, slope_thresh=1500, cents_thresh=50):
    """
        This function takes the pitch data and returns it quantized to given
        set of intervals. All transactions must happen in cent scale.

        slope_thresh is the bound beyond which the pitch contour is said to transit
        from one svara to another. It is specified in cents/sec.

        cents_thresh is a limit within which two pitch values are considered the same.
        This is what pushes the quantization limit.

        The function returns quantized pitch data.
        """

    #eps = np.finfo(float).eps
    #pitch = median_filter(pitch, 7)+eps

    self.pitch = median_filter(self.pitch, 7)
    pitch_quantized = np.zeros(len(self.pitch))
    pitch_quantized[0] = utils.find_nearest_index(intervals, self.pitch[0])
    pitch_quantized[-1] = utils.find_nearest_index(intervals, self.pitch[-1])

    for i in xrange(1, len(self.pitch) - 1):
        if self.pitch[i] == -10000:
            pitch_quantized[i] = -10000
            continue
        slope_back = abs((self.pitch[i] - self.pitch[i - 1]) /
                         (self.timestamps[i] - self.timestamps[i - 1]))
        slope_front = abs((self.pitch[i + 1] - self.pitch[i]) /
                          (self.timestamps[i + 1] - self.timestamps[i]))
        if slope_front < slope_thresh or slope_back < slope_thresh:
            ind = utils.find_nearest_index(intervals, self.pitch[i])
            cents_diff = abs(self.pitch[i] - intervals[ind])
            if cents_diff <= cents_thresh:
                pitch_quantized[i] = intervals[ind]
            else:
                pitch_quantized[i] = -10000
        else:
            pitch_quantized[i] = -10000

    self.pitch = pitch_quantized
    return self.pitch
Example #5
    def discretize(self, intervals, slope_thresh=1500, cents_thresh=50):
        """
        This function takes the pitch data and returns it quantized to a given
        set of intervals. All values are assumed to be in the cent scale.

        slope_thresh is the bound beyond which the pitch contour is said to transit
        from one svara to another. It is specified in cents/sec.

        cents_thresh is the limit within which two pitch values are considered the same.
        This sets the quantization tolerance.

        The function returns the quantized pitch data.
        """

        #eps = np.finfo(float).eps
        #pitch = median_filter(pitch, 7)+eps

        self.pitch = median_filter(self.pitch, 7)
        pitch_quantized = np.zeros(len(self.pitch))
        pitch_quantized[0] = utils.find_nearest_index(intervals, self.pitch[0])
        pitch_quantized[-1] = utils.find_nearest_index(intervals, self.pitch[-1])

        for i in xrange(1, len(self.pitch)-1):
            if self.pitch[i] == -10000:
                pitch_quantized[i] = -10000
                continue
            slope_back = abs((self.pitch[i] - self.pitch[i-1])/(self.timestamps[i] - self.timestamps[i-1]))
            slope_front = abs((self.pitch[i+1] - self.pitch[i])/(self.timestamps[i+1] - self.timestamps[i]))
            if slope_front < slope_thresh or slope_back < slope_thresh:
                ind = utils.find_nearest_index(intervals, self.pitch[i])
                cents_diff = abs(self.pitch[i] - intervals[ind])
                if cents_diff <= cents_thresh:
                    pitch_quantized[i] = intervals[ind]
                else:
                    pitch_quantized[i] = -10000
            else:
                pitch_quantized[i] = -10000

        self.pitch = pitch_quantized
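A compact standalone adaptation of the quantization rule above, run on a toy pitch track in cents with -10000 marking unvoiced frames (plain arrays replace the class attributes, the median filter is dropped, and endpoints are simply left unvoiced; all data are made up for illustration):

import numpy as np

def discretize_track(pitch, timestamps, intervals, slope_thresh=1500.0, cents_thresh=50.0):
    pitch = np.asarray(pitch, dtype=float)
    out = np.full(len(pitch), -10000.0)
    for i in range(1, len(pitch) - 1):
        if pitch[i] == -10000:
            continue
        back = abs((pitch[i] - pitch[i - 1]) / (timestamps[i] - timestamps[i - 1]))
        front = abs((pitch[i + 1] - pitch[i]) / (timestamps[i + 1] - timestamps[i]))
        if front < slope_thresh or back < slope_thresh:        # contour is locally stable
            ind = int(np.abs(np.asarray(intervals) - pitch[i]).argmin())
            if abs(pitch[i] - intervals[ind]) <= cents_thresh:
                out[i] = intervals[ind]                        # snap to the nearest interval
    return out

t = np.arange(6) * 0.01                                        # 10 ms frames
p = [0.0, 10.0, 190.0, 200.0, -10000.0, 395.0]                 # cents, one unvoiced frame
print(discretize_track(p, t, intervals=[0.0, 200.0, 400.0]))
# values: -10000, 0, 200, 200, -10000, -10000

Note that the originals assign find_nearest_index(...) itself (an index, not an interval value) to the first and last samples, which may be unintended.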
Example #6
def main():
    """
    Simple test harness for assignment1 functions - incomplete.
    """
    print 'Squaring [1,2,3,4,5]:', utils.square_all([1, 2, 3, 4, 5])

    print 'Finding RMS of [1,2,3,4,5]:', utils.root_mean_square([1, 2, 3, 4, 5])

    print 'Finding index of number closest to zero in [1,2,3,4,5]:', utils.find_nearest_index([1, 2, 3, 4, 5], 0)

    # Set up dummy dataset / variable to test method names
    ds = Dataset('/test3.nc', 'w')
    lat_dim = ds.createDimension('latitude', 180)
    v = ds.createVariable('latitude', 'f8', ('latitude',))
    v.units = 'degrees_north'
    ds.createVariable('temperature', 'f8', ('latitude',))

    print 'Checking if Variable is longitude:', netcdf_utils.is_longitude_var(v)

    print 'Finding if "temperature" has a longitude dimension in dataset:',\
        netcdf_utils.find_longitude_var(ds, 'temperature')
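For context, minimal sketches of the three utils helpers this harness exercises, assuming they do only what their names and the calls above imply (the real assignment code may well differ; the netCDF helpers are omitted because their behaviour depends on convention checks not shown here):

import numpy as np

def square_all(values):
    return [v ** 2 for v in values]                            # [1,2,3,4,5] -> [1, 4, 9, 16, 25]

def root_mean_square(values):
    return float(np.sqrt(np.mean(np.square(values))))          # sqrt(55/5) ~ 3.3166

def find_nearest_index(values, target):
    return int(np.abs(np.asarray(values) - target).argmin())   # nearest to 0 in [1..5] -> index 0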
Example #7
    def parametrize_peaks(self, intervals, max_peakwidth=50, min_peakwidth=25, symmetric_bounds=True):
        """
        Computes and stores the intonation profile of an audio recording.

        :param intervals: the reference set of intervals to which peak positions
         correspond. For each interval, the properties of the corresponding peak,
         if it exists, are computed and stored as the intonation profile.
        :param max_peakwidth: the maximum allowed width of the peak at its base, used for
         computing the parameters of the distribution.
        :param min_peakwidth: the minimum allowed width of the peak at its base, used for
         computing the parameters of the distribution.
        """
        assert isinstance(self.pitch_obj.pitch, np.ndarray)
        valid_pitch = self.pitch_obj.pitch
        valid_pitch = [i for i in valid_pitch if i > -10000]
        valid_pitch = np.array(valid_pitch)

        parameters = {}
        for i in xrange(len(self.histogram.peaks["peaks"][0])):
            peak_pos = self.histogram.peaks["peaks"][0][i]
            #Set left and right bounds of the distribution.
            max_leftbound = peak_pos - max_peakwidth
            max_rightbound = peak_pos + max_peakwidth
            leftbound = max_leftbound
            rightbound = max_rightbound
            nearest_valleyindex = utils.find_nearest_index(self.histogram.peaks["valleys"][0], peak_pos)
            if peak_pos > self.histogram.peaks["valleys"][0][nearest_valleyindex]:
                leftbound = self.histogram.peaks["valleys"][0][nearest_valleyindex]
                if len(self.histogram.peaks["valleys"][0][nearest_valleyindex + 1:]) == 0:
                    rightbound = peak_pos + max_peakwidth
                else:
                    offset = nearest_valleyindex + 1
                    nearest_valleyindex = utils.find_nearest_index(
                        self.histogram.peaks["valleys"][0][offset:], peak_pos)
                    rightbound = self.histogram.peaks["valleys"][0][offset + nearest_valleyindex]
            else:
                rightbound = self.histogram.peaks["valleys"][0][nearest_valleyindex]
                if len(self.histogram.peaks["valleys"][0][:nearest_valleyindex]) == 0:
                    leftbound = peak_pos - max_peakwidth
                else:
                    nearest_valleyindex = utils.find_nearest_index(
                        self.histogram.peaks["valleys"][0][:nearest_valleyindex], peak_pos)
                    leftbound = self.histogram.peaks["valleys"][0][nearest_valleyindex]

            #In terms of the x-axis, leftbound should be at least min_peakwidth
            # less than peak_pos and at most max_peakwidth less than peak_pos,
            # and vice versa for the rightbound.
            if leftbound < max_leftbound:
                leftbound = max_leftbound
            elif leftbound > peak_pos - min_peakwidth:
                leftbound = peak_pos - min_peakwidth

            if rightbound > max_rightbound:
                rightbound = max_rightbound
            elif rightbound < peak_pos + min_peakwidth:
                rightbound = peak_pos + min_peakwidth

            #If symmetric bounds are asked for, then make the bounds symmetric
            if symmetric_bounds:
                if peak_pos - leftbound < rightbound - peak_pos:
                    imbalance = (rightbound - peak_pos) - (peak_pos - leftbound)
                    rightbound -= imbalance
                else:
                    imbalance = (peak_pos - leftbound) - (rightbound - peak_pos)
                    leftbound += imbalance

            #extract the distribution and estimate the parameters
            distribution = valid_pitch[valid_pitch >= leftbound]
            distribution = distribution[distribution <= rightbound]
            #print peak_pos, "\t", len(distribution), "\t", leftbound, "\t", rightbound

            interval_index = utils.find_nearest_index(intervals, peak_pos)
            interval = intervals[interval_index]
            _mean = float(np.mean(distribution))
            _variance = float(variation(distribution))
            _skew = float(skew(distribution))
            _kurtosis = float(kurtosis(distribution))
            pearson_skew = float(3.0 * (_mean - peak_pos) / np.sqrt(abs(_variance)))
            parameters[interval] = {"position": float(peak_pos),
                                    "mean": _mean,
                                    "amplitude": float(self.histogram.peaks["peaks"][1][i]),
                                    "variance": _variance,
                                    "skew1": _skew,
                                    "skew2": pearson_skew,
                                    "kurtosis": _kurtosis}

        self.intonation_profile = parameters
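The bound-selection logic above is easiest to follow with numbers. A standalone sketch of just the clamping and symmetrization steps for a single peak, using made-up valley positions (the valley search itself is omitted):

# Hypothetical numbers: a peak at 702 cents with neighbouring valleys at 630 and 735,
# max_peakwidth = 50, min_peakwidth = 25.
peak_pos, leftbound, rightbound = 702.0, 630.0, 735.0
max_peakwidth, min_peakwidth = 50.0, 25.0

# Clamp each bound to lie between min_peakwidth and max_peakwidth away from the peak.
leftbound = max(peak_pos - max_peakwidth, min(leftbound, peak_pos - min_peakwidth))
rightbound = min(peak_pos + max_peakwidth, max(rightbound, peak_pos + min_peakwidth))
# -> leftbound = 652.0 (the valley was too far), rightbound = 735.0 (already in range)

# Symmetrize by shrinking the wider side to match the narrower one.
half_width = min(peak_pos - leftbound, rightbound - peak_pos)
leftbound, rightbound = peak_pos - half_width, peak_pos + half_width
print((leftbound, rightbound))   # -> (669.0, 735.0)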
Example #8
File: Inflection.py  Project: temaled/HEAD
def fit_lines(data, pitch, timestamps, window=1500, break_thresh=1500):
    """
        Fits lines to pitch contours.

        :param window: size of each chunk to which linear equation is to be fit (in milliseconds).
        To keep it simple, hop is chosen to be one third of the window.
        :param break_thresh: If there is silence beyond this limit (in milliseconds),
        the contour will be broken there into two so that we don't fit a line over and
        including the silent region.
        """
    window /= 1000.0
    hop = window / 3
    break_thresh /= 1000.0

    #cut the whole song into pieces if there are gaps more than break_thresh seconds
    i = 0
    break_indices = []
    count = 0
    while i < len(pitch):
        if pitch[i] == -10000:
            count = 1
            start_index = i
            while i < len(pitch) and pitch[i] == -10000:
                count += 1
                i += 1
            end_index = i - 1
            if timestamps[end_index] - timestamps[start_index] >= break_thresh:
                break_indices.append([start_index, end_index])
        i += 1
    break_indices = np.array(break_indices)

    #In creating the data blocks which are not silences, note that we
    # take the complementary break indices, i.e., if [[s1, e1], [s2, e2], ...]
    # is break_indices, we take the e1-s2, e2-s3, ... chunks and build data blocks

    data_blocks = []
    if len(break_indices) == 0:
        t_pitch = pitch.reshape(len(pitch), 1)
        t_timestamps = timestamps.reshape(len(timestamps), 1)
        data_blocks = [np.append(t_timestamps, t_pitch, axis=1)]
    else:
        if break_indices[0, 0] != 0:
            t_pitch = pitch[:break_indices[0, 0]]
            t_pitch = t_pitch.reshape(len(t_pitch), 1)
            t_timestamps = timestamps[:break_indices[0, 0]]
            t_timestamps = t_timestamps.reshape(len(t_timestamps), 1)
            data_blocks.append(np.append(t_timestamps, t_pitch, axis=1))
        block_start = break_indices[0, 1]
        for i in xrange(1, len(break_indices)):
            block_end = break_indices[i, 0]
            t_pitch = pitch[block_start:block_end]
            t_pitch = t_pitch.reshape(len(t_pitch), 1)
            t_timestamps = timestamps[block_start:block_end]
            t_timestamps = t_timestamps.reshape(len(t_timestamps), 1)
            data_blocks.append(np.append(t_timestamps, t_pitch, axis=1))
            block_start = break_indices[i, 1]
        if block_start != len(pitch) - 1:
            t_pitch = pitch[block_start:]
            t_pitch = t_pitch.reshape(len(t_pitch), 1)
            t_timestamps = timestamps[block_start:]
            t_timestamps = t_timestamps.reshape(len(t_timestamps), 1)
            data_blocks.append(np.append(t_timestamps, t_pitch, axis=1))

    label_start_offset = (window - hop) / 2
    label_end_offset = label_start_offset + hop

    #dataNew = np.zeros_like(data)
    #dataNew[:, 0] = data[:, 0]
    data_new = np.array([[0, 0]])
    for data in data_blocks:
        start_index = 0
        while start_index < len(data) - 1:
            end_index = utils.find_nearest_index(data[:, 0],
                                                 data[start_index][0] + window)
            segment = data[start_index:end_index]
            if len(segment) == 0:
                start_index = utils.find_nearest_index(
                    data[:, 0], data[start_index, 0] + hop)
                continue
            segment_clean = np.delete(segment,
                                      np.where(segment[:, 1] == -10000),
                                      axis=0)
            if len(segment_clean) == 0:
                #After splitting into blocks, this loop better not come into play
                #raise ValueError("This part of the block is absolute silence! Make sure block_thresh >= window!")
                start_index = utils.find_nearest_index(
                    data[:, 0], data[start_index, 0] + hop)
                continue
            n_clean = len(segment_clean)
            x_clean = np.matrix(segment_clean[:, 0]).reshape(n_clean, 1)
            y_clean = np.matrix(segment_clean[:, 1]).reshape(n_clean, 1)
            #return [x_clean, y_clean]
            theta = utils.normal_equation(x_clean, y_clean)

            #determine the start and end of the segment to be labelled
            label_start_index = utils.find_nearest_index(
                x_clean, data[start_index, 0] + label_start_offset)
            label_end_index = utils.find_nearest_index(
                x_clean, data[start_index, 0] + label_end_offset)
            x_clean = x_clean[label_start_index:label_end_index]
            #return x_clean
            x_clean = np.insert(x_clean, 0, np.ones(len(x_clean)), axis=1)
            newy = x_clean * theta
            result = np.append(x_clean[:, 1], newy, axis=1)
            data_new = np.append(data_new, result, axis=0)

            start_index = utils.find_nearest_index(data[:, 0],
                                                   data[start_index, 0] + hop)

    return data_new[:, 0], data_new[:, 1]
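The actual line fit is delegated to utils.normal_equation, which is not shown in this listing. Judging from how theta is used afterwards (a column of ones is prepended to x before multiplying by it), the helper presumably adds an intercept column itself and solves the ordinary least-squares normal equations; a minimal sketch under that assumption:

import numpy as np

def normal_equation(x, y):
    # Hypothetical helper: fit y ~ theta0 + theta1 * x and return theta as a 2x1 matrix,
    # so that np.insert(x, 0, ones, axis=1) * theta reproduces the fitted line.
    x = np.asarray(x, dtype=float).reshape(-1, 1)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    X = np.hstack([np.ones_like(x), x])                   # prepend the intercept column
    return np.matrix(np.linalg.solve(X.T.dot(X), X.T.dot(y)))

# Toy check: points on the line y = 2 + 3 * t.
t = np.array([[0.0], [1.0], [2.0]])
print(normal_equation(t, 2 + 3 * t))    # intercept ~ 2, slope ~ 3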
Example #9
    def fit_lines(self, window=1500, break_thresh=1500):
        """
        Fits lines to pitch contours.

        :param window: size of each chunk to which a linear equation is to be fit (in milliseconds).
        To keep it simple, the hop is chosen to be one third of the window.
        :param break_thresh: if there is a silence longer than this limit (in milliseconds),
        the contour is broken there into two so that a line is not fit over and
        including the silent region.
        """
        window /= 1000.0
        hop = window / 3
        break_thresh /= 1000.0

        #cut the whole song into pieces if there are gaps more than break_thresh seconds
        i = 0
        break_indices = []
        count = 0
        while i < len(self.pitch):
            if self.pitch[i] == -10000:
                count = 1
                start_index = i
                while i < len(self.pitch) and self.pitch[i] == -10000:
                    count += 1
                    i += 1
                end_index = i-1
                if self.timestamps[end_index]-self.timestamps[start_index] >= break_thresh:
                    break_indices.append([start_index, end_index])
            i += 1
        break_indices = np.array(break_indices)

        #In creating the data blocks which are not silences, note that we
        # take the complementary break indices, i.e., if [[s1, e1], [s2, e2], ...]
        # is break_indices, we take the e1-s2, e2-s3, ... chunks and build data blocks

        data_blocks = []
        if len(break_indices) == 0:
            t_pitch = self.pitch.reshape(len(self.pitch), 1)
            t_timestamps = self.timestamps.reshape(len(self.timestamps), 1)
            data_blocks = [np.append(t_timestamps, t_pitch, axis=1)]
        else:
            if break_indices[0, 0] != 0:
                t_pitch = self.pitch[:break_indices[0, 0]]
                t_pitch = t_pitch.reshape(len(t_pitch), 1)
                t_timestamps = self.timestamps[:break_indices[0, 0]]
                t_timestamps = t_timestamps.reshape(len(t_timestamps), 1)
                data_blocks.append(np.append(t_timestamps, t_pitch, axis=1))
            block_start = break_indices[0, 1]
            for i in xrange(1, len(break_indices)):
                block_end = break_indices[i, 0]
                t_pitch = self.pitch[block_start:block_end]
                t_pitch = t_pitch.reshape(len(t_pitch), 1)
                t_timestamps = self.timestamps[block_start:block_end]
                t_timestamps = t_timestamps.reshape(len(t_timestamps), 1)
                data_blocks.append(np.append(t_timestamps, t_pitch, axis=1))
                block_start = break_indices[i, 1]
            if block_start != len(self.pitch)-1:
                t_pitch = self.pitch[block_start:]
                t_pitch = t_pitch.reshape(len(t_pitch), 1)
                t_timestamps = self.timestamps[block_start:]
                t_timestamps = t_timestamps.reshape(len(t_timestamps), 1)
                data_blocks.append(np.append(t_timestamps, t_pitch, axis=1))

        label_start_offset = (window-hop)/2
        label_end_offset = label_start_offset+hop

        #dataNew = np.zeros_like(data)
        #dataNew[:, 0] = data[:, 0]
        data_new = np.array([[0, 0]])
        for data in data_blocks:
            start_index = 0
            while start_index < len(data)-1:
                end_index = utils.find_nearest_index(data[:, 0], data[start_index][0]+window)
                segment = data[start_index:end_index]
                if len(segment) == 0:
                    start_index = utils.find_nearest_index(data[:, 0], data[start_index, 0]+hop)
                    continue
                segment_clean = np.delete(segment, np.where(segment[:, 1] == -10000), axis=0)
                if len(segment_clean) == 0:
                    #After splitting into blocks, this loop better not come into play
                    #raise ValueError("This part of the block is absolute silence! Make sure block_thresh >= window!")
                    start_index = utils.find_nearest_index(data[:, 0], data[start_index, 0]+hop)
                    continue
                n_clean = len(segment_clean)
                x_clean = np.matrix(segment_clean[:, 0]).reshape(n_clean, 1)
                y_clean = np.matrix(segment_clean[:, 1]).reshape(n_clean, 1)
                #return [x_clean, y_clean]
                theta = utils.normal_equation(x_clean, y_clean)

                #determine the start and end of the segment to be labelled
                label_start_index = utils.find_nearest_index(x_clean, data[start_index, 0]+label_start_offset)
                label_end_index = utils.find_nearest_index(x_clean, data[start_index, 0]+label_end_offset)
                x_clean = x_clean[label_start_index:label_end_index]
                #return x_clean
                x_clean = np.insert(x_clean, 0, np.ones(len(x_clean)), axis=1)
                newy = x_clean*theta
                result = np.append(x_clean[:, 1], newy, axis=1)
                data_new = np.append(data_new, result, axis=0)

                start_index = utils.find_nearest_index(data[:, 0], data[start_index, 0]+hop)

        return [data_new[:, 0], data_new[:, 1]]