import numpy
import theano
import theano.tensor as T

import draw            # project module that generates the polygon images
import preprocessing   # project module providing addblock() and frame()


def generate_data():
    ''' Generates the polygon datasets.

    Draws random polygons, adds black occluding blocks, embeds each 28x28
    image in a 56x56 frame, and returns the train/valid/test splits as
    Theano shared variables.
    '''

    #############
    # LOAD DATA #
    #############


    print '... loading data'

    train_set = draw.GeneratePolygons([3, 4, 5, 6, 20], 50000, [5, 10], changeratio = [1.0])
    valid_set = draw.GeneratePolygons([3, 4, 5, 6, 20], 10000, [5, 10], changeratio = [1.0])
    test_set = draw.GeneratePolygons([3, 4, 5, 6, 20], 10000, [5, 10], changeratio = [1.0])

    train_set_frame_x = numpy.zeros((len(train_set[1]), 56 * 56))
    train_set_frame_y = train_set[1]
    valid_set_frame_x = numpy.zeros((len(valid_set[1]), 56 * 56))
    valid_set_frame_y = valid_set[1]
    test_set_frame_x = numpy.zeros((len(test_set[1]), 56 * 56))
    test_set_frame_y = test_set[1]
    
    #add black blocks
    for i in range(len(train_set[1])):
        preprocessing.addblock(train_set[0][i].reshape(28, 28))
    for i in range(len(valid_set[1])):  # valid and test sets are the same size
        preprocessing.addblock(valid_set[0][i].reshape(28, 28))
        preprocessing.addblock(test_set[0][i].reshape(28, 28))

    #put digits into frame
    for i in range(len(train_set[1])):
        train_set_frame_x[i] = preprocessing.frame(train_set[0][i], offset = 5)
    for i in range(len(valid_set[1])):
        valid_set_frame_x[i] = preprocessing.frame(valid_set[0][i], offset = 5)
        test_set_frame_x[i] = preprocessing.frame(test_set[0][i], offset = 5)
    #do not save the images
#    sio.savemat('sets.mat', {'trainsetx' : train_set_frame_x,
#                             'validsetx' : valid_set_frame_x,
#                             'testsetx' : test_set_frame_x,
#                             'trainsety' : train_set_frame_y,
#                             'validsety' : valid_set_frame_y,
#                             'testsety' : test_set_frame_y})
    train_set = train_set_frame_x, train_set_frame_y
    valid_set = valid_set_frame_x, valid_set_frame_y   
    test_set = test_set_frame_x, test_set_frame_y

    
    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every time
        one is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
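

# A minimal usage sketch (not from the source) showing why the splits are returned
# as Theano shared variables: a minibatch is sliced out of the GPU-resident tensors
# inside the compiled function instead of being copied from host memory on every
# call. ``batch_size`` and the mean outputs are placeholders chosen for illustration.
def _minibatch_demo(batch_size=500):
    datasets = generate_data()
    train_set_x, train_set_y = datasets[0]          # shared x, int32-cast y
    index = T.lscalar('index')                      # minibatch index
    get_batch_means = theano.function(
        inputs=[index],
        outputs=[train_set_x[index * batch_size:(index + 1) * batch_size].mean(),
                 train_set_y[index * batch_size:(index + 1) * batch_size].mean()])
    return get_batch_means(0)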
Example #2
import copy

import numpy as np
import librosa as lib            # "lib" is evidently librosa (lib.load below matches librosa.load)
import matplotlib.pyplot as plt
from statsmodels.tsa import stattools

import preprocessing
import feature_extraction as fe

ex = '..\\..\\boy_and_girl\\class1\\arctic_a0012.wav'
# ex = '..\\..\\cello_and_viola\\viola\\Viola.arco.ff.sulA.A4.stereo.aiff'
time_series, fs = lib.load(ex, sr=None, mono=True, res_type='kaiser_best')

time_series = preprocessing.avoid_overlap(time_series,
                                          N=100,
                                          f=500,
                                          fs=fs,
                                          plot=False)
time_series = preprocessing.downsample(time_series, fs, 4410)

print(fs)  # note: fs still holds the original rate; time_series was just downsampled to 4410 Hz
frames = preprocessing.frame(time_series, int(0.03 * fs), int(0.015 * fs))
for i in range(frames.shape[1]):
    acf1 = stattools.acf(frames[:, i], nlags=100)
    fft, _ = fe.fft_singleside(frames[:, i], 4410, 8096)

    plt.figure()
    plt.subplot(211)
    plt.plot(np.abs(fft))
    plt.subplot(212)
    plt.stem(acf1)
    plt.show()


def acf_fundamental_freq(x, fmin, fmax, fs):
    y = copy.copy(x)
    y = preprocessing.avoid_overlap(y, N=100, f=fmax + 100, fs=fs,
                                    plot=False)
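

# acf_fundamental_freq is cut off above in the source. Below is a minimal,
# illustrative sketch (not the original code) of how an ACF-based fundamental
# frequency estimator typically proceeds: pick the autocorrelation peak inside
# the lag range implied by [fmin, fmax].
def acf_f0_sketch(x, fmin, fmax, fs):
    nlags = int(fs / fmin)                          # longest lag of interest (lowest f0)
    acf = stattools.acf(x, nlags=nlags)
    lag_min = int(fs / fmax)                        # shortest lag of interest (highest f0)
    lag = lag_min + np.argmax(acf[lag_min:nlags + 1])
    return fs / lag                                 # estimated f0 in Hz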
Example #3
params = {  # name inferred from the **params call below; the dict's earlier entries are missing from this snippet
          'nccf_thresh1': 0.3,
          'nccf_thresh2': 0.9,
          'nccf_maaxcands': 3,
          'nccf_pwidth': 5,       # 5
          'merit_boost': 5,
          'merit_pivot': 0.20,
          'merit_extra': 0.4,
          'median_value': 7,
          'dp_w1': 0.15,
          'dp_w2': 0.5,
          'dp_w3': 100,
          'dp_w4': 0.9
          }

pitch = pYAAPT.yaapt(signal, **params)
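# pitch.samp_values is the raw per-frame f0 track and pitch.samp_interp the
# interpolated track (attributes of amfm_decompy's PitchObj); both are drawn
# against the per-frame spectra below.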
frames = preprocessing.frame(silence_remove, frame_length, frame_overlap)
f, t, stft = fea.stft(silence_remove, pic=None, fs=sample_rate, nperseg=frame_length,
                      noverlap=frame_overlap, nfft=8192, padded=True, boundary=None)
f, t, stft = scipy.signal.stft(x=silence_remove, fs=sample_rate, window='hann',
                               nperseg=frame_length, noverlap=frame_overlap, nfft=8192,
                               detrend=False, return_onesided=True, boundary='zeros',
                               padded=True, axis=-1)
print(pitch.samp_values.shape[0], frames.shape[1])
for i in range(min(pitch.samp_values.shape[0], frames.shape[1])):
    plt.figure()
    plt.subplot(211)
    X, _ = np.abs(fea.fft_singleside(x=frames[:,i], fs=sample_rate, n=8192, pic=None))
    plt.plot(np.arange(0, 8192/2+1), np.abs(stft[:,i]), 'y')
    plt.axvline(pitch.samp_interp[i], c='b')
    plt.axvline(pitch.samp_values[i], c='g')
    plt.subplot(212)
    plt.plot(np.arange(0, 8192 / 2 + 1), X, 'r')
    plt.axvline(pitch.samp_interp[i], c='b')
Example #4
import os
import gzip
import cPickle

import numpy
import theano
import theano.tensor as T

import preprocessing


def load_data(dataset, preprocess=False):
    ''' Loads the MNIST dataset.

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)

    :type preprocess: bool
    :param preprocess: if True, add black occluding blocks to each digit and
                       embed it in a 56x56 frame
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(os.path.split(__file__)[0], "..", "data", dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        print 'Downloading data from %s' % origin
        urllib.urlretrieve(origin, dataset)

    print '... loading data'

    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    #train_set, valid_set, test_set format: tuple(input, target)
    #input is a numpy.ndarray of 2 dimensions (a matrix) in which each row
    #corresponds to an example. target is a numpy.ndarray of 1 dimension
    #(a vector) that has the same length as the number of rows in the input.
    #It gives the target for the example with the same index in the input.
    
    if preprocess:
        train_set_frame_x = numpy.zeros((50000, 56 * 56))
        train_set_frame_y = train_set[1]
        valid_set_frame_x = numpy.zeros((10000, 56 * 56))
        valid_set_frame_y = valid_set[1]
        test_set_frame_x = numpy.zeros((10000, 56 * 56))
        test_set_frame_y = test_set[1]
        
        #add black blocks
        for i in range(50000):
            preprocessing.addblock(train_set[0][i].reshape(28, 28))
        for i in range(10000):  # valid and test sets both have 10000 examples
            preprocessing.addblock(valid_set[0][i].reshape(28, 28))
            preprocessing.addblock(test_set[0][i].reshape(28, 28))

        #put digits into frame
        for i in range(50000):
            train_set_frame_x[i] = preprocessing.frame(train_set[0][i], offset = 5)
        for i in range(10000):
            valid_set_frame_x[i] = preprocessing.frame(valid_set[0][i], offset = 5)
            test_set_frame_x[i] = preprocessing.frame(test_set[0][i], offset = 5)

        # only replace the splits when preprocessing was applied; otherwise the
        # raw MNIST tuples loaded above are used as-is
        train_set = train_set_frame_x, train_set_frame_y
        valid_set = valid_set_frame_x, valid_set_frame_y
        test_set = test_set_frame_x, test_set_frame_y

    
    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every time
        one is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
Example #5
import os
import csv

import numpy as np
from sklearn.preprocessing import StandardScaler

import preprocessing
import feature_extraction as fe
import timbral_feature as timbral


def reload_and_feature(picall, feature_type, average, nmel, order_frft, nmfcc,
                       saveprojectpath, savedata, savepic, savetestdata,
                       savepreprocess, savefeature, path, downsample_rate,
                       frame_time, frame_length, frame_overlap, test_rate):
    '''
    fe.stft,                           # 0
    fe.zero_crossing_rate,             # 1
    fe.energy,                         # 2
    fe.entropy_of_energy,              # 3
    fe.spectral_centroid_spread,       # 4
    fe.spectral_entropy,               # 5
    fe.spectral_flux,                  # 6
    fe.spectral_rolloff,               # 7
    fe.bandwidth,                      # 8
    fe.mfccs,                          # 9
    fe.rms                             # 10
    fe.stfrft                          # 11
    fe.frft_mfcc                       # 12

    fe.fundalmental_freq               # 13
    fe.chroma_stft                     # 14
    fe.log_attack_time                 # 15
    fe.temoporal_centroid              # 16
    timbral harmonic descriptors       # 17
    fe.pitches_mag_CDSV                # 18
    fe.delta_features (order 1)        # 19
    fe.delta_features (order 2)        # 20
    '''
    labelname = os.listdir(path)  # sub-folder names under the dataset path (one per class)
    if not os.path.exists(savefeature):
        os.mkdir(savefeature)  # create the folder that stores the feature results
    for i in range(len(labelname)):
        if not os.path.exists(savefeature + '\\' + labelname[i]):
            os.mkdir(savefeature + '\\' + labelname[i])

    datafile = open(savepreprocess, encoding='utf-8')  # read the preprocessing results
    csv_reader = csv.reader(datafile)  # reading the file this way yields an iterator

    feature_set = []  # when statistics are used as features, all samples' features are cached here for joint normalization
    for row in csv_reader:  # the elements of row are strings
        time_series = np.array(row[2:]).astype(
            'float32')  # the first two elements of row are the label and the file index
        #######################################################################
        frames = preprocessing.frame(time_series, frame_length,
                                     frame_overlap)  # split into frames
        f, t, stft = fe.stft(time_series,
                             pic=None,
                             fs=downsample_rate,
                             nperseg=frame_length,
                             noverlap=frame_length - frame_overlap,
                             nfft=8192,
                             boundary=None,
                             padded=False)
        # if stft.shape[1] != frames.shape[1]:                                 # in case the number of stft time bins differs from the number of frames
        #     dim = min(stft.shape[1], frames.shape[1])
        #     stft = stft[:, 0:dim]
        #     frames = frames[:, 0:dim]
        # Mel = lib.feature.melspectrogram(S=np.abs(stft), sr=downsample_rate, n_fft=2*(stft.shape[0]-1), n_mels=512)
        feature_list = []  # holds the selected feature types; each frame ends up with one feature vector made of those features
        if picall:  # plotting switch
            pic = savepic + '\\' + row[0] + '_' + row[1]
        else:
            pic = None

        for i in feature_type:
            if i == 0:
                feature0 = np.abs(stft)
                feature_list.append(feature0)
            elif i == 1:
                feature1 = fe.zero_crossing_rate(frames, pic=pic)
                feature_list.append(feature1)
            elif i == 2:
                feature2 = fe.energy(frames, pic=pic)
                feature_list.append(feature2)
            elif i == 3:
                feature3 = fe.entropy_of_energy(frames, pic=pic)
                feature_list.append(feature3)
            elif i == 4:
                feature4, feature41 = fe.spectral_centroid_spread(
                    stft, downsample_rate, pic=pic)
                feature_list.append(feature4)
                feature_list.append(feature41)
            elif i == 5:
                feature5 = fe.spectral_entropy(stft, pic=pic)
                feature_list.append(feature5)
            elif i == 6:
                feature6 = fe.spectral_flux(stft, pic=pic)
                feature_list.append(feature6)
            elif i == 7:
                feature7 = fe.spectral_rolloff(stft,
                                               0.85,
                                               downsample_rate,
                                               pic=pic)
                feature_list.append(feature7)
            elif i == 8:
                feature8 = fe.bandwidth(stft, f, pic=pic)
                feature_list.append(feature8)
            elif i == 9:
                feature9 = fe.mfccs(
                    X=stft,
                    fs=downsample_rate,
                    # nfft=2*(stft.shape[0]-1),
                    nfft=8192,
                    n_mels=nmel,
                    n_mfcc=nmfcc,
                    pic=pic)
                feature_list.append(feature9)
            elif i == 10:
                feature10 = fe.rms(stft, pic=pic)
                feature_list.append(feature10)
            elif i == 11:
                feature11 = fe.stfrft(frames,
                                      p=order_frft[int(row[0])],
                                      pic=pic)
                feature_list.append(feature11)
            elif i == 12:
                tmp = fe.stfrft(frames, p=order_frft[int(row[0])])
                feature12 = fe.frft_MFCC(S=tmp,
                                         fs=downsample_rate,
                                         n_mfcc=nmfcc,
                                         n_mels=nmel,
                                         pic=pic)
                feature_list.append(feature12)
            elif i == 13:
                feature13, feature13_ = fe.fundalmental_freq(
                    frames=frames, fs=downsample_rate, pic=pic)
                feature_list.append(feature13)
            elif i == 14:
                feature14 = fe.chroma_stft(S=stft,
                                           n_chroma=12,
                                           A440=440.0,
                                           ctroct=5.0,
                                           octwidth=2,
                                           base_c=True,
                                           norm=2)
                feature_list.append(feature14)
            elif i == 15:
                feature15 = fe.log_attack_time(x=time_series,
                                               lower_ratio=0.02,
                                               upper_ratio=0.99,
                                               fs=downsample_rate,
                                               n=frames.shape[1])
                feature_list.append(feature15)
            elif i == 16:
                feature16 = fe.temoporal_centroid(S=stft,
                                                  hop_length=frame_overlap,
                                                  fs=downsample_rate)
                feature_list.append(feature16)
            elif i == 17:
                # harm_freq, harm_mag = fe.harmonics(nfft=8192, nht=0.15, f=f, S=stft, fs=downsample_rate, fmin=50, fmax=500, threshold=0.2)
                # hsc = fe.harmonic_spectral_centroid(harm_freq, harm_mag)
                # hsd = fe.harmonic_spectral_deviation(harm_mag)
                # hss = fe.harmonic_spectral_spread(hsc, harm_freq, harm_mag)
                # hsv = fe.harmonic_spectral_variation(harm_mag)
                # feature17 = np.concatenate([hsc, hsd, hss, hsv], axis=0)
                # feature_list.append(feature17)
                harm_freq, harm_mag = timbral.harmonics(frames=frames,
                                                        fs=downsample_rate,
                                                        S=stft,
                                                        f=f,
                                                        nfft=8192,
                                                        fmin=50,
                                                        fmax=500,
                                                        nht=0.15)
                hsc = timbral.harmonic_spectral_centroid(harm_freq, harm_mag)
                hsd = timbral.harmonic_spectral_deviation(harm_mag)
                hss = timbral.harmonic_spectral_spread(hsc, harm_freq,
                                                       harm_mag)
                hsv = timbral.harmonic_spectral_variation(harm_mag)
                feature17 = np.concatenate([hsc, hsd, hss, hsv], axis=0)
                feature_list.append(feature17)
            elif i == 18:
                feature18 = fe.pitches_mag_CDSV(f=f,
                                                S=stft,
                                                fs=downsample_rate,
                                                fmin=50,
                                                fmax=downsample_rate / 2,
                                                threshold=0.2)
                feature_list.append(feature18)
            elif i == 19:
                feature19 = fe.delta_features(feature9, order=1)
                feature_list.append(feature19)
            elif i == 20:
                feature20 = fe.delta_features(feature9, order=2)
                feature_list.append(feature20)

        features = np.concatenate([j for j in feature_list],
                                  axis=0)  # stack all selected feature types into one matrix
        long = list(range(features.shape[1]))  # drop frames that contain NaN
        for t in long[::-1]:
            if np.isnan(features[:, t]).any():
                features = np.delete(features, t, 1)
        if average:  # use statistics as features
            mean = np.mean(features, axis=1).reshape(
                1, features.shape[0])  # per-frame feature vectors are columns; convert to a row vector
            var = np.var(features, axis=1).reshape(1, features.shape[0])
            # std = np.std(features, axis=1).reshape(1, features.shape[0])
            # ske = np.zeros((1, features.shape[0]))
            # kur = np.zeros((1, features.shape[0]))
            # for n in range(features.shape[0]):
            #     ske[0, i] = sts.skewness(features[i, :])
            #     kur[0, i] = sts.kurtosis(features[i, :])
            features = np.concatenate([
                mean, var,
                np.array([int(row[0]), int(row[1])]).reshape(1, 2)
            ],
                                      axis=1)  # replace the per-frame features with their statistics
            feature_set.append(features)
        else:
            scale = StandardScaler().fit(features)
            features = scale.transform(features)  # normalize
            csv_path = savefeature + '\\' + labelname[int(
                row[0])] + '\\' + row[0] + '_' + row[1] + '.csv'
            with open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                buffer = np.concatenate([
                    features.T,
                    int(row[0]) * np.ones((features.shape[1], 1)),
                    int(row[1]) * np.ones((features.shape[1], 1))
                ],
                                        axis=1)
                csv_writer.writerows(buffer)
        print('featuring:', row[0], row[1])
    datafile.close()  # close the file to avoid stray errors
    if average:  # statistics-as-features branch
        features = np.concatenate([k for k in feature_set],
                                  axis=0)  # rows are samples, columns are features
        tmp = features[:, -2:]  # keep the label columns out of the normalization
        features = features[:, 0:-2]
        scale = StandardScaler().fit(features)
        features = scale.transform(features)  # normalize
        features = np.concatenate([features, tmp], axis=1)  # re-attach the labels to the normalized features
        for k in range(features.shape[0]):
            csv_path = savefeature + '\\' + labelname[int(features[k, -2])] + \
                       '\\' + str(int(features[k, -2])) + '_' + str(int(features[k, -1])) + '.csv'
            with open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)  # each audio file has one feature vector, stored in its own csv file
                csv_writer.writerow(features[k, :])  # a single row is written, so use writerow
Example #6
import numpy as np
import librosa as lib
import matplotlib.pyplot as plt

import preprocessing
import feature_extraction as fe
import visualization as visual
import timbral_feature as timbral

# ex = '..\\..\\suhao.wav'
# ex = '..\\..\\boy_and_girl\\class1\\arctic_a0012.wav'
# ex = '..\\..\\数据集2\\pre2012\\bflute\\BassFlute.mf.C4B4.aiff'
# ex = '..\\..\\cello_and_viola\\viola\\Viola.arco.ff.sulA.A4.stereo.aiff'
ex = '..\\..\\数据集2\\post2012\\cello\\Cello.arco.ff.sulA.A5.stereo.aiff'
data, fs = lib.load(ex, sr=None, mono=True, res_type='kaiser_best')
frame_length = int(0.03*fs)
frame_lap = int(0.015*fs)
# data = data + np.random.randn(len(data))
frames = preprocessing.frame(data, frame_length, frame_lap)
f, t, stft = fe.stft(data, pic=None, fs=fs, nperseg=frame_length,
                     noverlap=frame_length-frame_lap, nfft=8192, boundary=None, padded=False)
stft = np.abs(stft)


harm_freq, harm_mag = timbral.harmonics(frames, fs, stft, f, nfft=8192, fmin=50, fmax=500,  nht=0.15)
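# harm_freq / harm_mag hold, per analysis frame, the detected harmonic frequencies
# and their magnitudes; a single frame is inspected below.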


# plot the harmonics together with the spectrum
i = 20
y2 = harm_freq[i]
# y2 = y2[0: 10]
visual.picfftandpitch(f, stft[:, i], y2, title='harmonic extraction', xlabel='freq(Hz)', ylabel='mag', pic=None)
plt.figure()
plt.plot(frames[:, i])
Example #9
    # how many columns can we fit within MAX_MEM_BLOCK?
    n_columns = int(util.MAX_MEM_BLOCK / (stft_matrix.shape[0] *
                                          stft_matrix.itemsize))

    for bl_s in range(0, stft_matrix.shape[1], n_columns):
        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])

        stft_matrix[:, bl_s:bl_t] = fft.fft(fft_window *
                                            y_frames[:, bl_s:bl_t],
                                            axis=0)[:stft_matrix.shape[0]]
    f = np.linspace(0, np.pi, stft_matrix.shape[0], endpoint=True) * fs / np.pi / 2
    return stft_matrix, f


def stft_specgram(f, t, zxx, picname=None):
    plt.figure()
    plt.pcolormesh(t, f, (np.abs(zxx)))
    plt.colorbar()
    plt.title('STFT Magnitude')
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.tight_layout()


t=np.arange(0, 1, 0.0001)
x = np.sin(2*np.pi*200 * t) + np.sin(2*np.pi*50*t)
frames = frame(x, 1000, 100)
f1, _, S1 = stft(x, pic=None, fs=10000, nperseg=1000, noverlap=1000-100, nfft=8192, boundary=None, padded=False)
S2, f2 = mystft(frames, 10000, 8192)
# S3, f3 = libstft(x, fs=10000, n_fft=8192, hop_length=100, win_length=1000,center=False, dtype=np.complex64, pad_mode='reflect')
stft_specgram(f2, np.arange(0,S2.shape[1]), S2, picname=None)