def prepare_dataset_convnet(resources_dir, filenames, winLSecs, energyBands_sr, n_classes):
    """Build a dataset of half-overlapping energy-band windows, one class per file.

    Every entry of ``filenames`` is read from
    ``resources_dir + name + '.16bit-EnergyBankFilter.txt'``. All windows cut
    from a file are labelled with that file's position in ``filenames``.

    Args:
        resources_dir: Path prefix prepended verbatim to each file name (no
            separator is inserted, so it must already end with one).
        filenames: Base names of the energy-band files; the list index is the
            class label.
        winLSecs: Window length in seconds.
        energyBands_sr: Frame rate of the energy-band files, in frames/second.
        n_classes: Number of classes, forwarded to ``datasets.Dataset``.

    Returns:
        Tuple ``(ds, Xs, ys, windowSize)`` where ``ds`` is the
        ``datasets.Dataset`` built with an 0.8/0.1/0.1 split and one-hot
        labels, ``Xs`` has shape ``(n_windows, n_bands, windowSize, 1)``,
        ``ys`` holds one integer label per window and ``windowSize`` is the
        window length in frames.

    Raises:
        ValueError: If the requested window is shorter than two frames, which
            would give a zero hop size.
    """
    # Round down to an even number of frames so the hop is exactly half a window.
    windowSize = int((winLSecs * energyBands_sr) // 2 * 2)
    if windowSize < 2:
        raise ValueError(
            'winLSecs * energyBands_sr must be at least 2 frames, got %r'
            % (winLSecs * energyBands_sr,))
    hopSize = windowSize // 2
    print('windowSize', windowSize)

    Xs = []
    ys = []
    for iString, name in enumerate(filenames):
        inputFile = resources_dir + name + '.16bit-EnergyBankFilter.txt'
        # After the transpose, axis 0 is the band and axis 1 the frame index.
        energy_bands = np.loadtxt(inputFile, skiprows=0).T
        # Maps [-120, 0] onto [0, 1]; presumably the file holds dB values in
        # that range -- TODO confirm.
        energy_bands = (energy_bands / 120) + 1

        # Dropping the last hop guarantees every window is full length.
        n_hops = energy_bands.shape[1] // hopSize - 1
        for hop_i in range(n_hops):
            start = hop_i * hopSize
            window = energy_bands[:, start:start + windowSize]
            Xs.append(window[..., np.newaxis])
            # Every frame of a file carries the same label, so the window
            # label is simply the file index (no per-window median needed).
            ys.append(iString)

    Xs = np.array(Xs)
    ys = np.array(ys)
    print("Xs.shape:", Xs.shape, ", ys.shape:", ys.shape)

    ds = datasets.Dataset(Xs=Xs, ys=ys, split=[0.8, 0.1, 0.1],
                          one_hot=True, n_classes=n_classes)
    return ds, Xs, ys, windowSize
def prepare_dataset_ffw(resources_dir, filenames, winLSecs, energyBands_sr, n_classes):
    """Build a frame-by-frame energy-band dataset for the feed-forward classifier.

    Every entry of ``filenames`` is read from
    ``resources_dir + name + '.16bit-EnergyBankFilter.txt'``. Each frame of a
    file becomes one observation labelled with the file's position in
    ``filenames``.

    Args:
        resources_dir: Path prefix prepended verbatim to each file name (no
            separator is inserted, so it must already end with one).
        filenames: Base names of the energy-band files; the list index is the
            class label.
        winLSecs: Window length in seconds. Only used to compute the returned
            ``windowSize``; the observations themselves are single frames.
        energyBands_sr: Frame rate of the energy-band files, in frames/second.
        n_classes: Number of classes, forwarded to ``datasets.Dataset``.

    Returns:
        Tuple ``(ds, Xs, ys, windowSize)`` where ``ds`` is the
        ``datasets.Dataset`` built with an 0.8/0.1/0.1 split and one-hot
        labels, ``Xs`` has shape ``(n_frames_total, n_bands)``, ``ys`` holds
        one integer label per frame and ``windowSize`` is the even window
        length in frames derived from ``winLSecs``.
    """
    # Same even-sized window as the convolutional variant; computed up front
    # so it is defined even when ``filenames`` is empty.
    windowSize = int((winLSecs * energyBands_sr) // 2 * 2)
    print('windowSize', windowSize)

    Xs = []
    ys = []
    for iString, name in enumerate(filenames):
        inputFile = resources_dir + name + '.16bit-EnergyBankFilter.txt'
        # After the transpose, axis 0 is the band and axis 1 the frame index.
        energy_bands = np.loadtxt(inputFile, skiprows=0).T
        # Maps [-120, 0] onto [0, 1]; presumably the file holds dB values in
        # that range -- TODO confirm.
        energy_bands = (energy_bands / 120) + 1
        print("Preparing dataset: reading ", inputFile)

        nFrames = energy_bands.shape[1]
        # Iterating the transposed array yields one band vector per frame,
        # i.e. the same rows as energy_bands[:, iframe] for each iframe.
        Xs.extend(energy_bands.T)
        ys.extend([iString] * nFrames)

        # Keep the progress trace (one line every 100 frames).
        for iframe in range(0, nFrames, 100):
            print("String:", iString, ", frame:", iframe, "/", nFrames)

    Xs = np.array(Xs)
    ys = np.array(ys)
    print(Xs.shape, ys.shape)

    ds = datasets.Dataset(Xs=Xs, ys=ys, split=[0.8, 0.1, 0.1],
                          one_hot=True, n_classes=n_classes)
    return ds, Xs, ys, windowSize
# Load every file, keep only the first three channels, crop to a square and
# resize to 100 x 100 pixels (mode='reflect').
imgs = [
    resize(utils.imcrop_tosquare(plt.imread(fname)[..., :3]),
           (100, 100), mode='reflect')
    for fname in filenames
]

# Stack the list of images into a single 4-d float32 array.
Xs = np.array(imgs).astype(np.float32)
assert (Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)

ds = datasets.Dataset(Xs)

# Per-pixel statistics over the whole dataset.
mean_img = ds.mean()
# Average the standard deviation over axis 2 of what ds.std() returns
# (presumably the colour channels -- confirm against datasets.Dataset).
std_img = np.mean(ds.std(), axis=2)
myown_obj = get_myown_imgs(direc)

# Bring both image collections to 100 x 100 x 3.
target_shape = (100, 100, 3)
myown_img = [resize(img_i, target_shape) for img_i in myown_img]
myown_obj = [resize(img_i, target_shape) for img_i in myown_obj]

# Show the first collection as a montage.
plt.figure(figsize=(10, 10))
plt.imshow(utils.montage(myown_img))

# First collection: stack into a 4-d array scaled by 255 and wrap in a Dataset.
Xs = np.array(myown_img) * 255
print(Xs.shape)
assert (Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)
ds_img = datasets.Dataset(Xs)

# Second collection: same treatment.
Xs = np.array(myown_obj) * 255
print(Xs.shape)
assert (Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)
ds_obj = datasets.Dataset(Xs)

# Print the shape of every training batch of each dataset.
for (X_img, y) in ds_img.train.next_batch(batch_size=25):
    print(X_img.shape)
for (X_obj, y) in ds_obj.train.next_batch(batch_size=25):
    print(X_obj.shape)

# Just to make sure that you've coded the previous two functions correctly:
def _find_control_row(controlNames, name):
    """Return the index of the first entry of ``controlNames`` equal to ``name``."""
    for i in range(controlNames.shape[0]):
        if controlNames[i] == name:
            return i
    raise ValueError('control %r not found in controlNames' % (name,))


def main(winLSecs,
         data_dir="/Users/alfonso/matlab/IndirectAcquisition/keras/dataforMarius/export"):
    """Train a ConvNet that predicts the 'abs(velocity)' control from energy bands.

    All ``.mat`` files in ``data_dir`` are cut into half-overlapping windows of
    the ``energyBand`` matrix; the target of each window is the matching slice
    of the 'abs(velocity)' control row divided by 150. A three-layer
    convolutional network followed by two fully connected layers is trained
    with a sum-of-squares cost and saved under ``./models``.

    Args:
        winLSecs: Window length in seconds.
        data_dir: Directory holding the exported ``.mat`` files. Defaults to
            the path that used to be hard-coded.

    Raises:
        ValueError: If ``data_dir`` holds no ``.mat`` file, if the window is
            shorter than two frames, or if a file has no 'abs(velocity)'
            control.
    """
    # Sorted so the order of observations (and hence the split) is reproducible.
    files = sorted(os.path.join(data_dir, file_i)
                   for file_i in os.listdir(data_dir)
                   if file_i.endswith('.mat'))
    if not files:
        raise ValueError('no .mat files found in %r' % (data_dir,))

    # Fixed frame rate of the energy-band matrices (frames per second).
    energyBands_sr = 240

    # Round down to an even number of frames so the hop is exactly half a window.
    windowSize = int((winLSecs * energyBands_sr) // 2 * 2)
    if windowSize < 2:
        raise ValueError(
            'winLSecs * energyBands_sr must be at least 2 frames, got %r'
            % (winLSecs * energyBands_sr,))
    hopSize = windowSize // 2
    print('windowSize', windowSize)

    # ------------- prepare dataset
    Xs = []
    ys = []
    for filename in files:
        matlabStruct = umatlab.loadmat(filename).get('data')
        # Maps [-120, 0] onto [0, 1]; presumably dB values -- TODO confirm.
        energyBand = (matlabStruct.get('energyBand') / 120) + 1
        controlData = matlabStruct.get('controlData')
        # Resolve the row per file instead of trusting one reference file.
        indexVel = _find_control_row(matlabStruct.get('controlNames'),
                                     'abs(velocity)')
        target = controlData[indexVel, :] / 150

        # Dropping the last hop guarantees every window is full length.
        n_hops = energyBand.shape[1] // hopSize - 1
        for hop_i in range(n_hops):
            start = hop_i * hopSize
            stop = start + windowSize
            Xs.append(energyBand[:, start:stop][..., np.newaxis])
            ys.append(target[start:stop])

    Xs = np.array(Xs)
    ys = np.array(ys)
    print(Xs.shape, ys.shape)
    ds = datasets.Dataset(Xs=Xs, ys=ys, split=[0.8, 0.1, 0.1], n_classes=0)

    # ---------- create ConvNet
    tf.reset_default_graph()

    # Input: (batch, bands, frames, channels), taken from the windows above.
    X = tf.placeholder(name='X',
                       shape=(None, Xs.shape[1], Xs.shape[2], Xs.shape[3]),
                       dtype=tf.float32)
    # Output: one target value per frame of the window (regression, not one-hot).
    Y = tf.placeholder(name='Y', shape=(None, windowSize), dtype=tf.float32)

    # TODO: explore different numbers of layers and filter counts.
    n_filters = [9, 9, 9]
    H = X
    for layer_i, n_filters_i in enumerate(n_filters):
        # 2x2 kernels with stride 2 in both directions.
        H, _ = utils.conv2d(H, n_filters_i, k_h=2, k_w=2, d_h=2, d_w=2,
                            name=str(layer_i))
        H = tf.nn.softplus(H)
        print(H.get_shape().as_list())

    # Fully connected head: 100 hidden units, then windowSize sigmoid outputs.
    fc1, _ = utils.linear(H, n_output=100, name="fcn1", activation=tf.nn.relu)
    Y_pred, _ = utils.linear(fc1, n_output=windowSize, name="pred",
                             activation=tf.nn.sigmoid)

    # Sum of squared errors per window, averaged over the batch.
    loss = tf.squared_difference(Y_pred, Y)
    cost = tf.reduce_mean(tf.reduce_sum(loss, 1))
    learning_rate = 0.001
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # ----- TRAIN ConvNet
    batch_size = 400
    n_epochs = 100
    saver = tf.train.Saver()

    # The context manager releases the session's resources on exit or error.
    with tf.Session() as sess:
        # NOTE(review): tf.initialize_all_variables is deprecated in later
        # TF 1.x releases; kept because the file's TF version is not visible.
        sess.run(tf.initialize_all_variables())

        for epoch_i in range(n_epochs):
            print('Epoch: ', epoch_i)

            # Training pass: running the optimizer updates the parameters.
            this_cost = 0
            its = 0
            for Xs_i, ys_i in ds.train.next_batch(batch_size):
                this_cost += sess.run([cost, optimizer],
                                      feed_dict={X: Xs_i, Y: ys_i})[0]
                its += 1
            print('Training cost: ', this_cost / its)

            # Validation pass: cost only, the optimizer is not run.
            this_cost = 0
            its = 0
            for Xs_i, ys_i in ds.valid.next_batch(batch_size):
                this_cost += sess.run(cost, feed_dict={X: Xs_i, Y: ys_i})
                its += 1
            print('Validation cost: ', this_cost / its)

        # Create the output directory first so the save cannot fail on a
        # missing parent directory.
        model_dir = './models'
        if not os.path.isdir(model_dir):
            os.makedirs(model_dir)
        # "%d" was missing its percent sign, so the day never reached the name.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
        modelFileName = (model_dir + '/velocity_wL' + str(winLSecs) + '_'
                         + timestamp + '.chkp')
        saver.save(sess, modelFileName)
data_file = "/Users/luke/ownCloud/deep_learning/course/final_project/fer2013.csv"
labels, images = import_csv(data_file)
assert (len(labels) == len(images))

# Each image arrives as a space-separated string of pixel values; parse it
# into a flat uint8 vector.
imgs = [np.fromstring(str(image), dtype=np.uint8, sep=' ') for image in images]

Xs = np.array(imgs).astype(np.uint8)
ys = np.array(labels).astype(np.uint8)
assert (len(Xs) == len(ys))

ds = datasets.Dataset(Xs, ys, one_hot=True, split=[0.8, 0.1, 0.1])

from tensorflow.python.framework.ops import reset_default_graph
reset_default_graph()

# Sizes for the placeholders created later: 48 x 48 input pixels, 7 outputs.
n_input = 48 * 48
n_output = 7

# Restore the 48 x 48 x 1 image layout. The leading dimension is inferred
# from the data rather than hard-coded, so other row counts also reshape.
ds_X_reshape = np.reshape(ds.X, (-1, 48, 48, 1))
ds_valid_images_reshape = np.reshape(ds.valid.images,
                                     (ds.valid.images.shape[0], 48, 48, 1))
# NOTE(review): this append closes label-collection code that starts before
# the visible part of the file; it is kept exactly as written.
ys.append(1)

# Turn the collected windows and labels into arrays.
Xs, ys = np.array(Xs), np.array(ys)
print(Xs.shape, ys.shape)

# Sanity check on the expected dataset size. It fails if the dft/hop
# parameters were changed; if that was intentional, ignore the assertion.
assert (Xs.shape == (15360, 43, 256, 1) and ys.shape == (15360, ))

n_observations, n_height, n_width, n_channels = Xs.shape

ds = datasets.Dataset(Xs=Xs, ys=ys, split=[0.8, 0.1, 0.1], one_hot=True)

# Pull a single training batch; its shapes are the network's input and
# output shapes.
Xs_i, ys_i = next(ds.train.next_batch())
print(Xs_i.shape, ys_i.shape)
assert (ys_i.shape == (100, 2))

tf.reset_default_graph()

# Network input: a 4-d tensor whose first (batch) dimension is left as None.
with tf.device('/gpu:0'):
    X = tf.placeholder(name='X', shape=[None, 43, 256, 1], dtype=tf.float32)