(int(img_raw_raw.shape[0] / args.scale), int(img_raw_raw.shape[1] / args.scale))) print img_raw_raw.shape, " ->>>>", img_raw.shape print "img_raw", img_raw.shape img, lab, count = getTrainingExampleCells(img_raw, framesize_w, framesize_h, labelPath, x, y, args.stride, args.scale) print "count", count markers = getMarkersCells(labelPath, args.scale, img_raw.shape[0:2]) markers = markers[y:y + framesize_h, x:x + framesize_w] count = getCellCountCells(markers, (0, 0, framesize_w, framesize_h)) print "count", count, 'markers max', markers.max() pcount = classify([img.transpose((2, 0, 1))], [0])[0] lab_est = [(l.sum() / ef).astype(np.int) for l in lab] pred_est = [(l.sum() / ef).astype(np.int) for l in pcount] print "img shape", img.shape print "label shape", lab.shape print "label est ", lab_est, " --> predicted est ", pred_est # In[18]: fig = plt.Figure(figsize=(18, 9), dpi=160) gcf = plt.gcf() gcf.set_size_inches(18, 15) fig.set_canvas(gcf.canvas)
def main(): # parse command line arguments args = parse_arguments() print "theano", theano.version.full_version print "lasagne", lasagne.__version__ job_id = os.environ.get('SLURM_JOB_ID') if job_id == None: job_id = os.environ.get('PBS_JOBID') print "job_id", job_id patch_size = 32 framesize = int(args.framesize / args.scale) framesize_h = framesize_w = framesize noutputs = 1 channels = 3 paramfilename = str(args.scale) + "-" + str( patch_size) + "-" + args.data + "-" + args.kern + str( args.cov) + "_params.p" datasetfilename = str(args.scale) + "-" + str(patch_size) + "-" + str( framesize) + "-" + args.kern + str( args.stride) + "-" + args.data + "-" + str(args.cov) + "-dataset.p" print paramfilename print datasetfilename random.seed(args.seed) np.random.seed(args.seed) lasagne.random.set_rng(np.random.RandomState(args.seed)) input_var = T.tensor4('inputs') input_var_ex = T.ivector('input_var_ex') input_shape = (None, channels, framesize, framesize) img = InputLayer(shape=input_shape, input_var=input_var[input_var_ex]) net = img net = ConvFactory(net, filter_size=3, num_filter=64, pad=patch_size) print net.output_shape net = SimpleFactory(net, 16, 16) print net.output_shape net = SimpleFactory(net, 16, 32) print net.output_shape net = ConvFactory(net, filter_size=14, num_filter=16) print net.output_shape net = SimpleFactory(net, 112, 48) print net.output_shape net = SimpleFactory(net, 64, 32) print net.output_shape net = SimpleFactory(net, 40, 40) print net.output_shape net = SimpleFactory(net, 32, 96) print net.output_shape net = ConvFactory(net, filter_size=18, num_filter=32) print net.output_shape net = ConvFactory(net, filter_size=1, pad=0, num_filter=64) print net.output_shape net = ConvFactory(net, filter_size=1, pad=0, num_filter=64) print net.output_shape net = ConvFactory(net, filter_size=1, num_filter=1, stride=args.stride) print net.output_shape output_shape = lasagne.layers.get_output_shape(net) real_input_shape = (None, input_shape[1], input_shape[2] + 2 * patch_size, input_shape[3] + 2 * patch_size) print "real_input_shape:", real_input_shape, "-> output_shape:", output_shape print "network output size should be", (input_shape[2] + 2 * patch_size) - (patch_size) if (args.kern == "sq"): ef = ((patch_size / args.stride)**2.0) elif (args.kern == "gaus"): ef = 1.0 print "ef", ef prediction = lasagne.layers.get_output(net, deterministic=True) prediction_count = (prediction / ef).sum(axis=(2, 3)) classify = theano.function([input_var, input_var_ex], prediction) train_start_time = time.time() print classify( np.zeros((1, channels, framesize, framesize), dtype=theano.config.floatX), [0]).shape print time.time() - train_start_time, "sec" train_start_time = time.time() print classify( np.zeros((1, channels, framesize, framesize), dtype=theano.config.floatX), [0]).shape print time.time() - train_start_time, "sec" imgs = [] for filename in glob.iglob(args.data + "/*dots.png"): imgg = filename.replace("dots", "cell") imgs.append([imgg, filename]) if len(imgs) == 0: print "Issue with dataset" sys.exit() ## code to debug data generation plt.rcParams['figure.figsize'] = (18, 9) imgPath, labelPath, x, y = imgs[9][0], imgs[9][1], 0, 0 #imgPath,labelPath,x,y = imgs[0][0], imgs[0][1], 100,200 print imgPath, labelPath im = imread(imgPath) img_raw_raw = im #grayscale img_raw = scipy.misc.imresize(img_raw_raw, (int(img_raw_raw.shape[0] / args.scale), int(img_raw_raw.shape[1] / args.scale))) print img_raw_raw.shape, " ->>>>", img_raw.shape print "img_raw", img_raw.shape img, lab, count = getTrainingExampleCells(args, img_raw, framesize_w, framesize_h, labelPath, x, y, args.stride, args.scale) print "count", count markers = getMarkersCells(labelPath, args.scale, img_raw.shape[0:2]) markers = markers[y:y + framesize_h, x:x + framesize_w] count = getCellCountCells(markers, (0, 0, framesize_w, framesize_h)) print "count", count, 'markers max', markers.max() pcount = classify([img.transpose((2, 0, 1))], [0])[0] lab_est = [(l.sum() / ef).astype(np.int) for l in lab] pred_est = [(l.sum() / ef).astype(np.int) for l in pcount] print "img shape", img.shape print "label shape", lab.shape print "label est ", lab_est, " --> predicted est ", pred_est # In[18]: fig = plt.Figure(figsize=(18, 9), dpi=160) gcf = plt.gcf() gcf.set_size_inches(18, 15) fig.set_canvas(gcf.canvas) ax2 = plt.subplot2grid((2, 4), (0, 0), colspan=2) ax3 = plt.subplot2grid((2, 4), (0, 2), colspan=3) ax4 = plt.subplot2grid((2, 4), (1, 2), colspan=3) ax5 = plt.subplot2grid((2, 4), (1, 0), rowspan=1) ax6 = plt.subplot2grid((2, 4), (1, 1), rowspan=1) ax2.set_title("Input Image") ax2.imshow(img, interpolation='none', cmap='Greys_r') ax3.set_title("Regression target, {}x{} sliding window.".format( patch_size, patch_size)) ax3.imshow(np.concatenate((lab), axis=1), interpolation='none') #ax3.imshow(lab[0], interpolation='none') ax4.set_title("Predicted counts") ax4.imshow(np.concatenate((pcount), axis=1), interpolation='none') ax5.set_title("Real " + str(lab_est)) ax5.set_ylim((0, np.max(lab_est) * 2)) ax5.set_xticks(np.arange(0, noutputs, 1.0)) ax5.bar(range(noutputs), lab_est, align='center') ax6.set_title("Pred " + str(pred_est)) ax6.set_ylim((0, np.max(lab_est) * 2)) ax6.set_xticks(np.arange(0, noutputs, 1.0)) ax6.bar(range(noutputs), pred_est, align='center') img_pad = np.asarray([ np.pad(img[:, :, i], (patch_size - 1) / 2, "constant", constant_values=255) for i in range(img[0, 0].shape[0]) ]) img_pad = img_pad.transpose((1, 2, 0)) plt.imshow(img_pad) plt.imshow(lab[0], alpha=0.5) for path in imgs: if (not os.path.isfile(path[0])): print path, "bad", path[0] if (not os.path.isfile(path[1])): print path, "bad", path[1] dataset = [] if (os.path.isfile(datasetfilename)): print "reading", datasetfilename dataset = pickle.load(open(datasetfilename, "rb")) else: dataset_x = [] dataset_y = [] dataset_c = [] print len(imgs) for path in imgs: imgPath = path[0] print imgPath im = imread(imgPath) img_raw_raw = im img_raw = scipy.misc.imresize( img_raw_raw, (int(img_raw_raw.shape[0] / args.scale), int(img_raw_raw.shape[1] / args.scale))) print img_raw_raw.shape, " ->>>>", img_raw.shape labelPath = path[1] for base_x in range(0, img_raw.shape[0], framesize_h): for base_y in range(0, img_raw.shape[1], framesize_w): if (img_raw.shape[1] - base_y < framesize_w) or ( img_raw.shape[0] - base_x < framesize_h): print "!!!! Not adding image because size is", img_raw.shape[ 1] - base_y, img_raw.shape[0] - base_x continue img, lab, count = getTrainingExampleCells( args, img_raw, framesize_w, framesize_h, labelPath, base_y, base_x, args.stride, args.scale) print "count ", count if img.shape[0:2] != (framesize_w, framesize_h): print "!!!! Not adding image because size is", img.shape[ 0:2] else: lab_est = [(l.sum() / ef).astype(np.int) for l in lab] assert np.allclose(count, lab_est, 0) dataset.append((img, lab, count)) print "lab_est", lab_est, "img shape", img.shape, "label shape", lab.shape sys.stdout.flush() print "dataset size", len(dataset) print "writing", datasetfilename out = open(datasetfilename, "wb", 0) pickle.dump(dataset, out) out.close() print "DONE" # %matplotlib inline # plt.rcParams['figure.figsize'] = (18, 9) # plt.imshow(lab[0]) #np_dataset = np.asarray(dataset) np.random.shuffle(dataset) np_dataset_x = np.asarray([d[0] for d in dataset], dtype=theano.config.floatX) np_dataset_y = np.asarray([d[1] for d in dataset], dtype=theano.config.floatX) np_dataset_c = np.asarray([d[2] for d in dataset], dtype=theano.config.floatX) np_dataset_x = np_dataset_x.transpose((0, 3, 1, 2)) print "np_dataset_x", np_dataset_x.shape print "np_dataset_y", np_dataset_y.shape print "np_dataset_c", np_dataset_c.shape length = len(np_dataset_x) n = args.nsamples np_dataset_x_train = np_dataset_x[0:n] np_dataset_y_train = np_dataset_y[0:n] np_dataset_c_train = np_dataset_c[0:n] print "np_dataset_x_train", len(np_dataset_x_train) np_dataset_x_valid = np_dataset_x[n:2 * n] np_dataset_y_valid = np_dataset_y[n:2 * n] np_dataset_c_valid = np_dataset_c[n:2 * n] print "np_dataset_x_valid", len(np_dataset_x_valid) np_dataset_x_test = np_dataset_x[-100:] np_dataset_y_test = np_dataset_y[-100:] np_dataset_c_test = np_dataset_c[-100:] print "np_dataset_x_test", len(np_dataset_x_test) # In[25]: print "number of counts total ", np_dataset_c.sum() print "number of counts on average ", np_dataset_c.mean( ), "+-", np_dataset_c.std() print "counts min:", np_dataset_c.min(), "max:", np_dataset_c.max() plt.rcParams['figure.figsize'] = (15, 5) plt.title("Example images") plt.imshow(np.concatenate(np_dataset_x_train[:5].astype( np.uint8).transpose((0, 2, 3, 1)), axis=1), interpolation='none') # In[27]: plt.title("Example images") plt.imshow(np.concatenate(np_dataset_y_train[:5, 0], axis=1), interpolation='none') # In[28]: plt.rcParams['figure.figsize'] = (15, 5) plt.title("Counts in each image") plt.bar(range(len(np_dataset_c_train)), np_dataset_c_train) # In[29]: print "Total cells in training", np.sum(np_dataset_c_train[0:], axis=0) print "Total cells in validation", np.sum(np_dataset_c_valid[0:], axis=0) print "Total cells in testing", np.sum(np_dataset_c_test[0:], axis=0) #to make video: ffmpeg -i images-cell/image-0-%d-cell.png -vcodec libx264 aout.mp4 directory = "network-temp/" ext = "countception.p" if not os.path.exists(directory): os.makedirs(directory) print "Random performance" print test_perf(np_dataset_x_train, np_dataset_y_train, np_dataset_c_train) print test_perf(np_dataset_x_valid, np_dataset_y_valid, np_dataset_c_valid) print test_perf(np_dataset_x_test, np_dataset_y_test, np_dataset_c_test) target_var = T.tensor4('target') lr = theano.shared(np.array(0.0, dtype=theano.config.floatX)) #Mean Absolute Error is computed between each count of the count map l1_loss = T.abs_(prediction - target_var[input_var_ex]) #Mean Absolute Error is computed for the overall image prediction prediction_count2 = (prediction / ef).sum(axis=(2, 3)) mae_loss = T.abs_(prediction_count2 - (target_var[input_var_ex] / ef).sum(axis=(2, 3))) loss = l1_loss.mean() params = lasagne.layers.get_all_params(net, trainable=True) updates = lasagne.updates.adam(loss, params, learning_rate=lr) train_fn = theano.function([input_var_ex], [loss, mae_loss], updates=updates, givens={ input_var: np_dataset_x_train, target_var: np_dataset_y_train }) print "DONE compiling theano functons" lr.set_value(args.lr) best_valid_err = 99999999 best_test_err = 99999999 epoch = 0 # In[37]: batch_size = 2 print "batch_size", batch_size print "lr", lr.eval() datasetlength = len(np_dataset_x_train) print "datasetlength", datasetlength for epoch in range(epoch, 1000): start_time = time.time() epoch_err_pix = [] epoch_err_pred = [] todo = range(datasetlength) for i in range(0, datasetlength, batch_size): ex = todo[i:i + batch_size] train_start_time = time.time() err_pix, err_pred = train_fn(ex) train_elapsed_time = time.time() - train_start_time epoch_err_pix.append(err_pix) epoch_err_pred.append(err_pred) valid_pix_err, valid_err = test_perf(np_dataset_x_valid, np_dataset_y_valid, np_dataset_c_valid) # a threshold is used to reduce processing when we are far from the goal if (valid_err < 10 and valid_err < best_valid_err): best_valid_err = valid_err best_test_err = test_perf(np_dataset_x_test, np_dataset_y_test, np_dataset_c_test) print "OOO best test (err_pix, err_pred)", best_test_err, ",epoch", epoch save_network(net, "best_valid_err" + job_id) elapsed_time = time.time() - start_time err = np.mean(epoch_err_pix) acc = np.mean(np.concatenate(epoch_err_pred)) if epoch % 5 == 0: print "#" + str(epoch) + "#(err_pix:" + str(np.around( err, 3)) + ",err_pred:" + str(np.around( acc, 3)) + "),valid(err_pix:" + str( np.around(valid_pix_err, 3)) + ",err_pred:" + str( np.around(valid_err, 3)) + "),(time:" + str( np.around(elapsed_time, 3)) + "sec)" #visualize training #processImages(str(epoch) + '-cell',0) print "#####", "best_test_acc", best_test_err, args print "Done" #load best network load_network(net, "best_valid_err" + job_id) plt.rcParams['figure.figsize'] = (15, 5) plt.title("Training Data") pcounts = compute_counts(np_dataset_x_train) plt.bar(np.arange(len(np_dataset_c_train)) - 0.1, np_dataset_c_train, width=0.5, label="Real Count") plt.bar(np.arange(len(np_dataset_c_train)) + 0.1, pcounts, width=0.5, label="Predicted Count") plt.tight_layout() plt.legend() plt.rcParams['figure.figsize'] = (15, 5) plt.title("Valid Data") pcounts = compute_counts(np_dataset_x_valid) plt.bar(np.arange(len(np_dataset_c_valid)) - 0.1, np_dataset_c_valid, width=0.5, label="Real Count") plt.bar(np.arange(len(np_dataset_c_valid)) + 0.1, pcounts, width=0.5, label="Predicted Count") plt.tight_layout() plt.legend() plt.rcParams['figure.figsize'] = (15, 5) plt.title("Test Data") pcounts = compute_counts(np_dataset_x_test) plt.bar(np.arange(len(np_dataset_c_test)) - 0.1, np_dataset_c_test, width=0.5, label="Real Count") plt.bar(np.arange(len(np_dataset_c_test)) + 0.1, pcounts, width=0.5, label="Predicted Count") plt.tight_layout() plt.legend() # process images processImages('test', 0) processImages('test', 1) processImages('test', 2) processImages('test', 3) processImages('test', 4) processImages('test', 5) processImages('test', 10)