# Exemplo n.º 1 ("Example no. 1" -- marker left over from the code-collection export; score: 0)
                              (int(img_raw_raw.shape[0] / args.scale),
                               int(img_raw_raw.shape[1] / args.scale)))
print img_raw_raw.shape, " ->>>>", img_raw.shape

print "img_raw", img_raw.shape
img, lab, count = getTrainingExampleCells(img_raw, framesize_w, framesize_h,
                                          labelPath, x, y, args.stride,
                                          args.scale)
print "count", count

markers = getMarkersCells(labelPath, args.scale, img_raw.shape[0:2])
markers = markers[y:y + framesize_h, x:x + framesize_w]
count = getCellCountCells(markers, (0, 0, framesize_w, framesize_h))
print "count", count, 'markers max', markers.max()

pcount = classify([img.transpose((2, 0, 1))], [0])[0]

lab_est = [(l.sum() / ef).astype(np.int) for l in lab]
pred_est = [(l.sum() / ef).astype(np.int) for l in pcount]

print "img shape", img.shape
print "label shape", lab.shape
print "label est ", lab_est, " --> predicted est ", pred_est

# In[18]:

fig = plt.Figure(figsize=(18, 9), dpi=160)
gcf = plt.gcf()
gcf.set_size_inches(18, 15)
fig.set_canvas(gcf.canvas)
# Exemplo n.º 2 ("Example no. 2" -- marker left over from the code-collection export; score: 0)
def main():
    """Train and evaluate a fully-convolutional cell-counting network.

    End-to-end Python 2 training script (note the ``print`` statements):
    parses command-line arguments, builds a Lasagne/Theano network,
    loads or generates a pickled dataset of image patches, trains with
    Adam on an L1 loss over predicted count maps, tracks the best
    validation error (saving the network when it improves), and finally
    plots real vs. predicted counts and renders example images.

    NOTE(review): relies on many module-level helpers not visible in
    this chunk (parse_arguments, ConvFactory, SimpleFactory,
    getTrainingExampleCells, getMarkersCells, getCellCountCells,
    test_perf, save_network, load_network, compute_counts,
    processImages). The "countception.p" checkpoint name suggests this
    is the Count-ception redundant-counting model -- confirm against
    the full file.
    """
    # parse command line arguments
    args = parse_arguments()

    print "theano", theano.version.full_version
    print "lasagne", lasagne.__version__

    # Cluster job id (SLURM first, then PBS fallback); used below to tag
    # saved-network filenames.
    job_id = os.environ.get('SLURM_JOB_ID')

    # (idiom: `is None` would be preferred; left as-is.)
    if job_id == None:
        job_id = os.environ.get('PBS_JOBID')

    # NOTE(review): if neither env var is set, job_id stays None and the
    # later "best_valid_err" + job_id concatenation raises TypeError.
    print "job_id", job_id

    # Core hyperparameters: sliding-window/receptive-field size and the
    # (square) frame size after rescaling raw images by args.scale.
    patch_size = 32
    framesize = int(args.framesize / args.scale)
    framesize_h = framesize_w = framesize
    noutputs = 1
    channels = 3

    # Cache filenames encode the hyperparameters so runs with different
    # settings do not collide on disk.
    paramfilename = str(args.scale) + "-" + str(
        patch_size) + "-" + args.data + "-" + args.kern + str(
            args.cov) + "_params.p"
    datasetfilename = str(args.scale) + "-" + str(patch_size) + "-" + str(
        framesize) + "-" + args.kern + str(
            args.stride) + "-" + args.data + "-" + str(args.cov) + "-dataset.p"
    print paramfilename
    print datasetfilename

    # Seed every RNG in play (stdlib, numpy, lasagne) for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    lasagne.random.set_rng(np.random.RandomState(args.seed))

    # input_var holds an entire dataset tensor; input_var_ex selects a
    # minibatch of examples by index (see the `givens` of train_fn below,
    # which binds input_var to the full training set on the device).
    input_var = T.tensor4('inputs')
    input_var_ex = T.ivector('input_var_ex')

    input_shape = (None, channels, framesize, framesize)
    img = InputLayer(shape=input_shape, input_var=input_var[input_var_ex])
    net = img

    # Fully-convolutional stack; the initial pad=patch_size enlarges the
    # input so the output count map covers every sliding-window position.
    # Layer semantics live in ConvFactory/SimpleFactory (defined elsewhere).
    net = ConvFactory(net, filter_size=3, num_filter=64, pad=patch_size)
    print net.output_shape
    net = SimpleFactory(net, 16, 16)
    print net.output_shape
    net = SimpleFactory(net, 16, 32)
    print net.output_shape
    net = ConvFactory(net, filter_size=14, num_filter=16)
    print net.output_shape
    net = SimpleFactory(net, 112, 48)
    print net.output_shape
    net = SimpleFactory(net, 64, 32)
    print net.output_shape
    net = SimpleFactory(net, 40, 40)
    print net.output_shape
    net = SimpleFactory(net, 32, 96)
    print net.output_shape
    net = ConvFactory(net, filter_size=18, num_filter=32)
    print net.output_shape
    net = ConvFactory(net, filter_size=1, pad=0, num_filter=64)
    print net.output_shape
    net = ConvFactory(net, filter_size=1, pad=0, num_filter=64)
    print net.output_shape
    # Final 1x1 conv produces the single-channel count map.
    net = ConvFactory(net, filter_size=1, num_filter=1, stride=args.stride)
    print net.output_shape

    output_shape = lasagne.layers.get_output_shape(net)
    real_input_shape = (None, input_shape[1], input_shape[2] + 2 * patch_size,
                        input_shape[3] + 2 * patch_size)
    print "real_input_shape:", real_input_shape, "-> output_shape:", output_shape

    print "network output size should be", (input_shape[2] +
                                            2 * patch_size) - (patch_size)

    # ef normalizes a summed count map back to an object count: with the
    # square kernel each object is counted once per overlapping window,
    # i.e. (patch_size/stride)^2 times. NOTE(review): ef is never assigned
    # if args.kern is neither "sq" nor "gaus" -> NameError on the next line.
    if (args.kern == "sq"):
        ef = ((patch_size / args.stride)**2.0)
    elif (args.kern == "gaus"):
        ef = 1.0
    print "ef", ef

    # Deterministic (inference-mode) output of the network.
    prediction = lasagne.layers.get_output(net, deterministic=True)
    prediction_count = (prediction / ef).sum(axis=(2, 3))

    classify = theano.function([input_var, input_var_ex], prediction)
    # First call on a dummy input includes one-off compilation/allocation
    # overhead...
    train_start_time = time.time()
    print classify(
        np.zeros((1, channels, framesize, framesize),
                 dtype=theano.config.floatX), [0]).shape
    print time.time() - train_start_time, "sec"

    # ...second identical call measures steady-state inference speed.
    train_start_time = time.time()
    print classify(
        np.zeros((1, channels, framesize, framesize),
                 dtype=theano.config.floatX), [0]).shape
    print time.time() - train_start_time, "sec"

    # Pair each "*dots.png" annotation file with its "*cell.png" image.
    imgs = []
    for filename in glob.iglob(args.data + "/*dots.png"):
        imgg = filename.replace("dots", "cell")
        imgs.append([imgg, filename])
    if len(imgs) == 0:
        print "Issue with dataset"
        sys.exit()

    ## code to debug data generation
    # Sanity-check one hard-coded example (index 9) end to end: build a
    # training example, recount via markers, and run it through the net.

    plt.rcParams['figure.figsize'] = (18, 9)
    imgPath, labelPath, x, y = imgs[9][0], imgs[9][1], 0, 0
    #imgPath,labelPath,x,y = imgs[0][0], imgs[0][1], 100,200

    print imgPath, labelPath

    im = imread(imgPath)
    img_raw_raw = im  #grayscale

    # Downscale by args.scale. NOTE(review): scipy.misc.imresize was
    # removed in SciPy >= 1.3, so this pins an old scipy.
    img_raw = scipy.misc.imresize(img_raw_raw,
                                  (int(img_raw_raw.shape[0] / args.scale),
                                   int(img_raw_raw.shape[1] / args.scale)))
    print img_raw_raw.shape, " ->>>>", img_raw.shape

    print "img_raw", img_raw.shape
    img, lab, count = getTrainingExampleCells(args, img_raw, framesize_w,
                                              framesize_h, labelPath, x, y,
                                              args.stride, args.scale)
    print "count", count

    # Independent recount from the dot-annotation markers; should agree
    # with the count returned above.
    markers = getMarkersCells(labelPath, args.scale, img_raw.shape[0:2])
    markers = markers[y:y + framesize_h, x:x + framesize_w]
    count = getCellCountCells(markers, (0, 0, framesize_w, framesize_h))
    print "count", count, 'markers max', markers.max()

    # HWC -> CHW for the network; [0] selects the single example.
    pcount = classify([img.transpose((2, 0, 1))], [0])[0]

    # Estimated counts: sum each count map and divide by ef.
    lab_est = [(l.sum() / ef).astype(np.int) for l in lab]
    pred_est = [(l.sum() / ef).astype(np.int) for l in pcount]

    print "img shape", img.shape
    print "label shape", lab.shape
    print "label est ", lab_est, " --> predicted est ", pred_est

    # In[18]:
    # Visualization of the debug example: input, regression target,
    # prediction, and real-vs-predicted count bars.

    fig = plt.Figure(figsize=(18, 9), dpi=160)
    gcf = plt.gcf()
    gcf.set_size_inches(18, 15)
    fig.set_canvas(gcf.canvas)

    ax2 = plt.subplot2grid((2, 4), (0, 0), colspan=2)
    ax3 = plt.subplot2grid((2, 4), (0, 2), colspan=3)
    ax4 = plt.subplot2grid((2, 4), (1, 2), colspan=3)
    ax5 = plt.subplot2grid((2, 4), (1, 0), rowspan=1)
    ax6 = plt.subplot2grid((2, 4), (1, 1), rowspan=1)

    ax2.set_title("Input Image")
    ax2.imshow(img, interpolation='none', cmap='Greys_r')
    ax3.set_title("Regression target, {}x{} sliding window.".format(
        patch_size, patch_size))
    ax3.imshow(np.concatenate((lab), axis=1), interpolation='none')
    #ax3.imshow(lab[0], interpolation='none')

    ax4.set_title("Predicted counts")
    ax4.imshow(np.concatenate((pcount), axis=1), interpolation='none')

    ax5.set_title("Real " + str(lab_est))
    ax5.set_ylim((0, np.max(lab_est) * 2))
    ax5.set_xticks(np.arange(0, noutputs, 1.0))
    ax5.bar(range(noutputs), lab_est, align='center')
    ax6.set_title("Pred " + str(pred_est))
    ax6.set_ylim((0, np.max(lab_est) * 2))
    ax6.set_xticks(np.arange(0, noutputs, 1.0))
    ax6.bar(range(noutputs), pred_est, align='center')

    # Pad the image with white so it aligns with the (larger) count map
    # when overlaid. NOTE(review): (patch_size - 1) / 2 is integer
    # division under Python 2 -- presumably intentional; confirm.
    img_pad = np.asarray([
        np.pad(img[:, :, i], (patch_size - 1) / 2,
               "constant",
               constant_values=255) for i in range(img[0, 0].shape[0])
    ])
    img_pad = img_pad.transpose((1, 2, 0))
    plt.imshow(img_pad)
    plt.imshow(lab[0], alpha=0.5)

    # Verify that every image/annotation path actually exists on disk.
    for path in imgs:
        if (not os.path.isfile(path[0])):
            print path, "bad", path[0]
        if (not os.path.isfile(path[1])):
            print path, "bad", path[1]

    # Load the cached dataset if present; otherwise tile every image into
    # non-overlapping framesize_h x framesize_w crops and pickle the result.
    dataset = []
    if (os.path.isfile(datasetfilename)):
        print "reading", datasetfilename
        dataset = pickle.load(open(datasetfilename, "rb"))
    else:
        # NOTE(review): dataset_x/dataset_y/dataset_c are created but never
        # used -- everything is appended to `dataset` below.
        dataset_x = []
        dataset_y = []
        dataset_c = []
        print len(imgs)
        for path in imgs:

            imgPath = path[0]
            print imgPath

            im = imread(imgPath)
            img_raw_raw = im
            img_raw = scipy.misc.imresize(
                img_raw_raw, (int(img_raw_raw.shape[0] / args.scale),
                              int(img_raw_raw.shape[1] / args.scale)))
            print img_raw_raw.shape, " ->>>>", img_raw.shape

            labelPath = path[1]
            # Walk the image in full-frame steps; partial edge crops are
            # discarded.
            for base_x in range(0, img_raw.shape[0], framesize_h):
                for base_y in range(0, img_raw.shape[1], framesize_w):
                    if (img_raw.shape[1] - base_y < framesize_w) or (
                            img_raw.shape[0] - base_x < framesize_h):
                        print "!!!! Not adding image because size is", img_raw.shape[
                            1] - base_y, img_raw.shape[0] - base_x
                        continue
                    img, lab, count = getTrainingExampleCells(
                        args, img_raw, framesize_w, framesize_h, labelPath,
                        base_y, base_x, args.stride, args.scale)
                    print "count ", count
                    if img.shape[0:2] != (framesize_w, framesize_h):
                        print "!!!! Not adding image because size is", img.shape[
                            0:2]
                    else:
                        # Cross-check: count recovered from the label map
                        # must match the reported count exactly (tolerance 0).
                        lab_est = [(l.sum() / ef).astype(np.int) for l in lab]
                        assert np.allclose(count, lab_est, 0)
                        dataset.append((img, lab, count))
                        print "lab_est", lab_est, "img shape", img.shape, "label shape", lab.shape
                        sys.stdout.flush()
            print "dataset size", len(dataset)
        print "writing", datasetfilename
        out = open(datasetfilename, "wb", 0)
        pickle.dump(dataset, out)
        out.close()
    print "DONE"

    # %matplotlib inline
    # plt.rcParams['figure.figsize'] = (18, 9)
    # plt.imshow(lab[0])

    #np_dataset = np.asarray(dataset)

    # Shuffle once (seeded above) before splitting into train/valid/test.
    np.random.shuffle(dataset)

    np_dataset_x = np.asarray([d[0] for d in dataset],
                              dtype=theano.config.floatX)
    np_dataset_y = np.asarray([d[1] for d in dataset],
                              dtype=theano.config.floatX)
    np_dataset_c = np.asarray([d[2] for d in dataset],
                              dtype=theano.config.floatX)

    # NHWC -> NCHW for the convnet.
    np_dataset_x = np_dataset_x.transpose((0, 3, 1, 2))

    print "np_dataset_x", np_dataset_x.shape
    print "np_dataset_y", np_dataset_y.shape
    print "np_dataset_c", np_dataset_c.shape

    length = len(np_dataset_x)

    n = args.nsamples

    # Split: first n train, next n valid, last 100 test.
    # NOTE(review): if 2*n > length - 100 the test split overlaps
    # train/valid -- verify args.nsamples against the dataset size.
    np_dataset_x_train = np_dataset_x[0:n]
    np_dataset_y_train = np_dataset_y[0:n]
    np_dataset_c_train = np_dataset_c[0:n]
    print "np_dataset_x_train", len(np_dataset_x_train)

    np_dataset_x_valid = np_dataset_x[n:2 * n]
    np_dataset_y_valid = np_dataset_y[n:2 * n]
    np_dataset_c_valid = np_dataset_c[n:2 * n]
    print "np_dataset_x_valid", len(np_dataset_x_valid)

    np_dataset_x_test = np_dataset_x[-100:]
    np_dataset_y_test = np_dataset_y[-100:]
    np_dataset_c_test = np_dataset_c[-100:]
    print "np_dataset_x_test", len(np_dataset_x_test)

    # In[25]:
    # Summary statistics of the ground-truth counts.

    print "number of counts total ", np_dataset_c.sum()
    print "number of counts on average ", np_dataset_c.mean(
    ), "+-", np_dataset_c.std()
    print "counts min:", np_dataset_c.min(), "max:", np_dataset_c.max()

    plt.rcParams['figure.figsize'] = (15, 5)
    plt.title("Example images")
    plt.imshow(np.concatenate(np_dataset_x_train[:5].astype(
        np.uint8).transpose((0, 2, 3, 1)),
                              axis=1),
               interpolation='none')

    # In[27]:

    plt.title("Example images")
    plt.imshow(np.concatenate(np_dataset_y_train[:5, 0], axis=1),
               interpolation='none')

    # In[28]:

    plt.rcParams['figure.figsize'] = (15, 5)
    plt.title("Counts in each image")
    plt.bar(range(len(np_dataset_c_train)), np_dataset_c_train)

    # In[29]:

    print "Total cells in training", np.sum(np_dataset_c_train[0:], axis=0)
    print "Total cells in validation", np.sum(np_dataset_c_valid[0:], axis=0)
    print "Total cells in testing", np.sum(np_dataset_c_test[0:], axis=0)

    #to make video: ffmpeg -i images-cell/image-0-%d-cell.png -vcodec libx264 aout.mp4

    directory = "network-temp/"
    ext = "countception.p"

    if not os.path.exists(directory):
        os.makedirs(directory)

    # Baseline performance of the untrained (randomly initialized) network.
    print "Random performance"
    print test_perf(np_dataset_x_train, np_dataset_y_train, np_dataset_c_train)
    print test_perf(np_dataset_x_valid, np_dataset_y_valid, np_dataset_c_valid)
    print test_perf(np_dataset_x_test, np_dataset_y_test, np_dataset_c_test)

    target_var = T.tensor4('target')
    # Learning rate as a shared variable so it can be changed without
    # recompiling train_fn.
    lr = theano.shared(np.array(0.0, dtype=theano.config.floatX))

    #Mean Absolute Error is computed between each count of the count map
    l1_loss = T.abs_(prediction - target_var[input_var_ex])

    #Mean Absolute Error is computed for the overall image prediction
    prediction_count2 = (prediction / ef).sum(axis=(2, 3))
    mae_loss = T.abs_(prediction_count2 -
                      (target_var[input_var_ex] / ef).sum(axis=(2, 3)))

    # Training objective is the per-pixel L1 loss; mae_loss is monitored only.
    loss = l1_loss.mean()

    params = lasagne.layers.get_all_params(net, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=lr)

    # The whole training set is bound via `givens`; each call only passes
    # the minibatch indices (input_var_ex).
    train_fn = theano.function([input_var_ex], [loss, mae_loss],
                               updates=updates,
                               givens={
                                   input_var: np_dataset_x_train,
                                   target_var: np_dataset_y_train
                               })

    print "DONE compiling theano functons"

    lr.set_value(args.lr)
    # Large sentinels; any real error will be smaller.
    best_valid_err = 99999999
    best_test_err = 99999999
    epoch = 0

    # In[37]:

    batch_size = 2

    print "batch_size", batch_size
    print "lr", lr.eval()

    datasetlength = len(np_dataset_x_train)
    print "datasetlength", datasetlength

    for epoch in range(epoch, 1000):
        start_time = time.time()

        epoch_err_pix = []
        epoch_err_pred = []
        # NOTE(review): indices are sequential, not reshuffled per epoch.
        todo = range(datasetlength)
        for i in range(0, datasetlength, batch_size):
            ex = todo[i:i + batch_size]

            train_start_time = time.time()
            err_pix, err_pred = train_fn(ex)
            train_elapsed_time = time.time() - train_start_time

            epoch_err_pix.append(err_pix)
            epoch_err_pred.append(err_pred)

        valid_pix_err, valid_err = test_perf(np_dataset_x_valid,
                                             np_dataset_y_valid,
                                             np_dataset_c_valid)

        # a threshold is used to reduce processing when we are far from the goal
        if (valid_err < 10 and valid_err < best_valid_err):
            best_valid_err = valid_err
            best_test_err = test_perf(np_dataset_x_test, np_dataset_y_test,
                                      np_dataset_c_test)
            print "OOO best test (err_pix, err_pred)", best_test_err, ",epoch", epoch
            save_network(net, "best_valid_err" + job_id)

        elapsed_time = time.time() - start_time
        err = np.mean(epoch_err_pix)
        acc = np.mean(np.concatenate(epoch_err_pred))
        # Progress line every 5 epochs.
        if epoch % 5 == 0:
            print "#" + str(epoch) + "#(err_pix:" + str(np.around(
                err, 3)) + ",err_pred:" + str(np.around(
                    acc, 3)) + "),valid(err_pix:" + str(
                        np.around(valid_pix_err, 3)) + ",err_pred:" + str(
                            np.around(valid_err, 3)) + "),(time:" + str(
                                np.around(elapsed_time, 3)) + "sec)"

        #visualize training
        #processImages(str(epoch) + '-cell',0)

    print "#####", "best_test_acc", best_test_err, args

    print "Done"

    #load best network
    load_network(net, "best_valid_err" + job_id)

    # Real vs. predicted counts, per split, as paired bar charts.
    plt.rcParams['figure.figsize'] = (15, 5)
    plt.title("Training Data")

    pcounts = compute_counts(np_dataset_x_train)
    plt.bar(np.arange(len(np_dataset_c_train)) - 0.1,
            np_dataset_c_train,
            width=0.5,
            label="Real Count")
    plt.bar(np.arange(len(np_dataset_c_train)) + 0.1,
            pcounts,
            width=0.5,
            label="Predicted Count")
    plt.tight_layout()
    plt.legend()

    plt.rcParams['figure.figsize'] = (15, 5)
    plt.title("Valid Data")

    pcounts = compute_counts(np_dataset_x_valid)
    plt.bar(np.arange(len(np_dataset_c_valid)) - 0.1,
            np_dataset_c_valid,
            width=0.5,
            label="Real Count")
    plt.bar(np.arange(len(np_dataset_c_valid)) + 0.1,
            pcounts,
            width=0.5,
            label="Predicted Count")
    plt.tight_layout()
    plt.legend()

    plt.rcParams['figure.figsize'] = (15, 5)
    plt.title("Test Data")

    pcounts = compute_counts(np_dataset_x_test)
    plt.bar(np.arange(len(np_dataset_c_test)) - 0.1,
            np_dataset_c_test,
            width=0.5,
            label="Real Count")
    plt.bar(np.arange(len(np_dataset_c_test)) + 0.1,
            pcounts,
            width=0.5,
            label="Predicted Count")
    plt.tight_layout()
    plt.legend()

    # process images
    processImages('test', 0)
    processImages('test', 1)
    processImages('test', 2)
    processImages('test', 3)
    processImages('test', 4)
    processImages('test', 5)
    processImages('test', 10)