示例#1
0
文件: from_svs.py 项目: nathanin/milk
def main(args):
    """Tile each listed slide and store it, with its label, in an HDF5 dataset.

    Slides whose basename has no usable entry in the labels, or whose last
    label field is greater than 1, are skipped.

    Args:
        args: Parsed command-line namespace. Reads ``slides`` (text file with
            one slide path per line), ``meta_file`` (text stored as the
            dataset metadata string), ``labels`` (passed to ``read_labels``),
            ``data_h5`` (HDF5 path) and ``ramdisk`` (staging location handed
            to ``cpramdisk``). The whole namespace is also forwarded to
            ``Slide`` and ``stack_tiles``.
    """
    ## Read in the slides; paths that do not exist are echoed and dropped
    slides = []
    with open(args.slides, 'r') as f:
        for line in f:
            path = line.strip()
            if os.path.exists(path):
                slides.append(path)
            else:
                print(path)
    print('Working with {} slides'.format(len(slides)))

    ## The metadata file is stored verbatim alongside the dataset
    with open(args.meta_file, 'r') as f:
        meta_string = f.read()

    ## Read in the labels
    attrs = ['case_id', 'stage_str', 'stage_code']
    labels = read_labels(args.labels, attrs)
    print('Working with labels:', len(labels))

    ## Create the dataset
    if not os.path.exists(args.data_h5):
        create_dataset(args.data_h5, meta_string)
    else:
        print('HDF5 {} already exists'.format(args.data_h5))

    ## Load the dataset
    mildataset = MILDataset(args.data_h5, meta_string)

    for i, src in enumerate(slides):
        print('\n[\t{}/{}\t]'.format(i, len(slides)))
        print('File {:04d} {} --> {}'.format(i, src, args.ramdisk))
        basename = os.path.splitext(os.path.basename(src))[0]

        try:
            lab = labels[basename]
            print(basename, lab)
            if lab[-1] > 1:
                print('Skipping unused labels')
                continue
        except Exception as e:
            print('basename {} no labels.'.format(basename))
            traceback.print_tb(e.__traceback__)
            # Without a label there is nothing valid to store. Falling
            # through would reuse the previous slide's `lab` (or hit an
            # unbound name on the first slide), so skip this slide.
            continue

        rdsrc = cpramdisk(src, args.ramdisk)
        try:
            slide = Slide(rdsrc, args)
            tile_stack = stack_tiles(slide, args)
            mildataset.new_dataset(basename, tile_stack, attrs, lab)

        except Exception as e:
            print('Breaking')
            traceback.print_tb(e.__traceback__)

        finally:
            # Always drop the staged copy, even when tiling failed.
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
示例#2
0
def main(args):
    """Compute the 'wsi' output of every listed slide and write it as an image.

    Args:
        args: Parsed command-line namespace. Reads ``lst`` (text file with one
            slide path per line), ``suffix`` (passed to ``repext`` to derive
            the destination path) and ``ramdisk`` (staging location handed to
            ``cpramdisk``). The whole namespace is also forwarded to ``Slide``
            and ``slide.compute``.

    Slides whose destination file already exists are skipped. Errors raised
    while processing a slide are printed and do not stop the loop.
    """
    with open(args.lst, 'r') as f:
        srclist = [x.strip() for x in f]

    for src in srclist:

        dst = repext(src, args.suffix)
        if os.path.exists(dst):
            print('Exists', src, '-->', dst)
            continue

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)

        try:
            # Open the staged copy: reading from the original path would
            # make the copy above pointless.
            slide = Slide(rdsrc, args)
            slide.initialize_output('wsi',
                                    3,
                                    mode='full',
                                    compute_fn=compute_fn)
            ret = slide.compute('wsi', args)
            print('Saving {} --> {}'.format(ret.shape, dst))
            cv2.imwrite(dst, ret)
        except Exception as e:
            traceback.print_tb(e.__traceback__)
        finally:
            # Always drop the staged copy, even when processing failed.
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
示例#3
0
def main(args):
    """Compute and save a per-slide 'att' output image with a MilkEager model.

    A ``compute_fn`` is selected by ``args.iter_type`` ('python' or 'tf'),
    the model is built and its weights loaded from ``args.snapshot``, and
    every slide listed in ``args.slides`` is processed from a staged copy.
    The result is saved with ``np.save`` with its last axis reversed.

    Args:
        args: Parsed command-line namespace. Reads ``iter_type``, ``encoder``,
            ``mil``, ``deep_classifier``, ``batchsize``, ``temperature``,
            ``heads``, ``process_size``, ``snapshot``, ``slides``, ``suffix``,
            ``ramdisk`` and ``n_classes``. The whole namespace is also
            forwarded to ``Slide``, the iterators and ``slide.compute``.

    Raises:
        ValueError: If ``args.iter_type`` is neither 'python' nor 'tf'.
    """
    if args.iter_type == 'python':

        def compute_fn(slide, args, model=None):
            """Apply `model` to each batch and place the result on the slide."""
            print('Slide with {}'.format(len(slide.tile_list)))
            it_factory = PythonIterator(slide, args)
            for k, (img, idx) in enumerate(it_factory.yield_batch()):
                prob = model(img)
                if k % 50 == 0:
                    print('Batch #{:04d} idx:{} img:{} prob:{}'.format(
                        k, idx.shape, img.shape, prob.shape))
                # NOTE(review): this branch writes to the 'prob' output while
                # the caller below registers only 'att' -- confirm that the
                # 'prob' output exists when this branch is used.
                slide.place_batch(prob, idx, 'prob', mode='tile')
            ret = slide.output_imgs['prob']
            return ret

    # Tensorflow iterator built on tf.data (in eager mode)
    elif args.iter_type == 'tf':

        def compute_fn(slide, args, model=None):
            """Encode all tiles, run MIL attention and place it on the slide."""
            assert tf.executing_eagerly()
            print('Slide with {}'.format(len(slide.tile_list)))

            eager_iterator = TensorflowIterator(slide, args).make_iterator()

            # Collect per-batch features and tile indices. The attention is
            # computed once over the whole concatenated bag afterwards.
            features, indices = [], []
            for k, (img, idx) in enumerate(eager_iterator):
                features.append(
                    model.encode_bag(img, training=False, return_z=True))
                indices.append(idx.numpy())

                # Only materialise the image on the host when it is logged.
                if k % 50 == 0:
                    print('Batch #{:04d}\t{}'.format(k, img.numpy().shape))

            features = tf.concat(features, axis=0)
            _, att = model.mil_attention(features,
                                         training=False,
                                         return_raw_att=True)
            att = np.squeeze(att)
            indices = np.concatenate(indices)
            slide.place_batch(att, indices, 'att', mode='tile')
            ret = slide.output_imgs['att']
            return ret

    else:
        # Fail before building the model and staging slides. Otherwise the
        # unbound compute_fn only shows up as a swallowed NameError for
        # every slide.
        raise ValueError(
            "Unsupported iter_type {!r}; expected 'python' or 'tf'".format(
                args.iter_type))

    # Set up the model first
    encoder_args = get_encoder_args(args.encoder)
    model = MilkEager(encoder_args=encoder_args,
                      mil_type=args.mil,
                      deep_classifier=args.deep_classifier,
                      batch_size=args.batchsize,
                      temperature=args.temperature,
                      heads=args.heads)

    # Call the model once on a zero input before loading the weights.
    x = tf.zeros((1, 1, args.process_size, args.process_size, 3))
    _ = model(x, verbose=True, head='all', training=True)
    model.load_weights(args.snapshot, by_name=True)

    # keras Model subclass
    model.summary()

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides
    for src in slides:
        # The destination is the source path with its extension replaced by
        # --suffix. No existence check is made here, so an existing output
        # is recomputed.
        dst = repext(src, args.suffix)

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)

        # try/except/finally so the staged copy is always removed, even if
        # processing this slide fails.
        try:
            slide = Slide(rdsrc, args)

            # Register the 'att' output together with the compute_fn
            # selected above.
            slide.initialize_output('att',
                                    args.n_classes,
                                    mode='tile',
                                    compute_fn=compute_fn)

            ret = slide.compute('att', args, model=model)
            print('{} --> {}'.format(ret.shape, dst))
            # The last axis is reversed before saving.
            np.save(dst, ret[:, :, ::-1])
        except Exception as e:
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
示例#4
0
def main(args):
  """Save a per-slide attention image and the encoded tile features.

  Builds a MilkEager model, loads its weights from `args.snapshot`, then for
  every path listed in `args.slides` computes the 'att' output from a staged
  copy of the slide. Two arrays are written with np.save: the attention image
  (destination derived from `args.suffix`) and the features (destination
  derived from `args.suffix + '.feat.npy'`).
  """

  def compute_fn(slide, args, model=None, n_dropout=10):
    """Encode all tiles of `slide`, run MIL attention and place it on the slide.

    Returns a tuple of the slide's 'att' output image and the concatenated
    features converted to numpy. `n_dropout` is accepted but not used by the
    current code path. `all_heads` is read from the enclosing `main` scope.
    """
    assert tf.executing_eagerly()
    print('Slide with {}'.format(len(slide.tile_list)))

    tile_iterator = TensorflowIterator(slide, args).make_iterator()

    # Gather the encoded batches and their tile indices.
    encoded_batches = []
    index_batches = []
    for batch_num, (img, idx) in enumerate(tile_iterator):
      encoded = model.encode_bag(img, training=False, return_z=True)
      encoded_batches.append(encoded)
      index_batches.append(idx.numpy())

      img_np = img.numpy()
      if batch_num % 50 == 0:
        print('Batch #{:04d}\t{}'.format(batch_num, img_np.shape))

    features = tf.concat(encoded_batches, axis=0)

    # One attention pass over the whole bag, then the classifier heads.
    z_att, att = model.mil_attention(
      features, training=False, return_raw_att=True)
    yhat_multihead = model.apply_classifier(
      z_att, heads=all_heads, training=False)
    print('yhat mean {}'.format(np.mean(yhat_multihead, axis=0)))

    tile_indices = np.concatenate(index_batches)
    att_flat = np.squeeze(att)
    slide.place_batch(att_flat, tile_indices, 'att', mode='tile')
    att_image = slide.output_imgs['att']
    print('Got attention image: {}'.format(att_image.shape))

    return att_image, features.numpy()

  ## Begin main script:
  # Model construction comes first.
  model = MilkEager(
    encoder_args=get_encoder_args(args.encoder),
    mil_type=args.mil,
    deep_classifier=args.deep_classifier,
    batch_size=args.batchsize,
    temperature=args.temperature,
    heads=args.heads)

  # Call the model once on a zero input before loading the weights.
  side = args.process_size
  x = tf.zeros((1, 1, side, side, 3))
  all_heads = list(range(10))
  _ = model(x, verbose=True, heads=all_heads, training=True)
  model.load_weights(args.snapshot, by_name=True)

  # keras Model subclass
  model.summary()

  # One slide path per line.
  with open(args.slides, 'r') as listing:
    slides = [entry.strip() for entry in listing]

  for slide_path in slides:
    # Both destinations come from swapping the source file extension.
    # No existence check is performed here.
    att_dst = repext(slide_path, args.suffix)
    feat_dst = repext(slide_path, args.suffix+'.feat.npy')

    # Stage the slide on the ramdisk (one-time copy cost).
    staged = cpramdisk(slide_path, args.ramdisk)
    print('\n\nFile:', staged)

    # The finally clause guarantees the staged copy is removed whether or
    # not processing succeeds.
    try:
      slide = Slide(staged, args)

      # Register the 'att' output with the compute_fn defined above.
      slide.initialize_output(
        'att', args.n_classes, mode='tile', compute_fn=compute_fn)

      att_image, feats = slide.compute('att', args, model=model)
      print('{} --> {}'.format(att_image.shape, att_dst))
      print('{} --> {}'.format(feats.shape, feat_dst))
      np.save(att_dst, att_image)
      np.save(feat_dst, feats)
    except Exception as err:
      print(err)
      traceback.print_tb(err.__traceback__)
    finally:
      print('Removing {}'.format(staged))
      os.remove(staged)
示例#5
0
def main(args, sess):
    """Run a graph-mode model over every listed slide and save 'prob' maps.

    For each slide in ``args.slides`` whose destination does not exist yet,
    the slide is staged via ``cpramdisk``, the 'prob' output is computed,
    scaled by 255, cast to uint8 and saved with ``np.save``. Timing
    statistics of the successful slides are passed to ``write_times``.

    Args:
        args: Parsed command-line namespace. Reads ``model``,
            ``process_size``, ``n_classes``, ``snapshot``, ``slides``,
            ``suffix``, ``ramdisk``, ``verbose`` and ``timefile``. The whole
            namespace is also forwarded to ``Slide``, ``TensorflowIterator``
            and ``slide.compute``.
        sess: Session used to run the iterator ops and the initializer. It is
            also handed to ``gg.get_model``.
    """

    def compute_fn(slide, args, sess=None):
        """Run inference over every tile batch and return the 'prob' image.

        `model` is taken from the enclosing `main` scope.
        """
        print('\n\nSlide with {}'.format(len(slide.tile_list)))

        # The iterator is kept separate from the inference function, so
        # each batch is fetched with sess.run and then passed to the model.
        tf_iterator = TensorflowIterator(slide, args).make_iterator()
        img_op, idx_op = tf_iterator.get_next()

        # k counts batches, nk counts tiles.
        k, nk = 0, 0
        while True:
            try:
                img, idx = sess.run([
                    img_op,
                    idx_op,
                ])
                prob = model.inference(img)
                nk += img.shape[0]
                slide.place_batch(prob, idx, 'prob', mode='full', clobber=True)
                k += 1

                if k % 50 == 0:
                    prstr = 'Batch #{:04d} idx:{} img:{} ({:2.2f}-{:2.2f}) prob:{} T {} \
          '.format(k, idx.shape, img.shape, img.min(), img.max(), prob.shape,
                    nk)
                    print(prstr)
                    if args.verbose:
                        print('More info: ')
                        print('img: ', img.dtype, img.min(), img.max(),
                              img.mean())
                        # Per-class count of the argmax prediction.
                        pmax = np.argmax(prob, axis=-1).ravel()
                        for u in range(args.n_classes):
                            count_u = (pmax == u).sum()
                            print('- class {:02d} : {}'.format(u, count_u))

            except tf.errors.OutOfRangeError:
                # Normal termination: the iterator is exhausted.
                print('Finished.')
                print('Total: {}'.format(nk))
                break

            except Exception as e:
                # Any other error ends the loop; whatever was placed so far
                # is still assembled and returned below.
                print(e)
                traceback.print_tb(e.__traceback__)
                break

        # We've exited the loop. Clean up the iterator
        del tf_iterator, idx_op, img_op

        slide.make_outputs()
        ret = slide.output_imgs['prob']
        return ret

    # Set up the model first
    model = gg.get_model(args.model, sess, args.process_size, args.n_classes)
    # Initialize first, THEN restore to replace the already-created weights
    sess.run(tf.global_variables_initializer())
    model.restore(args.snapshot)

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides; Record times
    nslides = len(slides)
    successes, ntiles, total_time, fpss = [], [], [], []
    for i, src in enumerate(slides):
        # The destination is the source path with its extension replaced by
        # --suffix. Slides whose destination already exists are skipped.
        dst = repext(src, args.suffix)
        if os.path.exists(dst):
            print('{} Exists.'.format(dst))
            continue

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)

        # Reset per iteration so the cleanup below never touches a slide
        # object left over from an earlier iteration.
        slide = None
        try:
            print('\n\n-------------------------------')
            print('File:', rdsrc, '{:04d} / {:04d}'.format(i, nslides))
            t0 = time.time()
            slide = Slide(rdsrc, args)

            # TODO pull the expected output size from the model.. ?
            slide.initialize_output('prob',
                                    args.n_classes,
                                    mode='full',
                                    compute_fn=compute_fn)

            ret = slide.compute('prob', args, sess=sess)
            print('{} --> {}'.format(ret.shape, dst))
            ret = (ret * 255).astype(np.uint8)
            np.save(dst, ret)

            # If it finishes, record some stats
            tend = time.time()
            deltat = tend - t0
            fps = len(slide.tile_list) / float(deltat)
            successes.append(rdsrc)
            ntiles.append(len(slide.tile_list))
            total_time.append(deltat)
            fpss.append(fps)
        except Exception as e:
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            # Close the slide before deleting the file it was opened from.
            # Close errors are reported, but KeyboardInterrupt / SystemExit
            # are allowed to propagate.
            print('Cleaning slide object')
            if slide is None:
                print('No slide object not found to clean up ?')
            else:
                try:
                    slide.close()
                except Exception as e:
                    print(e)
                    traceback.print_tb(e.__traceback__)
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)

    write_times(args.timefile, successes, ntiles, total_time, fpss)
示例#6
0
def main(args, sess):
    """Run a loaded model over every listed slide and save the 'prob' output.

    A float32 placeholder of shape (None, process_size, process_size, 3) is
    fed batch by batch with tiles fetched from a ``TensorflowIterator``; the
    resulting 'prob' image of each slide is saved with ``np.save``.

    Args:
        args: Parsed command-line namespace. Reads ``process_size``,
            ``snapshot``, ``slides``, ``suffix`` and ``ramdisk``. The whole
            namespace is also forwarded to ``Slide``, ``TensorflowIterator``
            and ``slide.compute``.
        sess: Session used to run the iterator ops, the model op and the
            variable initializer.
    """

    def compute_fn(slide, args, sess=None, img_pl=None, prob_op=None):
        """Feed every tile batch through `prob_op` and return the 'prob' image."""
        print('\n\nSlide with {}'.format(len(slide.tile_list)))

        # A fresh iterator (and its ops) is created for every slide.
        tf_iterator = TensorflowIterator(slide, args).make_iterator()
        img_op, idx_op = tf_iterator.get_next()

        # k counts batches, nk counts tiles.
        k, nk = 0, 0
        while True:
            try:
                img, idx = sess.run([
                    img_op,
                    idx_op,
                ])
                prob = sess.run(prob_op, {img_pl: img})
                nk += img.shape[0]
                if k % 50 == 0:
                    # Argument order follows the labels in the format string:
                    # img:<shape> (<max>).
                    print('Batch #{:04d} idx:{} img:{} ({}) prob:{} T {} \
          '.format(k, idx.shape, img.shape, img.max(), prob.shape, nk))
                slide.place_batch(prob, idx, 'prob', mode='tile')
                k += 1
            except tf.errors.OutOfRangeError:
                # Normal termination: the iterator is exhausted.
                print('Finished.')
                print('Total: {}'.format(nk))
                break

        # Read the assembled output once, after the last batch is placed.
        return slide.output_imgs['prob']

    # Set up a placeholder for the input, then the model on top of it
    img_pl = tf.placeholder(tf.float32,
                            (None, args.process_size, args.process_size, 3))
    model = load_model(args.snapshot)
    prob_op = model(img_pl)
    # NOTE(review): the initializer runs after load_model. If load_model
    # restores weights into variables owned by this session, this may reset
    # them -- confirm.
    sess.run(tf.global_variables_initializer())

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides
    for src in slides:
        # The destination is the source path with its extension replaced by
        # --suffix. No existence check is made here, so an existing output
        # is recomputed.
        dst = repext(src, args.suffix)

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)

        # try/except/finally so the staged copy is always removed, even if
        # processing this slide fails.
        try:
            print('\n\n-------------------------------')
            slide = Slide(rdsrc, args)

            # NOTE(review): the output depth is hard-coded to 4 here --
            # confirm it matches the number of classes the model produces.
            slide.initialize_output('prob',
                                    4,
                                    mode='tile',
                                    compute_fn=compute_fn)

            ret = slide.compute('prob',
                                args,
                                sess=sess,
                                img_pl=img_pl,
                                prob_op=prob_op)
            print('{} --> {}'.format(ret.shape, dst))
            np.save(dst, ret)
        except Exception as e:
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)