Example No. 1
def main(argv=()):
  del argv  # Unused.
  hvd.init()
  #logging.basicConfig(level=logging.ERROR)
  model_class = import_symbol(FLAGS.model_name)
  # Multiply the task number by a value large enough that tasks starting at a
  # similar time cannot end up with the same seed.
  seed = int(time.time() + hvd.rank() * 3600 * 24)
  random.seed(seed)
  if hvd.rank() == 0:
   # logging.basicConfig(level=logging.INFO) # not for tflogging
    logging.set_verbosity(logging.INFO)
   # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
    logging.info('Rank: %d / %d' % (hvd.rank(), hvd.size()))
    logging.info('Random seed: %r', seed)
    logging.info('Learning rate: %r', get_learning_rate(1, FLAGS.batch_size))
    logging.info('Batch size: %d', FLAGS.batch_size)
    try:
      logging.info('Python version: {}'.format(sys.version))
      logging.info('numpy version: {}'.format(np.__version__))
      logging.info('tensorflow version: {}'.format(tf.__version__))
      logging.info('horovod version: {}'.format(hvd.__version__))
      import mkl
      mklstr = mkl.get_version_string()
      logging.info('MKL: {}'.format(mklstr))
    except Exception:  # Version logging is best-effort; never abort startup over it.
      pass
  train_ffn(model_class, batch_size=FLAGS.batch_size,
            **json.loads(FLAGS.model_args))
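A note on the seeding above: the 3600 * 24 factor offsets each rank's seed by one day's worth of seconds (86400), so two workers that read time.time() within the same day cannot end up with the same seed. A minimal standalone sketch of the arithmetic (the rank values 0 and 1 are only illustrative):

import time

t = time.time()                       # both workers start at the same wall-clock time
seed_rank0 = int(t + 0 * 3600 * 24)   # rank 0
seed_rank1 = int(t + 1 * 3600 * 24)   # rank 1
print(seed_rank1 - seed_rank0)        # one full day (86400 s) apart, far larger than any start-time jitter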
Example No. 2
def main(argv=()):
  del argv  # Unused.
  model_class = import_symbol(FLAGS.model_name)
  # Multiply the task number by a value large enough that tasks starting at a
  # similar time cannot end up with the same seed.
  seed = int(time.time() + FLAGS.task * 3600 * 24)
  logging.info('Random seed: %r', seed)
  random.seed(seed)

  train_ffn(model_class, batch_size=FLAGS.batch_size,
            **json.loads(FLAGS.model_args))
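These main() functions are excerpts; the flag definitions and the program entry point live in the surrounding training script. A minimal sketch of that scaffolding, assuming the absl app/flags framework the snippets appear to rely on (the flag names mirror the ones referenced above, but the defaults and help strings here are hypothetical, and helpers such as import_symbol and train_ffn come from the surrounding module):

from absl import app
from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_string('model_name', None, 'Importable name of the model class to train.')
flags.DEFINE_string('model_args', '{}', 'JSON dictionary of extra keyword args for train_ffn.')
flags.DEFINE_integer('batch_size', 4, 'Number of examples per training step.')
flags.DEFINE_integer('task', 0, 'Task id of this worker.')


if __name__ == '__main__':
  app.run(main)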
Example No. 3
def main(argv=()):
    del argv  # Unused.
    model_class = import_symbol(FLAGS.model_name)
    # Multiply the task number by a value large enough that tasks starting at a
    # similar time cannot end up with the same seed.
    seed = int(time.time() + FLAGS.task * 3600 * 24)
    logging.info('Random seed: %r', seed)
    random.seed(seed)

    # Added by julien: force TF to use a single GPU by making the others invisible.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.device_ID)

    train_ffn(model_class,
              batch_size=FLAGS.batch_size,
              **json.loads(FLAGS.model_args))
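The two environment variables above control which GPUs the CUDA runtime exposes to the process: CUDA_DEVICE_ORDER=PCI_BUS_ID makes device indices follow PCI bus order (the same order nvidia-smi reports), and CUDA_VISIBLE_DEVICES limits the process to the listed devices. They must be set before TensorFlow initializes CUDA. A minimal standalone sketch (the device index 0 is only an example):

import os

# Set these before TensorFlow touches the GPUs, or they have no effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"   # expose only the first GPU to this process

import tensorflow as tf
print(tf.config.experimental.list_physical_devices('GPU'))  # lists at most one GPU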
Example No. 4
def main(argv=()):
    del argv  # Unused.

    hvd.init()  # Initialize Horovod here.

    if hvd.rank() == 0:
        print("---number of horovod ranks: ", hvd.size())
        if FLAGS.do_benchmark_test == 1:
            print("---this is a benchmark run")

    model_class = import_symbol(FLAGS.model_name)
    # Multiply the task number by a value large enough that tasks starting at a
    # similar time cannot end up with the same seed.
    seed = int(time.time() + hvd.rank() * 3600 * 24)
    if hvd.rank() == 0:
        logging.info('Random seed: %r', seed)
    random.seed(seed)
    if hvd.rank() == 0:
        print("train dir:", FLAGS.train_dir)

    train_ffn(model_class,
              batch_size=FLAGS.batch_size,
              **json.loads(FLAGS.model_args))
Example No. 5
def main(unused_argv):
    model_class = import_symbol(FLAGS.model_name, 'em_mask')
    model_args = json.loads(FLAGS.model_args)
    fov_size = tuple([int(i) for i in model_args['fov_size']])

    if FLAGS.input_offset and FLAGS.input_size:
        input_offset = np.array(
            [int(i) for i in FLAGS.input_offset.split(',')])
        input_size = np.array([int(i) for i in FLAGS.input_size.split(',')])
    else:
        input_offset, input_size = precomputed_utils.get_offset_and_size(
            FLAGS.input_volume)

    if 'label_size' in model_args:
        label_size = tuple([int(i) for i in model_args['label_size']])
    else:
        label_size = fov_size
        model_args['label_size'] = label_size

    input_mip = FLAGS.input_mip
    input_cv = CloudVolume('file://%s' % FLAGS.input_volume,
                           mip=FLAGS.input_mip)
    resolution = input_cv.meta.resolution(FLAGS.input_mip)
    overlap = [int(i) for i in FLAGS.overlap]
    num_bbox = precomputed_utils.get_num_bbox(input_offset, input_size,
                                              fov_size, overlap)
    logging.warning('num bbox: %s', num_bbox)

    num_classes = int(model_args['num_classes'])
    params = {
        'model_class': model_class,
        'model_args': model_args,
        'batch_size': FLAGS.batch_size,
        'num_classes': num_classes
    }

    mask_estimator = prepare_model(params, FLAGS.model_checkpoint,
                                   FLAGS.use_gpu)
    tensors_to_log = {"center": "center"}
    logging_hook = tf.compat.v1.train.LoggingTensorHook(tensors=tensors_to_log,
                                                        every_n_iter=1)

    predictions = mask_estimator.predict(
        input_fn=lambda: precomputed_utils.predict_input_fn_precomputed(
            input_volume=FLAGS.input_volume,
            input_offset=input_offset,
            input_size=input_size,
            input_mip=input_mip,
            chunk_shape=fov_size,
            label_shape=label_size,
            overlap=overlap,
            batch_size=FLAGS.batch_size,
            offset=FLAGS.image_mean,
            scale=FLAGS.image_stddev,
            var_threshold=FLAGS.var_threshold),
        predict_keys=['center', 'logits', 'class_prediction'],
        # hooks=[logging_hook],
        hooks=[],
        yield_single_examples=False)

    _ = precomputed_utils.writer(predictions,
                                 output_volume=FLAGS.output_volume,
                                 output_offset=input_offset,
                                 output_size=input_size,
                                 chunk_shape=fov_size,
                                 label_shape=label_size,
                                 resolution=resolution,
                                 overlap=overlap,
                                 num_iter=num_bbox // mpi_size //
                                 FLAGS.batch_size)
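Because yield_single_examples=False, the generator returned by Estimator.predict() yields one dictionary per batch (keyed by the requested predict_keys) rather than one per example. Here the generator is handed to precomputed_utils.writer; a minimal sketch of a hypothetical direct consumer, for illustration only:

for batch in predictions:
    centers = batch['center']              # numpy arrays with a leading batch dimension
    logits = batch['logits']
    classes = batch['class_prediction']
    # ... post-process or write out the batch here ...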
Example No. 6
def main(unused_argv):
    hvd.init()
    model_class = import_symbol(FLAGS.model_name, 'em_mask')
    model_args = json.loads(FLAGS.model_args)
    fov_size = tuple([int(i) for i in model_args['fov_size']])
    if 'label_size' in model_args:
        label_size = tuple([int(i) for i in model_args['label_size']])
    else:
        label_size = fov_size
        model_args['label_size'] = label_size
    num_classes = int(model_args['num_classes'])

    if num_classes == 1:
        # model_fn = model_utils.mask_model_fn_regression
        model_fn = model_utils.mask_model_fn_binary
    else:
        model_fn = model_utils.mask_model_fn_classfication

    params = {
        'model_class': model_class,
        'model_args': model_args,
        'batch_size': FLAGS.batch_size,
        'num_classes': num_classes,
        'learning_rate': FLAGS.learning_rate,
        'weighted': FLAGS.weighted
    }

    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()],
                                                   'GPU')
    sess_config = tf.compat.v1.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    sess_config.gpu_options.visible_device_list = str(hvd.local_rank())

    # Only rank 0 writes to the training directory; passing None for the save
    # settings on the other ranks disables their summaries and checkpoints.
    model_dir = FLAGS.train_dir if hvd.rank() == 0 else None
    save_summary_steps = 90 if hvd.rank() == 0 else None
    save_checkpoints_secs = 540 if hvd.rank() == 0 else None

    config = tf.estimator.RunConfig(
        model_dir=model_dir,
        save_summary_steps=save_summary_steps,
        save_checkpoints_secs=save_checkpoints_secs,
        session_config=sess_config,
        keep_checkpoint_max=10,
    )
    mask_estimator = tf.estimator.Estimator(model_fn=model_fn,
                                            config=config,
                                            params=params)
    # Broadcast the initial variable states from rank 0 to all other workers so
    # every rank starts training from identical weights.
    bcast_hook = hvd.BroadcastGlobalVariablesHook(0)

    if FLAGS.weights_volumes:
        input_fn = io_utils.train_input_fn_with_weight(
            FLAGS.data_volumes, FLAGS.label_volumes, FLAGS.weights_volumes,
            FLAGS.tf_coords, num_classes, fov_size, label_size,
            FLAGS.batch_size, FLAGS.image_mean, FLAGS.image_stddev,
            FLAGS.rotation)
    else:
        # input_fn = io_utils.train_input_fn(
        #   FLAGS.data_volumes,
        #   FLAGS.label_volumes,
        #   FLAGS.tf_coords,
        #   num_classes,
        #   fov_size,
        #   label_size,
        #   FLAGS.batch_size,
        #   FLAGS.image_mean,
        #   FLAGS.image_stddev,
        #   FLAGS.rotation)
        # input_fn = io_utils.train_input_rebalance_fn(
        input_fn = io_utils.train_input_mult_fn(
            FLAGS.data_volumes, FLAGS.label_volumes, FLAGS.tf_coords,
            num_classes, fov_size, label_size, FLAGS.batch_size,
            FLAGS.image_mean, FLAGS.image_stddev, FLAGS.rotation, False)

    mask_estimator.train(input_fn=input_fn,
                         steps=FLAGS.max_steps,
                         hooks=[bcast_hook])