示例#1
0
    def start_bundle(self):
        """Create the storage client and the GoesReader if they are missing.

        Each object is only built when the corresponding attribute is still
        None, so existing instances are reused.
        """
        # pylint: disable=reimported,redefined-outer-name
        import io
        import logging
        import numpy as np
        import os
        from PIL import Image
        from google.api_core import client_info
        from google.cloud import storage as gcs
        from goes_truecolor.lib import goes_predict
        from goes_truecolor.lib import goes_reader

        # Cloud Storage client, created on first use.
        if self.gcs_client is None:
            self.gcs_client = gcs.Client(project=self.project_id)

        # The GoesReader is built lazily: a reader created up front would be
        # pickled by beam when this object is copied to other workers.
        if self.reader is None:
            logging.info('creating GoesReader')
            self.reader = goes_reader.GoesReader(
                project_id=self.project_id,
                goes_bucket_name=self.goes_bucket_name,
                shape=(self.image_size, self.image_size),
                tmp_dir=self.tmp_dir,
                client=self.gcs_client,
                cache=False,
            )
示例#2
0
 def test_raw_image(self):
     """GoesReader.raw_image yields a (4, 4, 4) image for four channels."""
     timestamp = datetime.datetime(
         2018, 1, 1, 12, 15, 0, tzinfo=dateutil.tz.tzutc())
     channel_ids = [7, 8, 9, 10]

     # One fake source image per requested channel.
     for channel_id in channel_ids:
         self.create_fake_goes_image(timestamp, channel_id)

     goes = goes_reader.GoesReader(
         project_id='test',
         shape=(4, 4),
         tmp_dir=self.tmp_dir,
         client=self.client)
     image, _ = goes.raw_image(timestamp, channel_ids)
     self.assertEqual((4, 4, 4), image.shape)
示例#3
0
 def test_cloud_mask(self):
     """GoesReader.cloud_mask yields a mask matching the (4, 4) shape."""
     timestamp = datetime.datetime(
         2018, 1, 1, 12, 15, 0, tzinfo=dateutil.tz.tzutc())
     channel_ids = [1]

     # Only channel 1 is populated with a fake image for this test.
     for channel_id in channel_ids:
         self.create_fake_goes_image(timestamp, channel_id)

     goes = goes_reader.GoesReader(
         project_id='test',
         shape=(4, 4),
         tmp_dir=self.tmp_dir,
         client=self.client)
     cloud_mask, _ = goes.cloud_mask(timestamp)
     self.assertEqual((4, 4), cloud_mask.shape)
示例#4
0
    def test_load_channel_images(self):
        """GoesReader.load_channel_images returns image and metadata by channel."""
        timestamp = datetime.datetime(
            2018, 1, 1, 12, 15, 0, tzinfo=dateutil.tz.tzutc())
        channel = 1
        self.create_fake_goes_image(timestamp, channel)

        goes = goes_reader.GoesReader(
            project_id='test',
            shape=(4, 4),
            tmp_dir=self.tmp_dir,
            client=self.client)

        # The result is indexed by channel number; each entry is an
        # (image, metadata) pair.
        images_by_channel = goes.load_channel_images(timestamp, [channel])
        image, metadata = images_by_channel[channel]
        self.assertEqual((4, 4), image.shape)
        self.assertAlmostEqual(1e-2, metadata['kappa0'])
示例#5
0
  def process(self, t: datetime.datetime) -> Generator[Text, None, None]:
    """Yield serialized tf.train.Example tiles for the images at time t.

    Args:
      t: timestamp passed to the reader's cloud_mask and raw_image calls.

    Yields:
      The result of SerializeToString() for one tf.train.Example per tile,
      holding the flattened cloud-mask tile and the flattened IR tile.
      Nothing is yielded when either the cloud mask or the IR image is None.
    """
    # pylint: disable=reimported,redefined-outer-name
    import logging
    import numpy as np
    import tensorflow as tf
    from goes_truecolor.lib import goes_reader
    from goes_truecolor.learning import hparams

    # Build the reader on first use; a reader created up front would be
    # pickled by beam when this object is copied to other workers.
    if self.reader is None:
      logging.info('creating GoesReader')
      self.reader = goes_reader.GoesReader(
          project_id=self.project_id,
          goes_bucket_name=self.goes_bucket_name,
          shape=(self.image_size, self.image_size),
          tmp_dir=self.tmp_dir,
          client=self.gcs_client)

    # Fetch the cloud mask; stop early when the reader has none.
    logging.info('creating cloud mask image %s', t)
    mask_result = self.reader.cloud_mask(t)
    if mask_result is None:
      return
    mask_img, _ = mask_result

    # Fetch the IR channels; stop early when the reader has none.
    logging.info('creating IR image for %s', t)
    ir_result = self.reader.raw_image(t, self.ir_channels)
    if ir_result is None:
      return
    ir_img, _ = ir_result

    def to_feature(tile):
      # Flatten the tile into an int64-list feature.
      return tf.train.Feature(
          int64_list=tf.train.Int64List(value=tile.ravel()))

    # Cut both images into the same grid: first into horizontal bands
    # (axis 0), then each band into tiles (axis 1).
    logging.info('creating tiles for %s', t)
    tiles_per_side = self.image_size // self.tile_size
    mask_bands = np.split(mask_img, tiles_per_side, axis=0)
    ir_bands = np.split(ir_img, tiles_per_side, axis=0)
    for mask_band, ir_band in zip(mask_bands, ir_bands):
      tile_pairs = zip(np.split(mask_band, tiles_per_side, axis=1),
                       np.split(ir_band, tiles_per_side, axis=1))
      for mask_tile, ir_tile in tile_pairs:
        example = tf.train.Example(features=tf.train.Features(feature={
            hparams.CLOUD_MASK_FEATURE_NAME: to_feature(mask_tile),
            hparams.IR_CHANNELS_FEATURE_NAME: to_feature(ir_tile),
        }))
        yield example.SerializeToString()
示例#6
0
    def test_list_time_range(self):
        """GoesReader.list_time_range finds the single fake image in range."""
        timestamp = datetime.datetime(
            2018, 1, 1, 12, 15, 0, tzinfo=dateutil.tz.tzutc())
        channel = 1
        expected_filename = self.create_fake_goes_image(timestamp, channel)

        goes = goes_reader.GoesReader(
            project_id='test',
            shape=(4, 4),
            tmp_dir=self.tmp_dir,
            client=self.client)

        # Search a two-hour window centred on the image timestamp; exactly
        # one (time, files-by-channel) entry is expected.
        one_hour = datetime.timedelta(hours=1)
        ((found_time, files_by_channel),) = goes.list_time_range(
            timestamp - one_hour, timestamp + one_hour)
        self.assertEqual(timestamp, found_time)
        self.assertEqual(expected_filename, files_by_channel[channel])
示例#7
0
    def test_load_channel_images_from_files(self):
        """GoesReader.load_channel_images_from_files loads a listed file table."""
        timestamp = datetime.datetime(
            2018, 1, 1, 12, 15, 0, tzinfo=dateutil.tz.tzutc())
        channel = 1
        self.create_fake_goes_image(timestamp, channel)

        goes = goes_reader.GoesReader(
            project_id='test',
            shape=(4, 4),
            tmp_dir=self.tmp_dir,
            client=self.client)

        # List a two-hour window around the timestamp; exactly one entry is
        # expected, and its file table is fed back into the loader.
        one_hour = datetime.timedelta(hours=1)
        ((_, file_table),) = goes.list_time_range(
            timestamp - one_hour, timestamp + one_hour)
        images_by_channel = goes.load_channel_images_from_files(
            file_table, [channel])
        image, metadata = images_by_channel[channel]
        self.assertEqual((4, 4), image.shape)
        self.assertAlmostEqual(1e-2, metadata['kappa0'])
示例#8
0
def main(unused_argv):
    """Run the beam pipeline that applies CreateCloudMasks to GOES files.

    When both FLAGS.start_date and FLAGS.end_date are set, the files in that
    range are listed up front and fed to the pipeline; otherwise the pipeline
    reads from a Pub/Sub subscription and lists files with ListFiles.

    Args:
      unused_argv: ignored.
    """
    # Work out the inputs: an explicit date range is listed here, otherwise
    # `files` stays None and the Pub/Sub source is used below.
    files = None
    if FLAGS.start_date and FLAGS.end_date:
        utc = dateutil.tz.tzutc()
        # The parsed dates are stamped as UTC (tzinfo is replaced, not
        # converted).
        start_date = dateparser.parse(FLAGS.start_date).replace(tzinfo=utc)
        end_date = dateparser.parse(FLAGS.end_date).replace(tzinfo=utc)
        reader = goes_reader.GoesReader(
            project_id=FLAGS.project,
            goes_bucket_name=FLAGS.goes_bucket,
            shape=(FLAGS.image_size, FLAGS.image_size))
        files = reader.list_time_range(start_date, end_date)

    # Assemble the pipeline options.
    tmp_root = os.path.join(FLAGS.tmp_dir, 'tmp')
    source_dir = os.path.dirname(os.path.abspath(__file__))
    job_name = datetime.datetime.now().strftime('cloud-masks-%y%m%d-%H%M%S')
    options = {
        'author': 'Jason Hickey',
        'author_email': '*****@*****.**',
        'region': 'us-central1',
        'staging_location': os.path.join(tmp_root, 'staging'),
        'temp_location': tmp_root,
        'job_name': job_name,
        'project': FLAGS.project,
        'num_workers': 10,
        'max_num_workers': FLAGS.max_workers,
        'machine_type': 'n1-standard-4',
        'setup_file': os.path.join(source_dir, '../../setup.py'),
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'save_main_session': False,
        'streaming': True,
    }
    opts = beam.pipeline.PipelineOptions(flags=[], **options)

    # Build and run the pipeline; it runs when the `with` block exits.
    with beam.Pipeline(FLAGS.runner, options=opts) as pipeline:
        # NOTE(review): an empty listing is falsy, so a date range that
        # matches no files also takes the Pub/Sub branch -- confirm that
        # this fallback is intended.
        if files:
            inputs = pipeline | beam.Create(files)
        else:
            inputs = (
                pipeline
                | beam.io.gcp.pubsub.ReadFromPubSub(
                    subscription='projects/weather-324/subscriptions/goes-16')
                | beam.ParDo(
                    ListFiles(
                        project_id=FLAGS.project,
                        goes_bucket_name=FLAGS.goes_bucket,
                        image_size=FLAGS.image_size)))

        create_masks = CreateCloudMasks(
            project_id=FLAGS.project,
            goes_bucket_name=FLAGS.goes_bucket,
            image_size=FLAGS.image_size,
            tile_size=FLAGS.tile_size,
            tile_border_size=FLAGS.tile_border_size,
            ir_channels=goes_reader.IR_CHANNELS,
            model_dir=FLAGS.model_dir,
            output_bucket=FLAGS.output_bucket,
            output_dir=FLAGS.output_dir)
        (inputs
         | beam.ParDo(create_masks)
         | beam.io.WriteToText(FLAGS.output_summary))