示例#1
0
def create_snapshots_on_cloud(bucket, project, runner):
    """Run a Beam pipeline that renders a GOES infrared JPEG for every
    timestamp in the Hurricane Maria track file.

    Args:
        bucket: GCS bucket name (no gs:// prefix) used for the staged input
            CSV, Dataflow temp/staging dirs, and the output images.
        project: GCP project to bill the Dataflow job to.
        runner: Beam runner name, e.g. 'DataflowRunner' or 'DirectRunner';
            only DirectRunner blocks until the job finishes.
    """
    import datetime
    import os

    import apache_beam as beam
    import hurricanes.goes_to_jpeg as g2j

    # Stage the local track file on GCS so remote workers can read it.
    input_file = 'maria/input/maria.csv'
    g2j.copy_togcs('MARIA.csv', bucket, input_file)

    OUTPUT_DIR = 'gs://{}/maria/'.format(bucket)
    options = {
        'staging_location': os.path.join(OUTPUT_DIR, 'tmp', 'staging'),
        'temp_location': os.path.join(OUTPUT_DIR, 'tmp'),
        'job_name':
        'maria-' + datetime.datetime.now().strftime('%y%m%d-%H%M%S'),
        'project': project,
        'max_num_workers': 12,
        'setup_file': './setup.py',
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'no_save_main_session': True
    }
    pipeline_opts = beam.pipeline.PipelineOptions(flags=[], **options)
    p = beam.Pipeline(runner, options=pipeline_opts)

    def to_jpeg(fields):
        # NOTE(review): the filename uses dt.second where dt.minute was
        # probably intended (YYYYMMDDHHMM); kept as-is so output paths
        # stay compatible — confirm with the data owner before changing.
        dt, lat, lon = fields
        return g2j.goes_to_jpeg(
            g2j.get_objectId_at(dt), lat, lon, bucket,
            'maria/images/ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
                dt.year, dt.month, dt.day, dt.hour, dt.second))

    (p
     | 'lines' >> beam.io.ReadFromText('gs://{}/{}'.format(bucket, input_file))
     | 'parse' >> beam.Map(g2j.parse_line)
     # Python-2-only tuple-parameter lambda (PEP 3113 removed it) replaced
     # with a named function so the pipeline also runs on Python 3.
     | 'to_jpg' >> beam.Map(to_jpeg))
    job = p.run()
    if runner == 'DirectRunner':
        job.wait_until_finish()
示例#2
0
def create_snapshots_on_cloud(bucket, project, runner, opts):
    """Run a Beam pipeline that renders a GOES infrared JPEG for every
    hurricane track point returned by the BigQuery query built from *opts*.

    Args:
        bucket: GCS bucket name (no gs:// prefix) for temp/staging dirs
            and output images.
        project: GCP project to bill the Dataflow job to.
        runner: Beam runner name, e.g. 'DataflowRunner' or 'DirectRunner';
            only DirectRunner blocks until the job finishes.
        opts: options object forwarded to create_query() to build the
            track-selection SQL.
    """
    import datetime
    import os

    import apache_beam as beam
    import hurricanes.goes_to_jpeg as g2j

    query = create_query(opts)

    OUTPUT_DIR = 'gs://{}/hurricane/'.format(bucket)
    options = {
        'staging_location': os.path.join(OUTPUT_DIR, 'tmp', 'staging'),
        'temp_location': os.path.join(OUTPUT_DIR, 'tmp'),
        'job_name':
        'maria-' + datetime.datetime.now().strftime('%y%m%d-%H%M%S'),
        'project': project,
        'max_num_workers': 12,
        'setup_file': './setup.py',
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'no_save_main_session': True
    }
    # Distinct name: the original rebound the *opts* parameter here, which
    # made the function read as if create_query consumed PipelineOptions.
    pipeline_opts = beam.pipeline.PipelineOptions(flags=[], **options)
    p = beam.Pipeline(runner, options=pipeline_opts)

    def loc_at_time(rowdict):
        # Extract (timestamp, storm name, lat, lon) from a BigQuery row.
        return (g2j.parse_timestamp(rowdict['iso_time']),
                rowdict['name'].lower(),
                rowdict['latitude'],
                rowdict['longitude'])

    def to_jpeg(fields):
        # NOTE(review): the filename uses dt.second where dt.minute was
        # probably intended (YYYYMMDDHHMM); kept as-is so output paths
        # stay compatible — confirm with the data owner before changing.
        dt, name, lat, lon = fields
        return g2j.goes_to_jpeg(
            g2j.get_objectId_at(dt), lat, lon, bucket,
            'hurricane/images/{}/ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
                name, dt.year, dt.month, dt.day, dt.hour, dt.second))

    (p
     | 'get_tracks' >> beam.io.Read(
         beam.io.BigQuerySource(query=query, use_standard_sql=True))
     # Python-2-only tuple-parameter lambdas (PEP 3113 removed them)
     # replaced with named functions so the pipeline also runs on Python 3.
     | 'loc_at_time' >> beam.Map(loc_at_time)
     | 'to_jpg' >> beam.Map(to_jpeg))
    job = p.run()
    if runner == 'DirectRunner':
        job.wait_until_finish()
示例#3
0
def create_snapshots_one_by_one(outdir, hurricane_file='MARIA.csv'):
    """Render a GOES infrared JPEG for the first timestamp in the track file.

    Recreates *outdir* from scratch, then parses the CSV one line at a
    time; processing stops after the first image (remove the break to
    process every timestamp).

    Args:
        outdir: local directory to wipe, recreate, and write JPEGs into.
        hurricane_file: path to the hurricane track CSV. Defaults to
            'MARIA.csv', the value the original hard-coded, so existing
            callers are unaffected; now consistent with
            create_local_snapshots.
    """
    import os
    import shutil

    import hurricanes.goes_to_jpeg as g2j

    shutil.rmtree(outdir, ignore_errors=True)
    os.mkdir(outdir)
    with open(hurricane_file, 'r') as ifp:
        for line in ifp:
            dt, lat, lon = g2j.parse_line(line)
            objectId = g2j.get_objectId_at(dt)
            # NOTE(review): the filename uses dt.second where dt.minute was
            # probably intended; kept as-is for output-path compatibility.
            outfilename = os.path.join(
                outdir, 'ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
                    dt.year, dt.month, dt.day, dt.hour, dt.second))
            g2j.goes_to_jpeg(objectId, lat, lon, None, outfilename)
            break  # take out this break to process all the timestamps
def create_local_snapshots(outdir, hurricane_file):
    """Render a GOES infrared JPEG for the first timestamp in *hurricane_file*.

    Wipes and recreates *outdir*, then walks the track CSV line by line;
    processing stops after the first image (remove the break to process
    every timestamp).
    """
    import os
    import shutil

    import hurricanes.goes_to_jpeg as g2j

    # Start from an empty output directory every run.
    shutil.rmtree(outdir, ignore_errors=True)
    os.mkdir(outdir)

    with open(hurricane_file, 'r') as track:
        for row in track:
            dt, lat, lon = g2j.parse_line(row)
            object_id = g2j.get_objectId_at(dt)
            image_name = 'ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
                dt.year, dt.month, dt.day, dt.hour, dt.second)
            g2j.goes_to_jpeg(object_id, lat, lon, None,
                             os.path.join(outdir, image_name))
            break  # take out this break to process all the timestamps
def create_snapshots_on_cloud(bucket, project, runner, opts):
    """Run a Beam pipeline that renders a GOES infrared JPEG for every
    hurricane track point returned by the BigQuery query built from *opts*.

    Args:
        bucket: GCS bucket name (no gs:// prefix) for temp/staging dirs
            and output images.
        project: GCP project to bill the Dataflow job to.
        runner: Beam runner name, e.g. 'DataflowRunner' or 'DirectRunner';
            only DirectRunner blocks until the job finishes.
        opts: options object forwarded to create_query() to build the
            track-selection SQL.
    """
    import datetime
    import os

    import apache_beam as beam
    import hurricanes.goes_to_jpeg as g2j

    query = create_query(opts)

    OUTPUT_DIR = 'gs://{}/hurricane/'.format(bucket)
    options = {
        'staging_location': os.path.join(OUTPUT_DIR, 'tmp', 'staging'),
        'temp_location': os.path.join(OUTPUT_DIR, 'tmp'),
        'job_name': 'maria-' + datetime.datetime.now().strftime('%y%m%d-%H%M%S'),
        'project': project,
        'max_num_workers': 12,
        'setup_file': './setup.py',
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'no_save_main_session': True
    }
    # Distinct name: the original rebound the *opts* parameter here, which
    # made the function read as if create_query consumed PipelineOptions.
    pipeline_opts = beam.pipeline.PipelineOptions(flags=[], **options)
    p = beam.Pipeline(runner, options=pipeline_opts)

    def loc_at_time(rowdict):
        # Extract (timestamp, storm name, lat, lon) from a BigQuery row.
        return (g2j.parse_timestamp(rowdict['iso_time']),
                rowdict['name'].lower(),
                rowdict['latitude'],
                rowdict['longitude'])

    def to_jpeg(fields):
        # NOTE(review): the filename uses dt.second where dt.minute was
        # probably intended (YYYYMMDDHHMM); kept as-is so output paths
        # stay compatible — confirm with the data owner before changing.
        dt, name, lat, lon = fields
        return g2j.goes_to_jpeg(
            g2j.get_objectId_at(dt), lat, lon, bucket,
            'hurricane/images/{}/ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
                name, dt.year, dt.month, dt.day, dt.hour, dt.second))

    (p
     | 'get_tracks' >> beam.io.Read(
         beam.io.BigQuerySource(query=query, use_standard_sql=True))
     # Python-2-only tuple-parameter lambdas (PEP 3113 removed them)
     # replaced with named functions so the pipeline also runs on Python 3.
     | 'loc_at_time' >> beam.Map(loc_at_time)
     | 'to_jpg' >> beam.Map(to_jpeg))
    job = p.run()
    if runner == 'DirectRunner':
        job.wait_until_finish()