# Earlier version: stages the Hurricane Maria track CSV on GCS and drives the
# Beam pipeline from it. (Redefined by the BigQuery-driven version below,
# which is the definition that survives at import time.)
def create_snapshots_on_cloud(bucket, project, runner):
    import datetime, os
    import apache_beam as beam
    import hurricanes.goes_to_jpeg as g2j

    # Copy the local track file to GCS so that cloud workers can read it.
    input_file = 'maria/input/maria.csv'
    g2j.copy_togcs('MARIA.csv', bucket, input_file)

    OUTPUT_DIR = 'gs://{}/maria/'.format(bucket)
    options = {
        'staging_location': os.path.join(OUTPUT_DIR, 'tmp', 'staging'),
        'temp_location': os.path.join(OUTPUT_DIR, 'tmp'),
        'job_name': 'maria-' + datetime.datetime.now().strftime('%y%m%d-%H%M%S'),
        'project': project,
        'max_num_workers': 12,
        'setup_file': './setup.py',
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'no_save_main_session': True
    }
    opts = beam.pipeline.PipelineOptions(flags=[], **options)
    p = beam.Pipeline(runner, options=opts)
    (p
     | 'lines' >> beam.io.ReadFromText('gs://{}/{}'.format(bucket, input_file))
     | 'parse' >> beam.Map(g2j.parse_line)
     # MapTuple unpacks the (dt, lat, lon) tuple produced by the parse step;
     # the filename encodes the timestamp as YYYYMMDDHHMM.
     | 'to_jpg' >> beam.MapTuple(lambda dt, lat, lon: g2j.goes_to_jpeg(
         g2j.get_objectId_at(dt), lat, lon, bucket,
         'maria/images/ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
             dt.year, dt.month, dt.day, dt.hour, dt.minute)))
    )
    job = p.run()
    if runner == 'DirectRunner':
        job.wait_until_finish()
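# A minimal sketch of the contract the 'parse' step above relies on, i.e.
# what hurricanes.goes_to_jpeg.parse_line is assumed to do: turn one CSV row
# of the track file into a (datetime, lat, lon) tuple. The column order and
# timestamp format below are illustrative assumptions, not the module's
# actual implementation.
def sketch_parse_line(line):
    import datetime
    fields = line.strip().split(',')  # e.g. '2017-09-20 12:00:00,18.2,-65.8'
    dt = datetime.datetime.strptime(fields[0], '%Y-%m-%d %H:%M:%S')
    return dt, float(fields[1]), float(fields[2])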
# Final version: pulls the hurricane track from BigQuery instead of a staged
# CSV, so any storm in the dataset can be rendered.
def create_snapshots_on_cloud(bucket, project, runner, opts):
    import datetime, os
    import apache_beam as beam
    import hurricanes.goes_to_jpeg as g2j

    query = create_query(opts)
    OUTPUT_DIR = 'gs://{}/hurricane/'.format(bucket)
    options = {
        'staging_location': os.path.join(OUTPUT_DIR, 'tmp', 'staging'),
        'temp_location': os.path.join(OUTPUT_DIR, 'tmp'),
        'job_name': 'maria-' + datetime.datetime.now().strftime('%y%m%d-%H%M%S'),
        'project': project,
        'max_num_workers': 12,
        'setup_file': './setup.py',
        'teardown_policy': 'TEARDOWN_ALWAYS',
        'no_save_main_session': True
    }
    # Use a distinct name so the opts parameter consumed by create_query()
    # is not shadowed.
    pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options)
    p = beam.Pipeline(runner, options=pipeline_options)
    (p
     | 'get_tracks' >> beam.io.Read(
         beam.io.BigQuerySource(query=query, use_standard_sql=True))
     | 'loc_at_time' >> beam.Map(lambda rowdict: (
         g2j.parse_timestamp(rowdict['iso_time']),
         rowdict['name'].lower(),
         rowdict['latitude'], rowdict['longitude']))
     # MapTuple unpacks (dt, name, lat, lon); images are grouped by storm name,
     # with the timestamp encoded as YYYYMMDDHHMM.
     | 'to_jpg' >> beam.MapTuple(lambda dt, name, lat, lon: g2j.goes_to_jpeg(
         g2j.get_objectId_at(dt), lat, lon, bucket,
         'hurricane/images/{}/ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
             name, dt.year, dt.month, dt.day, dt.hour, dt.minute)))
    )
    job = p.run()
    if runner == 'DirectRunner':
        job.wait_until_finish()
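# create_query() is called above but not defined in this listing. A hedged
# sketch of what it presumably returns: Standard SQL that selects the four
# columns the 'loc_at_time' step reads. The table name, opts keys, and
# filter below are placeholders for illustration only.
def sketch_create_query(opts):
    return """
        SELECT iso_time, name, latitude, longitude
        FROM `{table}`
        WHERE name = '{name}'
        ORDER BY iso_time ASC
    """.format(table=opts['table'], name=opts['hurricane_name'])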
def create_snapshots_one_by_one(outdir):
    import shutil, os
    import hurricanes.goes_to_jpeg as g2j

    # Start from a clean output directory.
    shutil.rmtree(outdir, ignore_errors=True)
    os.mkdir(outdir)
    with open('MARIA.csv', 'r') as ifp:
        for line in ifp:
            dt, lat, lon = g2j.parse_line(line)
            objectId = g2j.get_objectId_at(dt)
            outfilename = os.path.join(
                outdir, 'ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
                    dt.year, dt.month, dt.day, dt.hour, dt.minute))
            jpgfile = g2j.goes_to_jpeg(objectId, lat, lon, None, outfilename)
            break  # remove this break to process all the timestamps
# Generalized local version: takes the track file as a parameter instead of
# hard-coding MARIA.csv.
def create_local_snapshots(outdir, hurricane_file):
    import shutil, os
    import hurricanes.goes_to_jpeg as g2j

    # Start from a clean output directory.
    shutil.rmtree(outdir, ignore_errors=True)
    os.mkdir(outdir)
    with open(hurricane_file, 'r') as ifp:
        for line in ifp:
            dt, lat, lon = g2j.parse_line(line)
            objectId = g2j.get_objectId_at(dt)
            outfilename = os.path.join(
                outdir, 'ir_{}{:02d}{:02d}{:02d}{:02d}.jpg'.format(
                    dt.year, dt.month, dt.day, dt.hour, dt.minute))
            jpgfile = g2j.goes_to_jpeg(objectId, lat, lon, None, outfilename)
            break  # remove this break to process all the timestamps
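# One possible command-line entry point for the local functions above; this
# driver is an illustrative sketch, and the flag names and defaults are
# assumptions rather than the repository's actual interface.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Render GOES infrared snapshots along a hurricane track')
    parser.add_argument('--outdir', default='images',
                        help='local directory for the JPEG output')
    parser.add_argument('--track', default='MARIA.csv',
                        help='CSV file with the hurricane track')
    args = parser.parse_args()
    create_local_snapshots(args.outdir, args.track)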