def find_new_sentinel2_scenes(*args, **kwargs):
    """Find new Sentinel-2 scenes and kick off imports

    Uses the execution date to determine what day to check for imports
    """
    logger.info("Finding Sentinel-2 scenes...")
    execution_date = kwargs['execution_date']
    tilepaths = find_sentinel2_scenes(
        execution_date.year, execution_date.month, execution_date.day)
    dag_id = 'import_sentinel2_scenes'

    # Split into groups for more efficient jobs
    num_groups = 32 if len(tilepaths) >= 32 else len(tilepaths)
    logger.info('Kicking off %s dags to import scene groups', num_groups)
    tilepath_groups = chunkify(tilepaths, num_groups)
    for idx, path_group in enumerate(tilepath_groups):
        slug_path = '_'.join(path_group[0].split('/'))
        run_id = 'sentinel2_import_{year}_{month}_{day}_{idx}_{slug}'.format(
            year=execution_date.year, month=execution_date.month,
            day=execution_date.day, idx=idx, slug=slug_path)
        logger.info('Kicking off new scene import: %s', run_id)
        conf = json.dumps({'tilepaths': path_group})
        dag_args = DagArgs(dag_id=dag_id, conf=conf, run_id=run_id)
        trigger_dag(dag_args)
    return "Finished kicking off new Sentinel-2 dags"
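# A minimal sketch (not the project's actual DAG file) of how a callable like
# find_new_sentinel2_scenes is typically wired up on Airflow 1.x. The dag_id,
# schedule, and start_date below are illustrative assumptions;
# provide_context=True is what puts execution_date into kwargs.
from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

dag = DAG(
    dag_id='find_sentinel2_scenes',   # assumed name, for illustration only
    schedule_interval='@daily',
    start_date=datetime(2017, 1, 1),
    catchup=False,
)

find_scenes_task = PythonOperator(
    task_id='find_new_sentinel2_scenes',
    python_callable=find_new_sentinel2_scenes,
    provide_context=True,   # Airflow 1.x: injects execution_date into kwargs
    dag=dag,
)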
def test_cli_list_dag_runs(self):
    cli.trigger_dag(self.parser.parse_args([
        'dags', 'trigger', 'example_bash_operator',
    ]))
    args = self.parser.parse_args([
        'dags', 'list_runs', 'example_bash_operator', '--no_backfill',
    ])
    cli.list_dag_runs(args)
def test_trigger_dag(self):
    cli.trigger_dag(self.parser.parse_args([
        'dags', 'trigger', 'example_bash_operator',
        '-c', '{"foo": "bar"}',
    ]))
    self.assertRaises(
        ValueError,
        cli.trigger_dag,
        self.parser.parse_args([
            'dags', 'trigger', 'example_bash_operator',
            '--run_id', 'trigger_dag_xxx',
            '-c', 'NOT JSON',
        ]))
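# The two tests above assume a unittest fixture that loads the example DAGs
# and builds the CLI's argparse parser. A sketch of that setup, based on the
# CLI tests shipped with Airflow in the 1.10 era; the class name is an
# assumption, not the verbatim fixture:
import unittest

from airflow import models
from airflow.bin import cli


class TestCliDags(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.dagbag = models.DagBag(include_examples=True)
        cls.parser = cli.CLIFactory.get_parser()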
def find_geotiffs(*args, **kwargs):
    """Find geotiffs which match the bucket and prefix and kick off imports
    """
    logger.info("Finding geotiff scenes...")
    conf = kwargs['dag_run'].conf
    bucket = conf.get('bucket')
    prefix = conf.get('prefix')
    execution_date = kwargs['execution_date']

    try:
        tilepaths = find_geotiff_scenes(bucket, prefix)
    except Exception:
        logger.error('Encountered error finding tile paths')
        raise

    dag_id = 'import_geotiff_scenes'
    # Split into groups for more efficient jobs
    group_max = int(os.getenv('AIRFLOW_CHUNK_SIZE', 32))
    num_groups = group_max if len(tilepaths) >= group_max else len(tilepaths)
    logger.info('Kicking off %s dags to import scene groups', num_groups)
    tilepath_groups = chunkify(tilepaths, num_groups)
    for idx, path_group in enumerate(tilepath_groups):
        slug_path = '_'.join(path_group[0].split('/'))
        run_id = 'geotiff_import_{year}_{month}_{day}_{idx}_{slug}'.format(
            year=execution_date.year, month=execution_date.month,
            day=execution_date.day, idx=idx, slug=slug_path)
        logger.info('Kicking off new scene import: %s', run_id)
        conf['tilepaths'] = path_group
        confjson = json.dumps(conf)
        dag_args = DagArgs(dag_id=dag_id, conf=confjson, run_id=run_id)
        trigger_dag(dag_args)
    logger.info('Finished kicking off new Geotiff scene dags')
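# chunkify is referenced above but not shown; it is assumed to split a list of
# tile paths into num_groups roughly equal, contiguous groups. A hypothetical
# sketch of such a helper (not the project's actual implementation):
def chunkify(lst, num_groups):
    """Split lst into num_groups contiguous, roughly equal-sized chunks."""
    if num_groups <= 0:
        return []
    size, remainder = divmod(len(lst), num_groups)
    chunks = []
    start = 0
    for i in range(num_groups):
        # The first `remainder` chunks take one extra element
        end = start + size + (1 if i < remainder else 0)
        chunks.append(lst[start:end])
        start = end
    return chunks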
def check_for_scenes_to_ingest():
    """Requests uningested scenes, kicks off ingest DAG for each scene

    Notes:
        At some point this should batch scene ingests together, but for now
        they are kept separate for debugging and because the ingests
        themselves do not parallelize well
    """
    logger.info("Requesting uningested scenes...")
    scenes = get_uningested_scenes()
    dag_id = 'ingest_project_scenes'
    if len(scenes) == 0:
        return 'No scenes to ingest'

    logger.info('Kicking off ingests for %s scenes', len(scenes))
    for scene in scenes:
        run_id = 'scene_ingest_{}_{}'.format(scene['id'], time())
        logger.info('Kicking off new scene ingest: %s', run_id)
        conf = json.dumps({'scene': scene})
        dag_args = DagArgs(dag_id=dag_id, conf=conf, run_id=run_id)
        trigger_dag(dag_args)
    return "Finished kicking off ingests"
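# DagArgs and trigger_dag are module-level helpers not shown in these
# snippets. Presumably DagArgs is a small container for the fields the
# Airflow trigger command needs. A hypothetical sketch, assuming the wrapper
# simply shells out to the Airflow 1.x CLI:
from collections import namedtuple
import subprocess

DagArgs = namedtuple('DagArgs', ['dag_id', 'conf', 'run_id'])


def trigger_dag(dag_args):
    """Trigger a DAG run via the Airflow CLI (sketch, not the real helper)."""
    subprocess.check_call([
        'airflow', 'trigger_dag', dag_args.dag_id,
        '--run_id', dag_args.run_id,
        '--conf', dag_args.conf,
    ])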
def create(self, validated_data):
    # TODO: Import Jinja2
    # TODO: Build the parameters dictionary
    execution = Execution.objects.get(pk=validated_data['execution_id'])
    min_long, max_long, min_lat, max_lat = self.get_area(
        validated_data['parameters'])
    params = dict(self.get_kwargs(validated_data['parameters']))
    params['lat'] = (min_lat, max_lat)
    params['lon'] = (min_long, max_long)
    params['products'] = self.get_product(validated_data['parameters'])
    params['time_ranges'] = self.get_time_periods(
        validated_data['parameters'])
    params['execID'] = 'exec_{}'.format(str(validated_data['execution_id']))
    params['elimina_resultados_anteriores'] = True
    params['genera_mosaico'] = validated_data['generate_mosaic']
    # params['owner'] = Execution.executed_by.
    params['owner'] = "API-REST"

    # TODO: Load the template
    template_path = os.path.join(os.environ['TEMPLATE_PATH'],
                                 slugify(validated_data['algorithm_name']))
    generic_template_path = os.path.join(os.environ['TEMPLATE_PATH'],
                                         "generic-template")
    if (execution.version is not None
            and execution.version.publishing_state == Version.PUBLISHED_STATE
            and os.path.exists(template_path)):
        file_loader = FileSystemLoader(template_path)
        env = Environment(loader=file_loader)
        algorithm_template_path = '{}_{}.py'.format(
            slugify(validated_data['algorithm_name']),
            validated_data['version_id'])
        template = env.get_template(algorithm_template_path)
    else:
        file_loader = FileSystemLoader(generic_template_path)
        env = Environment(loader=file_loader)
        algorithm_template_path = '{}_{}.py'.format("generic-template", "1.0")
        params['algorithm_name'] = slugify(validated_data['algorithm_name'])
        params['algorithm_version'] = validated_data['version_id']
        template = env.get_template(algorithm_template_path)

    # TODO: Render the template
    airflow_dag_path = os.environ['AIRFLOW_DAG_PATH']
    execution_dag_path = '{}/exec_{}.py'.format(
        airflow_dag_path, str(validated_data['execution_id']))
    output = template.render(params=params)
    with open(execution_dag_path, 'w') as dag:
        dag.write("from airflow.operators import CompressFileSensor\n")
        dag.write("from cdcol_utils import other_utils\n")
        dag.write(output)
        dag.write(
            "\nsensor_fin_ejecucion = CompressFileSensor(task_id='sensor_fin_ejecucion',poke_interval=60, soft_fail=True,mode='reschedule', queue='util', dag=dag) \n"
        )
        dag.write(
            "comprimir_resultados = PythonOperator(task_id='comprimir_resultados',provide_context=True,python_callable=other_utils.compress_results,queue='util',op_kwargs={'execID': args['execID']},dag=dag) \n"
        )
        dag.write("sensor_fin_ejecucion >> comprimir_resultados \n")

    execution.dag_id = params['execID']
    execution.save()

    # TODO: Run the workflow
    bash_command1 = '/home/cubo/anaconda/bin/airflow list_dags'
    bash_command2 = '/home/cubo/anaconda/bin/airflow unpause ' + params['execID']
    subprocess.call(bash_command1.split())
    subprocess.call(bash_command2.split())
    dagbag = models.DagBag(settings.DAGS_FOLDER)
    dagbag.collect_dags()
    dagbag.process_file(filepath=execution_dag_path)
    args = argparse.Namespace()
    args.dag_id = params['execID']
    args.run_id = None
    args.exec_id = None
    args.conf = None
    args.exec_date = None
    args.subdir = None
    # cli.set_is_paused(False, args=args)
    cli.trigger_dag(args)

    # TODO: Update the execution record in the database
    # time_ranges = self.get_time_periods(validated_data['parameters'])
    #
    # gtask_parameters = {}
    # gtask_parameters['execID'] = str(validated_data['execution_id'])
    # gtask_parameters['algorithm'] = validated_data['algorithm_name']
    # gtask_parameters['version'] = validated_data['version_id']
    # gtask_parameters['output_expression'] = ''
    # gtask_parameters['product'], gtask_parameters['bands'] = self.get_product(validated_data['parameters'])
    # gtask_parameters = dict(self.get_kwargs(validated_data['parameters']), **gtask_parameters)
    #
    # gtask = import_module(os.environ['GEN_TASK_MOD'])
    #
    # flower = os.environ['FLOWER']
    # for key in gtask_parameters:
    #     print 'param \'' + key + '\': ' + str(gtask_parameters[key])
    # result = gtask.generic_task(min_long=min_long, min_lat=min_lat, **gtask_parameters)
    # if validated_data['is_gif']:
    #     gtask_parameters['min_lat'] = int(min_lat)
    #     gtask_parameters['min_long'] = int(min_long)
    #     result = group(
    #         gtask.generic_task.s(time_ranges=[("01-01-" + str(A), "31-12-" + str(A))], **gtask_parameters) for A in
    #         xrange(int(time_ranges[0][0].split('-')[2]), int(time_ranges[0][1].split('-')[2]) + 1)).delay()
    #     for each_result in result.results:
    #         new_task = {
    #             'uuid': each_result.id,
    #             'state': '1',
    #             'execution_id': gtask_parameters['execID'],
    #             'state_updated_at': str(datetime.datetime.now()),
    #             'created_at': str(datetime.datetime.now()),
    #             'updated_at': str(datetime.datetime.now()),
    #             'start_date': str(datetime.date.today()),
    #             'end_date': str(datetime.date.today()),
    #         }
    #         Task.objects.create(**new_task)
    # else:
    #     gtask_parameters['time_ranges'] = time_ranges
    #     result = group(gtask.generic_task.s(min_lat=Y, min_long=X, **gtask_parameters) for Y in
    #                    xrange(int(min_lat), int(max_lat)) for X in xrange(int(min_long), int(max_long))).delay()
    #     for each_result in result.results:
    #         # try:
    #         #     task = json.loads(urlopen(flower + '/api/task/info/' + each_result.id).read())
    #         # except:
    #         #     task = {'kwargs': ''}
    #         new_task = {
    #             'uuid': each_result.id,
    #             'state': '1',
    #             'execution_id': gtask_parameters['execID'],
    #             'state_updated_at': str(datetime.datetime.now()),
    #             'created_at': str(datetime.datetime.now()),
    #             'updated_at': str(datetime.datetime.now()),
    #             'start_date': str(datetime.date.today()),
    #             'end_date': str(datetime.date.today()),
    #             # 'parameters': json.dumps(each_result.__dict__),
    #         }
    #         Task.objects.create(**new_task)

    return validated_data
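# The create() method above issues the trigger by building an argparse
# Namespace and calling cli.trigger_dag. On Airflow 1.10 the same trigger can
# usually be issued through the experimental API instead; a minimal sketch,
# assuming the rendered exec_<id>.py file has already been picked up by the
# scheduler (trigger_rendered_execution is a hypothetical helper name):
from airflow.api.common.experimental.trigger_dag import trigger_dag as api_trigger_dag


def trigger_rendered_execution(exec_id, conf=None):
    """Trigger the generated exec_<id> DAG programmatically (sketch)."""
    return api_trigger_dag(dag_id=exec_id, run_id=None, conf=conf)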