def create_runner(runner_name, jobmanager_heap_size=1024, num_of_taskmanagers=1,
                  taskmanager_heap_size=4096, num_task_slots=1):
    """
    Create a Beam runner. Creates the job with the job type that corresponds
    to the requested runner.

    Args:
        runner_name: Name of the runner.
        jobmanager_heap_size: The memory (MB) of the Flink cluster JobManager.
        num_of_taskmanagers: The number of TaskManagers of the Flink cluster.
        taskmanager_heap_size: The memory (MB) of each TaskManager in the
            Flink cluster.
        num_task_slots: Number of task slots of each TaskManager in the
            Flink cluster.

    Returns:
        The runner spec.
    """
    # In the future we will support beamSparkJobConfiguration
    job_type = "beamFlinkJobConfiguration"
    job_config = {
        "type": job_type,
        "amQueue": "default",
        "jobmanager.heap.size": jobmanager_heap_size,
        "amVCores": "1",
        "numberOfTaskManagers": num_of_taskmanagers,
        "taskmanager.heap.size": taskmanager_heap_size,
        "taskmanager.numberOfTaskSlots": num_task_slots
    }
    return jobs.create_job(runner_name, job_config)
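
# A minimal usage sketch (illustrative, not part of the original module):
# the runner name and sizing values below are assumptions chosen only to
# show the call shape. It provisions a Beam Flink runner with a 2 GB
# JobManager and two 8 GB TaskManagers exposing two task slots each.
runner_spec = create_runner("beam-flink-runner",
                            jobmanager_heap_size=2048,
                            num_of_taskmanagers=2,
                            taskmanager_heap_size=8192,
                            num_task_slots=2)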
"?filter_by=state:INITIALIZING,RUNNING,ACCEPTED,NEW,NEW_SAVING,SUBMITTED," "STARTING_APP_MASTER,AGGREGATING_LOGS&sort_by=id:desc") app_id = args.application_id if execution is None or execution['count'] == 0: # Create Flink Hopsworks job and start it. This effectively creates a Flink cluster to submit jobs to type = "flinkJobConfiguration" job_config = { "type": type, "amQueue": "default", "jobmanager.heap.size": args.yarnjobManagerMemory, "amVCores": "1", "numberOfTaskManagers": args.task_managers, "taskmanager.heap.size": args.yarntaskManagerMemory, "taskmanager.numberOfTaskSlots": args.yarnslots } jobs.create_job(args.job, job_config) print("Waiting for flink cluster to start...") jobs.start_job(args.job) # Wait 90 seconds until runner is in status "RUNNING", wait = 90 wait_count = 0 execution = jobs.get_executions( args.job, "?offset=0&limit=1&sort_by=id:desc")['items'][0] state = execution['state'] while wait_count < wait and state != "RUNNING": time.sleep(5) wait_count += 5 execution = jobs.get_executions( args.job, "?offset=0&limit=1&sort_by=id:desc")['items'][0] state = execution['state']
data = json_file.read()
data = data.replace("{PROJECT_NAME}", project_name) \
    .replace("{DATASET_NAME}", dataset_name) \
    .replace("{APP_FILE}", ntpath.basename(app_file)) \
    .replace("{JOB_NAME}", job_name) \
    .replace("{DEPENDENCY}", dependency + ".zip")
data = json.loads(data)
print("Jobs configuration:\n")
print(data)
print("===============================\n")

# Zip the folder containing the code
shutil.make_archive(dependency, "zip", app_folder)

# Connect to the Hopsworks project; hopsworks_url is split into host and port.
hopsworks_url = args.hopsworks_url.split(":")
project.connect(project_name, hopsworks_url[0], port=hopsworks_url[1],
                api_key=args.apikey)
print("Connected to project: " + project_name)

# Upload the program and its zipped dependencies to the project dataset.
dataset.upload(os.path.join(app_folder, app_file), dataset_name)
dataset.upload(dependency + ".zip", dataset_name)
print("===============================\n")
print("Uploaded program to Hopsworks.")

# Create the Hopsworks job and start it with the user-supplied arguments.
jobs.create_job(job_name, data)
jobs.start_job(job_name, " ".join(args.cmd))
print("===============================\n")
print("Started job: " + job_name)
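
# The chained .replace() calls above can also be written data-driven; a
# sketch using the same placeholder names from the template (only the
# mapping style is new, and it must run before json.loads(), while `data`
# is still a string):
substitutions = {
    "{PROJECT_NAME}": project_name,
    "{DATASET_NAME}": dataset_name,
    "{APP_FILE}": ntpath.basename(app_file),
    "{JOB_NAME}": job_name,
    "{DEPENDENCY}": dependency + ".zip",
}
for placeholder, value in substitutions.items():
    data = data.replace(placeholder, value)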