Example #1
def create_runner(runner_name,
                  jobmanager_heap_size=1024,
                  num_of_taskmanagers=1,
                  taskmanager_heap_size=4096,
                  num_task_slots=1):
    """
    Create a Beam runner. Creates a job with the job type that corresponds
    to the requested runner.

    Args:
        runner_name: Name of the runner.
        jobmanager_heap_size: The memory (MB) of the Flink cluster JobManager.
        num_of_taskmanagers: The number of TaskManagers of the Flink cluster.
        taskmanager_heap_size: The memory (MB) of each TaskManager in the Flink cluster.
        num_task_slots: The number of task slots of each TaskManager.

    Returns:
        The runner spec.
    """

    # In the future we will support beamSparkJobConfiguration.
    job_type = "beamFlinkJobConfiguration"
    job_config = {
        "type": job_type,
        "amQueue": "default",
        "jobmanager.heap.size": jobmanager_heap_size,
        "amVCores": "1",
        "numberOfTaskManagers": num_of_taskmanagers,
        "taskmanager.heap.size": taskmanager_heap_size,
        "taskmanager.numberOfTaskSlots": num_task_slots
    }
    return jobs.create_job(runner_name, job_config)
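
# Usage sketch (hypothetical values; assumes the surrounding Hopsworks client
# library that provides the `jobs` module used inside create_runner):
runner = create_runner("my_beam_runner",
                       jobmanager_heap_size=2048,
                       num_of_taskmanagers=2,
                       taskmanager_heap_size=4096,
                       num_task_slots=2)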
Example #2
    "?filter_by=state:INITIALIZING,RUNNING,ACCEPTED,NEW,NEW_SAVING,SUBMITTED,"
    "STARTING_APP_MASTER,AGGREGATING_LOGS&sort_by=id:desc")
app_id = args.application_id
if execution is None or execution['count'] == 0:
    # Create the Flink Hopsworks job and start it. This effectively creates
    # a Flink cluster to submit jobs to.
    job_type = "flinkJobConfiguration"
    job_config = {
        "type": job_type,
        "amQueue": "default",
        "jobmanager.heap.size": args.yarnjobManagerMemory,
        "amVCores": "1",
        "numberOfTaskManagers": args.task_managers,
        "taskmanager.heap.size": args.yarntaskManagerMemory,
        "taskmanager.numberOfTaskSlots": args.yarnslots
    }
    jobs.create_job(args.job, job_config)

    print("Waiting for flink cluster to start...")
    jobs.start_job(args.job)
    # Wait up to 90 seconds until the runner is in state "RUNNING",
    # polling every 5 seconds.
    wait = 90
    wait_count = 0
    execution = jobs.get_executions(
        args.job, "?offset=0&limit=1&sort_by=id:desc")['items'][0]
    state = execution['state']
    while wait_count < wait and state != "RUNNING":
        time.sleep(5)
        wait_count += 5
        execution = jobs.get_executions(
            args.job, "?offset=0&limit=1&sort_by=id:desc")['items'][0]
        state = execution['state']
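    # If the 90-second budget is exhausted before the runner is RUNNING, the
    # loop above simply falls through; a stricter variant (sketch) could
    # fail fast instead:
    #     if state != "RUNNING":
    #         raise RuntimeError("Flink runner did not reach RUNNING in time")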

# Read the Flink job configuration template (the `args.json` argument name
# is an assumption; the original open(...) call is truncated in this excerpt):
with open(args.json) as json_file:
    data = json_file.read()

data = data.replace("{PROJECT_NAME}", project_name) \
    .replace("{DATASET_NAME}", dataset_name) \
    .replace("{APP_FILE}", ntpath.basename(app_file)) \
    .replace("{JOB_NAME}", job_name) \
    .replace("{DEPENDENCY}", dependency + ".zip")

data = json.loads(data)
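
# For illustration only, a hypothetical template with the placeholders
# substituted above might look like:
# {
#     "type": "flinkJobConfiguration",
#     "appName": "{JOB_NAME}",
#     "appPath": "hdfs:///Projects/{PROJECT_NAME}/{DATASET_NAME}/{APP_FILE}",
#     "dependencies": "{DEPENDENCY}"
# }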

print("Jobs configuration:\n")
print(data)
print("===============================\n")

# Zip the folder containing the code
shutil.make_archive(dependency, "zip", app_folder)
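
# Connect to the Hopsworks project. The URL is given as "host:port", so it is
# split into the host and the port passed to project.connect; args.apikey is
# expected to hold an API key generated in Hopsworks.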
hopsworks_url = args.hopsworks_url.split(":")
project.connect(project_name,
                hopsworks_url[0],
                port=hopsworks_url[1],
                api_key=args.apikey)
print("Connected to project: " + project_name)

dataset.upload(os.path.join(app_folder, app_file), dataset_name)
dataset.upload(dependency + ".zip", dataset_name)
print("===============================\n")
print("Uploaded program to Hopsworks.")
jobs.create_job(job_name, data)
jobs.start_job(job_name, " ".join(args.cmd))
print("===============================\n")
print("Started job: " + job_name)