示例#1
0
def wait_for_config(poll_interval: float = 0.1):
    '''
    Block until the master has set this Job's config, then return it.

    Repeatedly reads the key ``job_id + '_config'`` from the KV store using
    the module-level ``kv_conn`` connection. A read that yields a lone
    carriage return (``'\\r'``) is treated as "not set yet" and triggers
    another poll after a short pause.

    Args:
        poll_interval: Seconds to sleep between two polls. Pass ``0`` to
            poll back-to-back without pausing.

    Returns:
        The first value read from the store that is not ``'\\r'``.
    '''
    # Imported locally so this function does not depend on the module's
    # import block already providing ``time``.
    import time

    logging.info("{0} waiting for config".format(job_id))
    config_key = job_id + '_config'
    while True:
        config = kv.read_store(kv_conn, config_key)
        if config != '\r':
            return config
        # Pause between polls so the store is not hit with back-to-back
        # reads while the config is still missing.
        time.sleep(poll_interval)
示例#2
0
def consolidate_output(reducer_jobids: list, output_file_path: str):
    '''
    Merge the stored result of every reducer into one output file.

    For each id in ``reducer_jobids`` the key ``<id>_result`` is read from
    the KV store, every ``'\\r,'`` sequence in it is replaced by a newline,
    and the pieces are written to ``output_file_path`` in the order of
    ``reducer_jobids``. The file is opened for writing as UTF-8 with
    encoding errors ignored.
    '''
    # All results are fetched before the file is opened, so a failing read
    # happens before the output file is created or truncated.
    pieces = []
    for reducer_id in reducer_jobids:
        raw_result = kv.read_store(kv_conn, reducer_id + '_result')
        pieces.append(raw_result.replace('\r,', '\n'))

    with open(output_file_path, 'w', encoding="utf8",
              errors='ignore') as sink:
        for piece in pieces:
            sink.write(piece)
示例#3
0
def wait_for_reducers(reducer_jobids: list, poll_interval: float = 0.1):
    '''
    Poll the KV store until every Reducer Job reports completion.

    On each round the key ``<id>_status`` is read for every id in
    ``reducer_jobids``. The function returns once all of them equal
    ``"DONE\\r"``. An empty ``reducer_jobids`` returns immediately.

    Args:
        reducer_jobids: Job ids of the reducers to wait for.
        poll_interval: Seconds to sleep between two polling rounds. Pass
            ``0`` to poll back-to-back without pausing.

    Returns:
        None.
    '''
    # Imported locally so this function does not depend on the module's
    # import block already providing ``time``.
    import time

    while True:
        statuses = [
            kv.read_store(kv_conn, job_id + '_status')
            for job_id in reducer_jobids
        ]
        if all(status == "DONE\r" for status in statuses):
            logging.debug("Reducers Completed - status\n{0}".format(statuses))
            return
        # NOTE(review): any status other than "DONE\r" just triggers another
        # round, so a reducer that never reports DONE keeps this loop running
        # forever - confirm whether a timeout or failure check is wanted.
        time.sleep(poll_interval)
示例#4
0
def get_job_status():
    '''
    This API route is used to get the current Job status given a Job ID.

    The Job ID is taken from the ``jobid`` query argument and the key
    ``<jobid>_status`` is read from the KV store.

    Returns:
        * the file ``output/output_<jobid>.txt`` when the stored status is
          ``"COMPLETED\\r"``;
        * the stored status itself otherwise;
        * the string ``"ERROR : Job status check failed"`` when any step
          raises.
    '''
    # Start unbound-safe: the handler below must be able to clean up even
    # when the failure happens before a connection was obtained.
    conn = None
    try:
        job_id = request.args.get('jobid')
        logging.info("Getting status for job : {0}".format(job_id))
        conn = kv.get_store_connection()
        status = kv.read_store(conn, "{0}_status".format(job_id))
        if status == "COMPLETED\r":
            # The file name is passed positionally because the keyword for
            # this argument was renamed between Flask releases.
            return send_from_directory('output',
                                       'output_{}.txt'.format(job_id))
        return status
    except Exception as e:
        logging.error("Job status check failed : %s", e)
        return "ERROR : Job status check failed"
    finally:
        # Close exactly once, on every path, and only if a connection exists.
        if conn is not None:
            kv.close_store_connection(conn)
示例#5
0
def main():
    '''
    The driver function that runs the Map job.

    Steps:
        1. Wait for the Job config and parse it as JSON.
        2. Set ``<job_id>_status`` to ``STARTED`` in the KV store.
        3. Read ``<job_id>_input`` and split it on ``'#\\r#'``, discarding
           the piece before the first separator.
        4. Call ``run_map`` on consecutive pairs of the split input.
        5. Partition the collected results across ``config['reducer_node']``.
        6. Store the partitioned results.

    Any ``Exception`` raised along the way is logged and the Job status is
    set to ``FAILED``. The KV store connection is closed on every path,
    including the early aborts, which terminate the process through
    ``SystemExit``.
    '''
    # Local import: ``sys.exit`` replaces the interactive ``exit`` helper.
    import sys

    try:
        if job_id is None:
            logging.critical("Job Initialization Error! ABORTING")
            sys.exit()
        # Step 1 : Wait for Map Job config
        config = json.loads(wait_for_config())
        reducer_node = config['reducer_node']
        # Step 2 : Update status as started
        res = kv.set_command(kv_conn, job_id + '_status',
                             len("STARTED".encode()), "STARTED")
        if res != "STORED\r\n":
            logging.error("Status set failure : %s", res)
            logging.critical("ABORTING")
            sys.exit()
        # Step 3 : Read Mapper Input
        message = kv.read_store(kv_conn, job_id + '_input')
        message_list = message.split('#\r#')[1:]
        # Step 4 : Run map job based on input file
        # The serialized map function is the same for every pair, so it is
        # converted once instead of once per iteration.
        # NOTE(review): presumably config['map_fn'] is a list of byte values
        # from the JSON config - confirm against the master.
        map_fn_serialized = bytes(config['map_fn'])
        map_result = []
        for i in range(0, len(message_list), 2):
            # Entries are consumed two at a time; an odd number of entries
            # raises IndexError here and is reported as a failed Job below.
            map_result.extend(run_map(map_fn_serialized,
                                      message_list[i],
                                      message_list[i + 1]))
        # Step 5 : Partition Map results
        partition_map = partition_intermediate_results(map_result,
                                                       len(reducer_node),
                                                       reducer_node)
        # Step 6 : Store Map results
        store_intermediate_results(partition_map)
    except Exception as e:
        # exc_info adds the traceback so the failing step can be located.
        logging.critical("JOB FAILED : %s", e, exc_info=True)
        kv.set_command(kv_conn, job_id + '_status',
                       len("FAILED".encode()), "FAILED")
    finally:
        kv.close_store_connection(kv_conn)
示例#6
0
def main():
    '''
    The driver function that runs the Reduce job.

    Steps:
        1. Wait for the Job config and parse it as JSON.
        2. Set ``<job_id>_status`` to ``STARTED`` in the KV store.
        3. Read the data stored under ``config['partition_key']``.
        4. Sort/group that data with ``sort_intermediate_results``.
        5. Call ``run_reduce`` once per key of the sorted data.
        6. Store the collected reduce outputs.

    Any ``Exception`` raised along the way is logged and the Job status is
    set to ``FAILED``. The KV store connection is closed on every path,
    including the early aborts, which terminate the process through
    ``SystemExit``.
    '''
    # Local import: ``sys.exit`` replaces the interactive ``exit`` helper.
    import sys

    try:
        if job_id is None:
            logging.critical("Job Initialization Error! ABORTING")
            sys.exit()
        # Step 1 : Wait for Reduce Job config
        config = json.loads(wait_for_config())
        partition_key = config['partition_key']
        # Step 2 : Update status as started
        res = kv.set_command(kv_conn, job_id + '_status',
                             len("STARTED".encode()), "STARTED")
        if res != "STORED\r\n":
            logging.error("Status set failure : %s", res)
            logging.critical("ABORTING")
            sys.exit()
        # Step 3 : Read partition data from mapper
        message = kv.read_store(kv_conn, partition_key)
        # Step 4 : Sort intermediate data
        sorted_results = sort_intermediate_results(message)
        # Step 5 : Run Reduce on the sorted data
        # NOTE(review): presumably config['reduce_fn'] is a list of byte
        # values from the JSON config - confirm against the master.
        reduce_fn_serialized = bytes(config['reduce_fn'])
        reduce_output = [
            run_reduce(reduce_fn_serialized, key, sorted_results[key])
            for key in sorted_results
        ]
        # Step 6 : Store reduce results
        store_reduce_output(reduce_output)
    except Exception as e:
        # exc_info adds the traceback so the failing step can be located.
        logging.critical("JOB FAILED : %s", e, exc_info=True)
        kv.set_command(kv_conn, job_id + '_status',
                       len("FAILED".encode()), "FAILED")
    finally:
        kv.close_store_connection(kv_conn)