def recompute(recalculation_id=None, tenant=None):
    """
    Execute a recomputation request
    :param recalculation_id: the id of the job to be recomputed
    :param tenant: tenant's name
    """
    # default paths
    fn_ar_cfg = "/etc/ar-compute-engine.conf"
    argo_exec = "/usr/libexec/ar-compute/bin"

    # Init configuration
    cfg = ArgoConfiguration(fn_ar_cfg)

    # Init logging
    log = init_log(cfg.log_mode, cfg.log_file, cfg.log_level, 'argo.recompute')

    # Check recomputation
    col = get_mongo_collection(
        cfg.mongo_host, cfg.mongo_port, "AR", "recalculations", log)

    recomputation = get_recomputation(col, recalculation_id, log)

    dates = get_time_period(recomputation)

    update_status(col, recalculation_id, "running", datetime.now(), log)
    loop_recompute(argo_exec, dates, tenant, cfg.jobs[tenant], log)
    update_status(col, recalculation_id, "done", datetime.now(), log)
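
# Illustrative only: a minimal argparse entry point for invoking recompute()
# from the command line. The flag names (-i/--id, -t/--tenant) are assumptions
# made for this sketch and are not necessarily the flags used by the shipped
# scripts.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Execute a recomputation request")
    parser.add_argument("-i", "--id", dest="recalculation_id", required=True,
                        help="id of the recomputation request to execute")
    parser.add_argument("-t", "--tenant", dest="tenant", required=True,
                        help="tenant name")
    cli_args = parser.parse_args()
    recompute(recalculation_id=cli_args.recalculation_id, tenant=cli_args.tenant)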
def main(tenant=None):
    """
    Check for pending recomputation requests and, if the number of running
    requests does not exceed the configured threshold, queue another one
    for execution
    :param tenant: tenant's name
    :return:
    """
    # default paths
    fn_ar_cfg = "/etc/ar-compute-engine.conf"
    argo_exec = "/usr/libexec/ar-compute/bin"
    arcomp_conf = "/etc/ar-compute"

    # Init configuration
    cfg = ArgoConfiguration(fn_ar_cfg)
    cfg.load_tenant_db_conf(os.path.join(arcomp_conf, tenant + "_db_conf.json"))
    threshold = cfg.threshold

    # Init logging
    log = init_log(cfg.log_mode, cfg.log_file, cfg.log_level, 'argo.recompute')

    db_name = cfg.get_mongo_database("ar")
    col = get_mongo_collection(cfg.mongo_host, cfg.mongo_port, db_name)

    num_pen, num_run = get_pending_and_running(col)

    log.info("Running recomputations: %s (threshold: %s)", num_run, threshold)

    try:
        run_recomputation(col, tenant, num_run, num_pen, threshold)
        log.info("Below threshold; recomputation sent for execution")
    except ValueError as ex:
        log.info(ex)
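
# The helpers used above (get_pending_and_running, run_recomputation) are
# defined elsewhere; the sketch below only illustrates one plausible shape
# for them, assuming recomputation documents carry a status field "s" with
# values such as "pending" and "running". It is not the actual implementation.
def get_pending_and_running(col):
    """Return (number of pending, number of running) recomputation requests."""
    num_pen = col.find({"s": "pending"}).count()
    num_run = col.find({"s": "running"}).count()
    return num_pen, num_run


def run_recomputation(col, tenant, num_run, num_pen, threshold):
    """Queue one pending recomputation if the running ones are below threshold."""
    if num_run >= threshold:
        raise ValueError("Threshold of running recomputations reached; nothing queued")
    if num_pen == 0:
        raise ValueError("No pending recomputations to queue")
    # pick one pending request and hand it over for execution (details omitted)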
def main(args=None): """ Script to retrieve relevant recomputations requests for a specific date :param args: Command line arguments """ # default paths fn_ar_cfg = "/etc/ar-compute-engine.conf" arcomp_conf = "/etc/ar-compute" # Init configuration cfg = ArgoConfiguration(fn_ar_cfg) cfg.load_tenant_db_conf(os.path.join(arcomp_conf, args.tenant + "_db_conf.json")) db_name = cfg.get_mongo_database("ar") col_recomputations = "recomputations" # Init logging log = init_log(cfg.log_mode, cfg.log_file, cfg.log_level, 'argo.mongo_recompute') # Get mongo collection col = get_mongo_collection(cfg.mongo_host, cfg.mongo_port, db_name, col_recomputations, log) results = get_mongo_results(col, args.date, args.job) log.info("Date: %s, relevant recomputations found: %s", args.date, len(results)) # Write results to file write_output(results, args.tenant, get_date_under(args.date), cfg.sync_path)
def get_poller_config(fn_ar_cfg="/etc/ar-compute-engine.conf",
                      logging_config='logging', default_config='default'):
    """
    Initialize the logger and retrieve the settings for the poller
    :param fn_ar_cfg: file from which to retrieve configuration
    :param logging_config: logging section of the configuration
    :param default_config: default section of the configuration
    :return: logger instance, mongo hostname, mongo port and threshold of
             running recomputations in a tuple
    """
    # Read Configuration file
    ar_config = SafeConfigParser()
    ar_config.read(fn_ar_cfg)

    # Initialize logging
    log_mode = ar_config.get(logging_config, 'log_mode')
    log_file = ar_config.get(logging_config, 'log_file') if log_mode == 'file' else None
    log_level = ar_config.get(logging_config, 'log_level')
    log = init_log(log_mode, log_file, log_level, 'argo.poller')

    # Get mongo configurations
    mongo_host = ar_config.get(default_config, 'mongo_host')
    mongo_port = ar_config.get(default_config, 'mongo_port')
    threshold = int(ar_config.get(default_config, 'recomp_threshold'))

    log.info("Recomputation threshold: %s", threshold)

    return log, mongo_host, mongo_port, threshold
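
# Illustrative usage (not part of the module): unpack the tuple returned by
# get_poller_config() and reach the recomputations collection. The database
# name "AR", the collection name "recalculations" and the status field "s"
# are assumptions borrowed from the other scripts in this repo, not values
# guaranteed by this function.
if __name__ == "__main__":
    from pymongo import MongoClient

    log, mongo_host, mongo_port, threshold = get_poller_config()
    client = MongoClient(str(mongo_host), int(mongo_port))
    col = client["AR"]["recalculations"]
    pending = col.find({"s": "pending"}).count()
    log.info("Pending recomputations: %s (threshold: %s)", pending, threshold)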
def main(args=None):

    # default config
    fn_ar_cfg = "/etc/ar-compute-engine.conf"
    arcomp_conf = "/etc/ar-compute/"

    # Init configuration
    cfg = utils.ArgoConfiguration(fn_ar_cfg)
    cfg.load_tenant_db_conf(os.path.join(arcomp_conf, args.tenant + "_db_conf.json"))

    # Init logging
    log = init_log(cfg.log_mode, cfg.log_file, cfg.log_level, 'argo.job_ar')

    # Split db.collection path strings to obtain database name and collection
    # name
    mongo_host = cfg.get_mongo_host("status")
    mongo_port = cfg.get_mongo_port("status")
    db_name = cfg.get_mongo_database("status")
    col_status = "status_metric"

    # Create a date integer for use in the database queries
    date_int = int(args.date.replace("-", ""))

    log.info("Connecting to mongo server: %s:%s", mongo_host, mongo_port)
    client = MongoClient(str(mongo_host), int(mongo_port))

    log.info("Regarding service status data...")
    log.info("Opening database: %s", db_name)
    db = client[db_name]

    cols = ["status_metrics", "status_endpoints",
            "status_services", "status_endpoint_groups"]

    # Iterate over collections
    for item in cols:

        log.info("Opening collection: %s", item)
        col = db[item]

        if args.report:
            num_of_rows = col.find(
                {"date_integer": date_int, "report": args.report}).count()
            log.info("Found %s entries for date %s and report %s",
                     num_of_rows, args.date, args.report)
        else:
            num_of_rows = col.find({"date_integer": date_int}).count()
            log.info("Found %s entries for date %s", num_of_rows, args.date)

        if num_of_rows > 0:

            if args.report:
                log.info("Remove entries for date: %s and report: %s",
                         args.date, args.report)
                col.delete_many(
                    {"date_integer": date_int, "report": args.report})
            else:
                log.info("Remove entries for date: %s", args.date)
                col.delete_many({"date_integer": date_int})

            log.info("Entries Removed!")

        else:
            log.info("Zero entries found. No need to remove anything")
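
# Note (illustrative): Cursor.count(), used above, was removed in pymongo 4.x.
# The small helper below, assuming pymongo >= 3.7, expresses the same counts
# with Collection.count_documents(); it is a sketch for reference, not code
# used by this script. delete_many() is unchanged in newer driver versions.
def count_entries(col, date_int, report=None):
    """Count status entries for a date (and optionally a report)."""
    query = {"date_integer": date_int}
    if report is not None:
        query["report"] = report
    return col.count_documents(query)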
def main(args=None):

    # default config
    fn_ar_cfg = "/etc/ar-compute-engine.conf"
    arcomp_conf = "/etc/ar-compute/"
    sdl_exec = "/usr/libexec/ar-compute/bin/"

    ArConfig = SafeConfigParser()
    ArConfig.read(fn_ar_cfg)

    # Initialize logging
    log_mode = ArConfig.get('logging', 'log_mode')
    log_file = None

    if log_mode == 'file':
        log_file = ArConfig.get('logging', 'log_file')

    # Set hadoop root logger settings
    os.environ["HADOOP_ROOT_LOGGER"] = ArConfig.get(
        'logging', 'hadoop_log_root')

    log_level = ArConfig.get('logging', 'log_level')
    log = init_log(log_mode, log_file, log_level, 'argo.job_cycle')

    # Get the available tenants from the config file
    tenant_list = ArConfig.get("jobs", "tenants")
    tenant_list = tenant_list.split(',')

    if args.tenant is not None:
        tenant_list = [args.tenant]

    # For each available tenant prepare and execute all of the tenant's jobs
    for tenant in tenant_list:

        # Get the specific tenant's job set
        job_set = ArConfig.get("jobs", tenant + '_jobs')
        job_set = job_set.split(',')

        # Notify that the job cycle has begun
        log.info("Job Cycle: started for tenant: %s and date: %s",
                 tenant, args.date)

        # Command to upload the prefilter data
        cmd_upload_metric = [
            os.path.join(sdl_exec, "upload_metric.py"),
            '-d', args.date, '-t', tenant]

        log.info("Job Cycle: Upload metric data to hdfs")
        run_cmd(cmd_upload_metric, log)

        log.info("Job Cycle: Iterate over jobs and submit them")

        # For each job generate a/r
        for job in job_set:

            log.info("Job Cycle: tenant %s has job named %s", tenant, job)

            # Command to submit the a/r job
            cmd_job_ar = [os.path.join(sdl_exec, "job_ar.py"),
                          '-d', args.date, '-t', tenant, '-j', job]
            run_cmd(cmd_job_ar, log)

            # Command to submit job status detail
            cmd_job_status = [os.path.join(sdl_exec, "job_status_detail.py"),
                              '-d', args.date, '-t', tenant, '-j', job]
            run_cmd(cmd_job_status, log)
def main(args=None):

    # default config
    fn_ar_cfg = "/etc/ar-compute-engine.conf"

    date_under = args.date.replace("-", "_")

    ArConfig = SafeConfigParser()
    ArConfig.read(fn_ar_cfg)

    # Get sync exec and path
    arsync_exec = ArConfig.get('connectors', 'sync_exec')
    arsync_lib = ArConfig.get('connectors', 'sync_path')

    # Get mode from config file
    ar_mode = ArConfig.get('default', 'mode')

    prefilter_clean = ArConfig.get('default', 'prefilter_clean')

    # Initialize logging
    log_mode = ArConfig.get('logging', 'log_mode')
    log_file = None

    if log_mode == 'file':
        log_file = ArConfig.get('logging', 'log_file')

    log_level = ArConfig.get('logging', 'log_level')
    log = init_log(log_mode, log_file, log_level, 'argo.upload_metric')

    # Inform the user whether argo runs locally or distributed
    if ar_mode == 'local':
        log.info("ARGO compute engine runs in LOCAL mode")
        log.info("metric data will be staged for computations locally")
    else:
        log.info("ARGO compute engine runs in CLUSTER mode")
        log.info("metric data will be uploaded to HDFS")

    # Call the prefilter if one is configured for the specified tenant
    if ArConfig.has_option('jobs', args.tenant + '_prefilter'):
        prefilter_exec = ArConfig.get('jobs', args.tenant + '_prefilter')

        cmd_pref = [os.path.join(arsync_exec, prefilter_exec), '-d', args.date]

        log.info("Calling %s for date: %s",
                 os.path.join(arsync_exec, prefilter_exec), args.date)
        run_cmd(cmd_pref, log)

        fn_prefilter = "prefilter_" + date_under + ".avro"
        local_prefilter = os.path.join(arsync_lib, args.tenant, fn_prefilter)

        log.info("Check if produced %s exists: %s",
                 local_prefilter, os.path.exists(local_prefilter))

    # If no prefilter is configured, copy the original data file as the
    # prefiltered result so it can be picked up and transferred to hdfs
    else:
        fn_mdata = 'argo-consumer_log_' + args.date + '.avro'
        fn_prefilter = "prefilter_" + date_under + ".avro"
        local_mdata = os.path.join(
            '/var/lib', 'argo-' + args.tenant.lower() + '-consumer', fn_mdata)
        local_prefilter = os.path.join(arsync_lib, args.tenant, fn_prefilter)

        cmd_copy = ['cp', local_mdata, local_prefilter]
        run_cmd(cmd_copy, log)

    if ar_mode == 'cluster':
        # compose hdfs destination
        # hdfs path = ./tenant/mdata/...
        hdfs_path = "./" + args.tenant + "/mdata/"
    else:
        # compose local temporary destination
        hdfs_path = "/tmp/" + args.tenant + "/mdata/"

    # Command to establish the tenant's metric data hdfs folder
    cmd_hdfs_mkdir = ['hadoop', 'fs', '-mkdir', '-p', hdfs_path]

    # Command to put the file to the hdfs destination
    cmd_hdfs = ['hadoop', 'fs', '-put', '-f', local_prefilter, hdfs_path]

    # Command to clear prefilter data after the hdfs transfer
    cmd_clean = ['rm', '-f', local_prefilter]

    log.info("Establish (if not present) the hdfs metric data directory")
    run_cmd(cmd_hdfs_mkdir, log)

    log.info("Transfer files to hdfs")
    run_cmd(cmd_hdfs, log)

    if prefilter_clean == "true":
        log.info("System configured to clean prefilter data after transfer")
        run_cmd(cmd_clean, log)

    log.info("Metric Data of tenant %s for date %s uploaded successfully to hdfs",
             args.tenant, args.date)
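
# For reference, the options read from /etc/ar-compute-engine.conf across
# these scripts imply a layout roughly like the sketch below. The section and
# option names come from the ConfigParser calls in this repo; the values are
# placeholders only, not a shipped configuration.
#
#   [default]
#   mode = cluster              ; or "local"
#   prefilter_clean = true
#   mongo_host = 127.0.0.1
#   mongo_port = 27017
#   recomp_threshold = 1
#
#   [logging]
#   log_mode = file             ; anything other than "file" means no log file
#   log_file = /var/log/ar-compute.log
#   log_level = INFO
#   hadoop_log_root = INFO,console
#
#   [connectors]
#   sync_conf = /etc/ar-sync
#   sync_exec = /usr/libexec/ar-sync
#   sync_path = /var/lib/ar-sync
#
#   [jobs]
#   tenants = TENANTA,TENANTB
#   TENANTA_jobs = JobA,JobB
#   TENANTA_prefilter = prefilter-tenanta.py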
def main(args=None):

    # default paths
    fn_ar_cfg = "/etc/ar-compute-engine.conf"
    arcomp_conf = "/etc/ar-compute/"
    arcomp_exec = "/usr/libexec/ar-compute/"
    stdl_exec = "/usr/libexec/ar-compute/bin"
    pig_script_path = "/usr/libexec/ar-compute/pig/"

    one_day_ago = utils.get_actual_date(args.date) - timedelta(days=1)
    prev_date = utils.get_date_str(one_day_ago)
    prev_date_under = utils.get_date_under(prev_date)
    date_under = utils.get_date_under(args.date)

    # Init configuration
    cfg = utils.ArgoConfiguration(fn_ar_cfg)
    cfg.load_tenant_db_conf(os.path.join(arcomp_conf, args.tenant + "_db_conf.json"))

    # Init logging
    log = init_log(cfg.log_mode, cfg.log_file, cfg.log_level, 'argo.job_status_detail')

    local_cfg_path = arcomp_conf

    # open job configuration file
    json_cfg_file = open(
        local_cfg_path + args.tenant + "_" + args.job + "_cfg.json")
    json_cfg = json.load(json_cfg_file)

    # Inform the user whether argo runs locally or distributed
    if cfg.mode == 'local':
        log.info("ARGO compute engine runs in LOCAL mode")
        log.info("computation job will be run locally")
    else:
        log.info("ARGO compute engine runs in CLUSTER mode")
        log.info("computation job will be submitted to the hadoop cluster")

    # Proposed hdfs pathways
    hdfs_mdata_path = './' + args.tenant + "/mdata/"
    hdfs_sync_path = './scratch/sync/' + args.tenant + \
        "/" + args.job + "/" + date_under + "/"

    # Proposed local pathways
    local_mdata_path = '/tmp/' + args.tenant + "/mdata/"
    local_sync_path = '/tmp/scratch/sync/' + args.tenant + \
        '/' + args.job + '/' + date_under + '/'
    local_cfg_path = arcomp_conf

    if cfg.mode == 'cluster':
        mode = 'cache'
        mdata_path = hdfs_mdata_path
        sync_path = hdfs_sync_path
        cfg_path = hdfs_sync_path
    else:
        mode = 'local'
        mdata_path = local_mdata_path
        sync_path = local_sync_path
        cfg_path = local_cfg_path

    # dictionary with the necessary pig parameters
    pig_params = {}
    pig_params['mdata'] = mdata_path + 'prefilter_' + date_under + '.avro'
    pig_params['p_mdata'] = mdata_path + 'prefilter_' + prev_date_under + '.avro'
    pig_params['egs'] = sync_path + 'group_endpoints.avro'
    pig_params['ggs'] = sync_path + 'group_groups.avro'
    pig_params['mps'] = sync_path + 'poem_sync.avro'
    pig_params['cfg'] = cfg_path + args.tenant + '_' + args.job + '_cfg.json'
    pig_params['aps'] = cfg_path + args.tenant + '_' + args.job + '_ap.json'
    pig_params['rec'] = cfg_path + 'recomputations_' + args.tenant + '_' + date_under + '.json'
    pig_params['ops'] = cfg_path + args.tenant + '_ops.json'
    pig_params['dt'] = args.date
    pig_params['mode'] = mode
    pig_params['flt'] = '1'
    pig_params['mongo_status_metrics'] = cfg.get_mongo_uri('status', 'status_metrics')
    pig_params['mongo_status_endpoints'] = cfg.get_mongo_uri('status', 'status_endpoints')
    pig_params['mongo_status_services'] = cfg.get_mongo_uri('status', 'status_services')
    pig_params['mongo_status_endpoint_groups'] = cfg.get_mongo_uri('status', 'status_endpoint_groups')

    cmd_pig = []

    # Append pig command
    cmd_pig.append('pig')

    # Append Pig local execution mode flag
    if cfg.mode == "local":
        cmd_pig.append('-x')
        cmd_pig.append('local')

    # Append Pig Parameters
    for item in pig_params:
        cmd_pig.append('-param')
        cmd_pig.append(item + '=' + pig_params[item])

    # Append Pig Executable Script
    cmd_pig.append('-f')
    cmd_pig.append(pig_script_path + 'compute-status.pig')

    # Command to clean status data from mongo
    cmd_clean_mongo_status = [
        os.path.join(stdl_exec, "mongo_clean_status.py"),
        '-d', args.date, '-t', args.tenant, '-r', json_cfg['id']]

    # Command to upload sync data to hdfs
    cmd_upload_sync = [
        os.path.join(stdl_exec, "upload_sync.py"),
        '-d', args.date, '-t', args.tenant, '-j', args.job]

    # Command to clean hdfs data
    cmd_clean_sync = ['hadoop', 'fs', '-rm', '-r', '-f', hdfs_sync_path]

    # Upload data to hdfs
    log.info("Uploading sync data to hdfs...")
    run_cmd(cmd_upload_sync, log)

    # Clean data from mongo
    log.info("Cleaning data from mongodb")
    run_cmd(cmd_clean_mongo_status, log)

    # Call pig
    log.info("Submitting pig compute status detail job...")
    run_cmd(cmd_pig, log)

    # Clean hdfs sync data
    if cfg.sync_clean == "true":
        log.info("System configured to clean sync hdfs data after job")
        run_cmd(cmd_clean_sync, log)

    log.info("Execution of status job for tenant %s for date %s completed!",
             args.tenant, args.date)
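
# Illustrative only: the loop above turns pig_params into "-param key=value"
# pairs before appending the script path. The helper below reproduces that
# assembly in isolation so it can be read (or tested) on its own; it is a
# sketch under the same assumptions, not code used by the script.
def build_pig_cmd(pig_params, script, local_mode=False):
    """Compose a pig command line from a parameter dictionary."""
    cmd = ['pig']
    if local_mode:
        cmd += ['-x', 'local']
    for key, value in pig_params.items():
        cmd += ['-param', '%s=%s' % (key, value)]
    cmd += ['-f', script]
    return cmd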
def main(args=None):

    # default config
    fn_ar_cfg = "/etc/ar-compute-engine.conf"

    date_under = args.date.replace("-", "_")

    ArConfig = SafeConfigParser()
    ArConfig.read(fn_ar_cfg)

    # Get sync exec and path
    arsync_exec = ArConfig.get('connectors', 'sync_exec')
    arsync_lib = ArConfig.get('connectors', 'sync_path')

    # Get mode from config file
    ar_mode = ArConfig.get('default', 'mode')

    prefilter_clean = ArConfig.get('default', 'prefilter_clean')

    # Initialize logging
    log_mode = ArConfig.get('logging', 'log_mode')
    log_file = None

    if log_mode == 'file':
        log_file = ArConfig.get('logging', 'log_file')

    log_level = ArConfig.get('logging', 'log_level')
    log = init_log(log_mode, log_file, log_level, 'argo.upload_metric')

    # Inform the user whether argo runs locally or distributed
    if ar_mode == 'local':
        log.info("ARGO compute engine runs in LOCAL mode")
        log.info("metric data will be staged for computations locally")
    else:
        log.info("ARGO compute engine runs in CLUSTER mode")
        log.info("metric data will be uploaded to HDFS")

    # Call the prefilter if one is configured for the specified tenant
    # FIXME: This conditional won't work if a prefilter wrapper is missing.
    # DETAILS: If a prefilter wrapper is not provided then the
    # local_prefilter variable will not be initialized. Note, however, that
    # this variable is used below and outside of this conditional.
    if ArConfig.has_option('jobs', args.tenant + '_prefilter'):
        prefilter_exec = ArConfig.get('jobs', args.tenant + '_prefilter')

        cmd_pref = [os.path.join(arsync_exec, prefilter_exec), '-d', args.date]

        log.info("Calling %s for date: %s",
                 os.path.join(arsync_exec, prefilter_exec), args.date)
        run_cmd(cmd_pref, log)

        fn_prefilter = "prefilter_" + date_under + ".avro"
        local_prefilter = os.path.join(arsync_lib, args.tenant, fn_prefilter)

        log.info("Check if produced %s exists: %s",
                 local_prefilter, os.path.exists(local_prefilter))

    if ar_mode == 'cluster':
        # compose hdfs destination
        # hdfs path = ./tenant/mdata/...
        hdfs_path = "./" + args.tenant + "/mdata/"
    else:
        # compose local temporary destination
        hdfs_path = "/tmp/" + args.tenant + "/mdata/"

    # Command to establish the tenant's metric data hdfs folder
    cmd_hdfs_mkdir = ['hadoop', 'fs', '-mkdir', '-p', hdfs_path]

    # Command to put the file to the hdfs destination
    cmd_hdfs = ['hadoop', 'fs', '-put', '-f', local_prefilter, hdfs_path]

    # Command to clear prefilter data after the hdfs transfer
    cmd_clean = ['rm', '-f', local_prefilter]

    log.info("Establish (if not present) the hdfs metric data directory")
    run_cmd(cmd_hdfs_mkdir, log)

    log.info("Transfer files to hdfs")
    run_cmd(cmd_hdfs, log)

    if prefilter_clean == "true":
        log.info("System configured to clean prefilter data after transfer")
        run_cmd(cmd_clean, log)

    log.info("Metric Data of tenant %s for date %s uploaded successfully to hdfs",
             args.tenant, args.date)
def test_init_log_raise_on_none_filename():
    with pytest.raises(TypeError) as excinfo:
        init_log(log_mode='file', log_file=None,
                 log_level='INFO', log_name='argo.test')
    assert "Log filename is NoneType" in str(excinfo.value)
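
# A minimal sketch of the contract this test exercises, assuming the standard
# logging module. It is named _init_log_sketch so it does not shadow the real
# init_log under test; the real implementation may support additional log
# modes and formatting.
import logging


def _init_log_sketch(log_mode, log_file, log_level, log_name):
    """Return a configured logger; raise TypeError on a missing filename in file mode."""
    if log_mode == 'file' and log_file is None:
        raise TypeError("Log filename is NoneType")
    logger = logging.getLogger(log_name)
    logger.setLevel(getattr(logging, log_level.upper(), logging.INFO))
    handler = logging.FileHandler(log_file) if log_mode == 'file' else logging.StreamHandler()
    logger.addHandler(handler)
    return logger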
def main(args=None):

    # default config
    fn_ar_cfg = "/etc/ar-compute-engine.conf"
    arcomp_conf = "/etc/ar-compute/"
    arcomp_exec = "/usr/libexec/ar-compute/"
    stdl_exec = "/usr/libexec/ar-compute/bin"

    ArConfig = SafeConfigParser()
    ArConfig.read(fn_ar_cfg)

    # Get sync path
    arsync_lib = ArConfig.get('connectors', 'sync_path')

    # Initialize logging
    log_mode = ArConfig.get('logging', 'log_mode')
    log_file = None

    if log_mode == 'file':
        log_file = ArConfig.get('logging', 'log_file')

    log_level = ArConfig.get('logging', 'log_level')
    log = init_log(log_mode, log_file, log_level, 'argo.sync_backup')

    # Parse date argument
    actual_date = datetime.strptime(args.date, '%Y-%m-%d')

    # Set the day to the first of the month
    actual_date = actual_date.replace(day=1)

    # First day of the month minus one day takes us back one month
    # (or even one year)
    month_ago = actual_date - timedelta(days=1)

    fn_sync_tar = 'sync_backup_' + args.tenant + '_' + \
        str(month_ago.strftime("%B")) + '_' + str(month_ago.year) + '.tar'

    local_tar = os.path.join('/tmp', fn_sync_tar)

    # If a tar file from a previous backup already exists, remove it
    if os.path.exists(local_tar):
        os.remove(local_tar)

    sync_tar = tarfile.open(local_tar, mode='w')

    # Grab all available jobs in the system
    job_set = ArConfig.get("jobs", args.tenant + "_jobs")
    job_set = job_set.split(',')

    # Create query strings to list the appropriate files
    query_down = '*_' + \
        month_ago.strftime("%Y") + "_" + month_ago.strftime("%m") + '_*.avro'
    query_dash = '*_' + \
        month_ago.strftime("%Y") + "-" + month_ago.strftime("%m") + '-*.avro'

    # Downtimes are special because they always reside in the ar-sync root
    # (might change in the future)
    downtime_list = glob.glob(os.path.join(arsync_lib, query_dash))

    # Add downtimes to the tar file
    log.info("Adding downtime files for %s of %s",
             month_ago.strftime("%B"), month_ago.year)

    for f in downtime_list:
        tar_path = args.tenant + '/' + os.path.basename(f)
        sync_tar.add(f, tar_path)

    # Iterate over job folders
    for item in job_set:
        jobsync_list = glob.glob(
            os.path.join(arsync_lib, args.tenant, item, query_down))

        log.info("Adding sync files for %s of %s for Job: %s",
                 month_ago.strftime("%B"), month_ago.year, item)

        for f in jobsync_list:
            tar_path = args.tenant + '/' + item + '/' + os.path.basename(f)
            sync_tar.add(f, tar_path)

    sync_tar.close()

    # Create HDFS backup path
    hdfs_dest = args.tenant + '/backup/sync/'

    cmd_establish_hdfs = ['hadoop', 'fs', '-mkdir', '-p', hdfs_dest]

    log.info("Establish hdfs backup directory: %s", hdfs_dest)
    run_cmd(cmd_establish_hdfs, log)

    # Transfer tar archive to hdfs
    cmd_hdfs_put = ['hadoop', 'fs', '-put', '-f', local_tar, hdfs_dest]

    log.info("Transfer backup from local:%s to hdfs:%s", local_tar, hdfs_dest)
    run_cmd(cmd_hdfs_put, log)

    # Clean up temporary tar
    log.info("Cleanup tmp data")

    if os.path.exists(local_tar):
        os.remove(local_tar)

    log.info("Backup Completed to hdfs")
def main(args=None):

    # Default core paths
    fn_ar_cfg = "/etc/ar-compute-engine.conf"
    arcomp_conf = "/etc/ar-compute/"
    argo_exec = "/usr/libexec/ar-compute/bin/"

    actual_date = datetime.strptime(args.date, '%Y-%m-%d')

    # Create a second date used by the file formats
    date_under = args.date.replace("-", "_")

    # Initiate config file parser to read global ar-compute-engine.conf
    ArConfig = SafeConfigParser()
    ArConfig.read(fn_ar_cfg)

    # Get sync conf, exec and path
    arsync_conf = ArConfig.get('connectors', 'sync_conf')
    arsync_exec = ArConfig.get('connectors', 'sync_exec')
    arsync_lib = ArConfig.get('connectors', 'sync_path')

    # Initialize logging
    log_mode = ArConfig.get('logging', 'log_mode')
    log_file = None

    if log_mode == 'file':
        log_file = ArConfig.get('logging', 'log_file')

    log_level = ArConfig.get('logging', 'log_level')
    log = init_log(log_mode, log_file, log_level, 'argo.upload_sync')

    # Get mode from config file
    ar_mode = ArConfig.get('default', 'mode')

    # Inform the user whether argo runs locally or distributed
    if ar_mode == 'local':
        log.info("ARGO compute engine runs in LOCAL mode")
        log.info("sync data will be staged for computations locally")
    else:
        log.info("ARGO compute engine runs in CLUSTER mode")
        log.info("sync data will be uploaded to HDFS")

    # Compose the needed sync filenames using the correct prefixes, dates and
    # file extensions (avro/json)
    fn_ops = args.tenant + '_ops.json'
    fn_aps = args.tenant + '_' + args.job + '_ap.json'
    fn_cfg = args.tenant + '_' + args.job + '_cfg.json'
    fn_rec = "recomputations_" + args.tenant + "_" + date_under + ".json"

    if ar_mode == 'cluster':
        # compose hdfs temporary destination
        # hdfs dest = ./scratch/sync/tenant/job/date/...
        # sync files are not meant to be kept in hdfs (unless archived in
        # batches)
        hdfs_dest = './scratch/sync/' + args.tenant + \
            '/' + args.job + '/' + date_under + '/'
    else:
        # compose local temporary destination
        hdfs_dest = '/tmp/scratch/sync/' + args.tenant + \
            '/' + args.job + '/' + date_under + '/'

    # Compose the local ar-sync files job folder
    # arsync job = /path/to/synced_stuff/tenant/job/...
    arsync_job = arsync_lib + '/' + args.tenant + '/' + args.job + '/'

    # Call downtimes latest info
    cmd_call_downtimes = [
        os.path.join(arsync_exec, 'downtimes-gocdb-connector.py'),
        '-d', args.date,
        '-c', os.path.join(arsync_conf, args.tenant.lower() + '-customer.conf')]
    log.info("Calling downtime sync connector to give us the latest downtime info")
    run_cmd(cmd_call_downtimes, log)

    # Call script to retrieve a json file of recomputations for the specific
    # date/tenant from mongodb
    cmd_mongo_recomputations = [
        os.path.join(argo_exec, 'mongo_recompute.py'),
        '-d', args.date, '-t', args.tenant, '-j', args.job]
    log.info("Retrieving relevant recomputation requests...")
    run_cmd(cmd_mongo_recomputations, log)

    # Compose the local paths for files (paths+filenames)
    local_egroups = getSyncFile(
        actual_date, os.path.join(arsync_job, "group_endpoints_"), '.avro', '_', log)
    local_ggroups = getSyncFile(
        actual_date, os.path.join(arsync_job, "group_groups_"), '.avro', '_', log)
    local_weights = getSyncFile(
        actual_date, os.path.join(arsync_job, "weights_"), '.avro', '_', log)
    local_mps = getSyncFile(
        actual_date, os.path.join(arsync_job, "poem_sync_"), '.avro', '_', log)
    local_downtimes = getSyncFile(
        actual_date, os.path.join(arsync_job, "downtimes_"), '.avro', '_', log)

    local_aps = os.path.join(arcomp_conf, fn_aps)
    local_ops = os.path.join(arcomp_conf, fn_ops)
    local_cfg = os.path.join(arcomp_conf, fn_cfg)
    local_rec = os.path.join(arsync_lib, fn_rec)

    # Check if the files exist
    log.info("Check if %s exists: %s", local_aps, os.path.exists(local_aps))
    log.info("Check if %s exists: %s", local_ops, os.path.exists(local_ops))
    log.info("Check if %s exists: %s", local_cfg, os.path.exists(local_cfg))
    log.info("Check if %s exists: %s", local_rec, os.path.exists(local_rec))

    # Remove scratch sync directory in hdfs (because we don't keep unarchived
    # sync files)
    cmd_clearHdfs = ['hadoop', 'fs', '-rm', '-r', '-f', hdfs_dest]

    # Establish new scratch sync directory in hdfs for this job
    cmd_estHdfs = ['hadoop', 'fs', '-mkdir', '-p', hdfs_dest]

    # Transfer endpoint groups topo from local to hdfs
    cmd_putEgroups = ['hadoop', 'fs', '-put', '-f', local_egroups,
                      hdfs_dest + 'group_endpoints.avro']

    # Transfer group of groups topo from local to hdfs
    cmd_putGgroups = ['hadoop', 'fs', '-put', '-f', local_ggroups,
                      hdfs_dest + 'group_groups.avro']

    # Transfer weight factors from local to hdfs
    cmd_putWeights = ['hadoop', 'fs', '-put', '-f', local_weights,
                      hdfs_dest + 'weights.avro']

    # Transfer metric profile from local to hdfs
    cmd_putMps = ['hadoop', 'fs', '-put', '-f', local_mps,
                  hdfs_dest + 'poem_sync.avro']

    # Transfer downtime info from local to hdfs
    cmd_putDowntimes = ['hadoop', 'fs', '-put', '-f', local_downtimes,
                        hdfs_dest + 'downtimes.avro']

    # Transfer availability profile from local to hdfs
    cmd_putAps = ['hadoop', 'fs', '-put', '-f', local_aps, hdfs_dest]

    # Transfer operations from local to hdfs
    cmd_putOps = ['hadoop', 'fs', '-put', '-f', local_ops, hdfs_dest]

    # Transfer job configuration file from local to hdfs
    cmd_putCfg = ['hadoop', 'fs', '-put', '-f', local_cfg, hdfs_dest]

    # Transfer recalculation requests (if any) from local to hdfs
    cmd_putRec = ['hadoop', 'fs', '-put', '-f', local_rec, hdfs_dest]

    log.info("Remove old scratch sync folder: %s", hdfs_dest)
    run_cmd(cmd_clearHdfs, log)

    log.info("Establish new scratch sync folder %s", hdfs_dest)
    run_cmd(cmd_estHdfs, log)

    log.info("Transfer metric profile")
    run_cmd(cmd_putMps, log)

    log.info("Transfer endpoint group topology")
    run_cmd(cmd_putEgroups, log)

    log.info("Transfer group of groups topology")
    run_cmd(cmd_putGgroups, log)

    log.info("Transfer weight factors")
    run_cmd(cmd_putWeights, log)

    log.info("Transfer downtimes")
    run_cmd(cmd_putDowntimes, log)

    log.info("Transfer availability profile")
    run_cmd(cmd_putAps, log)

    log.info("Transfer operations file")
    run_cmd(cmd_putOps, log)

    log.info("Transfer job configuration")
    run_cmd(cmd_putCfg, log)

    log.info("Transfer recalculation requests")
    run_cmd(cmd_putRec, log)

    # Clear local temporary recomputation file
    os.remove(local_rec)

    log.info("Sync Data of tenant %s for job %s for date %s uploaded successfully to hdfs",
             args.tenant, args.job, args.date)