def main():
    """Parse the command-line options, run the job and report its timing.

    The DBS instance name is validated case-insensitively and passed to
    ``run`` in upper case. The elapsed time is printed and also written to
    ``spark_exec_time_campaigns.txt``.

    Raises:
        Exception: if the requested DBS instance is not supported.
    """
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    print("Input arguments: %s" % opts)
    time0 = time.time()

    fout = opts.fout
    date = opts.date
    verbose = opts.verbose
    yarn = opts.yarn
    inst = opts.inst

    # Compare case-insensitively so that e.g. "GLOBAL" is accepted too;
    # str() keeps the "unsupported instance" error for non-string values.
    if str(inst).lower() in ('global', 'phys01', 'phys02', 'phys03'):
        inst = inst.upper()
    else:
        raise Exception('Unsupported DBS instance "%s"' % inst)

    patterns = opts.patterns.split(',') if opts.patterns else []
    antipatterns = opts.antipatterns.split(',') if opts.antipatterns else []

    run(fout, date, yarn, verbose, patterns, antipatterns, inst)

    print('Start time : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time0)))
    print('End time : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))

    # Compute the elapsed time once so the printed and the stored values agree.
    elapsed = elapsed_time(time0)
    print('Elapsed time: %s' % elapsed)
    with open("spark_exec_time_campaigns.txt", "w") as text_file:
        text_file.write(elapsed)
def main():
    """Run the AAA, CMSSW, EOS and JM jobs on one Spark context and print timings.

    Parses the command-line options, validates the DBS instance name
    (case-insensitively), creates the Spark and Hive contexts, initializes
    the DBS tables and then runs the four jobs one after another, timing
    each of them separately.

    Raises:
        Exception: if the requested DBS instance is not supported.
    """
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    print("Input arguments: %s" % opts)
    start_time = time.time()

    verbose = opts.verbose
    yarn = opts.yarn
    inst = opts.inst
    date = opts.date
    fout = opts.fout

    if inst.lower() in ['global', 'phys01', 'phys02', 'phys03']:
        inst = inst.upper()
    else:
        raise Exception('Unsupported DBS instance "%s"' % inst)

    # Create spark context
    ctx = spark_context('cms', yarn, verbose)
    try:
        # Create SQL context to be used for SQL queries
        sql_context = HiveContext(ctx)

        # Initialize DBS tables (will be used with AAA, CMSSW)
        dbs_tables(sql_context, inst=inst, verbose=verbose)

        aaa_start_time = time.time()
        run_aaa(date, fout, ctx, sql_context, verbose)
        aaa_elapsed_time = elapsed_time(aaa_start_time)

        cmssw_start_time = time.time()
        run_cmssw(date, fout, ctx, sql_context, verbose)
        cmssw_elapsed_time = elapsed_time(cmssw_start_time)

        eos_start_time = time.time()
        run_eos(date, fout, ctx, sql_context, verbose)
        eos_elapsed_time = elapsed_time(eos_start_time)

        jm_start_time = time.time()
        run_jm(date, fout, ctx, sql_context, verbose)
        jm_elapsed_time = elapsed_time(jm_start_time)
    finally:
        # Stop the Spark context even when one of the jobs fails, so it is
        # not left running after an error.
        ctx.stop()

    print('Start time : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(start_time)))
    print('End time : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Total elapsed time : %s' % elapsed_time(start_time))

    print('AAA elapsed time : %s' % aaa_elapsed_time)
    print('CMSSW elapsed time : %s' % cmssw_elapsed_time)
    print('EOS elapsed time : %s' % eos_elapsed_time)
    print('JM elapsed time : %s' % jm_elapsed_time)
def main():
    """Entry point: parse the options, call ``run`` and report the timing.

    The elapsed time is printed and also written to ``phedex_time_data.txt``.
    """
    option_manager = OptionParser()
    opts = option_manager.parser.parse_args()
    print("Input arguments: %s" % opts)

    started = time.time()
    run(opts.date, opts.fout, opts.yarn, opts.verbose)

    stamp_format = '%Y-%m-%d %H:%M:%S GMT'
    started_at = time.strftime(stamp_format, time.gmtime(started))
    print('Start time : %s' % started_at)
    finished_at = time.strftime(stamp_format, time.gmtime(time.time()))
    print('End time : %s' % finished_at)
    print('Elapsed time: %s sec' % elapsed_time(started))

    with open('phedex_time_data.txt', 'w') as time_file:
        time_file.write(elapsed_time(started))
def main():
    """Entry point: parse the options, call ``run`` and print the timing."""
    option_manager = OptionParser()
    opts = option_manager.parser.parse_args()
    print("Input arguments: %s" % opts)

    started = time.time()

    # Unpack the options that ``run`` needs, in its positional order.
    fout = opts.fout
    hdir = opts.hdir
    date = opts.date
    yarn = opts.yarn
    verbose = opts.verbose
    run(fout, hdir, date, yarn, verbose)

    stamp_format = '%Y-%m-%d %H:%M:%S GMT'
    started_at = time.strftime(stamp_format, time.gmtime(started))
    print('Start time : %s' % started_at)
    finished_at = time.strftime(stamp_format, time.gmtime(time.time()))
    print('End time : %s' % finished_at)
    print('Elapsed time: %s sec' % elapsed_time(started))
def main():
    """Parse the command-line options, run the job and print its timing.

    The DBS instance name is validated case-insensitively and passed to
    ``run`` in upper case.

    Raises:
        Exception: if the requested DBS instance is not supported.
    """
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    print("Input arguments: %s" % opts)
    time0 = time.time()

    inst = opts.inst
    # Compare case-insensitively so that e.g. "GLOBAL" is accepted too;
    # str() keeps the "unsupported instance" error for non-string values.
    if str(inst).lower() in ('global', 'phys01', 'phys02', 'phys03'):
        inst = inst.upper()
    else:
        raise Exception('Unsupported DBS instance "%s"' % inst)

    run(opts.date, opts.fout, opts.yarn, opts.verbose, inst)

    print('Start time : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time0)))
    print('End time : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Elapsed time: %s sec' % elapsed_time(time0))
def main():
    """Aggregate the CMSSW, AAA, EOS and JobMonitoring streams into one output.

    Parses the command-line options, validates the DBS instance name
    (case-insensitively), sets up the Spark and SQL contexts, initializes the
    DBS and PhEDEx tables, registers the helper functions for SQL queries and
    runs the four per-stream aggregations. The results are unioned, sorted by
    descending "nacc" and written under ``<fout>/<short date>``. Timing
    information is printed at the end.

    Raises:
        Exception: if the requested DBS instance is not supported.
    """
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    print("Input arguments: %s" % opts)
    start_time = time.time()

    verbose = opts.verbose
    yarn = opts.yarn
    inst = opts.inst
    date = opts.date
    fout = opts.fout
    aaa_hdir = opts.aaa_hdir

    if inst.lower() in ['global', 'phys01', 'phys02', 'phys03']:
        inst = inst.upper()
    else:
        raise Exception('Unsupported DBS instance "%s"' % inst)

    # Create spark context
    ctx = spark_context('cms', yarn, verbose)
    try:
        quiet_logs(ctx)

        # Create SQL context to be used for SQL queries
        sql_context = SQLContext(ctx)

        # Initialize DBS tables
        dbs_tables(sql_context, inst=inst, verbose=verbose, tables=['fdf', 'bdf', 'ddf'])

        # Initialize PhEDEx table to be used in file_block_site table
        phedex_tables(sql_context, verbose=verbose)

        # Register the helper functions to be used with SQL queries
        sql_functions = (
            ("clean_site_name", clean_site_name),
            ("tier_from_site_name", tier_from_site_name),
            ("dn2uuid", dn2uuid),
            ("parse_app", parse_app),
            ("stream4app", stream4app),
            ("parse_dn", parse_dn),
        )
        for sql_name, function in sql_functions:
            sql_context.udf.register(sql_name, function)

        f_b_s_start_time = time.time()
        # Create temp table with file name, block name, site name and site from PhEDEx
        create_file_block_site_table(ctx, sql_context, verbose)
        f_b_s_elapsed_time = elapsed_time(f_b_s_start_time)

        cmssw_start_time = time.time()
        aggregated_cmssw_df = run_agg_cmssw(date, ctx, sql_context, verbose)
        cmssw_elapsed_time = elapsed_time(cmssw_start_time)

        aaa_start_time = time.time()
        # Truthiness test: a missing (None) or empty AAA directory both fall
        # back to the default of run_agg_aaa instead of failing on len(None).
        if aaa_hdir:
            aggregated_aaa_df = run_agg_aaa(date, ctx, sql_context, aaa_hdir, verbose)
        else:
            aggregated_aaa_df = run_agg_aaa(date, ctx, sql_context, verbose=verbose)
        aaa_elapsed_time = elapsed_time(aaa_start_time)

        eos_start_time = time.time()
        aggregated_eos_df = run_agg_eos(date, ctx, sql_context, verbose)
        eos_elapsed_time = elapsed_time(eos_start_time)

        jm_start_time = time.time()
        aggregated_jm_df = run_agg_jm(date, ctx, sql_context, verbose)
        jm_elapsed_time = elapsed_time(jm_start_time)

        if verbose:
            print('Will union outputs from all streams to a single dataframe')
        # Schema for output is:
        # site name, dataset name, number of accesses, distinct users, stream
        all_df = aggregated_cmssw_df.unionAll(aggregated_aaa_df)
        all_df = all_df.unionAll(aggregated_eos_df)
        all_df = all_df.unionAll(aggregated_jm_df)
        all_df = all_df.sort(desc("nacc"))

        if verbose:
            print('Done joining all outputs to a single dataframe')

        # Only the combined dataframe is written; the per-stream dataframes
        # are not written out separately.
        fout = fout + "/" + short_date_string(date)
        output_dataframe(fout, all_df, verbose)

        if verbose:
            cmssw_df_size = aggregated_cmssw_df.count()
            aaa_df_size = aggregated_aaa_df.count()
            eos_df_size = aggregated_eos_df.count()
            jm_df_size = aggregated_jm_df.count()
            all_df_size = all_df.count()

            print('CMSSW:')
            aggregated_cmssw_df.show(10)
            aggregated_cmssw_df.printSchema()

            print('AAA:')
            aggregated_aaa_df.show(10)
            aggregated_aaa_df.printSchema()

            print('EOS:')
            aggregated_eos_df.show(10)
            aggregated_eos_df.printSchema()

            print('JobMonitoring:')
            aggregated_jm_df.show(10)
            aggregated_jm_df.printSchema()

            print('Aggregated all:')
            all_df.show(10)
            all_df.printSchema()

            print('Output record count:')
            print('Output record count CMSSW : %s' % cmssw_df_size)
            print('Output record count AAA : %s' % aaa_df_size)
            print('Output record count EOS : %s' % eos_df_size)
            print('Output record count JobMonitoring : %s' % jm_df_size)
            print('Output record count Total: : %s' % all_df_size)
    finally:
        # Stop the Spark context even when one of the steps fails, so it is
        # not left running after an error.
        ctx.stop()

    print('Start time : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(start_time)))
    print('End time : %s' % time.strftime('%Y-%m-%d %H:%M:%S GMT', time.gmtime(time.time())))
    print('Total elapsed time : %s' % elapsed_time(start_time))

    print('FileBlockSite elapsed time : %s' % f_b_s_elapsed_time)
    print('AAA elapsed time : %s' % aaa_elapsed_time)
    print('CMSSW elapsed time : %s' % cmssw_elapsed_time)
    print('EOS elapsed time : %s' % eos_elapsed_time)
    print('JobMonitoring elapsed time : %s' % jm_elapsed_time)