def test_summarize_by_fs():
    """cli.index_darshanlogs.summarize_by_fs
    """
    tokiotest.check_darshan()
    result = tokio.cli.index_darshanlogs.summarize_by_fs(
        tokiotest.SAMPLE_DARSHAN_LOG)

    # the summary must exist and contain non-empty top-level sections
    assert result
    for section in ('summaries', 'headers', 'mounts'):
        assert section in result
    for section in ('summaries', 'headers', 'mounts'):
        assert result[section]

    # parse the same log directly to obtain the ground-truth mount list
    expected = tokio.connectors.darshan.Darshan(tokiotest.SAMPLE_DARSHAN_LOG)
    expected.darshan_parser_base()

    print("Verify that result contains only the subset of mounts actually used")
    assert len(result['mounts']) < len(expected['mounts'])
    for mount in result['mounts']:
        if mount == "UNKNOWN":
            continue
        print("Ensure that result mount %s is in actual Darshan log" % mount)
        assert mount in expected['mounts']

    print("Verify that there aren't more mounts than files opened")
    assert len(result['summaries']) >= len(result['mounts'])

    # spot-check a few required header fields
    for header_key in ('filename', 'exe', 'username', 'exename'):
        assert header_key in result['headers']
def run_cache_connector(config, to_file=False):
    """Test a connector cache (cache_*.py) CLI interface.

    Runs the configured cache binary and passes its output through each of
    the configured validator callables.

    Args:
        config (dict): test definition containing 'binary' (the CLI module
            to run), 'args' (its argument list), 'validators' (callables
            applied to the output), and optionally 'runfunction' and
            'validate_contents'.
        to_file (bool): if True, write output to ``tokiotest.TEMP_FILE``
            via ``-o`` instead of capturing stdout.
    """
    # darshan caching needs darshan support; check_darshan handles the
    # skip-if-unavailable logic
    if config['binary'] == tokiobin.cache_darshan:
        tokiotest.check_darshan()

    runfunction = config.get('runfunction', run_connector)

    if to_file:
        argv = ['-o', tokiotest.TEMP_FILE.name] + config['args']
        # NOTE: original used Python 2 print statements, which are syntax
        # errors under Python 3; converted to print() to match the rest of
        # this file
        print("Caching to", tokiotest.TEMP_FILE.name)
        print("Executing:", ' '.join(argv))
        output_str = runfunction(config['binary'], argv)

        # (validate_contents == True) means the associated validator function
        # expects the contents of the output file rather than the name of the
        # output file
        if config.get('validate_contents', True):
            output_str = tokiotest.TEMP_FILE.read()
    else:
        argv = config['args']
        print("Caching to stdout")
        print("Executing:", ' '.join(argv))
        output_str = runfunction(config['binary'], argv)

    for validator in config['validators']:
        validator(output_str)
def test_update():
    """cli.index_darshanlogs with an existing database

    Builds an index database, then re-runs the indexer against databases in
    several states (fully populated, inconsistent, half-populated, empty)
    and verifies the final row counts always match the original.
    """
    tokiotest.check_darshan()
    # create a database with a couple of entries
    argv = ['--quiet', '--output', tokiotest.TEMP_FILE.name
           ] + [os.path.dirname(SAMPLE_DARSHAN_LOGS[0])]
    print("Executing: %s" % " ".join(argv))
    tokiotest.run_bin(tokio.cli.index_darshanlogs, argv)

    # hack on database
    conn = sqlite3.connect(tokiotest.TEMP_FILE.name)
    cursor = conn.cursor()

    # record the per-table row counts of the freshly built database; every
    # subsequent re-index must converge back to these counts
    print("Initial database:")
    orig_num_rows = {}
    for table in TABLES:
        orig_num_rows[table] = get_table_len(table=table, conn=conn, cursor=cursor)

    # re-indexing an already-complete database must be idempotent
    print("Test database with all headers, all summaries")
    tokiotest.run_bin(tokio.cli.index_darshanlogs, argv)
    for table in TABLES:
        num_rows = get_table_len(table=table, conn=conn, cursor=cursor)
        assert num_rows == orig_num_rows[table]

    @nose.tools.raises(sqlite3.IntegrityError)
    def all_headers_half_summaries():
        """Test database with half of the summaries rows missing
        """
        # deleting summaries but keeping their headers leaves the database
        # inconsistent; re-indexing is expected to trip an IntegrityError
        # (hence the @raises decorator), so the asserts below normally
        # never execute
        print("Test database with all headers, only half summaries")
        cursor.execute("DELETE FROM summaries WHERE log_id % 2 = 0")
        conn.commit()
        # sqlite3.IntegrityError
        tokiotest.run_bin(tokio.cli.index_darshanlogs, argv)
        for table in TABLES:
            num_rows = get_table_len(table=table, conn=conn, cursor=cursor)
            assert num_rows == orig_num_rows[table]
    all_headers_half_summaries()

    # removing matching headers restores consistency; re-indexing should
    # repopulate the missing half
    print("Test consistent and half-populated database")
    cursor.execute("DELETE FROM headers WHERE log_id % 2 = 0")
    conn.commit()
    tokiotest.run_bin(tokio.cli.index_darshanlogs, argv)
    for table in TABLES:
        num_rows = get_table_len(table=table, conn=conn, cursor=cursor)
        assert num_rows == orig_num_rows[table]

    # an emptied database should be rebuilt from scratch to the same counts
    print("Test unpopulated database")
    cursor.execute("DELETE FROM headers")
    cursor.execute("DELETE FROM summaries")
    conn.commit()
    tokiotest.run_bin(tokio.cli.index_darshanlogs, argv)
    for table in TABLES:
        num_rows = get_table_len(table=table, conn=conn, cursor=cursor)
        assert num_rows == orig_num_rows[table]
def wrap_load_darshanlogs_assert_raises(test_input, exception):
    """Allow named args to pass through nosetests; expect an exception
    """
    tokiotest.check_darshan()
    print("Running: %s" % test_input['descr'])
    print("Test args: %s" % test_input['params'])
    # load_darshanlogs must raise the given exception for these params
    kwargs = test_input['params']
    nose.tools.assert_raises(
        exception, tokio.tools.darshan.load_darshanlogs, **kwargs)
def test_good_log():
    """cli.darshan_bad_ost: detect no false positives in a good Darshan log
    """
    tokiotest.check_darshan()
    cli_args = ['--json', "-p", MODEST_PVALUE_CUTOFF, SAMPLE_GOOD_DARSHAN_LOG]
    stdout_text = tokiotest.run_bin(tokio.cli.darshan_bad_ost, cli_args)
    # a healthy log should implicate zero OSTs
    flagged_osts = json.loads(stdout_text)
    assert len(flagged_osts) == 0
def test_single_file_log():
    """cli.darshan_bad_ost: handle log with insufficient data for correlation
    """
    tokiotest.check_darshan()
    cli_args = ['--json', SAMPLE_1FILE_DARSHAN_LOG]
    stdout_text = tokiotest.run_bin(tokio.cli.darshan_bad_ost, cli_args)
    # too little data to correlate, so nothing should be flagged
    flagged_osts = json.loads(stdout_text)
    assert len(flagged_osts) == 0
def test_get_biggest_api():
    """ summarize_job.get_biggest_api() functionality """
    tokiotest.check_darshan()
    cli_args = ['--json', tokiotest.SAMPLE_DARSHAN_LOG]
    json_output = tokiotest.run_bin(tokiobin.summarize_job, cli_args)
    # the JSON output must carry the biggest-read-api key
    assert verify_output_json(json_output, key='darshan_biggest_read_api')
def test_total():
    """ darshan_parser_total() method """
    tokiotest.check_darshan()
    parsed_log = tokio.connectors.darshan.Darshan(tokiotest.SAMPLE_DARSHAN_LOG)
    parsed_log.darshan_parser_total()
    # generic structural checks, then total-counter-specific checks
    verify_darshan(parsed_log)
    verify_total_counters(parsed_log)
def test_get_biggest_api():
    """ cli.summarize_job.get_biggest_api() functionality """
    tokiotest.check_darshan()
    cli_args = ['--json', tokiotest.SAMPLE_DARSHAN_LOG]
    print("Executing: %s" % ' '.join(cli_args))
    json_output = tokiotest.run_bin(tokio.cli.summarize_job, cli_args)
    # the JSON output must carry the biggest-read-api key
    assert verify_output_json(json_output, key='darshan_biggest_read_api')
def test_input_files():
    """bin/summarize_darshanlogs.py with multiple input logs

    Passes several Darshan logs on the command line and verifies that a
    non-empty JSON summary is produced.
    """
    tokiotest.check_darshan()
    argv = SAMPLE_DARSHAN_LOGS
    # converted from Python 2 print statements (syntax errors on Python 3)
    # to print() calls, consistent with the rest of this file
    print("Executing:", " ".join(argv))
    output_str = tokiotest.run_bin(tokiobin.summarize_darshanlogs, argv)
    decoded_result = json.loads(output_str)
    assert len(decoded_result) > 0
def test_multithreaded():
    """bin/summarize_darshanlogs.py --threads

    Runs the summarizer with four worker threads and verifies that a
    non-empty JSON summary is produced.
    """
    tokiotest.check_darshan()
    argv = ['--threads', '4'] + SAMPLE_DARSHAN_LOGS
    # converted from Python 2 print statements (syntax errors on Python 3)
    # to print() calls, consistent with the rest of this file
    print("Executing:", " ".join(argv))
    output_str = tokiotest.run_bin(tokiobin.summarize_darshanlogs, argv)
    decoded_result = json.loads(output_str)
    assert len(decoded_result) > 0
def test_input_file():
    """cli.index_darshanlogs with one input log
    """
    tokiotest.check_darshan()
    cli_args = ['--output', tokiotest.TEMP_FILE.name] + [SAMPLE_DARSHAN_LOGS[0]]
    print("Executing: %s" % " ".join(cli_args))
    tokiotest.run_bin(tokio.cli.index_darshanlogs, cli_args)
    verify_index_db(tokiotest.TEMP_FILE.name)
    # a single input log should yield exactly one row in the headers table
    num_headers = get_table_len(tokio.cli.index_darshanlogs.HEADERS_TABLE,
                                output_file=tokiotest.TEMP_FILE.name)
    assert num_headers == 1
def test_multithreaded():
    """cli.index_darshanlogs --threads
    """
    # unconditionally skipped; the threaded path is known to deadlock
    raise nose.SkipTest("threading causes deadlocks")
    # everything below is intentionally unreachable until the deadlock is
    # resolved
    tokiotest.check_darshan()
    cli_args = (['--threads', '4', '--output', tokiotest.TEMP_FILE.name]
                + SAMPLE_DARSHAN_LOGS)
    print("Executing: %s" % " ".join(cli_args))
    tokiotest.run_bin(tokio.cli.index_darshanlogs, cli_args)
    verify_index_db(tokiotest.TEMP_FILE.name)
def test_input_dir():
    """bin/summarize_darshanlogs.py with input dir

    Points the summarizer at a directory of sample logs and verifies that a
    non-empty JSON summary is produced.
    """
    # Need lots of error/warning suppression since our input dir contains a ton of non-Darshan logs
    warnings.filterwarnings('ignore')
    tokiotest.check_darshan()
    argv = [os.path.dirname(SAMPLE_DARSHAN_LOGS[0])]
    # converted from Python 2 print statements (syntax errors on Python 3)
    # to print() calls, consistent with the rest of this file
    print("Executing:", " ".join(argv))
    output_str = tokiotest.run_bin(tokiobin.summarize_darshanlogs, argv)
    decoded_result = json.loads(output_str)
    assert len(decoded_result) > 0
def test_multi_file_log():
    """cli.darshan_bad_ost: correctly handle multiple input logs
    """
    tokiotest.check_darshan()
    cli_args = ['--json',
                '-c', MODEST_CORRELATION_CUTOFF,
                SAMPLE_BAD_DARSHAN_LOG,
                SAMPLE_GOOD_DARSHAN_LOG]
    stdout_text = tokiotest.run_bin(tokio.cli.darshan_bad_ost, cli_args)
    # with the good log mixed in, the modest cutoff should flag nothing
    flagged_osts = json.loads(stdout_text)
    assert len(flagged_osts) == 0
def test_max_mb():
    """cli.index_darshanlogs, lite parser
    """
    tokiotest.check_darshan()
    # --max-mb of 1/1024 MiB (= 1 KiB) is smaller than any sample log
    cli_args = (['--max-mb', str(1.0 / 1024.0),
                 '--output', tokiotest.TEMP_FILE.name]
                + SAMPLE_DARSHAN_LOGS)
    print("Executing: %s" % " ".join(cli_args))
    tokiotest.run_bin(tokio.cli.index_darshanlogs, cli_args)
    verify_index_db(tokiotest.TEMP_FILE.name)
def test_input_dir():
    """cli.index_darshanlogs with input dir
    """
    tokiotest.check_darshan()
    log_dir = os.path.dirname(SAMPLE_DARSHAN_LOGS[0])
    cli_args = ['--quiet', '--output', tokiotest.TEMP_FILE.name, log_dir]
    print("Executing: %s" % " ".join(cli_args))
    tokiotest.run_bin(tokio.cli.index_darshanlogs, cli_args)
    verify_index_db(tokiotest.TEMP_FILE.name)
    # the directory holds multiple logs, so expect more than one header row
    num_headers = get_table_len(tokio.cli.index_darshanlogs.HEADERS_TABLE,
                                output_file=tokiotest.TEMP_FILE.name)
    assert num_headers > 1
def test_csv():
    """ bin/summarize_job.py: darshan and LMT data (csv) """
    tokiotest.check_darshan()
    csv_output = tokiotest.run_bin(tokiobin.summarize_job,
                                   [tokiotest.SAMPLE_DARSHAN_LOG])
    # the CSV output must carry each expected summary column
    for csv_key in ('darshan_agg_perf_by_slowest_posix',
                    'darshan_biggest_read_api',
                    'darshan_biggest_read_fs',
                    'fs_tot_gibs_written'):
        assert verify_output_csv(csv_output, key=csv_key)
def test_bad_log():
    """cli.darshan_bad_ost: detect a very bad OST
    """
    tokiotest.check_darshan()
    cli_args = ['--json',
                "-p", STRONG_PVALUE_CUTOFF,
                "-c", STRONG_CORRELATION_CUTOFF,
                SAMPLE_BAD_DARSHAN_LOG]
    stdout_text = tokiotest.run_bin(tokio.cli.darshan_bad_ost, cli_args)
    flagged_osts = json.loads(stdout_text)
    print("Received %d very bad OSTs:" % len(flagged_osts))
    print(json.dumps(flagged_osts, indent=4))
    # even the strict cutoffs should still catch exactly the one bad OST
    assert len(flagged_osts) == 1
def test_csv():
    """ cli.summarize_job: darshan and LMT data (csv) """
    tokiotest.check_darshan()
    cli_args = [tokiotest.SAMPLE_DARSHAN_LOG]
    print("Executing: %s" % ' '.join(cli_args))
    csv_output = tokiotest.run_bin(tokio.cli.summarize_job, cli_args)
    # the CSV output must carry each expected summary column
    for csv_key in ('darshan_agg_perf_by_slowest_posix',
                    'darshan_biggest_read_api',
                    'darshan_biggest_read_fs',
                    'fs_tot_gibs_written'):
        assert verify_output_csv(csv_output, key=csv_key)
def wrap_load_darshanlogs(test_input):
    """Allow named args to pass through nosetests
    """
    tokiotest.check_darshan()
    print("Running: %s" % test_input['descr'])
    print("Test args: %s" % test_input['params'])
    kwargs = test_input['params']
    expected_exception = test_input.get('expect_exception')
    if not expected_exception:
        # happy path: load the logs and apply the per-test pass criterion
        results = tokio.tools.darshan.load_darshanlogs(**kwargs)
        assert (test_input['pass_criteria'])(results)
    else:
        # failure path: the load itself must raise the declared exception
        nose.tools.assert_raises(
            expected_exception, tokio.tools.darshan.load_darshanlogs, **kwargs)
def test_darshan_summaries():
    """ bin/summarize_job.py: multiple Darshan logs (csv) """
    tokiotest.check_darshan()
    cli_args = [tokiotest.SAMPLE_DARSHAN_LOG, SAMPLE_DARSHAN_LOG_2]
    csv_output = tokiotest.run_bin(tokiobin.summarize_job, cli_args)
    # two logs were passed, so per-log keys should appear in two rows
    assert verify_output_csv(csv_output,
                             key='darshan_agg_perf_by_slowest_posix',
                             expected_rows=2)
    assert verify_output_csv(csv_output, key='darshan_biggest_read_api')
    assert verify_output_csv(csv_output, key='darshan_biggest_read_fs')
    assert verify_output_csv(csv_output,
                             key='fs_tot_gibs_written',
                             expected_rows=2)
def test_with_lfsstatus():
    """ bin/summarize_job.py --ost --ost-fullness --ost-map """
    tokiotest.check_darshan()
    cli_args = ['--json',
                '--ost',
                '--ost-fullness', tokiotest.SAMPLE_OSTFULLNESS_FILE,
                '--ost-map', tokiotest.SAMPLE_OSTMAP_FILE,
                tokiotest.SAMPLE_DARSHAN_LOG]
    json_output = tokiotest.run_bin(tokiobin.summarize_job, cli_args)
    # the JSON output must carry both the darshan keys and the fshealth key
    for json_key in ('darshan_agg_perf_by_slowest_posix',
                     'darshan_biggest_read_api',
                     'darshan_biggest_read_fs',
                     'fs_tot_gibs_written',
                     'fshealth_ost_overloaded_pct'):
        assert verify_output_json(json_output, key=json_key)
def test_with_nersc_jobsdb():
    """ bin/summarize_job.py --concurrentjobs --jobhost """
    tokiotest.check_darshan()
    cli_args = ['--json',
                '--concurrentjobs', tokiotest.SAMPLE_NERSCJOBSDB_FILE,
                '--jobhost', tokiotest.SAMPLE_DARSHAN_JOBHOST,
                tokiotest.SAMPLE_DARSHAN_LOG]
    json_output = tokiotest.run_bin(tokiobin.summarize_job, cli_args)
    # the JSON output must carry both the darshan keys and the jobsdb key
    for json_key in ('darshan_agg_perf_by_slowest_posix',
                     'darshan_biggest_read_api',
                     'darshan_biggest_read_fs',
                     'fs_tot_gibs_written',
                     'jobsdb_concurrent_nodehrs'):
        assert verify_output_json(json_output, key=json_key)
def test_lite_vs_full():
    """cli.index_darshanlogs, lite/full parser equivalence

    Runs both summarize_by_fs (full parser) and summarize_by_fs_lite over
    every sample log and verifies the two produce equivalent headers,
    mounts, and summaries (modulo counters the lite path cannot provide).
    """
    tokiotest.check_darshan()
    for darshan_log in SAMPLE_DARSHAN_LOGS:
        print("Attempting " + darshan_log)
        dict1 = tokio.cli.index_darshanlogs.summarize_by_fs(darshan_log)
        dict2 = tokio.cli.index_darshanlogs.summarize_by_fs_lite(darshan_log)

        print("=== Full ===")
        print(json.dumps(dict1, indent=4, sort_keys=True))
        print("=== Lite ===")
        print(json.dumps(dict2, indent=4, sort_keys=True))

        # a blanket dict comparison is too strict (see the per-key skips
        # below), so compare section by section instead
        # assert dict1 == dict2

        # headers and mounts must match exactly, key by key
        for table in 'headers', 'mounts':
            print("Comparing table '%s'; len full(%d) vs lite(%d)"
                  % (table, len(dict1[table]), len(dict2[table])))
            assert len(dict1[table]) == len(dict2[table])
            for key, val in dict1[table].items():
                print("%s->key[%s]: lite(%s) == full(%s)?"
                      % (table, key, dict2[table].get(key), val))
                assert dict2[table].get(key) == val

        # both parsers must have found the same set of mount points
        assert len(dict1['summaries']) == len(dict2['summaries'])
        for mount in dict1['summaries']:
            assert mount in dict2['summaries']
            for key, val in dict1['summaries'][mount].items():
                if key in ('posix_files',
                           'stdio_files',
                           'f_close_end_timestamp',
                           'f_open_end_timestamp'):
                    # darshan2 cannot distinguish stdio records from posix records
                    # darshan2 also does not have equivalent start timestamps
                    continue
                print("summaries->%s->key[%s]: lite(%s) == full(%s)?"
                      % (mount, key, dict2['summaries'][mount].get(key), val))
                # on mismatch, dump both sides before the assert fires so
                # the failure is debuggable from the test log
                if dict2['summaries'][mount].get(key) != val:
                    print("=== Full ===")
                    print(json.dumps(dict1['summaries'][mount],
                                     indent=4,
                                     sort_keys=True))
                    print("=== Lite ===")
                    print(json.dumps(dict2['summaries'][mount],
                                     indent=4,
                                     sort_keys=True))
                assert dict2['summaries'][mount].get(key) == val
def test_no_bulk_insert():
    """cli.index_darshanlogs --no-bulk-insert

    Builds the index twice — once via the default bulk-insert path and once
    with --no-bulk-insert — and verifies the two databases contain the same
    rows (ignoring arbitrary ids).  Also re-runs the non-bulk path over the
    existing database to check idempotence.
    """
    tokiotest.check_darshan()
    # close the temp file handle so the indexer can create the database file
    # itself at that path
    tokiotest.TEMP_FILE.close()

    # generate database using bulk insert code path (default)
    assert not os.path.isfile(tokiotest.TEMP_FILE.name)
    argv = ['--output', tokiotest.TEMP_FILE.name] + SAMPLE_DARSHAN_LOGS
    print("Executing: %s" % " ".join(argv))
    tokiotest.run_bin(tokio.cli.index_darshanlogs, argv)
    rows_truth = verify_index_db(tokiotest.TEMP_FILE.name)

    # generate database using non-bulk insert code path
    os.unlink(tokiotest.TEMP_FILE.name)
    assert not os.path.isfile(tokiotest.TEMP_FILE.name)
    argv = ['--no-bulk-insert', '--output', tokiotest.TEMP_FILE.name
           ] + SAMPLE_DARSHAN_LOGS
    print("Executing: %s" % " ".join(argv))
    tokiotest.run_bin(tokio.cli.index_darshanlogs, argv)
    rows_test = verify_index_db(tokiotest.TEMP_FILE.name)

    # both code paths must produce the same number of rows...
    assert rows_truth
    assert len(rows_truth) == len(rows_test)
    # ...and the same row contents, comparing column by column
    for rowid, row in enumerate(rows_truth):
        print("Truth row:     %s" % str(row))
        print("Pinserted row: %s" % str(rows_test[rowid]))
        # note the [2:]; skip the log_id and fs_id since they are arbitrary
        # assert row[2:] == rows_test[rowid][2:]
        compared_rows = 0
        for rowname in row.keys():
            # *_id columns are assigned arbitrarily, so exclude them
            if not rowname.endswith('_id'):
                compared_rows += 1
                print("(%s)%s == (%s)%s?" % (rowname,
                                             row[rowname],
                                             rowname,
                                             rows_test[rowid][rowname]))
                assert row[rowname] == rows_test[rowid][rowname]
        # guard against vacuously passing when every column was skipped
        assert compared_rows

    # might as well check idempotence too!
    assert os.path.isfile(tokiotest.TEMP_FILE.name)
    argv = ['--no-bulk-insert', '--output', tokiotest.TEMP_FILE.name
           ] + SAMPLE_DARSHAN_LOGS
    print("Executing: %s" % " ".join(argv))
    tokiotest.run_bin(tokio.cli.index_darshanlogs, argv)
    rows_test = verify_index_db(tokiotest.TEMP_FILE.name)
def wrap_function(test_input):
    """Allow named args to pass through nosetests
    """
    print("Running: %s" % test_input['descr'])

    # translate the named test parameters into find_darshanlogs CLI args
    params = test_input['params']
    cli_args = []
    if params['datetime_start'] is not None:
        cli_args += ['--start', params['datetime_start'].strftime("%Y-%m-%d")]
    if params['datetime_end'] is not None:
        cli_args += ['--end', params['datetime_end'].strftime("%Y-%m-%d")]
    if params['username'] is not None:
        cli_args += ['--username', params['username']]
    if params['jobid'] is not None:
        cli_args += ['--jobid', str(params['jobid'])]
    if 'which' in params:
        # only the --load path actually parses darshan logs
        tokiotest.check_darshan()
        cli_args += ['--load', params['which']]
    if 'system' in params:
        cli_args += ['--host', params['system']]

    print("Test args: %s" % cli_args)

    expected_exception = test_input.get('expect_exception')
    if expected_exception:
        # nose.tools.assert_raises doesn't seem to work correctly here, so
        # catch the expected exception by hand
        caught = False
        try:
            output_str = tokiotest.run_bin(tokio.cli.find_darshanlogs,
                                           cli_args)
        except expected_exception:
            caught = True
        assert caught
    else:
        output_str = tokiotest.run_bin(tokio.cli.find_darshanlogs, cli_args)
        results = output_str.splitlines()
        assert (test_input['pass_criteria'])(results)
def test_with_topology():
    """ bin/summarize_job.py --topology --slurm-jobid

    requires either an SDB cache file or access to xtdb2proc
    requires either access to Slurm or a Slurm job cache file (to map
    jobid to node list)
    """
    tokiotest.check_darshan()
    cli_args = ['--topology', tokiotest.SAMPLE_XTDB2PROC_FILE,
                '--slurm-jobid', tokiotest.SAMPLE_SLURM_CACHE_FILE,
                '--json',
                tokiotest.SAMPLE_DARSHAN_LOG]
    json_output = tokiotest.run_bin(tokiobin.summarize_job, cli_args)
    # the JSON output must carry the darshan keys plus the topology key
    for json_key in ('darshan_agg_perf_by_slowest_posix',
                     'darshan_biggest_read_api',
                     'darshan_biggest_read_fs',
                     'fs_tot_gibs_written',
                     'topology_job_max_radius'):
        assert verify_output_json(json_output, key=json_key)
def test_all():
    """ ensure that all parsers produce non-conflicting keys """
    tokiotest.check_darshan()
    # try parsing in different orders just to make sure that no method is
    # nuking the others; these are the same four orderings the original
    # test exercised, expressed data-driven
    orderings = (
        ('darshan_parser_perf', 'darshan_parser_base', 'darshan_parser_total'),
        ('darshan_parser_base', 'darshan_parser_perf', 'darshan_parser_total'),
        ('darshan_parser_base', 'darshan_parser_total', 'darshan_parser_perf'),
        ('darshan_parser_perf', 'darshan_parser_total', 'darshan_parser_base'),
    )
    for ordering in orderings:
        darshan = tokio.connectors.darshan.Darshan(
            tokiotest.SAMPLE_DARSHAN_LOG)
        for parser_name in ordering:
            getattr(darshan, parser_name)()
        # regardless of ordering, every counter family must survive intact
        verify_darshan(darshan)
        verify_perf_counters(darshan)
        verify_base_counters(darshan)
        verify_total_counters(darshan)
def test_bogus_darshans():
    """ bin/summarize_job.py: mix of valid and invalid Darshan logs """
    tokiotest.check_darshan()
    cli_args = ['--silent-errors',
                tokiotest.SAMPLE_DARSHAN_LOG,     # valid log
                tokiotest.SAMPLE_XTDB2PROC_FILE,  # not valid log
                SAMPLE_DARSHAN_LOG_2,             # valid log
                'garbagefile']                    # file doesn't exist
    csv_output = tokiotest.run_bin(tokiobin.summarize_job, cli_args)
    # only the two valid logs should survive into the per-log rows
    assert verify_output_csv(csv_output,
                             key='darshan_agg_perf_by_slowest_posix',
                             expected_rows=2)
    assert verify_output_csv(csv_output, key='darshan_biggest_read_api')
    assert verify_output_csv(csv_output, key='darshan_biggest_read_fs')
    assert verify_output_csv(csv_output,
                             key='fs_tot_gibs_written',
                             expected_rows=2)