def test_workflow_postproc_client(local_service, hello_world):
    """Test preparing and accessing post-processing results."""
    # -- Setup ----------------------------------------------------------------
    # Create four groups for the 'Hello World' workflow, each with one
    # successful run.
    with local_service() as api:
        user_id = create_user(api)
        wf_id = hello_world(api).workflow_id
    with local_service(user_id=user_id) as api:
        create_ranking(api, wf_id, 4)
    # -- Get ranking in decreasing order of avg_count. ------------------------
    with local_service(user_id=user_id) as api:
        workflow = api.workflows().workflow_repo.get_workflow(wf_id)
        leaderboard = api.workflows().ranking_manager.get_ranking(
            workflow=workflow
        )
        # Prepare the data directory for the post-processing workflow.
        rundir = postproc.prepare_postproc_data(
            input_files=['results/analytics.json'],
            ranking=leaderboard,
            run_manager=api.runs().run_manager
        )
        # The post-processing client exposes the prepared runs in
        # ranking order.
        runs = Runs(rundir)
        assert len(runs) == 4
        assert [r.run_id for r in leaderboard] == [r.run_id for r in runs]
        for rank in range(len(runs)):
            run = runs.get_run(runs.at_rank(rank).run_id)
            analytics_file = run.get_file(name='results/analytics.json')
            assert analytics_file is not None
            assert os.path.isfile(analytics_file)
            # Only the requested input file is copied into the run folder.
            assert run.get_file(name='results/greeting.txt') is None
def main(rundir, outputfile):
    """Write greeting analytics for every run in the given run directory
    to the output file as a single JSON array.

    Parameters
    ----------
    rundir: string
        Directory with the prepared post-processing data for all runs.
    outputfile: string
        Path of the JSON file that receives the list of result documents.
    """
    # Collect the analytics document for each run in the ranking.
    results = list()
    for run in Runs(rundir):
        filename = run.get_file(name='results/analytics.json')
        doc = util.read_object(filename=filename)
        results.append(doc)
    # Ensure that the output directory exists. Guard against an empty
    # dirname (outputfile given as a bare filename), which would make
    # os.makedirs raise FileNotFoundError. exist_ok avoids the
    # check-then-create race condition without an errno dance.
    outdir = os.path.dirname(outputfile)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    # NOTE(review): mode 'at' appends; running twice produces concatenated
    # JSON documents that json.load cannot parse. Preserved for
    # compatibility -- confirm whether 'w' is intended.
    with open(outputfile, "at") as f:
        json.dump(results, f)
def main(rundir, outputfile):
    """Create a summary of analytics results for all runs.

    Parameters
    ----------
    rundir: string
        Directory with the prepared post-processing data for all runs.
    outputfile: string
        Path of the JSON file that receives the list of result documents.
    """
    # Collect the analytics document for each run in the ranking.
    results = list()
    for run in Runs(rundir):
        filename = run.get_file(name='results/analytics.json')
        doc = util.read_object(filename=filename)
        results.append(doc)
    # Delay execution to allow for testing running post-processing
    # workflows.
    time.sleep(1)
    # Ensure that the output directory exists. Guard against an empty
    # dirname (outputfile given as a bare filename), which would make
    # os.makedirs raise FileNotFoundError. exist_ok avoids the
    # check-then-create race condition without an errno dance.
    outdir = os.path.dirname(outputfile)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    # NOTE(review): mode 'at' appends; running twice produces concatenated
    # JSON documents that json.load cannot parse. Preserved for
    # compatibility -- confirm whether 'w' is intended.
    with open(outputfile, "at") as f:
        json.dump(results, f)
def main(rundir, k=25, timeout=10, outputfile=None, n=3):
    """Create a csv file containing the frequency of the k most frequent
    n-grams in the greeting files of all runs. Counts only those n-grams
    that do not contain a whitespace character.

    Parameters
    ----------
    rundir: string
        Directory with the prepared post-processing data for all runs.
    k: int, default=25
        Number of most-frequent n-grams written to the output file.
    timeout: int, default=10
        Seconds to sleep before writing results (supports testing of
        running post-processing workflows).
    outputfile: string
        Path of the csv file that receives the `ngram,count` rows.
    n: int, default=3
        Length of the counted n-grams (previously hard-coded to 3; the
        default preserves the original behavior).
    """
    # Count frequency of n-grams over all runs.
    ngrams = Counter()
    for run in Runs(rundir):
        with open(run.get_file('results/greetings.txt'), 'r') as f:
            for line in f:
                line = line.strip()
                if len(line) >= n:
                    # Slide a window of width n over the line; count
                    # upper-cased n-grams without whitespace.
                    for i in range(len(line) - (n - 1)):
                        ng = line[i:i + n].upper()
                        if ' ' not in ng:
                            ngrams[ng] += 1
    # Delay execution to allow for testing running post-processing
    # workflows.
    time.sleep(timeout)
    # Output csv file with two columns: ngram,count.
    with open(outputfile, 'w') as f:
        for ngram, count in ngrams.most_common(k):
            f.write('{},{}\n'.format(ngram, count))
def test_empty_run_set(tmpdir):
    """Test initializing a client for an empty run set."""
    # Write an empty ranking listing to the expected runs file.
    util.write_object(filename=os.path.join(tmpdir, RUNS_FILE), obj=[])
    # Any run identifier lookup on an empty run set yields no result.
    assert Runs(tmpdir).get_run('0000') is None
import plot as plt


if __name__ == '__main__':
    """Main entry point: call the plot function using the result from a
    set of taggers. Selects the median model based on AUC.
    """
    args = sys.argv[1:]
    if len(args) != 3:
        prog = os.path.basename(sys.argv[0])
        usage = ' '.join(['<in_dir>', '<groundtruth-file>', '<output-dir>'])
        print('Usage: {} {}'.format(prog, usage))
        sys.exit(-1)
    in_dir, input_truth_file, out_dir = args
    # Collect submission information: one entry per run with the pickled
    # probability file, e.g. [{'name': 'TreeNiN', 'file': ...}].
    results = [
        {'name': run.name, 'file': run.get_file('results/yProbBest.pkl')}
        for run in Runs(in_dir)
    ]
    plt.plot(
        tagger_results=results,
        input_truth_file=input_truth_file,
        start_index=400000,
        output_file=os.path.join(out_dir, 'ROC-AUC.png'),
        get_model=plt.get_median_model
    )