def test_workflow_postproc_client(local_service, hello_world):
    """Prepare post-processing input data for a workflow ranking and verify
    that the post-processing client exposes the runs and their files.
    """
    # -- Setup ----------------------------------------------------------------
    #
    # Register a user and the 'Hello World' workflow. Then create a ranking
    # (called with the value 4) as that user.
    with local_service() as api:
        user_1 = create_user(api)
        workflow_id = hello_world(api).workflow_id
    with local_service(user_id=user_1) as api:
        create_ranking(api, workflow_id, 4)
    # -- Fetch the ranking and prepare the post-processing data ---------------
    with local_service(user_id=user_1) as api:
        ranking_manager = api.workflows().ranking_manager
        workflow = api.workflows().workflow_repo.get_workflow(workflow_id)
        ranking = ranking_manager.get_ranking(workflow=workflow)
        # Copy the requested result file of every ranked run into a run
        # folder for the post-processing workflow.
        rundir = postproc.prepare_postproc_data(
            input_files=['results/analytics.json'],
            ranking=ranking,
            run_manager=api.runs().run_manager
        )
        # -- Access the prepared data via the post-processing client ----------
        runs = Runs(rundir)
        assert len(runs) == 4
        # The client lists the runs in the same order as the ranking.
        expected_ids = [entry.run_id for entry in ranking]
        actual_ids = [entry.run_id for entry in runs]
        assert expected_ids == actual_ids
        for rank in range(len(runs)):
            run = runs.get_run(runs.at_rank(rank).run_id)
            # The prepared file is available on disk ...
            assert run.get_file(name='results/analytics.json') is not None
            assert os.path.isfile(run.get_file(name='results/analytics.json'))
            # ... while files that were not requested resolve to None.
            assert run.get_file(name='results/greeting.txt') is None
Пример #2
0
def main(rundir, outputfile):
    """Collect the analytics results of all runs in a single output file.

    Reads the 'results/analytics.json' file of every run in the given
    post-processing run folder (via util.read_object) and writes the list of
    all documents as one JSON array to the output file.

    Parameters
    ----------
    rundir: string
        Folder with the post-processing input data that is accessed through
        the Runs client.
    outputfile: string
        Path to the output file. Missing parent folders are created. An
        existing file is overwritten.

    Raises
    ------
    OSError
        If the output folder cannot be created or the file cannot be written.
    """
    # Read the analytics document for all runs in the ranking.
    results = list()
    for run in Runs(rundir):
        filename = run.get_file(name='results/analytics.json')
        doc = util.read_object(filename=filename)
        results.append(doc)
    # Write analytics results. Ensure that output directory exists:
    # influenced by http://stackoverflow.com/a/12517490
    #
    # The directory name is empty if the output file is a bare file name
    # (i.e., it is created in the current working directory). Calling
    # os.makedirs('') would fail in that case, so skip the creation step.
    outputdir = os.path.dirname(outputfile)
    if outputdir and not os.path.exists(outputdir):
        try:
            os.makedirs(outputdir)
        except OSError as exc:  # guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    # Open in write mode (not append): appending a second JSON array to an
    # existing file would result in a document that is not valid JSON.
    with open(outputfile, "w") as f:
        json.dump(results, f)
Пример #3
0
def main(rundir, outputfile):
    """Create summary of analytics results for all runs.

    Reads the 'results/analytics.json' file of every run in the given
    post-processing run folder (via util.read_object) and writes the list of
    all documents as one JSON array to the output file. Execution is delayed
    by one second after each run so that tests can observe a post-processing
    workflow while it is still running.

    Parameters
    ----------
    rundir: string
        Folder with the post-processing input data that is accessed through
        the Runs client.
    outputfile: string
        Path to the output file. Missing parent folders are created. An
        existing file is overwritten.

    Raises
    ------
    OSError
        If the output folder cannot be created or the file cannot be written.
    """
    # Read the analytics document for all runs in the ranking.
    results = list()
    for run in Runs(rundir):
        filename = run.get_file(name='results/analytics.json')
        doc = util.read_object(filename=filename)
        results.append(doc)
        # Delay execution to allow for testing running post-processing
        # workflows
        time.sleep(1)
    # Write analytics results. Ensure that output directory exists:
    # influenced by http://stackoverflow.com/a/12517490
    #
    # The directory name is empty if the output file is a bare file name
    # (i.e., it is created in the current working directory). Calling
    # os.makedirs('') would fail in that case, so skip the creation step.
    outputdir = os.path.dirname(outputfile)
    if outputdir and not os.path.exists(outputdir):
        try:
            os.makedirs(outputdir)
        except OSError as exc:  # guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    # Open in write mode (not append): appending a second JSON array to an
    # existing file would result in a document that is not valid JSON.
    with open(outputfile, "w") as f:
        json.dump(results, f)
Пример #4
0
def main(rundir, k=25, timeout=10, outputfile=None):
    """Create a csv file containing the frequency of the k most frequent
    3-grams in the greeting files of all runs.

    Every line of a run's 'results/greetings.txt' file is stripped of leading
    and trailing whitespace. Each 3-character window of the line is converted
    to upper case and counted, unless it contains a space character.

    Parameters
    ----------
    rundir: string
        Folder with the post-processing input data that is accessed through
        the Runs client.
    k: int, default=25
        Number of most frequent n-grams that are written to the output.
    timeout: int or float, default=10
        Number of seconds to sleep after each run has been processed.
    outputfile: string, default=None
        Path to the output csv file. If None, the rows are written to
        standard output instead.
    """
    # Function-scope imports keep the added dependencies local to this block.
    import csv
    import sys

    # Count frequency of n-grams for all runs.
    ngrams = Counter()
    for run in Runs(rundir):
        with open(run.get_file('results/greetings.txt'), 'r') as f:
            for line in f:
                line = line.strip()
                # The range is empty for lines with fewer than three
                # characters, i.e., such lines contribute no n-grams.
                for i in range(len(line) - 2):
                    ng = line[i:i + 3].upper()
                    if ' ' not in ng:
                        ngrams[ng] += 1
        # Delay execution to allow for testing running post-processing
        # workflows
        time.sleep(timeout)
    # Output csv with two columns: ngram,count. Use the csv writer so that
    # n-grams containing a comma or a quote character are quoted properly
    # instead of corrupting the two-column layout. The line terminator
    # matches the '\n' that was written by the previous manual formatting.
    rows = ngrams.most_common(k)
    if outputfile is None:
        csv.writer(sys.stdout, lineterminator='\n').writerows(rows)
    else:
        with open(outputfile, 'w') as f:
            csv.writer(f, lineterminator='\n').writerows(rows)
def test_empty_run_set(tmpdir):
    """Ensure that the client can be created for a run folder whose run
    listing is empty and that run lookups return None.
    """
    # Write an empty run listing to the file that the client reads.
    util.write_object(filename=os.path.join(tmpdir, RUNS_FILE), obj=[])
    client = Runs(tmpdir)
    # An unknown run identifier yields None instead of raising an error.
    assert client.get_run('0000') is None
Пример #6
0
import plot as plt


if __name__ == '__main__':
    # Script entry point: call the plot function using the results from a
    # set of taggers. Selects the median model based on AUC.
    #
    # Expects exactly three command line arguments: the input run folder, the
    # ground truth file, and the output folder.
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        prog = os.path.basename(sys.argv[0])
        usage = ' '.join(['<in_dir>', '<groundtruth-file>', '<output-dir>'])
        print('Usage: {} {}'.format(prog, usage))
        sys.exit(-1)
    in_dir, input_truth_file, out_dir = cli_args
    # Read the submission information: one entry with name and result file
    # per run, e.g., [{'name': 'TreeNiN', 'file': ...}].
    results = []
    for run in Runs(in_dir):
        result_file = run.get_file('results/yProbBest.pkl')
        results.append({'name': run.name, 'file': result_file})
    # Generate the ROC-AUC plot in the output folder.
    roc_file = os.path.join(out_dir, 'ROC-AUC.png')
    plt.plot(
        tagger_results=results,
        input_truth_file=input_truth_file,
        start_index=400000,
        output_file=roc_file,
        get_model=plt.get_median_model
    )