Пример #1
0
def _example_outputs(examples):
    """Return a fresh list holding element ``[1]`` of every example."""
    return [example[1] for example in examples]


def _log_mixture(biscore, uniscore):
    """Return ``log(0.75 * exp(biscore) + 0.25 * exp(uniscore))`` stably.

    Evaluating the expression directly fails for very negative scores:
    ``math.exp`` underflows to ``0.0`` and ``math.log(0.0)`` raises
    ``ValueError``.  Factoring out the larger score keeps at least one
    exponential equal to 1, so the argument of ``math.log`` stays positive.
    """
    peak = max(biscore, uniscore)
    if peak == float("-inf"):
        # Both components have zero probability; the mixture does too.
        return peak
    return peak + math.log(0.75 * math.exp(biscore - peak) +
                           0.25 * math.exp(uniscore - peak))


def add_cutoff_values(tasks, ll_cutoff):
    """Set ``ll_cutoff`` on every task according to the requested mode.

    Args:
        tasks: iterable of task objects; each is mutated in place.  Every
            mode except ``None`` reads ``task.examples`` (and ``"gt"`` also
            reads ``task.name``).
        ll_cutoff: selects how the cutoff is computed:
            * ``None`` or ``"None"`` -- ``task.ll_cutoff`` is set to ``None``.
            * ``"gt"`` -- ``task.ll_cutoff`` is set to ``None``; ``task.gt``
              and ``task.gt_test`` are filled from ``get_gt_ll`` using the
              task's own examples and ``regexHeldOutExamples(task)``.
            * ``"plus"`` -- result of ``regex_plus_bound``.
            * ``"bigram"`` / ``"unigram"`` -- score of the examples under a
              model built from ``show_tasks(makeNewTasks())``.
            * ``"mix"`` -- log of a 0.75 / 0.25 mixture of the exponentiated
              bigram and unigram scores.

    Returns:
        The same ``tasks`` object that was passed in.

    Raises:
        AssertionError: if ``ll_cutoff`` is not one of the values above.
    """
    if ll_cutoff is None or ll_cutoff == "None":
        for task in tasks:
            task.ll_cutoff = None
        return tasks

    if ll_cutoff == "gt":
        from dreamcoder.domains.regex.makeRegexTasks import regexHeldOutExamples
        for task in tasks:
            task.ll_cutoff = None
            task.gt = get_gt_ll(task.name, _example_outputs(task.examples))
            task.gt_test = get_gt_ll(
                task.name, _example_outputs(regexHeldOutExamples(task)))
        return tasks

    if ll_cutoff == "plus":
        for task in tasks:
            task.ll_cutoff = regex_plus_bound(_example_outputs(task.examples))
        return tasks

    if ll_cutoff == "bigram":
        # Imported only where needed so the other modes do not depend on it.
        from dreamcoder.domains.regex.makeRegexTasks import makeNewTasks
        eprint("WARNING: using entire corpus to make bigram model")
        # The corpus is regenerated here rather than reused from the caller.
        model = make_corpus_bigram(show_tasks(makeNewTasks()))
        for task in tasks:
            task.ll_cutoff = bigram_corpus_score(
                _example_outputs(task.examples), model)
        return tasks

    if ll_cutoff == "unigram":
        from dreamcoder.domains.regex.makeRegexTasks import makeNewTasks
        eprint("WARNING: using entire corpus to make unigram model")
        # The corpus is regenerated here rather than reused from the caller.
        model = make_corpus_unigram(show_tasks(makeNewTasks()))
        for task in tasks:
            task.ll_cutoff = unigram_corpus_score(
                _example_outputs(task.examples), model)
        return tasks

    if ll_cutoff == "mix":
        from dreamcoder.domains.regex.makeRegexTasks import makeNewTasks
        eprint("WARNING: using entire corpus to make bigram model")
        eprint("WARNING: using entire corpus to make unigram model")
        # Each model gets its own freshly generated corpus, as before.
        unigram = make_corpus_unigram(show_tasks(makeNewTasks()))
        bigram = make_corpus_bigram(show_tasks(makeNewTasks()))
        for task in tasks:
            uniscore = unigram_corpus_score(
                _example_outputs(task.examples), unigram)
            biscore = bigram_corpus_score(
                _example_outputs(task.examples), bigram)
            task.ll_cutoff = _log_mixture(biscore, uniscore)
        return tasks

    eprint("not implemented")
    eprint("cutoff val:")
    eprint(ll_cutoff)
    # Explicit raise: a bare ``assert False`` disappears under ``python -O``
    # and the function would then silently return None.
    raise AssertionError("unsupported ll_cutoff: %r" % (ll_cutoff,))
Пример #2
0
def getTestingLikelihood(likelihood, result, iteration):
    """Return the per-character likelihood over the usable testing tasks.

    Testing tasks come from ``result.getTestingTasks()``; any task whose name
    appears in ``badRegexTasks`` is dropped.  The value returned is the sum of
    ``getLikelihood(likelihood, result, task, iteration)`` over the remaining
    tasks, divided by the total length of the second element of every
    held-out example of those tasks.

    NOTE(review): when there are no held-out characters the division raises
    ZeroDivisionError.
    """
    from dreamcoder.domains.regex.groundtruthRegexes import badRegexTasks
    keptTasks = []
    for candidate in result.getTestingTasks():
        if candidate.name not in badRegexTasks:
            keptTasks.append(candidate)

    print("Getting testing likelihoods; we have to do this once per checkpoint and once per iteration so hang on to your seat!")
    from dreamcoder.domains.regex.makeRegexTasks import regexHeldOutExamples

    # Normaliser: number of characters across all held-out strings.
    characterCount = 0
    for keptTask in keptTasks:
        for _, heldOutString in regexHeldOutExamples(keptTask):
            characterCount += len(heldOutString)
    print("Total number of characters in testing tasks is", characterCount)

    summedLikelihood = sum(
        getLikelihood(likelihood, result, keptTask, iteration)
        for keptTask in keptTasks)
    return summedLikelihood / characterCount
Пример #3
0
def testingRegexLikelihood(task, program):
    """Sum the match scores of ``program`` on the held-out examples of ``task``.

    The program first has ``task.str_const`` substituted in through
    ``ConstantVisitor``, and is then evaluated and applied to an empty
    ``pre.String`` to obtain the regex object.  Each ``regex.match(string)``
    result is memoised in the module-level ``REGEXCACHINGTABLE`` under the
    key ``(regex, string)``.

    Returns the float total of those match values (presumably
    log-likelihoods -- confirm against pregex).
    """
    global REGEXCACHINGTABLE
    from dreamcoder.domains.regex.makeRegexTasks import regexHeldOutExamples
    import pregex as pre

    heldOut = regexHeldOutExamples(task)
    withConstants = program.visit(ConstantVisitor(task.str_const))
    regex = withConstants.evaluate([])(pre.String(""))

    total = 0.
    for _, heldOutString in heldOut:
        cacheKey = (regex, heldOutString)
        if cacheKey not in REGEXCACHINGTABLE:
            # First time this regex/string pair is seen: compute and store.
            REGEXCACHINGTABLE[cacheKey] = regex.match(heldOutString)
        total += REGEXCACHINGTABLE[cacheKey]
    return total
Пример #4
0
            from dreamcoder.domains.regex.groundtruthRegexes import badRegexTasks
            test = [t for t in test if t.name not in badRegexTasks]

        n_tasks = len(test)
        print(n_tasks, "testing tasks")
        n_hits = 0
        total_likelihood = 0
        for i, task in enumerate(test):
            hit = test_task(m, task, arguments.timeout)
            if arguments.domain == 'regex':
                total_likelihood += hit
            else:
                if hit: n_hits += 1
            print("for task ", i, ", hit=", hit, flush=True)

        if arguments.domain == 'regex':
            from dreamcoder.domains.regex.makeRegexTasks import regexHeldOutExamples
            totalCharacters = sum(
                len(s) for t in test for _, s in regexHeldOutExamples(t))
            if arguments.taskLikelihood:
                print(
                    "average marginal likelihood of held out task (normalized per character)",
                    total_likelihood / totalCharacters)
            else:
                print(
                    "average likelihood of held out examples (normalized per character)",
                    total_likelihood / totalCharacters)
        else:
            print("final score:")
            print(n_hits / float(n_tasks))
Пример #5
0
    marginalHits = 0
    marginalHits_test = 0

    totalTasks = 0
    for task in tasks:
        #if task.name in badRegexTasks: continue

        try:
            frontier = checkpoint.recognitionTaskMetrics[task]['frontier']
        except KeyError:
            continue
        print(task.name)
        totalTasks += 1
        print("\tTRAIN\t", ["".join(example[1]) for example in task.examples])

        testingExamples = regexHeldOutExamples(task)
        print("\tTEST\t", [example[1] for example in testingExamples])

        gt_preg = gt_dict[int(task.name.split(" ")[-1])]
        print("\tHuman written regex:", gt_preg)

        eprint(verbatimTable(["".join(example[1]) for example in task.examples] + [None] + \
                             [gt_preg,None] + \
                             [example[1] for example in testingExamples]))
        eprint("&")

        gt_preg = pre.create(gt_preg)

        def examineProgram(entry):
            global preg
            global diff_lookup