示例#1
0
def test_drift_from_original(path):
    """Check that NDChild still reproduces previously recorded runs.

    The directory tests/run_data contains a set of logs generated by NDChild
    runs on random languages, using randomly generated learning rates. Each
    file represents snapshots from a single child on a single language. The
    language and the learning rates are encoded in the filename.

    Each of these files is a gzipped jsonl file. Each line in the file is a
    snapshot of the learner's grammar after consuming a sentence.

    The recorded sentences are replayed through a fresh NDChild and a fresh
    CachedChild. After every sentence the NDChild grammar must equal both the
    recorded grammar and the CachedChild grammar.

    Args:
        path: Path of a gzipped jsonl log whose name contains
            ``lang<digits>:rate<number>:cons<number>``.

    Raises:
        ValueError: If the three fields cannot be found in ``path``.
    """
    # The decimal point is escaped so it only matches a literal '.', and the
    # fractional part is optional so integer-valued rates still parse.
    match = re.search(
        r'lang(\d+):rate(\d+(?:\.\d+)?):cons(\d+(?:\.\d+)?)', path)
    if match is None:
        raise ValueError(
            'cannot read lang/rate/cons from log filename: %r' % (path,))

    lang, rate, cons = match.groups()
    rate, cons = float(rate), float(cons)

    # NOTE(review): `lang` is passed on as the string captured from the
    # filename -- confirm NDChild/CachedChild accept a string grammar id.
    child = NDChild(rate, cons, lang)
    cached_child = CachedChild(rate, cons, lang)

    with gzip.open(path) as fh:
        for line in fh:
            data = json.loads(line)
            s = DOMAIN.sentences[data['sentence_id']]
            expected = data['grammar']
            child.consumeSentence(s)
            cached_child.consumeSentence(s)
            assert child.grammar == expected
            assert child.grammar == cached_child.grammar
示例#2
0
def run_trial(params: TrialParameters):
    """Simulate one echild learner and return the resulting NDResult.

    The learner consumes ``params.numberofsentences`` sentences. For each one
    a fresh ``random()`` draw is compared with ``params.noise``: a draw below
    the noise level picks a sentence that is not in the target language,
    otherwise a sentence from the target language is used.

    Args:
        params: Settings for this trial (language, noise, rate,
            conservativerate, numberofsentences).

    Returns:
        An NDResult holding the trial parameters, the finish timestamp, the
        time spent in the learning loop, and the learner's target language
        and final grammar.
    """
    logging.debug('running echild with %s', params)

    target = params.language
    noise = params.noise
    learner = NDChild(params.rate, params.conservativerate, target)

    started = datetime.now()

    for _ in range(params.numberofsentences):
        is_noise = random() < noise
        draw = (DOMAIN.get_sentence_not_in_language
                if is_noise else DOMAIN.get_sentence_in_language)
        learner.consumeSentence(draw(grammar_id=target))

    finished = datetime.now()

    outcome = NDResult(
        trial_params=params,
        timestamp=finished,
        duration=finished - started,
        language=learner.target_language,
        grammar=learner.grammar,
    )

    logging.debug('experiment result: %s', outcome)

    return outcome
示例#3
0
文件: main.py 项目: kghowitt/NDLNoise
def _save_trace_plot(history, sample_period, title, output_path):
    """Plot the recorded grammar snapshots and write the figure to a file."""
    # Imported lazily so the plotting stack is only needed for traced runs.
    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np

    plt.rcParams['figure.figsize'] = 15, 10

    df = pd.DataFrame(history)
    # Shift the k-th column by k/500.
    df = df + (np.arange(len(df.columns)) / 500)
    # Row k was recorded at loop index k * sample_period, so scale the index
    # by the real sampling period to label the x-axis in sentences.
    df.index = df.index * sample_period
    df.plot(title=title, sharey=True, subplots=True)

    # Save and then close the figure; otherwise every traced trial leaves an
    # open figure behind in pyplot's global state.
    figure = plt.gcf()
    figure.savefig(output_path)
    plt.close(figure)


def run_traced_trial(params: TrialParameters, output_directory='.'):
    """Run one echild simulation, plot its learning trace, return the result.

    The simulation itself matches an untraced trial: for each sentence a fresh
    ``random()`` draw below ``params.noise`` selects a sentence outside the
    target language, otherwise one inside it. In addition a copy of the
    learner's grammar is recorded every ``sample_period`` sentences and the
    recorded history is saved as ``<language>-<noise>.png``.

    Args:
        params: Settings for this trial (language, noise, rate,
            conservativerate, numberofsentences).
        output_directory: Directory the PNG is written to. Defaults to the
            current directory so the function can also be called with the
            trial parameters alone (as ``Pool.imap_unordered`` does).

    Returns:
        An NDResult holding the trial parameters, the finish timestamp, the
        time spent in the learning loop, and the learner's target language
        and final grammar.
    """
    logging.debug('running echild with %s', params)

    language = params.language
    noise_level = params.noise
    child = NDChild(params.rate, params.conservativerate, language)

    history = []

    then = datetime.now()

    # Aim for about 1000 snapshots per run. Integer division keeps the period
    # a whole number of sentences, and the floor of 1 covers runs shorter
    # than 1000 sentences (every sentence is sampled).
    sample_period = max(1, params.numberofsentences // 1000)

    for i in range(params.numberofsentences):
        if random() < noise_level:
            s = DOMAIN.get_sentence_not_in_language(grammar_id=language)
        else:
            s = DOMAIN.get_sentence_in_language(grammar_id=language)
        child.consumeSentence(s)
        if i % sample_period == 0:
            # Copy the grammar so later updates do not alter the snapshot.
            history.append(dict(child.grammar))

    now = datetime.now()

    result = NDResult(trial_params=params,
                      timestamp=now,
                      duration=now - then,
                      language=child.target_language,
                      grammar=child.grammar)

    logging.debug('experiment result: %s', result)

    # With zero sentences nothing was recorded and there is nothing to plot.
    if history:
        _save_trace_plot(
            history,
            sample_period,
            'lang={}, noise={}'.format(params.language, params.noise),
            os.path.join(output_directory,
                         '{}-{}.png'.format(language, noise_level)))

    return result
示例#4
0
def childLearnsLanguage(ndr, languageDomain, language):
    """Train a fresh NDChild and report its grammar and threshold record.

    The learning rates, the number of sentences and the threshold are read
    from the module-level names ``rate``, ``conservativerate``,
    ``numberofsentences`` and ``threshold``.

    Args:
        ndr: Reporter object; its threshold dict is reset before training and
            it is shown the learner's grammar after every sentence.
        languageDomain: Sentence pool handed to ``pickASentence``.
        language: Language identifier passed to the NDChild constructor.

    Returns:
        A two-element list ``[grammar, ndr.thresholdDict]``.
    """
    ndr.resetThresholdDict()
    learner = NDChild(rate, conservativerate, language)

    for sentenceIndex in xrange(numberofsentences):
        learner.consumeSentence(pickASentence(languageDomain))
        # Let ndr note the sentence index at which a parameter first
        # reaches the threshold; it uses this when writing output.
        ndr.checkIfParametersMeetThreshold(threshold, learner.grammar,
                                           sentenceIndex)

    outcome = [learner.grammar, ndr.thresholdDict]
    return outcome
示例#5
0
def test_ndchild_parity(language):
    """Check that NDChild and CachedChild evolve identical grammars.

    Both learners are fed the same 50000 sentences drawn from ``language``
    and their grammars are compared after every sentence. Before each
    sentence a copy of the NDChild grammar is stored on both learners as
    ``pre``.

    NOTE(review): both learners are constructed with the fixed grammar id
    2253 while the sentences come from ``language`` -- confirm this is
    intended.
    """
    reference = NDChild(0.9, 0.005, 2253)
    cached = CachedChild(0.9, 0.005, 2253)

    for _ in range(50000):
        sentence = DOMAIN.get_sentence_in_language(grammar_id=language)
        # Each learner gets its own copy of the reference grammar.
        reference.pre = dict(reference.grammar)
        cached.pre = dict(reference.grammar)
        reference.consumeSentence(sentence)
        cached.consumeSentence(sentence)
        assert reference.grammar == cached.grammar
示例#6
0
def run_simulations(params: ExperimentParameters):
    """Run every echild trial described by `params` on a process pool.

    One trial is scheduled per (language, noise level, echild) combination.
    This is a generator function: nothing runs until the caller starts
    iterating, and results are yielded in completion order rather than in
    submission order.

    Args:
        params: Experiment settings (languages, noise_levels, num_echildren,
            learningrate, conservative_learningrate, num_sentences,
            num_procs, trace).

    Yields:
        One result per trial, as returned by the selected trial function.
    """
    trial_specs = (
        TrialParameters(
            language=lang,
            noise=noise,
            rate=params.learningrate,
            numberofsentences=params.num_sentences,
            conservativerate=params.conservative_learningrate,
        )
        for lang in params.languages
        for noise in params.noise_levels
        for _ in range(params.num_echildren)
    )

    # Total trial count, needed only to size the progress bar.
    total_trials = (params.num_echildren * len(params.languages) *
                    len(params.noise_levels))

    DOMAIN.init_from_flatfile()

    # Compute the "static" triggers once per sentence in the domain and cache
    # them before the worker pool is created.
    NDChild.precompute_domain(DOMAIN)

    with multiprocessing.Pool(params.num_procs) as pool:
        # NOTE(review): the traced worker is invoked with the trial
        # parameters only -- confirm it can be called with one argument.
        worker = run_traced_trial if params.trace else run_trial

        # imap_unordered hands back an iterator over the results. Yielding
        # from inside the `with` block keeps the pool alive while the caller
        # consumes it.
        outcomes = pool.imap_unordered(worker, trial_specs)

        yield from progress_bar(outcomes,
                                total=total_trials,
                                desc="running simulations")
示例#7
0
文件: main2.py 项目: zionsoumik/lang
def childLearnsLanguage(ndr, languageDomain, language, numberofsentences):
    """Train a fresh NDChild and report grammar, thresholds and sentences.

    The learning rates and the threshold are read from the module-level names
    ``rate``, ``conservativerate`` and ``threshold``. The ``sentenceList`` of
    every sentence consumed is appended to the module-level ``distribution``
    list, which therefore keeps growing across calls.

    Args:
        ndr: Reporter object; its threshold dict is reset before training and
            it is shown the learner's grammar after every sentence.
        languageDomain: Sentence pool handed to ``pickASentence``.
        language: Language identifier passed to the NDChild constructor.
        numberofsentences: How many sentences the learner consumes.

    Returns:
        ``[[grammar, ndr.thresholdDict], distribution, sets]`` where ``sets``
        is the list of 13 ``[list, list]`` pairs given to the NDChild.
    """
    global distribution
    ndr.resetThresholdDict()

    # Thirteen independent [[], []] pairs, handed to the learner.
    sets = [[[], []] for _ in range(13)]
    learner = NDChild(rate, conservativerate, language, sets)

    for sentenceIndex in xrange(numberofsentences):
        sentence = pickASentence(languageDomain)
        distribution.append(sentence.sentenceList)
        learner.consumeSentence(sentence)
        # Let ndr note the sentence index at which a parameter first
        # reaches the threshold; it uses this when writing output.
        ndr.checkIfParametersMeetThreshold(threshold, learner.grammar,
                                           sentenceIndex)

    summary = [learner.grammar, ndr.thresholdDict]
    return [summary, distribution, sets]