def test_drift_from_original(path):
    """Replay a recorded NDChild run and check nothing has drifted.

    The directory tests/run_data holds logs produced by NDChild runs on
    random languages with randomly generated learning rates. Every file
    is a gzipped jsonl log of one child learning one language; the
    language and both learning rates are encoded in the file name as
    ``lang<digits>:rate<number>:cons<number>``.

    Each line of the log carries a ``sentence_id`` and the ``grammar``
    recorded after the learner consumed that sentence. The same sentences
    are fed to a fresh NDChild and a fresh CachedChild; after every
    sentence the NDChild grammar must equal both the recorded grammar and
    the CachedChild grammar.
    """
    # NOTE(review): the `.` inside the rate/cons groups is unescaped, so it
    # matches any character, not only a decimal point.
    name_match = re.search(r'lang(\d+):rate(\d+.\d+):cons(\d+.\d+)', path)
    lang, rate_text, cons_text = name_match.groups()
    rate = float(rate_text)
    cons = float(cons_text)

    # `lang` is handed to the learners as the matched string, unconverted.
    child = NDChild(rate, cons, lang)
    cached_child = CachedChild(rate, cons, lang)

    with gzip.open(path) as log:
        for raw_line in log:
            snapshot = json.loads(raw_line)
            sentence = DOMAIN.sentences[snapshot['sentence_id']]
            expected = snapshot['grammar']

            for learner in (child, cached_child):
                learner.consumeSentence(sentence)

            assert child.grammar == expected
            assert child.grammar == cached_child.grammar
def run_trial(params: TrialParameters):
    """Run one echild simulation and return its NDResult.

    A new NDChild is created from ``params.rate``,
    ``params.conservativerate`` and ``params.language``. It then consumes
    ``params.numberofsentences`` sentences. For each sentence a fresh
    ``random()`` draw is compared with ``params.noise``: a draw below the
    noise level selects a sentence that is *not* in the target language,
    otherwise a sentence from the target language is used.

    The returned NDResult records the trial parameters, the finishing
    timestamp, the wall-clock duration of the learning loop, the child's
    target language and its final grammar.
    """
    logging.debug('running echild with %s', params)

    target = params.language
    noise = params.noise
    learner = NDChild(params.rate, params.conservativerate, target)

    started = datetime.now()
    for _ in range(params.numberofsentences):
        if random() < noise:
            draw_sentence = DOMAIN.get_sentence_not_in_language
        else:
            draw_sentence = DOMAIN.get_sentence_in_language
        learner.consumeSentence(draw_sentence(grammar_id=target))
    finished = datetime.now()

    outcome = NDResult(
        trial_params=params,
        timestamp=finished,
        duration=finished - started,
        language=learner.target_language,
        grammar=learner.grammar,
    )
    logging.debug('experiment result: %s', outcome)
    return outcome
def run_traced_trial(params: TrialParameters, output_directory='.'):
    """Run one echild simulation, plot the grammar over time, return the result.

    The learning loop is the same as a plain trial: a new NDChild consumes
    ``params.numberofsentences`` sentences, each drawn from outside the
    target language when ``random() < params.noise`` and from inside it
    otherwise. In addition, a copy of the child's grammar is recorded at a
    fixed sampling period and the recorded history is plotted (one subplot
    per grammar entry) to
    ``<output_directory>/<language>-<noise>.png``.

    Args:
        params: parameters of the trial (language, noise, learning rates,
            number of sentences).
        output_directory: directory the PNG is written to. Defaults to the
            current directory so the function can also be called with
            ``params`` alone (e.g. when mapped over a pool of workers).

    Returns:
        The NDResult for the trial (parameters, timestamp, duration of the
        learning loop, target language and final grammar).
    """
    logging.debug('running echild with %s', params)
    language = params.language
    noise_level = params.noise
    child = NDChild(params.rate, params.conservativerate, language)

    # Aim for roughly 1000 snapshots per run. The period must be a positive
    # integer: a float period makes `i % period == 0` unreliable, and runs
    # shorter than 1000 sentences would otherwise get a period below 1.
    sample_period = max(1, params.numberofsentences // 1000)

    history = []
    then = datetime.now()
    for i in range(params.numberofsentences):
        if random() < noise_level:
            s = DOMAIN.get_sentence_not_in_language(grammar_id=language)
        else:
            s = DOMAIN.get_sentence_in_language(grammar_id=language)
        child.consumeSentence(s)
        if i % sample_period == 0:
            # copy, so later updates to the grammar do not alter the snapshot
            history.append(dict(child.grammar))
    now = datetime.now()

    result = NDResult(trial_params=params,
                      timestamp=now,
                      duration=now - then,
                      language=child.target_language,
                      grammar=child.grammar)
    logging.debug('experiment result: %s', result)

    # Plotting libraries are imported here rather than at module level, as
    # in the original code, so untraced runs never load them.
    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np

    plt.rcParams['figure.figsize'] = 15, 10
    df = pd.DataFrame(history)
    # Shift every column by a small column-specific offset (0, 1/500,
    # 2/500, ...); presumably to keep coinciding curves distinguishable.
    df = df + (np.arange(len(df.columns)) / 500)
    # Row k of the history was recorded at sentence k * sample_period, so
    # scale the index by the real period to label the x-axis in sentences.
    df.index = df.index * sample_period
    df.plot(title='lang={}, noise={}'.format(params.language, params.noise),
            sharey=True,
            subplots=True)
    plt.savefig(
        os.path.join(output_directory,
                     '{}-{}.png'.format(language, noise_level)))
    # Release the figure; otherwise every traced trial leaves one open in
    # the worker process.
    plt.close()
    return result
def childLearnsLanguage(ndr, languageDomain, language):
    """Let a fresh NDChild learn from sentences picked out of languageDomain.

    The learning rates (``rate``, ``conservativerate``), the number of
    sentences (``numberofsentences``) and ``threshold`` are read from
    module-level names.

    Args:
        ndr: reporter object; its threshold dictionary is reset first and
            updated after every sentence.
        languageDomain: collection handed to ``pickASentence`` to choose
            each input sentence.
        language: language passed to the NDChild constructor.

    Returns:
        A two-element list: the child's final grammar and
        ``ndr.thresholdDict``.
    """
    ndr.resetThresholdDict()
    aChild = NDChild(rate, conservativerate, language)

    # `range` instead of the Python 2-only `xrange`, which is undefined on
    # Python 3.
    for j in range(numberofsentences):
        s = pickASentence(languageDomain)
        aChild.consumeSentence(s)
        # If a parameter value is <= the threshold for the first time,
        # this is recorded in ndr for writing output
        ndr.checkIfParametersMeetThreshold(threshold, aChild.grammar, j)

    return [aChild.grammar, ndr.thresholdDict]
def test_ndchild_parity(language):
    """Check that NDChild and CachedChild learn identically.

    Both learners are created with the same learning rates (0.9 and 0.005)
    and with ``language`` as their language, then fed the same 50000
    sentences drawn from ``language``. After every sentence their grammars
    must be equal.
    """
    # Build the learners for the language under test; the sentences below
    # are drawn from that same language.
    child = NDChild(0.9, 0.005, language)
    cached_child = CachedChild(0.9, 0.005, language)

    num_sents = 50000
    for _ in range(num_sents):
        s = DOMAIN.get_sentence_in_language(grammar_id=language)

        # Keep a copy of the grammar as it was before this sentence on each
        # learner; it is not read in this function (presumably an aid for
        # inspecting a failing assertion -- confirm).
        # NOTE(review): both snapshots are taken from `child`, as in the
        # original; the grammars are asserted equal after every step.
        child.pre = dict(child.grammar)
        cached_child.pre = dict(child.grammar)

        child.consumeSentence(s)
        cached_child.consumeSentence(s)
        assert child.grammar == cached_child.grammar
def run_simulations(params: ExperimentParameters):
    """Run every echild simulation described by `params`.

    One trial is scheduled for each combination of language and noise
    level, repeated ``params.num_echildren`` times. This function is a
    generator: nothing below runs until the caller starts iterating, and
    one trial result is yielded per simulation, in whatever order the
    worker processes finish them.
    """
    all_trials = (
        TrialParameters(
            language=target,
            noise=noise_level,
            rate=params.learningrate,
            numberofsentences=params.num_sentences,
            conservativerate=params.conservative_learningrate,
        )
        for target in params.languages
        for noise_level in params.noise_levels
        for _ in range(params.num_echildren)
    )

    # total number of trials, used only for progress reporting
    num_languages = len(params.languages)
    num_noise_levels = len(params.noise_levels)
    total_trials = params.num_echildren * num_languages * num_noise_levels

    DOMAIN.init_from_flatfile()

    # compute the "static" triggers of every sentence in the domain once
    # and keep the cached values
    NDChild.precompute_domain(DOMAIN)

    # traced trials are used when requested, plain trials otherwise
    worker = run_traced_trial if params.trace else run_trial

    with multiprocessing.Pool(params.num_procs) as pool:
        # spread the trials over the worker processes; results come back
        # through an iterator, in completion order
        finished = pool.imap_unordered(worker, all_trials)

        # wrap the result iterator so progress is reported as it is consumed
        finished = progress_bar(finished,
                                total=total_trials,
                                desc="running simulations")
        yield from finished
def childLearnsLanguage(ndr, languageDomain, language, numberofsentences):
    """Let a fresh NDChild learn from `numberofsentences` picked sentences.

    The learning rates (``rate``, ``conservativerate``) and ``threshold``
    are read from module-level names. The ``sentenceList`` of every picked
    sentence is appended to the module-level ``distribution`` list.

    Args:
        ndr: reporter object; its threshold dictionary is reset first and
            updated after every sentence.
        languageDomain: collection handed to ``pickASentence`` to choose
            each input sentence.
        language: language passed to the NDChild constructor.
        numberofsentences: how many sentences the child consumes.

    Returns:
        A three-element list:
        ``[[final grammar, ndr.thresholdDict], distribution, sets]``,
        where ``sets`` is the list of 13 ``[list, list]`` pairs that was
        handed to the NDChild constructor.
    """
    global distribution

    ndr.resetThresholdDict()

    # 13 independent pairs of empty lists (presumably one pair per grammar
    # parameter -- confirm against NDChild). Each pair is a distinct object,
    # exactly as in a hand-written literal.
    sets = [[[], []] for _ in range(13)]
    aChild = NDChild(rate, conservativerate, language, sets)

    # `range` instead of the Python 2-only `xrange`, which is undefined on
    # Python 3.
    for j in range(numberofsentences):
        s = pickASentence(languageDomain)
        distribution.append(s.sentenceList)
        aChild.consumeSentence(s)
        # If a parameter value is <= the threshold for the first time,
        # this is recorded in ndr for writing output
        ndr.checkIfParametersMeetThreshold(threshold, aChild.grammar, j)

    return [[aChild.grammar, ndr.thresholdDict], distribution, sets]