# Script: scatter-plot communicative cost vs. complexity from a pandas table,
# then set up a 2-D binning grid over the (comm_cost, complexity) plane.
# NOTE(review): this chunk appears truncated — the plot_* accumulators are
# presumably filled by code past the end of the visible source.
import analysisutil
import matplotlib.pyplot as plt
import numpy as np
import plotnine as pn

analysisutil.add_argument('table_name')
analysisutil.add_argument('bins', type=int)
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

data = file_util.load_pandas_csv('pandas_{0}.csv'.format(args.table_name))
comm_cost = np.array(data['comm_cost'])
complexity = np.array(data['complexity'])
# NOTE(review): the 'naturalness' column is bound to a variable named
# `monotonicity` — confirm this renaming is intentional.
monotonicity = np.array(data['naturalness'])

# Quick interactive look at the raw scatter before binning.
fig = plt.figure()
plt.scatter(comm_cost, complexity)
plt.show()

max_comp = max(complexity)
min_comp = min(complexity)
max_inf = max(comm_cost)
min_inf = min(comm_cost)
# Width of one grid cell along each axis (args.bins cells per axis).
comp_step = (max_comp - min_comp) / args.bins
inf_step = (max_inf - min_inf) / args.bins

# Accumulators for the binned plot, filled later (past this chunk).
plot_complexity = []
plot_informativeness = []
plot_avg_monotonicity = []
"""Scatter-plot complexity against communicative cost for one run and save it."""
import analysisutil
import plotnine as pn
from Languages import LanguageLoader

analysisutil.add_argument('complexity_strategy')
analysisutil.add_argument('informativeness_strategy')
analysisutil.add_argument('--include_natural',
                          dest='include_natural_languages',
                          default=False,
                          action='store_true')
(args, setup, file_util) = analysisutil.init()

table = LanguageLoader.load_pandas_table(file_util,
                                         args.complexity_strategy,
                                         args.informativeness_strategy)

# Communicative cost on x, complexity on y.
plot = pn.ggplot(table, pn.aes('comm_cost', 'complexity')) + pn.geom_point()

# TODO: args.include_natural_languages is parsed but currently unused — the
# overlay of natural-language points was disabled (old matplotlib code that
# loaded informativeness_*.dill / complexity_*.dill and plotted green dots).

print(plot)
file_util.save_plotnine(plot, '{0}_{1}_plot'.format(
    args.complexity_strategy,
    args.informativeness_strategy
))
"""Histogram of per-expression monotonicity (max of up/down) for one set."""
import analysisutil
import matplotlib.pyplot as plt

analysisutil.add_argument('set')
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

upward = file_util.load_dill('monotonicities_{0}_up.dill'.format(
    args.set))
downward = file_util.load_dill(
    'monotonicities_{0}_down.dill'.format(args.set))

# An expression's overall monotonicity is the larger of its two
# directional scores.
monotonicities = [max(down, up) for down, up in zip(downward, upward)]

fig = plt.figure()
plt.hist(monotonicities, bins=30)
plt.show()
file_util.save_figure(fig, 'monotonicity_{0}_hist'.format(args.set))
"""Compute per-language complexity under a chosen strategy, in parallel."""
from pathos.pools import ProcessPool

import analysisutil
from Languages import LanguageLoader
from Languages.ComplexityMeasurer import WordCountComplexityMeasurer, SumComplexityMeasurer, SpecialComplexityMeasurer

analysisutil.add_argument('max_words', type=int)
analysisutil.add_argument('comp_strat')
(args, setup, file_util) = analysisutil.init()

languages = LanguageLoader.load_languages(file_util)

# Dispatch table: strategy name -> measurer factory (constructed lazily so
# only the selected measurer is ever built).
measurer_factories = {
    'wordcount': lambda: WordCountComplexityMeasurer(args.max_words),
    'wordcomplexity': lambda: SumComplexityMeasurer(args.max_words, 1),
    'special': lambda: SpecialComplexityMeasurer(args.max_words),
}

if args.comp_strat not in measurer_factories:
    raise ValueError('{0} is not a valid complexity strategy.'.format(
        args.comp_strat))
complexity_measurer = measurer_factories[args.comp_strat]()

# Fan the languages out over worker processes; results keep input order.
with ProcessPool(nodes=args.processes) as pool:
    complexity = pool.map(complexity_measurer, languages)

file_util.dump_dill(complexity,
                    'complexity_{0}.dill'.format(args.comp_strat))
"""Plot a histogram of language informativeness scores and save it.

Fix: removed the unused ``import os`` (nothing in the script touches it).
"""
import analysisutil
import matplotlib.pyplot as plt

analysisutil.add_argument('informativeness_strategy')
(args, setup, file_util) = analysisutil.init()

strategy = args.informativeness_strategy
informativeness = file_util.load_dill(
    'informativeness_{0}.dill'.format(strategy))

fig = plt.figure()
plt.hist(informativeness)
plt.xlabel('informativeness')
plt.show()

filename = 'informativeness_{0}_hist.png'.format(strategy)
file_util.save_figure(fig, filename)
# Script: evolutionary search over sampled languages (mutation operators,
# complexity/informativeness objectives).
# NOTE(review): this chunk is truncated — it ends inside remove(); the rest of
# the mutation/evolution machinery lies past the visible source, and several
# imports (itertools, math, pygmo, ProcessPool, SumComplexityMeasurer,
# SimMaxInformativenessMeasurer) are presumably used there.
import itertools
import math
import random
from copy import copy
import pygmo
from pathos.multiprocessing import ProcessPool
import Generator
import analysisutil
from Languages import LanguageLoader, LanguageGenerator
from Languages.ComplexityMeasurer import SumComplexityMeasurer
from Languages.InformativenessMeasurer import SimMaxInformativenessMeasurer

analysisutil.add_argument('lang_size', type=int)
analysisutil.add_argument('sample_size', type=int)
analysisutil.add_argument('generations', type=int)
analysisutil.add_argument('-m', '--max_mutations', type=int, default=1)
(args, setup, file_util) = analysisutil.init()

expressions = LanguageLoader.load_all_evaluated_expressions(file_util)
# Sample languages of lang_size words; sample_size / lang_size languages total.
languages = LanguageGenerator.generate_sampled(
    expressions, args.lang_size, int(args.sample_size / args.lang_size))
# NOTE(review): args.model_size is not declared via add_argument in this chunk
# — presumably registered inside analysisutil; confirm.
universe = Generator.generate_simplified_models(args.model_size)


def remove(language):
    # Mutation: drop one randomly chosen expression from a copy of `language`.
    # NOTE(review): the function body continues past the end of this chunk.
    language = copy(language)
    index = random.randint(0, len(language) - 1)
"""Store the indices of expressions whose conservativity exceeds a threshold."""
import analysisutil

analysisutil.add_argument('threshold', type=float)
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

# Round to two decimals so the value embedded in the output filename is tidy.
threshold = round(args.threshold, 2)
conservativities = file_util.load_dill('conservativities_b.dill')

# Keep every expression index strictly above the threshold.
indices = {i
           for i, conservativity in enumerate(conservativities)
           if conservativity > threshold}

file_util.dump_dill(
    indices, 'conservative_{0}_expression_indices.dill'.format(threshold))
"""Overlay informativeness/complexity scatter plots from several runs.

Fix: removed the unused ``from urllib.parse import quote_plus`` import
(nothing in the script references it).
"""
import analysisutil
import matplotlib.pyplot as plt

analysisutil.add_argument('complexity_strategy')
analysisutil.add_argument('informativeness_strategy')
analysisutil.add_argument('run_names', nargs='+')
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

fig = plt.figure()
# One scatter series per run, labelled for the legend.
for run_name in args.run_names:
    informativeness = file_util.load_dill(
        '{0}/informativeness_{1}.dill'.format(run_name,
                                              args.informativeness_strategy))
    complexity = file_util.load_dill('{0}/complexity_{1}.dill'.format(
        run_name, args.complexity_strategy))
    plt.scatter(informativeness, complexity, label=run_name)

plt.legend()
plt.xlabel('informativeness')
plt.ylabel('complexity')
plt.show()

file_util.save_figure(
    fig, '{0}_{1}_{2}_multirun_plot.png'.format(args.complexity_strategy,
                                                args.informativeness_strategy,
                                                '-'.join(args.run_names)))
"""OLS regression of Pareto closeness on naturalness for a results table."""
import analysisutil
import statsmodels.formula.api as smf
import plotnine as pn

analysisutil.add_argument('table_name')
(args, setup, file_util) = analysisutil.init()

file_util_base = file_util.get_base_file_util()
df = file_util_base.load_pandas_csv("pandas_{0}.csv".format(args.table_name))
print(df.head())


def standardize(series):
    """Rescale a pandas Series to zero mean and unit standard deviation."""
    return (series - series.mean()) / series.std()


# Exploratory code (standardizing conservativity/monotonicity/naturalness and
# a naturalness vs. pareto_closeness scatter with a linear fit) was previously
# here but disabled; only the regression below is active.

model = smf.ols(formula='pareto_closeness ~ naturalness', data=df)
result = model.fit()
print(result.summary())
# Script: average monotonicity over a 2-D grid of (informativeness,
# complexity) bins.
# NOTE(review): this chunk is truncated mid-expression — the enumerate(...)
# call at the end, and the code that fills/plots the plot_* lists, continue
# past the visible source.
import analysisutil
import matplotlib.pyplot as plt
import numpy as np

analysisutil.add_argument('complexity_strategy')
analysisutil.add_argument('informativeness_strategy')
analysisutil.add_argument('bins', type=int)
(args, setup, file_util) = analysisutil.init()

informativeness = file_util.load_dill('informativeness_{0}.dill'.format(
    args.informativeness_strategy))
complexity = file_util.load_dill('complexity_{0}.dill'.format(
    args.complexity_strategy))
monotonicity = file_util.load_dill('monotonicity.dill')

max_comp = max(complexity)
max_inf = max(informativeness)
# Cell width per axis: args.bins cells from 0 up to the observed maximum.
comp_step = max_comp / args.bins
inf_step = max_inf / args.bins

# Accumulators for the binned plot.
plot_complexity = []
plot_informativeness = []
plot_avg_monotonicity = []

# Walk the grid cell by cell, collecting monotonicity values per cell.
for comp_start in np.arange(0, max_comp, comp_step):
    comp_end = comp_start + comp_step
    for inf_start in np.arange(0, max_inf, inf_step):
        monotonicities = []
        inf_end = inf_start + inf_step
        for (i, (inf, comp, mono)) in enumerate(
# Script: build languages from randomly generated fake expressions and set up
# complexity/informativeness measurers.
# NOTE(review): this chunk appears truncated — measurement and output code
# presumably follows past the visible source (ProcessPool, generate_sampled's
# results, SimMaxInformativenessMeasurer are set up for later use).
import random
from collections import namedtuple
from pathos.multiprocessing import ProcessPool
import Generator
import analysisutil
from Languages.ComplexityMeasurer import WordCountComplexityMeasurer
from Languages.InformativenessMeasurer import InformativenessMeasurer, SimMaxInformativenessMeasurer
from Languages.LanguageGenerator import generate_all, generate_sampled, EvaluatedExpression

analysisutil.add_argument('max_words', type=int)
analysisutil.add_argument('--sample', type=int)
(args, setup, file_util) = analysisutil.init()

languages = []
# NOTE(review): args.model_size and args.fixedwordcount are not declared via
# add_argument in this chunk — presumably registered inside analysisutil;
# confirm, otherwise the generate_all branch below would fail.
universe = Generator.generate_simplified_models(args.model_size)

# Stand-in expressions whose "meaning" is one random truth value per model.
FakeEvaluatedExpression = namedtuple('FakeEvaluatedExpression', 'meaning')
expressions = [FakeEvaluatedExpression(tuple([random.choice([True, False])
                                              for model in universe]))
               for i in range(10000)]

if args.sample is None:
    languages = generate_all(expressions, args.max_words, args.fixedwordcount)
else:
    languages = generate_sampled(expressions, args.max_words, args.sample)

complexity_measurer = WordCountComplexityMeasurer(args.max_words)
informativeness_measurer_exact = InformativenessMeasurer(len(universe))
"""Scatter-plot language complexity against monotonicity and save the figure."""
import analysisutil
import matplotlib.pyplot as plt

analysisutil.add_argument('complexity_strategy')
(args, setup, file_util) = analysisutil.init()

complexity = file_util.load_dill(
    'complexity_{0}.dill'.format(args.complexity_strategy))
monotonicity = file_util.load_dill('monotonicity.dill')

# Monotonicity on x, complexity on y.
figure = plt.figure()
plt.scatter(monotonicity, complexity)
plt.xlabel('monotonicity')
plt.ylabel('complexity')
plt.show()

output_name = '{0}_plot_monotonicity'.format(
    args.complexity_strategy
)
file_util.save_figure(figure, output_name)
# Script: compare several runs against a Pareto-frontier run.
# NOTE(review): this chunk is truncated — the analysis/plotting continues past
# the visible source; several imports below (ProcessPool, np, norm, pygmo, pn)
# are presumably used there.
from pathos.multiprocessing import ProcessPool
import analysisutil
from Languages import LanguageLoader
import pandas as pd
import numpy as np
from numpy.linalg import norm
import pygmo
import plotnine as pn

analysisutil.add_argument('table_name')
analysisutil.add_argument('pareto')
analysisutil.add_argument('run_names', nargs='*')
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

# Frontier run: word-complexity vs. simmax informativeness, monotonicity
# columns excluded.
pareto_data = LanguageLoader.load_pandas_table(file_util.get_sub_file_util(
    args.pareto), 'wordcomplexity', 'simmax', include_monotonicity=False)

# Empty accumulator with the expected columns; filled later (past this chunk).
run_df = pd.DataFrame({
    'complexity': [],
    'comm_cost': [],
    'run': [],
    'monotonicity': []
})

# One table per named run, keyed by run name.
run_dfs = {}
for run_name in args.run_names:
    df = LanguageLoader.load_pandas_table(
        file_util.get_sub_file_util(run_name), 'wordcomplexity', 'simmax')
    run_dfs[run_name] = df
"""Store the indices of expressions no longer than a given length bound."""
import analysisutil

analysisutil.add_argument('length', type=int)
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

expressions = file_util.load_dill('expressions.dill')

# Collect the positions of all expressions within the length bound.
indices = []
for position, expression in enumerate(expressions):
    if expression.length() <= args.length:
        indices.append(position)

file_util.dump_dill(
    indices, 'upto{0}_expression_indices.dill'.format(args.length))
"""Measure the informativeness of every language under a chosen strategy."""
from pathos.pools import ProcessPool

import Generator
import analysisutil
from Languages import LanguageLoader
from Languages.InformativenessMeasurer import SimMaxInformativenessMeasurer, InformativenessMeasurer

analysisutil.add_argument('inf_strat')
(args, setup, file_util) = analysisutil.init()

languages = LanguageLoader.load_languages(file_util)
universe = Generator.generate_simplified_models(args.model_size)

# Select the measurer for the requested strategy; reject anything else.
strategy = args.inf_strat
if strategy == 'simmax':
    informativeness_measurer = SimMaxInformativenessMeasurer(universe)
elif strategy == 'exact':
    informativeness_measurer = InformativenessMeasurer(len(universe))
else:
    raise ValueError('{0} is not a valid informativeness strategy.'.format(
        strategy))

# Fan the languages out over worker processes; results keep input order.
with ProcessPool(nodes=args.processes) as pool:
    informativeness = pool.map(informativeness_measurer, languages)

file_util.dump_dill(informativeness,
                    'informativeness_{0}.dill'.format(args.inf_strat))
# Script: prepare a regression comparing a "natural" run against a "random"
# run (standardized predictors + binary treatment indicator).
# NOTE(review): this chunk appears truncated — the model fit using smf/pn
# presumably follows past the visible source.
import statsmodels
import analysisutil
import statsmodels.formula.api as smf
import plotnine as pn

analysisutil.add_argument('table_name')
analysisutil.add_argument('natural_run')
analysisutil.add_argument('random_run')
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

df = file_util.load_pandas_csv("pandas_{0}.csv".format(args.table_name))
# Keep only rows belonging to the two runs being compared.
df = df[df.apply(lambda row: row.run in [args.natural_run, args.random_run],
                 axis=1)]


def standardize(series):
    """Rescale a pandas Series to zero mean and unit standard deviation."""
    return (series - series.mean()) / series.std()


df['conservativity'] = standardize(df['conservativity'])
df['monotonicity'] = standardize(df['monotonicity'])
# Binary indicator: 1 for the natural run, 0 for the random run.
df['natural'] = list(
    map(lambda run_name: 1 if run_name == args.natural_run else 0,
        df['run'].values))
df['natural'] = df['natural'].astype('category')
print(df.head())
"""Histogram of max monotonicities, optionally restricted to index sets.

Fixes:
- The saved filename embedded ``str(args.indices)`` — i.e. the repr of a
  list, with brackets, quotes and spaces — producing awkward filenames.
  Now joins the set names with '-', matching the multirun plot script.
- ``-i`` with zero values yielded an empty ``index_sets`` list, making
  ``set.intersection(*index_sets)`` raise TypeError; an empty/absent list
  now skips filtering entirely.
"""
import analysisutil
import matplotlib.pyplot as plt

analysisutil.add_argument('-i', '--indices', nargs='*')
(args, setup, file_util) = analysisutil.init(use_base_dir=True)

monotonicities = file_util.load_dill('monotonicities_max.dill')

if args.indices:
    # Restrict to expressions present in EVERY named index set.
    index_sets = []
    for indices_name in args.indices:
        index_sets.append(
            set(
                file_util.load_dill(
                    '{0}_expression_indices.dill'.format(indices_name))))
    indices = set.intersection(*index_sets)
    monotonicities = [monotonicities[i] for i in indices]

fig = plt.figure()
plt.hist(monotonicities, bins=30)
plt.show()

file_util.save_figure(
    fig, 'monotonicity_hist{0}'.format(
        '_{0}'.format('-'.join(args.indices)) if args.indices else ''))
"""Generate languages mixing natural and random expressions in varying ratios.

For every language size up to max_words, samples ``args.sample`` languages,
each split at random between "natural" expressions (from the given index set)
and arbitrary non-natural ones; records the natural fraction per language.

Fix: removed the unused local ``sizes = []`` (never appended to or read).
"""
import random

import analysisutil
from Languages.LanguageGenerator import random_combinations

analysisutil.add_argument('indices')
analysisutil.add_argument('max_words', type=int)
analysisutil.add_argument('sample', type=int)
(args, setup, file_util_out) = analysisutil.init()

file_util_in = file_util_out.get_base_file_util()

natural_indices = set(
    file_util_in.load_dill('{0}_expression_indices.dill'.format(args.indices)))
expressions = file_util_in.load_dill('expressions.dill')
non_natural_indices = set(range(len(expressions))) - natural_indices

language_indices = []
naturalness = []

for lang_size in range(1, args.max_words + 1):
    for i in range(args.sample):
        # Random split of the language between natural and random expressions.
        len_natural = random.randint(0, lang_size)
        len_random = lang_size - len_natural
        lang_random = next(
            random_combinations(non_natural_indices, len_random, 1))
        lang_natural = next(
            random_combinations(natural_indices, len_natural, 1))
        naturalness.append(len_natural / lang_size)
        language_indices.append(lang_random + lang_natural)

file_util_out.dump_dill(language_indices, 'language_indices.dill')
file_util_out.dump_dill(naturalness, 'naturalness.dill')
# Human-readable dump: one list of expression strings per language.
file_util_out.save_stringlist(
    [list(map(lambda i: str(expressions[i]), lang))
     for lang in language_indices], 'languages.txt')