def abnf2bnf(grammar, start):
    """Parse an ABNF grammar and convert the parse tree into BNF productions.

    Args:
        grammar: Input handed to ``abnf_parser.parse`` (presumably the ABNF
            grammar source text -- confirm against the caller).
        start: Start symbol; returned unchanged so it can be passed along.

    Returns:
        A ``(productions, terminals, start)`` tuple, where ``productions``
        and ``terminals`` come from the tree's ``to_productions_dict()``.

    Raises:
        AssertionError: If parsing does not yield exactly one parse tree.
    """
    with timeit('parsing the grammar'):
        trees = abnf_parser.parse(grammar)

    # Raised explicitly (instead of a bare ``assert``) so the check is not
    # stripped under ``python -O`` and the failure carries a diagnostic.
    if len(trees) != 1:
        raise AssertionError(
            'expected exactly one parse tree, got {}'.format(len(trees)))
    tree = trees[0]

    with timeit('converting into BNF'):
        productions, terminals = tree.to_productions_dict()
    return productions, terminals, start
def run_batch(batch_folder, species_depth, n_gene_trees, n_ind, n_sp,
              n_sp_trees, Ne, prob_missing, methods, flow_dict, dists,
              force=False):
    """Run the generate / drop / impute / analyze steps for one batch folder.

    The sub-folders ``data``, ``nexus``, ``solutions`` and ``stats`` are
    created under ``batch_folder`` if needed. Each step runs only when the
    predicate returned by ``make_flow(flow_dict)`` accepts the step name and
    either ``force`` is true or ``exists(...)`` reports that the step's
    output is not there yet.

    Args:
        batch_folder: Root folder of this batch.
        species_depth, n_gene_trees, n_sp, n_ind, Ne, n_sp_trees: Forwarded
            to ``generate``.
        prob_missing: Iterable of values forwarded to ``drop``; each one is
            also turned into a ``"p<value>"`` tag for the existence checks.
        methods: Iterable of values forwarded to ``impute``; each one is
            also turned into an ``"m<value>"`` tag for the existence checks.
        flow_dict: Passed to ``make_flow`` to decide which steps run.
        dists: Forwarded to ``analyze``.
        force: When true, run a selected step even if its output exists.
    """
    # set up folders
    data, nexus, solutions, stats = (
        os.path.join(batch_folder, name)
        for name in ("data", "nexus", "solutions", "stats"))
    batch_base = os.path.basename(batch_folder)

    # create folders if they don't already exist; exist_ok=True avoids the
    # race between an isdir() check and the makedirs() call
    for folder in (data, nexus, solutions, stats):
        os.makedirs(folder, exist_ok=True)

    current_step_is = make_flow(flow_dict)

    m_tags = ["m" + str(method) for method in methods]
    p_tags = ["p" + str(prob) for prob in prob_missing]

    # generate trees
    if current_step_is("generate"):
        if force or not exists(batch_folder, "nexus"):
            generate(nexus, species_depth, n_gene_trees, n_sp, n_ind, Ne,
                     n_sp_trees)

    # drop leaves
    if current_step_is("drop"):
        if force or not exists(batch_folder, "data", p_tags):
            drop(batch_folder, nexus, data, n_sp, prob_missing, force)

    # impute
    if current_step_is("impute"):
        if force or not exists(batch_folder, "solutions", m_tags):
            impute(batch_folder, data, solutions, batch_base, methods)

    # get summary stats for each file
    if current_step_is("analyze"):
        if force or not exists(batch_folder, "stats", m_tags + p_tags):
            timeit(lambda: analyze(batch_folder, dists),
                   "analyzing {}".format(batch_base),
                   logging.getLogger("analyze"))
def drop(batch_folder, nexus, data, n_sp, prob_missing, force):
    """Run RandomGenerator.R on every gene-tree file for every probability.

    Gene-tree files are the entries of ``nexus`` whose lower-cased name
    contains ``'_e'``. For each ``(probability, file)`` pair the R script is
    invoked and its two outputs (``<out>.txt`` and ``<out>_true.txt``) are
    handed to ``gzip_to``. The whole run is wrapped in ``timeit``.

    Args:
        batch_folder: Not used by this function.
        nexus: Folder holding the input files.
        data: Folder receiving the output files.
        n_sp: Value passed to the R script as ``-s<n_sp>``.
        prob_missing: Iterable of values passed to the R script as ``-p<p>``.
        force: When true, existing output files are deleted before the call.
    """

    # function to call RandomGenerator.R
    def call_R(args):
        # setup
        p_drop, fname = args
        in_name = os.path.join(nexus, fname)
        # last 4 characters of the file name are dropped, then a p-tag is
        # appended as an extra "_"-separated field
        fields = fname[:-4].split("_") + ["p{}".format(p_drop)]
        out_name = os.path.join(data, "_".join(fields))
        produced = (out_name + ".txt", out_name + "_true.txt")

        # remove old files if force is true
        if force:
            for stale in produced:
                if os.path.exists(stale):
                    os.remove(stale)

        # call the R script
        command = [
            "Rscript",
            "RandomGenerator.R",
            in_name,
            "-o",
            out_name,
            "-p{}".format(p_drop),
            "-s{}".format(n_sp),
        ]
        get_output(command, logging.getLogger("drop"))

        # compress data files
        dest_root = os.path.dirname(out_name)
        for path in produced:
            gzip_to(dest_root, path)

    # identify names of gene trees
    basenames = [name for name in os.listdir(nexus) if '_e' in name.lower()]

    # call RandomGenerator.R
    def drop_all():
        return [
            call_R(job) for job in itertools.product(prob_missing, basenames)
        ]

    timeit(drop_all, "dropping leaves", logging.getLogger("drop"))
def bnf2parglare(productions, terminals, original_start):
    """Build a parse table for the given BNF productions.

    The productions are converted with ``to_parglare_grammar``, a table is
    computed by ``create_table`` (timed under 'computing parse table') and
    then passed through ``table_to_serializable``.

    Returns:
        ``(productions, terminals, original_start, start, serializable_table)``
        where ``start`` is the start symbol returned by
        ``to_parglare_grammar``.
    """
    grammar, start = to_parglare_grammar(
        productions, terminals, original_start)

    with timeit('computing parse table'):
        table_options = dict(
            start_production=grammar.get_production_id(start),
            itemset_type=LALR,
            prefer_shifts=False,
            prefer_shifts_over_empty=False,
            lexical_disambiguation=False,
        )
        table = create_table(grammar, **table_options)

    return (
        productions,
        terminals,
        original_start,
        start,
        table_to_serializable(table),
    )
def find_crossing_1(seq, subseq):
    """ Readable but slow

    Return the list of every index ``i`` for which
    ``seq[i:i + len(subseq)] == subseq``, including a match that ends on
    the last element of ``seq``. Arguments are expected to be sequences
    whose slices compare to a single boolean with ``==`` (e.g. lists).
    """
    width = len(subseq)
    # The last possible start index is len(seq) - width, hence the "+ 1".
    return [
        i for i in range(len(seq) - width + 1)
        if seq[i:i + width] == subseq
    ]


def find_crossing_2(seq, subseq):
    """ Fast but hardly readable

    NumPy version: return an array with the start indices at which
    ``subseq`` occurs in ``seq``.
    """
    # See stackoverflow.com / "python-numpy-first-occurrence-of-subarray"
    # A window equal to subseq correlates with it to dot(subseq, subseq).
    target = np.dot(subseq, subseq)
    candidates = np.where(np.correlate(seq, subseq, mode='valid') == target)[0]
    # some of the candidates entries may be false positives, double check
    check = candidates[:, np.newaxis] + np.arange(len(subseq))
    mask = np.all((np.take(seq, check) == subseq), axis=-1)
    return candidates[mask]


if __name__ == "__main__":
    from tools import timeit

    timeit("random_walk_1(n=10000)", globals())
    timeit("random_walk_2(n=10000)", globals())
    timeit("random_walk_3(n=10000)", globals())
    print()

    W = random_walk_3(n=1000)
    timeit("find_crossing_1(list(W), [+1,0,-1])", globals())
    timeit("find_crossing_2(W, [+1,0,-1])", globals())
# check java installation needs_java = flow_dict["analyze"] if needs_java: tools.get_output(['java', '-version']) # batch folder naming scheme batch_folder = os.path.join(exp_folder, tools.batch_general) setup_args = [batch_folder, methods, probs, flow_dict, dists, force] batch_run = partial(setup, *setup_args) batch_iterator = product(depths, genes, inds, pop_sizes, species, trees) # choose run method def run_parallel(): tools.parmap(batch_run, batch_iterator) def run_serial(): for batch in batch_iterator: batch_run(batch) f = run_parallel if parallel else run_serial # run experiment tools.timeit(f, "solving all problems", logger.getLogger(__name__)) if opts.flow_dict['analyze'] or diag_plots: compile_stats(exp_folder, dists) if diag_plots: make_plots(exp_folder)
def solution_3_bis():
    """Lazily yield every tuple (a, b, c, d) of non-negative ints summing to 10.

    Each loop bound is derived from the values already chosen, so no
    candidate has to be tested and rejected; 286 tuples are produced.
    """
    for a in range(11):
        for b in range(11 - a):
            for c in range(11 - a - b):
                yield a, b, c, 10 - a - b - c


def solution_4():
    """Return the same quadruples as rows of a NumPy array.

    Author: Yaser Martinez

    No Python-level iteration: all 1331 (= 11*11*11) candidate triples are
    built with ``np.indices``, the fourth value is derived from them, and
    rows whose fourth value would be negative are filtered out.
    """
    side = 11
    first_three = np.indices((side, side, side)).reshape(3, side ** 3)
    fourth = 10 - first_three.sum(axis=0)
    candidates = np.vstack((first_three, fourth)).T
    return candidates[fourth >= 0]


if __name__ == '__main__':
    from tools import timeit

    timeit("solution_1()", globals())
    timeit("solution_2()", globals())
    timeit("solution_3()", globals())
    timeit("solution_4()", globals())
    # timeit("solution_5()", globals())

    print()
    timeit("solution_3_bis()", globals())
    print(type(solution_3()))
    print(type(solution_3_bis()))
# -----------------------------------------------------------------------------
# From Numpy to Python
# Copyright (2017) Nicolas P. Rougier - BSD license
# More information at https://github.com/rougier/numpy-book
# -----------------------------------------------------------------------------
import numpy as np
from tools import timeit

# Buffer of four million float32 ones that every benchmark below zeroes
# through a differently-typed view.
Z = np.ones(4 * 1000000, np.float32)

# Time the same "clear the array" statement once per view dtype, echoing each
# statement before it is timed.
for dtype_name in ("float16", "int16", "int32", "float32",
                   "int64", "float64", "complex128"):
    statement = "Z.view(np.{})[...] = 0".format(dtype_name)
    print(">>> " + statement)
    timeit(statement, globals())
steps = random.choices([-1, 1], k=n) return [0] + list(accumulate(steps)) def random_walk_np(n): steps = np.random.choice([-1, 1], n) return np.cumsum(steps) if __name__ == '__main__': random.seed() walker = RandomWalker() print('Object Oriented') start_time = time.perf_counter() timeit('[position for position in walker.walk(n=10000)]', globals()) print('Elapsed Time: {0:.8f}'.format(time.perf_counter() - start_time)) print('') print('Procedural') start_time = time.perf_counter() timeit('random_walk(n=10000)', globals()) print('Elapsed Time: {0:.8f}'.format(time.perf_counter() - start_time)) print('') print('Itertools') start_time = time.perf_counter() timeit('random_walk_iter(n=10000)', globals()) print('Elapsed Time: {0:.8f}'.format(time.perf_counter() - start_time)) print('') print('Numpy') start_time = time.perf_counter() timeit('random_walk_np(n=10000)', globals())
result += x[i] * y[j] return result def compute_2(X, Y): """ Numpy version, faster """ return (X.reshape(len(X), 1) * Y.reshape(1, len(Y))).sum() def compute_3(X, Y): """ Numpy version, fastest """ return X.sum() * Y.sum() def compute_4(X, Y): """ Pure python version, fastesr """ return sum(X) * sum(Y) if __name__ == '__main__': from tools import timeit X = np.arange(1000) timeit("compute_1(X,X)", globals()) timeit("compute_2(X,X)", globals()) timeit("compute_3(X,X)", globals()) timeit("compute_4(X,X)", globals())
def generate(nexus, d, n_gene_trees, n_sp, n_ind, Ne, n_sp_trees):
    """Call ``generateTrees`` under ``timeit``, logging to "generate_trees".

    ``d`` is wrapped in a one-element list and ``nexus`` is passed as the
    last positional argument of ``generateTrees``; the remaining arguments
    are forwarded unchanged.
    """

    # call generate trees
    def build_trees():
        return generateTrees([d], n_gene_trees, n_sp, n_ind, Ne, n_sp_trees,
                             nexus)

    timeit(build_trees, "generating trees",
           logging.getLogger("generate_trees"))
def call_imp(args):
    """
    Calls the cpp imputation software.

    Args:
        args: ``(basename, method)`` pair. The last ``ext_len`` characters
            of ``basename`` are stripped to obtain the name root that is
            passed to the ``./missing<method>.o`` executable.

    After the run, ``sol/<nameroot>.sol`` is renamed so that ``m<method>``
    is inserted before the last ``_``-separated field of the name. If the
    solution file cannot be found, an empty file with the method-tagged
    name is created instead.

    Raises:
        OSError: re-raised when starting or communicating with the
            executable fails.
    """
    # set up variables
    basename, method = args
    nameroot = basename[:-ext_len]
    program = "./missing{}.o".format(method)
    logger = logging.getLogger("impute")

    # call the imputation software
    def imp_wrapper():
        """
        Runs the imputation software and logs failures instead of raising,
        to continue imputing other files in the batch if some calls fail.
        """
        command = [program, nameroot]
        try:
            p = Popen(command, stdout=PIPE, stderr=PIPE)
            output, err = p.communicate()
        except OSError:
            logger.error("OSError while imputing {}".format(nameroot))
            raise
        if output:
            logger.debug(f"Output of {command}: {output}")
        # Popen/communicate never raise CalledProcessError, so a failed run
        # has to be detected through the exit status.
        if p.returncode != 0:
            logger.error(f"Imputation error for basename '{args[0]}'" +
                         f" using method '{args[1]}'.")
            logger.error(f"Exit status {p.returncode} from {command}: {err}")

    timeit(imp_wrapper, "imputing {}".format(nameroot), logger)

    # garbage collection
    gc.collect()

    # rename solution file to include the method number
    namelist = nameroot.split("_")
    namelist.insert(-1, "m{}".format(method))
    dest = "_".join(namelist) + ".sol"
    src = nameroot + ".sol"
    cpp_sol = os.path.abspath('sol')
    imputed = os.path.join(cpp_sol, src)
    imputed_with_method = os.path.join(cpp_sol, dest)

    logger.debug("Checking existence of imputed file:")
    if os.path.isfile(imputed):
        logger.debug("Imputed file {} exists.".format(imputed))
    else:
        logger.debug("Failed to find imputed file {}.".format(imputed))

    logger.debug("Checking existence of imputed file with method name:")
    if os.path.isfile(imputed_with_method):
        logger.debug("Imputed file with method name {} exists.".format(
            imputed_with_method))
    else:
        logger.debug(
            "Failed to find imputed file with method name {}.".format(
                imputed_with_method))

    renamed = False
    try:
        logger.debug("Attempting to move file with os.rename():")
        os.rename(imputed, imputed_with_method)
        logger.debug(
            "Successfully renamed solution file for basename '{}' and method '{}' with os.rename()."
            .format(*args))
        renamed = True
    except FileNotFoundError:
        logger.error(
            "Failed to rename solution file for basename '{}' and method '{}' with os.rename()."
            .format(*args))

    if not renamed:
        try:
            logger.debug("Attempting to move file with shutil.move():")
            shutil.move(imputed, imputed_with_method)
            logger.debug(
                "Successfully renamed solution file for basename '{}' and method '{}' with shutil.move()."
                .format(*args))
        except FileNotFoundError:
            logger.error(
                "Failed to rename solution file for basename '{}' and method '{}' with shutil.move()."
                .format(*args))
            # Leave an empty method-tagged file behind (presumably so later
            # steps still find a solution file -- confirm with the caller).
            with open(imputed_with_method, 'a'):
                pass
# %% codeblock
# write random walk, object oriented
class RandomWalker:
    """Walker that keeps its current position as instance state."""

    def __init__(self):
        self.position = 0

    def walk(self, n):
        """Yield ``n`` positions, starting from 0, moving by -1 or +1."""
        self.position = 0
        for i in range(n):
            yield self.position
            # randint(0, 2) gives 0 or 1, mapped here to a -1 / +1 step
            self.position += 2*np.random.randint(0,2) -1


walker = RandomWalker()
timeit("[position for position in walker.walk(1000)]",globals())


# %% codeblock
def random_walk_faster(n= 1000):
    """Return a walk of ``n`` random -1/+1 steps as a list starting at 0.

    All steps are drawn in a single NumPy call and then accumulated; the
    returned list holds ``n + 1`` positions.
    """
    from itertools import accumulate
    steps = 2*np.random.randint(0,2,n) -1
    # The steps were previously computed and discarded (the function
    # returned None); accumulate them into the walk itself.
    return [0] + list(accumulate(steps))


timeit('random_walk_faster(n=10000)',globals())

grid = np.indices((2, 3))
grid
grid_2 = np.indices((2, 3), sparse = True)
return f def take_raw(i): def f(_, c): return c[i] return f concat_0_2_actions = [ lambda _, c: c[0] + [c[2]], lambda _, c: c, ] with timeit('making grammar parser'): abnf_parser = parglare.GLRParser( abnf_grammar, ws='', actions={ 'dec?': parglare.actions.optional, 'hex?': parglare.actions.optional, 'hex': lambda _, v: int(v, 16), 'dec': lambda _, v: int(v, 10), 'hex_string': concat_0_2_actions,
def find_crossing_1(seq, sub):
    """Readable but slow.

    Return the list of every index ``i`` for which
    ``seq[i:i + len(sub)] == sub``, including a match that ends on the last
    element of ``seq``. Arguments are expected to be sequences whose slices
    compare to a single boolean with ``==`` (e.g. lists).
    """
    width = len(sub)
    # The last possible start index is len(seq) - width, hence the "+ 1".
    return [
        i for i in range(len(seq) - width + 1) if seq[i:i + width] == sub
    ]


# Fast but hardly readable
def find_crossing_2(seq, sub):
    """NumPy version: return an array of start indices where ``sub`` occurs."""
    # See stackoverflow.com / "python-numpy-first-occurrence-of-subarray"
    # A window equal to sub correlates with it to dot(sub, sub).
    target = np.dot(sub, sub)
    candidates = np.where(np.correlate(seq, sub, mode='valid') == target)[0]
    # some of the candidates entries may be false positives, double check
    check = candidates[:, np.newaxis] + np.arange(len(sub))
    mask = np.all((np.take(seq, check) == sub), axis=-1)
    return candidates[mask]


if __name__ == "__main__":
    from tools import timeit

    walker = RandomWalker()
    timeit("[position for position in walker.walk(n=10000)]", globals())
    timeit("random_walk(n=10000)", globals())
    timeit("random_walk_faster(n=10000)", globals())
    timeit("random_walk_fastest(n=10000)", globals())
    print()

    W = random_walk_fastest(n=1000)
    timeit("find_crossing_1(list(W), [+1,0,-1])", globals())
    timeit("find_crossing_2(W, [+1,0,-1])", globals())
# -----------------------------------------------------------------------------
import random
import numpy as np
from tools import timeit


def random_walk_slow(n):
    """Return a list of ``n + 1`` positions, starting at 0.

    Pure Python loop: one ``random.randint`` draw per step, each step being
    -1 or +1.
    """
    walk = [0]
    for _ in range(n):
        walk.append(walk[-1] + 2*random.randint(0, 1)-1)
    return walk


def random_walk_faster(n=1000):
    """Return a list of ``n + 1`` positions, starting at 0.

    All steps are drawn at once and summed with ``itertools.accumulate``.
    """
    from itertools import accumulate
    # random.choices is only available from Python 3.6
    walk = [0]
    walk.extend(accumulate(random.choices([-1,+1], k=n)))
    return walk


def random_walk_fastest(n=1000):
    """Return the cumulative sum of ``n`` random -1/+1 steps as an array.

    Unlike the two list versions there is no leading 0, so the result has
    ``n`` entries.
    """
    return np.random.choice([-1,+1], n).cumsum()


if __name__ == '__main__':
    for statement in ("random_walk_slow(1000)",
                      "random_walk_faster(1000)",
                      "random_walk_fastest(1000)"):
        timeit(statement, globals())
Z = Z[I] Xi, Yi = Xi[I], Yi[I] C = C[I] return Z_.T, N_.T if __name__ == '__main__': from matplotlib import colors import matplotlib.pyplot as plt from tools import timeit # Benchmark xmin, xmax, xn = -2.25, +0.75, int(3000 / 3) ymin, ymax, yn = -1.25, +1.25, int(2500 / 3) maxiter = 200 timeit("mandelbrot_1(xmin, xmax, ymin, ymax, xn, yn, maxiter)", globals()) timeit("mandelbrot_2(xmin, xmax, ymin, ymax, xn, yn, maxiter)", globals()) timeit("mandelbrot_3(xmin, xmax, ymin, ymax, xn, yn, maxiter)", globals()) # Visualization xmin, xmax, xn = -2.25, +0.75, int(3000 / 2) ymin, ymax, yn = -1.25, +1.25, int(2500 / 2) maxiter = 200 horizon = 2.0**40 log_horizon = np.log(np.log(horizon)) / np.log(2) Z, N = mandelbrot(xmin, xmax, ymin, ymax, xn, yn, maxiter, horizon) # Normalized recount as explained in: # http://linas.org/art-gallery/escape/smooth.html M = np.nan_to_num(N + 1 - np.log(np.log(abs(Z))) / np.log(2) + log_horizon)