def visit_Assign(self, stmt):
    """Split dictionary assignments to vararg targets into per-key statements.

    Applies only when ``self._is_vararg`` accepts the first assignment target:

    * ``x = dict(foo=bar, ...)`` has its value replaced by the equivalent
      dict literal and the statement is visited again.
    * ``x = {'foo': bar, ...}`` (non-empty) is replaced by ``x = {}`` followed
      by one ``x[key] = value`` statement per entry.

    Both rewrites set ``self.change`` to True. Every other assignment has its
    children visited via ``generic_visit`` and is returned as-is.

    Returns:
        The visited statement, or a list holding the result of visiting each
        replacement statement.
    """
    if self._is_vararg(stmt.targets[0]):
        value = stmt.value

        # x = dict(foo='bar', ...)
        #
        # Only keyword-only calls can become a literal with string keys:
        # positional arguments would be silently dropped, and a ``**mapping``
        # argument is a keyword whose ``arg`` is None.
        if (isinstance(value, ast.Call)
                and isinstance(value.func, ast.Name)
                and value.func.id == 'dict'
                and not value.args
                and all(kw.arg is not None for kw in value.keywords)):
            # Built with comprehensions so that a bare ``dict()`` (no
            # keywords) simply becomes an empty literal.
            keys = [ast.Str(kw.arg) for kw in value.keywords]
            vals = [kw.value for kw in value.keywords]
            stmt.value = ast.Dict(keys, vals)
            self.change = True
            return self.visit(stmt)

        # x = {'foo': 'bar', ...}
        #
        # A None key marks a ``**mapping`` entry, which has no single
        # subscript to assign to, so such literals are left untouched.
        if (isinstance(value, ast.Dict)
                and len(value.keys) > 0
                and all(key is not None for key in value.keys)):
            name = stmt.targets[0].id
            new_stmts = [parse_stmt(f'{name} = {{}}')]
            for key, val in zip(value.keys, value.values):
                assgn = parse_stmt(f'{name}[_] = _')
                # Swap the `_` placeholders for the real key and value nodes
                assgn.targets[0].slice.value = key
                assgn.value = val
                new_stmts.append(assgn)
            self.change = True
            return [self.visit(new_stmt) for new_stmt in new_stmts]

    self.generic_visit(stmt)
    return stmt
def tokenize_all(self, program_strings, vocab_index=None):
    """Tokenize a batch of programs and encode their tokens as index tensors.

    Args:
        program_strings: Programs to tokenize; each one is passed to
            ``self.tokenize``.
        vocab_index: Optional token -> index mapping to reuse. When omitted, a
            vocabulary is built from the tokens in order of first appearance
            and an ``'UNK'`` entry is added. When given, tokens missing from
            it are encoded with its ``'UNK'`` index.

    Returns:
        ``(tokens, token_to_index, token_indices, programs)``: the per-program
        token lists, the vocabulary that was used, one ``torch.long`` tensor
        of token indices per program, and the second value returned by
        ``self.tokenize`` for each program.
    """
    def map_tokenize(s):
        tokens, program = self.tokenize(s)
        return list(tokens), program

    tokens, programs = unzip(
        par_for(map_tokenize, program_strings, progress=False))

    if vocab_index is None:
        token_to_index = {}
        for program_tokens in tokens:
            for token in program_tokens:
                if token not in token_to_index:
                    token_to_index[token] = len(token_to_index)
        # Keep the existing slot when 'UNK' already occurs as a token;
        # overwriting it would leave a gap and push the largest index up to
        # len(token_to_index), one past the vocabulary size.
        token_to_index.setdefault('UNK', len(token_to_index))
        token_indices = [
            torch.tensor([token_to_index[token] for token in program_tokens],
                         dtype=torch.long)
            for program_tokens in tokens
        ]
    else:
        token_to_index = vocab_index
        token_indices = [
            torch.tensor([
                vocab_index[token] if token in vocab_index else vocab_index['UNK']
                for token in program_tokens
            ], dtype=torch.long)
            for program_tokens in tokens
        ]

    return tokens, token_to_index, token_indices, programs
def obj_to_ast(obj):
    """Convert a Python value into an AST expression node that denotes it.

    Supported inputs: tuples, dicts and lists (converted recursively),
    classes (emitted as a bare name), ints, strings, ``None``, bytes and
    positive/negative infinity. ``typing`` generic aliases and special forms
    are emitted as ``None``.

    Args:
        obj: The value to convert.

    Returns:
        An ``ast`` expression node.

    Raises:
        ObjConversionException: If no converter exists for ``obj`` (this
            includes finite floats and NaN).
    """
    if isinstance(obj, tuple):
        return ast.Tuple(elts=[obj_to_ast(elt) for elt in obj])
    elif isinstance(obj, dict):
        # Built pairwise (rather than unzipping a list of pairs) so that an
        # empty dict converts to an empty literal.
        keys, values = [], []
        for key, value in obj.items():
            keys.append(obj_to_ast(key))
            values.append(obj_to_ast(value))
        return ast.Dict(keys, values)
    elif isinstance(obj, list):
        return ast.List([obj_to_ast(elt) for elt in obj])
    elif isinstance(obj, type):
        return ast.Name(id=obj.__name__)
    elif isinstance(obj, int):
        return ast.Num(obj)
    elif isinstance(obj, str):
        return ast.Str(obj)
    elif obj is None:
        return ast.NameConstant(None)
    elif isinstance(obj, (typing._GenericAlias, typing._SpecialForm)):
        # TODO: types
        # issue was in pandas, where importing pandas._typing.Axis would
        # resolve module to typing, attempt to do "from typing import Axis"
        return ast.NameConstant(None)
    elif isinstance(obj, float) and math.isinf(obj):
        # Infinity has no literal syntax; keep the sign so that -inf does not
        # silently turn into +inf.
        return parse_expr('float("inf")' if obj > 0 else 'float("-inf")')
    elif isinstance(obj, bytes):
        return ast.Bytes(s=obj)
    else:
        raise ObjConversionException(f"No converter for {obj}")
def build_synthetic_dataset(label_set, N, parser, generator, vocab_index=None, unique=False):
    """Generate ``N`` programs and package them as a ``SyntheticDataset``.

    Args:
        label_set: Iterable of labels; stored on the result as a list.
        N: Number of programs to generate.
        parser: Object providing ``tokenize_all(programs, vocab_index)``.
        generator: Object whose ``generate()`` returns
            ``(program, choice, choice_option, label)``.
        vocab_index: Optional existing vocabulary forwarded to the parser.
        unique: When True, keep generating until ``N`` distinct programs have
            been collected. This never terminates if the generator cannot
            produce ``N`` distinct programs.

    Returns:
        A ``SyntheticDataset`` wrapping a ``ProgramDataset`` together with the
        vocabulary, labels and choice tables.

    Raises:
        Exception: If any program tokenizes to zero tokens.
    """
    if unique:
        programs, choices, choice_options, labels = [], [], [], []
        # Constant-time duplicate check instead of scanning the list each
        # time; assumes programs are hashable (e.g. source strings).
        seen = set()
        while len(programs) < N:
            program, choice, choice_option, label = generator.generate()
            if program in seen:
                continue
            seen.add(program)
            programs.append(program)
            choices.append(choice)
            choice_options.append(choice_option)
            labels.append(label)
    else:
        programs, choices, choice_options, labels = unzip(
            [generator.generate() for _ in range(N)])

    # Grammar parser
    tokens, token_to_index, token_indices, new_programs = parser.tokenize_all(
        programs, vocab_index)
    for prog, tok in zip(programs, tokens):
        if len(tok) == 0:
            raise Exception(f'Created zero-length program from original source:\n{prog}')
    vocab_size = len(token_to_index)

    # Merge the per-program choice options; for a key seen more than once the
    # earliest value wins. The resulting key order defines the choice indices.
    all_choices = {}
    for opts in choice_options:
        all_choices = {**opts, **all_choices}
    choice_indices = {s: i for i, s in enumerate(all_choices.keys())}

    # Logistic classification
    label_list = list(label_set)
    program_labels = [
        torch.tensor(int(prog_label), dtype=torch.long) for prog_label in labels
    ]

    dataset = ProgramDataset(
        new_programs, token_indices, program_labels, choices, choice_indices)

    return SyntheticDataset(
        dataset=dataset,
        vocab_size=vocab_size,
        vocab_index=token_to_index,
        label_set=label_list,
        choices=all_choices,
        choice_indices=choice_indices,
        parser=parser)
def choice(self, name, options):
    """Return the value chosen for ``name``, sampling it on first use.

    The first call for a given ``name`` draws one of the keys of ``options``
    with probability proportional to its weight (after
    ``self.apply_penalties`` when ``self.adaptive`` is set), records the draw
    in ``self.choices``, ``self.choice_options``, ``self.production_choices``
    and ``self.choices_counter``, and returns it. Later calls return the
    recorded value and ignore ``options``.

    Args:
        name: Identifier of the choice point.
        options: Mapping of candidate value -> weight; must not be None the
            first time ``name`` is seen.
    """
    # Already decided: reuse the stored value.
    if name in self.choices:
        return self.choices[name][1]

    assert options is not None
    candidates, weights = unzip(options.items())
    if self.adaptive:
        weights = self.apply_penalties(candidates, weights)

    # Normalise the weights into probabilities and draw one index.
    probs = torch.tensor(weights, dtype=torch.float) / sum(weights)
    sampled_index = Categorical(probs).sample().item()
    picked = candidates[sampled_index]

    # Record the draw, the options it was drawn from, and the global tally.
    self.choices[name] = (sampled_index, picked)
    self.choice_options[name] = list(zip(probs.tolist(), candidates))
    self.production_choices.update(self.choice_options)
    self.choices_counter[picked] += 1

    return self.choices[name][1]
def eval(self, video=None, progress=True, *args, **kwargs):
    """Run ``self._eval`` over videos and return contexts and scores by score.

    Args:
        video: A single video, a list of videos, or None to use every object
            from ``models.Video.objects.all()``.
        progress: When True, wrap the iteration over videos in ``tqdm``.
        *args, **kwargs: Forwarded to ``self._eval``.

    Returns:
        ``[contexts, scores]``, both ordered by ascending score as given by
        ``np.argsort``. Both lists are empty when nothing was produced.
    """
    if video is None:
        videos = list(models.Video.objects.all())
    elif isinstance(video, list):
        videos = video
    else:
        videos = [video]

    # Honour the `progress` flag instead of always showing a progress bar.
    video_iter = tqdm(videos) if progress else videos

    results = []
    for vid in video_iter:
        for _ in self._eval(vid, *args, **kwargs):
            # Snapshot the module-level CONTEXT and SCORE after each step;
            # presumably self._eval updates them as it runs (TODO confirm).
            results.append((copy(CONTEXT), SCORE))

    # Nothing to sort or unpack when no step produced a result.
    if not results:
        return [[], []]

    contexts, scores = unzip(results)
    sorted_idx = np.argsort(scores)
    return [[contexts[i] for i in sorted_idx],
            [scores[i] for i in sorted_idx]]
def _bind_arguments(self, f_ast, call_expr, new_stmts):
    """Append assignments that bind a call's arguments to a function's parameters.

    Every name assigned inside the function, and every parameter, is renamed
    to ``{name}{SEP}{f_ast.name}`` throughout ``f_ast.body``. For each
    parameter an ``ast.Assign`` from the renamed parameter to the matching
    call-site expression (or to its default) is appended to ``new_stmts``.

    Args:
        f_ast: Function definition node; its body is renamed in place.
        call_expr: Call node supplying the arguments. A trailing ``*args``
            element is popped from ``call_expr.args``.
        new_stmts: List that receives the generated assignments.

    Raises:
        KeyError: If a positional parameter is matched by no argument and has
            no default.
    """
    args_def = f_ast.args

    # Scope a variable name as unique to the function, and update any references
    # to it in the function
    def unique_and_rename(name):
        unique_name = f'{name}{SEP}{f_ast.name}'
        renamer = Rename(name, unique_name)
        for stmt in f_ast.body:
            renamer.visit(stmt)
        return unique_name

    # Rename all variables declared in the function that aren't arguments
    assgn_finder = FindAssignments()
    assgn_finder.visit(f_ast)
    arg_names = {arg.arg for arg in args_def.args}
    for name in assgn_finder.names:
        if name not in arg_names:
            unique_and_rename(name)

    # If function is called with f(*args)
    if len(call_expr.args) > 0 and \
            isinstance(call_expr.args[-1], ast.Starred):
        star_arg = call_expr.args.pop().value

        # Get the length of the star_arg runtime list
        star_arg_obj = eval(a2s(star_arg), self.globls, self.globls)

        # Generate an indexing expression for each element of the list
        call_star_args = [
            ast.Subscript(value=star_arg, slice=ast.Index(value=ast.Num(i)))
            for i in range(len(star_arg_obj))
        ]
    else:
        star_arg = None

    # If function is called with f(**kwargs)
    star_kwarg = [arg for arg in call_expr.keywords if arg.arg is None]
    star_kwarg = star_kwarg[0].value if len(star_kwarg) > 0 else None
    if star_kwarg is not None:
        # Evaluate the mapping to learn which keys it provides
        star_kwarg_dict = eval(a2s(star_kwarg), self.globls, self.globls)
        call_star_kwarg = {
            key: ast.Subscript(value=star_kwarg,
                               slice=ast.Index(value=ast.Str(key)))
            for key in star_kwarg_dict.keys()
        }

    # Function's anonymous arguments, e.g. f(1, 2) becomes [1, 2]
    call_anon_args = call_expr.args[:]

    # Function's keyword arguments, e.g. f(x=1, y=2) becomes {'x': 1, 'y': 2}
    call_kwargs = {
        arg.arg: arg.value
        for arg in call_expr.keywords if arg.arg is not None
    }

    # Match up defaults with variable names.
    #
    # Python convention is that if function has N arguments and K < N defaults, then
    # the defaults correspond to arguments N - K .. N.
    nodefault = len(args_def.args) - len(args_def.defaults)
    anon_defaults = {
        arg.arg: default
        for arg, default in zip(args_def.args[nodefault:], args_def.defaults)
    }

    # Keyword-only arguments are paired positionally with kw_defaults; this
    # code assumes each of them has a default.
    #
    # kwonlyargs occur if a function definition has args AFTER a *args, e.g.
    # the var "y" in `def foo(x, *args, y=1)`
    kw_defaults = {
        arg.arg: default
        for arg, default in zip(args_def.kwonlyargs, args_def.kw_defaults)
    }

    # For each non-keyword-only argument, match it up with the corresponding
    # syntax from the call expression
    for arg in args_def.args:
        k = arg.arg

        # First, match with anonymous arguments
        if len(call_anon_args) > 0:
            v = call_anon_args.pop(0)

        # Then use *args if it exists
        elif star_arg is not None and len(call_star_args) > 0:
            v = call_star_args.pop(0)

        # Then use keyword arguments
        elif k in call_kwargs:
            v = call_kwargs.pop(k)

        # Then use **kwargs if it exists
        elif star_kwarg is not None and k in call_star_kwarg:
            v = call_star_kwarg.pop(k)

        # Otherwise use the default value
        else:
            v = anon_defaults.pop(k)

        # Add a binding from function argument to call argument
        uniq_k = unique_and_rename(k)
        stmt = ast.Assign(targets=[make_name(uniq_k)], value=v)
        new_stmts.append(stmt)

    # Perform equivalent procedure as above, but for keyword-only arguments
    for arg in args_def.kwonlyargs:
        k = arg.arg

        if k in call_kwargs:
            v = call_kwargs.pop(k)
        elif star_kwarg is not None and k in call_star_kwarg:
            v = call_star_kwarg.pop(k)
        else:
            v = kw_defaults.pop(k)

        uniq_k = unique_and_rename(k)
        stmt = ast.Assign(targets=[make_name(uniq_k)], value=v)
        new_stmts.append(stmt)

    # If function definition uses *args, then assign it to the remaining anonymous
    # arguments from the call_expr
    if args_def.vararg is not None:
        k = unique_and_rename(args_def.vararg.arg)
        v = call_anon_args[:]
        if star_arg is not None:
            v += call_star_args
        new_stmts.append(
            ast.Assign(targets=[make_name(k)], value=ast.List(elts=v)))

    # Similarly for **kwargs in the function definition
    if args_def.kwarg is not None:
        k = unique_and_rename(args_def.kwarg.arg)
        items = list(call_kwargs.items())
        if star_kwarg is not None:
            items.extend(call_star_kwarg.items())
        # Keys and values are built separately so that a call leaving no
        # extra keyword arguments yields an empty dict literal.
        new_stmts.append(
            ast.Assign(targets=[make_name(k)],
                       value=ast.Dict([ast.Str(key) for key, _ in items],
                                      [val for _, val in items])))
def __init__(self, option_map):
    """Set up a categorical distribution over weighted options.

    Args:
        option_map: Mapping of value -> weight. The values are stored on
            ``self.values``; the weights, divided by their sum, become the
            probabilities of ``self.dist``.
    """
    self.values, raw_weights = unzip(option_map.items())
    weight_sum = sum(raw_weights)
    # Normalise so the probabilities add up to one.
    probabilities = tensor([weight / weight_sum for weight in raw_weights])
    self.dist = dist.Categorical(probabilities)
def generate_trial(self, N_var, condition):
    """Generate a random program of ``N_var`` chained variables and its answer.

    Variable ``i`` is defined from variable ``i - 1`` combined with an earlier
    variable or a random constant. Depending on ``condition`` each definition
    is written inline (``NoFunction``) or wrapped in its own function whose
    parameters either keep the variable names (``SimpleFunction``) or are
    renamed (``RenameArgsFunction``). The program is executed to compute the
    value of the final expression.

    Args:
        N_var: Number of variables (and functions) to generate.
        condition: A member of ``self.Condition``.

    Returns:
        A dict with the keys ``"program"``, ``"call"``, ``"condition"`` and
        ``"answer"``.

    Raises:
        ValueError: If ``condition`` is a function condition other than
            ``SimpleFunction`` or ``RenameArgsFunction``.
    """
    import re

    names = sample(all_names, k=N_var * 2)
    var_names, func_names = names[:N_var], names[N_var:]

    def gen_expr(i):
        # Returns (expression source, set of variable names it refers to).
        if i == 0:
            return rand_const(), set()

        lhs = var_names[i - 1]
        # A set holding the name itself; set(lhs) would split a
        # multi-character name into its characters.
        free = {lhs}
        if i > 1:
            rhs = choice(var_names[: i - 1])
            free.add(rhs)
        else:
            rhs = rand_const()
        op = choice(all_operators)
        expr = (
            f"{lhs} {op} {rhs}" if choice([True, False]) else f"{rhs} {op} {lhs}"
        )
        return expr, free

    def gen_func(i, condition):
        # Returns (call statement, function definition) for variable i.
        expr, free = gen_expr(i)
        free = list(free)
        if condition == self.Condition.SimpleFunction:
            func_args = free
            call_args = free
        else:
            # Pick fresh parameter names that clash with neither the function
            # name nor the variables being passed in.
            all_free = set([func_names[i]] + free)
            func_args = sample(list(set(all_names) - all_free), k=len(free))
            for old, new in zip(free, func_args):
                # Whole-word replacement so that a name is not rewritten
                # inside a longer name or a word-like operator.
                expr = re.sub(rf"\b{re.escape(old)}\b", new, expr)

            if condition == self.Condition.RenameArgsFunction:
                call_args = free
            else:
                # Other function conditions (e.g. a random argument order
                # variant) are not implemented.
                raise ValueError(f"Unsupported condition: {condition}")

        fbody = f'def {func_names[i]}({",".join(func_args)}):\n return {expr}'
        fcall = f'{var_names[i]} = {func_names[i]}({",".join(call_args)})'
        return fcall, fbody

    final_expr, _ = gen_expr(N_var)
    if condition == self.Condition.NoFunction:
        program = "\n".join(
            [f"{var_names[i]} = {gen_expr(i)[0]}" for i in range(N_var)]
            + [final_expr]
        )
    else:
        funcs = [gen_func(i, condition) for i in range(N_var)]
        calls = [fcall for fcall, _ in funcs]
        bodies = [fbody for _, fbody in funcs]
        # Shuffle the definitions so their order does not reveal call order
        bodies = sample(bodies, k=len(bodies))
        program = "\n".join(bodies) + "\n\n" + "\n".join(calls + [final_expr])

    # The program is generated above from local names and constants, so
    # executing it does not run external input.
    globls = {}
    exec(program, globls, globls)

    return {
        "program": program,
        "call": final_expr,
        "condition": str(condition),
        "answer": eval(final_expr, globls, globls),
    }