Пример #1
0
    def visit_Assign(self, stmt):
        """Expand a dict assigned to a vararg target into per-key assignments.

        When ``self._is_vararg`` accepts the first assignment target:

        * ``x = dict(foo='bar', ...)`` (keyword arguments only) is rewritten
          to the literal form ``x = {'foo': 'bar', ...}`` and visited again.
        * A non-empty literal ``x = {'foo': 'bar', ...}`` is replaced by
          ``x = {}`` followed by one ``x[key] = value`` statement per entry.

        Both rewrites set ``self.change = True``. Every other assignment is
        traversed with ``generic_visit`` and returned.

        Returns:
            The visited statement, or a list of visited replacement
            statements for the dict-literal case.
        """
        if self._is_vararg(stmt.targets[0]):
            value = stmt.value

            # x = dict(foo='bar', ...)
            #
            # Only the pure-keyword form is rewritten. Positional arguments
            # (``dict(other, a=1)``) and ``**`` splats (``kw.arg is None``)
            # cannot be expressed as string-keyed literal entries, so such
            # calls are left untouched instead of being dropped or mangled.
            if isinstance(value, ast.Call) and \
               isinstance(value.func, ast.Name) and \
               value.func.id == 'dict' and \
               not value.args and \
               all(kw.arg is not None for kw in value.keywords):
                keywords = value.keywords
                # Built directly (rather than by unzipping pairs) so that a
                # bare ``dict()`` with no keywords yields an empty literal.
                stmt.value = ast.Dict([ast.Str(kw.arg) for kw in keywords],
                                      [kw.value for kw in keywords])

                self.change = True
                # Re-visit so the literal form is expanded by the branch below.
                return self.visit(stmt)

            # x = {'foo': 'bar', ...}
            elif isinstance(value, ast.Dict) and len(value.keys) > 0:
                # NOTE(review): reads ``.id``, so this assumes the target is a
                # plain ast.Name -- presumably guaranteed by _is_vararg.
                name = stmt.targets[0].id
                new_stmts = [parse_stmt(f'{name} = {{}}')]
                for k, v in zip(value.keys, value.values):
                    # Parse a template and splice in the real key/value nodes.
                    assgn = parse_stmt(f'{name}[_] = _')
                    # NOTE(review): ``slice.value`` relies on the pre-3.9 AST
                    # layout where Subscript.slice is an ast.Index wrapper.
                    assgn.targets[0].slice.value = k
                    assgn.value = v
                    new_stmts.append(assgn)

                self.change = True
                return [self.visit(new_stmt) for new_stmt in new_stmts]

        self.generic_visit(stmt)
        return stmt
Пример #2
0
    def tokenize_all(self, program_strings, vocab_index=None):
        """Tokenize every program and convert its tokens to index tensors.

        Args:
            program_strings: programs handed one by one to ``self.tokenize``
                (through ``par_for``).
            vocab_index: optional existing token -> index mapping. When
                ``None`` a new vocabulary is built from the tokens in
                first-seen order and an ``'UNK'`` entry is added. When given,
                tokens missing from it are mapped to ``vocab_index['UNK']``.

        Returns:
            A 4-tuple ``(tokens, token_to_index, token_indices, programs)``
            where ``token_indices`` holds one ``torch.long`` tensor per
            program.
        """
        def map_tokenize(s):
            tokens, program = self.tokenize(s)
            # Materialise the tokens so they can be iterated more than once.
            return list(tokens), program

        tokens, programs = unzip(
            par_for(map_tokenize, program_strings, progress=False))

        if vocab_index is None:
            token_to_index = {}
            for seq in tokens:
                for tok in seq:
                    if tok not in token_to_index:
                        token_to_index[tok] = len(token_to_index)

            # setdefault, not plain assignment: if a program already contains
            # a literal 'UNK' token, overwriting its entry with
            # len(token_to_index) would produce an index equal to the
            # vocabulary size, i.e. one past the last valid index.
            token_to_index.setdefault('UNK', len(token_to_index))
        else:
            token_to_index = vocab_index

        def index_of(tok):
            # 'UNK' is looked up lazily, only for out-of-vocabulary tokens.
            if tok in token_to_index:
                return token_to_index[tok]
            return token_to_index['UNK']

        token_indices = [
            torch.tensor([index_of(tok) for tok in seq], dtype=torch.long)
            for seq in tokens
        ]

        return tokens, token_to_index, token_indices, programs
Пример #3
0
def obj_to_ast(obj):
    """Convert a runtime Python object into an equivalent AST expression node.

    Supported inputs: tuples, dicts and lists (converted recursively),
    classes (emitted as a bare name), ints, strings, ``None``, bytes and
    infinite floats. ``typing`` generic aliases / special forms are emitted
    as ``None``.

    Raises:
        ObjConversionException: if no conversion exists for ``obj``
            (this includes finite floats).
    """
    if isinstance(obj, tuple):
        return ast.Tuple(elts=tuple(map(obj_to_ast, obj)))
    elif isinstance(obj, dict):
        k, v = unzip([(obj_to_ast(k), obj_to_ast(v)) for k, v in obj.items()])
        return ast.Dict(k, v)
    elif isinstance(obj, list):
        return ast.List(list(map(obj_to_ast, obj)))
    elif isinstance(obj, type):
        # Classes are referenced by name only; no module qualification.
        return ast.Name(id=obj.__name__)
    elif isinstance(obj, int):
        return ast.Num(obj)
    elif isinstance(obj, str):
        return ast.Str(obj)
    elif obj is None:
        return ast.NameConstant(None)
    elif isinstance(obj, (typing._GenericAlias, typing._SpecialForm)):
        # TODO: types
        # issue was in pandas, where importing pandas._typing.Axis would
        # resolve module to typing, attempt to do "from typing import Axis"
        return ast.NameConstant(None)
    elif isinstance(obj, float) and math.isinf(obj):
        # math.isinf is true for both +inf and -inf, so keep the sign
        # instead of always emitting positive infinity.
        return parse_expr('float("-inf")' if obj < 0 else 'float("inf")')
    elif isinstance(obj, bytes):
        return ast.Bytes(s=obj)
    else:
        raise ObjConversionException(f"No converter for {obj}")
Пример #4
0
def build_synthetic_dataset(label_set, N, parser, generator, vocab_index=None, unique=False):
    """Generate ``N`` programs and package them as a ``SyntheticDataset``.

    Args:
        label_set: iterable of labels; stored on the result as a list.
        N: number of programs to generate.
        parser: object providing ``tokenize_all(programs, vocab_index)``.
        generator: object whose ``generate()`` returns
            ``(program, choice, choice_option, label)``.
        vocab_index: optional existing vocabulary forwarded to the parser.
        unique: when true, keep generating until ``N`` distinct programs
            have been collected.

    Returns:
        A ``SyntheticDataset`` wrapping the ``ProgramDataset``, vocabulary,
        labels and choice tables.

    Raises:
        Exception: if any program tokenizes to zero tokens.
    """
    if unique:
        programs = []
        choices = []
        choice_options = []
        labels = []
        # Set mirror of `programs` so the duplicate check is O(1) rather than
        # a linear scan of the list on every generated sample.
        # Assumes programs are hashable (e.g. source strings) -- TODO confirm.
        seen_programs = set()

        # NOTE: this only advances on a previously unseen program, so it does
        # not terminate if the generator cannot produce N distinct programs.
        while len(programs) < N:
            program, choice, choice_option, label = generator.generate()

            if program in seen_programs:
                continue

            seen_programs.add(program)
            programs.append(program)
            choices.append(choice)
            choice_options.append(choice_option)
            labels.append(label)
    else:
        programs, choices, choice_options, labels = unzip([generator.generate() for _ in range(N)])

    # Grammar parser
    tokens, token_to_index, token_indices, new_programs = parser.tokenize_all(programs, vocab_index)

    for prog, tok in zip(programs, tokens):
        if len(tok) == 0:
            raise Exception(f'Created zero-length program from original source:\n{prog}')

    vocab_size = len(token_to_index)

    # Merge the per-program option tables. Existing entries win on key
    # clashes, and the merge order determines the indices assigned below.
    all_choices = {}
    for opts in choice_options:
        all_choices = {**opts, **all_choices}
    choice_indices = {s: i for i, s in enumerate(all_choices.keys())}

    # Logistic classification
    label_list = list(label_set)
    program_labels = [torch.tensor(int(prog_label), dtype=torch.long) for prog_label in labels]

    dataset = ProgramDataset(
        new_programs, token_indices, program_labels, choices, choice_indices)

    return SyntheticDataset(
        dataset=dataset,
        vocab_size=vocab_size,
        vocab_index=token_to_index,
        label_set=label_list,
        choices=all_choices,
        choice_indices=choice_indices,
        parser=parser)
Пример #5
0
    def choice(self, name, options):
        """Return the value chosen for ``name``, sampling it on first use.

        The first call for a given ``name`` draws one value from ``options``
        (a mapping of value -> weight) with probability proportional to its
        weight, after ``self.apply_penalties`` when ``self.adaptive`` is set.
        The draw is recorded in ``self.choices``, ``self.choice_options``,
        ``self.production_choices`` and ``self.choices_counter``. Later calls
        for the same ``name`` return the recorded value without sampling.
        """
        # Guard clause: a name that was already decided is never re-sampled.
        if name in self.choices:
            return self.choices[name][1]

        assert options is not None
        values, weights = unzip(options.items())
        if self.adaptive:
            weights = self.apply_penalties(values, weights)

        # Normalise the weights into a probability vector and draw one index.
        probs = torch.tensor(weights, dtype=torch.float) / sum(weights)
        index = Categorical(probs).sample().item()
        value = values[index]

        # Record the decision and the distribution it was drawn from.
        self.choices[name] = (index, value)
        self.choice_options[name] = list(zip(probs.tolist(), values))
        self.production_choices.update(self.choice_options)
        self.choices_counter[value] += 1
        return self.choices[name][1]
    def eval(self, video=None, progress=True, *args, **kwargs):
        """Run ``self._eval`` over one or more videos and rank the results.

        Args:
            video: a single video, a list of videos, or ``None`` to evaluate
                every ``models.Video`` object.
            progress: show a tqdm progress bar over the videos when true.
            *args, **kwargs: forwarded unchanged to ``self._eval``.

        Returns:
            ``[contexts, scores]``: two parallel lists ordered by ascending
            score. Both lists are empty when nothing was yielded.
        """
        if video is None:
            videos = list(models.Video.objects.all())
        elif isinstance(video, list):
            videos = video
        else:
            videos = [video]

        # Honour the `progress` flag; it was previously accepted but ignored.
        iterator = tqdm(videos) if progress else videos

        results = []
        for current_video in iterator:
            for _ in self._eval(current_video, *args, **kwargs):
                # NOTE(review): CONTEXT and SCORE are names from outside this
                # method, presumably updated by _eval before each yield.
                # CONTEXT is copied so that later updates do not alias
                # earlier results.
                results.append((copy(CONTEXT), SCORE))

        # Nothing to unzip or sort; keep the two-list return shape.
        if not results:
            return [[], []]

        contexts, scores = unzip(results)
        sorted_idx = np.argsort(scores)
        return [[contexts[i] for i in sorted_idx],
                [scores[i] for i in sorted_idx]]
Пример #7
0
    def _bind_arguments(self, f_ast, call_expr, new_stmts):
        """Emit assignments binding a call's arguments to a function's parameters.

        For every parameter of ``f_ast`` an ``ast.Assign`` is appended to
        ``new_stmts``. Its target is the parameter renamed to
        ``{name}{SEP}{f_ast.name}`` and its value is the matching expression
        from ``call_expr``, or the parameter's default. Names assigned inside
        the function that are not among ``f_ast.args.args`` are renamed the
        same way.

        Args:
            f_ast: function definition node; ``.args``, ``.name`` and
                ``.body`` are read, and ``Rename`` visitors are run over
                ``.body``.
            call_expr: call node; ``.args`` and ``.keywords`` are read. A
                trailing ``*expr`` argument is popped off ``call_expr.args``.
            new_stmts: list that receives the generated assignments.

        Returns:
            None; results are delivered through ``new_stmts``.

        Raises:
            KeyError: from ``anon_defaults.pop`` when a positional parameter
                has neither a call argument nor a default.
        """
        args_def = f_ast.args

        # Scope a variable name as unique to the function, and update any references
        # to it in the function
        def unique_and_rename(name):
            unique_name = f'{name}{SEP}{f_ast.name}'
            renamer = Rename(name, unique_name)
            # The visit results are discarded, so Rename presumably rewrites
            # the statements in place.
            for stmt in f_ast.body:
                renamer.visit(stmt)
            return unique_name

        # NOTE(review): `args` is never read below; the copy has no effect.
        args = call_expr.args[:]

        # Rename all variables declared in the function that aren't arguments
        assgn_finder = FindAssignments()
        assgn_finder.visit(f_ast)
        arg_names = set([arg.arg for arg in args_def.args])
        for name in assgn_finder.names:
            if name not in arg_names:
                unique_and_rename(name)

        # If function is called with f(*args)
        if len(call_expr.args) > 0 and \
           isinstance(call_expr.args[-1], ast.Starred):
            # pop() removes the starred argument from call_expr itself, so
            # the caller's AST node is modified.
            star_arg = call_expr.args.pop().value

            # Get the length of the star_arg runtime list
            # (the expression's source is evaluated now, against self.globls)
            star_arg_obj = eval(a2s(star_arg), self.globls, self.globls)

            # Generate an indexing expression for each element of the list
            call_star_args = [
                ast.Subscript(value=star_arg,
                              slice=ast.Index(value=ast.Num(i)))
                for i in range(len(star_arg_obj))
            ]
        else:
            star_arg = None

        # If function is called with f(**kwargs)
        # (a keyword whose .arg is None is a ** splat; only the first is used)
        star_kwarg = [arg for arg in call_expr.keywords if arg.arg is None]
        star_kwarg = star_kwarg[0].value if len(star_kwarg) > 0 else None
        if star_kwarg is not None:
            star_kwarg_dict = eval(a2s(star_kwarg), self.globls, self.globls)
            call_star_kwarg = {
                key: ast.Subscript(value=star_kwarg,
                                   slice=ast.Index(value=ast.Str(key)))
                for key in star_kwarg_dict.keys()
            }

        # Function's anonymous arguments, e.g. f(1, 2) becomes [1, 2]
        # (taken after the starred argument, if any, was popped above)
        call_anon_args = call_expr.args[:]

        # Function's keyword arguments, e.g. f(x=1, y=2) becomes {'x': 1, 'y': 2}
        call_kwargs = {
            arg.arg: arg.value
            for arg in call_expr.keywords if arg.arg is not None
        }

        # Match up defaults with variable names.
        #
        # Python convention is that if function has N arguments and K < N defaults, then
        # the defaults correspond to arguments N - K .. N.
        nodefault = len(args_def.args) - len(args_def.defaults)
        anon_defaults = {
            arg.arg: default
            for arg, default in zip(args_def.args[nodefault:],
                                    args_def.defaults)
        }

        # All keyword-only arguments must have defaults.
        #
        # kwonlyargs occur if a function definition has args AFTER a *args, e.g.
        # the var "y" in `def foo(x, *args, y=1)`
        #
        # NOTE(review): the ast documentation says kw_defaults holds None for
        # a keyword-only argument without a default, so a required kw-only
        # parameter would be bound to None below -- confirm this is intended.
        kw_defaults = {
            arg.arg: default
            for arg, default in zip(args_def.kwonlyargs, args_def.kw_defaults)
        }

        # For each non-keyword-only argument, match it up with the corresponding
        # syntax from the call expression
        for arg in args_def.args:
            k = arg.arg

            # First, match with anonymous arguments
            if len(call_anon_args) > 0:
                v = call_anon_args.pop(0)

            # Then use *args if it exists
            elif star_arg is not None and len(call_star_args) > 0:
                v = call_star_args.pop(0)

            # Then use keyword arguments
            elif k in call_kwargs:
                v = call_kwargs.pop(k)

            # Then use **kwargs if it exists
            elif star_kwarg is not None and k in call_star_kwarg:
                v = call_star_kwarg.pop(k)

            # Otherwise use the default value
            # (raises KeyError if the parameter has no default)
            else:
                v = anon_defaults.pop(k)

            # Add a binding from function argument to call argument
            uniq_k = unique_and_rename(k)
            stmt = ast.Assign(targets=[make_name(uniq_k)], value=v)
            new_stmts.append(stmt)

        # Perform equivalent procedure as above, but for keyword-only arguments
        for arg in args_def.kwonlyargs:
            k = arg.arg

            if k in call_kwargs:
                v = call_kwargs.pop(k)
            elif star_kwarg is not None and k in call_star_kwarg:
                v = call_star_kwarg.pop(k)
            else:
                v = kw_defaults.pop(k)

            uniq_k = unique_and_rename(k)
            stmt = ast.Assign(targets=[make_name(uniq_k)], value=v)
            new_stmts.append(stmt)

        # If function definition uses *args, then assign it to the remaining anonymous
        # arguments from the call_expr
        if args_def.vararg is not None:
            k = unique_and_rename(args_def.vararg.arg)
            v = call_anon_args[:]
            if star_arg is not None:
                v += call_star_args
            new_stmts.append(
                ast.Assign(targets=[make_name(k)], value=ast.List(elts=v)))

        # Similarly for **kwargs in the function definition
        if args_def.kwarg is not None:
            k = unique_and_rename(args_def.kwarg.arg)
            items = call_kwargs.items()
            if star_kwarg is not None:
                items = itertools.chain(items, call_star_kwarg.items())
            # NOTE(review): when no keyword arguments are left over, `items`
            # is empty; whether unzip() then still yields two sequences
            # depends on its implementation -- confirm.
            kwkeys, kwvalues = unzip(items)
            new_stmts.append(
                ast.Assign(targets=[make_name(k)],
                           value=ast.Dict([ast.Str(s) for s in kwkeys],
                                          kwvalues)))
Пример #8
0
 def __init__(self, option_map):
     """Store the option values and a categorical distribution over them.

     ``option_map`` maps each value to its weight; the weights are divided
     by their sum before being handed to ``dist.Categorical``.
     """
     self.values, raw_weights = unzip(option_map.items())
     normalizer = sum(raw_weights)
     probabilities = [weight / normalizer for weight in raw_weights]
     self.dist = dist.Categorical(tensor(probabilities))
Пример #9
0
    def generate_trial(self, N_var, condition):
        """Generate a random straight-line program and its expected answer.

        ``N_var`` variables are defined in sequence. Variable ``i`` is
        computed from variable ``i - 1`` combined with an earlier variable
        (or a random constant when none exists), and a final expression is
        built the same way from the last variable. Depending on
        ``condition`` each definition is either an inline expression
        (``NoFunction``) or a call to a generated one-line function
        (``SimpleFunction`` / ``RenameArgsFunction``).

        Args:
            N_var: number of variables (and, if used, functions) to generate.
            condition: a member of ``self.Condition`` selecting the layout.

        Returns:
            dict with keys ``"program"`` (source text), ``"call"`` (the final
            expression), ``"condition"`` (``str(condition)``) and ``"answer"``
            (the value of the final expression after running the program).

        Raises:
            ValueError: if ``condition`` requires functions but is not one of
                the supported function conditions.
        """
        names = sample(all_names, k=N_var * 2)
        var_names, func_names = names[:N_var], names[N_var:]

        def gen_expr(i):
            """Return ``(expression source, set of variable names it uses)``."""
            if i == 0:
                return rand_const(), set()

            lhs = var_names[i - 1]
            # {lhs}, not set(lhs): set() over a string yields its individual
            # characters, which is wrong for multi-character names.
            free = {lhs}
            if i > 1:
                rhs = choice(var_names[: i - 1])
                free.add(rhs)
            else:
                rhs = rand_const()

            op = choice(all_operators)
            # Randomise which operand appears on the left.
            expr = (
                f"{lhs} {op} {rhs}"
                if choice([True, False])
                else f"{rhs} {op} {lhs}"
            )
            return expr, free

        def gen_func(i, condition):
            """Return ``(call statement, function definition)`` for variable ``i``."""
            expr, free = gen_expr(i)
            free = list(free)

            if condition == self.Condition.SimpleFunction:
                # Parameters carry the same names as the variables passed in.
                func_args = free
                call_args = free
            elif condition == self.Condition.RenameArgsFunction:
                # Give the parameters fresh names that clash neither with the
                # function's own name nor with the variables it receives.
                all_free = set([func_names[i]] + free)
                func_args = sample(list(set(all_names) - all_free), k=len(free))
                # NOTE(review): plain substring replacement; presumably names
                # never occur inside one another -- confirm against all_names.
                for old, new in zip(free, func_args):
                    expr = expr.replace(old, new)
                call_args = free
            else:
                # Previously this fell through with `call_args` unbound and
                # failed later with an UnboundLocalError. A RandomOrderFunction
                # condition was sketched here in the past but never enabled.
                raise ValueError(
                    f"Unsupported condition for function generation: {condition}")

            fbody = f'def {func_names[i]}({",".join(func_args)}):\n    return {expr}'
            fcall = f'{var_names[i]} = {func_names[i]}({",".join(call_args)})'
            return fcall, fbody

        final_expr, _ = gen_expr(N_var)
        if condition == self.Condition.NoFunction:
            program = "\n".join(
                [f"{var_names[i]} = {gen_expr(i)[0]}" for i in range(N_var)]
                + [final_expr]
            )
        else:
            calls, bodies = unzip([gen_func(i, condition) for i in range(N_var)])
            # Shuffle the definitions so their order does not mirror call order.
            bodies = sample(bodies, k=len(bodies))
            program = "\n".join(bodies) + "\n\n" + "\n".join(calls + [final_expr])

        # Run the generated (locally produced, not user-supplied) program to
        # obtain the ground-truth answer.
        globls = {}
        exec(program, globls, globls)

        return {
            "program": program,
            "call": final_expr,
            "condition": str(condition),
            "answer": eval(final_expr, globls, globls),
        }