Пример #1
0
def train(repository: FunctionRepository, params: Asm2VecParams) -> None:
    context = TrainingContext(repository, params)
    context.add_counter(TrainingContext.TOKENS_HANDLED_COUNTER)

    asm2vec_logger().debug('Total number of functions: %d',
                           len(context.repo().funcs()))
    progress = Atomic(1)

    def train_function(fn: VectorizedFunction):
        for seq in fn.sequential().sequences():
            _train_sequence(fn, seq, context)

        asm2vec_logger().debug(
            'Function "%s" trained, progress: %f%%',
            fn.sequential().name(),
            progress.value() / len(context.repo().funcs()) * 100)
        with progress.lock() as prog_proxy:
            prog_proxy.set(prog_proxy.value() + 1)

    executor = concurrent.futures.ThreadPoolExecutor(
        max_workers=context.params().jobs)
    futures = []
    for f in context.repo().funcs():
        futures.append(executor.submit(train_function, f))

    done, not_done = concurrent.futures.wait(
        futures, return_when=concurrent.futures.FIRST_EXCEPTION)
    if len(not_done) > 0:
        raise RuntimeError('Train failed due to one or more failed task.')
Пример #2
0
    def train_function(fn: VectorizedFunction):
        for seq in fn.sequential().sequences():
            _train_sequence(fn, seq, context)

        asm2vec_logger().debug(
            'Function "%s" trained, progress: %f%%',
            fn.sequential().name(),
            progress.value() / len(context.repo().funcs()) * 100)
        with progress.lock() as prog_proxy:
            prog_proxy.set(prog_proxy.value() + 1)
Пример #3
0
    def func_handler(f: Function):
        with vec_funcs_atomic.lock() as vfa:
            vfa.value().append(
                VectorizedFunction(
                    make_sequential_function(f, num_of_rnd_walks)))

        tokens = _get_function_tokens(f, dim)
        for tk in tokens:
            with vocab_atomic.lock() as va:
                if tk.name() in va.value():
                    va.value()[tk.name()].count += 1
                else:
                    va.value()[tk.name()] = Token(tk)

        asm2vec_logger().debug(
            'Sequence generated for function "%s", progress: %f%%', f.name(),
            progress.value() / len(funcs) * 100)
        with progress.lock() as prog:
            prog.set(prog.value() + 1)