Example #1
def worker(args, queue, worker_id):
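    # Continuously generates synthetic expressions and pushes encoded
    # pred/gen training examples onto the shared queue.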
    # Build a partial language model or load the full one, optionally
    # sub-sampling the human-written proof steps.
    if args.partial_lm:
        lm = build_language_model(args.num_props, new=args.gen_lm, _all=False)
    else:
        lm = load_language_model(_all=False, new=args.gen_lm)
        if args.data_sampling > 0:
            load_proof_steps_into_lm(lm, ['train'], args.data_sampling)
    config = get_config(lm)
    # Assign this worker to a GPU, filling devices from the end of gpu_list.
    device_idx = args.num_gpus - 1 - (worker_id // args.num_workers)
    if args.random_generate:
        device_idx = 0
    device_id = args.gpu_list[device_idx]
    print('building worker on GPU %d' % device_id)
    torch.cuda.set_device(device_id)
    args.device = torch.device('cuda:' + str(device_id))
    # Random generation needs no trained model; otherwise wrap the LM in an interface.
    interface = None if args.random_generate else interface_lm.LMInterface(args, lm)
    generator = Constructor(args, config, interface)
    generator.initialize()
    _logger = log.get_logger('worker%d' % worker_id, args, append=True)
    _logger.info('worker %d initialize', worker_id)
    tt = 0   # cumulative generation time
    cnt = 0  # expressions generated since the last timing report
    while True:
        t = time.time()
        # Generate one expression, either uniformly at random or guided by the LM.
        if args.random_generate:
            expr = generator.random_generate()
        else:
            expr = generator.parameterized_generate()
        tt += time.time() - t
        if expr is not None:
            # Only enqueue expressions that yield a training example for the current task.
            if args.task == 'pred' and len(expr.prop.e) > 0:
                queue.put(generator.encode_pred_tasks([expr]))
            if args.task == 'gen' and len(expr.unconstrained) > 0:
                queue.put(generator.encode_gen_tasks([expr]))
            # Cap the expression pool; reset to the initial expressions once it grows too large.
            if len(generator.expressions_list) > args.num_cons_exprs + generator.num_initial_expr:
                generator.reinitialize_expressions()
                _logger.info('worker %d reinitialize expressions', worker_id)
            # Log the average generation time every 5000 expressions.
            if cnt == 5000:
                _logger.info('worker %d generate time per expr %s seconds', worker_id, tt / cnt)
                cnt = 0
                tt = 0
            cnt += 1
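
A minimal launch sketch, assuming args carries the num_workers, num_gpus and gpu_list fields read above; the launch_workers driver below is an illustration, not part of the original example.

# Hypothetical driver: one process per (GPU, worker) slot, all feeding one queue.
import torch.multiprocessing as mp

def launch_workers(args):
    mp.set_start_method('spawn', force=True)   # CUDA requires the spawn start method
    queue = mp.Queue(maxsize=100)              # workers push encoded training batches here
    procs = []
    for worker_id in range(args.num_gpus * args.num_workers):
        p = mp.Process(target=worker, args=(args, queue, worker_id), daemon=True)
        p.start()
        procs.append(p)
    return queue, procs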
Example #2
def worker_pre(args, queue, batch_size, ii):
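    # Pre-encodes training batches from previously generated expressions stored
    # under args.exprs_pre and pushes them onto the shared queue.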
    _logger = log.get_logger('worker_pre', args, append=True)
    _logger.info('worker_pre initialize')
    # Build a partial language model or load the full one (optionally the iset variant).
    if args.partial_lm:
        lm = build_language_model(args.num_props, new=args.gen_lm, _all=False)
    else:
        lm = load_language_model(_all=False, new=args.gen_lm, iset=args.iset)
    config = get_config(lm)

    fl = os.listdir(args.exprs_pre)
    # Set up the generator, seeded with the expressions from args.expr_list when
    # data sampling is enabled.
    if args.data_sampling > 0:
        exprs = load_exprs(args.expr_list[0], lm)
        generator = Constructor(args, config)
        generator.initialize_prop()
        generator.expressions_list = exprs
        for e in exprs:
            generator.expressions[e.id] = e
        generator.num_initial_expr = len(generator.expressions)
        generator.initialize_searcher()
    else:
        generator = Constructor(args, config)
        generator.initialize()
    print('--loading pre exprs--')
    exprs_pre = load_exprs(os.path.join(args.exprs_pre, fl[ii]), lm)
    print('--done--')
    generator.expressions_list += exprs_pre
    for e in exprs_pre:
        generator.expressions[e.id] = e
    _logger.info('load %d exprs', len(generator.expressions))
    i = 0
    while True:
        # Sample a batch of expressions that yield an example for the current task.
        _exprs = []
        while len(_exprs) < batch_size:
            expr = random.choice(exprs_pre)
            if args.task == 'gen' and len(expr.unconstrained) == 0:
                continue
            if args.task == 'pred' and len(expr.prop.e) == 0:
                continue
            _exprs.append(expr)
        if args.task == 'pred':
            data = generator.encode_pred_tasks(_exprs)
        else:
            data = generator.encode_gen_tasks(_exprs)
        queue.put(data)
        i += 1
        # Stop after a fixed number of batches unless the queue is consumed indefinitely.
        if i >= len(exprs_pre) // batch_size // 5 and (not args.train_from_queue) and (not args.cons_pre_one):
            break
    print('finish processing current exprs')
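
A corresponding launch sketch, assuming one worker_pre process per file under args.exprs_pre; the launch_pre_workers helper is illustrative and not part of the original example.

# Hypothetical driver: one pre-encoding process per expression file.
import os
import torch.multiprocessing as mp

def launch_pre_workers(args, batch_size):
    mp.set_start_method('spawn', force=True)
    queue = mp.Queue(maxsize=100)              # worker_pre pushes encoded batches here
    procs = []
    for ii in range(len(os.listdir(args.exprs_pre))):
        p = mp.Process(target=worker_pre, args=(args, queue, batch_size, ii), daemon=True)
        p.start()
        procs.append(p)
    return queue, procs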