def fun(S, path_config, gpu, nsamples):
    """Generate poems for every prefix sentence of every poem in S.

    Each poem in S is split into candidate prefix sentences via peomSplit();
    each usable prefix is fed to generating() and the results are appended,
    one per line, to data/new.txt as "prefix<TAB>continuation".

    Parameters:
        S: list of poem strings to mine for prefixes.
        path_config, gpu: forwarded to getModel() to load model/tokenizer.
        nsamples: number of samples requested per prefix from generating().
    """
    model, tokenizer, config, device = getModel(path_config=path_config, gpu=gpu)
    path_target = 'data/new.txt'
    N = 0  # total generated poems written so far
    n = 0  # total prefixes examined so far
    for i, poem in enumerate(S):
        if i % 10 == 0:  # periodic progress report
            print(
                "proceed {} poem (total {}), get {} prefix and generate {} poems"
                .format(i, len(S), n, N))
        for s in peomSplit(poem):
            n += 1
            # Skip sentences containing the missing-glyph placeholder.
            if '□' in s:
                continue
            r = generating(s, model, config, tokenizer, device, nsamples, gpu)
            N += len(r)
            if not r:
                continue
            # Keep only the continuation part after the prefix.
            lines = [s + '\t' + rr[len(s):] for rr in r]
            # 'a' (append) suffices; the original opened 'a+' and then
            # redundantly called f.close() inside the with-block.
            with open(path_target, 'a') as f:
                f.write('\n'.join(lines) + '\n')
def main(path_data, mode, path_config, path_target, topk, temp):
    """Batch-generate outputs for every input line in path_data, dump JSON.

    mode (int-convertible) selects the model variant: 1 routes through
    gpt_gen.generating_poem, anything else through gpt_gen.generating.
    topk/temp override the loaded config's sampling parameters.

    NOTE(review): relies on module-level globals — app, quick, num0, tags,
    rmHFW, ConfigPredict, batchGenerating — confirm they are defined before
    this is called.

    Writes path_target as a JSON list of {'input', 'outputs', 'num'} records
    and prints the total prediction time.
    """
    ii = int(mode)
    model, tokenizer, config, device = gpt_gen.getModel(
        path_config=path_config)
    config['topk'] = topk
    config['temperature'] = temp
    with open(path_data, 'r') as f:
        inputs = f.read().strip().split('\n')
    D = []
    t0 = time.time()
    for data in inputs:
        # The original wrapped this body in `for _ in range(1):`, a no-op
        # single-iteration loop; removed with identical behavior.
        if ii == 1:
            r0 = gpt_gen.generating_poem(app, data, model, config, tokenizer,
                                         device, quick, num0[ii],
                                         batchGenerating=batchGenerating)
        else:
            r0 = gpt_gen.generating(app, data, model, config, tokenizer,
                                    device, ConfigPredict, quick=quick,
                                    num=num0[ii],
                                    removeHighFreqWords=rmHFW[ii],
                                    batchGenerating=batchGenerating)
        # Tag every generated string with the variant's suffix.
        result = [rr + tags[ii] for rr in r0]
        D.append({'input': data, 'outputs': result, 'num': len(result)})
    with open(path_target, 'w') as f:
        json.dump(D, f, ensure_ascii=False, indent=4)
    t1 = time.time()
    print('predict time is {} for parameter topk={}'.format(t1 - t0, topk))
# Module-level setup: load one model replica per configured GPU for every
# model config listed in ConfigPredict.model_configs, then load the
# similarity / next-word JSON lookup tables.
path_configs = ConfigPredict.model_configs
num0 = ConfigPredict.predict_nums
tags = ConfigPredict.tags
rmHFW = ConfigPredict.rmHFW
maxNext = ConfigPredict.maxNext_JLX
path_next = ConfigPredict.path_JLX_next
path_simi = ConfigPredict.path_JLX_simi
# Parallel lists indexed by model-config index; each element is itself a
# list with one entry per GPU replica of that model.
model, tokenizer, config, device, GPUs = [], [], [], [], []
ModelIndex = []
for ii in range(len(path_configs)):
    M0, T0, C0, D0 = [], [], [], []
    gpus = ConfigPredict.gpus[ii].split(',')
    # Model key is the filename part between 'config_' and the '.json' suffix.
    idx = path_configs[ii].index('config_') + len('config_')
    key = path_configs[ii][idx:-5]
    for gpu in gpus:
        m0, t0, c0, d0 = gpt_gen.getModel(path_config=path_configs[ii], gpu=gpu)
        # Per-model decoding overrides from the prediction config.
        c0['repetition_penalty'] = ConfigPredict.repetition_penalty[ii]
        c0['temperature'] = ConfigPredict.temperature[ii]
        c0['length'] = ConfigPredict.length[ii]
        M0.append(m0)
        T0.append(t0)
        C0.append(c0)
        D0.append(d0)
    model.append(M0)
    tokenizer.append(T0)
    config.append(C0)
    device.append(D0)
    ModelIndex.append(list(range(len(gpus))))
    GPUs.append(gpus)
# Use context managers so the JSON files are closed promptly; the original
# json.load(open(...)) form leaked the file handles.
with open(path_simi, 'r', encoding='utf-8') as f:
    D_simi = json.load(f)
with open(path_next, 'r', encoding='utf-8') as f:
    D_next = json.load(f)
# Module-level setup variant: here ConfigPredict is a *sequence* of config
# objects (one per model), unlike the scalar-config setup elsewhere in this
# file — one model is loaded per config entry.
num0 = [c.predict_nums for c in ConfigPredict]
tags = [c.tags for c in ConfigPredict]
rmHFW = [c.rmHFW for c in ConfigPredict]
'''
maxNext = ConfigPredict.maxNext_JLX
path_next = ConfigPredict.path_JLX_next
path_simi = ConfigPredict.path_JLX_simi
D_simi = json.load(open(path_simi,'r',encoding='utf-8'))
D_next = json.load(open(path_next,'r',encoding='utf-8'))
D_simi = {k:json.loads(D_simi[k]) for k in D_simi}
D_next = {k:json.loads(D_next[k]) for k in D_next}
'''
# Lookup tables disabled in this variant: left empty instead of loaded.
D_simi,D_next,maxNext=[],[],[]
# Parallel lists indexed by model-config index (one replica each, no
# per-GPU fan-out in this variant).
model,tokenizer,config,device = [], [], [], []
for ii in range(len(path_configs)):
    m0,t0,c0,d0 = gpt_gen.getModel(path_config=path_configs[ii],gpu=ConfigPredict[ii].gpus)
    # Per-model decoding overrides.
    c0['repetition_penalty'] = ConfigPredict[ii].repetition_penalty
    c0['temperature'] = ConfigPredict[ii].temperature
    c0['length'] = ConfigPredict[ii].length
    model.append(m0)
    tokenizer.append(t0)
    config.append(c0)
    device.append(d0)
@app.route('/api/gen', methods=['POST'])
def test2():
    # Flask handler: expects a JSON body with an "input" field.
    # NOTE(review): the handler body appears truncated in this view — the
    # generation/response logic continues beyond what is visible here.
    r = request.json
    #print(type(r))
    #print(request.json)
    #r = '{"input": "们"}'
    #r = json.loads(r)
    data = r["input"]
# Service bootstrap for the 'gou' style model: load one replica of the same
# model config onto each GPU in the comma-separated list, then serve it.
port = 7000
from Config_gou import config_predict
gpus = '0,1,2,3'
style = 'gou'
ConfigPredict = config_predict(gpus=gpus)
batchGenerating = ConfigPredict.batchGenerating
path_configs = ConfigPredict.model_configs
num0 = ConfigPredict.predict_nums
tags = ConfigPredict.tags
rmHFW = ConfigPredict.rmHFW
# Re-read the GPU list from the config (may differ from the literal above).
gpus = ConfigPredict.gpus.split(',')
# Model holds (model, gpu) pairs — one loaded replica per GPU.
Model = []
for gpu in gpus:
    model, tokenizer, config, device = gpt_gen.getModel(
        path_config=path_configs, gpu=gpu)
    Model.append((model, gpu))
@app.route('/', methods=['POST'])
def test1():
    # Flask handler: JSON body with "input" and optional "quick" flag
    # (string "True" enables the quick-generation path).
    r = request.json
    data = r["input"]
    quick = False
    if "quick" in r:
        print("quick pattern")
        if r["quick"] == "True":
            quick = True
    try:
        # Shuffle to pick a random GPU replica for crude load balancing.
        random.shuffle(Model)
        model, gpu = Model[0][0], Model[0][1]
        # NOTE(review): handler is truncated in this view — the try-block's
        # remainder (and its except clause) continue beyond what is visible.