def testFun(app, prefix, model, config, tokenizer, device, config_predict,
            quick=False, num=5, continue_writing=False, removeHighFreqWords=False,
            batchGenerating=False, gpu='0', onlyMax=False, maxNb=20):
    # print("start:", prefix)
    # os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    if len(prefix) == 0 or len(prefix) > model.config.n_ctx:
        return []
    if gpu:
        torch.cuda.set_device(int(gpu))
        device = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        device = 'cpu'
    # print("use device:%s" % device)
    prefix0 = prefix
    if config_predict.prefixTrim:
        prefix = sentTriming(prefix0)
        if len(prefix) == 0:
            prefix = prefix0
    punc = '.,?!;\t 。,?!;'
    global a
    a = app
    fast_pattern = config_predict.fast_pattern
    n_ctx = model.config.n_ctx
    # cap the generation budget for short prefixes
    len_prefix = len(prefix)
    if len_prefix < 5:
        max_genlen = 5 * len_prefix
    elif len_prefix < 10:
        max_genlen = 3 * len_prefix
    else:
        max_genlen = config['length']
    length = min(max_genlen, n_ctx - len_prefix - 1)
    nsamples = num
    maxNb = max(nsamples, maxNb)
    temperature = config['temperature']
    topk = config['topk']
    topp = config['topp']
    quick_pattern = quick
    repetition_penalty = config['repetition_penalty']
    if length == -1:
        length = model.config.n_ctx
    raw_text = prefix
    context_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(raw_text))
    S = []
    rev_repetition_penalty = 1.0 / repetition_penalty
    # run the prefix once through the model to build the key/value cache,
    # then feed only the last token at each subsequent step
    inputs = [context_tokens] * nsamples
    inputs = torch.tensor(inputs, dtype=torch.long, device=device)
    _, past = model(inputs[:, :-1], None)[:2]
    prev = inputs[:, -1].view(-1, 1)
    context = context_tokens
    generate = [[t for t in context] for _ in range(nsamples)]
    # (A0, A1) index every token already present in each sample, so the
    # repetition penalty can be applied to all of them in one vectorized op
    A0 = []
    A1 = []
    for kk in range(len(generate)):
        for jj in range(len(generate[kk])):
            A0.append(kk)
            A1.append(generate[kk][jj])
    with torch.no_grad():
        for i in range(length):  # one new token per step for every sample
            output = model(prev, past=past)
            output, past = output[:2]
            output = output.squeeze(1)
            output[A0, A1] *= rev_repetition_penalty
            output /= temperature
            # top_p=0 disables nucleus filtering; only top-k is applied here
            filtered_logits = top_k_top_p_filtering(output, top_k=topk, top_p=0)
            next_token = torch.multinomial(torch.softmax(filtered_logits, dim=-1),
                                           num_samples=1)
            prev = next_token
            NT_np = next_token.cpu().numpy()
            for ii in range(nsamples):
                generate[ii].append(NT_np[ii][0])
                A0.append(ii)
                A1.append(NT_np[ii][0])
    outs = generate
    for out in outs:
        tmptext = untokenization(out, config, tokenizer, punc, continue_writing)
        S.append(tmptext)
    return S
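# testFun relies on top_k_top_p_filtering, which is defined elsewhere in this
# repo. The sketch below is NOT that function; it is a minimal, self-contained
# reference (the name _ref_top_k_top_p_filtering is ours), assuming the widely
# used HuggingFace-style filter that the call above appears to expect: keep
# the top-k logits per row, then drop the tail of the probability nucleus
# above top_p.
import torch


def _ref_top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    # logits: (batch, vocab) tensor, modified in place
    top_k = min(top_k, logits.size(-1))
    if top_k > 0:
        # mask every logit smaller than the k-th largest in its row
        kth_vals = torch.topk(logits, top_k)[0][..., -1, None]
        logits[logits < kth_vals] = filter_value
    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cum_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
        # drop tokens once the cumulative probability exceeds top_p,
        # always keeping at least the single most likely token
        sorted_mask = cum_probs > top_p
        sorted_mask[..., 1:] = sorted_mask[..., :-1].clone()
        sorted_mask[..., 0] = False
        mask = sorted_mask.scatter(1, sorted_indices, sorted_mask)
        logits[mask] = filter_value
    return logits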
def generating(app, prefix, model, config, tokenizer, device, config_predict,
               quick=False, num=5, continue_writing=False, removeHighFreqWords=False,
               batchGenerating=False, gpu='0', onlyMax=False, maxNb=20, style=''):
    # print("start:", prefix)
    # os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    if len(prefix) == 0 or len(prefix) > model.config.n_ctx:
        return []
    if gpu:
        torch.cuda.set_device(int(gpu))
        device = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        device = 'cpu'
    # if style == 'prose':
    #     prefix = prefix[0] + prefix
    prefix0 = prefix
    if config_predict.prefixTrim:
        prefix = sentTriming(prefix0)
        if len(prefix) == 0:
            prefix = prefix0
    punc = '.,?!;\t 。,?!;'
    global a
    a = app
    fast_pattern = config_predict.fast_pattern
    n_ctx = model.config.n_ctx
    # short prefixes get a fixed, small generation budget
    len_prefix = len(prefix)
    if len_prefix < 5:
        max_genlen = 20
    elif len_prefix < 10:
        max_genlen = 25
    else:
        max_genlen = config['length']
    length = min(max_genlen, n_ctx - len_prefix - 1)
    nsamples = num
    maxNb = max(nsamples, maxNb)
    temperature = config['temperature']
    topk = config['topk']
    topp = config['topp']
    quick_pattern = quick
    repetition_penalty = config['repetition_penalty']
    if length == -1:
        length = model.config.n_ctx
    raw_text = '[MASK]' + prefix
    context_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(raw_text))
    if batchGenerating:
        S = []
        if onlyMax:
            outs = sample_sequence_batch_max(model, context_tokens, length, n_ctx,
                                             tokenizer, nsamples=2,
                                             temperature=temperature, top_k=topk,
                                             top_p=topp,
                                             repitition_penalty=repetition_penalty,
                                             device=device)
        elif fast_pattern:
            outs = fast_sample_sequence_batch(model, context_tokens, length,
                                              nsamples=maxNb,
                                              temperature=temperature, top_k=topk,
                                              repitition_penalty=repetition_penalty,
                                              device=device)
        else:
            outs = sample_sequence_batch_opti(model, context_tokens, length, n_ctx,
                                              tokenizer, maxNb,
                                              temperature=temperature, top_k=topk,
                                              top_p=topp,
                                              repitition_penalty=repetition_penalty,
                                              device=device)
        for out in outs:
            tmptext = untokenization(out, config, tokenizer, punc, continue_writing)
            S.append(tmptext)
    else:
        # non-batch path: draw samples one at a time
        S = []
        for _ in range(maxNb):
            out = generate(n_ctx=n_ctx, model=model, context=context_tokens,
                           length=length, is_fast_pattern=fast_pattern,
                           tokenizer=tokenizer, is_quick=quick_pattern,
                           temperature=temperature, top_k=topk, top_p=topp,
                           repitition_penalty=repetition_penalty, device=device)
            tmptext = untokenization(out, config, tokenizer, punc, continue_writing)
            S.append(tmptext)
    if config_predict.prefixTrim:
        S = [prefix0 + s[len(prefix):] for s in S]
    S = postprocess(S, prefix0, config_predict, removeHighFreqWords=removeHighFreqWords)
    S = dropDuplicateContent(S)
    if config_predict.resort:
        if len(S) > 0:
            S = resort(prefix0, S, config_predict)
    S = S[:nsamples]
    # if style == 'prose':
    #     S = [r[1:] for r in S]
    return S
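# Hedged usage sketch for generating(). Everything below is illustrative:
# `app`, `model`, `config`, `tokenizer`, and `config_predict` must come from
# this repo's own loading code, and the prefix string is arbitrary.
#
#   texts = generating(app, '今天天气很好', model, config, tokenizer, 'cuda',
#                      config_predict, num=5, batchGenerating=True, gpu='0')
#   # -> up to `num` postprocessed continuations, reranked when
#   #    config_predict.resort is enabled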
# NOTE: this second generating() has the same name as the one above; if both
# definitions live in the same module, this one shadows it.
def generating(app, prefix, model, config, tokenizer, device, config_predict,
               quick=False, num=5, continue_writing=False, removeHighFreqWords=False,
               batchGenerating=False, gpu='0', onlyMax=False, maxNb=20):
    # print("start:", prefix)
    # os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    if len(prefix) == 0 or len(prefix) > model.config.n_ctx:
        return []
    torch.cuda.set_device(int(gpu))
    prefix0 = prefix
    if config_predict.prefixTrim:
        prefix = sentTriming(prefix0)
        if len(prefix) == 0:
            prefix = prefix0
    punc = '.,?!;\t 。,?!;'
    global a
    a = app
    fast_pattern = config_predict.fast_pattern
    n_ctx = model.config.n_ctx
    # generation budget scales with the prefix length for short prefixes
    len_prefix = len(prefix)
    if len_prefix < 5:
        max_genlen = 5 * len_prefix
    elif len_prefix < 10:
        max_genlen = 3 * len_prefix
    else:
        max_genlen = config['length']
    length = min(max_genlen, n_ctx - len_prefix - 1)
    nsamples = num
    maxNb = max(nsamples, maxNb)
    temperature = config['temperature']
    topk = config['topk']
    topp = config['topp']
    quick_pattern = quick
    repetition_penalty = config['repetition_penalty']
    if length == -1:
        length = model.config.n_ctx
    raw_text = prefix
    context_tokens = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(raw_text))
    t0 = time.time()
    if batchGenerating:
        S = []
        if onlyMax:
            outs = sample_sequence_batch_max(model, context_tokens, length, n_ctx,
                                             tokenizer, nsamples=2,
                                             temperature=temperature, top_k=topk,
                                             top_p=topp,
                                             repitition_penalty=repetition_penalty,
                                             device=device)
        elif fast_pattern:
            outs = fast_sample_sequence_batch(model, context_tokens, length,
                                              nsamples=maxNb,
                                              temperature=temperature, top_k=topk,
                                              repitition_penalty=repetition_penalty,
                                              device=device)
        else:
            outs = sample_sequence_batch_opti(model, context_tokens, length, n_ctx,
                                              tokenizer, maxNb,
                                              temperature=temperature, top_k=topk,
                                              top_p=topp,
                                              repitition_penalty=repetition_penalty,
                                              device=device)
        # print('model predict all time:%0.4f' % (t1 - t0))
        for out in outs:
            tmptext = untokenization(out, config, tokenizer, punc, continue_writing)
            S.append(tmptext)
        # print('model untokenization time:%0.4f' % (t2 - t1))
    else:
        S = []
        for _ in range(maxNb):
            out = generate(n_ctx=n_ctx, model=model, context=context_tokens,
                           length=length, is_fast_pattern=fast_pattern,
                           tokenizer=tokenizer, is_quick=quick_pattern,
                           temperature=temperature, top_k=topk, top_p=topp,
                           repitition_penalty=repetition_penalty, device=device)
            tmptext = untokenization(out, config, tokenizer, punc, continue_writing)
            S.append(tmptext)
    t1 = time.time()
    if config_predict.prefixTrim:
        S = [prefix0 + s[len(prefix):] for s in S]
    S = postprocess(S, prefix0, config_predict, removeHighFreqWords=removeHighFreqWords)
    S = dropDuplicateContent(S)
    if config_predict.resort:
        if len(S) > 0:
            S = resort(prefix0, S, config_predict)
    t2 = time.time()
    # print('text generating and postprocess time:%0.4f and %0.4f' % (t1 - t0, t2 - t1))
    S = S[:nsamples]
    return S
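# Self-contained sketch of the per-step sampling rule the functions above rely
# on: divide the logits of already-generated tokens by the repetition penalty,
# scale by temperature, apply top-k filtering, then draw from the softmax.
# The helper name _ref_sample_next_token is ours, not part of this repo's API.
import torch


def _ref_sample_next_token(logits, generated_ids, temperature=1.0,
                           repetition_penalty=1.0, top_k=0):
    # logits: 1-D (vocab,) tensor for a single sequence
    logits = logits.clone()
    for tid in set(generated_ids):          # penalize tokens already emitted
        logits[tid] /= repetition_penalty
    logits /= temperature
    if top_k > 0:                           # keep only the k largest logits
        kth = torch.topk(logits, top_k)[0][-1]
        logits[logits < kth] = -float('Inf')
    probs = torch.softmax(logits, dim=-1)
    return torch.multinomial(probs, num_samples=1).item()

# e.g. (hypothetical vocab size and token ids):
#   next_id = _ref_sample_next_token(torch.randn(21128), [101, 872], top_k=8)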