# Split the source at `index`: everything up to and including the split
# point becomes the generation prompt, the rest is kept as the tail.
prefix = code_str[:index + 1]
tail = code_str[index + 1:]
gen_code = sample_solo(model=model,
                       prefix=prefix,
                       max_gen_length=hparams.max_gen_length,
                       token_to_idx=token_to_idx,
                       idx_to_token=idx_to_token,
                       segment_length=hparams.segment_length,
                       new_line_number=hparams.new_line_number,
                       temperature=hparams.temperature,
                       sample=hparams.sample)
# Decide between continuation generation and insertion generation
# based on the new_line_number hyper-parameter.
if hparams.new_line_number == -1:
    # Continuation generation: keep only the freshly generated code.
    new_testcase = cut(gen_code)
else:
    # Insertion generation.
    if hparams.new_line_number <= 0:
        # Explicit exception instead of `assert`: asserts are stripped
        # under `python -O`, so the validation would silently vanish.
        raise ValueError('请指定正确的new_line_number参数,应该为-1或者大于0')
    # Trim the tail so leftover content of the previous (half-replaced)
    # statement does not make the test case syntactically invalid.
    # NOTE(review): if the tail contains no ';', find() returns -1 and
    # the tail is kept unchanged — confirm that is the intended fallback.
    tail = tail[tail.find(';') + 1:]
    new_testcase = cut(gen_code + tail)
# Normalize the test case: flatten newlines/tabs to spaces and collapse
# runs of spaces into one.
new_testcase = re.sub(
    ' +', ' ',
    new_testcase.strip().replace('\n', ' ').replace('\t', ' '))
# (The code that follows writes the test case into different databases
# depending on whether it passes.)
transfer=True) # 恢复模型(注意load方法没有device参数) model = torch.load(hparams.gen_model).to(device) model.device = device # 批量生成 import time start_time = time.time() print("开始生成: ") with open(os.path.join(workspace_path, hparams.gen_file), 'a+', encoding='utf-8') as f: n_batches = int(hparams.gen_number / hparams.gen_batch_size) for _ in trange(n_batches): gen_code_list = sample_multi(model, prefix=hparams.prefix, batch_size=hparams.gen_batch_size, max_gen_length=hparams.max_gen_length, char_to_idx=char_to_idx, idx_to_char=idx_to_char, segment_length=hparams.segment_length, temperature=hparams.temperature) gen_code_list = [cut(code) for code in gen_code_list] f.write('\n'.join(gen_code_list) + '\n') print(f'生成{hparams.gen_number}条用例总共花费{int(time.time() - start_time)}秒')