# NOTE(review): this chunk arrived with all newlines collapsed onto one
# physical line, so everything after the first '#' was dead text; reformatted
# below with the original statement order preserved.
parser.add_argument('--seed', dest='seed', default=666, type=int, help='随机种子')
# parser.add_argument('--gpu', dest='gpu', default=True, type=bool, help='是否使用gpu')
# parser.add_argument('--max_epoch', dest='max_epoch', default=20, type=int, help='最大训练epoch')

args = parser.parse_args()  # command-line arguments
config = Config()  # model configuration (original comment carried fused residue: "PYTORCH_TRANSFORMERS_CACHE")

# Seed both the CPU and (when available) CUDA RNGs for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)


def main():
    """Load the datasets: only the test set in inference mode, otherwise the
    train and validation sets (one JSON object per line in each file)."""
    trainset, validset, testset = [], [], []
    if args.inference:  # inference mode: load only the test set
        with open(args.testset_path, 'r', encoding='utf8') as fr:
            for line in fr:
                testset.append(json.loads(line))
        print(f'载入测试集{len(testset)}条')
    else:  # training mode: load the train and validation sets
        with open(args.trainset_path, 'r', encoding='utf8') as fr:
            # NOTE(review): the source chunk is truncated at this point; the
            # loop below mirrors the testset-loading pattern above — verify
            # against the full file.
            for line in fr:
                trainset.append(json.loads(line))
# NOTE(review): near-duplicate of the preceding chunk (diffs: max_epoch
# default 40 vs 20 in the commented-out line; %-format print). Newlines were
# collapsed onto one physical line; reformatted below, order preserved.
parser.add_argument('--seed', dest='seed', default=666, type=int, help='随机种子')
# parser.add_argument('--gpu', dest='gpu', default=True, type=bool, help='是否使用gpu')
# parser.add_argument('--max_epoch', dest='max_epoch', default=40, type=int, help='最大训练epoch')

args = parser.parse_args()  # command-line arguments
config = Config()  # model configuration

# Seed both the CPU and (when available) CUDA RNGs for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)


def main():
    """Load the datasets; in inference mode only the test set is loaded."""
    # Load datasets (one JSON object per line).
    trainset, validset, testset = [], [], []
    if args.inference:  # inference mode: load only the test set
        with open(args.testset_path, 'r', encoding='utf8') as fr:
            for line in fr:
                testset.append(json.loads(line))
        # f-string replaces the original '%d' formatting for consistency with
        # the sibling chunk; the printed output is byte-identical.
        print(f'载入测试集{len(testset)}条')
    # NOTE(review): chunk truncated here — the sibling chunk shows an `else`
    # branch (train/valid loading) follows in the full file.
# NOTE(review): chunk starts mid-statement; the flag name '--embed_path' is
# reconstructed from dest='embed_path' — verify against the full file. The
# chunk also arrived with newlines collapsed; reformatted below.
parser.add_argument('--embed_path',
                    dest='embed_path', default='data/embed.txt', type=str, help='词向量位置')
parser.add_argument('--vad_path', dest='vad_path', default='data/vad.txt', type=str, help='vad位置')
# NOTE(review): type=bool is an argparse pitfall — any non-empty string
# (including "False") parses as True; consider action='store_true'.
parser.add_argument('--gpu', dest='gpu', default=True, type=bool, help='是否使用gpu')

args = parser.parse_args()
config = Config()


def filter_by_emotion(args):
    """Load the word-embedding file into a vocabulary list and a parallel
    list of embedding vectors.

    Each line of ``args.embed_path`` is "<word> <v1> <v2> ...". Every vector
    must have exactly ``config.embedding_size`` components.
    """
    vocab, embeds = [], []
    with open(args.embed_path, 'r', encoding='utf8') as fr:
        for line in fr:
            # Split the word from the vector on the FIRST space only.
            # partition() replaces the original double line.find(' '), which
            # also silently truncated the word on a space-free line (find
            # returns -1 there).
            word, _, vec_text = line.strip().partition(' ')
            embed = [float(v) for v in vec_text.split()]
            assert len(embed) == config.embedding_size  # check embedding dimension
            vocab.append(word)
            embeds.append(embed)
    print(f'载入词汇表: {len(vocab)}个')
    print(f'词向量维度: {config.embedding_size}')
    # NOTE(review): the function is truncated at this point in the source
    # chunk; the remainder (the actual emotion filtering) is not visible.