# Scan every (input, target) text pair: track the longest input, count
# pairs whose two sides differ in length, and collect every character
# seen into the working alphabet. The loop header is missing from the
# source excerpt; `pairs` is assumed to be the list of string pairs.
for i in pairs:
    max_txt = max(len(i[0]), max_txt)
    if len(i[0]) != len(i[1]):
        unsame += 1
    else:
        same += 1
    ss = normalizeString(i[0])  # normalizeString is defined elsewhere in the repo
    sl = list(ss)
    for sli in sl:
        if sli not in all_expect_alphabets:
            all_expect_alphabets.append(sli)

all_expect_alphabets.sort()

# Build the character-level vocabulary from the collected alphabet.
tokenize_alphabets = dg_alphabets.Diagnosis_alphabets()
for a in all_expect_alphabets:
    tokenize_alphabets.addalphabet(a)

# BERT-style encoder hyperparameters; input and vocabulary sizes come
# from the vocabulary just built.
config = {
    'hidden_size': 64,
    'max_position_embeddings': 1350,
    'eps': 1e-7,
    'input_size': tokenize_alphabets.n_alphabets,
    'vocab_size': tokenize_alphabets.n_alphabets,
    'hidden_dropout_prob': 0.1,
    'num_attention_heads': 8,
    'attention_probs_dropout_prob': 0.1,
    'intermediate_size': 256,
    'num_hidden_layers': 16,
}
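# Diagnosis_alphabets is imported from dg_alphabets and not shown in this
# excerpt. Below is a minimal sketch of the interface as it is used here
# (an addalphabet() registrar plus an n_alphabets counter), under the
# assumption that it maps characters to contiguous integer indices; the
# two lookup dicts are illustrative, not confirmed by the source.
class Diagnosis_alphabets:
    def __init__(self):
        self.alphabet2index = {}
        self.index2alphabet = {}
        self.n_alphabets = 0

    def addalphabet(self, alphabet):
        # Register a character once and give it the next free index.
        if alphabet not in self.alphabet2index:
            self.alphabet2index[alphabet] = self.n_alphabets
            self.index2alphabet[self.n_alphabets] = alphabet
            self.n_alphabets += 1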
# Second variant of the same setup (apparently from a separate script in
# the source): the alphabet is hardcoded instead of scanned from the
# data. Entries before '[' are truncated in the source excerpt.
all_expect_alphabets = [
    # ...
    '[', ']', '{', '}', '|',
    '#0#', '#1#', '#2#', '#3#', '#4#', '#5#', '#6#', '#7#',
]

tokenize_alphabets = dg_alphabets.Diagnosis_alphabets()
for a in all_expect_alphabets:
    tokenize_alphabets.addalphabet(a)

# Same hyperparameters as above except for a tighter layer-norm eps
# (1e-12) and 16 attention heads instead of 8.
config = {
    'hidden_size': 64,
    'max_position_embeddings': 1350,
    'eps': 1e-12,
    'input_size': tokenize_alphabets.n_alphabets,
    'vocab_size': tokenize_alphabets.n_alphabets,
    'hidden_dropout_prob': 0.1,
    'num_attention_heads': 16,
    'attention_probs_dropout_prob': 0.1,
    'intermediate_size': 256,
    'num_hidden_layers': 16,
}
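# Hedged usage sketch: BERT-style layer implementations commonly read
# hyperparameters as attributes rather than dict keys; SimpleNamespace
# bridges the two. The model class consuming this config is not shown
# in the source, so none is named here.
from types import SimpleNamespace

cfg = SimpleNamespace(**config)
# hidden_size must split evenly across heads: 64 / 16 gives 4 dims per head.
assert cfg.hidden_size % cfg.num_attention_heads == 0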