Exemplo n.º 1
0
# Register four more string-valued options on the parser created earlier in
# the script; each entry is (flag, default value).
for _flag, _default in (
        ('--init_restore_dir', MODEL_DIR),
        ('--checkpoint_dir', 'check_points'),
        ('--setting_file', 'setting.txt'),
        ('--log_file', 'log.txt'),
):
    parser.add_argument(_flag, type=str, default=_default)

# Read the command line into a namespace.
args = parser.parse_args()

# Turn the directory-style options into concrete paths. Data files are named
# after the suffix; model assets sit in a sub-folder named after the model.
_suffix = args.suffix_name
_model = args.model_name

args.train_dir = os.path.join(args.train_dir, _suffix + "_train.pkl")
args.dev_dir = os.path.join(args.dev_dir, _suffix + "_dev.pkl")

args.bert_config_file = os.path.join(
    args.bert_config_file, _model, 'bert_config.json')
args.vocab_file = os.path.join(args.vocab_file, _model, 'vocab.txt')
args.init_restore_dir = os.path.join(
    args.init_restore_dir, _model, 'pytorch_model.pth')

# Checkpoints are grouped per (model, dataset) pair.
args.checkpoint_dir = os.path.join(args.checkpoint_dir,
                                   _model + "_" + _suffix)

# Whatever utils.check_args returns replaces the namespace from here on.
args = utils.check_args(args)

# set seed
# Seed every RNG *before* the model is built: torch modules draw their initial
# weights from the global generator at construction time, so seeding afterwards
# would leave any randomly initialized layer different on every run.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# bert initialization
# Load the architecture config, build the tokenizer from the vocabulary file
# (lower-casing enabled), and create the token-classification model with one
# output per class.
bert_config = BertConfig.from_json_file(args.bert_config_file)
tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                       do_lower_case=True)
model = BertForTokenClassification(config=bert_config,
                                   num_labels=args.class_num)

# set gpu
# NOTE(review): CUDA_VISIBLE_DEVICES is only honoured if CUDA has not been
# initialized yet in this process -- confirm nothing above touches the GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids
Exemplo n.º 2
0
# Register six more string-valued options on the parser created earlier in
# the script; each entry is (flag, default value).
for _flag, _default in (
        ('--init_restore_dir',
         './pretrained_models/bert_chinese/pytorch_model.pth'),
        ('--vocab_file', './pretrained_models/bert_chinese/vocab.txt'),
        ('--checkpoint_dir', 'check_points/base_ner'),
        ('--setting_file', 'setting.txt'),
        ('--log_file', 'log.txt'),
        ('--log_dev_file', 'log_dev.txt'),
):
    parser.add_argument(_flag, type=str, default=_default)

# set args
# Read the command line, then pass the namespace to utils.check_args; its
# return value is not used here.
args = parser.parse_args()
utils.check_args(args)

# set seed
# Seed every RNG *before* the model is built: torch modules draw their initial
# weights from the global generator at construction time, so seeding afterwards
# would leave any randomly initialized layer different on every run.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# bert initialization
# Load the architecture config and override both dropout probabilities
# (attention and hidden) with the single --dropout value.
bert_config = BertConfig.from_json_file(args.bert_config_file)
bert_config.attention_probs_dropout_prob = args.dropout
bert_config.hidden_dropout_prob = args.dropout
tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                       do_lower_case=True)
# One output per entry of DICT_LABEL_REV plus one extra class.
model = BertForTokenClassification(bert_config,
                                   num_labels=len(DICT_LABEL_REV) + 1)