def run_crf(epoch, score_map, bag_label, bag_index, co_exp_net_isoform,
            co_exp_net_lncRNA, training_size, testing_size, theta, sigma=10):
    bag_label = bag_label[0:training_size]
    bag_index = bag_index[0:training_size]
    positive_unary_energy = 1 - score_map
    crf_isoform = CRF(training_size, testing_size, positive_unary_energy,
                      co_exp_net_isoform, theta, bag_label, bag_index)
    crf_lncRNA = CRF(training_size, testing_size, positive_unary_energy,
                     co_exp_net_lncRNA, theta, bag_label, bag_index)
    label_update_i, pos_prob_crf_i, unary_potential_i, pairwise_potential_i = \
        crf_isoform.inference(10)
    label_update_l, pos_prob_crf_l, unary_potential_l, pairwise_potential_l = \
        crf_lncRNA.inference(10)
    label_update = label_update_i + label_update_l
    pos_prob_crf = pos_prob_crf_i + pos_prob_crf_l
    unary_potential = unary_potential_i + unary_potential_l
    pairwise_potential = pairwise_potential_i + pairwise_potential_l
    if epoch == 0:
        # bag_label is already truncated to the training slice above
        theta_prime_isoform = crf_isoform.parameter_learning(bag_label, theta, sigma)
        theta_prime_lncRNA = crf_lncRNA.parameter_learning(bag_label, theta, sigma)
    else:
        theta_prime_isoform = crf_isoform.parameter_learning(label_update, theta, sigma)
        theta_prime_lncRNA = crf_lncRNA.parameter_learning(label_update, theta, sigma)
    theta_prime = theta_prime_isoform + theta_prime_lncRNA
    return label_update, theta_prime, pos_prob_crf, unary_potential, pairwise_potential
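# A minimal driver sketch for run_crf above. Everything here is illustrative:
# it assumes score_map, bag_label, bag_index, the two co-expression networks,
# the train/test sizes, and an initial theta are prepared elsewhere, and that
# theta is simply carried over between epochs, as the return value suggests.
theta = initial_theta  # hypothetical starting CRF parameters
for epoch in range(num_epochs):  # num_epochs is a placeholder
    label_update, theta, pos_prob_crf, unary, pairwise = run_crf(
        epoch, score_map, bag_label, bag_index, co_exp_net_isoform,
        co_exp_net_lncRNA, training_size, testing_size, theta)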
def __init__(self, char_init_embed, word_init_embed, pos_init_embed,
             spo_embed_dim, sentence_length, hidden_size, num_classes,
             dropout=0.3, id2words=None, encoding_type='bieso', weight=None):
    super().__init__()
    self.char_embed = nn.Embedding(char_init_embed[0], char_init_embed[1])
    self.word_embed = nn.Embedding(word_init_embed[0], word_init_embed[1])
    # initialise word embeddings from pretrained word2vec weights
    self.word_embed.weight.data.copy_(torch.from_numpy(weight))
    self.pos_embed = nn.Embedding(pos_init_embed[0], pos_init_embed[1])
    # spo embed size: 50
    self.embed_dim = (self.char_embed.embedding_dim +
                      self.word_embed.embedding_dim +
                      self.pos_embed.embedding_dim + spo_embed_dim)
    # sentence_length is accepted but currently unused
    self.norm1 = torch.nn.LayerNorm(self.embed_dim)
    self.Rnn = nn.LSTM(input_size=self.embed_dim, hidden_size=hidden_size,
                       num_layers=2, dropout=dropout, bidirectional=True,
                       batch_first=True)
    self.Linear1 = nn.Linear(hidden_size * 2, hidden_size * 2 // 3)
    self.norm2 = torch.nn.LayerNorm(hidden_size * 2 // 3)
    self.relu = torch.nn.LeakyReLU()
    self.drop = torch.nn.Dropout(dropout)
    self.Linear2 = nn.Linear(hidden_size * 2 // 3, num_classes)
    if id2words is None:
        self.Crf = CRF(num_classes, include_start_end_trans=False)
    else:
        self.Crf = CRF(num_classes, include_start_end_trans=False,
                       allowed_transitions=allowed_transitions(
                           id2words, encoding_type=encoding_type))
def __init__(self, char_init_embed, word_init_embed, pos_init_embed,
             spo_embed_dim, num_classes, num_layers, inner_size, key_size,
             value_size, num_head, dropout=0.1, id2words=None,
             encoding_type='bieso', weight=None):
    super().__init__()
    self.char_embed = nn.Embedding(char_init_embed[0], char_init_embed[1])
    self.word_embed = nn.Embedding(word_init_embed[0], word_init_embed[1])
    # initialise word embeddings from pretrained word2vec weights
    self.word_embed.weight.data.copy_(torch.from_numpy(weight))
    self.pos_embed = nn.Embedding(pos_init_embed[0], pos_init_embed[1])
    # spo embed size: 50
    self.embed_dim = (self.char_embed.embedding_dim +
                      self.word_embed.embedding_dim +
                      self.pos_embed.embedding_dim + spo_embed_dim)
    self.norm1 = torch.nn.LayerNorm(self.embed_dim)
    self.transformer = encoder.TransformerEncoder(
        num_layers=num_layers, model_size=self.embed_dim,
        inner_size=inner_size, key_size=key_size, value_size=value_size,
        num_head=num_head, dropout=dropout)
    self.Linear1 = nn.Linear(self.embed_dim, self.embed_dim // 3)
    self.norm2 = torch.nn.LayerNorm(self.embed_dim // 3)
    self.relu = torch.nn.LeakyReLU()
    self.drop = torch.nn.Dropout(dropout)
    self.Linear2 = nn.Linear(self.embed_dim // 3, num_classes)
    self.Linear = nn.Linear(self.embed_dim, num_classes)
    if id2words is None:
        self.Crf = CRF(num_classes, include_start_end_trans=False)
    else:
        self.Crf = CRF(num_classes, include_start_end_trans=False,
                       allowed_transitions=allowed_transitions(
                           id2words, encoding_type=encoding_type))
def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim,
             batch_size, max_len):
    super(BiLSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.tag_to_ix = tag_to_ix
    self.tagset_size = len(tag_to_ix)
    self.max_len = max_len
    self.crf = CRF(len(tag_to_ix), batch_first=True)
    self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2, num_layers=1,
                        batch_first=True, bidirectional=True)
    # Maps the output of the LSTM into tag space.
    self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)
    # Matrix of transition parameters. Entry i,j is the score of
    # transitioning *to* i *from* j.
    self.transitions = nn.Parameter(
        torch.randn(self.tagset_size, self.tagset_size))
    # These two statements enforce the constraint that we never transfer
    # to the start tag and we never transfer from the stop tag.
    self.transitions.data[tag_to_ix["<START>"], :] = -10000
    self.transitions.data[:, tag_to_ix["<STOP>"]] = -10000
    self.hidden = self.init_hidden()
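# Usage sketch for the pytorch-crf layer instantiated above (assuming
# `from torchcrf import CRF`). With batch_first=True the call takes emissions
# of shape [batch, seq_len, tagset_size] and gold tags of shape
# [batch, seq_len]; it returns the log-likelihood, so the loss is its
# negation, and decode() returns the Viterbi path per batch element.
# model is a constructed BiLSTM_CRF instance; tensors below are illustrative.
emissions = torch.randn(2, 7, len(tag_to_ix))
gold_tags = torch.randint(0, len(tag_to_ix), (2, 7))
loss = -model.crf(emissions, gold_tags)
best_paths = model.crf.decode(emissions)  # list of tag-id lists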
def create_NER_model(self):
    ner_model = Sequential()
    # With keras-contrib 2.0.8 and keras 2.2.5, mask_zero=True raises:
    #   Tensors in list passed to 'values' of 'ConcatV2' Op have types
    #   [bool, float32] that don't all match.
    # Downgrading to keras 2.2.4 resolves it.
    embedding = Embedding(input_dim=VOCAB_SIZE,
                          output_dim=EMBED_DIM,
                          mask_zero=False,
                          embeddings_initializer=constant(
                              load_word2vec_embedding(config.vocab_size)))
    ner_model.add(embedding)
    ner_model.add(Masking(mask_value=config.src_padding))
    ner_model.add(
        Bidirectional(
            LSTM(BiRNN_UNITS // 2, return_sequences=True,
                 dropout=DROPOUT_RATE)))
    crf = CRF(len(LABEL_DIC), sparse_target=True)
    ner_model.add(crf)
    # Either way of spelling the loss and metrics works:
    ner_model.compile(Adam(lr=LEARN_RATE, decay=1e-3),
                      loss=crf_loss,
                      metrics=[crf_accuracy])
    # ner_model.compile(Adam(lr=LEARN_RATE), loss=crf.loss_function,
    #                   metrics=[crf.accuracy])
    return ner_model
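# Training sketch for create_NER_model, assuming an instance `builder` of the
# enclosing class, padded integer inputs of shape (n_samples, max_len) and,
# because sparse_target=True, integer labels of shape (n_samples, max_len, 1).
# All names and shapes are illustrative.
ner_model = builder.create_NER_model()
ner_model.fit(X_train, y_train, batch_size=64, epochs=10,
              validation_data=(X_dev, y_dev))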
def create_NER_model(self):
    # Note: seq_len=None is allowed, but sequences must still stay within
    # BERT's 512-token limit.
    bert_model = load_trained_model_from_checkpoint(BERT_CONFIG, BERT_CKPT,
                                                    seq_len=None)
    for layer in bert_model.layers:
        layer.trainable = True
    x1_in = Input(shape=(None,))
    x2_in = Input(shape=(None,))
    x = bert_model([x1_in, x2_in])
    # Drop the [CLS] position so each remaining token's output feeds the CRF.
    x = Lambda(lambda x: x[:, 1:])(x)
    # Add a bidirectional LSTM on top of BERT.
    x = Bidirectional(
        LSTM(BiRNN_UNITS // 2, return_sequences=True, return_state=False,
             dropout=DROPOUT_RATE))(x)
    x = Masking(mask_value=0)(x)
    rst = CRF(len(LABEL_DIC), sparse_target=True)(x)
    ner_model = kerasModel([x1_in, x2_in], rst)
    ner_model.compile(
        loss=crf_loss,
        metrics=[crf_accuracy],
        optimizer=Adam(LEARN_RATE),  # use a sufficiently small learning rate
    )
    return ner_model
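# Prediction sketch, assuming token-id and segment-id arrays from a BERT
# tokenizer with identical shape (batch, seq_len); both names are
# illustrative. Because the Lambda above drops the [CLS] position, the CRF
# output is one step shorter than the input.
tag_probs = ner_model.predict([token_ids, segment_ids])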
def __init__(self, data, circul_time, deepth):
    super(SeqModel_circulationBiLSTM, self).__init__()
    self.use_crf = data.use_crf
    self.use_trans = data.use_trans
    self.use_mapping = data.use_mapping
    print("build network...")
    print("use_char: ", data.use_char)
    if data.use_char:
        print("char feature extractor: ", data.char_seq_feature)
    print("use_trans: ", data.use_trans)
    print("word feature extractor: ", data.word_feature_extractor)
    print("use crf: ", self.use_crf)
    self.gpu = data.gpu
    self.average_batch = data.average_batch_loss
    # Add two more labels for the lower-layer LSTM; the CRF keeps the
    # original label size.
    label_size = data.label_alphabet_size
    data.label_alphabet_size += 2
    self.word_hidden = WordSequence_circulationBiLSTM(data, circul_time,
                                                      deepth)
    if self.use_crf:
        self.crf = CRF(label_size, self.gpu)
def __init__(self):
    super().__init__()
    self.bert = BertModel.from_pretrained('bert_base/')
    if args.bert_freeze:
        for param in self.bert.parameters():
            param.requires_grad = False
    self.lstm = BiLSTM(
        input_size=args.bert_hidden_size + args.cnn_output_size,
        hidden_size=args.rnn_hidden_size + args.cnn_output_size,
        num_layers=args.rnn_num_layers,
        num_dirs=args.rnn_num_dirs)
    self.lstm_dropout = nn.Dropout(p=args.rnn_dropout)
    self.cnn = CharCNN(embedding_num=len(CHAR_VOCAB),
                       embedding_dim=args.cnn_embedding_dim,
                       filters=eval(args.cnn_filters),
                       output_size=args.cnn_output_size)
    self.crf = CRF(target_size=len(VOCAB) + 2, use_cuda=args.crf_use_cuda)
    self.linear = nn.Linear(
        in_features=args.rnn_hidden_size + args.cnn_output_size,
        out_features=len(VOCAB) + 2)
    self.attn = MultiHeadAttention(
        model_dim=args.rnn_hidden_size + args.cnn_output_size,
        num_heads=args.attn_num_heads,
        dropout=args.attn_dropout)
    self.feat_dropout = nn.Dropout(p=args.feat_dropout)
def create_model(vocab_size, max_length, embedding_dim, word_index, tag_index):
    embeddings_index = {}
    with io.open('static/glove.6B.100d.txt', 'r', encoding='utf-8') as f:
        for line in f:
            values = line.strip().split()
            curr_word = values[0]
            coefs = np.asarray(values[1:], dtype='float64')
            embeddings_index[curr_word] = coefs
    embeddings_matrix = np.zeros((vocab_size, embedding_dim))
    for word, i in word_index.items():
        # valid row indices are 0 .. vocab_size - 1
        if i >= vocab_size:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embeddings_matrix[i] = embedding_vector
    model = Sequential()
    model.add(
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_dim,
                  input_length=max_length,
                  weights=[embeddings_matrix],
                  mask_zero=True))
    model.add(
        Bidirectional(
            LSTM(units=embedding_dim,
                 return_sequences=True,
                 recurrent_dropout=0.01)))
    model.add(TimeDistributed(Dense(len(tag_index))))
    crf = CRF(len(tag_index), sparse_target=True)
    model.add(crf)
    model.compile(optimizer='adam', loss=crf.loss, metrics=[crf.accuracy])
    model.summary()
    return model
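# Illustrative call, assuming word_index/tag_index dicts from a fitted
# tokenizer and that static/glove.6B.100d.txt exists; embedding_dim must be
# 100 to match the GloVe file, and max_length must match the padded data.
model = create_model(vocab_size=len(word_index) + 1, max_length=50,
                     embedding_dim=100, word_index=word_index,
                     tag_index=tag_index)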
def __init__(self, config):
    super(BertNer, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.crf = CRF(config.num_labels)
    self.init_weights()
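# A forward sketch for BertNer; the original forward is not shown, so this is
# an assumption. It presumes a pytorch-crf style layer whose call returns the
# log-likelihood. Note that pytorch-crf defaults to batch_first=False, hence
# the .transpose(0, 1) calls before the CRF.
def forward(self, input_ids, attention_mask=None, labels=None):
    sequence_output = self.bert(input_ids, attention_mask=attention_mask)[0]
    emissions = self.classifier(self.dropout(sequence_output))
    if labels is not None:
        # training: negative log-likelihood
        return -self.crf(emissions.transpose(0, 1), labels.transpose(0, 1))
    return self.crf.decode(emissions.transpose(0, 1))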
def __init__(self, word2id, char2id, tag2id, pretrain_embedding, embed_dim,
             char_embed_dim, n_hidden):
    super(LSTMTagger, self).__init__()
    self.word2id = word2id  # word vocabulary from the pretrained embeddings
    self.char2id = char2id
    self.tag2id = tag2id
    self.word_num = len(word2id)
    self.char_num = len(char2id)
    self.tag_num = len(tag2id)
    self.embed_dim = embed_dim
    # Load the pretrained embedding matrix and keep it trainable.
    self.embedding = torch.nn.Embedding.from_pretrained(
        torch.FloatTensor(pretrain_embedding), freeze=False)
    self.clstm = CharLSTM(chrdim=self.char_num,
                          embdim=embed_dim,
                          char_embed=char_embed_dim)
    self.wlstm = nn.LSTM(input_size=embed_dim + char_embed_dim,
                         hidden_size=n_hidden // 2,
                         num_layers=1,
                         batch_first=True,
                         bidirectional=True)
    self.out = nn.Linear(n_hidden, self.tag_num)
    self.crf = CRF(self.tag_num)
    self.drop = nn.Dropout()
def __init__(self, args):
    super(BertBiLstmCrf, self).__init__(args)
    self.args = args
    self.vector_path = args.vector_path  # path to pretrained word vectors (.txt)
    self.embedding_dim = args.embedding_dim
    self.hidden_dim = args.hidden_dim  # hidden-layer size
    self.tag_num = args.tag_num
    self.batch_size = args.batch_size
    self.bidirectional = True  # BiLSTM
    self.num_layers = args.num_layers
    self.pad_index = args.pad_index
    self.dropout = args.dropout  # dropout probability used during training
    self.save_path = args.save_path
    embedding_dimension = args.bert_embedding_dim
    self.embedding = BertModel(
        config=BertConfig.from_json_file(args.bert_config_json)).to(DEVICE)
    self.embedding.load_state_dict(torch.load(args.bert_weight))
    self.drop = nn.Dropout(0.5)
    self.lstm = nn.LSTM(embedding_dimension,
                        self.hidden_dim,
                        bidirectional=self.bidirectional,
                        num_layers=self.num_layers,
                        dropout=self.dropout).to(DEVICE)
    # A bidirectional LSTM doubles the output size: either halve the hidden
    # size going in, or, as here, use hidden_dim * 2 in the following layer.
    self.linear1 = nn.Linear(self.hidden_dim * 2, self.hidden_dim).to(DEVICE)
    self.lin_drop = nn.Dropout(0.5)
    # Linear projection from the hidden state to label space.
    self.linear2 = nn.Linear(self.hidden_dim, self.tag_num + 2).to(DEVICE)
    self.crf_layer = CRF(self.tag_num).to(DEVICE)
def do_infer(args):
    config = ConfigParser()
    config.read_file(args.config)
    model = CRF(config)

    reader = csv.reader(args.input, delimiter='\t')
    header = next(reader)
    assert all(w in header for w in
               ["id", "words", "lemmas", "pos_tags", "doc_char_begin",
                "doc_char_end", "gloss"]), \
        "Input doesn't have required annotations."
    Sentence = namedtuple('Sentence', header)

    def parse_input(row):
        sentence = Sentence(*row)
        words, lemmas, pos_tags = [
            parse_psql_array(arr)
            for arr in (sentence.words, sentence.lemmas, sentence.pos_tags)]
        return sentence._replace(words=words, lemmas=lemmas,
                                 pos_tags=pos_tags)

    writer = csv.writer(args.output, delimiter='\t')
    writer.writerow([
        'id', 'speaker_token_begin', 'speaker_token_end', 'cue_token_begin',
        'cue_token_end', 'content_token_begin', 'content_token_end',
        'content_tokens', 'speaker', 'cue', 'content'])
    for sentences in tqdm(grouper(map(parse_input, reader), args.batch_size)):
        conll = [zip(s.words, s.lemmas, s.pos_tags) for s in sentences]
        for sentence, tags in zip(sentences, model.infer(conll)):
            if "SPKR" not in tags or "CTNT" not in tags:
                continue
            writer.writerow([sentence.id] +
                            extract_quote_entries(sentence, tags))
def __init__(self, nwords, nchars, ntags, pretrained_list):
    super().__init__()
    # Create word embeddings
    pretrained_tensor = torch.FloatTensor(pretrained_list)
    self.word_embedding = torch.nn.Embedding.from_pretrained(
        pretrained_tensor, freeze=False)
    # Create input dropout parameter
    self.word_dropout = torch.nn.Dropout(1 - KEEP_PROB)
    # Create LSTM parameters
    self.lstm = torch.nn.LSTM(DIM_EMBEDDING + CHAR_LSTM_HIDDEN, LSTM_HIDDEN,
                              num_layers=LSTM_LAYER, batch_first=True,
                              bidirectional=True)
    # Create output dropout parameter
    self.lstm_output_dropout = torch.nn.Dropout(1 - KEEP_PROB)
    # Character-level LSTM
    self.char_embedding = torch.nn.Embedding(nchars, CHAR_DIM_EMBEDDING)
    self.char_lstm = torch.nn.LSTM(CHAR_DIM_EMBEDDING, CHAR_LSTM_HIDDEN,
                                   num_layers=1, batch_first=True,
                                   bidirectional=False)
    # Create final matrix multiply parameters
    self.hidden_to_tag = torch.nn.Linear(LSTM_HIDDEN * 2, ntags + 2)
    self.crf = CRF(target_size=ntags)
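# Instantiation sketch, assuming the enclosing class is named TaggerModel
# (hypothetical) and that the module-level constants referenced above
# (DIM_EMBEDDING, CHAR_DIM_EMBEDDING, CHAR_LSTM_HIDDEN, LSTM_HIDDEN,
# LSTM_LAYER, KEEP_PROB) are defined. The zero vectors stand in for real
# pretrained embeddings.
pretrained_list = [[0.0] * DIM_EMBEDDING for _ in range(10000)]
model = TaggerModel(nwords=10000, nchars=128, ntags=17,
                    pretrained_list=pretrained_list)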
def five_two():
    '''implement your experiments for question 5.2 here'''
    file = open('Q5_2.txt', 'w')
    crf_test = CRF(L=CHARS, F=321)
    W_F = np.load('W_F_{}.npy'.format(7), 'r')
    W_T = np.load('W_T_{}.npy'.format(7), 'r')
    crf_test.set_params(W_F, W_T)
    Y_gen = []
    X_gen = []
    samples_per_length = 50
    for length in range(1, 21):
        Y_gen.append(np.random.choice(CHARS, [samples_per_length, length]))
        X_gen.append(
            np.random.randint(2, size=(samples_per_length, length, 321)))
        t0 = time.time()
        for x, y in zip(X_gen[length - 1], Y_gen[length - 1]):
            predictions = crf_test.predict(x)
        t1 = time.time()
        print('Average time to predict a sample of length', length, 'is',
              (t1 - t0) / samples_per_length)
        file.write(str(length) + ',' +
                   str((t1 - t0) / samples_per_length) + '\n')
    file.close()
def __init__(
    self,
    nwords,
    nchars,
    ntags,
    pretrained_list,
    run_name,
    exp_name,
    list_of_possible_tags,
    use_char=True,
    use_crf=False,
    class_weights=[],
    learning_rate=0.015,
    learning_decay_rate=0.05,
    weight_decay=1e-8,
):
    super().__init__()
    self.run_name = run_name
    self.exp_name = exp_name
    self.class_weights = torch.Tensor(class_weights)
    # Create word embeddings
    pretrained_tensor = torch.FloatTensor(pretrained_list)
    self.word_embedding = torch.nn.Embedding.from_pretrained(
        pretrained_tensor, freeze=False)
    self.list_of_possible_tags = list_of_possible_tags
    char_lstm_hidden = 0
    self.use_char = use_char
    if self.use_char:
        # Character-level LSTM
        self.char_embedding = torch.nn.Embedding(nchars, CHAR_DIM_EMBEDDING)
        self.char_lstm = torch.nn.LSTM(
            CHAR_DIM_EMBEDDING,
            CHAR_LSTM_HIDDEN,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        char_lstm_hidden = CHAR_LSTM_HIDDEN
    # Create LSTM parameters
    self.lstm = torch.nn.LSTM(
        DIM_EMBEDDING + char_lstm_hidden,
        LSTM_HIDDEN,
        num_layers=LSTM_LAYER,
        batch_first=True,
        bidirectional=True,
    )
    # Create output dropout parameter
    self.lstm_output_dropout = torch.nn.Dropout(1 - KEEP_PROB)
    # Create final matrix multiply parameters
    self.hidden_to_tag = torch.nn.Linear(LSTM_HIDDEN * 2, ntags)
    self.ntags = ntags
    self.use_crf = use_crf
    if self.use_crf:
        self.crf = CRF(target_size=ntags)
def __init__(self, words_num, embed_dim, hidden_dim, num_layers, out_class,
             word2idx, dropout=0.2, bi_direction=True):
    super(LSTMCRF, self).__init__()
    self.word2idx = word2idx
    self.bi_direction = bi_direction
    self.hidden_dim = hidden_dim
    self.embed_layer = nn.Embedding(words_num, embed_dim)
    if bi_direction:
        self.rnn = nn.LSTM(embed_dim, hidden_dim // 2,
                           num_layers=num_layers, bidirectional=True)
    else:
        self.rnn = nn.LSTM(embed_dim, hidden_dim,
                           num_layers=num_layers, bidirectional=False)
    self.fc = nn.Linear(hidden_dim, out_class)
    self.crf = CRF(out_class)
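# A loss sketch for LSTMCRF; forward/neg_log_likelihood are not shown above,
# so this is an assumption. It presumes a pytorch-crf style CRF whose call
# returns the log-likelihood. The LSTM above is not batch_first, so inputs
# are [seq_len, batch].
def neg_log_likelihood(self, sentences, tags):
    embeds = self.embed_layer(sentences)   # [seq_len, batch, embed_dim]
    rnn_out, _ = self.rnn(embeds)          # [seq_len, batch, hidden_dim]
    emissions = self.fc(rnn_out)           # [seq_len, batch, out_class]
    return -self.crf(emissions, tags)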
def __init__(self, vocab_tag, char_embed_size, num_hidden_layer,
             channel_size, kernel_size, dropout_rate=0.2):
    super(CharWordSeg, self).__init__()
    self.vocab_tag = vocab_tag
    self.char_embed_size = char_embed_size
    self.num_hidden_layer = num_hidden_layer
    self.channel_size = channel_size
    self.kernel_size = kernel_size
    self.dropout_rate = dropout_rate

    num_tags = len(self.vocab_tag['tag_to_index'])
    vocab_size = len(self.vocab_tag['token_to_index'])
    self.char_embedding = nn.Embedding(vocab_size, char_embed_size)
    self.dropout_embed = nn.Dropout(dropout_rate)
    self.glu_layers = nn.ModuleList(
        [ConvGLUBlock(in_channels=char_embed_size,
                      out_channels=channel_size,
                      kernel_size=kernel_size,
                      drop_out=0.2,
                      padding=1)] +
        [ConvGLUBlock(in_channels=channel_size,
                      out_channels=channel_size,
                      kernel_size=kernel_size,
                      drop_out=0.2,
                      padding=1) for _ in range(num_hidden_layer - 1)])
    self.hidden_to_tag = nn.Linear(char_embed_size, num_tags)
    self.crf_layer = CRF(num_tags, batch_first=True)
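# Training-step sketch for CharWordSeg, assuming a forward() (not shown) that
# returns per-character emissions of shape [batch, seq_len, num_tags] and a
# pad index of 0; both are assumptions. With the batch_first pytorch-crf
# layer above, the loss is the negated masked log-likelihood.
emissions = model(char_ids)
mask = (char_ids != 0)
loss = -model.crf_layer(emissions, tag_ids, mask=mask)
loss.backward()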
def __init__(self, data):
    super(SeqModel, self).__init__()
    self.use_crf = data.use_crf
    print("build network...")
    print("use_char: ", data.use_char)
    if data.use_char:
        print("char feature extractor: ", data.char_feature_extractor)
    print("word feature extractor: ", data.word_feature_extractor)
    print("use crf: ", self.use_crf)
    self.gpu = data.HP_gpu
    self.average_batch = data.average_batch_loss
    # Add two more labels for the lower-layer LSTM; the CRF keeps the
    # original label size.
    label_size = data.label_alphabet_size
    # data.label_alphabet_size += 2
    # self.word_hidden = WordSequence(data, False, True, data.use_char)
    # The linear layer that maps from hidden state space to tag space.
    self.hidden2tag = nn.Linear(data.HP_hidden_dim, label_size + 2)
    if self.use_crf:
        self.crf = CRF(label_size, self.gpu)
    if torch.cuda.is_available():
        self.hidden2tag = self.hidden2tag.cuda(self.gpu)
    self.frozen = False
def __init__(self, data):
    super(SeqModel, self).__init__()
    self.use_crf = data.use_crf
    print("build network...")
    print("use_char: ", data.use_char)
    if data.use_char:
        print("char feature extractor: ", data.char_feature_extractor)
    print("word feature extractor: ", data.word_feature_extractor)
    print("use crf: ", self.use_crf)
    self.gpu = data.HP_gpu
    self.average_batch = data.average_batch_loss
    # Add two more labels per task for the lower-layer LSTM; each CRF keeps
    # its original label size.
    label_size = {}
    for idtask in range(data.HP_tasks):
        label_size[idtask] = data.label_alphabet_sizes[idtask]
        data.label_alphabet_sizes[idtask] += 2
    self.word_hidden = WordSequence(data)
    if self.use_crf:
        self.crf = {
            idtask: CRF(label_size[idtask], self.gpu)
            for idtask in range(data.HP_tasks)
        }
    self.data = data
    self.tasks_weights = self.data.HP_tasks_weights
def __init__(self, dicts, config):
    super(EntityDetection, self).__init__()
    self.config = config
    self.embed = Embeddings(word_vec_size=config.d_embed, dicts=dicts)
    if self.config.rnn_type.lower() == 'gru':
        self.rnn = nn.GRU(input_size=config.d_embed,
                          hidden_size=config.d_hidden,
                          num_layers=config.n_layers,
                          dropout=config.dropout_prob,
                          bidirectional=config.birnn)
    else:
        self.rnn = nn.LSTM(input_size=config.d_embed,
                           hidden_size=config.d_hidden,
                           num_layers=config.n_layers,
                           dropout=config.dropout_prob,
                           bidirectional=config.birnn)
    self.dropout = nn.Dropout(p=config.dropout_prob)
    self.relu = nn.ReLU()
    seq_in_size = config.d_hidden
    if self.config.birnn:
        seq_in_size *= 2
    self.hidden2tag = nn.Sequential(
        nn.Linear(seq_in_size, seq_in_size),
        nn.BatchNorm1d(seq_in_size),
        self.relu,
        self.dropout,
        nn.Linear(seq_in_size, config.n_out))
    self.crf = CRF(config.n_out)
def __init__(self,
             hidden_size: int,
             output_size: int,
             num_layers: int = 1,
             bidirectional: bool = False,
             dropout_p: float = 0.1,
             device: str = "cpu",
             weights: Optional[torch.Tensor] = None,
             num_embeddings: Optional[int] = None,
             embedding_dim: Optional[int] = None):
    super(NERTagger, self).__init__()
    if weights is not None:
        self.embedding = nn.Embedding.from_pretrained(weights,
                                                      padding_idx=PAD_IDX)
    else:
        self.embedding = nn.Embedding(num_embeddings=num_embeddings,
                                      embedding_dim=embedding_dim,
                                      padding_idx=PAD_IDX)
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.num_layers = num_layers
    self.dropout_p = dropout_p
    self.bidirectional = bidirectional
    self.device = device
    self.dropout = nn.Dropout(p=dropout_p)
    self.lstm = nn.LSTM(input_size=self.embedding.embedding_dim,
                        hidden_size=hidden_size,
                        bidirectional=bidirectional,
                        num_layers=num_layers,
                        batch_first=True)
    if self.bidirectional:
        hidden_size = 2 * hidden_size
    self.crf = CRF(hidden_size, output_size, device=device)
def __init__(self, config):
    super(BertCrfForNer, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.crf = CRF(num_tags=config.num_labels, batch_first=True)
    self.init_weights()
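# A forward sketch for BertCrfForNer; the original forward is not shown, so
# this is an assumption. With batch_first=True, emissions are
# [batch, seq_len, num_labels] and pytorch-crf accepts a boolean mask built
# from the attention mask.
def forward(self, input_ids, attention_mask, labels=None):
    sequence_output = self.bert(input_ids, attention_mask=attention_mask)[0]
    emissions = self.classifier(self.dropout(sequence_output))
    if labels is not None:
        # training: mean negative log-likelihood over the batch
        return -self.crf(emissions, labels, mask=attention_mask.bool(),
                         reduction='mean')
    return self.crf.decode(emissions, mask=attention_mask.bool())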
def gru_layers(x, batch, n_fin, n_h, n_y, n_layers=1):
    params = []

    for i in xrange(n_layers):
        if i == 0:
            layer = GRU(n_i=n_fin, n_h=n_h)
            layer_input = relu(T.dot(x.dimshuffle(1, 0, 2), layer.W))
            # h0: 1D: batch, 2D: n_h
            h0 = T.zeros((batch, n_h), dtype=theano.config.floatX)
        else:
            layer = GRU(n_i=n_h * 2, n_h=n_h)
            # h: 1D: n_words, 2D: batch, 3D: n_h
            layer_input = relu(
                T.dot(T.concatenate([layer_input, h], 2), layer.W))[::-1]
            h0 = layer_input[0]

        xr = T.dot(layer_input, layer.W_xr)
        xz = T.dot(layer_input, layer.W_xz)
        xh = T.dot(layer_input, layer.W_xh)

        h, _ = theano.scan(fn=layer.forward,
                           sequences=[xr, xz, xh],
                           outputs_info=[h0])
        params.extend(layer.params)

    layer = CRF(n_i=n_h * 2, n_h=n_y)
    params.extend(layer.params)
    h = relu(T.dot(T.concatenate([layer_input, h], 2), layer.W))

    # Layers alternate direction (each non-first layer reverses its input),
    # so re-reverse the emissions when an even number of layers was stacked.
    if n_layers % 2 == 0:
        emit = h[::-1]
    else:
        emit = h

    return params, layer, emit
def __init__(self, base_path, oov, num_labels, lstm_hidden_size=128,
             dropout=0.3, lm_flag=False):
    super(Bert_CRF, self).__init__()
    bert_config = BertConfig.from_json_file(
        os.path.join(base_path, 'config.json'))
    bert_config.num_labels = num_labels
    # hidden_states (tuple(torch.FloatTensor), optional, returned when
    # config.output_hidden_states=True)
    bert_config.output_hidden_states = True
    bert_config.output_attentions = True
    self.bert = BertModel.from_pretrained(
        os.path.join(base_path, 'pytorch_model.bin'), config=bert_config)
    self.tokenizer = tokenizer
    self.oov = oov
    self._oov_embed()
    self.dropout = nn.Dropout(dropout)
    # LSTM input_size = bert_config.hidden_size; its hidden size must line
    # up with the first argument of the Linear layer below (bidirectional,
    # so 2 * lstm_hidden_size). A bidirectional LSTM is used here.
    self.lm_flag = lm_flag
    self.lstm = nn.LSTM(bert_config.hidden_size,
                        lstm_hidden_size,
                        num_layers=1,
                        bidirectional=True,
                        dropout=0.3,
                        batch_first=True)
    self.clf = nn.Linear(lstm_hidden_size * 2, bert_config.num_labels + 2)
    self.layer_norm = nn.LayerNorm(lstm_hidden_size * 2)
    self.crf = CRF(target_size=bert_config.num_labels,
                   average_batch=True,
                   use_cuda=True)
def __init__(self, opt, tag2label):
    super(Bilstm_crf, self).__init__()
    self.embedding_length = opt.embedding_length
    self.hidden_size = opt.hidden_size
    self.output_size = len(tag2label)
    self.batch_size = opt.batch_size
    self.vocab_size = opt.vocab_size
    self.dropout = opt.dropout
    self.word_embeddings = nn.Embedding(self.vocab_size,
                                        self.embedding_length)
    self.word_embeddings.weight.data.copy_(torch.from_numpy(opt.embeddings))
    self.dropout_embed = nn.Dropout(opt.dropout)
    self.lstm = nn.LSTM(self.embedding_length,
                        self.hidden_size,
                        bidirectional=True,
                        dropout=opt.dropout)
    if self.lstm.bidirectional:
        self.label = nn.Linear(self.hidden_size * 2, self.output_size)
    else:
        self.label = nn.Linear(self.hidden_size, self.output_size)
    self.crf = CRF(self.output_size)
def extractPhraseFromCRFWithColor(phrasedir, systemdir):
    crf_reader = CRF()
    aligner = AlignPhraseAnnotation()

    lectures = annotation.Lectures
    for i, lec in enumerate(lectures):
        path = phrasedir + str(lec) + '/'
        fio.NewPath(path)

        for prompt in ['q1', 'q2']:
            filename = path + prompt + '.' + method + '.key'
            extracted_phrases = []
            extracted_colors = []
            crf_file = os.path.join(systemdir, 'extraction', 'all_output',
                                    'test_%i_%s.out' % (i, prompt))
            for tokens, tags, color0, color1 in \
                    crf_reader.read_file_generator_index(crf_file,
                                                         [0, -1, -4, -3]):
                phrases, phrase_colors = aligner.get_phrase_with_colors(
                    tokens, tags, [color0, color1])
                for phrase, phrase_color in zip(phrases, phrase_colors):
                    extracted_phrases.append(phrase.lower())
                    extracted_colors.append(phrase_color)
            fio.SaveList(extracted_phrases, filename)

            filename = path + prompt + '.' + method + '.key.color'
            fio.SaveDict2Json(extracted_colors, filename)
def crf_model(cfg):
    x, inputs = return_input(cfg)
    mask = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(inputs['word'])
    x = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([x, mask])
    x = Bidirectional(CuDNNGRU(cfg['unit1'], return_sequences=True,
                               name='gru1'), merge_mode='sum')(x)
    x = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([x, mask])
    x = SpatialDropout1D(0.3)(x)
    x = Bidirectional(CuDNNGRU(cfg['unit2'], return_sequences=True,
                               name='gru2'), merge_mode='sum')(x)
    x = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([x, mask])
    x = SpatialDropout1D(0.15)(x)
    crf = CRF(cfg['num_tags'], sparse_target=True, name='crf')
    output = crf(x, mask=mask)
    model = Model(inputs=list(inputs.values()), outputs=[output])
    # per-layer learning-rate multipliers
    multipliers = {
        'emb': 0.1,
        'embbound': 0.1,
        'embpos': 0.1,
        # 'crf': cfg['lr_crf'],
        'gru1': cfg['lr_layer1'],
    }
    model.compile(optimizer=M_Nadam(cfg['lr'], multipliers=multipliers),
                  loss=crf.loss_function)
    return model
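# Illustrative configuration for crf_model; the keys mirror the lookups above
# and the values are placeholders, not tuned settings. return_input and
# M_Nadam are project-specific helpers assumed to be importable.
cfg = {'unit1': 256, 'unit2': 128, 'num_tags': 13,
       'lr': 1e-3, 'lr_layer1': 0.5}
model = crf_model(cfg)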
def __init__(self, data):
    super(BiLSTM_CRF, self).__init__()
    print("build batched lstmcrf...")
    self.gpu = data.HP_gpu
    # Add two more labels for the lower-layer LSTM; the CRF keeps the
    # original label size.
    label_size = data.label_alphabet_size
    data.label_alphabet_size += 2
    self.lstm = BiLSTM(data)
    self.crf = CRF(label_size, self.gpu)
def __init__(self, data, opt):
    super(SeqModel, self).__init__()
    self.gpu = opt.gpu
    # Two more labels are added for the lower-layer LSTM; the CRF uses the
    # original label size.
    self.word_hidden = WordSequence(data, opt)
    self.crf = CRF(data.label_alphabet.size(), self.gpu)