def __init__(self, config):
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                            for emb in config["embeddings"]]
    self.max_mention_width = config["max_mention_width"]
    self.max_context_width = config["max_context_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.float32, [None, None, self.embedding_size]))  # Text embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"],
                                               self.config["decay_rate"], staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
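# A minimal sketch of how the padding queue above is typically driven in TF1:
# a daemon thread tensorizes each training example and feeds it to enqueue_op
# through the placeholders. model.tensorize_example and train_examples are
# assumed helpers/data, not shown in this section.
import random
import threading

def start_enqueue_thread(model, session, train_examples):
    def _enqueue_loop():
        while True:
            random.shuffle(train_examples)
            for example in train_examples:
                tensorized = model.tensorize_example(example, is_training=True)
                feed_dict = dict(zip(model.queue_input_tensors, tensorized))
                session.run(model.enqueue_op, feed_dict=feed_dict)
    enqueue_thread = threading.Thread(target=_enqueue_loop)
    enqueue_thread.daemon = True  # Exit together with the main process.
    enqueue_thread.start()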
    char_index = char_index[sentence_offset:sentence_offset + max_training_sentences, :, :]
    text_len = text_len[sentence_offset:sentence_offset + max_training_sentences]
    speaker_ids = speaker_ids[word_offset:word_offset + num_words]
    # Keep only gold mentions that overlap the truncated window, then
    # re-index them relative to the window start.
    gold_spans = np.logical_and(gold_ends >= word_offset, gold_starts < word_offset + num_words)
    gold_starts = gold_starts[gold_spans] - word_offset
    gold_ends = gold_ends[gold_spans] - word_offset
    cluster_ids = cluster_ids[gold_spans]
    return word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids


if __name__ == "__main__":
    objective = "train"
    config = util.get_config("experiments.conf")["best"]
    word_embeddings = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                       for emb in config["embeddings"]]
    train_dataset = TrainCorefDataset(config, word_embeddings, objective)
    res = ""
    mention_dict = {}
    for i in range(train_dataset.length):
        example = train_dataset.train_examples[i]
        flat_list = [item for sublist in example["sentences"] for item in sublist]
        clean_flat_list = flat_list[:]
        for j, cluster in enumerate(example["clusters"]):
            words = "Cluster {}: [".format(j)
            for mention in cluster:
                word = ""
                flag = False
                for k in range(mention[0], mention[1] + 1):
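# A toy check of the span-filtering logic in the truncation code above: keep
# only gold mentions that overlap the window [word_offset, word_offset +
# num_words) and re-index them to the window start. All values are made up.
import numpy as np

gold_starts = np.array([2, 15, 40])
gold_ends = np.array([3, 18, 45])
word_offset, num_words = 10, 25  # Window covers words 10..34.

gold_spans = np.logical_and(gold_ends >= word_offset,
                            gold_starts < word_offset + num_words)
print(gold_starts[gold_spans] - word_offset)  # [5]  (mention 15..18 survives)
print(gold_ends[gold_spans] - word_offset)    # [8]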
def __init__(self, config):
    self.config = config
    self.pos_tag_dict = util.load_pos_tags(config["pos_tag_path"])
    self.ner_tag_dict = util.load_pos_tags(config["ner_tag_path"])
    self.categories_dict = util.load_pos_tags(config["categories_path"])
    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)  # 350
    self.char_embedding_size = config["char_embedding_size"]
    self.glove_embedding_size = 300
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.l = float(config["l"])
    print("l value:", self.l)
    print("l adapted:", self.config["l_adapted"])

    # GloVe and Turian embeddings.
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                            for emb in config["embeddings"]]
    # GloVe only.
    glove_emb = config["embeddings"][0]
    self.glove_embedding_dict = util.load_embedding_dict(glove_emb["path"], glove_emb["size"],
                                                         glove_emb["format"])
    self.max_mention_width = config["max_mention_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.float32, [None, None, self.embedding_size]))  # Text embeddings: sentences x words x embedding size.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.
    input_props.append((tf.float32, [None, None, len(self.pos_tag_dict)]))  # POS tags: sentences x words x tags.
    input_props.append((tf.float32, [None, None, len(self.ner_tag_dict)]))  # NER indicator variables.
    input_props.append((tf.float32, [None, None, len(self.categories_dict)]))  # Categories.
    input_props.append((tf.int32, [None]))  # NER IDs.
    # Matching speakers.
    input_props.append((tf.float32, [None, None, self.glove_embedding_size]))  # Categories with GloVe embeddings.
    # Domain adaptation inputs.
    input_props.append((tf.float32, [len(self.genres)]))  # Domain labels.
    input_props.append((tf.float32, []))  # l.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss, self.domain_loss, self.domain_predictions, self.values = \
        self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"],
                                               self.config["decay_rate"], staircase=True)
    self.total_loss = self.loss + self.domain_loss
    trainable_params = tf.trainable_variables()
    # Gradients are taken w.r.t. the combined coreference + domain loss.
    gradients = tf.gradients(self.total_loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
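# The extra l input above is consistent with DANN-style domain-adversarial
# training, where the domain classifier's gradient is scaled by -l before it
# reaches the shared encoder. A minimal TF1 gradient-reversal sketch; how
# get_predictions_and_loss actually consumes l is not shown in this section.
def reverse_gradient(x, l):
    # Forward pass: identity, since (1 + l) * x - l * x == x.
    # Backward pass: the stop_gradient term has no gradient, so dy/dx = -l.
    positive_path = tf.stop_gradient((1 + l) * x)
    return positive_path - l * x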
# PyTorch port of the same constructor. Assumes: import torch;
# import torch.nn as nn; import torch.optim as optim; and that this class
# subclasses nn.Module.
def __init__(self, config):
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"])
                           for emb in config["embeddings"]]  # [(300, False), (50, False)]
    self.embedding_size = sum(size for size, _ in self.embedding_info)  # 350 = 300 + 50
    self.char_embedding_size = config["char_embedding_size"]  # 8
    self.char_dict = util.load_char_dict(config["char_vocab_path"])  # All characters + <unk>; size 115.
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                            for emb in config["embeddings"]]  # Dictionaries: [(43994?, 300), (268822, 50)].
    self.max_mention_width = config["max_mention_width"]  # 10
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    # Genre codes [bc, bn, mz, nw, pt, tc, wb]: broadcast (bc), newswire (nw),
    # conversational telephone speech (tc), weblogs (wb), usenet newsgroups, talk shows.
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((torch.float32, [None, None, self.embedding_size]))  # Text embeddings: [?, ?, 350].
    input_props.append((torch.int32, [None, None, None]))  # Character indices.
    input_props.append((torch.int32, [None]))  # Text lengths.
    input_props.append((torch.int32, [None]))  # Speaker IDs.
    input_props.append((torch.int32, []))  # Genre.
    input_props.append((torch.bool, []))  # Is training.
    input_props.append((torch.int32, [None]))  # Gold starts.
    input_props.append((torch.int32, [None]))  # Gold ends.
    input_props.append((torch.int32, [None]))  # Cluster ids.

    # PyTorch has no equivalent of tf.PaddingFIFOQueue; dummy tensors stand in
    # for the queue outputs (None dims become size 1) and are replaced by the
    # tensorized example at each step.
    self.queue_input_tensors = [
        torch.zeros([d if d is not None else 1 for d in shape], dtype=dtype)
        for dtype, shape in input_props
    ]
    # queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    # self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    # self.input_tensors = queue.dequeue()
    self.input_tensors = self.queue_input_tensors  # 9 items from input_props, unpacked below.

    # This is more or less the training step.
    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = 0  # Plain counter; replaces tf.Variable(0, name="global_step", trainable=False).

    # Update parameters based on the predictions and loss.
    trainable_params = [p for p in self.parameters() if p.requires_grad]  # Equivalent of tf.trainable_variables().
    self.loss.backward()  # Autograd backward pass; replaces tf.gradients(self.loss, trainable_params).
    nn.utils.clip_grad_norm_(trainable_params, self.config["max_gradient_norm"])
    optimizers = {
        "adam": optim.Adam,
        "sgd": optim.SGD
    }
    optimizer = optimizers[self.config["optimizer"]](trainable_params,
                                                     lr=self.config["learning_rate"])
    lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=self.config["decay_rate"])
    optimizer.step()
    lr_scheduler.step()  # Call once every config["decay_frequency"] steps to mirror TF's staircase decay.
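# In idiomatic PyTorch, the forward/backward/update sequence above belongs in
# a per-step training function rather than in __init__. A minimal sketch,
# assuming get_predictions_and_loss returns (predictions, loss) for one
# tensorized example:
import torch

def train_step(model, optimizer, inputs, max_gradient_norm):
    optimizer.zero_grad()
    predictions, loss = model.get_predictions_and_loss(*inputs)
    loss.backward()  # Autograd replaces tf.gradients.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_gradient_norm)
    optimizer.step()
    return loss.item()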
if __name__ == "__main__": config = util.get_config("experiments.conf")['best'] device = -1 model = cm.CorefModel(config) if device >= 0: model = model.cuda(device) parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad] parameters = [param for name, param in parameters] optimizer = optim.Adam(parameters, lr=config["learning_rate"]) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, mode="max", patience=2) word_embeddings = [ util.load_embedding_dict(emb["path"], emb["size"], emb["format"]) for emb in config["embeddings"] ] train_dataset = cmdata.TrainCorefDataset(config, word_embeddings, "train") val_dataset = cmdata.TrainCorefDataset(config, word_embeddings, "test") trainer = Trainer(model=model, optimizer=optimizer, train_dataset=train_dataset, validation_dataset=val_dataset, patience=10, validation_metric="+coref_f1", num_epochs=15, cuda_device=device, grad_norm=config["max_gradient_norm"], grad_clipping=config["max_gradient_norm"], learning_rate_scheduler=None)