def add_model_specific_valuables(self, config):
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.max_span_width = config["max_span_width"]
    if config["lm_path"]:
        self.lm_file = h5py.File(self.config["lm_path"], "r")
    else:
        self.lm_file = None
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings.size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.head_embeddings.size]))  # Head embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    return input_props
def __init__(self, config):
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                            for emb in config["embeddings"]]
    self.max_mention_width = config["max_mention_width"]
    self.max_context_width = config["max_context_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.float32, [None, None, self.embedding_size]))  # Text embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
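The placeholders built above are only the feed points of the padding queue; in this family of models a training script typically tensorizes one example at a time on a background thread and pushes it through enqueue_op, while the graph consumes input_tensors from dequeue(). A minimal sketch of that pattern, assuming a hypothetical tensorize_example method that returns tensors in the same order as input_props:

import random
import threading

def start_enqueue_thread_sketch(model, session, train_examples):
    # Feed tensorized examples into the PaddingFIFOQueue from a daemon thread.
    def _enqueue_loop():
        while True:
            random.shuffle(train_examples)
            for example in train_examples:
                tensorized = model.tensorize_example(example, is_training=True)  # hypothetical helper
                feed_dict = dict(zip(model.queue_input_tensors, tensorized))
                session.run(model.enqueue_op, feed_dict=feed_dict)

    enqueue_thread = threading.Thread(target=_enqueue_loop)
    enqueue_thread.daemon = True
    enqueue_thread.start()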
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    if config["lm_path"]:
        self.lm_file = h5py.File(self.config["lm_path"], "r")
    else:
        self.lm_file = None
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None  # Load eval data lazily.
    self.scene_emb_size = self.config["scene_emb_size"]
    if self.config["use_video"]:
        self.scene_embedding = util.load_scene_embedding(config["scene_embedding_dir"])

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings.size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.head_embeddings.size]))  # Head embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.
    input_props.append((tf.float32, [None, self.scene_emb_size]))  # Video scene embedding.
    input_props.append((tf.int32, [None]))  # Token genders.
    input_props.append((tf.int32, [None]))  # Token is first pronoun.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
def __init__(self, config):
    super(CorefModel, self).__init__()
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"])
                           for emb in config["embeddings"]]  # [(300, false), (50, false)]
    self.embedding_size = sum(size for size, _ in self.embedding_info)  # 350 = 300 + 50
    self.char_embedding_size = config["char_embedding_size"]  # 8
    self.char_dict = util.load_char_dict(config["char_vocab_path"])  # all characters + <unk>, size 115
    self.max_mention_width = config["max_mention_width"]  # 10
    self.genres = {g: i for i, g in enumerate(config["genres"])}

    self.dropout = nn.Dropout(self.config["dropout_rate"])  # 0.2
    self.lexical_dropout = nn.Dropout(self.config["lexical_dropout_rate"])  # 0.5
    self.char_embeddings = nn.Embedding(115, 8)
    self.char_cnn = CNN()
    self.bilstm = nn.LSTM(input_size=500, hidden_size=200, num_layers=1, dropout=0.2,
                          bidirectional=True)
    self._endpoint_span_extractor = EndpointSpanExtractor(800, combination="x,y",
                                                          num_width_embeddings=10,
                                                          span_width_embedding_dim=20,
                                                          bucket_widths=False)
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(input_dim=400)
    self.genre_emb = nn.Embedding(len(self.genres), self.config["feature_size"])
    # self.mention_width_emb = nn.Embedding(self.config["max_mention_width"], self.config["feature_size"])
    # self.head_scores = nn.Linear(400, 1)
    self.mention = SpanPruner(FFNNMention())
    self.same_speaker_emb = nn.Embedding(2, self.config["feature_size"])
    self.mention_distance_emb = nn.Embedding(10, self.config["feature_size"])
    self.antecedent = FFNNAntecedent()
    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    self._regularizer = None

    self.weights_init(self.char_cnn.parameters())
    self.hidden = self.bilstm_init(self.bilstm.hidden_size)
    self.weights_init(self.mention.parameters())
    self.weights_init(self.antecedent.parameters())
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.context_embeddings_size = self.context_embeddings.size
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    if self.config["lm_path"].lower() == "none":
        self.lm_file = None
    else:
        self.lm_file = h5py.File(self.config["lm_path"], "r")
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None  # Load eval data lazily.

    self.ner_types = self.config["ner_types"]
    self.ner_maps = {ner: (i + 1) for i, ner in enumerate(self.ner_types)}
    self.num_types = len(self.ner_types)

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings_size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold NER labels.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
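Note that ner_maps shifts every type index up by one, leaving index 0 free for an implicit "no entity" class. A quick illustration of that convention (the type list here is only an example, not the one from the config above):

ner_types = ["PER", "ORG", "LOC"]  # example only
ner_maps = {ner: i + 1 for i, ner in enumerate(ner_types)}
# ner_maps == {"PER": 1, "ORG": 2, "LOC": 3}; label 0 stands for spans with no entity type.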
def __init__(self, config):
    super(CorefModel, self).__init__()
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"])
                           for emb in config["embeddings"]]  # [(300, false), (50, false)]
    self.embedding_size = sum(size for size, _ in self.embedding_info)  # 350 = 300 + 50
    self.char_embedding_size = config["char_embedding_size"]  # 8
    self.char_dict = util.load_char_dict(config["char_vocab_path"])  # all characters + <unk>, size 115
    self.max_mention_width = config["max_mention_width"]  # 10
    self.genres = {g: i for i, g in enumerate(config["genres"])}

    self.char_embeddings = nn.Parameter(
        torch.randn([len(self.char_dict), self.config["char_embedding_size"]]))
    self.char_cnn = CNN()
    # TODO: check whether the input to the BiLSTM should be a pack(_padded)_sequence so that minibatches can be used.
    self.bilstm = nn.LSTM(input_size=500, hidden_size=200, num_layers=1, dropout=0.2,
                          bidirectional=True)
    self.genre_tensor = nn.Parameter(torch.randn([len(self.genres), self.config["feature_size"]]))
    self.mention_width_tensor = nn.Parameter(
        torch.randn([self.config["max_mention_width"], self.config["feature_size"]]))
    self.head_scores = nn.Linear(400, 1)
    self.mention = FFNNMention()
    self.same_speaker_emb = nn.Parameter(torch.randn([2, self.config["feature_size"]]))
    self.mention_distance_emb = nn.Parameter(torch.zeros([10, self.config["feature_size"]]))
    self.antecedent = FFNNAntecedent()

    nn.init.xavier_uniform_(self.char_embeddings)
    self.weights_init(self.char_cnn.parameters())
    self.hidden = self.bilstm_init(self.bilstm.hidden_size)
    nn.init.xavier_uniform_(self.genre_tensor)
    nn.init.xavier_uniform_(self.mention_width_tensor)
    self.weights_init(self.mention.parameters())
    nn.init.xavier_uniform_(self.same_speaker_emb)
    nn.init.xavier_uniform_(self.mention_distance_emb)
    self.weights_init(self.antecedent.parameters())
def __init__(self, config, embedding_dicts, dataset="train"):
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)
    self.embedding_dicts = embedding_dicts
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.genres = {g: i for i, g in enumerate(config["genres"])}

    if dataset == "train":
        file_path = self.config["train_path"]
    elif dataset == "dev":
        file_path = self.config["dev_path"]
    elif dataset == "test":
        file_path = self.config["test_path"]
    else:
        raise ValueError("None of the specified keys exist: {}. Select from: train, dev or test".format(dataset))

    with open(file_path) as f:
        self.train_examples = [json.loads(jsonline) for jsonline in f.readlines()]
    random.shuffle(self.train_examples)
    self.length = len(self.train_examples)
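A hedged usage sketch for this reader: the embedding loading mirrors the constructors above, while the class name Dataset and the config loader are stand-ins rather than names confirmed by this file.

config = util.initialize_from_env()  # assumed experiment-config loader; any parsed config dict works here
embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                   for emb in config["embeddings"]]
dev_data = Dataset(config, embedding_dicts, dataset="dev")  # "Dataset" is a placeholder class name
print("Loaded {} dev examples".format(dev_data.length))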
def __init__(self, config):
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                            for emb in config["embeddings"]]
    self.max_mention_width = config["max_mention_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.float32, [None, None, self.embedding_size]))  # Text embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
def __init__(self, config):
    # Work-in-progress port of the TensorFlow constructor to PyTorch; the `tf.*Tensor` names and
    # `tf.zeros`/`tf.gradients` calls below appear to stand in for their torch / autograd
    # equivalents and are not runnable TensorFlow code.
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"])
                           for emb in config["embeddings"]]  # [(300, false), (50, false)]
    self.embedding_size = sum(size for size, _ in self.embedding_info)  # 350 = 300 + 50
    self.char_embedding_size = config["char_embedding_size"]  # 8
    self.char_dict = util.load_char_dict(config["char_vocab_path"])  # all characters + <unk>, size 115
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                            for emb in config["embeddings"]]  # dictionaries: [(43994?, 300), (268822, 50)]
    self.max_mention_width = config["max_mention_width"]  # 10
    # Types of corpus documents: [bc, bn, mz, nw, pt, tc, wb]
    # (broadcast/talk shows, broadcast news, magazine, newswire, pivot text,
    #  conversational telephone speech, weblogs/usenet newsgroups).
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.FloatTensor, [None, None, self.embedding_size]))  # Text embeddings. [?, ?, 350]
    input_props.append((tf.IntTensor, [None, None, None]))  # Character indices.
    input_props.append((tf.IntTensor, [None]))  # Text lengths.
    input_props.append((tf.IntTensor, [None]))  # Speaker IDs.
    input_props.append((tf.IntTensor, []))  # Genre.
    input_props.append((tf.ByteTensor, []))  # Is training.
    input_props.append((tf.IntTensor, [None]))  # Gold starts.
    input_props.append((tf.IntTensor, [None]))  # Gold ends.
    input_props.append((tf.IntTensor, [None]))  # Cluster ids.

    self.queue_input_tensors = [tf.zeros(shape).type(dtype) for dtype, shape in input_props]
    # dtypes, shapes = zip(*input_props)
    # queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    # self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    # self.input_tensors = queue.dequeue()
    self.input_tensors = self.queue_input_tensors  # 9 items from input_props, unpacked by get_predictions_and_loss

    # This is, more or less, the training step.
    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.zeros([])  # tf.Variable(0, name="global_step", trainable=False) in the TF original
    # self.reset_global_step = tf.assign(self.global_step, 0)

    # Here the parameters are updated based on the predictions and loss.
    trainable_params = autograd.Variable(0)  # placeholder; the equivalent of model.parameters() / tf.trainable_variables()
    # The backward pass: constructs derivatives of self.loss w.r.t. each x in trainable_params.
    gradients = tf.gradients(self.loss, trainable_params)
    # torch's clip_grad_norm clips parameter .grad fields in place and returns the total norm,
    # so unlike tf.clip_by_global_norm there is no (clipped, norm) pair to unpack.
    gradients = nn.utils.clip_grad_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": optim.Adam(trainable_params, lr=self.config["learning_rate"],
                           weight_decay=self.config["decay_rate"]),
        "sgd": optim.SGD(trainable_params, lr=self.config["learning_rate"],
                         weight_decay=self.config["decay_rate"])
    }
    optimizer = optimizers[self.config["optimizer"]]
    learning_rate = optim.lr_scheduler.ExponentialLR(optimizer, gamma=self.config["decay_frequency"])
    learning_rate.step()
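The tail of the snippet above keeps the shape of the TensorFlow original rather than what PyTorch needs at runtime; an idiomatic per-step update for the same config keys would look roughly like this sketch (assuming model is an nn.Module whose get_predictions_and_loss returns a scalar loss tensor; all names are illustrative):

import torch.nn as nn
import torch.optim as optim

def train_step_sketch(model, optimizer, scheduler, batch, max_gradient_norm):
    # One update: forward pass, backward pass, global-norm clipping, parameter step.
    _, loss = model.get_predictions_and_loss(*batch)
    optimizer.zero_grad()
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_gradient_norm)
    optimizer.step()
    scheduler.step()  # exponential learning-rate decay, stepped once per update in this sketch
    return loss.item()

# optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
# scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=config["decay_rate"])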
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    if config["lm_path"]:
        # Hardcoded override of the configured ELMo cache location.
        self.config["lm_path"] = "/scratch/pp1953/dataset/elmo_cache.hdf5"
        self.lm_file = h5py.File(self.config["lm_path"], "r")
    else:
        self.lm_file = None
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.swag_train_dir = config["swag_train_dir"]
    self.swag_val_dir = config["swag_val_dir"]
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, 1024]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, 1024]))  # Head embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.
    # SWAG
    input_props.append((tf.float32, [None, None, 1024]))  # Sentence embeddings.
    input_props.append((tf.int32, [None]))  # Text length.
    input_props.append((tf.int32, [1, 5]))  # The label.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    self.swag_embeddings = iter([f for f in listdir(self.swag_train_dir)
                                 if isfile(join(self.swag_train_dir, f))])
    self.swag_test_embeddings = iter([f for f in listdir(self.swag_val_dir)
                                      if isfile(join(self.swag_val_dir, f))])
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.swag_predictions, self.multitask_loss1 = self.get_predictions_and_loss_cm(*self.input_tensors)
    self.multitask_loss1 = self.multitask_loss1 / 10
    self.global_step1 = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step1 = tf.assign(self.global_step1, 0)
    learning_rate1 = tf.train.exponential_decay(self.config["learning_rate"], self.global_step1,
                                                self.config["decay_frequency"], self.config["decay_rate"],
                                                staircase=True)
    optimizers1 = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer1 = optimizers1[self.config["optimizer"]](learning_rate1)

    self.predictions2, self.loss2 = self.get_predictions_and_loss(*self.input_tensors)
    self.loss = self.loss2 + self.multitask_loss1
    trainable_params1 = tf.trainable_variables()
    gradients1 = tf.gradients(self.loss, trainable_params1)
    gradients1, _ = tf.clip_by_global_norm(gradients1, self.config["max_gradient_norm"])
    self.train_op = optimizer1.apply_gradients(zip(gradients1, trainable_params1),
                                               global_step=self.global_step1)
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.lm_file = None
    self.lm_hub = None
    self.lm_layers = 0  # TODO: Remove these.
    self.lm_size = 0
    if config["lm_path"]:
        if "tfhub" in config["lm_path"]:
            print("Using tensorflow hub: {}".format(config["lm_path"]))
            self.lm_hub = hub.Module(config["lm_path"].encode("utf-8"), trainable=False)
        else:
            self.lm_file = h5py.File(self.config["lm_path"], "r")
        self.lm_layers = self.config["lm_layers"]
        self.lm_size = self.config["lm_size"]

    self.adjunct_roles, self.core_roles = split_srl_labels(config["srl_labels"], config["include_c_v"])
    self.srl_labels_inv = [""] + self.adjunct_roles + self.core_roles
    self.srl_labels = {l: i for i, l in enumerate(self.srl_labels_inv)}

    # IO stuff.
    # Need to make sure these are in the same order as input_names + label_names.
    self.input_props = [
        (tf.string, [None]),  # String tokens.
        (tf.float32, [None, self.context_embeddings.size]),  # Context embeddings.
        (tf.float32, [None, self.head_embeddings.size]),  # Head embeddings.
        (tf.float32, [None, self.lm_size, self.lm_layers]),  # LM embeddings.
        (tf.int32, [None, None]),  # Character indices.
        (tf.int32, []),  # Text length.
        (tf.int32, []),  # Document ID.
        (tf.bool, []),  # Is training.
        (tf.int32, [None]),  # Gold predicate ids (for input).
        (tf.int32, []),  # Num gold predicates (for input).
        (tf.int32, [None]),  # Predicate ids (length=num_srl_relations).
        (tf.int32, [None]),  # Argument starts.
        (tf.int32, [None]),  # Argument ends.
        (tf.int32, [None]),  # SRL labels.
        (tf.int32, [])  # Number of SRL relations.
    ]
    self.input_names = _input_names
    self.label_names = _label_names
    self.predict_names = _predict_names

    self.batch_size = self.config["batch_size"]
    dtypes, shapes = zip(*self.input_props)
    if self.batch_size > 0 and self.config["max_tokens_per_batch"] < 0:
        # Use fixed batch size if the number of words per batch is not limited (-1).
        self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in self.input_props]
        queue = tf.PaddingFIFOQueue(capacity=self.batch_size * 2, dtypes=dtypes, shapes=shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue_many(self.batch_size)
    else:
        # Use dynamic batch size.
        new_shapes = [[None] + shape for shape in shapes]
        self.queue_input_tensors = [tf.placeholder(dtype, shape)
                                    for dtype, shape in zip(dtypes, new_shapes)]
        queue = tf.PaddingFIFOQueue(capacity=2, dtypes=dtypes, shapes=new_shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue()
    num_features = len(self.input_names)
    self.input_dict = dict(zip(self.input_names, self.input_tensors[:num_features]))
    self.labels_dict = dict(zip(self.label_names, self.input_tensors[num_features:]))
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    if config["lm_path"]:
        self.lm_file = h5py.File(self.config["lm_path"], "r")
    else:
        self.lm_file = None
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None  # Load eval data lazily.
    self.undersampling_probability = self.config["undersampling_probability"]
    self.second_undersampling_probability = self.config["second_undersampling_probability"]
    self.cross_validation_fold = self.config["cross_validation_fold"]
    self.skip_comparative_bridging = ("skip_comparative_bridging" in self.config
                                      and self.config["skip_comparative_bridging"])

    input_props = []
    input_props.append((tf.float32, [None, None, self.context_embeddings.size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.head_embeddings.size]))  # Head embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.
    input_props.append((tf.int32, [None]))  # Bridging antecedent cluster ids.
    input_props.append((tf.int32, [None]))  # IS status: 0 = DN, 1 = DO, 2 = bridging.
    input_props.append((tf.bool, [None]))  # Undersampling mask.

    self.input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.softmax_threshold = config["softmax_threshold"]
    if config["lm_path"]:
        self.lm_file = h5py.File(self.config["lm_path"], "r")
    else:
        self.lm_file = None
    self.lm_layers = self.config["lm_layers"]  # 3
    self.lm_size = self.config["lm_size"]  # 1024
    self.eval_data = None  # Load eval data lazily.
    print("Start to load the eval data")
    st = time.time()
    if not config["predict"]:
        self.load_eval_data()
    print("Finished in {:.2f}".format(time.time() - st))

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings.size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.head_embeddings.size]))  # Head embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, []))  # Pronoun lengths.
    input_props.append((tf.int32, []))  # Name lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # gold_starts.
    input_props.append((tf.int32, [None]))  # gold_ends.
    input_props.append((tf.int32, [None, None]))  # number_features.
    input_props.append((tf.int32, [None, None]))  # candidate_positions.
    input_props.append((tf.int32, [None, None]))  # pronoun_positions.
    input_props.append((tf.int32, [None, None]))  # name_positions.
    input_props.append((tf.int32, [None, None]))  # status_positions.
    input_props.append((tf.int32, [None, None]))  # order_features.
    input_props.append((tf.bool, [None, None]))  # labels.
    input_props.append((tf.float32, [None, None]))  # candidate_masks.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
def __init__(self, config):
    self.config = config
    self.pos_tag_dict = util.load_pos_tags(config["pos_tag_path"])
    self.ner_tag_dict = util.load_pos_tags(config["ner_tag_path"])
    self.categories_dict = util.load_pos_tags(config["categories_path"])
    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)  # 350
    self.char_embedding_size = config["char_embedding_size"]
    self.glove_embedding_size = 300
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.l = float(config["l"])
    print("l value: {}".format(self.l))
    print("l adapted: {}".format(self.config["l_adapted"]))
    # GloVe and Turian embeddings.
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
                            for emb in config["embeddings"]]
    # GloVe only.
    glove_emb = config["embeddings"][0]
    self.glove_embedding_dict = util.load_embedding_dict(glove_emb["path"], glove_emb["size"],
                                                         glove_emb["format"])
    self.max_mention_width = config["max_mention_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.float32, [None, None, self.embedding_size]))  # Text embeddings: sentences x words x embedding size.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.
    input_props.append((tf.float32, [None, None, len(self.pos_tag_dict)]))  # POS tags: sentences x tags.
    input_props.append((tf.float32, [None, None, len(self.ner_tag_dict)]))  # NER indicator variable.
    input_props.append((tf.float32, [None, None, len(self.categories_dict)]))  # Categories.
    input_props.append((tf.int32, [None]))  # NER IDs.
    # Matching speakers.
    input_props.append((tf.float32, [None, None, self.glove_embedding_size]))  # Categories with GloVe embeddings.
    # Domain adaptation.
    input_props.append((tf.float32, [len(self.genres)]))  # Domain labels.
    input_props.append((tf.float32, []))  # l.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    (self.predictions, self.loss, self.domain_loss,
     self.domain_predictions, self.values) = self.get_predictions_and_loss(*self.input_tensors)
    # self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    self.total_loss = self.loss + self.domain_loss
    trainable_params = tf.trainable_variables()
    # gradients = tf.gradients(self.loss, trainable_params)
    gradients = tf.gradients(self.total_loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    # TODO-Ahmed: what are head embeddings?
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.lm_file = None
    self.lm_hub = None
    self.lm_layers = 0  # TODO: Remove these.
    self.lm_size = 0
    # Not applied in the best experiment case.
    if config["lm_path"]:
        if "tfhub" in config["lm_path"]:
            print("Using tensorflow hub: {}".format(config["lm_path"]))
            # self.lm_hub = hub.Module(config["lm_path"].encode("utf-8"), trainable=False)
        else:
            # TODO-Ahmed: investigate lm.
            self.lm_file = h5py.File(self.config["lm_path"], "r")
        self.lm_layers = self.config["lm_layers"]
        self.lm_size = self.config["lm_size"]

    # self.adjunct_roles, self.core_roles = split_srl_labels(
    #     config["srl_labels"], config["include_c_v"])
    # self.srl_labels_inv = [""] + self.adjunct_roles + self.core_roles
    # self.srl_labels = {l: i for i, l in enumerate(self.srl_labels_inv)}
    self.ner_labels = {l: i for i, l in enumerate([""] + config["ner_labels"])}
    self.ner_labels_inv = [""] + config["ner_labels"]
    if "relation_labels" in config:
        self.rel_labels_inv = [""] + config["relation_labels"]
        if config["filter_reverse_relations"]:
            self.rel_labels_inv = [r for r in self.rel_labels_inv if "REVERSE" not in r]
        self.rel_labels = {l: i for i, l in enumerate(self.rel_labels_inv)}
        print("Filtered relations: {}".format(self.rel_labels))
    else:
        self.rel_labels = None
        self.rel_labels_inv = None

    # IO stuff.
    # Need to make sure these are in the same order as input_names + label_names.
    self.input_props = [
        (tf.string, [None]),  # String tokens.
        (tf.float32, [None, self.context_embeddings.size]),  # Context embeddings.
        (tf.float32, [None, self.head_embeddings.size]),  # Head embeddings.
        (tf.float32, [None, self.lm_size, self.lm_layers]),  # LM embeddings.
        (tf.int32, [None, None]),  # Character indices.
        (tf.int32, []),  # Text length.
        (tf.int32, []),  # Document ID.
        (tf.bool, []),  # Is training.
        (tf.int32, [None]),  # NER starts.
        (tf.int32, [None]),  # NER ends.
        (tf.int32, [None]),  # NER labels. 10
        (tf.int32, []),  # Number of NER spans.
        (tf.int32, [None]),  # Coref mention starts.
        (tf.int32, [None]),  # Coref mention ends.
        (tf.int32, [None]),  # Coref cluster ids.
        (tf.int32, []),  # Number of coref mentions.
        (tf.int32, [None]),  # Relation entity1 starts.
        (tf.int32, [None]),  # Relation entity1 ends.
        (tf.int32, [None]),  # Relation entity2 starts.
        (tf.int32, [None]),  # Relation entity2 ends.
        (tf.int32, [None]),  # Relation labels.
        (tf.int32, []),  # Number of relations.
    ]
    self.input_names = _input_names
    self.label_names = _label_names
    self.predict_names = _predict_names

    self.batch_size = self.config["batch_size"]
    dtypes, shapes = zip(*self.input_props)
    if self.batch_size > 0 and self.config["max_tokens_per_batch"] < 0:
        # Use fixed batch size if the number of words per batch is not limited (-1).
        self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in self.input_props]
        queue = tf.PaddingFIFOQueue(capacity=self.batch_size * 2, dtypes=dtypes, shapes=shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue_many(self.batch_size)
    else:
        # Use dynamic batch size.
        new_shapes = [[None] + shape for shape in shapes]
        self.queue_input_tensors = [tf.placeholder(dtype, shape)
                                    for dtype, shape in zip(dtypes, new_shapes)]
        queue = tf.PaddingFIFOQueue(capacity=2, dtypes=dtypes, shapes=new_shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue()
    num_features = len(self.input_names)
    self.input_dict = dict(zip(self.input_names, self.input_tensors[:num_features]))
    self.labels_dict = dict(zip(self.label_names, self.input_tensors[num_features:]))
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    if config["lm_path"]:
        self.lm_file = h5py.File(self.config["lm_path"], "r")
    else:
        self.lm_file = None
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None  # Load eval data lazily.
    self.seq_length = 500
    self.new_dim = 512
    self.sample_transformer = Transformer(num_layers=2, d_model=self.new_dim, num_heads=8, dff=2048,
                                          input_vocab_size=self.seq_length,
                                          target_vocab_size=self.seq_length)

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings.size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.head_embeddings.size]))  # Head embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.
    # input_props.append((tf.float32, [None]))  # Learning rate.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    learning_rate = step_decay(self.global_step)
    self.reset_global_step = tf.assign(self.global_step, 0)

    trainable_vars = tf.trainable_variables(scope=None)
    var_list1 = []
    var_list2 = []
    for var in trainable_vars:
        if "transformer" in var.name:
            var_list2 += [var]
        else:
            var_list1 += [var]

    learning_rate1 = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                                self.config["decay_frequency"], self.config["decay_rate"],
                                                staircase=True)
    # learning_rate2 = CustomSchedule(self.new_dim)
    gradients1 = tf.gradients(self.loss, var_list1)
    gradients2 = tf.gradients(self.loss, var_list2)
    gradients1, _ = tf.clip_by_global_norm(gradients1, self.config["max_gradient_norm"])
    gradients2, _ = tf.clip_by_global_norm(gradients2, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer1 = optimizers[self.config["optimizer"]](learning_rate1)
    optimizer2 = optimizers[self.config["optimizer"]](learning_rate)
    train_op1 = optimizer1.apply_gradients(zip(gradients1, var_list1), global_step=self.global_step)
    train_op2 = optimizer2.apply_gradients(zip(gradients2, var_list2), global_step=self.global_step)
    self.train_op = tf.group(train_op1, train_op2)
def __init__(self, config, tokenizer, check_point_load=True):
    bert_config = modeling.BertConfig.from_json_file(config["bert_folder"] + "/bert_config.json")
    seq_length = 502
    self.max_sentence_length = 263
    self.max_sentence_no = 90
    self.config = config
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.lm_file = None
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None  # Load eval data lazily.

    input_ids = tf.placeholder(dtype=tf.int32, shape=(1, seq_length))
    input_mask = tf.placeholder(dtype=tf.int32, shape=(1, seq_length))
    input_type_ids = tf.placeholder(dtype=tf.int32, shape=(1, seq_length))
    model = modeling.BertModel(config=bert_config, is_training=True, input_ids=input_ids,
                               input_mask=input_mask, token_type_ids=input_type_ids,
                               use_one_hot_embeddings=False)
    self.tokenizer = tokenizer
    tvars = tf.trainable_variables()
    init_checkpoint = config["bert_folder"] + "/bert_model.ckpt"
    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    if check_point_load:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # Collect the BERT variables from layer 19 upward (plus the last two variables),
    # so that only the top layers are fine-tuned.
    self.usesful = []
    for var in tvars:
        if len(var.name.split("/")) < 3:
            continue
        if var.name.split("/")[2][0] == "l":
            temp = var.name.split("/")[2][6:]
            if int(temp) > 18:
                self.usesful += [var]
    self.usesful += [tvars[-1], tvars[-2]]

    all_layers = model.get_all_encoder_layers()
    embeddings = tf.concat([tf.expand_dims(all_layers[-1], 3),
                            tf.expand_dims(all_layers[-2], 3),
                            tf.expand_dims(all_layers[-3], 3),
                            tf.expand_dims(all_layers[-4], 3)], 3)
    embeddings = embeddings[:, 1:501, :, :]
    embeddings = tf.reshape(embeddings, [500, 1024, 4])
    self.yo = embeddings  # <tf.Tensor 'strided_slice_4:0' shape=(1, 500, 1024, 4) dtype=float32>

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.
    input_props.append((tf.int32, [self.max_sentence_no]))  # Splits.
    input_props.append((tf.int32, [2]))  # Param.

    # self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    self.queue_input_tensors = [input_ids, input_mask, input_type_ids] + [
        tf.placeholder(dtype, shape) for dtype, shape in input_props
    ]
    # dtypes, shapes = zip(*input_props)
    # dtypes = (tf.int32, tf.int32, tf.int32) + dtypes
    # shapes = ([1, seq_length], [1, seq_length], [1, seq_length]) + shapes
    # queue = tf.PaddingFIFOQueue(capacity=3, dtypes=dtypes, shapes=shapes)
    # self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    # self.input_tensors = queue.dequeue()
    self.input_tensors = self.queue_input_tensors

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors[3:], embeddings)

    self.global_step1 = tf.Variable(0, name="global_step", trainable=False)
    self.global_step2 = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step1 = tf.assign(self.global_step1, 0)
    self.reset_global_step2 = tf.assign(self.global_step2, 0)
    learning_rate1 = tf.train.exponential_decay(0.0001, self.global_step1,
                                                self.config["decay_frequency"], self.config["decay_rate"],
                                                staircase=True)
    learning_rate2 = tf.train.exponential_decay(self.config["learning_rate"], self.global_step2,
                                                self.config["decay_frequency"], self.config["decay_rate"],
                                                staircase=True)

    # Everything except the BERT encoder and the word-embedding variables gets the base learning rate.
    trainable_params = tf.trainable_variables()
    Lee_param = []
    for param in trainable_params:
        temp = param.name.split("/")[0]
        if temp == "bert" or temp == "context_word_emb:0" or temp == "head_word_emb:0" or temp == "total_embedding:0":
            continue
        else:
            Lee_param += [param]

    gradients1 = tf.gradients(self.loss, self.usesful)
    gradients2 = tf.gradients(self.loss, Lee_param)
    gradients1, _ = tf.clip_by_global_norm(gradients1, self.config["max_gradient_norm"])
    gradients2, _ = tf.clip_by_global_norm(gradients2, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer1 = tf.train.AdamOptimizer(learning_rate1)
    optimizer2 = optimizers[self.config["optimizer"]](learning_rate2)
    self.train_op1 = optimizer1.apply_gradients(zip(gradients1, self.usesful), global_step=self.global_step1)
    self.train_op2 = optimizer2.apply_gradients(zip(gradients2, Lee_param), global_step=self.global_step2)
    self.train_op = tf.group(self.train_op1, self.train_op2)
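Since the queue is bypassed here (input_tensors are the placeholders themselves), training would feed every placeholder directly and run the grouped op, which advances both optimizers in one call. A minimal loop sketch under that assumption, where tensorized_example is a hypothetical tuple ordered like queue_input_tensors (input_ids, input_mask, input_type_ids, then the remaining inputs):

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(num_train_steps):  # num_train_steps is illustrative
        feed_dict = dict(zip(model.queue_input_tensors, tensorized_example))
        loss, _ = session.run([model.loss, model.train_op], feed_dict=feed_dict)
        if step % 100 == 0:
            print("step {}: loss {:.4f}".format(step, loss))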