def main(argv):
  task = WordSvAgreementLM(task_params=get_task_params(), data_dir='data')

  # Create the Model
  model_params = get_model_params(task, hparams.model_name, hparams.model_config)
  print("model_params: ", model_params.__dict__)

  cl_token = task.databuilder.sentence_encoder().encode(constants.bos)
  model = MODELS[hparams.model_name](hparams=model_params, cl_token=cl_token)

  trainer_params = get_train_params(hparams.train_config)

  if len(hparams.prefix) > 0:
    hparams.prefix = hparams.prefix + "_"

  log_dir = os.path.join(
      hparams.logdir, task.name,
      hparams.prefix + model.model_name + "_" + str(hparams.model_config) + "_" +
      str(trainer_params.learning_rate) + "_" + hparams.exp_name)
  ckpt_dir = os.path.join(
      hparams.chkpt_dir, task.name,
      hparams.prefix + model.model_name + "_" + str(hparams.model_config) + "_" +
      ((str(trainer_params.learning_rate) + "_") if hparams.withlr else '') +
      hparams.exp_name)
  print(ckpt_dir)

  trainer = Trainer(task=task,
                    model=model,
                    train_params=trainer_params,
                    log_dir=log_dir,
                    ckpt_dir=ckpt_dir)
  trainer.restore()

  distance_hits, distance_total, diff_hits, diff_total = evaluate_vp(
      trainer.model, trainer.task, hparams.split)
  compute_and_print_acc_stats(distance_hits, distance_total, diff_hits, diff_total)
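# A minimal sketch (assumed, not part of the original script) of the absl flag
# definitions that would populate the `hparams` object referenced in main().
# The flag names mirror the attributes used above; the defaults are placeholders.
from absl import app, flags

flags.DEFINE_string('model_name', None, 'Key into MODELS for the language model.')
flags.DEFINE_string('model_config', None, 'Name of the model hyperparameter config.')
flags.DEFINE_string('train_config', None, 'Name of the trainer config.')
flags.DEFINE_string('exp_name', 'trial1', 'Experiment name suffix for log/ckpt dirs.')
flags.DEFINE_string('prefix', '', 'Optional prefix for log/ckpt dir names.')
flags.DEFINE_string('logdir', 'logs', 'Root directory for logs.')
flags.DEFINE_string('chkpt_dir', 'tf_ckpts', 'Root directory for checkpoints.')
flags.DEFINE_string('split', 'test', 'Dataset split passed to evaluate_vp.')
flags.DEFINE_boolean('withlr', True, 'Include the learning rate in the checkpoint dir name.')

hparams = flags.FLAGS

if __name__ == '__main__':
  app.run(main)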
def run():
  gpus = tf.config.experimental.list_physical_devices('GPU')
  if gpus:
    # Currently, memory growth needs to be the same across GPUs
    try:
      for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
      print(e)

  strategy = tf.distribute.MirroredStrategy()

  log_dir = "logs"
  chkpt_dir = "tf_ckpts"

  with strategy.scope():
    # Create task
    task = TASKS[hparams.task](get_task_params())

    # Create the Model
    model_params = get_model_params(task, hparams.model, hparams.model_config)
    print("model_params: ", model_params.__dict__)
    model = MODELS[hparams.model](hparams=model_params)

    trainer_params = get_train_params(hparams.train_config)

    log_dir = os.path.join(
        log_dir, task.name,
        model.model_name + "_" + str(hparams.model_config) + "_" +
        str(trainer_params.learning_rate) + "_" + hparams.exp_name)
    ckpt_dir = os.path.join(
        chkpt_dir, task.name,
        model.model_name + "_" + str(hparams.model_config) + "_" +
        str(trainer_params.learning_rate) + "_" + hparams.exp_name)

    # Create the trainer
    trainer = Trainer(hparams,
                      strategy=strategy,
                      task=task,
                      model=model,
                      train_params=trainer_params,
                      log_dir=log_dir,
                      ckpt_dir=ckpt_dir)
    trainer.restore()
    trainer.train()
from absl import flags
import numpy as np
from util.models import MODELS
from util.tasks import TASKS
from notebook_utils import *
import pandas as pd
import seaborn as sns; sns.set()
from collections import Counter
from tqdm import tqdm

log_dir = "../logs"
chkpt_dir = "../tf_ckpts"

task = TASKS['word_sv_agreement_vp'](task_params=get_task_params(), data_dir='../data')
cl_token = task.databuilder.sentence_encoder().encode(constants.bos)

modelz = {}
ckptz = {}

config = {
    'student_exp_name': 'gc_f_std124',
    'teacher_exp_name': 'gc_o_tchr124',
    'task_name': 'word_sv_agreement_vp',
    'teacher_model': 'cl_lstm',
    'student_model': 'cl_lstm',
    'teacher_config': 'small_lstm_v4',
    'student_config': 'small_lstm_v4',
    'distill_config': 'pure_dstl_4_crs_slw',
      res_labels_to_index,
      input_nodes=input_nodes,
      output_nodes=output_nodes,
      length=full_att_mat.shape[-1])

  n_layers = full_att_mat.shape[0]
  length = full_att_mat.shape[-1]
  final_layer_attention = flow_values[(layer + 1) * length:,
                                      layer * length:(layer + 1) * length]
  relevance_attention_flow = final_layer_attention[output_index]

  return relevance_attention_flow


task_name = 'word_sv_agreement_lm'
task_params = get_task_params(batch_size=1)
task = TASKS[task_name](task_params, data_dir='../InDist/data')
cl_token = task.sentence_encoder().encode(constants.bos)
task_tokenizer = task.sentence_encoder()._tokenizer

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForMaskedLM.from_pretrained('distilbert-base-uncased',
                                              output_hidden_states=True,
                                              output_attentions=True)

all_examples_x = []
all_examples_vp = []
all_examples_y = []
all_examples_attentions = []
all_examples_blankout_relevance = []
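# A minimal usage sketch (assumed, not from the original notebook, and assuming a
# transformers version that returns ModelOutput objects): pull the per-layer attention
# matrices out of the DistilBERT masked-LM model configured above. The example sentence
# is arbitrary.
import torch

inputs = tokenizer("the author of the books [MASK] here", return_tensors='pt')
with torch.no_grad():
  outputs = model(**inputs)

# `outputs.attentions` is a tuple with one tensor per layer, each of shape
# (batch, num_heads, seq_len, seq_len); stack them into a single array.
attentions = torch.stack(outputs.attentions).squeeze(1).numpy()
print(attentions.shape)  # (n_layers, n_heads, length, length)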
from absl import flags
import numpy as np
from util.models import MODELS
from util.tasks import TASKS
from notebook_utils import *
import pandas as pd
import seaborn as sns; sns.set()
from collections import Counter
from tqdm import tqdm

log_dir = "../logs"
chkpt_dir = "../tf_ckpts"

task = TASKS['word_sv_agreement_vp'](task_params=get_task_params(), data_dir='../data')
cl_token = task.databuilder.sentence_encoder().encode(constants.bos)

models = {}
labels = []

config = {
    'student_exp_name': 'gc_f_std124',
    'teacher_exp_name': 'gc_o_tchr124',
    'task_name': 'word_sv_agreement_vp',
    'teacher_model': 'cl_lstm',
    'student_model': 'cl_lstm',
    'teacher_config': 'small_lstm_v4',
    'student_config': 'small_lstm_v4',
    'distill_config': 'pure_dstl_4_crs_slw',
          str(hparams.teacher_config),
          hparams.teacher_exp_name,
          "student",
          student_model.model_name,
          str(hparams.student_config),
          hparams.student_exp_name
      ]))

  return teacher_model, student_model, teacher_log_dir, teacher_ckpt_dir, student_log_dir, student_ckpt_dir


DISTILLER = {
    'offline': Distiller,
    'online': OnlineDistiller,
}

if __name__ == '__main__':
  # Create task
  task = TASKS[hparams.task](get_task_params(batch_size=hparams.batch_size))

  # Create the Model
  teacher_model, student_model, \
      teacher_log_dir, teacher_ckpt_dir, student_log_dir, student_ckpt_dir = create_and_load_models()

  distiller = DISTILLER[hparams.distill_mode](
      hparams=hparams,
      distill_params=get_distill_params(hparams.distill_config),
      teacher_model=teacher_model,
      student_model=student_model,
      task=task,
      teacher_ckpt_dir=teacher_ckpt_dir,
      teacher_log_dir=teacher_log_dir,
      student_ckpt_dir=student_ckpt_dir,
      student_log_dir=student_log_dir,
from util.tasks import TASKS
from notebook_utils import *
import pandas as pd
import seaborn as sns; sns.set()
from collections import Counter
from tqdm import tqdm
import logging

tf.get_logger().setLevel(logging.ERROR)

log_dir = "../logs"
chkpt_dir = "../tf_ckpts"

task = TASKS['word_sv_agreement_vp'](
    task_params=get_task_params(batch_size=512), data_dir='../data')
cl_token = task.databuilder.sentence_encoder().encode(constants.bos)

students = []
models = []
labels = []

# Bert to LSTM
config = {
    'student_exp_name': 'gc_f_std9303',
    'teacher_exp_name': 'gc_o_tchr8323',
    'task_name': 'word_sv_agreement_vp',
    'teacher_model': 'cl_bert',
    'student_model': 'cl_lstm',
    'teacher_config': 'small_gpt_v9',
          student_model.model_name,
          str(hparams.student_config),
          hparams.student_exp_name
      ]))

  return teacher_model, student_model, teacher_log_dir, teacher_ckpt_dir, student_log_dir, student_ckpt_dir


DISTILLER = {
    'offline': Distiller,
    'online': OnlineDistiller,
    'off_schdld': ScheduledDistiller
}

if __name__ == '__main__':
  # Create task
  task = TASKS[hparams.task](get_task_params())

  # Create the Model
  teacher_model, student_model, \
      teacher_log_dir, teacher_ckpt_dir, student_log_dir, student_ckpt_dir = create_and_load_models()

  distiller = DISTILLER[hparams.distill_mode](
      hparams=hparams,
      distill_params=get_distill_params(hparams.distill_config),
      teacher_model=teacher_model,
      student_model=student_model,
      task=task,
      teacher_ckpt_dir=teacher_ckpt_dir,
      teacher_log_dir=teacher_log_dir,
      student_ckpt_dir=student_ckpt_dir,
      student_log_dir=student_log_dir,
import numpy as np
from util.models import MODELS
from util.tasks import TASKS
from notebook_utils import *
import pandas as pd
import seaborn as sns; sns.set()
from collections import Counter
from tqdm import tqdm
import logging

tf.get_logger().setLevel(logging.ERROR)

log_dir = "../logs"
chkpt_dir = "../tf_ckpts"

task = TASKS['word_sv_agreement_vp'](
    task_params=get_task_params(batch_size=512), data_dir='../data')
cl_token = task.databuilder.sentence_encoder().encode(constants.bos)

models = []
students = []
labels = []

# Bert to LSTM
config = {
    'student_exp_name': 'gc_f_std4104',
    'teacher_exp_name': 'gc_o_tchr4112',
    'task_name': 'word_sv_agreement_vp',
    'teacher_model': 'cl_gpt2_shared',