Example #1
def __print_epoch_results(self, ep_no, all_ep, train_tasks, valid_tasks):
    # Assemble a single progress line: epoch counter, then per-task metrics.
    result = 'epoch: {}/{}> '.format(ep_no, all_ep)
    for name, state in train_tasks.items():
        if state.size > 0:
            result += '|T: {}, tr-loss: {:.3f}, tr-f1: {:.3f} '.format(
                name, state.loss, state.f1)
    if valid_tasks is not None:
        for name, state in valid_tasks.items():
            if state.size > 0:
                result += '|T: {}, va-loss: {:.3f}, va-f1: {:.3f} '.format(
                    name, state.loss, state.f1)
    result += '\t' + ELib.get_time()
    print(result)
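
The train_tasks and valid_tasks arguments are dictionaries that map a task name to a state object exposing size, loss and f1, which is all the method reads. Below is a minimal sketch of such an input, using SimpleNamespace as a stand-in for the project's ETaskState; the task names and metric values are made up for illustration.

from types import SimpleNamespace

# Stand-in task states: anything with .size, .loss and .f1 satisfies the format calls above.
train_tasks = {
    'taskA': SimpleNamespace(size=1024, loss=0.412, f1=0.873),
    'taskB': SimpleNamespace(size=0, loss=0.0, f1=0.0),  # skipped: size == 0
}
valid_tasks = None  # the validation block is skipped entirely when None is passed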
Example #2
def test(self, test_bundle, return_output_vecs=False, weighted_instance_loss=False,
         print_perf=True, title=None, report_number_of_intervals=20, return_output_vecs_get_details=True):
    # Only single-task bundles are supported for testing.
    if len(test_bundle.task_list) > 1:
        print('only one task is allowed for testing')
        return None
    if len(test_bundle.tws) == 0:
        return list(), list(), list(), list()
    if title is None:
        title = ''
    else:
        title += ' '
    # Move the classifier to the target device and switch to evaluation mode.
    self.bert_classifier.to(self.config.device)
    self.bert_classifier.zero_grad()
    self.bert_classifier.eval()
    self.setup_objective(weighted_instance_loss)
    test_dt = EBertDataset(test_bundle, self.tokenizer, self.config.max_seq)
    batches = self.generate_batches([test_dt], self.config, False, False, 0, EInputListMode.sequential)
    result_vecs = list()
    result_vecs_detail = list()
    tasks = {test_bundle.task_list[0]: ETaskState(test_bundle.task_list[0])}
    print(title + 'labeling ', end=' ', flush=True)
    with torch.no_grad():
        for ba_ind, cur_batch in enumerate(batches):
            outcome = self.bert_classifier(cur_batch, False)
            self.__process_loss(outcome, cur_batch, tasks, False, weighted_instance_loss)
            if return_output_vecs:
                result_vecs.extend(self.bert_classifier.output_vecs)
                if self.bert_classifier.output_vecs_detail is not None and return_output_vecs_get_details:
                    result_vecs_detail.extend(self.bert_classifier.output_vecs_detail)
            if ELib.progress_made(ba_ind, cur_batch['batch_count'], report_number_of_intervals):
                print(ELib.progress_percent(ba_ind, cur_batch['batch_count']), end=' ', flush=True)
            # Release each batch from the GPU as soon as it is processed.
            self.delete_batch_from_gpu(cur_batch, EInputListMode.sequential)
            del cur_batch, outcome
    print()
    task_out = tasks[test_bundle.task_list[0]]
    task_out.loss /= task_out.size
    perf = ELib.calculate_metrics(task_out.lbl_true, task_out.lbl_pred)
    if print_perf:
        print('Test Results L1> Loss: {:.3f} F1: {:.3f} Pre: {:.3f} Rec: {:.3f}'.format(
            task_out.loss, perf[0], perf[1], perf[2]) + '\t\t' + ELib.get_time())
    # Move the model back to CPU before returning.
    self.bert_classifier.cpu()
    return task_out.lbl_pred, task_out.logits, [result_vecs, result_vecs_detail], perf
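
As the return statement shows, a successful single-task call yields the predicted labels, the logits, a two-element list with the output vectors and their details, and the metrics tuple whose first three entries are F1, precision and recall (the order used in the print above). A hypothetical caller; the trainer and bundle variable names are assumptions, not identifiers from the source:

preds, logits, (vecs, vec_details), perf = trainer.test(
    dev_bundle, return_output_vecs=True, title='dev')
f1, precision, recall = perf[0], perf[1], perf[2]  # same order as the printed report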
Example #3
def main():
    parser = argparse.ArgumentParser()
    # general params
    parser.add_argument("--cmd",
                        default=None,
                        type=str,
                        required=True,
                        help='')
    parser.add_argument("--TEMP_DIR",
                        default=temp_dir,
                        type=str,
                        required=False,
                        help='')

    # pretrain params
    parser.add_argument("--itr", default=1, type=int, required=False, help='')
    parser.add_argument("--model_path",
                        default=None,
                        type=str,
                        required=True,
                        help='')
    parser.add_argument("--train_path",
                        default=None,
                        type=str,
                        required=True,
                        help='')
    parser.add_argument("--test_path",
                        default=None,
                        type=str,
                        required=True,
                        help='')
    parser.add_argument("--unlabeled_path",
                        default=None,
                        type=str,
                        required=True,
                        help='')
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        required=True,
                        help='')
    parser.add_argument("--device",
                        default=None,
                        type=int,
                        required=True,
                        help='')
    parser.add_argument("--seed",
                        default=None,
                        type=int,
                        required=True,
                        help='')
    parser.add_argument("--train_sample",
                        default=None,
                        type=int,
                        required=True,
                        help='')
    parser.add_argument("--unlabeled_sample",
                        default=None,
                        type=int,
                        required=True,
                        help='')

    ## ignore these!
    parser.add_argument("--per_query",
                        default=False,
                        type=bool,
                        required=False,
                        help='')  # ignore it
    parser.add_argument("--model_path_2",
                        default=None,
                        type=str,
                        required=False,
                        help='')  # ignore it
    parser.add_argument("--lm_model_path",
                        default=None,
                        type=str,
                        required=False,
                        help='')  # ignore it
    parser.add_argument("--t_lbl_path_1",
                        default=None,
                        type=str,
                        required=False,
                        help='')  # ignore it
    parser.add_argument("--t_lbl_path_2",
                        default=None,
                        type=str,
                        required=False,
                        help='')  # ignore it
    parser.add_argument("--valid_path",
                        default=None,
                        type=str,
                        required=False,
                        help='')  # ignore it
    parser.add_argument("--device_2",
                        default=None,
                        type=int,
                        required=False,
                        help='')  # ignore it

    args, unknown = parser.parse_known_args()

    device = 'cpu'
    device_name = device
    if args.device >= 0:
        device = 'cuda:' + str(args.device)
        device_name = torch.cuda.get_device_name(args.device)
    device_2 = 'cpu'
    # Optional second GPU.
    if 'device_2' in args and (args.device_2 is not None
                               and args.device_2 >= 0):
        device_2 = 'cuda:' + str(args.device_2)
        device_name = device_name + ', ' + torch.cuda.get_device_name(
            args.device_2)
    print('setup:', '| python>', platform.python_version(), '| numpy>',
          np.__version__, '| pytorch>', torch.__version__, '| device>',
          device_name)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    if args.cmd.startswith('bert'):
        seed = args.seed
        for cur_itr in range(args.itr):
            print('------------------------------------')
            print('iteration ' + str(cur_itr + 1) + ' began with seed=\'' +
                  str(seed) + '\'   at ' + ELib.get_time())
            if cur_itr >= 0:
                output_dir = args.output_dir + '_' + str(cur_itr)
                EPretrainProj.run(args.cmd, args.per_query, args.train_path,
                                  args.valid_path, args.test_path,
                                  args.unlabeled_path, args.model_path,
                                  args.model_path_2, args.lm_model_path,
                                  args.t_lbl_path_1, args.t_lbl_path_2,
                                  output_dir, device, device_2, seed,
                                  args.train_sample, args.unlabeled_sample)
            seed += 1230
        ELib.PASS()
    ELib.PASS()

if __name__ == "__main__":
    print("Started at", ELib.get_time())
    main()
    print("\nDone at", ELib.get_time())
    pass
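
Since every path and sample-count flag above is declared required=True, one way to exercise main() without a shell is to populate sys.argv before calling it. This is only a sketch: the script name, the file paths, and the 'bert_pretrain' value for --cmd (the code only checks that it starts with 'bert') are placeholder assumptions.

import sys

sys.argv = [
    'pretrain.py',                        # placeholder script name
    '--cmd', 'bert_pretrain',             # any value starting with 'bert' enters the iteration loop
    '--model_path', 'bert-base-uncased',  # placeholder model path
    '--train_path', 'data/train.tsv',
    '--test_path', 'data/test.tsv',
    '--unlabeled_path', 'data/unlabeled.tsv',
    '--output_dir', 'out/run',
    '--device', '0',                      # a negative value falls back to CPU
    '--seed', '1000',
    '--train_sample', '100',
    '--unlabeled_sample', '1000',
    '--itr', '2',                         # two iterations; the seed is bumped by 1230 each time
]
main()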