Example #1
        def create_model(self):
            input_ids = BertModelTest.ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = BertModelTest.ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = BertModelTest.ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            config = modeling.BertConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range)

            model = modeling.BertModel(config=config)

            all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)

            outputs = {
                "sequence_output": all_encoder_layers[-1],
                "pooled_output": pooled_output,
                "all_encoder_layers": all_encoder_layers,
            }
            return outputs
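The create_model snippets above and below call a BertModelTest.ids_tensor helper that these excerpts omit. A minimal sketch of such a helper for the TensorFlow variants, assuming it only draws uniform random ids below vocab_size (PyTorch ports of this test use an analogous version returning a torch.LongTensor); the real helper may differ:

import random
import tensorflow as tf

class BertModelTest(tf.test.TestCase):

    @classmethod
    def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
        """Creates a random int32 tensor of the given shape with ids in [0, vocab_size)."""
        if rng is None:
            rng = random.Random()
        total_dims = 1
        for dim in shape:
            total_dims *= dim
        values = [rng.randint(0, vocab_size - 1) for _ in range(total_dims)]
        return tf.constant(value=values, dtype=tf.int32, shape=shape, name=name)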
Example #2
def bert_train_fn():
    is_training = True
    hidden_size = 768
    num_labels = 10
    batch_size = 128  # must be defined; the placeholders below depend on it
    max_seq_length = 512
    use_one_hot_embeddings = False
    bert_config = modeling.BertConfig(vocab_size=21128, hidden_size=hidden_size,
                                      num_hidden_layers=12, num_attention_heads=12,
                                      intermediate_size=3072)

    input_ids = tf.placeholder(tf.int32, [batch_size, max_seq_length], name="input_ids")
    input_mask = tf.placeholder(tf.int32, [batch_size, max_seq_length], name="input_mask")
    segment_ids = tf.placeholder(tf.int32, [batch_size, max_seq_length], name="segment_ids")
    label_ids = tf.placeholder(tf.float32, [batch_size, num_labels], name="label_ids")
    loss, per_example_loss, logits, probabilities, model = create_model(bert_config, is_training, input_ids, input_mask,
                                                                        segment_ids, label_ids, num_labels,
                                                                        use_one_hot_embeddings)
    # 1. Generate or load training/validation/test data, e.g. train: (X, y),
    #    where X is input_ids and y is labels.

    # 2. Train the model by calling create_model and computing the loss.
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    sess = tf.Session(config=gpu_config)
    sess.run(tf.global_variables_initializer())
    for i in range(1000):
        input_ids_ = np.ones((batch_size, max_seq_length), dtype=np.int32)
        input_mask_ = np.ones((batch_size, max_seq_length), dtype=np.int32)
        segment_ids_ = np.ones((batch_size, max_seq_length), dtype=np.int32)
        label_ids_ = np.ones((batch_size, num_labels), dtype=np.float32)
        feed_dict = {input_ids: input_ids_, input_mask: input_mask_,
                     segment_ids: segment_ids_, label_ids: label_ids_}
        loss_ = sess.run([loss], feed_dict)
        print("loss:", loss_)
Example #3
 def bert_model(self):
     real_len = tf.reduce_sum(tf.cast(tf.not_equal(tf.to_int32(0), self._input_ids), tf.int32), axis=1)
     input_mask = tf.cast(tf.sequence_mask(real_len, self._max_seq_length), tf.int32)
     base_model = modeling.BertModel(
         config=modeling.BertConfig(vocab_size=self._vocab_size),
         is_training=self._is_training,
         input_ids=self._input_ids,
         input_mask=input_mask,
         token_type_ids=tf.zeros_like(self._input_ids, tf.int32),
         use_one_hot_embeddings=False
     )
     output_layer = base_model.get_pooled_output()
     self._inference(output_layer)
     self._build_train_op()
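The mask construction above derives the attention mask from zero-padding rather than taking it as an input. A small self-contained illustration of the same two lines, assuming pad id 0:

import tensorflow as tf

input_ids = tf.constant([[12, 7, 0, 0], [5, 5, 5, 0]])
real_len = tf.reduce_sum(tf.cast(tf.not_equal(0, input_ids), tf.int32), axis=1)  # [2, 3]
input_mask = tf.cast(tf.sequence_mask(real_len, 4), tf.int32)
# input_mask == [[1, 1, 0, 0], [1, 1, 1, 0]]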
Example #4
        def create_model(self):
            input_ids = BertModelTest.ids_tensor(
                [self.batch_size, self.seq_length], self.vocab_size)

            dist_ids = BertModelTest.dist_tensor(
                [self.batch_size, self.seq_length])

            input_mask = None
            if self.use_input_mask:
                input_mask = BertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = BertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], self.type_vocab_size)

            config = modeling.BertConfig(
                vocab_size=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range)

            model = modeling.BertModel(config=config,
                                       is_training=self.is_training,
                                       input_ids=input_ids,
                                       position_ids=dist_ids,
                                       input_mask=input_mask,
                                       token_type_ids=token_type_ids,
                                       scope=self.scope)

            outputs = {
                "embedding_output": model.get_embedding_output(),
                "sequence_output": model.get_sequence_output(),
                "pooled_output": model.get_pooled_output(),
                "all_encoder_layers": model.get_all_encoder_layers(),
            }
            return outputs
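This variant additionally passes position_ids built by a dist_tensor helper that the excerpt omits. A plausible sketch, assuming it merely materializes standard 0..seq_length-1 position ids for each batch row; the fork it comes from may compute something else entirely:

@classmethod
def dist_tensor(cls, shape):
    # Assumed behaviour: [batch_size, seq_length] position ids 0..seq_length-1.
    batch_size, seq_length = shape
    positions = tf.range(seq_length, dtype=tf.int32)
    return tf.tile(tf.expand_dims(positions, 0), [batch_size, 1])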
Example #5
        def create_model(self):
            input_ids = BertModelTest.ids_tensor(
                [self.batch_size, self.seq_length], self.s_vocab)

            input_mask = None
            if self.use_input_mask:
                input_mask = BertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], s_vocab=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = BertModelTest.ids_tensor(
                    [self.batch_size, self.seq_length], self.n_typ)

            config = modeling.BertConfig(
                s_vocab=self.s_vocab,
                d_hidden=self.d_hidden,
                n_lays=self.n_lays,
                n_heads=self.n_heads,
                d_ff=self.d_ff,
                act=self.act,
                drop=self.drop,
                drop_attn=self.drop_attn,
                n_pos=self.n_pos,
                n_typ=self.n_typ,
                init_range=self.init_range,
            )

            model = modeling.BertModel(
                config=config,
                is_training=self.is_training,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=token_type_ids,
                scope=self.scope,
            )

            outputs = {
                "embedding_output": model.get_embedding_output(),
                "sequence_output": model.get_sequence_output(),
                "pooled_output": model.get_pooled_output(),
                "all_encoder_layers": model.get_all_encoder_layers(),
            }
            return outputs
Example #6
  INPUT_FILE = "drop_0_test.pkl"
  RANDOM_SEED = 12345
  MAX_PREDICTIONS_PER_SEQ = 20
  MAX_SEQ_LENGTH = 128
  DO_LOWER_CASE = True


  # LEARNING_RATE = 2e-5
  # NUM_TRAIN_STEPS = 1
  # NUM_WARMUP_STEPS = 10
  # USE_TPU = False
  # BATCH_SIZE = 1


  # load model
  bert_config = modeling.BertConfig(BERT_CONFIG_FILE)
  device = torch.device("cpu")
  model1 = modeling.BertForPreTraining(bert_config)
  # model2 = modeling.BertForPreTraining(bert_config)

  model1.load_state_dict(torch.load(INIT_CHECKPOINT_PT, map_location='cpu'))
  # model1.bert.from_pretrained(INIT_DIRECTORY)
  model1.to(device)
  print('model loaded')


  # resolve features
  with open(INPUT_FILE, 'rb') as f:
    features = pickle.load(f)

  print ("%d total samples" % len(features))
Example #7
def build_graph(opts, is_training=True):
    train_graph = tf.Graph()
    strategy = None

    if opts['use_popdist']:
        strategy = create_popdist_strategy()

    with train_graph.as_default(), ExitStack() as stack:
        if strategy:
            stack.enter_context(strategy.scope())

        if opts["groupbert"]:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.GroupBertConfig(vocab_size=None))
        else:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.BertConfig(vocab_size=None))

        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16

        # define placeholders
        placeholders = {
            'learning_rate': tf.placeholder(tf.float32, shape=[]),
            'loss_scaling': tf.placeholder(tf.float32, shape=[])
        }
        learning_rate = placeholders['learning_rate']
        loss_scaling = placeholders['loss_scaling']

        # define input, datasets must be defined outside the ipu device scope.
        train_iterator = ipu.ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        # define output
        outfeed_queue = ipu.ipu_outfeed_queue.IPUOutfeedQueue()

        # building networks with pipeline
        def bert_net():
            return build_network(train_iterator, outfeed_queue, bert_config,
                                 opts, learning_rate, loss_scaling,
                                 is_training)

        with ipu.scopes.ipu_scope('/device:IPU:0'):
            train = training_step_with_infeeds_and_outfeeds(
                train_iterator, outfeed_queue, bert_config, opts,
                learning_rate, loss_scaling, is_training)

        # get result from outfeed queue
        outfeed = outfeed_queue.dequeue()

        if strategy:
            # Take the mean of all the outputs across the distributed workers
            outfeed = [
                strategy.reduce(tf.distribute.ReduceOp.MEAN, v)
                for v in outfeed
            ]

        if opts['distributed_worker_index'] == 0 or opts['log_all_workers']:
            log.print_trainable_variables(opts)

        model_and_optimiser_variables = tf.global_variables()
        model_variables = tf.trainable_variables() + tf.get_collection(
            tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
        restore = tf.train.Saver(
            var_list=model_and_optimiser_variables
            if opts['restore_optimiser_from_checkpoint'] else model_variables)

        train_saver = tf.train.Saver(
            var_list=model_and_optimiser_variables
            if opts['save_optimiser_to_checkpoint'] else model_variables,
            max_to_keep=5)

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()

    # Calculate the number of required IPUs.
    num_ipus = (max(opts['device_mapping']) + 1) * opts['replicas']
    num_ipus = ipu_utils.next_power_of_two(num_ipus)

    ipu_config = ipu_utils.get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        minimum_remote_tensor_size=opts['min_remote_tensor_size'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection=opts['scheduler'],
        compile_only=opts['compile_only'],
        ipu_id=opts['select_ipu'])

    if opts['use_popdist']:
        ipu_config = popdist.tensorflow.set_ipu_config(ipu_config,
                                                       opts['shards'],
                                                       configure_device=False)

    # Do not acquire a device, compile only.
    if opts["compile_only"]:
        ipu_config.device_connection.version = "ipu2"
        ipu_config.device_connection.enable_remote_buffers = True
        # PRE_COMPILE allows running executables on a graph without an attached device.
        ipu_config.device_connection.type = DeviceConnectionType.PRE_COMPILE

        # Enforce using an executable cache dir, defaulting if not given.
        if ("TF_POPLAR_FLAGS" in os.environ):
            if ("--executable_cache_path"
                    not in os.environ["TF_POPLAR_FLAGS"]):
                print(
                    "Warning: --executable_cache_path in TF_POPLAR_FLAGS " +
                    "(for 'poprun --mpi_local_args') not set. Setting to default "
                    + "path: /tmp/tf_cache")
                os.environ[
                    "TF_POPLAR_FLAGS"] = "--executable_cache_path=/tmp/tf_cache"

        # Sometimes TF_POPLAR_FLAGS might not even exist
        else:
            print(
                "Warning: TF_POPLAR_FLAGS environment variable (for 'poprun " +
                "--mpi_local_args') not set. --executable_cache_path must be "
                + "defined when using --compile-only. Setting to default path: "
                + "/tmp/tf_cache")
            os.environ[
                "TF_POPLAR_FLAGS"] = "--executable_cache_path=/tmp/tf_cache"

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)

    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, train_saver, restore, tvars)
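The build_graph snippets in this collection return a GraphOps container that the excerpts never define; each source file presumably declares its own variant (the Example #9 version, for instance, also carries an exec_path). A plausible sketch matching the positional arguments used here, with field names that are assumptions:

import collections

# Assumed field names, matching the positional arguments passed above.
GraphOps = collections.namedtuple(
    'GraphOps',
    ['graph', 'session', 'init', 'ops', 'placeholders',
     'iterator', 'outfeed', 'saver', 'restore', 'tvars'])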
Example #8
flags.DEFINE_string("precision","fp32","precision fp32 or fp16")

# batch and seq size that fit into a single GPU collected from https://github.com/ROCmSoftwarePlatform/BERT#out-of-memory-issues
batch_size = FLAGS.batch
seq_length = FLAGS.seq_length
heads = FLAGS.heads
layers = FLAGS.layers

if FLAGS.precision == "fp32":
# this is set to LARGE Bert model 
   bert_config = modeling.BertConfig(attention_probs_dropout_prob= 0.1,
      hidden_act= "gelu",
      hidden_dropout_prob= 0.1,
      hidden_size = 1024,
      initializer_range = 0.02,
      intermediate_size = 4096,
      max_position_embeddings = 512,
      num_attention_heads = heads,
      num_hidden_layers = layers,
      type_vocab_size =  2,
      vocab_size = 30522,
      precision=tf.float32)
else:
   bert_config = modeling.BertConfig(attention_probs_dropout_prob= 0.1,
      hidden_act= "gelu",
      hidden_dropout_prob= 0.1,
      hidden_size = 1024,
      initializer_range = 0.02,
      intermediate_size = 4096,
      max_position_embeddings = 512,
      num_attention_heads = heads,
      num_hidden_layers = layers,
Example #9
def build_graph(opts, iterations_per_step=1, is_training=True):

    train_graph = tf.Graph()
    with train_graph.as_default():
        bert_config = bert_ipu.BertConfig.from_dict(
            opts, config=bert_ipu.BertConfig(vocab_size=None))
        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16
        placeholders = dict()

        learning_rate = None
        opts['version_2_with_negative'] = False
        train_iterator = ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()

        # building networks with pipeline
        if not should_be_pipeline_when_inference(opts):

            def bert_net():
                return build_infer_network_without_pipeline(
                    train_iterator,
                    outfeed_queue,
                    iterations_per_step,
                    bert_config=bert_config,
                    opts=opts)
        else:

            def bert_net():
                return build_network(train_iterator, outfeed_queue,
                                     iterations_per_step, bert_config, opts,
                                     learning_rate, is_training)

        with ipu_scope('/device:IPU:0'):
            embedded = opts["embedded_runtime"]

            if embedded and is_training:
                raise ValueError(
                    "embedded_runtime is only to be used for inference.")

            train = ipu.ipu_compiler.compile(bert_net,
                                             []) if not embedded else None

        exec_path = None
        compile_op = None
        poplar_exec_filepath = get_exec_path(
            opts['seq_length'], opts['micro_batch_size'],
            opts['device_mapping'], should_be_pipeline_when_inference(opts))
        exec_path = os.path.join(poplar_exec_filepath)
        compile_op = application_compile_op.experimental_application_compile_op(
            bert_net, output_path=exec_path, freeze_variables=True)

        outfeed = outfeed_queue.dequeue()

        restore = tf.train.Saver(var_list=tf.global_variables())

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()

    # Calculate the number of required IPUs.
    num_ipus = (max(opts['device_mapping']) + 1) * int(opts['replicas'])
    # The number of acquired IPUs must be the power of 2.
    if num_ipus & (num_ipus - 1) != 0:
        num_ipus = 2**int(math.ceil(math.log(num_ipus) / math.log(2)))
    ipu_config = get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection='CLUSTERING',
        compile_only=False,
        ipu_id=None,
        partials_type=opts["partials_type"],
        available_memory_proportion=opts['available_memory_proportion'])

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)
    _ = train_sess.run(train_init, [])
    # -----------------
    # Checkpoint restore and save
    init_checkpoint_path = opts['init_checkpoint']
    logger.info(f"At the checkpoint location {init_checkpoint_path}")
    if init_checkpoint_path:
        logger.info("Loading checkpoint...")
        if os.path.isfile(init_checkpoint_path):
            init_checkpoint_path = os.path.splitext(init_checkpoint_path)[0]
            logger.info(f"checkpoint path: {init_checkpoint_path}")

        (assignment_map, initialized_variable_names
         ) = bert_ipu.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint_path)

        for var in tvars:
            if var.name in initialized_variable_names:
                mark = "*"
            else:
                mark = " "
            logger.info("%-60s [%s]\t%s (%s)", var.name, mark, var.shape,
                        var.dtype.name)

        reader = tf.train.NewCheckpointReader(init_checkpoint_path)
        load_vars = reader.get_variable_to_shape_map()

        saver_restore = tf.train.Saver(assignment_map)
        saver_restore.restore(train_sess, init_checkpoint_path)
    # -----------------
    if compile_op is not None:
        logger.info(
            f"Compiling and saving Poplar executable to {poplar_exec_filepath}"
        )
        _ = train_sess.run(compile_op, [])
    else:
        exec_path = None
    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, restore, tvars,
                    exec_path), ipu_config
Example #10
import tensorflow as tf
import modeling

input_ids = tf.constant([[31, 51, 99], [15, 5, 0]])
input_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
# Token type ids must lie in [0, type_vocab_size); the config below uses
# type_vocab_size=2, so only 0 and 1 are valid.
token_type_ids = tf.constant([[0, 0, 1], [0, 1, 0]])
flat_token_type_ids = tf.reshape(token_type_ids, [-1])
one_hot_token_type_ids = tf.one_hot(flat_token_type_ids, depth=2)

config = modeling.BertConfig(vocab_size=32000,
                             hidden_size=512,
                             num_hidden_layers=8,
                             num_attention_heads=8,
                             intermediate_size=1024,
                             type_vocab_size=2)

model = modeling.BertModel(config=config,
                           is_training=True,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=token_type_ids)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print(sess.run(one_hot_token_type_ids))
    print(sess.run(model.get_all_encoder_layers()))
Example #11
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--data_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
    parser.add_argument("--dataset_name",
                        default="top300_kl", 
                        type=str, 
                        required=True, 
                        help="The name of dataset to inference (without extention ex) top300_kl)")
    parser.add_argument("--model_type",
                        default="baseline_tfidf", 
                        type=str, 
                        required=True, 
                        help="baseline, baseline_tfidf, ir-v0, ir-v1")
    parser.add_argument("--model_path",
                        default=None, 
                        type=str, 
                        required=True, 
                        help="path to model dir")
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="save_path")

    ## Other parameters
    parser.add_argument("--bert_model",
                        default="bert-base-multilingual-cased",
                        type=str,
                        help="Default: bert-base-multilingual-cased" 
                         "Bert pre-trained model selected in the list: bert-base-uncased, "
                        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
                        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--model_file",
                        default="pytorch_model.bin",
                        type=str,
                        help="The file of model (.bin), default is pytorhc_model.bin,\n" 
                             "특정 파일이 필요시 이름 설정 필요")
    parser.add_argument("--max_seq_length",
                        default=384,
                        type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. \n"
                             "Sequences longer than this will be truncated, and sequences shorter \n"
                             "than this will be padded.")
    parser.add_argument("--eval_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument('--fp16',
                        action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    args = parser.parse_args()

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
        device, n_gpu, bool(args.local_rank != -1), args.fp16))

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    processor = IRProcessor()

    label_list = processor.get_labels()
    num_labels = len(label_list)

    print("model:", args.model_type)
    if args.model_type == "baseline": # load model (finetuned baseline on IR)
        tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=False)
        config = BertConfig(os.path.join(args.model_path, "bert_config.json"))
        model = BertForPreTraining(config)
        model.load_state_dict(torch.load(os.path.join(args.model_path, args.model_file)))
    elif args.model_type == "baseline_tfidf": # load model (baseline_tfidf)
        tokenizer = BertTFIDFTokenizer.from_pretrained(args.bert_model, do_lower_case=False, do_tf_idf=True)
        TFIDFconfig = modeling.BertConfig(os.path.join(args.model_path, "bert_config.json"))
        model = modeling.BertTFIDFForPreTraining(TFIDFconfig)
        model.load_state_dict(torch.load(os.path.join(args.model_path, args.model_file)))
    elif args.model_type == "ir-v0": # load model (*-head)
        tokenizer = BertTFIDFTokenizer.from_pretrained(args.bert_model, do_lower_case=False, do_tf_idf=True)
        head_config = modeling_ir.BertForIRConfig(os.path.join(args.model_path, "bert_config.json"))
        model = modeling_ir.BertForIRForPreTraining(head_config)
        model.load_state_dict(torch.load(os.path.join(args.model_path, args.model_file)))
    elif args.model_type == "ir-v1": # load model (*-head)
        tokenizer = BertTFIDFTokenizer.from_pretrained(args.bert_model, do_lower_case=False, do_tf_idf=True)
        head_config = modeling_ir_2.BertForIRConfig(os.path.join(args.model_path, "bert_config.json"))
        model = modeling_ir_2.BertForIRForPreTraining(head_config)
        model.load_state_dict(torch.load(os.path.join(args.model_path, args.model_file)))

    if args.fp16:
        model.half()
    model.to(device)

    tfidf_dict = pickle_load(os.path.join(args.data_dir, args.dataset_name + '_tfidf.pkl'))

    results_logit = dict()
    results_softmax = dict()

    eval_set, documents, queries = processor.make_eval_set(args.data_dir, args.dataset_name)
    logger.info("***** Running evaluation *****")
    logger.info("  Batch size = %d", args.eval_batch_size)
    for q_num, query in tqdm(enumerate(queries), total=len(queries), desc="Evaluating"):
    # for query in queries[0:1]: # for testing

        logger.info(f"Current Query Num : {q_num}")
        eval_examples = processor._create_examples(eval_set, query, documents)
        # logger.info("  Num examples = %d", len(eval_examples))
        if args.model_type == "baseline": # baseline or baseline_finetuned
            eval_features = convert_examples_to_features_for_vanilla(
                eval_examples, label_list, args.max_seq_length, tokenizer)
            all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
            all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
            all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
            all_label_ids = torch.tensor([f.label for f in eval_features], dtype=torch.long)
            eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
            eval_sampler = SequentialSampler(eval_data)
            eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

            model.eval()
            eval_loss = 0
            nb_eval_steps = 0
            preds = []

            for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader, desc="Query"):
                input_ids = input_ids.to(device)
                input_mask = input_mask.to(device)
                segment_ids = segment_ids.to(device)
                label_ids = label_ids.to(device)

                with torch.no_grad():
                    _, logits = model(input_ids, segment_ids, input_mask)

                # loss_fct = CrossEntropyLoss()
                # tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))
                
                # eval_loss += tmp_eval_loss.mean().item()
                # nb_eval_steps += 1
                if len(preds) == 0:
                    preds.append(logits.detach().cpu().numpy())
                else:
                    preds[0] = np.append(
                        preds[0], logits.detach().cpu().numpy(), axis=0)
        else: # baseline_tfidf or *-head model
            eval_data = LazyDatasetClassifier(eval_examples, label_list, args.max_seq_length, tokenizer, tfidf_dict)
            eval_sampler = SequentialSampler(eval_data)
            eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

            model.eval()
            eval_loss = 0
            nb_eval_steps = 0
            preds = []
                
            for batch in tqdm(eval_dataloader, desc="Query"):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_weights, input_mask, segment_ids, label_ids = batch

                with torch.no_grad():
                    _, logits = model(input_ids, input_weights, segment_ids, input_mask)
                
                # loss_fct = CrossEntropyLoss()
                # tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))
                
                # eval_loss += tmp_eval_loss.mean().item()
                nb_eval_steps += 1
                if len(preds) == 0:
                    preds.append(logits.detach().cpu().numpy())
                else:
                    preds[0] = np.append(
                        preds[0], logits.detach().cpu().numpy(), axis=0)


        # eval_loss = eval_loss / nb_eval_steps
        preds = preds[0]

        # Rank documents once by raw logit score and once by softmax probability;
        # results_logit must be filled here or the loop below would raise a KeyError.
        results_logit[query] = []
        results_softmax[query] = []
        for i, (logit, pred) in enumerate(zip(preds, softmax(preds))):
            doc_id = list(documents.keys())[i]
            results_logit[query].append({"score": logit[1], "doc_id": doc_id})
            results_softmax[query].append({"score": pred[1], "doc_id": doc_id})
        results_logit[query].sort(reverse=True, key=lambda x: x["score"])
        results_softmax[query].sort(reverse=True, key=lambda x: x["score"])

        ranked_doc_list = []
        for doc in results_logit[query]:
            ranked_doc_list.append(doc["doc_id"])
        results_logit[query] = ranked_doc_list

        ranked_doc_list = []
        for doc in results_softmax[query]:
            ranked_doc_list.append(doc["doc_id"])
        results_softmax[query] = ranked_doc_list

    save_name2 = args.model_path.split('/')[0] + '_' + args.model_file.split('.')[0] \
                 + '_' + args.dataset_name + '_output.json'
    path2 = os.path.join(args.output_dir,
                         save_name2)

    with open(path2, 'w', encoding="utf8") as f:
        json.dump(results_softmax, f, indent=4, sort_keys=True, ensure_ascii=False)
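The evaluation loop above applies a softmax helper to a numpy array of logits, but the function is not shown. A minimal sketch of such a helper (an assumption; the original may differ):

import numpy as np

def softmax(x):
    # Row-wise softmax over a [num_docs, num_labels] array of logits.
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)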
Example #12
def build_graph(opts, iterations_per_step=1, is_training=True):

    train_graph = tf.Graph()
    with train_graph.as_default():
        if opts["groupbert"]:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.GroupBertConfig(vocab_size=None))
        else:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.BertConfig(vocab_size=None))
        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16
        placeholders = dict()

        if is_training:
            placeholders['learning_rate'] = tf.placeholder(bert_config.dtype,
                                                           shape=[])
            learning_rate = placeholders['learning_rate']
        else:
            learning_rate = None

        train_iterator = ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()

        # building networks with pipeline
        if not should_be_pipeline_when_inference(opts):

            def bert_net():
                return build_infer_network_without_pipeline(
                    train_iterator,
                    outfeed_queue,
                    iterations_per_step,
                    bert_config=bert_config,
                    opts=opts)
        else:

            def bert_net():
                return build_network(train_iterator, outfeed_queue,
                                     iterations_per_step, bert_config, opts,
                                     learning_rate, is_training)

        with ipu_scope('/device:IPU:0'):
            train = ipu.ipu_compiler.compile(bert_net, [])

        outfeed = outfeed_queue.dequeue()

        restore = tf.train.Saver(var_list=tf.global_variables())
        train_saver = tf.train.Saver(max_to_keep=5)

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()
    """calculate the number of required IPU"""
    num_ipus = (max(opts['device_mapping']) + 1) * int(opts['replicas'])
    # The number of acquired IPUs must be the power of 2.
    if num_ipus & (num_ipus - 1) != 0:
        num_ipus = 2**int(math.ceil(math.log(num_ipus) / math.log(2)))
    ipu_config = get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection='CLUSTERING',
        compile_only=False,
        ipu_id=None,
        partials_type=opts["partials_type"])

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)

    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, train_saver, restore, tvars)
Example #13
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig(256)

    model_fn = model_fn_builder(bert_config=bert_config,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=FLAGS.num_train_steps,
                                num_warmup_steps=FLAGS.num_warmup_steps)

    max_seq_length = FLAGS.max_seq_length
    max_predictions_per_seq = FLAGS.max_predictions_per_seq

    with tf.name_scope("input"):
        input_ids = tf.placeholder(
            shape=[FLAGS.train_batch_size, max_seq_length], dtype=tf.int32)
        input_mask = tf.placeholder(
            shape=[FLAGS.train_batch_size, max_seq_length], dtype=tf.int32)
        segment_ids = tf.placeholder(
            shape=[FLAGS.train_batch_size, max_seq_length], dtype=tf.int32)
        masked_lm_positions = tf.placeholder(
            shape=[FLAGS.train_batch_size, max_predictions_per_seq],
            dtype=tf.int32)
        masked_lm_ids = tf.placeholder(
            shape=[FLAGS.train_batch_size, max_predictions_per_seq],
            dtype=tf.int32)
        masked_lm_weights = tf.placeholder(
            shape=[FLAGS.train_batch_size, max_predictions_per_seq],
            dtype=tf.float32)
        next_sentence_labels = tf.placeholder(
            shape=[FLAGS.train_batch_size, 1], dtype=tf.int32)

    features = {
        "input_ids": input_ids,
        "input_mask": input_mask,
        "segment_ids": segment_ids,
        "masked_lm_positions": masked_lm_positions,
        "masked_lm_ids": masked_lm_ids,
        "masked_lm_weights": masked_lm_weights,
        "next_sentence_labels": next_sentence_labels
    }

    train_op = model_fn(features, None, None, None)

    infer_shape_ops = add_infer_shape_ops()

    hooks = [
        # Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states
        # from rank 0 to all other processes. This is necessary to ensure consistent
        # initialization of all workers when training is started with random weights
        # or restored from a checkpoint.

        # Horovod: adjust number of steps based on number of GPUs.
        tf.train.StopAtStepHook(last_step=205),
    ]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(0)

    training_batch_generator = train_input_generator(features)

    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           config=config) as mon_sess:
        mon_sess = TimelineSession(mon_sess, infer_shape_ops)
        while not mon_sess.should_stop():
            # Run a training step synchronously.
            feed_dict = next(training_batch_generator)
            mon_sess.run([train_op], feed_dict=feed_dict)
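train_input_generator is not defined in this excerpt. A minimal stand-in, assuming it only needs to yield feed dicts keyed by the placeholders above (the real generator presumably reads actual training data):

import numpy as np
import tensorflow as tf

def train_input_generator(features):
    # Hypothetical stand-in: endlessly yields feed dicts of dummy values
    # shaped like the placeholders in `features`.
    while True:
        feed_dict = {}
        for placeholder in features.values():
            shape = placeholder.shape.as_list()
            if placeholder.dtype == tf.int32:
                feed_dict[placeholder] = np.random.randint(0, 2, size=shape).astype(np.int32)
            else:
                feed_dict[placeholder] = np.ones(shape, dtype=np.float32)
        yield feed_dict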
Example #14
 def test_config_to_json_string(self):
     config = modeling.BertConfig(s_vocab=99, d_hidden=37)
     obj = json.loads(config.to_json_string())
     self.assertEqual(obj["s_vocab"], 99)
     self.assertEqual(obj["d_hidden"], 37)
Example #15
#input_ids = tf.constant(np.random.randint(1,128, [2, 3]))
input_ids = tf.placeholder(shape=[2, 3], dtype=tf.int32, name='input_ids')

#input_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
#input_mask = tf.constant(np.random.randint(0,1, [2, 3]))
input_mask = tf.placeholder(shape=[2, 3], dtype=tf.int32, name='input_mask')

#token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]])
#token_type_ids = tf.constant(np.random.randint(0,2, [2, 3]))
token_type_ids = tf.placeholder(shape=[2, 3],
                                dtype=tf.int32,
                                name='token_type_ids')

config = modeling.BertConfig(vocab_size=32000,
                             hidden_size=768,
                             num_hidden_layers=8,
                             num_attention_heads=6,
                             intermediate_size=1024)

model = modeling.BertModel(config=config,
                           is_training=True,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=token_type_ids)

label_embeddings = tf.get_variable(
    name="word_embeddings",
    shape=[768, 12],
    initializer=tf.truncated_normal_initializer(0.02))
pooled_output = model.get_pooled_output()
logits = tf.matmul(pooled_output, label_embeddings)
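The snippet stops at the logits. A hypothetical continuation attaching a 12-way classification loss; the labels placeholder and the optimizer choice below are assumptions for illustration only:

labels = tf.placeholder(shape=[2], dtype=tf.int32, name='labels')
log_probs = tf.nn.log_softmax(logits, axis=-1)
one_hot_labels = tf.one_hot(labels, depth=12, dtype=tf.float32)
per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
loss = tf.reduce_mean(per_example_loss)
train_op = tf.train.AdamOptimizer(learning_rate=2e-5).minimize(loss)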
Example #16
 def test_config_to_json_string(self):
     config = modeling.BertConfig(vocab_size=99, hidden_size=37)
     obj = json.loads(config.to_json_string())
     self.assertEqual(obj["vocab_size"], 99)
     self.assertEqual(obj["hidden_size"], 37)
Example #17
def build_graph(opts, iterations_per_step=1, is_training=True):

    train_graph = tf.Graph()
    with train_graph.as_default():
        if opts["groupbert"]:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.GroupBertConfig(vocab_size=None))
        else:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.BertConfig(vocab_size=None))
        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16
        placeholders = dict()

        if is_training:
            placeholders['learning_rate'] = tf.placeholder(bert_config.dtype,
                                                           shape=[])
            learning_rate = placeholders['learning_rate']
        else:
            learning_rate = None

        # Need to load the GLUE file here
        label_list = opts["pass_in"][1]
        bert_config.num_lables = len(label_list)
        if opts['do_training'] and opts['current_mode'] == 'train':
            input_file = os.path.join(opts["output_dir"],
                                      f"train_{opts['task_type']}.tf_record")
        elif opts['do_eval'] and opts['current_mode'] == 'eval':
            input_file = os.path.join(opts["output_dir"],
                                      f"eval_{opts['task_type']}.tf_record")
        elif opts['do_predict'] and opts['current_mode'] == 'predict':
            input_file = os.path.join(
                opts["output_dir"], f"predict_{opts['task_type']}.tf_record")
        else:
            raise NotImplementedError()

        opts['input_file'] = input_file
        opts['drop_remainder'] = True

        train_iterator = ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()

        def bert_net():
            return build_network(train_iterator, outfeed_queue,
                                 iterations_per_step, bert_config, opts,
                                 learning_rate, is_training)

        with ipu_scope('/device:IPU:0'):
            train = ipu.ipu_compiler.compile(bert_net, [])

        outfeed = outfeed_queue.dequeue()

        log.print_trainable_variables(opts)

        restore = tf.train.Saver(var_list=tf.global_variables())
        train_saver = tf.train.Saver(max_to_keep=5)

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()
    """calculate the number of required IPU"""
    num_ipus = (max(opts['device_mapping']) + 1) * int(opts['replicas'])
    # The number of acquired IPUs must be the power of 2.
    if num_ipus & (num_ipus - 1) != 0:
        num_ipus = 2**int(math.ceil(math.log(num_ipus) / math.log(2)))
    ipu_config = get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection='CLUSTERING',
        compile_only=False,
        ipu_id=None,
        available_memory_proportion=opts["available_memory_proportion"])

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)

    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, train_saver, restore, tvars)
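A hypothetical driver for the build_graph variants above, assuming the GraphOps namedtuple sketched after Example #7 and an `opts` dict of the kind used throughout these examples:

# Hypothetical usage; field names follow the GraphOps sketch after Example #7.
graph_ops = build_graph(opts, iterations_per_step=1, is_training=True)
graph_ops.session.run(graph_ops.init)
graph_ops.session.run(graph_ops.iterator.initializer)
for step in range(100):
    graph_ops.session.run(graph_ops.ops, feed_dict={
        graph_ops.placeholders['learning_rate']: 1e-4})
    print(graph_ops.session.run(graph_ops.outfeed))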