def create_dataset(args):
    # a simple copy of main bert.py until the dataset creation
    config = BertConfig()
    model = Bert(config, builder=popart.Builder())
    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    inputs = [indices, positions, segments, masks, labels]
    embedding_dict, positional_dict = model.get_model_embeddings()

    shapeOf = model.builder.getTensorShape
    inputs = reduce(chain, inputs[3:], inputs[:3])
    tensor_shapes = [(tensorId, shapeOf(tensorId)) for tensorId in inputs]

    dataset = get_bert_dataset(tensor_shapes,
                               input_file=args.input_files,
                               output_dir=args.output_dir,
                               sequence_length=args.sequence_length,
                               vocab_file=args.vocab_file,
                               vocab_length=args.vocab_length,
                               batch_size=args.batch_size,
                               batches_per_step=args.batches_per_step,
                               embedding_dict=embedding_dict,
                               positional_dict=positional_dict,
                               generated_data=args.generated_data,
                               is_training=False,
                               no_drop_remainder=True,
                               shuffle=args.shuffle,
                               mpi_size=args.mpi_size,
                               is_distributed=(args.mpi_size > 1))
    return dataset
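# Hedged usage sketch for the create_dataset variant above. It lists only the
# argparse-style attributes that the function and its get_bert_dataset call
# actually read; the values and file paths are placeholders, not settings taken
# from the original repository. Running the commented call still needs the full
# BERT example code and its data files.
from argparse import Namespace

example_args = Namespace(
    input_files=["data/sample.tfrecord"],   # placeholder path
    output_dir="results",                   # placeholder path
    sequence_length=128,
    vocab_file="data/vocab.txt",            # placeholder path
    vocab_length=30400,                     # placeholder value
    batch_size=2,
    batches_per_step=100,
    generated_data=False,
    shuffle=True,
    mpi_size=1,
)
# dataset = create_dataset(example_args)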
def training_run(bert_args, config, initializers, checkpoint_paths):
    logger.info("Building Model")
    model = Bert(config,
                 builder=popart.Builder(opsets={
                     "ai.onnx": 9,
                     "ai.onnx.ml": 1,
                     "ai.graphcore": 1
                 }),
                 initializers=initializers,
                 execution_mode=bert_args.execution_mode)

    indices, positions, segments, masks, labels = bert_add_inputs(bert_args, model)
    logits = bert_logits_graph(model, indices, positions, segments, masks,
                               bert_args.execution_mode)
    predictions, probs = bert_infer_graph(model, logits)
    losses = bert_loss_graph(model, probs, labels)
    outputs = bert_add_validation_outputs(model, predictions, losses)
    embedding_dict, positional_dict = model.get_model_embeddings()

    dataset = get_bert_dataset(model, bert_args,
                               [indices, positions, segments, masks, labels],
                               embedding_dict, positional_dict)

    data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

    request_ipus, _ = calc_required_ipus(bert_args, model)
    device = acquire_device(bert_args, request_ipus)

    logger.info(f"Dataset length: {len(dataset)}")
    writer = bert_writer(bert_args)

    iteration = Iteration(
        bert_args,
        batches_per_step=dataset.batches_per_step,
        steps_per_epoch=len(dataset),
        writer=writer,
        recording_steps=bert_args.aggregate_metrics_over_steps)
    optimizer_factory = ScheduledOptimizerFactory(bert_args, iteration, "SGD",
                                                  model.tensors)
    session, anchors = bert_training_session(model, bert_args, data_flow,
                                             losses, device, optimizer_factory)

    for path in checkpoint_paths:
        ckpt_name = os.path.splitext(os.path.basename(path))[0]
        session.resetHostWeights(os.path.abspath(path))
        session.weightsFromHost()

        logger.info(f"Fine-tuning started for checkpoint: {path}")
        run_fine_tuning_store_ckpt(bert_args, model, ckpt_name, session, dataset,
                                   predictions, losses, labels, anchors)

    device.detach()
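# Hedged sketch of how training_run above might be driven: collect the ONNX
# checkpoints to fine-tune with glob and hand them over together with the args,
# config and initializers that the other functions in this file build. The
# checkpoint directory pattern and the wrapper function name are assumptions,
# not code from the original repository.
import glob

def fine_tune_all_checkpoints(bert_args):
    config = bert_config_from_args(bert_args)
    initializers = bert_pretrained_initialisers(config, bert_args)
    # Placeholder pattern; point this at wherever the pretraining run saved its weights.
    checkpoint_paths = sorted(glob.glob("checkpoints/pretraining/*.onnx"))
    training_run(bert_args, config, initializers, checkpoint_paths)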
def create_dataset(args):
    # a simple copy of main bert.py until the dataset creation
    config = BertConfig()
    model = Bert(config, builder=popart.Builder())
    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    inputs = [indices, positions, segments, masks, labels]
    embedding_dict, positional_dict = model.get_model_embeddings()

    shapeOf = model.builder.getTensorShape
    inputs = reduce(chain, inputs[3:], inputs[:3])
    tensor_shapes = [(tensorId, shapeOf(tensorId)) for tensorId in inputs]

    dataset = get_bert_dataset(args, tensor_shapes)
    return dataset
def run_embedding_layer(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)

    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    # Specifying ai.onnx opset9 for the slice syntax
    # TODO: Change slice to opset10
    model = Bert(config,
                 builder=popart.Builder(opsets={
                     "ai.onnx": 9,
                     "ai.onnx.ml": 1,
                     "ai.graphcore": 1
                 }),
                 initializers=initializers,
                 execution_mode=args.execution_mode)

    # If config.host_embedding is enabled, indices and positions will have the matrices instead of the index vector.
    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    logits = tuple([model.embedding(indices, positions, segments)])

    if args.inference:
        outputs = bert_add_infer_outputs(model, logits)
        losses = []
        writer = None

        embedding_dict, positional_dict = model.get_model_embeddings()
        dataset = get_bert_dataset(model, args,
                                   [indices, positions, segments, masks, labels],
                                   embedding_dict, positional_dict)

        data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

        iteration = Iteration(
            args,
            batches_per_step=dataset.batches_per_step,
            steps_per_epoch=len(dataset),
            writer=writer,
            recording_steps=args.aggregate_metrics_over_steps)

        request_ipus, required_ipus = calc_required_ipus(args, model)
        device = acquire_device(args, request_ipus)

        session, anchors = bert_inference_session(model, args, data_flow, losses, device)
        logger.info("Inference Started")
        inputs = [indices, positions, segments, *masks]
        # bert_infer_loop(args, session, dataset, inputs, logits, anchors, iteration)

        save_results = args.task == "SQUAD" and not args.synthetic_data

        start_times = defaultdict(list)
        end_times = defaultdict(list)
        # Create the stepio once outside of the inference loop:
        static_data = {}
        if args.low_latency_inference and args.task == "SQUAD":
            stepio = create_callback_stepio(static_data, anchors, start_times,
                                            end_times, dataset.batches_per_step)
        else:
            stepio = None

        enable_realtime_scheduling(args)

        output = []
        logger.info(dataset)
        for data in dataset:
            static_data.update({t: data[t] for t in inputs})
            result = bert_process_infer_data(args, session, static_data, anchors,
                                             logits, iteration,
                                             start_times, end_times, stepio)
            if save_results:
                output.append(result)
            break

        disable_realtime_scheduling(args)

        device.detach()
        return output

    return None
def main(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)

    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    # Specifying ai.onnx opset9 for the slice syntax
    model = Bert(config,
                 builder=popart.Builder(opsets={
                     "ai.onnx": 9,
                     "ai.onnx.ml": 1,
                     "ai.graphcore": 1
                 }),
                 initializers=initializers,
                 execution_mode=args.execution_mode)

    # If config.host_embedding is enabled, indices and positions will have the matrices instead of the index vector.
    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    logits = bert_logits_graph(model, indices, positions, segments, masks)

    if args.inference:
        predictions = None
        losses = []
        if args.task == "PRETRAINING":
            # If this is a pretraining session, labels for NSP and MLM are already within the dataset,
            # so we can always calculate prediction performance
            predictions, _ = bert_infer_graph(model, logits, include_probs=False)

            if args.inference_lm_perplexity:
                losses = bert_perplexity_graph(model, logits, labels)

            outputs = bert_add_validation_outputs(model, predictions, losses)
        else:
            if args.inference_lm_perplexity:
                raise RuntimeError("Masked LM perplexity is only supported in pretraining.")

            outputs = bert_add_logit_outputs(model, logits)

        writer = None
    else:
        predictions, probs = bert_infer_graph(model, logits)
        losses = bert_loss_graph(model, probs, labels)
        outputs = bert_add_validation_outputs(model, predictions, losses)
        writer = bert_writer(args)

    embedding_dict, positional_dict = model.get_model_embeddings()

    dataset = get_bert_dataset(model, args,
                               [indices, positions, segments, masks, labels],
                               embedding_dict, positional_dict,
                               config.host_embedding == "MERGE")

    logger.info(f"Dataset length: {len(dataset)}")

    data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

    iteration = Iteration(args,
                          batches_per_step=dataset.batches_per_step,
                          steps_per_epoch=len(dataset),
                          writer=writer,
                          recording_steps=args.aggregate_metrics_over_steps)

    request_ipus, required_ipus = calc_required_ipus(args, model)
    device = acquire_device(args, request_ipus)

    if args.inference:
        session, anchors = bert_inference_session(model, args, data_flow, device)
        logger.info("Inference Started")
        inputs = [indices, positions, segments, *masks, *labels]
        bert_infer_loop(args, session, dataset, inputs,
                        logits, anchors, labels, predictions, losses, iteration)
        device.detach()
    else:
        if not args.no_training:
            optimizer_factory = ScheduledOptimizerFactory(args, iteration,
                                                          model.tensors)

            session, anchors = bert_training_session(model, args, data_flow,
                                                     losses, device,
                                                     optimizer_factory)
            logger.info("Training Started")
            bert_train_loop(args, session, writer, dataset,
                            labels, predictions, losses,
                            anchors, iteration, optimizer_factory)

            device.detach()
            logger.info("Training Finished")

    return session, iteration
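# Hedged entry-point sketch for the main() above. parse_bert_args() is a
# stand-in name for whatever builds the args namespace in the real script; it
# must at least provide the attributes main() reads (e.g. seed, inference,
# task, no_training, aggregate_metrics_over_steps).
if __name__ == "__main__":
    args = parse_bert_args()   # hypothetical argument parser
    main(args)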
def main(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)

    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    # Specifying ai.onnx opset9 for the slice syntax
    # TODO: Change slice to opset10
    model = Bert(config,
                 builder=popart.Builder(opsets={
                     "ai.onnx": 9,
                     "ai.onnx.ml": 1,
                     "ai.graphcore": 1
                 }),
                 initializers=initializers,
                 execution_mode=args.execution_mode)

    # If config.host_embedding is enabled, indices and positions will have the matrices instead of the index vector.
    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    logits = bert_logits_graph(model, indices, positions, segments, masks)

    if args.inference:
        outputs = bert_add_infer_outputs(model, logits)
        losses = []
        writer = None
    else:
        predictions, probs = bert_infer_graph(model, logits)
        losses = bert_loss_graph(model, probs, labels)
        outputs = bert_add_validation_outputs(model, predictions, losses)
        writer = bert_writer(args)

    embedding_dict, positional_dict = model.get_model_embeddings()

    dataset = get_bert_dataset(model, args,
                               [indices, positions, segments, masks, labels],
                               embedding_dict, positional_dict)

    logger.info(f"Dataset length: {len(dataset)}")

    data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

    iteration = Iteration(args,
                          batches_per_step=dataset.batches_per_step,
                          steps_per_epoch=len(dataset),
                          writer=writer,
                          recording_steps=args.aggregate_metrics_over_steps)

    request_ipus, required_ipus = calc_required_ipus(args, model)
    device = acquire_device(args, request_ipus)

    if args.inference:
        session, anchors = bert_inference_session(model, args, data_flow, losses, device)
        logger.info("Inference Started")
        inputs = [indices, positions, segments, *masks]
        bert_infer_loop(args, session, dataset, inputs,
                        logits, anchors, iteration)
        device.detach()
    else:
        if not args.no_training:
            optimizer_factory = ScheduledOptimizerFactory(args, iteration,
                                                          model.pipeline_stage_tensors)

            session, anchors = bert_training_session(model, args, data_flow,
                                                     losses, device,
                                                     optimizer_factory)
            logger.info("Training Started")
            bert_train_loop(args, session, writer, dataset,
                            labels, predictions, losses,
                            anchors, iteration, optimizer_factory)

            device.detach()
            logger.info("Training Finished")

    return session, iteration