def test_onnx_to_caffe2_zipfile(self):
    buf = tempfile.NamedTemporaryFile()
    onnx_model = zipfile.ZipFile(buf, 'w')

    node_def = helper.make_node("MatMul", ["X", "W"], ["Y"])
    X = np.random.rand(2, 3).astype(np.float32)
    W = np.random.rand(3, 2).flatten().astype(np.float32)
    graph_def = helper.make_graph(
        [node_def],
        "test",
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, (2, 3)),
         helper.make_tensor_value_info("W", TensorProto.FLOAT, (3, 2))],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, (2, 2))],
        # W's raw payload is the b'__EXTERNAL' sentinel: its real data
        # lives in a separate zip entry named after the tensor.
        initializer=[helper.make_tensor("W", TensorProto.FLOAT, [3, 2],
                                        b'__EXTERNAL', raw=True)])
    model_def = helper.make_model(graph_def,
                                  producer_name='onnx-to-caffe2-test')

    # The archive stores the serialized ModelProto under '__MODEL_PROTO',
    # plus one raw-bytes entry per externally stored tensor.
    onnx_model.writestr('__MODEL_PROTO', model_def.SerializeToString())
    onnx_model.writestr('W', W.tobytes())
    onnx_model.close()

    W = W.reshape((3, 2))
    Y_expect = np.matmul(X, W)

    c2_model = c2.prepare_zip_archive(buf)
    Y = c2_model.run(X).Y
    np.testing.assert_allclose(Y, Y_expect)

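# A small hedged generalization of the archive convention the test above
# exercises. write_onnx_zip_archive is a hypothetical helper, not part of
# the original code; it assumes only what the test itself shows: the
# serialized ModelProto goes under the '__MODEL_PROTO' entry, and each
# externally stored tensor gets its own entry named after the tensor.
def write_onnx_zip_archive(fileobj, model_def, external_tensors):
    """external_tensors: dict mapping tensor name -> numpy ndarray."""
    with zipfile.ZipFile(fileobj, 'w') as archive:
        archive.writestr('__MODEL_PROTO', model_def.SerializeToString())
        for name, array in external_tensors.items():
            archive.writestr(name, array.tobytes())
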
def pytorch_to_caffe2(
    model,
    export_input,
    external_input_names,
    output_names,
    export_path,
    export_onnx_path=None,
):
    num_tensors = 0
    for inp in export_input:
        num_tensors += len(inp) if isinstance(inp, (tuple, list)) else 1
    assert len(external_input_names) == num_tensors
    all_input_names = external_input_names[:]
    for name, _ in model.named_parameters():
        all_input_names.append(name)

    # export the pytorch model to ONNX
    if export_onnx_path:
        print(f"Saving onnx model to: {export_onnx_path}")
    else:
        export_onnx_path = export_path
    model.eval()
    with torch.no_grad():
        torch.onnx._export(
            model,
            export_input,
            export_onnx_path,
            input_names=all_input_names,
            output_names=output_names,
            export_params=True,
            operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
            export_type=ExportTypes.ZIP_ARCHIVE,
        )

    # Convert the ONNX model to a caffe2 net
    c2_prepared = caffe2_backend.prepare_zip_archive(export_onnx_path)
    return c2_prepared

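# A minimal usage sketch for pytorch_to_caffe2 above. The toy model, input
# shape, and export path are illustrative assumptions, not part of the
# original code.
def example_pytorch_to_caffe2_usage():
    # Hypothetical two-layer model; any traceable nn.Module would do.
    model = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.ReLU())
    export_input = (torch.rand(1, 4),)
    c2_prepared = pytorch_to_caffe2(
        model,
        export_input,
        external_input_names=["input"],  # one name per input tensor
        output_names=["output"],
        export_path="/tmp/model.zip",    # assumed scratch location
    )
    # The returned caffe2 backend rep runs directly on numpy inputs.
    return c2_prepared.run((export_input[0].numpy(),))
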
def _test_ensemble_encoder_object_export(self, encoder_ensemble):
    tmp_dir = tempfile.mkdtemp()
    encoder_pb_path = os.path.join(tmp_dir, "encoder.pb")
    encoder_ensemble.onnx_export(encoder_pb_path)

    src_dict = encoder_ensemble.models[0].src_dict
    token_list = [src_dict.unk()] * 4 + [src_dict.eos()]
    src_tokens = torch.LongTensor(
        np.array(token_list, dtype="int64").reshape(-1, 1))
    src_lengths = torch.IntTensor(
        np.array([len(token_list)], dtype="int32"))

    pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)

    srclen = src_tokens.size(1)
    beam_size = 1
    src_tokens = src_tokens.repeat(1, beam_size).view(-1, srclen).numpy()
    src_lengths = src_lengths.repeat(beam_size).numpy()

    caffe2_encoder_outputs = onnx_encoder.run((src_tokens, src_lengths))

    for i in range(len(pytorch_encoder_outputs)):
        caffe2_out_value = caffe2_encoder_outputs[i]
        pytorch_out_value = pytorch_encoder_outputs[i].detach().numpy()
        np.testing.assert_allclose(
            caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6)

    encoder_ensemble.save_to_db(
        os.path.join(tmp_dir, "encoder.predictor_export"))

def _test_full_ensemble_export(self, test_args):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(models.build_model(test_args, src_dict, tgt_dict))
    encoder_ensemble = EncoderEnsemble(model_list)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    decoder_step_ensemble = DecoderStepEnsemble(model_list, beam_size=5)

    tmp_dir = tempfile.mkdtemp()
    decoder_step_pb_path = os.path.join(tmp_dir, "decoder_step.pb")
    decoder_step_ensemble.onnx_export(decoder_step_pb_path,
                                      pytorch_encoder_outputs)

    # single EOS
    input_token = torch.LongTensor(
        np.array([[model_list[0].dst_dict.eos()]]))
    timestep = torch.LongTensor(np.array([[0]]))

    pytorch_decoder_outputs = decoder_step_ensemble(
        input_token, timestep, *pytorch_encoder_outputs)

    onnx_decoder = caffe2_backend.prepare_zip_archive(decoder_step_pb_path)

    decoder_inputs_numpy = [input_token.numpy(), timestep.numpy()]
    for tensor in pytorch_encoder_outputs:
        decoder_inputs_numpy.append(tensor.detach().numpy())

    caffe2_decoder_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

    for i in range(len(pytorch_decoder_outputs)):
        caffe2_out_value = caffe2_decoder_outputs[i]
        pytorch_out_value = pytorch_decoder_outputs[i].detach().numpy()
        np.testing.assert_allclose(
            caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6)

    decoder_step_ensemble.save_to_db(
        os.path.join(tmp_dir, "decoder_step.predictor_export"),
        pytorch_encoder_outputs,
    )

def test_onnx_export(self):
    # Setup embedding
    num_embeddings = 5
    lstm_dim = 8
    embedding_module = WordSeqEmbedding(
        lstm_config=BiLSTM.Config(
            lstm_dim=lstm_dim, num_layers=2, bidirectional=True),
        num_embeddings=num_embeddings,
        word_embed_dim=4,
        embeddings_weight=None,
        init_range=[-1, 1],
        unk_token_idx=4,
    )
    input_batch_size, max_seq_len, max_token_count = 1, 3, 5
    seq_token_idx = torch.randint(
        low=0,
        high=num_embeddings,
        size=[input_batch_size, max_seq_len, max_token_count],
    )
    seq_token_count = torch.randint(
        low=1, high=max_token_count, size=[input_batch_size, max_seq_len])

    dummy_inputs = (seq_token_idx, seq_token_count)
    with tempfile.TemporaryFile() as tmp_file:
        with torch.no_grad():
            torch.onnx._export(
                embedding_module,
                dummy_inputs,
                tmp_file,
                input_names=["seq_token_idx", "seq_token_count"],
                output_names=["embedding"],
                export_params=True,
                operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
                opset_version=9,
                export_type=ExportTypes.ZIP_ARCHIVE,
            )
        # make sure caffe2 can load
        caffe2_backend.prepare_zip_archive(tmp_file)

def save_to_db(self, output_path):
    """
    Save encapsulated beam search.
    """
    tmp_dir = tempfile.mkdtemp()
    tmp_file = os.path.join(tmp_dir, 'beam_search.pb')
    self.onnx_export(tmp_file)

    beam_search = caffe2_backend.prepare_zip_archive(tmp_file)

    save_caffe2_rep_to_db(
        caffe2_backend_rep=beam_search,
        output_path=output_path,
        input_names=self.input_names,
        output_names=self.output_names,
        num_workers=2 * len(self.models),
    )

def save_to_db(self, output_path):
    """
    Save encapsulated encoder export file.
    """
    tmp_dir = tempfile.mkdtemp()
    tmp_file = os.path.join(tmp_dir, "encoder.pb")
    self.onnx_export(tmp_file)

    onnx_encoder = caffe2_backend.prepare_zip_archive(tmp_file)

    save_caffe2_rep_to_db(
        caffe2_backend_rep=onnx_encoder,
        output_path=output_path,
        input_names=["encoder_inputs", "encoder_lengths"],
        output_names=self.output_names,
        num_workers=2 * len(self.models),
    )

def _test_ensemble_encoder_export_char_source(self, test_args):
    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))
    encoder_ensemble = CharSourceEncoderEnsemble(model_list)

    tmp_dir = tempfile.mkdtemp()
    encoder_pb_path = os.path.join(tmp_dir, "char_encoder.pb")
    encoder_ensemble.onnx_export(encoder_pb_path)

    length = 5
    src_tokens = torch.LongTensor(np.ones((length, 1), dtype="int64"))
    src_lengths = torch.IntTensor(np.array([length], dtype="int32"))
    word_length = 3
    char_inds = torch.LongTensor(
        np.ones((1, length, word_length), dtype="int64"))
    word_lengths = torch.IntTensor(
        np.array([word_length] * length, dtype="int32")).reshape((1, length))

    pytorch_encoder_outputs = encoder_ensemble(
        src_tokens, src_lengths, char_inds, word_lengths)

    onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)

    caffe2_encoder_outputs = onnx_encoder.run((
        src_tokens.numpy(),
        src_lengths.numpy(),
        char_inds.numpy(),
        word_lengths.numpy(),
    ))

    for i in range(len(pytorch_encoder_outputs)):
        caffe2_out_value = caffe2_encoder_outputs[i]
        pytorch_out_value = pytorch_encoder_outputs[i].detach().numpy()
        np.testing.assert_allclose(
            caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6)

    encoder_ensemble.save_to_db(
        os.path.join(tmp_dir, "encoder.predictor_export"))

def save_to_db(self, output_path, encoder_ensemble_outputs):
    """
    Save encapsulated decoder step export file.
    Example encoder_ensemble_outputs (PyTorch tensors) from the
    corresponding encoder are needed to run the network once.
    """
    tmp_dir = tempfile.mkdtemp()
    tmp_file = os.path.join(tmp_dir, "decoder_step.pb")
    self.onnx_export(tmp_file, encoder_ensemble_outputs)

    onnx_decoder_step = caffe2_backend.prepare_zip_archive(tmp_file)

    save_caffe2_rep_to_db(
        caffe2_backend_rep=onnx_decoder_step,
        output_path=output_path,
        input_names=self.input_names,
        output_names=self.output_names,
        num_workers=2 * len(self.models),
    )

def test_int8_export(self):
    class MyModel(torch.nn.Module):
        def __init__(self):
            super(MyModel, self).__init__()
            self.param = torch.ByteTensor(3, 4).random_()

        def forward(self, x):
            return x * self.param.float()

    import io
    f = io.BytesIO()
    from torch.onnx import ExportTypes
    torch.onnx._export(MyModel(), (torch.rand(3, 4),), f, verbose=True,
                       export_type=ExportTypes.ZIP_ARCHIVE)

    X = np.random.rand(3, 4).astype(np.float32)

    f.seek(0)
    import caffe2.python.onnx.backend as c2
    model = c2.prepare_zip_archive(f)
    model.run(X)

def _test_ensemble_encoder_export(self, test_args):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))
    encoder_ensemble = EncoderEnsemble(model_list)

    tmp_dir = tempfile.mkdtemp()
    encoder_pb_path = os.path.join(tmp_dir, "encoder.pb")
    encoder_ensemble.onnx_export(encoder_pb_path)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)

    caffe2_encoder_outputs = onnx_encoder.run(
        (src_tokens.numpy(), src_lengths.numpy()))

    for i in range(len(pytorch_encoder_outputs)):
        caffe2_out_value = caffe2_encoder_outputs[i]
        pytorch_out_value = pytorch_encoder_outputs[i].detach().numpy()
        np.testing.assert_allclose(
            caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6)

    encoder_ensemble.save_to_db(
        os.path.join(tmp_dir, "encoder.predictor_export"))

def _test_batched_beam_decoder_step(self, test_args,
                                    return_caffe2_rep=False):
    beam_size = 5
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))
    encoder_ensemble = EncoderEnsemble(model_list)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    decoder_step_ensemble = DecoderBatchedStepEnsemble(
        model_list, tgt_dict, beam_size=beam_size)

    tmp_dir = tempfile.mkdtemp()
    decoder_step_pb_path = os.path.join(tmp_dir, "decoder_step.pb")
    decoder_step_ensemble.onnx_export(decoder_step_pb_path,
                                      pytorch_encoder_outputs)

    # single EOS in flat array
    input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
    prev_scores = torch.FloatTensor(np.array([0.0]))
    timestep = torch.LongTensor(np.array([0]))

    pytorch_first_step_outputs = decoder_step_ensemble(
        input_tokens, prev_scores, timestep, *pytorch_encoder_outputs)

    # next step inputs (input_tokens shape: [beam_size])
    next_input_tokens = torch.LongTensor(np.array([i for i in range(4, 9)]))
    next_prev_scores = pytorch_first_step_outputs[1]
    next_timestep = timestep + 1
    next_states = list(pytorch_first_step_outputs[4:])

    # Tile these for the next timestep
    for i in range(len(model_list)):
        next_states[i] = next_states[i].repeat(1, beam_size, 1)

    pytorch_next_step_outputs = decoder_step_ensemble(
        next_input_tokens, next_prev_scores, next_timestep, *next_states)

    onnx_decoder = caffe2_backend.prepare_zip_archive(decoder_step_pb_path)
    if return_caffe2_rep:
        return onnx_decoder

    decoder_inputs_numpy = [
        next_input_tokens.numpy(),
        next_prev_scores.detach().numpy(),
        next_timestep.detach().numpy(),
    ]
    for tensor in next_states:
        decoder_inputs_numpy.append(tensor.detach().numpy())

    caffe2_next_step_outputs = onnx_decoder.run(
        tuple(decoder_inputs_numpy))

    for i in range(len(pytorch_next_step_outputs)):
        caffe2_out_value = caffe2_next_step_outputs[i]
        pytorch_out_value = pytorch_next_step_outputs[i].detach().numpy()
        np.testing.assert_allclose(
            caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6)

    decoder_step_ensemble.save_to_db(
        output_path=os.path.join(tmp_dir, "decoder.predictor_export"),
        encoder_ensemble_outputs=pytorch_encoder_outputs,
    )