def backward(self, input, target):
    """
    Run one back-propagation step through the criterion with respect to
    the given input.

    NB: It's for debug only, please use optimizer.optimize() in production.

    :param input: ndarray or list of ndarray
    :param target: ndarray or list of ndarray
    :return: ndarray
    """
    checked_input = Model.check_input(input)
    checked_target = Model.check_input(target)
    grad_input = callBigDlFunc(self.bigdl_type,
                               "criterionBackward",
                               self.value,
                               checked_input,
                               checked_target)
    # Convert the raw call result back to the Python-side representation.
    return Model.convert_output(grad_input)
def load_keras(json_path=None, hdf5_path=None, by_name=False):
    """
    Load a pre-trained Keras model.

    Thin wrapper that delegates to ``BModel.load_keras`` and hands its
    result back to the caller.

    :param json_path: The json path containing the keras model definition. Default is None.
    :param hdf5_path: The HDF5 path containing the pre-trained keras model weights
                      with or without the model architecture. Default is None.
    :param by_name: by default the architecture should be unchanged.
                    If set as True, only layers with the same name will be loaded.
    :return: A BigDL model.
    """
    # The delegate's result must be returned; dropping it made every caller
    # receive None instead of the loaded model.
    return BModel.load_keras(json_path, hdf5_path, by_name)
def forward(self, input, target):
    """
    Take an input object and compute the corresponding loss of the
    criterion, compared with `target`.

    NB: It's for debug only, please use optimizer.optimize() in production.

    :param input: ndarray or list of ndarray
    :param target: ndarray or list of ndarray
    :return: value of loss
    """
    checked_input = Model.check_input(input)
    checked_target = Model.check_input(target)
    loss_value = callBigDlFunc(self.bigdl_type,
                               "criterionForward",
                               self.value,
                               checked_input,
                               checked_target)
    # Unlike backward(), the result is returned as-is (no output conversion).
    return loss_value
def test_tf_load(self):
    """Save a small graph model as a TensorFlow .pb, reload it and compare forward outputs."""
    # Linear -> Sigmoid -> SoftMax; the last node is named "output" so it can
    # be referenced by name when the graph is reloaded.
    linear_node = Linear(10, 2)()
    sigmoid_node = Sigmoid()(linear_node)
    softmax_node = SoftMax().set_name("output")(sigmoid_node)
    original_model = BModel(linear_node, softmax_node)

    features = np.random.random((4, 10))

    pb_path = create_tmp_path() + "/model.pb"
    original_model.save_tensorflow([("input", [4, 10])], pb_path)
    reloaded_model = Net.load_tf(pb_path, ["input"], ["output"])

    expected = original_model.forward(features)
    actual = reloaded_model.forward(features)
    self.assert_allclose(actual, expected)
def convert(input_ops, output_ops, byte_order, bigdl_type):
    """
    Convert tensorflow model to bigdl model

    The model is dumped into a fresh temporary directory with ``dump_model``
    and then loaded from ``model.pb`` / ``model.bin`` inside that directory.
    The directory is removed afterwards, whether or not loading succeeded.

    :param input_ops: operation list used for input, should be placeholders
    :param output_ops: operations list used for output
    :param byte_order: forwarded unchanged to ``Model.load_tensorflow``
    :param bigdl_type: forwarded unchanged to ``Model.load_tensorflow``
    :return: bigdl model
    """
    # Build real lists: under Python 3 ``map`` yields a one-shot iterator,
    # which is not a safe stand-in for a list of names.
    input_names = [op.name.split(":")[0] for op in input_ops]
    output_names = [op.name.split(":")[0] for op in output_ops]

    temp = tempfile.mkdtemp()
    try:
        dump_model(path=temp)
        model_path = temp + '/model.pb'
        bin_path = temp + '/model.bin'
        return Model.load_tensorflow(model_path, input_names, output_names,
                                     byte_order, bin_path, bigdl_type)
    finally:
        # Clean up even when dumping/loading raised, so failures do not
        # leave temporary directories behind.
        try:
            shutil.rmtree(temp)
        except OSError as e:
            # A directory that is already gone is fine; anything else is not.
            if e.errno != errno.ENOENT:
                raise
def load_orca_checkpoint(self, path, version, prefix=None):
    """
    Load existing checkpoint

    Restores ``self.model`` and the optimMethod from ``path`` and rebuilds
    ``self.estimator`` from them.

    :param path: Path to the existing checkpoint.
    :param version: checkpoint version, which is the suffix of model.* file,
           i.e., for model.4 file, the version is 4.
    :param prefix: optimMethod prefix, for example 'optimMethod-TorchModelf53bddcc'
    :return:
    """
    import os
    from bigdl.nn.layer import Model
    from bigdl.optim.optimizer import OptimMethod

    missing_prefix_msg = "You should provide optimMethod prefix, " \
                         "for example 'optimMethod-TorchModelf53bddcc'"
    assert prefix is not None, missing_prefix_msg

    def checkpoint_file(stem):
        # Checkpoint files are named "<stem>.<version>" inside `path`.
        return os.path.join(path, "{}.{}".format(stem, version))

    try:
        self.model = Model.load(checkpoint_file("model"))
        optim_method = OptimMethod.load(checkpoint_file(prefix))
    except Exception:
        # Any failure while locating/loading is reported as a ValueError.
        raise ValueError(
            "Cannot load PyTorch checkpoint, please check your checkpoint path "
            "and checkpoint type.")

    self.estimator = SparkEstimator(self.model, optim_method, self.model_dir)
def convert(input_ops, output_ops, byte_order, bigdl_type):
    """
    Convert tensorflow model to bigdl model

    A private TensorFlow session is created and the global variables are
    initialised in it. The graph and a checkpoint are written to a temporary
    directory, merged into ``model.pb`` and loaded with
    ``Model.load_tensorflow``. The session is closed and the temporary
    directory removed before returning, including on failure.

    :param input_ops: operation list used for input, should be placeholders
    :param output_ops: operations list used for output
    :param byte_order: forwarded unchanged to ``Model.load_tensorflow``
    :param bigdl_type: forwarded unchanged to ``Model.load_tensorflow``
    :return: bigdl model
    """
    sess = tf.Session()
    try:
        init = tf.global_variables_initializer()
        sess.run(init)

        # Build real lists: under Python 3 ``map`` yields a one-shot iterator,
        # and output_names is consumed by two separate calls below.
        input_names = [op.name.split(":")[0] for op in input_ops]
        output_names = [op.name.split(":")[0] for op in output_ops]

        temp = tempfile.mkdtemp()
        try:
            saver = tf.train.Saver()
            saver.save(sess, temp + '/model.chkp')
            tf.train.write_graph(sess.graph, temp, 'model.pbtxt')
            merge_checkpoint(temp + '/model.pbtxt',
                             temp + '/model.chkp',
                             output_names,
                             temp + '/model.pb', sess)
            return Model.load_tensorflow(temp + '/model.pb', input_names,
                                         output_names, byte_order, bigdl_type)
        finally:
            # Remove the scratch directory even when a step above raised.
            try:
                shutil.rmtree(temp)
            except OSError as e:
                # A directory that is already gone is fine; anything else is not.
                if e.errno != errno.ENOENT:
                    raise
    finally:
        # The session is local to this function; release its resources.
        sess.close()
def load_orca_checkpoint(self, path, version=None, prefix=None):
    """
    Load existing checkpoint. To load a specific checkpoint, please provide both `version`
    and `prefix`. If `version` is None, then the latest checkpoint will be loaded.

    :param path: Path to the existing checkpoint (or directory containing Orca checkpoint
           files).
    :param version: checkpoint version, which is the suffix of model.* file, i.e., for
           model.4 file, the version is 4. If it is None, then load the latest checkpoint.
    :param prefix: optimMethod prefix, for example 'optimMethod-TorchModelf53bddcc'.
    :return:
    """
    import os
    from bigdl.nn.layer import Model
    from bigdl.optim.optimizer import OptimMethod
    from zoo.orca.learn.utils import find_latest_checkpoint
    from zoo.pipeline.api.torch import TorchModel

    if version is not None:
        # An explicit version needs an explicit optimMethod prefix as well.
        assert prefix is not None, "You should provide optimMethod prefix, " \
                                   "for example 'optimMethod-TorchModelf53bddcc'"
    else:
        # No version given: let the helper resolve path, prefix and version.
        path, prefix, version = find_latest_checkpoint(path, model_type="pytorch")
        if path is None:
            raise ValueError("Cannot find PyTorch checkpoint, please check your checkpoint"
                             " path.")

    def checkpoint_file(stem):
        # Checkpoint files are named "<stem>.<version>" inside `path`.
        return os.path.join(path, "{}.{}".format(stem, version))

    try:
        raw_model = Model.load(checkpoint_file("model"))
        # Re-wrap the loaded model's underlying value as a TorchModel.
        self.model = TorchModel.from_value(raw_model.value)
        self.optimizer = OptimMethod.load(checkpoint_file(prefix))
    except Exception:
        raise ValueError("Cannot load PyTorch checkpoint, please check your checkpoint path "
                         "and checkpoint type.")

    self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)
def load_orca_checkpoint(self, path, version, prefix=None):
    """
    Load existing checkpoint

    Restores ``self.model`` and ``self.optimizer`` from ``path`` and rebuilds
    the Spark estimator, the NNEstimator and the NNModel from them.

    :param path: Path to the existing checkpoint.
    :param version: checkpoint version, which is the suffix of model.* file,
           i.e., for model.4 file, the version is 4.
    :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc'
    :return:
    """
    from bigdl.nn.layer import Model, Container
    from bigdl.optim.optimizer import OptimMethod
    import os

    def checkpoint_file(stem):
        # Checkpoint files are named "<stem>.<version>" inside `path`.
        return os.path.join(path, "{}.{}".format(stem, version))

    try:
        self.model = Model.load(checkpoint_file("model"))
        # This assertion sits inside the try, so a failure is also reported
        # through the ValueError below.
        assert isinstance(self.model, Container), \
            "The loaded model should be a Container, please check your checkpoint type."
        self.optimizer = OptimMethod.load(checkpoint_file(prefix))
    except Exception:
        raise ValueError(
            "Cannot load BigDL checkpoint, please check your checkpoint path "
            "and checkpoint type.")

    self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)

    self.nn_estimator = NNEstimator(self.model,
                                    self.loss,
                                    self.feature_preprocessing,
                                    self.label_preprocessing)
    if self.optimizer is not None:
        self.nn_estimator.setOptimMethod(self.optimizer)

    self.nn_model = NNModel(self.model,
                            feature_preprocessing=self.feature_preprocessing)
def optimize(self):
    """
    Do an optimization.

    Invokes the wrapped object's ``optimize`` through ``callJavaFunc`` and
    wraps the result with ``Model.of``.
    """
    trained_jmodel = callJavaFunc(get_spark_context(), self.value.optimize)
    from bigdl.nn.layer import Model
    python_model = Model.of(trained_jmodel)
    return python_model
def load_graph(self, graph_proto):
    """
    Build a graph ``Model`` from an ONNX-style graph proto.

    Initializer tensors are parsed into constant data. Every graph input that
    is not an initializer gets an ``Identity`` node hanging off a single dummy
    root. Every node is turned into a module via
    ``self._make_module_from_onnx_node`` and wired to the modules producing
    its non-initializer inputs, or to the dummy root when it has none.

    :param graph_proto: graph proto exposing ``initializer``, ``input``,
           ``output`` and ``node``
    :return: ``Model`` whose only input is the dummy root and whose outputs
             are the modules producing the graph's non-initializer outputs
    :raises ValueError: if ``graph_proto`` is falsy or an initializer tensor
            has a blank name
    """
    if not graph_proto:
        raise ValueError("Graph proto is required")

    output_nodes = []
    tensor_map = {}            # tensor name -> (data or None, shape)
    initialized_tensors = set()
    module_map = {}            # tensor name -> module that produces it
    dummy_root = Identity()()

    # Constant tensors shipped with the graph.
    for tensor in graph_proto.initializer:
        if not tensor.name.strip():
            raise ValueError("Tensor's name is required")
        initialized_tensors.add(tensor.name)
        tensor_data = parse_tensor_data(tensor)
        tensor_map[tensor.name] = (tensor_data, tensor_data.shape)

    # Real (non-initializer) graph inputs: only their shape is known.
    for gin in graph_proto.input:
        if gin.name not in initialized_tensors:
            shape = tuple(dim.dim_value for dim in gin.type.tensor_type.shape.dim)
            module_map[gin.name] = Identity()(dummy_root)
            tensor_map[gin.name] = (None, shape)

    for gout in graph_proto.output:
        if gout.name not in initialized_tensors:
            output_nodes.append(gout.name)

    for node in graph_proto.node:
        op_type = node.op_type
        inputs = [tensor_map[n] for n in node.input]
        outputs = node.output
        prev_modules = [module_map[n] for n in node.input
                        if n not in initialized_tensors]
        attrs = parse_node_attr(node)
        if not prev_modules:
            # A node fed only by initializers is attached to the dummy root.
            prev_modules = [dummy_root]
        bigdl_module, outputs_shape = self._make_module_from_onnx_node(
            op_type, inputs, prev_modules, attrs, outputs)
        assert len(outputs) == len(outputs_shape)
        # Every output tensor of the node is produced by the same module.
        for out, out_shape in zip(outputs, outputs_shape):
            module_map[out] = bigdl_module
            tensor_map[out] = (None, out_shape)

    out_modules = [module_map[m] for m in output_nodes]
    return Model([dummy_root], out_modules)
def inference(image_path, model_path, sc):
    """
    Read images, run them through a loaded model and return the predictions.

    Images are read with a 300x300 resize and ``image_codec=1``, then
    preprocessed (resize 256, center crop 224, channel normalize, to tensor)
    before classification with batch size 4.

    :param image_path: location passed to ``NNImageReader.readImages``
    :param model_path: path passed to ``Model.loadModel``
    :param sc: context object passed to ``NNImageReader.readImages``
    :return: the transformed DataFrame with an extra "name" column holding
             the first field of each "image" value
    """
    image_df = NNImageReader.readImages(image_path, sc,
                                        resizeH=300, resizeW=300, image_codec=1)
    extract_name = udf(lambda row: row[0], StringType())

    preprocessing_steps = [
        RowToImageFeature(),
        ImageResize(256, 256),
        ImageCenterCrop(224, 224),
        ImageChannelNormalize(123.0, 117.0, 104.0),
        ImageMatToTensor(),
        ImageFeatureToTensor(),
    ]
    preprocessing = ChainedPreprocessing(preprocessing_steps)

    loaded_model = Model.loadModel(model_path)
    classifier = NNClassifierModel(loaded_model, preprocessing)
    classifier = classifier.setFeaturesCol("image").setBatchSize(4)

    scored_df = classifier.transform(image_df)
    return scored_df.withColumn("name", extract_name(col("image")))
def load_orca_checkpoint(self, path, version, prefix=None):
    """
    Load an existing PyTorch checkpoint and rebuild ``self.estimator``.

    :param path: directory containing the checkpoint files
    :param version: suffix of the ``model.*`` file to load
    :param prefix: optimMethod file prefix; must not be None
    :return:
    """
    import os
    from bigdl.nn.layer import Model
    from bigdl.optim.optimizer import OptimMethod

    assert prefix is not None, "You should provide optimMethod prefix, " \
                               "for example 'optimMethod-TorchModelf53bddcc'"

    try:
        # Path construction stays inside the try so that a bad `path` is
        # reported through the same ValueError as a failed load.
        model_file = os.path.join(path, "model.{}".format(version))
        self.model = Model.load(model_file)
        optim_file = os.path.join(path, "{}.{}".format(prefix, version))
        loaded_optim = OptimMethod.load(optim_file)
    except Exception:
        raise ValueError("Cannot load PyTorch checkpoint, please check your checkpoint path "
                         "and checkpoint type.")

    self.estimator = SparkEstimator(self.model, loaded_optim, self.model_dir)
def inference(image_path, model_path, sc):
    """
    Read images, run them through a loaded model and return the predictions.

    Images are preprocessed (resize 256, center crop 224, channel normalize,
    to tensor) before classification with batch size 4.

    :param image_path: location passed to ``NNImageReader.readImages``
    :param model_path: path passed to ``Model.loadModel``
    :param sc: context object passed to ``NNImageReader.readImages``
    :return: the transformed DataFrame with an extra "name" column holding
             the first field of each "image" value
    """
    image_df = NNImageReader.readImages(image_path, sc)
    extract_name = udf(lambda row: row[0], StringType())

    preprocessing_steps = [
        RowToImageFeature(),
        ImageResize(256, 256),
        ImageCenterCrop(224, 224),
        ImageChannelNormalize(123.0, 117.0, 104.0),
        ImageMatToTensor(),
        ImageFeatureToTensor(),
    ]
    preprocessing = ChainedPreprocessing(preprocessing_steps)

    loaded_model = Model.loadModel(model_path)
    classifier = NNClassifierModel(loaded_model, preprocessing)
    classifier = classifier.setFeaturesCol("image").setBatchSize(4)

    scored_df = classifier.transform(image_df)
    return scored_df.withColumn("name", extract_name(col("image")))
def load_orca_checkpoint(self, path, version=None, prefix=None):
    """
    Load existing checkpoint. To load a specific checkpoint, please provide both `version`
    and `prefix`. If `version` is None, then the latest checkpoint under the specified
    directory will be loaded.

    :param path: Path to the existing checkpoint (or directory containing Orca checkpoint
           files).
    :param version: checkpoint version, which is the suffix of model.* file, i.e., for
           model.4 file, the version is 4. If it is None, then load the latest checkpoint.
    :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc'
    :return:
    :raises ValueError: if no checkpoint can be found or the checkpoint cannot be loaded
    """
    from bigdl.nn.layer import Model, Container
    from bigdl.optim.optimizer import OptimMethod
    from zoo.orca.learn.utils import find_latest_checkpoint
    import os

    if version is None:
        # No version given: let the helper resolve path, prefix and version.
        path, prefix, version = find_latest_checkpoint(path, model_type="bigdl")
        if path is None:
            raise ValueError(
                "Cannot find BigDL checkpoint, please check your checkpoint"
                " path.")
    else:
        # The example prefix now matches the BigDL naming used in the
        # docstring; the old message showed a TorchModel prefix copied from
        # the PyTorch loader.
        assert prefix is not None, "You should provide optimMethod prefix, " \
                                   "for example 'optimMethod-Sequentialf53bddcc'"

    try:
        self.model = Model.load(
            os.path.join(path, "model.{}".format(version)))
        # This assertion sits inside the try, so a failure is also reported
        # through the ValueError below.
        assert isinstance(self.model, Container), \
            "The loaded model should be a Container, please check your checkpoint type."
        self.optimizer = OptimMethod.load(
            os.path.join(path, "{}.{}".format(prefix, version)))
    except Exception:
        raise ValueError(
            "Cannot load BigDL checkpoint, please check your checkpoint path "
            "and checkpoint type.")

    # Rebuild everything that wraps the model/optimizer pair.
    self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)
    self.nn_estimator = NNEstimator(self.model, self.loss,
                                    self.feature_preprocessing,
                                    self.label_preprocessing)
    if self.optimizer is not None:
        self.nn_estimator.setOptimMethod(self.optimizer)
    self.nn_model = NNModel(
        self.model, feature_preprocessing=self.feature_preprocessing)
def main():
    """
    Build a tiny TensorFlow graph (matmul + bias + tanh), wrap it as a BigDL
    model and check that both produce the same forward result to 6 decimals.
    """
    tf.set_random_seed(1234)

    placeholder = tf.placeholder(tf.float32, [None, 5])
    weight = tf.Variable(tf.random_uniform([5, 10]))
    bias = tf.Variable(tf.random_uniform([10]))
    pre_activation = tf.nn.bias_add(tf.matmul(placeholder, weight), bias)
    output = tf.nn.tanh(pre_activation)

    sample = np.random.rand(5, 5)

    # construct BigDL model and get the result from it
    bigdl_model = Model(placeholder, output, model_type="tensorflow")
    bigdl_result = bigdl_model.forward(sample)

    # get result from tensorflow and compare
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        feed = {placeholder: sample}
        tensorflow_result = sess.run(output, feed)

        print("Tensorflow forward result is " + str(tensorflow_result))
        print("BigDL forward result is " + str(bigdl_result))

        np.testing.assert_almost_equal(tensorflow_result, bigdl_result, 6)
        print("The results are almost equal in 6 decimals")
def inference(image_path, model_path, batch_size, sc):
    """
    Read images, run them through a loaded model via an Estimator and return
    the predictions.

    Images are read with a 300x300 resize and ``image_codec=1``, then
    preprocessed (resize 256, center crop 224, channel normalize, to tensor).

    :param image_path: location passed to ``NNImageReader.readImages``
    :param model_path: path passed to ``Model.loadModel``
    :param batch_size: batch size passed to ``predict``
    :param sc: context object passed to ``NNImageReader.readImages``
    :return: the prediction DataFrame with an extra "name" column holding
             the first field of each "image" value
    """
    image_df = NNImageReader.readImages(image_path, sc,
                                        resizeH=300, resizeW=300, image_codec=1)
    extract_name = udf(lambda row: row[0], StringType())

    preprocessing_steps = [
        RowToImageFeature(),
        ImageResize(256, 256),
        ImageCenterCrop(224, 224),
        ImageChannelNormalize(123.0, 117.0, 104.0),
        ImageMatToTensor(),
        ImageFeatureToTensor(),
    ]
    preprocessing = ChainedPreprocessing(preprocessing_steps)

    loaded_model = Model.loadModel(model_path)
    estimator = Estimator.from_bigdl(model=loaded_model,
                                     feature_preprocessing=preprocessing)

    scored_df = estimator.predict(data=image_df,
                                  batch_size=batch_size,
                                  feature_cols="image")
    return scored_df.withColumn("name", extract_name(col("image")))
def convert(input_ops, output_ops, sess):
    """
    Convert tensorflow model to bigdl model

    The session's graph and a checkpoint are written to a temporary
    directory, merged into ``model.pb`` and loaded with
    ``Model.load_tensorflow``. The temporary directory is removed before
    returning, including on failure.

    :param input_ops: operation list used for input, should be placeholders
    :param output_ops: operations list used for output
    :param sess: current tensorflow session
    :return: bigdl model
    """
    # Imported locally so this function does not depend on the module header.
    import shutil

    # Build real lists: under Python 3 ``map`` yields a one-shot iterator,
    # and output_names is consumed by two separate calls below.
    input_names = [op.name.split(":")[0] for op in input_ops]
    output_names = [op.name.split(":")[0] for op in output_ops]

    temp = tempfile.mkdtemp()
    try:
        saver = tf.train.Saver()
        saver.save(sess, temp + '/model.chkp')
        tf.train.write_graph(sess.graph, temp, 'model.pbtxt')
        merge_checkpoint(temp + '/model.pbtxt',
                         temp + '/model.chkp',
                         output_names,
                         temp + '/model.pb', sess)
        return Model.load_tensorflow(temp + '/model.pb', input_names, output_names)
    finally:
        # Best-effort cleanup: the scratch files are not needed once the
        # model has been loaded (or loading has failed).
        shutil.rmtree(temp, ignore_errors=True)
def load_orca_checkpoint(self, path, version, prefix=None):
    """
    Load an existing BigDL checkpoint and rebuild the estimator objects.

    :param path: directory containing the checkpoint files
    :param version: suffix of the ``model.*`` file to load
    :param prefix: optimMethod file prefix
    :return:
    """
    from bigdl.nn.layer import Model, Container
    from bigdl.optim.optimizer import OptimMethod
    import os

    try:
        model_file = os.path.join(path, "model.{}".format(version))
        self.model = Model.load(model_file)
        # This assertion sits inside the try, so a failure is also reported
        # through the ValueError below.
        assert isinstance(self.model, Container), \
            "The loaded model should be a Container, please check your checkpoint type."
        optim_file = os.path.join(path, "{}.{}".format(prefix, version))
        self.optimizer = OptimMethod.load(optim_file)
    except Exception:
        raise ValueError("Cannot load BigDL checkpoint, please check your checkpoint path "
                         "and checkpoint type.")

    self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)

    self.nn_estimator = NNEstimator(self.model,
                                    self.loss,
                                    self.feature_preprocessing,
                                    self.label_preprocessing)
    if self.optimizer is not None:
        self.nn_estimator.setOptimMethod(self.optimizer)

    self.nn_model = NNModel(self.model,
                            feature_preprocessing=self.feature_preprocessing)
def test_model_save_and_load(self):
    """Save a TorchModel, load it back via Model.load and exercise the re-wrapped model."""

    class SimpleTorchModel(nn.Module):
        """Two dense layers with a sigmoid on the output."""

        def __init__(self):
            super().__init__()
            self.dense1 = nn.Linear(2, 4)
            self.dense2 = nn.Linear(4, 1)

        def forward(self, x):
            hidden = self.dense1(x)
            return torch.sigmoid(self.dense2(hidden))

    az_model = TorchModel.from_pytorch(SimpleTorchModel())

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        save_path = tmp_dir_name + "/model.obj"
        az_model.save(save_path, True)

        # Load through the generic loader, then re-wrap the underlying value.
        generic_model = Model.load(save_path)
        restored = TorchModel.from_value(generic_model.value)

        ones_batch = torch.ones(16, 2)
        restored.forward(ones_batch.numpy())

        restored.to_pytorch()
def load(self, checkpoint, loss=None):
    """
    Load the latest PyTorch checkpoint found under ``checkpoint`` and rebuild
    ``self.estimator`` from it.

    :param checkpoint: location passed to ``find_latest_checkpoint``
    :param loss: optional loss; when given it is wrapped with
           ``TorchLoss.from_pytorch`` and stored on ``self.loss``
    :raises ValueError: if no checkpoint is found or it cannot be loaded
    """
    from zoo.orca.learn.utils import find_latest_checkpoint
    from bigdl.nn.layer import Model
    from bigdl.optim.optimizer import OptimMethod
    import os

    if loss is not None:
        from zoo.pipeline.api.torch import TorchLoss
        self.loss = TorchLoss.from_pytorch(loss)

    path, prefix, version = find_latest_checkpoint(checkpoint, model_type="pytorch")
    if path is None:
        raise ValueError(
            "Cannot find PyTorch checkpoint, please check your checkpoint path."
        )

    def checkpoint_file(stem):
        # Checkpoint files are named "<stem>.<version>" inside `path`.
        return os.path.join(path, "{}.{}".format(stem, version))

    try:
        self.model = Model.load(checkpoint_file("model"))
        loaded_optim = OptimMethod.load(checkpoint_file(prefix))
    except Exception:
        raise ValueError(
            "Cannot load PyTorch checkpoint, please check your checkpoint path "
            "and checkpoint type.")

    self.estimator = SparkEstimator(self.model, loaded_optim, self.model_dir)
def trainNetwork(model, args):
    """
    Run the epsilon-greedy observe/explore/train loop against the game emulator.

    :param model: object used for ``predict``/``fit``/``saveModel``; replaced
           by ``Model.load_keras(...)`` when ``args['mode'] == 'Run'``
    :param args: mapping with a ``'mode'`` key. ``'Run'`` loads saved weights
           and never trains; any other value takes the training branch, but
           the model is only saved when the mode is exactly ``'Train'``.

    NOTE(review): the ``while (True)`` loop contains no ``break`` or
    ``return``, so as written this function does not return normally.
    """
    # open up a game state to communicate with emulator
    game_state = game.GameState()

    # store the previous observations in replay memory
    D = deque()

    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, terminal = game_state.frame_step(do_nothing)

    # grayscale -> resize to 80x80 -> rescale intensity to (0, 255) -> divide by 255
    x_t = skimage.color.rgb2gray(x_t)
    x_t = skimage.transform.resize(x_t, (80, 80))
    x_t = skimage.exposure.rescale_intensity(x_t, out_range=(0, 255))

    x_t = x_t / 255.0

    # the initial state is the same frame stacked four times along the last axis
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    # print (s_t.shape)

    # In Keras, need to reshape (adds a leading axis of size 1)
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1*80*80*4

    if args['mode'] == 'Run':
        OBSERVE = 999999999  # We keep observe, never train
        epsilon = FINAL_EPSILON
        print("Now we load weight")
        model = Model.load_keras(json_path=None, hdf5_path="./model/model100000.h5")
        print("Weight load successfully")
    else:  # We go to training mode
        OBSERVE = OBSERVATION
        epsilon = INITIAL_EPSILON

    t = 0
    while (True):
        # per-step defaults, reported in the status line at the end of the step
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0
        a_t = np.zeros([ACTIONS])
        # choose an action epsilon greedy
        # NOTE: on steps where t % FRAME_PER_ACTION != 0, a_t stays all zeros
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else:
                q = model.predict(s_t)  # input a stack of 4 images, get the prediction
                max_Q = np.argmax(q)
                action_index = max_Q
                a_t[max_Q] = 1

        # We reduce the epsilon gradually, but only once past the observe phase
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # run the selected action and observe next state and reward
        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)

        x_t1 = skimage.color.rgb2gray(x_t1_colored)
        x_t1 = skimage.transform.resize(x_t1, (80, 80))
        x_t1 = skimage.exposure.rescale_intensity(x_t1, out_range=(0, 255))

        x_t1 = x_t1 / 255.0

        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1)  # 1x80x80x1
        # new frame goes first on the last axis, followed by the first three
        # channels of the previous state
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        # drop the oldest transition once the replay memory is over capacity
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # only train if done observing
        if t > OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            # Now we do the experience replay
            # NOTE: `terminal` is rebound here from the single-step flag to a
            # tuple of per-sample flags
            state_t, action_t, reward_t, state_t1, terminal = zip(*minibatch)
            state_t = np.concatenate(state_t)
            state_t1 = np.concatenate(state_t1)
            targets = np.zeros((BATCH, ACTIONS))
            Q_sa = model.predict(state_t1)
            # presumably predict() returns an RDD-like object here (it is
            # used via .map(...).collect()) - TODO confirm
            result_qsa = Q_sa.map(lambda elem: max(elem)).collect()
            for i in range(BATCH):
                # NOTE(review): np.invert is a logical NOT only for boolean
                # flags; on integers it is a bitwise NOT - confirm the type
                # of terminal[i]
                targets[i][action_t[i]] = reward_t[i] + GAMMA * result_qsa[i] * np.invert(terminal[i])

            model.fit(state_t, targets)
            loss = getLoss(model, state_t, targets)
            result_qsa = None
        s_t = s_t1
        t = t + 1

        # save progress every 100 iterations (only when mode is 'Train')
        if t % 100 == 0 and args['mode'] == 'Train':
            print("Now we save model")
            model.saveModel("/model/model.bigdl", "/model/model.bin", True)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, \
              "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, "/ Loss ", loss)

    # NOTE(review): not reached as written - the loop above never exits normally
    print("Episode finished!")
    print("************************")
def _load(self, path):
    """
    Load a model from ``path`` with ``Model.loadModel``, using this object's
    ``bigdl_type``.

    :param path: path passed to ``Model.loadModel``
    :return: whatever ``Model.loadModel`` returns
    """
    loaded_model = Model.loadModel(path, bigdl_type=self.bigdl_type)
    return loaded_model
def load(self, checkpoint, optimizer=None, loss=None, feature_preprocessing=None,
         label_preprocessing=None, model_dir=None, is_checkpoint=False):
    """
    Load a BigDL model into this estimator, either from an Orca checkpoint
    directory or from a saved ``<checkpoint>.bigdl`` / ``<checkpoint>.bin`` pair.

    :param checkpoint: checkpoint directory when ``is_checkpoint`` is true,
           otherwise the path stem of the saved ``.bigdl``/``.bin`` files
    :param optimizer: optional replacement for ``self.optimizer``
    :param loss: optional replacement for ``self.loss``
    :param feature_preprocessing: optional replacement for ``self.feature_preprocessing``
    :param label_preprocessing: optional replacement for ``self.label_preprocessing``
    :param model_dir: optional replacement for ``self.model_dir``
    :param is_checkpoint: whether ``checkpoint`` is an Orca checkpoint directory
    :return: self
    :raises ValueError: if a checkpoint is requested but cannot be found or loaded
    """
    from zoo.pipeline.api.net import Net

    # Only arguments that were actually supplied override the stored settings.
    supplied = (
        ("loss", loss),
        ("optimizer", optimizer),
        ("feature_preprocessing", feature_preprocessing),
        ("label_preprocessing", label_preprocessing),
        ("model_dir", model_dir),
    )
    for attr_name, attr_value in supplied:
        if attr_value is not None:
            setattr(self, attr_name, attr_value)

    if not is_checkpoint:
        # Plain saved model: definition and weights live in sibling files.
        self.model = Net.load_bigdl(checkpoint + ".bigdl", checkpoint + ".bin")
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is None:
            # Fall back to a default SGD when no optimizer is available.
            from bigdl.optim.optimizer import SGD
            self.optimizer = SGD()
        self.nn_estimator.setOptimMethod(self.optimizer)
        self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)
        self.nn_model = NNModel(self.model,
                                feature_preprocessing=self.feature_preprocessing)
        return self

    from zoo.orca.learn.utils import find_latest_checkpoint
    from bigdl.nn.layer import Model, Container
    from bigdl.optim.optimizer import OptimMethod
    import os

    path, prefix, version = find_latest_checkpoint(checkpoint, model_type="bigdl")
    if path is None:
        raise ValueError(
            "Cannot find BigDL checkpoint, please check your checkpoint path."
        )

    try:
        model_file = os.path.join(path, "model.{}".format(version))
        self.model = Model.load(model_file)
        # This assertion sits inside the try, so a failure is also reported
        # through the ValueError below.
        assert isinstance(self.model, Container), \
            "The loaded model should be a Container, please check your checkpoint type."
        optim_file = os.path.join(path, "{}.{}".format(prefix, version))
        self.optimizer = OptimMethod.load(optim_file)
    except Exception:
        raise ValueError(
            "Cannot load BigDL checkpoint, please check your checkpoint path "
            "and checkpoint type.")

    self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)
    self.nn_estimator = NNEstimator(self.model, self.loss,
                                    self.feature_preprocessing,
                                    self.label_preprocessing)
    if self.optimizer is not None:
        self.nn_estimator.setOptimMethod(self.optimizer)
    self.nn_model = NNModel(self.model,
                            feature_preprocessing=self.feature_preprocessing)
    return self
def loadModel(modelPath):
    """
    Load a model whose definition and weights are stored next to each other
    as ``<modelPath>.bigdl`` and ``<modelPath>.bin``.

    :param modelPath: common path stem of the two files (without extension)
    :return: the model returned by ``Model.loadModel``
    """
    definition_file = modelPath + '.bigdl'
    weights_file = modelPath + '.bin'
    return Model.loadModel(modelPath=definition_file, weightPath=weights_file)