def get_training_data(self):
    sample_rdd = self.rdd.map(
        lambda t: Sample.from_ndarray(nest.flatten(t), np.array([0.0])))
    fs = FeatureSet.sample_rdd(sample_rdd,
                               sequential_order=self.sequential_order,
                               shuffle=self.shuffle)
    return fs
def predict(self, input_fn, checkpoint_path=None):
    with tf.Graph().as_default() as g:
        result = self.estimator._call_input_fn(input_fn,
                                               tf.estimator.ModeKeys.PREDICT)
        # When input_fn returns a TFDataset, run inference distributedly through TFNet.
        if isinstance(result, TFDataset):
            spec = self._call_model_fn(result.feature_tensors,
                                       None,
                                       tf.estimator.ModeKeys.PREDICT,
                                       self.config)
            latest_checkpoint = self.estimator.latest_checkpoint()

            if latest_checkpoint:
                checkpoint_path = latest_checkpoint

            with tf.Session() as sess:
                saver = tf.train.Saver()
                if checkpoint_path:
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())

                inputs = nest.flatten(result.feature_tensors)
                outputs = nest.flatten(spec.predictions)
                tfnet = TFNet.from_session(sess, inputs=inputs, outputs=outputs)

                rdd = result.rdd.map(lambda t: Sample.from_ndarray(
                    nest.flatten(t), np.array([0.0])))

                results = tfnet.predict(rdd, result.batch_per_thread)
                return results

    # Otherwise fall back to the underlying tf.estimator implementation.
    return self.estimator.predict(input_fn, checkpoint_path=checkpoint_path)
def get_validation_data(self):
    if self.validation_text_set is not None:
        return self.validation_text_set.get_samples().map(
            lambda sample: Sample.from_jtensor(
                features=sample.features + sample.labels,
                labels=JTensor.from_ndarray(np.array([0.0]))))
    return None
def get_prediction_data(self):
    rdd = self.rdd.map(lambda t: Sample.from_ndarray(
        nest.flatten(t[0] if isinstance(t, tuple) else t), np.array([0.0])))
    rdd_wrapper = callZooFunc("float", "zooRDDSampleToMiniBatch",
                              rdd, self.batch_per_thread)
    return rdd_wrapper.value().toJavaRDD()
def get_prediction_data(self):
    rdd = self.text_set.get_samples().map(
        lambda sample: Sample.from_jtensor(
            features=sample.features,
            labels=JTensor.from_ndarray(np.array([0.0]))))
    rdd_wrapper = callZooFunc("float", "zooRDDSampleToMiniBatch",
                              rdd, self.batch_per_thread)
    return rdd_wrapper.value().toJavaRDD()
def get_training_data(self):
    sample_rdd = self.text_set.get_samples().map(
        lambda sample: Sample.from_jtensor(
            features=sample.features + sample.labels,
            labels=JTensor.from_ndarray(np.array([0.0]))))
    return FeatureSet.sample_rdd(sample_rdd,
                                 sequential_order=self.sequential_order,
                                 shuffle=self.shuffle)
def get_validation_data(self):
    if self.val_rdd is not None:
        sample_rdd = self.val_rdd.map(lambda t: Sample.from_ndarray(
            nest.flatten(t), np.array([0.0])))
        return FeatureSet.sample_rdd(sample_rdd,
                                     sequential_order=self.sequential_order,
                                     shuffle=self.shuffle)
    return None
def get_validation_data(self):
    if self.val_rdd is not None:
        sample_rdd = self.val_rdd.map(lambda t: Sample.from_ndarray(
            nest.flatten(t), np.array([0.0])))
        fs = FeatureSet.sample_rdd(sample_rdd,
                                   sequential_order=self.sequential_order,
                                   shuffle=self.shuffle)
        fs = fs.transform(SampleToMiniBatch(self.batch_size))
        return fs
    return None
def get_validation_data(self):
    if self.validation_text_set is not None:
        sample_rdd = self.validation_text_set.get_samples().map(
            lambda sample: Sample.from_jtensor(
                features=sample.features + sample.labels,
                labels=JTensor.from_ndarray(np.array([0.0]))))
        return FeatureSet.sample_rdd(sample_rdd,
                                     sequential_order=self.sequential_order,
                                     shuffle=self.shuffle)
    return None
def evaluate(self, input_fn, eval_methods, steps=None, checkpoint_path=None):
    if not all(isinstance(metric, six.string_types) for metric in eval_methods):
        raise ValueError("All metrics should be string types")

    with tf.Graph().as_default() as g:
        result = self.estimator._call_input_fn(input_fn, tf.estimator.ModeKeys.EVAL)
        # When input_fn returns a TFDataset, evaluate distributedly with TFNet
        # and BigDL validation methods.
        if isinstance(result, TFDataset):
            spec = self._call_model_fn(result.feature_tensors,
                                       result.label_tensors,
                                       tf.estimator.ModeKeys.PREDICT,
                                       self.config)
            latest_checkpoint = self.estimator.latest_checkpoint()

            if latest_checkpoint:
                checkpoint_path = latest_checkpoint

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())

                inputs = nest.flatten(result._original_tensors[0])
                outputs = nest.flatten(spec.predictions)
                tfnet = TFNet.from_session(sess, inputs=inputs, outputs=outputs)

                rdd = result.rdd.map(lambda t: Sample.from_ndarray(
                    nest.flatten(t[0]), nest.flatten(t[1])))

                if result.batch_per_thread < 0:
                    batch_size = result.batch_size
                else:
                    batch_size = result.batch_per_thread * result.rdd.getNumPartitions()

                eval_methods = [self._to_bigdl_metric(m) for m in eval_methods]
                results = tfnet.evaluate(rdd, batch_size, eval_methods)
                final_result = dict([(r.method, r.result) for r in results])
                return final_result

    # Otherwise fall back to the underlying tf.estimator implementation.
    return self.estimator.evaluate(input_fn, steps, checkpoint_path=checkpoint_path)
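# Hedged usage sketch, not from the source: driving the predict()/evaluate()
# methods above. `est` (an already built estimator wrapper) and `dataset_fn`
# (an input_fn that returns a TFDataset, so the distributed TFNet branch is
# taken rather than the plain tf.estimator fallback) are hypothetical names;
# only the method signatures come from the code above.
def run_eval_and_predict(est, dataset_fn):
    # eval_methods must be plain strings; they are converted to BigDL
    # validation methods via _to_bigdl_metric before tfnet.evaluate runs.
    metrics = est.evaluate(dataset_fn, ["acc"])
    # predict returns an RDD of prediction results when input_fn yields a TFDataset.
    predictions = est.predict(dataset_fn)
    return metrics, predictions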
def get_validation_data(self):
    if self.validation_text_set is not None:
        sample_rdd = self.validation_text_set.get_samples().map(
            lambda sample: Sample.from_jtensor(
                features=sample.features + sample.labels,
                labels=JTensor.from_ndarray(np.array([0.0]))))
        fs = FeatureSet.sample_rdd(sample_rdd,
                                   sequential_order=self.sequential_order,
                                   shuffle=self.shuffle)
        fs = fs.transform(SampleToMiniBatch(self.batch_size))
        return fs
    return None
def to_sample_rdd(x, y, sc, num_slices=None):
    """
    Convert x and y into RDD[Sample].

    :param x: ndarray whose first dimension is the batch dimension
    :param y: ndarray whose first dimension is the batch dimension
    :param sc: SparkContext
    :param num_slices: the number of partitions for x and y
    :return: RDD[Sample]
    """
    x_rdd = sc.parallelize(x, num_slices)
    y_rdd = sc.parallelize(y, num_slices)
    return x_rdd.zip(y_rdd).map(lambda item: Sample.from_ndarray(item[0], item[1]))
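# Minimal usage sketch for to_sample_rdd above; the arrays and SparkContext here
# are illustrative. Both ndarrays must have the batch dimension first, and the
# result is an RDD[Sample] zipping each feature record with its label.
import numpy as np
from pyspark import SparkContext

sc = SparkContext.getOrCreate()
x = np.random.rand(100, 28, 28, 1).astype(np.float32)  # 100 feature records
y = np.random.randint(0, 10, size=(100, 1))             # 100 matching labels
sample_rdd = to_sample_rdd(x, y, sc, num_slices=4)      # RDD[Sample] with 4 partitions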
def get_training_data(self):
    return self.rdd.map(lambda t: Sample.from_ndarray(nest.flatten(t),
                                                      np.array([0.0])))
def get_evaluation_data(self):
    if isinstance(self.tensor_structure, tuple):
        return self.rdd.map(
            lambda t: Sample.from_ndarray(nest.flatten(t[0]),
                                          nest.flatten(t[1])))
    return self.rdd.map(lambda t: Sample.from_ndarray(nest.flatten(t),
                                                      np.array([0.0])))
def get_prediction_data(self):
    data = self.rdd.map(lambda t: Sample.from_ndarray(
        nest.flatten(t[0] if isinstance(t, tuple) else t), np.array([0.0])))
    return data
def get_training_data(self):
    return self.text_set.get_samples().map(
        lambda sample: Sample.from_jtensor(
            features=sample.features + sample.labels,
            labels=JTensor.from_ndarray(np.array([0.0]))))
def __init__(self, loss, optim_method, sess=None, dataset=None, inputs=None,
             grads=None, variables=None, graph=None, val_outputs=None,
             val_labels=None, val_method=None, val_split=0.0):
    '''
    TFOptimizer is used for distributed training of TensorFlow on Spark/BigDL.

    :param loss: the loss tensor of the TensorFlow model; it should be a scalar
    :param optim_method: the optimization method to be used, such as
        bigdl.optim.optimizer.Adam
    :param sess: the current TensorFlow Session; if you want to use a pre-trained
        model, you should use the Session to load the pre-trained variables and
        pass it to TFOptimizer.
    '''
    import tensorflow as tf
    from zoo.util.tf import export_tf

    if dataset is None:
        args = TFOptimizer._get_arguments_from_loss(
            loss, optim_method, sess, val_outputs, val_labels, val_method)
        loss, optim_method, sess, dataset, inputs = args[:5]
        grads, variables, graph, val_outputs, val_labels, val_method = args[5:]

    self.optim_method = optim_method
    self.sess = sess
    self.dataset = dataset
    self.inputs = inputs
    self.graph = graph

    from zoo.util.tf import process_grad
    grads = [process_grad(grad) for grad in grads]

    if self.dataset.batch_size <= 0:
        raise ValueError(
            "You should set batch_size instead of batch_per_thread for training")

    if val_outputs is not None and val_labels is not None:
        with self.graph.as_default():
            val_labels = [tf.identity(v) for v in val_labels]
        outputs = val_outputs + val_labels + [loss]
    else:
        outputs = [loss]

    # Export the graph (inputs, gradients and outputs) so that the
    # TFTrainingHelper layer can run it on the executors.
    self.export_dir = tempfile.mkdtemp()
    export_tf(self.sess, self.export_dir,
              inputs=self.inputs, outputs=grads + outputs)

    variable_names = [v.name for v in variables]
    grad_names = [g.name for g in grads]
    output_names = [o.name for o in outputs]
    meta = {
        "input_names": [i.name for i in self.inputs],
        "output_names": output_names,
        "variables": variable_names,
        "grad_variables": grad_names
    }

    with open(os.path.join(self.export_dir, "training_meta.json"), "w") as f:
        f.write(json.dumps(meta))

    # Placeholders used to push updated weights back into the TensorFlow graph.
    self.variable_placeholders = []
    with self.graph.as_default():
        assigns = []
        for v in variables:
            p = tf.placeholder(dtype=tf.float32, shape=v.shape)
            a = tf.assign(v, p)
            self.variable_placeholders.append(p)
            assigns.append(a)
        assign = tf.group(*assigns)
    self.assign = assign

    try:
        self.training_helper_layer = TFTrainingHelper(self.export_dir)
    except Py4JJavaError as e:
        if "expects to be colocated with unknown node" in str(e):
            raise Exception("""
If you are using the embedding layer in tf.keras, then this is a known issue of
tensorflow, see https://github.com/tensorflow/tensorflow/issues/21889.
Please add zoo.util.tf.variable_creator_scope before model construction.
For example:
from zoo.util.tf import variable_creator_scope
with variable_creator_scope():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(1, 1, input_length=1)])
""")
        else:
            raise e

    data = self.dataset.rdd
    batch_size = self.dataset.batch_size
    sample_rdd = data.map(
        lambda t: Sample.from_ndarray(t, [np.array([0.0])]))

    if val_outputs is not None and val_labels is not None:
        if self.dataset.val_rdd is not None:
            val_rdd = self.dataset.val_rdd \
                .map(lambda t: Sample.from_ndarray(t, [np.array([0.0])]))
            val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                          for m in to_list(val_method)]
            training_rdd = sample_rdd
        elif val_split != 0.0:
            training_rdd, val_rdd = sample_rdd.randomSplit(
                [1 - val_split, val_split])
            val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                          for m in to_list(val_method)]
        else:
            raise ValueError("Validation data is not specified. Please set " +
                             "val rdd in TFDataset, or set val_split larger than zero")

        self.optimizer = Optimizer.create(self.training_helper_layer,
                                          training_rdd,
                                          IdentityCriterion(),
                                          batch_size=batch_size,
                                          optim_method=self.optim_method)
        self.optimizer.set_validation(self.dataset.batch_size,
                                      val_rdd,
                                      EveryEpoch(),
                                      val_method)
    else:
        training_rdd = sample_rdd
        self.optimizer = Optimizer.create(self.training_helper_layer,
                                          training_rdd,
                                          IdentityCriterion(),
                                          batch_size=batch_size,
                                          optim_method=self.optim_method)
def predict(self):
    rdd = self.dataset.rdd
    sample_rdd = rdd.map(lambda x: Sample.from_ndarray(x, np.array([0.0])))
    return self.tfnet.predict(sample_rdd, self.dataset.batch_per_thread)
def to_sample(t):
    if isinstance(t, list):
        t = tuple(t)
    return Sample.from_ndarray(nest.flatten(t), [np.array([0.0])])
def get_validation_data(self):
    if self.val_rdd is not None:
        return self.val_rdd.map(lambda t: Sample.from_ndarray(nest.flatten(t),
                                                              np.array([0.0])))
    return None
def __init__(self, loss, optim_method, sess=None, val_outputs=None,
             val_labels=None, val_method=None):
    '''
    TFOptimizer is used for distributed training of TensorFlow on Spark/BigDL.

    :param loss: the loss tensor of the TensorFlow model; it should be a scalar
    :param optim_method: the optimization method to be used, such as
        bigdl.optim.optimizer.Adam
    :param sess: the current TensorFlow Session; if you want to use a pre-trained
        model, you should use the Session to load the pre-trained variables and
        pass it to TFOptimizer.
    '''
    import tensorflow as tf
    from zoo.util.tf import export_tf

    self.optim_method = optim_method
    if sess is None:
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
    else:
        self.sess = sess

    # A zero-learning-rate optimizer is used only to collect the gradients
    # and trainable variables for the loss.
    grads_vars = tf.train.GradientDescentOptimizer(0).compute_gradients(loss)
    variables = []
    grads = []
    for (grad, var) in grads_vars:
        variables.append(var)
        grads.append(grad)

    self.export_dir = tempfile.mkdtemp()
    all_required_inputs = _find_placeholders([loss])
    self.dataset = tf.get_collection(all_required_inputs[0].name)[0]

    if self.dataset.batch_size <= 0:
        raise ValueError(
            "You should set batch_size instead of batch_per_thread for training")

    self.inputs = self.dataset.tensors
    _check_the_same(all_required_inputs, self.inputs)

    if val_outputs is not None and val_labels is not None:
        outputs = val_outputs + val_labels + [loss]
    else:
        outputs = [loss]

    export_tf(self.sess, self.export_dir,
              inputs=self.inputs, outputs=grads + outputs)

    variable_names = [v.name for v in variables]
    grad_names = [g.name for g in grads]
    output_names = [o.name for o in outputs]
    meta = {
        "input_names": [i.name for i in self.inputs],
        "output_names": output_names,
        "variables": variable_names,
        "grad_variables": grad_names
    }

    with open(os.path.join(self.export_dir, "training_meta.json"), "w") as f:
        f.write(json.dumps(meta))

    self.training_helper_layer = TFTrainingHelper(self.export_dir)

    self.variable_placeholders = []
    assigns = []
    for v in variables:
        p = tf.placeholder(dtype=tf.float32, shape=v.shape)
        a = tf.assign(v, p)
        self.variable_placeholders.append(p)
        assigns.append(a)
    self.assign = tf.group(*assigns)

    data = self.dataset.rdd
    batch_size = self.dataset.batch_size
    sample_rdd = data.map(
        lambda t: Sample.from_ndarray(t, [np.array([0.0])]))

    self.optimizer = Optimizer.create(self.training_helper_layer,
                                      sample_rdd,
                                      IdentityCriterion(),
                                      batch_size=batch_size,
                                      optim_method=self.optim_method)

    if val_outputs is not None and val_labels is not None:
        val_sample_rdd = self.dataset.val_rdd \
            .map(lambda t: Sample.from_ndarray(t, [np.array([0.0])]))
        val_method = TFValidationMethod(val_method,
                                        len(val_outputs), len(val_labels))
        self.optimizer.set_validation(self.dataset.batch_size,
                                      val_sample_rdd,
                                      EveryEpoch(),
                                      val_method)
def __init__(self, loss, optim_method, sess=None, dataset=None, inputs=None,
             grads=None, variables=None, graph=None, val_outputs=None,
             val_labels=None, val_method=None, add_sample_weights_num=0):
    '''
    TFOptimizer is used for distributed training of TensorFlow on Spark/BigDL.

    :param loss: the loss tensor of the TensorFlow model; it should be a scalar
    :param optim_method: the optimization method to be used, such as
        bigdl.optim.optimizer.Adam
    :param sess: the current TensorFlow Session; if you want to use a pre-trained
        model, you should use the Session to load the pre-trained variables and
        pass it to TFOptimizer.
    '''
    import tensorflow as tf
    from zoo.util.tf import export_tf

    if dataset is None:
        args = TFOptimizer._get_arguments_from_loss(
            loss, optim_method, sess, val_outputs, val_labels, val_method)
        loss, optim_method, sess, dataset, inputs = args[:5]
        grads, variables, graph, val_outputs, val_labels, val_method = args[5:]

    self.optim_method = optim_method
    self.sess = sess
    self.dataset = dataset
    self.inputs = inputs
    self.graph = graph

    if self.dataset.batch_size <= 0:
        raise ValueError(
            "You should set batch_size instead of batch_per_thread for training")

    if val_outputs is not None and val_labels is not None:
        with self.graph.as_default():
            val_labels = [tf.identity(v) for v in val_labels]
        outputs = val_outputs + val_labels + [loss]
    else:
        outputs = [loss]

    self.export_dir = tempfile.mkdtemp()
    export_tf(self.sess, self.export_dir,
              inputs=self.inputs, outputs=grads + outputs)

    variable_names = [v.name for v in variables]
    grad_names = [g.name for g in grads]
    output_names = [o.name for o in outputs]
    meta = {
        "input_names": [i.name for i in self.inputs],
        "output_names": output_names,
        "variables": variable_names,
        "grad_variables": grad_names
    }

    with open(os.path.join(self.export_dir, "training_meta.json"), "w") as f:
        f.write(json.dumps(meta))

    self.variable_placeholders = []
    with self.graph.as_default():
        assigns = []
        for v in variables:
            p = tf.placeholder(dtype=tf.float32, shape=v.shape)
            a = tf.assign(v, p)
            self.variable_placeholders.append(p)
            assigns.append(a)
        assign = tf.group(*assigns)
    self.assign = assign

    self.training_helper_layer = TFTrainingHelper(self.export_dir)

    data = self.dataset.rdd
    batch_size = self.dataset.batch_size
    # Append dummy sample weights (1.0) when the model expects them.
    sample_rdd = data.map(lambda t: Sample.from_ndarray(
        t + [np.array(1.0)] * add_sample_weights_num, [np.array([0.0])]))

    self.optimizer = Optimizer.create(self.training_helper_layer,
                                      sample_rdd,
                                      IdentityCriterion(),
                                      batch_size=batch_size,
                                      optim_method=self.optim_method)

    if val_outputs is not None and val_labels is not None:
        val_sample_rdd = self.dataset.val_rdd \
            .map(lambda t: Sample.from_ndarray(
                t + [np.array(1.0)] * add_sample_weights_num, [np.array([0.0])]))
        val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                      for m in to_list(val_method)]
        self.optimizer.set_validation(self.dataset.batch_size,
                                      val_sample_rdd,
                                      EveryEpoch(),
                                      val_method)
def to_sample(t):
    return Sample.from_ndarray(nest.flatten(t), [np.array([0.0])])