def test_reshape_inputs(self):
    """Test that layers can automatically reshape inconsistent inputs.

    Calls ReduceSquareDifference on a (2, 3) tensor and a (1, 6, 1) tensor
    and compares the evaluated output against a NumPy reference computed
    after reshaping the first input to the shape of the second.
    """
    value1 = np.random.uniform(size=(2, 3)).astype(np.float32)
    value2 = np.random.uniform(size=(1, 6, 1)).astype(np.float32)
    with self.session():
        out_tensor = ReduceSquareDifference()(tf.constant(value1),
                                              tf.constant(value2))
        result = out_tensor.eval()
        # NumPy reference: mean of the squared element-wise difference.
        diff = value1.reshape((1, 6, 1)) - value2
        loss = np.mean(diff**2)
        # Compare the *absolute* relative error. A signed comparison would
        # pass for any result larger than the reference, however wrong.
        assert abs(loss - result) / loss < 1e-6
def test_reshape_inputs(self):
    """Test that layers can automatically reshape inconsistent inputs.

    Calls ReduceSquareDifference on a (2, 3) tensor and a (1, 6, 1) tensor
    and compares the evaluated output against a NumPy reference computed
    after reshaping the first input to the shape of the second.
    """
    value1 = np.random.uniform(size=(2, 3)).astype(np.float32)
    value2 = np.random.uniform(size=(1, 6, 1)).astype(np.float32)
    with self.session():
        out_tensor = ReduceSquareDifference()(tf.constant(value1),
                                              tf.constant(value2))
        result = out_tensor.eval()
        # NumPy reference: mean of the squared element-wise difference.
        diff = value1.reshape((1, 6, 1)) - value2
        loss = np.mean(diff**2)
        # Compare the *absolute* relative error. A signed comparison would
        # pass for any result larger than the reference, however wrong.
        assert abs(loss - result) / loss < 1e-6
def test_reduce_square_difference(self):
    """Check that ReduceSquareDifference can be called on two tensors."""
    n_rows, n_cols = 10, 5
    first = np.random.rand(n_rows, n_cols)
    second = np.random.rand(n_rows, n_cols)
    with self.session():
        lhs = tf.convert_to_tensor(first, dtype=tf.float32)
        rhs = tf.convert_to_tensor(second, dtype=tf.float32)
        evaluated = ReduceSquareDifference()(lhs, rhs).eval()
        # The evaluated output must come back as a float32 scalar.
        assert isinstance(evaluated, np.float32)
def test_reduce_square_difference(self):
    """Check that ReduceSquareDifference can be called on two tensors."""
    n_rows, n_cols = 10, 5
    first = np.random.rand(n_rows, n_cols)
    second = np.random.rand(n_rows, n_cols)
    with self.session():
        lhs = tf.convert_to_tensor(first, dtype=tf.float32)
        rhs = tf.convert_to_tensor(second, dtype=tf.float32)
        evaluated = ReduceSquareDifference()(lhs, rhs).eval()
        # The evaluated output must come back as a float32 scalar.
        assert isinstance(evaluated, np.float32)
def test_saliency_mapping(self):
    """Compute a saliency map and sanity-check it with a finite difference.

    Builds a TensorGraph with a tanh Dense output and a
    ReduceSquareDifference loss, asks it for the saliency of one random
    input, then checks the first two dimensions of the result and that
    stepping along each task's saliency row shifts that task's prediction
    by the expected amount.
    """
    n_tasks = 3
    n_features = 5

    features = Feature(shape=(None, n_features))
    dense = Dense(
        out_channels=n_tasks, in_layers=[features], activation_fn=tf.tanh)
    label = Label(shape=(None, n_tasks))
    loss = ReduceSquareDifference(in_layers=[dense, label])

    model = dc.models.TensorGraph()
    model.add_output(dense)
    model.set_loss(loss)

    x = np.random.random(n_features)
    saliency = model.compute_saliency(x)
    assert saliency.shape[0] == n_tasks
    assert saliency.shape[1] == n_features

    def predict_at(point):
        """Run the model on a single sample and flatten the output."""
        batch = point.reshape((1, n_features))
        return model.predict_on_batch(batch).flatten()

    # Move a small distance forwards and backwards along each task's
    # saliency row; the two predictions should differ by norm * delta.
    delta = 0.01
    for task in range(n_tasks):
        row = saliency[task]
        norm = np.sqrt(np.sum(row**2))
        step = 0.5 * delta / norm
        forward = predict_at(x + row * step)
        backward = predict_at(x - row * step)
        self.assertAlmostEqual(
            forward[task], (backward + norm * delta)[task], places=4)
def test_compute_model_performance_multitask_regressor(self):
    """Fit a two-task regressor on constant targets and evaluate its MAE.

    The targets are 0.5 for the first task and -0.5 for the second. After
    1000 epochs the per-task mean absolute error must be within 1.0 of 0.
    """
    random_seed = 42
    n_data_points = 20
    n_features = 2
    n_tasks = 2

    np.random.seed(seed=random_seed)
    X = np.random.rand(n_data_points, n_features)
    # One constant target column per task.
    targets = np.stack(
        [np.full(n_data_points, 0.5),
         np.full(n_data_points, -0.5)], axis=1)
    dataset = NumpyDataset(X, targets)

    features = Feature(shape=(None, n_features))
    label = Label(shape=(None, n_tasks))
    dense = Dense(out_channels=n_tasks, in_layers=[features])
    loss = ReduceSquareDifference(in_layers=[dense, label])

    tg = dc.models.TensorGraph(random_seed=random_seed, learning_rate=0.1)
    tg.add_output(dense)
    tg.set_loss(loss)
    tg.fit(dataset, nb_epoch=1000)

    mae = dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
    results = tg.evaluate_generator(
        tg.default_generator(dataset), [mae],
        labels=[label],
        per_task_metrics=True)
    # Index 1 holds the per-task scores.
    per_task = list(results[1].values())
    assert_true(np.all(np.isclose(per_task, [0.0, 0.0], atol=1.0)))
def _create_graph(self, feature_shape, label_shape):
    """Assemble the full TensorGraph from the layers added so far.

    Does nothing when the graph has already been built. Otherwise chains
    every layer in ``self._layer_list`` after a new Feature input, records
    the last layer as the model output, attaches the loss selected by
    ``self._loss_function`` and builds the graph.

    Parameters
    ----------
    feature_shape:
      Shape passed to the Feature input layer.
    label_shape:
      Shape passed to the Label layer.

    Raises
    ------
    ValueError
      If no layers were added, if a layer already has more than one
      in_layer, or if the loss function is not "binary_crossentropy" or
      "mse".
    """
    if self.built:
        # The graph has already been created.
        return

    # Input placeholders for features and labels.
    features = Feature(shape=feature_shape)
    labels = Label(shape=label_shape)

    if len(self._layer_list) == 0:
        raise ValueError("No layers have been added to model.")

    # Wire each layer to the one before it, starting from the features.
    upstream = features
    for layer in self._layer_list:
        if len(layer.in_layers) > 1:
            raise ValueError("Cannot specify more than one "
                             "in_layer for Sequential.")
        layer.in_layers += [upstream]
        upstream = layer

    # The last layer is the output of the model.
    self.outputs.append(upstream)

    loss_name = self._loss_function
    if loss_name == "binary_crossentropy":
        cross_entropy = SoftMaxCrossEntropy(in_layers=[labels, upstream])
        self.set_loss(ReduceMean(in_layers=[cross_entropy]))
    elif loss_name == "mse":
        self.set_loss(ReduceSquareDifference(in_layers=[upstream, labels]))
    else:
        # TODO(rbharath): Add in support for additional
        # losses.
        raise ValueError("Unsupported loss.")

    self.build()
def test_ReduceSquareDifference_pickle():
    """Build and save a TensorGraph that uses ReduceSquareDifference.

    The same ReduceSquareDifference layer, fed twice by one Feature layer,
    serves as both the graph's output and its loss.
    """
    graph = TensorGraph()
    inputs = Feature(shape=(graph.batch_size, 1))
    square_diff = ReduceSquareDifference(in_layers=[inputs, inputs])
    graph.add_output(square_diff)
    graph.set_loss(square_diff)
    graph.build()
    graph.save()
def test_compute_model_performance_multitask_regressor(self):
    """Fit two single-output heads through a Databag and evaluate the MAE.

    Each head has its own Label, Dense layer and ReduceSquareDifference
    loss; the total loss is the ReduceMean of both. Targets are the
    constants 0.5 and -0.5, and the per-task mean absolute error must be
    within 1.0 of 0 after training.
    """
    random_seed = 42
    n_data_points = 20
    n_features = 2

    np.random.seed(seed=random_seed)
    feature_ds = NumpyDataset(np.random.rand(n_data_points, n_features))
    # One (n_data_points, 1) column of constant targets per task.
    target_sets = [
        NumpyDataset(np.full((n_data_points, 1), 0.5)),
        NumpyDataset(np.full((n_data_points, 1), -0.5)),
    ]

    databag = Databag()
    features = Feature(shape=(None, n_features))
    databag.add_dataset(features, feature_ds)

    outputs = []
    losses = []
    labels = []
    for target_ds in target_sets:
        label = Label(shape=(None, 1))
        dense = Dense(out_channels=1, in_layers=[features])
        loss = ReduceSquareDifference(in_layers=[dense, label])
        outputs.append(dense)
        losses.append(loss)
        labels.append(label)
        databag.add_dataset(label, target_ds)
    total_loss = ReduceMean(in_layers=losses)

    tg = dc.models.TensorGraph(
        mode="regression",
        batch_size=20,
        random_seed=random_seed,
        learning_rate=0.1)
    for head in outputs:
        tg.add_output(head)
    tg.set_loss(total_loss)

    training_batches = databag.iterbatches(
        epochs=1000, batch_size=tg.batch_size, pad_batches=True)
    tg.fit_generator(training_batches)

    mae = dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
    results = tg.evaluate_generator(
        databag.iterbatches(), [mae], labels=labels, per_task_metrics=True)
    # Index 1 holds the per-task scores.
    per_task = list(results[1].values())
    assert_true(np.all(np.isclose(per_task, [0.0, 0.0], atol=1.0)))
def test_compute_model_performance_singletask_regressor_ordering(self):
    """Fit y = x + 1 on 1000 ordered points and evaluate one sample at a time.

    Training uses padded batches of the model's batch size; evaluation
    iterates the Databag with batch_size=1 and checks that the per-task
    scores are within 0.5 of 0.
    """
    n_data_points = 1000
    n_features = 1

    inputs = np.expand_dims(np.arange(n_data_points), axis=-1)
    targets = inputs + 1
    feature_ds = NumpyDataset(inputs)
    target_sets = [NumpyDataset(targets)]

    databag = Databag()
    features = Feature(shape=(None, n_features))
    databag.add_dataset(features, feature_ds)

    outputs = []
    losses = []
    labels = []
    for target_ds in target_sets:
        label = Label(shape=(None, 1))
        dense = Dense(out_channels=1, in_layers=[features])
        loss = ReduceSquareDifference(in_layers=[dense, label])
        outputs.append(dense)
        losses.append(loss)
        labels.append(label)
        databag.add_dataset(label, target_ds)
    total_loss = ReduceMean(in_layers=losses)

    tg = dc.models.TensorGraph(mode="regression", learning_rate=0.1)
    for head in outputs:
        tg.add_output(head)
    tg.set_loss(total_loss)

    training_batches = databag.iterbatches(
        epochs=1000, batch_size=tg.batch_size, pad_batches=True)
    tg.fit_generator(training_batches)

    metrics = [
        dc.metrics.Metric(
            dc.metrics.mean_absolute_error, np.mean, mode="regression"),
        dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression"),
    ]
    scores = tg.evaluate_generator(
        databag.iterbatches(batch_size=1),
        metrics,
        labels=labels,
        per_task_metrics=True)
    print(scores)
    # Index 1 holds the per-task scores.
    per_task = list(scores[1].values())
    assert_true(np.all(np.isclose(per_task, [0.0], atol=0.5)))
def fit(self, dataset, loss, **kwargs):
    """Fit the model on ``dataset`` using the named loss.

    On the first call (while ``self.built`` is false) the layers in
    ``self._layer_list`` are chained after a Feature input whose shape is
    taken from the dataset, the last layer becomes the model output, and
    the requested loss is attached. The actual fitting is then delegated
    to the parent class.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset with data
    loss: string
      Only "binary_crossentropy" or "mse" for now.
    **kwargs:
      Forwarded unchanged to the parent class's ``fit``.

    Raises
    ------
    ValueError
      If no layers were added, if a layer already has more than one
      in_layer, or if ``loss`` is not supported.
    """
    X_shape, y_shape, _, _ = dataset.get_shape()

    # Calling fit() for first time
    if not self.built:
        # Drop the leading sample dimension and replace it with None.
        features = Feature(shape=(None,) + X_shape[1:])
        labels = Label(shape=(None,) + y_shape[1:])

        if len(self._layer_list) == 0:
            raise ValueError("No layers have been added to model.")

        # Wire each layer to the one before it, starting from the features.
        upstream = features
        for layer in self._layer_list:
            if len(layer.in_layers) > 1:
                raise ValueError("Cannot specify more than one "
                                 "in_layer for Sequential.")
            layer.in_layers += [upstream]
            upstream = layer

        # The last layer is the output of the model.
        self.outputs.append(upstream)

        if loss == "binary_crossentropy":
            cross_entropy = SoftMaxCrossEntropy(in_layers=[labels, upstream])
            self.set_loss(ReduceMean(in_layers=[cross_entropy]))
        elif loss == "mse":
            self.set_loss(
                ReduceSquareDifference(in_layers=[upstream, labels]))
        else:
            # TODO(rbharath): Add in support for additional
            # losses.
            raise ValueError("Unsupported loss.")

    super(Sequential, self).fit(dataset, **kwargs)
def test_single_task_regressor(self):
    """Fit a single Dense output to a constant target of 0.5.

    After 1000 epochs every prediction on the training inputs must be
    within 3.0 of the target.
    """
    n_data_points = 20
    n_features = 2

    X = np.random.rand(n_data_points, n_features)
    y = [0.5] * n_data_points
    dataset = NumpyDataset(X, y)

    features = Feature(shape=(None, n_features))
    dense = Dense(out_channels=1, in_layers=[features])
    label = Label(shape=(None, 1))
    loss = ReduceSquareDifference(in_layers=[dense, label])

    tg = dc.models.TensorGraph(learning_rate=0.01)
    tg.add_output(dense)
    tg.set_loss(loss)
    tg.fit(dataset, nb_epoch=1000)

    predicted = np.squeeze(tg.predict_on_batch(X))
    within_tolerance = np.isclose(predicted, y, atol=3.0)
    assert_true(np.all(within_tolerance))
def test_multi_task_regressor(self):
    """Fit two single-output heads through a Databag and check predictions.

    Each head has its own Label, Dense layer and ReduceSquareDifference
    loss; the total loss is the ReduceMean of both. Targets are the
    constants 0.5 and -0.5, and each head's predictions must be within 1.5
    of its targets.
    """
    n_data_points = 20
    n_features = 2

    feature_ds = NumpyDataset(np.random.rand(n_data_points, n_features))
    # One (n_data_points, 1) column of constant targets per task.
    target_sets = [
        NumpyDataset(np.full((n_data_points, 1), 0.5)),
        NumpyDataset(np.full((n_data_points, 1), -0.5)),
    ]

    databag = Databag()
    features = Feature(shape=(None, n_features))
    databag.add_dataset(features, feature_ds)

    outputs = []
    losses = []
    for target_ds in target_sets:
        label = Label(shape=(None, 1))
        dense = Dense(out_channels=1, in_layers=[features])
        loss = ReduceSquareDifference(in_layers=[dense, label])
        outputs.append(dense)
        losses.append(loss)
        databag.add_dataset(label, target_ds)
    total_loss = ReduceMean(in_layers=losses)

    tg = dc.models.TensorGraph(learning_rate=0.01)
    for head in outputs:
        tg.add_output(head)
    tg.set_loss(total_loss)

    training_batches = databag.iterbatches(
        epochs=1000, batch_size=tg.batch_size, pad_batches=True)
    tg.fit_generator(training_batches)

    predictions = tg.predict_on_generator(databag.iterbatches())
    for task, target_ds in enumerate(target_sets):
        # The targets were stored as the X array of each NumpyDataset.
        y_real = target_ds.X
        y_pred = predictions[task]
        assert_true(np.all(np.isclose(y_pred, y_real, atol=1.5)))
def test_weighted_combo(self):
    """Tests that weighted linear combinations can be built.

    Combines two feature inputs with WeightedLinearCombo, sums the result
    along axis 1, and fits it against random labels for one epoch with a
    ReduceSquareDifference loss.
    """
    n_samples = 10
    n_features = 5

    first_ds = NumpyDataset(np.random.rand(n_samples, n_features))
    second_ds = NumpyDataset(np.random.rand(n_samples, n_features))
    label_ds = NumpyDataset(np.random.rand(n_samples))

    features_1 = Feature(shape=(None, n_features))
    features_2 = Feature(shape=(None, n_features))
    labels = Label(shape=(None,))

    combo = WeightedLinearCombo(in_layers=[features_1, features_2])
    summed = ReduceSum(in_layers=[combo], axis=1)
    loss = ReduceSquareDifference(in_layers=[summed, labels])

    databag = Databag({
        features_1: first_ds,
        features_2: second_ds,
        labels: label_ds,
    })

    tg = dc.models.TensorGraph(learning_rate=0.1, use_queue=False)
    tg.set_loss(loss)
    tg.fit_generator(databag.iterbatches(epochs=1))