def predict_proba_on_batch(self, X):
    """Return ``[1 - p, p]`` values for one batch of inputs.

    The evaluation graph is run on ``X``; the first ``self.n_tasks``
    entries of its output are taken as ``p``. The complement ``1 - p`` is
    concatenated in front of ``p`` along the last axis and the first two
    axes are then swapped.

    Args:
      X: Batch of input features. Padded with ``pad_features`` up to
        ``self.batch_size`` when ``self.pad_batches`` is truthy.

    Returns:
      A new numpy array with the transposed, concatenated values. Rows
      introduced by padding are not removed here.

    Raises:
      ValueError: If the concatenated output is neither 2-D nor 3-D.
    """
    features = pad_features(self.batch_size, X) if self.pad_batches else X
    if not self._restored_model:
        self.restore()

    with self.eval_graph.graph.as_default():
        task_count = self.n_tasks
        with self._get_shared_session(train=False).as_default():
            feeds = self.construct_feed_dict(features)
            raw = self._get_shared_session(train=False).run(
                self.eval_graph.output, feed_dict=feeds)
            positive = np.asarray(raw[:task_count], dtype=float)

            # Build the complementary column and append along the last axis.
            negative = np.ones(np.shape(positive)) - positive
            probs = np.concatenate(
                [negative, positive], axis=positive.ndim - 1)

            # Swap the first two axes; only rank 2 and rank 3 are supported.
            rank = probs.ndim
            if rank not in (2, 3):
                raise ValueError(
                    'Unrecognized rank combination for output: %s ' %
                    (probs.shape,))
            probs = probs.transpose((1, 0, 2) if rank == 3 else (1, 0))
    return np.copy(probs)
def predict_proba_on_batch(self, X):
    """Evaluate one batch and return complement/score pairs.

    Args:
      X: Input features for a single batch. When ``self.pad_batches`` is
        truthy the batch is first padded via ``pad_features`` to
        ``self.batch_size``.

    Returns:
      A copy of the array obtained by concatenating ``1 - scores`` with
      ``scores`` on the last axis and swapping axes 0 and 1, where
      ``scores`` are the first ``self.n_tasks`` evaluation outputs.

    Raises:
      ValueError: If the concatenated array has a rank other than 2 or 3.
    """
    if self.pad_batches:
        X = pad_features(self.batch_size, X)
    if not self._restored_model:
        self.restore()

    axis_order_by_rank = {3: (1, 0, 2), 2: (1, 0)}

    with self.eval_graph.graph.as_default():
        n_tasks = self.n_tasks
        with self._get_shared_session(train=False).as_default():
            fetched = self._get_shared_session(train=False).run(
                self.eval_graph.output,
                feed_dict=self.construct_feed_dict(X))
            scores = np.asarray(fetched[:n_tasks], dtype=float)

            # Original intent (per source comment): expand the prediction
            # tensor with an n_classes(=2) dimension.
            paired = np.concatenate(
                [1.0 - scores, scores], axis=scores.ndim - 1)

            axis_order = axis_order_by_rank.get(paired.ndim)
            if axis_order is None:
                raise ValueError(
                    'Unrecognized rank combination for output: %s ' %
                    (paired.shape,))
            result = paired.transpose(axis_order)
    return np.copy(result)
def predict_on_batch(self, X, pad_batch=False):
    """Return model output for one batch of inputs.

    The model is restored first if ``self._restored_model`` is falsy, then
    the evaluation graph is run and the first ``self.n_tasks`` outputs are
    rearranged so that the sample axis comes first.

    Args:
      X: Batch of input features (anything supporting ``len``).
      pad_batch: When true, ``X`` is padded with ``pad_features`` up to
        ``self.batch_size`` before evaluation; rows produced by the
        padding are dropped from the result.

    Returns:
      A new numpy array with ``len(X)`` rows (counted before padding).
      Singleton axes other than the sample axis are squeezed away, so the
      shape is typically ``(n_samples,)`` or ``(n_samples, n_tasks)``.

    Raises:
      ValueError: If the stacked task outputs are not 1-D, 2-D or 3-D.
    """
    len_unpadded = len(X)
    if pad_batch:
        X = pad_features(self.batch_size, X)
    if not self._restored_model:
        self.restore()

    with self.eval_graph.graph.as_default():
        n_tasks = self.n_tasks
        session = self._get_shared_session(train=False)
        with session.as_default():
            feed_dict = self.construct_feed_dict(X)
            data = session.run(self.eval_graph.output, feed_dict=feed_dict)

    batch_outputs = np.asarray(data[:n_tasks], dtype=float)

    # Reorder to batch_size x n_tasks x ...
    if batch_outputs.ndim == 3:
        batch_outputs = batch_outputs.transpose((1, 0, 2))
    elif batch_outputs.ndim == 2:
        batch_outputs = batch_outputs.transpose((1, 0))
    elif batch_outputs.ndim == 1:
        # Edge case when the batch holds a single sample: each task
        # produced a scalar, so rebuild the (n_samples, n_tasks) layout.
        batch_outputs = batch_outputs.reshape((len(X), n_tasks))
    else:
        # The shape must be wrapped in a 1-tuple; passing the bare shape
        # tuple to ``%`` raises TypeError instead of this ValueError.
        raise ValueError(
            'Unrecognized rank combination for output: %s' %
            (batch_outputs.shape,))

    # Drop padded rows *before* squeezing so the sample axis can never be
    # mistaken for a singleton dimension.
    outputs = batch_outputs[:len_unpadded]

    # Squeeze only the non-sample axes. Squeezing every axis collapses the
    # sample axis for one-sample batches, after which slicing by sample
    # count would truncate tasks (or fail on a 0-d array).
    singleton_axes = tuple(
        axis for axis in range(1, outputs.ndim) if outputs.shape[axis] == 1)
    if singleton_axes:
        outputs = np.squeeze(outputs, axis=singleton_axes)
    return np.copy(outputs)
def default_generator(self,
                      dataset,
                      epochs=1,
                      predict=False,
                      deterministic=True,
                      pad_batches=True):
    """Yield one feed dict per batch of molecules.

    For every batch the molecules are padded with ``pad_features`` to
    ``self.batch_size`` and their atom features, pair features, the
    molecule index of each atom and the (atom, atom) index pairs are
    flattened into batch-level arrays.

    Args:
      dataset: Object exposing ``iterbatches(batch_size, deterministic,
        pad_batches)`` that yields ``(X, y, w, ids)`` tuples.
      epochs: Number of passes over ``dataset``.
      predict: Accepted for interface compatibility; not read here.
      deterministic: Forwarded to ``dataset.iterbatches``.
      pad_batches: Accepted for interface compatibility; not read here.
        ``iterbatches`` is always called with ``pad_batches=False`` and
        padding is done by ``pad_features`` instead.

    Yields:
      dict mapping ``self.labels[0]`` / ``self.task_weights[0]`` (when
      labels / weights are present) and the four molecule-structure
      placeholders to numpy arrays.
    """
    for _ in range(epochs):
        batches = dataset.iterbatches(
            batch_size=self.batch_size,
            deterministic=deterministic,
            pad_batches=False)
        for X_b, y_b, w_b, _ids_b in batches:
            X_b = pad_features(self.batch_size, X_b)

            feed_dict = {}
            if y_b is not None:
                if self.mode == 'classification':
                    feed_dict[self.labels[0]] = to_one_hot(
                        y_b.flatten(), self.n_classes).reshape(
                            -1, self.n_tasks, self.n_classes)
                else:
                    feed_dict[self.labels[0]] = y_b
            if w_b is not None:
                feed_dict[self.task_weights[0]] = w_b

            atom_feat = []
            pair_feat = []
            atom_split = []
            atom_to_pair = []
            # Offset of the current molecule's first atom within the batch.
            start = 0
            for im, mol in enumerate(X_b):
                n_atoms = mol.get_num_atoms()
                # Molecule index repeated once per atom.
                atom_split.extend([im] * n_atoms)
                # All ordered (atom_i, atom_j) pairs of this molecule,
                # shifted to batch-level atom indices; shape (n_atoms**2, 2).
                C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
                atom_to_pair.append(
                    np.stack(
                        [C1.flatten() + start, C0.flatten() + start], axis=1))
                start += n_atoms
                atom_feat.append(mol.get_atom_features())
                pair_feat.append(
                    np.reshape(mol.get_pair_features(),
                               (n_atoms * n_atoms, self.n_pair_feat)))

            feed_dict[self.atom_features] = np.concatenate(atom_feat, axis=0)
            feed_dict[self.pair_features] = np.concatenate(pair_feat, axis=0)
            feed_dict[self.atom_split] = np.array(atom_split)
            feed_dict[self.atom_to_pair] = np.concatenate(atom_to_pair, axis=0)
            yield feed_dict
def predict_on_batch(self, X, pad_batch=True):
    """Delegate a batch prediction to ``self.model_instance``.

    Args:
      X: Batch of input features.
      pad_batch: When true, ``X`` is padded with ``pad_features`` to the
        wrapped model's ``batch_size`` and the prediction is cut back to
        the original number of rows. When false, ``X`` is forwarded as is.

    Returns:
      Whatever ``self.model_instance.predict_on_batch`` returns, sliced
      to ``len(X)`` rows in the padded case.
    """
    if not pad_batch:
        return self.model_instance.predict_on_batch(X)

    real_count = len(X)
    padded = pad_features(self.model_instance.batch_size, X)
    predictions = self.model_instance.predict_on_batch(padded)
    return predictions[:real_count]
def default_generator(self,
                      dataset,
                      epochs=1,
                      mode='fit',
                      deterministic=True,
                      pad_batches=True):
    """Yield ``(inputs, labels, weights)`` triples, one per batch.

    Each batch of molecules is padded with ``pad_features`` to
    ``self.batch_size`` and flattened into batch-level arrays.

    Args:
      dataset: Object exposing ``iterbatches(batch_size, deterministic,
        pad_batches)`` that yields ``(X, y, w, ids)`` tuples.
      epochs: Number of passes over ``dataset``.
      mode: Accepted for interface compatibility; not read here (the
        classification check uses ``self.mode``).
      deterministic: Forwarded to ``dataset.iterbatches``.
      pad_batches: Forwarded to ``dataset.iterbatches``.

    Yields:
      Tuple ``(inputs, [y_b], [w_b])`` where ``inputs`` is the list
      ``[atom_features, pair_features, atom_split, atom_to_pair,
      n_samples]``. ``n_samples`` is the batch length measured before
      ``pad_features`` is applied. ``y_b`` is one-hot encoded when
      ``self.mode == 'classification'`` and labels are present.
    """
    for _ in range(epochs):
        batches = dataset.iterbatches(
            batch_size=self.batch_size,
            deterministic=deterministic,
            pad_batches=pad_batches)
        for X_b, y_b, w_b, _ids_b in batches:
            # Record the batch length before pad_features changes it.
            n_samples = np.array(X_b.shape[0])
            X_b = pad_features(self.batch_size, X_b)
            if y_b is not None and self.mode == 'classification':
                y_b = to_one_hot(y_b.flatten(), self.n_classes).reshape(
                    -1, self.n_tasks, self.n_classes)

            atom_feat = []
            pair_feat = []
            atom_split = []
            atom_to_pair = []
            # Offset of the current molecule's first atom within the batch.
            start = 0
            for im, mol in enumerate(X_b):
                n_atoms = mol.get_num_atoms()
                # Molecule index repeated once per atom.
                atom_split.extend([im] * n_atoms)
                # All ordered (atom_i, atom_j) pairs of this molecule,
                # shifted to batch-level atom indices; shape (n_atoms**2, 2).
                C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
                atom_to_pair.append(
                    np.stack(
                        [C1.flatten() + start, C0.flatten() + start], axis=1))
                start += n_atoms
                atom_feat.append(mol.get_atom_features())
                pair_feat.append(
                    np.reshape(mol.get_pair_features(),
                               (n_atoms * n_atoms, self.n_pair_feat)))

            inputs = [
                np.concatenate(atom_feat, axis=0),
                np.concatenate(pair_feat, axis=0),
                np.array(atom_split),
                np.concatenate(atom_to_pair, axis=0),
                n_samples,
            ]
            yield (inputs, [y_b], [w_b])
def predict_proba_on_batch(self, X, n_classes=2):
    """Return per-task class probabilities for one batch.

    Args:
      X: Batch of input features; padded with ``pad_features`` to
        ``self.batch_size`` when ``self.pad_batches`` is truthy.
      n_classes: Size of the last axis of the returned array.

    Returns:
      Array of shape ``(len(X), self.n_tasks, n_classes)`` (``len(X)``
      measured after any padding) whose slice ``[:, task, :]`` holds the
      ``task``-th entry of ``self.outputs`` as evaluated by ``self.sess``.
    """
    if self.pad_batches:
        X = pad_features(self.batch_size, X)
    task_count = self.n_tasks

    with self.sess.as_default():
        per_task = self.sess.run(
            self.outputs, feed_dict=self.construct_feed_dict(X))

    probabilities = np.zeros((len(X), task_count, n_classes))
    for task_index, task_values in enumerate(per_task):
        probabilities[:, task_index, :] = task_values
    return probabilities
def predict_on_batch(self, X):
    """Return the arg-max class index of every task for one batch.

    Args:
      X: Batch of input features; padded with ``pad_features`` to
        ``self.batch_size`` when ``self.pad_batches`` is truthy.

    Returns:
      Float array of shape ``(len(X), self.n_tasks)`` (``len(X)`` measured
      after any padding). Column ``task`` is ``np.argmax`` along axis 1 of
      the ``task``-th entry of ``self.outputs`` as evaluated by
      ``self.sess``.
    """
    if self.pad_batches:
        X = pad_features(self.batch_size, X)
    task_count = self.n_tasks

    with self.sess.as_default():
        per_task = self.sess.run(
            self.outputs, feed_dict=self.construct_feed_dict(X))

    predictions = np.zeros((len(X), task_count))
    for task_index, task_values in enumerate(per_task):
        predictions[:, task_index] = np.argmax(task_values, axis=1)
    return predictions
def predict_proba_on_batch(self, X):
    """Return the evaluation-graph output for one batch of inputs.

    The model is restored first when ``self._restored_model`` is falsy.

    Args:
      X: Batch of input features; padded with ``pad_features`` to
        ``self.batch_size`` when ``self.pad_batches`` is truthy.

    Returns:
      A copy of the first ``self.n_tasks`` graph outputs, stacked and
      transposed so that axes 0 and 1 are swapped (intended layout per the
      original comment: batch_size x n_tasks x ...). The array may be 3-D,
      e.g. for classifiers that return class probabilities.

    Raises:
      ValueError: If the stacked output is neither 2-D nor 3-D.
    """
    features = pad_features(self.batch_size, X) if self.pad_batches else X
    if not self._restored_model:
        self.restore()

    with self.eval_graph.graph.as_default():
        task_count = self.n_tasks
        with self._get_shared_session(train=False).as_default():
            feeds = self.construct_feed_dict(features)
            fetched = self._get_shared_session(train=False).run(
                self.eval_graph.output, feed_dict=feeds)
            stacked = np.asarray(fetched[:task_count], dtype=float)

            rank = stacked.ndim
            if rank not in (2, 3):
                raise ValueError(
                    'Unrecognized rank combination for output: %s ' %
                    (stacked.shape,))
            # No normalisation happens here; the original source notes that
            # softmax is already applied when the graph is constructed.
            result = stacked.transpose((1, 0, 2) if rank == 3 else (1, 0))
    return np.copy(result)
def predict_proba_on_batch(self, X, pad_batch=False):
    """Run the evaluation graph on one batch and return its output.

    The model is restored first when ``self._restored_model`` is falsy.

    Args:
      X: Batch of input features.
      pad_batch: When true, ``X`` is padded with ``pad_features`` up to
        ``self.batch_size`` before evaluation. Padded rows are not removed
        from the result.

    Returns:
      A copy of the first ``self.n_tasks`` graph outputs after swapping
      axes 0 and 1 (intended layout per the original comment:
      batch_size x n_tasks x ...).

    Raises:
      ValueError: If the stacked output is neither 2-D nor 3-D.
    """
    if pad_batch:
        X = pad_features(self.batch_size, X)
    if not self._restored_model:
        self.restore()

    axis_order_by_rank = {3: (1, 0, 2), 2: (1, 0)}

    with self.eval_graph.graph.as_default():
        n_tasks = self.n_tasks
        with self._get_shared_session(train=False).as_default():
            fetched = self._get_shared_session(train=False).run(
                self.eval_graph.output,
                feed_dict=self.construct_feed_dict(X))
            stacked = np.asarray(fetched[:n_tasks], dtype=float)

            axis_order = axis_order_by_rank.get(stacked.ndim)
            if axis_order is None:
                raise ValueError(
                    'Unrecognized rank combination for output: %s ' %
                    (stacked.shape,))
            # The original source notes that softmax is already applied
            # during graph construction, so values are returned untouched.
            result = stacked.transpose(axis_order)
    return np.copy(result)
def predict_on_batch(self, X, pad_batch=False):
    """Return hard class predictions for one batch of inputs.

    The first ``self.n_tasks`` evaluation outputs ``p`` are paired with
    their complement ``1 - p`` on a new axis, the result is reordered to
    put the batch axis first, and ``from_one_hot(..., axis=-1)`` turns
    each pair into a class label.

    Args:
      X: Batch of input features.
      pad_batch: When true, ``X`` is padded with ``pad_features`` up to
        ``self.batch_size`` before evaluation. Padded rows are not removed
        from the result.

    Returns:
      A new numpy array with the output of ``from_one_hot``.

    Raises:
      ValueError: If the paired output is neither 2-D nor 3-D after
        squeezing.
    """
    if pad_batch:
        X = pad_features(self.batch_size, X)
    if not self._restored_model:
        self.restore()

    with self.eval_graph.graph.as_default():
        n_tasks = self.n_tasks
        session = self._get_shared_session(train=False)
        with session.as_default():
            feed_dict = self.construct_feed_dict(X)
            data = session.run(self.eval_graph.output, feed_dict=feed_dict)

    batch_output = np.asarray(data[:n_tasks], dtype=float)

    # Pair every prediction with its complement on a new axis 2
    # (n_classes = 2), then drop singleton axes.
    # NOTE(review): the squeeze is unrestricted, so a task or batch axis of
    # length 1 is dropped as well and the 2-D branch below then transposes
    # a different pair of axes -- confirm single-task / single-sample
    # callers get the layout they expect.
    complimentary = np.ones(np.shape(batch_output)) - batch_output
    batch_output = np.squeeze(
        np.stack([complimentary, batch_output], axis=2))

    # Reorder to batch_size x n_tasks x ...
    if batch_output.ndim == 3:
        batch_output = batch_output.transpose((1, 0, 2))
    elif batch_output.ndim == 2:
        batch_output = batch_output.transpose((1, 0))
    else:
        raise ValueError(
            'Unrecognized rank combination for output: %s' %
            (batch_output.shape,))

    outputs = np.array(from_one_hot(np.squeeze(batch_output), axis=-1))
    return np.copy(outputs)
def predict_on_batch(self, X, pad_batch=False):
    """Predict a class label per sample and task for one batch.

    Args:
      X: Batch of input features.
      pad_batch: When true, ``X`` is padded with ``pad_features`` up to
        ``self.batch_size`` before evaluation. Rows added by the padding
        stay in the result.

    Returns:
      A new numpy array holding ``from_one_hot(pairs, axis=-1)``, where
      ``pairs`` stacks ``1 - p`` and ``p`` for the first ``self.n_tasks``
      evaluation outputs ``p`` and is reordered batch-first.

    Raises:
      ValueError: If the stacked pairs are neither 2-D nor 3-D after
        squeezing.
    """
    if pad_batch:
        X = pad_features(self.batch_size, X)
    if not self._restored_model:
        self.restore()

    with self.eval_graph.graph.as_default():
        n_tasks = self.n_tasks
        session = self._get_shared_session(train=False)
        with session.as_default():
            data = session.run(
                self.eval_graph.output,
                feed_dict=self.construct_feed_dict(X))

    positive = np.asarray(data[:n_tasks], dtype=float)
    negative = np.ones(np.shape(positive)) - positive

    # New axis 2 carries the two classes; singleton axes are then removed.
    # NOTE(review): because the squeeze has no axis restriction, a task or
    # batch axis of length 1 disappears too, which changes which axes the
    # 2-D transpose below swaps -- verify against single-task callers.
    pairs = np.squeeze(np.stack([negative, positive], axis=2))

    # Reorder to batch_size x n_tasks x ...
    if pairs.ndim == 3:
        pairs = pairs.transpose((1, 0, 2))
    elif pairs.ndim == 2:
        pairs = pairs.transpose((1, 0))
    else:
        raise ValueError(
            'Unrecognized rank combination for output: %s' % (pairs.shape,))

    labels = np.array(from_one_hot(np.squeeze(pairs), axis=-1))
    return np.copy(labels)
def default_generator(self,
                      dataset,
                      epochs=1,
                      predict=False,
                      deterministic=True,
                      pad_batches=True):
    """Generate feed dicts for batches of molecules.

    Every batch is padded with ``pad_features`` to ``self.batch_size`` and
    the per-molecule atom features, pair features, atom-to-molecule
    indices and atom-pair indices are merged into batch-level arrays.

    Args:
      dataset: Object exposing ``iterbatches(batch_size, deterministic,
        pad_batches)`` that yields ``(X, y, w, ids)`` tuples.
      epochs: Number of passes over ``dataset``.
      predict: Accepted for interface compatibility; not read here.
      deterministic: Forwarded to ``dataset.iterbatches``.
      pad_batches: Accepted for interface compatibility; not read here.
        ``iterbatches`` is always called with ``pad_batches=False`` and
        ``pad_features`` performs the padding.

    Yields:
      dict keyed by ``self.labels[0]`` and ``self.task_weights[0]`` (only
      when labels / weights are not ``None``), ``self.atom_features``,
      ``self.pair_features``, ``self.atom_split`` and
      ``self.atom_to_pair``.
    """
    is_classification = self.mode == 'classification'
    for _ in range(epochs):
        for X_b, y_b, w_b, _ids_b in dataset.iterbatches(
                batch_size=self.batch_size,
                deterministic=deterministic,
                pad_batches=False):
            X_b = pad_features(self.batch_size, X_b)

            feed_dict = {}
            if y_b is not None:
                if is_classification:
                    one_hot = to_one_hot(y_b.flatten(), self.n_classes)
                    feed_dict[self.labels[0]] = one_hot.reshape(
                        -1, self.n_tasks, self.n_classes)
                else:
                    feed_dict[self.labels[0]] = y_b
            if w_b is not None:
                feed_dict[self.task_weights[0]] = w_b

            atom_feat = []
            pair_feat = []
            atom_split = []
            atom_to_pair = []
            # Running count of atoms seen so far in this batch; used to
            # shift molecule-local atom indices to batch-level indices.
            start = 0
            for im, mol in enumerate(X_b):
                n_atoms = mol.get_num_atoms()
                # One molecule index per atom.
                atom_split.extend([im] * n_atoms)
                # Every ordered atom pair of the molecule as batch-level
                # indices; shape (n_atoms**2, 2).
                C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
                atom_to_pair.append(
                    np.stack(
                        [C1.flatten() + start, C0.flatten() + start], axis=1))
                start += n_atoms
                atom_feat.append(mol.get_atom_features())
                pair_feat.append(
                    np.reshape(mol.get_pair_features(),
                               (n_atoms * n_atoms, self.n_pair_feat)))

            feed_dict[self.atom_features] = np.concatenate(atom_feat, axis=0)
            feed_dict[self.pair_features] = np.concatenate(pair_feat, axis=0)
            feed_dict[self.atom_split] = np.array(atom_split)
            feed_dict[self.atom_to_pair] = np.concatenate(atom_to_pair, axis=0)
            yield feed_dict