def test_scissors_tanimotos(self):
    """
    Check SCISSORS Tanimoto estimates built from raw overlap values.

    A basis of 200 indices is drawn at random from ``range(self.n_mols)``
    (with replacement, so repeated indices are possible). The
    basis-vs-basis block of ``self.data['ab_overlap']`` seeds the SCISSORS
    model, and the all-rows-vs-basis block is passed to ``get_tanimotos``
    with ``max_dim=100``. The result is compared against
    ``self.data['tanimotos']`` by ``assert_scissors``.
    """
    basis_idx = np.random.randint(self.n_mols, size=200)
    overlaps = self.data['ab_overlap']

    # Inner products among basis members, then library rows vs. the basis.
    basis_vs_basis = overlaps[basis_idx][:, basis_idx]
    library_vs_basis = overlaps[:, basis_idx]

    model = SCISSORS(basis_vs_basis)
    approx = model.get_tanimotos(library_vs_basis, max_dim=100)
    assert_scissors(approx, self.data['tanimotos'])
def test_scissors_tanimotos_with_overlaps(self):
    """
    Check SCISSORS Tanimoto estimates when self-overlaps are supplied.

    Same setup as the default overlap-based test (200 random basis indices
    drawn with replacement, inner products taken from
    ``self.data['ab_overlap']``), except that ``self.data['a_overlap']`` is
    handed to ``get_tanimotos`` as ``self_overlap`` instead of leaving that
    argument at its default. The result is compared against
    ``self.data['tanimotos']`` by ``assert_scissors``.
    """
    basis_idx = np.random.randint(self.n_mols, size=200)
    overlaps = self.data['ab_overlap']

    # Inner products among basis members, then library rows vs. the basis.
    basis_vs_basis = overlaps[basis_idx][:, basis_idx]
    library_vs_basis = overlaps[:, basis_idx]

    model = SCISSORS(basis_vs_basis)
    approx = model.get_tanimotos(
        library_vs_basis,
        self_overlap=self.data['a_overlap'],
        max_dim=100,
    )
    assert_scissors(approx, self.data['tanimotos'])
def test_parsimonious_scissors_tanimotos(self):
    """
    Check SCISSORS Tanimoto estimates using parsimonious inner products.

    Instead of raw overlaps, the inner products are derived from the
    reference Tanimotos in ``self.data['tanimotos']`` via
    ``SCISSORS.get_inner_products_from_tanimotos``. A basis of 200 random
    indices (drawn with replacement) is used, ``get_tanimotos`` is called
    with ``max_dim=100``, and the estimates are compared against the same
    reference Tanimotos by ``assert_scissors``.
    """
    basis_idx = np.random.randint(self.n_mols, size=200)

    # Convert the sliced Tanimoto blocks to inner products in one step each.
    basis_vs_basis = SCISSORS.get_inner_products_from_tanimotos(
        self.data['tanimotos'][basis_idx][:, basis_idx])
    library_vs_basis = SCISSORS.get_inner_products_from_tanimotos(
        self.data['tanimotos'][:, basis_idx])

    model = SCISSORS(basis_vs_basis)
    approx = model.get_tanimotos(library_vs_basis, max_dim=100)
    assert_scissors(approx, self.data['tanimotos'])
def test_parsimonious_scissors_tanimotos_with_overlaps(self):
    """
    Check parsimonious SCISSORS estimates with explicit unit self-overlaps.

    Inner products are derived from ``self.data['tanimotos']`` via
    ``SCISSORS.get_inner_products_from_tanimotos`` for a basis of 200 random
    indices (drawn with replacement). ``get_tanimotos`` receives a
    ``self_overlap`` vector of ones, one entry per row of the
    library-vs-basis matrix, together with ``max_dim=100``. The estimates
    are compared against ``self.data['tanimotos']`` by ``assert_scissors``.
    """
    basis_idx = np.random.randint(self.n_mols, size=200)

    # Convert the sliced Tanimoto blocks to inner products in one step each.
    basis_vs_basis = SCISSORS.get_inner_products_from_tanimotos(
        self.data['tanimotos'][basis_idx][:, basis_idx])
    library_vs_basis = SCISSORS.get_inner_products_from_tanimotos(
        self.data['tanimotos'][:, basis_idx])

    model = SCISSORS(basis_vs_basis)

    # One unit self-overlap per library row.
    unit_self_overlap = np.ones(library_vs_basis.shape[0], dtype=float)
    approx = model.get_tanimotos(
        library_vs_basis,
        self_overlap=unit_self_overlap,
        max_dim=100,
    )
    assert_scissors(approx, self.data['tanimotos'])
def load(filename, overlap):
    """
    Load ROCS data from HDF5.

    Parameters
    ----------
    filename : str
        File containing ROCS overlay results.
    overlap : bool
        Whether to use actual pairwise overlaps as inner products. If False,
        use overlaps calculated under the parsimonious assumption of unity
        molecular self-overlap values.

    Returns
    -------
    shape_ip, color_ip
        Shape and color inner products. With ``overlap`` set these are the
        'shape_overlap' and 'color_overlap' datasets read in full; otherwise
        they are the 'shape_tanimoto' and 'color_tanimoto' datasets passed
        through ``SCISSORS.get_inner_products_from_tanimotos``.
    """
    # Open read-only explicitly. Without a mode, older h5py releases open
    # the file read/write and may create it when it does not exist, so a
    # mistyped path would leave an empty file behind and read-only inputs
    # could fail to open.
    with h5py.File(filename, 'r') as f:
        if overlap:
            shape_ip = f['shape_overlap'][:]
            color_ip = f['color_overlap'][:]
        else:
            shape_ip = SCISSORS.get_inner_products_from_tanimotos(
                f['shape_tanimoto'][:])
            color_ip = SCISSORS.get_inner_products_from_tanimotos(
                f['color_tanimoto'][:])
    return shape_ip, color_ip
def main():
    """
    Generate SCISSORS vectors for shape and color data and save them.

    Reads its configuration from the module-level ``args`` object
    (presumably the parsed command-line namespace -- it is passed through
    ``vars()`` when saving): ``bb`` and ``lb`` input files, ``overlap``,
    ``transpose``, ``dim`` / ``shape_dim`` / ``color_dim``, optional ``y``
    pickle file, and ``output``.

    The saved dictionary holds, for both shape and color, the vectors, the
    projection matrix and the eigenvalues, plus ``'y'`` when ``args.y`` is
    given.
    """
    # load input data
    shape_bb_ip, color_bb_ip = load(args.bb, args.overlap)
    shape_lb_ip, color_lb_ip = load(args.lb, args.overlap)
    if args.transpose:
        shape_lb_ip = shape_lb_ip.T
        color_lb_ip = color_lb_ip.T

    # setup dimensionality: a specific shape/color setting takes precedence
    # over the shared --dim value; unset (falsy) values fall through to None
    shape_dim = args.shape_dim or args.dim or None
    color_dim = args.color_dim or args.dim or None

    # generate SCISSORS vectors
    shape_s = SCISSORS(shape_bb_ip)
    shape_vectors = shape_s.get_vectors(shape_lb_ip, shape_dim)
    color_s = SCISSORS(color_bb_ip)
    color_vectors = color_s.get_vectors(color_lb_ip, color_dim)

    data = {'shape_vectors': shape_vectors,
            'shape_projection_matrix': shape_s.get_projection_matrix(),
            'shape_eigenvalues': shape_s.get_eigenvalues(),
            'color_vectors': color_vectors,
            'color_eigenvalues': color_s.get_eigenvalues(),
            'color_projection_matrix': color_s.get_projection_matrix()}

    if args.y:
        # Pickles must be read in binary mode: text mode mangles
        # binary-protocol pickles on platforms that translate newlines.
        # NOTE: unpickling can execute arbitrary code; only pass trusted
        # files via args.y.
        with open(args.y, 'rb') as f:
            y = cPickle.load(f)
        data['y'] = y

    save(data, args.output, attrs=vars(args))