def test_save_txt_errors(self):
    """Check errors/warnings raised when saving bitstrings.

    Two failure modes of ``savetxt`` are exercised:

    * a database whose ``fp_type`` is ``CountFingerprint`` must raise
      ``E3FPInvalidFingerprintError``;
    * a database built from a 3 x 2**15 sparse boolean matrix must emit
      ``E3FPEfficiencyWarning`` (observed as an exception because warnings
      are escalated to errors inside the ``catch_warnings`` block).
    """
    import warnings
    from scipy.sparse import csr_matrix
    from e3fp.util import E3FPEfficiencyWarning
    from e3fp.fingerprint.db import FingerprintDatabase
    from e3fp.fingerprint.fprint import CountFingerprint
    from e3fp.fingerprint.util import E3FPInvalidFingerprintError

    array = np.array(
        [[1, 0, 0, 1, 1], [0, 0, 0, 1, 0], [0, 1, 1, 1, 1]], dtype=np.bool_
    )
    db = FingerprintDatabase.from_array(
        array, ["1", "2", "3"], fp_type=CountFingerprint
    )
    desc, txt_file = tempfile.mkstemp(suffix=".txt.gz")
    os.close(desc)
    # Clean up in ``finally`` so a failing assertion does not leave the
    # temporary file behind.
    try:
        with self.assertRaises(E3FPInvalidFingerprintError):
            db.savetxt(txt_file)

        array = csr_matrix((3, 2 ** 15), dtype=np.bool_)
        db = FingerprintDatabase.from_array(array, ["1", "2", "3"])
        with warnings.catch_warnings(record=True):
            # Turn warnings into exceptions so assertRaises can see them.
            warnings.simplefilter("error")
            with self.assertRaises(E3FPEfficiencyWarning):
                db.savetxt(txt_file)
    finally:
        os.unlink(txt_file)
def test_create_from_array(self):
    """Check that a DB built from an array matches one built from fprints.

    The same random 10 x 1024 array is loaded twice: once row by row as
    named ``Fingerprint`` objects added to an empty database, and once
    directly through ``FingerprintDatabase.from_array``. Both databases
    must report the same ``fp_type`` and hold identical dense contents.
    """
    from e3fp.fingerprint.fprint import Fingerprint, CountFingerprint
    from e3fp.fingerprint.db import FingerprintDatabase

    array = (np.random.uniform(0, 1, size=(10, 1024)) > 0.9).astype(
        np.uint16
    )
    fprints = [Fingerprint.from_vector(array[i, :]) for i in range(10)]
    fp_names = [str(i) for i in range(len(fprints))]
    for name, fp in zip(fp_names, fprints):
        fp.name = name
        fp.level = 5

    db1 = FingerprintDatabase(
        fp_type=CountFingerprint, level=5, name="Test"
    )
    db1.add_fingerprints(fprints)
    db2 = FingerprintDatabase.from_array(
        array, fp_names, level=5, name="Test"
    )

    self.assertEqual(db1.fp_type, db2.fp_type)
    # ``toarray()`` yields a plain ndarray directly, avoiding the detour
    # through the discouraged ``np.matrix`` (``todense().getA()``).
    np.testing.assert_array_equal(db1.array.toarray(), db2.array.toarray())
def test_load_efficiency_warning(self):
    """Check when loading a saved DB emits ``E3FPEfficiencyWarning``.

    A database is saved with ``savez`` and then loaded twice while
    warnings are escalated to errors: with ``scipy.__version__`` patched
    to ``"0.19"`` the load must raise ``E3FPEfficiencyWarning``; with it
    patched to ``"1.0"`` the load must complete without raising.
    """
    import warnings
    from e3fp.util import E3FPEfficiencyWarning
    from e3fp.fingerprint.db import FingerprintDatabase
    import scipy

    array = (np.random.uniform(0, 1, size=(10, 1024)) > 0.9).astype(
        np.uint16
    )
    fp_names = [str(i) for i in range(array.shape[0])]
    db = FingerprintDatabase.from_array(
        array, fp_names=fp_names, level=5, props={"index": range(10)}
    )
    desc, db_file = tempfile.mkstemp(suffix=".fpz")
    os.close(desc)
    # Remember the real version string: the test overwrites a module-level
    # attribute that every other test in the process also sees.
    real_scipy_version = scipy.__version__
    try:
        db.savez(db_file)
        with warnings.catch_warnings(record=True):
            # Turn warnings into exceptions so assertRaises can see them.
            warnings.simplefilter("error")
            scipy.__version__ = "0.19"
            with self.assertRaises(E3FPEfficiencyWarning):
                db.load(db_file)
            scipy.__version__ = "1.0"
            db.load(db_file)
    finally:
        # Undo the monkeypatch and remove the temp file even on failure.
        scipy.__version__ = real_scipy_version
        os.unlink(db_file)
def test_save_txt(self):
    """Ensure bitstrings saved to txt correctly.

    A 3 x 5 boolean database is written with ``savetxt`` twice, once with
    default arguments and once with ``with_names=False``, and the file
    contents read back through ``smart_open`` are compared with the
    expected bytes.
    """
    from e3fp.fingerprint.db import FingerprintDatabase
    from python_utilities.io_tools import smart_open

    array = np.array(
        [[1, 0, 0, 1, 1], [0, 0, 0, 1, 0], [0, 1, 1, 1, 1]], dtype=np.bool_
    )
    db = FingerprintDatabase.from_array(array, ["1", "2", "3"])

    # (extra savetxt keyword arguments, expected file contents)
    scenarios = (
        ({}, b"10011 1\n00010 2\n01111 3\n"),
        ({"with_names": False}, b"10011\n00010\n01111\n"),
    )
    for savetxt_kwargs, exp_bitstring in scenarios:
        desc, txt_file = tempfile.mkstemp(suffix=".txt.gz")
        os.close(desc)
        # Remove the temp file even if saving or reading fails.
        try:
            db.savetxt(txt_file, **savetxt_kwargs)
            with smart_open(txt_file, "r") as f:
                bitstring = f.read()
        finally:
            os.unlink(txt_file)
        self.assertEqual(bitstring, exp_bitstring)
def test_get_db_subset(self):
    """Check that ``get_subset`` keeps level, type and the selected rows.

    The subset is requested for all names except the last two and is
    compared against the first eight rows of the parent database's array.
    """
    from e3fp.fingerprint.db import FingerprintDatabase

    random_bits = np.random.uniform(0, 1, size=(10, 1024)) > .9
    array = random_bits.astype(np.uint16)
    fp_names = [str(row) for row in range(array.shape[0])]
    db = FingerprintDatabase.from_array(array, fp_names=fp_names, level=5)

    kept_names = fp_names[:-2]
    sub_db = db.get_subset(kept_names)

    self.assertEqual(db.level, sub_db.level)
    self.assertEqual(db.fp_type, sub_db.fp_type)
    # No stored entries in the difference means the rows are identical.
    difference = db.array[:-2, :] - sub_db.array
    self.assertEqual(difference.nnz, 0)
def test_fingerprint_has_props(self):
    """Check that props given to ``from_array`` reach each fingerprint.

    Every fingerprint looked up by name must expose, through
    ``get_prop("index")``, the float stored for it in the ``props`` dict.
    """
    from e3fp.fingerprint.db import FingerprintDatabase

    num_fprints = 10
    array = (
        np.random.uniform(0, 1, size=(num_fprints, 1024)) > 0.9
    ).astype(np.uint16)
    fp_names = list(map(str, range(num_fprints)))
    indices = list(map(float, range(num_fprints)))
    db = FingerprintDatabase.from_array(
        array, fp_names, level=5, name="Test", props={"index": indices}
    )

    for name, expected_index in zip(fp_names, indices):
        # Name lookup returns a sequence; take its first entry.
        fprint = db[name][0]
        self.assertEqual(fprint.get_prop("index"), expected_index)
def test_lookup(self):
    """Check fingerprint lookup by integer position and by name.

    For each row of a random 2 x 1024 boolean array, the fingerprint
    built from that row must equal both ``db[i]`` and the first entry
    of ``db[name]`` for the name stored at the same position.
    """
    from e3fp.fingerprint.fprint import Fingerprint
    from e3fp.fingerprint.db import FingerprintDatabase

    # An empty named database is constructed first; it is replaced below.
    db = FingerprintDatabase(name="TestDB")

    random_bits = np.random.uniform(0, 1, size=(2, 1024)) > .9
    array = random_bits.astype(np.bool_)
    num_rows = array.shape[0]
    fp_names = ["fp{}".format(row_idx) for row_idx in range(num_rows)]
    db = FingerprintDatabase.from_array(array, fp_names, name="Test")

    for row_idx in range(num_rows):
        row = array[row_idx, :]
        self.assertEqual(Fingerprint.from_vector(row), db[row_idx])
        stored_name = db.fp_names[row_idx]
        self.assertEqual(Fingerprint.from_vector(row), db[stored_name][0])
def test_concat_dbs(self):
    """Check that ``concat`` reassembles a database split into pieces.

    A random 10 x 1024 array is cut into five consecutive two-row
    databases (each with matching names and ``index`` props); the
    concatenated database's dense array must equal the original array.
    """
    from e3fp.fingerprint.db import concat, FingerprintDatabase

    array = (np.random.uniform(0, 1, size=(10, 1024)) > .9).astype(
        np.uint16)
    fp_names = [str(i) for i in range(10)]
    indices = [float(i) for i in range(10)]

    dbs = [
        FingerprintDatabase.from_array(
            array[start:start + 2, :],
            fp_names[start:start + 2],
            level=5,
            name="Test",
            props={"index": indices[start:start + 2]},
        )
        for start in range(0, 10, 2)
    ]
    join_db = concat(dbs)

    # ``toarray()`` yields a plain ndarray directly, avoiding the detour
    # through the discouraged ``np.matrix`` (``todense().getA()``).
    np.testing.assert_array_equal(join_db.array.toarray(), array)
def test_roundtrip_zlib(self):
    """Ensure DB is the same after saving with savez and loading.

    The database is written to a temporary ``.fpz`` file and loaded back;
    the loaded database must compare equal to the original and its
    ``index`` property must list 0 through 9.
    """
    from e3fp.fingerprint.db import FingerprintDatabase

    array = (np.random.uniform(0, 1, size=(10, 1024)) > .9).astype(
        np.uint16)
    fp_names = [str(i) for i in range(array.shape[0])]
    db = FingerprintDatabase.from_array(array, fp_names=fp_names, level=5,
                                        props={"index": range(10)})
    desc, db_file = tempfile.mkstemp(suffix=".fpz")
    os.close(desc)
    # Remove the temp file even if saving or loading raises.
    try:
        db.savez(db_file)
        db2 = db.load(db_file)
    finally:
        os.unlink(db_file)

    self.assertEqual(db, db2)
    self.assertListEqual(db2.get_prop("index").tolist(), list(range(10)))