def merge_grid_cover(final_db, other_dbs):
    """Merge several grid-cover databases into the one stored at `final_db`.

    When no file exists at `final_db`, the first entry of `other_dbs` is
    copied there to seed it and only the remaining entries are merged.
    Each remaining database is loaded from its path and folded into the
    final database with `load_other`.

    Args:
        final_db: Path of the database that receives the merged content.
        other_dbs: Sequence of paths of the databases to merge in.
    """
    if isfile(final_db):
        pending = other_dbs
    else:
        # Seed the destination with the first source, then skip that source.
        copyfile(other_dbs[0], final_db)
        pending = other_dbs[1:]

    merged = GridCoverDB.load_from_filepath(final_db)
    for source_path in pending:
        merged.load_other(GridCoverDB.load_from_filepath(source_path))
def cli_dump_kmers(outfile, cluster_ids, grid_cover):
    """Write every k-mer stored in a grid-cover database to `outfile`.

    One line is printed per entry returned by `get_kmers()`.

    Args:
        outfile: Writable text stream that receives the output.
        cluster_ids: When truthy, each line is `<centroid_index>,<kmer>`;
            otherwise only the k-mer is written.
        grid_cover: Path of the grid-cover database to read.
    """
    cover = GridCoverDB.load_from_filepath(grid_cover)
    for cluster, seq in cover.get_kmers():
        line = f'{cluster},{seq}' if cluster_ids else seq
        print(line, file=outfile)
def cli_dump_kmers(outfile, grid_cover):
    """Write the number of k-mers assigned to each centroid to `outfile`.

    Every entry returned by `get_kmers()` is tallied by its centroid index,
    then one `<centroid_index>,<count>` line is printed per centroid.

    Args:
        outfile: Writable text stream that receives the output.
        grid_cover: Path of the grid-cover database to read.
    """
    cover = GridCoverDB.load_from_filepath(grid_cover)

    tally = {}
    for cluster, _ in cover.get_kmers():
        if cluster in tally:
            tally[cluster] += 1
        else:
            tally[cluster] = 1

    for cluster, size in tally.items():
        print(f'{cluster},{size}', file=outfile)
def build_grid_cover(grid_db):
    """Build and store a bloom grid for every centroid of a grid-cover database.

    A progress bar is shown while the centroids are processed. Afterwards
    the database is closed and a summary with the number of centroids and
    the elapsed time is echoed to stderr.

    Args:
        grid_db: Path of the grid-cover database to process.
    """
    cover = GridCoverDB.load_from_filepath(grid_db)
    # Timing starts after the load, so it covers the build and the close only.
    began = time()
    total = cover.centroids().shape[0]
    with click.progressbar(list(range(total))) as bar:
        for index in bar:
            cover.build_and_store_bloom_grid(index)
    cover.close()
    elapsed = time() - began
    click.echo(f'Built {total} bloom filters in {elapsed:.5}s.', err=True)
def cli_dump_kmers(grid_cover):
    """Echo summary statistics for a grid-cover database.

    The database path is echoed first, followed by one tab-separated
    `name<TAB>value` line each for the number of centroids, the number of
    k-mers, the box side length, and the ramifier's `d` attribute.

    Args:
        grid_cover: Path of the grid-cover database to inspect.
    """
    click.echo(grid_cover)
    cover = GridCoverDB.load_from_filepath(grid_cover)

    # Each value is computed lazily, right before its line is echoed, so
    # earlier lines are still emitted if a later lookup fails.
    stats = (
        ('centers', lambda: cover.centroids().shape[0]),
        ('kmers', lambda: len(cover.get_kmers())),
        ('box_side', lambda: cover.box_side_len),
        ('dims', lambda: cover.ramifier.d),
    )
    for label, compute in stats:
        click.echo(f'{label}\t{compute()}')
def test_save_and_reload(self):
    """Check that a point added to a GridCoverDB is still there after a reload.

    A database is created in a temporary SQLite file next to this module,
    one k-mer is added, and the database is closed. The file is then
    reloaded and cluster 0 must contain exactly that k-mer.

    The temporary file is removed before the test starts and again when it
    finishes, even on failure, so a file left over from an earlier failed
    or interrupted run cannot leak stale cluster members into this run.
    """
    db_path = join(dirname(__file__), 'temp.db_save_temp.sqlite')

    def discard_temp_file():
        # The file may legitimately be absent (first run, or a failure
        # before SQLite created it); that is not an error.
        try:
            remove(db_path)
        except FileNotFoundError:
            pass

    discard_temp_file()
    try:
        ramifier = RotatingRamifier.from_file(4, KMER_ROTATION)
        db = GridCoverDB(sqlite3.connect(db_path), ramifier=ramifier, box_side_len=0.5)
        db.py_add_point_to_cluster(np.array([0., 0., 0., 0.]), KMER_31)
        db.close()
        # Drop the first handle so the reload below starts from the file alone.
        del db

        db = GridCoverDB.load_from_filepath(db_path)
        members = db.py_get_cluster_members(0)
        self.assertEqual(len(members), 1)
        self.assertIn(
            KMER_31,
            [reverse_convert_kmer(member) for member in members],
        )
    finally:
        discard_temp_file()
def cli_dump_kmers(outfile, grid_cover):
    """Write the centroids of a grid-cover database to `outfile` as CSV.

    The array returned by `centroids()` is wrapped in a DataFrame and
    written with `header=None` and `index=None`.

    Args:
        outfile: Path or writable stream handed to `DataFrame.to_csv`.
        grid_cover: Path of the grid-cover database to read.
    """
    cover = GridCoverDB.load_from_filepath(grid_cover)
    centroid_table = pd.DataFrame(cover.centroids())
    centroid_table.to_csv(outfile, header=None, index=None)