def test_repos(mocked, tmpdir):
    db_path = _run_repos(tmpdir)
    db = sqlite_utils.Database(db_path)
    assert db.table_names() == [
        "users",
        "licenses",
        "repos",
        "licenses_fts",
        "licenses_fts_data",
        "licenses_fts_idx",
        "licenses_fts_docsize",
        "licenses_fts_config",
        "repos_fts",
        "repos_fts_data",
        "repos_fts_idx",
        "repos_fts_docsize",
        "repos_fts_config",
        "users_fts",
        "users_fts_data",
        "users_fts_idx",
        "users_fts_docsize",
        "users_fts_config",
    ]
    assert db["repos"].count == 1
    repo = next(iter(db["repos"].rows))
    assert repo["full_name"] == "dogsheep/github-to-sqlite"
    assert repo["readme"] == "# This is the README"
    assert repo["readme_html"] is not None

def test_redact(connection, tmpdir, cli_runner):
    db_path = str(tmpdir / "test_redact.db")
    result = cli_runner(
        [
            connection,
            db_path,
            "--all",
            "--redact",
            "products",
            "name",
            "--redact",
            "products",
            "vendor_id",
        ]
    )
    assert 0 == result.exit_code, (result.output, result.exception)
    db = sqlite_utils.Database(db_path)
    assert [
        {"id": 1, "name": "***", "cat_id": 1, "vendor_id": "***"},
        {"id": 2, "name": "***", "cat_id": 1, "vendor_id": "***"},
    ] == list(db["products"].rows)
    assert [
        ForeignKey(
            table="products",
            column="cat_id",
            other_table="categories",
            other_column="id",
        )
    ] == sorted(db["products"].foreign_keys)

def update_live_config_db(datasette, database_name, table_name, data):
    assert database_name and table_name, "Database and table names blank!"
    assert isinstance(data, str)
    database_path = get_db_path(datasette)
    db = sqlite_utils.Database(sqlite3.connect(database_path))
    configs = db[TABLE_NAME]
    existing = configs.rows_where(
        "database_name=? and table_name=?",
        (database_name, table_name),
    )
    if not len(list(existing)):
        configs.insert(
            {
                "database_name": database_name,
                "table_name": table_name,
                "data": data,
            },
            pk=("database_name", "table_name"),
            replace=True,
        )
        configs.create_index(
            ["database_name", "table_name"],
            unique=True,
        )
    else:
        configs.update((database_name, table_name), {"data": data})

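# Hedged aside (not from the plugin above): sqlite-utils' upsert() with a compound
# primary key covers the same insert-or-update branch in a single call. The table
# and column names here are invented for illustration.
def example_upsert_with_compound_pk():
    import sqlite_utils

    db = sqlite_utils.Database(memory=True)
    configs = db["live_config"]  # hypothetical table name
    configs.upsert(
        {"database_name": "mydb", "table_name": "mytable", "data": "{}"},
        pk=("database_name", "table_name"),
    )
    # A second call with the same compound key updates the existing row in place
    configs.upsert(
        {"database_name": "mydb", "table_name": "mytable", "data": '{"allow": true}'},
        pk=("database_name", "table_name"),
    )
    assert configs.count == 1
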
def drop_view(path, view):
    "Drop the specified view"
    db = sqlite_utils.Database(path)
    if view in db.view_names():
        db[view].drop()
    else:
        raise click.ClickException('View "{}" does not exist'.format(view))

def update_readme():
    db = sqlite_utils.Database(ROOT_PATH / "til.db")
    by_topic = {}
    for row in db["til"].rows_where(order_by="topic"):
        by_topic.setdefault(row["topic"], []).append(row)
    index = ["<!-- TILs start -->"]
    for topic, rows in by_topic.items():
        index.append("## {}\n".format(topic))
        for row in rows:
            index.append(
                "- [{}]({}) ({})".format(
                    row["title"], row["url"], row["updated"].split("T")[0]
                )
            )
        index.append("")
    if index[-1] == "":
        index.pop()
    index.append("<!-- TILs end -->")
    if "--rewrite" in sys.argv:
        readme = ROOT_PATH / "README.md"
        index_txt = "\n".join(index).strip()
        readme_contents = readme.open().read()
        rewritten = INDEX_REGEX.sub(index_txt, readme_contents)
        rewritten = COUNT_REGEX.sub(COUNT_TEMPLATE.format(db["til"].count), rewritten)
        readme.open("w").write(rewritten)
    else:
        print("\n".join(index))

def subscriptions(db_path, auth):
    """Download feedly subscriptions for the authenticated user"""
    db = sqlite_utils.Database(db_path)
    try:
        data = json.load(open(auth))
        token = data["developer_token"]
    except (KeyError, FileNotFoundError):
        utils.error(
            "Cannot find authentication data, please run `feedly_to_sqlite auth`!"
        )
    click.echo("Downloading subscriptions")
    r = requests.get(
        FEEDLY_API_URL + "/v3/collections",
        headers={"Authorization": "Bearer {}".format(token)},
    )
    r.raise_for_status()
    collections = r.json()
    for coll in collections:
        feeds = coll["feeds"]
        coll_id = coll["id"]
        coll_data = {k: coll.get(k) for k in COLLECTION_KEYS}
        db["collections"].upsert(coll_data, pk="id")
        for f in feeds:
            feed_data = {k: f.get(k) for k in FEED_KEYS}
            db["collections"].update(coll_id).m2m(db.table("feeds", pk="id"), feed_data)

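# Hedged aside: a minimal, self-contained sketch of the Table.m2m() pattern used
# above, with invented data. m2m() is chained after update() so it knows which
# "collections" row to link from; sqlite-utils creates the join table automatically.
def example_m2m():
    import sqlite_utils

    db = sqlite_utils.Database(memory=True)
    db["collections"].insert({"id": "c1", "label": "News"}, pk="id")
    db["collections"].update("c1").m2m(
        db.table("feeds", pk="id"), {"id": "f1", "title": "Example Feed"}
    )
    # The many-to-many table is named from the two table names
    assert set(db.table_names()) == {"collections", "feeds", "collections_feeds"}
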
def create_table(path, table, columns, pk, not_null, default, fk, ignore, replace):
    "Add a table with the specified columns, given as 'name' 'type' pairs"
    db = sqlite_utils.Database(path)
    if len(columns) % 2 == 1:
        raise click.ClickException(
            "columns must be an even number of 'name' 'type' pairs"
        )
    coltypes = {}
    columns = list(columns)
    while columns:
        name = columns.pop(0)
        ctype = columns.pop(0)
        if ctype.upper() not in VALID_COLUMN_TYPES:
            raise click.ClickException(
                "column types must be one of {}".format(VALID_COLUMN_TYPES)
            )
        coltypes[name] = ctype.upper()
    # Does the table already exist?
    if table in db.table_names():
        if ignore:
            return
        elif replace:
            db[table].drop()
        else:
            raise click.ClickException(
                'Table "{}" already exists. Use --replace to delete and replace it.'.format(
                    table
                )
            )
    db[table].create(
        coltypes,
        pk=pk,
        not_null=not_null,
        defaults=dict(default),
        foreign_keys=fk,
    )

def user(db_path, username):
    "Fetch all content submitted by this user"
    db = sqlite_utils.Database(db_path)
    ensure_tables(db)
    user = requests.get(
        "https://hacker-news.firebaseio.com/v0/user/{}.json".format(username)
    ).json()
    submitted = user.pop("submitted", None) or []
    with db.conn:
        db["users"].upsert(
            user, column_order=("id", "created", "karma", "about"), pk="id"
        )
    # Only do IDs we have not yet fetched
    done = set()
    if "items" in db.table_names():
        done = set(
            r[0]
            for r in db.conn.execute(
                "select id from items where id in ({})".format(
                    ", ".join(map(str, submitted))
                )
            ).fetchall()
        )
    to_do = [id for id in submitted if id not in done]
    for id in tqdm.tqdm(to_do, desc="Importing items"):
        item = requests.get(
            "https://hacker-news.firebaseio.com/v0/item/{}.json".format(id)
        ).json()
        with db.conn:
            db["items"].upsert(
                item, column_order=("id", "type", "by", "time"), pk="id", alter=True
            )
    ensure_fts(db)

def test_cli_import_zip_file(import_test_zip):
    tmpdir, archive = import_test_zip
    output = str(tmpdir / "output.db")
    result = CliRunner().invoke(cli.cli, ["import", output, archive])
    assert 0 == result.exit_code, result.stdout
    db = sqlite_utils.Database(output)
    assert_imported_db(db)

def test_fts(db_and_path, tmpdir, fts_version):
    db, path = db_and_path
    db["dogs"].enable_fts(["name"], fts_version=fts_version)
    conn = sqlite3.connect(path)
    lines = list(iterdump(conn))
    db2_path = str(tmpdir / "restored.db")
    conn2 = sqlite3.connect(db2_path)
    conn2.executescript("\n".join(lines))
    db2 = sqlite_utils.Database(conn2)
    assert list(db2["dogs"].rows) == [
        {"id": 1, "name": "Cleo", "age": 5},
        {"id": 2, "name": "Pancakes", "age": 4},
    ]
    assert db2["dogs"].detect_fts() == "dogs_fts"
    assert db2["dogs_fts"].schema == (
        "CREATE VIRTUAL TABLE [dogs_fts] USING {} (\n".format(fts_version)
        + " [name],\n"
        + " content=[dogs]\n"
        + " )"
    )

def fetch_projects(db_path, big_local_token, contact, skip):
    db = sqlite_utils.Database(db_path)
    # Drop uri and uriType columns if they exist
    if db["files"].exists() and "uri" in db["files"].columns_dict:
        db["files"].transform(drop={"uri", "uriType"})
    response = requests.post(
        "https://api.biglocalnews.org/graphql",
        json={"query": graphql_query},
        headers={"Authorization": "JWT {}".format(big_local_token)},
    )
    assert 200 == response.status_code, response.status_code
    data = response.json()
    for edge in data["data"]["openProjects"]["edges"]:
        project = edge["node"]
        files = project.pop("files")
        if contact and project["contact"] not in contact:
            continue
        if project["id"] in skip:
            continue
        db["projects"].insert(project, pk="id", replace=True)
        if files:
            db["files"].upsert_all(
                [
                    dict(
                        project=project["id"],
                        ext=fileinfo["name"].split(".")[-1],
                        **fileinfo
                    )
                    for fileinfo in files
                ],
                pk=("project", "name"),
                foreign_keys=("project",),
            )
        # If there's a README, download it
        try:
            db["projects"].add_column("readme_markdown", str)
        except Exception:
            pass

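# Hedged aside: a minimal sketch of the Table.transform(drop=...) call used above,
# with invented rows. transform() rebuilds the table without the dropped columns.
def example_transform_drop():
    import sqlite_utils

    db = sqlite_utils.Database(memory=True)
    db["files"].insert(
        {"name": "data.csv", "uri": "s3://bucket/data.csv", "uriType": "s3"}
    )
    db["files"].transform(drop={"uri", "uriType"})
    assert list(db["files"].columns_dict) == ["name"]
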
def test_alters_if_necessary(tmpdir):
    db_path = tmpdir / "db.db"
    assert (
        0
        == CliRunner()
        .invoke(cli.cli, [str(db_path), "items", "-"], input=TEST_YAML)
        .exit_code
    )
    more_input = textwrap.dedent(
        """
        - name: some-other-thing
          new_column: A new column
        """
    )
    assert (
        0
        == CliRunner()
        .invoke(cli.cli, [str(db_path), "items", "-"], input=more_input)
        .exit_code
    )
    db = sqlite_utils.Database(str(db_path))
    assert db["items"].columns_dict == {
        "name": str,
        "url": str,
        "nested_with_date": str,
        "new_column": str,
    }

def test_sql_query(connection, tmpdir, cli_runner):
    db_path = str(tmpdir / "test_sql.db")
    # Without --output it throws an error
    result = cli_runner(
        [connection, db_path, "--sql", "select name, cat_id from products"]
    )
    assert 0 != result.exit_code
    assert "Error: --sql must be accompanied by --output" == result.output.strip()
    # With --output it does the right thing
    result = cli_runner(
        [
            connection,
            db_path,
            "--sql",
            "select name, cat_id from products",
            "--output",
            "out",
        ]
    )
    assert 0 == result.exit_code, result.output
    db = sqlite_utils.Database(db_path)
    assert {"out"} == set(db.table_names())
    assert [
        {"name": "Bobcat Statue", "cat_id": 1},
        {"name": "Yoga Scarf", "cat_id": 1},
    ] == list(db["out"].rows)

def test_db_to_sqlite(connection, tmpdir, cli_runner):
    db_path = str(tmpdir / "test.db")
    cli_runner([connection, db_path, "--all"])
    db = sqlite_utils.Database(db_path)
    assert {"categories", "products", "vendors"} == set(db.table_names())
    assert [
        {"id": 1, "name": "Bobcat Statue", "cat_id": 1, "vendor_id": 1},
        {"id": 2, "name": "Yoga Scarf", "cat_id": 1, "vendor_id": None},
    ] == list(db["products"].rows)
    assert [{"id": 1, "name": "Junk"}] == list(db["categories"].rows)
    assert [
        ForeignKey(
            table="products",
            column="cat_id",
            other_table="categories",
            other_column="id",
        ),
        ForeignKey(
            table="products",
            column="vendor_id",
            other_table="vendors",
            other_column="id",
        ),
    ] == sorted(db["products"].foreign_keys)

def index_foreign_keys(path, load_extension):
    """
    Ensure every foreign key column has an index on it.
    """
    db = sqlite_utils.Database(path)
    _load_extensions(db, load_extension)
    db.index_foreign_keys()

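# Hedged aside: a small sketch of what Database.index_foreign_keys() does - it adds
# an index on any foreign key column that does not already have one. The tables
# here are invented.
def example_index_foreign_keys():
    import sqlite_utils

    db = sqlite_utils.Database(memory=True)
    db["authors"].insert({"id": 1, "name": "Sofia"}, pk="id")
    db["books"].insert(
        {"id": 1, "title": "Example", "author_id": 1},
        pk="id",
        foreign_keys=[("author_id", "authors")],
    )
    db.index_foreign_keys()
    assert [idx.columns for idx in db["books"].indexes] == [["author_id"]]
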
def test_cli_import_folder(tmpdir, zip_contents_path):
    output = str(tmpdir / "output.db")
    result = CliRunner().invoke(cli.cli, ["import", output, str(zip_contents_path)])
    assert 0 == result.exit_code, result.stdout
    db = sqlite_utils.Database(output)
    assert_imported_db(db)

def update_readme(c, rewrite=False):
    db = sqlite_utils.Database(db_path)
    by_topic = {}
    for row in db["til"].rows_where(order_by="title"):
        by_topic.setdefault(row["topic"], []).append(row)
    index = ["<!-- index starts -->"]
    for topic, rows in sorted(by_topic.items()):
        index.append(f"## {topic}\n")
        for row in rows:
            date = row["created"].split("T")[0]
            index.append(f"- [{row['title']}]({row['path']}) - {date}")
        index.append("")
    if index[-1] == "":
        index.pop()
    index.append("<!-- index ends -->")
    if rewrite:
        readme = root / "README.md"
        readme_contents = readme.open().read()
        index_txt = "\n".join(index).strip()
        rewritten = index_re.sub(index_txt, readme_contents)
        rewritten = count_re.sub(format_count(db["til"].count), rewritten)
        readme.open("w").write(rewritten)
    else:
        print("\n".join(index))

def test_recipe_jsonsplit(tmpdir, delimiter):
    db_path = str(pathlib.Path(tmpdir) / "data.db")
    db = sqlite_utils.Database(db_path)
    db["example"].insert_all(
        [
            {"id": 1, "tags": (delimiter or ",").join(["foo", "bar"])},
            {"id": 2, "tags": (delimiter or ",").join(["bar", "baz"])},
        ],
        pk="id",
    )
    code = "r.jsonsplit(value)"
    if delimiter:
        code = 'recipes.jsonsplit(value, delimiter="{}")'.format(delimiter)
    args = ["convert", db_path, "example", "tags", code]
    result = CliRunner().invoke(cli.cli, args)
    assert 0 == result.exit_code, result.output
    assert list(db["example"].rows) == [
        {"id": 1, "tags": '["foo", "bar"]'},
        {"id": 2, "tags": '["bar", "baz"]'},
    ]

def update_readme(repo_path, rewrite):
    """Iterate the MD files and update the main README.md file"""
    index_re = re.compile(
        r"<!\-\- index starts \-\->.*<!\-\- index ends \-\->", re.DOTALL
    )
    til_db = sqlite_utils.Database(repo_path / "til.db")
    by_topic = {}
    for row in til_db["til"].rows_where(order_by="created_utc"):
        by_topic.setdefault(row["topic"], []).append(row)
    index = ["<!-- index starts -->"]
    for topic, rows in by_topic.items():
        index.append("## {}\n".format(topic))
        for row in rows:
            index.append(
                "* [{title}]({url}) - {date}".format(
                    date=row["created"].split("T")[0], **row
                )
            )
        index.append("")
    if index[-1] == "":
        index.pop()
    index.append("<!-- index ends -->")
    if rewrite:
        readme = repo_path / "README.md"
        index_txt = "\n".join(index).strip()
        readme_contents = readme.open().read()
        readme.open("w").write(index_re.sub(index_txt, readme_contents))
    else:
        print("\n".join(index))

def build_database(repo_path):
    all_times = created_changed_times(repo_path)
    db = sqlite_utils.Database(repo_path / "til.db")
    table = db.table("til", pk="path")
    with open("topics.yaml") as f:
        topics = yaml.load(f)
    for filepath in root.glob("*/*.md"):
        fp = filepath.open()
        title = fp.readline().lstrip("#").strip()
        body = fp.read().strip()
        path = str(filepath.relative_to(root))
        topic_dir = path.split("/")[0]
        topic_name = topics.get(topic_dir, topic_dir)
        url = "https://github.com/chunkaichang/til/blob/master/{}".format(path)
        record = {
            "path": path.replace("/", "_"),
            "topic": topic_name,
            "title": title,
            "url": url,
            "body": body,
        }
        record.update(all_times[path])
        table.insert(record)
    if "til_fts" not in db.table_names():
        table.enable_fts(["title", "body"])

def drop_table(path, table):
    "Drop the specified table"
    db = sqlite_utils.Database(path)
    if table in db.table_names():
        db[table].drop()
    else:
        raise click.ClickException('Table "{}" does not exist'.format(table))

def plays(database, auth, since, since_date):
    if since and since_date:
        raise click.UsageError("use either --since or --since-date, not both")
    db = sqlite_utils.Database(database)
    if since and db["plays"].exists:
        since_date = db.conn.execute("select max(timestamp) from plays").fetchone()[0]
    if since_date:
        since_date = dateutil.parser.parse(since_date)
    auth = json.load(open(auth))
    network = lastfm.get_network(
        auth["lastfm_network"],
        key=auth["lastfm_api_key"],
        secret=auth["lastfm_shared_secret"],
    )
    user = network.get_user(auth["lastfm_username"])
    playcount = user.get_playcount()
    history = lastfm.recent_tracks(user, since_date)
    # FIXME: the progress bar is wrong if there's a since_date
    with click.progressbar(
        history, length=playcount, label="Importing plays", show_pos=True
    ) as progress:
        for track in progress:
            lastfm.save_artist(db, track["artist"])
            lastfm.save_album(db, track["album"])
            lastfm.save_track(db, track["track"])
            lastfm.save_play(db, track["play"])

def build_database(repo_path):
    all_times = created_changed_times(repo_path)
    db = sqlite_utils.Database(ROOT_PATH / "til.db")
    table = db.table("til", pk="path")
    for filepath in ROOT_PATH.glob("*/*.md"):
        fp = filepath.open()
        title = fp.readline().lstrip("#").strip()
        body = fp.read().strip()
        path = str(filepath.relative_to(ROOT_PATH))
        url = "https://github.com/fosdickio/til/blob/main/{}".format(path)
        record = {
            "path": path,
            "topic": path.split("/")[0],
            "title": title,
            "url": url,
            "body": body,
        }
        if path in all_times:
            record.update(all_times[path])
        else:
            raise RuntimeError("{} not found in all_times.".format(path))
        with db.conn:
            table.upsert(record, alter=True)
    if "til_fts" not in db.table_names():
        table.enable_fts(["title", "body"])

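# Hedged aside: a self-contained sketch of the FTS setup used by the build scripts
# above, with dummy content. enable_fts() creates the til_fts shadow tables that
# detect_fts() later finds.
def example_enable_fts():
    import sqlite_utils

    db = sqlite_utils.Database(memory=True)
    til = db.table("til", pk="path")
    til.insert(
        {"path": "python/example.md", "title": "Example", "body": "Hello world"}
    )
    til.enable_fts(["title", "body"])
    assert "til_fts" in db.table_names()
    assert til.detect_fts() == "til_fts"
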
def main(
    target: str = typer.Argument(str, help="u/username or r/subreddit to collect"),
    auth: Path = typer.Option(
        Path("~/.config/reddit-to-sqlite.json"),
        help="File to retrieve/save Reddit auth",
    ),
    db: Path = typer.Option(Path("reddit.db"), help="database file"),
    post_reload: int = typer.Option(7, help="Age of posts to reload (days)"),
    comment_reload: int = typer.Option(7, help="Age of comments to reload (days)"),
    verbose: int = typer.Option(0, "--verbose", "-v", count=True, help="More logging"),
):
    """Load posts and comments from Reddit to sqlite."""
    set_loglevel(verbosity=verbose)
    reddit = reddit_instance(get_auth(auth.expanduser()))
    saver, save_me = interpret_target(target)
    database = sqlite_utils.Database(db.expanduser())
    saver(
        database,
        reddit,
        save_me,
        post_reload_sec=post_reload * SECONDS_IN_DAY,
        comment_reload_sec=comment_reload * SECONDS_IN_DAY,
    )
    ITEM_VIEW_DEF = (Path(__file__).parent / "view_def.sql").read_text()
    database.create_view("items", ITEM_VIEW_DEF, replace=True)
    setup_ddl(database)

def build_database(repo_path):
    all_times = created_changed_times(repo_path)
    db = sqlite_utils.Database(repo_path / "til.db")
    table = db.table("til", pk="path")
    for filepath in root.glob("*/*.md"):
        fp = filepath.open()
        title = fp.readline().lstrip("#").strip()
        body = fp.read().strip()
        path = str(filepath.relative_to(root))
        # Original https://github.com/nryberg/til/{}
        url = "https://github.com/nryberg/til/blob/master/{}".format(path)
        record = {
            "path": path.replace("/", "_"),
            "topic": path.split("/")[0],
            "title": title,
            "url": url,
            "body": body,
        }
        print("Path: " + path)
        # Iterate over key/value pairs in dict and print them
        for key, value in all_times.items():
            print('dict:', key, ' : ', value)
        record.update(all_times[path])
        table.insert(record)
    if "til_fts" not in db.table_names():
        table.enable_fts(["title", "body"])

def test_location_history():
    path = pathlib.Path(__file__).parent / "zip_contents"
    zf = create_zip(path)
    db = sqlite_utils.Database(memory=True)
    save_location_history(db, zf)
    assert {"location_history"} == set(db.table_names())
    location_history = list(
        sorted(db["location_history"].rows, key=lambda r: r["id"])
    )
    assert [
        {
            "id": "2015-07-18T23:57:26.012000-bc0cdf",
            "latitude": 37.6955289,
            "longitude": -121.9287261,
            "accuracy": 45,
            "timestampMs": 1437263846012,
            "when": "2015-07-18T23:57:26.012000",
        },
        {
            "id": "2015-07-18T23:58:25.529000-138e18",
            "latitude": 37.6955454,
            "longitude": -121.9287454,
            "accuracy": 43,
            "timestampMs": 1437263905529,
            "when": "2015-07-18T23:58:25.529000",
        },
    ] == location_history

def create_index(path, table, column, name, unique, if_not_exists):
    "Add an index to the specified table covering the specified columns"
    db = sqlite_utils.Database(path)
    db[table].create_index(
        column, index_name=name, unique=unique, if_not_exists=if_not_exists
    )

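# Hedged aside: a minimal sketch of Table.create_index() with the same options the
# CLI command above passes through; the table and index names are invented.
def example_create_index():
    import sqlite_utils

    db = sqlite_utils.Database(memory=True)
    db["dogs"].insert({"id": 1, "name": "Cleo"}, pk="id")
    db["dogs"].create_index(
        ["name"], index_name="idx_dogs_name", unique=True, if_not_exists=True
    )
    assert [idx.name for idx in db["dogs"].indexes] == ["idx_dogs_name"]
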
def triggers(
    ctx,
    path,
    tables,
    nl,
    arrays,
    csv,
    tsv,
    no_headers,
    table,
    fmt,
    json_cols,
    load_extension,
):
    "Show triggers configured in this database"
    sql = "select name, tbl_name as [table], sql from sqlite_master where type = 'trigger'"
    if tables:
        quote = sqlite_utils.Database(memory=True).quote
        sql += " and [table] in ({})".format(
            ", ".join(quote(table) for table in tables)
        )
    ctx.invoke(
        query,
        path=path,
        sql=sql,
        nl=nl,
        arrays=arrays,
        csv=csv,
        tsv=tsv,
        no_headers=no_headers,
        table=table,
        fmt=fmt,
        json_cols=json_cols,
        load_extension=load_extension,
    )

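# Hedged aside: the in-memory Database above is created only to borrow its quote()
# helper, which applies SQLite string quoting (doubling embedded single quotes).
# A tiny sketch with made-up names:
def example_quote():
    import sqlite_utils

    quote = sqlite_utils.Database(memory=True).quote
    names = ["plays", "bad'name"]
    sql = "select * from sqlite_master where tbl_name in ({})".format(
        ", ".join(quote(name) for name in names)
    )
    assert sql.endswith("in ('plays', 'bad''name')")
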
def issue_comments(db_path, repo, issue, auth):
    "Retrieve issue comments for a specific repository"
    db = sqlite_utils.Database(db_path)
    token = load_token(auth)
    for comment in utils.fetch_issue_comments(repo, token, issue):
        utils.save_issue_comment(db, comment)
    utils.ensure_db_shape(db)

def create_db(tmp_path_factory):
    db_directory = tmp_path_factory.mktemp("dbs")
    db_path = db_directory / "test.db"
    db = sqlite_utils.Database(db_path)
    db["dogs"].insert_all(
        [
            {"id": 1, "name": "Cleo", "age": 5, "status": "good dog"},
            {"id": 2, "name": "Pancakes", "age": 4, "status": "bad dog"},
            {"id": 3, "name": "Fido", "age": 3, "status": "bad dog"},
            {"id": 4, "name": "Scratch", "age": 3, "status": "good dog"},
        ],
        pk="id",
    )
    return db_path