def setup():
    """Create a git repository in the user directory and attach a remote.

    Initialises a repository in ``app.config["USER_DIR"]``, commits ``data/``
    as the initial commit and renames the active branch to the name entered
    at the prompt. It then prompts for a remote URL and credentials, registers
    the remote as ``origin`` and repeats the prompt until ``origin.exists()``
    is true. Finally it pushes the branch, fetches, and makes the local branch
    track the remote one.

    NOTE(review): the username and access token are embedded in the remote
    URL that is handed to git, so they are presumably persisted in the
    repository's git config - confirm this is acceptable.
    """
    with app.app_context():
        click.echo(f"Creating new git repo in {app.config['USER_DIR']}...")
        repo = gitpython.Repo.init(app.config["USER_DIR"])
        branch = click.prompt("Main branch", type=str, default="main")
        repo.index.add("data/")
        repo.index.commit("Initial commit")
        repo.active_branch.rename(branch)

        while True:
            remote_url = click.prompt(
                "Enter the url of the remote you'd like to sync to. "
                "Ex: https://github.com/archivy/archivy",
                type=str,
            )
            username = click.prompt("Enter your username", type=str)
            password = click.prompt(
                "Enter your personal access token", type=str, hide_input=True
            )
            remote_url = remote_url.replace(
                "https://", f"https://{username}:{password}@"
            )
            origin = repo.create_remote("origin", remote_url)
            if origin.exists():
                break
            click.echo("Remote does not exist.")
            # Remove the rejected remote before asking again: the name
            # "origin" would otherwise already be taken on the next pass and
            # create_remote would fail instead of re-prompting.
            repo.delete_remote(origin)

        origin.push(branch)
        origin.fetch()
        repo.active_branch.set_tracking_branch(getattr(origin.refs, branch))
        click.echo("Successfully setup repository.")
def test_app():
    """Yield the app wired to a fresh temporary application directory.

    Every test that uses this generator gets its own temporary directory
    (with a ``data`` sub-directory) as ``APP_PATH``, so its database and data
    files are isolated. The directory is removed once the test is done.
    """
    # Throwaway root that isolates the database and data files per test.
    tmp_root = tempfile.mkdtemp()
    os.mkdir(os.path.join(tmp_root, "data"))

    app.config.update(
        APP_PATH=tmp_root,
        TESTING=True,
        WTF_CSRF_ENABLED=False,
    )

    # An application context is required so that `flask.current_app` can be
    # used in data.py and models.py; see
    # https://flask.palletsprojects.com/en/1.1.x/appcontext/ for details.
    with app.app_context():
        # Sets up the TinyDB instance inside the temporary directory.
        get_db()

        yield app

    # Tear down: drop the temporary directory together with its database.
    shutil.rmtree(tmp_root)
def complete():
    """Finish the Pocket OAuth flow and store the resulting access token.

    Reads the ``pocket_key`` record from the database, posts its
    ``consumer_key`` and ``code`` to Pocket's authorize endpoint, and writes
    the returned ``access_token`` back onto that record. Prints a message and
    returns early when no ``pocket_key`` record exists or when Pocket rejects
    the request.
    """
    with app.app_context():
        db = get_db()
        try:
            pocket = db.search(Query().type == "pocket_key")[0]
        except IndexError:
            # Only "no matching record" is expected here; anything else is a
            # real error and should surface instead of being swallowed.
            click.echo("Key not found")
            return

        auth_data = {
            "consumer_key": pocket["consumer_key"],
            "code": pocket["code"],
        }
        resp = requests.post(
            "https://getpocket.com/v3/oauth/authorize",
            json=auth_data,
            headers={
                "X-Accept": "application/json",
                "Content-Type": "application/json",
            },
        )
        if not resp.ok:
            # An error response has no usable JSON body; report it rather
            # than failing while parsing it.
            click.echo(f"Pocket authorization failed (HTTP {resp.status_code}).")
            return

        db.update(
            operations.set("access_token", resp.json()["access_token"]),
            Query().type == "pocket_key",
        )
        click.echo(
            "Successfully completed auth process, you can now run archivy pocket sync to load the data"
        )
def auth(api_key):
    """Request a Pocket request token and tell the user how to approve it.

    Posts ``api_key`` as the consumer key to Pocket's OAuth request endpoint,
    saves the returned code in the ``pocket_key`` database record (updating
    the record if one exists, inserting it otherwise) and prints the URL the
    user has to visit to authorize access.
    """
    with app.app_context():
        db = get_db()

        resp = requests.post(
            "https://getpocket.com/v3/oauth/request",
            json={
                "consumer_key": api_key,
                "redirect_uri": "https://getpocket.com",
            },
            headers={
                "X-Accept": "application/json",
                "Content-Type": "application/json",
            },
        )
        request_token = resp.json()["code"]

        record = {
            "type": "pocket_key",
            "consumer_key": api_key,
            "code": request_token,
        }
        is_pocket_key = Query().type == "pocket_key"
        # Keep a single pocket_key record: overwrite it when present.
        if db.search(is_pocket_key):
            db.update(record, is_pocket_key)
        else:
            db.insert(record)

        authorize_url = (
            "https://getpocket.com/auth/authorize"
            f"?request_token={request_token}"
            "&redirect_uri=https://getpocket.com"
        )
        click.echo(
            "Allow archivy_pocket to retrieve data to your pocket account "
            f"by visiting {authorize_url}"
        )
def setup(author, location):
    """Store the given author and location as a ``metadata`` database record."""
    record = {
        "type": "metadata",
        "author": author,
        "location": location,
    }
    with app.app_context():
        # Persist the record in the app database.
        db = get_db()
        db.insert(record)
        click.echo("Metadata saved!")
def sync(force):
    """Fetch bookmarks from Pocket and save the new ones under ``pocket/``.

    Args:
        force: when true, the ``since`` filter is not sent, so Pocket is asked
            for every bookmark instead of only those newer than the most
            recent locally saved item. URLs that are already saved are still
            skipped.

    Prints a message and returns early when the stored Pocket credentials are
    missing or incomplete.
    """
    with app.app_context():
        db = get_db()

        try:
            pocket = db.search(Query().type == "pocket_key")[0]
            pocket_data = {
                "consumer_key": pocket["consumer_key"],
                "access_token": pocket["access_token"],
                "sort": "newest",
            }
        except (IndexError, KeyError):
            # No pocket_key record, or the record has no access token yet.
            click.echo("Pocket credentials not found, run the auth process first.")
            return

        # Find the date of the newest saved item and collect the saved URLs.
        epoch = datetime(1970, 1, 1)
        since = epoch
        create_dir("pocket")
        already_saved = set()
        for post in get_items(path="pocket/", structured=False):
            # NOTE(review): "%x" is the locale's date format, so parsing
            # presumably depends on the active locale - confirm.
            date = datetime.strptime(post["date"].replace("-", "/"), "%x")
            already_saved.add(post["url"])
            since = max(date, since)

        if since != epoch and not force:
            pocket_data["since"] = datetime.timestamp(since)

        bookmarks = requests.post(
            "https://getpocket.com/v3/get", json=pocket_data
        ).json()

        # api spec: https://getpocket.com/developer/docs/v3/retrieve
        # for some reason, if the `list` attribute is empty it returns a list
        # instead of a dict, so test for emptiness before calling .values().
        if not bookmarks["list"]:
            click.echo("No new bookmarks.")
            return

        for pocket_bookmark in bookmarks["list"].values():
            url = pocket_bookmark.get("resolved_url", pocket_bookmark["given_url"])
            # Items with status 2 are skipped (presumably "to be deleted" in
            # the Pocket API - confirm), as are URLs that are already saved.
            if int(pocket_bookmark["status"]) == 2 or url in already_saved:
                continue

            bookmark = DataObj(
                url=url,
                date=datetime.now(),
                type="pocket_bookmark",
                path="pocket",
            )
            try:
                bookmark.process_bookmark_url()
                click.echo(f"Saving {bookmark.title}...")
                bookmark.insert()
            except Exception:
                # Best effort: one failing bookmark must not abort the sync.
                # Catching Exception (not a bare except) lets Ctrl-C and
                # SystemExit still stop the command.
                click.echo(
                    f"Could not save {bookmark.url} - website may already be down."
                )
        click.echo("Done!")
def push(paths):
    """Commit local changes and push them to the ``origin`` remote.

    Args:
        paths: paths relative to ``app.config["USER_DIR"]`` to stage. When it
            is empty or contains ``"."``, every change in the repository is
            staged instead.

    Prints the push summary when ``check_errored`` flags the first push
    result, and a failure message when the push returned no results at all.
    """
    repo = get_repo()
    if not paths or "." in paths:
        repo.git.add(all=True)
    else:
        with app.app_context():
            prefixed_paths = [
                os.path.join(app.config["USER_DIR"], path) for path in paths
            ]
        repo.index.add(prefixed_paths)
    repo.index.commit("Sync local changes to remote git repo.")

    push_results = repo.remotes.origin.push()
    if not push_results:
        # GitPython returns an empty list when the push fails completely;
        # indexing it would raise IndexError instead of reporting the failure.
        click.echo("Push failed: no result was returned by the remote.")
        return

    push_event = push_results[0]
    if check_errored(push_event.flags):
        click.echo(push_event.summary)
    else:
        click.echo("Successfully pushed changes to remote!")
def add_metadata(dataobj):
    """Append a "Made by <author> in <location>." line to ``dataobj.content``.

    The author and location come from the first ``metadata`` record found in
    the database.
    """
    with app.app_context():
        is_metadata = Query().type == "metadata"
        records = get_db().search(is_metadata)
        info = records[0]
        dataobj.content += f"Made by {info['author']} in {info['location']}."
def get_repo():
    """Return the git repository object for ``app.config["USER_DIR"]``."""
    with app.app_context():
        user_dir = app.config["USER_DIR"]
        return gitpython.Repo(user_dir)
def get_random_dataobj_title():
    """Print the title of a randomly chosen dataobj.

    Prints a notice instead when there are no dataobjs to choose from.
    """
    with app.app_context():
        dataobjs = get_items(structured=False)
        if not dataobjs:
            click.echo("No dataobjs found.")
            return
        # randint includes both endpoints, so the upper bound must be the
        # last valid index; len(dataobjs) itself would be out of range.
        click.echo(dataobjs[randint(0, len(dataobjs) - 1)]["title"])
def hn_sync(save_comments, post_type, username, hn_password, force):
    """Save a user's Hacker News posts of one kind as bookmarks.

    Logs in to Hacker News, walks the paginated ``post_type`` listing of
    ``username`` and, for each listed post that the Algolia API reports as a
    story, stores a bookmark under ``hacker_news/<post_type>/``.

    Args:
        save_comments: when true, comments are appended to every saved story;
            stories without a URL always get their comments appended.
        post_type: listing to read; used in the request URL and as the name
            of the storage sub-directory.
        username: Hacker News account name.
        hn_password: password for that account.
        force: when false, ``finish()`` is called at the first post that was
            already saved; when true, saved posts are skipped and the walk
            continues.

    Updates the module-level counters ``num_ask_hn``, ``num_links`` and
    ``num_links_processed``. Exits with status 1 if the login fails.
    """
    global num_ask_hn, num_links, num_links_processed
    with app.app_context():
        session = requests.Session()

        print("\nLogging in...")
        session.post(
            f"{BASE_URL}/login", data={"acct": username, "pw": hn_password}
        )
        # A successful login is detected by the presence of the "user" cookie.
        if session.cookies.get("user", None) is None:
            print("Error logging in. Verify the credentials and try again.")
            sys.exit(1)
        print("Logged in successfully.\n")

        url = f"{BASE_URL}/{post_type}?id={username}&p="
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:75.0) Gecko/20100101 Firefox/75.0",
        }

        i = 1

        # create folders in archivy to store content
        create_dir("hacker_news")
        create_dir("hacker_news/" + post_type)

        # store urls of previously saved posts
        seen_posts = {
            post["url"]
            for post in get_items(
                path=f"hacker_news/{post_type}/", structured=False
            )
        }

        while True:
            links_processed_prev = num_links_processed

            print(f"Getting results of page {i}")
            r = session.get(url + str(i), headers=headers)

            tree = BeautifulSoup(r.text, features="lxml")
            tree_subtext = tree.select(".subtext")
            post_links = tree.select(".titlelink")

            # Number of links on the page
            n = len(tree_subtext)
            if not n:
                print(f"Processing page {i}. No links found.")
                break

            for j, subtext in enumerate(tree_subtext):
                tree_subtext_each = subtext.find_all("a")

                # The flag link may not be present in the subtext, so the
                # number of links could be either 3 or 4. The link to the
                # comments is always the last one; parse the post id from it.
                post_id = int(
                    tree_subtext_each[-1]["href"].split("=")[1].split("&")[0]
                )
                post_url = post_links[j]["href"]
                hn_link = f"{BASE_URL}/item?id={post_id}"

                already_seen = post_url in seen_posts or hn_link in seen_posts
                if already_seen and not force:
                    # we have already seen this upvoted story
                    # this means that all stories that follow will also be seen
                    finish()
                if already_seen and force:
                    print(f"{post_url} already saved.")
                    continue

                # call algolia api
                try:
                    res = requests.get(
                        f"https://hn.algolia.com/api/v1/items/{post_id}"
                    ).json()
                except Exception:
                    # Best effort per post. Catching Exception rather than
                    # using a bare except keeps Ctrl-C and SystemExit working.
                    print(f"Could not save {post_url}.")
                    continue

                # might return a 404 if not indexed, so we check if we got a
                # response by calling .get()
                if res.get("type") and res["type"] == "story":
                    bookmark = DataObj(
                        path=f"hacker_news/{post_type}/",
                        date=datetime.utcfromtimestamp(res["created_at_i"]),
                        type="bookmark",
                    )
                    if res["url"]:
                        num_links += 1
                        bookmark.url = post_url
                        bookmark.process_bookmark_url()
                    else:
                        # Story without an external url (e.g. an Ask HN post).
                        num_ask_hn += 1
                        bookmark.url = hn_link
                        bookmark.content = (
                            res["title"].replace("<p>", "").replace("</p>", "")
                        )

                    bookmark.title = res["title"]
                    bookmark.content = f"{res['points']} points on [Hacker News]({hn_link})\n\n{bookmark.content}"

                    # save comments if user requests it through option or if
                    # story is an ASK HN
                    if save_comments or not res["url"]:
                        bookmark.content += "\n\n## Comments from Hacker News"
                        for comment in res["children"]:
                            comments = "<ul>" + build_comments(comment) + "</ul>"
                            bookmark.content += "\n\n" + html2text(
                                comments, bodywidth=0
                            ).replace("\n\n", "\n")

                    bookmark.insert()
                    num_links_processed += 1
                    print(f"Saving {res['title']}...")

            if n < 30:
                # no more links
                break
            elif links_processed_prev == num_links_processed:
                # throttling if no new links have been saved (when we're
                # running force.)
                sleep(1)

            i += 1

        finish()