Пример #1
0
def setup():
    """Creates and sets up git repository.

    Initialises a repository in ``USER_DIR``, commits the ``data/`` path and
    renames the active branch to the name chosen by the user. It then prompts
    for a remote URL plus credentials, registers the remote as ``origin``,
    pushes the branch, fetches, and sets the tracking branch.
    """
    # Local import: credentials are percent-encoded before being embedded in
    # the remote URL so that characters such as "@", ":" or "/" cannot
    # corrupt it.
    from urllib.parse import quote

    with app.app_context():
        click.echo(f"Creating new git repo in {app.config['USER_DIR']}...")
        repo = gitpython.Repo.init(app.config["USER_DIR"])
        branch = click.prompt("Main branch", type=str, default="main")
        repo.index.add("data/")
        repo.index.commit("Initial commit")
        repo.active_branch.rename(branch)

        while True:
            remote_url = click.prompt(
                "Enter the url of the remote you'd like to sync to. "
                "Ex: https://github.com/archivy/archivy",
                type=str,
            )
            username = click.prompt("Enter your username", type=str)
            password = click.prompt("Enter your personal access token",
                                    type=str,
                                    hide_input=True)
            credentials = f"{quote(username, safe='')}:{quote(password, safe='')}"
            # Only the leading scheme is rewritten; any later "https://"
            # inside the URL is left alone.
            remote_url = remote_url.replace("https://",
                                            f"https://{credentials}@", 1)
            origin = repo.create_remote("origin", remote_url)
            if origin.exists():
                break
            click.echo("Remote does not exist.")
        origin.push(branch)
        # Fetch so that origin.refs is populated before looking up the branch.
        origin.fetch()
        repo.active_branch.set_tracking_branch(getattr(origin.refs, branch))
        click.echo("Successfully setup repository.")
Пример #2
0
def test_app():
    """Instantiate the app for each test with its own temporary data directory

    Each test using this generator gets its own temporary application
    directory (containing a ``data`` sub-directory and the database created by
    ``get_db()``), which is deleted afterwards.

    Yields:
        The configured ``app`` object, with an application context pushed.
    """
    # create a temporary directory to isolate the database for each test
    app_dir = tempfile.mkdtemp()
    try:
        app.config['APP_PATH'] = app_dir
        data_dir = os.path.join(app_dir, "data")
        os.mkdir(data_dir)

        app.config['TESTING'] = True
        app.config["WTF_CSRF_ENABLED"] = False
        # An application context is required so that `flask.current_app` can
        # be used by the code under test.
        # See https://flask.palletsprojects.com/en/1.1.x/appcontext/ for more
        # information.
        with app.app_context():
            # Called for its side effect of initialising the database while
            # the temporary APP_PATH is configured.
            get_db()
            yield app
    finally:
        # Remove the temporary directory even if setup, the app context or
        # the consumer of the generator raised.
        shutil.rmtree(app_dir)
Пример #3
0
def complete():
    """Finish the Pocket OAuth flow and store the resulting access token.

    Looks up the ``pocket_key`` record in the database, exchanges its
    consumer key and request code for an access token at Pocket's
    ``/v3/oauth/authorize`` endpoint, and saves the token on that record.
    Prints "Key not found" and returns if no ``pocket_key`` record exists.
    """
    with app.app_context():
        db = get_db()
        matches = db.search(Query().type == "pocket_key")
        # Check for the missing record explicitly rather than catching every
        # exception, so unrelated failures are not reported as "Key not found".
        if not matches:
            click.echo("Key not found")
            return
        pocket = matches[0]
        auth_data = {
            "consumer_key": pocket["consumer_key"],
            "code": pocket["code"]
        }
        resp = requests.post(
            "https://getpocket.com/v3/oauth/authorize",
            json=auth_data,
            headers={
                "X-Accept": "application/json",
                "Content-Type": "application/json",
            },
        )
        db.update(
            operations.set("access_token",
                           resp.json()["access_token"]),
            Query().type == "pocket_key",
        )
        click.echo(
            "Successfully completed auth process, you can now run archivy pocket sync to load the data"
        )
Пример #4
0
def auth(api_key):
    """Start the Pocket OAuth flow for the given consumer key.

    Requests a request token from Pocket's ``/v3/oauth/request`` endpoint,
    stores it with the consumer key in the ``pocket_key`` database record
    (updating the record if one already exists), and prints the URL the user
    must visit to authorise access.

    Args:
        api_key: Pocket consumer key.
    """
    with app.app_context():
        db = get_db()
        pocket = Query()
        request_data = {
            "consumer_key": api_key,
            "redirect_uri": "https://getpocket.com",
        }
        resp = requests.post(
            "https://getpocket.com/v3/oauth/request",
            json=request_data,
            headers={
                "X-Accept": "application/json",
                "Content-Type": "application/json",
            },
        )
        # Parse the response body once; the request token is needed both for
        # the stored record and for the authorisation URL.
        request_token = resp.json()["code"]
        new_data = {
            "type": "pocket_key",
            "consumer_key": api_key,
            "code": request_token,
        }
        if db.search(pocket.type == "pocket_key"):
            db.update(new_data, pocket.type == "pocket_key")
        else:
            db.insert(new_data)
        click.echo(
            "Allow archivy_pocket to retrieve data to your pocket account "
            f"by visiting https://getpocket.com/auth/authorize?request_token={request_token}"
            "&redirect_uri=https://getpocket.com")
Пример #5
0
def setup(author, location):
    """Store the given author and location as a ``metadata`` database record.

    Prints a confirmation message once the record has been inserted.
    """
    record = {"type": "metadata", "author": author, "location": location}
    with app.app_context():
        # persist the record through the application's database handle
        database = get_db()
        database.insert(record)
    click.echo("Metadata saved!")
Пример #6
0
def sync(force):
    """Fetch bookmarks from Pocket and save the new ones under ``pocket/``.

    Uses the credentials stored in the ``pocket_key`` database record. Unless
    ``force`` is truthy, only bookmarks changed since the most recent date
    found among the already saved items are requested. Bookmarks whose status
    is 2, or whose URL is already saved, are skipped.

    Args:
        force: when truthy, do not send the ``since`` filter to Pocket.
    """
    with app.app_context():
        db = get_db()

        # update pocket dictionary
        stored_keys = db.search(Query().type == "pocket_key")
        if not stored_keys:
            # Same message as the auth-completion command, instead of an
            # IndexError traceback.
            click.echo("Key not found")
            return
        pocket = stored_keys[0]

        pocket_data = {
            "consumer_key": pocket["consumer_key"],
            "access_token": pocket["access_token"],
            "sort": "newest",
        }

        # get date of latest call to pocket api
        epoch = datetime(1970, 1, 1)
        since = epoch
        create_dir("pocket")
        already_saved = set()
        for post in get_items(path="pocket/", structured=False):
            # NOTE(review): "%x" is locale-dependent; this assumes stored dates
            # match the current locale's date format once "-" becomes "/".
            date = datetime.strptime(post["date"].replace("-", "/"), "%x")
            already_saved.add(post["url"])
            since = max(date, since)

        if since != epoch and not force:
            pocket_data["since"] = since.timestamp()
        bookmarks = requests.post("https://getpocket.com/v3/get",
                                  json=pocket_data).json()

        # api spec: https://getpocket.com/developer/docs/v3/retrieve
        # for some reason, if the `list` attribute is empty it returns a list instead of a dict.
        if not bookmarks["list"]:
            click.echo("No new bookmarks.")
        else:
            for pocket_bookmark in bookmarks["list"].values():
                url = pocket_bookmark.get("resolved_url",
                                          pocket_bookmark["given_url"])
                if int(pocket_bookmark["status"]
                       ) != 2 and url not in already_saved:
                    bookmark = DataObj(
                        url=url,
                        date=datetime.now(),
                        type="pocket_bookmark",
                        path="pocket",
                    )
                    try:
                        bookmark.process_bookmark_url()
                        click.echo(f"Saving {bookmark.title}...")
                        bookmark.insert()
                    except Exception:
                        # Best effort: one failing bookmark must not abort the
                        # whole sync. KeyboardInterrupt and SystemExit still
                        # propagate.
                        click.echo(
                            f"Could not save {bookmark.url} - website may already be down."
                        )
            click.echo("Done!")
Пример #7
0
def push(paths):
    """Stage, commit and push local changes to the ``origin`` remote.

    With no paths, or when "." is among them, every change is staged;
    otherwise only the given paths (resolved against ``USER_DIR``) are staged.
    """
    repo = get_repo()
    stage_everything = not paths or "." in paths
    if stage_everything:
        repo.git.add(all=True)
    else:
        with app.app_context():
            user_dir = app.config["USER_DIR"]
            prefixed_paths = [os.path.join(user_dir, path) for path in paths]
        repo.index.add(prefixed_paths)
    repo.index.commit("Sync local changes to remote git repo.")

    # Only the first entry of the push result is inspected.
    result = repo.remotes.origin.push()[0]
    if not check_errored(result.flags):
        click.echo("Successfully pushed changes to remote!")
    else:
        click.echo(result.summary)
Пример #8
0
def add_metadata(dataobj):
    """Append an attribution line, built from the stored metadata, to the content.

    Reads the first ``metadata`` record from the database and appends its
    author and location to ``dataobj.content`` in place.
    """
    with app.app_context():
        records = get_db().search(Query().type == "metadata")
        metadata = records[0]
        signature = f"Made by {metadata['author']} in {metadata['location']}."
        dataobj.content += signature
Пример #9
0
def get_repo():
    """Return a repository object for the configured ``USER_DIR`` directory."""
    with app.app_context():
        user_dir = app.config["USER_DIR"]
        return gitpython.Repo(user_dir)
Пример #10
0
def get_random_dataobj_title():
    """Print the title of a randomly chosen data object.

    Prints a short notice instead when there are no data objects.
    """
    with app.app_context():
        dataobjs = get_items(structured=False)
        if not dataobjs:
            click.echo("No data objects found.")
            return
        # randint includes both endpoints, so the upper bound must be the
        # last valid index rather than the length.
        click.echo(dataobjs[randint(0, len(dataobjs) - 1)]["title"])
Пример #11
0
def hn_sync(save_comments, post_type, username, hn_password, force):
    """Save a user's Hacker News posts of ``post_type`` as bookmarks.

    Logs in to Hacker News, walks the paginated ``post_type`` listing for
    ``username`` and, for each story not already stored under
    ``hacker_news/<post_type>/``, fetches its details from the Algolia HN API
    and inserts a bookmark. Updates the module-level counters
    ``num_ask_hn``, ``num_links`` and ``num_links_processed``.

    Args:
        save_comments: when truthy, append the comment tree to every saved
            story (stories without a URL always get their comments).
        post_type: name of the listing page to scrape; also used as the
            sub-directory name.
        username: Hacker News account name.
        hn_password: Hacker News account password.
        force: when truthy, already saved posts are skipped and scanning
            continues; otherwise ``finish()`` is called on the first one.
    """
    global num_ask_hn, num_links, num_links_processed
    with app.app_context():
        session = requests.Session()

        print("\nLogging in...")

        # The response itself is not needed: success is detected through the
        # "user" cookie that the session keeps.
        session.post(f"{BASE_URL}/login",
                     data={
                         "acct": username,
                         "pw": hn_password
                     })

        if session.cookies.get("user", None) is None:
            print("Error logging in. Verify the credentials and try again.")
            sys.exit(1)
        print("Logged in successfully.\n")

        url = f"{BASE_URL}/{post_type}?id={username}&p="
        headers = {
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:75.0) Gecko/20100101 Firefox/75.0",
        }

        i = 1

        # create folders in archivy to store content
        create_dir("hacker_news")
        create_dir("hacker_news/" + post_type)

        # store urls of previously saved posts
        seen_posts = {
            post["url"] for post in get_items(path=f"hacker_news/{post_type}/",
                                              structured=False)
        }
        while True:
            links_processed_prev = num_links_processed
            print(f"Getting results of page {i}")
            r = session.get(url + str(i), headers=headers)

            tree = BeautifulSoup(r.text, features="lxml")
            tree_subtext = tree.select(".subtext")
            post_links = tree.select(".titlelink")
            # Number of links on the page
            n = len(tree_subtext)

            if not n:
                print(f"Processing page {i}. No links found.")
                break

            for j in range(n):
                tree_subtext_each = tree_subtext[j].find_all("a")

                # This is to take care of situations where flag link may not be
                # present in the subtext. So number of links could be either 3
                # or 4.
                num_subtext = len(tree_subtext_each)
                # get post id by parsing link to comments
                post_id = int(
                    tree_subtext_each[num_subtext -
                                      1]["href"].split("=")[1].split("&")[0])
                post_url = post_links[j]["href"]
                hn_link = f"{BASE_URL}/item?id={post_id}"

                already_saved = post_url in seen_posts or hn_link in seen_posts
                if already_saved and not force:
                    # we have already seen this upvoted story
                    # this means that all stories that follow will also be seen
                    # NOTE(review): finish() presumably terminates the run —
                    # confirm; otherwise processing continues below.
                    finish()
                if already_saved and force:
                    print(f"{post_url} already saved.")
                    continue
                # call algolia api
                try:
                    res = requests.get(
                        f"https://hn.algolia.com/api/v1/items/{post_id}").json(
                        )
                except Exception:
                    # Network or JSON decoding failure: skip this post, but
                    # let KeyboardInterrupt / SystemExit propagate.
                    print(f"Could not save {post_url}.")
                    continue
                # might return a 404 if not indexed, so we check if we got a response by calling .get()
                if res.get("type") == "story":
                    bookmark = DataObj(
                        path=f"hacker_news/{post_type}/",
                        date=datetime.utcfromtimestamp(res["created_at_i"]),
                        type="bookmark",
                    )
                    if res["url"]:
                        num_links += 1
                        bookmark.url = post_url
                        bookmark.process_bookmark_url()
                    else:
                        num_ask_hn += 1
                        bookmark.url = hn_link
                        # NOTE(review): the content is built from the title
                        # here — confirm this is intended rather than the
                        # post's body text.
                        bookmark.content = (res["title"].replace(
                            "<p>", "").replace("</p>", ""))

                    bookmark.title = res["title"]
                    bookmark.content = f"{res['points']} points on [Hacker News]({hn_link})\n\n{bookmark.content}"

                    # save comments if user requests it through option or if story is an ASK HN
                    if save_comments or not res["url"]:
                        bookmark.content += "\n\n## Comments from Hacker News"
                        for comment in res["children"]:
                            comments = "<ul>" + build_comments(
                                comment) + "</ul>"
                            bookmark.content += "\n\n" + html2text(
                                comments, bodywidth=0).replace("\n\n", "\n")
                    bookmark.insert()
                    num_links_processed += 1
                    print(f"Saving {res['title']}...")

            if n < 30:
                # fewer than 30 entries on the page: no more links
                break
            elif links_processed_prev == num_links_processed:
                # throttling if no new links have been saved (when we're running force.)
                sleep(1)

            i += 1
        finish()