Exemplo n.º 1
0
def update(corpus, docix, path):
    """Re-index a single document, identified by its index number.

    Args:
        corpus: Corpus name handed to ``Corpus``.
        docix: Index number of the document; coerced with ``int``.
        path: Path argument, wrapped in ``Path`` before being passed to
            ``Corpus.update``.
    """
    docix = int(docix)

    # Keep the spinner running while the corpus is opened and re-indexed.
    with click_spinner.spinner():
        c = Corpus(corpus)
        c.update(docix=docix, path=Path(path))

    # Success is judged by whether the work at this index is searchable.
    outcome = (
        f"[+] updated document {docix} in '{corpus}'"
        if c.work_by_docix(docix).searchable
        else f"[-] failed"
    )
    click.echo(outcome)
Exemplo n.º 2
0
def verify(corpus, verbose, dry_run):
    """Verify the integrity of corpus indexes and manifest.

    Each manifest entry is checked with ``Corpus.verify_by_docix``. A one-line
    summary is echoed afterwards, and the user is offered the chance to
    re-index entries whose index files are missing.

    Args:
        corpus: Corpus name handed to ``Corpus``.
        verbose: When true, page a per-document report before the summary.
        dry_run: Forwarded to ``verify_by_docix``. When true, report lines are
            prefixed with ``*`` and missing documents are not re-indexed.
    """

    c = Corpus(corpus)
    manifest = c.manifest

    # Nothing to verify without an index directory.
    if not c.index_dir.exists():
        return

    prompt = f"{len(manifest)} documents manifested for corpus '{c.name}'. "
    if len(manifest) > 30:
        prompt += "This might take a while! "
    prompt += "Proceed?"
    if not click.confirm(prompt, default=True):
        return

    verified = []  # (docix, report line) pairs, sorted before display
    passes = fixes = adds = orphans = 0
    missing = {}  # manifest entries whose index files were not found

    with click.progressbar(
        manifest,
        length=len(manifest),
        show_percent=True,
        label=f"Verifying '{c.name}'",
    ) as bar:
        for item in bar:
            (
                status,
                (docix, author, title, filename, info),
            ) = c.verify_by_docix(item, dry_run=dry_run)
            msg = f"[{docix}] {author}, {title} ({filename})"
            if status == 0:
                msg += ", passed!"
                passes += 1
            elif status == 1:
                msg += ", fixed in manifest"
                fixes += 1
            elif status == 2:
                msg += ", added to manifest"
                adds += 1
            elif status == 3:
                msg += ", deleted orphaned index files"
                orphans += 1
            elif status == 4:
                # The report line is rebuilt from the manifest entry here;
                # presumably the returned metadata is unusable when the index
                # files are missing -- TODO confirm against verify_by_docix.
                entry = manifest[item]
                msg = (
                    f"[{docix}] {entry['author']}, {entry['title']} ("
                    f"{entry['filename']})"
                )
                msg += ", missing index files!"
                missing[item] = entry
            if info is not None and cylleneus.settings.DEBUG_LEVEL:
                msg += f" (= {info})"
            verified.append((docix, msg))

    if verbose and verified:
        marker = "*" if dry_run else ""
        click.echo_via_pager(
            "\n".join(
                marker + text
                for _, text in sorted(verified, key=lambda pair: pair[0])
            )
        )

    # Each optional fragment carries only its own leading separator, so any
    # combination of counters yields a well-formed line.
    summary = f"[-] '{corpus}': {len(manifest)} checked, {passes} passed"
    if fixes:
        summary += f", {fixes} fixed in manifest"
    if adds:
        summary += f", {adds} added to manifest"
    if orphans:
        summary += f", {orphans} orphaned files deleted"
    if dry_run and passes < len(manifest):
        summary += " -- changes have NOT been committed!"
    click.echo(summary)

    if not missing or not click.confirm(
        f"Try to re-index {len(missing)} missing documents?", default=True,
    ):
        return

    for docix, meta in missing.items():
        # Entries without a filename cannot be located on disk; skip them.
        if not meta["filename"]:
            continue
        path = c.text_dir / Path(meta["filename"])

        if dry_run:
            click.echo(
                f"*[-] {meta['author']}, {meta['title']} "
                f"({meta['filename']}) -- document NOT re-indexed!"
            )
            continue

        with click_spinner.spinner():
            updated_docix = c.update(docix, path)

        if updated_docix is not None:
            click.echo(
                f"[{updated_docix}] {meta['author']}, {meta['title']} ({meta['filename']}), "
                "index created!"
            )
        else:
            click.echo(
                f"[-] {meta['author']}, {meta['title']} ({meta['filename']}), failed"
            )