Exemplo n.º 1
0
def show(run_id: str) -> None:
    """Describe a provided ingestion run to datahub"""
    # The docstring above is left short on purpose: it may be surfaced as CLI
    # help text (presumably this is a click command -- decorators not visible
    # here, confirm before expanding it).
    #
    # Sends a rollback request flagged with dryRun=True and hardDelete=True,
    # then prints a summary of the returned counts followed by a table of the
    # returned sample rows.
    (
        sample_rows,
        created_entities,
        modified_aspects,
        _aspects_affected,
        _unsafe_entity_count,
        _unsafe_entities,
    ) = post_rollback_endpoint(
        {"runId": run_id, "dryRun": True, "hardDelete": True},
        "/runs?action=rollback",
    )

    # Once the aspect count reaches ELASTIC_MAX_PAGE_SIZE the wording switches
    # to "at least" (presumably the backend caps results at that page size, so
    # the numbers are lower bounds -- TODO confirm).
    summary = (
        f"this run created at least {created_entities} new entities and updated at least {modified_aspects} aspects"
        if modified_aspects >= ELASTIC_MAX_PAGE_SIZE
        else f"this run created {created_entities} new entities and updated {modified_aspects} aspects"
    )
    click.echo(summary)
    click.echo(
        "rolling back will delete the entities created and revert the updated aspects"
    )
    click.echo()

    sample_header = f"showing first {len(sample_rows)} of {modified_aspects} aspects touched by this run"
    click.echo(sample_header)
    table = tabulate(sample_rows, RUN_TABLE_COLUMNS, tablefmt="grid")
    click.echo(table)
Exemplo n.º 2
0
def rollback(run_id: str, dry_run: bool) -> None:
    """Rollback a provided ingestion run to datahub"""
    # The docstring above is left unchanged: it may be surfaced as CLI help
    # text (presumably this is a click command -- decorators not visible here).
    #
    # Flow: check connectivity, ask for confirmation unless this is a dry run,
    # post the rollback request, then print a summary and a table of the
    # returned sample rows.

    cli_utils.test_connectivity_complain_exit("ingest")

    # Only a real (non dry-run) rollback prompts; abort=True makes click raise
    # Abort when the user declines.
    if not dry_run:
        click.confirm(
            "This will permanently delete data from DataHub. Do you want to continue?",
            abort=True,
        )

    payload_obj = {"runId": run_id, "dryRun": dry_run}
    structured_rows, entities_affected, aspects_affected = post_rollback_endpoint(
        payload_obj, "/runs?action=rollback"
    )

    click.echo(
        "Rolling back deletes the entities created by a run and reverts the updated aspects"
    )
    # The separating space lives inside the dry-run-only fragments so that the
    # non-dry-run messages do not contain a doubled space.
    click.echo(
        f"This rollback {'will ' if dry_run else ''}{'delete' if dry_run else 'deleted'} {entities_affected} entities and {'will roll' if dry_run else 'rolled'} back {aspects_affected} aspects"
    )
    click.echo(
        f"showing first {len(structured_rows)} of {aspects_affected} aspects {'that will be ' if dry_run else ''}reverted by this run"
    )
    click.echo(tabulate(structured_rows, RUN_TABLE_COLUMNS, tablefmt="grid"))
Exemplo n.º 3
0
def rollback(run_id: str) -> None:
    """Rollback a provided ingestion run to datahub"""
    # The docstring above is left unchanged: it may be surfaced as CLI help
    # text (presumably this is a click command -- decorators not visible here).
    #
    # Always prompts first; abort=True makes click raise Abort when the user
    # declines, so nothing below runs in that case.
    click.confirm(
        "This will permanently delete data from DataHub. Do you want to continue?",
        abort=True,
    )

    # The request is sent with dryRun=False.
    rows, deleted_entities, reverted_aspects = post_rollback_endpoint(
        {"runId": run_id, "dryRun": False}, "/runs?action=rollback"
    )

    click.echo(
        "rolling back deletes the entities created by a run and reverts the updated aspects"
    )

    outcome = f"this rollback deleted {deleted_entities} entities and rolled back {reverted_aspects} aspects"
    click.echo(outcome)

    sample_header = f"showing first {len(rows)} of {reverted_aspects} aspects reverted by this run"
    click.echo(sample_header)

    table = tabulate(rows, RUN_TABLE_COLUMNS, tablefmt="grid")
    click.echo(table)
Exemplo n.º 4
0
def delete_for_registry(
    registry_id: str,
    soft: bool,
    dry_run: bool,
) -> DeletionResult:
    """Call the ``deleteAll`` entities action for a registry and summarize it.

    Posts ``registryId``, ``dryRun`` and ``soft`` to
    ``/entities?action=deleteAll`` through ``cli_utils.post_rollback_endpoint``
    and copies the returned entity count, aspect count and sample rows into a
    ``DeletionResult``.

    Args:
        registry_id: Sent as ``registryId`` in the request payload.
        soft: Sent as ``soft`` in the request payload.
        dry_run: Sent as ``dryRun`` in the request payload.

    Returns:
        The populated ``DeletionResult``, after its ``end()`` has been called.
    """
    # Created before the request is sent, matching the original ordering
    # (presumably DeletionResult tracks timing between creation and end() --
    # TODO confirm).
    result = DeletionResult()
    # Placeholder values; both are overwritten once the endpoint responds.
    result.num_entities = 1
    result.num_records = UNKNOWN_NUM_RECORDS  # Default is unknown

    response = cli_utils.post_rollback_endpoint(
        {"registryId": registry_id, "dryRun": dry_run, "soft": soft},
        "/entities?action=deleteAll",
    )
    # The response must unpack into exactly six values; the last three are
    # not used here.
    (
        sample_rows,
        entity_count,
        aspect_count,
        _unsafe_aspects,
        _unsafe_entity_count,
        _unsafe_entities,
    ) = response

    result.num_entities = entity_count
    result.num_records = aspect_count
    result.sample_records = sample_rows
    result.end()
    return result
Exemplo n.º 5
0
def rollback(
    run_id: str, force: bool, dry_run: bool, safe: bool, report_dir: str
) -> None:
    """Rollback a provided ingestion run to datahub"""
    # The docstring above is left unchanged: it may be surfaced as CLI help
    # text (presumably this is a click command -- decorators not visible here).
    #
    # Parameters:
    #   run_id     -- sent as "runId" in the rollback request.
    #   force      -- when true, the confirmation prompt is skipped.
    #   dry_run    -- sent as "dryRun"; also skips the prompt and switches the
    #                 messages to future tense.
    #   safe       -- sent as "safe"; selects the "hide" vs "delete" wording of
    #                 the warning about aspects outside the run.
    #   report_dir -- directory under which a timestamped report folder is
    #                 written when the response reports unsafe entities.
    #
    # Exits via sys.exit() if the report files cannot be written.

    cli_utils.test_connectivity_complain_exit("ingest")

    # abort=True makes click raise Abort when the user declines.
    if not force and not dry_run:
        click.confirm(
            "This will permanently delete data from DataHub. Do you want to continue?",
            abort=True,
        )

    payload_obj = {"runId": run_id, "dryRun": dry_run, "safe": safe}
    (
        structured_rows,
        entities_affected,
        aspects_reverted,
        aspects_affected,
        unsafe_entity_count,
        unsafe_entities,
    ) = post_rollback_endpoint(payload_obj, "/runs?action=rollback")

    click.echo(
        "Rolling back deletes the entities created by a run and reverts the updated aspects"
    )
    # The separating space lives inside the dry-run-only fragment so that the
    # non-dry-run message does not contain a doubled space.
    click.echo(
        f"This rollback {'will ' if dry_run else ''}{'delete' if dry_run else 'deleted'} {entities_affected} entities and {'will roll' if dry_run else 'rolled'} back {aspects_reverted} aspects"
    )

    click.echo(
        f"showing first {len(structured_rows)} of {aspects_reverted} aspects {'that will be ' if dry_run else ''}reverted by this run"
    )
    click.echo(tabulate(structured_rows, RUN_TABLE_COLUMNS, tablefmt="grid"))

    if aspects_affected > 0:
        if safe:
            click.echo(
                f"WARNING: This rollback {'will hide' if dry_run else 'has hidden'} {aspects_affected} aspects related to {unsafe_entity_count} entities being rolled back that are not part ingestion run id."
            )
        else:
            click.echo(
                f"WARNING: This rollback {'will delete' if dry_run else 'has deleted'} {aspects_affected} aspects related to {unsafe_entity_count} entities being rolled back that are not part ingestion run id."
            )

    if unsafe_entity_count > 0:
        # One report folder per invocation, named after the local wall-clock
        # time.
        # NOTE(review): the folder name contains ':' which is not a valid
        # path character on Windows; left unchanged to keep the existing
        # report layout.
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        try:
            folder_name = os.path.join(report_dir, current_time)
            os.makedirs(folder_name, exist_ok=True)

            ingestion_config_file_name = os.path.join(folder_name, "config.json")
            with open(ingestion_config_file_name, "w") as file_handle:
                json.dump({"run_id": run_id}, file_handle)

            csv_file_name = os.path.join(folder_name, "unsafe_entities.csv")
            # newline="" is required by the csv module so that it controls
            # line endings itself; without it, platforms that translate "\n"
            # end up with blank rows between records.
            with open(csv_file_name, "w", newline="") as file_handle:
                writer = csv.writer(file_handle)
                writer.writerow(["urn"])
                writer.writerows([row.get("urn")] for row in unsafe_entities)

        except IOError as e:
            print(e)
            sys.exit("Unable to write reports to " + report_dir)