Exemplo n.º 1
0
"""
Example demonstrating how to OCR a document and access the text at the document, page, and 
block (or paragraph) level.
"""

from indico import IndicoClient, IndicoConfig
from indico.queries import DocumentExtraction, JobStatus, RetrieveStorageObject

# Build an authenticated API client. The token path is written relative to
# the working directory ("./path/..."), matching the other examples.
my_config = IndicoConfig(host="app.indico.io",
                         api_token_path="./path/to/indico_api_token.txt")
client = IndicoClient(config=my_config)

# Submit the PDF for OCR using the 'standard' preset; element 0 of the
# returned sequence is the job for our single file.
files_to_extract = client.call(
    DocumentExtraction(files=["./test_paragraphs.pdf"],
                       json_config={"preset_config": "standard"}))
# Wait for that job, then download the JSON it points to.
extracted_file = client.call(JobStatus(id=files_to_extract[0].id, wait=True))
json_result = client.call(RetrieveStorageObject(extracted_file.result))

# The code below shows how to get the OCR text from the 'json_result' object.
# Note: it may vary slightly if you use DocumentExtraction configurations other than 'standard'

# Full Text
full_document_text = json_result["text"]

# Document Text split by page
text_by_page = [page["text"] for page in json_result["pages"]]

# Document Text split by block (or paragraph)
from indico import IndicoClient, IndicoConfig
from indico.queries import GraphQLRequest

# Connect to the Indico host, authenticating with the token stored on disk
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Raw GraphQL document asking for every dataset, a few of its summary
# fields, and the ids of its model groups
qstr = """{
            datasets {
                id
                name
                status
                rowCount
                numModelGroups
                modelGroups {
                    id
                }
            }
        }"""

# Wrap the query in a request object, send it, and show the reply
datasets_request = GraphQLRequest(query=qstr)
response = client.call(datasets_request)
print(response)
Exemplo n.º 3
0
from indico import IndicoClient, IndicoConfig
from indico.queries import DocumentExtraction, JobStatus, RetrieveStorageObject

# Set up the API client from a host name and a token file
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Submit one document for OCR with the "ondocument" preset
ocr_request = DocumentExtraction(
    files=["./path_to_doc.pdf"],
    json_config={"preset_config": "ondocument"},
)
job = client.call(ocr_request)

# Wait on the first entry of the returned jobs until it is done
status_request = JobStatus(id=job[0].id, wait=True)
extracted_file = client.call(status_request)

# Download and print the OCR output only when the job reports success
if extracted_file.status == "SUCCESS":
    stored_output = RetrieveStorageObject(extracted_file.result)
    result = client.call(stored_output)
    print(result)
from indico import IndicoClient, IndicoConfig
from indico.queries import GetModelGroup, GetTrainingModelWithProgress

# Connect to the Indico platform
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# The model group ID can be found on the review page of the indico platform
model_group_id = 4305

# Fetch the model group first, then the progress of its training model
model_group_query = GetModelGroup(model_group_id)
mg = client.call(model_group_query)
progress_query = GetTrainingModelWithProgress(model_group_id)
training_mg = client.call(progress_query)

# Report the name, the training status and the completion percentage
print(f"Model Name: {mg.name}")
print(f"Training status: {training_mg.status}")
print(
    f"Percent complete: {training_mg.training_progress.percent_complete:.2f}")
Exemplo n.º 5
0
import os
from indico import IndicoClient, IndicoConfig

# This script builds three clients in a row to show which host setting wins.
# The order of the statements is the point of the example.

# 1) No config object is passed, so the defaults apply
#    (NOTE: assumes INDICO_HOST is not already set in your environment).
# Will connect to https://app.indico.io
client = IndicoClient()

# 2) Environment variables override defaults
os.environ["INDICO_HOST"] = "foo.bar.com"

# Will connect to https://foo.bar.com
client = IndicoClient()

# 3) IndicoConfig will override environment variables and defaults
my_config = IndicoConfig(
    host="indico.my-company.com",  # Overrides environment variable
    api_token_path="../path/to/custom_api_token.txt",
)

# Will connect to https://indico.my-company.com
client = IndicoClient(config=my_config)
"""
Getting Image Predictions

Image Predictions work slightly differently from generating predictions from text documents
in that they require you to first upload the documents. The script below provides a sample. 
"""
from indico import IndicoClient, IndicoConfig
from indico.queries import ModelGroupPredict, UploadImages, JobStatus

# Build the API client; the config is passed by keyword, as in the other examples
config = IndicoConfig(host="app.indico.io",
                      api_token_path="./indico_api_token.txt")
client = IndicoClient(config=config)

# UploadImages returns a list of upload URLs that you can use for gathering predictions
urls = client.call(
    UploadImages(files=["./path/to/image.png", "./path/to/image2.png"]))

# Get your Selected Model ID (from the model's Explain page in the app or using the API)
job = client.call(ModelGroupPredict(model_id=30970, data=urls))

# Wait for the predictions to finish; wait=True is spelled out so the
# blocking behaviour does not depend on the library default
predictions = client.call(JobStatus(id=job.id, wait=True))

# Prediction results are ready
print(predictions.result)
Exemplo n.º 7
0
"""
Creating an Image Dataset from Local PNGs/JPGs

There are two ways to create a dataset of PNGs/JPGs depending on whether the images are
stored locally on your computer or at a publicly accessible URL. The code snippet 
below shows you what to do if the images are on your computer.
"""

from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset
import pandas as pd

# Create an Indico API client: IndicoConfig holds the host name and the path
# to the API token file, and IndicoClient is built from that config.
my_config = IndicoConfig(host="app.indico.io",
                         api_token_path="./path/to/indico_api_token.txt")
client = IndicoClient(config=my_config)

# For local images the dataset is described by a CSV file (built here purely
# for demonstration). It has one column whose values are the paths leading
# from the CSV to the image files on your computer.
image_dataset = pd.DataFrame(
    {
        "image_files": [
            "./path/from/csv/to/image.png",
            "./path/from/csv/to/image2.png",
        ],
    }
)
# Write the table without the index so the file contains only that column
image_dataset.to_csv("./image_dataset.csv", index=False)

# Use the CSV you created (like above) to create the dataset
dataset = client.call(
    CreateDataset(
        name="My Image Dataset",
        files="./image_dataset.csv",
Exemplo n.º 8
0
from indico import IndicoClient, IndicoConfig
from indico.queries import (
    JobStatus,
    ListWorkflows,
    RetrieveStorageObject,
    WorkflowSubmission,
)

# Use your dataset's id to call its associated workflow
dataset_id = 6826

# API client configuration and construction
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Every workflow attached to this dataset id; an empty list when there are none
workflow_query = ListWorkflows(dataset_ids=[dataset_id])
workflows = client.call(workflow_query)

if workflows:
    # Send a document through the first workflow that was found
    submit_request = WorkflowSubmission(
        workflow_id=workflows[0].id,
        files=["./path/to/sample.pdf"],
        submission=False,
    )
    # Get back one Job per file
    jobs = client.call(submit_request)
    job = jobs[0]
Exemplo n.º 9
0
from indico import IndicoConfig, IndicoClient
import os


# Host of the production Indico deployment
INDICO_PROD_URL = 'app.indico.io'
# edit this with the path to your api token
API_TOKEN_PATH = '/home/fitz/Documents/POC/chicken-parm/indico_api_token.txt'

# Data folders: the sub-directories are derived from DATA_DIR
DATA_DIR = "/home/fitz/Documents/POC/chicken-parm/data"
SNAPSHOT_DIR = os.path.join(DATA_DIR, "snapshots")
DEMO_FILE_DIR = os.path.join(DATA_DIR, "demo_files")

# Extraction settings selecting the 'detailed' preset
detailed_pdf_extraction_config = dict(preset_config='detailed')

# Module-level client for the production host, built from the values above
indico_prod_config = IndicoConfig(host=INDICO_PROD_URL,
                                  api_token_path=API_TOKEN_PATH)
PROD_CLIENT = IndicoClient(config=indico_prod_config)
Exemplo n.º 10
0
    GenerateSubmissionResult,
    JobStatus,
    ListSubmissions,
    RetrieveStorageObject,
    SubmissionResult,
    SubmitReview,
    UpdateSubmission,
    WaitForSubmissions,
    WorkflowSubmission,
    WorkflowSubmissionDetailed,
)

# Build the Indico API client from a host name and a token file
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

workflow_id = 5
"""
Example 1
Create a new submission
Generate a submission result as soon as the submission is done processing
Then mark the submission has having been retrieved
"""

# Submit one document to the workflow and keep the first returned id
new_submission = WorkflowSubmission(workflow_id=workflow_id,
                                    files=["./path_to_doc.pdf"])
submission_ids = client.call(new_submission)
submission_id = submission_ids[0]

# Request the submission's result (wait=True), then download the stored object
result_request = SubmissionResult(submission_id, wait=True)
result_url = client.call(result_request)
result = client.call(RetrieveStorageObject(result_url.result))
Exemplo n.º 11
0
from datetime import datetime, timedelta

from indico import IndicoConfig, IndicoClient
from indico.filters import or_, UserMetricsFilter
from indico.queries import JobStatus, RetrieveStorageObject
from indico.queries.usermetrics import GetUserSummary, GetUserSnapshots, GenerateChangelogReport, GetUserChangelog
from indico.types.user_metrics import UserSummary
"""
Example 1: User Summary
"""
# Build the Indico API client from a host name and a token file
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Fetch the summary and report the enabled-user count and the number of roles
user_summary: UserSummary = client.call(GetUserSummary())
print(f"Wow! there's {user_summary.users.enabled!s} users enabled on the app!")
print(f"Did you know there are {len(user_summary.app_roles)!s} roles available here?")
"""

Example 2: User Snapshots
Snapshots are paginated and iterable, 
so you can continue to iterate over them to build a full set
"""
# Collect every page of today's snapshots into one flat list
snapshots = []
snapshot_query = GetUserSnapshots(date=datetime.now())
for snapshot_page in client.paginate(snapshot_query):
    snapshots.extend(snapshot_page)

print(f"Fetched {len(snapshots)!s} users for analysis")
from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset, CreateModelGroup, ModelGroupPredict

# Build the Indico API client from a host name and a token file
my_config = IndicoConfig(
    host="app.indico.io",
    api_token_path="./path/to/indico_api_token.txt",
)
client = IndicoClient(config=my_config)

# Upload the CSV as a new dataset
new_dataset = CreateDataset(name="airline_comments",
                            files=["./airline-comments.csv"])
dataset = client.call(new_dataset)

# Train the model on the relevant CSV columns
training_request = CreateModelGroup(
    name="my_classification_model",
    dataset_id=dataset.id,
    # csv text column
    source_column_id=dataset.datacolumn_by_name("text").id,
    # csv target class column
    labelset_id=dataset.labelset_by_name("Target_1").id,
    # wait for training to finish
    wait=True,
)
model_group = client.call(training_request)

# Ask the selected model of the new group for predictions on two sample texts
sample_texts = ["Sample Text to predict on", "More Sample text to predict on"]
job = client.call(
    ModelGroupPredict(model_id=model_group.selected_model.id,
                      data=sample_texts))