示例#1
0
# Gather the full path of every file yielded by walking path_dir.
only_files = [
    os.path.join(root, filename)
    for root, _subdirs, filenames in os.walk(path_dir)
    for filename in filenames
]

# Open a Kili session from the email/password credentials.
kauth = KiliAuth(email=email, password=password)
playground = Playground(kauth)

# Handle at most MAX_NUMBER_OF_ASSET files, with a tqdm progress bar.
for filepath in tqdm(only_files[:MAX_NUMBER_OF_ASSET]):
    with open(filepath, 'r') as f:
        content = f.read()
    # The file path doubles as the asset's external id.
    external_id = filepath

    # Step 1: insert the (escaped) file content as a new asset.
    playground.append_to_dataset(
        project_id=project_id,
        content=escape_content(content),
        external_id=external_id,
    )

    # Step 2: look the asset up again to obtain its id.
    # NOTE(review): the filter is named "contains" and only the first hit is
    # used -- confirm it cannot match a different asset's external id.
    asset = playground.get_assets_(
        project_id=project_id,
        external_id_contains=[external_id],
    )
    asset_id = asset[0]['id']

    # Step 3: give the asset priority 1.
    playground.update_properties_in_asset(asset_id=asset_id, priority=1)

    # Step 4: build pre-annotations, keeping only entities whose type is a
    # string other than 'OTHER'.
    response = analyze_entities(content)
    entities = [
        entity for entity in response['entities']
        if isinstance(entity['type'], str) and entity['type'] != 'OTHER'
    ]
    # json_response is not consumed within this excerpt.
    json_response = {'entities': add_id_to_entities(entities)}
# Prompt interactively for the Kili credentials and the target project.
email = input('Enter email: ')
password = getpass.getpass()
project_id = input('Enter project id: ')

# Authenticate against the Kili GraphQL endpoint.
api_endpoint = 'https://cloud.kili-technology.com/api/label/graphql'
kauth = KiliAuth(email, password, api_endpoint)
playground = Playground(kauth)

# NOTE(review): verify=False disables TLS certificate verification for every
# request made through this client -- confirm this is intended for S3_ENDPOINT.
s3_client = boto3.client('s3',
                         aws_access_key_id=S3_ACCESS_KEY,
                         aws_secret_access_key=S3_SECRET_KEY,
                         endpoint_url=S3_ENDPOINT,
                         verify=False)

# The list of assets to upload is read from the 'assets' key of a YAML file.
with open('./conf/new_assets_with_s3.yml', 'r') as f:
    configuration = yaml.safe_load(f)
assets = configuration['assets']

for asset in tqdm(assets):
    # Uploads asset to S3 bucket under a random 128-bit number used as key
    path = get(asset, 'path')
    key = str(random.getrandbits(128))
    s3_client.upload_file(path, S3_BUCKET, key)
    # Inserts asset with S3 key in Kili
    content = f'https://cloud.kili-technology.com/api/label/files?id={key}'
    external_id = get(asset, 'externalId')
    # Parsed here (so malformed JSON fails early) but not forwarded to Kili
    # in this excerpt.
    json_metadata = json.loads(get(asset, 'metadata'))
    # Call the bound method with keyword arguments: the previous call passed
    # an undefined `client` name as the first positional argument.
    project = playground.append_to_dataset(project_id=project_id,
                                           content=content,
                                           external_id=external_id)