Example #1
import logging
from time import sleep

from kiveapi import KiveAPI


def main():
    args = parse_args()
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s:%(levelname)s:%(name)s:%(message)s')
    logging.getLogger('requests').setLevel(logging.WARN)
    logging.info('Starting.')
    session = KiveAPI("http://localhost:8000")
    session.login("kive", "kive")
    cdt = session.get_cdt(args.cdt)
    pipeline = session.get_pipeline(args.pipeline)
    response = session.get('/api/datasets/?filters[0][key]=uploaded&page_size=1', is_json=True)
    dataset_count = response.json()['count']
    response = session.get('/api/runs/?page_size=1', is_json=True)
    run_count = response.json()['count']
    while dataset_count < args.datasets or run_count < args.runs:
        dataset_count += 1
        filename = 'pairs_{}.csv'.format(dataset_count)
        dataset = create_dataset(session, filename, cdt)
        session.run_pipeline(pipeline, [dataset])
        run_count += 1
        while True:
            response = session.get('/api/runs/status/?filters[0][key]=active&page_size=1',
                                   is_json=True)
            active_count = response.json()['count']
            if active_count < args.max_active:
                break
            sleep(5)
        logging.info('%d datasets, %d runs', dataset_count, run_count)
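This example leans on two helpers the page doesn't show: parse_args() and create_dataset(). A minimal sketch of what they might look like, assuming argparse options named after how args is used above and kiveapi's add_dataset() upload call (both assumptions, not from the original):

import argparse
from io import StringIO

def parse_args():
    # Hypothetical reconstruction; option names are guessed from their
    # usage in the example above.
    parser = argparse.ArgumentParser(description='Load test a Kive server.')
    parser.add_argument('--cdt', type=int, help='compound datatype id')
    parser.add_argument('--pipeline', type=int, help='pipeline id to run')
    parser.add_argument('--datasets', type=int, default=100)
    parser.add_argument('--runs', type=int, default=100)
    parser.add_argument('--max_active', type=int, default=4)
    return parser.parse_args()

def create_dataset(session, filename, cdt):
    # Hypothetical: upload a tiny CSV under the given name.
    data = StringIO('x,y\n1,2\n')
    return session.add_dataset(filename, 'load test data', data, cdt=cdt)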
Example #2
import json
import logging
import os
from collections import Counter
from datetime import datetime, timedelta
from operator import itemgetter
from urllib.parse import urlparse

from requests.adapters import HTTPAdapter

from kiveapi import KiveAPI


def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()
    print('Starting.')

    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    all_pipelines = kive.get_pipelines()
    pipelines = list(recent_pipelines(all_pipelines))
    hostname = urlparse(kive.server_url).hostname
    print('Recent pipelines from {}:'.format(hostname))
    for pipeline in pipelines:
        # noinspection PyUnresolvedReferences
        print('{} - {}, id {}'.format(pipeline.family, pipeline,
                                      pipeline.pipeline_id))
    # noinspection PyCompatibility
    pipeline_request = input("Enter pipeline id to dump, or 'm' for more:")
    if pipeline_request == 'm':
        for pipeline in all_pipelines:
            print('{} - {}, id {}'.format(pipeline.family, pipeline,
                                          pipeline.pipeline_id))
        # noinspection PyCompatibility
        pipeline_request = input("Enter pipeline id to dump:")
    pipeline_id = int(pipeline_request)
    dump_folder = os.path.abspath('dump/{}_pipeline{}'.format(
        hostname, pipeline_id))

    if not os.path.isdir(dump_folder):
        os.makedirs(dump_folder)

    compound_datatypes = {}  # {id: columns}
    for compound_datatype in kive.get_cdts():
        columns = compound_datatype.name
        compound_datatypes[compound_datatype.cdt_id] = columns
    code_resources = {}  # {name: {'filename': filename, ...}}
    for code_resource in kive.get('/api/coderesources/').json():
        dump = {}
        for field in ('groups_allowed', 'users_allowed', 'filename'):
            dump[field] = code_resource[field]
        code_resources[code_resource['name']] = dump
    code_resource_revisions = {}  # {id: revision}
    for revision in kive.get('/api/coderesourcerevisions/').json():
        code_resource_revisions[revision['id']] = CodeResourceRevision(
            revision, code_resources)
    code_resource_revisions[None] = None
    api_end_points = kive.get('/api/').json()
    if 'dockerimages' in api_end_points:
        docker_images = {
            img['url']: img['full_name']
            for img in kive.get('/api/dockerimages/').json()
        }
    else:
        # Old server doesn't have docker image support.
        docker_images = {}
    docker_images[None] = None
    methods = {}  # {id: method}
    for method in kive.get('/api/methods/').json():
        for dep in method['dependencies']:
            dep['requirement'] = code_resource_revisions[dep['requirement']]
            if dep['path'] == '././':
                dep['path'] = '.'
        method['dependencies'].sort(
            key=lambda x: (x['path'],
                           x['filename'],
                           x['requirement']['coderesource']['filename']))
        dump = {
            'driver': code_resource_revisions[method['driver']],
            'docker_image': docker_images[method.get('docker_image')]
        }
        for field in ('groups_allowed', 'users_allowed', 'reusable', 'threads',
                      'memory', 'dependencies'):
            dump[field] = method[field]
        methods[method['id']] = dump

    used_revisions = set()
    pipeline_wrapper = kive.get_pipeline(pipeline_id)
    pipeline = pipeline_wrapper.details
    print('Dumping {} in {}.'.format(pipeline_wrapper, dump_folder))
    dump = dict(positions=dict(inputs={}, outputs={}, steps={}))
    for input_item in pipeline['inputs']:
        input_name = input_item['dataset_name']
        dump['positions']['inputs'][input_name] = dict(x=input_item['x'],
                                                       y=input_item['y'])
        del input_item['x']
        del input_item['y']
        replace_structure(input_item, compound_datatypes)
    dump['inputs'] = pipeline['inputs']
    for output_item in pipeline['outputs']:
        output_name = output_item['dataset_name']
        dump['positions']['outputs'][output_name] = dict(x=output_item['x'],
                                                         y=output_item['y'])
        del output_item['x']
        del output_item['y']
        del output_item['dataset_idx']
        replace_structure(output_item, compound_datatypes)
    # Sort by name: lists of dicts aren't orderable in Python 3.
    pipeline['outputs'].sort(key=itemgetter('dataset_name'))
    dump['outputs'] = pipeline['outputs']
    for outcable in pipeline['outcables']:
        del outcable['pk']
        del outcable['source']
        if outcable['output_cdt']:
            columns = compound_datatypes[outcable['output_cdt']]
            outcable['output_cdt'] = columns
    pipeline['outcables'].sort(key=itemgetter('output_idx'))
    dump['outcables'] = pipeline['outcables']
    for step in pipeline['steps']:
        step_name = step['name']
        dump['positions']['steps'][step_name] = dict(x=step['x'], y=step['y'])
        del step['x']
        del step['y']
        step['cables_in'].sort(key=itemgetter('dest_dataset_name'))
        for cable in step['cables_in']:
            del cable['dest']
            del cable['source']
        for input_item in step['inputs']:
            replace_structure(input_item, compound_datatypes)
        for output_item in step['outputs']:
            replace_structure(output_item, compound_datatypes)
        del step['transformation_family']
        step['transformation'] = methods[step['transformation']]
        driver = step['transformation']['driver']
        if driver is not None:
            used_revisions.add(driver)
        used_revisions.update(
            map(itemgetter('requirement'),
                step['transformation']['dependencies']))
    dump['steps'] = pipeline['steps']

    pipeline_filename = 'pipeline.json'
    with open(os.path.join(dump_folder, pipeline_filename), 'w') as f:
        json.dump(dump, f, indent=4, sort_keys=True)

    pipeline_deadline = datetime.now() + timedelta(seconds=90)
    filename_counts = Counter()
    for revision in used_revisions:
        filename = revision['coderesource']['filename']
        filename_counts[filename] += 1
        response = kive.get(revision.url, is_json=False, stream=True)
        deadline = max(pipeline_deadline,
                       datetime.now() + timedelta(seconds=10))
        is_complete = True
        # Stream to disk in binary mode: iter_content() yields bytes.
        with open(os.path.join(dump_folder, filename), 'wb') as f:
            for block in response.iter_content(chunk_size=8192):
                f.write(block)
                if datetime.now() > deadline:
                    is_complete = False
                    break
        if not is_complete:
            os.remove(os.path.join(dump_folder, filename))
            with open(os.path.join(dump_folder, filename + '_timed_out'), 'w'):
                pass
    duplicate_filenames = [
        filename for filename, count in filename_counts.items() if count > 1
    ]
    if duplicate_filenames:
        raise RuntimeError('Multiple versions found: ' +
                           ', '.join(duplicate_filenames))

    print('Dumped {}.'.format(pipeline_wrapper))
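recent_pipelines() is another helper the page omits; from its use above it presumably trims the full pipeline list down to the latest revision of each family. A sketch under that assumption (hypothetical, not the original helper):

def recent_pipelines(all_pipelines):
    # Hypothetical: keep only the first pipeline seen per family,
    # assuming the server lists revisions newest-first.
    seen_families = set()
    for pipeline in all_pipelines:
        if pipeline.family not in seen_families:
            seen_families.add(pipeline.family)
            yield pipeline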
Example #3
import logging
import os

logger = logging.getLogger(__name__)

def main():
    args = parse_args()
    logger.info('Starting.')
    kive_server = os.environ.get('KIVE_SERVER', 'http://*****:*****')
    # ... (the middle of this example was swallowed by the source's
    # credential masking; the loop below posts each rerun request) ...
            response = session.post('@api_runs', json=params, is_json=True)
            response_json = response.json()
            launched_run_ids.add(response_json['id'])
        except Exception:
            logger.error('Failed to rerun run id %d: %r.',
                         run['id'],
                         run,
                         exc_info=True)
    logger.info('Done.')
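The '@api_runs' argument appears to use kiveapi's endpoint shorthand, where names beginning with '@' are expanded to full API URLs, so relaunching a run is a plain POST. A minimal, hypothetical illustration of the masked middle section (the real login, run selection, and parameter building were lost to the masking):

# Hypothetical sketch; field names and run selection are assumptions.
session = KiveAPI(kive_server)
session.login(os.environ['KIVE_USER'], os.environ['KIVE_PASSWORD'])
launched_run_ids = set()
for run in runs_to_relaunch:
    params = {'pipeline': run['pipeline'], 'inputs': run['inputs']}
    response = session.post('@api_runs', json=params, is_json=True)
    launched_run_ids.add(response.json()['id'])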
Example #4
from kiveapi import KiveAPI

import example_tools

# Use HTTPS on a real server, so your password is encrypted.
# Don't put your real password in source code, store it in a text file
# that is only readable by your user account or some more secure storage.
kive = KiveAPI('http://localhost:8000')
kive.login('kive', 'kive')

# Upload (or retrieve) an input file
dataset = example_tools.upload_or_retrieve_dataset(kive,
                                                   "API Example 2 Names File",
                                                   open("names.csv", "r"),
                                                   groups=["Everyone"])

# Get the app from a container family.
containerfamily = kive.filter("/api/containerfamilies/", "name",
                              "samplecode").json()[0]
container = kive.get(containerfamily["containers"]).json()[0]
app = kive.filter(
    container["app_list"], "smart",
    "Minimal example that can run simple Python scripts").json()[0]

# Create a run of this app using the file we uploaded
appargs = kive.get(app["argument_list"]).json()
inputarg = next(a for a in appargs if a["type"] == "I")

runspec = {
    "name": "uploaded-file-example",
    "app": app["url"],
    "datasets": [{
        "argument": inputarg["url"],
        "dataset": dataset.raw["url"],
    }],
}
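The snippet is cut off at the run specification. Assuming kiveapi's container-runs endpoint helper (an assumption; the launch call is not part of the original snippet), starting the run would look something like:

# Hypothetical continuation: POST the run spec to start the run.
# kive.endpoints.containerruns is assumed from kiveapi's endpoint helpers.
containerrun = kive.endpoints.containerruns.post(json=runspec)
print('Started run {!r}.'.format(containerrun['name']))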