Exemplo n.º 1
0
def lab_show(experiment_id=None):
    """ Show a Lab Experiment.

    Renders a graphviz PNG of a single experiment, or of the whole
    project (one subgraph per experiment) when *experiment_id* is None.

    Raises:
        click.Abort: if *experiment_id* is given but no such experiment
            directory exists under 'experiments/'.
    """

    is_lab_project()
    is_empty_project()

    models_directory = 'experiments'

    if experiment_id is None:
        # Project-level view: one subgraph per experiment directory
        experiments = next(os.walk(models_directory))[1]
        p = graphviz.Digraph(name='lab_project', format='png')
        p.graph_attr['rankdir'] = 'LR'

        for e in experiments:
            p.subgraph(show_experiment(e))
    else:
        experiment_dir = os.path.join(models_directory, experiment_id)
        if not os.path.exists(experiment_dir):
            click.secho("Can't find experiment [" + experiment_id +
                        '] in the current '
                        'directory.\nEnsure that you are in Lab Project root',
                        fg='red')
            # Bug fix: click.Abort() was instantiated but never raised,
            # so execution fell through to p.render() with p unbound.
            raise click.Abort()
        else:
            p = show_experiment(experiment_id)

    p.render()
Exemplo n.º 2
0
def lab_rm(experiment_id):
    """ Remove a Lab Experiment.

    Deletes both the experiment directory and its logs directory,
    then confirms on stdout. Prints an error (without aborting) when
    the experiment does not exist.
    """

    is_lab_project()

    experiment_dir = os.path.join('experiments', experiment_id)
    logs_dir = os.path.join('logs', experiment_id)

    if not os.path.exists(experiment_dir):
        click.secho("Can't find experiment [" + experiment_id +
                    '] in the current '
                    'directory.\nEnsure that you are in Lab Project root',
                    fg='red')
    else:
        shutil.rmtree(experiment_dir)
        # Logs may be absent (e.g. experiment never ran); a missing
        # logs directory previously raised FileNotFoundError and left
        # the removal half-done.
        shutil.rmtree(logs_dir, ignore_errors=True)
        click.secho('[' + experiment_id + '] removed', fg='blue')
Exemplo n.º 3
0
def lab_notebook():
    """ Publish Lab project as a jupyter kernel.

    Reads the project name and timestamp from config/runtime.yaml,
    builds a unique kernel name, and installs it via
    _install_jupyter_kernel. Failures are reported but not raised.
    """
    is_lab_project()

    with open(os.path.join(os.getcwd(), 'config', 'runtime.yaml'),
              'r') as file:
        # safe_load: runtime.yaml is plain config data; yaml.load
        # without a Loader is unsafe and a TypeError on PyYAML >= 6.
        config = yaml.safe_load(file)
    # Kernel name: project name + alphanumeric-only timestamp
    project_name = config['name'] + '_' +\
        ''.join(e for e in config['timestamp'] if e.isalnum())

    click.secho('Generating jupyter kernel for ' + config['name'] + '...',
                fg='cyan')

    try:
        _install_jupyter_kernel(project_name)
        click.secho('Kernel generated: ' + project_name)
    except Exception as e:
        print(e)
        click.secho('Failed to generate kernel.', fg='red')
Exemplo n.º 4
0
def lab_run(script):
    """ Run a training script.

    Runs *script* (an argument sequence, script path first) with the
    project's virtualenv Python. Also refreshes the 'path' entry in
    config/runtime.yaml when the project directory has moved.

    Raises:
        click.Abort: if config/runtime.yaml lacks a 'path' key
            (project created with an older Lab version).
    """

    home_dir = os.getcwd()

    is_lab_project()
    is_venv(home_dir)

    try:
        with open(os.path.join(os.getcwd(), 'config', 'runtime.yaml'),
                  'r') as file:
            # safe_load: runtime.yaml is plain config data; yaml.load
            # without a Loader is unsafe and a TypeError on PyYAML >= 6.
            config = yaml.safe_load(file)
            home_dir = config['path']

            # The project may have been moved since creation: rewrite
            # the recorded path so it matches the current directory
            if home_dir != os.getcwd():
                config['path'] = os.getcwd()
                home_dir = config['path']

                with open(os.path.join(os.getcwd(), 'config', 'runtime.yaml'),
                          'w') as file:
                    yaml.dump(config, file, default_flow_style=False)

    except KeyError:
        click.secho(
            'Looks like this Project was configured with an earlier '
            'version of Lab. Check that config/runtime.yaml file '
            'has a valid path key and value.',
            fg='red')
        raise click.Abort()

    # Run the script with the virtualenv's interpreter so project
    # dependencies resolve correctly
    python_bin = os.path.join(home_dir, '.venv', 'bin/python')

    click.secho('Initializing', fg='cyan')

    click.secho('Running ' + str(script), fg='green')
    subprocess.call([python_bin] + list(script))
    click.secho('Finished!', fg='green')
Exemplo n.º 5
0
def lab_ls(sort_by=None):
    """ Compare multiple Lab Experiments.

    Prints a table with one row per experiment (uuid, source script,
    start time) and a text sparkline per shared metric, sorted by date
    or by *sort_by* when given. Then reports when the project was last
    pushed to remote storage and when it was last modified locally.
    """
    models_directory = 'experiments'
    logs_directory = 'logs'
    TICK = '█'

    is_lab_project()
    is_empty_project()

    experiments = next(os.walk(models_directory))[1]
    comparisons = []

    # Collect the metric names shared by *every* experiment so the
    # comparison table has a consistent set of columns
    metrics_names = []
    for e in experiments:
        metrics_file = os.path.join(models_directory, e, 'metrics.yaml')
        with open(metrics_file, 'r') as file:
            # safe_load: metrics.yaml is plain data; yaml.load without
            # a Loader is unsafe and a TypeError on PyYAML >= 6
            metrics = yaml.safe_load(file)
            metrics_names.append(list(metrics.keys()))

    metrics_names = list(set(metrics_names[0]).intersection(*metrics_names))

    # Build one record per experiment: [uuid, source, start time, *metrics]
    for e in experiments:
        metrics_file = os.path.join(models_directory, e, 'metrics.yaml')
        try:
            with open(metrics_file, 'r') as file:
                metrics = yaml.safe_load(file)
            for k, v in metrics.items():
                metrics[k] = round(v, 2)

            # Keep only the shared metrics, in metrics_names order
            metrics_list = [metrics[k] for k in metrics_names]

            meta_file = os.path.join(logs_directory, e, 'meta.yaml')
            with open(meta_file, 'r') as file:
                meta = yaml.safe_load(file)

            # Truncate source name if too long
            source_name = meta['source']
            meta['source'] = (source_name[:20] +
                              '..') if len(source_name) > 20 else source_name

            record = [meta['experiment_uuid'], meta['source'],
                      meta['start_time'].strftime("%m/%d/%Y, %H:%M:%S")] + \
                metrics_list
            comparisons.append(record)
        except FileNotFoundError:
            # Skip experiments with missing metric/meta files
            pass

    # Create visualisation of numeric metrics: scale each metric column
    # by its max absolute value so sparkline lengths are comparable
    A = pd.DataFrame(comparisons)
    meta_data = A[[0, 1, 2]]
    metrics_data = A.drop([0, 1, 2], axis=1)

    column_max = metrics_data.abs().max(axis=0)
    scaled_metrics_data = metrics_data.abs().divide(column_max, axis=1)
    scaled_metrics_data = scaled_metrics_data.fillna(value=0)

    sparklines = np.empty(shape=metrics_data.shape, dtype=object)
    for row in range(metrics_data.shape[0]):
        for column in range(metrics_data.shape[1]):
            value = metrics_data.iloc[row, column]
            scaled_value = scaled_metrics_data.iloc[row, column]
            # 0-10 tick marks proportional to the scaled metric
            spark = (format(value, '.2f') + ': ' +
                     TICK * int(round(scaled_value * 10)))
            sparklines[row, column] = spark

    # Single source of truth for the column names; previously this
    # relied on the loop-local `metrics` dict surviving the last
    # iteration of the records loop
    header = ['Experiment', 'Source', 'Date'] + metrics_names

    result = pd.concat([meta_data, pd.DataFrame(sparklines)], axis=1)
    result.columns = header

    result.sort_values(by=['Date'], axis=0, ascending=False, inplace=True)

    if sort_by is not None:
        result.sort_values(by=[sort_by], axis=0, ascending=False, inplace=True)

    click.echo('')
    click.echo(tabulate.tabulate(result.values, headers=header))

    # Check the last time lab project was synced with minio
    with open(os.path.join('config', 'runtime.yaml'), 'r') as file:
        minio_config = yaml.safe_load(file)
        push_time = datetime.datetime.fromtimestamp(0)
        try:
            push_time = \
                datetime.datetime.strptime(
                    minio_config['last_push'],
                    '%Y-%m-%d %H:%M:%S.%f')

            now_time = datetime.datetime.now()
            td = now_time - push_time
            (days, hours) = (td.days, td.seconds // 3600)
        except Exception:
            # Missing or unparseable last_push: report "0d, 0h ago"
            (days, hours) = (0, 0)

    click.secho('\nLast push: ' + str(days) + 'd, ' + str(hours) + 'h ago',
                fg='yellow')

    # Find the latest file and print its timestamp
    list_of_files = glob.glob(os.path.join(os.getcwd(), '*'))
    latest_file = max(list_of_files, key=os.path.getctime)
    latest_file_timestamp = \
        datetime.datetime.fromtimestamp(os.path.getmtime(latest_file))

    recommend = '| Project is in sync with remote'
    if latest_file_timestamp > push_time:
        recommend = ' | Recommend to run <lab push>'
    click.secho('Last modified: ' + str(latest_file_timestamp) + recommend,
                fg='yellow')