Пример #1
0
def get_projects(ctx, credential):  # shared options
    """Print a table with one row per project returned by ``dr.Project.list()``.

    Each row holds the project name, id, stage, stage description and a
    short partitioning label of the form ``"<cv_method>: <validation_type>"``.
    Projects without partition information get the label ``"None"``.

    Args:
        ctx: Not used by this function.
        credential: Not used by this function.
    """
    columns = [
        'name',
        'project_id',
        'stage',
        'description',
        'partition',
    ]
    # Width limits handed to Utils.prettyprint_table, one entry per column.
    max_len = [32, 24, 10, 20, 16]

    data = []
    for project in dr.Project.list():
        status = project.get_status()
        partition = project.partition

        # Truthiness covers both an empty dict and None; the former
        # ``partition != {}`` test let None through and then failed on the
        # membership check below.
        if partition:
            # A partition without an explicit 'cv_method' is labelled 'random'.
            cv_method = partition.get('cv_method', 'random')
            partitioning = f"{cv_method}: {partition['validation_type']}"
        else:
            partitioning = "None"

        data.append([
            project.project_name,
            project.id,
            status['stage'],
            status['stage_description'],
            partitioning,
        ])
    Utils.prettyprint_table(columns, data, max_len)
Пример #2
0
def frozen(
        ctx,
        credential,  # shared options
        config_path):
    """Run the 'frozen' command and publish the resulting model id.

    The model to freeze is the ``model_id`` configured for the command.  When
    none is configured, the first model returned by
    ``project.search_models`` (sorted by the key chosen through
    ``Utils.get_appropriate_dataset``) is used and written back into the
    configuration.  The id returned by ``project.frozen`` is stored in
    ``ctx.obj['model_id']`` for later commands.

    Args:
        ctx: Context object; ``ctx.obj['config']`` is read and
            ``ctx.obj['model_id']`` is written.
        credential: Not used by this function.
        config_path: Not used by this function.

    Raises:
        Exception: If the configuration is not valid for this command.
        NotSpecifiedValidParam: If the project has no ``dr_project``.
    """
    command = 'frozen'
    settings = ctx.obj['config']

    if not settings.is_valid(cmds=[command]):
        raise Exception('configuration file error')

    target_project = Project(**settings.get_params(cmd='environment'))
    # A missing attribute and an explicit None are treated the same way.
    if getattr(target_project, 'dr_project', None) is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. you should run build command befor, or apply command'
        )

    source_model_id = settings.get_param(command, param='model_id')

    with section('Freezing model'):
        if source_model_id is None:
            # No model configured: take the head of the sorted search result.
            ranking_key = Utils.get_appropriate_dataset(
                target_project.cv_method,
                target_project.validation_type,
                target_project.validation_params)
            candidates = target_project.search_models(sort_key=ranking_key)
            source_model_id = candidates[0].id
        # Persist the (possibly auto-selected) id so get_params() below sees it.
        settings.set_params(command, params={'model_id': source_model_id})

        frozen_model_id = target_project.frozen(**settings.get_params(command))
        ctx.obj['model_id'] = frozen_model_id

    click.echo('frozen command is succeed.')
    click.echo(
        f"'{source_model_id}' is frozen -> new model_id: {frozen_model_id}")
Пример #3
0
def fit(
        ctx,
        credential,  # shared options
        config_path):
    """Run the 'fit' command and record its results for later commands.

    Calls ``project.fit`` with the parameters configured for the command and
    stores the returned feature list name in ``ctx.obj['featurelist_name']``.
    ``ctx.obj['model_id']`` is set to the first model found for that feature
    list when the returned model type is ``'autopilot'``, otherwise to the
    configured ``model_id``.

    Args:
        ctx: Context object; ``ctx.obj['config']`` is read, and
            ``ctx.obj['featurelist_name']`` / ``ctx.obj['model_id']`` are
            written.
        credential: Not used by this function.
        config_path: Not used by this function.

    Raises:
        Exception: If the configuration is not valid for this command.
        NotSpecifiedValidParam: If the project has no ``dr_project``.
    """
    command = 'fit'
    settings = ctx.obj['config']

    if not settings.is_valid(cmds=[command]):
        raise Exception('configuration file error')

    target_project = Project(**settings.get_params(cmd='environment'))
    # A missing attribute and an explicit None are treated the same way.
    if getattr(target_project, 'dr_project', None) is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. you should run build command befor, or apply command'
        )

    with section("Training"):
        fit_params = settings.get_params(command)
        featurelist_name, model_type = target_project.fit(**fit_params)

        # Hand the results over to whichever command runs next.
        ctx.obj['featurelist_name'] = featurelist_name
        if model_type != 'autopilot':
            ctx.obj['model_id'] = settings.get_param(command, param='model_id')
        else:
            ranking_key = Utils.get_appropriate_dataset(
                target_project.cv_method,
                target_project.validation_type,
                target_project.validation_params)
            candidates = target_project.search_models(
                featurelist_name=featurelist_name, sort_key=ranking_key)
            ctx.obj['model_id'] = candidates[0].id

    click.echo("fit command is succeed.")
    click.echo(f"'{model_type}' with featurelist: '{featurelist_name}'")
    click.echo(f"model_id: {ctx.obj['model_id']}")
Пример #4
0
def build(
        ctx,
        credential,  # shared options
        config_path):
    """Run the 'build' command and publish the project in ``ctx.obj``.

    A ``Project`` is created from the 'environment' parameters.  If it has no
    ``project_id`` yet, the configured input dataset is loaded and passed to
    ``project.build_project``; a missing project name is replaced by
    ``Utils.get_currenttime_string()`` first.  The project is then stored in
    ``ctx.obj['project']``.

    Args:
        ctx: Context object; ``ctx.obj['config']`` is read and
            ``ctx.obj['project']`` is written.
        credential: Not used by this function.
        config_path: Not used by this function.

    Raises:
        Exception: If the configuration is not valid.
    """
    settings = ctx.obj['config']

    if not settings.is_valid():
        raise Exception('configuration file error')

    with section('Build project'):
        new_project = Project(**settings.get_params(cmd='environment'))

        needs_build = new_project.project_id is None
        if needs_build:
            if new_project.project_name is None:
                new_project.project_name = Utils.get_currenttime_string()
            dataset_params = settings.get_param(cmd='environment',
                                                param='dataset')
            source = IOManager(io_type='input', io_params=dataset_params)
            new_project.build_project(source.to_df())

        # Hand the project over to whichever command runs next.
        ctx.obj['project'] = new_project

    click.echo('project building is succeed')
    click.echo(
        f"project name: {new_project.project_name}, project_id: {new_project.project_id}"
    )
Пример #5
0
def get_project(
        ctx,
        credential,  # shared options
        project_id,
        verbose):
    """Print a report about one project.

    The report has up to four parts: the project parameters (parsed from
    ``project.to_yml()``), the feature lists, the first ten models from
    ``project.search_models`` and, when ``verbose`` is set, all frozen
    models.  With ``verbose`` each feature list is also followed by a table
    of its features.

    Args:
        ctx: Not used by this function.
        credential: Not used by this function.
        project_id: Id passed to ``Project(project_id=...)``.
        verbose: When truthy, print per-feature tables and the frozen models.
    """
    # FIXME (kept from the original author): awful
    project = Project(project_id=project_id)
    metric = project.metric

    click.echo("### params")
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and a TypeError since 6.0.  safe_load() is the documented replacement.
    # NOTE(review): assumes to_yml() emits plain YAML without
    # Python-specific tags -- confirm.
    for k, v in yaml.safe_load(project.to_yml()).items():
        click.echo(f"{k} : {v}")

    click.echo("\n### feature list")
    for featurelist in project.dr_project.get_featurelists():
        click.echo(f"* {featurelist.name}")
        if verbose:
            rows = []
            attr_names = []
            for feature, attr in project.get_features(
                    featurelist.name).items():
                rows.append([feature, *attr.values()])
                # As before, the header follows the last feature's attributes.
                attr_names = list(attr)
            # An empty feature list used to raise NameError (or reuse the
            # previous list's columns) because the header was built from the
            # loop variable after the loop; now the table is simply skipped.
            if rows:
                Utils.prettyprint_table(['name'] + attr_names, rows, [24])
        click.echo()

    click.echo(f"### top 10 models with {metric}")
    sort_key = Utils.get_appropriate_dataset(project.cv_method,
                                             project.validation_type,
                                             project.validation_params)

    models = project.search_models(featurelist_name=None,
                                   sort_key=sort_key)[:10]
    data = [(model.model_type, model.id, model.is_frozen,
             model.metrics[metric][sort_key], model.featurelist_name)
            for model in models]

    columns = ['type', 'id', 'is_frozen', f"{sort_key}", 'featurelist_name']
    Utils.prettyprint_table(columns, data, max_len=[16, 24, 9, 16, 16])

    click.echo()
    if verbose:
        click.echo("### frozen models")
        models = project.search_models(featurelist_name=None,
                                       is_frozen=True,
                                       sort_key=sort_key)
        data = [(model.model_type, model.id, model.is_frozen,
                 model.metrics[metric][sort_key]) for model in models]

        columns = ['type', 'id', 'is_frozen', f"{sort_key} : {metric}"]
        Utils.prettyprint_table(columns, data, max_len=[16, 24, 9, 24])
Пример #6
0
 def preprocess(self):
     """Fetch ``self.url`` through ``Utils.fetch_file`` and keep the result.

     The value returned by ``Utils.fetch_file(self.url, './')`` is stored in
     ``self.filename``.

     NOTE(review): presumably this downloads the file into the current
     directory and returns its local name -- confirm against
     ``Utils.fetch_file``.
     """
     self.filename = Utils.fetch_file(self.url, './')
Пример #7
0
def create_dataset(path):
    """Delegate to ``Utils.create_dataset`` for ``path``.

    Args:
        path: Passed through unchanged to ``Utils.create_dataset``.

    Returns:
        None. Whatever ``Utils.create_dataset`` returns is discarded.
    """
    Utils.create_dataset(path)
Пример #8
0
def predict(
        ctx,
        credential,  # shared options
        config_path):
    """Run the 'predict' command: upload, predict, write output, clean up.

    Steps, in order:

    1. Load the configured input and upload it with ``project.upload_dataset``.
    2. Predict with the configured ``model_id``; when none is configured the
       first model returned by ``project.search_models`` is used.
    3. If ``reasoncode`` is set, wait for the reason-code job and merge its
       result into the predictions on ``row_id``.
    4. If ``merge_origin`` is set, merge the predictions with the input frame.
    5. Write the predictions through the configured output.
    6. Delete the uploaded dataset unless ``del_dataset`` is exactly ``False``.

    Step 6 runs even when steps 2-5 raise, so a failed run does not leave
    the uploaded dataset behind.

    Args:
        ctx: Context object; ``ctx.obj['config']`` is read.
        credential: Not used by this function.
        config_path: Not used by this function.

    Raises:
        Exception: If the configuration is not valid for this command.
        NotSpecifiedValidParam: If the project has no ``dr_project``.
    """
    cmd = 'predict'
    config = ctx.obj['config']

    if not config.is_valid(cmds=[cmd]):
        raise Exception('configuration file error')

    project = Project(**config.get_params(cmd='environment'))
    if not hasattr(project, 'dr_project') or project.dr_project is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. you should run build command befor, or apply command'
        )

    prediction_column = Utils.get_appropriate_prediction_column(
        project.dr_project.target_type)

    model_id = config.get_param(cmd, param='model_id')

    with section('Upload dataset'):
        input_df = IOManager(io_type='input',
                             io_params=config.get_param(
                                 cmd, param='input')).to_df()
        dataset = project.upload_dataset(input_df)
        click.echo(
            f"#rows: {dataset.num_rows}, #columns: {dataset.num_columns}")

    # From here on the uploaded dataset exists, so the cleanup step must run
    # whether or not prediction and output succeed.
    try:
        with section('Prediction'):
            # auto select best result
            if model_id is None:
                sort_key = Utils.get_appropriate_dataset(
                    project.cv_method, project.validation_type,
                    project.validation_params)
                model = project.search_models(sort_key=sort_key)[0]
                model_id = model.id

            predictions = project.predict(model_id=model_id,
                                          dataset_id=dataset.id)

            use_reasoncode = config.get_param(cmd, param='reasoncode')
            if use_reasoncode or config.get_param(cmd,
                                                  param='feature_impact'):
                # The returned value is not used here; the call is kept.
                # NOTE(review): presumably it makes sure feature impact is
                # computed before reason codes are requested -- confirm.
                project.get_feature_impact(model_id)

            if use_reasoncode:
                max_codes = config.get_param(cmd, param='max_codes') or 3

                rc_job = project.get_reasoncode_job(model_id,
                                                    dataset.id,
                                                    max_codes=max_codes)

                # TODO: parameterize in config
                time_to_wait_for_reasoncode = 20 * 60  # sec
                reasoncode = rc_job.get_result_when_complete(
                    time_to_wait_for_reasoncode)
                reasoncodes = pd.DataFrame(reasoncode.get_all_as_dataframe())
                # Drop the prediction column from the reason-code frame
                # before joining it to the predictions on 'row_id'.
                reasoncodes.drop(columns=[prediction_column], inplace=True)
                predictions = pd.merge(left=predictions,
                                       right=reasoncodes,
                                       on='row_id')

        if config.get_param(cmd, param='merge_origin'):
            predictions = merge_dataset(predictions, input_df)

        with section('Output result'):
            IOManager(io_type='output',
                      io_params=config.get_param(
                          cmd, param='output')).output(predictions)
    finally:
        with section('clearning'):
            # Only an explicit False keeps the dataset; an unset value deletes.
            if config.get_param(cmd, param='del_dataset') is not False:
                project.delete_dataset(dataset.id)