def execute_module_step(module, step, return_dict=True, **kwargs):
    """Run one step of a module and collect the outputs it returns.

    Parameters
    ----------
    module : str or object
        Value passed to ``get_module`` to look up the module.  If it has a
        ``name`` attribute, that name is used in the error message.
    step : str or object
        The step to run.  If the value has an ``id`` attribute that id is
        used, otherwise the value itself is placed in the request URL.
    return_dict : bool, optional
        If True (default) return the outputs as an ordered mapping of
        output name to value.  If False return only the values.
    **kwargs
        Inputs for the step.  Each keyword becomes one name/value pair in
        the request body.

    Returns
    -------
    OrderedDict or tuple or object
        The ordered mapping when `return_dict` is True.  Otherwise a tuple
        of the output values, or the bare value when there is exactly one.

    Raises
    ------
    ValueError
        If ``get_module`` returns None for `module`.

    """
    # Capture a printable name first; `module` is resolved to the server-side
    # object immediately afterwards.
    if hasattr(module, 'name'):
        label = module.name
    else:
        label = str(module)

    found = get_module(module)
    if found is None:
        raise ValueError("Module '{}' was not found.".format(label))

    module_id = found.id
    step_id = step.id if hasattr(step, 'id') else step

    inputs = [{'name': key, 'value': value} for key, value in kwargs.items()]
    response = post(
        SERVICE_ROOT + '/modules/{}/steps/{}'.format(module_id, step_id),
        json={'inputs': inputs})

    # Flatten the list of name/value dictionaries into a single mapping.
    results = OrderedDict()
    for item in response.get('outputs', []):
        key = item['name']
        value = item.get('value')

        # Remove padding from CHAR columns
        if isinstance(value, str):
            value = value.strip()

        results[key] = value

    if return_dict:
        return results

    # Mimic a plain Python call: a lone output is returned unwrapped.
    values = tuple(results.values())
    return values[0] if len(values) == 1 else values
示例#2
0
def import_model_from_zip(name,
                          project,
                          file,
                          description=None,
                          version='latest'):
    """Import a model into a project by uploading a ZIP archive.

    Parameters
    ----------
    name : str
        Name to give the imported model.
    project : str or dict
        Value passed to ``get_project`` to locate the target project.
    file : file_like
        Object with a ``read()`` method.  Its entire content is sent as the
        request body.
    description : str, optional
        Description of the model.  Omitted from the request when None.
    version : str, optional
        Sent as the ``versionOption`` query parameter.  Defaults to
        'latest'.

    Returns
    -------
    object
        Whatever ``post`` returns for the import request.

    Raises
    ------
    ValueError
        If the project could not be found.

    """
    # TODO: Allow import into folder if no project is given
    # TODO: Create new version if model already exists
    project_obj = get_project(project)

    if project_obj is None:
        # Report the value the caller supplied; the lookup result is None
        # here and would make the message useless.
        raise ValueError('Project `%s` could not be found.' % str(project))

    params = {
        'name': name,
        'description': description,
        'type': 'ZIP',
        'projectId': project_obj.id,
        'versionOption': version
    }

    # Skip unset options: formatting None would otherwise send the literal
    # text "None" (e.g. a model description of "None").
    params = '&'.join('{}={}'.format(k, v) for k, v in params.items()
                      if v is not None)

    r = post(ROOT_PATH + '/models#octetStream',
             data=file.read(),
             params=params,
             headers={'Content-Type': 'application/octet-stream'})

    return r
示例#3
0
def create_folder(name, parent=None, description=None):
    """Create a new folder, optionally inside an existing parent folder.

    Parameters
    ----------
    name : str
        The name of the new folder
    parent : str or dict, optional
        The parent folder for this folder, if any.  Can be a folder name,
        id, or dict response from get_folder
    description : str, optional
        A description of the folder

    Returns
    -------
    object
        Whatever ``post`` returns for the folder creation request.

    Raises
    ------
    ValueError
        If `parent` was specified but could not be found.

    """
    parent_folder = get_folder(parent)

    # A parent that was requested but not found must not be silently
    # dropped, otherwise the folder is created in the wrong place.
    if parent is not None and parent_folder is None:
        raise ValueError("Folder '%s' could not be found." % (parent, ))

    body = {
        'name': name,
        'description': description,
        'folderType': 'folder',
        'parentFolderUri': parent_folder.id if parent_folder else None
    }

    return post(
        _SERVICE_ROOT + '/folders',
        json=body,
        headers={'Content-Type': 'application/vnd.sas.content.folder+json'})
示例#4
0
def create_performance_definition(model,
                                  library_name,
                                  table_name,
                                  name=None,
                                  description=None,
                                  cas_server=None):
    """Create a performance task definition for a registered model.

    Parameters
    ----------
    model : str or dict
        Value passed to ``get_model`` to locate the model.
    library_name : str
        Sent as the ``dataLibrary`` of the task.
    table_name : str
        Sent as the ``dataTable`` of the task.
    name : str, optional
        Name of the definition.  Defaults to the model name followed by
        ' Performance'.
    description : str, optional
        Description of the definition.  Defaults to
        'Performance definition for model ' followed by the model name.
    cas_server : str, optional
        Sent as ``casServerId``.  Defaults to 'cas-shared-default'.

    Returns
    -------
    object
        Whatever ``post`` returns for the new performance task.

    Raises
    ------
    ValueError
        If the model or its project cannot be found, or if the project is
        missing one of the properties `targetVariable`, `targetLevel` or
        `predictionVariable`.

    """
    from .model_repository import get_model, get_project

    model_obj = get_model(model)
    if model_obj is None:
        model_name = model.name if hasattr(model, 'name') else str(model)
        raise ValueError("Model '{}' was not found.".format(model_name))

    project = get_project(model_obj.projectId)
    if project is None:
        raise ValueError("Project for model '{}' was not found.".format(
            model_obj.name))

    # Performance data cannot be captured unless certain project properties have been configured.
    for required in ['targetVariable', 'targetLevel', 'predictionVariable']:
        if getattr(project, required, None) is None:
            raise ValueError("Project %s must have the '%s' property set." %
                             (project.name, required))

    request = {
        'projectId': project.id,
        'modelIds': [model_obj.id],
        'name': name or model_obj.name + ' Performance',
        'description': description
        or 'Performance definition for model ' + model_obj.name,
        'casServerId': cas_server or 'cas-shared-default',
        'resultLibrary': 'ModelPerformanceData',
        'dataLibrary': library_name,
        'dataTable': table_name
    }

    # If model doesn't specify input/output variables, try to pull from project definition
    if model_obj.get('inputVariables'):
        request['inputVariables'] = [
            v.get('name') for v in model_obj['inputVariables']
        ]
        # A model may list inputs without listing outputs; treat a missing
        # list as empty rather than failing with KeyError.
        request['outputVariables'] = [
            v.get('name') for v in model_obj.get('outputVariables') or []
        ]
    else:
        variables = project.get('variables', [])
        request['inputVariables'] = [
            v.get('name') for v in variables if v.get('role') == 'input'
        ]
        request['outputVariables'] = [
            v.get('name') for v in variables if v.get('role') == 'output'
        ]

    return post(SERVICE_ROOT + '/performanceTasks',
                json=request,
                headers={
                    'Content-Type':
                    'application/vnd.sas.models.performance.task+json'
                })
示例#5
0
def create_project(project, repository, **kwargs):
    """Create a new project inside a repository.

    Parameters
    ----------
    project : str or dict
        Name of the project, or a dictionary of project properties.  A
        dictionary is copied, so the caller's object is left unchanged.
    repository : str or dict
        Value passed to ``get_repository`` to locate the repository.  Its
        ``id`` and ``folderId`` are stored on the new project.
    **kwargs
        Additional project properties.  These override values from
        `project` and the repository fields.

    Returns
    -------
    object
        Whatever ``post`` returns for the project creation request.

    Raises
    ------
    ValueError
        If the repository could not be found.

    """
    if isinstance(project, str):
        body = {'name': project}
    else:
        # Copy so the caller's dictionary is not modified as a side effect.
        body = dict(project)

    repo = get_repository(repository)
    if repo is None:
        raise ValueError("Repository '%s' could not be found." %
                         (repository, ))

    body['repositoryId'] = repo['id']
    body['folderId'] = repo['folderId']

    body.update(kwargs)
    return post(
        ROOT_PATH + '/projects',
        json=body,
        headers={'Content-Type': 'application/vnd.sas.models.project+json'})
示例#6
0
def create_file(file, folder=None, filename=None, expiration=None):
    """Upload content to the server as a new file.

    Parameters
    ----------
    file : str or file_like
        Path of a local file to upload, or an object with a ``read()``
        method that supplies the content.
    folder : str or dict, optional
        Folder name, or folder information as returned by
        :func:`.get_folder`.  Passed to ``get_folder`` to locate the
        destination folder.
    filename : str, optional
        Name to assign to the uploaded file.  When `file` is a path this
        defaults to the file's base name without its extension; otherwise
        it is required.
    expiration : datetime, optional
        A timestamp that indicates when to expire the file.  Currently
        accepted but not sent to the server (see TODO below).

    Returns
    -------
    RestObj
        A dictionary containing the file attributes.

    Raises
    ------
    ValueError
        If `file` is not a path and no `filename` was given, or if
        `folder` was given but could not be found.

    """
    if isinstance(file, six.string_types):
        if not filename:
            # Base name of the path with the extension stripped.
            filename = os.path.splitext(os.path.basename(file))[0]

        with open(file, 'rb') as handle:
            content = handle.read()
    elif filename is None:
        raise ValueError(
            '`filename` must be specified if `file` is not a path.')
    else:
        content = file.read()

    params = {}

    if folder is not None:
        target = get_folder(folder)

        if target is None:
            raise ValueError("Folder '%s' could not be found." % folder)

        params['parentFolderUri'] = get_link(target, 'self')['href']

    # TODO: add 'expirationTimeStamp' to params when `expiration` is given.
    # Need to determine correct format

    return post(_SERVICE_ROOT + '/files#multipartUpload',
                files={filename: content},
                params=params)
示例#7
0
def add_model_content(model, file, name=None, role=None):
    """Attach a file to an existing model.

    Parameters
    ----------
    model : str or dict
        A model id, a dictionary containing an ``id`` key, or any other
        value that ``get_model`` can resolve to a model.
    file : object
        Content to upload.  Passed through unchanged in the ``files``
        argument of ``post``.
    name : str, optional
        Name for the content.  Used as the key of the uploaded file and,
        when not None, added to the request metadata.
    role : str, optional
        Sent as the ``role`` entry of the request metadata.

    Returns
    -------
    object
        Whatever ``post`` returns for the upload request.

    Raises
    ------
    ValueError
        If `model` had to be looked up and could not be found.

    """
    if is_uuid(model):
        model_id = model
    elif isinstance(model, dict) and 'id' in model:
        model_id = model['id']
    else:
        model_obj = get_model(model)
        if model_obj is None:
            raise ValueError("Model '{}' was not found.".format(model))
        model_id = model_obj['id']

    metadata = {'role': role}
    if name is not None:
        metadata['name'] = name

    # NOTE(review): when `name` is None the upload is keyed by None --
    # confirm whether callers are expected to always supply a name.
    return post(ROOT_PATH + '/models/{}/contents'.format(model_id),
                files={name: file},
                data=metadata)
def create_module(name=None,
                  description=None,
                  source=None,
                  language='python',
                  scope='public'):
    """Create a new module from source code.

    Parameters
    ----------
    name : str
        Sent as the ``id`` of the new module.
    description : str
        Description of the module.
    source : str
        Source code of the module.  Required; converted with ``str()``
        before being sent.
    language : str { 'python', 'ds2' }
        Language of `source`.  Determines the ``type`` sent with the
        request.
    scope : str { 'public', 'private' }
        Sent as the ``scope`` of the module.

    Returns
    -------
    object
        Whatever ``post`` returns for the module creation request.

    Raises
    ------
    ValueError
        If `source` is None or `language` is not one of the supported
        values.

    """
    if source is None:
        raise ValueError('The `source` parameter is required.')
    source_text = str(source)

    # Supported languages paired with the type string sent for each.
    known_types = (
        ('python', 'text/x-python'),
        ('ds2', 'text/vnd.sas.source.ds2'),
    )

    for candidate, type_string in known_types:
        if language == candidate:
            source_type = type_string
            break
    else:
        raise ValueError('Unrecognized source code language `%s`.' % language)

    payload = {
        'id': name,
        'type': source_type,
        'description': description,
        'source': source_text,
        'scope': scope
    }

    return post(SERVICE_ROOT + '/modules', json=payload)
示例#9
0
def publish_model(model, destination, name=None, force=False):
    """Publish a model that is already registered in the model repository.

    Unlike the model_publish service, this does not require code to be
    specified.

    Parameters
    ----------
    model : str or dict
        Value passed to ``get_model`` to locate the model.
    destination : str
        Sent as the ``destinationName`` of the publish request.
    name : str, optional
        Name to publish the model under.  Defaults to the registered name
        of the model.  Either value is passed through ``_publish_name``.
    force : bool, optional
        Sent as the ``force`` query parameter.  Defaults to False.

    Returns
    -------
    object
        Whatever ``post`` returns for the publish request.

    Raises
    ------
    ValueError
        If the model could not be found.

    """
    from .model_repository import get_model, get_model_link

    model_obj = get_model(model)

    if model_obj is None:
        model_name = model.name if hasattr(model, 'name') else str(model)
        raise ValueError("Model '{}' was not found.".format(model_name))

    model_uri = get_model_link(model_obj, 'self')

    # TODO: Verify allowed formats by destination type.
    # As of 19w04 MAS throws HTTP 500 if name is in invalid format.
    # Honor a caller-supplied `name`; it was previously accepted but never
    # sent.
    # NOTE(review): assumes `_publish_name` normalizes a name into a format
    # the destination accepts -- confirm it is safe for user-supplied names.
    published_name = _publish_name(name or model_obj.get('name'))

    request = {
        "name":
        model_obj.get('name'),
        "notes":
        model_obj.get('description'),
        "modelContents": [{
            "modelName": published_name,
            "sourceUri": model_uri.get('uri'),
            "publishLevel": "model"
        }],
        "destinationName":
        destination
    }

    r = post(
        SERVICE_ROOT + '/publish',
        json=request,
        params=dict(force=force),
        headers={
            'Content-Type':
            'application/vnd.sas.models.publishing.request.asynchronous+json'
        })
    return r
示例#10
0
def publish_model(model, destination, name=None, code=None, notes=None):
    """Publish a model together with its score code.

    Parameters
    ----------
    model : str or dict
        Value passed to ``get_model`` to locate the model.
    destination : str
        Sent as the ``destinationName`` of the publish request.
    name : str, optional
        Name of the publish request.  Defaults to the model name.
    code : str, optional
        Score code to publish.  When None, the model's ``scoreCode`` link
        is fetched if one exists.
    notes : str, optional
        Sent as the ``notes`` of the publish request.

    Returns
    -------
    object
        Whatever ``post`` returns for the publish request.

    Raises
    ------
    ValueError
        If the model could not be found.
    KeyError
        If the model's ``scoreCodeType`` is not one of the recognized
        types.

    """
    from .model_repository import get_model, get_model_link

    # Maps the lower-cased `scoreCodeType` of the model to the `codeType`
    # sent in the request.
    code_types = {
        'ds2package': 'ds2',
        'datastep': 'datastep',
        '': ''
    }

    model_obj = get_model(model)
    if model_obj is None:
        model_name = model.name if hasattr(model, 'name') else str(model)
        raise ValueError("Model '{}' was not found.".format(model_name))

    model_uri = get_model_link(model_obj, 'self')

    # Get score code from registry if no code specified
    if code is None:
        code_link = get_model_link(model_obj, 'scoreCode', True)
        if code_link:
            code = get(code_link['href'])

    request = dict(
        name=name or model_obj.get('name'),
        notes=notes,
        destinationName=destination,
    )

    # The property may be absent or explicitly None; both mean "no type".
    score_code_type = (model_obj.get('scoreCodeType') or '').lower()

    model_contents = {
        'modelName': model_obj.get('name'),
        'modelId': model_obj.get('id'),
        'sourceUri': model_uri.get('href'),
        'publishLevel': 'model',        # ?? What are the options?
        'codeType': code_types[score_code_type],
        'codeUri': '',          # ??  Not needed if code is specified?
        'code': code
    }

    request['modelContents'] = [model_contents]
    return post(
        ROOT_PATH + '/models',
        json=request,
        headers={
            'Content-Type': 'application/vnd.sas.models.publishing.request+json'
        })
示例#11
0
def create_project(name, description=None, image=None):
    """Create a new project.

    Parameters
    ----------
    name : str
        Name of the project.
    description : str
        Description of the project.
    image : str
        URI of an image to use as the project avatar

    Returns
    -------
    RestObj

    """
    payload = dict(name=name, description=description, imageUri=image)
    content_type = 'application/vnd.sas.project+json'

    return post(ROOT_PATH + '/projects',
                json=payload,
                headers={'Content-Type': content_type})
示例#12
0
def parse_documents(documents,
                    caslib=None,
                    id_column=None,
                    text_column=None,
                    description=None,
                    standard_entities=False,
                    noun_groups=False,
                    min_doc_count=10,
                    concept_model=None,
                    output_postfix=None,
                    spell_check=False,
                    override_list=None,
                    stop_list=None,
                    start_list=None,
                    synonym_list=None,
                    language='en'):
    """Submit a natural language parsing job for a set of documents.

    The job executes asynchronously.  Two kinds of input are supported:
    documents already stored in a CAS table, and documents whose text is
    uploaded directly with the request.

    Parameters
    ----------
    documents : str or dict or list_like
        Documents to parse.  A string or dict is treated as the URI of the
        CAS table holding the documents; anything else is sent as the raw
        documents themselves.
    caslib : str or dict, optional
        URI of a caslib in which the documents will be stored.  Required if
        `documents` is a list of strings.
    id_column : str, optional
        The column in `documents` that contains a unique id for each
        document.  Required if `documents` is a CAS table URI.
    text_column : str, optional
        The column in `documents` that contains the document text to parse.
        Required if `documents` is a CAS table URI.
    description : str, optional
        Description to add to the text parsing job.
    standard_entities : bool, optional
        Sent as ``includeStandardEntities``.  Defaults to False.
    noun_groups : bool, optional
        Sent as ``includeNounGroups``.  Defaults to False.
    min_doc_count : int, optional
        Minimum number of documents in which a term must appear to be kept.
        Defaults to 10.
    concept_model : str or dict, optional
        URI of a table containing the concept LITI binaries to apply during
        parsing.
    output_postfix : str, optional
        Text to be added to the end of all output table names.
    spell_check : bool, optional
        Whether spell checking should be performed during parsing.
    override_list : str or dict, optional
        URI of a table containing overrides for the keep and drop terms.
    stop_list : str or dict, optional
        Converted with ``uri_as_str`` and sent as ``stopListUri``.
    start_list : str or dict, optional
        Converted with ``uri_as_str`` and sent as ``startListUri``.
    synonym_list : str or dict, optional
        Converted with ``uri_as_str`` and sent as ``synonymListUri``.
    language : str, optional
        Two letter
        `ISO 639-1 <https://en.wikipedia.org/wiki/ISO_639>`_
        code indicating the source language.  Defaults to 'en'.

    Returns
    -------
    RestObj
        The submitted job

    Raises
    ------
    TypeError
        If `documents` is None.

    See Also
    --------
    :func:`.cas_management.get_caslib`
    :func:`.cas_management.get_table`

    """
    if documents is None:
        raise TypeError('`documents` cannot be None.')

    stored_in_table = isinstance(documents, (dict, six.string_types))

    if not stored_in_table:
        # Raw text is uploaded along with the caslib that should hold it.
        payload = {
            'caslibUri': uri_as_str(caslib),
            'documents': documents,
            'version': 1
        }
    else:
        payload = {
            "inputUri": uri_as_str(documents),
            "documentIdVariable": id_column,
            "textVariable": text_column,
            "version": 1
        }

    options = {
        "description": description,
        "language": language,
        "includeStandardEntities": standard_entities,
        "includeNounGroups": noun_groups,
        "startListUri": uri_as_str(start_list),
        "stopListUri": uri_as_str(stop_list),
        "synonymListUri": uri_as_str(synonym_list),
        "minimumDocumentCount": min_doc_count,
        "conceptModelUri": uri_as_str(concept_model),
        "outputTableNamePostfix": output_postfix,
        "enableSpellChecking": spell_check,
        "overrideListUri": uri_as_str(override_list),
    }
    payload.update(options)

    # Optional fields are not ignored if None so explicitly remove before
    # sending.
    payload = {
        field: value
        for field, value in payload.items() if value is not None
    }

    url = _SERVICE_ROOT + '/jobs'
    content_type = 'application/vnd.sas.text.parsing.job.request+json'

    # Raw documents go to a different endpoint with its own request type.
    if 'documents' in payload:
        url += '#data'
        content_type = (
            'application/vnd.sas.text.parsing.job.request.documents+json')

    headers = {
        'Content-Type': content_type,
        'Accept': 'application/vnd.sas.text.parsing.job+json'
    }

    return post(url, json=payload, headers=headers)
示例#13
0
def create_model(model,
                 project,
                 description=None,
                 modeler=None,
                 function=None,
                 algorithm=None,
                 tool=None,
                 is_champion=False,
                 properties=None,
                 **kwargs):
    """Create a new model in an existing project.

    Parameters
    ----------
    model : str or dict
        Name of the model, or a dictionary of model fields.  A dictionary
        is copied, so the caller's object is left unchanged.
    project : str or dict
        Value passed to ``get_project`` to locate the target project.
    description : str, optional
        Description of the model.  Falls back to the ``description`` entry
        of `model`.
    modeler : str, optional
        Name of the user that created the model.  Current user name will be
        used if unspecified.
    function : str, optional
        Falls back to the ``function`` entry of `model`.
    algorithm : str, optional
        Falls back to the ``algorithm`` entry of `model`.
    tool : str, optional
        Falls back to the ``tool`` entry of `model`.
    is_champion : bool, optional
        Whether the model is the champion.  Falls back to the ``champion``
        entry of `model`.  The ``role`` field is set to 'Champion' or
        'Challenger' accordingly.
    properties : dict, optional
        Custom properties, sent as a list of name/value pairs.  Only used
        when `model` does not already contain a ``properties`` entry.
    **kwargs
        Currently ignored (see TODO below).

    Returns
    -------
    object
        Whatever ``post`` returns for the model creation request.

    Raises
    ------
    TypeError
        If `model` is neither a string nor a dictionary.
    ValueError
        If the project could not be found.

    """
    if isinstance(model, str):
        model = {'name': model}
    elif isinstance(model, dict):
        # Copy so the caller's dictionary is not modified as a side effect.
        model = dict(model)
    else:
        # Explicit check instead of `assert`, which is stripped under -O.
        raise TypeError('`model` must be a str or dict, not %s.' %
                        type(model).__name__)

    p = get_project(project)
    if p is None:
        raise ValueError("Unable to find project '%s'" % project)

    model['projectId'] = p['id']
    model['modeler'] = modeler or current_session().user

    model['description'] = description or model.get('description')
    model['function'] = function or model.get('function')
    model['algorithm'] = algorithm or model.get('algorithm')
    model['tool'] = tool or model.get('tool')
    model['champion'] = is_champion or model.get('champion')
    model['role'] = 'Champion' if model.get('champion',
                                            False) else 'Challenger'

    # `properties` defaults to None rather than a shared mutable dict.
    custom_properties = properties or {}
    model.setdefault('properties', [{
        'name': k,
        'value': v
    } for k, v in custom_properties.items()])

    # TODO: add kwargs (pop)
    #     model.update(kwargs)
    return post(
        ROOT_PATH + '/models',
        json=model,
        headers={'Content-Type': 'application/vnd.sas.models.model+json'})