Exemplo n.º 1
0
def get_id_from_object(obj: Dict[str, Any],
                       key,
                       name=None,
                       required=False) -> Optional[UUID]:
    '''
    Extract and validate an ID stored under a key in a dict.

    A missing dict (None or empty) and a missing or falsy value for the key are both treated
    as "no ID present".

    :param obj: the dict wherein the ID can be found.
    :param key: the key in the dict for the ID value.
    :param name: the name of the ID to use in an exception, defaulting to the key.
    :param required: if no ID is present, throw an exception.
    :returns: the ID, if it exists, or None.
    :raises MissingParameterError: if the ID is required but not present.
    :raises IllegalParameterError: if the ID is provided but is invalid.
    '''
    _check_string(key, 'key')
    label = name or key
    # A falsy container means there is nothing to look up at all.
    raw_id = obj.get(key) if obj else None
    if not raw_id:
        if required:
            raise _MissingParameterError(label)
        return None
    return validate_sample_id(raw_id, label)
Exemplo n.º 2
0
 def __init__(
         self,
         name: str,
         type_: SubSampleType = SubSampleType.BIOLOGICAL_REPLICATE,
         parent: Optional[str] = None,
         controlled_metadata: Optional[Dict[str,
                                            Dict[str,
                                                 PrimitiveType]]] = None,
         user_metadata: Optional[Dict[str, Dict[str,
                                                PrimitiveType]]] = None,
         source_metadata: Optional[List[SourceMetadata]] = None):
     '''
     Create a sample node.
     :param name: The name of the sample node.
     :param type_: The type of this sample node.
     :param parent: The parent SampleNode of this node. BIOLOGICAL_REPLICATEs, and only
         BIOLOGICAL_REPLICATEs, cannot have parents.
     :param controlled_metadata: Sample metadata that has been checked against a controlled
         vocabulary.
     :param user_metadata: Unrestricted sample metadata.
     :param source_metadata: Information about the controlled metadata as it existed at the data
         source, prior to any possible transformations for ingest.
     :raises MissingParameterError: if the name is None or whitespace only.
     :raises IllegalParameterError: if the name or parent is too long or contains illegal
         characters, the parent is missing and the node type is not BIOLOGICAL_REPLICATE,
         or basic metadata constraints are violated.
     '''
     # could make a bioreplicate class... meh for now
     self.name = _cast(
         str,
         _check_string(name, 'subsample name',
                       max_len=_MAX_SAMPLE_NAME_LEN))
     self.type = _not_falsy(type_, 'type')
     self.parent = _check_string(parent,
                                 'parent',
                                 max_len=_MAX_SAMPLE_NAME_LEN,
                                 optional=True)
     # Missing metadata is stored as an empty container rather than None.
     cm = controlled_metadata if controlled_metadata else {}
     _check_meta(cm, True)
     self.controlled_metadata = _fz(cm)
     um = user_metadata if user_metadata else {}
     _check_meta(um, False)
     self.user_metadata = _fz(um)
     sm = source_metadata if source_metadata else []
     # Source metadata is checked against the already processed controlled metadata.
     _check_source_meta(sm, self.controlled_metadata)
     self.source_metadata = tuple(sm)
     isbiorep = type_ == SubSampleType.BIOLOGICAL_REPLICATE
     # Exactly one of "has a parent" and "is a biological replicate" must hold.
     # The validated self.parent is tested rather than the raw argument so the rule is
     # applied to the value actually stored on the node. NOTE(review): this matters when
     # _check_string normalises a blank optional string to an empty result - confirm.
     if not _xor(bool(self.parent), isbiorep):
         raise IllegalParameterError(
             f'Node {self.name} is of type {type_.value} and therefore ' +
             f'{"cannot" if isbiorep else "must"} have a parent')
Exemplo n.º 3
0
    def __init__(self, userid):
        '''
        Create the user id.

        :param userid: the user's id, a maximum of 256 unicode characters.
        '''
        checked_id = _check_string(userid, 'userid', max_len=256)
        self.id = checked_id
Exemplo n.º 4
0
def get_admin_request_from_object(
        params: Dict[str, Any], as_admin: str, as_user: str) -> Tuple[bool, Optional[UserID]]:
    '''
    Work out whether a request asks for administration mode and, if so, on whose behalf.

    :param params: the dict containing the information.
    :param as_admin: the name of the key containing a truish value that indicates that the user
        wishes to be recognized as an administrator.
    :param as_user: the name of the key containing a string that indicates the user the admin
        wishes to impersonate, or None if the admin does not wish to impersonate a user.
    :returns: A tuple where the first element is a boolean denoting whether the user wishes
        to be recognized as an administrator and the second element is the user that admin wishes
        to impersonate, if any. The user is always None if the boolean is False.
    '''
    _check_params(params)
    admin_key = _cast(str, _check_string(as_admin, 'as_admin'))
    wants_admin = bool(params.get(admin_key))
    # The impersonation key name is validated even when admin mode is not requested.
    _check_string(as_user, 'as_user')
    impersonated = get_user_from_object(params, as_user) if wants_admin else None
    return (wants_admin, impersonated)
Exemplo n.º 5
0
    def __init__(self, sample: SampleAddress, node: str):
        '''
        Create the address of a node within a sample.

        The sample ID and version are copied from the supplied sample address.

        :param sample: The sample address.
        :param node: The ID of the sample node.
        '''
        address = _not_falsy(sample, 'sample')
        self.sampleid = address.sampleid
        self.version = sample.version
        node_id = _check_string(node, 'node', max_len=_MAX_SAMPLE_NAME_LEN)
        self.node = _cast(str, node_id)
    def __init__(self, bootstrap_servers: str, topic: str):
        """
        Set up a notifier that publishes to a single Kafka topic.

        :param bootstrap_servers: the Kafka bootstrap servers parameter; it is split on commas
            before being handed to the producer.
        :param topic: the topic where messages will be sent. The notifier requires the topic
            name to consist of ASCII alphanumeric values and the hyphen to avoid Kafka issues
            around ambiguity between period and underscore values.
        :raises ValueError: if the topic contains a character matched by the class's illegal
            character pattern.
        """
        _check_string(bootstrap_servers, 'bootstrap_servers')
        self._topic = _check_string(topic, 'topic', max_len=249)
        illegal = self._KAFKA_TOPIC_ILLEGAL_CHARS_RE.search(_cast(str, self._topic))
        if illegal:
            raise ValueError(
                f'Illegal character in Kafka topic {self._topic}: {illegal.group()}')

        # TODO LATER KAFKA support delivery.timeout.ms when the client supports it
        # https://github.com/dpkp/kafka-python/issues/1723
        # since not supported, we half ass it with a retry count.
        # See https://kafka.apache.org/documentation/#producerconfigs
        producer_settings = {
            # can't test multiple servers without a massive PITA
            'bootstrap_servers': bootstrap_servers.split(','),
            'acks': 'all',
            # retries can occur from 100-30000 ms by default. If we allow 300 retries, that
            # means the send can take from 30s to 150m. Presumably another server timeout
            # will kill the request before then.
            'retries': 300,
            'retry_backoff_ms': 100,  # default is 100
            # low timeouts can cause message loss, apparently:
            # https://github.com/dpkp/kafka-python/issues/1723
            'request_timeout_ms': 30000,  # default is 30000
            # presumably this can be removed once idempotence is supported
            'max_in_flight_requests_per_connection': 1,
        }
        # this will fail if it can't connect
        self._prod = _KafkaProducer(**producer_settings)
        self._closed = False
Exemplo n.º 7
0
    def __init__(self, upa: UPA, dataid: str = None):
        '''
        Create the DUID.

        :param upa: The workspace object's UPA.
        :param dataid: The id of the data within the object that this DUID references with a
            maximum of 256 characters. None if the data unit is the entire object.
        '''
        self.upa = _not_falsy(upa, 'upa')
        # The data ID is optional; when given it is limited to 256 characters.
        checked_dataid = _check_string(dataid, 'dataid', max_len=256, optional=True)
        self.dataid = checked_dataid
Exemplo n.º 8
0
 def __init__(
     self,
     nodes: List[SampleNode],
     name: Optional[str] = None,
 ):
     '''
     Create the sample from an ordered list of nodes.
     :param nodes: The tree nodes in the sample. BIOLOGICAL_REPLICATES must come first in
         the list, and parents must come before children in the list.
     :param name: The name of the sample. Checked against the module's maximum sample name
         length.
     :raises MissingParameterError: if no nodes are provided.
     :raises IllegalParameterError: if the name is too long or contains illegal characters,
         there are more nodes than the module's node limit, the first node in the list is not
         a BIOLOGICAL_REPLICATE, all the BIOLOGICAL_REPLICATES are not at the start of this
         list, node names are not unique, or parent nodes do not appear in the list prior to
         their children.
     '''
     self.name = _check_string(name, 'name', max_len=_MAX_SAMPLE_NAME_LEN, optional=True)
     if not nodes:
         raise MissingParameterError('At least one node per sample is required')
     if len(nodes) > _MAX_SAMPLE_NODES:
         raise IllegalParameterError(
             f'At most {_MAX_SAMPLE_NODES} nodes are allowed per sample')
     biorep = SubSampleType.BIOLOGICAL_REPLICATE
     if nodes[0].type != biorep:
         raise IllegalParameterError(
             f'The first node in a sample must be a {biorep.value}')
     bioreps_done = False  # flips once the first non-replicate node is seen
     names_so_far: _Set[str] = set()
     for node in nodes:
         if node.type != biorep:
             bioreps_done = True
         elif bioreps_done:
             raise IllegalParameterError(
                 f'{biorep.value}s must be the first nodes in the list of sample nodes.')
         if node.name in names_so_far:
             raise IllegalParameterError(f'Duplicate sample node name: {node.name}')
         # A parent must already have been seen, i.e. appear earlier in the list.
         if node.parent and node.parent not in names_so_far:
             raise IllegalParameterError(
                 f'Parent {node.parent} of node {node.name} does not '
                 'appear in node list prior to node.')
         names_so_far.add(node.name)
     self.nodes = tuple(nodes)  # make hashable
Exemplo n.º 9
0
def split_value(d: Dict[str, str], key: str):
    '''
    Split a comma separated configuration value into its non-empty, stripped parts.
    :param d: The configuration dict containing the string to be processed as a value.
    :param key: The key in the dict containing the value.
    :returns: a list of strings split from the source comma separated string, or an empty list
    if the key does not exist or contains only whitespace.
    :raises ValueError: if the dict is None or the value contains control characters.
    '''
    if d is None:
        raise ValueError('d cannot be None')
    raw = _check_string(d.get(key), 'config param ' + key, optional=True)
    if not raw:
        return []
    # Strip each piece once, then drop the pieces that were only whitespace.
    stripped = (piece.strip() for piece in raw.split(','))
    return [piece for piece in stripped if piece]
Exemplo n.º 10
0
def get_user_from_object(params: Dict[str, Any], key: str) -> Optional[UserID]:
    '''
    Look up a username under a key in an object and wrap it in a user ID.

    :param params: the dict containing the user.
    :param key: the key in the dict where the value is the username.
    :returns: the user ID or None if the user is not present.
    :raises IllegalParameterError: if the user is invalid.
    '''
    _check_params(params)
    lookup_key = _cast(str, _check_string(key, 'key'))
    username = params.get(lookup_key)
    if username is None:
        return None
    # Only exact str instances are accepted as usernames.
    if type(username) is str:
        return UserID(username)
    raise _IllegalParameterError(f'{key} must be a string if present')
Exemplo n.º 11
0
def build_samples(
        config: Dict[str, str]) -> Tuple[Samples, KBaseUserLookup, List[str]]:
    '''
    Build the sample service instance from the SDK server provided parameters.

    The configuration is validated, then printed with the password and tokens redacted. The
    metadata validators are built next, followed by the storage, user lookup, workspace and
    (optionally) Kafka clients that make up the service.

    :param config: The SDK generated configuration.
    :returns: A tuple of the samples instance, the user lookup instance it was built with, and
        the list of roles read from the auth-read-exempt-roles setting.
    :raises ValueError: if the configuration is None or empty.
    '''
    if not config:
        raise ValueError('config is empty, cannot start service')

    def required(key: str) -> str:
        # Mandatory settings are all validated and named in errors the same way.
        return _check_string_req(config.get(key), 'config param ' + key)

    def optional(key: str):
        # Optional settings are validated without requiring a value.
        return _check_string(config.get(key), 'config param ' + key, optional=True)

    arango_url = required('arango-url')
    arango_db_name = required('arango-db')
    arango_user = required('arango-user')
    arango_pwd = required('arango-pwd')

    col_sample = required('sample-collection')
    col_version = required('version-collection')
    col_ver_edge = required('version-edge-collection')
    col_node = required('node-collection')
    col_node_edge = required('node-edge-collection')
    col_data_link = required('data-link-collection')
    col_ws_obj_ver = required('workspace-object-version-shadow-collection')
    col_schema = required('schema-collection')

    auth_root_url = required('auth-root-url')
    auth_token = required('auth-token')
    full_roles = split_value(config, 'auth-full-admin-roles')
    read_roles = split_value(config, 'auth-read-admin-roles')
    read_exempt_roles = split_value(config, 'auth-read-exempt-roles')

    ws_url = required('workspace-url')
    ws_token = required('workspace-read-admin-token')

    kafka_servers = optional('kafka-bootstrap-servers')
    kafka_topic = None
    if kafka_servers:  # have to start the server twice to test no kafka scenario
        # the topic is only validated once Kafka servers are configured
        kafka_topic = _check_string(config.get('kafka-topic'),
                                    'config param kafka-topic')

    metaval_url = optional('metadata-validator-config-url')

    # meta params may have info that shouldn't be logged so don't log any for now.
    # Add code to deal with this later if needed
    print(f'''
        Starting server with config:
            arango-url: {arango_url}
            arango-db: {arango_db_name}
            arango-user: {arango_user}
            arango-pwd: [REDACTED FOR YOUR SAFETY AND COMFORT]
            sample-collection: {col_sample}
            version-collection: {col_version}
            version-edge-collection: {col_ver_edge}
            node-collection: {col_node}
            node-edge-collection: {col_node_edge}
            data-link-collection: {col_data_link}
            workspace-object-version-shadow-collection: {col_ws_obj_ver}
            schema-collection: {col_schema}
            auth-root-url: {auth_root_url}
            auth-token: [REDACTED FOR YOUR CONVENIENCE AND ENJOYMENT]
            auth-full-admin-roles: {', '.join(full_roles)}
            auth-read-admin-roles: {', '.join(read_roles)}
            auth-read-exempt-roles: {', '.join(read_exempt_roles)}
            workspace-url: {ws_url}
            workspace-read-admin-token: [REDACTED FOR YOUR ULTIMATE PLEASURE]
            kafka-bootstrap-servers: {kafka_servers}
            kafka-topic: {kafka_topic}
            metadata-validator-config-url: {metaval_url}
    ''')

    # build the validators before trying to connect to arango
    metaval = get_validators(
        metaval_url) if metaval_url else MetadataValidatorSet()

    arangoclient = _arango.ArangoClient(hosts=arango_url)
    # keep the database handle separate from the database name read from the config
    arango_db = arangoclient.db(arango_db_name,
                                username=arango_user,
                                password=arango_pwd,
                                verify=True)
    storage = _ArangoSampleStorage(
        arango_db,
        col_sample,
        col_version,
        col_ver_edge,
        col_node,
        col_node_edge,
        col_ws_obj_ver,
        col_data_link,
        col_schema,
    )
    storage.start_consistency_checker()
    # a notifier is only created when Kafka servers were configured
    kafka = _KafkaNotifer(kafka_servers, _cast(
        str, kafka_topic)) if kafka_servers else None
    user_lookup = KBaseUserLookup(auth_root_url, auth_token, full_roles,
                                  read_roles)
    ws = _WS(_Workspace(ws_url, token=ws_token))
    return Samples(storage, user_lookup, metaval, ws,
                   kafka), user_lookup, read_exempt_roles
Exemplo n.º 12
0
def _check_string_req(s: Optional[str], name: str) -> str:
    # Run the string check without the optional flag and narrow the result type to str.
    checked = _check_string(s, name)
    return _cast(str, checked)