Example #1
 def cast_any(self, value, fmt=None):
     if isinstance(value, self.python_type):
         return value
     try:
         return parse_datetime_day_first(value)
     except (TypeError, ValueError) as e:
         raise_with_traceback(InvalidDateType(e))
Example #2
 def cast_any(self, value, fmt=None):
     if isinstance(value, self.python_type):
         return value
     try:
         return date_parse(value, dayfirst=True).date()
     except (TypeError, ValueError) as e:
         raise_with_traceback(InvalidDateType(e))
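Examples #1 and #2 are variants of the same pattern: pass through values that already have the target Python type, and wrap any parse failure in a domain-specific exception, with raise_with_traceback preserving the traceback of the original TypeError or ValueError. A minimal self-contained sketch of the pattern (the InvalidDateType stand-in and the dateutil-based day-first parsing are assumptions modeled on the snippets above):

    import datetime

    from dateutil.parser import parse as date_parse
    from future.utils import raise_with_traceback

    class InvalidDateType(Exception):
        """Stand-in for the library-specific exception used above."""

    def cast_date(value):
        if isinstance(value, datetime.date):
            return value
        try:
            # dayfirst=True parses '01/02/2000' as the 1st of February.
            return date_parse(value, dayfirst=True).date()
        except (TypeError, ValueError) as e:
            # Re-raise as a domain exception, keeping the parser's traceback.
            raise_with_traceback(InvalidDateType(e))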
Example #3
 def inner(*args, **kwargs):
     """Wraps specified exceptions"""
     try:
         return a_func(*args, **kwargs)
     # pylint: disable=catching-non-exception
     except tuple(errors) as exception:
         raise_with_traceback(GaxError('RPC failed', cause=exception))
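Examples #3 and #10 are the inner closures of a decorator that converts a configurable tuple of exceptions into a single API-level error. A plausible enclosing factory, assuming a GaxError-like exception that accepts a message and a cause keyword as shown above:

    from future.utils import raise_with_traceback

    class GaxError(Exception):
        """Stand-in for the RPC error type used above."""
        def __init__(self, msg, cause=None):
            super(GaxError, self).__init__(msg)
            self.cause = cause

    def catch_errors(a_func, errors):
        """Hypothetical factory returning the `inner` wrapper above."""
        def inner(*args, **kwargs):
            """Wraps specified exceptions"""
            try:
                return a_func(*args, **kwargs)
            except tuple(errors) as exception:
                raise_with_traceback(GaxError('RPC failed', cause=exception))
        return inner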
Example #4
 def _get_channel_number(section_name):
     match = re.match(r'^.*_channel<(\d{4})>$', section_name)
     if match:
         result = int(match.group(1))
         return result
     else:
         raise_with_traceback(RuntimeError("Unable to detect channel name from \"%s\""%section_name))
Example #5
  def _type_check(self, type_constraint, datum, is_input):
    """Typecheck a PTransform related datum according to a type constraint.

    This function is used to optionally type-check either an input or an output
    to a PTransform.

    Args:
        type_constraint: An instance of a typehints.TypeConstraint, one of the
          white-listed builtin Python types, or a custom user class.
        datum: An instance of a Python object.
        is_input: True if 'datum' is an input to a PTransform's DoFn. False
          otherwise.

    Raises:
      TypeError: If 'datum' fails to type-check according to 'type_constraint'.
    """
    datum_type = 'input' if is_input else 'output'

    try:
      check_constraint(type_constraint, datum)
    except CompositeTypeHintError as e:
      raise_with_traceback(TypeCheckError(e.args[0]))
    except SimpleTypeHintError:
      error_msg = ("According to type-hint expected %s should be of type %s. "
                   "Instead, received '%s', an instance of type %s."
                   % (datum_type, type_constraint, datum, type(datum)))
      raise_with_traceback(TypeCheckError(error_msg))
Example #6
  def _run_pants_with_retry(self, port, retries=3):
    """Runs pants remotely with retry and recovery for nascent executions."""
    attempt = 1
    while 1:
      logger.debug(
        'connecting to pantsd on port {} (attempt {}/{})'.format(port, attempt, retries)
      )
      try:
        return self._connect_and_execute(port)
      except self.RECOVERABLE_EXCEPTIONS as e:
        if attempt > retries:
          raise self.Fallback(e)

        self._backoff(attempt)
        logger.warn(
          'pantsd was unresponsive on port {}, retrying ({}/{})'
          .format(port, attempt, retries)
        )

        # One possible cause of the daemon being non-responsive during an attempt might be that
        # another lifecycle operation is happening concurrently (incl. teardown). To account for
        # this, we won't begin attempting restarts until at least 1 second has passed (1 attempt).
        if attempt > 1:
          port = self._restart_pantsd()
        attempt += 1
      except NailgunClient.NailgunError as e:
        # Ensure a newline.
        logger.fatal('')
        logger.fatal('lost active connection to pantsd!')
        raise_with_traceback(
          self.Terminated(
            'abruptly lost active connection to pantsd runner: {!r}'.format(e)))
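The retry loop above defers to a `_backoff` helper between attempts. Its implementation is not shown in this collection; a minimal sketch under the assumption of capped exponential backoff (the real pants implementation may differ):

    import time

    def backoff(attempt, base=0.1, cap=5.0):
        """Hypothetical capped exponential backoff between retry attempts."""
        time.sleep(min(base * (2 ** attempt), cap))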
Example #7
    def connect(self):
        """
        This method causes the `Duct` instance to connect to the service, if it
        is not already connected. It is not normally necessary for a user to
        manually call this function, since when a connection is required, it is
        automatically created.

        Subclasses should implement `Duct._connect` to do whatever is necessary
        to bring a connection into being.

        Compared to the base `Duct.connect`, this method will automatically catch
        the first `DuctAuthenticationError` raised by `Duct.connect` if any
        smartcards have been configured, before trying once more.

        Returns:
            `Duct` instance: A reference to the current object.
        """
        try:
            Duct.connect(self)
        except DuctServerUnreachable as e:
            raise_with_traceback(e)
        except DuctAuthenticationError as e:
            if self.smartcards and self.prepare_smartcards():
                Duct.connect(self)
            else:
                raise_with_traceback(e)
        return self
Example #8
def jsonify(obj, pretty=False):
    """
    Turn a nested object into a (compressed) JSON string.

    Parameters
    ----------
    obj : dict
        Any kind of dictionary structure.
    pretty : bool, optional
        Whether to format the resulting JSON in a more legible way
        (default False).

    """
    if pretty:
        params = dict(sort_keys=True, indent=2, allow_nan=False,
                      separators=(",", ": "), ensure_ascii=False)
    else:
        params = dict(sort_keys=False, indent=None, allow_nan=False,
                      separators=(",", ":"), ensure_ascii=False)
    try:
        return json.dumps(obj, **params)
    except (TypeError, ValueError) as error:
        LOGGER.critical(
            "The memote result structure is incompatible with the JSON "
            "standard.")
        log_json_incompatible_types(obj)
        raise_with_traceback(error)
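A short usage note: with pretty=False the separators (",", ":") strip all whitespace for the most compact encoding, while pretty=True produces a sorted, indented document. A doctest-style sketch:

    >>> jsonify({"b": 1, "a": [1, 2]})
    '{"b":1,"a":[1,2]}'
    >>> print(jsonify({"b": 1, "a": [1, 2]}, pretty=True))
    {
      "a": [
        1,
        2
      ],
      "b": 1
    }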
Example #9
        def __call__(self, *args, **kwargs):
            if not current_app.config.get('INDEXING_ENABLED', True):
                return self._route(*args, **kwargs)

            log = PageView(
                page=request.full_path,
                endpoint=request.endpoint,
                user_id=current_user.id,
                ip_address=request.remote_addr,
                version=__version__
            )
            errorlog = None
            log.object_id, log.object_type, log.object_action, reextract_after_request = self.extract_objects(*args, **kwargs)
            db_session.add(log)  # Add log here to ensure pageviews are accurate

            try:
                return self._route(*args, **kwargs)
            except Exception as e:
                db_session.rollback()  # Ensure no lingering database changes remain after crashed route
                db_session.add(log)
                errorlog = ErrorLog.from_exception(e)
                db_session.add(errorlog)
                db_session.commit()
                raise_with_traceback(e)
            finally:
                # Extract object id and type after the response is generated (if
                # requested) to ensure the most recent data is collected
                if reextract_after_request:
                    log.object_id, log.object_type, log.object_action, _ = self.extract_objects(*args, **kwargs)

                if errorlog is not None:
                    log.id_errorlog = errorlog.id
                db_session.add(log)
                db_session.commit()
Example #10
 def inner(*args, **kwargs):
     """Wraps specified exceptions"""
     try:
         return a_func(*args, **kwargs)
     # pylint: disable=catching-non-exception
     except tuple(to_catch) as exception:
         utils.raise_with_traceback(errors.create_error("RPC failed", cause=exception))
Example #11
        def start_response(status, response_headers, exc_info=None):
            if exc_info:
                try:
                    if headers_sent:
                        # Re-raise if too late
                        raise_with_traceback(exc_info[0](exc_info[1]))
                finally:
                    exc_info = None  # avoid dangling circular ref
            else:
                assert not headers_set, 'Headers already set!'

            assert type(status) is str, 'Status must be a string'
            assert len(status) >= 4, 'Status must be at least 4 characters'
            assert int(status[:3]), 'Status must begin with 3-digit code'
            assert status[3] == ' ', 'Status must have a space after code'
            assert type(response_headers) is list, 'Headers must be a list'
            if FCGI_DEBUG:
                logging.debug('response headers:')
                for name, val in response_headers:
                    assert type(name) is str, 'Header name "%s" must be a string' % name
                    assert type(val) is str, 'Value of header "%s" must be a string' % name
                    logging.debug('%s: %s' % (name, val))

            headers_set[:] = [status, response_headers]
            return write
Example #12
 def cast_default(self, value):
     try:
         struct_time = time.strptime(value, self.ISO8601)
         return datetime.time(struct_time.tm_hour, struct_time.tm_min,
                              struct_time.tm_sec)
     except (TypeError, ValueError) as e:
         raise_with_traceback(exceptions.InvalidTimeType(e))
Example #13
    def _execute(self, statement, cursor, wait, session_properties):
        """
        If something goes wrong, `PrestoClient` will attempt to parse the error
        log and present the user with useful debugging information. If that fails,
        the full traceback will be raised instead.
        """
        from pyhive import presto  # Imported here due to slow import performance in Python 3
        from pyhive.exc import DatabaseError  # Imported here due to slow import performance in Python 3
        try:
            cursor = cursor or presto.Cursor(
                host=self.host, port=self.port, username=self.username, password=self.password,
                catalog=self.catalog, schema=self.schema, session_props=session_properties,
                poll_interval=1, source=self.source, protocol=self.server_protocol
            )
            cursor.execute(statement)
            status = cursor.poll()
            if wait:
                logger.progress(0)
                # status None means command executed successfully
                # See https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py#L234
                while status is not None and status['stats']['state'] != "FINISHED":
                    if status['stats'].get('totalSplits', 0) > 0:
                        pct_complete = round(status['stats']['completedSplits'] / float(status['stats']['totalSplits']), 4)
                        logger.progress(pct_complete * 100)
                    status = cursor.poll()
                logger.progress(100, complete=True)
            return cursor
        except (DatabaseError, pandas.io.sql.DatabaseError) as e:
            # Attempt to parse database error, before ultimately reraising the same
            # exception, maintaining the full stacktrace.
            exception, exception_args, traceback = sys.exc_info()

            try:
                message = e.args[0]
                if isinstance(message, six.string_types):
                    message = ast.literal_eval(re.match("[^{]*({.*})[^}]*$", message).group(1))

                linenumber = message['errorLocation']['lineNumber'] - 1
                splt = statement.splitlines()
                splt[linenumber] += '   <--  {errorType} ({errorName}) occurred. {message} '.format(**message)
                context = '\n\n[Error Context]\n{}\n'.format('\n'.join([splt[l] for l in range(max(linenumber - 1, 0),
                                                                                               min(linenumber + 2, len(splt)))]))

                class ErrContext(object):

                    def __repr__(self):
                        return context

                # logged twice so that both notebook and console users see the error context
                exception_args.args = [exception_args, ErrContext()]
                logger.error(context)
            except:
                logger.warn(("Omniduct was unable to parse the database error messages. Refer to the "
                             "traceback below for full error details."))

            if isinstance(exception, type):
                exception = exception(exception_args)

            raise_with_traceback(exception, traceback)
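Example #13 uses the two-argument form of raise_with_traceback: the original traceback is captured via sys.exc_info() and attached to a (possibly reconstructed) exception. Stripped of the Presto-specific parsing, the pattern is roughly:

    import sys

    from future.utils import raise_with_traceback

    try:
        1 / 0
    except ZeroDivisionError:
        exc_type, exc_value, traceback = sys.exc_info()
        # Re-raise a new exception that points at the original traceback,
        # so the stack trace still shows where the failure happened.
        raise_with_traceback(RuntimeError('query failed: %s' % exc_value), traceback)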
Example #14
 def __setattr__(self, name, value):
     if name not in self._parameters.keys():
         return object.__setattr__(self, name, value)
     else:
         if self._parameters[name][1] == type(value):
             self._parameters[name] = (value, type(value))
         else:
             raise_with_traceback(TypeError('Invalid type %s for parameter "%s"' % (type(value), name)))
Example #15
def reraise():
    """Reraise the current contextmanager exception, if any"""
    typ, val, tb = _exc_info.get(get_ident(), nones)
    if typ:
        try:
            raise_with_traceback(typ(val))
        finally:
            del typ, val, tb
Example #16
 def wrapped(*args, **kwargs):
     try:
         return function(*args, **kwargs)
     except Exception as e:
         db_session.rollback()
         db_session.add(ErrorLog.from_exception(e))
         db_session.commit()
         raise_with_traceback(e)
Example #17
 def raise_critical_error(self, err):
     """
     This logs the error, releases any lock files and throws an exception.
     The expectation is that the application exits after this.
     """
     self.logger.critical(err)
     self._run_exit_hooks()
     raise_with_traceback(CriticalApplicationError(err))
Example #18
def exception_traceback_example():
    """ Throw an exception with traceback

    >>> exception_traceback_example()
    Traceback (most recent call last):
    ValueError: exceptional
    """
    from future.utils import raise_with_traceback
    raise_with_traceback(ValueError('exceptional'))
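For reference, on Python 3 raise_with_traceback(exc) behaves roughly like the sketch below; the future library exists so the same call also works on Python 2, where with_traceback() and exception chaining are unavailable. This is a simplified approximation, not the library's exact implementation:

    import sys

    def raise_with_traceback_sketch(exc, traceback=None):
        # Default to the traceback of the exception currently being handled.
        if traceback is None:
            traceback = sys.exc_info()[2]
        raise exc.with_traceback(traceback)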
Example #19
 def wrapper(self, method, args, kwargs):
   try:
     result = method(*args, **kwargs)
   except TypeCheckError as e:
     error_msg = ('Runtime type violation detected within ParDo(%s): '
                  '%s' % (self.full_label, e))
     raise_with_traceback(TypeCheckError(error_msg))
   else:
     return self._check_type(result)
Example #20
    def cast_binary(self, value):
        if not self._type_check(value):
            raise exceptions.InvalidStringType()

        try:
            base64.b64decode(value)
        except binascii.Error as e:
            raise_with_traceback(exceptions.InvalidBinary(e))
        return value
Example #21
 def _raise_deferred_exc(self):
   """Raises deferred exceptions from the daemon's synchronous path in the post-fork client."""
   if self._deferred_exception:
     try:
       exc_type, exc_value, exc_traceback = self._deferred_exception
       raise_with_traceback(exc_value, exc_traceback)
     except TypeError:
       # If `_deferred_exception` isn't a 3-item tuple (raising a TypeError on the above
       # destructuring), treat it like a bare exception.
       raise self._deferred_exception
Example #22
 def cast_default(self, value):
     if isinstance(value, self.py):
         return value
     try:
         json_value = json.loads(value)
         if isinstance(json_value, self.py):
             return json_value
         else:
             raise exceptions.InvalidObjectType()
     except (TypeError, ValueError) as e:
         raise_with_traceback(exceptions.InvalidObjectType(e))
Example #23
def decode(data, encoding='utf-8'):
    assert encoding == 'utf-8', "Only UTF-8 encoding is currently supported."
    if encoding is not None:
        try:
            data = data.decode(encoding)
        except Exception as e:
            if os.environ.get('DEBUG'):
                raise_with_traceback(e)
            logger.warning("An decoding error has occurred... continuing anyway. To capture these errors, rerun the current command prefixed with `DEBUG=1 `.")
            data = data.decode(encoding, errors='ignore')
    return data
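Usage is straightforward: valid UTF-8 decodes normally, while invalid byte sequences are dropped via errors='ignore' unless the DEBUG environment variable is set, in which case the original exception is re-raised with its traceback. A doctest-style sketch (the warning goes to the logger, not stdout):

    >>> decode(b'caf\xc3\xa9')
    'café'
    >>> decode(b'caf\xc3')  # truncated multi-byte sequence, DEBUG unset
    'caf'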
Example #24
 def logging_scope(func, *args, **kwargs):
     logger._scope_enter(name, *wargs, **wkwargs)
     success = True
     try:
         f = func(*args, **kwargs)
         return f
     except Exception as e:
         success = False
         raise_with_traceback(e)
     finally:
         logger._scope_exit(success)
Example #25
 def _raise_deferred_exc(self):
   """Raises deferred exceptions from the daemon's synchronous path in the post-fork client."""
   if self._deferred_exception:
     try:
        # Expect `_deferred_exception` to be a 3-item tuple of the values returned by sys.exc_info().
        # This permits use of the 3-arg form of the `raise` statement to preserve the original traceback.
       exc_type, exc_value, exc_traceback = self._deferred_exception
       raise_with_traceback(exc_type(exc_value), exc_traceback)
     except ValueError:
       # If `_deferred_exception` isn't a 3-item tuple, treat it like a bare exception.
       raise self._deferred_exception
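Examples #21 and #25 are two revisions of the same pants method. The deferred-exception pattern stores the full sys.exc_info() triple at failure time so the traceback can be replayed later in the post-fork client. A minimal sketch of the storing side, assuming a _deferred_exception attribute as used above (the workload is hypothetical):

    import sys

    class Daemon(object):
        def __init__(self):
            self._deferred_exception = None

        def _work(self):
            raise RuntimeError('boom')  # hypothetical workload

        def run(self):
            try:
                self._work()
            except Exception:
                # Capture (type, value, traceback) now; _raise_deferred_exc
                # re-raises it later with the original traceback intact via
                # raise_with_traceback(exc_type(exc_value), exc_traceback).
                self._deferred_exception = sys.exc_info()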
Example #26
 def add_input(self, accumulator, element, *args, **kwargs):
   if self._input_type_hint:
     try:
       _check_instance_type(
           self._input_type_hint[0][0].tuple_types[1], element, 'element',
           True)
     except TypeCheckError as e:
       error_msg = ('Runtime type violation detected within %s: '
                    '%s' % (self._label, e))
       raise_with_traceback(TypeCheckError(error_msg))
   return self._combinefn.add_input(accumulator, element, *args, **kwargs)
Example #27
 def extract_output(self, accumulator, *args, **kwargs):
   result = self._combinefn.extract_output(accumulator, *args, **kwargs)
   if self._output_type_hint:
     try:
       _check_instance_type(
           self._output_type_hint.tuple_types[1], result, None, True)
     except TypeCheckError as e:
       error_msg = ('Runtime type violation detected within %s: '
                    '%s' % (self._label, e))
       raise_with_traceback(TypeCheckError(error_msg))
   return result
Example #28
    def _get_attr(self, attr_name, default_value=None):

        try:
            attr_value = self.attrs[attr_name]

        except KeyError:
            if default_value is None:
                raise_with_traceback(IpcMessageException("Missing attribute " + attr_name))
            else:
                attr_value = default_value

        return attr_value
Example #29
    def cast_uuid(self, value):
        """Return `value` if is a uuid, else return False."""

        if not self._type_check(value):
            raise exceptions.InvalidStringType(
                '{0} is not of type {1}'.format(value, self.py)
            )
        try:
            uuid.UUID(value, version=4)
            return value
        except ValueError as e:
            raise_with_traceback(exceptions.InvalidUUID(e))
Example #30
    def get_param(self, param_name, default_value=None):

        try:
            param_value = self.attrs['params'][param_name]

        except KeyError:
            if default_value is None:
                raise_with_traceback(IpcMessageException("Missing parameter " + param_name))
            else:
                param_value = default_value

        return param_value
Example #31
def two_dim_dict_param(obj, param_name, key_type=string_types, value_type=None):
    if not isinstance(obj, dict):
        raise_with_traceback(_param_type_mismatch_exception(obj, dict, param_name))

    return _check_two_dim_key_value_types(obj, key_type, param_name, value_type)
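Example #31 is the first of many dagster-style check functions in this collection (see also Examples #33-#35, #37-#50, #52 and #54-#59) that delegate to small exception-builder helpers. Those helpers are not shown here; a hypothetical reconstruction so the snippets can run standalone:

    class ParameterCheckError(Exception):
        """Hypothetical error type for failed parameter checks."""

    def _param_type_mismatch_exception(obj, ttype, param_name,
                                       additional_message=None):
        # Hypothetical helper mirroring the calls in the examples above.
        msg = 'Param "{name}" is not a {ttype}. Got {obj!r} of type {actual}.'.format(
            name=param_name, ttype=ttype, obj=obj, actual=type(obj))
        if additional_message:
            msg += ' ' + additional_message
        return ParameterCheckError(msg)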
Example #32
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    The query arg may be either a string or a bytes type. When the query arg is
    a string, the safe, encoding and errors parameters are sent to quote_plus
    for encoding.
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise_with_traceback(
                TypeError("not a valid non-string sequence "
                          "or mapping object"), tb)

    l = []
    if not doseq:
        for k, v in query:
            if isinstance(k, bytes):
                k = quote_plus(k, safe)
            else:
                k = quote_plus(str(k), safe, encoding, errors)

            if isinstance(v, bytes):
                v = quote_plus(v, safe)
            else:
                v = quote_plus(str(v), safe, encoding, errors)
            l.append(k + '=' + v)
    else:
        for k, v in query:
            if isinstance(k, bytes):
                k = quote_plus(k, safe)
            else:
                k = quote_plus(str(k), safe, encoding, errors)

            if isinstance(v, bytes):
                v = quote_plus(v, safe)
                l.append(k + '=' + v)
            elif isinstance(v, str):
                v = quote_plus(v, safe, encoding, errors)
                l.append(k + '=' + v)
            else:
                try:
                    # Is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v), safe, encoding, errors)
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        if isinstance(elt, bytes):
                            elt = quote_plus(elt, safe)
                        else:
                            elt = quote_plus(str(elt), safe, encoding, errors)
                        l.append(k + '=' + elt)
    return str('&').join(l)
Example #33
def numeric_param(obj: Any, param_name: str) -> Numeric:
    if not isinstance(obj, (int, float)):
        raise_with_traceback(
            _param_type_mismatch_exception(obj, (int, float), param_name))
    return obj
Example #34
def failed(desc: str) -> NoReturn:  # type: ignore[misc]
    if not isinstance(desc, str):
        raise_with_traceback(CheckError("desc argument must be a string"))

    raise_with_traceback(
        CheckError("Failure condition: {desc}".format(desc=desc)))
Example #35
def subclass(obj, superclass, desc=None):
    if not issubclass(obj, superclass):
        raise_with_traceback(_type_mismatch_error(obj, superclass, desc))

    return obj
Example #36
def _score_population(predictions_location, cf_dir_location):
    """
    Scores estimations of treatment effect size over the population.

    Args:
        predictions_location (str): Path to a single tabular file where the effect estimations are located.
                                    Files must be of tabular format
                                     * containing 4 columns: HEADER_POP_IDX, HEADER_EFFECT_SIZE, HEADER_CI_LEFT,
                                       HEADER_CI_RIGHT.
                                     * delimited by TABULAR_DELIMITER.
                                     * have FILENAME_EXTENSION extension to them.
                                    These global variables specified above can be changed when importing the module.
        cf_dir_location (str): Path to a directory containing the counter-factual files (i.e. labeled, ground-truth
                               data).
                               Files must be of tabular format
                                * containing 3 columns: HEADER_IND_IDX, HEADER_Y1, HEADER_Y0.
                                * delimited by TABULAR_DELIMITER.
                                * have the suffix specified in COUNTERFACTUAL_FILE_SUFFIX.
                                * have FILENAME_EXTENSION extension to them.
                               These global variables specified above can be changed when importing the module.

    Returns:
        pd.Series: Scores, where the Series' index is the metric name and the value is the evaluation of that metric.
    """
    ufids = os.listdir(cf_dir_location)
    ufids = [
        f.rsplit(COUNTERFACTUAL_FILE_SUFFIX + FILENAME_EXTENSION)[0]
        for f in ufids
        if f.lower().endswith(COUNTERFACTUAL_FILE_SUFFIX + FILENAME_EXTENSION)
    ]
    # Gather scoring statistics:
    ratio = pd.Series(index=ufids, name="population_ratio")
    bias = pd.Series(index=ufids, name="population_bias")
    ci_size = pd.Series(index=ufids, name="population_ci-size")
    coverage = pd.Series(data=False,
                         index=ufids,
                         dtype=np.dtype(bool),
                         name="population_coverage")

    # Get data:      # HEADER_POP_IDX | HEADER_EFFECT_SIZE | HEADER_CI_LEFT | HEADER_CI_RIGHT
    estimates = pd.read_csv(predictions_location,
                            index_col=HEADER_POP_IDX,
                            sep=TABULAR_DELIMITER)

    if set(ufids) - set(estimates.index):
        raise_with_traceback(
            AssertionError(
                "Seems there are ground-truth files with no corresponding predictions\n"
                "Unmatched files are:\n" + "\n".join(
                    [str(i) for i in (set(ufids) - set(estimates.index))])))

    true_effects = pd.Series(index=ufids)
    dataset_sizes = pd.Series(index=ufids, name="size")

    for ufid in ufids:
        # Get the true effect:
        gt = pd.read_csv(os.path.join(
            cf_dir_location,
            ufid + COUNTERFACTUAL_FILE_SUFFIX + FILENAME_EXTENSION),
                         sep=TABULAR_DELIMITER)
        true_effect = np.mean(gt[HEADER_Y1] - gt[HEADER_Y0])
        true_effects[ufid] = true_effect

        # Get the population estimates:     | HEADER_EFFECT_SIZE | HEADER_CI_LEFT | HEADER_CI_RIGHT |
        estimate = estimates.loc[
            ufid, :]  # No need to "try:" due to content assertion above

        # Calculate the sufficient statistics:
        ratio[ufid] = (estimate[HEADER_EFFECT_SIZE] + EPSILON) / (true_effect +
                                                                  EPSILON)
        bias[ufid] = estimate[HEADER_EFFECT_SIZE] - true_effect
        ci_size[ufid] = estimate[HEADER_CI_RIGHT] - estimate[
            HEADER_CI_LEFT]  # right - left -> non-negative
        coverage[ufid] = estimate[HEADER_CI_LEFT] <= true_effect <= estimate[
            HEADER_CI_RIGHT]

        # Save the size of the current dataset:
        dataset_sizes[ufid] = gt.index.size
    dataset_sizes = dataset_sizes.astype(int)  # type: pd.Series

    # Calculate metrics
    enormse = 1.0 - ratio  # type: pd.Series
    encis = ci_size / (true_effects.abs() + EPSILON)  # type: pd.Series
    cic = bias.abs() / ci_size
    # Aggregate by sizes:
    enormse_by_size = enormse.pow(2).groupby(by=dataset_sizes).mean().pow(0.5)
    rmse_by_size = bias.pow(2).groupby(by=dataset_sizes).mean().pow(0.5)
    bias_by_size = bias.groupby(by=dataset_sizes).mean()
    coverage_by_size = coverage.groupby(by=dataset_sizes).mean()
    encis_by_size = encis.groupby(by=dataset_sizes).mean()
    cic_by_size = cic.groupby(by=dataset_sizes).mean()

    results = pd.Series()
    if dataset_sizes.nunique() == 1:
        # With a single dataset size, the by-size aggregates each hold one value; extract that scalar.
        results["enormse"] = enormse_by_size.iloc[0]
        results["rmse"] = rmse_by_size.iloc[0]
        results["bias"] = bias_by_size.iloc[0]
        results["coverage"] = coverage_by_size.iloc[0]
        results["encis"] = encis_by_size.iloc[0]
        results["cic"] = cic_by_size.iloc[0]

    else:
        # weighted_sum = lambda x, w: x.mul(w).sum() / w.sum()
        def weighted_sum(x, w):
            return x.mul(w).sum() / w.sum()

        # Calculate the Weights for aggregation:
        weights = __get_weights(dataset_sizes)

        # Aggregate
        results["enormse"] = np.sqrt(
            weighted_sum(enormse_by_size.pow(2), weights))
        results["rmse"] = np.sqrt(weighted_sum(rmse_by_size.pow(2), weights))
        results["bias"] = weighted_sum(bias_by_size, weights)
        results["coverage"] = weighted_sum(coverage_by_size, weights)
        results["encis"] = weighted_sum(encis_by_size, weights)
        results["cic"] = weighted_sum(cic_by_size, weights)
        results = results.append(enormse_by_size.add_prefix("enormse_"))

    return results
Example #37
def opt_nonempty_str_param(obj, param_name, default=None):
    if obj is not None and not isinstance(obj, string_types):
        raise_with_traceback(_param_type_mismatch_exception(obj, str, param_name))
    return default if obj is None or obj == '' else obj
Example #38
def opt_int_param(obj, param_name, default=None):
    if obj is not None and not isinstance(obj, integer_types):
        raise_with_traceback(
            _param_type_mismatch_exception(obj, int, param_name))
    return default if obj is None else obj
Example #39
def int_param(obj, param_name):
    if not isinstance(obj, integer_types):
        raise_with_traceback(
            _param_type_mismatch_exception(obj, int, param_name))
    return obj
Example #40
def opt_callable_param(obj, param_name, default=None):
    if obj is not None and not callable(obj):
        raise_with_traceback(_not_callable_exception(obj, param_name))
    return default if obj is None else obj
Example #41
def callable_param(obj, param_name):
    if not callable(obj):
        raise_with_traceback(_not_callable_exception(obj, param_name))
    return obj
Example #42
def inst_param(obj, param_name, ttype, additional_message=None):
    if not isinstance(obj, ttype):
        raise_with_traceback(
            _param_type_mismatch_exception(
                obj, ttype, param_name, additional_message=additional_message))
    return obj
Example #43
def not_implemented(desc):
    if not is_str(desc):
        raise_with_traceback(CheckError("desc argument must be a string"))

    raise_with_traceback(
        NotImplementedCheckError("Not implemented: {desc}".format(desc=desc)))
Example #44
def tuple_param(obj, param_name):
    if not isinstance(obj, tuple):
        raise_with_traceback(_param_type_mismatch_exception(obj, tuple, param_name))
    return obj
Example #45
def bool_param(obj, param_name):
    if not isinstance(obj, bool):
        raise_with_traceback(_param_type_mismatch_exception(obj, bool, param_name))
    return obj
Example #46
def opt_int_param(obj, param_name):
    if obj is not None and not isinstance(obj, int):
        raise_with_traceback(_param_type_mismatch_exception(obj, int, param_name))
    return obj
Example #47
def float_param(obj, param_name):
    if not isinstance(obj, float):
        raise_with_traceback(
            _param_type_mismatch_exception(obj, float, param_name))
    return obj
Example #48
def inst(obj, ttype, desc=None):
    if not isinstance(obj, ttype):
        raise_with_traceback(_type_mismatch_error(obj, ttype, desc))
    return obj
Example #49
def str_param(obj, param_name):
    if not _is_str(obj):
        raise_with_traceback(_param_type_mismatch_exception(obj, str, param_name))
    return obj
Example #50
def opt_numeric_param(obj, param_name):
    if obj is not None and not isinstance(obj, (int, float)):
        raise_with_traceback(
            _param_type_mismatch_exception(obj, (int, float), param_name))
    return obj
Example #51
def _score_individual(predictions_location, cf_dir_location):
    """
    Scores estimations of treatment effect size on individuals (i.e. the prediction of both outcome under no treatment
    and outcome under positive treatment for each individual).

    Args:
        predictions_location (str): Path to a directory containing tabular files with individual effect estimations
                               (i.e. prediction of factual and counterfactual outcomes for each individual).
                               Files must be of tabular format
                                * containing 3 columns: HEADER_IND_IDX, HEADER_Y1, HEADER_Y0.
                                * delimited by TABULAR_DELIMITER.
                                * have FILENAME_EXTENSION extension to them.
                               These global variables specified above can be changed when importing the module.
        cf_dir_location (str): Path to a directory containing the counter-factual files (i.e. labeled, ground-truth
                               data).
                               Files must be of tabular format
                                * containing 3 columns: HEADER_IND_IDX, HEADER_Y1, HEADER_Y0.
                                * delimited by TABULAR_DELIMITER.
                                * have the suffix specified in COUNTERFACTUAL_FILE_SUFFIX.
                                * have FILENAME_EXTENSION extension to them.
                               These global variables specified above can be changed when importing the module.

    Returns:
        pd.Series: Scores, where the Series' index is the metric name and the value is the evaluation of that metric.
    """
    ufids = os.listdir(predictions_location)
    ufids = [
        f.rsplit(FILENAME_EXTENSION)[0] for f in ufids
        if f.lower().endswith(FILENAME_EXTENSION)
    ]

    enormse = pd.Series(index=ufids, name="individual_enormse")
    rmse = pd.Series(index=ufids, name="individual_rmse")
    bias = pd.Series(index=ufids, name="individual_bias")
    dataset_sizes = pd.Series(index=ufids, name="size")

    for ufid in ufids:
        # Get the true effect:
        gt = pd.read_csv(os.path.join(
            cf_dir_location,
            ufid + COUNTERFACTUAL_FILE_SUFFIX + FILENAME_EXTENSION),
                         index_col=HEADER_IND_IDX,
                         sep=TABULAR_DELIMITER)
        true_effect = gt[HEADER_Y1] - gt[HEADER_Y0]

        # Get estimated effect:                         submission format:    N rows: patient_ID | Y0 | Y1
        try:
            estimates = pd.read_csv(os.path.join(predictions_location,
                                                 ufid + FILENAME_EXTENSION),
                                    index_col=HEADER_IND_IDX,
                                    sep=TABULAR_DELIMITER)
        except IOError as e:  # Python 2-compatible stand-in for FileNotFoundError
            e.args = (
                e.args[0] + "\n\t" + "A prediction might be missing.\n"
                "Seems that the file ({fn}) was found in the ground-truth directory but no "
                "corresponding estimation was found in your "
                "predictions.".format(fn=ufid), ) + e.args[1:]

            raise_with_traceback(e)

        estimated_effect = estimates[HEADER_Y1] - estimates[HEADER_Y0]

        # Evaluate:
        individual_bias = estimated_effect - true_effect
        bias[ufid] = individual_bias.mean()
        rmse[ufid] = individual_bias.pow(2).mean()
        # enormse[ufid] = np.mean((individual_bias) / (true_effect + EPSILON) ** 2)
        enormse[ufid] = np.mean(
            (1 - ((estimated_effect + EPSILON) / (true_effect + EPSILON)))**2)

        # Save the size of the current dataset:
        dataset_sizes[ufid] = gt.index.size

    dataset_sizes = dataset_sizes.astype(int)  # type: pd.Series
    enormse_by_size = enormse.groupby(by=dataset_sizes).mean().pow(0.5)
    rmse_by_size = rmse.groupby(by=dataset_sizes).mean().pow(0.5)
    bias_by_size = bias.groupby(by=dataset_sizes).mean()

    results = pd.Series()
    if dataset_sizes.nunique() == 1:
        results["enormse"] = enormse_by_size.iloc[0]
        results["rmse"] = rmse_by_size.iloc[0]
        results["bias"] = bias_by_size.iloc[0]

    else:
        weights = __get_weights(dataset_sizes)
        results["enormse"] = np.sqrt(
            enormse_by_size.pow(2).mul(weights).sum() / weights.sum())
        results["rmse"] = np.sqrt(
            rmse_by_size.pow(2).mul(weights).sum() / weights.sum())
        results["bias"] = bias_by_size.mul(weights).sum() / weights.sum()
        results = results.append(enormse_by_size.add_prefix("enormse_"))
    return results
Example #52
def failed(desc):
    if not _is_str(desc):
        raise_with_traceback(CheckError('desc argument must be a string'))

    raise_with_traceback(CheckError('Failure condition: {desc}'.format(desc=desc)))
Example #53
def find_direct_metabolites(model, reaction, tolerance=1E-06):
    """
    Return list of possible direct biomass precursor metabolites.

    The term direct metabolites describes metabolites that are involved only
    in either transport and/or boundary reactions, AND the biomass reaction(s),
    but not in any purely metabolic reactions.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    reaction : cobra.Reaction
        The biomass reaction of the model under investigation.
    tolerance : float, optional
        Tolerance below which values will be regarded as zero.

    Returns
    -------
    list
        Metabolites that qualify as direct metabolites i.e. biomass precursors
        that are taken up to be consumed by the biomass reaction only.

    """
    biomass_rxns = set(helpers.find_biomass_reaction(model))
    tra_bou_bio_rxns = helpers.find_interchange_biomass_reactions(
        model, biomass_rxns)
    try:
        precursors = find_biomass_precursors(model, reaction)
        main_comp = helpers.find_compartment_id_in_model(model, 'c')
        ext_space = helpers.find_compartment_id_in_model(model, 'e')
    except KeyError:
        LOGGER.error("Failed to properly identify cytosolic and extracellular "
                     "compartments.")
        raise_with_traceback(
            KeyError("The cytosolic and/or extracellular "
                     "compartments could not be identified."))
    except RuntimeError:
        LOGGER.error("Failed to properly identify cytosolic and extracellular "
                     "compartments.")
        raise_with_traceback(
            RuntimeError("The cytosolic and/or extracellular "
                         "compartments could not be "
                         "identified."))
    else:
        tra_bou_bio_mets = [
            met for met in precursors
            if met.reactions.issubset(tra_bou_bio_rxns)
        ]
        rxns_of_interest = set([
            rxn for met in tra_bou_bio_mets for rxn in met.reactions
            if rxn not in biomass_rxns
        ])

    solution = model.optimize(raise_error=True)
    if np.isclose(solution.objective_value, 0, atol=tolerance):
        LOGGER.error("Failed to generate a non-zero objective value with "
                     "flux balance analysis.")
        raise OptimizationError(
            "The flux balance analysis on this model returned an "
            "objective value of zero. Make sure the model can "
            "grow! Check if the constraints are not too strict!")

    tra_bou_bio_fluxes = {r: solution[r.id] for r in rxns_of_interest}
    met_flux_sum = {m: 0 for m in tra_bou_bio_mets}

    return detect_false_positive_direct_metabolites(tra_bou_bio_mets,
                                                    biomass_rxns, main_comp,
                                                    ext_space,
                                                    tra_bou_bio_fluxes,
                                                    met_flux_sum)
Example #54
def subclass_param(obj, param_name, superclass):
    type_param(obj, param_name)
    if not issubclass(obj, superclass):
        raise_with_traceback(_param_subclass_mismatch_exception(obj, superclass, param_name))

    return obj
Example #55
def str_param(obj: Any, param_name: str) -> str:
    if not isinstance(obj, str):
        raise_with_traceback(
            _param_type_mismatch_exception(obj, str, param_name))
    return obj
Example #56
def opt_type_param(obj, param_name, default=None):
    if obj is not None and not isinstance(obj, type):
        raise_with_traceback(_not_type_param_subclass_mismatch_exception(obj, param_name))
    return obj if obj is not None else default
Example #57
def int_param(obj: Any, param_name: str) -> int:
    if not isinstance(obj, int):
        raise_with_traceback(
            _param_type_mismatch_exception(obj, int, param_name))
    return obj
Example #58
def type_param(obj, param_name):
    if not isinstance(obj, type):
        raise_with_traceback(_not_type_param_subclass_mismatch_exception(obj, param_name))
    return obj
Example #59
def param_invariant(condition, param_name, desc=None):
    if not condition:
        raise_with_traceback(_param_invariant_exception(param_name, desc))
Example #60
    def exec_one_test(self, test_name, test_method):
        """Executes one test and update test results.

        Executes setup_test, the test method, and teardown_test; then creates a
        records.TestResultRecord object with the execution information and adds
        the record to the test class's test results.

        Args:
            test_name: string, Name of the test.
            test_method: function, The test method to execute.
        """
        tr_record = records.TestResultRecord(test_name, self.TAG)
        tr_record.test_begin()
        self.current_test_info = runtime_test_info.RuntimeTestInfo(
            test_name, self.log_path, tr_record)
        expects.recorder.reset_internal_states(tr_record)
        logging.info('%s %s', TEST_CASE_TOKEN, test_name)
        # Whether teardown_test threw an error.
        teardown_test_failed = False
        try:
            try:
                try:
                    self._setup_test(test_name)
                except signals.TestFailure as e:
                    raise_with_traceback(signals.TestError(
                        e.details, e.extras))
                test_method()
            except signals.TestPass:
                raise
            except Exception:
                logging.exception('Exception occurred in %s.',
                                  self.current_test_name)
                raise
            finally:
                before_count = expects.recorder.error_count
                try:
                    self._teardown_test(test_name)
                except signals.TestAbortSignal:
                    raise
                except Exception as e:
                    logging.exception(e)
                    tr_record.test_error()
                    tr_record.add_error(STAGE_NAME_TEARDOWN_TEST, e)
                    teardown_test_failed = True
                else:
                    # Check if anything failed by `expects`.
                    if before_count < expects.recorder.error_count:
                        teardown_test_failed = True
        except (signals.TestFailure, AssertionError) as e:
            tr_record.test_fail(e)
        except signals.TestSkip as e:
            # Test skipped.
            tr_record.test_skip(e)
        except signals.TestAbortSignal as e:
            # Abort signals, pass along.
            tr_record.test_fail(e)
            raise e
        except signals.TestPass as e:
            # Explicit test pass.
            tr_record.test_pass(e)
        except Exception as e:
            # Exception happened during test.
            tr_record.test_error(e)
        else:
            # No exception was thrown from the test or teardown; if `expects`
            # recorded an error, the test should fail with the first error in `expects`.
            if expects.recorder.has_error and not teardown_test_failed:
                tr_record.test_fail()
            # Otherwise the test passed.
            elif not teardown_test_failed:
                tr_record.test_pass()
        finally:
            tr_record.update_record()
            try:
                if tr_record.result in (
                        records.TestResultEnums.TEST_RESULT_ERROR,
                        records.TestResultEnums.TEST_RESULT_FAIL):
                    self._exec_procedure_func(self._on_fail, tr_record)
                elif tr_record.result == records.TestResultEnums.TEST_RESULT_PASS:
                    self._exec_procedure_func(self._on_pass, tr_record)
                elif tr_record.result == records.TestResultEnums.TEST_RESULT_SKIP:
                    self._exec_procedure_func(self._on_skip, tr_record)
            finally:
                logging.info(RESULT_LINE_TEMPLATE, tr_record.test_name,
                             tr_record.result)
                self.results.add_record(tr_record)
                self.summary_writer.dump(tr_record.to_dict(),
                                         records.TestSummaryEntryType.RECORD)
                self.current_test_info = None
                self.current_test_name = None