Example #1
def fix_type_error(exc_info, callable, varargs, kwargs):
    """
    Given an exception, this will test if the exception was due to a
    signature error, and annotate the error with better information if
    so.

    Usage::

      try:
          val = callable(*args, **kw)
      except TypeError:
          exc_info = fix_type_error(None, callable, args, kw)
          raise exc_info[1].with_traceback(exc_info[2])
    """
    if exc_info is None:
        exc_info = sys.exc_info()
    if (exc_info[0] != TypeError
            or str(exc_info[1]).find('argument') == -1
            or getattr(exc_info[1], '_type_error_fixed', False)):
        return exc_info
    exc_info[1]._type_error_fixed = True
    # note: inspect.formatargspec is deprecated (removed in Python 3.11);
    # inspect.signature would be the modern replacement
    argspec = inspect.formatargspec(*getfullargspec(callable))
    args = ', '.join(map(_short_repr, varargs))
    if kwargs and args:
        args += ', '
    if kwargs:
        kwargs = sorted(kwargs.keys())
        args += ', '.join(f'{n}=...' for n in kwargs)
    gotspec = f'({args})'
    msg = f'{exc_info[1]}; got {gotspec}, wanted {argspec}'
    exc_info[1].args = (msg,)
    return exc_info
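
A minimal sketch of how the helper above might be exercised, assuming fix_type_error and its module-level dependencies (sys, inspect, _short_repr) are in scope; greet is just an illustrative function:

def greet(name, greeting="hello"):
    return f"{greeting}, {name}"

try:
    greet("alice", "hi", "extra")  # one positional argument too many
except TypeError:
    exc_info = fix_type_error(None, greet, ("alice", "hi", "extra"), {})
    # re-raise with the annotated message, roughly:
    # "greet() takes from 1 to 2 positional arguments but 3 were given;
    #  got ('alice', 'hi', 'extra'), wanted (name, greeting='hello')"
    raise exc_info[1].with_traceback(exc_info[2])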
Example #2
def __extract_payload_from_request(trans, func, kwargs):
    content_type = trans.request.headers.get('content-type', '')
    if content_type.startswith(
            'application/x-www-form-urlencoded') or content_type.startswith(
                'multipart/form-data'):
        # For standard content types such as multipart/form-data, the wsgi framework parses the request body
        # and loads all field values into kwargs. However, kwargs also contains the formal method parameters,
        # which are not part of the request body, and there is no way to tell the two apart. The decorated
        # method's formal arguments are therefore discovered through reflection and removed from the payload
        # dictionary, which also prevents duplicate-argument conflicts in downstream methods.
        payload = kwargs.copy()
        named_args = getfullargspec(func).args
        for arg in named_args:
            payload.pop(arg, None)
        for k, v in payload.items():
            if isinstance(v, str):
                try:
                    # note: parse_non_hex_float is only needed here for single string values,
                    # where something like 40000000000000e5 would otherwise be parsed as a
                    # scientific-notation float; hex strings inside larger JSON structures
                    # are protected by their quoting (further below)
                    payload[k] = loads(v, parse_float=parse_non_hex_float)
                except Exception:
                    # may not actually be json, just continue
                    pass
    else:
        # Assume application/json content type and parse request body manually, since wsgi won't do it. However, the order of this check
        # should ideally be in reverse, with the if clause being a check for application/json and the else clause assuming a standard encoding
        # such as multipart/form-data. Leaving it as is for backward compatibility, just in case.
        payload = loads(unicodify(trans.request.body))
        run_as = trans.request.headers.get('run-as')
        if run_as:
            payload['run_as'] = run_as
    return payload
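
The core move here is using getfullargspec to discover the decorated method's formal parameters and strip them out of kwargs before treating the remainder as the request payload. A self-contained sketch of that idea (strip_formal_args and create are illustrative names, not part of the Galaxy API):

from inspect import getfullargspec
from json import loads

def strip_formal_args(func, kwargs):
    # drop keys that match the function's formal parameters; the rest is payload
    payload = {k: v for k, v in kwargs.items() if k not in getfullargspec(func).args}
    for k, v in payload.items():
        if isinstance(v, str):
            try:
                payload[k] = loads(v)  # best effort: values may be JSON-encoded strings
            except ValueError:
                pass
    return payload

def create(trans, history_id, **kwd):
    pass

print(strip_formal_args(create, {"history_id": "abc", "name": '"My dataset"', "tags": '["a", "b"]'}))
# -> {'name': 'My dataset', 'tags': ['a', 'b']}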
Example #3
def lint_tool_source_with(lint_context, tool_source, extra_modules=[]):
    import galaxy.tool_util.linters
    tool_xml = getattr(tool_source, "xml_tree", None)
    linter_modules = submodules.import_submodules(galaxy.tool_util.linters,
                                                  ordered=True)
    linter_modules.extend(extra_modules)
    for module in linter_modules:
        tool_type = tool_source.parse_tool_type() or "default"
        lint_tool_types = getattr(module, "lint_tool_types", ["default"])
        if not ("*" in lint_tool_types or tool_type in lint_tool_types):
            continue

        for (name, value) in inspect.getmembers(module):
            if callable(value) and name.startswith("lint_"):
                # Look at the first argument to the linter to decide
                # if we should lint the XML description or the abstract
                # tool parser object.
                first_arg = getfullargspec(value).args[0]
                if first_arg == "tool_xml":
                    if tool_xml is None:
                        # XML linter and non-XML tool, skip for now
                        continue
                    else:
                        lint_context.lint(name, value, tool_xml)
                else:
                    lint_context.lint(name, value, tool_source)
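
A hypothetical linter module illustrating the dispatch above: functions whose names start with lint_ are discovered via inspect.getmembers, and the name of their first parameter decides whether they receive the parsed XML tree or the abstract tool source (lint_ctx stands in for the lint context object):

from inspect import getfullargspec

lint_tool_types = ["default"]

def lint_help(tool_xml, lint_ctx):
    # first argument named "tool_xml" -> called with the parsed XML tree
    if tool_xml.find("./help") is None:
        lint_ctx.warn("No help section found.")

def lint_version(tool_source, lint_ctx):
    # any other first argument -> called with the abstract tool source
    if not tool_source.parse_version():
        lint_ctx.error("Tool does not declare a version.")

print(getfullargspec(lint_help).args[0])     # 'tool_xml'
print(getfullargspec(lint_version).args[0])  # 'tool_source'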
Example #4
    def __invoke_expand_function(self, expand_function, destination):
        function_arg_names = getfullargspec(expand_function).args
        app = self.job_wrapper.app
        possible_args = {
            "job_id": self.job_wrapper.job_id,
            "tool": self.job_wrapper.tool,
            "tool_id": self.job_wrapper.tool.id,
            "job_wrapper": self.job_wrapper,
            "rule_helper": RuleHelper(app),
            "app": app,
            "referrer": destination
        }

        actual_args = {}

        # Send through any job_conf.xml defined args to function
        for destination_param in destination.params.keys():
            if destination_param in function_arg_names:
                actual_args[destination_param] = destination.params[destination_param]

        # Populate needed args
        for possible_arg_name in possible_args:
            if possible_arg_name in function_arg_names:
                actual_args[possible_arg_name] = possible_args[possible_arg_name]

        # Don't hit the DB to load the job object if not needed
        require_db = False
        for param in ["job", "user", "user_email", "resource_params", "workflow_invocation_uuid"]:
            if param in function_arg_names:
                require_db = True
                break
        if require_db:
            job = self.job_wrapper.get_job()
            user = job.user
            user_email = user and str(user.email)

            if "job" in function_arg_names:
                actual_args["job"] = job

            if "user" in function_arg_names:
                actual_args["user"] = user

            if "user_email" in function_arg_names:
                actual_args["user_email"] = user_email

            if "resource_params" in function_arg_names:
                actual_args["resource_params"] = self.job_wrapper.get_resource_parameters(job)

            if "workflow_invocation_uuid" in function_arg_names:
                param_values = job.raw_param_dict()
                workflow_invocation_uuid = param_values.get("__workflow_invocation_uuid__", None)
                actual_args["workflow_invocation_uuid"] = workflow_invocation_uuid

            if "workflow_resource_params" in function_arg_names:
                param_values = job.raw_param_dict()
                workflow_resource_params = param_values.get("__workflow_resource_params__", None)
                actual_args["workflow_resource_params"] = workflow_resource_params

        return expand_function(**actual_args)
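
A small sketch of the calling convention this enables: an expand function declares only the parameters it cares about, and the caller builds actual_args by intersecting those names with everything it could supply (names below are illustrative; referrer is a plain string rather than a destination object for brevity):

from inspect import getfullargspec

def expand_by_tool(tool_id, referrer):
    # a dynamic rule that only needs two of the many possible arguments
    cores = 8 if tool_id.startswith("bwa") else 1
    return f"{referrer}_with_{cores}_cores"

possible_args = {"job_id": 42, "tool_id": "bwa_mem", "referrer": "slurm", "app": None}
wanted = getfullargspec(expand_by_tool).args
actual_args = {name: value for name, value in possible_args.items() if name in wanted}
print(expand_by_tool(**actual_args))  # -> 'slurm_with_8_cores'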
Example #5
def verify_assertion(data, assertion_description):
    tag = assertion_description["tag"]
    assert_function_name = "assert_" + tag
    assert_function = None
    for assertion_module in assertion_modules:
        if hasattr(assertion_module, assert_function_name):
            assert_function = getattr(assertion_module, assert_function_name)

    if assert_function is None:
        errmsg = "Unable to find test function associated with XML tag '%s'. Check your tool file syntax." % tag
        raise AssertionError(errmsg)

    assert_function_args = getfullargspec(assert_function).args
    args = {}
    for attribute, value in assertion_description["attributes"].items():
        if attribute in assert_function_args:
            args[attribute] = value

    # Three special arguments are populated automatically, independently of
    # the tool XML attributes. "output" is passed in as the contents of the
    # output file. "verify_assertions_function" is passed in as the
    # verify_assertions function defined above, which allows assertions to be
    # checked recursively on subsections of the output. "children" is the
    # parsed version of the child elements of the XML element describing this
    # assertion. See assert_element_text in test/base/asserts/xml.py for an
    # example of how to use verify_assertions_function and children in
    # conjunction to apply assertion checking to a subset of the input. The
    # parsed child elements do not need to define assertions themselves;
    # developers of assertion functions can also use them in novel ways to
    # define inputs to the assertion-checking function (for instance, consider
    # the following fictional assertion function for checking column titles of
    # tabular output:
    # <has_column_titles><with_name name="sequence"/><with_name name="probability"/></has_column_titles>).
    if "output" in assert_function_args:
        # If the assert_function will have an attribute called "output"
        # the data passed from the test to the function will be unicodified.
        # This is because most of the assert functions are working on pure
        # text files.
        args["output"] = unicodify(data)
    if "output_bytes" in assert_function_args:
        # This will read in data as bytes and will not change it prior passing
        # it to the assert_function
        args["output_bytes"] = data

    if "verify_assertions_function" in assert_function_args:
        args["verify_assertions_function"] = verify_assertions

    if "children" in assert_function_args:
        args["children"] = assertion_description["children"]

    # TODO: Verify all needed function arguments are specified.
    assert_function(**args)
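
A condensed sketch of the attribute filtering above, with a toy assertion function (the real assert functions live in modules scanned into assertion_modules; names here are simplified):

from inspect import getfullargspec

def assert_has_text(output, text):
    # "text" comes from the XML attributes; "output" is injected automatically
    assert text in output, f"Output does not contain '{text}'"

attributes = {"text": "Hello", "min": "1"}  # "min" is not accepted and gets dropped
accepted = getfullargspec(assert_has_text).args
args = {k: v for k, v in attributes.items() if k in accepted}
args["output"] = "Hello world"
assert_has_text(**args)  # passes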
Example #6
def do_merge(job_wrapper, task_wrappers):
    parallel_settings = job_wrapper.get_parallelism().attributes
    # Syntax: merge_outputs="export" pickone_outputs="genomesize"
    # Designates outputs to be merged, or selected from as a representative
    merge_outputs = parallel_settings.get("merge_outputs")
    if merge_outputs is None:
        merge_outputs = []
    else:
        merge_outputs = [x.strip() for x in merge_outputs.split(",")]
    pickone_outputs = parallel_settings.get("pickone_outputs")
    if pickone_outputs is None:
        pickone_outputs = []
    else:
        pickone_outputs = [x.strip() for x in pickone_outputs.split(",")]

    illegal_outputs = [x for x in merge_outputs if x in pickone_outputs]
    if len(illegal_outputs) > 0:
        return ('Tool file error',
                'Outputs have conflicting parallelism attributes: %s' %
                str(illegal_outputs))

    stdout = ''
    stderr = ''

    try:
        working_directory = job_wrapper.working_directory
        task_dirs = [
            os.path.join(working_directory, x)
            for x in os.listdir(working_directory) if x.startswith('task_')
        ]
        assert task_dirs, "Should be at least one sub-task!"
        # TODO: Output datasets can be very complex. This doesn't handle metadata files
        outputs = job_wrapper.get_output_hdas_and_fnames()
        output_paths = job_wrapper.get_output_fnames()
        pickone_done = []
        task_dirs.sort(key=lambda x: int(x.split('task_')[-1]))
        for index, output in enumerate(outputs):
            output_file_name = str(
                output_paths[index])  # Use false_path if set, else real path.
            base_output_name = os.path.basename(output_file_name)
            if output in merge_outputs:
                output_dataset = outputs[output][0]
                output_type = output_dataset.datatype
                output_files = [
                    os.path.join(dir, base_output_name) for dir in task_dirs
                ]
                # Just include those files f in the output list for which the
                # file f exists; some files may not exist if a task fails.
                output_files = [f for f in output_files if os.path.exists(f)]
                if output_files:
                    log.debug('files %s ' % output_files)
                    if len(output_files) < len(task_dirs):
                        log.debug(
                            'merging only %i out of expected %i files for %s' %
                            (len(output_files), len(task_dirs),
                             output_file_name))
                    # First two args to merge always output_files and path of dataset. More
                    # complicated merge methods may require more parameters. Set those up here.
                    extra_merge_arg_names = getfullargspec(
                        output_type.merge).args[2:]
                    extra_merge_args = {}
                    if "output_dataset" in extra_merge_arg_names:
                        extra_merge_args["output_dataset"] = output_dataset
                    output_type.merge(output_files, output_file_name,
                                      **extra_merge_args)
                    log.debug('merge finished: %s' % output_file_name)
                else:
                    msg = 'nothing to merge for %s (expected %i files)' \
                          % (output_file_name, len(task_dirs))
                    log.debug(msg)
                    stderr += msg + "\n"
            elif output in pickone_outputs:
                # just pick one of them
                if output not in pickone_done:
                    task_file_name = os.path.join(task_dirs[0],
                                                  base_output_name)
                    shutil.move(task_file_name, output_file_name)
                    pickone_done.append(output)
            else:
                log_error = "The output '%s' does not define a method for implementing parallelism" % output
                log.exception(log_error)
                raise Exception(log_error)
    except Exception as e:
        stdout = 'Error merging files'
        log.exception(stdout)
        stderr = util.unicodify(e)

    for tw in task_wrappers:
        # Prevent repetitive output, e.g. "Sequence File Aligned"x20
        # Eventually do a reduce for jobs that output "N reads mapped", combining all N for tasks.
        out = tw.get_task().stdout.strip()
        err = tw.get_task().stderr.strip()
        if len(out) > 0:
            stdout += "\n" + tw.working_directory + ':\n' + out
        if len(err) > 0:
            stderr += "\n" + tw.working_directory + ':\n' + err
    return (stdout, stderr)
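
The same introspection trick drives the merge call: the first two parameters of a datatype's merge are always the per-task files and the destination path, and getfullargspec decides whether optional extras such as output_dataset should be passed. A minimal sketch with a hypothetical datatype (TabularLike is not a Galaxy class):

import shutil
from inspect import getfullargspec

class TabularLike:
    @staticmethod
    def merge(split_files, output_file, output_dataset=None):
        # naive merge: concatenate the per-task files into the final output
        with open(output_file, "wb") as out:
            for name in split_files:
                with open(name, "rb") as src:
                    shutil.copyfileobj(src, out)

extra_merge_arg_names = getfullargspec(TabularLike.merge).args[2:]
print(extra_merge_arg_names)  # -> ['output_dataset']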