Exemplo n.º 1
0
def test_trim_function_name_cocoa():
    """Objective-C names stay as they are; native names lose the argument list."""
    # (raw name, platform, expected result) -- checked in this order.
    cases = (
        ('+[foo:(bar)]', 'objc', '+[foo:(bar)]'),
        ('[foo:(bar)]', 'objc', '[foo:(bar)]'),
        ('-[foo:(bar)]', 'objc', '-[foo:(bar)]'),
        ('(anonymous namespace)::foo(int)', 'native',
         '(anonymous namespace)::foo'),
        ('foo::bar::foo(int)', 'native', 'foo::bar::foo'),
    )
    for raw_name, platform, expected in cases:
        assert trim_function_name(raw_name, platform) == expected
Exemplo n.º 2
0
def test_trim_function_name_cocoa():
    """Check ``trim_function_name`` on Objective-C selectors and native symbols."""
    # Objective-C style names are expected to come back unchanged.
    for selector in ('+[foo:(bar)]', '[foo:(bar)]', '-[foo:(bar)]'):
        assert trim_function_name(selector, 'objc') == selector

    # Native names are expected to come back without the argument list.
    native_expectations = [
        ('(anonymous namespace)::foo(int)', '(anonymous namespace)::foo'),
        ('foo::bar::foo(int)', 'foo::bar::foo'),
    ]
    for symbol, expected in native_expectations:
        assert trim_function_name(symbol, 'native') == expected
Exemplo n.º 3
0
def test_trim_function_name_cocoa():
    """Exercise ``trim_function_name`` with "objc" and "native" inputs."""
    expectations = [
        # platform, input, expected output
        ("objc", "+[foo:(bar)]", "+[foo:(bar)]"),
        ("objc", "[foo:(bar)]", "[foo:(bar)]"),
        ("objc", "-[foo:(bar)]", "-[foo:(bar)]"),
        ("native", "(anonymous namespace)::foo(int)",
         "(anonymous namespace)::foo"),
        ("native", "foo::bar::foo(int)", "foo::bar::foo"),
    ]
    for platform, given, wanted in expectations:
        trimmed = trim_function_name(given, platform)
        assert trimmed == wanted
Exemplo n.º 4
0
def merge_symbolicated_frame(new_frame, sfrm):
    """
    Copy the populated fields of the symbolicated frame ``sfrm`` into
    ``new_frame``.

    ``new_frame`` is modified in place and nothing is returned.  Fields of
    ``sfrm`` that are missing or falsy are left alone.  The function name is
    run through ``trim_function_name(..., 'native')``; if that changes it,
    the untrimmed name is kept as ``raw_function`` next to ``function``.
    """
    symbolicated_name = sfrm.get('function')
    if symbolicated_name:
        raw_func = trim(symbolicated_name, 256)
        func = trim(trim_function_name(symbolicated_name, 'native'), 256)

        if func != raw_func:
            # The names differ, so both of them are stored.
            new_frame['raw_function'] = raw_func
            new_frame['function'] = func
        else:
            # Identical names: a separate raw function is not needed.
            new_frame['function'] = raw_func

    for key in ('instruction_addr', 'symbol'):
        value = sfrm.get(key)
        if value:
            new_frame[key] = value

    abs_path = sfrm.get('abs_path')
    if abs_path:
        new_frame['abs_path'] = abs_path
        # Default file name; an explicit ``filename`` below overrides it.
        new_frame['filename'] = posixpath.basename(abs_path)

    for key in ('filename', 'lineno', 'colno', 'package', 'trust'):
        value = sfrm.get(key)
        if value:
            new_frame[key] = value

    status = sfrm.get('status')
    if status:
        new_frame.setdefault('data', {})['symbolicator_status'] = status
Exemplo n.º 5
0
def merge_symbolicated_frame(new_frame, sfrm, platform=None):
    """
    Merge the symbolicated frame ``sfrm`` into ``new_frame`` in place.

    Only truthy values of ``sfrm`` are transferred.  The function name is
    passed to ``trim_function_name`` together with ``platform``; when the
    result differs from the untrimmed name, the untrimmed one is stored as
    ``raw_function`` alongside ``function``.  Returns nothing.
    """

    def transfer(*keys):
        # Copy every listed key whose value in ``sfrm`` is truthy.
        for key in keys:
            value = sfrm.get(key)
            if value:
                new_frame[key] = value

    full_name = sfrm.get('function')
    if full_name:
        raw_func = trim(full_name, 256)
        func = trim(trim_function_name(full_name, platform), 256)
        if func != raw_func:
            # Trimming changed the name: keep both variants.
            new_frame['raw_function'] = raw_func
            new_frame['function'] = func
        else:
            # Nothing was trimmed, so no raw function has to be stored.
            new_frame['function'] = raw_func

    transfer('instruction_addr', 'symbol')

    source_path = sfrm.get('abs_path')
    if source_path:
        new_frame['abs_path'] = source_path
        # May be replaced by an explicit ``filename`` right below.
        new_frame['filename'] = posixpath.basename(source_path)

    transfer('filename', 'lineno', 'colno', 'package', 'trust')

    symbolicator_status = sfrm.get('status')
    if symbolicator_status:
        frame_meta = new_frame.setdefault('data', {})
        frame_meta['symbolicator_status'] = symbolicator_status
Exemplo n.º 6
0
def _merge_frame(new_frame, symbolicated):
    """
    Copy the truthy fields of ``symbolicated`` onto ``new_frame`` in place.

    The function name is run through ``trim_function_name(..., "native")``.
    If the trimmed and untrimmed names differ, the untrimmed one is stored
    as ``raw_function`` next to ``function``.  Nothing is returned.
    """
    function_name = symbolicated.get("function")
    if function_name:
        raw_func = trim(function_name, 256)
        func = trim(trim_function_name(function_name, "native"), 256)

        names_match = func == raw_func
        if not names_match:
            # Both names are stored when they differ.
            new_frame["raw_function"] = raw_func
        new_frame["function"] = raw_func if names_match else func

    # Plain one-to-one copies that come before the path handling.
    for field in ("instruction_addr", "symbol"):
        field_value = symbolicated.get(field)
        if field_value:
            new_frame[field] = field_value

    path = symbolicated.get("abs_path")
    if path:
        new_frame["abs_path"] = path
        new_frame["filename"] = posixpath.basename(path)

    # An explicit ``filename`` replaces the one derived from ``abs_path``.
    for field in ("filename", "lineno", "colno", "package", "trust"):
        field_value = symbolicated.get(field)
        if field_value:
            new_frame[field] = field_value

    status = symbolicated.get("status")
    if status:
        frame_meta = new_frame.setdefault("data", {})
        frame_meta["symbolicator_status"] = status
Exemplo n.º 7
0
def normalize_stacktraces_for_grouping(data, grouping_config=None):
    """
    Prepare every stack trace found in ``data`` for grouping.

    For each frame this restores ``in_app`` from ``data.orig_in_app`` when
    present and stores a trimmed function name when trimming changes it.
    Afterwards the enhancers of ``grouping_config`` (if given) are applied
    and ``_normalize_in_app`` is run on every stack trace.  Frames are
    modified in place; returns ``None``.
    """

    # Pairs of (frames, exception container or None), one per stack trace
    # that actually has frames.
    collected = []
    for info in find_stacktraces_in_data(data, include_raw=True):
        frame_list = get_path(info.stacktrace,
                              "frames",
                              filter=True,
                              default=())
        if not frame_list:
            continue
        container = info.container if info.is_exception else None
        collected.append((frame_list, container))

    if not collected:
        return

    platform = data.get("platform")

    for frame_list, _ in collected:
        for frame in frame_list:
            # Restore the in_app value from before the first run of the
            # grouping enhancers, so they can be re-applied on the original
            # frame data.  A stored -1 stands for "no value".
            saved_in_app = get_path(frame, "data", "orig_in_app")
            if saved_in_app is not None:
                if saved_in_app == -1:
                    frame["in_app"] = None
                else:
                    frame["in_app"] = bool(saved_in_app)

            # Only store a trimmed name if no raw function exists yet and
            # trimming really changes the name; this keeps the payload from
            # growing unnecessarily.
            if frame.get("raw_function") is None:
                original_name = frame.get("function")
                if original_name:
                    trimmed_name = trim_function_name(
                        original_name,
                        frame.get("platform") or platform)
                    if trimmed_name != original_name:
                        frame["raw_function"] = original_name
                        frame["function"] = trimmed_name

    # Grouping enhancers only run when a grouping config was passed in.
    if grouping_config is not None:
        for frame_list, container in collected:
            grouping_config.enhancements.apply_modifications_to_frame(
                frame_list, platform, container)

    # Finally normalize in-app on every stack trace.
    for frame_list, _ in collected:
        _normalize_in_app(frame_list)
Exemplo n.º 8
0
def normalize_stacktraces_for_grouping(data, grouping_config=None):
    """
    Prepare every stack trace found in ``data`` for grouping.

    Stores a trimmed function name on frames where trimming changes the
    name, applies the enhancers of ``grouping_config`` (if given) and runs
    ``_normalize_in_app`` on every stack trace.  Frames are modified in
    place; returns ``None``.
    """

    # Frame lists of all stack traces that have at least one frame.
    stacktraces = [
        frames
        for frames in (
            get_path(info.stacktrace, 'frames', filter=True, default=())
            for info in find_stacktraces_in_data(data, include_raw=True)
        )
        if frames
    ]
    if not stacktraces:
        return

    platform = data.get('platform')

    # The trimmed name is only written when it differs from what is stored
    # in `function`, so the payload does not grow unnecessarily.
    for frames in stacktraces:
        for frame in frames:
            if frame.get('raw_function') is None:
                original_name = frame.get('function')
                if original_name:
                    trimmed_name = trim_function_name(
                        original_name,
                        frame.get('platform') or platform)
                    if trimmed_name != original_name:
                        frame['raw_function'] = original_name
                        frame['function'] = trimmed_name

    # Grouping enhancers only run when a grouping config was passed in.
    if grouping_config is not None:
        enhancements = grouping_config.enhancements
        for frames in stacktraces:
            enhancements.apply_modifications_to_frame(frames, platform)

    # Finally normalize in-app on every stack trace.
    for frames in stacktraces:
        _normalize_in_app(frames, platform=platform)
Exemplo n.º 9
0
def normalize_stacktraces_for_grouping(data, grouping_config=None):
    """
    Trim function names, run grouping enhancers and normalize in-app for all
    stack traces found in ``data``.

    Frames are updated in place and ``None`` is returned.  Without any
    non-empty stack trace the function returns early and does nothing.
    """

    stacktraces = []
    for stacktrace_info in find_stacktraces_in_data(data, include_raw=True):
        found = get_path(stacktrace_info.stacktrace, 'frames', filter=True, default=())
        if not found:
            continue
        stacktraces.append(found)

    if not stacktraces:
        return

    platform = data.get('platform')

    # Frames that have no raw function yet but do have a function name.
    # The generator is lazy, so each frame is inspected right before it is
    # processed.
    trim_candidates = (
        frame
        for frames in stacktraces
        for frame in frames
        if frame.get('raw_function') is None and frame.get('function')
    )
    for frame in trim_candidates:
        raw_func = frame['function']
        trimmed = trim_function_name(raw_func, frame.get('platform') or platform)
        # Only store the trimmed name when it differs, to not make the
        # payload larger unnecessarily.
        if trimmed != raw_func:
            frame['raw_function'] = raw_func
            frame['function'] = trimmed

    # Grouping enhancers only run when a grouping config was passed in.
    if grouping_config is not None:
        for frames in stacktraces:
            grouping_config.enhancements.apply_modifications_to_frame(frames, platform)

    # Finally normalize in-app on every stack trace.
    for frames in stacktraces:
        _normalize_in_app(frames, platform=platform)
Exemplo n.º 10
0
def get_function_component(
    context,
    function,
    raw_function,
    platform,
    sourcemap_used=False,
    context_line_available=False,
):
    """
    Build the "function" grouping component for a frame.

    The name is picked from ``function`` / ``raw_function``: the raw name is
    preferred when ``context["legacy_function_logic"]`` is set or the
    platform is "csharp"; otherwise ``function`` is preferred and, if there
    is no raw name, trimmed with ``trim_function_name``.  An empty component
    is returned when no name is available.

    Platform specific adjustments follow:

    - ruby: names starting with "block " become "block"; otherwise whatever
      ``_ruby_erb_func`` matches is removed.
    - php: names starting with "[Anonymous" or "class@anonymous\\x00" do not
      contribute; with ``context["php_detect_anonymous_classes"]`` names
      starting with "class@anonymous" are cut down to the part after the
      last "::".
    - java: names starting with "lambda$" do not contribute.
    - native family: "<redacted>" and "<unknown>" do not contribute; under
      the legacy logic the name is trimmed without lambda normalization.
    - javascript family, with ``context["javascript_fuzzing"]``: the name is
      cut down to the part after the last "."; the component does not
      contribute when a sourcemap was used and the context line is
      available.
    """
    from sentry.stacktraces.functions import trim_function_name

    family = get_behavior_family_for_platform(platform)
    use_legacy_logic = context["legacy_function_logic"]

    # We started trimming function names in csharp late which changed the
    # inputs to the grouping code.  `function` used to hold the raw,
    # untrimmed string and now holds the trimmed one, so for csharp the raw
    # function name is preferred over the function name.
    # TODO: if a frame:v5 is added the raw function name should not be preferred
    # for csharp.
    prefer_raw_function_name = platform == "csharp"

    if use_legacy_logic or prefer_raw_function_name:
        func = raw_function or function
    elif function and not raw_function:
        func = trim_function_name(function, platform)
    else:
        func = function or raw_function

    if not func:
        return GroupingComponent(id="function")

    component = GroupingComponent(
        id="function",
        values=[func],
        similarity_encoder=ident_encoder,
    )

    if platform == "ruby":
        if func.startswith("block "):
            component.update(values=["block"], hint="ruby block")
        else:
            without_suffix = _ruby_erb_func.sub("", func)
            if without_suffix != func:
                component.update(
                    values=[without_suffix],
                    hint="removed generated erb template suffix",
                )

    elif platform == "php":
        if func.startswith(("[Anonymous", "class@anonymous\x00")):
            component.update(contributes=False,
                             hint="ignored anonymous function")
        if context["php_detect_anonymous_classes"] and func.startswith(
                "class@anonymous"):
            method_name = func.rsplit("::", 1)[-1]
            if method_name != func:
                component.update(values=[method_name],
                                 hint="anonymous class method")

    elif platform == "java":
        if func.startswith("lambda$"):
            component.update(contributes=False,
                             hint="ignored lambda function")

    elif family == "native":
        if func in ("<redacted>", "<unknown>"):
            component.update(contributes=False,
                             hint="ignored unknown function")
        elif use_legacy_logic:
            isolated = trim_function_name(func,
                                          platform,
                                          normalize_lambdas=False)
            if isolated != func:
                component.update(values=[isolated],
                                 hint="isolated function")

    elif context["javascript_fuzzing"] and family == "javascript":
        # Turn Object.foo or Foo.foo into foo so that some common cross
        # browser differences are resolved.
        last_segment = func.rsplit(".", 1)[-1]
        if last_segment != func:
            component.update(values=[last_segment],
                             hint="trimmed javascript function")

        # Function names coming from sourcemaps are unreliable and thus a bad
        # indicator for grouping.  When a sourcemap was used and the context
        # line can be used instead, the function name no longer contributes.
        if sourcemap_used and context_line_available:
            component.update(
                contributes=False,
                contributes_to_similarity=True,
                hint="ignored because sourcemap used and context line available",
            )

    if component.values and context["hierarchical_grouping"]:
        component.update(tree_label=component.values[0])

    return component
Exemplo n.º 11
0
def get_function_component(function, platform, legacy_function_logic,
                           sourcemap_used=False, context_line_available=False,
                           raw_function=None, javascript_fuzzing=False):
    """
    Build the "function" grouping component for a frame.

    With ``legacy_function_logic`` the raw function name is preferred (and,
    for the native family, trimmed afterwards).  Without it ``function`` is
    preferred and, if there is no raw name, trimmed with
    ``trim_function_name``.  An empty component is returned when no name is
    available.

    Platform specific adjustments:

    - ruby: names starting with "block " become "block"; otherwise whatever
      ``_ruby_erb_func`` matches is removed.
    - php: names starting with "[Anonymous" do not contribute.
    - java: names starting with "lambda$" do not contribute.
    - native family: "<redacted>" and "<unknown>" do not contribute.
    - javascript family, with ``javascript_fuzzing``: the name is cut down to
      the part after the last "."; the component does not contribute when a
      sourcemap was used and the context line is available.
    """
    from sentry.stacktraces.functions import trim_function_name
    behavior_family = get_behavior_family_for_platform(platform)

    if legacy_function_logic:
        func = raw_function or function
    elif function and not raw_function:
        func = trim_function_name(function, platform)
    else:
        func = function or raw_function

    if not func:
        return GroupingComponent(id='function')

    component = GroupingComponent(id='function', values=[func])

    def ignore(reason):
        # Exclude the function name from grouping, recording why.
        component.update(contributes=False, hint=reason)

    def replace_if_changed(candidate, reason):
        # Swap in the rewritten name, but only if rewriting changed it.
        if candidate != func:
            component.update(values=[candidate], hint=reason)

    if platform == 'ruby':
        if func.startswith('block '):
            component.update(values=['block'], hint='ruby block')
        else:
            replace_if_changed(_ruby_erb_func.sub('', func),
                               'removed generated erb template suffix')

    elif platform == 'php':
        if func.startswith('[Anonymous'):
            ignore('ignored anonymous function')

    elif platform == 'java':
        if func.startswith('lambda$'):
            ignore('ignored lambda function')

    elif behavior_family == 'native':
        if func in ('<redacted>', '<unknown>'):
            ignore('ignored unknown function')
        elif legacy_function_logic:
            replace_if_changed(trim_function_name(func, platform),
                               'isolated function')

    elif javascript_fuzzing and behavior_family == 'javascript':
        # Turn Object.foo or Foo.foo into foo so that some common cross
        # browser differences are resolved.
        replace_if_changed(func.rsplit('.', 1)[-1],
                           'trimmed javascript function')

        # Function names coming from sourcemaps are unreliable and thus a bad
        # indicator for grouping.  When a sourcemap was used and the context
        # line can be used instead, the function name no longer contributes.
        if sourcemap_used and context_line_available:
            ignore('ignored because sourcemap used and context line available')

    return component
Exemplo n.º 12
0
    def process_frame(self, processable_frame, processing_task):
        """
        Symbolicate one frame and return ``(new_frames, raw_frames, errors)``.

        ``raw_frames`` is always a one-element list holding a copy of the
        incoming frame (with ``package`` filled in).  ``new_frames`` holds
        one merged frame per symbolicated frame.  When the symbolizer yields
        nothing, the first element is ``[]`` for frames whose ``trust`` is
        'scan' and ``None`` otherwise.  When symbolication fails, the raw
        frame is returned as the new frame together with the collected
        errors.  ``processing_task`` is not used here.
        """
        raw_frame = dict(processable_frame.frame)

        # Ensure that package is set in the raw frame, mapped from the
        # debug_images array in the payload. Grouping and UI can use this
        # path to infer in_app and exclude frames from grouping.
        if raw_frame.get('package') is None:
            obj = processable_frame.data['obj']
            raw_frame['package'] = (obj.code_file or None) if obj else None

        cached = processable_frame.cache_value
        if cached is not None:
            # The first cache slot used to hold in_app and is ignored now.
            _ignored, symbolicated_frames = cached
        else:
            # Only the bare minimum needed by the symbolizer backend is
            # assembled here.
            instruction_addr = processable_frame.data['instruction_addr']

            debug_id = processable_frame.data['debug_id']
            if debug_id is not None:
                self.difs_referenced.add(debug_id)

            try:
                symbolicated_frames = self.sym.symbolize_frame(
                    instruction_addr,
                    self.sdk_info,
                    symbolserver_match=processable_frame.
                    data['symbolserver_match'],
                    symbolicator_match=processable_frame.data.get(
                        'symbolicator_match'),
                    trust=raw_frame.get('trust'),
                )
            except SymbolicationFailed as exc:
                errors = []
                self._handle_symbolication_failed(exc, errors=errors)
                return [raw_frame], [raw_frame], errors

            if not symbolicated_frames:
                unresolved = [] if raw_frame.get('trust') == 'scan' else None
                return unresolved, [raw_frame], []

            # The first slot used to be in_app and is kept as a placeholder.
            processable_frame.set_cache_value([None, symbolicated_frames])

        platform = raw_frame.get('platform') or self.data.get('platform')
        new_frames = []
        for symbolicated in symbolicated_frames:
            merged = dict(raw_frame)

            full_name = symbolicated['function']
            raw_func = trim(full_name, 256)
            func = trim(trim_function_name(full_name, platform), 256)

            if func != raw_func:
                # The names differ, so both of them are stored.
                merged['raw_function'] = raw_func
                merged['function'] = func
            else:
                # Identical names: a separate raw function is not needed.
                merged['function'] = raw_func

            symbol = symbolicated.get('symbol')
            if symbol:
                merged['symbol'] = symbol

            abs_path = symbolicated.get('abs_path')
            if abs_path:
                merged['abs_path'] = abs_path
                # An explicit ``filename`` below overrides this default.
                merged['filename'] = posixpath.basename(abs_path)

            for key in ('filename', 'lineno', 'colno', 'package'):
                value = symbolicated.get(key)
                if value:
                    merged[key] = value

            new_frames.append(merged)

        return new_frames, [raw_frame], []
Exemplo n.º 13
0
def test_trim_native_function_name(input, output):
    """Trimming ``input`` for the "native" platform must give ``output``."""
    trimmed = trim_function_name(input, "native")
    assert trimmed == output
Exemplo n.º 14
0
def test_trim_csharp_function_name(input, output):
    """Trimming ``input`` for the "csharp" platform must give ``output``."""
    trimmed = trim_function_name(input, "csharp")
    assert trimmed == output
Exemplo n.º 15
0
def test_trim_function_name(input, output):
    """Trimming ``input`` for the 'native' platform must give ``output``."""
    actual = trim_function_name(input, 'native')
    assert actual == output
Exemplo n.º 16
0
def get_function_component(
    function,
    platform,
    legacy_function_logic,
    sourcemap_used=False,
    context_line_available=False,
    raw_function=None,
    javascript_fuzzing=False,
):
    """
    Build the "function" grouping component for a frame.

    ``legacy_function_logic`` switches between two ways of choosing the
    name: when set, the raw function name is preferred and, for the native
    family, trimmed afterwards without lambda normalization.  When unset,
    ``function`` is preferred and, if there is no raw name, trimmed with
    ``trim_function_name``.  An empty component is returned when no name is
    available.

    Platform specific adjustments:

    - ruby: names starting with "block " become "block"; otherwise whatever
      ``_ruby_erb_func`` matches is removed.
    - php: names starting with "[Anonymous" do not contribute.
    - java: names starting with "lambda$" do not contribute.
    - native family: "<redacted>" and "<unknown>" do not contribute.
    - javascript family, with ``javascript_fuzzing``: the name is cut down to
      the part after the last "."; the component does not contribute when a
      sourcemap was used and the context line is available.
    """
    from sentry.stacktraces.functions import trim_function_name

    family = get_behavior_family_for_platform(platform)

    if legacy_function_logic:
        func = raw_function or function
    elif function and not raw_function:
        func = trim_function_name(function, platform)
    else:
        func = function or raw_function

    if not func:
        return GroupingComponent(id="function")

    component = GroupingComponent(id="function", values=[func])

    if platform == "ruby":
        if func.startswith("block "):
            component.update(values=["block"], hint="ruby block")
        else:
            without_suffix = _ruby_erb_func.sub("", func)
            if without_suffix != func:
                component.update(
                    values=[without_suffix],
                    hint="removed generated erb template suffix",
                )

    elif platform == "php":
        if func.startswith("[Anonymous"):
            component.update(contributes=False,
                             hint="ignored anonymous function")

    elif platform == "java":
        if func.startswith("lambda$"):
            component.update(contributes=False,
                             hint="ignored lambda function")

    elif family == "native":
        if func in ("<redacted>", "<unknown>"):
            component.update(contributes=False,
                             hint="ignored unknown function")
        elif legacy_function_logic:
            isolated = trim_function_name(func,
                                          platform,
                                          normalize_lambdas=False)
            if isolated != func:
                component.update(values=[isolated],
                                 hint="isolated function")

    elif javascript_fuzzing and family == "javascript":
        # Turn Object.foo or Foo.foo into foo so that some common cross
        # browser differences are resolved.
        last_segment = func.rsplit(".", 1)[-1]
        if last_segment != func:
            component.update(values=[last_segment],
                             hint="trimmed javascript function")

        # Function names coming from sourcemaps are unreliable and thus a bad
        # indicator for grouping.  When a sourcemap was used and the context
        # line can be used instead, the function name no longer contributes.
        if sourcemap_used and context_line_available:
            component.update(
                contributes=False,
                hint="ignored because sourcemap used and context line available",
            )

    return component
Exemplo n.º 17
0
def test_trim_cocoa_function_name(input, output):
    """Trimming ``input`` for the "cocoa" platform must give ``output``."""
    trimmed = trim_function_name(input, "cocoa")
    assert trimmed == output
Exemplo n.º 18
0
def test_trim_function_name(input, output):
    """``trim_function_name`` with platform 'native' must map ``input`` to ``output``."""
    result = trim_function_name(input, 'native')
    assert result == output