Exemplo n.º 1
0
def derive(strings):
    """Normalize the given strings and expand them through the analyzers.

    Every non-empty string is typed with ``Observable.guess_type`` and
    normalized; strings rejected with ``ObservableValidationError`` are kept
    verbatim. Each analyzer registered for an observable's class is then
    asked for further strings, and any string not seen yet triggers another
    round of derivation.

    Args:
        strings: a value accepted by ``iterify`` that yields candidate
            strings.

    Returns:
        A ``(values, extended)`` tuple of sets. ``values`` holds the
        normalized inputs of this call; ``extended`` is the set produced by
        the deepest recursive round (the very same object as ``values`` when
        nothing new was derived).
    """
    normalized_values = set()
    typed = set()

    for raw in iterify(strings):
        if not raw:
            continue
        try:
            observable_class = Observable.guess_type(raw)
            candidate = observable_class(value=raw)
            candidate.normalize()
            typed.add(candidate)
            normalized_values.add(candidate.value)
        except ObservableValidationError:
            # Validation failed: remember the raw string as it was given.
            normalized_values.add(raw)

    discovered = []
    for observable in typed:
        for analyzer in analyzers.get(observable.__class__, []):
            discovered += [
                found for found in analyzer.analyze_string(observable.value)
                if found and found not in normalized_values
            ]

    if not discovered:
        return normalized_values, normalized_values

    # Only the extended set of the recursive round is of interest here.
    _, extended = derive(discovered + list(normalized_values))
    return normalized_values, extended
Exemplo n.º 2
0
    def post(self):
        """Match the observables sent in the JSON request body.

        The body is expected to carry an ``"observables"`` list whose items
        have a ``"value"`` (and optionally ``"tags"``), plus an optional
        ``"params"`` dict. When ``params["save_query"]`` is truthy, each
        valid observable is saved, tagged and given the ``"query"`` source.
        Items failing validation are skipped.

        Returns:
            The result of rendering the match data with ``analysis.html``.
        """
        payload = request.get_json(silent=True)
        params = payload.pop("params", {})
        values = []

        for entry in payload["observables"]:
            try:
                observable_class = Observable.guess_type(entry['value'])
                observable = observable_class(value=entry['value'])
                observable.clean()
                values.append(observable.value)

                # Persist the observable and its tags only on request.
                if params.get('save_query', False):
                    saved = observable.save()
                    saved.tag(entry.get("tags", []))
                    saved.add_source("query")
            except ObservableValidationError:
                continue

        # Match the cleaned values against known indicators.
        data = match_observables(list(values))

        # Idea kept from the original notes: also match related observables
        # (e.g. URLs for a domain) once there is a way to degrade the
        # "confidence" of hits obtained through them.

        return render(data, "analysis.html")
Exemplo n.º 3
0
def derive(observables):
    """Clean the given observables and append everything derived from them.

    Each value is typed with ``Observable.guess_type``, cleaned, and replaced
    in the working list by its cleaned form. The analyzers registered for
    that type then propose further strings; those not already in the list
    are expanded recursively.

    Args:
        observables: a value accepted by ``iterify`` that yields observable
            strings.

    Returns:
        A list made of the working list of this call followed by the result
        of deriving the newly found strings.
    """
    pending = []
    observables = list(iterify(observables))
    for position, raw in enumerate(observables):
        try:
            observable_type = Observable.guess_type(raw)
            instance = observable_type(value=raw)
            instance.clean()
            cleaned = instance.value
            observables[position] = cleaned
            for analyzer in analyzers.get(observable_type, []):
                pending += [
                    found for found in analyzer.analyze_string(cleaned)
                    if found and found not in observables
                ]
        except ObservableValidationError:
            # Validation errors are deliberately ignored.
            pass

    if not pending:
        return observables
    return observables + derive(pending)
Exemplo n.º 4
0
    def _make_threat_nodes(threat, context, tags):
        """Turn one threat record into tagged observable nodes.

        Supported combinations of ``threatType`` / ``classification``:

        * url / phish: the URL only
        * url / malware: the URL and the hash found in ``threatID``
        * url / spam: the URL only (the classification is just logged)
        * attachment / malware: the hash found in ``threatID``

        Args:
            threat: dict with at least ``threatStatus``, ``threatID``,
                ``threatType``, ``classification`` and, for URLs, ``threat``.
            context: context entry attached to every created node.
            tags: iterable of dicts whose ``name`` is applied as a tag.

        Returns:
            The list of created nodes, or ``None`` when the threat is not
            active or the type/classification pair is not supported.
        """
        status = threat['threatStatus']
        if status != 'active':
            # FIXME, clear out false positive ?
            log.warning("threatStatus %s for threat %s",
                        status, threat['threatID'])
            log.debug(pprint.pformat(threat))
            return None
        log.debug('_make_threat_nodes for threat %s', threat['threatID'])

        threat_type = threat['threatType']
        classification = threat['classification']
        wants_url = False
        wants_hash = False

        if threat_type == 'url':
            if classification == 'malware':
                # URL leading to a malware download: keep URL and hash.
                wants_hash = True
            elif classification == 'spam':
                log.info('URL threat - ignore classification %s',
                         classification)
            elif classification != 'phish':
                log.error('Type: url, Unsupported classification %s',
                          classification)
                log.debug(pprint.pformat(threat))
                return None
            wants_url = True
        elif threat_type == 'attachment':
            if classification != 'malware':
                log.error('Type: attachment, Unsupported classification %s',
                          classification)
                log.debug(pprint.pformat(threat))
                return None
            wants_hash = True
        else:
            log.error('Unsupported threatType %s classification %s',
                      threat_type, classification)
            log.debug(pprint.pformat(threat))
            return None

        # FIXME check if they exist already.
        # if they do, do not parse the threat a second time ?
        threat_nodes = []
        if wants_url:
            # Proofpoint sometimes supplies a hostname marked as a URL, so
            # the concrete observable class is left to Observable.guess_type.
            url_value = threat['threat']
            url_class = Observable.guess_type(url_value)
            threat_nodes.append(
                url_class.get_or_create(value=url_value, context=[context]))

        if wants_hash:
            threat_nodes.append(
                Hash.get_or_create(value=threat['threatID'],
                                   context=[context]))

        for node in threat_nodes:
            node.tag([t['name'] for t in tags])
        return threat_nodes
Exemplo n.º 5
0
def derive(observables):
    """Recursively expand observables with what their analyzers derive.

    For every observable the type is guessed with ``Observable.guess_type``
    and each analyzer registered for that type in ``analyzers`` is asked for
    derived strings. Strings that are not known yet are added and the
    expansion is repeated until a pass yields nothing new.

    Args:
        observables: a single value or a collection of values, as accepted
            by ``iterify``.

    Returns:
        A list containing the given observables plus everything derived
        from them. The list is always a new object, never the argument
        itself.
    """
    # Normalize once to a real list. Without this, a lone string input made
    # ``n not in observables`` a substring test and ``new + observables``
    # a list + str TypeError.
    # NOTE(review): assumes iterify() yields a lone string as one item.
    observables = list(iterify(observables))

    new = []
    for observable in observables:
        try:
            t = Observable.guess_type(observable)
            for a in analyzers.get(t, []):
                new.extend([
                    n for n in a.analyze_string(observable)
                    if n and n not in observables
                ])
        except ObservableValidationError:
            # Values that fail validation are kept but not expanded.
            pass

    if not new:
        return observables
    return derive(new + observables)
Exemplo n.º 6
0
def derive(strings):
    """Return the normalized inputs and their analyzer-extended closure.

    Non-empty strings are turned into observables (type guessed by
    ``Observable.guess_type``) and normalized. A string that raises
    ``ObservableValidationError`` is kept unchanged. The analyzers
    registered for each observable class then supply derived strings; as
    long as unseen strings appear, the function calls itself on the union.

    Args:
        strings: candidate strings, in any form ``iterify`` accepts.

    Returns:
        Tuple ``(values, extended)``: the set of normalized input values and
        the set obtained once derivation yields nothing new. Both are the
        same set object when no analyzer produced an unseen string.
    """
    seen_values = set()
    parsed = set()

    for text in iterify(strings):
        if text:
            try:
                guessed = Observable.guess_type(text)
                obs = guessed(value=text)
                obs.normalize()
                parsed.add(obs)
                seen_values.add(obs.value)
            except ObservableValidationError:
                # Keep strings that do not validate exactly as received.
                seen_values.add(text)

    extra = []
    for obs in parsed:
        registered = analyzers.get(obs.__class__, [])
        for analyzer in registered:
            extra.extend([
                item for item in analyzer.analyze_string(obs.value)
                if item and item not in seen_values
            ])

    if extra:
        # The first element of the recursive result is not needed here.
        _, extended = derive(extra + list(seen_values))
        return seen_values, extended
    return seen_values, seen_values
Exemplo n.º 7
0
 def each(url):
     """Connect a URL observable to the observable of its host.

     The host is the first item returned by ``ProcessUrl.analyze_string``
     for ``url.value``. The matching observable is fetched or created, given
     the ``"analytics"`` source and linked from ``url`` via ``Link.connect``.
     An ``ObservableValidationError`` is logged instead of propagated.

     Args:
         url: observable whose ``value`` holds the URL to process.
     """
     # Bound before the ``try`` so the handler can always build its message;
     # previously a validation error raised before ``host`` was assigned
     # turned into an UnboundLocalError inside the handler.
     host = None
     try:
         host = ProcessUrl.analyze_string(url.value)[0]
         h = Observable.guess_type(host).get_or_create(value=host)
         h.add_source("analytics")
         Link.connect(src=url, dst=h)
     except ObservableValidationError:
         # Fall back to the URL itself when no host could be extracted.
         failed = url.value if host is None else host
         logging.error("An error occurred when trying to add {} to the database".format(failed))
Exemplo n.º 8
0
 def each(url):
     """Link a URL observable to its host observable and return the host.

     The host is the first item returned by ``ProcessUrl.analyze_string``
     for ``url.value``. The matching observable is fetched or created, given
     the ``"analytics"`` source and linked from ``url`` with an active
     ``"hostname"`` link attributed to ``"ProcessUrl"``.

     Args:
         url: observable whose ``value`` holds the URL to process.

     Returns:
         The host observable, or ``None`` when an
         ``ObservableValidationError`` was raised (the error is logged).
     """
     # Bound before the ``try`` so the handler can always build its message;
     # previously a validation error raised before ``host`` was assigned
     # turned into an UnboundLocalError inside the handler.
     host = None
     try:
         host = ProcessUrl.analyze_string(url.value)[0]
         h = Observable.guess_type(host).get_or_create(value=host)
         h.add_source("analytics")
         url.active_link_to(h, "hostname", "ProcessUrl", clean_old=False)
         return h
     except ObservableValidationError:
         # Fall back to the URL itself when no host could be extracted.
         failed = url.value if host is None else host
         logging.error("An error occurred when trying to add {} to the database".format(failed))
         return None
Exemplo n.º 9
0
 def each(url):
     """Create the host observable for a URL and link the URL to it.

     ``ProcessUrl.analyze_string`` is run on ``url.value`` and its first
     result is used as the host. The host observable is fetched or created,
     tagged with the ``"analytics"`` source, and ``url`` receives an active
     ``"hostname"`` link to it (source ``"ProcessUrl"``, old links kept).

     Args:
         url: observable whose ``value`` holds the URL to process.

     Returns:
         The host observable on success; ``None`` after logging when an
         ``ObservableValidationError`` occurs.
     """
     # ``host`` must exist even if validation fails before it is extracted,
     # otherwise the error handler itself raised UnboundLocalError.
     host = None
     try:
         host = ProcessUrl.analyze_string(url.value)[0]
         h = Observable.guess_type(host).get_or_create(value=host)
         h.add_source("analytics")
         url.active_link_to(h, "hostname", "ProcessUrl", clean_old=False)
         return h
     except ObservableValidationError:
         # Report the URL when the failure happened before a host was known.
         failed = url.value if host is None else host
         logging.error(
             "An error occurred when trying to add {} to the database".
             format(failed))
         return None
Exemplo n.º 10
0
def derive(observables):
    """Expand observables with the strings their analyzers derive.

    A lone string is first wrapped in a list. For each observable, the
    analyzers registered for its guessed type propose derived strings; the
    ones not already present are prepended and the process repeats until a
    pass finds nothing new.

    Args:
        observables: one string, or a list of strings.

    Returns:
        The list of observables, extended with every derived string.
    """
    # ``unicode`` is the Python 2 text type; this block targets Python 2.
    if isinstance(observables, (str, unicode)):
        observables = [observables]

    derived = []
    for item in observables:
        item_type = Observable.guess_type(item)
        for analyzer in analyzers.get(item_type, []):
            derived += [
                found for found in analyzer.analyze_string(item)
                if found and found not in observables
            ]

    if not derived:
        return observables
    return derive(derived + observables)
Exemplo n.º 11
0
def derive(observables):
    """Recursively extend observables with analyzer-derived strings.

    The type of each observable is guessed with ``Observable.guess_type``;
    every analyzer registered for that type in ``analyzers`` is asked for
    derived strings. Unknown strings are added and the whole list is
    processed again until nothing new shows up.

    Args:
        observables: a single value or a collection of values, as accepted
            by ``iterify``.

    Returns:
        A new list with the given observables and everything derived from
        them.
    """
    # Work on a real list from the start. When only the loop went through
    # iterify(), a lone string input made ``n not in observables`` a
    # substring test and ``new + observables`` a list + str TypeError.
    # NOTE(review): assumes iterify() yields a lone string as one item.
    observables = list(iterify(observables))

    new = []
    for observable in observables:
        try:
            t = Observable.guess_type(observable)
            for a in analyzers.get(t, []):
                new.extend([n for n in a.analyze_string(observable) if n and n not in observables])
        except ObservableValidationError:
            # Values that fail validation are kept but not expanded.
            pass

    if not new:
        return observables
    return derive(new + observables)
Exemplo n.º 12
0
import sys
import logging
from os import path

YETI_ROOT = path.normpath(path.dirname(path.dirname(path.abspath(__file__))))
sys.path.append(YETI_ROOT)

from core.analytics import OneShotAnalytics
from core.scheduling import Scheduler
from core.observables import Observable

if __name__ == '__main__':
    # Command line: <script> <analytic name> <observable value>
    # Without arguments, the available one-shot analytics are listed.
    logging.basicConfig(level=logging.DEBUG)

    # NOTE(review): presumably instantiated for its side effects (loading
    # the analytics) before they are queried - confirm.
    Scheduler()

    if len(sys.argv) == 1:
        print("Re-run using a analytic name as argument")
        for f in OneShotAnalytics.objects():
            print("  {}".format(f.name))
    elif len(sys.argv) == 2:
        # The observable value is mandatory: stop with a clear message
        # instead of failing later with IndexError on sys.argv[2].
        sys.exit("Missing observable value: re-run with an analytic name "
                 "followed by the observable to analyze")
    else:
        name = sys.argv[1]
        f = OneShotAnalytics.objects.get(name=name)
        print("Running {}...".format(f.name))
        observable = Observable.guess_type(
            sys.argv[2]).get_or_create(value=sys.argv[2])
        f.analyze(observable, {})
Exemplo n.º 13
0
def match_observables(observables, save_matches=False, fetch_neighbors=True):
    """Match observables against stored observables and indicators.

    Args:
        observables: raw observable strings to look up; they are passed
            through ``derive`` first.
        save_matches: when true, every string matched by an indicator is
            stored through ``Observable.add_text``; otherwise a transient
            observable is built and swapped for the stored one if found.
        fetch_neighbors: when true, tagged observables linked to a known
            observable are reported under ``"neighbors"``.

    Returns:
        A dict with the keys ``"matches"`` (one entry per indicator hit),
        ``"unknown"`` (the queried values left after ``del_from_set`` was
        applied for every stored observable), ``"entities"`` (list of entity
        infos, each carrying the observables/indicators that led to it),
        ``"known"`` (infos of stored observables) and ``"neighbors"``
        (``(link info, node info)`` pairs).
    """
    # derive() hands back the query values together with the query extended
    # by everything that could be derived from them.
    observables, extended_query = derive(observables)
    observables = list(observables)

    data = {
        "matches": [],
        "unknown": set(observables),
        "entities": {},
        "known": [],
        "neighbors": [],
    }

    # add to "known"
    for o in Observable.objects(value__in=list(extended_query)):
        data['known'].append(o.info())
        del_from_set(data['unknown'], o.value)

        if fetch_neighbors:
            for link, node in (o.incoming()):
                if isinstance(node, Observable):
                    # Keep tagged neighbors whose link has at least one end
                    # outside the extended query.
                    if (link.src.value not in extended_query or link.dst.value
                            not in extended_query) and node.tags:
                        data['neighbors'].append((link.info(), node.info()))

        for nodes in o.neighbors("Entity").values():
            for _, node in nodes:
                # uniquely add node information to related entities
                ent = data['entities'].get(node.name, node.info())
                if 'matches' not in ent:
                    ent['matches'] = {"observables": []}
                if 'observables' not in ent['matches']:
                    ent['matches']['observables'] = []

                info = node.info()
                o_info = o.info()
                info['matched_observable'] = {
                    "value": o_info['value'],
                    "tags": [t['name'] for t in o_info['tags']],
                    "human_url": o_info['human_url'],
                    "url": o_info['url'],
                    "context": o_info['context']
                }
                if info not in ent['matches']['observables']:
                    ent['matches']['observables'].append(info)
                data['entities'][node.name] = ent

    # add to "matches"
    for o, i in Indicator.search(extended_query):
        if save_matches:
            o = Observable.add_text(o)
        else:
            o = Observable.guess_type(o)(value=o)
            try:
                o.validate()
            except ObservableValidationError:
                # Best effort: an invalid value is still reported as a match.
                pass
            try:
                # Prefer the stored observable when there is one.
                o = Observable.objects.get(value=o.value)
            except Exception:
                # Best effort: keep the transient observable otherwise.
                pass

        match = i.info()
        match.update({
            # Observables without an id only expose their value.
            "observable": o.info() if o.id else o.value,
            "related": [],
            "suggested_tags": set()
        })

        for nodes in i.neighbors("Entity").values():
            for l, node in nodes:
                # add node name and link description to indicator
                node_data = {
                    "entity": node.type,
                    "name": node.name,
                    "link_description": l.description
                }
                match["related"].append(node_data)

                # uniquely add node information to related entities
                ent = data['entities'].get(node.name, node.info())
                if 'matches' not in ent:
                    ent['matches'] = {"indicators": []}
                if 'indicators' not in ent['matches']:
                    ent['matches']['indicators'] = []

                info = i.info()
                info['matched_observable'] = o.value
                if info not in ent['matches']['indicators']:
                    ent['matches']['indicators'].append(info)
                data['entities'][node.name] = ent

                # Suggest the entity's tags the observable does not have yet.
                o_tags = o.get_tags()
                for tag in node.generate_tags():
                    if tag not in o_tags:
                        match["suggested_tags"].add(tag)

        data["matches"].append(match)

    # Materialize so callers get a real list on Python 3 as well (a dict
    # view there is neither indexable nor serializable).
    data['entities'] = list(data['entities'].values())
    return data
Exemplo n.º 14
0
def match_observables(observables, save_matches=False):
    """Match observables against stored observables and indicators.

    Args:
        observables: iterable of observable strings; falsy items are dropped.
        save_matches: when true, strings matched by an indicator are stored
            through ``Observable.add_text``; otherwise a transient observable
            is built, validated and swapped for the stored one if found.

    Returns:
        A dict with the keys ``"matches"``, ``"unknown"``, ``"entities"``,
        ``"known"`` and ``"neighbors"``.
    """
    # Drop empty observables, then widen the query with derived values.
    observables = [item for item in observables if item]
    extended_query = set(observables) | set(derive(observables))
    seen_entity_names = set()

    matches = []
    unknown = set(observables)
    entities = []
    known = []
    neighbors = []

    # Stored observables that are part of the (extended) query.
    for stored in Observable.objects(value__in=list(extended_query)):
        known.append(stored.info())
        del_from_set(unknown, stored.value)

        for link, node in stored.incoming():
            if not isinstance(node, Observable):
                continue
            # Keep tagged neighbors whose link has at least one end outside
            # the extended query.
            outside_query = (link.src.value not in extended_query
                             or link.dst.value not in extended_query)
            if outside_query and node.tags:
                neighbors.append((link.info(), node.info()))

    # Indicator hits.
    for matched, indicator in Indicator.search(extended_query):
        del_from_set(unknown, matched)
        if save_matches:
            matched = Observable.add_text(matched)
        else:
            matched = Observable.guess_type(matched)(value=matched)
            matched.validate()
            try:
                matched = Observable.objects.get(value=matched.value)
            except Exception:
                pass

        match = indicator.info()
        match.update({"observable": matched.info(), "related": [], "suggested_tags": set()})

        for nodes in indicator.neighbors("Entity").values():
            for entity_link, node in nodes:
                # Attach the entity name and link description to the match.
                match["related"].append({
                    "entity": node.type,
                    "name": node.name,
                    "link_description": entity_link.description,
                })

                # Report every related entity only once.
                if node.name not in seen_entity_names:
                    node_info = node.info()
                    node_info['type'] = node.type
                    entities.append(node_info)
                    seen_entity_names.add(node.name)

                current_tags = matched.get_tags()
                for tag in node.generate_tags():
                    if tag not in current_tags:
                        match["suggested_tags"].add(tag)

        matches.append(match)
        del_from_set(unknown, matched.value)

    return {
        "matches": matches,
        "unknown": unknown,
        "entities": entities,
        "known": known,
        "neighbors": neighbors,
    }
Exemplo n.º 15
0
def match_observables(observables, save_matches=False, fetch_neighbors=True):
    """Match observables against stored observables and indicators.

    Args:
        observables: iterable of observable strings; falsy items are dropped
            and the rest is passed through ``refang``.
        save_matches: when true, every string matched by an indicator is
            stored through ``Observable.add_text``; otherwise a transient
            observable is built and swapped for the stored one if found.
        fetch_neighbors: when true, tagged observables linked to a known
            observable are reported under ``"neighbors"``.

    Returns:
        A dict with the keys ``"matches"`` (one entry per indicator hit),
        ``"unknown"`` (the queried values left after ``del_from_set`` was
        applied for every stored observable), ``"entities"`` (list of entity
        infos, each carrying the observables/indicators that led to it),
        ``"known"`` (infos of stored observables) and ``"neighbors"``
        (``(link info, node info)`` pairs).
    """
    # Remove empty observables and refang the remaining ones
    observables = [refang(observable) for observable in observables if observable]
    extended_query = set(observables) | set(derive(observables))

    data = {
        "matches": [],
        "unknown": set(observables),
        "entities": {},
        "known": [],
        "neighbors": [],
    }

    # add to "known"
    for o in Observable.objects(value__in=list(extended_query)):
        data['known'].append(o.info())
        del_from_set(data['unknown'], o.value)

        if fetch_neighbors:
            for link, node in (o.incoming()):
                if isinstance(node, Observable):
                    # Keep tagged neighbors whose link has at least one end
                    # outside the extended query.
                    if (link.src.value not in extended_query or link.dst.value not in extended_query) and node.tags:
                        data['neighbors'].append((link.info(), node.info()))

        for nodes in o.neighbors("Entity").values():
            for _, node in nodes:
                # uniquely add node information to related entities
                ent = data['entities'].get(node.name, node.info())
                if 'matches' not in ent:
                    ent['matches'] = {"observables": []}
                if 'observables' not in ent['matches']:
                    ent['matches']['observables'] = []

                info = node.info()
                o_info = o.info()
                info['matched_observable'] = {
                    "value": o_info['value'],
                    "tags": [t['name'] for t in o_info['tags']],
                    "human_url": o_info['human_url'],
                    "url": o_info['url']
                }
                if info not in ent['matches']['observables']:
                    ent['matches']['observables'].append(info)
                data['entities'][node.name] = ent

    # add to "matches"
    for o, i in Indicator.search(extended_query):
        if save_matches:
            o = Observable.add_text(o)
        else:
            o = Observable.guess_type(o)(value=o)
            try:
                o.validate()
            except ObservableValidationError:
                # Best effort: an invalid value is still reported as a match.
                pass
            try:
                # Prefer the stored observable when there is one.
                o = Observable.objects.get(value=o.value)
            except Exception:
                # Best effort: keep the transient observable otherwise.
                pass

        match = i.info()
        match.update({"observable": o.info(), "related": [], "suggested_tags": set()})

        for nodes in i.neighbors("Entity").values():
            for l, node in nodes:
                # add node name and link description to indicator
                node_data = {"entity": node.type, "name": node.name, "link_description": l.description}
                match["related"].append(node_data)

                # uniquely add node information to related entities
                ent = data['entities'].get(node.name, node.info())
                if 'matches' not in ent:
                    ent['matches'] = {"indicators": []}
                if 'indicators' not in ent['matches']:
                    ent['matches']['indicators'] = []

                info = i.info()
                info['matched_observable'] = o.value
                if info not in ent['matches']['indicators']:
                    ent['matches']['indicators'].append(info)
                data['entities'][node.name] = ent

                # Suggest the entity's tags the observable does not have yet
                # (plain loop: the work here is the side effect).
                o_tags = o.get_tags()
                for tag in node.generate_tags():
                    if tag not in o_tags:
                        match["suggested_tags"].add(tag)

        data["matches"].append(match)

    # Materialize so callers get a real list on Python 3 as well (a dict
    # view there is neither indexable nor serializable).
    data['entities'] = list(data['entities'].values())
    return data
Exemplo n.º 16
0
    def _make_threat_nodes(threat, context, tags):
        """Build the tagged observables described by a single threat record.

        Handled ``threatType`` / ``classification`` pairs:

        * url / phish: URL leads to a phishing page (threat is the URL)
        * url / malware: URL leads to a malware download (URL plus the
          ``threatID``, used as a hash)
        * url / spam: URL is kept, the classification is only logged
        * attachment / malware: the attachment is malware (``threatID`` is
          used as a hash)

        Args:
            threat: dict describing the threat.
            context: context entry attached to every created node.
            tags: iterable of dicts whose ``name`` is applied as a tag.

        Returns:
            A list of nodes, or ``None`` for inactive threats and for
            unsupported type/classification pairs.
        """
        if threat['threatStatus'] != 'active':
            # FIXME, clear out false positive ?
            log.warning(
                "threatStatus %s for threat %s", threat['threatStatus'],
                threat['threatID'])
            log.debug(pprint.pformat(threat))
            return None
        log.debug('_make_threat_nodes for threat %s', threat['threatID'])

        # Maps 'url' / 'attachment' to the record providing that value.
        selected = {}
        kind = threat['threatType']

        if kind == 'attachment':
            if threat['classification'] != 'malware':
                log.error(
                    'Type: attachment, Unsupported classification %s',
                    threat['classification'])
                log.debug(pprint.pformat(threat))
                return None
            selected['attachment'] = threat
        elif kind == 'url':
            label = threat['classification']
            if label not in ('phish', 'malware', 'spam'):
                log.error(
                    'Type: url, Unsupported classification %s', label)
                log.debug(pprint.pformat(threat))
                return None
            if label == 'malware':
                # Keep both the URL and the hash.
                selected['attachment'] = threat
            elif label == 'spam':
                log.info('URL threat - ignore classification %s', label)
            selected['url'] = threat
        else:
            log.error(
                'Unsupported threatType %s classification %s',
                threat['threatType'], threat['classification'])
            log.debug(pprint.pformat(threat))
            return None

        # FIXME check if they exist already.
        # if they do, do not parse the threat a second time ?
        threat_nodes = []
        if 'url' in selected:
            # Proofpoint sometimes supplies a hostname marked as a URL; the
            # class is therefore chosen by Observable.guess_type.
            raw_url = selected['url']['threat']
            node_class = Observable.guess_type(raw_url)
            threat_nodes.append(
                node_class.get_or_create(value=raw_url, context=[context]))

        if 'attachment' in selected:
            digest = selected['attachment']['threatID']
            threat_nodes.append(
                Hash.get_or_create(value=digest, context=[context]))

        for created in threat_nodes:
            created.tag([t['name'] for t in tags])
        return threat_nodes