Example #1
def generate_daterange(report):
    """
    Creates a date_range timestamp with format YYYY-MM-DDTHH:MM:SS
    based on begin and end dates for easier parsing in Kibana.

    TODO: Move to utils to avoid duplication w/ elastic?
    """

    metadata = report["report_metadata"]
    begin_date = human_timestamp_to_datetime(metadata["begin_date"])
    end_date = human_timestamp_to_datetime(metadata["end_date"])
    begin_date_human = begin_date.strftime("%Y-%m-%dT%H:%M:%S")
    end_date_human = end_date.strftime("%Y-%m-%dT%H:%M:%S")
    date_range = [begin_date_human, end_date_human]
    logger.debug("date_range is {}".format(date_range))
    return date_range
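A quick usage sketch (the input dict is hypothetical; the function only reads report_metadata.begin_date and report_metadata.end_date, and human_timestamp_to_datetime is assumed to accept the "YYYY-MM-DD HH:MM:SS" timestamps used throughout these examples):

# Hypothetical input shaped like a parsed report's metadata
report = {
    "report_metadata": {
        "begin_date": "2023-06-01 00:00:00",
        "end_date": "2023-06-01 23:59:59",
    }
}
generate_daterange(report)
# -> ["2023-06-01T00:00:00", "2023-06-01T23:59:59"]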
Example #2
def save_report_to_s3(self, report, report_type):
    report_date = human_timestamp_to_datetime(
        report["report_metadata"]["begin_date"])
    report_id = report["report_metadata"]["report_id"]
    path_template = "{0}/{1}/year={2}/month={3:02d}/day={4:02d}/{5}.json"
    object_path = path_template.format(self.bucket_path, report_type,
                                       report_date.year, report_date.month,
                                       report_date.day, report_id)
    logger.debug("Saving {0} report to s3://{1}/{2}".format(
        report_type, self.bucket_name, object_path))
    object_metadata = {
        k: v
        for k, v in report["report_metadata"].items()
        if k in self.metadata_keys
    }
    self.bucket.put_object(Body=json.dumps(report),
                           Key=object_path,
                           Metadata=object_metadata)
Example #3
def save_forensic_report_to_elasticsearch(forensic_report,
                                          index_suffix=None,
                                          monthly_indexes=False,
                                          number_of_shards=1,
                                          number_of_replicas=1):
    """
        Saves a parsed DMARC forensic report to ElasticSearch

        Args:
            forensic_report (OrderedDict): A parsed forensic report
            index_suffix (str): The suffix of the name of the index to save to
            monthly_indexes (bool): Use monthly indexes instead of daily
                                    indexes
            number_of_shards (int): The number of shards to use in the index
            number_of_replicas (int): The number of replicas to use in the
                                      index

        Raises:
            AlreadySaved

        """
    logger.debug("Saving forensic report to Elasticsearch")
    forensic_report = forensic_report.copy()
    sample_date = None
    if forensic_report["parsed_sample"]["date"] is not None:
        sample_date = forensic_report["parsed_sample"]["date"]
        sample_date = human_timestamp_to_datetime(sample_date)
    original_headers = forensic_report["parsed_sample"]["headers"]
    headers = OrderedDict()
    for original_header in original_headers:
        headers[original_header.lower()] = original_headers[original_header]

    arrival_date_human = forensic_report["arrival_date_utc"]
    arrival_date = human_timestamp_to_datetime(arrival_date_human)

    search = Search(index="dmarc_forensic*")
    arrival_query = {"match": {"arrival_date": arrival_date}}
    q = Q(arrival_query)

    from_ = None
    to_ = None
    subject = None
    if "from" in headers:
        from_ = headers["from"]
        from_query = {"match": {"sample.headers.from": from_}}
        q = q & Q(from_query)
    if "to" in headers:
        to_ = headers["to"]
        to_query = {"match": {"sample.headers.to": to_}}
        q = q & Q(to_query)
    if "subject" in headers:
        subject = headers["subject"]
        subject_query = {"match": {"sample.headers.subject": subject}}
        q = q & Q(subject_query)

    search.query = q
    existing = search.execute()

    if len(existing) > 0:
        raise AlreadySaved("A forensic sample to {0} from {1} "
                           "with a subject of {2} and arrival date of {3} "
                           "already exists in "
                           "Elasticsearch".format(to_, from_, subject,
                                                  arrival_date_human))

    parsed_sample = forensic_report["parsed_sample"]
    sample = _ForensicSampleDoc(
        raw=forensic_report["sample"],
        headers=headers,
        headers_only=forensic_report["sample_headers_only"],
        date=sample_date,
        subject=forensic_report["parsed_sample"]["subject"],
        filename_safe_subject=parsed_sample["filename_safe_subject"],
        body=forensic_report["parsed_sample"]["body"])

    for address in forensic_report["parsed_sample"]["to"]:
        sample.add_to(display_name=address["display_name"],
                      address=address["address"])
    for address in forensic_report["parsed_sample"]["reply_to"]:
        sample.add_reply_to(display_name=address["display_name"],
                            address=address["address"])
    for address in forensic_report["parsed_sample"]["cc"]:
        sample.add_cc(display_name=address["display_name"],
                      address=address["address"])
    for address in forensic_report["parsed_sample"]["bcc"]:
        sample.add_bcc(display_name=address["display_name"],
                       address=address["address"])
    for attachment in forensic_report["parsed_sample"]["attachments"]:
        sample.add_attachment(filename=attachment["filename"],
                              content_type=attachment["mail_content_type"],
                              sha256=attachment["sha256"])
    try:
        forensic_doc = _ForensicReportDoc(
            feedback_type=forensic_report["feedback_type"],
            user_agent=forensic_report["user_agent"],
            version=forensic_report["version"],
            original_mail_from=forensic_report["original_mail_from"],
            arrival_date=arrival_date,
            domain=forensic_report["reported_domain"],
            original_envelope_id=forensic_report["original_envelope_id"],
            authentication_results=forensic_report["authentication_results"],
            delivery_results=forensic_report["delivery_result"],
            source_ip_address=forensic_report["source"]["ip_address"],
            source_country=forensic_report["source"]["country"],
            source_reverse_dns=forensic_report["source"]["reverse_dns"],
            source_base_domain=forensic_report["source"]["base_domain"],
            authentication_mechanisms=forensic_report[
                "authentication_mechanisms"],
            auth_failure=forensic_report["auth_failure"],
            dkim_domain=forensic_report["dkim_domain"],
            original_rcpt_to=forensic_report["original_rcpt_to"],
            sample=sample)

        index = "dmarc_forensic"
        if index_suffix:
            index = "{0}_{1}".format(index, index_suffix)
        if monthly_indexes:
            index_date = arrival_date.strftime("%Y-%m")
        else:
            index_date = arrival_date.strftime("%Y-%m-%d")
        index = "{0}-{1}".format(index, index_date)
        index_settings = dict(number_of_shards=number_of_shards,
                              number_of_replicas=number_of_replicas)
        create_indexes([index], index_settings)
        forensic_doc.meta.index = index
        try:
            forensic_doc.save()
        except Exception as e:
            raise ElasticsearchError("Elasticsearch error: {0}".format(
                e.__str__()))
    except KeyError as e:
        raise InvalidForensicReport(
            "Forensic report missing required field: {0}".format(e.__str__()))
Example #4
def save_aggregate_report_to_elasticsearch(aggregate_report,
                                           index_suffix=None,
                                           monthly_indexes=False,
                                           number_of_shards=1,
                                           number_of_replicas=1):
    """
    Saves a parsed DMARC aggregate report to ElasticSearch

    Args:
        aggregate_report (OrderedDict): A parsed forensic report
        index_suffix (str): The suffix of the name of the index to save to
        monthly_indexes (bool): Use monthly indexes instead of daily indexes
        number_of_shards (int): The number of shards to use in the index
        number_of_replicas (int): The number of replicas to use in the index

    Raises:
            AlreadySaved
    """
    logger.debug("Saving aggregate report to Elasticsearch")
    aggregate_report = aggregate_report.copy()
    metadata = aggregate_report["report_metadata"]
    org_name = metadata["org_name"]
    report_id = metadata["report_id"]
    domain = aggregate_report["policy_published"]["domain"]
    begin_date = human_timestamp_to_datetime(metadata["begin_date"])
    end_date = human_timestamp_to_datetime(metadata["end_date"])
    begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%S")
    end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%S")
    if monthly_indexes:
        index_date = begin_date.strftime("%Y-%m")
    else:
        index_date = begin_date.strftime("%Y-%m-%d")
    aggregate_report["begin_date"] = begin_date
    aggregate_report["end_date"] = end_date
    date_range = [aggregate_report["begin_date"], aggregate_report["end_date"]]

    org_name_query = Q(dict(match=dict(org_name=org_name)))
    report_id_query = Q(dict(match=dict(report_id=report_id)))
    domain_query = Q(dict(match={"published_policy.domain": domain}))
    begin_date_query = Q(dict(match=dict(date_range=begin_date)))
    end_date_query = Q(dict(match=dict(date_range=end_date)))

    search = Search(index="dmarc_aggregate*")
    query = org_name_query & report_id_query & domain_query
    query = query & begin_date_query & end_date_query
    search.query = query

    existing = search.execute()
    if len(existing) > 0:
        raise AlreadySaved("An aggregate report ID {0} from {1} about {2} "
                           "with a date range of {3} UTC to {4} UTC already "
                           "exists in "
                           "Elasticsearch".format(report_id, org_name, domain,
                                                  begin_date_human,
                                                  end_date_human))
    published_policy = _PublishedPolicy(
        domain=aggregate_report["policy_published"]["domain"],
        adkim=aggregate_report["policy_published"]["adkim"],
        aspf=aggregate_report["policy_published"]["aspf"],
        p=aggregate_report["policy_published"]["p"],
        sp=aggregate_report["policy_published"]["sp"],
        pct=aggregate_report["policy_published"]["pct"],
        fo=aggregate_report["policy_published"]["fo"])

    for record in aggregate_report["records"]:
        agg_doc = _AggregateReportDoc(
            xml_schema=aggregate_report["xml_schema"],
            org_name=metadata["org_name"],
            org_email=metadata["org_email"],
            org_extra_contact_info=metadata["org_extra_contact_info"],
            report_id=metadata["report_id"],
            date_range=date_range,
            errors=metadata["errors"],
            published_policy=published_policy,
            source_ip_address=record["source"]["ip_address"],
            source_country=record["source"]["country"],
            source_reverse_dns=record["source"]["reverse_dns"],
            source_base_domain=record["source"]["base_domain"],
            message_count=record["count"],
            disposition=record["policy_evaluated"]["disposition"],
            dkim_aligned=record["policy_evaluated"]["dkim"] is not None
            and record["policy_evaluated"]["dkim"].lower() == "pass",
            spf_aligned=record["policy_evaluated"]["spf"] is not None
            and record["policy_evaluated"]["spf"].lower() == "pass",
            header_from=record["identifiers"]["header_from"],
            envelope_from=record["identifiers"]["envelope_from"],
            envelope_to=record["identifiers"]["envelope_to"])

        for override in record["policy_evaluated"]["policy_override_reasons"]:
            agg_doc.add_policy_override(type_=override["type"],
                                        comment=override["comment"])

        for dkim_result in record["auth_results"]["dkim"]:
            agg_doc.add_dkim_result(domain=dkim_result["domain"],
                                    selector=dkim_result["selector"],
                                    result=dkim_result["result"])

        for spf_result in record["auth_results"]["spf"]:
            agg_doc.add_spf_result(domain=spf_result["domain"],
                                   scope=spf_result["scope"],
                                   result=spf_result["result"])

        index = "dmarc_aggregate"
        if index_suffix:
            index = "{0}_{1}".format(index, index_suffix)
        index = "{0}-{1}".format(index, index_date)
        index_settings = dict(number_of_shards=number_of_shards,
                              number_of_replicas=number_of_replicas)
        create_indexes([index], index_settings)
        agg_doc.meta.index = index

        try:
            agg_doc.save()
        except Exception as e:
            raise ElasticsearchError("Elasticsearch error: {0}".format(
                e.__str__()))
Example #5
def save_forensic_report_to_elasticsearch(forensic_report,
                                          index="dmarc_forensic"):
    """
        Saves a parsed DMARC forensic report to ElasticSearch

        Args:
            forensic_report (OrderedDict): A parsed forensic report
            index (str): The name of the index to save to

        Raises:
            AlreadySaved

        """
    logger.debug("Saving forensic report to Elasticsearch")
    forensic_report = forensic_report.copy()
    sample_date = None
    if forensic_report["parsed_sample"]["date"] is not None:
        sample_date = forensic_report["parsed_sample"]["date"]
        sample_date = human_timestamp_to_datetime(sample_date)
    original_headers = forensic_report["parsed_sample"]["headers"]
    headers = OrderedDict()
    for original_header in original_headers:
        headers[original_header.lower()] = original_headers[original_header]

    arrival_date_human = forensic_report["arrival_date_utc"]
    arrival_date = human_timestamp_to_datetime(arrival_date_human)

    search = Index(index).search()
    arrival_query = {"match": {"arrival_date": arrival_date}}
    q = Q(arrival_query)

    from_ = None
    to_ = None
    subject = None
    if "from" in headers:
        from_ = headers["from"]
        from_query = {"match": {"sample.headers.from": from_}}
        q = q & Q(from_query)
    if "to" in headers:
        to_ = headers["to"]
        to_query = {"match": {"sample.headers.to": to_}}
        q = q & Q(to_query)
    if "subject" in headers:
        subject = headers["subject"]
        subject_query = {"match": {"sample.headers.subject": subject}}
        q = q & Q(subject_query)

    search.query = q
    existing = search.execute()

    if len(existing) > 0:
        raise AlreadySaved("A forensic sample to {0} from {1} "
                           "with a subject of {2} and arrival date of {3} "
                           "already exists in "
                           "Elasticsearch".format(to_,
                                                  from_,
                                                  subject,
                                                  arrival_date_human
                                                  ))

    parsed_sample = forensic_report["parsed_sample"]
    sample = _ForensicSampleDoc(
        raw=forensic_report["sample"],
        headers=headers,
        headers_only=forensic_report["sample_headers_only"],
        date=sample_date,
        subject=forensic_report["parsed_sample"]["subject"],
        filename_safe_subject=parsed_sample["filename_safe_subject"],
        body=forensic_report["parsed_sample"]["body"]
    )

    for address in forensic_report["parsed_sample"]["to"]:
        sample.add_to(display_name=address["display_name"],
                      address=address["address"])
    for address in forensic_report["parsed_sample"]["reply_to"]:
        sample.add_reply_to(display_name=address["display_name"],
                            address=address["address"])
    for address in forensic_report["parsed_sample"]["cc"]:
        sample.add_cc(display_name=address["display_name"],
                      address=address["address"])
    for address in forensic_report["parsed_sample"]["bcc"]:
        sample.add_bcc(display_name=address["display_name"],
                       address=address["address"])
    for attachment in forensic_report["parsed_sample"]["attachments"]:
        sample.add_attachment(filename=attachment["filename"],
                              content_type=attachment["mail_content_type"])

    forensic_doc = _ForensicReportDoc(
        feedback_type=forensic_report["feedback_type"],
        user_agent=forensic_report["user_agent"],
        version=forensic_report["version"],
        original_mail_from=forensic_report["original_mail_from"],
        arrival_date=arrival_date,
        domain=forensic_report["reported_domain"],
        original_envelope_id=forensic_report["original_envelope_id"],
        authentication_results=forensic_report["authentication_results"],
        delivery_results=forensic_report["delivery_result"],
        source_ip_address=forensic_report["source"]["ip_address"],
        source_country=forensic_report["source"]["country"],
        source_reverse_dns=forensic_report["source"]["reverse_dns"],
        source_base_domain=forensic_report["source"]["base_domain"],
        authentication_mechanisms=forensic_report["authentication_mechanisms"],
        auth_failure=forensic_report["auth_failure"],
        dkim_domain=forensic_report["dkim_domain"],
        original_rcpt_to=forensic_report["original_rcpt_to"],
        sample=sample
    )

    forensic_doc.meta.index = index
    try:
        forensic_doc.save()
    except Exception as e:
        raise ElasticsearchError(
            "Elasticsearch error: {0}".format(e.__str__()))
Example #6
def save_aggregate_report_to_elasticsearch(aggregate_report,
                                           index="dmarc_aggregate"):
    """
    Saves a parsed DMARC aggregate report to ElasticSearch

    Args:
        aggregate_report (OrderedDict): A parsed forensic report
        index (str): The name of the index to save to

    Raises:
            AlreadySaved
    """
    logger.debug("Saving aggregate report to Elasticsearch")
    aggregate_report = aggregate_report.copy()
    metadata = aggregate_report["report_metadata"]
    org_name = metadata["org_name"]
    report_id = metadata["report_id"]
    domain = aggregate_report["policy_published"]["domain"]
    begin_date = human_timestamp_to_datetime(metadata["begin_date"])
    end_date = human_timestamp_to_datetime(metadata["end_date"])
    begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%S")
    end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%S")
    aggregate_report["begin_date"] = begin_date
    aggregate_report["end_date"] = end_date
    date_range = (aggregate_report["begin_date"],
                  aggregate_report["end_date"])

    org_name_query = Q(dict(match=dict(org_name=org_name)))
    report_id_query = Q(dict(match=dict(report_id=report_id)))
    domain_query = Q(dict(match=dict(domain=domain)))
    begin_date_query = Q(dict(match=dict(date_range=begin_date)))
    end_date_query = Q(dict(match=dict(date_range=end_date)))

    search = Index(index).search()
    search.query = org_name_query & report_id_query & domain_query & \
        begin_date_query & end_date_query

    existing = search.execute()
    if len(existing) > 0:
        raise AlreadySaved("An aggregate report ID {0} from {1} about {2} "
                           "with a date range of {3} UTC to {4} UTC already "
                           "exists in "
                           "Elasticsearch".format(report_id,
                                                  org_name,
                                                  domain,
                                                  begin_date_human,
                                                  end_date_human))
    published_policy = _PublishedPolicy(
        domain=aggregate_report["policy_published"]["domain"],
        adkim=aggregate_report["policy_published"]["adkim"],
        aspf=aggregate_report["policy_published"]["aspf"],
        p=aggregate_report["policy_published"]["p"],
        sp=aggregate_report["policy_published"]["sp"],
        pct=aggregate_report["policy_published"]["pct"],
        fo=aggregate_report["policy_published"]["fo"]
    )

    for record in aggregate_report["records"]:
        agg_doc = _AggregateReportDoc(
            xml_schemea=aggregate_report["xml_schema"],
            org_name=metadata["org_name"],
            org_email=metadata["org_email"],
            org_extra_contact_info=metadata["org_extra_contact_info"],
            report_id=metadata["report_id"],
            date_range=date_range,
            errors=metadata["errors"],
            published_policy=published_policy,
            source_ip_address=record["source"]["ip_address"],
            source_country=record["source"]["country"],
            source_reverse_dns=record["source"]["reverse_dns"],
            source_base_domain=record["source"]["base_domain"],
            message_count=record["count"],
            disposition=record["policy_evaluated"]["disposition"],
            dkim_aligned=record["policy_evaluated"]["dkim"] == "pass",
            spf_aligned=record["policy_evaluated"]["spf"] == "pass",
            header_from=record["identifiers"]["header_from"],
            envelope_from=record["identifiers"]["envelope_from"],
            envelope_to=record["identifiers"]["envelope_to"]
        )

        for override in record["policy_evaluated"]["policy_override_reasons"]:
            agg_doc.add_policy_override(type_=override["type"],
                                        comment=override["comment"])

        for dkim_result in record["auth_results"]["dkim"]:
            agg_doc.add_dkim_result(domain=dkim_result["domain"],
                                    selector=dkim_result["selector"],
                                    result=dkim_result["result"])

        for spf_result in record["auth_results"]["spf"]:
            agg_doc.add_spf_result(domain=spf_result["domain"],
                                   scope=spf_result["scope"],
                                   result=spf_result["result"])

        agg_doc.meta.index = index
        try:
            agg_doc.save()
        except Exception as e:
            raise ElasticsearchError(
                "Elasticsearch error: {0}".format(e.__str__()))