示例#1
0
def main():
    """CLI entry point: read an email from a file or stdin, parse it with
    py-cgmail and pretty-print the resulting data structure."""

    # build the command-line interface
    parser = ArgumentParser(
        description=textwrap.dedent('''\

        cgmail is a CLI tool for debugging, it allows you to easily input
        a email message and print out the py-cgmail data structure.

        example usage:
            $ cat test.eml | cgmail
            $ cgmail --file test.eml
        '''),
        formatter_class=RawDescriptionHelpFormatter,
        prog='cgmail'
    )
    parser.add_argument("-f", "--file", dest="file", help="specify email file")
    args = parser.parse_args()

    # attach a console handler to the root logger at INFO level
    root = logging.getLogger('')
    handler = logging.StreamHandler()
    root.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter(LOG_FORMAT))
    root.addHandler(handler)

    opts = vars(args)

    # get email from file or stdin
    if opts.get("file"):
        with open(opts["file"]) as fh:
            raw = fh.read()
    else:
        raw = sys.stdin.read()

    # parse email message and dump the structure
    pprint(cgmail.parse_email_from_string(raw))
示例#2
0
    def process(self, data=None):
        """Fetch raw messages, build an indicator from each one's headers and
        submit any that have not already been archived.

        :param data: unused; retained for interface compatibility
        :return: list of responses from ``indicators_create``
        """
        defaults = self._defaults()
        created = []

        for raw in self.fetcher.process(split=False):
            parsed = parse_email_from_string(raw)

            # start from a copy of the default observable fields
            obs = dict(defaults)

            # map configured message headers onto observable fields
            if self.headers:
                for name in self.headers:
                    values = parsed[0]['headers'].get(name)
                    if values:
                        obs[self.headers[name]] = values[0]

            obs['msg'] = raw

            try:
                indicator = Indicator(**obs)
            except InvalidIndicator as e:
                self.logger.error(e)
                self.logger.info('skipping: {}'.format(obs['indicator']))
                continue

            # skip anything we've already submitted
            if self.is_archived(indicator.indicator, indicator.provider, indicator.group,
                                indicator.tags, indicator.firsttime, indicator.lasttime):
                self.logger.info('skipping: {}/{}'.format(indicator.provider, indicator.indicator))
                continue

            resp = self.client.indicators_create(indicator)
            self.archive(indicator.indicator, indicator.provider, indicator.group,
                         indicator.tags, indicator.firsttime, indicator.lasttime)
            created.append(resp)

            # honour an optional submission limit
            if self.limit:
                self.limit -= 1
                if self.limit == 0:
                    self.logger.debug('limit reached...')
                    break

        return created
def main():
    """
    A script to extract email addresses in the body of spam email messages and submit the following to
    csirtg.io:

    * From
    * Subject
    * Description
    * Email Address
    """

    p = ArgumentParser(
        description=textwrap.dedent('''\
        example usage:
            $ cat test.eml | cgmail
            $ cgmail --file test.eml
        '''),
        formatter_class=RawDescriptionHelpFormatter,
        prog='cgmail'
    )

    p.add_argument('-d', '--debug', dest='debug', action="store_true")
    p.add_argument("-f", "--file", dest="file", help="specify email file")

    args = p.parse_args()

    # wire a console handler onto the root logger at the requested level
    loglevel = logging.DEBUG if args.debug else logging.INFO
    console = logging.StreamHandler()
    logging.getLogger('').setLevel(loglevel)
    console.setFormatter(logging.Formatter(LOG_FORMAT))
    logging.getLogger('').addHandler(console)

    options = vars(args)

    # get email from file or stdin
    if options.get("file"):
        with open(options["file"]) as f:
            email = f.read()
    else:
        email = sys.stdin.read()
        # NOTE(review): this is only logged on the stdin path; confirm whether
        # it was meant to cover file input as well
        logger.info("wf-email-addresses processing email")

    # Initiate wf client object
    cli = Client(token=WHITEFACE_TOKEN)

    # parse email message
    results = cgmail.parse_email_from_string(email)

    sent_count = 0

    for result in results:
        if not result['body_email_addresses']:
            continue

        # header context is per-message, not per-address: build it once
        adata = {}
        if 'from' in result['headers']:
            adata['from'] = sanitize(result['headers']['from'][0])
        if 'subject' in result['headers']:
            adata['subject'] = sanitize(result['headers']['subject'][0])

        for email_address in result['body_email_addresses']:
            if find_exclusions(email_address):
                # skip the indicator as it was found in the excludes list
                logger.info("skipping {0} as it was marked for exclusion".format(email_address))
                continue

            data = {
                "user": WHITEFACE_USER,
                "feed": WHITEFACE_FEED,
                "indicator": email_address,
                "tags": "uce, email-address",
                "description": "email addresses parsed out of the message body sourced from unsolicited " \
                               "commercial email (spam)"
            }

            # add adata as a comment if populated
            if adata:
                data['comment'] = json.dumps(adata)

            # let submission failures propagate with their original type and
            # traceback (previously wrapped in a bare Exception, losing both)
            ret = Indicator(cli, data).submit()
            if ret['indicator']['id']:
                sent_count += 1

    logger.info("sent {0} email addresses to csirtg.io".format(sent_count))
# -*- coding: utf-8 -*

import cgmail

# Sample message with a nested (message/rfc822) part used by every test below.
TEST_FILE = 'samples/email/multi_mixed_plain_rfc822_plain_02.eml'

# Parse the fixture once at import time; the test functions all assert
# against this shared parse result (results[0] = outer message,
# results[1] = attached message).
with open(TEST_FILE) as f:
    email = f.read()

results = cgmail.parse_email_from_string(email)

def test_message_headers():
    """Both the outer and the attached message expose their own headers."""
    outer = results[0]['headers']
    inner = results[1]['headers']
    assert outer['return-path'][0] == '<*****@*****.**>'
    assert inner['delivered-to'][0] == '*****@*****.**'

def test_message_parts():
    """Each message's first mail part decodes to its expected body text."""
    expected = {
        0: 'give me your credentials',
        1: 'forward attachment as inline',
    }
    for idx, prefix in expected.items():
        body = results[idx]['mail_parts'][0]['decoded_body']
        assert body.startswith(prefix)

def test_extract_urls():
    """A URL in the outer message body is collected into its 'urls' list."""
    extracted = results[0]['urls']
    assert "http://www.example.com" in extracted

示例#5
0
def main():
    """
    A script to extract URLs in the body of spam email messages and submitting the following to
    whiteface:

    * Date
    * From
    * Subject
    * Description
    * URL
    """

    p = ArgumentParser(
        description=textwrap.dedent('''\
        example usage:
            $ cat test.eml | cgmail -v
            $ cgmail --file test.eml
        '''),
        formatter_class=RawDescriptionHelpFormatter,
        prog='cgmail'
    )

    p.add_argument('-d', '--debug', dest='debug', action="store_true")
    p.add_argument("-f", "--file", dest="file", help="specify email file")
    p.add_argument('--urls', action='store_true')

    args = p.parse_args()

    # console logging at DEBUG when -d is given, INFO otherwise
    loglevel = logging.DEBUG if args.debug else logging.INFO

    console = logging.StreamHandler()
    logging.getLogger('').setLevel(loglevel)
    console.setFormatter(logging.Formatter(LOG_FORMAT))
    logging.getLogger('').addHandler(console)

    options = vars(args)

    # get email from file or stdin
    if options.get("file"):
        with open(options["file"]) as f:
            email = f.read()
    else:
        email = sys.stdin.read()
        # NOTE(review): only logged on the stdin path — confirm intent
        logger.info("processing email")

    # parse email message
    results = cgmail.parse_email_from_string(email)

    # one client for all submissions (previously constructed once per URL)
    c = Client(token=WHITEFACE_TOKEN)

    for result in results:
        if not result['urls']:
            continue

        # the header context depends only on the message, so build it (and
        # its JSON form) once instead of once per URL
        adata = {}
        if 'date' in result['headers']:
            adata['date'] = result['headers']['date'][0]
        if 'from' in result['headers']:
            adata['from'] = result['headers']['from'][0]
        if 'subject' in result['headers']:
            adata['subject'] = result['headers']['subject'][0]

        adata['description'] = 'urls parsed out of the message body sourced from unsolicited commercial ' \
                               'email (spam)'

        comment = json.dumps(adata)

        for url in result['urls']:
            i = Indicator(c, {
                'feed': WHITEFACE_FEED,
                'user': WHITEFACE_USER,
                'indicator': url,
                'tags': 'uce,uce-urls',
                'comment': comment
            }).submit()
            logger.info('logged to whiteface %s ' % i['indicator']['location'])
示例#6
0
def main():
    """Parse a spam email, extract its URLs, score them against cached CIF
    whitelist/blacklist FQDNs and submit them to CIF as observables."""

    #
    # initialize module
    #

    p = ArgumentParser(
        description=textwrap.dedent('''\
        example usage:
            $ cat test.eml | cgmail
            $ cgmail --file test.eml
        '''),
        formatter_class=RawDescriptionHelpFormatter,
        prog='cgmail'
    )

    p.add_argument("-v", "--verbose", dest="verbose", action="count",
                   help="set verbosity level [default: %(default)s]")
    p.add_argument('-d', '--debug', dest='debug', action="store_true")

    p.add_argument("-f", "--file", dest="file", help="specify email file")

    # cif arguments
    p.add_argument("--confidence", help="specify confidence for submitting to CIF", default=CONFIDENCE)
    p.add_argument("--remote", help="specify CIF remote")
    p.add_argument("--token", help="specify CIF token")
    p.add_argument("--config", help="specify CIF config [default: %(default)s",
                   default=os.path.expanduser("~/.cif.yml"))
    p.add_argument("--tags", help="specify CIF tags [default: %(default)s", default=["phishing"])
    p.add_argument("--group", help="specify CIF group [default: %(default)s", default="everyone")
    p.add_argument("--tlp", help="specify CIF TLP [default: %(default)s", default=TLP)
    p.add_argument("--no-verify-ssl", action="store_true", default=False)
    p.add_argument("--raw", action="store_true", help="include raw message data")
    p.add_argument("--provider", help="specify feed provider [default: %(default)s]", default=PROVIDER)

    p.add_argument('--exclude', help='url patterns to exclude [default: %(default)s', default=EXCLUDE)
    p.add_argument('--confidence-lower', help='patterns to automatically lower confidence', default=CONFIDENCE_LOWER)
    p.add_argument('-n', '--not-really', help='do not submit', action='store_true')
    p.add_argument('--cache', help='location to cache whitelist [default: %(default)s', default=WHITELIST_CACHE)
    p.add_argument('--blacklist-cache', default=BLACKLIST_CACHE)

    # Process arguments
    args = p.parse_args()
    setup_logging(args)
    logger = logging.getLogger(__name__)

    exclude = None
    if args.exclude:
        exclude = re.compile(args.exclude)

    # NOTE(review): compiled but never applied below — confirm whether
    # confidence lowering was meant to be used when scoring URLs
    confidence_lower = None
    if args.confidence_lower:
        confidence_lower = re.compile(args.confidence_lower)

    # fill unset CLI options from the config file
    o = read_config(args)
    options = vars(args)
    for v in options:
        if options[v] is None:
            options[v] = o.get(v)

    if not options.get('token'):
        raise RuntimeError('missing --token')

    # read the raw message from file or stdin
    if options.get("file"):
        with open(options["file"]) as f:
            email = f.read()
    else:
        email = sys.stdin.read()

    # extract urls from message body and mail parts
    bits = cgmail.parse_email_from_string(email)
    urls = set()

    for n in bits:
        if n.get('urls'):
            for u in n['urls']:
                urls.add(u)

    verify_ssl = not options.get('no_verify_ssl')

    # initialize cif client
    cli = Client(remote=options["remote"], token=options["token"], verify_ssl=verify_ssl)

    def _refresh_cache(path, tags, confidence):
        """Rewrite the FQDN cache file at *path* from a CIF search unless the
        file is less than a day old."""
        if os.path.isfile(path):
            modified = os.path.getmtime(path)
            # 86400 seconds == 24 hours (was 84600, a digit-transposition typo
            # that shortened the cache lifetime by 30 minutes)
            if arrow.utcnow() < arrow.get(modified + 86400):
                return

        filters = {
            'tags': tags,
            'otype': 'fqdn',
            'confidence': confidence,
        }
        now = arrow.utcnow()
        filters['reporttimeend'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))
        # shift() replaces the deprecated plural-unit replace(days=-7); the
        # rest of this codebase already uses shift()
        now = now.shift(days=-7)
        filters['reporttime'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))

        ret = cli.search(limit=50000, filters=filters, sort='reporttime', sort_direction='desc')
        with open(path, 'w') as f:
            for r in ret:
                f.write("{0}\n".format(r['observable']))

    # pull FQDN whitelist and blacklist
    _refresh_cache(args.cache, 'whitelist', 25)
    _refresh_cache(args.blacklist_cache, 'phishing,suspicious,malware', 75)

    def _load_fqdns(path):
        """Read a one-FQDN-per-line cache file into a set."""
        with open(path) as f:
            return {line.rstrip("\n") for line in f}

    fqdns = _load_fqdns(args.cache)
    fqdns_blacklist = _load_fqdns(args.blacklist_cache)

    for u in urls:
        u = u.rstrip('\/')
        u = urlparse(u)

        fqdn = url_to_fqdn(u.geturl())
        if exclude and exclude.search(fqdn):
            continue

        # score: a whitelisted host lowers confidence unless it is a URL
        # shortener or hosting provider; a blacklisted host raises it most
        if match_whitelist(fqdns, u.netloc):
            if (u.netloc not in URL_SHORTNERS) and (not match_whitelist(HOSTING_PROVIDERS, u.netloc)):
                confidence = options['confidence'] - 15
            else:
                confidence = options['confidence'] + 5
        elif match_whitelist(fqdns_blacklist, u.netloc):
            confidence = options['confidence'] + 10
        else:
            confidence = options['confidence'] + 5

        logger.info("submitting: {0}".format(u.geturl()))

        o = Observable(
            observable=u.geturl(),
            confidence=confidence,
            tlp=options["tlp"],
            group=options["group"],
            tags=options["tags"],
            provider=options.get('provider')
        )

        o = o.__dict__
        del o['logger']

        if options.get('raw'):
            # o is a plain dict at this point; the previous attribute
            # assignment (o.raw = email) raised AttributeError
            o['raw'] = email

        if not args.not_really:
            r = cli.submit(o)
            logger.info("submitted: {0}".format(r))
示例#7
0
def main():
    """Parse a spam email, extract its URLs, score them against cached CIF
    whitelist/blacklist FQDNs and submit them to CIF as observables."""

    #
    # initialize module
    #

    p = ArgumentParser(description=textwrap.dedent('''\
        example usage:
            $ cat test.eml | cgmail
            $ cgmail --file test.eml
        '''),
                       formatter_class=RawDescriptionHelpFormatter,
                       prog='cgmail')

    p.add_argument("-v",
                   "--verbose",
                   dest="verbose",
                   action="count",
                   help="set verbosity level [default: %(default)s]")
    p.add_argument('-d', '--debug', dest='debug', action="store_true")

    p.add_argument("-f", "--file", dest="file", help="specify email file")

    # cif arguments
    p.add_argument("--confidence",
                   help="specify confidence for submitting to CIF",
                   default=CONFIDENCE)
    p.add_argument("--remote", help="specify CIF remote")
    p.add_argument("--token", help="specify CIF token")
    p.add_argument("--config",
                   help="specify CIF config [default: %(default)s",
                   default=os.path.expanduser("~/.cif.yml"))
    p.add_argument("--tags",
                   help="specify CIF tags [default: %(default)s",
                   default=["phishing"])
    p.add_argument("--group",
                   help="specify CIF group [default: %(default)s",
                   default="everyone")
    p.add_argument("--tlp",
                   help="specify CIF TLP [default: %(default)s",
                   default=TLP)
    p.add_argument("--no-verify-ssl", action="store_true", default=False)
    p.add_argument("--raw",
                   action="store_true",
                   help="include raw message data")
    p.add_argument("--provider",
                   help="specify feed provider [default: %(default)s]",
                   default=PROVIDER)

    p.add_argument('--exclude',
                   help='url patterns to exclude [default: %(default)s',
                   default=EXCLUDE)
    p.add_argument('--confidence-lower',
                   help='patterns to automatically lower confidence',
                   default=CONFIDENCE_LOWER)
    p.add_argument('-n',
                   '--not-really',
                   help='do not submit',
                   action='store_true')
    p.add_argument('--cache',
                   help='location to cache whitelist [default: %(default)s',
                   default=WHITELIST_CACHE)
    p.add_argument('--blacklist-cache', default=BLACKLIST_CACHE)

    # Process arguments
    args = p.parse_args()
    setup_logging(args)
    logger = logging.getLogger(__name__)

    exclude = None
    if args.exclude:
        exclude = re.compile(args.exclude)

    # NOTE(review): compiled but never applied below — confirm whether
    # confidence lowering was meant to be used when scoring URLs
    confidence_lower = None
    if args.confidence_lower:
        confidence_lower = re.compile(args.confidence_lower)

    # fill unset CLI options from the config file
    o = read_config(args)
    options = vars(args)
    for v in options:
        if options[v] is None:
            options[v] = o.get(v)

    if not options.get('token'):
        raise RuntimeError('missing --token')

    # read the raw message from file or stdin
    if options.get("file"):
        with open(options["file"]) as f:
            email = f.read()
    else:
        email = sys.stdin.read()

    # extract urls from message body and mail parts
    bits = cgmail.parse_email_from_string(email)
    urls = set()

    for n in bits:
        if n.get('urls'):
            for u in n['urls']:
                urls.add(u)

    verify_ssl = True
    if options.get('no_verify_ssl'):
        verify_ssl = False

    # initialize cif client
    cli = Client(remote=options["remote"],
                 token=options["token"],
                 verify_ssl=verify_ssl)

    update_cache = True
    if os.path.isfile(args.cache):
        modified = os.path.getmtime(args.cache)
        # 86400 seconds == 24 hours (was 84600, a digit-transposition typo)
        if arrow.utcnow() < arrow.get(modified + 86400):
            update_cache = False

    if update_cache:
        # pull FQDN whitelist

        filters = {
            'tags': 'whitelist',
            'otype': 'fqdn',
            'confidence': 25,
        }
        now = arrow.utcnow()
        filters['reporttimeend'] = '{0}Z'.format(
            now.format('YYYY-MM-DDTHH:mm:ss'))
        now = now.shift(days=-7)
        filters['reporttime'] = '{0}Z'.format(
            now.format('YYYY-MM-DDTHH:mm:ss'))

        ret = cli.search(limit=50000,
                         filters=filters,
                         sort='reporttime',
                         sort_direction='desc')
        with open(args.cache, 'w') as f:
            for r in ret:
                f.write("{0}\n".format(r['observable']))

    update_cache = True
    if os.path.isfile(args.blacklist_cache):
        modified = os.path.getmtime(args.blacklist_cache)
        # 86400 seconds == 24 hours (was 84600, a digit-transposition typo)
        if arrow.utcnow() < arrow.get(modified + 86400):
            update_cache = False

    if update_cache:
        # pull FQDN blacklist
        filters = {
            'tags': 'phishing,suspicious,malware',
            'otype': 'fqdn',
            'confidence': 75,
        }
        now = arrow.utcnow()
        filters['reporttimeend'] = '{0}Z'.format(
            now.format('YYYY-MM-DDTHH:mm:ss'))
        now = now.shift(days=-7)
        filters['reporttime'] = '{0}Z'.format(
            now.format('YYYY-MM-DDTHH:mm:ss'))

        ret = cli.search(limit=50000,
                         filters=filters,
                         sort='reporttime',
                         sort_direction='desc')
        with open(args.blacklist_cache, 'w') as f:
            for r in ret:
                f.write("{0}\n".format(r['observable']))

    # load both caches into sets for O(1) membership tests
    fqdns = set()
    with open(args.cache) as f:
        for l in f:
            fqdns.add(l.rstrip("\n"))

    fqdns_blacklist = set()
    with open(args.blacklist_cache) as f:
        for l in f:
            fqdns_blacklist.add(l.rstrip("\n"))

    for u in urls:
        u = u.rstrip('\/')
        u = urlparse(u)

        fqdn = url_to_fqdn(u.geturl())
        if exclude and exclude.search(fqdn):
            continue

        confidence = options['confidence']

        # score: a whitelisted host lowers confidence unless it is a URL
        # shortener or hosting provider; a blacklisted host raises it most
        if match_whitelist(fqdns, u.netloc):
            if (u.netloc not in URL_SHORTNERS) and (not match_whitelist(
                    HOSTING_PROVIDERS, u.netloc)):
                confidence = options['confidence'] - 15
            else:
                confidence = options['confidence'] + 5
        elif match_whitelist(fqdns_blacklist, u.netloc):
            confidence = options['confidence'] + 10
        else:
            confidence = options['confidence'] + 5

        logger.info("submitting: {0}".format(u.geturl()))

        o = Observable(observable=u.geturl(),
                       confidence=confidence,
                       tlp=options["tlp"],
                       group=options["group"],
                       tags=options["tags"],
                       provider=options.get('provider'))

        o = o.__dict__
        del o['logger']

        if options.get('raw'):
            # o is a plain dict at this point; the previous attribute
            # assignment (o.raw = email) raised AttributeError
            o['raw'] = email

        if not args.not_really:
            r = cli.submit(o)
            logger.info("submitted: {0}".format(r))
示例#8
0
def main():
    """
    A script to parse spam emails and submit threat intelligence to csirtg.io.

    :return: int
    """

    # Setup

    p = ArgumentParser(
        description=textwrap.dedent('''\
        example usage:
            $ cat test.eml | cgmail -v
            $ cgmail --file test.eml
        '''),
        formatter_class=RawDescriptionHelpFormatter,
        prog='cgmail'
    )

    p.add_argument('-d', '--debug', dest='debug', action="store_true")
    p.add_argument("-f", "--file", dest="file", help="specify email file")
    p.add_argument('--urls', action='store_true')

    args = p.parse_args()

    # console logging at DEBUG when -d is given, INFO otherwise
    loglevel = logging.DEBUG if args.debug else logging.INFO

    console = logging.StreamHandler()
    logging.getLogger('').setLevel(loglevel)
    console.setFormatter(logging.Formatter(LOG_FORMAT))
    logging.getLogger('').addHandler(console)

    options = vars(args)

    # load config file from users homes directory (e.g: ~/)
    try:
        with open(os.path.expanduser("~/.csirtg.yml"), 'r') as stream:
            # safe_load: the config is plain data; plain yaml.load without a
            # Loader is deprecated and can execute arbitrary constructors
            config = yaml.safe_load(stream)
    except FileNotFoundError as e:
        logger.error("Cannot load the configuration file: {0}".format(e))
        return 1

    # test to ensure required values are specified in the config file
    required_config = ['token', 'username', 'feed-email-addresses', 'feed-urls', 'feed-uce-ip', 'hostname']

    for required in required_config:
        # .get() so a key that is absent entirely is reported the same way as
        # an empty one, instead of raising KeyError
        if not config.get(required):
            logger.error("Required config value \"{0}\" is empty".format(required))
            return 1

    # get email from file or stdin
    if options.get("file"):
        logger.debug("open email through file handle")
        with open(options["file"]) as f:
            email = f.read()
    else:
        logger.debug("read email through stdin")
        email = sys.stdin.read()

    # post-setup

    # parse email message
    logger.info("parsing email via cgmail")
    results = cgmail.parse_email_from_string(email)

    if not results:
        logger.error("email did not parse correctly, exiting")
        return 1

    # parse urls out of the message body
    submission_count = parse_urls(config, results)
    logger.info("{0},urls,submitted to csirtg.io".format(submission_count))

    # parse email addresses out of message body
    submission_count = parse_email_addresses(config, results)
    logger.info("{0},email-addresses,submitted to csirtg.io".format(submission_count))

    # parse ip addresses out of received headers
    submission_count = parse_received_headers(config, results)
    logger.info("{0},ip-addresses,submitted to csirtg.io".format(submission_count))

    # parse email address seen in return-path header
    email_address_address_headers = ['return-path', 'from', 'reply-to']
    for value in email_address_address_headers:
        submission_count = parse_email_address_headers(config, value, results)
        logger.info("{0},email-addresses,submitted to csirtg.io".format(submission_count))

    # parse email attachments
    submission_count = parse_attachments(config, results)
    logger.info("{0},attachments,submitted to csirtg.io".format(submission_count))

    return 0