def main():
    #
    # initialize module
    #
    p = ArgumentParser(
        description=textwrap.dedent('''\
            cgmail is a CLI tool for debugging; it allows you to easily input an email
            message and print out the py-cgmail data structure.

            example usage:
                $ cat test.eml | cgmail
                $ cgmail --file test.eml
        '''),
        formatter_class=RawDescriptionHelpFormatter,
        prog='cgmail'
    )

    p.add_argument("-f", "--file", dest="file", help="specify email file")

    args = p.parse_args()

    # configure console logging
    loglevel = logging.INFO
    console = logging.StreamHandler()
    logging.getLogger('').setLevel(loglevel)
    console.setFormatter(logging.Formatter(LOG_FORMAT))
    logging.getLogger('').addHandler(console)

    options = vars(args)

    # get email from file or stdin
    if options.get("file"):
        with open(options["file"]) as f:
            email = f.read()
    else:
        email = sys.stdin.read()

    # parse email message and pretty-print the resulting data structure
    results = cgmail.parse_email_from_string(email)
    pprint(results)
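# A minimal sketch (not part of the original tool) of how the parsed results are
# consumed elsewhere in this codebase: cgmail.parse_email_from_string() returns a
# list of per-message dicts whose 'headers' values are lists, alongside extracted
# 'urls', 'body_email_addresses' and 'mail_parts'. Treat this as an illustration
# of the shape, not an authoritative schema.
def summarize(results):
    for result in results:
        subject = result['headers'].get('subject', ['<none>'])[0]
        print('subject: {0}'.format(subject))
        for url in result.get('urls', []):
            print('  url: {0}'.format(url))
        for addr in result.get('body_email_addresses', []):
            print('  email address: {0}'.format(addr))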
def process(self, data=None):
    defaults = self._defaults()

    rv = []
    for d in self.fetcher.process(split=False):
        body = parse_email_from_string(d)

        obs = {}
        for k, v in defaults.items():
            obs[k] = v

        if self.headers:
            for h in self.headers:
                if body[0]['headers'].get(h):
                    obs[self.headers[h]] = body[0]['headers'][h][0]

        obs['msg'] = d

        try:
            i = Indicator(**obs)
        except InvalidIndicator as e:
            self.logger.error(e)
            self.logger.info('skipping: {}'.format(obs['indicator']))
        else:
            if self.is_archived(i.indicator, i.provider, i.group, i.tags, i.firsttime, i.lasttime):
                self.logger.info('skipping: {}/{}'.format(i.provider, i.indicator))
            else:
                r = self.client.indicators_create(i)
                self.archive(i.indicator, i.provider, i.group, i.tags, i.firsttime, i.lasttime)
                rv.append(r)

        if self.limit:
            self.limit -= 1

            if self.limit == 0:
                self.logger.debug('limit reached...')
                break

    return rv
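# A hedged illustration (assumed names, not taken from the original module) of the
# header mapping behaviour above: 'headers' maps a mail header name onto an
# indicator field, so a rule such as {'from': 'indicator'} lifts the From: address
# into obs['indicator'] before Indicator(**obs) is constructed.
def _map_headers(parsed_message, header_map):
    obs = {}
    for header_name, indicator_field in header_map.items():
        values = parsed_message['headers'].get(header_name)
        if values:
            obs[indicator_field] = values[0]
    return obs

# example: _map_headers(parse_email_from_string(raw)[0], {'from': 'indicator'})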
def main(): """ A script to extract email addresses in the body of spam email messages and submit the following to csirtg.io: * From * Subject * Description * Email Address """ p = ArgumentParser( description=textwrap.dedent('''\ example usage: $ cat test.eml | cgmail $ cgmail --file test.eml '''), formatter_class=RawDescriptionHelpFormatter, prog='cgmail' ) p.add_argument('-d', '--debug', dest='debug', action="store_true") p.add_argument("-f", "--file", dest="file", help="specify email file") args = p.parse_args() loglevel = logging.INFO if args.debug: loglevel = logging.DEBUG console = logging.StreamHandler() logging.getLogger('').setLevel(loglevel) console.setFormatter(logging.Formatter(LOG_FORMAT)) logging.getLogger('').addHandler(console) options = vars(args) # get email from file or stdin if options.get("file"): with open(options["file"]) as f: email = f.read() else: email = sys.stdin.read() logger.info("wf-email-addresses processing email") # Initiate wf client object cli = Client(token=WHITEFACE_TOKEN) # parse email message results = cgmail.parse_email_from_string(email) sent_count = 0 for result in results: adata = {} data = {} if result['body_email_addresses']: for email_address in result['body_email_addresses']: if find_exclusions(email_address): # skip the indicator as it was found in the excludes list logger.info("skipping {0} as it was marked for exclusion".format(email_address)) continue else: # add from to adata if exists if 'from' in result['headers']: adata['from'] = sanitize(result['headers']['from'][0]) # add subject to adata if exists if 'subject' in result['headers']: adata['subject'] = sanitize(result['headers']['subject'][0]) data = { "user": WHITEFACE_USER, "feed": WHITEFACE_FEED, "indicator": email_address, "tags": "uce, email-address", "description": "email addresses parsed out of the message body sourced from unsolicited " \ "commercial email (spam)" } # add adata as a comment if populated if adata: comment = json.dumps(adata) data['comment'] = comment try: ret = Indicator(cli, data).submit() if ret['indicator']['id']: sent_count += 1 except Exception as e: raise Exception(e) logger.info("sent {0} email addresses to csirtg.io".format(sent_count))
# -*- coding: utf-8 -*-
import cgmail

TEST_FILE = 'samples/email/multi_mixed_plain_rfc822_plain_02.eml'

with open(TEST_FILE) as f:
    email = f.read()

results = cgmail.parse_email_from_string(email)


def test_message_headers():
    assert results[0]['headers']['return-path'][0] == '<*****@*****.**>'
    assert results[1]['headers']['delivered-to'][0] == '*****@*****.**'


def test_message_parts():
    assert results[0]['mail_parts'][0]['decoded_body'].startswith('give me your credentials')
    assert results[1]['mail_parts'][0]['decoded_body'].startswith('forward attachment as inline')


def test_extract_urls():
    assert "http://www.example.com" in results[0]['urls']
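# The sample is a multipart message carrying an attached message/rfc822 part, which
# is presumably why parse_email_from_string() returns two entries here: results[0]
# for the outer message and results[1] for the forwarded one. The tests run under a
# standard test runner, e.g.:
#
#     $ pytest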
def main(): """ A script to extract URLs in the body of spam email messages and submitting the following to whiteface: * Date * From * Subject * Description * URL """ p = ArgumentParser( description=textwrap.dedent('''\ example usage: $ cat test.eml | cgmail -v $ cgmail --file test.eml '''), formatter_class=RawDescriptionHelpFormatter, prog='cgmail' ) p.add_argument('-d', '--debug', dest='debug', action="store_true") p.add_argument("-f", "--file", dest="file", help="specify email file") p.add_argument('--urls', action='store_true') args = p.parse_args() loglevel = logging.INFO if args.debug: loglevel = logging.DEBUG console = logging.StreamHandler() logging.getLogger('').setLevel(loglevel) console.setFormatter(logging.Formatter(LOG_FORMAT)) logging.getLogger('').addHandler(console) options = vars(args) # get email from file or stdin if options.get("file"): with open(options["file"]) as f: email = f.read() else: email = sys.stdin.read() logger.info("processing email") # parse email message results = cgmail.parse_email_from_string(email) for result in results: adata = {} if result['urls']: for url in result['urls']: if 'date' in result['headers']: adata['date'] = result['headers']['date'][0] if 'from' in result['headers']: adata['from'] = result['headers']['from'][0] if 'subject' in result['headers']: adata['subject'] = result['headers']['subject'][0] adata['description'] = 'urls parsed out of the message body sourced from unsolicited commercial ' \ 'email (spam)' comment = json.dumps(adata) c = Client(token=WHITEFACE_TOKEN) i = Indicator(c, { 'feed': WHITEFACE_FEED, 'user': WHITEFACE_USER, 'indicator': url, 'tags': 'uce,uce-urls', 'comment': comment }).submit() logger.info('logged to whiteface %s ' % i['indicator']['location'])
def main():
    #
    # initialize module
    #
    p = ArgumentParser(
        description=textwrap.dedent('''\
            example usage:
                $ cat test.eml | cgmail
                $ cgmail --file test.eml
        '''),
        formatter_class=RawDescriptionHelpFormatter,
        prog='cgmail'
    )

    p.add_argument("-v", "--verbose", dest="verbose", action="count",
                   help="set verbosity level [default: %(default)s]")
    p.add_argument('-d', '--debug', dest='debug', action="store_true")
    p.add_argument("-f", "--file", dest="file", help="specify email file")

    # cif arguments
    p.add_argument("--confidence", help="specify confidence for submitting to CIF", default=CONFIDENCE)
    p.add_argument("--remote", help="specify CIF remote")
    p.add_argument("--token", help="specify CIF token")
    p.add_argument("--config", help="specify CIF config [default: %(default)s]",
                   default=os.path.expanduser("~/.cif.yml"))
    p.add_argument("--tags", help="specify CIF tags [default: %(default)s]", default=["phishing"])
    p.add_argument("--group", help="specify CIF group [default: %(default)s]", default="everyone")
    p.add_argument("--tlp", help="specify CIF TLP [default: %(default)s]", default=TLP)
    p.add_argument("--no-verify-ssl", action="store_true", default=False)
    p.add_argument("--raw", action="store_true", help="include raw message data")
    p.add_argument("--provider", help="specify feed provider [default: %(default)s]", default=PROVIDER)
    p.add_argument('--exclude', help='url patterns to exclude [default: %(default)s]', default=EXCLUDE)
    p.add_argument('--confidence-lower', help='patterns to automatically lower confidence',
                   default=CONFIDENCE_LOWER)
    p.add_argument('-n', '--not-really', help='do not submit', action='store_true')
    p.add_argument('--cache', help='location to cache whitelist [default: %(default)s]', default=WHITELIST_CACHE)
    p.add_argument('--blacklist-cache', default=BLACKLIST_CACHE)

    # process arguments
    args = p.parse_args()
    setup_logging(args)
    logger = logging.getLogger(__name__)

    exclude = None
    if args.exclude:
        exclude = re.compile(args.exclude)

    confidence_lower = None
    if args.confidence_lower:
        confidence_lower = re.compile(args.confidence_lower)

    # merge config file values into any unset CLI options
    o = read_config(args)
    options = vars(args)
    for v in options:
        if options[v] is None:
            options[v] = o.get(v)

    if not options.get('token'):
        raise RuntimeError('missing --token')

    if options.get("file"):
        with open(options["file"]) as f:
            email = f.read()
    else:
        email = sys.stdin.read()

    # extract urls from message body and mail parts
    bits = cgmail.parse_email_from_string(email)
    urls = set()
    for n in bits:
        if n.get('urls'):
            for u in n['urls']:
                urls.add(u)

    verify_ssl = True
    if options.get('no_verify_ssl'):
        verify_ssl = False

    # initialize cif client
    cli = Client(remote=options["remote"], token=options["token"], verify_ssl=verify_ssl)

    # refresh the whitelist cache if it is missing or stale
    update_cache = True
    if os.path.isfile(args.cache):
        modified = os.path.getmtime(args.cache)
        if arrow.utcnow() < arrow.get(modified + 84600):
            update_cache = False

    if update_cache:
        # pull FQDN whitelist
        filters = {
            'tags': 'whitelist',
            'otype': 'fqdn',
            'confidence': 25,
        }
        now = arrow.utcnow()
        filters['reporttimeend'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))
        now = now.replace(days=-7)
        filters['reporttime'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))

        ret = cli.search(limit=50000, filters=filters, sort='reporttime', sort_direction='desc')

        with open(args.cache, 'w') as f:
            for r in ret:
                f.write("{0}\n".format(r['observable']))

    # refresh the blacklist cache if it is missing or stale
    update_cache = True
    if os.path.isfile(args.blacklist_cache):
        modified = os.path.getmtime(args.blacklist_cache)
        if arrow.utcnow() < arrow.get(modified + 84600):
            update_cache = False

    if update_cache:
        filters = {
            'tags': 'phishing,suspicious,malware',
            'otype': 'fqdn',
            'confidence': 75,
        }
        now = arrow.utcnow()
        filters['reporttimeend'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))
        now = now.replace(days=-7)
        filters['reporttime'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))

        ret = cli.search(limit=50000, filters=filters, sort='reporttime', sort_direction='desc')

        with open(args.blacklist_cache, 'w') as f:
            for r in ret:
                f.write("{0}\n".format(r['observable']))

    fqdns = set()
    with open(args.cache) as f:
        for l in f:
            fqdns.add(l.rstrip("\n"))

    fqdns_blacklist = set()
    with open(args.blacklist_cache) as f:
        for l in f:
            fqdns_blacklist.add(l.rstrip("\n"))

    for u in urls:
        u = u.rstrip('\/')
        u = urlparse(u)
        fqdn = url_to_fqdn(u.geturl())

        if exclude and exclude.search(fqdn):
            continue

        # adjust confidence based on whitelist/blacklist membership
        confidence = options['confidence']
        if match_whitelist(fqdns, u.netloc):
            if (u.netloc not in URL_SHORTNERS) and (not match_whitelist(HOSTING_PROVIDERS, u.netloc)):
                confidence = options['confidence'] - 15
            else:
                confidence = options['confidence'] + 5
        elif match_whitelist(fqdns_blacklist, u.netloc):
            confidence = options['confidence'] + 10
        else:
            # else: raise confidence
            confidence = options['confidence'] + 5

        logger.info("submitting: {0}".format(u.geturl()))

        o = Observable(
            observable=u.geturl(),
            confidence=confidence,
            tlp=options["tlp"],
            group=options["group"],
            tags=options["tags"],
            provider=options.get('provider')
        )

        o = o.__dict__
        del o['logger']

        if options.get('raw'):
            o['raw'] = email

        if not args.not_really:
            r = cli.submit(o)
            logger.info("submitted: {0}".format(r))
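# A small sketch (not part of the original script) restating the confidence ladder
# applied above, which makes the branches easier to review in isolation; the helper
# name and parameter order are assumptions.
def score_confidence(base, netloc, whitelist, blacklist, shorteners, hosting, match):
    """Return the confidence to submit for a URL's netloc, given a base value."""
    if match(whitelist, netloc):
        if netloc not in shorteners and not match(hosting, netloc):
            return base - 15   # whitelisted and not a shortener/hosting provider: lower
        return base + 5        # whitelisted but a shortener/hosting provider: raise slightly
    if match(blacklist, netloc):
        return base + 10       # known-bad FQDN: raise more
    return base + 5            # unknown FQDN: raise slightly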
def main():
    #
    # initialize module
    #
    p = ArgumentParser(
        description=textwrap.dedent('''\
            example usage:
                $ cat test.eml | cgmail
                $ cgmail --file test.eml
        '''),
        formatter_class=RawDescriptionHelpFormatter,
        prog='cgmail'
    )

    p.add_argument("-v", "--verbose", dest="verbose", action="count",
                   help="set verbosity level [default: %(default)s]")
    p.add_argument('-d', '--debug', dest='debug', action="store_true")
    p.add_argument("-f", "--file", dest="file", help="specify email file")

    # cif arguments
    p.add_argument("--confidence", help="specify confidence for submitting to CIF", default=CONFIDENCE)
    p.add_argument("--remote", help="specify CIF remote")
    p.add_argument("--token", help="specify CIF token")
    p.add_argument("--config", help="specify CIF config [default: %(default)s]",
                   default=os.path.expanduser("~/.cif.yml"))
    p.add_argument("--tags", help="specify CIF tags [default: %(default)s]", default=["phishing"])
    p.add_argument("--group", help="specify CIF group [default: %(default)s]", default="everyone")
    p.add_argument("--tlp", help="specify CIF TLP [default: %(default)s]", default=TLP)
    p.add_argument("--no-verify-ssl", action="store_true", default=False)
    p.add_argument("--raw", action="store_true", help="include raw message data")
    p.add_argument("--provider", help="specify feed provider [default: %(default)s]", default=PROVIDER)
    p.add_argument('--exclude', help='url patterns to exclude [default: %(default)s]', default=EXCLUDE)
    p.add_argument('--confidence-lower', help='patterns to automatically lower confidence',
                   default=CONFIDENCE_LOWER)
    p.add_argument('-n', '--not-really', help='do not submit', action='store_true')
    p.add_argument('--cache', help='location to cache whitelist [default: %(default)s]', default=WHITELIST_CACHE)
    p.add_argument('--blacklist-cache', default=BLACKLIST_CACHE)

    # process arguments
    args = p.parse_args()
    setup_logging(args)
    logger = logging.getLogger(__name__)

    exclude = None
    if args.exclude:
        exclude = re.compile(args.exclude)

    confidence_lower = None
    if args.confidence_lower:
        confidence_lower = re.compile(args.confidence_lower)

    # merge config file values into any unset CLI options
    o = read_config(args)
    options = vars(args)
    for v in options:
        if options[v] is None:
            options[v] = o.get(v)

    if not options.get('token'):
        raise RuntimeError('missing --token')

    if options.get("file"):
        with open(options["file"]) as f:
            email = f.read()
    else:
        email = sys.stdin.read()

    # extract urls from message body and mail parts
    bits = cgmail.parse_email_from_string(email)
    urls = set()
    for n in bits:
        if n.get('urls'):
            for u in n['urls']:
                urls.add(u)

    verify_ssl = True
    if options.get('no_verify_ssl'):
        verify_ssl = False

    # initialize cif client
    cli = Client(remote=options["remote"], token=options["token"], verify_ssl=verify_ssl)

    # refresh the whitelist cache if it is missing or stale
    update_cache = True
    if os.path.isfile(args.cache):
        modified = os.path.getmtime(args.cache)
        if arrow.utcnow() < arrow.get(modified + 84600):
            update_cache = False

    if update_cache:
        # pull FQDN whitelist
        filters = {
            'tags': 'whitelist',
            'otype': 'fqdn',
            'confidence': 25,
        }
        now = arrow.utcnow()
        filters['reporttimeend'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))
        now = now.shift(days=-7)
        filters['reporttime'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))

        ret = cli.search(limit=50000, filters=filters, sort='reporttime', sort_direction='desc')

        with open(args.cache, 'w') as f:
            for r in ret:
                f.write("{0}\n".format(r['observable']))

    # refresh the blacklist cache if it is missing or stale
    update_cache = True
    if os.path.isfile(args.blacklist_cache):
        modified = os.path.getmtime(args.blacklist_cache)
        if arrow.utcnow() < arrow.get(modified + 84600):
            update_cache = False

    if update_cache:
        filters = {
            'tags': 'phishing,suspicious,malware',
            'otype': 'fqdn',
            'confidence': 75,
        }
        now = arrow.utcnow()
        filters['reporttimeend'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))
        now = now.shift(days=-7)
        filters['reporttime'] = '{0}Z'.format(now.format('YYYY-MM-DDTHH:mm:ss'))

        ret = cli.search(limit=50000, filters=filters, sort='reporttime', sort_direction='desc')

        with open(args.blacklist_cache, 'w') as f:
            for r in ret:
                f.write("{0}\n".format(r['observable']))

    fqdns = set()
    with open(args.cache) as f:
        for l in f:
            fqdns.add(l.rstrip("\n"))

    fqdns_blacklist = set()
    with open(args.blacklist_cache) as f:
        for l in f:
            fqdns_blacklist.add(l.rstrip("\n"))

    for u in urls:
        u = u.rstrip('\/')
        u = urlparse(u)
        fqdn = url_to_fqdn(u.geturl())

        if exclude and exclude.search(fqdn):
            continue

        # adjust confidence based on whitelist/blacklist membership
        confidence = options['confidence']
        if match_whitelist(fqdns, u.netloc):
            if (u.netloc not in URL_SHORTNERS) and (not match_whitelist(HOSTING_PROVIDERS, u.netloc)):
                confidence = options['confidence'] - 15
            else:
                confidence = options['confidence'] + 5
        elif match_whitelist(fqdns_blacklist, u.netloc):
            confidence = options['confidence'] + 10
        else:
            # else: raise confidence
            confidence = options['confidence'] + 5

        logger.info("submitting: {0}".format(u.geturl()))

        o = Observable(
            observable=u.geturl(),
            confidence=confidence,
            tlp=options["tlp"],
            group=options["group"],
            tags=options["tags"],
            provider=options.get('provider')
        )

        o = o.__dict__
        del o['logger']

        if options.get('raw'):
            o['raw'] = email

        if not args.not_really:
            r = cli.submit(o)
            logger.info("submitted: {0}".format(r))
def main(): """ A script to parse spam emails and submit threat intelligence to csirtg.io. :return: int """ # Setup p = ArgumentParser( description=textwrap.dedent('''\ example usage: $ cat test.eml | cgmail -v $ cgmail --file test.eml '''), formatter_class=RawDescriptionHelpFormatter, prog='cgmail' ) p.add_argument('-d', '--debug', dest='debug', action="store_true") p.add_argument("-f", "--file", dest="file", help="specify email file") p.add_argument('--urls', action='store_true') args = p.parse_args() loglevel = logging.INFO if args.debug: loglevel = logging.DEBUG console = logging.StreamHandler() logging.getLogger('').setLevel(loglevel) console.setFormatter(logging.Formatter(LOG_FORMAT)) logging.getLogger('').addHandler(console) options = vars(args) # load config file from users homes directory (e.g: ~/) try: with open(os.path.expanduser("~/.csirtg.yml"), 'r') as stream: config = yaml.load(stream) except FileNotFoundError as e: logger.error("Cannot load the configuration file: {0}".format(e)) return 1 # test to ensure required values are specified in the config file required_config = ['token', 'username', 'feed-email-addresses', 'feed-urls', 'feed-uce-ip', 'hostname'] for required in required_config: if not config[required]: logger.error("Required config value \"{0}\" is empty".format(required)) return 1 # get email from file or stdin if options.get("file"): logger.debug("open email through file handle") with open(options["file"]) as f: email = f.read() else: logger.debug("read email through stdin") email = sys.stdin.read() # post-setup # parse email message logger.info("parsing email via cgmail") results = cgmail.parse_email_from_string(email) if results: # parse urls out of the message body submission_count = parse_urls(config, results) logger.info("{0},urls,submitted to csirtg.io".format(submission_count)) # parse email addresses out of message body submission_count = parse_email_addresses(config, results) logger.info("{0},email-addresses,submitted to csirtg.io".format(submission_count)) # parse ip addresses out of received headers submission_count = parse_received_headers(config, results) logger.info("{0},ip-addresses,submitted to csirtg.io".format(submission_count)) # parse email address seen in return-path header email_address_address_headers = ['return-path', 'from', 'reply-to'] for value in email_address_address_headers: submission_count = parse_email_address_headers(config, value, results) logger.info("{0},email-addresses,submitted to csirtg.io".format(submission_count)) # parse email attachments submission_count = parse_attachments(config, results) logger.info("{0},attachments,submitted to csirtg.io".format(submission_count)) else: logger.error("email did not parse correctly, exiting") return 1 return 0