def is_valid(self, i):
    """Return True when i['indicator'] resolves to a known indicator type.

    On a TypeError from resolve_itype: re-raise only when running at
    DEBUG level with skip_invalid falsy; otherwise report False.
    """
    try:
        resolve_itype(i['indicator'])
    except TypeError as e:
        # surface the parse failure only while debugging and not told to skip
        if logger.getEffectiveLevel() == logging.DEBUG:
            if not self.skip_invalid:
                raise e
        return False
    return True
def filter_indicator(s, q_filters):
    """Apply the 'indicator' query filter to an elasticsearch-dsl search.

    Pops 'indicator' from q_filters. Exact-match itypes use a term filter,
    IP itypes are delegated to CIDR-aware helpers, and anything that does
    not resolve is treated as a wildcard (SQL '%' normalized to '*') or a
    free-text match against the message field.
    """
    if not q_filters.get('indicator'):
        return s

    i = q_filters.pop('indicator')
    try:
        itype = resolve_itype(i)
    except InvalidIndicator:
        # not a recognized indicator -- fall back to wildcard / free text
        if '%' in i:
            i = i.replace('%', '*')
        if '*' in i:
            return s.query("wildcard", indicator=i)
        s = s.query("match", message=i)
        return s

    if itype in ('email', 'url', 'fqdn', 'md5', 'sha1', 'sha256', 'sha512'):
        s = s.filter('term', indicator=i)
        return s

    # BUG FIX: was `itype is 'ipv4'` / `is 'ipv6'` -- identity comparison
    # against str literals only works by CPython interning accident and is a
    # SyntaxWarning on 3.8+; use equality.
    if itype == 'ipv4':
        return _filter_ipv4(s, i)

    if itype == 'ipv6':
        return _filter_ipv6(s, i)

    return s
def get(self):
    """Predict the probability (0-100, as a string) that ?q= is malicious.

    Whitelisted values and itypes without a predictor score 0. Aborts the
    request with 422 when the value does not resolve to an itype.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('q')
    args = parser.parse_args()

    if not args.q:
        return {'data': '0'}

    if self.is_whitelisted(args.q):
        p = 0
    else:
        itype = resolve_itype(args.q)
        if not itype:
            api.abort(422)
            return

        # BUG FIX: the original applied p[0][0] unconditionally, which
        # raised TypeError for itypes with no predictor (p was the int 0).
        # Index into the prediction matrix only when a predictor ran.
        if itype == 'fqdn':
            p = predict_domain(args.q)
            p = str(round((p[0][0] * 100), 2))
        elif itype == 'url':
            p = predict_url(args.q)
            p = str(round((p[0][0] * 100), 2))
        else:
            p = 0

    return {'data': str(p)}
def expand_ip_idx(data):
    """Split data['indicator'] into address/mask index fields for IPs.

    Populates indicator_ipv4[_mask] or indicator_ipv6[_mask] in place;
    non-IP indicator types are left untouched.
    """
    itype = resolve_itype(data['indicator'])
    if itype not in ['ipv4', 'ipv6']:
        return

    # compiled once (answers the original `# compile?` note); raw string so
    # the \S / \d escapes are not interpreted by the str literal
    cidr_re = re.compile(r'^(\S+)\/(\d+)$')

    # BUG FIX: was `itype is 'ipv4'` -- identity comparison against a str
    # literal only works by interning accident; use equality.
    if itype == 'ipv4':
        match = cidr_re.search(data['indicator'])
        if match:
            data['indicator_ipv4'] = match.group(1)
            data['indicator_ipv4_mask'] = match.group(2)
        else:
            data['indicator_ipv4'] = data['indicator']
        return

    match = cidr_re.search(data['indicator'])
    if match:
        data['indicator_ipv6'] = match.group(1)
        data['indicator_ipv6_mask'] = match.group(2)
    else:
        data['indicator_ipv6'] = data['indicator']
def filter_indicator(s, q_filters):
    """Apply the 'indicator' query filter to an elasticsearch-dsl search.

    Pops 'indicator' from q_filters. Exact-match itypes use a term filter,
    IP itypes are delegated to CIDR-aware helpers, and anything that fails
    to resolve becomes a wildcard ('%' normalized to '*') or a free-text
    match against the message field.
    """
    if not q_filters.get('indicator'):
        return s

    i = q_filters.pop('indicator')
    try:
        itype = resolve_itype(i)
    except TypeError:
        # not a recognized indicator -- fall back to wildcard / free text
        if '%' in i:
            i = i.replace('%', '*')
        if '*' in i:
            return s.query("wildcard", indicator=i)
        s = s.query("match", message=i)
        return s

    if itype in ('email', 'url', 'fqdn', 'md5', 'sha1', 'sha256', 'sha512'):
        return s.filter('term', indicator=i)

    # BUG FIX: was `itype is 'ipv4'` / `is 'ipv6'` -- identity comparison
    # against str literals; use equality.
    if itype == 'ipv4':
        return _filter_ipv4(s, i)

    if itype == 'ipv6':
        return _filter_ipv6(s, i)

    return s
def process(self):
    """Yield indicator dicts parsed from the rule's remote STIX package.

    Each yielded dict is a deep copy of the rule defaults with the
    description, lasttime, indicator and tlp filled from the feed.
    Re-raises (after logging) whatever STIXPackage.from_xml raised.
    """
    defaults = self._defaults()
    try:
        feed = STIXPackage.from_xml(self.rule.remote)
    except Exception as e:
        self.logger.error('Error parsing feed: {}'.format(e))
        self.logger.error(defaults['remote'])
        raise e

    d = feed.to_dict()
    header = d['stix_header']

    for e in d.get('indicators'):
        if not e['observable']:
            continue

        i = copy.deepcopy(defaults)
        i['description'] = e['title'].lower()
        i['lasttime'] = e['timestamp']
        # assumes a simple observable with a properties/value pair --
        # TODO confirm against feed samples; other layouts will KeyError
        i['indicator'] = e['observable']['object']['properties']['value'][
            'value'].lower()
        # TLP color is taken from the second marking structure --
        # presumably the first is something else; verify against the feed
        i['tlp'] = header['handling'][0]['marking_structures'][1][
            'color'].lower()

        if not i.get('indicator'):
            continue

        # optional itype filter configured on the rule
        if self.itype:
            if resolve_itype(i['indicator']) != self.itype:
                continue

        yield i
def _log_search(self, i):
    """Persist a search query as a synthetic 'search' indicator record.

    Skipped entirely when logging is suppressed (nolog == '1') or there
    is no indicator to record. Query-only knobs are stripped and missing
    timestamps are backfilled before storing.
    """
    if i.get('nolog', '0') == '1':
        return
    if i.get('indicator', '') == '':
        return

    # drop query-only parameters before persisting
    for key in ('limit', 'nofeed', 'nolog'):
        if i.get(key):
            del i[key]

    # backfill any missing timestamps with "now"
    for key in ('first_at', 'last_at', 'reported_at'):
        if not i.get(key):
            i[key] = str(utcnow())

    i.update({
        'provider': 'local',
        'itype': resolve_itype(i['indicator']),
        'tags': 'search',
        'confidence': 4,
        'tlp': 'amber',
        'group': 'everyone',
        'count': 1,
        'description': 'search',
    })

    self.store.indicators.create(i)
def _get_elements(l, hints):
    """Classify each string element of l into an ordered label map.

    Returns an OrderedDict mapping element -> one of 'CC', 'indicator',
    'int', 'float', 'timestamp', 'description' or 'string'. Non-string
    elements are skipped; hints (if given) mark description columns.
    """
    i = OrderedDict()
    for e in l:
        if not isinstance(e, (str, bytes)):
            continue

        e = e.rstrip()
        e = e.lstrip()

        # bare two-letter token: treat as a country code
        if re.match('^[a-zA-Z]{2}$', e):
            i[e] = 'CC'
            continue

        t = None
        try:
            t = resolve_itype(e.rstrip('/'))
            # 25553.0 ASN formats trip up FQDN resolve itype
            if t and not (t == 'fqdn' and re.match('^\d+\.[0-9]$', e)):
                i[e] = 'indicator'
                continue
        except Exception:
            pass

        # integers
        if isinstance(e, int):
            i[e] = 'int'
            continue

        # floats
        if isinstance(e, float) or re.match('^\d+\.[0-9]$', e):
            i[e] = 'float'
            continue

        # timestamps
        try:
            parse_timestamp(e)
            i[e] = 'timestamp'
            continue
        except Exception:
            pass

        # basestrings
        if isinstance(e, (str, bytes)):
            if hints:
                # scan at most the first 25 hints for a description match
                for ii in range(0, 25):
                    if len(hints) == ii:
                        break
                    if e.lower() == hints[ii].lower():
                        i[e] = 'description'
                        break

            # anything not already labelled falls through to plain string
            if not i.get(e):
                i[e] = 'string'

    return i
def _is_term(e):
    """Return e when it looks like a usable header/term token, else None.

    Rejects known itype column names, blank/newline tokens, anything
    containing a digit, and anything that resolves as an indicator.
    """
    if e in [
        'ipv4', 'ipv6', 'url', 'fqdn', 'email', 'md5', 'sha1', 'sha256',
        "\n", ""
    ]:
        return

    if re.search(r'\d+', e):
        return

    # we don't care if it's an indicator
    try:
        resolve_itype(e)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt
        pass
    else:
        return

    return e
def peek(f, lines=5, delim=','):
    """Scan up to `lines` non-comment lines of f and count candidate
    header tokens (non-blank, non-itype-name, non-numeric, non-indicator).

    Returns the distinct tokens sorted in reverse lexical order.
    """
    # PERF FIX: both imports were inside the loops (resolve_itype was
    # re-imported per token); hoisted to run once per call
    from collections import defaultdict
    from csirtg_indicator import resolve_itype

    freq_dict = defaultdict(int)

    n = lines
    for l in f.readlines():
        if l.startswith('#'):
            continue

        for e in l.split(delim):
            if e == '':
                continue
            if e == "\n":
                continue
            if e in ['ipv4', 'ipv6', 'url', 'fqdn']:
                continue
            if re.search(r'\d+', e):
                continue

            # we don't care if it's an indicator
            try:
                resolve_itype(e)
            except Exception:  # BUG FIX: was a bare `except:`
                pass
            else:
                continue

            freq_dict[e] += 1

        n = n - 1
        if n == 0:
            break

    # NOTE(review): this sorts keys reverse-alphabetically, not by count;
    # if most-frequent-first was intended, use key=freq_dict.get -- confirm
    return sorted(freq_dict, reverse=True)
def is_flat(f, mime):
    """Return 'csv' when the first few non-comment lines each parse as a
    bare indicator; None when the file isn't ascii or a line fails."""
    if not is_ascii(f, mime):
        return

    remaining = 5
    for line in f.readlines():
        # feeds may arrive as bytes; normalize to text first
        if isinstance(line, bytes):
            line = line.decode('utf-8')

        if line.startswith('#'):
            continue

        try:
            resolve_itype(line.rstrip("\n"))
        except Exception as e:
            logger.debug(e)
            return

        remaining -= 1
        if remaining == 0:
            break

    return 'csv'
def _translate_itype(indicator, expected_itype=None):
    """Validate indicator(s) via resolve_itype.

    A list is filtered down to members that resolve (and match
    expected_itype when supplied); a scalar becomes None when it fails
    either check. Returns the filtered list / validated scalar.
    """
    if isinstance(indicator, list):
        kept = []
        for candidate in indicator:
            try:
                resolved = resolve_itype(candidate)
            except InvalidIndicator:
                continue
            if not resolved or (expected_itype and expected_itype != resolved):
                continue
            kept.append(candidate)
        return kept

    try:
        resolved = resolve_itype(indicator)
    except InvalidIndicator:
        resolved = None

    if not resolved or (expected_itype and expected_itype != resolved):
        return None

    return indicator
def text_to_list(text, known_only=True):
    """Parse delimited text into a list of Indicator objects.

    Detects the column separator, keeps the 9 most frequent tokens as
    candidate tag values, and per line extracts the indicator, an
    optional timestamp and an optional tag. Raises SystemError when
    known_only and the separator isn't recognized.
    """
    separator = find_seperator(text)

    t_tokens = top_tokens(text)
    top = set()
    for t in range(0, 9):
        top.add(t_tokens[t])

    if known_only:
        if separator not in KNOWN_SEPERATORS:
            pprint(top)  # debug aid before bailing out
            raise SystemError('separator not in known list: {}'.format(separator))

    ret = []
    for l in text.split("\n"):
        if l == '':
            continue

        if l.startswith('#') or l.startswith(';'):
            continue

        cols = l.split(separator)
        cols = [x.strip() for x in cols]

        indicator = Indicator()

        for e in cols:
            if e:
                # first column that resolves becomes the indicator
                try:
                    i = resolve_itype(e)
                    if i:
                        indicator.indicator = e
                        indicator.itype = i
                except NotImplementedError:
                    pass

                # any arrow-parseable column becomes the lasttime
                try:
                    ts = arrow.get(e)
                    if ts:
                        indicator.lasttime = ts.datetime
                except (arrow.parser.ParserError, UnicodeDecodeError):
                    pass

                # frequent tokens are treated as tags
                if e in top:
                    indicator.tags = [e]

        if indicator.itype and indicator.indicator:
            ret.append(indicator)

    return ret
def _is_flat(f, mime):
    """Heuristic: return 'csv' when the first few non-comment lines of f
    each resolve as a single bare indicator.

    Returns None when the file isn't ascii text, False on the first line
    that fails to resolve.
    """
    if not _is_ascii(f, mime):
        return

    n = 5
    for l in f.readlines():
        # feeds may arrive as bytes; normalize to text first
        if isinstance(l, bytes):
            l = l.decode('utf-8')

        if l.startswith('#'):
            continue

        l = l.rstrip("\n")
        try:
            resolve_itype(l)
        except Exception:
            # BUG FIX: was a bare `except:`, which also masked SystemExit
            # and KeyboardInterrupt
            return False

        n -= 1
        if n == 0:
            break

    return 'csv'
def process(self):
    """Yield indicator dicts parsed from the rule's remote STIX package.

    Handles several observable property layouts (plain value, address,
    hashes, email header) and falls back across marking structures for
    the TLP color. Re-raises (after logging) parse failures.
    """
    defaults = self._defaults()
    try:
        feed = STIXPackage.from_xml(self.rule.remote)
    except Exception as e:
        self.logger.error('Error parsing feed: {}'.format(e))
        self.logger.error(defaults['remote'])
        raise e

    d = feed.to_dict()
    header = d['stix_header']

    for e in d.get('indicators'):
        if not e['observable']:
            continue

        i = copy.deepcopy(defaults)
        i['description'] = e['title'].lower()
        i['lasttime'] = e.get('timestamp')

        try:
            i['indicator'] = e['observable']['object']['properties']['value']['value']
        except KeyError:
            # fall back through the alternate observable layouts; the last
            # matching layout wins if several are present
            if e['observable']['object']['properties'].get('address_value'):
                i['indicator'] = e['observable']['object']['properties']['address_value']['value']

            if e['observable']['object']['properties'].get('hashes'):
                i['indicator'] = e['observable']['object']['properties']['hashes'][0]['simple_hash_value']['value']

            if e['observable']['object']['properties'].get('header'):
                i['indicator'] = e['observable']['object']['properties']['header']['from']['address_value']['value']

        # prefer the second marking structure's color, falling back to the first
        try:
            i['tlp'] = header['handling'][0]['marking_structures'][1]['color'].lower()
        except KeyError:
            i['tlp'] = header['handling'][0]['marking_structures'][0]['color']

        i['indicator'] = i['indicator'].lower()
        i['tlp'] = i['tlp'].lower()

        if not i.get('indicator'):
            continue

        # optional itype filter configured on the rule
        if self.itype:
            if resolve_itype(i['indicator']) != self.itype:
                continue

        yield i
def text_to_list(text, known_only=True):
    """Parse delimited text into a list of Indicator objects.

    Detects the column separator, keeps the 9 most frequent tokens as
    candidate tag values, and per line extracts the indicator, an
    optional timestamp and an optional tag. Raises SystemError when
    known_only and the separator isn't recognized.
    """
    separator = find_seperator(text)

    t_tokens = top_tokens(text)
    top = set()
    for t in range(0, 9):
        top.add(t_tokens[t])

    if known_only:
        if separator not in KNOWN_SEPERATORS:
            raise SystemError(
                'separator not in known list: {}'.format(separator))

    ret = []
    for l in text.split("\n"):
        if l == '':
            continue

        if l.startswith('#') or l.startswith(';'):
            continue

        cols = l.split(separator)
        cols = [x.strip() for x in cols]

        indicator = Indicator()

        for e in cols:
            if e:
                # first column that resolves becomes the indicator
                try:
                    i = resolve_itype(e)
                    if i:
                        indicator.indicator = e
                        indicator.itype = i
                except TypeError:
                    pass

                # any arrow-parseable column becomes the lasttime
                try:
                    ts = arrow.get(e)
                    if ts:
                        indicator.lasttime = ts.datetime
                except (arrow.parser.ParserError, UnicodeDecodeError):
                    pass

                # frequent tokens are treated as tags
                if e in top:
                    indicator.tags = [e]

        if indicator.itype and indicator.indicator:
            ret.append(indicator)

    return ret
def indicator(self, i):
    """Normalize and store the indicator value, resolving its itype.

    URLs lose any trailing slash, IPv4 addresses are normalized, and a
    pending mask is folded into CIDR notation then cleared.
    """
    if not i:
        self._indicator = None
        return

    value = i.lower()
    self.itype = resolve_itype(value)
    self._indicator = value

    if self.itype == 'url':
        parsed = urlparse(self._indicator)
        self._indicator = parsed.geturl().rstrip('/').lower()
    elif self.itype == 'ipv4':
        self._indicator = ipv4_normalize(self._indicator)

    if self.mask and self.itype in ('ipv4', 'ipv6'):
        self._indicator = '{}/{}'.format(self._indicator, int(self.mask))
        self.mask = None
def expand_ip_idx(data):
    """Split data['indicator'] into address/mask index fields for IPs.

    Populates indicator_ipv4[_mask] or indicator_ipv6[_mask] in place;
    non-IP indicator types are left untouched.
    """
    itype = resolve_itype(data['indicator'])
    if itype not in ['ipv4', 'ipv6']:
        return

    # compiled once and reused; raw string so \S / \d are real regex escapes
    cidr_re = re.compile(r'^(\S+)\/(\d+)$')

    # BUG FIX: was `itype is 'ipv4'` -- identity comparison against a str
    # literal only works by interning accident; use equality.
    if itype == 'ipv4':
        match = cidr_re.search(data['indicator'])
        if match:
            data['indicator_ipv4'] = match.group(1)
            data['indicator_ipv4_mask'] = match.group(2)
        else:
            data['indicator_ipv4'] = data['indicator']
        return

    match = cidr_re.search(data['indicator'])
    if match:
        data['indicator_ipv6'] = match.group(1)
        data['indicator_ipv6_mask'] = match.group(2)
    else:
        data['indicator_ipv6'] = data['indicator']
def indicators_search(self, m):
    """Stream search results for the filters in m.data, then log each
    searched indicator.

    Fills in a default limit and the resolved itype per filter; filters
    marked nolog are excluded from search logging. Any failure is logged
    and re-raised as TypeError('invalid search').
    """
    s1 = time()

    to_log = []
    try:
        for e in m.data:
            if e.get('limit') is None:
                e['limit'] = LIMIT

            if e.get('indicator') and not e.get('itype'):
                e['itype'] = resolve_itype(e['indicator'])

            # caller opted out of search logging for this filter
            if e.get('nolog', 0) == 1:
                continue

            if e.get('indicator'):
                to_log.append(e)

        yield from self.store.indicators.search(m.data)

        # log only after the search stream completed without raising
        for ee in to_log:
            self._log_search(ee)

    except StopIteration as e:
        yield

    except Exception as e:
        logger.error(e)
        if logger.getEffectiveLevel() == logging.DEBUG:
            import traceback
            traceback.print_exc()
        raise TypeError('invalid search')

    s2 = time()
    logger.debug(f"took: {round(s2 - s1, 2)}s")
def expand_ip_idx(data):
    """Split data['indicator'] into address/mask index fields for IPs.

    IPv6 addresses are stored hex-packed (inet_pton + b2a_hex) so lexical
    comparisons follow numeric order; non-IP itypes are left untouched.
    """
    itype = resolve_itype(data['indicator'])
    if itype not in ['ipv4', 'ipv6']:
        return

    # compiled once and reused; raw string so \S / \d are real regex escapes
    cidr_re = re.compile(r'^(\S+)\/(\d+)$')

    # BUG FIX: was `itype is 'ipv4'` -- identity comparison against a str
    # literal only works by interning accident; use equality.
    if itype == 'ipv4':
        match = cidr_re.search(data['indicator'])
        if match:
            data['indicator_ipv4'] = match.group(1)
            data['indicator_ipv4_mask'] = match.group(2)
        else:
            data['indicator_ipv4'] = data['indicator']
        return

    match = cidr_re.search(data['indicator'])
    if match:
        data['indicator_ipv6'] = binascii.b2a_hex(
            socket.inet_pton(socket.AF_INET6, match.group(1))).decode('utf-8')
        data['indicator_ipv6_mask'] = match.group(2)
    else:
        data['indicator_ipv6'] = binascii.b2a_hex(
            socket.inet_pton(socket.AF_INET6, data['indicator'])).decode('utf-8')
def get_indicator(l):
    """Build an Indicator from a list of raw column values.

    Pass 1 labels each value (int / indicator / timestamp / string);
    pass 2 maps the labels onto Indicator fields: ports, first/last
    timestamps, description (contains a space) or tag (short token).
    """
    i = {}

    # step 1, detect datatypes
    for e in l:
        if isinstance(e, int):
            i[e] = 'int'
            continue

        t = None
        try:
            t = resolve_itype(e)
            if t:
                i[e] = 'indicator'
                continue
        except Exception:
            pass

        if is_timestamp(e):
            i[e] = 'timestamp'
            continue

        # BUG FIX: `basestring` is Python 2 only and is a NameError on
        # Python 3; use (str, bytes) as the rest of this codebase does
        if isinstance(e, (str, bytes)):
            i[e] = 'string'

    i2 = Indicator()
    timestamps = []
    ports = []

    # step 2, map labels onto Indicator fields
    for e in i:
        if i[e] == 'indicator':
            i2.indicator = e
            continue

        if i[e] == 'timestamp':
            timestamps.append(e)
            continue

        if i[e] == 'int':
            ports.append(e)
            continue

        if i[e] == 'string':
            if ' ' in e:
                i2.description = e
                continue
            if len(e) < 10:
                i2.tags = [e]
                continue

    # newest first; first two become lasttime / firsttime
    timestamps = sorted(timestamps, reverse=True)
    if len(timestamps) > 0:
        i2.lasttime = timestamps[0]
    if len(timestamps) > 1:
        i2.firsttime = timestamps[1]

    if len(ports) > 0:
        if len(ports) == 1:
            i2.portlist = ports[0]
        else:
            # the larger port is assumed to be the (ephemeral) source port
            if ports[0] > ports[1]:
                i2.portlist = ports[0]
                i2.dest_portlist = ports[1]
            else:
                i2.portlist = ports[1]
                i2.dest_portlist = ports[0]

    return i2
def filter_indicator(filters, s):
    """Apply the 'indicator' filter to a SQLAlchemy query.

    Unknown filter keys are dropped from `filters` in place. The resolved
    itype selects which table to join; CIDR prefixes below the floor
    raise TypeError, an unhandled itype raises ValueError.
    """
    for k, v in list(filters.items()):
        if k not in VALID_FILTERS:
            del filters[k]

    if not filters.get('indicator'):
        return s

    i = filters.pop('indicator')

    itype = resolve_itype(i)

    if itype == 'email':
        # exact address or any subdomain-suffixed variant
        s = s.join(Email).filter(or_(
            Email.email.like('%.{}'.format(i)),
            Email.email == i)
        )
        return s

    if itype == 'ipv4':
        ip = ipaddress.IPv4Network(i)
        mask = ip.prefixlen
        # refuse overly broad scans
        if mask < 8:
            raise TypeError('prefix needs to be >= 8')

        start = str(ip.network_address)
        end = str(ip.broadcast_address)

        logger.debug('{} - {}'.format(start, end))

        # range query over the stored string form -- assumes the column
        # sorts consistently with address order; TODO confirm
        s = s.join(Ipv4).filter(Ipv4.ip >= start)
        s = s.filter(Ipv4.ip <= end)
        return s

    if itype == 'ipv6':
        ip = ipaddress.IPv6Network(i)
        mask = ip.prefixlen
        if mask < 32:
            raise TypeError('prefix needs to be >= 32')

        start = str(ip.network_address)
        end = str(ip.broadcast_address)

        logger.debug('{} - {}'.format(start, end))

        s = s.join(Ipv6).filter(Ipv6.ip >= start)
        s = s.filter(Ipv6.ip <= end)
        return s

    if itype == 'fqdn':
        # exact domain or any subdomain of it
        s = s.join(Fqdn).filter(or_(
            Fqdn.fqdn.like('%.{}'.format(i)),
            Fqdn.fqdn == i)
        )
        return s

    if itype == 'url':
        s = s.join(Url).filter(Url.url == i)
        return s

    if itype in HASH_TYPES:
        s = s.join(Hash).filter(Hash.hash == str(i))
        return s

    raise ValueError
def get_indicator(l, hints=None):
    """Build an Indicator from a row of raw column values.

    Pass 1 labels each value (CC / indicator / int / float / timestamp /
    description / string); pass 2 maps the labels onto Indicator fields.
    A scalar `l` is wrapped in a list first.
    """
    i = OrderedDict()

    if not isinstance(l, list):
        l = [l]

    # step 1, detect datatypes
    for e in l:
        if not isinstance(e, (str, bytes)):
            continue

        e = e.rstrip()
        e = e.lstrip()

        # bare two-letter token: treat as a country code
        if re.match('^[a-zA-Z]{2}$', e):
            i[e] = 'CC'
            continue

        t = None
        try:
            t = resolve_itype(e.rstrip('/'))
            # 25553.0 ASN formats trip up FQDN resolve itype
            if t and not (t == 'fqdn' and re.match('^\d+\.[0-9]$', e)):
                i[e] = 'indicator'
                continue
        except Exception:
            pass

        if isinstance(e, int):
            i[e] = 'int'
            continue

        if isinstance(e, float) or re.match('^\d+\.[0-9]$', e):
            i[e] = 'float'
            continue

        if is_timestamp(e):
            i[e] = 'timestamp'
            continue

        if isinstance(e, (str, bytes)):
            if hints:
                # scan at most the first 25 hints for a description match
                for ii in range(0, 25):
                    if len(hints) == ii:
                        break

                    if e.lower() == hints[ii].lower():
                        i[e] = 'description'
                        break

            # anything not already labelled falls through to plain string
            if not i.get(e):
                i[e] = 'string'

    i2 = Indicator()
    timestamps = []
    ports = []

    # step 2, map the detected labels onto Indicator fields
    for e in i:
        if i[e] == 'CC':
            i2.cc = e
            continue

        if i[e] == 'indicator':
            # a second indicator column becomes the reference
            if i2.indicator:
                i2.reference = e
            else:
                i2.indicator = e
            continue

        if i[e] == 'timestamp':
            timestamps.append(parse_timestamp(e))
            continue

        if i[e] == 'float':
            # float-shaped values are treated as ASNs here
            i2.asn = e
            continue

        if i[e] == 'int':
            ports.append(e)
            continue

        if i[e] == 'description':
            i2.description = e
            continue

        if i[e] == 'string':
            # once an ASN is present, a matching string is its description
            if re.match(r'[0-9A-Za-z\.\s\/]+', e) and i2.asn:
                i2.asn_desc = e
                continue

            # short single words look like tags (but never itype names)
            if 4 <= len(e) <= 10 and re.match('[a-z-A-Z]+,?', e) and e not in [
                'ipv4', 'fqdn', 'url', 'ipv6'
            ]:
                i2.tags = [e]
                continue

            if ' ' in e and 5 <= len(e) and not i2.asn_desc:
                i2.description = e
                continue

    # newest first; first two become last_at / first_at
    timestamps = sorted(timestamps, reverse=True)
    if len(timestamps) > 0:
        i2.last_at = timestamps[0]

    if len(timestamps) > 1:
        i2.first_at = timestamps[1]

    if len(ports) > 0:
        if len(ports) == 1:
            i2.portlist = ports[0]
        else:
            # the larger port is assumed to be the (ephemeral) source port
            if ports[0] > ports[1]:
                i2.portlist = ports[0]
                i2.dest_portlist = ports[1]
            else:
                i2.portlist = ports[1]
                i2.dest_portlist = ports[0]

    return i2