def analyze(observable, results):
    """Query the DomainTools parsed-whois endpoint for a registered domain.

    Observables whose value contains a subdomain part are skipped and an
    empty list is returned. Otherwise ``/<value>/whois/parsed`` is queried,
    the JSON-dumped response is stored on ``results`` and the
    ``response.whois`` field is written to the ``raw`` key of the
    observable's ``whois`` context entry.

    Args:
        observable: the observable to analyze. ``Hostname`` instances are
            handed to ``DTWhois.analyze_domain``; anything else goes to
            ``DTWhois.analyze_ip``.
        results: analysis results object; supplies ``settings`` for the API
            call and receives the raw response through ``update``.

    Returns:
        The value returned by the ``DTWhois`` helper, or ``[]`` when the
        observable was skipped.
    """
    links = []

    parts = tldextract_parser(observable.value)
    if parts.subdomain != '':
        return links

    should_add_context = False
    for context in observable.context:
        if context['source'] == 'whois':
            # Reuse the existing whois entry; it is updated in place and
            # persisted with ``observable.save()`` below.
            break
    else:
        # No whois entry yet: build a fresh one and attach it afterwards.
        should_add_context = True
        context = {'source': 'whois'}

    data = DomainToolsApi.get(
        "/{}/whois/parsed".format(observable.value), results.settings)
    results.update(raw=json.dumps(data, indent=2))
    context['raw'] = data['response']['whois']

    if isinstance(observable, Hostname):
        links = DTWhois.analyze_domain(observable, data)
    else:
        links = DTWhois.analyze_ip(observable, data)

    if should_add_context:
        observable.add_context(context)
    else:
        observable.save()

    # The stray debug ``print(links)`` that used to sit here was removed so
    # that running the analytics no longer writes to stdout.
    return links
def is_valid(cls, match):
    """Tell whether a regex match is a standalone domain with a known TLD.

    Args:
        match: a regex match object exposing the named groups ``pre``,
            ``search`` and ``post``.

    Returns:
        ``True`` when the match is not adjacent to a ``/``, the refanged
        value is at most 255 characters long and the TLD parser finds both
        a suffix and a domain in it; ``False`` otherwise.
    """
    # A '/' directly before or after the candidate means it is part of a
    # URL, which must not be matched as a bare domain.
    if match.group("pre") == "/" or match.group("post") == "/":
        return False

    candidate = refang(match.group("search"))
    if len(candidate) > 255:
        return False

    # Validity is decided by the TLD parser: both pieces must be non-empty.
    extracted = tldextract_parser(candidate)
    return bool(extracted.suffix and extracted.domain)
def analyze(observable, results):
    """Link an observable to the entities found in its DomainTools whois history.

    Observables whose value contains a subdomain part are skipped. For the
    others, ``/<value>/whois/history`` is queried, the JSON-dumped response
    is stored on ``results`` and, for every history record, the observable
    is linked to the registrar, the registrant and (when one can be parsed
    out of the raw record) the registrant email. Each link is bounded by the
    record's ``created`` and ``expires`` registration dates.

    Args:
        observable: the observable to analyze.
        results: analysis results object; supplies ``settings`` for the API
            call and receives the raw response through ``update``.

    Returns:
        A list of the links that were created (empty when skipped).
    """
    if tldextract_parser(observable.value).subdomain != "":
        return []

    endpoint = "/{}/whois/history".format(observable.value)
    data = DomainToolsApi.get(endpoint, results.settings)
    results.update(raw=json.dumps(data, indent=2))

    date_format = "%Y-%m-%d"
    collected = set()

    def connect(node, description, first_seen, last_seen):
        # Every link from this analytics shares the same source label.
        collected.update(
            observable.link_to(
                node, description, "DomainTools", first_seen, last_seen
            )
        )

    for record in data["response"]["history"]:
        whois = record["whois"]
        registration = whois["registration"]

        created = datetime.strptime(registration["created"], date_format)
        expires = datetime.strptime(registration["expires"], date_format)

        registrar = Company.get_or_create(name=registration["registrar"])
        registrant = Text.get_or_create(value=whois["registrant"])
        connect(registrar, "Registrar", created, expires)
        connect(registrant, "Registrant", created, expires)

        # The registrant email is only available from the raw record text.
        parsed = parse_raw_whois([whois["record"]], normalized=True)
        address = get_value_at(parsed, "contacts.registrant.email")
        if address:
            connect(
                Email.get_or_create(value=address),
                "Registrant Email",
                created,
                expires,
            )

    return list(collected)
def normalize(self):
    """Rewrite ``self.value`` as a normalized URL with an IDNA-encoded host.

    The value is passed through ``refang``, given an ``http://`` scheme when
    it has none, normalized with ``urlnorm.norm`` (spaces become ``%20``),
    and the first occurrence of the FQDN reported by the TLD parser is
    replaced by its IDNA encoding. ``self.parse()`` is called last.

    Raises:
        ObservableValidationError: if ``urlnorm`` rejects the URL, or if a
            Unicode decoding or IDNA encoding error occurs on the way.
    """
    self.value = refang(self.value)

    try:
        if re.match(r"[^:]+://", self.value) is None:
            # if no schema is specified, assume http://
            self.value = u"http://{}".format(self.value)
        self.value = urlnorm.norm(self.value).replace(' ', '%20')
        p = tldextract_parser(self.value)
        self.value = self.value.replace(
            p.fqdn, p.fqdn.encode("idna").decode(), 1)
        self.parse()
    except urlnorm.InvalidUrl:
        raise ObservableValidationError("Invalid URL: {}".format(
            self.value))
    except UnicodeDecodeError:
        raise ObservableValidationError(
            "Invalid URL (UTF-8 decode error): {}".format(self.value))
    except UnicodeError:
        # The "idna" codec reports invalid host labels (empty or too long,
        # for instance) with UnicodeError / UnicodeEncodeError, which the
        # handler above does not cover. Surface those as validation errors
        # too instead of letting a raw Unicode error escape.
        raise ObservableValidationError(
            "Invalid URL (IDNA encoding error): {}".format(self.value))
def analyze(hostname, results):
    """Run a raw whois lookup on a registered domain and link registrant data.

    Hostnames whose value contains a subdomain part are skipped. For the
    others, the first raw whois record is stored on ``results`` and in the
    hostname's ``whois`` context entry (together with the first creation
    date when the parser reports one). When the parsed contacts contain a
    ``registrant`` key, its email, name, organization and phone fields are
    passed to ``link_from_contact_info``.

    Args:
        hostname: the hostname observable to analyze.
        results: analysis results object; receives the raw record through
            ``update``.

    Returns:
        A list of the links gathered from ``link_from_contact_info`` (empty
        when skipped).
    """
    if tldextract_parser(hostname.value).subdomain != '':
        return []

    # Reuse the existing whois context entry when there is one; otherwise
    # start a new entry that gets attached once it has been filled in.
    whois_context = next(
        (entry for entry in hostname.context if entry['source'] == 'whois'),
        None)
    is_new_entry = whois_context is None
    if is_new_entry:
        whois_context = {'source': 'whois'}

    raw_records = get_whois_raw(hostname.value)
    results.update(raw=raw_records[0])
    parsed = parse_raw_whois(raw_records, normalized=True)
    whois_context['raw'] = raw_records[0]

    if 'creation_date' in parsed:
        whois_context['creation_date'] = parsed['creation_date'][0]

    found = set()
    if 'registrant' in parsed['contacts']:
        registrant = parsed['contacts']['registrant']
        wanted = (
            ('email', Email, 'Registrant Email'),
            ('name', Text, 'Registrant Name'),
            ('organization', Text, 'Registrant Organization'),
            ('phone', Text, 'Registrant Phone Number'),
        )
        for field, klass, description in wanted:
            found.update(
                link_from_contact_info(
                    hostname, registrant, field, klass, description))

    if is_new_entry:
        hostname.add_context(whois_context)
    else:
        # The existing entry was mutated in place, so persist the change.
        hostname.save()

    return list(found)
def analyze(observable, results):
    """Build links from the DomainTools whois history of a registered domain.

    Nothing is done for observables whose value contains a subdomain part.
    Otherwise ``/<value>/whois/history`` is queried and the JSON-dumped
    response is stored on ``results``. Each history record yields links to
    its registrar, its registrant and, when the raw record can be parsed
    for one, the registrant email. All links carry the record's ``created``
    and ``expires`` registration dates.

    Args:
        observable: the observable to analyze.
        results: analysis results object; supplies ``settings`` for the API
            call and receives the raw response through ``update``.

    Returns:
        A list of the links that were created (empty when skipped).
    """
    links = set()

    if tldextract_parser(observable.value).subdomain != '':
        return list(links)

    history_url = "/{}/whois/history".format(observable.value)
    data = DomainToolsApi.get(history_url, results.settings)
    results.update(raw=json.dumps(data, indent=2))

    for entry in data['response']['history']:
        whois_data = entry['whois']
        registration = whois_data['registration']

        created = datetime.strptime(registration['created'], "%Y-%m-%d")
        expires = datetime.strptime(registration['expires'], "%Y-%m-%d")

        # Both nodes are created before any link is made, then linked in
        # registrar-then-registrant order.
        targets = (
            (Company.get_or_create(name=registration['registrar']),
             'Registrar'),
            (Text.get_or_create(value=whois_data['registrant']),
             'Registrant'),
        )
        for node, label in targets:
            links.update(
                observable.link_to(
                    node, label, 'DomainTools', created, expires))

        # The email address has to be dug out of the raw whois text.
        parsed = parse_raw_whois([whois_data['record']], normalized=True)
        email_value = get_value_at(parsed, 'contacts.registrant.email')
        if email_value:
            email_node = Email.get_or_create(value=email_value)
            links.update(
                observable.link_to(
                    email_node, 'Registrant Email', 'DomainTools',
                    created, expires))

    return list(links)
def each(hostname):
    """Tag a hostname and link it to its registered domain.

    The hostname is tagged ``suspicious_tld`` when its suffix is listed in
    ``SUSPICIOUS_TLDS``. A hostname without a subdomain part is flagged with
    ``domain=True`` and nothing else happens. Otherwise it is flagged with
    ``domain=False``, the ``Hostname`` for its registered domain is fetched
    or created, given the ``analytics`` source and linked to; the ``dyndns``
    tag is copied over when the domain carries it.

    Args:
        hostname: the hostname observable to process.

    Returns:
        The registered-domain ``Hostname`` when the value had a subdomain
        part, ``None`` otherwise.
    """
    parts = tldextract_parser(hostname.value)

    if parts.suffix in SUSPICIOUS_TLDS:
        hostname.tag("suspicious_tld")

    # No subdomain part: the hostname is itself the domain.
    if parts.subdomain == "":
        hostname.update(domain=True)
        return None

    hostname.update(domain=False)

    parent = Hostname.get_or_create(value=parts.registered_domain, domain=True)
    parent.add_source("analytics")
    hostname.active_link_to(
        parent, "domain", "ProcessHostnames", clean_old=False
    )

    if parent.has_tag("dyndns"):
        hostname.tag("dyndns")

    return parent
def analyze_string(hostname_string):
    """Return the registered domain of a hostname string as a one-item list.

    Args:
        hostname_string: the hostname to run through the TLD parser.

    Returns:
        A list holding the parser's ``registered_domain`` for the input.
    """
    return [tldextract_parser(hostname_string).registered_domain]