def handleEvent(self, event):
    """Emit an IBAN_NUMBER event for each distinct IBAN found in the event data.

    The event data is handed to ``self.sf.parseIBANNumbers``; every value it
    yields is logged, and the first occurrence of each value is forwarded to
    listeners as a ``SpiderFootEvent`` whose ``moduleDataSource`` is copied
    from the incoming event (or "Unknown" when that is empty).

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    incoming_type = event.eventType
    origin_module = event.module
    payload = event.data

    # Once the module has entered its error state it stops doing any work.
    if self.errorState:
        return None

    self.sf.debug(f"Received event, {incoming_type}, from {origin_module}")

    # Values already reported for this particular event.
    reported = set()
    for iban in self.sf.parseIBANNumbers(payload):
        self.sf.info("Found IBAN number : " + iban)

        if iban in reported:
            self.sf.debug("Already found from this source")
            continue
        reported.add(iban)

        found = SpiderFootEvent("IBAN_NUMBER", iban, self.__name__, event)
        found.moduleDataSource = event.moduleDataSource or "Unknown"
        self.notifyListeners(found)

    return None
def handleEvent(self, event):
    """Find phone numbers in the event data and emit them as PHONE_NUMBER events.

    Event data whose hash (``self.sf.hashstring``) is already in
    ``self.results`` is ignored; otherwise the hash is appended and the data
    is scanned with ``phonenumbers.PhoneNumberMatcher``. Each match is
    formatted as E.164 and forwarded to listeners.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    incoming_type = event.eventType
    origin_module = event.module
    payload = event.data

    # Skip payloads that were already processed.
    digest = self.sf.hashstring(payload)
    if digest in self.results:
        return None
    self.results.append(digest)

    self.sf.debug("Received event, " + incoming_type + ", from " + origin_module)

    # Dots are swapped for dashes before matching (the original comment says
    # this makes potential phone numbers friendlier to parse).
    searchable = payload.replace('.', '-')

    e164 = phonenumbers.PhoneNumberFormat.E164
    for hit in phonenumbers.PhoneNumberMatcher(searchable, region=None):
        formatted = phonenumbers.format_number(hit.number, e164)

        found = SpiderFootEvent("PHONE_NUMBER", formatted, self.__name__, event)
        found.moduleDataSource = event.moduleDataSource or "Unknown"
        self.notifyListeners(found)

    return None
def handleEvent(self, event):
    """Emit a CREDIT_CARD_NUMBER event for each distinct card number in the event data.

    The event data is handed to ``self.sf.parseCreditCards``; every value it
    yields is logged, and the first occurrence of each value is forwarded to
    listeners as a ``SpiderFootEvent`` whose ``moduleDataSource`` is copied
    from the incoming event (or "Unknown" when that is empty).

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    incoming_type = event.eventType
    origin_module = event.module
    payload = event.data

    # Once the module has entered its error state it stops doing any work.
    if self.errorState:
        return None

    self.sf.debug("Received event, " + incoming_type + ", from " + origin_module)

    # Values already reported for this particular event.
    reported = set()
    for card in self.sf.parseCreditCards(payload):
        self.sf.info("Found credit card number : " + card)

        if card in reported:
            self.sf.debug("Already found from this source")
            continue
        reported.add(card)

        found = SpiderFootEvent("CREDIT_CARD_NUMBER", card, self.__name__, event)
        found.moduleDataSource = event.moduleDataSource or "Unknown"
        self.notifyListeners(found)

    return None
def handleEvent(self, event):
    """Scan event data for phone numbers and notify listeners of each one.

    A hash of the data (``self.sf.hashstring``) is checked against
    ``self.results``; repeated data returns immediately, new data has its
    hash appended. Matches from ``phonenumbers.PhoneNumberMatcher`` are
    formatted as E.164 and emitted as PHONE_NUMBER events.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    kind = event.eventType
    sender = event.module
    raw = event.data

    fingerprint = self.sf.hashstring(raw)
    already_seen = fingerprint in self.results
    if already_seen:
        return None
    self.results.append(fingerprint)

    self.sf.debug("Received event, " + kind + ", from " + sender)

    # Dots become dashes before the matcher runs (per the original comment,
    # to make potential phone numbers friendlier to parse).
    text = raw.replace('.', '-')
    matcher = phonenumbers.PhoneNumberMatcher(text, region=None)

    for candidate in matcher:
        number = phonenumbers.format_number(
            candidate.number, phonenumbers.PhoneNumberFormat.E164)

        out = SpiderFootEvent("PHONE_NUMBER", number, self.__name__, event)
        if event.moduleDataSource:
            out.moduleDataSource = event.moduleDataSource
        else:
            out.moduleDataSource = "Unknown"
        self.notifyListeners(out)

    return None
def handleEvent(self, event):
    """Extract phone numbers from content events and look up carriers for PHONE_NUMBER events.

    Behaviour by incoming event type:

    * TARGET_WEB_CONTENT / DOMAIN_WHOIS / NETBLOCK_WHOIS: the data is scanned
      with ``phonenumbers.PhoneNumberMatcher`` and every match is emitted as a
      PHONE_NUMBER event in E.164 format.
    * PHONE_NUMBER: the number is parsed and ``carrier.name_for_number`` is
      queried; a non-empty carrier name is emitted as PROVIDER_TELCO.

    Data whose hash is already a key of ``self.results`` is skipped.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    sourceData = self.sf.hashstring(eventData)
    if sourceData in self.results:
        return None
    self.results[sourceData] = True

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventName in ['TARGET_WEB_CONTENT', 'DOMAIN_WHOIS', 'NETBLOCK_WHOIS']:
        # Make potential phone numbers more friendly to parse
        content = eventData.replace('.', '-')

        for match in phonenumbers.PhoneNumberMatcher(content, region=None):
            n = phonenumbers.format_number(match.number,
                                           phonenumbers.PhoneNumberFormat.E164)
            evt = SpiderFootEvent("PHONE_NUMBER", n, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)

    if eventName == 'PHONE_NUMBER':
        # Catch Exception rather than BaseException so that KeyboardInterrupt
        # and SystemExit still propagate instead of being logged and dropped.
        try:
            number = phonenumbers.parse(eventData)
        except Exception as e:
            self.sf.debug('Error parsing phone number: ' + str(e))
            return None

        try:
            number_carrier = carrier.name_for_number(number, 'en')
        except Exception as e:
            self.sf.debug('Error retrieving phone number carrier: ' + str(e))
            return None

        if number_carrier:
            evt = SpiderFootEvent("PROVIDER_TELCO", number_carrier, self.__name__, event)
            self.notifyListeners(evt)
        else:
            self.sf.debug("No carrier information found for " + eventData)

        # Location lookup is currently disabled; kept for reference:
        #try:
        #    location = geocoder.description_for_number(number, 'en')
        #except BaseException as e:
        #    self.sf.debug('Error retrieving phone number location: ' + str(e))
        #    return None
        #if location:
        #    evt = SpiderFootEvent("GEOINFO", location, self.__name__, event)
        #    self.notifyListeners(evt)
        #else:
        #    self.sf.debug("No location information found for " + eventData)

    return None
def handleEvent(self, event):
    """Extract e-mail addresses on the target's domain and emit EMAILADDR events.

    Events whose type starts with "EMAILADDR" are ignored, as is data that is
    not exactly ``str`` or ``unicode``. Regex candidates are dropped when
    they are shorter than four characters, contain "%", or have a domain that
    ``self.getTarget().matches`` rejects. Each remaining distinct address is
    emitted once, as ``unicode`` with surrounding dots stripped.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    incoming_type = event.eventType
    origin_module = event.module
    payload = event.data

    if incoming_type.startswith("EMAILADDR"):
        return None

    self.sf.debug("Received event, " + incoming_type + ", from " + origin_module)

    if type(payload) not in (str, unicode):
        self.sf.debug("Unhandled type to find e-mails: " + str(type(payload)))
        return None

    address_rx = re.compile("([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")

    # Addresses already emitted for this event.
    emitted = set()
    for candidate in address_rx.findall(payload):
        if len(candidate) < 4:
            self.sf.debug("Likely invalid address: " + candidate)
            continue

        # A percent sign indicates a messed-up encoding.
        if "%" in candidate:
            self.sf.debug("Skipped address: " + candidate)
            continue

        # Domain part, lower-cased, without leading/trailing dots.
        domain = candidate.lower().split('@')[1].strip('.')
        if not self.getTarget().matches(domain):
            self.sf.debug("Ignoring e-mail address on an external domain: " + candidate)
            continue

        self.sf.info("Found e-mail address: " + candidate)

        address = candidate.strip('.')
        if type(candidate) == str:
            address = unicode(address, 'utf-8', errors='replace')

        if address in emitted:
            self.sf.debug("Already found from this source.")
            continue
        emitted.add(address)

        found = SpiderFootEvent("EMAILADDR", address, self.__name__, event)
        found.moduleDataSource = event.moduleDataSource or "Unknown"
        self.notifyListeners(found)

    return None
def handleEvent(self, event):
    """Find target-domain e-mail addresses in the event data and report them.

    Nothing is done for events whose type starts with "EMAILADDR" or whose
    data is not exactly ``str`` or ``unicode``. Each regex match is rejected
    if it is shorter than four characters, contains "%", or its domain is not
    accepted by ``self.getTarget().matches``. Accepted addresses are turned
    into ``unicode`` (dots stripped from both ends) and emitted once each as
    EMAILADDR events.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    kind = event.eventType
    sender = event.module
    raw = event.data

    if kind.startswith("EMAILADDR"):
        return None

    self.sf.debug("Received event, " + kind + ", from " + sender)

    raw_type = type(raw)
    if raw_type not in [str, unicode]:
        self.sf.debug("Unhandled type to find e-mails: " + str(raw_type))
        return None

    finder = re.compile("([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
    hits = finder.findall(raw)

    # Addresses already handed to listeners while processing this event.
    done = set()
    for hit in hits:
        too_short = len(hit) < 4
        if too_short:
            self.sf.debug("Likely invalid address: " + hit)
            continue

        # Percent signs point at a broken encoding; skip those.
        if "%" in hit:
            self.sf.debug("Skipped address: " + hit)
            continue

        # Take the domain and strip a potential trailing dot.
        host = hit.lower().split('@')[1].strip('.')
        on_target = self.getTarget().matches(host)
        if not on_target:
            self.sf.debug("Ignoring e-mail address on an external domain: " + hit)
            continue

        self.sf.info("Found e-mail address: " + hit)

        if type(hit) == str:
            mail = unicode(hit.strip('.'), 'utf-8', errors='replace')
        else:
            mail = hit.strip('.')

        if mail in done:
            self.sf.debug("Already found from this source.")
            continue
        done.add(mail)

        out = SpiderFootEvent("EMAILADDR", mail, self.__name__, event)
        if event.moduleDataSource:
            out.moduleDataSource = event.moduleDataSource
        else:
            out.moduleDataSource = "Unknown"
        self.notifyListeners(out)

    return None
def handleEvent(self, event):
    """Extract e-mail addresses from the event data and emit them, classified.

    Every address returned by ``self.sf.parseEmails`` is lower-cased and then:

    * skipped if its domain fails ``self.sf.validHost`` against
      ``self.opts['_internettlds']``;
    * typed AFFILIATE_EMAILADDR if neither its domain (children and parents
      included) nor the address itself matches the target, or if the incoming
      event type starts with "AFFILIATE_";
    * typed EMAILADDR_GENERIC if it is not an affiliate address and its local
      part is listed in the comma-separated ``self.opts['_genericusers']``;
    * typed EMAILADDR otherwise.

    Each distinct address is emitted once per incoming event.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, %s, from %s" % (eventName, srcModuleName))

    emails = self.sf.parseEmails(eventData)
    myres = list()
    for email in emails:
        evttype = "EMAILADDR"
        email = email.lower()

        # Get the domain and strip potential ending .
        mailDom = email.split('@')[1].strip('.')
        if not self.sf.validHost(mailDom, self.opts['_internettlds']):
            self.sf.debug("Skipping " + email + " as not a valid e-mail.")
            # Only this address is invalid; keep processing the rest instead
            # of abandoning every address that follows it.
            continue

        if not self.getTarget().matches(mailDom, includeChildren=True, includeParents=True) \
                and not self.getTarget().matches(email):
            self.sf.debug("External domain, so possible affiliate e-mail")
            evttype = "AFFILIATE_EMAILADDR"

        if eventName.startswith("AFFILIATE_"):
            evttype = "AFFILIATE_EMAILADDR"

        if not evttype.startswith("AFFILIATE_") \
                and email.split("@")[0] in self.opts['_genericusers'].split(","):
            evttype = "EMAILADDR_GENERIC"

        self.sf.info("Found e-mail address: " + email)
        mail = email.strip('.')

        if mail in myres:
            self.sf.debug("Already found from this source.")
            continue
        myres.append(mail)

        evt = SpiderFootEvent(evttype, mail, self.__name__, event)
        if event.moduleDataSource:
            evt.moduleDataSource = event.moduleDataSource
        else:
            evt.moduleDataSource = "Unknown"
        self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Derive country names from the event data and emit COUNTRY_NAME events.

    The detector is picked by incoming event type:

    * PHONE_NUMBER -> ``detectCountryFromPhone``
    * DOMAIN_NAME, plus AFFILIATE_DOMAIN_NAME / CO_HOSTED_SITE_DOMAIN /
      SIMILARDOMAIN when the matching option ("affiliate", "cohosted",
      "similardomain") is enabled -> ``detectCountryFromDomainName``
    * IBAN_NUMBER -> ``detectCountryFromIBAN``
    * DOMAIN_WHOIS, GEOINFO, PHYSICAL_ADDRESS, plus AFFILIATE_DOMAIN_WHOIS /
      CO_HOSTED_SITE_DOMAIN_WHOIS when enabled -> ``detectCountryFromData``

    Data whose hash is already a key of ``self.results`` is skipped. Falsy
    detector results are ignored and duplicates are collapsed.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data
    moduleDataSource = event.moduleDataSource or "Unknown"

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    digest = self.sf.hashstring(eventData)
    if digest in self.results:
        self.sf.debug(f"Skipping {eventData}, already checked.")
        return None
    self.results[digest] = True

    # Option lookups stay behind the event-name test so they are only
    # evaluated for the event type they belong to.
    if eventName == "PHONE_NUMBER":
        candidates = [self.detectCountryFromPhone(eventData)]
    elif (eventName == "DOMAIN_NAME"
          or (eventName == "AFFILIATE_DOMAIN_NAME" and self.opts["affiliate"])
          or (eventName == "CO_HOSTED_SITE_DOMAIN" and self.opts["cohosted"])
          or (eventName == "SIMILARDOMAIN" and self.opts["similardomain"])):
        candidates = [self.detectCountryFromDomainName(eventData)]
    elif eventName == "IBAN_NUMBER":
        candidates = [self.detectCountryFromIBAN(eventData)]
    elif (eventName in ("DOMAIN_WHOIS", "GEOINFO", "PHYSICAL_ADDRESS")
          or (eventName == "AFFILIATE_DOMAIN_WHOIS" and self.opts["affiliate"])
          or (eventName == "CO_HOSTED_SITE_DOMAIN_WHOIS" and self.opts["cohosted"])):
        candidates = list(self.detectCountryFromData(eventData))
    else:
        candidates = []

    if not candidates:
        self.sf.debug(f"Found no country names associated with {eventName}: {eventData}")
        return None

    for country in set(candidates):
        if country:
            self.sf.debug(f"Found country name: {country}")

            found = SpiderFootEvent("COUNTRY_NAME", country, self.__name__, event)
            found.moduleDataSource = moduleDataSource
            self.notifyListeners(found)

    return None
def handleEvent(self, event):
    """Extract e-mail addresses from the event data and emit them.

    Every address returned by ``self.sf.parseEmails`` is typed
    AFFILIATE_EMAILADDR when neither its domain (children and parents
    included) nor the address itself matches the target, or when the incoming
    event type starts with "AFFILIATE_"; otherwise it is typed EMAILADDR.
    Each distinct address is emitted once per incoming event.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    emails = self.sf.parseEmails(eventData)
    myres = list()
    for email in emails:
        evttype = "EMAILADDR"

        # Get the domain and strip potential ending .
        mailDom = email.lower().split('@')[1].strip('.')

        # The second check must test the address being processed; the
        # previous code referenced an undefined name here (`match`), which
        # raised NameError whenever the domain check failed.
        if not self.getTarget().matches(mailDom, includeChildren=True, includeParents=True) \
                and not self.getTarget().matches(email):
            self.sf.debug("External domain, so possible affiliate e-mail")
            evttype = "AFFILIATE_EMAILADDR"

        if eventName.startswith("AFFILIATE_"):
            evttype = "AFFILIATE_EMAILADDR"

        self.sf.info("Found e-mail address: " + email)

        if type(email) == str:
            mail = unicode(email.strip('.'), 'utf-8', errors='replace')
        else:
            mail = email.strip('.')

        if mail in myres:
            self.sf.debug("Already found from this source.")
            continue
        myres.append(mail)

        evt = SpiderFootEvent(evttype, mail, self.__name__, event)
        if event.moduleDataSource:
            evt.moduleDataSource = event.moduleDataSource
        else:
            evt.moduleDataSource = "Unknown"
        self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Emit a HASH event for every (algorithm, value) pair found in the event data.

    Pairs come from ``self.sf.parseHashes``; each is reported as the string
    "[<algorithm>] <value>".

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    incoming_type = event.eventType
    origin_module = event.module
    payload = event.data

    self.sf.debug(f"Received event, {incoming_type}, from {origin_module}")

    for algorithm, digest in self.sf.parseHashes(payload):
        description = "[" + algorithm + "] " + digest

        found = SpiderFootEvent("HASH", description, self.__name__, event)
        found.moduleDataSource = event.moduleDataSource or "Unknown"
        self.notifyListeners(found)

    return None
def handleEvent(self, event):
    """Find analytics and domain-verification identifiers and emit WEB_ANALYTICS_ID events.

    TARGET_WEB_CONTENT data is searched for identifiers embedded in page
    source; DNS_TEXT data (stripped of surrounding whitespace) is searched for
    verification tokens anchored at the end of the record. Every match is
    emitted as "<label>: <value>". All searches are case-insensitive.

    Data whose hash is already a key of ``self.results`` is skipped.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    digest = self.sf.hashstring(eventData)
    if digest in self.results:
        self.sf.debug(f"Skipping {eventData}, already checked.")
        return None
    self.results[digest] = True

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    datasource = event.moduleDataSource or "Unknown"

    def emit(label, value):
        # Build and dispatch one WEB_ANALYTICS_ID event.
        evt = SpiderFootEvent("WEB_ANALYTICS_ID", label + ": " + value, self.__name__, event)
        evt.moduleDataSource = datasource
        self.notifyListeners(evt)

    if eventName == 'TARGET_WEB_CONTENT':
        # (label, pattern, lower-case prefixes to ignore, substring that must
        # be present in the content before the pattern is tried at all)
        web_patterns = (
            ("Google Analytics",
             r"\bua\-\d{4,10}\-\d{1,4}\b",
             ('ua-000000-', 'ua-123456-', 'ua-12345678'),
             None),
            ("Google AdSense",
             r"\b(pub-\d{10,20})\b",
             ('pub-12345678',),
             None),
            # https://developers.google.com/site-verification/v1/getting_started
            ("Google Site Verification",
             r'<meta name="google-site-verification" content="([a-z0-9\-\+_=]{43,44})"',
             (),
             None),
            ("Google Site Verification",
             r'<meta name="verify-v1" content="([a-z0-9\-\+_=]{43,44})"',
             (),
             None),
            ("Quantcast",
             r"\bqacct:\"(p-[a-z0-9]+)\"",
             (),
             '_qevents.push'),
            ("Ahrefs Site Verification",
             r'<meta name="ahrefs-site-verification" content="([a-f0-9]{64})"',
             (),
             None),
        )

        for label, pattern, ignored_prefixes, marker in web_patterns:
            if marker is not None and marker not in eventData:
                continue
            for found in re.findall(pattern, eventData, re.IGNORECASE):
                # str.startswith with an empty tuple is always False, so
                # patterns without ignored prefixes never skip here.
                if found.lower().startswith(ignored_prefixes):
                    continue
                self.sf.debug(label + " match: " + found)
                emit(label, found)

    if eventName == 'DNS_TEXT':
        # (label, pattern) in the order the events are emitted.
        dns_patterns = (
            # https://developers.google.com/site-verification/v1/getting_started
            ("Google Site Verification",
             r'google-site-verification=([a-z0-9\-\+_=]{43,44})$'),
            # https://support.logmeininc.com/openvoice/help/adding-a-txt-record-to-a-dns-server-ov710011
            ("LogMeIn Domain Verification",
             r'logmein-domain-confirmation ([A-Z0-9]{24})$'),
            ("LogMeIn Domain Verification",
             r'logmein-verification-code=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$'),
            # https://support.docusign.com/en/guides/org-admin-guide-domains
            ("DocuSign Domain Verification",
             r'docusign=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$'),
            # https://support.globalsign.com/customer/en/portal/articles/2167245-performing-domain-verification---dns-txt-record
            ("GlobalSign Site Verification",
             r'globalsign-domain-verification=([a-z0-9\-\+_=]{42,44})$'),
            # https://confluence.atlassian.com/cloud/verify-a-domain-for-your-organization-873871234.html
            ("Atlassian Domain Verification",
             r'atlassian-domain-verification=([a-z0-9\-\+\/_=]{64})$'),
            # https://helpx.adobe.com/au/enterprise/using/verify-domain-ownership.html
            ("Adobe IDP Site Verification",
             r'adobe-idp-site-verification=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$'),
            ("Adobe IDP Site Verification",
             r'adobe-idp-site-verification=([a-f0-9]{64})$'),
            # https://helpx.adobe.com/sign/help/domain_claiming.html
            ("Adobe Domain Verification",
             r'adobe-sign-verification=([a-f0-9]{32})$'),
            # https://stripe.com/docs/apple-pay/web#going-live
            ("Stripe Domain Verification",
             r'stripe-verification=([a-f0-9]{64})$'),
            # https://community.teamviewer.com/t5/Knowledge-Base/Single-Sign-On-SSO/ta-p/30784
            ("TeamViewer SSO Verification",
             r'teamviewer-sso-verification=([a-f0-9]{32})$'),
            ("Aliyun Site Verification",
             r'aliyun-site-verification=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$'),
            # https://developers.facebook.com/docs/sharing/domain-verification/
            ("Facebook Domain Verification",
             r'facebook-domain-verification=([a-z0-9]{30})$'),
            ("Citrix Domain Verification",
             r'citrix-verification-code=([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})$'),
            # https://help.dropbox.com/teams-admins/admin/domain-insights-account-capture#verify
            ("Dropbox Domain Verification",
             r'dropbox-domain-verification=([a-z0-9]{12})$'),
            # https://support.detectify.com/customer/en/portal/articles/2836806-verification-with-dns-txt-
            ("Detectify Domain Verification",
             r'detectify-verification=([a-f0-9]{32})$'),
            ("Drift Domain Verification",
             r'drift-verification=([a-f0-9]{64})$'),
            # https://help.ahrefs.com/en/articles/1431155-how-do-i-finish-crawling-my-website-faster-in-site-audit
            ("Ahrefs Site Verification",
             r'ahrefs-site-verification_([a-f0-9]{64})$'),
            # https://help.statuspage.io/help/domain-ownership
            ("Statuspage Domain Verification",
             r'status-page-domain-verification=([a-z0-9]{12})$'),
            # https://support.zoom.us/hc/en-us/articles/203395207-What-is-Managed-Domain-
            ("Zoom.us Domain Verification",
             r'ZOOM_verify_([a-z0-9\-\+\/_=]{22})$'),
            ("Mail.ru Domain Verification",
             r'mailru-verification: ([a-z0-9]{16})$'),
            ("Yandex Domain Verification",
             r'yandex-verification: ([a-z0-9]{16})$'),
            # https://support.brave.com/hc/en-us/articles/360021408352-How-do-I-verify-my-channel-
            ("Brave Ledger Verification",
             r'brave-ledger-verification=([a-z0-9]+)$'),
            ("have-i-been-pwned Verification",
             r'have-i-been-pwned-verification=([a-f0-9]+)$'),
            # https://www.ciscolive.com/c/dam/r/ciscolive/us/docs/2016/pdf/TECCOL-2982.pdf
            ("Cisco Live Domain Verification",
             r'cisco-ci-domain-verification=([a-f0-9]+)$'),
        )

        record = eventData.strip()
        for label, pattern in dns_patterns:
            for found in re.findall(pattern, record, re.IGNORECASE):
                emit(label, found)

    return None
def handleEvent(self, event):
    """Look for human names in the event data and emit HUMAN_NAME events.

    * TARGET_WEB_CONTENT whose ``event.actualSource`` contains ".js" or
      ".css" is ignored when ``self.opts['filterjscss']`` is set.
    * EMAILADDR (when ``self.opts['emailtoname']`` is set) with a dot in the
      local part is turned into a name by capitalising the dot-separated
      pieces; the name is emitted and processing stops.
    * Otherwise capitalised word pairs are scored and emitted when the score
      exceeds ``self.opts['algolimit']``.

    Scoring of a (first, second) pair, using ``self.d`` (dictionary words)
    and ``self.n`` (popular first names):
      +75 neither word is in ``self.d``
      +50 first word is in ``self.n``
      -50 either word is exactly two characters long
      -20 only the first word is in ``self.d``
      -40 only the second word is in ``self.d``

    Args:
        event: incoming event; ``eventType``, ``module``, ``data``,
            ``actualSource`` and ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # Optionally ignore web content that came from a JavaScript/CSS URL.
    if eventName == "TARGET_WEB_CONTENT":
        url = event.actualSource
        if self.opts['filterjscss']:
            if ".js" in url or ".css" in url:
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

    if eventName == "EMAILADDR" and self.opts['emailtoname']:
        local_part = eventData.split("@")[0]
        if "." in local_part:
            pieces = local_part.split(".")
            if type(eventData) == unicode:
                name = " ".join(map(unicode.capitalize, pieces))
            else:
                name = unicode(" ".join(map(str.capitalize, pieces)),
                               'utf-8', errors='replace')

            # Notify other modules of what was found.
            found = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            found.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(found)
            return None

    # Stage 1: find things that look (very vaguely) like names.
    name_rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")

    for pair in name_rx.findall(eventData):
        first = pair[0].lower()

        # "Firstname's Secondname" is not a name.
        if first[-2] == "'" or first[-1] == "'":
            continue

        # Strip a trailing ' or 's from the second word, keeping both the
        # original-case form (for output) and a lower-case form (for lookups).
        second_display = pair[1].replace("'s", "").rstrip("'")
        second = pair[1].lower().replace("'s", "").rstrip("'")

        score = 0
        neither_in_dict = first not in self.d and second not in self.d
        if neither_in_dict:
            self.sf.debug("Both first and second names are not in the dictionary, so high chance of name: (" + first + ":" + second + ").")
            score += 75
        else:
            self.sf.debug(first + " was found or " + second + " was found in dictionary.")

        # Known popular first name.
        if first in self.n:
            score += 50

        # Two-character words are unlikely to be names.
        if len(first) == 2 or len(second) == 2:
            score -= 50

        if not neither_in_dict:
            # Only the first word is a dictionary word.
            if first in self.d and second not in self.d:
                score -= 20
            # Only the second word is a dictionary word.
            if first not in self.d and second in self.d:
                score -= 40

        name = pair[0] + " " + second_display
        self.sf.debug("Name of " + name + " has score: " + str(score))

        if score > self.opts['algolimit']:
            # Notify other modules of what was found.
            found = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            found.moduleDataSource = event.moduleDataSource or "Unknown"
            self.notifyListeners(found)
def handleEvent(self, event):
    """Derive country names from the event data and emit COUNTRY_NAME events.

    The detector is picked by incoming event type:

    * PHONE_NUMBER -> ``detectCountryFromPhone``
    * DOMAIN_NAME, plus AFFILIATE_DOMAIN_NAME / CO_HOSTED_SITE_DOMAIN /
      SIMILARDOMAIN when the matching option is enabled ->
      ``detectCountryFromTLD``
    * IBAN_NUMBER -> ``detectCountryFromIBAN``
    * DOMAIN_WHOIS, GEOINFO, PHYSICAL_ADDRESS, plus AFFILIATE_DOMAIN_WHOIS /
      CO_HOSTED_SITE_DOMAIN_WHOIS when enabled -> ``detectCountryFromData``
      (a None result is tolerated)

    Data whose hash is already a key of ``self.results`` is skipped. Empty
    and None results are ignored and duplicates are collapsed.

    Args:
        event: incoming event; ``eventType``, ``module``, ``data`` and
            ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, %s, from %s" % (eventName, srcModuleName))

    # Do not parse the same incoming data twice.
    digest = self.sf.hashstring(eventData)
    if digest in self.results:
        self.sf.debug("Already found from this source")
        return None
    self.results[digest] = True

    candidates = []

    is_domain_event = (
        eventName == "DOMAIN_NAME"
        or (eventName == "AFFILIATE_DOMAIN_NAME" and self.opts["affiliate"])
        or (eventName == "CO_HOSTED_SITE_DOMAIN" and self.opts["cohosted"])
        or (eventName == "SIMILARDOMAIN" and self.opts["similardomain"])
    ) if eventName != "PHONE_NUMBER" else False

    if eventName == "PHONE_NUMBER":
        candidates.append(self.detectCountryFromPhone(eventData))
    elif is_domain_event:
        candidates.append(self.detectCountryFromTLD(eventData))
    elif eventName == "IBAN_NUMBER":
        candidates.append(self.detectCountryFromIBAN(eventData))
    elif (eventName in ["DOMAIN_WHOIS", "GEOINFO", "PHYSICAL_ADDRESS"]
          or (eventName == "AFFILIATE_DOMAIN_WHOIS" and self.opts["affiliate"])
          or (eventName == "CO_HOSTED_SITE_DOMAIN_WHOIS" and self.opts["cohosted"])):
        detected = self.detectCountryFromData(eventData)
        if detected is None:
            candidates.append(None)
        else:
            candidates.extend(detected)

    # Nothing was looked up for this event type.
    if not candidates:
        return None

    # A set removes duplicate names.
    for country in set(candidates):
        if country in ('', None):
            continue

        self.sf.debug("Found country name: " + country)

        found = SpiderFootEvent("COUNTRY_NAME", country, self.__name__, event)
        found.moduleDataSource = event.moduleDataSource or "Unknown"
        self.notifyListeners(found)

    return None
def handleEvent(self, event):
    """Search the event data for human names and report them as HUMAN_NAME events.

    * TARGET_WEB_CONTENT is ignored when ``self.opts['filterjscss']`` is set
      and ``event.sourceEvent.data`` contains ".js" or ".css".
    * EMAILADDR (with ``self.opts['emailtoname']`` set) whose local part
      contains a dot is converted into a name by capitalising the
      dot-separated pieces; that name is emitted and processing stops.
    * Otherwise capitalised word pairs are scored and emitted when the score
      exceeds ``self.opts['algolimit']``.

    Scoring of a (first, second) pair, using ``self.d`` (dictionary words)
    and ``self.n`` (popular first names):
      +75 neither word is in ``self.d``
      +50 first word is in ``self.n``
      -50 either word is exactly two characters long
      -20 only the first word is in ``self.d``
      -40 only the second word is in ``self.d``

    Args:
        event: incoming event; ``eventType``, ``module``, ``data``,
            ``sourceEvent`` and ``moduleDataSource`` are read from it.

    Returns:
        None in every case.
    """
    kind = event.eventType
    sender = event.module
    text = event.data

    self.sf.debug("Received event, " + kind + ", from " + sender)

    # Web content fetched from a JavaScript/CSS URL can optionally be skipped.
    if kind == "TARGET_WEB_CONTENT":
        source_url = event.sourceEvent.data
        if self.opts['filterjscss'] and (".js" in source_url or ".css" in source_url):
            self.sf.debug("Ignoring web content from CSS/JS.")
            return None

    if kind == "EMAILADDR" and self.opts['emailtoname']:
        if "." in text.split("@")[0]:
            parts = text.split("@")[0].split(".")
            if type(text) == unicode:
                person = " ".join(map(unicode.capitalize, parts))
            else:
                person = " ".join(map(str.capitalize, parts))
                person = unicode(person, 'utf-8', errors='replace')

            # Tell listeners about the derived name.
            out = SpiderFootEvent("HUMAN_NAME", person, self.__name__, event)
            if event.moduleDataSource:
                out.moduleDataSource = event.moduleDataSource
            else:
                out.moduleDataSource = "Unknown"
            self.notifyListeners(out)
            return None

    # Stage 1: collect word pairs that look (very vaguely) like names.
    pattern = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
    pairs = re.findall(pattern, text)

    for raw_first, raw_second in pairs:
        first = raw_first.lower()

        # A possessive first word ("Firstname's Secondname") is not a name.
        if first[len(first) - 2] == "'" or first[len(first) - 1] == "'":
            continue

        # Drop a trailing ' or 's from the second word.
        shown_second = raw_second.replace("'s", "")
        shown_second = shown_second.rstrip("'")
        second = raw_second.lower().replace("'s", "")
        second = second.rstrip("'")

        points = 0
        unknown_pair = False

        if first in self.d or second in self.d:
            self.sf.debug(first + " was found or " + second + " was found in dictionary.")
        else:
            self.sf.debug("Both first and second names are not in the dictionary, so high chance of name: (" + first +":" + second +").")
            points += 75
            unknown_pair = True

        # Reward a known popular first name.
        if first in self.n:
            points += 50

        # Penalise two-character words.
        if len(first) == 2 or len(second) == 2:
            points -= 50

        if not unknown_pair:
            # First word in the dictionary, second not.
            if first in self.d and second not in self.d:
                points -= 20
            # Second word in the dictionary, first not.
            if first not in self.d and second in self.d:
                points -= 40

        person = raw_first + " " + shown_second
        self.sf.debug("Name of " + person + " has score: " + str(points))

        if points > self.opts['algolimit']:
            # Tell listeners about the name.
            out = SpiderFootEvent("HUMAN_NAME", person, self.__name__, event)
            if event.moduleDataSource:
                out.moduleDataSource = event.moduleDataSource
            else:
                out.moduleDataSource = "Unknown"
            self.notifyListeners(out)
def handleEvent(self, event):
    """Look for human names in the event data and emit HUMAN_NAME events.

    * TARGET_WEB_CONTENT whose actual source URL contains ".js" or ".css"
      is skipped when the 'filterjscss' option is set.
    * An EMAILADDR whose local part contains a "." is converted to a name
      (when 'emailtoname' is set) unless the result contains a digit; the
      handler then returns.
    * RAW_RIR_DATA is only processed when it comes from one of a fixed list
      of modules.
    * Otherwise every "Capitalised Capitalised" word pair in the data is
      scored and reported when the score exceeds the 'algolimit' option.

    Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

    # If the source event is web content, check if the source URL was
    # javascript or CSS, in which case optionally ignore it.
    if eventName == "TARGET_WEB_CONTENT":
        url = event.actualSource
        if url is not None:
            if self.opts['filterjscss'] and (".js" in url or ".css" in url):
                self.sf.debug("Ignoring web content from CSS/JS.")
                return None

    if eventName == "EMAILADDR" and self.opts['emailtoname']:
        localPart = eventData.split("@")[0]
        if "." in localPart:
            name = " ".join(map(str.capitalize, localPart.split(".")))

            # Names don't have numbers. re.search is required here: the
            # previous re.match("[0-9]*", ...) matched the empty string and
            # therefore rejected every name.
            if re.search("[0-9]", name):
                return None

            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
            return None

    # For RAW_RIR_DATA, there are only specific modules we
    # expect to see RELEVANT names within.
    if eventName == "RAW_RIR_DATA":
        if srcModuleName not in [
                "sfp_arin", "sfp_builtwith", "sfp_clearbit",
                "sfp_fullcontact", "sfp_github", "sfp_hunter",
                "sfp_opencorporates", "sfp_slideshare", "sfp_twitter",
                "sfp_venmo", "sfp_instagram"
        ]:
            self.sf.debug("Ignoring RAW_RIR_DATA from untrusted module.")
            return None

    # Stage 1: Find things that look (very vaguely) like names
    rx = re.compile(
        r"([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)"
    )
    m = re.findall(rx, eventData)
    for r in m:
        # Start off each match as 0 points.
        p = 0
        notindict = False

        # Shouldn't encounter "Firstname's Secondname"
        first = r[0].lower()
        if first[len(first) - 2] == "'" or first[len(first) - 1] == "'":
            continue

        # Strip off trailing ' or 's
        secondOrig = r[1].replace("'s", "")
        secondOrig = secondOrig.rstrip("'")
        second = r[1].lower().replace("'s", "")
        second = second.rstrip("'")

        # If both words are not in the dictionary, add 75 points.
        if first not in self.d and second not in self.d:
            self.sf.debug(
                f"Both first and second names are not in the dictionary, so high chance of name: ({first}:{second})."
            )
            p += 75
            notindict = True
        else:
            self.sf.debug(first + " was found or " + second +
                          " was found in dictionary.")

        # If the first word is a known popular first name, award 50 points.
        if first in self.n:
            p += 50

        # If either word is 2 characters, subtract 50 points.
        if len(first) == 2 or len(second) == 2:
            p -= 50

        if not notindict:
            # If the first word is in the dictionary but the second isn't,
            # subtract 20 points.
            if first in self.d and second not in self.d:
                p -= 20

            # If the second word is in the dictionary but the first isn't,
            # subtract 40 points.
            if first not in self.d and second in self.d:
                p -= 40

        name = r[0] + " " + secondOrig

        self.sf.debug("Name of " + name + " has score: " + str(p))
        if p > self.opts['algolimit']:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Look for human names in the event data and emit HUMAN_NAME events.

    An EMAILADDR whose local part contains a "." is turned into a name by
    capitalising the dot-separated pieces (when 'emailtoname' is set), after
    which the handler returns. Otherwise each "Capitalised Capitalised" word
    pair in the data is scored and reported when the score exceeds the
    'algotune' option.

    Always returns None.
    """
    kind = event.eventType
    payload = event.data

    def publish(found):
        # Re-use the data source of the event the name was extracted from.
        out = SpiderFootEvent("HUMAN_NAME", found, self.__name__, event)
        out.moduleDataSource = event.moduleDataSource or "Unknown"
        self.notifyListeners(out)

    self.sf.debug("Received event, " + kind + ", from " + event.module)

    if kind == "EMAILADDR" and self.opts['emailtoname']:
        mailbox = payload.split("@")[0]
        if "." in mailbox:
            words = mailbox.split(".")
            if type(payload) == unicode:
                name = " ".join(map(unicode.capitalize, words))
            else:
                raw = " ".join(map(str.capitalize, words))
                name = unicode(raw, 'utf-8', errors='replace')
            publish(name)
            return None

    # Stage 1: anything that looks (very vaguely) like a name.
    candidates = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
    for given, family in candidates.findall(payload):
        first = given.lower()

        # "Firstname's Secondname" is not a name.
        if "'" in first[-2:]:
            continue

        # Drop a trailing ' or 's from the second word.
        family_clean = family.replace("'s", "").rstrip("'")
        second = family.lower().replace("'s", "").rstrip("'")

        first_in_dict = first in self.d
        second_in_dict = second in self.d

        # Scoring starts from zero for every candidate.
        points = 0
        if not (first_in_dict or second_in_dict):
            # Neither word is a dictionary word: +75.
            points += 75
        else:
            # Only the first word is a dictionary word: -20.
            if first_in_dict and not second_in_dict:
                points -= 20
            # Only the second word is a dictionary word: -40.
            if second_in_dict and not first_in_dict:
                points -= 40

        # First word is a known popular first name: +50.
        if first in self.n:
            points += 50

        # Either word is just two characters long: -50.
        if 2 in (len(first), len(second)):
            points -= 50

        if points > self.opts['algotune']:
            publish(given + " " + family_clean)
def handleEvent(self, event):
    """Fetch an external site and report it as an affiliate if it mentions
    one of the target's names.

    SIMILARDOMAIN and CO_HOSTED_SITE events carry bare domains, so they are
    lower-cased and prefixed with "http://". URLs whose FQDN matches the
    target, or that were already fetched, are ignored. When the page itself
    holds no mention and the event is LINKED_URL_EXTERNAL with the
    'checkbase' option set, the site's base URL is fetched and checked too.

    On a hit, an AFFILIATE_INTERNET_NAME event and a child
    AFFILIATE_WEB_CONTENT event are emitted. Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # The SIMILARDOMAIN and CO_HOSTED_SITE events supply domains,
    # not URLs. Assume HTTP.
    if eventName in ['SIMILARDOMAIN', 'CO_HOSTED_SITE']:
        eventData = 'http://' + eventData.lower()

    # We are only interested in external sites for the crossref
    if self.getTarget().matches(self.sf.urlFQDN(eventData)):
        self.sf.debug("Ignoring " + eventData + " as not external")
        return None

    if eventData in self.fetched:
        self.sf.debug("Ignoring " + eventData + " as already tested")
        return None
    self.fetched[eventData] = True

    self.sf.debug("Testing for affiliation: " + eventData)
    res = self.sf.fetchUrl(eventData, timeout=self.opts['_fetchtimeout'],
                           useragent=self.opts['_useragent'],
                           sizeLimit=10000000)

    if res['content'] is None:
        self.sf.debug("Ignoring " + eventData + " as no data returned")
        return None

    matched = False
    for name in self.getTarget().getNames():
        # Search for mentions of our host/domain in the external site's
        # data. The name is escaped so that characters such as "." are
        # matched literally rather than as regex metacharacters.
        pat = re.compile(r"([\.\'\/\"\ ]" + re.escape(name) + r"[\.\'\/\"\ ])",
                         re.IGNORECASE)
        if len(re.findall(pat, res['content'])) > 0:
            matched = True
            url = eventData
            break

    if not matched:
        # If the name wasn't found in the affiliate, and checkbase is set,
        # fetch the base URL of the affiliate to check for a crossref.
        if eventName == "LINKED_URL_EXTERNAL" and self.opts['checkbase']:
            # Check the base url to see if there is an affiliation
            url = self.sf.urlBaseUrl(eventData)
            if url in self.fetched:
                return None
            self.fetched[url] = True

            res = self.sf.fetchUrl(url, timeout=self.opts['_fetchtimeout'],
                                   useragent=self.opts['_useragent'],
                                   sizeLimit=10000000)
            if res['content'] is not None:
                for name in self.getTarget().getNames():
                    # Unlike the first pattern, a trailing "." is not
                    # accepted after the name here.
                    pat = re.compile(r"([\.\'\/\"\ ]" + re.escape(name) + r"[\'\/\"\ ])",
                                     re.IGNORECASE)
                    if len(re.findall(pat, res['content'])) > 0:
                        matched = True

    if matched:
        if not event.moduleDataSource:
            event.moduleDataSource = "Unknown"
        self.sf.info("Found affiliate: " + url)
        evt1 = SpiderFootEvent("AFFILIATE_INTERNET_NAME", self.sf.urlFQDN(url),
                               self.__name__, event)
        evt1.moduleDataSource = event.moduleDataSource
        self.notifyListeners(evt1)

        # The fetched page is attached as a child of the affiliate event.
        evt2 = SpiderFootEvent("AFFILIATE_WEB_CONTENT", res['content'],
                               self.__name__, evt1)
        evt2.moduleDataSource = event.moduleDataSource
        self.notifyListeners(evt2)
def handleEvent(self, event):
    """Extract company names from the event data.

    The data is first searched with plain substring lookups for company
    suffixes ("Ltd", "Inc", ...); a small window around every hit is then
    run through the expensive regular expression. Each distinct name is
    reported once per event as COMPANY_NAME, or as AFFILIATE_COMPANY_NAME
    when the incoming event type contains "AFFILIATE_".

    Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # Various ways to identify companies in text
    # Support up to three word company names with each starting with
    # a capital letter, allowing for hyphens brackets and numbers within.
    pattern_prefix = "(?=[,;:\'\">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.][^ \"\';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*)?\s+"
    # NOTE: every entry needs its own trailing comma. A missing one makes
    # Python concatenate the neighbouring literals, which silently drops
    # both suffixes (this previously happened to N.V. and PLC).
    pattern_match_re = [
        'LLC', 'L\.L\.C\.?', 'AG', 'A\.G\.?', 'GmbH', 'Pty\.?\s+Ltd\.?',
        'Ltd\.?', 'Pte\.?', 'Inc\.?', 'INC\.?', 'Incorporated', 'Foundation',
        'Corp\.?', 'Corporation', 'SA', 'S\.A\.?', 'SIA', 'BV', 'B\.V\.?',
        'NV', 'N\.V\.?', 'PLC', 'Limited', 'Pvt\.?\s+Ltd\.?', 'SARL',
    ]
    pattern_match = [
        'LLC', 'L.L.C', 'AG', 'A.G', 'GmbH', 'Pty',
        'Ltd', 'Pte', 'Inc', 'INC', 'Foundation',
        'Corp', 'SA', 'S.A', 'SIA', 'BV', 'B.V',
        'NV', 'N.V', 'PLC', 'Limited', 'Pvt.', 'SARL',
    ]
    pattern_suffix = "(?=[ \.,:<\)\'\"]|[$\n\r])"

    # Filter out anything from the company name which matches the below
    filterpatterns = [
        "Copyright",
        "\d{4}"  # To catch years
    ]

    # Don't re-parse company names
    if eventName in ["COMPANY_NAME", "AFFILIATE_COMPANY_NAME"]:
        return None

    if eventName == "TARGET_WEB_CONTENT":
        url = event.sourceEvent.data
        if self.opts['filterjscss'] and (".js" in url or ".css" in url):
            self.sf.debug("Ignoring web content from CSS/JS.")
            return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName + ": " + str(len(eventData)) + " bytes.")

    if type(eventData) not in [str, unicode]:
        try:
            if type(eventData) in [list, dict]:
                eventData = str(eventData)
            else:
                self.sf.debug("Unhandled type to find company names: " +
                              str(type(eventData)))
                return None
        except BaseException as e:
            self.sf.debug("Unable to convert list/dict to string: " + str(e))
            return None

    # Strip out everything before the O=
    try:
        if eventName == "SSL_CERTIFICATE_ISSUED":
            eventData = eventData.split("O=")[1]
    except BaseException:
        self.sf.debug("Couldn't strip out O=, proceeding anyway...")

    # Find chunks of text containing what might be a company name first.
    # This is to avoid running very expensive regexps on large chunks of
    # data.
    # NOTE(review): the window ends 10 characters after the start of the
    # hit, which is shorter than some of the regex alternatives (e.g.
    # "Corporation") plus their look-ahead character - confirm intended.
    chunks = list()
    dataLen = len(eventData)
    for pat in pattern_match:
        # str.find() returns -1 when there is no hit; index 0 is a valid
        # position and must not end the scan for this suffix.
        m = eventData.find(pat)
        while m >= 0:
            start = max(m - 50, 0)
            # Slice ends are exclusive, so clamp to the full length.
            end = min(m + 10, dataLen)
            chunks.append(eventData[start:end])
            m = eventData.find(pat, m + len(pat))

    myres = list()
    for chunk in chunks:
        for pat in pattern_match_re:
            matches = re.findall(pattern_prefix + "(" + pat + ")" + pattern_suffix, chunk, re.MULTILINE | re.DOTALL)
            for match in matches:
                # Need at least one word in addition to the suffix itself.
                matched = sum(1 for part in match if len(part) > 0)
                if matched <= 1:
                    continue

                kept = [
                    part for part in match
                    if not any(re.match(f, part) for f in filterpatterns)
                ]
                fullcompany = re.sub("\s+", " ", " ".join(kept).strip())

                self.sf.info("Found company name: " + fullcompany)
                if fullcompany in myres:
                    self.sf.debug("Already found from this source.")
                    continue
                myres.append(fullcompany)

                if "AFFILIATE_" in eventName:
                    etype = "AFFILIATE_COMPANY_NAME"
                else:
                    etype = "COMPANY_NAME"

                evt = SpiderFootEvent(etype, fullcompany, self.__name__, event)
                if event.moduleDataSource:
                    evt.moduleDataSource = event.moduleDataSource
                else:
                    evt.moduleDataSource = "Unknown"
                self.notifyListeners(evt)
def handleEvent(self, event):
    """Look for human names in the event data and emit HUMAN_NAME events.

    An EMAILADDR whose local part contains a "." is turned into a name by
    capitalising the dot-separated pieces (when 'emailtoname' is set), after
    which the handler returns. Otherwise each "Capitalised Capitalised" word
    pair in the data is scored and reported when the score exceeds the
    'algotune' option.

    Every emitted event gets the data source of the incoming event, or
    "Unknown" when that is not set. Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventName == "EMAILADDR" and self.opts['emailtoname']:
        localPart = eventData.split("@")[0]
        if "." in localPart:
            if type(eventData) == unicode:
                name = " ".join(map(unicode.capitalize, localPart.split(".")))
            else:
                name = " ".join(map(str.capitalize, localPart.split(".")))
                name = unicode(name, 'utf-8', errors='replace')

            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            # Set the data source here as well, exactly as the regex branch
            # below does, so both kinds of HUMAN_NAME event look the same.
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
            return None

    # Stage 1: Find things that look (very vaguely) like names
    rx = re.compile("([A-Z][a-z�������������]+)\s+.?.?\s?([A-Z][�������������a-zA-Z\'\-]+)")
    m = re.findall(rx, eventData)
    for r in m:
        # Start off each match as 0 points.
        p = 0
        notindict = False

        # Shouldn't encounter "Firstname's Secondname"
        first = r[0].lower()
        if first[len(first) - 2] == "'" or first[len(first) - 1] == "'":
            continue

        # Strip off trailing ' or 's
        secondOrig = r[1].replace("'s", "")
        secondOrig = secondOrig.rstrip("'")
        second = r[1].lower().replace("'s", "")
        second = second.rstrip("'")

        # If both words are not in the dictionary, add 75 points.
        if first not in self.d and second not in self.d:
            p += 75
            notindict = True

        # If the first word is a known popular first name, award 50 points.
        if first in self.n:
            p += 50

        # If either word is 2 characters, subtract 50 points.
        if len(first) == 2 or len(second) == 2:
            p -= 50

        if not notindict:
            # If the first word is in the dictionary but the second isn't,
            # subtract 20 points.
            if first in self.d and second not in self.d:
                p -= 20

            # If the second word is in the dictionary but the first isn't,
            # subtract 40 points.
            if first not in self.d and second in self.d:
                p -= 40

        name = r[0] + " " + secondOrig

        if p > self.opts['algotune']:
            # Notify other modules of what you've found
            evt = SpiderFootEvent("HUMAN_NAME", name, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Extract company names from the event data.

    Cheap substring searches for company suffixes ("Ltd", "Inc", ...) locate
    candidate positions; a small window around each position is then matched
    against the full regular expression. Each distinct name is reported once
    per event, as AFFILIATE_COMPANY_NAME when the incoming event type
    contains "AFFILIATE_" and as COMPANY_NAME otherwise.

    Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # Various ways to identify companies in text
    # Support up to three word company names with each starting with
    # a capital letter, allowing for hyphens brackets and numbers within.
    pattern_prefix = "(?=[,;:\'\">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.][^ \"\';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,\.]?[^ \"\';:><]*)?\s+"
    # Each suffix must be followed by a comma: adjacent string literals are
    # concatenated by Python, which used to merge the N.V. and PLC entries
    # into a single pattern that never matched.
    pattern_match_re = [
        'LLC', 'L\.L\.C\.?', 'AG', 'A\.G\.?', 'GmbH', 'Pty\.?\s+Ltd\.?',
        'Ltd\.?', 'Pte\.?', 'Inc\.?', 'INC\.?', 'Incorporated',
        'Foundation', 'Corp\.?', 'Corporation', 'SA', 'S\.A\.?', 'SIA',
        'BV', 'B\.V\.?', 'NV', 'N\.V\.?', 'PLC', 'Limited',
        'Pvt\.?\s+Ltd\.?', 'SARL',
    ]
    pattern_match = [
        'LLC', 'L.L.C', 'AG', 'A.G', 'GmbH', 'Pty', 'Ltd', 'Pte', 'Inc',
        'INC', 'Foundation', 'Corp', 'SA', 'S.A', 'SIA', 'BV', 'B.V', 'NV',
        'N.V', 'PLC', 'Limited', 'Pvt.', 'SARL',
    ]
    pattern_suffix = "(?=[ \.,:<\)\'\"]|[$\n\r])"

    # Filter out anything from the company name which matches the below
    filterpatterns = [
        "Copyright",
        "\d{4}"  # To catch years
    ]

    # Don't re-parse company names
    if eventName in ["COMPANY_NAME", "AFFILIATE_COMPANY_NAME"]:
        return None

    if eventName == "TARGET_WEB_CONTENT":
        url = event.sourceEvent.data
        if self.opts['filterjscss'] and (".js" in url or ".css" in url):
            self.sf.debug("Ignoring web content from CSS/JS.")
            return None

    self.sf.debug("Received event, " + eventName + ", from " +
                  srcModuleName + ": " + str(len(eventData)) + " bytes.")

    if type(eventData) not in [str, unicode]:
        try:
            if type(eventData) in [list, dict]:
                eventData = str(eventData)
            else:
                self.sf.debug("Unhandled type to find company names: " +
                              str(type(eventData)))
                return None
        except BaseException as e:
            self.sf.debug("Unable to convert list/dict to string: " + str(e))
            return None

    # Strip out everything before the O=
    try:
        if eventName == "SSL_CERTIFICATE_ISSUED":
            eventData = eventData.split("O=")[1]
    except BaseException:
        self.sf.debug("Couldn't strip out O=, proceeding anyway...")

    # Find chunks of text containing what might be a company name first.
    # This is to avoid running very expensive regexps on large chunks of
    # data.
    # NOTE(review): a window reaches only 10 characters past the start of
    # the hit, less than some regex alternatives (e.g. "Corporation") need
    # together with their look-ahead character - confirm intended.
    chunks = list()
    for pat in pattern_match:
        offset = 0
        while True:
            m = eventData.find(pat, offset)
            # -1 means no further hit; a hit at index 0 is still a hit and
            # must not stop the search for later occurrences.
            if m < 0:
                break
            start = m - 50
            if start < 0:
                start = 0
            end = m + 10
            # The slice end is exclusive, so the full length keeps the
            # final character of the data.
            if end > len(eventData):
                end = len(eventData)
            chunks.append(eventData[start:end])
            offset = m + len(pat)

    myres = list()
    for chunk in chunks:
        for pat in pattern_match_re:
            matches = re.findall(
                pattern_prefix + "(" + pat + ")" + pattern_suffix, chunk,
                re.MULTILINE | re.DOTALL)
            for match in matches:
                # Skip hits that consist of the suffix and nothing else.
                matched = len([part for part in match if len(part) > 0])
                if matched <= 1:
                    continue

                # Drop words such as "Copyright" or a four-digit year.
                fullcompany = ""
                for part in match:
                    if any(re.match(f, part) for f in filterpatterns):
                        continue
                    fullcompany += part + " "
                fullcompany = re.sub("\s+", " ", fullcompany.strip())

                self.sf.info("Found company name: " + fullcompany)
                if fullcompany in myres:
                    self.sf.debug("Already found from this source.")
                    continue
                myres.append(fullcompany)

                if "AFFILIATE_" in eventName:
                    etype = "AFFILIATE_COMPANY_NAME"
                else:
                    etype = "COMPANY_NAME"

                evt = SpiderFootEvent(etype, fullcompany, self.__name__,
                                      event)
                if event.moduleDataSource:
                    evt.moduleDataSource = event.moduleDataSource
                else:
                    evt.moduleDataSource = "Unknown"
                self.notifyListeners(evt)
def handleEvent(self, event):
    """Template event handler: query a third party about the event data.

    For NETBLOCK_* events every address of the network is queried
    individually and an IP_ADDRESS event is generated for each address that
    yields a record; for other events the event data itself is queried. Each
    record produces a RAW_RIR_DATA event and, when the record has an 'os'
    entry, an OPERATING_SYSTEM event.

    Always returns None.
    """
    # The three most used fields in SpiderFootEvent are:
    # event.eventType - the event type, e.g. INTERNET_NAME, IP_ADDRESS, etc.
    # event.module - the name of the module that generated the event, e.g. sfp_dnsresolve
    # event.data - the actual data, e.g. 127.0.0.1. This can sometimes be megabytes in size (e.g. a PDF)
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # Once we are in this state, return immediately.
    if self.errorState:
        return None

    # Log this before complaining about a missing API key so we know the
    # event was received.
    self.sf.debug("Received event, %s, from %s" % (eventName, srcModuleName))

    # Always check if the API key is set and complain if it isn't, then set
    # self.errorState to avoid this being a continual complaint during the scan.
    if self.opts['api_key'] == "":
        self.sf.error(
            "You enabled sfp_template but did not set an API key!", False)
        self.errorState = True
        return None

    # Don't look up stuff twice
    if eventData in self.results:
        self.sf.debug("Skipping " + eventData + " as already mapped.")
        return None

    # If eventData might be something large, set the key to a hash
    # of the value instead of the value, to avoid memory abuse.
    self.results[eventData] = True

    if eventName == 'NETBLOCK_OWNER':
        # Note here an example of handling the netblocklookup option
        if not self.opts['netblocklookup']:
            return None

        if IPNetwork(eventData).prefixlen < self.opts['maxnetblock']:
            self.sf.debug("Network size bigger than permitted: " +
                          str(IPNetwork(eventData).prefixlen) + " > " +
                          str(self.opts['maxnetblock']))
            return None

    # The list of things to query must exist before it is appended to;
    # without this initialisation the handler raised NameError.
    qrylist = []

    # When handling netblocks/subnets, assuming the user set
    # netblocklookup/subnetlookup to True, we need to expand it
    # to the IPs for looking up.
    if eventName.startswith("NETBLOCK_"):
        for ipaddr in IPNetwork(eventData):
            qrylist.append(str(ipaddr))
            self.results[str(ipaddr)] = True
    else:
        qrylist.append(eventData)

    for addr in qrylist:
        # Perform the query to the third party; in this case for each IP
        # being queried.
        rec = self.query(addr)

        # Handle the response being empty/failing
        if rec is None:
            continue

        # For netblocks, we need to create the IP address event so that
        # the threat intel event is more meaningful and linked to the
        # IP address within the network, not the whole network.
        if eventName.startswith('NETBLOCK_'):
            # This is where the module generates an event for other modules
            # to process and is a fundamental part of the SpiderFoot architecture.
            # We are generating an event of type "IP_ADDRESS" here, the data being
            # the addr variable, the name of the module is the next argument
            # (self.__name__), and finally the event that is linked as the source
            # event of this event. This enables SpiderFoot to link events so users
            # can see what events generated other events, seeing a full chain of
            # discovery from their target to the data returned here.
            pevent = SpiderFootEvent("IP_ADDRESS", addr, self.__name__, event)
            # With the event created, we can now notify any other modules listening
            # for IP_ADDRESS events (which they define in their watchedEvents()
            # function).
            self.notifyListeners(pevent)
        else:
            # If the event received wasn't a netblock, then use that event
            # as the source event for later events.
            pevent = event

        # When querying a third party API, always ensure to generate
        # a RAW_RIR_DATA event. Note that here we are seeing the pevent
        # event as the source for this, since the IP address is actually
        # what was queried against the third party, not the netblock.
        # So now we have NETBLOCK_OWNER (event we received) -> IP_ADDRESS
        # (event we generated above) -> RAW_RIR_DATA (event from the third
        # party about the IP Address we queried).
        evt = SpiderFootEvent("RAW_RIR_DATA", str(rec), self.__name__, pevent)
        self.notifyListeners(evt)

        # Whenever operating in a loop, call this to check whether the user
        # requested the scan to be aborted.
        if self.checkForStop():
            return None

        # In some cases, you want to override the data source for the event
        # you're producing to be the data source of the event that you've
        # received. This is needed, for example, when the module is purely
        # extracting data from a received event, so the data source is not
        # actually this module, but the data source of the received event
        # itself! sfp_email is a good example, since it is purely looking
        # for e-mail addresses in received content, so an EMAILADDR event
        # should have a data source of whatever place the EMAILADDR was
        # actually found in. This is how you'd achieve that:
        # NOTE(review): evt has already been passed to notifyListeners()
        # above, so this override happens after notification - confirm
        # whether it should be applied before notifying in real modules.
        if event.moduleDataSource:
            evt.moduleDataSource = event.moduleDataSource
        else:
            # This should never happen, but just to be safe since other
            # code might depend on this field existing and not being None.
            evt.moduleDataSource = "Unknown"

        # Note that we are using rec.get('os') instead of rec['os'] - this
        # means we won't get an exception if the 'os' key doesn't exist. In
        # general, you should always use .get() instead of accessing keys
        # directly in case the key doesn't exist.
        osName = rec.get('os')
        if osName:
            evt = SpiderFootEvent("OPERATING_SYSTEM", f"{osName} ({addr})",
                                  self.__name__, pevent)
            self.notifyListeners(evt)
def handleEvent(self, event):
    """Extract e-mail addresses from the event data.

    Addresses whose domain (or the address itself) matches the target are
    reported as EMAILADDR, all others as AFFILIATE_EMAILADDR. External
    addresses found in RAW_RIR_DATA are skipped. Each distinct address is
    reported once per event.

    Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # Don't re-parse e-mail addresses
    if "EMAILADDR" in eventName:
        return None

    # Ignore any web content that isn't from the target. This avoids noise from
    # pastebin and other content where unrelated e-mails are likely to be found.
    if "_CONTENT" in eventName and eventName != "TARGET_WEB_CONTENT":
        return None

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if type(eventData) not in [str, unicode]:
        try:
            if type(eventData) in [list, dict]:
                eventData = str(eventData)
            else:
                self.sf.debug("Unhandled type to find e-mails: " +
                              str(type(eventData)))
                return None
        except BaseException as e:
            self.sf.debug("Unable to convert list/dict to string: " + str(e))
            return None

    pat = re.compile(
        "([\%a-zA-Z\.0-9_\-]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
    matches = re.findall(pat, eventData)
    myres = list()
    for match in matches:
        evttype = "EMAILADDR"
        if len(match) < 4:
            self.sf.debug("Likely invalid address: " + match)
            continue

        # Handle messed up encodings
        if "%" in match:
            self.sf.debug("Skipped address: " + match)
            continue

        # Get the domain and strip potential ending .
        mailDom = match.lower().split('@')[1].strip('.')
        if not self.getTarget().matches(mailDom) and \
                not self.getTarget().matches(match):
            self.sf.debug("External domain, so possible affiliate e-mail")
            # Raw RIR data returning external e-mails generates way
            # too much noise. Skip only this address: returning here would
            # also discard target addresses that appear later in the same
            # data, making the result depend on match order.
            if eventName == "RAW_RIR_DATA":
                continue
            evttype = "AFFILIATE_EMAILADDR"

        self.sf.info("Found e-mail address: " + match)
        if type(match) == str:
            mail = unicode(match.strip('.'), 'utf-8', errors='replace')
        else:
            mail = match.strip('.')

        if mail in myres:
            self.sf.debug("Already found from this source.")
            continue
        myres.append(mail)

        evt = SpiderFootEvent(evttype, mail, self.__name__, event)
        if event.moduleDataSource:
            evt.moduleDataSource = event.moduleDataSource
        else:
            evt.moduleDataSource = "Unknown"
        self.notifyListeners(evt)

    return None
def handleEvent(self, event):
    """Extract company names from the event data and emit COMPANY_NAME.

    The data is matched against one regular expression per known company
    suffix ("Ltd", "Inc", ...). A hit needs at least one word in addition to
    the suffix; words matching the filter patterns are dropped from the
    name. Each distinct name is reported once per event.

    Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # Various ways to identify companies in text
    # Support up to three word company names with each starting with
    # a capital letter, allowing for hyphens brackets and numbers within.
    pattern_prefix = "(?=[,;:\'\">\(= ]|^)\s?([A-Z0-9\(\)][A-Za-z0-9\-&,][^ \"\';:><]*)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,]?[^ \"\';:><]*|[Aa]nd)?\s?([A-Z0-9\(\)][A-Za-z0-9\-&,]?[^ \"\';:><]*)?\s+"
    # Every entry needs its own trailing comma: adjacent string literals
    # are concatenated by Python, which used to merge the N.V. and PLC
    # entries into one pattern so that neither suffix was recognised.
    pattern_match = [
        'LLC', 'L\.L\.C\.?', 'AG', 'A\.G\.?', 'GmbH', 'Pty\.?\s+Ltd\.?',
        'Ltd\.?', 'Pte\.?', 'Inc\.?', 'INC\.?', 'Incorporated', 'Foundation',
        'Corp\.?', 'Corporation', 'SA', 'S\.A\.?', 'SIA', 'BV', 'B\.V\.?',
        'NV', 'N\.V\.?', 'PLC', 'Limited', 'Pvt\.?\s+Ltd\.?', 'SARL',
    ]
    pattern_suffix = "(?=[ \.,:<\)\'\"]|$)"

    # Filter out anything from the company name which matches the below
    filterpatterns = [
        "Copyright",
        "\d{4}"  # To catch years
    ]

    # Don't re-parse company names
    if eventName == "COMPANY_NAME":
        return None

    self.sf.debug("Received event, " + eventName + ", from " +
                  srcModuleName + ": " + str(len(eventData)) + " bytes.")

    if type(eventData) not in [str, unicode]:
        try:
            if type(eventData) in [list, dict]:
                eventData = str(eventData)
            else:
                self.sf.debug("Unhandled type to find company names: " +
                              str(type(eventData)))
                return None
        except BaseException as e:
            self.sf.debug("Unable to convert list/dict to string: " + str(e))
            return None

    # Strip out everything before the O=
    try:
        if eventName == "SSL_CERTIFICATE_ISSUED":
            eventData = eventData.split("O=")[1]
    except BaseException:
        self.sf.debug("Couldn't strip out O=, proceeding anyway...")

    myres = list()
    for pat in pattern_match:
        matches = re.findall(
            pattern_prefix + "(" + pat + ")" + pattern_suffix, eventData,
            re.MULTILINE)
        for match in matches:
            # Skip hits that consist of the suffix and nothing else.
            matched = sum(1 for part in match if len(part) > 0)
            if matched <= 1:
                continue

            # Drop words such as "Copyright" or a four-digit year.
            kept = [
                part for part in match
                if not any(re.match(f, part) for f in filterpatterns)
            ]
            fullcompany = re.sub("\s+", " ", " ".join(kept).strip())

            self.sf.info("Found company name: " + fullcompany)
            if fullcompany in myres:
                self.sf.debug("Already found from this source.")
                continue
            myres.append(fullcompany)

            evt = SpiderFootEvent("COMPANY_NAME", fullcompany,
                                  self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)