def main():
    """Debug driver for gglsbl: argv[1] is the GSB API key, argv[2] the URL.

    Prints every intermediate artifact of a lookup (threat lists, canonical
    URL, permutations, digests, hash prefixes) and the final verdict.
    """
    api_key = sys.argv[1]
    db_file = "../gsb_v4.db"
    target_platforms = ["WINDOWS"]
    sbl = SafeBrowsingList(api_key, db_path=db_file, platforms=target_platforms)
    #sbl.update_hash_prefix_cache()
    print(sbl.storage.get_threat_lists())

    target = sys.argv[2]
    u = URL(target)
    print(u.url)
    print(u.canonical)
    for permutation in u.url_permutations(u.canonical):
        print(permutation)
        print(u.digest(permutation))

    url_hashes = u.hashes
    print(url_hashes)
    full_hashes = list(url_hashes)
    print(full_hashes)
    # a "cue" is the 4-byte hash prefix used for the local-cache lookup
    cues = [to_hex(full_hash[:4]) for full_hash in full_hashes]
    print(cues)
    print(sbl.storage.lookup_hash_prefix(cues))

    bl = sbl.lookup_url(target)
    print(bl)
def main():
    """Endless loop: sync the GSB cache hourly and re-classify DB domains."""
    sbl = SafeBrowsingList(config.gsb_key, db_path=config.gsb_db_path)
    #result = sbl.lookup_url('http://www.amazon.esp.bravaidea.com/AWS/mobil/signin.php?https://www.amazon.com/gp/product/B00DBYBNEE/ref=nav_prime_try_btn')
    #print(result)
    dbo = db_operations.DBOperator()
    while True:
        slds = dbo.get_gsb_queryable_slds()
        urls = dbo.get_gsb_queryable_urls()
        candidates = slds + urls
        print(len(candidates))

        query_time = datetime.now()
        print ("GSB Update time:", str(query_time))
        run_sync(sbl)  # refresh the local hash-prefix cache first
        print ("Got updated GSB list. Now looking up %s domains: %s" % (
            len(candidates), str(datetime.now())))

        for domain in candidates:
            print(domain)
            try:
                lookup_result = sbl.lookup_url(domain)
                print(lookup_result)
                dbo.update_gsb_table(domain, "%s" % (lookup_result,), query_time)
            except Exception as err:
                # best effort: one bad domain must not abort the whole batch
                print ("Exception. Skipping this domain: ", domain, err)
                #print result

        print ("Done inserting into DB. Will update GSB list again", str(datetime.now()))
        time.sleep(3600)
def main():
    # Per-campaign Safe Browsing stats for SE ad clusters (Python 2 code:
    # print statements, dict.iteritems).
    # NOTE(review): this block was recovered from collapsed source; the
    # indentation below is a reconstruction — verify against the original file.
    total_attacks = 0
    sbl = SafeBrowsingList(gsb_key, db_path=gsb_db_path)
    all_home_domains = set()   # every publisher (seed) domain seen
    counted_hashes = set()     # screenshot hashes already tallied, to avoid double counting
    for name, labels in se_categories.iteritems():
        camp_domains = set()
        camp_gsb_clusters = 0
        camp_total_count = 0
        home_domains_campaign_set = set()
        for label in labels:
            cluster_gsb = False
            cluster_domains = set()
            ad_objs = get_ad_objects(str(label))
            # camp_total_count += len(ad_objs)
            for ad_obj in ad_objs:
                domain = extractor(ad_obj.ad_url).registered_domain
                e = extractor(ad_obj.ad_url)
                # fully-qualified landing domain (subdomain.domain.suffix)
                land_fqd = '.'.join(part for part in e if part)
                # home_domain = extractor(ad_obj.home_url).registered_domain
                e = extractor(seed_domain_data[ad_obj.log_id][0])
                home_domain = '.'.join(part for part in e if part)
                #
                camp_domains.add(domain)
                if ad_obj.screenshot_hash in image_domain_dict:
                    # if domain in all_land_tlds:
                    if ad_obj.screenshot_hash not in counted_hashes:
                        mal_ad_hashes.add(ad_obj.screenshot_hash)
                        camp_total_count += image_hash_count[ad_obj.screenshot_hash]
                        counted_hashes.add(ad_obj.screenshot_hash)
                        # home_domains_campaign_set = home_domains_campaign_set.union(image_home_domain_dict[ad_obj.screenshot_hash])
                        home_domains_campaign_set = home_domains_campaign_set.union(image_seed_domain_dict[ad_obj.screenshot_hash])
                        camp_domains = camp_domains.union(image_domain_dict[ad_obj.screenshot_hash])
                        cluster_domains = cluster_domains.union(image_domain_dict[ad_obj.screenshot_hash])
                        # all_home_domains = all_home_domains.union(image_home_domain_dict[ad_obj.screenshot_hash])
                        all_home_domains = all_home_domains.union(image_seed_domain_dict[ad_obj.screenshot_hash])
                else:
                    # screenshot hash unknown: fall back to the landing FQD itself
                    print "!!Not here!!"
                    camp_domains.add(land_fqd)
                    cluster_domains.add(land_fqd)
                    camp_total_count += 1
                # import ipdb; ipdb.set_trace()
                all_home_domains.add(home_domain)
            # a cluster counts as GSB-flagged when any of its domains is listed
            for domain in cluster_domains:
                if not domain:
                    continue
                # print "domain:", domain
                result = sbl.lookup_url(domain.strip())
                if result:
                    cluster_gsb = True
            if cluster_gsb:
                camp_gsb_clusters += 1
                # print result
        # LaTeX-table row: campaign & ads & domains & clusters & GSB clusters
        print name, '\t&', camp_total_count, '\t&', len(camp_domains), '\t&', len(labels), '\t&', camp_gsb_clusters, '\\\\'
        print len(home_domains_campaign_set)
        total_attacks += camp_total_count
    print "# of mal ad hashes:", len(mal_ad_hashes)
    print "# of unique publisher domains associated with SE ads:", len(all_home_domains)
    print "# of total SE attacks:", total_attacks
    # get_category_stats(all_home_domains)
    get_popularity_stats(all_home_domains)
def update_hash_prefix_cache():
    """Maintain a fresh GSB database using an active/inactive file pair.

    When the active DB was created *and* modified within the last 30 minutes
    nothing is downloaded (the stale inactive copy is just deleted).
    Otherwise the inactive slot is rebuilt: download into a ``.tmp`` file,
    then rename it (journal included) onto the inactive name.
    """
    active = get_active()
    freshness_floor = time.time() - (30 * 60)
    if (active and active['ctime'] and active['mtime']
            and min(active['ctime'], active['mtime']) >= freshness_floor):
        # no need to update, active DB exists and is recent
        logger.info('active database is fresh')
        # remove inactivate database if it exists to free up disk space
        remove_inactive(get_inactive())
        return

    # we need to update the inactive DB, so get its info and delete it
    inactive = get_inactive()
    remove_inactive(inactive)

    # download to temporary file name
    tmp_file = inactive['name'] + '.tmp'
    logger.info('downloading database to ' + tmp_file)
    sbl = SafeBrowsingList(gsb_api_key, tmp_file, True)
    sbl.update_hash_prefix_cache()
    logger.info("finished creating " + tmp_file)

    # rename to inactive file name
    if path.isfile(tmp_file + JOURNAL):
        rename(tmp_file + JOURNAL, inactive['name'] + JOURNAL)
        logger.info("renamed " + tmp_file + JOURNAL + ' to ' + inactive['name'] + JOURNAL)
    rename(tmp_file, inactive['name'])
    logger.info("renamed " + tmp_file + ' to ' + inactive['name'])
def app_lookup(url):
    """Flask handler: look up *url* in the currently active GSB database.

    Returns a JSON body ``{'url': ..., 'matches': [...]}`` on a hit, or
    aborts with 400 (bad input), 503 (no usable database), 500 (lookup
    failure) or 404 (URL not blacklisted).
    """
    # input validation (py2 code: accepts both str and unicode)
    if not isinstance(url, (str, unicode)):
        abort(400)
    # resolve entries against the most recently synced database
    active = get_active()
    if not active or not active['mtime']:
        abort(503)
    try:
        sbl = SafeBrowsingList(gsb_api_key, active['name'], True)
        resp = sbl.lookup_url(url)
    except Exception:
        # FIX: was a bare `except:` which also swallows SystemExit and
        # KeyboardInterrupt; Exception is the widest sensible net here
        app.logger.exception("exception handling [" + url + "]")
        abort(500)
    else:
        if resp:
            matches = [{
                'threat': x.threat_type,
                'platform': x.platform_type,
                'threat_entry': x.threat_entry_type
            } for x in resp]
            return jsonify({'url': url, 'matches': matches})
        else:
            abort(404)
def __init__(self, *args, **kwargs):
    """Set up moderators, optional Safe Browsing checks and the URL blacklist.

    Blacklist sources ending in ``.json`` are fetched and parsed (MEW list
    format); ``.csv`` and plain-text sources are not implemented yet.
    """
    super().__init__(*args, **kwargs)
    self.moderators = self.plugin_config['MODERATORS']
    # Initialize Safe Browsing API
    if self.plugin_config['GOOGLE_SAFE_BROWSING']:
        self.sbl = SafeBrowsingList(
            self.plugin_config['GOOGLE_SAFE_BROWSING_API_KEY'])
        self.sbl.update_hash_prefix_cache()
    # Populate Blacklist from URLS
    for url in self.plugin_config['BLACKLISTS']:
        url = url.strip()
        if url.endswith('.json'):
            r = requests.get(url)
            # Assuming MEW List format
            for item in r.json():
                self.blacklist.append(item['id'])
        elif url.endswith('.csv'):
            # BUG FIX: was `url.endswidth(...)` (typo) which raised
            # AttributeError whenever a non-.json source was configured
            print('csv not implemented')  # TODO
        else:
            print('txt not implement')  # TODO
    print(self.__class__.__name__, 'initialized')
def safebrowsingcheck(url):
    """Return a human-readable Google Safe Browsing verdict for *url*."""
    from gglsbl import SafeBrowsingList
    verdict = SafeBrowsingList('').lookup_url(url)
    return ':D Not in blacklist' if verdict is None else '@@ In the blacklist'
def update_hash_prefix_cache():
    """Sync the GSB hash-prefix database, then checkpoint its SQLite WAL."""
    logger.info('opening database at ' + dbfile)
    blacklist = SafeBrowsingList(gsb_api_key, dbfile, True)

    logger.info('updating database at ' + dbfile)
    blacklist.update_hash_prefix_cache()

    # force the write-ahead log back into the main DB file so the file on
    # disk is self-contained
    logger.info('checkpointing database at ' + dbfile)
    with blacklist.storage.get_cursor() as cursor:
        cursor.execute('PRAGMA wal_checkpoint(FULL)')
    blacklist.storage.db.commit()
    logger.info("all done!")
def google_sb_check(url):
    """Check *url* against Google Safe Browsing using the configured API key.

    The key is read from ``conf/parser.cfg`` (section ``url``).  Python 2
    code: uses the old ``ConfigParser`` module name.
    """
    import ConfigParser
    parser = ConfigParser.RawConfigParser()
    parser.read("conf/parser.cfg")
    apikey = parser.get('url', 'gsb_api_key')

    from gglsbl import SafeBrowsingList
    if SafeBrowsingList(apikey).lookup_url(url) is None:
        return 'URL is not in the blacklist.'
    return '@@ URL is in the blacklist.'
class SafeBrowsing(object):
    """Wrapper around gglsbl.SafeBrowsingList with domain-level helpers.

    The hash-prefix cache is (re)built when the database file is missing or
    when the caller explicitly requests it.
    """

    TYPE = "GoogleSBCheck"
    CP_FMT = '{scheme}://{netloc}/{path}'

    def __init__(self, name=None, api_key=None, db_path='/tmp/gsb_4.db',
                 update_hash_prefix_cache=False):
        self.api_key = api_key
        self.db_path = db_path
        self.sbl = SafeBrowsingList(api_key, db_path=db_path)
        self.update_hash_prefix_cache = update_hash_prefix_cache
        try:
            os.stat(db_path)
        except OSError:
            # FIX: was a bare `except:`; os.stat raises OSError subclasses.
            # Missing/unreadable DB forces an initial sync.
            self.update_hash_prefix_cache = True
        if self.update_hash_prefix_cache:
            # this may take a while so be patient (over 1600MB of data)
            self.sbl.update_hash_prefix_cache()

    def is_blacklisted(self, url):
        """Return True when the lookup reports any threat for *url*.

        BUG FIX: previously called ``SafeBrowsing.thread_safe_lookup(url)``,
        a method that does not exist on this class, so every call raised
        AttributeError.  Delegates to ``self.lookup_url`` instead.
        """
        return self.lookup_url(url) is not None

    def lookup_url(self, url):
        """Normalize *url* to ``scheme://netloc/path/`` and query GSB."""
        up = urlparse(url)
        cp = self.CP_FMT.format(**{
            'scheme': up.scheme,
            'netloc': up.netloc,
            'path': up.path
        }).strip('/') + '/'
        return self.sbl.lookup_url(cp)

    def handle_domain(self, domain):
        """Check a single domain; returns ``{domain: bool}``."""
        return self.handle_domains([
            domain,
        ])

    def handle_domains(self, domains):
        """Check each domain over both https and http.

        A domain maps to True when either scheme yields a GSB hit.
        """
        results = {}
        for domain in domains:
            t = "https://" + domain
            u = "http://" + domain
            results[domain] = bool(self.lookup_url(t) or self.lookup_url(u))
        return results
def _lookup(url, api_key, retry=1):
    """Look up *url* via a module-level cached SafeBrowsingList handle.

    The handle is rebuilt whenever the API key changes.  On failure the call
    retries up to ``max_retries`` times; after that the cache is invalidated
    and the request aborts with HTTP 500.
    """
    # perform lookup
    global sbl, last_api_key
    try:
        if api_key != last_api_key:
            app.logger.info('re-opening database')
            sbl = SafeBrowsingList(api_key, dbfile, True)
            last_api_key = api_key
        return sbl.lookup_url(url)
    except Exception:
        # FIX: was a bare `except:` which would also trap SystemExit and
        # KeyboardInterrupt
        app.logger.exception("exception handling [" + url + "]")
        if retry >= max_retries:
            # give up: drop the cached handle so the next request starts clean
            sbl = None
            last_api_key = None
            abort(500)
        else:
            return _lookup(url, api_key, retry + 1)
def __init__(self, api_key, db_path=LINUX_DFT_PATH, update_hash_prefix_cache=False):
    """Create the GSB wrapper and record key/path in module globals.

    A missing database file forces an initial (slow) hash-prefix sync.
    """
    global API_KEY, DB_PATH
    API_KEY = api_key
    DB_PATH = db_path
    self.sbl = SafeBrowsingList(api_key, db_path=db_path)
    self.update_hash_prefix_cache = update_hash_prefix_cache
    try:
        os.stat(db_path)
    except OSError:
        # FIX: was a bare `except:`; os.stat raises OSError subclasses.
        # No DB file on disk -> we must bootstrap the cache.
        self.update_hash_prefix_cache = True
    if self.update_hash_prefix_cache:
        # this may take a while so be patient (over 1600MB of data)
        self.sbl.update_hash_prefix_cache()
def __init__(self, name=None, api_key=None, db_path='/tmp/gsb_4.db',
             update_hash_prefix_cache=False):
    """Create the GSB wrapper around *db_path*.

    A missing database file forces an initial (slow) hash-prefix sync.
    """
    self.api_key = api_key
    self.db_path = db_path
    self.sbl = SafeBrowsingList(api_key, db_path=db_path)
    self.update_hash_prefix_cache = update_hash_prefix_cache
    try:
        os.stat(db_path)
    except OSError:
        # FIX: was a bare `except:`; os.stat raises OSError subclasses.
        # No DB file on disk -> we must bootstrap the cache.
        self.update_hash_prefix_cache = True
    if self.update_hash_prefix_cache:
        # this may take a while so be patient (over 1600MB of data)
        self.sbl.update_hash_prefix_cache()
class SafeBrowsing(object):
    """gglsbl wrapper exposing a module-global singleton for shared lookups."""

    def __init__(self, api_key, db_path=LINUX_DFT_PATH, update_hash_prefix_cache=False):
        global API_KEY, DB_PATH
        API_KEY = api_key
        DB_PATH = db_path
        self.sbl = SafeBrowsingList(api_key, db_path=db_path)
        self.update_hash_prefix_cache = update_hash_prefix_cache
        try:
            os.stat(db_path)
        except OSError:
            # FIX: was a bare `except:`; os.stat raises OSError subclasses.
            # Missing DB file -> bootstrap the cache.
            self.update_hash_prefix_cache = True
        if self.update_hash_prefix_cache:
            # this may take a while so be patient (over 1600MB of data)
            self.sbl.update_hash_prefix_cache()

    def is_blacklisted(self, url):
        """True when the thread-safe lookup reports any threat for *url*."""
        return not SafeBrowsing.thread_safe_lookup(url) is None

    def lookup_url(self, url):
        # cp_fmt = '{scheme}://{netloc}/{path}'
        # up = URLPARSE(url)
        # cp = cp_fmt.format(**{'scheme':up.scheme, 'netloc':up.netloc, 'path':up.path}).strip('/')+'/'
        return self.sbl.lookup_url(url)

    @classmethod
    def init(cls, api_key):
        """Alternate constructor using the default database path."""
        return SafeBrowsing(api_key)

    @staticmethod
    def set_global(api_key, db_path='/tmp/gsb_4.db'):
        """Configure the shared singleton used by thread_safe_lookup."""
        global SB_CHECK, API_KEY, DB_PATH
        # BUG FIX: previously `API_KEY = api_key, db_path` bound a
        # (key, path) tuple to API_KEY and never assigned DB_PATH at all,
        # breaking every later thread_safe_lookup call.
        API_KEY = api_key
        DB_PATH = db_path
        SB_CHECK = SafeBrowsing(api_key, db_path=db_path)

    @staticmethod
    def thread_safe_lookup(url):
        """Build a fresh wrapper from the globals and look up *url*."""
        global SB_CHECK
        sbl = SafeBrowsing(API_KEY, db_path=DB_PATH)
        return sbl.lookup_url(url)
def main():
    """CLI entry point: one-shot URL check, single sync, or endless sync loop."""
    parser = setupArgsParser()
    args = parser.parse_args()
    setupLogger(args.log, args.debug)

    # mode 1: check a single URL, print the verdict, and exit
    if args.check_url:
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path)
        verdict = sbl.lookup_url(args.check_url)
        if verdict is None:
            print('{} is not blacklisted'.format(args.check_url))
        else:
            print('{} is blacklisted in {}'.format(args.check_url, verdict))
        sys.exit(0)

    # mode 2: single sync pass (fair-use delays discarded)
    if args.onetime:
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path,
                               discard_fair_use_policy=True)
        run_sync(sbl)
    # mode 3: sync forever
    else:
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path)
        while True:
            run_sync(sbl)
def gsb_init():
    """Build a SafeBrowsingList from the module configuration.

    Returns:
        The SafeBrowsingList instance, or None when construction fails for
        any reason (deliberately broad catch).
    """
    database_path = os.getcwd() + GSB_DB_NAME
    # noinspection PyBroadException
    try:
        return SafeBrowsingList(GSB_API_KEY, db_path=database_path)
    except:
        return None
class Security:
    """Referer validation helpers backed by Google Safe Browsing."""

    def __init__(self):
        self.sbl = SafeBrowsingList(GoogleConfig.SAFEBROWSINGAPIKEY)
        self.sbl.update_hash_prefix_cache()

    def validate_referer(self, url):
        """Return the GSB threat list for *url*, or None when clean."""
        return self.sbl.lookup_url(url)

    def get_referer(self):
        """Return the request's referrer header, or None when absent/empty."""
        referer = request.referrer
        return referer if referer else None

    @staticmethod
    def is_safe_url(url):
        """True when *url* resolves to the same host over http(s)."""
        current = urlparse(request.host_url)
        candidate = urlparse(urljoin(request.host_url, url))
        return (candidate.scheme in ('http', 'https')
                and current.netloc == candidate.netloc)
def find_blacklisted_domains(urls):
    """Look up every URL in *urls* against Google Safe Browsing.

    Blacklisted URLs are appended to blacklist.txt and a (total, hits) row
    is appended to results.csv; counts and elapsed time go to stdout.
    NOTE(review): f.write(url) adds no newline separator — entries will run
    together unless the inputs already end with '\n'; confirm intent.
    NOTE(review): API key is hard-coded in source; should move to config.
    """
    total_count=0
    count=0
    start_time = time.time()
    sbl = SafeBrowsingList('AIzaSyBHPCVVk-tbM0iC93uvulfEFTyBfmKecVA')
    #sbl = SafeBrowsingList('AIzaSyCj6PXcG8IuHW3cpVB5dZHVWHb2QnALWSU')
    for url in urls:
        threat_list = sbl.lookup_url(url)
        if threat_list:
            count+=1
            with open(r'blacklist.txt', 'a') as f:
                f.write(url)
        total_count+=1
    elapsed_time = time.time() - start_time
    print("Number of URLS:"+str(total_count))
    print("Phishing URLS:"+str(count))
    print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
    fields=[str(total_count),str(count)]
    with open(r'results.csv', 'a') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
def main():
    """CLI entry point supporting an optional MySQL storage backend.

    Modes: single-URL check (a dedicated exit code signals "blacklisted"),
    one-shot sync, or a plain sync run.
    NOTE(review): unlike the sibling CLI scripts, the final branch has no
    `while True` loop visible here — confirm that a single run_sync is
    intended.
    """
    args_parser = setupArgsParser()
    args = args_parser.parse_args()
    setupLogger(args.log, args.debug)
    storage_backend = None
    storage_config = None
    # presence of a MySQL database name switches storage to the MySQL backend
    if args.mysql_db:
        storage_config = {
            'user': args.mysql_user,
            'password': args.mysql_password,
            'host': args.mysql_host,
            'database': args.mysql_db
        }
        storage_backend = SafeBrowsingList.STORAGE_BACKEND_MYSQL
    if args.check_url:
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path,
                               timeout=args.timeout,
                               storage_backend=storage_backend,
                               storage_config=storage_config)
        bl = sbl.lookup_url(args.check_url)
        if bl is None:
            print('{} is not blacklisted'.format(args.check_url))
        else:
            print('{} is blacklisted in {}'.format(args.check_url, bl))
            # non-zero exit code lets shell scripts branch on the verdict
            sys.exit(args.blacklisted_return_code)
        sys.exit(0)
    if args.onetime:
        # single sync pass, discarding the fair-use back-off delays
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path,
                               discard_fair_use_policy=True,
                               timeout=args.timeout,
                               storage_backend=storage_backend,
                               storage_config=storage_config)
        run_sync(sbl)
    else:
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path,
                               timeout=args.timeout,
                               storage_backend=storage_backend,
                               storage_config=storage_config)
        run_sync(sbl)
def main():
    """CLI entry point: check one URL, sync once, or sync forever."""
    parser = setupArgsParser()
    args = parser.parse_args()
    setupLogger(args.log, args.debug)

    # single-URL check: print verdict and exit
    if args.check_url:
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path,
                               timeout=args.timeout)
        verdict = sbl.lookup_url(args.check_url)
        if verdict is None:
            print('{} is not blacklisted'.format(args.check_url))
        else:
            print('{} is blacklisted in {}'.format(args.check_url, verdict))
        sys.exit(0)

    if args.onetime:
        # one sync pass, fair-use delays discarded
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path,
                               discard_fair_use_policy=True,
                               timeout=args.timeout)
        run_sync(sbl)
    else:
        sbl = SafeBrowsingList(args.api_key, db_path=args.db_path,
                               timeout=args.timeout)
        while True:
            run_sync(sbl)
def worker(self, id, queue):
    """Consume tweets from *queue*, extract IOCs, and append them to a daily CSV.

    Each extracted URL is additionally checked against Google Safe Browsing;
    the verdict (threat list or 'benign') is recorded next to the IOC.
    Runs forever; the local GSB database is refreshed once per 12-hour slot.
    NOTE(review): this block was recovered from collapsed source; the
    indentation is a reconstruction — verify against the original file.
    """
    # gglsbl API key lives in a local auth file (Windows-style paths)
    with open(r'config\gglsbl.auth', 'r') as auth_file:
        gglsbl_key = auth_file.read().strip()
    sbl = SafeBrowsingList(gglsbl_key, db_path=r"dataset\google_safe_browisng_db")
    # sbl.update_hash_prefix_cache()
    turn = True  # guards against updating the cache more than once per slot
    while True:
        # Update Google SBL database every 12 hours at time X (e.g. 3 AM and 3 PM)
        hour = datetime.datetime.today().hour
        if hour % 12 == 3 and turn:
            sbl.update_hash_prefix_cache()
            turn = False
        elif hour % 12 != 3:
            turn = True
        today = get_date()
        # one output file per day, appended to by each worker
        with open(os.path.join('results', today + '.ioc.csv'), 'a+',
                  encoding='utf_8') as output_file:
            tweet = queue.get()
            try:
                # Prefer the full (untruncated) text: retweet's extended text
                # first, then the tweet's own extended text, then plain text.
                if hasattr(tweet, 'retweeted_status') and hasattr(
                        tweet.retweeted_status, 'extended_tweet'
                ) and 'full_text' in tweet.retweeted_status.extended_tweet:
                    text = tweet.retweeted_status.extended_tweet[
                        'full_text']
                elif hasattr(tweet, 'extended_tweet'
                             ) and 'full_text' in tweet.extended_tweet:
                    text = tweet.extended_tweet['full_text']
                elif not hasattr(tweet, 'text'):
                    # dict-style payload rather than an API object
                    text = tweet['text']
                else:
                    text = tweet.text
                # Pick the URL entities from the same source as the text.
                if hasattr(tweet, 'retweeted_status'):
                    if hasattr(tweet.retweeted_status, 'extended_tweet'):
                        final_urls = tweet.retweeted_status.extended_tweet[
                            'entities']['urls']
                    else:
                        final_urls = tweet.retweeted_status.entities[
                            'urls']
                else:
                    if hasattr(tweet, 'extended_tweet'):
                        final_urls = tweet.extended_tweet['entities'][
                            'urls']
                    else:
                        final_urls = tweet.entities['urls']
                for final_url in final_urls:
                    # If a pastebin URL, get the raw content and append it to the tweet content
                    if final_url['expanded_url'].startswith(
                            'https://pastebin.com/'):
                        pastebin = final_url['expanded_url']
                        if 'raw' not in pastebin:
                            pastebin = pastebin.replace(
                                'https://pastebin.com/',
                                'https://pastebin.com/raw/')
                        req = requests.get(pastebin)
                        # NOTE(review): req.content is bytes on Python 3, so
                        # this str+bytes concat would raise TypeError (caught
                        # by the broad except below) — probably wants req.text.
                        text += '\n' + req.content
                # 'top' vs 'rand' distinguishes the two monitored user pools
                user_type = 'top'
                if tweet.user.id_str in self.rand_users:
                    user_type = 'rand'
                print(
                    "###########################$$$$$$$$$$$$$$$$$$$$$$$$$$$"
                )
                print(text)
                # classifier must be retrained with new data
                # vector = vectorize(text, self.wordlist)
                # vector.append(len(tweet.entities['hashtags']))
                # vector.append(len(tweet.entities['user_mentions']))
                # vector = numpy.array(vector).reshape(1, -1)
                # estimates = []
                # for i in range(number_of_classifiers):
                #     y_estimate = self.classifiers[i].predict(vector)
                #     estimates.append(y_estimate)
                # vote = statistics.mode([x[0] for x in estimates])
                # print("Prediction: "+vote)
                # NOTE(review): the `not in text` guards below keep an IOC only
                # when its refanged form differs from what appeared in the
                # tweet (i.e. it was defanged) — confirm that is the intent.
                ips = list(iocextract.extract_ips(text, refang=True))
                for ip in ips:
                    if ip not in text:
                        output_file.write('{},{},{},{},{},ip,{}\n'.format(
                            tweet.id, tweet.created_at, user_type,
                            tweet.user.id_str, tweet.user.screen_name,
                            ip))
                urls = list(iocextract.extract_urls(text, refang=True))
                for url in urls:
                    if url not in text:
                        # trailing '.' is sentence punctuation, not the URL
                        result = sbl.lookup_url(url.rstrip('.'))
                        if result is not None:
                            output_file.write(
                                '{},{},{},{},{},url,{},{}\n'.format(
                                    tweet.id, tweet.created_at, user_type,
                                    tweet.user.id_str,
                                    tweet.user.screen_name,
                                    url.rstrip('.'), result))
                        else:
                            output_file.write(
                                '{},{},{},{},{},url,{},benign\n'.format(
                                    tweet.id, tweet.created_at, user_type,
                                    tweet.user.id_str,
                                    tweet.user.screen_name,
                                    url.rstrip('.')))
                emails = list(iocextract.extract_emails(text, refang=True))
                for email in emails:
                    if email not in text:
                        output_file.write(
                            '{},{},{},{},{},email,{}\n'.format(
                                tweet.id, tweet.created_at, user_type,
                                tweet.user.id_str, tweet.user.screen_name,
                                email))
                hashes = list(iocextract.extract_hashes(text))
                for hash in hashes:
                    output_file.write('{},{},{},{},{},hash,{}\n'.format(
                        tweet.id, tweet.created_at, user_type,
                        tweet.user.id_str, tweet.user.screen_name, hash))
            except Exception as exp:
                # best effort: a malformed tweet must not kill the worker
                print(exp)
            queue.task_done()
def main():
    """Generate look-alike candidate domains for argv's domain_name and print
    their collected info (HTTP status, GSB threats, VirusTotal, IP) as JSON.

    NOTE(review): recovered from collapsed source; indentation is a
    reconstruction — verify against the original file.  Original comments
    were in Japanese and are translated below.
    """
    # prepare argument parsing
    p = argparse.ArgumentParser()
    p.add_argument("domain_name")
    p.add_argument('-g', '--http', action="store_true", help="Get http response by each candidate domains")
    p.add_argument('--safe_site', default="", help="Get google safe sites tool information. must be followed by api key ")
    p.add_argument('--virustotal', default="", help="Get VirusTotal tool information. must be followed by api key. VERY SLOW ")
    p.add_argument('--ip', action="store_true", help="Get IP address for each candidate domains")
    p.add_argument('--debug', action="store_true", help="For debug. It restlicts the length of domain list.")
    # used e.g. as: `$ dscan google.com --genlist qr typo`
    p.add_argument('--genlist', nargs='+', help="Specify using generators as list.")
    p.add_argument('--in_use', action="store_true", help="It shows only domains in use.")
    args = p.parse_args()
    # collect candidate URLs
    generator_dict = {}
    template_generator_names = ["qr", "suffix", "bit", "typo", "h**o", "combo"]
    generator_names = []
    # when specific generators were requested on the command line
    if not args.genlist is None:
        for generator_name in args.genlist:
            if generator_name in template_generator_names:
                generator_names.append(generator_name)
            else:
                print("error: \""+ generator_name +"\" is not generator name.", file=sys.stderr)
    else:
        generator_names = template_generator_names
    for generator_name in generator_names:
        print_progress("generating "+ generator_name +" ...")
        list_slice = ""
        if args.debug:
            # in debug mode, length of domain list is restricted
            list_slice = "[:1]"
        # eval is limited to names validated against template_generator_names
        # above, so no untrusted text reaches it
        generator_dict[generator_name] = eval(generator_name +".near_urls(args.domain_name)" + list_slice)
        print_progress("generated: " + str(len(generator_dict[generator_name])))
    print_progress("fetching domain info ...")
    # holds the collected information for every candidate domain
    domains_dict = {}
    for generate_type_name, domain_list in generator_dict.items():
        for domain_name in domain_list:
            if domain_name not in domains_dict:
                domains_dict[domain_name] = {}
                # redundant, but needed later for the JSON conversion
                domains_dict[domain_name]["domain_name"] = domain_name
            if "generate_type" not in domains_dict[domain_name]:
                domains_dict[domain_name]["generate_type"] = []
            domains_dict[domain_name]["generate_type"].append(generate_type_name)
    # investigate each domain and record the findings
    for domain_name, domain_info_dict in tqdm( domains_dict.items() ):
        # attach HTTP response info
        if args.http:
            http_status_code = 0
            try:
                # fetch a 2xx status code
                http_status_code = urllib.request.urlopen("http://" + domain_name, timeout=0.5).status
            except urllib.error.HTTPError as e:
                # non-2xx status code
                http_status_code = e.code
            # connection refused etc.; could be handled more precisely later
            except urllib.error.URLError as e:
                http_status_code = -1
            except socket.timeout:
                http_status_code = -1
            except ConnectionResetError:
                http_status_code = -1
            domain_info_dict["http_status_code"] = http_status_code
        # fetch Google Safe Browsing info
        if len(args.safe_site)>0:
            api_key_gsb = args.safe_site
            sbl = SafeBrowsingList(api_key_gsb)
            threat_list = sbl.lookup_url(domain_name)
            if threat_list == None:
                domain_info_dict["site_threat"] = []
            else:
                domain_info_dict["site_threat"] = threat_list
        # fetch VirusTotal info
        if len(args.virustotal)>0:
            api_key_vt = args.virustotal
            # TODO: extract into a function later
            interval_seconds_virustotal = 60/4
            retry_max_time = 2
            retry_sleep_seconds_virustotal = 1
            for _ in range(retry_max_time):
                try:
                    info_virustotal = fetch_pdns_domain_info(domain_name, api_key_vt)
                except:
                    # when VirusTotal fails (rate limit etc.), wait a little
                    time.sleep(retry_sleep_seconds_virustotal)
                else:
                    try:
                        domain_info_dict["virus_total"] = info_virustotal["Webutation domain info"]
                    except KeyError:
                        domain_info_dict["virus_total"] = {}
                    # sleep 60/4 = 15s to stay under VirusTotal's rate limit;
                    # (strictly: 4 queries then wait a minute would be more
                    # correct, but per-query waiting is simpler)
                    time.sleep(interval_seconds_virustotal)
                    break
        if args.ip:
            try:
                # resolve the IP address of the generated domain
                ip = socket.gethostbyname(domain_name)
            except socket.gaierror:
                ip = ''
            finally:
                domain_info_dict["ip"] = ip
        # extension example: attach geoip info
        # if args.geoip:
        #     domain_info_dict["geoip"] = country
    if args.in_use:
        domains_dict = domain_filter_only_in_use(domains_dict)
    print_list = []
    for domain_info_dict in domains_dict.values():
        print_list.append(domain_info_dict)
    print(json.dumps(print_list, indent=4, separators=(',', ': ')) )
def updateCache():
    """Refresh the local GSB v3 hash-prefix database used by the crawler."""
    blacklist = SafeBrowsingList(GOOGLE_SAFEBROWSE_API_KEY,
                                 db_path="/opt/crawler/gsb_v3.db")
    blacklist.update_hash_prefix_cache()
def testLongURL(long_url):
    """Look up *long_url* in the crawler's GSB v3 database; None means clean."""
    blacklist = SafeBrowsingList(GOOGLE_SAFEBROWSE_API_KEY,
                                 db_path="/opt/crawler/gsb_v3.db")
    return blacklist.lookup_url(long_url)
# AIzaSyDWcNFNEsDWagvfgjZGHw0Y9LNvtgz3LhE Throwaway key (doesn't matter)
from gglsbl import SafeBrowsingList

# Demo: query Google Safe Browsing for a known-good URL and print any threats.
sbl = SafeBrowsingList('AIzaSyDWcNFNEsDWagvfgjZGHw0Y9LNvtgz3LhE')
threat_list = sbl.lookup_url('http://github.com/')
# FIX: identity comparison for None (PEP 8); was `threat_list == None`,
# which invokes __eq__ on the lookup result
if threat_list is None:
    print('None.')
else:
    print('Threats: ' + str(threat_list))
logfile.write(str(getTS()) + "\n" + text + "\n") # begin main while loop while True: try: con = mdb.connect(config['mysql']['host'], config['mysql']['username'], config['mysql']['password'], config['mysql']['database_5'], charset='utf8', cursorclass=mdb.cursors.SSCursor) #cursor = con.cursor() con.autocommit(True) sbl = SafeBrowsingList(config['gsb-api']['key']) # start timer mainStart = time.time() num_urls_to_import = 3000000 # count total number of twitter urls + print #print "Counting total number of Twitter URLs in db..." #num_twitter_urls = countTwitterURLs() #num_twitter_urls = 2000000 #print "Twitter URLs: {:,}".format(num_twitter_urls) total_phishing = 0 total_malware = 0 # import twitter urls
def scrapper(url): url = url.strip() url = "https://www.stuffnice.com/" url5 = url if "www" in url: url = url.replace("www.", "") print(url) else: pass headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36', "Upgrade-Insecure-Requests": "1", "DNT": "1", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.5", "Accept-Encoding": "gzip, deflate" } final_report = [] final_score = 0 from .result_dict import result_dict domain = tldextract.extract(url).domain suffix = tldextract.extract(url).suffix subdomain = tldextract.extract(url).subdomain pattern = '<a [^>]*href=[\'|"](.*?)[\'"].*?>' # row 15 HTTPS test result = {'name': 'https_test', 'message': '', 'marks': ''} if "https" in url or "http" in url: print("if worked") a = url.split(":") a[0] = "https:" web = "".join(a) print("This is web ", web) try: print("try of if worked") r = requests.get(web, headers=headers) url = web result[ 'message'] = 'Félicitations. Votre site les données transitants par votre site sont sécurisées avec un certificat SSL' result['marks'] = 4 except: a = url.split(":") a[0] = "http:" url3 = "".join(a) print("try of except worked") r = requests.get(url3, headers=headers, verify=False) url = url3 # req = urllib.request.Request(url, headers=headers) # r = urllib.request.urlopen(req) result['message'] = ''' Votre site ne dispose pas de certificat SSL. Les données qui y transitent peuvent donc être récupérés par des parties malveillantes. Google donne une grande importance à la sécurité des visiteurs. ''' result['marks'] = 0 print("HTTPS didn't worked") else: print("else worked") try: url2 = 'https://' + url r = requests.get(url2, headers=headers) url = url2 # req = urllib.request.Request(url, headers=headers) # r = urllib.request.urlopen(req) result[ 'message'] = 'Félicitations. 
Votre site les données transitants par votre site sont sécurisées avec un certificat SSL' result['marks'] = 4 except: url1 = 'http://' + url print("from else except ", url1) r = requests.get(url1, headers=headers, verify=False) url = url1 # req = urllib.request.Request(url, headers=headers) # r = urllib.request.urlopen(req) result['message'] = ''' Votre site ne dispose pas de certificat SSL. Les données qui y transitent peuvent donc être récupérés par des parties malveillantes. Google donne une grande importance à la sécurité des visiteurs. ''' result['marks'] = 0 print(result) result_dict['https_test'] = result final_score = final_score + result['marks'] soup = BeautifulSoup(r.text, "lxml") # This is for row 1 (title) try: title_content = soup.find('title').text title_ln = len(title_content) if title_ln < 70: result = { 'name': 'title', 'message': 'Félicitations votre site dispose d’un titre avec un nombre de caractères optimale soit moins de 70 caractères', 'title_length': title_ln, 'title_content': title_content, 'marks': 5 } final_score = final_score + 5 result_dict['title'] = result elif title_ln > 70: result = { 'name': 'title', 'message': 'Votre titre est trop long, le nombre de caractères optimal est de 70 caractères, essayez de le raccourcir', 'title_length': title_ln, 'title_content': title_content, 'marks': 2 } final_score = final_score + 2 result_dict['title'] = result except: result = { 'name': 'title', 'message': 'Votre site ne dispose pas de balise meta title. La balise <title> correspond au titre de votre page web. 
Il s’agit d’un champ essentiel à ne pas négliger dans le cadre d’une bonne stratégie d’optimisation du référencement naturel puisqu’elle est l’un des critères les plus importants pour les moteurs de recherche (Google, Bing...)', 'title_length': 0, 'marks': 0 } final_score = final_score + 0 result_dict['title'] = result # This is for row 2 (meta @description) name = 'meta_description' length_var_name = 'meta_desc_len' try: meta_tag = soup.find("meta", {"name": "description"}) desc_content = meta_tag['content'] #desc_text_ln = len(desc_content) desc_text_ln = int(desc_text_ln) if desc_text_ln < 150: result = { 'name': name, 'message': 'Votre méta-description est trop courte, le nombre de caractère optimale doit être entre 150 et 250 caractères.', length_var_name: desc_text_ln, 'desc_content': desc_content, 'marks': 1 } final_score = final_score + result['marks'] result_dict['meta_description'] = result print('try worked1') elif desc_text_ln > 150 and desc_text_ln < 250: result = { 'name': name, 'message': 'Félicitations votre site dispose d’une méta-description avec un nombre de caractère optimal entre 150 et 250 caractères', length_var_name: desc_text_ln, 'desc_content': desc_content, 'marks': 5 } final_score = final_score + result['marks'] result_dict['meta_description'] = result print('try worked2') elif desc_text_ln > 250: result = { 'name': name, 'message': ' Votre méta-description est trop longue, essayez de la raccourcir, le nombre optimal est entre 150 et 250 caractères, le reste risque d’être tronqué sur l’affichage du résultat sur les moteurs de recherche.', length_var_name: desc_text_ln, 'desc_content': desc_content, 'marks': 2 } final_score = final_score + result['marks'] result_dict['meta_description'] = result print('try worked3') except: result1 = { 'name': name, 'message': 'Votre site ne dispose pas de méta-description, La balise meta description manque sur votre page. 
Vous devez inclure cette balise afin de fournir une brève description de votre page pouvant être utilisée par les moteurs de recherche. Des méta-descriptions bien écrites et attrayantes peuvent également aider les taux de clics sur votre site dans les résultats de moteur de recherche.', length_var_name: 0, 'marks': 0 } final_score = final_score + result1['marks'] result_dict['meta_description'] = result1 print('except worked') # This is for row 3 (meta @keywords) name = 'meta_keywords' length_var_name = 'meta_key_len' try: meta_tag = soup.find("meta", {"name": "keywords"}) meta_key_content_ln = len(meta_tag['content']) # title_ln = int(meta_key_content_ln) if meta_key_content_ln: result = { 'name': name, 'message': 'Bravo vous avez spécifié des meta keywords . Vos mots-clés principaux doivent apparaître dans vos méta-tags pour vous aider à identifier le sujet de votre page Web dans les moteurs de recherche.', length_var_name: meta_key_content_ln, 'marks': 1 } final_score = final_score + result['marks'] result_dict['meta_keywords'] = result print('try worked1') except: result = { 'name': name, 'message': 'Vos mots-clés principaux doivent apparaître dans vos méta-tags pour vous aider à identifier le sujet de votre page Web dans les moteurs de recherche.', length_var_name: 0, 'marks': 0 } final_score = final_score + result['marks'] result_dict['meta_keywords'] = result print('except worked') # This is for row 4 (meta @robots) name = 'meta_robots' length_var_name = 'meta_robots_len' try: meta_tag = soup.find("meta", {"name": "robots"}) meta_robots_content = len(meta_tag['content']) # title_ln = int(desc_text_ln) if meta_robots_content: result = { 'name': name, 'message': "Votre site dispose d'un fichier robots.txt", length_var_name: meta_robots_content, 'marks': 4 } final_score = final_score + result['marks'] result_dict['meta_robots'] = result print('try worked1') except: result1 = { 'name': name, 'message': ''' Votre site n’a pas de robot.txt Le robots.txt est un 
fichier texte utilisant un format précis qui permet à un Webmaster de contrôler quelles zones de son site un robot d'indexation est autorisé à analyser. Ce fichier texte sera disponible à une URL bien précise pour un site donné, par exemple http://www.monsite.com/robots.txt Pour bien comprendre à quoi sert un robots.txt, il faut comprendre la manière dont fonctionnent les robots d'indexation des moteurs de recherche (appelés aussi Web spiders, Web crawlers ou Bots) tels que Google, Yahoo ou Bing. Voici leurs actions lorsqu'ils analysent un site tel que www.monsite.com : ils commencent par télécharger et analyser le fichier http://www.monsite.com/robots.txt. ''', length_var_name: 0, 'marks': 0 } final_score = final_score + result1['marks'] result_dict['meta_robots'] = result1 print('except worked') # This is for row 5 (html lang) name = 'html_lang' length_var_name = 'html_lang' try: meta_tag = soup.find("html", {"lang": True}) lang_text = meta_tag['lang'] result = { 'name': name, 'message': "Félicitations. Vous avez spécifié une langue à votre page.", length_var_name: lang_text, 'marks': 3 } final_score = final_score + result['marks'] result_dict['html_lang'] = result print('try worked1') except: result1 = { 'name': name, 'message': ''' Vous devriez spécifier une langue pour votre site, les moteurs de recherches ne comprennent pas quand un site dispose de plusieurs langues par exemple ayant des mots techniques en anglais et un contenu texte en français. Il faut donc bien spécifier la langue. 
''', length_var_name: 0, 'marks': 0 } final_score = final_score + result1['marks'] result_dict['html_lang'] = result1 print('except worked') # This is for row 6 (sitemap) url = url.strip() sitemap_url = url + '/sitemap.xml' print("Sitemap url ", sitemap_url) try: code = requests.get(sitemap_url, headers=headers).status_code name = 'sitemap' if code == 200: result = { 'name': name, 'message': "Félicitations, votre site dispose d’un fichier sitemap", 'marks': 2 } final_score = final_score + result['marks'] result_dict['sitemap'] = result else: result = { 'name': name, 'message': "Votre site Web ne dispose pas d'un fichier sitemap. Les sitemaps peuvent aider les robots à indexer votre contenu de manière plus complète et plus rapide. ", 'marks': 0 } final_score = final_score + result['marks'] result_dict['sitemap'] = result except: result = { 'name': name, 'message': "Votre site Web ne dispose pas d'un fichier sitemap. Les sitemaps peuvent aider les robots à indexer votre contenu de manière plus complète et plus rapide. 
", 'marks': 0 } final_score = final_score + result['marks'] result_dict['sitemap'] = result # This is for row 7 (google Analytics) searched_word = 'google-analytics' name = 'google_analytics' if searched_word in str(soup): print("Google analytics found") result = { 'name': name, 'message': "Félicitations, votre site dispose de l'outil Google Analytics", 'marks': 2 } final_score = final_score + result['marks'] result_dict['google_analytics'] = result else: result = { 'name': name, 'message': "Votre site ne dispose pas de l'outil Google Analytics.", 'marks': 0 } final_score = final_score + result['marks'] result_dict['google_analytics'] = result # This is for row 8 (page_cache) name = 'page_cache' length_var_name = 'page_cache_desc' try: meta_tag = soup.find("meta", {"http-equiv": "Cache-control"}) lang_text = meta_tag['content'] result = { 'name': name, 'message': "Vous avez activé le cache sur votre page, c'est très bien.", length_var_name: lang_text, 'marks': 3 } final_score = final_score + result['marks'] result_dict['page_cache'] = result print('try worked1') except: result1 = { 'name': name, 'message': "Vous n'avez pas activé la mise en cache sur vos pages. La mise en cache permet un chargement plus rapide des pages.", length_var_name: 0, 'marks': 0 } final_score = final_score + result1['marks'] result_dict['page_cache'] = result1 print('except worked') # API_KEY = AIzaSyD_RLUOcTN1JAq8PL8zJ79X6-kmHIDy_uM # This is for row 9 (Google safe browsing api) api_key = 'AIzaSyCVylpWnsOwzUoeTGg7akZRod-4YbhXoPU' sbl = SafeBrowsingList(api_key) bl = sbl.lookup_url(url) name = 'google_safe_browsing' print("google_safe_browsing ", url) if bl is None: print("Website is safe") result = { 'name': name, 'message': "Votre site est considéré comme sécurisé.", 'marks': 2 } final_score = final_score + result['marks'] result_dict['google_safe_browsing'] = result else: result = { 'name': name, 'message': "Votre site n'est pas considéré comme sécurisé. 
Google et les autres moteurs de recherche prennent en compte le niveau de sécurité de votre site pour garantir la sécurité des visiteurs.", 'marks': 0, 'threats': bl } final_score = final_score + result['marks'] result_dict['google_safe_browsing'] = result # This is for row 10 (responsive website test) #name = 'responsive_test' #length_var_name = 'responsive_test_desc' try: meta_tag = soup.find("meta", {"name": "viewport"}) lang_text = meta_tag['content'] result = { 'name': name, 'message': "Félicitations. Votre site est responsive.", length_var_name: lang_text, 'marks': 4 } final_score = final_score + result['marks'] result_dict['responsive_test'] = result print('try worked1') except: result1 = { 'name': name, 'message': ''' Nous n'avons pas détécté que votre site internet était responsive, soit adapté au mobile. Google prend énormément en compte ce critère pour un bon référencement. ''', length_var_name: 0, 'marks': 0 } final_score = final_score + result1['marks'] result_dict['responsive_test'] = result1 print('except worked')
def main():
    """Generate look-alike domain candidates for a target domain and print
    collected info about each candidate as JSON.

    Optional lookups (HTTP status probe, Google Safe Browsing, VirusTotal)
    are enabled via command-line flags.
    """
    # Set up command-line argument parsing.
    p = argparse.ArgumentParser()
    p.add_argument("domain_name")
    p.add_argument('-g', '--http', action="store_true",
                   help="Get http response by each candidate domains")
    p.add_argument(
        '--safe_site',
        default="",
        help="Get google safe sites tool information. must be followed by api key ")
    p.add_argument(
        '--virustotal',
        default="",
        # Fixed copy-pasted help text: this flag queries VirusTotal, not Safe Browsing.
        help="Get VirusTotal domain information. must be followed by api key. VERY SLOW ")
    args = p.parse_args()

    # Collect candidate URLs from each generator.
    generator_dict = {}
    # TODO: lists are truncated to 1 entry for practice runs; drop the [:1] in production.
    # TODO: the generate/log pattern repeats; fold into a loop over (name, fn) pairs
    #       (awkward because each generator exposes a different function).
    print_progress("generating qr ...")
    generator_dict["qr"] = qr.near_urls(args.domain_name)[:1]
    print_progress("generated: " + str(len(generator_dict["qr"])))
    print_progress("generating suffix ...")
    generator_dict["suffix"] = suffix.generate_domain(args.domain_name)[:1]
    print_progress("generated: " + str(len(generator_dict["suffix"])))
    print_progress("generating bit ...")
    generator_dict["bit"] = bit.near_urls(args.domain_name)[:1]
    print_progress("generated: " + str(len(generator_dict["bit"])))
    print_progress("generating typo ...")
    generator_dict["typo"] = typo.near_urls(args.domain_name)[:1]
    print_progress("generated: " + str(len(generator_dict["typo"])))
    #domains_dict["h**o"] = h**o.near_urls(domain)
    #domains_dict["combo"] = combo.near_urls(domain)

    print_progress("fetching domain info ...")
    # Maps domain name -> dict of collected information about that domain.
    domains_dict = {}
    for generate_type_name, domain_list in generator_dict.items():
        for domain_name in domain_list:
            if domain_name not in domains_dict:
                domains_dict[domain_name] = {}
                # Redundant, but needed later when converting to JSON.
                domains_dict[domain_name]["domain_name"] = domain_name
            if "generate_type" not in domains_dict[domain_name]:
                domains_dict[domain_name]["generate_type"] = []
            domains_dict[domain_name]["generate_type"].append(generate_type_name)

    # Look up and record information about each candidate domain.
    for domain_name, domain_info_dict in tqdm(domains_dict.items()):
        # Attach HTTP response information.
        if args.http:
            # TODO: make the status-code probe more robust
            # https://stackoverflow.com/questions/1726402/in-python-how-do-i-use-urllib-to-see-if-a-website-is-404-or-200
            http_status_code = 0
            try:
                # Use a context manager so the response is closed instead of leaked.
                with urllib.request.urlopen("http://" + domain_name, timeout=0.5):
                    pass
            except urllib.error.HTTPError as e:
                http_status_code = e.code
            # Connection refused and similar failures; could be refined later.
            except urllib.error.URLError:
                http_status_code = -1
            except socket.timeout:
                http_status_code = -1
            except ConnectionResetError:
                http_status_code = -1
            else:
                # Is "no exception" really always 200? Redirects (301 etc.) are
                # followed by urlopen, so assume 200 for now.
                http_status_code = 200
            domain_info_dict["http_status_code"] = http_status_code

        # Fetch Google Safe Browsing information.
        if len(args.safe_site) > 0:
            api_key_gsb = args.safe_site
            sbl = SafeBrowsingList(api_key_gsb)
            threat_list = sbl.lookup_url(domain_name)
            if threat_list is None:  # identity comparison, not ==
                domain_info_dict["site_threat"] = []
            else:
                domain_info_dict["site_threat"] = threat_list

        # Fetch VirusTotal information.
        if len(args.virustotal) > 0:
            api_key_vt = args.virustotal
            # TODO: extract this retry logic into a helper function.
            interval_seconds_virustotal = 60 / 4
            retry_max_time = 2
            retry_sleep_seconds_virustotal = 1
            for _ in range(retry_max_time):
                try:
                    info_virustotal = fetch_pdns_domain_info(
                        domain_name, api_key_vt)
                except Exception:
                    # VirusTotal fetch failed (rate limit etc.); wait briefly
                    # before retrying.  (Was a bare except, which also swallowed
                    # KeyboardInterrupt/SystemExit.)
                    time.sleep(retry_sleep_seconds_virustotal)
                else:
                    domain_info_dict["virus_total"] = info_virustotal[
                        "Webutation domain info"]
                    # Sleep 60/4 = 15 seconds to stay under VirusTotal's rate
                    # limit.  Strictly, "4 queries then wait a minute" would be
                    # more faithful to the 4-queries-per-minute limit, but this
                    # is simpler.
                    time.sleep(interval_seconds_virustotal)
                    break

        # Example of attaching more info:
        # geoip information
        # if args.geoip:
        #     domain_info_dict["geoip"] = country

    print_list = []
    for domain_info_dict in domains_dict.values():
        print_list.append(domain_info_dict)
    print(json.dumps(print_list, indent=4, separators=(',', ': ')))
length_var_name: 0, 'marks': 0 } final_score = final_score + result1['marks'] final_report.append(result1) print('except worked') # In[63]: # API_KEY = AIzaSyD_RLUOcTN1JAq8PL8zJ79X6-kmHIDy_uM # This is for row 9 (Google safe browsing api) from gglsbl import SafeBrowsingList api_key = 'AIzaSyCVylpWnsOwzUoeTGg7akZRod-4YbhXoPU' sbl = SafeBrowsingList(api_key) bl = sbl.lookup_url(url) name = 'google_safe_browsing' if bl is None: print("Website is safe") result = { 'name': name, 'message': "Votre site est considéré comme sécurisé.", 'marks': 2 } final_score = final_score + result['marks'] final_report.append(result) else: result = {
except: print("You need to 'pip install -r requirements.txt'") sys.exit(1) safebrowsing_token = 'AIzaSyBKlevd7lUJpEq0XGnvaojrmS9OJqWY6YA' isc_url = 'https://isc.sans.edu/feeds/suspiciousdomains_Low.txt' topmillion_url = 'http://s3.amazonaws.com/alexa-static/top-1m.csv.zip' safebrowsing_db = os.environ['HOME'] + '/Downloads/safebrowsing.db' suspect_file = os.environ['HOME'] + '/Downloads/suspiciousdomains_Low.txt' topthousand_file = os.environ['HOME'] + '/Downloads/alexa_1000.csv' safebrowsing_bootstrap = not os.path.exists(safebrowsing_db) or (os.path.getsize(safebrowsing_db) < (1024*1024)) # Be sure to occasionally run sbl.update_hash_prefix_cache() sbl = SafeBrowsingList(safebrowsing_token, db_path=safebrowsing_db) ISC_LIST=[] ALEXA_LIST=[] def loadLists(writer=sys.stdout): if isStale(suspect_file): print >> writer, "Updating ISC Suspicious Domains..." new_file = requests.get(isc_url) with open(suspect_file, 'w') as sf_buffer: sf_buffer.write(new_file.content) if safebrowsing_bootstrap: print("Initial download of SafeBrowsing DB... this will take a few minutes.") updateSafebrowsing() elif isStale(safebrowsing_db, maxTime=259200):
def lookup_db():
    """Look up a URL against the local Google Safe Browsing (v4) database.

    Reads the API key and database path from the config file, canonicalizes
    the requested URL, computes the SHA-256 digests of its permutations, and
    matches their 4-byte prefixes against the local hash-prefix cache.

    Returns a JSON response listing the matched patterns, or a JSON error
    object with an appropriate HTTP status code.
    """
    from gglsbl import SafeBrowsingList
    from gglsbl.protocol import URL
    from gglsbl.utils import to_hex

    res = {}
    rdict = {
        'status': '',
        'message': '',
    }

    # Defaults, overridden by the config file when present.
    key = ''
    db = '../gsb_v4.db'
    platforms = ['WINDOWS']
    if os.path.isfile(config):
        cp = ConfigParser()
        cp.read(config)
        if 'api' in cp and 'key' in cp['api']:
            key = cp['api']['key']
        if 'database' in cp and 'localdb' in cp['database']:
            db = cp['database']['localdb']
    if not key:
        logging.error('API key not found.')
        rdict['status'] = 500
        rdict['message'] = 'Internal Server Error'

    url = ''
    update = False
    if request.method == 'GET':
        url = request.args.get('url')
        # Accepted but currently unused: the cache-update call is disabled below.
        update = request.args.get('update')
    if not url:
        rdict['status'] = 400
        rdict['message'] = "The parameter 'url' is missing"

    if not rdict['status']:
        sbl = SafeBrowsingList(key, db_path=db, platforms=platforms)
        logging.debug(sbl.storage.get_threat_lists())
        #if update:
        #    sbl.update_hash_prefix_cache()
        u = URL(url)
        res = {
            'query': u.url,
            'canonical': u.canonical,
            'permutations': [],
        }
        for i in u.url_permutations(u.canonical):
            p = {
                'pattern': i,
                'sha256': to_hex(u.digest(i))
            }
            res['permutations'].append(p)
        # The 4-byte prefixes ("cues") of the full hashes drive the local lookup.
        url_hashes = u.hashes
        full_hashes = list(url_hashes)
        cues = [to_hex(fh[0:4]) for fh in full_hashes]
        res['results'] = []
        matched = sbl.storage.lookup_hash_prefix(cues)
        for m in matched:
            prefix = to_hex(m[1])
            for p in res['permutations']:
                # Plain prefix comparison.  The previous re.match(prefix, ...)
                # treated the hex string as a regex, which only worked by
                # accident because hex digits carry no regex metacharacters.
                if p['sha256'].startswith(prefix):
                    result = {
                        'pattern': p['pattern'],
                        'prefix': prefix,
                        'matched': str(m[0]),
                    }
                    res['results'].append(result)
        logging.info(res)
        res = jsonify(res)

    # On the error path `res` is still the (falsy) empty dict, so this branch
    # builds the JSON error response; the success path produced a truthy
    # Response object above and skips it.
    if not res:
        if not rdict["status"]:
            rdict["status"] = 400
            rdict["message"] = "Invalid request."
        res = jsonify(rdict)
        res.status_code = rdict["status"]
    return res
with open('config.json') as data_file: config = json.load(data_file) # begin main while loop while True: mainStart = time.time() #update GSB dataset start = time.time() print "Updating local GSB dataset..." print datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") try: sbl = SafeBrowsingList(config['gsb-api']['key']) con = mdb.connect(config['mysql']['host'], config['mysql']['username'], config['mysql']['password'], config['mysql']['database_5'], charset='utf8') con.autocommit(True) cur = con.cursor() sbl.update_hash_prefix_cache() #hash_prefixes = sbl.get_all_hash_prefixes() #from my modified version gglsbl_db = "/tmp/gsb_v4.db" sql_db = sqlite3.connect(gglsbl_db) cursor = sql_db.cursor() cursor.execute('''SELECT HEX(value) from hash_prefix''') #get all hash prefixes #cursor.execute('''SELECT value from full_hash''') #get all full hashes all_rows = cursor.fetchall()
class URLMonitor(Plugin):
    """Slack plugin that scans channel messages for URLs and alerts moderators
    when a URL's host appears on a configured blacklist or is flagged by
    Google Safe Browsing."""

    # Class-level declarations for readability; the real values are assigned
    # per-instance in __init__ so instances never share mutable state
    # (appending to a class-level list would leak entries between instances).
    blacklist = []
    moderators = []
    sbl = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.blacklist = []
        self.moderators = self.plugin_config['MODERATORS']

        # Initialize Safe Browsing API (self.sbl stays None when disabled).
        if self.plugin_config['GOOGLE_SAFE_BROWSING']:
            self.sbl = SafeBrowsingList(
                self.plugin_config['GOOGLE_SAFE_BROWSING_API_KEY'])
            self.sbl.update_hash_prefix_cache()

        # Populate blacklist from the configured feed URLs.
        for url in self.plugin_config['BLACKLISTS']:
            url = url.strip()
            if url.endswith('.json'):
                r = requests.get(url)
                # Assuming MEW List format
                for item in r.json():
                    self.blacklist.append(item['id'])
            # Was misspelled "endswidth", which raised AttributeError at runtime.
            elif url.endswith('.csv'):
                print('csv not implemented')  # TODO
            else:
                print('txt not implement')  # TODO
        print(self.__class__.__name__, 'initialized')

    def process_message(self, data):
        # print(data)
        # Read the channel before testing it (it was previously used before
        # assignment, raising UnboundLocalError on every message).
        chan = data['channel']
        # Private (Groups) or Public Channels
        if chan.startswith('C') or chan.startswith('G'):
            text = data['text']

            # Find all URLs in message text, extract host and compare against
            # blacklist and Google Safebrowsing
            urls = re.findall(
                'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                text)

            def alert(url):
                # TODO flag user
                # TODO early warning system
                self.slack_client.api_call(
                    'chat.postMessage',
                    channel=self.plugin_config['MODERATE_CHAN'],
                    # The message body must be passed as a keyword argument; a
                    # bare positional after `channel=` was a SyntaxError.
                    text=' '.join(self.moderators) + ' ' + text)
                # TODO can probably use outputs for this
                # NOTE(review): changed attribute access to subscript for
                # consistency with every other plugin_config lookup in this
                # class — confirm plugin_config is a plain dict.
                if len(self.plugin_config['WARNING_MESSAGE']):
                    self.outputs.append(
                        [data['channel'], self.plugin_config['WARNING_MESSAGE']])

            for u in urls:
                o = urlparse(u)
                # Strip any :port suffix from the netloc.
                host = re.split(":\d{,4}", o.netloc)[0]
                # Check Blacklist first (cheap), then Google Safebrowsing.
                if host in self.blacklist:
                    alert(u)
                    break
                # Was a bare `sbl` (NameError); also guard against Safe
                # Browsing being disabled (self.sbl is None).
                elif self.sbl and self.sbl.lookup_url(u):
                    alert(u)
                    break