def process_scan_certs(q, es):
    """
    :param q: The Queue object that certs should be pulled off of
    :param es: An Elasticsearch connection. This way each worker has its own connection
               and you don't have to share it across multiple workers/processes
    :return:
    """
    bulk_certs = []
    while True:
        certs = q.get()
        if certs == "DONE":
            # Flush whatever is still buffered before the worker exits.
            bulk(es, bulk_certs)
            return True
        newcert = process_cert(certs['certs'])
        if newcert:
            if 'time' in certs:
                newcert['import_date'] = certs['time']
            newcert['source'] = 'sonar'
            newcert_action = {
                "_index": "passive-ssl-certs-sonar",
                "_type": "cert",
                "_id": newcert['hash_id'],
                "_source": newcert
            }
            bulk_certs.append(newcert_action)
            # Send the buffered actions to Elasticsearch in batches of 500.
            if len(bulk_certs) == 500:
                bulk(es, bulk_certs)
                bulk_certs = []
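

# A minimal driver sketch for the worker above (not part of the original source).
# It assumes the elasticsearch-py client and a fork-based multiprocessing start;
# the worker count, queue size, and host list are illustrative placeholders.
from multiprocessing import Process, Queue
from elasticsearch import Elasticsearch

def start_cert_workers(es_hosts, num_workers=4):
    q = Queue(maxsize=10000)
    workers = []
    for _ in range(num_workers):
        es = Elasticsearch(es_hosts)  # one client per worker process
        p = Process(target=process_scan_certs, args=(q, es))
        p.start()
        workers.append(p)
    return q, workers

# Producer side (values are placeholders):
#   q.put({'certs': raw_base64_cert, 'time': scan_timestamp})
# Shutdown:
#   for _ in workers: q.put("DONE")
#   for w in workers: w.join()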


def process_scan_certs(q, es, port):
    """
    :param q: The Queue object that certs should be pulled off of
    :param es: An Elasticsearch connection. This way each worker has its own connection
               and you don't have to share it across multiple workers/processes
    :param port: the port associated with the ssl scan that was done (25, 465, 993, 143 etc)
    :return:
    """
    bulk_certs = []
    while True:
        certs = q.get()
        if certs == "DONE":
            # Flush whatever is still buffered before the worker exits.
            bulk(es, bulk_certs)
            return True
        newcert = process_cert(certs['certs'])
        if newcert:
            newcert['import_date'] = certs['time']
            newcert['source'] = 'sonar'
            newcert['port'] = port
            # The same certificate can be seen on several ports, so the document id
            # is the SHA-1 of hash_id + port + source rather than hash_id alone.
            cert_hash = hashlib.sha1(newcert['hash_id'] + str(port) + newcert['source'])
            cert_hash = cert_hash.hexdigest()
            newcert_action = {
                "_index": "passive-ssl-non443-certs-sonar",
                "_type": "cert",
                "_id": cert_hash,
                "_source": newcert
            }
            bulk_certs.append(newcert_action)
            # Send the buffered actions to Elasticsearch in batches of 500.
            if len(bulk_certs) == 500:
                bulk(es, bulk_certs)
                bulk_certs = []
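

# Illustrative fan-out only (not from the original source): because the _id now
# includes the port, the non-443 scans can be split with one queue and one worker
# per port. The port list comes from the docstring; the host is a placeholder.
from multiprocessing import Process, Queue
from elasticsearch import Elasticsearch

queues, workers = {}, []
for port in (25, 465, 993, 143):
    q = Queue()
    es = Elasticsearch(['localhost'])  # placeholder host; one client per worker
    p = Process(target=process_scan_certs, args=(q, es, port))
    p.start()
    queues[port] = q
    workers.append(p)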


def process_certs_file(gzfilename, key, logger, host='localhost', batchsize=16384,
                       index='certs', sourcetype='sonar-cert'):
    logger.warning("Loading file {f} at {d}".format(f=gzfilename, d=datetime.now()))
    hec = http_event_collector(key, host)
    with gzip.open(gzfilename, 'rb') as resultsfile:
        # Pull the YYYYMMDD scan date out of the file path.
        m = re.search('.*\/(\d{8})', gzfilename)
        filedate = m.group(1)
        filedate_struct = time.strptime(filedate, "%Y%m%d")
        # Computed here but not attached to the HEC payload in this version.
        filedate_epoch = time.mktime(filedate_struct)
        batchcount = 0
        # Each line is "<sha1 hash>,<base64 certificate>".
        for line in resultsfile:
            cleanline = line.strip('\n')
            (hash_string, cert_b64) = cleanline.split(',', 1)
            newcert = process_cert(cert_b64, logger)
            newcert_dict = json.dumps(newcert)
            payload = {}
            payload.update({"index": index})
            payload.update({"sourcetype": sourcetype})
            payload.update({"source": gzfilename})
            payload.update({"event": newcert_dict})
            hec.batchEvent(payload)
            batchcount = batchcount + 1
            if batchcount == batchsize:
                hec.flushBatch()
                batchcount = 0
        # Flush any partial batch left at end of file.
        if batchcount > 0:
            hec.flushBatch()
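

# Illustrative single-file invocation (not from the original source); the path,
# HEC token, and host are placeholders. The path must contain an 8-digit date
# segment for the regex above to find.
import logging

logger = logging.getLogger("SSLImporter")
process_certs_file("/data/sonar/20170101_certs.gz", "<hec-token>", logger,
                   host="splunk.example.com", index="certs",
                   sourcetype="sonar-cert")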


def process_certs_file(file_queue, key, hostlist=['localhost'], index='sonarsslcert',
                       sourcetype='sonarsslcert', batchsize=16384):
    logger = logging.getLogger("SSLImporter")
    while True:
        # Spread the load by picking a random HEC endpoint for each file.
        host = random.choice(hostlist)
        print(host)
        hec = http_event_collector(key, host)
        gzfilename = file_queue.get()
        if gzfilename == "DONE":
            return True
        logger.warning("Loading file {f} at {d}".format(f=gzfilename, d=datetime.now()))
        with gzip.open(gzfilename, 'rb') as resultsfile:
            # File paths carry the scan date as either YYYYMMDD or YYYY-MM-DD.
            m = re.search('.*\/(\d{8})', gzfilename)
            if m:
                filedate = m.group(1)
            else:
                m = re.search('.*\/(\d{4}-\d{2}-\d{2})', gzfilename)
                filedate = m.group(1)
                filedate = re.sub('-', '', filedate, 0, 0)
            filedate_struct = time.strptime(filedate, "%Y%m%d")
            filedate_epoch = time.mktime(filedate_struct)
            batchcount = 0
            # Each line is "<sha1 hash>,<base64 certificate>".
            for line in resultsfile:
                cleanline = line.strip('\n')
                (hash_string, cert_b64) = cleanline.split(',', 1)
                newcert = process_cert(cert_b64, logger)
                newcert_dict = json.dumps(newcert)
                payload = {}
                payload.update({"index": index})
                payload.update({"sourcetype": sourcetype})
                payload.update({"source": gzfilename})
                payload.update({"event": newcert_dict})
                hec.batchEvent(payload)
                batchcount = batchcount + 1
                if batchcount == batchsize:
                    hec.flushBatch()
                    batchcount = 0
            # Flush any partial batch left at end of file.
            if batchcount > 0:
                hec.flushBatch()
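

# Illustrative driver for the queue-based loader above (not from the original
# source): queue up the .gz files, start the workers, then send one "DONE" per
# worker so each exits cleanly. The directory, HEC token, hosts, and worker
# count are placeholders.
import glob
from multiprocessing import Process, Queue

file_queue = Queue()
hosts = ["splunk1.example.com", "splunk2.example.com"]  # placeholder HEC hosts
workers = []
for _ in range(4):
    p = Process(target=process_certs_file, args=(file_queue, "<hec-token>", hosts))
    p.start()
    workers.append(p)

for gzfile in glob.glob("/data/sonar/*certs.gz"):
    file_queue.put(gzfile)
for _ in workers:
    file_queue.put("DONE")
for w in workers:
    w.join()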