def adptrExtract_asDict(srcData, isUpdateNewDataOnly=True):
    """Merge parsed source records into the local database and report them.

    Records are read with ``cnvt_csv2dict(srcData)`` and keyed by their
    ``'phish_id'`` field.  A record whose id is not yet present under
    ``['data']`` of the local database (``getDB_local(srcData)``) is added
    there together with an md5 from ``genHash_md5`` and a download timestamp
    from ``getUTCTime``.  The local database is written back with
    ``setDB_local`` before returning.

    Args:
        srcData: Source descriptor handed to the project helpers; a falsy
            value aborts immediately.
        isUpdateNewDataOnly: When False the whole local database is
            returned instead of only the records added by this call.

    Returns:
        None when ``srcData`` is falsy, ``{}`` when there is nothing to
        report, otherwise a dict whose ``'data'`` entry maps ids to records.
    """
    sNAMEFUNC = 'adptrExtract_asDict()'
    sndMSG('Called...', 'INFO', sNAMEFUNC)

    # srcData can not be empty
    if not srcData:
        return None

    ### Generally required for all adapters
    from lib.utils.mngDateTime import getUTCTime

    ### Get local data - used to remove duplicates
    localData = getDB_local(srcData)
    srcDict = cnvt_csv2dict(srcData)

    from lib.utils.mngFiles import getFile_JSON2Dict
    # Start from the template file so the result has the expected layout.
    newData = getFile_JSON2Dict('db_pattern.json')

    ### From srcDict look for duplicated data
    for item in srcDict:
        record = srcDict[item]
        sKey = record['phish_id']

        # `in` replaces dict.has_key(), which was removed in Python 3.
        if sKey in localData['data']:
            # TODO: Check if the hash is different; if so, create an
            #       updated STIX doc using the same GUID as the original.
            continue

        localData['data'][sKey] = {
            'src': record,
            'meta': {
                'md5': genHash_md5(record),
                'dateDL': getUTCTime(),
            },
        }
        newData['data'][sKey] = localData['data'][sKey]

    if isUpdateNewDataOnly == False:
        newData = localData

    if len(newData['data']):
        sTxt = "Found " + str(len(newData['data'])) + " new data elements"
        sndMSG(sTxt, 'INFO', sNAMEFUNC)
    else:
        sndMSG("Found no new data", 'INFO', sNAMEFUNC)
        newData = {}

    setDB_local(localData, srcData)
    return newData
def adptr_src2Dict(srcData, isUpdateNewDataOnly):
    """Download an RSS source, parse its items and update the local JSON DB.

    Each RSS item is keyed by its ``guid``.  The verification date is taken
    from the parenthesised part of the item title, and the remaining
    attributes from the comma-separated ``description`` field.  Items
    already present in the local database only get their counter, download
    time and status refreshed; unseen items are added and reported.

    Args:
        srcData: Source descriptor; ``fileName``, ``filePath`` and
            ``srcCreds`` are read, ``pkgTitle``, ``pkgDscrpt`` and
            ``pkgLink`` are set from the RSS channel.
        isUpdateNewDataOnly: When False the whole local database is
            returned instead of only the newly added items.

    Returns:
        False when ``srcData`` is None, the download fails, or there is
        nothing to report; otherwise a dict mapping guid to its record.
    """
    sNAMEFUNC = 'adptr_src2Dict()'
    sndMSG("Called... ", 'INFO', sNAMEFUNC)

    ### Input Check
    if srcData is None:
        # TODO: Needs error msg: Missing srcData Object
        return False

    srcFile = srcData.filePath + srcData.fileName
    locDataFile = 'db_' + srcData.fileName.split('.')[0] + '.json'

    ### Fetch the newest version from the source location
    # TODO: srcData.getSrcData() in clsDataSource is not completed, so
    #       getRmt_File is used until the class is finished.
    if not getRmt_File(srcData.srcCreds, srcFile) == True:
        # Without source data there is nothing to do.
        return False

    dstData = getFile_JSON2Dict(locDataFile)
    if not dstData:
        dstData = {}
    newData = {}

    ### From here on the code is specific to this data source
    trimFile_btwn(srcFile, '<?xml version="1.0" encoding="ISO-8859-1" ?>',
                  '</rss>')
    srcDict = cnvt_XML2Dict(srcFile)

    channel = srcDict['rss']['channel']
    srcData.pkgTitle = channel['title']
    srcData.pkgDscrpt = channel['description']
    srcData.pkgLink = channel['link']

    for col in channel['item']:
        sKey = col['guid']

        # Title looks like "<text> (YYYY-MM-DD)": take what follows the
        # first "(" and drop the closing ")".
        sDateVF = col['title'].split('(')[1][0:-1]
        dSrt = datetime.strptime(sDateVF, "%Y-%m-%d")
        sDateVF = dSrt.strftime("%Y-%m-%dT%H:%M:%SZ")

        # Description is "<4-char prefix><URI>,k: v,k: v,k: v"
        # (prefix presumably "URL:" - confirm against the feed).
        lstAttrib = col['description'].split(',')
        sURI = lstAttrib[0][4:]

        dictAttrib = {
            "dateVF": cleanString(sDateVF),
            "URI": cleanString(sURI),
            "status": cleanString(lstAttrib[1].split(':')[1]),
            "version": cleanString(lstAttrib[2].split(':')[1]),
            "hash": cleanString(lstAttrib[3].split(':')[1]),
            "title": cleanString(col['title']),
            "link": cleanString(col['link']),
            "dscrpt": cleanString(col['description']),
            "fileName": "",
            "ipAddr": "",
            "domain": ""
        }

        if len(sURI) > 0:
            tmpList = sURI.split("/")
            if len(tmpList) > 1:
                dictAttrib["fileName"] = cleanString(tmpList[-1])
            # The host is the third "/"-separated part ("scheme://host/...").
            # Guard the index so a URI without a scheme does not raise.
            if len(tmpList) > 2:
                sHost = tmpList[2]
                if sHost[0:1].isdigit():
                    dictAttrib["ipAddr"] = cleanString(sHost)
                else:
                    dictAttrib["domain"] = cleanString(sHost)

        if sKey in dstData:
            dstData[sKey]['cnt'] += 1
            dstData[sKey]['dateDL'] = getUTCTime()
            dstData[sKey]['status'] = dictAttrib['status']
            # TODO: Check if an existing element's inactive status changed
        else:
            ### Add new data to the local database
            dstData[sKey] = {'cnt': 1, 'dateDL': getUTCTime()}
            dstData[sKey]['attrib'] = dictAttrib
            ### Generate list of new data only for STIX output
            newData[sKey] = dstData[sKey]

    sndFile_Dict2JSON(dstData, locDataFile)

    if isUpdateNewDataOnly == False:
        newData = dstData

    if len(newData) > 0:
        sTxt = "Found " + str(len(newData)) + " new data elements"
        sndMSG(sTxt, 'INFO', sNAMEFUNC)
    else:
        sndMSG("Found no new data", 'INFO', sNAMEFUNC)
        newData = False

    return newData
def adptr_src2Dict(srcData, isUpdateNewDataOnly):
    """Download the Clean MX phishing XML list and update the local JSON DB.

    Every ``output/entries/entry`` element is keyed by its ``id``.  The
    ``first`` and ``last`` epoch fields are converted to ISO-8601 UTC
    strings (``None`` when the feed gives "0"), and the non-empty ``ns1`` to
    ``ns4`` values are collected into ``nsList``.  Known ids only get their
    counter and download time refreshed; unseen ids are added and reported.

    Args:
        srcData: Source descriptor; ``fileName``, ``filePath`` and
            ``srcCreds`` are read, ``pkgTitle``, ``pkgDscrpt`` and
            ``pkgLink`` are set to fixed values for this feed.
        isUpdateNewDataOnly: When False the whole local database is
            returned instead of only the newly added entries.

    Returns:
        False when ``srcData`` is None, the download fails, or there is
        nothing to report; otherwise a dict mapping id to its record.
    """
    sNAMEFUNC = 'adptr_src2Dict()'
    sndMSG("Called... ", 'INFO', sNAMEFUNC)

    ### Input Check
    if srcData is None:
        # TODO: Needs error msg: Missing srcData Object
        return False

    srcFile = srcData.filePath + srcData.fileName
    locDataFile = 'db_' + srcData.fileName.split('.')[0] + '.json'

    ### Fetch the newest version from the source location
    # TODO: srcData.getSrcData() in clsDataSource is not completed, so
    #       getRmt_File is used until the class is finished.
    if not getRmt_File(srcData.srcCreds, srcFile) == True:
        # Without source data there is nothing to do.
        return False

    dstData = getFile_JSON2Dict(locDataFile)
    if not dstData:
        dstData = {}
    newData = {}

    ### From here on the code is specific to this data source
    srcDict = cnvt_XML2Dict(srcFile)

    srcData.pkgTitle = "Clean MX Phishing URL Block List "
    srcData.pkgDscrpt = ""
    srcData.pkgLink = "http://support.clean-mx.de/clean-mx/phishing.php"

    for item in srcDict['output']['entries']['entry']:
        sKey = item['id']

        for sField in ('first', 'last'):
            if item[sField] == "0":
                item[sField] = None
            else:
                # The output carries a "Z" (UTC) suffix, so the epoch value
                # must be converted as UTC, not as host-local time.
                item[sField] = datetime.utcfromtimestamp(
                    int(item[sField])).strftime('%Y-%m-%dT%H:%M:%SZ')

        # The parsed entry itself becomes the attribute record.
        dictAttrib = item
        lstNS = []
        for i in range(1, 5):
            sNS = dictAttrib.get('ns' + str(i))
            if sNS:
                lstNS.append(sNS)
        dictAttrib.update({"nsList": lstNS})

        if sKey in dstData:
            dstData[sKey]['cnt'] += 1
            dstData[sKey]['dateDL'] = getUTCTime()
        else:
            ### Add new data to the local database
            dstData[sKey] = {'cnt': 1, 'dateDL': getUTCTime()}
            dstData[sKey]['attrib'] = dictAttrib
            ### Generate list of new data only for STIX output
            newData[sKey] = dstData[sKey]

    sndFile_Dict2JSON(dstData, locDataFile)

    if isUpdateNewDataOnly == False:
        newData = dstData

    if len(newData) > 0:
        sTxt = "Found " + str(len(newData)) + " new data elements"
        sndMSG(sTxt, 'INFO', sNAMEFUNC)
    else:
        sndMSG("Found no new data", 'INFO', sNAMEFUNC)
        newData = False

    return newData
def adptr_src2Dict(srcData, isUpdateNewDataOnly):
    """Parse a tab-delimited domain list and update the local JSON DB.

    Rows are read from the already-present source file (this adapter does
    not download anything).  Rows with fewer than six columns are skipped.
    Column 2 is used as the key and domain, columns 3 and 4 as type and
    reference, and every column from index 5 on that parses as ``YYYYMMDD``
    is collected as an ISO-8601 verification date.

    Args:
        srcData: Source descriptor; ``fileName``, ``filePath`` and
            ``parsearg`` are read.
        isUpdateNewDataOnly: When False the whole local database is
            returned instead of only the newly added rows.

    Returns:
        False when ``srcData`` is None or there is nothing to report;
        otherwise a dict mapping the key column to its record.
    """
    sNAMEFUNC = 'adptr_src2Dict()'
    sndMSG("Called... ", 'INFO', sNAMEFUNC)

    ### Input Check
    if srcData is None:
        # TODO: Needs error msg: Missing srcData Object
        return False

    locDataFile = 'db_' + srcData.fileName.split('.')[0] + '.json'

    ### Parse source file into a dictionary object
    dstData = getFile_JSON2Dict(locDataFile)
    if not dstData:
        dstData = {}

    oDialect = clsCSVDialect()
    oDialect.from_dict(srcData.parsearg)
    oDialect.delimiter = '\t'
    srcDict = cnvt_CSV2Dict(srcData.filePath + srcData.fileName,
                            dialect=oDialect)

    newData = {}
    for col in srcDict:
        # Example row:
        # {0: u'', 1: u'20161231', 2: u'38zu.cn', 3: u'attackpage',
        #  4: u'safebrowsing.google.com', 5: u'20140703', 6: u'20140302',
        #  7: u'20130325', 8: u'20120426', 9: u'20110715', 10: u'relisted'}
        row = srcDict[col]
        if len(row) < 6:
            continue

        sKey = row[2]

        lstDateVF = []
        for idx in range(5, len(row)):
            sField = row[idx]
            # The second character is probed, so the field needs at least
            # two characters; shorter fields cannot be dates anyway.
            if len(sField) > 1 and isNumber(sField[1]):
                try:
                    dSrt = datetime.strptime(sField, "%Y%m%d")
                    lstDateVF.append(dSrt.strftime("%Y-%m-%dT%H:%M:%SZ"))
                except ValueError:
                    # Not a date (e.g. a trailing note column) - ignore it.
                    pass

        # Columns: nextvalidation, domain, type,
        #          original_reference-why_it_was_listed, dateverified...
        dictAttrib = {
            "domain": cleanString(row[2]),
            "type": cleanString(row[3]),
            "ref": cleanString(row[4]),
            "lstDateVF": lstDateVF
        }

        if sKey in dstData:
            dstData[sKey]['cnt'] += 1
            dstData[sKey]['dateDL'] = getUTCTime()
        else:
            ### Add new data to the local database
            dstData[sKey] = {'cnt': 1, 'dateDL': getUTCTime()}
            dstData[sKey]['attrib'] = dictAttrib
            ### Generate list of new data only for STIX output
            newData[sKey] = dstData[sKey]

    sndFile_Dict2JSON(dstData, locDataFile)

    if isUpdateNewDataOnly == False:
        newData = dstData

    if len(newData) > 0:
        sTxt = "Found " + str(len(newData)) + " new data elements"
        sndMSG(sTxt, 'INFO', sNAMEFUNC)
    else:
        sndMSG("Found no new data", 'INFO', sNAMEFUNC)
        newData = False

    return newData
def adptr_src2Dict(srcData, isUpdateNewDataOnly):
    """Download the DShield block list and update the local JSON DB.

    The file is parsed as tab-delimited text.  Rows that have an ``End``
    column are keyed by ``Start + "##comma##" + End``.  When the file's
    "updated:" line can be parsed, its time and the time three days before
    it are added to every row as ``dateVF`` / ``dateRange`` and inserted
    into the package description.  An ``email`` containing "noemail" is
    replaced by None.

    Args:
        srcData: Source descriptor; ``fileName``, ``filePath``,
            ``srcCreds`` and ``parsearg`` are read, ``pkgTitle``,
            ``pkgDscrpt`` and ``pkgLink`` are set.
        isUpdateNewDataOnly: When False the whole local database is
            returned instead of only the newly added ranges.

    Returns:
        False when ``srcData`` is None, the download fails, or there is
        nothing to report; otherwise a dict mapping range key to record.
    """
    sNAMEFUNC = 'adptr_src2Dict()'
    sndMSG("Called... ", 'INFO', sNAMEFUNC)

    ### Input Check
    if srcData is None:
        # TODO: Needs error msg: Missing srcData Object
        return False

    srcFile = srcData.filePath + srcData.fileName
    locDataFile = 'db_' + srcData.fileName.split('.')[0] + '.json'

    ### Fetch the newest version from the source location
    # TODO: srcData.getSrcData() in clsDataSource is not completed, so
    #       getRmt_File is used until the class is finished.
    if not getRmt_File(srcData.srcCreds, srcFile) == True:
        # Without source data there is nothing to do.
        return False

    dstData = getFile_JSON2Dict(locDataFile)
    if not dstData:
        dstData = {}
    newData = {}

    ### From here on the code is specific to this data source
    oDialect = clsCSVDialect()
    oDialect.from_dict(srcData.parsearg)
    oDialect.delimiter = '\t'
    srcDict = cnvt_CSV2Dict(srcFile, dialect=oDialect)

    srcData.pkgTitle = "DShield.org Recommended Block List "
    srcData.pkgDscrpt = (
        "This list summarizes the top 20 attacking class C (/24) subnets "
        "over the last three days. The number of 'attacks' indicates the "
        "number of targets reporting scans from this subnet.")
    srcData.pkgLink = "http://feeds.dshield.org/block.txt"

    # Best effort: the dates are optional metadata.  They are committed
    # only when the whole parse succeeds, so a raw unparsed string can
    # never end up in "dateVF".
    sDateVF = None
    s3daysAgo = None
    try:
        sUpdated = getFile_lineByValue(
            srcFile, "updated:")[0].split("updated:")[1].strip()
        dtUpdated = datetime.strptime(sUpdated, "%a %b %d %H:%M:%S %Y %Z")
        sParsedVF = dtUpdated.strftime("%Y-%m-%dT%H:%M:%SZ")
        sParsed3d = (dtUpdated + timedelta(days=-3)).strftime(
            "%Y-%m-%dT%H:%M:%SZ")
    except Exception:
        sndMSG("Could not read the 'updated:' time; dates omitted",
               'INFO', sNAMEFUNC)
    else:
        sDateVF = sParsedVF
        s3daysAgo = sParsed3d
        srcData.pkgDscrpt = srcData.pkgDscrpt.replace(
            'last three days.',
            ('last three days (' + s3daysAgo + " - " + sDateVF + ')'))

    for col in srcDict:
        row = srcDict[col]
        # Rows without an "End" column are not address ranges; skip them.
        if 'End' not in row:
            continue
        sKey = row['Start'] + "##comma##" + row['End']

        # The parsed row itself becomes the attribute record.
        dictAttrib = row
        if sDateVF:
            dictAttrib.update({"dateVF": str(sDateVF)})
        if s3daysAgo:
            dictAttrib.update(
                {"dateRange": str(s3daysAgo) + " - " + str(sDateVF)})

        # Tolerate rows where the email column is missing or empty.
        sEmail = dictAttrib.get('email')
        if sEmail and 'noemail' in sEmail:
            dictAttrib.update({"email": None})

        if sKey in dstData:
            dstData[sKey]['cnt'] += 1
            dstData[sKey]['dateDL'] = getUTCTime()
        else:
            ### Add new data to the local database
            dstData[sKey] = {'cnt': 1, 'dateDL': getUTCTime()}
            dstData[sKey]['attrib'] = dictAttrib
            ### Generate list of new data only for STIX output
            newData[sKey] = dstData[sKey]

    sndFile_Dict2JSON(dstData, locDataFile)

    if isUpdateNewDataOnly == False:
        newData = dstData

    if len(newData) > 0:
        sTxt = "Found " + str(len(newData)) + " new data elements"
        sndMSG(sTxt, 'INFO', sNAMEFUNC)
    else:
        sndMSG("Found no new data", 'INFO', sNAMEFUNC)
        newData = False

    return newData
def adptr_src2Dict(srcData, isUpdateNewDataOnly):
    """Parse a comma-delimited host list and update the local JSON DB.

    Rows are read from the already-present source file (this adapter does
    not download anything) and keyed by their ``'IP Address'`` column.
    Columns whose name contains "Flag" or "Port" are grouped under
    ``Flags`` / ``Ports``; those and the "Uptime" / "Bandwidth" columns are
    stored as ints, or None when the value is not all digits.  A
    ``Hostname`` equal to the IP address is replaced by None.

    Args:
        srcData: Source descriptor; ``fileName``, ``filePath`` and
            ``parsearg`` are read.
        isUpdateNewDataOnly: When False the whole local database is
            returned instead of only the newly added hosts.

    Returns:
        False when ``srcData`` is None or there is nothing to report;
        otherwise a dict mapping IP address to its record
        (``{'meta': {...}, 'attrib': {...}}``).
    """
    sNAMEFUNC = 'adptr_src2Dict()'
    sndMSG("Called... ", 'INFO', sNAMEFUNC)

    ### Input Check
    if srcData is None:
        # TODO: Needs error msg: Missing srcData Object
        return False

    locDataFile = 'db_' + srcData.fileName.split('.')[0] + '.json'

    ### Parse source file into a dictionary object
    dstData = getFile_JSON2Dict(locDataFile)
    if not dstData:
        dstData = {}

    oDialect = clsCSVDialect()
    oDialect.from_dict(srcData.parsearg)
    oDialect.delimiter = ','
    srcDict = cnvt_CSV2Dict(srcData.filePath + srcData.fileName,
                            dialect=oDialect)

    newData = {}
    for col in srcDict:
        row = srcDict[col]
        if len(row) < 1:
            continue

        sKey = row['IP Address']

        dictAttrib = {}
        dictAttrib['Flags'] = {}
        dictAttrib['Ports'] = {}
        for item in row:
            sValue = row[item]
            if 'Flag' in item:
                # The original wrote the None case to a misspelled
                # 'Flagss' key, which raised KeyError.
                dictAttrib['Flags'][item] = (
                    int(sValue) if sValue.isdigit() else None)
            elif 'Port' in item:
                dictAttrib['Ports'][item] = (
                    int(sValue) if sValue.isdigit() else None)
            elif 'Uptime' in item or 'Bandwidth' in item:
                dictAttrib[item] = int(sValue) if sValue.isdigit() else None
            else:
                dictAttrib[item] = sValue

        # A hostname that merely repeats the address carries no information.
        if dictAttrib['Hostname'] == dictAttrib['IP Address']:
            dictAttrib['Hostname'] = None

        # TODO: change detection is not implemented yet.  The intent is to
        #       store an md5 of str(dictAttrib) in meta['attribHash'] and
        #       set meta['hasChanged'] when it differs on a later run.
        if sKey in dstData:
            dstData[sKey]['meta']['cnt'] += 1
            dstData[sKey]['meta']['dateDL'] = getUTCTime()
        else:
            dstData[sKey] = {}
            dstData[sKey]['meta'] = {
                'cnt': 1,
                'dateDL': getUTCTime(),
                'IDs': {},
                'hasChanged': False,
                'attribHash': 0x0
            }
            dstData[sKey]['attrib'] = dictAttrib
            ### Generate list of new data only for STIX output
            newData[sKey] = dstData[sKey]

    sndFile_Dict2JSON(dstData, locDataFile)

    if isUpdateNewDataOnly == False:
        newData = dstData

    if len(newData) > 0:
        sTxt = "Found " + str(len(newData)) + " new data elements"
        sndMSG(sTxt, 'INFO', sNAMEFUNC)
    else:
        sndMSG("Found no new data", 'INFO', sNAMEFUNC)
        newData = False

    return newData
def adptr_src2Dict(src_data, isUpdateNewDataOnly):
    """Download an RSS source, parse its items and update the local JSON DB.

    Each RSS item is keyed by its ``guid``.  The item title is expected to
    look like ``"<host> (YYYY-MM-DD HH:MM:SS)"``: the part before the first
    "(" is stored as ``ipAddr`` when ``isIPv4`` accepts it and as ``domain``
    otherwise; the parenthesised part becomes ``dateVF`` (None when it is
    missing or unparseable).

    Args:
        src_data: Source descriptor; ``fileName``, ``filePath`` and
            ``srcCreds`` are read, ``pkgTitle``, ``pkgDscrpt`` and
            ``pkgLink`` are set from the RSS channel.
        isUpdateNewDataOnly: When falsy the whole local database is
            returned instead of only the newly added items.

    Returns:
        False when ``src_data`` is None, the download fails, or there is
        nothing to report; otherwise a dict mapping guid to its record.
    """
    namefunc = 'adptr_src2Dict()'
    sndMSG("Called... ", 'INFO', namefunc)

    ### Input Check
    if src_data is None:
        # TODO: Needs error msg: Missing srcData Object
        return False

    srcFile = src_data.filePath + src_data.fileName
    locDataFile = 'db_' + src_data.fileName.split('.')[0] + '.json'

    ### Fetch the newest version from the source location
    # TODO: srcData.getSrcData() in clsDataSource is not completed, so
    #       getRmt_File is used until the class is finished.
    if not getRmt_File(src_data.srcCreds, srcFile) == True:
        # Without source data there is nothing to do.
        return False

    dstData = getFile_JSON2Dict(locDataFile)
    if not dstData:
        dstData = {}

    ### From here on the code is specific to this data source
    trimFile_btwn(srcFile, '<?xml version="1.0" encoding="ISO-8859-1" ?>',
                  '</rss>')
    srcDict = cnvt_XML2Dict(srcFile)

    channel = srcDict['rss']['channel']
    src_data.pkgTitle = channel['title']
    src_data.pkgDscrpt = channel['description']
    src_data.pkgLink = channel['link']

    newData = {}
    for col in channel['item']:
        sKey = col['guid']
        lstTitle = col['title'].split('(')

        # Date is the text after the first "(" minus the closing ")".
        # A title without "(" or with a malformed date yields None instead
        # of aborting the whole run.
        try:
            dSrt = datetime.strptime(lstTitle[1][0:-1], "%Y-%m-%d %H:%M:%S")
            sDateVF = dSrt.strftime("%Y-%m-%dT%H:%M:%SZ")
        except (IndexError, ValueError):
            sDateVF = None

        sDomain = None
        sIPAddr = cleanString(lstTitle[0])
        if not isIPv4(sIPAddr):
            sDomain = sIPAddr
            sIPAddr = None

        dictAttrib = {
            "dateVF": sDateVF,
            "title": cleanString(col['title']),
            "link": cleanString(col['link']),
            "dscrpt": cleanString(col['description']),
            "ipAddr": sIPAddr,
            "domain": sDomain,
        }

        if sKey in dstData:
            dstData[sKey]['cnt'] += 1
            dstData[sKey]['dateDL'] = getUTCTime()
            # TODO: Check if an existing element's inactive status changed
        else:
            ### Add new data to the local database
            dstData[sKey] = {'cnt': 1, 'dateDL': getUTCTime()}
            dstData[sKey]['attrib'] = dictAttrib
            ### Generate list of new data only for STIX output
            newData[sKey] = dstData[sKey]

    sndFile_Dict2JSON(dstData, locDataFile)

    if not isUpdateNewDataOnly:
        newData = dstData

    if len(newData) > 0:
        stxt = "Found " + str(len(newData)) + " new data elements"
        sndMSG(stxt, 'INFO', namefunc)
    else:
        sndMSG("Found no new data", 'INFO', namefunc)
        newData = False

    return newData
def adptr_src2Dict(srcData, isUpdateNewDataOnly):
    """Download the Emerging Threats botnet C&C rules and update the local DB.

    The file is read one rule per row.  Each rule line is split at its first
    "(" into a space-separated header (protocol at index 1, destination
    address or ``[a,b,...]`` list at index 5, destination port at index 6)
    and a ";"-separated option list of ``key:value`` pairs.  Repeated option
    keys are joined with "|".  The full rule text is the record key.

    Lines that do not have this shape are skipped.

    Args:
        srcData: Source descriptor; ``fileName``, ``filePath``,
            ``srcCreds`` and ``parsearg`` are read, ``pkgTitle``,
            ``pkgDscrpt`` and ``pkgLink`` are set.
        isUpdateNewDataOnly: When False the whole local database is
            returned instead of only the newly added rules.

    Returns:
        False when ``srcData`` is None, the download fails, or there is
        nothing to report; otherwise a dict mapping rule text to record.
    """
    sNAMEFUNC = 'adptr_src2Dict()'
    sndMSG("Called... ", 'INFO', sNAMEFUNC)

    ### Input Check
    if srcData is None:
        # TODO: Needs error msg: Missing srcData Object
        return False

    srcFile = srcData.filePath + srcData.fileName
    locDataFile = 'db_' + srcData.fileName.split('.')[0] + '.json'

    ### Fetch the newest version from the source location
    # TODO: srcData.getSrcData() in clsDataSource is not completed, so
    #       getRmt_File is used until the class is finished.
    if not getRmt_File(srcData.srcCreds, srcFile) == True:
        # Without source data there is nothing to do.
        return False

    ### Load the local database (once; the original read the file twice)
    dstData = getFile_JSON2Dict(locDataFile)
    if not dstData:
        dstData = {}
    newData = {}

    ### From here on the code is specific to this data source
    oDialect = clsCSVDialect()
    oDialect.from_dict(srcData.parsearg)
    # Newline as delimiter: every line is a single-column row.
    oDialect.delimiter = '\n'
    srcDict = cnvt_CSV2Dict(srcFile, dialect=oDialect)

    srcData.pkgTitle = ("SNORT Rule by Emergingthreats | "
                        "Block Botnet Command and Control")
    srcData.pkgDscrpt = (
        "Emerging Threats Botnet Command and Control drop rules. These are "
        "generated from the EXCELLENT work done by the Shadowserver team "
        "and the abuse.ch folks. All Volunteers, we're grateful for their "
        "dedication! http://www.shadowserver.org; "
        "https://spyeyetracker.abuse.ch; https://palevotracker.abuse.ch; "
        "https://zeustracker.abuse.ch. "
        "More information available at www.emergingthreats.net")
    srcData.pkgLink = ("http://rules.emergingthreats.net/blockrules/"
                       "emerging-botcc.portgrouped.rules")

    for col in srcDict:
        # Example row:
        # {0: u'alert tcp $HOME_NET any -> 50.116.1.225 22 (msg:"ET CNC
        #  Shadowserver Reported CnC Server Port 22 Group 1"; flags:S;
        #  reference:url,doc.emergingthreats.net/bin/view/Main/BotCC; ...
        #  sid:2405000; rev:3570;)'}
        sKey = srcDict[col][0]

        # Split at the FIRST "(" only, so option text that itself contains
        # "(" is kept intact.
        sHeader, sSep, sOptions = sKey.partition("(")
        tmpList = sHeader.split(" ")
        if not sSep or len(tmpList) < 7:
            # Not a rule line (comment, blank, truncated) - nothing to parse.
            continue

        ipProt = tmpList[1] if tmpList[1] else None

        ipList = None
        if tmpList[5]:
            sAddrs = tmpList[5]
            if "[" in sAddrs:
                # Strip the surrounding brackets of an address list.
                sAddrs = sAddrs[1:-1]
            ipList = sAddrs.split(",")

        ipPort = tmpList[6] if tmpList[6] else None

        # The last ";"-piece is the closing ")" and is dropped.
        tmpDict = {}
        for sAttr in sOptions.split(";")[:-1]:
            sAttr = cleanString(sAttr)
            # partition keeps values that contain ":" whole and gives an
            # empty value for options that have no ":" at all.
            tmpKey, _, tmpVal = sAttr.partition(':')
            if tmpKey in tmpDict:
                tmpDict[tmpKey] += "|" + tmpVal
            else:
                tmpDict[tmpKey] = tmpVal

        dictAttrib = tmpDict
        dictAttrib.update({
            'ipAddrList': ipList,
            'rule': sKey,
            'ipPort': ipPort,
            'ipProt': ipProt
        })

        if sKey in dstData:
            dstData[sKey]['cnt'] += 1
            dstData[sKey]['dateDL'] = getUTCTime()
        else:
            ### Add new data to the local database
            dstData[sKey] = {'cnt': 1, 'dateDL': getUTCTime()}
            dstData[sKey]['attrib'] = dictAttrib
            ### Generate list of new data only for STIX output
            newData[sKey] = dstData[sKey]

    sndFile_Dict2JSON(dstData, locDataFile)

    if isUpdateNewDataOnly == False:
        newData = dstData

    if len(newData) > 0:
        sTxt = "Found " + str(len(newData)) + " new data elements"
        sndMSG(sTxt, 'INFO', sNAMEFUNC)
    else:
        sndMSG("Found no new data", 'INFO', sNAMEFUNC)
        newData = False

    return newData