def trimFile_btwn(sFile, string_srt, string_end):
    """
    Trims a text file in place, keeping only the lines between two markers

    Keyword arguments:
    sFile      -- File Path and File name of the file to be rewritten
    string_srt -- Text marking the first line to keep; when several lines
                  contain it the last one wins, when none do line 0 is used
    string_end -- Text marking the last line to keep (inclusive); same
                  last-match / line 0 fallback as string_srt

    Returns:
    False in every case (kept as is for existing callers)
    """
    sNAMEFUNC = 'trimFile_btwn'
    sTxt = "Called... "
    sndMSG(sTxt, 'INFO', sNAMEFUNC)

    iSrt = 0
    iEnd = 0
    with open(sFile, 'r+') as data:
        lines = data.readlines()
        for iCnt, line in enumerate(lines):
            if string_srt in line:
                iSrt = iCnt
            if string_end in line:
                iEnd = iCnt

        # Rewind and empty the file before writing the kept lines back
        data.seek(0)
        data.truncate()
        # A slice (unlike indexing) cannot raise IndexError on an empty file
        data.writelines(lines[iSrt:iEnd + 1])
    return (False)
def decompress(self, srcFile, dstFile, cmpType):
    """
    Reports whether the received compression type is one this method handles

    Only log messages are produced here; srcFile and dstFile are not touched.

    Keyword arguments:
    srcFile -- Source file (currently unused)
    dstFile -- Destination file (currently unused)
    cmpType -- Compression type label, checked against self.CompressionTypes

    Returns:
    None  -- cmpType is not in self.CompressionTypes
    True  -- cmpType is 'gz', 'zip', 'bz' or 'tar'
    False -- cmpType is in self.CompressionTypes but has no branch here
    """
    if cmpType not in self.CompressionTypes:
        sndMSG('Unknown decompress type: ' + cmpType, 'ERROR', self._strClassName)
        return None

    # Every handled type logs the same message and reports success
    if cmpType in ('gz', 'zip', 'bz', 'tar'):
        sndMSG('Attempting to decompress ' + cmpType + ' file ', 'INFO', self._strClassName)
        return True

    return False
def getFile_CSV2List(sFile, dialect=None, sCommentFlag=None):
    """
    Opens received File and parses it based on the received dialect

    Keyword arguments:
    sFile        -- File Path and File name to be parsed
                    example: ./test/test.csv
    dialect      -- CSV dialect handed to csv.reader; when None the reader
                    defaults apply (comma separated)
    sCommentFlag -- Text marking a line to be ignored; defaults to '#'.
                    A line is dropped when the flag appears anywhere in it

    Returns:
    List object containing a List for each line
        example: [['date','ip1','domain1'],
                  ['date','ip2','domain2']]
    None when the file could not be read or parsed
    """
    import csv
    import sys

    sFuncName = 'mngFiles.getFile_CSV2List'
    sTxt = "Attemping to parse file as CSV: " + sFile
    sndMSG(sTxt, 'INFO', sFuncName)

    if dialect is None:
        sTxt = "\--> NOTE: No csv dialect was passed, default parsing is by comma "
        sndMSG(sTxt, 'INFO', sFuncName)

    if sCommentFlag is None:
        sCommentFlag = '#'
        sTxt = "\--> NOTE: No comment flag was passed, default is '#' "
        sndMSG(sTxt, 'INFO', sFuncName)

    try:
        # The with block releases the handle on every path, including errors
        with open(sFile) as objFile:
            sList = []
            for line in csv.reader(objFile, dialect):
                if sCommentFlag in str(line):
                    # TODO: Needs better comment handling,
                    #   As is it will remove lines with in-line comments;
                    #   this needs to be fixed to just remove the commented
                    #   section and still keep the data before it
                    continue
                sList.append(line)
        return (sList)

    except IOError as e:
        sTxt = str("\--> I/O error({0}): {1}".format(e.errno, e.strerror))
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    except Exception:
        sTxt = "\--> Unexpected error: " + str(sys.exc_info()[0])
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)
def sndFile(sData, sFile):
    """
    Writes every item of sData to sFile, replacing any previous content

    Keyword arguments:
    sData -- Iterable of strings; items are written as is, no newline added
    sFile -- File Path and File name to be written

    Returns:
    0 once the file has been written
    """
    sndMSG("Trying to write to this file : " + sFile, 'INFO', 'mngFiles.sndFile')

    # Leaving the with block closes the file
    with open(sFile, "w") as outfile:
        outfile.writelines(sData)

    return 0
def decompressor(srcFile, dstFile, cmpFormat):
    """
    Announces the decompression of srcFile; no file is actually processed

    Keyword arguments:
    srcFile   -- Source file name, used in the log message
    dstFile   -- Destination file name (only checked for None)
    cmpFormat -- Compression format label (only checked for None)

    Returns:
    True when all three arguments are set and srcFile is not empty,
    None otherwise
    """
    _strFuncName = 'mngFiles.decompressor'
    _strFuncVer = '0.0.0'

    if any(arg is None for arg in (srcFile, dstFile, cmpFormat)):
        return None

    # An empty source name is rejected without logging
    if not srcFile:
        return None

    sndMSG('decompressing ' + srcFile, 'INFO', _strFuncName)
    return True
def unpck_file2file(self):
    """
    Unpacks the outermost compression layer of the source file, then resumes
    the conversion with the next entry of self._dicParsingArgs['format']

    The source name is built from self._strFilePath, self.fileName and the
    first format entry; the destination name uses the second format entry.

    Returns:
    None in every case; progress and failures are reported through sndMSG
    """
    cmpFrmt = self._dicParsingArgs['format'][0]

    if cmpFrmt not in self._lstCompressionTypes:
        sTxt = 'Unrecognized file compression format: ' + cmpFrmt
        sndMSG(sTxt, 'ERROR', self._strClassName)
        return (None)

    sTxt = 'Attempting to decompression format: ' + cmpFrmt
    sndMSG(sTxt, 'INFO', self._strClassName)

    ### Run decompressor
    from mngFiles import Decompressor

    sFile = self._strFilePath + self.fileName + "."
    srcFile = sFile + cmpFrmt
    dstFile = sFile + self._dicParsingArgs['format'][1]

    oDcmp = Decompressor()
    bStatus = oDcmp.decompress(srcFile, dstFile, cmpFrmt)

    if bStatus == True:
        self._dicParsingArgs['format'].pop(0)
        ### This causes a loop to iterate through the remaining formats.
        ### It only runs after a successful unpack: on failure the format
        ### entry is still in place, so resuming would re-enter this method
        ### with the same input and never terminate.
        self.cnvt_SrcFile2Dict()

    elif bStatus == False:
        sTxt = 'Failed to unpack ' + cmpFrmt + ' file '
        sndMSG(sTxt, 'ERROR', self._strClassName)

    else:
        sTxt = 'Unpacker returned unexpected result'
        sndMSG(sTxt, 'ERROR', self._strClassName)

    return (None)
def getUTCTime():
    '''
    - Used to get UTC in a standardized format

    Keyword arguments:
    (none)

    Returns:
    Current UTC time as a 'YYYY-MM-DDTHH:MM:SSZ' string, or None when the
    time could not be produced (the error is reported through sndMSG)
    '''
    import datetime

    try:
        return datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    except:
        sndMSG("Unexpected error: " + str(sys.exc_info()[0]), 'ERROR', 'getUTCTime()')
        return None
def cnvt_XML2Dict(srcXML, dstFile=None):
    '''
    Retrieves an XML document and converts it into a dictionary

    Keyword arguments:
    srcXML  -- URL or local file path of the XML document; a value without
               a URL scheme ("xxx://") is treated as a local file path
    dstFile -- Optional File Path and File name; when given the dictionary
               is written there as JSON through sndFile_Dict2JSON

    Returns:
    The parsed dictionary, or the result of sndFile_Dict2JSON when dstFile
    is given, or None when the source is missing, unreadable or unparsable
    '''
    import sys
    import urllib2
    import xmltodict

    ### Setup function
    sFuncName = 'cnvt_XML2Dict'

    if not srcXML:
        sTxt = 'XML Source Required'
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    # Checking for a scheme separator instead of the substring "http" keeps
    # local paths that merely contain "http" readable, and leaves other
    # URL schemes untouched
    if "://" not in srcXML:
        srcXML = "file://" + srcXML

    try:
        rssData = urllib2.urlopen(srcXML).read()

    except IOError as e:
        sTxt = str("\--> I/O error({0}): {1}".format(e.errno, e.strerror))
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    except Exception:
        sTxt = "\--> Unexpected error: " + str(sys.exc_info()[0])
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    try:
        dataDict = xmltodict.parse(rssData)

    except IOError as e:
        sTxt = str("\--> I/O error({0}): {1}".format(e.errno, e.strerror))
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    except Exception:
        sTxt = "\--> Unexpected error: " + str(sys.exc_info()[0])
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    if dstFile:
        return (sndFile_Dict2JSON(dataDict, dstFile))
    return (dataDict)
def getFile_JSON2Dict(sFile):
    '''
    Opens received File and parses its content as JSON

    Keyword arguments:
    sFile -- File Path and File name to be parsed

    Returns:
    The object produced by json.load, or None when the file could not be
    read or parsed (the error is reported through sndMSG)
    '''
    import json
    import sys

    sFuncName = 'mngFiles.getFile_JSON2Dict'
    sTxt = "Attemping to parse file as JSON: " + sFile
    sndMSG(sTxt, 'INFO', sFuncName)

    try:
        # The with block closes the file even when json.load raises
        with open(sFile) as json_data:
            return (json.load(json_data))

    except IOError as e:
        sTxt = str("\--> I/O error({0}): {1}".format(e.errno, e.strerror))
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    except Exception:
        sTxt = "\--> Unexpected error: " + str(sys.exc_info()[0])
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)
def getRmt_File(dicCreds,sFile):
    '''
    - Simple URL Retrieval function based on python's urllib

    Keyword arguments:
    dicCreds -- Dictionary whose 'URI' entry is the remote location to fetch
    sFile    -- Local File Path and File name the download is saved to

    Returns:
    True when the retrieval completed, False when it raised
    '''
    import urllib

    sFuncName = 'getRmt_File'
    rmtSrcFile = dicCreds['URI']
    sndMSG("Connecting to " + rmtSrcFile,'INFO',sFuncName)

    try:
        urllib.urlretrieve(rmtSrcFile,sFile)
    except:
        sndMSG("Unexpected error: " + str(sys.exc_info()[0]),'ERROR',sFuncName)
        bDone = False
    else:
        bDone = True
    return bDone
def cnvt_SrcFile2Dict(self):
    """
    Converts the source file into a dictionary, picking the converter from
    the first entry of self._dicParsingArgs['format']

    A leading compression format is handed to self.unpck_file2file() first.

    Returns:
    The result of the matching converter (cnvt_CSV2Dict, cnvt_XML2Dict or
    cnvt_HTML2Dict), or None when the format is not recognized
    """
    if self._dicParsingArgs['format'][0] in self._lstCompressionTypes:
        self.unpck_file2file()

    # Read the head of the list again: the unpack step above can remove the
    # entry it handled
    sFrmt = self._dicParsingArgs['format'][0]
    if sFrmt not in self._lstFileFormatTypes:
        sndMSG('Unrecognized file compression format: ' + sFrmt,'ERROR',self._strClassName)
        return None

    sFileSuffix = self._strFilePath + self._strFileNameSuffix

    if sFrmt in ('txt', 'csv'):
        from cnvtFiles import cnvt_CSV2Dict
        from mngFiles import clsCSVDialect_01

        oDialect = clsCSVDialect_01()
        oDialect.from_dict(self._dicParsingArgs)
        sFile = self.fileName + "." + sFrmt
        return cnvt_CSV2Dict(sFile,oDialect,strEncoding=self._strUnicodeEncoding)

    if sFrmt in ('xml', 'rss'):
        from mngFiles import cnvt_XML2Dict
        return cnvt_XML2Dict(sFileSuffix + sFrmt)

    if sFrmt in ('htm', 'html'):
        from mngFiles import cnvt_HTML2Dict
        # NOTE(review): this reads self._dicParseArg while the rest of the
        # method reads self._dicParsingArgs, whose 'format' entry is indexed
        # by position -- confirm which attribute and key are intended
        sArg = self._dicParseArg["format"]["htmFltr"]
        return cnvt_HTML2Dict(sFileSuffix + sFrmt,sArg)

    sndMSG("Format not recognized: " + sFrmt,'ERROR',self._strClassName)
    return None
def sndFile_Dict2JSON(sData, sFile, isCompact=False):
    '''
    Writes the received data to a file as JSON

    Keyword arguments:
    sData     -- JSON serializable object to be written
    sFile     -- File Path and File name to be written
    isCompact -- True (or the legacy string 'True') writes without
                 indentation; anything else writes with a 4 space indent

    Returns:
    True  -- the file was written
    False -- the data could not be serialized or written
    None  -- the file could not be opened
    '''
    import json
    import sys

    sFuncName = 'mngFiles.sndFile_Dict2JSON'
    sTxt = "Trying to write to this file : " + sFile
    sndMSG(sTxt, 'INFO', sFuncName)

    chkFile(sFile, True)

    # The default is the boolean False, so the boolean True has to be
    # honoured; the string 'True' is still accepted for existing callers
    bCompact = isCompact is True or isCompact == 'True'

    try:
        with open(sFile, "w") as outfile:
            try:
                if bCompact:
                    json.dump(sData, outfile)
                else:
                    json.dump(sData, outfile, indent=4)

            except Exception:
                sTxt = "Unexpected error: " + str(sys.exc_info()[0])
                sndMSG(sTxt, 'ERROR', sFuncName)
                return (False)

        return (True)

    except IOError as e:
        sTxt = str("I/O error({0}): {1}".format(e.errno, e.strerror))
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    except Exception:
        sTxt = "Unexpected error: " + str(sys.exc_info()[0])
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)
def getRmt_File2(dicCreds,sFile):
    '''
    - Simple URL Retrieval function based on python's urllib2
    - With chunking and download update

    Keyword arguments:
    dicCreds -- Dictionary whose 'URI' entry is the remote location to fetch
    sFile    -- Local File Path and File name the download is saved to

    Returns:
    True when the download completed, False when the URL could not be
    opened. Errors raised while reading or writing are not caught here.
    '''
    import sys
    import urllib2

    sFuncName = 'getRmtFile2'
    rmtFile = dicCreds['URI']

    #"Mozilla/5.0 (X11 U Linux i686) Gecko/20071127 Firefox/2.0.0.11"

    ### Add Proxy
    # SOURCE: https://docs.python.org/2.4/lib/urllib2-examples.html
    # proxy_handler = urllib2.ProxyHandler({'http': 'http://www.example.com:3128/'})
    # proxy_auth_handler = urllib2.HTTPBasicAuthHandler()
    # proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
    #
    # opener = build_opener(proxy_handler, proxy_auth_handler)
    # This time, rather than install the OpenerDirector, we use it directly:
    # opener.open('http://www.example.com/login.html'
    #
    # ## Mixed HTTPS HTTP environment
    # urllib2.ProxyHandler({'https': 'http://*****:*****@proxy:3128' }))
    ###

    ### Basic HTTP Authentication
    # SOURCE: https://docs.python.org/2.4/lib/urllib2-examples.html
    # ##Create an OpenerDirector with support for Basic HTTP Authentication...
    # auth_handler = urllib2.HTTPBasicAuthHandler()
    # auth_handler.add_password('realm', 'host', 'username', 'password')
    # opener = urllib2.build_opener(auth_handler)
    # ##...and install it globally so it can be used with urlopen.
    # urllib2.install_opener(opener)
    # urllib2.urlopen('http://www.example.com/login.html')
    ###

    ### Cookie Handler
    # jar = cookielib.FileCookieJar("cookies")
    # opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    ###

    ### Modify User Agent
    # opener = urllib2.build_opener()
    # opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    # opener.open('http://www.example.com/')
    ###

    try:
        objURL = urllib2.urlopen(rmtFile)

    except Exception:
        sTxt = "Unexpected error: " + str(sys.exc_info()[0])
        sndMSG(sTxt,'ERROR',sFuncName)
        return(False)

    try:
        # Content-Length is optional (e.g. chunked responses), so a missing
        # or malformed header must not abort the download
        lstLength = objURL.info().getheaders("Content-Length")
        try:
            file_size = int(lstLength[0])
        except (IndexError, ValueError):
            file_size = None

        sTxt = "Downloading: %s Bytes: %s" % (sFile, file_size)
        sndMSG(sTxt,'INFO',sFuncName)

        file_size_dl = 0
        block_sz = 8192

        # The with block closes the output file even if a read or write fails
        with open(sFile, 'wb') as objFile:
            while True:
                chunk = objURL.read(block_sz)
                if not chunk:
                    break

                file_size_dl += len(chunk)
                objFile.write(chunk)

                if file_size:
                    status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
                else:
                    # No percentage without a known, non-zero total size
                    status = r"%10d" % file_size_dl
                status = status + chr(8)*(len(status)+1)
                sndMSG(status,'INFO',sFuncName)
    finally:
        objURL.close()

    return(True)
def cnvt_CSV2Dict(srcCSV, dialect=None, sCommentFlag=None, strEncoding=None):
    """
    Parses a CSV file into a dictionary of rows

    Keyword arguments:
    srcCSV       -- File Path and File name of the CSV file
    dialect      -- CSV dialect handed to csv.reader; its 'header' attribute
                    tells whether the first kept row holds the column names
    sCommentFlag -- A row is skipped when this text appears in its first
                    column; defaults to '#'
    strEncoding  -- Encoding used to decode every value; defaults to 'utf-8'

    Returns:
    Dictionary keyed by row number (starting at 0), each value being a
    dictionary keyed by column name when a header row was read, or by
    column number otherwise
        example: {0: {'date': u'...', 'ip': u'...'}}
    None when srcCSV or dialect is missing
    """
    import csv

    _sFuncName = 'cnvtFiles.cnvt_CSV2Dict'
    sTxt = "Called... "
    sndMSG(sTxt, 'INFO', _sFuncName)

    bDebug = False

    if srcCSV is None or dialect is None:
        return (None)

    if sCommentFlag is None:
        sCommentFlag = '#'

    if strEncoding is None:
        strEncoding = 'utf-8'

    if bDebug:
        sTxt = "\--[ CSV File Location ]--> " + str(srcCSV)
        sndMSG(sTxt, 'INFO', _sFuncName)

    with open(srcCSV, 'rb') as dataSrc:
        if bDebug:
            sTxt = "\--[ input File charCnt ]--> " + str(len(dataSrc.read()))
            sndMSG(sTxt, 'INFO', _sFuncName)
            dataSrc.seek(0)

        data = csv.reader(dataSrc, dialect=dialect)

        if bDebug:
            sTxt = "\--[ output CSV RowCnt ]--> " + str(data)
            sndMSG(sTxt, 'INFO', _sFuncName)

        csvDict = {}
        listHeaders = []
        iRow = 0

        for row in data:
            if len(row) < 1:
                continue

            if sCommentFlag in str(row[0]):
                continue

            # The first kept row supplies the column names
            if dialect.header == True and len(listHeaders) < 1:
                listHeaders = row
                continue

            dicRow = {}
            for iCol, col in enumerate(row):
                try:
                    col = unicode(col, strEncoding)
                except Exception:
                    col = " (NOTE: This data was modified from origin, due to non " + strEncoding + " compatiable chars) " + unicode(
                        col, "utf-8", errors='ignore')

                # Use the header name whenever one exists for this column
                # (a one-column header included); columns beyond the header
                # fall back to their number instead of raising IndexError
                if iCol < len(listHeaders):
                    dicRow[listHeaders[iCol]] = col
                else:
                    dicRow[iCol] = col

            csvDict[iRow] = dicRow
            iRow += 1

        if bDebug:
            sTxt = "\--[ output Dict RowCnt ]--> " + str(len(csvDict))
            sndMSG(sTxt, 'INFO', _sFuncName)

    return (csvDict)
def _getFile_OBJ(sFile, MAXFILESIZE=None, FILEENCODING=None, sPassFuncName=None):
    """
    Code Broken, do not use

    Reads the whole content of the received file

    Keyword arguments:
    sFile         -- File Path and File name to be read
    MAXFILESIZE   -- Size limit in bytes, defaults to 1GB; only logged, the
                     size check itself is commented out below
    FILEENCODING  -- Encoding name, defaults to 'utf-8'; only logged, it is
                     not applied when the file is opened
    sPassFuncName -- Currently unused

    Returns:
    The file content as returned by read(), or None when the file does not
    exist or could not be read
    """
    import os
    import sys

    #if sPAssFuncName == None:
    #    sFuncName = 'mngFiles._getFile_OBJ'
    #else:
    #    sFuncName = sPAssFuncName + "(" + sFuncName + ")"

    sFuncName = 'mngFiles._getFile_OBJ'
    sTxt = "Attemping to open this file: " + sFile
    sndMSG(sTxt, 'INFO', sFuncName)

    if MAXFILESIZE is None:
        MAXFILESIZE = 1000000000  # => 1GB
    else:
        # str() is required: concatenating a numeric limit raises TypeError
        sTxt = "\--> MAXFILESIZE was changed to: " + str(MAXFILESIZE)
        sndMSG(sTxt, 'INFO', sFuncName)

    if FILEENCODING is None:
        FILEENCODING = 'utf-8'
    else:
        sTxt = "\--> FILEENCODING was changed to: " + str(FILEENCODING)
        sndMSG(sTxt, 'INFO', sFuncName)

    #check Existance of file
    if not os.path.exists(sFile):
        sTxt = "\--> This file or file path does not exist: " + sFile
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    #check File size
    #iSize = os.path.getsize(sFile)
    #if iSize > MAXFILESIZE:
        #sTxt = "\--> This file is larger than MAXFILESIZE: " + sFile
        #sndMSG(sTxt,'ERROR',sFuncName)
        #return(None)

    try:
        # Leaving the with block closes the file
        with open(sFile) as data:
            oFile = data.read()

    except IOError as e:
        sTxt = str("\--> I/O error({0}): {1}".format(e.errno, e.strerror))
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    except Exception:
        sTxt = "\--> Unexpected error: " + str(sys.exc_info()[0])
        sndMSG(sTxt, 'ERROR', sFuncName)
        return (None)

    sTxt = "\--> File was successfully Opened"
    sndMSG(sTxt, 'INFO', sFuncName)
    return (oFile)