def UpdateIDF(self, db):
    """Recompute the IDF column for every stemmed word.

    For each WordID found in ``Constants.TextCatBagOfStemmedWordsTable``
    the number of rows carrying that WordID is counted and
    ``log10(self.DocID / count)`` is written to the row's ``IDF`` column.
    Progress is published through ``self.ParseStatus``,
    ``self.ElapsedTime`` and ``self.SendEvent()``.

    Args:
        db: database wrapper providing ``FetchAllRows(query)`` and
            ``ExecuteNonQuery(query, params)``.
    """
    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
        time.time() - self.StartTime)
    self.ParseStatus = "Updating Inverse Document Frequency!"
    self.SendEvent()

    # Update Stemmed Words IDF
    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
        time.time() - self.StartTime)
    self.SendEvent()

    totalDocs = self.DocID
    table = Constants.TextCatBagOfStemmedWordsTable
    queryIDF = ("select WordID, Count(WordID) from " + table +
                " group by WordID order by WordID;")
    # The statement is built once and the values are bound as parameters:
    # this avoids re-formatting SQL text per row and keeps the full float
    # precision that the old "'%f'" interpolation truncated to 6 decimals.
    updateQuery = "update %s set IDF = ? where WordID = ?" % table

    for row in db.FetchAllRows(queryIDF):
        # row[1] comes from a GROUP BY count, so it is at least 1.
        idf = math.log(totalDocs / float(row[1]), 10)
        db.ExecuteNonQuery(updateQuery, (idf, row[0]))

    self.ParseStatus = "Done Updating Inverse Document Frequency!"
    self.SendEvent()
def run(self):
    """Read the source drive in 16 MiB chunks and queue them for writing.

    Every chunk read from ``self.rootDrive`` is put on the module-level
    ``buffer``; when a read returns no data the module-level ``sentinel``
    is queued instead and the loop ends.  The loop also ends, without
    queueing the sentinel, as soon as ``self.keepGoing`` is false.

    When ``self.verifyImages`` is set the chunks are additionally fed
    into the module-level ``diskSHA1`` digest, which is printed once
    reading has finished.  After each chunk ``self.EstimatedTime``,
    ``self.gaugeValue``, ``self.rateInfo`` and ``self.Status`` are
    refreshed and ``self.SendEvent()`` is called.
    """
    global buffer, sentinel
    global diskSHA1

    if self.verifyImages:
        diskSHA1 = hashlib.sha1()

    size = 0
    rfin = Win32RawIO.Win32RAWIO(self.rootDrive, 'r')
    try:
        try:
            driveSize = rfin.size
            self.EstimatedTime = ""
            while self.keepGoing:
                self.readTime = time.time()
                data = rfin.read(1024 * 1024 * 16)
                if not data:
                    buffer.put(sentinel)
                    break
                buffer.put(data)
                self.ElapsedTime = time.time()

                # A short final chunk can complete within one clock tick;
                # clamp the interval so the rate never divides by zero.
                chunkSeconds = max(
                    float(self.ElapsedTime - self.readTime), 1e-6)
                rate = float(len(data)) / chunkSeconds

                if self.verifyImages:
                    diskSHA1.update(data)
                    # The estimate is scaled by the number of images.
                    workload = float(driveSize * len(self.listImageNames))
                else:
                    workload = float(driveSize)
                self.EstimatedTime = \
                    CommonFunctions.ConvertSecondsToDayHourMinSec(
                        workload / rate)

                size += len(data)
                self.gaugeValue = (float(size) / float(driveSize)) * 100
                self.rateInfo = "%.2fMB of %.2fMB at %.2fMB/sec" % (
                    size / 1024. / 1024, driveSize / 1024. / 1024,
                    rate / 1024. / 1024)
                self.Status = "Creating Image... [%.2f%s" % (
                    self.gaugeValue, "%]")
                self.SendEvent()
        finally:
            # Release the raw device handle even when a read/queue call fails.
            rfin.close()

        if self.verifyImages:
            print("Disk SHA1: %s" % diskSHA1.hexdigest().upper())
    finally:
        # Never leave observers believing the reader is still active.
        self.running = False
def DumpBitMapInDatabase(self, db):
    """Store every keyword bitmap in the bitmap index table, in batches.

    Each entry of ``Globals.EmailsBitMap`` becomes one row made of the
    keyword, its ``'bitmap'`` value and that bitmap run-length encoded
    with ``binascii.rlecode_hqx`` and passed through
    ``MySQLdb.escape_string``.  Rows are sent to ``db.ExecuteMany`` in
    groups of at most 10000.

    A failing batch is reported on stdout (statement plus the first row
    of the batch) and skipped; the remaining batches are still written.

    Args:
        db: database wrapper providing ``ExecuteMany(query, rows)``.
    """
    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
        time.time() - self.StartTime)
    self.ParseStatus = "Updating Database Index!"
    self.SendEvent()

    batchSize = 10000
    query = ("INSERT INTO " + Constants.TextCatBitMapIndex +
             " (Keyword, Bitmap, Compressed) values (?,?,?)")

    def flush(rows):
        # Best effort: report the failing batch and carry on.
        try:
            db.ExecuteMany(query, rows)
        except Exception:
            print("Exception query:: " + query + str(rows[0]))

    manyValues = []
    for kword in Globals.EmailsBitMap:
        bitmap = Globals.EmailsBitMap[kword]['bitmap']
        manyValues.append(
            (kword, bitmap,
             MySQLdb.escape_string(binascii.rlecode_hqx(bitmap))))
        # Test the batch length itself: the former running counter was
        # never reset, so only the very first 10000 rows were batched and
        # everything after them accumulated in memory.
        if len(manyValues) >= batchSize:
            flush(manyValues)
            manyValues = []

    if manyValues:
        flush(manyValues)
def OnTimer1Timer(self, event):
    """Refresh the progress labels from the scan thread's counters.

    Shows the directory count, the time elapsed since ``self.StartTime``
    and the file count, then calls ``self.RefreshLabels()`` and lets the
    event propagate with ``event.Skip()``.
    """
    secondsRunning = time.time() - self.StartTime
    elapsedText = CommonFunctions.ConvertSecondsToDayHourMinSec(
        secondsRunning)

    dirText = PlatformMethods.Convert(self.scanThread.GetDirCount())
    self.lblTotalDir.SetLabel(dirText)

    self.lblElapsedTime.SetLabel(PlatformMethods.Convert(elapsedText))

    fileText = PlatformMethods.Convert(self.scanThread.GetFilesCount())
    self.lblFilesCount.SetLabel(fileText)

    self.RefreshLabels()
    event.Skip()
def OnTimer1Timer(self, event):
    """Refresh the progress labels from the module-level scan counters.

    Reads ``StartTime``, ``DirCount``, ``FilesCount`` and
    ``KnownFilesCount`` from module scope, updates the matching labels,
    then calls ``self.RefreshLabels()`` and ``event.Skip()``.
    """
    global StartTime, DirCount, FilesCount, KnownFilesCount

    secondsRunning = time.time() - StartTime
    elapsedText = CommonFunctions.ConvertSecondsToDayHourMinSec(
        secondsRunning)

    # Same label order as before: directories, elapsed, files, known files.
    labelUpdates = (
        (self.lblTotalDir, DirCount),
        (self.lblElapsedTime, elapsedText),
        (self.lblFilesCount, FilesCount),
        (self.lblKnownFilesCount, KnownFilesCount),
    )
    for label, value in labelUpdates:
        label.SetLabel(PlatformMethods.Convert(value))

    self.RefreshLabels()
    event.Skip()
def UpdateIDF(self, db):
    """Refresh the IDF value stored for every stemmed word.

    Counts the rows per WordID in
    ``Constants.TextCatBagOfStemmedWordsTable`` and stores
    ``log10(self.DocID / count)`` in the ``IDF`` column through a
    parameterized UPDATE.  Status is published via ``self.ParseStatus``
    and ``self.SendEvent()`` before and after the pass.

    Args:
        db: database wrapper providing ``FetchAllRows(query)`` and
            ``ExecuteNonQuery(query, params)``.
    """
    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
        time.time() - self.StartTime)
    self.ParseStatus = "Updating Inverse Document Frequency!"
    self.SendEvent()

    # Update Stemmed Words IDF
    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
        time.time() - self.StartTime)
    self.SendEvent()

    documentTotal = self.DocID
    stemmedTable = Constants.TextCatBagOfStemmedWordsTable
    frequencyQuery = ("select WordID, Count(WordID) from " + stemmedTable +
                      " group by WordID order by WordID;")
    updateStatement = "update %s set IDF = ? where WordID = ?" % (
        stemmedTable)

    for frequencyRow in db.FetchAllRows(frequencyQuery):
        wordID = frequencyRow[0]
        occurrences = float(frequencyRow[1])
        inverseFrequency = math.log(documentTotal / occurrences, 10)
        db.ExecuteNonQuery(updateStatement, (inverseFrequency, wordID))

    self.ParseStatus = "Done Updating Inverse Document Frequency!"
    self.SendEvent()
def OnTimer1Timer(self, event):
    """Refresh the progress labels and wind the dialog down when done.

    Updates the directory, elapsed-time and file labels from
    ``self.scanThread``.  Once ``self.scanThread.running`` is false the
    throbber and ``self.timer1`` are stopped, the OK button is shown and
    the Cancel button hidden.  Always finishes with
    ``self.RefreshLabels()`` and ``event.Skip()``.
    """
    secondsRunning = time.time() - self.StartTime
    elapsedText = CommonFunctions.ConvertSecondsToDayHourMinSec(
        secondsRunning)

    dirText = PlatformMethods.Convert(self.scanThread.GetDirCount())
    self.lblTotalDir.SetLabel(dirText)
    self.lblElapsedTime.SetLabel(PlatformMethods.Convert(elapsedText))
    fileText = PlatformMethods.Convert(self.scanThread.GetFilesCount())
    self.lblFilesCount.SetLabel(fileText)

    scanFinished = not self.scanThread.running
    if scanFinished:
        self.throbber1.Stop()
        self.btnOK.Show(True)
        self.btnCancel.Show(False)
        self.timer1.Stop()

    self.RefreshLabels()
    event.Skip()
def OnUpdate(self, evt):
    """Apply a progress event from the write-to-disk worker to the dialog.

    Reads ``elapsedTime``, ``scanStatus``, ``gaugeValue``,
    ``estimatedTime`` and ``rate`` from ``evt``.  When the status text is
    exactly "Done Writing To Disk!" the dialog is switched to its
    finished state: OK shown, Cancel hidden, both gauges set to 100 and
    ``self.timer`` stopped.
    """
    secondsRunning = evt.elapsedTime - self.StartTime
    self.lblElapsedTime.SetLabel(
        CommonFunctions.ConvertSecondsToDayHourMinSec(secondsRunning))
    self.lblScanStatus.SetLabel(PlatformMethods.Decode(evt.scanStatus))

    percentDone = int(evt.gaugeValue)
    self.gaugeDDToDiskProgress.SetValue(percentDone)
    self.lblEstimatedTime.SetLabel(evt.estimatedTime)
    self.lblRate.SetLabel(evt.rate)

    writeFinished = str(evt.scanStatus) == "Done Writing To Disk!"
    if writeFinished:
        self.btnOK.Show(True)
        self.btnCancel.Show(False)
        self.gaugeDDToDiskProgress.SetValue(100)
        self.gaugePulse.SetValue(100)
        self.timer.Stop()

    self.RefreshLabels()
    evt.Skip()
def SearchKeywords(self):
    """Close the outputs and announce that preprocessing/indexing is done.

    Closes ``self.fout`` and the database connection, sends one event
    with the current state, then records the total elapsed time, sets
    ``self.ParseStatus`` to the completion message, sends a second event
    and clears ``self.running``.
    """
    self.fout.close()
    # NOTE(review): `db` is not a parameter or local of this method; it
    # must resolve at module scope or this line raises NameError - confirm.
    db.CloseConnection()
    self.SendEvent()

    secondsTaken = time.time() - self.StartTime
    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
        secondsTaken)
    self.ParseStatus = "Done Preprocessing/Indexing!"
    self.SendEvent()
    self.running = False
def CreateBloomFilter():
    """Feed the first column of every NSRL table into a Bloom filter.

    Opens ``Constants.NSRLDBName`` as the source and "NSRLBloom.db" as
    the target, makes sure the target has a ``BloomFilter`` table, then
    adds ``row[0]`` of every row of every source table to a filter built
    with n=1000000 keys, m=10000000 bits and k=4 hash functions.  The
    time taken is printed.

    The filter is not written to the target database (that statement is
    disabled in the original code) and nothing is returned.  Returns
    early, with every opened connection closed, if either database
    cannot be opened.
    """
    db = SqliteDatabase(Constants.NSRLDBName)
    if not db.OpenConnection():
        return
    try:
        # m = no. of bits for vector
        # n = no. of elements or keys to support queries
        # k = no. of hash functions
        m = 10000000
        n = 1000000
        k = 4
        BFilter = BloomFilter.BloomFilter(n=n, m=m, k=k)

        db1 = SqliteDatabase("NSRLBloom.db")
        if not db1.OpenConnection():
            # The outer finally closes `db`; previously it leaked here.
            return
        try:
            query = """create table if not exists BloomFilter(
                BloomFilter blob);"""
            db1.ExecuteNonQuery(query)

            query = """SELECT name FROM sqlite_master
                WHERE type='table'
                ORDER BY name;
                """
            tables = db.FetchAllRows(query)

            start = time.time()
            for table in tables:
                # Each row holds a single column: the table name.
                rows = db.FetchAllRows('select * from %s;' % table[0])
                for row in rows:
                    BFilter.add(row[0])
            end = time.time()

            # Two spaces reproduce the output of the former Python 2
            # statement `print 'time taken = ', value`.
            print('time taken =  %s' %
                  CommonFunctions.ConvertSecondsToDayHourMinSec(end - start))
        finally:
            db1.CloseConnection()
    finally:
        db.CloseConnection()
def OnUpdate(self, evt):
    """Apply a progress event from the imaging worker to the dialog.

    Always updates the elapsed-time and status labels.  The gauge,
    estimated-time and rate widgets are updated on a best-effort basis:
    any error while doing so is ignored.  When the status text is
    exactly "Done Creating Image!" the dialog is switched to its
    finished state and the completion time is printed.
    """
    secondsRunning = evt.elapsedTime - self.StartTime
    self.lblElapsedTime.SetLabel(
        CommonFunctions.ConvertSecondsToDayHourMinSec(secondsRunning))
    self.lblScanStatus.SetLabel(PlatformMethods.Decode(evt.scanStatus))

    try:
        self.gaugeDDProgress.SetValue(int(evt.gaugeValue))
        self.lblEstimatedTime.SetLabel(evt.estimatedTime)
        self.lblRate.SetLabel(evt.rate)
    except:
        # Deliberately swallowed, exactly as before.
        pass

    imagingFinished = str(evt.scanStatus) == "Done Creating Image!"
    if imagingFinished:
        self.btnOK.Show(True)
        self.btnCancel.Show(False)
        self.gaugeDDProgress.SetValue(100)
        self.gaugePulse.SetValue(100)
        self.timer.Stop()
        # NOTE(review): this binds a local that is never read; possibly a
        # module-level flag was intended - confirm before changing.
        doneReading = True
        print("Done Creating Images at: %s" % time.asctime())

    self.RefreshLabels()
    evt.Skip()
def Run(self): db = SqliteDatabase(Globals.EmailsFileName) if not db.OpenConnection(): return self.bloomFilter = self.CreateBloomFilter() #self.bloomFilter = None logFileName = PlatformMethods.Decode( os.path.join( Globals.CasePath, (Globals.EmailsFileName[Globals.EmailsFileName.rfind(os.sep) + 1:] + '.log'))) self.fout = open(logFileName, 'ab') #print self.CheckedMimeTypes self.fout.write( 'Parsing/Indexing Emails Attachments Started at: %s\n' % (time.ctime())) if self.AddressBookPath: self.ParseStatus = "Parsing Address book..." self.SendEvent() AddressBookParser = OutlookAddressBook.AddressBookParser( Globals.AddressBookDict) for root, dirs, files in os.walk(self.AddressBookPath): for eachfile in files: filePath = os.path.join(root, eachfile) self.FilesCount += 1 if (filePath.rfind('.') == -1): continue #print filePath extension = filePath[filePath.rfind('.'):] #print 'extension ', extension if extension.lower() == ".csv": AddressBookParser.Parse(filePath) #print 'add book parsed' else: self.fout.write('No Addressbook path found!\n') #Updte Addressbook query1 = "insert into " + Constants.AddressBookTable + "(EmailID, FirstName, MiddleName, LastName, InBook) values (?,?,?,?,?)" ManyValues = [] for key in Globals.AddressBookDict: #'EmailID': email, 'FirstName': firstName, 'MiddleName': middleName, 'LastName': lastName, 'InBook':1} ManyValues.append((Globals.AddressBookDict[key]['EmailID'], Globals.AddressBookDict[key]['FirstName'], Globals.AddressBookDict[key]['MiddleName'], Globals.AddressBookDict[key]['LastName'], Globals.AddressBookDict[key]['InBook'])) #query = "delete from %s"%Constants.AddressBookTable #db.ExecuteNonQuery(query) #print ManyValues db.ExecuteMany(query1, ManyValues) #self.ParseStatus = "Done Preprocessing/Indexing Emails!" 
#return textParser = TextParser.TextParser(db, Globals.EmailsStopwords, self.Stemmer, bloomFilter=self.bloomFilter) docxParser = DocxParser.DocxParser(db, Globals.EmailsStopwords, self.Stemmer, bloomFilter=self.bloomFilter) docParser = DocParser.DocParser(db, Globals.EmailsStopwords, self.Stemmer, bloomFilter=self.bloomFilter) docQuery = "insert into %s (DocPath, DocType) values (?, ?)" % ( Constants.TextCatDocumentsTable) if self.AttachmentsPath: for root, dirs, files in os.walk(self.AttachmentsPath): for eachfile in files: filePath = os.path.join(root, eachfile) fileNameList = eachfile.split() if len(fileNameList) >= 2: dateTimeFileName = "%s %s - %s" % ( fileNameList[0], (fileNameList[1].replace(".", ":")), (eachfile[eachfile.rfind('-') + 1:])) if self.AttachmentsDict.has_key(dateTimeFileName): self.AttachmentsDict[dateTimeFileName].append( filePath) else: self.AttachmentsDict[dateTimeFileName] = [filePath] #print 'Intersting! more than 1 attach. file found with same date time: %s'% #else: # self.AttachmentsDict[dateTimeFileName] = filePath else: self.fout.write( 'Attachment filename found without date time: %s\n' % (PlatformMethods.Encode(filePath))) #AttachmentsDict, Stopwords=[], Stemmer=None self.outlookTextParser = OutlookTextParser.OutlookTextParser( db, self.AttachmentsDict, Globals.EmailsStopwords, self.Stemmer, bloomFilter=self.bloomFilter, logFile=self.fout) if self.IndexMessages: self.ParseStatus = "Parsing and Indexing Emails..." else: self.ParseStatus = "Parsing Email Headers..." 
self.SendEvent() for root, dirs, files in os.walk(self.EmailsPath): if not self.keepGoing: self.running = False return for eachfile in files: self.FilesCount += 1 if not self.keepGoing: self.running = False return filePath = os.path.join(root, eachfile) #print filePath if (filePath.rfind('.') == -1): continue try: extension = filePath[filePath.rfind('.'):] fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension( extension) if fileType: mimeType = fileType.GetMimeType() or "unknown" if mimeType == "text/plain": try: self.outlookTextParser.parse( filePath, self.IndexMessages) except Exception, msg: self.fout.write( 'Error Parsing Message: %s Msg:: %s\n' % (PlatformMethods.Encode(filePath), msg)) self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) if (time.time() - self.EventStart) > 10: self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) self.SendEvent() except Exception, value: #try: self.fout.write( "Error Parsing Message: %s Msg: %s\n" % (PlatformMethods.Encode(filePath), str(value))) self.fout.flush()
self.fout.write('Total Time Taken: %s\n\n' % (self.ElapsedTime)) if self.IndexAttachments: self.ParseStatus = "Indexing Attachments..." self.SendEvent() for key in self.AttachmentsDict: for eachfile in self.AttachmentsDict[key]: #try: filePath = os.path.join(root, eachfile) self.FileScanStartTime = time.time() self.FilesCount += 1 dotIndex = filePath.rfind('.') if dotIndex == -1: self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) if (time.time() - self.EventStart) > 10: self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) self.SendEvent() continue extension = filePath[dotIndex:] fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension( extension) if fileType: parsed = False mimeType = fileType.GetMimeType() or "unknown" #if self.CheckedMimeTypes: if mimeType not in self.CheckedMimeTypes:
def run(self):
    """Walk ``self.rootDir`` and scan every file found.

    Opens the KnownFiles, Errors and Progress logs under
    ``Globals.CasePath``, opens the MAC database (and the NSRL database
    when its file exists), recreates the file-system, thumbnail and MAC
    tables for ``self.EvidenceID``, posts a "Scan in progress..." event
    and resets the three timeline ranges to -1.

    For every directory the module-level ``DirCount`` is increased by
    the number of sub-directories, each file is logged and passed to
    ``self.ScanFileInfo``, and the directory together with its pickled
    sub-directory list is put on ``SubDirQueue``.  A failure on one file
    is written to the error log and the walk continues.
    """
    global FileInfoQueue, ScanDone, ThumbnailQueue, SubDirQueue
    global StartTime, DirCount, FilesCount, TotalImages, KnownFilesCount

    casePath = Globals.CasePath
    self.knownFilesLog = open(os.path.join(casePath, 'KnownFiles.log'), 'w')
    self.errorLog = open(os.path.join(casePath, 'Errors.log'), 'w')
    self.progressLog = open(
        os.path.join(Globals.CasePath, 'Progress.log'), 'w')

    self.dbMAC = SqliteDatabase(Globals.MACFileName)
    if not self.dbMAC.OpenConnection():
        return

    self.dbNSRL = None
    if os.path.exists(Constants.NSRLDBName):
        self.dbNSRL = SqliteDatabase(Constants.NSRLDBName)
        self.dbNSRL.OpenConnection()

    DBFunctions.CreateFileSystemTable(
        Globals.FileSystemName, self.EvidenceID, True)
    DBFunctions.CreateThumbnailsTable(
        Globals.ImagesFileName, self.EvidenceID, True)
    DBFunctions.CreateMACTables(
        Globals.MACFileName, self.EvidenceID, drop=True)

    if self.EvidenceID not in Globals.EvidencesDict:
        Globals.EvidencesDict[self.EvidenceID] = {}

    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
        time.time() - StartTime)
    progressEvent = UpdateLabelEvent(
        elapsedTime=self.ElapsedTime,
        KnownFilesCount=KnownFilesCount,
        totalDir=DirCount,
        filesCount=FilesCount,
        scanStatus="Scan in progress...")
    wx.PostEvent(self.win, progressEvent)

    self.MimeTypeDict = {}
    # Each timestamp kind gets its own fresh "unknown" range.
    for stampKind in ('Created', 'Modified', 'Accessed'):
        Globals.TimelinesDict[stampKind] = {
            'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1}
    self.UnzipFileNameDict = {}

    for root, dirs, files in os.walk(self.rootDir):
        DirCount += len(dirs)
        self.SubDirList = dirs
        for afile in files:
            try:
                self.progressLog.write("%s\n" % (os.path.join(root, afile)))
                self.ScanFileInfo(root, afile)
            except Exception as value:
                self.errorLog.write(
                    '%s; ScanError: %s\n' % (os.path.join(root, afile),
                                             value))
                self.errorLog.flush()
        SubDirQueue.put((root, cPickle.dumps(self.SubDirList)))
class KeywordsScanThread:
    """Background worker that runs the keyword search over selected folders.

    The directories come from ``Globals.KeywordsSearchDirList`` and the
    MIME types to search from ``Globals.KeywordsSearchCategoryList``.
    ``Start`` launches ``Run`` on a new thread, ``Stop`` asks it to end,
    and ``IsRunning`` reports whether it is still active.
    """

    def __init__(self, win, startTime):
        """Store the owner window and start time and prepare the tables.

        Args:
            win: object stored as ``self.win``.
            startTime: ``time.time()`` style value used as the origin of
                the elapsed-time display.
        """
        self.win = win
        self.StartTime = startTime
        self.DirCount = 0
        self.FilesCount = 0
        self.ElapsedTime = ""
        self.SearchStatus = "Search in Progress..."
        self.KeyColumnNames = ""
        self.EventStart = time.time()
        DBFunctions.CreateKeywordsFrequencyTable(Globals.KeywordsFileName,
                                                 True)
        self.InitializeKeyWordsFrequencyDictionary()

    def Start(self):
        """Mark the worker as running and start ``Run`` on a new thread."""
        self.keepGoing = self.running = True
        thread.start_new_thread(self.Run, ())

    def Stop(self):
        """Ask ``Run`` to stop before it processes the next file."""
        self.keepGoing = False

    def IsRunning(self):
        """Return the running flag set by ``Start`` and cleared by ``Run``."""
        return self.running

    def Run(self):
        """Search every listed directory and report progress.

        Directory entries containing "*.*" or that are not directories
        are skipped.  A regular file is passed to ``self.ReadFile`` when
        it has no extension, or when the MIME type wx reports for its
        extension is in ``Globals.KeywordsSearchCategoryList``.  A
        progress event is sent when more than 10 seconds have passed
        since ``self.EventStart``.  Errors on a file or a directory are
        printed and the search continues.

        On normal completion a "Done Searching!" event is sent; when
        stopped through ``Stop`` the method returns without it.  In every
        case the database connection is closed and ``self.running`` is
        cleared, so ``IsRunning()`` no longer reports true forever.
        """
        db = SqliteDatabase(Globals.KeywordsFileName)
        if not db.OpenConnection():
            self.running = False
            return

        try:
            try:
                completed = self._SearchDirectories(db)
            finally:
                db.CloseConnection()

            if completed:
                finishTime = time.time()
                self.ElapsedTime = \
                    CommonFunctions.ConvertSecondsToDayHourMinSec(
                        finishTime - self.StartTime)
                self.SearchStatus = "Done Searching!"
                self.SendEvent()
        finally:
            self.running = False

    def _SearchDirectories(self, db):
        """Scan all directories; return False if a stop was requested."""
        for dirPath in Globals.KeywordsSearchDirList:
            if dirPath.find("*.*") >= 0:
                continue
            if not os.path.isdir(dirPath):
                continue
            try:
                fileNames = os.listdir(dirPath)
                self.DirCount += 1
                for fileName in fileNames:
                    if not self.keepGoing:
                        return False
                    filePath = (dirPath + PlatformMethods.GetDirSeparator() +
                                fileName)
                    if os.path.isfile(filePath):
                        self._SearchFile(filePath, db)
            except Exception as value:
                print("Failed to read directory: %s Error: %s" % (dirPath,
                                                                  value))
                continue
        return True

    def _SearchFile(self, filePath, db):
        """Search one regular file if eligible and send periodic progress."""
        try:
            dotIndex = filePath.rfind('.')
            if dotIndex >= 0:
                extension = filePath[dotIndex:]
                fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(
                    extension)
                if fileType:
                    mimeType = fileType.GetMimeType() or "Unknown"
                    if mimeType in Globals.KeywordsSearchCategoryList:
                        self.ReadFile(filePath, db)
                        self.FilesCount += 1
            else:
                # Files without an extension are always searched.
                self.ReadFile(filePath, db)
                self.FilesCount += 1

            if (time.time() - self.EventStart) > 10:
                self.ElapsedTime = \
                    CommonFunctions.ConvertSecondsToDayHourMinSec(
                        time.time() - self.StartTime)
                self.SendEvent()
        except Exception as value:
            print("Failed to read file: %s Error: %s" % (filePath, value))
def Run(self):
    """Copy the raw source drive into every file of ``self.listImageNames``.

    The drive is read in 16 MiB chunks; each chunk is written to all
    image files and fed into MD5 and SHA1 digests.  After every chunk
    ``self.gaugeValue``, ``self.rateInfo`` and ``self.Status`` are
    refreshed and ``self.SendEvent()`` is called; ``self.EstimatedTime``
    is computed from the first chunk only.  The loop ends when a read
    returns no data or ``self.keepGoing`` becomes false.

    Afterwards both digests, the end time and the total elapsed time are
    printed, ``self.running`` is cleared and a "Done Creating Image!"
    event is posted to ``self.win``.
    """
    m = hashlib.md5()
    sha1 = hashlib.sha1()
    size = 0

    rfin = Win32RawIO.Win32RAWIO(self.rootDrive, 'r')
    fileObjects = []
    try:
        if self.SourceType == Constants.LogicalDrive:
            rootPath = self.rootDrive[len(self.rootDrive) - 2:]
            spc, bps, fc, c = Win32RawIO.GetDiskFreeSpace(rootPath)
            driveSize = c * spc * bps
        else:
            # Previously driveSize stayed 0 on this path, so the first
            # progress update divided by zero; use the size reported by
            # the raw device handle instead.
            driveSize = rfin.size
            print("size %dB %.2fMB" % (rfin.size, rfin.size / 1024. / 1024))
            print("Cylinders = %s" % rfin.cylinders)
            print("Mediatype = %s" % rfin.mediatype)
            print("Tracks/Cylinder = %s" % rfin.trackspercylinder)
            print("Sectors/Track = %s" % rfin.sectorspertrack)
            print("Bytes/Sector = %s" % rfin.bytespersector)

        startTime = time.time()
        print("Startime = %s" % time.asctime())
        self.EstimatedTime = ""

        for fileName in self.listImageNames:
            fileObjects.append(open(fileName, 'wb'))

        while self.keepGoing:
            self.readTime = time.time()
            data = rfin.read(1024 * 1024 * 16)
            if len(data) == 0:
                break
            for rfout in fileObjects:
                rfout.write(data)
            m.update(data)
            sha1.update(data)
            self.ElapsedTime = time.time()

            # A very fast chunk can complete within one clock tick; clamp
            # the interval so the rate never divides by zero.
            chunkSeconds = max(
                float(self.ElapsedTime - self.readTime), 1e-6)
            rate = float(len(data)) / chunkSeconds
            if size == 0:
                self.EstimatedTime = \
                    CommonFunctions.ConvertSecondsToDayHourMinSec(
                        float(driveSize) / rate)

            size += len(data)
            self.gaugeValue = (float(size) / float(driveSize)) * 100
            self.rateInfo = "%.2fMB of %.2fMB at %.2fMB/sec" % (
                size / 1024. / 1024, driveSize / 1024. / 1024,
                rate / 1024. / 1024)
            self.Status = "Creating Image... [%.2f" % self.gaugeValue
            self.Status += "%]"
            self.SendEvent()
    finally:
        # Close every handle even when a read or write fails part-way.
        for rfout in fileObjects:
            rfout.close()
        rfin.close()

    self.ElapsedTime = time.time()
    print("MD5 Hash = %s" % m.hexdigest().upper())
    print("SHA1 Hash = %s" % sha1.hexdigest().upper())
    print("Endtime = %s" % time.asctime())
    print("Total Elapsed Time = %s" %
          CommonFunctions.ConvertSecondsToDayHourMinSec(
              self.ElapsedTime - startTime))

    self.running = False
    # elapsedTime carries the raw finish timestamp, not a duration.
    evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                           gaugeValue=self.gaugeValue,
                           rate=self.rateInfo,
                           estimatedTime=self.EstimatedTime,
                           scanStatus="Done Creating Image!")
    wx.PostEvent(self.win, evt)
class MACScanThread:
    """Worker that resumes a file-system scan of ``rootDir``.

    Directories already recorded in the directory-list table are skipped;
    every other directory is rescanned file by file through
    ``self.ScanFileInfo`` and the collected rows are written to the
    file-system database in batches.
    """

    # Order in which the timeline kinds appear in the range queries.
    _TIMELINE_KINDS = ('Created', 'Modified', 'Accessed')
    _RANGE_FIELDS = ('MinDate', 'MaxDate', 'MinMonth', 'MaxMonth')

    def __init__(self, win, startTime, rootDir):
        """Store the scan parameters and pre-build the INSERT statements.

        Args:
            win: object that receives the posted progress events.
            startTime: ``time.time()`` style origin for elapsed time.
            rootDir: directory tree to scan.
        """
        self.win = win
        self.StartTime = startTime
        self.rootDir = rootDir
        self.CurrentDirectory = ""
        self.DirCount = 0
        self.FilesCount = 0
        self.TotalFiles = 0
        self.KnownFilesCount = 0
        self.ElapsedTime = ""
        self.EvidenceID = "Evidence1"
        self.TotalImages = 0
        self.ImageList = []
        self.UnzipRootFolder = "%s%s" % (Globals.CurrentEvidenceID,
                                         Constants.UnzipRootFolderName)
        self.dirListQuery = (
            "INSERT INTO %s%s (DirPath, SubDirList) values (?,?)" % (
                self.EvidenceID, Constants.DirListTable))
        self.query = (
            "INSERT INTO %s(Name, DirPath, Extension, MimeType, Category, "
            "Description, Size, Created, CDate, CMonth, Modified, MDate, "
            "MMonth, Accessed, ADate, AMonth, MD5,KnownFile,NewPath) values "
            "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" % (self.EvidenceID))
        self.imageQuery = (
            "INSERT INTO %s (DirPath, Filename, Thumbnail ) "
            "VALUES (?,?,?)" % (self.EvidenceID))
        self.EventStart = time.time()

    def Start(self):
        """Mark the worker as running and start ``Run`` on a new thread."""
        self.keepGoing = self.running = True
        thread.start_new_thread(self.Run, ())

    def Stop(self):
        """Clear ``self.keepGoing`` to request a stop."""
        self.keepGoing = False

    def IsRunning(self):
        """Return the running flag set by ``Start`` and cleared by ``Run``."""
        return self.running

    def GetDirCount(self):
        """Return the number of directories counted so far."""
        return self.DirCount

    def GetFilesCount(self):
        """Return the current value of ``self.FilesCount``."""
        return self.FilesCount

    def GetKnownFilesCount(self):
        """Return the current value of ``self.KnownFilesCount``."""
        return self.KnownFilesCount

    def CheckDirPathExists(self, DirList, dirPath):
        """Return True if the encoded ``dirPath`` equals ``row[0]`` of a row.

        Args:
            DirList: iterable of rows whose first column is a directory path.
            dirPath: path to look for; compared after
                ``PlatformMethods.Encode``.
        """
        # The encoded form does not change per row, so compute it once.
        encodedPath = PlatformMethods.Encode(dirPath)
        for row in DirList:
            if encodedPath == row[0]:
                return True
        return False

    def _OpenDatabases(self):
        """Open the file-system, image and MAC databases.

        Returns True when all three opened.  On the first failure the
        connections opened so far are closed again and False is returned.
        """
        opened = []
        targets = (('dbFileSystem', Globals.FileSystemName),
                   ('dbImage', Globals.ImagesFileName),
                   ('dbMAC', Globals.MACFileName))
        for attrName, fileName in targets:
            connection = SqliteDatabase(fileName)
            setattr(self, attrName, connection)
            if not connection.OpenConnection():
                for earlier in opened:
                    earlier.CloseConnection()
                return False
            opened.append(connection)
        return True

    def Run(self):
        """Resume the scan and store the results.

        Appends to the Errors and Progress logs, opens the databases,
        makes sure the tables exist (without dropping them), restores the
        timeline ranges from the rows already stored, then walks
        ``self.rootDir``.  Directories already present in the
        directory-list table are skipped; for the others any rows stored
        for that directory are deleted and its files are rescanned.
        Finally the timeline ranges are stored, all connections and logs
        are closed, a "Done Scanning!" event is posted and
        ``self.running`` is cleared.

        If one of the three databases cannot be opened the method closes
        what it opened, clears ``self.running`` and returns.
        """
        errorLogFile = os.path.join(Globals.CasePath, 'Errors.log')
        self.errorLog = open(errorLogFile, 'ab')
        self.errorLog.write('Resume Started at: %s\n' % (time.ctime()))
        self.progressLog = open(
            os.path.join(Globals.CasePath, 'Progress.log'), 'ab')
        self.progressLog.write('Resume Started at: %s\n' % (time.ctime()))

        if not self._OpenDatabases():
            # Previously these paths returned with the logs and earlier
            # connections still open and self.running still True, so
            # anything polling the flag never saw the worker finish.
            self.progressLog.close()
            self.errorLog.close()
            self.running = False
            return

        self.dbNSRL = None
        if os.path.exists(Constants.NSRLDBName):
            self.dbNSRL = SqliteDatabase(Constants.NSRLDBName)
            self.dbNSRL.OpenConnection()

        DBFunctions.CreateFileSystemTable(Globals.FileSystemName,
                                          self.EvidenceID, False)
        DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName,
                                          self.EvidenceID, False)
        DBFunctions.CreateMACTables(Globals.MACFileName,
                                    self.EvidenceID,
                                    drop=False)

        if self.EvidenceID not in Globals.EvidencesDict:
            Globals.EvidencesDict[self.EvidenceID] = {}

        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            time.time() - self.StartTime)
        evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                               KnownFilesCount=self.KnownFilesCount,
                               totalDir=self.DirCount,
                               filesCount=self.FilesCount,
                               scanStatus="Scan in progress...")
        wx.PostEvent(self.win, evt)

        query = """
            select min(CDate), max(CDate), min(CMonth), max(CMonth),min(MDate), max(MDate), min(MMonth), max(MMonth),
            min(ADate), max(ADate), min(AMonth), max(AMonth) from Evidence1
            where CDate <> 0 and CMonth <>0 and CMonth <>0 and MDate<>0 and MDate<>0 and MMonth<>0 and MMonth<>0 and CMonth<>0
            and ADate<>0 and ADate<>0 and AMonth<>0 and AMonth<>0;
            """
        row = self.dbFileSystem.FetchOneRow(query)

        # Start from "unknown" ranges, then restore the stored ones.
        for kind in self._TIMELINE_KINDS:
            Globals.TimelinesDict[kind] = {
                'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1}
        # NOTE(review): an aggregate query may yield a row of NULLs for an
        # empty table, which would replace the -1 defaults - confirm.
        if row:
            for position, kind in enumerate(self._TIMELINE_KINDS):
                base = position * 4
                Globals.TimelinesDict[kind] = {
                    'MinDate': row[base],
                    'MaxDate': row[base + 1],
                    'MinMonth': row[base + 2],
                    'MaxMonth': row[base + 3]
                }

        self.FileList = []
        self.SubDirList = []

        # Count the extracted folders per base name (text before last '-').
        self.UnzipFileNameDict = {}
        extractRootPath = os.path.join(
            Globals.CasePath, '%s%s' %
            (Globals.CurrentEvidenceID, Constants.UnzipRootFolderName))
        for dirName in os.listdir(extractRootPath):
            dirNameOnly = dirName[:dirName.rfind('-')]
            self.UnzipFileNameDict[dirNameOnly] = \
                self.UnzipFileNameDict.get(dirNameOnly, 0) + 1

        query = "select DirPath from %s%s" % (Globals.CurrentEvidenceID,
                                              Constants.DirListTable)
        self.DBDirList = self.dbFileSystem.FetchAllRows(query)

        Globals.MimeTypeSet = set([])
        query = "select distinct(MimeType) from %s" % (
            Globals.CurrentEvidenceID)
        for row in self.dbFileSystem.FetchAllRows(query):
            Globals.MimeTypeSet.add(row[0])

        for root, dirs, files in os.walk(self.rootDir):
            # A directory listed in the table was fully scanned earlier.
            if self.CheckDirPathExists(self.DBDirList, root):
                continue

            # Drop rows left behind by an interrupted scan of this folder.
            query = "delete from %s where DirPath = %s;" % (
                Globals.CurrentEvidenceID, self.dbFileSystem.SqlSQuote(root))
            self.dbFileSystem.ExecuteNonQuery(query)

            self.DirCount += len(dirs)
            self.SubDirList = dirs
            for afile in files:
                try:
                    self.progressLog.write(
                        "%s\n" %
                        (PlatformMethods.Encode(os.path.join(root, afile))))
                    self.ScanFileInfo(root, afile)
                    if len(self.FileList) >= Constants.MaxFileInfoToHold:
                        self.dbFileSystem.ExecuteMany(self.query,
                                                      self.FileList)
                        self.FileList = None
                        self.FileList = []
                except Exception as value:
                    self.errorLog.write(
                        '%s; ScanError: %s\n' %
                        (PlatformMethods.Encode(os.path.join(
                            root, afile)), PlatformMethods.Encode(value)))
                    self.errorLog.flush()

            # Recorded only after the folder is done, so a later resume
            # can skip it.
            self.dbFileSystem.ExecuteMany(
                self.dirListQuery, [(root, cPickle.dumps(self.SubDirList))])

        self.dbFileSystem.ExecuteMany(self.query, self.FileList)
        self.FileList = None

        query = "INSERT INTO %s%s (CMinDate,CMaxDate,CMinMonth,CMaxMonth, MMinDate,MMaxDate,MMinMonth,MMaxMonth,AMinDate,AMaxDate,AMinMonth,AMaxMonth) values (?,?,?,?,?,?,?,?,?,?,?,?)" % (
            self.EvidenceID, Constants.MACRangeTable)
        macRange = tuple(Globals.TimelinesDict[kind][field]
                         for kind in self._TIMELINE_KINDS
                         for field in self._RANGE_FIELDS)
        self.dbMAC.ExecuteMany(query, [macRange])

        self.dbFileSystem.CloseConnection()
        self.dbImage.CloseConnection()
        self.dbMAC.CloseConnection()
        self.UpdateEvidence(self.rootDir)

        if self.dbNSRL:
            self.dbNSRL.CloseConnection()

        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            time.time() - self.StartTime)
        evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                               KnownFilesCount=self.KnownFilesCount,
                               totalDir=self.DirCount,
                               filesCount=self.FilesCount,
                               scanStatus="Done Scanning!")
        wx.PostEvent(self.win, evt)
        self.running = False
        self.progressLog.close()
        self.errorLog.close()
def Run(self): #print 'run start' #knownFileLog = os.path.join(Globals.CasePath, 'KnownFiles.log') #self.knownFilesLog = open(knownFileLog, 'a') #self.knownFilesLog.write('Started at: %s\n'%(time.ctime())) errorLogFile = os.path.join(Globals.CasePath, 'Errors.log') self.errorLog = open(errorLogFile, 'ab') self.errorLog.write('Resume Started at: %s\n' % (time.ctime())) self.progressLog = open(os.path.join(Globals.CasePath, 'Progress.log'), 'ab') self.progressLog.write('Resume Started at: %s\n' % (time.ctime())) self.dbFileSystem = SqliteDatabase(Globals.FileSystemName) if not self.dbFileSystem.OpenConnection(): return self.dbImage = SqliteDatabase(Globals.ImagesFileName) if not self.dbImage.OpenConnection(): return self.dbMAC = SqliteDatabase(Globals.MACFileName) if not self.dbMAC.OpenConnection(): return self.dbNSRL = None if os.path.exists(Constants.NSRLDBName): self.dbNSRL = SqliteDatabase(Constants.NSRLDBName) self.dbNSRL.OpenConnection() #query = "delete from " + Constants.FileInfoTable + ";" #self.dbFileSystem.ExecuteNonQuery(query) DBFunctions.CreateFileSystemTable(Globals.FileSystemName, self.EvidenceID, False) DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName, self.EvidenceID, False) DBFunctions.CreateMACTables(Globals.MACFileName, self.EvidenceID, drop=False) if not Globals.EvidencesDict.has_key(self.EvidenceID): Globals.EvidencesDict[self.EvidenceID] = {} """ query = "select count(distinct(DirPath)) from %s%s;"%(Globals.CurrentEvidenceID, Constants.DirListTable) row = self.dbFileSystem.FetchOneRow() if row: self.DirCount = len(row[0]) query = "select count(*) from self.FilesCount = 0 self.TotalFiles = 0 self.KnownFilesCount = 0 self.ElapsedTime = "" self.EvidenceID = "Evidence1" #self.MD5Hashes = {} self.TotalImages = 0 """ self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime, KnownFilesCount=self.KnownFilesCount, totalDir=self.DirCount, 
filesCount=self.FilesCount, scanStatus="Scan in progress...") wx.PostEvent(self.win, evt) query = """ select min(CDate), max(CDate), min(CMonth), max(CMonth),min(MDate), max(MDate), min(MMonth), max(MMonth), min(ADate), max(ADate), min(AMonth), max(AMonth) from Evidence1 where CDate <> 0 and CMonth <>0 and CMonth <>0 and MDate<>0 and MDate<>0 and MMonth<>0 and MMonth<>0 and CMonth<>0 and ADate<>0 and ADate<>0 and AMonth<>0 and AMonth<>0; """ row = self.dbFileSystem.FetchOneRow(query) Globals.TimelinesDict['Created'] = { 'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1 } Globals.TimelinesDict['Modified'] = { 'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1 } Globals.TimelinesDict['Accessed'] = { 'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1 } if row: Globals.TimelinesDict['Created'] = { 'MinDate': row[0], 'MaxDate': row[1], 'MinMonth': row[2], 'MaxMonth': row[3] } Globals.TimelinesDict['Modified'] = { 'MinDate': row[4], 'MaxDate': row[5], 'MinMonth': row[6], 'MaxMonth': row[7] } Globals.TimelinesDict['Accessed'] = { 'MinDate': row[8], 'MaxDate': row[9], 'MinMonth': row[10], 'MaxMonth': row[11] } self.FileList = [] #self.ThumbnailList = [] self.SubDirList = [] self.UnzipFileNameDict = {} extractRootPath = os.path.join( Globals.CasePath, '%s%s' % (Globals.CurrentEvidenceID, Constants.UnzipRootFolderName)) for dirName in os.listdir(extractRootPath): dirNameOnly = dirName[:dirName.rfind('-')] if self.UnzipFileNameDict.has_key(dirNameOnly): self.UnzipFileNameDict[dirNameOnly] += 1 else: self.UnzipFileNameDict[dirNameOnly] = 1 query = "select DirPath from %s%s" % (Globals.CurrentEvidenceID, Constants.DirListTable) self.DBDirList = self.dbFileSystem.FetchAllRows(query) Globals.MimeTypeSet = set([]) query = "select distinct(MimeType) from %s" % ( Globals.CurrentEvidenceID) rows = self.dbFileSystem.FetchAllRows(query) for row in rows: Globals.MimeTypeSet.add(row[0]) for root, dirs, files in os.walk(self.rootDir): if 
self.CheckDirPathExists(self.DBDirList, root): continue #print query query = "delete from %s where DirPath = %s;" % ( Globals.CurrentEvidenceID, self.dbFileSystem.SqlSQuote(root)) #print query self.dbFileSystem.ExecuteNonQuery(query) self.DirCount += len(dirs) #self.ImageCount = 0 self.SubDirList = dirs for afile in files: try: self.progressLog.write( "%s\n" % (PlatformMethods.Encode(os.path.join(root, afile)))) self.ScanFileInfo(root, afile) """ if len(self.ThumbnailList) >= Constants.MaxThumbnailsToHold: self.dbImage.ExecuteMany(self.imageQuery, self.ThumbnailList) self.ThumbnailList = [] """ if len(self.FileList) >= Constants.MaxFileInfoToHold: self.dbFileSystem.ExecuteMany(self.query, self.FileList) self.FileList = None self.FileList = [] except Exception, value: #print 'Error: ', value self.errorLog.write( '%s; ScanError: %s\n' % (PlatformMethods.Encode(os.path.join( root, afile)), PlatformMethods.Encode(value))) self.errorLog.flush() self.dbFileSystem.ExecuteMany( self.dirListQuery, [(root, cPickle.dumps(self.SubDirList))])
class FileScanThread: def __init__(self, win, startTime): import HTMLParser self.win = win self.StartTime = startTime self.DocID = 0 self.WordID = 0 self.StemmedWordID = 0 self.DirCount = 0 self.FilesCount = 0 self.WordCount = 0 self.StemmedWordCount = 0 self.ElapsedTime = "" self.ParseStatus = "Indexing in Progress..." self.KeyColumnNames = "" self.UseStemmer = False self.Stemmer = None #self.SetupTextCatDB() DBFunctions.SetupTextCatTables(Globals.TextCatFileName) """ self.timerStatus = wx.Timer(id=wx.NewId(), owner=self) self.Bind(wx.EVT_TIMER, self.OnTimerStatusTimer, id=self.timerStatus.GetId()) """ self.EventStart = time.time() self.splitter = re.compile(r'\W*') #self.DigitWord = re.compile(r'[a-z]*\d+[a-z]*', re.I) if Globals.Stemmer == "Porter Stemmer": self.Stemmer = PorterStemmer() #self.UseStemmer = True self.htmlParser = HTMLParser.HTMLParser(self.Stemmer) self.textParser = TextParser.TextParser(self.Stemmer) """ self.timerStatus = wx.Timer(id=wx.NewId(), owner=self) self.Bind(wx.EVT_TIMER, self.OnTimerStatusTimer, id=timerStatus.GetId()) #Globals.frmGlobalMainForm.treeKeywords.GetTextCatDirList() #DBFunctions.SetupKeywordsFrequencyTable(Globals.TextCatFileName) #self.InitializeKeyWordsFrequencyDictionary() """ def Start(self): #self.timerStatus.Start(1000000) self.keepGoing = self.running = True thread.start_new_thread(self.Run, ()) def Stop(self): self.keepGoing = False #self.db.CloseConnection() """ def OnTimerStatusTimer(self, event): self.SendEvent() event.Skip() """ def IsRunning(self): return self.running def Run(self): db = SqliteDatabase(Globals.TextCatFileName) if not db.OpenConnection(): return #print Globals.TextCatDirList for dir in Globals.TextCatDirList: #for dirName in Globals.EvidencesDict['Evidence1']['Dir #print dir if not os.path.isdir(dir): continue if not self.keepGoing: self.running = False return self.DirCount += 1 #TotalDir += 1 files = os.listdir(dir) for file in files: if not self.keepGoing: self.running = False return filePath 
= os.path.join(dir, file) if not os.path.isfile(filePath): continue if (filePath.rfind('.') == -1): continue try: #if not (filePath.rfind('.') == -1): extension = filePath[filePath.rfind('.'):] fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(extension) if fileType: mimeType = fileType.GetMimeType() or "Unknown" if mimeType in Globals.TextCatCategoryList: #self.ReadFile(filePath) if mimeType == "text/plain": #print 'plain text' self.textParser.parse(filePath, self.WordID, self.StemmedWordID) self.WordID = self.textParser.GetWordID() self.StemmedWordID = self.textParser.GetStemmedWordID() self.WordCount = self.textParser.GetWordCount() self.StemmedWordCount = self.textParser.GetStemmedWordCount() else: # mimeType == "text/html": fin = open(filePath, "r") data = fin.read() self.htmlParser.ResetCounters() #while data: self.htmlParser.parse(data, self.WordID, self.StemmedWordID) """ for line in data: if self.UseStemmer: self.PreprocessDataUsingStemmer(line) else: self.PreprocessDataWithoutStemmer(line) """ #data = fin.read() #data = fin.readlines() self.WordID = self.textParser.GetWordID() self.StemmedWordID = self.textParser.GetStemmedWordID() self.WordCount = self.htmlParser.GetWordCount() self.StemmedWordCount = self.htmlParser.GetStemmedWordCount() fin.close() self.FilesCount += 1 #TotalFiles += 1 self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime) self.DocID += 1 self.UpdateDocumentDatabase(db, dir, file) self.InitializeDocsInfo() if (time.time() - self.EventStart) > 10: #print time.time() - self.EventStart self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime) self.SendEvent() except Exception, value: #print "Failed to read file: %s Error: %s"%(filePath, value) try: print filePath, value except: continue self.WriteTermsInDatabase(db) #self.UpdateWordCount(db) #self.UpdateTF(db) self.DumpBitMapInDatabase(db) #self.UpdateIDF(db) db.CloseConnection() #self.ParseStatus = 
"Done Updating Inverse Document Frequency!" self.SendEvent() #db = SqliteDatabase(Globals.CurrentProjectFile) #self.tokenizer.close() finishTime = time.time() self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(finishTime - self.StartTime) self.ParseStatus = "Done Preprocessing/Indexing!" self.SendEvent() self.running = False
def Run(self): db = SqliteDatabase(Globals.TextCatFileName) if not db.OpenConnection(): return #print Globals.TextCatDirList for dir in Globals.TextCatDirList: #for dirName in Globals.EvidencesDict['Evidence1']['Dir #print dir if not os.path.isdir(dir): continue if not self.keepGoing: self.running = False return self.DirCount += 1 #TotalDir += 1 files = os.listdir(dir) for file in files: if not self.keepGoing: self.running = False return filePath = os.path.join(dir, file) if not os.path.isfile(filePath): continue if (filePath.rfind('.') == -1): continue try: #if not (filePath.rfind('.') == -1): extension = filePath[filePath.rfind('.'):] fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(extension) if fileType: mimeType = fileType.GetMimeType() or "Unknown" if mimeType in Globals.TextCatCategoryList: #self.ReadFile(filePath) if mimeType == "text/plain": #print 'plain text' self.textParser.parse(filePath, self.WordID, self.StemmedWordID) self.WordID = self.textParser.GetWordID() self.StemmedWordID = self.textParser.GetStemmedWordID() self.WordCount = self.textParser.GetWordCount() self.StemmedWordCount = self.textParser.GetStemmedWordCount() else: # mimeType == "text/html": fin = open(filePath, "r") data = fin.read() self.htmlParser.ResetCounters() #while data: self.htmlParser.parse(data, self.WordID, self.StemmedWordID) """ for line in data: if self.UseStemmer: self.PreprocessDataUsingStemmer(line) else: self.PreprocessDataWithoutStemmer(line) """ #data = fin.read() #data = fin.readlines() self.WordID = self.textParser.GetWordID() self.StemmedWordID = self.textParser.GetStemmedWordID() self.WordCount = self.htmlParser.GetWordCount() self.StemmedWordCount = self.htmlParser.GetStemmedWordCount() fin.close() self.FilesCount += 1 #TotalFiles += 1 self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime) self.DocID += 1 self.UpdateDocumentDatabase(db, dir, file) self.InitializeDocsInfo() if (time.time() - self.EventStart) > 
10: #print time.time() - self.EventStart self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime) self.SendEvent() except Exception, value: #print "Failed to read file: %s Error: %s"%(filePath, value) try: print filePath, value except: continue
def ImportNSRLHashAndProduct(self): #q = Queue.Queue() db = SqliteDatabase(Constants.NSRLDBName) if not db.OpenConnection(): return Hashes = set() #txtFile = False #query = "INSERT INTO " + Constants.NSRLFileTable + " (MD5, FileName, ProductCode, OSCode) values (?,?,?,?)" try: fin = open(os.path.join(self.dirPath, "NSRLFile.txt")) #fin = gzip.open(self.dirPath + PlatformMethods.GetDirSeparator() + "NSRLFile.txt.gz") except IOError: #fin = open(self.dirPath + PlatformMethods.GetDirSeparator() + "NSRLFile.txt") #txtFile = True print 'Error Opening file: ', os.path.join(self.dirPath, "NSRLFile.txt") return #pass # Get the file size:y try: fin.seek(0, 2) size = fin.tell() fin.seek(0) except TypeError: size = None #finCSV = csv.reader(fin) #print "Startime = %s"%time.asctime() #i = 0 self.EstimatedTime = "" #while self.keepGoing: count = 0 self.startTime = time.time() #manyValues = [] #totalRows = len(finCSV) skip = True """ t = Thread(target=worker, args=(q,)) t.setDaemon(True) t.start() """ while fin: rows = fin.readlines(10000) if not rows: break #for row in finCSV: for row in rows: if not self.keepGoing: break if skip: skip = False continue md5 = row.split(',')[1].replace('"', '') table = CommonFunctions.GetMD5HashBucketID(md5) #print col #return #q.put([table, md5]) #md5 = row[1] if table not in Hashes: Hashes.add(table) db.ExecuteNonQuery( "CREATE TABLE IF NOT EXISTS %s (MD5 varchar(32) primary key);" % table) #db.ExecuteNonQuery("CREATE INDEX Ind%s ON table (MD5);"%table) try: db.ExecuteNonQuery("insert into %s (MD5) values ('%s');" % (table, md5)) except Exception, value: #print 'Error :: ', value pass if size and not count % 10000: done = fin.tell() self.gaugeValue = float(done * 100) / float(size) #print "Progress %02u%% Done - %uk rows\r" % (done*100/size,count/1000) self.ElapsedTime = time.time() #self.gaugeValue = (float(size)/float(driveSize))*100 timeTaken = float(self.ElapsedTime - self.startTime) if timeTaken == 0: timeTaken = 1 rate = float(done) / 
timeTaken self.rateInfo = "%.2fMB of %.2fMB at %.2fMB/sec" % ( done / 1024. / 1024, size / 1024. / 1024, rate / 1024. / 1024) self.Status = "Updating NSRL Software Hashes... [%.2f" % self.gaugeValue self.Status += "%]" #self.startTime = time.time() self.SendEvent() if count == 10000: self.EstimatedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( float(size) / rate) count += 1
class FileScanThread: def __init__(self, win, startTime, rootPath): import HTMLParser self.win = win self.StartTime = startTime self.rootPath = rootPath self.DocID = 0 self.WordID = 0 self.StemmedWordID = 0 self.DirCount = 0 self.FilesCount = 0 self.WordCount = 0 self.StemmedWordCount = 0 self.ElapsedTime = "" self.ParseStatus = "Indexing in Progress..." self.KeyColumnNames = "" self.UseStemmer = False self.Stemmer = None #self.SetupTextCatDB() #DBFunctions.SetupTextCatTables(Globals.TextCatFileName) DBFunctions.SetupSqliteIndexTables(Globals.TextCatFileName) self.EventStart = time.time() if Globals.Stemmer == "Porter Stemmer": self.Stemmer = PorterStemmer() def Start(self): #self.timerStatus.Start(1000000) self.keepGoing = self.running = True thread.start_new_thread(self.Run, ()) #self.Run() def Stop(self): self.keepGoing = False #db.CloseConnection() """ def OnTimerStatusTimer(self, event): self.SendEvent() event.Skip() """ def IsRunning(self): return self.running def Run(self): #print Globals.TextCatCategoryList db = SqliteDatabase(Globals.TextCatFileName) if not db.OpenConnection(): return #self.htmlParser = HTMLParser.HTMLParser(self.Stemmer) textParser = TextParser.TextParser(db, Globals.Stopwords, self.Stemmer) #print Globals.TextCatDirList for dirPath, dirs, files in os.walk(self.rootPath): self.DirCount += 1 for afile in files: self.FilesCount += 1 if not self.keepGoing: self.running = False return filePath = os.path.join(dirPath, afile) try: #print filePath parsed = False dotIndex = filePath.rfind('.') if dotIndex >= 0: extension = filePath[dotIndex:] fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension( extension) if fileType: mimeType = fileType.GetMimeType() or "unknown" if Globals.TextCatCategoryList: if mimeType not in Globals.TextCatCategoryList: self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) if (time.time() - self.EventStart) > 10: #print time.time() - self.EventStart self.ElapsedTime = 
CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) self.SendEvent() continue #print filePath query = "insert into %s (DocPath) values (?)" % ( Constants.TextCatDocumentsTable) DocID = db.InsertAutoRow(query, [(filePath, )]) if mimeType == 'application/msword': try: textParser.parse( DocID, MSOfficeToText.WordToText(filePath), filePath) except: textParser.parse( DocID, HTMLParser.getText(filePath), filePath) elif mimeType == 'application/pdf': try: textParser.parse( DocID, PDFToText.GetText(filePath), filePath) except: textParser.parse( DocID, HTMLParser.getText(filePath), filePath) else: textParser.parse(DocID, HTMLParser.getText(filePath), filePath) parsed = True if not parsed: textParser.parse(DocID, HTMLParser.getText(filePath), filePath) self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) if (time.time() - self.EventStart) > 10: #print time.time() - self.EventStart self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) self.SendEvent() except Exception, value: #print "Failed to read file: %s Error: %s"%(filePath, value) try: print "Error in Text Preprocessing: ", filePath, value except: print "Error in Text Preprocessing..." continue db.CloseConnection() self.SendEvent() finishTime = time.time() self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( finishTime - self.StartTime) self.ParseStatus = "Done Preprocessing/Indexing!" self.SendEvent() self.running = False
#excel.Quit() if __name__ == "__main__": import os.path import time import re stTime = time.time() splitter = re.compile(r'\W*') docFileName = r'c:\test.doc' data = WordToText(docFileName) for word in splitter.split(data): try: print word except: print 'error' endTime = time.time() print endTime - stTime CommonFunctions.ConvertSecondsToDayHourMinSec(endTime - stTime) #print os.path.exists(r'Data\NSRL.db') #docFileName = r"C:\NMT\Research\ForensicsTool\EmailTest1\Attachments\2006-12-01 14.30.25 - Director's Secretary - Lawson Consultant Team - Lawson Consultant Team-cell and email.doc" #import os.path #print os.path.isfile(docFileName) #excelFile = r'C:\Documents and Settings\Ram\Desktop\Test\TomSavageKeywords.xls' #ExcelToText(excelFile) #pptFile = r'C:\Documents and Settings\Ram\Desktop\Test\BasnetCACTUSTextCat.ppt' #PowerpointToText(pptFile)
def Run(self): #print Globals.TextCatCategoryList db = SqliteDatabase(Globals.TextCatFileName) if not db.OpenConnection(): return #self.htmlParser = HTMLParser.HTMLParser(self.Stemmer) textParser = TextParser.TextParser(db, Globals.Stopwords, self.Stemmer) #print Globals.TextCatDirList for dirPath, dirs, files in os.walk(self.rootPath): self.DirCount += 1 for afile in files: self.FilesCount += 1 if not self.keepGoing: self.running = False return filePath = os.path.join(dirPath, afile) try: #print filePath parsed = False dotIndex = filePath.rfind('.') if dotIndex >= 0: extension = filePath[dotIndex:] fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension( extension) if fileType: mimeType = fileType.GetMimeType() or "unknown" if Globals.TextCatCategoryList: if mimeType not in Globals.TextCatCategoryList: self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) if (time.time() - self.EventStart) > 10: #print time.time() - self.EventStart self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) self.SendEvent() continue #print filePath query = "insert into %s (DocPath) values (?)" % ( Constants.TextCatDocumentsTable) DocID = db.InsertAutoRow(query, [(filePath, )]) if mimeType == 'application/msword': try: textParser.parse( DocID, MSOfficeToText.WordToText(filePath), filePath) except: textParser.parse( DocID, HTMLParser.getText(filePath), filePath) elif mimeType == 'application/pdf': try: textParser.parse( DocID, PDFToText.GetText(filePath), filePath) except: textParser.parse( DocID, HTMLParser.getText(filePath), filePath) else: textParser.parse(DocID, HTMLParser.getText(filePath), filePath) parsed = True if not parsed: textParser.parse(DocID, HTMLParser.getText(filePath), filePath) self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) if (time.time() - self.EventStart) > 10: #print time.time() - self.EventStart self.ElapsedTime = 
CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) self.SendEvent() except Exception, value: #print "Failed to read file: %s Error: %s"%(filePath, value) try: print "Error in Text Preprocessing: ", filePath, value except: print "Error in Text Preprocessing..." continue
def __init__(self, prnt): # First, call the base class' __init__ method to create the frame wx.Frame.__init__(self, id=-1, name='', parent=prnt, pos=wx.Point(0, 0), size=wx.Size(600, 600), style=wx.DEFAULT_FRAME_STYLE, title="Dir tree view") self.panViewFolders = wx.Panel(id=-1, name='panViewFolders', parent=self, pos=wx.Point(8, 176), size=wx.Size(500, 500), style=wx.TAB_TRAVERSAL) self.panViewFolders.SetBackgroundColour(wx.Colour(225, 236, 255)) #self.panViewFolders.SetAutoLayout(True) self.treeViewFolders = wx.TreeCtrl(id=wx.NewId(), name='treeViewFolders', parent=self.panViewFolders, pos=wx.Point(0, 0), size=wx.Size(600, 600), style=wx.HSCROLL | wx.VSCROLL | wx.TR_HAS_BUTTONS) self.DirectoryDict = {} """ print "start walking at ", time.ctime() startTime = time.time() self.WalkDir() print "end walking at ", time.ctime() endTime = time.time() print "Time taken to walk ", CommonFunctions.ConvertSecondsToDayHourMinSec(endTime - startTime) startTime = time.time() #save in db DBFunctions.SetupProjectEvidencesTable("caseNew.cfi", True) db = SqliteDatabase("caseNew.cfi") if db.OpenConnection(): query = "insert into " + Constants.ProjectEvidencesTable + " (ID, DisplayName, Location, DirTree) values (?, ?, ?, ?)" db.ExecuteMany(query, [(1, self.evidenceName, self.evidencePath, cPickle.dumps(self.DirectoryDict[self.evidenceName]))] ) db.CloseConnection() endTime = time.time() print "Time taken to add in db ", CommonFunctions.ConvertSecondsToDayHourMinSec(endTime - startTime) """ print 'start time load ', time.ctime() startTime = time.time() db = SqliteDatabase("caseNew.cfi") if db.OpenConnection(): query = "select Location, DisplayName, DirTree from " + Constants.EvidencesTable; row = db.FetchOneRow(query) self.evidencePath = row[0] self.evidenceName = row[1] self.DirectoryDict = cPickle.loads(str(row[2])) print "end time ", time.ctime() endTime = time.time() print "Time taken to load db ", CommonFunctions.ConvertSecondsToDayHourMinSec(endTime - startTime) print "start 
build tree time ", time.ctime() startTime = time.time() self.treeDir = DirectoryTreeView(self, self.treeViewFolders, self.DirectoryDict, self.evidenceName, self.evidencePath) print "end build tree time ", time.ctime() endTime = time.time() print "Time taken ", CommonFunctions.ConvertSecondsToDayHourMinSec(endTime - startTime)
# Run: walk self.rootPath and index each file into the database named by
# Globals.TextCatFileName.
#
# A log file named after the database file plus '.log' is opened in append
# mode under Globals.CasePath and kept as self.fout.  For each file the MIME
# type is looked up from the file extension through wx.TheMimeTypesManager,
# a row is inserted into Constants.TextCatDocumentsTable, and the content is
# handed to docxParser (docx MIME type or '.docx' extension), docParser
# (application/msword), PDFToText.GetText + textParser (application/pdf) or
# textParser (text/plain).  Parser failures are written to self.fout.
#
# NOTE(review): self.fout is opened before db.OpenConnection() is checked,
# so the early return leaves the log open and self.running untouched.
# NOTE(review): when Globals.TextCatCategoryList excludes the MIME type, the
# code still appears to index .docx files before the `continue`; the
# original indentation is lost in this copy, so confirm the nesting.
# NOTE(review): the text of this method stops inside the outer `try:`; no
# matching except/finally clause is present in this copy.
def Run(self): #print Globals.TextCatCategoryList db = SqliteDatabase(Globals.TextCatFileName) logFileName = PlatformMethods.Decode( os.path.join(Globals.CasePath, ( Globals.TextCatFileName[Globals.TextCatFileName.rfind(os.sep) + 1:] + '.log'))) self.fout = open(logFileName, 'ab') if not db.OpenConnection(): return #self.bloomFilter = self.CreateBloomFilter() self.bloomFilter = None #self.htmlParser = HTMLParser.HTMLParser(self.Stemmer) textParser = TextParser.TextParser(db, Globals.Stopwords, Stemmer=self.Stemmer, bloomFilter=self.bloomFilter) #self.WordDict = {} #print Globals.TextCatDirList docxParser = DocxParser.DocxParser(db, Globals.Stopwords, self.Stemmer) docParser = DocParser.DocParser(db, Globals.Stopwords, self.Stemmer) query = "insert into %s (DocPath) values (?)" % ( Constants.TextCatDocumentsTable) self.filePath = "" for dirPath, dirs, files in os.walk(self.rootPath): self.DirCount += 1 for afile in files: self.FileScanStartTime = time.time() self.FilesCount += 1 """ if (self.FilesCount % Globals.TotalFilesToHold) == 0 and self.WordDict: self.ParseStatus = "Writing to database..." self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime) self.SendEvent() self.HandleWords(self.WordDict) self.ParseStatus = "Indexing in Progress..." 
self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime) self.SendEvent() self.WordDict = {} """ if not self.keepGoing: self.running = False return self.filePath = os.path.join(dirPath, afile) try: #print filePath parsed = False dotIndex = self.filePath.rfind('.') extension = "" if dotIndex >= 0: extension = self.filePath[dotIndex:] fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension( extension) if fileType: mimeType = fileType.GetMimeType() or "unknown" if Globals.TextCatCategoryList: if mimeType not in Globals.TextCatCategoryList: self.FileScanStartTime = time.time() #self.fout.write('%s :'%(self.filePath)) #query = "insert into %s (DocPath) values (?)"%(Constants.TextCatDocumentsTable) #default list of all the mime types doesn't seem to produce the mime type for # MS docx document if mimeType == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or extension == '.docx': try: DocID = db.InsertAutoRow( query, [(PlatformMethods.Encode( self.filePath), )]) #docID, filePath, startTime, logFile, extractMedia = False, MediaPath="" docxParser.Parse( DocID, self.filePath, self.FileScanStartTime, self.fout, extractMedia=False, MediaPath="") parsed = True except Exception, value: self.fout.write( "Error in docxParser : %s Value: %s\n" % (self.filePath, value)) #gives junk so let's not parse it using binary #parsed = True #docxParser.Parse(DocID, self.filePath, extractMedia = False, MediaPath="") curTime = time.time() self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( curTime - self.StartTime) #self.fout.write('%s\n'%(CommonFunctions.ConvertSecondsToDayHourMinSec(curTime - self.FileScanStartTime))) #self.fout.flush() if (curTime - self.EventStart) > 10: #print time.time() - self.EventStart self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( curTime - self.StartTime) self.SendEvent() continue else: pass #print filePath self.FileScanStartTime = time.time() #self.fout.write('%s 
:'%(self.filePath)) DocID = db.InsertAutoRow( query, [(PlatformMethods.Encode(self.filePath), )]) #print 'mimeType ', mimeType #print 'extension ', extension if mimeType == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or extension == '.docx': try: #docID, filePath, startTime, logFile, extractMedia = False, MediaPath="" docxParser.Parse(DocID, self.filePath, self.FileScanStartTime, self.fout, extractMedia=False, MediaPath="") parsed = True except Exception, value: #gives junk so let's not parse it using binary parsed = True self.fout.write( "Error in docxParser : %s Value: %s\n" % (PlatformMethods.Encode( self.filePath), value)) elif mimeType == 'application/msword': """ try: textParser.parse(DocID, MSOfficeToText.WordToText(self.filePath), self.filePath, self.FileScanStartTime, self.fout) parsed = True except Exception, value: self.fout.write("Error in MSOfficeToText.WordToText : %s Value: %s\n"%(self.filePath, value)) """ try: #docID, filePath, startTime, logFile, extractMedia = False, MediaPath="" docParser.Parse(DocID, self.filePath, self.FileScanStartTime, self.fout, extractMedia=False, MediaPath="") parsed = True except Exception, value: #gives junk so let's not parse it using binary self.fout.write( "Error in DocParser : %s Value: %s\n" % (PlatformMethods.Encode( self.filePath), value)) elif mimeType == 'application/pdf': try: textParser.parse( DocID, PDFToText.GetText(self.filePath), self.filePath, self.FileScanStartTime, self.fout) parsed = True except Exception, value: self.fout.write( "Error in PDFToText: %s Value: %s\n" % (PlatformMethods.Encode( self.filePath), value)) elif mimeType == 'text/plain': try: fin = open(self.filePath, 'rb') #data = fin.read(4096) #while data: textParser.parse(DocID, fin.read(), self.filePath, self.FileScanStartTime, self.fout) parsed = True fin.close() except Exception, value: self.fout.write( "Error in text/plain : %s Value: %s\n" % (PlatformMethods.Encode( self.filePath), value))
def Run(self): db = SqliteDatabase(Globals.KeywordsFileName) if not db.OpenConnection(): return #print Globals.KeywordsSearchDirList for dir in Globals.KeywordsSearchDirList: #print dir if dir.find("*.*") >= 0: continue if not os.path.isdir(dir): continue try: #print dir files = os.listdir(dir) self.DirCount += 1 for file in files: if not self.keepGoing: return filePath = dir + PlatformMethods.GetDirSeparator() + file #print filePath if os.path.isfile(filePath): try: if filePath.rfind('.') >= 0: extension = filePath[filePath.rfind('.'):] #print 'extension = ', extension fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension( extension) if fileType: mimeType = fileType.GetMimeType( ) or "Unknown" #print "mimeType ", mimeType #try: #print Globals.KeywordsSearchCategoryList if mimeType in Globals.KeywordsSearchCategoryList: #print 'ReadFile being called' self.ReadFile(filePath, db) self.FilesCount += 1 #self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime) """ except Exception, value: print "Failed to read file: %s Error: %s"%(filePath, value) continue """ #else: # time.sleep(1) #else: # time.sleep(1) else: self.ReadFile(filePath, db) self.FilesCount += 1 if (time.time() - self.EventStart) > 10: #print time.time() - self.EventStart self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) self.SendEvent() except Exception, value: print "Failed to read file: %s Error: %s" % ( filePath, value) except Exception, value: print "Failed to read directory: %s Error: %s" % (dir, value) continue
class MACScanThread(threading.Thread): def __init__(self, win, rootDir): threading.Thread.__init__(self) self.win = win #self.StartTime = startTime self.rootDir = rootDir #self.CurrentFileName = "" self.CurrentDirectory = "" #self.DirCount = 0 #self.FilesCount = 0 #self.TotalFiles = 0 #self.KnownFilesCount = 0 #self.ElapsedTime = "" self.EvidenceID = "Evidence1" self.EventStart = time.time() self.keepGoing = self.running = True def Start(self): self.keepGoing = self.running = True thread.start_new_thread(self.Run, ()) #self.Run() def Stop(self): self.keepGoing = False #self.dbFileSystem.CloseConnection() def IsRunning(self): return self.running def run(self): global FileInfoQueue global ScanDone global ThumbnailQueue global SubDirQueue global StartTime global DirCount global FilesCount global TotalImages global KnownFilesCount knownFileLog = os.path.join(Globals.CasePath, 'KnownFiles.log') self.knownFilesLog = open(knownFileLog, 'w') errorLogFile = os.path.join(Globals.CasePath, 'Errors.log') self.errorLog = open(errorLogFile, 'w') self.progressLog = open(os.path.join(Globals.CasePath, 'Progress.log'), 'w') self.dbMAC = SqliteDatabase(Globals.MACFileName) if not self.dbMAC.OpenConnection(): return self.dbNSRL = None if os.path.exists(Constants.NSRLDBName): self.dbNSRL = SqliteDatabase(Constants.NSRLDBName) self.dbNSRL.OpenConnection() DBFunctions.CreateFileSystemTable(Globals.FileSystemName, self.EvidenceID, True) DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName, self.EvidenceID, True) DBFunctions.CreateMACTables(Globals.MACFileName, self.EvidenceID, drop=True) if not Globals.EvidencesDict.has_key(self.EvidenceID): Globals.EvidencesDict[self.EvidenceID] = {} self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - StartTime) evt = UpdateLabelEvent(elapsedTime = self.ElapsedTime, KnownFilesCount = KnownFilesCount, totalDir = DirCount, filesCount=FilesCount, scanStatus = "Scan in progress...") wx.PostEvent(self.win, evt) 
self.MimeTypeDict = {} Globals.TimelinesDict['Created'] = {'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1} Globals.TimelinesDict['Modified'] = {'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1} Globals.TimelinesDict['Accessed'] = {'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1} self.UnzipFileNameDict = {} for root, dirs, files in os.walk(self.rootDir): DirCount += len(dirs) #print 'first thread' #self.ImageCount = 0 self.SubDirList = dirs for afile in files: try: self.progressLog.write("%s\n"%(os.path.join(root, afile))) self.ScanFileInfo(root, afile) except Exception, value: #print 'Error: ', value self.errorLog.write('%s; ScanError: %s\n'%(os.path.join(root, afile), value)) self.errorLog.flush() SubDirQueue.put((root, cPickle.dumps(self.SubDirList))) query ="INSERT INTO %s%s (CMinDate,CMaxDate,CMinMonth,CMaxMonth, MMinDate,MMaxDate,MMinMonth,MMaxMonth,AMinDate,AMaxDate,AMinMonth,AMaxMonth) values (?,?,?,?,?,?,?,?,?,?,?,?)"%(self.EvidenceID, Constants.MACRangeTable) self.dbMAC.ExecuteMany(query, [(Globals.TimelinesDict['Created']['MinDate'],Globals.TimelinesDict['Created']['MaxDate'], Globals.TimelinesDict['Created']['MinMonth'], Globals.TimelinesDict['Created']['MaxMonth'], Globals.TimelinesDict['Modified']['MinDate'], Globals.TimelinesDict['Modified']['MaxDate'], Globals.TimelinesDict['Modified']['MinMonth'], Globals.TimelinesDict['Modified']['MaxMonth'], Globals.TimelinesDict['Accessed']['MinDate'], Globals.TimelinesDict['Accessed']['MaxDate'], Globals.TimelinesDict['Accessed']['MinMonth'], Globals.TimelinesDict['Accessed']['MaxMonth'])]) self.dbMAC.CloseConnection() if self.dbNSRL: self.dbNSRL.CloseConnection() self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - StartTime) evt = UpdateLabelEvent(elapsedTime = self.ElapsedTime, KnownFilesCount = KnownFilesCount, totalDir = DirCount, filesCount=FilesCount, scanStatus = "Done Reading! 
Now Writing!!") wx.PostEvent(self.win, evt) self.running = False self.knownFilesLog.close() self.progressLog.close() self.errorLog.close() ScanDone = True
def Run(self):
    """Write the image file at self.imagePath to every target drive.

    The image is read in 16 MiB chunks; each chunk is written to, and
    flushed on, every object opened via Win32RawIO.Win32RAWIO for the
    names in self.listDriveNames.  An MD5 digest of the data written is
    accumulated and printed at the end.

    After each chunk the method updates self.ElapsedTime (a raw
    time.time() timestamp here), self.gaugeValue (percent complete),
    self.rateInfo and self.Status, then calls self.SendEvent().  The
    estimated total time is computed once, from the first chunk's rate.
    The loop stops at end of file or as soon as self.keepGoing is false.

    On normal completion self.running is set to False and a final
    UpdateLabelEvent is posted to self.win.  If an error occurs the
    exception still propagates, but the image file and every target that
    was opened are closed first.
    """
    m = hashlib.md5()
    size = 0
    fileObjects = []
    rfin = open(self.imagePath, 'rb')
    try:
        # Opened inside the try so that a failure on a later drive still
        # closes the drives (and the image) that were already opened.
        for dirName in self.listDriveNames:
            fileObjects.append(Win32RawIO.Win32RAWIO(dirName, 'w'))

        startTime = time.time()
        print("Startime = %s" % time.asctime())
        self.EstimatedTime = ""
        st = os.stat(self.imagePath)
        imageSize = st[ST_SIZE]

        while self.keepGoing:
            self.readTime = time.time()
            data = rfin.read(1024 * 1024 * 16)
            if not data:
                break

            for rfout in fileObjects:
                rfout.write(data)
                rfout.flush()
            m.update(data)

            self.ElapsedTime = time.time()
            # time.time() can have coarse resolution, so a short (e.g.
            # final, partial) chunk may measure as zero seconds.  Clamp the
            # interval so the rate computation cannot divide by zero; the
            # rate shown for such a chunk is only a rough figure.
            chunkSeconds = max(self.ElapsedTime - self.readTime, 1e-6)
            rate = float(len(data)) / chunkSeconds

            # The estimate is derived from the first chunk only.
            if size == 0:
                self.EstimatedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                    float(imageSize) / rate)

            size += len(data)
            self.gaugeValue = (float(size) / float(imageSize)) * 100
            self.rateInfo = "%.2fMB of %.2fMB at %.2fMB/sec" % (
                size / 1024. / 1024,
                imageSize / 1024. / 1024,
                rate / 1024. / 1024)
            self.Status = "Writing Disk... [%.2f%s" % (self.gaugeValue, "%]")
            self.SendEvent()
    finally:
        # Always release the handles, even when a read or write failed.
        try:
            for rfout in fileObjects:
                rfout.close()
        finally:
            rfin.close()

    # Taken after the targets are closed, as in the original ordering.
    self.ElapsedTime = time.time()
    print("MD5 Hash = %s" % m.hexdigest().upper())
    print("Endtime = %s" % time.asctime())
    print("Total Elapsed Time = %s" % CommonFunctions.ConvertSecondsToDayHourMinSec(
        self.ElapsedTime - startTime))

    self.running = False
    evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                           gaugeValue=self.gaugeValue,
                           rate=self.rateInfo,
                           estimatedTime=self.EstimatedTime,
                           scanStatus="Done Writing To Disk!")
    wx.PostEvent(self.win, evt)
def Run(self): #print 'run start' #knownFileLog = os.path.join(Globals.CasePath, 'KnownFiles.log') #self.knownFilesLog = open(knownFileLog, 'wb') errorLogFile = os.path.join(Globals.CasePath, 'Errors.log') self.errorLog = open(errorLogFile, 'wb') self.progressLog = open(os.path.join(Globals.CasePath, 'Progress.log'), 'wb') self.dbFileSystem = SqliteDatabase(Globals.FileSystemName) if not self.dbFileSystem.OpenConnection(): return self.dbImage = SqliteDatabase(Globals.ImagesFileName) if not self.dbImage.OpenConnection(): return self.dbMAC = SqliteDatabase(Globals.MACFileName) if not self.dbMAC.OpenConnection(): return self.dbNSRL = None if os.path.exists(Constants.NSRLDBName): self.dbNSRL = SqliteDatabase(Constants.NSRLDBName) self.dbNSRL.OpenConnection() #query = "delete from " + Constants.FileInfoTable + ";" #self.dbFileSystem.ExecuteNonQuery(query) DBFunctions.CreateFileSystemTable(Globals.FileSystemName, Globals.CurrentEvidenceID, True) DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName, Globals.CurrentEvidenceID, True) DBFunctions.CreateMACTables(Globals.MACFileName, Globals.CurrentEvidenceID, drop=True) if not Globals.EvidencesDict.has_key(Globals.CurrentEvidenceID): Globals.EvidencesDict[Globals.CurrentEvidenceID] = {} #11/06/08 Globals.EvidencesDict[Globals.CurrentEvidenceID]['DirTree'] = {} #11/06/08 Globals.EvidencesDict[Globals.CurrentEvidenceID]['Location'] = self.rootDir #11/06/08 Globals.FilesDict[Globals.CurrentEvidenceID] = {} #11/06/08 Globals.ImagesDict[Globals.CurrentEvidenceID] = {} #11/06/08 Globals.EvidencesDict[Globals.CurrentEvidenceID]['dirs'] = {} self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec( time.time() - self.StartTime) evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime, KnownFilesCount=self.KnownFilesCount, totalDir=self.DirCount, filesCount=self.FilesCount, scanStatus="Scan in progress...") wx.PostEvent(self.win, evt) self.MimeTypeDict = {} Globals.TimelinesDict['Created'] = { 'MinDate': -1, 'MaxDate': -1, 
'MinMonth': -1, 'MaxMonth': -1 } Globals.TimelinesDict['Modified'] = { 'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1 } Globals.TimelinesDict['Accessed'] = { 'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1 } self.FileList = [] #self.ThumbnailList = [] self.SubDirList = [] self.UnzipFileNameDict = {} for root, dirs, files in os.walk(self.rootDir): self.DirCount += len(dirs) #self.ImageCount = 0 self.SubDirList = None self.SubDirList = dirs dirs = None for afile in files: #06/23/09 added try/catch block to catch runtime errors on long filenames try: self.progressLog.write( "%s\n" % (PlatformMethods.Encode(os.path.join(root, afile)))) self.ScanFileInfo(root, afile) """ if len(self.ThumbnailList) >= Constants.MaxThumbnailsToHold: self.dbImage.ExecuteMany(self.imageQuery, self.ThumbnailList) self.ThumbnailList = [] """ if len(self.FileList) >= Constants.MaxFileInfoToHold: self.dbFileSystem.ExecuteMany(self.query, self.FileList) self.FileList = None self.FileList = [] except Exception, value: try: self.errorLog.write( 'MAC Info Failed on %s; Error: %s\n' % (PlatformMethods.Encode(os.path.join( root, afile)), str(value))) self.errorLog.flush() #print "Failed to get information on file: %s Error: %s"%(fullFileName, value) except Exception, value: self.errorLog.write('MAC Info Failed; Error: %s\n' % (str(value))) self.errorLog.flush()