def UpdateIDF(self, db):
     """Recompute and store the inverse document frequency of every stemmed word.

     For each WordID in ``Constants.TextCatBagOfStemmedWordsTable`` the number
     of rows carrying that WordID is counted, and the table's IDF column is
     set to ``log10(self.DocID / count)``.  ``self.DocID`` is used as the
     total number of documents.  Progress is reported through
     ``self.ParseStatus``, ``self.ElapsedTime`` and ``self.SendEvent()``.

     Only the stemmed-words table is updated here.

     Args:
         db: open database wrapper providing ``FetchAllRows(query)`` and
             ``ExecuteNonQuery(query, params)``.
     """
     self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime)
     self.ParseStatus = "Updating Inverse Document Frequency!"
     self.SendEvent()

     #Update Stemmed Words IDF
     self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime)
     self.SendEvent()
     totalDocs = self.DocID
     queryIDF = "select WordID, Count(WordID) from " + Constants.TextCatBagOfStemmedWordsTable + " group by WordID order by WordID;"
     rowsIDF = db.FetchAllRows(queryIDF)

     # Bind the values as parameters instead of formatting them into the SQL
     # text: '%f' truncated the IDF to six decimals and stored it as a quoted
     # string.  The statement is loop-invariant, so it is built once.
     updateQuery = "update %s set IDF = ? where WordID = ?" % (
         Constants.TextCatBagOfStemmedWordsTable)
     for row in rowsIDF:
         # row[0] is the WordID, row[1] the number of rows for that word.
         idf = math.log(totalDocs / float(row[1]), 10)
         db.ExecuteNonQuery(updateQuery, (idf, row[0]))

     self.ParseStatus = "Done Updating Inverse Document Frequency!"
     self.SendEvent()
Пример #2
0
    def run(self):
        """Read the source drive chunk by chunk and hand the data to ``buffer``.

        ``self.rootDrive`` is opened through ``Win32RawIO`` and read in 16 MB
        chunks.  Every chunk is passed to the module-level ``buffer.put``;
        when a read returns no data, ``sentinel`` is put instead and the loop
        ends.  The loop also ends, without putting ``sentinel``, as soon as
        ``self.keepGoing`` is false.

        After each chunk ``self.EstimatedTime``, ``self.gaugeValue``,
        ``self.rateInfo`` and ``self.Status`` are refreshed and
        ``self.SendEvent()`` is called.  When ``self.verifyImages`` is set, a
        SHA-1 of the data is accumulated in the module-level ``diskSHA1`` and
        printed once reading is over.  ``self.running`` is cleared at the end.
        """
        global buffer, sentinel
        global diskSHA1
        if self.verifyImages:
            diskSHA1 = hashlib.sha1()
        size = 0
        rfin = Win32RawIO.Win32RAWIO(self.rootDrive, 'r')
        try:
            driveSize = rfin.size
            self.EstimatedTime = ""
            while self.keepGoing:
                self.readTime = time.time()
                data = rfin.read(1024 * 1024 * 16)

                if not data:
                    buffer.put(sentinel)
                    break

                buffer.put(data)

                # NOTE: ElapsedTime holds a raw timestamp here, not a duration.
                self.ElapsedTime = time.time()
                # A chunk can complete within the clock's resolution; clamp
                # the interval so the rate computation cannot divide by zero.
                elapsed = max(float(self.ElapsedTime - self.readTime), 1e-6)
                rate = float(len(data)) / elapsed
                if self.verifyImages:
                    diskSHA1.update(data)
                    # The estimate is scaled by the number of image files
                    # when verification is enabled.
                    self.EstimatedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                        float(driveSize * len(self.listImageNames)) / rate)
                else:
                    self.EstimatedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                        float(driveSize) / rate)

                size += len(data)
                self.gaugeValue = (float(size) / float(driveSize)) * 100
                self.rateInfo = "%.2fMB of %.2fMB at %.2fMB/sec" % (
                    size / 1024. / 1024, driveSize / 1024. / 1024,
                    rate / 1024. / 1024)
                self.Status = "Creating Image... [%.2f%s" % (self.gaugeValue, "%]")
                self.SendEvent()
        finally:
            # Release the raw device handle even if reading or queueing fails.
            rfin.close()

        if self.verifyImages:
            print("Disk SHA1: %s" % diskSHA1.hexdigest().upper())

        self.running = False
    def DumpBitMapInDatabase(self, db):
        """
        Store the bitmap of every keyword in the bitmap index table.

        For each keyword in ``Globals.EmailsBitMap`` one row
        (Keyword, Bitmap, Compressed) is inserted into
        ``Constants.TextCatBitMapIndex``.  ``Compressed`` is the bitmap run
        through ``binascii.rlecode_hqx`` and ``MySQLdb.escape_string``.
        Rows are written with ``db.ExecuteMany`` in batches of 10000; a batch
        that fails is reported on stdout and skipped.
        """
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            time.time() - self.StartTime)
        self.ParseStatus = "Updating Database Index!"
        self.SendEvent()

        batchSize = 10000
        manyValues = []
        query = "INSERT INTO " + Constants.TextCatBitMapIndex + " (Keyword, Bitmap, Compressed) values (?,?,?)"
        for kword in Globals.EmailsBitMap:
            bitmap = Globals.EmailsBitMap[kword]['bitmap']
            # NOTE(review): the value is escaped although it is bound through
            # a '?' placeholder; kept as-is because readers of the table may
            # expect the escaped form - confirm.
            manyValues.append(
                (kword, bitmap,
                 MySQLdb.escape_string(binascii.rlecode_hqx(bitmap))))
            # Flush on the batch length rather than on a running counter: the
            # counter was never reset, so only the first 10000 rows were
            # batched and everything after them piled up in one final batch.
            if len(manyValues) >= batchSize:
                try:
                    db.ExecuteMany(query, manyValues)
                except Exception:
                    print("Exception query:: " + query + str(manyValues))
                manyValues = []

        if manyValues:
            try:
                db.ExecuteMany(query, manyValues)
            except Exception:
                print("Exception query:: " + query + str(manyValues[0]))
 def OnTimer1Timer(self, event):
     """Timer handler: refresh the directory, elapsed-time and file counters.

     The counts are read from ``self.scanThread``; the elapsed time is
     measured from ``self.StartTime``.  The event is passed on with ``Skip``.
     """
     elapsedText = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime)

     dirCountText = PlatformMethods.Convert(self.scanThread.GetDirCount())
     self.lblTotalDir.SetLabel(dirCountText)

     self.lblElapsedTime.SetLabel(PlatformMethods.Convert(elapsedText))

     filesCountText = PlatformMethods.Convert(self.scanThread.GetFilesCount())
     self.lblFilesCount.SetLabel(filesCountText)

     self.RefreshLabels()
     event.Skip()
Пример #5
0
 def OnTimer1Timer(self, event):
     """Timer handler: show the module-level scan counters in the dialog.

     Reads ``StartTime``, ``DirCount``, ``FilesCount`` and ``KnownFilesCount``
     from module scope, updates the matching labels and passes the event on.
     """
     global StartTime, DirCount, FilesCount, KnownFilesCount

     elapsedText = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - StartTime)

     dirCountText = PlatformMethods.Convert(DirCount)
     self.lblTotalDir.SetLabel(dirCountText)

     self.lblElapsedTime.SetLabel(PlatformMethods.Convert(elapsedText))

     filesCountText = PlatformMethods.Convert(FilesCount)
     self.lblFilesCount.SetLabel(filesCountText)

     knownCountText = PlatformMethods.Convert(KnownFilesCount)
     self.lblKnownFilesCount.SetLabel(knownCountText)

     self.RefreshLabels()
     event.Skip()
    def UpdateIDF(self, db):
        """Write log10(self.DocID / row count) into the IDF column per WordID.

        Works on ``Constants.TextCatBagOfStemmedWordsTable``: the rows are
        grouped by WordID, and each group's IDF is stored with a parameterized
        update.  Status and elapsed time are published via ``SendEvent``
        before and after the update.

        Args:
            db: database wrapper offering ``FetchAllRows`` and
                ``ExecuteNonQuery(query, params)``.
        """
        secondsSoFar = time.time() - self.StartTime
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            secondsSoFar)
        self.ParseStatus = "Updating Inverse Document Frequency!"
        self.SendEvent()

        # A second event with a refreshed elapsed time is sent before the
        # stemmed-word update starts.
        secondsSoFar = time.time() - self.StartTime
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            secondsSoFar)
        self.SendEvent()

        documentTotal = self.DocID
        countQuery = "select WordID, Count(WordID) from " + Constants.TextCatBagOfStemmedWordsTable + " group by WordID order by WordID;"
        wordRows = db.FetchAllRows(countQuery)
        for wordRow in wordRows:
            updateQuery = "update %s set IDF = ? where WordID = ?" % (
                Constants.TextCatBagOfStemmedWordsTable)
            occurrences = float(wordRow[1])
            idfValue = math.log(documentTotal / occurrences, 10)
            db.ExecuteNonQuery(updateQuery, (idfValue, wordRow[0]))

        self.ParseStatus = "Done Updating Inverse Document Frequency!"
        self.SendEvent()
Пример #7
0
    def OnTimer1Timer(self, event):
        """Timer handler: refresh the counters and finish up when the scan ends.

        Updates the directory, elapsed-time and file-count labels from
        ``self.scanThread``.  Once ``self.scanThread.running`` is false the
        throbber and the timer are stopped, OK is shown and Cancel is hidden.
        """
        secondsSoFar = time.time() - self.StartTime
        elapsedText = CommonFunctions.ConvertSecondsToDayHourMinSec(
            secondsSoFar)

        dirCountText = PlatformMethods.Convert(self.scanThread.GetDirCount())
        self.lblTotalDir.SetLabel(dirCountText)

        self.lblElapsedTime.SetLabel(PlatformMethods.Convert(elapsedText))

        filesCountText = PlatformMethods.Convert(
            self.scanThread.GetFilesCount())
        self.lblFilesCount.SetLabel(filesCountText)

        scanFinished = not self.scanThread.running
        if scanFinished:
            self.throbber1.Stop()
            self.btnOK.Show(True)
            self.btnCancel.Show(False)
            self.timer1.Stop()

        self.RefreshLabels()
        event.Skip()
Пример #8
0
    def OnUpdate(self, evt):
        """Apply a progress event from the write-to-disk worker to the dialog.

        Copies elapsed time, status text, gauge value, estimated time and rate
        from ``evt`` into the widgets.  When the status text is
        "Done Writing To Disk!", both gauges are set to 100, OK replaces
        Cancel and the timer is stopped.
        """
        secondsSoFar = evt.elapsedTime - self.StartTime
        self.lblElapsedTime.SetLabel(
            CommonFunctions.ConvertSecondsToDayHourMinSec(secondsSoFar))

        statusText = PlatformMethods.Decode(evt.scanStatus)
        self.lblScanStatus.SetLabel(statusText)

        progress = int(evt.gaugeValue)
        self.gaugeDDToDiskProgress.SetValue(progress)
        self.lblEstimatedTime.SetLabel(evt.estimatedTime)
        self.lblRate.SetLabel(evt.rate)

        writingDone = str(evt.scanStatus) == "Done Writing To Disk!"
        if writingDone:
            self.btnOK.Show(True)
            self.btnCancel.Show(False)
            self.gaugeDDToDiskProgress.SetValue(100)
            self.gaugePulse.SetValue(100)
            self.timer.Stop()

        self.RefreshLabels()
        evt.Skip()
Пример #9
0
    def SearchKeywords(self):
        """Close the log and the database, then report that indexing is done.

        Closes ``self.fout`` and ``db``, sends one event, records the total
        elapsed time in ``self.ElapsedTime``, sets the final
        ``self.ParseStatus``, sends a second event and clears
        ``self.running``.
        """
        # NOTE(review): `db` is not assigned anywhere in this method;
        # presumably it is a module-level name or the code that created it
        # is missing from this listing - confirm.
        self.fout.close()
        db.CloseConnection()
        self.SendEvent()

        secondsTaken = time.time() - self.StartTime
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            secondsTaken)
        self.ParseStatus = "Done Preprocessing/Indexing!"
        self.SendEvent()
        self.running = False
Пример #10
0
def CreateBloomFilter():
    """Feed the first column of every NSRL table into a Bloom filter and time it.

    Opens ``Constants.NSRLDBName`` and "NSRLBloom.db", makes sure the
    ``BloomFilter`` table exists in the latter, then adds the first column of
    every row of every table in the NSRL database to the filter and prints
    how long that took.  Returns ``None``; if either database cannot be
    opened the function returns early.

    NOTE(review): the filled filter is neither returned nor written to the
    ``BloomFilter`` table (the insert is disabled), so the result is
    currently discarded - confirm whether that is intended.
    """
    db = SqliteDatabase(Constants.NSRLDBName)
    if not db.OpenConnection():
        return
    try:
        #m = no. of bits for vector
        #n = no. of elements or keys to support queries
        #k = no. of hash functions
        m = 10000000
        n = 1000000
        k = 4

        BFilter = BloomFilter.BloomFilter(n=n, m=m, k=k)

        db1 = SqliteDatabase("NSRLBloom.db")
        if not db1.OpenConnection():
            # The outer finally closes the NSRL connection, which previously
            # stayed open on this path.
            return
        try:
            query = """create table if not exists BloomFilter(
        BloomFilter blob);"""
            db1.ExecuteNonQuery(query)

            query = """SELECT name FROM sqlite_master
        WHERE type='table'
        ORDER BY name;
        """
            tables = db.FetchAllRows(query)
            start = time.time()
            for table in tables:
                # Each row holds one column (the table name); index it
                # explicitly instead of formatting the whole row object.
                rows = db.FetchAllRows('select * from %s;' % table[0])
                for row in rows:
                    BFilter.add(row[0])

            end = time.time()
            print('time taken =  %s' %
                  CommonFunctions.ConvertSecondsToDayHourMinSec(end - start))
            #db1.ExecuteMany('insert into BloomFilter (BloomFilter) values (?)', [(cPickle.dumps(BFilter))])
        finally:
            db1.CloseConnection()
    finally:
        db.CloseConnection()
Пример #11
0
    def OnUpdate(self, evt):
        """Apply a progress event from the imaging worker to the dialog.

        Elapsed time and status text are always updated.  Gauge value,
        estimated time and rate are applied on a best-effort basis: any
        ordinary exception raised while applying them is ignored.  When the
        status text is "Done Creating Image!", both gauges are set to 100,
        OK replaces Cancel, the timer is stopped and the completion time is
        printed.
        """
        elapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            evt.elapsedTime - self.StartTime)
        self.lblElapsedTime.SetLabel(elapsedTime)
        self.lblScanStatus.SetLabel(PlatformMethods.Decode(evt.scanStatus))
        try:
            self.gaugeDDProgress.SetValue(int(evt.gaugeValue))
            self.lblEstimatedTime.SetLabel(evt.estimatedTime)
            self.lblRate.SetLabel(evt.rate)
        except Exception:
            # Still best-effort, but no longer a bare except, so SystemExit
            # and KeyboardInterrupt are not swallowed.
            pass

        if str(evt.scanStatus) == "Done Creating Image!":
            self.btnOK.Show(True)
            self.btnCancel.Show(False)
            self.gaugeDDProgress.SetValue(100)
            self.gaugePulse.SetValue(100)
            self.timer.Stop()
            #self.gaugePulse.Pulse(False)
            # NOTE(review): this binds a local that is never read; if a
            # module-level flag was meant, a `global doneReading`
            # declaration is missing - confirm.
            doneReading = True
            print("Done Creating Images at: %s" % time.asctime())

        self.RefreshLabels()
        evt.Skip()
    def Run(self):
        """Parse the address book, e-mail messages and attachments.

        Steps visible in this listing:
          1. open the database ``Globals.EmailsFileName`` (return if that
             fails), create the bloom filter and open the case log file;
          2. parse every ``.csv`` file below ``self.AddressBookPath`` and
             insert ``Globals.AddressBookDict`` into the address book table;
          3. group the files below ``self.AttachmentsPath`` in
             ``self.AttachmentsDict`` under a key derived from the file name;
          4. run ``self.outlookTextParser`` over every ``text/plain`` file
             below ``self.EmailsPath``;
          5. if ``self.IndexAttachments`` is set, walk the collected
             attachments.

        NOTE(review): this listing stops inside step 5, at an ``if`` without
        a body; the remainder of the method is not shown.
        """
        db = SqliteDatabase(Globals.EmailsFileName)
        if not db.OpenConnection():
            return

        self.bloomFilter = self.CreateBloomFilter()
        #self.bloomFilter = None

        # The log file is named after the e-mail database file (its base name
        # plus '.log'), is placed in the case directory and is opened in
        # append mode.
        logFileName = PlatformMethods.Decode(
            os.path.join(
                Globals.CasePath,
                (Globals.EmailsFileName[Globals.EmailsFileName.rfind(os.sep) +
                                        1:] + '.log')))
        self.fout = open(logFileName, 'ab')
        #print self.CheckedMimeTypes
        self.fout.write(
            'Parsing/Indexing Emails Attachments Started at: %s\n' %
            (time.ctime()))

        if self.AddressBookPath:
            self.ParseStatus = "Parsing Address book..."
            self.SendEvent()
            AddressBookParser = OutlookAddressBook.AddressBookParser(
                Globals.AddressBookDict)
            for root, dirs, files in os.walk(self.AddressBookPath):
                for eachfile in files:
                    filePath = os.path.join(root, eachfile)
                    self.FilesCount += 1
                    # Paths without any '.' are skipped.
                    if (filePath.rfind('.') == -1):
                        continue
                    #print filePath
                    extension = filePath[filePath.rfind('.'):]
                    #print 'extension ', extension
                    # Only .csv files are handed to the address book parser.
                    if extension.lower() == ".csv":
                        AddressBookParser.Parse(filePath)
                        #print 'add book parsed'
        else:
            self.fout.write('No Addressbook path found!\n')
        # Update the address book table from Globals.AddressBookDict.
        query1 = "insert into " + Constants.AddressBookTable + "(EmailID, FirstName, MiddleName, LastName, InBook) values (?,?,?,?,?)"
        ManyValues = []
        for key in Globals.AddressBookDict:
            #'EmailID': email, 'FirstName': firstName, 'MiddleName': middleName, 'LastName': lastName, 'InBook':1}
            ManyValues.append((Globals.AddressBookDict[key]['EmailID'],
                               Globals.AddressBookDict[key]['FirstName'],
                               Globals.AddressBookDict[key]['MiddleName'],
                               Globals.AddressBookDict[key]['LastName'],
                               Globals.AddressBookDict[key]['InBook']))

        #query = "delete from %s"%Constants.AddressBookTable
        #db.ExecuteNonQuery(query)
        #print ManyValues
        db.ExecuteMany(query1, ManyValues)

        #self.ParseStatus = "Done Preprocessing/Indexing Emails!"
        #return

        # NOTE(review): textParser, docxParser, docParser and docQuery are not
        # used in the part of the method shown here; presumably the elided
        # attachment-indexing code uses them - confirm.
        textParser = TextParser.TextParser(db,
                                           Globals.EmailsStopwords,
                                           self.Stemmer,
                                           bloomFilter=self.bloomFilter)
        docxParser = DocxParser.DocxParser(db,
                                           Globals.EmailsStopwords,
                                           self.Stemmer,
                                           bloomFilter=self.bloomFilter)
        docParser = DocParser.DocParser(db,
                                        Globals.EmailsStopwords,
                                        self.Stemmer,
                                        bloomFilter=self.bloomFilter)
        docQuery = "insert into %s (DocPath, DocType) values (?, ?)" % (
            Constants.TextCatDocumentsTable)

        if self.AttachmentsPath:
            for root, dirs, files in os.walk(self.AttachmentsPath):
                for eachfile in files:
                    filePath = os.path.join(root, eachfile)

                    # Key: "<1st token> <2nd token with '.' replaced by ':'>
                    # - <text after the last '-' of the file name>".
                    fileNameList = eachfile.split()
                    if len(fileNameList) >= 2:
                        dateTimeFileName = "%s %s - %s" % (
                            fileNameList[0],
                            (fileNameList[1].replace(".", ":")),
                            (eachfile[eachfile.rfind('-') + 1:]))

                        # Several attachments may share one key, so every key
                        # maps to a list of file paths.
                        if self.AttachmentsDict.has_key(dateTimeFileName):
                            self.AttachmentsDict[dateTimeFileName].append(
                                filePath)
                        else:
                            self.AttachmentsDict[dateTimeFileName] = [filePath]
                            #print 'Intersting! more than 1 attach. file found with same date time: %s'%
                        #else:
                        #    self.AttachmentsDict[dateTimeFileName] = filePath
                    else:
                        self.fout.write(
                            'Attachment filename found without date time: %s\n'
                            % (PlatformMethods.Encode(filePath)))

        #AttachmentsDict, Stopwords=[], Stemmer=None
        self.outlookTextParser = OutlookTextParser.OutlookTextParser(
            db,
            self.AttachmentsDict,
            Globals.EmailsStopwords,
            self.Stemmer,
            bloomFilter=self.bloomFilter,
            logFile=self.fout)

        if self.IndexMessages:
            self.ParseStatus = "Parsing and Indexing Emails..."
        else:
            self.ParseStatus = "Parsing Email Headers..."

        self.SendEvent()
        for root, dirs, files in os.walk(self.EmailsPath):
            # NOTE(review): both cancellation exits below return without
            # closing `db` or `self.fout` - confirm that the caller cleans up.
            if not self.keepGoing:
                self.running = False
                return

            for eachfile in files:
                self.FilesCount += 1
                if not self.keepGoing:
                    self.running = False
                    return

                filePath = os.path.join(root, eachfile)
                #print filePath
                if (filePath.rfind('.') == -1):
                    continue

                try:
                    extension = filePath[filePath.rfind('.'):]
                    fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(
                        extension)
                    if fileType:
                        mimeType = fileType.GetMimeType() or "unknown"
                        # Only text/plain files are parsed as messages.
                        if mimeType == "text/plain":
                            try:
                                self.outlookTextParser.parse(
                                    filePath, self.IndexMessages)
                            except Exception, msg:
                                self.fout.write(
                                    'Error Parsing Message: %s Msg:: %s\n' %
                                    (PlatformMethods.Encode(filePath), msg))

                            self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                                time.time() - self.StartTime)

                    # A progress event is sent once more than 10 seconds have
                    # passed since self.EventStart.
                    if (time.time() - self.EventStart) > 10:
                        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                            time.time() - self.StartTime)
                        self.SendEvent()

                except Exception, value:
                    #try:
                    self.fout.write(
                        "Error Parsing Message: %s Msg: %s\n" %
                        (PlatformMethods.Encode(filePath), str(value)))
                    self.fout.flush()
        self.fout.write('Total Time Taken: %s\n\n' % (self.ElapsedTime))

        if self.IndexAttachments:
            self.ParseStatus = "Indexing Attachments..."
            self.SendEvent()
            for key in self.AttachmentsDict:
                for eachfile in self.AttachmentsDict[key]:
                    #try:
                    # NOTE(review): `root` is whatever the last os.walk loop
                    # above left behind, and the values stored in
                    # AttachmentsDict are already joined paths.  os.path.join
                    # returns its second argument unchanged when that is
                    # absolute, so this presumably only works for absolute
                    # attachment paths - confirm.
                    filePath = os.path.join(root, eachfile)

                    self.FileScanStartTime = time.time()
                    self.FilesCount += 1

                    dotIndex = filePath.rfind('.')
                    # Files without an extension are skipped, after the usual
                    # progress bookkeeping.
                    if dotIndex == -1:
                        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                            time.time() - self.StartTime)
                        if (time.time() - self.EventStart) > 10:
                            self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                                time.time() - self.StartTime)
                            self.SendEvent()
                        continue

                    extension = filePath[dotIndex:]

                    fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(
                        extension)
                    if fileType:
                        parsed = False
                        mimeType = fileType.GetMimeType() or "unknown"
                        #if self.CheckedMimeTypes:
                        if mimeType not in self.CheckedMimeTypes:
Пример #14
0
    def run(self):
        """Scan ``self.rootDir`` and record information about every file.

        Opens the known-files, error and progress logs in
        ``Globals.CasePath``, opens the MAC database (and the NSRL database
        when its file exists), calls the ``DBFunctions.Create*`` helpers for
        this evidence, posts an initial "Scan in progress..." event and then
        walks the directory tree: ``ScanFileInfo`` is called for every file,
        and each directory's pickled sub-directory list is put on
        ``SubDirQueue``.

        NOTE(review): this listing appears to end right after the walk loop;
        closing the logs and databases is presumably done in code not shown.
        """
        # Module-level state shared with other parts of the module.
        global FileInfoQueue
        global ScanDone
        global ThumbnailQueue
        global SubDirQueue
        global StartTime
        global DirCount
        global FilesCount
        global TotalImages
        global KnownFilesCount
        
        
        # All three logs are opened with mode 'w', which truncates any
        # existing file.
        knownFileLog = os.path.join(Globals.CasePath, 'KnownFiles.log')
        self.knownFilesLog = open(knownFileLog, 'w')
        
        errorLogFile = os.path.join(Globals.CasePath, 'Errors.log')
        
        self.errorLog = open(errorLogFile, 'w')
        
        self.progressLog = open(os.path.join(Globals.CasePath, 'Progress.log'), 'w')
        
        self.dbMAC = SqliteDatabase(Globals.MACFileName)
        # NOTE(review): returning here leaves the three log files above open.
        if not self.dbMAC.OpenConnection():
            return
        
        # The NSRL database is optional: dbNSRL stays None when the file does
        # not exist.  The result of OpenConnection() is not checked here.
        self.dbNSRL = None
        if os.path.exists(Constants.NSRLDBName):
            self.dbNSRL = SqliteDatabase(Constants.NSRLDBName)
            self.dbNSRL.OpenConnection()
                
        # NOTE(review): the True / drop=True arguments presumably make the
        # helpers drop and recreate existing tables - confirm in DBFunctions.
        DBFunctions.CreateFileSystemTable(Globals.FileSystemName, self.EvidenceID, True)
        DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName, self.EvidenceID, True)
        DBFunctions.CreateMACTables(Globals.MACFileName, self.EvidenceID, drop=True)
        
        if not Globals.EvidencesDict.has_key(self.EvidenceID):
            Globals.EvidencesDict[self.EvidenceID] = {}
            
        # Post an initial status event to the window before scanning starts.
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - StartTime)
        evt = UpdateLabelEvent(elapsedTime = self.ElapsedTime, KnownFilesCount = KnownFilesCount,
            totalDir = DirCount, filesCount=FilesCount, scanStatus = "Scan in progress...")
        wx.PostEvent(self.win, evt)
        
        self.MimeTypeDict = {}
        
        # Reset the timeline bounds for the three timestamp kinds; -1 is the
        # initial value of every bound.
        Globals.TimelinesDict['Created'] = {'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1}
        Globals.TimelinesDict['Modified'] = {'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1}
        Globals.TimelinesDict['Accessed'] = {'MinDate': -1, 'MaxDate': -1, 'MinMonth': -1, 'MaxMonth': -1}
        
        self.UnzipFileNameDict = {}
        
        for root, dirs, files in os.walk(self.rootDir):
            DirCount += len(dirs)
            #print 'first thread'
            #self.ImageCount = 0
            self.SubDirList = dirs
            for afile in files:
                try:
                    # The path is written to the progress log before the file
                    # is scanned.
                    self.progressLog.write("%s\n"%(os.path.join(root, afile)))
                    self.ScanFileInfo(root, afile)

                except Exception, value:
                    #print 'Error: ', value
                    # A failure on one file is logged and the scan continues.
                    self.errorLog.write('%s; ScanError: %s\n'%(os.path.join(root, afile), value))
                    self.errorLog.flush()
                
            # Queue this directory together with its pickled sub-directory
            # list.
            SubDirQueue.put((root, cPickle.dumps(self.SubDirList)))
Пример #15
0
class KeywordsScanThread:
    """Background worker that searches the selected directories for keywords.

    ``Start()`` launches ``Run()`` on a new thread.  ``Run()`` lists every
    directory in ``Globals.KeywordsSearchDirList`` (sub-directories are not
    descended into) and passes qualifying files to ``self.ReadFile``.
    ``Stop()`` asks the worker to finish, and ``IsRunning()`` reports whether
    it is still active.
    """

    # Class-level defaults so IsRunning() and Stop() are safe to call before
    # Start(); Start() overrides both on the instance.
    running = False
    keepGoing = False

    def __init__(self, win, startTime):
        """Store the owning window and start time, and prepare the database.

        Args:
            win: object stored as ``self.win``.
            startTime: timestamp against which elapsed time is measured.
        """
        self.win = win
        self.StartTime = startTime
        self.DirCount = 0
        self.FilesCount = 0
        self.ElapsedTime = ""
        self.SearchStatus = "Search in Progress..."
        self.KeyColumnNames = ""
        # Reference point for the "more than 10 seconds" progress check.
        self.EventStart = time.time()
        DBFunctions.CreateKeywordsFrequencyTable(Globals.KeywordsFileName,
                                                 True)
        self.InitializeKeyWordsFrequencyDictionary()

    def Start(self):
        """Mark the worker as running and launch ``Run`` on a new thread."""
        self.keepGoing = self.running = True
        thread.start_new_thread(self.Run, ())

    def Stop(self):
        """Ask the worker to stop; ``Run`` checks the flag before each file."""
        self.keepGoing = False

    def IsRunning(self):
        """Return True from ``Start()`` until ``Run()`` has finished."""
        return self.running

    def _ScanFile(self, filePath, db):
        """Search one file if it qualifies, then send a progress event if due.

        A file whose path contains a '.' is read only when the MIME type
        found for its extension is in ``Globals.KeywordsSearchCategoryList``;
        a path without any '.' is always read.
        """
        dotIndex = filePath.rfind('.')
        if dotIndex >= 0:
            extension = filePath[dotIndex:]
            fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(
                extension)
            if fileType:
                mimeType = fileType.GetMimeType() or "Unknown"
                if mimeType in Globals.KeywordsSearchCategoryList:
                    self.ReadFile(filePath, db)
                    self.FilesCount += 1
        else:
            self.ReadFile(filePath, db)
            self.FilesCount += 1

        if (time.time() - self.EventStart) > 10:
            self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                time.time() - self.StartTime)
            self.SendEvent()

    def Run(self):
        """Search every listed directory, then report completion.

        Directory entries containing "*.*" and entries that are not
        directories are skipped.  Errors on a single file or directory are
        printed and the search continues.  When the search finishes normally
        the final elapsed time and the "Done Searching!" status are sent; when
        it is cancelled through ``Stop()`` the method returns without that
        final event.  On every exit path the database connection is closed
        and ``self.running`` is cleared.
        """
        db = SqliteDatabase(Globals.KeywordsFileName)
        if not db.OpenConnection():
            self.running = False
            return

        try:
            for dirPath in Globals.KeywordsSearchDirList:
                if dirPath.find("*.*") >= 0:
                    continue
                if not os.path.isdir(dirPath):
                    continue
                try:
                    fileNames = os.listdir(dirPath)
                    self.DirCount += 1
                    for fileName in fileNames:
                        if not self.keepGoing:
                            # Cancelled: the finally clause does the cleanup.
                            return
                        filePath = dirPath + PlatformMethods.GetDirSeparator() + fileName
                        if not os.path.isfile(filePath):
                            continue
                        try:
                            self._ScanFile(filePath, db)
                        except Exception as value:
                            print("Failed to read file: %s Error: %s" % (
                                filePath, value))
                except Exception as value:
                    print("Failed to read directory: %s Error: %s" % (
                        dirPath, value))
                    continue
        finally:
            # Previously the connection leaked on cancellation and `running`
            # was never reset, so IsRunning() stayed True forever.
            db.CloseConnection()
            self.running = False

        finishTime = time.time()
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            finishTime - self.StartTime)
        self.SearchStatus = "Done Searching!"
        self.SendEvent()
Пример #16
0
    def Run(self):
        """Copy the source drive into every image file and report progress.

        ``self.rootDrive`` is read in 16 MB chunks through ``Win32RawIO``;
        each chunk is written to every file named in ``self.listImageNames``
        and fed to an MD5 and a SHA-1 digest.  After each chunk
        ``self.gaugeValue``, ``self.rateInfo`` and ``self.Status`` are updated
        and ``self.SendEvent()`` is called; ``self.EstimatedTime`` is computed
        from the first chunk only.  The loop ends when a read returns no data
        or ``self.keepGoing`` becomes false.  Finally the digests and timings
        are printed, ``self.running`` is cleared and a
        "Done Creating Image!" event is posted to ``self.win``.

        For a logical drive the size comes from
        ``Win32RawIO.GetDiskFreeSpace``; for any other source it is taken from
        the size reported by the opened device.
        """
        m = hashlib.md5()
        sha1 = hashlib.sha1()
        size = 0
        # Defaults so the final event can be built even if no chunk is read.
        self.gaugeValue = 0.0
        self.rateInfo = ""
        self.EstimatedTime = ""

        rfin = Win32RawIO.Win32RAWIO(self.rootDrive, 'r')
        fileObjects = []
        try:
            if self.SourceType == Constants.LogicalDrive:
                rootPath = self.rootDrive[len(self.rootDrive) - 2:]
                spc, bps, fc, c = Win32RawIO.GetDiskFreeSpace(rootPath)
                driveSize = c * spc * bps
            else:
                # The size was previously left at 0 on this path, which made
                # the progress computation divide by zero.
                driveSize = rfin.size
                print("size %dB  %.2fMB" % (rfin.size, rfin.size / 1024. / 1024))
                print("Cylinders = %s" % rfin.cylinders)
                print("Mediatype = %s" % rfin.mediatype)
                print("Tracks/Cylinder = %s" % rfin.trackspercylinder)
                print("Sectors/Track = %s" % rfin.sectorspertrack)
                print("Bytes/Sector = %s" % rfin.bytespersector)

            startTime = time.time()
            print("Startime = %s" % time.asctime())

            for fileName in self.listImageNames:
                fileObjects.append(open(fileName, 'wb'))

            while self.keepGoing:
                self.readTime = time.time()
                data = rfin.read(1024 * 1024 * 16)
                if len(data) == 0:
                    break
                for rfout in fileObjects:
                    rfout.write(data)

                m.update(data)
                sha1.update(data)

                # NOTE: ElapsedTime holds a raw timestamp here, not a duration.
                self.ElapsedTime = time.time()
                # Clamp the interval: a chunk can complete within the clock's
                # resolution, which would otherwise divide by zero.
                elapsed = max(float(self.ElapsedTime - self.readTime), 1e-6)
                rate = float(len(data)) / elapsed
                if size == 0:
                    self.EstimatedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                        float(driveSize) / rate)
                size += len(data)
                if driveSize:
                    self.gaugeValue = (float(size) / float(driveSize)) * 100
                else:
                    self.gaugeValue = 0.0
                self.rateInfo = "%.2fMB of %.2fMB at %.2fMB/sec" % (
                    size / 1024. / 1024, driveSize / 1024. / 1024,
                    rate / 1024. / 1024)
                self.Status = "Creating Image... [%.2f" % self.gaugeValue
                self.Status += "%]"
                self.SendEvent()
        finally:
            # Close every handle even if reading or writing fails.
            for rfout in fileObjects:
                rfout.close()
            rfin.close()

        self.ElapsedTime = time.time()
        print("MD5 Hash = %s" % m.hexdigest().upper())
        print("SHA1 Hash = %s" % sha1.hexdigest().upper())

        print("Endtime = %s" % time.asctime())
        print("Total Elapsed Time = %s" % CommonFunctions.ConvertSecondsToDayHourMinSec(
            self.ElapsedTime - startTime))

        self.running = False

        evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                               gaugeValue=self.gaugeValue,
                               rate=self.rateInfo,
                               estimatedTime=self.EstimatedTime,
                               scanStatus="Done Creating Image!")
        wx.PostEvent(self.win, evt)
Пример #17
0
class MACScanThread:
    """Worker that resumes a file-information scan of ``rootDir``.

    ``Run`` walks the tree below ``rootDir``, skips directories that are
    already listed in the directory-list table, hands every remaining file
    to ``ScanFileInfo`` and writes the rows accumulated in ``self.FileList``
    in batches.  Progress is reported to ``win`` via ``UpdateLabelEvent``.
    """

    # Class-level defaults so IsRunning()/Stop() are safe before Start().
    running = False
    keepGoing = False

    # Timeline names and the keys of each Globals.TimelinesDict entry, in the
    # column order used by the min/max query and the MAC-range insert.
    _TimelineNames = ('Created', 'Modified', 'Accessed')
    _TimelineKeys = ('MinDate', 'MaxDate', 'MinMonth', 'MaxMonth')

    def __init__(self, win, startTime, rootDir):
        """Store the scan parameters and prepare the insert statements.

        win       -- window that receives the progress events.
        startTime -- time.time() value used as the origin for elapsed time.
        rootDir   -- directory whose tree is walked by Run().
        """
        self.win = win
        self.StartTime = startTime
        self.rootDir = rootDir
        self.CurrentDirectory = ""
        self.DirCount = 0
        self.FilesCount = 0
        self.TotalFiles = 0
        self.KnownFilesCount = 0
        self.ElapsedTime = ""
        self.EvidenceID = "Evidence1"
        self.TotalImages = 0
        self.ImageList = []
        self.UnzipRootFolder = "%s%s" % (Globals.CurrentEvidenceID,
                                         Constants.UnzipRootFolderName)

        self.dirListQuery = "INSERT INTO %s%s (DirPath, SubDirList) values (?,?)" % (
            self.EvidenceID, Constants.DirListTable)

        self.query = """INSERT INTO %s(Name, DirPath, Extension, MimeType, Category, Description, Size, Created, CDate, CMonth,
            Modified, MDate, MMonth, Accessed, ADate, AMonth, MD5,KnownFile,NewPath)
            values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""" % (
            self.EvidenceID)

        self.imageQuery = """INSERT INTO %s (DirPath, Filename, Thumbnail ) VALUES (?,?,?)""" % (
            self.EvidenceID)

        self.EventStart = time.time()

    def Start(self):
        """Mark the worker as running and execute Run() on a new thread."""
        self.keepGoing = self.running = True
        thread.start_new_thread(self.Run, ())

    def Stop(self):
        """Clear the keepGoing flag.

        NOTE(review): Run() in this class never reads keepGoing, so this
        does not interrupt a scan that is already in progress.
        """
        self.keepGoing = False

    def IsRunning(self):
        """Return True between Start() and the end of Run()."""
        return self.running

    def GetDirCount(self):
        """Return the number of sub-directories counted so far."""
        return self.DirCount

    def GetFilesCount(self):
        """Return the current value of the FilesCount counter."""
        return self.FilesCount

    def GetKnownFilesCount(self):
        """Return the current value of the KnownFilesCount counter."""
        return self.KnownFilesCount

    def CheckDirPathExists(self, DirList, dirPath):
        """Return True when the encoded dirPath equals column 0 of any row."""
        # Encode once instead of once per row.
        encodedPath = PlatformMethods.Encode(dirPath)
        for row in DirList:
            if encodedPath == row[0]:
                return True
        return False

    def Run(self):
        """Resume the scan and persist the results.

        Every exit path (failed connection, exception, normal completion)
        closes the log files and database connections it opened and resets
        ``running`` so that IsRunning() cannot stay True forever.
        """
        scanConnections = []   # file-system, image and MAC databases
        nsrlConnections = []   # optional NSRL hash database
        self.errorLog = None
        self.progressLog = None
        self.dbNSRL = None
        try:
            self.errorLog = open(
                os.path.join(Globals.CasePath, 'Errors.log'), 'ab')
            self.errorLog.write('Resume Started at: %s\n' % (time.ctime()))

            self.progressLog = open(
                os.path.join(Globals.CasePath, 'Progress.log'), 'ab')
            self.progressLog.write('Resume Started at: %s\n' % (time.ctime()))

            self.dbFileSystem = SqliteDatabase(Globals.FileSystemName)
            if not self.dbFileSystem.OpenConnection():
                return
            scanConnections.append(self.dbFileSystem)

            self.dbImage = SqliteDatabase(Globals.ImagesFileName)
            if not self.dbImage.OpenConnection():
                return
            scanConnections.append(self.dbImage)

            self.dbMAC = SqliteDatabase(Globals.MACFileName)
            if not self.dbMAC.OpenConnection():
                return
            scanConnections.append(self.dbMAC)

            if os.path.exists(Constants.NSRLDBName):
                self.dbNSRL = SqliteDatabase(Constants.NSRLDBName)
                self.dbNSRL.OpenConnection()
                nsrlConnections.append(self.dbNSRL)

            DBFunctions.CreateFileSystemTable(Globals.FileSystemName,
                                              self.EvidenceID, False)
            DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName,
                                              self.EvidenceID, False)
            DBFunctions.CreateMACTables(Globals.MACFileName,
                                        self.EvidenceID,
                                        drop=False)

            if self.EvidenceID not in Globals.EvidencesDict:
                Globals.EvidencesDict[self.EvidenceID] = {}

            self._PostStatus("Scan in progress...")
            self._LoadTimelineRanges()

            self.FileList = []
            self.SubDirList = []
            self._CountExtractedFolders()

            query = "select DirPath from %s%s" % (Globals.CurrentEvidenceID,
                                                  Constants.DirListTable)
            self.DBDirList = self.dbFileSystem.FetchAllRows(query)

            Globals.MimeTypeSet = set([])
            query = "select distinct(MimeType) from %s" % (
                Globals.CurrentEvidenceID)
            for row in self.dbFileSystem.FetchAllRows(query):
                Globals.MimeTypeSet.add(row[0])

            self._ScanTree()

            # Flush the rows still waiting in the batch.
            self.dbFileSystem.ExecuteMany(self.query, self.FileList)
            self.FileList = None

            self._SaveTimelineRanges()

            # The scan databases are closed before UpdateEvidence runs,
            # matching the original ordering.
            self._CloseConnections(scanConnections)
            self.UpdateEvidence(self.rootDir)
            self._CloseConnections(nsrlConnections)

            self._PostStatus("Done Scanning!")
        finally:
            self._CloseConnections(scanConnections)
            self._CloseConnections(nsrlConnections)
            self.running = False
            for log in (self.progressLog, self.errorLog):
                if log is not None:
                    log.close()

    def _CloseConnections(self, connections):
        """Close and remove every connection in the list (safe to repeat)."""
        while connections:
            connections.pop(0).CloseConnection()

    def _PostStatus(self, scanStatus):
        """Refresh ElapsedTime and post the counters to the window."""
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            time.time() - self.StartTime)
        evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                               KnownFilesCount=self.KnownFilesCount,
                               totalDir=self.DirCount,
                               filesCount=self.FilesCount,
                               scanStatus=scanStatus)
        wx.PostEvent(self.win, evt)

    def _LoadTimelineRanges(self):
        """Fill Globals.TimelinesDict from the stored rows, or -1 if none."""
        query = """
        select min(CDate), max(CDate), min(CMonth), max(CMonth),min(MDate), max(MDate), min(MMonth), max(MMonth),
        min(ADate), max(ADate), min(AMonth), max(AMonth) from Evidence1 where CDate <> 0 and CMonth <>0 and CMonth
        <>0 and MDate<>0 and MDate<>0 and MMonth<>0 and MMonth<>0 and CMonth<>0 and ADate<>0 and ADate<>0 and AMonth<>0 and AMonth<>0;
        """
        row = self.dbFileSystem.FetchOneRow(query)

        for index, name in enumerate(self._TimelineNames):
            if row:
                first = index * 4
                bounds = (row[first], row[first + 1],
                          row[first + 2], row[first + 3])
            else:
                bounds = (-1, -1, -1, -1)
            Globals.TimelinesDict[name] = dict(zip(self._TimelineKeys, bounds))

    def _CountExtractedFolders(self):
        """Count extraction folders per name prefix (text before last '-')."""
        self.UnzipFileNameDict = {}
        extractRootPath = os.path.join(
            Globals.CasePath, '%s%s' %
            (Globals.CurrentEvidenceID, Constants.UnzipRootFolderName))
        for dirName in os.listdir(extractRootPath):
            dirNameOnly = dirName[:dirName.rfind('-')]
            self.UnzipFileNameDict[dirNameOnly] = (
                self.UnzipFileNameDict.get(dirNameOnly, 0) + 1)

    def _ScanTree(self):
        """Walk rootDir and scan every directory not yet in the DB list."""
        for root, dirs, files in os.walk(self.rootDir):
            if self.CheckDirPathExists(self.DBDirList, root):
                continue

            # Remove rows left behind by an interrupted scan of this folder.
            query = "delete from %s where DirPath = %s;" % (
                Globals.CurrentEvidenceID, self.dbFileSystem.SqlSQuote(root))
            self.dbFileSystem.ExecuteNonQuery(query)

            self.DirCount += len(dirs)
            self.SubDirList = dirs
            for afile in files:
                try:
                    self.progressLog.write(
                        "%s\n" %
                        (PlatformMethods.Encode(os.path.join(root, afile))))
                    self.ScanFileInfo(root, afile)

                    if len(self.FileList) >= Constants.MaxFileInfoToHold:
                        self.dbFileSystem.ExecuteMany(self.query,
                                                      self.FileList)
                        self.FileList = []

                except Exception as value:
                    # A failing file is logged and the scan carries on.
                    self.errorLog.write(
                        '%s; ScanError: %s\n' %
                        (PlatformMethods.Encode(os.path.join(
                            root, afile)), PlatformMethods.Encode(value)))
                    self.errorLog.flush()

            # Recording the folder marks it as done for a later resume.
            self.dbFileSystem.ExecuteMany(
                self.dirListQuery, [(root, cPickle.dumps(self.SubDirList))])

    def _SaveTimelineRanges(self):
        """Insert the current Globals.TimelinesDict bounds into the MAC DB."""
        query = "INSERT INTO %s%s (CMinDate,CMaxDate,CMinMonth,CMaxMonth, MMinDate,MMaxDate,MMinMonth,MMaxMonth,AMinDate,AMaxDate,AMinMonth,AMaxMonth) values (?,?,?,?,?,?,?,?,?,?,?,?)" % (
            self.EvidenceID, Constants.MACRangeTable)

        values = []
        for name in self._TimelineNames:
            bounds = Globals.TimelinesDict[name]
            for key in self._TimelineKeys:
                values.append(bounds[key])

        self.dbMAC.ExecuteMany(query, [tuple(values)])
Пример #18
0
    def Run(self):
        """Resume the scan of ``self.rootDir``.

        Opens the error/progress logs and the file-system, image and MAC
        databases, reloads the stored timeline bounds into
        ``Globals.TimelinesDict`` and then walks the tree, skipping every
        directory already present in the directory-list table.

        If one of the three databases cannot be opened, everything acquired
        up to that point is released before returning.
        """
        self.errorLog = open(os.path.join(Globals.CasePath, 'Errors.log'),
                             'ab')
        self.errorLog.write('Resume Started at: %s\n' % (time.ctime()))

        self.progressLog = open(os.path.join(Globals.CasePath, 'Progress.log'),
                                'ab')
        self.progressLog.write('Resume Started at: %s\n' % (time.ctime()))

        opened = []
        for attrName, fileName in (('dbFileSystem', Globals.FileSystemName),
                                   ('dbImage', Globals.ImagesFileName),
                                   ('dbMAC', Globals.MACFileName)):
            database = SqliteDatabase(fileName)
            setattr(self, attrName, database)
            if not database.OpenConnection():
                # Do not leak the handles acquired before the failure.
                for earlier in opened:
                    earlier.CloseConnection()
                self.progressLog.close()
                self.errorLog.close()
                return
            opened.append(database)

        self.dbNSRL = None
        if os.path.exists(Constants.NSRLDBName):
            self.dbNSRL = SqliteDatabase(Constants.NSRLDBName)
            self.dbNSRL.OpenConnection()

        DBFunctions.CreateFileSystemTable(Globals.FileSystemName,
                                          self.EvidenceID, False)
        DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName,
                                          self.EvidenceID, False)
        DBFunctions.CreateMACTables(Globals.MACFileName,
                                    self.EvidenceID,
                                    drop=False)

        if self.EvidenceID not in Globals.EvidencesDict:
            Globals.EvidencesDict[self.EvidenceID] = {}

        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            time.time() - self.StartTime)
        evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                               KnownFilesCount=self.KnownFilesCount,
                               totalDir=self.DirCount,
                               filesCount=self.FilesCount,
                               scanStatus="Scan in progress...")
        wx.PostEvent(self.win, evt)

        query = """
        select min(CDate), max(CDate), min(CMonth), max(CMonth),min(MDate), max(MDate), min(MMonth), max(MMonth),
        min(ADate), max(ADate), min(AMonth), max(AMonth) from Evidence1 where CDate <> 0 and CMonth <>0 and CMonth
        <>0 and MDate<>0 and MDate<>0 and MMonth<>0 and MMonth<>0 and CMonth<>0 and ADate<>0 and ADate<>0 and AMonth<>0 and AMonth<>0;
        """
        row = self.dbFileSystem.FetchOneRow(query)

        # Each timeline takes four consecutive columns of the row; -1 marks
        # "no stored value" when the query yields nothing.
        boundKeys = ('MinDate', 'MaxDate', 'MinMonth', 'MaxMonth')
        for index, name in enumerate(('Created', 'Modified', 'Accessed')):
            if row:
                first = index * 4
                bounds = (row[first], row[first + 1],
                          row[first + 2], row[first + 3])
            else:
                bounds = (-1, -1, -1, -1)
            Globals.TimelinesDict[name] = dict(zip(boundKeys, bounds))

        self.FileList = []
        self.SubDirList = []

        # Count extraction folders per name prefix (text before last '-').
        self.UnzipFileNameDict = {}
        extractRootPath = os.path.join(
            Globals.CasePath, '%s%s' %
            (Globals.CurrentEvidenceID, Constants.UnzipRootFolderName))
        for dirName in os.listdir(extractRootPath):
            dirNameOnly = dirName[:dirName.rfind('-')]
            self.UnzipFileNameDict[dirNameOnly] = (
                self.UnzipFileNameDict.get(dirNameOnly, 0) + 1)

        query = "select DirPath from %s%s" % (Globals.CurrentEvidenceID,
                                              Constants.DirListTable)
        self.DBDirList = self.dbFileSystem.FetchAllRows(query)

        Globals.MimeTypeSet = set([])
        query = "select distinct(MimeType) from %s" % (
            Globals.CurrentEvidenceID)
        for mimeRow in self.dbFileSystem.FetchAllRows(query):
            Globals.MimeTypeSet.add(mimeRow[0])

        for root, dirs, files in os.walk(self.rootDir):
            if self.CheckDirPathExists(self.DBDirList, root):
                continue

            # Remove rows left behind by an interrupted scan of this folder.
            query = "delete from %s where DirPath = %s;" % (
                Globals.CurrentEvidenceID, self.dbFileSystem.SqlSQuote(root))
            self.dbFileSystem.ExecuteNonQuery(query)

            self.DirCount += len(dirs)
            self.SubDirList = dirs
            for afile in files:
                try:
                    self.progressLog.write(
                        "%s\n" %
                        (PlatformMethods.Encode(os.path.join(root, afile))))
                    self.ScanFileInfo(root, afile)

                    if len(self.FileList) >= Constants.MaxFileInfoToHold:
                        self.dbFileSystem.ExecuteMany(self.query,
                                                      self.FileList)
                        self.FileList = []

                except Exception as value:
                    # A failing file is logged and the scan carries on.
                    self.errorLog.write(
                        '%s; ScanError: %s\n' %
                        (PlatformMethods.Encode(os.path.join(
                            root, afile)), PlatformMethods.Encode(value)))
                    self.errorLog.flush()

            self.dbFileSystem.ExecuteMany(
                self.dirListQuery, [(root, cPickle.dumps(self.SubDirList))])
class FileScanThread:
    """Worker that indexes the text/HTML files of Globals.TextCatDirList.

    Each accepted file is parsed with the text or HTML parser, the
    word/stem counters are copied back onto the instance and the document
    is recorded through UpdateDocumentDatabase().
    """

    # Class-level defaults so IsRunning()/Stop() are safe before Start().
    running = False
    keepGoing = False

    def __init__(self, win, startTime):
        """Create the parsers and set up the text-categorisation tables.

        win       -- stored on the instance as self.win.
        startTime -- time.time() value used as the origin for elapsed time.
        """
        import HTMLParser
        self.win = win
        self.StartTime = startTime
        self.DocID = 0
        self.WordID = 0
        self.StemmedWordID = 0
        self.DirCount = 0
        self.FilesCount = 0
        self.WordCount = 0
        self.StemmedWordCount = 0
        self.ElapsedTime = ""
        self.ParseStatus = "Indexing in Progress..."
        self.KeyColumnNames = ""
        self.UseStemmer = False
        self.Stemmer = None
        DBFunctions.SetupTextCatTables(Globals.TextCatFileName)

        self.EventStart = time.time()
        self.splitter = re.compile(r'\W*')
        if Globals.Stemmer == "Porter Stemmer":
            self.Stemmer = PorterStemmer()
        self.htmlParser = HTMLParser.HTMLParser(self.Stemmer)
        self.textParser = TextParser.TextParser(self.Stemmer)

    def Start(self):
        """Mark the worker as running and execute Run() on a new thread."""
        self.keepGoing = self.running = True
        thread.start_new_thread(self.Run, ())

    def Stop(self):
        """Ask Run() to stop; it checks the flag before each dir and file."""
        self.keepGoing = False

    def IsRunning(self):
        """Return True between Start() and the end of Run()."""
        return self.running

    def Run(self):
        """Index every directory, then write terms and bitmap to the DB.

        The database connection is closed and ``running`` is reset on every
        exit path, including a stop request or an exception.
        """
        db = SqliteDatabase(Globals.TextCatFileName)
        if not db.OpenConnection():
            self.running = False
            return

        try:
            if not self._IndexDirectories(db):
                # Stop() was requested: leave without writing the terms.
                return

            self.WriteTermsInDatabase(db)
            self.DumpBitMapInDatabase(db)
            db.CloseConnection()
            db = None

            self.SendEvent()

            finishTime = time.time()
            self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                finishTime - self.StartTime)
            self.ParseStatus = "Done Preprocessing/Indexing!"
            self.SendEvent()
        finally:
            if db is not None:
                db.CloseConnection()
            self.running = False

    def _IndexDirectories(self, db):
        """Index all listed directories; return False if a stop was seen."""
        for dirPath in Globals.TextCatDirList:
            if not os.path.isdir(dirPath):
                continue

            if not self.keepGoing:
                return False

            self.DirCount += 1
            for fileName in os.listdir(dirPath):
                if not self.keepGoing:
                    return False

                filePath = os.path.join(dirPath, fileName)
                if not os.path.isfile(filePath):
                    continue

                if filePath.rfind('.') == -1:
                    continue

                try:
                    self._IndexFile(db, dirPath, fileName, filePath)

                    # NOTE(review): EventStart is not reset here, so once
                    # 10 seconds have passed this fires for every file
                    # unless SendEvent() resets it -- confirm.
                    if (time.time() - self.EventStart) > 10:
                        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                            time.time() - self.StartTime)
                        self.SendEvent()
                except Exception as value:
                    # Best effort: report the failing file and move on.
                    try:
                        print("%s %s" % (filePath, value))
                    except Exception:
                        continue
        return True

    def _IndexFile(self, db, dirPath, fileName, filePath):
        """Parse one file if its MIME type is in TextCatCategoryList."""
        extension = filePath[filePath.rfind('.'):]
        fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(extension)
        if not fileType:
            return

        mimeType = fileType.GetMimeType() or "Unknown"
        if mimeType not in Globals.TextCatCategoryList:
            return

        if mimeType == "text/plain":
            self.textParser.parse(filePath, self.WordID, self.StemmedWordID)
            self.WordID = self.textParser.GetWordID()
            self.StemmedWordID = self.textParser.GetStemmedWordID()
            self.WordCount = self.textParser.GetWordCount()
            self.StemmedWordCount = self.textParser.GetStemmedWordCount()
        else:
            # Every other accepted type is handed to the HTML parser.
            fin = open(filePath, "r")
            try:
                data = fin.read()
            finally:
                fin.close()
            self.htmlParser.ResetCounters()
            self.htmlParser.parse(data, self.WordID, self.StemmedWordID)
            # NOTE(review): the IDs are read back from textParser although
            # htmlParser did the parsing; this looks like a copy-paste slip.
            # Left unchanged because HTMLParser's accessors are not visible.
            self.WordID = self.textParser.GetWordID()
            self.StemmedWordID = self.textParser.GetStemmedWordID()

            self.WordCount = self.htmlParser.GetWordCount()
            self.StemmedWordCount = self.htmlParser.GetStemmedWordCount()

        self.FilesCount += 1
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            time.time() - self.StartTime)
        self.DocID += 1

        self.UpdateDocumentDatabase(db, dirPath, fileName)
        self.InitializeDocsInfo()
Пример #21
0
    def ImportNSRLHashAndProduct(self):
        """Load the MD5 column of ``NSRLFile.txt`` into the NSRL database.

        The file is read from ``self.dirPath``.  Each hash is stored in the
        table named by CommonFunctions.GetMD5HashBucketID(md5); tables are
        created on first use.  The input file and the database connection
        are closed on every exit path.
        """
        db = SqliteDatabase(Constants.NSRLDBName)
        if not db.OpenConnection():
            return

        try:
            nsrlFileName = os.path.join(self.dirPath, "NSRLFile.txt")
            try:
                fin = open(nsrlFileName)
            except IOError:
                print('Error Opening file:  %s' % nsrlFileName)
                return

            try:
                self._ImportNSRLRows(db, fin)
            finally:
                fin.close()
        finally:
            db.CloseConnection()

    def _ImportNSRLRows(self, db, fin):
        """Insert the hashes read from fin and publish progress events."""
        # Determine the file size for the progress gauge.
        try:
            fin.seek(0, 2)
            size = fin.tell()
            fin.seek(0)
        except TypeError:
            size = None

        self.EstimatedTime = ""
        self.startTime = time.time()
        createdTables = set()
        count = 0
        isHeader = True   # the first line of the file is skipped

        # Checking keepGoing here too makes a stop request end the import
        # instead of reading the remainder of the file batch by batch.
        while self.keepGoing:
            rows = fin.readlines(10000)
            if not rows:
                break

            for row in rows:
                if not self.keepGoing:
                    break

                if isHeader:
                    isHeader = False
                    continue

                fields = row.split(',')
                if len(fields) < 2:
                    # Blank or malformed line: nothing to import.
                    continue

                md5 = fields[1].replace('"', '')
                table = CommonFunctions.GetMD5HashBucketID(md5)

                if table not in createdTables:
                    createdTables.add(table)
                    db.ExecuteNonQuery(
                        "CREATE TABLE IF NOT EXISTS %s (MD5 varchar(32) primary key);"
                        % table)

                # NOTE(review): the statement is built by string formatting
                # from file content; switch to a parameterised call if
                # SqliteDatabase offers one for single statements.
                try:
                    db.ExecuteNonQuery("insert into %s (MD5) values ('%s');" %
                                       (table, md5))
                except Exception:
                    # Deliberate best effort: a failed insert (presumably a
                    # duplicate primary key) must not stop the import.
                    pass

                if size and not count % 10000:
                    done = fin.tell()
                    self.gaugeValue = float(done * 100) / float(size)

                    self.ElapsedTime = time.time()
                    timeTaken = float(self.ElapsedTime - self.startTime)
                    if timeTaken == 0:
                        timeTaken = 1
                    rate = float(done) / timeTaken
                    self.rateInfo = "%.2fMB of %.2fMB at %.2fMB/sec" % (
                        done / 1024. / 1024, size / 1024. / 1024,
                        rate / 1024. / 1024)
                    self.Status = "Updating NSRL Software Hashes... [%.2f" % self.gaugeValue
                    self.Status += "%]"
                    self.SendEvent()

                    # The estimate is computed once, from the rate observed
                    # after the first 10000 rows.
                    if count == 10000:
                        self.EstimatedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                            float(size) / rate)

                count += 1
Пример #22
0
class FileScanThread:
    """Worker that walks ``rootPath`` and feeds every file to the text indexer.

    ``Start()`` launches ``Run()`` on a new thread, ``Stop()`` clears the
    ``keepGoing`` flag that ``Run()`` checks before each file, and
    ``IsRunning()`` returns the ``running`` flag.  Counters and status
    strings are kept as public attributes.

    NOTE(review): ``self.SendEvent()`` is called below but is not defined in
    the part of the class shown here -- confirm where it is provided.
    """

    def __init__(self, win, startTime, rootPath):
        """Store the scan parameters and set up the index tables.

        win       -- stored as ``self.win``; not used by the methods below.
        startTime -- timestamp subtracted from ``time.time()`` to compute
                     ``ElapsedTime``.
        rootPath  -- directory handed to ``os.walk()`` by ``Run()``.
        """
        # Kept from the original.  This import is local to __init__, so the
        # ``HTMLParser`` name used by the parsing code is resolved at module
        # level, not through this statement.
        import HTMLParser
        self.win = win
        self.StartTime = startTime
        self.rootPath = rootPath
        self.DocID = 0
        self.WordID = 0
        self.StemmedWordID = 0
        self.DirCount = 0
        self.FilesCount = 0
        self.WordCount = 0
        self.StemmedWordCount = 0
        self.ElapsedTime = ""
        self.ParseStatus = "Indexing in Progress..."
        self.KeyColumnNames = ""
        self.UseStemmer = False
        self.Stemmer = None
        # Defined up front so IsRunning() works before Start() is called.
        self.keepGoing = self.running = False
        DBFunctions.SetupSqliteIndexTables(Globals.TextCatFileName)

        self.EventStart = time.time()
        if Globals.Stemmer == "Porter Stemmer":
            self.Stemmer = PorterStemmer()

    def Start(self):
        """Set both state flags and run ``Run()`` on a new thread."""
        self.keepGoing = self.running = True
        thread.start_new_thread(self.Run, ())

    def Stop(self):
        """Clear ``keepGoing``; ``Run()`` returns before the next file."""
        self.keepGoing = False

    def IsRunning(self):
        """Return the ``running`` flag."""
        return self.running

    def Run(self):
        """Index every file below ``self.rootPath``.

        Opens the database named by ``Globals.TextCatFileName``, walks the
        tree and hands each file to ``_IndexFile``.  The final status events
        are only sent when the walk ran to completion; they are skipped when
        the database cannot be opened or ``Stop()`` interrupted the walk.
        ``self.running`` is False whenever this method returns.
        """
        db = SqliteDatabase(Globals.TextCatFileName)
        if not db.OpenConnection():
            # Fix: the original returned here with ``running`` still True.
            self.running = False
            return

        try:
            try:
                textParser = TextParser.TextParser(db, Globals.Stopwords,
                                                   self.Stemmer)
                completed = self._ScanTree(db, textParser)
            finally:
                # Fix: the original left the connection open when the scan
                # was stopped early.
                db.CloseConnection()

            if completed:
                self.SendEvent()
                self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                    time.time() - self.StartTime)
                self.ParseStatus = "Done Preprocessing/Indexing!"
                self.SendEvent()
        finally:
            self.running = False

    def _ScanTree(self, db, textParser):
        """Walk ``rootPath``; return False if ``Stop()`` cut the walk short."""
        for dirPath, dirs, files in os.walk(self.rootPath):
            self.DirCount += 1
            for afile in files:
                self.FilesCount += 1
                if not self.keepGoing:
                    return False

                filePath = os.path.join(dirPath, afile)
                try:
                    self._IndexFile(db, textParser, filePath)
                    self._ReportProgress()
                except Exception as value:
                    # A single failing file must not abort the whole scan.
                    try:
                        print("Error in Text Preprocessing:  %s %s" %
                              (filePath, value))
                    except Exception:
                        # The path or the message itself could not be printed.
                        print("Error in Text Preprocessing...")
        return True

    def _LookupMimeType(self, filePath):
        """Return the MIME type wx reports for the path, or None.

        None is returned when the path contains no '.' or wx has no file
        type for the text from the last '.' onwards.  A file type without a
        MIME type yields "unknown".
        """
        dotIndex = filePath.rfind('.')
        if dotIndex < 0:
            return None
        fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(
            filePath[dotIndex:])
        if not fileType:
            return None
        return fileType.GetMimeType() or "unknown"

    def _IndexFile(self, db, textParser, filePath):
        """Insert a document row for ``filePath`` and parse its text.

        A file whose MIME type is known but missing from a non-empty
        ``Globals.TextCatCategoryList`` is skipped without a row.
        """
        mimeType = self._LookupMimeType(filePath)
        categories = Globals.TextCatCategoryList
        if mimeType is not None and categories and mimeType not in categories:
            return

        # Fix: every parsed file gets its own row.  The original only
        # inserted one for files with a known type, so the fallback parse of
        # other files used an unbound DocID or the previous file's DocID.
        query = "insert into %s (DocPath) values (?)" % (
            Constants.TextCatDocumentsTable)
        DocID = db.InsertAutoRow(query, [(filePath, )])

        if mimeType == 'application/msword':
            extractText = MSOfficeToText.WordToText
        elif mimeType == 'application/pdf':
            extractText = PDFToText.GetText
        else:
            extractText = None

        if extractText is not None:
            try:
                textParser.parse(DocID, extractText(filePath), filePath)
                return
            except Exception:
                # Deliberate best effort: when the dedicated converter (or
                # parsing its output) fails, retry with the generic reader.
                pass

        textParser.parse(DocID, HTMLParser.getText(filePath), filePath)

    def _ReportProgress(self):
        """Refresh ``ElapsedTime``; send an event once more than 10 seconds
        have passed since ``EventStart``.

        NOTE(review): ``EventStart`` is not reset anywhere in this class;
        presumably ``SendEvent`` does that -- confirm.
        """
        now = time.time()
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            now - self.StartTime)
        if (now - self.EventStart) > 10:
            self.SendEvent()
Пример #23
0
    #excel.Quit()


if __name__ == "__main__":
    # Manual smoke test: convert one Word document to text, print each word
    # and report how long the conversion plus printing took.
    import os.path
    import time
    import re

    stTime = time.time()
    # ``\W+`` instead of the original ``\W*``: a pattern that can match the
    # empty string splits between every character on Python 3.7+, while on
    # Python 2 both patterns produce the same word list.
    splitter = re.compile(r'\W+')
    docFileName = r'c:\test.doc'
    data = WordToText(docFileName)

    for word in splitter.split(data):
        try:
            print(word)
        except Exception:
            # Typically a word the console encoding cannot represent.
            print('error')

    endTime = time.time()
    print(endTime - stTime)
    # The original computed the formatted duration and discarded it.
    print(CommonFunctions.ConvertSecondsToDayHourMinSec(endTime - stTime))
    #print os.path.exists(r'Data\NSRL.db')
    #docFileName = r"C:\NMT\Research\ForensicsTool\EmailTest1\Attachments\2006-12-01 14.30.25 - Director's Secretary - Lawson Consultant Team - Lawson Consultant Team-cell and email.doc"
    #import os.path
    #print os.path.isfile(docFileName)

    #excelFile = r'C:\Documents and Settings\Ram\Desktop\Test\TomSavageKeywords.xls'
    #ExcelToText(excelFile)
    #pptFile = r'C:\Documents and Settings\Ram\Desktop\Test\BasnetCACTUSTextCat.ppt'
    #PowerpointToText(pptFile)
Пример #24
0
    def Run(self):
        """Walk ``self.rootPath`` and index the text of every file found.

        Each indexed file gets a row in the documents table and is parsed
        with the converter matching its MIME type, falling back to
        ``HTMLParser.getText``.  Files whose MIME type is known but missing
        from a non-empty ``Globals.TextCatCategoryList`` are skipped.

        Changes from the original:
          * files without a '.' in their path, or without a file type known
            to wx, now get their own document row; before, they were parsed
            with an unbound DocID or the previous file's DocID;
          * the database connection is closed on every exit path;
          * ``self.running`` is cleared on every exit path, including a
            failed ``OpenConnection()``.
        """
        db = SqliteDatabase(Globals.TextCatFileName)
        if not db.OpenConnection():
            self.running = False
            return

        insertDoc = "insert into %s (DocPath) values (?)" % (
            Constants.TextCatDocumentsTable)
        try:
            textParser = TextParser.TextParser(db, Globals.Stopwords,
                                               self.Stemmer)

            for dirPath, dirs, files in os.walk(self.rootPath):
                self.DirCount += 1
                for afile in files:
                    self.FilesCount += 1
                    if not self.keepGoing:
                        return

                    filePath = os.path.join(dirPath, afile)
                    try:
                        # None means "type could not be determined".
                        mimeType = None
                        dotIndex = filePath.rfind('.')
                        if dotIndex >= 0:
                            fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(
                                filePath[dotIndex:])
                            if fileType:
                                mimeType = fileType.GetMimeType() or "unknown"

                        wanted = (mimeType is None
                                  or not Globals.TextCatCategoryList
                                  or mimeType in Globals.TextCatCategoryList)
                        if wanted:
                            DocID = db.InsertAutoRow(insertDoc,
                                                     [(filePath, )])

                            converter = None
                            if mimeType == 'application/msword':
                                converter = MSOfficeToText.WordToText
                            elif mimeType == 'application/pdf':
                                converter = PDFToText.GetText

                            parsed = False
                            if converter is not None:
                                try:
                                    textParser.parse(DocID,
                                                     converter(filePath),
                                                     filePath)
                                    parsed = True
                                except Exception:
                                    # Best effort: retry with the generic
                                    # reader below.
                                    parsed = False

                            if not parsed:
                                textParser.parse(DocID,
                                                 HTMLParser.getText(filePath),
                                                 filePath)

                        # Progress is refreshed for skipped files as well.
                        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                            time.time() - self.StartTime)
                        if (time.time() - self.EventStart) > 10:
                            self.SendEvent()
                    except Exception as value:
                        # A single failing file must not abort the scan.
                        try:
                            print("Error in Text Preprocessing:  %s %s" %
                                  (filePath, value))
                        except Exception:
                            print("Error in Text Preprocessing...")
        finally:
            db.CloseConnection()
            self.running = False
Пример #25
0
    def __init__(self, prnt):
        """Create the frame, load the stored directory tree and display it.

        prnt -- parent window passed to ``wx.Frame.__init__``.

        The evidence location, display name and pickled directory tree are
        read from the first row returned for ``Constants.EvidencesTable`` in
        "caseNew.cfi".  Timing information for the load and for building the
        tree view is printed to stdout.

        Changes from the original: the database connection is now closed,
        and ``evidencePath`` / ``evidenceName`` default to "" so that a
        failed connection or an empty table no longer ends in an
        AttributeError / TypeError.
        """
        # First, call the base class' __init__ method to create the frame
        wx.Frame.__init__(self, id=-1, name='', parent=prnt,
            pos=wx.Point(0, 0), size=wx.Size(600, 600),
            style=wx.DEFAULT_FRAME_STYLE, title="Dir tree view")

        self.panViewFolders = wx.Panel(id=-1,
              name='panViewFolders', parent=self, pos=wx.Point(8, 176),
              size=wx.Size(500, 500), style=wx.TAB_TRAVERSAL)
        self.panViewFolders.SetBackgroundColour(wx.Colour(225, 236, 255))

        self.treeViewFolders = wx.TreeCtrl(id=wx.NewId(), name='treeViewFolders', parent=self.panViewFolders,
            pos=wx.Point(0, 0), size=wx.Size(600, 600),
            style=wx.HSCROLL | wx.VSCROLL | wx.TR_HAS_BUTTONS)

        # Values used when nothing can be loaded from the case file.
        self.DirectoryDict = {}
        self.evidencePath = ""
        self.evidenceName = ""

        # The original carried a disabled (string-quoted) block here that
        # walked the evidence directory and stored the pickled tree in
        # "caseNew.cfi"; only the loading side is active.

        print("start time load  %s" % (time.ctime(),))
        startTime = time.time()
        db = SqliteDatabase("caseNew.cfi")
        if db.OpenConnection():
            try:
                query = "select Location, DisplayName, DirTree from " + Constants.EvidencesTable
                row = db.FetchOneRow(query)
                if row:
                    self.evidencePath = row[0]
                    self.evidenceName = row[1]
                    # NOTE(review): unpickling executes arbitrary code if the
                    # case file comes from an untrusted source.
                    self.DirectoryDict = cPickle.loads(str(row[2]))
            finally:
                db.CloseConnection()

        print("end time  %s" % (time.ctime(),))
        endTime = time.time()
        print("Time taken to load db  %s" %
              (CommonFunctions.ConvertSecondsToDayHourMinSec(endTime - startTime),))

        print("start build tree time  %s" % (time.ctime(),))
        startTime = time.time()
        self.treeDir = DirectoryTreeView(self, self.treeViewFolders, self.DirectoryDict, self.evidenceName, self.evidencePath)
        print("end build tree time  %s" % (time.ctime(),))
        endTime = time.time()
        print("Time taken  %s" %
              (CommonFunctions.ConvertSecondsToDayHourMinSec(endTime - startTime),))
Пример #26
0
    def Run(self):
        #print Globals.TextCatCategoryList

        db = SqliteDatabase(Globals.TextCatFileName)
        logFileName = PlatformMethods.Decode(
            os.path.join(Globals.CasePath, (
                Globals.TextCatFileName[Globals.TextCatFileName.rfind(os.sep) +
                                        1:] + '.log')))
        self.fout = open(logFileName, 'ab')
        if not db.OpenConnection():
            return

        #self.bloomFilter = self.CreateBloomFilter()
        self.bloomFilter = None
        #self.htmlParser = HTMLParser.HTMLParser(self.Stemmer)
        textParser = TextParser.TextParser(db,
                                           Globals.Stopwords,
                                           Stemmer=self.Stemmer,
                                           bloomFilter=self.bloomFilter)
        #self.WordDict = {}
        #print Globals.TextCatDirList
        docxParser = DocxParser.DocxParser(db, Globals.Stopwords, self.Stemmer)
        docParser = DocParser.DocParser(db, Globals.Stopwords, self.Stemmer)
        query = "insert into %s (DocPath) values (?)" % (
            Constants.TextCatDocumentsTable)

        self.filePath = ""
        for dirPath, dirs, files in os.walk(self.rootPath):
            self.DirCount += 1
            for afile in files:
                self.FileScanStartTime = time.time()
                self.FilesCount += 1
                """
                if (self.FilesCount % Globals.TotalFilesToHold) == 0 and self.WordDict:
                    self.ParseStatus = "Writing to database..."
                    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime)
                    self.SendEvent()
                    self.HandleWords(self.WordDict)
                    self.ParseStatus = "Indexing in Progress..."
                    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - self.StartTime)
                    self.SendEvent()
                    self.WordDict = {}
                """
                if not self.keepGoing:
                    self.running = False
                    return

                self.filePath = os.path.join(dirPath, afile)
                try:
                    #print filePath
                    parsed = False
                    dotIndex = self.filePath.rfind('.')
                    extension = ""
                    if dotIndex >= 0:
                        extension = self.filePath[dotIndex:]

                        fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(
                            extension)
                        if fileType:
                            mimeType = fileType.GetMimeType() or "unknown"
                            if Globals.TextCatCategoryList:
                                if mimeType not in Globals.TextCatCategoryList:

                                    self.FileScanStartTime = time.time()
                                    #self.fout.write('%s :'%(self.filePath))
                                    #query = "insert into %s (DocPath) values (?)"%(Constants.TextCatDocumentsTable)

                                    #default list of all the mime types doesn't seem to produce the mime type for
                                    # MS docx document
                                    if mimeType == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or extension == '.docx':
                                        try:
                                            DocID = db.InsertAutoRow(
                                                query,
                                                [(PlatformMethods.Encode(
                                                    self.filePath), )])
                                            #docID, filePath, startTime, logFile, extractMedia = False, MediaPath=""
                                            docxParser.Parse(
                                                DocID,
                                                self.filePath,
                                                self.FileScanStartTime,
                                                self.fout,
                                                extractMedia=False,
                                                MediaPath="")
                                            parsed = True
                                        except Exception, value:
                                            self.fout.write(
                                                "Error in docxParser : %s Value: %s\n"
                                                % (self.filePath, value))
                                            #gives junk so let's not parse it using binary
                                            #parsed = True
                                            #docxParser.Parse(DocID, self.filePath, extractMedia = False, MediaPath="")

                                    curTime = time.time()
                                    self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                                        curTime - self.StartTime)

                                    #self.fout.write('%s\n'%(CommonFunctions.ConvertSecondsToDayHourMinSec(curTime - self.FileScanStartTime)))
                                    #self.fout.flush()

                                    if (curTime - self.EventStart) > 10:
                                        #print time.time() - self.EventStart
                                        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                                            curTime - self.StartTime)
                                        self.SendEvent()

                                    continue
                                else:
                                    pass

                            #print filePath
                            self.FileScanStartTime = time.time()
                            #self.fout.write('%s :'%(self.filePath))

                            DocID = db.InsertAutoRow(
                                query,
                                [(PlatformMethods.Encode(self.filePath), )])
                            #print 'mimeType ', mimeType
                            #print 'extension ', extension
                            if mimeType == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or extension == '.docx':
                                try:
                                    #docID, filePath, startTime, logFile, extractMedia = False, MediaPath=""
                                    docxParser.Parse(DocID,
                                                     self.filePath,
                                                     self.FileScanStartTime,
                                                     self.fout,
                                                     extractMedia=False,
                                                     MediaPath="")
                                    parsed = True
                                except Exception, value:
                                    #gives junk so let's not parse it using binary
                                    parsed = True
                                    self.fout.write(
                                        "Error in docxParser : %s Value: %s\n"
                                        % (PlatformMethods.Encode(
                                            self.filePath), value))

                            elif mimeType == 'application/msword':
                                """
                                try:
                                    textParser.parse(DocID, MSOfficeToText.WordToText(self.filePath), self.filePath, self.FileScanStartTime, self.fout)
                                    parsed = True
                                except Exception, value:
                                    self.fout.write("Error in MSOfficeToText.WordToText : %s Value: %s\n"%(self.filePath, value))
                                """
                                try:
                                    #docID, filePath, startTime, logFile, extractMedia = False, MediaPath=""
                                    docParser.Parse(DocID,
                                                    self.filePath,
                                                    self.FileScanStartTime,
                                                    self.fout,
                                                    extractMedia=False,
                                                    MediaPath="")
                                    parsed = True
                                except Exception, value:
                                    #gives junk so let's not parse it using binary
                                    self.fout.write(
                                        "Error in DocParser : %s Value: %s\n" %
                                        (PlatformMethods.Encode(
                                            self.filePath), value))

                            elif mimeType == 'application/pdf':
                                try:
                                    textParser.parse(
                                        DocID,
                                        PDFToText.GetText(self.filePath),
                                        self.filePath, self.FileScanStartTime,
                                        self.fout)
                                    parsed = True
                                except Exception, value:
                                    self.fout.write(
                                        "Error in PDFToText: %s Value: %s\n" %
                                        (PlatformMethods.Encode(
                                            self.filePath), value))

                            elif mimeType == 'text/plain':
                                try:
                                    fin = open(self.filePath, 'rb')
                                    #data = fin.read(4096)
                                    #while data:
                                    textParser.parse(DocID, fin.read(),
                                                     self.filePath,
                                                     self.FileScanStartTime,
                                                     self.fout)
                                    parsed = True
                                    fin.close()

                                except Exception, value:
                                    self.fout.write(
                                        "Error in text/plain : %s Value: %s\n"
                                        % (PlatformMethods.Encode(
                                            self.filePath), value))
Пример #27
0
    def Run(self):
        """Read the files directly inside each configured search directory.

        For every entry of ``Globals.KeywordsSearchDirList`` that is an
        existing directory and does not contain "*.*", each regular file in
        it (sub-directories are not descended into) is passed to
        ``self.ReadFile`` when either
          * its path contains no '.', or
          * wx reports a MIME type for it that is listed in
            ``Globals.KeywordsSearchCategoryList``.
        Returns immediately when ``self.keepGoing`` becomes false.

        Change from the original: the database connection is now closed on
        every exit path; locals no longer shadow the builtins ``dir`` and
        ``file``.
        """
        db = SqliteDatabase(Globals.KeywordsFileName)
        if not db.OpenConnection():
            return

        try:
            for searchDir in Globals.KeywordsSearchDirList:
                if searchDir.find("*.*") >= 0 or not os.path.isdir(searchDir):
                    continue
                try:
                    names = os.listdir(searchDir)
                    self.DirCount += 1
                    for name in names:
                        if not self.keepGoing:
                            return
                        filePath = searchDir + PlatformMethods.GetDirSeparator() + name
                        if not os.path.isfile(filePath):
                            continue
                        try:
                            dotIndex = filePath.rfind('.')
                            if dotIndex >= 0:
                                fileType = wx.TheMimeTypesManager.GetFileTypeFromExtension(
                                    filePath[dotIndex:])
                                if fileType:
                                    mimeType = fileType.GetMimeType() or "Unknown"
                                    if mimeType in Globals.KeywordsSearchCategoryList:
                                        self.ReadFile(filePath, db)
                                        self.FilesCount += 1
                            else:
                                # No '.' anywhere in the path: always read.
                                self.ReadFile(filePath, db)
                                self.FilesCount += 1

                            if (time.time() - self.EventStart) > 10:
                                self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                                    time.time() - self.StartTime)
                                self.SendEvent()
                        except Exception as value:
                            # Keep going with the next file.
                            print("Failed to read file: %s Error: %s" % (
                                filePath, value))
                except Exception as value:
                    # Keep going with the next directory.
                    print("Failed to read directory: %s Error: %s" % (
                        searchDir, value))
        finally:
            db.CloseConnection()
Пример #28
0
class MACScanThread(threading.Thread):
    """Background worker that walks ``rootDir`` and scans every file found.

    For each directory visited the (possibly updated) sub-directory list is
    pickled and pushed onto the module-level ``SubDirQueue``.  When the walk
    is over, the min/max values collected in ``Globals.TimelinesDict`` are
    written to the evidence's MAC range table and the module-level
    ``ScanDone`` flag is raised.

    Progress is reported to ``win`` through ``UpdateLabelEvent`` objects
    posted with ``wx.PostEvent``.

    NOTE(review): ``ScanFileInfo`` is called by the scan loop but is not
    defined in this class as shown here; it has to be supplied elsewhere
    (e.g. a subclass or a part of the class that is not visible).
    """

    # Timeline categories, in the column order of the MAC range table
    # (C*, M*, A* columns of the INSERT statement in _WriteMACRange).
    _TimelineKinds = ('Created', 'Modified', 'Accessed')

    def __init__(self, win, rootDir):
        """Remember the window to notify and the directory to scan.

        win     -- wx window that receives the UpdateLabelEvent notifications.
        rootDir -- top of the directory tree handed to os.walk.
        """
        threading.Thread.__init__(self)
        self.win = win
        self.rootDir = rootDir
        self.CurrentDirectory = ""
        self.EvidenceID = "Evidence1"
        self.EventStart = time.time()
        self.keepGoing = self.running = True

    def Start(self):
        """Reset the control flags and launch the scan on a new thread."""
        self.keepGoing = self.running = True
        # The worker method is ``run`` (lower case, the threading.Thread
        # hook).  The original referenced ``self.Run``, which does not exist
        # on this class and raised AttributeError.
        thread.start_new_thread(self.run, ())

    def Stop(self):
        """Ask the scan to stop; honoured before the next directory is read."""
        self.keepGoing = False

    def IsRunning(self):
        """Return True until run() has finished (normally or not)."""
        return self.running

    def run(self):
        """Scan the tree under ``self.rootDir`` and store the MAC date ranges.

        The log files and database connections are released on every exit
        path, and ``self.running`` is always cleared, so IsRunning() cannot
        report a dead scan as alive.  ``ScanDone`` is only set when the scan
        ran to completion, exactly as before.
        """
        global ScanDone

        self.knownFilesLog = self.errorLog = self.progressLog = None
        try:
            self.knownFilesLog = open(
                os.path.join(Globals.CasePath, 'KnownFiles.log'), 'w')
            self.errorLog = open(
                os.path.join(Globals.CasePath, 'Errors.log'), 'w')
            self.progressLog = open(
                os.path.join(Globals.CasePath, 'Progress.log'), 'w')

            self.dbMAC = SqliteDatabase(Globals.MACFileName)
            if not self.dbMAC.OpenConnection():
                # Nothing can be recorded without the MAC database; the
                # outer finally still closes the logs and clears `running`.
                return

            self.dbNSRL = None
            try:
                # The NSRL database is optional and only used when present.
                if os.path.exists(Constants.NSRLDBName):
                    self.dbNSRL = SqliteDatabase(Constants.NSRLDBName)
                    self.dbNSRL.OpenConnection()

                DBFunctions.CreateFileSystemTable(Globals.FileSystemName, self.EvidenceID, True)
                DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName, self.EvidenceID, True)
                DBFunctions.CreateMACTables(Globals.MACFileName, self.EvidenceID, drop=True)

                if self.EvidenceID not in Globals.EvidencesDict:
                    Globals.EvidencesDict[self.EvidenceID] = {}

                self._PostStatus("Scan in progress...")

                self.MimeTypeDict = {}
                # -1 marks "no value seen yet" for every timeline bound.
                for kind in self._TimelineKinds:
                    Globals.TimelinesDict[kind] = {
                        'MinDate': -1, 'MaxDate': -1,
                        'MinMonth': -1, 'MaxMonth': -1}
                self.UnzipFileNameDict = {}

                self._ScanTree()
                self._WriteMACRange()
            finally:
                self.dbMAC.CloseConnection()
                if self.dbNSRL:
                    self.dbNSRL.CloseConnection()

            self._PostStatus("Done Reading! Now Writing!!")
        finally:
            self.running = False
            for log in (self.knownFilesLog, self.progressLog, self.errorLog):
                if log is not None:
                    log.close()

        # Only reached on normal completion (not on early return/exception).
        ScanDone = True

    def _PostStatus(self, scanStatus):
        """Refresh ``self.ElapsedTime`` and post the counters to the window."""
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(time.time() - StartTime)
        evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime, KnownFilesCount=KnownFilesCount,
                               totalDir=DirCount, filesCount=FilesCount, scanStatus=scanStatus)
        wx.PostEvent(self.win, evt)

    def _ScanTree(self):
        """Walk ``self.rootDir``, scanning each file and queueing each dir."""
        global DirCount

        for root, dirs, files in os.walk(self.rootDir):
            if not self.keepGoing:
                # Stop() was requested; the original never checked the flag.
                break
            DirCount += len(dirs)
            self.SubDirList = dirs
            for afile in files:
                fullPath = os.path.join(root, afile)
                try:
                    self.progressLog.write("%s\n" % fullPath)
                    self.ScanFileInfo(root, afile)
                except Exception as value:
                    # One bad file must not abort the scan: log and go on.
                    self.errorLog.write('%s; ScanError: %s\n' % (fullPath, value))
                    self.errorLog.flush()

            SubDirQueue.put((root, cPickle.dumps(self.SubDirList)))

    def _WriteMACRange(self):
        """Insert the collected min/max dates and months as a single row."""
        query ="INSERT INTO %s%s (CMinDate,CMaxDate,CMinMonth,CMaxMonth, MMinDate,MMaxDate,MMinMonth,MMaxMonth,AMinDate,AMaxDate,AMinMonth,AMaxMonth) values (?,?,?,?,?,?,?,?,?,?,?,?)"%(self.EvidenceID, Constants.MACRangeTable)

        row = []
        for kind in self._TimelineKinds:
            bounds = Globals.TimelinesDict[kind]
            row.extend([bounds['MinDate'], bounds['MaxDate'],
                        bounds['MinMonth'], bounds['MaxMonth']])
        self.dbMAC.ExecuteMany(query, [tuple(row)])
Пример #29
0
    def Run(self):
        """Copy the image at ``self.imagePath`` onto every listed drive.

        The image is read in 16 MB chunks; each chunk is written to (and
        flushed on) every raw device named in ``self.listDriveNames`` and fed
        into an MD5 digest.  After every chunk the progress attributes
        (``gaugeValue``, ``rateInfo``, ``Status``, ``EstimatedTime``) are
        refreshed and ``self.SendEvent()`` is called.  The loop ends at end of
        file or when ``self.keepGoing`` becomes false.

        The input file and all device handles are closed on every exit path
        and ``self.running`` is always cleared.  On success the digest and
        timings are printed and a final ``UpdateLabelEvent`` is posted to
        ``self.win``.

        NOTE(review): ``self.ElapsedTime`` holds an absolute ``time.time()``
        value here (not a formatted duration) and is passed as-is in the
        final event - confirm that is what the event handler expects.
        """
        m = hashlib.md5()
        size = 0
        rate = 0.0
        haveEstimate = False
        # Initialise everything the final event reads, so an empty image or
        # an immediate stop request cannot leave these attributes undefined.
        self.EstimatedTime = ""
        self.gaugeValue = 0.0
        self.rateInfo = ""
        fileObjects = []
        try:
            rfin = open(self.imagePath, 'rb')
            try:
                for dirName in self.listDriveNames:
                    fileObjects.append(Win32RawIO.Win32RAWIO(dirName, 'w'))

                startTime = time.time()
                print("Startime = %s" % time.asctime())

                imageSize = os.path.getsize(self.imagePath)

                while self.keepGoing:
                    self.readTime = time.time()
                    data = rfin.read(1024 * 1024 * 16)
                    if not data:
                        break
                    for rfout in fileObjects:
                        rfout.write(data)
                        rfout.flush()
                    m.update(data)

                    self.ElapsedTime = time.time()
                    elapsed = self.ElapsedTime - self.readTime
                    if elapsed > 0:
                        rate = len(data) / float(elapsed)
                    # else: the clock did not advance (coarse timer); keep
                    # the previous rate instead of dividing by zero.

                    # The estimate is taken once, from the first chunk whose
                    # rate could actually be measured.
                    if not haveEstimate and rate > 0:
                        self.EstimatedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
                            float(imageSize) / rate)
                        haveEstimate = True

                    size += len(data)
                    if imageSize > 0:
                        self.gaugeValue = (float(size) / float(imageSize)) * 100
                    self.rateInfo = "%.2fMB of %.2fMB at %.2fMB/sec" % (
                        size / 1024. / 1024, imageSize / 1024. / 1024,
                        rate / 1024. / 1024)
                    self.Status = "Writing Disk... [%.2f%%]" % self.gaugeValue
                    self.SendEvent()
            finally:
                for rfout in fileObjects:
                    rfout.close()
                rfin.close()

            self.ElapsedTime = time.time()
            print("MD5 Hash = %s" % m.hexdigest().upper())
            print("Endtime = %s" % time.asctime())
            print("Total Elapsed Time = %s" %
                  CommonFunctions.ConvertSecondsToDayHourMinSec(
                      self.ElapsedTime - startTime))
        finally:
            self.running = False

        evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                               gaugeValue=self.gaugeValue,
                               rate=self.rateInfo,
                               estimatedTime=self.EstimatedTime,
                               scanStatus="Done Writing To Disk!")
        wx.PostEvent(self.win, evt)
Пример #30
0
    def Run(self):
        #print 'run start'
        #knownFileLog = os.path.join(Globals.CasePath, 'KnownFiles.log')
        #self.knownFilesLog = open(knownFileLog, 'wb')

        errorLogFile = os.path.join(Globals.CasePath, 'Errors.log')

        self.errorLog = open(errorLogFile, 'wb')

        self.progressLog = open(os.path.join(Globals.CasePath, 'Progress.log'),
                                'wb')

        self.dbFileSystem = SqliteDatabase(Globals.FileSystemName)
        if not self.dbFileSystem.OpenConnection():
            return

        self.dbImage = SqliteDatabase(Globals.ImagesFileName)
        if not self.dbImage.OpenConnection():
            return

        self.dbMAC = SqliteDatabase(Globals.MACFileName)
        if not self.dbMAC.OpenConnection():
            return

        self.dbNSRL = None
        if os.path.exists(Constants.NSRLDBName):
            self.dbNSRL = SqliteDatabase(Constants.NSRLDBName)
            self.dbNSRL.OpenConnection()

        #query = "delete from " + Constants.FileInfoTable + ";"
        #self.dbFileSystem.ExecuteNonQuery(query)
        DBFunctions.CreateFileSystemTable(Globals.FileSystemName,
                                          Globals.CurrentEvidenceID, True)
        DBFunctions.CreateThumbnailsTable(Globals.ImagesFileName,
                                          Globals.CurrentEvidenceID, True)
        DBFunctions.CreateMACTables(Globals.MACFileName,
                                    Globals.CurrentEvidenceID,
                                    drop=True)

        if not Globals.EvidencesDict.has_key(Globals.CurrentEvidenceID):
            Globals.EvidencesDict[Globals.CurrentEvidenceID] = {}

        #11/06/08 Globals.EvidencesDict[Globals.CurrentEvidenceID]['DirTree'] = {}
        #11/06/08 Globals.EvidencesDict[Globals.CurrentEvidenceID]['Location'] = self.rootDir

        #11/06/08 Globals.FilesDict[Globals.CurrentEvidenceID] = {}
        #11/06/08 Globals.ImagesDict[Globals.CurrentEvidenceID] = {}
        #11/06/08 Globals.EvidencesDict[Globals.CurrentEvidenceID]['dirs'] = {}
        self.ElapsedTime = CommonFunctions.ConvertSecondsToDayHourMinSec(
            time.time() - self.StartTime)
        evt = UpdateLabelEvent(elapsedTime=self.ElapsedTime,
                               KnownFilesCount=self.KnownFilesCount,
                               totalDir=self.DirCount,
                               filesCount=self.FilesCount,
                               scanStatus="Scan in progress...")
        wx.PostEvent(self.win, evt)

        self.MimeTypeDict = {}

        Globals.TimelinesDict['Created'] = {
            'MinDate': -1,
            'MaxDate': -1,
            'MinMonth': -1,
            'MaxMonth': -1
        }
        Globals.TimelinesDict['Modified'] = {
            'MinDate': -1,
            'MaxDate': -1,
            'MinMonth': -1,
            'MaxMonth': -1
        }
        Globals.TimelinesDict['Accessed'] = {
            'MinDate': -1,
            'MaxDate': -1,
            'MinMonth': -1,
            'MaxMonth': -1
        }

        self.FileList = []
        #self.ThumbnailList = []
        self.SubDirList = []

        self.UnzipFileNameDict = {}

        for root, dirs, files in os.walk(self.rootDir):
            self.DirCount += len(dirs)

            #self.ImageCount = 0
            self.SubDirList = None
            self.SubDirList = dirs
            dirs = None
            for afile in files:
                #06/23/09 added try/catch block to catch runtime errors on long filenames
                try:
                    self.progressLog.write(
                        "%s\n" %
                        (PlatformMethods.Encode(os.path.join(root, afile))))
                    self.ScanFileInfo(root, afile)
                    """
                    if len(self.ThumbnailList) >= Constants.MaxThumbnailsToHold:
                        self.dbImage.ExecuteMany(self.imageQuery, self.ThumbnailList)
                        self.ThumbnailList = []
                    """

                    if len(self.FileList) >= Constants.MaxFileInfoToHold:
                        self.dbFileSystem.ExecuteMany(self.query,
                                                      self.FileList)
                        self.FileList = None
                        self.FileList = []

                except Exception, value:
                    try:
                        self.errorLog.write(
                            'MAC Info Failed on %s; Error: %s\n' %
                            (PlatformMethods.Encode(os.path.join(
                                root, afile)), str(value)))
                        self.errorLog.flush()
                        #print "Failed to get information on file: %s Error: %s"%(fullFileName, value)
                    except Exception, value:
                        self.errorLog.write('MAC Info Failed; Error: %s\n' %
                                            (str(value)))
                        self.errorLog.flush()