class TestDictDiffer(TestCase):
    """Check the four key-set queries exposed by ``DictDiffer``."""

    # Fixture dictionaries: the differ is built as DictDiffer(SECOND, FIRST).
    FIRST = {'a': 1, 'b': 1, 'c': 0}
    SECOND = {'a': 1, 'b': 2, 'd': 0}

    def setUp(self):
        """Create a fresh differ from copies of the two fixture dicts."""
        self.d = DictDiffer(dict(self.SECOND), dict(self.FIRST))

    def test_added(self):
        """A key found only in the first argument is reported as added."""
        expected = {'d'}
        self.assertEqual(self.d.added(), expected)

    def test_removed(self):
        """A key found only in the second argument is reported as removed."""
        expected = {'c'}
        self.assertEqual(self.d.removed(), expected)

    def test_changed(self):
        """A shared key whose value differs is reported as changed."""
        expected = {'b'}
        self.assertEqual(self.d.changed(), expected)

    def test_unchanged(self):
        """A shared key with an equal value is reported as unchanged."""
        expected = {'a'}
        self.assertEqual(self.d.unchanged(), expected)
def checkDelta(self, letter):
    """Log and tally every entry of one delta list against the opposite MHL.

    For each entry of the selected delta (``HashNonexistent`` placeholders
    are dropped, the rest is processed in sorted order) a counterpart is
    searched in the opposite MHL: first through each of the entry's
    recorded hashes (``findByOtherHash``) and, when every one of those
    lookups failed, through its filename (``findHashByAttribute``).

    A located pair is compared with ``DictDiffer`` over the two objects'
    ``__dict__`` and the differences are written with ``logDetail``. An
    entry without counterpart is logged as existing in one MHL only.
    Each entry is added to a single ``self.COUNT`` category.

    Side effect: when a counterpart is found through a recorded hash, the
    ``identifier`` / ``identifierType`` of BOTH objects are overwritten
    with that hash's value and type.

    Args:
        letter: ``'A'`` to walk ``self.deltaA`` and search ``self.B``;
            ``'B'`` to walk ``self.deltaB`` and search ``self.A``.

    Raises:
        Exception: if ``letter`` is neither ``'A'`` nor ``'B'``.
    """
    if letter == 'A':
        delta = self.deltaA
        # Refer to the opposite MHL to access and perform searches on it
        oppositeMHL = self.B
        listLabel = '1st'
        listLabelOpposite = '2nd'
        listColor = LOG_COLOR_MHL_A
        listColorOpposite = LOG_COLOR_MHL_B
    elif letter == 'B':
        delta = self.deltaB
        oppositeMHL = self.A
        listLabel = '2nd'
        listLabelOpposite = '1st'
        listColor = LOG_COLOR_MHL_B
        listColorOpposite = LOG_COLOR_MHL_A
    else:
        raise Exception(
            "INTERNAL: Couldn't check deltas, none were specified. Specify one"
        )

    # Quickly clean Nonexistent objects out if they exist
    deltaClean = sorted(
        h for h in delta if not isinstance(h, HashNonexistent))

    for entry in deltaClean:
        # Tri-state on purpose:
        #   None  -> no lookup has run (the entry has no recorded hashes)
        #   False -> lookups ran and found nothing
        #   True  -> `hashPossible` holds the counterpart
        foundHashPossible = None
        beenCounted = False  # If this entry has been counted yet

        # Look for a match by other hash
        # E.g., if XXHASH and MD5 present, search by MD5
        for otherHashType, otherHashValue in entry.recordedHashes.items():
            # NOTE(review): the original had an `if otherHashType ==
            # identifierType: pass` here, commented as skipping the hash
            # type already in use. `pass` skips nothing, so the lookup has
            # always run for every recorded type; that behavior is kept.
            hashPossible = oppositeMHL.findByOtherHash(
                otherHashType, otherHashValue)
            if isinstance(hashPossible, HashNonexistent):
                # No result found, move on
                foundHashPossible = False
                continue
            # Found it, and because we found it by another hash,
            # update the IDENTIFIER on both sides. Risky?
            entry.identifier = otherHashValue
            entry.identifierType = otherHashType
            hashPossible.identifier = otherHashValue
            hashPossible.identifierType = otherHashType
            foundHashPossible = True
            break

        if foundHashPossible is False:
            # Searched but no matches by other hash.
            # Look for a match by filename
            hashPossible = oppositeMHL.findHashByAttribute(
                'filename', entry.filename)
            # A HashNonexistent here means: definitely missing. No other
            # matches by name or hash.
            foundHashPossible = not isinstance(hashPossible, HashNonexistent)

        if foundHashPossible is not True:
            # Nothing found (or nothing could be searched).
            self.COUNT['MISSING'] += 1
            logDetail(
                ' ' + color(entry.filename, listColor, attrs=LOG_COLOR_BOLD))
            logDetail(' This file only exists in',
                      color(listLabel + ' MHL', listColor) + '.')
            logDetail(' Path:', entry.directory)
            logDetail(' Size:', entry.sizeHuman)
            logDetail(' Hash:', entry.identifier,
                      '({})'.format(entry.identifierType))
            continue

        # Compare the entry and its possible counterpart.
        diff = DictDiffer(entry.__dict__, hashPossible.__dict__)
        dAdded = diff.added()
        dRemoved = diff.removed()
        dUnchanged = diff.unchanged()
        dChanged = diff.changed()

        # First print a filename so everything fits underneath it.
        logDetail(' ' + color(entry.filename, None, attrs=LOG_COLOR_BOLD))

        # Then begin testing.
        if entry.identifierType == hashPossible.identifierType:
            # Hash type is the same
            if entry.identifier == hashPossible.identifier:
                # And so are the hashes
                # But check if it's a duplicate first
                if entry.isDuplicate is True:
                    logDetail(
                        ' This file is a duplicate. Another file exists in this MHL with the same hash.'
                    )
                    if not beenCounted:
                        self.COUNT['DUPLICATE'] += 1
                        beenCounted = True
                    # NOTE(review): this is the only call to `colored`;
                    # every other line uses `color` - confirm intended.
                    logDetail(
                        ' Hash ({}):'.format(listLabel),
                        colored(
                            entry.identifier +
                            ' ({})'.format(entry.identifierType),
                            listColor))
                else:
                    if not beenCounted:
                        self.COUNT['PERFECT'] += 1
                        beenCounted = True
                    logDetail(' Hash: identical.')
            else:
                # But the hashes are different. File has changed?
                if not beenCounted:
                    self.COUNT['HASH_CHANGED'] += 1
                    beenCounted = True
                logDetail(
                    color(
                        ' Hash: These hashes are different from each other. It is likely the files were different between the time the MHLs were generated.',
                        LOG_COLOR_WARNING))
        else:
            # Hash type is not the same. Unlikely to be comparable.
            if not beenCounted:
                self.COUNT['HASH_TYPE_DIFFERENT'] += 1
                beenCounted = True
            logDetail(
                color(
                    " Hash: These hashes are of different types. It's not possible to compare them.",
                    LOG_COLOR_INFORMATION))

        if entry.isDuplicate is False:
            logDetail(
                ' Hash ({}):'.format(listLabel),
                color(
                    '{} ({})'.format(entry.identifier, entry.identifierType),
                    listColor))
            logDetail(
                ' Hash ({}):'.format(listLabelOpposite),
                color(
                    '{} ({})'.format(hashPossible.identifier,
                                     hashPossible.identifierType),
                    listColorOpposite))

        if {'filename', 'directory', 'size'}.issubset(dUnchanged):
            # If none of these variables have changed, then we have a
            # perfect match, EVEN THOUGH we may have used a slightly
            # different preferred hash. Nothing more to report.
            if not beenCounted:
                self.COUNT['PERFECT'] += 1
                beenCounted = True
            continue

        # From here on the lines are labelled with listLabel / listColor
        # (not a hard-coded "1st") because `entry` comes from the 2nd MHL
        # when letter == 'B'.
        labelOwn = '({}):'.format(listLabel)
        labelOpposite = ' ({}):'.format(listLabelOpposite)

        if 'filename' in dChanged:
            if not beenCounted:
                self.COUNT['MINOR'] += 1
                beenCounted = True
            logDetail(' Filename: different ' + labelOwn,
                      color(entry.filename, listColor))
            logDetail(labelOpposite,
                      color(hashPossible.filename, listColorOpposite))
        # If the filename is the same, it has already been printed above.

        if 'directory' in dChanged:
            if not beenCounted:
                self.COUNT['MINOR'] += 1
                beenCounted = True
            logDetail(' Path: different ' + labelOwn,
                      color(entry.directory, listColor))
            logDetail(labelOpposite,
                      color(hashPossible.directory, listColorOpposite))
        else:
            logDetail(' Path: identical:', entry.directory)

        if 'size' in dChanged:
            # First, check if the size is simply "not specified" on either
            # side. This is not an anomaly if so.
            if not entry.sizeDefined or not hashPossible.sizeDefined:
                # If we have come this far (hash match, name, directory)
                # but size can't be compared, that is as good as we are
                # going to get. Guarded like every other counter so an
                # entry is never counted twice.
                if not beenCounted:
                    self.COUNT['PERFECT'] += 1
                    beenCounted = True
            else:
                # It is an anomaly if the size has changed while the hash
                # has not. Report it as impossible, but also print it to
                # the user anyway.
                if not beenCounted:
                    self.COUNT['IMPOSSIBLE'] += 1
                    beenCounted = True
                logDetail(' Size: different ' + labelOwn,
                          color(entry.sizeHuman, listColor))
                logDetail(labelOpposite,
                          color(hashPossible.sizeHuman, listColorOpposite))
        else:
            logDetail(' Size: identical: ' + hashPossible.sizeHuman)

        # Don't count or show date changes unless the user wants it
        # (LOG_SHOW_DATES is true).
        if 'lastmodificationdate' in dChanged and LOG_SHOW_DATES:
            if not beenCounted:
                self.COUNT['MINOR'] += 1
                beenCounted = True
            hModDate = showDate(entry.lastmodificationdate)
            hPModDate = showDate(hashPossible.lastmodificationdate)
            logDetail(' Modified date: different ' + labelOwn,
                      color(hModDate, listColor))
            logDetail(labelOpposite, color(hPModDate, listColorOpposite))

        # Briefly explain to the user what attributes were added/removed.
        # `dAdded` holds attributes only `entry` has, `dRemoved` those only
        # the counterpart has.
        if not LOG_SHOW_DATES:
            dAddedFiltered = [
                i for i in dAdded if i not in LIST_OF_DATE_ATTRIBUTES
            ]
            dRemovedFiltered = [
                i for i in dRemoved if i not in LIST_OF_DATE_ATTRIBUTES
            ]
        else:
            dAddedFiltered = dAdded
            dRemovedFiltered = dRemoved

        if dAddedFiltered:
            dAddedString = ', '.join(str(i) for i in dAddedFiltered)
            logDetail(
                ' These attributes exist in {} only:'.format(listLabel),
                color(dAddedString, listColor))
        if dRemovedFiltered:
            dRemovedString = ', '.join(str(i) for i in dRemovedFiltered)
            logDetail(
                ' These attributes exist in {} only:'.format(
                    listLabelOpposite),
                color(dRemovedString, listColorOpposite))
def checkCommon(self):
    """Log and tally the differences within each pair of ``self.common``.

    ``self.common`` is iterated as ``(hashA, hashB)`` pairs. Each pair is
    compared with ``DictDiffer`` over the two objects' ``__dict__``:

    * filename, directory and size all unchanged -> counted as
      ``PERFECT`` and nothing is logged;
    * otherwise the filename, path, hash, size and (when
      ``LOG_SHOW_DATES`` is true) modification date are logged, followed
      by the attributes that exist on one side only.

    A pair is added to at most one ``self.COUNT`` category; the first
    difference encountered decides which one.
    """
    for hashA, hashB in self.common:
        beenCounted = False
        diff = DictDiffer(hashA.__dict__, hashB.__dict__)
        dAdded = diff.added()
        dRemoved = diff.removed()
        dChanged = diff.changed()
        dUnchanged = diff.unchanged()

        if {'filename', 'directory', 'size'}.issubset(dUnchanged):
            # If none of these variables have changed, then we have a
            # perfect match. Report it and move on.
            self.COUNT['PERFECT'] += 1
            continue

        if 'filename' in dChanged:
            if not beenCounted:
                self.COUNT['MINOR'] += 1
                beenCounted = True
            logDetail(' ' + color(hashA.filename, 'green',
                                  attrs=LOG_COLOR_BOLD))
            logDetail(' Filename: different (1st):',
                      color(hashA.filename, LOG_COLOR_MHL_A))
            logDetail(' (2nd):', color(hashB.filename, LOG_COLOR_MHL_B))
        else:
            logDetail(' ' + color(hashA.filename, None,
                                  attrs=LOG_COLOR_BOLD))

        if 'directory' in dChanged:
            if not beenCounted:
                self.COUNT['MINOR'] += 1
                beenCounted = True
            logDetail(' Path: different (1st):',
                      color(hashA.directory, LOG_COLOR_MHL_A))
            logDetail(' (2nd):', color(hashB.directory, LOG_COLOR_MHL_B))
        else:
            logDetail(' Path: identical: ' + hashA.directory)

        # Straight up print the hash, don't check it.
        # At this stage, it's not possible for the hash to be different.
        # A check has already been performed for the pair to even be
        # included in this group.
        logDetail(' Hash: identical: {} ({})'.format(
            hashA.identifier, hashA.identifierType))

        if 'size' in dChanged:
            if not beenCounted:
                # A size that is simply "not specified" on either side is
                # not an anomaly. Otherwise the size changed while the
                # hash did not: report it as impossible. Either way the
                # pair is counted only if nothing counted it before.
                sizeUnspecified = (not hashA.sizeDefined
                                   or not hashB.sizeDefined)
                category = 'PERFECT' if sizeUnspecified else 'IMPOSSIBLE'
                self.COUNT[category] += 1
                beenCounted = True
            # Print it to the user anyway.
            logDetail(' Size: different (1st):',
                      color(hashA.sizeHuman, LOG_COLOR_MHL_A))
            logDetail(' (2nd):', color(hashB.sizeHuman, LOG_COLOR_MHL_B))
        else:
            logDetail(' Size: identical: ' + hashA.sizeHuman)

        # Don't count or show date changes unless the user wants it
        # (LOG_SHOW_DATES is true).
        if 'lastmodificationdate' in dChanged and LOG_SHOW_DATES:
            if not beenCounted:
                self.COUNT['MINOR'] += 1
                beenCounted = True
            # Format through showDate, as checkDelta does, instead of
            # passing the raw attribute to color().
            logDetail(' Modified date: different (1st):',
                      color(showDate(hashA.lastmodificationdate),
                            LOG_COLOR_MHL_A))
            logDetail(' (2nd):',
                      color(showDate(hashB.lastmodificationdate),
                            LOG_COLOR_MHL_B))

        # Briefly explain to the user what attributes were added/removed
        if not LOG_SHOW_DATES:
            dAddedFiltered = [
                i for i in dAdded if i not in LIST_OF_DATE_ATTRIBUTES
            ]
            dRemovedFiltered = [
                i for i in dRemoved if i not in LIST_OF_DATE_ATTRIBUTES
            ]
        else:
            dAddedFiltered = dAdded
            dRemovedFiltered = dRemoved

        if dAddedFiltered:
            dAddedString = ', '.join(str(i) for i in dAddedFiltered)
            logDetail(' These attributes exist in 1st only:',
                      color(dAddedString, LOG_COLOR_MHL_A))
        if dRemovedFiltered:
            dRemovedString = ', '.join(str(i) for i in dRemovedFiltered)
            logDetail(' These attributes exist in 2nd only:',
                      color(dRemovedString, LOG_COLOR_MHL_B))
class TestDictDiffer(TestCase):
    """Exercise ``DictDiffer``: key-set queries, counters and full diff."""

    def setUp(self):
        """Build the shared differ as ``DictDiffer(second, first)``."""
        first = dict(a=1, b=1, c=0)
        second = dict(a=1, b=2, d=0)
        self.d = DictDiffer(second, first)

    # -- key-set queries ---------------------------------------------------

    def test_added(self):
        """A key found only in the first argument is reported as added."""
        self.assertEqual(self.d.added(), {'d'})

    def test_removed(self):
        """A key found only in the second argument is reported as removed."""
        self.assertEqual(self.d.removed(), {'c'})

    def test_changed(self):
        """A shared key whose value differs is reported as changed."""
        self.assertEqual(self.d.changed(), {'b'})

    def test_unchanged(self):
        """A shared key with an equal value is reported as unchanged."""
        self.assertEqual(self.d.unchanged(), {'a'})

    # -- changes() / has_changes() -------------------------------------------

    def test_changes(self):
        """changes() gives one count per category for the shared differ."""
        expected = dict(added=1, removed=1, changed=1)
        self.assertEqual(self.d.changes(), expected)

    def test_changes_same(self):
        """changes() gives all-zero counts for two equal dicts."""
        first = dict(a=1, b=1, c=0)
        second = dict(a=1, b=1, c=0)
        differ = DictDiffer(second, first)
        expected = dict(added=0, removed=0, changed=0)
        self.assertEqual(differ.changes(), expected)

    def test_haschanges(self):
        """has_changes() is True for the shared differ."""
        outcome = self.d.has_changes()
        self.assertEqual(outcome, True)

    def test_haschanges_no(self):
        """has_changes() is False for two equal dicts."""
        first = dict(a=1, b=1, c=0)
        second = dict(a=1, b=1, c=0)
        outcome = DictDiffer(second, first).has_changes()
        self.assertEqual(outcome, False)

    def test_haschanges_empty(self):
        """has_changes() is falsy for two empty dicts."""
        differ = DictDiffer({}, {})
        self.assertFalse(differ.has_changes())

    # -- nb_changes() --------------------------------------------------------

    def test_nb_changes(self):
        """One changed value plus one dropped key gives two changes."""
        first = dict(a=1, b=1, c=0)
        second = dict(a=1, b=2)
        differ = DictDiffer(second, first)
        self.assertEqual(differ.nb_changes(), 2)

    def test_nb_changes_same(self):
        """Two equal dicts give zero changes."""
        first = dict(a=1, b=1)
        second = dict(a=1, b=1)
        differ = DictDiffer(second, first)
        self.assertEqual(differ.nb_changes(), 0)

    def test_nb_changes_full(self):
        """Two dicts with disjoint keys give four changes."""
        first = dict(a=1, b=1)
        second = dict(d=1, f=1)
        differ = DictDiffer(second, first)
        self.assertEqual(differ.nb_changes(), 4)

    # -- fulldiff() ----------------------------------------------------------

    def test_fulldiff(self):
        """fulldiff() lists added, changed and removed entries in detail."""
        first = dict(a=1, b=1, c=4)
        second = dict(d=1, f=1, c=5)
        differ = DictDiffer(second, first)
        expected = {
            'added': [{'d': 1}, {'f': 1}],
            'changed': [{'key': 'c', 'old': 4, 'new': 5}],
            'removed': [{'a': 1}, {'b': 1}],
        }
        self.assertEqual(differ.fulldiff(), expected)