def test_multiple_filename_only_matches_to_one_accession(self):
    # Two restores are derived from the same accession via
    # create_perfect_match and then given a different MD5 and a larger byte
    # count; both are expected to be reported as filename-only matches of
    # that single accession.
    session = Session()
    accession = AccessionBuilder().build()

    first_restore = create_perfect_match(accession)
    first_restore.md5 = 'filename_only_md5_1'
    first_restore.bytes += 100

    second_restore = create_perfect_match(accession)
    second_restore.md5 = 'filename_only_md5_2'
    second_restore.bytes += 100

    for record in (accession, first_restore, second_restore):
        session.add(record)
    session.commit()

    new_matches_found = find_filename_only_matches(
        session, session.query(Accession))

    # One new match per restore, all attached to the one accession.
    self.assertEqual(2, len(new_matches_found))
    self.assertEqual(2, len(accession.filename_only_matches))

    # Each restore points back to the accession and nothing else.
    for matched_restore in (first_restore, second_restore):
        self.assertEqual(1, len(matched_restore.filename_only_matches))
        self.assertEqual(accession,
                         matched_restore.filename_only_matches[0])
def test_accession_with_perfect_match_and_altered_md5_match_and_filename_only_match(
        self):
    # One accession gets three restores: an untouched copy, a copy whose MD5
    # was changed, and a copy whose MD5 and byte count were both changed.
    # Running all three finders should place each restore in its own
    # match category.
    session = Session()
    accession = AccessionBuilder().build()

    perfect_restore = create_perfect_match(accession)

    altered_restore = create_perfect_match(accession)
    altered_restore.md5 = 'altered_md5'

    filename_only_restore = create_perfect_match(accession)
    filename_only_restore.md5 = 'filename_only_md5'
    filename_only_restore.bytes += 100

    records = (accession, perfect_restore, altered_restore,
               filename_only_restore)
    for record in records:
        session.add(record)
    session.commit()

    # The same query object is handed to every finder, in this order.
    all_accessions = session.query(Accession)
    perfect_found = find_perfect_matches(session, all_accessions)
    altered_found = find_altered_md5_matches(session, all_accessions)
    filename_only_found = find_filename_only_matches(
        session, all_accessions)

    # Exactly one new match per category.
    self.assertEqual(1, len(perfect_found))
    self.assertEqual(1, len(altered_found))
    self.assertEqual(1, len(filename_only_found))

    # Each restore landed in the expected relationship on the accession.
    self.assertEqual(perfect_restore, accession.perfect_matches[0])
    self.assertEqual(altered_restore, accession.altered_md5_matches[0])
    self.assertEqual(filename_only_restore,
                     accession.filename_only_matches[0])
def test_deleting_accession_using_raw_sql_should_delete_filename_only_match(
        self):
    # Verifies that deleting an accession row with a raw SQL statement
    # (bypassing the ORM) also removes its row in the filename-only matches
    # table, while the restore itself is left in place.
    #
    # The statement is wrapped in text(): SQLAlchemy 1.4 deprecates passing
    # a plain string to execute() and 2.0 rejects it. The import is kept
    # local so this test does not depend on the module's import block.
    from sqlalchemy import text

    session = Session()
    accession = AccessionBuilder().set_batch("batch_to_delete").build()

    restore = create_perfect_match(accession)
    restore.md5 = 'filename_only_md5'
    restore.bytes += 100

    for record in (accession, restore):
        session.add(record)
    session.commit()

    new_matches_found = find_filename_only_matches(
        session, session.query(Accession))
    session.commit()

    # Precondition: the match exists on both sides before the delete.
    self.assertEqual(1, len(new_matches_found))
    self.assertEqual(1, len(accession.filename_only_matches))
    self.assertEqual(1, len(restore.filename_only_matches))

    # Delete using raw SQL, not SQLAlchemy, to test ON DELETE CASCADE
    session.execute(
        text(
            "DELETE FROM accessions where accessions.batch = 'batch_to_delete'"
        )
    )
    session.commit()

    accessions_count = session.query(Accession).count()
    restores_count = session.query(Restore).count()
    matches_count = session.query(filename_only_matches_table).count()

    self.assertEqual(0, accessions_count)
    self.assertEqual(1, restores_count)  # Restores are not affected
    self.assertEqual(0, matches_count)
    self.assertEqual([], restore.filename_only_matches)
def test_deleting_accession_using_orm_should_delete_filename_only_match(
        self):
    # Verifies that deleting an accession through the ORM also removes its
    # row in the filename-only matches table, while the restore itself is
    # left in place.
    session = Session()
    accession = AccessionBuilder().set_batch("batch_to_delete").build()

    restore = create_perfect_match(accession)
    restore.md5 = 'filename_only_md5'
    restore.bytes += 100

    for record in (accession, restore):
        session.add(record)
    session.commit()

    new_matches_found = find_filename_only_matches(
        session, session.query(Accession))
    session.commit()

    # Precondition: the match exists on both sides before the delete.
    self.assertEqual(1, len(new_matches_found))
    self.assertEqual(1, len(accession.filename_only_matches))
    self.assertEqual(1, len(restore.filename_only_matches))

    # Delete using SQLAlchemy ORM
    session.delete(accession)
    session.commit()

    remaining_accessions = session.query(Accession).count()
    remaining_restores = session.query(Restore).count()
    remaining_matches = session.query(filename_only_matches_table).count()

    self.assertEqual(0, remaining_accessions)
    self.assertEqual(1, remaining_restores)  # Restores are not affected
    self.assertEqual(0, remaining_matches)
    self.assertEqual([], restore.filename_only_matches)
def test_filename_only_match_does_not_include_altered_md_match(self):
    # find_filename_only_matches is run here WITHOUT running
    # find_altered_md5_matches first. The restore that differs from the
    # accession only in its MD5 must not be picked up as a filename-only
    # match; only the restore that also differs in byte count should be.
    session = Session()
    accession = AccessionBuilder().build()

    altered_restore = create_perfect_match(accession)
    altered_restore.md5 = 'altered_md5'

    filename_only_restore = create_perfect_match(accession)
    filename_only_restore.md5 = 'filename_only_md5'
    filename_only_restore.bytes += 100

    for record in (accession, altered_restore, filename_only_restore):
        session.add(record)
    session.commit()

    found = find_filename_only_matches(session, session.query(Accession))

    # The altered-MD5 finder never ran, so that relationship stays empty.
    self.assertEqual(0, len(accession.altered_md5_matches))
    self.assertEqual(1, len(found))
    self.assertEqual(filename_only_restore,
                     accession.filename_only_matches[0])
def test_finding_filename_only_matches_more_than_once(self):
    # Running the finder twice over the same data must report the match
    # only on the first run and must not duplicate it on the second.
    session = Session()
    accession = AccessionBuilder().build()

    restore = create_perfect_match(accession)
    restore.md5 = 'filename_only_md5'
    restore.bytes += 100

    for record in (accession, restore):
        session.add(record)
    session.commit()

    # First pass: the match is newly discovered.
    first_pass = find_filename_only_matches(
        session, session.query(Accession))
    self.assertEqual(1, len(first_pass))
    self.assertEqual(1, len(accession.filename_only_matches))
    self.assertEqual(1, len(restore.filename_only_matches))

    # Second pass with a fresh query: nothing new, and the existing match
    # is still present exactly once on each side.
    second_pass = find_filename_only_matches(
        session, session.query(Accession))
    self.assertEqual(0, len(second_pass))
    self.assertEqual(1, len(accession.filename_only_matches))
    self.assertEqual(1, len(restore.filename_only_matches))
def test_deleting_accession_using_raw_sql_should_delete_filename_only_match_not_affect_other_matches(
        self):
    # Two accessions in different batches each get one filename-only match.
    # Deleting one batch with a raw SQL statement (bypassing the ORM) must
    # remove only that accession's match row; the other accession, both
    # restores, and the other match must survive.
    #
    # The statement is wrapped in text(): SQLAlchemy 1.4 deprecates passing
    # a plain string to execute() and 2.0 rejects it. The import is kept
    # local so this test does not depend on the module's import block.
    from sqlalchemy import text

    session = Session()

    accession1 = AccessionBuilder().set_batch("batch_to_delete").build()
    restore1 = create_perfect_match(accession1)
    restore1.md5 = 'filename_only_md5'
    restore1.bytes += 100

    accession2 = AccessionBuilder().set_batch("batch_to_preserve").build()
    restore2 = create_perfect_match(accession2)
    restore2.md5 = 'filename_only_md5'
    restore2.bytes += 100

    for record in (accession1, restore1, accession2, restore2):
        session.add(record)
    session.commit()

    new_matches_found = find_filename_only_matches(
        session, session.query(Accession))
    session.commit()

    # Precondition: one match per accession/restore pair.
    self.assertEqual(2, len(new_matches_found))
    for accession, restore in ((accession1, restore1),
                               (accession2, restore2)):
        self.assertEqual(1, len(accession.filename_only_matches))
        self.assertEqual(1, len(restore.filename_only_matches))

    # Delete using raw SQL, not SQLAlchemy, to test ON DELETE CASCADE
    connection = session.connection()
    connection.execute(
        text(
            "DELETE FROM accessions where accessions.batch = 'batch_to_delete'"
        )
    )
    session.commit()

    accessions_count = session.query(Accession).count()
    restores_count = session.query(Restore).count()
    matches_count = session.query(filename_only_matches_table).count()

    self.assertEqual(1, accessions_count)
    self.assertEqual(2, restores_count)  # Restores are not affected
    self.assertEqual(1, matches_count)

    # The deleted batch's restore lost its match; the preserved pair is
    # still linked in both directions.
    self.assertEqual([], restore1.filename_only_matches)
    self.assertIn(accession2, restore2.filename_only_matches)
    self.assertIn(restore2, accession2.filename_only_matches)
def test_same_bytes_and_filename_but_different_md5(self):
    # A restore that shares filename and byte count with the accession and
    # differs only in MD5 must not be reported as a filename-only match.
    session = Session()
    accession = AccessionBuilder().build()

    restore = create_perfect_match(accession)
    restore.md5 = 'filename_only_md5'

    for record in (accession, restore):
        session.add(record)
    session.commit()

    # Verify that filename and bytes are the same, but MD5 differs
    self.assertEqual(accession.bytes, restore.bytes)
    self.assertEqual(accession.filename, restore.filename)
    self.assertNotEqual(accession.md5, restore.md5)

    found = find_filename_only_matches(session, session.query(Accession))

    # Nothing is reported and neither side of the relationship is populated.
    self.assertEqual(0, len(found))
    self.assertEqual(0, len(accession.filename_only_matches))
    self.assertEqual(0, len(restore.filename_only_matches))
def test_no_filename_only_match(self):
    # An accession and an independently built restore that differ in MD5,
    # filename and byte count must not produce a filename-only match.
    session = Session()
    accession = AccessionBuilder().build()
    restore = RestoreBuilder().build()

    for record in (accession, restore):
        session.add(record)
    session.commit()

    # Verify that MD5 checksums, filenames, and bytes are not equal
    self.assertNotEqual(accession.md5, restore.md5)
    self.assertNotEqual(accession.filename, restore.filename)
    self.assertNotEqual(accession.bytes, restore.bytes)

    found = find_filename_only_matches(session, session.query(Accession))

    # No filename-only match should be found
    self.assertEqual(0, len(found))
    self.assertEqual(0, len(accession.filename_only_matches))
    self.assertEqual(0, len(restore.filename_only_matches))