Пример #1
0
    def setup_perfect_matches_and_transfers(session, batch_name):
        """Fill ``session`` with a four-accession fixture for ``batch_name``.

        The accessions created are:
          * one with a single perfect match,
          * one with two perfect matches,
          * one with a single perfect match whose restore filepath is
            also used to build a transfer,
          * one with no perfect match at all.

        Once everything is added, ``find_perfect_matches`` and
        ``find_transfer_matches`` are run and the session is committed.
        """
        def build_accession():
            return AccessionBuilder().set_batch(batch_name).build()

        # One accession, one matching restore.
        lone_accession = build_accession()
        lone_restore = create_perfect_match(lone_accession)
        for row in (lone_accession, lone_restore):
            session.add(row)

        # One accession, two matching restores.
        twin_accession = build_accession()
        twin_restore_a = create_perfect_match(twin_accession)
        twin_restore_b = create_perfect_match(twin_accession)
        for row in (twin_accession, twin_restore_a, twin_restore_b):
            session.add(row)

        # One accession whose matching restore also has a transfer.
        moved_accession = build_accession()
        moved_restore = create_perfect_match(moved_accession)
        moved_transfer = (
            TransferBuilder().set_filepath(moved_restore.filepath).build()
        )
        for row in (moved_accession, moved_restore, moved_transfer):
            session.add(row)

        # One accession with nothing to match against.
        orphan_accession = build_accession()
        session.add(orphan_accession)

        batch_accessions = get_accessions(session, batch_name)
        find_perfect_matches(session, batch_accessions)
        all_transfers = session.query(Transfer).all()
        find_transfer_matches(session, all_transfers)
        session.commit()
Пример #2
0
    def test_batch_with_accession_with_perfect_match(self):
        """Deleting a batch removes its accession and the restore's match."""
        batch = "batch_to_delete"
        session = Session()

        accession = AccessionBuilder().set_batch(batch).build()
        restore = create_perfect_match(accession)
        for row in (accession, restore):
            session.add(row)
        session.commit()

        found = find_perfect_matches(session, session.query(Accession))

        # One match, visible from both sides of the relationship.
        self.assertEqual(1, len(found))
        self.assertEqual(1, len(accession.perfect_matches))
        self.assertEqual(1, len(restore.perfect_matches))
        self.assertEqual(accession, restore.perfect_matches[0])
        self.assertEqual(restore, accession.perfect_matches[0])

        delete_accessions(session, batch)
        session.commit()

        # No accession is left, and the restore no longer lists a match.
        remaining = session.query(Accession)
        self.assertEqual(0, remaining.count())
        self.assertEqual([], restore.perfect_matches)
Пример #3
0
    def test_delete_accessions_matched_accession(self):
        """delete_accessions removes an accession that has a perfect match.

        This should never happen - but we can delete _any_ accession,
        even if it has a perfect match. The test checks that the
        accession row and its perfect-match row are gone, and that the
        deletion is reported on the supplied output stream.
        """
        file_stream = io.StringIO()

        session = Session()
        accession = AccessionBuilder().build()
        restore = create_perfect_match(accession)
        session.add(accession)
        session.add(restore)

        accessions = session.query(Accession)
        new_matches_found = find_perfect_matches(session, accessions)
        self.assertEqual(1, len(new_matches_found))
        perfect_matches_count = session.query(perfect_matches_table).count()
        self.assertEqual(1, perfect_matches_count)
        session.commit()

        # Snapshot everything we need from the accession *before* it is
        # deleted, so later assertions never read attributes from a
        # deleted instance.
        accession_id = accession.id
        expected_report_line = f"{accession.batch},{accession.relpath}"

        accessions_to_delete = [accession]
        delete_accessions(session, accessions_to_delete, file_stream)
        perfect_matches_count = session.query(perfect_matches_table).count()
        self.assertEqual(0, perfect_matches_count)
        self.assertEqual(
            0,
            session.query(Accession).filter(
                Accession.id == accession_id).count())

        # Expected report layout: blank line, header, one line per accession.
        output_lines = file_stream.getvalue().split('\n')
        self.assertEqual("", output_lines[0].strip())
        self.assertEqual("---- Deleting accessions ----",
                         output_lines[1].strip())
        self.assertEqual(expected_report_line, output_lines[2].strip())
Пример #4
0
    def test_deleting_accession_using_raw_sql_should_delete_perfect_match(self):
        """A raw SQL delete of an accession also removes its match row.

        The restore row itself must survive the delete.
        """
        session = Session()

        accession = AccessionBuilder().set_batch("batch_to_delete").build()
        restore = create_perfect_match(accession)
        for row in (accession, restore):
            session.add(row)
        session.commit()

        found = find_perfect_matches(session, session.query(Accession))
        session.commit()

        self.assertEqual(1, len(found))
        self.assertEqual(1, len(accession.perfect_matches))
        self.assertEqual(1, len(restore.perfect_matches))

        # Bypass the ORM on purpose so that the database's
        # ON DELETE CASCADE is what gets exercised.
        # NOTE(review): SQLAlchemy 2.0 rejects a plain string here and
        # requires text(); confirm the pinned SQLAlchemy version.
        session.execute("DELETE FROM accessions where accessions.batch = 'batch_to_delete'")
        session.commit()

        accessions_left = session.query(Accession).count()
        restores_left = session.query(Restore).count()
        matches_left = session.query(perfect_matches_table).count()
        self.assertEqual(0, accessions_left)
        self.assertEqual(1, restores_left)  # the restore is kept
        self.assertEqual(0, matches_left)
        self.assertEqual([], restore.perfect_matches)
Пример #5
0
    def test_multiple_perfect_matches_to_multiple_accession(self):
        """Two accessions sharing md5, bytes and filename both match two restores.

        Each accession must end up matched to both restores and each
        restore to both accessions, giving four new matches in total.
        """
        session = Session()

        accession1 = AccessionBuilder().build()
        # Second accession copies the fields set here from the first one.
        accession2 = (
            AccessionBuilder()
            .set_md5(accession1.md5)
            .set_bytes(accession1.bytes)
            .set_filename(accession1.filename)
            .build()
        )
        restore1 = create_perfect_match(accession1)
        restore2 = create_perfect_match(accession2)

        session.add(accession1)
        session.add(accession2)
        session.add(restore1)
        session.add(restore2)
        session.commit()

        accessions = session.query(Accession)
        new_matches_found = find_perfect_matches(session, accessions)

        self.assertEqual(4, len(new_matches_found))
        self.assertEqual(2, len(accession1.perfect_matches))
        self.assertEqual(2, len(accession2.perfect_matches))
        self.assertEqual(2, len(restore1.perfect_matches))
        # Check the count on the second restore too, so an extra or
        # duplicated match on that side cannot slip through.
        self.assertEqual(2, len(restore2.perfect_matches))
        self.assertIn(accession1, restore1.perfect_matches)
        self.assertIn(accession2, restore1.perfect_matches)
        self.assertIn(accession1, restore2.perfect_matches)
        self.assertIn(accession2, restore2.perfect_matches)
Пример #6
0
    def test_deleting_accession_using_orm_should_delete_perfect_match(self):
        """An ORM delete of an accession also removes its match row.

        The restore row itself must survive the delete.
        """
        session = Session()

        accession = AccessionBuilder().set_batch("batch_to_delete").build()
        restore = create_perfect_match(accession)
        for row in (accession, restore):
            session.add(row)
        session.commit()

        found = find_perfect_matches(session, session.query(Accession))
        session.commit()

        self.assertEqual(1, len(found))
        self.assertEqual(1, len(accession.perfect_matches))
        self.assertEqual(1, len(restore.perfect_matches))

        # Go through the SQLAlchemy ORM rather than raw SQL.
        session.delete(accession)
        session.commit()

        accessions_left = session.query(Accession).count()
        restores_left = session.query(Restore).count()
        matches_left = session.query(perfect_matches_table).count()
        self.assertEqual(0, accessions_left)
        self.assertEqual(1, restores_left)  # the restore is kept
        self.assertEqual(0, matches_left)
        self.assertEqual([], restore.perfect_matches)
Пример #7
0
    def test_accession_with_perfect_match_and_altered_md5_match_and_filename_only_match(
            self):
        """One accession gets one match from each of the three finders.

        Three restores start as perfect matches of the accession; one is
        left as is, one gets a different md5, and one gets a different
        md5 and a different byte count.
        """
        session = Session()

        accession = AccessionBuilder().build()

        exact = create_perfect_match(accession)

        md5_changed = create_perfect_match(accession)
        md5_changed.md5 = 'altered_md5'

        name_only = create_perfect_match(accession)
        name_only.md5 = 'filename_only_md5'
        name_only.bytes += 100

        for row in (accession, exact, md5_changed, name_only):
            session.add(row)
        session.commit()

        # Run all three finders first, then check their results.
        query = session.query(Accession)
        perfect_found = find_perfect_matches(session, query)
        altered_found = find_altered_md5_matches(session, query)
        name_only_found = find_filename_only_matches(session, query)

        self.assertEqual(1, len(perfect_found))
        self.assertEqual(1, len(altered_found))
        self.assertEqual(1, len(name_only_found))
        self.assertEqual(exact, accession.perfect_matches[0])
        self.assertEqual(md5_changed, accession.altered_md5_matches[0])
        self.assertEqual(name_only, accession.filename_only_matches[0])
Пример #8
0
    def test_find_untransferred_accessions_with_mixed_batch(self):
        """Only the untransferred accession of a mixed batch is reported."""
        batch = "mixed_batch"
        session = Session()

        # First accession of the batch: accession -> restore -> transfer.
        sent_accession = AccessionBuilder().set_batch(batch).build()
        sent_restore = create_perfect_match(sent_accession)
        sent_transfer = (
            TransferBuilder().set_filepath(sent_restore.filepath).build()
        )
        for row in (sent_accession, sent_restore, sent_transfer):
            session.add(row)

        # Second accession of the batch: a restore but no transfer.
        pending_accession = AccessionBuilder().set_batch(batch).build()
        pending_restore = create_perfect_match(pending_accession)
        for row in (pending_accession, pending_restore):
            session.add(row)

        find_perfect_matches(session, [sent_accession, pending_accession])
        find_transfer_matches(session, [sent_transfer])
        session.commit()

        # Sanity-check the fixture before exercising the function under test.
        self.assertEqual(1, len(sent_accession.perfect_matches))
        self.assertEqual(1, len(sent_restore.transfers))
        self.assertEqual(1, len(pending_accession.perfect_matches))
        self.assertEqual(0, len(pending_restore.transfers))

        reported = find_untransferred_accessions(session, batch)
        self.assertEqual(1, len(reported))
        self.assertEqual(pending_accession.id, reported[0].id)
Пример #9
0
    def test_finding_perfect_matches_more_than_once(self):
        """A second matching pass reports nothing new and adds no duplicates."""
        session = Session()

        accession = AccessionBuilder().build()
        restore = create_perfect_match(accession)
        for row in (accession, restore):
            session.add(row)
        session.commit()

        # First pass: the match is discovered.
        first_pass = find_perfect_matches(session, session.query(Accession))
        self.assertEqual(1, len(first_pass))
        self.assertEqual(1, len(accession.perfect_matches))
        self.assertEqual(1, len(restore.perfect_matches))

        # Second pass: no new matches, existing ones unchanged.
        second_pass = find_perfect_matches(session, session.query(Accession))
        self.assertEqual(0, len(second_pass))
        self.assertEqual(1, len(accession.perfect_matches))
        self.assertEqual(1, len(restore.perfect_matches))
Пример #10
0
    def setUp(self):
        """Create the schema and a transferred / untransferred fixture.

        Two accessions are stored on ``self``, each with a perfect-match
        restore; only the first restore's filepath is used for a transfer.
        """
        create_test_engine()
        Base.metadata.create_all(Session().get_bind())

        session = Session()

        # Batch whose restore has been transferred.
        self.transferred_accession = (
            AccessionBuilder().set_batch("transferred_batch").build()
        )
        self.transferred_restore = create_perfect_match(
            self.transferred_accession)
        self.transfer = (
            TransferBuilder()
            .set_filepath(self.transferred_restore.filepath)
            .build()
        )
        for row in (self.transferred_accession,
                    self.transferred_restore,
                    self.transfer):
            session.add(row)

        # Batch whose restore has no transfer.
        self.untransferred_accession = (
            AccessionBuilder().set_batch("untransferred_batch").build()
        )
        self.untransferred_restore = create_perfect_match(
            self.untransferred_accession)
        for row in (self.untransferred_accession,
                    self.untransferred_restore):
            session.add(row)

        both_accessions = [
            self.transferred_accession,
            self.untransferred_accession,
        ]
        find_perfect_matches(session, both_accessions)
        find_transfer_matches(session, [self.transfer])
        session.commit()

        # Fail early if the fixture itself is not in the expected state.
        self.assertEqual(1, len(self.transferred_accession.perfect_matches))
        self.assertEqual(1, len(self.transferred_restore.transfers))
        self.assertEqual(1, len(self.untransferred_accession.perfect_matches))
        self.assertEqual(0, len(self.untransferred_restore.transfers))
Пример #11
0
    def test_one_matched_and_one_unmatched_accession(self):
        """Only the accession without a perfect match is reported as unmatched."""
        batch = "TestBatch"
        session = Session()

        # Accession with no restore to match.
        lonely = AccessionBuilder().set_batch(batch).build()
        session.add(lonely)

        # Accession with a matching restore.
        paired = AccessionBuilder().set_batch(batch).build()
        paired_restore = create_perfect_match(paired)
        for row in (paired, paired_restore):
            session.add(row)

        found = find_perfect_matches(session, session.query(Accession))
        self.assertEqual(1, len(found))
        session.commit()

        still_unmatched = unmatched_accessions(session, batch)
        self.assertEqual(1, len(still_unmatched))
        self.assertIn(lonely, still_unmatched)
Пример #12
0
    def test_no_perfect_match(self):
        """An independently built accession and restore are not matched."""
        session = Session()

        accession = AccessionBuilder().build()
        restore = RestoreBuilder().build()
        for row in (accession, restore):
            session.add(row)
        session.commit()

        # Precondition: the two rows differ at least in their MD5 checksum.
        self.assertNotEqual(accession.md5, restore.md5)

        found = find_perfect_matches(session, session.query(Accession))

        # Nothing is reported, and neither side lists a match.
        self.assertEqual(0, len(found))
        self.assertEqual(0, len(accession.perfect_matches))
        self.assertEqual(0, len(restore.perfect_matches))
Пример #13
0
    def test_same_md5_but_not_same_filename(self):
        """Equal MD5 with a different filename is not a perfect match."""
        session = Session()

        accession = AccessionBuilder().set_filename('foo.txt').build()
        # Start from a perfect match, then change only the filename.
        restore = create_perfect_match(accession)
        restore.filename = 'bar.txt'
        for row in (accession, restore):
            session.add(row)
        session.commit()

        # Precondition: checksums agree while the filenames do not.
        self.assertEqual(accession.md5, restore.md5)
        self.assertNotEqual(accession.filename, restore.filename)

        found = find_perfect_matches(session, session.query(Accession))

        self.assertEqual(0, len(found))
        self.assertEqual(0, len(accession.perfect_matches))
        self.assertEqual(0, len(restore.perfect_matches))
Пример #14
0
    def test_multiple_perfect_matches_to_one_accession(self):
        """One accession can be perfectly matched by two restores."""
        session = Session()

        accession = AccessionBuilder().build()
        first_restore = create_perfect_match(accession)
        second_restore = create_perfect_match(accession)
        for row in (accession, first_restore, second_restore):
            session.add(row)
        session.commit()

        found = find_perfect_matches(session, session.query(Accession))

        # Two matches on the accession side ...
        self.assertEqual(2, len(found))
        self.assertEqual(2, len(accession.perfect_matches))
        # ... and each restore points back at that single accession.
        self.assertEqual(1, len(first_restore.perfect_matches))
        self.assertEqual(accession, first_restore.perfect_matches[0])
        self.assertEqual(1, len(second_restore.perfect_matches))
        self.assertEqual(accession, second_restore.perfect_matches[0])
Пример #15
0
    def test_deleting_accession_using_raw_sql_should_delete_perfect_match_not_affect_other_matches(self):
        """A raw SQL delete of one batch leaves another batch's match intact."""
        session = Session()

        doomed_accession = AccessionBuilder().set_batch("batch_to_delete").build()
        doomed_restore = create_perfect_match(doomed_accession)

        kept_accession = AccessionBuilder().set_batch("batch_to_preserve").build()
        kept_restore = create_perfect_match(kept_accession)

        for row in (doomed_accession, doomed_restore,
                    kept_accession, kept_restore):
            session.add(row)
        session.commit()

        found = find_perfect_matches(session, session.query(Accession))
        session.commit()

        self.assertEqual(2, len(found))
        self.assertEqual(1, len(doomed_accession.perfect_matches))
        self.assertEqual(1, len(doomed_restore.perfect_matches))
        self.assertEqual(1, len(kept_accession.perfect_matches))
        self.assertEqual(1, len(kept_restore.perfect_matches))

        # Bypass the ORM on purpose so that the database's
        # ON DELETE CASCADE is what gets exercised.
        # NOTE(review): SQLAlchemy 2.0 rejects a plain string here and
        # requires text(); confirm the pinned SQLAlchemy version.
        raw_connection = session.connection()
        raw_connection.execute("DELETE FROM accessions where accessions.batch = 'batch_to_delete'")
        session.commit()

        accessions_left = session.query(Accession).count()
        restores_left = session.query(Restore).count()
        matches_left = session.query(perfect_matches_table).count()
        self.assertEqual(1, accessions_left)
        self.assertEqual(2, restores_left)  # Restores are not affected
        self.assertEqual(1, matches_left)
        self.assertEqual([], doomed_restore.perfect_matches)
        self.assertIn(kept_accession, kept_restore.perfect_matches)
        self.assertIn(kept_restore, kept_accession.perfect_matches)