Exemplo n.º 1
0
 def test_main_1(self, run_pipeline_mock):
     """Check that main() invokes the pipeline runner when given
     a valid pipeline name and no further options."""
     argv = ["pdm_utils.run", "import"]
     run.main(argv)
     with self.subTest():
         was_called = run_pipeline_mock.called
         self.assertTrue(was_called)
Exemplo n.º 2
0
 def test_main_1(self, alchemy_mock):
     """Verify update data and final data are retrieved.

     The update flag and final flag have been merged so that PhagesDB is
     only queried once for all data in the genome, since the query is
     time-intensive.
     """
     alchemy_mock.return_value = self.alchemist
     # If final=True, any genome in database will be checked on PhagesDB
     # regardless of AnnotationAuthor
     unparsed_args = get_unparsed_args(update=True, final=True)
     run.main(unparsed_args)
     update_file_count = count_files(updates_folder)
     genome_file_count = count_files(phagesdb_genome_folder)
     update_row_count = count_rows(update_table)
     import_row_count = count_rows(phagesdb_import_table)
     with self.subTest():
         self.assertEqual(update_file_count, 1)
     with self.subTest():
         # It's not clear how stable the storage of any particular final
         # flat file is on PhagesDB. There is possibility that for any
         # genome the associated flat file will be removed. So this is
         # one area of testing that could be improved. For now, simply
         # verify that 1 or more files have been retrieved.
         # assertGreater reports the actual count on failure.
         self.assertGreater(genome_file_count, 0)
     with self.subTest():
         # There should be several rows of updates.
         self.assertGreater(update_row_count, 0)
     with self.subTest():
         # Every retrieved genome file should have one import table row.
         self.assertEqual(genome_file_count, import_row_count)
Exemplo n.º 3
0
    def test_main_1(self, alchemy_mock, gpd_mock, rud_mock):
        """Verify compare runs successfully with:
        MySQL, PhagesDB, and GenBank records saved,
        a duplicate MySQL accession (for D29 and Trixie),
        an invalid accession (for L5),
        a duplicate phage.Name (for Constance and Et2Brutus),
        a PhagesDB name unmatched to MySQL (for 'unmatched')."""
        alchemy_mock.return_value = self.alchemist
        gpd_mock.return_value = self.pdb_json_results
        rud_mock.return_value = FASTA_FILE

        # Make modifications to cause errors. The statements are executed
        # in this order; the PhageID changes come last, after the updates
        # that address Constance and Et2Brutus by their original ids.
        stmts = [
            # Invalid accession for L5; D29 gets Trixie's accession.
            create_update("phage", "Accession", L5_ACC + "1", "L5"),
            create_update("phage", "Accession", TRIXIE_ACC, "D29"),
            # Same Name for two different phages.
            create_update("phage", "Name", "Dupe", "Constance"),
            create_update("phage", "Name", "Dupe", "Et2Brutus"),
            # Change the PhageIDs of those same two phages.
            create_update("phage", "PhageID", "Dupe", "Constance"),
            create_update("phage", "PhageID", "Dupe_Draft", "Et2Brutus"),
        ]
        for stmt in stmts:
            test_db_utils.execute(stmt)

        run.main(self.unparsed_args)
        count = count_files(test_folder)
        with self.subTest():
            # At least one output file should have been written.
            self.assertGreater(count, 0)
        with self.subTest():
            gpd_mock.assert_called()
        with self.subTest():
            rud_mock.assert_called()
Exemplo n.º 4
0
 def test_main_2(self, getpass_mock):
     """Verify the frozen database contains no genomes once the
     status filter has been applied."""
     getpass_mock.side_effect = [USER, PWD]
     filter_args = ["-f", "phage.Status != draft"]
     self.unparsed_args.extend(filter_args)
     run.main(self.unparsed_args)
     frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
     self.assertEqual(frozen_rows[0]["count"], 0)
Exemplo n.º 5
0
 def test_main_1(self, alchemy_mock):
     """Verify the results folder is created but holds no GenBank
     record when the fixture's arguments are used unchanged."""
     alchemy_mock.return_value = self.alchemist
     run.main(self.unparsed_args)
     n_records = count_files(results_path)
     with self.subTest():
         folder_present = results_path.exists()
         self.assertTrue(folder_present)
     with self.subTest():
         self.assertEqual(n_records, 0)
Exemplo n.º 6
0
 def test_main_1(self, getpass_mock):
     """Verify a database can be installed from a file."""
     getpass_mock.side_effect = [USER, PWD]
     cli_args = get_unparsed_args(option="file")
     run.main(cli_args)
     # A single row of version data shows that the database exists
     # and that it holds a pdm_utils schema populated with data.
     query = test_db_utils.version_table_query
     version_rows = test_db_utils.get_data(query)
     self.assertEqual(len(version_rows), 1)
Exemplo n.º 7
0
 def test_main_11(self, getpass_mock):
     """Verify a quoted filter value is accepted when freezing:
     with Trixie set to 'final', one genome ends up in the new
     database."""
     getpass_mock.side_effect = [USER, PWD]
     test_db_utils.execute(
         create_update("phage", "Status", "final", "Trixie"))
     quoted_filter = "phage.Status != 'draft'"
     self.unparsed_args.extend(["-f", quoted_filter])
     run.main(self.unparsed_args)
     frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
     self.assertEqual(frozen_rows[0]["count"], 1)
Exemplo n.º 8
0
 def test_main_3(self, getpass_mock):
     """Verify frozen database is created from database with
     only one 'final' genome.

     Trixie is set to 'final' first, then the pipeline is run with the
     filter written without spaces around the operator.
     """
     getpass_mock.side_effect = [USER, PWD]
     stmt = create_update("phage", "Status", "final", "Trixie")
     test_db_utils.execute(stmt)
     self.unparsed_args.extend(["-f", "phage.Status!=draft"])
     run.main(self.unparsed_args)
     count2 = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
     # Only the genome changed to 'final' should be in the new database.
     self.assertEqual(count2[0]["count"], 1)
Exemplo n.º 9
0
 def test_main_6(self, getpass_mock):
     """Verify that two filters on two different tables (phage and
     gene) leave exactly one genome in the frozen database."""
     getpass_mock.side_effect = [USER, PWD]
     test_db_utils.execute(
         create_update("phage", "Status", "final", "Trixie"))
     filter_args = ["-f", "phage.Status != draft AND gene.Notes = repressor"]
     self.unparsed_args.extend(filter_args)
     run.main(self.unparsed_args)
     frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
     self.assertEqual(frozen_rows[0]["count"], 1)
Exemplo n.º 10
0
 def test_main_2(self, alchemy_mock):
     """Verify exactly one GenBank record is retrieved once Trixie
     has been assigned an accession."""
     alchemy_mock.return_value = self.alchemist
     test_db_utils.execute(
         create_update("phage", "Accession", TRIXIE_ACC, "Trixie"))
     run.main(self.unparsed_args)
     n_records = count_files(results_path)
     with self.subTest():
         folder_present = results_path.exists()
         self.assertTrue(folder_present)
     with self.subTest():
         self.assertEqual(n_records, 1)
Exemplo n.º 11
0
 def test_main_5(self, getpass_mock):
     """Verify that combining the status filter with a host genus
     filter leaves no genomes in the frozen database."""
     getpass_mock.side_effect = [USER, PWD]
     test_db_utils.execute(
         create_update("phage", "Status", "final", "Trixie"))
     filter_args = [
         "-f",
         "phage.Status != draft AND phage.HostGenus = Gordonia",
     ]
     self.unparsed_args.extend(filter_args)
     run.main(self.unparsed_args)
     frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
     self.assertEqual(frozen_rows[0]["count"], 0)
Exemplo n.º 12
0
 def test_main_5(self, alchemy_mock, mg_mock):
     """Verify that force_download retrieves draft data even for
     genomes already in the database."""
     # Two matched genomes are built; only one of them has
     # status = draft.
     genome_pairs = create_matched_genomes()
     alchemy_mock.return_value = self.alchemist
     mg_mock.return_value = (genome_pairs, {"EagleEye"})
     cli_args = get_unparsed_args(draft=True, force_download=True)
     run.main(cli_args)
     n_genome_files = count_files(pecaan_genomes_folder)
     self.assertEqual(n_genome_files, 2)
Exemplo n.º 13
0
 def test_main_3(self, alchemy_mock):
     """Verify that a filter excluding Trixie's accession results in
     no GenBank record being retrieved."""
     alchemy_mock.return_value = self.alchemist
     test_db_utils.execute(
         create_update("phage", "Accession", TRIXIE_ACC, "Trixie"))
     accession_filter = f"phage.Accession!={TRIXIE_ACC}"
     self.unparsed_args.extend(["-f", accession_filter])
     run.main(self.unparsed_args)
     n_records = count_files(results_path)
     with self.subTest():
         folder_present = results_path.exists()
         self.assertTrue(folder_present)
     with self.subTest():
         self.assertEqual(n_records, 0)
Exemplo n.º 14
0
 def test_main_13(self, getpass_mock):
     """Verify that, with the status filter plus the reset flag
     ('-r'), the new database holds no phages and reports
     version 0."""
     getpass_mock.side_effect = [USER, PWD]
     extra_args = ["-f", "phage.Status != draft", "-r"]
     self.unparsed_args.extend(extra_args)
     run.main(self.unparsed_args)
     phage_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
     version_rows = test_db_utils.get_data(
         test_db_utils.version_table_query, db=DB2)
     with self.subTest():
         self.assertEqual(phage_rows[0]["count"], 0)
     with self.subTest():
         self.assertEqual(version_rows[0]["Version"], 0)
Exemplo n.º 15
0
 def test_main_3(self, alchemy_mock, mg_mock):
     """Verify one draft genome file is retrieved and that the import
     table has a matching number of rows."""
     genome_pairs = create_matched_genomes()
     alchemy_mock.return_value = self.alchemist
     mg_mock.return_value = (genome_pairs, {"EagleEye"})
     run.main(get_unparsed_args(draft=True))
     n_genome_files = count_files(pecaan_genomes_folder)
     n_import_rows = count_rows(pecaan_import_table)
     with self.subTest():
         self.assertEqual(n_genome_files, 1)
     with self.subTest():
         self.assertEqual(n_genome_files, n_import_rows)
Exemplo n.º 16
0
 def test_main_1(self, getpass_mock):
     """Verify that freezing without any filter keeps the genome
     count unchanged and that the new database reports version 1."""
     getpass_mock.side_effect = [USER, PWD]
     run.main(self.unparsed_args)
     source_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB)
     frozen_rows = test_db_utils.get_data(COUNT_PHAGE, db=DB2)
     version_rows = test_db_utils.get_data(
         test_db_utils.version_table_query, db=DB2)
     with self.subTest():
         self.assertEqual(source_rows[0]["count"], frozen_rows[0]["count"])
     with self.subTest():
         self.assertEqual(version_rows[0]["Version"], 1)
Exemplo n.º 17
0
    def test_main_1(self, getpass_mock):
        """Verify step-by-step database conversion round trip is successful and
        generates identical empty schema files.

        The schema is first downgraded one version at a time to version 0,
        then upgraded one version at a time back to the current version.
        An empty schema file is written after every step, and the two files
        produced for the same intermediate version are diffed.
        """
        # If current = 4, down = [3, 2, 1, 0] and up = [1, 2, 3, 4].
        directions = (
            ("down", reversed(range(self.current))),
            ("up", range(1, self.current + 1)),
        )
        for prefix, versions in directions:
            for step in versions:
                # Re-arm the getpass mock before every run (presumably each
                # run consumes one user/password pair).
                getpass_mock.side_effect = [USER, PWD]
                unparsed_args = get_unparsed_convert_args(str(step))
                schema_filename = f"{prefix}_{unparsed_args[-1]}.sql"
                schema_filepath = Path(results_path, schema_filename)
                run.main(unparsed_args)
                test_db_utils.create_schema_file(schema_filepath)

        # Compare the empty schema file representing the same schema version,
        # generated from a downgrade (e.g. 7 to 6) and an upgrade (e.g. 5 to 6).
        # If current = 4, compare = [1, 2, 3]
        # If there is no difference, the diff file should have 0 lines.
        diff_line_counts = {}
        for step in range(1, self.current):
            filepath1 = Path(results_path, f"up_{step}.sql")
            filepath2 = Path(results_path, f"down_{step}.sql")
            diff_filepath = Path(results_path, f"diff_{step}.txt")
            diff_files(filepath1, filepath2, diff_filepath)
            with open(diff_filepath, "r") as handle:
                diff_line_counts[step] = len(handle.readlines())

        # Test the results. Labelling each subTest with the schema version
        # makes a failure report say which version's files differ.
        for step, line_count in diff_line_counts.items():
            with self.subTest(schema_version=step):
                self.assertEqual(line_count, 0)
Exemplo n.º 18
0
 def test_main_3(self, alchemy_mock):
     """Verify that the version flag bumps the version to 1 while
     leaving phage data untouched."""
     alchemy_mock.return_value = self.alchemist
     run.main(get_unparsed_args(version=True))
     version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
     phage_rows = test_db_utils.get_data(test_db_utils.phage_table_query)
     alice = phage_id_dict(phage_rows)["Alice"]
     with self.subTest():
         self.assertEqual(version_rows[0]["Version"], 1)
     # Alice's host genus serves as a spot check that nothing besides
     # the version data was modified.
     with self.subTest():
         self.assertEqual(alice["HostGenus"], "unknown")
Exemplo n.º 19
0
 def test_main_4(self, alchemy_mock):
     """Verify that a status filter still lets through the single
     GenBank record for Trixie (set to 'final' with an accession)."""
     alchemy_mock.return_value = self.alchemist
     test_db_utils.execute(
         create_update("phage", "Accession", TRIXIE_ACC, "Trixie"))
     test_db_utils.execute(
         create_update("phage", "Status", "final", "Trixie"))
     filter_args = ["-f", "phage.Status!=draft"]
     self.unparsed_args.extend(filter_args)
     run.main(self.unparsed_args)
     n_records = count_files(results_path)
     with self.subTest():
         folder_present = results_path.exists()
         self.assertTrue(folder_present)
     with self.subTest():
         self.assertEqual(n_records, 1)
Exemplo n.º 20
0
 def test_main_5(self, getpass_mock):
     """Verify that installing from a file replaces a database
     that already exists."""
     getpass_mock.side_effect = [USER, PWD]
     # Start from a filled database whose version table has been
     # dropped.
     test_db_utils.create_filled_test_db()
     test_db_utils.execute("DROP TABLE version")
     run.main(get_unparsed_args(option="file"))
     # If version data can be queried again, the first database
     # was replaced.
     query = test_db_utils.version_table_query
     version_rows = test_db_utils.get_data(query)
     self.assertEqual(len(version_rows), 1)
Exemplo n.º 21
0
    def test_main_2(self, alchemy_mock, gpd_mock):
        """Verify duplicate PhagesDB names are identified.

        The mocked PhagesDB retrieval returns an empty dictionary plus two
        sets containing "L5" and "D29".
        """
        # Clear accessions so that GenBank is not queried. No need for that.
        stmt = create_update("phage", "Accession", "")
        test_db_utils.execute(stmt)

        alchemy_mock.return_value = self.alchemist
        gpd_mock.return_value = ({}, {"L5"}, {"D29"})
        run.main(self.unparsed_args)
        count = count_files(test_folder)
        with self.subTest():
            # At least one output file should have been written.
            self.assertGreater(count, 0)
        with self.subTest():
            gpd_mock.assert_called()
Exemplo n.º 22
0
 def test_main_4(self, alchemy_mock):
     """Verify final data with very recent date are retrieved
     with force_download.

     DateLastModified is set to 2200-01-01 before the pipeline is run
     with final=True and force_download=True.
     """
     alchemy_mock.return_value = self.alchemist
     stmt = create_update("phage", "DateLastModified", "2200-01-01")
     test_db_utils.execute(stmt)
     unparsed_args = get_unparsed_args(final=True, force_download=True)
     run.main(unparsed_args)
     count = count_files(phagesdb_genome_folder)
     with self.subTest():
         # It's not clear how stable the storage of any particular final
         # flat file is on PhagesDB. There is possibility that for any
         # genome the associated flat file will be removed. So this is
         # one area of testing that could be improved. For now, simply
         # verify that 1 or more files have been retrieved.
         # assertGreater reports the actual count on failure.
         self.assertGreater(count, 0)
Exemplo n.º 23
0
 def test_main_4(self, alchemy_mock):
     """Verify that one ticket plus the version flag updates both
     Alice's HostGenus and the database version."""
     alchemy_mock.return_value = self.alchemist
     new_genus = "Mycobacterium"
     tickets = [get_alice_ticket("HostGenus", new_genus)]
     create_update_table(tickets, update_table)
     run.main(get_unparsed_args(file=update_table, version=True))
     version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
     phage_rows = test_db_utils.get_data(test_db_utils.phage_table_query)
     alice = phage_id_dict(phage_rows)["Alice"]
     with self.subTest():
         self.assertEqual(alice["HostGenus"], new_genus)
     with self.subTest():
         self.assertEqual(version_rows[0]["Version"], 1)
Exemplo n.º 24
0
 def test_main_1(self, alchemy_mock):
     """Verify the update pipeline runs with an empty ticket table
     and changes nothing."""
     alchemy_mock.return_value = self.alchemist
     create_update_table([], update_table)
     run.main(get_unparsed_args(file=update_table))
     version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
     phage_rows = test_db_utils.get_data(test_db_utils.phage_table_query)
     phages_by_id = phage_id_dict(phage_rows)
     alice = phages_by_id["Alice"]
     trixie = phages_by_id["Trixie"]
     # Both phages and the version should be untouched.
     for phage in (alice, trixie):
         with self.subTest():
             self.assertEqual(phage["HostGenus"], "unknown")
     with self.subTest():
         self.assertEqual(version_rows[0]["Version"], 0)
Exemplo n.º 25
0
 def test_main_2(self, alchemy_mock):
     """Verify one GenBank genome is retrieved for Trixie, with a
     matching import table and a results table covering every phage."""
     alchemy_mock.return_value = self.alchemist
     test_db_utils.execute(
         create_update("phage", "RetrieveRecord", "1", phage_id="Trixie"))
     test_db_utils.execute(
         create_update("phage", "Accession", TRIXIE_ACC, phage_id="Trixie"))
     run.main(get_unparsed_args(genbank=True, genbank_results=True))
     count_query = "SELECT COUNT(*) FROM phage"
     n_phages = test_db_utils.get_data(count_query)[0]["COUNT(*)"]
     n_genome_files = count_files(genbank_genomes_folder)
     n_import_rows = count_rows(genbank_import_table)
     n_result_rows = count_rows(genbank_results_table)
     with self.subTest():
         self.assertEqual(n_genome_files, 1)
     with self.subTest():
         self.assertEqual(n_genome_files, n_import_rows)
     with self.subTest():
         self.assertEqual(n_phages, n_result_rows)
Exemplo n.º 26
0
    def test_main_3(self, getpass_mock):
        """Verify that converting into a new database name leaves the
        original at the current schema version and puts the new database
        at the downgraded version."""
        getpass_mock.side_effect = [USER, PWD]

        # Convert down by a single schema version into DB2.
        target = self.current - 1
        run.main(get_unparsed_convert_args(str(target), new_db=DB2))

        # Read the schema version stored in each database.
        original_rows = test_db_utils.get_data(QUERY)
        original_version = original_rows[0]["SchemaVersion"]
        converted_rows = test_db_utils.get_data(QUERY, db=DB2)
        converted_version = converted_rows[0]["SchemaVersion"]

        # Check both versions.
        expectations = (
            (original_version, self.current),
            (converted_version, target),
        )
        for actual, expected in expectations:
            with self.subTest():
                self.assertEqual(actual, expected)
Exemplo n.º 27
0
 def test_main_3(self, getpass_mock):
     """Verify the database sql and version files are downloaded
     from the server."""
     getpass_mock.side_effect = [USER, PWD]
     # Caution when the entire Actinobacteriophage database is the test
     # download: only fetch the SQL file and do NOT install it, or the
     # existing Actinobacteriophage database will be overwritten. The
     # pipeline is called by the pdm_anon user, who should have no MySQL
     # privileges beyond selecting data from Actinobacteriophage, so
     # this shouldn't be a problem.
     cli_args = get_unparsed_args(
         db=DB2,
         option="server",
         download=True,
         version=True,
         output_folder=output_path,
     )
     run.main(cli_args)
     expected_files = (
         Path(results_path, f"{DB2}.sql"),
         Path(results_path, f"{DB2}.version"),
     )
     for expected in expected_files:
         with self.subTest():
             self.assertTrue(expected.exists())
Exemplo n.º 28
0
    def test_main_2(self, getpass_mock):
        """Verify non-step-by-step database conversion round trip is successful
        and generates identical empty schema files.

        The schema is downgraded straight to version 0 and then upgraded
        straight back to the current version. The empty schema files written
        before and after must not differ, and the stored schema version must
        be the current version again.
        """
        # Two pipeline runs follow, so two user/password pairs are queued.
        getpass_mock.side_effect = [USER, PWD, USER, PWD]

        # Get current schema file.
        schema_filepath1 = Path(results_path, "before.sql")
        test_db_utils.create_schema_file(schema_filepath1)

        # Downgrade to schema version 0.
        step = 0
        unparsed_args = get_unparsed_convert_args(str(step))
        run.main(unparsed_args)

        # Now upgrade to the original schema version.
        unparsed_args = get_unparsed_convert_args(str(self.current))
        run.main(unparsed_args)

        # Get data and schema file after round trip.
        schema_filepath2 = Path(results_path, "after.sql")
        test_db_utils.create_schema_file(schema_filepath2)
        new_version_data = test_db_utils.get_data(QUERY)
        new_schema_version = new_version_data[0]["SchemaVersion"]

        # Compare the two empty schema files representing the same schema version,
        # generated from a downgrade to 0 and upgrade.
        diff_filepath = Path(results_path, "diff.txt")
        diff_files(schema_filepath1, schema_filepath2, diff_filepath)

        # Get the number of lines in the diff file. If there is no
        # difference, there should be 0 lines.
        with open(diff_filepath, "r") as handle:
            lines = handle.readlines()
        result = len(lines)

        # Test the results.
        with self.subTest(check="schema diff"):
            self.assertEqual(result, 0)
        with self.subTest(check="schema version"):
            # The version was fetched but never checked before; the round
            # trip should end on the starting schema version.
            self.assertEqual(new_schema_version, self.current)
Exemplo n.º 29
0
 def test_main_2(self, alchemy_mock):
     """Verify that five tickets for Alice update her five fields
     while Trixie and the database version stay unchanged."""
     alchemy_mock.return_value = self.alchemist
     host_genus = "Mycobacterium"
     cluster = "A"
     subcluster = "A2"
     status = "final"
     accession = "ABC123"
     ticket_fields = (
         ("HostGenus", host_genus),
         ("Cluster", cluster),
         ("Subcluster", subcluster),
         ("Status", status),
         ("Accession", accession),
     )
     tickets = [get_alice_ticket(name, value) for name, value in ticket_fields]
     create_update_table(tickets, update_table)
     run.main(get_unparsed_args(file=update_table))
     version_rows = test_db_utils.get_data(test_db_utils.version_table_query)
     phage_rows = test_db_utils.get_data(test_db_utils.phage_table_query)
     phages_by_id = phage_id_dict(phage_rows)
     alice = phages_by_id["Alice"]
     trixie = phages_by_id["Trixie"]
     # Every field named in a ticket should now carry its new value.
     expected_alice = (
         ("HostGenus", host_genus),
         ("Cluster", cluster),
         ("Subcluster", subcluster),
         ("Accession", accession),
         ("Status", status),
     )
     for field, expected in expected_alice:
         with self.subTest():
             self.assertEqual(alice[field], expected)
     # Trixie and the version act as controls: only Alice was changed.
     with self.subTest():
         self.assertEqual(trixie["HostGenus"], "unknown")
     with self.subTest():
         self.assertEqual(version_rows[0]["Version"], 0)
Exemplo n.º 30
0
"""Controls pipelines from command line."""
import sys
from pdm_utils import run
# Executed as soon as this module is run or imported: the complete,
# unparsed argument list (program name included) is handed to run.main.
run.main(sys.argv)