Пример #1
0
    def test_get_table_1(self, translate_table_mock):
        """Check that get_table() calls translate_table() with the metadata
        and table name it was given.
        """
        table_name = "phage"
        translate_table_mock.return_value = table_name

        querying.get_table(self.metadata, table_name)

        translate_table_mock.assert_called_with(self.metadata, table_name)
Пример #2
0
def parse_feature_data(alchemist, values=None, limit=8000):
    """Returns Cds objects containing data parsed from a MySQL database.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param values: List of GeneIDs upon which the query can be conditioned.
        An empty list is used when the argument is omitted or None.
    :type values: list[str]
    :param limit: Forwarded unchanged as the limit argument of
        querying.execute().
    :type limit: int
    :returns: One object built by mysqldb.parse_gene_table_data() per row
        returned by the query, in query order.
    :rtype: list
    """
    # Build a fresh list on every call; a list literal in the signature
    # would be shared between calls and could leak state if modified.
    if values is None:
        values = []

    gene_table = querying.get_table(alchemist.metadata, "gene")
    # The query is conditioned on the first primary-key column of the table.
    primary_key = list(gene_table.primary_key.columns)[0]
    cds_data_columns = list(gene_table.c)

    cds_data_query = querying.build_select(alchemist.graph, cds_data_columns)

    cds_data = querying.execute(alchemist.engine,
                                cds_data_query,
                                in_column=primary_key,
                                values=values,
                                limit=limit)

    return [mysqldb.parse_gene_table_data(data_dict)
            for data_dict in cds_data]
Пример #3
0
    def key(self, key):
        """Set the filter key from a Column or from a column/table name.

        :param key: A SQLAlchemy Column, or a string naming a MySQL column
            or table.  A table name resolves to the first column of that
            table's primary key.
        :type key: Column or str
        :raises ValueError: If a string is given while self.graph is None,
            or if the string resolves to neither a column nor a table.
        :raises TypeError: If key is neither a Column nor a string.
        """
        if isinstance(key, Column):
            self._key = key
        elif isinstance(key, str):
            if self.graph is None:
                raise ValueError("String key input requires MySQL connection.")

            metadata = self.graph.graph["metadata"]

            # Interpret the string as a column first and fall back to a
            # table name.  Catch Exception rather than using a bare except
            # so KeyboardInterrupt and SystemExit are never swallowed.
            try:
                self._key = q.get_column(metadata, key)
            except Exception:
                try:
                    table_obj = q.get_table(metadata, key)
                except Exception:
                    raise ValueError("Inputted string key is neither a valid "
                                     "MySQL column or table.")

                self._key = list(table_obj.primary_key.columns)[0]

        else:
            # Trailing space keeps the first two sentences from running
            # together in the rendered message.
            raise TypeError("Filter key value is invalid. "
                            "Filter key must be one of the following: \n"
                            "SQLAlchemy Column\n"
                            "MySQL column string\n"
                            "MySQL table string\n")
Пример #4
0
def main(unparsed_args_list):
    """Entry point of the get_gb_records pipeline.

    Parses the command line, creates the working directory, uses the
    command line filters to select PhageIDs from the database, looks up the
    accessions of those PhageIDs and hands them to get_data().

    :param unparsed_args_list: Command line arguments, given to parse_args().
    """
    cli_args = parse_args(unparsed_args_list)

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filter_strings = cli_args.filters
    credentials = ncbi.get_ncbi_creds(cli_args.ncbi_credentials_file)
    destination = basic.set_path(cli_args.output_folder,
                                 kind="dir",
                                 expect=True)
    results_dir = pathlib.Path(RESULTS_FOLDER)
    run_path = basic.make_new_dir(destination, results_dir, attempt=50)
    if run_path is None:
        print(f"Invalid working directory '{results_dir}'")
        sys.exit(1)

    # Connect and make sure the schema is one this pipeline can work with.
    print("Connecting to the MySQL database...")
    alchemist = AlchemyHandler(database=cli_args.database)
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the get_gb_records pipeline")

    # Walk the primary key columns of the target table; once the loop is
    # done, primary_key holds the last one (e.g. 'phage.PhageID').
    alchemist.build_metadata()
    target = querying.get_table(alchemist.metadata, TARGET_TABLE)
    for primary_key in target.primary_key.columns:
        pass

    # Start from an empty filter, add the command line filter strings
    # (add_filters exits if needed) and run the query.
    db_filter = Filter(alchemist=alchemist, key=primary_key)
    db_filter.values = []
    add_filters(db_filter, filter_strings)
    db_filter.update()

    # db_filter.values now holds the PhageIDs that passed the filters.
    phage_ids = set(db_filter.values)

    # Map the retained PhageIDs to accessions and back.
    print("Retrieving accessions from the database...")
    accession_query = construct_accession_query(phage_ids)
    rows = mysqldb_basic.query_dict_list(engine, accession_query)
    id_to_acc = get_id_acc_dict(rows)
    acc_to_id = get_acc_id_dict(id_to_acc)
    engine.dispose()

    if len(acc_to_id.keys()) == 0:
        print("There are no records to retrieve.")
        return

    get_data(run_path, acc_to_id, credentials)
Пример #5
0
    def test_execute_2(self):
        """Check that every row returned by execute() has Cluster 'A' when
        the select is built with the phage.Cluster=A where clause.
        """
        cluster_filter = querying.build_where_clause(
            self.graph, "phage.Cluster=A")
        table = querying.get_table(self.metadata, "phage")
        query = querying.build_select(
            self.graph, table, where=cluster_filter)

        rows = querying.execute(self.engine, query)

        for row in rows:
            self.assertEqual(row["Cluster"], "A")
Пример #6
0
    def test_execute_1(self):
        """Verify execute() correctly executes SQLAlchemy select objects.

        The first returned row must expose the PhageID, Cluster and
        Subcluster keys.
        """
        where_clause = querying.build_where_clause(self.graph,
                                                   "phage.Cluster=A")
        phage_table = querying.get_table(self.metadata, "phage")
        select = querying.build_select(self.graph,
                                       phage_table,
                                       where=where_clause)

        results = querying.execute(self.engine, select)

        # Report an empty result as a test failure rather than letting
        # results[0] raise IndexError and show up as an error.
        self.assertTrue(results, "execute() returned no rows")
        result_keys = results[0].keys()

        # assertIn names the missing key and the keys actually present,
        # unlike assertTrue(x in y) which only reports "False is not true".
        self.assertIn("PhageID", result_keys)
        self.assertIn("Cluster", result_keys)
        self.assertIn("Subcluster", result_keys)
Пример #7
0
    def test_execute_value_subqueries(self):
        """Check that every row returned by execute_value_subqueries() has
        Cluster 'A' when called with four PhageIDs and limit=2.
        """
        cluster_filter = querying.build_where_clause(
            self.graph, "phage.Cluster=A")
        table = querying.get_table(self.metadata, "phage")
        id_column = querying.get_column(self.metadata, "phage.PhageID")
        query = querying.build_select(
            self.graph, table, where=cluster_filter)
        phage_ids = ["Trixie", "D29", "Alice", "Myrna"]

        rows = querying.execute_value_subqueries(self.engine,
                                                 query,
                                                 id_column,
                                                 phage_ids,
                                                 limit=2)

        for row in rows:
            self.assertEqual(row["Cluster"], "A")
Пример #8
0
 def test_get_table_3(self):
     """Check that an unknown table name makes get_table() raise ValueError.
     """
     self.assertRaises(ValueError,
                       querying.get_table, self.metadata, "not_a_table")
Пример #9
0
 def test_get_table_2(self):
     """Check that get_table() ignores the case of the table name.
     """
     mixed_case_table = querying.get_table(self.metadata, "pHAgE")

     self.assertEqual(mixed_case_table, self.phage)
Пример #10
0
 def test_get_table_1(self):
     """Check that get_table() returns the Table matching the given name.
     """
     retrieved = querying.get_table(self.metadata, "phage")

     self.assertEqual(retrieved, self.phage)
Пример #11
0
    def test_get_table_2(self):
        """Check that get_table() hands back the expected Table object.
        """
        self.assertEqual(querying.get_table(self.metadata, "phage"),
                         self.phage)
Пример #12
0
def main(unparsed_args_list):
    """Entry point of the freeze database pipeline.

    Selects the PhageIDs that pass the command line filters in the
    reference database, creates a new database, copies the reference
    database into it, and runs a DELETE statement built from the retained
    PhageIDs against the copy.

    :param unparsed_args_list: Command line arguments, given to parse_args().
    """
    cli_args = parse_args(unparsed_args_list)
    ref_database = cli_args.database
    reset = cli_args.reset
    new_database = cli_args.new_database_name
    prefix = cli_args.prefix

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filter_strings = cli_args.filters

    # Configuration comes from the config file and/or defaults.
    config = configfile.build_complete_config(cli_args.config_file)
    mysql_creds = config["mysql"]

    # Connect and make sure the schema is one this pipeline can work with.
    print("Connecting to the MySQL database...")
    alchemist1 = AlchemyHandler(database=ref_database,
                                username=mysql_creds["user"],
                                password=mysql_creds["password"])
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine
    mysqldb.check_schema_compatibility(engine1, "the freeze pipeline")

    # Walk the primary key columns of the target table; once the loop is
    # done, primary_key holds the last one (e.g. 'phage.PhageID').
    alchemist1.build_metadata()
    table = querying.get_table(alchemist1.metadata, TARGET_TABLE)
    for primary_key in table.primary_key.columns:
        pass

    # Start from an empty filter, add the command line filter strings
    # (add_filters exits if needed) and run the query.
    db_filter = Filter(alchemist=alchemist1, key=primary_key)
    db_filter.values = []
    add_filters(db_filter, filter_strings)
    db_filter.update()

    # db_filter.values now holds the PhageIDs that passed the filters.
    # Build the DELETE statement and count the genomes being retained.
    keep_set = set(db_filter.values)
    delete_stmt = construct_delete_stmt(TARGET_TABLE, primary_key, keep_set)
    count_query = construct_count_query(TARGET_TABLE, primary_key, keep_set)
    phage_count = mysqldb_basic.scalar(alchemist1.engine, count_query)

    # Without an explicit name, derive one from the prefix and genome count.
    if new_database is None:
        if prefix is None:
            prefix = get_prefix()
        new_database = f"{prefix}_{phage_count}"

    # Never overwrite the reference database with its own frozen copy.
    if engine1.url.database == new_database:
        print(
            "Error: names of the reference and frozen databases are the same.")
        print("No database will be created.")
        status = 1
    else:
        status = mysqldb_basic.drop_create_db(engine1, new_database)

    if status != 0:
        print(f"Error creating new database: {new_database}.")
    else:
        # Copy the reference database, then prune the copy.
        print(f"Reference database: {ref_database}")
        print(f"New database: {new_database}")
        status = mysqldb_basic.copy_db(engine1, new_database)
        if status != 0:
            print("Unable to copy the database.")
        else:
            print("Deleting genomes...")
            alchemist2 = AlchemyHandler(database=new_database,
                                        username=engine1.url.username,
                                        password=engine1.url.password)
            alchemist2.connect(pipeline=True)
            engine2 = alchemist2.engine
            engine2.execute(delete_stmt)
            if reset:
                engine2.execute(RESET_VERSION)

            # Close up all connections in the connection pool.
            engine2.dispose()
        # Close up all connections in the connection pool.
        engine1.dispose()
    print("Freeze database script completed.")
Пример #13
0
    def test_get_table_2(self):
        """Verify get_table() returns the Table stored as self.phage."""
        phage_table = querying.get_table(self.metadata, "phage")
        self.assertEqual(phage_table, self.phage)
Пример #14
0
    def test_get_table_1(self, TranslateTable):
        """Verify get_table() calls the TranslateTable mock with the
        metadata and table name it was given.
        """
        requested = "phage"
        TranslateTable.return_value = requested

        querying.get_table(self.metadata, requested)

        TranslateTable.assert_called_with(self.metadata, requested)