Пример #1
0
    def test_build_select_1(self, ExtractWhereClauses, ExtractOrderByClauses,
                            BuildFromClause, Select, AppendWhereClauses,
                            AppendOrderByClauses):
        """Verify build_select() calls its helper functions as expected.

        The helper arguments are mocks (presumably injected by patch
        decorators outside this view); the test only inspects how
        build_select() calls them when given WHERE and ORDER BY input.
        """
        # select_from is attached to the mock's own type and hands the same
        # executable back, so the chained call keeps yielding one object.
        executable = Mock()
        select_from = Mock(return_value=executable)
        type(executable).select_from = select_from

        ExtractWhereClauses.return_value = self.columns
        ExtractOrderByClauses.return_value = self.columns
        BuildFromClause.return_value = self.phage
        for helper in (Select, AppendWhereClauses, AppendOrderByClauses):
            helper.return_value = executable

        querying.build_select(self.graph, self.columns,
                              where=self.whereclauses, order_by=self.columns)

        ExtractWhereClauses.assert_called_once_with(self.whereclauses)
        ExtractOrderByClauses.assert_called_once_with(self.columns)
        # Selected columns plus the columns returned by both extractors.
        BuildFromClause.assert_called_once_with(self.graph, self.columns * 3)

        Select.assert_called_once_with(self.columns)
        select_from.assert_called_once_with(self.phage)
        AppendWhereClauses.assert_called_once_with(executable,
                                                   self.whereclauses)
        AppendOrderByClauses.assert_called_once_with(executable,
                                                     self.columns)
Пример #2
0
    def test_build_select_1(self, extract_columns_mock, build_from_clause_mock,
                            select_mock, append_where_clauses_mock,
                            append_order_by_clauses_mock):
        """Verify function structure of build_select().

        All helpers arrive as mocks (presumably via patch decorators outside
        this view); only the calls build_select() makes are checked.
        """
        # select_from lives on the mock's own type and returns the same
        # executable, so the chained call keeps yielding one object.
        executable = Mock()
        select_from = Mock(return_value=executable)
        type(executable).select_from = select_from

        extract_columns_mock.return_value = self.columns
        build_from_clause_mock.return_value = self.phage
        for patched in (select_mock,
                        append_where_clauses_mock,
                        append_order_by_clauses_mock):
            patched.return_value = executable

        querying.build_select(self.graph, self.columns,
                              order_by=self.columns, add_in=self.columns)

        extract_columns_mock.assert_any_call(None)
        extract_columns_mock.assert_any_call(self.columns, check=Column)
        build_from_clause_mock.assert_called_once_with(self.graph,
                                                       self.columns * 4)

        select_mock.assert_called_once_with(self.columns)
        select_from.assert_called_once_with(self.phage)
        # No where argument was supplied, so None is what gets forwarded.
        append_where_clauses_mock.assert_called_once_with(executable, None)
        append_order_by_clauses_mock.assert_called_once_with(executable,
                                                             self.columns)
Пример #3
0
    def select(self, raw_columns, return_dict=True):
        """Queries the given columns, conditioned on the Filter's values.

        :param raw_columns: Column input(s), resolved through get_columns().
        :type raw_columns: Column
        :type raw_columns: str
        :type raw_columns: list[Column]
        :type raw_columns: list[str]
        :param return_dict: Toggle forwarded to the query execution.
        :type return_dict: Boolean
        :returns: Whatever the query execution returns for the built SELECT.
        :rtype: dict
        :rtype: list[RowProxy]
        """
        self.check()

        resolved_columns = self.get_columns(raw_columns)
        select_query = q.build_select(self._graph,
                                      resolved_columns,
                                      add_in=self._key)

        # The Filter's key and values restrict the rows at execution time.
        return q.execute(self._engine,
                         select_query,
                         in_column=self._key,
                         values=self._values,
                         return_dict=return_dict)
Пример #4
0
def get_cds_seqrecords(alchemist, values=[], nucleotide=False, verbose=False):
    """Build one record per CDS feature parsed from the database.

    Parent genomes are queried once per distinct genome id and reused for
    every later feature of the same genome.

    :param alchemist: Handler providing metadata, graph and engine.
    :param values: Forwarded to parse_feature_data() to condition the query.
    :param nucleotide: Accepted but not read anywhere in this function.
    :param verbose: Toggle progress print statements.
    :returns: The results of cds_to_seqrecord() for each feature, in order.
    :rtype: list
    """
    parent_genomes = {}
    records = []

    for cds in parse_feature_data(alchemist, values=values):
        genome_id = cds.genome_id

        if genome_id not in parent_genomes:
            if verbose:
                print(f"...Retrieving parent genome for {cds.id}...")

            phage_id_column = querying.get_column(alchemist.metadata,
                                                  "phage.PhageID")
            genome_query = querying.build_select(
                                        alchemist.graph,
                                        phage_id_column.table,
                                        where=(phage_id_column == genome_id))
            genome_row = mysqldb_basic.first(alchemist.engine, genome_query)
            parent_genomes[genome_id] = mysqldb.parse_phage_table_data(
                                                                genome_row)

        if verbose:
            print(f"Converting {cds.id}...")

        parent_genome = parent_genomes[genome_id]
        cds.genome_length = parent_genome.length
        cds.set_seqfeature()

        records.append(cds_to_seqrecord(cds, parent_genome))

    return records
Пример #5
0
    def build_values(self, where=None, order_by=None):
        """Queries the key column and returns the retrieved values.

        :param where: Optional WHERE clause or list of clauses. The object
            passed in is never modified.
        :param order_by: Column to order the query by; any value that is not
            a Column instance is ignored.
        :returns: The first element of every row fetched for the query.
        :rtype: list
        """
        self.check()

        # Identity test on purpose: ``where == None`` would go through the
        # overloaded ``==`` of SQLAlchemy clause objects instead of checking
        # for a missing argument.
        if where is None:
            where_clauses = []
        elif isinstance(where, (list, tuple)):
            # Copy so that appending below never leaks into the caller's list.
            where_clauses = list(where)
        else:
            # A single clause is wrapped so it can be extended below.
            where_clauses = [where]

        if self.values:
            where_clauses.append(self.key.in_(self.values))

        query = q.build_select(self.graph, [self._key], where=where_clauses)

        if isinstance(order_by, Column):
            query = query.order_by(order_by)

        proxy = self.engine.execute(query)

        return [row[0] for row in proxy.fetchall()]
Пример #6
0
def parse_feature_data(alchemist, values=[], limit=8000):
    """Returns objects parsed from the rows of the MySQL gene table.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param values: List of GeneIDs upon which the query can be conditioned.
    :type values: list[str]
    :param limit: Forwarded unchanged to querying.execute().
    :returns: One mysqldb.parse_gene_table_data() result per retrieved row.
    :rtype: list
    """
    gene_table = querying.get_table(alchemist.metadata, "gene")
    # The first primary key column is the one values are matched against.
    key_column = list(gene_table.primary_key.columns)[0]

    gene_query = querying.build_select(alchemist.graph, list(gene_table.c))
    gene_rows = querying.execute(alchemist.engine,
                                 gene_query,
                                 in_column=key_column,
                                 values=values,
                                 limit=limit)

    return [mysqldb.parse_gene_table_data(row) for row in gene_rows]
Пример #7
0
    def test_first_column_1(self):
        """Verify first_column() returns expected data type.
        """
        where_clause = querying.build_where_clause(self.graph,
                                                   "phage.Cluster=A")
        phageid = querying.get_column(self.metadata, "phage.PhageID")
        select = querying.build_select(self.graph, phageid, where=where_clause)

        results = querying.first_column(self.engine, select)

        # assertIsInstance reports the offending object and type on failure,
        # unlike assertTrue(isinstance(...)).
        self.assertIsInstance(results, list)
        self.assertIsInstance(results[0], str)
Пример #8
0
    def test_build_select_4(self):
        """Verify build_select() handles many-to-one relations as expected.
        build_select() queries should duplicate 'one' when filtering 'many'
        """
        where_clause = (self.Subcluster == "A2")
        select_query = querying.build_select(self.graph,
                                             self.Cluster,
                                             where=where_clause)

        dict_list = query_dict_list(self.engine, select_query)

        # assertGreater shows the actual row count when the check fails.
        self.assertGreater(len(dict_list), 1)
Пример #9
0
    def test_build_select_1(self):
        """Verify build_select() creates valid SQLAlchemy executable.
        """
        select_query = querying.build_select(self.graph, self.PhageID)

        # Named ``row`` rather than ``dict`` so the builtin is not shadowed.
        phage_ids = [row["PhageID"]
                     for row in query_dict_list(self.engine, select_query)]

        # assertIn reports the searched container when the check fails.
        self.assertIn("Myrna", phage_ids)
        self.assertIn("D29", phage_ids)
        self.assertIn("Trixie", phage_ids)
Пример #10
0
def retrieve_cluster_data(pan_alchemist, cluster_ids):
    """Query selected Cluster table columns for the given cluster ids.

    :param pan_alchemist: Object providing the graph and engine to query.
    :param cluster_ids: Values matched against the ClusterID column.
    :returns: The result of querying.execute() for the built SELECT.
    """
    cluster_columns = Cluster.__table__.c
    selected = [cluster_columns.Spread,
                cluster_columns.CentroidID,
                cluster_columns.CentroidSeq,
                cluster_columns.ClusterID]

    cluster_query = querying.build_select(pan_alchemist.graph, selected)

    return querying.execute(pan_alchemist.engine,
                            cluster_query,
                            in_column=cluster_columns.ClusterID,
                            values=cluster_ids)
Пример #11
0
    def test_first_column_2(self):
        """Verify first_column() retrieves expected data.
        """
        where_clause = querying.build_where_clause(self.graph,
                                                   "phage.Cluster=A")
        phageid = querying.get_column(self.metadata, "phage.PhageID")
        select = querying.build_select(self.graph, phageid, where=where_clause)

        results = querying.first_column(self.engine, select)

        # Membership assertions report the searched container on failure.
        self.assertIn("Trixie", results)
        self.assertIn("D29", results)
        self.assertNotIn("Myrna", results)
Пример #12
0
    def test_execute_2(self):
        """Verify execute() retrieves expected data.
        """
        cluster_filter = querying.build_where_clause(self.graph,
                                                     "phage.Cluster=A")
        query = querying.build_select(
            self.graph,
            querying.get_table(self.metadata, "phage"),
            where=cluster_filter)

        # Every returned row must satisfy the Cluster condition.
        for row in querying.execute(self.engine, query):
            self.assertEqual(row["Cluster"], "A")
Пример #13
0
    def test_execute_1(self):
        """Verify execute() correctly executes SQLAlchemy select objects.
        """
        where_clause = querying.build_where_clause(self.graph,
                                                   "phage.Cluster=A")
        phage_table = querying.get_table(self.metadata, "phage")
        select = querying.build_select(self.graph,
                                       phage_table,
                                       where=where_clause)

        results = querying.execute(self.engine, select)
        result_keys = results[0].keys()

        # assertIn lists the available keys when an expected one is missing.
        self.assertIn("PhageID", result_keys)
        self.assertIn("Cluster", result_keys)
        self.assertIn("Subcluster", result_keys)
Пример #14
0
    def test_build_select_2(self):
        """Verify build_select() appends WHERE clauses to executable.
        """
        where_clause = (self.Cluster == "A")
        select_query = querying.build_select(self.graph,
                                             self.PhageID,
                                             where=where_clause)

        # Named ``row`` rather than ``dict`` so the builtin is not shadowed.
        phage_ids = [row["PhageID"]
                     for row in query_dict_list(self.engine, select_query)]

        # Membership assertions report the searched container on failure.
        self.assertIn("Trixie", phage_ids)
        self.assertIn("D29", phage_ids)
        self.assertNotIn("Myrna", phage_ids)
Пример #15
0
def map_translations(alchemist, pham_ids):
    """Map GeneIDs to decoded translations for genes in the given phams.

    :param alchemist: Object providing metadata, graph and engine.
    :param pham_ids: Values matched against the gene table's PhamID column.
    :returns: Dictionary of GeneID to UTF-8 decoded Translation.
    :rtype: dict
    """
    gene_columns = alchemist.metadata.tables["gene"].c

    query = querying.build_select(
        alchemist.graph, [gene_columns.GeneID, gene_columns.Translation])
    rows = querying.execute(alchemist.engine,
                            query,
                            in_column=gene_columns.PhamID,
                            values=pham_ids)

    # Translation is decoded as UTF-8 before being stored in the mapping.
    return {row["GeneID"]: row["Translation"].decode("utf-8") for row in rows}
Пример #16
0
def use_function_report_data(db_filter,
                             data_dicts,
                             columns,
                             conditionals,
                             verbose=False):
    """Reads in FunctionReport data and pairs it with existing data.

    For each report row, genes of that row's pham whose Notes differ from
    the row's final call are queried. Rows lacking an Accession or LocusTag
    are dropped; the rest get the final call as Notes and Start shifted by
    one.

    :param db_filter: A connected and fully built Filter object.
    :type db_filter: Filter
    :param data_dicts: List of data dictionaries from a FunctionReport file.
    :type data_dicts: list[dict]
    :param columns: List of SQLAlchemy Columns to retrieve data for.
    :type columns: list[Column]
    :param conditionals: List of SQLAlchemy BinaryExpressions to filter with.
        The list is not modified.
    :type conditionals: List[BinaryExpression]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: The updated result dictionaries selected for export.
    :rtype: list[dict]
    """
    if verbose:
        print("Retreiving feature data using pham function report...")

    export_dicts = []
    for data_dict in data_dicts:
        final_call = data_dict["Final Call"]
        if final_call.lower() == "hypothetical protein":
            final_call = ""

        # NOTE(review): final_call is interpolated into the clause string
        # as-is; a call containing a quote may not parse -- confirm against
        # build_where_clause().
        notes_clause = querying.build_where_clause(
                                        db_filter.graph,
                                        f"gene.Notes!='{final_call}'")

        # Fresh list per row: appending to ``conditionals`` itself would
        # carry earlier rows' Notes filters into every later query and
        # would also modify the caller's list.
        row_conditionals = list(conditionals)
        row_conditionals.append(notes_clause)

        query = querying.build_select(db_filter.graph,
                                      columns,
                                      where=row_conditionals)

        results = querying.execute(db_filter.engine,
                                   query,
                                   in_column=db_filter.key,
                                   values=[data_dict["Pham"]])

        for result in results:
            if (not result["Accession"]) or (not result["LocusTag"]):
                continue
            result["Notes"] = data_dict["Final Call"]
            result["Start"] += 1
            export_dicts.append(result)

    return export_dicts
Пример #17
0
    def test_build_select_3(self):
        """Verify build_select() appends ORDER BY clauses to executable.
        """
        select_query = querying.build_select(self.graph,
                                             self.PhageID,
                                             order_by=self.PhageID)

        # The query is executed once; a second identical execution added
        # nothing to the test. ``row`` avoids shadowing the builtin ``dict``.
        phage_ids = [row["PhageID"]
                     for row in query_dict_list(self.engine, select_query)]

        # The first id checks the ordering; the rest check presence.
        self.assertEqual("Alice", phage_ids[0])
        self.assertIn("Myrna", phage_ids)
        self.assertIn("D29", phage_ids)
        self.assertIn("Trixie", phage_ids)
Пример #18
0
    def test_execute_value_subqueries(self):
        """Verify execute_value_subqueries() retrieves expected data.
        """
        cluster_filter = querying.build_where_clause(self.graph,
                                                     "phage.Cluster=A")
        phage_table = querying.get_table(self.metadata, "phage")
        phage_id_column = querying.get_column(self.metadata, "phage.PhageID")
        query = querying.build_select(self.graph,
                                      phage_table,
                                      where=cluster_filter)

        candidate_ids = ["Trixie", "D29", "Alice", "Myrna"]
        rows = querying.execute_value_subqueries(self.engine,
                                                 query,
                                                 phage_id_column,
                                                 candidate_ids,
                                                 limit=2)

        # Whatever ids were supplied, only Cluster A rows may come back.
        for row in rows:
            self.assertEqual(row["Cluster"], "A")
Пример #19
0
    def sort(self, raw_columns):
        """Re-queries for the Filter's values, applying an ORDER BY clause.

        The stored values are replaced by the ordered query result and
        flagged as valid.

        :param raw_columns: SQLAlchemy Column object(s) or object name(s).
        :type raw_columns: Column
        :type raw_columns: str
        :type raw_columns: list[Column]
        :type raw_columns: list[str]
        """
        self.check()

        order_columns = self.get_columns(raw_columns)
        ordered_query = q.build_select(self._graph,
                                       self._key,
                                       order_by=order_columns)

        # Only the current values are re-queried, now in the requested order.
        self._values = q.first_column(self._engine,
                                      ordered_query,
                                      in_column=self._key,
                                      values=self._values)
        self._values_valid = True
Пример #20
0
def execute_csv_export(alchemist,
                       export_path,
                       table="phage",
                       values=[],
                       verbose=False):
    """Export the rows of one database table to '<table>.csv'.

    :param alchemist: Object providing get_table(), graph and execute().
    :param export_path: Path-like directory the csv file is joined onto.
    :param table: Name of the table to export.
    :type table: str
    :param values: Primary key values to restrict the export to; when
        empty, no restriction is applied.
    :param verbose: Accepted but not read anywhere in this function.
    """
    # Columns left out of the export, keyed by table name.
    remove_fields = {
        "phage": ["Sequence"],
        "gene": ["Translation"],
        "domain": [],
        "gene_domain": [],
        "pham": [],
        "pham_color": [],
        "trna": ["Sequence"],
        "tmrna": [],
        "trna_structures": []
    }

    table_obj = alchemist.get_table(table)

    select_columns = [column for column in table_obj.columns
                      if column.name not in remove_fields[table]]
    headers = [column.name for column in select_columns]

    # Leaves primary_key bound to the last primary key column of the table.
    for primary_key in table_obj.primary_key.columns:
        pass

    query = querying.build_select(alchemist.graph, select_columns)
    if values:
        query = query.where(primary_key.in_(values))

    rows = alchemist.execute(query)

    basic.export_data_dict(rows,
                           export_path.joinpath(f"{table}.csv"),
                           headers,
                           include_headers=True)
Пример #21
0
def get_pham_gene_translations(alchemist, phams):
    """Creates a 2D dictionary that maps phams to dictionaries that map
    unique translations to respective gene labels for the specified phams.

    Each label is the PhageID and 'gp' + Name joined by a space.

    :param alchemist:  A connected and fully build AlchemyHandler object
    :type alchemist: AlchemyHandler
    :param phams: Values matched against the gene table's PhamID column.
    :return: Returns a dictionary mapping phams to translations to labels
    :rtype: dict{dict}
    """
    gene_columns = alchemist.metadata.tables["gene"].c
    pham_column = gene_columns.PhamID

    query = querying.build_select(
        alchemist.graph,
        [pham_column, gene_columns.PhageID, gene_columns.Name,
         gene_columns.Translation])

    rows = querying.execute(alchemist.engine,
                            query,
                            in_column=pham_column,
                            values=phams)

    pham_ts_to_id = {}
    for row in rows:
        translation = row["Translation"].decode("utf-8")
        pham_id = row["PhamID"]
        gene_label = " ".join([row["PhageID"], f"gp{row['Name']}"])

        # Create the nested containers on first sight, then record the label.
        translations = pham_ts_to_id.setdefault(pham_id, {})
        translations.setdefault(translation, []).append(gene_label)

    return pham_ts_to_id
Пример #22
0
def use_csv_data(db_filter, data_dicts, columns, conditionals, verbose=False):
    """Reads in gene table csv data and pairs it with existing data.

    Database rows are matched to csv rows by GeneID; a row is exported, with
    the csv Notes written into it, only when its stored Notes differ.

    :param db_filter: A connected and fully built Filter object.
    :type db_filter: Filter
    :param data_dicts: List of data dictionaries from a gene table csv file.
    :type data_dicts: list[dict]
    :param columns: List of SQLAlchemy Columns to retrieve data for.
        Presumably must include the GeneID and Notes columns, since both
        keys are read from the results.
    :type columns: list[Column]
    :param conditionals: List of SQLAlchemy BinaryExpressions to filter with.
    :type conditionals: List[BinaryExpression]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: The updated result dictionaries selected for export.
    :rtype: list[dict]
    """
    if verbose:
        print("Retrieving feauture data using gene table csv...")

    query = querying.build_select(db_filter.graph, columns, where=conditionals)
    results = querying.execute(db_filter.engine,
                               query,
                               in_column=db_filter.key,
                               values=db_filter.values)

    # Index rows by their actual GeneID value. Keying every row under the
    # literal string 'GeneID' kept only the last row and made the lookup
    # below miss for every real gene id.
    results_dict = {result["GeneID"]: result for result in results}

    export_dicts = []
    for data_dict in data_dicts:
        result_dict = results_dict.get(data_dict["GeneID"])
        if result_dict is None:
            continue

        # Stored Notes are decoded as UTF-8 before comparing with the csv.
        if result_dict["Notes"].decode("utf-8") != data_dict["Notes"]:
            result_dict["Notes"] = data_dict["Notes"]
            export_dicts.append(result_dict)

    return export_dicts
Пример #23
0
def execute_resubmit(alchemist,
                     revisions_data_dicts,
                     folder_path,
                     folder_name,
                     filters="",
                     groups=None,
                     verbose=False):
    """Executes the entirety of the genbank resubmit pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param revisions_data_dicts: Data dictionaries containing pham/notes data.
    :type revisions_data_dicts: list[dict]
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param filters: Conditional string added to the Filter; skipped when
        empty.
    :type filters: str
    :param groups: Forwarded to export_db.build_groups_map(); defaults to an
        empty list.
    :type groups: list
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # A fresh list per call instead of a default list shared between calls.
    if groups is None:
        groups = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"
    db_filter.add(BASE_CONDITIONALS)

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # Best effort: report the rejected filter string and carry on
            # with the base conditionals only.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")

    resubmit_columns = db_filter.get_columns(RESUBMIT_COLUMNS)

    db_filter.values = [data_dict["Pham"]
                        for data_dict in revisions_data_dicts]

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    conditionals_map = {}
    export_db.build_groups_map(db_filter,
                               export_path,
                               conditionals_map,
                               groups=groups,
                               verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    for mapped_path, base_conditionals in conditionals_map.items():
        if verbose:
            print("Retreiving phage data for pham revisions...")
        export_dicts = []
        for data_dict in revisions_data_dicts:
            if verbose:
                print(f"...Retrieving data for pham {data_dict['Pham']}...")

            final_call = data_dict["Final Call"]
            if final_call == "Hypothetical Protein":
                final_call = ""

            # NOTE(review): final_call is interpolated unquoted here --
            # confirm this is the form build_where_clause() expects.
            notes_clause = querying.build_where_clause(
                                            alchemist.graph,
                                            f"gene.Notes!={final_call}")

            # Copy per pham: appending to the mapped list itself would
            # carry earlier phams' Notes filters into every later query.
            conditionals = list(base_conditionals)
            conditionals.append(notes_clause)

            query = querying.build_select(alchemist.graph,
                                          resubmit_columns,
                                          where=conditionals)

            results = querying.execute(alchemist.engine,
                                       query,
                                       in_column=db_filter.key,
                                       values=[data_dict["Pham"]])

            for result in results:
                format_resubmit_data(result, data_dict["Final Call"])
                export_dicts.append(result)

        if not export_dicts:
            if verbose:
                # f-prefix added so the folder name is actually interpolated.
                print(f"'{mapped_path.name}' data selected for resubmision "
                      "matches selected call; no resubmision exported...")

            # Nothing to export for this group, so drop its folder.
            mapped_path.rmdir()
            continue

        export_dicts = sorted(export_dicts,
                              key=lambda export_dict: export_dict["Phage"])

        if verbose:
            print(f"Writing {CSV_NAME} in {mapped_path.name}...")
        file_path = mapped_path.joinpath(CSV_NAME)
        basic.export_data_dict(export_dicts,
                               file_path,
                               RESUBMIT_HEADER,
                               include_headers=True)