Exemplo n.º 1
0
 def test_execute_6(self):
     """Verify execute() raises ValueError for values without in_column.
     """
     with self.assertRaises(ValueError):
         positional = (self.mock_engine, self.mock_executable)
         querying.execute(*positional, values=self.values)
Exemplo n.º 2
0
 def test_first_column_4(self):
     """Verify first_column() raises ValueError with lacking instructions.

     Values are supplied without an in_column to condition them on.
     """
     with self.assertRaises(ValueError):
         # Exercise first_column() itself; the test previously called
         # execute(), leaving first_column() untested.
         querying.first_column(self.mock_engine,
                               self.mock_executable,
                               values=self.values)
Exemplo n.º 3
0
    def test_execute_1(self):
        """Verify execute() calls engine.execute() and the proxy's fetchall().
        """
        engine = self.mock_engine
        querying.execute(engine, self.mock_executable)

        engine.execute.assert_called()
        self.mock_proxy.fetchall.assert_called()
Exemplo n.º 4
0
    def test_execute_5(self, subqueries_mock):
        """Verify execute() forwards its arguments to the subqueries helper.

        The patched helper must receive the engine, executable, in_column
        and values positionally, with limit and return_dict as keywords.
        """
        # Keyword options shared by the call under test and the expectation.
        options = {"limit": 8001, "return_dict": False}

        querying.execute(self.mock_engine,
                         self.mock_executable,
                         values=self.values,
                         in_column=self.mock_in_column,
                         **options)

        subqueries_mock.assert_called_with(self.mock_engine,
                                           self.mock_executable,
                                           self.mock_in_column,
                                           self.values,
                                           **options)
Exemplo n.º 5
0
    def select(self, raw_columns, return_dict=True):
        """Selects column data conditioned on the values in the Filter object.

        :param raw_columns: Column input, converted with get_columns().
        :type raw_columns: Column
        :type raw_columns: str
        :type raw_columns: list[Column]
        :type raw_columns: list[str]
        :param return_dict: Toggle whether to return data as a dictionary.
        :type return_dict: Boolean
        :returns: SELECT data conditioned on the values in the Filter object.
        :rtype: dict
        :rtype: list[RowProxy]
        """
        self.check()

        selected_columns = self.get_columns(raw_columns)
        select_query = q.build_select(self._graph,
                                      selected_columns,
                                      add_in=self._key)

        # The Filter's key column and values restrict the executed query.
        return q.execute(self._engine,
                         select_query,
                         in_column=self._key,
                         values=self._values,
                         return_dict=return_dict)
Exemplo n.º 6
0
def parse_feature_data(alchemist, values=None, limit=8000):
    """Returns Cds objects containing data parsed from a MySQL database.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param values: List of GeneIDs upon which the query can be conditioned.
        Omitting it (or passing None) is treated as an empty list.
    :type values: list[str]
    :param limit: Forwarded to querying.execute() as its limit argument.
    :type limit: int
    :returns: One parsed object per retrieved row, in retrieval order, as
        produced by mysqldb.parse_gene_table_data().
    :rtype: list
    """
    # A None sentinel replaces the former mutable default list so that no
    # list object is shared between calls.
    if values is None:
        values = []

    gene_table = querying.get_table(alchemist.metadata, "gene")
    # The first primary key column is the one the values are matched on.
    primary_key = list(gene_table.primary_key.columns)[0]
    cds_data_columns = list(gene_table.c)

    cds_data_query = querying.build_select(alchemist.graph, cds_data_columns)

    cds_data = querying.execute(alchemist.engine,
                                cds_data_query,
                                in_column=primary_key,
                                values=values,
                                limit=limit)

    return [mysqldb.parse_gene_table_data(data_dict)
            for data_dict in cds_data]
Exemplo n.º 7
0
    def test_execute_3(self, dict_mock):
        """Verify execute() returns the values produced by the patched dict().
        """
        converted_row = "dict_return_value"
        dict_mock.return_value = converted_row

        results = querying.execute(self.mock_engine, self.mock_executable)

        self.assertEqual(results, [converted_row])
Exemplo n.º 8
0
    def test_execute_4(self, dict_mock):
        """Verify return_dict=False makes execute() skip the dict() conversion.
        """
        converted_row = "dict_return_value"
        dict_mock.return_value = converted_row

        options = {"return_dict": False}
        results = querying.execute(self.mock_engine,
                                   self.mock_executable,
                                   **options)

        # The results must be the mock results, not the patched dict() output.
        self.assertNotEqual(results, [converted_row])
        self.assertEqual(results, self.mock_results)
Exemplo n.º 9
0
def retrieve_cluster_data(pan_alchemist, cluster_ids):
    """Retrieves spread, centroid and ID data for the given clusters.

    :param pan_alchemist: Object providing the graph and engine used to
        build and execute the query.
    :param cluster_ids: Values matched against the ClusterID column.
    :returns: The results of querying.execute() for the built select.
    """
    cluster_table = Cluster.__table__
    cluster_id_column = cluster_table.c.ClusterID

    selected_columns = [cluster_table.c.Spread,
                        cluster_table.c.CentroidID,
                        cluster_table.c.CentroidSeq,
                        cluster_id_column]
    query = querying.build_select(pan_alchemist.graph, selected_columns)

    return querying.execute(pan_alchemist.engine,
                            query,
                            in_column=cluster_id_column,
                            values=cluster_ids)
Exemplo n.º 10
0
    def test_execute_2(self):
        """Verify execute() returns only rows matching the where clause.
        """
        cluster_filter = querying.build_where_clause(self.graph,
                                                     "phage.Cluster=A")
        phage_table = querying.get_table(self.metadata, "phage")
        select_query = querying.build_select(self.graph,
                                             phage_table,
                                             where=cluster_filter)

        # Every returned row must belong to the filtered cluster.
        for row in querying.execute(self.engine, select_query):
            self.assertEqual(row["Cluster"], "A")
Exemplo n.º 11
0
def get_phams_and_lengths_from_organism(alchemist, organism_id):
    """Retrieves PhamID and Length data for the genes of one organism.

    :param alchemist: Object providing the metadata and engine to query with.
    :param organism_id: Value the gene table's PhageID column must equal.
    :returns: The results of querying.execute() with return_dict=False.
    """
    gene_columns = alchemist.metadata.tables["gene"].c

    phage_id_column = gene_columns.PhageID
    selected_columns = [gene_columns.PhamID, gene_columns.Length]

    phams_query = select(selected_columns).where(
        phage_id_column == organism_id)

    return querying.execute(alchemist.engine,
                            phams_query,
                            return_dict=False)
Exemplo n.º 12
0
def map_translations(alchemist, pham_ids):
    """Maps GeneIDs to translations for genes matching the given PhamIDs.

    :param alchemist: Object providing the metadata, graph and engine.
    :param pham_ids: Values matched against the gene table's PhamID column.
    :returns: Dictionary mapping each GeneID to its UTF-8 decoded
        translation; a repeated GeneID keeps the last row retrieved.
    :rtype: dict
    """
    gene_columns = alchemist.metadata.tables["gene"].c

    query = querying.build_select(
        alchemist.graph, [gene_columns.GeneID, gene_columns.Translation])
    rows = querying.execute(alchemist.engine,
                            query,
                            in_column=gene_columns.PhamID,
                            values=pham_ids)

    return {row["GeneID"]: row["Translation"].decode("utf-8")
            for row in rows}
Exemplo n.º 13
0
    def test_execute_1(self):
        """Verify execute() runs a select and returns rows with phage columns.
        """
        cluster_filter = querying.build_where_clause(self.graph,
                                                     "phage.Cluster=A")
        phage_table = querying.get_table(self.metadata, "phage")
        select_query = querying.build_select(self.graph,
                                             phage_table,
                                             where=cluster_filter)

        rows = querying.execute(self.engine, select_query)
        result_keys = rows[0].keys()

        # Checked in the same order as before: PhageID, Cluster, Subcluster.
        for column_name in ("PhageID", "Cluster", "Subcluster"):
            self.assertTrue(column_name in result_keys)
Exemplo n.º 14
0
def get_phams_and_coords_from_organism(alchemist, organism_id):
    """Retrieves PhamID, Start and Stop data for the genes of one organism.

    :param alchemist: Object providing the metadata and engine to query with.
    :param organism_id: Value the gene table's PhageID column must equal.
    :returns: The results of querying.execute() with return_dict=False.
    """
    gene_columns = alchemist.metadata.tables["gene"].c

    phage_id_column = gene_columns.PhageID
    selected_columns = [gene_columns.PhamID,
                        gene_columns.Start,
                        gene_columns.Stop]

    phams_query = select(selected_columns).where(
        phage_id_column == organism_id)

    return querying.execute(alchemist.engine,
                            phams_query,
                            return_dict=False)
Exemplo n.º 15
0
def use_function_report_data(db_filter,
                             data_dicts,
                             columns,
                             conditionals,
                             verbose=False):
    """Reads in FunctionReport data and pairs it with existing data.

    For each report row, genes in that row's pham whose Notes differ from
    the row's final call are retrieved; rows lacking an Accession or a
    LocusTag are skipped.

    :param db_filter: A connected and fully built Filter object.
    :type db_filter: Filter
    :param data_dicts: List of data dictionaries from a FunctionReport file.
    :type data_dicts: list[dict]
    :param columns: List of SQLAlchemy Columns to retrieve data for.
    :type columns: list[Column]
    :param conditionals: List of SQLAlchemy BinaryExpressions to filter with.
        This list is not modified.
    :type conditionals: List[BinaryExpression]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: Retrieved rows with Notes replaced by the final call and
        Start incremented by one.
    :rtype: list[dict]
    """
    if verbose:
        print("Retreiving feature data using pham function report...")

    export_dicts = []
    for data_dict in data_dicts:
        final_call = data_dict["Final Call"]
        # A hypothetical protein call is compared as an empty note.
        if final_call.lower() == "hypothetical protein":
            final_call = ""

        notes_clause = querying.build_where_clause(
            db_filter.graph, f"gene.Notes!='{final_call}'")
        # Build a fresh list for every report row. Appending to the shared
        # list would carry each row's Notes clause into all later queries
        # and would also mutate the caller's list.
        row_conditionals = list(conditionals) + [notes_clause]

        query = querying.build_select(db_filter.graph,
                                      columns,
                                      where=row_conditionals)

        results = querying.execute(db_filter.engine,
                                   query,
                                   in_column=db_filter.key,
                                   values=[data_dict["Pham"]])

        for result in results:
            if (not result["Accession"]) or (not result["LocusTag"]):
                continue
            result["Notes"] = data_dict["Final Call"]
            # NOTE(review): presumably converts a 0-based start coordinate
            # to 1-based - confirm against the database schema.
            result["Start"] = result["Start"] + 1
            export_dicts.append(result)

    return export_dicts
Exemplo n.º 16
0
def get_pham_gene_translations(alchemist, phams):
    """Creates a 2D dictionary that maps phams to dictionaries that map
    unique translations to respective gene labels for the specified phams.

    Each gene label is the PhageID and "gp" + Name joined by a space.

    :param alchemist:  A connected and fully build AlchemyHandler object
    :type alchemist: AlchemyHandler
    :param phams: Values matched against the gene table's PhamID column.
    :return: Returns a dictionary mapping phams to translations to gene labels
    :rtype: dict{dict}
    """
    gene_columns = alchemist.metadata.tables["gene"].c
    pham_id_column = gene_columns.PhamID

    query = querying.build_select(
        alchemist.graph,
        [pham_id_column, gene_columns.PhageID, gene_columns.Name,
         gene_columns.Translation])

    rows = querying.execute(alchemist.engine,
                            query,
                            in_column=pham_id_column,
                            values=phams)

    pham_ts_to_id = {}
    for row in rows:
        translation = row["Translation"].decode("utf-8")
        gene_label = " ".join([row["PhageID"], f"gp{row['Name']}"])

        # Create the nested containers on first sight of a pham/translation.
        translations = pham_ts_to_id.setdefault(row["PhamID"], {})
        translations.setdefault(translation, []).append(gene_label)

    return pham_ts_to_id
Exemplo n.º 17
0
def use_csv_data(db_filter, data_dicts, columns, conditionals, verbose=False):
    """Reads in gene table csv data and pairs it with existing data.

    Retrieved rows are matched to csv rows by GeneID; a row is exported only
    when its stored Notes differ from the csv Notes.

    :param db_filter: A connected and fully built Filter object.
    :type db_filter: Filter
    :param data_dicts: List of data dictionaries from a gene table csv file.
        Each must hold 'GeneID' and 'Notes' keys.
    :type data_dicts: list[dict]
    :param columns: List of SQLAlchemy Columns to retrieve data for.
        The retrieved rows must expose 'GeneID' and 'Notes'.
    :type columns: list[Column]
    :param conditionals: List of SQLAlchemy BinaryExpressions to filter with.
    :type conditionals: List[BinaryExpression]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: Retrieved rows whose Notes were replaced by the csv Notes.
    :rtype: list[dict]
    """
    if verbose:
        print("Retrieving feauture data using gene table csv...")

    query = querying.build_select(db_filter.graph, columns, where=conditionals)
    results = querying.execute(db_filter.engine,
                               query,
                               in_column=db_filter.key,
                               values=db_filter.values)

    # Index rows by their actual GeneID value. Keying every row under the
    # literal string 'GeneID' kept only the last row and made the lookups
    # below miss.
    results_by_gene_id = {result["GeneID"]: result for result in results}

    export_dicts = []
    for data_dict in data_dicts:
        result_dict = results_by_gene_id.get(data_dict["GeneID"])
        if result_dict is None:
            continue

        current_notes = result_dict["Notes"]
        # Notes is decoded from bytes as retrieved, but is already a str if
        # an earlier csv row for the same GeneID replaced it.
        if isinstance(current_notes, bytes):
            current_notes = current_notes.decode("utf-8")

        if current_notes != data_dict["Notes"]:
            result_dict["Notes"] = data_dict["Notes"]
            export_dicts.append(result_dict)

    return export_dicts
Exemplo n.º 18
0
def execute_resubmit(alchemist,
                     revisions_data_dicts,
                     folder_path,
                     folder_name,
                     filters="",
                     groups=None,
                     verbose=False):
    """Executes the entirety of the genbank resubmit pipeline.

    For every export path mapped by export_db.build_groups_map(), genes of
    each revised pham whose Notes differ from the final call are retrieved,
    formatted, sorted by Phage and written to a csv file. Mapped paths with
    no data to export are removed.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param revisions_data_dicts: Data dictionaries containing pham/notes data.
        Each must hold 'Pham' and 'Final Call' keys.
    :type revisions_data_dicts: list[dict]
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param filters: Conditional string added to the Filter; "" adds none.
    :type filters: str
    :param groups: Forwarded to export_db.build_groups_map() as its groups.
        Omitting it (or passing None) is treated as an empty list.
    :type groups: list
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # A None sentinel replaces the former mutable default list.
    if groups is None:
        groups = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"
    db_filter.add(BASE_CONDITIONALS)

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # Best effort: report the rejected filter string and carry on
            # without it. Exception (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")

    resubmit_columns = db_filter.get_columns(RESUBMIT_COLUMNS)

    db_filter.values = [data_dict["Pham"]
                        for data_dict in revisions_data_dicts]

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    # Filled in by build_groups_map(): export path -> list of conditionals.
    conditionals_map = {}
    export_db.build_groups_map(db_filter,
                               export_path,
                               conditionals_map,
                               groups=groups,
                               verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    for mapped_path, base_conditionals in conditionals_map.items():
        if verbose:
            print("Retreiving phage data for pham revisions...")
        export_dicts = []
        for data_dict in revisions_data_dicts:
            if verbose:
                print(f"...Retrieving data for pham {data_dict['Pham']}...")

            final_call = data_dict["Final Call"]
            if final_call == "Hypothetical Protein":
                final_call = ""

            # NOTE(review): final_call is interpolated unquoted; confirm
            # build_where_clause() parses an empty or multi-word value as
            # intended.
            notes_clause = querying.build_where_clause(
                alchemist.graph, f"gene.Notes!={final_call}")
            # Build a fresh list for every revision. Appending to the mapped
            # list would carry each revision's Notes clause into all later
            # queries for this path.
            conditionals = list(base_conditionals) + [notes_clause]

            query = querying.build_select(alchemist.graph,
                                          resubmit_columns,
                                          where=conditionals)

            results = querying.execute(alchemist.engine,
                                       query,
                                       in_column=db_filter.key,
                                       values=[data_dict["Pham"]])

            for result in results:
                format_resubmit_data(result, data_dict["Final Call"])
                export_dicts.append(result)

        if not export_dicts:
            if verbose:
                # The f prefix was missing, so the placeholder was printed
                # literally instead of the directory name.
                print(f"'{mapped_path.name}' data selected for resubmision "
                      "matches selected call; no resubmision exported...")

            mapped_path.rmdir()
            continue

        export_dicts = sorted(export_dicts,
                              key=lambda export_dict: export_dict["Phage"])

        if verbose:
            print(f"Writing {CSV_NAME} in {mapped_path.name}...")
        file_path = mapped_path.joinpath(CSV_NAME)
        basic.export_data_dict(export_dicts,
                               file_path,
                               RESUBMIT_HEADER,
                               include_headers=True)
Exemplo n.º 19
0
    def test_execute_2(self):
        """Verify execute() returns its results as data dictionaries.
        """
        engine, executable = self.mock_engine, self.mock_executable

        returned = querying.execute(engine, executable)

        self.assertEqual(returned, [self.data_dict])