def test_execute_6(self):
    """Verify that execute() raises ValueError with lacking instruction.

    execute() is called with ``values`` but without an ``in_column``.
    """
    self.assertRaises(ValueError, querying.execute,
                      self.mock_engine, self.mock_executable,
                      values=self.values)
def test_first_column_4(self):
    """Verify first_column() raises ValueError with lacking instructions.

    first_column() is called with ``values`` but without an ``in_column``.
    """
    with self.assertRaises(ValueError):
        # Exercise first_column() itself: this test previously called
        # execute(), which left first_column() unverified.
        # NOTE(review): assumes first_column() accepts the same
        # (engine, executable, values=...) arguments as execute() - confirm.
        querying.first_column(self.mock_engine, self.mock_executable,
                              values=self.values)
def test_execute_1(self):
    """Verify function structure of execute().

    After a plain call, both ``mock_engine.execute`` and
    ``mock_proxy.fetchall`` must have been called.
    """
    engine = self.mock_engine
    querying.execute(engine, self.mock_executable)

    engine.execute.assert_called()
    self.mock_proxy.fetchall.assert_called()
def test_execute_5(self, subqueries_mock):
    """Verify that execute() calls execute_value_subqueries().

    :param subqueries_mock: Stand-in for execute_value_subqueries();
        presumably injected by a patch decorator outside this block.
    """
    # Options expected to be forwarded unchanged to the subquery helper.
    options = {"limit": 8001, "return_dict": False}

    querying.execute(self.mock_engine, self.mock_executable,
                     values=self.values, in_column=self.mock_in_column,
                     **options)

    subqueries_mock.assert_called_with(self.mock_engine,
                                       self.mock_executable,
                                       self.mock_in_column, self.values,
                                       **options)
def select(self, raw_columns, return_dict=True):
    """Queries for data conditioned on the values in the Filter object.

    :param raw_columns: SQLAlchemy Column object(s) or column name(s).
    :type raw_columns: Column
    :type raw_columns: str
    :type raw_columns: list[Column]
    :type raw_columns: list[str]
    :param return_dict: Toggle whether to return data as a dictionary.
    :type return_dict: Boolean
    :returns: SELECT data conditioned on the values in the Filter object.
    :rtype: dict
    :rtype: list[RowProxy]
    """
    self.check()

    # Resolve the raw column input before building the query.
    resolved_columns = self.get_columns(raw_columns)
    select_query = q.build_select(self._graph, resolved_columns,
                                  add_in=self._key)

    return q.execute(self._engine, select_query, in_column=self._key,
                     values=self._values, return_dict=return_dict)
def parse_feature_data(alchemist, values=None, limit=8000):
    """Returns Cds objects containing data parsed from a MySQL database.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param values: List of GeneIDs upon which the query can be conditioned.
        Omitting it (or passing None) is the same as passing an empty list.
    :type values: list[str]
    :param limit: Forwarded to querying.execute() as ``limit``.
    :type limit: int
    :returns: One object from mysqldb.parse_gene_table_data() per row
        returned by the query, in query order.
    :rtype: list
    """
    # A fresh list per call; a list literal as the default would be one
    # object shared by every call.
    if values is None:
        values = []

    gene_table = querying.get_table(alchemist.metadata, "gene")
    primary_key = list(gene_table.primary_key.columns)[0]

    # Select every column of the gene table, conditioned on its first
    # primary key column.
    cds_data_query = querying.build_select(alchemist.graph,
                                           list(gene_table.c))
    cds_data = querying.execute(alchemist.engine, cds_data_query,
                                in_column=primary_key, values=values,
                                limit=limit)

    return [mysqldb.parse_gene_table_data(data_dict)
            for data_dict in cds_data]
def test_execute_3(self, dict_mock):
    """Verify that execute() calls built-in function dict().

    :param dict_mock: Stand-in for dict(); presumably injected by a patch
        decorator outside this block.
    """
    sentinel = "dict_return_value"
    dict_mock.return_value = sentinel

    converted = querying.execute(self.mock_engine, self.mock_executable)

    self.assertEqual(converted, [sentinel])
def test_execute_4(self, dict_mock):
    """Verify that parameter return_dict controls conversion with dict().

    With ``return_dict=False`` the results must equal ``mock_results``
    rather than the output of the dict() stand-in.

    :param dict_mock: Stand-in for dict(); presumably injected by a patch
        decorator outside this block.
    """
    sentinel = "dict_return_value"
    dict_mock.return_value = sentinel

    raw_rows = querying.execute(self.mock_engine, self.mock_executable,
                                return_dict=False)

    self.assertNotEqual(raw_rows, [sentinel])
    self.assertEqual(raw_rows, self.mock_results)
def retrieve_cluster_data(pan_alchemist, cluster_ids):
    """Query Spread, CentroidID, CentroidSeq and ClusterID of clusters.

    :param pan_alchemist: Object exposing ``graph`` and ``engine``
        (presumably an AlchemyHandler-like object - TODO confirm).
    :param cluster_ids: Values matched against the ClusterID column.
    :returns: Whatever querying.execute() returns for the built select.
    """
    columns = Cluster.__table__.c

    selected = [columns.Spread, columns.CentroidID,
                columns.CentroidSeq, columns.ClusterID]
    query = querying.build_select(pan_alchemist.graph, selected)

    return querying.execute(pan_alchemist.engine, query,
                            in_column=columns.ClusterID,
                            values=cluster_ids)
def test_execute_2(self):
    """Verify execute() retrieves expected data.

    Every row selected under the ``phage.Cluster=A`` condition must
    report Cluster "A".
    """
    cluster_filter = querying.build_where_clause(self.graph,
                                                 "phage.Cluster=A")
    phage = querying.get_table(self.metadata, "phage")
    query = querying.build_select(self.graph, phage, where=cluster_filter)

    for row in querying.execute(self.engine, query):
        self.assertEqual(row["Cluster"], "A")
def get_phams_and_lengths_from_organism(alchemist, organism_id):
    """Query PhamID and Length of the gene rows of one organism.

    :param alchemist: Object exposing ``metadata`` and ``engine``
        (presumably an AlchemyHandler - TODO confirm).
    :param organism_id: Value compared for equality with gene.PhageID.
    :returns: Result of querying.execute() called with return_dict=False.
    """
    gene_columns = alchemist.metadata.tables["gene"].c

    query = select([gene_columns.PhamID, gene_columns.Length]).where(
        gene_columns.PhageID == organism_id)

    return querying.execute(alchemist.engine, query, return_dict=False)
def map_translations(alchemist, pham_ids):
    """Map GeneIDs to UTF-8 decoded translations for the given phams.

    :param alchemist: Object exposing ``metadata``, ``graph`` and
        ``engine`` (presumably an AlchemyHandler - TODO confirm).
    :param pham_ids: Values matched against the gene.PhamID column.
    :returns: Dictionary mapping each row's GeneID to its decoded
        Translation.
    :rtype: dict
    """
    gene_columns = alchemist.metadata.tables["gene"].c

    query = querying.build_select(
        alchemist.graph, [gene_columns.GeneID, gene_columns.Translation])
    rows = querying.execute(alchemist.engine, query,
                            in_column=gene_columns.PhamID, values=pham_ids)

    # Translations are stored as bytes; decode them while indexing.
    return {row["GeneID"]: row["Translation"].decode("utf-8")
            for row in rows}
def test_execute_1(self):
    """Verify execute() correctly executes SQLAlchemy select objects.

    The first returned row must expose the PhageID, Cluster and
    Subcluster keys.
    """
    cluster_filter = querying.build_where_clause(self.graph,
                                                 "phage.Cluster=A")
    phage = querying.get_table(self.metadata, "phage")
    query = querying.build_select(self.graph, phage, where=cluster_filter)

    first_row_keys = querying.execute(self.engine, query)[0].keys()

    for expected_key in ("PhageID", "Cluster", "Subcluster"):
        self.assertTrue(expected_key in first_row_keys)
def get_phams_and_coords_from_organism(alchemist, organism_id):
    """Query PhamID, Start and Stop of the gene rows of one organism.

    :param alchemist: Object exposing ``metadata`` and ``engine``
        (presumably an AlchemyHandler - TODO confirm).
    :param organism_id: Value compared for equality with gene.PhageID.
    :returns: Result of querying.execute() called with return_dict=False.
    """
    gene_columns = alchemist.metadata.tables["gene"].c

    selected = [gene_columns.PhamID, gene_columns.Start, gene_columns.Stop]
    query = select(selected).where(gene_columns.PhageID == organism_id)

    return querying.execute(alchemist.engine, query, return_dict=False)
def use_function_report_data(db_filter, data_dicts, columns, conditionals,
                             verbose=False):
    """Reads in FunctionReport data and pairs it with existing data.

    :param db_filter: A connected and fully built Filter object.
    :type db_filter: Filter
    :param data_dicts: List of data dictionaries from a FunctionReport file.
    :type data_dicts: list[dict]
    :param columns: List of SQLAlchemy Columns to retrieve data for.
    :type columns: list[Column]
    :param conditionals: List of SQLAlchemy BinaryExpressions to filter with.
        The list is not modified.
    :type conditionals: List[BinaryExpression]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: Retrieved rows that have both an Accession and a LocusTag,
        with 'Notes' set to the report's final call and 'Start'
        incremented by one.
    :rtype: list[dict]
    """
    if verbose:
        print("Retreiving feature data using pham function report...")

    export_dicts = []
    for data_dict in data_dicts:
        final_call = data_dict["Final Call"]
        if final_call.lower() == "hypothetical protein":
            final_call = ""

        # Start from a copy for every pham: appending to the shared list
        # would carry each pham's Notes filter into all later queries and
        # would modify the caller's list.
        pham_conditionals = list(conditionals)
        pham_conditionals.append(
            querying.build_where_clause(db_filter.graph,
                                        f"gene.Notes!='{final_call}'"))

        query = querying.build_select(db_filter.graph, columns,
                                      where=pham_conditionals)
        results = querying.execute(db_filter.engine, query,
                                   in_column=db_filter.key,
                                   values=[data_dict["Pham"]])

        for result in results:
            # Rows lacking an accession or a locus tag are not exported.
            if (not result["Accession"]) or (not result["LocusTag"]):
                continue

            result["Notes"] = data_dict["Final Call"]
            result["Start"] = result["Start"] + 1
            export_dicts.append(result)

    return export_dicts
def get_pham_gene_translations(alchemist, phams):
    """Creates a 2D dictionary that maps phams to dictionaries that map
    unique translations to respective geneids for the specified phams.

    :param alchemist: A connected and fully build AlchemyHandler object
    :type alchemist: AlchemyHandler
    :param phams: Values matched against the gene.PhamID column.
    :return: Returns a dictionary mapping phams to translations to geneids
    :rtype: dict{dict}
    """
    gene_columns = alchemist.metadata.tables["gene"].c
    pham_column = gene_columns.PhamID

    query = querying.build_select(
        alchemist.graph,
        [pham_column, gene_columns.PhageID, gene_columns.Name,
         gene_columns.Translation])
    rows = querying.execute(alchemist.engine, query, in_column=pham_column,
                            values=phams)

    pham_ts_to_id = {}
    for row in rows:
        translation = row["Translation"].decode("utf-8")
        # Identifier has the form "<PhageID> gp<Name>".
        gene_label = " ".join([row["PhageID"], f"gp{row['Name']}"])

        ids_by_translation = pham_ts_to_id.setdefault(row["PhamID"], {})
        ids_by_translation.setdefault(translation, []).append(gene_label)

    return pham_ts_to_id
def use_csv_data(db_filter, data_dicts, columns, conditionals, verbose=False):
    """Reads in gene table csv data and pairs it with existing data.

    :param db_filter: A connected and fully built Filter object.
    :type db_filter: Filter
    :param data_dicts: List of data dictionaries from a gene table csv file;
        each is read for its 'GeneID' and 'Notes' entries.
    :type data_dicts: list[dict]
    :param columns: List of SQLAlchemy Columns to retrieve data for.
    :type columns: list[Column]
    :param conditionals: List of SQLAlchemy BinaryExpressions to filter with.
    :type conditionals: List[BinaryExpression]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: Retrieved rows whose decoded 'Notes' differ from the csv
        'Notes', with 'Notes' replaced by the csv value.
    :rtype: list[dict]
    """
    if verbose:
        print("Retrieving feauture data using gene table csv...")

    query = querying.build_select(db_filter.graph, columns,
                                  where=conditionals)
    results = querying.execute(db_filter.engine, query,
                               in_column=db_filter.key,
                               values=db_filter.values)

    # Index every retrieved row by its own GeneID. Storing rows under the
    # literal key 'GeneID' kept only the last row and made the per-GeneID
    # lookup below miss for practically every csv entry.
    results_dict = {result["GeneID"]: result for result in results}

    export_dicts = []
    for data_dict in data_dicts:
        result_dict = results_dict.get(data_dict["GeneID"])
        if result_dict is None:
            # The csv references a gene the query did not return.
            continue

        # Retrieved Notes are bytes; csv Notes are text.
        if result_dict["Notes"].decode("utf-8") != data_dict["Notes"]:
            result_dict["Notes"] = data_dict["Notes"]
            export_dicts.append(result_dict)

    return export_dicts
def execute_resubmit(alchemist, revisions_data_dicts, folder_path, folder_name,
                     filters="", groups=None, verbose=False):
    """Executes the entirety of the genbank resubmit pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param revisions_data_dicts: Data dictionaries containing pham/notes data.
    :type revisions_data_dicts: list[dict]
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param filters: Conditional string added to the Filter; an empty string
        adds nothing beyond the base conditionals.
    :type filters: str
    :param groups: Forwarded to export_db.build_groups_map() as ``groups``.
        Omitting it (or passing None) is the same as passing an empty list.
    :type groups: list
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # A fresh list per call; a list literal as the default would be one
    # object shared by every call.
    if groups is None:
        groups = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"
    db_filter.add(BASE_CONDITIONALS)

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # Deliberately best-effort: report the problem and carry on
            # with the conditionals added so far. `Exception` rather than
            # a bare except keeps Ctrl-C and interpreter exit working.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")

    resubmit_columns = db_filter.get_columns(RESUBMIT_COLUMNS)

    db_filter.values = [data_dict["Pham"]
                        for data_dict in revisions_data_dicts]

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    # Filled in place by build_groups_map(): export path -> conditionals.
    conditionals_map = {}
    export_db.build_groups_map(db_filter, export_path, conditionals_map,
                               groups=groups, verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    for mapped_path, group_conditionals in conditionals_map.items():
        if verbose:
            print("Retreiving phage data for pham revisions...")

        export_dicts = []
        for data_dict in revisions_data_dicts:
            if verbose:
                print(f"...Retrieving data for pham {data_dict['Pham']}...")

            final_call = data_dict["Final Call"]
            if final_call == "Hypothetical Protein":
                final_call = ""

            # Copy the group's conditionals for every pham: appending to
            # the mapped list itself would carry each pham's Notes filter
            # into all later queries of the same group.
            conditionals = list(group_conditionals)
            conditionals.append(
                querying.build_where_clause(alchemist.graph,
                                            f"gene.Notes!={final_call}"))

            query = querying.build_select(alchemist.graph, resubmit_columns,
                                          where=conditionals)
            results = querying.execute(alchemist.engine, query,
                                       in_column=db_filter.key,
                                       values=[data_dict["Pham"]])

            for result in results:
                format_resubmit_data(result, data_dict["Final Call"])
                export_dicts.append(result)

        if not export_dicts:
            if verbose:
                # f-string so the directory name is interpolated instead
                # of printing the literal placeholder.
                print(f"'{mapped_path.name}' data selected for resubmision "
                      "matches selected call; no resubmision exported...")

            # Nothing to write for this group; remove its directory.
            mapped_path.rmdir()
            continue

        export_dicts = sorted(export_dicts,
                              key=lambda export_dict: export_dict["Phage"])

        if verbose:
            print(f"Writing {CSV_NAME} in {mapped_path.name}...")
        file_path = mapped_path.joinpath(CSV_NAME)
        basic.export_data_dict(export_dicts, file_path, RESUBMIT_HEADER,
                               include_headers=True)
def test_execute_2(self):
    """Verify execute() converts results to data dictionaries.

    A plain call must return a one-element list holding ``data_dict``.
    """
    converted = querying.execute(self.mock_engine, self.mock_executable)
    expected = [self.data_dict]

    self.assertEqual(converted, expected)