def query(query: str, input_dir: str, output_dir: str) -> None:
    """Execute a knowledge-graph query defined by a query_utils class.

    Args:
        query: A query class containing instructions for performing a query
        input_dir: Directory where any input files required to execute query
            are located (typically 'data', where transformed and merged graph
            files are)
        output_dir: Directory to output results of query

    Returns:
        None.
    """
    # Delegate all work to run_query; this function only forwards arguments.
    params = {"query": query, "input_dir": input_dir, "output_dir": output_dir}
    run_query(**params)
def query(yaml: str, output_dir: str, query_key: str = 'query',
          endpoint_key: str = 'endpoint', outfile_ext: str = ".tsv") -> None:
    """Run a SPARQL query described in a YAML file and write the results
    to a TSV file named after the YAML file.

    Args:
        yaml: A YAML file containing a SPARQL query (see queries/sparql/ for
            examples)
        output_dir: Directory to output results of query
        query_key: the key in the yaml file containing the query string
        endpoint_key: the key in the yaml file containing the sparql endpoint URL
        outfile_ext: file extension for output file [.tsv]

    Returns:
        None.
    """
    query = parse_query_yaml(yaml)
    result_dict = run_query(query=query[query_key], endpoint=query[endpoint_key])
    # exist_ok=True avoids the check-then-create race of the former
    # os.path.exists()/os.makedirs() pair.
    os.makedirs(output_dir, exist_ok=True)
    # Output file: <output_dir>/<yaml basename without extension><outfile_ext>
    outfile = os.path.join(
        output_dir, os.path.splitext(os.path.basename(yaml))[0] + outfile_ext)
    result_dict_to_tsv(result_dict, outfile)
def query(yaml: str, output_dir: str, query_key: str = 'query',
          endpoint_key: str = 'endpoint', outfile_ext: str = ".tsv") -> None:
    """Run a SPARQL query described in a grlc-style rq file and write the
    results to a TSV file named after the rq file.

    Args:
        yaml: A rq file containing a SPARQL query in grlc format:
            https://github.com/CLARIAH/grlc/blob/master/README.md
        output_dir: Directory to output results of query
        query_key: the key in the yaml file containing the query string
        endpoint_key: the key in the yaml file containing the sparql endpoint URL
        outfile_ext: file extension for output file [.tsv]

    Returns:
        None.
    """
    query = parse_query_rq(yaml)
    result_dict = run_query(query=query[query_key], endpoint=query[endpoint_key])
    # exist_ok=True avoids the check-then-create race of the former
    # os.path.exists()/os.makedirs() pair.
    os.makedirs(output_dir, exist_ok=True)
    # Output file: <output_dir>/<rq basename without extension><outfile_ext>
    outfile = os.path.join(
        output_dir, os.path.splitext(os.path.basename(yaml))[0] + outfile_ext)
    result_dict_to_tsv(result_dict, outfile)
# Download the merged KG tarball once if it is not already cached locally.
# NOTE(review): kg_tar, data_dir, intact_path, sars_genes_path and
# drug_central are defined outside this chunk — verify upstream.
if not os.path.exists(kg_tar):
    wget.download('http://kg-hub.berkeleybop.io/kg-covid-19.tar.gz', data_dir)
# Extract the tarball into data_dir (runs even when kg_tar was cached).
with tarfile.open(kg_tar) as tar:
    tar.extractall(data_dir)
# Fetch per-source node/edge TSVs from KG-Hub, skipping files already present.
for path in [intact_path, sars_genes_path, drug_central]:
    node_file = '/'.join([data_dir, path, 'nodes.tsv'])
    edge_file = '/'.join([data_dir, path, 'edges.tsv'])
    node_url = '/'.join(['http://kg-hub.berkeleybop.io', path, 'nodes.tsv'])
    edge_url = '/'.join(['http://kg-hub.berkeleybop.io', path, 'edges.tsv'])
    if not os.path.exists(node_file):
        wget.download(node_url, node_file)
    if not os.path.exists(edge_file):
        wget.download(edge_url, edge_file)
# Run the TargetCandidates query over the downloaded graph files; results
# land in data_dir (read back below as data/target_candidates.tsv).
run_query('TargetCandidates', input_dir=data_dir, output_dir=data_dir)
target_info = read_csv('data/target_candidates.tsv', sep="\t")
pnnl_data = read_csv('data/pnnl/PNNLTargetList_2020_06_08 - TargetList_2020_06_05.tsv', sep="\t")
# Align the PNNL sheet with the query output: rename the UniProt column and
# prefix accessions into CURIE form (UniProtKB:<accession>).
pnnl_data.rename(columns={'Uniprot': 'protein ID'}, inplace=True)
pnnl_data['protein ID'] = \
    pnnl_data['protein ID'].apply(lambda x: "{}{}".format('UniProtKB:', x))
# Drop columns the PNNL sheet does not have, then stack PNNL rows on top of
# the query-derived targets (outer join keeps all columns from both frames).
target_info.drop(['confidence score','comments'], axis=1, inplace=True)
target_info = pd.concat([pnnl_data, target_info], axis=0, join='outer',
                        ignore_index=False, keys=None, levels=None, names=None,
                        verify_integrity=False, copy=True)
# presumably a flag consumed by code after this chunk — TODO confirm
host_only = False