def process_row(bel_parser: BELParser, row: Dict, line_number: int) -> None:
    """Process one curation row: build its BEL statement and parse it.

    Rows that are not checked, or that are neither correct nor changed, are
    skipped without touching the parser. For every other row the citation,
    evidence and annotations are set on ``bel_parser.control_parser`` and the
    statement ``"<Subject> <Predicate> <Object>"`` is parsed. Parser warnings
    and syntax errors are recorded on ``bel_parser.graph`` instead of being
    propagated.

    :param bel_parser: The parser that receives the statement
    :param row: A mapping with the keys ``Checked``, ``Correct``, ``Changed``,
     ``PMID``, ``Evidence``, ``Curator``, ``Subject``, ``Predicate`` and
     ``Object``; the keys ``INDRA UUID``, ``Belief`` and ``API`` are optional
    :param line_number: The line number reported to the parser and in warnings
    :raises ValueError: if the row is usable but its ``PMID`` entry is empty
    """
    if not row['Checked']:  # don't use unchecked material
        return

    if not (row['Correct'] or row['Changed']):
        # neither correct nor changed, so there is nothing usable in this row
        return

    reference = row['PMID']
    if not reference:
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError(f'missing reference on line {line_number}')

    control_parser = bel_parser.control_parser
    control_parser.citation_db = CITATION_TYPE_PUBMED
    control_parser.citation_db_id = reference

    # Set the evidence
    control_parser.evidence = row['Evidence']

    annotations = {
        'Curator': row['Curator'],
        'Confidence': 'Medium',  # needs re-curation
    }

    # Optional columns are copied to their annotation key only when present
    optional_columns = (
        ('INDRA UUID', 'INDRA_UUID'),
        ('Belief', 'INDRA_Belief'),
        ('API', 'INDRA_API'),
    )
    for column, annotation_key in optional_columns:
        if column in row:
            annotations[annotation_key] = row[column]

    # Set annotations
    control_parser.annotations.update(annotations)

    sub = row['Subject']
    obj = row['Object']

    # Build a BEL statement and parse it
    bel = f"{sub} {row['Predicate']} {obj}"

    # Cast line number from numpy.int64 to integer since JSON cannot handle this class
    line_number = int(line_number)

    try:
        bel_parser.parseString(bel, line_number=line_number)
    except BELParserWarning as exc:
        bel_parser.graph.add_warning(exc)
    except pyparsing.ParseException as exc:
        bel_parser.graph.add_warning(
            BELSyntaxError(line_number=line_number, line=bel, position=exc.loc),
        )
def setUpClass(cls):
    """Create the parser stored on the test class as ``cls.parser``.

    The graph handed to the parser is only a placeholder.
    """
    placeholder_graph = BELGraph()  # gets overwritten in each test
    parser_kwargs = dict(
        graph=placeholder_graph,
        namespace_to_term_to_encoding=namespace_to_term,
        annotation_to_term=annotation_to_term,
        namespace_to_pattern={'dbSNP': re.compile('rs[0-9]*')},
    )
    cls.parser = BELParser(**parser_kwargs)
def setUpClass(cls):
    """Store a new graph and a parser bound to it on the test class."""
    shared_graph = BELGraph()
    cls.graph = shared_graph

    # dbSNP identifiers are matched by pattern instead of by enumerated terms
    dbsnp_patterns = {'dbSNP': re.compile('rs[0-9]*')}
    cls.parser = BELParser(
        shared_graph,
        namespace_to_term=namespace_to_term,
        annotation_to_term=annotation_to_term,
        namespace_to_pattern=dbsnp_patterns,
    )
def get_graph(
    self,
    use_cached: bool = True,
    use_tqdm: bool = False,
    tqdm_kwargs: Optional[Mapping[str, Any]] = None,
) -> BELGraph:
    """Get the BEL graph from all sheets in this repository.

    .. warning:: This BEL graph isn't pre-filled with namespace and annotation URLs.

    :param use_cached: If true and the cache file exists, load and return it
     instead of re-reading the sheets
    :param use_tqdm: If true, wrap the iteration over sheets (and the rows of
     each sheet) in a progress bar
    :param tqdm_kwargs: Extra keyword arguments for the progress bar over
     sheets; they override the default description
    :return: The graph built from every readable sheet. The result is also
     written to the cache file before returning.
    """
    if use_cached and os.path.exists(self._cache_json_path):
        # The cache is written below with to_nodelink_file (plain JSON), so
        # it has to be read back with the matching plain-JSON reader.
        return pybel.from_nodelink_file(self._cache_json_path)

    graph = BELGraph()
    if self.metadata is not None:
        self.metadata.update(graph)

    logger.info('streamlining parser')
    bel_parser = BELParser(graph)

    paths = list(self.iterate_sheets_paths())

    if use_tqdm:
        _tqdm_kwargs = dict(desc=f'Sheets in {self.directory}')
        if tqdm_kwargs:
            _tqdm_kwargs.update(tqdm_kwargs)
        paths = tqdm(paths, **_tqdm_kwargs)

    for path in paths:
        graph.path = path

        try:
            df = pd.read_excel(path)
        except LookupError as exc:
            logger.warning(f'Error opening {path}: {exc}')
            continue

        # Check columns in DataFrame exist
        if not _check_curation_template_columns(df):
            logger.warning(f'^ above columns in {path} were missing')
            continue

        process_df(
            bel_parser=bel_parser,
            df=df,
            use_tqdm=use_tqdm,
            tqdm_kwargs=dict(desc=f'Reading {path}'),
        )

    if self.prior is not None:  # assign edges to sub-graphs
        prior = self.get_prior()
        assign_subgraphs(graph=graph, prior=prior)

    pybel.to_nodelink_file(graph, self._cache_json_path, indent=2, sort_keys=True)

    return graph
def build_parser_service(app: Flask):
    """Add the parser app for sending and receiving BEL statements.

    Registers two routes on ``app``: a status endpoint and a parse endpoint.
    Both share one graph and one parser created when this function is called.

    :param app: The Flask application on which the routes are registered
    """
    graph = BELGraph()
    parser = BELParser(graph, citation_clearing=False)

    @app.route('/api/parser/status')
    def get_status():
        """Return the status of the parser.

        ---
        tags:
            - parser
        """
        return jsonify({
            'status': 'ok',
            'graph_number_nodes': graph.number_of_nodes(),
            'graph_number_edges': graph.number_of_edges(),
            **graph.document,
        })

    @app.route('/api/parser/parse/<statement>', methods=['GET', 'POST'])
    def parse_bel(statement):
        """Parse a URL-encoded BEL statement.

        ---
        tags:
            - parser
        parameters:
          - name: statement
            in: path
            description: A BEL statement
            required: true
            type: string
        """
        parser.control_parser.clear()
        # Random placeholders stand in for the evidence and citation
        parser.control_parser.evidence = str(uuid4())
        parser.control_parser.citation = dict(type=str(uuid4()), reference=str(uuid4()))

        # Query arguments come from the client, so they are spread first: the
        # server-derived provenance keys below must not be overridable.
        parser.control_parser.annotations.update({
            **request.args,
            METADATA_TIME_ADDED: str(time.asctime()),
            METADATA_IP: request.remote_addr,
            METADATA_HOST: request.host,
            METADATA_USER: request.remote_user,
        })

        try:
            res = parser.statement.parseString(statement)
        except Exception as e:
            # Any failure is reported to the client as a JSON payload
            return jsonify({
                'status': 'bad',
                'exception': str(e),
                'input': statement,
            })
        else:
            return jsonify(**res.asDict())
def setUp(self):
    """Create ``self.parser`` with a small namespace table and autostreamline off."""
    test_graph = BELGraph()

    # Terms are keyed by a (None, name) pair
    hgnc_terms = {(None, name): 'GRP' for name in ('AKT1', 'YFG')}
    mesh_terms = {(None, 'nucleus'): 'A'}

    self.parser = BELParser(
        test_graph,
        namespace_to_term_to_encoding={'HGNC': hgnc_terms, 'MESH': mesh_terms},
        autostreamline=False,
    )
def setUp(self):
    """Create ``self.parser`` with a small namespace dictionary and autostreamline off."""
    test_graph = BELGraph()

    hgnc_terms = dict.fromkeys(('AKT1', 'YFG'), 'GRP')
    meshcs_terms = {'nucleus': 'A'}

    self.parser = BELParser(
        test_graph,
        namespace_dict={'HGNC': hgnc_terms, 'MESHCS': meshcs_terms},
        autostreamline=False,
    )
def setUpClass(cls):
    """Store a new graph and a parser bound to it on the test class."""
    cls.graph = BELGraph()

    # dbSNP is validated by a regular-expression string rather than a term list
    parser_options = {
        'namespace_dict': namespaces,
        'annotation_dict': annotations,
        'namespace_regex': {'dbSNP': 'rs[0-9]*'},
    }
    cls.parser = BELParser(cls.graph, **parser_options)