def test_parse_site_residue_only():
    """Check site text that names a residue but gives no position.

    Every phrase is expected to parse to the residue 'Serine' with the
    site returned as None.
    """
    processor = ReachProcessor(None)
    # Phrases that mention serine without any sequence position.
    for phrase in ('serine residue', 'serine', 'a serine site'):
        parsed_residue, parsed_site = processor._parse_site_text(phrase)
        assert parsed_residue == 'Serine'
        assert parsed_site is None
def test_parse_site_text():
    """Check site text that gives both a residue and a position.

    Every spelling variant is expected to parse to the residue 'Threonine'
    and the site '185' (as a string).
    """
    processor = ReachProcessor(None)
    # Different ways of writing the same site: full name, three-letter
    # abbreviation, hyphenated form, with the word "residue", and one-letter
    # code fused with the position.
    variants = (
        'threonine 185',
        'thr 185',
        'thr-185',
        'threonine residue 185',
        'T185',
    )
    for phrase in variants:
        parsed_residue, parsed_site = processor._parse_site_text(phrase)
        assert parsed_residue == 'Threonine'
        assert parsed_site == '185'
def process_json_str(json_str, citation=None):
    """Return a ReachProcessor by processing the given REACH json string.

    The REACH parser emits its output in this json format; for details
    on the format see: https://github.com/clulab/reach

    Parameters
    ----------
    json_str : str
        The json string to be processed.
    citation : Optional[str]
        A PubMed ID passed to be used in the evidence for the extracted
        INDRA Statements. Default: None

    Returns
    -------
    rp : ReachProcessor
        A ReachProcessor containing the extracted INDRA Statements
        in rp.statements. None is returned instead if the string cannot
        be decoded as JSON.
    """
    assert isinstance(json_str, basestring)

    # Hyphenated REACH field names and their underscore equivalents.
    # NOTE: the substitution is a plain text replace over the whole string,
    # so any occurrence of these tokens is rewritten, not only JSON keys.
    renames = (
        ('frame-id', 'frame_id'),
        ('argument-label', 'argument_label'),
        ('object-meta', 'object_meta'),
        ('doc-id', 'doc_id'),
        ('is-hypothesis', 'is_hypothesis'),
        ('is-negated', 'is_negated'),
        ('is-direct', 'is_direct'),
        ('found-by', 'found_by'),
    )
    for hyphenated, underscored in renames:
        json_str = json_str.replace(hyphenated, underscored)

    try:
        json_dict = json.loads(json_str)
    except ValueError:
        logger.error('Could not decode JSON string.')
        return None

    # Build the processor and run each extraction step in turn.
    rp = ReachProcessor(json_dict, citation)
    rp.get_modifications()
    rp.get_complexes()
    rp.get_activation()
    rp.get_translocation()
    rp.get_regulate_amounts()
    return rp