import json
import re
from itertools import product
from subprocess import Popen, PIPE, STDOUT

# FULL_JAR, MODEL_FULL_DIR, segmentor, Measurement, extract_value,
# run_value_extractor, run_subject_finder, log, and ERROR are assumed to be
# defined or imported elsewhere in this package.


def run_measurement_finder(filename):
    measurements = []
    try:
        # run the measurement-finder jar on the file and capture its JSON output
        p = Popen(['java', '-jar', FULL_JAR,
                   '-f', filename,
                   '-m', MODEL_FULL_DIR],
                  stdout=PIPE, stderr=STDOUT)

        json_str = ''
        for line in p.stdout:
            json_str += str(line, 'utf-8')

        json_obj = json.loads(json_str)
        results = json_obj['results']
        for res in results:
            meas_count = int(res['measurementCount'])
            if meas_count > 0:
                # print("found %d measurements" % meas_count)
                for meas in res['measurements']:
                    meas_obj = Measurement.from_dict(meas)
                    setattr(meas_obj, 'sentence', res['sentence'])
                    measurements.append(meas_obj)
    except Exception as e:
        print(e)

    return measurements

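# A minimal usage sketch (not part of the original module): it assumes FULL_JAR
# and MODEL_FULL_DIR point to the measurement-finder jar and model directory,
# and uses a hypothetical input file name.
def _demo_run_measurement_finder():
    for m in run_measurement_finder('/path/to/radiology_report.txt'):
        # each result is a Measurement with the originating sentence attached
        print(m.sentence)
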
def run_value_extractor_full(term_list, text, minimum_value, maximum_value,
                             is_case_sensitive_text=False, denom_only=False):

    # convert terms to lowercase unless doing a case-sensitive match
    if not is_case_sensitive_text:
        term_list = [term.lower() for term in term_list]
        text = text.lower()

    # for fractions, do the range check on the numerator value only
    if isinstance(minimum_value, str):
        if -1 != minimum_value.find('/'):
            minimum_value = minimum_value.split('/')[0]
    if isinstance(maximum_value, str):
        if -1 != maximum_value.find('/'):
            maximum_value = maximum_value.split('/')[0]

    minval = float(minimum_value)
    maxval = float(maximum_value)

    sentence_list = segmentor.parse_sentences(text)

    process_results = []

    # whole-word matchers, one per query term
    matchers = [re.compile(r"\b%s\b" % t, re.IGNORECASE) for t in term_list]

    # check every (sentence, term) pair for a match
    for sentence, matcher in product(sentence_list, matchers):
        match = matcher.search(sentence)
        if match:
            term = match.group(0)
            value_results = extract_value(term, sentence, minval, maxval,
                                          denom_only=denom_only)
            for x in value_results:
                process_results.append(
                    Measurement(sentence=sentence,
                                text=x.matching_term,
                                start=x.start,
                                end=x.end,
                                condition=x.cond,
                                X=x.num1,
                                Y=x.num2))

    return process_results

def run_measurement_finder_full(text, term_list, is_case_sensitive_text=False):

    if not is_case_sensitive_text:
        term_list = [term.lower() for term in term_list]
        text = text.lower()

    term_count = len(term_list)
    terms = ",".join(term_list)

    results = []
    sentence_list = segmentor.parse_sentences(text)
    for s in sentence_list:
        json_str = run_subject_finder(terms, s)
        json_obj = json.loads(json_str)

        if 0 == json_obj['measurementCount']:
            continue
        if term_count > 0 and not json_obj['querySuccess']:
            # ignore if query term(s) not found
            continue

        if json_obj and 'measurementList' in json_obj:
            for x in json_obj['measurementList']:
                try:
                    m = Measurement(sentence=s,
                                    text=x['text'],
                                    start=x['start'],
                                    end=x['end'],
                                    temporality=x['temporality'],
                                    units=x['units'],
                                    condition=x['condition'],
                                    matching_terms=', '.join(x['matchingTerm']),
                                    subject=', '.join(x['subject']),
                                    location=x['location'],
                                    X=x['x'],
                                    Y=x['y'],
                                    Z=x['z'],
                                    x_view=x['xView'],
                                    y_view=x['yView'],
                                    z_view=x['zView'],
                                    value1=x['values'],
                                    min_value=x['minValue'],
                                    max_value=x['maxValue'])
                    results.append(m)
                except Exception as ex:
                    log('measurement_finder_wrapper exception: {0}'.format(ex), ERROR)

    return results

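# A minimal usage sketch for run_measurement_finder_full (not part of the
# original module); the sample text and query term are hypothetical, and the
# fields printed assume Measurement exposes its constructor keywords as
# attributes.
def _demo_run_measurement_finder_full():
    text = 'The spleen measures 9.2 x 4.1 x 3.0 cm and appears normal.'
    for m in run_measurement_finder_full(text, ['spleen']):
        print('{0} -> {1} {2}'.format(m.subject, m.text, m.units))
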
def run_value_extractor_full(term_list, text, minimum_value, maximum_value,
                             enumlist=None, is_case_sensitive_text=False,
                             denom_only=False, values_before_terms=False):

    if enumlist is None:
        enumlist = list()

    sentence_list = segmentor.parse_sentences(text)

    process_results = []
    for sentence in sentence_list:
        json_string = run_value_extractor(term_list,
                                          sentence,
                                          str_minval=minimum_value,
                                          str_maxval=maximum_value,
                                          str_enumlist=enumlist,
                                          is_case_sensitive=is_case_sensitive_text,
                                          is_denom_only=denom_only,
                                          values_before_terms=values_before_terms)

        if json_string is not None and len(json_string) > 0:
            # parse the JSON result
            json_data = json.loads(json_string)

            # the individual value extractions are in the 'measurementList'
            if 'measurementList' in json_data:
                measurements = json_data['measurementList']
                for m in measurements:
                    process_results.append(
                        Measurement(sentence=sentence,
                                    text=m['text'],
                                    start=m['start'],
                                    end=m['end'],
                                    condition=m['condition'],
                                    X=m['x'],
                                    Y=m['y'],
                                    matching_terms=m['matchingTerm'],
                                    min_value=m['minValue'],
                                    max_value=m['maxValue']))

    return process_results

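# A minimal usage sketch for the variant above (not part of the original
# module): numeric extraction over hypothetical vital-sign text, assuming
# run_value_extractor and segmentor are available and the range bounds are
# passed as strings.
def _demo_value_extractor_numeric():
    text = 'Vitals: temperature 98.6, heart rate 72, BP 110/70.'
    results = run_value_extractor_full(['temperature', 'heart rate'],
                                       text, '0', '500')
    for m in results:
        print('{0}: {1}'.format(m.matching_terms, m.text))
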
def run_value_extractor_full(term_list, text, minimum_value, maximum_value,
                             enumlist=None, is_case_sensitive_text=False,
                             denom_only=False):

    if enumlist is None:
        enumlist = list()

    sentence_list = segmentor.parse_sentences(text)

    process_results = []

    # whole-word matchers, one per query term
    matchers = [re.compile(r"\b%s\b" % t, re.IGNORECASE) for t in term_list]

    # check every (sentence, term) pair for a match
    for sentence, matcher in product(sentence_list, matchers):
        match = matcher.search(sentence)
        if match:
            term = match.group(0)
            value_str = run_value_extractor(term,
                                            sentence,
                                            str_minval=minimum_value,
                                            str_maxval=maximum_value,
                                            str_enumlist=enumlist,
                                            is_case_sensitive=is_case_sensitive_text,
                                            is_denom_only=denom_only)

            if len(value_str) > 0:
                value_results = json.loads(value_str)
                if 'measurementList' in value_results:
                    for x in value_results['measurementList']:
                        process_results.append(
                            Measurement(sentence=sentence,
                                        text=x['matchingTerm'],
                                        start=x['start'],
                                        end=x['end'],
                                        condition=x['condition'],
                                        X=x['x'],
                                        Y=x['y']))

    return process_results

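# A minimal usage sketch for the enumlist path of the variant above (not part
# of the original module): the query terms, textual result values, and sample
# sentence are hypothetical, and behavior depends on how the underlying
# run_value_extractor interprets str_enumlist.
def _demo_value_extractor_enumlist():
    text = 'Influenza A was negative; influenza B was positive.'
    results = run_value_extractor_full(['influenza a', 'influenza b'], text,
                                       '0', '1000',
                                       enumlist=['positive', 'negative'])
    for m in results:
        print('{0}: {1}'.format(m.sentence, m.text))
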
def run_measurement_finder_full(text, term_list, is_case_sensitive_text=False):

    if not is_case_sensitive_text:
        term_list = [term.lower() for term in term_list]
        text = text.lower()

    terms = ",".join(term_list)

    results = []
    sentence_list = segmentor.parse_sentences(text)
    for s in sentence_list:
        json_str = run_subject_finder(terms, s)
        json_obj = json.loads(json_str)
        if json_obj and 'measurementList' in json_obj:
            for x in json_obj['measurementList']:
                try:
                    results.append(
                        Measurement(sentence=s,
                                    text=x['text'],
                                    start=x['start'],
                                    end=x['end'],
                                    temporality=x['temporality'],
                                    units=x['units'],
                                    condition=x['condition'],
                                    matching_terms=', '.join(x['matchingTerm']),
                                    subject=', '.join(x['subject']),
                                    location=x['location'],
                                    X=x['x'],
                                    Y=x['y'],
                                    Z=x['z'],
                                    # the subject finder emits camelCase keys
                                    x_view=x['xView'],
                                    y_view=x['yView'],
                                    z_view=x['zView'],
                                    value1=x['values']))
                except Exception as ex:
                    print(ex)

    return results
