示例#1
0
def run_measurement_finder(filename):
    """Run the measurement-finder jar on a file and collect its measurements.

    Launches ``java -jar FULL_JAR -f <filename> -m MODEL_FULL_DIR``, reads
    everything the process writes (stderr is merged into stdout), parses that
    output as a single JSON document and converts every entry found under
    ``results[*].measurements`` into a ``Measurement`` via
    ``Measurement.from_dict``. Each object gets a ``sentence`` attribute
    copied from its enclosing result.

    Args:
        filename: value passed to the jar's ``-f`` option.

    Returns:
        A list of ``Measurement`` objects. This is a best-effort call: any
        exception (process launch, decoding, JSON parsing, missing keys) is
        printed, and whatever was collected up to that point is returned.
    """
    measurements = list()
    try:
        # The context manager closes the pipe and waits for the child on
        # exit, so the java process is always reaped and no file descriptor
        # is leaked, even if reading fails.
        with Popen(
                ['java', '-jar', FULL_JAR, '-f', filename, '-m', MODEL_FULL_DIR],
                stdout=PIPE,
                stderr=STDOUT) as p:
            raw_output = p.stdout.read()

        # NOTE: stderr shares the stream, so anything the JVM writes there
        # ends up in the text handed to the JSON parser.
        json_obj = json.loads(str(raw_output, 'utf-8'))

        for res in json_obj['results']:
            # results reporting no measurements are skipped
            if int(res["measurementCount"]) > 0:
                for meas in res['measurements']:
                    meas_obj = Measurement.from_dict(meas)
                    meas_obj.sentence = res['sentence']
                    measurements.append(meas_obj)
    except Exception as e:
        # deliberate best-effort: report the problem, return what we have
        print(e)

    return measurements
def run_value_extractor_full(term_list,
                             text,
                             minimum_value,
                             maximum_value,
                             is_case_sensitive_text=False,
                             denom_only=False):
    """Extract values associated with the given terms from every sentence.

    The text is split into sentences with ``segmentor.parse_sentences``.
    Each term is compiled into a word-bounded, case-insensitive regex, and
    for every (sentence, term) pair where the regex matches, ``extract_value``
    is called with the matched text and the numeric range. Every item it
    returns is wrapped in a ``Measurement``.

    Args:
        term_list: terms to look for; lowercased unless
            ``is_case_sensitive_text`` is true.
        text: text to search; lowercased unless ``is_case_sensitive_text``
            is true.
        minimum_value: lower bound; a string such as ``"a/b"`` contributes
            only the part before the first ``/``.
        maximum_value: upper bound, handled like ``minimum_value``.
        is_case_sensitive_text: when false, terms and text are lowercased
            before matching.
        denom_only: forwarded unchanged to ``extract_value``.

    Returns:
        A list of ``Measurement`` objects, one per extracted value.

    Raises:
        ValueError: if a bound is a string that cannot be parsed as a float.
    """

    def _range_check_part(bound):
        # for fractions the range check is done on the numerator only
        if isinstance(bound, str) and bound.find('/') != -1:
            return bound.split('/')[0]
        return bound

    # case-insensitive matching works on lowercased terms and text
    if not is_case_sensitive_text:
        term_list = [t.lower() for t in term_list]
        text = text.lower()

    lower_bound = float(_range_check_part(minimum_value))
    upper_bound = float(_range_check_part(maximum_value))

    sentences = segmentor.parse_sentences(text)
    found = []
    # NOTE(review): terms are interpolated into the pattern unescaped, so a
    # term containing regex metacharacters is treated as regex syntax.
    patterns = [re.compile(r"\b%s\b" % t, re.IGNORECASE) for t in term_list]

    for sentence, pattern in product(sentences, patterns):
        hit = pattern.search(sentence)
        if not hit:
            continue

        extracted = extract_value(hit.group(0),
                                  sentence,
                                  lower_bound,
                                  upper_bound,
                                  denom_only=denom_only)
        if len(extracted) == 0:
            continue

        for item in extracted:
            found.append(
                Measurement(sentence=sentence,
                            text=item.matching_term,
                            start=item.start,
                            end=item.end,
                            condition=item.cond,
                            X=item.num1,
                            Y=item.num2))

    return found
示例#3
0
def run_measurement_finder_full(text, term_list, is_case_sensitive_text=False):
    """Find measurements in each sentence of ``text`` for the given terms.

    The text is split with ``segmentor.parse_sentences``; each sentence is
    passed, together with the comma-joined terms, to ``run_subject_finder``,
    whose JSON reply is parsed and turned into ``Measurement`` objects.

    Args:
        text: text to analyze; lowercased unless ``is_case_sensitive_text``.
        term_list: query terms; lowercased unless ``is_case_sensitive_text``.
        is_case_sensitive_text: when false, terms and text are lowercased.

    Returns:
        A list of ``Measurement`` objects. A sentence is skipped when its
        reply is empty or has no ``measurementList``, when it reports zero
        measurements, or when terms were supplied and ``querySuccess`` is
        false. A measurement entry that cannot be converted is logged at
        ERROR level and skipped.
    """
    if not is_case_sensitive_text:
        term_list = [term.lower() for term in term_list]
        text = text.lower()

    term_count = len(term_list)
    terms = ",".join(term_list)
    results = []

    sentence_list = segmentor.parse_sentences(text)
    for s in sentence_list:
        json_str = run_subject_finder(terms, s)
        json_obj = json.loads(json_str)

        # Validate the reply before subscripting it; previously this guard
        # ran only after 'measurementCount' had already been read, so an
        # empty reply raised instead of being skipped.
        if not json_obj or 'measurementList' not in json_obj:
            continue
        if 0 == json_obj['measurementCount']:
            continue
        if term_count > 0 and not json_obj['querySuccess']:
            # ignore if query term(s) not found
            continue

        for x in json_obj['measurementList']:
            try:
                m = Measurement(sentence=s,
                                text=x['text'],
                                start=x['start'],
                                end=x['end'],
                                temporality=x['temporality'],
                                units=x['units'],
                                condition=x['condition'],
                                matching_terms=', '.join(
                                    x['matchingTerm']),
                                subject=', '.join(x['subject']),
                                location=x['location'],
                                X=x['x'],
                                Y=x['y'],
                                Z=x['z'],
                                x_view=x['xView'],
                                y_view=x['yView'],
                                z_view=x['zView'],
                                value1=x['values'],
                                min_value=x['minValue'],
                                max_value=x['maxValue'])
                results.append(m)

            except Exception as ex:
                # one malformed entry must not discard the rest
                log('measurement_finder_wrapper exception: {0}'.format(ex),
                    ERROR)
                # message first, level second, matching the call above
                # (the arguments were previously swapped)
                log(ex, ERROR)

    return results
def run_value_extractor_full(term_list,
                             text,
                             minimum_value,
                             maximum_value,
                             enumlist=None,
                             is_case_sensitive_text=False,
                             denom_only=False,
                             values_before_terms=False):
    """Run the value extractor on every sentence of ``text``.

    The text is split with ``segmentor.parse_sentences`` and each sentence is
    handed to ``run_value_extractor`` along with the terms and options. A
    non-empty reply is parsed as JSON, and each entry of its
    ``measurementList`` (when present) becomes a ``Measurement``.

    Args:
        term_list: terms forwarded to ``run_value_extractor``.
        text: text to split into sentences.
        minimum_value: forwarded as ``str_minval``.
        maximum_value: forwarded as ``str_maxval``.
        enumlist: forwarded as ``str_enumlist``; ``None`` becomes an empty
            list.
        is_case_sensitive_text: forwarded as ``is_case_sensitive``.
        denom_only: forwarded as ``is_denom_only``.
        values_before_terms: forwarded under the same name.

    Returns:
        A list of ``Measurement`` objects in sentence order.
    """
    if enumlist is None:
        enumlist = list()

    collected = []
    for sentence in segmentor.parse_sentences(text):
        reply = run_value_extractor(
            term_list,
            sentence,
            str_minval=minimum_value,
            str_maxval=maximum_value,
            str_enumlist=enumlist,
            is_case_sensitive=is_case_sensitive_text,
            is_denom_only=denom_only,
            values_before_terms=values_before_terms)

        # nothing came back for this sentence
        if reply is None or len(reply) == 0:
            continue

        payload = json.loads(reply)

        # the individual extractions live under 'measurementList'
        if 'measurementList' not in payload:
            continue

        collected.extend(
            Measurement(sentence=sentence,
                        text=entry['text'],
                        start=entry['start'],
                        end=entry['end'],
                        condition=entry['condition'],
                        X=entry['x'],
                        Y=entry['y'],
                        matching_terms=entry['matchingTerm'],
                        min_value=entry['minValue'],
                        max_value=entry['maxValue'])
            for entry in payload['measurementList'])

    return collected
示例#5
0
def run_value_extractor_full(term_list,
                             text,
                             minimum_value,
                             maximum_value,
                             enumlist=None,
                             is_case_sensitive_text=False,
                             denom_only=False):
    """Extract values for each term occurrence found in each sentence.

    The text is split with ``segmentor.parse_sentences``. Every term is
    compiled into a word-bounded, case-insensitive regex; for each
    (sentence, term) pair where the regex matches, ``run_value_extractor`` is
    called with the matched text. A non-empty reply is parsed as JSON and
    each entry of its ``measurementList`` (when present) becomes a
    ``Measurement`` whose ``text`` is the entry's ``matchingTerm``.

    Args:
        term_list: terms to search for.
        text: text to split into sentences.
        minimum_value: forwarded as ``str_minval``.
        maximum_value: forwarded as ``str_maxval``.
        enumlist: forwarded as ``str_enumlist``; ``None`` becomes an empty
            list.
        is_case_sensitive_text: forwarded as ``is_case_sensitive``.
        denom_only: forwarded as ``is_denom_only``.

    Returns:
        A list of ``Measurement`` objects.
    """
    if enumlist is None:
        enumlist = list()

    sentences = segmentor.parse_sentences(text)
    collected = []
    # NOTE(review): terms are interpolated into the pattern unescaped, and
    # the regex itself always ignores case regardless of
    # is_case_sensitive_text.
    patterns = [re.compile(r"\b%s\b" % t, re.IGNORECASE) for t in term_list]

    for sentence, pattern in product(sentences, patterns):
        hit = pattern.search(sentence)
        if not hit:
            continue

        reply = run_value_extractor(
            hit.group(0),
            sentence,
            str_minval=minimum_value,
            str_maxval=maximum_value,
            str_enumlist=enumlist,
            is_case_sensitive=is_case_sensitive_text,
            is_denom_only=denom_only)

        # an empty reply means nothing was extracted for this pair
        if len(reply) == 0:
            continue

        payload = json.loads(reply)
        if 'measurementList' not in payload:
            continue

        for entry in payload['measurementList']:
            collected.append(
                Measurement(sentence=sentence,
                            text=entry['matchingTerm'],
                            start=entry['start'],
                            end=entry['end'],
                            condition=entry['condition'],
                            X=entry['x'],
                            Y=entry['y']))

    return collected
def run_measurement_finder_full(text, term_list, is_case_sensitive_text=False):
    """Find measurements in each sentence of ``text`` for the given terms.

    The text is split with ``segmentor.parse_sentences``; each sentence and
    the comma-joined terms go to ``run_subject_finder``, whose JSON reply is
    parsed. Every entry of the reply's ``measurementList`` (when present)
    becomes a ``Measurement``.

    Args:
        text: text to analyze; lowercased unless ``is_case_sensitive_text``.
        term_list: query terms; lowercased unless ``is_case_sensitive_text``.
        is_case_sensitive_text: when false, terms and text are lowercased.

    Returns:
        A list of ``Measurement`` objects. An entry that raises while being
        converted is printed and skipped.
    """
    if not is_case_sensitive_text:
        term_list = [t.lower() for t in term_list]
        text = text.lower()

    joined_terms = ",".join(term_list)
    found = []

    for sentence in segmentor.parse_sentences(text):
        reply = json.loads(run_subject_finder(joined_terms, sentence))
        if not reply or 'measurementList' not in reply:
            continue

        for entry in reply['measurementList']:
            try:
                # fields are read in a fixed order so that a malformed entry
                # always reports the same first problem
                fields = dict(sentence=sentence,
                              text=entry['text'],
                              location=entry['location'],
                              x_view=entry['x_view'],
                              start=entry['start'],
                              temporality=entry['temporality'],
                              y_view=entry['y_view'],
                              Z=entry['z'],
                              subject=', '.join(entry['subject']),
                              X=entry['x'],
                              end=entry['end'],
                              condition=entry['condition'],
                              matching_terms=', '.join(
                                  entry['matchingTerm']),
                              value1=entry['values'],
                              z_view=entry['z_view'],
                              units=entry['units'],
                              Y=entry['y'])
                found.append(Measurement(**fields))
            except Exception as ex:
                # best-effort: report the bad entry and keep going
                print(ex)

    return found
def run_measurement_finder(filename):
    """Run the measurement-finder jar on a file and collect its measurements.

    Launches ``java -jar FULL_JAR -f <filename> -m MODEL_FULL_DIR``, reads
    everything the process writes (stderr is merged into stdout), parses that
    output as a single JSON document and converts every entry found under
    ``results[*].measurements`` into a ``Measurement`` via
    ``Measurement.from_dict``. Each object gets a ``sentence`` attribute
    copied from its enclosing result.

    Args:
        filename: value passed to the jar's ``-f`` option.

    Returns:
        A list of ``Measurement`` objects. This is a best-effort call: any
        exception (process launch, decoding, JSON parsing, missing keys) is
        printed, and whatever was collected up to that point is returned.
    """
    measurements = list()
    try:
        command = ['java', '-jar', FULL_JAR, '-f', filename, '-m', MODEL_FULL_DIR]
        # Leaving the ``with`` block closes the pipe and waits for the child,
        # so the java process is reaped and its descriptor released even when
        # reading fails.
        with Popen(command, stdout=PIPE, stderr=STDOUT) as p:
            raw_output = p.stdout.read()

        # NOTE: stderr shares the stream, so anything the JVM writes there
        # ends up in the text handed to the JSON parser.
        json_obj = json.loads(str(raw_output, 'utf-8'))

        for res in json_obj['results']:
            # results reporting no measurements are skipped
            if int(res["measurementCount"]) > 0:
                for meas in res['measurements']:
                    meas_obj = Measurement.from_dict(meas)
                    meas_obj.sentence = res['sentence']
                    measurements.append(meas_obj)
    except Exception as e:
        # deliberate best-effort: report the problem, return what we have
        print(e)

    return measurements