Example #1
def submit_test(nlpql):
    """
    Testing ClarityNLP job
    """

    url = util.claritynlp_url + 'nlpql_tester'
    log('URL from submit_test: "{0}"'.format(url))

    token, oauth = util.app_token()
    response = requests.post(url, headers=get_headers(token), data=nlpql)
    if response.status_code == 200:
        data = response.json()
        if 'success' in data:
            if not data['success']:
                log(data['error'], util.ERROR)
                return False, data['error']
        if 'valid' in data:
            if not data['valid']:
                # validation failed; log it and surface the flag to the caller
                log('NLPQL validation failed', util.ERROR)
                return False, data['valid']


        # log("\n\nJob Response:\n")
        # log(data)
        return True, data
    else:
        log(response.status_code)
        log(response.reason)
        return False, {
            'success': False,
            'status_code': response.status_code,
            'reason': str(response.reason),
            'valid': False
        }
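A minimal usage sketch for submit_test: validate an NLPQL script before running it. The NLPQL snippet below is a hypothetical placeholder, not a script from this repo.

# hypothetical NLPQL; a real script would come from a file or request payload
nlpql = 'phenotype "Example" version "1";'

ok, result = submit_test(nlpql)
if ok:
    log('NLPQL validated')
else:
    log('NLPQL rejected: {}'.format(result))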
Example #2
def submit_job(nlpql_json):
    """
    Submitting ClarityNLP job
    """

    url = util.claritynlp_url + 'phenotype?background=false'
    log('URL from submit_job: "{0}"'.format(url))

    phenotype_string = json.dumps(nlpql_json)
    log("POSTing phenotype:")
    log(nlpql_json.get('name'))
    log("")

    token, oauth = util.app_token()
    response = requests.post(url,
                             headers=get_headers(token),
                             data=phenotype_string)
    if response.status_code == 200:
        data = response.json()
        if 'success' in data:
            if not data['success']:
                log(data['error'], util.ERROR)
                return False, data['error']
        # log("\n\nJob Response:\n")
        # log(data)
        return True, data
    else:
        log(response.status_code)
        log(response.reason)
        return False, response.reason
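A sketch of calling submit_job with a parsed NLPQL phenotype. The only key the function itself reads is 'name' (for logging); the rest is passed through to ClarityNLP as-is. The payload below is a hypothetical minimal example.

nlpql_json = {'name': 'Example Phenotype'}  # hypothetical minimal payload
ok, job = submit_job(nlpql_json)
if ok:
    log('job submitted: {}'.format(job))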
Example #3
def get_reports(source_id):
    """
    Get reports based on generated source
    """
    url = '{}/select?indent=on&q=source:{}&wt=json&rows=1000'.format(
        util.solr_url, source_id)

    token, oauth = util.app_token()
    response = requests.get(url, headers=get_headers(token))
    if response.status_code == 200:
        res = response.json().get('response')
        if not res:
            res = {'docs': []}
        return True, res['docs']
    else:
        return False, {'reason': response.reason}
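get_reports pulls back up to 1000 Solr documents whose source field matches the given id, typically one generated by upload_reports (Example #7). A usage sketch:

ok, docs = get_reports(source_id)  # source_id as returned by upload_reports
if ok:
    log('{} documents found'.format(len(docs)))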
Example #4
def delete_report(source_id):
    """
    Deleting reports based on generated source
    """
    url = util.solr_url + 'update?commit=true'
    log('URL from delete_report: "{0}"'.format(url))

    data = '<delete><query>source:%s</query></delete>' % source_id

    token, oauth = util.app_token()
    # Solr expects the raw XML delete query as the request body;
    # wrapping it in json.dumps() would quote it and break the delete
    response = requests.post(url,
                             headers=get_headers(token),
                             data=data)
    if response.status_code == 200:
        return True, response.reason
    else:
        return False, response.reason
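delete_report is the cleanup counterpart to upload_reports: it issues a Solr delete-by-query for everything tagged with the source id. A sketch:

ok, reason = delete_report(source_id)  # same source_id used at upload time
if not ok:
    log('delete failed: {}'.format(reason), util.ERROR)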
Example #5
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/css/bootstrap.min.css">
        </head>
        <body>
        
        <div class="container">
        <h1>Upload CSV for NLPQL Form Parsing</h1>
        <br>
        
        <div>
            <a href="https://github.com/ClarityNLP/Utilities/blob/master/custom_query/afib.csv" target="_blank">
                (See sample CSV)
            </a>
        </div>
        <form method="post" enctype="multipart/form-data">
          <br>
          <input type="text" name="formname" class="form-control" placeholder="Form Name">
          <br>
          <input type="file" name="file" class="form-control-file">
          <br>
          <input type="submit" value="Upload" class="btn btn-primary">
        </form>
        </form>
        </div>
        </body>
        '''


if __name__ == '__main__':
    util.app_token()
    # configure logging before run(), which blocks until the server stops
    util.set_logger(application.logger)
    application.run(host='0.0.0.0', port=5000, debug=True)
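The form above POSTs a CSV plus a form name as multipart/form-data. The same endpoint can be exercised without a browser; a sketch with requests, assuming the dev server above is running locally and that the upload route is '/' (the route decorator is not shown in this snippet, so the path is an assumption):

import requests

# assumes the upload route is '/' on the dev server started above
with open('afib.csv', 'rb') as f:
    resp = requests.post('http://localhost:5000/',
                         data={'formname': 'AFib'},
                         files={'file': f})
print(resp.status_code)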
Example #6
def get_results(job_id: int,
                source_data=None,
                report_ids=None,
                return_only_if_complete=False,
                patient_id=-1,
                name="NLPAAS Job"):
    """
    Reading results from Mongo
    TODO: use API endpoint
    """
    log('** JOB ID **')
    log(job_id)
    if not source_data:
        source_data = list()
    if not report_ids:
        report_ids = list()

    # Build the job-status endpoint path
    status = "status/%s" % job_id
    url = util.claritynlp_url + status
    log('URL from get_results: "{0}"'.format(url))

    # Polling for job completion
    while True:
        time.sleep(3.0)

        token, oauth = util.app_token()
        r = oauth.get(url)

        if r.status_code != 200:
            return Response(json.dumps(
                {
                    'message':
                    'Could not query job status from NLP API. Reason: ' +
                    r.reason
                },
                indent=4),
                            status=500,
                            mimetype='application/json'), False

        try:
            status = r.json()["status"]
        except Exception as ex1:
            log(ex1, util.ERROR)
            # fall back so we never compare against a stale value
            status = "UNKNOWN"
        if status == "COMPLETED":
            break
        if return_only_if_complete:
            break

    if return_only_if_complete and status != "COMPLETED":
        # str.format() would raise on the literal JSON braces here,
        # so build the payload with json.dumps instead
        return json.dumps({
            'job_completed': False,
            'job_id': job_id,
            'status': status
        }, indent=4), False

    time.sleep(3)

    # Fetch paged results from:
    # /phenotype_paged_results/<int:job_id>/<string:phenotype_final_str>
    results = list()
    final_list = list()
    n = 0
    r_formatted = ''
    while n < 10:
        url = "{}phenotype_paged_results/{}/{}".format(util.claritynlp_url,
                                                       job_id, 'true')
        url2 = "{}phenotype_paged_results/{}/{}".format(
            util.claritynlp_url, job_id, 'false')

        response = oauth.get(url)
        response2 = oauth.get(url2)

        if response.status_code == 200:
            results.extend(response.json()['results'])
        if response2.status_code == 200:
            results.extend(response2.json()['results'])
        if len(results) > 0:
            break
        else:
            time.sleep(2.0)
            n += 1
    try:
        log('')
        log('total results for {}: {}'.format(name, len(results)))
        log('')
        log('')
        if len(results) == 0:
            # build valid JSON; the original bare-string fragment had no
            # enclosing braces
            return json.dumps({
                'success': 'true',
                'message': 'No results found for job id {}'.format(job_id)
            }), False
        for r in results:
            # log('** REPORT (R)**', util.INFO)
            # log(r, util.INFO)
            r_formatted = json.dumps(r, indent=4)
            report_id = r['report_id']
            source = r['source']

            # Three types of result objects 'r' to handle:
            # 1) Result objects from ClarityNLP, computed via NLPQL
            #       These have been ingested into Solr, static
            #       All the expected fields present
            # 2) Result objects temporarily loaded into Solr via JSON blob
            #        The JSON blob is POSTed to NLPaaS
            #        The doc_index and source fields constructed differently
            #            from normal Solr ingest process
            # 3) Result objects obtained from FHIR server via CQL call
            #        CQLExecutionTask returns this data
            #        No underlying source document at all, so no report_text

            pipeline_type = r['pipeline_type'].lower()
            if 'cqlexecutiontask' == pipeline_type:
                # no source docs, data obtained via CQL query
                r['report_text'] = ''
                r['report_type'] = r.get('resourceType', 'Unknown')
                final_list.append(r)
                continue

            if len(report_ids) > 0 and report_id not in report_ids:
                continue

            # compute the doc_index encoded in the source field
            try:
                doc_index = int(
                    report_id.replace(source, '').replace('_', '')) - 1
            except ValueError as ve:
                doc_index = -1
                log("non-integer source index", util.ERROR)
                log(ve, util.ERROR)
                log(r_formatted)

            if doc_index == -1 and patient_id != -1:
                r['report_text'] = ''
            elif len(source_data) > 0 and doc_index < len(source_data):
                source_doc = source_data[doc_index]
                r['report_text'] = source_doc['report_text']
            else:
                r['report_text'] = r['sentence']
            final_list.append(r)

        result_string = dumps(final_list)
        return result_string, True
    except Exception as ex:
        log(ex, util.ERROR)
        log(r_formatted)
        return json.dumps({
            'success': 'false',
            'message': str(ex)
        }), False
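get_results blocks while polling the status endpoint every 3 seconds, then pages results for up to 10 attempts. A usage sketch, assuming job_id came from a submit_job response (the key holding the id in that response is an assumption):

# job_id from the submit_job response (Example #2)
result_string, ok = get_results(job_id, return_only_if_complete=False)
if ok:
    results = json.loads(result_string)
    log('{} result rows'.format(len(results)))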
Example #7
def upload_reports(data, access_token=None):
    """
    Uploading reports with unique source
    """

    url = util.solr_url + 'update?commit=true&commitWithin=10000'
    log('URL from upload_reports: "{0}"'.format(url))

    # Generating a source_id
    rand_uuid = uuid.uuid1()
    source_id = str(rand_uuid)

    payload = list()
    report_list = list()
    nlpaas_id = 1
    fhir_resource = False

    # print('**DATA**')
    # print(json.dumps(data, indent=4))

    for report in data['reports']:
        report_id = '{}_{}'.format(source_id, str(nlpaas_id))
        json_body = {
            "report_type": "ClarityNLPaaS Document",
            "id": report_id,
            "report_id": report_id,
            "source": source_id,
            "nlpaas_id": str(nlpaas_id),
            "subject": "ClarityNLPaaS Subject",
            "report_date": "1970-01-01T00:00:00Z",
            'original_report_id': ''
        }
        if isinstance(report, str):
            json_body["report_text"] = report
        else:
            resource_type = ''
            if 'resource' in report:
                report_resource = report.get('resource')
                report_resource['fullUrl'] = report.get('fullUrl')
                report = report_resource

            if 'resourceType' in report:
                resource_type = report['resourceType']

            report_date = None
            if 'created' in report:
                report_date = dateparser.parse(report['created'])
            if not report_date and 'indexed' in report:
                report_date = dateparser.parse(report['indexed'])
            if report_date:
                # The 'report_date' variable is a python 'datetime' object.
                # For Solr ingest, need to:
                #     1) convert to UTC
                #     2) format as Solr wants it
                utc_report_date = report_date.astimezone(
                    tz=datetime.timezone.utc)
                json_body['report_date'] = utc_report_date.strftime(
                    '%Y-%m-%dT%H:%M:%SZ')

            if 'subject' in report:
                if 'reference' in report['subject']:
                    subject = report['subject']['reference']
                else:
                    subject = str(report['subject'])
                if '/' in subject:
                    # subject usually returned as 'Patient/12345' or similar
                    subject = subject.split('/')[-1]
                json_body['subject'] = subject

            if 'id' in report:
                json_body['original_report_id'] = str(report['id'])

            if 'type' in report:
                if 'coding' in report['type'] and len(
                        report['type']['coding']) > 0:
                    coded_type = report['type']['coding'][0]
                    if 'display' in coded_type:
                        json_body['report_type'] = coded_type['display']

            if resource_type in ('DocumentReference', 'DiagnosticReport'):
                fhir_resource = True
                txt = ''
                # log('** REPORT **')
                # log(report)
                if 'content' in report:
                    for c in report['content']:
                        attachment = c.get('attachment', None)
                        if attachment:
                            content_type = attachment.get(
                                'contentType', 'text/plain')
                            report_data = attachment.get('data', None)
                            if content_type and report_data:
                                decoded_txt = base64.b64decode(
                                    report_data).decode("utf-8")
                                if content_type == 'application/pdf':
                                    if 'url' in c['attachment']:
                                        url = attachment.get('url')
                                        types = [
                                            'application/json+fhir',
                                            content_type
                                        ]
                                        txt = ''
                                        for t in list(set(types)):
                                            if txt == '':
                                                if access_token:
                                                    headers = {
                                                        'Accept':
                                                        t,
                                                        'Authorization':
                                                        'Bearer {}'.format(
                                                            access_token)
                                                    }
                                                else:
                                                    headers = {'Accept': t}
                                                if 'json' in t:
                                                    txt = get_text(
                                                        url,
                                                        headers,
                                                        key='content')
                                                else:
                                                    txt = get_text(
                                                        url, headers)
                                elif 'xml' in content_type or 'html' in content_type:
                                    clean_txt = re.sub('<[^<]+?>', '',
                                                       decoded_txt)
                                    txt += clean_txt
                                    txt += '\n'
                                else:
                                    txt += decoded_txt
                                    txt += '\n'

                            elif 'data' in c['attachment']:
                                # decode the attachment's own data field;
                                # report_data may be empty in this branch
                                decoded_txt = base64.b64decode(
                                    c['attachment']['data']).decode("utf-8")
                                txt += decoded_txt
                                txt += '\n'

                json_body["report_text"] = txt
            else:
                json_body["report_text"] = str(report)
        if len(json_body["report_text"]) > 0:
            payload.append(json_body)
            report_list.append(report_id)
            nlpaas_id += 1

    log('{} total documents'.format(len(payload)))
    # log('** PAYLOAD **', util.INFO)
    # log(payload, util.INFO)
    if len(payload) > 0:
        token, oauth = util.app_token()
        log('uploading solr docs...')
        response = requests.post(url,
                                 headers=get_headers(token),
                                 data=json.dumps(payload, indent=4))
        if response.status_code == 200:
            the_time = 0
            while True:
                log('checking for solr upload...')
                data = dict()
                data['query'] = "*:*"
                data['params'] = {'wt': 'json'}
                data['filter'] = 'source:"{}"'.format(source_id)
                doc_results = 0
                try:
                    post_data = json.dumps(data, indent=4)
                    # util.solr_url already ends with a slash (see the
                    # 'update' URLs above), so no extra '/' is needed
                    response = requests.post(util.solr_url + 'select',
                                             headers=get_headers(token),
                                             data=post_data)
                    # log(response.text)
                    res = response.json().get('response', None)
                    if res:
                        doc_results = int(res.get('numFound', 0))
                except Exception as ex:
                    log(ex, util.ERROR)
                    log("unable to query docs", util.ERROR)

                if doc_results > 0:
                    log("documents uploaded {}".format(doc_results), util.INFO)
                    break

                the_time += 1
                time.sleep(1)

                if the_time > 15:
                    log("documents not yet loaded in 15 sec", util.ERROR)
                    break

            return True, source_id, report_list, fhir_resource, payload
        else:
            return False, response.reason, report_list, fhir_resource, payload
    else:
        return True, "All documents were empty or invalid, or no documents were passed in.", report_list, \
               fhir_resource, payload
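Taken together, these helpers form a pipeline: stage documents in Solr, submit the NLPQL job, poll for results, then clean up. A hedged end-to-end sketch (the reports payload and nlpql_json are hypothetical placeholders, and the job-id key in the submit response is an assumption):

ok, source_id, report_list, is_fhir, _ = upload_reports(
    {'reports': ['Patient reports shortness of breath.']})  # hypothetical note
if ok:
    submitted, job = submit_job(nlpql_json)              # Example #2
    if submitted:
        result_string, success = get_results(job.get('job_id'),  # key assumed
                                             report_ids=report_list)
    delete_report(source_id)                             # Example #4 cleanup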