示例#1
0
def get_nct_list_from_zip(input_zip, mile_range=50):
    '''
    Find trials that list at least one site ZIP code near ``input_zip``.

    Every row of ``dbo.aact_trial_info`` is expanded into one
    ``(nct_id, zip)`` pair per entry of its ``'|'``-separated ``zip_codes``
    column; a trial is kept when any of its ZIP codes is among the ZIP codes
    the ZIP database reports around ``input_zip``.

    :param input_zip: ZIP code to search around; handed unchanged to
        ``ZipCodeDatabase.get_zipcodes_around_radius``
    :param mile_range: radius handed to the ZIP database (default 50)
    :return: ``[trial_list, zip_list]``. ``trial_list`` holds
        ``'<nct_id>;<n>'`` strings where ``n`` is a 1-based running number
        (temporary format); ``zip_list`` is the list of nearby ZIP codes.
    '''
    zcdb = ZipCodeDatabase()
    zip_list = [
        z.zip for z in zcdb.get_zipcodes_around_radius(input_zip, mile_range)
    ]
    # Set copy for O(1) membership tests; zip_list itself is still returned.
    nearby_zips = set(zip_list)

    # Recursive CTE: peel one '|'-separated item off zip_codes per recursion
    # step so that every (trial, zip) pair becomes its own row.
    sql = '''
            ;with cte (code, DeclarationItem, Declaration) as
            (
              select nct_id,
                cast(left(zip_codes, charindex('|',zip_codes+'|')-1) as varchar(50)) DeclarationItem,
                     stuff(zip_codes, 1, charindex('|',zip_codes+'|'), '') Declaration
              from dbo.aact_trial_info
              union all
              select code,
                cast(left(Declaration, charindex('|',Declaration+'|')-1) as varchar(50)) DeclarationItem,
                stuff(Declaration, 1, charindex('|',Declaration+'|'), '') Declaration
              from cte
              where Declaration > ''
            )
            select code as nct_id, DeclarationItem as zip
            from cte
            order by nct_id asc
            option (maxrecursion 0);
        '''
    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql)
            trial_zips = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    # compare nearby zip codes to trial zip codes
    nearby_ncts = set()
    for row in trial_zips:
        raw_zip = row[1]
        if raw_zip is None:
            # A trial row without a ZIP code can never match.
            continue
        # Some ZIP codes are stored as 'xxxxx-xxxx'; only the primary
        # five-digit part is compared.
        if raw_zip.split('-')[0] in nearby_zips:
            nearby_ncts.add(row[0])

    # temp format that appends a running trial number, to be removed later
    temp_return_list = [
        '%s;%d' % (nct_id, number)
        for number, nct_id in enumerate(nearby_ncts, 1)
    ]

    # the nearby zipcode list is returned alongside the trials
    return [temp_return_list, zip_list]
def statistics():
    '''
    Render the usage statistics page from ``dbo.access_stat``.

    One query per column is issued and the values are aggregated with
    ``Counter``. The template receives a ``parameters`` dict with:

    * ``zip_code_range`` / ``zip_code_count``: locations and how often each
      occurs, ordered by ascending count
    * ``age_range`` / ``age_count``: ages (as ints) and their counts, ordered
      by ascending age, with the value 99990 removed
    * ``type_trials_dict``, ``exposure_dict``, ``domain_dict``, ``stat_dict``,
      ``preg_dict``: ``Counter`` objects for the matching columns

    :return: the rendered ``statistics.html`` template
    '''
    columns = ('location', 'trial_type', 'age', 'exposure', 'domain', 'stat',
               'preg')
    raw = {}
    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            for column in columns:
                # Column names come from the fixed tuple above, never from
                # user input, so formatting them into the SQL is safe.
                cur.execute("select %s from dbo.access_stat" % column)
                raw[column] = [row[0] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        # Always release the connection, even when a query fails.
        conn.close()

    def ascii_only(values):
        # NOTE(review): on Python 3 ``encode`` yields bytes, so the counted
        # keys are bytes objects -- confirm the target interpreter version.
        return [value.encode('ascii', 'ignore') for value in values]

    parameters = {}

    zip_counts = Counter(str(value) for value in ascii_only(raw['location']))
    zip_sorted = sorted(zip_counts.items(), key=lambda pair: pair[1])
    parameters['zip_code_range'] = [pair[0] for pair in zip_sorted]
    parameters['zip_code_count'] = [pair[1] for pair in zip_sorted]

    parameters['type_trials_dict'] = Counter(ascii_only(raw['trial_type']))

    age_counts = Counter(int(value) for value in raw['age'])
    # 99990 is dropped from the age histogram.
    # NOTE(review): presumably a "not provided" marker -- confirm.
    age_counts.pop(99990, None)
    age_sorted = sorted(age_counts.items(), key=lambda pair: pair[0])
    parameters['age_range'] = [pair[0] for pair in age_sorted]
    parameters['age_count'] = [pair[1] for pair in age_sorted]

    parameters['exposure_dict'] = Counter(ascii_only(raw['exposure']))
    parameters['domain_dict'] = Counter(ascii_only(raw['domain']))
    parameters['stat_dict'] = Counter(ascii_only(raw['stat']))
    parameters['preg_dict'] = Counter(ascii_only(raw['preg']))

    return render_template('statistics.html', parameters=parameters)
def start_question_detail():
    '''
    Record the visitor's pre-question answers and return the first question.

    Reads ``age``, ``exposure``, ``domain``, ``user_picked_time``, ``stat``,
    ``preg``, ``miles``, ``locn`` and ``trial_type`` from the query string,
    stores one row in ``dbo.access_stat`` and then asks ``qst`` for the
    initial working trial list and the first question.

    :return: JSON response with ``question_answer_list`` and
        ``working_nct_id_list``
    :raises AttributeError: when the ``age`` argument is missing
    :raises ValueError: when ``age`` is not an integer
    '''
    args = request.args
    age = args.get('age')
    exposure = args.get('exposure')
    domain = args.get('domain')
    user_picked_time = args.get('user_picked_time')
    stat = args.get('stat')
    preg = args.get('preg')
    miles = args.get('miles')
    locn = args.get('locn')
    trial_type = args.get('trial_type')

    # The raw (un-bucketed) age is what the question engine receives.
    pre_quest_answers = [age, exposure, domain, user_picked_time, stat, preg]

    # Save user access info into the database
    rnd = ''.join(str(random.choice(range(6))) for _ in range(6))
    session_id = rnd + time.strftime('%Y%m%d%H%M%S')

    # Ages are stored in buckets of ten; 0-9 is folded into the 10 bucket.
    age_bucket = int(age.encode('ascii', 'ignore'))
    if 0 <= age_bucket < 10:
        age_bucket = 10
    else:
        age_bucket = int(age_bucket / 10) * 10

    row = (session_id, locn, miles, trial_type, age_bucket, exposure, domain,
           user_picked_time or '', stat, preg)
    # Values are bound as parameters instead of being formatted into the
    # statement, so request data cannot alter the SQL. Each value is rendered
    # to text first so the stored content matches what string formatting
    # used to write (e.g. a missing argument is stored as 'None').
    params = ['%s' % value for value in row]
    sql = "insert into dbo.access_stat values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"

    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, params)
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()

    # get trials and tags
    rnct = session.get('working_nct_list_pre_process')
    working_nct_id_list = qst.init_working_nct_id_list(rnct, pre_quest_answers)
    question_answer_list = []

    if len(working_nct_id_list) > 0:
        question_answer_list = qst.find_new_question(question_answer_list,
                                                     working_nct_id_list)
        log.info('%s -- first question' % (request.remote_addr))

    return jsonify(question_answer_list=question_answer_list,
                   working_nct_id_list=working_nct_id_list)
示例#4
0
def find_nct_loc_within_range(loc_latlng_list, nct_id_list,
                              input_zipcode_latlng, input_miles):
    '''
    Keep the trial locations that lie within ``input_miles`` of a point.

    :param loc_latlng_list: [{'lat': , 'lng': }]; an entry may be None, in
        which case it is treated as out of range
    :param nct_id_list: trial ids, index-aligned with ``loc_latlng_list``
    :param input_zipcode_latlng: {'lat': , 'lng': }
    :param input_miles: number
    :return: [locations within range, their trial ids, their official titles]
    :raises KeyError: when a trial id within range has no row in
        ``dbo.aact_trial_info``
    '''
    lat1 = input_zipcode_latlng['lat']
    lng1 = input_zipcode_latlng['lng']

    # 3 is added to every computed distance before the comparison (the
    # original note reads "allow distance error +3").
    # NOTE(review): adding to the distance makes the filter stricter, not
    # looser -- confirm that this is the intended direction.
    distance_list = [
        (haversine(lng1, lat1, loc['lng'], loc['lat']) + 3)
        if loc is not None else 999999
        for loc in loc_latlng_list
    ]
    filter_index = np.where(np.array(distance_list) <= input_miles)[0]
    loc_within_range = list(np.array(loc_latlng_list)[filter_index])
    nct_within_range = list(np.array(nct_id_list)[filter_index])

    # get trial title
    sql = '''
            select nct_id, official_title
            from dbo.aact_trial_info
            '''
    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql)
            trial_title = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    nct_id_title = {}
    for row in trial_title:
        if len(row) >= 2:
            nct_id_title[row[0]] = row[1]
    nct_title_within_range = [
        nct_id_title[nct_id] for nct_id in nct_within_range
    ]

    return [loc_within_range, nct_within_range, nct_title_within_range]
示例#5
0
def get_nct_list_from_keywords(keyword_string):
    '''
    Find the trials whose text fields mention every given keyword.

    A keyword matches a trial when it occurs (case-insensitively) in the
    condition names, intervention names, official title or outcome measure.
    All keywords must match. Keywords are trimmed and empty ones are ignored,
    so a stray separator (e.g. ``'; cancer'``) no longer disables the filter.

    :param keyword_string: keywords separated by ';'
    :return: list of nct ids in ascending order; every trial when no keyword
        is given
    '''
    keywords = [part.strip().lower() for part in keyword_string.split(';')]
    keywords = [keyword for keyword in keywords if keyword]

    searched_columns = ('condition_names', 'intervention_names',
                        'official_title', 'outcome_measure')
    # One "(col like ? or ...)" group per keyword. Only '?' markers end up in
    # the SQL text; the keyword itself is bound as a parameter.
    keyword_group = '(' + ' or '.join(
        'lower(%s) like ?' % column for column in searched_columns) + ')'

    key_search = ''
    params = []
    if keywords:
        key_search = 'where ' + ' and '.join([keyword_group] * len(keywords))
        for keyword in keywords:
            params.extend(['%' + keyword + '%'] * len(searched_columns))

    sql = '''
                select nct_id
                from dbo.aact_trial_info
                %s
                order by nct_id asc
                ''' % key_search

    # send query to server
    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            if params:
                cur.execute(sql, params)
            else:
                cur.execute(sql)
            key_trials = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    return [row[0] for row in key_trials]
示例#6
0
def query_trial_info_for_modal(nct_id):
    '''
    Fetch the descriptive fields of one trial for the detail modal.

    :param nct_id: id of the trial to look up
    :return: list with the selected columns in query order when exactly one
        row matches, otherwise ``['Error']``
    '''
    # nct_id is bound as a parameter rather than formatted into the statement
    # so it cannot alter the SQL.
    sql = '''
            select official_title, study_type, primary_purpose, study_description, gender, minimum_age, maximum_age,
                healthy_volunteers, phase, allocation, intervention_model, observation_model,
                masking, outcome_measure, outcome_description, facilities_and_contacts, intervention_names, central_contacts
            from dbo.aact_trial_info
            where nct_id = ?
            '''
    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, [nct_id])
            trial_data = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    # clean up items being returned
    if len(trial_data) == 1:
        return list(trial_data[0])
    return ['Error']
示例#7
0
def find_annotated_nct_id_list():
    '''
        Collect the ids of recruiting trials that also have criteria rows.

        :return: list of nct ids (as str) found in dbo.aact_trial_info_us with
                 status 'Recruiting' and present in dbo.all_criteria
    '''
    query = '''
        select distinct(nct_id)
        from dbo.aact_trial_info_us
        where status = 'Recruiting' and nct_id in ( select distinct(nct_id)
                                                    from dbo.all_criteria );
    '''
    connection = general_pool_criteria.connection()
    cursor = connection.cursor()
    cursor.execute(query)
    rows = cursor.fetchall()
    connection.close()
    cursor.close()

    # The id is the first (and only) selected column of each row.
    return [str(row[0]) for row in rows]
示例#8
0
def update_working_nct_id_list(question_answer_list, working_nct_id_list):
    '''
    update working_nct_id_list by comparing the latest answer with the
    criteria knowledge base

    Only the last entry of question_answer_list is evaluated. Every record
    whose nct id is returned by the criteria query and whose element at
    index 2 is still 0 gets that element set to the number of questions
    asked so far. Records are modified in place, so they must be mutable
    (lists, not tuples).

    :param question_answer_list: list of dicts shaped like
        {'question': {'entity_text': ..., 'domain': ...},
         'answer': {'include': ..., 'rangeend': ..., 'measurement_value': ...}}
        ('answer' and its range/value keys are optional)
    :param working_nct_id_list: list of records, e.g. ['NCT02901717', 3431, 0]
    :return: the same working_nct_id_list object, updated
    :raises ValueError: when the domain is not a plain identifier, or a
        range/measurement value is not numeric
    '''
    if not question_answer_list:
        return working_nct_id_list

    question_number = len(question_answer_list)
    this_qa = question_answer_list[-1]
    this_entity_text = this_qa['question']['entity_text']
    this_domain = this_qa['question']['domain']

    if 'answer' not in this_qa:
        return working_nct_id_list

    this_answer = this_qa['answer']
    this_include = this_answer['include']

    # A table name cannot be bound as a parameter, so the domain suffix is
    # restricted to letters, digits and '_' before it is used in the SQL.
    if not this_domain or not all(
            ch.isalnum() or ch == '_' for ch in this_domain):
        raise ValueError('invalid domain: %r' % (this_domain,))
    table_name = 'dbo.dquest_omop_clean_' + this_domain

    if this_domain.lower() != 'measurement':
        if this_include == 'INC':
            # Numeric conversion keeps the comparison numeric on the server.
            rangeend = float(this_answer.get('rangeend', 0))
            sql = '''
                    select distinct nctid from %s
                    where concept_cluster_name in (?)
                    and
                    (
                        (flag = 0 and beforedays >= ?)
                        or
                        (flag = 1 and beforedays < ?)
                    )
                    ''' % table_name
            params = [this_entity_text, rangeend, rangeend]
        else:
            sql = '''
                    select distinct nctid from %s
                    where concept_cluster_name in (?)
                    and
                    (
                        (flag = 1)
                    )
                    ''' % table_name
            params = [this_entity_text]
    elif 'measurement_value' in this_answer and this_include == 'INC':
        measurement_value = float(this_answer['measurement_value'])
        sql = '''
                select distinct nctid from %s
                where concept_cluster_name in (?)
                and
                (
                    (
                        flag = 0 and (min <= ? and max >= ?)
                    ) or
                    (
                        flag = 1 and (min > ? or max < ?)
                    )
                )
                ''' % table_name
        params = [this_entity_text] + [measurement_value] * 4
    else:
        # A measurement question without a usable value filters nothing, so
        # there is no need to query at all.
        return working_nct_id_list

    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, params)
            details = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    filtered_nct_id = set(row[0] for row in details)
    for nct_record in working_nct_id_list:
        if nct_record[2] == 0 and nct_record[0] in filtered_nct_id:
            nct_record[2] = question_number
    return working_nct_id_list


# print(update_working_nct_id_list(question_answer_list,working_nct_id_list))
示例#9
0
def find_new_question(question_answer_list, working_nct_id_list, domain='all'):
    '''
    find new question by frequency.
    alternatively, information entropy should be considered sum(plog(p))

    The next question is the concept cluster that occurs in the largest
    number of candidate trials (records whose element at index 2 is 0) and
    has not been asked yet. When nothing is found, or there are no candidate
    trials left, a question with entity_text 'NQF' is appended instead.

    :param question_answer_list: questions already answered or skipped with
        their corresponding answers; each entry is shaped like
        {'question': {'domain': ..., 'entity_text': ...}}
    :param working_nct_id_list: a working nct id list, e.g.
        [['NCT02901717', 3431, 0], ['NCT00562068', 3431, 1]]
    :param domain: 'all' (query dbo.dquest_omop_clean_condition) or a domain
        name used as the suffix of the table to query
    :return: the same question_answer_list object with a new question appended
    :raises ValueError: when domain is not a plain identifier
    '''
    # The query fails when more than 2000 ids are bound, so only the first
    # 2000 candidate trials are considered.
    working_nct_id_0 = [
        record[0] for record in working_nct_id_list if record[2] == 0
    ][:2000]

    domain = domain.lower()
    single_domain = domain != 'all'

    if single_domain:
        # A table name cannot be bound as a parameter, so the suffix is
        # restricted to letters, digits and '_'.
        if not domain or not all(ch.isalnum() or ch == '_' for ch in domain):
            raise ValueError('invalid domain: %r' % (domain,))
        table_name = 'dbo.dquest_omop_clean_' + domain
        select_list = 'count(distinct nctid) AS count, concept_cluster_name'
        group_by = 'concept_cluster_name'
        fallback_domain = domain
        asked = [
            qa['question']['entity_text'] for qa in question_answer_list
            if qa['question']['domain'] == domain
        ]
    else:
        table_name = 'dbo.dquest_omop_clean_condition'
        select_list = ('count(distinct nctid) AS count, '
                       'concept_cluster_name, domain')
        group_by = 'concept_cluster_name,domain'
        fallback_domain = 'condition'
        asked = [qa['question']['entity_text'] for qa in question_answer_list]

    next_concept = []
    # An empty id list would render "nctid in ()", which is invalid SQL; with
    # no candidates there is nothing to ask, so the query is skipped.
    if working_nct_id_0:
        conditions = [
            'nctid in (%s)' % ','.join('?' * len(working_nct_id_0)),
            'concept_cluster_name is NOT NULL',
        ]
        params = list(working_nct_id_0)
        if single_domain:
            conditions.append('domain = ?')
            params.append(domain)
        if asked:
            conditions.append('concept_cluster_name not in (%s)' %
                              ','.join('?' * len(asked)))
            params.extend(asked)

        sql = '''
                select top(1) %s
                from %s
                where %s
                group by %s
                order by count(distinct nctid) desc
            ''' % (select_list, table_name, ' and '.join(conditions),
                   group_by)

        conn = general_pool_criteria.connection()
        try:
            cur = conn.cursor()
            try:
                cur.execute(sql, params)
                next_concept = cur.fetchall()
            finally:
                cur.close()
        finally:
            conn.close()

    if len(next_concept) > 0:
        top_row = next_concept[0]
        this_q = {
            'question': {
                # In 'all' mode the domain comes from the result row.
                'domain': domain if single_domain else top_row[2].lower(),
                'entity_text': top_row[1]
            }
        }
    else:
        this_q = {
            'question': {
                'domain': fallback_domain,
                'entity_text': 'NQF'
            }
        }
    question_answer_list.append(this_q)

    return question_answer_list
示例#10
0
def find_active_nct_id_list(active_restriction, trial_type='all'):
    '''
        find the key-criteria trial entries matching a status / type filter

        :param active_restriction: the string 'true' restricts trials to the
            statuses 'Recruiting', 'Enrolling by Invitation' and 'Available';
            any other value applies no status restriction
        :param trial_type: 'all', 'intervention' or 'observation'
        :return: list of '<nct_id_desc>;<value>;<pt_cohort>' strings ordered
            by nct_id_desc
        :raises ValueError: for an unknown trial_type (it used to produce an
            invalid empty "in ()" SQL list)
    '''
    study_types_by_trial_type = {
        'intervention': ['interventional', 'expanded access'],
        'observation': ['observational', 'observational [patient registry]'],
    }
    # Validate before touching the database.
    if trial_type != 'all' and trial_type not in study_types_by_trial_type:
        raise ValueError('unknown trial_type: %r' % (trial_type,))

    # creating set of trial statuses based on user entry
    if active_restriction == 'true':
        status_terms = ",".join(
            "'" + status + "'"
            for status in ['Recruiting', 'Enrolling by Invitation',
                           'Available'])
    else:
        # Sub-select that accepts every status present in the table.
        status_terms = 'select distinct status from dbo.aact_trial_info'

    # managing different trial type based on user entry; only the fixed
    # literals above are ever formatted into the statement
    type_filter = ''
    if trial_type != 'all':
        type_search_terms = ",".join(
            "'" + study_type + "'"
            for study_type in study_types_by_trial_type[trial_type])
        type_filter = ''' and
                            lower(study_type) in (%s)''' % type_search_terms

    sql = '''
            select distinct nct_id_desc, pt_cohort, value
            from dbo.key_criteria_v2
                cross apply string_split(nct_id_desc, '_')
            where [value] in (select nct_id
                            from dbo.aact_trial_info
                            where status in (%s)%s)
            order by nct_id_desc;
        ''' % (status_terms, type_filter)

    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql)
            nctids = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    # Output order is nct_id_desc, value, pt_cohort (columns 0, 2, 1).
    return [
        ';'.join([str(row[0]), str(row[2]), str(row[1])]) for row in nctids
    ]
示例#11
0
def update_working_nct_id_list(question_answer_list, working_nct_id_list):
    '''
    update working_nct_id_list by comparing the latest answer with the
    criteria knowledge base (dbo.all_criteria_v2)

    Only the last entry of question_answer_list is evaluated. Every record
    whose nct id is returned by the criteria query gets its element at
    index 3 set to the number of questions asked so far. Records are
    modified in place, so they must be mutable and hold at least four
    elements.

    :param question_answer_list: list of dicts shaped like
        {'question': {'entity_text': ..., 'domain': ...},
         'answer': {'include': ..., 'rangeend': ..., 'measurement_value': ...}}
        ('answer' and its range/value keys are optional)
    :param working_nct_id_list: list of records; index 0 is the nct id
    :return: the same working_nct_id_list object, updated
    :raises ValueError: when 'rangeend' is not numeric
    '''
    if not question_answer_list:
        return working_nct_id_list

    question_number = len(question_answer_list)
    this_qa = question_answer_list[-1]
    this_entity_text = this_qa['question']['entity_text']
    this_domain = this_qa['question']['domain']

    if 'answer' not in this_qa:
        return working_nct_id_list

    this_answer = this_qa['answer']
    this_include = this_answer['include']

    # All user supplied values are bound as parameters ('?') so they cannot
    # alter the SQL text.
    if this_domain.lower() != 'measurement':
        if this_include == 'INC':
            rangeend = float(this_answer.get('rangeend', 0))
            sql = '''
                    select distinct nct_id_original
                    from dbo.all_criteria_v2
                    where (LOWER (concept_name) = LOWER (?) and
                        is_exclusion = 0 and
                        concept_group_id is null and
                        (1 = case
                            when before_days != 0 and (? > before_days) then 1
                                else 0
                            end)) OR
                        (lower(concept_name) = lower(?) and
                            is_exclusion = 1)
                    '''
            params = [this_entity_text, rangeend, this_entity_text]
        else:
            sql = '''
                    select distinct nct_id_original
                    from dbo.all_criteria_v2
                    where lower(concept_name) = lower(?) and
                        is_exclusion = 0
                    '''
            params = [this_entity_text]
    elif 'measurement_value' in this_answer and this_include == 'INC':
        # Rendered to text first so numeric answers do not break isdigit().
        measurement_value = ('%s' % this_answer['measurement_value']).strip()
        if measurement_value.isdigit():
            number = int(measurement_value)
            sql = '''
                    select distinct nct_id_original
                    from dbo.all_criteria_v2
                    where (lower(concept_name) = lower(?) and
                        ? <= numeric_att_max and
                        ? >= numeric_att_min AND
                        is_exclusion = 1) OR
                        (lower(concept_name) = lower(?) and
                        (? > numeric_att_max or
                        ? < numeric_att_min) AND
                        is_exclusion = 0 and
                        concept_group_id is null)
                    '''
            params = [this_entity_text, number, number,
                      this_entity_text, number, number]
        else:
            # Qualitative result: the positive/negative decision is taken
            # here rather than inside the SQL, so only the concept name has
            # to be bound.
            polarity = measurement_value.lower()
            if polarity in ('pos', 'positive'):
                polarity_filter = '''
                        ((lower(numeric_source_text) = 'negative' and is_exclusion = 0) or
                        (lower(numeric_source_text) like '%positiv%' and is_exclusion = 1))
                        '''
            elif polarity in ('neg', 'negative'):
                polarity_filter = '''
                        ((lower(numeric_source_text) = 'negative' and is_exclusion = 1) or
                        (lower(numeric_source_text) like '%positiv%' and is_exclusion = 0))
                        '''
            else:
                # Any other text matches no criteria, so nothing is filtered.
                return working_nct_id_list
            sql = '''
                    select distinct nct_id_original
                    from dbo.all_criteria_v2
                    where lower(concept_name) = lower(?) and
                    ''' + polarity_filter
            params = [this_entity_text]
    else:
        # A measurement question without a usable value filters nothing, so
        # there is no need to query at all.
        return working_nct_id_list

    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, params)
            details = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    filtered_nct_id = set(row[0] for row in details)
    for nct_record in working_nct_id_list:
        if nct_record[0] in filtered_nct_id:
            nct_record[3] = question_number
    return working_nct_id_list
示例#12
0
def filter_nct_ids_by_pre_questions(answer_list):
    '''
        find working nct id list after filtering by the answers to
        pre-questions

        :param answer_list: the answers to the pre-questions, in the order
            [age, exposure, domain, user_picked_time, stat, preg]; an age of
            99999 disables the age filter
        :return: list of [nct_id, pt_cohort] pairs from dbo.key_criteria_v2
        :raises ValueError: when the age answer is not numeric
    '''
    answer_list = [str(x) for x in answer_list]
    age = answer_list[0]
    exposure = answer_list[1]
    domain = answer_list[2]
    user_picked_time = answer_list[3]
    stat = answer_list[4]
    preg = answer_list[5]

    # The age is compared numerically in the query, so bind it as a number.
    try:
        age_value = int(age)
    except ValueError:
        age_value = float(age)

    # Every answer is bound as a parameter ('?') instead of being formatted
    # into the statement, so the answers cannot alter the SQL text.
    sql = '''
            select distinct keyc.nct_id, keyc.pt_cohort
            from dbo.key_criteria_v2 as keyc
            left join dbo.aact_trial_info as aact
                on keyc.nct_id = aact.nct_id
            where
                (1= case
                    when ? <> 99999 and ((? >= aact.minimum_age or aact.minimum_age is null) and
                                         (? <= aact.maximum_age or aact.maximum_age is null) ) then 1
                    when ? = 99999 then 1
                        else 0
                end )
            and
                (1= case
                    when ? = 'yes' and  keyc.disease_status in ('yes', 'all') then 1
                    when ? = 'no' and keyc.disease_status in ('no', 'all') then 1
                    when ? = 'cleared' and keyc.disease_status in ('cleared', 'all') then 1
                    when ? = 'idk' and keyc.disease_status in ('yes','no','cleared', 'all') then 1
                        else 0
                end)
            and
                (1= case
                    when ? = 'yes' and (keyc.exposure_status = 1 or keyc.exposure_status is null) then 1
                    when ? = 'no' and (keyc.exposure_status = 0 or keyc.exposure_status is null) then 1
                    when ? = 'idk' and (keyc.exposure_status in (1, 0) or keyc.exposure_status is null) then 1
                        else 0
                end)
            and
                (1= case
                    when ? = 'yes' and (keyc.is_hospitalized = 1 or keyc.is_hospitalized is null) then 1
                    when ? = 'no' and (keyc.is_hospitalized = 0 or keyc.is_hospitalized is null) then 1
                    when ? = 'idk' and (keyc.is_hospitalized in (1, 0) or keyc.is_hospitalized is null) then 1
                        else 0
                end)
            and
                (1= case
                    when ? = 'yes' and (keyc.preg_status = 1 or keyc.preg_status is null) then 1
                    when (? = 'no' or ? = 'n/a') and (keyc.preg_status = 0 or keyc.preg_status is null) then 1
                    when ? = 'idk' and (keyc.preg_status in (1, 0) or keyc.preg_status is null) then 1
                        else 0
                    end)
             and(
                1= case
                    when (? = 'None'  or ? = '') then 1
                    when ? <> 'None' and
                        (DATEDIFF(day, CONVERT(VARCHAR, ?, 101), CONVERT(VARCHAR, getdate(), 101))
                        <= keyc.days_to_disease or keyc.days_to_disease is null) then 1
                    else 0
                end)
        '''
    # One entry per '?' above, in order of appearance: the "domain" answer
    # drives the disease_status filter and "stat" the is_hospitalized filter.
    params = ([age_value] * 4 + [domain] * 4 + [exposure] * 3 + [stat] * 3 +
              [preg] * 4 + [user_picked_time] * 4)

    # query database filter nctids
    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, params)
            nctids = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    return [[row[0], row[1]] for row in nctids]
示例#13
0
def find_new_question(question_answer_list, working_nct_id_list, domain='all'):
    '''
    Pick the next question to ask and append it to question_answer_list.

    Candidate concepts are read from dbo.all_criteria_v2 (rows with
    to_display = 1) for the working trials whose record[3] equals 0. For each
    group of rows the query computes p = count / N, where count is the number
    of distinct trials in the group and N is the number of candidate trials
    sent to the database, and returns the top row ordered by the sum of
    -p * LOG(p). Concepts that already appear in question_answer_list are
    excluded.

    :param question_answer_list: questions already answered or skipped; every
        item is read as item['question']['entity_text'] and, when a specific
        domain is requested, item['question']['domain']
    :param working_nct_id_list: a working nct id list; record[0] is the nct id
        and record[3] is compared with 0 to select candidates
    :param domain: 'all' (default) to search every domain, otherwise a single
        domain name (lower-cased before use)
    :return: the same question_answer_list object with one entry
        {'question': {'domain': ..., 'entity_text': ...}} appended;
        entity_text is 'NQF' when there is no candidate trial or the query
        returns no row

    NOTE(review): older examples of working_nct_id_list show three-element
    records such as ['NCT02901717', 3431, 0], which would not have an index 3.
    Confirm the record layout with the caller.

    Example
    question_answer_list = [{'question': {'domain': 'condition', 'entity_text': 'pregnant'}}]
    '''
    # SQL Server limits the number of bound parameters per statement (2100),
    # so only the first 2000 candidate trials are sent; this leaves headroom
    # for the domain and the already-asked concepts.
    max_nct_ids = 2000
    table_name = 'dbo.all_criteria_v2'
    no_question = 'NQF'

    domain = domain.lower()
    single_domain = domain != 'all'

    candidate_ids = [
        record[0] for record in working_nct_id_list if record[3] == 0
    ][:max_nct_ids]
    if not candidate_ids:
        # Nothing to query: "in ()" is invalid SQL and count/0 is undefined.
        question_answer_list.append(
            {'question': {'domain': domain, 'entity_text': no_question}})
        return question_answer_list

    if single_domain:
        asked = [
            qa['question']['entity_text'] for qa in question_answer_list
            if qa['question']['domain'] == domain
        ]
        print('active question: ' + str(asked))
        group_columns = 'concept_name, include'
        result_columns = 'concept_name'
    else:
        asked = [
            qa['question']['entity_text'] for qa in question_answer_list
        ]
        group_columns = 'concept_name, include, domain'
        result_columns = 'concept_name, domain, include'
    # A NULL inside "not in (...)" makes the predicate unknown for every row.
    asked = [text for text in asked if text is not None]

    # Values are bound as parameters (in the order the markers appear) so that
    # quotes in concept or domain names cannot break or alter the statement.
    filters = [
        'nct_id_original in (%s)' % ', '.join('?' * len(candidate_ids)),
        'concept_name is NOT NULL',
    ]
    params = list(candidate_ids)
    if single_domain:
        filters.append('lower(domain) = ?')
        params.append(domain)
    if asked:
        filters.append(
            'concept_name not in (%s)' % ', '.join('?' * len(asked)))
        params.extend(asked)
    filters.append('to_display = 1')

    sql = '''
            SELECT TOP(1) sum(PlogP) AS IE, {result_columns}
            FROM(
                select {group_columns}, count, -(count/{total})*LOG((count/{total})) AS PlogP
                FROM
                    (
                    select CAST(count(distinct nct_id_original) AS [float]) AS count, {group_columns}
                    from {table}
                    where {filters}
                    group by {group_columns}
                ) X
            ) X
            GROUP BY {result_columns}
            ORDER BY sum(X.PlogP) DESC
        '''.format(
        result_columns=result_columns,
        group_columns=group_columns,
        total=len(candidate_ids),  # an int from len(), safe to inline
        table=table_name,
        filters='\n                    and '.join(filters),
    )

    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, params)
            next_concept = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    if next_concept:
        best = next_concept[0]
        # Column 1 is concept_name; column 2 is domain in the all-domain query.
        this_q = {
            'question': {
                'domain': domain if single_domain else best[2],
                'entity_text': best[1]
            }
        }
    else:
        this_q = {'question': {'domain': domain, 'entity_text': no_question}}
    question_answer_list.append(this_q)

    return question_answer_list
import pyodbc
import pandas as pd
from DBUtils.PooledDB import PooledDB
import googlemaps
import pickle
from app import general_pool_criteria

# Use the googlemaps package to geocode locations.
# A Google Maps API key is required for the geocoding service.
# fengyang's API key: '*******'
# NOTE(review): the key is an empty string here; supply a real key before
# running (the client presumably rejects an empty key - confirm).
gmap_api_key = ''
gmaps = googlemaps.Client(key=gmap_api_key)

# Load nct_id and facilities_and_contacts for every row of
# dbo.aact_trial_info into trial_data.
conn = general_pool_criteria.connection()
cur = conn.cursor()
sql = '''
        select nct_id, facilities_and_contacts
        from dbo.aact_trial_info
        '''
cur.execute(sql)
trial_data = cur.fetchall()
# NOTE(review): the connection is closed before its cursor; confirm the pool
# tolerates this order.
conn.close()
cur.close()

def split_locs_into_list(x):
    '''
    Split a '|'-delimited string into a list of its parts.
    :param x: the delimited string, or None
    :return: the parts of x, or an empty list when x is None
    '''
    if x is None:
        return []
    return x.split('|')

# NOTE(review): counter initialised to zero; the code that consumes it is not
# visible in this excerpt.
i = 0
示例#15
0
def filter_nct_ids_by_pre_questions(answer_list):
    '''
        find working nct id list after filter the answers to pre-questions

        Trials are read from dbo.aact_trial_info_us left-joined with
        dbo.key_criteria on nct_id and kept when they match the age range,
        gender, disease status, exposure status, hospitalization status,
        pregnancy status and days-to-disease conditions below. The answers
        are bound as query parameters rather than spliced into the SQL text,
        because they come from user input.

        :param answer_list: the list of the answers to pre-questions, in the
            order [age, gender, domain, user_picked_time, exposure, stat,
            preg]; every item is converted with str() before use
        :return: a working nct id list in our annotation list (each id as str)
    '''
    answer_list = [str(x) for x in answer_list]
    age = answer_list[0]
    gender = answer_list[1]
    domain = answer_list[2]
    user_picked_time = answer_list[3]
    exposure = answer_list[4]
    stat = answer_list[5]
    preg = answer_list[6]

    # The 'None' sentinel (str(None)) is resolved here instead of inside a SQL
    # CASE so that the date conversion is never evaluated on it. Any other
    # value, including '', goes through DATEDIFF.
    if user_picked_time == 'None':
        date_filter = 'keyc.days_to_disease is null'
        date_params = []
    else:
        date_filter = '''(DATEDIFF(day, CONVERT(VARCHAR, ?, 101), CONVERT(VARCHAR, getdate(), 101))
                        <= keyc.days_to_disease or keyc.days_to_disease is null)'''
        date_params = [user_picked_time]

    # Age is bound as text, so it is cast to compare numerically with the age
    # columns.
    sql = '''
            select distinct aact.nct_id
            from dbo.aact_trial_info_us as aact
            left outer join dbo.key_criteria as keyc
                on aact.nct_id = keyc.nct_id
            where
                (CAST(? AS float) >= aact.minimum_age or aact.minimum_age is null)
            and
                (CAST(? AS float) <= aact.maximum_age or aact.maximum_age is null)
            and
               (1= case
                        when ? = 'male' and aact.gender in ('male', 'all') then 1
                        when ? = 'female' and aact.gender in ('female', 'all') then 1
                        when ? = 'other' and aact.gender in ('male', 'female', 'all') then 1
                            else 0
                    end)
             and
                (1= case
                    when ? = 'yes' and  keyc.disease_status in ('yes', 'all') then 1
                    when ? = 'no' and keyc.disease_status in ('no', 'all') then 1
                    when ? = 'cleared' and keyc.disease_status in ('cleared', 'all') then 1
                    when ? = 'all' and keyc.disease_status in ('yes','no','cleared', 'all') then 1
                        else 0
                end)
            and
                (1= case
                    when ? = 'yes' and (keyc.exposure_status = 'yes' or keyc.exposure_status is null) then 1
                    when ? = 'no' and (keyc.exposure_status = 'no' or keyc.exposure_status is null) then 1
                    when ? = 'idk' and (keyc.exposure_status in ('yes', 'no') or keyc.exposure_status is null) then 1
                        else 0
                end)
            and
                (1= case
                    when ? = 'yes' and (keyc.is_hospitalized = 1 or keyc.is_hospitalized is null) then 1
                    when ? = 'no' and (keyc.is_hospitalized = 0 or keyc.is_hospitalized is null) then 1
                    when ? = 'idk' and (keyc.is_hospitalized in (1, 0) or keyc.is_hospitalized is null) then 1
                        else 0
                end)
            and
                (1= case
                    when ? = 'yes' and (keyc.preg_status = 1 or keyc.preg_status is null) then 1
                    when (? = 'no' or ? = 'n/a') and (keyc.preg_status = 0 or keyc.preg_status is null) then 1
                    when ? = 'idk' and (keyc.preg_status in (1, 0) or keyc.preg_status is null) then 1
                        else 0
                    end)
            and
                %s
        ''' % date_filter

    # One value per marker, in the order the markers appear in the statement.
    params = ([age] * 2 + [gender] * 3 + [domain] * 4 + [exposure] * 3 +
              [stat] * 3 + [preg] * 4 + date_params)

    print(sql)
    conn = general_pool_criteria.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, params)
            nctids = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    result_ids = [str(nctid[0]) for nctid in nctids]
    print('after filter pre-questions: ', len(nctids), result_ids)
    return result_ids