Exemplo n.º 1
0
def vuid2rxcui():
    """
    Build ./vuid_to_rxcui.csv from RxNorm API lookups of every unique VUID.

    The API results are cached on disk so the calls only have to be made
    once: when the CSV already exists the function returns immediately
    without loading data or calling the API. The file can later be merged
    onto the data cube.

    Input: none; the aggregated source data is read via load_agg_data()
    Output: None; writes a CSV with the unique VUIDs, their RxNorm CUIs in
            columns rxcui1..rxcuiN, and rxcui1_brand / rxcui1_generic
    """
    csv_path = './vuid_to_rxcui.csv'
    if os.path.exists(csv_path):
        if config.print_status:
            print('VUIDs already created in .csv file')
        return None

    # load data & find unique values
    source = load_agg_data()
    vuids = source.VUID.unique()

    if config.print_status:
        print('Initiating API requests for VUIDs')
    # make iterable list of URLs to send to api
    urls = api_rxnorm.make_url_requests(vuids, 'vuid')
    results = api_rxnorm.async_calls(urls)

    # Raw bytes literal: a plain b'\d+' relies on an invalid escape sequence,
    # which recent Python versions warn about. Compiled once, outside the loop.
    digit_run = re.compile(rb'\d+')

    # The first run of digits in a response is taken as the VUID; every
    # further run is one of its RxCUIs (there can be several per VUID).
    cuis_by_vuid = {}
    for result in results:
        digits = digit_run.findall(result)
        cuis_by_vuid[int(digits[0])] = [int(d) for d in digits[1:]]

    vuid_to_rxcui = pd.DataFrame.from_dict(cuis_by_vuid, orient='index')
    vuid_to_rxcui.reset_index(inplace=True)
    # NOTE(review): the "- 1" presumably accounts for one VUID that yields no
    # row of its own (e.g. a missing value) -- confirm against the source data.
    assert (len(vuid_to_rxcui) == len(vuids) - 1)

    # make enough column names for all extra placeholders
    vuid_to_rxcui.columns = ['vuid'] + [
        'rxcui' + str(i) for i in range(1, vuid_to_rxcui.shape[1])
    ]

    if config.print_status:
        print('API requests for VUID to RxCUI Complete.')
        print('Initiating API requests for generic & brand RxCUIs...')

    # search for generic & brand names
    rxcui_brand_generic = api_rxnorm.getBrandGeneric(vuid_to_rxcui,
                                                     cui_col='rxcui1')

    # coerce from dictionary to dataframe to allow merge
    brand_generic = pd.DataFrame.from_dict(rxcui_brand_generic, orient='index')
    brand_generic.rename(columns={0: 'rxcui1_brand', 1: 'rxcui1_generic'},
                         inplace=True)
    # the merge key is cast to float to line up with the rxcui1 column
    brand_generic['rxcui1'] = brand_generic.index.astype(float)

    # merge & save (the DataFrame index is written as the first CSV column)
    vuid_to_rxcui = vuid_to_rxcui.merge(brand_generic, how='left', on='rxcui1')
    vuid_to_rxcui.to_csv(csv_path)

    if config.print_status:
        print('All API requests for VUIDs complete')
Exemplo n.º 2
0
def cui2class(data):
    """
    Look up the drug classes of every RxCUI in `data` and save them to CSV.

    The RxNorm API is queried once per unique RxCUI, in batches of
    config.batch_size, so the calls only have to be made once and the result
    can be merged onto the data cube.

    Input: dataframe of unique drug/strength/unit/form along with best-matched
           RxCUIs; it must have a 'unique_combo' column, and the values of all
           other columns are used as numeric RxCUIs (NaN = missing)
    Output: None; writes config.out_dir + 'cui2class.csv' with one row per
            RxCUI found in a response and its class names in cat1..catN
    """

    # load data & find unique values
    rxcui_columns = data.drop(columns='unique_combo')
    search_terms = np.unique(rxcui_columns.values)
    # drop missing values
    search_terms = search_terms[~np.isnan(search_terms)]

    if config.print_status:
        print('Initiating ' + str(len(search_terms)) +
              ' API requests for Drug Classes of RxCUIs')

    # break the terms into subsections to prevent API timeout
    batch_size = config.batch_size
    batches = [
        search_terms[start:start + batch_size]
        for start in range(0, len(search_terms), batch_size)
    ]

    if config.print_status:
        print('Large list of terms. Created ' + str(len(batches)) +
              ' batches.')

    # RxCUI (as text) -> class names; named differently from the function so
    # the function's own name is not shadowed inside its body
    classes_by_cui = {}

    for batch_number, terms in enumerate(batches, start=1):
        if config.print_status:
            print('Starting batch # ' + str(batch_number))

        # make iterable list of URLs to send to api
        urls = api_rxnorm.make_url_requests(terms, 'cui2class')
        results = api_rxnorm.async_calls(urls)

        for result in results:
            soup = BeautifulSoup(result, 'html.parser')
            rxcui_tag = soup.find('rxcui')
            if rxcui_tag is None:
                # response carries no RxCUI to key the classes on: skip it
                continue
            # De-duplicate, then sort: iterating a set of strings varies
            # between interpreter runs, which made the cat1..catN column
            # order non-reproducible.
            class_names = sorted(
                {tag.get_text() for tag in soup.find_all('classname')})
            classes_by_cui[rxcui_tag.get_text()] = class_names

    class_table = pd.DataFrame.from_dict(classes_by_cui, orient='index')
    class_table.reset_index(inplace=True)
    # make enough column names for all extra placeholders
    class_table.columns = ['rxcui'] + [
        'cat' + str(i) for i in range(1, class_table.shape[1])
    ]

    # save
    class_table.to_csv(config.out_dir + 'cui2class.csv', index=False)

    if config.print_status:
        print(
            'All API requests for Converting RxCUIs to MESH Classes complete')
Exemplo n.º 3
0
def pharmaorder2rxcui():
    """
    Build ./pharmaorder_to_rxcui.csv from RxNorm API lookups of every unique
    PharmacyOrderableItem.

    The API results are cached on disk so the calls only have to be made
    once: when the CSV already exists the function returns immediately
    without loading data or calling the API. The file can later be merged
    onto the data cube.

    Input: none; the aggregated source data is read via load_agg_data()
    Output: None; writes a CSV with the unique PharmacyOrderableItems echoed
            back by the API, their RxNorm CUIs in columns rxcui1..rxcuiN,
            and rxcui1_brand / rxcui1_generic
    """
    csv_path = './pharmaorder_to_rxcui.csv'
    if os.path.exists(csv_path):
        if config.print_status:
            print('PharmacyOrderables already created in .csv file')
        return None

    # load data & find unique values
    source = load_agg_data()
    orderables = source.PharmacyOrderableItem.unique()

    if config.print_status:
        print('Initiating API requests for PharmacyOrderableItems')

    # make iterable list of URLs to send to api
    urls = api_rxnorm.make_url_requests(orderables, 'string')
    results = api_rxnorm.async_calls(urls)

    # search term echoed by the API -> set of RxCUIs found in that response
    cuis_by_orderable = {}
    for result in results:
        soup = BeautifulSoup(result, 'xml')
        cuis = {int(tag.get_text()) for tag in soup.find_all('rxcui')}
        input_term = soup.find('inputTerm')
        if input_term is None:
            # Without the echoed search term the response cannot be tied
            # back to an orderable; skip it rather than abort the whole run
            # (same policy as structured_elements2rxcui).
            continue
        cuis_by_orderable[input_term.get_text()] = cuis

    pharmaorder_to_rxcui = pd.DataFrame.from_dict(cuis_by_orderable,
                                                  orient='index')
    pharmaorder_to_rxcui.reset_index(inplace=True)
    # make enough column names for all extra placeholders
    pharmaorder_to_rxcui.columns = ['pharmaorder'] + [
        'rxcui' + str(i) for i in range(1, pharmaorder_to_rxcui.shape[1])
    ]

    if config.print_status:
        print('API requests for PharmacyOrderableItems to RxCUI Complete.')
        print('Initiating API requests for generic & brand RxCUIs...')

    # search for generic & brand names
    rxcui_brand_generic = api_rxnorm.getBrandGeneric(pharmaorder_to_rxcui,
                                                     cui_col='rxcui1')

    # coerce from dictionary to dataframe to allow merge
    brand_generic = pd.DataFrame.from_dict(rxcui_brand_generic, orient='index')
    brand_generic.rename(columns={0: 'rxcui1_brand', 1: 'rxcui1_generic'},
                         inplace=True)
    # the merge key is cast to float to line up with the rxcui1 column
    brand_generic['rxcui1'] = brand_generic.index.astype(float)

    # merge & save (the DataFrame index is written as the first CSV column)
    pharmaorder_to_rxcui = pharmaorder_to_rxcui.merge(brand_generic,
                                                      how='left',
                                                      on='rxcui1')
    pharmaorder_to_rxcui.to_csv(csv_path)

    if config.print_status:
        print('All API requests for PharamcyOrderableItems complete')
Exemplo n.º 4
0
def structured_elements2rxcui(data):
    """
    Look up the RxNorm CUIs for every unique medication description in
    `data` and save them to CSV.

    The RxNorm API is queried once per unique combination, in batches of
    config.batch_size, so the calls only have to be made once and the result
    can be merged onto the data cube.

    Input: aggregated source data as a dataframe with the columns
           DrugNameWithoutDose, StrengthText, DrugUnit and DosageForm
    Output: None; writes config.out_dir + 'structured_elements_to_rxcui.csv'
            with the unique ingredient/strength/unit/form combinations that
            the API echoed back and their RxNorm CUIs in rxcui1..rxcuiN
    """

    # find unique values and turn them into search strings
    structured_elements = data[[
        'DrugNameWithoutDose', 'StrengthText', 'DrugUnit', 'DosageForm'
    ]].drop_duplicates()
    search_terms = api_rxnorm.make_string(structured_elements)

    if config.print_status:
        print('Initiating ' + str(len(search_terms)) +
              ' API requests for Unique Meds')

    # break the terms into subsections to prevent API timeout
    batch_size = config.batch_size
    batches = [
        search_terms[start:start + batch_size]
        for start in range(0, len(search_terms), batch_size)
    ]

    if config.print_status:
        print('Large list of terms. Created ' + str(len(batches)) +
              ' batches.')

    # search term echoed by the API -> set of RxCUIs found in that response
    cuis_by_combo = {}

    for batch_number, terms in enumerate(batches, start=1):
        if config.print_status:
            print('Starting batch # ' + str(batch_number))
        # make iterable list of URLs to send to api
        urls = api_rxnorm.make_url_requests(terms, 'string')
        results = api_rxnorm.async_calls(urls)

        for result in results:
            # html.parser lower-cases tag names, hence 'inputterm' below
            soup = BeautifulSoup(result, 'html.parser')
            cuis = {int(tag.get_text()) for tag in soup.find_all('rxcui')}
            input_term = soup.find('inputterm')
            if input_term is None:
                # Without the echoed search term the response cannot be tied
                # back to a combination: skip it (best effort, as before, but
                # without a bare except hiding unrelated errors).
                continue
            cuis_by_combo[input_term.get_text()] = cuis

    structured_elements_to_rxcui = pd.DataFrame.from_dict(cuis_by_combo,
                                                          orient='index')
    structured_elements_to_rxcui.reset_index(inplace=True)
    # make enough column names for all extra placeholders
    structured_elements_to_rxcui.columns = ['unique_combo'] + [
        'rxcui' + str(i)
        for i in range(1, structured_elements_to_rxcui.shape[1])
    ]

    if config.print_status:
        print('API requests for Structured Elements to RxCUI Complete.')

    # save
    structured_elements_to_rxcui.to_csv(config.out_dir +
                                        'structured_elements_to_rxcui.csv',
                                        index=False)

    if config.print_status:
        print('All API requests for Structured Elements complete')
Exemplo n.º 5
0
def ndc2rxcui():
    """
    Build ./ndc_to_rxcui.csv from RxNorm API lookups of every unique NDC.

    The API results are cached on disk so the calls only have to be made
    once: when the CSV already exists the function returns immediately
    without loading data or calling the API. The file can later be merged
    onto the data cube.

    Input: none; the aggregated source data is read via load_agg_data()
    Output: None; writes a CSV with the unique NDCs that had a corresponding
            RxNorm CUI, plus rxcui_brand / rxcui_generic
    """
    csv_path = './ndc_to_rxcui.csv'
    if os.path.exists(csv_path):
        if config.print_status:
            print('NDCs already created in .csv file')
        return None

    # load data & find unique values
    source = load_agg_data()
    ndcs = source.NDC.unique()

    if config.print_status:
        print('Initiating API requests for NDCs')

    # make iterable list of URLs to send to api
    urls = api_rxnorm.make_url_requests(ndcs, 'ndc')
    results = api_rxnorm.async_calls(urls)

    # Raw bytes literals (a plain b'\d' is an invalid escape sequence that
    # recent Python versions warn about), compiled once outside the loop.
    ndc_pattern = re.compile(rb'\d+(-\d+)*')
    cui_pattern = re.compile(rb'\d+')

    ndc_to_rxcui = {}
    for result in results:
        try:
            # first digit run (optionally hyphen-separated) is taken as the NDC
            ndc_match = ndc_pattern.search(result)
        except TypeError:
            # result is not a bytes-like object, so there is nothing to parse
            continue
        if ndc_match is None:
            continue

        # The RxCUI is assumed to be the first digit run AFTER the NDC, and
        # only one is read. Searching from the end of the NDC match replaces
        # a fixed byte offset that only held for NDCs of a typical length.
        cui_match = cui_pattern.search(result, ndc_match.end())
        if cui_match is None:
            # NDC without a corresponding RxCUI: leave it out of the file
            continue

        ndc_to_rxcui[ndc_match[0].decode("utf-8")] = int(cui_match[0])

    ndc_table = pd.DataFrame.from_dict(ndc_to_rxcui, orient='index')
    ndc_table.reset_index(inplace=True)
    ndc_table.columns = ['ndc', 'rxcui']

    if config.print_status:
        print('API requests for NDC to RxCUI Complete.')
        print('Initiating API requests for generic & brand RxCUIs...')

    # search for generic & brand names
    rxcui_brand_generic = api_rxnorm.getBrandGeneric(ndc_table,
                                                     cui_col='rxcui')

    # coerce from dictionary to dataframe to allow merge
    brand_generic = pd.DataFrame.from_dict(rxcui_brand_generic, orient='index')
    brand_generic.rename(columns={0: 'rxcui_brand', 1: 'rxcui_generic'},
                         inplace=True)
    brand_generic['rxcui'] = brand_generic.index.astype(float)

    # merge & save (the DataFrame index is written as the first CSV column)
    ndc_table = ndc_table.merge(brand_generic, how='left', on='rxcui')
    ndc_table.to_csv(csv_path)

    if config.print_status:
        print('All API requests for NDCs complete')