def process_wrapper(in_queue, out_queue, plock, disease_file, ndo=0):
    dx = bd.BeeStringDict(marketscan + 'DX', keysize=9, readonly=True)
    idx = bd.BeeStringDict(marketscan + 'indices', keysize=51, readonly=True)
    code2dx = pickle.load(open(disease_file, 'rb'))

    for (county, index, tot_cty) in iter( in_queue.get, None ):
        if index % 50000 == 0:
            plock.acquire()
            print 'AT {:1.2f} %\n'.format(index/float(tot_cty)*100)
            sys.stdout.flush()
            plock.release()
        ret = {}

        ## Do DX for M and F separately
        for gender in ['M','F']:
            county_ids = set()
            try:
                county_ids = idx[gender + ' ' + county]
            except KeyError:
                print 'indices does not have key: ' + gender + ' ' + county
                continue
            if ndo > 0:
                county_ids = set(list(county_ids)[:ndo])
            ret[gender] = do_county(dx, county_ids, code2dx)
        if ret:
            out_queue.put((county, ret))
    out_queue.put(None)
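A hedged driver sketch (not part of the original source; the name run_counties and the worker count are assumptions) showing how a parent process could feed process_wrapper: items are (county, index, tot_cty) tuples, one None sentinel is queued per worker, and results are collected until every worker has echoed None back.

import multiprocessing as mp

def run_counties(counties, disease_file, num_workers=4):
    in_queue = mp.Queue()
    out_queue = mp.Queue()
    plock = mp.Lock()

    workers = [mp.Process(target=process_wrapper,
                          args=(in_queue, out_queue, plock, disease_file))
               for _ in range(num_workers)]
    for w in workers:
        w.daemon = True
        w.start()

    tot_cty = len(counties)
    for index, county in enumerate(counties):
        in_queue.put((county, index, tot_cty))
    for _ in workers:
        in_queue.put(None)              # one sentinel per worker

    results = {}
    finished = 0
    while finished < num_workers:       # each worker echoes None when done
        item = out_queue.get()
        if item is None:
            finished += 1
        else:
            county, ret = item
            results[county] = ret

    for w in workers:
        w.join()
    return results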
Example #2
def main():

    rx = bd.BeeStringDict('RX', keysize=9, readonly=1)
    rx_set = bd.BeeStringDict('RXset', keysize=9, readonly=0)

    count = -1
    empty = 0

    keys = sorted(rx.keys())
    print len(keys)

    for key in keys:

        line = rx.get(key, '')

        if len(line) > 9:

            # fields are '|'-separated, each formatted as 'code:...'; keep codes longer than 4 chars
            subs = line.split('|')
            codes = []
            for sub in subs:
                code = sub.split(':')[0]
                if len(code) > 4:
                    codes.append(code)

            #print codes
            rx_set[key] = set(codes)
            rx_set.commit()
        else:
            empty += 1

        count += 1
        if count % 10000 == 0:
            print rich_string_wrap(num2comma2(count), 'y', 0, 'k', 0), \
                rich_string_wrap(num2comma2(empty), 'r', 0, 'k', 0), \
                ('%4.3f' % (float(empty + 0.5) / float(count + 0.5)))
Example #3
def __init__(self,
             filename=None,
             type=None,
             index_attrs=None,
             key_sizes=None,
             from_dump=False):
    if from_dump:
        # Rebuild the instance from the pickled dump written by a previous run.
        path = os.getcwd() + '/storage/' + type.__name__ + '/' + 'dumb.p'
        #storage = Storage()
        #storage.open_file(path)
        #filetext = storage.current_file.read()
        #c = pickle.load(StringIO.StringIO(filetext))
        c = pickle.load(open(path, 'rb'))
        self.filename = c.filename
        self.name = type.__name__
        self.gd = c.gd
        self.pp = c.pp
        self.type = c.type
        self.counter = c.counter
        self.size = c.size
        self.m = c.m
        self.index_attrs = c.index_attrs
        self.key_sizes = c.key_sizes
        self.trees = {}
        print("Extracting ")
        #storage.extract('storage/' + type.__name__)
        print("Extracting done ")
        for index_attr, key_size in zip(self.index_attrs, self.key_sizes):
            # Reopen one BeeStringDict index tree per indexed attribute.
            tree = BeeDict.BeeStringDict(os.getcwd() + '/storage/' +
                                         type.__name__ + index_attr,
                                         keysize=key_size)
            # tree.close()
            self.trees[index_attr] = tree
    else:
        self.filename = filename
        self.gd = 0
        self.pp = [0]
        self.type = type
        self.name = type.__name__
        self.counter = 0
        self.size = 0
        self.m = {}
        if index_attrs is not None and key_sizes is not None:
            self.index_attrs = index_attrs
            self.key_sizes = key_sizes
            self.trees = {}
            for attr, key_size in zip(index_attrs, key_sizes):
                tree = BeeDict.BeeStringDict(os.getcwd() + '/storage/' +
                                             type.__name__ + attr,
                                             keysize=key_size)
                # tree.close()
                self.trees[attr] = tree
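A hypothetical usage sketch for this constructor (the class name Container is assumed, not taken from the source; student is the record class used elsewhere in these examples, and keysize 256 for the 'name' index mirrors page_test in example #6):

# Build a fresh container indexed on 'name'.
fresh = Container(filename='students.dat',
                  type=student,
                  index_attrs=['name'],
                  key_sizes=[256])

# Later, rebuild the same object from the pickled dump the class keeps
# under storage/<type name>/dumb.p.
restored = Container(type=student, from_dump=True)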
Example #4
def process_pat_worker(in_queue, out_queue, plock, process_function, db_list,
                       **kwargs):
    for db in db_list:
        kwargs[db] = bd.BeeStringDict(db_list[db][0] + db,
                                      keysize=db_list[db][1],
                                      readonly=True)
    #people_dx = bd.BeeStringDict('kanix_matches_F_Breast_Cancer.txt_demo_bd',
    #                          keysize = 9, readonly=True)
    #kwargs['person_dx_info'] = people_dx ### BADDD
    #rx = bd.BeeStringDict(marketscan+'RX', keysize = 9, readonly=True)
    #dx = bd.BeeStringDict(marketscan+'DX', keysize = 9, readonly=True)
    #demo = bd.BeeStringDict(marketscan+'ID_demographics', keysize = 9, readonly=True)
    #rxset = bd.BeeStringDict(marketscan+'RXSet', keysize = 9, readonly=True)
    #dx = bd.BeeStringDict(marketscan+'DX', keysize = 9, readonly=True)

    ## iter(in_queue.get, None) keeps reading work items until a None sentinel arrives
    for (patline, index, tot_pat) in iter(in_queue.get, None):
        if index % 50000 == 0:
            plock.acquire()
            print 'AT {:1.2f} %\n'.format(index / float(tot_pat) * 100)
            sys.stdout.flush()
            plock.release()

        result = process_function(patline, **kwargs)
        if result is not None:
            out_queue.put(result)

    out_queue.put(None)
    plock.acquire()
    print 'child: is in_queue empty? ' + str(in_queue.empty())
    plock.release()
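process_pat_worker opens every BeeStringDict named in db_list before it starts consuming patients. A small illustration (not from the source) of the expected layout, name -> (path_prefix, keysize); the prefix value and the chosen databases are assumptions:

marketscan = '/Users/Data/'            # assumed path prefix
db_list = {
    'DX':    (marketscan, 9),          # opened as marketscan + 'DX', keysize 9
    'RXset': (marketscan, 9),          # opened as marketscan + 'RXset', keysize 9
}
# Each entry becomes kwargs[name] = bd.BeeStringDict(prefix + name,
#                                                    keysize=size, readonly=True)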
Example #5
def main():

    global drug2set

    drug2set = read_drug_sets()
    clean = False

    names = []
    for name in sorted(drug2set.keys()):
        imya = name.replace(" ", "_")
        names.append(imya)

    system('clear')

    from mx.BeeBase import BeeDict as bd
    rx_set = bd.BeeStringDict('../RXset', keysize=9, readonly=1)
    all_pats = sorted(rx_set.keys())

    ndrugs = len(names)
    num = 0

    for i in range(num_procs):
        procs.append(multiprocessing.Process(target=worker))
        procs[-1].daemon = True
        procs[-1].start()

    num = 0
    alle = float(len(all_pats))

    for item in all_pats:

        drug_set = rx_set.get(item, set([]))

        if num < len(all_pats):
            num += 1

            if len(drug_set) > 0:
                q.put((drug_set, num, item, alle))
        else:
            break

        if num < ndrugs:
            q.join()

    for p in procs:
        q.put(None)

    q.join()

    for p in procs:
        p.join()

    print "Finished everything...."
    print "num active children:", multiprocessing.active_children()
Example #6
def page_test():
    studs = get_dataset()
    p = Ipage()
    open('page.txt', 'w').close()
    from mx.BeeBase import BeeDict
    tree = BeeDict.BeeStringDict(os.getcwd() + '/storage/' + student.__name__ +
                                 'name',
                                 keysize=256)

    for stud in studs[0:10]:
        p.insert(stud)
        p.store('page.txt', 0)
    p.store_to_tree(tree, student, 'name', 'page.txt')
    print(zip(tree.keys(), tree.values()))
    tree.close()
Example #7
def main():

    global drug2set

    drug2set = read_drug_sets()
    clean = False

    names = []
    for name in sorted(drug2set.keys()):
        imya = name.replace(" ", "_")
        names.append(imya)

    system('clear')

    from mx.BeeBase import BeeDict as bd
    rx_set = bd.BeeStringDict('../RXset', keysize=9, readonly=1)
    all_pats = sorted(rx_set.keys())

    print num2comma2(len(all_pats))
Example #8
def tree_test():
    from mx.BeeBase import BeeDict
    # Here is a very simple file based string dictionary:
    # d = BeeDict.BeeStringDict('storage/BeeStringDict.example', keysize=26)
    # studs = get_shuffled_million()
    # i = 0
    # max = ''
    # for stud in studs[:100000]:
    #     with Profiler() as p:
    #         d[stud.attrs['name']] = stud.get_string()
    #         i += 1
    #         d.commit()
    #     if i % 5000 == 0:
    #         print('#', i)
    # d.close()

    d = BeeDict.BeeStringDict('storage/BeeStringDict.example', keysize=26)
    martha = d.cursor(key='Martha Morrow')
    print(martha.next())
    # print(len(d))
    # print(d['Martha Morrow'])
    print(martha.key, d[martha.key])
Example #9
from mx.BeeBase import BeeDict as bd
import pandas as pd
from itertools import chain
data = '/Volumes/Macintosh HD-1/Users/Data/'
data = '/Users/Data/'  # overrides the previous path; keep whichever mount applies
px = bd.BeeStringDict(data + 'PX', keysize=9, readonly=True)
rx = bd.BeeStringDict(data + 'RXset', keysize=9, readonly=True)
dx = bd.BeeStringDict(data + 'DX', keysize=9, readonly=True)
demo = bd.BeeStringDict(data + 'ID_demographics', keysize=9, readonly=True)

from pat_weeks import *

import billing_codes
[icd9, cpt] = billing_codes.load_icd9_cpt()
import time
t0 = time.time()
bcpat = open('ab').read().strip().split('\n')
ndo = 0  #100000
import numpy as np
lens = np.zeros((len(bcpat), 5))
for (i, pat) in enumerate(bcpat):
    wkinfo = get_pat_weeks(pat, dx, px)
    wnums = [int(w) for w in wkinfo['d'].keys()]
    brca_diagnos_wk = [
        int(w)
        for w in get_diagnosis_weeks(wkinfo, ('174', '239.3', '238.3', '233'))
    ]
    lens[i, :] = [
        len(wkinfo['d']),
        len(wkinfo['p']),
        max(wnums) - min(wnums),