Exemplo n.º 1
0
    ];
# Single-letter item prefixes; each one is combined with every category below.
ITEM_PREFIXES = list("ABCDEFGHI")

# Filled in below with one argument list per (category, item prefix) pair.
specificArgvList = []

# Build the per-process argument lists: one entry for every combination of
# category and item prefix, each writing to its own gzipped results file.
for categoryName in CATEGORY_LIST:
    for itemPrefix in ITEM_PREFIXES:
        specificArgv = [
            "-c", categoryName,
            "-i", itemPrefix,
            "results/queryResults.%s.%s.tab.gz" % (itemPrefix, categoryName),
        ]
        specificArgvList.append(specificArgv)

# Launch one child process per entry in specificArgvList, sending each child's
# stderr to its own log file. The progress tracker's current count is used as
# a numeric prefix in the log file key.
prog = ProgressDots(1,1,"Processes",total=len(specificArgvList));
for specificArgv in specificArgvList:
    # Log key: zero-padded count, then the arguments joined with underscores
    key = "%.3d.%s" % (prog.getCounts(), str.join("_",specificArgv) );
    key = key.replace("/","..");    # Don't want to use directory separator in temp log file name
    # Copy the shared base command so the per-process arguments do not accumulate on it
    argv = list(baseArgv)
    argv.extend(specificArgv);
    log.info( "Starting: "+str.join(" ", argv) );
    # NOTE(review): logFile is not closed anywhere in the visible part of this loop -- confirm it is released later
    logFile = stdOpen(LOG_FILE_TEMPLATE % key,"w")
    
    # Blocking sub-process call if want serial processes.
    #subprocess.call(argv, stderr=logFile);

    # Non-blocking subprocess.Popen to spawn parallel processes
    process = subprocess.Popen(argv, stderr=logFile);
    log.info("Process ID: %s" % process.pid);

    # Print command lines to effectively generate a .sh script
    #print "nohup",
Exemplo n.º 2
0
    'LABNTBNP', 'LABOSM', 'LABPALB', 'LABPCCG4O', 'LABPCCR', 'LABPCTNI',
    'LABPHOS', 'LABPLTS', 'LABPROCT', 'LABPT', 'LABPTEG', 'LABPTT', 'LABRESP',
    'LABRESPG', 'LABRETIC', 'LABSPLAC', 'LABSTLCX', 'LABSTOBGD', 'LABTNI',
    'LABTRFS', 'LABTRIG', 'LABTSH', 'LABUCR', 'LABUOSM', 'LABUA', 'LABUAPRN',
    'LABUPREG', 'LABURIC', 'LABURNA', 'LABURNC', 'LABUSPG'
]

# Lab codes to launch the pipeline for in this run.
labs_to_test = ["LABLACWB"]
# Previously used selection, kept for reference (entries prefixed with '#'
# were individually disabled):
#    [#"LABMGN, #"LABPTT", #"LABPHOS", "LABTNI", #"LABLACWB",
#    #"LABA1C", #"LABHEPAR", #"LABPLTS", "LABLAC","LABLIPS",
#    "LABTSH", "LABHCTX", "LABLDH", "LABK", "LABNTBNP",
#    "LABCRP", "LABCK", #"LABFER",
#    "LABUSPG"]

# Command prefix shared by every child process; the lab code is appended to it.
baseArgv = ["python", "LabChangePredictionPipeline.py"]

# Launch one child process per lab in labs_to_test, sending each child's
# stderr to its own log file. The progress tracker's current count is used as
# a numeric prefix in the log file key.
prog = ProgressDots(1, 1, "Processes", total=len(labs_to_test))
for lab_name in labs_to_test:
    # Log key: zero-padded count followed by the lab code
    key = "%.3d.%s" % (prog.getCounts(), lab_name)
    key = key.replace(
        "/",
        "..")  # Don't want to use directory separator in temp log file name
    # Copy the shared base command so lab names do not accumulate on it
    argv = list(baseArgv)
    argv.append(lab_name)
    log.info("Starting: " + str.join(" ", argv))
    # NOTE(review): logFile is not closed anywhere in the visible part of this loop -- confirm it is released later
    logFile = stdOpen(LOG_FILE_TEMPLATE % key, "w")

    # subprocess.Popen returns without waiting for the child to finish
    p = subprocess.Popen(argv, stderr=logFile)
    # Blocking sub-process call. Can just run multiple instances to parallelize, or consider  subprocess.Popen objects instead
Exemplo n.º 3
0
def main(argv):
    """Print a tab-delimited table of prescription/diagnosis association stats.

    For every prescription group in the hard-coded medication-ID table, the
    function counts patients matching that group, then counts them again
    broken down by diagnosis group (column ``DX_COL`` of
    ``stride_problem_list``, restricted to ``noted_date < LIMIT_DATE``).  Each
    (prescription, diagnosis) row is passed to ``ContingencyStats`` and written
    to stdout together with the statistics named in ``statIds``.  When
    ``DOUBLE_DX`` is set, diagnosis groups are pairs of diagnoses and an extra
    "Dx2" column is emitted.

    The header row is written exactly once, before any data row.

    Args:
        argv: Not used by this function.
    """
    # Medication IDs belonging to each active prescription group
    medIdsByActiveRx = dict()
    medIdsByActiveRx['Buprenorphine'] = ('125498', '114474', '212560',
                                         '114475', '114467', '114468')
    medIdsByActiveRx['Fentanyl Patch'] = ('2680', '27908', '125379', '27905',
                                          '27906', '540107', '540638',
                                          '540101', '27907')
    medIdsByActiveRx['Methadone'] = ('540483', '4953', '4951', '10546',
                                     '214468', '15996', '41938', '4954',
                                     '4952')
    medIdsByActiveRx['Hydrocodone'] = ('3724', '4579', '8576', '8577', '8951',
                                       '10204', '12543', '13040', '14963',
                                       '14965', '14966', '17061', '17927',
                                       '19895', '20031', '28384', '29486',
                                       '29487', '34505', '34544', '35613',
                                       '117862', '204249', '206739')
    medIdsByActiveRx['Hydromorphone'] = ('2458', '2459', '2464', '2465',
                                         '3757', '3758', '3759', '3760',
                                         '3761', '10224', '10225', '10226',
                                         '10227', '200439', '201094', '201096',
                                         '201098', '540125', '540179',
                                         '540666')
    medIdsByActiveRx['Morphine'] = (
        '5167', '5168', '5172', '5173', '5176', '5177', '5178', '5179', '5180',
        '5183', '6977', '10655', '15852', '20908', '20909', '20910', '20914',
        '20915', '20919', '20920', '20921', '20922', '29464', '30138', '31413',
        '36140', '36141', '79691', '87820', '89282', '91497', '95244', '96810',
        '112562', '112564', '115335', '115336', '126132', '198543', '198544',
        '198623', '201842', '201848', '205011', '206731', '207949', '208896',
        '540182', '540300')
    medIdsByActiveRx['Oxycodone'] = ('5940', '5941', '6122', '6981', '10812',
                                     '10813', '10814', '14919', '16121',
                                     '16123', '16129', '16130', '19187',
                                     '26637', '26638', '27920', '27921',
                                     '27922', '27923', '28897', '28899',
                                     '28900', '31851', '31852', '31863',
                                     '31864', '92248', '126939', '200451',
                                     '203690', '203691', '203692', '203705',
                                     '203706', '203707', '204020', '204021')

    # Overall patient count, taken from the first cell of the unfiltered base
    # query.  Stored as a float (presumably so later rate calculations avoid
    # integer division -- confirm against ContingencyStats).
    query = baseQuery()
    totalPatients = float(DBUtil.execute(query)[0][0])

    # Patient count per diagnosis group (or per pair of groups if DOUBLE_DX)
    patientsPerDxGroup = dict()
    query = SQLQuery()
    query.addSelect("count(distinct prob.pat_id) as ptCount")
    query.addSelect("prob.%s" % DX_COL)
    query.addFrom("stride_problem_list as prob")
    query.addWhereOp("prob.noted_date", "<", LIMIT_DATE)
    query.addGroupBy("prob.%s" % DX_COL)
    if DOUBLE_DX:
        query.addSelect("prob2.%s" % DX_COL)
        query.addFrom("stride_problem_list as prob2")
        query.addWhere("prob.pat_id = prob2.pat_id")
        query.addWhereOp("prob2.noted_date", "<", LIMIT_DATE)
        query.addGroupBy("prob2.%s" % DX_COL)
    for row in DBUtil.execute(query):
        dxGroup = row[1]
        if DOUBLE_DX:
            # Composite tuple including second diagnosis
            dxGroup = (dxGroup, row[2])
        patientsPerDxGroup[dxGroup] = row[0]

    # Statistics read from ContingencyStats for every output row.  Defined
    # once here because they do not vary between prescriptions.
    statIds = (
        "P-Fisher",
        "P-YatesChi2",
        "oddsRatio",
        "relativeRisk",
        "interest",
        "LR+",
        "LR-",
        "sensitivity",
        "specificity",
        "PPV",
        "NPV",
    )
    headerCols = ["Rx", "Dx", "RxDxCount", "RxCount", "DxCount", "Total"]
    if DOUBLE_DX:
        headerCols.insert(2, "Dx2")
    headerCols.extend(statIds)

    progress = ProgressDots()

    # Emit the header unconditionally and only once.  Gating it on the
    # progress counter being zero would repeat it whenever the leading
    # prescriptions produce no data rows, because the counter only advances
    # per emitted row.
    print("\t".join(headerCols))

    for activeRx, medIds in medIdsByActiveRx.items():
        query = baseQuery()
        query.addWhereIn("medication_id", medIds)

        # Baseline prescription count, before the diagnosis breakdown is added
        rxPtCount = DBUtil.execute(query)[0][0]

        # Query out per diagnosis group, but do as aggregate grouped query.
        # The same query object is extended, so the medication filter above
        # still applies.
        query.addSelect("prob.%s" % DX_COL)
        query.addFrom("stride_problem_list as prob")
        query.addWhere("med.pat_id = prob.pat_id")
        query.addWhereOp("prob.noted_date", "<", LIMIT_DATE)
        query.addGroupBy("prob.%s" % DX_COL)
        if DOUBLE_DX:
            query.addSelect("prob2.%s" % DX_COL)
            query.addFrom("stride_problem_list as prob2")
            query.addWhere("prob.pat_id = prob2.pat_id")
            query.addWhereOp("prob2.noted_date", "<", LIMIT_DATE)
            query.addGroupBy("prob2.%s" % DX_COL)

        for row in DBUtil.execute(query):
            rxDxPtCount = row[0]
            dxGroup = row[1]
            if DOUBLE_DX:
                # Composite tuple including second diagnosis
                dxGroup = (dxGroup, row[2])
            dxPtCount = patientsPerDxGroup[dxGroup]

            conStats = ContingencyStats(rxDxPtCount, rxPtCount, dxPtCount,
                                        totalPatients)

            dataCells = [
                activeRx, dxGroup, rxDxPtCount, rxPtCount, dxPtCount,
                totalPatients
            ]
            if DOUBLE_DX:
                # Split the composite diagnosis into the Dx and Dx2 columns
                dataCells[1] = dxGroup[0]
                dataCells.insert(2, dxGroup[1])
            for statId in statIds:
                try:
                    dataCells.append(conStats[statId])
                except ZeroDivisionError:
                    # Statistic undefined for this table; keep the column
                    dataCells.append(None)

            # String conversion to allow for joining into one output line
            print("\t".join([str(value) for value in dataCells]))
            progress.update()
    progress.printStatus()