示例#1
0
def addBMI ( allClinDict ):

    bmiVec = []
    catVec = []

    weightKey = findKey ( allClinDict, "weight" )
    heightKey = findKey ( allClinDict, "height" )

    for ii in range(len(allClinDict[weightKey])):
        w = allClinDict[weightKey][ii]
        h = allClinDict[heightKey][ii]
        try:
            bmi = float(w) / ( float(h/100.) * float(h/100.) )
            bmiCat = assignBMIcategory ( bmi )
            ## print w, h, bmi, bmiCat
            catVec += [ bmiCat ]
            bmiVec += [ bmi ]
        except:
            ## print w, h, "NA"
            ## if ( w != "NA" ): print " weight is not NA ??? "
            ## if ( h != "NA" ): print " height is not NA ??? "
            bmiVec += [ "NA" ]
            catVec += [ "NA" ]

    allClinDict["N:CLIN:BMI:::::"] = bmiVec
    allClinDict["C:CLIN:BMIcat:::::"] = catVec

    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict["N:CLIN:BMI:::::"] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount

    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict["C:CLIN:BMIcat:::::"] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    return ( allClinDict )
示例#2
0
def checkMenopause ( allClinDict ):

    print " in checkMenopause ... "
    print " "

    newVec = []

    menopauseKey = findKey ( allClinDict, "menopause_status" )
    ageKey = findKey ( allClinDict, "age_at_initial_pathologic_diagnosis" )

    for ii in range(len(allClinDict[ageKey])):
        m = allClinDict[menopauseKey][ii]
        a = allClinDict[ageKey][ii]
        if ( m.startswith("Pre_") ):
            newVec += [ "Pre" ]
        elif ( m.startswith("Post_") ):
            newVec += [ "Post" ]
        elif ( a >= 50 ):
            newVec += [ "Post" ]
        else:
            newVec += [ "Pre" ]

    allClinDict["C:CLIN:menopause50:::::"] = newVec

    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict["C:CLIN:menopause50:::::"] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    return ( allClinDict )
示例#3
0
def checkClinicalStage ( allClinDict ):

    print " in checkClinicalStage ... "
    print " "

    newStage = []

    stageKey = findKey ( allClinDict, "clinical_stage" )
    TstageKey = findKey ( allClinDict, "pathologic_T" )
    barcodeKey = findKey ( allClinDict, "bcr_patient_barcode" )

    numP = len(allClinDict[stageKey])

    for ii in range(numP):

        if ( allClinDict[TstageKey][ii].startswith("T1a") ):
            allClinDict[TstageKey][ii] = "T1b1"

        curStage = allClinDict[stageKey][ii]
        if ( curStage.startswith("IV") ):
            newStage += [ "III,IV" ]
        elif ( curStage.startswith("III") ):
            newStage += [ "III,IV" ]
        elif ( curStage.startswith("II") ):
            newStage += [ "II" ]
        elif ( curStage.startswith("I") ):
            newStage += [ "I" ]
        else:
            newStage += [ "NA" ]

        if ( 0 ):
            print " "
            print " "
            print ii, allClinDict[barcodeKey][ii], allClinDict[stageKey][ii], allClinDict[TstageKey][ii]

    ## as of 22sep ... there is stage info for 240 patients, and the counts
    ## look like this:
    ##          70  IB1
    ##          35  IB
    ##          34  IB2
    ##          33  IIIB
    ##          26  IIB
    ##           7  IIA2
    ##           7  IIA
    ##           5  IVB
    ##          etc
    ## after grouping, we get 147 stage I (61%), 49 stage II (20%), and 44 stage III,IV (18%)

    keyString = "C:CLIN:clinStage:::::"
    allClinDict[keyString] = newStage
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    return ( allClinDict )
示例#4
0
def checkCancerStatus ( allClinDict ):

    print " in checkCancerStatus ... "
    print " "

    newSite = []

    nteKey = findKey ( allClinDict, "new_tumor_event_after_initial_treatment" )
    siteKey = findKey ( allClinDict, "new_neoplasm_event_occurrence_anatomic_site" )
    typeKey = findKey ( allClinDict, "new_neoplasm_event_type" )
    textKey = findKey ( allClinDict, "new_neoplasm_occurrence_anatomic_site_text" )
    days2nteKey = findKey ( allClinDict, "days_to_new_tumor_event_after_initial_treatment" )

    numP = len(allClinDict[nteKey])

    for ii in range(numP):
        if ( allClinDict[nteKey][ii] == "YES" ):

            siteStr = allClinDict[siteKey][ii]

            if ( 1 ):
                if ( siteStr == "Other_specify" ):
                    siteStr = allClinDict[textKey][ii]
                elif ( siteStr == "NA" ):
                    siteStr = allClinDict[textKey][ii]

            if ( siteStr != "NA" ): siteStr = siteStr.lower()
            newSite += [ siteStr ]

            if ( 0 ):
                print " "
                print ii
                ## print " site : ", allClinDict[siteKey][ii]
                print " type : ", allClinDict[typeKey][ii]
                ## print " text : ", allClinDict[textKey][ii]
                print " siteStr : ", siteStr
                print " days : ", allClinDict[days2nteKey][ii]

        else:
            newSite += [ "NA" ]

    ## the types of things I'm seeing are:
    ##     type: Distant_Metastasis
    ##          --> then 'site' sometimes gives the location, or else says "Other_specify"
    ##              in which case the 'text' might give the location

    ## also note that the "days_to_nte" ranges from 62 to 2893 (the lowest numbers are 62, 77, 93, 94, 153, 178...)

    keyString = "C:CLIN:nte_site:::::"
    allClinDict[keyString] = newSite
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    return ( allClinDict )
示例#5
0
 def fixUpFeatureNames(self, allClinDict):
     """Return a new dictionary keyed by fully-qualified feature names.

     Keys that already look like "B:SAMP:etc" or "N:CLIN:etc" (a ":" at
     positions 1 and 6) are copied unchanged.  Every other key is renamed
     to "<T>:<featType>:<key>:::::" where featType comes from
     self.getFeatTypeByName and <T> is chosen from the summary returned
     by miscClin.lookAtKey:

         "B"  NOMINAL with exactly two labels
         "C"  any other NOMINAL
         "N"  NUMERIC

     If the feature type or data type cannot be determined, a traceback
     and an error message are printed and the original key is kept.

     The input dictionary is not modified; the value lists are shared,
     not copied.
     """

     print(" ")
     print(" in fixUpFeatureNames ... ")

     newClinDict = {}

     for aKey in allClinDict.keys():

         ## if the feature name already looks like B:SAMP:etc or N:CLIN:etc
         ## then we keep it as it is (the length test protects the aKey[6]
         ## lookup against short keys)
         if ( len(aKey) > 6 and aKey[1]==":" and aKey[6]==":" ):
             newName = aKey

         else:
             try:
                 featType = self.getFeatTypeByName ( aKey )
                 print(featType)
                 info = miscClin.lookAtKey ( allClinDict[aKey] )
                 print(aKey)
                 print(info)
                 if ( info[0] == "NOMINAL" ):
                     if ( info[3] == 2 ):
                         newName = "B:" + featType + ":" + aKey + ":::::"
                     else:
                         newName = "C:" + featType + ":" + aKey + ":::::"
                 elif ( info[0] == "NUMERIC" ):
                     newName = "N:" + featType + ":" + aKey + ":::::"
                 else:
                     raise ValueError ( "unexpected data type <%s>" % str(info[0]) )
             except (Exception, SystemExit):
                 ## SystemExit is trapped on purpose: a helper bailing out
                 ## with sys.exit() has always resulted in this fallback
                 traceback.print_exc(limit=5)
                 print(" ERROR in fixUpFeatureNames ??? key not found <%s> " % aKey)
                 newName = aKey

         ## this has to happen for *every* key, including the ones that
         ## were already well-formed, otherwise they are lost
         newClinDict[newName] = allClinDict[aKey]

     return ( newClinDict )
示例#6
0
    def fixUpFeatureNames(self, allClinDict):
        """Return a new dictionary keyed by fully-qualified feature names.

        Keys that already look like "B:SAMP:etc" or "N:CLIN:etc" (a ":" at
        positions 1 and 6) are copied unchanged.  Every other key is
        renamed to "<T>:<featType>:<key>:::::" where featType comes from
        self.getFeatTypeByName and <T> is chosen from the summary returned
        by miscClin.lookAtKey:

            "B"  NOMINAL with exactly two labels
            "C"  any other NOMINAL
            "N"  NUMERIC

        If the feature type or data type cannot be determined, a traceback
        and an error message are printed and the original key is kept.

        The input dictionary is not modified; the value lists are shared,
        not copied.
        """

        print(" ")
        print(" in fixUpFeatureNames ... ")

        newClinDict = {}

        for aKey in allClinDict.keys():

            ## if the feature name already looks like B:SAMP:etc or N:CLIN:etc
            ## then we keep it as it is (the length test protects the
            ## aKey[6] lookup against short keys)
            if (len(aKey) > 6 and aKey[1] == ":" and aKey[6] == ":"):
                newName = aKey

            else:
                try:
                    featType = self.getFeatTypeByName(aKey)
                    print(featType)
                    info = miscClin.lookAtKey(allClinDict[aKey])
                    print(aKey)
                    print(info)
                    if (info[0] == "NOMINAL"):
                        if (info[3] == 2):
                            newName = "B:" + featType + ":" + aKey + ":::::"
                        else:
                            newName = "C:" + featType + ":" + aKey + ":::::"
                    elif (info[0] == "NUMERIC"):
                        newName = "N:" + featType + ":" + aKey + ":::::"
                    else:
                        raise ValueError("unexpected data type <%s>" % str(info[0]))
                except (Exception, SystemExit):
                    ## SystemExit is trapped on purpose: a helper bailing
                    ## out with sys.exit() has always resulted in this
                    ## fallback
                    traceback.print_exc(limit=5)
                    print(" ERROR in fixUpFeatureNames ??? key not found <%s> " % aKey)
                    newName = aKey

            ## this has to happen for *every* key, including the ones that
            ## were already well-formed, otherwise they are lost
            newClinDict[newName] = allClinDict[aKey]

        return (newClinDict)
示例#7
0
def addAgeSplits ( allClinDict ):

    print " in addAgeSplits ... "
    print " "

    ageKey = findKey ( allClinDict, "age_at_initial_pathologic_diagnosis" )
    numP = len(allClinDict[ageKey])

    youngMax = [ 30, 35, 40, 45, 50, 40, 40, 35 ]
    oldMin   = [ 30, 35, 40, 45, 50, 45, 50, 55 ]
    numC = len(youngMax)
    newVecs = [0] * numC
    for iC in range(numC):
        newVecs[iC] = ["NA"] * numP

    for ii in range(numP):
        a = allClinDict[ageKey][ii]
        if ( a != "NA" ):
            for iC in range(numC):
                if ( a <= youngMax[iC] ):
                    newVecs[iC][ii] = "young"
                elif ( a > oldMin[iC] ):
                    newVecs[iC][ii] = "old"

    for iC in range(numC):
        if ( youngMax[iC] == oldMin[iC] ):
            keyString = "B:CLIN:ageSplit_%d:::::" % ( youngMax[iC] )
        else:
            keyString = "B:CLIN:ageSplit_%d_%d:::::" % ( youngMax[iC], oldMin[iC] )
        print keyString
        allClinDict[keyString] = newVecs[iC]
        ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
        print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
        print labelList

    return ( allClinDict )
示例#8
0
def writeARFF(allClinDict, bestKeyOrder, progName, outName):

    if (not outName.endswith(".arff")):
        outName += ".arff"

    print " "
    print " writing output arff file ", outName

    fh = file(outName, 'w')

    # before we get started we want to know which features we are going to write
    # out and their data types ...
    keyTypes = [0] * len(bestKeyOrder)

    # this is not being decided by lookAtKey anymore ...
    useFlags = [1] * len(bestKeyOrder)

    labelLists = [0] * len(bestKeyOrder)
    labelCounts = [0] * len(bestKeyOrder)
    numSkip = 0
    numNumeric = 0
    numNominal = 0
    for ii in range(len(bestKeyOrder)):
        aKey = bestKeyOrder[ii]
        (keyTypes[ii], nCount, nNA, nCard, labelLists[ii],
         labelCounts[ii]) = miscClin.lookAtKey(allClinDict[aKey])
        # print aKey, useFlags[ii], keyTypes[ii], nCount, nNA, nCard
        if (useFlags[ii]):
            if (keyTypes[ii] == 'NUMERIC'):
                numNumeric += 1
            if (keyTypes[ii] == 'NOMINAL'):
                numNominal += 1
        else:
            numSkip += 1

    aKey = bestKeyOrder[0]
    numK = len(allClinDict[aKey])

    # first we write some comments at the top ...
    now = datetime.datetime.now()
    fh.write('%% TCGA clinical data extracted from xml files\n')
    fh.write('%% %s\n' % str(progName))
    # TODO: replease with configurable username!
    fh.write('%% [email protected]\n')
    fh.write('%% %s\n' % now.strftime("%Y-%m-%d %H:%M"))
    fh.write('%% %d numeric fields, %d nominal fields, %d skipped\n' %
             (numNumeric, numNominal, numSkip))
    fh.write('%% %d examples\n' % numK)
    fh.write('\n')

    # then comes the header information:
    fh.write('@RELATION clinical-test\n')
    fh.write('\n')

    # the ARFF format allows for several different types of attributes:
    # numeric, nominal, string, date, and relational
    # --> for the moment we will only use numeric and nominal

    # so we need to be able to determine, for every key, whether it
    # is 'numeric' or 'nominal'
    for ii in range(len(bestKeyOrder)):
        aKey = bestKeyOrder[ii]
        if (useFlags[ii]):
            fh.write('@ATTRIBUTE %s ' % aKey)
            if (keyTypes[ii] == 'NUMERIC'):
                minVal = 999999
                maxVal = -999999
                for jj in range(len(allClinDict[aKey])):
                    try:
                        xVal = float(allClinDict[aKey][jj])
                        if (minVal > xVal):
                            minVal = xVal
                        if (maxVal < xVal):
                            maxVal = xVal
                    except:
                        doNothing = 1
                fh.write('NUMERIC %% [%d, %d] %d\n' %
                         (int(minVal + 0.499), int(maxVal + 0.499), naCount))
            elif (keyTypes[ii] == 'NOMINAL'):
                fh.write('{')
                for jj in range(len(labelLists[ii])):
                    aLabel = labelLists[ii][jj]
                    fh.write('"%s"' % aLabel)
                    if (jj < (len(labelLists[ii]) - 1)):
                        fh.write(', ')
                fh.write('} %% [ ')
                totCount = 0
                for jj in range(len(labelLists[ii])):
                    fh.write("%d" % labelCounts[ii][jj])
                    totCount += labelCounts[ii][jj]
                    if (jj < (len(labelLists[ii]) - 1)):
                        fh.write(", ")
                naCount = numK - totCount
                fh.write(' ] + %d\n' % naCount)

    fh.write('\n')
    fh.write('@DATA\n')

    # and now we can write the data:
    for kk in range(numK):
        # print " kk=%d ... " % kk
        outLine = ''
        for ii in range(len(bestKeyOrder)):
            if (useFlags[ii]):
                aKey = bestKeyOrder[ii]
                # print aKey
                # print allClinDict[aKey][kk]
                if (noData(allClinDict[aKey][kk])):
                    outLine += '?, '
                else:
                    if (keyTypes[ii] == 'NUMERIC'):
                        try:
                            outLine += ('%s, ' % str(allClinDict[aKey][kk]))
                        except:
                            print ' ERROR ??? ', aKey, kk, allClinDict[aKey][kk]
                            sys.exit(-1)
                    else:
                        try:
                            outLine += ('"%s", ' %
                                        allClinDict[aKey][kk].upper())
                        except:
                            outLine += ('"%s", ' % str(allClinDict[aKey][kk]))
                            # print " WARNING: this field is supposedly not numeric??? ", aKey, kk, allClinDict[aKey][kk]
                            # print allClinDict[aKey]
                            # sys.exit(-1)
        if (outLine.endswith(", ")):
            outLine = outLine[:-2]

        outLine += ("\t\t%% ")
        try:
            outLine += ("%s" % allClinDict['bcr_patient_barcode'][kk])
        except:
            try:
                outLine += ("%s" % allClinDict['C:CLIN:bcr_patient_barcode:::::'][kk])
            except:
                print " ERROR in writeARFF "
                print allClinDict.keys()
                sys.exit(-1)
        fh.write("%s\n" % outLine)

    fh.write('\n')
    fh.close()
示例#9
0
def makeMergedDx ( allClinDict ):

    print " in makeMergedDx ... "
    print " "

    mergeDx = []
    epcReview = []

    histTypeKey = findKey ( allClinDict, "histological_type" )
    epcDxKey = findKey ( allClinDict, "C:CLIN:Dx_EPC" )

    barKey = findKey ( allClinDict, "bcr_patient_barcode" )

    numP = len(allClinDict[histTypeKey])

    for ii in range(numP):

        if ( 1 ):
            print " "
            print " "
            print " patient index ", ii, allClinDict[barKey][ii], allClinDict[histTypeKey][ii], allClinDict[epcDxKey][ii]

        ## expected possible values for the histological_type field:
        ##      206 Cervical_Squamous_Cell_Carcinoma
        ##       23 Endocervical_Type_of_Adenocarcinoma
        ##        6 Mucinous_Adenocarcinoma_of_Endocervical_Type
        ##        5 Adenosquamous
        ##        4 Endometrioid_Adenocarcinoma_of_Endocervix
        ##        4 Endocervical_Adenocarcinoma_of_the_Usual_Type
        ##       70 NA

        ## expected values for Dx_EPC field:
        ##      4 Adenosquamous
        ##     27 Endocervical_Adeno
        ##    123 NA
        ##     99 Squamous

        if ( allClinDict[epcDxKey][ii] != "NA" ): 
            epcReview += [ "TRUE" ]
        else:
            epcReview += [ "FALSE" ]

        if ( allClinDict[epcDxKey][ii] != "NA" ):
            mergeDx += [ allClinDict[epcDxKey][ii] ]
        else:
            if ( allClinDict[histTypeKey][ii] == "Cervical_Squamous_Cell_Carcinoma" ):
                mergeDx += [ "Squamous" ]
            elif ( allClinDict[histTypeKey][ii] == "Endocervical_Type_of_Adenocarcinoma" ):
                mergeDx += [ "Adenocarcinoma" ]
            elif ( allClinDict[histTypeKey][ii] == "Mucinous_Adenocarcinoma_of_Endocervical_Type" ):
                mergeDx += [ "Adenocarcinoma" ]
            elif ( allClinDict[histTypeKey][ii] == "Adenosquamous" ):
                mergeDx += [ "Adenosquamous" ]
            elif ( allClinDict[histTypeKey][ii] == "Endometrioid_Adenocarcinoma_of_Endocervix" ):
                mergeDx += [ "Adenocarcinoma" ]
            elif ( allClinDict[histTypeKey][ii] == "Endocervical_Adenocarcinoma_of_the_Usual_Type" ):
                mergeDx += [ "Adenocarcinoma" ]
            elif ( allClinDict[histTypeKey][ii] == "NA" ):
                mergeDx += [ "NA" ]
            else:
                print " ERROR ??? we should not be here ... ", ii, allClinDict[barKey][ii], \
                    allClinDict[histTypeKey][ii], allClinDict[epcDxKey][ii]

        ## just double-checking terminology one more time ...
        if ( mergeDx[-1] == "Endocervical_Adeno" ):
            mergeDx[-1] = "Adenocarcinoma"
            
    keyString = "C:CLIN:Dx_merged:::::"
    allClinDict[keyString] = mergeDx
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    keyString = "C:CLIN:EPC_review:::::"
    allClinDict[keyString] = epcReview
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    return ( allClinDict )
示例#10
0
def checkLymphNodes_HystDx ( allClinDict ):

    print " in checkLymphNodes_HystDx ... "
    print " "

    newHyst = []
    newDxM = []
    numLNpos = []
    tfLNpos = []

    ## here we have 138 'radical', 6 'simple', and 5 'other'
    hysTypeKey = findKey ( allClinDict, "hysterectomy_performed_type" )
    hysTextKey = findKey ( allClinDict, "hysterectomy_performed_text" )
    dxMeth1Key = findKey ( allClinDict, "initial_pathologic_diagnosis_method" )
    dxMeth2Key = findKey ( allClinDict, "init_pathology_dx_method_other" )

    barKey = findKey ( allClinDict, "bcr_patient_barcode" )
    LNEcountKey = findKey ( allClinDict, "lymph_node_examined_count" )
    LNEposHEkey = findKey ( allClinDict, "number_of_lymphnodes_positive_by_he" )
    LNEposIHCkey = findKey ( allClinDict, "number_of_lymphnodes_positive_by_ihc" )

    numP = len(allClinDict[hysTypeKey])

    for ii in range(numP):

        if ( 0 ):
            print " "
            print " "
            print " patient index ", ii, allClinDict[barKey][ii]

        if ( allClinDict[hysTypeKey][ii] == "NA" and allClinDict[hysTextKey][ii] == "NA" ):
            newHyst += [ "NO_or_NA" ]
        else:
            newHyst += [ "YES" ]

        ## here we want to figure out what method was used for diagnosis ...
        newDxM += [ "NA" ]
        dxMethod = "NA"

        if ( allClinDict[hysTypeKey][ii].lower().find("hysterect") >= 0 ):
            if ( allClinDict[hysTypeKey][ii].lower().find("radical") >= 0 ):
                dxMethod = "radical_hysterectomy"
            elif ( allClinDict[hysTypeKey][ii].lower().find("simple") >= 0 ):
                dxMethod = "simple_hysterectomy"
            elif ( allClinDict[hysTypeKey][ii].lower().find("total_abd") >= 0 ):
                dxMethod = "total_abdominal_hysterectomy"
        if ( allClinDict[hysTextKey][ii].lower().find("hysterect") >= 0 ):
            if ( allClinDict[hysTextKey][ii].lower().find("radical") >= 0 ):
                dxMethod = "radical_hysterectomy"
            elif ( allClinDict[hysTextKey][ii].lower().find("simple") >= 0 ):
                dxMethod = "simple_hysterectomy"
            elif ( allClinDict[hysTextKey][ii].lower().find("total_abd") >= 0 ):
                dxMethod = "total_abdominal_hysterectomy"

        if ( dxMethod == "NA" ):
            if ( allClinDict[dxMeth1Key][ii].lower().find("cone") >= 0 ):
                dxMethod = "cone_biopsy"
        if ( dxMethod == "NA" ):
            if ( allClinDict[dxMeth2Key][ii].lower().find("cone") >= 0 ):
                dxMethod = "cone_biopsy"

        if ( dxMethod == "NA" ):
            if ( allClinDict[dxMeth1Key][ii].lower().find("biops") >= 0 ):
                dxMethod = "biopsy"
        if ( dxMethod == "NA" ):
            if ( allClinDict[dxMeth2Key][ii].lower().find("biops") >= 0 ):
                dxMethod = "biopsy"

        if ( dxMethod == "NA" ):
            if ( allClinDict[hysTypeKey][ii] != "NA" ): dxMethod = "other"
            if ( allClinDict[hysTextKey][ii] != "NA" ): dxMethod = "other"
            if ( allClinDict[dxMeth1Key][ii] != "NA" ): dxMethod = "other"
            if ( allClinDict[dxMeth2Key][ii] != "NA" ): dxMethod = "other"
            if ( dxMethod == "other" ): print " setting dxMethod to OTHER ", ii, \
                        allClinDict[hysTypeKey][ii], allClinDict[hysTextKey][ii], \
                        allClinDict[dxMeth1Key][ii], allClinDict[dxMeth2Key][ii]
        
        newDxM[-1] = dxMethod


        numPos = 0
        if ( allClinDict[LNEposHEkey][ii] != "NA" ):
            numPos += allClinDict[LNEposHEkey][ii]
        if ( allClinDict[LNEposIHCkey][ii] != "NA" ):
            numPos += allClinDict[LNEposIHCkey][ii]

        if ( (allClinDict[LNEposHEkey][ii] == "NA") and (allClinDict[LNEposIHCkey][ii] == "NA") ):
            numLNpos += [ "NA" ]
            tfLNpos  += [ "NA" ]
        else:
            numLNpos += [ numPos ]
            if ( numPos == 0 ):
                tfLNpos += [ "FALSE" ]
            else:
                tfLNpos += [ "TRUE" ]

        if ( 0 ):
            if ( allClinDict[hysTypeKey][ii] == "NA" ):
                if ( allClinDict[hysTextKey][ii] != "NA" ):
                    print " text filled out but not type "
            if ( allClinDict[hysTextKey][ii] == "NA" ):
                if ( allClinDict[hysTypeKey][ii] != "NA" ):
                    print " type filled out but not text "
    
            print " hysTypeKey  : ", allClinDict[hysTypeKey][ii]
            print " hysTextKey  : ", allClinDict[hysTextKey][ii]
            print " lymph nodes : ", allClinDict[LNEcountKey][ii], \
                                     allClinDict[LNEposHEkey][ii], \
                                     allClinDict[LNEposIHCkey][ii]

    print " done working through each patient ... "
    print len(newHyst), len(newDxM), len(tfLNpos), len(numLNpos)
    print " "

    keyString = "C:CLIN:hysterectomy:::::"
    allClinDict[keyString] = newHyst
    print " (a) ", keyString, newHyst
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    keyString = "C:CLIN:dx_method:::::"
    allClinDict[keyString] = newDxM
    print " (b) ", keyString, newDxM
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    keyString = "C:CLIN:LNposTF:::::"
    allClinDict[keyString] = tfLNpos
    print " (c) ", keyString, tfLNpos
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    keyString = "N:CLIN:numLNpos:::::"
    allClinDict[keyString] = numLNpos
    print " (3) ", keyString, numLNpos
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount

    print " DONE DONE DONE "

    return ( allClinDict )
示例#11
0
def checkTumorStatus ( allClinDict ):

    print " in checkTumorStatus ... "
    print " "

    newStatus1 = []
    newStatus2 = []

    statusKey = findKey ( allClinDict, "person_neoplasm_cancer_status" )
    days2fupKey = findKey ( allClinDict, "days_to_last_followup" )
    vitalKey = findKey ( allClinDict, "vital_status" )
    days2deathKey = findKey ( allClinDict, "days_to_death" )

    numP = len(allClinDict[statusKey])

    for ii in range(numP):

        days2last = -1
        if ( allClinDict[days2fupKey][ii] != "NA" ):
            days2last = allClinDict[days2fupKey][ii]
        if ( allClinDict[days2deathKey][ii] != "NA" ):
            days2last = max ( allClinDict[days2deathKey][ii], days2last )

        if ( 0 ):
            print " "
            print " "
            print ii
            print " status     : ", allClinDict[statusKey][ii]
            print " vital      : ", allClinDict[vitalKey][ii]
            ## print " days2fup   : ", allClinDict[days2fupKey][ii]
            ## print " days2death : ", allClinDict[days2deathKey][ii]
            print " days2last  : ", days2last
            if ( allClinDict[vitalKey][ii] == "Alive" ):
                if ( days2last < 90 ):
                    print "         Alive and less than 90 days ", allClinDict[statusKey][ii]

        newStatus1 += [ "NA" ]
        if ( allClinDict[statusKey][ii] == "TUMOR_FREE" and days2last >= 90 ):
            newStatus1[-1] = "TUMOR_FREE"
        elif ( allClinDict[statusKey][ii] == "WITH_TUMOR" and days2last >= 90 ):
            newStatus1[-1] = "WITH_TUMOR"

        newStatus2 += [ "NA" ]
        if ( allClinDict[statusKey][ii] == "TUMOR_FREE" and allClinDict[vitalKey][ii] == "Alive" ):
            newStatus2[-1] = "Alive_woTumor"
        elif ( allClinDict[statusKey][ii] == "WITH_TUMOR" and allClinDict[vitalKey][ii] == "Dead" ):
            newStatus2[-1] = "Dead_wTumor"

    ## as of 13aug ... there are 57 patients who are alive and have less than 90 days of follow-up
    ## of these:        24 are "tumor_free"
    ##                  17 are "NA"
    ##                  16 are "with_tumor"

    keyString = "C:CLIN:tumorStatus1:::::"
    allClinDict[keyString] = newStatus1
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    keyString = "C:CLIN:tumorStatus2:::::"
    allClinDict[keyString] = newStatus2
    ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[keyString] )
    print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
    print labelList

    return ( allClinDict )
示例#12
0
    # was this just for debugging purposes ???
    fList = getFeatList ( featureList )

    for aF in fList:
        print aF

        for aKey in allClinDict.keys():

            if ( aKey[1] == ":" ):
                aTokens = aKey.split(':')
                tKey = aTokens[2]
            else:
                tKey = aKey

            if ( aF == tKey ):
                ( keyType, nCount, naCount, cardCount, labelList, labelCount ) = miscClin.lookAtKey ( allClinDict[aKey] )
                print " %s  N=%d  NA=%d  not-NA=%d  card=%d " % ( keyType, nCount, naCount, (nCount-naCount), cardCount ), labelCount
                if ( keyType != "NUMERIC" ): print labelList
                print " "
                print " "


    # now we need to do some massaging and computing ...
    try:
        allClinDict = addBMI ( allClinDict )
    except:
        print " addBMI function failed "

    try:
        allClinDict = checkMenopause ( allClinDict )
    except: