Exemplo n.º 1
0
    def writeMatrix(self, matrixParams, outFilename):
        print datetime.now(), 'writing out data matrix to %s' % outFilename
        newFeatureName = "C:SAMP:" + self.configuration['fm_feature_type'].lower() + "Platform:::::" + self.configuration['fm_platform_type']
        newFeatureValue = self.configuration['techtype']
        tsvIO.addConstFeature (matrixParams, newFeatureName, newFeatureValue)

        tsvIO.writeTSV_dataMatrix (matrixParams, matrixParams['sortRowFlag'], matrixParams['sortColFlag'], outFilename)
        print datetime.now(), 'finished writing out data matrix\n'
Exemplo n.º 2
0
    def writeMatrix(self, matrixParams, outFilename):
        print datetime.now(), 'writing out data matrix to %s' % outFilename
        newFeatureName = "C:SAMP:" + self.configuration[
            'fm_feature_type'].lower(
            ) + "Platform:::::" + self.configuration['fm_platform_type']
        newFeatureValue = self.configuration['techtype']
        tsvIO.addConstFeature(matrixParams, newFeatureName, newFeatureValue)

        tsvIO.writeTSV_dataMatrix(matrixParams, matrixParams['sortRowFlag'],
                                  matrixParams['sortColFlag'], outFilename)
        print datetime.now(), 'finished writing out data matrix\n'
Exemplo n.º 3
0
def main():
    """Read SNP copy-number data, resegment it and write the result as TSV.

    args.infile and args.include (from parseArgs) are handed to
    _readAllSnpDataFile; the resegmented matrix, together with a constant
    "cnvrPlatform" feature, is written to args.outfile.  A failure while
    assembling or writing the output is reported on the console instead of
    being raised.
    """
    args = parseArgs()

    # one data container and one max-coordinate entry per index 1..24
    chr2data = {}
    chr2maxcoord = {}
    for index in range(1, 25):
        chrom = new_Level3_matrix_MM28may13.unifychr(str(index))
        chr2data[chrom] = new_Level3_matrix_MM28may13.AutoVivification()
        chr2maxcoord[chrom] = 0

    steplength = 1000
    sampleList = _readAllSnpDataFile(
        args.infile, args.include, chr2data, chr2maxcoord, steplength)

    # module-level settings of resegment are overridden before resegmenting
    cutFrac = 0.02
    resegment.NA_VALUE = -999999
    resegment.NEAR_ZERO = 0.0001
    segList, _, dataMatrix = _resegmentCNdata(
        sampleList, chr2data, chr2maxcoord, steplength, cutFrac)

    try:
        dataD = {}
        dataD['rowLabels'] = segList
        dataD['colLabels'] = sampleList
        dataD['dataMatrix'] = dataMatrix
        dataD['dataType'] = "%s:%s" % ("N", "CNVR")

        newFeatureName = "C:SAMP:" + "cnvrPlatform"
        newFeatureValue = "Genome_Wide_SNP_6"
        dataD = tsvIO.addConstFeature(dataD, newFeatureName, newFeatureValue)

        sortRowFlag = 0
        sortColFlag = 1
        tsvIO.writeTSV_dataMatrix(
            dataD, sortRowFlag, sortColFlag, args.outfile)
    except Exception:
        # Catch only ordinary errors so that KeyboardInterrupt / SystemExit
        # still propagate, and show the underlying cause of the failure.
        import traceback
        print " FATAL ERROR: failed to write out any resegmented copy-number data "
        traceback.print_exc()
Exemplo n.º 4
0
# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#

if __name__ == "__main__":

    if (1):
        if (len(sys.argv) == 3):
            inFile = sys.argv[1]
            outFile = sys.argv[2]
        else:
            print " "
            print " Usage: %s <input TSV file> <output TSV file> "
            print " "
            print " ERROR -- bad command line arguments "
            sys.exit(-1)

    print " "
    print " Running : %s %s %s " % (sys.argv[0], sys.argv[1], sys.argv[2])
    print " "
    print " "

    # now read in the input feature matrix ...
    dataD = tsvIO.readTSV(inFile)

    # add new custom features ...
    dataD = addCustomFeatures(dataD)

    # and write the matrix back out
    tsvIO.writeTSV_dataMatrix(dataD, 0, 0, outFile)

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 5
0
    numCol = len(dataMatrix[0])
    skipRowList = []
    for iRow in range(numRow):
        allNA = 1
        iCol = 0
        while (allNA == 1 and iCol < numCol):
            if (dataMatrix[iRow][iCol] != NA_VALUE):
                allNA = 0
            iCol += 1
        if (allNA):
            skipRowList += [iRow]

    if (len(skipRowList) > 0):
        print " after checking for all-NA features ... "
        print " number of rows to be skipped : ", len(skipRowList)
        print " --> number of rows remaining : ", (numRow - len(skipRowList))
        outD2 = tsvIO.filter_dataMatrix(outD, skipRowList, [])
        outD = outD2

    # print " "
    # print " calling writeTSV_dataMatrix ... ", outFile
    sortRowFlag = 0
    sortColFlag = 0
    tsvIO.writeTSV_dataMatrix(outD, sortRowFlag, sortColFlag, outFile)

    print " "
    print " DONE "
    print " "

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 6
0
    outD['colLabels'] = unionColLabels
    outD['dataMatrix'] = outMatrix

    sortRowFlag = 0  # seems best not to sort the rows
    sortColFlag = 0

    if (sortRowFlag):
        fTypeList.sort()
    outD['dataType'] = makeDataTypeString(dTypeList, fTypeList)

    if (pruneOrder != "NA"):
        print " "
        print " now calling pruneTSV_dataMatrix on the merged dataMatrix ... ", pruneOrder
        outD = tsvIO.pruneTSV_dataMatrix(outD, rowMaxNAfrac, colMaxNAfrac,
                                         pruneOrder)
    else:
        print " "
        print " NOT doing any pruning of the merged dataMatrix "

    print " "
    print ' (d) TIME ', time.asctime(time.localtime(time.time()))
    print " calling writeTSV_dataMatrix ... ", outFile
    tsvIO.writeTSV_dataMatrix(outD, sortRowFlag, sortColFlag, outFile)

    print " "
    print " DONE ", dTypeList, fTypeList
    print ' (e) TIME ', time.asctime(time.localtime(time.time()))
    print " "

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 7
0
if __name__ == "__main__":

    if (1):
        if (len(sys.argv) != 4):
            print " Usage : %s <input feature matrix> <output feature matrix> <featType> " % sys.argv[0]
            print "         to avoid filtering on featType, use ANY "
            sys.exit(-1)
        inFile = sys.argv[1]
        outFile = sys.argv[2]
        featType = sys.argv[3]

    print " "
    print " "
    print " ******************** "
    print "  in filterIdentFeat  "
    print " ******************** "

    inD = tsvIO.readTSV(inFile)
    rowLabels = inD['rowLabels']
    numRow = len(rowLabels)

    outD = removeIdenticalFeatures(inD, featType)

    tsvIO.writeTSV_dataMatrix(outD, 0, 0, outFile)

    print " FINISHED "
    print " "


# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 8
0
        else:
            print " "
            print " Usage: %s <input TSV file> <output TSV file> "
            print " "
            print " ERROR -- bad command line arguments "
            sys.exit(-1)

    print " "
    print " Running : %s  %s  %s  " % (sys.argv[0], sys.argv[1], sys.argv[2])
    print " "

    # read in the input feature matrix first, just in case there
    # actually isn't one yet available ...
    testD = tsvIO.readTSV(inFile)
    try:
        print len(testD['rowLabels']), len(testD['colLabels'])
    except:
        print " --> invalid / missing input feature matrix "
        sys.exit(-1)

    # we want to "check" for "deleted" METH probes
    if (do_summaryMeth):
        newD = summaryMeth(testD)
        ## tsvIO.writeTSV_dataMatrix ( newD, 0, 0, outFile )
        testD = newD

    # and finally write it out ...
    tsvIO.writeTSV_dataMatrix(testD, 0, 0, outFile)

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 9
0
                        fVal = float(tokenList[iC + 1])
                        dataMatrix[iR][iC] = fVal
                    except:
                        dataMatrix[iR][iC] = NA_VALUE
                        numNA += 1
                iR += 1
                print " iR=%d    numNA=%d " % (iR, numNA)

        dataD = {}
        dataD['rowLabels'] = rowLabels
        dataD['colLabels'] = hdrTokens[1:]
        dataD['dataMatrix'] = dataMatrix
        dataD['dataType'] = "N:MIRN"
        print ' writing out data matrix to ', outFilename

        newFeatureName = "C:SAMP:mirnPlatform:::::seq"
        newFeatureValue = zPlat
        dataD = tsvIO.addConstFeature(dataD, newFeatureName, newFeatureValue)

        sortRowFlag = 0
        sortColFlag = 0
        tsvIO.writeTSV_dataMatrix(
            dataD, sortRowFlag, sortColFlag, outFilename)

    print ' '
    print ' DONE !!! '
    print ' '


# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 10
0
        iCol = 0
        while (allNA == 1 and iCol < numCol):
            if (dataMatrix[iRow][iCol] != NA_VALUE):
                allNA = 0
            iCol += 1
        if (allNA):
            skipRowList += [iRow]

    if (len(skipRowList) > 0):
        print " after checking for all-NA features ... "
        print " number of rows to be skipped : ", len(skipRowList)
        print " --> number of rows remaining : ", (numRow - len(skipRowList))
        outD2 = tsvIO.filter_dataMatrix(outD, skipRowList, [])
        outD = outD2

    # set up sorting options ...
    sortRowFlag = 0
    sortColFlag = 0
    rowOrder = []
    colOrder = []

    # print " "
    # print " calling writeTSV_dataMatrix ... ", outFile
    tsvIO.writeTSV_dataMatrix(outD, sortRowFlag, sortColFlag, outFile, rowOrder, colOrder)

    print " "
    print " DONE "
    print " "

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 11
0
if __name__ == "__main__":

    if (len(sys.argv) != 3):
        print " Usage : %s <input TSV> <output TSV> " % sys.argv[0]
        sys.exit(-1)

    tsvNameIn = sys.argv[1]
    tsvNameOut = sys.argv[2]

    print " "
    print " ****************************************************************** "
    print " reading input file <%s> " % tsvNameIn
    tmpD = tsvIO.readTSV(tsvNameIn)

    if (len(tmpD) == 0):
        print " in addIndicators ... no input data ... nothing to do here ... "
        sys.exit(-1)

    # automatically generate indicator features for categorical features
    tmpD = addIndicatorFeatures(tmpD)

    tsvIO.writeTSV_dataMatrix(tmpD, 0, 0, tsvNameOut)

    print " "
    print " "
    print " FINISHED "
    print " "
    print " "

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 12
0
            outFile = sys.argv[2]
        else:
            print " "
            print " Usage: %s <input TSV file> <output TSV file> "
            print " "
            print " ERROR -- bad command line arguments "
            sys.exit(-1)

    print " "
    print " Running : %s  %s  %s  " % (sys.argv[0], sys.argv[1], sys.argv[2])
    print " "

    # read in the input feature matrix first, just in case there
    # actually isn't one yet available ...
    testD = tsvIO.readTSV(inFile)
    try:
        print len(testD['rowLabels']), len(testD['colLabels'])
    except:
        print " --> invalid / missing input feature matrix "
        sys.exit(-1)

    # and write it back out ...
    sortColFlag = 1  # sort the sample barcodes
    sortRowFlag = 0  # do NOT sort the feature names
    simpleNamesFlag = 1  # and write out 'simple' names

    tsvIO.writeTSV_dataMatrix ( testD, sortRowFlag, sortColFlag, \
                                outFile, [], [], simpleNamesFlag )

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 13
0
            outFile = sys.argv[2]
        else:
            print " "
            print " Usage: %s <input TSV file> <output TSV file> "
            print " "
            print " ERROR -- bad command line arguments "
            sys.exit(-1)

    print " "
    print " Running : %s  %s  %s  " % (sys.argv[0], sys.argv[1], sys.argv[2])
    print " "

    # read in the input feature matrix first, just in case there
    # actually isn't one yet available ...
    testD = tsvIO.readTSV(inFile)
    try:
        print len(testD['rowLabels']), len(testD['colLabels'])
    except:
        print " --> invalid / missing input feature matrix "
        sys.exit(-1)

    # and write it back out ...
    sortColFlag = 1     # sort the sample barcodes
    sortRowFlag = 0     # do NOT sort the feature names
    simpleNamesFlag = 1 # and write out 'simple' names

    tsvIO.writeTSV_dataMatrix ( testD, sortRowFlag, sortColFlag, \
                                outFile, [], [], simpleNamesFlag )

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 14
0
    if (len(sys.argv) != 3):
        print " Usage : %s <input TSV> <output TSV> " % sys.argv[0]
        print " ERROR -- bad command line arguments "
        sys.exit(-1)

    tsvNameIn = sys.argv[1]
    tsvNameOut = sys.argv[2]

    print " "
    print " ****************************************************************** "
    print " reading input file <%s> " % tsvNameIn
    tmpD = tsvIO.readTSV(tsvNameIn)

    if (len(tmpD) == 0):
        print " in addIndicators ... no input data ... nothing to do here ... "
        sys.exit(-1)

    # automatically generate indicator features for categorical features
    tmpD = addIndicatorFeatures(tmpD)

    tsvIO.writeTSV_dataMatrix(tmpD, 0, 0, tsvNameOut)

    print " "
    print " "
    print " FINISHED "
    print " "
    print " "

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 15
0
        while (allNA == 1 and iCol < numCol):
            if (dataMatrix[iRow][iCol] != NA_VALUE):
                allNA = 0
            iCol += 1
        if (allNA):
            skipRowList += [iRow]

    if (len(skipRowList) > 0):
        print " after checking for all-NA features ... "
        print " number of rows to be skipped : ", len(skipRowList)
        print " --> number of rows remaining : ", (numRow - len(skipRowList))
        outD2 = tsvIO.filter_dataMatrix(outD, skipRowList, [])
        outD = outD2

    # set up sorting options ...
    sortRowFlag = 0
    sortColFlag = 0
    rowOrder = []
    colOrder = []

    # print " "
    # print " calling writeTSV_dataMatrix ... ", outFile
    tsvIO.writeTSV_dataMatrix(outD, sortRowFlag, sortColFlag, outFile,
                              rowOrder, colOrder)

    print " "
    print " DONE "
    print " "

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
Exemplo n.º 16
0
    # check that the feature names are still unique ...
    print " --> verify that the feature names are unique ... "
    (newLabels, rmList) = tsvIO.uniqueFeatureLabels(annotD['rowLabels'],
                                                    annotD['dataMatrix'])
    print "     back from tsvIO.uniqueFeatureLabels "

    # quick sanity check that labels are still what I think they are ...
    for ii in range(len(newLabels)):
        if (not (newLabels[ii] == annotD['rowLabels'][ii])):
            print " "
            print " BAILING !!! ", newLabels[ii], annotD['rowLabels'][ii]
            print " "
            sys.exit(-1)

    # remove any 'extra' features that need removing ...
    if (len(rmList) > 0):
        print "     --> need to remove these rows ", rmList
        tmpD = tsvIO.filter_dataMatrix(annotD, rmList, [])
        annotD = tmpD

    # and write the matrix back out
    print " --> calling tsvIO.writeTSV_dataMatrix ... "
    tsvIO.writeTSV_dataMatrix(annotD, 0, 0, outFile)

    print " "
    print " DONE "
    print " "

# -#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#