class LibSvmClassifier(BaseClassifier):
    """Classifier backed by a trained libSVM model and a feature range file.

    Both files are looked up in the data path given to the constructor and
    share the prefix configured by the ``strSvmPrefix`` option
    (``<prefix>.model`` and ``<prefix>.range``).
    """

    OPTIONS = {
        "strSvmPrefix": Option(
            "features",
            doc="Prefix for libSVM .model and .range files."),
        "hasZeroInsert": Option(True),
        #"clsWriter" : Option(LibSvmWriter, doc="Writer class to write feature data, e.g. in sparse libSVM format."),
    }

    SVM_MODEL = svm_model
    NORMALIZER = Normalizer
    NAME = 'libSVM'
    METHOD = 'Support Vector Machine'

    __attributes__ = ['oSvmModel', 'oNormalizer']

    def __init__(self, strDataPath, oLogger, **options):
        """Load the model and range files found under ``strDataPath``.

        Raises IOError if either ``<prefix>.model`` or ``<prefix>.range``
        is missing.
        """
        # NOTE(review): ``oLogger`` is accepted but never used here; logging
        # goes through ``self._oLogger``, which is presumably provided by a
        # base class -- confirm.
        super(LibSvmClassifier, self).__init__(**options)

        strModelFilePath = os.path.join(
            strDataPath, self.getOption('strSvmPrefix') + '.model')
        if not os.path.isfile(strModelFilePath):
            raise IOError("libSVM model file '%s' not found!" %
                          strModelFilePath)
        self._oLogger.info("Loading libSVM model file '%s'." %
                           strModelFilePath)
        self.oSvmModel = self.SVM_MODEL(strModelFilePath)

        strRangeFilePath = os.path.join(
            strDataPath, self.getOption('strSvmPrefix') + '.range')
        if not os.path.isfile(strRangeFilePath):
            raise IOError("libSVM range file '%s' not found!" %
                          strRangeFilePath)
        self._oLogger.info("Loading libSVM range file '%s'." %
                           strRangeFilePath)
        self.oNormalizer = self.NORMALIZER(strRangeFilePath)

        # The model reports via its ``probability`` flag whether probability
        # estimates can be requested in __call__.
        self.bProbability = bool(self.oSvmModel.probability == 1)

    def normalize(self, lstSampleFeatureData):
        """Return the feature data scaled by the loaded normalizer."""
        return self.oNormalizer.scale(lstSampleFeatureData)

    def __call__(self, lstSampleFeatureData):
        """Classify one sample.

        Returns a tuple ``(iLabel, dctProb)``. Without probability support
        in the model, ``dctProb`` is ``{iLabel: 1.0}``.
        """
        lstScaled = self.normalize(lstSampleFeatureData)
        if self.getOption('hasZeroInsert'):
            # Prepend a zero entry before handing the features to libSVM.
            lstScaled = [0] + lstScaled

        if not self.bProbability:
            iLabel = int(self.oSvmModel.predict(lstScaled))
            return iLabel, {iLabel: 1.0}

        fLabel, dctProb = self.oSvmModel.predict_probability(lstScaled)
        return int(fLabel), dctProb
class ExcelMacroPlateMapper2(ExcelMacroPlateMapper):
    """Plate mapper that additionally renumbers the 'WellOrder2' column.

    After the mapping table has been loaded, it is sorted by 'WellOrder2',
    rows whose 'WellOrder2' is 0 are deleted and the remaining rows are
    numbered consecutively starting at 1.
    """

    OPTIONS = {
        'strFieldDelimiter': Option('\t'),
        'dctColumns': Option({
            'GeneSymbol': 'GeneSymbol',
            'siRNAID': 'siRNAID',
            #'Position' : 'WellOrder2',
            'Position': 'Position',
            'Group': 'Group',
            'Well': 'Well',
        }),
        'dctOptionalColumns': Option({}),
    }

    def __init__(self, strFilename, **dctOptions):
        """Load ``strFilename`` and normalize the 'WellOrder2' values."""
        super(ExcelMacroPlateMapper2, self).__init__(strFilename,
                                                     **dctOptions)
        oTable = self._oMappingTable

        # normalize position infos
        oTable.sort('WellOrder2')
        lstZeroRows = oTable.selectIndices(
            lambda rec: rec['WellOrder2'] == 0)
        del oTable[lstZeroRows]
        for iOrder, oRecord in enumerate(oTable, 1):
            oRecord['WellOrder2'] = iOrder
class Plate96(_Plate):
    """Plate with 12 x 8 wells, one site per well."""

    OPTIONS = {
        'tplDims': Option((12, 8)),
        'tplPositionDistance': Option((9000, 9000)),
        'tplAxisOrientation': Option((1, -1)),
        'tplSubWells': Option((1, 1)),
    }
class Plate384(_Plate):
    """Plate with 24 x 16 wells, one site per well."""

    OPTIONS = {
        'tplDims': Option((24, 16)),
        'tplPositionDistance': Option((4500, 4500)),
        'tplAxisOrientation': Option((1, -1)),
        'tplSubWells': Option((1, 1)),
    }
class Labtek(_Plate):
    """Labtek layout with 32 x 12 positions, one site per position."""

    OPTIONS = {
        'tplDims': Option((32, 12)),
        'tplPositionDistance': Option((1125, 1125)),
        'tplAxisOrientation': Option((1, -1)),
        'tplSubWells': Option((1, 1)),
    }
class NewExcelPlateMapper(ExcelMacroPlateMapper):
    """Plate mapper whose required columns keep their own names.

    Unlike the parent mapper, 'Position' is read from a column of the same
    name, 'Well' is a required column and no optional columns are declared.
    """

    OPTIONS = {
        'dctColumns': Option({
            'GeneSymbol': 'GeneSymbol',
            'siRNAID': 'siRNAID',
            'Position': 'Position',
            'Group': 'Group',
            'Well': 'Well',
        }),
        'dctOptionalColumns': Option({}),
    }
class LabtekFgcz(Labtek):
    """Labtek variant with 33 x 12 positions."""

    OPTIONS = {
        'tplDims': Option((33, 12)),
    }

    def __init__(self, **dctOptions):
        """Forward all options unchanged to the Labtek constructor."""
        super(LabtekFgcz, self).__init__(**dctOptions)
class QiagenPlateMapper(ExcelMacroPlateMapper):
    """Plate mapper for tables that carry a 'QiagenCatNo' column.

    All required columns except 'CatalogNo' are read from columns of the
    same name; 'CatalogNo' is read from 'QiagenCatNo'. No optional columns
    are declared.
    """

    OPTIONS = {
        'strFieldDelimiter': Option('\t'),
        'dctColumns': Option({
            'GeneSymbol': 'GeneSymbol',
            'siRNAID': 'siRNAID',
            'PosX': 'PosX',
            'PosY': 'PosY',
            'Position': 'Position',
            'Group': 'Group',
            'Well': 'Well',
            'CatalogNo': 'QiagenCatNo',
        }),
        'dctOptionalColumns': Option({}),
    }
class ExcelMacroPlateMapper(_PlateMapper):
    """Plate mapper that reads its mapping from a delimited text table.

    ``dctColumns`` and ``dctOptionalColumns`` map the column names used by
    the plate (keys) to the column names found in the imported table
    (values).
    """

    OPTIONS = {
        'strFieldDelimiter': Option('\t'),
        'dctColumns': Option({
            'GeneSymbol': 'GeneSymbol',
            'siRNAID': 'siRNAID',
            'Position': 'Stagepositionnr_',
            'Group': 'Group',
        }),
        'dctOptionalColumns': Option({
            'FunctionalGroup': 'FunctionalGroup',
            'GroupAverage': 'GroupAverage',
            'Rank': 'Rank',
            'Site': 'Site',
            'Well': 'Well',
            'PosX': 'Spotpos_X',
            'PosY': 'Spotpos_Y',
        }),
    }

    def __init__(self, strFilename, **dctOptions):
        """Import the mapping table from ``strFilename``."""
        super(ExcelMacroPlateMapper, self).__init__(**dctOptions)
        strDelimiter = self.getOption('strFieldDelimiter')
        self._oMappingTable = importTable(strFilename,
                                          fieldDelimiter=strDelimiter)

    def getOptionalColumns(self):
        """Return the optional column names whose mapped column exists
        in the imported table."""
        dctOptional = self.getOption('dctOptionalColumns')
        return [strName
                for strName, strMapped in dctOptional.iteritems()
                if strMapped in self._oMappingTable.getColumnKeys()]

    def iterator(self):
        """Yield one dict per table row, keyed by plate column names.

        Required columns are always copied; optional columns only when the
        row provides the mapped column.
        """
        for oRow in self._oMappingTable:
            dctOut = {}
            dctRequired = self.getOption('dctColumns')
            for strName, strMapped in dctRequired.iteritems():
                dctOut[strName] = oRow[strMapped]
            dctOptional = self.getOption('dctOptionalColumns')
            for strName, strMapped in dctOptional.iteritems():
                if strMapped in oRow.getColumnKeys():
                    dctOut[strName] = oRow[strMapped]
            yield dctOut

    def convertMapping(self, tplSubWells=(3, 3)):
        """Expand every row into one row per sub-well site.

        For each source row, ``tplSubWells[0] * tplSubWells[1]`` rows are
        appended to a new table with consecutive 'Site' numbers and
        re-computed 'Stagepositionnr_' values; the new table then replaces
        the mapping table. The table is printed before and after.
        """
        iSubX, iSubY = tplSubWells[0], tplSubWells[1]
        iBlockSize = iSubX * iSubY
        oExpanded = self._oMappingTable.copyStructure()
        print(oExpanded)
        for oRow in self._oMappingTable:
            iSpotX = oRow['Spotpos_X']
            iSpotY = oRow['Spotpos_Y']
            iStage = oRow['Stagepositionnr_']
            iSite = 1
            for _ in range(iSubY):
                for _ in range(iSubX):
                    # The same record is updated in place and appended for
                    # every site.
                    oRow['Stagepositionnr_'] = \
                        iBlockSize * (iStage - 1) + iSite
                    oRow['Spotpos_X'] = iSpotX
                    oRow['Spotpos_Y'] = iSpotY
                    oRow['Site'] = iSite
                    oExpanded.append(oRow)
                    iSite += 1
        self._oMappingTable = oExpanded
        print(oExpanded)

    def exportMapping(self, strFilename, strFieldDelimiter='\t'):
        """Write the mapping table to ``strFilename``."""
        exportTable(self._oMappingTable,
                    strFilename,
                    fieldDelimiter=strFieldDelimiter,
                    stringDelimiter='',
                    useLabelsAsKeys=True)
class Plate96_4x4(Plate96):
    """Plate96 with 4 x 4 sub-well sites per well."""

    OPTIONS = {
        'tplSubWells': Option((4, 4)),
    }
class Plate96_3x3(Plate96):
    """Plate96 with 3 x 3 sub-well sites per well."""

    OPTIONS = {
        'tplSubWells': Option((3, 3)),
    }
class Plate96Norm(Plate96):
    """Plate96 with axis orientation (1, 1) instead of (1, -1)."""

    OPTIONS = {
        'tplAxisOrientation': Option((1, 1)),
    }
class Plate384_4x3(Plate384):
    """Plate384 with 4 x 3 sub-well sites per well."""

    OPTIONS = {
        'tplSubWells': Option((4, 3)),
    }
class Labtek6(Labtek8):
    """Labtek8 variant with 3 x 2 positions."""

    OPTIONS = {
        'tplDims': Option((3, 2)),
    }
class Labtek8(Labtek):
    """Labtek variant with 4 x 2 positions and its own position distance."""

    OPTIONS = {
        'tplDims': Option((4, 2)),
        'tplPositionDistance': Option((13000, 11000)),
    }
class _Plate(OptionManager):
    """Base class for plate layouts.

    A plate owns a table with one row per (column, row, site) combination.
    The grid size comes from ``tplDims``, the number of sites per well from
    ``tplSubWells``; subclasses supply these through ``OPTIONS``.
    """

    OPTIONS = {
        'strExperimentName': Option(None),
        'lstSortOrder': Option(['Rank', 'Group', 'GeneSymbol', 'Position']),
        # (column, value, rank) triples; the first match sets the rank.
        'lstRanks': Option([
            ('Group', 'Neg. Control', 1),
            ('Group', 'Pos. Control', 2),
            ('Group', 'Control', 3),
        ]),
        'iDefaultRank': Option(1000),
        'tplDims': Option(None),
        'tplPositionDistance': Option(None),
        'tplAxisOrientation': Option(None),
        'tplSubWells': Option(None),
    }

    def __init__(self, **dctOptions):
        """Create the plate table and fill in one row per well site."""
        super(_Plate, self).__init__(**dctOptions)
        lstColumns = [x[0] for x in _PlateMapper.COLUMNS]
        lstTypeCodes = [x[1] for x in _PlateMapper.COLUMNS]
        self._oTable = newTable(lstColumns,
                                columnTypeCodes=lstTypeCodes,
                                convertFromStrings=True)

        tplDims = self.getOption('tplDims')
        tplSubWells = self.getOption('tplSubWells')
        tplAxisOrientation = self.getOption('tplAxisOrientation')
        iSites = tplSubWells[0] * tplSubWells[1]

        for iCol in range(tplDims[0]):
            for iRow in range(tplDims[1]):
                # A non-positive axis orientation mirrors the well label
                # along that axis; PosX/PosY keep the raw loop indices.
                if tplAxisOrientation[0] > 0:
                    iX = iCol + 1
                else:
                    iX = tplDims[0] - iCol
                if tplAxisOrientation[1] > 0:
                    iY = iRow
                else:
                    iY = tplDims[1] - iRow - 1
                strWell = "%s%02d" % (chr(ord('A') + iY), iX)
                for iSite in range(1, iSites + 1):
                    self._oTable.append(data={
                        'PosX': iCol,
                        'PosY': iRow,
                        'Site': iSite,
                        'Well': strWell,
                    })

        self._oTable.index('GENE_SYMBOL', 'GeneSymbol')
        self._oTable.index('GENE_GROUP', 'Group')
        self._oTable.index('POSITION', 'Position')
        self._oTable.index('COORDINATES', ('PosX', 'PosY'))

    def optimizePositions(self):
        """Reorder the table rows by the tour returned from ``tsp`` and add
        an 'OptimizedScanPosition' column numbering the rows from 1."""
        from mito.ccore import tsp
        lstPosData = self.getCoordinates()
        lstIndices = tsp(lstPosData)
        aData = self._oTable.getData()
        # The last tour index is dropped before reordering.
        self._oTable.setData(aData.take(lstIndices[:-1], axis=0))
        self._oTable.appendColumn('OptimizedScanPosition',
                                  typeCode='i',
                                  data=range(1, len(lstPosData) + 1))

    def makeMeander(self):
        """Assign 'Position' labels along a meandering path.

        Columns are walked in alternating direction. Column 0 covers all
        rows; every other column skips row 0. The remaining row-0 cells
        are numbered last, from the highest column back to column 1.
        """
        t = self._oTable
        xDim, yDim = self.getOption('tplDims')
        alternate = False
        pos = 1
        for x in range(xDim):
            if x == 0:
                yr = list(range(yDim))
            else:
                yr = list(range(1, yDim))
            if alternate:
                yr.reverse()
            alternate = not alternate
            for y in yr:
                # The lambda is evaluated immediately, so closing over the
                # loop variables is safe here.
                idx = t.selectIndices(
                    lambda rec: rec['PosX'] == x and rec['PosY'] == y)[0]
                t[idx]['Position'] = _PlateMapper.POSITION_CONVERT % pos
                pos += 1
        for x in reversed(range(1, xDim)):
            idx = t.selectIndices(
                lambda rec: rec['PosX'] == x and rec['PosY'] == 0)[0]
            t[idx]['Position'] = _PlateMapper.POSITION_CONVERT % pos
            pos += 1

    def importMapping(self, oPlateMapper, importSites=False,
                      ignoreCoordinates=False):
        """Copy the records of ``oPlateMapper`` into the plate table.

        Parameters:
          oPlateMapper      -- mapper providing ``getOptionalColumns()``,
                               ``OPTIONAL_COLUMNS``, ``iterator()`` and
                               ``POSITION_CONVERT``.
          importSites       -- accepted for compatibility; not used.
          ignoreCoordinates -- if true, the n-th record updates the n-th
                               table row. Otherwise rows are matched by
                               'Well', else by 'PosX'/'PosY', else by
                               record index.

        Raises AssertionError when the number of matched rows differs from
        the number of sites per well.
        """
        lstOptionalColumns = oPlateMapper.getOptionalColumns()
        for strColumn, strTypeCode in oPlateMapper.OPTIONAL_COLUMNS:
            if strColumn in lstOptionalColumns:
                self._oTable.appendColumn(strColumn, typeCode=strTypeCode)

        tplSubWells = self.getOption('tplSubWells')
        iSites = tplSubWells[0] * tplSubWells[1]
        iDefaultRank = self.getOption('iDefaultRank')
        lstRanks = self.getOption('lstRanks')

        for iCnt, dctRecord in enumerate(oPlateMapper.iterator()):
            if 'Well' in dctRecord:
                # Normalize to an upper-case row letter plus a two-digit
                # column.
                dctRecord['Well'] = '%s%02d' % (
                    dctRecord['Well'][0].upper(),
                    int(dctRecord['Well'][1:]))

            if 'Rank' not in dctRecord or dctRecord['Rank'] is None:
                dctRecord['Rank'] = iDefaultRank
                if lstRanks is not None:
                    for strKey, strValue, iRank in lstRanks:
                        if dctRecord[strKey] == strValue:
                            dctRecord['Rank'] = iRank
                            break

            # Bind the position for every record. It used to be bound only
            # for integer positions, so a non-integer position either
            # raised UnboundLocalError or silently reused the position of
            # an earlier record.
            iPos = dctRecord['Position']
            bConvertPosition = type(iPos) == types.IntType

            if ignoreCoordinates:
                if bConvertPosition:
                    dctRecord['Position'] = \
                        oPlateMapper.POSITION_CONVERT % iPos
                self._oTable[iCnt].update(**dctRecord)
            else:
                if 'Well' in dctRecord:
                    lstIdx = self._oTable.selectIndices(
                        lambda data: data['Well'] == dctRecord['Well'])
                elif 'PosX' in dctRecord and 'PosY' in dctRecord:
                    lstIdx = self._oTable.selectIndices(
                        lambda data: data['PosX'] == dctRecord['PosX']
                        and data['PosY'] == dctRecord['PosY'])
                else:
                    lstIdx = [iCnt]
                assert len(lstIdx) == iSites
                for iCnt2, iIdx in enumerate(lstIdx):
                    if bConvertPosition:
                        # Spread an integer well position over its sites.
                        iConvPos = (iPos - 1) * iSites + iCnt2 + 1
                    else:
                        iConvPos = iPos
                    dctRecord['Position'] = \
                        oPlateMapper.POSITION_CONVERT % iConvPos
                    self._oTable[iIdx].update(**dctRecord)

        self._oTable.updateIndices()
        self._oTable.sort('Position')

    def exportPlate(self, strFilename, strFieldDelimiter='\t'):
        """Write the plate table to ``strFilename`` without row labels."""
        exportTable(self._oTable,
                    strFilename,
                    writeRowLabels=False,
                    fieldDelimiter=strFieldDelimiter,
                    stringDelimiter='')

    def exportScanPositions(self, oScanPositionWriter, bOptimize=False):
        """Scale the positions by axis orientation and position distance
        and pass them to ``oScanPositionWriter.export``.

        With ``bOptimize`` set, ``optimizePositions()`` is run first unless
        the 'OptimizedScanPosition' column already exists.
        """
        if (bOptimize and
                'OptimizedScanPosition' not in self._oTable.getColumnKeys()):
            self.optimizePositions()
        tplAxisOrientation = self.getOption('tplAxisOrientation')
        tplPositionDistance = self.getOption('tplPositionDistance')
        # NOTE(review): getPositions() returns the 'Position' column, yet
        # each item is unpacked as an (x, y) pair -- getCoordinates() looks
        # like what was intended here. Left unchanged; confirm.
        lstPositions = [
            (x * tplAxisOrientation[0] * tplPositionDistance[0],
             y * tplAxisOrientation[1] * tplPositionDistance[1])
            for x, y in self.getPositions()]
        oScanPositionWriter.export(lstPositions)

    def getCoordinates(self, oTable=None, oFilter=None):
        """Return ``(PosX, PosY)`` for every record of ``oTable`` (default:
        the plate table) accepted by the optional ``oFilter`` callable."""
        if oTable is None:
            oTable = self._oTable
        return [(oRecord['PosX'], oRecord['PosY'])
                for oRecord in oTable
                if oFilter is None or oFilter(oRecord)]

    def selectCoordinates(self, strKey, strValue):
        """Return the coordinates of rows whose ``strKey`` equals
        ``strValue``."""
        oTable = self._oTable.select(lambda data: equal(data, strValue),
                                     strKey)
        return self.getCoordinates(oTable=oTable)

    def getPositions(self, oTable=None):
        """Return the 'Position' column of ``oTable`` (default: the plate
        table)."""
        if oTable is None:
            oTable = self._oTable
        return oTable['Position']

    def selectPositions(self, strKey, strValue):
        """Return the positions of rows whose ``strKey`` equals
        ``strValue``."""
        oTable = self._oTable.select(lambda data: equal(data, strValue),
                                     strKey)
        return self.getPositions(oTable=oTable)

    def select(self, strKey, strValue):
        """Return the rows whose ``strKey`` equals ``strValue``."""
        return self._oTable.select(lambda data: equal(data, strValue),
                                   strKey)

    def getDataFromPosition(self, oPos):
        """Return the first row whose 'Position' equals ``oPos``."""
        return self._oTable.select(lambda data: equal(data, oPos),
                                   'Position')[0]

    def getTourLength(self):
        """Return the Euclidean length of the closed tour through all
        coordinates in table order, scaled by ``tplPositionDistance``."""
        tplLastP = None
        fLength = .0
        lstPositions = self.getCoordinates()
        if lstPositions:
            # Close the tour by returning to the first coordinate.
            lstPositions.append(lstPositions[0])
        tplPositionDistance = self.getOption('tplPositionDistance')
        for iX, iY in lstPositions:
            iX *= tplPositionDistance[0]
            iY *= tplPositionDistance[1]
            if tplLastP is not None:
                fLength += math.sqrt(math.pow(iX - tplLastP[0], 2) +
                                     math.pow(iY - tplLastP[1], 2))
            tplLastP = (iX, iY)
        return fLength

    def getEmptyArray(self, oValue=None):
        """Return an array of shape ``tplDims * tplSubWells`` (element
        wise), filled with ``oValue`` if given, otherwise uninitialized."""
        aData = empty(asarray(self.getOption('tplDims')) *
                      asarray(self.getOption('tplSubWells')))
        if oValue is not None:
            aData[:] = oValue
        return aData

    def getRecordIterator(self, lstScanPositions=None):
        """Yield records in ``lstSortOrder`` order, optionally restricted
        to those whose 'Position' is in ``lstScanPositions``."""
        if not self._oTable.hasIndex('AnnotationSort'):
            self._oTable.index('AnnotationSort',
                               self.getOption('lstSortOrder'))
        for oRecord in self._oTable.getRecordIterator(
                indexName='AnnotationSort'):
            if (lstScanPositions is None or
                    oRecord['Position'] in lstScanPositions):
                yield oRecord

    def getGroups(self):
        """Return the unique values of the 'Group' column."""
        return unique(self._oTable['Group'])

    def getGeneSymbols(self):
        """Return the unique values of the 'GeneSymbol' column."""
        return unique(self._oTable['GeneSymbol'])

    def generateColors(self, key, colors):
        """Return one color per row of column ``key``.

        Consecutive rows with the same value share a color; each change of
        value takes the next color, cycling through ``colors``.
        """
        iter_colors = itertools.cycle(colors)
        keys = self._oTable[key]
        key_colors = []
        for idx, name in enumerate(keys):
            if idx == 0 or keys[idx - 1] != name:
                color = next(iter_colors)
            key_colors.append(color)
        return key_colors

    @staticmethod
    def coordinateToLabel(oCoord, strAxis):
        """Convert zero-based coordinates to axis labels.

        For axis 'x' the labels are 1-based numbers as strings, for axis
        'y' they are letters starting at 'A'.

        Raises ValueError for any other axis (this used to surface as an
        UnboundLocalError).
        """
        if strAxis == 'x':
            return ["%d" % (iV + 1) for iV in oCoord]
        if strAxis == 'y':
            return [chr(iV + 65) for iV in oCoord]
        raise ValueError("Unknown axis '%s', expected 'x' or 'y'." %
                         (strAxis,))

    @staticmethod
    def splitWell(strWell, bColumnToInt=True, bRowUppercase=True):
        """Split a well label such as 'a01' into ``(row, column)``.

        The row is the first character, upper-cased unless
        ``bRowUppercase`` is false (the flag used to be ignored). The
        column is the remainder, converted to int unless ``bColumnToInt``
        is false.
        """
        strRow = strWell[0].upper() if bRowUppercase else strWell[0]
        oCol = int(strWell[1:]) if bColumnToInt else strWell[1:]
        return strRow, oCol

    def __str__(self):
        return str(self._oTable)

    def __cmp__(self, oPlate):
        # Plates are ordered by their experiment name.
        return cmp(self.getOption('strExperimentName'),
                   oPlate.getOption('strExperimentName'))

    def sort(self, *args, **options):
        """Sort the plate table; arguments are passed through unchanged."""
        self._oTable.sort(*args, **options)