示例#1
0
def dictFromColumns(fN, keyValCols, keyValTypes, assumeUnique = True):
    '''Build a dictionary from two columns of a tab-delimited file.

    fN: path of the file to read
    keyValCols: 2-tuple of column indices, key column first, value column second
    keyValTypes: 2-tuple of type names handed to
        cgLuckyCharmsFlat.getCasteFunction, key type first, value type second
    assumeUnique: when True every key may occur only once and the result maps
        key -> value; a repeated key raises NameError.  When False the result
        maps key -> list of all values seen for that key, in file order.

    Returns the dictionary that was built.'''

    keyCasteFxn = cgLuckyCharmsFlat.getCasteFunction(keyValTypes[0])
    valCasteFxn = cgLuckyCharmsFlat.getCasteFunction(keyValTypes[1])
    keyCol = keyValCols[0]
    valCol = keyValCols[1]

    key_val = {}
    # the with block closes the file even if a caste function or the
    # uniqueness check raises part-way through the file
    with open(fN, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')

            key, val = keyCasteFxn(ls[keyCol]), valCasteFxn(ls[valCol])

            if not assumeUnique:
                # collect every value seen for this key
                key_val.setdefault(key, []).append(val)
            elif key in key_val:
                raise NameError("Mapping is not 1 to 1")
            else:
                key_val[key] = val

    return key_val
示例#2
0
def dictFromColumns(fN, keyValCols, keyValTypes, assumeUnique = True):
    '''Read a tab-delimited file and map one column onto another.

    fN: path of the file to read
    keyValCols: 2-tuple of column indices (key column, value column)
    keyValTypes: 2-tuple of type names (key type, value type); each is passed
        to cgLuckyCharmsFlat.getCasteFunction to get the converter for that
        column
    assumeUnique: if True the mapping must be 1 to 1 -- a key that shows up a
        second time raises NameError -- and the result is key -> value.
        If False the result is key -> list of values in the order read.

    Returns the resulting dictionary.'''

    keyCasteFxn = cgLuckyCharmsFlat.getCasteFunction(keyValTypes[0])
    valCasteFxn = cgLuckyCharmsFlat.getCasteFunction(keyValTypes[1])
    keyCol = keyValCols[0]
    valCol = keyValCols[1]

    key_val = {}
    # context manager so the handle is released on the error paths too
    # (bad cast, short line, duplicate key), not only on normal completion
    with open(fN, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')
            key = keyCasteFxn(ls[keyCol])
            val = valCasteFxn(ls[valCol])

            if assumeUnique:
                if key in key_val:
                    raise NameError("Mapping is not 1 to 1")
                key_val[key] = val
            else:
                key_val.setdefault(key, []).append(val)

    return key_val
示例#3
0
	def loadTranscriptionInfo(self, attNames):
		'''For each name in attNames, look the field up on the data class and
		store its two caste functions, its column position and its default value'''

		for name in attNames:
			field = getattr(self._dataClass, name)

			#one caste function is requested with flag True, the other with False
			fromFxn = cgLuckyCharms.getCasteFunction(field.dataType, True)
			self._attName_casteFromFxn[name] = fromFxn
			toFxn = cgLuckyCharms.getCasteFunction(field.dataType, False)
			self._attName_casteToFxn[name] = toFxn

			#where the attribute lives and what it falls back to
			self._attName_columnPosition[name] = field.dataSlot
			self._attName_defaultValue[name] = field.dataDefault
示例#4
0
文件: cgNexus.py 项目: cgreer/cgNexus
    def loadTranscriptionInfo(self):
        '''Walk every attribute in the loaded format info and store its two
        caste functions, its column position and its default value'''

        for name in self._attName__formatInfo:
            #each entry is a (slot, type, default) triple
            formatEntry = self._attName__formatInfo[name]
            slot, typeName, default = formatEntry

            fromFxn = cgLuckyCharms.getCasteFunction(typeName, True)
            self._attName_casteFromFxn[name] = fromFxn
            toFxn = cgLuckyCharms.getCasteFunction(typeName, False)
            self._attName_casteToFxn[name] = toFxn

            self._attName_columnPosition[name] = slot
            self._attName_defaultValue[name] = default
示例#5
0
    def loadTranscriptionInfo(self, attNames):
        '''Populate the per-attribute lookup tables (caste functions, column
        position, default value) for the attribute names given'''

        for selected in attNames:
            fieldSpec = getattr(self._dataClass, selected)

            # both caste functions come from the field's data type; only the
            # boolean flag passed to getCasteFunction differs
            casteFrom = cgLuckyCharms.getCasteFunction(fieldSpec.dataType, True)
            self._attName_casteFromFxn[selected] = casteFrom

            casteTo = cgLuckyCharms.getCasteFunction(fieldSpec.dataType, False)
            self._attName_casteToFxn[selected] = casteTo

            self._attName_columnPosition[selected] = fieldSpec.dataSlot
            self._attName_defaultValue[selected] = fieldSpec.dataDefault
示例#6
0
def listFromColumns(fN, columns, valTypes, mergeType = 'lol', naToZero = False):
    '''Read selected columns of a tab-delimited file into lists.

    fN: path of the file to read
    columns: column indices to extract
    valTypes: type names, one per entry of columns (same order); each is
        passed to cgLuckyCharmsFlat.getCasteFunction to get the converter
    mergeType: 'lol' returns a list of lists (one list per requested column),
        'merge' returns one list with the columns concatenated one after
        another
    naToZero: NOTE -- accepted but never consulted; a field equal to "NA" is
        always replaced by "0" before it is cast

    When exactly one column is requested its list is returned directly and
    mergeType is not looked at.

    Raises NameError if columns and valTypes differ in length, or if
    mergeType is not recognised.'''

    #checks
    if len(columns) != len(valTypes):
        raise NameError("Must provide Types for ALL columns")

    # one caste function per requested column, aligned by position so that a
    # column requested twice can still carry a different type each time
    casteFxns = [cgLuckyCharmsFlat.getCasteFunction(valType) for valType in valTypes]

    lol = [[] for _ in columns]
    # with block: the file is closed even if a line is short or a cast fails
    with open(fN, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')
            for colValues, colNum, casteFxn in zip(lol, columns, casteFxns):
                rawVal = ls[colNum]
                if rawVal == "NA":
                    rawVal = "0"
                colValues.append(casteFxn(rawVal))

    if len(lol) == 1:
        return lol[0]
    if mergeType == 'lol':
        return lol
    if mergeType == 'merge':
        mergedList = []
        for colValues in lol:
            mergedList.extend(colValues)
        return mergedList
    raise NameError("WTH?!")
示例#7
0
def listFromColumns(fN, columns, valTypes, mergeType = 'lol'):
    '''Read selected columns of a tab-delimited file into lists.

    fN: path of the file to read
    columns: column indices to extract
    valTypes: type names, one per entry of columns (same order); each is
        passed to cgLuckyCharmsFlat.getCasteFunction and the resulting
        function is applied to every value taken from that column
    mergeType: 'lol' returns a list of lists (one list per requested column),
        'merge' returns one list with the columns concatenated one after
        another

    When exactly one column is requested its list is returned directly and
    mergeType is not looked at.

    Raises NameError if columns and valTypes differ in length, or if
    mergeType is not recognised.'''

    #checks
    if len(columns) != len(valTypes):
        raise NameError("Must provide Types for ALL columns")

    # one caste function per requested column, aligned by position
    casteFxns = [cgLuckyCharmsFlat.getCasteFunction(valType) for valType in valTypes]

    lol = [[] for _ in columns]
    # with block: the file is closed even if a line is short or a cast fails
    with open(fN, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')
            # the result list is chosen by the column's position in `columns`,
            # the field by the column number itself; the two must not be
            # mixed up or any selection other than 0..n-1 lands in the wrong
            # list or runs off the end
            for colValues, colNum, casteFxn in zip(lol, columns, casteFxns):
                colValues.append(casteFxn(ls[colNum]))

    if len(lol) == 1:
        return lol[0]
    if mergeType == 'lol':
        return lol
    if mergeType == 'merge':
        mergedList = []
        for colValues in lol:
            mergedList.extend(colValues)
        return mergedList
    raise NameError("WTH?!")
示例#8
0
文件: cgNexus.py 项目: cgreer/cgNexus
    def loadFormatInfo(self):
        '''Fill self._attName__formatInfo with (slot, type, default) per attribute.

        self._dataFormatFN is either
          - a list of "quickFormat" strings, each
            "<colNum> <attName> <type> <default>" separated by single spaces, or
          - the path of a format file with one "<attName>\\t<type>\\t<default>"
            line per column.  The slot is the line's position in the file
            plus one (0 is always the id so start from 1); a blank line
            stands for skipped data and still uses up its slot.

        A default of '.' on a type whose name contains 'List' becomes an
        empty list; every other default is converted with the caste function
        cgLuckyCharms.getCasteFunction returns for the type.'''

        def parseDefault(theType, defValue):
            '''typed default value for one format line'''
            #check for empty lists:
            if 'List' in theType and defValue == '.':
                return list()
            return cgLuckyCharms.getCasteFunction(theType)(defValue)

        #handle quickFormat
        if isinstance(self._dataFormatFN, list):
            for formatLine in self._dataFormatFN:
                colNum, attName, theType, defValue = formatLine.strip().split(' ')
                colNum = int(colNum)
                self._attName__formatInfo[attName] = (colNum, theType, parseDefault(theType, defValue))
            return

        #handle file; the with block closes it even if a line is malformed
        with open(self._dataFormatFN, 'r') as f:
            for i, line in enumerate(f):

                #blank line means skipped data
                if line.strip() == '':
                    continue

                #get formatting info
                attName, theType, defValue = line.strip().split('\t')

                self._attName__formatInfo[attName] = (i + 1, theType, parseDefault(theType, defValue))