def dictFromColumns(fN, keyValCols, keyValTypes, assumeUnique = True):
    '''Build a dictionary from two columns of a tab-delimited file.

    fN           -- path of the file to read
    keyValCols   -- 2-tuple of column indexes: (key column, value column)
    keyValTypes  -- 2-tuple of type names handed to
                    cgLuckyCharmsFlat.getCasteFunction: (key type, value type)
    assumeUnique -- if True each key maps to a single value and a repeated
                    key raises NameError; if False each key maps to a list
                    of every value seen for it, in file order

    Returns the populated dict.
    Raises NameError("Mapping is not 1 to 1") on a repeated key when
    assumeUnique is True.'''
    keyCasteFxn = cgLuckyCharmsFlat.getCasteFunction(keyValTypes[0])
    valCasteFxn = cgLuckyCharmsFlat.getCasteFunction(keyValTypes[1])
    keyCol = keyValCols[0]
    valCol = keyValCols[1]

    key_val = {}
    # "with" guarantees the handle is closed even when the uniqueness check
    # (or a cast / column lookup) raises part-way through the file
    with open(fN, 'r') as f:
        for line in f:
            # NOTE(review): strip() also removes leading/trailing tabs, so an
            # empty first or last field shifts the column indexes -- confirm
            # the input files never have empty edge columns
            ls = line.strip().split('\t')
            key, val = keyCasteFxn(ls[keyCol]), valCasteFxn(ls[valCol])

            if not assumeUnique:
                key_val.setdefault(key, []).append(val)
            elif key in key_val:
                raise NameError("Mapping is not 1 to 1")
            else:
                key_val[key] = val

    return key_val
def loadTranscriptionInfo(self, attNames):
    '''For every selected attribute name, record its two caste functions,
    its column position and its default value, all read from the matching
    field on self._dataClass'''
    for selectedName in attNames:
        fieldSpec = getattr(self._dataClass, selectedName)

        # second argument True -> stored as the "from" caste function
        fromFxn = cgLuckyCharms.getCasteFunction(fieldSpec.dataType, True)
        self._attName_casteFromFxn[selectedName] = fromFxn

        # second argument False -> stored as the "to" caste function
        toFxn = cgLuckyCharms.getCasteFunction(fieldSpec.dataType, False)
        self._attName_casteToFxn[selectedName] = toFxn

        self._attName_columnPosition[selectedName] = fieldSpec.dataSlot
        self._attName_defaultValue[selectedName] = fieldSpec.dataDefault
def loadTranscriptionInfo(self):
    '''For EVERY attribute present in self._attName__formatInfo, record its
    two caste functions, its column position and its default value'''
    for name in self._attName__formatInfo:
        # each entry is a 3-tuple: (slot, type, default)
        formatEntry = self._attName__formatInfo[name]
        slot, typeName, default = formatEntry

        # second argument True -> stored as the "from" caste function
        fromFxn = cgLuckyCharms.getCasteFunction(typeName, True)
        self._attName_casteFromFxn[name] = fromFxn

        # second argument False -> stored as the "to" caste function
        toFxn = cgLuckyCharms.getCasteFunction(typeName, False)
        self._attName_casteToFxn[name] = toFxn

        self._attName_columnPosition[name] = slot
        self._attName_defaultValue[name] = default
def loadTranscriptionInfo(self, attNames):
    '''Fill the per-attribute lookup tables (caste-from fxn, caste-to fxn,
    column position, default value) for each name in attNames, using the
    field of the same name on self._dataClass'''
    for wanted in attNames:
        field = getattr(self._dataClass, wanted)

        # caste functions: the flag is True for "from", False for "to"
        casteFrom = cgLuckyCharms.getCasteFunction(field.dataType, True)
        self._attName_casteFromFxn[wanted] = casteFrom
        casteTo = cgLuckyCharms.getCasteFunction(field.dataType, False)
        self._attName_casteToFxn[wanted] = casteTo

        # layout information copied straight from the field
        self._attName_columnPosition[wanted] = field.dataSlot
        self._attName_defaultValue[wanted] = field.dataDefault
def listFromColumns(fN, columns, valTypes, mergeType = 'lol', naToZero = False):
    '''Read selected columns of a tab-delimited file into lists.

    fN        -- path of the file to read
    columns   -- sequence of column indexes to extract
    valTypes  -- sequence of type names (one per entry of columns) handed to
                 cgLuckyCharmsFlat.getCasteFunction
    mergeType -- 'lol'   : return a list of lists, one per requested column
                 'merge' : return one list, the per-column lists concatenated
                           in the order of columns
    naToZero  -- NOTE(review): currently ignored. A raw field equal to "NA"
                 is ALWAYS replaced by "0" before casting, whatever this flag
                 says. That behavior is kept so existing callers see no
                 change; confirm whether the flag should gate it.

    When exactly one column is requested its list is returned directly,
    regardless of mergeType.

    Raises NameError if columns and valTypes differ in length, or if
    mergeType is neither 'lol' nor 'merge' (and more than one column, or
    none, was requested).'''

    #checks
    if len(columns) != len(valTypes):
        raise NameError("Must provide Types for ALL columns")

    # one caster per requested position (not per column number), so a column
    # requested twice with different types gets the right caster each time
    casteFxns = [cgLuckyCharmsFlat.getCasteFunction(valType) for valType in valTypes]
    lol = [list() for _ in columns]

    # "with" closes the handle even if a cast or column lookup raises
    with open(fN, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')
            for i, colNum in enumerate(columns):
                rawVal = ls[colNum]
                if rawVal == "NA":
                    rawVal = "0"
                lol[i].append(casteFxns[i](rawVal))

    if len(lol) == 1:
        return lol[0]
    elif mergeType == 'lol':
        return lol
    elif mergeType == 'merge':
        mergedList = []
        for colVals in lol:
            mergedList.extend(colVals)
        return mergedList
    else:
        raise NameError("WTH?!")
def listFromColumns(fN, columns, valTypes, mergeType = 'lol'):
    '''Read selected columns of a tab-delimited file into lists.

    fN        -- path of the file to read
    columns   -- sequence of column indexes to extract
    valTypes  -- sequence of type names (one per entry of columns) handed to
                 cgLuckyCharmsFlat.getCasteFunction; every extracted value is
                 passed through the matching caste function
    mergeType -- 'lol'   : return a list of lists, one per requested column
                 'merge' : return one list, the per-column lists concatenated
                           in the order of columns

    When exactly one column is requested its list is returned directly,
    regardless of mergeType.

    Raises NameError if columns and valTypes differ in length, or if
    mergeType is neither 'lol' nor 'merge' (and more than one column, or
    none, was requested).'''

    #checks
    if len(columns) != len(valTypes):
        raise NameError("Must provide Types for ALL columns")

    # one caster and one output list per requested position
    casteFxns = [cgLuckyCharmsFlat.getCasteFunction(valType) for valType in valTypes]
    lol = [list() for _ in columns]

    # "with" closes the handle even if a cast or column lookup raises
    with open(fN, 'r') as f:
        for line in f:
            ls = line.strip().split('\t')
            # i is the position in the output, colNum the position in the
            # line; the two must not be confused, otherwise any column
            # selection other than 0..n-1 indexes the wrong list
            for i, colNum in enumerate(columns):
                lol[i].append(casteFxns[i](ls[colNum]))

    if len(lol) == 1:
        return lol[0]
    elif mergeType == 'lol':
        return lol
    elif mergeType == 'merge':
        mergedList = []
        for colVals in lol:
            mergedList.extend(colVals)
        return mergedList
    else:
        raise NameError("WTH?!")
def loadFormatInfo(self):
    '''Populate self._attName__formatInfo from the column format description.

    self._dataFormatFN is either
      * a list of "quick format" strings, each holding four space-separated
        fields: column number, attribute name, type, default value; or
      * the path of a format file with one attribute per line as three
        tab-separated fields: attribute name, type, default value.

    Each attribute name is mapped to (column position, type, default).
    For the file form the position is the zero-based line index + 1
    (original note: 0 is always the id so start from 1). Blank lines are
    skipped but still count towards the line index, so they stand for
    skipped data columns.

    A default of '.' for a type whose name contains 'List' means an empty
    list; every other default goes through the caste function for its type.'''

    def parseDefault(theType, defValue):
        #check for empty lists:
        if 'List' in theType and defValue == '.':
            return list()
        return cgLuckyCharms.getCasteFunction(theType)(defValue)

    #handle quickFormat
    if isinstance(self._dataFormatFN, list):
        for formatLine in self._dataFormatFN:
            colNum, attName, theType, defValue = formatLine.strip().split(' ')
            colNum = int(colNum)
            self._attName__formatInfo[attName] = (colNum, theType, parseDefault(theType, defValue))
        return

    #handle file
    # "with" closes the handle even if a malformed line or a cast raises
    with open(self._dataFormatFN, 'r') as f:
        for i, line in enumerate(f):
            stripped = line.strip()

            #blank line means skipped data
            if stripped == '':
                continue

            #get formatting info
            attName, theType, defValue = stripped.split('\t')
            self._attName__formatInfo[attName] = (i + 1, theType, parseDefault(theType, defValue))