def traverse(self):
    """Read every input file and collect its value columns per key.

    History: 2012.1.9 first version; 2012.8.10 added ``self.noHeader``.

    For each existing path in ``self.inputFnameLs`` the file is opened via
    ``utils.openGzipFile``, its delimiter is sniffed with
    ``figureOutDelimiter`` and it is wrapped in a ``MatrixFile``.  The first
    row is handed to ``self.handleNewHeader`` together with a fresh
    ``valueColumnLs`` list (expected to be filled in place by that method).
    Every row is then passed to ``self.handleValueColumns``.  Keys already in
    ``key2dataLs`` that were not visited in the current file are padded with
    one empty string per value column, so later files stay column-aligned.

    Errors while opening a file, reading its header or handling a single row
    are reported on stderr (with traceback) and processing continues.

    Returns:
        PassingData with attributes ``key2dataLs``, ``delimiter`` (the one
        sniffed from the last file that could be opened, or None) and
        ``header`` (None when ``self.noHeader`` is set).

    Exits:
        ``sys.exit(3)`` if an input file does not exist and
        ``self.exitNonZeroIfAnyInputFileInexistent`` is true.
    """
    import traceback

    newHeader = []
    # key is the keyColumn; dataLs holds the value-column data gathered for it
    key2dataLs = {}
    delimiter = None
    noOfDataColumnsFromPriorFiles = 0
    for inputFname in self.inputFnameLs:
        if not os.path.isfile(inputFname):
            if self.exitNonZeroIfAnyInputFileInexistent:
                sys.exit(3)
            continue

        inputFile = None
        valueColumnLs = []
        try:
            try:
                inputFile = utils.openGzipFile(inputFname)
                delimiter = figureOutDelimiter(inputFile)
                reader = MatrixFile(inputFile=inputFile, delimiter=delimiter)
            except Exception:
                sys.stderr.write('Except type: %s\n'%repr(sys.exc_info()))
                traceback.print_exc()
                # Nothing can be read from this file.  valueColumnLs is still
                # empty, so the prior-column counter would not change either.
                continue

            try:
                header = reader.next()
                self.handleNewHeader(header, newHeader, self.keyColumnLs,
                                     valueColumnLs,
                                     keyColumnSet=self.keyColumnSet)
                if self.noHeader:  # 2012.8.10
                    # The first row is data, not a header: rewind and start a
                    # fresh reader so that row is processed below as well.
                    inputFile.seek(0)
                    reader = MatrixFile(inputFile=inputFile,
                                        delimiter=delimiter)
            except Exception:  # in case something wrong (i.e. file is empty)
                sys.stderr.write('Except type: %s\n'%repr(sys.exc_info()))
                traceback.print_exc()

            if valueColumnLs:
                visitedKeySet = set()
                for row in reader:
                    try:
                        self.handleValueColumns(
                            row, key2dataLs=key2dataLs,
                            keyColumnLs=self.keyColumnLs,
                            valueColumnLs=valueColumnLs,
                            noOfDataColumnsFromPriorFiles=noOfDataColumnsFromPriorFiles,
                            visitedKeySet=visitedKeySet)
                    except Exception:  # a bad row is skipped, not fatal
                        sys.stderr.write('Ignore this row: %s.\n'%repr(row))
                        sys.stderr.write('Except type: %s\n'%repr(sys.exc_info()))
                        traceback.print_exc()
                # append empty data to keys that are not present in the
                # current file
                emptyCells = [''] * len(valueColumnLs)
                for key in set(key2dataLs) - visitedKeySet:
                    key2dataLs[key].extend(emptyCells)
        finally:
            # Always release the handle, including on the early "continue"
            # above and when iterating the reader raises.
            if inputFile is not None:
                inputFile.close()
        noOfDataColumnsFromPriorFiles += len(valueColumnLs)

    if self.noHeader:  # 2012.8.10
        newHeader = None
    return PassingData(key2dataLs=key2dataLs, delimiter=delimiter,
                       header=newHeader)
def traverse(self):
    """Read every input file and feed its rows to ``handleValueColumns``.

    History: 2012.1.9 first version; 2012.8.10 added ``self.noHeader``.

    For each existing path in ``self.inputFnameLs`` the file is opened via
    ``utils.openGzipFile``, its delimiter is sniffed with
    ``figureOutDelimiter`` and it is wrapped in a ``MatrixFile``.  The first
    row is handed to ``self.handleNewHeader`` (together with
    ``self.keyColumnLs`` and ``self.valueColumnLs``).  Every row is then
    passed to ``self.handleValueColumns``, which accumulates into
    ``key2dataLs``.

    Errors while opening a file, reading its header or handling a single row
    are reported on stderr (with traceback) and processing continues.

    Returns:
        PassingData with attributes ``key2dataLs``, ``delimiter`` (the one
        sniffed from the last file that could be opened, or None) and
        ``header`` (None when ``self.noHeader`` is set).

    Exits:
        ``sys.exit(3)`` if an input file does not exist and
        ``self.exitNonZeroIfAnyInputFileInexistent`` is true.
    """
    import traceback

    newHeader = []
    # key is the keyColumn, dataLs corresponds to the sum of each column
    # from valueColumnLs
    key2dataLs = {}
    delimiter = None
    for inputFname in self.inputFnameLs:
        if not os.path.isfile(inputFname):
            if self.exitNonZeroIfAnyInputFileInexistent:
                sys.exit(3)
            continue

        inputFile = None
        try:
            try:
                inputFile = utils.openGzipFile(inputFname)
                delimiter = figureOutDelimiter(inputFile)
                reader = MatrixFile(inputFile=inputFile, delimiter=delimiter)
            except Exception:
                sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
                traceback.print_exc()
                # No reader could be built, so there is nothing to read from
                # this file; move on to the next one.
                continue

            try:
                header = reader.next()
                self.handleNewHeader(header, newHeader, self.keyColumnLs,
                                     self.valueColumnLs,
                                     keyColumnSet=self.keyColumnSet)
                if self.noHeader:  # 2012.8.10
                    # The first row is data, not a header: rewind and start a
                    # fresh reader so that row is processed below as well.
                    inputFile.seek(0)
                    reader = MatrixFile(inputFile=inputFile,
                                        delimiter=delimiter)
            except Exception:  # in case something wrong (i.e. file is empty)
                sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
                traceback.print_exc()

            for row in reader:
                try:
                    self.handleValueColumns(
                        row,
                        key2dataLs=key2dataLs,
                        keyColumnLs=self.keyColumnLs,
                        valueColumnLs=self.valueColumnLs)
                except Exception:  # a bad row is skipped, not fatal
                    sys.stderr.write('Ignore this row: %s.\n' % repr(row))
                    sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
                    traceback.print_exc()
        finally:
            # Always release the handle, including on the early "continue"
            # above and when iterating the reader raises.
            if inputFile is not None:
                inputFile.close()

    if self.noHeader:  # 2012.8.10
        newHeader = None
    return PassingData(key2dataLs=key2dataLs, delimiter=delimiter,
                       header=newHeader)