def smartReadMat(filename):
    """Read a matrix from a file, determining the file type automatically.

    The file type is chosen from the extension of ``filename``.  The
    following formats are currently supported:

    - '.amat' : PLearn ascii matrix format
    - '.pmat' : PLearn binary matrix format
    - '.csv'  : Text-file comma-separated values format

    For '.csv' files, ``csv.Sniffer`` inspects the first 1000 characters to
    decide whether the first row is a header.  When it is not, the names
    'field1', 'field2', ... are generated, one per column.  Every data cell
    is converted with ``float``.

    Parameters:
        filename: path of the file to load.

    Returns:
        A pair ``(matrix, fieldnames)``.

    Raises:
        ValueError: if the extension is not one of the supported ones, or
            if a '.csv' data cell cannot be converted to a float.
    """
    if filename.endswith(".amat"):
        arr, fieldnames = readAMat(filename)

    elif filename.endswith(".pmat"):
        pmat = PMat(filename)
        try:
            arr = pmat.getRows(0, pmat.length)
            fieldnames = pmat.fieldnames
        finally:
            # Release the matrix even if reading the rows fails.
            pmat.close()

    elif filename.endswith(".csv"):
        f = open(filename)
        try:
            # Use the CSV sniffer to detect the presence of a header row.
            sample = f.read(1000)
            has_header = csv.Sniffer().has_header(sample)
            f.seek(0)

            # Load the csv content into an array.
            csv_reader = csv.reader(f)
            if has_header:
                # The next() builtin works on Python 2.6+ and Python 3,
                # unlike the Python-2-only reader.next() method.
                fieldnames = next(csv_reader)

            # numpy.array replaces numpy.numarray.array: the numarray
            # compatibility layer is absent from later NumPy releases.
            arr = numpy.array([[float(value) for value in fields]
                               for fields in csv_reader])
        finally:
            # Close the file even when sniffing or float conversion raises.
            f.close()

        if not has_header:
            # Generate fake fieldnames, numbered from 1.
            fieldnames = ['field%d' % (i + 1) for i in range(arr.shape[1])]

    else:
        raise ValueError("Unrecognized file type for '%s'; valid extensions are: "
                         "{'.amat', '.pmat', '.csv'}" % filename)

    return arr, fieldnames
# Build the summary table: one row per key in `sk`, one column per field
# index in range(self.width()).
m = array([[stats[k][i] for i in range(self.width())] for k in sk])
_printMatrix(m, sk, self.fieldnames, os, pretty)
# NOTE(review): the two headings below are written with `print` (standard
# output) while the matrices are handed `os`; presumably `os` is the output
# stream argument here rather than the os module -- confirm both are meant
# to reach the same destination.
print "\nCovariance Matrix:"
_printMatrix(stats["COV"], self.fieldnames, self.fieldnames, os, pretty)
print "\nCorrelation Matrix:"
_printMatrix(stats["CORR"], self.fieldnames, self.fieldnames, os, pretty)

if __name__ == "__main__":
    # Manual smoke test, run only when the module is executed as a script.
    from plearn.vmat.readAMat import readAMat
    # Load the sample matrix; its path is built from ppath.ppath('PLEARNDIR').
    ut,fieldnames = readAMat( os.path.join( ppath.ppath('PLEARNDIR'), 'examples', 'data', 'test_suite', 'top_100_test.amat' ) )
    # Collect statistics over the data once and print them.
    sc = StatsCollector(fieldnames)
    sc.update(ut)
    sc.printStats(sys.stdout, False)
    # Feed the same data a second time and print again.
    print "\nAfter accumulating some more:"
    sc.update(ut)
    sc.printStats(sys.stdout, False)
    # presumably forget() resets the accumulated statistics -- confirm.
    print "\nAfter forgetting:"
    sc.forget(fieldnames)
    sc.printStats(sys.stdout, False)
"N", "NMISSING", "NNONMISSING", "E", "V", "STDDEV", "STDERR", "SUM", "SUMSQ", "MIN", "ARGMIN", "MAX", "ARGMAX" ]
# Build the summary table: one row per key in `sk`, one column per field
# index in range(self.width()).
m = array([[stats[k][i] for i in range(self.width())] for k in sk])
_printMatrix(m, sk, self.fieldnames, os, pretty)
# NOTE(review): the two headings below are written with `print` (standard
# output) while the matrices are handed `os`; presumably `os` is the output
# stream argument here rather than the os module -- confirm both are meant
# to reach the same destination.
print "\nCovariance Matrix:"
_printMatrix(stats["COV"], self.fieldnames, self.fieldnames, os, pretty)
print "\nCorrelation Matrix:"
_printMatrix(stats["CORR"], self.fieldnames, self.fieldnames, os, pretty)

if __name__ == "__main__":
    # Manual smoke test, run only when the module is executed as a script.
    from plearn.vmat.readAMat import readAMat
    # Load the sample matrix; its path is built from ppath.ppath('PLEARNDIR').
    ut, fieldnames = readAMat( os.path.join(ppath.ppath('PLEARNDIR'), 'examples', 'data', 'test_suite', 'top_100_test.amat'))
    # Collect statistics over the data once and print them.
    sc = StatsCollector(fieldnames)
    sc.update(ut)
    sc.printStats(sys.stdout, False)
    # Feed the same data a second time and print again.
    print "\nAfter accumulating some more:"
    sc.update(ut)
    sc.printStats(sys.stdout, False)
    # presumably forget() resets the accumulated statistics -- confirm.
    print "\nAfter forgetting:"
    sc.forget(fieldnames)
    sc.printStats(sys.stdout, False)