def readReferenceMatrix(self, storage, filename):
    """Load a whitespace-separated reference matrix and check its size.

    The file is opened through ``tools.gzOpen``. If that raises ``IOError``
    (or returns a falsy handle), the same name is retried below ``tests/``.
    The stripped file content is split on newlines and each line is split
    on whitespace into one matrix row.

    Args:
        storage: object whose ``getSize()`` is compared against both the
            number of rows and the length of the first row.
        filename: path of the reference file.

    Returns:
        A ``pysgpp.DataMatrix`` holding the parsed values as floats.

    Raises:
        Whatever ``self.assertEqual`` raises when the row count or the
        length of the first row differs from ``storage.getSize()``, and
        whatever the fallback ``tools.gzOpen`` call raises.
    """
    from pysgpp import DataMatrix

    try:
        fd = tools.gzOpen(filename, 'r')
    except IOError:
        fd = None
    if not fd:
        # Second attempt: same file name, relative to the tests/ directory.
        fd = tools.gzOpen('tests/' + filename, 'r')

    # Close the handle even when reading fails.
    try:
        content = fd.read().strip()
    finally:
        fd.close()

    rows = [line.split() for line in content.split('\n')]
    n_rows = len(rows)
    n_cols = len(rows[0])

    # right number of entries?
    self.assertEqual(storage.getSize(), n_rows)
    self.assertEqual(storage.getSize(), n_cols)

    m_ref = DataMatrix(n_rows, n_cols)
    for i, row in enumerate(rows):
        # Width is taken from the first row; a shorter row raises IndexError.
        for j in range(n_cols):
            m_ref.set(i, j, float(row[j]))
    return m_ref
def readDataVector(filename): from pysgpp import DataVector try: fin = tools.gzOpen(filename, 'r') except IOError, e: fin = None
def readReferenceMatrix(self, storage, filename): from pysgpp import DataVector, DataMatrix # read reference matrix try: fd = tools.gzOpen(filename, 'r') except IOError, e: fd = None
def readDataVector(filename):
    """Read an ARFF file into per-attribute value lists.

    The file is opened through ``tools.gzOpen``. If that raises ``IOError``
    (or returns a falsy handle), the same name is retried below ``tests/``.

    Header lines are read up to ``@data``. Each ``@attribute`` whose name
    starts with ``class`` (case-insensitive) marks the file as having a
    class column; every other ``@attribute`` adds one data column. In the
    data section the last comma-separated value of each row is taken as the
    class when a class attribute was declared.

    Args:
        filename: path of the ARFF file.

    Returns:
        dict with keys ``"data"`` (list of columns, each a list of floats),
        ``"classes"`` (list of floats, empty without a class attribute) and
        ``"filename"`` (the argument as passed in).

    Raises:
        ValueError: if a data value cannot be converted to float.
        IndexError: if a row has more values than declared data attributes,
            or an ``@attribute`` line has no name.
    """
    try:
        fin = tools.gzOpen(filename, 'r')
    except IOError:
        fin = None
    if not fin:
        # Second attempt: same file name, relative to the tests/ directory.
        fin = tools.gzOpen('tests/' + filename, 'r')

    data = []
    classes = []
    hasclass = False

    # Close the handle even when a malformed line aborts parsing.
    try:
        # get the different section of ARFF-File
        for line in fin:
            sline = line.strip().lower()
            if not sline or sline.startswith("%"):
                continue
            if sline.startswith("@data"):
                break
            if sline.startswith("@attribute"):
                if sline.split()[1].startswith("class"):
                    hasclass = True
                else:
                    data.append([])

        # read in the data stored in the ARFF file; the iterator resumes
        # right after the @data line consumed above
        for line in fin:
            sline = line.strip()
            if not sline or sline.startswith("%"):
                continue
            values = sline.split(",")
            if hasclass:
                classes.append(float(values[-1]))
                values = values[:-1]
            for i, value in enumerate(values):
                data[i].append(float(value))
    finally:
        fin.close()

    return {"data": data, "classes": classes, "filename": filename}
def getminmax(filename, separator=None):
    """Computes min and max for each parameter.

    Reads ``filename`` through ``tools.gzOpen`` and, for every line, updates
    the running minimum and maximum of each column except the last one.

    Note: the result length comes from the module-level name ``dim``, which
    must be defined before this function is called. Columns that never
    occur keep their initial values (``10e10`` for min, ``-10e10`` for max).

    Args:
        filename: path of the data file.
        separator: column separator passed to ``str.split``; ``None`` splits
            on runs of whitespace.

    Returns:
        Tuple ``(mmin, mmax)`` of two lists with ``dim`` entries each.

    Raises:
        ValueError: if a value cannot be converted to float.
        IndexError: if a line has more than ``dim + 1`` columns.
    """
    mmax = [-10e10] * dim
    mmin = [10e10] * dim

    fd = tools.gzOpen(filename, 'r')
    # Close the handle even when a malformed line aborts parsing.
    try:
        for line in fd:
            fields = line.split(separator)
            # The last column is skipped on purpose (range stops one short).
            for d in range(len(fields) - 1):
                value = float(fields[d])
                mmax[d] = max(value, mmax[d])
                mmin[d] = min(value, mmin[d])
    finally:
        fd.close()
    return (mmin, mmax)
def getminmax(filename, separator=None):
    """Computes min and max for each parameter.

    Opens ``filename`` with ``tools.gzOpen`` and scans it line by line,
    tracking the smallest and largest value seen in every column but the
    last.

    Note: the length of both result lists is taken from the module-level
    name ``dim``, which has to exist when this function runs. Entries for
    columns that never appear stay at their start values (``10e10`` for the
    minimum, ``-10e10`` for the maximum).

    Args:
        filename: path of the data file.
        separator: column separator handed to ``str.split``; ``None`` means
            any run of whitespace.

    Returns:
        Tuple ``(mmin, mmax)`` of two lists with ``dim`` entries each.

    Raises:
        ValueError: if a value cannot be converted to float.
        IndexError: if a line has more than ``dim + 1`` columns.
    """
    mmax = [-10e10] * dim
    mmin = [10e10] * dim

    fd = tools.gzOpen(filename, 'r')
    # Make sure the handle is released even if parsing a line fails.
    try:
        for line in fd:
            columns = line.split(separator)
            # Stop one short: the final column is not part of the result.
            for d in range(len(columns) - 1):
                number = float(columns[d])
                mmax[d] = max(number, mmax[d])
                mmin[d] = min(number, mmin[d])
    finally:
        fd.close()
    return (mmin, mmax)
p[0] = random.uniform(0, 100) p[1] = random.uniform(40 * math.pi, 560 * math.pi) p[2] = random.uniform(0, 1) p[3] = random.uniform(1, 11) eps = random.normalvariate(0.0, 0.1) # $ \atan \left( \left( x_1 x_2 - (x_1 x_3)^{-1} \right) / x_0 \right) + \epsilon $ p[4] = math.atan((p[1] * p[2] - 1.0 / (p[1] * p[3])) / p[0]) + eps X.setRow(i, p) else: sys.exit(1) if options.outfile and ".csv" in options.outfile: from numpy import savetxt #header = ','.join(['x%d'%i for i in xrange(X.getNcols()-1)] + ['classes']) savetxt(options.outfile, X.array(), fmt='%.12f', delimiter=',') sys.exit(1) elif options.outfile: fd = tools.gzOpen(options.outfile, 'w') else: fd = sys.stdout fd.write("""@RELATION "%s"\n\n""" % (namestring)) for d in range(X.getNcols() - 1): fd.write("""@ATTRIBUTE x%d NUMERIC\n""" % (d)) fd.write("""@ATTRIBUTE class NUMERIC\n\n@Data\n""") for i in xrange(X.getNrows()): X.getRow(i, p) fd.write(','.join([str(p[d]) for d in range(X.getNcols())]) + "\n") if options.outfile: fd.close()
# Sets the column in m m.setColumn(i, erg) return m def readReferenceMatrix(self, storage, filename): from pysgpp import DataVector, DataMatrix # read reference matrix try: fd = tools.gzOpen(filename, 'r') except IOError, e: fd = None if not fd: fd = tools.gzOpen('tests/' + filename, 'r') dat = fd.read().strip() fd.close() dat = dat.split('\n') dat = map(lambda l: l.strip().split(None), dat) # right number of entries? self.assertEqual(storage.size(), len(dat)) self.assertEqual(storage.size(), len(dat[0])) m_ref = DataMatrix(len(dat), len(dat[0])) for i in xrange(len(dat)): for j in xrange(len(dat[0])): m_ref.set(i, j, float(dat[i][j]))
#Sets the column in m m.setColumn(i, erg) return m def readReferenceMatrix(self, storage, filename): from pysgpp import DataVector, DataMatrix # read reference matrix try: fd = tools.gzOpen(filename, 'r') except IOError, e: fd = None if not fd: fd = tools.gzOpen('tests/' + filename, 'r') dat = fd.read().strip() fd.close() dat = dat.split('\n') dat = map(lambda l: l.strip().split(None), dat) # right number of entries? self.assertEqual(storage.size(), len(dat)) self.assertEqual(storage.size(), len(dat[0])) m_ref = DataMatrix(len(dat), len(dat[0])) for i in xrange(len(dat)): for j in xrange(len(dat[0])): m_ref.set(i, j, float(dat[i][j]))
# Generate N random samples with 10 inputs and one target column and write
# them as an ARFF file.  The target is x0^2 * x1^2 plus Gaussian noise,
# computed while x0 and x1 are still in [-0.5, 0.5]; both are then shifted
# by 0.5 before the row is stored.
N = 10000
X = DataMatrix(N, 11)
p = DataVector(11)
# range() instead of xrange() so the script runs on Python 2 and Python 3.
for i in range(N):
    for d in range(10):
        if d == 0 or d == 1:
            p[d] = random.uniform(-0.5, 0.5)
        else:
            p[d] = random.random()
    eps = random.normalvariate(0.0, 0.1)
    p[10] = p[0]**2 * p[1]**2 + eps
    p[0] += 0.5
    p[1] += 0.5
    X.setRow(i, p)

fd = tools.gzOpen("parabola.10000_test.arff", 'w')
# Close the output even when a write fails part-way through.
try:
    fd.write("""@RELATION "Parabola %s"\n\n""" % (N))
    # Every column but the last is an input attribute; the last is the class.
    for d in range(X.getNcols() - 1):
        fd.write("""@ATTRIBUTE x%d NUMERIC\n""" % (d))
    fd.write("""@ATTRIBUTE class NUMERIC\n\n@DATA\n""")
    for i in range(X.getNrows()):
        X.getRow(i, p)
        fd.write(','.join([str(p[d]) for d in range(X.getNcols())]) + "\n")
finally:
    fd.close()
help="Don't use colors. Only for --eps") parser.add_option("--dotwidth", dest="dotwidth", action="store", type="int", default=None, help="Width of dots (gnuplot lw)") (options, args) = parser.parse_args() # check arguments if not options.data: print "--data missing" parser.parse_args(['-h']) # read data fd = tools.gzOpen(options.data, 'r') data = fd.readlines() fd.close() if not options.datsep.strip(): options.datsep = None gnuplot_separator = "" else: gnuplot_separator = 'set datafile separator "%s"' % (options.datsep) if options.data[-3:] == ".gz": fname = "< zcat " + options.data else: fname = options.data # get dimension: line = data[0].strip().split(options.datsep)
help="Don't plot label") parser.add_option("--monochrome", dest="monochrome", action="store_true", default=False, help="Don't use colors. Only for --eps") parser.add_option("--dotwidth", dest="dotwidth", action="store", type="int", default=None, help="Width of dots (gnuplot lw)") (options,args)=parser.parse_args() # check arguments if not options.data: print "--data missing" parser.parse_args(['-h']) # read data fd = tools.gzOpen(options.data, 'r') data = fd.readlines() fd.close() if not options.datsep.strip(): options.datsep = None gnuplot_separator = "" else: gnuplot_separator = 'set datafile separator "%s"' % (options.datsep) if options.data[-3:] == ".gz": fname = "< zcat "+options.data else: fname = options.data # get dimension: