Exemplo n.º 1
0
def readReferenceMatrix(self, storage, filename):
    """Load a reference matrix from a whitespace-separated text file.

    The file is opened with ``tools.gzOpen``.  If that raises ``IOError``
    the same name is retried below ``tests/``.  Every line of the stripped
    file content becomes one matrix row.

    Args:
        self: Test case; its ``assertEqual`` is used to check the dimensions.
        storage: Object whose ``getSize()`` must equal both the number of
            rows and the number of entries in the first row.
        filename: Path of the reference file.

    Returns:
        A ``DataMatrix`` filled with the parsed float values.
    """
    from pysgpp import DataMatrix

    # read reference matrix
    try:
        fd = tools.gzOpen(filename, 'r')
    except IOError:
        fd = None

    if not fd:
        fd = tools.gzOpen('tests/' + filename, 'r')

    # Release the handle even when reading fails.
    try:
        content = fd.read().strip()
    finally:
        fd.close()
    dat = [l.strip().split(None) for l in content.split('\n')]

    # right number of entries?
    self.assertEqual(storage.getSize(), len(dat))
    self.assertEqual(storage.getSize(), len(dat[0]))

    # The column count is taken from the first row for every row.
    ncols = len(dat[0])
    m_ref = DataMatrix(len(dat), ncols)
    for i, row in enumerate(dat):
        for j in range(ncols):
            m_ref.set(i, j, float(row[j]))

    return m_ref
Exemplo n.º 2
0
def readDataVector(filename):
    """Try to open ``filename`` through ``tools.gzOpen``.

    ``fin`` is set to ``None`` when opening raises ``IOError``.

    NOTE(review): this excerpt contains only the opening step; the handle is
    neither read nor closed here -- confirm against the complete function.
    """
    from pysgpp import DataVector

    try:
        fin = tools.gzOpen(filename, 'r')
    except IOError:
        # "except IOError, e" only parses on Python 2; the bound exception
        # was never used, so the portable form without a name is enough.
        fin = None
Exemplo n.º 3
0
def readReferenceMatrix(self, storage, filename):
    """Try to open the reference file ``filename`` through ``tools.gzOpen``.

    ``fd`` is set to ``None`` when opening raises ``IOError``.

    NOTE(review): this excerpt contains only the opening step; ``self`` and
    ``storage`` are not used here -- confirm against the complete function.
    """
    from pysgpp import DataVector, DataMatrix
    # read reference matrix
    try:
        fd = tools.gzOpen(filename, 'r')
    except IOError:
        # "except IOError, e" only parses on Python 2; the bound exception
        # was never used, so the portable form without a name is enough.
        fd = None
Exemplo n.º 4
0
def readDataVector(filename):
    """Try to open ``filename`` through ``tools.gzOpen``.

    ``fin`` is set to ``None`` when opening raises ``IOError``.

    NOTE(review): this excerpt contains only the opening step; the handle is
    neither read nor closed here -- confirm against the complete function.
    """
    from pysgpp import DataVector

    try:
        fin = tools.gzOpen(filename, 'r')
    except IOError:
        # Portable spelling: the Python-2-only ", e" form bound a name that
        # was never used.
        fin = None
Exemplo n.º 5
0
def readReferenceMatrix(self, storage, filename):
    """Try to open the reference file ``filename`` through ``tools.gzOpen``.

    ``fd`` is set to ``None`` when opening raises ``IOError``.

    NOTE(review): this excerpt contains only the opening step; ``self`` and
    ``storage`` are not used here -- confirm against the complete function.
    """
    from pysgpp import DataVector, DataMatrix
    # read reference matrix
    try:
        fd = tools.gzOpen(filename, 'r')
    except IOError:
        # Portable spelling: the Python-2-only ", e" form bound a name that
        # was never used.
        fd = None
Exemplo n.º 6
0
def readDataVector(filename):
    """Read an ARFF file into column-wise lists of floats.

    The file is opened with ``tools.gzOpen``.  If that raises ``IOError``
    the same name is retried below ``tests/``.

    Args:
        filename: Path of the ARFF file.

    Returns:
        A dict with the keys
        ``"data"``: one list of floats per ``@attribute`` whose name does not
        start with ``class``;
        ``"classes"``: the last value of every data row as float when a
        ``class`` attribute was declared, otherwise an empty list;
        ``"filename"``: the ``filename`` argument as passed in.
    """
    try:
        fin = tools.gzOpen(filename, 'r')
    except IOError:
        fin = None

    if not fin:
        fin = tools.gzOpen('tests/' + filename, 'r')

    data = []
    classes = []
    hasclass = False

    # Release the handle even when a malformed line makes parsing fail.
    try:
        # Header section: collect the attributes up to the @data marker.
        for line in fin:
            sline = line.strip().lower()
            if not sline or sline.startswith("%"):
                continue

            if sline.startswith("@data"):
                break

            if sline.startswith("@attribute"):
                value = sline.split()
                if value[1].startswith("class"):
                    hasclass = True
                else:
                    data.append([])

        # Data section: the same iterator continues right after @data.
        for line in fin:
            sline = line.strip()
            if not sline or sline.startswith("%"):
                continue

            values = sline.split(",")
            if hasclass:
                # The class value is stored in the last column.
                classes.append(float(values[-1]))
                values = values[:-1]
            for i, value in enumerate(values):
                data[i].append(float(value))
    finally:
        fin.close()

    return {"data": data, "classes": classes, "filename": filename}
Exemplo n.º 7
0
def getminmax(filename, separator=None):
    """Compute the minimum and maximum of each parameter column of a file.

    The file is opened with ``tools.gzOpen``.  Each line is split at
    ``separator``; every field except the last one is parsed as float.
    (Presumably the last field is the class/target value -- confirm with the
    callers.)  The number of parameters is taken from the data itself
    instead of an external ``dim`` variable.

    Args:
        filename: Path of the data file.
        separator: Field separator passed to ``str.split``; ``None`` splits
            at arbitrary whitespace.

    Returns:
        A tuple ``(mmin, mmax)`` of two lists with one entry per parameter
        column.  Both lists are empty when the file has no parameter columns.
    """
    mmin = []
    mmax = []
    fd = tools.gzOpen(filename, 'r')
    # Release the handle even when a field cannot be parsed.
    try:
        for line in fd:
            fields = line.split(separator)
            values = [float(field) for field in fields[:-1]]

            # Grow the result lists when a wider line shows up; infinities
            # are neutral start values for min/max of any finite input.
            missing = len(values) - len(mmin)
            if missing > 0:
                mmin.extend([float("inf")] * missing)
                mmax.extend([float("-inf")] * missing)

            for d, value in enumerate(values):
                mmax[d] = max(value, mmax[d])
                mmin[d] = min(value, mmin[d])
    finally:
        fd.close()

    return (mmin, mmax)
Exemplo n.º 8
0
def getminmax(filename, separator=None):
    """Compute the minimum and maximum of each parameter column of a file.

    The file is opened with ``tools.gzOpen``.  Each line is split at
    ``separator``; every field except the last one is parsed as float.
    (Presumably the last field is the class/target value -- confirm with the
    callers.)  The number of parameters is taken from the data itself
    instead of an external ``dim`` variable.

    Args:
        filename: Path of the data file.
        separator: Field separator passed to ``str.split``; ``None`` splits
            at arbitrary whitespace.

    Returns:
        A tuple ``(mmin, mmax)`` of two lists with one entry per parameter
        column.  Both lists are empty when the file has no parameter columns.
    """
    mmin = []
    mmax = []
    fd = tools.gzOpen(filename, 'r')
    # Release the handle even when a field cannot be parsed.
    try:
        for line in fd:
            fields = line.split(separator)
            values = [float(field) for field in fields[:-1]]

            # Grow the result lists when a wider line shows up; infinities
            # are neutral start values for min/max of any finite input.
            missing = len(values) - len(mmin)
            if missing > 0:
                mmin.extend([float("inf")] * missing)
                mmax.extend([float("-inf")] * missing)

            for d, value in enumerate(values):
                mmax[d] = max(value, mmax[d])
                mmin[d] = min(value, mmin[d])
    finally:
        fd.close()

    return (mmin, mmax)
Exemplo n.º 9
0
        p[0] = random.uniform(0, 100)
        p[1] = random.uniform(40 * math.pi, 560 * math.pi)
        p[2] = random.uniform(0, 1)
        p[3] = random.uniform(1, 11)
        eps = random.normalvariate(0.0, 0.1)
        # $ \arctan \left( \left( x_1 x_2 - (x_1 x_3)^{-1} \right) / x_0 \right) + \epsilon $
        p[4] = math.atan((p[1] * p[2] - 1.0 / (p[1] * p[3])) / p[0]) + eps
        X.setRow(i, p)
else:
    sys.exit(1)

# Write the samples held in X: as CSV (via numpy.savetxt) when the output
# name contains ".csv", otherwise as ARFF to the given file or to stdout.
if options.outfile and ".csv" in options.outfile:
    from numpy import savetxt
    #header = ','.join(['x%d'%i for i in xrange(X.getNcols()-1)] + ['classes'])
    savetxt(options.outfile, X.array(), fmt='%.12f', delimiter=',')
    # The CSV was written successfully, so exit with a success status
    # instead of signalling an error to the calling shell.
    sys.exit(0)
elif options.outfile:
    fd = tools.gzOpen(options.outfile, 'w')
else:
    fd = sys.stdout

try:
    # ARFF header: relation name, one numeric attribute per input column and
    # the class attribute for the last column.
    fd.write("""@RELATION "%s"\n\n""" % (namestring))
    for d in range(X.getNcols() - 1):
        fd.write("""@ATTRIBUTE x%d NUMERIC\n""" % (d))
    fd.write("""@ATTRIBUTE class NUMERIC\n\n@Data\n""")
    # ARFF data: one comma-separated line per row of X.
    for i in range(X.getNrows()):
        X.getRow(i, p)
        fd.write(','.join([str(p[d]) for d in range(X.getNcols())]) + "\n")
finally:
    # Only close handles opened here; sys.stdout stays open.
    if options.outfile:
        fd.close()
Exemplo n.º 10
0
        # Sets the column in m
        m.setColumn(i, erg)

    return m


def readReferenceMatrix(self, storage, filename):
    """Load a reference matrix from a whitespace-separated text file.

    The file is opened with ``tools.gzOpen``.  If that raises ``IOError``
    the same name is retried below ``tests/``.  Every line of the stripped
    file content becomes one matrix row.

    Args:
        self: Test case; its ``assertEqual`` is used to check the dimensions.
        storage: Object whose ``size()`` must equal both the number of rows
            and the number of entries in the first row.
        filename: Path of the reference file.

    Returns:
        A ``DataMatrix`` filled with the parsed float values.
    """
    from pysgpp import DataMatrix

    # read reference matrix
    try:
        fd = tools.gzOpen(filename, 'r')
    except IOError:
        fd = None

    if not fd:
        fd = tools.gzOpen('tests/' + filename, 'r')

    # Release the handle even when reading fails.
    try:
        content = fd.read().strip()
    finally:
        fd.close()
    # A real list (not a lazy map object) is needed for len() and indexing.
    dat = [l.strip().split(None) for l in content.split('\n')]

    # right number of entries?
    self.assertEqual(storage.size(), len(dat))
    self.assertEqual(storage.size(), len(dat[0]))

    # The column count is taken from the first row for every row.
    ncols = len(dat[0])
    m_ref = DataMatrix(len(dat), ncols)
    for i, row in enumerate(dat):
        for j in range(ncols):
            m_ref.set(i, j, float(row[j]))

    # Hand the matrix to the caller; otherwise the work above is discarded.
    return m_ref
Exemplo n.º 11
0
        #Sets the column in m
        m.setColumn(i, erg)

    return m


def readReferenceMatrix(self, storage, filename):
    """Load a reference matrix from a whitespace-separated text file.

    The file is opened with ``tools.gzOpen``.  If that raises ``IOError``
    the same name is retried below ``tests/``.  Every line of the stripped
    file content becomes one matrix row.

    Args:
        self: Test case; its ``assertEqual`` is used to check the dimensions.
        storage: Object whose ``size()`` must equal both the number of rows
            and the number of entries in the first row.
        filename: Path of the reference file.

    Returns:
        A ``DataMatrix`` filled with the parsed float values.
    """
    from pysgpp import DataMatrix

    # read reference matrix
    try:
        fd = tools.gzOpen(filename, 'r')
    except IOError:
        fd = None

    if not fd:
        fd = tools.gzOpen('tests/' + filename, 'r')

    # Release the handle even when reading fails.
    try:
        content = fd.read().strip()
    finally:
        fd.close()
    # A real list (not a lazy map object) is needed for len() and indexing.
    dat = [l.strip().split(None) for l in content.split('\n')]

    # right number of entries?
    self.assertEqual(storage.size(), len(dat))
    self.assertEqual(storage.size(), len(dat[0]))

    # The column count is taken from the first row for every row.
    ncols = len(dat[0])
    m_ref = DataMatrix(len(dat), ncols)
    for i, row in enumerate(dat):
        for j in range(ncols):
            m_ref.set(i, j, float(row[j]))

    # Hand the matrix to the caller; otherwise the work above is discarded.
    return m_ref
Exemplo n.º 12
0
# Generate N random samples with 10 input columns and one noisy target
# column and store them as an ARFF file.
N = 10000

X = DataMatrix(N, 11)
p = DataVector(11)

for i in range(N):
    for d in range(10):
        if d == 0 or d == 1:
            # The first two inputs are drawn centred around zero ...
            p[d] = random.uniform(-0.5, 0.5)
        else:
            p[d] = random.random()

    # Target: product of the squared first two inputs plus Gaussian noise.
    eps = random.normalvariate(0.0, 0.1)
    p[10] = p[0]**2 * p[1]**2 + eps

    # ... and shifted by 0.5 only after the target has been computed.
    p[0] += 0.5
    p[1] += 0.5

    X.setRow(i, p)

fd = tools.gzOpen("parabola.10000_test.arff", 'w')
# Release the handle even when writing fails.
try:
    fd.write("""@RELATION "Parabola %s"\n\n""" % (N))
    for d in range(X.getNcols() - 1):
        fd.write("""@ATTRIBUTE x%d NUMERIC\n""" % (d))
    fd.write("""@ATTRIBUTE class NUMERIC\n\n@DATA\n""")
    for i in range(X.getNrows()):
        X.getRow(i, p)
        fd.write(','.join([str(p[d]) for d in range(X.getNcols())]) + "\n")
finally:
    fd.close()
Exemplo n.º 13
0
                  help="Don't use colors. Only for --eps")
parser.add_option("--dotwidth",
                  dest="dotwidth",
                  action="store",
                  type="int",
                  default=None,
                  help="Width of dots (gnuplot lw)")
(options, args) = parser.parse_args()

# check arguments
if not options.data:
    # Parenthesised call form: prints the same text on Python 2 and is
    # valid syntax on Python 3.
    print("--data missing")
    # Let the parser handle its own help option.
    parser.parse_args(['-h'])

# read data
fd = tools.gzOpen(options.data, 'r')
# Release the handle even when reading fails.
try:
    data = fd.readlines()
finally:
    fd.close()

# A blank separator means "split at whitespace": str.split gets None and
# gnuplot keeps its default separator.
if not options.datsep.strip():
    options.datsep = None
    gnuplot_separator = ""
else:
    gnuplot_separator = 'set datafile separator "%s"' % (options.datsep)

# Files ending in .gz are handed to gnuplot through a zcat pipe.
if options.data.endswith(".gz"):
    fname = "< zcat " + options.data
else:
    fname = options.data
# get dimension:
line = data[0].strip().split(options.datsep)
Exemplo n.º 14
0
                  help="Don't plot label")
parser.add_option("--monochrome", dest="monochrome", action="store_true",
                  default=False,
                  help="Don't use colors. Only for --eps")
parser.add_option("--dotwidth", dest="dotwidth", action="store", type="int",
                  default=None,
                  help="Width of dots (gnuplot lw)")
(options, args) = parser.parse_args()

# check arguments
if not options.data:
    # Parenthesised call form: prints the same text on Python 2 and is
    # valid syntax on Python 3.
    print("--data missing")
    # Let the parser handle its own help option.
    parser.parse_args(['-h'])

# read data
fd = tools.gzOpen(options.data, 'r')
# Release the handle even when reading fails.
try:
    data = fd.readlines()
finally:
    fd.close()

# A blank separator means "split at whitespace": str.split gets None and
# gnuplot keeps its default separator.
if not options.datsep.strip():
    options.datsep = None
    gnuplot_separator = ""
else:
    gnuplot_separator = 'set datafile separator "%s"' % (options.datsep)


# Files ending in .gz are handed to gnuplot through a zcat pipe.
if options.data.endswith(".gz"):
    fname = "< zcat " + options.data
else:
    fname = options.data
# get dimension: