Example #1
def rec2csv(rec_array, csv_file, formatd=None, **kwargs):
    """
    Convenience wrapper function on top of mlab.rec2csv to allow fixed-
    precision output to CSV files

    Parameters
    ----------
    rec_array : numpy 1-d recarray
        The recarray to be written out

    csv_file : str
        CSV file name

    formatd : dict, optional
        Mapping of field names to mlab format objects; float fields are
        switched to fixed-precision FormatDecimal instances

    kwargs : dict
        Keyword arguments to pass through to mlab.rec2csv

    Returns
    -------
    None
    """

    # Get the formatd objects associated with each field
    formatd = mlab.get_formatd(rec_array, formatd)

    # For all FormatFloat objects, switch to FormatDecimal objects
    for (k, v) in formatd.iteritems():
        if isinstance(v, mlab.FormatFloat):
            formatd[k] = FormatDecimal()

    # Pass this specification to mlab.rec2csv
    mlab.rec2csv(rec_array, csv_file, formatd=formatd, **kwargs)
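Note: this example references a FormatDecimal class that is neither defined nor imported above. A minimal sketch of what such a class might look like, assuming it subclasses mlab.FormatObj and formats through Python's decimal module instead of the rounded output of mlab.FormatFloat (the names below are illustrative, not the original implementation):

from decimal import Decimal
from matplotlib import mlab

class FormatDecimal(mlab.FormatObj):
    # hypothetical fixed-precision formatter; mlab.rec2csv calls tostr()
    # on each value when writing the CSV
    def tostr(self, x):
        return str(Decimal(repr(x)))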
Example #3
def otherfunc(roifiles, subjects):
    import numpy as np
    from matplotlib.mlab import rec2csv
    import os
    first = np.recfromcsv(roifiles[0])
    numcons = len(first.dtype.names) - 1
    roinames = ["subject_id"] + first["roi"].tolist()
    formats = ['a20'] + ['f4' for f in roinames[1:]]
    confiles = []
    for con in range(0, numcons):
        recarray = np.zeros(len(roifiles),
                            dtype={
                                'names': roinames,
                                "formats": formats
                            })
        for i, file in enumerate(roifiles):
            recfile = np.recfromcsv(file)
            recarray["subject_id"][i] = subjects[i]
            for roi in roinames[1:]:
                value = recfile["con%02d" % (con + 1)][recfile['roi'] == roi]
                if value:
                    recarray[roi][i] = value
                else:
                    recarray[roi][i] = 999
        filename = os.path.abspath("grouped_con%02d.csv" % (con + 1))
        rec2csv(recarray, filename)
        confiles.append(filename)
    return confiles
Example #4
 def make_csv(self, out_csv, array):
     if out_csv is None:
         return 0
     else:
         print "Generating csv"
         mlab.rec2csv(array, out_csv)
         return 1
Example #5
def testR(d=simple(), size=500):

    X = random_from_categorical_formula(d, size)

    X = ML.rec_append_fields(X, 'response', np.random.standard_normal(size))
    fname = tempfile.mktemp()
    ML.rec2csv(X, fname)
    Rstr = '''
    data = read.table("%s", sep=',', header=T)
    cur.lm = lm(response ~ %s, data)
    COEF = coef(cur.lm)
    ''' % (fname, d.Rstr)
    rpy2.robjects.r(Rstr)
    remove(fname)
    nR = list(np.array(rpy2.robjects.r("names(COEF)")))

    nt.assert_true('(Intercept)' in nR)
    nR.remove("(Intercept)")
    nF = [str(t).replace("_", "").replace("*", ":") for t in d.formula.terms]

    nR = sorted([sorted(n.split(":")) for n in nR])

    nt.assert_true('1' in nF)
    nF.remove('1')

    nF = sorted([sorted(n.split(":")) for n in nF])
    nt.assert_equal(nR, nF)

    return d, X, nR, nF
Example #6
 def test_csv2rec_roundtrip(self):
     delta = datetime.timedelta(days=1)
     date0 = datetime.date(2007,12,16)
     date1 = date0 + delta
     date2 = date1 + delta
     delta = datetime.timedelta(days=1)
     datetime0 = datetime.datetime(2007,12,16,22,29,34,924122)
     datetime1 = datetime0 + delta
     datetime2 = datetime1 + delta
     ra=numpy.rec.fromrecords([
             (123, date0, datetime0, 1197346475.0137341, 'a,bc'),
             (456, date1, datetime1, 123.456, 'd\'ef'),
             (789, date2, datetime2, 0.000000001, 'ghi'),
                         ],
         names='intdata,datedata,datetimedata,floatdata,stringdata')
     fh = StringIO.StringIO()
     mlab.rec2csv( ra, fh )
     fh.seek(0)
     if 0:
         print 'CSV contents:','-'*40
         print fh.read()
         print '-'*40
         fh.seek(0)
     ra2 = mlab.csv2rec(fh)
     fh.close()
     for name in ra.dtype.names:
         if 0:
             print name, repr(ra[name]), repr(ra2[name])
             dt = ra.dtype[name]
             print 'repr(dt.type)',repr(dt.type)
         self.failUnless( numpy.all(ra[name] == ra2[name]) ) # should not fail with numpy 1.0.5
Example #9
 def test_csv2rec_closefile(self):
     # If passed a file-like object, rec2csv should not close it.
     ra=numpy.rec.array([(123, 1197346475.0137341), (456, 123.456)],
                        dtype=[('a', '<i8'), ('b', '<f8')])
     fh = StringIO.StringIO()
     mlab.rec2csv( ra, fh )
     self.failIf( fh.closed )
Example #10
def makediffs(models = _allmodels, verbose = False, kpp = True):
    for model in models:
        model = os.path.splitext(os.path.basename(model))[0]
        if kpp:
            kppdat = csv2rec(os.path.join(model, model + '.dat'), delimiter = ' ')
        else:
            if model not in _modelconfigs:
                raise IOError('If KPP is not properly installed, you cannot run tests on mechanisms other than cbm4, saprc99, and small_strato.')
            kppdat = csv2rec(os.path.join(os.path.dirname(__file__), model + '.dat'), delimiter = ' ')
        pykppdat = csv2rec(os.path.join(model, model + '.pykpp.dat'), delimiter = ',')
        diff = pykppdat.copy()
        pct = pykppdat.copy()
        keys = set(kppdat.dtype.names).intersection(pykppdat.dtype.names)
        notkeys = set(pykppdat.dtype.names).difference(kppdat.dtype.names)
        notkeys.remove('t')
        for k in notkeys:
            diff[k] = np.nan
            pct[k] = np.nan
    
        for k in keys:
            diff[k] = pykppdat[k] - kppdat[k][:]
            pct[k] = diff[k] / kppdat[k][:] * 100
        diff['t'] = pykppdat['t'] - (kppdat['time'] * 3600. + pykppdat['t'][0])
        pct['t'] = diff['t'] / (kppdat['time'] * 3600. + pykppdat['t'][0]) * 100
        
        rec2csv(diff, os.path.join(model, model + '.diff.csv'), delimiter = ',')
        rec2csv(pct, os.path.join(model, model + '.pct.csv'), delimiter = ',')
Example #11
def main():
    print "initializing"
    ap.env.overwriteOutput = True
    ap.env.workspace = WORKSPACE
    
    ras = ["marginal_ag_land_ha",
            "favored_ag_land_ha",
            "ag_wateronly_constrained_ha",
            "ag_landonly_constrained_ha",
            "ag_both_constrained_ha"]
    lbls = ["mar_ha","fav_ha","water_ha","land_ha","both_ha"]
    
    ap.CheckOutExtension("SPATIAL")
    
    POLYS = "mena_plus"
    POLYFIELD = "name"
    
    recs = []
    for i in range(len(ras)):
        ap.sa.ZonalStatisticsAsTable(POLYS,POLYFIELD,ras[i],lbls[i],"DATA","SUM")
        recs.append(ap.da.TableToNumPyArray(lbls[i],[POLYFIELD,"SUM"]))
    
    outrecs = [recs[i]["SUM"] for i in range(len(recs))]
    outrecs.extend([recs[i][POLYFIELD] for i in range(len(recs))])
    mlab.rec2csv(np.rec.fromarrays(outrecs, names=lbls),OUTCSV)
    
    
    print "complete"
Example #12
def rewrite_spec(subj, run, root = "/home/jtaylo/FIAC-HBM2009"):
    """
    Take a FIAC specification file and get two specifications
    (experiment, begin).

    This creates two new .csv files, one for the experimental
    conditions, the other for the "initial" confounding trials that
    are to be modelled out. 

    For the block design, the "initial" trials are the first
    trials of each block. For the event designs, the 
    "initial" trials are made up of just the first trial.

    """

    if exists(pjoin("%(root)s", "fiac%(subj)d", "subj%(subj)d_evt_fonc%(run)d.txt") % {'root':root, 'subj':subj, 'run':run}):
        designtype = 'evt'
    else:
        designtype = 'bloc'

    # Fix the format of the specification so it is
    # more in the form of a 2-way ANOVA

    eventdict = {1:'SSt_SSp', 2:'SSt_DSp', 3:'DSt_SSp', 4:'DSt_DSp'}
    s = StringIO()
    w = csv.writer(s)
    w.writerow(['time', 'sentence', 'speaker'])

    specfile = pjoin("%(root)s", "fiac%(subj)d", "subj%(subj)d_%(design)s_fonc%(run)d.txt") % {'root':root, 'subj':subj, 'run':run, 'design':designtype}
    d = np.loadtxt(specfile)
    for row in d:
        w.writerow([row[0]] + eventdict[row[1]].split('_'))
    s.seek(0)
    d = csv2rec(s)

    # Now, take care of the 'begin' event
    # This is due to the FIAC design

    if designtype == 'evt':
        b = np.array([(d[0]['time'], 1)], np.dtype([('time', np.float),
                                                    ('initial', np.int)]))
        d = d[1:]
    else:
        k = np.equal(np.arange(d.shape[0]) % 6, 0)
        b = np.array([(tt, 1) for tt in d[k]['time']], np.dtype([('time', np.float),
                                                                 ('initial', np.int)]))
        d = d[~k]

    designtype = {'bloc':'block', 'evt':'event'}[designtype]

    fname = pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s", "experiment_%(run)02d.csv") % {'root':root, 'subj':subj, 'run':run, 'design':designtype}
    rec2csv(d, fname)
    experiment = csv2rec(fname)

    fname = pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s", "initial_%(run)02d.csv") % {'root':root, 'subj':subj, 'run':run, 'design':designtype}
    rec2csv(b, fname)
    initial = csv2rec(fname)

    return d, b
Example #13
    def to_file(self, filename, **kwargs):
        """
        Saves results to file, which will be gzipped if `filename` has a .gz
        extension.

        kwargs are passed to matplotlib.mlab.rec2csv
        """
        rec2csv(self.data, filename, **kwargs)
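A usage sketch, assuming a results object exposing this method: any mlab.rec2csv keyword such as delimiter can be forwarded, and the .gz suffix triggers the gzipped output mentioned in the docstring.

results.to_file('scores.csv.gz', delimiter='\t')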
Example #15
def convert(infilename,
            outfilename,
            ):

    results = tables.open_file(infilename,mode='r')
    ra = results.root.textlog[:]
    results.close()
    mlab.rec2csv( ra, outfilename)
Example #17
def test_rec2csv_bad_shape():
    # create the file handle before the try block so the finally clause
    # can always close it
    fd = tempfile.TemporaryFile(suffix='csv')
    try:
        bad = np.recarray((99, 4), [('x', np.float), ('y', np.float)])

        # the bad recarray should trigger a ValueError for having ndim > 1.
        mlab.rec2csv(bad, fd)
    finally:
        fd.close()
Example #18
def write_results_to_csv(results, directory):

    experiments, outcomes = results
#     deceased_pop = outcomes['relative market price']
#     time = outcomes[TIME]
    
    rec2csv(experiments, directory+'/experiments.csv', withheader=True)
    
    for key, value in outcomes.iteritems():
        np.savetxt(directory+'/{}.csv'.format(key), value, delimiter=',')
Example #20
def interesting_out(opts, interesting, data):
    """
    Take a list of fields, and the recs
    output recs as csv to opts["out"], e.g. --out
    """
    header = True
    from matplotlib import mlab
    for d in data:
        cleaned = mlab.rec_keep_fields(d, interesting)
        mlab.rec2csv(cleaned, opts["out"], withheader=header)
        header = False
Example #21
def main():
    inputlist = ["bin/global_BWS_20121015.csv","bin/global_WRI_20121015.csv"]
    lhs = mlab.csv2rec("bin/global_GU_20121015.csv")
    rhslist = []
    for x in inputlist:
        rhslist.append(mlab.csv2rec(x))
    
    rhslist[0]["basinid"] = rhslist[0]["basinid"].astype(np.long)
    keys = ("basinid","countryid","id")
    lhs = join_recs_on_keys(lhs,rhslist,keys)
    mlab.rec2csv(lhs,"bin/test.csv")
    print "complete"
Example #22
def main():
    print "initializing"
    
    ap.env.overwriteOutput = True
    
    #"World_Cylindrical_Equal_Area"
    sr = ap.SpatialReference(54034) 
    ap.Project_management(BASINPOLY, TMP_OUT, sr)
    ap.CalculateAreas_stats(TMP_OUT,TMP_OUT2)
    out = ap.da.FeatureClassToNumPyArray(TMP_OUT2,[BASIN_ID_FIELD,"F_AREA"])
    mlab.rec2csv(out,AREACSV)
    
    print "complete"
Example #23
def test_recarray_csv_roundtrip():
    expected = np.recarray((99, ), [('x', np.float), ('y', np.float),
                                    ('t', np.float)])
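    # note: only three elements are initialised below; the remaining slots
    # hold uninitialised memory (later variants of this test fill every value)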
    expected['x'][0] = 1
    expected['y'][1] = 2
    expected['t'][2] = 3
    fd = tempfile.TemporaryFile(suffix='csv')
    mlab.rec2csv(expected, fd)
    fd.seek(0)
    actual = mlab.csv2rec(fd)
    fd.close()
    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
Example #24
def test_recarray_csv_roundtrip():
    expected = np.recarray((99, ), [('x', np.float), ('y', np.float),
                                    ('t', np.float)])
    expected['x'][:] = np.linspace(-1e9, -1, 99)
    expected['y'][:] = np.linspace(1, 1e9, 99)
    expected['t'][:] = np.linspace(0, 0.01, 99)
    fd = tempfile.TemporaryFile(suffix='csv')
    mlab.rec2csv(expected, fd)
    fd.seek(0)
    actual = mlab.csv2rec(fd)
    fd.close()
    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
Example #27
File: testing.py  Project: ainafp/nilearn
    def __call__(self, *args, **kwargs):
        """Load requested dataset, downloading it if needed or requested.

        For test purpose, instead of actually fetching the dataset, this
        function creates empty files and return their paths.
        """
        kwargs['mock'] = True
        files = original_fetch_files(*args, **kwargs)
        # Fill CSV files with given content if needed
        for f in files:
            basename = os.path.basename(f)
            if basename in self.csv_files:
                array = self.csv_files[basename]
                rec2csv(array, f)
        return files
Example #28
def write_results_to_csv(results, directory):

    experiments, outcomes = results
#     deceased_pop = outcomes['relative market price']
#     time = outcomes[TIME]
    
    rec2csv(experiments, directory+'/experiments.csv', withheader=True)
    
    for key, value in outcomes.iteritems():
        np.savetxt(directory+'/{}.csv'.format(key), value, delimiter=',')
#     np.savetxt('./data/scarcity/relative_market_price.csv', deceased_pop, delimiter=',')
#     np.savetxt('./data/scarcity/time.csv', time, delimiter=',')
#     
    for entry in experiments.dtype.descr:
        print entry
Example #30
def main(basin_csv, basin_poly, storage_pts, stor_csv):
    basin_rec = mlab.csv2rec(basin_csv)

    ids = basin_rec["basinid"]
    d_ids = basin_rec["dwnbasinid"]

    ap.Identity_analysis(storage_pts, basin_poly, TMP_OUT, "NO_FID")
    out = ap.da.FeatureClassToNumPyArray(TMP_OUT,[STOR_FIELD,BASIN_ID_FIELD])

    stor = np.array([np.sum(out[STOR_FIELD][out[BASIN_ID_FIELD]==i]) for i in ids])

    fa_stor = fa.accumulate(ids,d_ids,f0,f,stor)

    outrec = np.rec.fromarrays((ids,stor,fa_stor),names=("basinid","stor","fa_stor"))
    
    mlab.rec2csv(outrec, stor_csv)
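f0 and f are not defined in this example; they are the seed and fold functions passed to fa.accumulate. Example #36 defines a compatible pair, reproduced here as a sketch of what this module presumably provides:

import numpy as np

def f0(i, values):
    # a basin's own (local) value
    return values[i]

def f(i, idx, values, *args):
    # accumulated value: sum over upstream basins plus the basin's own value
    return np.sum(values[idx]) + f0(i, *args)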
Example #31
    def test_recarray_csv_roundtrip(self):
        expected = np.recarray((99, ), [('x', np.float), ('y', np.float),
                                        ('t', np.float)])
        # initialising all values: uninitialised memory sometimes produces
        # floats that do not round-trip to string and back.
        expected['x'][:] = np.linspace(-1e9, -1, 99)
        expected['y'][:] = np.linspace(1, 1e9, 99)
        expected['t'][:] = np.linspace(0, 0.01, 99)

        mlab.rec2csv(expected, self.fd)
        self.fd.seek(0)
        actual = mlab.csv2rec(self.fd)

        np.testing.assert_allclose(expected['x'], actual['x'])
        np.testing.assert_allclose(expected['y'], actual['y'])
        np.testing.assert_allclose(expected['t'], actual['t'])
Example #32
def test_recarray_csv_roundtrip():
    expected = np.recarray((99,),
                          [('x',np.float),('y',np.float),('t',np.float)])
    # initialising all values: uninitialised memory sometimes produces floats
    # that do not round-trip to string and back.
    expected['x'] = np.linspace(0,1e-200,99)
    expected['y'] = np.linspace(0,1,99)
    expected['t'] = np.linspace(0,1e300,99)
    fd = tempfile.TemporaryFile(suffix='csv', mode="w+")
    mlab.rec2csv(expected,fd)
    fd.seek(0)
    actual = mlab.csv2rec(fd)
    fd.close()
    assert np.allclose( expected['x'], actual['x'] )
    assert np.allclose( expected['y'], actual['y'] )
    assert np.allclose( expected['t'], actual['t'] )
Example #33
def test_recarray_csv_roundtrip():
    expected = np.recarray((99, ), [('x', np.float), ('y', np.float),
                                    ('t', np.float)])
    # initialising all values: uninitialised memory sometimes produces floats
    # that do not round-trip to string and back.
    expected['x'][:] = np.linspace(-1e9, -1, 99)
    expected['y'][:] = np.linspace(1, 1e9, 99)
    expected['t'][:] = np.linspace(0, 0.01, 99)
    fd = tempfile.TemporaryFile(suffix='csv', mode="w+")
    mlab.rec2csv(expected, fd)
    fd.seek(0)
    actual = mlab.csv2rec(fd)
    fd.close()
    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
Example #34
def write_results_to_csv(results, directory):

    experiments, outcomes = results
    #     deceased_pop = outcomes['relative market price']
    #     time = outcomes[TIME]

    rec2csv(experiments, directory + '/x.csv', withheader=True)

    for key, value in outcomes.iteritems():
        np.savetxt(directory + '/{}.csv'.format(key), value, delimiter=',')


#     np.savetxt('./data/scarcity/relative_market_price.csv', deceased_pop, delimiter=',')
#     np.savetxt('./data/scarcity/time.csv', time, delimiter=',')
#
    for entry in experiments.dtype.descr:
        print entry
Example #35
def getrange(date, num):

    daily_arrays = []

    for day in range(num):

        string_date = date.strftime('%Y%m%d')
        daily_arrays.append(fetchday(string_date))
        date += datetime.timedelta(1)

    full_range = concatenate(daily_arrays, axis=1)
    
    filename = string_date+'+'+str(num)+'.csv'
    mlab.rec2csv(full_range, filename)
    print 'saved as ', filename

    return full_range
Example #36
def test():
    """Test script"""
    import matplotlib.mlab as mlab
    import time
    import gen_merge

    BASINCSV = r"C:\Users\francis.gassert\Documents\ArcGIS\GISSync\global_maps\basins_15006.csv"
    BASINID = "basinid"
    DWNBASIN = "dwnbasinid"
    OUTCSV = r"C:\Users\francis.gassert\Documents\ArcGIS\GISSync\global_maps\bt_test.csv"
    runoffcsv = r"C:\Users\francis.gassert\Documents\ArcGIS\GISSync\global_maps\global-GLDAS-2.0_Noah-3.3_M.020-20121211-filled-20130821-RO.csv"
    
    basin_arr = mlab.csv2rec(BASINCSV)
    ids = basin_arr[BASINID]
    d_ids = basin_arr[DWNBASIN]
    r_arr = mlab.csv2rec(runoffcsv)
    r = r_arr["2010"]
    assert np.all(r_arr[BASINID]==ids)
    
    def f0( i, r ):
        return r[i]
    def f( i, idx, values, *args ):
        return np.sum(values[idx]) + f0(i, *args)
    
    
    time.clock()
    #id_dict = dict(zip(ids, upstream_ids(ids, d_ids)))
    #r2 = gen_merge.arrange_vector_by_ids(r, ids, np.arange(len(ids)+1))
    #out1 = np.array([np.sum(r2[id_dict[i]])+r2[i] for i in ids])
    #t1 = time.clock()

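    # accumulate presumably folds f over each basin's upstream contributors,
    # seeded by f0, yielding the total upstream-plus-local runoff per basin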
    out2 = accumulate(ids, d_ids, f0, f, r)
    t2 = time.clock()
    
    btcsv = r"C:\Users\francis.gassert\Documents\ArcGIS\GISSync\global_maps\global-GLDAS-2.0_Noah-3.3_M.020-20121211-filled-20130821-Bt.csv"
    bt_arr = mlab.csv2rec(btcsv)
    bt = bt_arr["2010"]

    #print ("time1: %s" % t1)
    print ("time2: %s" % t2)

    #print ("error1: %s " % (np.sum(out1-bt)/np.sum(bt)) )
    print ("error2: %s " % (np.sum(out2-bt)/np.sum(bt)) )
    
    outrec2 = np.rec.fromarrays((ids,out2),names=(BASINID,"2010"))
    mlab.rec2csv(outrec2,OUTCSV)
Example #37
    def test_csv2rec_roundtrip(self):

        # Make sure double-precision floats and strings pass through a
        # roundtrip unaltered.

        # A bug in numpy (fixed in r4602) meant that numpy scalars
        # lost precision when passing through repr(). csv2rec was
        # affected by this. This test will only pass on numpy >=
        # 1.0.5.
        delta = datetime.timedelta(days=1)
        date0 = datetime.date(2007, 12, 16)
        date1 = date0 + delta
        date2 = date1 + delta

        delta = datetime.timedelta(days=1)
        datetime0 = datetime.datetime(2007, 12, 16, 22, 29, 34, 924122)
        datetime1 = datetime0 + delta
        datetime2 = datetime1 + delta
        ra = numpy.rec.fromrecords(
            [
                (123, date0, datetime0, 1197346475.0137341, 'a,bc'),
                (456, date1, datetime1, 123.456, 'd\'ef'),
                (789, date2, datetime2, 0.000000001, 'ghi'),
            ],
            names='intdata,datedata,datetimedata,floatdata,stringdata')

        fh = StringIO.StringIO()
        mlab.rec2csv(ra, fh)
        fh.seek(0)
        if 0:
            print('CSV contents:', '-' * 40)
            print(fh.read())
            print('-' * 40)
            fh.seek(0)
        ra2 = mlab.csv2rec(fh)
        fh.close()
        #print 'ra', ra
        #print 'ra2', ra2
        for name in ra.dtype.names:
            if 0:
                print(name, repr(ra[name]), repr(ra2[name]))
                dt = ra.dtype[name]
                print('repr(dt.type)', repr(dt.type))
            self.failUnless(numpy.all(
                ra[name] == ra2[name]))  # should not fail with numpy 1.0.5
Example #38
def main():
    print "initializing"
    ap.env.overwriteOutput = True
    ap.env.workspace = WORKSPACE
  
    print "copying"
    ap.CopyFeatures_management(INFEATURES,OUTFEATURES)
    
    print "joining"
    arr = mlab.csv2rec(INCSV)
    
    ap.da.ExtendTable(OUTFEATURES,JOIN_FIELD_SHP,arr,JOIN_FIELD_CSV)
    
    print "saving"
    arr = ap.da.TableToNumPyArray(OUTFEATURES,"*")
    mlab.rec2csv(arr,OUTCSV)
    
    print "complete"
Example #39
    def test_csv2rec_masks(self):
        csv = """date,age,weight,name
2007-01-01,12,32.2,"jdh1"
0000-00-00,0,23,"jdh2"
2007-01-03,,32.5,"jdh3"
2007-01-04,12,NaN,"jdh4"
2007-01-05,-1,NULL,"""
        missingd = dict(date='0000-00-00', age='-1', weight='NULL')
        fh = StringIO.StringIO(csv)
        r1 = mlab.csv2rec(fh, missingd=missingd)
        fh = StringIO.StringIO()
        mlab.rec2csv(r1, fh, missingd=missingd)
        fh.seek(0)
        r2 = mlab.csv2rec(fh, missingd=missingd)
        self.failUnless( numpy.all( r2['date'].mask   == [0,1,0,0,0] ))
        self.failUnless( numpy.all( r2['age'].mask    == [0,0,1,0,1] ))
        self.failUnless( numpy.all( r2['weight'].mask == [0,0,0,0,1] ))
        self.failUnless( numpy.all( r2['name'].mask   == [0,0,0,0,1] ))
Example #40
    def test_recarray_csv_roundtrip(self):
        expected = np.recarray((99,),
                               [(str('x'), np.float),
                                (str('y'), np.float),
                                (str('t'), np.float)])
        # initialising all values: uninitialised memory sometimes produces
        # floats that do not round-trip to string and back.
        expected['x'][:] = np.linspace(-1e9, -1, 99)
        expected['y'][:] = np.linspace(1, 1e9, 99)
        expected['t'][:] = np.linspace(0, 0.01, 99)

        mlab.rec2csv(expected, self.fd)
        self.fd.seek(0)
        actual = mlab.csv2rec(self.fd)

        np.testing.assert_allclose(expected['x'], actual['x'])
        np.testing.assert_allclose(expected['y'], actual['y'])
        np.testing.assert_allclose(expected['t'], actual['t'])
Example #41
    def test_csv2rec_roundtrip(self):

        # Make sure double-precision floats and strings pass through a
        # roundtrip unaltered.

        # A bug in numpy (fixed in r4602) meant that numpy scalars
        # lost precision when passing through repr(). csv2rec was
        # affected by this. This test will only pass on numpy >=
        # 1.0.5.
        delta = datetime.timedelta(days=1)
        date0 = datetime.date(2007,12,16)
        date1 = date0 + delta
        date2 = date1 + delta

        delta = datetime.timedelta(days=1)
        datetime0 = datetime.datetime(2007,12,16,22,29,34,924122)
        datetime1 = datetime0 + delta
        datetime2 = datetime1 + delta
        ra=numpy.rec.fromrecords([
                (123, date0, datetime0, 1197346475.0137341, 'a,bc'),
                (456, date1, datetime1, 123.456, 'd\'ef'),
                (789, date2, datetime2, 0.000000001, 'ghi'),
                            ],
            names='intdata,datedata,datetimedata,floatdata,stringdata')

        fh = StringIO.StringIO()
        mlab.rec2csv( ra, fh )
        fh.seek(0)
        if 0:
            print 'CSV contents:','-'*40
            print fh.read()
            print '-'*40
            fh.seek(0)
        ra2 = mlab.csv2rec(fh)
        fh.close()
        #print 'ra', ra
        #print 'ra2', ra2
        for name in ra.dtype.names:
            if 0:
                print name, repr(ra[name]), repr(ra2[name])
                dt = ra.dtype[name]
                print 'repr(dt.type)',repr(dt.type)
            self.failUnless( numpy.all(ra[name] == ra2[name]) ) # should not fail with numpy 1.0.5
Example #42
def test_recarray_csv_roundtrip():
    expected = np.recarray((99,),
                          [('x',np.float),('y',np.float),('t',np.float)])
    # initialising all values: uninitialised memory sometimes produces floats
    # that do not round-trip to string and back.
    expected['x'][:] = np.linspace(-1e9, -1, 99)
    expected['y'][:] = np.linspace(1, 1e9, 99)
    expected['t'][:] = np.linspace(0, 0.01, 99)
    if sys.version_info[0] == 2:
        fd = tempfile.TemporaryFile(suffix='csv', mode="wb+")
    else:
        fd = tempfile.TemporaryFile(suffix='csv', mode="w+", newline='')
    mlab.rec2csv(expected,fd)
    fd.seek(0)
    actual = mlab.csv2rec(fd)
    fd.close()
    assert np.allclose( expected['x'], actual['x'] )
    assert np.allclose( expected['y'], actual['y'] )
    assert np.allclose( expected['t'], actual['t'] )
Example #43
def main(basin_csv, ut_csv, uc_csv, ncons_csv):
    basin_rec = mlab.csv2rec(basin_csv)

    uc_rec = mlab.csv2rec(uc_csv)
    ut_rec = mlab.csv2rec(ut_csv)

    ids = basin_rec["basinid"]
    d_ids = basin_rec["dwnbasinid"]
     
    uc = gen_merge.arrange_vector_by_ids(uc_rec["ct"],uc_rec["basinid"],ids)
    ut = gen_merge.arrange_vector_by_ids(ut_rec["ut"],ut_rec["basinid"],ids)
    unc = ut - uc

    n = len(ids)

    ncons = fa.accumulate(ids,d_ids,f0,f,unc)

    outrec = np.rec.fromarrays((ids,ncons),names=("basinid","ncons"))
    
    mlab.rec2csv(outrec, ncons_csv)
Example #44
    def test_csv2rec_masks(self):
        # Make sure masked entries survive roundtrip

        csv = """date,age,weight,name
2007-01-01,12,32.2,"jdh1"
0000-00-00,0,23,"jdh2"
2007-01-03,,32.5,"jdh3"
2007-01-04,12,NaN,"jdh4"
2007-01-05,-1,NULL,"""
        missingd = dict(date='0000-00-00', age='-1', weight='NULL')
        fh = StringIO.StringIO(csv)
        r1 = mlab.csv2rec(fh, missingd=missingd)
        fh = StringIO.StringIO()
        mlab.rec2csv(r1, fh, missingd=missingd)
        fh.seek(0)
        r2 = mlab.csv2rec(fh, missingd=missingd)

        self.failUnless( numpy.all( r2['date'].mask   == [0,1,0,0,0] ))
        self.failUnless( numpy.all( r2['age'].mask    == [0,0,1,0,1] ))
        self.failUnless( numpy.all( r2['weight'].mask == [0,0,0,0,1] ))
        self.failUnless( numpy.all( r2['name'].mask   == [0,0,0,0,1] ))
Example #45
def main():
    
    basins = mlab.csv2rec(BASINCSV)
    dwnbasin = basins["dwnbasinid"].astype(np.long)
    basinid = basins["basinid"].astype(np.long)
    dwnbasin[dwnbasin==0]=basinid[dwnbasin==0]
    
    numbasins = np.max(basinid)
    dbid = np.zeros(numbasins+1, dtype=np.long)
    dbid[basinid] = dwnbasin
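
    # repeatedly follow downstream pointers (dbid = dbid[dbid]) until a fixed
    # point is reached, so that every basin maps to its terminal "big" basin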
    
    olddbid = dbid.copy()
    dbid1 = dbid.copy()
    dbid = dbid[dbid]
    while np.sum(olddbid!=dbid):
        olddbid = dbid.copy()
        dbid = dbid[dbid]
    
    outrec = np.rec.fromarrays([np.arange(0,numbasins+1),dbid,dbid1], names=("BasinID","bigbasin","dbid"))
    mlab.rec2csv(outrec,"big_basins.csv")
    
    ap.CopyFeatures_management(RAWBASINS, BIGBASINS)
    ap.da.ExtendTable(BIGBASINS, BASIN_ID_FIELD, outrec, BASIN_ID_FIELD)
Example #46
def complete_data_single(symbol, saveNewFile=False):
    good_dates = get_data("SPY").date
    data = get_data(symbol)
    dates = data.date

    # First check if the records from the first days are missing
    # and fill this data with the record found
    if not (good_dates[0] in dates):
        # First find the most recent values that is on the data
        open_val = data[0][1]
        high = data[0][2]
        low = data[0][3]
        close = data[0][4]
        volume = data[0][5]
        adj_close = data[0][6]

        # Then add that record to the beginning until data starts
        # it is necessary to modify the date
        i = 0
        while not (good_dates[i] in dates):
            new = (good_dates[i], open_val, high, low, close, volume,
                   adj_close)
            #n = np.array(most_recent, dtype=data.dtype)
            data = np.insert(data, i, new, 0)
            i = i + 1

    # TODO: handle missing values that are not at the beginning

    if saveNewFile:
        try:
            os.remove('./data/%s - old.csv' % symbol)
        except OSError:
            pass
        os.rename('./data/%s.csv' % symbol, './data/%s - old.csv' % symbol)
        mlab.rec2csv(data, './data/%s.csv' % symbol, delimiter=',')

    return data
Example #47
                    nv_scan_candidates = equal_scans

                # chose the subject with closest baseline age scan
                target_age = np.min(age[subj])
                candidates_age = [np.min(age[s]) for s in nv_scan_candidates]
                closest = np.argmin(abs(candidates_age - target_age))
                nv_match = nv_scan_candidates[closest]
                nv_matches.append(nv_match)
                nv_subjects.remove(nv_match)
                print 'Matched ADHD %d (%d scans, %s) to NV %d (%d scans, %s).' % (
                    subj, len(rows[subj]), gf[rows[subj][0]]['sex'], nv_match,
                    len(rows[nv_match]), gf[rows[nv_match][0]]['sex'])
                break
            else:
                target_num_scans -= 1
    if not found:
        rm.append(subj)

# remove all subjects for whom we didn't find a match
print 'ADHD subjects without matches:', rm
for subj in rm:
    adhd_subjects.remove(subj)

# finally, create new variable and output it to a new file
match_bool = np.zeros(len(gf))
for subj in (nv_matches + adhd_subjects):
    match_bool[rows[subj]] = 1
match_bool = mlab.rec_append_fields(gf, var, match_bool)
mlab.rec2csv(match_bool,
             csv_file[:-4] + '_matched_onSex_onNumScan_onBaseAge.csv')
Example #48
    nburn = 100
    pos = [THETA + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
     
    sampler = emcee.EnsembleSampler(nwalkers,ndim,lnprob,args=(cat['X'],cat['Y']),
                                    threads=nthreads)
    sampler.run_mcmc(pos,nsamples)
     
    samples = sampler.chain[:, nburn:, :].reshape((-1, ndim))
     
    rich,[rich_min,rich_max] = median_interval(samples[:,0])
    x,[xmin,xmax] = median_interval(samples[:,1])
    y,[ymin,ymax] = median_interval(samples[:,2])

    # True centroid
    x0,y0 = WCS.wcs_world2pix(hdu.header['LON'],hdu.header['LAT'],1)

    # Results
    res = [x0,y0,x,xmin,xmax,y,ymin,ymax]
    results.append(res)

    if do_plot:
        fig = corner.corner(samples, labels=["rich", "x", "y"])
        fig.savefig("triangle_eri2.png")

results = np.rec.array(results,names=['lon','lat','x','xmin','xmax','y','ymin','ymax'])

filename='results2_b%i_s%i.csv'%(NBINS,nsamples)
print("Writing %s ..."%filename)
rec2csv(results,filename)

Example #49
    else:
        testFiles = glob.glob(dirName + '*.csv')
        # for each file, if there's no modified version of it, create it
        for fname in testFiles:
            if fname.find('modified') < 0:
                modName = fname[:-4] + '_modified.csv'
                if not os.path.exists(modName):
                    shutil.copyfile(fname, modName)
        # for each file in the directory, replace all the occurrences of the first column by the 3rd column
        for fname in testFiles:
            # only operate on modified files
            if fname.find('modified') >= 0:
                data = np.recfromcsv(fname)
                changed = False
                # look for occurrences in the first column
                for row in data:
                    if row[0] == rec[0]:
                        row[0] = rec[2]
                        changed = True
                # look for occurrences in the first row. The header is read in as a
                # tuple, so we need to do it differently
                new_names = []
                for column in data.dtype.names:
                    if column == str(rec[0]):
                        new_names.append(str(rec[2]))
                    else:
                        new_names.append(column)
                data.dtype.names = new_names
                if changed:
                    mlab.rec2csv(data, fname)
Example #50
    ind = np.where(table['JobID'] == sid)
    for k in dr.split("_"):
        if k.endswith("src"):
            Nsrcs[ind] = int(k[:-3])
        if k.endswith("time"):
            Ntimes[ind] = int(k[:-4])
        if k.endswith('chan'):
            Nchans[ind] = int(k[:-4])
        if k == 'mwa':
            Nbls[ind] = 8128
        if k == 'triangle':
            Nbls[ind] = 3


def hms2sec(hms):
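    # convert an "HH:MM:SS" string to seconds, e.g. hms2sec("01:02:03") -> 3723.0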
    h, m, s = map(float, hms.split(":"))
    return h * 60.**2 + m * 60. + s


runtime_sec = np.array(map(hms2sec, table['Elapsed']))
cores_per_node = table['NCores'] / table['NNodes']
ntasks = Nsrcs * Ntimes * Nbls * Nchans
timepertask = runtime_sec / ntasks


table = append_fields(table, ['CoresPerNode', 'Nbls', 'Ntimes', 'Nchan', 'Nsrc', 'Ntasks', 'Runtime_Seconds', 'RuntimePerTask_seconds'], [cores_per_node, Nbls, Ntimes, Nchans, Nsrcs, ntasks, runtime_sec, timepertask])
print table
print table.dtype

rec2csv(table, 'profiling_results_table.csv')
Example #51
def export(model_information, model_summary, metrics, lift_table, correlation):
		## Export model output in csv
		blank_type 	= np.dtype([('Col1', 'S100'), ('Col2', 'S100'),( 'Col3', 'S100'), ('Col4', 'S100'), ('Col5', 'S100'), ('Col6', 'S100'), ('Col7', 'S100')])
		blank_line 	= np.asarray([("  ", "  ", " ", " ", " ", " ", " ")], dtype=blank_type)
		blank_line 	= np.hstack((blank_line, blank_line))
		dot_line 	= np.asarray([("=======================", "=======================",  "=======================",  "=======================", "=======================", "=======================", "=======================")], dtype=blank_type)
		title_1 	= np.asarray([("  ", "  ", "Model Information", " ",  " ",  " ", " "	)], dtype=blank_type)
		title_2 	= np.asarray([("  ", "  ", "Model Summary", " ",  " ",  " ", " "		)], dtype=blank_type)
		title_3 	= np.asarray([("  ", "  ", "Metrics", " ",  " ",  " ", " "				)], dtype=blank_type)
		title_4 	= np.asarray([("  ", "  ", "Lift Table", " ",  " ",  " ", " "			)], dtype=blank_type)
		title_5 	= np.asarray([("  ", "  ", "Correlation Table", " ",  " ",  " ", " "	)], dtype=blank_type)

		file_name 	= str('Logistic Regression Output '+datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')+'.csv').replace(":", "-")
		
		with open(file_name, 'wb') as outfile:
			mlab.rec2csv(blank_line, 			outfile, withheader=False)
			mlab.rec2csv(title_1, 				outfile, withheader=False)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)
			mlab.rec2csv(model_information, 	outfile, withheader=False)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)

			mlab.rec2csv(blank_line, 			outfile, withheader=False)
			mlab.rec2csv(title_2, 				outfile, withheader=False)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)
			mlab.rec2csv(model_summary, 		outfile, withheader=True)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)

			mlab.rec2csv(blank_line, 			outfile, withheader=False)
			mlab.rec2csv(title_3, 				outfile, withheader=False)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)
			mlab.rec2csv(metrics, 				outfile, withheader=False)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)

			mlab.rec2csv(blank_line, 			outfile, withheader=False)
			mlab.rec2csv(title_4, 				outfile, withheader=False)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)
			mlab.rec2csv(lift_table.to_records(),outfile, withheader=True)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)

			mlab.rec2csv(blank_line, 			outfile, withheader=False)
			mlab.rec2csv(title_5, 				outfile, withheader=False)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)
			mlab.rec2csv(correlation, 			outfile, withheader=True)
			mlab.rec2csv(dot_line, 				outfile, withheader=False)

Example #52
def normalize_dat(dat_filename):
    """Read a dat file and return its record with the 'value' column normalised"""
    names = ['value', 'angle']
    c_rec = mlab.csv2rec(dat_filename, names=names, delimiter=' ')
    total_value = sum(c_rec['value'])
    c_rec['value'] = c_rec['value'] / total_value

    return c_rec


def merge_dat(dat_rec_list):
    """Average a list of normalised dat records into a single record"""
    nb_rec = len(dat_rec_list)
    new_rec = dat_rec_list[0]
    new_rec['value'] = new_rec['value'] / nb_rec
    for c_rec in dat_rec_list[1:]:
        new_rec['value'] += c_rec['value'] / nb_rec

    return new_rec


if __name__ == '__main__':

    dat_filenames = sys.argv[1:]
    dat_recs = []
    for dat_filename in dat_filenames:
        dat_rec = normalize_dat(dat_filename)
        dat_recs.append(dat_rec)
    new_record = merge_dat(dat_recs)
    mlab.rec2csv(new_record, 'out.dat', delimiter=' ')
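Usage sketch, assuming this script is saved as merge_dat.py: running python merge_dat.py a.dat b.dat normalises each input and writes the averaged record to out.dat with a space delimiter.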
Example #53
def save_results(results, file_name):
    '''
    Save the results to the specified tar.gz file. The results are stored as
    csv files: an experiments.csv, and a csv for each outcome. In addition,
    there is a metadata csv which contains the datatype information for each
    of the columns in the experiments array.

    Parameters
    ----------    
    results : tuple
              the return of run_experiments
    file_name : str
                the path of the file
    
    Raises
    ------
    IOError if file not found

    '''
    file_name = os.path.abspath(file_name)

    def add_file(tararchive, string_to_add, filename):
        tarinfo = tarfile.TarInfo(filename)
        tarinfo.size = len(string_to_add)

        fh = BytesIO(string_to_add.encode('UTF-8'))

        z.addfile(tarinfo, fh)

    def save_numpy_array(fh, data):
        data = pd.DataFrame(data)
        data.to_csv(fh, header=False, index=False, encoding='UTF-8')

    experiments, outcomes = results
    with tarfile.open(file_name, 'w:gz') as z:
        # write the x to the zipfile
        experiments_file = WriterFile()
        rec2csv(experiments, experiments_file, withheader=True)
        add_file(z, experiments_file.getvalue(), 'experiments.csv')

        # write experiment metadata
        dtype = experiments.dtype.descr
        dtype = ["{},{}".format(*entry) for entry in dtype]
        dtype = "\n".join(dtype)
        add_file(z, dtype, 'experiments metadata.csv')

        # write outcome metadata
        outcome_names = outcomes.keys()
        outcome_meta = [
            "{},{}".format(outcome, outcomes[outcome].shape)
            for outcome in outcome_names
        ]
        outcome_meta = "\n".join(outcome_meta)
        add_file(z, outcome_meta, "outcomes metadata.csv")

        # outcomes
        for key, value in outcomes.items():
            fh = WriterFile()

            nr_dim = len(value.shape)
            if nr_dim == 3:
                for i in range(value.shape[2]):
                    data = value[:, :, i]
                    save_numpy_array(fh, data)
                    fh = fh.getvalue()
                    fn = '{}_{}.csv'.format(key, i)
                    add_file(z, fh, fn)
                    fh = WriterFile()
            else:
                save_numpy_array(fh, value)
                fh = fh.getvalue()
                add_file(z, fh, '{}.csv'.format(key))

    info("results saved successfully to {}".format(file_name))
Example #54
def fit_classifier(aml_clean_path, class_path, test=False, performance=False, 
                   n_fits=100, test_split=0.2, save_clf=True):
    '''Fits random forest classifier to aml_ref_clean formatted csv.
    Note that the species code should be contained in the folder col.'''

    # Get class_path dir, used for ancillary file names
    class_dir, tail = os.path.split(class_path)
    prefix = tail.split('.')[0]

    # Load refe_features_table
    table = csv2rec(aml_clean_path)

    # Only use calls with qual < 0.3 (Armitage)
    table = table[table.qual < 0.3]

    # Get target col (y) with integer codes instead of spp names
    y_str = table.folder  # Assumes spp name is in folder col
    y_str_uniq = set(list(y_str))

    n_spp = len(y_str_uniq)
    spp_codes = range(0, n_spp)
    code_table = np.array(zip(spp_codes, y_str_uniq),
                          dtype = [('code','<i8'), ('spp', '|S8')])

    y = np.zeros(len(y_str))  # Get col of full length with codes, not names
    for code, spp in code_table:
        y[y_str == spp] = int(code)

    # Get filename col for later grouping into passes
    f = table.filename

    # Remove non-feature cols from table
    table = rec_drop_fields(table, ['path', 'folder', 'filename', 'st', 'dc', 
                                    'qual', 'pmc'])

    # Get list of feature names remaining in table
    feature_names = table.dtype.names

    # Recarray to ndarray - http://stackoverflow.com/questions/5957380/
    # convert-structured-array-to-regular-numpy-array
    X = table.view((float, len(table.dtype.names)))

    # Partition data if test, holding portion for testing
    if not test:
        X_tr = X
        y_tr = y
        f_tr = f
        X_te = X
        y_te = y
        f_te = f
    else:
        # Use StratifiedShuffleSplit since train_test_split does not stratify
        sss = StratifiedShuffleSplit(y, 1, test_size=test_split)
        for train_index, test_index in sss:  # Only once since n_iter=1 above
            X_tr, X_te = X[train_index], X[test_index]
            y_tr, y_te = y[train_index], y[test_index]
            f_tr, f_te = f[train_index], f[test_index]

        sort_ind = f_te.argsort()  # Sort test data for pass analysis later
        X_te = X_te[sort_ind,:]  # Sort rows
        y_te = y_te[sort_ind]
        f_te = f_te[sort_ind]
        # (Train data order does not matter)

    # Define and fit classifier
    clf = RandomForestClassifier(n_estimators=n_fits, oob_score=True, 
                                 compute_importances=True)
    clf.fit(X_tr, y_tr)

    # If performance, save various performance metrics
    # NOTE: Performance of passes is difficult to understand if test=True,
    # as the calls in one pass may be split up.
    if performance:

        # Get OOB score
        print 'OOB Score: ', clf.oob_score_

        # Predict on test data, which may be held out (test=True) or all data
        y_te_pr = clf.predict(X_te)

        # Get true data and predictions by passes
        pred_te = clf.predict_proba(X_te)  # Prob of each spp
        f_te_p, pred_te_p, other = sum_group(f_te, pred_te, [y_te])
        y_te_p = other[0]  # Actual spp for each pass

        y_te_p_pr = []
        for row in xrange(len(y_te_p)):  # Find pred species for each pass
            y_te_p_pr.append(pred_te_p[row].argmax())  # First ind, ties bias
        y_te_p_pr = np.array(y_te_p_pr)

        # Get accuracy and confusion matrix for calls
        def make_conf_mat(y_te, y_te_pr, type):
            conf_mat = metrics.confusion_matrix(y_te, y_te_pr)
            conf_mat_frac = conf_mat / np.sum(conf_mat, axis=0)
            print type, ' Accuracy: ', metrics.zero_one_score(y_te, y_te_pr)

            np.savetxt(os.path.join(class_dir, prefix+'_conf_'+type+'.csv'),
                       conf_mat, fmt='%i', delimiter=',')
            np.savetxt(os.path.join(class_dir, prefix+'_conffr_'+type+'.csv'), 
                       conf_mat_frac, fmt = '%.6f', delimiter=',')

        make_conf_mat(y_te, y_te_pr, 'call')
        make_conf_mat(y_te_p, y_te_p_pr, 'pass')

    # Save spp_code table, feature_names, and pickle classifier
    rec2csv(code_table, os.path.join(class_dir, prefix + '_spp_codes.csv'))
    rec2csv(np.array(list(feature_names), dtype=[('features', 'S8')]),
        os.path.join(class_dir, prefix + '_feature_names.csv'))
    if save_clf:
        joblib.dump(clf, class_path, compress = 9)
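sum_group is a project helper not shown here. From its call site (grouping call-level rows into passes keyed by sorted filenames, summing the per-species probability rows within each group, and carrying one representative of each extra array along), a compatible sketch might be:

import numpy as np

def sum_group(keys, values, others):
    # keys are assumed sorted; sum rows of `values` within each group of equal
    # keys, and take one representative per group from each array in `others`
    uniq, idx = np.unique(keys, return_inverse=True)
    summed = np.zeros((len(uniq), values.shape[1]))
    np.add.at(summed, idx, values)
    first = np.searchsorted(keys, uniq)
    reps = [np.asarray(o)[first] for o in others]
    return uniq, summed, reps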
Example #55
    # Pack it into a recarray:
    names = ('ppm', 'echo_on', 'echo_off', 'diff')
    formats = (float, float, float, float)
    dt = zip(names, formats)
    m_e1 = np.mean(G.echo_on, 0)
    m_e2 = np.mean(G.echo_off, 0)
    diff = m_e2 - m_e1

    if in_args.out_file:
        prep_arr = [(G.f_ppm[i], m_e1[i], m_e2[i], diff[i])
                    for i in range(len(G.f_ppm))]
        out_array = np.array(prep_arr, dtype=dt)

        # And save to output:
        mlab.rec2csv(out_array, in_args.out_file)

    G.fit_gaba()

    if in_args.plot:
        fig, ax = plt.subplots(3)
        ax[0].plot(G.f_ppm, m_e1)
        ax[0].plot(G.f_ppm[G.cr_idx], np.mean(G.creatine_model, 0), 'r')
        ax[1].plot(G.f_ppm, m_e2)
        ax[2].plot(G.f_ppm, diff)
        ax[2].plot(G.f_ppm[G.gaba_idx], np.mean(G.gaba_model, 0), 'r')
        for a in ax:
            a.invert_xaxis()
            a.set_xlabel('ppm')

        plt.show()
Example #56
def group_things(list_of_jsons):
    """Fields to save in output csv
- Subject id
- Num Outliers
- Mincost
- All tsnr values (0 for missing values)
"""
    import numpy as np
    from nipype.utils.filemanip import load_json
    from bips.workflows.gablab.wips.fmri.preprocessing.group_preproc_QA import extract_snr, extract_art
    #from bips.workflows.group_preproc_QA import extract_art

    snr_names = []
    snr_dict = {}

    for tmp in list_of_jsons:
        a = load_json(tmp)
        names = [b[0] for b in a['SNR_table'][0][1:]]
        snr_names += names
        snr_names = np.unique(snr_names).tolist()

    for name in snr_names:
        snr_dict[name] = []

    mincost = []
    common_outliers = []
    total_outliers = []
    intensity_outliers = []
    motion_outliers = []
    subject_id = []

    all_fields = [
        'subject_id', 'total_outliers', 'mincost', 'motion_outliers',
        'intensity_outliers', 'common_outliers'
    ] + snr_names
    dtype = [('subject_id', '|S20')] + [(str(n), 'f4') for n in all_fields[1:]]
    arr = np.zeros(len(list_of_jsons), dtype=dtype)

    for fi in list_of_jsons:
        f = load_json(fi)
        subject_id.append(f['subject_id'])
        mot, inten, com, out = extract_art(f['art'])
        motion_outliers.append(mot)
        intensity_outliers.append(inten)
        common_outliers.append(com)
        total_outliers.append(out)
        mincost.append(f['mincost'][0])
        for n in snr_names:
            t = extract_snr(f['SNR_table'], n)
            snr_dict[n].append(t)

    arr['subject_id'] = subject_id
    arr['total_outliers'] = total_outliers
    arr['mincost'] = mincost
    arr['motion_outliers'] = motion_outliers
    arr['intensity_outliers'] = intensity_outliers
    arr['common_outliers'] = common_outliers

    for key, item in snr_dict.iteritems():
        arr[key] = item

    import os
    from matplotlib.mlab import rec2csv
    outfile = os.path.abspath('grouped_metrics.csv')
    rec2csv(arr, outfile)
    return outfile
Example #57
    def create_info_table(self,
                          raster_join_field,
                          attribute_file,
                          attribute_join_field,
                          drop_fields=None):
        """
        Create ArcInfo table from attribute csv file

        Parameters
        ----------
        raster_join_field : str
            field in raster to use for joining to attribute data
        attribute_file : str
            name and path of file containing attribute information
        attribute_join_field : str
            field in attribute file to use to join to raster
        drop_fields : list of str
            fields in the attribute file to drop before join to raster

        Returns
        -------
        name of temp ArcInfo table, list of fields to join from info table

        """
        print('Building info table from attribute file')

        # Crosswalk of numpy types to ESRI types for numeric data
        numpy_to_esri_type = {
            ('b', 1): 'SHORT',
            ('i', 1): 'SHORT',
            ('i', 2): 'SHORT',
            ('i', 4): 'LONG',
            ('f', 4): 'FLOAT',
            ('f', 8): 'DOUBLE',
        }

        # Read the CSV file in to a recarray
        ra = mlab.csv2rec(attribute_file)
        col_names = [str(x).upper() for x in ra.dtype.names]
        ra.dtype.names = col_names

        # If there are fields to drop, do that now and get a new recarray
        if drop_fields is not None:

            # Ensure that the drop fields are actually fields in the current
            # recarray
            drop_fields = [x for x in drop_fields if x in ra.dtype.names]

            # Create a new recarray with these fields omitted
            ra = mlab.rec_drop_fields(ra, drop_fields)
            col_names = list(ra.dtype.names)

        # Get the column types and formats
        col_types = [(ra.dtype[i].kind, ra.dtype[i].itemsize)
                     for i in range(len(ra.dtype))]
        formats = [ra.dtype[i].str for i in range(len(ra.dtype))]

        # Sanitize column names
        #   No field name may be longer than 16 chars
        #   No field name can start with a number
        for i in range(len(col_names)):
            if len(col_names[i]) > 16:
                col_names[i] = col_names[i][0:16]
            if col_names[i][0].isdigit():
                col_names[i] = col_names[i].lstrip('0123456789')

        # Reset the names for the recarray
        ra.dtype.names = col_names

        # Sanitize the data
        # Change True/False to 1/0 to be read into short type
        bit_fields = [(i, n)
                      for (i, (n, t)) in enumerate(zip(col_names, col_types))
                      if t[0] == 'b']
        if bit_fields:
            for rec in ra:
                for (col_num, field) in bit_fields:
                    value = getattr(rec, field)
                    if value:
                        setattr(rec, field, 1)
                    else:
                        setattr(rec, field, 0)

            # Change the bit fields to be short integer
            for (col_num, field) in bit_fields:
                formats[col_num] = '<i2'

        # Create a sanitized recarray and output back to CSV
        temp_csv = os.path.join(env.workspace, 'xxtmp.csv')
        ra2 = np.rec.fromrecords(ra, names=col_names, formats=formats)
        mlab.rec2csv(ra2, temp_csv)

        # Create a scratch name for the temporary ArcInfo table
        temp_table = arcpy.CreateScratchName('', '', 'ArcInfoTable')

        # Create the ArcInfo table and add the fields
        table_name = os.path.basename(temp_table)
        arcpy.CreateTable_management(env.workspace, table_name)
        for (n, t) in zip(col_names, col_types):
            try:
                esri_type = numpy_to_esri_type[t]
                arcpy.AddField_management(temp_table, n, esri_type)
            except KeyError:
                if t[0] == 'S':
                    arcpy.AddField_management(temp_table, n, 'TEXT', '#', '#',
                                              t[1])
                else:
                    err_msg = 'Type not found for ' + str(t)
                    print(err_msg)
                    continue

        # Append the records from the CSV field to the temporary INFO table
        arcpy.Append_management(temp_csv, temp_table, 'NO_TEST')

        # Strip out the join field from the names if they are the same
        raster_join_field = raster_join_field.upper()
        attribute_join_field = attribute_join_field.upper()
        if raster_join_field == attribute_join_field:
            col_names.remove(attribute_join_field)

        # Create a semi-colon delimited string of the fields we want to join
        field_list = ';'.join(col_names)

        # Clean up
        os.remove(temp_csv)

        return temp_table, field_list