Example #1
    def test_removal_of_error_parameter(self):

        filename = make_temp_dir('remove_errored.hdf5')
        traj = Trajectory(name='traj', add_time=True, filename=filename)
        traj.f_add_result('iii', 42)
        traj.f_add_result(FakeResult, 'j.j.josie', 43)

        file = traj.v_storage_service.filename
        traj.f_store(only_init=True)
        with self.assertRaises(RuntimeError):
            traj.f_store()

        with pt.open_file(file, mode='r') as fh:
            jj = fh.get_node(where='/%s/results/j/j' % traj.v_name)
            self.assertTrue('josie' not in jj)

        traj.j.j.f_remove_child('josie')
        traj.j.j.f_add_result(FakeResult2, 'josie2', 444)

        traj.f_store()
        with self.assertRaises(pex.NoSuchServiceError):
            traj.f_store_child('results', recursive=True)

        with pt.open_file(file, mode='r') as fh:
            jj = fh.get_node(where='/%s/results/j/j' % traj.v_name)
            self.assertTrue('josie2' in jj)
            josie2 = jj._f_get_child('josie2')
            self.assertTrue('hey' in josie2)
            self.assertTrue('fail' not in josie2)
Example #2
 def __init__(self, filename, model_state, proposal_state):
     """
     Initialize the object.
     """
     self.filename = filename
     self.ChainRecordDType = state_to_table_dtype(model_state)
     self.ChainRecordDType['step'] = pt.UInt32Col()
     self.ChainRecordDType['accepted'] = pt.UInt32Col()
     self.ChainRecordDType['proposal'] = pt.UInt16Col()
     self.ProposalRecordDType = state_to_table_dtype(proposal_state)
     self.ChainCounterDType = {'id': pt.UInt16Col(),
                               'name': pt.StringCol(itemsize=32),
                               'date': pt.StringCol(itemsize=26)
                               }
     if os.path.exists(filename) and pt.is_pytables_file(filename):
         self.fd = pt.open_file(filename, mode='a')
     else:
         self.fd = pt.open_file(filename, mode='w')
         self.fd.create_group('/', 'mcmc',
                              'Metropolis-Hastings Algorithm Data')
         self.fd.create_table('/mcmc', 'proposals',
                              self.ProposalRecordDType,
                              'MCMC Proposals')
         self.fd.create_table('/mcmc', 'chain_counter',
                              self.ChainCounterDType, 'Chain Counter')
         self.fd.create_group('/mcmc', 'data', 'Collection of Chains')
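
The helper state_to_table_dtype used above is not part of this excerpt. A minimal sketch of what it could look like, assuming the state is a flat dict of scalar floats (the name and column types here are assumptions, not the project's actual implementation):

import tables as pt

def state_to_table_dtype(state):
    # Assumed helper: map every scalar entry of a state dict to a
    # Float64 column, producing a description dict for create_table().
    return {name: pt.Float64Col() for name in state}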
Example #3
def validate_results_node(test, expected_path, actual_path, expected_node,
                          actual_node):
    """Validate results by comparing two specific nodes

    :param test: instance of the TestCase.
    :param expected_path: path to the reference data.
    :param actual_path: path to the output from the test.
    :param expected_node: path to the reference node.
    :param actual_node: path to the output node from the test.

    """
    with tables.open_file(expected_path, 'r') as expected_file, \
            tables.open_file(actual_path, 'r') as actual_file:
        expected = expected_file.get_node(expected_node)
        try:
            actual = actual_file.get_node(actual_node)
        except tables.NoSuchNodeError:
            test.fail("Node '%s' does not exist in datafile" % actual_node)
        if type(expected) is tables.table.Table:
            validate_tables(test, expected, actual)
        elif type(expected) is tables.vlarray.VLArray:
            validate_vlarrays(test, expected, actual)
        elif type(expected) is tables.array.Array:
            validate_arrays(test, expected, actual)
        else:
            raise NotImplementedError
Example #4
    def test_maximum_overview_size(self):

        filename = make_temp_dir('maxisze.hdf5')

        env = Environment(trajectory='Testmigrate', filename=filename,
                          log_config=get_log_config(), add_time=True)

        traj = env.v_trajectory
        for irun in range(pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH):
            traj.f_add_parameter('f%d.x' % irun, 5)

        traj.f_store()


        store = pt.open_file(filename, mode='r+')
        table = store.root._f_get_child(traj.v_name).overview.parameters_overview
        self.assertEqual(table.nrows, pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH)
        store.close()

        for irun in range(pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH,
                  2*pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH):
            traj.f_add_parameter('f%d.x' % irun, 5)

        traj.f_store()

        store = pt.open_file(filename, mode='r+')
        table = store.root._f_get_child(traj.v_name).overview.parameters_overview
        self.assertEqual(table.nrows, pypetconstants.HDF5_MAX_OVERVIEW_TABLE_LENGTH)
        store.close()

        env.f_disable_logging()
Example #5
def validate_results(test, expected_path, actual_path):
    """Validate results by comparing in and output HDF5 files

    :param test: instance of the TestCase.
    :param expected_path: path to the reference data.
    :param actual_path: path to the output from the test.

    """
    with tables.open_file(expected_path, 'r') as expected_file, \
            tables.open_file(actual_path, 'r') as actual_file:
        for expected_node in expected_file.walk_nodes('/', 'Leaf'):
            try:
                actual_node = actual_file.get_node(expected_node._v_pathname)
            except tables.NoSuchNodeError:
                test.fail("Node '%s' does not exist in datafile" %
                          expected_node._v_pathname)
            if type(expected_node) is tables.table.Table:
                validate_tables(test, expected_node, actual_node)
            elif type(expected_node) is tables.vlarray.VLArray:
                validate_vlarrays(test, expected_node, actual_node)
            elif type(expected_node) is tables.array.Array:
                validate_arrays(test, expected_node, actual_node)
            else:
                raise NotImplementedError
            validate_attributes(test, expected_node, actual_node)
        validate_attributes(test, expected_file.root, actual_file.root)
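
A possible way to call validate_results from a test case; the file names are hypothetical:

import unittest

class OutputTest(unittest.TestCase):
    def test_output_matches_reference(self):
        # validate_results walks every leaf of the reference file and
        # fails the test if the produced file differs or misses a node.
        validate_results(self, 'reference.h5', 'produced.h5')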
Example #6
    def test03b_Compare64EArray(self):
        "Comparing several written and read 64-bit time values in an EArray."

        # Create test EArray with data.
        h5file = tables.open_file(self.h5fname, "w", title="Test for comparing Time64 E arrays")
        ea = h5file.create_earray("/", "test", tables.Time64Atom(), shape=(0, 2))

        # Size of the test.
        nrows = ea.nrowsinbuf + 34  # Add some more rows than buffer.
        # Only for home checks; the value above should check better
        # the I/O with multiple buffers.
        # nrows = 10

        for i in range(nrows):
            j = i * 2
            ea.append(((j + 0.012, j + 1 + 0.012),))
        h5file.close()

        # Check the written data.
        h5file = tables.open_file(self.h5fname)
        arr = h5file.root.test.read()
        h5file.close()

        orig_val = numpy.arange(0, nrows * 2, dtype=numpy.int32) + 0.012
        orig_val.shape = (nrows, 2)
        if common.verbose:
            print("Original values:", orig_val)
            print("Retrieved values:", arr)
        self.assertTrue(allequal(arr, orig_val), "Stored and retrieved values do not match.")
Example #7
    def test02_copy(self):
        """Checking (X)Array.copy() method ('numetic' flavor)"""

        srcfile = self._testFilename("oldflavor_numeric.h5")
        tmpfile = tempfile.mktemp(".h5")
        shutil.copy(srcfile, tmpfile)
        try:
            # Open the HDF5 with old numeric flavor
            with tables.open_file(tmpfile, "r+") as h5file:
                # Copy to another location
                self.assertWarns(FlavorWarning,
                                 h5file.root.array1.copy, '/', 'array1copy')
                h5file.root.array2.copy('/', 'array2copy')
                h5file.root.carray1.copy('/', 'carray1copy')
                h5file.root.carray2.copy('/', 'carray2copy')
                h5file.root.vlarray1.copy('/', 'vlarray1copy')
                h5file.root.vlarray2.copy('/', 'vlarray2copy')

                if self.close:
                    h5file.close()
                    h5file = tables.open_file(tmpfile)
                else:
                    h5file.flush()

                # Assert other properties in array
                self.assertEqual(h5file.root.array1copy.flavor, 'numeric')
                self.assertEqual(h5file.root.array2copy.flavor, 'python')
                self.assertEqual(h5file.root.carray1copy.flavor, 'numeric')
                self.assertEqual(h5file.root.carray2copy.flavor, 'python')
                self.assertEqual(h5file.root.vlarray1copy.flavor, 'numeric')
                self.assertEqual(h5file.root.vlarray2copy.flavor, 'python')
        finally:
            os.remove(tmpfile)
Example #8
def WriteRead(filename, testTuple):
    if common.verbose:
        print('\n', '-=' * 30)
        print("Running test for object %s" % type(testTuple))

    # Create an instance of HDF5 Table
    fileh = tables.open_file(filename, mode="w")
    root = fileh.root
    try:
        # Create the array under root and name 'somearray'
        a = testTuple
        fileh.create_array(root, 'somearray', a, "Some array")
    finally:
        # Close the file
        fileh.close()

    # Re-open the file in read-only mode
    fileh = tables.open_file(filename, mode="r")
    root = fileh.root

    # Read the saved array
    try:
        b = root.somearray.read()
        # Compare them. They should be equal.
        if not a == b and common.verbose:
            print("Write and read lists/tuples differ!")
            print("Object written:", a)
            print("Object read:", b)

        # Check strictly the array equality
        assert a == b
    finally:
        # Close the file
        fileh.close()
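
A hedged usage sketch of the helper above; the temporary file name and the list are arbitrary:

# Writes the list as the Array node '/somearray', reads it back and
# asserts round-trip equality.
WriteRead('writeread_demo.h5', [1, 2, 3])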
Example #9
 def test_2D_multiphase(self):
     # RELOAD MODULES
     self.reload_modules()
     pnList = [(twp_navier_stokes_p, twp_navier_stokes_n),
               (clsvof_p,               clsvof_n)]
     self.so = multiphase_so
     pList=[]
     nList=[]
     sList=[]
     for (pModule,nModule) in pnList:
         pList.append(pModule)
         if pList[-1].name == None:
             pList[-1].name = pModule.__name__
         nList.append(nModule)
     for i in range(len(pnList)):
         sList.append(default_s)
     self.so.name += "_2D_falling_bubble"
     # NUMERICAL SOLUTION #
     ns = proteus.NumericalSolution.NS_base(self.so,
                                            pList,
                                            nList,
                                            sList,
                                            opts)
     ns.calculateSolution('2D_falling_bubble')
     # COMPARE VS SAVED FILES #
     expected_path = 'comparison_files/multiphase_2D_falling_bubble.h5'
     expected = tables.open_file(os.path.join(self._scriptdir,expected_path))
     actual = tables.open_file('multiphase_2D_falling_bubble.h5','r')
     assert np.allclose(expected.root.phi_t2,actual.root.phi_t2,atol=1e-10)
     expected.close()
     actual.close()
Example #10
 def compute_rab(self):
     '''
     Uses the current set of ICA realizations (pytabled) to compute K*(K-1)/2 cross-correlation matrices;
     they are indexed via tuples.  R(a,b) is much smaller than the ICA realizations (all R(a,b) matrices
     are generally smaller than ONE realization), so R(a,b) is also retained in memory. Recomputation of
     the R(a,b) matrices is forced.
     '''
     if not os.path.exists(self.rabDirectory):
         try:
             os.mkdir(self.rabDirectory)
         except OSError:
             pass
     icaFiles = sorted(os.listdir(self.icaDirectory))
     if len(icaFiles) == 0:
         raise RAICARICAException
     for fi in icaFiles:
         fiPtr = tb.open_file(os.path.join(self.icaDirectory,fi),'r')
         si = fiPtr.get_node('/decomps/sources').read()
         fiPtr.close()
          i = int(deconstruct_file_name(fi)[1])
          print('Working on R(%d,b)' % i)
         for fj in icaFiles:
              j = int(deconstruct_file_name(fj)[1])
             if j > i:
                 # sources assumed to have unit std. dev. but nonzero mean - will behave badly if not!
                 fjPtr = tb.open_file(os.path.join(self.icaDirectory,fj),'r')
                 sj = fjPtr.get_node('/decomps/sources').read()
                 fjPtr.close()
                 self.RabDict[(i,j)] = np.abs(corrmatrix(si,sj))
     # pickle the result
     rabPtr = open(os.path.join(self.rabDirectory,'rabmatrix.db'),'wb')
     cPickle.dump(self.RabDict,rabPtr,protocol=-1)
     rabPtr.close()
Example #11
    def test02_CompareTable(self):
        "Comparing written time data with read data in a Table."

        wtime = 1234567890.123456

        # Create test Table with data.
        h5file = tables.open_file(
            self.h5fname, 'w', title="Test for comparing Time tables")
        tbl = h5file.create_table('/', 'test', self.MyTimeRow)
        row = tbl.row
        row['t32col'] = int(wtime)
        row['t64col'] = (wtime, wtime)
        row.append()
        h5file.close()

        # Check the written data.
        h5file = tables.open_file(self.h5fname)
        recarr = h5file.root.test.read(0)
        h5file.close()

        self.assertEqual(recarr['t32col'][0], int(wtime),
                         "Stored and retrieved values do not match.")

        comp = (recarr['t64col'][0] == numpy.array((wtime, wtime)))
        self.assertTrue(numpy.all(comp),
                        "Stored and retrieved values do not match.")
Example #12
    def test01b_Compare64VLArray(self):
        "Comparing several written and read 64-bit time values in a VLArray."

        # Create test VLArray with data.
        h5file = tables.open_file(
            self.h5fname, 'w', title="Test for comparing Time64 VL arrays")
        vla = h5file.create_vlarray('/', 'test', self.myTime64Atom)

        # Size of the test.
        nrows = vla.nrowsinbuf + 34  # Add some more rows than buffer.
        # Only for home checks; the value above should check better
        # the I/O with multiple buffers.
        # nrows = 10

        for i in range(nrows):
            j = i * 2
            vla.append((j + 0.012, j + 1 + 0.012))
        h5file.close()

        # Check the written data.
        h5file = tables.open_file(self.h5fname)
        arr = h5file.root.test.read()
        h5file.close()

        arr = numpy.array(arr)
        orig_val = numpy.arange(0, nrows * 2, dtype=numpy.int32) + 0.012
        orig_val.shape = (nrows, 1, 2)
        if common.verbose:
            print "Original values:", orig_val
            print "Retrieved values:", arr
        self.assertTrue(allequal(arr, orig_val),
                        "Stored and retrieved values do not match.")
Example #13
 def test_case_2(self):
     # Set parameters for this test
     parameters.ct.test_case=2
     # RELOAD MODULES
     self.reload_modules()
     pnList = [(clsvof_p, clsvof_n)]
     self.so = default_so
     self.so.tnList = clsvof_n.tnList
     pList=[]
     nList=[]
     sList=[]
     for (pModule,nModule) in pnList:
         pList.append(pModule)
         if pList[-1].name == None:
             pList[-1].name = pModule.__name__
         nList.append(nModule)
     for i in range(len(pnList)):
         sList.append(default_s)
     self.so.name = "clsvof_test_case_2"
     # NUMERICAL SOLUTION #
     ns = proteus.NumericalSolution.NS_base(self.so,
                                            pList,
                                            nList,
                                            sList,
                                            opts)
     ns.calculateSolution('test_case_2')
     # COMPARE VS SAVED FILES #
     expected_path = 'comparison_files/clsvof_test_case_2.h5'
     expected = tables.open_file(os.path.join(self._scriptdir,expected_path))
     actual = tables.open_file('clsvof_test_case_2.h5','r')
     assert np.allclose(expected.root.u_t2,actual.root.u_t2,atol=1e-10)
     expected.close()
     actual.close()
Example #14
 def test_EV2(self):
     thelper_vof.ct.STABILIZATION_TYPE = 1 # EV
     thelper_vof.ct.ENTROPY_TYPE = 2 #logarithmic
     thelper_vof.ct.cE = 0.1
     thelper_vof.ct.FCT = True
     reload(thelper_vof_p)
     reload(thelper_vof_n)
     self.so.name = self.pList[0].name+"_EV2"
     # NUMERICAL SOLUTION #
     ns = proteus.NumericalSolution.NS_base(self.so,
                                            self.pList,
                                            self.nList,
                                            self.sList,
                                            opts)
     self.sim_names.append(ns.modelList[0].name)
     ns.calculateSolution('vof')
     # COMPARE VS SAVED FILES #
     expected_path = 'comparison_files/vof_level_3_EV2.h5'
     expected = tables.open_file(os.path.join(self._scriptdir,expected_path))
     actual = tables.open_file('vof_level_3_EV2.h5','r')
     assert np.allclose(expected.root.u_t2,
                        actual.root.u_t2,
                        atol=1e-10)
     expected.close()
     actual.close()
Example #15
def main(argv):
    args = parse_args(argv[1:])

    fileout = os.path.abspath(args.output)

    start = time()
    for fin in args.inputs:
        filein = os.path.abspath(fin)
        print('Concatenating %s' % filein)
        if not os.path.exists(fileout):
            copyfile(filein, fileout)
        else:
            # Can't use HdfStorage.readCoordinates because it needs an
            # Ice.Communicator object, so there's no point using the
            # OMERO.tables interface
            tout = tables.open_file(fileout, 'r+')
            tin = tables.open_file(filein, 'r')
            nrows = tin.root.OME.Measurements.nrows

            for a in range(0, nrows, ROW_CHUNK):
                b = min(nrows, a + ROW_CHUNK)
                print('\tRows %d:%d' % (a, b))
                rows = tin.root.OME.Measurements.read_coordinates(range(a, b))
                tout.root.OME.Measurements.append(rows)

            tin.close()
            tout.close()
        print('\tCumulative time: %d seconds' % (time() - start))

    print('Done')
Example #16
    def _runTest(self):
        self.ns = NumericalSolution.NS_base(self.so,
                                            self.pList,
                                            self.nList,
                                            self.so.sList,
                                            opts)
        self.ns.calculateSolution('stokes')

        relpath = 'comparison_files/drivenCavityNSE_LSC_expected.h5'
        expected = tables.open_file(os.path.join(self._scriptdir,relpath))
        actual = tables.open_file('drivenCavityNSETrial.h5','r')

        assert numpy.allclose(expected.root.velocity_t7,
                              actual.root.velocity_t7,
                              atol=1e-2)
        expected.close()
        actual.close()
        relpath = 'comparison_files/drivenCavityNSE_LSC_expected.log'
        actual_log = TestTools.NumericResults.build_from_proteus_log('proteus.log')
        expected_log = TestTools.NumericResults.build_from_proteus_log(os.path.join(self._scriptdir,
                                                                                    relpath))
        plot_lst = [(3.7,0,3),(3.2,0,2),(2.7,0,2),(2.2,0,1),(1.7,0,1)]
        L1 = expected_log.get_ksp_resid_it_info(plot_lst)
        L2 = actual_log.get_ksp_resid_it_info(plot_lst)
        assert L1 == L2
Example #17
def test_estimator_pytables():
    m1 = MyEstimator(a=1, b='a', c=None, d=False, e=np.zeros(3)).fit(None)

    f = tables.open_file(fn, 'w')
    m1.to_pytables(f.root)
    f.close()

    g = tables.open_file(fn)
    m2 = MyEstimator.from_pytables(g.root.MyEstimator)

    print(m1.__dict__)
    print(m2.__dict__)

    for key, value in m1.get_params().items():
        if any(isinstance(value, t) for t in [int, float, str]):
            assert value == getattr(m2, key, object())
        else:
            eq(value, getattr(m2, key, object()), err_msg='error on param key=%s' % key)

    for key in m1._get_estimate_names():
        value = getattr(m1, key)
        if any(isinstance(value, t) for t in [int, float, str]):
            assert value == getattr(m2, key, object())
        else:
            eq(value, getattr(m2, key, object()), err_msg='error on estimate key=%s' % key)

    g.close()
Example #18
 def test_3D_hex(self):
     # Set parameters for test 
     parameters_for_poisson.ct.nd = 3
     parameters_for_poisson.useHex = True
     # Reload _p and _n modules
     reload(poisson_p)
     reload(poisson_n)
     poisson_n.nnx=poisson_p.nn
     poisson_n.nny=poisson_p.nn
     poisson_n.nnz=poisson_p.nn
     # Update name 
     self.so.name = "3D_"+self.pList[0].name+"_hex_degree2"
     # NUMERICAL SOLUTION #
     ns = proteus.NumericalSolution.NS_base(self.so,
                                            self.pList,
                                            self.nList,
                                            self.sList,
                                            opts)
     self.sim_names.append(ns.modelList[0].name)
     ns.calculateSolution('poisson')
     # COMPARE VS SAVED FILES #
     expected_path = 'comparison_files/'+self.so.name+'.h5' 
     expected = tables.open_file(os.path.join(self._scriptdir,expected_path))
     actual = tables.open_file(self.so.name+'.h5','r')
     assert np.allclose(expected.root.u0_t1,
                        actual.root.u0_t1,
                        atol=1e-10)
Example #19
def store_and_sort_corsika_data(source, destination, overwrite=False,
                                progress=False):
    """First convert the data to HDF5 and create a sorted version"""

    if os.path.exists(destination):
        if not overwrite:
            if progress:
                raise Exception("Destination already exists, doing nothing")
            return
        else:
            os.remove(destination)

    corsika_data = CorsikaFile(source)

    temp_dir = os.path.dirname(destination)
    temp_path = create_tempfile_path(temp_dir)

    with tables.open_file(temp_path, 'a') as hdf_temp:
        store_corsika_data(corsika_data, hdf_temp, progress=progress)
    with tables.open_file(temp_path, 'a') as hdf_temp:
        create_index(hdf_temp, progress=progress)
    with tables.open_file(temp_path, 'r') as hdf_temp, \
            tables.open_file(destination, 'w') as hdf_data:
        copy_and_sort_node(hdf_temp, hdf_data, progress=progress)

    os.remove(temp_path)
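
A possible invocation of the routine above; the source and destination paths are hypothetical:

# Convert a raw CORSIKA file, sort it, and write the result next to it.
store_and_sort_corsika_data('DAT000000', '/data/corsika.h5',
                            overwrite=True, progress=True)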
Example #20
 def __init__(self, h5_filename_queue):
     """
          :param h5_filename_queue: a queue of temporary hdf5 files
     """
     self.h5_filename_queue = h5_filename_queue
     tables.open_file(table_path, 'w').close() #creates a new file
     super(WriteHDF5Thread, self).__init__()
Example #21
def dropfields(input_path, output_path, todrop):
    input_file = tables.open_file(input_path, mode="r")
    input_root = input_file.root

    output_file = tables.open_file(output_path, mode="w")
    output_globals = output_file.create_group("/", "globals", "Globals")

    print(" * copying globals ...", end=' ')
    copy_table(input_root.globals.periodic, output_globals)
    print("done.")

    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iter_nodes(input_root.entities):
        table_fields = get_fields(table)
        table_fields = [(fname, ftype) for fname, ftype in table_fields
                        if fname not in todrop]
        size = (len(table) * table.dtype.itemsize) / 1024.0 / 1024.0
        #noinspection PyProtectedMember
        print(" * copying table %s (%.2f Mb) ..." % (table._v_name, size),
              end=' ')
        copy_table(table, output_entities, table_fields)
        print("done.")

    input_file.close()
    output_file.close()
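
A possible call, with hypothetical paths and field names:

# Copy input.h5 to output.h5, keeping the globals table but dropping
# the listed fields from every table under /entities.
dropfields('input.h5', 'output.h5', ['temp_value', 'debug_flag'])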
Example #22
    def __init__(self, parent, filename):

        if not isinstance(filename, string_types):
            raise ValueError(
                'Pytables requires filename parameter as string. Got {} instead.'
                .format(filename.__class__))

        self.parent = parent
        self.version = HDFPartition.VERSION

        self.n_rows = 0
        self.n_cols = 0

        self.cache = []

        if os.path.exists(filename):
            self._h5_file = open_file(filename, mode='a')
            self.meta = HDFReader._read_meta(self._h5_file)
            self.version, self.n_rows, self.n_cols = _get_file_header(
                self._h5_file.root.partition.file_header)
        else:
            # No, doesn't exist
            self._h5_file = open_file(filename, mode='w')
            self.meta = deepcopy(MPRowsFile.META_TEMPLATE)

        self.header_mangler = lambda name: re.sub(r'_+', '_', re.sub(r'[^\w_]', '_', name).lower()).rstrip('_')

        if self.n_rows == 0:
            self.meta['about']['create_time'] = time.time()
Example #23
 def __init__( self,
               fnContigLengths,
               fnWssd,
               overwrite,
               openMode,
               groupsToCheck=[],
               compression=False ):
     
     self.compress = compression
     
     assert os.path.exists(fnContigLengths)
     if openMode=='r':
         assert not overwrite
         assert os.path.exists(fnWssd), fnWssd
     
     debug_output('WssdBase: reading contig lengths from file %s'%fnContigLengths)        
     
     self.mContigNameLen = {}
     for l in open(fnContigLengths,'r'):
         l=l.replace('\n','').split('\t')
         self.mContigNameLen[l[0]]=int(l[1])
     
     debug_output('WSSD space: %d contigs totaling %d bp'%( len(self.mContigNameLen), sum(self.mContigNameLen.values()) ))
     
     if overwrite or not os.path.exists(fnWssd): 
         self.tbl = tables.open_file( fnWssd, 'w' )
     else:
         if openMode=='r':
             self.tbl = tables.open_file( fnWssd, 'r' )
         else:
             self.tbl = tables.open_file( fnWssd, 'a' )
Example #24
def create_synth(kind, prec):

    prefix_orig = "cellzome/cellzome-"
    iname = dirname + prefix_orig + "none-" + prec + ".h5"
    f = tb.open_file(iname, "r")

    if prec == "single":
        type_ = tb.Float32Atom()
    else:
        type_ = tb.Float64Atom()

    prefix = "synth/synth-"
    for clevel in range(10):
        oname = "%s/%s-%s%d-%s.h5" % (dirname, prefix, kind, clevel, prec)
        # print "creating...", iname
        f2 = tb.open_file(oname, "w")

        if kind in ["none", "numpy"]:
            filters = None
        else:
            filters = tb.Filters(complib=kind, complevel=clevel, shuffle=shuffle)

        for name in ["maxarea", "mascotscore"]:
            col = f.get_node("/", name)
            r = f2.create_carray("/", name, type_, col.shape, filters=filters)
            if name == "maxarea":
                r[:] = np.arange(col.nrows, dtype=type_.dtype)
            else:
                r[:] = np.arange(col.nrows, 0, -1, dtype=type_.dtype)

        f2.close()
        if clevel == 0:
            size = 1.5 * float(os.stat(oname)[6])
    f.close()
    return size
Example #25
 def open(self, mode, ncols=1, nrows=1, xll=0, yll=0, cellsize=1, nodatavalue=-9999.0,
     dataset_name="dummy", group_prefix="row", table_prefix="col", index_format="04i", variables=[], units=[]):
     # Initialise
     fpath = os.path.join(self.folder, self.name);
     if (mode[0] == 'w'):
         # Open the file
         self.__datafile = tables.open_file(fpath, 'w');
         
         # Assign the data attributes 
         self.ncols = ncols;
         self.nrows = nrows;                    
         self.xll = xll;
         self.yll = yll;
         self.cellsize = cellsize;
         self.nodatavalue = nodatavalue;
         self.dataset_name = dataset_name;
         self.group_prefix = group_prefix;
         self.table_prefix = table_prefix;
         self.index_format = index_format;
         self.variables = variables;
         self.units = units;
         self.writeheader();
     else: 
         # If file does not exist, then ...
         if os.path.exists(fpath):
             # Retrieve the data attributes from the header file
             self.readheader();
             GridEnvelope2D.__init__(self, self.ncols, self.nrows, self.xll, self.yll, self.cellsize, self.cellsize);
             self.__datafile = tables.open_file(fpath, 'r');
             return True;
         else: return False;   
Example #26
    def __init__(self, cfg):
        self.cfg = cfg
        self.path = os.path.join(self.cfg.subsets_path, 'data.db')
        self.results = None
        if os.path.exists(self.path):
            try:
                self.h5 = tables.open_file(self.path, 'a')
                self.results = self.h5.root.results
            except:
                # If anything fails, we just create a new database...
                log.warning("""Failed to open existing database at %s, or
                database is corrupted. Creating a new one""", self.path)
                self.results = None

        # Something went wrong!
        if not self.results:
            try:
                # Try closing this, just in case
                self.h5.close()
            except:
                pass

            # Compression is good -- and faster, according to the pytables docs...
            f = tables.Filters(complib='blosc', complevel=5)
            self.h5 = tables.open_file(self.path, 'w', filters=f)
            self.results = self.h5.create_table(
                '/', 'results', cfg.data_layout.data_type)
            self.results.cols.subset_id.create_csindex()

        assert isinstance(self.results, tables.Table)
        assert self.results.indexed
Example #27
def h5_apply_func(input_path, output_path, node_func):
    """
    Apply node_func to all nodes of input_path and store the result in
    output_path

    Parameters
    ----------
    input_path : str
        path to .h5 input file
    output_path : str
        path to .h5 output file
    node_func : function
        function that will be applied to all nodes:
        func(node, new_parent) -> new_node
        new_node must be:
            * node, if the node must be copied as-is
            * None, if the node must not be copied
            * another Node, if the node must not be copied because it was
              already handled/copied/modified by func
    """
    with tables.open_file(input_path) as input_file, \
            tables.open_file(output_path, mode="w") as output_file:
        for node in input_file.walk_nodes(classname='Leaf'):
            if node is not input_file.root:
                print(node._v_pathname, "...", end=' ')
                parent_path = node._v_parent._v_pathname
                if parent_path in output_file:
                    new_parent = output_file.get_node(parent_path)
                else:
                    new_parent = output_file._create_path(parent_path)
                new_node = node_func(node, new_parent)
                if new_node is node:
                    print("copying (without modifications) ...", end=' ')
                    node._f_copy(new_parent)
                print("done.")
Example #28
 def process(self, rows_slice):
                 
     with Worker.hdf5_lock:
         with tables.open_file(self.hdf5_file, 'r+') as fileh:
             T = fileh.get_node(self.path + '/temporaries')
             tmp = T[rows_slice, ...]
         
     ind = np.arange(0, rows_slice.stop - rows_slice.start)    
     
     # tmp = - A_new    
     tmp -= self.rows_sum
     diag_A = tmp[ind, rows_slice.start + ind].copy()
     np.clip(tmp, 0, np.inf, tmp)
     tmp[ind, rows_slice.start + ind] = diag_A
     
     Worker.hdf5_lock.acquire()
     
     with tables.open_file(self.hdf5_file, 'r+') as fileh:
         A = fileh.get_node(self.path + '/availabilities')
         a = A[rows_slice, ...]
         
     Worker.hdf5_lock.release()
         
     # yet more damping
     a = a * self.damping - tmp * (1 - self.damping)
     
     with Worker.hdf5_lock:
         with tables.open_file(self.hdf5_file, 'r+') as fileh:
             A = fileh.get_node(self.path + '/availabilities')
             T = fileh.get_node(self.path + '/temporaries')
             
             A[rows_slice, ...] = a
             T[rows_slice, ...] = tmp
             
     del a, tmp
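
Worker.hdf5_lock is not shown in this excerpt; it is presumably a single lock shared by all workers so that only one of them has the HDF5 file open for writing at a time. A minimal sketch of that assumption (the __init__ signature is also assumed):

import multiprocessing

class Worker(object):
    # Assumed: one lock created before the workers start, shared by all
    # of them, guarding every tables.open_file(..., 'r+') round-trip.
    hdf5_lock = multiprocessing.Lock()

    def __init__(self, hdf5_file, path, rows_sum, damping):
        self.hdf5_file = hdf5_file
        self.path = path
        self.rows_sum = rows_sum
        self.damping = damping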
Example #29
def ptconcat(output_file, input_files, overwrite=False):
    """Concatenate HDF5 Files"""
    filt = tb.Filters(
        complevel=5, shuffle=True, fletcher32=True, complib='zlib'
    )
    out_tabs = {}
    dt_file = input_files[0]
    log.info("Reading data struct '%s'..." % dt_file)
    h5struc = tb.open_file(dt_file, 'r')
    log.info("Opening output file '%s'..." % output_file)
    if overwrite:
        outmode = 'w'
    else:
        outmode = 'a'
    h5out = tb.open_file(output_file, outmode)

    for node in h5struc.walk_nodes('/', classname='Table'):
        path = node._v_pathname
        log.debug(path)
        dtype = node.dtype
        p, n = os.path.split(path)
        out_tabs[path] = h5out.create_table(
            p, n, description=dtype, filters=filt, createparents=True
        )
    h5struc.close()
    for fname in input_files:
        log.info('Reading %s...' % fname)
        h5 = tb.open_file(fname)
        for path, out in out_tabs.items():
            tab = h5.get_node(path)
            out.append(tab[:])
        h5.close()
    h5out.close()
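
A possible call, with hypothetical file names:

# Merge the tables of three runs into one file, overwriting old output.
ptconcat('merged.h5', ['run1.h5', 'run2.h5', 'run3.h5'], overwrite=True)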
Example #30
    def __init__(self, output_dir, chrom_list):
        # combined allele-specific read counts
        as_count_filename = "%s/combined_as_count.h5" % output_dir
        self.as_count_h5 = tables.open_file(as_count_filename, "w")
        
        # combined mapped read counts
        read_count_filename = "%s/combined_read_count.h5" % output_dir
        self.read_count_h5 = tables.open_file(read_count_filename, "w")

        # counts of genotypes
        ref_count_filename = "%s/combined_ref_count.h5" % output_dir
        self.ref_count_h5 = tables.open_file(ref_count_filename, "w")
        
        alt_count_filename = "%s/combined_alt_count.h5" % output_dir
        self.alt_count_h5 = tables.open_file(alt_count_filename, "w")
        
        het_count_filename = "%s/combined_het_count.h5" % output_dir
        self.het_count_h5 = tables.open_file(het_count_filename, "w")
        
        self.filenames = [as_count_filename, read_count_filename,
                          ref_count_filename, alt_count_filename,
                          het_count_filename]

        self.h5_files = [self.as_count_h5, self.read_count_h5,
                         self.ref_count_h5, self.alt_count_h5, 
                         self.het_count_h5]

        # initialize all of these files
        atom = tables.UInt16Atom(dflt=0)
        
        for h5f in self.h5_files:
            for chrom in chrom_list:
                self.create_carray(h5f, chrom, atom)
Example #31
#       Author:  Vicent Mas - [email protected]

#
#       This script is based on a set of scripts by Francesc Alted.
"""Several simple EArrays."""

import tables
import numpy

fileh = tables.open_file('earray_samples.h5', mode='w')

root = fileh.root
a = tables.StringAtom(itemsize=8)
# Use ``a`` as the object type for the enlargeable array.
array_c = fileh.create_earray(root, 'array_c', a, (0, ), "Chars")
array_c.append(numpy.array(['a' * 2, 'b' * 4], dtype='S8'))
array_c.append(numpy.array(['a' * 6, 'b' * 8, 'c' * 10], dtype='S8'))

# Create a string atom
a = tables.StringAtom(itemsize=1)
# Use it as a type for the enlargeable array
hdfarray = fileh.create_earray(root, 'array_char', a, (0, ), "Character array")
hdfarray.append(numpy.array(['a', 'b', 'c']))
# The next is legal:
hdfarray.append(numpy.array(['c', 'b', 'c', 'd']))
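
The script above never closes fileh; an assumed continuation that closes it and reads the two enlargeable arrays back:

fileh.close()

with tables.open_file('earray_samples.h5', mode='r') as h5:
    print(h5.root.array_c.read())     # the five 'S8' strings appended above
    print(h5.root.array_char.read())  # the seven single characters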
Example #32
from snn.optimizer.snnsgd import SNNSGD
import tables
from neurodata.load_data import create_dataloader
from snn.utils.utils_snn import get_acc_and_loss
from snn.utils.utils_snn import get_acc_layered

sample_length = 2000000  # length of samples during training in ms
dt = 25000  # us
polarity = False
T = int(sample_length / dt)  # number of timesteps in a sample
input_size = (1 + polarity) * 26 * 26

dataset_path = r"C:\Users\K1804053\OneDrive - King's College London\PycharmProjects\datasets\mnist-dvs\mnist_dvs_events_new.hdf5"
ds = 1

dataset = tables.open_file(dataset_path)
x_max = dataset.root.stats.train_data[1] // ds
dataset.close()

n_outputs = 2
n_hidden = 16
n_neurons_per_layer = [64]

network = LayeredSNN(input_size, n_neurons_per_layer, n_outputs,  synaptic_filter=filters.raised_cosine_pillow_08, n_basis_feedforward=[8],
                     n_basis_feedback=[1], tau_ff=[10], tau_fb=[10], mu=[0.5], device='cpu')

topology = torch.zeros([n_hidden + n_outputs, n_hidden + input_size + n_outputs])
topology[:n_hidden, :input_size] = 1
topology[n_hidden:, input_size:-n_outputs] = 1

network2 = BinarySNN(**make_network_parameters(network_type='snn',
Example #33
 def setUp(self):
     self.data_path = self.create_tempfile_from_testdata()
     self.data = tables.open_file(self.data_path, 'a')
Example #34
    def __init__(self,parent,subgui=True):
         # get the current path

        curpath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
        constpath = os.path.join(os.path.split(curpath)[0],'SimISR','const')
        # set the root
        self.parent = parent
        self.subgui = subgui
        # set up frames for list
        self.frame1 = Tkinter.Frame(self.parent)
        self.frame1.grid(row=0,column=0)
        self.frame2 = Tkinter.Frame(self.parent)
        self.frame2.grid(row=0,column=1)

        self.output = []
        self.beamhandle = None
        if subgui:
            self.sizecanv = [500,500]
            self.beamcodeent= Tkinter.Entry(self.frame1)
            self.beamcodeent.grid(row=1,column=1)
            self.beamcodeentlabel = Tkinter.Label(self.frame1,text="Enter Beamcodes")
            self.beamcodeentlabel.grid(row=1,column=0,sticky='e')
            self.beambuttex = Tkinter.Button(self.frame1, text="Read", command=self.readbcobar)
            self.beambuttex.grid(row=1,column=2,sticky='w')
            self.beambutt = Tkinter.Button(self.frame1, text="Import", command=self.beambuttonClick)
            self.beambutt.grid(row=2,column=2,sticky='w')
            canvrow = 3
        else:
            self.sizecanv = [1000,1000]
            self.leb = Tkinter.Label(self.frame1, text="Beam Selector",font=("Helvetica", 16))
            self.leb.grid(row=0, sticky=Tkinter.W+Tkinter.E+Tkinter.N+Tkinter.S,columnspan=2)
            self.butt = Tkinter.Button(self.frame1, text="Finished", command=self.buttonClick)
            self.butt.grid(row=1,column=1,sticky='w')
            self.beamcodeent= Tkinter.Entry(self.frame1)
            self.beamcodeent.grid(row=2,column=1,sticky='w')
            self.beamcodeentlabel = Tkinter.Label(self.frame1,text="Enter Beamcodes")
            self.beamcodeentlabel.grid(row=2,column = 0,sticky='e')
            self.beambuttex = Tkinter.Button(self.frame1, text="Read", command=self.readbcobar)
            self.beambuttex.grid(row=2,column=2,sticky='w')
            self.beambutt = Tkinter.Button(self.frame1, text="Import", command=self.beambuttonClick)
            self.beambutt.grid(row=3,column=2,sticky='w')

            canvrow = 4
        self.off_x = self.sizecanv[0]/2
        self.off_y = self.sizecanv[1]/2
        self.div = 75.0*self.sizecanv[0]/1000.0
        self.lat = [80,70,60,50,40,30]
        self.angles = np.arange(0,180,30)

        self.var = Tkinter.StringVar()
        self.var.set("PFISR")
        self.choices = {"PFISR":get_files('PFISR_PARAMS.h5'),
                        "RISR-N":get_files('RISR_PARAMS.h5'),
                        "Sondrestrom":get_files('Sondrestrom_PARAMS.h5'),
                        "Millstone":get_files('Millstone_PARAMS.h5')}#, "RISR-S":'file3'}
        self.option = Tkinter.OptionMenu(self.frame1, self.var, *self.choices)
        self.option.grid(row=1,column=0,sticky='w')
        hfile=tables.open_file(self.choices[self.var.get()])
        self.lines = hfile.root.Params.Kmat.read()
        hfile.close()
        self.readfile = Tkinter.StringVar()

        # set up the canvas
        self.canv = Tkinter.Canvas(self.frame1 , width=self.sizecanv[0], height=self.sizecanv[1],background='white')
        self.canv.grid(row=canvrow,column=0,columnspan=2)

        self.Drawlines()
        self.Drawbeams()

        self.canv.bind('<ButtonPress-1>', self.onCanvasClick)
        self.canv.bind('<ButtonPress-2>', self.onCanvasRightClick)
        self.var.trace('w', self.Changefile)
        self.canv.update()

        # beam list
        self.bidlabel = Tkinter.Label(self.frame2,text="Beam ID")
        self.bidlabel.grid(row=0,column=0)
        self.azlabel = Tkinter.Label(self.frame2,text="Azimuth")
        self.azlabel.grid(row=0,column=1)
        self.ellabel = Tkinter.Label(self.frame2,text="Elevation")
        self.ellabel.grid(row=0,column=2)

        self.scroll = Tkinter.Scrollbar(self.frame2)
        self.scroll.grid(row=1,column=3)

        self.beamtext = Tkinter.Text(self.frame2,yscrollcommand=self.scroll.set)
        self.beamtext.config(width=50,state=Tkinter.DISABLED)
        self.beamtext.grid(row = 1,column = 0,columnspan=3)
        self.beamlines = []
        self.scroll.config(command=self.beamtext.yview)

        # bounding box
        self.boxbutton= Tkinter.Button(self.frame2, text="Angle Box", command=self.boxbuttonClick)
        self.boxbutton.grid(row=2,column=0,sticky='w')

        self.azminmaxlabel = Tkinter.Label(self.frame2,text="Az min and max")
        self.azminmaxlabel.grid(row=3,column=0,sticky='e')
        self.azmin= Tkinter.Entry(self.frame2)
        self.azmin.grid(row=3,column=1,sticky='w')
        self.azmax= Tkinter.Entry(self.frame2)
        self.azmax.grid(row=3,column=2,sticky='w')

        self.elminmaxlabel = Tkinter.Label(self.frame2,text="El min and max")
        self.elminmaxlabel.grid(row=4,column=0,sticky='e')
        self.elmin= Tkinter.Entry(self.frame2)
        self.elmin.grid(row=4,column=1,sticky='w')
        self.elmax= Tkinter.Entry(self.frame2)
        self.elmax.grid(row=4,column=2,sticky='w')

        # Az choice
        self.azbutton=Tkinter.Button(self.frame2, text="Az Choice", command=self.azbuttonClick)
        self.azbutton.grid(row=5,column=0,sticky='w')
        self.azchoice= Tkinter.Entry(self.frame2)
        self.azchoice.grid(row=5,column=1,sticky='w')

        # El choice
        self.elbutton=Tkinter.Button(self.frame2, text="El Choice", command=self.elbuttonClick)
        self.elbutton.grid(row=6,column=0,sticky='w')
        self.elchoice= Tkinter.Entry(self.frame2)
        self.elchoice.grid(row=6,column=1,sticky='w')

        self.azsortbutton=Tkinter.Button(self.frame2, text="Az sort", command=self.azsortbuttonClick)
        self.azsortbutton.grid(row=7,column=0,sticky='w')
        self.elsortbutton=Tkinter.Button(self.frame2, text="El Sort", command=self.elsortbuttonClick)
        self.elsortbutton.grid(row=7,column=1,sticky='w')
Example #35
def h5_read(file):
    h5file = tables.open_file(file, driver="H5FD_CORE")
    array = h5file.root.somename.read()
    #h5file.close()
    return array, h5file
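
h5_read returns the still-open file handle alongside the array (the close() call is commented out), so the caller is expected to close it. A hypothetical usage:

array, h5file = h5_read('data.h5')
print(array.shape)
h5file.close()  # the handle stays open until the caller releases it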
Example #36
def test_source_to_sink():
    """Tests simulations with one facility that has a conversion factor.

    The trivial cycle simulation involves only one KFacility which provides
    what it requests itself. The conversion factors for requests and bids
    are kept the same so that the facility provides exactly what it requests.
    The amount of the transactions follow a power law.

    Amount = InitialAmount * ConversionFactor ^ Time

    This equation is used to test each transaction amount.
    """
    if not cyclus_has_coin():
        raise SkipTest("Cyclus does not have COIN")

    # A reference simulation input for the trivial cycle simulation.
    ref_input = os.path.join(INPUT, "trivial_cycle.xml")
    # Conversion factors for the three simulations
    k_factors = [0.95, 1, 2]

    for k_factor in k_factors:
        clean_outs()

        sim_input = create_sim_input(ref_input, k_factor, k_factor)

        holdsrtn = [1]  # needed because nose does not send() to test generator
        outfile = which_outfile()
        cmd = ["cyclus", "-o", outfile, "--input-file", sim_input]
        yield check_cmd, cmd, '.', holdsrtn
        rtn = holdsrtn[0]
        if rtn != 0:
            return  # don't execute further commands

        # tables of interest
        paths = ["/AgentEntry", "/Resources", "/Transactions", "/Info"]
        # Check if these tables exist
        yield assert_true, tables_exist(outfile, paths)
        if not tables_exist(outfile, paths):
            outfile.close()
            clean_outs()
            return  # don't execute further commands

        # Get specific tables and columns
        if outfile == h5out:
            output = tables.open_file(h5out, mode="r")
            agent_entry = output.get_node("/AgentEntry")[:]
            info = output.get_node("/Info")[:]
            resources = output.get_node("/Resources")[:]
            transactions = output.get_node("/Transactions")[:]
            output.close()
        else:
            conn = sqlite3.connect(sqliteout)
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()
            exc = cur.execute
            agent_entry = exc('SELECT * FROM AgentEntry').fetchall()
            info = exc('SELECT * FROM Info').fetchall()
            resources = exc('SELECT * FROM Resources').fetchall()
            transactions = exc('SELECT * FROM Transactions').fetchall()
            conn.close()

        # Find agent ids
        agent_ids = to_ary(agent_entry, "AgentId")
        spec = to_ary(agent_entry, "Spec")

        facility_id = find_ids(":agents:KFacility", spec, agent_ids)
        # Test for only one KFacility
        yield assert_equal, len(facility_id), 1

        sender_ids = to_ary(transactions, "SenderId")
        receiver_ids = to_ary(transactions, "ReceiverId")
        expected_sender_array = np.empty(sender_ids.size)
        expected_sender_array.fill(facility_id[0])
        expected_receiver_array = np.empty(receiver_ids.size)
        expected_receiver_array.fill(facility_id[0])
        yield assert_array_equal, sender_ids, expected_sender_array
        yield assert_array_equal, receiver_ids, expected_receiver_array

        # Transaction ids must be equal range from 1 to the number of rows
        expected_trans_ids = np.arange(0, sender_ids.size, 1)
        yield assert_array_equal, \
            to_ary(transactions, "TransactionId"), \
                   expected_trans_ids

        # Track transacted resources
        resource_ids = to_ary(resources, "ResourceId")
        quantities = to_ary(resources, "Quantity")

        # Almost equal cases due to floating point k_factors
        i = 0
        initial_capacity = quantities[0]
        for q in quantities:
            yield assert_almost_equal, q, initial_capacity * k_factor**i
            i += 1

        clean_outs()
        os.remove(sim_input)
Example #37
def create_trx(path, n_clusters=6, weights='', window=1):
    try:
        trx_ = sio.loadmat(path + "/trx.mat")
        trx = trx_['trx']

        data_types = (
            'full_path', 'id', 'numero_larva', 'numero_larva_num', 'protocol',
            'pipeline', 'stimuli', 'neuron', 't', 'x_spine', 'y_spine',
            'x_contour', 'y_contour', 'x_center', 'y_center', 'straight_proba',
            'bend_proba', 'curl_proba', 'ball_proba',
            'straight_and_light_bend_proba', 'global_state', 'x_neck_down',
            'y_neck_down', 'x_neck_top', 'y_neck_top', 'x_neck', 'y_neck',
            'x_head', 'y_head', 'x_tail', 'y_tail', 'S', 'prod_scal_1',
            'prod_scal_2', 'S_smooth_5', 'S_deriv_smooth_5',
            'angle_upper_lower_smooth_5', 'angle_upper_lower_deriv_smooth_5',
            'angle_downer_upper_smooth_5', 'angle_downer_upper_deriv_smooth_5',
            'eig_smooth_5', 'eig_deriv_smooth_5',
            'head_velocity_norm_smooth_5', 'tail_velocity_norm_smooth_5',
            'motion_velocity_norm_smooth_5', 'motion_to_u_tail_head_smooth_5',
            'motion_to_v_tail_head_smooth_5', 'd_eff_tail_norm_smooth_5',
            'd_eff_tail_norm_deriv_smooth_5', 'd_eff_head_norm_smooth_5',
            'd_eff_head_norm_deriv_smooth_5', 'larva_length_smooth_5',
            'larva_length_deriv_smooth_5', 'proba_global_state', 'run', 'cast',
            'stop', 'hunch', 'back', 'roll', 'small_motion', 'start_stop',
            't_start_stop', 'n_duration', 'nb_action', 'As_smooth_5',
            'global_state_large_state', 'global_state_small_large_state',
            'start_stop_large', 't_start_stop_large', 'duration_large',
            'n_duration_large', 'nb_action_large', 'start_stop_large_small',
            't_start_stop_large_small', 'duration_large_small',
            'n_duration_large_small', 'nb_action_large_small', 'run_large',
            'cast_large', 'stop_large', 'hunch_large', 'back_large',
            'roll_large', 'run_weak', 'cast_weak', 'stop_weak', 'hunch_weak',
            'back_weak', 'roll_weak', 'run_strong', 'cast_strong',
            'stop_strong', 'hunch_strong', 'back_strong', 'roll_strong',
            'global_state_clustering', 'start_stop_clustering',
            't_start_stop_clustering', 'duration_clustering',
            'n_duration_clustering', 'nb_action_clustering') + tuple(
                'clustering_' + str(i) for i in range(n_clusters))

        x = load_transform(path, window=window)

        trx_new = []

        ae, decoder = autoencoder(
            dims=[x[0].shape[-1] - 2, 500, 500, 2000, 10])
        n_stacks = len([x[0].shape[-1] - 2, 500, 500, 2000, 10]) - 1

        hidden = ae.get_layer(name='encoder_%d' % (n_stacks - 1)).output
        clustering_layer = ClusteringLayer(n_clusters,
                                           name='clustering')(hidden)

        modele = Model(inputs=ae.input, outputs=[clustering_layer, ae.output])

        modele.load_weights(weights)

        for j, larva in enumerate(x):
            t = larva[:, -1]

            X = larva[:, :-2]

            res = modele.predict(X)
            predictions = res[0].argmax(axis=1)

            if predictions[0] != predictions[1]:
                predictions[0] = predictions[1]

            if predictions[-1] != predictions[-2]:
                predictions[-1] = predictions[-2]

            for i in range(1, len(predictions) - 1):
                if (predictions[i] != predictions[i + 1]) & (
                        predictions[i] != predictions[i - 1]):
                    predictions[i] = predictions[i - 1]

            predictions = pd.DataFrame(predictions)

            global_state_clustering = np.array(
                [i + 1 for i in predictions.values])
            start_stop_clustering = [[] for i in range(n_clusters)]
            t_start_stop_clustering = [[] for i in range(n_clusters)]
            duration_clustering = [[] for i in range(n_clusters)]
            n_duration_clustering = [[] for i in range(n_clusters)]
            nb_action_clustering = [[] for i in range(n_clusters)]
            states = []

            for i in range(n_clusters):
                indices = list(predictions[predictions[0] == i].index)

                if len(indices) > 0:
                    indices_change = [
                        indices[i] + 1 for i in range(len(indices) - 1)
                        if (indices[i] + 1 != indices[i + 1]) or
                        (indices[i - 1] + 1 != indices[i])
                    ] + [indices[-1] + 1]
                    times_change = [t[i - 1] for i in indices_change]
                    indices_change = [[
                        indices_change[i], indices_change[i + 1]
                    ] for i in range(len(indices_change)) if (i % 2 == 0)]
                    times_change = [[times_change[i], times_change[i + 1]]
                                    for i in range(len(times_change))
                                    if (i % 2 == 0)]
                    len_behavior = [[sublist[1] - sublist[0]]
                                    for sublist in times_change]
                    n_duration = [[sublist[1] - sublist[0]]
                                  for sublist in indices_change]
                    nb_action = len(indices_change)

                    start_stop_clustering[i] = indices_change
                    t_start_stop_clustering[i] = times_change
                    duration_clustering[i] = len_behavior
                    n_duration_clustering[i] = n_duration
                    nb_action_clustering[i] = nb_action

                states.append([[1] if predictions.values[j] == i else [-1]
                               for j in range(len(predictions))])

            start_stop_clustering = np.array(
                [[i for i in start_stop_clustering]])
            t_start_stop_clustering = np.array(
                [[i for i in t_start_stop_clustering]])
            duration_clustering = np.array(
                [[i if (len(i) > 0) else [[0]] for i in duration_clustering]])
            n_duration_clustering = np.array(
                [i if (len(i) > 0) else [[0]] for i in n_duration_clustering])
            nb_action_clustering = np.array([[
                np.array([i]).astype('O') if (i) else np.array([0]).astype('O')
                for i in nb_action_clustering
            ]])

            tmp = [
                global_state_clustering, start_stop_clustering,
                t_start_stop_clustering, duration_clustering,
                n_duration_clustering, nb_action_clustering
            ]
            tmp += states
            tmp = tuple(tmp)

            trx_new.append(
                np.reshape(
                    np.array(tuple(trx[j][0]) + tmp,
                             dtype=[(n, d)
                                    for (n, d) in zip(data_types, ["O"] *
                                                      len(data_types))]), [
                                                          1,
                                                      ]))

        trx_new = np.array(trx_new)
        trx_['trx'] = trx_new
        sio.savemat(path + '/trx_new_%d_clusters' % n_clusters,
                    trx_,
                    long_field_names=True)
    except NotImplementedError:
        trx_ = tables.open_file(path + "trx.mat")

        data_titles_old = (
            'full_path', 'id', 'numero_larva', 'numero_larva_num', 'protocol',
            'pipeline', 'stimuli', 'neuron', 't', 'x_spine', 'y_spine',
            'x_contour', 'y_contour', 'x_center', 'y_center', 'straight_proba',
            'bend_proba', 'curl_proba', 'ball_proba',
            'straight_and_light_bend_proba', 'global_state', 'x_neck_down',
            'y_neck_down', 'x_neck_top', 'y_neck_top', 'x_neck', 'y_neck',
            'x_head', 'y_head', 'x_tail', 'y_tail', 'S', 'prod_scal_1',
            'prod_scal_2', 'S_smooth_5', 'S_deriv_smooth_5',
            'angle_upper_lower_smooth_5', 'angle_upper_lower_deriv_smooth_5',
            'angle_downer_upper_smooth_5', 'angle_downer_upper_deriv_smooth_5',
            'eig_smooth_5', 'eig_deriv_smooth_5',
            'head_velocity_norm_smooth_5', 'tail_velocity_norm_smooth_5',
            'motion_velocity_norm_smooth_5', 'motion_to_u_tail_head_smooth_5',
            'motion_to_v_tail_head_smooth_5', 'd_eff_tail_norm_smooth_5',
            'd_eff_tail_norm_deriv_smooth_5', 'd_eff_head_norm_smooth_5',
            'd_eff_head_norm_deriv_smooth_5', 'larva_length_smooth_5',
            'larva_length_deriv_smooth_5', 'proba_global_state', 'run', 'cast',
            'stop', 'hunch', 'back', 'roll', 'small_motion', 'start_stop',
            't_start_stop', 'n_duration', 'nb_action', 'As_smooth_5',
            'global_state_large_state', 'global_state_small_large_state',
            'start_stop_large', 't_start_stop_large', 'duration_large',
            'n_duration_large', 'nb_action_large', 'start_stop_large_small',
            't_start_stop_large_small', 'duration_large_small',
            'n_duration_large_small', 'nb_action_large_small', 'run_large',
            'cast_large', 'stop_large', 'hunch_large', 'back_large',
            'roll_large', 'run_weak', 'cast_weak', 'stop_weak', 'hunch_weak',
            'back_weak', 'roll_weak', 'run_strong', 'cast_strong',
            'stop_strong', 'hunch_strong', 'back_strong', 'roll_strong')
        data_titles = (
            'full_path', 'id', 'numero_larva', 'numero_larva_num', 'protocol',
            'pipeline', 'stimuli', 'neuron', 't', 'x_spine', 'y_spine',
            'x_contour', 'y_contour', 'x_center', 'y_center', 'straight_proba',
            'bend_proba', 'curl_proba', 'ball_proba',
            'straight_and_light_bend_proba', 'global_state', 'x_neck_down',
            'y_neck_down', 'x_neck_top', 'y_neck_top', 'x_neck', 'y_neck',
            'x_head', 'y_head', 'x_tail', 'y_tail', 'S', 'prod_scal_1',
            'prod_scal_2', 'S_smooth_5', 'S_deriv_smooth_5',
            'angle_upper_lower_smooth_5', 'angle_upper_lower_deriv_smooth_5',
            'angle_downer_upper_smooth_5', 'angle_downer_upper_deriv_smooth_5',
            'eig_smooth_5', 'eig_deriv_smooth_5',
            'head_velocity_norm_smooth_5', 'tail_velocity_norm_smooth_5',
            'motion_velocity_norm_smooth_5', 'motion_to_u_tail_head_smooth_5',
            'motion_to_v_tail_head_smooth_5', 'd_eff_tail_norm_smooth_5',
            'd_eff_tail_norm_deriv_smooth_5', 'd_eff_head_norm_smooth_5',
            'd_eff_head_norm_deriv_smooth_5', 'larva_length_smooth_5',
            'larva_length_deriv_smooth_5', 'proba_global_state', 'run', 'cast',
            'stop', 'hunch', 'back', 'roll', 'small_motion', 'start_stop',
            't_start_stop', 'n_duration', 'nb_action', 'As_smooth_5',
            'global_state_large_state', 'global_state_small_large_state',
            'start_stop_large', 't_start_stop_large', 'duration_large',
            'n_duration_large', 'nb_action_large', 'start_stop_large_small',
            't_start_stop_large_small', 'duration_large_small',
            'n_duration_large_small', 'nb_action_large_small', 'run_large',
            'cast_large', 'stop_large', 'hunch_large', 'back_large',
            'roll_large', 'run_weak', 'cast_weak', 'stop_weak', 'hunch_weak',
            'back_weak', 'roll_weak', 'run_strong', 'cast_strong',
            'stop_strong', 'hunch_strong', 'back_strong', 'roll_strong',
            'global_state_clustering', 'start_stop_clustering',
            't_start_stop_clustering', 'duration_clustering',
            'n_duration_clustering', 'nb_action_clustering') + tuple(
                'clustering_' + str(i) for i in range(n_clusters))

        x = load_transform(path, window=window)
        trx_new = []

        ae, decoder = autoencoder(
            dims=[x[0].shape[-1] - 2, 500, 500, 2000, 10])
        n_stacks = len([x[0].shape[-1] - 2, 500, 500, 2000, 10]) - 1

        hidden = ae.get_layer(name='encoder_%d' % (n_stacks - 1)).output
        clustering_layer = ClusteringLayer(n_clusters,
                                           name='clustering')(hidden)

        modele = Model(inputs=ae.input, outputs=[clustering_layer, ae.output])

        modele.load_weights(weights)

        for j, larva in enumerate(x):
            t = larva[:, -1]

            X = larva[:, :-2]

            res = modele.predict(X)
            predictions = res[0].argmax(axis=1)

            if predictions[0] != predictions[1]:
                predictions[0] = predictions[1]

            if predictions[-1] != predictions[-2]:
                predictions[-1] = predictions[-2]

            for i in range(1, len(predictions) - 1):
                if (predictions[i] != predictions[i + 1]) & (
                        predictions[i] != predictions[i - 1]):
                    predictions[i] = predictions[i - 1]

            predictions = pd.DataFrame(predictions)

            global_state_clustering = np.array(
                [i + 1 for i in predictions.values])
            start_stop_clustering = [[] for i in range(n_clusters)]
            t_start_stop_clustering = [[] for i in range(n_clusters)]
            duration_clustering = [[] for i in range(n_clusters)]
            n_duration_clustering = [[] for i in range(n_clusters)]
            nb_action_clustering = [[] for i in range(n_clusters)]
            states = []

            for i in range(n_clusters):
                indices = list(predictions[predictions[0] == i].index)

                if len(indices) > 0:
                    indices_change = [
                        indices[i] + 1 for i in range(len(indices) - 1)
                        if (indices[i] + 1 != indices[i + 1]) or
                        (indices[i - 1] + 1 != indices[i])
                    ] + [indices[-1] + 1]
                    times_change = [t[i - 1] for i in indices_change]
                    indices_change = [[
                        indices_change[i], indices_change[i + 1]
                    ] for i in range(len(indices_change)) if (i % 2 == 0)]
                    times_change = [[times_change[i], times_change[i + 1]]
                                    for i in range(len(times_change))
                                    if (i % 2 == 0)]
                    len_behavior = [[sublist[1] - sublist[0]]
                                    for sublist in times_change]
                    n_duration = [[sublist[1] - sublist[0]]
                                  for sublist in indices_change]
                    nb_action = len(indices_change)

                    start_stop_clustering[i] = indices_change
                    t_start_stop_clustering[i] = times_change
                    duration_clustering[i] = len_behavior
                    n_duration_clustering[i] = n_duration
                    nb_action_clustering[i] = nb_action

                states.append([[1] if predictions.values[j] == i else [-1]
                               for j in range(len(predictions))])

            start_stop_clustering = np.array(
                [[i for i in start_stop_clustering]])
            t_start_stop_clustering = np.array(
                [[i for i in t_start_stop_clustering]])
            duration_clustering = np.array(
                [[i if (len(i) > 0) else [[0]] for i in duration_clustering]])
            n_duration_clustering = np.array(
                [i if (len(i) > 0) else [[0]] for i in n_duration_clustering])
            nb_action_clustering = np.array([[
                np.array([i]).astype('O') if (i) else np.array([0]).astype('O')
                for i in nb_action_clustering
            ]])

            tmp = [
                global_state_clustering, start_stop_clustering,
                t_start_stop_clustering, duration_clustering,
                n_duration_clustering, nb_action_clustering
            ]
            tmp += states
            tmp = tuple(tmp)

            trx_new.append(
                np.reshape(
                    np.array(tuple(trx_.root.trx[k][0][j][0].T
                                   for k in data_titles_old) + tmp,
                             dtype=[(n, d)
                                    for (n, d) in zip(data_titles, ["O"] *
                                                      len(data_titles))]), [
                                                          1,
                                                      ]))

        trx_new = np.array(trx_new, ndmin=2)
        trx_new = {'__header__': '', '__version__': '', 'trx': trx_new}
        sio.savemat(path + '/trx_new_%d_clusters' % n_clusters,
                    trx_new,
                    long_field_names=True)
        trx_.close()
Exemplo n.º 38
0
def test_pmap_writer(config_tmpdir, s1_dataframe_converted,
                     s2_dataframe_converted, s2si_dataframe_converted):

    # setup temporary file
    filename = 'test_pmaps_auto.h5'
    PMP_file_name = os.path.join(config_tmpdir, filename)

    # Get test data
    s1_dict, _ = s1_dataframe_converted
    s2_dict, _ = s2_dataframe_converted
    s2si_dict, _ = s2si_dataframe_converted

    #P = PMaps(s1_dict, s2_dict, s2si_dict)

    event_numbers = sorted(set(s1_dict).union(set(s2si_dict)))
    timestamps = {e: int(time.time() % 1 * 10**9) for e in event_numbers}

    run_number = 632

    # Write pmaps to disk.
    with tb.open_file(PMP_file_name, 'w') as h5out:
        write_pmap = pmap_writer(h5out)
        write_run_and_event = run_and_event_writer(h5out)
        for event_no in event_numbers:
            timestamp = timestamps[event_no]
            s1 = s1_dict[event_no]
            s2 = s2_dict[event_no]
            s2si = s2si_dict[event_no]

            write_pmap(event_no, s1, s2, s2si)
            write_run_and_event(run_number, event_no, timestamp)

    # Read back the data we have just written
    S1D, S2D, S2SiD = load_pmaps(PMP_file_name)
    rundf, evtdf = read_run_and_event_from_pmaps_file(PMP_file_name)

    # Convert them into our transient format
    # S1D   = df_to_s1_dict (s1df)
    # S2D   = df_to_s2_dict (s2df)
    # S2SiD = df_to_s2si_dict(s2df, s2sidf)

    ######################################################################
    # Compare original data to those read back

    for event_no, s1 in s1_dict.items():
        s1 = s1_dict[event_no]
        S1 = S1D[event_no]

        for peak_no in s1.peak_collection():
            PEAK = S1.peak_waveform(peak_no)
            peak = s1.peak_waveform(peak_no)
            np.testing.assert_allclose(peak.t, PEAK.t)
            np.testing.assert_allclose(peak.E, PEAK.E)

    for event_no, s2 in s2_dict.items():
        s2 = s2_dict[event_no]
        S2 = S2D[event_no]

        for peak_no in s2.peak_collection():
            PEAK = S2.peak_waveform(peak_no)
            peak = s2.peak_waveform(peak_no)
            np.testing.assert_allclose(peak.t, PEAK.t)
            np.testing.assert_allclose(peak.E, PEAK.E)

    for event_no, si in s2si_dict.items():
        si = s2si_dict[event_no]
        Si = S2SiD[event_no]

        for peak_no in si.peak_collection():
            PEAK = Si.peak_waveform(peak_no)
            peak = si.peak_waveform(peak_no)
            np.testing.assert_allclose(peak.t, PEAK.t)
            np.testing.assert_allclose(peak.E, PEAK.E)

            for sipm_no in si.sipms_in_peak(peak_no):
                sipm_wfm = si.sipm_waveform(peak_no, sipm_no)
                SIPM_wfm = Si.sipm_waveform(peak_no, sipm_no)
                np.testing.assert_allclose(sipm_wfm.t, SIPM_wfm.t)

    # Event numbers
    np.testing.assert_equal(evtdf.evt_number.values,
                            np.array(event_numbers, dtype=np.int32))

    # Run numbers
    np.testing.assert_equal(
        rundf.run_number.values,
        np.full(len(event_numbers), run_number, dtype=np.int32))
Exemplo n.º 39
0
"""
@author: seb18121
"""

import numpy as np
# import matplotlib.pyplot as plt
import tables, sys, gc

# python /mnt/d/My_python_script/feedback_aperp_1D.py XXX_aperp_NO feedbackfactor detuning
basename = sys.argv[1]  # retrieve the base name
# filename1 = 'CEP_aperp_41'
filename = basename + ".h5"
outfilename = "entrance.h5"

print("Reading the field file ...\n")

h5 = tables.open_file(filename, 'r')

wavelength = h5.root.runInfo._v_attrs.lambda_r
nx = h5.root.runInfo._v_attrs.nX
ny = h5.root.runInfo._v_attrs.nY
nz = h5.root.runInfo._v_attrs.nZ2
Lc = h5.root.runInfo._v_attrs.Lc
Lg = h5.root.runInfo._v_attrs.Lg
meshsizeX = h5.root.runInfo._v_attrs.sLengthOfElmX
meshsizeY = h5.root.runInfo._v_attrs.sLengthOfElmY
meshsizeZ2 = h5.root.runInfo._v_attrs.sLengthOfElmZ2
meshsizeXSI = meshsizeX * np.sqrt(Lc * Lg)
meshsizeYSI = meshsizeY * np.sqrt(Lc * Lg)
meshsizeZSI = meshsizeZ2 * Lc

fieldin = h5.root.aperp.read()
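The snippet stops after reading the field, but the `outfilename` defined above suggests the modified field is meant to be written back out. A hedged sketch of that step, assuming the output should mirror the input layout (an 'aperp' array at the root plus the copied 'runInfo' group); the layout expected by downstream tools is an assumption, not confirmed by the truncated snippet:

# Sketch only: write the field to the output file named above, mirroring the
# assumed input layout.
outfile = tables.open_file(outfilename, 'w')
outfile.create_array('/', 'aperp', fieldin)
h5.copy_node('/runInfo', newparent=outfile.root, recursive=True)
outfile.close()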
Exemplo n.º 40
0
                #print("Adding cut: %d" % (storeIdx))
                self.add(constraint=cplex.SparsePair(thevars,thecoefs), sense = "G", rhs = (b_Lext[0,minIdx]))


full = int(sys.argv[1])
feuer = int(sys.argv[2])
xnL = int(sys.argv[3])
xnU = int(sys.argv[4])
tnL = int(sys.argv[5])
tnU = int(sys.argv[6])
stepX = int(sys.argv[7])
stepT = int(sys.argv[8])
for timeVar in range(tnL,tnU+1,stepT):
    for countVar in range(xnL,xnU+1,stepX):
        if feuer:
            matlabData = tables.open_file('data/feuerData%d_%d_%d.mat' % (countVar,timeVar,2))
        else:
            matlabData = tables.open_file('data/contaData%d_%d_%d.mat' % (countVar,timeVar,5))
        A2=matlabData.root.A2.data[...]
        Amipred=scipy.sparse.csc_matrix((matlabData.root.A2.data[...],matlabData.root.A2.ir[...], matlabData.root.A2.jc[...]))
        Amipred=scipy.sparse.lil_matrix(Amipred)
        b_Lred=matlabData.root.b_L2[0]
        b_Ured=matlabData.root.b_U2[0]
        c=matlabData.root.c[0]
        xn=int(matlabData.root.xn[0,0])
        tn=int(matlabData.root.tn[0,0])
        Amipext=scipy.sparse.csc_matrix((matlabData.root.Aext.data[...],matlabData.root.Aext.ir[...], matlabData.root.Aext.jc[...]))
        Amipext=scipy.sparse.lil_matrix(Amipext)
        b_Uext=matlabData.root.b_Uext
        b_Lext=matlabData.root.b_Lext
        intVarN=int(matlabData.root.intVarN[0])
Exemplo n.º 41
0
    def load(self):
        """
        Loads a matrix stored in h5 format
        :param matrix_filename:
        :return: matrix, cut_intervals, nan_bins, distance_counts, correction_factors
        """
        log.debug('Load in h5 format')

        with tables.open_file(self.matrixFileName, 'r') as f:
            parts = {}
            try:
                for matrix_part in ('data', 'indices', 'indptr', 'shape'):
                    parts[matrix_part] = getattr(f.root.matrix,
                                                 matrix_part).read()
            except Exception as e:
                log.info(
                    'No h5 file. Please check parameters concerning the file type!'
                )
                log.debug(e)
            matrix = csr_matrix(tuple(
                [parts['data'], parts['indices'], parts['indptr']]),
                                shape=parts['shape'])
            # matrix = hiCMatrix.fillLowerTriangle(matrix)
            # get intervals
            intvals = {}
            for interval_part in ('chr_list', 'start_list', 'end_list',
                                  'extra_list'):
                if toString(interval_part) == toString('chr_list'):
                    chrom_list = getattr(f.root.intervals,
                                         interval_part).read()
                    intvals[interval_part] = toString(chrom_list)
                else:
                    intvals[interval_part] = getattr(f.root.intervals,
                                                     interval_part).read()

            cut_intervals = list(
                zip(intvals['chr_list'], intvals['start_list'],
                    intvals['end_list'], intvals['extra_list']))
            assert len(cut_intervals) == matrix.shape[0], \
                "Error loading matrix. Length of bin intervals ({}) is different than the " \
                "size of the matrix ({})".format(len(cut_intervals), matrix.shape[0])

            # get nan_bins
            try:
                if hasattr(f.root, 'nan_bins'):
                    nan_bins = f.root.nan_bins.read()
                else:
                    nan_bins = np.array([])
            except Exception:
                nan_bins = np.array([])

            # get correction factors
            try:
                if hasattr(f.root, 'correction_factors'):
                    correction_factors = f.root.correction_factors.read()
                    assert len(correction_factors) == matrix.shape[0], \
                        "Error loading matrix. Length of correction factors does not" \
                        "match size of matrix"
                    correction_factors = np.array(correction_factors)
                    mask = np.isnan(correction_factors)
                    correction_factors[mask] = 0
                    mask = np.isinf(correction_factors)
                    correction_factors[mask] = 0
                else:
                    correction_factors = None
            except Exception:
                correction_factors = None

            try:
                # get distance counts
                if hasattr(f.root, 'distance_counts'):
                    distance_counts = f.root.distance_counts.read()
                else:
                    distance_counts = None
            except Exception:
                distance_counts = None
            return matrix, cut_intervals, nan_bins, distance_counts, correction_factors
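Since the fillLowerTriangle call above is commented out and save() (shown further below) keeps only the upper triangle by default, callers may need to re-symmetrise the returned CSR matrix themselves. A minimal, self-contained sketch, with a toy matrix standing in for the loaded one:

import numpy as np
from scipy.sparse import csr_matrix, triu

# Toy upper-triangular matrix standing in for the value returned by load().
upper = csr_matrix(np.array([[1, 2, 0],
                             [0, 3, 4],
                             [0, 0, 5]]))

# Mirror the strictly-upper part below the diagonal to restore the full matrix.
full = upper + triu(upper, k=1).T
print(full.toarray())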
Exemplo n.º 42
0
def main():
    dummy_parser = argparse.ArgumentParser(description='train_mm.py')
    opts.model_opts(dummy_parser)
    dummy_opt = dummy_parser.parse_known_args([])[0]

    os.makedirs(os.path.dirname(opt.output), exist_ok=True)

    device = torch.device("cpu")
    # opt.cuda = opt.gpu > -1
    opt.cuda = False
    if opt.gpuid:
        device = torch.device("cuda:{}".format(opt.gpuid[0]))
        cuda.set_device(device)
        opt.cuda = True
        # torch.cuda.set_device(opt.gpu)

    # loading checkpoint just to find multimodal model type
    checkpoint = torch.load(opt.model,
                            map_location=lambda storage, loc: storage)
    opt.multimodal_model_type = checkpoint['opt'].multimodal_model_type
    del checkpoint

    if opt.batch_size > 1:
        print(
            "Batch size > 1 not implemented! Falling back to batch_size = 1 ..."
        )
        opt.batch_size = 1

    # load test image features
    test_file = tables.open_file(opt.path_to_test_img_feats, mode='r')
    if opt.multimodal_model_type in ['imgd', 'imge', 'imgw']:
        test_img_feats = test_file.root.global_feats[:]
    elif opt.multimodal_model_type in ['src+img']:
        test_img_feats = test_file.root.local_feats[:]
    else:
        raise Exception("Model type not implemented: %s" %
                        opt.multimodal_model_type)
    test_file.close()

    # Load the model.
    fields, model, model_opt = \
        onmt.ModelConstructor.load_test_model(opt, dummy_opt.__dict__)
    #opt.multimodal_model_type = checkpoint['opt'].multimodal_model_type

    # File to write sentences to.
    out_file = codecs.open(opt.output, 'w', 'utf-8')

    # Test data
    data = onmt.io.build_dataset(fields,
                                 opt.data_type,
                                 opt.src,
                                 opt.tgt,
                                 src_dir=opt.src_dir,
                                 sample_rate=opt.sample_rate,
                                 window_size=opt.window_size,
                                 window_stride=opt.window_stride,
                                 window=opt.window,
                                 use_filter_pred=False)

    # Sort batch by decreasing lengths of sentence required by pytorch.
    # sort=False means "Use dataset's sortkey instead of iterator's".
    data_iter = onmt.io.OrderedIterator(dataset=data,
                                        device=device,
                                        batch_size=opt.batch_size,
                                        train=False,
                                        sort=False,
                                        sort_within_batch=True,
                                        shuffle=False)

    # Translator
    scorer = onmt.translate.GNMTGlobalScorer(opt.alpha, opt.beta)
    translator = onmt.translate.TranslatorMultimodal(
        model,
        fields,
        beam_size=opt.beam_size,
        n_best=opt.n_best,
        global_scorer=scorer,
        max_length=opt.max_length,
        copy_attn=model_opt.copy_attn,
        cuda=opt.cuda,
        beam_trace=opt.dump_beam != "",
        min_length=opt.min_length,
        test_img_feats=test_img_feats,
        multimodal_model_type=opt.multimodal_model_type)
    builder = onmt.translate.TranslationBuilder(data, translator.fields,
                                                opt.n_best, opt.replace_unk,
                                                opt.tgt)

    # Statistics
    counter = count(1)
    pred_score_total, pred_words_total = 0, 0
    gold_score_total, gold_words_total = 0, 0

    for sent_idx, batch in enumerate(data_iter):
        batch_data = translator.translate_batch(batch, data, sent_idx)
        translations = builder.from_batch(batch_data)

        for trans in translations:
            pred_score_total += trans.pred_scores[0]
            pred_words_total += len(trans.pred_sents[0])
            if opt.tgt:
                gold_score_total += trans.gold_score
                gold_words_total += len(trans.gold_sent)

            n_best_preds = [
                " ".join(pred) for pred in trans.pred_sents[:opt.n_best]
            ]
            out_file.write('\n'.join(n_best_preds))
            out_file.write('\n')
            out_file.flush()

            if opt.verbose:
                sent_number = next(counter)
                output = trans.log(sent_number)
                os.write(1, output.encode('utf-8'))

    _report_score('PRED', pred_score_total, pred_words_total)
    if opt.tgt:
        _report_score('GOLD', gold_score_total, gold_words_total)
        if opt.report_bleu:
            _report_bleu()
        if opt.report_rouge:
            _report_rouge()

    if opt.dump_beam:
        import json
        json.dump(translator.beam_accum,
                  codecs.open(opt.dump_beam, 'w', 'utf-8'))
Exemplo n.º 43
0
def get_metadata():
    """
    Reads metadata content (i.e., model parametrizations and objectives) of the specified .h5 file.
    GET parameters:
        - datasetName with "dataset".
        - drKernelName with "drk".
    :return:
    """

    app.config["DATASET_NAME"] = InputDataset.check_dataset_name(
        request.args.get('datasetName'))
    app.config[
        "DR_KERNEL_NAME"] = DimensionalityReductionKernel.check_kernel_name(
            request.args.get('drKernelName'))
    app.config["CACHE_ROOT"] = "/tmp/" + app.config[
        "DATASET_NAME"] + "_" + app.config["DR_KERNEL_NAME"]

    # Update root storage path with new dataset name.
    app.config["STORAGE_PATH"] = app.config["ROOT_STORAGE_PATH"] + app.config[
        "DATASET_NAME"] + "/"

    app.config["SURROGATE_MODELS_PATH"] = app.config["STORAGE_PATH"] + app.config["DR_KERNEL_NAME"] + \
                                          "_surrogatemodels.pkl"
    app.config["EXPLAINER_VALUES_PATH"] = app.config["STORAGE_PATH"] + app.config["DR_KERNEL_NAME"] + \
                                          "_explainervalues.pkl"
    dataset_name_class_links = {
        "movie": MovieDataset,
        "happiness": HappinessDataset
    }
    app.config["DATASET_CLASS"] = dataset_name_class_links[
        app.config["DATASET_NAME"]]

    # Compile metadata template.
    update_and_get_metadata_template()

    # Build file name.
    file_name: str = (app.config["STORAGE_PATH"] + "embedding_" +
                      app.config["DR_KERNEL_NAME"] + ".h5")
    app.config["FULL_FILE_NAME"] = file_name

    # Open .h5 file, if dataset name and DR kernel name are valid and file exists.
    if app.config["DATASET_NAME"] is not None and \
            app.config["DR_KERNEL_NAME"] is not None and \
            os.path.isfile(file_name):
        ###################################################
        # Load dataset.
        ###################################################

        h5file = tables.open_file(filename=file_name, mode="r")
        # Cast to dataframe, then return as JSON.
        df = pandas.DataFrame(h5file.root.metadata[:]).set_index("id")
        # Close file.
        h5file.close()

        ###################################################
        # Preprocess and cache dataset.
        ###################################################

        # Prepare dataframe for ratings.
        app.config["RATINGS"] = df.copy(deep=True)
        app.config["RATINGS"]["rating"] = 0

        # Assemble embedding-level metadata.
        app.config["EMBEDDING_METADATA"]["original"] = df
        app.config["EMBEDDING_METADATA"]["features_preprocessed"], \
        app.config["EMBEDDING_METADATA"]["labels"], \
        app.config["EMBEDDING_METADATA"]["features_categorical_encoding_translation"] = \
            Utils.preprocess_embedding_metadata_for_predictor(
                metadata_template=app.config["METADATA_TEMPLATE"], embeddings_metadata=df
            )

        ###################################################
        # Load global surrogate models and local
        # explainer values.
        ###################################################

        # Compute regressor for each objective.
        with open(app.config["SURROGATE_MODELS_PATH"], "rb") as file:
            app.config["GLOBAL_SURROGATE_MODELS"] = pickle.load(file)

        # Load explainer values.
        # Replace specific metric references with an arbitrary "metric" for parsing in frontend.
        app.config["EXPLAINER_VALUES"] = pd.read_pickle(
            app.config["EXPLAINER_VALUES_PATH"])

        # Return JSON-formatted embedding data.
        df["rating"] = app.config["RATINGS"]["rating"]

        # todo (remove, generate data cleanly) Hack: Rename target_domain_performance and n_components here, dismiss
        #  b_nx.
        return jsonify(
            df.rename(
                columns={
                    "target_domain_performance": "rdp",
                    "separability_metric": "separability"
                }).drop(["b_nx"], axis=1).to_json(orient='index'))

    else:
        return "File/kernel does not exist.", 400
Exemplo n.º 44
0
    def save(self, filename, pSymmetric=True, pApplyCorrection=None):
        """
        Saves a matrix using hdf5 format
        :param filename:
        :return: None
        """
        log.debug('Save in h5 format')

        # self.restoreMaskedBins()
        if not filename.endswith(".h5"):
            filename += ".h5"

        # if the file name already exists
        # try to find a new suitable name
        if os.path.isfile(filename):
            log.warning("*WARNING* File already exists {}\n "
                        "Overwriting ...\n".format(filename))

            unlink(filename)
        if self.nan_bins is None:
            self.nan_bins = np.array([])
        elif not isinstance(self.nan_bins, np.ndarray):
            self.nan_bins = np.array(self.nan_bins)

        # save only the upper triangle of the symmetric matrix
        if pSymmetric:
            matrix = triu(self.matrix, k=0, format='csr')
        else:
            matrix = self.matrix
        matrix.eliminate_zeros()

        filters = tables.Filters(complevel=5, complib='blosc')
        with tables.open_file(filename, mode="w",
                              title="HiCExplorer matrix") as h5file:
            matrix_group = h5file.create_group(
                "/",
                "matrix",
            )
            # save the parts of the csr matrix
            for matrix_part in ('data', 'indices', 'indptr', 'shape'):
                arr = np.array(getattr(matrix, matrix_part))
                atom = tables.Atom.from_dtype(arr.dtype)
                ds = h5file.create_carray(matrix_group,
                                          matrix_part,
                                          atom,
                                          shape=arr.shape,
                                          filters=filters)
                ds[:] = arr

            # save the matrix intervals
            intervals_group = h5file.create_group(
                "/",
                "intervals",
            )
            chr_list, start_list, end_list, extra_list = zip(
                *self.cut_intervals)
            for interval_part in ('chr_list', 'start_list', 'end_list',
                                  'extra_list'):
                arr = np.array(eval(interval_part))
                atom = tables.Atom.from_dtype(arr.dtype)
                ds = h5file.create_carray(intervals_group,
                                          interval_part,
                                          atom,
                                          shape=arr.shape,
                                          filters=filters)
                ds[:] = arr

            # save nan bins
            if len(self.nan_bins):
                atom = tables.Atom.from_dtype(self.nan_bins.dtype)
                ds = h5file.create_carray(h5file.root,
                                          'nan_bins',
                                          atom,
                                          shape=self.nan_bins.shape,
                                          filters=filters)
                ds[:] = self.nan_bins

            # save corrections factors
            if self.correction_factors is not None and len(
                    self.correction_factors):
                self.correction_factors = np.array(self.correction_factors)
                mask = np.isnan(self.correction_factors)
                self.correction_factors[mask] = 0
                atom = tables.Atom.from_dtype(self.correction_factors.dtype)
                ds = h5file.create_carray(h5file.root,
                                          'correction_factors',
                                          atom,
                                          shape=self.correction_factors.shape,
                                          filters=filters)
                ds[:] = np.array(self.correction_factors)

            # save distance counts
            if self.distance_counts is not None and len(self.distance_counts):
                atom = tables.Atom.from_dtype(self.distance_counts.dtype)
                ds = h5file.create_carray(h5file.root,
                                          'distance_counts',
                                          atom,
                                          shape=self.distance_counts.shape,
                                          filters=filters)
                ds[:] = np.array(self.distance_counts)
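The create_carray pattern used repeatedly above is easy to test in isolation. A small, self-contained sketch writing one blosc-compressed array and reading it back (the file name is illustrative):

import numpy as np
import tables

arr = np.arange(10, dtype=np.float64)
filters = tables.Filters(complevel=5, complib='blosc')

with tables.open_file("carray_demo.h5", mode="w") as h5file:
    atom = tables.Atom.from_dtype(arr.dtype)
    ds = h5file.create_carray(h5file.root, 'data', atom,
                              shape=arr.shape, filters=filters)
    ds[:] = arr

with tables.open_file("carray_demo.h5", mode="r") as h5file:
    print(h5file.root.data.read())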
Exemplo n.º 45
0
def read_data(fname,args,p):
    data = dict()
    c51_data = h5.open_file(fname)
    # check how many Nbs are in file
    if args.Nbs is None:
        Nbs = c51_data.get_node('/gA/'+p['ensembles'][0]+'/bs').read().shape[0]
    else:
        Nbs = args.Nbs
    p['Nbs'] = Nbs
    print('using Nbs = %d samples' %Nbs)

    ga_bs    = np.zeros([Nbs,p['l_d']])
    ga_b0    = np.zeros([p['l_d']])
    epi_bs   = np.zeros_like(ga_bs)
    epi_b0   = np.zeros_like(ga_b0)
    mL_b0    = np.zeros([p['l_d']])
    mL_bs    = np.zeros([Nbs,p['l_d']])
    aw0_b0   = np.zeros([p['l_d']])
    aw0_bs   = np.zeros([Nbs,p['l_d']])
    aSaw0_b0 = np.zeros([p['l_d']])
    aSaw0_bs = np.zeros([Nbs,p['l_d']])
    eju_bs   = np.zeros_like(ga_bs)
    eju_b0   = np.zeros_like(ga_b0)
    epqsq_bs = np.zeros_like(ga_bs)
    epqsq_b0 = np.zeros_like(ga_b0)
    for i,ens in enumerate(p['ensembles']):
        ga_bs[:,i]    = c51_data.get_node('/gA/'+ens+'/bs').read()[0:Nbs]
        ga_b0[i]      = float(c51_data.get_node('/gA/'+ens+'/b0').read())
        epi_bs[:,i]   = c51_data.get_node('/epi/'+ens+'/bs').read()[0:Nbs]
        epi_b0[i]     = float(c51_data.get_node('/epi/'+ens+'/b0').read())
        mL_bs[:,i]    = c51_data.get_node('/mpiL/'+ens+'/bs').read()[0:Nbs]
        mL_b0[i]      = float(c51_data.get_node('/mpiL/'+ens+'/b0').read())
        aw0_bs[:,i]   = c51_data.get_node('/aw0/'+ens+'/bs').read()[0:Nbs]
        aw0_b0[i]     = float(c51_data.get_node('/aw0/'+ens+'/b0').read())
        # multiply a by sqrt(alpha_S): a enters the extrapolation functions squared,
        # so this effectively swaps sqrt(alpha_S) * a in for a
        aSaw0_bs[:,i] = aw0_bs[:,i] * np.sqrt(p['afs'][ens])
        aSaw0_b0[i]   = aw0_b0[i] * np.sqrt(p['afs'][ens])
        eju_bs[:,i]   = c51_data.get_node('/eju/'+ens+'/bs').read()[0:Nbs]
        eju_b0[i]     = float(c51_data.get_node('/eju/'+ens+'/b0').read())
        epqsq_bs[:,i] = c51_data.get_node('/epqsq/'+ens+'/bs').read()[0:Nbs]
        epqsq_b0[i]   = float(c51_data.get_node('/epqsq/'+ens+'/b0').read())
        print('%s gA = %.4f +- %.4f, epi = %.5f +- %.5f, mpiL = %.4f +- %.4f' \
            %(ens,ga_b0[i],ga_bs.std(axis=0)[i],epi_b0[i],epi_bs.std(axis=0)[i],
            mL_b0[i],mL_bs.std(axis=0)[i]))
    data['ga_bs']    = ga_bs
    data['ga_b0']    = ga_b0
    data['epi_bs']   = epi_bs
    data['epi_b0']   = epi_b0
    data['mL_bs']    = mL_bs
    data['mL_b0']    = mL_b0
    data['aw0_bs']   = aw0_bs
    data['aw0_b0']   = aw0_b0
    data['eju_b0']   = eju_b0
    data['eju_bs']   = eju_bs
    data['epqsq_b0'] = epqsq_b0
    data['epqsq_bs'] = epqsq_bs
    data['aSaw0_bs'] = aSaw0_bs
    data['aSaw0_b0'] = aSaw0_b0
    c51_data.close()
    return data
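The get_node paths above imply a file layout of /<observable>/<ensemble>/{b0, bs}. A sketch writing that layout for a single observable and one made-up ensemble, so the expected structure is explicit; the values and the ensemble name are fake:

import numpy as np
import tables as h5

with h5.open_file('c51_toy.h5', 'w') as f:
    ga = f.create_group('/', 'gA')
    ens = f.create_group(ga, 'a12m310')          # made-up ensemble name
    f.create_array(ens, 'b0', 1.25)              # central value
    f.create_array(ens, 'bs', np.random.normal(1.25, 0.02, size=200))  # bootstrap samples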
__author__ = 'zelalem'

import fastcluster
import numpy
import scipy.spatial.distance as dist
import csv
import tables

hdf5_path = "/home/zelalem/Downloads/popular_year.hdf5"
a = tables.StringAtom(itemsize=24)
hdf5_file = tables.open_file(hdf5_path, mode='w')
data_storage = hdf5_file.create_earray(hdf5_file.root, 'e_array',a, (0,))

f = open('/home/zelalem/Desktop/PS_year_month.csv','rb')
reader = list(csv.reader(f))
dataMatrix = []

for row in reader[1:]:

    dataMatrix.append(row[1:])

dataMatrix = numpy.array(dataMatrix)
# distanceMatrix = dist.pdist(dataMatrix,'euclidean') # Metric: 'euclidean', 'seuclidean', 'cosine', 'hamming', 'correlation'
data_storage.append(dist.pdist(dataMatrix,'euclidean')) # Metric: 'euclidean', 'seuclidean', 'cosine', 'hamming', 'correlation'
linkage = fastcluster.linkage(data_storage, method='median') # Method 'single','complete','average','weighted','ward','centroid','median'
num_of_cluster = 8
clust_dict = {i: [i] for i in xrange(len(linkage)+1)}


for i in xrange(len(linkage)-num_of_cluster+1):
    clust1 = int(linkage[i][0])
        #print(y)

        #print('After processing, sample:', X[0])
    #    print('After processing, labels:', y_batch[0])
    #print('y SHAPE AFTER', np.array(y_batch, dtype=object).shape)

# ======================================================================================================================
# Write pre-processed TRAIN data to csv file
# ======================================================================================================================

# Set the compression level
    filters = tables.Filters(complib='blosc', complevel=5)

    # Save X batches into file
    f = tables.open_file(x_filename + '.hdf', 'a')
    ds = f.create_carray(
        '/',
        'x_data' + str(i),
        obj=X_batch,
        filters=filters,
    )
    ds[:] = X_batch
    #print(ds)
    f.close()

    if not x_filename == 'data\\preprocessed_data\\x_TEST_SENTENC_data_preprocessed':  # do NOT write for TEST DATA
        # Save y batches into file
        f = tables.open_file(y_filename + '.hdf', 'a')
        ds = f.create_carray('/',
                             'y_data' + str(i),
Exemplo n.º 48
0
            print('BAD FILE: ' + str(filelist[nums]))

        print("Done!")
        print(featuresarray.shape)

    return group, truth


#pr = cProfile.Profile()
#pr.enable()
group, truth = test_my_little_function()

#pr.disable()
#s = StringIO.StringIO()
#sortby = 'cumulative'
#ps = pstats.Stats(pr,stream=s).sort_stats(sortby)
#ps.print_stats()
#print(s.getvalue())

print(group.shape)
print(truth.shape)
write_h5 = True
if write_h5:
    import tables
    f = tables.open_file(outputname,
                         'w',
                         filters=tables.Filters(complib='zlib', complevel=6))
    f.create_carray('/', 'features', obj=group)
    f.create_carray('/', 'truth', obj=truth)
    f.close()
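Reading the two arrays back is symmetrical; a short sketch, with an illustrative path standing in for outputname:

import tables

outputname = 'features.h5'  # illustrative; use the same path as above
with tables.open_file(outputname, 'r') as f:
    features = f.root.features.read()
    truth = f.root.truth.read()
print(features.shape, truth.shape)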
Exemplo n.º 49
0
def sub_wrapper(day_datetimes, j):
    print day_datetimes[j]
    date = day_datetimes[j]
    try:
        date_1 = date + datetime.timedelta(minutes=13)
        global_data, time_slot = pinkdust.load_channels(date,
                                                        date_1)
        data_087 = global_data[8.7].data
        data_108 = global_data[10.8].data
        data_120 = global_data[12.0].data
        data = np.zeros(
            (data_087.shape[0], data_087.shape[1], 3))

        data[:, :, 0] = data_087
        data[:, :, 1] = data_108
        data[:, :, 2] = data_120

    except:
        print 'Adding date to list of missing dates'
        with open('/soge-home/projects/seviri_dust'
                  '/raw_seviri_data/bt_native/missing_msgnative_dates'
                  '.txt', 'a') as my_file:
            my_file.write('\n')
            my_file.write(date.strftime('%Y%m%d%H%M%S'))
        data = np.zeros(
            (msg_area.shape[0], msg_area.shape[1], 3))
        data[:] = np.nan

    if (data.shape[0] != msg_area.shape[0]) or (
                data.shape[1] !=
                msg_area.shape[1]):
        print 'Native file data has wrong dimensions - using the second half of the array'
        data_copy = deepcopy(data)
        data = np.zeros(
            (msg_area.shape[0], msg_area.shape[1], 3))
        data[:] = data_copy[msg_area.shape[0]:, :]

    msg_con_nn = image.ImageContainerNearest(data, msg_area,
                                             radius_of_influence=50000)
    area_con_nn = msg_con_nn.resample(target_area)
    result_data_nn = area_con_nn.image_data

    bt_data = np.zeros((3, lons.shape[0], lons.shape[1]))
    bt_data[0] = result_data_nn[:, :, 0]
    bt_data[1] = result_data_nn[:, :, 1]
    bt_data[2] = result_data_nn[:, :, 2]

    cloudmask = None

    # Now, instead of writing to day array, you write to hdf
    f = tables.open_file('/soge-home/projects/seviri_dust/raw_seviri_data'
                         '/intermediary_files/BT_087_' + date.strftime(
        '%Y%m%d%H%M%S.hdf'),
                         'w')
    atom = tables.Atom.from_dtype(bt_data[0].dtype)
    filters = tables.Filters(complib='blosc', complevel=5)
    ds = f.create_carray(f.root, 'data', atom,
                         bt_data[0].shape,
                         filters=filters)
    ds[:] = bt_data[0]
    f.close()

    f = tables.open_file('/soge-home/projects/seviri_dust/raw_seviri_data'
                         '/intermediary_files/BT_108_' + date.strftime(
        '%Y%m%d%H%M%S.hdf'), 'w')
    atom = tables.Atom.from_dtype(bt_data[1].dtype)
    filters = tables.Filters(complib='blosc', complevel=5)
    ds = f.create_carray(f.root, 'data', atom,
                         bt_data[0].shape,
                         filters=filters)
    ds[:] = bt_data[1]
    f.close()

    f = tables.open_file('/soge-home/projects/seviri_dust/raw_seviri_data'
                         '/intermediary_files/BT_120_' + date.strftime(
        '%Y%m%d%H%M%S.hdf'), 'w')
    atom = tables.Atom.from_dtype(bt_data[2].dtype)
    filters = tables.Filters(complib='blosc', complevel=5)
    ds = f.create_carray(f.root, 'data', atom,
                         bt_data[0].shape,
                         filters=filters)
    ds[:] = bt_data[2]
    f.close()

    print 'Wrote', day_datetimes[j]
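The three write blocks above differ only in the channel tag and the array slice, so they could be folded into one helper. A sketch using the same paths and compression settings as above; the helper name is made up:

def write_bt_channel(channel_tag, array, date):
    # One intermediary file per channel, matching the naming scheme above.
    f = tables.open_file('/soge-home/projects/seviri_dust/raw_seviri_data'
                         '/intermediary_files/BT_' + channel_tag + '_' +
                         date.strftime('%Y%m%d%H%M%S.hdf'), 'w')
    atom = tables.Atom.from_dtype(array.dtype)
    filters = tables.Filters(complib='blosc', complevel=5)
    ds = f.create_carray(f.root, 'data', atom, array.shape, filters=filters)
    ds[:] = array
    f.close()

# e.g. write_bt_channel('087', bt_data[0], date), and likewise for '108' and '120'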
Exemplo n.º 50
0
 def __enter__(self):
     self.file = open_file(self.out_fp, 'w')
     self.group = self.file.create_group("/", 'experiments', '')
     self.table = self.file.create_table(self.group, self.table_name,
                                         self.row_class, "")
     return self
Exemplo n.º 51
0
 def __enter__(self):
     """ Enter context """
     if self.hdf is None:
         self.hdf = tables.open_file(self.source)
     return self
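The lazy open above only pays off if the handle is released when the with-block ends; the snippet does not show __exit__, so the following is a sketch under the assumption that the class closes the PyTables file on exit:

 def __exit__(self, exc_type, exc_value, traceback):
     """ Exit context """
     # Assumption: close the lazily opened file; the original __exit__ is not shown.
     if self.hdf is not None:
         self.hdf.close()
         self.hdf = None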
Exemplo n.º 52
0
def auto_merge_h5files(file_list,
                       output_filename='merged.h5',
                       nodes_keys=None,
                       merge_arrays=False,
                       filters=HDF5_ZSTD_FILTERS):
    """
    Automatic merge of HDF5 files.
    A list of nodes keys can be provided to merge only these nodes. If None, all nodes are merged.

    Parameters
    ----------
    file_list: list of path
    output_filename: path
    nodes_keys: list of path
    merge_arrays: bool
        if True, eligible Array nodes are created as EArrays and appended across files;
        otherwise they are copied from the first file only
    filters: tables.Filters
        filters applied to the merged output file
    """

    if nodes_keys is None:
        keys = set(get_dataset_keys(file_list[0]))
    else:
        keys = set(nodes_keys)

    bar = tqdm(total=len(file_list))
    with open_file(output_filename, 'w', filters=filters) as merge_file:
        with open_file(file_list[0]) as f1:
            for k in keys:
                if type(f1.root[k]) == tables.table.Table:
                    merge_file.create_table(os.path.join(
                        '/',
                        k.rsplit('/', maxsplit=1)[0]),
                                            os.path.basename(k),
                                            createparents=True,
                                            obj=f1.root[k].read())
                if type(f1.root[k]) == tables.array.Array:
                    if merge_arrays:
                        merge_file.create_earray(os.path.join(
                            '/',
                            k.rsplit('/', maxsplit=1)[0]),
                                                 os.path.basename(k),
                                                 createparents=True,
                                                 obj=f1.root[k].read())
                    else:
                        merge_file.create_array(os.path.join(
                            '/',
                            k.rsplit('/', maxsplit=1)[0]),
                                                os.path.basename(k),
                                                createparents=True,
                                                obj=f1.root[k].read())
        bar.update(1)
        for filename in file_list[1:]:
            common_keys = keys.intersection(get_dataset_keys(filename))
            with open_file(filename) as file:
                for k in common_keys:
                    try:
                        if merge_arrays:
                            merge_file.root[k].append(file.root[k].read())
                        else:
                            if type(file.root[k]) == tables.table.Table:
                                merge_file.root[k].append(file.root[k].read())
                    except:
                        print("Can't append node {} from file {}".format(
                            k, filename))
            bar.update(1)
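A short usage sketch, assuming several per-run files with compatible node layouts (the file names are illustrative):

run_files = ['run_001.h5', 'run_002.h5', 'run_003.h5']
auto_merge_h5files(run_files,
                   output_filename='all_runs.h5',
                   merge_arrays=True)  # also append eligible Array nodes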
Exemplo n.º 53
0
    def run_case(self, stop_at_ignition=False, restart=False):
        """Run simulation case set up ``setup_case``.

        :param bool stop_at_ignition: If ``True``, stop integration at ignition point.
        :param bool restart: If ``True``, skip if results file exists.
        """

        if restart and os.path.isfile(self.meta['save-file']):
            print('Skipped existing case ', self.meta['id'])
            return

        # Save simulation results in hdf5 table format.
        table_def = {
            'time':
            tables.Float64Col(pos=0),
            'temperature':
            tables.Float64Col(pos=1),
            'pressure':
            tables.Float64Col(pos=2),
            'mass_fractions':
            tables.Float64Col(shape=(self.reac.thermo.n_species), pos=3),
        }

        with tables.open_file(self.save_file, mode='w',
                              title=str(self.idx)) as h5file:

            table = h5file.create_table(where=h5file.root,
                                        name='simulation',
                                        description=table_def)
            # Row instance to save timestep information to
            timestep = table.row
            # Save initial conditions
            timestep['time'] = self.reac_net.time
            timestep['temperature'] = self.reac.T
            timestep['pressure'] = self.reac.thermo.P
            timestep['mass_fractions'] = self.reac.Y
            # Add ``timestep`` to table
            timestep.append()

            ignition_flag = False

            # Main time integration loop; continue integration while time of
            # the ``ReactorNet`` is less than specified end time.
            while self.reac_net.time < self.time_end:
                self.reac_net.step()

                # Save new timestep information
                timestep['time'] = self.reac_net.time
                timestep['temperature'] = self.reac.T
                timestep['pressure'] = self.reac.thermo.P
                timestep['mass_fractions'] = self.reac.Y

                if self.reac.T > self.properties[
                        'temperature'] + 400.0 and not ignition_flag:
                    self.ignition_delay = self.reac_net.time
                    ignition_flag = True

                    if stop_at_ignition:
                        continue

                # Add ``timestep`` to table
                timestep.append()

            # Write ``table`` to disk
            table.flush()

        return self.ignition_delay
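The table written above can be inspected afterwards with plain PyTables; a sketch, assuming 'case_0.h5' is the save-file of one case:

import tables

with tables.open_file('case_0.h5', mode='r') as h5file:
    sim = h5file.root.simulation
    time = sim.col('time')
    temperature = sim.col('temperature')
    print(time[-1], temperature.max())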
Exemplo n.º 54
0
def wrapper(y):

    Year_lower = years_list[y]
    Year_upper = years_list[y]
    Month_lower = 9
    Month_upper = 9
    Day_lower = 1
    Day_upper = 8
    Hour_lower = 0
    Hour_upper = 23
    Minute_lower = 0
    Minute_upper = 45

    # Generate datetime objects corresponding to these time bounds
    time_params = np.array(
        [Year_lower, Year_upper, Month_lower, Month_upper,
         Day_lower, Day_upper, Hour_lower, Hour_upper,
         Minute_lower, Minute_upper])

    datetimes = utilities.get_datetime_objects(time_params)

    years = np.unique(np.asarray(([j.year for j in datetimes])))

    months = np.unique(np.asarray(([j.month for j in datetimes])))

    days = np.unique(np.asarray(([j.day for j in datetimes])))
    for m in np.arange(0, len(years)):
        for k in np.arange(0, len(months)):

            for i in np.arange(0, len(days)):
                day_datetimes_bool = np.asarray(
                    [j.day == days[i] and j.month ==
                     months[k] and j.year ==
                     years[m] for j in datetimes])
                day_datetimes = datetimes[day_datetimes_bool]

                if len(day_datetimes) == 0:
                    continue

                ncfile = pinkdust.create_time_nc_file(
                    '/soge-home/projects/seviri_dust/raw_seviri_data/bt_nc/'
                    + day_datetimes[0].strftime(
                        "%B%Y") + '/BT_' + day_datetimes[
                        0].strftime(
                        '%Y%m%d') + '.nc', day_datetimes, lats, lons)

                day_array[:] = 0

                pool = multiprocessing.Pool()
                for j in np.arange(0, len(day_datetimes)):
                    pool.apply_async(sub_wrapper, args=(day_datetimes, j,))
                pool.close()
                pool.join()

                # Now read back in all the intermediaries and write to nc

                for j in np.arange(0, len(day_datetimes)):

                    f = tables.open_file(
                        '/soge-home/projects/seviri_dust/raw_seviri_data'
                        '/intermediary_files/BT_087_' + day_datetimes[
                            j].strftime(
                            '%Y%m%d%H%M%S.hdf'))
                    arrobj = f.get_node('/data')
                    bt_087 = arrobj.read()
                    f.close()

                    f = tables.open_file(
                        '/soge-home/projects/seviri_dust/raw_seviri_data'
                        '/intermediary_files/BT_108_' + day_datetimes[
                            j].strftime(
                            '%Y%m%d%H%M%S.hdf'))
                    arrobj = f.get_node('/data')
                    bt_108 = arrobj.read()
                    f.close()

                    f = tables.open_file(
                        '/soge-home/projects/seviri_dust/raw_seviri_data'
                        '/intermediary_files/BT_120_' + day_datetimes[
                            j].strftime(
                            '%Y%m%d%H%M%S.hdf'))
                    arrobj = f.get_node('/data')
                    bt_120 = arrobj.read()
                    f.close()

                    ncfile = pinkdust.save_to_existing_nc(ncfile, bt_087,
                                                          bt_108, bt_120,
                                                          None,
                                                          day_datetimes[j])

                print 'Day ' + str(days[i]) + ' done'
                ncfile.close()
            print 'Month ' + str(months[k]) + ' done'
Exemplo n.º 55
0
# FILE INFO ETC.
s = sys.stdin.read()
if (s.split(" ") == ""):
    raise ValueError("Please provide at least one directory.")
else:
    DIR_TOREAD = s.split(" ")

if CF.VERBOSE:
    print("Directories to be read:")
    print(DIR_TOREAD)

##### Initialize the file. #####

# Open file connection, writing new file to disk.
myh5 = tables.open_file(CF.FILE_NAME, mode="w", title=CF.FILE_TITLE)

# Create the EArray for actual values.
a = tables.UInt8Atom()
myh5.create_earray(myh5.root,
                   name="riskLevels",
                   atom=a,
                   shape=(0, CF.NUMLAT, CF.NUMLON),
                   title=CF.DATA_TITLE)

# Create the EArray for grid information, and populate it.
a = tables.Float64Atom()
myh5.create_earray(myh5.root,
                   name="gridInfo",
                   atom=a,
                   shape=(0, len(CF.INFO_LIST)),
Exemplo n.º 56
0
	elif args.flux == "GaisserHillas" : flux = GaisserHillas()
	elif args.flux == "Hoerandel" : flux = Hoerandel()
	elif args.flux == "Hoerandel5" : flux = Hoerandel5()
	elif args.flux == "Hoerandel_IT" : flux = Hoerandel_IT()

	eventcount = 0
	#generator = weighting.from_simprod(21269,False,'vm-simprod2.icecube.wisc.edu')
	#generator = weighting.icetop_mc_weights(21269,'/home/tmcelroy/icecube/domeff/datasetConfig.json')
	nfiles = len(file_list)

	rand = ROOT.TRandom3()

	for filename in file_list :
		h5file = 0
		try :
			h5file = open_file(filename, mode="r")
		except : continue
		
		domtable = h5file.root.doms
		eventtable = h5file.root.events
		runtable = h5file.root.runinfo

		domindex = 0
		eventindex = -1

		for event in eventtable.iterrows() :


			eventcount += 1
			weight = 1.0
			if args.flux != "data" :
Exemplo n.º 57
0
import tables
import pandas as pd
import numpy as np

h5_file = tables.open_file("../../../../../restricted_images_national.h5")
table_train = h5_file.get_node("/{}".format("train"))  #len=58263
table_validate = h5_file.get_node("/{}".format("validate"))  #len=7744
table_test = h5_file.get_node("/{}".format("test"))  #len= 13509

#table_train.colnames #Table[0][0:11] ['img0','img1','img_id','inc0','inc1','lat','lng','pop0','pop1','pop_centile','sol0','sol1']
#print(len(table_train[0]), len(table_train[0][0]), len(table_train[0][0][0]), len(table_train[0][0][0][0]))
#(12, 94, 94, 7)
#table.colnames #Table[0][0:11]
lats = []  #latitudes of points
lngs = []  #longitudes of points
pop0 = []  #Population of 2000 of points
pop1 = []  #Population of 2010 of points
popdiff = []  #Pop1 - Pop0
inc0 = []  #Income of 2000
inc1 = []  #Income of 2010 of points
incdiff = []  #inc1 - inc0 of points
for table in [table_train, table_validate, table_test]:
    for row in table:
        #row[3]=inc0, row[4]=inc1, row[5]=lat, row[6]=lng, row[7]=pop0, row[8]=pop1
        inc0.append(row[3])
        inc1.append(row[4])
        incdiff.append(row[4] - row[3])
        lats.append(row[5])
        lngs.append(row[6])
        pop0.append(row[7])
        pop1.append(row[8])
Exemplo n.º 58
0
def main():
    #reads from hdf5 file (must already exist)
    h5file = tables.open_file("t", mode="a", title="Mag Data for AUTUMNX")
    stationlist = [
        'SALU', 'AKUL', 'PUVR', 'INUK', 'KJPK', 'RADI', 'VLDR', 'STFL', 'SEPT',
        'SCHF'
    ]
    table = {}
    #get yesterday's date
    date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
    strd = date.strftime('%Y_%m_%d')
    year, month, day = re.split('_', strd)

    #main loop
    for station in stationlist:
        # look up this station's table under /magnetometer by name
        table[station] = h5file.get_node('/magnetometer', station)
        print station
        #read data from http
        url = 'http://autumn.athabascau.ca/magdata/L1/{0}/fluxgate/{1}/{2}/{3}/AUTUMNX_{0}_TGBO_{4}_PT0,5S.txt'.format(
            station, year, month, day, strd)
        try:
            response = urllib2.urlopen(url)
        except:
            continue
        cd = re.split('\n', response.read())
        del cd[0:13]
        data = []
        #parse datetime object into seconds from epoch
        for line in cd:
            ld = re.split('\s+', line)
            try:
                datet = ld[0] + ' ' + ld[1] + '000'
                datet = (
                    datetime.datetime.strptime(datet, '%Y-%m-%d %H:%M:%S.%f') -
                    datetime.datetime.utcfromtimestamp(0)).total_seconds()
                ld = [datet, float(ld[3]), float(ld[4]), float(ld[5])]
                data.append(ld)
            except:
                continue
        #append data to table in hdf5 file
        mag = table[station].row
        then = datetime.datetime.now()
        print "Starting Data append..."
        for line in data:
            mag['time'] = line[0]
            mag['Bx'] = line[1]
            mag['By'] = line[2]
            mag['Bz'] = line[3]
            mag.append()
        #flush table buffer
        table[station].flush()
        print "{0} s to complete".format(datetime.datetime.now() - then)
    #close file
    h5file.close()
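For reference, the append loop assumes each station table has time, Bx, By and Bz columns; a sketch of a matching description, with column types assumed since the tables are created elsewhere:

import tables

class MagSample(tables.IsDescription):
    # Assumed column types; the real tables are created when the h5 file is first built.
    time = tables.Float64Col()  # seconds since the Unix epoch
    Bx = tables.Float64Col()
    By = tables.Float64Col()
    Bz = tables.Float64Col()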
Exemplo n.º 59
0
        "length", "x", "y", "z", "r", "energy", "xmin", "ymin", "zmin", "rmin",
        "xmax", "ymax", "zmax", "rmax", "xb1", "yb1", "zb1", "eb1", "xb2",
        "yb2", "zb2", "eb2", "ovlp_e"
    ]

    data = namedtuple("data", columns)  # auxiliary namedtuple

    paolina_algorithm = track_blob_info_creator_extractor(**paolina_params)

    out_df = pd.DataFrame(columns=columns)

    try:
        DECO = pd.read_hdf(in_filename, "DECO/Events")
    except KeyError:
        # save empty file
        with tb.open_file(out_filename, "w") as h5out:
            df_writer(h5out, out_df, group_name="tracks", table_name="events")
        index_tables(out_filename)
        sys.exit()

    for (event, peak), deco in DECO.groupby(["event", "npeak"]):

        # pre-proccess
        deco.loc[:, "time"] = 0
        deco.loc[:, "Ec"] = deco["E"]
        deco.loc[:, "Ep"] = deco["E"]
        deco.loc[:, ("Q", "Xrms", "Yrms", "nsipm")] = np.nan

        # Paolina
        hitc = hits_from_df(deco)[event]
        df, voxels, track_hitc, out_of_map = paolina_algorithm(hitc)
Exemplo n.º 60
0
print(args)

local_data_path = r'/path/to/datasets'

if args.dataset[:3] == 'shd':
    dataset = local_data_path + r'/shd/' + args.dataset
elif args.dataset[:5] == 'mnist':
    dataset = local_data_path + r'/mnist-dvs/' + args.dataset
elif args.dataset[:11] == 'dvs_gesture':
    dataset = local_data_path + r'/DvsGesture/' + args.dataset
elif args.dataset[:7] == 'swedish':
    dataset = local_data_path + r'/SwedishLeaf_processed/' + args.dataset
else:
    raise ValueError('dataset not found: ' + args.dataset)

args.dataset = tables.open_file(dataset)

args.disable_cuda = str2bool(args.disable_cuda)
args.device = None
if not args.disable_cuda and torch.cuda.is_available():
    args.device = torch.device('cuda')
else:
    args.device = torch.device('cpu')

### Network parameters
args.n_input_neurons = args.dataset.root.stats.train_data[1]
args.n_output_neurons = args.dataset.root.stats.train_label[1]
args.n_hidden_neurons = args.n_h

### Learning parameters
if not args.num_samples_train: