def init_sim_h_table(self):
    """Create or open the simulation header table '/sim_h' in db_main.

    In write mode ('w') any existing table is dropped and recreated with
    indexes on sim_uuid and dix; in every other mode the existing table is
    simply opened. Always binds the table to self.t_sim_h.
    """
    if self.read_mode != 'w':
        # Read-only path: the table must already exist.
        self.t_sim_h = self.db_main.get_node('/', 'sim_h')
        print('simulation header table opened.')
        return
    try:
        self.db_main.remove_node('/', 'sim_h')
        print('simulation header table dropped.')
    except tables.exceptions.NoSuchNodeError:
        print('simulation header table not exist yet. nothing to initialize.')
    try:
        description = {
            'sim_uuid': tables.StringCol(32),  # hex UUID of the simulation run
            'dix': tables.IntCol(),            # date index
            'startdix': tables.IntCol(),
            'enddix': tables.IntCol()
        }
        self.t_sim_h = self.db_main.create_table('/', 'sim_h', description)
        for column in ('sim_uuid', 'dix'):
            getattr(self.t_sim_h.cols, column).create_index()
        print('simulation header table created.')
    except tables.exceptions.NodeError:
        # Node already present despite the drop attempt: fall back to opening.
        self.t_sim_h = self.db_main.get_node('/', 'sim_h')
        print('simulation header table opened.')
def init_q_log(self):
    """Create or open the Q-learning log table '/q_log'.

    In write mode ('w') any existing table is dropped and recreated with
    indexes on ticker, state and action; otherwise the existing table is
    opened. Always binds the table to self.q_log.
    """
    if self.read_mode == 'w':
        try:
            self.db_main.remove_node('/', 'q_log')
            # FIX: this message previously said 'stats table dropped.' —
            # a copy-paste slip from init_stats_table.
            print('q_log table dropped.')
        except tables.exceptions.NoSuchNodeError:
            print('no q_log to drop.')
        try:
            q_log_desc = {
                'sim_uuid': tables.StringCol(32),  # owning simulation run
                'ticker': tables.StringCol(10),
                'dix': tables.IntCol(),            # date index
                'state': tables.Int64Col(),        # encoded Q-state
                'action': tables.IntCol(),
                'reward': tables.FloatCol()
            }
            self.q_log = self.db_main.create_table('/', 'q_log', q_log_desc)
            self.q_log.cols.ticker.create_index()
            self.q_log.cols.state.create_index()
            self.q_log.cols.action.create_index()
            print('q log table created.')
        except tables.exceptions.NodeError:
            self.q_log = self.db_main.get_node('/', 'q_log')
            print('q_log table opened.')
    else:
        self.q_log = self.db_main.get_node('/', 'q_log')
        print('q_log table opened.')
def table_description(classname, nclassname, shape=()):
    """Return a table description for testing queries.

    The description consists of all PyTables data types, both in the top
    level and in the ``c_nested`` nested column.  A column of a certain TYPE
    gets called ``c_TYPE``.  An extra integer column ``c_extra`` is also
    provided.  If a `shape` is given, it will be used for all columns.
    Finally, an extra indexed column ``c_idxextra`` is added as well in
    order to provide some basic tests for multi-index queries.
    """
    classdict = {}
    # append_columns fills in one column per type and returns the next
    # free position.
    pos = append_columns(classdict, shape)
    classdict['c_nested'] = nested_description(nclassname, pos, shape=shape)
    # Two trailing integer columns at the next two positions.
    for extra in ('c_extra', 'c_idxextra'):
        pos += 1
        classdict[extra] = tables.IntCol(shape=shape, pos=pos)
    return type(classname, (tables.IsDescription,), classdict)
def _col_descriptor(self):
    """Build the PyTables column descriptor for this table.

    Every column in self.col_names becomes a FloatCol (default NaN) at its
    list position, except 'stat' and 'qn', which are integer columns
    (default -1) kept at their original positions.
    """
    names = self.col_names
    descriptor = OrderedDict(
        (name, t.FloatCol(dflt=nan, pos=position))
        for position, name in enumerate(names)
    )
    # Overwrite the two integer-valued columns, preserving their slots.
    for int_name in ('stat', 'qn'):
        descriptor[int_name] = t.IntCol(dflt=-1, pos=names.index(int_name))
    return descriptor
def init_q_table(self):
    """Create (write mode) or open the Q-value table '/q_table'.

    Binds the table to self.t_q with indexes on ticker, state and action.
    Note: 'ticker' is an IntCol here — presumably the numeric ticker id
    from the ticker_symbols mapping rather than the symbol string.
    """
    if self.read_mode != 'w':
        self.t_q = self.db_main.get_node('/', 'q_table')
        print('q table opened.')
        return
    try:
        self.db_main.remove_node('/', 'q_table')
        print('q_table table dropped.')
    except tables.exceptions.NoSuchNodeError:
        print('no q table to drop.')
    try:
        description = {
            'ticker': tables.IntCol(),
            'state': tables.IntCol(),
            'action': tables.IntCol(),
            'reward': tables.FloatCol()
        }
        self.t_q = self.db_main.create_table('/', 'q_table', description)
        for column in ('ticker', 'state', 'action'):
            getattr(self.t_q.cols, column).create_index()
        print('q table created.')
    except tables.exceptions.NodeError:
        self.t_q = self.db_main.get_node('/', 'q_table')
        print('q table opened.')
class SongMusicBrainz(tables.IsDescription):
    """
    Class to hold information coming from MusicBrainz for one song
    """
    # Release year reported by MusicBrainz (encoding of "unknown" not
    # visible here — confirm against the table writer).
    year = tables.IntCol()
    # ARRAY INDEX: offset of this song's entries in the companion
    # artist_mbtags array — presumably; verify against the array writer.
    idx_artist_mbtags = tables.IntCol()
class Sample(tables.IsDescription):
    """Row description for one sample, with a nested group of fixed-length
    feature vectors.
    """
    vname = tables.StringCol(20)  # source name, up to 20 bytes
    jno = tables.IntCol()         # meaning not evident here — confirm with writer
    pco = tables.IntCol()         # meaning not evident here — confirm with writer

    class features(tables.IsDescription):
        # Nested column group of flat float feature vectors; the dimensions
        # (30/630/364/756) are fixed by whatever produces these rows.
        pos = tables.FloatCol(shape=(30, ))
        jod = tables.FloatCol(shape=(630, ))
        jld = tables.FloatCol(shape=(364, ))
        lla = tables.FloatCol(shape=(756, ))
class ENMetadata(tables.IsDescription):
    """PyTables row description for Echo Nest track metadata.

    Variable-length string fields are sized by the module-level MAXSTRLEN
    constant; md5 is a fixed 32-byte hex digest.
    """
    artist = tables.StringCol(MAXSTRLEN)
    bitrate = tables.IntCol()
    duration = tables.FloatCol()       # seconds, presumably — confirm
    genre = tables.StringCol(MAXSTRLEN)
    id = tables.StringCol(MAXSTRLEN)
    md5 = tables.StringCol(32)         # audio-file MD5 digest
    release = tables.StringCol(MAXSTRLEN)
    samplerate = tables.IntCol()
    status = tables.StringCol(MAXSTRLEN)
    title = tables.StringCol(MAXSTRLEN)
    analysispath = tables.StringCol(MAXSTRLEN)
class Particle(tables.IsDescription):
    """This class defines a table record. """
    lati = tables.IntCol(pos=0)
    # longi = IntCol(pos=1)  -- position 1 deliberately left out
    Time = tables.Time64Col(pos=2)
    pressure = tables.FloatCol(pos=3)
    ID = tables.StringCol(itemsize=10, pos=4)
    # NOTE(review): despite the name, this is an *unsigned* 4-byte integer
    # column (UIntCol itemsize=4), not a 16-bit one.
    Int16 = tables.UIntCol(itemsize=4, pos=5)
    Int64 = tables.IntCol(itemsize=8, pos=6)
    Bool = tables.BoolCol(pos=7)
def init_stats_table(self):
    """Create (write mode) or open the model-accuracy table '/stats'.

    Binds the table to self.t_stats with indexes on ticker and kpi.
    """
    if self.read_mode != 'w':
        self.t_stats = self.db_main.get_node('/', 'stats')
        print('stats table opened.')
        return
    try:
        self.db_main.remove_node('/', 'stats')
        print('stats table dropped.')
    except tables.exceptions.NoSuchNodeError:
        print('stats table not exist yet. nothing to initialize.')
    try:
        description = {
            'train_uuid': tables.StringCol(32),
            'pca': tables.IntCol(),
            'ticker': tables.StringCol(10),
            'model': tables.StringCol(3),
            'kpi': tables.StringCol(15),
            'accuracy': tables.FloatCol()
        }
        self.t_stats = self.db_main.create_table('/', 'stats', description)
        for column in ('ticker', 'kpi'):
            getattr(self.t_stats.cols, column).create_index()
        print('statistics table created.')
    except tables.exceptions.NodeError:
        self.t_stats = self.db_main.get_node('/', 'stats')
        print('stats table opened.')
def init_portfolio_log(self):
    """Create the portfolio log table '/p_log', or open it when it already
    exists. Binds the table to self.p_log with indexes on sim_uuid and
    ticker.
    """
    description = {
        'sim_uuid': tables.StringCol(32),  # owning simulation run
        'index_t': tables.StringCol(10),
        'dix': tables.IntCol(),            # date index
        'ticker': tables.StringCol(10),
        'volume': tables.IntCol()
    }
    try:
        self.p_log = self.db_main.create_table('/', 'p_log', description)
        for column in ('sim_uuid', 'ticker'):
            getattr(self.p_log.cols, column).create_index()
        print('portfolio log table created.')
    except tables.exceptions.NodeError:
        # Table already present: just open it.
        self.p_log = self.db_main.get_node('/', 'p_log')
        print('portfolio log opened.')
def __init__(self, file_name, table_name, col_defs):
    """Open a fresh HDF5 file and create the target table.

    file_name  -- path of the HDF5 file (opened in 'w' mode, truncating).
    table_name -- name of the table to create via self._create_table.
    col_defs   -- column definitions consumed by self._create_table.
    """
    self._file = tables.open_file(file_name, mode='w', title='generated data')
    # Map textual type names to PyTables column factories.
    self._typemap = dict(
        int=tables.IntCol(4),
        float=tables.Float64Col(),
    )
    self._table = self._create_table(table_name, col_defs)
    # Keep the row accessor handy for fast appends.
    self._row = self._table.row
def transpose_genotype(args, hdf_name):
    """Transpose the (num_feat x num_pat) genotype matrix in `hdf_name`
    into a patient-major EArray written to <outfolder>/genotype.h5,
    copying in chunks of whole patients to bound memory use.
    """
    t = tables.open_file(args.outfolder + hdf_name, mode='r')
    data = t.root.data
    num_pat = data.shape[1]
    num_feat = data.shape[0]
    # Patients per chunk, sized so one chunk holds roughly args.tcm cells.
    chunk = args.tcm // num_feat
    chunk = int(np.clip(chunk, 1, num_pat))
    f = tables.open_file(args.outfolder + '/genotype.h5', mode='w')
    f.create_earray(f.root, 'data', tables.IntCol(), (0, num_feat),
                    expectedrows=num_pat,
                    filters=tables.Filters(complib='zlib', complevel=1))
    f.close()
    f = tables.open_file(args.outfolder + '/genotype.h5', mode='a')
    # The +1 overshoots by one chunk; the final slice is empty, so the
    # extra append is a no-op.
    for pat in tqdm.tqdm(range(int(np.ceil(num_pat / chunk) + 1))):
        begins = pat * chunk
        tills = min(((pat + 1) * chunk), num_pat)
        # NOTE(review): dtype=int (platform int, usually int64) here, while
        # the sibling transpose functions use np.int8 — confirm which width
        # is intended for genotype.h5.
        a = np.array(data[:, begins:tills], dtype=int)
        a = a.T
        f.root.data.append(a)
    f.close()
    t.close()
    print("Completed", args.study_name)
class TestDescription(tables.IsDescription):
    # NOTE(review): `self.enum`, `self.defaultName`, `self.enumType` and
    # `shape` are free names resolved from the enclosing method's scope —
    # this class is evidently declared inside a test method, so it cannot
    # be moved to module level as-is.
    rid = tables.IntCol(pos=0)  # row id
    # Enum-valued column built from the test case's enum fixture.
    rcolor = tables.EnumCol(self.enum, self.defaultName,
                            base=self.enumType, shape=shape, pos=1)
class FontGlyphData(tb.IsDescription):
    """One row per glyph stored in a font texture atlas."""
    index = tb.IntCol(pos=1)          # glyph index within the font
    charcode = tb.IntCol(pos=2)       # numeric character code
    unichar = tb.StringCol(8, pos=3)  # character as bytes (up to 8)
    # Glyph placement metrics.
    offset_x = tb.Int16Col(pos=4)
    offset_y = tb.Int16Col(pos=5)
    width = tb.UInt8Col(pos=6)
    height = tb.UInt8Col(pos=7)
    # Pixel rectangle of the glyph inside the atlas.
    atlas_x = tb.UInt16Col(pos=8)
    atlas_y = tb.UInt16Col(pos=9)
    atlas_w = tb.UInt16Col(pos=10)
    atlas_h = tb.UInt16Col(pos=11)
    # Normalized texture coordinates of the same rectangle.
    tex_x1 = tb.Float32Col(pos=12)
    tex_y1 = tb.Float32Col(pos=13)
    tex_x2 = tb.Float32Col(pos=14)
    tex_y2 = tb.Float32Col(pos=15)
def init_recommendation_table(self):
    """Create or open the trade recommendation table '/recommend'.

    Write mode drops any existing table and recreates it; read mode opens
    it, creating it first when it does not exist yet. Always binds the
    table to self.t_recommend with indexes on sim_uuid and dix.
    """
    # Single source of truth for the table layout. It was previously
    # duplicated verbatim in the write and read branches, which invited
    # the two copies drifting apart.
    recommend_desc = {
        'sim_uuid': tables.StringCol(32),
        'dix': tables.IntCol(),
        'symbol': tables.StringCol(10),
        'tradeTx': tables.StringCol(10),
        'tradePriceMin': tables.FloatCol(),
        'tradePriceMid': tables.FloatCol(),
        'tradePriceMax': tables.FloatCol(),
        'trade12dd': tables.IntCol(),
        'tradeIdxPct': tables.FloatCol(),
        'tradeDate': tables.StringCol(10),
        'tradeDateCopy': tables.StringCol(10),
        'offsetTraining': tables.IntCol(),
        'minTraining': tables.IntCol(),
        'modelAccuracy': tables.FloatCol()
    }

    def _create():
        # Create the table and its secondary indexes.
        self.t_recommend = self.db_main.create_table(
            '/', 'recommend', recommend_desc)
        self.t_recommend.cols.sim_uuid.create_index()
        self.t_recommend.cols.dix.create_index()

    if self.read_mode == 'w':
        try:
            self.db_main.remove_node('/', 'recommend')
            print('recommendation table dropped.')
        except tables.exceptions.NoSuchNodeError:
            print('recommendation table does not exist yet. nothing to initialize.')
        try:
            _create()
            print('recommendation table created.')
        except tables.exceptions.NodeError:
            self.t_recommend = self.db_main.get_node('/', 'recommend')
            print('recommendation table opened.')
    else:
        try:
            self.t_recommend = self.db_main.get_node('/', 'recommend')
            print('recommendation table opened.')
        except tables.exceptions.NoSuchNodeError:
            # First read against a fresh database: create the empty table
            # (original code printed nothing on this path; preserved).
            _create()
class Particle(tb.IsDescription): name = tb.StringCol(16) # 16-character String idnumber = tb.Int64Col() # Signed 64-bit integer ADCcount = tb.UInt16Col() # Unsigned short integer TDCcount = tb.UInt8Col() # Unsigned byte grid_i = tb.Int32Col() # Integer grid_j = tb.IntCol() # Integer (equivalent to Int32Col) pressure = tb.Float32Col() # Float (single-precision) energy = tb.FloatCol() # Double (double-precision)
class Particle(tables.IsDescription):
    """Description of a table record."""
    name = tables.StringCol(16, pos=1)
    lati = tables.IntCol(pos=2)
    vector = tables.Int32Col(shape=(200, ), pos=3)
    matrix1 = tables.Int32Col(shape=(2, 200), pos=4)
    matrix2 = tables.FloatCol(shape=(100, 2), pos=5)
    # NOTE(review): matrix3 and matrix4 repeat pos=5 — almost certainly a
    # copy-paste slip (expected 6 and 7). Column ordering on tied positions
    # falls back to name order; confirm the intended layout before fixing.
    matrix3 = tables.FloatCol(shape=(10, 100, 2), pos=5)
    matrix4 = tables.FloatCol(shape=(2, 10, 100, 2), pos=5)
class TableDescription(tables.IsDescription):
    """Describe the fields in an HDF5 table.

    Attributes:
        cid: IntCol. An integer field for the unique call id numbers.
        wid: IntCol. An integer field for the whale id numbers. Each whale
            has a unique identifier.
        sp: IntCol. An integer column for the species code.
            1 = killer whale, 2 = pilot whale.
        data: Float32Col. A float32 2d array (200 x 200) for the spectral
            representation of each whale call.
    """
    cid = tables.IntCol()
    wid = tables.IntCol()
    sp = tables.IntCol()
    data = tables.Float32Col((200, 200))
def init_noTrade(self): try: desc = {'ticker': tables.StringCol(10), 'dix': tables.IntCol()} self.noTrade = self.db_main.create_table('/', 'noTrade', desc) self.noTrade.cols.ticker.create_index() self.noTrade.cols.dix.create_index(kind='full') print 'noTrade table created.' except tables.exceptions.NodeError: noTrade = self.db_main.get_node('/', 'noTrade') print 'noTrade opened.'
def init_simulation_log(self): try: perf_desc = { 'sim_uuid': tables.StringCol(32), 'gamma': tables.FloatCol(), 'alpha': tables.FloatCol(), 'simrun': tables.IntCol(), 'dix': tables.IntCol(), 'index': tables.StringCol(10), 'p_value': tables.FloatCol(), 'cash': tables.FloatCol(), 'i_value': tables.IntCol() } self.s_log = self.db_main.create_table('/', 's_log', perf_desc) self.s_log.cols.dix.create_index(kind='full') print 'simulation log table created.' except tables.exceptions.NodeError: s_log = self.db_main.get_node('/', 's_log') print 'simulation log opened.'
class Metadata(tables.IsDescription):
    """Per-sample integer metadata record.

    The setup/camera/person/replication/action fields resemble the NTU
    RGB+D naming scheme — presumably identifiers parsed from the sample
    file name; confirm against the writer of this table.
    """
    jno = tables.IntCol()          # meaning not evident here — confirm
    pco = tables.IntCol()          # meaning not evident here — confirm
    setup = tables.IntCol()
    camera = tables.IntCol()
    person = tables.IntCol()
    replication = tables.IntCol()
    action = tables.IntCol()
def init_transaction_log(self):
    """Create the transaction log table '/t_log', or open it when it
    already exists. Binds the table to self.t_log with indexes on
    sim_uuid and ticker.
    """
    description = {
        'sim_uuid': tables.StringCol(32),  # owning simulation run
        'dix': tables.IntCol(),            # date index of the trade
        'ticker': tables.StringCol(10),
        'tx': tables.StringCol(10),        # transaction type string
        'price': tables.FloatCol(),
        'volume': tables.IntCol(),
        'close': tables.FloatCol(),
        'cash_before': tables.FloatCol(),
        'cash_after': tables.FloatCol(),
        '12dd': tables.IntCol()
    }
    try:
        self.t_log = self.db_main.create_table('/', 't_log', description)
        for column in ('sim_uuid', 'ticker'):
            getattr(self.t_log.cols, column).create_index()
        print('transaction log table created.')
    except tables.exceptions.NodeError:
        self.t_log = self.db_main.get_node('/', 't_log')
        print('transaction log opened.')
class SongMetaData(tables.IsDescription):
    """
    Class to hold the metadata of one song
    """
    artist_name = tables.StringCol(MAXSTRLEN)
    artist_id = tables.StringCol(32)
    artist_mbid = tables.StringCol(40)       # MusicBrainz artist id
    artist_playmeid = tables.IntCol()
    artist_7digitalid = tables.IntCol()
    analyzer_version = tables.StringCol(32)
    genre = tables.StringCol(MAXSTRLEN)
    release = tables.StringCol(MAXSTRLEN)
    release_7digitalid = tables.IntCol()
    title = tables.StringCol(MAXSTRLEN)
    artist_familiarity = tables.Float64Col()
    artist_hotttnesss = tables.Float64Col()
    song_id = tables.StringCol(32)
    song_hotttnesss = tables.Float64Col()
    artist_latitude = tables.Float64Col()
    artist_longitude = tables.Float64Col()
    artist_location = tables.StringCol(MAXSTRLEN)
    track_7digitalid = tables.IntCol()
    # ARRAY INDICES: offsets of this song's rows in the companion array
    # tables (presumably — verify against the array writers).
    idx_similar_artists = tables.IntCol()
    idx_artist_terms = tables.IntCol()
def transpose_genotype_job(job_begins, job_tills, job_n, study_name, outfolder, tcm, comp_level=9):
    """Transpose one patient range [job_begins, job_tills) of the genotype
    matrix into its own step5 output file, as one job of a parallel run.

    Reads the step4 (preferred) or step3 intermediate file, then appends
    patient-major chunks to ..._step5_genotype_transposed_<job_n>.h5.
    """
    print("job_n:", job_n, 'job_begins:', job_begins, 'job_tills:', job_tills)
    hdf5_name = '/' + study_name + '_step4_genotype_selected_variants.h5'
    if (os.path.exists(outfolder + hdf5_name)):
        t = tables.open_file(outfolder + hdf5_name, mode='r')
    else:
        # Fall back to the pre-selection matrix when step 4 was skipped.
        print('using', outfolder + study_name + '_step3_genotype_no_missing.h5')
        t = tables.open_file(outfolder + study_name + '_step3_genotype_no_missing.h5', mode='r')
    data = t.root.data
    num_pat = data.shape[1]
    num_feat = data.shape[0]
    # Patients per chunk, sized so one chunk holds roughly tcm cells.
    chunk = tcm // num_feat
    chunk = int(np.clip(chunk, 1, num_pat))
    print("chuncksize =", chunk)
    f = tables.open_file(outfolder + study_name + '_step5_genotype_transposed_'
                         + str(job_n) + '.h5', mode='w')
    f.create_earray(f.root, 'data', tables.IntCol(), (0, num_feat),
                    expectedrows=num_pat,
                    filters=tables.Filters(complib='zlib', complevel=comp_level))
    f.close()
    n_in_job = job_tills - job_begins
    f = tables.open_file(outfolder + study_name + '_step5_genotype_transposed_'
                         + str(job_n) + '.h5', mode='a')
    # The +1 overshoots by one chunk; the final slice is empty (tills is
    # clamped to job_tills), so the extra append is a no-op.
    for subjects in tqdm.tqdm(range(int(np.ceil(n_in_job / chunk) + 1))):
        begins = job_begins + subjects * chunk
        tills = min((job_begins + (subjects + 1) * chunk), job_tills)
        a = np.array(data[:, begins:tills], dtype=np.int8)
        a = a.T
        f.root.data.append(a)
    f.close()
    t.close()
    print("Completed", job_n)
def saveWorld(self):
    '''TODO: check if we are currently working on a world, save it.
    if not, we ignore the command.

    Writes every non-None numpy dataset in self.world as a compressed
    CArray, plus a key/value settings table, to self.fileLocation.
    '''
    self.updateWorld()
    # NOTE(review): alreadyTried is always False at the test below, so the
    # 'and not alreadyTried' clause is dead and setting it True has no
    # effect — the retry logic this implies was never completed.
    alreadyTried = False
    if not self.fileLocation and not alreadyTried:
        alreadyTried = True
        # No target path yet: delegate to save-as (which asks for one).
        self.saveWorldAs()
    else:
        h5Filter = tables.Filters(complevel=9, complib='zlib',
                                  shuffle=True, fletcher32=True)
        # NOTE(review): openFile/createCArray/createTable/createIndex are
        # the pre-3.0 camelCase PyTables API, removed in current releases.
        h5file = tables.openFile(self.fileLocation, mode='w',
                                 title="worldData", filters=h5Filter)
        # store our numpy datasets
        for k in self.world:
            if self.world[k] is not None:
                atom = tables.Atom.from_dtype(self.world[k].dtype)
                shape = self.world[k].shape
                cArray = h5file.createCArray(h5file.root, k, atom, shape)
                cArray[:] = self.world[k]
        # store our world settings
        pyDict = {
            'key': tables.StringCol(itemsize=40),
            'value': tables.IntCol(),
        }
        settingsTable = h5file.createTable('/', 'settings', pyDict)
        settings = dict(
            width=self.mapSize[0],
            height=self.mapSize[1],
            algorithm=self.algorithm,
            roughness=self.roughness,
            avgLandmass=self.avgLandmass,
            avgElevation=self.avgElevation,
            hasMountains=self.hasMountains,
            hemisphere=self.hemisphere,
        )
        # NOTE(review): appends (key, value) pairs as rows; any non-integer
        # setting value would not fit the IntCol 'value' column — confirm
        # all settings here are ints.
        settingsTable.append(settings.items())
        settingsTable.cols.key.createIndex()  # create an index
        h5file.close()
        del h5file, h5Filter
def init_parameter_table(self): if self.read_mode == 'w': try: self.db_main.remove_node('/', 'parameter') print 'paramter table dropped.' except tables.exceptions.NoSuchNodeError: print 'parameter table not exist yet. nothing to initialize.' try: parameter_desc = { 'train_uuid': tables.StringCol(32), 'pca': tables.IntCol(), 'ticker': tables.StringCol(10), 'model': tables.StringCol(3), 'kpi': tables.StringCol(15), 'kernel': tables.StringCol(10), 'C': tables.IntCol(), 'max_depth': tables.IntCol(), 'n_neighbors': tables.IntCol(), 'weights': tables.StringCol(10), 'algorithm': tables.StringCol(10) } self.t_parameter = self.db_main.create_table( '/', 'parameter', parameter_desc) self.t_parameter.cols.train_uuid.create_index() self.t_parameter.cols.pca.create_index() self.t_parameter.cols.ticker.create_index() self.t_parameter.cols.model.create_index() self.t_parameter.cols.kpi.create_index() print 'parameter table created.' except tables.exceptions.NodeError: self.t_parameter = self.db_main.get_node('/', 'parameter') print 'parameter table opened.' else: self.t_parameter = self.db_main.get_node('/', 'parameter') print 'parameter table opened.'
def transpose_genotype(args):
    """Transpose the genotype matrix into <outfolder>/genotype.h5,
    patient-major, reading the newest available intermediate file
    (step4 > step3 > step2) and copying in chunks of whole patients.
    """
    step4_name = args.genotype + '/' + args.study_name + '_step4_genotype_selected_variants.h5'
    step3_name = args.genotype + '/' + args.study_name + '_step3_genotype_no_missing.h5'
    step2_name = args.genotype + '/' + args.study_name + '_step2_merged_genotype.h5'
    if (os.path.exists(step4_name)):
        t = tables.open_file(step4_name, mode='r')
    elif (os.path.exists(step3_name)):
        print('WARNING skipped step 4, all variants are used: using', step3_name)
        t = tables.open_file(step3_name, mode='r')
    elif (os.path.exists(step2_name)):
        print(
            'WARNING skipped step 3, only skip this step if you are sure there are no missing variants (i.e: genotype only has values 0,1,2)',
            step2_name)
        t = tables.open_file(step2_name, mode='r')
    else:
        # NOTE(review): this branch leaves 't' unbound, so the next line
        # raises NameError instead of a clear error — consider raising a
        # FileNotFoundError here.
        print('no valid genotype found')
    data = t.root.data
    num_pat = data.shape[1]
    num_feat = data.shape[0]
    # Patients per chunk, sized so one chunk holds roughly args.tcm cells.
    chunk = args.tcm // num_feat
    chunk = int(np.clip(chunk, 1, num_pat))
    print("chuncksize =", chunk)
    f = tables.open_file(args.outfolder + '/genotype.h5', mode='w')
    f.create_earray(f.root, 'data', tables.IntCol(), (0, num_feat),
                    expectedrows=num_pat,
                    filters=tables.Filters(complib='zlib', complevel=args.comp_level))
    f.close()
    f = tables.open_file(args.outfolder + '/genotype.h5', mode='a')
    # The +1 overshoots by one chunk; the final empty slice appends nothing.
    for pat in tqdm.tqdm(range(int(np.ceil(num_pat / chunk) + 1))):
        begins = pat * chunk
        tills = min(((pat + 1) * chunk), num_pat)
        a = np.array(data[:, begins:tills], dtype=np.int8)
        a = a.T
        f.root.data.append(a)
    f.close()
    t.close()
    print("Completed", args.study_name)
    print("You can delete all other h5 files if genotype.h5 is correct")
    # Later pipeline stages read from the genotype folder from here on.
    args.outfolder = args.genotype
def init_sp500Changes(self): try: desc = { 'ticker': tables.StringCol(10), 'dix': tables.IntCol(), 'sector': tables.StringCol(8), 'change': tables.StringCol(10) } self.sp500Changes = self.db_main.create_table( '/', 'sp500Changes', desc) self.sp500Changes.cols.ticker.create_index() self.sp500Changes.cols.dix.create_index(kind='full') print 'sp500Changes table created.' except tables.exceptions.NodeError: sp500Changes = self.db_main.get_node('/', 'sp500Changes') print 'sp500Changes opened.'
def init_ticker_ids_table(self):
    """Create (write mode) or open the ticker-to-id mapping table
    '/ticker_symbols'. Binds the table to self.t_ticker_ids with an index
    on ticker.
    """
    if self.read_mode != 'w':
        self.t_ticker_ids = self.db_main.get_node('/', 'ticker_symbols')
        print('ticker symbols table opened.')
        return
    try:
        description = {
            'ticker': tables.StringCol(10),  # symbol string
            'id': tables.IntCol()            # numeric ticker id
        }
        self.t_ticker_ids = self.db_main.create_table(
            '/', 'ticker_symbols', description)
        self.t_ticker_ids.cols.ticker.create_index()
        print('ticker ids table created.')
    except tables.exceptions.NodeError:
        # Table already present: just open it.
        self.t_ticker_ids = self.db_main.get_node('/', 'ticker_symbols')
        print('ticker symbols table opened.')