class LocusTable(tables.IsDescription):
    """PyTables row description for a genomic locus and its mutation record."""
    id = UInt64Col(pos=0)
    # Composite identifier: 'species_name.chromosome_name.locus_name'
    name = StringCol(512, pos=1)
    chromosome_id = UInt64Col(pos=2)
    start = UInt64Col(pos=3)
    stop = UInt64Col(pos=4)
    ref_allele = StringCol(64, pos=5)
    # True allele length; useful when the allele exceeds the 64-byte
    # string column above and is truncated there.
    ref_allele_length = UInt64Col(pos=6)
    ref_allele_frequency = Float64Col(pos=7)
    alt_allele = StringCol(64, pos=8)
    # Same rationale as ref_allele_length: may be larger than 64.
    alt_allele_length = UInt64Col(pos=9)
    alt_allele_frequency = Float64Col(pos=10)
    generation_mutation_arose = Int64Col(pos=11)
    generation_mutation_fixed = Int64Col(pos=12)
    # 0=synonymous/non-coding, 1=non-synonymous,
    # i=insertion, d=deletion, v=inversion
    mutation_type = StringCol(4, pos=13)
    fitness = Float64Col(pos=14)
    # Amino-acid columns are only meaningful for mutation_type 0/1.
    ancestral_amino_acid = StringCol(4, pos=15)
    derived_amino_acid = StringCol(4, pos=16)
class Record(tables.IsDescription):
    """Row description covering every column type available in this build."""
    var1 = StringCol(itemsize=4, dflt=b"abcd", pos=0)
    var2 = StringCol(itemsize=1, dflt=b"a", pos=1)
    var3 = BoolCol(dflt=1)
    var4 = Int8Col(dflt=1)
    var5 = UInt8Col(dflt=1)
    var6 = Int16Col(dflt=1)
    var7 = UInt16Col(dflt=1)
    var8 = Int32Col(dflt=1)
    var9 = UInt32Col(dflt=1)
    var10 = Int64Col(dflt=1)
    var11 = Float32Col(dflt=1.0)
    var12 = Float64Col(dflt=1.0)
    var13 = ComplexCol(itemsize=8, dflt=(1. + 0.j))
    var14 = ComplexCol(itemsize=16, dflt=(1. + 0.j))
    # Half- and extended-precision columns exist only on some platforms
    # and pytables builds, hence the hasattr guards below.
    if hasattr(tables, 'Float16Col'):
        var15 = tables.Float16Col(dflt=1.0)
    if hasattr(tables, 'Float96Col'):
        var16 = tables.Float96Col(dflt=1.0)
    if hasattr(tables, 'Float128Col'):
        var17 = tables.Float128Col(dflt=1.0)
    if hasattr(tables, 'Complex196Col'):
        var18 = tables.ComplexCol(itemsize=24, dflt=(1. + 0.j))
    if hasattr(tables, 'Complex256Col'):
        var19 = tables.ComplexCol(itemsize=32, dflt=(1. + 0.j))
def test_reads_meta_schema(self):
    """_read_meta must return the schema template row plus one row per column."""
    temp_fs = fsopendir('temp://')
    filename = temp_fs.getsyspath('temp.h5')
    # A minimal descriptor keeps the test simple.
    descriptor = {
        'pos': Int64Col(),
        'name': StringCol(itemsize=255),
        'type': StringCol(itemsize=255),
    }
    rows = [
        [('pos', float(i)), ('name', str(i)), ('type', str(i))]
        for i in range(2)
    ]
    self._write_test_meta(temp_fs, 'schema', descriptor, rows)
    with open_file(filename, mode='r') as h5_file:
        ret = HDFReader._read_meta(h5_file)
        self.assertIn('schema', ret)
        # One entry for the template, two for the written columns.
        self.assertEqual(len(ret['schema']), 3)
        self.assertEqual(ret['schema'][0], MPRowsFile.SCHEMA_TEMPLATE)
        self.assertEqual(len(ret['schema'][0]), len(MPRowsFile.SCHEMA_TEMPLATE))
        self.assertEqual(len(ret['schema'][1]), len(MPRowsFile.SCHEMA_TEMPLATE))
        pos_index = MPRowsFile.SCHEMA_TEMPLATE.index('pos')
        name_index = MPRowsFile.SCHEMA_TEMPLATE.index('name')
        self.assertEqual(ret['schema'][1][pos_index], 0)
        self.assertEqual(ret['schema'][2][pos_index], 1.0)
        self.assertEqual(ret['schema'][1][name_index], '0')
        self.assertEqual(ret['schema'][2][name_index], '1')
def _get_rows_descriptor(columns): """ Converts columns specifications from ambry_sources format to pytables descriptor. Args: columns (list of dict) Returns: dict: valid pytables descriptor. """ TYPE_MAP = { 'int': lambda pos: Int32Col(pos=pos), 'long': lambda pos: Int64Col(pos=pos), 'str': lambda pos: StringCol(itemsize=255, pos=pos), 'bytes': lambda pos: StringCol(itemsize=255, pos=pos), 'float': lambda pos: Float64Col(pos=pos), 'unknown': lambda pos: StringCol(itemsize=255, pos=pos), } descriptor = {} for column in columns: pytables_type = TYPE_MAP.get(column['type']) if not pytables_type: raise Exception( 'Failed to convert `{}` ambry_sources type to pytables type.'. format(column['type'])) descriptor[column['name']] = pytables_type(column['pos']) return descriptor
class Particle(IsDescription):
    """Row description for one detector particle event."""
    name = StringCol(16)      # 16-character string
    idnumber = Int64Col()     # signed 64-bit integer
    ADCcount = UInt16Col()    # unsigned short integer
    TDCcount = UInt8Col()     # unsigned byte
    grid_i = Int32Col()       # 32-bit integer
    grid_j = Int32Col()       # 32-bit integer
    pressure = Float32Col()   # single-precision float
    energy = Float64Col()     # double-precision float
def _getDescription(self, columns, data):
    """Infer a pytables descriptor dict from one sample row.

    Args:
        columns: iterable of column names.
        data: iterable of sample values, aligned with *columns*.

    Returns:
        dict mapping each column name to a pytables Col instance whose
        type is inferred from the sample value.
    """
    tmp = {}
    for col, value in zip(columns, data):
        # BUG FIX: the original tested isinstance(int(value), ...), which
        # is tautologically true for anything int() accepts and raises
        # ValueError on non-numeric strings before the str branch could
        # ever run. Test the value itself instead.
        if isinstance(value, (int, float, complex)):
            tmp[col] = Int64Col()
        elif isinstance(value, (str, bytes)):
            tmp[col] = StringCol(itemsize=128)
        else:
            # Fall back to deriving the column type from the numpy dtype.
            tmp[col] = Col.from_dtype(dtype(np.asarray(value).dtype))
    return tmp
class AssociationTable(tables.IsDescription):
    """
    2012.12.18 pytable class to store the genome-wide association result
    """
    id = UInt64Col(pos=0)
    locus_id = UInt64Col(pos=1, dflt=0)
    chromosome = StringCol(64, pos=2, dflt='')  # 64 byte-long
    start = UInt64Col(pos=3, dflt=0)
    stop = UInt64Col(pos=4, dflt=0)
    score = Float64Col(pos=5, dflt=-1)
    mac = Int64Col(pos=6, dflt=-1)
    maf = Float64Col(pos=7, dflt=-1)
    genotype_var_perc = Float64Col(pos=8, dflt=-1)
    beta_list = Float64Col(shape=(5,), pos=9, dflt=-1)  # 2013.1.9
    # BUG FIX: was pos=9, duplicating beta_list's position; duplicate pos
    # values make the pytables column ordering ambiguous.
    beta_pvalue_list = Float64Col(shape=(5,), pos=10, dflt=-1)
def setup(self, node, block_size, blob_name, index_name):
    """Create the blob earray (if absent) and its lookup table under *node*."""
    if not hasattr(node, blob_name):
        self.file_.create_earray(node, blob_name,
                                 StringAtom(itemsize=block_size), (0, ),
                                 filters=filters)
    index_description = {
        "index": Int64Col(pos=0),
        "start": UInt32Col(pos=1),
        "size": UInt32Col(pos=2),
    }
    # Every column which appears in a where method call should/must be
    # indexed! This is not only for performance but for correct lookup as
    # well (I had strange bugs else).
    string_index = self.file_.create_table(node, index_name,
                                           index_description, filters=None)
    string_index.cols.index.create_index()