Exemplo n.º 1
0
class LocusTable(tables.IsDescription):
    """PyTables row description for a locus and its reference/alternative alleles.

    Every column sets ``pos`` explicitly (0 through 16), so the on-disk column
    order follows the order of the declarations below.
    """
    id = UInt64Col(pos=0)
    name = StringCol(512, pos=1)  # format: 'species_name.chromosome_name.locus_name'
    chromosome_id = UInt64Col(pos=2)  # unsigned 64-bit integer
    start = UInt64Col(pos=3)
    stop = UInt64Col(pos=4)
    ref_allele = StringCol(64, pos=5)
    ref_allele_length = UInt64Col(
        pos=6
    )  # may exceed 64; useful when the allele is too long to fit in ref_allele
    ref_allele_frequency = Float64Col(pos=7)

    alt_allele = StringCol(64, pos=8)
    alt_allele_length = UInt64Col(
        pos=9
    )  # may exceed 64; useful when the allele is too long to fit in alt_allele
    alt_allele_frequency = Float64Col(pos=10)

    generation_mutation_arose = Int64Col(pos=11)
    generation_mutation_fixed = Int64Col(pos=12)
    mutation_type = StringCol(
        4, pos=13
    )  # 0=synonymous/non-coding, 1=non-synonymous, i=insertion, d=deletion, v=inversion
    fitness = Float64Col(pos=14)
    ancestral_amino_acid = StringCol(4, pos=15)  # only for mutation_type 0/1
    derived_amino_acid = StringCol(4, pos=16)
Exemplo n.º 2
0
class Record(tables.IsDescription):
    """Row description with one default-valued column per column type.

    ``var1`` and ``var2`` are pinned to positions 0 and 1; the remaining
    columns do not set ``pos``.  Columns for optional types (half/extended
    precision floats and the matching complex types) are declared only when
    the installed ``tables`` module exposes the corresponding ``*Col`` class.
    """
    var1 = StringCol(itemsize=4, dflt=b"abcd", pos=0)  # 4-byte string
    var2 = StringCol(itemsize=1, dflt=b"a", pos=1)  # 1-byte string
    var3 = BoolCol(dflt=1)
    var4 = Int8Col(dflt=1)
    var5 = UInt8Col(dflt=1)
    var6 = Int16Col(dflt=1)
    var7 = UInt16Col(dflt=1)
    var8 = Int32Col(dflt=1)
    var9 = UInt32Col(dflt=1)
    var10 = Int64Col(dflt=1)
    var11 = Float32Col(dflt=1.0)
    var12 = Float64Col(dflt=1.0)
    var13 = ComplexCol(itemsize=8, dflt=(1. + 0.j))  # single-precision complex
    var14 = ComplexCol(itemsize=16, dflt=(1. + 0.j))  # double-precision complex

    # Platform-dependent types: probe for the class before declaring a column.
    if hasattr(tables, 'Float16Col'):
        var15 = tables.Float16Col(dflt=1.0)
    if hasattr(tables, 'Float96Col'):
        var16 = tables.Float96Col(dflt=1.0)
    if hasattr(tables, 'Float128Col'):
        var17 = tables.Float128Col(dflt=1.0)
    # A 24-byte complex is 192 bits, so the class to probe for is
    # Complex192Col.  The previous probe for 'Complex196Col' could never
    # match, which silently dropped var18 on platforms that support it.
    if hasattr(tables, 'Complex192Col'):
        var18 = tables.ComplexCol(itemsize=24, dflt=(1. + 0.j))
    if hasattr(tables, 'Complex256Col'):
        var19 = tables.ComplexCol(itemsize=32, dflt=(1. + 0.j))
Exemplo n.º 3
0
    def test_reads_meta_schema(self):
        """_read_meta returns the schema template followed by the stored rows."""
        fs = fsopendir('temp://')
        h5_path = fs.getsyspath('temp.h5')

        # A minimal three-column descriptor keeps the fixture small.
        descriptor = {
            'pos': Int64Col(),
            'name': StringCol(itemsize=255),
            'type': StringCol(itemsize=255),
        }

        # Two rows, each a list of (field, value) pairs: 0/'0'/'0' and 1/'1'/'1'.
        rows = []
        for i in range(2):
            label = str(i)
            rows.append([('pos', float(i)), ('name', label), ('type', label)])
        self._write_test_meta(fs, 'schema', descriptor, rows)

        with open_file(h5_path, mode='r') as h5_file:
            meta = HDFReader._read_meta(h5_file)
            self.assertIn('schema', meta)
            schema = meta['schema']

            # One template row plus one row for each of the two written rows.
            self.assertEqual(len(schema), 3)

            template = MPRowsFile.SCHEMA_TEMPLATE
            self.assertEqual(schema[0], template)
            self.assertEqual(len(schema[1]), len(template))
            self.assertEqual(len(schema[0]), len(template))

            pos_index = template.index('pos')
            name_index = template.index('name')

            first_row, second_row = schema[1], schema[2]
            self.assertEqual(first_row[pos_index], 0)
            self.assertEqual(second_row[pos_index], 1.0)

            self.assertEqual(first_row[name_index], '0')
            self.assertEqual(second_row[name_index], '1')
Exemplo n.º 4
0
def _get_rows_descriptor(columns):
    """Build a pytables descriptor from ambry_sources column specifications.

    Args:
        columns (list of dict): each dict is read for its 'type', 'name' and
            'pos' keys.

    Returns:
        dict: column name -> pytables column instance created at the given
            position.

    Raises:
        Exception: if a column's 'type' has no pytables counterpart.
    """
    def _text_column(pos):
        # All textual and unknown types share one fixed-width string column.
        return StringCol(itemsize=255, pos=pos)

    factories = {
        'int': lambda pos: Int32Col(pos=pos),
        'long': lambda pos: Int64Col(pos=pos),
        'float': lambda pos: Float64Col(pos=pos),
    }
    for text_type in ('str', 'bytes', 'unknown'):
        factories[text_type] = _text_column

    result = {}
    for spec in columns:
        if spec['type'] not in factories:
            raise Exception(
                'Failed to convert `{}` ambry_sources type to pytables type.'.
                format(spec['type']))
        make_column = factories[spec['type']]
        result[spec['name']] = make_column(spec['pos'])
    return result
Exemplo n.º 5
0
class Particle(IsDescription):
    """PyTables row description for a particle record.

    No column sets ``pos``, so the column order is left to PyTables.
    """
    name = StringCol(16)   # fixed-width string, 16 bytes
    idnumber = Int64Col()      # signed 64-bit integer
    ADCcount = UInt16Col()     # unsigned 16-bit integer
    TDCcount = UInt8Col()      # unsigned 8-bit integer
    grid_i = Int32Col()      # signed 32-bit integer
    grid_j = Int32Col()      # signed 32-bit integer
    pressure = Float32Col()    # single-precision float
    energy = Float64Col()    # double-precision float
Exemplo n.º 6
0
    def _getDescription(self, columns, data):
        """Build a PyTables column description from one sample row.

        Args:
            columns: iterable of column names.
            data: iterable of sample values, paired with ``columns`` by
                position (extra items on either side are ignored by ``zip``).

        Returns:
            dict: column name -> column instance.  Values that ``int()``
            accepts map to ``Int64Col``; other ``str``/``bytes`` values map to
            ``StringCol(itemsize=128)``; anything else gets a column derived
            from the value's numpy dtype.
        """
        tmp = {}
        for col, value in zip(columns, data):
            # The original test was ``isinstance(int(value), (int, ...))``:
            # always true when int() succeeds and an exception otherwise, so
            # the branches below were unreachable and non-numeric input
            # crashed.  Treat a failed conversion as "not integer-like".
            try:
                int(value)
            except (TypeError, ValueError, OverflowError):
                int_like = False
            else:
                int_like = True

            if int_like:
                tmp[col] = Int64Col()

            elif isinstance(value, (str, bytes)):
                tmp[col] = StringCol(itemsize=128)

            else:
                tmp[col] = Col.from_dtype(dtype(np.asarray(value).dtype))

        return tmp
Exemplo n.º 7
0
class AssociationTable(tables.IsDescription):
	"""
	2012.12.18 pytable class to store the genome-wide association result

	Every column sets an explicit, unique ``pos`` (0 through 10) so the column
	order matches the order of the declarations below.
	"""
	id = UInt64Col(pos=0)
	locus_id = UInt64Col(pos=1, dflt=0)
	chromosome = StringCol(64, pos=2, dflt='')	#64 byte-long
	start = UInt64Col(pos=3, dflt=0)
	stop = UInt64Col(pos=4, dflt=0)
	score = Float64Col(pos=5, dflt=-1)
	mac = Int64Col(pos=6, dflt=-1)
	maf = Float64Col(pos=7, dflt=-1)
	genotype_var_perc = Float64Col(pos=8, dflt=-1)
	beta_list = Float64Col(shape=(5,), pos=9, dflt=-1)	#2013.1.9
	# Was pos=9, colliding with beta_list; each column needs its own position.
	beta_pvalue_list = Float64Col(shape=(5,), pos=10, dflt=-1)
Exemplo n.º 8
0
    def setup(self, node, block_size, blob_name, index_name):
        """Create the blob array and its index table under ``node`` if missing.

        When ``node`` has no attribute named ``blob_name``, this creates:

        * an extendable array called ``blob_name`` holding fixed-size strings
          of ``block_size`` bytes, with initial shape ``(0,)``;
        * a table called ``index_name`` with the columns ``index`` (Int64),
          ``start`` (UInt32) and ``size`` (UInt32), and an index on ``index``.

        Args:
            node: parent node passed to ``create_earray`` / ``create_table``.
            block_size: item size of the string atom used for the blob array.
            blob_name: name of the blob array, also used for the existence check.
            index_name: name of the index table.
        """

        if not hasattr(node, blob_name):
            # NOTE(review): `filters` is not a parameter of this method; it is
            # presumably a module-level name -- confirm it is defined.
            self.file_.create_earray(node,
                                     blob_name,
                                     StringAtom(itemsize=block_size), (0, ),
                                     filters=filters)

            description = {}
            description["index"] = Int64Col(pos=0)
            description["start"] = UInt32Col(pos=1)
            description["size"] = UInt32Col(pos=2)

            # Every column that appears in a where() call should/must be indexed!
            # This matters not only for performance but for correct lookups as well
            # (the original author reports strange bugs otherwise).
            string_index = self.file_.create_table(node,
                                                   index_name,
                                                   description,
                                                   filters=None)
            string_index.cols.index.create_index()