示例#1
0
def create_table(conn, name, start, stop):
    """(Re)create the table ``name`` and fill it with generated test rows.

    An existing table called ``name`` is deleted first.  One mutation is
    written for every ``i`` in ``range(start, stop)``.  Its row, column
    family and column qualifier are the zero-based offset ``i - start``
    rendered as ``row%06d``, ``f%06d`` and ``q%06d``; its value depends on
    ``name``:

    * ``'uints'`` / ``'ints'``: ``i`` zero-padded to six digits
    * ``'floats'``: ``i + 0.5`` formatted with ``{0:07f}``
    * ``'strings'``: ``'xxx'`` followed by ``i``
    * ``'missing_data'``: ``'NA'`` for even ``i``, ``'nan'`` for the
      remaining multiples of three, otherwise ``i`` zero-padded to six digits

    All keys and values are UTF-8 encoded before being written.

    Args:
        conn: connection object providing ``table_exists``, ``delete_table``,
            ``create_table`` and ``create_batch_writer``.
        name: table name; also selects the kind of value generated.
        start: first value of ``i`` (inclusive).
        stop: last value of ``i`` (exclusive).

    Raises:
        ValueError: if ``name`` is not one of the names listed above.  The
            check happens while generating the first row, so the table has
            already been (re)created at that point and nothing is raised for
            an empty range.
    """
    if conn.table_exists(name):
        conn.delete_table(name)
    logging.debug('Creating table "{}"...'.format(name))
    conn.create_table(name)

    writer = conn.create_batch_writer(name)
    # Close the writer even when a row cannot be generated or written, so a
    # failure does not leave the batch writer open.
    try:
        logging.debug('Writing mutations...')
        for i in range(start, stop):
            if name in ('uints', 'ints'):
                value = '{0:06d}'.format(i)
            elif name == 'floats':
                value = '{0:07f}'.format(i + 0.5)
            elif name == 'strings':
                value = 'xxx' + str(i)
            elif name == 'missing_data':
                if i % 2 == 0:
                    value = 'NA'
                elif i % 3 == 0:
                    value = 'nan'
                else:
                    value = '{0:06d}'.format(i)
            else:
                raise ValueError('invalid table name')

            # Keys are numbered from zero regardless of where the values start.
            offset = i - start
            m = pyaccumulo.Mutation('row{0:06d}'.format(offset).encode('utf-8'))
            m.put(cf='f{0:06d}'.format(offset).encode('utf-8'),
                  cq='q{0:06d}'.format(offset).encode('utf-8'),
                  val=value.encode('utf-8'))
            writer.add_mutation(m)
    finally:
        writer.close()
示例#2
0
    def _write_file(self, uuid, md5, size, path, full_path):
        """Queue a mutation describing the file stored at ``full_path``.

        The file is read in full, then ``uuid`` and ``md5`` are passed to
        ``util.check_uuid`` / ``util.check_md5``, the byte count is compared
        with ``size`` and the content is passed to ``util.check_md5_hash``
        together with ``md5``.

        ``path`` is expected to start with ``uuid + '/'``.  A path starting
        with ``'latest/'`` or ``'content'`` gets that prefix prepended; any
        other path is rejected.

        The mutation uses ``md5`` as its row and always records the project
        (``file|project``) and the path (``file|path``).  The first time a
        given ``md5`` is seen (tracked in ``self.hashes``) it also records
        the size, the content and an empty ``twosix``/``tags`` cell.

        Raises:
            ValueError: if ``size`` differs from the number of bytes read, or
                if ``path`` has none of the accepted prefixes.
        """
        with open(full_path, 'rb') as handle:
            content = handle.read()

        util.check_uuid(uuid)
        util.check_md5(md5)
        actual_size = len(content)
        if actual_size != size:
            raise ValueError('size of file {}: {} != {}'.format(
                path, actual_size, size))
        util.check_md5_hash(content, md5)

        project_prefix = uuid + '/'
        if not path.startswith(project_prefix):
            if not path.startswith(('latest/', 'content')):
                raise ValueError('path does not start with uuid or svn/git')
            path = project_prefix + path

        mutation = pyaccumulo.Mutation(md5)
        mutation.put(cf='file|project', cq=uuid)
        mutation.put(cf='file|path', cq=path)
        first_sighting = md5 not in self.hashes
        if first_sighting:
            # Size, content and the tag placeholder are stored once per hash.
            mutation.put(cf='file|size', cq='size', val=str(size))
            mutation.put(cf='file|content', cq='content', val=content)
            mutation.put(cf='twosix', cq='tags')  # no value written
            self.hashes.add(md5)
        # TODO: a 'file|count' column and 'twosix|tag' entries are not
        # written yet.
        self.wr.add_mutation(mutation)
def mutation_from_kv_tuple(tup):
    """Define a pyaccumulo mutation from a (key, value) formatted tuple.

    Args:
        tup: a 5-item sequence ``(row, cv, cf, cq, val)`` holding the row,
            column visibility, column family, column qualifier and value.

    Returns:
        A ``pyaccumulo.Mutation`` for ``row`` with a single put built from
        the remaining items.

    Raises:
        ValueError: if ``tup`` does not unpack into exactly five items.
    """
    row, cv, cf, cq, val = tup
    # Single-argument print(...) emits the same text on Python 2 and 3; the
    # bare print statement is a syntax error on Python 3.
    print('Inserting entry: \n Row - %s,\n Column_Visibility - %s,\n Column_Family - %s,\n Column_Qualifier - %s,\n Value - %s' % (row, cv, cf, cq, val))
    m = pyaccumulo.Mutation(row)
    m.put(cf=cf, cq=cq, cv=cv, val=val)
    return m
def mutation_from_kv_tuple(tup, vis=''):
    """Define a pyaccumulo mutation from a (key, value) formatted tuple.

    Args:
        tup: a 2-item sequence ``(row, val)``.
        vis: column visibility applied to the cell; defaults to ``''``.

    Returns:
        A ``pyaccumulo.Mutation`` for ``row`` with a single put that stores
        ``val`` under an empty column family and empty column qualifier.

    Raises:
        ValueError: if ``tup`` does not unpack into exactly two items.
    """
    row, val = tup
    # Single-argument print(...) emits the same text on Python 2 and 3; the
    # bare print statement is a syntax error on Python 3.
    print('Inserting signed row: %s, value: %s' % (row, val))
    m = pyaccumulo.Mutation(row)
    m.put(cf='', cq='', cv=vis, val=val)
    return m
示例#5
0
    def insert_data_accumulo(self, csvData, fields, name, host, port, user, password):
        """Write the rows of ``csvData`` into the existing table ``name``.

        Each row becomes one mutation whose row id is its 1-based position in
        ``csvData`` (as a string).  For every entry of ``fields`` one put is
        added, with the 1-based field position (as a string) as the first
        argument, the field name as the second and the stringified cell
        ``row[i]`` as the third.

        Args:
            csvData: iterable of indexable rows.
            fields: sequence of field names; each row must have at least
                ``len(fields)`` cells.
            name: name of the target table.
            host, port, user, password: passed to
                ``self.get_connection_accumulo``.

        Returns:
            ``Util.error_msg(...)`` when the table does not exist, otherwise
            ``Util.success_msg()``.

        Raises:
            IndexError: if a row has fewer cells than ``fields`` (for
                list-like rows).  The batch writer is still closed.
        """
        conn = self.get_connection_accumulo(host, port, user, password)
        if not conn.table_exists(name):
            return Util.error_msg("Accumulo table " + name + " does not exist")

        wr = conn.create_batch_writer(name)
        # Close the writer even if a malformed row or a failed write aborts
        # the loop, so the batch writer is not left open.
        try:
            for rowNum, row in enumerate(csvData, 1):
                m = pyaccumulo.Mutation(str(rowNum))
                for i, field in enumerate(fields):
                    # NOTE(review): these arguments are positional.  In
                    # pyaccumulo, Mutation.put's third positional parameter
                    # appears to be the column visibility (cv), not the
                    # value -- confirm whether val= was intended here.
                    m.put(str(i + 1), field, str(row[i]))
                wr.add_mutation(m)
        finally:
            wr.close()
        return Util.success_msg()
示例#6
0
 def add_project(self, uuid):
     """Record the project ``uuid`` in the project table.

     ``uuid`` is first passed to ``util.check_uuid``.  A mutation with
     ``uuid`` as its row and an empty column family/qualifier is then
     written to ``self.project_table`` through ``self.conn``, and ``uuid``
     is added to ``self.projects``.
     """
     util.check_uuid(uuid)
     project_marker = pyaccumulo.Mutation(uuid)
     project_marker.put(cf="", cq="")
     self.conn.write(self.project_table, project_marker)
     self.projects.add(uuid)