Exemplos de InstructionStream em Python, exemplos de corepy.arch.spu.platform.InstructionStream em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: fbdemo.py Projeto: tmaone/efi

def fb_draw():
  """Alternate between two framebuffer pages forever, drawing with the SPU.

  Two instruction streams are synthesized up front by separate FBDraw
  instances.  Both receive the addresses of framebuffer pages 0 and 1, but
  in opposite order.  The main loop then repeats: execute one stream, wait
  for vsync, flip to the matching page, and do the same for the other
  stream.

  The loop has no exit condition.  It ends only when an exception (for
  example KeyboardInterrupt) propagates; the framebuffer is closed on the
  way out so the device is not left open.
  """
  code0 = synspu.InstructionStream()
  code1 = synspu.InstructionStream()
  proc = synspu.Processor()

  fb = cell_fb.framebuffer()
  cell_fb.fb_open(fb)

  try:
    draw0 = FBDraw()
    draw0.set_buffers(cell_fb.fb_addr(fb, 0), cell_fb.fb_addr(fb, 1))
    draw0.set_stride(fb.stride)

    draw0.synthesize(code0)

    # Same setup as draw0, with the two page addresses swapped.
    draw1 = FBDraw()
    draw1.set_buffers(cell_fb.fb_addr(fb, 1), cell_fb.fb_addr(fb, 0))
    draw1.set_stride(fb.stride)

    draw1.synthesize(code1)

    while True:

      # cell_fb.fb_clear(fb, 0)
      proc.execute(code0)
      cell_fb.fb_wait_vsync(fb)
      cell_fb.fb_flip(fb, 0)

      # cell_fb.fb_clear(fb, 1)
      proc.execute(code1)
      cell_fb.fb_wait_vsync(fb)
      cell_fb.fb_flip(fb, 1)
  finally:
    # Reached only via an exception, since the loop above never breaks.
    cell_fb.fb_close(fb)

Exemplo n.º 2

0

Exibir arquivo

def TestDecrementer():

    code = synspu.InstructionStream()

    spu_write_decr(code, 0x7FFFFFFFl)
    spu_start_decr(code)

    # Get a message from the PPU
    spu_read_in_mbox(code)

    reg = spu_read_decr(code)
    spu_write_out_mbox(code, reg)
    spu_stop_decr(code)

    proc = synspu.Processor()

    spe_id = proc.execute(code, async=True)

    print 'test is sleeping for 1 second'
    time.sleep(1)
    synspu.spu_exec.write_in_mbox(spe_id, 0x44CAFE)

    while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
        pass

    print 'spu said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

    proc.join(spe_id)

    return

Exemplo n.º 3

0

Exibir arquivo

def test_stream_popc():
    code = synspu.InstructionStream()
    proc = synspu.Processor()

    bits = array.array('I', range(1024))
    for i in range(0, 1024, 4):
        bits[i] = 0x01010101  # 4 bits
        bits[i + 1] = 0xFFFFFFFF  # 32 bits
        bits[i + 2] = 0x10101010  # 4 bits
        bits[i + 3] = 0xFF0FF0F0  # 20 bits = 60 bits total


#    bits[i]   = 1
#    bits[i+1] = 2
#    bits[i+2] = 3
#    bits[i+3] = 4

#abits = synspu.aligned_memory(len(bits), typecode = 'I')
#abits.copy_to(bits.buffer_info()[0], len(bits))

    popc = syn_popc_stream()
    popc.set_stream_addr(bits.buffer_info()[0])
    popc.set_stream_size(len(bits))

    popc.synthesize(code)

    count = proc.execute(code, mode='mbox')
    print '-->', count
    assert (count == 60 * 1024 / 4)

    return

Exemplo n.º 4

0

Exibir arquivo

def TestMbox():

    code = synspu.InstructionStream()

    # Send a message to the PPU
    spu_write_out_mbox(code, 0xDEADBEEFl)

    # Get a message from the PPU
    reg = spu_read_in_mbox(code)

    # And send it back
    code.add(spu.wrch(reg, SPU_WrOutMbox))

    proc = synspu.Processor()

    spe_id = proc.execute(code, async=True)
    synspu.spu_exec.write_in_mbox(spe_id, 0x88CAFE)

    while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
        pass
    print 'spe said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))
    while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
        pass
    print 'spe said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

    proc.join(spe_id)

    return

Exemplo n.º 5

0

Exibir arquivo

    def generate(self, results, pattern, r1_range, r2_range, max_init, max_n,
                 size):

        # Setup the range parameter array
        r1_inc = (r1_range[1] - r1_range[0]) / size[0]
        r2_inc = (r2_range[1] - r2_range[0]) / size[1]

        ranges = extarray.extarray('f', [0.0] * 16)
        for i in range(4):
            ranges[i] = r1_range[0]
            ranges[4 + i] = r2_range[0]
            ranges[8 + i] = r1_inc
            ranges[12 + i] = r2_inc

        # Setup the pattern vector
        bits = _pattern2vector(pattern)

        # Copy the paramters to aligned buffers
        #a_ranges = synspu.aligned_memory(len(ranges), typecode='I')
        #a_ranges.copy_to(ranges.buffer_info()[0], len(ranges))

        #a_pattern = synspu.aligned_memory(len(bits), typecode='I')
        #a_pattern.copy_to(bits.buffer_info()[0], len(bits))

        renderer = MailboxRenderer()
        ly_block = LyapunovBlock()

        ly_block.set_size(size[0], size[1])
        #ly_block.set_range(a_ranges)
        #ly_block.set_pattern(a_pattern)
        ly_block.set_range(ranges)
        ly_block.set_pattern(bits)
        ly_block.set_max_init(max_init)
        ly_block.set_max_n(max_n)
        ly_block.set_renderer(renderer)

        code = synspu.InstructionStream()
        ly_block.synthesize(code)

        proc = synspu.Processor()

        spe_id = proc.execute(code, async=True)

        for i in range(size[0] * size[1]):
            while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
                pass
            print 'ly said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

        proc.join(spe_id)

        # for x in range(size[0]):
        #   r2 = r2_range[0] + r2_inc
        #   print 'col:', x, r1, r2

        #   for y in range(size[1]):
        #     results[y, x] = lyapunov_point(pattern, r1, r2, max_init, max_n)
        #     r2 += r2_inc
        #   r1 += r1_inc

        return

Exemplo n.º 6

0

Exibir arquivo

Arquivo: iterators.py Projeto: microwave89-hv/efi

def TestVecIter(n_spus=1):
    """Double every element of a 1024-word array using spu_vec_iter.

    The array is streamed through 16-byte buffers with stream_buffer
    (save=True).  With n_spus > 1 a ParallelInstructionStream is used and
    the stream is wrapped with parallel().  Afterwards every element is
    asserted to equal twice its index.
    """
    n = 1024
    buffer_size = 16
    a = extarray.extarray('I', range(n))

    if n_spus > 1:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    current = var.SignedWord(0, code)

    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           save=True)
    if n_spus > 1:
        stream = parallel(stream)

    md = memory_desc('i', 0, buffer_size)

    # `current` is deliberately rebound by the inner iterator, as in the
    # original code.
    for buffer in stream:
        for current in spu_vec_iter(code, md):
            current.v = current + current

    proc = env.Processor()
    r = proc.execute(code, n_spus=n_spus)

    for index in range(n):
        assert (a[index] == index + index)

Exemplo n.º 7

0

Exibir arquivo

def TestTanimoto():
  """Synthesis smoke test for the Tanimoto kernel with n_bits = 256.

  Acquires two registers for each operand and one for the result,
  synthesizes the kernel, prints the generated code and executes it.
  No computed value is checked.
  """
  stream = synspu.InstructionStream()
  processor = synspu.Processor()

  stream.set_debug(True)

  # Two registers per operand, one for the result.
  operand_x = stream.acquire_registers(2)
  operand_y = stream.acquire_registers(2)
  result_reg = stream.acquire_register()

  kernel = Tanimoto()
  kernel.set_n_bits(256)
  kernel.set_x_regs(operand_x)
  kernel.set_y_regs(operand_y)
  kernel.set_result_reg(result_reg)
  kernel.synthesize(stream)

  stream.print_code()
  processor.execute(stream)

  # TODO: Do a real test, not just a synthesis test

Exemplo n.º 8

0

Exibir arquivo

def TestSetSlotValue():
    """Check set_slot_value with both immediate and register sources.

    The four word slots of one register are set to 0x10, 0x11, 0x12 and
    0x13 (slots 1 and 3 come from SignedWord variables).  The SPU program
    then sends the register to the outbound mailbox four times, rotating it
    by 4 bytes after each send, and the host asserts it receives
    0x10..0x13 in order.
    """
    import corepy.arch.spu.platform as synspu
    import corepy.arch.spu.isa as spu
    import corepy.arch.spu.types.spu_types as var
    import corepy.arch.spu.lib.dma as dma

    code = synspu.InstructionStream()
    proc = synspu.Processor()
    spu.set_active_code(code)

    a = var.SignedWord(0x11)
    b = var.SignedWord(0x13)
    r = var.SignedWord(0xFFFFFFFF)

    # Slot index -> source value, in slot order.
    for slot, source in enumerate((0x10, a, 0x12, b)):
        set_slot_value(code, r, slot, source)

    for _ in range(4):
        spu.wrch(r, dma.SPU_WrOutMbox)
        spu.rotqbyi(r, r, 4)

    spe_id = proc.execute(code, mode='async')

    for slot in range(4):
        while True:
            if synspu.spu_exec.stat_out_mbox(spe_id) != 0:
                break
        received = synspu.spu_exec.read_out_mbox(spe_id)
        assert (received == (slot + 0x10))

    proc.join(spe_id)

Exemplo n.º 9

0

Exibir arquivo

def TestAll():
    """Synthesize one call to each helper, print the code and execute it.

    Three registers are acquired; every helper is called with the third as
    its first argument, the first as its second argument, and either the
    second register or an immediate as its last argument.  Nothing is
    asserted about the results.
    """
    import corepy.arch.spu.platform as env

    stream = env.InstructionStream()
    spu.set_active_code(stream)

    lhs = stream.acquire_register()
    rhs = stream.acquire_register()
    dest = stream.acquire_register()

    # (helper, last operand), in emission order.
    calls = (
        (shr, rhs),
        (cneq, rhs),
        (cge, rhs),
        (cgei, 10),
        (lt, rhs),
        (lti, 10),
        (a_immediate, 10),
        (a_immediate, 10000),
        (sf_immediate, 10000),
    )
    for helper, operand in calls:
        helper(dest, lhs, operand)

    stream.print_code()
    processor = env.Processor()
    processor.execute(stream)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: spu_basics.py Projeto: maxim-tyutyunnikov/corepy

def DoubleBufferExample(n_spus=6):
    """
    Subtract 2 from every element of a 30000-word array on the SPU(s).

    stream_buffer is an iterator that streams data from main memory to
    SPU local store in blocked buffers.  The buffers can be managed
    using single or double buffering semantics.  The induction variable
    returned by the buffer returns the address of the current buffer.

    Note: stream_buffer was designed before memory descriptors and has
          not been updated to support them yet.  The interface will
          change slightly when the memory classes are finalized.

    n_spus selects the instruction stream type: a ParallelInstructionStream
    (with the stream wrapped by parallel()) when greater than 1, a plain
    InstructionStream otherwise.

    Mismatches are reported with a printed message rather than raised.
    """
    n = 30000
    buffer_size = 16

    # Create an array and align the data
    a = extarray.extarray('I', range(n))

    addr = a.buffer_info()[0]
    n_bytes = n * 4

    if n_spus > 1:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    current = SignedWord(0, code)
    two = SignedWord(2, code)

    # Create the stream buffer, parallelizing it if using more than 1 SPU
    stream = stream_buffer(code,
                           addr,
                           n_bytes,
                           buffer_size,
                           0,
                           buffer_mode='double',
                           save=True)
    if n_spus > 1:
        stream = parallel(stream)

    # Loop over the buffers
    for buffer in stream:

        # Create an iterator that computes the address offsets within the
        # buffer.  Note: this will be supported by var/vec iters soon.
        for lsa in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, lsa, buffer))
            current.v = current - two
            code.add(spu.stqx(current, lsa, buffer))

    # Run the synthetic program; the array is modified in place
    proc = env.Processor()
    r = proc.execute(code, n_spus=n_spus)

    # Indices 0 and 1 are not checked (presumably because i - 2 would be
    # negative there while the array holds unsigned words).
    # An explicit comparison is used instead of assert + bare except so the
    # check still runs under -O and unrelated exceptions are not swallowed.
    for i in range(2, len(a)):
        if a[i] != i - 2:
            print 'DoubleBuffer error:', a[i], i - 2

    return

Exemplo n.º 11

0

Exibir arquivo

Arquivo: spu_log.py Projeto: maxim-tyutyunnikov/corepy

def TestFloats():
    import math

    code = synspu.InstructionStream()
    proc = synspu.Processor()

    spu.set_active_code(code)

    code.set_debug(True)

    # Create a simple SPU program that computes log for all values bettween
    # .01 and 10.0 with .01 increments

    start = .65
    stop = .75
    inc = .01

    sp_step = 0x3C23D70A
    # r_current = var.Word(0x3C23D70A) # .01 in single precision
    r_current = var.Word(0x3F266666)
    r_step = var.Word(sp_step)  # .01 in single precision
    result = var.Word(0)
    log = SPULog()

    log.setup(code)
    log.set_result(result)
    log.set_x(r_current)

    log_iter = syn_iter(code, int((stop - start) / inc))

    for i in log_iter:

        log.synthesize(code)
        spu.fa(r_current, r_current, r_step)
        spu.wrch(result, dma.SPU_WrOutMbox)

    # code.print_code()
    spe_id = proc.execute(code, mode='async')

    x = start
    for i in range(int((stop - start) / inc)):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        slog = synspu.spu_exec.read_out_mbox(spe_id)
        print '%.3f 0x%08X  %.08f %.08f ' % (x, slog, _sp_to_float(slog),
                                             math.log(x, 2))
        x += inc

    proc.join(spe_id)

    return

Exemplo n.º 12

0

Exibir arquivo

Arquivo: ispu.py Projeto: tmaone/efi

    def __init__(self):
        """Create the instruction stream and register buffer, then synthesize.

        The register buffer holds 128 * 4 unsigned words and is cleared
        immediately.  The runtime fields (speid, reg_lsa, proc) start as
        None.  self.synthesize() is called last, after every attribute
        exists.
        """
        # Runtime parameters, not known until later
        self.speid = None
        self.reg_lsa = None
        self.proc = None

        # Code and memory buffers
        self.code = env.InstructionStream()
        self.regs = extarray.extarray('I', 128 * 4)
        self.regs.clear()

        self.synthesize()

Exemplo n.º 13

0

Exibir arquivo

def TestSaveBuffer1():
    import array

    code = synspu.InstructionStream()
    proc = synspu.Processor()

    code.set_debug(True)
    spu.set_active_code(code)

    n = 2**14
    data = array.array('I', range(n))
    #data = synspu.aligned_memory(n, typecode = 'I')
    #data.copy_to(data_array.buffer_info()[0], len(data_array))

    save_buffer = SaveBuffer()

    save_buffer.setup()
    save_buffer.init_ls_buffer(0, 128)
    save_buffer.init_mm_buffer(data.buffer_info()[0], n)

    value = var.SignedWord(0xCAFEBABE)

    for i in spuiter.syn_iter(code, n / 4):
        save_buffer.save_register(value)

    code.print_code()
    spe_id = proc.execute(code, mode='async')

    for i in range(n / 4):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        print 'size: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        print 'offset: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        print 'test: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

    proc.join(spe_id)

    #data.copy_from(data_array.buffer_info()[0], len(data_array))

    print data[:10]
    return

Exemplo n.º 14

0

Exibir arquivo

Arquivo: iterators.py Projeto: microwave89-hv/efi

def TestContinueLabel(n_spus=1):
    """Exercise the continue label of spu_vec_iter inside a stream loop.

    Each element is doubled, then compared with 4.  A brz on the gathered
    comparison result branches to the iterator's continue label, skipping
    the second doubling.  Afterwards the host asserts that elements with
    index >= 4 equal 2 * index and the first four equal 4 * index.
    """
    n = 1024
    buffer_size = 16
    a = extarray.extarray('I', range(n))

    if n_spus > 1:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    current = var.SignedWord(0, code)
    test = var.SignedWord(0, code)
    four = var.SignedWord(4, code)

    stream = stream_buffer(code, a.buffer_info()[0], n * 4, buffer_size, 0,
                           save=True)
    if n_spus > 1:
        stream = parallel(stream)

    md = memory_desc('i', 0, buffer_size)
    lsa_iter = spu_vec_iter(code, md)

    for buffer in stream:
        for current in lsa_iter:
            current.v = current + current

            # Branch to the continue label when the comparison mask is zero.
            test.v = (current == four)
            code.add(spu.gbb(test, test))
            code.add(spu.brz(test.reg, lsa_iter.continue_label))

            current.v = current + current

    proc = env.Processor()
    r = proc.execute(code, n_spus=n_spus)

    for index in range(n):
        expected = index + index if index >= 4 else index * 4
        assert (a[index] == expected)

Exemplo n.º 15

0

Exibir arquivo

def test_syn(kernel):
    """Synthesize `kernel()` and check that it counts 60 bits.

    `kernel` is called with no arguments and the resulting object's
    synthesize() is applied to a fresh instruction stream.  Four words
    containing 60 set bits in total are passed through ExecParams fields
    p7..p10, and the value returned in 'mbox' mode must equal 60.
    """
    stream = synspu.InstructionStream()
    processor = synspu.Processor()

    instance = kernel()
    instance.synthesize(stream)

    # Set bits per word: 4 + 32 + 4 + 20 = 60
    exec_params = synspu.spu_exec.ExecParams()
    exec_params.p7 = 0x01010101
    exec_params.p8 = 0xFFFFFFFF
    exec_params.p9 = 0x10101010
    exec_params.p10 = 0xFF0FF0F0

    bit_count = processor.execute(stream, mode='mbox', params=exec_params)
    assert (bit_count == 60)

Exemplo n.º 16

0

Exibir arquivo

Arquivo: iterators.py Projeto: microwave89-hv/efi

def TestStreamBufferSingle(n_spus=1):
    """Double a 1024-word array through a stream_buffer with 128-byte blocks.

    Inside each buffer a syn_iter walks the offsets in steps of 16; every
    quadword is loaded, added to itself and stored back.  With n_spus > 1
    the parallel stream variants are used.  Each element must end up equal
    to twice its index.
    """
    n = 1024
    buffer_size = 128
    a = extarray.extarray('I', range(n))

    if n_spus > 1:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    current = var.SignedWord(0, code)

    addr = a.buffer_info()[0]
    stream = stream_buffer(code, addr, n * 4, buffer_size, 0, save=True)
    if n_spus > 1:
        stream = parallel(stream)

    # syn_iter replaces an earlier hand-written lqx/a/stqx loop that used
    # explicit counter registers and a brnz branch.
    for buffer in stream:
        for lsa in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, lsa, buffer))
            current.v = current + current
            code.add(spu.stqx(current, lsa, buffer))

    proc = env.Processor()
    r = proc.execute(code, n_spus=n_spus)

    for index in range(n):
        assert (a[index] == index + index)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: spu_log.py Projeto: maxim-tyutyunnikov/corepy

def TestLog():
    code = synspu.InstructionStream()
    proc = synspu.Processor()

    spu.set_active_code(code)
    # Create a simple SPU program that computes log for 10 values and
    # sends the result back using the mailbox

    log = SPULog()

    values = []
    result = code.acquire_register()

    N = 10

    x = 1
    for i in range(N):
        val = var.Word(x)
        spu.cuflt(val, val, 155)
        values.append(val)
        x = x * 10

    log.setup(code)
    log.set_result(result)

    for i in range(N):

        log.set_x(values[i])
        log.synthesize(code)

        spu.wrch(result, dma.SPU_WrOutMbox)

    spe_id = proc.execute(code, mode='async')

    x = 1
    for i in range(N):
        while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
            pass
        print 'log said: 0x%08X  (%d)' % (
            synspu.spu_exec.read_out_mbox(spe_id), x)
        x = x * 10

    proc.join(spe_id)

    return

Exemplo n.º 18

0

Exibir arquivo

Arquivo: ispugui.py Projeto: tmaone/efi

    def _startSPU(self):
        """Allocate an SPU context, run a no-op stream, and map local store.

        The context is stored on self.ctx.  A one-instruction (nop) stream
        is cached and run so the prolog is executed; its byte length is
        rounded up to a multiple of 16 and it is placed so that it ends at
        address 0x40000.  self.localstore is then created as a
        262144 / 4 element array whose memory is set to ctx.spuls.
        """
        ctx = env.spu_exec.alloc_context()
        self.ctx = ctx

        # Execute a no-op instruction stream so the prolog is executed
        code = env.InstructionStream()
        code.add(spu.nop(code.r_zero))
        code.cache_code()

        itemsize = code.render_code.itemsize
        code_len = len(code.render_code) * itemsize

        # Pad up to the next 16-byte boundary (adds 0 when already aligned).
        code_len += -code_len % 16
        code_lsa = 0x40000 - code_len

        env.spu_exec.run_stream(ctx, code.inst_addr(), code_len, code_lsa,
                                code_lsa)

        localstore = extarray.extarray('I', 262144 / 4)
        self.localstore = localstore
        localstore.set_memory(ctx.spuls)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: ispugui.py Projeto: tmaone/efi

    def GenerateStream(self, step=None):
        """Build an instruction stream from the text in the editor control.

        Blank lines and lines starting with '#' produce nothing.  A line
        ending in ':' is a label and is added via code.get_label(name).
        Any other line is evaluated as ``spu.<line>``, after rewriting
        ``Label(name)`` to ``code.get_label('name')``.  A line with a
        breakpoint set is replaced by ``stop(0x2FFF)``.

        Args:
          step: when not None, the index of the only line that is emitted
            normally; every other instruction line becomes ``stop(0x2FFF)``
            (labels are still emitted as labels).

        Returns:
          The instruction stream, after cache_code() has been called on it.

        A line that fails to evaluate is reported with a printed message and
        skipped.  Previously the code fell through to ``code.add(inst)``
        with ``inst`` either unbound or left over from an earlier line.
        """
        code = env.InstructionStream()
        txt = self.editCtrl.GetText().split('\n')

        for i, line in enumerate(txt):
            # For the stop case, want all instructions except the current one
            # to be STOP instructions.
            cmd = line.strip()
            if step is not None and i != step:
                if cmd == "" or cmd[0] == '#':
                    continue
                if cmd[-1] == ":":
                    # Label - better parsing?
                    code.add(code.get_label(cmd[:-1]))
                else:
                    code.add(spu.stop(0x2FFF))
                continue

            if self.editCtrl.IsBreakSet(i):
                code.add(spu.stop(0x2FFF))
                continue

            if cmd == "" or cmd[0] == '#':
                continue

            if cmd[-1] == ":":
                # Label - better parsing?
                inst = code.get_label(cmd[:-1])
            else:
                # Instruction
                strcmd = re.sub(r"Label\((.*?)\)", "code.get_label('\\1')",
                                cmd)
                # NOTE(review): eval() runs arbitrary editor text as Python.
                # Acceptable only while the editor content is trusted; the
                # evaluated text relies on the local name `code`.
                try:
                    inst = eval('spu.%s' % strcmd)
                except Exception:
                    print 'Error creating instruction: %s' % cmd
                    continue

            code.add(inst)
        code.cache_code()
        return code

Exemplo n.º 20

0

Exibir arquivo

def TestSignal():

    code = synspu.InstructionStream()

    # Get a signal from the PPU
    reg = spu_read_signal1(code)

    # And send it back
    code.add(spu.wrch(reg, SPU_WrOutMbox))

    proc = synspu.Processor()

    spe_id = proc.execute(code, async=True)
    synspu.spu_exec.write_signal(spe_id, 1, 0xCAFEBABEl)

    while synspu.spu_exec.stat_out_mbox(spe_id) == 0:
        pass

    print 'sig said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))

    proc.join(spe_id)

    return

Exemplo n.º 21

0

Exibir arquivo

Arquivo: iterators.py Projeto: microwave89-hv/efi

def TestStreamBufferDouble(n_spus=1):
    """Double a 2048-word array using stream_buffer in 'double' buffer mode.

    Buffers are 32 bytes; inside each one a syn_iter steps through the
    offsets 16 at a time, and every quadword is loaded, added to itself and
    stored back.  With n_spus > 1 the parallel stream variants are used.
    Each element must end up equal to twice its index.
    """
    n = 2048
    buffer_size = 32
    a = extarray.extarray('I', range(n))

    if n_spus > 1:
        code = env.ParallelInstructionStream()
    else:
        code = env.InstructionStream()

    current = var.SignedWord(0, code)

    addr = a.buffer_info()[0]
    n_bytes = n * 4

    stream = stream_buffer(code, addr, n_bytes, buffer_size, 0,
                           buffer_mode='double', save=True)
    if n_spus > 1:
        stream = parallel(stream)

    for buffer in stream:
        for lsa in syn_iter(code, buffer_size, 16):
            code.add(spu.lqx(current, lsa, buffer))
            current.v = current + current
            code.add(spu.stqx(current, lsa, buffer))

    proc = env.Processor()
    r = proc.execute(code, n_spus=n_spus)

    for index in range(len(a)):
        assert (a[index] == index + index)

Exemplo n.º 22

0

Exibir arquivo

# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import corepy.lib.extarray as extarray
import corepy.arch.spu.isa as spu
import corepy.arch.spu.platform as env
import corepy.arch.spu.lib.dma as dma
from corepy.arch.spu.lib.util import load_word

import time

if __name__ == '__main__':
    code = env.InstructionStream()
    proc = env.Processor()

    spu.set_active_code(code)

    r_cnt = code.acquire_register()
    load_word(code, r_cnt, 0x10000)

    br_loop = code.size()

    spu.ai(r_cnt, r_cnt, -1)
    spu.brnz(r_cnt, br_loop - code.size())

    code.print_code()

    for i in xrange(0, 10000):

Exemplo n.º 23

0

Exibir arquivo

Arquivo: spu_basics.py Projeto: maxim-tyutyunnikov/corepy

def MemoryDescExample(data_size=20000):
    """
  This example uses a memory descriptor to move 20k integers back and 
  forth between main memory and the SPU local store. Each value is
  incremented by 1 while on the SPU.
  
  Memory descriptors are a general purpose method for describing a
  region of memory.  Memory is described by a typecode, address, and
  size.  Memory descriptors can be initialized by hand or from an
  array or buffer object.

  For main memory, memory descriptors are useful for transfering data
  between main memory and an SPU's local store.  The get/put methods
  on a memory descriptor generate the SPU code to move data of any
  size between main memory and local store.

  Memory descriptors can also be used with spu_vec_iters to describe
  the region of memory to iterate over.  The typecode in the memory
  descriptor is used to determine the type for the loop induction
  variable.

  Note that there is currently no difference between memory
  descriptors for main memory and local store.  It's up to the user to
  make sure the memory descriptor settings make sense in the current
  context.  (this will probably change in the near future)

  Note: get/put currently use loops rather than display lists for
        transferring data over 16k.
  """

    code = env.InstructionStream()
    proc = env.Processor()

    code.debug = True
    spu.set_active_code(code)

    # Create a python array
    data = extarray.extarray('I', range(data_size))

    # Align the data in the array
    #a_data = aligned_memory(data_size, typecode = 'I')
    #a_data.copy_to(data.buffer_info()[0], data_size)

    # Create memory descriptor for the data in main memory
    data_desc = memory_desc('I')
    #data_desc.from_array(a_data)
    data_desc.from_array(data)

    # Transfer the data to 0x0 in the local store
    data_desc.get(code, 0)

    # Create memory descriptor for the data in the local store for use
    # in the iterator
    lsa_data = memory_desc('i', 0, data_size)

    # Add one to each value
    for x in spu_vec_iter(code, lsa_data):
        x.v = x + 1

    # Transfer the data back to main memory
    data_desc.put(code, 0)

    dma.spu_write_out_mbox(code, 0xCAFE)

    # Execute the synthetic program
    # code.print_code()

    spe_id = proc.execute(code, async=True)
    proc.join(spe_id)

    # Copy it back to the Python array
    #a_data.copy_from(data.buffer_info()[0], data_size)

    for i in xrange(data_size):
        assert (data[i] == i + 1)
    return

Exemplo n.º 24

0

Exibir arquivo

Arquivo: iterators.py Projeto: microwave89-hv/efi

def TestSPUIter():
    """Double a 32-word array on the SPU using explicit DMA and syn_iter.

    The synthesized program issues mfc_get with tag 12 to bring the array
    to local-store address 0, waits on that tag, walks the data 16 bytes at
    a time with syn_iter (each quadword is loaded, added to itself and
    stored back), then issues mfc_put with tag 13 and waits on that tag.
    After execution every element must equal twice its index.
    """
    size = 32
    data = extarray.extarray('I', range(size))
    code = env.InstructionStream()

    r_zero = code.acquire_register()
    r_ea_data = code.acquire_register()
    r_ls_data = code.acquire_register()
    r_size = code.acquire_register()
    r_tag = code.acquire_register()

    # Load zero
    util.load_word(code, r_zero, 0)

    #print 'array ea: %X' % (data.buffer_info()[0])
    #print 'r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
    #  str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag))

    # Load the effective address
    util.load_word(code, r_ea_data, data.buffer_info()[0])

    # Load the size in bytes (4 bytes per element)
    util.load_word(code, r_size, size * 4)

    # Load the tag (12) used for the inbound transfer
    code.add(spu.ai(r_tag, r_zero, 12))

    # Load the lsa (0)
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Load the data into address 0
    dma.mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag mask to bit 12, matching the tag used above
    dma.mfc_write_tag_mask(code, 1 << 12)

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code)

    # Variable that holds the quadword currently being processed
    # r_current = code.acquire_register()
    current = var.SignedWord(0, code)

    # Use an SPU iter to double each quadword: load, add to itself, store
    for lsa in syn_iter(code, size * 4, 16):
        code.add(spu.lqx(current, r_zero, lsa))
        # code.add(spu.ai(1, r_current, r_current))
        current.v = current + current
        code.add(spu.stqx(current, r_zero, lsa))

    # code.release_register(r_current)
    #current.release_register(code)

    # Store the values back to main memory

    # Load the tag (13) used for the outbound transfer
    code.add(spu.ai(r_tag, r_zero, 13))

    # Write the data at address 0 back to main memory
    dma.mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag mask to bit 13, matching the tag used above
    dma.mfc_write_tag_mask(code, 1 << 13)

    # Wait for the transfer to complete
    dma.mfc_read_tag_status_all(code)

    # Cleanup
    code.release_register(r_zero)
    code.release_register(r_ea_data)
    code.release_register(r_ls_data)
    code.release_register(r_size)
    code.release_register(r_tag)

    # Stop for debugging
    # code.add(spu.stop(0xA))

    # Execute the code
    proc = env.Processor()
    r = proc.execute(code)

    # Every element should have been doubled
    for i in range(0, size):
        assert (data[i] == i + i)

    return

Exemplo n.º 25

0

Exibir arquivo

def TestTanimotoBlock(n_vecs = 4):
  """Time a synthesized TanimotoBlock loop and print throughput figures.

  A TanimotoBlock is configured for an m x n (128 x 64) block with
  128 * n_vecs bits per input, saving through a LocalSave that is chained
  to a MemorySave.  The kernel is synthesized inside a syn_iter loop of
  10000 samples.  Wall-clock time is measured from just before execute()
  until the SPU's outbound mailbox becomes non-empty.

  n_vecs: multiplier used for n_bits, the local results address and the
          y address.
  """
  code = synspu.InstructionStream()
  proc = synspu.Processor()

  code.set_debug(True)
  spu.set_active_code(code)
  
  tb = TanimotoBlock()
  ls_save = LocalSave()
  mm_save = MemorySave()

  # NOTE(review): set_debug(True) was already called above
  code.set_debug(True)

  # Input block parameters
  m = 128
  n = 64
  # n_vecs = 9
  n_bits = 128 * n_vecs

  # Main memory results buffer
  # max_results = 2**16
  max_results = 16384
  words_per_result = 4

  # Every word of the results array starts out as 12
  mm_results_data = array.array('I', [12 for i in range(max_results * words_per_result)])
  #mm_results_buffer = synspu.aligned_memory(max_results * words_per_result, typecode = 'I')
  # mm_results_buffer.copy_to(mm_results_data.buffer_info()[0], len(mm_results_data))

  mm_results = spuiter.memory_desc('I')
  #mm_results.from_array(mm_results_buffer)
  mm_results.from_array(mm_results_data)

  mm_save.set_md_save_buffer(mm_results)
    
  # Local Results buffer: size and address are held in SPU variables
  buffer_size = var.SignedWord(16384)
  buffer_addr = var.SignedWord(m * n * n_vecs * 4)
  ls_results = spuiter.memory_desc('B')
  ls_results.set_size_reg(buffer_size)
  ls_results.set_addr_reg(buffer_addr)

  ls_save.set_md_results(ls_results)
  ls_save.set_mm_save_op(mm_save)

  # Setup the TanimotoBlock class
  tb.set_n_bits(n_bits)
  tb.set_block_size(m, n)

  tb.set_x_addr(0)
  tb.set_y_addr(m * n_vecs * 16)
  tb.set_save_op(ls_save)

  # Main test loop
  n_samples = 10000
  for samples in spuiter.syn_iter(code, n_samples):
    tb.synthesize(code)

  # Post a word to the outbound mailbox; the host uses it as the
  # "finished" marker for timing
  spu.wrch(buffer_size, dma.SPU_WrOutMbox)
  
  spu.stop(0x2000) 

  # "Function" Calls
  # NOTE(review): presumably these emit the save routines after the stop
  # instruction - confirm against LocalSave/MemorySave
  ls_save.block()
  mm_save.block()

  # code.print_code()
  start = time.time()
  spe_id = proc.execute(code, async=True)
  
  # Spin until the mailbox has an entry; the entry itself is not read
  while synspu.spu_exec.stat_out_mbox(spe_id) == 0: pass
  # print 'tb said: 0x%X' % (synspu.spu_exec.read_out_mbox(spe_id))
  stop = time.time()

  # mm_results_buffer.copy_from(mm_results_data.buffer_info()[0], len(mm_results_data))
  
  proc.join(spe_id)
  total = stop - start
  # All rates below are divided by 1e9 (giga-units per second)
  bits_sec = (m * n * n_bits * n_samples) / total / 1e9
  ops_per_compare = 48 * 4 + 8  # 48 SIMD instructions, 8 scalar
  insts_per_compare = 56
  gops = (m * n * n_vecs * n_samples * ops_per_compare ) / total / 1e9
  ginsts = (m * n * n_vecs * n_samples * insts_per_compare ) / total / 1e9  
  print '%.6f sec, %.2f Gbits/sec, %.2f GOps, %.2f GInsts, %d insts' % (
    total, bits_sec, gops, ginsts, code.size())
  return

Exemplo n.º 26

0

Exibir arquivo

def TestMFC():
    size = 32
    #data_array = array.array('I', range(size))
    #data = synspu.aligned_memory(size, typecode = 'I')
    #data.copy_to(data_array.buffer_info()[0], len(data_array))
    data = extarray.extarray('I', range(size))
    code = synspu.InstructionStream()

    r_zero = code.acquire_register()
    r_ea_data = code.acquire_register()
    r_ls_data = code.acquire_register()
    r_size = code.acquire_register()
    r_tag = code.acquire_register()

    # Load zero
    util.load_word(code, r_zero, 0)

    print 'array ea: %X' % (data.buffer_info()[0])
    print 'r_zero = %s, ea_data = %s, ls_data = %s, r_size = %s, r_tag = %s' % (
        str(r_zero), str(r_ea_data), str(r_ls_data), str(r_size), str(r_tag))

    # Load the effective address
    print 'test ea: %X' % data.buffer_info()[0]
    util.load_word(code, r_ea_data, data.buffer_info()[0])

    # Load the size
    code.add(spu.ai(r_size, r_zero, size * 4))

    # Load the tag
    code.add(spu.ai(r_tag, r_zero, 2))

    # Load the lsa
    code.add(spu.ai(r_ls_data, r_zero, 0))

    # Load the data into address 0
    mfc_get(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 2
    mfc_write_tag_mask(code, 1 << 2)

    # Wait for the transfer to complete
    mfc_read_tag_status_all(code)

    # Increment the data values by 1 using an unrolled loop (no branches)
    r_current = code.acquire_register()

    for lsa in range(0, size * 4, 16):
        code.add(spu.lqa(r_current, (lsa >> 2)))
        code.add(spu.ai(r_current, r_current, 1))
        code.add(spu.stqa(r_current, (lsa >> 2)))

    code.release_register(r_current)

    # Store the values back to main memory

    # Load the data into address 0
    mfc_put(code, r_ls_data, r_ea_data, r_size, r_tag)

    # Set the tag bit to 2
    mfc_write_tag_mask(code, 1 << 2)

    # Wait for the transfer to complete
    mfc_read_tag_status_all(code)

    # Cleanup
    code.release_register(r_zero)
    code.release_register(r_ea_data)
    code.release_register(r_ls_data)
    code.release_register(r_size)
    code.release_register(r_tag)

    # Stop for debugging
    # code.add(spu.stop(0xA))

    # Execute the code
    proc = synspu.Processor()
    # code.print_code()
    #print data_array
    proc.execute(code)

    #data.copy_from(data_array.buffer_info()[0], len(data_array))

    for i in range(size):
        assert (data[i] == i + 1)

    return

Exemplo n.º 27

0

Exibir arquivo

    def generate(self,
                 results,
                 patterns,
                 r1_range,
                 r2_range,
                 max_init,
                 max_n,
                 size,
                 n_spus=6):
        # Connect to the framebuffer
        #fb = cell_fb.framebuffer()
        #cell_fb.fb_open(fb)
        buffer = extarray.extarray('B', size[0] * size[1] * 4)
        buffer.clear()

        # Setup the range parameter array
        r1_inc = (r1_range[1] - r1_range[0]) / size[0]
        r2_inc = (r2_range[1] - r2_range[0]) / size[1]

        ranges = [0 for i in range(n_spus)]
        #a_ranges = [0 for i in range(n_spus)]

        # Slice and dice for parallel execution
        spu_slices = [[size[0], size[1] / n_spus] for ispu in range(n_spus)]
        spu_slices[-1][1] += size[1] % n_spus

        offset = 0.0
        for ispu in range(n_spus):
            ranges[ispu] = extarray.extarray('f', [0.0] * 16)

            for i in range(4):
                ranges[ispu][
                    i] = r1_range[0] + float(i) * r1_inc  # horizontal is simd
                ranges[ispu][4 + i] = r2_range[0] + offset
                ranges[ispu][8 + i] = r1_inc * 4.0
                ranges[ispu][12 + i] = r2_inc
            # print ranges

            # Copy the paramters to aligned buffers
            #a_ranges[ispu] = synspu.aligned_memory(len(ranges[ispu]), typecode='I')
            #a_ranges[ispu].copy_to(ranges[ispu].buffer_info()[0], len(ranges[ispu]))

            offset += r2_inc * spu_slices[ispu][1]

        # Setup the pattern vector
        for pattern in patterns:
            if len(pattern) != len(patterns[0]):
                raise Exception('All patterns must be the same length')

        bits = [_pattern2vector(pattern) for pattern in patterns]
        #a_pattern = synspu.aligned_memory(len(bits[0]), typecode='I')
        pattern = extarray.extarray('I', len(bits[0]))

        # Create the instruction streams
        codes = []

        n = len(patterns) * 10
        offset = 0
        for ispu in range(n_spus):
            renderer = FBRenderer()
            renderer.set_lsa(0x100)
            #renderer.set_addr(cell_fb.fb_addr(fb, 0) + offset)
            renderer.set_addr(buffer.buffer_info()[0] + offset)
            renderer.set_width(size[0])
            #renderer.set_stride(fb.stride)
            renderer.set_stride(size[0])

            ly_block = LyapunovBlock()

            ly_block.set_size(*spu_slices[i])
            #ly_block.set_range(a_ranges[ispu])
            ly_block.set_range(ranges[ispu])
            #ly_block.set_pattern(a_pattern)
            ly_block.set_pattern(pattern)
            ly_block.set_max_init(max_init)
            ly_block.set_max_n(max_n)
            ly_block.set_renderer(renderer)

            code = synspu.InstructionStream()
            # code.set_debug(True)
            codes.append(code)
            #offset += spu_slices[i][1] * fb.stride * 4
            offset += spu_slices[i][1] * size[0] * 4

            # for i in spuiter.syn_range(code, n):
            ly_block.synthesize(code)

        # code.print_code()
        proc = synspu.Processor()

        #cell_fb.fb_clear(fb, 0)
        buffer.clear()

        import time
        ids = [0 for i in range(n_spus)]
        start = time.time()

        ipattern = 0
        n_patterns = len(patterns)
        len_bits = len(bits[0])
        pattern_inc = 1

        for i in range(n):
            #a_pattern.copy_to(bits[ipattern].buffer_info()[0], len_bits)
            # TODO - better/faster
            for j in xrange(0, len_bits):
                pattern[j] = bits[ipattern][j]

            for ispu in range(n_spus):
                ids[ispu] = proc.execute(codes[ispu], async=True)

            for ispu in range(n_spus):
                proc.join(ids[ispu])

            #cell_fb.fb_wait_vsync(fb)
            #cell_fb.fb_flip(fb, 0)
            # TODO - write buffer to image file
            #im = Image.frombuffer("RGBA", size, buffer.tostring(), "raw", "RGBA", 0, 1)
            imgbuf = Image.new("RGBA", size)

            arr = [(buffer[i + 3], buffer[i + 2], buffer[i + 1], 0xFF)
                   for i in xrange(0, len(buffer), 4)]
            imgbuf.putdata(arr)
            imgbuf.save("lyapunov_%d.png" % ipattern)

            ipattern += pattern_inc
            if (ipattern == (n_patterns - 1)) or (ipattern == 0):
                pattern_inc *= -1

            print ipattern

        stop = time.time()

        print '%.2f fps (%.6f)' % (float(n) / (stop - start), (stop - start))
        #cell_fb.fb_close(fb)

        return