Пример #1
0
 def mmLoaddupPd(self, src, repList):
     """Build a shuffle-based replacement for ``src`` from an earlier dst.

     ``repList`` is sorted in descending order by the first item of each
     entry, and the second item of the leading entry is used as the data
     source (presumably the entries are (line, dst) tuples -- confirm
     against the caller).

     When that source has ``reglen == 2`` and ``mrmap == [0, 1]`` the result
     is an ``mmShufflePd`` of two ``mmLoaduPd`` loads of its pointer, with
     both selector entries equal to ``src.pointer.getAt()[0]``.  In every
     other case ``None`` is returned.
     """
     ordered = sorted(repList, key=lambda entry: entry[0], reverse=True)
     source = ordered[0][1]
     if not (source.reglen == 2 and source.mrmap == [0, 1]):
         # No replacement is produced for other register layouts.
         return None

     at = src.pointer.getAt()
     firstLoad = mmLoaduPd(source.pointer)
     secondLoad = mmLoaduPd(source.pointer)
     return mmShufflePd(firstLoad, secondLoad, (at[0], at[0]))
Пример #2
0
    def storeMatrix(self, mParams):
        """Return the instructions copying ``mParams['nuM']`` into ``mParams['m']``.

        Keys read from ``mParams``: ``'nuM'``, ``'m'`` (source and
        destination), ``'nuML'``/``'nuMR'`` and ``'mL'``/``'mR'`` (objects
        whose ``of(i)`` yields the indices used on source and destination),
        ``'M'``/``'N'`` (sizes) and ``'compact'``.

        Only two cases emit code:
          * M == 1 and N == 1: one load followed by an ``mmStoreSd``;
          * M == 2 and N == 1 with ``compact`` false: one load, a shuffle
            with selector (1, 1), and two ``mmStoreSd`` stores.
        Any other combination returns an empty list.
        """
        tile, target = mParams['nuM'], mParams['m']
        tileL, tileR = mParams['nuML'], mParams['nuMR']
        targetL, targetR = mParams['mL'], mParams['mR']
        rows, cols = mParams['M'], mParams['N']
        compact = mParams['compact']

        if rows == 1 and cols == 1:
            loaded = mmLoaduPd(Pointer(tile[tileL.of(0), tileR.of(0)]))
            address = AddressOf(sa(target[targetL.of(0), targetR.of(0)]))
            store = mmStoreSd(loaded, address)
            return [Comment("1x2 -> 1x1"), store]

        if rows == 2 and cols == 1 and not compact:
            loaded = mmLoaduPd(Pointer(tile[tileL.of(0), tileR.of(0)]))
            # Selector (1, 1): presumably moves element 1 into the low lane
            # so that mmStoreSd can write it -- confirm against mmShufflePd.
            shuffled = mmShufflePd(loaded, loaded, (1, 1))
            firstPtr = Pointer(target[targetL.of(0), targetR.of(0)])
            secondPtr = Pointer(target[targetL.of(1), targetR.of(0)])
            firstStore = mmStoreSd(loaded, firstPtr)
            secondStore = mmStoreSd(shuffled, secondPtr)
            return [
                Comment("2x1 -> 2x1 - (Store) Incompact"),
                firstStore,
                secondStore,
            ]

        return []
Пример #3
0
    def T(self, sParams, dParams, opts):
        """Return the instructions for a transpose on tiles with nu = 2.

        ``sParams`` and ``dParams`` supply the source and destination through
        their ``'nuM'``, ``'nuML'`` and ``'nuMR'`` entries; the sizes M and N
        come from ``dParams['nuMM']`` and ``dParams['nuMN']``.  ``opts`` is
        not used.

        The list always starts with a descriptive ``Comment``.  If
        ``M * N == nu`` a single load/store pair follows; otherwise two rows
        are loaded, combined with ``mmUnpackloPd``/``mmUnpackhiPd`` and
        written to two destination rows.
        """
        nu = 2
        inTile, outTile = sParams['nuM'], dParams['nuM']
        inL, inR = sParams['nuML'], sParams['nuMR']
        outL, outR = dParams['nuML'], dParams['nuMR']
        M, N = dParams['nuMM'], dParams['nuMN']

        title = str(nu) + "-BLAC: (" + str(N) + "x" + str(M) + ")^T"
        program = [Comment(title)]

        if M * N == nu:
            # The whole tile fits in one register: load it and store it as is.
            loaded = mmLoaduPd(Pointer(inTile[inL.of(0), inR.of(0)]))
            target = Pointer(outTile[outL.of(0), outR.of(0)])
            program.append(mmStoreuPd(loaded, target))
            return program

        rowLoads = [
            mmLoaduPd(Pointer(inTile[inL.of(r), inR.of(0)])) for r in range(2)
        ]
        rowTargets = [
            Pointer(outTile[outL.of(r), outR.of(0)]) for r in range(2)
        ]
        lowHalves = mmUnpackloPd(rowLoads[0], rowLoads[1])
        highHalves = mmUnpackhiPd(rowLoads[0], rowLoads[1])
        program.append(mmStoreuPd(lowHalves, rowTargets[0]))
        program.append(mmStoreuPd(highHalves, rowTargets[1]))
        return program
Пример #4
0
    def Add(self, s0Params, s1Params, dParams, opts):
        """Return the instructions for an element-wise addition with nu = 2.

        Each parameter dict supplies its operand via ``'nuM'`` and the index
        objects ``'nuML'``/``'nuMR'``; the sizes M and N are read from
        ``dParams['nuMM']`` and ``dParams['nuMN']``.  ``opts`` is not used.

        After a descriptive ``Comment`` the list holds one
        load/load/add/store group when ``M * N == nu``, ``M`` such groups
        (one per row) when ``M == nu and N == nu``, and nothing else for any
        other size.
        """
        nu = 2
        lhs, rhs, out = s0Params['nuM'], s1Params['nuM'], dParams['nuM']
        lhsL, lhsR = s0Params['nuML'], s0Params['nuMR']
        rhsL, rhsR = s1Params['nuML'], s1Params['nuMR']
        outL, outR = dParams['nuML'], dParams['nuMR']
        M, N = dParams['nuMM'], dParams['nuMN']

        shape = str(M) + "x" + str(N)
        program = [Comment(str(nu) + "-BLAC: " + shape + " + " + shape)]

        # Number of register rows to process for the supported sizes.
        if M * N == nu:
            rowCount = 1
        elif M == nu and N == nu:
            rowCount = M
        else:
            rowCount = 0

        for row in range(rowCount):
            left = mmLoaduPd(Pointer(lhs[lhsL.of(row), lhsR.of(0)]))
            right = mmLoaduPd(Pointer(rhs[rhsL.of(row), rhsR.of(0)]))
            target = Pointer(out[outL.of(row), outR.of(0)])
            program.append(mmStoreuPd(mmAddPd(left, right), target))

        return program
Пример #5
0
    def Kro(self, s0Params, s1Params, dParams, opts):
        """Return the instructions for the "Kro" operation with nu = 2.

        Operands and destination are taken from the ``'nuM'``, ``'nuML'`` and
        ``'nuMR'`` entries of the three parameter dicts.  ``'M'``/``'N'`` of
        the two sources give the sizes oM, oK, oN, oP and ``'nuMM'``/``'nuMN'``
        give M, K, N, P.  ``opts`` is not used.

        After a descriptive ``Comment`` one of three code shapes is emitted:
          * ``oM * oK * oN * oP == 1``: a single multiply of the two loads;
          * ``oM * oK == 1``: the first operand is shuffled with selector
            (0, 0) (presumably broadcasting element 0) and multiplied with
            one row of the second operand if ``N * P == nu``, else ``nu`` rows;
          * otherwise: the second operand is shuffled the same way and
            multiplied with one row of the first operand if ``M * K == nu``,
            else ``nu`` rows.
        """
        nu = 2
        lhs, rhs, out = s0Params['nuM'], s1Params['nuM'], dParams['nuM']
        lhsL, lhsR = s0Params['nuML'], s0Params['nuMR']
        rhsL, rhsR = s1Params['nuML'], s1Params['nuMR']
        outL, outR = dParams['nuML'], dParams['nuMR']
        oM, oK = s0Params['M'], s0Params['N']
        oN, oP = s1Params['M'], s1Params['N']
        M, K = s0Params['nuMM'], s0Params['nuMN']
        N, P = s1Params['nuMM'], s1Params['nuMN']

        title = (str(nu) + "-BLAC: " + str(M) + "x" + str(K) + " Kro " +
                 str(N) + "x" + str(P))
        program = [Comment(title)]

        if oM * oK * oN * oP == 1:
            target = Pointer(out[outL.of(0), outR.of(0)])
            left = mmLoaduPd(Pointer(lhs[lhsL.of(0), lhsR.of(0)]))
            right = mmLoaduPd(Pointer(rhs[rhsL.of(0), rhsR.of(0)]))
            program.append(mmStoreuPd(mmMulPd(left, right), target))
        elif oM * oK == 1:
            # One row when the second operand fits a register, else nu rows.
            rowCount = 1 if N * P == nu else nu
            left = mmLoaduPd(Pointer(lhs[lhsL.of(0), lhsR.of(0)]))
            spread = mmShufflePd(left, left, (0, 0))
            for row in range(rowCount):
                right = mmLoaduPd(Pointer(rhs[rhsL.of(row), rhsR.of(0)]))
                target = Pointer(out[outL.of(row), outR.of(0)])
                program.append(mmStoreuPd(mmMulPd(spread, right), target))
        else:
            # One row when the first operand fits a register, else nu rows.
            rowCount = 1 if M * K == nu else nu
            right = mmLoaduPd(Pointer(rhs[rhsL.of(0), rhsR.of(0)]))
            spread = mmShufflePd(right, right, (0, 0))
            for row in range(rowCount):
                left = mmLoaduPd(Pointer(lhs[lhsL.of(row), lhsR.of(0)]))
                target = Pointer(out[outL.of(row), outR.of(0)])
                program.append(mmStoreuPd(mmMulPd(left, spread), target))

        return program
Пример #6
0
 def ScaLoad(self, src, repList):
     """Build the replacement expression for the scalar load ``src``.

     ``repList`` is a list of (line, dst) tuples; the pointer of the first
     entry's dst is the location that gets loaded.  The precision is chosen
     by ``self.opts['precision']``: ``'float'`` selects the Ps/ss variants,
     anything else the Pd/sd variants.

     If ``src.pointer.at[1]`` is 0 the load is converted directly;
     otherwise two loads are first shuffled with that value as the last
     selector entry and the shuffle result is converted.
     """
     singlePrecision = (self.opts['precision'] == 'float')
     lane = src.pointer.at[1]
     location = repList[0][1].pointer

     if lane == 0:
         if singlePrecision:
             return mmCvtssf32(mmLoaduPs(location))
         return mmCvtsdf64(mmLoaduPd(location))

     if singlePrecision:
         first = mmLoaduPs(location)
         second = mmLoaduPs(location)
         return mmCvtssf32(mmShufflePs(first, second, (0, 0, 0, lane)))

     first = mmLoaduPd(location)
     second = mmLoaduPd(location)
     return mmCvtsdf64(mmShufflePd(first, second, (0, lane)))
Пример #7
0
 def ScaLoad(self, src, repList, bounds):
     '''
     Build the replacement expression for the scalar load ``src``.

     src is the ScaLoad object we want to replace.
     repList is a list of tuples (line, dst). The dst elements of the tuples
     are store commands. The entry with the highest line (first after a
     descending sort) provides the pointer that gets loaded.
     bounds is accepted but not used here.

     self.opts['precision'] == 'float' selects the Ps/ss variants, anything
     else the Pd/sd variants. If src.pointer.at[1] is 0 the load is converted
     directly; otherwise two loads are shuffled with that value as the last
     selector entry before the conversion.
     '''
     ordered = sorted(repList, key=lambda entry: entry[0], reverse=True)
     singlePrecision = (self.opts['precision'] == 'float')
     lane = src.pointer.at[1]
     location = ordered[0][1].pointer

     if lane == 0:
         if singlePrecision:
             return mmCvtssf32(mmLoaduPs(location))
         return mmCvtsdf64(mmLoaduPd(location))

     if singlePrecision:
         first = mmLoaduPs(location)
         second = mmLoaduPs(location)
         return mmCvtssf32(mmShufflePs(first, second, (0, 0, 0, lane)))

     first = mmLoaduPd(location)
     second = mmLoaduPd(location)
     return mmCvtsdf64(mmShufflePd(first, second, (0, lane)))
Пример #8
0
    def Mul(self, s0Params, s1Params, dParams, opts):
        """Return the instructions for an MxK times KxN product with nu = 2.

        Operands and destination are taken from the ``'nuM'``, ``'nuML'`` and
        ``'nuMR'`` entries of the three parameter dicts; M and K come from
        ``s0Params['nuMM']``/``['nuMN']`` and N from ``s1Params['nuMN']``.
        ``opts`` is not used.

        After a descriptive ``Comment`` the emitted code depends on the sizes:
          * M == 1, N == 1: multiply, then ``mmHaddPd`` with a zero register;
          * M == 1, N != 1: the two rows of the second operand are combined
            with unpacklo/unpackhi, multiplied with the first operand and
            reduced with ``mmHaddPd``;
          * M != 1, K == 1: each of two ``mmLoaddupPd`` loads of the first
            operand is multiplied with the second operand's row;
          * M != 1, K != 1, N == 1: both rows of the first operand are
            multiplied with the second operand and reduced with ``mmHaddPd``;
          * otherwise: the second operand is unpacked as above and each row
            of the first operand yields one ``mmHaddPd`` result row.
        """
        nu = 2
        lhs, rhs, out = s0Params['nuM'], s1Params['nuM'], dParams['nuM']
        lhsL, lhsR = s0Params['nuML'], s0Params['nuMR']
        rhsL, rhsR = s1Params['nuML'], s1Params['nuMR']
        outL, outR = dParams['nuML'], dParams['nuMR']
        M, K, N = s0Params['nuMM'], s0Params['nuMN'], s1Params['nuMN']

        title = (str(nu) + "-BLAC: " + str(M) + "x" + str(K) + " * " +
                 str(K) + "x" + str(N))
        program = [Comment(title)]

        if M == 1 and N == 1:
            left = mmLoaduPd(Pointer(lhs[lhsL.of(0), lhsR.of(0)]))
            right = mmLoaduPd(Pointer(rhs[rhsL.of(0), rhsR.of(0)]))
            target = Pointer(out[outL.of(0), outR.of(0)])
            # NOTE(review): a disabled alternative stored this result with
            # mmStoreSd instead of mmStoreuPd.
            reduced = mmHaddPd(mmMulPd(left, right), mmSetzeroPd())
            program.append(mmStoreuPd(reduced, target))
        elif M == 1:
            left = mmLoaduPd(Pointer(lhs[lhsL.of(0), lhsR.of(0)]))
            rightRow0 = mmLoaduPd(Pointer(rhs[rhsL.of(0), rhsR.of(0)]))
            rightRow1 = mmLoaduPd(Pointer(rhs[rhsL.of(1), rhsR.of(0)]))
            rightLow = mmUnpackloPd(rightRow0, rightRow1)
            rightHigh = mmUnpackhiPd(rightRow0, rightRow1)
            target = Pointer(out[outL.of(0), outR.of(0)])
            reduced = mmHaddPd(mmMulPd(left, rightLow),
                               mmMulPd(left, rightHigh))
            program.append(mmStoreuPd(reduced, target))
        elif K == 1:
            leftDup0 = mmLoaddupPd(Pointer(lhs[lhsL.of(0), lhsR.of(0)]))
            leftDup1 = mmLoaddupPd(Pointer(lhs[lhsL.of(1), lhsR.of(0)]))
            right = mmLoaduPd(Pointer(rhs[rhsL.of(0), rhsR.of(0)]))
            target0 = Pointer(out[outL.of(0), outR.of(0)])
            target1 = Pointer(out[outL.of(1), outR.of(0)])
            for leftDup, target in ((leftDup0, target0), (leftDup1, target1)):
                program.append(mmStoreuPd(mmMulPd(leftDup, right), target))
        elif N == 1:
            leftRow0 = mmLoaduPd(Pointer(lhs[lhsL.of(0), lhsR.of(0)]))
            leftRow1 = mmLoaduPd(Pointer(lhs[lhsL.of(1), lhsR.of(0)]))
            right = mmLoaduPd(Pointer(rhs[rhsL.of(0), rhsR.of(0)]))
            target = Pointer(out[outL.of(0), outR.of(0)])
            reduced = mmHaddPd(mmMulPd(leftRow0, right),
                               mmMulPd(leftRow1, right))
            program.append(mmStoreuPd(reduced, target))
        else:
            leftRow0 = mmLoaduPd(Pointer(lhs[lhsL.of(0), lhsR.of(0)]))
            leftRow1 = mmLoaduPd(Pointer(lhs[lhsL.of(1), lhsR.of(0)]))
            rightRow0 = mmLoaduPd(Pointer(rhs[rhsL.of(0), rhsR.of(0)]))
            rightRow1 = mmLoaduPd(Pointer(rhs[rhsL.of(1), rhsR.of(0)]))
            rightLow = mmUnpackloPd(rightRow0, rightRow1)
            rightHigh = mmUnpackhiPd(rightRow0, rightRow1)
            target0 = Pointer(out[outL.of(0), outR.of(0)])
            target1 = Pointer(out[outL.of(1), outR.of(0)])
            for leftRow, target in ((leftRow0, target0), (leftRow1, target1)):
                reduced = mmHaddPd(mmMulPd(leftRow, rightLow),
                                   mmMulPd(leftRow, rightHigh))
                program.append(mmStoreuPd(reduced, target))

        return program