Exemplo n.º 1
0
class TestRandomDiscretizedProjections(unittest.TestCase):
    """Checks the output format and repeatability of the projection hash."""

    def setUp(self):
        """Create a fresh hash object and reset it with the argument 100.

        The tests below feed it 100-element vectors.
        """
        self.rbp = RandomDiscretizedProjections('testHash', 10, 0.1)
        self.rbp.reset(100)

    def test_hash_format(self):
        """hash_vector returns a one-element sequence holding a string."""
        h = self.rbp.hash_vector(numpy.random.randn(100))
        self.assertEqual(len(h), 1)
        # assertIsInstance reports the offending value when it fails, unlike
        # comparing two type objects for equality.
        self.assertIsInstance(h[0], type(''))

    def test_hash_deterministic(self):
        """Hashing the same vector repeatedly always yields the same key."""
        x = numpy.random.randn(100)
        first_hash = self.rbp.hash_vector(x)[0]
        for _ in range(100):
            self.assertEqual(first_hash, self.rbp.hash_vector(x)[0])
Exemplo n.º 2
0
class featureLsh():
    """Merge feature columns whose keys fall into the same hash bucket.

    Column keys are underscore-separated integers (e.g. ``"3_7_1"``). Each
    key is hashed through ``self.rdp``; values of columns that share a hash
    are summed per input row and appended to ``self.data``.
    """

    def __init__(self, stage, bucket):
        """Create the projection hash and the empty result containers.

        Args:
            stage: forwarded as the second positional argument of
                ``RandomDiscretizedProjections``.
            bucket: forwarded as the third positional argument of
                ``RandomDiscretizedProjections``.
        """
        self.parentLevel = 5
        self.rdp = RandomDiscretizedProjections('rdp',
                                                stage,
                                                bucket,
                                                rand_seed=98412194)
        self.rdp.reset(5)
        # original column key -> hashed key ("program" maps to itself)
        self.hash_dict = {}
        # hashed key -> list with one merged value per processed row
        self.data = defaultdict(list)

    def get_hash(self, vector):
        """Return the first hash key that ``self.rdp`` produces for *vector*."""
        return self.rdp.hash_vector(vector)[0]

    def set_hash(self, header):
        """Fill ``self.hash_dict`` with a hashed key for every column name.

        Args:
            header: iterable of column names, each made of integers joined
                by ``"_"``.

        Raises:
            ValueError: if a part of a column name is not an integer.
        """
        # The "program" column is an identifier, not a feature: keep its name.
        self.hash_dict["program"] = "program"
        for column in header:
            vec = [int(part) for part in column.split("_")]
            self.hash_dict[column] = self.get_hash(vec)
        print("Setting hash done. Running lsh...")

    @staticmethod
    def _as_number(value):
        """Convert one cell to a number, mapping non-string NaN to 0."""
        if isinstance(value, str):
            # NOTE(review): strings are converted without a NaN check, so a
            # literal "nan" string still propagates NaN - confirm intended.
            return float(value)
        return 0 if isnan(value) else float(value)

    def update_dict(self, dicts):
        """Merge every row by hashed key and append the result to ``self.data``.

        Args:
            dicts: iterable of mappings from original column key to value.
                Every key must already be present in ``self.hash_dict``.

        Raises:
            KeyError: if a row contains a key unknown to ``self.hash_dict``.
        """
        print("updating_dict")
        for row in dicts:
            merged = {}
            for key, value in row.items():
                new_key = self.hash_dict[key]
                if new_key == "program":
                    # passed through untouched, never summed
                    merged[new_key] = value
                    continue
                number = self._as_number(value)
                if new_key in merged:
                    merged[new_key] += number
                else:
                    merged[new_key] = number
            for key, value in merged.items():
                self.data[key].append(value)
Exemplo n.º 3
0
class FeatureBuilder(Template2Listener):
    """Listener that counts a feature every time a handled rule is entered.

    Counts are kept in ``self.feature_vector`` (feature name -> occurrences).
    With ``parentLevel <= 1`` the feature is the context class name plus a
    ``t_<parent>_<node>`` transition; otherwise it is the hash of the chain of
    ancestors, up to ``parentLevel`` nodes long.
    """

    def __init__(self, level):
        """Set up empty counters and a hash reset with ``int(level)``."""
        self.feature_vector = {}
        depth = int(level)
        self.parentLevel = depth
        print(depth)
        self.hashes = {}

        projections = RandomDiscretizedProjections('rdp', 5, 6, rand_seed=98412194)
        projections.reset(depth)
        self.rdp = projections

    def getHash(self, vector):
        """Hash *vector*, right-padding it with zeros up to ``parentLevel``."""
        missing = self.parentLevel - len(vector)
        if missing > 0:
            vector = vector + [0] * missing
        return self.rdp.hash_vector(vector)[0]

    def getParents(self, ctx):
        """Return the ``fixed_hashes`` codes of *ctx* and its ancestors.

        The walk follows ``parentCtx`` links and stops at the root or after
        ``parentLevel`` nodes, whichever comes first.
        """
        codes = []
        node = ctx
        for _ in range(self.parentLevel):
            if node is None:
                break
            codes.append(fixed_hashes[node.__class__.__name__])
            node = node.parentCtx
        return codes

    def update_vector(self, ctx):
        """Increment the feature counter(s) that correspond to *ctx*."""
        if self.parentLevel > 1:
            name = self.getHash(self.getParents(ctx))
        else:
            name = type(ctx).__name__
            parent = ctx.parentCtx
            if parent is not None:
                # parent -> child transition feature
                transition = 't_' + type(parent).__name__ + '_' + name
                self.feature_vector[transition] = self.feature_vector.get(transition, 0) + 1

        self.feature_vector[name] = self.feature_vector.get(name, 0) + 1

    # Listener hooks: every handled rule is recorded in exactly the same way.

    def enterAddop(self, ctx):
        self.update_vector(ctx)

    def enterAnd(self, ctx):
        self.update_vector(ctx)

    def enterArray(self, ctx):
        self.update_vector(ctx)

    def enterArray_access(self, ctx):
        self.update_vector(ctx)

    def enterAssign(self, ctx):
        self.update_vector(ctx)

    def enterBlock(self, ctx):
        self.update_vector(ctx)

    def enterBrackets(self, ctx):
        self.update_vector(ctx)

    def enterData(self, ctx):
        self.update_vector(ctx)

    def enterDecl(self, ctx):
        self.update_vector(ctx)

    def enterPrimitive(self, ctx):
        self.update_vector(ctx)

    def enterNumber(self, ctx):
        self.update_vector(ctx)

    def enterDtype(self, ctx):
        self.update_vector(ctx)

    def enterVector(self, ctx):
        self.update_vector(ctx)

    def enterDims(self, ctx):
        self.update_vector(ctx)

    def enterVectorDIMS(self, ctx):
        self.update_vector(ctx)

    def enterLimits(self, ctx):
        self.update_vector(ctx)

    def enterPrior(self, ctx):
        self.update_vector(ctx)

    def enterParam(self, ctx):
        self.update_vector(ctx)

    def enterParams(self, ctx):
        self.update_vector(ctx)

    def enterDistexpr(self, ctx):
        self.update_vector(ctx)

    def enterLoopcomp(self, ctx):
        self.update_vector(ctx)

    def enterFor_loop(self, ctx):
        self.update_vector(ctx)

    def enterIf_stmt(self, ctx):
        self.update_vector(ctx)

    def enterElse_blk(self, ctx):
        self.update_vector(ctx)

    def enterFunction_call(self, ctx):
        self.update_vector(ctx)

    def enterFparam(self, ctx):
        self.update_vector(ctx)

    def enterFparams(self, ctx):
        self.update_vector(ctx)

    def enterReturn_or_param_type(self, ctx):
        self.update_vector(ctx)

    def enterFunction_decl(self, ctx):
        self.update_vector(ctx)

    def enterTransformedparam(self, ctx):
        self.update_vector(ctx)

    def enterTransformeddata(self, ctx):
        self.update_vector(ctx)

    def enterGeneratedquantities(self, ctx):
        self.update_vector(ctx)

    def enterFunctions(self, ctx):
        self.update_vector(ctx)

    def enterVal(self, ctx):
        self.update_vector(ctx)

    def enterDivop(self, ctx):
        self.update_vector(ctx)

    def enterString(self, ctx):
        self.update_vector(ctx)

    def enterExponop(self, ctx):
        self.update_vector(ctx)

    def enterMinusop(self, ctx):
        self.update_vector(ctx)

    def enterLt(self, ctx):
        self.update_vector(ctx)

    def enterUnary(self, ctx):
        self.update_vector(ctx)

    def enterEq(self, ctx):
        self.update_vector(ctx)

    def enterGt(self, ctx):
        self.update_vector(ctx)

    def enterRef(self, ctx):
        self.update_vector(ctx)

    def enterGeq(self, ctx):
        self.update_vector(ctx)

    def enterMulop(self, ctx):
        self.update_vector(ctx)

    def enterFunction(self, ctx):
        self.update_vector(ctx)

    def enterVecmulop(self, ctx):
        self.update_vector(ctx)

    def enterNe(self, ctx):
        self.update_vector(ctx)

    def enterLeq(self, ctx):
        self.update_vector(ctx)

    def enterTranspose(self, ctx):
        self.update_vector(ctx)

    def enterVecdivop(self, ctx):
        self.update_vector(ctx)

    def enterTernary(self, ctx):
        self.update_vector(ctx)

    def enterSubset(self, ctx):
        self.update_vector(ctx)

    def enterObserve(self, ctx):
        self.update_vector(ctx)

    def enterStatement(self, ctx):
        self.update_vector(ctx)

    def enterQuery(self, ctx):
        self.update_vector(ctx)

    def enterTemplate(self, ctx):
        self.update_vector(ctx)