Пример #1
0
def RunWithPars(pars, uid):
    startTime = Common.getCurrentTimeMil()
    reposFol = 'SavePath/Repos/'
    if not os.path.exists(config.basedir + '/' + reposFol):
        os.makedirs(config.basedir + '/' + reposFol)
    fname = reposFol + Common.gen_rnd_filename()
    os.makedirs(config.basedir + '/' + fname)
    ##ADD Field
    e = Repo()
    e.cloneFinishDate = "--RUNNING--"
    e.cloneStartDate = str(startTime)
    e.repoInfo = ''
    e.isPrivate = int(pars['isPrivate'])
    e.path = fname
    e.repoName = pars['repoName']
    e.url = pars['url']
    e.userId = uid
    db.session.add(e)
    db.session.commit()

    try:
        porcelain.clone(pars['url'], config.basedir + '/' + fname)
        endTime = Common.getCurrentTimeMil()
        e.cloneFinishDate = str(endTime)
        db.session.commit()

    except Exception as ex:
        print(ex)
        e.delete()
        db.session.commit()
Пример #2
0
    def NNFilterMulti(trainSeti, testSet, file, fout, name, vecin, count,
                      clfName, tunelrn, vSets):

        startTime = Common.getCurrentTimeMil()

        trainSet = DPLIB.NNFilterMulti(trainSeti, testSet, count)

        l = GLOB(clfName, tunelrn).getClassifier()
        if (tunelrn):

            l = l.getTunedCLF(trainSet, vSets, fout, name, file)

            print("#TUNE-LRN-PARAMS-" + name + ":" + file + ": " +
                  str(l.selectedParams))

            fout.write("#TUNE-LRN-PARAMS-" + name + ":" + file + ": ")
            fout.write(str(l.selectedParams))
            fout.write("\n")
            sCheck = l.getCLFOptions()

            print("#SETSET-LRN-PARAMS-" + name + ":" + file + ": " +
                  str(sCheck))

            fout.write("#SETSET-LRN-PARAMS-" + name + ":" + file + ": ")
            fout.write(str(sCheck))
            fout.write("\n")

        l.buildClassifier(trainSet)

        vec = l.evaluateModel(testSet)

        vecin = vec

        tvals = DPLIB.getConfMatrix(testSet[:, -1], vecin)

        print("#CONF-TEST-" + name + ":" + file + ": " + str(tvals))

        fout.write("#CONF-TEST-" + name + ":" + file + ": ")
        fout.write(str(tvals))
        fout.write("\n")

        auc = DPLIB.getAUC(testSet[:, -1], vec)
        vals = DPLIB.getMeasures(tvals)

        print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))

        fout.write(name + ":" + file + ": ")
        fout.write(str(vals))
        fout.write(" AUC = ")
        fout.write(str(auc))
        fout.write("\n")

        time = Common.getCurrentTimeMil() - startTime

        print("#TIME-FOR:" + name + ":" + file + ": " + str(time))
        fout.write("#TIME-FOR:" + name + ":" + file + ": " + str(time) + "\n")

        return vecin
Пример #3
0
def RunWithPars(pars, uid):
    """
    docstring here
        :param pars: 
        :param uid: 
    """

    startTime = Common.getCurrentTimeMil()

    modelsFol = 'SavePath/models/'
    if not os.path.exists(config.basedir + '/' + modelsFol):
        os.makedirs(config.basedir + '/' + modelsFol)
    fname = modelsFol + Common.gen_rnd_filename() + '.pkl'
    #ADD Field

    pars['pid'] = os.getpid()
    e = Experiment()
    e.endDateTime = "--RUNNING--"
    e.startDateTime = str(startTime)
    e.pars = json.dumps(pars)
    e.results = ""
    e.type = pars['type']
    e.expModelsFileName = ''
    e.userId = uid

    db.session.add(e)
    db.session.commit()

    ret, expModels = BCC.BCC.RunTests(uid, pars['type'], pars, config.basedir)
    pickle.dump(expModels, open(config.basedir + '/' + fname, 'wb'))
    if ret is None:
        e.delete()
        db.session.commit()
    else:
        endTime = Common.getCurrentTimeMil()
        e.endDateTime = str(endTime)
        e.startDateTime = str(startTime)
        e.pars = json.dumps(pars)
        e.expModelsFileName = fname
        e.results = json.dumps(ret)
        db.session.commit()

    for method in ret.keys():
        for ds in ret[method].keys():
            perfs = ret[method][ds]

            ep = ExperimentPrediction()

            ep.data = None
            ep.datasetName = ds
            ep.predResult = json.dumps(perfs)
            ep.expId = e.id
            ep.predDateTime = e.endDateTime
            ep.predType = 0  ###Test with known labels

            db.session.add(ep)
            db.session.commit()
Пример #4
0
    def CreateBucketsTune(self, trainSet, testSet, vSets, name, testCut,
                          iternum, save, superbit, clfName, tunelrn):

        startTime = Common.getCurrentTimeMil()
        best = None
        bestStages = 0
        bestBuckets = 0
        bestOut = None
        spentTimeIS = 0
        tempTime = 0
        rand = random.Random(Common.getCurrentTimeMil())
        for i in range(self.numOptions):

            while True:
                stages = rand.randint(0, self.maxStages - 1) + self.minStages
                buckets = rand.randint(0,
                                       self.maxBuckets - 1) + self.minBuckets
                break
                #if buckets>stages:
                #    break

            chrm = None
            while (True):
                chrm, out = self.CreateBuckets(trainSet, testSet, vSets, name,
                                               testCut, iternum, save,
                                               superbit, stages, buckets,
                                               False, clfName, tunelrn)
                if chrm != None:
                    break

            spentTimeIS += float(chrm.extra["SPENT-TIME-IS"])

            if (best == None):
                best = chrm
                bestBuckets = buckets
                bestStages = stages
                bestOut = out
            else:
                if (chrm.getFitness() > best.getFitness()):
                    best = chrm
                    bestBuckets = buckets
                    bestStages = stages
                    bestOut = out

        self.output += out
        time = Common.getCurrentTimeMil() - startTime

        self.prnt("#TIME-FOR:" + name + ":" + self.file + ": " + str(time) +
                  "\n")
        self.prnt("#TIME-FOR-IS:" + name + ":" + self.file + ": " +
                  str(spentTimeIS) + "\n")

        return best, out
Пример #5
0
def upload():
    """CKEditor file upload"""
    error = ''
    url = ''
    #callback = request.args.get("CKEditorFuncNum")
    if request.method == 'POST' and 'upload' in request.files:
        fileobj = request.files['upload']
        fname, fext = os.path.splitext(fileobj.filename)
        if fext.lower() not in ['.jpg', '.png']:
            return upload_fail('Only .jpg and .png extensions are allowed')
        rnd_name = '%s%s' % (Common.gen_rnd_filename(), fext)
        filepath = config.uploadsPath + '/' + rnd_name
        dirname = config.uploadsPath
        if not os.path.exists(dirname):
            try:
                os.makedirs(dirname)
            except:
                return upload_fail('ERROR_CREATE_DIR')
        elif not os.access(dirname, os.W_OK):
            return upload_fail('ERROR_DIR_NOT_WRITEABLE')
        if not error:
            #with open(filepath, 'w+') as destination:
            #    for chunk in fileobj.chunks():
            #        destination.write(chunk)
            fileobj.save(filepath)
            url = url_for('static', filename='upload/' + rnd_name)
            return upload_success(url=url)
        else:
            return upload_fail(error)
    else:
        return upload_fail('post error')
Пример #6
0
    def LOC50(testSeti, file, fout, name, locIndex):
        startTime = Common.getCurrentTimeMil()
        spentISTime = 0
        tempTime = 0
        spentISTime = Common.getCurrentTimeMil()
        allloc = testSeti[:, locIndex]

        med = np.median(allloc)
        predicted = [1 if t >= med else 0 for t in allloc]
        actual = testSeti[:, -1]
        tvals = DPLIB.getConfMatrix(actual, predicted)

        print("#CONF-TEST-" + name + ":" + file + ": " + str(tvals))

        fout.write("#CONF-TEST-" + name + ":" + file + ": ")
        fout.write(str(tvals))
        fout.write("\n")

        vals = DPLIB.getMeasures(tvals)

        auc = DPLIB.getAUC(actual, predicted)
        print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))

        fout.write(name + ":" + file + ": ")
        fout.write(str(vals))
        fout.write(" AUC = ")
        fout.write(str(auc))
        fout.write("\n")

        time = Common.getCurrentTimeMil() - startTime

        print("#TIME-FOR:" + name + ":" + file + ": " + str(time))
        fout.write("#TIME-FOR:" + name + ":" + file + ": " + str(time) + "\n")

        print("#TIME-FOR-IS:" + name + ":" + file + ": " + str(time))
        fout.write("#TIME-FOR-IS:" + name + ":" + file + ": " + str(time) +
                   "\n")
Пример #7
0
def submit_comment_reply():

    message = None
    if request.method == 'POST':

        isAnonym = True
        data = json.loads(request.data)
        commentText = data['replyText']
        inReplyTo = int(data['id'])
        commentorId = 0
        postId = int(data['postId'])

        if Shared.isLoggedIn():
            #return redirect(url_for('index'))
            user = Shared.getLoggedInUser()
            isAnonym = False
            name = user.name
            email = user.email
            commentorId = user.id
        else:
            name = 'Anonym'
            email = ''
            commentorId = 0

        pc = PostComment()
        pc.commentorEmail = email
        pc.commentorName = name
        pc.commentText = commentText
        pc.commentorId = commentorId
        pc.inReplyTo = inReplyTo
        pc.postId = postId
        pc.commentDate = Common.getCurrentTimeMil()

        db.session.add(pc)
        db.session.commit()
        resp = make_response(
            render_template('blog/singleComment.html', comment=pc))
        return json.dumps({
            'html': resp.data.decode("utf-8"),
            "id": str(inReplyTo)
        })
        #else:
        #    return render_template('blog/singleComment.html', comment = pc)
    return ""
Пример #8
0
def create_post():
    if not Shared.isLoggedIn():
        return redirect(url_for('index'))
    user = Shared.getLoggedInUser()
    if user.utype != 1:
        print('Not authorised to access')
        return redirect(url_for('index'))

    message = None
    if request.method == 'POST':
        title = request.form.get('title')
        body = request.form.get('ckeditor')
        bp = BlogPost()
        bp.postBody = body
        bp.postTitle = title
        bp.postDate = Common.getCurrentTimeMil()
        bp.postCreatorId = user.id
        bp.postPerms = 0
        bp.viewCount = 0
        db.session.add(bp)
        db.session.commit()
        message = 'Post Saved Successfully'
        return redirect(url_for('BPBlog.posts'))
    return render_template('blog/create_post.html', message=message)
Пример #9
0
def viewpost(id):

    if request.method == 'POST':
        if 'formAddComment' in request.form:
            name, email = None, None
            commentorId = 0
            if not Shared.isLoggedIn():
                name = request.form['commentorName']
                email = request.form['commentorEmail']

            else:
                user = Shared.getLoggedInUser()
                name = user.name
                email = user.email
                commentorId = user.id
            commentText = request.form['commentText']

            pc = PostComment()
            pc.commentorEmail = email
            pc.commentorName = name
            pc.commentText = commentText
            pc.commentorId = commentorId
            pc.inReplyTo = 0
            pc.postId = id
            pc.commentDate = Common.getCurrentTimeMil()

            db.session.add(pc)
            db.session.commit()

    post = BlogPost.query.filter_by(id=id).first()
    comments = PostComment.query.filter_by(postId=id)

    #if comments.count()<=0:
    #    comments = None

    commentsDict = None
    commentReplies = None
    if comments:
        commentsDict = {comment.id: comment for comment in comments}
        commentReplies = {}
        #ht = ""

        for comment in comments:
            commentReplies[comment.id] = []

        for comment in comments:
            if comment.inReplyTo == 0:
                continue
            commentReplies[comment.inReplyTo].append(comment)
            #if comment.inReplyTo == 0:

            #    parents.append(comment)
            #    p = 0
            #    ht+='<div class="row">'+ comment.commentText +'<a href="reply('+str(comment.id)+')" class="replyfont"> Reply </a>'+'</div>'
            #    ht = checkChilds(comment,ht,comments, p)
            #else:
            #    depths[comment.id] = depths[comment.inReplyTo]+1

    return render_template(
        'blog/viewpost.html',
        post=post,
        comments=comments,
        commentsDict=commentsDict,
        commentReplies=commentReplies)  # comments = comments, ht = ht)
Пример #10
0
    def CreateBuckets(self, trainSet, testSet, vSets, name, testCut, iternum,
                      save, superbit, stages, buckets, doprint, clfName,
                      tunelrn):

        out = []
        if self.isCount:
            keySet = list(
                DPLIB.getMeasuresCount([0, 1, 2, 3], [0, 1, 2, 3]).keys())
        else:
            keySet = list(
                DPLIB.getExtMeasures({
                    "tp": 1,
                    "tn": 2,
                    "fp": 3,
                    "fn": 4
                }).keys())

        out.append("#STARTED FOR-" + name + ":" + self.file + ": ")

        startTime = Common.getCurrentTimeMil()
        spentIsTime = 0
        tempTime = 0

        out.append("#Using also Label For train in LSH")

        if (vSets == None):

            vSets = []
            vSets.append(trainSet)

        if (save):
            DPLIB.SaveToCsv(
                trainSet, "MAIN-TRAIN-FILE-" + "ITER=" + str(iternum) + "--" +
                "METHOD=" + name + "--FILE=" + self.file + "--")
            DPLIB.SaveToCsv(
                testSet, "MAIN-TEST-FILE-" + "ITER=" + str(iternum) + "--" +
                "METHOD=" + name + "--FILE=" + self.file + "--")
            for i in range(len(vSets)):

                DPLIB.SaveToCsv(
                    trainSet,
                    "VSET-FILE-" + "INDEX=" + str(i) + "ITER=" + str(iternum) +
                    "--" + "METHOD=" + name + "--FILE=" + self.file + "--")

        np.random.shuffle(trainSet)
        np.random.shuffle(testSet)
        tempTime = Common.getCurrentTimeMil()
        count = len(trainSet)
        bins = {}
        # R^n
        n = trainSet.shape[1] - 1

        binid = 0
        #lshmin = LSHMinHash(stages, buckets, n);
        try:
            lshsuper = LSHSuperBit(stages=stages,
                                   buckets=buckets,
                                   dimensions=n)
        except Exception as ex:
            print('##SuperBit with specified parameters failed:' + str(ex))
            return None
        sp = 0.75
        # Compute a SuperBit signature, and a LSH hash
        for i in range(count):
            vector = trainSet[i, 1:].tolist()

            hash = None
            if (superbit):
                hash = lshsuper.hash(vector)
            else:
                ##Minhash support
                # #hash = lshmin.hash(vecBool);
                pass

            binid = hash[0]
            if not binid in bins.keys():
                bins[binid] = []

            bins[binid].append(trainSet[i])

        spentIsTime += Common.getCurrentTimeMil() - tempTime

        numBins = len(bins.keys())

        for binid in bins.keys():
            bins[binid] = np.array(bins[binid])

        out.append("#Number of BINS:" + name + ":" + self.file + ": " +
                   str(numBins))

        pop = []

        for i in bins.keys():

            trSet = bins[i]
            l = GLOB(clfName, tunelrn).getClassifier()

            #if (tunelrn):
            #    l = l.getTunedCLF(trSet, vSets,fout,name, file);

            l.buildClassifier(trSet)
            cf = 0
            j = 0

            allvecs = []
            confs = []
            allcfs = []
            allaucs = []
            valsA = None
            confsA = None
            aucA = 0.0
            for vSet in vSets:

                vec = None
                actuals = None

                vec = l.evaluateModel(vSet)
                actuals = vSet[:, -1]

                vals = None
                auc = 0
                if self.isCount:
                    vals = DPLIB.getMeasuresCount(actuals, vec)
                else:

                    auc = DPLIB.getAUC(actuals, vec)
                    aucA += auc
                    allaucs.append(auc)
                    if (testCut):
                        vCF = 0.1
                        bestCF = 0
                        bestCFVal = -1
                        bestVals = None

                        while True:

                            tvals = DPLIB.getConfMatrix(actuals, vec, vCF)
                            measures = DPLIB.getMeasures(tvals)
                            fit = measures["F"] * measures["GMean1"]
                            if (fit > bestCFVal or bestVals == None):

                                bestCFVal = fit
                                bestCF = vCF
                                bestVals = tvals

                            vCF += 0.1

                            if (vCF >= 1):
                                break

                        if (confsA == None):

                            confsA = {key: 0 for key in bestVals.keys()}

                        for j in confsA.keys():
                            confsA[j] += bestVals[j]

                        confs.append(bestVals)

                        vals = DPLIB.getMeasures(bestVals)
                        cf += bestCF
                        allcfs.append(bestCF)

                    else:

                        tvals = DPLIB.getConfMatrix(actuals, vec)

                        if (confsA == None):

                            confsA = {key: 0 for key in tvals.keys()}

                        for j in confsA.keys():
                            confsA[j] += tvals[j]

                        confs.append(tvals)

                        vals = DPLIB.getMeasures(tvals)
                        allcfs.append(DPLIB.DefaultCF)

                allvecs.append(vals)

                if (valsA == None):
                    valsA = {key: 0 for key in keySet}

                for j in keySet:
                    valsA[j] += vals[j]

            for j in keySet:
                valsA[j] /= len(vSets)

            h = None
            if not self.isCount:
                for j in confsA.keys():
                    confsA[j] /= len(vSets)

                if (testCut):
                    cf /= len(vSets)

                aucA /= len(vSets)

                h = CHRM_GIS(trSet, valsA, aucA)
                h.fitnesses = allvecs
                h.aucs = allaucs
                h.conf = confsA
                h.confs = confs
                h.allcfs = allcfs
                if (testCut):
                    h.bestCF = cf
                else:
                    h.bestCF = DPLIB.DefaultCF
            else:

                h = CHRM_GIS_Count(trSet, valsA)
                h.fitnesses = allvecs

            pop.append(h)
            l = None

        tempTime = Common.getCurrentTimeMil()
        pop = DPLIB.MySort(pop)
        spentIsTime += Common.getCurrentTimeMil() - tempTime
        top = pop[0]

        out.append("#Instances in Top:" + str(len(top.ds)))

        out.append("#STAGES:" + name + ":" + self.file + ": " + str(stages))
        out.append("#BUCKETS:" + name + ":" + self.file + ": " + str(buckets))
        if not self.isCount:
            out.append("#BEST-CF-VALUE:" + name + ":" + self.file + ": " +
                       str(top.bestCF))

        l = GLOB(clfName, tunelrn).getClassifier()

        if (tunelrn):

            l = l.getTunedCLF(top.ds, vSets, fout, name, file)

            out.append("#TUNE-LRN-PARAMS-" + name + ":" + self.file + ": " +
                       str(l.selectedParams))
            sCheck = l.getCLFOptions()
            out.append("#SETSET-LRN-PARAMS-" + name + ":" + self.file + ": " +
                       str(sCheck))

        l.buildClassifier(top.ds)

        vec = l.evaluateModel(testSet)

        out.append("#LSH-FOR-TOP-ONLY")

        if self.isCount:
            vals = DPLIB.getMeasuresCount(testSet[:, -1], vec)
            out.append(name + ":" + self.file + ": " + str(vals))
        else:
            tvals = DPLIB.getConfMatrix(testSet[:, -1], vec, top.bestCF)
            out.append("#CONF-TEST-" + name + ":" + self.file + ": " +
                       str(tvals))
            vals = DPLIB.getMeasures(tvals)
            auc = DPLIB.getAUC(testSet[:, -1], vec)
            vals['auc'] = auc
            out.append(name + ":" + self.file + ": " + str(vals))

        for i in range(len(pop)):

            pop[i] = None

        pop = None

        for i in bins.keys():
            bins[i] = None

        bins = None

        time = Common.getCurrentTimeMil() - startTime

        if (name.find("LSHTune") < 0):
            out.append("#TIME-FOR:" + name + ":" + self.file + ": " +
                       str(time))
            out.append("#TIME-FOR-IS:" + name + ":" + self.file + ": " +
                       str(spentIsTime))
            self.output = +out

        top.addToExtra("SPENT-TIME-IS", float(spentIsTime))

        return top, out
Пример #11
0
    def run(self, trainSeti, testSeti, name, fout, vSets, vSetType,
            fixedTrainSize, log, ignoreOK, threshold, thresholds, rejectedFits,
            rejectedPerfs, rejectedTestPerfs, clfName):

        mad = 0.0
        if self.isCount:
            keySet = list(
                DPLIB.getMeasuresCount([0, 1, 2, 3], [0, 1, 2, 3]).keys())
            mad = DPLIB.SetBugCountForMut(trainSeti)
        else:
            keySet = list(
                DPLIB.getExtMeasures({
                    "tp": 1,
                    "tn": 2,
                    "fp": 3,
                    "fn": 4
                }).keys())
        startTime = Common.getCurrentTimeMil()
        tempTime = 0
        spentISTime = 0

        #For Binary Prediction, isCount = False
        auc = 0
        preds = []
        pop = []

        trainSet = np.copy(trainSeti)
        testSet = np.copy(testSeti)
        pop.clear()

        tstSize = len(testSet)
        partSize = int(tstSize / self.numParts)
        preds.clear()
        diffs = []
        auc = 0.0

        #For isCount = True
        actuals = []
        prrs = []

        if (log):
            self.prnt("#GIS-OPTIONS;;For=" + name + "@" + ":iters=" +
                      str(self.iters) + "-POPSIZE=" + str(self.popSize) +
                      "-NumParts=" + str(self.numParts) + "-NumGens=" +
                      str(self.numGens) + "-sizeTop=" + str(self.sizeTopP) +
                      "-Learner=" + clfName + "\n")

        isOK = True

        np.random.shuffle(testSet)
        self.FinalLearners = []
        self.FinalDatasets = []
        for p in range(self.numParts):

            diffp = []

            self.prnt("\n" + str(p) + ": ")

            tempTime = Common.getCurrentTimeMil()
            pop.clear()
            start = p * partSize
            end = (p + 1) * partSize
            if (end > tstSize):
                end = tstSize

            if (p == self.numParts - 1):
                end = tstSize

            testPart = testSet[start:end, :]

            spentISTime += Common.getCurrentTimeMil() - tempTime

            uinds = set()
            if (vSets == None or len(vSets) == 0):

                if (vSets == None):
                    vSets = []

                vSet = None
                retVal = ""
                if (vSetType == 'Train Set'):

                    vSet = trainSeti
                    if (log):
                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")

                        retVal = None

                elif (vSetType == 'NN-Filter'):
                    tempTime = Common.getCurrentTimeMil()
                    vSet = DPLIB.NNFilter(trainSet, testPart, 1)
                    spentISTime += Common.getCurrentTimeMil() - tempTime

                    if (log):

                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")
                        retVal = None

                #If random, but not fed into the func, generate one randomly, with size of testPart
                elif (vSetType == 'Multiple Random'
                      or vSetType == 'Single Random'):

                    size = len(testPart)
                    vSet = []
                    j = 0
                    while (j < size):
                        index = np.random.randint(trainSet.numInstances())

                        if (not index in uinds):
                            uinds.add(index)
                        else:
                            continue

                        vSets.append(trainSet[index])

                        j += 1

                    if (log):

                        retVal = DPLIB.getStats(vSet, true, true, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")
                        retVal = None

                    vSet = np.array(vSet)

                elif (vSetType == '!!TEST!!'):

                    #Upper Bound. Should not be used.
                    self.prnt("Should not be used.")
                    vSet = testSeti
                    if (log):

                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")

                        retVal = None

                elif vSetType == 'KS2':
                    vSet = None
                vSets.append(vSet)

            else:

                retVal = ""
                for vSet in vSets:

                    if (log):
                        retVal = DPLIB.getStats(vSet, True, True, True)
                        self.prnt("#VSETINFO;;prt=" + str(p) + ";;For=" +
                                  name + "@" + ":" + retVal + "\n")
                        retVal = None

            for i in range(self.popSize):
                tempTime = Common.getCurrentTimeMil()
                uinds.clear()

                size = 0

                if (fixedTrainSize):
                    size = self.chrmSize
                else:
                    size = np.random.randint(self.chrmSize) + 10

                while True:
                    trSet = []
                    j = 0
                    while (j < size):
                        index = np.random.randint(len(trainSet))

                        trSet.append(trainSet[index])

                        if (not index in uinds):
                            uinds.add(index)

                        j += 1

                    spentISTime += Common.getCurrentTimeMil() - tempTime
                    trSet = np.array(trSet)
                    if len(set(list(trSet[:, -1]))) >= 2:
                        break

                tempTime = Common.getCurrentTimeMil()

                pv, p_vals = DPLIB.checkSimilarity(trSet[:, :-1],
                                                   testPart[:, :-1])

                if self.isCount:
                    h = CHRM_GIS_Count(trSet, None, extraAsFitness='p-val')
                    h.addToExtra('p-val', sum(p_vals))
                    pop.append(h)
                else:

                    h = CHRM_GIS(trSet, None, None, extraAsFitness='p-val')
                    h.addToExtra('p-val', sum(p_vals))
                    pop.append(h)

                spentISTime += Common.getCurrentTimeMil() - tempTime

            tempTime = Common.getCurrentTimeMil()
            pop = DPLIB.MySort(pop)
            spentISTime += Common.getCurrentTimeMil() - tempTime

            cnt = 0
            g = 0
            for g in range(self.numGens):
                self.prnt(str(g) + " ")
                if (log):
                    pass
                    #retVal = ""
                    #for i in range(len(pop)):

                    #    chrm = pop[i]
                    #    retVal = DPLIB.getStats(chrm.ds, False, False, False);
                    #    self.prnt("#POPITNFO;;gn="+str(g)+";;prt="+str(p)+";;For="+name+"@"+":"+retVal+"\n");
                    #    self.prnt("#POPITVALS;;gn="+str(g)+";;prt="+str(p)+";;For="+name+"@"+":"+"rpaf="+str(chrm.fitness).replace(", ", ",")
                    #            +";;conf="+str(chrm.conf).replace(", ", ",")+";;fit="+str(chrm.getFitness())+";;TConf2="+str(chrm.testConf).replace(", ", ",")+";;TRpaf2="+str(chrm.testFitness).replace(", ", ",")+"\n");
                    #    retVal = None;

                tempTime = Common.getCurrentTimeMil()
                newPop = []
                for i in range(self.sizeTopP):
                    newPop.append(pop[i])

                i = 0
                for i in range(0, len(pop) - self.sizeTopP, 2):
                    idx1 = 0
                    idx2 = 0
                    while (idx1 == idx2):
                        if (cnt >= 3):
                            idx1 = np.random.randint(len(pop))
                            idx2 = np.random.randint(len(pop))
                        else:
                            idx1 = GA.tornament(pop)
                            idx2 = GA.tornament(pop)
                            cnt += 1

                    cnt = 0
                    ds1 = pop[idx1].ds
                    ds2 = pop[idx2].ds
                    while True:

                        ds1, ds2 = GA.crossOver(ds1,
                                                ds2,
                                                fixedTrainSize,
                                                isCount=self.isCount)
                        if len(set(list(ds1[:, -1]))) >= 2 and len(
                                set(list(ds2[:, -1]))) >= 2:
                            break
                        self.prnt('repeat cross')
                    while True:
                        ds1 = GA.Mutate(ds1, isCount=self.isCount, mad=mad)
                        if len(set(list(ds1[:, -1]))) >= 2:
                            break
                        self.prnt(
                            'repeat mut ds1, because all elements are of type one class'
                        )

                    while True:

                        ds2 = GA.Mutate(ds2, isCount=self.isCount, mad=mad)
                        if len(set(list(ds2[:, -1]))) >= 2:
                            break
                        self.prnt(
                            'repeat mut ds1, because all elements are of type one class'
                        )
                    if self.isCount:
                        newPop.append(
                            CHRM_GIS_Count(ds1, None, extraAsFitness='p-val'))
                        newPop.append(
                            CHRM_GIS_Count(ds2, None, extraAsFitness='p-val'))
                    else:
                        newPop.append(
                            CHRM_GIS(ds1, None, extraAsFitness='p-val'))
                        newPop.append(
                            CHRM_GIS(ds2, None, extraAsFitness='p-val'))

                spentISTime += Common.getCurrentTimeMil() - tempTime

                for i in range(len(newPop)):

                    tempTime = Common.getCurrentTimeMil()

                    pv, p_vals = DPLIB.checkSimilarity(newPop[i].ds[:, :-1],
                                                       testPart[:, :-1])

                    newPop[i].addToExtra('p-val', sum(p_vals))

                    spentISTime += Common.getCurrentTimeMil() - tempTime

                tempTime = Common.getCurrentTimeMil()

                newPop = DPLIB.MySort(newPop)
                exit = False
                countComp = 0

                newPop, rdel = DPLIB.CombinePops(pop, newPop)

                if (log):
                    pass
                    #retVal = ""
                    #for i in range(len(rdel)):

                    #    chrm = rdel[i];
                    #    retVal = DPLIB.getStats(chrm.ds, False, False, False);
                    #    self.prnt("#POPDELITNFO;;gn="+str(g)+";;prt="+str(p)+";;For="+name+"@"+":"+retVal+";;rpaf="+str(chrm.fitness).replace(", ", ",")
                    #            +";;conf="+str(chrm.conf).replace(", ", ",")+";;fit="+str(chrm.getFitness())+";;TConf2="+str(chrm.testConf).replace(", ", ",")+";;TRpaf2="+str(chrm.testFitness).replace(", ", ",")
                    #            +"\n");

                    #    retVal = None;

                rdel = None

                diff = abs(
                    GA.GetMeanFittness(pop, countComp) -
                    GA.GetMeanFittness(newPop, countComp))
                if (diff < 0.000001):
                    exit = True

                diffp.append(diff)

                pop = newPop
                if (pop[0].getFitness() > 0.0) and (exit):
                    break

                exit = False
                spentISTime += Common.getCurrentTimeMil() - tempTime

            w = []
            if (self.count == 0):
                self.count = len(pop)

            for i in range(self.count):
                l = GLOB(clfName).getClassifier()
                tds = pop[i].ds
                self.FinalLearners.append(l)
                self.FinalDatasets.append(tds)
                testPartI = testPart

                l.buildClassifier(tds)

                if self.isCount:
                    actual = DPLIB.getActuals(testPartI)
                    prr = l.evaluateModel(testPartI)
                    #vals = DPLIB.getMeasuresCount(actual,prr)

                    actall = None
                    predall = None
                    if (len(actuals) == self.count):

                        actuals[i] = actuals[i] + actual
                        prrs[i] = prrs[i] + prr
                    else:
                        actuals.append(actual)
                        prrs.append(prr)

                else:

                    vec = l.evaluateModel(testPartI)

                    if (len(preds) == self.count):
                        preds[i] += list(vec)
                    else:
                        preds.append(list(vec))

                if (log):
                    pass
                    #retVal = DPLIB.getStats(tds, True, True, True);
                    #self.prnt("#TRPRTNFO;;prt="+str(p)+";;For="+name+"@"+":"+retVal+"\n");

                    #retVal = DPLIB.getStats(testPart,true,true, True);
                    #self.prnt("#TSTPRTNFO;;prt="+str(p)+";;For="+name+"@"+":"+retVal+"\n");
                    #vals = DPLIB.getConfMatrix(testPart[:,-1],vec)

                    #self.prnt("#TSTPRTVALS;;prt="+str(p)+";;For="+name+"@"+":"+
                    #        "rpaf="+str(DPLIB.getMeasures(vals)).replace(", ", ",")
                    #            +";;conf="+str(vals).replace(", ", ",")+"\n");

                    #retVal = None;

                w.append(pop[i].getFitness())

        isOK = True

        if not isOK:
            pass
        else:
            thresholds.append(pop[0].getFitness())

        self.prnt()
        self.prnt("Best Top Fitness:" + str(pop[0].fitness))
        self.prnt("Best Fitness (mean):", pop[0].getMeanFitness())

        if self.isCount:
            vals = DPLIB.getMeasuresCountSet(actuals, prrs)
        else:
            vals1 = DPLIB.getConfMatrixSet(testSet[:, -1], preds)
            vals = DPLIB.getMeasures(vals1)

        if (isOK):

            if not self.isCount:

                if (len(preds) == 1):
                    auc = DPLIB.getAUC(testSet[:, -1], preds[0])
                else:
                    auc = DPLIB.getAUCSet(testSet[:, -1], preds)

                vals['auc'] = auc
                self.prnt()
                self.prnt("#CONF-TEST:" + name + ":" + self.file + ": " +
                          str(vals1))

                self.prnt()
                self.prnt(name + ":" + self.file + ": " + str(vals))

                self.prnt()
            else:
                self.prnt()
                self.prnt(name + ":" + self.file + ": " + str(vals))
                self.prnt()
        else:

            bestI = pop[0]
            rejectedFits.append(bestI.getFitness())

            rejVals = copy.deepcopy(bestI.fitness)
            rejectedPerfs.append(rejVals)

            testRejVals = copy.deepcopy(vals)

            rejectedTestPerfs.eppend(testRejVals)

            self.prnt("#NOTOKPREDS----" + name + ":" + self.file + ": " +
                      str(vals))

            if not self.isCount:
                self.prnt()
                self.prnt("#NOTOKPREDS----" + "#CONF-TEST:" + name + ":" +
                          self.file + ": " + str(vals1))

        time = Common.getCurrentTimeMil() - startTime

        self.prnt("#TIME-FOR:" + name + ":" + self.file + ": " + str(time))

        self.prnt("#TIME-FOR-IS:" + name + ":" + self.file + ": " +
                  str(spentISTime))

        return isOK
Пример #12
0
    def NNFilter(trainSeti, testSet, file, fout, name, vecin, count, clfName,
                 tunelrn, vSets, testCut):

        startTime = Common.getCurrentTimeMil()
        spentISTime = 0
        tempTime = 0

        bestFit = 0.0
        bestCount = 0
        btrainSet = None
        cfbf = DPLIB.DefaultCF

        if (count == 0):
            for i in range(1, 11):

                tempTime = Common.getCurrentTimeMil()

                trainSet = DPLIB.NNFilter(trainSeti, testSet, i)

                spentISTime += Common.getCurrentTimeMil() - tempTime

                l = GLOB(clfName, tunelrn).getClassifier()

                if (tunelrn):
                    l = l.getTunedCLF(trainSet, vSets, fout, name, file)

                l.buildClassifier(trainSet)

                avgFit = 0.0
                j = 0
                for j in range(len(vSets)):

                    vec = l.evaluateModel(vSets[j])

                    tvals = DPLIB.getConfMatrix(vSets[j][:, -1], vec)
                    measures = DPLIB.getExtMeasures(tvals)
                    fit = measures["F"] * measures["GMean1"]
                    avgFit += fit

                avgFit /= len(vSets)

                if (avgFit > bestFit):
                    bestFit = avgFit
                    bestCount = i
                    btrainSet = trainSet[:, :]

            if (testCut):

                cf = 0
                j = 0

                trainSet = btrainSet

                l = GLOB(clfName, tunelrn).getClassifier()

                if (tunelrn):
                    l = l.getTunedCLF(trainSet, vSets, fout, name, file)

                l.buildClassifier(trainSet)
                avgFit = 0.0

                for j in range(len(vSets)):

                    vec = l.evaluateModel(vSets[j])
                    vCF = 0.1
                    bestCF = 0
                    bestCFVal = -1
                    bestVals = None

                    while True:
                        tvals = DPLIB.getConfMatrix(vSets[j][:, -1], vec, vCF)
                        measures = DPLIB.getExtMeasures(tvals)
                        fit = measures["F"] * measures["GMean1"]
                        if (fit > bestCFVal or bestVals == None):
                            bestCFVal = fit
                            bestCF = vCF
                            bestVals = tvals

                        vCF += 0.1
                        if vCF >= 1:
                            break
                    cf += bestCF

                cf /= vSets.size()
                cfbf = cf

        trainSet = None
        if (count == 0):
            trainSet = btrainSet
        else:
            tempTime = Common.getCurrentTimeMil()
            trainSet = DPLIB.NNFilter(trainSeti, testSet, count)
            spentISTime = Common.getCurrentTimeMil() - tempTime
            bestCount = count

        l = GLOB(clfName, tunelrn).getClassifier()

        if (tunelrn):
            l = l.getTunedCLF(trainSet, vSets, fout, name, file)

            print("#TUNE-LRN-PARAMS-" + name + ":" + file + ": " +
                  str(l.selectedParams))
            fout.write("#TUNE-LRN-PARAMS-" + name + ":" + file + ": ")
            fout.write(str(l.selectedParams))
            fout.write("\n")
            sCheck = l.getCLFOptions()

            print("#SETSET-LRN-PARAMS-" + name + ":" + file + ": " +
                  str(sCheck))
            fout.write("#SETSET-LRN-PARAMS-" + name + ":" + file + ": ")
            fout.write(str(sCheck))
            fout.write("\n")

        l.buildClassifier(trainSet)

        vec = l.evaluateModel(testSet)

        vecin = vec

        tvals = DPLIB.getConfMatrix(testSet[:, -1], vecin, cfbf)
        if (count == 0):

            print("#BESTCOUNT-" + name + ":" + file + ": " + str(bestCount))

            fout.write("#BESTCOUNT-" + name + ":" + file + ": ")
            fout.write(str(bestCount))
            fout.write("\n")

            print("#BESTFIT-" + name + ":" + file + ": " + str(bestFit))
            fout.write("#BESTFIT-" + name + ":" + file + ": ")
            fout.write(str(bestFit))
            fout.write("\n")

        print("#CONF-TEST-" + name + ":" + file + ": " + str(tvals))
        fout.write("#CONF-TEST-" + name + ":" + file + ": ")
        fout.write(str(tvals))
        fout.write("\n")
        if (testCut):

            print("#NN-BEST-CF-VALUE:" + name + ":" + file + ": " + str(cfbf))

            fout.write("#NN-BEST-CF-VALUE:" + name + ":" + file + ": ")
            fout.write(str(cfbf))
            fout.write("\n")

        vals = DPLIB.getMeasures(tvals)
        auc = DPLIB.getAUC(testSet[:, -1], vecin)
        print(name + ":" + file + ": " + str(vals) + " AUC = " + str(auc))

        fout.write(name + ":" + file + ": ")
        fout.write(str(vals))
        fout.write(" AUC = ")
        fout.write(str(auc))
        fout.write("\n")

        time = Common.getCurrentTimeMil() - startTime

        print("#TIME-FOR:" + name + ":" + file + ": " + str(time))
        fout.write("#TIME-FOR:" + name + ":" + file + ": " + str(time) + "\n")

        print("#TIME-FOR-IS:" + name + ":" + file + ": " + str(spentISTime))
        fout.write("#TIME-FOR-IS:" + name + ":" + file + ": " +
                   str(spentISTime) + "\n")

        return vecin
Пример #13
0
 def test001(self):
     res = Common.execute_command("tns --version")
     print res
     assert 1 == 1
Пример #14
0
 def tearDown(self):
     print "End of test " + self._testMethodName
     name = cwd + "\\" + self._testMethodName + ".png"
     if sys.exc_info() == (None, None, None):
         Common.capture_screen(name)
         os.remove(name)
Пример #15
0
    def WCFolds(testSet, folds, file, fout, name, clfName):

        auc = 0
        preds = []
        actuals = []
        vals = None
        tssCopy = testSet[:, :]
        rnd = random.Random(Common.getCurrentTimeMil())
        np.random.shuffle(tssCopy)

        skf = StratifiedKFold(n_splits=folds)
        X = tssCopy[:, :-1]
        y = tssCopy[:, -1]
        for train_index, test_index in skf.split(X, y):

            cvtrain, cvtest = X[train_index], X[test_index]
            cvtrainY, cvtestY = y[train_index], y[test_index]

            cvtrain = np.append(cvtrain,
                                cvtrainY.reshape((len(cvtrainY), 1)),
                                axis=1)

            cvtest = np.append(cvtest,
                               cvtestY.reshape((len(cvtestY), 1)),
                               axis=1)

            if (name.lower().find("infogain") >= 0):
                pass
                #int indi[] = DPLIB.fSelectInfoGain(cvtrain);
                #if (DPLIB.useIterativeInfoGainSubsetting)
                #{
                #    indi = DPLIB.iterativeInfoGainSubsetting(cvtrain, indi, clfName);
                #}
                #else
                #    indi = DPLIB.getTopX(indi);
                #cvtrain = DPLIB.fSelectSet(cvtrain, indi);
                #cvtest = DPLIB.fSelectSet(cvtest, indi);

            m = GLOB(clfName).getClassifier()
            m.buildClassifier(cvtrain)

            vec = m.evaluateModel(cvtest)

            preds.append(vec)
            actuals.append(cvtestY)
            if vals == None:

                vals = DPLIB.getConfMatrix(cvtestY, vec)

            else:

                v2 = DPLIB.getConfMatrix(cvtestY, vec)

                for key in vals.keys():
                    vals[key] += v2[key]

        auc = DPLIB.getAUCCV(actuals, preds)
        vals1 = DPLIB.getMeasures(vals)
        print(name + ":" + file + ": " + str(vals1) + " AUC = " + str(auc))
        fout.write("\n" + name + ":" + file + ": " + " AUC = " + str(auc) +
                   ";" + "Vals=" + str(vals1))
Пример #16
0
    def start_test(self):
        #set up
        TcStatus=0
        msg='测试开始...'
        s_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        log.WritetoLog(self.logfile,msg)
        #test
        try:
            #set up
            self.driver = webdriver.Remote('http://localhost:4723/wd/hub', self.desired_caps)
            time.sleep(8)
            #test

            #registry
            '''
            registry_result = account.create_user(self.sn)
            self.username = registry_result[0]
            self.password = registry_result[1]
            '''
            self.username = '******'
            self.password = '******'
            #Login
            self.login_result = al.login_to_page(self.driver,self.username,self.password,self.sn)
            log.WritetoLog(self.logfile,'Login_result:'+str(self.login_result))
            assert self.login_result == 1
            time.sleep(5)

            #Get gateway version             
            version_content = al.get_version(self.driver)
            log.WritetoLog(self.logfile,'Get_version_result:'+str(version_content))
            if self.gateway_version == version_content:
                self.version_check_result = 1
            assert self.version_check_result == 1

            #add area and devices
            area = 'Test'
            device_list = [[self.shuangkai_mac,'1','2'],[self.sankai_mac,'2','1']]
            add_device_result = al.add_area_devices(self.driver,area,device_list)
            assert add_device_result == 1

            '''
            #wait for device
            msg = 'Please wait 2 minutes'
            log.WritetoLog(self.logfile,msg)
            al.wait_for_access(self.driver)
            #time.sleep(120)
            '''
            '''
            #status check
            light_control_result  = al.get_all_opendevice_in_light(self.driver)
            if len(light_control_result) == 1 and light_control_result[0] == self.light_result:
                self.status_check_result = 1
            else:
                self.status_check_result = 0
            log.WritetoLog(self.logfile,'status_check_result:'+str(self.status_check_result))
            assert self.status_check_result == 1

            #control test
            control_test_result = al.control_device_in_device(self.driver)
            log.WritetoLog(self.logfile,'Control_test_result:'+str(control_test_result))
            self.control_test_result = control_test_result
            assert self.control_test_result == 1
            '''
            TcStatus=1
        except Exception as e:
            msg= 'Error occurred:'+str(e)
            log.WritetoLog(self.logfile,msg)
            self.driver.quit()
            self.driver = webdriver.Remote('http://localhost:4723/wd/hub', self.desired_caps)
            time.sleep(5)
            self.login_result = al.login_to_page(self.driver,self.username,self.password,self.sn)
        finally:
            #teardown
            try:
                clear_env_result  = al.clear_env(self.driver)

                log.WritetoLog(self.logfile,'环境恢复成功!')
            except Exception as e:
                print e
                log.WritetoLog(self.logfile,'环境恢复失败!')
            finally:
                self.driver.quit()

            msg='测试结果:'+str(TcStatus) #Set Result
            log.WritetoLog(self.logfile,msg)             
            msg='测试结束...'
            log.WritetoLog(self.logfile,msg) 
            e_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            #generate report
            Common.generate_report(self.report,self.sn,self.login_result,self.version_check_result,self.status_check_result,self.control_test_result,self.env_clear_result,TcStatus,s_time,e_time)
Пример #17
0
    def run(self):
        lrnrnames = self.lrnrs
        try:
            rnd = random.Random(Common.getCurrentTimeMil())            
            if self.expType == 'GIS':
                if self.isKS:
                    self.gis = GISKS2(self.pars,self.file)
                else:
                    self.gis = GIS(self.pars)
            elif self.expType == 'LSH':
                lsh = CPDP_LSH_Binary(self.pars, self.file)

            trainSetAll = DPLIB.LoadCSV(self.train, self.dp, self.features, convertToBinary = not self.isCount);
            testSetAll = DPLIB.LoadCSV(self.test, self.dp, self.features, convertToBinary = not self.isCount);  
            

            ft = 'A'
            indi = None

            if not self.isCount:

                if self.pars['features'] == 'Iterative InfoGain Subsetting':
                    ft = 'IG'
                    indi = DPLIB.fSelectInfoGain(trainSetAll);

            
            if self.pars['features'] == 'All':
                print ('All')

            if self.pars['features'] == 'PCA':
                ft = 'PCA'
                print ('PCA')
                trainSetAll, testSetAll = DPLIB.applyPCA(trainSetAll, testSetAll, 0.95)
                        
            for lk in range(len(lrnrnames)):                

                lrnr = "-" + lrnrnames[lk];

                clfName = lrnrnames[lk];

                vSets = None                                
                
                if not self.isCount:
                    if self.pars['features'] == 'Iterative InfoGain Subsetting':

                        indis2 = DPLIB.iterativeInfoGainSubsetting(trainSetAll, indi, clfName);
                        trainSetAll = DPLIB.fSelectSet(trainSetAll, indis2);
                        testSetAll = DPLIB.fSelectSet(testSetAll, indis2);
                
                if self.pars['vSetType'] in ['Single Random','Multiple Random']:
                    vSets = DatasetUtils.getRandomDatasets()
                c = 0
                while (c < self.iters):

                    print("Start:" + self.file + ": " + str(c));
                    print("====================================================");
                    #fout.write("#ITERINFO:For File=" +self.file + "-Iter:" + str(c) + "\n");

                    stages = None
                    buckets = None
                    sbtx = ""
                                                                    
                    if self.expType == 'GIS':
                        self.doGIS(trainSetAll, testSetAll, "FIXED-VMUL-GEN-"+ft, lrnr, fout, vSets, False, clfName, gis=gis);
                        gis.prnt('---------------------------------------\n')
                    
                        
                    elif self.expType == 'LSH':
                        lsh.CreateBucketsTune(trainSetAll, testSetAll, vSets, name= "LSHTune-ALL-TOP-SUPER" + sbtx + lrnr, testCut= self.pars['tunecut'], iternum=c, save=False, superbit=self.pars['lshType'] =='SuperBit', clfName=clfName,tunelrn = self.pars['tunelrnr']);
                        lsh.prnt('---------------------------------------\n')

                    c+=1
                    
            #fout.write("===================================================================\n");
            #fout.close();

            print("File Processing Ended:" +self.file);
        except Exception as e:

            try:
                print (str(e))
                print(traceback.format_exc())                
            except Exception as ex2:
                print("X2", str(ex2));
                print(traceback.format_exc())
                
        if self.expType == 'GIS':
            return gis
        elif self.expType == 'LSH':
            return lsh